Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm-c/Comdat.h11
-rw-r--r--llvm/include/llvm-c/Core.h16
-rw-r--r--llvm/include/llvm-c/DebugInfo.h82
-rw-r--r--llvm/include/llvm-c/DisassemblerTypes.h10
-rw-r--r--llvm/include/llvm-c/Error.h11
-rw-r--r--llvm/include/llvm-c/ErrorHandling.h10
-rw-r--r--llvm/include/llvm-c/IRReader.h11
-rw-r--r--llvm/include/llvm-c/LLJIT.h11
-rw-r--r--llvm/include/llvm-c/Linker.h11
-rw-r--r--llvm/include/llvm-c/Orc.h54
-rw-r--r--llvm/include/llvm-c/OrcEE.h11
-rw-r--r--llvm/include/llvm-c/Support.h10
-rw-r--r--llvm/include/llvm-c/TargetMachine.h10
-rw-r--r--llvm/include/llvm-c/Transforms/PassBuilder.h13
-rw-r--r--llvm/include/llvm-c/lto.h12
-rw-r--r--llvm/include/llvm/ADT/APFloat.h4
-rw-r--r--llvm/include/llvm/ADT/APInt.h883
-rw-r--r--llvm/include/llvm/ADT/APSInt.h10
-rw-r--r--llvm/include/llvm/ADT/ArrayRef.h4
-rw-r--r--llvm/include/llvm/ADT/BitVector.h24
-rw-r--r--llvm/include/llvm/ADT/CombinationGenerator.h148
-rw-r--r--llvm/include/llvm/ADT/DenseMapInfo.h16
-rw-r--r--llvm/include/llvm/ADT/EquivalenceClasses.h33
-rw-r--r--llvm/include/llvm/ADT/FunctionExtras.h16
-rw-r--r--llvm/include/llvm/ADT/Hashing.h8
-rw-r--r--llvm/include/llvm/ADT/ImmutableList.h3
-rw-r--r--llvm/include/llvm/ADT/IntervalMap.h2
-rw-r--r--llvm/include/llvm/ADT/MapVector.h1
-rw-r--r--llvm/include/llvm/ADT/PointerIntPair.h4
-rw-r--r--llvm/include/llvm/ADT/PointerUnion.h31
-rw-r--r--llvm/include/llvm/ADT/STLExtras.h171
-rw-r--r--llvm/include/llvm/ADT/Sequence.h164
-rw-r--r--llvm/include/llvm/ADT/SetOperations.h9
-rw-r--r--llvm/include/llvm/ADT/SmallBitVector.h51
-rw-r--r--llvm/include/llvm/ADT/SmallVector.h15
-rw-r--r--llvm/include/llvm/ADT/StringExtras.h137
-rw-r--r--llvm/include/llvm/ADT/StringMap.h27
-rw-r--r--llvm/include/llvm/ADT/StringRef.h3
-rw-r--r--llvm/include/llvm/ADT/Triple.h158
-rw-r--r--llvm/include/llvm/ADT/TypeSwitch.h7
-rw-r--r--llvm/include/llvm/ADT/iterator.h49
-rw-r--r--llvm/include/llvm/Analysis/AliasAnalysis.h69
-rw-r--r--llvm/include/llvm/Analysis/AssumeBundleQueries.h9
-rw-r--r--llvm/include/llvm/Analysis/AssumptionCache.h10
-rw-r--r--llvm/include/llvm/Analysis/BasicAliasAnalysis.h82
-rw-r--r--llvm/include/llvm/Analysis/CGSCCPassManager.h101
-rw-r--r--llvm/include/llvm/Analysis/CaptureTracking.h25
-rw-r--r--llvm/include/llvm/Analysis/ConstantFolding.h30
-rw-r--r--llvm/include/llvm/Analysis/CostModel.h26
-rw-r--r--llvm/include/llvm/Analysis/Delinearization.h105
-rw-r--r--llvm/include/llvm/Analysis/HeatUtils.h7
-rw-r--r--llvm/include/llvm/Analysis/IRSimilarityIdentifier.h250
-rw-r--r--llvm/include/llvm/Analysis/IVDescriptors.h88
-rw-r--r--llvm/include/llvm/Analysis/IVUsers.h3
-rw-r--r--llvm/include/llvm/Analysis/InlineAdvisor.h38
-rw-r--r--llvm/include/llvm/Analysis/InlineCost.h3
-rw-r--r--llvm/include/llvm/Analysis/InlineOrder.h172
-rw-r--r--llvm/include/llvm/Analysis/InstructionSimplify.h2
-rw-r--r--llvm/include/llvm/Analysis/LazyCallGraph.h65
-rw-r--r--llvm/include/llvm/Analysis/LoopAccessAnalysis.h26
-rw-r--r--llvm/include/llvm/Analysis/LoopAnalysisManager.h1
-rw-r--r--llvm/include/llvm/Analysis/LoopInfo.h17
-rw-r--r--llvm/include/llvm/Analysis/LoopInfoImpl.h5
-rw-r--r--llvm/include/llvm/Analysis/LoopNestAnalysis.h22
-rw-r--r--llvm/include/llvm/Analysis/MLInlineAdvisor.h2
-rw-r--r--llvm/include/llvm/Analysis/MemorySSA.h30
-rw-r--r--llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h28
-rw-r--r--llvm/include/llvm/Analysis/ObjCARCUtil.h45
-rw-r--r--llvm/include/llvm/Analysis/ProfileSummaryInfo.h12
-rw-r--r--llvm/include/llvm/Analysis/ReplayInlineAdvisor.h53
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolution.h282
-rw-r--r--llvm/include/llvm/Analysis/StackLifetime.h2
-rw-r--r--llvm/include/llvm/Analysis/StackSafetyAnalysis.h8
-rw-r--r--llvm/include/llvm/Analysis/TargetLibraryInfo.h6
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h83
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h131
-rw-r--r--llvm/include/llvm/Analysis/TypeMetadataUtils.h28
-rw-r--r--llvm/include/llvm/Analysis/Utils/TFUtils.h7
-rw-r--r--llvm/include/llvm/Analysis/ValueTracking.h36
-rw-r--r--llvm/include/llvm/Analysis/VectorUtils.h12
-rw-r--r--llvm/include/llvm/AsmParser/LLLexer.h4
-rw-r--r--llvm/include/llvm/AsmParser/LLParser.h27
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h8
-rw-r--r--llvm/include/llvm/BinaryFormat/Dwarf.def3
-rw-r--r--llvm/include/llvm/BinaryFormat/DynamicTags.def12
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h23
-rw-r--r--llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def4
-rw-r--r--llvm/include/llvm/BinaryFormat/MachO.def2
-rw-r--r--llvm/include/llvm/BinaryFormat/Wasm.h41
-rw-r--r--llvm/include/llvm/BinaryFormat/WasmTraits.h18
-rw-r--r--llvm/include/llvm/BinaryFormat/XCOFF.h16
-rw-r--r--llvm/include/llvm/Bitcode/BitcodeAnalyzer.h2
-rw-r--r--llvm/include/llvm/Bitcode/BitcodeCommon.h8
-rw-r--r--llvm/include/llvm/Bitcode/LLVMBitCodes.h1
-rw-r--r--llvm/include/llvm/CodeGen/Analysis.h5
-rw-r--r--llvm/include/llvm/CodeGen/AsmPrinter.h7
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h264
-rw-r--r--llvm/include/llvm/CodeGen/CodeGenCommonISel.h219
-rw-r--r--llvm/include/llvm/CodeGen/CommandFlags.h7
-rw-r--r--llvm/include/llvm/CodeGen/FunctionLoweringInfo.h1
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h14
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h131
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h37
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h38
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h184
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h17
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h29
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h165
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h49
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h8
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Utils.h100
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h11
-rw-r--r--llvm/include/llvm/CodeGen/IndirectThunks.h2
-rw-r--r--llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h3
-rw-r--r--llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h3
-rw-r--r--llvm/include/llvm/CodeGen/LiveInterval.h10
-rw-r--r--llvm/include/llvm/CodeGen/LiveIntervalUnion.h29
-rw-r--r--llvm/include/llvm/CodeGen/LiveVariables.h6
-rw-r--r--llvm/include/llvm/CodeGen/LowLevelType.h3
-rw-r--r--llvm/include/llvm/CodeGen/MIRFSDiscriminator.h4
-rw-r--r--llvm/include/llvm/CodeGen/MIRFormatter.h7
-rw-r--r--llvm/include/llvm/CodeGen/MIRSampleProfile.h76
-rw-r--r--llvm/include/llvm/CodeGen/MIRYamlMapping.h2
-rw-r--r--llvm/include/llvm/CodeGen/MachineCombinerPattern.h13
-rw-r--r--llvm/include/llvm/CodeGen/MachineDominators.h16
-rw-r--r--llvm/include/llvm/CodeGen/MachineFrameInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunction.h16
-rw-r--r--llvm/include/llvm/CodeGen/MachineInstr.h14
-rw-r--r--llvm/include/llvm/CodeGen/MachineMemOperand.h12
-rw-r--r--llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h6
-rw-r--r--llvm/include/llvm/CodeGen/MachineRegisterInfo.h41
-rw-r--r--llvm/include/llvm/CodeGen/MacroFusion.h14
-rw-r--r--llvm/include/llvm/CodeGen/Passes.h12
-rw-r--r--llvm/include/llvm/CodeGen/RegAllocCommon.h7
-rw-r--r--llvm/include/llvm/CodeGen/RegisterScavenging.h3
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h91
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h1
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGNodes.h238
-rw-r--r--llvm/include/llvm/CodeGen/SwitchLoweringUtils.h8
-rw-r--r--llvm/include/llvm/CodeGen/TargetCallingConv.h8
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h33
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h84
-rw-r--r--llvm/include/llvm/CodeGen/TargetPassConfig.h14
-rw-r--r--llvm/include/llvm/CodeGen/TargetRegisterInfo.h6
-rw-r--r--llvm/include/llvm/CodeGen/TargetSchedule.h1
-rw-r--r--llvm/include/llvm/CodeGen/ValueTypes.td3
-rw-r--r--llvm/include/llvm/DWARFLinker/DWARFLinker.h24
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/CVRecord.h1
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def2
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h1
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h21
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h6
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h28
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h32
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h4
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h7
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h14
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h40
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h17
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h30
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/StringTable.h1
-rw-r--r--llvm/include/llvm/DebugInfo/MSF/MSFCommon.h3
-rw-r--r--llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h20
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h4
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h5
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h1
-rw-r--r--llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h1
-rw-r--r--llvm/include/llvm/Demangle/Demangle.h14
-rw-r--r--llvm/include/llvm/Demangle/ItaniumDemangle.h1315
-rw-r--r--llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h93
-rw-r--r--llvm/include/llvm/Demangle/Utility.h51
-rw-r--r--llvm/include/llvm/ExecutionEngine/ExecutionEngine.h1
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h39
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h20
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h98
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h420
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h225
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h63
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h38
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/riscv.h14
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h275
-rw-r--r--llvm/include/llvm/ExecutionEngine/MCJIT.h3
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Core.h100
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h64
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h330
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h9
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h9
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h67
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h97
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h85
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h133
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h6
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h272
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h34
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h69
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h70
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h88
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h6
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h436
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h925
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h386
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h464
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h138
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h79
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h68
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h1659
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h183
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h769
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h124
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h235
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h286
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h124
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h140
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h36
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h660
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h20
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h64
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h70
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h182
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h131
-rw-r--r--llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h37
-rw-r--r--llvm/include/llvm/ExecutionEngine/RuntimeDyld.h14
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMP.td69
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPConstants.h8
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h117
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h430
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPKinds.def63
-rw-r--r--llvm/include/llvm/IR/AbstractCallSite.h2
-rw-r--r--llvm/include/llvm/IR/Argument.h2
-rw-r--r--llvm/include/llvm/IR/Assumptions.h22
-rw-r--r--llvm/include/llvm/IR/Attributes.h371
-rw-r--r--llvm/include/llvm/IR/Attributes.td3
-rw-r--r--llvm/include/llvm/IR/BasicBlock.h12
-rw-r--r--llvm/include/llvm/IR/Constant.h6
-rw-r--r--llvm/include/llvm/IR/ConstantRange.h40
-rw-r--r--llvm/include/llvm/IR/Constants.h15
-rw-r--r--llvm/include/llvm/IR/DIBuilder.h80
-rw-r--r--llvm/include/llvm/IR/DataLayout.h23
-rw-r--r--llvm/include/llvm/IR/DebugInfo.h2
-rw-r--r--llvm/include/llvm/IR/DebugInfoMetadata.h437
-rw-r--r--llvm/include/llvm/IR/DerivedTypes.h7
-rw-r--r--llvm/include/llvm/IR/DiagnosticInfo.h36
-rw-r--r--llvm/include/llvm/IR/DiagnosticPrinter.h2
-rw-r--r--llvm/include/llvm/IR/Dominators.h6
-rw-r--r--llvm/include/llvm/IR/FPEnv.h14
-rw-r--r--llvm/include/llvm/IR/Function.h252
-rw-r--r--llvm/include/llvm/IR/GCStrategy.h3
-rw-r--r--llvm/include/llvm/IR/GlobalAlias.h33
-rw-r--r--llvm/include/llvm/IR/GlobalIFunc.h44
-rw-r--r--llvm/include/llvm/IR/GlobalIndirectSymbol.h93
-rw-r--r--llvm/include/llvm/IR/GlobalObject.h7
-rw-r--r--llvm/include/llvm/IR/GlobalValue.h14
-rw-r--r--llvm/include/llvm/IR/IRBuilder.h32
-rw-r--r--llvm/include/llvm/IR/InstrTypes.h292
-rw-r--r--llvm/include/llvm/IR/Instruction.h18
-rw-r--r--llvm/include/llvm/IR/Instructions.h105
-rw-r--r--llvm/include/llvm/IR/IntrinsicInst.h22
-rw-r--r--llvm/include/llvm/IR/Intrinsics.h3
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td158
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAArch64.td85
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAMDGPU.td52
-rw-r--r--llvm/include/llvm/IR/IntrinsicsBPF.td3
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td778
-rw-r--r--llvm/include/llvm/IR/IntrinsicsPowerPC.td60
-rw-r--r--llvm/include/llvm/IR/IntrinsicsRISCV.td177
-rw-r--r--llvm/include/llvm/IR/IntrinsicsSystemZ.td8
-rw-r--r--llvm/include/llvm/IR/IntrinsicsWebAssembly.td70
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td762
-rw-r--r--llvm/include/llvm/IR/LLVMContext.h4
-rw-r--r--llvm/include/llvm/IR/MatrixBuilder.h32
-rw-r--r--llvm/include/llvm/IR/Metadata.h35
-rw-r--r--llvm/include/llvm/IR/Module.h9
-rw-r--r--llvm/include/llvm/IR/ModuleSummaryIndex.h50
-rw-r--r--llvm/include/llvm/IR/Operator.h7
-rw-r--r--llvm/include/llvm/IR/OptBisect.h26
-rw-r--r--llvm/include/llvm/IR/PassManager.h81
-rw-r--r--llvm/include/llvm/IR/PassManagerInternal.h9
-rw-r--r--llvm/include/llvm/IR/PatternMatch.h131
-rw-r--r--llvm/include/llvm/IR/ProfileSummary.h38
-rw-r--r--llvm/include/llvm/IR/PseudoProbe.h4
-rw-r--r--llvm/include/llvm/IR/ReplaceConstant.h4
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.def4
-rw-r--r--llvm/include/llvm/IR/Type.h38
-rw-r--r--llvm/include/llvm/IR/VPIntrinsics.def141
-rw-r--r--llvm/include/llvm/IR/Value.h35
-rw-r--r--llvm/include/llvm/InitializePasses.h5
-rw-r--r--llvm/include/llvm/InterfaceStub/IFSHandler.h3
-rw-r--r--llvm/include/llvm/LTO/Caching.h38
-rw-r--r--llvm/include/llvm/LTO/Config.h3
-rw-r--r--llvm/include/llvm/LTO/LTO.h47
-rw-r--r--llvm/include/llvm/LTO/SummaryBasedOptimizations.h2
-rw-r--r--llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h2
-rw-r--r--llvm/include/llvm/LTO/legacy/LTOModule.h4
-rw-r--r--llvm/include/llvm/LinkAllIR.h3
-rw-r--r--llvm/include/llvm/LinkAllPasses.h3
-rw-r--r--llvm/include/llvm/MC/MCAsmBackend.h10
-rw-r--r--llvm/include/llvm/MC/MCAsmInfoGOFF.h29
-rw-r--r--llvm/include/llvm/MC/MCContext.h2
-rw-r--r--llvm/include/llvm/MC/MCDwarf.h38
-rw-r--r--llvm/include/llvm/MC/MCELFObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCELFStreamer.h2
-rw-r--r--llvm/include/llvm/MC/MCExpr.h2
-rw-r--r--llvm/include/llvm/MC/MCFragment.h31
-rw-r--r--llvm/include/llvm/MC/MCInstrAnalysis.h11
-rw-r--r--llvm/include/llvm/MC/MCInstrDesc.h4
-rw-r--r--llvm/include/llvm/MC/MCObjectFileInfo.h4
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h6
-rw-r--r--llvm/include/llvm/MC/MCPseudoProbe.h292
-rw-r--r--llvm/include/llvm/MC/MCRegister.h1
-rw-r--r--llvm/include/llvm/MC/MCSchedule.h1
-rw-r--r--llvm/include/llvm/MC/MCStreamer.h10
-rw-r--r--llvm/include/llvm/MC/MCSymbolWasm.h12
-rw-r--r--llvm/include/llvm/MC/MCWasmStreamer.h5
-rw-r--r--llvm/include/llvm/MC/MCWinCOFFStreamer.h2
-rw-r--r--llvm/include/llvm/MC/TargetRegistry.h (renamed from llvm/include/llvm/Support/TargetRegistry.h)84
-rw-r--r--llvm/include/llvm/MCA/CustomBehaviour.h51
-rw-r--r--llvm/include/llvm/MCA/Instruction.h4
-rw-r--r--llvm/include/llvm/MCA/Stages/InOrderIssueStage.h5
-rw-r--r--llvm/include/llvm/MCA/View.h (renamed from llvm/tools/llvm-mca/Views/View.h)4
-rw-r--r--llvm/include/llvm/Object/ELF.h7
-rw-r--r--llvm/include/llvm/Object/ELFObjectFile.h17
-rw-r--r--llvm/include/llvm/Object/ELFTypes.h8
-rw-r--r--llvm/include/llvm/Object/Error.h4
-rw-r--r--llvm/include/llvm/Object/MachO.h3
-rw-r--r--llvm/include/llvm/Object/Wasm.h9
-rw-r--r--llvm/include/llvm/Object/XCOFFObjectFile.h153
-rw-r--r--llvm/include/llvm/ObjectYAML/MachOYAML.h1
-rw-r--r--llvm/include/llvm/ObjectYAML/WasmYAML.h38
-rw-r--r--llvm/include/llvm/ObjectYAML/XCOFFYAML.h54
-rw-r--r--llvm/include/llvm/Option/Arg.h5
-rw-r--r--llvm/include/llvm/Option/OptParser.td2
-rw-r--r--llvm/include/llvm/Option/OptTable.h13
-rw-r--r--llvm/include/llvm/Option/Option.h14
-rw-r--r--llvm/include/llvm/Passes/OptimizationLevel.h127
-rw-r--r--llvm/include/llvm/Passes/PassBuilder.h178
-rw-r--r--llvm/include/llvm/Passes/StandardInstrumentations.h217
-rw-r--r--llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h7
-rw-r--r--llvm/include/llvm/ProfileData/InstrProf.h18
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfData.inc11
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfReader.h18
-rw-r--r--llvm/include/llvm/ProfileData/ProfileCommon.h10
-rw-r--r--llvm/include/llvm/ProfileData/SampleProf.h376
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfReader.h49
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfWriter.h74
-rw-r--r--llvm/include/llvm/Support/AArch64TargetParser.def36
-rw-r--r--llvm/include/llvm/Support/ARMTargetParser.def18
-rw-r--r--llvm/include/llvm/Support/Allocator.h2
-rw-r--r--llvm/include/llvm/Support/AtomicOrdering.h10
-rw-r--r--llvm/include/llvm/Support/BinaryByteStream.h34
-rw-r--r--llvm/include/llvm/Support/BinaryItemStream.h14
-rw-r--r--llvm/include/llvm/Support/BinaryStream.h12
-rw-r--r--llvm/include/llvm/Support/BinaryStreamArray.h7
-rw-r--r--llvm/include/llvm/Support/BinaryStreamReader.h14
-rw-r--r--llvm/include/llvm/Support/BinaryStreamRef.h71
-rw-r--r--llvm/include/llvm/Support/BinaryStreamWriter.h14
-rw-r--r--llvm/include/llvm/Support/Caching.h71
-rw-r--r--llvm/include/llvm/Support/CommandLine.h64
-rw-r--r--llvm/include/llvm/Support/Compiler.h60
-rw-r--r--llvm/include/llvm/Support/CrashRecoveryContext.h3
-rw-r--r--llvm/include/llvm/Support/DOTGraphTraits.h5
-rw-r--r--llvm/include/llvm/Support/DataExtractor.h3
-rw-r--r--llvm/include/llvm/Support/Debug.h21
-rw-r--r--llvm/include/llvm/Support/DivisionByConstantInfo.h38
-rw-r--r--llvm/include/llvm/Support/Error.h37
-rw-r--r--llvm/include/llvm/Support/ErrorHandling.h26
-rw-r--r--llvm/include/llvm/Support/ExtensibleRTTI.h7
-rw-r--r--llvm/include/llvm/Support/FileSystem.h8
-rw-r--r--llvm/include/llvm/Support/FileSystem/UniqueID.h27
-rw-r--r--llvm/include/llvm/Support/FormatVariadic.h2
-rw-r--r--llvm/include/llvm/Support/GenericDomTreeConstruction.h4
-rw-r--r--llvm/include/llvm/Support/GraphWriter.h91
-rw-r--r--llvm/include/llvm/Support/HashBuilder.h438
-rw-r--r--llvm/include/llvm/Support/JSON.h46
-rw-r--r--llvm/include/llvm/Support/KnownBits.h21
-rw-r--r--llvm/include/llvm/Support/MD5.h37
-rw-r--r--llvm/include/llvm/Support/MSP430AttributeParser.h44
-rw-r--r--llvm/include/llvm/Support/MSP430Attributes.h44
-rw-r--r--llvm/include/llvm/Support/MachineValueType.h50
-rw-r--r--llvm/include/llvm/Support/Memory.h13
-rw-r--r--llvm/include/llvm/Support/PGOOptions.h65
-rw-r--r--llvm/include/llvm/Support/Parallel.h5
-rw-r--r--llvm/include/llvm/Support/Path.h67
-rw-r--r--llvm/include/llvm/Support/Process.h6
-rw-r--r--llvm/include/llvm/Support/RISCVISAInfo.h89
-rw-r--r--llvm/include/llvm/Support/RISCVTargetParser.def10
-rw-r--r--llvm/include/llvm/Support/Signposts.h43
-rw-r--r--llvm/include/llvm/Support/TargetOpcodes.def3
-rw-r--r--llvm/include/llvm/Support/TargetSelect.h12
-rw-r--r--llvm/include/llvm/Support/TypeSize.h8
-rw-r--r--llvm/include/llvm/Support/VersionTuple.h7
-rw-r--r--llvm/include/llvm/Support/VirtualFileSystem.h35
-rw-r--r--llvm/include/llvm/Support/Windows/WindowsSupport.h4
-rw-r--r--llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h8
-rw-r--r--llvm/include/llvm/Support/X86TargetParser.def135
-rw-r--r--llvm/include/llvm/Support/X86TargetParser.h4
-rw-r--r--llvm/include/llvm/Support/YAMLTraits.h2
-rw-r--r--llvm/include/llvm/Support/raw_ostream.h8
-rw-r--r--llvm/include/llvm/TableGen/DirectiveEmitter.h2
-rw-r--r--llvm/include/llvm/TableGen/Error.h22
-rw-r--r--llvm/include/llvm/TableGen/Record.h51
-rw-r--r--llvm/include/llvm/Target/GenericOpcodes.td12
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td101
-rw-r--r--llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td2
-rw-r--r--llvm/include/llvm/Target/Target.td22
-rw-r--r--llvm/include/llvm/Target/TargetLoweringObjectFile.h9
-rw-r--r--llvm/include/llvm/Target/TargetMachine.h25
-rw-r--r--llvm/include/llvm/Target/TargetOptions.h34
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td28
-rw-r--r--llvm/include/llvm/TextAPI/Architecture.h6
-rw-r--r--llvm/include/llvm/TextAPI/ArchitectureSet.h6
-rw-r--r--llvm/include/llvm/TextAPI/InterfaceFile.h8
-rw-r--r--llvm/include/llvm/TextAPI/PackedVersion.h6
-rw-r--r--llvm/include/llvm/TextAPI/Platform.h6
-rw-r--r--llvm/include/llvm/TextAPI/Symbol.h6
-rw-r--r--llvm/include/llvm/TextAPI/Target.h6
-rw-r--r--llvm/include/llvm/TextAPI/TextAPIReader.h6
-rw-r--r--llvm/include/llvm/TextAPI/TextAPIWriter.h6
-rw-r--r--llvm/include/llvm/Transforms/IPO/Attributor.h214
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionAttrs.h8
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionImport.h29
-rw-r--r--llvm/include/llvm/Transforms/IPO/IROutliner.h36
-rw-r--r--llvm/include/llvm/Transforms/IPO/Inliner.h16
-rw-r--r--llvm/include/llvm/Transforms/IPO/LoopExtractor.h2
-rw-r--r--llvm/include/llvm/Transforms/IPO/ModuleInliner.h51
-rw-r--r--llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h1
-rw-r--r--llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h13
-rw-r--r--llvm/include/llvm/Transforms/IPO/SampleContextTracker.h55
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombine.h10
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombiner.h50
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation.h6
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h48
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h79
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h7
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h30
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h7
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h7
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h17
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h8
-rw-r--r--llvm/include/llvm/Transforms/Scalar/EarlyCSE.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/GVN.h15
-rw-r--r--llvm/include/llvm/Transforms/Scalar/JumpThreading.h8
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopPassManager.h94
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h9
-rw-r--r--llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SROA.h4
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h3
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h12
-rw-r--r--llvm/include/llvm/Transforms/Utils/AddDiscriminators.h1
-rw-r--r--llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h30
-rw-r--r--llvm/include/llvm/Transforms/Utils/BuildLibCalls.h10
-rw-r--r--llvm/include/llvm/Transforms/Utils/Cloning.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/CodeExtractor.h18
-rw-r--r--llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h16
-rw-r--r--llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/GlobalStatus.h15
-rw-r--r--llvm/include/llvm/Transforms/Utils/InstructionWorklist.h (renamed from llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h)29
-rw-r--r--llvm/include/llvm/Transforms/Utils/Local.h40
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopPeel.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopUtils.h56
-rw-r--r--llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h7
-rw-r--r--llvm/include/llvm/Transforms/Utils/PredicateInfo.h6
-rw-r--r--llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h70
-rw-r--r--llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h9
-rw-r--r--llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h2
-rw-r--r--llvm/include/llvm/Transforms/Utils/UnrollLoop.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/ValueMapper.h11
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h2
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h2
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h4
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/VectorCombine.h10
-rw-r--r--llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h3
-rw-r--r--llvm/include/llvm/module.modulemap18
-rw-r--r--llvm/lib/Analysis/AliasAnalysis.cpp56
-rw-r--r--llvm/lib/Analysis/AssumeBundleQueries.cpp4
-rw-r--r--llvm/lib/Analysis/AssumptionCache.cpp30
-rw-r--r--llvm/lib/Analysis/BasicAliasAnalysis.cpp622
-rw-r--r--llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp2
-rw-r--r--llvm/lib/Analysis/BranchProbabilityInfo.cpp2
-rw-r--r--llvm/lib/Analysis/CGSCCPassManager.cpp18
-rw-r--r--llvm/lib/Analysis/CaptureTracking.cpp93
-rw-r--r--llvm/lib/Analysis/CmpInstAnalysis.cpp8
-rw-r--r--llvm/lib/Analysis/CodeMetrics.cpp5
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp377
-rw-r--r--llvm/lib/Analysis/CostModel.cpp22
-rw-r--r--llvm/lib/Analysis/Delinearization.cpp489
-rw-r--r--llvm/lib/Analysis/DemandedBits.cpp15
-rw-r--r--llvm/lib/Analysis/DependenceAnalysis.cpp37
-rw-r--r--llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp11
-rw-r--r--llvm/lib/Analysis/HeatUtils.cpp7
-rw-r--r--llvm/lib/Analysis/IRSimilarityIdentifier.cpp262
-rw-r--r--llvm/lib/Analysis/IVDescriptors.cpp205
-rw-r--r--llvm/lib/Analysis/IVUsers.cpp62
-rw-r--r--llvm/lib/Analysis/InlineAdvisor.cpp124
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp168
-rw-r--r--llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp7
-rw-r--r--llvm/lib/Analysis/InstructionPrecedenceTracking.cpp14
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp299
-rw-r--r--llvm/lib/Analysis/LazyCallGraph.cpp26
-rw-r--r--llvm/lib/Analysis/LazyValueInfo.cpp113
-rw-r--r--llvm/lib/Analysis/Lint.cpp6
-rw-r--r--llvm/lib/Analysis/Loads.cpp13
-rw-r--r--llvm/lib/Analysis/LoopAccessAnalysis.cpp95
-rw-r--r--llvm/lib/Analysis/LoopCacheAnalysis.cpp18
-rw-r--r--llvm/lib/Analysis/LoopInfo.cpp12
-rw-r--r--llvm/lib/Analysis/LoopNestAnalysis.cpp173
-rw-r--r--llvm/lib/Analysis/MLInlineAdvisor.cpp3
-rw-r--r--llvm/lib/Analysis/MemoryBuiltins.cpp9
-rw-r--r--llvm/lib/Analysis/MemoryLocation.cpp35
-rw-r--r--llvm/lib/Analysis/MemorySSA.cpp207
-rw-r--r--llvm/lib/Analysis/MemorySSAUpdater.cpp53
-rw-r--r--llvm/lib/Analysis/ModuleSummaryAnalysis.cpp46
-rw-r--r--llvm/lib/Analysis/ObjCARCInstKind.cpp5
-rw-r--r--llvm/lib/Analysis/OverflowInstAnalysis.cpp2
-rw-r--r--llvm/lib/Analysis/PHITransAddr.cpp4
-rw-r--r--llvm/lib/Analysis/ProfileSummaryInfo.cpp16
-rw-r--r--llvm/lib/Analysis/ReplayInlineAdvisor.cpp106
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp1718
-rw-r--r--llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp12
-rw-r--r--llvm/lib/Analysis/StackLifetime.cpp18
-rw-r--r--llvm/lib/Analysis/StackSafetyAnalysis.cpp118
-rw-r--r--llvm/lib/Analysis/TFUtils.cpp83
-rw-r--r--llvm/lib/Analysis/TargetLibraryInfo.cpp300
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp38
-rw-r--r--llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp30
-rw-r--r--llvm/lib/Analysis/TypeMetadataUtils.cpp63
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp248
-rw-r--r--llvm/lib/Analysis/VectorUtils.cpp115
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp14
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp315
-rw-r--r--llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp7
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp62
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp277
-rw-r--r--llvm/lib/Bitcode/Reader/MetadataLoader.cpp215
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp77
-rw-r--r--llvm/lib/Bitcode/Writer/ValueEnumerator.cpp7
-rw-r--r--llvm/lib/CodeGen/Analysis.cpp34
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/ARMException.cpp1
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp129
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp172
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp179
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h13
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp10
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp27
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp77
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h8
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp80
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h32
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp46
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp7
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WasmException.cpp29
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WinException.cpp26
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WinException.h4
-rw-r--r--llvm/lib/CodeGen/AtomicExpandPass.cpp27
-rw-r--r--llvm/lib/CodeGen/BasicBlockSections.cpp18
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp19
-rw-r--r--llvm/lib/CodeGen/BranchRelaxation.cpp42
-rw-r--r--llvm/lib/CodeGen/BreakFalseDeps.cpp2
-rw-r--r--llvm/lib/CodeGen/CodeGenCommonISel.cpp169
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp92
-rw-r--r--llvm/lib/CodeGen/CommandFlags.cpp34
-rw-r--r--llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp25
-rw-r--r--llvm/lib/CodeGen/DeadMachineInstructionElim.cpp20
-rw-r--r--llvm/lib/CodeGen/DwarfEHPrepare.cpp74
-rw-r--r--llvm/lib/CodeGen/ExpandMemCmp.cpp4
-rw-r--r--llvm/lib/CodeGen/ExpandPostRAPseudos.cpp7
-rw-r--r--llvm/lib/CodeGen/ExpandVectorPredication.cpp138
-rw-r--r--llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp7
-rw-r--r--llvm/lib/CodeGen/GCMetadata.cpp25
-rw-r--r--llvm/lib/CodeGen/GCRootLowering.cpp13
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp26
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp54
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Combiner.cpp13
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp1792
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp27
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp1
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp366
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp9
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp16
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp8
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp10
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Legalizer.cpp18
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp851
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp3
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp669
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Localizer.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp3
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp22
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp403
-rw-r--r--llvm/lib/CodeGen/HardwareLoops.cpp33
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp98
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp2
-rw-r--r--llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp14
-rw-r--r--llvm/lib/CodeGen/IntrinsicLowering.cpp3
-rw-r--r--llvm/lib/CodeGen/LLVMTargetMachine.cpp2
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp2913
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h1051
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp58
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h5
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp247
-rw-r--r--llvm/lib/CodeGen/LiveDebugVariables.cpp19
-rw-r--r--llvm/lib/CodeGen/LiveInterval.cpp43
-rw-r--r--llvm/lib/CodeGen/LiveIntervalUnion.cpp23
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp60
-rw-r--r--llvm/lib/CodeGen/LivePhysRegs.cpp22
-rw-r--r--llvm/lib/CodeGen/LiveRangeEdit.cpp32
-rw-r--r--llvm/lib/CodeGen/LiveVariables.cpp86
-rw-r--r--llvm/lib/CodeGen/LoopTraversal.cpp3
-rw-r--r--llvm/lib/CodeGen/LowLevelType.cpp10
-rw-r--r--llvm/lib/CodeGen/MIRCanonicalizerPass.cpp8
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.cpp2
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.h1
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIParser.cpp22
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIRParser.cpp3
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp2
-rw-r--r--llvm/lib/CodeGen/MIRSampleProfile.cpp343
-rw-r--r--llvm/lib/CodeGen/MachineBasicBlock.cpp34
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineCSE.cpp90
-rw-r--r--llvm/lib/CodeGen/MachineCopyPropagation.cpp76
-rw-r--r--llvm/lib/CodeGen/MachineDominators.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp47
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp11
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp38
-rw-r--r--llvm/lib/CodeGen/MachineLoopInfo.cpp8
-rw-r--r--llvm/lib/CodeGen/MachineOperand.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp5
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp5
-rw-r--r--llvm/lib/CodeGen/MachineRegisterInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineSink.cpp53
-rw-r--r--llvm/lib/CodeGen/MachineSizeOpts.cpp6
-rw-r--r--llvm/lib/CodeGen/MachineStripDebug.cpp21
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp167
-rw-r--r--llvm/lib/CodeGen/MacroFusion.cpp6
-rw-r--r--llvm/lib/CodeGen/ModuloSchedule.cpp101
-rw-r--r--llvm/lib/CodeGen/PHIElimination.cpp47
-rw-r--r--llvm/lib/CodeGen/PeepholeOptimizer.cpp2
-rw-r--r--llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp24
-rw-r--r--llvm/lib/CodeGen/PrologEpilogInserter.cpp23
-rw-r--r--llvm/lib/CodeGen/PseudoProbeInserter.cpp9
-rw-r--r--llvm/lib/CodeGen/RDFLiveness.cpp2
-rw-r--r--llvm/lib/CodeGen/ReachingDefAnalysis.cpp47
-rw-r--r--llvm/lib/CodeGen/RegAllocBasic.cpp4
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.h90
-rw-r--r--llvm/lib/CodeGen/RegAllocFast.cpp3
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp132
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp15
-rw-r--r--llvm/lib/CodeGen/RegisterScavenging.cpp15
-rw-r--r--llvm/lib/CodeGen/ReplaceWithVeclib.cpp4
-rw-r--r--llvm/lib/CodeGen/SafeStack.cpp28
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.cpp4
-rw-r--r--llvm/lib/CodeGen/SafeStackLayout.h15
-rw-r--r--llvm/lib/CodeGen/ScheduleDAG.cpp3
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp959
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp28
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp23
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp52
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp17
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp505
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp3
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h32
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp80
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp422
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp924
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp426
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h203
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp35
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp182
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp481
-rw-r--r--llvm/lib/CodeGen/SplitKit.cpp48
-rw-r--r--llvm/lib/CodeGen/SplitKit.h10
-rw-r--r--llvm/lib/CodeGen/StackColoring.cpp2
-rw-r--r--llvm/lib/CodeGen/StackProtector.cpp30
-rw-r--r--llvm/lib/CodeGen/StackSlotColoring.cpp2
-rw-r--r--llvm/lib/CodeGen/SwitchLoweringUtils.cpp2
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp148
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp20
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp72
-rw-r--r--llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp47
-rw-r--r--llvm/lib/CodeGen/TargetPassConfig.cpp130
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp256
-rw-r--r--llvm/lib/CodeGen/TypePromotion.cpp152
-rw-r--r--llvm/lib/CodeGen/ValueTypes.cpp2
-rw-r--r--llvm/lib/CodeGen/VirtRegMap.cpp25
-rw-r--r--llvm/lib/CodeGen/WasmEHPrepare.cpp4
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinker.cpp1
-rw-r--r--llvm/lib/DWARFLinker/DWARFStreamer.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp42
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp4
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp45
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp28
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp11
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp11
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp33
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDie.cpp660
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp24
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp18
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp10
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp216
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp105
-rw-r--r--llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp30
-rw-r--r--llvm/lib/DebugInfo/GSYM/FileWriter.cpp7
-rw-r--r--llvm/lib/DebugInfo/GSYM/Range.cpp7
-rw-r--r--llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp90
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Symbolize.cpp24
-rw-r--r--llvm/lib/Demangle/DLangDemangle.cpp45
-rw-r--r--llvm/lib/Demangle/Demangle.cpp51
-rw-r--r--llvm/lib/Demangle/ItaniumDemangle.cpp70
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangle.cpp100
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangleNodes.cpp408
-rw-r--r--llvm/lib/Demangle/RustDemangle.cpp182
-rw-r--r--llvm/lib/ExecutionEngine/ExecutionEngine.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h104
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp185
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp255
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp575
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLink.cpp14
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp290
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h65
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp585
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO.cpp7
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp180
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h47
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp68
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp214
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp33
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/aarch64.cpp30
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/x86_64.cpp137
-rw-r--r--llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp44
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp135
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp450
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp818
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp10
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp107
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp184
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp317
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp146
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp72
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp77
-rw-r--r--llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LLJIT.cpp105
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp82
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp398
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Mangling.cpp173
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp58
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp55
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp47
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp58
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp250
-rw-r--r--llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp406
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp36
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp84
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h36
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp71
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp129
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp261
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp293
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp48
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp6
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp43
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp32
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp372
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h12
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h3
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h1
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h3
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h1
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h1
-rw-r--r--llvm/lib/ExecutionEngine/TargetSelect.cpp2
-rw-r--r--llvm/lib/FileCheck/FileCheck.cpp12
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp714
-rw-r--r--llvm/lib/IR/AbstractCallSite.cpp2
-rw-r--r--llvm/lib/IR/AsmWriter.cpp644
-rw-r--r--llvm/lib/IR/Assumptions.cpp85
-rw-r--r--llvm/lib/IR/Attributes.cpp350
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp106
-rw-r--r--llvm/lib/IR/BasicBlock.cpp6
-rw-r--r--llvm/lib/IR/ConstantFold.cpp281
-rw-r--r--llvm/lib/IR/ConstantRange.cpp208
-rw-r--r--llvm/lib/IR/Constants.cpp137
-rw-r--r--llvm/lib/IR/Core.cpp38
-rw-r--r--llvm/lib/IR/DIBuilder.cpp200
-rw-r--r--llvm/lib/IR/DataLayout.cpp111
-rw-r--r--llvm/lib/IR/DebugInfo.cpp76
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp229
-rw-r--r--llvm/lib/IR/DiagnosticHandler.cpp2
-rw-r--r--llvm/lib/IR/DiagnosticInfo.cpp40
-rw-r--r--llvm/lib/IR/DiagnosticPrinter.cpp2
-rw-r--r--llvm/lib/IR/FPEnv.cpp10
-rw-r--r--llvm/lib/IR/Function.cpp229
-rw-r--r--llvm/lib/IR/GCStrategy.cpp18
-rw-r--r--llvm/lib/IR/Globals.cpp115
-rw-r--r--llvm/lib/IR/IRBuilder.cpp19
-rw-r--r--llvm/lib/IR/Instruction.cpp20
-rw-r--r--llvm/lib/IR/Instructions.cpp248
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp63
-rw-r--r--llvm/lib/IR/LLVMContext.cpp8
-rw-r--r--llvm/lib/IR/LLVMContextImpl.cpp25
-rw-r--r--llvm/lib/IR/LLVMContextImpl.h205
-rw-r--r--llvm/lib/IR/LegacyPassManager.cpp2
-rw-r--r--llvm/lib/IR/Mangler.cpp7
-rw-r--r--llvm/lib/IR/Metadata.cpp11
-rw-r--r--llvm/lib/IR/Module.cpp4
-rw-r--r--llvm/lib/IR/ModuleSummaryIndex.cpp21
-rw-r--r--llvm/lib/IR/Operator.cpp35
-rw-r--r--llvm/lib/IR/OptBisect.cpp18
-rw-r--r--llvm/lib/IR/PassManager.cpp15
-rw-r--r--llvm/lib/IR/ProfileSummary.cpp4
-rw-r--r--llvm/lib/IR/PseudoProbe.cpp8
-rw-r--r--llvm/lib/IR/ReplaceConstant.cpp52
-rw-r--r--llvm/lib/IR/Statepoint.cpp6
-rw-r--r--llvm/lib/IR/Type.cpp48
-rw-r--r--llvm/lib/IR/TypeFinder.cpp8
-rw-r--r--llvm/lib/IR/User.cpp2
-rw-r--r--llvm/lib/IR/Value.cpp27
-rw-r--r--llvm/lib/IR/Verifier.cpp328
-rw-r--r--llvm/lib/InterfaceStub/ELFObjHandler.cpp2
-rw-r--r--llvm/lib/InterfaceStub/IFSHandler.cpp12
-rw-r--r--llvm/lib/InterfaceStub/IFSStub.cpp4
-rw-r--r--llvm/lib/LTO/LTO.cpp56
-rw-r--r--llvm/lib/LTO/LTOBackend.cpp47
-rw-r--r--llvm/lib/LTO/LTOCodeGenerator.cpp9
-rw-r--r--llvm/lib/LTO/LTOModule.cpp15
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp51
-rw-r--r--llvm/lib/Linker/IRMover.cpp125
-rw-r--r--llvm/lib/Linker/LinkModules.cpp100
-rw-r--r--llvm/lib/MC/ConstantPools.cpp2
-rw-r--r--llvm/lib/MC/ELFObjectWriter.cpp3
-rw-r--r--llvm/lib/MC/MCAsmInfoGOFF.cpp27
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp5
-rw-r--r--llvm/lib/MC/MCAssembler.cpp17
-rw-r--r--llvm/lib/MC/MCDisassembler/Disassembler.cpp2
-rw-r--r--llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp2
-rw-r--r--llvm/lib/MC/MCDwarf.cpp94
-rw-r--r--llvm/lib/MC/MCELFStreamer.cpp11
-rw-r--r--llvm/lib/MC/MCExpr.cpp4
-rw-r--r--llvm/lib/MC/MCFragment.cpp8
-rw-r--r--llvm/lib/MC/MCInstrAnalysis.cpp12
-rw-r--r--llvm/lib/MC/MCMachOStreamer.cpp2
-rw-r--r--llvm/lib/MC/MCObjectFileInfo.cpp15
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp10
-rw-r--r--llvm/lib/MC/MCParser/AsmLexer.cpp3
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp18
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp25
-rw-r--r--llvm/lib/MC/MCParser/GOFFAsmParser.cpp48
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp7
-rw-r--r--llvm/lib/MC/MCPseudoProbe.cpp392
-rw-r--r--llvm/lib/MC/MCSectionXCOFF.cpp6
-rw-r--r--llvm/lib/MC/MCStreamer.cpp7
-rw-r--r--llvm/lib/MC/MCWasmStreamer.cpp87
-rw-r--r--llvm/lib/MC/MCWin64EH.cpp6
-rw-r--r--llvm/lib/MC/MCWinCOFFStreamer.cpp9
-rw-r--r--llvm/lib/MC/MCXCOFFStreamer.cpp2
-rw-r--r--llvm/lib/MC/MachObjectWriter.cpp2
-rw-r--r--llvm/lib/MC/TargetRegistry.cpp (renamed from llvm/lib/Support/TargetRegistry.cpp)2
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp79
-rw-r--r--llvm/lib/MC/XCOFFObjectWriter.cpp339
-rw-r--r--llvm/lib/MCA/Context.cpp5
-rw-r--r--llvm/lib/MCA/CustomBehaviour.cpp18
-rw-r--r--llvm/lib/MCA/HardwareUnits/RegisterFile.cpp13
-rw-r--r--llvm/lib/MCA/InstrBuilder.cpp2
-rw-r--r--llvm/lib/MCA/Stages/InOrderIssueStage.cpp28
-rw-r--r--llvm/lib/MCA/Stages/InstructionTables.cpp2
-rw-r--r--llvm/lib/MCA/View.cpp (renamed from llvm/tools/llvm-mca/Views/View.cpp)2
-rw-r--r--llvm/lib/Object/Archive.cpp2
-rw-r--r--llvm/lib/Object/COFFModuleDefinition.cpp5
-rw-r--r--llvm/lib/Object/ELF.cpp71
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp82
-rw-r--r--llvm/lib/Object/IRObjectFile.cpp2
-rw-r--r--llvm/lib/Object/IRSymtab.cpp29
-rw-r--r--llvm/lib/Object/MachOObjectFile.cpp46
-rw-r--r--llvm/lib/Object/ModuleSymbolTable.cpp7
-rw-r--r--llvm/lib/Object/Object.cpp9
-rw-r--r--llvm/lib/Object/ObjectFile.cpp15
-rw-r--r--llvm/lib/Object/RelocationResolver.cpp2
-rw-r--r--llvm/lib/Object/WasmObjectFile.cpp119
-rw-r--r--llvm/lib/Object/XCOFFObjectFile.cpp502
-rw-r--r--llvm/lib/ObjectYAML/COFFEmitter.cpp22
-rw-r--r--llvm/lib/ObjectYAML/COFFYAML.cpp36
-rw-r--r--llvm/lib/ObjectYAML/ELFEmitter.cpp9
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp19
-rw-r--r--llvm/lib/ObjectYAML/MachOEmitter.cpp35
-rw-r--r--llvm/lib/ObjectYAML/MachOYAML.cpp27
-rw-r--r--llvm/lib/ObjectYAML/WasmEmitter.cpp43
-rw-r--r--llvm/lib/ObjectYAML/WasmYAML.cpp28
-rw-r--r--llvm/lib/ObjectYAML/XCOFFEmitter.cpp411
-rw-r--r--llvm/lib/ObjectYAML/XCOFFYAML.cpp43
-rw-r--r--llvm/lib/Option/OptTable.cpp64
-rw-r--r--llvm/lib/Option/Option.cpp47
-rw-r--r--llvm/lib/Passes/OptimizationLevel.cpp30
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp1848
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp1798
-rw-r--r--llvm/lib/Passes/PassRegistry.def113
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp1218
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp3
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp109
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp107
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp5
-rw-r--r--llvm/lib/ProfileData/ProfileSummaryBuilder.cpp18
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp73
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp262
-rw-r--r--llvm/lib/ProfileData/SampleProfWriter.cpp235
-rw-r--r--llvm/lib/Support/AArch64TargetParser.cpp10
-rw-r--r--llvm/lib/Support/APFixedPoint.cpp4
-rw-r--r--llvm/lib/Support/APFloat.cpp38
-rw-r--r--llvm/lib/Support/APInt.cpp455
-rw-r--r--llvm/lib/Support/ARMTargetParser.cpp14
-rw-r--r--llvm/lib/Support/BinaryStreamReader.cpp16
-rw-r--r--llvm/lib/Support/BinaryStreamRef.cpp31
-rw-r--r--llvm/lib/Support/BinaryStreamWriter.cpp6
-rw-r--r--llvm/lib/Support/Caching.cpp (renamed from llvm/lib/LTO/Caching.cpp)56
-rw-r--r--llvm/lib/Support/CommandLine.cpp13
-rw-r--r--llvm/lib/Support/CrashRecoveryContext.cpp3
-rw-r--r--llvm/lib/Support/DebugOptions.h2
-rw-r--r--llvm/lib/Support/DivisionByConstantInfo.cpp107
-rw-r--r--llvm/lib/Support/Error.cpp11
-rw-r--r--llvm/lib/Support/ErrorHandling.cpp17
-rw-r--r--llvm/lib/Support/ExtensibleRTTI.cpp7
-rw-r--r--llvm/lib/Support/FileUtilities.cpp6
-rw-r--r--llvm/lib/Support/GraphWriter.cpp10
-rw-r--r--llvm/lib/Support/Host.cpp32
-rw-r--r--llvm/lib/Support/JSON.cpp5
-rw-r--r--llvm/lib/Support/KnownBits.cpp15
-rw-r--r--llvm/lib/Support/LockFileManager.cpp2
-rw-r--r--llvm/lib/Support/MD5.cpp85
-rw-r--r--llvm/lib/Support/MSP430AttributeParser.cpp53
-rw-r--r--llvm/lib/Support/MSP430Attributes.cpp22
-rw-r--r--llvm/lib/Support/Parallel.cpp7
-rw-r--r--llvm/lib/Support/Path.cpp103
-rw-r--r--llvm/lib/Support/Process.cpp3
-rw-r--r--llvm/lib/Support/RISCVISAInfo.cpp718
-rw-r--r--llvm/lib/Support/Signposts.cpp32
-rw-r--r--llvm/lib/Support/SmallVector.cpp19
-rw-r--r--llvm/lib/Support/SpecialCaseList.cpp6
-rw-r--r--llvm/lib/Support/TimeProfiler.cpp8
-rw-r--r--llvm/lib/Support/Timer.cpp5
-rw-r--r--llvm/lib/Support/Triple.cpp92
-rw-r--r--llvm/lib/Support/Unix/Memory.inc8
-rw-r--r--llvm/lib/Support/Unix/Path.inc61
-rw-r--r--llvm/lib/Support/Unix/Process.inc3
-rw-r--r--llvm/lib/Support/Unix/Program.inc3
-rw-r--r--llvm/lib/Support/Unix/Unix.h5
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp175
-rw-r--r--llvm/lib/Support/Windows/Path.inc38
-rw-r--r--llvm/lib/Support/Windows/Process.inc4
-rw-r--r--llvm/lib/Support/Windows/Program.inc1
-rw-r--r--llvm/lib/Support/X86TargetParser.cpp82
-rw-r--r--llvm/lib/Support/raw_ostream.cpp5
-rw-r--r--llvm/lib/TableGen/Main.cpp6
-rw-r--r--llvm/lib/TableGen/Record.cpp299
-rw-r--r--llvm/lib/TableGen/TGParser.cpp13
-rw-r--r--llvm/lib/TableGen/TGParser.h9
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td918
-rw-r--r--llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp11
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td102
-rw-r--r--llvm/lib/Target/AArch64/AArch64Combine.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp17
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp86
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp104
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp1254
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h37
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td349
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp392
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h6
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td227
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp293
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td616
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA53.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA55.td7
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA57.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA64FX.td10
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedCyclone.td66
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM3.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM4.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM5.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedFalkor.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedKryo.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedTSV110.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64Schedule.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp6
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp33
-rw-r--r--llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp36
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h66
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td75
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp48
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp479
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h36
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp304
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp373
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp67
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp11
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp638
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp381
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h2
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp31
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp8
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp121
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp57
-rw-r--r--llvm/lib/Target/AArch64/GISel/select-saddo.mir158
-rw-r--r--llvm/lib/Target/AArch64/GISel/select-ssubo.mir158
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp13
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp46
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp13
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp6
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h1
-rw-r--r--llvm/lib/Target/AArch64/SMEInstrFormats.td302
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td178
-rw-r--r--llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp168
-rw-r--r--llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h74
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h53
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp301
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp90
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp402
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp72
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td39
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp93
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td16
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp382
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h26
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp95
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUFeatures.td1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp468
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h256
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp393
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h22
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td35
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp76
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td40
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp261
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp45
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp75
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp111
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h69
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h6
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp95
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp195
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp351
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp39
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp70
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp313
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h72
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp132
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h70
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp40
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td51
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td23
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp93
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h5
-rw-r--r--llvm/lib/Target/AMDGPU/EvergreenInstructions.td68
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td46
-rw-r--r--llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp112
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp69
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.h2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h26
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp361
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h103
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h3
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp207
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h30
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h21
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h5
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp224
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h48
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h44
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td40
-rw-r--r--llvm/lib/Target/AMDGPU/R600.h50
-rw-r--r--llvm/lib/Target/AMDGPU/R600.td1
-rw-r--r--llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp184
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.h6
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.td23
-rw-r--r--llvm/lib/Target/AMDGPU/R600Instructions.td22
-rw-r--r--llvm/lib/Target/AMDGPU/R600MCInstLower.cpp73
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/R600MachineScheduler.h2
-rw-r--r--llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600Packetizer.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600Processors.td4
-rw-r--r--llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.cpp46
-rw-r--r--llvm/lib/Target/AMDGPU/R600Subtarget.h1
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetMachine.cpp143
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetMachine.h48
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp142
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h69
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h130
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp35
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp102
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp306
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp37
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp716
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h40
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td61
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td154
-rw-r--r--llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp81
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp77
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp152
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h7
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.h11
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegister.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIPostRABundler.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp264
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h42
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td102
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td10
-rw-r--r--llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td41
-rw-r--r--llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp24
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h10
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h3
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td19
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td60
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td66
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td14
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td12
-rw-r--r--llvm/lib/Target/ARC/ARCAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/ARC/ARCExpandPseudos.cpp79
-rw-r--r--llvm/lib/Target/ARC/ARCISelLowering.cpp48
-rw-r--r--llvm/lib/Target/ARC/ARCISelLowering.h3
-rw-r--r--llvm/lib/Target/ARC/ARCInstrFormats.td96
-rw-r--r--llvm/lib/Target/ARC/ARCInstrInfo.cpp38
-rw-r--r--llvm/lib/Target/ARC/ARCInstrInfo.h8
-rw-r--r--llvm/lib/Target/ARC/ARCInstrInfo.td86
-rw-r--r--llvm/lib/Target/ARC/ARCOptAddrMode.cpp81
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.cpp20
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.h5
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.td80
-rw-r--r--llvm/lib/Target/ARC/ARCSubtarget.cpp6
-rw-r--r--llvm/lib/Target/ARC/ARCSubtarget.h5
-rw-r--r--llvm/lib/Target/ARC/ARCTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp33
-rw-r--r--llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/A15SDOptimizer.cpp8
-rw-r--r--llvm/lib/Target/ARM/ARM.td73
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp208
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h11
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMBlockPlacement.cpp108
-rw-r--r--llvm/lib/Target/ARM/ARMCallLowering.cpp19
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp25
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp124
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp41
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp1268
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h5
-rw-r--r--llvm/lib/Target/ARM/ARMInstrCDE.td12
-rw-r--r--llvm/lib/Target/ARM/ARMInstrFormats.td10
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.cpp14
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td27
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td965
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td58
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb.td3
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td14
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td22
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp8
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp133
-rw-r--r--llvm/lib/Target/ARM/ARMMCInstLower.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterInfo.td8
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h24
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMTargetObjectFile.cpp10
-rw-r--r--llvm/lib/Target/ARM/ARMTargetObjectFile.h5
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp104
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h14
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp29
-rw-r--r--llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp33
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp19
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h21
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h6
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h8
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp5
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp1
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp195
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp83
-rw-r--r--llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp22
-rw-r--r--llvm/lib/Target/ARM/MVETailPredication.cpp20
-rw-r--r--llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp10
-rw-r--r--llvm/lib/Target/ARM/Thumb1InstrInfo.cpp4
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp22
-rw-r--r--llvm/lib/Target/AVR/AVR.h4
-rw-r--r--llvm/lib/Target/AVR/AVR.td11
-rw-r--r--llvm/lib/Target/AVR/AVRAsmPrinter.cpp58
-rw-r--r--llvm/lib/Target/AVR/AVRCallingConv.td10
-rw-r--r--llvm/lib/Target/AVR/AVRDevices.td794
-rw-r--r--llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp574
-rw-r--r--llvm/lib/Target/AVR/AVRFrameLowering.cpp32
-rw-r--r--llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp57
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.cpp90
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.h7
-rw-r--r--llvm/lib/Target/AVR/AVRInstrFormats.td301
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.cpp64
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.h10
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.td2923
-rw-r--r--llvm/lib/Target/AVR/AVRMCInstLower.cpp8
-rw-r--r--llvm/lib/Target/AVR/AVRMCInstLower.h1
-rw-r--r--llvm/lib/Target/AVR/AVRMachineFunctionInfo.h12
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.cpp31
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.h9
-rw-r--r--llvm/lib/Target/AVR/AVRRegisterInfo.td233
-rw-r--r--llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp30
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.cpp2
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.h17
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.cpp5
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.h7
-rw-r--r--llvm/lib/Target/AVR/AVRTargetObjectFile.cpp7
-rw-r--r--llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp46
-rw-r--r--llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp137
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp34
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h4
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp12
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h4
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h1
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp26
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp11
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp10
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp19
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h2
-rw-r--r--llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp5
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPF.h9
-rw-r--r--llvm/lib/Target/BPF/BPFAdjustOpt.cpp62
-rw-r--r--llvm/lib/Target/BPF/BPFAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp45
-rw-r--r--llvm/lib/Target/BPF/BPFIRPeephole.cpp118
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp24
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.h4
-rw-r--r--llvm/lib/Target/BPF/BPFMIChecking.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp12
-rw-r--r--llvm/lib/Target/BPF/BPFRegisterInfo.td2
-rw-r--r--llvm/lib/Target/BPF/BPFSubtarget.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFTargetMachine.cpp11
-rw-r--r--llvm/lib/Target/BPF/BPFTargetTransformInfo.h17
-rw-r--r--llvm/lib/Target/BPF/BTF.def2
-rw-r--r--llvm/lib/Target/BPF/BTF.h2
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.cpp169
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.h28
-rw-r--r--llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp2
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp6
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp873
-rw-r--r--llvm/lib/Target/CSKY/CSKY.h (renamed from llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h)17
-rw-r--r--llvm/lib/Target/CSKY/CSKY.td87
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp58
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.h40
-rw-r--r--llvm/lib/Target/CSKY/CSKYCallingConv.h63
-rw-r--r--llvm/lib/Target/CSKY/CSKYCallingConv.td82
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.cpp57
-rw-r--r--llvm/lib/Target/CSKY/CSKYFrameLowering.h38
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp75
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.cpp346
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.h69
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormats.td221
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td219
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.cpp25
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.h36
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.td644
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td452
-rw-r--r--llvm/lib/Target/CSKY/CSKYMCInstLower.cpp117
-rw-r--r--llvm/lib/Target/CSKY/CSKYMCInstLower.h35
-rw-r--r--llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h62
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp95
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.h45
-rw-r--r--llvm/lib/Target/CSKY/CSKYRegisterInfo.td15
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.cpp74
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.h120
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetMachine.cpp41
-rw-r--r--llvm/lib/Target/CSKY/CSKYTargetMachine.h8
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp49
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h3
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h70
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h27
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp102
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h17
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp98
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h47
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp35
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h9
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h3
-rw-r--r--llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp81
-rw-r--r--llvm/lib/Target/Hexagon/HexagonArch.h6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp35
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp30
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td24
-rw-r--r--llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td38
-rw-r--r--llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenInsert.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenMux.cpp28
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp20
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp59
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrFormats.td3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPseudo.td8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.td187
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetStreamer.h1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h16
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp24
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp13
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp11
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp6
-rw-r--r--llvm/lib/Target/Lanai/LanaiAluCode.h2
-rw-r--r--llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.cpp16
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.h5
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.cpp19
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.h6
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.td6
-rw-r--r--llvm/lib/Target/Lanai/LanaiTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h13
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp6
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp5
-rw-r--r--llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp232
-rw-r--r--llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp5
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp (renamed from llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp)4
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.h (renamed from llvm/lib/Target/M68k/GlSel/M68kCallLowering.h)2
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp (renamed from llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp)0
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp (renamed from llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp)0
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h (renamed from llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h)0
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp105
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h (renamed from llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h)6
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td (renamed from llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td)2
-rw-r--r--llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp27
-rw-r--r--llvm/lib/Target/M68k/M68k.td2
-rw-r--r--llvm/lib/Target/M68k/M68kAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kCallingConv.h22
-rw-r--r--llvm/lib/Target/M68k/M68kFrameLowering.cpp8
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.cpp17
-rw-r--r--llvm/lib/Target/M68k/M68kInstrArithmetic.td95
-rw-r--r--llvm/lib/Target/M68k/M68kInstrCompiler.td10
-rw-r--r--llvm/lib/Target/M68k/M68kInstrFormats.td2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.h2
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.td74
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.cpp8
-rw-r--r--llvm/lib/Target/M68k/M68kTargetMachine.cpp12
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp8
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp2
-rw-r--r--llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp6
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp19
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.cpp5
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelLowering.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MSP430InstrInfo.cpp5
-rw-r--r--llvm/lib/Target/MSP430/MSP430Subtarget.cpp2
-rw-r--r--llvm/lib/Target/MSP430/MSP430TargetMachine.cpp4
-rw-r--r--llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp10
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td2
-rw-r--r--llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td30
-rw-r--r--llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td73
-rw-r--r--llvm/lib/Target/Mips/MicroMipsInstrInfo.td61
-rw-r--r--llvm/lib/Target/Mips/Mips16HardFloat.cpp13
-rw-r--r--llvm/lib/Target/Mips/Mips16InstrInfo.td12
-rw-r--r--llvm/lib/Target/Mips/Mips32r6InstrInfo.td7
-rw-r--r--llvm/lib/Target/Mips/MipsAsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsCallLowering.cpp31
-rw-r--r--llvm/lib/Target/Mips/MipsDSPInstrInfo.td45
-rw-r--r--llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp12
-rw-r--r--llvm/lib/Target/Mips/MipsEVAInstrInfo.td9
-rw-r--r--llvm/lib/Target/Mips/MipsFastISel.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp7
-rw-r--r--llvm/lib/Target/Mips/MipsInstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsMSAInstrInfo.td50
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp9
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp4
-rw-r--r--llvm/lib/Target/Mips/MipsSEInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsSubtarget.cpp10
-rw-r--r--llvm/lib/Target/Mips/MipsTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.td6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp168
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp11
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp23
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp666
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp25
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td10
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td6101
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXPeephole.cpp25
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp22
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td12
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp1677
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp27
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.h4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h11
-rw-r--r--llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp9
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h7
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h7
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td2075
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td13
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td30
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp13
-rw-r--r--llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp30
-rw-r--r--llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp10
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp19
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp44
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.h22
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp157
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp610
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h26
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td123
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td4
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFormats.td8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrHTM.td8
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp33
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h5
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td95
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrPrefix.td446
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td147
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp1066
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp80
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.cpp19
-rw-r--r--llvm/lib/Target/PowerPC/PPCMacroFusion.def37
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp25
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCSchedPredicates.td294
-rw-r--r--llvm/lib/Target/PowerPC/PPCSchedule.td3
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP10.td416
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP9.td2
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.h2
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h9
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXCopy.cpp7
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp309
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp15
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h3
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp14
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h37
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp22
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h27
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp13
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp149
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp66
-rw-r--r--llvm/lib/Target/RISCV/RISCV.h3
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td132
-rw-r--r--llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp8
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp41
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.h2
-rw-r--r--llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp475
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp488
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h31
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp1906
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h58
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp424
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td142
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp389
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h35
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td170
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoC.td7
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td12
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td26
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td26
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td1199
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td714
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td108
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td220
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td (renamed from llvm/lib/Target/RISCV/RISCVInstrInfoB.td)145
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td40
-rw-r--r--llvm/lib/Target/RISCV/RISCVMCInstLower.cpp23
-rw-r--r--llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp19
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td110
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket.td3
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedule.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVScheduleV.td820
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp33
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h18
-rw-r--r--llvm/lib/Target/RISCV/RISCVSystemOperands.td1
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp11
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h53
-rw-r--r--llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp2
-rw-r--r--llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp2
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.cpp14
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.td34
-rw-r--r--llvm/lib/Target/Sparc/SparcSubtarget.cpp2
-rw-r--r--llvm/lib/Target/Sparc/SparcTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp51
-rw-r--r--llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp24
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h5
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp35
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp52
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h10
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp41
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h2
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp64
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp33
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.h33
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.cpp4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.h71
-rw-r--r--llvm/lib/Target/SystemZ/SystemZCallingConv.td45
-rw-r--r--llvm/lib/Target/SystemZ/SystemZElimCompare.cpp7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp341
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.h56
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp263
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h13
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFP.td7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFormats.td49
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp39
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.h11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td36
-rw-r--r--llvm/lib/Target/SystemZ/SystemZLongBranch.cpp18
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp19
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td21
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp8
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.h21
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp110
-rw-r--r--llvm/lib/Target/SystemZ/SystemZShortenInst.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.cpp4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.h19
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetStreamer.h55
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h9
-rw-r--r--llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/TargetMachine.cpp64
-rw-r--r--llvm/lib/Target/TargetMachineC.cpp2
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp2
-rw-r--r--llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp2
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp5
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp1
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp2
-rw-r--r--llvm/lib/Target/VE/VEAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.cpp12
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.cpp6
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.td52
-rw-r--r--llvm/lib/Target/VE/VESubtarget.cpp2
-rw-r--r--llvm/lib/Target/VE/VETargetMachine.cpp2
-rw-r--r--llvm/lib/Target/VE/VVPInstrPatternsVec.td7
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp52
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h18
-rw-r--r--llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp7
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h3
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/README.txt8
-rw-r--r--llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h23
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.h7
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssembly.td3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp50
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp55
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp14
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp50
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp54
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISD.def9
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp85
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp562
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h45
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td38
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td15
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td57
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td8
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td226
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td39
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp15
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp905
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp84
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp48
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp51
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp13
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp6
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp100
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h6
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp231
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86Operand.h3
-rw-r--r--llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp38
-rw-r--r--llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h13
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp48
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp36
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h13
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp3
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp18
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp48
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp17
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp33
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp1
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp2
-rw-r--r--llvm/lib/Target/X86/X86.h4
-rw-r--r--llvm/lib/Target/X86/X86.td743
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.cpp5
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.h19
-rw-r--r--llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp20
-rw-r--r--llvm/lib/Target/X86/X86CallLowering.cpp4
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.td88
-rw-r--r--llvm/lib/Target/X86/X86CmovConversion.cpp8
-rw-r--r--llvm/lib/Target/X86/X86DynAllocaExpander.cpp (renamed from llvm/lib/Target/X86/X86WinAllocaExpander.cpp)58
-rw-r--r--llvm/lib/Target/X86/X86ExpandPseudo.cpp50
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp32
-rw-r--r--llvm/lib/Target/X86/X86FastTileConfig.cpp4
-rw-r--r--llvm/lib/Target/X86/X86FixupLEAs.cpp3
-rw-r--r--llvm/lib/Target/X86/X86FlagsCopyLowering.cpp6
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp41
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp131
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp271
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp3561
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h83
-rw-r--r--llvm/lib/Target/X86/X86IndirectBranchTracking.cpp2
-rw-r--r--llvm/lib/Target/X86/X86IndirectThunks.cpp2
-rw-r--r--llvm/lib/Target/X86/X86InsertWait.cpp21
-rw-r--r--llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp8
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td2246
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td49
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td48
-rw-r--r--llvm/lib/Target/X86/X86InstrControl.td36
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA.td46
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA3Info.cpp46
-rw-r--r--llvm/lib/Target/X86/X86InstrFPStack.td2
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.cpp475
-rw-r--r--llvm/lib/Target/X86/X86InstrFormats.td74
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td103
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp754
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h40
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td16
-rw-r--r--llvm/lib/Target/X86/X86InstrKL.td7
-rw-r--r--llvm/lib/Target/X86/X86InstrMPX.td77
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td239
-rw-r--r--llvm/lib/Target/X86/X86InstrSystem.td15
-rw-r--r--llvm/lib/Target/X86/X86InstrVecCompiler.td96
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h231
-rw-r--r--llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp3
-rw-r--r--llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp2
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp15
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp503
-rw-r--r--llvm/lib/Target/X86/X86LowerTileCopy.cpp4
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp265
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.h15
-rw-r--r--llvm/lib/Target/X86/X86OptimizeLEAs.cpp9
-rw-r--r--llvm/lib/Target/X86/X86PadShortFunction.cpp9
-rw-r--r--llvm/lib/Target/X86/X86PfmCounters.td20
-rw-r--r--llvm/lib/Target/X86/X86PreTileConfig.cpp5
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.cpp8
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.td27
-rw-r--r--llvm/lib/Target/X86/X86SchedBroadwell.td266
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td64
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td2636
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td9
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td9
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td9
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td25
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td112
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBdVer2.td7
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td5
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td153
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td41
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td41
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver3.td35
-rw-r--r--llvm/lib/Target/X86/X86SelectionDAGInfo.cpp7
-rw-r--r--llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp2
-rw-r--r--llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp8
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp20
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h47
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp7
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp961
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h115
-rw-r--r--llvm/lib/Target/X86/X86VZeroUpper.cpp6
-rw-r--r--llvm/lib/Target/X86/X86WinEHState.cpp2
-rw-r--r--llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp2
-rw-r--r--llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreISelLowering.cpp12
-rw-r--r--llvm/lib/Target/XCore/XCoreInstrInfo.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp7
-rw-r--r--llvm/lib/Target/XCore/XCoreSubtarget.cpp2
-rw-r--r--llvm/lib/Target/XCore/XCoreTargetMachine.cpp4
-rw-r--r--llvm/lib/TextAPI/TextStub.cpp4
-rw-r--r--llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp1
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp22
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h38
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp92
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroCleanup.cpp20
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroEarly.cpp3
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp85
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInstr.h2
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp61
-rw-r--r--llvm/lib/Transforms/Coroutines/Coroutines.cpp14
-rw-r--r--llvm/lib/Transforms/IPO/AlwaysInliner.cpp12
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp44
-rw-r--r--llvm/lib/Transforms/IPO/Attributor.cpp194
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp683
-rw-r--r--llvm/lib/Transforms/IPO/ConstantMerge.cpp44
-rw-r--r--llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp43
-rw-r--r--llvm/lib/Transforms/IPO/ExtractGV.cpp29
-rw-r--r--llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp440
-rw-r--r--llvm/lib/Transforms/IPO/FunctionImport.cpp149
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp332
-rw-r--r--llvm/lib/Transforms/IPO/GlobalDCE.cpp14
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp368
-rw-r--r--llvm/lib/Transforms/IPO/GlobalSplit.cpp5
-rw-r--r--llvm/lib/Transforms/IPO/IROutliner.cpp977
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp262
-rw-r--r--llvm/lib/Transforms/IPO/Internalize.cpp30
-rw-r--r--llvm/lib/Transforms/IPO/LoopExtractor.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp79
-rw-r--r--llvm/lib/Transforms/IPO/MergeFunctions.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/ModuleInliner.cpp354
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp719
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp10
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp7
-rw-r--r--llvm/lib/Transforms/IPO/SCCP.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/SampleContextTracker.cpp164
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp226
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfileProbe.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp15
-rw-r--r--llvm/lib/Transforms/IPO/StripSymbols.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp38
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp21
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp88
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp1103
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp538
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp165
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp616
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h24
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp48
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp44
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp14
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp26
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp226
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp549
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp76
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp175
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp384
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp187
-rw-r--r--llvm/lib/Transforms/Instrumentation/CGProfile.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp8
-rw-r--r--llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp483
-rw-r--r--llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp10
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp384
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp7
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp136
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp18
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp121
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp19
-rw-r--r--llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp104
-rw-r--r--llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp16
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.cpp35
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.h8
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp15
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp41
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp14
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp13
-rw-r--r--llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h3
-rw-r--r--llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/ADCE.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/BDCE.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp5
-rw-r--r--llvm/lib/Transforms/Scalar/ConstantHoisting.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/ConstraintElimination.cpp35
-rw-r--r--llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp93
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp36
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp1063
-rw-r--r--llvm/lib/Transforms/Scalar/DivRemPairs.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp20
-rw-r--r--llvm/lib/Transforms/Scalar/Float2Int.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp234
-rw-r--r--llvm/lib/Transforms/Scalar/GVNHoist.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/GuardWidening.cpp86
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp211
-rw-r--r--llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp190
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp64
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp369
-rw-r--r--llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp126
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDeletion.cpp55
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDistribute.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFlatten.cpp270
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp419
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp21
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp12
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp42
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPredication.cpp160
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRotation.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp16
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSink.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp180
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp340
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnswitch.cpp22
-rw-r--r--llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp7
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp26
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp622
-rw-r--r--llvm/lib/Transforms/Scalar/MergeICmps.cpp204
-rw-r--r--llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp9
-rw-r--r--llvm/lib/Transforms/Scalar/NaryReassociate.cpp114
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp11
-rw-r--r--llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp19
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp48
-rw-r--r--llvm/lib/Transforms/Scalar/SCCP.cpp17
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp206
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/Scalarizer.cpp18
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp120
-rw-r--r--llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp19
-rw-r--r--llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp14
-rw-r--r--llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp34
-rw-r--r--llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp29
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp23
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp33
-rw-r--r--llvm/lib/Transforms/Utils/CallPromotionUtils.cpp26
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp126
-rw-r--r--llvm/lib/Transforms/Utils/CodeMoverUtils.cpp71
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp14
-rw-r--r--llvm/lib/Transforms/Utils/Evaluator.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/FixIrreducible.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/FlattenCFG.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/FunctionComparator.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/GlobalStatus.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/InjectTLIMappings.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp202
-rw-r--r--llvm/lib/Transforms/Utils/LCSSA.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp150
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp198
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp24
-rw-r--r--llvm/lib/Transforms/Utils/LoopSimplify.cpp13
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp19
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp440
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp313
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp16
-rw-r--r--llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LowerSwitch.cpp10
-rw-r--r--llvm/lib/Transforms/Utils/ModuleUtils.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/PredicateInfo.cpp46
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/SCCPSolver.cpp30
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp188
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp275
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyIndVar.cpp3
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp144
-rw-r--r--llvm/lib/Transforms/Utils/SplitModule.cpp29
-rw-r--r--llvm/lib/Transforms/Utils/SymbolRewriter.cpp8
-rw-r--r--llvm/lib/Transforms/Utils/VNCoercion.cpp27
-rw-r--r--llvm/lib/Transforms/Utils/ValueMapper.cpp67
-rw-r--r--llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp108
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp8
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h39
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp947
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp2406
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp50
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h76
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp104
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp30
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.h7
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp290
-rw-r--r--llvm/lib/WindowsManifest/WindowsManifestMerger.cpp8
-rw-r--r--llvm/lib/XRay/InstrumentationMap.cpp8
-rw-r--r--llvm/tools/bugpoint/CrashDebugger.cpp24
-rw-r--r--llvm/tools/bugpoint/OptimizerDriver.cpp4
-rw-r--r--llvm/tools/bugpoint/ToolRunner.cpp12
-rw-r--r--llvm/tools/llc/llc.cpp39
-rw-r--r--llvm/tools/lli/ChildTarget/ChildTarget.cpp95
-rw-r--r--llvm/tools/lli/ForwardingMemoryManager.h (renamed from llvm/tools/lli/RemoteJITUtils.h)62
-rw-r--r--llvm/tools/lli/lli.cpp68
-rw-r--r--llvm/tools/llvm-ar/llvm-ar.cpp8
-rw-r--r--llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp11
-rw-r--r--llvm/tools/llvm-cov/CodeCoverage.cpp12
-rw-r--r--llvm/tools/llvm-cov/CoverageExporterLcov.cpp2
-rw-r--r--llvm/tools/llvm-cov/CoverageFilters.cpp2
-rw-r--r--llvm/tools/llvm-cxxdump/Error.cpp1
-rw-r--r--llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp4
-rw-r--r--llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp29
-rw-r--r--llvm/tools/llvm-diff/lib/DiffConsumer.cpp (renamed from llvm/tools/llvm-diff/DiffConsumer.cpp)6
-rw-r--r--llvm/tools/llvm-diff/lib/DiffConsumer.h (renamed from llvm/tools/llvm-diff/DiffConsumer.h)1
-rw-r--r--llvm/tools/llvm-diff/lib/DiffLog.cpp (renamed from llvm/tools/llvm-diff/DiffLog.cpp)0
-rw-r--r--llvm/tools/llvm-diff/lib/DiffLog.h (renamed from llvm/tools/llvm-diff/DiffLog.h)0
-rw-r--r--llvm/tools/llvm-diff/lib/DifferenceEngine.cpp (renamed from llvm/tools/llvm-diff/DifferenceEngine.cpp)0
-rw-r--r--llvm/tools/llvm-diff/lib/DifferenceEngine.h (renamed from llvm/tools/llvm-diff/DifferenceEngine.h)0
-rw-r--r--llvm/tools/llvm-diff/llvm-diff.cpp4
-rw-r--r--llvm/tools/llvm-dwarfdump/Statistics.cpp331
-rw-r--r--llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp23
-rw-r--r--llvm/tools/llvm-dwp/llvm-dwp.cpp2
-rw-r--r--llvm/tools/llvm-lto/llvm-lto.cpp44
-rw-r--r--llvm/tools/llvm-lto2/llvm-lto2.cpp12
-rw-r--r--llvm/tools/llvm-mc/Disassembler.cpp4
-rw-r--r--llvm/tools/llvm-mc/llvm-mc.cpp4
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.cpp2
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.h6
-rw-r--r--llvm/tools/llvm-mca/PipelinePrinter.cpp1
-rw-r--r--llvm/tools/llvm-mca/PipelinePrinter.h2
-rw-r--r--llvm/tools/llvm-mca/Views/DispatchStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionView.h2
-rw-r--r--llvm/tools/llvm-mca/Views/RegisterFileStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/SchedulerStatistics.h2
-rw-r--r--llvm/tools/llvm-mca/Views/SummaryView.h8
-rw-r--r--llvm/tools/llvm-mca/Views/TimelineView.cpp19
-rw-r--r--llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp33
-rw-r--r--llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h57
-rw-r--r--llvm/tools/llvm-mca/llvm-mca.cpp98
-rw-r--r--llvm/tools/llvm-nm/llvm-nm.cpp115
-rw-r--r--llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp19
-rw-r--r--llvm/tools/llvm-objcopy/COFF/Object.cpp2
-rw-r--r--llvm/tools/llvm-objcopy/COFF/Writer.cpp18
-rw-r--r--llvm/tools/llvm-objcopy/CommonConfig.h22
-rw-r--r--llvm/tools/llvm-objcopy/ConfigManager.cpp124
-rw-r--r--llvm/tools/llvm-objcopy/ELF/ELFConfig.h10
-rw-r--r--llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp133
-rw-r--r--llvm/tools/llvm-objcopy/ELF/Object.cpp164
-rw-r--r--llvm/tools/llvm-objcopy/ELF/Object.h34
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOConfig.h24
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp45
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h47
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp63
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h3
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOReader.cpp38
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOReader.h2
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp195
-rw-r--r--llvm/tools/llvm-objcopy/MachO/MachOWriter.h9
-rw-r--r--llvm/tools/llvm-objcopy/MachO/Object.cpp20
-rw-r--r--llvm/tools/llvm-objcopy/MachO/Object.h18
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOpts.td11
-rw-r--r--llvm/tools/llvm-objdump/COFFDump.cpp205
-rw-r--r--llvm/tools/llvm-objdump/COFFDump.h2
-rw-r--r--llvm/tools/llvm-objdump/ELFDump.cpp2
-rw-r--r--llvm/tools/llvm-objdump/MachODump.cpp10
-rw-r--r--llvm/tools/llvm-objdump/ObjdumpOpts.td58
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.cpp18
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.h4
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.cpp142
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.h16
-rw-r--r--llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp2
-rw-r--r--llvm/tools/llvm-pdbutil/LinePrinter.cpp30
-rw-r--r--llvm/tools/llvm-pdbutil/LinePrinter.h10
-rw-r--r--llvm/tools/llvm-profdata/llvm-profdata.cpp157
-rw-r--r--llvm/tools/llvm-readobj/ARMEHABIPrinter.h2
-rw-r--r--llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp42
-rw-r--r--llvm/tools/llvm-readobj/ARMWinEHPrinter.h3
-rw-r--r--llvm/tools/llvm-readobj/COFFDumper.cpp29
-rw-r--r--llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h3
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp241
-rw-r--r--llvm/tools/llvm-readobj/MachODumper.cpp30
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.cpp18
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.h5
-rw-r--r--llvm/tools/llvm-readobj/Opts.td20
-rw-r--r--llvm/tools/llvm-readobj/WasmDumper.cpp6
-rw-r--r--llvm/tools/llvm-readobj/Win64EHDumper.cpp47
-rw-r--r--llvm/tools/llvm-readobj/XCOFFDumper.cpp309
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.cpp19
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.h2
-rw-r--r--llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp74
-rw-r--r--llvm/tools/llvm-stress/llvm-stress.cpp6
-rw-r--r--llvm/tools/llvm-strings/llvm-strings.cpp4
-rw-r--r--llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp2
-rw-r--r--llvm/tools/llvm-tli-checker/Opts.td16
-rw-r--r--llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp357
-rw-r--r--llvm/tools/llvm-xray/xray-color-helper.cpp8
-rw-r--r--llvm/tools/llvm-xray/xray-converter.cpp14
-rw-r--r--llvm/tools/llvm-xray/xray-extract.cpp13
-rw-r--r--llvm/tools/opt/NewPMDriver.cpp64
-rw-r--r--llvm/tools/opt/opt.cpp79
-rw-r--r--llvm/utils/TableGen/AsmMatcherEmitter.cpp16
-rw-r--r--llvm/utils/TableGen/AsmWriterEmitter.cpp5
-rw-r--r--llvm/utils/TableGen/CodeEmitterGen.cpp4
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.cpp31
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.h7
-rw-r--r--llvm/utils/TableGen/CodeGenMapTable.cpp15
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.cpp23
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.h1
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.cpp1
-rw-r--r--llvm/utils/TableGen/CompressInstEmitter.cpp (renamed from llvm/utils/TableGen/RISCVCompressInstEmitter.cpp)172
-rw-r--r--llvm/utils/TableGen/GlobalISelEmitter.cpp10
-rw-r--r--llvm/utils/TableGen/IntrinsicEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/PredicateExpander.cpp2
-rw-r--r--llvm/utils/TableGen/RegisterInfoEmitter.cpp1
-rw-r--r--llvm/utils/TableGen/X86DisassemblerTables.cpp2
-rw-r--r--llvm/utils/TableGen/X86DisassemblerTables.h4
-rw-r--r--llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp4
-rw-r--r--llvm/utils/TableGen/X86FoldTablesEmitter.cpp6
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.cpp27
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.h3
2208 files changed, 127227 insertions, 62449 deletions
diff --git a/llvm/include/llvm-c/Comdat.h b/llvm/include/llvm-c/Comdat.h
index 81cde1107fa4..8002bc0581af 100644
--- a/llvm/include/llvm-c/Comdat.h
+++ b/llvm/include/llvm-c/Comdat.h
@@ -19,6 +19,13 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @defgroup LLVMCCoreComdat Comdats
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
typedef enum {
LLVMAnyComdatSelectionKind, ///< The linker may choose any COMDAT.
LLVMExactMatchComdatSelectionKind, ///< The data referenced by the COMDAT must
@@ -66,6 +73,10 @@ LLVMComdatSelectionKind LLVMGetComdatSelectionKind(LLVMComdatRef C);
*/
void LLVMSetComdatSelectionKind(LLVMComdatRef C, LLVMComdatSelectionKind Kind);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 1a5e763cfc60..d170eff17951 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -1580,10 +1580,10 @@ LLVMTypeRef LLVMX86AMXType(void);
macro(ConstantVector) \
macro(GlobalValue) \
macro(GlobalAlias) \
- macro(GlobalIFunc) \
macro(GlobalObject) \
macro(Function) \
macro(GlobalVariable) \
+ macro(GlobalIFunc) \
macro(UndefValue) \
macro(PoisonValue) \
macro(Instruction) \
@@ -3287,7 +3287,7 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC);
*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr);
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, LLVMAttributeIndex Idx,
unsigned Align);
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
@@ -3611,11 +3611,21 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, LLVMMetadataRef Loc);
* current debug location for the given builder. If the builder has no current
* debug location, this function is a no-op.
*
+ * @deprecated LLVMSetInstDebugLocation is deprecated in favor of the more general
+ * LLVMAddMetadataToInst.
+ *
* @see llvm::IRBuilder::SetInstDebugLocation()
*/
void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst);
/**
+ * Adds the metadata registered with the given builder to the given instruction.
+ *
+ * @see llvm::IRBuilder::AddMetadataToInst()
+ */
+void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst);
+
+/**
 * Get the default floating-point math metadata for a given builder.
*
* @see llvm::IRBuilder::getDefaultFPMathTag()
@@ -4081,6 +4091,7 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
/**
* @defgroup LLVMCCorePassRegistry Pass Registry
+ * @ingroup LLVMCCore
*
* @{
*/
@@ -4095,6 +4106,7 @@ LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
/**
* @defgroup LLVMCCorePassManagers Pass Managers
+ * @ingroup LLVMCCore
*
* @{
*/
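A minimal usage sketch, not part of the patch, of the new LLVMAddMetadataToInst entry point declared above as the replacement for the now-deprecated LLVMSetInstDebugLocation; the builder and instruction handles are assumed to come from the caller:

    #include "llvm-c/Core.h"

    /* Attach the builder's registered metadata (including its current debug
     * location) to a freshly created instruction. */
    static void attachBuilderMetadata(LLVMBuilderRef Builder, LLVMValueRef Inst) {
      /* Previously: LLVMSetInstDebugLocation(Builder, Inst); */
      LLVMAddMetadataToInst(Builder, Inst);
    }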
diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h
index 8c085807914b..d7fb898b60d2 100644
--- a/llvm/include/llvm-c/DebugInfo.h
+++ b/llvm/include/llvm-c/DebugInfo.h
@@ -22,6 +22,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCCoreDebugInfo Debug Information
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
+/**
* Debug info flags.
*/
typedef enum {
@@ -227,6 +234,13 @@ void LLVMDisposeDIBuilder(LLVMDIBuilderRef Builder);
void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder);
/**
+ * Finalize a specific subprogram.
+ * No new variables may be added to this subprogram afterwards.
+ */
+void LLVMDIBuilderFinalizeSubprogram(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef Subprogram);
+
+/**
* A CompileUnit provides an anchor for all debugging
* information generated during this instance of compilation.
* \param Lang Source programming language, eg.
@@ -389,48 +403,48 @@ LLVMDIBuilderCreateImportedModuleFromNamespace(LLVMDIBuilderRef Builder,
* \param ImportedEntity Previous imported entity to alias.
* \param File File where the declaration is located.
* \param Line Line number of the declaration.
+ * \param Elements Renamed elements.
+ * \param NumElements Number of renamed elements.
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromAlias(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef ImportedEntity,
- LLVMMetadataRef File,
- unsigned Line);
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromAlias(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope,
+ LLVMMetadataRef ImportedEntity, LLVMMetadataRef File, unsigned Line,
+ LLVMMetadataRef *Elements, unsigned NumElements);
/**
* Create a descriptor for an imported module.
- * \param Builder The \c DIBuilder.
- * \param Scope The scope this module is imported into
- * \param M The module being imported here
- * \param File File where the declaration is located.
- * \param Line Line number of the declaration.
+ * \param Builder The \c DIBuilder.
+ * \param Scope The scope this module is imported into
+ * \param M The module being imported here
+ * \param File File where the declaration is located.
+ * \param Line Line number of the declaration.
+ * \param Elements Renamed elements.
+ * \param NumElements Number of renamed elements.
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromModule(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef M,
- LLVMMetadataRef File,
- unsigned Line);
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromModule(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef M,
+ LLVMMetadataRef File, unsigned Line, LLVMMetadataRef *Elements,
+ unsigned NumElements);
/**
* Create a descriptor for an imported function, type, or variable. Suitable
* for e.g. FORTRAN-style USE declarations.
- * \param Builder The DIBuilder.
- * \param Scope The scope this module is imported into.
- * \param Decl The declaration (or definition) of a function, type,
- or variable.
- * \param File File where the declaration is located.
- * \param Line Line number of the declaration.
- * \param Name A name that uniquely identifies this imported declaration.
- * \param NameLen The length of the C string passed to \c Name.
+ * \param Builder The DIBuilder.
+ * \param Scope The scope this module is imported into.
+ * \param Decl The declaration (or definition) of a function, type,
+ or variable.
+ * \param File File where the declaration is located.
+ * \param Line Line number of the declaration.
+ * \param Name A name that uniquely identifies this imported
+ declaration.
+ * \param NameLen The length of the C string passed to \c Name.
+ * \param Elements Renamed elements.
+ * \param NumElements Number of renamed elements.
*/
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedDeclaration(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef Decl,
- LLVMMetadataRef File,
- unsigned Line,
- const char *Name, size_t NameLen);
+LLVMMetadataRef LLVMDIBuilderCreateImportedDeclaration(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef Decl,
+ LLVMMetadataRef File, unsigned Line, const char *Name, size_t NameLen,
+ LLVMMetadataRef *Elements, unsigned NumElements);
/**
* Creates a new DebugLocation that describes a source location.
@@ -1360,6 +1374,10 @@ void LLVMInstructionSetDebugLoc(LLVMValueRef Inst, LLVMMetadataRef Loc);
*/
LLVMMetadataKind LLVMGetMetadataKind(LLVMMetadataRef Metadata);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
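A hedged sketch, not from the patch, of how existing callers adapt to the widened signatures above: the new Elements/NumElements parameters accept NULL and 0 when no elements are renamed, and a single subprogram can now be finalized on its own. DIB, Scope, Decl, File and SP are assumed to be valid handles:

    #include "llvm-c/DebugInfo.h"
    #include <stddef.h>

    static LLVMMetadataRef importPlainDecl(LLVMDIBuilderRef DIB,
                                           LLVMMetadataRef Scope,
                                           LLVMMetadataRef Decl,
                                           LLVMMetadataRef File, unsigned Line) {
      /* No renamed elements, so pass NULL/0 for the new trailing parameters. */
      return LLVMDIBuilderCreateImportedDeclaration(DIB, Scope, Decl, File, Line,
                                                    "decl", 4, NULL, 0);
    }

    static void finishOneSubprogram(LLVMDIBuilderRef DIB, LLVMMetadataRef SP) {
      /* New in this patch: finalize a single subprogram without finalizing
       * the whole DIBuilder. */
      LLVMDIBuilderFinalizeSubprogram(DIB, SP);
    }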
diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h
index ae5c68227594..53baaef11033 100644
--- a/llvm/include/llvm-c/DisassemblerTypes.h
+++ b/llvm/include/llvm-c/DisassemblerTypes.h
@@ -18,6 +18,12 @@
#endif
/**
+ * @addtogroup LLVMCDisassembler
+ *
+ * @{
+ */
+
+/**
* An opaque reference to a disassembler context.
*/
typedef void *LLVMDisasmContextRef;
@@ -157,4 +163,8 @@ typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo,
/* The output reference is to a C++ symbol name. */
#define LLVMDisassembler_ReferenceType_DeMangled_Name 9
+/**
+ * @}
+ */
+
#endif
diff --git a/llvm/include/llvm-c/Error.h b/llvm/include/llvm-c/Error.h
index bc702ac7a1bf..c3baaf65186a 100644
--- a/llvm/include/llvm-c/Error.h
+++ b/llvm/include/llvm-c/Error.h
@@ -18,6 +18,13 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @defgroup LLVMCError Error Handling
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
#define LLVMErrorSuccess 0
/**
@@ -67,6 +74,10 @@ LLVMErrorTypeId LLVMGetStringErrorTypeId(void);
*/
LLVMErrorRef LLVMCreateStringError(const char *ErrMsg);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/ErrorHandling.h b/llvm/include/llvm-c/ErrorHandling.h
index 5ba099c209c0..d9b9f22752b8 100644
--- a/llvm/include/llvm-c/ErrorHandling.h
+++ b/llvm/include/llvm-c/ErrorHandling.h
@@ -18,6 +18,12 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @addtogroup LLVMCError
+ *
+ * @{
+ */
+
typedef void (*LLVMFatalErrorHandler)(const char *Reason);
/**
@@ -42,6 +48,10 @@ void LLVMResetFatalErrorHandler(void);
*/
void LLVMEnablePrettyStackTrace(void);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/IRReader.h b/llvm/include/llvm-c/IRReader.h
index 5a3f633c3d91..905b84fa5a86 100644
--- a/llvm/include/llvm-c/IRReader.h
+++ b/llvm/include/llvm-c/IRReader.h
@@ -20,6 +20,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCCoreIRReader IR Reader
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
+/**
* Read LLVM IR from a memory buffer and convert it into an in-memory Module
* object. Returns 0 on success.
* Optionally returns a human-readable description of any errors that
@@ -32,6 +39,10 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
char **OutMessage);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/LLJIT.h b/llvm/include/llvm-c/LLJIT.h
index f689ca0f1cf0..a06133aac4fb 100644
--- a/llvm/include/llvm-c/LLJIT.h
+++ b/llvm/include/llvm-c/LLJIT.h
@@ -32,6 +32,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCExecutionEngineLLJIT LLJIT
+ * @ingroup LLVMCExecutionEngine
+ *
+ * @{
+ */
+
+/**
* A function for constructing an ObjectLinkingLayer instance to be used
* by an LLJIT instance.
*
@@ -235,6 +242,10 @@ LLVMOrcIRTransformLayerRef LLVMOrcLLJITGetIRTransformLayer(LLVMOrcLLJITRef J);
*/
const char *LLVMOrcLLJITGetDataLayoutStr(LLVMOrcLLJITRef J);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif /* LLVM_C_LLJIT_H */
diff --git a/llvm/include/llvm-c/Linker.h b/llvm/include/llvm-c/Linker.h
index 1ad9cc958753..acff5d5e2225 100644
--- a/llvm/include/llvm-c/Linker.h
+++ b/llvm/include/llvm-c/Linker.h
@@ -19,6 +19,13 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @defgroup LLVMCCoreLinker Linker
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
/* This enum is provided for backwards-compatibility only. It has no effect. */
typedef enum {
LLVMLinkerDestroySource = 0, /* This is the default behavior. */
@@ -35,4 +42,8 @@ LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src);
LLVM_C_EXTERN_C_END
+/**
+ * @}
+ */
+
#endif
diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h
index 1790afbcecc7..e2f30b7cdf45 100644
--- a/llvm/include/llvm-c/Orc.h
+++ b/llvm/include/llvm-c/Orc.h
@@ -34,6 +34,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCExecutionEngineORC On-Request-Compilation
+ * @ingroup LLVMCExecutionEngine
+ *
+ * @{
+ */
+
+/**
* Represents an address in the executor process.
*/
typedef uint64_t LLVMOrcJITTargetAddress;
@@ -921,6 +928,49 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
LLVMOrcSymbolPredicate Filter, void *FilterCtx);
/**
+ * Get a dynamic library search generator that will reflect library symbols
+ * into the JITDylib. On success the resulting generator is owned by the
+ * client. Ownership is typically transferred by adding the instance to a
+ * JITDylib using LLVMOrcJITDylibAddGenerator.
+ *
+ * The GlobalPrefix argument specifies the character that appears on the front
+ * of linker-mangled symbols for the target platform (e.g. '_' on MachO).
+ * If non-null, this character will be stripped from the start of all symbol
+ * strings before passing the remaining substring to dlsym.
+ *
+ * The optional Filter and Ctx arguments can be used to supply a symbol name
+ * filter: Only symbols for which the filter returns true will be visible to
+ * JIT'd code. If the Filter argument is null then all library symbols will
+ * be visible to JIT'd code. Note that the symbol name passed to the Filter
+ * function is the full mangled symbol: The client is responsible for stripping
+ * the global prefix if present.
+ *
+ * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE!
+ *
+ */
+LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, const char *FileName,
+ char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx);
+
+/**
+ * Get a static library search generator that will reflect static library
+ * symbols into the JITDylib. On success the resulting generator is owned by
+ * the client. Ownership is typically transferred by adding the instance to a
+ * JITDylib using LLVMOrcJITDylibAddGenerator.
+ *
+ * Calling with the optional TargetTriple argument will succeed if the file at
+ * the given path is a static library or a MachO universal binary containing a
+ * static library that is compatible with the given triple. Otherwise it will
+ * return an error.
+ *
+ * THIS API IS EXPERIMENTAL AND LIKELY TO CHANGE IN THE NEAR FUTURE!
+ *
+ */
+LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer,
+ const char *FileName, const char *TargetTriple);
+
+/**
* Create a ThreadSafeContext containing a new LLVMContext.
*
* Ownership of the underlying ThreadSafeContext data is shared: Clients
@@ -1133,6 +1183,10 @@ void LLVMOrcDisposeDumpObjects(LLVMOrcDumpObjectsRef DumpObjects);
LLVMErrorRef LLVMOrcDumpObjects_CallOperator(LLVMOrcDumpObjectsRef DumpObjects,
LLVMMemoryBufferRef *ObjBuffer);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif /* LLVM_C_ORC_H */
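An illustrative sketch, not part of the patch, of the new path-based generator: it creates a generator for a shared library and hands ownership to a JITDylib via LLVMOrcJITDylibAddGenerator, reporting failures with the error helpers from llvm-c/Error.h. The dylib handle and library path are assumed to come from the caller:

    #include "llvm-c/Orc.h"
    #include <stdio.h>

    /* GlobalPrefix would be '_' on MachO and '\0' elsewhere; no symbol filter
     * is installed here, so all library symbols become visible to JIT'd code. */
    static int addLibraryToDylib(LLVMOrcJITDylibRef JD, const char *Path,
                                 char GlobalPrefix) {
      LLVMOrcDefinitionGeneratorRef Gen;
      LLVMErrorRef Err = LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
          &Gen, Path, GlobalPrefix, /* Filter */ NULL, /* FilterCtx */ NULL);
      if (Err) {
        char *Msg = LLVMGetErrorMessage(Err);
        fprintf(stderr, "generator creation failed: %s\n", Msg);
        LLVMDisposeErrorMessage(Msg);
        return 1;
      }
      LLVMOrcJITDylibAddGenerator(JD, Gen); /* JD now owns the generator. */
      return 0;
    }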
diff --git a/llvm/include/llvm-c/OrcEE.h b/llvm/include/llvm-c/OrcEE.h
index 2435e7421a42..e7ae0f5e6be2 100644
--- a/llvm/include/llvm-c/OrcEE.h
+++ b/llvm/include/llvm-c/OrcEE.h
@@ -33,6 +33,13 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @defgroup LLVMCExecutionEngineORCEE ExecutionEngine-based ORC Utils
+ * @ingroup LLVMCExecutionEngine
+ *
+ * @{
+ */
+
+/**
* Create a RTDyldObjectLinkingLayer instance using the standard
* SectionMemoryManager for memory management.
*/
@@ -50,6 +57,10 @@ void LLVMOrcRTDyldObjectLinkingLayerRegisterJITEventListener(
LLVMOrcObjectLayerRef RTDyldObjLinkingLayer,
LLVMJITEventListenerRef Listener);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif /* LLVM_C_ORCEE_H */
diff --git a/llvm/include/llvm-c/Support.h b/llvm/include/llvm-c/Support.h
index 866df32efa98..17657861b32b 100644
--- a/llvm/include/llvm-c/Support.h
+++ b/llvm/include/llvm-c/Support.h
@@ -21,6 +21,12 @@
LLVM_C_EXTERN_C_BEGIN
/**
+ * @addtogroup LLVMCCore
+ *
+ * @{
+ */
+
+/**
* This function permanently loads the dynamic library at the given path.
* It is safe to call this function multiple times for the same library.
*
@@ -57,6 +63,10 @@ void *LLVMSearchForAddressOfSymbol(const char *symbolName);
*/
void LLVMAddSymbol(const char *symbolName, void *symbolValue);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/TargetMachine.h b/llvm/include/llvm-c/TargetMachine.h
index f82edd948b59..23c8c63ff0b4 100644
--- a/llvm/include/llvm-c/TargetMachine.h
+++ b/llvm/include/llvm-c/TargetMachine.h
@@ -25,6 +25,12 @@
LLVM_C_EXTERN_C_BEGIN
+/**
+ * @addtogroup LLVMCTarget
+ *
+ * @{
+ */
+
typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef;
typedef struct LLVMTarget *LLVMTargetRef;
@@ -156,6 +162,10 @@ char* LLVMGetHostCPUFeatures(void);
/** Adds the target-specific analysis passes to the pass manager. */
void LLVMAddAnalysisPasses(LLVMTargetMachineRef T, LLVMPassManagerRef PM);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif
diff --git a/llvm/include/llvm-c/Transforms/PassBuilder.h b/llvm/include/llvm-c/Transforms/PassBuilder.h
index 5635f10d6877..6d9f1b45c707 100644
--- a/llvm/include/llvm-c/Transforms/PassBuilder.h
+++ b/llvm/include/llvm-c/Transforms/PassBuilder.h
@@ -18,6 +18,13 @@
#include "llvm-c/TargetMachine.h"
#include "llvm-c/Types.h"
+/**
+ * @defgroup LLVMCCoreNewPM New Pass Manager
+ * @ingroup LLVMCCore
+ *
+ * @{
+ */
+
LLVM_C_EXTERN_C_BEGIN
/**
@@ -50,7 +57,7 @@ LLVMErrorRef LLVMRunPasses(LLVMModuleRef M, const char *Passes,
* responsible for it. The client should call LLVMDisposePassBuilderOptions
* to free the pass builder options.
*/
-LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions();
+LLVMPassBuilderOptionsRef LLVMCreatePassBuilderOptions(void);
/**
* Toggle adding the VerifierPass for the PassBuilder, ensuring all functions
@@ -97,6 +104,10 @@ void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options,
*/
void LLVMDisposePassBuilderOptions(LLVMPassBuilderOptionsRef Options);
+/**
+ * @}
+ */
+
LLVM_C_EXTERN_C_END
#endif // LLVM_C_TRANSFORMS_PASSBUILDER_H
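A short sketch, not part of the patch, of the intended lifetime of the options object around the now-explicitly-void prototype above; it assumes the LLVMRunPasses declaration partially visible in this hunk also takes a target machine (which may be NULL) and the options handle:

    #include "llvm-c/Transforms/PassBuilder.h"

    /* Run a default -O2 pipeline over M and return any error to the caller.
     * The options object must be disposed by whoever created it. */
    static LLVMErrorRef runDefaultO2(LLVMModuleRef M, LLVMTargetMachineRef TM) {
      LLVMPassBuilderOptionsRef Opts = LLVMCreatePassBuilderOptions();
      LLVMErrorRef Err = LLVMRunPasses(M, "default<O2>", TM, Opts);
      LLVMDisposePassBuilderOptions(Opts);
      return Err;
    }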
diff --git a/llvm/include/llvm-c/lto.h b/llvm/include/llvm-c/lto.h
index f6fc8588f5f7..5ceb02224d2b 100644
--- a/llvm/include/llvm-c/lto.h
+++ b/llvm/include/llvm-c/lto.h
@@ -46,7 +46,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 28
+#define LTO_API_VERSION 29
/**
* \since prior to LTO_API_VERSION=3
@@ -313,6 +313,16 @@ extern lto_bool_t lto_module_get_macho_cputype(lto_module_t mod,
unsigned int *out_cpusubtype);
/**
+ * This function can be used by the linker to check if a given module has
+ * any constructor or destructor functions.
+ *
+ * Returns true if the module has either the @llvm.global_ctors or the
+ * @llvm.global_dtors symbol. Otherwise returns false.
+ *
+ * \since LTO_API_VERSION=29
+ */
+extern lto_bool_t lto_module_has_ctor_dtor(lto_module_t mod);
+/**
* Diagnostic severity.
*
* \since LTO_API_VERSION=7
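A linker-side sketch, not from the patch, of the new query; it assumes the long-standing lto_module_create/lto_module_dispose entry points for loading and releasing the module:

    #include "llvm-c/lto.h"

    /* Report whether the bitcode file at Path defines @llvm.global_ctors or
     * @llvm.global_dtors (lto_module_has_ctor_dtor is new in version 29). */
    static lto_bool_t fileHasCtorDtor(const char *Path) {
      lto_module_t M = lto_module_create(Path);
      if (!M)
        return 0;
      lto_bool_t Result = lto_module_has_ctor_dtor(M);
      lto_module_dispose(M);
      return Result;
    }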
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index f493a03b4b87..40e0e32c77a8 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -961,9 +961,7 @@ public:
/// Returns a float which is bitcasted from an all one value int.
///
/// \param Semantics - type float semantics
- /// \param BitWidth - Select float type
- static APFloat getAllOnesValue(const fltSemantics &Semantics,
- unsigned BitWidth);
+ static APFloat getAllOnesValue(const fltSemantics &Semantics);
/// Used to insert APFloat objects, or objects that contain APFloat objects,
/// into FoldingSets.
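The call-site impact of the dropped BitWidth parameter, sketched here as an assumption rather than taken from the patch; APFloat::IEEEsingle() names the semantics, which now fully determine the width:

    #include "llvm/ADT/APFloat.h"
    using llvm::APFloat;

    // Before: APFloat::getAllOnesValue(APFloat::IEEEsingle(), /*BitWidth=*/32);
    // After this change the semantics alone pick the width:
    APFloat AllOnesF32 = APFloat::getAllOnesValue(APFloat::IEEEsingle());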
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index ff586f763e82..595cd94b6b8f 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -31,7 +31,7 @@ class raw_ostream;
template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
template <typename T> class Optional;
-template <typename T> struct DenseMapInfo;
+template <typename T, typename Enable> struct DenseMapInfo;
class APInt;
@@ -66,6 +66,11 @@ inline APInt operator-(APInt);
/// not.
/// * In general, the class tries to follow the style of computation that LLVM
/// uses in its IR. This simplifies its use for LLVM.
+/// * APInt supports zero-bit-width values, but operations that require bits
+/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
+/// integer). This means that operations like zero extension and logical
+///   shifts are defined, but sign extension and ashr are not. Zero-bit values
+/// compare and hash equal to themselves, and countLeadingZeros returns 0.
///
class LLVM_NODISCARD APInt {
public:
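A hedged sketch of the zero-bit-width behaviour described in the comment above, using the getZeroWidth(), isZero() and isAllOnes() helpers introduced further down in this patch:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using llvm::APInt;

    void zeroWidthDemo() {
      APInt Empty = APInt::getZeroWidth();          // 0-bit value
      assert(Empty.isZero() && Empty.isAllOnes());  // both hold at width 0
      APInt Widened = Empty.zext(8);                // zero extension is defined
      (void)Widened;                                // sext()/ashr() are not defined here
    }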
@@ -87,176 +92,6 @@ public:
static constexpr WordType WORDTYPE_MAX = ~WordType(0);
-private:
- /// This union is used to store the integer value. When the
- /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
- union {
- uint64_t VAL; ///< Used to store the <= 64 bits integer value.
- uint64_t *pVal; ///< Used to store the >64 bits integer value.
- } U;
-
- unsigned BitWidth; ///< The number of bits in this APInt.
-
- friend struct DenseMapInfo<APInt>;
-
- friend class APSInt;
-
- /// Fast internal constructor
- ///
- /// This constructor is used only internally for speed of construction of
- /// temporaries. It is unsafe for general use so it is not public.
- APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
- U.pVal = val;
- }
-
- /// Determine which word a bit is in.
- ///
- /// \returns the word position for the specified bit position.
- static unsigned whichWord(unsigned bitPosition) {
- return bitPosition / APINT_BITS_PER_WORD;
- }
-
- /// Determine which bit in a word a bit is in.
- ///
- /// \returns the bit position in a word for the specified bit position
- /// in the APInt.
- static unsigned whichBit(unsigned bitPosition) {
- return bitPosition % APINT_BITS_PER_WORD;
- }
-
- /// Get a single bit mask.
- ///
- /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
- /// This method generates and returns a uint64_t (word) mask for a single
- /// bit at a specific bit position. This is used to mask the bit in the
- /// corresponding word.
- static uint64_t maskBit(unsigned bitPosition) {
- return 1ULL << whichBit(bitPosition);
- }
-
- /// Clear unused high order bits
- ///
- /// This method is used internally to clear the top "N" bits in the high order
- /// word that are not used by the APInt. This is needed after the most
- /// significant word is assigned a value to ensure that those bits are
- /// zero'd out.
- APInt &clearUnusedBits() {
- // Compute how many bits are used in the final word
- unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
-
- // Mask out the high bits.
- uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
- if (isSingleWord())
- U.VAL &= mask;
- else
- U.pVal[getNumWords() - 1] &= mask;
- return *this;
- }
-
- /// Get the word corresponding to a bit position
- /// \returns the corresponding word for the specified bit position.
- uint64_t getWord(unsigned bitPosition) const {
- return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
- }
-
- /// Utility method to change the bit width of this APInt to new bit width,
- /// allocating and/or deallocating as necessary. There is no guarantee on the
- /// value of any bits upon return. Caller should populate the bits after.
- void reallocate(unsigned NewBitWidth);
-
- /// Convert a char array into an APInt
- ///
- /// \param radix 2, 8, 10, 16, or 36
- /// Converts a string into a number. The string must be non-empty
- /// and well-formed as a number of the given base. The bit-width
- /// must be sufficient to hold the result.
- ///
- /// This is used by the constructors that take string arguments.
- ///
- /// StringRef::getAsInteger is superficially similar but (1) does
- /// not assume that the string is well-formed and (2) grows the
- /// result to hold the input.
- void fromString(unsigned numBits, StringRef str, uint8_t radix);
-
- /// An internal division function for dividing APInts.
- ///
- /// This is used by the toString method to divide by the radix. It simply
- /// provides a more convenient form of divide for internal use since KnuthDiv
- /// has specific constraints on its inputs. If those constraints are not met
- /// then it provides a simpler form of divide.
- static void divide(const WordType *LHS, unsigned lhsWords,
- const WordType *RHS, unsigned rhsWords, WordType *Quotient,
- WordType *Remainder);
-
- /// out-of-line slow case for inline constructor
- void initSlowCase(uint64_t val, bool isSigned);
-
- /// shared code between two array constructors
- void initFromArray(ArrayRef<uint64_t> array);
-
- /// out-of-line slow case for inline copy constructor
- void initSlowCase(const APInt &that);
-
- /// out-of-line slow case for shl
- void shlSlowCase(unsigned ShiftAmt);
-
- /// out-of-line slow case for lshr.
- void lshrSlowCase(unsigned ShiftAmt);
-
- /// out-of-line slow case for ashr.
- void ashrSlowCase(unsigned ShiftAmt);
-
- /// out-of-line slow case for operator=
- void AssignSlowCase(const APInt &RHS);
-
- /// out-of-line slow case for operator==
- bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY;
-
- /// out-of-line slow case for countLeadingZeros
- unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countLeadingOnes.
- unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countTrailingZeros.
- unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countTrailingOnes
- unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for countPopulation
- unsigned countPopulationSlowCase() const LLVM_READONLY;
-
- /// out-of-line slow case for intersects.
- bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
-
- /// out-of-line slow case for isSubsetOf.
- bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
-
- /// out-of-line slow case for setBits.
- void setBitsSlowCase(unsigned loBit, unsigned hiBit);
-
- /// out-of-line slow case for flipAllBits.
- void flipAllBitsSlowCase();
-
- /// out-of-line slow case for operator&=.
- void AndAssignSlowCase(const APInt& RHS);
-
- /// out-of-line slow case for operator|=.
- void OrAssignSlowCase(const APInt& RHS);
-
- /// out-of-line slow case for operator^=.
- void XorAssignSlowCase(const APInt& RHS);
-
- /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
- /// to, or greater than RHS.
- int compare(const APInt &RHS) const LLVM_READONLY;
-
- /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
- /// to, or greater than RHS.
- int compareSigned(const APInt &RHS) const LLVM_READONLY;
-
-public:
/// \name Constructors
/// @{
@@ -272,7 +107,6 @@ public:
/// \param isSigned how to treat signedness of val
APInt(unsigned numBits, uint64_t val, bool isSigned = false)
: BitWidth(numBits) {
- assert(BitWidth && "bitwidth too small");
if (isSingleWord()) {
U.VAL = val;
clearUnusedBits();
@@ -312,7 +146,9 @@ public:
/// \param radix the radix to use for the conversion
APInt(unsigned numBits, StringRef str, uint8_t radix);
- /// Simply makes *this a copy of that.
+ /// Default constructor that creates an APInt with a 1-bit zero value.
+ explicit APInt() : BitWidth(1) { U.VAL = 0; }
+
/// Copy Constructor.
APInt(const APInt &that) : BitWidth(that.BitWidth) {
if (isSingleWord())
@@ -333,19 +169,131 @@ public:
delete[] U.pVal;
}
- /// Default constructor that creates an uninteresting APInt
- /// representing a 1-bit zero value.
+ /// @}
+ /// \name Value Generators
+ /// @{
+
+ /// Get the '0' value for the specified bit-width.
+ static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
+
+ /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
+ static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
+
+ /// Return an APInt zero bits wide.
+ static APInt getZeroWidth() { return getZero(0); }
+
+ /// Gets maximum unsigned value of APInt for specific bit width.
+ static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
+
+ /// Gets maximum signed value of APInt for a specific bit width.
+ static APInt getSignedMaxValue(unsigned numBits) {
+ APInt API = getAllOnes(numBits);
+ API.clearBit(numBits - 1);
+ return API;
+ }
+
+ /// Gets minimum unsigned value of APInt for a specific bit width.
+ static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
+
+ /// Gets minimum signed value of APInt for a specific bit width.
+ static APInt getSignedMinValue(unsigned numBits) {
+ APInt API(numBits, 0);
+ API.setBit(numBits - 1);
+ return API;
+ }
+
+ /// Get the SignMask for a specific bit width.
///
- /// This is useful for object deserialization (pair this with the static
- /// method Read).
- explicit APInt() : BitWidth(1) { U.VAL = 0; }
+ /// This is just a wrapper function of getSignedMinValue(), and it helps code
+ /// readability when we want to get a SignMask.
+ static APInt getSignMask(unsigned BitWidth) {
+ return getSignedMinValue(BitWidth);
+ }
- /// Returns whether this instance allocated memory.
- bool needsCleanup() const { return !isSingleWord(); }
+ /// Return an APInt of a specified width with all bits set.
+ static APInt getAllOnes(unsigned numBits) {
+ return APInt(numBits, WORDTYPE_MAX, true);
+ }
- /// Used to insert APInt objects, or objects that contain APInt objects, into
- /// FoldingSets.
- void Profile(FoldingSetNodeID &id) const;
+ /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
+ static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
+
+ /// Return an APInt with exactly one bit set in the result.
+ static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
+ APInt Res(numBits, 0);
+ Res.setBit(BitNo);
+ return Res;
+ }
+
+ /// Get a value with a block of bits set.
+ ///
+ /// Constructs an APInt value that has a contiguous range of bits set. The
+ /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
+ /// bits will be zero. For example, with parameters(32, 0, 16) you would get
+ /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
+ /// \p hiBit.
+ ///
+ /// \param numBits the intended bit width of the result
+ /// \param loBit the index of the lowest bit set.
+ /// \param hiBit the index of the highest bit set.
+ ///
+ /// \returns An APInt value with the requested bits set.
+ static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
+ APInt Res(numBits, 0);
+ Res.setBits(loBit, hiBit);
+ return Res;
+ }
+
+ /// Wrap version of getBitsSet.
+  /// If \p hiBit is bigger than \p loBit, this is the same as getBitsSet.
+ /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
+ /// with parameters (32, 28, 4), you would get 0xF000000F.
+ /// If \p hiBit is equal to \p loBit, you would get a result with all bits
+ /// set.
+ static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
+ unsigned hiBit) {
+ APInt Res(numBits, 0);
+ Res.setBitsWithWrap(loBit, hiBit);
+ return Res;
+ }
+
+ /// Constructs an APInt value that has a contiguous range of bits set. The
+ /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
+ /// bits will be zero. For example, with parameters(32, 12) you would get
+ /// 0xFFFFF000.
+ ///
+ /// \param numBits the intended bit width of the result
+ /// \param loBit the index of the lowest bit to set.
+ ///
+ /// \returns An APInt value with the requested bits set.
+ static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
+ APInt Res(numBits, 0);
+ Res.setBitsFrom(loBit);
+ return Res;
+ }
+
+ /// Constructs an APInt value that has the top hiBitsSet bits set.
+ ///
+ /// \param numBits the bitwidth of the result
+ /// \param hiBitsSet the number of high-order bits set in the result.
+ static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
+ APInt Res(numBits, 0);
+ Res.setHighBits(hiBitsSet);
+ return Res;
+ }
+
+ /// Constructs an APInt value that has the bottom loBitsSet bits set.
+ ///
+ /// \param numBits the bitwidth of the result
+ /// \param loBitsSet the number of low-order bits set in the result.
+ static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
+ APInt Res(numBits, 0);
+ Res.setLowBits(loBitsSet);
+ return Res;
+ }
+
+ /// Return a value containing V broadcasted over NewLen bits.
+ static APInt getSplat(unsigned NewLen, const APInt &V);
/// @}
/// \name Value Tests
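A brief usage sketch, not part of the patch, of the relocated value generators above, contrasting the new names with their soft-deprecated spellings:

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    void valueGeneratorDemo() {
      APInt Zero = APInt::getZero(32);           // replaces getNullValue(32)
      APInt Ones = APInt::getAllOnes(32);        // replaces getAllOnesValue(32)
      APInt Mask = APInt::getBitsSet(32, 0, 16); // 0x0000FFFF, as documented
      APInt Splat = APInt::getSplat(64, Mask);   // Mask broadcast over 64 bits
      (void)Zero; (void)Ones; (void)Splat;
    }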
@@ -373,7 +321,7 @@ public:
/// This tests the high bit of this APInt to determine if it is set.
///
/// \returns true if this APInt has its sign bit set, false otherwise.
- bool isSignBitSet() const { return (*this)[BitWidth-1]; }
+ bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
/// Determine if sign bit of this APInt is clear.
///
@@ -388,50 +336,62 @@ public:
/// that 0 is not a positive value.
///
/// \returns true if this APInt is positive.
- bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+ bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
/// Determine if this APInt Value is non-positive (<= 0).
///
/// \returns true if this APInt is non-positive.
bool isNonPositive() const { return !isStrictlyPositive(); }
- /// Determine if all bits are set
- ///
- /// This checks to see if the value has all bits of the APInt are set or not.
- bool isAllOnesValue() const {
+ /// Determine if all bits are set. This is true for zero-width values.
+ bool isAllOnes() const {
+ if (BitWidth == 0)
+ return true;
if (isSingleWord())
return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
return countTrailingOnesSlowCase() == BitWidth;
}
- /// Determine if all bits are clear
- ///
- /// This checks to see if the value has all bits of the APInt are clear or
- /// not.
- bool isNullValue() const { return !*this; }
+ /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
+ bool isAllOnesValue() const { return isAllOnes(); }
+
+ /// Determine if this value is zero, i.e. all bits are clear.
+ bool isZero() const {
+ if (isSingleWord())
+ return U.VAL == 0;
+ return countLeadingZerosSlowCase() == BitWidth;
+ }
+
+ /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
+ bool isNullValue() const { return isZero(); }
/// Determine if this is a value of 1.
///
/// This checks to see if the value of this APInt is one.
- bool isOneValue() const {
+ bool isOne() const {
if (isSingleWord())
return U.VAL == 1;
return countLeadingZerosSlowCase() == BitWidth - 1;
}
+ /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
+ bool isOneValue() const { return isOne(); }
+
/// Determine if this is the largest unsigned value.
///
/// This checks to see if the value of this APInt is the maximum unsigned
/// value for the APInt's bit width.
- bool isMaxValue() const { return isAllOnesValue(); }
+ bool isMaxValue() const { return isAllOnes(); }
/// Determine if this is the largest signed value.
///
/// This checks to see if the value of this APInt is the maximum signed
/// value for the APInt's bit width.
bool isMaxSignedValue() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
+ }
return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
}
@@ -439,39 +399,48 @@ public:
///
/// This checks to see if the value of this APInt is the minimum unsigned
/// value for the APInt's bit width.
- bool isMinValue() const { return isNullValue(); }
+ bool isMinValue() const { return isZero(); }
/// Determine if this is the smallest signed value.
///
/// This checks to see if the value of this APInt is the minimum signed
/// value for the APInt's bit width.
bool isMinSignedValue() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return U.VAL == (WordType(1) << (BitWidth - 1));
+ }
return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
}
/// Check if this APInt has an N-bits unsigned integer value.
- bool isIntN(unsigned N) const {
- assert(N && "N == 0 ???");
- return getActiveBits() <= N;
- }
+ bool isIntN(unsigned N) const { return getActiveBits() <= N; }
/// Check if this APInt has an N-bits signed integer value.
- bool isSignedIntN(unsigned N) const {
- assert(N && "N == 0 ???");
- return getMinSignedBits() <= N;
- }
+ bool isSignedIntN(unsigned N) const { return getMinSignedBits() <= N; }
/// Check if this APInt's value is a power of two greater than zero.
///
/// \returns true if the argument APInt value is a power of two > 0.
bool isPowerOf2() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return isPowerOf2_64(U.VAL);
+ }
return countPopulationSlowCase() == 1;
}
+ /// Check if this APInt's negated value is a power of two greater than zero.
+ bool isNegatedPowerOf2() const {
+ assert(BitWidth && "zero width values not allowed");
+ if (isNonNegative())
+ return false;
+ // NegatedPowerOf2 - shifted mask in the top bits.
+ unsigned LO = countLeadingOnes();
+ unsigned TZ = countTrailingZeros();
+ return (LO + TZ) == BitWidth;
+ }
+
/// Check if the APInt's value is returned by getSignMask.
///
/// \returns true if this is the value returned by getSignMask.
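A small sketch, not from the patch, contrasting the renamed predicates with their soft-deprecated spellings and exercising the new isNegatedPowerOf2():

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using llvm::APInt;

    void predicateDemo() {
      APInt V(8, 0xF0);                      // -16 when read as a signed 8-bit value
      assert(!V.isZero() && !V.isOne() && !V.isAllOnes());
      assert(V.isNegatedPowerOf2());         // -(-16) == 16, a power of two
      assert(V.isNullValue() == V.isZero()); // old spelling still compiles
    }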
@@ -480,7 +449,7 @@ public:
/// Convert APInt to a boolean value.
///
/// This converts the APInt to a boolean value as a test against zero.
- bool getBoolValue() const { return !!*this; }
+ bool getBoolValue() const { return !isZero(); }
/// If this value is smaller than the specified limit, return it, otherwise
/// return the limit value. This causes the value to saturate to the limit.
@@ -527,152 +496,22 @@ public:
return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
}
- /// @}
- /// \name Value Generators
- /// @{
-
- /// Gets maximum unsigned value of APInt for specific bit width.
- static APInt getMaxValue(unsigned numBits) {
- return getAllOnesValue(numBits);
- }
-
- /// Gets maximum signed value of APInt for a specific bit width.
- static APInt getSignedMaxValue(unsigned numBits) {
- APInt API = getAllOnesValue(numBits);
- API.clearBit(numBits - 1);
- return API;
- }
-
- /// Gets minimum unsigned value of APInt for a specific bit width.
- static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
-
- /// Gets minimum signed value of APInt for a specific bit width.
- static APInt getSignedMinValue(unsigned numBits) {
- APInt API(numBits, 0);
- API.setBit(numBits - 1);
- return API;
- }
-
- /// Get the SignMask for a specific bit width.
- ///
- /// This is just a wrapper function of getSignedMinValue(), and it helps code
- /// readability when we want to get a SignMask.
- static APInt getSignMask(unsigned BitWidth) {
- return getSignedMinValue(BitWidth);
- }
-
- /// Get the all-ones value.
- ///
- /// \returns the all-ones value for an APInt of the specified bit-width.
- static APInt getAllOnesValue(unsigned numBits) {
- return APInt(numBits, WORDTYPE_MAX, true);
- }
-
- /// Get the '0' value.
- ///
- /// \returns the '0' value for an APInt of the specified bit-width.
- static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
-
/// Compute an APInt containing numBits highbits from this APInt.
///
- /// Get an APInt with the same BitWidth as this APInt, just zero mask
- /// the low bits and right shift to the least significant bit.
+ /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
+ /// bits and right shift to the least significant bit.
///
/// \returns the high "numBits" bits of this APInt.
APInt getHiBits(unsigned numBits) const;
/// Compute an APInt containing numBits lowbits from this APInt.
///
- /// Get an APInt with the same BitWidth as this APInt, just zero mask
- /// the high bits.
+ /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
+ /// bits.
///
/// \returns the low "numBits" bits of this APInt.
APInt getLoBits(unsigned numBits) const;
- /// Return an APInt with exactly one bit set in the result.
- static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
- APInt Res(numBits, 0);
- Res.setBit(BitNo);
- return Res;
- }
-
- /// Get a value with a block of bits set.
- ///
- /// Constructs an APInt value that has a contiguous range of bits set. The
- /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
- /// bits will be zero. For example, with parameters(32, 0, 16) you would get
- /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
- /// \p hiBit.
- ///
- /// \param numBits the intended bit width of the result
- /// \param loBit the index of the lowest bit set.
- /// \param hiBit the index of the highest bit set.
- ///
- /// \returns An APInt value with the requested bits set.
- static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
- assert(loBit <= hiBit && "loBit greater than hiBit");
- APInt Res(numBits, 0);
- Res.setBits(loBit, hiBit);
- return Res;
- }
-
- /// Wrap version of getBitsSet.
- /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
- /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
- /// with parameters (32, 28, 4), you would get 0xF000000F.
- /// If \p hiBit is equal to \p loBit, you would get a result with all bits
- /// set.
- static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
- unsigned hiBit) {
- APInt Res(numBits, 0);
- Res.setBitsWithWrap(loBit, hiBit);
- return Res;
- }
-
- /// Get a value with upper bits starting at loBit set.
- ///
- /// Constructs an APInt value that has a contiguous range of bits set. The
- /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
- /// bits will be zero. For example, with parameters(32, 12) you would get
- /// 0xFFFFF000.
- ///
- /// \param numBits the intended bit width of the result
- /// \param loBit the index of the lowest bit to set.
- ///
- /// \returns An APInt value with the requested bits set.
- static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
- APInt Res(numBits, 0);
- Res.setBitsFrom(loBit);
- return Res;
- }
-
- /// Get a value with high bits set
- ///
- /// Constructs an APInt value that has the top hiBitsSet bits set.
- ///
- /// \param numBits the bitwidth of the result
- /// \param hiBitsSet the number of high-order bits set in the result.
- static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
- APInt Res(numBits, 0);
- Res.setHighBits(hiBitsSet);
- return Res;
- }
-
- /// Get a value with low bits set
- ///
- /// Constructs an APInt value that has the bottom loBitsSet bits set.
- ///
- /// \param numBits the bitwidth of the result
- /// \param loBitsSet the number of low-order bits set in the result.
- static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
- APInt Res(numBits, 0);
- Res.setLowBits(loBitsSet);
- return Res;
- }
-
- /// Return a value containing V broadcasted over NewLen bits.
- static APInt getSplat(unsigned NewLen, const APInt &V);
-
/// Determine if two APInts have the same value, after zero-extending
/// one of them (if needed!) to ensure that the bit-widths match.
static bool isSameValue(const APInt &I1, const APInt &I2) {
@@ -701,12 +540,10 @@ public:
/// \name Unary Operators
/// @{
- /// Postfix increment operator.
- ///
- /// Increments *this by 1.
+ /// Postfix increment operator. Increment *this by 1.
///
/// \returns a new APInt value representing the original value of *this.
- const APInt operator++(int) {
+ APInt operator++(int) {
APInt API(*this);
++(*this);
return API;
@@ -717,12 +554,10 @@ public:
/// \returns *this incremented by one
APInt &operator++();
- /// Postfix decrement operator.
- ///
- /// Decrements *this by 1.
+ /// Postfix decrement operator. Decrement *this by 1.
///
/// \returns a new APInt value representing the original value of *this.
- const APInt operator--(int) {
+ APInt operator--(int) {
APInt API(*this);
--(*this);
return API;
@@ -733,16 +568,9 @@ public:
/// \returns *this decremented by one.
APInt &operator--();
- /// Logical negation operator.
- ///
- /// Performs logical negation operation on this APInt.
- ///
- /// \returns true if *this is zero, false otherwise.
- bool operator!() const {
- if (isSingleWord())
- return U.VAL == 0;
- return countLeadingZerosSlowCase() == BitWidth;
- }
+  /// Logical negation operator. Returns true if *this is zero, as for normal
+  /// integers.
+ bool operator!() const { return isZero(); }
/// @}
/// \name Assignment Operators
@@ -752,14 +580,15 @@ public:
///
/// \returns *this after assignment of RHS.
APInt &operator=(const APInt &RHS) {
- // If the bitwidths are the same, we can avoid mucking with memory
+ // The common case (both source or dest being inline) doesn't require
+ // allocation or deallocation.
if (isSingleWord() && RHS.isSingleWord()) {
U.VAL = RHS.U.VAL;
BitWidth = RHS.BitWidth;
- return clearUnusedBits();
+ return *this;
}
- AssignSlowCase(RHS);
+ assignSlowCase(RHS);
return *this;
}
@@ -780,7 +609,6 @@ public:
BitWidth = that.BitWidth;
that.BitWidth = 0;
-
return *this;
}
@@ -812,7 +640,7 @@ public:
if (isSingleWord())
U.VAL &= RHS.U.VAL;
else
- AndAssignSlowCase(RHS);
+ andAssignSlowCase(RHS);
return *this;
}
@@ -827,7 +655,7 @@ public:
return *this;
}
U.pVal[0] &= RHS;
- memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
+ memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
return *this;
}
@@ -842,7 +670,7 @@ public:
if (isSingleWord())
U.VAL |= RHS.U.VAL;
else
- OrAssignSlowCase(RHS);
+ orAssignSlowCase(RHS);
return *this;
}
@@ -871,7 +699,7 @@ public:
if (isSingleWord())
U.VAL ^= RHS.U.VAL;
else
- XorAssignSlowCase(RHS);
+ xorAssignSlowCase(RHS);
return *this;
}
@@ -1057,6 +885,17 @@ public:
/// Rotate right by rotateAmt.
APInt rotr(const APInt &rotateAmt) const;
+ /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
+ /// equivalent to:
+ /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
+ APInt concat(const APInt &NewLSB) const {
+ // If the result will be small, then both the merged values are small.
+ unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
+ if (NewWidth <= APINT_BITS_PER_WORD)
+ return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
+ return concatSlowCase(NewLSB);
+ }
+
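  // For illustration, a sketch of the documented equivalence (values are chosen
  // for the example only):
  //   APInt Hi(4, 0xA), Lo(4, 0x5);
  //   APInt Joined = Hi.concat(Lo);    // 8 bits wide; Hi occupies bits [7:4].
  //   assert(Joined == APInt(8, 0xA5));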
/// Unsigned division operation.
///
/// Perform an unsigned divide operation on this APInt by RHS. Both this and
@@ -1151,7 +990,7 @@ public:
assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
if (isSingleWord())
return U.VAL == RHS.U.VAL;
- return EqualSlowCase(RHS);
+ return equalSlowCase(RHS);
}
/// Equality operator.
@@ -1436,8 +1275,6 @@ public:
clearUnusedBits();
}
- /// Set a given bit to 1.
- ///
/// Set the given bit to 1 whose position is given as "bitPosition".
void setBit(unsigned BitPosition) {
assert(BitPosition < BitWidth && "BitPosition out of range");
@@ -1449,9 +1286,7 @@ public:
}
/// Set the sign bit to 1.
- void setSignBit() {
- setBit(BitWidth - 1);
- }
+ void setSignBit() { setBit(BitWidth - 1); }
/// Set a given bit to a given value.
void setBitVal(unsigned BitPosition, bool BitValue) {
@@ -1497,14 +1332,10 @@ public:
}
/// Set the top bits starting from loBit.
- void setBitsFrom(unsigned loBit) {
- return setBits(loBit, BitWidth);
- }
+ void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
/// Set the bottom loBits bits.
- void setLowBits(unsigned loBits) {
- return setBits(0, loBits);
- }
+ void setLowBits(unsigned loBits) { return setBits(0, loBits); }
/// Set the top hiBits bits.
void setHighBits(unsigned hiBits) {
@@ -1539,9 +1370,7 @@ public:
}
/// Set the sign bit to 0.
- void clearSignBit() {
- clearBit(BitWidth - 1);
- }
+ void clearSignBit() { clearBit(BitWidth - 1); }
/// Toggle every bit to its opposite value.
void flipAllBits() {
@@ -1629,8 +1458,10 @@ public:
/// uint64_t. The bitwidth must be <= 64 or the value must fit within a
/// uint64_t. Otherwise an assertion will result.
uint64_t getZExtValue() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ assert(BitWidth && "zero width values not allowed");
return U.VAL;
+ }
assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
return U.pVal[0];
}
@@ -1678,8 +1509,11 @@ public:
/// \returns 0 if the high order bit is not set, otherwise returns the number
/// of 1 bits from the most significant to the least
unsigned countLeadingOnes() const {
- if (isSingleWord())
+ if (isSingleWord()) {
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ return 0;
return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
+ }
return countLeadingOnesSlowCase();
}
@@ -1774,9 +1608,7 @@ public:
/// The conversion does not do a translation from integer to double, it just
/// re-interprets the bits as a double. Note that it is valid to do this on
/// any bit width. Exactly 64 bits will be translated.
- double bitsToDouble() const {
- return BitsToDouble(getWord(0));
- }
+ double bitsToDouble() const { return BitsToDouble(getWord(0)); }
/// Converts APInt bits to a float
///
@@ -1808,7 +1640,7 @@ public:
/// @{
/// \returns the floor log base 2 of this APInt.
- unsigned logBase2() const { return getActiveBits() - 1; }
+ unsigned logBase2() const { return getActiveBits() - 1; }
/// \returns the ceil log base 2 of this APInt.
unsigned ceilLogBase2() const {
@@ -1826,25 +1658,7 @@ public:
///
/// to get around any mathematical concerns resulting from
/// referencing 2 in a space where 2 does not exist.
- unsigned nearestLogBase2() const {
- // Special case when we have a bitwidth of 1. If VAL is 1, then we
- // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
- // UINT32_MAX.
- if (BitWidth == 1)
- return U.VAL - 1;
-
- // Handle the zero case.
- if (isNullValue())
- return UINT32_MAX;
-
- // The non-zero case is handled by computing:
- //
- // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
- //
- // where x[i] is referring to the value of the ith bit of x.
- unsigned lg = logBase2();
- return lg + unsigned((*this)[lg - 1]);
- }
+ unsigned nearestLogBase2() const;
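  // Worked example of the rounding described above (values chosen for
  // illustration only):
  //   APInt(8, 5).nearestLogBase2() == 2          // 5 is closer to 2^2 == 4.
  //   APInt(8, 7).nearestLogBase2() == 3          // 7 is closer to 2^3 == 8.
  //   APInt(8, 0).nearestLogBase2() == UINT32_MAX // the documented zero case.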
/// \returns the log base 2 of this APInt if it is an exact power of two, -1
/// otherwise
@@ -1854,12 +1668,12 @@ public:
return logBase2();
}
- /// Compute the square root
+ /// Compute the square root.
APInt sqrt() const;
- /// Get the absolute value;
- ///
- /// If *this is < 0 then return -(*this), otherwise *this;
+ /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
+ /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
+ /// wide APInt) is unchanged due to how negation works.
APInt abs() const {
if (isNegative())
return -(*this);
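  // For illustration (example values only):
  //   APInt(8, -5, /*isSigned=*/true).abs() == APInt(8, 5)
  //   APInt(8, 0x80).abs() == APInt(8, 0x80)   // -128 negates back to itself.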
@@ -1870,18 +1684,6 @@ public:
APInt multiplicativeInverse(const APInt &modulo) const;
/// @}
- /// \name Support for division by constant
- /// @{
-
- /// Calculate the magic number for signed division by a constant.
- struct ms;
- ms magic() const;
-
- /// Calculate the magic number for unsigned division by a constant.
- struct mu;
- mu magicu(unsigned LeadingZeros = 0) const;
-
- /// @}
/// \name Building-block Operations for APInt and APFloat
/// @{
@@ -1908,9 +1710,8 @@ public:
/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
/// significant bit of DST. All high bits above srcBITS in DST are
/// zero-filled.
- static void tcExtract(WordType *, unsigned dstCount,
- const WordType *, unsigned srcBits,
- unsigned srcLSB);
+ static void tcExtract(WordType *, unsigned dstCount, const WordType *,
+ unsigned srcBits, unsigned srcLSB);
/// Set the given bit of a bignum. Zero-based.
static void tcSetBit(WordType *, unsigned bit);
@@ -1927,14 +1728,13 @@ public:
static void tcNegate(WordType *, unsigned);
/// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
- static WordType tcAdd(WordType *, const WordType *,
- WordType carry, unsigned);
+ static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
/// DST += RHS. Returns the carry flag.
static WordType tcAddPart(WordType *, WordType, unsigned);
/// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
- static WordType tcSubtract(WordType *, const WordType *,
- WordType carry, unsigned);
+ static WordType tcSubtract(WordType *, const WordType *, WordType carry,
+ unsigned);
/// DST -= RHS. Returns the carry flag.
static WordType tcSubtractPart(WordType *, WordType, unsigned);
@@ -1950,8 +1750,7 @@ public:
/// otherwise overflow occurred and return one.
static int tcMultiplyPart(WordType *dst, const WordType *src,
WordType multiplier, WordType carry,
- unsigned srcParts, unsigned dstParts,
- bool add);
+ unsigned srcParts, unsigned dstParts, bool add);
/// DST = LHS * RHS, where DST has the same width as the operands and is
/// filled with the least significant parts of the result. Returns one if
@@ -1962,8 +1761,8 @@ public:
/// DST = LHS * RHS, where DST has width the sum of the widths of the
/// operands. No overflow occurs. DST must be disjoint from both operands.
- static void tcFullMultiply(WordType *, const WordType *,
- const WordType *, unsigned, unsigned);
+ static void tcFullMultiply(WordType *, const WordType *, const WordType *,
+ unsigned, unsigned);
/// If RHS is zero LHS and REMAINDER are left unchanged, return one.
/// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
@@ -1974,9 +1773,8 @@ public:
/// SCRATCH is a bignum of the same size as the operands and result for use by
/// the routine; its contents need not be initialized and are destroyed. LHS,
/// REMAINDER and SCRATCH must be distinct.
- static int tcDivide(WordType *lhs, const WordType *rhs,
- WordType *remainder, WordType *scratch,
- unsigned parts);
+ static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
+ WordType *scratch, unsigned parts);
/// Shift a bignum left Count bits. Shifted in bits are zero. There are no
/// restrictions on Count.
@@ -1986,12 +1784,6 @@ public:
/// restrictions on Count.
static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
- /// The obvious AND, OR and XOR and complement operations.
- static void tcAnd(WordType *, const WordType *, unsigned);
- static void tcOr(WordType *, const WordType *, unsigned);
- static void tcXor(WordType *, const WordType *, unsigned);
- static void tcComplement(WordType *, unsigned);
-
/// Comparison (unsigned) of two bignums.
static int tcCompare(const WordType *, const WordType *, unsigned);
@@ -2005,26 +1797,185 @@ public:
return tcSubtractPart(dst, 1, parts);
}
- /// Set the least significant BITS and clear the rest.
- static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
+ /// Used to insert APInt objects, or objects that contain APInt objects, into
+ /// FoldingSets.
+ void Profile(FoldingSetNodeID &id) const;
/// debug method
void dump() const;
- /// @}
-};
+ /// Returns whether this instance allocated memory.
+ bool needsCleanup() const { return !isSingleWord(); }
-/// Magic data for optimising signed division by a constant.
-struct APInt::ms {
- APInt m; ///< magic number
- unsigned s; ///< shift amount
-};
+private:
+ /// This union is used to store the integer value. When the
+ /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
+ union {
+ uint64_t VAL; ///< Used to store the <= 64 bits integer value.
+ uint64_t *pVal; ///< Used to store the >64 bits integer value.
+ } U;
+
+ unsigned BitWidth; ///< The number of bits in this APInt.
+
+ friend struct DenseMapInfo<APInt, void>;
+ friend class APSInt;
+
+ /// This constructor is used only internally for speed of construction of
+ /// temporaries. It is unsafe since it takes ownership of the pointer, so it
+ /// is not public.
+ APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
+
+ /// Determine which word a bit is in.
+ ///
+ /// \returns the word position for the specified bit position.
+ static unsigned whichWord(unsigned bitPosition) {
+ return bitPosition / APINT_BITS_PER_WORD;
+ }
+
+ /// Determine which bit in a word the specified bit position is in.
+ static unsigned whichBit(unsigned bitPosition) {
+ return bitPosition % APINT_BITS_PER_WORD;
+ }
+
+ /// Get a single bit mask.
+ ///
+ /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
+ /// This method generates and returns a uint64_t (word) mask for a single
+ /// bit at a specific bit position. This is used to mask the bit in the
+ /// corresponding word.
+ static uint64_t maskBit(unsigned bitPosition) {
+ return 1ULL << whichBit(bitPosition);
+ }
+
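  // For illustration, how a bit position maps onto 64-bit words (example value):
  //   whichWord(70) == 1, whichBit(70) == 6, maskBit(70) == 0x40,
  //   so bit 70 lives in U.pVal[1] and is tested as (U.pVal[1] & 0x40) != 0.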
+ /// Clear unused high order bits
+ ///
+ /// This method is used internally to clear the top "N" bits in the high order
+ /// word that are not used by the APInt. This is needed after the most
+ /// significant word is assigned a value to ensure that those bits are
+ /// zero'd out.
+ APInt &clearUnusedBits() {
+ // Compute how many bits are used in the final word.
+ unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
+
+ // Mask out the high bits.
+ uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ mask = 0;
+
+ if (isSingleWord())
+ U.VAL &= mask;
+ else
+ U.pVal[getNumWords() - 1] &= mask;
+ return *this;
+ }
+
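  // For illustration (hypothetical values): with BitWidth == 4 in a single
  // 64-bit word, WordBits is 4 and mask is 0xF, so a stored word of 0x1F becomes
  // 0xF and the bits above the declared width cannot leak into later operations.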
+ /// Get the word corresponding to a bit position
+ /// \returns the corresponding word for the specified bit position.
+ uint64_t getWord(unsigned bitPosition) const {
+ return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
+ }
+
+ /// Utility method to change the bit width of this APInt to a new bit width,
+ /// allocating and/or deallocating as necessary. There is no guarantee on the
+ /// value of any bits upon return. Caller should populate the bits after.
+ void reallocate(unsigned NewBitWidth);
+
+ /// Convert a char array into an APInt
+ ///
+ /// \param radix 2, 8, 10, 16, or 36
+ /// Converts a string into a number. The string must be non-empty
+ /// and well-formed as a number of the given base. The bit-width
+ /// must be sufficient to hold the result.
+ ///
+ /// This is used by the constructors that take string arguments.
+ ///
+ /// StringRef::getAsInteger is superficially similar but (1) does
+ /// not assume that the string is well-formed and (2) grows the
+ /// result to hold the input.
+ void fromString(unsigned numBits, StringRef str, uint8_t radix);
+
+ /// An internal division function for dividing APInts.
+ ///
+ /// This is used by the toString method to divide by the radix. It simply
+ /// provides a more convenient form of divide for internal use since KnuthDiv
+ /// has specific constraints on its inputs. If those constraints are not met
+ /// then it provides a simpler form of divide.
+ static void divide(const WordType *LHS, unsigned lhsWords,
+ const WordType *RHS, unsigned rhsWords, WordType *Quotient,
+ WordType *Remainder);
+
+ /// out-of-line slow case for inline constructor
+ void initSlowCase(uint64_t val, bool isSigned);
+
+ /// shared code between two array constructors
+ void initFromArray(ArrayRef<uint64_t> array);
+
+ /// out-of-line slow case for inline copy constructor
+ void initSlowCase(const APInt &that);
+
+ /// out-of-line slow case for shl
+ void shlSlowCase(unsigned ShiftAmt);
+
+ /// out-of-line slow case for lshr.
+ void lshrSlowCase(unsigned ShiftAmt);
+
+ /// out-of-line slow case for ashr.
+ void ashrSlowCase(unsigned ShiftAmt);
+
+ /// out-of-line slow case for operator=
+ void assignSlowCase(const APInt &RHS);
+
+ /// out-of-line slow case for operator==
+ bool equalSlowCase(const APInt &RHS) const LLVM_READONLY;
+
+ /// out-of-line slow case for countLeadingZeros
+ unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countLeadingOnes.
+ unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countTrailingZeros.
+ unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countTrailingOnes
+ unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for countPopulation
+ unsigned countPopulationSlowCase() const LLVM_READONLY;
+
+ /// out-of-line slow case for intersects.
+ bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
+
+ /// out-of-line slow case for isSubsetOf.
+ bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
+
+ /// out-of-line slow case for setBits.
+ void setBitsSlowCase(unsigned loBit, unsigned hiBit);
+
+ /// out-of-line slow case for flipAllBits.
+ void flipAllBitsSlowCase();
-/// Magic data for optimising unsigned division by a constant.
-struct APInt::mu {
- APInt m; ///< magic number
- bool a; ///< add indicator
- unsigned s; ///< shift amount
+ /// out-of-line slow case for concat.
+ APInt concatSlowCase(const APInt &NewLSB) const;
+
+ /// out-of-line slow case for operator&=.
+ void andAssignSlowCase(const APInt &RHS);
+
+ /// out-of-line slow case for operator|=.
+ void orAssignSlowCase(const APInt &RHS);
+
+ /// out-of-line slow case for operator^=.
+ void xorAssignSlowCase(const APInt &RHS);
+
+ /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
+ /// to, or greater than RHS.
+ int compare(const APInt &RHS) const LLVM_READONLY;
+
+ /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
+ /// to, or greater than RHS.
+ int compareSigned(const APInt &RHS) const LLVM_READONLY;
+
+ /// @}
};
inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
@@ -2161,7 +2112,6 @@ inline APInt operator*(uint64_t LHS, APInt b) {
return b;
}
-
namespace APIntOps {
/// Determine the smaller of two APInts considered to be signed.
@@ -2277,7 +2227,16 @@ Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
const APInt &B);
-} // End of APIntOps namespace
+/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
+/// by \param A to \param NewBitWidth bits.
+///
+/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
+/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
+/// A.getBitWidth() or NewBitWidth must be a whole multiple of the other.
+///
+/// TODO: Do we need a mode where all bits must be set when merging down?
+APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
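// For illustration, the documented scaling as it would be called (values are
// example choices):
//   APInt Mask(4, 0b0101);
//   APInt Wide = APIntOps::ScaleBitMask(Mask, 8);   // 0b00110011
//   APInt Back = APIntOps::ScaleBitMask(Wide, 4);   // 0b0101 again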
+} // namespace APIntOps
// See friend declaration above. This additional declaration is required in
// order to compile LLVM with IBM xlC compiler.
@@ -2292,7 +2251,7 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
/// Provide DenseMapInfo for APInt.
-template <> struct DenseMapInfo<APInt> {
+template <> struct DenseMapInfo<APInt, void> {
static inline APInt getEmptyKey() {
APInt V(nullptr, 0);
V.U.VAL = 0;
diff --git a/llvm/include/llvm/ADT/APSInt.h b/llvm/include/llvm/ADT/APSInt.h
index 1509d472f131..c1cf3c546070 100644
--- a/llvm/include/llvm/ADT/APSInt.h
+++ b/llvm/include/llvm/ADT/APSInt.h
@@ -58,7 +58,7 @@ public:
/// that 0 is not a positive value.
///
/// \returns true if this APSInt is positive.
- bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
+ bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
APSInt &operator=(APInt RHS) {
// Retain our current sign.
@@ -344,17 +344,17 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APSInt &I) {
}
/// Provide DenseMapInfo for APSInt, using the DenseMapInfo for APInt.
-template <> struct DenseMapInfo<APSInt> {
+template <> struct DenseMapInfo<APSInt, void> {
static inline APSInt getEmptyKey() {
- return APSInt(DenseMapInfo<APInt>::getEmptyKey());
+ return APSInt(DenseMapInfo<APInt, void>::getEmptyKey());
}
static inline APSInt getTombstoneKey() {
- return APSInt(DenseMapInfo<APInt>::getTombstoneKey());
+ return APSInt(DenseMapInfo<APInt, void>::getTombstoneKey());
}
static unsigned getHashValue(const APSInt &Key) {
- return DenseMapInfo<APInt>::getHashValue(Key);
+ return DenseMapInfo<APInt, void>::getHashValue(Key);
}
static bool isEqual(const APSInt &LHS, const APSInt &RHS) {
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index 2df49223c987..61f85cfc812b 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -26,8 +26,6 @@
namespace llvm {
- template<typename T> struct DenseMapInfo;
-
/// ArrayRef - Represent a constant reference to an array (0 or more elements
/// consecutively in memory), i.e. a start pointer and a length. It allows
/// various APIs to take consecutive elements easily and conveniently.
@@ -572,7 +570,7 @@ namespace llvm {
}
// Provide DenseMapInfo for ArrayRefs.
- template <typename T> struct DenseMapInfo<ArrayRef<T>> {
+ template <typename T> struct DenseMapInfo<ArrayRef<T>, void> {
static inline ArrayRef<T> getEmptyKey() {
return ArrayRef<T>(
reinterpret_cast<const T *>(~static_cast<uintptr_t>(0)), size_t(0));
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index 31d388073633..cd1964cbdd98 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -85,7 +85,7 @@ class BitVector {
unsigned Size; // Size of bitvector in bits.
public:
- typedef unsigned size_type;
+ using size_type = unsigned;
// Encapsulation of a single bit.
class reference {
@@ -536,8 +536,8 @@ public:
[&Arg](auto const &BV) { return Arg.size() == BV; }) &&
"consistent sizes");
Out.resize(Arg.size());
- for (size_t i = 0, e = Arg.Bits.size(); i != e; ++i)
- Out.Bits[i] = f(Arg.Bits[i], Args.Bits[i]...);
+ for (size_type I = 0, E = Arg.Bits.size(); I != E; ++I)
+ Out.Bits[I] = f(Arg.Bits[I], Args.Bits[I]...);
Out.clear_unused_bits();
return Out;
}
@@ -545,16 +545,16 @@ public:
BitVector &operator|=(const BitVector &RHS) {
if (size() < RHS.size())
resize(RHS.size());
- for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i)
- Bits[i] |= RHS.Bits[i];
+ for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I)
+ Bits[I] |= RHS.Bits[I];
return *this;
}
BitVector &operator^=(const BitVector &RHS) {
if (size() < RHS.size())
resize(RHS.size());
- for (size_t i = 0, e = RHS.Bits.size(); i != e; ++i)
- Bits[i] ^= RHS.Bits[i];
+ for (size_type I = 0, E = RHS.Bits.size(); I != E; ++I)
+ Bits[I] ^= RHS.Bits[I];
return *this;
}
@@ -808,11 +808,11 @@ private:
public:
/// Return the size (in bytes) of the bit vector.
- size_t getMemorySize() const { return Bits.size() * sizeof(BitWord); }
- size_t getBitCapacity() const { return Bits.size() * BITWORD_SIZE; }
+ size_type getMemorySize() const { return Bits.size() * sizeof(BitWord); }
+ size_type getBitCapacity() const { return Bits.size() * BITWORD_SIZE; }
};
-inline size_t capacity_in_bytes(const BitVector &X) {
+inline BitVector::size_type capacity_in_bytes(const BitVector &X) {
return X.getMemorySize();
}
@@ -824,8 +824,8 @@ template <> struct DenseMapInfo<BitVector> {
return V;
}
static unsigned getHashValue(const BitVector &V) {
- return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue(
- std::make_pair(V.size(), V.getData()));
+ return DenseMapInfo<std::pair<BitVector::size_type, ArrayRef<uintptr_t>>>::
+ getHashValue(std::make_pair(V.size(), V.getData()));
}
static bool isEqual(const BitVector &LHS, const BitVector &RHS) {
if (LHS.isInvalid() || RHS.isInvalid())
diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h
new file mode 100644
index 000000000000..ab6afd555726
--- /dev/null
+++ b/llvm/include/llvm/ADT/CombinationGenerator.h
@@ -0,0 +1,148 @@
+//===-- llvm/ADT/CombinationGenerator.h ------------------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Combination generator.
+///
+/// Example: given input {{0, 1}, {2}, {3, 4}} it will produce the following
+/// combinations: {0, 2, 3}, {0, 2, 4}, {1, 2, 3}, {1, 2, 4}.
+///
+/// It is useful to think of the input as a vector-of-vectors, where the
+/// outer vector is the variable space and the inner vector is the choice space.
+/// The number of choices for each variable can be different.
+///
+/// As for the implementation, it is useful to think of this as a weird number,
+/// where each digit (==variable) may have a different base (==number of choices).
+/// Thus modelling 'produce next combination' is exactly analogous to
+/// incrementing a number: increment the lowest digit (pick the next choice for
+/// the variable), and if it wrapped to the beginning then increment the next
+/// digit.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_COMBINATIONGENERATOR_H
+#define LLVM_ADT_COMBINATIONGENERATOR_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+
+template <typename choice_type, typename choices_storage_type,
+ int variable_smallsize>
+class CombinationGenerator {
+ template <typename T> struct WrappingIterator {
+ using value_type = T;
+
+ const ArrayRef<value_type> Range;
+ typename decltype(Range)::const_iterator Position;
+
+ // Rewind the tape, placing the position to again point at the beginning.
+ void rewind() { Position = Range.begin(); }
+
+ // Advance position forward, possibly wrapping to the beginning.
+ // Returns whether the wrap happened.
+ bool advance() {
+ ++Position;
+ bool Wrapped = Position == Range.end();
+ if (Wrapped)
+ rewind();
+ return Wrapped;
+ }
+
+ // Get the value at which we are currently pointing.
+ const value_type &operator*() const { return *Position; }
+
+ WrappingIterator(ArrayRef<value_type> Range_) : Range(Range_) {
+ assert(!Range.empty() && "The range must not be empty.");
+ rewind();
+ }
+ };
+
+ const ArrayRef<choices_storage_type> VariablesChoices;
+
+ void performGeneration(
+ const function_ref<bool(ArrayRef<choice_type>)> Callback) const {
+ SmallVector<WrappingIterator<choice_type>, variable_smallsize>
+ VariablesState;
+
+    // 'increment' of the whole VariablesState is defined identically to the
+ // increment of a number: starting from the least significant element,
+ // increment it, and if it wrapped, then propagate that carry by also
+ // incrementing next (more significant) element.
+ auto IncrementState =
+ [](MutableArrayRef<WrappingIterator<choice_type>> VariablesState)
+ -> bool {
+ for (WrappingIterator<choice_type> &Variable :
+ llvm::reverse(VariablesState)) {
+ bool Wrapped = Variable.advance();
+ if (!Wrapped)
+ return false; // There you go, next combination is ready.
+        // We have a carry - increment the more significant variable next.
+ }
+ return true; // MSB variable wrapped, no more unique combinations.
+ };
+
+ // Initialize the per-variable state to refer to the possible choices for
+ // that variable.
+ VariablesState.reserve(VariablesChoices.size());
+ for (ArrayRef<choice_type> VC : VariablesChoices)
+ VariablesState.emplace_back(VC);
+
+ // Temporary buffer to store each combination before performing Callback.
+ SmallVector<choice_type, variable_smallsize> CurrentCombination;
+ CurrentCombination.resize(VariablesState.size());
+
+ while (true) {
+ // Gather the currently-selected variable choices into a vector.
+ for (auto I : llvm::zip(VariablesState, CurrentCombination))
+ std::get<1>(I) = *std::get<0>(I);
+ // And pass the new combination into callback, as intended.
+ if (/*Abort=*/Callback(CurrentCombination))
+ return;
+ // And tick the state to next combination, which will be unique.
+ if (IncrementState(VariablesState))
+ return; // All combinations produced.
+ }
+ };
+
+public:
+ CombinationGenerator(ArrayRef<choices_storage_type> VariablesChoices_)
+ : VariablesChoices(VariablesChoices_) {
+#ifndef NDEBUG
+ assert(!VariablesChoices.empty() && "There should be some variables.");
+ llvm::for_each(VariablesChoices, [](ArrayRef<choice_type> VariableChoices) {
+ assert(!VariableChoices.empty() &&
+ "There must always be some choice, at least a placeholder one.");
+ });
+#endif
+ }
+
+ // How many combinations can we produce, max?
+ // This is at most how many times the callback will be called.
+ size_t numCombinations() const {
+ size_t NumVariants = 1;
+ for (ArrayRef<choice_type> VariableChoices : VariablesChoices)
+ NumVariants *= VariableChoices.size();
+ assert(NumVariants >= 1 &&
+ "We should always end up producing at least one combination");
+ return NumVariants;
+ }
+
+ // Actually perform exhaustive combination generation.
+ // Each result will be passed into the callback.
+ void generate(const function_ref<bool(ArrayRef<choice_type>)> Callback) {
+ performGeneration(Callback);
+ }
+};
+
+} // namespace llvm
+
+#endif
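// For illustration, a usage sketch of the generator above; the element type,
// storage type, and choices are assumptions made for the example:
//   std::vector<std::vector<int>> Choices = {{0, 1}, {2}, {3, 4}};
//   CombinationGenerator<int, std::vector<int>, /*variable_smallsize=*/4> G(Choices);
//   G.generate([](ArrayRef<int> Combination) {
//     // Receives {0,2,3}, {0,2,4}, {1,2,3}, {1,2,4} in turn.
//     return false; // Returning true would abort generation early.
//   });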
diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index d276acbfa6a6..75b7371a3683 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -13,10 +13,10 @@
#ifndef LLVM_ADT_DENSEMAPINFO_H
#define LLVM_ADT_DENSEMAPINFO_H
-#include "llvm/ADT/Hashing.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <tuple>
#include <utility>
namespace llvm {
@@ -39,7 +39,12 @@ static inline unsigned combineHashValue(unsigned a, unsigned b) {
} // end namespace detail
-template<typename T>
+/// An information struct used to provide DenseMap with the various necessary
+/// components for a given value type `T`. `Enable` is an optional additional
+/// parameter that is used to support SFINAE (generally using std::enable_if_t)
+/// in derived DenseMapInfo specializations; in non-SFINAE use cases this should
+/// just be `void`.
+template<typename T, typename Enable = void>
struct DenseMapInfo {
//static inline T getEmptyKey();
//static inline T getTombstoneKey();
@@ -282,13 +287,6 @@ template <typename... Ts> struct DenseMapInfo<std::tuple<Ts...>> {
}
};
-template <> struct DenseMapInfo<hash_code> {
- static inline hash_code getEmptyKey() { return hash_code(-1); }
- static inline hash_code getTombstoneKey() { return hash_code(-2); }
- static unsigned getHashValue(hash_code val) { return val; }
- static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
-};
-
} // end namespace llvm
#endif // LLVM_ADT_DENSEMAPINFO_H
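// For illustration, the kind of SFINAE-keyed specialization the new `Enable`
// parameter permits; this trait for enums with a fixed underlying type is a
// hypothetical example (assumes <type_traits>), not part of the header:
//   template <typename T>
//   struct DenseMapInfo<T, std::enable_if_t<std::is_enum<T>::value>> {
//     using U = std::underlying_type_t<T>;
//     static inline T getEmptyKey() {
//       return static_cast<T>(DenseMapInfo<U>::getEmptyKey());
//     }
//     static inline T getTombstoneKey() {
//       return static_cast<T>(DenseMapInfo<U>::getTombstoneKey());
//     }
//     static unsigned getHashValue(const T &V) {
//       return DenseMapInfo<U>::getHashValue(static_cast<U>(V));
//     }
//     static bool isEqual(const T &L, const T &R) { return L == R; }
//   };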
diff --git a/llvm/include/llvm/ADT/EquivalenceClasses.h b/llvm/include/llvm/ADT/EquivalenceClasses.h
index 273b00f99d5d..de6bb3bca7e3 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -30,7 +30,8 @@ namespace llvm {
///
/// This implementation is an efficient implementation that only stores one copy
/// of the element being indexed per entry in the set, and allows any arbitrary
-/// type to be indexed (as long as it can be ordered with operator<).
+/// type to be indexed (as long as it can be ordered with operator< or a
+/// comparator is provided).
///
/// Here is a simple example using integers:
///
@@ -54,7 +55,7 @@ namespace llvm {
/// 4
/// 5 1 2
///
-template <class ElemTy>
+template <class ElemTy, class Compare = std::less<ElemTy>>
class EquivalenceClasses {
/// ECValue - The EquivalenceClasses data structure is just a set of these.
/// Each of these represents a relation for a value. First it stores the
@@ -101,22 +102,40 @@ class EquivalenceClasses {
assert(RHS.isLeader() && RHS.getNext() == nullptr && "Not a singleton!");
}
- bool operator<(const ECValue &UFN) const { return Data < UFN.Data; }
-
bool isLeader() const { return (intptr_t)Next & 1; }
const ElemTy &getData() const { return Data; }
const ECValue *getNext() const {
return (ECValue*)((intptr_t)Next & ~(intptr_t)1);
}
+ };
+
+ /// A wrapper of the comparator, to be passed to the set.
+ struct ECValueComparator {
+ using is_transparent = void;
+
+ ECValueComparator() : compare(Compare()) {}
+
+ bool operator()(const ECValue &lhs, const ECValue &rhs) const {
+ return compare(lhs.Data, rhs.Data);
+ }
+
+ template <typename T>
+ bool operator()(const T &lhs, const ECValue &rhs) const {
+ return compare(lhs, rhs.Data);
+ }
+
+ template <typename T>
+ bool operator()(const ECValue &lhs, const T &rhs) const {
+ return compare(lhs.Data, rhs);
+ }
- template<typename T>
- bool operator<(const T &Val) const { return Data < Val; }
+ const Compare compare;
};
/// TheMapping - This implicitly provides a mapping from ElemTy values to the
/// ECValues, it just keeps the key as part of the value.
- std::set<ECValue> TheMapping;
+ std::set<ECValue, ECValueComparator> TheMapping;
public:
EquivalenceClasses() = default;
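  // For illustration, the new comparator parameter in use (types chosen for the
  // example only):
  //   EquivalenceClasses<int, std::greater<int>> EC;
  //   EC.unionSets(1, 2);
  //   EC.unionSets(2, 3);   // 1, 2 and 3 now share one equivalence class;
  //                         // the underlying set is ordered by std::greater.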
diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h
index e67ef7377c88..5a37417ddde5 100644
--- a/llvm/include/llvm/ADT/FunctionExtras.h
+++ b/llvm/include/llvm/ADT/FunctionExtras.h
@@ -37,6 +37,7 @@
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/type_traits.h"
+#include <cstring>
#include <memory>
#include <type_traits>
@@ -64,11 +65,16 @@ template <typename CallableT, typename ThisT>
using EnableUnlessSameType =
std::enable_if_t<!std::is_same<remove_cvref_t<CallableT>, ThisT>::value>;
template <typename CallableT, typename Ret, typename... Params>
-using EnableIfCallable =
- std::enable_if_t<std::is_void<Ret>::value ||
- std::is_convertible<decltype(std::declval<CallableT>()(
- std::declval<Params>()...)),
- Ret>::value>;
+using EnableIfCallable = std::enable_if_t<llvm::disjunction<
+ std::is_void<Ret>,
+ std::is_same<decltype(std::declval<CallableT>()(std::declval<Params>()...)),
+ Ret>,
+ std::is_same<const decltype(std::declval<CallableT>()(
+ std::declval<Params>()...)),
+ Ret>,
+ std::is_convertible<decltype(std::declval<CallableT>()(
+ std::declval<Params>()...)),
+ Ret>>::value>;
template <typename ReturnT, typename... ParamTs> class UniqueFunctionBase {
protected:
diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h
index e296c1c53ebd..74a87a3d8dbb 100644
--- a/llvm/include/llvm/ADT/Hashing.h
+++ b/llvm/include/llvm/ADT/Hashing.h
@@ -56,6 +56,7 @@
#include <utility>
namespace llvm {
+template <typename T, typename Enable> struct DenseMapInfo;
/// An opaque object representing a hash code.
///
@@ -677,6 +678,13 @@ hash_code hash_value(const std::basic_string<T> &arg) {
return hash_combine_range(arg.begin(), arg.end());
}
+template <> struct DenseMapInfo<hash_code, void> {
+ static inline hash_code getEmptyKey() { return hash_code(-1); }
+ static inline hash_code getTombstoneKey() { return hash_code(-2); }
+ static unsigned getHashValue(hash_code val) { return val; }
+ static bool isEqual(hash_code LHS, hash_code RHS) { return LHS == RHS; }
+};
+
} // namespace llvm
#endif
diff --git a/llvm/include/llvm/ADT/ImmutableList.h b/llvm/include/llvm/ADT/ImmutableList.h
index c9ee494734e7..cf27c5a16d28 100644
--- a/llvm/include/llvm/ADT/ImmutableList.h
+++ b/llvm/include/llvm/ADT/ImmutableList.h
@@ -220,8 +220,7 @@ public:
// Partially-specialized Traits.
//===----------------------------------------------------------------------===//
-template<typename T> struct DenseMapInfo;
-template<typename T> struct DenseMapInfo<ImmutableList<T>> {
+template <typename T> struct DenseMapInfo<ImmutableList<T>, void> {
static inline ImmutableList<T> getEmptyKey() {
return reinterpret_cast<ImmutableListImpl<T>*>(-1);
}
diff --git a/llvm/include/llvm/ADT/IntervalMap.h b/llvm/include/llvm/ADT/IntervalMap.h
index 26a7ed0cd333..3c107a3622a9 100644
--- a/llvm/include/llvm/ADT/IntervalMap.h
+++ b/llvm/include/llvm/ADT/IntervalMap.h
@@ -1137,7 +1137,7 @@ public:
/// overlaps(a, b) - Return true if the intervals in this map overlap with the
/// interval [a;b].
- bool overlaps(KeyT a, KeyT b) {
+ bool overlaps(KeyT a, KeyT b) const {
assert(Traits::nonEmpty(a, b));
const_iterator I = find(a);
if (!I.valid())
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index 1de1124f4ea2..f9540999381a 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -43,6 +43,7 @@ class MapVector {
"The mapped_type of the specified Map must be an integral type");
public:
+ using key_type = KeyT;
using value_type = typename VectorType::value_type;
using size_type = typename VectorType::size_type;
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index cb8b202c48b7..393ace6b70fc 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -22,7 +22,7 @@
namespace llvm {
-template <typename T> struct DenseMapInfo;
+template <typename T, typename Enable> struct DenseMapInfo;
template <typename PointerT, unsigned IntBits, typename PtrTraits>
struct PointerIntPairInfo;
@@ -192,7 +192,7 @@ struct PointerIntPairInfo {
// Provide specialization of DenseMapInfo for PointerIntPair.
template <typename PointerTy, unsigned IntBits, typename IntType>
-struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>> {
+struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType>, void> {
using Ty = PointerIntPair<PointerTy, IntBits, IntType>;
static Ty getEmptyKey() {
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index c39691061b72..0874f67db3fe 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -17,42 +17,13 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
namespace llvm {
-template <typename T> struct PointerUnionTypeSelectorReturn {
- using Return = T;
-};
-
-/// Get a type based on whether two types are the same or not.
-///
-/// For:
-///
-/// \code
-/// using Ret = typename PointerUnionTypeSelector<T1, T2, EQ, NE>::Return;
-/// \endcode
-///
-/// Ret will be EQ type if T1 is same as T2 or NE type otherwise.
-template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
-struct PointerUnionTypeSelector {
- using Return = typename PointerUnionTypeSelectorReturn<RET_NE>::Return;
-};
-
-template <typename T, typename RET_EQ, typename RET_NE>
-struct PointerUnionTypeSelector<T, T, RET_EQ, RET_NE> {
- using Return = typename PointerUnionTypeSelectorReturn<RET_EQ>::Return;
-};
-
-template <typename T1, typename T2, typename RET_EQ, typename RET_NE>
-struct PointerUnionTypeSelectorReturn<
- PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>> {
- using Return =
- typename PointerUnionTypeSelector<T1, T2, RET_EQ, RET_NE>::Return;
-};
-
namespace pointer_union_detail {
/// Determine the number of bits required to store integers with values < n.
/// This is ceil(log2(n)).
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index eb001346b609..48f15b02283a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -272,20 +272,24 @@ template <typename T> auto drop_begin(T &&RangeOrContainer, size_t N = 1) {
// be applied whenever operator* is invoked on the iterator.
template <typename ItTy, typename FuncTy,
- typename FuncReturnTy =
- decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))>
+ typename ReferenceTy =
+ decltype(std::declval<FuncTy>()(*std::declval<ItTy>()))>
class mapped_iterator
: public iterator_adaptor_base<
- mapped_iterator<ItTy, FuncTy>, ItTy,
- typename std::iterator_traits<ItTy>::iterator_category,
- typename std::remove_reference<FuncReturnTy>::type> {
+ mapped_iterator<ItTy, FuncTy>, ItTy,
+ typename std::iterator_traits<ItTy>::iterator_category,
+ std::remove_reference_t<ReferenceTy>,
+ typename std::iterator_traits<ItTy>::difference_type,
+ std::remove_reference_t<ReferenceTy> *, ReferenceTy> {
public:
mapped_iterator(ItTy U, FuncTy F)
: mapped_iterator::iterator_adaptor_base(std::move(U)), F(std::move(F)) {}
ItTy getCurrent() { return this->I; }
- FuncReturnTy operator*() const { return F(*this->I); }
+ const FuncTy &getFunction() const { return F; }
+
+ ReferenceTy operator*() const { return F(*this->I); }
private:
FuncTy F;
@@ -303,6 +307,32 @@ auto map_range(ContainerTy &&C, FuncTy F) {
return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F));
}
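// For illustration (container and callable are example choices):
//   std::vector<int> V = {1, 2, 3};
//   int Sum = 0;
//   for (int Doubled : map_range(V, [](int X) { return X * 2; }))
//     Sum += Doubled;   // Sum becomes 12; no new container is materialized.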
+/// A base type for mapped iterators that is useful for building derived
+/// iterators that do not need/want to store the map function (as in
+/// mapped_iterator). These iterators must simply provide a `mapElement` method
+/// that defines how to map a value of the iterator to the provided reference
+/// type.
+template <typename DerivedT, typename ItTy, typename ReferenceTy>
+class mapped_iterator_base
+ : public iterator_adaptor_base<
+ DerivedT, ItTy,
+ typename std::iterator_traits<ItTy>::iterator_category,
+ std::remove_reference_t<ReferenceTy>,
+ typename std::iterator_traits<ItTy>::difference_type,
+ std::remove_reference_t<ReferenceTy> *, ReferenceTy> {
+public:
+ using BaseT = mapped_iterator_base;
+
+ mapped_iterator_base(ItTy U)
+ : mapped_iterator_base::iterator_adaptor_base(std::move(U)) {}
+
+ ItTy getCurrent() { return this->I; }
+
+ ReferenceTy operator*() const {
+ return static_cast<const DerivedT &>(*this).mapElement(*this->I);
+ }
+};
+
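// For illustration, a derived iterator that needs no stored callable; the name
// and element type are hypothetical:
//   struct FirstIter
//       : mapped_iterator_base<FirstIter,
//                              std::vector<std::pair<int, int>>::const_iterator,
//                              const int &> {
//     using BaseT::BaseT; // Reuse the wrapping constructor.
//     const int &mapElement(const std::pair<int, int> &P) const {
//       return P.first;
//     }
//   };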
/// Helper to determine if type T has a member called rbegin().
template <typename Ty> class has_rbegin_impl {
using yes = char[1];
@@ -371,12 +401,7 @@ class filter_iterator_base
typename std::common_type<
IterTag, typename std::iterator_traits<
WrappedIteratorT>::iterator_category>::type> {
- using BaseT = iterator_adaptor_base<
- filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>,
- WrappedIteratorT,
- typename std::common_type<
- IterTag, typename std::iterator_traits<
- WrappedIteratorT>::iterator_category>::type>;
+ using BaseT = typename filter_iterator_base::iterator_adaptor_base;
protected:
WrappedIteratorT End;
@@ -411,12 +436,10 @@ template <typename WrappedIteratorT, typename PredicateT,
typename IterTag = std::forward_iterator_tag>
class filter_iterator_impl
: public filter_iterator_base<WrappedIteratorT, PredicateT, IterTag> {
- using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT, IterTag>;
-
public:
filter_iterator_impl(WrappedIteratorT Begin, WrappedIteratorT End,
PredicateT Pred)
- : BaseT(Begin, End, Pred) {}
+ : filter_iterator_impl::filter_iterator_base(Begin, End, Pred) {}
};
/// Specialization of filter_iterator_base for bidirectional iteration.
@@ -425,8 +448,8 @@ class filter_iterator_impl<WrappedIteratorT, PredicateT,
std::bidirectional_iterator_tag>
: public filter_iterator_base<WrappedIteratorT, PredicateT,
std::bidirectional_iterator_tag> {
- using BaseT = filter_iterator_base<WrappedIteratorT, PredicateT,
- std::bidirectional_iterator_tag>;
+ using BaseT = typename filter_iterator_impl::filter_iterator_base;
+
void findPrevValid() {
while (!this->Pred(*this->I))
BaseT::operator--();
@@ -514,9 +537,7 @@ template <typename WrappedIteratorT>
class early_inc_iterator_impl
: public iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
WrappedIteratorT, std::input_iterator_tag> {
- using BaseT =
- iterator_adaptor_base<early_inc_iterator_impl<WrappedIteratorT>,
- WrappedIteratorT, std::input_iterator_tag>;
+ using BaseT = typename early_inc_iterator_impl::iterator_adaptor_base;
using PointerT = typename std::iterator_traits<WrappedIteratorT>::pointer;
@@ -630,12 +651,18 @@ protected:
return std::tuple<Iters...>(std::prev(std::get<Ns>(iterators))...);
}
+ template <size_t... Ns>
+ bool test_all_equals(const zip_common &other,
+ std::index_sequence<Ns...>) const {
+ return all_of(std::initializer_list<bool>{std::get<Ns>(this->iterators) ==
+ std::get<Ns>(other.iterators)...},
+ identity<bool>{});
+ }
+
public:
zip_common(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {}
- value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }
-
- const value_type operator*() const {
+ value_type operator*() const {
return deref(std::index_sequence_for<Iters...>{});
}
@@ -650,6 +677,11 @@ public:
iterators = tup_dec(std::index_sequence_for<Iters...>{});
return *reinterpret_cast<ZipType *>(this);
}
+
+ /// Return true if all the iterators match `other`'s iterators.
+ bool all_equals(zip_common &other) {
+ return test_all_equals(other, std::index_sequence_for<Iters...>{});
+ }
};
template <typename... Iters>
@@ -801,8 +833,6 @@ public:
: iterators(std::forward<Iters>(ts.first)...),
end_iterators(std::forward<Iters>(ts.second)...) {}
- value_type operator*() { return deref(std::index_sequence_for<Iters...>{}); }
-
value_type operator*() const {
return deref(std::index_sequence_for<Iters...>{});
}
@@ -1073,8 +1103,7 @@ template <typename DerivedT, typename BaseT, typename T,
typename PointerT = T *, typename ReferenceT = T &>
class indexed_accessor_range_base {
public:
- using RangeBaseT =
- indexed_accessor_range_base<DerivedT, BaseT, T, PointerT, ReferenceT>;
+ using RangeBaseT = indexed_accessor_range_base;
/// An iterator element of this range.
class iterator : public indexed_accessor_iterator<iterator, BaseT, T,
@@ -1087,8 +1116,7 @@ public:
private:
iterator(BaseT owner, ptrdiff_t curIndex)
- : indexed_accessor_iterator<iterator, BaseT, T, PointerT, ReferenceT>(
- owner, curIndex) {}
+ : iterator::indexed_accessor_iterator(owner, curIndex) {}
/// Allow access to the constructor.
friend indexed_accessor_range_base<DerivedT, BaseT, T, PointerT,
@@ -1234,20 +1262,39 @@ public:
}
};
+namespace detail {
+/// Return a reference to the first or second member of a reference. Otherwise,
+/// return a copy of the member of a temporary.
+///
+/// When passing a range whose iterators return values instead of references,
+/// the reference must be dropped from `decltype((elt.first))`, which will
+/// always be a reference, to avoid returning a reference to a temporary.
+template <typename EltTy, typename FirstTy> class first_or_second_type {
+public:
+ using type =
+ typename std::conditional_t<std::is_reference<EltTy>::value, FirstTy,
+ std::remove_reference_t<FirstTy>>;
+};
+} // end namespace detail
+
/// Given a container of pairs, return a range over the first elements.
template <typename ContainerTy> auto make_first_range(ContainerTy &&c) {
- return llvm::map_range(
- std::forward<ContainerTy>(c),
- [](decltype((*std::begin(c))) elt) -> decltype((elt.first)) {
- return elt.first;
- });
+ using EltTy = decltype((*std::begin(c)));
+ return llvm::map_range(std::forward<ContainerTy>(c),
+ [](EltTy elt) -> typename detail::first_or_second_type<
+ EltTy, decltype((elt.first))>::type {
+ return elt.first;
+ });
}
/// Given a container of pairs, return a range over the second elements.
template <typename ContainerTy> auto make_second_range(ContainerTy &&c) {
+ using EltTy = decltype((*std::begin(c)));
return llvm::map_range(
std::forward<ContainerTy>(c),
- [](decltype((*std::begin(c))) elt) -> decltype((elt.second)) {
+ [](EltTy elt) ->
+ typename detail::first_or_second_type<EltTy,
+ decltype((elt.second))>::type {
return elt.second;
});
}
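// For illustration (an example pair container, not taken from the header):
//   std::vector<std::pair<int, StringRef>> Pairs = {{1, "a"}, {2, "b"}};
//   for (int Key : make_first_range(Pairs)) { /* visits 1, then 2 */ }
//   for (StringRef Value : make_second_range(Pairs)) { /* visits "a", "b" */ }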
@@ -1260,7 +1307,7 @@ template <typename ContainerTy> auto make_second_range(ContainerTy &&c) {
/// compares less than the first component of another std::pair.
struct less_first {
template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return lhs.first < rhs.first;
+ return std::less<>()(lhs.first, rhs.first);
}
};
@@ -1268,7 +1315,7 @@ struct less_first {
/// compares less than the second component of another std::pair.
struct less_second {
template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return lhs.second < rhs.second;
+ return std::less<>()(lhs.second, rhs.second);
}
};
@@ -1877,8 +1924,7 @@ template <typename R> struct result_pair {
}
std::size_t index() const { return Index; }
- const value_reference value() const { return *Iter; }
- value_reference value() { return *Iter; }
+ value_reference value() const { return *Iter; }
private:
std::size_t Index = std::numeric_limits<std::size_t>::max();
@@ -1887,11 +1933,8 @@ private:
template <typename R>
class enumerator_iter
- : public iterator_facade_base<
- enumerator_iter<R>, std::forward_iterator_tag, result_pair<R>,
- typename std::iterator_traits<IterOfRange<R>>::difference_type,
- typename std::iterator_traits<IterOfRange<R>>::pointer,
- typename std::iterator_traits<IterOfRange<R>>::reference> {
+ : public iterator_facade_base<enumerator_iter<R>, std::forward_iterator_tag,
+ const result_pair<R>> {
using result_type = result_pair<R>;
public:
@@ -1901,7 +1944,6 @@ public:
enumerator_iter(std::size_t Index, IterOfRange<R> Iter)
: Result(Index, Iter) {}
- result_type &operator*() { return Result; }
const result_type &operator*() const { return Result; }
enumerator_iter &operator++() {
@@ -1986,6 +2028,45 @@ decltype(auto) apply_tuple(F &&f, Tuple &&t) {
Indices{});
}
+namespace detail {
+
+template <typename Predicate, typename... Args>
+bool all_of_zip_predicate_first(Predicate &&P, Args &&...args) {
+ auto z = zip(args...);
+ auto it = z.begin();
+ auto end = z.end();
+ while (it != end) {
+ if (!apply_tuple([&](auto &&...args) { return P(args...); }, *it))
+ return false;
+ ++it;
+ }
+ return it.all_equals(end);
+}
+
+// Just an adaptor to switch the order of arguments and have the predicate before
+// the zipped inputs.
+template <typename... ArgsThenPredicate, size_t... InputIndexes>
+bool all_of_zip_predicate_last(
+ std::tuple<ArgsThenPredicate...> argsThenPredicate,
+ std::index_sequence<InputIndexes...>) {
+ auto constexpr OutputIndex =
+ std::tuple_size<decltype(argsThenPredicate)>::value - 1;
+ return all_of_zip_predicate_first(std::get<OutputIndex>(argsThenPredicate),
+ std::get<InputIndexes>(argsThenPredicate)...);
+}
+
+} // end namespace detail
+
+/// Compare two zipped ranges using the provided predicate (as last argument).
+/// Return true if all elements satisfy the predicate and false otherwise.
+/// Return false if the zipped iterators aren't all at the end (size mismatch).
+template <typename... ArgsAndPredicate>
+bool all_of_zip(ArgsAndPredicate &&...argsAndPredicate) {
+ return detail::all_of_zip_predicate_last(
+ std::forward_as_tuple(argsAndPredicate...),
+ std::make_index_sequence<sizeof...(argsAndPredicate) - 1>{});
+}
+
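// For illustration (containers and predicate are example choices):
//   SmallVector<int> A = {1, 2, 3}, B = {1, 2, 3};
//   bool Same = all_of_zip(A, B, [](int L, int R) { return L == R; });
//   // Same is true; a size mismatch or any failing element yields false.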
/// Return true if the sequence [Begin, End) has exactly N items. Runs in O(N)
/// time. Not meant for use with random-access iterators.
/// Can optionally take a predicate to filter lazily some items.
diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h
index 3e4bf0932222..fdbf397984d0 100644
--- a/llvm/include/llvm/ADT/Sequence.h
+++ b/llvm/include/llvm/ADT/Sequence.h
@@ -6,9 +6,74 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This routine provides some synthesis utilities to produce sequences of
-/// values. The names are intentionally kept very short as they tend to occur
-/// in common and widely used contexts.
+/// Provides some synthesis utilities to produce sequences of values. The names
+/// are intentionally kept very short as they tend to occur in common and
+/// widely used contexts.
+///
+/// The `seq(A, B)` function produces a sequence of values from `A` up to
+/// (but not including) `B`, i.e., [`A`, `B`), that can be safely iterated over.
+/// `seq` supports both integral (e.g., `int`, `char`, `uint32_t`) and enum
+/// types. `seq_inclusive(A, B)` produces a sequence of values from `A` to `B`,
+/// including `B`.
+///
+/// Examples with integral types:
+/// ```
+/// for (int x : seq(0, 3))
+/// outs() << x << " ";
+/// ```
+///
+/// Prints: `0 1 2 `.
+///
+/// ```
+/// for (int x : seq_inclusive(0, 3))
+/// outs() << x << " ";
+/// ```
+///
+/// Prints: `0 1 2 3 `.
+///
+/// Similar to `seq` and `seq_inclusive`, the `enum_seq` and
+/// `enum_seq_inclusive` functions produce sequences of enum values that can be
+/// iterated over.
+/// To enable iteration with enum types, you need to either mark enums as safe
+/// to iterate on by specializing `enum_iteration_traits`, or opt into
+/// potentially unsafe iteration at every callsite by passing
+/// `force_iteration_on_noniterable_enum`.
+///
+/// Examples with enum types:
+/// ```
+/// namespace X {
+/// enum class MyEnum : unsigned {A = 0, B, C};
+/// } // namespace X
+///
+/// template <> struct enum_iteration_traits<X::MyEnum> {
+/// static constexpr bool is_iterable = true;
+/// };
+///
+/// class MyClass {
+/// public:
+/// enum Safe { D = 3, E, F };
+/// enum MaybeUnsafe { G = 1, H = 2, I = 4 };
+/// };
+///
+/// template <> struct enum_iteration_traits<MyClass::Safe> {
+/// static constexpr bool is_iterable = true;
+/// };
+/// ```
+///
+/// ```
+/// for (auto v : enum_seq(MyClass::Safe::D, MyClass::Safe::F))
+/// outs() << int(v) << " ";
+/// ```
+///
+/// Prints: `3 4 `.
+///
+/// ```
+/// for (auto v : enum_seq(MyClass::MaybeUnsafe::H, MyClass::MaybeUnsafe::I,
+/// force_iteration_on_noniterable_enum))
+/// outs() << int(v) << " ";
+/// ```
+///
+/// Prints: `2 3 `.
///
//===----------------------------------------------------------------------===//
@@ -18,12 +83,31 @@
#include <cassert> // assert
#include <iterator> // std::random_access_iterator_tag
#include <limits> // std::numeric_limits
-#include <type_traits> // std::underlying_type, std::is_enum
+#include <type_traits> // std::is_integral, std::is_enum, std::underlying_type,
+ // std::enable_if
#include "llvm/Support/MathExtras.h" // AddOverflow / SubOverflow
namespace llvm {
+// Enum traits that mark enums as safe or unsafe to iterate over.
+// By default, enum types are *not* considered safe for iteration.
+// To allow iteration for your enum type, provide a specialization with
+// `is_iterable` set to `true` in the `llvm` namespace.
+// Alternatively, you can pass the `force_iteration_on_noniterable_enum` tag
+// to `enum_seq` or `enum_seq_inclusive`.
+template <typename EnumT> struct enum_iteration_traits {
+ static constexpr bool is_iterable = false;
+};
+
+struct force_iteration_on_noniterable_enum_t {
+ explicit force_iteration_on_noniterable_enum_t() = default;
+};
+
+// TODO: Make this `inline` once we update to C++17 to avoid ODR violations.
+constexpr force_iteration_on_noniterable_enum_t
+ force_iteration_on_noniterable_enum;
+
namespace detail {
// Returns whether a value of type U can be represented with type T.
@@ -213,27 +297,81 @@ private:
iterator PastEndValue;
};
-/// Iterate over an integral/enum type from Begin up to - but not including -
-/// End.
-/// Note on enum iteration: `seq` will generate each consecutive value, even if
-/// no enumerator with that value exists.
+/// Iterate over an integral type from Begin up to - but not including - End.
/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for
/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse
/// iteration).
-template <typename T> auto seq(T Begin, T End) {
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value &&
+ !std::is_enum<T>::value>>
+auto seq(T Begin, T End) {
return iota_range<T>(Begin, End, false);
}
-/// Iterate over an integral/enum type from Begin to End inclusive.
-/// Note on enum iteration: `seq_inclusive` will generate each consecutive
-/// value, even if no enumerator with that value exists.
+/// Iterate over an integral type from Begin to End inclusive.
/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1]
/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse
/// iteration).
-template <typename T> auto seq_inclusive(T Begin, T End) {
+template <typename T, typename = std::enable_if_t<std::is_integral<T>::value &&
+ !std::is_enum<T>::value>>
+auto seq_inclusive(T Begin, T End) {
return iota_range<T>(Begin, End, true);
}
+/// Iterate over an enum type from Begin up to - but not including - End.
+/// Note: `enum_seq` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for
+/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq(EnumT Begin, EnumT End) {
+ static_assert(enum_iteration_traits<EnumT>::is_iterable,
+ "Enum type is not marked as iterable.");
+ return iota_range<EnumT>(Begin, End, false);
+}
+
+/// Iterate over an enum type from Begin up to - but not including - End, even
+/// when `EnumT` is not marked as safely iterable by `enum_iteration_traits`.
+/// Note: `enum_seq` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX] for
+/// forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq(EnumT Begin, EnumT End, force_iteration_on_noniterable_enum_t) {
+ return iota_range<EnumT>(Begin, End, false);
+}
+
+/// Iterate over an enum type from Begin to End inclusive.
+/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1]
+/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq_inclusive(EnumT Begin, EnumT End) {
+ static_assert(enum_iteration_traits<EnumT>::is_iterable,
+ "Enum type is not marked as iterable.");
+ return iota_range<EnumT>(Begin, End, true);
+}
+
+/// Iterate over an enum type from Begin to End inclusive, even when `EnumT`
+/// is not marked as safely iterable by `enum_iteration_traits`.
+/// Note: `enum_seq_inclusive` will generate each consecutive value, even if no
+/// enumerator with that value exists.
+/// Note: Begin and End values have to be within [INTMAX_MIN, INTMAX_MAX - 1]
+/// for forward iteration (resp. [INTMAX_MIN + 1, INTMAX_MAX - 1] for reverse
+/// iteration).
+template <typename EnumT,
+ typename = std::enable_if_t<std::is_enum<EnumT>::value>>
+auto enum_seq_inclusive(EnumT Begin, EnumT End,
+ force_iteration_on_noniterable_enum_t) {
+ return iota_range<EnumT>(Begin, End, true);
+}
+
} // end namespace llvm
#endif // LLVM_ADT_SEQUENCE_H
diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h
index 62f1d26dc1c2..3e30b6bb83d3 100644
--- a/llvm/include/llvm/ADT/SetOperations.h
+++ b/llvm/include/llvm/ADT/SetOperations.h
@@ -77,15 +77,6 @@ bool set_is_subset(const S1Ty &S1, const S2Ty &S2) {
return true;
}
-/// set_is_strict_subset(A, B) - Return true iff A in B and and A != B
-///
-template <class S1Ty, class S2Ty>
-bool set_is_strict_subset(const S1Ty &S1, const S2Ty &S2) {
- if (S1.size() >= S2.size())
- return false;
- return set_is_subset(S1, S2);
-}
-
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h
index f570bac23ad5..51ee5dbbce05 100644
--- a/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/llvm/include/llvm/ADT/SmallBitVector.h
@@ -60,7 +60,7 @@ class SmallBitVector {
"Unsupported word size");
public:
- using size_type = unsigned;
+ using size_type = uintptr_t;
// Encapsulation of a single bit.
class reference {
@@ -96,7 +96,7 @@ private:
return reinterpret_cast<BitVector *>(X);
}
- void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) {
+ void switchToSmall(uintptr_t NewSmallBits, size_type NewSize) {
X = 1;
setSmallSize(NewSize);
setSmallBits(NewSmallBits);
@@ -120,9 +120,11 @@ private:
}
// Return the size.
- size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; }
+ size_type getSmallSize() const {
+ return getSmallRawBits() >> SmallNumDataBits;
+ }
- void setSmallSize(size_t Size) {
+ void setSmallSize(size_type Size) {
setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits));
}
@@ -189,7 +191,7 @@ public:
}
/// Returns the number of bits in this bitvector.
- size_t size() const {
+ size_type size() const {
return isSmall() ? getSmallSize() : getPointer()->size();
}
@@ -336,8 +338,8 @@ public:
} else {
BitVector *BV = new BitVector(N, t);
uintptr_t OldBits = getSmallBits();
- for (size_t i = 0, e = getSmallSize(); i != e; ++i)
- (*BV)[i] = (OldBits >> i) & 1;
+ for (size_type I = 0, E = getSmallSize(); I != E; ++I)
+ (*BV)[I] = (OldBits >> I) & 1;
switchToLarge(BV);
}
}
@@ -346,11 +348,11 @@ public:
if (isSmall()) {
if (N > SmallNumDataBits) {
uintptr_t OldBits = getSmallRawBits();
- size_t SmallSize = getSmallSize();
+ size_type SmallSize = getSmallSize();
BitVector *BV = new BitVector(SmallSize);
- for (size_t i = 0; i < SmallSize; ++i)
- if ((OldBits >> i) & 1)
- BV->set(i);
+ for (size_type I = 0; I < SmallSize; ++I)
+ if ((OldBits >> I) & 1)
+ BV->set(I);
BV->reserve(N);
switchToLarge(BV);
}
@@ -491,8 +493,8 @@ public:
else if (!isSmall() && !RHS.isSmall())
return *getPointer() == *RHS.getPointer();
else {
- for (size_t i = 0, e = size(); i != e; ++i) {
- if ((*this)[i] != RHS[i])
+ for (size_type I = 0, E = size(); I != E; ++I) {
+ if ((*this)[I] != RHS[I])
return false;
}
return true;
@@ -512,11 +514,11 @@ public:
else if (!isSmall() && !RHS.isSmall())
getPointer()->operator&=(*RHS.getPointer());
else {
- size_t i, e;
- for (i = 0, e = std::min(size(), RHS.size()); i != e; ++i)
- (*this)[i] = test(i) && RHS.test(i);
- for (e = size(); i != e; ++i)
- reset(i);
+ size_type I, E;
+ for (I = 0, E = std::min(size(), RHS.size()); I != E; ++I)
+ (*this)[I] = test(I) && RHS.test(I);
+ for (E = size(); I != E; ++I)
+ reset(I);
}
return *this;
}
@@ -561,8 +563,8 @@ public:
else if (!isSmall() && !RHS.isSmall())
getPointer()->operator|=(*RHS.getPointer());
else {
- for (size_t i = 0, e = RHS.size(); i != e; ++i)
- (*this)[i] = test(i) || RHS.test(i);
+ for (size_type I = 0, E = RHS.size(); I != E; ++I)
+ (*this)[I] = test(I) || RHS.test(I);
}
return *this;
}
@@ -574,8 +576,8 @@ public:
else if (!isSmall() && !RHS.isSmall())
getPointer()->operator^=(*RHS.getPointer());
else {
- for (size_t i = 0, e = RHS.size(); i != e; ++i)
- (*this)[i] = test(i) != RHS.test(i);
+ for (size_type I = 0, E = RHS.size(); I != E; ++I)
+ (*this)[I] = test(I) != RHS.test(I);
}
return *this;
}
@@ -721,8 +723,9 @@ template <> struct DenseMapInfo<SmallBitVector> {
}
static unsigned getHashValue(const SmallBitVector &V) {
uintptr_t Store;
- return DenseMapInfo<std::pair<unsigned, ArrayRef<uintptr_t>>>::getHashValue(
- std::make_pair(V.size(), V.getData(Store)));
+ return DenseMapInfo<
+ std::pair<SmallBitVector::size_type, ArrayRef<uintptr_t>>>::
+ getHashValue(std::make_pair(V.size(), V.getData(Store)));
}
static bool isEqual(const SmallBitVector &LHS, const SmallBitVector &RHS) {
if (LHS.isInvalid() || RHS.isInvalid())
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index b8a11030fc33..0d13524f25ce 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -1239,13 +1239,22 @@ inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
}
+template <typename RangeType>
+using ValueTypeFromRangeType =
+ typename std::remove_const<typename std::remove_reference<
+ decltype(*std::begin(std::declval<RangeType &>()))>::type>::type;
+
/// Given a range of type R, iterate the entire range and return a
/// SmallVector with elements of the vector. This is useful, for example,
/// when you want to iterate a range and then sort the results.
template <unsigned Size, typename R>
-SmallVector<typename std::remove_const<typename std::remove_reference<
- decltype(*std::begin(std::declval<R &>()))>::type>::type,
- Size>
+SmallVector<ValueTypeFromRangeType<R>, Size> to_vector(R &&Range) {
+ return {std::begin(Range), std::end(Range)};
+}
+template <typename R>
+SmallVector<ValueTypeFromRangeType<R>,
+ CalculateSmallVectorDefaultInlinedElements<
+ ValueTypeFromRangeType<R>>::value>
to_vector(R &&Range) {
return {std::begin(Range), std::end(Range)};
}
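A short sketch of the two to_vector forms after this change; the array and the llvm::sort calls are illustrative only.

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

void sortedCopies() {
  int Values[] = {3, 1, 2};
  // New overload: inline capacity picked from the element type.
  llvm::SmallVector<int> A = llvm::to_vector(Values);
  // Existing overload: inline capacity requested explicitly.
  llvm::SmallVector<int, 8> B = llvm::to_vector<8>(Values);
  llvm::sort(A);
  llvm::sort(B);
}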
diff --git a/llvm/include/llvm/ADT/StringExtras.h b/llvm/include/llvm/ADT/StringExtras.h
index 6bda25b85313..2ca672e7855b 100644
--- a/llvm/include/llvm/ADT/StringExtras.h
+++ b/llvm/include/llvm/ADT/StringExtras.h
@@ -67,22 +67,27 @@ inline ArrayRef<uint8_t> arrayRefFromStringRef(StringRef Input) {
///
/// If \p C is not a valid hex digit, -1U is returned.
inline unsigned hexDigitValue(char C) {
- struct HexTable {
- unsigned LUT[255] = {};
- constexpr HexTable() {
- // Default initialize everything to invalid.
- for (int i = 0; i < 255; ++i)
- LUT[i] = ~0U;
- // Initialize `0`-`9`.
- for (int i = 0; i < 10; ++i)
- LUT['0' + i] = i;
- // Initialize `A`-`F` and `a`-`f`.
- for (int i = 0; i < 6; ++i)
- LUT['A' + i] = LUT['a' + i] = 10 + i;
- }
+ /* clang-format off */
+ static const int16_t LUT[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // '0'..'9'
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'A'..'F'
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 'a'..'f'
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
- constexpr HexTable Table;
- return Table.LUT[static_cast<unsigned char>(C)];
+ /* clang-format on */
+ return LUT[static_cast<unsigned char>(C)];
}
/// Checks if character \p C is one of the 10 decimal digits.
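A quick sketch of the hexDigitValue contract that the lookup-table rewrite above preserves; the asserts are illustrative, not part of the patch.

#include "llvm/ADT/StringExtras.h"
#include <cassert>

void checkHexDigits() {
  assert(llvm::hexDigitValue('7') == 7);
  assert(llvm::hexDigitValue('a') == 10 && llvm::hexDigitValue('F') == 15);
  assert(llvm::hexDigitValue('g') == -1U); // non-hex characters map to -1U
}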
@@ -210,24 +215,31 @@ inline bool tryGetFromHex(StringRef Input, std::string &Output) {
if (Input.empty())
return true;
- Output.reserve((Input.size() + 1) / 2);
+ // If the input string is not properly aligned on 2 nibbles we pad out the
+ // front with a 0 prefix; e.g. `ABC` -> `0ABC`.
+ Output.resize((Input.size() + 1) / 2);
+ char *OutputPtr = const_cast<char *>(Output.data());
if (Input.size() % 2 == 1) {
uint8_t Hex = 0;
if (!tryGetHexFromNibbles('0', Input.front(), Hex))
return false;
-
- Output.push_back(Hex);
+ *OutputPtr++ = Hex;
Input = Input.drop_front();
}
- assert(Input.size() % 2 == 0);
- while (!Input.empty()) {
+ // Convert the nibble pairs (e.g. `9C`) into bytes (0x9C).
+ // With the padding above we know the input is aligned and the output expects
+ // exactly half as many bytes as nibbles in the input.
+ size_t InputSize = Input.size();
+ assert(InputSize % 2 == 0);
+ const char *InputPtr = Input.data();
+ for (size_t OutputIndex = 0; OutputIndex < InputSize / 2; ++OutputIndex) {
uint8_t Hex = 0;
- if (!tryGetHexFromNibbles(Input[0], Input[1], Hex))
+ if (!tryGetHexFromNibbles(InputPtr[OutputIndex * 2 + 0], // MSB
+ InputPtr[OutputIndex * 2 + 1], // LSB
+ Hex))
return false;
-
- Output.push_back(Hex);
- Input = Input.drop_front(2);
+ OutputPtr[OutputIndex] = Hex;
}
return true;
}
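A minimal sketch of the odd-length padding behaviour described in the comments above: a leading 0 nibble is implied, so "ABC" decodes as "0ABC" (values invented).

#include "llvm/ADT/StringExtras.h"
#include <cassert>
#include <cstdint>
#include <string>

void decodeHex() {
  std::string Bytes;
  bool OK = llvm::tryGetFromHex("ABC", Bytes);
  (void)OK;
  assert(OK && Bytes.size() == 2);
  assert(static_cast<uint8_t>(Bytes[0]) == 0x0A);
  assert(static_cast<uint8_t>(Bytes[1]) == 0xBC);
}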
@@ -501,6 +513,83 @@ public:
}
};
+/// A forward iterator over partitions of string over a separator.
+class SplittingIterator
+ : public iterator_facade_base<SplittingIterator, std::forward_iterator_tag,
+ StringRef> {
+ char SeparatorStorage;
+ StringRef Current;
+ StringRef Next;
+ StringRef Separator;
+
+public:
+ SplittingIterator(StringRef Str, StringRef Separator)
+ : Next(Str), Separator(Separator) {
+ ++*this;
+ }
+
+ SplittingIterator(StringRef Str, char Separator)
+ : SeparatorStorage(Separator), Next(Str),
+ Separator(&SeparatorStorage, 1) {
+ ++*this;
+ }
+
+ SplittingIterator(const SplittingIterator &R)
+ : SeparatorStorage(R.SeparatorStorage), Current(R.Current), Next(R.Next),
+ Separator(R.Separator) {
+ if (R.Separator.data() == &R.SeparatorStorage)
+ Separator = StringRef(&SeparatorStorage, 1);
+ }
+
+ SplittingIterator &operator=(const SplittingIterator &R) {
+ if (this == &R)
+ return *this;
+
+ SeparatorStorage = R.SeparatorStorage;
+ Current = R.Current;
+ Next = R.Next;
+ Separator = R.Separator;
+ if (R.Separator.data() == &R.SeparatorStorage)
+ Separator = StringRef(&SeparatorStorage, 1);
+ return *this;
+ }
+
+ bool operator==(const SplittingIterator &R) const {
+ assert(Separator == R.Separator);
+ return Current.data() == R.Current.data();
+ }
+
+ const StringRef &operator*() const { return Current; }
+
+ StringRef &operator*() { return Current; }
+
+ SplittingIterator &operator++() {
+ std::tie(Current, Next) = Next.split(Separator);
+ return *this;
+ }
+};
+
+/// Split the specified string over a separator and return a range-compatible
+/// iterable over its partitions. Used to permit conveniently iterating
+/// over separated strings like so:
+///
+/// \code
+/// for (StringRef x : llvm::split("foo,bar,baz", ","))
+/// ...;
+/// \endcode
+///
+/// Note that the passed string must remain valid throughout the lifetime
+/// of the iterators.
+inline iterator_range<SplittingIterator> split(StringRef Str,
+                                               StringRef Separator) {
+ return {SplittingIterator(Str, Separator),
+ SplittingIterator(StringRef(), Separator)};
+}
+
+inline iterator_range<SplittingIterator> split(StringRef Str, char Separator) {
+ return {SplittingIterator(Str, Separator),
+ SplittingIterator(StringRef(), Separator)};
+}
+
} // end namespace llvm
#endif // LLVM_ADT_STRINGEXTRAS_H
diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index a82afc9a817c..669956d41e0c 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -126,9 +126,7 @@ public:
StringMap(std::initializer_list<std::pair<StringRef, ValueTy>> List)
: StringMapImpl(List.size(), static_cast<unsigned>(sizeof(MapEntryTy))) {
- for (const auto &P : List) {
- insert(P);
- }
+ insert(List);
}
StringMap(StringMap &&RHS)
@@ -297,6 +295,21 @@ public:
return try_emplace(KV.first, std::move(KV.second));
}
+ /// Inserts elements from the range [First, Last). If multiple elements in
+ /// the range have keys that compare equivalent, it is unspecified which
+ /// element is inserted.
+ template <typename InputIt> void insert(InputIt First, InputIt Last) {
+ for (InputIt It = First; It != Last; ++It)
+ insert(*It);
+ }
+
+ /// Inserts elements from the initializer list \p List. If multiple elements
+ /// in the range have keys that compare equivalent, it is unspecified which
+ /// element is inserted.
+ void insert(std::initializer_list<std::pair<StringRef, ValueTy>> List) {
+ insert(List.begin(), List.end());
+ }
+
/// Inserts an element or assigns to the current element if the key already
/// exists. The return type is the same as try_emplace.
template <typename V>
@@ -465,13 +478,7 @@ public:
explicit StringMapKeyIterator(StringMapConstIterator<ValueTy> Iter)
: base(std::move(Iter)) {}
- StringRef &operator*() {
- Key = this->wrapped()->getKey();
- return Key;
- }
-
-private:
- StringRef Key;
+ StringRef operator*() const { return this->wrapped()->getKey(); }
};
} // end namespace llvm
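A sketch of the bulk-insert overloads added above; the keys and values are made up for illustration.

#include "llvm/ADT/StringMap.h"
#include <utility>
#include <vector>

void populateCounts() {
  // The initializer_list constructor now funnels through insert(List).
  llvm::StringMap<int> Counts = {{"foo", 1}, {"bar", 2}};
  // Range insert.
  std::vector<std::pair<llvm::StringRef, int>> More = {{"baz", 3}, {"qux", 4}};
  Counts.insert(More.begin(), More.end());
  // Initializer-list insert.
  Counts.insert({{"quux", 5}, {"corge", 6}});
}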
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 17e64f7f81bb..9f4b89218042 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -35,7 +35,6 @@ namespace llvm {
class APInt;
class hash_code;
template <typename T> class SmallVectorImpl;
- template <typename T> struct DenseMapInfo;
class StringRef;
/// Helper functions for StringRef::getAsInteger.
@@ -949,7 +948,7 @@ namespace llvm {
hash_code hash_value(StringRef S);
// Provide DenseMapInfo for StringRefs.
- template <> struct DenseMapInfo<StringRef> {
+ template <> struct DenseMapInfo<StringRef, void> {
static inline StringRef getEmptyKey() {
return StringRef(
reinterpret_cast<const char *>(~static_cast<uintptr_t>(0)), 0);
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/include/llvm/ADT/Triple.h
index 76f3514050f0..2fd3047acbfd 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/include/llvm/ADT/Triple.h
@@ -93,6 +93,8 @@ public:
hsail64, // AMD HSAIL with 64-bit pointers
spir, // SPIR: standard portable IR for OpenCL 32-bit version
spir64, // SPIR: standard portable IR for OpenCL 64-bit version
+ spirv32, // SPIR-V with 32-bit pointers
+ spirv64, // SPIR-V with 64-bit pointers
kalimba, // Kalimba: generic kalimba
shave, // SHAVE: Movidius vector VLIW processors
lanai, // Lanai: Lanai 32-bit
@@ -106,6 +108,9 @@ public:
enum SubArchType {
NoSubArch,
+ ARMSubArch_v9_2a,
+ ARMSubArch_v9_1a,
+ ARMSubArch_v9,
ARMSubArch_v8_7a,
ARMSubArch_v8_6a,
ARMSubArch_v8_5a,
@@ -290,10 +295,10 @@ public:
/// @name Normalization
/// @{
- /// normalize - Turn an arbitrary machine specification into the canonical
- /// triple form (or something sensible that the Triple class understands if
- /// nothing better can reasonably be done). In particular, it handles the
- /// common case in which otherwise valid components are in the wrong order.
+ /// Turn an arbitrary machine specification into the canonical triple form (or
+ /// something sensible that the Triple class understands if nothing better can
+ /// reasonably be done). In particular, it handles the common case in which
+ /// otherwise valid components are in the wrong order.
static std::string normalize(StringRef Str);
/// Return the normalized form of this triple's string.
@@ -303,25 +308,24 @@ public:
/// @name Typed Component Access
/// @{
- /// getArch - Get the parsed architecture type of this triple.
+ /// Get the parsed architecture type of this triple.
ArchType getArch() const { return Arch; }
- /// getSubArch - get the parsed subarchitecture type for this triple.
+ /// Get the parsed subarchitecture type for this triple.
SubArchType getSubArch() const { return SubArch; }
- /// getVendor - Get the parsed vendor type of this triple.
+ /// Get the parsed vendor type of this triple.
VendorType getVendor() const { return Vendor; }
- /// getOS - Get the parsed operating system type of this triple.
+ /// Get the parsed operating system type of this triple.
OSType getOS() const { return OS; }
- /// hasEnvironment - Does this triple have the optional environment
- /// (fourth) component?
+ /// Does this triple have the optional environment (fourth) component?
bool hasEnvironment() const {
return getEnvironmentName() != "";
}
- /// getEnvironment - Get the parsed environment type of this triple.
+ /// Get the parsed environment type of this triple.
EnvironmentType getEnvironment() const { return Environment; }
/// Parse the version number from the OS name component of the
@@ -333,39 +337,39 @@ public:
void getEnvironmentVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
- /// getFormat - Get the object format for this triple.
+ /// Get the object format for this triple.
ObjectFormatType getObjectFormat() const { return ObjectFormat; }
- /// getOSVersion - Parse the version number from the OS name component of the
- /// triple, if present.
+ /// Parse the version number from the OS name component of the triple, if
+ /// present.
///
/// For example, "fooos1.2.3" would return (1, 2, 3).
///
/// If an entry is not defined, it will be returned as 0.
void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const;
- /// getOSMajorVersion - Return just the major version number, this is
- /// specialized because it is a common query.
+ /// Return just the major version number; this is specialized because it is
+ /// a common query.
unsigned getOSMajorVersion() const {
unsigned Maj, Min, Micro;
getOSVersion(Maj, Min, Micro);
return Maj;
}
- /// getMacOSXVersion - Parse the version number as with getOSVersion and then
- /// translate generic "darwin" versions to the corresponding OS X versions.
- /// This may also be called with IOS triples but the OS X version number is
- /// just set to a constant 10.4.0 in that case. Returns true if successful.
+ /// Parse the version number as with getOSVersion and then translate generic
+ /// "darwin" versions to the corresponding OS X versions. This may also be
+ /// called with IOS triples but the OS X version number is just set to a
+ /// constant 10.4.0 in that case. Returns true if successful.
bool getMacOSXVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
- /// getiOSVersion - Parse the version number as with getOSVersion. This should
- /// only be called with IOS or generic triples.
+ /// Parse the version number as with getOSVersion. This should only be called
+ /// with IOS or generic triples.
void getiOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
- /// getWatchOSVersion - Parse the version number as with getOSVersion. This
- /// should only be called with WatchOS or generic triples.
+ /// Parse the version number as with getOSVersion. This should only be called
+ /// with WatchOS or generic triples.
void getWatchOSVersion(unsigned &Major, unsigned &Minor,
unsigned &Micro) const;
@@ -377,24 +381,24 @@ public:
const std::string &getTriple() const { return Data; }
- /// getArchName - Get the architecture (first) component of the
- /// triple.
+ /// Get the architecture (first) component of the triple.
StringRef getArchName() const;
- /// getVendorName - Get the vendor (second) component of the triple.
+ /// Get the architecture name based on Kind and SubArch.
+ StringRef getArchName(ArchType Kind, SubArchType SubArch = NoSubArch) const;
+
+ /// Get the vendor (second) component of the triple.
StringRef getVendorName() const;
- /// getOSName - Get the operating system (third) component of the
- /// triple.
+ /// Get the operating system (third) component of the triple.
StringRef getOSName() const;
- /// getEnvironmentName - Get the optional environment (fourth)
- /// component of the triple, or "" if empty.
+ /// Get the optional environment (fourth) component of the triple, or "" if
+ /// empty.
StringRef getEnvironmentName() const;
- /// getOSAndEnvironmentName - Get the operating system and optional
- /// environment components as a single string (separated by a '-'
- /// if the environment component is present).
+ /// Get the operating system and optional environment components as a single
+ /// string (separated by a '-' if the environment component is present).
StringRef getOSAndEnvironmentName() const;
/// @}
@@ -420,8 +424,8 @@ public:
/// Note that this tests for 16-bit pointer width, and nothing else.
bool isArch16Bit() const;
- /// isOSVersionLT - Helper function for doing comparisons against version
- /// numbers included in the target triple.
+ /// Helper function for doing comparisons against version numbers included in
+ /// the target triple.
bool isOSVersionLT(unsigned Major, unsigned Minor = 0,
unsigned Micro = 0) const {
unsigned LHS[3];
@@ -443,14 +447,13 @@ public:
return isOSVersionLT(RHS[0], RHS[1], RHS[2]);
}
- /// isMacOSXVersionLT - Comparison function for checking OS X version
- /// compatibility, which handles supporting skewed version numbering schemes
- /// used by the "darwin" triples.
+ /// Comparison function for checking OS X version compatibility, which handles
+ /// the skewed version numbering schemes used by the "darwin" triples.
bool isMacOSXVersionLT(unsigned Major, unsigned Minor = 0,
unsigned Micro = 0) const;
- /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both
- /// "darwin" and "osx" as OS X triples.
+ /// Is this a Mac OS X triple. For legacy reasons, we support both "darwin"
+ /// and "osx" as OS X triples.
bool isMacOSX() const {
return getOS() == Triple::Darwin || getOS() == Triple::MacOSX;
}
@@ -480,7 +483,7 @@ public:
bool isOSzOS() const { return getOS() == Triple::ZOS; }
- /// isOSDarwin - Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
+ /// Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
bool isOSDarwin() const {
return isMacOSX() || isiOS() || isWatchOS();
}
@@ -698,6 +701,11 @@ public:
return getArch() == Triple::spir || getArch() == Triple::spir64;
}
+ /// Tests whether the target is SPIR-V (32/64-bit).
+ bool isSPIRV() const {
+ return getArch() == Triple::spirv32 || getArch() == Triple::spirv64;
+ }
+
/// Tests whether the target is NVPTX (32- or 64-bit).
bool isNVPTX() const {
return getArch() == Triple::nvptx || getArch() == Triple::nvptx64;
@@ -720,6 +728,19 @@ public:
return getArch() == Triple::arm || getArch() == Triple::armeb;
}
+ /// Tests whether the target supports the EHABI exception
+ /// handling standard.
+ bool isTargetEHABICompatible() const {
+ return (isARM() || isThumb()) &&
+ (getEnvironment() == Triple::EABI ||
+ getEnvironment() == Triple::GNUEABI ||
+ getEnvironment() == Triple::MuslEABI ||
+ getEnvironment() == Triple::EABIHF ||
+ getEnvironment() == Triple::GNUEABIHF ||
+ getEnvironment() == Triple::MuslEABIHF || isAndroid()) &&
+ isOSBinFormatELF();
+ }
+
/// Tests whether the target is AArch64 (little and big endian).
bool isAArch64() const {
return getArch() == Triple::aarch64 || getArch() == Triple::aarch64_be ||
@@ -833,46 +854,38 @@ public:
/// @name Mutators
/// @{
- /// setArch - Set the architecture (first) component of the triple
- /// to a known type.
- void setArch(ArchType Kind);
+ /// Set the architecture (first) component of the triple to a known type.
+ void setArch(ArchType Kind, SubArchType SubArch = NoSubArch);
- /// setVendor - Set the vendor (second) component of the triple to a
- /// known type.
+ /// Set the vendor (second) component of the triple to a known type.
void setVendor(VendorType Kind);
- /// setOS - Set the operating system (third) component of the triple
- /// to a known type.
+ /// Set the operating system (third) component of the triple to a known type.
void setOS(OSType Kind);
- /// setEnvironment - Set the environment (fourth) component of the triple
- /// to a known type.
+ /// Set the environment (fourth) component of the triple to a known type.
void setEnvironment(EnvironmentType Kind);
- /// setObjectFormat - Set the object file format
+ /// Set the object file format.
void setObjectFormat(ObjectFormatType Kind);
- /// setTriple - Set all components to the new triple \p Str.
+ /// Set all components to the new triple \p Str.
void setTriple(const Twine &Str);
- /// setArchName - Set the architecture (first) component of the
- /// triple by name.
+ /// Set the architecture (first) component of the triple by name.
void setArchName(StringRef Str);
- /// setVendorName - Set the vendor (second) component of the triple
- /// by name.
+ /// Set the vendor (second) component of the triple by name.
void setVendorName(StringRef Str);
- /// setOSName - Set the operating system (third) component of the
- /// triple by name.
+ /// Set the operating system (third) component of the triple by name.
void setOSName(StringRef Str);
- /// setEnvironmentName - Set the optional environment (fourth)
- /// component of the triple by name.
+ /// Set the optional environment (fourth) component of the triple by name.
void setEnvironmentName(StringRef Str);
- /// setOSAndEnvironmentName - Set the operating system and optional
- /// environment components with a single string.
+ /// Set the operating system and optional environment components with a single
+ /// string.
void setOSAndEnvironmentName(StringRef Str);
/// @}
@@ -938,33 +951,30 @@ public:
/// @name Static helpers for IDs.
/// @{
- /// getArchTypeName - Get the canonical name for the \p Kind architecture.
+ /// Get the canonical name for the \p Kind architecture.
static StringRef getArchTypeName(ArchType Kind);
- /// getArchTypePrefix - Get the "prefix" canonical name for the \p Kind
- /// architecture. This is the prefix used by the architecture specific
- /// builtins, and is suitable for passing to \see
- /// Intrinsic::getIntrinsicForGCCBuiltin().
+ /// Get the "prefix" canonical name for the \p Kind architecture. This is the
+ /// prefix used by the architecture specific builtins, and is suitable for
+ /// passing to \see Intrinsic::getIntrinsicForGCCBuiltin().
///
/// \return - The architecture prefix, or 0 if none is defined.
static StringRef getArchTypePrefix(ArchType Kind);
- /// getVendorTypeName - Get the canonical name for the \p Kind vendor.
+ /// Get the canonical name for the \p Kind vendor.
static StringRef getVendorTypeName(VendorType Kind);
- /// getOSTypeName - Get the canonical name for the \p Kind operating system.
+ /// Get the canonical name for the \p Kind operating system.
static StringRef getOSTypeName(OSType Kind);
- /// getEnvironmentTypeName - Get the canonical name for the \p Kind
- /// environment.
+ /// Get the canonical name for the \p Kind environment.
static StringRef getEnvironmentTypeName(EnvironmentType Kind);
/// @}
/// @name Static helpers for converting alternate architecture names.
/// @{
- /// getArchTypeForLLVMName - The canonical type for the given LLVM
- /// architecture name (e.g., "x86").
+ /// The canonical type for the given LLVM architecture name (e.g., "x86").
static ArchType getArchTypeForLLVMName(StringRef Str);
/// @}
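A hedged sketch combining two of the Triple additions above: the spirv32/spirv64 architectures with isSPIRV(), and setArch() accepting an optional SubArchType. The helper function is invented and no particular triple string round-trip is asserted.

#include "llvm/ADT/Triple.h"

bool retargetToSPIRV(llvm::Triple &T) {
  // setArch now also takes a sub-architecture (defaults to NoSubArch).
  T.setArch(llvm::Triple::spirv64, llvm::Triple::NoSubArch);
  // New predicate covering both spirv32 and spirv64.
  return T.isSPIRV();
}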
diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h
index 815b9a40afaf..3b7598f3251d 100644
--- a/llvm/include/llvm/ADT/TypeSwitch.h
+++ b/llvm/include/llvm/ADT/TypeSwitch.h
@@ -35,7 +35,12 @@ public:
/// Invoke a case on the derived class with multiple case types.
template <typename CaseT, typename CaseT2, typename... CaseTs,
typename CallableT>
- DerivedT &Case(CallableT &&caseFn) {
+ // This is marked always_inline and nodebug so it doesn't show up in stack
+ // traces at -O0 (or other optimization levels). Large TypeSwitches are
+ // common, are equivalent to a switch, and don't add any value to stack
+ // traces.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE LLVM_ATTRIBUTE_NODEBUG DerivedT &
+ Case(CallableT &&caseFn) {
DerivedT &derived = static_cast<DerivedT &>(*this);
return derived.template Case<CaseT>(caseFn)
.template Case<CaseT2, CaseTs...>(caseFn);
diff --git a/llvm/include/llvm/ADT/iterator.h b/llvm/include/llvm/ADT/iterator.h
index b3c6608e9b6e..6f0c42fe08be 100644
--- a/llvm/include/llvm/ADT/iterator.h
+++ b/llvm/include/llvm/ADT/iterator.h
@@ -35,6 +35,21 @@ namespace llvm {
/// terms of addition of one. These aren't equivalent for all iterator
/// categories, and respecting that adds a lot of complexity for little gain.
///
+/// Iterators are expected to have const rules analogous to pointers, with a
+/// single, const-qualified operator*() that returns ReferenceT. This matches
+/// the second and third pointers in the following example:
+/// \code
+/// int Value;
+/// { int *I = &Value; } // ReferenceT 'int&'
+/// { int *const I = &Value; } // ReferenceT 'int&'; const
+/// { const int *I = &Value; } // ReferenceT 'const int&'
+/// { const int *const I = &Value; } // ReferenceT 'const int&'; const
+/// \endcode
+/// If an iterator facade returns a handle to its own state, then T (and
+/// PointerT and ReferenceT) should usually be const-qualified. Otherwise, if
+/// clients are expected to modify the handle itself, the field can be declared
+/// mutable or use const_cast.
+///
/// Classes wishing to use `iterator_facade_base` should implement the following
/// methods:
///
@@ -42,8 +57,7 @@ namespace llvm {
/// (All of the following methods)
/// - DerivedT &operator=(const DerivedT &R);
/// - bool operator==(const DerivedT &R) const;
-/// - const T &operator*() const;
-/// - T &operator*();
+/// - T &operator*() const;
/// - DerivedT &operator++();
///
/// Bidirectional Iterators:
@@ -95,6 +109,22 @@ protected:
operator ReferenceT() const { return *I; }
};
+ /// A proxy object for computing a pointer via indirecting a copy of a
+ /// reference. This is used in APIs which need to produce a pointer but for
+ /// which the reference might be a temporary. The proxy preserves the
+ /// reference internally and exposes the pointer via the arrow operator.
+ class PointerProxy {
+ friend iterator_facade_base;
+
+ ReferenceT R;
+
+ template <typename RefT>
+ PointerProxy(RefT &&R) : R(std::forward<RefT>(R)) {}
+
+ public:
+ PointerT operator->() const { return &R; }
+ };
+
public:
DerivedT operator+(DifferenceTypeT n) const {
static_assert(std::is_base_of<iterator_facade_base, DerivedT>::value,
@@ -172,19 +202,13 @@ public:
return !(static_cast<const DerivedT &>(*this) < RHS);
}
- PointerT operator->() { return &static_cast<DerivedT *>(this)->operator*(); }
- PointerT operator->() const {
- return &static_cast<const DerivedT *>(this)->operator*();
- }
- ReferenceProxy operator[](DifferenceTypeT n) {
- static_assert(IsRandomAccess,
- "Subscripting is only defined for random access iterators.");
- return ReferenceProxy(static_cast<DerivedT *>(this)->operator+(n));
+ PointerProxy operator->() const {
+ return static_cast<const DerivedT *>(this)->operator*();
}
ReferenceProxy operator[](DifferenceTypeT n) const {
static_assert(IsRandomAccess,
"Subscripting is only defined for random access iterators.");
- return ReferenceProxy(static_cast<const DerivedT *>(this)->operator+(n));
+ return static_cast<const DerivedT *>(this)->operator+(n);
}
};
@@ -330,8 +354,7 @@ public:
explicit pointer_iterator(WrappedIteratorT u)
: pointer_iterator::iterator_adaptor_base(std::move(u)) {}
- T &operator*() { return Ptr = &*this->I; }
- const T &operator*() const { return Ptr = &*this->I; }
+ T &operator*() const { return Ptr = &*this->I; }
};
template <typename RangeT, typename WrappedIteratorT =
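A minimal iterator written against the revised convention documented above (a single const-qualified operator*); the counting iterator itself is invented for illustration.

#include "llvm/ADT/iterator.h"
#include <iterator>

class CountingIterator
    : public llvm::iterator_facade_base<CountingIterator,
                                        std::forward_iterator_tag, const int> {
  int Value = 0;

public:
  CountingIterator() = default;
  explicit CountingIterator(int V) : Value(V) {}
  bool operator==(const CountingIterator &R) const { return Value == R.Value; }
  // The iterator hands out a reference to its own state, so T is
  // const-qualified and the single operator*() is const.
  const int &operator*() const { return Value; }
  CountingIterator &operator++() {
    ++Value;
    return *this;
  }
};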
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 7fec0feb09d5..2770a1a9b277 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -61,6 +61,7 @@ class DominatorTree;
class FenceInst;
class Function;
class InvokeInst;
+class LoopInfo;
class PreservedAnalyses;
class TargetLibraryInfo;
class Value;
@@ -378,6 +379,50 @@ createModRefInfo(const FunctionModRefBehavior FMRB) {
return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
}
+/// Virtual base class for providers of capture information.
+struct CaptureInfo {
+ virtual ~CaptureInfo() = 0;
+ virtual bool isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) = 0;
+};
+
+/// Context-free CaptureInfo provider, which computes and caches whether an
+/// object is captured in the function at all, but does not distinguish whether
+/// it was captured before or after the context instruction.
+class SimpleCaptureInfo final : public CaptureInfo {
+ SmallDenseMap<const Value *, bool, 8> IsCapturedCache;
+
+public:
+ bool isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) override;
+};
+
+/// Context-sensitive CaptureInfo provider, which computes and caches the
+/// earliest common dominator closure of all captures. It provides a good
+/// approximation to a precise "captures before" analysis.
+class EarliestEscapeInfo final : public CaptureInfo {
+ DominatorTree &DT;
+ const LoopInfo &LI;
+
+ /// Map from identified local object to an instruction before which it does
+ /// not escape, or nullptr if it never escapes. The "earliest" instruction
+ /// may be a conservative approximation, e.g. the first instruction in the
+ /// function is always a legal choice.
+ DenseMap<const Value *, Instruction *> EarliestEscapes;
+
+ /// Reverse map from instruction to the objects it is the earliest escape for.
+ /// This is used for cache invalidation purposes.
+ DenseMap<Instruction *, TinyPtrVector<const Value *>> Inst2Obj;
+
+public:
+ EarliestEscapeInfo(DominatorTree &DT, const LoopInfo &LI) : DT(DT), LI(LI) {}
+
+ bool isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) override;
+
+ void removeInstruction(Instruction *I);
+};
+
/// Reduced version of MemoryLocation that only stores a pointer and size.
/// Used for caching AATags independent BasicAA results.
struct AACacheLoc {
@@ -425,8 +470,7 @@ public:
using AliasCacheT = SmallDenseMap<LocPair, CacheEntry, 8>;
AliasCacheT AliasCache;
- using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
- IsCapturedCacheT IsCapturedCache;
+ CaptureInfo *CI;
/// Query depth used to distinguish recursive queries.
unsigned Depth = 0;
@@ -439,18 +483,26 @@ public:
/// assumption is disproven.
SmallVector<AAQueryInfo::LocPair, 4> AssumptionBasedResults;
- AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+ AAQueryInfo(CaptureInfo *CI) : CI(CI) {}
/// Create a new AAQueryInfo based on this one, but with the cache cleared.
/// This is used for recursive queries across phis, where cache results may
/// not be valid.
AAQueryInfo withEmptyCache() {
- AAQueryInfo NewAAQI;
+ AAQueryInfo NewAAQI(CI);
NewAAQI.Depth = Depth;
return NewAAQI;
}
};
+/// AAQueryInfo that uses SimpleCaptureInfo.
+class SimpleAAQueryInfo : public AAQueryInfo {
+ SimpleCaptureInfo CI;
+
+public:
+ SimpleAAQueryInfo() : AAQueryInfo(&CI) {}
+};
+
class BatchAAResults;
class AAResults {
@@ -770,7 +822,7 @@ public:
/// helpers above.
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(I, OptLoc, AAQIP);
}
@@ -797,7 +849,7 @@ public:
ModRefInfo callCapturesBefore(const Instruction *I,
const MemoryLocation &MemLoc,
DominatorTree *DT) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return callCapturesBefore(I, MemLoc, DT, AAQIP);
}
@@ -896,9 +948,12 @@ private:
class BatchAAResults {
AAResults &AA;
AAQueryInfo AAQI;
+ SimpleCaptureInfo SimpleCI;
public:
- BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
+ BatchAAResults(AAResults &AAR) : AA(AAR), AAQI(&SimpleCI) {}
+ BatchAAResults(AAResults &AAR, CaptureInfo *CI) : AA(AAR), AAQI(CI) {}
+
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
return AA.alias(LocA, LocB, AAQI);
}
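A rough sketch of wiring the new CaptureInfo providers into batched alias queries; the analyses and memory locations are assumed to come from the surrounding pass.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"

void batchedQueries(llvm::AAResults &AA, llvm::DominatorTree &DT,
                    const llvm::LoopInfo &LI, const llvm::MemoryLocation &LocA,
                    const llvm::MemoryLocation &LocB) {
  // Default behaviour: context-free capture information (SimpleCaptureInfo).
  llvm::BatchAAResults Simple(AA);
  (void)Simple.alias(LocA, LocB);

  // Context-sensitive "captures before" approximation.
  llvm::EarliestEscapeInfo EI(DT, LI);
  llvm::BatchAAResults Precise(AA, &EI);
  (void)Precise.alias(LocA, LocB);
}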
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 49c0cd89a4db..77da19110246 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -20,7 +20,6 @@
#include "llvm/ADT/DenseMap.h"
namespace llvm {
-class IntrinsicInst;
class AssumptionCache;
class DominatorTree;
@@ -70,15 +69,15 @@ template<> struct DenseMapInfo<Attribute::AttrKind> {
using RetainedKnowledgeKey = std::pair<Value *, Attribute::AttrKind>;
struct MinMax {
- unsigned Min;
- unsigned Max;
+ uint64_t Min;
+ uint64_t Max;
};
/// A mapping from intrinsics (=`llvm.assume` calls) to a value range
/// (=knowledge) that is encoded in them. How the value range is interpreted
/// depends on the RetainedKnowledgeKey that was used to get this out of the
/// RetainedKnowledgeMap.
-using Assume2KnowledgeMap = DenseMap<IntrinsicInst *, MinMax>;
+using Assume2KnowledgeMap = DenseMap<AssumeInst *, MinMax>;
using RetainedKnowledgeMap =
DenseMap<RetainedKnowledgeKey, Assume2KnowledgeMap>;
@@ -100,7 +99,7 @@ void fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result);
/// - ArgValue will be 4.
struct RetainedKnowledge {
Attribute::AttrKind AttrKind = Attribute::None;
- unsigned ArgValue = 0;
+ uint64_t ArgValue = 0;
Value *WasOn = nullptr;
bool operator==(RetainedKnowledge Other) const {
return AttrKind == Other.AttrKind && WasOn == Other.WasOn &&
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
index 51d04bd8cf02..12dd9b04c932 100644
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -29,6 +29,7 @@ namespace llvm {
class AssumeInst;
class Function;
class raw_ostream;
+class TargetTransformInfo;
class Value;
/// A cache of \@llvm.assume calls within a function.
@@ -59,6 +60,8 @@ private:
/// We track this to lazily populate our assumptions.
Function &F;
+ TargetTransformInfo *TTI;
+
/// Vector of weak value handles to calls of the \@llvm.assume
/// intrinsic.
SmallVector<ResultElem, 4> AssumeHandles;
@@ -103,7 +106,8 @@ private:
public:
/// Construct an AssumptionCache from a function by scanning all of
/// its instructions.
- AssumptionCache(Function &F) : F(F) {}
+ AssumptionCache(Function &F, TargetTransformInfo *TTI = nullptr)
+ : F(F), TTI(TTI) {}
/// This cache is designed to be self-updating and so it should never be
/// invalidated.
@@ -174,9 +178,7 @@ class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
public:
using Result = AssumptionCache;
- AssumptionCache run(Function &F, FunctionAnalysisManager &) {
- return AssumptionCache(F);
- }
+ AssumptionCache run(Function &F, FunctionAnalysisManager &);
};
/// Printer pass for the \c AssumptionAnalysis results.
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index 991c0cbb642a..ed9d1ba4c5a7 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -13,10 +13,8 @@
#ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H
#define LLVM_ANALYSIS_BASICALIASANALYSIS_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
@@ -28,7 +26,6 @@
namespace llvm {
struct AAMDNodes;
-class APInt;
class AssumptionCache;
class BasicBlock;
class DataLayout;
@@ -98,71 +95,7 @@ public:
FunctionModRefBehavior getModRefBehavior(const Function *Fn);
private:
- // A linear transformation of a Value; this class represents ZExt(SExt(V,
- // SExtBits), ZExtBits) * Scale + Offset.
- struct VariableGEPIndex {
- // An opaque Value - we can't decompose this further.
- const Value *V;
-
- // We need to track what extensions we've done as we consider the same Value
- // with different extensions as different variables in a GEP's linear
- // expression;
- // e.g.: if V == -1, then sext(x) != zext(x).
- unsigned ZExtBits;
- unsigned SExtBits;
-
- APInt Scale;
-
- // Context instruction to use when querying information about this index.
- const Instruction *CxtI;
-
- /// True if all operations in this expression are NSW.
- bool IsNSW;
-
- void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
- void print(raw_ostream &OS) const {
- OS << "(V=" << V->getName()
- << ", zextbits=" << ZExtBits
- << ", sextbits=" << SExtBits
- << ", scale=" << Scale << ")";
- }
- };
-
- // Represents the internal structure of a GEP, decomposed into a base pointer,
- // constant offsets, and variable scaled indices.
- struct DecomposedGEP {
- // Base pointer of the GEP
- const Value *Base;
- // Total constant offset from base.
- APInt Offset;
- // Scaled variable (non-constant) indices.
- SmallVector<VariableGEPIndex, 4> VarIndices;
- // Is GEP index scale compile-time constant.
- bool HasCompileTimeConstantScale;
- // Are all operations inbounds GEPs or non-indexing operations?
- // (None iff expression doesn't involve any geps)
- Optional<bool> InBounds;
-
- void dump() const {
- print(dbgs());
- dbgs() << "\n";
- }
- void print(raw_ostream &OS) const {
- OS << "(DecomposedGEP Base=" << Base->getName()
- << ", Offset=" << Offset
- << ", VarIndices=[";
- for (size_t i = 0; i < VarIndices.size(); i++) {
- if (i != 0)
- OS << ", ";
- VarIndices[i].print(OS);
- }
- OS << "], HasCompileTimeConstantScale=" << HasCompileTimeConstantScale
- << ")";
- }
- };
+ struct DecomposedGEP;
/// Tracks phi nodes we have visited.
///
@@ -187,10 +120,6 @@ private:
DecomposeGEPExpression(const Value *V, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT);
- static bool isGEPBaseAtNegativeOffset(const GEPOperator *GEPOp,
- const DecomposedGEP &DecompGEP, const DecomposedGEP &DecompObject,
- LocationSize ObjectAccessSize);
-
/// A Heuristic for aliasGEP that searches for a constant offset
/// between the variables.
///
@@ -200,15 +129,14 @@ private:
/// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if
/// the addition overflows.
bool
- constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices,
- LocationSize V1Size, LocationSize V2Size,
- const APInt &BaseOffset, AssumptionCache *AC,
+ constantOffsetHeuristic(const DecomposedGEP &GEP, LocationSize V1Size,
+ LocationSize V2Size, AssumptionCache *AC,
DominatorTree *DT);
bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
- void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src);
+ void subtractDecomposedGEPs(DecomposedGEP &DestGEP,
+ const DecomposedGEP &SrcGEP);
AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size,
const Value *V2, LocationSize V2Size,
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index e361cccef960..7cf172dc1dd1 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -20,7 +20,7 @@
/// A secondary more general goal is to be able to isolate optimization on
/// unrelated parts of the IR module. This is useful to ensure our
/// optimizations are principled and don't miss oportunities where refinement
-/// of one part of the module influence transformations in another part of the
+/// of one part of the module influences transformations in another part of the
/// module. But this is also useful if we want to parallelize the optimizations
/// across common large module graph shapes which tend to be very wide and have
/// large regions of unrelated cliques.
@@ -161,6 +161,12 @@ struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager,
(void)AM.template getResult<AnalysisT>(C, CG);
return PreservedAnalyses::all();
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "require<" << PassName << ">";
+ }
};
/// A proxy from a \c CGSCCAnalysisManager to a \c Module.
@@ -215,7 +221,7 @@ using ModuleAnalysisManagerCGSCCProxy =
LazyCallGraph &>;
/// Support structure for SCC passes to communicate updates the call graph back
-/// to the CGSCC pass manager infrsatructure.
+/// to the CGSCC pass manager infrastructure.
///
/// The CGSCC pass manager runs SCC passes which are allowed to update the call
/// graph and SCC structures. This means the structure the pass manager works
@@ -274,22 +280,22 @@ struct CGSCCUpdateResult {
/// If non-null, the updated current \c RefSCC being processed.
///
- /// This is set when a graph refinement takes place an the "current" point in
- /// the graph moves "down" or earlier in the post-order walk. This will often
- /// cause the "current" RefSCC to be a newly created RefSCC object and the
- /// old one to be added to the above worklist. When that happens, this
+ /// This is set when a graph refinement takes place and the "current" point
+ /// in the graph moves "down" or earlier in the post-order walk. This will
+ /// often cause the "current" RefSCC to be a newly created RefSCC object and
+ /// the old one to be added to the above worklist. When that happens, this
/// pointer is non-null and can be used to continue processing the "top" of
/// the post-order walk.
LazyCallGraph::RefSCC *UpdatedRC;
/// If non-null, the updated current \c SCC being processed.
///
- /// This is set when a graph refinement takes place an the "current" point in
- /// the graph moves "down" or earlier in the post-order walk. This will often
- /// cause the "current" SCC to be a newly created SCC object and the old one
- /// to be added to the above worklist. When that happens, this pointer is
- /// non-null and can be used to continue processing the "top" of the
- /// post-order walk.
+ /// This is set when a graph refinement takes place and the "current" point
+ /// in the graph moves "down" or earlier in the post-order walk. This will
+ /// often cause the "current" SCC to be a newly created SCC object and the
+ /// old one to be added to the above worklist. When that happens, this
+ /// pointer is non-null and can be used to continue processing the "top" of
+ /// the post-order walk.
LazyCallGraph::SCC *UpdatedC;
/// Preserved analyses across SCCs.
@@ -298,7 +304,7 @@ struct CGSCCUpdateResult {
/// (changing both the CG structure and the function IR itself). However,
/// this means we need to take special care to correctly mark what analyses
/// are preserved *across* SCCs. We have to track this out-of-band here
- /// because within the main `PassManeger` infrastructure we need to mark
+ /// because within the main `PassManager` infrastructure we need to mark
/// everything within an SCC as preserved in order to avoid repeatedly
/// invalidating the same analyses as we unnest pass managers and adaptors.
/// So we track the cross-SCC version of the preserved analyses here from any
@@ -363,6 +369,13 @@ public:
/// Runs the CGSCC pass across every SCC in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "cgscc(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
static bool isRequired() { return true; }
private:
@@ -377,8 +390,11 @@ createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT &&Pass) {
using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
PreservedAnalyses, CGSCCAnalysisManager,
LazyCallGraph &, CGSCCUpdateResult &>;
+ // Do not use make_unique: it causes too many template instantiations,
+ // which results in terrible compile times.
return ModuleToPostOrderCGSCCPassAdaptor(
- std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)));
+ std::unique_ptr<ModuleToPostOrderCGSCCPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<CGSCCPassT>(Pass))));
}
/// A proxy from a \c FunctionAnalysisManager to an \c SCC.
@@ -461,11 +477,14 @@ class CGSCCToFunctionPassAdaptor
public:
using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;
- explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
- : Pass(std::move(Pass)) {}
+ explicit CGSCCToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass,
+ bool EagerlyInvalidate, bool NoRerun)
+ : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate),
+ NoRerun(NoRerun) {}
CGSCCToFunctionPassAdaptor(CGSCCToFunctionPassAdaptor &&Arg)
- : Pass(std::move(Arg.Pass)) {}
+ : Pass(std::move(Arg.Pass)), EagerlyInvalidate(Arg.EagerlyInvalidate),
+ NoRerun(Arg.NoRerun) {}
friend void swap(CGSCCToFunctionPassAdaptor &LHS,
CGSCCToFunctionPassAdaptor &RHS) {
@@ -481,24 +500,56 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "function";
+ if (EagerlyInvalidate)
+ OS << "<eager-inv>";
+ OS << "(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
static bool isRequired() { return true; }
private:
std::unique_ptr<PassConceptT> Pass;
+ bool EagerlyInvalidate;
+ bool NoRerun;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename FunctionPassT>
CGSCCToFunctionPassAdaptor
-createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass) {
+createCGSCCToFunctionPassAdaptor(FunctionPassT &&Pass,
+ bool EagerlyInvalidate = false,
+ bool NoRerun = false) {
using PassModelT =
detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
FunctionAnalysisManager>;
+ // Do not use make_unique: it causes too many template instantiations,
+ // which results in terrible compile times.
return CGSCCToFunctionPassAdaptor(
- std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass)));
+ std::unique_ptr<CGSCCToFunctionPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<FunctionPassT>(Pass))),
+ EagerlyInvalidate, NoRerun);
}
+// A marker to determine if function passes should be run on a function within a
+// CGSCCToFunctionPassAdaptor. This is used to prevent running an expensive
+// function pass (manager) on a function multiple times if SCC mutations cause a
+// function to be visited multiple times and the function is not modified by
+// other SCC passes.
+class ShouldNotRunFunctionPassesAnalysis
+ : public AnalysisInfoMixin<ShouldNotRunFunctionPassesAnalysis> {
+public:
+ static AnalysisKey Key;
+ struct Result {};
+
+ Result run(Function &F, FunctionAnalysisManager &FAM) { return Result(); }
+};
+
/// A helper that repeats an SCC pass each time an indirect call is refined to
/// a direct call by that pass.
///
@@ -528,6 +579,13 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "devirt<" << MaxIterations << ">(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
private:
std::unique_ptr<PassConceptT> Pass;
int MaxIterations;
@@ -541,8 +599,11 @@ DevirtSCCRepeatedPass createDevirtSCCRepeatedPass(CGSCCPassT &&Pass,
using PassModelT = detail::PassModel<LazyCallGraph::SCC, CGSCCPassT,
PreservedAnalyses, CGSCCAnalysisManager,
LazyCallGraph &, CGSCCUpdateResult &>;
+ // Do not use make_unique: it causes too many template instantiations,
+ // which results in terrible compile times.
return DevirtSCCRepeatedPass(
- std::make_unique<PassModelT>(std::forward<CGSCCPassT>(Pass)),
+ std::unique_ptr<DevirtSCCRepeatedPass::PassConceptT>(
+ new PassModelT(std::forward<CGSCCPassT>(Pass))),
MaxIterations);
}
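A hedged sketch of how the printPipeline additions render a nested pipeline as text; InstCombinePass is just one example of a function pass, and the exact output depends on the name-mapping callback.

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

void printNestedPipeline() {
  auto Adaptor = llvm::createModuleToPostOrderCGSCCPassAdaptor(
      llvm::createCGSCCToFunctionPassAdaptor(llvm::InstCombinePass(),
                                             /*EagerlyInvalidate=*/true));
  // With an identity name mapping this prints roughly:
  //   cgscc(function<eager-inv>(InstCombinePass))
  Adaptor.printPipeline(llvm::errs(),
                        [](llvm::StringRef Name) { return Name; });
}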
diff --git a/llvm/include/llvm/Analysis/CaptureTracking.h b/llvm/include/llvm/Analysis/CaptureTracking.h
index 9da5f18e944b..50d12db7a1c3 100644
--- a/llvm/include/llvm/Analysis/CaptureTracking.h
+++ b/llvm/include/llvm/Analysis/CaptureTracking.h
@@ -22,6 +22,8 @@ namespace llvm {
class DataLayout;
class Instruction;
class DominatorTree;
+ class LoopInfo;
+ class Function;
/// getDefaultMaxUsesToExploreForCaptureTracking - Return default value of
/// the maximal number of uses to explore before giving up. It is used by
@@ -55,10 +57,25 @@ namespace llvm {
/// MaxUsesToExplore specifies how many uses the analysis should explore for
/// one value before giving up due to "too many uses". If MaxUsesToExplore
/// is zero, a default value is assumed.
- bool PointerMayBeCapturedBefore(
- const Value *V, bool ReturnCaptures, bool StoreCaptures,
- const Instruction *I, const DominatorTree *DT, bool IncludeI = false,
- unsigned MaxUsesToExplore = 0);
+ bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
+ bool StoreCaptures, const Instruction *I,
+ const DominatorTree *DT,
+ bool IncludeI = false,
+ unsigned MaxUsesToExplore = 0,
+ const LoopInfo *LI = nullptr);
+
+ // Returns the 'earliest' instruction that captures \p V in \p F. An instruction
+ // A is considered earlier than instruction B if A dominates B. If two escapes
+ // do not dominate each other, the terminator of the common dominator is
+ // chosen. If not all uses can be analyzed, the earliest escape is set to
+ // the first instruction in the function entry block. If \p V does not escape,
+ // nullptr is returned. Note that the caller of the function has to ensure
+ // that the instruction the result value is compared against is not in a
+ // cycle.
+ Instruction *FindEarliestCapture(const Value *V, Function &F,
+ bool ReturnCaptures, bool StoreCaptures,
+ const DominatorTree &DT,
+ unsigned MaxUsesToExplore = 0);
/// This callback is used in conjunction with PointerMayBeCaptured. In
/// addition to the interface here, you'll need to provide your own getters
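A minimal sketch of the new FindEarliestCapture entry point; the object, function and dominator tree are assumed to be provided by the caller.

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"

const llvm::Instruction *earliestEscape(const llvm::Value *Obj,
                                        llvm::Function &F,
                                        const llvm::DominatorTree &DT) {
  // nullptr means Obj does not escape from F at all.
  return llvm::FindEarliestCapture(Obj, F, /*ReturnCaptures=*/false,
                                   /*StoreCaptures=*/true, DT);
}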
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index 62742fdf9a91..45fb879f0c1f 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -128,10 +128,25 @@ Constant *ConstantFoldExtractElementInstruction(Constant *Val, Constant *Idx);
Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
ArrayRef<int> Mask);
-/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
-/// produce if it is constant and determinable. If this is not determinable,
-/// return null.
-Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, const DataLayout &DL);
+/// Extract value of C at the given Offset reinterpreted as Ty. If bits past
+/// the end of C are accessed, they are assumed to be poison.
+Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset,
+ const DataLayout &DL);
+
+/// Extract value of C reinterpreted as Ty. Same as previous API with zero
+/// offset.
+Constant *ConstantFoldLoadFromConst(Constant *C, Type *Ty,
+ const DataLayout &DL);
+
+/// Return the value that a load from C with offset Offset would produce if it
+/// is constant and determinable. If this is not determinable, return null.
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset,
+ const DataLayout &DL);
+
+/// Return the value that a load from C would produce if it is constant and
+/// determinable. If this is not determinable, return null.
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
+ const DataLayout &DL);
/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
/// getelementptr constantexpr, return the constant value being addressed by the
@@ -140,13 +155,6 @@ Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE,
Type *Ty,
const DataLayout &DL);
-/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
-/// indices (with an *implied* zero pointer index that is not in the list),
-/// return the constant value being addressed by a virtual load, or null if
-/// something is funny and we can't decide.
-Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
- ArrayRef<Constant *> Indices);
-
/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
/// the specified function.
bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
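A rough sketch of the offset-based constant-load folding API added above; the 64-bit offset width and the i32 result type are assumptions made for illustration.

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"

llvm::Constant *loadI32AtOffset8(llvm::Constant *Init, llvm::Type *Int32Ty,
                                 const llvm::DataLayout &DL) {
  // Read the bytes at offset 8 of the constant initializer, reinterpreted as
  // i32; returns null if the value is not determinable.
  llvm::APInt Offset(64, 8);
  return llvm::ConstantFoldLoadFromConst(Init, Int32Ty, Offset, DL);
}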
diff --git a/llvm/include/llvm/Analysis/CostModel.h b/llvm/include/llvm/Analysis/CostModel.h
new file mode 100644
index 000000000000..649168050cec
--- /dev/null
+++ b/llvm/include/llvm/Analysis/CostModel.h
@@ -0,0 +1,26 @@
+//===- CostModel.h - --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_COSTMODEL_H
+#define LLVM_ANALYSIS_COSTMODEL_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+/// Printer pass for cost modeling results.
+class CostModelPrinterPass : public PassInfoMixin<CostModelPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit CostModelPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_COSTMODEL_H
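A hedged sketch of reaching the new printer pass from a FunctionPassManager; in a real pipeline it would typically be requested by name (assumed to be "print<cost-model>"), which is not shown here.

#include "llvm/Analysis/CostModel.h"
#include "llvm/Support/raw_ostream.h"

void addCostModelPrinter(llvm::FunctionPassManager &FPM) {
  // Dumps cost estimates for each function the manager is run on.
  FPM.addPass(llvm::CostModelPrinterPass(llvm::errs()));
}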
diff --git a/llvm/include/llvm/Analysis/Delinearization.h b/llvm/include/llvm/Analysis/Delinearization.h
index 2658b6bbc80c..6e942530f253 100644
--- a/llvm/include/llvm/Analysis/Delinearization.h
+++ b/llvm/include/llvm/Analysis/Delinearization.h
@@ -16,10 +16,115 @@
#ifndef LLVM_ANALYSIS_DELINEARIZATION_H
#define LLVM_ANALYSIS_DELINEARIZATION_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
+class GetElementPtrInst;
+class ScalarEvolution;
+class SCEV;
+
+/// Compute the array dimensions Sizes from the set of Terms extracted from
+/// the memory access function of this SCEVAddRecExpr (second step of
+/// delinearization).
+void findArrayDimensions(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize);
+
+/// Collect parametric terms occurring in step expressions (first step of
+/// delinearization).
+void collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Terms);
+
+/// Return in Subscripts the access functions for each dimension in Sizes
+/// (third step of delinearization).
+void computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes);
+/// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+/// subscripts and sizes of an array access.
+///
+/// The delinearization is a 3 step process: the first two steps compute the
+/// sizes of each subscript and the third step computes the access functions
+/// for the delinearized array:
+///
+/// 1. Find the terms in the step functions
+/// 2. Compute the array size
+/// 3. Compute the access function: divide the SCEV by the array size
+/// starting with the innermost dimensions found in step 2. The Quotient
+/// is the SCEV to be divided in the next step of the recursion. The
+/// Remainder is the subscript of the innermost dimension. Loop over all
+/// array dimensions computed in step 2.
+///
+/// To compute a uniform array size for several memory accesses to the same
+/// object, one can collect in step 1 all the step terms for all the memory
+/// accesses, and compute in step 2 a unique array shape. This guarantees
+/// that the array shape will be the same across all memory accesses.
+///
+/// FIXME: We could derive the result of steps 1 and 2 from a description of
+/// the array shape given in metadata.
+///
+/// Example:
+///
+/// A[][n][m]
+///
+/// for i
+/// for j
+/// for k
+/// A[j+k][2i][5i] =
+///
+/// The initial SCEV:
+///
+/// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
+///
+/// 1. Find the different terms in the step functions:
+/// -> [2*m, 5, n*m, n*m]
+///
+/// 2. Compute the array size: sort and unique them
+/// -> [n*m, 2*m, 5]
+/// find the GCD of all the terms = 1
+/// divide by the GCD and erase constant terms
+/// -> [n*m, 2*m]
+/// GCD = m
+/// divide by GCD -> [n, 2]
+/// remove constant terms
+/// -> [n]
+/// size of the array is A[unknown][n][m]
+///
+/// 3. Compute the access function
+/// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
+/// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
+/// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
+/// The remainder is the subscript of the innermost array dimension: [5i].
+///
+/// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
+/// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
+/// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
+/// The Remainder is the subscript of the next array dimension: [2i].
+///
+/// The subscript of the outermost dimension is the Quotient: [j+k].
+///
+/// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
+void delinearize(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize);
+
+/// Gathers the individual index expressions from a GEP instruction.
+///
+/// This function optimistically assumes the GEP references into a fixed size
+/// array. If this is actually true, this function returns a list of array
+/// subscript expressions in \p Subscripts and a list of integers describing
+/// the size of the individual array dimensions in \p Sizes. Both lists have
+/// either equal length or the size list is one element shorter in case there
+/// is no known size available for the outermost array dimension. Returns true
+/// if successful and false otherwise.
+bool getIndexExpressionsFromGEP(ScalarEvolution &SE,
+ const GetElementPtrInst *GEP,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<int> &Sizes);
+
struct DelinearizationPrinterPass
: public PassInfoMixin<DelinearizationPrinterPass> {
explicit DelinearizationPrinterPass(raw_ostream &OS);
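(A short sketch of the free-function interface declared above, under the assumption that AccessFn is the SCEV of a memory access and ElementSize the SCEV of its element size; both names are placeholders.)

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

static bool delinearizeAccess(ScalarEvolution &SE, const SCEV *AccessFn,
                              const SCEV *ElementSize) {
  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  // Runs all three steps: collect parametric terms, derive the dimension
  // sizes, then divide out the sizes to recover one subscript per dimension.
  delinearize(SE, AccessFn, Subscripts, Sizes, ElementSize);
  // Empty result lists mean the access could not be delinearized.
  return !Subscripts.empty() && !Sizes.empty();
}
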
diff --git a/llvm/include/llvm/Analysis/HeatUtils.h b/llvm/include/llvm/Analysis/HeatUtils.h
index b665e211c6ac..9ecbbaf318da 100644
--- a/llvm/include/llvm/Analysis/HeatUtils.h
+++ b/llvm/include/llvm/Analysis/HeatUtils.h
@@ -1,9 +1,8 @@
//===-- HeatUtils.h - Utility for printing heat colors ----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index b623b9ca58d8..51c5c620230b 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -110,7 +110,8 @@ enum InstrType { Legal, Illegal, Invisible };
/// by \ref isSameOperationAs.
/// TODO: Handle GetElementPtrInsts, as some of the operands have to be the
/// exact same, and some do not.
-struct IRInstructionData : ilist_node<IRInstructionData> {
+struct IRInstructionData
+ : ilist_node<IRInstructionData, ilist_sentinel_tracking<true>> {
/// The source Instruction that is being wrapped.
Instruction *Inst = nullptr;
@@ -127,12 +128,41 @@ struct IRInstructionData : ilist_node<IRInstructionData> {
/// to a less than form. It is None otherwise.
Optional<CmpInst::Predicate> RevisedPredicate;
+ /// This structure holds the distances of how far "ahead of" or "behind" the
+ /// target blocks of a branch, or the incoming blocks of a phi node, are.
+ /// If the value is negative, it means that the block was registered before
+ /// the block of this instruction in terms of blocks in the function.
+ /// Code Example:
+ /// \code
+ /// block_1:
+ /// br i1 %0, label %block_2, label %block_3
+ /// block_2:
+ /// br i1 %1, label %block_1, label %block_2
+ /// block_3:
+ /// br i1 %2, label %block_2, label %block_1
+ /// ; Replacing the labels with relative values, this becomes:
+ /// block_1:
+ /// br i1 %0, distance 1, distance 2
+ /// block_2:
+ /// br i1 %1, distance -1, distance 0
+ /// block_3:
+ /// br i1 %2, distance -1, distance -2
+ /// \endcode
+ /// Taking block_2 as our example, block_1 is "behind" block_2, and block_2 is
+ /// "ahead" of block_2.
+ SmallVector<int, 4> RelativeBlockLocations;
+
/// Gather the information that is difficult to gather for an Instruction, or
/// is changed. i.e. the operands of an Instruction and the Types of those
/// operands. This extra information allows for similarity matching to make
/// assertions that allow for more flexibility when checking for whether an
/// Instruction performs the same operation.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL);
+ IRInstructionData(IRInstructionDataList &IDL);
+
+ /// Fills data structures for IRInstructionData when it is constructed from a
+ /// reference or a pointer.
+ void initializeInstruction();
/// Get the predicate that the compare instruction is using for hashing the
/// instruction. the IRInstructionData must be wrapping a CmpInst.
@@ -145,6 +175,16 @@ struct IRInstructionData : ilist_node<IRInstructionData> {
/// \return the consistent comparison predicate.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI);
+ /// For an IRInstructionData containing a branch, finds the
+ /// relative distances from the source basic block to the target by taking
+ /// the difference of the number assigned to the current basic block and the
+ /// target basic block of the branch.
+ ///
+ /// \param BasicBlockToInteger - The mapping of basic blocks to their location
+ /// in the module.
+ void
+ setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+
/// Hashes \p Value based on its opcode, types, and operand types.
/// Two IRInstructionData instances produce the same hash when they perform
/// the same operation.
@@ -198,7 +238,8 @@ struct IRInstructionData : ilist_node<IRInstructionData> {
IRInstructionDataList *IDL = nullptr;
};
-struct IRInstructionDataList : simple_ilist<IRInstructionData> {};
+struct IRInstructionDataList
+ : simple_ilist<IRInstructionData, ilist_sentinel_tracking<true>> {};
/// Compare one IRInstructionData class to another IRInstructionData class for
/// whether they are performing the same operation, and can be mapped to the
@@ -288,6 +329,10 @@ struct IRInstructionMapper {
DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>
InstructionIntegerMap;
+ /// A mapping for a basic block in a module to its assigned number/location
+ /// in the module.
+ DenseMap<BasicBlock *, unsigned> BasicBlockToInteger;
+
/// Set if we added an illegal number in the previous step.
/// Since each illegal number is unique, we only need one of them between
/// each range of legal numbers. This lets us make sure we don't add more
@@ -322,6 +367,14 @@ struct IRInstructionMapper {
IRInstructionData *allocateIRInstructionData(Instruction &I, bool Legality,
IRInstructionDataList &IDL);
+ /// Get an empty allocated IRInstructionData struct using the
+ /// InstDataAllocator.
+ ///
+ /// \param IDL - The InstructionDataList that the IRInstructionData is
+ /// inserted into.
+ /// \returns An allocated IRInstructionData struct.
+ IRInstructionData *allocateIRInstructionData(IRInstructionDataList &IDL);
+
/// Get an allocated IRInstructionDataList object using the IDLAllocator.
///
/// \returns An allocated IRInstructionDataList object.
@@ -329,6 +382,24 @@ struct IRInstructionMapper {
IRInstructionDataList *IDL = nullptr;
+ /// Assigns values to all the basic blocks in function \p F starting from
+ /// integer \p BBNumber.
+ ///
+ /// \param F - The function containing the basic blocks to assign numbers to.
+ /// \param BBNumber - The number to start from.
+ void initializeForBBs(Function &F, unsigned &BBNumber) {
+ for (BasicBlock &BB : F)
+ BasicBlockToInteger.insert(std::make_pair(&BB, BBNumber++));
+ }
+
+ /// Assigns values to all the basic blocks in Module \p M.
+ /// \param M - The module containing the basic blocks to assign numbers to.
+ void initializeForBBs(Module &M) {
+ unsigned BBNumber = 0;
+ for (Function &F : M)
+ initializeForBBs(F, BBNumber);
+ }
+
/// Maps the Instructions in a BasicBlock \p BB to legal or illegal integers
/// determined by \p InstrType. Two Instructions are mapped to the same value
/// if they are close as defined by the InstructionData class above.
@@ -386,7 +457,11 @@ struct IRInstructionMapper {
InstructionClassification() {}
// TODO: Determine a scheme to resolve when the label is similar enough.
- InstrType visitBranchInst(BranchInst &BI) { return Illegal; }
+ InstrType visitBranchInst(BranchInst &BI) {
+ if (EnableBranches)
+ return Legal;
+ return Illegal;
+ }
// TODO: Determine a scheme to resolve when the labels are similar enough.
InstrType visitPHINode(PHINode &PN) { return Illegal; }
// TODO: Handle allocas.
@@ -419,6 +494,10 @@ struct IRInstructionMapper {
// TODO: Handle interblock similarity.
InstrType visitTerminator(Instruction &I) { return Illegal; }
InstrType visitInstruction(Instruction &I) { return Legal; }
+
+ // The flag variable that lets the classifier know whether we should
+ // allow branches to be checked for similarity.
+ bool EnableBranches = false;
};
/// Maps an Instruction to a member of InstrType.
@@ -488,6 +567,12 @@ private:
DenseMap<Value *, unsigned> ValueToNumber;
/// Stores the mapping of the number to the value assigned this number.
DenseMap<unsigned, Value *> NumberToValue;
+ /// Stores the mapping of a value's number to canonical numbering in the
+ /// candidate's respective similarity group.
+ DenseMap<unsigned, unsigned> NumberToCanonNum;
+ /// Stores the mapping of canonical number in the candidate's respective
+ /// similarity group to a value number.
+ DenseMap<unsigned, unsigned> CanonNumToNumber;
/// @}
public:
@@ -506,13 +591,27 @@ public:
static bool isSimilar(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
- /// \param A - The first IRInstructionCandidate to compare.
- /// \param B - The second IRInstructionCandidate to compare.
+ /// \param [in] A - The first IRInstructionCandidate to compare.
+ /// \param [in] B - The second IRInstructionCandidate to compare.
/// \returns True when every IRInstructionData in \p A is structurally similar
/// to \p B.
static bool compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B);
+ /// \param [in] A - The first IRInstructionCandidate to compare.
+ /// \param [in] B - The second IRInstructionCandidate to compare.
+ /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
+ /// candidate \p A to candidate \p B.
+ /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
+ /// candidate \p B to candidate \p A.
+ /// \returns True when every IRInstructionData in \p A is structurally similar
+ /// to \p B.
+ static bool
+ compareStructure(const IRSimilarityCandidate &A,
+ const IRSimilarityCandidate &B,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
+
struct OperandMapping {
/// The IRSimilarityCandidate that holds the instruction the OperVals were
/// pulled from.
@@ -526,6 +625,21 @@ public:
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMapping;
};
+ /// A helper struct to hold the candidate, for a branch instruction, the
+ /// relative location of a label, and the label itself. This is mostly to
+ /// group the values together before passing them as a bundle to a function.
+ struct RelativeLocMapping {
+ /// The IRSimilarityCandidate that holds the instruction the relative
+ /// location was pulled from.
+ const IRSimilarityCandidate &IRSC;
+
+ /// The relative location to be analyzed.
+ int RelativeLocation;
+
+ /// The corresponding value.
+ Value *OperVal;
+ };
+
/// Compare the operands in \p A and \p B and check that the current mapping
/// of global value numbers from \p A to \p B and \p B to \p A is consistent.
///
@@ -549,6 +663,94 @@ public:
static bool compareCommutativeOperandMapping(OperandMapping A,
OperandMapping B);
+ /// Compare the relative locations in \p A and \p B and check that the
+ /// distances match if both locations are contained in the region, and that
+ /// the branches both point outside the region if they do not.
+ /// Example Region:
+ /// \code
+ /// entry:
+ /// br i1 %0, label %block_1, label %block_3
+ /// block_0:
+ /// br i1 %0, label %block_1, label %block_2
+ /// block_1:
+ /// br i1 %0, label %block_2, label %block_3
+ /// block_2:
+ /// br i1 %1, label %block_1, label %block_4
+ /// block_3:
+ /// br i1 %2, label %block_2, label %block_5
+ /// \endcode
+ /// If we compare the branches in block_0 and block_1 the relative values are
+ /// 1 and 2 for both, so we consider this a match.
+ ///
+ /// If we compare the branches in entry and block_0 the relative values are
+ /// 2 and 3, and 1 and 2 respectively. Since these are not the same we do not
+ /// consider them a match.
+ ///
+ /// If we compare the branches in block_1 and block_2 the relative values are
+ /// 1 and 2, and -1 and None respectively. As a result we do not consider
+ /// these to be the same.
+ ///
+ /// If we compare the branches in block_2 and block_3 the relative values are
+ /// -1 and None for both. We do consider these to be a match.
+ ///
+ /// \param A - The first IRInstructionCandidate, relative location value,
+ /// and incoming block.
+ /// \param B - The second IRInstructionCandidate, relative location value,
+ /// and incoming block.
+ /// \returns true if the relative locations match.
+ static bool checkRelativeLocations(RelativeLocMapping A,
+ RelativeLocMapping B);
+
+ /// Create a mapping from the value numbering to a different separate set of
+ /// numbers. This will serve as a guide for relating one candidate to another.
+ /// The canonical number gives us the ability to identify which global value
+ /// number in one candidate relates to the global value number in the other.
+ ///
+ /// \param [in, out] CurrCand - The IRSimilarityCandidate to create a
+ /// canonical numbering for.
+ static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand);
+
+ /// Create a mapping for the value numbering of the calling
+ /// IRSimilarityCandidate, to a different separate set of numbers, based on
+ /// the canonical ordering in \p SourceCand. These are defined based on the
+ /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
+ /// these relationships should have the same information, just in opposite
+ /// directions.
+ ///
+ /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+ /// canonical numbering from.
+ /// \param ToSourceMapping - The mapping of value numbers from this candidate
+ /// to \p SourceCand.
+ /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
+ /// to this candidate.
+ void createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand,
+ DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+ DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
+
+ /// \param [in,out] BBSet - The set to track the basic blocks.
+ void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
+ for (IRInstructionData &ID : *this) {
+ BasicBlock *BB = ID.Inst->getParent();
+ if (BBSet.contains(BB))
+ continue;
+ BBSet.insert(BB);
+ }
+ }
+
+ /// \param [in,out] BBSet - The set to track the basic blocks.
+ /// \param [in,out] BBList - A list in order of use to track the basic blocks.
+ void getBasicBlocks(DenseSet<BasicBlock *> &BBSet,
+ SmallVector<BasicBlock *> &BBList) const {
+ for (IRInstructionData &ID : *this) {
+ BasicBlock *BB = ID.Inst->getParent();
+ if (BBSet.contains(BB))
+ continue;
+ BBSet.insert(BB);
+ BBList.push_back(BB);
+ }
+ }
+
/// Compare the start and end indices of the two IRSimilarityCandidates for
/// whether they overlap. If the start instruction of one
/// IRSimilarityCandidate is less than the end instruction of the other, and
@@ -611,6 +813,32 @@ public:
return VNIt->second;
}
+ /// Find the canonical number from the global value number \p N stored in the
+ /// candidate.
+ ///
+ /// \param N - The global value number to find the canonical number for.
+ /// \returns An optional containing the value, and None if it could not be
+ /// found.
+ Optional<unsigned> getCanonicalNum(unsigned N) {
+ DenseMap<unsigned, unsigned>::iterator NCIt = NumberToCanonNum.find(N);
+ if (NCIt == NumberToCanonNum.end())
+ return None;
+ return NCIt->second;
+ }
+
+ /// Find the global value number from the canonical number \p N stored in the
+ /// candidate.
+ ///
+ /// \param N - The canonical number to find the global value number for.
+ /// \returns An optional containing the value, and None if it could not be
+ /// found.
+ Optional<unsigned> fromCanonicalNum(unsigned N) {
+ DenseMap<unsigned, unsigned>::iterator CNIt = CanonNumToNumber.find(N);
+ if (CNIt == CanonNumToNumber.end())
+ return None;
+ return CNIt->second;
+ }
+
/// \param RHS - The IRSimilarityCandidate to compare against
/// \returns true if the IRSimilarityCandidate occurs after the
/// IRSimilarityCandidate in the program.
@@ -623,6 +851,9 @@ public:
iterator end() const { return std::next(iterator(back())); }
};
+typedef DenseMap<IRSimilarityCandidate *,
+ DenseMap<unsigned, DenseSet<unsigned>>>
+ CandidateGVNMapping;
typedef std::vector<IRSimilarityCandidate> SimilarityGroup;
typedef std::vector<SimilarityGroup> SimilarityGroupList;
@@ -651,8 +882,9 @@ typedef std::vector<SimilarityGroup> SimilarityGroupList;
/// analyzing the module.
class IRSimilarityIdentifier {
public:
- IRSimilarityIdentifier()
- : Mapper(&InstDataAllocator, &InstDataListAllocator) {}
+ IRSimilarityIdentifier(bool MatchBranches = true)
+ : Mapper(&InstDataAllocator, &InstDataListAllocator),
+ EnableBranches(MatchBranches) {}
private:
/// Map the instructions in the module to unsigned integers, using mapping
@@ -728,6 +960,10 @@ private:
/// instance of IRInstructionData.
IRInstructionMapper Mapper;
+ /// The flag variable that marks whether we should check branches for
+ /// similarity, or only look within basic blocks.
+ bool EnableBranches = true;
+
/// The SimilarityGroups found with the most recent run of \ref
/// findSimilarity. None if there is no recent run.
Optional<SimilarityGroupList> SimilarityCandidates;
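(A hedged end-to-end sketch, assuming the existing findSimilarity(Module &) entry point, showing the new branch-aware mode and the basic-block helper added above; the function name is illustrative.)

#include "llvm/ADT/DenseSet.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IR/Module.h"
using namespace llvm;
using namespace llvm::IRSimilarity;

static void collectSimilarRegions(Module &M) {
  // MatchBranches enables the new cross-block matching.
  IRSimilarityIdentifier Identifier(/*MatchBranches=*/true);
  for (SimilarityGroup &Group : Identifier.findSimilarity(M))
    for (IRSimilarityCandidate &Cand : Group) {
      DenseSet<BasicBlock *> BBs;
      Cand.getBasicBlocks(BBs); // basic blocks spanned by the candidate
    }
}
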
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 82e1b14960bd..c26dbc457949 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -36,20 +36,24 @@ class DominatorTree;
/// These are the kinds of recurrences that we support.
enum class RecurKind {
- None, ///< Not a recurrence.
- Add, ///< Sum of integers.
- Mul, ///< Product of integers.
- Or, ///< Bitwise or logical OR of integers.
- And, ///< Bitwise or logical AND of integers.
- Xor, ///< Bitwise or logical XOR of integers.
- SMin, ///< Signed integer min implemented in terms of select(cmp()).
- SMax, ///< Signed integer max implemented in terms of select(cmp()).
- UMin, ///< Unisgned integer min implemented in terms of select(cmp()).
- UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
- FAdd, ///< Sum of floats.
- FMul, ///< Product of floats.
- FMin, ///< FP min implemented in terms of select(cmp()).
- FMax ///< FP max implemented in terms of select(cmp()).
+ None, ///< Not a recurrence.
+ Add, ///< Sum of integers.
+ Mul, ///< Product of integers.
+ Or, ///< Bitwise or logical OR of integers.
+ And, ///< Bitwise or logical AND of integers.
+ Xor, ///< Bitwise or logical XOR of integers.
+ SMin, ///< Signed integer min implemented in terms of select(cmp()).
+ SMax, ///< Signed integer max implemented in terms of select(cmp()).
+ UMin, ///< Unsigned integer min implemented in terms of select(cmp()).
+ UMax, ///< Unsigned integer max implemented in terms of select(cmp()).
+ FAdd, ///< Sum of floats.
+ FMul, ///< Product of floats.
+ FMin, ///< FP min implemented in terms of select(cmp()).
+ FMax, ///< FP max implemented in terms of select(cmp()).
+ SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
+ ///< invariant
+ SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop
+ ///< invariant
};
/// The RecurrenceDescriptor is used to identify recurrences variables in a
@@ -112,12 +116,14 @@ public:
};
/// Returns a struct describing if the instruction 'I' can be a recurrence
- /// variable of type 'Kind'. If the recurrence is a min/max pattern of
- /// select(icmp()) this function advances the instruction pointer 'I' from the
- /// compare instruction to the select instruction and stores this pointer in
- /// 'PatternLastInst' member of the returned struct.
- static InstDesc isRecurrenceInstr(Instruction *I, RecurKind Kind,
- InstDesc &Prev, FastMathFlags FMF);
+ /// variable of type 'Kind' for a Loop \p L and reduction PHI \p Phi.
+ /// If the recurrence is a min/max pattern of select(icmp()) this function
+ /// advances the instruction pointer 'I' from the compare instruction to the
+ /// select instruction and stores this pointer in 'PatternLastInst' member of
+ /// the returned struct.
+ static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I,
+ RecurKind Kind, InstDesc &Prev,
+ FastMathFlags FuncFMF);
/// Returns true if instruction I has multiple uses in Insts
static bool hasMultipleUsesOf(Instruction *I,
@@ -127,20 +133,29 @@ public:
/// Returns true if all uses of the instruction I is within the Set.
static bool areAllUsesIn(Instruction *I, SmallPtrSetImpl<Instruction *> &Set);
- /// Returns a struct describing if the instruction is a
- /// Select(ICmp(X, Y), X, Y) instruction pattern corresponding to a min(X, Y)
- /// or max(X, Y). \p Prev specifies the description of an already processed
- /// select instruction, so its corresponding cmp can be matched to it.
- static InstDesc isMinMaxSelectCmpPattern(Instruction *I,
- const InstDesc &Prev);
+ /// Returns a struct describing if the instruction is a llvm.(s/u)(min/max),
+ /// llvm.minnum/maxnum or a Select(ICmp(X, Y), X, Y) pair of instructions
+ /// corresponding to a min(X, Y) or max(X, Y), matching the recurrence kind \p
+ /// Kind. \p Prev specifies the description of an already processed select
+ /// instruction, so its corresponding cmp can be matched to it.
+ static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind,
+ const InstDesc &Prev);
+
+ /// Returns a struct describing whether the instruction is either a
+ /// Select(ICmp(A, B), X, Y), or
+ /// Select(FCmp(A, B), X, Y)
+ /// where one of (X, Y) is a loop invariant integer and the other is a PHI
+ /// value. \p Prev specifies the description of an already processed select
+ /// instruction, so its corresponding cmp can be matched to it.
+ static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
+ Instruction *I, InstDesc &Prev);
/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
static InstDesc isConditionalRdxPattern(RecurKind Kind, Instruction *I);
/// Returns identity corresponding to the RecurrenceKind.
- static Constant *getRecurrenceIdentity(RecurKind K, Type *Tp,
- FastMathFlags FMF);
+ Value *getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF);
/// Returns the opcode corresponding to the RecurrenceKind.
static unsigned getOpcode(RecurKind Kind);
@@ -150,7 +165,7 @@ public:
/// non-null, the minimal bit width needed to compute the reduction will be
/// computed.
static bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
- FastMathFlags FMF,
+ FastMathFlags FuncFMF,
RecurrenceDescriptor &RedDes,
DemandedBits *DB = nullptr,
AssumptionCache *AC = nullptr,
@@ -220,6 +235,12 @@ public:
return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind);
}
+ /// Returns true if the recurrence kind is of the form
+ /// select(cmp(),x,y) where one of (x,y) is loop invariant.
+ static bool isSelectCmpRecurrenceKind(RecurKind Kind) {
+ return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp;
+ }
+
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
@@ -329,6 +350,11 @@ public:
: Instruction::BinaryOpsEnd;
}
+ Type *getElementType() const {
+ assert(IK == IK_PtrInduction && "Only pointer induction has element type");
+ return ElementType;
+ }
+
/// Returns a reference to the type cast instructions in the induction
/// update chain, that are redundant when guarded with a runtime
/// SCEV overflow check.
@@ -340,6 +366,7 @@ private:
/// Private constructor - used by \c isInductionPHI.
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
BinaryOperator *InductionBinOp = nullptr,
+ Type *ElementType = nullptr,
SmallVectorImpl<Instruction *> *Casts = nullptr);
/// Start value.
@@ -350,6 +377,9 @@ private:
const SCEV *Step = nullptr;
// Instruction that advances induction variable.
BinaryOperator *InductionBinOp = nullptr;
+ // Element type for pointer induction variables.
+ // TODO: This can be dropped once support for typed pointers is removed.
+ Type *ElementType = nullptr;
// Instructions used for type-casts of the induction variable,
// that are redundant when guarded with a runtime SCEV overflow check.
SmallVector<Instruction *, 2> RedundantCasts;
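(For the new SelectICmp/SelectFCmp kinds, a minimal sketch, assuming the existing isReductionPHI entry point; the helper name is made up for the example.)

#include "llvm/Analysis/IVDescriptors.h"
using namespace llvm;

// True if Phi is a reduction of the form select(cmp(), x, y) with one of
// (x, y) loop invariant, as described by the new recurrence kinds.
static bool isSelectCmpReduction(PHINode *Phi, Loop *L) {
  RecurrenceDescriptor RD;
  if (!RecurrenceDescriptor::isReductionPHI(Phi, L, RD))
    return false;
  return RecurrenceDescriptor::isSelectCmpRecurrenceKind(
      RD.getRecurrenceKind());
}
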
diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h
index f8ea3bcca229..e2026a4d5875 100644
--- a/llvm/include/llvm/Analysis/IVUsers.h
+++ b/llvm/include/llvm/Analysis/IVUsers.h
@@ -157,9 +157,6 @@ public:
/// dump - This method is used for debugging.
void dump() const;
-
-protected:
- bool AddUsersImpl(Instruction *I, SmallPtrSetImpl<Loop*> &SimpleLoopNests);
};
Pass *createIVUsersPass();
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index c27aaf0db8f2..9f9bc3a5e71b 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -22,6 +22,7 @@ class CallBase;
class Function;
class Module;
class OptimizationRemarkEmitter;
+struct ReplayInlinerSettings;
/// There are 3 scenarios we can use the InlineAdvisor:
/// - Default - use manual heuristics.
@@ -143,7 +144,11 @@ public:
/// be up-to-date wrt previous inlining decisions. \p MandatoryOnly indicates
/// only mandatory (always-inline) call sites should be recommended - this
/// allows the InlineAdvisor to track such inlinings.
- /// Returns an InlineAdvice with the inlining recommendation.
+ /// Returns:
+ /// - An InlineAdvice with the inlining recommendation.
+ /// - Null when no recommendation is made (https://reviews.llvm.org/D110658).
+ /// TODO: Consider removing the Null return scenario by incorporating the
+ /// SampleProfile inliner into an InlineAdvisor.
std::unique_ptr<InlineAdvice> getAdvice(CallBase &CB,
bool MandatoryOnly = false);
@@ -157,6 +162,12 @@ public:
/// to prepare for a partial update.
virtual void onPassExit() {}
+ /// Called when the module is invalidated. We let the advisor implementation
+ /// decide what to refresh - in the case of the development mode
+ /// implementation, for example, we wouldn't want to delete the whole object
+ /// and need to re-load the model evaluator.
+ virtual void onModuleInvalidated() {}
+
protected:
InlineAdvisor(Module &M, FunctionAnalysisManager &FAM);
virtual std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) = 0;
@@ -219,15 +230,18 @@ public:
InlineAdvisorAnalysis() = default;
struct Result {
Result(Module &M, ModuleAnalysisManager &MAM) : M(M), MAM(MAM) {}
- bool invalidate(Module &, const PreservedAnalyses &,
+ bool invalidate(Module &, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &) {
- // InlineAdvisor must be preserved across analysis invalidations.
- return false;
+ if (Advisor && !PA.areAllPreserved())
+ Advisor->onModuleInvalidated();
+ // Check whether the analysis has been explicitly invalidated. Otherwise,
+ // it's stateless and remains preserved.
+ auto PAC = PA.getChecker<InlineAdvisorAnalysis>();
+ return !PAC.preservedWhenStateless();
}
bool tryCreate(InlineParams Params, InliningAdvisorMode Mode,
- StringRef ReplayFile);
+ const ReplayInlinerSettings &ReplaySettings);
InlineAdvisor *getAdvisor() const { return Advisor.get(); }
- void clear() { Advisor.reset(); }
private:
Module &M;
@@ -263,12 +277,16 @@ shouldInline(CallBase &CB, function_ref<InlineCost(CallBase &CB)> GetInlineCost,
/// Emit ORE message.
void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
const BasicBlock *Block, const Function &Callee,
- const Function &Caller, const InlineCost &IC,
- bool ForProfileContext = false,
+ const Function &Caller, bool IsMandatory,
+ function_ref<void(OptimizationRemark &)> ExtraContext = {},
const char *PassName = nullptr);
-/// get call site location as string
-std::string getCallSiteLocation(DebugLoc DLoc);
+/// Emit ORE message based in cost (default heuristic).
+void emitInlinedIntoBasedOnCost(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
+ const BasicBlock *Block, const Function &Callee,
+ const Function &Caller, const InlineCost &IC,
+ bool ForProfileContext = false,
+ const char *PassName = nullptr);
/// Add location info to ORE message.
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc);
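(A minimal consumer-side sketch of the null-return contract documented above; the inlining step itself is elided and the helper name is an example.)

#include "llvm/Analysis/InlineAdvisor.h"
#include <memory>
using namespace llvm;

static bool tryInline(InlineAdvisor &Advisor, CallBase &CB) {
  std::unique_ptr<InlineAdvice> Advice = Advisor.getAdvice(CB);
  if (!Advice) // no recommendation was made for this call site
    return false;
  if (!Advice->isInliningRecommended()) {
    Advice->recordUnattemptedInlining();
    return false;
  }
  // ... perform the actual inlining here ...
  Advice->recordInlining();
  return true;
}
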
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index 4e1b28d4633f..b22841343b1a 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -213,6 +213,9 @@ struct InlineParams {
/// Indicate whether we should allow inline deferral.
Optional<bool> EnableDeferral = true;
+
+ /// Indicate whether we allow inlining for recursive calls.
+ Optional<bool> AllowRecursiveCall = false;
};
/// Generate the parameters to tune the inline cost analysis based only on the
diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h
new file mode 100644
index 000000000000..def3192356f4
--- /dev/null
+++ b/llvm/include/llvm/Analysis/InlineOrder.h
@@ -0,0 +1,172 @@
+//===- InlineOrder.h - Inlining order abstraction ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_ANALYSIS_INLINEORDER_H
+#define LLVM_ANALYSIS_INLINEORDER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include <algorithm>
+#include <utility>
+
+namespace llvm {
+class CallBase;
+class Function;
+class Module;
+
+template <typename T> class InlineOrder {
+public:
+ using reference = T &;
+ using const_reference = const T &;
+
+ virtual ~InlineOrder() {}
+
+ virtual size_t size() = 0;
+
+ virtual void push(const T &Elt) = 0;
+
+ virtual T pop() = 0;
+
+ virtual const_reference front() = 0;
+
+ virtual void erase_if(function_ref<bool(T)> Pred) = 0;
+
+ bool empty() { return !size(); }
+};
+
+template <typename T, typename Container = SmallVector<T, 16>>
+class DefaultInlineOrder : public InlineOrder<T> {
+ using reference = T &;
+ using const_reference = const T &;
+
+public:
+ size_t size() override { return Calls.size() - FirstIndex; }
+
+ void push(const T &Elt) override { Calls.push_back(Elt); }
+
+ T pop() override {
+ assert(size() > 0);
+ return Calls[FirstIndex++];
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ return Calls[FirstIndex];
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
+ Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
+ Calls.end());
+ }
+
+private:
+ Container Calls;
+ size_t FirstIndex = 0;
+};
+
+class InlineSizePriority {
+public:
+ InlineSizePriority(int Size) : Size(Size) {}
+
+ static bool isMoreDesirable(const InlineSizePriority &S1,
+ const InlineSizePriority &S2) {
+ return S1.Size < S2.Size;
+ }
+
+ static InlineSizePriority evaluate(CallBase *CB) {
+ Function *Callee = CB->getCalledFunction();
+ return InlineSizePriority(Callee->getInstructionCount());
+ }
+
+ int Size;
+};
+
+template <typename PriorityT>
+class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
+ using T = std::pair<CallBase *, int>;
+ using HeapT = std::pair<CallBase *, PriorityT>;
+ using reference = T &;
+ using const_reference = const T &;
+
+ static bool cmp(const HeapT &P1, const HeapT &P2) {
+ return PriorityT::isMoreDesirable(P2.second, P1.second);
+ }
+
+ // A call site could become less desirable for inlining because of the size
+ // growth from prior inlining into the callee. This method is used to lazily
+ // update the desirability of a call site if it's decreasing. It is only
+ // called on pop() or front(), not every time the desirability changes. When
+ // the desirability of the front call site decreases, an updated one would be
+ // pushed right back into the heap. For simplicity, those cases where
+ // the desirability of a call site increases are ignored here.
+ void adjust() {
+ bool Changed = false;
+ do {
+ CallBase *CB = Heap.front().first;
+ const PriorityT PreviousGoodness = Heap.front().second;
+ const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
+ Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
+ if (Changed) {
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ Heap.push_back({CB, CurrentGoodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ }
+ } while (Changed);
+ }
+
+public:
+ size_t size() override { return Heap.size(); }
+
+ void push(const T &Elt) override {
+ CallBase *CB = Elt.first;
+ const int InlineHistoryID = Elt.second;
+ const PriorityT Goodness = PriorityT::evaluate(CB);
+
+ Heap.push_back({CB, Goodness});
+ std::push_heap(Heap.begin(), Heap.end(), cmp);
+ InlineHistoryMap[CB] = InlineHistoryID;
+ }
+
+ T pop() override {
+ assert(size() > 0);
+ adjust();
+
+ CallBase *CB = Heap.front().first;
+ T Result = std::make_pair(CB, InlineHistoryMap[CB]);
+ InlineHistoryMap.erase(CB);
+ std::pop_heap(Heap.begin(), Heap.end(), cmp);
+ Heap.pop_back();
+ return Result;
+ }
+
+ const_reference front() override {
+ assert(size() > 0);
+ adjust();
+
+ CallBase *CB = Heap.front().first;
+ return *InlineHistoryMap.find(CB);
+ }
+
+ void erase_if(function_ref<bool(T)> Pred) override {
+ auto PredWrapper = [=](HeapT P) -> bool {
+ return Pred(std::make_pair(P.first, 0));
+ };
+ llvm::erase_if(Heap, PredWrapper);
+ std::make_heap(Heap.begin(), Heap.end(), cmp);
+ }
+
+private:
+ SmallVector<HeapT, 16> Heap;
+ DenseMap<CallBase *, int> InlineHistoryMap;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_INLINEORDER_H
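(A usage sketch of the queue abstraction above; the worklist contents are assumed to be direct calls, since InlineSizePriority dereferences the callee, and the history id of -1 is just a placeholder.)

#include "llvm/Analysis/InlineOrder.h"
using namespace llvm;

static void drainInOrder(SmallVectorImpl<CallBase *> &Candidates) {
  // Smaller callees are considered more desirable and are popped first.
  PriorityInlineOrder<InlineSizePriority> Calls;
  for (CallBase *CB : Candidates)
    Calls.push({CB, /*InlineHistoryID=*/-1});
  while (!Calls.empty()) {
    std::pair<CallBase *, int> P = Calls.pop();
    // ... attempt to inline P.first, pushing any new call sites back in ...
  }
}
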
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index efaf1847276b..f0f8e4bc9175 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -248,7 +248,7 @@ Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const SimplifyQuery &Q);
/// Given operands for a GetElementPtrInst, fold the result or return null.
-Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds,
const SimplifyQuery &Q);
/// Given operands for an InsertValueInst, fold the result or return null.
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index ca276d2f3cf8..0580f4d7b226 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -145,7 +145,7 @@ public:
/// around but clear them.
explicit operator bool() const;
- /// Returnss the \c Kind of the edge.
+ /// Returns the \c Kind of the edge.
Kind getKind() const;
/// Test whether the edge represents a direct call to a function.
@@ -307,9 +307,9 @@ public:
/// A node in the call graph.
///
- /// This represents a single node. It's primary roles are to cache the list of
- /// callees, de-duplicate and provide fast testing of whether a function is
- /// a callee, and facilitate iteration of child nodes in the graph.
+ /// This represents a single node. Its primary roles are to cache the list of
+ /// callees, de-duplicate and provide fast testing of whether a function is a
+ /// callee, and facilitate iteration of child nodes in the graph.
///
/// The node works much like an optional in order to lazily populate the
/// edges of each node. Until populated, there are no edges. Once populated,
@@ -392,7 +392,7 @@ public:
/// Internal helper to directly replace the function with a new one.
///
- /// This is used to facilitate tranfsormations which need to replace the
+ /// This is used to facilitate transformations which need to replace the
/// formal Function object but directly move the body and users from one to
/// the other.
void replaceFunction(Function &NewF);
@@ -419,7 +419,7 @@ public:
/// outer structure. SCCs do not support mutation of the call graph, that
/// must be done through the containing \c RefSCC in order to fully reason
/// about the ordering and connections of the graph.
- class SCC {
+ class LLVM_EXTERNAL_VISIBILITY SCC {
friend class LazyCallGraph;
friend class LazyCallGraph::Node;
@@ -435,7 +435,7 @@ public:
Nodes.clear();
}
- /// Print a short descrtiption useful for debugging or logging.
+ /// Print a short description useful for debugging or logging.
///
/// We print the function names in the SCC wrapped in '()'s and skipping
/// the middle functions if there are a large number.
@@ -467,9 +467,10 @@ public:
/// Verify invariants about the SCC.
///
/// This will attempt to validate all of the basic invariants within an
- /// SCC, but not that it is a strongly connected componet per-se. Primarily
- /// useful while building and updating the graph to check that basic
- /// properties are in place rather than having inexplicable crashes later.
+ /// SCC, but not that it is a strongly connected component per se.
+ /// Primarily useful while building and updating the graph to check that
+ /// basic properties are in place rather than having inexplicable crashes
+ /// later.
void verify();
#endif
@@ -511,7 +512,7 @@ public:
/// Provide a short name by printing this SCC to a std::string.
///
- /// This copes with the fact that we don't have a name per-se for an SCC
+ /// This copes with the fact that we don't have a name per se for an SCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
@@ -644,7 +645,7 @@ public:
/// Provide a short name by printing this RefSCC to a std::string.
///
- /// This copes with the fact that we don't have a name per-se for an RefSCC
+ /// This copes with the fact that we don't have a name per se for a RefSCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
@@ -1085,47 +1086,9 @@ public:
/// updates that set with every constant visited.
///
/// For each defined function, calls \p Callback with that function.
- template <typename CallbackT>
static void visitReferences(SmallVectorImpl<Constant *> &Worklist,
SmallPtrSetImpl<Constant *> &Visited,
- CallbackT Callback) {
- while (!Worklist.empty()) {
- Constant *C = Worklist.pop_back_val();
-
- if (Function *F = dyn_cast<Function>(C)) {
- if (!F->isDeclaration())
- Callback(*F);
- continue;
- }
-
- // The blockaddress constant expression is a weird special case, we can't
- // generically walk its operands the way we do for all other constants.
- if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- // If we've already visited the function referred to by the block
- // address, we don't need to revisit it.
- if (Visited.count(BA->getFunction()))
- continue;
-
- // If all of the blockaddress' users are instructions within the
- // referred to function, we don't need to insert a cycle.
- if (llvm::all_of(BA->users(), [&](User *U) {
- if (Instruction *I = dyn_cast<Instruction>(U))
- return I->getFunction() == BA->getFunction();
- return false;
- }))
- continue;
-
- // Otherwise we should go visit the referred to function.
- Visited.insert(BA->getFunction());
- Worklist.push_back(BA->getFunction());
- continue;
- }
-
- for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)).second)
- Worklist.push_back(cast<Constant>(Op));
- }
- }
+ function_ref<void(Function &)> Callback);
///@}
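(The walk itself is unchanged; only the callback type becomes a function_ref and the body moves out of line. A sketch of a call site follows; the names are illustrative.)

#include "llvm/Analysis/LazyCallGraph.h"
using namespace llvm;

// Collect every defined function reachable through the constants seeded into
// Worklist, reusing Visited to avoid revisiting constants.
static void collectReferencedFunctions(SmallVectorImpl<Constant *> &Worklist,
                                       SmallPtrSetImpl<Constant *> &Visited,
                                       SmallVectorImpl<Function *> &Out) {
  LazyCallGraph::visitReferences(Worklist, Visited,
                                 [&Out](Function &F) { Out.push_back(&F); });
}
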
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 0a0ef1536caf..2b4edfac61fc 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -177,21 +177,11 @@ public:
/// Register the location (instructions are given increasing numbers)
/// of a write access.
- void addAccess(StoreInst *SI) {
- Value *Ptr = SI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
- InstMap.push_back(SI);
- ++AccessIdx;
- }
+ void addAccess(StoreInst *SI);
/// Register the location (instructions are given increasing numbers)
/// of a write access.
- void addAccess(LoadInst *LI) {
- Value *Ptr = LI->getPointerOperand();
- Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
- InstMap.push_back(LI);
- ++AccessIdx;
- }
+ void addAccess(LoadInst *LI);
/// Check whether the dependencies between the accesses are safe.
///
@@ -664,15 +654,14 @@ Value *stripIntegerCast(Value *V);
/// If necessary this method will version the stride of the pointer according
/// to \p PtrToStride and therefore add further predicates to \p PSE.
///
-/// If \p OrigPtr is not null, use it to look up the stride value instead of \p
-/// Ptr. \p PtrToStride provides the mapping between the pointer value and its
+/// \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr = nullptr);
+ Value *Ptr);
-/// If the pointer has a constant stride return it in units of its
-/// element size. Otherwise return zero.
+/// If the pointer has a constant stride return it in units of the access type
+/// size. Otherwise return zero.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
@@ -681,7 +670,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
-int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+ const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false, bool ShouldCheckWrap = true);
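(A sketch of the updated query: callers now name the access type explicitly rather than relying on the pointer's pointee type. The helper name and the unit-stride check are illustrative.)

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool isUnitStrideLoad(LoadInst *LI, const Loop *L,
                             PredicatedScalarEvolution &PSE) {
  Value *Ptr = LI->getPointerOperand();
  // The returned stride is in units of the access type's size; 0 means the
  // stride is not a known constant.
  return getPtrStride(PSE, LI->getType(), Ptr, L) == 1;
}
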
diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
index 92db1d67fc4e..bc8a1e74e447 100644
--- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
@@ -58,6 +58,7 @@ struct LoopStandardAnalysisResults {
TargetLibraryInfo &TLI;
TargetTransformInfo &TTI;
BlockFrequencyInfo *BFI;
+ BranchProbabilityInfo *BPI;
MemorySSA *MSSA;
};
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 164ec50e47bc..15c9d911ab80 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -527,7 +527,7 @@ extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
/// in the CFG are necessarily loops.
-class Loop : public LoopBase<BasicBlock, Loop> {
+class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
public:
/// A range representing the start and end location of a loop.
class LocRange {
@@ -950,7 +950,7 @@ public:
///
/// Note that because loops form a forest of trees, preorder is equivalent to
/// reverse postorder.
- SmallVector<LoopT *, 4> getLoopsInPreorder();
+ SmallVector<LoopT *, 4> getLoopsInPreorder() const;
/// Return all of the loops in the function in preorder across the loop
/// nests, with siblings in *reverse* program order.
@@ -960,7 +960,7 @@ public:
///
/// Also note that this is *not* a reverse preorder. Only the siblings are in
/// reverse program order.
- SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder();
+ SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder() const;
/// Return the inner most loop that BB lives in. If a basic block is in no
/// loop (for example the entry node), null is returned.
@@ -1213,6 +1213,13 @@ public:
};
+/// Enable verification of loop info.
+///
+/// The flag enables checks which are expensive and are disabled by default
+/// unless the `EXPENSIVE_CHECKS` macro is defined. The `-verify-loop-info`
+/// flag allows the checks to be enabled selectively without re-compilation.
+extern bool VerifyLoopInfo;
+
// Allow clients to walk the list of nested loops...
template <> struct GraphTraits<const Loop *> {
typedef const Loop *NodeRef;
@@ -1305,6 +1312,10 @@ bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name);
llvm::Optional<int>
getOptionalIntLoopAttribute(const Loop *TheLoop, StringRef Name);
+/// Find named metadata for a loop with an integer value. Return \p Default if
+/// not set.
+int getIntLoopAttribute(const Loop *TheLoop, StringRef Name, int Default = 0);
+
/// Find string metadata for loop
///
/// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
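(A short sketch of the new integer helper; the metadata name is only an example of a loop attribute a client might query.)

#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// Returns the requested unroll count, or 0 when the loop carries no
// "llvm.loop.unroll.count" metadata.
static int getUnrollCountHint(const Loop *L) {
  return getIntLoopAttribute(L, "llvm.loop.unroll.count", /*Default=*/0);
}
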
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 2cc9afb7c2cd..b8b8330d0fe1 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -574,7 +574,8 @@ void LoopInfoBase<BlockT, LoopT>::analyze(const DomTreeBase<BlockT> &DomTree) {
}
template <class BlockT, class LoopT>
-SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
+SmallVector<LoopT *, 4>
+LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() const {
SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
// The outer-most loop actually goes into the result in the same relative
// order as we walk it. But LoopInfo stores the top level loops in reverse
@@ -592,7 +593,7 @@ SmallVector<LoopT *, 4> LoopInfoBase<BlockT, LoopT>::getLoopsInPreorder() {
template <class BlockT, class LoopT>
SmallVector<LoopT *, 4>
-LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() {
+LoopInfoBase<BlockT, LoopT>::getLoopsInReverseSiblingPreorder() const {
SmallVector<LoopT *, 4> PreOrderLoops, PreOrderWorklist;
// The outer-most loop actually goes into the result in the same relative
// order as we walk it. LoopInfo stores the top level loops in reverse
diff --git a/llvm/include/llvm/Analysis/LoopNestAnalysis.h b/llvm/include/llvm/Analysis/LoopNestAnalysis.h
index 9a749a1c8eae..3d4a064cf7e3 100644
--- a/llvm/include/llvm/Analysis/LoopNestAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopNestAnalysis.h
@@ -21,11 +21,14 @@
namespace llvm {
using LoopVectorTy = SmallVector<Loop *, 8>;
+
class LPMUpdater;
/// This class represents a loop nest and can be used to query its properties.
-class LoopNest {
+class LLVM_EXTERNAL_VISIBILITY LoopNest {
public:
+ using InstrVectorTy = SmallVector<const Instruction *>;
+
/// Construct a loop nest rooted by loop \p Root.
LoopNest(Loop &Root, ScalarEvolution &SE);
@@ -48,6 +51,12 @@ public:
static bool arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE);
+ /// Return a vector of instructions that prevent the LoopNest given
+ /// by loops \p OuterLoop and \p InnerLoop from being perfect.
+ static InstrVectorTy getInterveningInstructions(const Loop &OuterLoop,
+ const Loop &InnerLoop,
+ ScalarEvolution &SE);
+
/// Return the maximum nesting depth of the loop nest rooted by loop \p Root.
/// For example given the loop nest:
/// \code
@@ -150,6 +159,17 @@ public:
protected:
const unsigned MaxPerfectDepth; // maximum perfect nesting depth level.
LoopVectorTy Loops; // the loops in the nest (in breadth first order).
+
+private:
+ enum LoopNestEnum {
+ PerfectLoopNest,
+ ImperfectLoopNest,
+ InvalidLoopStructure,
+ OuterLoopLowerBoundUnknown
+ };
+ static LoopNestEnum analyzeLoopNestForPerfectNest(const Loop &OuterLoop,
+ const Loop &InnerLoop,
+ ScalarEvolution &SE);
};
raw_ostream &operator<<(raw_ostream &, const LoopNest &);
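(A hedged sketch combining the existing arePerfectlyNested query with the new getInterveningInstructions helper to explain why a two-deep nest is imperfect; the function name is an example.)

#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void explainImperfection(const Loop &OuterLoop, const Loop &InnerLoop,
                                ScalarEvolution &SE, raw_ostream &OS) {
  if (LoopNest::arePerfectlyNested(OuterLoop, InnerLoop, SE))
    return;
  // Each reported instruction sits between the two loops and blocks perfect
  // nesting.
  for (const Instruction *I :
       LoopNest::getInterveningInstructions(OuterLoop, InnerLoop, SE))
    OS << *I << "\n";
}
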
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 54edbb823263..a218561e61c7 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -38,6 +38,7 @@ public:
bool isForcedToStop() const { return ForceStop; }
int64_t getLocalCalls(Function &F);
const MLModelRunner &getModelRunner() const { return *ModelRunner.get(); }
+ void onModuleInvalidated() override { Invalid = true; }
protected:
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
@@ -55,6 +56,7 @@ protected:
private:
int64_t getModuleIRSize() const;
+ bool Invalid = true;
std::unique_ptr<CallGraph> CG;
int64_t NodeCount = 0;
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index f40b99968fd3..48aeef371e3d 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -106,9 +106,6 @@
namespace llvm {
-/// Enables memory ssa as a dependency for loop passes.
-extern cl::opt<bool> EnableMSSALoopDependency;
-
class AllocaInst;
class Function;
class Instruction;
@@ -786,21 +783,22 @@ public:
/// dominates Use \p B.
bool dominates(const MemoryAccess *A, const Use &B) const;
+ enum class VerificationLevel { Fast, Full };
/// Verify that MemorySSA is self consistent (IE definitions dominate
/// all uses, uses appear in the right places). This is used by unit tests.
- void verifyMemorySSA() const;
+ void verifyMemorySSA(VerificationLevel = VerificationLevel::Fast) const;
/// Used in various insertion functions to specify whether we are talking
/// about the beginning or end of a block.
enum InsertionPlace { Beginning, End, BeforeTerminator };
protected:
- // Used by Memory SSA annotater, dumpers, and wrapper pass
- friend class MemorySSAAnnotatedWriter;
+ // Used by Memory SSA dumpers and wrapper pass
friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
- void verifyOrderingDominationAndDefUses(Function &F) const;
+ void verifyOrderingDominationAndDefUses(
+ Function &F, VerificationLevel = VerificationLevel::Fast) const;
void verifyDominationNumbers(const Function &F) const;
void verifyPrevDefInPhis(Function &F) const;
@@ -898,6 +896,13 @@ private:
unsigned NextID;
};
+/// Enables verification of MemorySSA.
+///
+/// The checks which this flag enables are expensive and disabled by default
+/// unless `EXPENSIVE_CHECKS` is defined. The flag `-verify-memoryssa` can be
+/// used to selectively enable the verification without re-compilation.
+extern bool VerifyMemorySSA;
+
// Internal MemorySSA utils, for use by MemorySSA classes and walkers
class MemorySSAUtil {
protected:
@@ -956,6 +961,17 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+/// Printer pass for \c MemorySSA via the walker.
+class MemorySSAWalkerPrinterPass
+ : public PassInfoMixin<MemorySSAWalkerPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit MemorySSAWalkerPrinterPass(raw_ostream &OS) : OS(OS) {}
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
/// Verifier pass for \c MemorySSA.
struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
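(A sketch of the two verification levels; how the exhaustive check is gated here is only an example, not the in-tree policy.)

#include "llvm/Analysis/MemorySSA.h"
using namespace llvm;

static void checkMemorySSA(MemorySSA &MSSA) {
  // The cheap walk is the default; the exhaustive one is opt-in.
  MSSA.verifyMemorySSA(); // VerificationLevel::Fast
  if (VerifyMemorySSA)    // -verify-memoryssa or an EXPENSIVE_CHECKS build
    MSSA.verifyMemorySSA(MemorySSA::VerificationLevel::Full);
}
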
diff --git a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
index 62bdade95d96..17062ab907a6 100644
--- a/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
+++ b/llvm/include/llvm/Analysis/ObjCARCAnalysisUtils.h
@@ -78,14 +78,17 @@ inline const Value *GetUnderlyingObjCPtr(const Value *V) {
}
/// A wrapper for GetUnderlyingObjCPtr used for results memoization.
-inline const Value *
-GetUnderlyingObjCPtrCached(const Value *V,
- DenseMap<const Value *, WeakTrackingVH> &Cache) {
- if (auto InCache = Cache.lookup(V))
- return InCache;
+inline const Value *GetUnderlyingObjCPtrCached(
+ const Value *V,
+ DenseMap<const Value *, std::pair<WeakVH, WeakTrackingVH>> &Cache) {
+ // The entry is invalid if either value handle is null.
+ auto InCache = Cache.lookup(V);
+ if (InCache.first && InCache.second)
+ return InCache.second;
const Value *Computed = GetUnderlyingObjCPtr(V);
- Cache[V] = const_cast<Value *>(Computed);
+ Cache[V] =
+ std::make_pair(const_cast<Value *>(V), const_cast<Value *>(Computed));
return Computed;
}
@@ -168,8 +171,8 @@ bool IsPotentialRetainableObjPtr(const Value *Op, AAResults &AA);
/// Helper for GetARCInstKind. Determines what kind of construct CS
/// is.
inline ARCInstKind GetCallSiteClass(const CallBase &CB) {
- for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I)
- if (IsPotentialRetainableObjPtr(*I))
+ for (const Use &U : CB.args())
+ if (IsPotentialRetainableObjPtr(U))
return CB.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser;
return CB.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call;
@@ -204,11 +207,10 @@ inline bool IsObjCIdentifiedObject(const Value *V) {
return true;
StringRef Section = GV->getSection();
- if (Section.find("__message_refs") != StringRef::npos ||
- Section.find("__objc_classrefs") != StringRef::npos ||
- Section.find("__objc_superrefs") != StringRef::npos ||
- Section.find("__objc_methname") != StringRef::npos ||
- Section.find("__cstring") != StringRef::npos)
+ if (Section.contains("__message_refs") ||
+ Section.contains("__objc_classrefs") ||
+ Section.contains("__objc_superrefs") ||
+ Section.contains("__objc_methname") || Section.contains("__cstring"))
return true;
}
}
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 2566bfbcf61c..362dd6c29992 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -11,9 +11,11 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_IR_OBJCARCUTIL_H
-#define LLVM_IR_OBJCARCUTIL_H
+#ifndef LLVM_ANALYSIS_OBJCARCUTIL_H
+#define LLVM_ANALYSIS_OBJCARCUTIL_H
+#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
@@ -24,13 +26,6 @@ inline const char *getRVMarkerModuleFlagStr() {
return "clang.arc.retainAutoreleasedReturnValueMarker";
}
-enum AttachedCallOperandBundle : unsigned { RVOB_Retain, RVOB_Claim };
-
-inline AttachedCallOperandBundle
-getAttachedCallOperandBundleEnum(bool IsRetain) {
- return IsRetain ? RVOB_Retain : RVOB_Claim;
-}
-
inline bool hasAttachedCallOpBundle(const CallBase *CB) {
// Ignore the bundle if the return type is void. Global optimization passes
// can turn the called function's return type to void. That should happen only
@@ -43,14 +38,32 @@ inline bool hasAttachedCallOpBundle(const CallBase *CB) {
.hasValue();
}
-inline bool hasAttachedCallOpBundle(const CallBase *CB, bool IsRetain) {
- assert(hasAttachedCallOpBundle(CB) &&
- "call doesn't have operand bundle clang_arc_attachedcall");
+/// This function returns operand bundle clang_arc_attachedcall's argument,
+/// which is the address of the ARC runtime function.
+inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) {
auto B = CB->getOperandBundle(LLVMContext::OB_clang_arc_attachedcall);
- if (!B.hasValue())
- return false;
- return cast<ConstantInt>(B->Inputs[0])->getZExtValue() ==
- getAttachedCallOperandBundleEnum(IsRetain);
+ if (!B.hasValue() || B->Inputs.size() == 0)
+ return None;
+
+ return cast<Function>(B->Inputs[0]);
+}
+
+/// Check whether the function is retainRV/claimRV.
+inline bool isRetainOrClaimRV(ARCInstKind Kind) {
+ return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::ClaimRV;
+}
+
+/// This function returns the ARCInstKind of the function attached to operand
+/// bundle clang_arc_attachedcall. It returns None if the call doesn't have the
+/// operand bundle or the operand is null. Otherwise it returns either RetainRV
+/// or ClaimRV.
+inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) {
+ Optional<Function *> Fn = getAttachedARCFunction(CB);
+ if (!Fn.hasValue())
+ return ARCInstKind::None;
+ auto FnClass = GetFunctionClass(*Fn);
+ assert(isRetainOrClaimRV(FnClass) && "unexpected ARC runtime function");
+ return FnClass;
}
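A small usage sketch for the helpers above (CB is an assumed `const CallBase *`; illustrative only):

    // Classify the runtime function carried by the clang_arc_attachedcall bundle, if any.
    ARCInstKind Kind = getAttachedARCFunctionKind(CB);
    if (Kind != ARCInstKind::None) {
      bool IsRetain = (Kind == ARCInstKind::RetainRV);   // otherwise it is ClaimRV
      (void)IsRetain;
    }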
} // end namespace objcarc
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index c95404d96f4e..886800d8a0f5 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -134,9 +134,13 @@ public:
bool isColdCount(uint64_t C) const;
/// Returns true if count \p C is considered hot with regard to a given
/// hot percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isHotCountNthPercentile(int PercentileCutoff, uint64_t C) const;
/// Returns true if count \p C is considered cold with regard to a given
/// cold percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
/// Returns true if BasicBlock \p BB is considered hot.
bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
@@ -144,10 +148,14 @@ public:
bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
/// Returns true if BasicBlock \p BB is considered hot with regard to a given
/// hot percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
BlockFrequencyInfo *BFI) const;
/// Returns true if BasicBlock \p BB is considered cold with regard to a given
/// cold percentile cutoff value.
+ /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
+ /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
BlockFrequencyInfo *BFI) const;
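The cutoff encoding described in the comments above can be illustrated with a small hypothetical helper (not part of the API): multiply the percentile by 10000 to get the 6-digit fixed-point value.

    // 99.5 -> 995000, 90 -> 900000, 99.9999 -> 999999
    static int encodePercentileCutoff(double Percent) {
      return static_cast<int>(Percent * 10000.0);
    }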
/// Returns true if the call site \p CB is considered hot.
@@ -162,11 +170,11 @@ public:
uint64_t getOrCompColdCountThreshold() const;
/// Returns HotCountThreshold if set.
uint64_t getHotCountThreshold() const {
- return HotCountThreshold ? HotCountThreshold.getValue() : 0;
+ return HotCountThreshold.getValueOr(0);
}
/// Returns ColdCountThreshold if set.
uint64_t getColdCountThreshold() const {
- return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
+ return ColdCountThreshold.getValueOr(0);
}
private:
diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
index 3018bcc241d8..a0eb9af62205 100644
--- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h
@@ -20,6 +20,46 @@ class Function;
class Module;
class OptimizationRemarkEmitter;
+struct CallSiteFormat {
+ enum class Format : int {
+ Line,
+ LineColumn,
+ LineDiscriminator,
+ LineColumnDiscriminator
+ };
+
+ bool outputColumn() const {
+ return OutputFormat == Format::LineColumn ||
+ OutputFormat == Format::LineColumnDiscriminator;
+ }
+
+ bool outputDiscriminator() const {
+ return OutputFormat == Format::LineDiscriminator ||
+ OutputFormat == Format::LineColumnDiscriminator;
+ }
+
+ Format OutputFormat;
+};
+
+/// Replay Inliner Setup
+struct ReplayInlinerSettings {
+ enum class Scope : int { Function, Module };
+ enum class Fallback : int { Original, AlwaysInline, NeverInline };
+
+ StringRef ReplayFile;
+ Scope ReplayScope;
+ Fallback ReplayFallback;
+ CallSiteFormat ReplayFormat;
+};
+
+/// Get call site location as a string with the given format
+std::string formatCallSiteLocation(DebugLoc DLoc, const CallSiteFormat &Format);
+
+std::unique_ptr<InlineAdvisor> getReplayInlineAdvisor(
+ Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks);
+
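A hedged setup sketch for the replay machinery declared above; the file name is a placeholder and the chosen enum values are arbitrary:

    ReplayInlinerSettings Settings;
    Settings.ReplayFile = "inline-remarks.yaml";                      // placeholder path
    Settings.ReplayScope = ReplayInlinerSettings::Scope::Function;    // replay only annotated callers
    Settings.ReplayFallback = ReplayInlinerSettings::Fallback::Original;
    Settings.ReplayFormat.OutputFormat = CallSiteFormat::Format::LineColumn;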
/// Replay inline advisor that uses optimization remarks from inlining of
/// previous build to guide current inlining. This is useful for inliner tuning.
class ReplayInlineAdvisor : public InlineAdvisor {
@@ -27,15 +67,24 @@ public:
ReplayInlineAdvisor(Module &M, FunctionAnalysisManager &FAM,
LLVMContext &Context,
std::unique_ptr<InlineAdvisor> OriginalAdvisor,
- StringRef RemarksFile, bool EmitRemarks);
+ const ReplayInlinerSettings &ReplaySettings,
+ bool EmitRemarks);
std::unique_ptr<InlineAdvice> getAdviceImpl(CallBase &CB) override;
bool areReplayRemarksLoaded() const { return HasReplayRemarks; }
private:
- StringSet<> InlineSitesFromRemarks;
+ bool hasInlineAdvice(Function &F) const {
+ return (ReplaySettings.ReplayScope ==
+ ReplayInlinerSettings::Scope::Module) ||
+ CallersToReplay.contains(F.getName());
+ }
std::unique_ptr<InlineAdvisor> OriginalAdvisor;
bool HasReplayRemarks = false;
+ const ReplayInlinerSettings ReplaySettings;
bool EmitRemarks = false;
+
+ StringMap<bool> InlineSitesFromRemarks;
+ StringSet<> CallersToReplay;
};
} // namespace llvm
#endif // LLVM_ANALYSIS_REPLAYINLINEADVISOR_H
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index ae9c73fede96..a2260688e3d6 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -25,7 +25,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
@@ -112,6 +111,24 @@ public:
/// Note that NUW and NSW are also valid properties of a recurrence, and
/// either implies NW. For convenience, NW will be set for a recurrence
/// whenever either NUW or NSW are set.
+ ///
+ /// We require that the flag on a SCEV apply to the entire scope in which
+  /// that SCEV is defined. A SCEV's scope is the set of locations dominated by
+ /// a defining location, which is in turn described by the following rules:
+ /// * A SCEVUnknown is at the point of definition of the Value.
+ /// * A SCEVConstant is defined at all points.
+ /// * A SCEVAddRec is defined starting with the header of the associated
+ /// loop.
+  ///   * All other SCEVs are defined at the earliest point all operands are
+ /// defined.
+ ///
+ /// The above rules describe a maximally hoisted form (without regards to
+ /// potential control dependence). A SCEV is defined anywhere a
+ /// corresponding instruction could be defined in said maximally hoisted
+ /// form. Note that SCEVUDivExpr (currently the only expression type which
+ /// can trap) can be defined per these rules in regions where it would trap
+ /// at runtime. A SCEV being defined does not require the existence of any
+ /// instruction within the defined scope.
enum NoWrapFlags {
FlagAnyWrap = 0, // No guarantee.
FlagNW = (1 << 0), // No self-wrap.
@@ -472,6 +489,10 @@ public:
clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) {
return (SCEV::NoWrapFlags)(Flags & ~OffFlags);
}
+ LLVM_NODISCARD static bool hasFlags(SCEV::NoWrapFlags Flags,
+ SCEV::NoWrapFlags TestFlags) {
+ return TestFlags == maskFlags(Flags, TestFlags);
+  }
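A brief usage sketch of the new hasFlags helper (AR is an assumed `const SCEVAddRecExpr *`):

    if (ScalarEvolution::hasFlags(AR->getNoWrapFlags(), SCEV::FlagNUW)) {
      // Every flag in the test mask (here just NUW) is guaranteed to be set.
    }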
ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree &DT, LoopInfo &LI);
@@ -498,13 +519,26 @@ public:
// Returns a wider type among {Ty1, Ty2}.
Type *getWiderType(Type *Ty1, Type *Ty2) const;
+ /// Return true if there exists a point in the program at which both
+ /// A and B could be operands to the same instruction.
+ /// SCEV expressions are generally assumed to correspond to instructions
+  /// which could exist in IR.  In general, this requires that there exists
+ /// a use point in the program where all operands dominate the use.
+ ///
+ /// Example:
+ /// loop {
+ /// if
+ /// loop { v1 = load @global1; }
+ /// else
+ /// loop { v2 = load @global2; }
+ /// }
+  ///  No SCEV with operands v1 and v2 can exist in this program.
+ bool instructionCouldExistWitthOperands(const SCEV *A, const SCEV *B);
+
/// Return true if the SCEV is a scAddRecExpr or it contains
/// scAddRecExpr. The result will be cached in HasRecMap.
bool containsAddRecurrence(const SCEV *S);
- /// Erase Value from ValueExprMap and ExprValueMap.
- void eraseValueFromMap(Value *V);
-
/// Is operation \p BinOp between \p LHS and \p RHS provably does not have
/// a signed/unsigned overflow (\p Signed)?
bool willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
@@ -516,6 +550,12 @@ public:
std::pair<SCEV::NoWrapFlags, bool /*Deduced*/>
getStrengthenedNoWrapFlagsFromBinOp(const OverflowingBinaryOperator *OBO);
+ /// Notify this ScalarEvolution that \p User directly uses SCEVs in \p Ops.
+ void registerUser(const SCEV *User, ArrayRef<const SCEV *> Ops);
+
+ /// Return true if the SCEV expression contains an undef value.
+ bool containsUndefs(const SCEV *S) const;
+
/// Return a SCEV expression for the full generality of the specified
/// expression.
const SCEV *getSCEV(Value *V);
@@ -700,6 +740,9 @@ public:
/// cases do exist.
const SCEV *getPointerBase(const SCEV *V);
+ /// Compute an expression equivalent to S - getPointerBase(S).
+ const SCEV *removePointerBase(const SCEV *S);
+
/// Return a SCEV expression for the specified value at the specified scope
/// in the program. The L value specifies a loop nest to evaluate the
/// expression at, where null is the top-level or a specified loop is
@@ -735,9 +778,13 @@ public:
/// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
/// count". A "trip count" is the number of times the header of the loop
/// will execute if an exit is taken after the specified number of backedges
- /// have been taken. (e.g. TripCount = ExitCount + 1) A zero result
- /// must be interpreted as a loop having an unknown trip count.
- const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
+ /// have been taken. (e.g. TripCount = ExitCount + 1). Note that the
+ /// expression can overflow if ExitCount = UINT_MAX. \p Extend controls
+ /// how potential overflow is handled. If true, a wider result type is
+  /// returned (ex: EC = 255 (i8), TC = 256 (i9)).  If false, the result
+  /// wraps with 2s-complement semantics (ex: EC = 255 (i8), TC = 0 (i8)).
+ const SCEV *getTripCountFromExitCount(const SCEV *ExitCount,
+ bool Extend = true);
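To make the overflow remark above concrete, a hedged APInt illustration of the two behaviours:

    APInt EC(8, 255);                   // ExitCount in i8
    APInt WrappedTC = EC + 1;           // Extend = false: wraps around to 0 in i8
    APInt ExtendedTC = EC.zext(9) + 1;  // Extend = true: 256 fits in i9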
/// Returns the exact trip count of the loop if we can compute it, and
/// the result is a small constant. '0' is used to represent an unknown
@@ -762,6 +809,13 @@ public:
/// Returns 0 if the trip count is unknown or not constant.
unsigned getSmallConstantMaxTripCount(const Loop *L);
+  /// Returns the upper bound of the loop trip count inferred from array size.
+  /// The bound follows from the fact that accessing bytes outside the
+  /// statically allocated size of the array is immediate UB.
+  /// Returns SCEVCouldNotCompute if the trip count could not be inferred
+  /// from array accesses.
+ const SCEV *getConstantMaxTripCountFromArray(const Loop *L);
+
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
/// always a multiple of the returned value. Returns 1 if the trip count is
@@ -988,14 +1042,13 @@ public:
/// Test if the given expression is known to satisfy the condition described
/// by Pred, LHS, and RHS in the given Context.
bool isKnownPredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, const Instruction *Context);
+ const SCEV *RHS, const Instruction *CtxI);
/// Check whether the condition described by Pred, LHS, and RHS is true or
/// false in the given \p Context. If we know it, return the evaluation of
/// this condition. If neither is proved, return None.
Optional<bool> evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS,
- const Instruction *Context);
+ const SCEV *RHS, const Instruction *CtxI);
/// Test if the condition described by Pred, LHS, RHS is known to be true on
/// every iteration of the loop of the recurrency LHS.
@@ -1045,7 +1098,7 @@ public:
getLoopInvariantExitCondDuringFirstIterations(ICmpInst::Predicate Pred,
const SCEV *LHS,
const SCEV *RHS, const Loop *L,
- const Instruction *Context,
+ const Instruction *CtxI,
const SCEV *MaxIter);
/// Simplify LHS and RHS in a comparison with predicate Pred. Return true
@@ -1092,110 +1145,11 @@ public:
/// Return the size of an element read or written by Inst.
const SCEV *getElementSize(Instruction *Inst);
- /// Compute the array dimensions Sizes from the set of Terms extracted from
- /// the memory access function of this SCEVAddRecExpr (second step of
- /// delinearization).
- void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize);
-
void print(raw_ostream &OS) const;
void verify() const;
bool invalidate(Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
- /// Collect parametric terms occurring in step expressions (first step of
- /// delinearization).
- void collectParametricTerms(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Terms);
-
- /// Return in Subscripts the access functions for each dimension in Sizes
- /// (third step of delinearization).
- void computeAccessFunctions(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes);
-
- /// Gathers the individual index expressions from a GEP instruction.
- ///
- /// This function optimistically assumes the GEP references into a fixed size
- /// array. If this is actually true, this function returns a list of array
- /// subscript expressions in \p Subscripts and a list of integers describing
- /// the size of the individual array dimensions in \p Sizes. Both lists have
- /// either equal length or the size list is one element shorter in case there
- /// is no known size available for the outermost array dimension. Returns true
- /// if successful and false otherwise.
- bool getIndexExpressionsFromGEP(const GetElementPtrInst *GEP,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<int> &Sizes);
-
- /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
- /// subscripts and sizes of an array access.
- ///
- /// The delinearization is a 3 step process: the first two steps compute the
- /// sizes of each subscript and the third step computes the access functions
- /// for the delinearized array:
- ///
- /// 1. Find the terms in the step functions
- /// 2. Compute the array size
- /// 3. Compute the access function: divide the SCEV by the array size
- /// starting with the innermost dimensions found in step 2. The Quotient
- /// is the SCEV to be divided in the next step of the recursion. The
- /// Remainder is the subscript of the innermost dimension. Loop over all
- /// array dimensions computed in step 2.
- ///
- /// To compute a uniform array size for several memory accesses to the same
- /// object, one can collect in step 1 all the step terms for all the memory
- /// accesses, and compute in step 2 a unique array shape. This guarantees
- /// that the array shape will be the same across all memory accesses.
- ///
- /// FIXME: We could derive the result of steps 1 and 2 from a description of
- /// the array shape given in metadata.
- ///
- /// Example:
- ///
- /// A[][n][m]
- ///
- /// for i
- /// for j
- /// for k
- /// A[j+k][2i][5i] =
- ///
- /// The initial SCEV:
- ///
- /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
- ///
- /// 1. Find the different terms in the step functions:
- /// -> [2*m, 5, n*m, n*m]
- ///
- /// 2. Compute the array size: sort and unique them
- /// -> [n*m, 2*m, 5]
- /// find the GCD of all the terms = 1
- /// divide by the GCD and erase constant terms
- /// -> [n*m, 2*m]
- /// GCD = m
- /// divide by GCD -> [n, 2]
- /// remove constant terms
- /// -> [n]
- /// size of the array is A[unknown][n][m]
- ///
- /// 3. Compute the access function
- /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
- /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
- /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
- /// The remainder is the subscript of the innermost array dimension: [5i].
- ///
- /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
- /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
- /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
- /// The Remainder is the subscript of the next array dimension: [2i].
- ///
- /// The subscript of the outermost dimension is the Quotient: [j+k].
- ///
- /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
- void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize);
-
/// Return the DataLayout associated with the module this SCEV instance is
/// operating on.
const DataLayout &getDataLayout() const {
@@ -1234,6 +1188,18 @@ public:
/// Try to apply information from loop guards for \p L to \p Expr.
const SCEV *applyLoopGuards(const SCEV *Expr, const Loop *L);
+ /// Return true if the loop has no abnormal exits. That is, if the loop
+ /// is not infinite, it must exit through an explicit edge in the CFG.
+ /// (As opposed to either a) throwing out of the function or b) entering a
+ /// well defined infinite loop in some callee.)
+ bool loopHasNoAbnormalExits(const Loop *L) {
+ return getLoopProperties(L).HasNoAbnormalExits;
+ }
+
+ /// Return true if this loop is finite by assumption. That is,
+ /// to be infinite, it must also be undefined.
+ bool loopIsFiniteByAssumption(const Loop *L);
+
private:
/// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
/// Value is deleted.
@@ -1532,15 +1498,15 @@ private:
LoopDispositions;
struct LoopProperties {
- /// Set to true if the loop contains no instruction that can have side
- /// effects (i.e. via throwing an exception, volatile or atomic access).
- bool HasNoAbnormalExits;
-
/// Set to true if the loop contains no instruction that can abnormally exit
/// the loop (i.e. via throwing an exception, by terminating the thread
/// cleanly or by infinite looping in a called function). Strictly
/// speaking, the last one is not leaving the loop, but is identical to
/// leaving the loop for reasoning about undefined behavior.
+ bool HasNoAbnormalExits;
+
+ /// Set to true if the loop contains no instruction that can have side
+ /// effects (i.e. via throwing an exception, volatile or atomic access).
bool HasNoSideEffects;
};
@@ -1554,14 +1520,6 @@ private:
return getLoopProperties(L).HasNoSideEffects;
}
- bool loopHasNoAbnormalExits(const Loop *L) {
- return getLoopProperties(L).HasNoAbnormalExits;
- }
-
- /// Return true if this loop is finite by assumption. That is,
- /// to be infinite, it must also be undefined.
- bool loopIsFiniteByAssumption(const Loop *L);
-
/// Compute a LoopDisposition value.
LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
@@ -1574,6 +1532,9 @@ private:
/// Compute a BlockDisposition value.
BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB);
+  /// Stores all SCEVs that use a given SCEV as a direct operand.
+ DenseMap<const SCEV *, SmallPtrSet<const SCEV *, 8> > SCEVUsers;
+
/// Memoized results from getRange
DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
@@ -1600,22 +1561,22 @@ private:
/// copied if its needed for longer.
const ConstantRange &getRangeRef(const SCEV *S, RangeSignHint Hint);
- /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}.
+ /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}.
/// Helper for \c getRange.
- ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
+ ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step,
const SCEV *MaxBECount, unsigned BitWidth);
/// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
- /// Start,+,\p Stop}<nw>.
+ /// Start,+,\p Step}<nw>.
ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
const SCEV *MaxBECount,
unsigned BitWidth,
RangeSignHint SignHint);
/// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
- /// Stop} by "factoring out" a ternary expression from the add recurrence.
+ /// Step} by "factoring out" a ternary expression from the add recurrence.
/// Helper called by \c getRange.
- ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
+ ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Step,
const SCEV *MaxBECount, unsigned BitWidth);
/// If the unknown expression U corresponds to a simple recurrence, return
@@ -1761,12 +1722,6 @@ private:
BasicBlock *ExitingBB,
bool IsSubExpr);
- /// Given an exit condition of 'icmp op load X, cst', try to see if we can
- /// compute the backedge-taken count.
- ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate p);
-
/// Compute the exit limit of a loop that is controlled by a
/// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip
/// count in these cases (since SCEV has no way of expressing them), but we
@@ -1839,7 +1794,7 @@ private:
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS, const SCEV *FoundRHS,
- const Instruction *Context);
+ const Instruction *CtxI);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by FoundPred, FoundLHS, FoundRHS is
@@ -1914,7 +1869,7 @@ private:
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
- const Instruction *Context);
+ const Instruction *CtxI);
/// Test whether the condition described by Pred, LHS, and RHS is true
/// whenever the condition described by Pred, FoundLHS, and FoundRHS is
@@ -1956,12 +1911,18 @@ private:
bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
SCEV::NoWrapFlags &Flags);
- /// Drop memoized information computed for S.
- void forgetMemoizedResults(const SCEV *S);
+ /// Drop memoized information for all \p SCEVs.
+ void forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs);
+
+ /// Helper for forgetMemoizedResults.
+ void forgetMemoizedResultsImpl(const SCEV *S);
/// Return an existing SCEV for V if there is one, otherwise return nullptr.
const SCEV *getExistingSCEV(Value *V);
+ /// Erase Value from ValueExprMap and ExprValueMap.
+ void eraseValueFromMap(Value *V);
+
/// Return false iff given SCEV contains a SCEVUnknown with NULL value-
/// pointer.
bool checkValidity(const SCEV *S) const;
@@ -1995,6 +1956,27 @@ private:
/// would trigger undefined behavior on overflow.
SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
+ /// Return a scope which provides an upper bound on the defining scope of
+ /// 'S'. Specifically, return the first instruction in said bounding scope.
+ /// Return nullptr if the scope is trivial (function entry).
+ /// (See scope definition rules associated with flag discussion above)
+ const Instruction *getNonTrivialDefiningScopeBound(const SCEV *S);
+
+ /// Return a scope which provides an upper bound on the defining scope for
+ /// a SCEV with the operands in Ops. The outparam Precise is set if the
+ /// bound found is a precise bound (i.e. must be the defining scope.)
+ const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
+ bool &Precise);
+
+ /// Wrapper around the above for cases which don't care if the bound
+ /// is precise.
+ const Instruction *getDefiningScopeBound(ArrayRef<const SCEV *> Ops);
+
+ /// Given two instructions in the same function, return true if we can
+ /// prove B must execute given A executes.
+ bool isGuaranteedToTransferExecutionTo(const Instruction *A,
+ const Instruction *B);
+
/// Return true if the SCEV corresponding to \p I is never poison. Proving
/// this is more complex than proving that just \p I is never poison, since
/// SCEV commons expressions across control flow, and you can have cases
@@ -2036,8 +2018,11 @@ private:
/// permitted by Start, End, and Stride. This is for loops of the form
/// {Start, +, Stride} LT End.
///
- /// Precondition: the induction variable is known to be positive. We *don't*
- /// assert these preconditions so please be careful.
+ /// Preconditions:
+ /// * the induction variable is known to be positive.
+ /// * the induction variable is assumed not to overflow (i.e. either it
+ /// actually doesn't, or we'd have to immediately execute UB)
+ /// We *don't* assert these preconditions so please be careful.
const SCEV *computeMaxBECountForLT(const SCEV *Start, const SCEV *Stride,
const SCEV *End, unsigned BitWidth,
bool IsSigned);
@@ -2072,31 +2057,20 @@ private:
/// an add rec on said loop.
void getUsedLoops(const SCEV *S, SmallPtrSetImpl<const Loop *> &LoopsUsed);
- /// Find all of the loops transitively used in \p S, and update \c LoopUsers
- /// accordingly.
- void addToLoopUseLists(const SCEV *S);
-
/// Try to match the pattern generated by getURemExpr(A, B). If successful,
/// Assign A and B to LHS and RHS, respectively.
bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
/// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
- /// `UniqueSCEVs`.
- ///
- /// The first component of the returned tuple is the SCEV if found and null
- /// otherwise. The second component is the `FoldingSetNodeID` that was
- /// constructed to look up the SCEV and the third component is the insertion
- /// point.
- std::tuple<SCEV *, FoldingSetNodeID, void *>
- findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
+ /// `UniqueSCEVs`. Return if found, else nullptr.
+ SCEV *findExistingSCEVInCache(SCEVTypes SCEVType, ArrayRef<const SCEV *> Ops);
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
- /// This maps loops to a list of SCEV expressions that (transitively) use said
- /// loop.
- DenseMap<const Loop *, SmallVector<const SCEV *, 4>> LoopUsers;
+ /// This maps loops to a list of addrecs that directly use said loop.
+ DenseMap<const Loop *, SmallVector<const SCEVAddRecExpr *, 4>> LoopUsers;
/// Cache tentative mappings from UnknownSCEVs in a Loop, to a SCEV expression
/// they can be rewritten into under certain predicates.
diff --git a/llvm/include/llvm/Analysis/StackLifetime.h b/llvm/include/llvm/Analysis/StackLifetime.h
index df342a9533ee..239aec4e258b 100644
--- a/llvm/include/llvm/Analysis/StackLifetime.h
+++ b/llvm/include/llvm/Analysis/StackLifetime.h
@@ -191,6 +191,8 @@ public:
StackLifetimePrinterPass(raw_ostream &OS, StackLifetime::LivenessType Type)
: Type(Type), OS(OS) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
index 59c1e3e3bd56..751735f3e59f 100644
--- a/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
+++ b/llvm/include/llvm/Analysis/StackSafetyAnalysis.h
@@ -75,7 +75,15 @@ public:
StackSafetyGlobalInfo &operator=(StackSafetyGlobalInfo &&);
~StackSafetyGlobalInfo();
+ // Whether we can prove that all accesses to this Alloca are in-range and
+ // during its lifetime.
bool isSafe(const AllocaInst &AI) const;
+
+  // Returns true if the instruction can be proven to perform only two kinds
+  // of memory accesses:
+  //  (1) in-bounds accesses to live stack locations, or
+  //  (2) accesses to non-stack locations.
+ bool stackAccessIsSafe(const Instruction &I) const;
void print(raw_ostream &O) const;
void dump() const;
};
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 22bfeda0efd0..6e3e1380535e 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -76,7 +76,7 @@ class TargetLibraryInfoImpl {
/// Return true if the function type FTy is valid for the library function
/// F, regardless of whether the function is available.
bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F,
- const DataLayout *DL) const;
+ const Module &M) const;
public:
/// List of known vector-functions libraries.
@@ -115,6 +115,8 @@ public:
///
/// If it is one of the known library functions, return true and set F to the
/// corresponding value.
+ ///
+ /// FDecl is assumed to have a parent Module when using this function.
bool getLibFunc(const Function &FDecl, LibFunc &F) const;
/// Forces a function to be marked as unavailable.
@@ -238,7 +240,7 @@ public:
else {
// Disable individual libc/libm calls in TargetLibraryInfo.
LibFunc LF;
- AttributeSet FnAttrs = (*F)->getAttributes().getFnAttributes();
+ AttributeSet FnAttrs = (*F)->getAttributes().getFnAttrs();
for (const Attribute &Attr : FnAttrs) {
if (!Attr.isStringAttribute())
continue;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 628058142e48..170d6b8f35ff 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,7 +21,6 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
-#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/InstructionCost.h"
#include <functional>
+#include <utility>
namespace llvm {
@@ -47,12 +47,14 @@ class ExtractElementInst;
class Function;
class GlobalValue;
class InstCombiner;
+class OptimizationRemarkEmitter;
class IntrinsicInst;
class LoadInst;
class LoopAccessInfo;
class Loop;
class LoopInfo;
class ProfileSummaryInfo;
+class RecurrenceDescriptor;
class SCEV;
class ScalarEvolution;
class StoreInst;
@@ -97,7 +99,7 @@ struct HardwareLoopInfo {
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
- const SCEV *TripCount = nullptr;
+ const SCEV *ExitCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
@@ -382,8 +384,15 @@ public:
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+ /// Return true if globals in this address space can have initializers other
+ /// than `undef`.
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
+
unsigned getAssumedAddrSpace(const Value *V) const;
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const;
+
/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
/// NewV, which has a different address space. This should happen for every
/// operand index that collectFlatAddressOperands returned for the intrinsic.
@@ -506,7 +515,8 @@ public:
/// transformation. The caller will initialize UP with the current
/// target-independent defaults.
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
- UnrollingPreferences &UP) const;
+ UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const;
/// Query the target whether it would be profitable to convert the given loop
/// into a hardware loop.
@@ -660,6 +670,9 @@ public:
/// Return true if the target supports masked expand load.
bool isLegalMaskedExpandLoad(Type *DataType) const;
+ /// Return true if we should be enabling ordered reductions for the target.
+ bool enableOrderedReductions() const;
+
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
@@ -907,6 +920,9 @@ public:
/// architectural maximum vector length, and None otherwise.
Optional<unsigned> getMaxVScale() const;
+ /// \return the value of vscale to tune the cost model for.
+ Optional<unsigned> getVScaleForTuning() const;
+
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
@@ -1094,8 +1110,8 @@ public:
/// is using a compare with the specified predicate as condition. When vector
/// types are passed, \p VecPred must be used for all lanes.
InstructionCost
- getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
- CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
+ getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
const Instruction *I = nullptr) const;
@@ -1104,6 +1120,16 @@ public:
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index = -1) const;
+  /// \return The cost of a replication shuffle that replicates \p VF elements
+  /// of type \p EltTy \p ReplicationFactor times.
+ ///
+ /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+ /// <0,0,0,1,1,1,2,2,2,3,3,3>
+ InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind);
+
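The replication mask from the comment above can be built as follows (a sketch, not part of the interface):

    // ReplicationFactor = 3, VF = 4  ->  <0,0,0,1,1,1,2,2,2,3,3,3>
    SmallVector<int, 16> Mask;
    for (int Elt = 0; Elt < 4; ++Elt)
      for (int Rep = 0; Rep < 3; ++Rep)
        Mask.push_back(Elt);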
/// \return The cost of Load and Store instructions.
InstructionCost
getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
@@ -1452,13 +1478,18 @@ public:
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+ virtual bool
+ canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+ virtual std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const = 0;
virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *OldV,
Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
- UnrollingPreferences &UP) = 0;
+ UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) = 0;
virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
@@ -1505,6 +1536,7 @@ public:
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+ virtual bool enableOrderedReductions() = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
@@ -1563,6 +1595,7 @@ public:
virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual Optional<unsigned> getMaxVScale() const = 0;
+ virtual Optional<unsigned> getVScaleForTuning() const = 0;
virtual bool shouldMaximizeVectorBandwidth() const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
bool IsScalable) const = 0;
@@ -1623,6 +1656,12 @@ public:
const Instruction *I) = 0;
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
+
+ virtual InstructionCost
+ getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) = 0;
+
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
Align Alignment,
unsigned AddressSpace,
@@ -1730,8 +1769,8 @@ public:
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
- enum TargetTransformInfo::TargetCostKind CostKind) override {
- return Impl.getGEPCost(PointeeType, Ptr, Operands);
+ TargetTransformInfo::TargetCostKind CostKind) override {
+ return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
@@ -1775,10 +1814,20 @@ public:
return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
}
+ bool
+ canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
+ return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
+ }
+
unsigned getAssumedAddrSpace(const Value *V) const override {
return Impl.getAssumedAddrSpace(V);
}
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override {
+ return Impl.getPredicatedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const override {
return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
@@ -1788,8 +1837,9 @@ public:
return Impl.isLoweredToCall(F);
}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- UnrollingPreferences &UP) override {
- return Impl.getUnrollingPreferences(L, SE, UP);
+ UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) override {
+ return Impl.getUnrollingPreferences(L, SE, UP, ORE);
}
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) override {
@@ -1886,6 +1936,9 @@ public:
bool isLegalMaskedExpandLoad(Type *DataType) override {
return Impl.isLegalMaskedExpandLoad(DataType);
}
+ bool enableOrderedReductions() override {
+ return Impl.enableOrderedReductions();
+ }
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
@@ -2015,6 +2068,9 @@ public:
Optional<unsigned> getMaxVScale() const override {
return Impl.getMaxVScale();
}
+ Optional<unsigned> getVScaleForTuning() const override {
+ return Impl.getVScaleForTuning();
+ }
bool shouldMaximizeVectorBandwidth() const override {
return Impl.shouldMaximizeVectorBandwidth();
}
@@ -2115,6 +2171,13 @@ public:
unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
+ InstructionCost
+ getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ }
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index c07a33c9f155..05ef2495475f 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -24,6 +24,7 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
+#include <utility>
using namespace llvm::PatternMatch;
@@ -47,10 +48,9 @@ public:
const DataLayout &getDataLayout() const { return DL; }
- InstructionCost
- getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const {
+ InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) const {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -105,9 +105,17 @@ public:
}
bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+ return AS == 0;
+  }
unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const {
+ return std::make_pair(nullptr, -1);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
@@ -187,7 +195,8 @@ public:
}
void getUnrollingPreferences(Loop *, ScalarEvolution &,
- TTI::UnrollingPreferences &) const {}
+ TTI::UnrollingPreferences &,
+ OptimizationRemarkEmitter *) const {}
void getPeelingPreferences(Loop *, ScalarEvolution &,
TTI::PeelingPreferences &) const {}
@@ -262,6 +271,8 @@ public:
bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
+ bool enableOrderedReductions() const { return false; }
+
bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
@@ -394,6 +405,7 @@ public:
unsigned getMinVectorRegisterBitWidth() const { return 128; }
Optional<unsigned> getMaxVScale() const { return None; }
+ Optional<unsigned> getVScaleForTuning() const { return None; }
bool shouldMaximizeVectorBandwidth() const { return false; }
@@ -539,6 +551,12 @@ public:
return 1;
}
+ unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ return 1;
+ }
+
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
TTI::TargetCostKind CostKind,
@@ -614,7 +632,8 @@ public:
return 1;
}
- unsigned getNumberOfParts(Type *Tp) const { return 0; }
+ // Assume that we have a register of the right size for the type.
+ unsigned getNumberOfParts(Type *Tp) const { return 1; }
InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
const SCEV *) const {
@@ -632,9 +651,10 @@ public:
return 1;
}
- InstructionCost getExtendedAddReductionCost(
- bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const {
+ InstructionCost
+ getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy,
+ VectorType *Ty,
+ TTI::TargetCostKind CostKind) const {
return 1;
}
@@ -856,10 +876,9 @@ protected:
public:
using BaseT::getGEPCost;
- InstructionCost
- getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
+ InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
assert(cast<PointerType>(Ptr->getType()->getScalarType())
->isOpaqueOrPointeeTypeMatches(PointeeType) &&
@@ -964,10 +983,10 @@ public:
return TTI::TCC_Free;
break;
case Instruction::GetElementPtr: {
- const GEPOperator *GEP = cast<GEPOperator>(U);
+ const auto *GEP = cast<GEPOperator>(U);
return TargetTTI->getGEPCost(GEP->getSourceElementType(),
GEP->getPointerOperand(),
- Operands.drop_front());
+ Operands.drop_front(), CostKind);
}
case Instruction::Add:
case Instruction::FAdd:
@@ -1063,58 +1082,94 @@ public:
auto *IE = dyn_cast<InsertElementInst>(U);
if (!IE)
return TTI::TCC_Basic; // FIXME
- auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
- unsigned Idx = CI ? CI->getZExtValue() : -1;
+ unsigned Idx = -1;
+ if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
+ if (CI->getValue().getActiveBits() <= 32)
+ Idx = CI->getZExtValue();
return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx);
}
case Instruction::ShuffleVector: {
auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
if (!Shuffle)
return TTI::TCC_Basic; // FIXME
+
auto *VecTy = cast<VectorType>(U->getType());
auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
+ int NumSubElts, SubIndex;
+
+ if (Shuffle->changesLength()) {
+ // Treat a 'subvector widening' as a free shuffle.
+ if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
+ return 0;
+
+ if (Shuffle->isExtractSubvectorMask(SubIndex))
+ return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
+ Shuffle->getShuffleMask(), SubIndex,
+ VecTy);
+
+ if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
+ return TargetTTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
+ SubIndex,
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+
+ int ReplicationFactor, VF;
+ if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
+ APInt DemandedDstElts =
+ APInt::getNullValue(Shuffle->getShuffleMask().size());
+ for (auto I : enumerate(Shuffle->getShuffleMask())) {
+ if (I.value() != UndefMaskElem)
+ DemandedDstElts.setBit(I.index());
+ }
+ return TargetTTI->getReplicationShuffleCost(
+ VecSrcTy->getElementType(), ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ }
- // TODO: Identify and add costs for insert subvector, etc.
- int SubIndex;
- if (Shuffle->isExtractSubvectorMask(SubIndex))
- return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
- Shuffle->getShuffleMask(), SubIndex,
- VecTy);
- else if (Shuffle->changesLength())
return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
- else if (Shuffle->isIdentity())
+ }
+
+ if (Shuffle->isIdentity())
return 0;
- else if (Shuffle->isReverse())
+
+ if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isSelect())
+
+ if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isTranspose())
+
+ if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isZeroEltSplat())
+
+ if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
- else if (Shuffle->isSingleSource())
+
+ if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
+ if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
+ return TargetTTI->getShuffleCost(
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
+ FixedVectorType::get(VecTy->getScalarType(), NumSubElts));
+
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
Shuffle->getShuffleMask(), 0, nullptr);
}
case Instruction::ExtractElement: {
- unsigned Idx = -1;
auto *EEI = dyn_cast<ExtractElementInst>(U);
if (!EEI)
return TTI::TCC_Basic; // FIXME
-
- auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
- if (CI)
- Idx = CI->getZExtValue();
-
- return TargetTTI->getVectorInstrCost(Opcode, U->getOperand(0)->getType(),
- Idx);
+ unsigned Idx = -1;
+ if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
+ if (CI->getValue().getActiveBits() <= 32)
+ Idx = CI->getZExtValue();
+ Type *DstTy = U->getOperand(0)->getType();
+ return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx);
}
}
// By default, just classify everything as 'basic'.
diff --git a/llvm/include/llvm/Analysis/TypeMetadataUtils.h b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
index 3f7603142900..074c40942b06 100644
--- a/llvm/include/llvm/Analysis/TypeMetadataUtils.h
+++ b/llvm/include/llvm/Analysis/TypeMetadataUtils.h
@@ -22,6 +22,7 @@ namespace llvm {
class CallBase;
class CallInst;
class Constant;
+class Function;
class DominatorTree;
class Instruction;
class Module;
@@ -56,7 +57,30 @@ void findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
const CallInst *CI, DominatorTree &DT);
-Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M);
-}
+/// Processes a Constant recursively looking into elements of arrays, structs
+/// and expressions to find a trivial pointer element that is located at the
+/// given offset (relative to the beginning of the whole outer Constant).
+///
+/// Used for example from GlobalDCE to find an entry in a C++ vtable that
+/// matches a vcall offset.
+///
+/// To support Swift vtables, getPointerAtOffset can see through "relative
+/// pointers", i.e. (sub-)expressions of the form of:
+///
+/// @symbol = ... {
+/// i32 trunc (i64 sub (
+/// i64 ptrtoint (<type> @target to i64), i64 ptrtoint (... @symbol to i64)
+/// ) to i32)
+/// }
+///
+/// For such (sub-)expressions, getPointerAtOffset returns the @target pointer.
+Constant *getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
+ Constant *TopLevelGlobal = nullptr);
+
+/// Finds the same "relative pointer" pattern as described above, where the
+/// target is `F`, and replaces the entire pattern with a constant zero.
+void replaceRelativePointerUsersWithZero(Function *F);
+
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
index 47ee23e06000..1f6be0e60eb9 100644
--- a/llvm/include/llvm/Analysis/Utils/TFUtils.h
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -104,6 +104,9 @@ Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
struct LoggedFeatureSpec {
TensorSpec Spec;
Optional<std::string> LoggingName;
+ const std::string &getLoggingName() const {
+ return LoggingName ? *LoggingName : Spec.name();
+ }
};
/// Load the output specs. If SpecFileOverride is not empty, that path is used.
@@ -170,7 +173,9 @@ public:
// we can consider using bytes.
char *addEntryAndGetFloatOrInt64Buffer(size_t FeatureID);
- void print(raw_ostream &OS);
+ // Flush the content of the log to the stream, clearing the stored data in the
+ // process.
+ void flush(raw_ostream &OS);
private:
std::vector<LoggedFeatureSpec> FeatureSpecs;
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 90ec742f18e6..b4f38a3e976f 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -203,6 +203,15 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
const DominatorTree *DT = nullptr,
bool UseInstrInfo = true);
+ /// Get the minimum bit size for this Value \p Op as a signed integer.
+ /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)).
+ /// Similar to the APInt::getMinSignedBits function.
+ unsigned ComputeMinSignedBits(const Value *Op, const DataLayout &DL,
+ unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
+
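A hedged numeric illustration of the property stated in the comment above, using APInt directly:

    APInt X(32, -3, /*isSigned=*/true);
    unsigned Bits = X.getMinSignedBits();   // 3: the i3 value 0b101 sign-extends back to -3
    assert(X.trunc(Bits).sext(32) == X);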
/// This function computes the integer multiple of Base that equals V. If
/// successful, it returns true and returns the multiple in Multiple. If
/// unsuccessful, it returns false. Also, if V can be simplified to an
@@ -549,6 +558,7 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true,
AssumptionCache *AC = nullptr,
const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
unsigned Depth = 0);
/// Return true if this function can prove that the instruction I will
@@ -573,6 +583,18 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// instruction variant of this function.
bool isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB);
+ /// Return true if every instruction in the range (Begin, End) is
+ /// guaranteed to transfer execution to its static successor. \p ScanLimit
+ /// bounds the search to avoid scanning huge blocks.
+ bool isGuaranteedToTransferExecutionToSuccessor(
+ BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
+ unsigned ScanLimit = 32);
+
+ /// Same as previous, but with range expressed via iterator_range.
+ bool isGuaranteedToTransferExecutionToSuccessor(
+ iterator_range<BasicBlock::const_iterator> Range,
+ unsigned ScanLimit = 32);
+
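A sketch of the range form above (BB is an assumed `const BasicBlock &`; illustrative only):

    // Does every instruction in BB transfer execution to its static successor?
    bool Transfers = isGuaranteedToTransferExecutionToSuccessor(
        make_range(BB.begin(), BB.end()), /*ScanLimit=*/32);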
/// Return true if this function can prove that the instruction I
/// is executed for every iteration of the loop L.
///
@@ -624,10 +646,16 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// true. If Op raises immediate UB but never creates poison or undef
/// (e.g. sdiv I, 0), canCreatePoison returns false.
///
+  /// \p ConsiderFlags controls whether poison-producing flags on the
+  /// instruction are considered.  This can be used to see if the instruction
+  /// could still introduce undef or poison even without the poison-generating
+  /// flags that might be present on it (i.e. could the result of
+  /// Op->dropPoisonGeneratingFlags() still create poison or undef).
+ ///
/// canCreatePoison returns true if Op can create poison from non-poison
/// operands.
- bool canCreateUndefOrPoison(const Operator *Op);
- bool canCreatePoison(const Operator *Op);
+ bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlags = true);
+ bool canCreatePoison(const Operator *Op, bool ConsiderFlags = true);
/// Return true if V is poison given that ValAssumedPoison is already poison.
/// For example, if ValAssumedPoison is `icmp X, 10` and V is `icmp X, 5`,
@@ -744,6 +772,10 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
+ /// Return the minimum or maximum constant value for the specified integer
+ /// min/max flavor and type.
+ APInt getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth);
+
/// Check if the values in \p VL are select instructions that can be converted
/// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a
/// conversion is possible, together with a bool indicating whether all select
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index c890216c9e01..24e2318de48b 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -533,6 +533,12 @@ llvm::SmallVector<int, 16> createStrideMask(unsigned Start, unsigned Stride,
llvm::SmallVector<int, 16>
createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);
+/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle
+/// mask assuming both operands are identical. This assumes that the unary
+/// shuffle will use elements from operand 0 (operand 1 will be unused).
+llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,
+ unsigned NumElts);
+
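For illustration only, assuming (as the comment suggests) that mask indices referring to the second operand are folded back into the first:

    // Binary mask <0, 5, 2, 7> over two 4-element operands becomes <0, 1, 2, 3>
    // once both operands are known to be the same value.
    SmallVector<int, 16> Unary = createUnaryMask({0, 5, 2, 7}, /*NumElts=*/4);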
/// Concatenate a list of vectors.
///
/// This function generates code that concatenates the vectors in \p Vecs into a
@@ -686,10 +692,8 @@ public:
if (getMember(getFactor() - 1))
return false;
- // We have a group with gaps. It therefore cannot be a group of stores,
- // and it can't be a reversed access, because such groups get invalidated.
- assert(!getMember(0)->mayWriteToMemory() &&
- "Group should have been invalidated");
+ // We have a group with gaps. It therefore can't be a reversed access,
+ // because such groups get invalidated (TODO).
assert(!isReverse() && "Group should have been invalidated");
// This is a group of loads, with gaps, and without a last-member
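
An illustrative use of the createUnaryMask helper added above, assuming the remapping its comment implies (mask indices that point into operand 1 fold back into the same lane of operand 0):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"

// Fold a two-operand shuffle mask into a one-operand mask when both shuffle
// inputs are known to be the same 4-element value.
static llvm::SmallVector<int, 16> exampleUnaryMask() {
  llvm::SmallVector<int, 16> BinaryMask = {0, 5, 2, 7}; // lanes 5, 7 read op1
  return llvm::createUnaryMask(BinaryMask, /*NumElts=*/4); // expect {0,1,2,3}
}
```
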
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index c97d9781c33b..c30165e4a97b 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ASMPARSER_LLLEXER_H
-#define LLVM_LIB_ASMPARSER_LLLEXER_H
+#ifndef LLVM_ASMPARSER_LLLEXER_H
+#define LLVM_ASMPARSER_LLLEXER_H
#include "LLToken.h"
#include "llvm/ADT/APFloat.h"
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index 70db9218fa3d..d621c232378c 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
-#define LLVM_LIB_ASMPARSER_LLPARSER_H
+#ifndef LLVM_ASMPARSER_LLPARSER_H
+#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
#include "llvm/ADT/Optional.h"
@@ -172,9 +172,8 @@ namespace llvm {
/// getGlobalVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc,
- bool IsCall);
- GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
+ GlobalValue *getGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
+ GlobalValue *getGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
/// Get a Comdat with the specified name, creating a forward reference
/// record if needed.
@@ -270,7 +269,6 @@ namespace llvm {
bool parseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma);
bool parseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc,
bool &AteExtraComma);
- bool parseOptionalCommaInAlloca(bool &IsInAlloca);
bool parseAllocSizeArguments(unsigned &BaseSizeArg,
Optional<unsigned> &HowManyArg);
bool parseVScaleRangeArguments(unsigned &MinValue, unsigned &MaxValue);
@@ -306,11 +304,10 @@ namespace llvm {
unsigned DLLStorageClass, bool DSOLocal,
GlobalVariable::ThreadLocalMode TLM,
GlobalVariable::UnnamedAddr UnnamedAddr);
- bool parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
- unsigned L, unsigned Visibility,
- unsigned DLLStorageClass, bool DSOLocal,
- GlobalVariable::ThreadLocalMode TLM,
- GlobalVariable::UnnamedAddr UnnamedAddr);
+ bool parseAliasOrIFunc(const std::string &Name, LocTy NameLoc, unsigned L,
+ unsigned Visibility, unsigned DLLStorageClass,
+ bool DSOLocal, GlobalVariable::ThreadLocalMode TLM,
+ GlobalVariable::UnnamedAddr UnnamedAddr);
bool parseComdat();
bool parseStandaloneMetadata();
bool parseNamedMetadata();
@@ -424,8 +421,8 @@ namespace llvm {
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
- Value *getVal(const std::string &Name, Type *Ty, LocTy Loc, bool IsCall);
- Value *getVal(unsigned ID, Type *Ty, LocTy Loc, bool IsCall);
+ Value *getVal(const std::string &Name, Type *Ty, LocTy Loc);
+ Value *getVal(unsigned ID, Type *Ty, LocTy Loc);
/// setInstName - After an instruction is parsed and inserted into its
/// basic block, this installs its name.
@@ -447,10 +444,10 @@ namespace llvm {
};
bool convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
- PerFunctionState *PFS, bool IsCall);
+ PerFunctionState *PFS);
Value *checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
- Value *Val, bool IsCall);
+ Value *Val);
bool parseConstantValue(Type *Ty, Constant *&C);
bool parseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index aa49c68fe924..f8ca054863ac 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_ASMPARSER_LLTOKEN_H
-#define LLVM_LIB_ASMPARSER_LLTOKEN_H
+#ifndef LLVM_ASMPARSER_LLTOKEN_H
+#define LLVM_ASMPARSER_LLTOKEN_H
namespace llvm {
namespace lltok {
@@ -190,6 +190,7 @@ enum Kind {
kw_convergent,
kw_dereferenceable,
kw_dereferenceable_or_null,
+ kw_disable_sanitizer_instrumentation,
kw_elementtype,
kw_inaccessiblememonly,
kw_inaccessiblemem_or_argmemonly,
@@ -403,6 +404,9 @@ enum Kind {
kw_returnDoesNotAlias,
kw_noInline,
kw_alwaysInline,
+ kw_noUnwind,
+ kw_mayThrow,
+ kw_hasUnknownCall,
kw_calls,
kw_callee,
kw_params,
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index 34f124b5779a..61f3f27ebb47 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -248,6 +248,9 @@ HANDLE_DW_TAG(0x5103, ALTIUM_rev_carry_type, 0, ALTIUM, DW_KIND_NONE)
// M16 __rom qualifier
HANDLE_DW_TAG(0x5111, ALTIUM_rom, 0, ALTIUM, DW_KIND_NONE)
+// LLVM
+HANDLE_DW_TAG(0x6000, LLVM_annotation, 0, LLVM, DW_KIND_NONE)
+
// Green Hills.
HANDLE_DW_TAG(0x8004, GHS_namespace, 0, GHS, DW_KIND_NONE)
HANDLE_DW_TAG(0x8005, GHS_using_namespace, 0, GHS, DW_KIND_NONE)
diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def
index c08f8a53bdb5..814d8b113ec4 100644
--- a/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -31,6 +31,11 @@
#define PPC64_DYNAMIC_TAG_DEFINED
#endif
+#ifndef RISCV_DYNAMIC_TAG
+#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#define RISCV_DYNAMIC_TAG_DEFINED
+#endif
+
#ifndef DYNAMIC_TAG_MARKER
#define DYNAMIC_TAG_MARKER(name, value) DYNAMIC_TAG(name, value)
#define DYNAMIC_TAG_MARKER_DEFINED
@@ -213,6 +218,9 @@ PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
// first glink lazy resolver stub.
+// RISC-V specific dynamic array tags.
+RISCV_DYNAMIC_TAG(RISCV_VARIANT_CC, 0x70000001)
+
// Sun machine-independent extensions.
DYNAMIC_TAG(AUXILIARY, 0x7FFFFFFD) // Shared object to load before self
DYNAMIC_TAG(USED, 0x7FFFFFFE) // Same as DT_NEEDED
@@ -243,3 +251,7 @@ DYNAMIC_TAG(FILTER, 0x7FFFFFFF) // Shared object to get values from
#undef PPC64_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG_DEFINED
#endif
+#ifdef RISCV_DYNAMIC_TAG_DEFINED
+#undef RISCV_DYNAMIC_TAG
+#undef RISCV_DYNAMIC_TAG_DEFINED
+#endif
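
Because DynamicTags.def is an X-macro file, a consumer can pick out only the new RISC-V tag by overriding RISCV_DYNAMIC_TAG before including it. A hypothetical helper, not part of the patch:

```cpp
#include <cstdint>

#include "llvm/ADT/StringRef.h"

// Expand only the RISC-V specific tags out of the .def file by overriding
// RISCV_DYNAMIC_TAG and stubbing out the generic DYNAMIC_TAG macro.
static llvm::StringRef riscvDynamicTagName(uint64_t Tag) {
  switch (Tag) {
#define DYNAMIC_TAG(name, value) // ignore everything that is not RISC-V
#define RISCV_DYNAMIC_TAG(name, value)                                         \
  case value:                                                                  \
    return "DT_" #name;
#include "llvm/BinaryFormat/DynamicTags.def"
#undef RISCV_DYNAMIC_TAG
#undef DYNAMIC_TAG
  default:
    return "<unknown>";
  }
}
```
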
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6148f968cdba..a270fd399aeb 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -660,6 +660,12 @@ enum {
#include "ELFRelocs/RISCV.def"
};
+enum {
+  // Symbol may follow a different calling convention than the standard
+  // calling convention.
+ STO_RISCV_VARIANT_CC = 0x80
+};
+
// ELF Relocation types for S390/zSeries
enum {
#include "ELFRelocs/SystemZ.def"
@@ -1596,6 +1602,16 @@ enum {
NT_FREEBSD_PROCSTAT_AUXV = 16,
};
+// OpenBSD core note types.
+enum {
+ NT_OPENBSD_PROCINFO = 10,
+ NT_OPENBSD_AUXV = 11,
+ NT_OPENBSD_REGS = 20,
+ NT_OPENBSD_FPREGS = 21,
+ NT_OPENBSD_XFPREGS = 22,
+ NT_OPENBSD_WCOOKIE = 23,
+};
+
// AMDGPU-specific section indices.
enum {
SHN_AMDGPU_LDS = 0xff00, // Variable in LDS; symbol encoded like SHN_COMMON
@@ -1618,6 +1634,13 @@ enum {
NT_AMDGPU_METADATA = 32
};
+// LLVMOMPOFFLOAD specific notes.
+enum : unsigned {
+ NT_LLVM_OPENMP_OFFLOAD_VERSION = 1,
+ NT_LLVM_OPENMP_OFFLOAD_PRODUCER = 2,
+ NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION = 3
+};
+
enum {
GNU_ABI_TAG_LINUX = 0,
GNU_ABI_TAG_HURD = 1,
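
A minimal sketch of how a reader would test the new RISC-V st_other bit (the helper name is ours):

```cpp
#include <cstdint>

#include "llvm/BinaryFormat/ELF.h"

// A RISC-V symbol opts into a non-standard calling convention through a bit
// in its st_other field.
static bool usesVariantCC(uint8_t StOther) {
  return (StOther & llvm::ELF::STO_RISCV_VARIANT_CC) != 0;
}
```
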
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
index 9f2f0540bcbd..454450950444 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -46,10 +46,6 @@ ELF_RELOC(R_RISCV_ALIGN, 43)
ELF_RELOC(R_RISCV_RVC_BRANCH, 44)
ELF_RELOC(R_RISCV_RVC_JUMP, 45)
ELF_RELOC(R_RISCV_RVC_LUI, 46)
-ELF_RELOC(R_RISCV_GPREL_I, 47)
-ELF_RELOC(R_RISCV_GPREL_S, 48)
-ELF_RELOC(R_RISCV_TPREL_I, 49)
-ELF_RELOC(R_RISCV_TPREL_S, 50)
ELF_RELOC(R_RISCV_RELAX, 51)
ELF_RELOC(R_RISCV_SUB6, 52)
ELF_RELOC(R_RISCV_SET6, 53)
diff --git a/llvm/include/llvm/BinaryFormat/MachO.def b/llvm/include/llvm/BinaryFormat/MachO.def
index 76dcc58ba048..f68ecefa6c9e 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.def
+++ b/llvm/include/llvm/BinaryFormat/MachO.def
@@ -74,6 +74,8 @@ HANDLE_LOAD_COMMAND(LC_VERSION_MIN_TVOS, 0x0000002Fu, version_min_command)
HANDLE_LOAD_COMMAND(LC_VERSION_MIN_WATCHOS, 0x00000030u, version_min_command)
HANDLE_LOAD_COMMAND(LC_NOTE, 0x00000031u, note_command)
HANDLE_LOAD_COMMAND(LC_BUILD_VERSION, 0x00000032u, build_version_command)
+HANDLE_LOAD_COMMAND(LC_DYLD_EXPORTS_TRIE, 0x80000033u, linkedit_data_command)
+HANDLE_LOAD_COMMAND(LC_DYLD_CHAINED_FIXUPS, 0x80000034u, linkedit_data_command)
#endif
diff --git a/llvm/include/llvm/BinaryFormat/Wasm.h b/llvm/include/llvm/BinaryFormat/Wasm.h
index c38e64928521..0bc8c4e167d8 100644
--- a/llvm/include/llvm/BinaryFormat/Wasm.h
+++ b/llvm/include/llvm/BinaryFormat/Wasm.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file defines manifest constants for the wasm object file format.
-// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
@@ -36,12 +36,25 @@ struct WasmObjectHeader {
uint32_t Version;
};
+struct WasmDylinkImportInfo {
+ StringRef Module;
+ StringRef Field;
+ uint32_t Flags;
+};
+
+struct WasmDylinkExportInfo {
+ StringRef Name;
+ uint32_t Flags;
+};
+
struct WasmDylinkInfo {
uint32_t MemorySize; // Memory size in bytes
uint32_t MemoryAlignment; // P2 alignment of memory
uint32_t TableSize; // Table size in elements
uint32_t TableAlignment; // P2 alignment of table
std::vector<StringRef> Needed; // Shared library dependencies
+ std::vector<WasmDylinkImportInfo> ImportInfo;
+ std::vector<WasmDylinkExportInfo> ExportInfo;
};
struct WasmProducerInfo {
@@ -101,15 +114,9 @@ struct WasmGlobal {
StringRef SymbolName; // from the "linking" section
};
-struct WasmTagType {
- // Kind of tag. Currently only WASM_TAG_ATTRIBUTE_EXCEPTION is possible.
- uint8_t Attribute;
- uint32_t SigIndex;
-};
-
struct WasmTag {
uint32_t Index;
- WasmTagType Type;
+ uint32_t SigIndex;
StringRef SymbolName; // from the "linking" section
};
@@ -122,7 +129,6 @@ struct WasmImport {
WasmGlobalType Global;
WasmTableType Table;
WasmLimits Memory;
- WasmTagType Tag;
};
};
@@ -133,6 +139,7 @@ struct WasmLocalDecl {
struct WasmFunction {
uint32_t Index;
+ uint32_t SigIndex;
std::vector<WasmLocalDecl> Locals;
ArrayRef<uint8_t> Body;
uint32_t CodeSectionOffset;
@@ -284,11 +291,14 @@ enum : unsigned {
// Opcodes used in synthetic functions.
enum : unsigned {
- WASM_OPCODE_IF = 0x04,
- WASM_OPCODE_ELSE = 0x05,
+ WASM_OPCODE_BLOCK = 0x02,
+ WASM_OPCODE_BR = 0x0c,
+ WASM_OPCODE_BR_TABLE = 0x0e,
+ WASM_OPCODE_RETURN = 0x0f,
WASM_OPCODE_DROP = 0x1a,
WASM_OPCODE_MISC_PREFIX = 0xfc,
WASM_OPCODE_MEMORY_INIT = 0x08,
+ WASM_OPCODE_MEMORY_FILL = 0x0b,
WASM_OPCODE_DATA_DROP = 0x09,
WASM_OPCODE_ATOMICS_PREFIX = 0xfe,
WASM_OPCODE_ATOMIC_NOTIFY = 0x00,
@@ -339,6 +349,14 @@ enum : unsigned {
WASM_SYMBOL_TABLE = 0x8,
};
+// Kind codes used in the custom "dylink" section
+enum : unsigned {
+ WASM_DYLINK_MEM_INFO = 0x1,
+ WASM_DYLINK_NEEDED = 0x2,
+ WASM_DYLINK_EXPORT_INFO = 0x3,
+ WASM_DYLINK_IMPORT_INFO = 0x4,
+};
+
// Kind codes used in the custom "linking" section in the WASM_COMDAT_INFO
enum : unsigned {
WASM_COMDAT_DATA = 0x0,
@@ -379,6 +397,7 @@ const unsigned WASM_SYMBOL_UNDEFINED = 0x10;
const unsigned WASM_SYMBOL_EXPORTED = 0x20;
const unsigned WASM_SYMBOL_EXPLICIT_NAME = 0x40;
const unsigned WASM_SYMBOL_NO_STRIP = 0x80;
+const unsigned WASM_SYMBOL_TLS = 0x100;
#define WASM_RELOC(name, value) name = value,
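
A small sketch of consuming the extended dylink information, using only the fields added above (the dumper itself is hypothetical):

```cpp
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/raw_ostream.h"

// Walk the per-symbol import/export records that the extended dylink
// representation now carries next to the memory/table fields.
static void dumpDylinkInfo(const llvm::wasm::WasmDylinkInfo &Info) {
  for (const llvm::wasm::WasmDylinkImportInfo &Imp : Info.ImportInfo)
    llvm::outs() << "import " << Imp.Module << "." << Imp.Field
                 << " flags=" << Imp.Flags << "\n";
  for (const llvm::wasm::WasmDylinkExportInfo &Exp : Info.ExportInfo)
    llvm::outs() << "export " << Exp.Name << " flags=" << Exp.Flags << "\n";
}
```
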
diff --git a/llvm/include/llvm/BinaryFormat/WasmTraits.h b/llvm/include/llvm/BinaryFormat/WasmTraits.h
index 930ee690bcc0..bef9dd3291ca 100644
--- a/llvm/include/llvm/BinaryFormat/WasmTraits.h
+++ b/llvm/include/llvm/BinaryFormat/WasmTraits.h
@@ -18,10 +18,8 @@
namespace llvm {
-template <typename T> struct DenseMapInfo;
-
// Traits for using WasmSignature in a DenseMap.
-template <> struct DenseMapInfo<wasm::WasmSignature> {
+template <> struct DenseMapInfo<wasm::WasmSignature, void> {
static wasm::WasmSignature getEmptyKey() {
wasm::WasmSignature Sig;
Sig.State = wasm::WasmSignature::Empty;
@@ -47,7 +45,7 @@ template <> struct DenseMapInfo<wasm::WasmSignature> {
};
// Traits for using WasmGlobalType in a DenseMap
-template <> struct DenseMapInfo<wasm::WasmGlobalType> {
+template <> struct DenseMapInfo<wasm::WasmGlobalType, void> {
static wasm::WasmGlobalType getEmptyKey() {
return wasm::WasmGlobalType{1, true};
}
@@ -64,7 +62,7 @@ template <> struct DenseMapInfo<wasm::WasmGlobalType> {
};
// Traits for using WasmLimits in a DenseMap
-template <> struct DenseMapInfo<wasm::WasmLimits> {
+template <> struct DenseMapInfo<wasm::WasmLimits, void> {
static wasm::WasmLimits getEmptyKey() {
return wasm::WasmLimits{0xff, 0xff, 0xff};
}
@@ -86,19 +84,19 @@ template <> struct DenseMapInfo<wasm::WasmLimits> {
};
// Traits for using WasmTableType in a DenseMap
-template <> struct DenseMapInfo<wasm::WasmTableType> {
+template <> struct DenseMapInfo<wasm::WasmTableType, void> {
static wasm::WasmTableType getEmptyKey() {
- return wasm::WasmTableType{0,
- DenseMapInfo<wasm::WasmLimits>::getEmptyKey()};
+ return wasm::WasmTableType{
+ 0, DenseMapInfo<wasm::WasmLimits, void>::getEmptyKey()};
}
static wasm::WasmTableType getTombstoneKey() {
return wasm::WasmTableType{
- 1, DenseMapInfo<wasm::WasmLimits>::getTombstoneKey()};
+ 1, DenseMapInfo<wasm::WasmLimits, void>::getTombstoneKey()};
}
static unsigned getHashValue(const wasm::WasmTableType &TableType) {
return hash_combine(
TableType.ElemType,
- DenseMapInfo<wasm::WasmLimits>::getHashValue(TableType.Limits));
+ DenseMapInfo<wasm::WasmLimits, void>::getHashValue(TableType.Limits));
}
static bool isEqual(const wasm::WasmTableType &LHS,
const wasm::WasmTableType &RHS) {
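
The extra `void` template parameter is the default SFINAE slot DenseMapInfo now carries; DenseMap users are unaffected and still pick up these specializations. A sketch, assuming a caller that interns signatures:

```cpp
#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/WasmTraits.h"

// Map signatures to assigned type indices; the lookup resolves to the
// DenseMapInfo<wasm::WasmSignature, void> specialization above.
static unsigned getOrAssignSigIndex(
    llvm::DenseMap<llvm::wasm::WasmSignature, unsigned> &SigIndices,
    const llvm::wasm::WasmSignature &Sig) {
  return SigIndices.try_emplace(Sig, SigIndices.size()).first->second;
}
```
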
diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h
index 8a42d26f3f4a..cffd8618f1e3 100644
--- a/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -28,9 +28,14 @@ namespace XCOFF {
constexpr size_t FileNamePadSize = 6;
constexpr size_t NameSize = 8;
constexpr size_t FileHeaderSize32 = 20;
+constexpr size_t FileHeaderSize64 = 24;
+constexpr size_t AuxFileHeaderSize32 = 72;
+constexpr size_t AuxFileHeaderSize64 = 110;
constexpr size_t SectionHeaderSize32 = 40;
+constexpr size_t SectionHeaderSize64 = 72;
constexpr size_t SymbolTableEntrySize = 18;
constexpr size_t RelocationSerializationSize32 = 10;
+constexpr size_t RelocationSerializationSize64 = 14;
constexpr uint16_t RelocOverflow = 65535;
constexpr uint8_t AllocRegNo = 31;
@@ -38,6 +43,17 @@ enum ReservedSectionNum : int16_t { N_DEBUG = -2, N_ABS = -1, N_UNDEF = 0 };
enum MagicNumber : uint16_t { XCOFF32 = 0x01DF, XCOFF64 = 0x01F7 };
+// This field only exists in the XCOFF64 definition.
+enum AuxHeaderFlags64 : uint16_t {
+ SHR_SYMTAB = 0x8000, ///< At exec time, create shared symbol table for program
+ ///< (main program only).
+ FORK_POLICY = 0x4000, ///< Forktree policy specified (main program only).
+ FORK_COR = 0x2000 ///< If _AOUT_FORK_POLICY is set, specify copy-on-reference
+                         ///< if this bit is set. Specify copy-on-write otherwise.
+ ///< If _AOUT_FORK_POLICY is 0, this bit is reserved for
+ ///< future use and should be set to 0.
+};
+
// x_smclas field of x_csect from system header: /usr/include/syms.h
/// Storage Mapping Class definitions.
enum StorageMappingClass : uint8_t {
diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
index de828be3bf1b..f6fc284da33f 100644
--- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
+++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -42,6 +42,8 @@ struct BCDumpOptions {
bool Symbolic = false;
/// Print binary blobs using hex escapes.
bool ShowBinaryBlobs = false;
+ /// Print BLOCKINFO block details.
+ bool DumpBlockinfo = false;
BCDumpOptions(raw_ostream &OS) : OS(OS) {}
};
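
A minimal sketch of enabling the new option when configuring the dumper (the surrounding analyzer driver code is assumed to exist):

```cpp
#include "llvm/Bitcode/BitcodeAnalyzer.h"
#include "llvm/Support/raw_ostream.h"

// Build dump options that also print BLOCKINFO block details.
static llvm::BCDumpOptions makeVerboseDumpOptions() {
  llvm::BCDumpOptions Opts(llvm::outs());
  Opts.Symbolic = true;      // pre-existing option
  Opts.DumpBlockinfo = true; // new in this patch
  return Opts;
}
```
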
diff --git a/llvm/include/llvm/Bitcode/BitcodeCommon.h b/llvm/include/llvm/Bitcode/BitcodeCommon.h
index 6a3e74550bc4..22d1872fe49c 100644
--- a/llvm/include/llvm/Bitcode/BitcodeCommon.h
+++ b/llvm/include/llvm/Bitcode/BitcodeCommon.h
@@ -19,10 +19,14 @@
namespace llvm {
struct AllocaPackedValues {
- using Align = Bitfield::Element<unsigned, 0, 5>;
- using UsedWithInAlloca = Bitfield::Element<bool, Align::NextBit, 1>;
+ // We increased the number of bits needed to represent alignment to be more
+ // than 5, but to preserve backward compatibility we store the upper bits
+ // separately.
+ using AlignLower = Bitfield::Element<unsigned, 0, 5>;
+ using UsedWithInAlloca = Bitfield::Element<bool, AlignLower::NextBit, 1>;
using ExplicitType = Bitfield::Element<bool, UsedWithInAlloca::NextBit, 1>;
using SwiftError = Bitfield::Element<bool, ExplicitType::NextBit, 1>;
+ using AlignUpper = Bitfield::Element<unsigned, SwiftError::NextBit, 3>;
};
} // namespace llvm
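
With the alignment exponent now split across two fields, a reader has to stitch the pieces back together. A sketch of the assumed decoding (the helper name is ours; the writer side mirrors it):

```cpp
#include <cstdint>

#include "llvm/ADT/Bitfields.h"
#include "llvm/Bitcode/BitcodeCommon.h"

// Reassemble the alignment exponent from the low 5 bits plus the 3 bits
// stored above SwiftError.
static unsigned decodeAllocaAlignExponent(uint64_t Packed) {
  using APV = llvm::AllocaPackedValues;
  return llvm::Bitfield::get<APV::AlignLower>(Packed) |
         (llvm::Bitfield::get<APV::AlignUpper>(Packed)
          << APV::AlignLower::Bits);
}
```
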
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 28870afb2fcb..04eb2739cbd5 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -671,6 +671,7 @@ enum AttributeKindCodes {
ATTR_KIND_SWIFT_ASYNC = 75,
ATTR_KIND_NO_SANITIZE_COVERAGE = 76,
ATTR_KIND_ELEMENTTYPE = 77,
+ ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION = 78,
};
enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index bdfb416d9bd9..60442326d6c7 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -104,9 +104,12 @@ ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC);
/// getICmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR integer condition code.
-///
ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
+/// getICmpCondCode - Return the LLVM IR integer condition code
+/// corresponding to the given ISD integer condition code.
+ICmpInst::Predicate getICmpCondCode(ISD::CondCode Pred);
+
/// Test if the given instruction is in a position to be optimized
/// with a tail-call. This roughly means that it's in a block with
/// a return and there's nothing that needs to be scheduled
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 5dea86e67d64..d7d3692877de 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -41,7 +41,6 @@ class DIEAbbrev;
class DwarfDebug;
class GCMetadataPrinter;
class GCStrategy;
-class GlobalIndirectSymbol;
class GlobalObject;
class GlobalValue;
class GlobalVariable;
@@ -708,7 +707,7 @@ public:
/// ${:comment}. Targets can override this to add support for their own
/// strange codes.
virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
- const char *Code) const;
+ StringRef Code) const;
/// Print the MachineOperand as a symbol. Targets with complex handling of
/// symbol references should override the base implementation.
@@ -795,8 +794,8 @@ private:
void emitModuleCommandLines(Module &M);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &S);
- /// Emit GlobalAlias or GlobalIFunc.
- void emitGlobalIndirectSymbol(Module &M, const GlobalIndirectSymbol &GIS);
+ void emitGlobalAlias(Module &M, const GlobalAlias &GA);
+ void emitGlobalIFunc(Module &M, const GlobalIFunc &GI);
/// This method decides whether the specified basic block requires a label.
bool shouldEmitLabelForBasicBlock(const MachineBasicBlock &MBB) const;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index e3b834ec42c3..324b7dcfb3ac 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -282,6 +283,11 @@ public:
return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
}
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const {
+ return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const {
return nullptr;
@@ -363,8 +369,9 @@ public:
}
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands) {
- return BaseT::getGEPCost(PointeeType, Ptr, Operands);
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -484,7 +491,8 @@ public:
int getInlinerVectorBonusPercent() { return 150; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// This unrolling functionality is target independent, but to provide some
// motivation for its intended use, for x86:
@@ -526,6 +534,15 @@ public:
continue;
}
+ if (ORE) {
+ ORE->emit([&]() {
+ return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
+ L->getHeader())
+ << "advising against unrolling the loop because it "
+ "contains a "
+ << ore::NV("Call", &I);
+ });
+ }
return;
}
}
@@ -653,6 +670,7 @@ public:
}
Optional<unsigned> getMaxVScale() const { return None; }
+ Optional<unsigned> getVScaleForTuning() const { return None; }
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
@@ -686,7 +704,7 @@ public:
bool Extract) {
auto *Ty = cast<FixedVectorType>(InTy);
- APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
+ APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
@@ -737,8 +755,7 @@ public:
unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -1102,6 +1119,39 @@ public:
return LT.first;
}
+ InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
+ "Unexpected size of DemandedDstElts.");
+
+ InstructionCost Cost;
+
+ auto *SrcVT = FixedVectorType::get(EltTy, VF);
+ auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);
+
+ // The Mask shuffling cost is extract all the elements of the Mask
+ // and insert each of them Factor times into the wide vector:
+ //
+ // E.g. an interleaved group with factor 3:
+ // %mask = icmp ult <8 x i32> %vec1, %vec2
+ // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
+ // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
+ // The cost is estimated as extract all mask elements from the <8xi1> mask
+ // vector and insert them factor times into the <24xi1> shuffled mask
+ // vector.
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
+ Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
+ /*Insert*/ false,
+ /*Extract*/ true);
+ Cost +=
+ thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
+ /*Insert*/ true, /*Extract*/ false);
+
+ return Cost;
+ }
+
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
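
A hedged example of querying the new replication-shuffle hook, assuming the matching TargetTransformInfo entry point added alongside this implementation and a TTI/LLVMContext available at the call site:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Type.h"

// Cost of replicating each lane of an <8 x i1> mask three times, with all
// 24 destination lanes demanded.
static llvm::InstructionCost
replicationCost(const llvm::TargetTransformInfo &TTI, llvm::LLVMContext &Ctx) {
  llvm::APInt DemandedDstElts = llvm::APInt::getAllOnes(3 * 8);
  return TTI.getReplicationShuffleCost(
      llvm::Type::getInt1Ty(Ctx), /*ReplicationFactor=*/3, /*VF=*/8,
      DemandedDstElts, llvm::TargetTransformInfo::TCK_RecipThroughput);
}
```
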
@@ -1201,9 +1251,9 @@ public:
// used (those corresponding to elements [0:1] and [8:9] of the unlegalized
// type). The other loads are unused.
//
- // We only scale the cost of loads since interleaved store groups aren't
- // allowed to have gaps.
- if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
+ // TODO: Note that legalization can turn masked loads/stores into unmasked
+ // (legalized) loads/stores. This can be reflected in the cost.
+ if (Cost.isValid() && VecTySize > VecTyLTSize) {
// The number of loads of a legal type it will take to represent a load
// of the unlegalized vector type.
unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);
@@ -1220,10 +1270,24 @@ public:
// Scale the cost of the load by the fraction of legal instructions that
// will be used.
- Cost *= UsedInsts.count() / NumLegalInsts;
+ Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(),
+ NumLegalInsts);
}
// Then plus the cost of interleave operation.
+ assert(Indices.size() <= Factor &&
+ "Interleaved memory op has too many members");
+
+ const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
+ const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);
+
+ APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
+ for (unsigned Index : Indices) {
+ assert(Index < Factor && "Invalid index for interleaved memory op");
+ for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
+ DemandedLoadStoreElts.setBit(Index + Elm * Factor);
+ }
+
if (Opcode == Instruction::Load) {
// The interleave cost is similar to extract sub vectors' elements
// from the wide vector, and insert them into sub vectors.
@@ -1233,79 +1297,56 @@ public:
// %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
// The cost is estimated as extract elements at 0, 2, 4, 6 from the
// <8 x i32> vector and insert them into a <4 x i32> vector.
-
- assert(Indices.size() <= Factor &&
- "Interleaved memory op has too many members");
-
- for (unsigned Index : Indices) {
- assert(Index < Factor && "Invalid index for interleaved memory op");
-
- // Extract elements from loaded vector for each sub vector.
- for (unsigned i = 0; i < NumSubElts; i++)
- Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
- Index + i * Factor);
- }
-
- InstructionCost InsSubCost = 0;
- for (unsigned i = 0; i < NumSubElts; i++)
- InsSubCost +=
- thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);
-
+ InstructionCost InsSubCost =
+ thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
+ /*Insert*/ true, /*Extract*/ false);
Cost += Indices.size() * InsSubCost;
+ Cost +=
+ thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
+ /*Insert*/ false, /*Extract*/ true);
} else {
- // The interleave cost is extract all elements from sub vectors, and
+ // The interleave cost is extract elements from sub vectors, and
// insert them into the wide vector.
//
- // E.g. An interleaved store of factor 2:
- // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
- // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
- // The cost is estimated as extract all elements from both <4 x i32>
- // vectors and insert into the <8 x i32> vector.
-
- InstructionCost ExtSubCost = 0;
- for (unsigned i = 0; i < NumSubElts; i++)
- ExtSubCost +=
- thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
- Cost += ExtSubCost * Factor;
-
- for (unsigned i = 0; i < NumElts; i++)
- Cost += static_cast<T *>(this)
- ->getVectorInstrCost(Instruction::InsertElement, VT, i);
+ // E.g. An interleaved store of factor 3 with 2 members at indices 0,1:
+ // (using VF=4):
+ // %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
+ // %gaps.mask = <true, true, false, true, true, false,
+ // true, true, false, true, true, false>
+ // call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
+ // i32 Align, <12 x i1> %gaps.mask
+ // The cost is estimated as extract all elements (of actual members,
+ // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
+ // i32> vector.
+ InstructionCost ExtSubCost =
+ thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
+ /*Insert*/ false, /*Extract*/ true);
+ Cost += ExtSubCost * Indices.size();
+ Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
+ /*Insert*/ true,
+ /*Extract*/ false);
}
if (!UseMaskForCond)
return Cost;
Type *I8Type = Type::getInt8Ty(VT->getContext());
- auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
- SubVT = FixedVectorType::get(I8Type, NumSubElts);
-
- // The Mask shuffling cost is extract all the elements of the Mask
- // and insert each of them Factor times into the wide vector:
- //
- // E.g. an interleaved group with factor 3:
- // %mask = icmp ult <8 x i32> %vec1, %vec2
- // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
- // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
- // The cost is estimated as extract all mask elements from the <8xi1> mask
- // vector and insert them factor times into the <24xi1> shuffled mask
- // vector.
- for (unsigned i = 0; i < NumSubElts; i++)
- Cost +=
- thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
- for (unsigned i = 0; i < NumElts; i++)
- Cost +=
- thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);
+ Cost += thisT()->getReplicationShuffleCost(
+ I8Type, Factor, NumSubElts,
+ UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
+ CostKind);
// The Gaps mask is invariant and created outside the loop, therefore the
// cost of creating it is not accounted for here. However if we have both
// a MaskForGaps and some other mask that guards the execution of the
// memory access, we need to account for the cost of And-ing the two masks
// inside the loop.
- if (UseMaskForGaps)
+ if (UseMaskForGaps) {
+ auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
CostKind);
+ }
return Cost;
}
@@ -1460,10 +1501,10 @@ public:
Type *CondTy = RetTy->getWithNewBitWidth(1);
Cost +=
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ CmpInst::ICMP_EQ, CostKind);
Cost +=
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ CmpInst::ICMP_EQ, CostKind);
}
return Cost;
}
@@ -1689,26 +1730,34 @@ public:
return thisT()->getMinMaxReductionCost(
VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
/*IsUnsigned=*/true, CostKind);
- case Intrinsic::abs:
+ case Intrinsic::abs: {
+ // abs(X) = select(icmp(X,0),X,sub(0,X))
+ Type *CondTy = RetTy->getWithNewBitWidth(1);
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
+ InstructionCost Cost = 0;
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ Pred, CostKind);
+ // TODO: Should we add an OperandValueProperties::OP_Zero property?
+ Cost += thisT()->getArithmeticInstrCost(
+ BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
+ return Cost;
+ }
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
case Intrinsic::umin: {
- // abs(X) = select(icmp(X,0),X,sub(0,X))
// minmax(X,Y) = select(icmp(X,Y),X,Y)
Type *CondTy = RetTy->getWithNewBitWidth(1);
+ bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
+ CmpInst::Predicate Pred =
+ IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
InstructionCost Cost = 0;
- // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- // TODO: Should we add an OperandValueProperties::OP_Zero property?
- if (IID == Intrinsic::abs)
- Cost += thisT()->getArithmeticInstrCost(
- BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
+ Pred, CostKind);
return Cost;
}
case Intrinsic::sadd_sat:
@@ -1719,6 +1768,7 @@ public:
Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
? Intrinsic::sadd_with_overflow
: Intrinsic::ssub_with_overflow;
+ CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
@@ -1726,12 +1776,10 @@ public:
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
nullptr, ScalarizationCostPassed);
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- Cost += 2 * thisT()->getCmpSelInstrCost(
- BinaryOperator::Select, RetTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
+ Pred, CostKind);
+ Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
+ CondTy, Pred, CostKind);
return Cost;
}
case Intrinsic::uadd_sat:
@@ -1784,23 +1832,16 @@ public:
? BinaryOperator::Add
: BinaryOperator::Sub;
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
// Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Overflow -> (Result < LHS) ^ (RHS < 0)
// Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ // Overflow -> (Result < LHS) ^ (RHS > 0)
InstructionCost Cost = 0;
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
- Cost += 3 * thisT()->getCmpSelInstrCost(
- Instruction::ICmp, SumTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
Cost += 2 * thisT()->getCmpSelInstrCost(
- Instruction::Select, OverflowTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+ Instruction::ICmp, SumTy, OverflowTy,
+ CmpInst::ICMP_SGT, CostKind);
+ Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
CostKind);
return Cost;
}
@@ -1811,12 +1852,15 @@ public:
unsigned Opcode = IID == Intrinsic::uadd_with_overflow
? BinaryOperator::Add
: BinaryOperator::Sub;
+ CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
+ ? CmpInst::ICMP_ULT
+ : CmpInst::ICMP_UGT;
InstructionCost Cost = 0;
Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
Cost +=
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Pred, CostKind);
return Cost;
}
case Intrinsic::smul_with_overflow:
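
The overflow checks whose costs are modelled in the two hunks above reduce to simple comparisons. A sketch of the identities for 32-bit scalars with wrapping arithmetic (illustration only, not code from the patch):

```cpp
#include <cstdint>

static bool saddOverflows(int32_t L, int32_t R) {
  int32_t Sum = static_cast<int32_t>(static_cast<uint32_t>(L) +
                                     static_cast<uint32_t>(R)); // wrapping add
  return (Sum < L) ^ (R < 0); // Overflow -> (Result < LHS) ^ (RHS < 0)
}

static bool uaddOverflows(uint32_t L, uint32_t R) {
  return L + R < L; // Overflow -> Result < LHS (the ICMP_ULT case above)
}
```
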
@@ -1825,9 +1869,9 @@ public:
Type *OverflowTy = RetTy->getContainedType(1);
unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
+ bool IsSigned = IID == Intrinsic::smul_with_overflow;
- unsigned ExtOp =
- IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
+ unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
TTI::CastContextHint CCH = TTI::CastContextHint::None;
InstructionCost Cost = 0;
@@ -1836,18 +1880,17 @@ public:
thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
CCH, CostKind);
- Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
+ Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
- if (IID == Intrinsic::smul_with_overflow)
+ if (IsSigned)
Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
CostKind, TTI::OK_AnyValue,
TTI::OK_UniformConstantValue);
- Cost +=
- thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(
+ BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
return Cost;
}
case Intrinsic::ctpop:
@@ -1974,16 +2017,16 @@ public:
/// \param RetTy Return value types.
/// \param Tys Argument types.
/// \returns The cost of Call instruction.
- InstructionCost
- getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
+ InstructionCost getCallInstrCost(Function *F, Type *RetTy,
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) {
return 10;
}
unsigned getNumberOfParts(Type *Tp) {
std::pair<InstructionCost, MVT> LT =
getTLI()->getTypeLegalizationCost(DL, Tp);
- return *LT.first.getValue();
+ return LT.first.isValid() ? *LT.first.getValue() : 0;
}
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
@@ -2060,7 +2103,8 @@ public:
// By default reductions need one shuffle per reduction level.
ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
- ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
+ ArithCost +=
+ NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
}
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
new file mode 100644
index 000000000000..270f935b6738
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -0,0 +1,219 @@
+//===- CodeGenCommonISel.h - Common code between ISels ---------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CODEGENCOMMONISEL_H
+#define LLVM_CODEGEN_CODEGENCOMMONISEL_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <cassert>
+namespace llvm {
+
+class BasicBlock;
+class MachineBasicBlock;
+/// Encapsulates all of the information needed to generate a stack protector
+/// check, and signals to isel when initialized that one needs to be generated.
+///
+/// *NOTE* The following is a high level documentation of SelectionDAG Stack
+/// Protector Generation. This is now also ported be shared with GlobalISel,
+/// but without any significant changes.
+///
+/// High Level Overview of ISel Stack Protector Generation:
+///
+/// Previously, the "stack protector" IR pass handled stack protector
+/// generation. This necessitated splitting basic blocks at the IR level to
+/// create the success/failure basic blocks in the tail of the basic block in
+/// question. As a result of this, calls that would have qualified for the
+/// sibling call optimization were no longer eligible for optimization since
+/// said calls were no longer right in the "tail position" (i.e. the immediate
+/// predecessor of a ReturnInst instruction).
+///
+/// Since the sibling call optimization causes the callee to reuse the caller's
+/// stack, if we could delay the generation of the stack protector check until
+/// later in CodeGen after the sibling call decision was made, we get both the
+/// tail call optimization and the stack protector check!
+///
+/// A few goals in solving this problem were:
+///
+/// 1. Preserve the architecture independence of stack protector generation.
+///
+/// 2. Preserve the normal IR level stack protector check for platforms like
+/// OpenBSD for which we support platform-specific stack protector
+/// generation.
+///
+/// The main problem that guided the present solution is that one can not
+/// solve this problem in an architecture independent manner at the IR level
+/// only. This is because:
+///
+/// 1. The decision on whether or not to perform a sibling call on certain
+/// platforms (for instance i386) requires lower level information
+/// related to available registers that can not be known at the IR level.
+///
+/// 2. Even if the previous point were not true, the decision on whether to
+/// perform a tail call is done in LowerCallTo in SelectionDAG (or
+/// CallLowering in GlobalISel) which occurs after the Stack Protector
+/// Pass. As a result, one would need to put the relevant callinst into the
+/// stack protector check success basic block (where the return inst is
+/// placed) and then move it back later at ISel/MI time before the
+/// stack protector check if the tail call optimization failed. The MI
+/// level option was nixed immediately since it would require
+/// platform-specific pattern matching. The ISel level option was
+/// nixed because SelectionDAG only processes one IR level basic block at a
+/// time implying one could not create a DAG Combine to move the callinst.
+///
+/// To get around this problem:
+///
+/// 1. Since SelectionDAG can only process one block at a time, we can generate
+/// multiple machine basic blocks for one IR level basic block.
+/// This is how we handle bit tests and switches.
+///
+/// 2. At the MI level, tail calls are represented via a special return
+/// MIInst called "tcreturn". Thus if we know the basic block in which we
+/// wish to insert the stack protector check, we get the correct behavior
+/// by always inserting the stack protector check right before the return
+/// statement. This is a "magical transformation" since no matter where
+/// the stack protector check intrinsic is, we always insert the stack
+/// protector check code at the end of the BB.
+///
+/// Given the aforementioned constraints, the following solution was devised:
+///
+/// 1. On platforms that do not support ISel stack protector check
+/// generation, allow for the normal IR level stack protector check
+/// generation to continue.
+///
+/// 2. On platforms that do support ISel stack protector check
+/// generation:
+///
+/// a. Use the IR level stack protector pass to decide if a stack
+/// protector is required/which BB we insert the stack protector check
+/// in by reusing the logic already therein.
+///
+/// b. After we finish selecting the basic block, we produce the validation
+/// code with one of these techniques:
+/// 1) with a call to a guard check function
+/// 2) with inlined instrumentation
+///
+/// 1) We insert a call to the check function before the terminator.
+///
+/// 2) We first find a splice point in the parent basic block
+/// before the terminator and then splice the terminator of said basic
+/// block into the success basic block. Then we code-gen a new tail for
+/// the parent basic block consisting of the two loads, the comparison,
+/// and finally two branches to the success/failure basic blocks. We
+/// conclude by code-gening the failure basic block if we have not
+/// code-gened it already (all stack protector checks we generate in
+/// the same function, use the same failure basic block).
+class StackProtectorDescriptor {
+public:
+ StackProtectorDescriptor() = default;
+
+ /// Returns true if all fields of the stack protector descriptor are
+ /// initialized implying that we should/are ready to emit a stack protector.
+ bool shouldEmitStackProtector() const {
+ return ParentMBB && SuccessMBB && FailureMBB;
+ }
+
+ bool shouldEmitFunctionBasedCheckStackProtector() const {
+ return ParentMBB && !SuccessMBB && !FailureMBB;
+ }
+
+ /// Initialize the stack protector descriptor structure for a new basic
+ /// block.
+ void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+ bool FunctionBasedInstrumentation) {
+ // Make sure we are not initialized yet.
+ assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+ "already initialized!");
+ ParentMBB = MBB;
+ if (!FunctionBasedInstrumentation) {
+ SuccessMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ true);
+ FailureMBB = addSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+ }
+ }
+
+ /// Reset state that changes when we handle different basic blocks.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. The specific basic block we are generating a
+ /// stack protector for (ParentMBB).
+ ///
+ /// 2. The successor machine basic block that will contain the tail of
+ /// parent mbb after we create the stack protector check (SuccessMBB). This
+ /// BB is visited only on stack protector check success.
+ void resetPerBBState() {
+ ParentMBB = nullptr;
+ SuccessMBB = nullptr;
+ }
+
+ /// Reset state that only changes when we switch functions.
+ ///
+ /// This currently includes:
+ ///
+ /// 1. FailureMBB since we reuse the failure code path for all stack
+ /// protector checks created in an individual function.
+ ///
+ /// 2.The guard variable since the guard variable we are checking against is
+ /// always the same.
+ void resetPerFunctionState() { FailureMBB = nullptr; }
+
+ MachineBasicBlock *getParentMBB() { return ParentMBB; }
+ MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
+ MachineBasicBlock *getFailureMBB() { return FailureMBB; }
+
+private:
+ /// The basic block for which we are generating the stack protector.
+ ///
+ /// As a result of stack protector generation, we will splice the
+ /// terminators of this basic block into the successor mbb SuccessMBB and
+ /// replace it with a compare/branch to the successor mbbs
+ /// SuccessMBB/FailureMBB depending on whether or not the stack protector
+ /// was violated.
+ MachineBasicBlock *ParentMBB = nullptr;
+
+ /// A basic block visited on stack protector check success that contains the
+ /// terminators of ParentMBB.
+ MachineBasicBlock *SuccessMBB = nullptr;
+
+ /// This basic block visited on stack protector check failure that will
+ /// contain a call to __stack_chk_fail().
+ MachineBasicBlock *FailureMBB = nullptr;
+
+ /// Add a successor machine basic block to ParentMBB. If the successor mbb
+ /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
+ /// block will be created. Assign a large weight if IsLikely is true.
+ MachineBasicBlock *addSuccessorMBB(const BasicBlock *BB,
+ MachineBasicBlock *ParentMBB,
+ bool IsLikely,
+ MachineBasicBlock *SuccMBB = nullptr);
+};
+
+/// Find the split point at which to splice the end of BB into its success stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, selectiondag always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+findSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII);
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_CODEGENCOMMONISEL_H
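
A sketch of the per-block lifecycle described in the comments above, with a hypothetical selector driving the descriptor:

```cpp
#include "llvm/CodeGen/CodeGenCommonISel.h"

// Initialize the descriptor for a protected block, emit the check against the
// success/failure blocks, then clear the per-block state. FailureMBB is
// reused until the next resetPerFunctionState().
static void emitCheckForBlock(llvm::StackProtectorDescriptor &SPD,
                              const llvm::BasicBlock *BB,
                              llvm::MachineBasicBlock *MBB) {
  SPD.initialize(BB, MBB, /*FunctionBasedInstrumentation=*/false);
  if (SPD.shouldEmitStackProtector()) {
    // ... build the guard load, compare, and conditional branch to
    // SPD.getSuccessMBB() / SPD.getFailureMBB() here ...
  }
  SPD.resetPerBBState();
}
```
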
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index 5a4351756297..ed3cd54df272 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -48,7 +48,6 @@ Optional<CodeModel::Model> getExplicitCodeModel();
llvm::ExceptionHandling getExceptionModel();
-CodeGenFileType getFileType();
Optional<CodeGenFileType> getExplicitFileType();
CodeGenFileType getFileType();
@@ -74,6 +73,8 @@ llvm::FloatABI::ABIType getFloatABIForCalls();
llvm::FPOpFusion::FPOpFusionMode getFuseFPOps();
+SwiftAsyncFramePointerMode getSwiftAsyncFramePointer();
+
bool getDontPlaceZerosInBSS();
bool getEnableGuaranteedTailCallOpt();
@@ -128,8 +129,6 @@ bool getEnableMachineFunctionSplitter();
bool getEnableDebugEntryValues();
-bool getPseudoProbeForProfiling();
-
bool getValueTrackingVariableLocations();
bool getForceDwarfFrameSection();
@@ -138,6 +137,8 @@ bool getXRayOmitFunctionIndex();
bool getDebugStrictDwarf();
+unsigned getAlignLoops();
+
/// Create this object with static storage to register codegen-related command
/// line options.
struct RegisterCodeGenFlags {
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index b6bde0249f88..524730d53694 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 6bdaddd9c6f5..9c878d4b087b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -116,6 +116,9 @@ public:
/// vreg that the swifterror should be copied into after the call.
Register SwiftErrorVReg;
+ /// Original IR callsite corresponding to this call, if available.
+ const CallBase *CB = nullptr;
+
MDNode *KnownCallees = nullptr;
/// True if the call must be tail call optimized.
@@ -259,7 +262,7 @@ public:
/// handle the appropriate COPY (either to or from) and mark any
/// relevant uses/defines as needed.
virtual void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) = 0;
+ CCValAssign VA) = 0;
/// The specified value has been assigned to a stack
/// location. Load or store it there, with appropriate extension
@@ -279,11 +282,14 @@ public:
}
/// Handle custom values, which may be passed into one or more of \p VAs.
+  /// If the handler wants the assignments to be delayed until after
+ /// mem loc assignments, then it sets \p Thunk to the thunk to do the
+ /// assignment.
/// \return The number of \p VAs that have been assigned after the first
/// one, and which should therefore be skipped from further
/// processing.
- virtual unsigned assignCustomValue(ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ virtual unsigned assignCustomValue(ArgInfo &Arg, ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk = nullptr) {
// This is not a pure virtual method because not all targets need to worry
// about custom values.
llvm_unreachable("Custom values not supported");
@@ -315,7 +321,7 @@ public:
/// Provides a default implementation for argument handling.
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
};
/// Base class for ValueHandlers used for arguments passed to a function call,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 56459b68dce0..ff4ad4b72636 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -36,7 +36,10 @@ class GISelKnownBits;
class MachineDominatorTree;
class LegalizerInfo;
struct LegalityQuery;
+class RegisterBank;
+class RegisterBankInfo;
class TargetLowering;
+class TargetRegisterInfo;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -54,6 +57,7 @@ struct IndexedLoadStoreMatchInfo {
struct PtrAddChain {
int64_t Imm;
Register Base;
+ const RegisterBank *Bank;
};
struct RegisterImmPair {
@@ -68,6 +72,16 @@ struct ShiftOfShiftedLogic {
uint64_t ValSum;
};
+using BuildFnTy = std::function<void(MachineIRBuilder &)>;
+
+struct MergeTruncStoresInfo {
+ SmallVector<GStore *> FoundStores;
+ GStore *LowestIdxStore = nullptr;
+ Register WideSrcVal;
+ bool NeedBSwap = false;
+ bool NeedRotate = false;
+};
+
using OperandBuildSteps =
SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
struct InstructionBuildSteps {
@@ -95,6 +109,8 @@ protected:
GISelKnownBits *KB;
MachineDominatorTree *MDT;
const LegalizerInfo *LI;
+ const RegisterBankInfo *RBI;
+ const TargetRegisterInfo *TRI;
public:
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
@@ -120,6 +136,22 @@ public:
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp,
Register ToReg) const;
+ /// Replace the opcode in instruction with a new opcode and inform the
+ /// observer of the changes.
+ void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const;
+
+ /// Get the register bank of \p Reg.
+ /// If Reg has not been assigned a register, a register class,
+ /// or a register bank, then this returns nullptr.
+ ///
+ /// \pre Reg.isValid()
+ const RegisterBank *getRegBank(Register Reg) const;
+
+ /// Set the register bank of \p Reg.
+ /// Does nothing if the RegBank is null.
+ /// This is the counterpart to getRegBank.
+ void setRegBank(Register Reg, const RegisterBank *RegBank);
+
/// If \p MI is COPY, try to combine it.
/// Returns true if MI changed.
bool tryCombineCopy(MachineInstr &MI);
@@ -144,6 +176,9 @@ public:
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+ /// Match (and (load x), mask) -> zextload x
+ bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Combine \p MI into a pre-indexed or post-indexed load/store operation if
/// legal and the surrounding code makes it useful.
bool tryCombineIndexedLoadStore(MachineInstr &MI);
@@ -341,6 +376,9 @@ public:
bool matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
void applyCombineFAbsOfFAbs(MachineInstr &MI, Register &Src);
+ /// Transform fabs(fneg(x)) to fabs(x).
+ bool matchCombineFAbsOfFNeg(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Transform trunc ([asz]ext x) to x or ([asz]ext x) or (trunc x).
bool matchCombineTruncOfExt(MachineInstr &MI,
std::pair<Register, unsigned> &MatchInfo);
@@ -445,7 +483,7 @@ public:
/// Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
bool matchOverlappingAnd(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ BuildFnTy &MatchInfo);
/// \return true if \p MI is a G_AND instruction whose operands are x and y
/// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
@@ -501,8 +539,10 @@ public:
///
/// And check if the tree can be replaced with a M-bit load + possibly a
/// bswap.
- bool matchLoadOrCombine(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
+ void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
@@ -519,12 +559,10 @@ public:
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// By default, it erases the instruction \p MI from the function.
- void applyBuildFn(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo);
/// Use a function which takes in a MachineIRBuilder to perform a combine.
/// This variant does not erase \p MI after calling the build function.
- void applyBuildFnNoErase(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo);
bool matchFunnelShiftToRotate(MachineInstr &MI);
void applyFunnelShiftToRotate(MachineInstr &MI);
@@ -535,21 +573,57 @@ public:
/// or false constant based off of KnownBits information.
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
- bool matchBitfieldExtractFromSExtInReg(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
- /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
- bool matchBitfieldExtractFromAnd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
+ /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of
+ /// KnownBits information.
+ bool
+ matchICmpToLHSKnownBits(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+
+ /// \returns true if (and (or x, c1), c2) can be replaced with (and x, c2)
+ bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo);
+ bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI,
+ BuildFnTy &MatchInfo);
+ /// Match: and (lshr x, cst), mask -> ubfx x, cst, width
+ bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width
+ bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Match: shr (and x, n), k -> ubfx x, pos, width
+ bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ // Helpers for reassociation:
+ bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS,
+ BuildFnTy &MatchInfo);
+ bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo);
+ bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS,
+ MachineInstr *RHS, BuildFnTy &MatchInfo);
/// Reassociate pointer calculations with G_ADD involved, to allow better
/// addressing mode usage.
- bool matchReassocPtrAdd(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
-
+ bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
+ /// \returns true if it is possible to narrow the width of a scalar binop
+ /// feeding a G_AND instruction \p MI.
+ bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ /// Given an G_UDIV \p MI expressing a divide by constant, return an
+ /// expression that implements it by multiplying by a magic number.
+ /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
+ MachineInstr *buildUDivUsingMul(MachineInstr &MI);
+ /// Combine G_UDIV by constant into a multiply by magic constant.
+ bool matchUDivByConst(MachineInstr &MI);
+ void applyUDivByConst(MachineInstr &MI);
+
+ // G_UMULH x, (1 << c)) -> x >> (bitwidth - c)
+ bool matchUMulHToLShr(MachineInstr &MI);
+ void applyUMulHToLShr(MachineInstr &MI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
@@ -560,20 +634,21 @@ public:
/// and rename: s/bool tryEmit/void emit/
bool tryEmitMemcpyInline(MachineInstr &MI);
-private:
- // Memcpy family optimization helpers.
- bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, Align DstAlign, Align SrcAlign,
- bool IsVolatile);
- bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile);
- bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src,
- uint64_t KnownLen, Align DstAlign, Align SrcAlign,
- bool IsVolatile);
- bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
- uint64_t KnownLen, Align DstAlign, bool IsVolatile);
+ /// Match:
+ /// (G_UMULO x, 2) -> (G_UADDO x, x)
+ /// (G_SMULO x, 2) -> (G_SADDO x, x)
+ bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Transform (fadd x, fneg(y)) -> (fsub x, y)
+ /// (fadd fneg(x), y) -> (fsub y, x)
+ /// (fsub x, fneg(y)) -> (fadd x, y)
+ /// (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ /// (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ /// (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ /// (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo);
+
+private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
///
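Most of the matchers touched above now take the BuildFnTy alias instead of spelling out std::function<void(MachineIRBuilder &)>. A minimal sketch of how such a match/apply pair is driven (not part of the patch; matchCombineFAbsOfFNeg stands in for any of the new matchers, and BuildFnTy is assumed to be the std::function alias this header introduces):

  #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"

  static bool combineFAbsOfFNeg(llvm::CombinerHelper &Helper,
                                llvm::MachineInstr &MI) {
    llvm::BuildFnTy MatchInfo; // the lambda that performs the rewrite
    if (!Helper.matchCombineFAbsOfFNeg(MI, MatchInfo))
      return false;
    // applyBuildFn runs MatchInfo and erases MI by default.
    Helper.applyBuildFn(MI, MatchInfo);
    return true;
  }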
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 1162134b2ad2..7103656365b1 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -57,9 +57,9 @@ public:
bool isUnordered() const { return getMMO().isUnordered(); }
/// Returns the size in bytes of the memory access.
- uint64_t getMemSize() { return getMMO().getSize(); }
+ uint64_t getMemSize() const { return getMMO().getSize(); }
/// Returns the size in bits of the memory access.
- uint64_t getMemSizeInBits() { return getMMO().getSizeInBits(); }
+ uint64_t getMemSizeInBits() const { return getMMO().getSizeInBits(); }
static bool classof(const MachineInstr *MI) {
switch (MI->getOpcode()) {
@@ -195,6 +195,37 @@ public:
}
};
+/// Represents a G_PTR_ADD.
+class GPtrAdd : public GenericMachineInstr {
+public:
+ Register getBaseReg() const { return getReg(1); }
+ Register getOffsetReg() const { return getReg(2); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_PTR_ADD;
+ }
+};
+
+/// Represents a G_IMPLICIT_DEF.
+class GImplicitDef : public GenericMachineInstr {
+public:
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
+ }
+};
+
+/// Represents a G_SELECT.
+class GSelect : public GenericMachineInstr {
+public:
+ Register getCondReg() const { return getReg(1); }
+ Register getTrueReg() const { return getReg(2); }
+ Register getFalseReg() const { return getReg(3); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_SELECT;
+ }
+};
+
} // namespace llvm
-#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H \ No newline at end of file
+#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
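A minimal sketch of the intent behind the new wrapper classes (not part of the patch): combines can dyn_cast to GPtrAdd or GSelect instead of checking raw opcodes and hard-coded operand indices.

  #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
  using namespace llvm;

  // Returns true if MI is a G_PTR_ADD whose base register is Base.
  static bool isPtrAddWithBase(MachineInstr &MI, Register Base) {
    if (auto *PtrAdd = dyn_cast<GPtrAdd>(&MI))
      return PtrAdd->getBaseReg() == Base; // operand 1 of G_PTR_ADD
    return false;
  }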
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 8eab8a5846a7..ebe16cd4f58c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -466,9 +467,8 @@ private:
bool translateSIToFP(const User &U, MachineIRBuilder &MIRBuilder) {
return translateCast(TargetOpcode::G_SITOFP, U, MIRBuilder);
}
- bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
- return true;
- }
+ bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder);
+
bool translateSExt(const User &U, MachineIRBuilder &MIRBuilder) {
return translateCast(TargetOpcode::G_SEXT, U, MIRBuilder);
}
@@ -586,6 +586,8 @@ private:
/// stop translating such blocks early.
bool HasTailCall = false;
+ StackProtectorDescriptor SPDescriptor;
+
/// Switch analysis and optimization.
class GISelSwitchLowering : public SwitchCG::SwitchLowering {
public:
@@ -614,8 +616,34 @@ private:
// * Clear the different maps.
void finalizeFunction();
- // Handle emitting jump tables for each basic block.
- void finalizeBasicBlock();
+ // Processing steps done per block. E.g. emitting jump tables, stack
+ // protectors etc. Returns true if no errors, false if there was a problem
+ // that caused an abort.
+ bool finalizeBasicBlock(const BasicBlock &BB, MachineBasicBlock &MBB);
+
+ /// Codegen a new tail for a stack protector check ParentMBB which has had its
+ /// tail spliced into a stack protector check success bb.
+ ///
+ /// For a high level explanation of how this fits into the stack protector
+ /// generation see the comment on the declaration of class
+ /// StackProtectorDescriptor.
+ ///
+ /// \return true if there were no problems.
+ bool emitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB);
+
+ /// Codegen the failure basic block for a stack protector check.
+ ///
+ /// A failure stack protector machine basic block consists simply of a call to
+ /// __stack_chk_fail().
+ ///
+ /// For a high level explanation of how this fits into the stack protector
+ /// generation see the comment on the declaration of class
+ /// StackProtectorDescriptor.
+ ///
+ /// \return true if there were no problems.
+ bool emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *FailureBB);
/// Get the VRegs that represent \p Val.
/// Non-aggregate types have just one corresponding VReg and the list can be
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
index b1f2103da309..f6704df3f49d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
@@ -478,4 +478,4 @@ private:
} // end namespace llvm
-#endif // define LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H
+#endif // LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index 44a48927d35a..8a603de2f91d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
@@ -22,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "legalizer"
@@ -52,7 +54,8 @@ public:
bool tryCombineAnyExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts,
- SmallVectorImpl<Register> &UpdatedDefs) {
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelObserverWrapper &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
Builder.setInstrAndDebugLoc(MI);
@@ -63,7 +66,11 @@ public:
Register TruncSrc;
if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
- Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+ if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
+ replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
+ Observer);
+ else
+ Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
UpdatedDefs.push_back(DstReg);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
@@ -120,12 +127,14 @@ public:
return false;
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
- APInt MaskVal = APInt::getAllOnesValue(SrcTy.getScalarSizeInBits());
+ APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
auto Mask = Builder.buildConstant(
DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
- auto Extended = SextSrc ? Builder.buildSExtOrTrunc(DstTy, SextSrc) :
- Builder.buildAnyExtOrTrunc(DstTy, TruncSrc);
- Builder.buildAnd(DstReg, Extended, Mask);
+ if (SextSrc && (DstTy != MRI.getType(SextSrc)))
+ SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
+ if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
+ TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
+ Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -176,9 +185,9 @@ public:
LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
LLT SrcTy = MRI.getType(SrcReg);
uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
- Builder.buildInstr(
- TargetOpcode::G_SEXT_INREG, {DstReg},
- {Builder.buildAnyExtOrTrunc(DstTy, TruncSrc), SizeInBits});
+ if (DstTy != MRI.getType(TruncSrc))
+ TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
+ Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
@@ -544,12 +553,14 @@ public:
MachineIRBuilder &MIB;
const LegalizerInfo &LI;
- private:
+ // Stores the best register found in the current query so far.
+ Register CurrentBest = Register();
+
/// Given an concat_vector op \p Concat and a start bit and size, try to
/// find the origin of the value defined by that start position and size.
///
- /// \returns A register if a value can be found, otherwise an empty
- /// Register.
+ /// \returns a register with the requested size, or the current best
+ /// register found during the current query.
Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
unsigned Size) {
assert(Size > 0);
@@ -566,22 +577,22 @@ public:
// FIXME: we might be able return multiple sources? Or create an
// appropriate concat to make it fit.
if (InRegOffset + Size > SrcSize)
- return Register();
+ return CurrentBest;
- // If the bits exactly cover a single source, then return the operand as
- // our value reg.
Register SrcReg = Concat.getReg(StartSrcIdx);
- if (InRegOffset == 0 && Size == SrcSize)
- return SrcReg; // A source operand matches exactly.
+ if (InRegOffset == 0 && Size == SrcSize) {
+ CurrentBest = SrcReg;
+ return findValueFromDefImpl(SrcReg, 0, Size);
+ }
- return findValueFromDef(SrcReg, InRegOffset, Size);
+ return findValueFromDefImpl(SrcReg, InRegOffset, Size);
}
/// Given an build_vector op \p BV and a start bit and size, try to find
/// the origin of the value defined by that start position and size.
///
- /// \returns A register if a value can be found, otherwise an empty
- /// Register.
+ /// \returns a register with the requested size, or the current best
+ /// register found during the current query.
Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
unsigned Size) {
assert(Size > 0);
@@ -596,17 +607,21 @@ public:
unsigned InRegOffset = StartBit % SrcSize;
if (InRegOffset != 0)
- return Register(); // Give up, bits don't start at a scalar source.
+ return CurrentBest; // Give up, bits don't start at a scalar source.
if (Size < SrcSize)
- return Register(); // Scalar source is too large for requested bits.
+ return CurrentBest; // Scalar source is too large for requested bits.
// If the bits cover multiple sources evenly, then create a new
// build_vector to synthesize the required size, if that's been requested.
if (Size > SrcSize) {
if (Size % SrcSize > 0)
- return Register(); // Isn't covered exactly by sources.
+ return CurrentBest; // Isn't covered exactly by sources.
unsigned NumSrcsUsed = Size / SrcSize;
+ // If we're requesting all of the sources, just return this def.
+ if (NumSrcsUsed == BV.getNumSources())
+ return BV.getReg(0);
+
LLT SrcTy = MRI.getType(Src1Reg);
LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);
@@ -614,7 +629,7 @@ public:
LegalizeActionStep ActionStep =
LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
if (ActionStep.Action != LegalizeActions::Legal)
- return Register();
+ return CurrentBest;
SmallVector<Register> NewSrcs;
for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
@@ -630,8 +645,8 @@ public:
/// Given an G_INSERT op \p MI and a start bit and size, try to find
/// the origin of the value defined by that start position and size.
///
- /// \returns A register if a value can be found, otherwise an empty
- /// Register.
+ /// \returns a register with the requested size, or the current best
+ /// register found during the current query.
Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
unsigned Size) {
assert(MI.getOpcode() == TargetOpcode::G_INSERT);
@@ -685,28 +700,25 @@ public:
if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
SrcRegToUse = ContainerSrcReg;
NewStartBit = StartBit;
- return findValueFromDef(SrcRegToUse, NewStartBit, Size);
+ return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
}
if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
SrcRegToUse = InsertedReg;
NewStartBit = StartBit - InsertOffset;
- return findValueFromDef(SrcRegToUse, NewStartBit, Size);
+ if (NewStartBit == 0 &&
+ Size == MRI.getType(SrcRegToUse).getSizeInBits())
+ CurrentBest = SrcRegToUse;
+ return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
}
// The bit range spans both the inserted and container regions.
return Register();
}
- public:
- ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
- const LegalizerInfo &Info)
- : MRI(Mri), MIB(Builder), LI(Info) {}
-
- /// Try to find a source of the value defined in the def \p DefReg, starting
- /// at position \p StartBit with size \p Size.
- /// \returns an empty Register if no value could be found, or \p DefReg if
- /// if that was the best we could do.
- Register findValueFromDef(Register DefReg, unsigned StartBit,
- unsigned Size) {
+ /// Internal implementation for findValueFromDef(). findValueFromDef()
+ /// initializes some data like the CurrentBest register, which this method
+ /// and its callees rely upon.
+ Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
+ unsigned Size) {
MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
// If the instruction has a single def, then simply delegate the search.
// For unmerge however with multiple defs, we need to compute the offset
@@ -724,7 +736,7 @@ public:
}
Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
Register SrcOriginReg =
- findValueFromDef(SrcReg, StartBit + DefStartBit, Size);
+ findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
if (SrcOriginReg)
return SrcOriginReg;
// Failed to find a further value. If the StartBit and Size perfectly
@@ -732,7 +744,7 @@ public:
// nothing.
if (StartBit == 0 && Size == DefSize)
return DefReg;
- return Register();
+ return CurrentBest;
}
case TargetOpcode::G_BUILD_VECTOR:
return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
@@ -740,41 +752,48 @@ public:
case TargetOpcode::G_INSERT:
return findValueFromInsert(*Def, StartBit, Size);
default:
- return Register();
+ return CurrentBest;
}
}
- };
- bool tryCombineUnmergeValues(GUnmerge &MI,
- SmallVectorImpl<MachineInstr *> &DeadInsts,
- SmallVectorImpl<Register> &UpdatedDefs,
- GISelChangeObserver &Observer) {
- unsigned NumDefs = MI.getNumDefs();
- Register SrcReg = MI.getSourceReg();
- MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
- if (!SrcDef)
- return false;
-
- LLT OpTy = MRI.getType(SrcReg);
- LLT DestTy = MRI.getType(MI.getReg(0));
- unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
+ public:
+ ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
+ const LegalizerInfo &Info)
+ : MRI(Mri), MIB(Builder), LI(Info) {}
- Builder.setInstrAndDebugLoc(MI);
+ /// Try to find a source of the value defined in the def \p DefReg, starting
+ /// at position \p StartBit with size \p Size.
+ /// \returns a register with the requested size, or an empty Register if no
+ /// better value could be found.
+ Register findValueFromDef(Register DefReg, unsigned StartBit,
+ unsigned Size) {
+ CurrentBest = Register();
+ Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
+ return FoundReg != DefReg ? FoundReg : Register();
+ }
- auto tryCombineViaValueFinder = [&]() {
- ArtifactValueFinder ValueFinder(MRI, Builder, LI);
+ /// Try to combine the defs of an unmerge \p MI by attempting to find
+ /// values that provides the bits for each def reg.
+ /// \returns true if all the defs of the unmerge have been made dead.
+ bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
+ SmallVectorImpl<Register> &UpdatedDefs) {
+ unsigned NumDefs = MI.getNumDefs();
+ LLT DestTy = MRI.getType(MI.getReg(0));
SmallBitVector DeadDefs(NumDefs);
for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
Register DefReg = MI.getReg(DefIdx);
- Register FoundVal =
- ValueFinder.findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
- if (!FoundVal || FoundVal == DefReg)
+ if (MRI.use_nodbg_empty(DefReg)) {
+ DeadDefs[DefIdx] = true;
+ continue;
+ }
+ Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
+ if (!FoundVal)
continue;
if (MRI.getType(FoundVal) != DestTy)
continue;
- replaceRegOrBuildCopy(DefReg, FoundVal, MRI, Builder, UpdatedDefs,
+ replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
Observer);
// We only want to replace the uses, not the def of the old reg.
Observer.changingInstr(MI);
@@ -782,12 +801,31 @@ public:
Observer.changedInstr(MI);
DeadDefs[DefIdx] = true;
}
- if (DeadDefs.all()) {
- markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
- return true;
- }
+ return DeadDefs.all();
+ }
+ };
+
+ bool tryCombineUnmergeValues(GUnmerge &MI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts,
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelChangeObserver &Observer) {
+ unsigned NumDefs = MI.getNumDefs();
+ Register SrcReg = MI.getSourceReg();
+ MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
+ if (!SrcDef)
return false;
- };
+
+ LLT OpTy = MRI.getType(SrcReg);
+ LLT DestTy = MRI.getType(MI.getReg(0));
+ unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
+
+ Builder.setInstrAndDebugLoc(MI);
+
+ ArtifactValueFinder Finder(MRI, Builder, LI);
+ if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
+ markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
+ return true;
+ }
if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
// %0:_(<4 x s16>) = G_FOO
@@ -813,7 +851,7 @@ public:
return false;
break;
default:
- return tryCombineViaValueFinder();
+ return false;
}
auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
@@ -845,11 +883,7 @@ public:
ConvertOp, OpTy, DestTy)) {
// We might have a chance to combine later by trying to combine
// unmerge(cast) first
- if (tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs))
- return true;
-
- // Try using the value finder.
- return tryCombineViaValueFinder();
+ return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
}
const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
@@ -1042,7 +1076,7 @@ public:
default:
return false;
case TargetOpcode::G_ANYEXT:
- Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
+ Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
break;
case TargetOpcode::G_ZEXT:
Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
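A minimal sketch of the reworked ArtifactValueFinder contract (not part of the patch; written as if it ran inside LegalizationArtifactCombiner, where MRI, Builder and LI are in scope): findValueFromDef() now returns an empty Register when nothing better than the queried def itself was found.

  Register lookThroughArtifacts(Register DefReg) {
    ArtifactValueFinder Finder(MRI, Builder, LI);
    Register Found = Finder.findValueFromDef(
        DefReg, /*StartBit=*/0, MRI.getType(DefReg).getSizeInBits());
    // Empty means "no better value"; fall back to the original def.
    return Found.isValid() ? Found : DefReg;
  }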
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 67141f3a6326..74615c73741a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -256,6 +256,20 @@ private:
LLT SrcTy, LLT NarrowTy,
unsigned ScalarOpc);
+ // Memcpy family legalization helpers.
+ LegalizeResult lowerMemset(MachineInstr &MI, Register Dst, Register Val,
+ uint64_t KnownLen, Align Alignment,
+ bool IsVolatile);
+ LegalizeResult lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign,
+ Align SrcAlign, bool IsVolatile);
+ LegalizeResult lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile);
+ LegalizeResult lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile);
+
public:
/// Return the alignment to use for a stack temporary object with the given
/// type.
@@ -402,6 +416,9 @@ public:
LegalizeResult lowerDIVREM(MachineInstr &MI);
LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
+ LegalizeResult lowerVectorReduction(MachineInstr &MI);
+ LegalizeResult lowerMemcpyInline(MachineInstr &MI);
+ LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
};
/// Helper function that creates a libcall to the given \p Name using the given
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 4fdfabbfb161..68c14240ebc7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -15,8 +15,6 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERINFO_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -113,6 +111,14 @@ struct LegalityQuery {
LLT MemoryTy;
uint64_t AlignInBits;
AtomicOrdering Ordering;
+
+ MemDesc() = default;
+ MemDesc(LLT MemoryTy, uint64_t AlignInBits, AtomicOrdering Ordering)
+ : MemoryTy(MemoryTy), AlignInBits(AlignInBits), Ordering(Ordering) {}
+ MemDesc(const MachineMemOperand &MMO)
+ : MemoryTy(MMO.getMemoryType()),
+ AlignInBits(MMO.getAlign().value() * 8),
+ Ordering(MMO.getSuccessOrdering()) {}
};
/// Operations which require memory can use this to place requirements on the
@@ -293,6 +299,10 @@ LegalityPredicate scalarOrEltNarrowerThan(unsigned TypeIdx, unsigned Size);
/// type that's wider than the given size.
LegalityPredicate scalarOrEltWiderThan(unsigned TypeIdx, unsigned Size);
+/// True iff the specified type index is a scalar whose size is not a multiple
+/// of Size.
+LegalityPredicate sizeNotMultipleOf(unsigned TypeIdx, unsigned Size);
+
/// True iff the specified type index is a scalar whose size is not a power of
/// 2.
LegalityPredicate sizeNotPow2(unsigned TypeIdx);
@@ -348,6 +358,11 @@ LegalizeMutation changeElementSizeTo(unsigned TypeIdx, unsigned FromTypeIdx);
/// next power of 2.
LegalizeMutation widenScalarOrEltToNextPow2(unsigned TypeIdx, unsigned Min = 0);
+/// Widen the scalar type or vector element type for the given type index to
+/// next multiple of \p Size.
+LegalizeMutation widenScalarOrEltToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size);
+
/// Add more elements to the type for the given type index to the next power of
/// 2.
LegalizeMutation moreElementsToNextPow2(unsigned TypeIdx, unsigned Min = 0);
@@ -828,6 +843,16 @@ public:
LegalizeMutations::widenScalarOrEltToNextPow2(TypeIdx, MinSize));
}
+ /// Widen the scalar to the next multiple of Size. No effect if the
+ /// type is not a scalar or is a multiple of Size.
+ LegalizeRuleSet &widenScalarToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ using namespace LegalityPredicates;
+ return actionIf(
+ LegalizeAction::WidenScalar, sizeNotMultipleOf(typeIdx(TypeIdx), Size),
+ LegalizeMutations::widenScalarOrEltToNextMultipleOf(TypeIdx, Size));
+ }
+
/// Widen the scalar or vector element type to the next power of two that is
/// at least MinSize. No effect if the scalar size is a power of two.
LegalizeRuleSet &widenScalarOrEltToNextPow2(unsigned TypeIdx,
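A minimal sketch of how a target might use the new rule (not part of the patch; assumed to run inside a target's LegalizerInfo constructor, with s32/s64 being LLT::scalar(32) and LLT::scalar(64)):

  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);
  getActionDefinitionsBuilder(TargetOpcode::G_MUL)
      .legalFor({s32, s64})
      // Round odd-sized scalars up to the next multiple of 32 bits first.
      .widenScalarToNextMultipleOf(0, 32)
      .clampScalar(0, s32, s64);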
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
new file mode 100644
index 000000000000..29575f386d7a
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -0,0 +1,165 @@
+//== llvm/CodeGen/GlobalISel/LoadStoreOpt.h - LoadStoreOpt -------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// This is an optimization pass for GlobalISel generic memory operations.
+/// Specifically, it focuses on merging stores and loads to consecutive
+/// addresses.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
+#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+
+namespace llvm {
+// Forward declarations.
+class MachineRegisterInfo;
+class TargetTransformInfo;
+namespace GISelAddressing {
+/// Helper struct to store a base, index and offset that forms an address
+struct BaseIndexOffset {
+ Register BaseReg;
+ Register IndexReg;
+ int64_t Offset = 0;
+ bool IsIndexSignExt = false;
+};
+
+/// Returns a BaseIndexOffset which describes the pointer in \p Ptr.
+BaseIndexOffset getPointerInfo(Register Ptr, MachineRegisterInfo &MRI);
+
+/// Compute whether or not a memory access at \p MI1 aliases with an access at
+/// \p MI2. \returns true if either alias/no-alias is known. Sets \p IsAlias
+/// accordingly.
+bool aliasIsKnownForLoadStore(const MachineInstr &MI1, const MachineInstr &MI2,
+ bool &IsAlias, MachineRegisterInfo &MRI);
+
+/// Returns true if the instruction \p MI may alias \p Other.
+/// This function uses multiple strategies to detect aliasing, whereas
+/// aliasIsKnownForLoadStore just looks at the addresses of load/stores and is
+/// tries to reason about base/index/offsets.
+bool instMayAlias(const MachineInstr &MI, const MachineInstr &Other,
+ MachineRegisterInfo &MRI, AliasAnalysis *AA);
+} // namespace GISelAddressing
+
+using namespace GISelAddressing;
+
+class LoadStoreOpt : public MachineFunctionPass {
+public:
+ static char ID;
+
+private:
+ /// An input function to decide if the pass should run or not
+ /// on the given MachineFunction.
+ std::function<bool(const MachineFunction &)> DoNotRunPass;
+
+ MachineRegisterInfo *MRI;
+ const TargetLowering *TLI;
+ MachineFunction *MF;
+ AliasAnalysis *AA;
+ const LegalizerInfo *LI;
+
+ MachineIRBuilder Builder;
+
+ /// Initialize the field members using \p MF.
+ void init(MachineFunction &MF);
+
+ class StoreMergeCandidate {
+ public:
+ // The base pointer used as the base for all stores in this candidate.
+ Register BasePtr;
+ // Our algorithm is very simple at the moment. We assume that in instruction
+ // order stores are writing to incrementing consecutive addresses. So when
+ // we walk the block in reverse order, the next eligible store must write to
+ // an offset one store width lower than CurrentLowestOffset.
+ uint64_t CurrentLowestOffset;
+ SmallVector<GStore *> Stores;
+ // A vector of MachineInstr/unsigned pairs to denote potential aliases that
+ // need to be checked before the candidate is considered safe to merge. The
+ // unsigned value is an index into the Stores vector. The indexed store is
+ // the highest-indexed store that has already been checked to not have an
+ // alias with the instruction. We record this so we don't have to repeat
+ // alias checks that have already been done, only those with stores added
+ // after the potential alias is recorded.
+ SmallVector<std::pair<MachineInstr *, unsigned>> PotentialAliases;
+
+ void addPotentialAlias(MachineInstr &MI);
+
+ /// Reset this candidate back to an empty one.
+ void reset() {
+ Stores.clear();
+ PotentialAliases.clear();
+ CurrentLowestOffset = 0;
+ BasePtr = Register();
+ }
+ };
+
+ bool isLegalOrBeforeLegalizer(const LegalityQuery &Query,
+ MachineFunction &MF) const;
+ /// If the given store is valid to be a member of the candidate, add it and
+ /// return true. Otherwise, returns false.
+ bool addStoreToCandidate(GStore &MI, StoreMergeCandidate &C);
+ /// Returns true if the instruction \p MI would potentially alias with any
+ /// stores in the candidate \p C.
+ bool operationAliasesWithCandidate(MachineInstr &MI, StoreMergeCandidate &C);
+ /// Merges the stores in the given vector into a wide store.
+ /// \returns true if at least some of the stores were merged.
+ /// This may decide not to merge stores if heuristics predict it will not be
+ /// worth it.
+ bool mergeStores(SmallVectorImpl<GStore *> &StoresToMerge);
+ /// Perform a merge of all the stores in \p Stores into a single store.
+ /// Erases the old stores from the block when finished.
+ /// \returns true if merging was done. It may fail to perform a merge if
+ /// there are issues with materializing legal wide values.
+ bool doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores);
+ bool processMergeCandidate(StoreMergeCandidate &C);
+ bool mergeBlockStores(MachineBasicBlock &MBB);
+ bool mergeFunctionStores(MachineFunction &MF);
+
+ /// Initialize some target-specific data structures for the store merging
+ /// optimization. \p AddrSpace indicates which address space to use when
+ /// probing the legalizer info for legal stores.
+ void initializeStoreMergeTargetInfo(unsigned AddrSpace = 0);
+ /// A map between address space numbers and a bitvector of supported stores
+ /// sizes. Each bit in the bitvector represents whether a store size of
+ /// that bit's value is legal. E.g. if bit 64 is set, then 64 bit scalar
+ /// stores are legal.
+ DenseMap<unsigned, BitVector> LegalStoreSizes;
+ bool IsPreLegalizer;
+ /// Contains instructions to be erased at the end of a block scan.
+ SmallSet<MachineInstr *, 16> InstsToErase;
+
+public:
+ LoadStoreOpt();
+ LoadStoreOpt(std::function<bool(const MachineFunction &)>);
+
+ StringRef getPassName() const override { return "LoadStoreOpt"; }
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End namespace llvm.
+
+#endif
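A minimal sketch of how a target could schedule the new pass (not part of the patch; the hook name addGISelStoreMerging is hypothetical, and the predicate is assumed to mean "skip the pass when it returns true", matching the DoNotRunPass member documented above):

  #include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
  using namespace llvm;

  void MyTargetPassConfig::addGISelStoreMerging() {
    addPass(new LoadStoreOpt([](const MachineFunction &MF) {
      // Ask the pass not to run at -O0.
      return MF.getTarget().getOptLevel() == CodeGenOpt::None;
    }));
  }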
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 4c6b47ab9bc8..e813d030eec3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -63,7 +63,7 @@ struct ConstantMatch {
int64_t &CR;
ConstantMatch(int64_t &C) : CR(C) {}
bool match(const MachineRegisterInfo &MRI, Register Reg) {
- if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) {
+ if (auto MaybeCst = getIConstantVRegSExtVal(Reg, MRI)) {
CR = *MaybeCst;
return true;
}
@@ -73,21 +73,46 @@ struct ConstantMatch {
inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
-struct ICstRegMatch {
- Register &CR;
- ICstRegMatch(Register &C) : CR(C) {}
+struct GCstAndRegMatch {
+ Optional<ValueAndVReg> &ValReg;
+ GCstAndRegMatch(Optional<ValueAndVReg> &ValReg) : ValReg(ValReg) {}
bool match(const MachineRegisterInfo &MRI, Register Reg) {
- if (auto MaybeCst = getConstantVRegValWithLookThrough(
- Reg, MRI, /*LookThroughInstrs*/ true,
- /*HandleFConstants*/ false)) {
- CR = MaybeCst->VReg;
- return true;
- }
- return false;
+ ValReg = getIConstantVRegValWithLookThrough(Reg, MRI);
+ return ValReg ? true : false;
}
};
-inline ICstRegMatch m_ICst(Register &Reg) { return ICstRegMatch(Reg); }
+inline GCstAndRegMatch m_GCst(Optional<ValueAndVReg> &ValReg) {
+ return GCstAndRegMatch(ValReg);
+}
+
+struct GFCstAndRegMatch {
+ Optional<FPValueAndVReg> &FPValReg;
+ GFCstAndRegMatch(Optional<FPValueAndVReg> &FPValReg) : FPValReg(FPValReg) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI);
+ return FPValReg ? true : false;
+ }
+};
+
+inline GFCstAndRegMatch m_GFCst(Optional<FPValueAndVReg> &FPValReg) {
+ return GFCstAndRegMatch(FPValReg);
+}
+
+struct GFCstOrSplatGFCstMatch {
+ Optional<FPValueAndVReg> &FPValReg;
+ GFCstOrSplatGFCstMatch(Optional<FPValueAndVReg> &FPValReg)
+ : FPValReg(FPValReg) {}
+ bool match(const MachineRegisterInfo &MRI, Register Reg) {
+ return (FPValReg = getFConstantSplat(Reg, MRI)) ||
+ (FPValReg = getFConstantVRegValWithLookThrough(Reg, MRI));
+ }
+};
+
+inline GFCstOrSplatGFCstMatch
+m_GFCstOrSplat(Optional<FPValueAndVReg> &FPValReg) {
+ return GFCstOrSplatGFCstMatch(FPValReg);
+}
/// Matcher for a specific constant value.
struct SpecificConstantMatch {
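A minimal sketch of the new FP-constant matchers (not part of the patch): m_GFCstOrSplat accepts either a scalar G_FCONSTANT or a splat G_BUILD_VECTOR of one.

  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  using namespace llvm;
  using namespace MIPatternMatch;

  // Returns true if Reg is (a splat of) the FP constant 1.0.
  static bool isFPOne(Register Reg, const MachineRegisterInfo &MRI) {
    Optional<FPValueAndVReg> FCst;
    if (!mi_match(Reg, MRI, m_GFCstOrSplat(FCst)))
      return false;
    return FCst->Value.isExactlyValue(1.0);
  }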
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index 9b652d8e16bc..069f71b54328 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -1537,6 +1537,14 @@ public:
return buildInstr(TargetOpcode::G_XOR, {Dst}, {Src0, NegOne});
}
+ /// Build and insert integer negation
+ /// \p Zero = G_CONSTANT 0
+ /// \p Res = G_SUB Zero, \p Op0
+ MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0) {
+ auto Zero = buildConstant(Dst.getLLTTy(*getMRI()), 0);
+ return buildInstr(TargetOpcode::G_SUB, {Dst}, {Zero, Src0});
+ }
+
/// Build and insert \p Res = G_CTPOP \p Op0, \p Src0
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0) {
return buildInstr(TargetOpcode::G_CTPOP, {Dst}, {Src0});
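A minimal sketch of the new helper in use (not part of the patch): buildNeg() emits a G_CONSTANT 0 followed by a G_SUB and composes with the existing builders.

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  using namespace llvm;

  // Emits (0 - X) + Y, i.e. Y - X written via the new negation helper.
  static void emitNegThenAdd(MachineIRBuilder &MIB, Register Res, Register X,
                             Register Y) {
    auto Neg = MIB.buildNeg(MIB.getMRI()->getType(X), X);
    MIB.buildAdd(Res, Neg, Y);
  }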
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 818475a48abb..86545b976b8d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -14,6 +14,9 @@
#ifndef LLVM_CODEGEN_GLOBALISEL_UTILS_H
#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
+#include "GISelWorkList.h"
+#include "LostDebugLocObserver.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Register.h"
@@ -44,6 +47,7 @@ class TargetRegisterClass;
class ConstantInt;
class ConstantFP;
class APFloat;
+class MachineIRBuilder;
// Convenience macros for dealing with vector reduction opcodes.
#define GISEL_VECREDUCE_CASES_ALL \
@@ -162,13 +166,12 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
MachineOptimizationRemarkMissed &R);
/// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
-Optional<APInt> getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI);
+Optional<APInt> getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI);
-/// If \p VReg is defined by a G_CONSTANT fits in int64_t
-/// returns it.
-Optional<int64_t> getConstantVRegSExtVal(Register VReg,
- const MachineRegisterInfo &MRI);
+/// If \p VReg is defined by a G_CONSTANT fits in int64_t returns it.
+Optional<int64_t> getIConstantVRegSExtVal(Register VReg,
+ const MachineRegisterInfo &MRI);
/// Simple struct used to hold a constant integer value and a virtual
/// register.
@@ -176,22 +179,32 @@ struct ValueAndVReg {
APInt Value;
Register VReg;
};
-/// If \p VReg is defined by a statically evaluable chain of
-/// instructions rooted on a G_F/CONSTANT (\p LookThroughInstrs == true)
-/// and that constant fits in int64_t, returns its value as well as the
-/// virtual register defined by this G_F/CONSTANT.
-/// When \p LookThroughInstrs == false this function behaves like
-/// getConstantVRegVal.
-/// When \p HandleFConstants == false the function bails on G_FCONSTANTs.
-/// When \p LookThroughAnyExt == true the function treats G_ANYEXT same as
-/// G_SEXT.
+
+/// If \p VReg is defined by a statically evaluable chain of instructions rooted
+/// on a G_CONSTANT returns its APInt value and def register.
Optional<ValueAndVReg>
-getConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI,
- bool LookThroughInstrs = true,
- bool HandleFConstants = true,
- bool LookThroughAnyExt = false);
-const ConstantInt *getConstantIntVRegVal(Register VReg,
- const MachineRegisterInfo &MRI);
+getIConstantVRegValWithLookThrough(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true);
+
+/// If \p VReg is defined by a statically evaluable chain of instructions rooted
+/// on a G_CONSTANT or G_FCONSTANT returns its value as APInt and def register.
+Optional<ValueAndVReg> getAnyConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true, bool LookThroughAnyExt = false);
+
+struct FPValueAndVReg {
+ APFloat Value;
+ Register VReg;
+};
+
+/// If \p VReg is defined by a statically evaluable chain of instructions rooted
+/// on a G_FCONSTANT returns its APFloat value and def register.
+Optional<FPValueAndVReg>
+getFConstantVRegValWithLookThrough(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool LookThroughInstrs = true);
+
const ConstantFP* getConstantFPVRegVal(Register VReg,
const MachineRegisterInfo &MRI);
@@ -254,6 +267,14 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI);
+/// Tries to constant fold a vector binop with sources \p Op1 and \p Op2.
+/// If successful, returns the G_BUILD_VECTOR representing the folded vector
+/// constant. \p MIB should have an insertion point already set to create new
+/// G_CONSTANT instructions as needed.
+Optional<MachineInstr *>
+ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2,
+ const MachineRegisterInfo &MRI, MachineIRBuilder &MIB);
+
Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
@@ -261,6 +282,11 @@ Optional<APFloat> ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI);
+/// Tries to constant fold a G_CTLZ operation on \p Src. If \p Src is a vector
+/// then it tries to do an element-wise constant fold.
+Optional<SmallVector<unsigned>>
+ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI);
+
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit is
/// set.
@@ -346,15 +372,23 @@ Optional<int> getSplatIndex(MachineInstr &MI);
Optional<int64_t> getBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// Returns a floating point scalar constant of a build vector splat if it
+/// exists. When \p AllowUndef == true some elements can be undef but not all.
+Optional<FPValueAndVReg> getFConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef = true);
+
/// Return true if the specified instruction is a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC where all of the elements are 0 or undef.
bool isBuildVectorAllZeros(const MachineInstr &MI,
- const MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef = false);
/// Return true if the specified instruction is a G_BUILD_VECTOR or
/// G_BUILD_VECTOR_TRUNC where all of the elements are ~0 or undef.
bool isBuildVectorAllOnes(const MachineInstr &MI,
- const MachineRegisterInfo &MRI);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef = false);
/// \returns a value when \p MI is a vector splat. The splat can be either a
/// Register or a constant.
@@ -378,6 +412,17 @@ bool isBuildVectorAllOnes(const MachineInstr &MI,
Optional<RegOrConstant> getVectorSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI);
+/// Determines if \p MI defines a constant integer or a build vector of
+/// constant integers. Treats undef values as constants.
+bool isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
+/// Determines if \p MI defines a constant integer or a splat vector of
+/// constant integers.
+/// \returns the scalar constant or None.
+Optional<APInt> isConstantOrConstantSplatVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
+
/// Attempt to match a unary predicate against a scalar/splat constant or every
/// element of a constant G_BUILD_VECTOR. If \p ConstVal is null, the source
/// value was undef.
@@ -398,5 +443,14 @@ int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP);
bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI);
+using SmallInstListTy = GISelWorkList<4>;
+void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver,
+ SmallInstListTy &DeadInstChain);
+void eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver = nullptr);
+void eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver = nullptr);
+
} // End namespace llvm.
#endif
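A minimal sketch of the renamed constant helpers (not part of the patch): the integer-only lookup no longer takes a HandleFConstants flag, and FP constants go through getFConstantVRegValWithLookThrough() instead.

  #include "llvm/CodeGen/GlobalISel/Utils.h"
  using namespace llvm;

  // Returns true if Reg is (transitively) defined by a power-of-two G_CONSTANT.
  static bool isPowerOfTwoIConstant(Register Reg,
                                    const MachineRegisterInfo &MRI) {
    if (auto ValAndVReg = getIConstantVRegValWithLookThrough(Reg, MRI))
      return ValAndVReg->Value.isPowerOf2();
    return false;
  }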
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 6803f4d76cf0..fd106f55a43d 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1260,6 +1260,11 @@ static const int FIRST_TARGET_STRICTFP_OPCODE = BUILTIN_OP_END + 400;
/// be used with SelectionDAG::getMemIntrinsicNode.
static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END + 500;
+/// Whether this is bitwise logic opcode.
+inline bool isBitwiseLogicOp(unsigned Opcode) {
+ return Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR;
+}
+
/// Get underlying scalar opcode for VECREDUCE opcode.
/// For example ISD::AND for ISD::VECREDUCE_AND.
NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
@@ -1267,6 +1272,12 @@ NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
/// Whether this is a vector-predicated Opcode.
bool isVPOpcode(unsigned Opcode);
+/// Whether this is a vector-predicated binary operation opcode.
+bool isVPBinaryOp(unsigned Opcode);
+
+/// Whether this is a vector-predicated reduction opcode.
+bool isVPReduction(unsigned Opcode);
+
/// The operand position of the vector mask.
Optional<unsigned> getVPMaskIdx(unsigned Opcode);
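A minimal sketch of the new predicate (not part of the patch): it replaces the usual three-way opcode comparison in DAG combines.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Returns true if N is a bitwise logic node fed by another bitwise logic node.
  static bool logicOpFeedsLogicOp(const SDNode *N) {
    return ISD::isBitwiseLogicOp(N->getOpcode()) &&
           ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode());
  }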
diff --git a/llvm/include/llvm/CodeGen/IndirectThunks.h b/llvm/include/llvm/CodeGen/IndirectThunks.h
index 74973f38bc79..90f9912f0ee0 100644
--- a/llvm/include/llvm/CodeGen/IndirectThunks.h
+++ b/llvm/include/llvm/CodeGen/IndirectThunks.h
@@ -62,7 +62,7 @@ void ThunkInserter<Derived>::createThunkFunction(MachineModuleInfo &MMI,
AttrBuilder B;
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
// Populate our function a bit so that we can verify.
BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
diff --git a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
index 81b0025fdddc..c22f9d49f374 100644
--- a/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -24,6 +24,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 1b13ff53ac85..d615a5db4504 100644
--- a/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/llvm/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -27,6 +27,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h
index c2b158ac1b7f..923a45821dd4 100644
--- a/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -521,11 +521,11 @@ namespace llvm {
removeSegment(S.start, S.end, RemoveDeadValNo);
}
- /// Remove segment pointed to by iterator @p I from this range. This does
- /// not remove dead value numbers.
- iterator removeSegment(iterator I) {
- return segments.erase(I);
- }
+ /// Remove segment pointed to by iterator @p I from this range.
+ iterator removeSegment(iterator I, bool RemoveDeadValNo = false);
+
+ /// Mark \p ValNo for deletion if no segments in this range use it.
+ void removeValNoIfDead(VNInfo *ValNo);
/// Query Liveness at Idx.
/// The sub-instruction slot of Idx doesn't matter, only the instruction
diff --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
index 4ebe0f2dcfd8..3b6a4a379d72 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -114,12 +114,19 @@ public:
const LiveRange *LR = nullptr;
LiveRange::const_iterator LRI; ///< current position in LR
ConstSegmentIter LiveUnionI; ///< current position in LiveUnion
- Optional<SmallVector<LiveInterval *, 4>> InterferingVRegs;
+ SmallVector<LiveInterval *, 4> InterferingVRegs;
bool CheckedFirstInterference = false;
bool SeenAllInterferences = false;
unsigned Tag = 0;
unsigned UserTag = 0;
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VirtReg) const;
+
public:
Query() = default;
Query(const LiveRange &LR, const LiveIntervalUnion &LIU)
@@ -131,7 +138,7 @@ public:
const LiveIntervalUnion &NewLiveUnion) {
LiveUnion = &NewLiveUnion;
LR = &NewLR;
- InterferingVRegs = None;
+ InterferingVRegs.clear();
CheckedFirstInterference = false;
SeenAllInterferences = false;
Tag = NewLiveUnion.getTag();
@@ -151,20 +158,12 @@ public:
// Does this live virtual register interfere with the union?
bool checkInterference() { return collectInterferingVRegs(1); }
- // Count the virtual registers in this union that interfere with this
- // query's live virtual register, up to maxInterferingRegs.
- unsigned collectInterferingVRegs(
- unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max());
-
- // Was this virtual register visited during collectInterferingVRegs?
- bool isSeenInterference(LiveInterval *VirtReg) const;
-
- // Did collectInterferingVRegs collect all interferences?
- bool seenAllInterferences() const { return SeenAllInterferences; }
-
// Vector generated by collectInterferingVRegs.
- const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
- return *InterferingVRegs;
+ const SmallVectorImpl<LiveInterval *> &interferingVRegs(
+ unsigned MaxInterferingRegs = std::numeric_limits<unsigned>::max()) {
+ if (!SeenAllInterferences || MaxInterferingRegs < InterferingVRegs.size())
+ collectInterferingVRegs(MaxInterferingRegs);
+ return InterferingVRegs;
}
};
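A minimal sketch of the updated Query API (not part of the patch): interferingVRegs() now collects lazily up to the requested limit, so callers no longer invoke collectInterferingVRegs() themselves.

  #include "llvm/CodeGen/LiveIntervalUnion.h"
  using namespace llvm;

  // Counts up to Limit interfering virtual registers for the query.
  static unsigned countInterferences(LiveIntervalUnion::Query &Q,
                                     unsigned Limit) {
    return Q.interferingVRegs(Limit).size();
  }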
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h
index 9b0667bbbeb0..dee316677b25 100644
--- a/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -188,6 +188,12 @@ public:
//===--------------------------------------------------------------------===//
// API to update live variable information
+ /// Recompute liveness from scratch for a virtual register \p Reg that is
+ /// known to have a single def that dominates all uses. This can be useful
+ /// after removing some uses of \p Reg. It is not necessary for the whole
+ /// machine function to be in SSA form.
+ void recomputeForSingleDefVirtReg(Register Reg);
+
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
void replaceKillInstruction(Register Reg, MachineInstr &OldMI,
diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h
index 40985e16b37a..922f93d2e598 100644
--- a/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -16,8 +16,8 @@
#ifndef LLVM_CODEGEN_LOWLEVELTYPE_H
#define LLVM_CODEGEN_LOWLEVELTYPE_H
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -31,6 +31,7 @@ LLT getLLTForType(Type &Ty, const DataLayout &DL);
/// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish
/// pointers, so these will convert to a plain integer.
MVT getMVTForLLT(LLT Ty);
+EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx);
/// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support
/// scalarable vector types, and will assert if used.
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
index 6137411b6dba..deb6b37a9bcf 100644
--- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
+++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
@@ -57,6 +57,10 @@ public:
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
}
+ StringRef getPassName() const override {
+ return "Add FS discriminators in MIR";
+ }
+
/// getNumFSBBs() - Return the number of machine BBs that have FS samples.
unsigned getNumFSBBs();
diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h
index 9cb92091db50..12c90600f6df 100644
--- a/llvm/include/llvm/CodeGen/MIRFormatter.h
+++ b/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MIRFormatter.h -----------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
new file mode 100644
index 000000000000..2503524ccfdf
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
@@ -0,0 +1,76 @@
+//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the supporting functions for the machine-level Sample FDO
+// loader. This is used in Flow Sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+
+#include <cassert>
+
+namespace llvm {
+
+using namespace sampleprof;
+
+class MIRProfileLoader;
+class MIRProfileLoaderPass : public MachineFunctionPass {
+ MachineFunction *MF;
+ std::string ProfileFileName;
+ FSDiscriminatorPass P;
+ unsigned LowBit;
+ unsigned HighBit;
+
+public:
+ static char ID;
+ /// FS bits will only use the '1' bits in the Mask.
+ MIRProfileLoaderPass(std::string FileName = "",
+ std::string RemappingFileName = "",
+ FSDiscriminatorPass P = FSDiscriminatorPass::Pass1);
+
+ /// getMachineFunction - Return the last machine function computed.
+ const MachineFunction *getMachineFunction() const { return MF; }
+
+ StringRef getPassName() const override { return "SampleFDO loader in MIR"; }
+
+private:
+ void init(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &) override;
+ bool doInitialization(Module &M) override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ std::unique_ptr<MIRProfileLoader> MIRSampleLoader;
+ /// Hold the information of the basic block frequency.
+ MachineBlockFrequencyInfo *MBFI;
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H
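For orientation, a minimal sketch (not part of this patch) of building the new loader through the createMIRProfileLoaderPass() factory that this series declares in llvm/CodeGen/Passes.h further down; the profile file name is a placeholder.

#include "llvm/CodeGen/Passes.h"

// Construct the flow-sensitive SampleFDO loader for the first FS pass point.
// "perf.fs-afdo" is a hypothetical profile path; the FSDiscriminatorPass enum
// is assumed to be visible through Passes.h.
llvm::FunctionPass *buildFSProfileLoader() {
  return llvm::createMIRProfileLoaderPass(
      "perf.fs-afdo", /*RemappingFile=*/"",
      llvm::sampleprof::FSDiscriminatorPass::Pass1);
}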
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index e7428e7ad260..b6d7c2487126 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -694,6 +694,7 @@ struct MachineFunction {
// Register information
bool TracksRegLiveness = false;
bool HasWinCFI = false;
+ bool FailsVerification = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
std::vector<MachineFunctionLiveIn> LiveIns;
Optional<std::vector<FlowStringValue>> CalleeSavedRegisters;
@@ -722,6 +723,7 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("failedISel", MF.FailedISel, false);
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness, false);
YamlIO.mapOptional("hasWinCFI", MF.HasWinCFI, false);
+ YamlIO.mapOptional("failsVerification", MF.FailsVerification, false);
YamlIO.mapOptional("registers", MF.VirtualRegisters,
std::vector<VirtualRegisterDefinition>());
YamlIO.mapOptional("liveins", MF.LiveIns,
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index ac0cc70744d1..67544779f34c 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -153,7 +153,18 @@ enum class MachineCombinerPattern {
FMLSv4f32_OP1,
FMLSv4f32_OP2,
FMLSv4i32_indexed_OP1,
- FMLSv4i32_indexed_OP2
+ FMLSv4i32_indexed_OP2,
+
+ FMULv2i32_indexed_OP1,
+ FMULv2i32_indexed_OP2,
+ FMULv2i64_indexed_OP1,
+ FMULv2i64_indexed_OP2,
+ FMULv4i16_indexed_OP1,
+ FMULv4i16_indexed_OP2,
+ FMULv4i32_indexed_OP1,
+ FMULv4i32_indexed_OP2,
+ FMULv8i16_indexed_OP1,
+ FMULv8i16_indexed_OP2,
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h
index 46bf73cdd7b6..f749e9ff7e0a 100644
--- a/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -36,6 +36,7 @@ extern template class DomTreeNodeBase<MachineBasicBlock>;
extern template class DominatorTreeBase<MachineBasicBlock, false>; // DomTree
extern template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTree
+using MachineDomTree = DomTreeBase<MachineBasicBlock>;
using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
//===-------------------------------------
@@ -43,8 +44,6 @@ using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
/// compute a normal dominator tree.
///
class MachineDominatorTree : public MachineFunctionPass {
- using DomTreeT = DomTreeBase<MachineBasicBlock>;
-
/// Helper structure used to hold all the basic blocks
/// involved in the split of a critical edge.
struct CriticalEdge {
@@ -67,7 +66,7 @@ class MachineDominatorTree : public MachineFunctionPass {
mutable SmallSet<MachineBasicBlock *, 32> NewBBs;
/// The DominatorTreeBase that is used to compute a normal dominator tree.
- std::unique_ptr<DomTreeT> DT;
+ std::unique_ptr<MachineDomTree> DT;
/// Apply all the recorded critical edges to the DT.
/// This updates the underlying DT information in a way that uses
@@ -84,8 +83,9 @@ public:
calculate(MF);
}
- DomTreeT &getBase() {
- if (!DT) DT.reset(new DomTreeT());
+ MachineDomTree &getBase() {
+ if (!DT)
+ DT.reset(new MachineDomTree());
applySplitCriticalEdges();
return *DT;
}
@@ -112,6 +112,12 @@ public:
return DT->dominates(A, B);
}
+ void getDescendants(MachineBasicBlock *A,
+ SmallVectorImpl<MachineBasicBlock *> &Result) {
+ applySplitCriticalEdges();
+ DT->getDescendants(A, Result);
+ }
+
bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
applySplitCriticalEdges();
return DT->dominates(A, B);
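A short usage sketch for the new getDescendants() wrapper (illustrative client code, assuming MDT and MBB belong to the same machine function):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineDominators.h"

// Count the blocks dominated by MBB; getDescendants() reports MBB itself too.
static unsigned countDominated(llvm::MachineDominatorTree &MDT,
                               llvm::MachineBasicBlock *MBB) {
  llvm::SmallVector<llvm::MachineBasicBlock *, 8> Dominated;
  MDT.getDescendants(MBB, Dominated);
  return Dominated.size();
}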
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 28a59703dc60..5df468102a8a 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -342,6 +342,8 @@ public:
: StackAlignment(assumeAligned(StackAlignment)),
StackRealignable(StackRealignable), ForcedRealign(ForcedRealign) {}
+ MachineFrameInfo(const MachineFrameInfo &) = delete;
+
/// Return true if there are any stack objects in this function.
bool hasStackObjects() const { return !Objects.empty(); }
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 786fe908f68f..dcbd19ac6b5a 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -149,6 +149,9 @@ public:
// all sizes attached to them have been eliminated.
// TiedOpsRewritten: The twoaddressinstruction pass will set this flag, it
// means that tied-def have been rewritten to meet the RegConstraint.
+ // FailsVerification: Means that the function is not expected to pass machine
+ // verification. This can be set by passes that introduce known problems that
+ // have not been fixed yet.
enum class Property : unsigned {
IsSSA,
NoPHIs,
@@ -159,7 +162,8 @@ public:
RegBankSelected,
Selected,
TiedOpsRewritten,
- LastProperty = TiedOpsRewritten,
+ FailsVerification,
+ LastProperty = FailsVerification,
};
bool hasProperty(Property P) const {
@@ -227,7 +231,7 @@ struct LandingPadInfo {
: LandingPadBlock(MBB) {}
};
-class MachineFunction {
+class LLVM_EXTERNAL_VISIBILITY MachineFunction {
Function &F;
const LLVMTargetMachine &Target;
const TargetSubtargetInfo *STI;
@@ -536,6 +540,14 @@ public:
/// (or DBG_PHI).
void finalizeDebugInstrRefs();
+ /// Returns true if the function's variable locations should be tracked with
+ /// instruction referencing.
+ bool useDebugInstrRef() const;
+
+ /// A reserved operand number representing the instructions memory operand,
+ /// for instructions that have a stack spill fused into them.
+ const static unsigned int DebugOperandMemNumber;
+
MachineFunction(Function &F, const LLVMTargetMachine &Target,
const TargetSubtargetInfo &STI, unsigned FunctionNum,
MachineModuleInfo &MMI);
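A sketch of how a pass would use the new property (illustrative client code, not from this patch):

#include "llvm/CodeGen/MachineFunction.h"

// A pass that knowingly leaves MIR the verifier would reject can record that
// fact so verification is relaxed for this function.
void markFailsVerification(llvm::MachineFunction &MF) {
  MF.getProperties().set(
      llvm::MachineFunctionProperties::Property::FailsVerification);
}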
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 757907f6d887..0ac934e208b6 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -517,7 +517,7 @@ public:
SmallSet<Register, 4> getUsedDebugRegs() const {
assert(isDebugValue() && "not a DBG_VALUE*");
SmallSet<Register, 4> UsedRegs;
- for (auto MO : debug_operands())
+ for (const auto &MO : debug_operands())
if (MO.isReg() && MO.getReg())
UsedRegs.insert(MO.getReg());
return UsedRegs;
@@ -1331,6 +1331,7 @@ public:
case TargetOpcode::LIFETIME_START:
case TargetOpcode::LIFETIME_END:
case TargetOpcode::PSEUDO_PROBE:
+ case TargetOpcode::ARITH_FENCE:
return true;
}
}
@@ -1859,17 +1860,6 @@ public:
}
}
- PseudoProbeAttributes getPseudoProbeAttribute() const {
- assert(isPseudoProbe() && "Must be a pseudo probe instruction");
- return (PseudoProbeAttributes)getOperand(3).getImm();
- }
-
- void addPseudoProbeAttribute(PseudoProbeAttributes Attr) {
- assert(isPseudoProbe() && "Must be a pseudo probe instruction");
- MachineOperand &AttrOperand = getOperand(3);
- AttrOperand.setImm(AttrOperand.getImm() | (uint32_t)Attr);
- }
-
private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 07b8e5ebcc1d..00080b171974 100644
--- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -282,17 +282,7 @@ public:
/// success and failure orderings for an atomic operation. (For operations
/// other than cmpxchg, this is equivalent to getSuccessOrdering().)
AtomicOrdering getMergedOrdering() const {
- AtomicOrdering Ordering = getSuccessOrdering();
- AtomicOrdering FailureOrdering = getFailureOrdering();
- if (FailureOrdering == AtomicOrdering::SequentiallyConsistent)
- return AtomicOrdering::SequentiallyConsistent;
- if (FailureOrdering == AtomicOrdering::Acquire) {
- if (Ordering == AtomicOrdering::Monotonic)
- return AtomicOrdering::Acquire;
- if (Ordering == AtomicOrdering::Release)
- return AtomicOrdering::AcquireRelease;
- }
- return Ordering;
+ return getMergedAtomicOrdering(getSuccessOrdering(), getFailureOrdering());
}
bool isLoad() const { return FlagVals & MOLoad; }
diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index 8cc5909c40b7..285b858c96cb 100644
--- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -118,6 +118,12 @@ public:
: DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
PassName, RemarkName, Loc, MBB) {}
+ MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
+ const MachineInstr *MI)
+ : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
+ PassName, RemarkName, MI->getDebugLoc(),
+ MI->getParent()) {}
+
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_MachineOptimizationRemarkAnalysis;
}
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index ca3dd992bbd5..dbabfe5f0f32 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -821,7 +821,7 @@ public:
/// deleted during LiveDebugVariables analysis.
void markUsesInDebugValueAsUndef(Register Reg) const;
- /// updateDbgUsersToReg - Update a collection of DBG_VALUE instructions
+ /// updateDbgUsersToReg - Update a collection of debug instructions
/// to refer to the designated register.
void updateDbgUsersToReg(MCRegister OldReg, MCRegister NewReg,
ArrayRef<MachineInstr *> Users) const {
@@ -829,21 +829,34 @@ public:
for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo()); RUI.isValid();
++RUI)
OldRegUnits.insert(*RUI);
- for (MachineInstr *MI : Users) {
- assert(MI->isDebugValue());
- for (auto &Op : MI->debug_operands()) {
- if (Op.isReg()) {
- for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo());
- RUI.isValid(); ++RUI) {
- if (OldRegUnits.contains(*RUI)) {
- Op.setReg(NewReg);
- break;
- }
+
+ // If this operand is a register, check whether it overlaps with OldReg.
+ // If it does, replace with NewReg.
+ auto UpdateOp = [this, &NewReg, &OldReg, &OldRegUnits](MachineOperand &Op) {
+ if (Op.isReg()) {
+ for (MCRegUnitIterator RUI(OldReg, getTargetRegisterInfo());
+ RUI.isValid(); ++RUI) {
+ if (OldRegUnits.contains(*RUI)) {
+ Op.setReg(NewReg);
+ break;
}
}
}
- assert(MI->hasDebugOperandForReg(NewReg) &&
- "Expected debug value to have some overlap with OldReg");
+ };
+
+ // Iterate through (possibly several) operands to DBG_VALUEs and update
+ // each. For DBG_PHIs, only one operand will be present.
+ for (MachineInstr *MI : Users) {
+ if (MI->isDebugValue()) {
+ for (auto &Op : MI->debug_operands())
+ UpdateOp(Op);
+ assert(MI->hasDebugOperandForReg(NewReg) &&
+ "Expected debug value to have some overlap with OldReg");
+ } else if (MI->isDebugPHI()) {
+ UpdateOp(MI->getOperand(0));
+ } else {
+ llvm_unreachable("Non-DBG_VALUE, Non-DBG_PHI debug instr updated");
+ }
}
}
@@ -964,7 +977,7 @@ public:
MCRegister getLiveInPhysReg(Register VReg) const;
/// getLiveInVirtReg - If PReg is a live-in physical register, return the
- /// corresponding live-in physical register.
+ /// corresponding live-in virtual register.
Register getLiveInVirtReg(MCRegister PReg) const;
/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
diff --git a/llvm/include/llvm/CodeGen/MacroFusion.h b/llvm/include/llvm/CodeGen/MacroFusion.h
index 3a140fe63fde..ea2c7a5faae3 100644
--- a/llvm/include/llvm/CodeGen/MacroFusion.h
+++ b/llvm/include/llvm/CodeGen/MacroFusion.h
@@ -23,6 +23,8 @@ class MachineInstr;
class ScheduleDAGMutation;
class TargetInstrInfo;
class TargetSubtargetInfo;
+class ScheduleDAGInstrs;
+class SUnit;
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
@@ -32,6 +34,18 @@ using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
const MachineInstr *FirstMI,
const MachineInstr &SecondMI)>;
+/// Checks if the number of cluster edges between SU and its predecessors is
+/// less than FuseLimit
+bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit);
+
+/// Create an artificial edge between FirstSU and SecondSU.
+/// Make data dependencies from the FirstSU also dependent on the SecondSU to
+/// prevent them from being scheduled between the FirstSU and the SecondSU
+/// and vice-versa.
+/// Fusing more than 2 instructions is not currently supported.
+bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+ SUnit &SecondSU);
+
/// Create a DAG scheduling mutation to pair instructions back to back
/// for instructions that benefit according to the target-specific
/// shouldScheduleAdjacent predicate function.
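Illustrative pairing helper built on the two newly exported functions; which unit the budget is checked against is a target choice, and FuseLimit is a hypothetical parameter:

#include "llvm/CodeGen/MacroFusion.h"

static bool tryFusePair(llvm::ScheduleDAGInstrs &DAG, llvm::SUnit &FirstSU,
                        llvm::SUnit &SecondSU, unsigned FuseLimit) {
  // Respect the cluster-edge budget before adding another fused pair.
  if (!llvm::hasLessThanNumFused(SecondSU, FuseLimit))
    return false;
  return llvm::fuseInstructionPair(DAG, FirstSU, SecondSU);
}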
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index da1bab718948..d5ad12fadfa0 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -37,6 +37,10 @@ class raw_ostream;
// List of target independent CodeGen pass IDs.
namespace llvm {
+
+  /// AtomicExpandPass - At IR level this pass replaces atomic instructions with
+  /// __atomic_* library calls, or target-specific instructions which implement the
+ /// same semantics in a way which better fits the target backend.
FunctionPass *createAtomicExpandPass();
/// createUnreachableBlockEliminationPass - The LLVM code generator does not
@@ -171,6 +175,9 @@ namespace llvm {
/// This pass adds flow sensitive discriminators.
extern char &MIRAddFSDiscriminatorsID;
+ /// This pass reads flow sensitive profile.
+ extern char &MIRProfileLoaderPassID;
+
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
///
@@ -513,6 +520,11 @@ namespace llvm {
FunctionPass *
createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P);
+ /// Read Flow Sensitive Profile.
+ FunctionPass *createMIRProfileLoaderPass(std::string File,
+ std::string RemappingFile,
+ sampleprof::FSDiscriminatorPass P);
+
/// Creates MIR Debugify pass. \see MachineDebugify.cpp
ModulePass *createDebugifyMachineModulePass();
diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h
index 39b77d919370..757ca8e112ee 100644
--- a/llvm/include/llvm/CodeGen/RegAllocCommon.h
+++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h
@@ -1,9 +1,8 @@
//===- RegAllocCommon.h - Utilities shared between allocators ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h
index 4f48ea2dc8e8..218e05f6eb6b 100644
--- a/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -211,9 +211,6 @@ private:
/// Initialize RegisterScavenger.
void init(MachineBasicBlock &MBB);
- /// Mark live-in registers of basic block as used.
- void setLiveInsUsed(const MachineBasicBlock &MBB);
-
/// Spill a register after position \p After and reload it before position
/// \p UseMI.
ScavengedInfo &spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 948a4763b872..5a3f4e9a23ff 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -531,7 +531,7 @@ public:
}
#ifndef NDEBUG
- void VerifyDAGDiverence();
+ void VerifyDAGDivergence();
#endif
/// This iterates over the nodes in the SelectionDAG, folding
@@ -621,8 +621,8 @@ public:
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget = false,
bool IsOpaque = false) {
- return getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL,
- VT, IsTarget, IsOpaque);
+ return getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL, VT,
+ IsTarget, IsOpaque);
}
SDValue getConstant(const ConstantInt &Val, const SDLoc &DL, EVT VT,
@@ -1307,6 +1307,74 @@ public:
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
+ SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges = nullptr, bool IsExpanding = false);
+ inline SDValue
+ getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment = MaybeAlign(),
+ MachineMemOperand::Flags MMOFlags = MachineMemOperand::MONone,
+ const AAMDNodes &AAInfo = AAMDNodes(),
+ const MDNode *Ranges = nullptr, bool IsExpanding = false) {
+ // Ensures that codegen never sees a None Alignment.
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL,
+ PtrInfo, MemVT, Alignment.getValueOr(getEVTAlign(MemVT)),
+ MMOFlags, AAInfo, Ranges, IsExpanding);
+ }
+ SDValue getLoadVP(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding = false);
+ SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges = nullptr,
+ bool IsExpanding = false);
+ SDValue getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachineMemOperand *MMO,
+ bool IsExpanding = false);
+ SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT,
+ SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding = false);
+ SDValue getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT,
+ SDValue Chain, SDValue Ptr, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding = false);
+ SDValue getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM);
+ SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachinePointerInfo PtrInfo,
+ Align Alignment, MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo = AAMDNodes(),
+ bool IsCompressing = false);
+ SDValue getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr,
+ SDValue Mask, SDValue EVL, MachineMemOperand *MMO,
+ bool IsCompressing = false);
+ SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsCompressing = false);
+ SDValue getTruncStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO, bool IsCompressing = false);
+ SDValue getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, SDValue Base,
+ SDValue Offset, ISD::MemIndexedMode AM);
+
+ SDValue getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType);
+ SDValue getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType);
+
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base,
SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT,
MachineMemOperand *MMO, ISD::MemIndexedMode AM,
@@ -1664,10 +1732,6 @@ public:
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops);
- SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags = SDNodeFlags());
-
/// Fold floating-point operations with 2 operands when both operands are
/// constants and/or undefined.
SDValue foldConstantFPMath(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -1769,6 +1833,19 @@ public:
unsigned ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth = 0) const;
+ /// Get the minimum bit size for this Value \p Op as a signed integer.
+ /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)).
+ /// Similar to the APInt::getMinSignedBits function.
+ /// Helper wrapper to ComputeNumSignBits.
+ unsigned ComputeMinSignedBits(SDValue Op, unsigned Depth = 0) const;
+
+ /// Get the minimum bit size for this Value \p Op as a signed integer.
+ /// i.e. x == sext(trunc(x to MinSignedBits) to bitwidth(x)).
+ /// Similar to the APInt::getMinSignedBits function.
+ /// Helper wrapper to ComputeNumSignBits.
+ unsigned ComputeMinSignedBits(SDValue Op, const APInt &DemandedElts,
+ unsigned Depth = 0) const;
+
/// Return true if this function can prove that \p Op is never poison
/// and, if \p PoisonOnly is false, does not have undef bits.
bool isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly = false,
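The new helper mirrors APInt::getMinSignedBits; as a sketch of the documented relationship (an assumption spelled out here, not code from this patch), it should agree with deriving the value from ComputeNumSignBits:

#include "llvm/CodeGen/SelectionDAG.h"

static unsigned minSignedBitsViaSignBits(const llvm::SelectionDAG &DAG,
                                         llvm::SDValue Op) {
  // MinSignedBits == BitWidth - NumSignBits + 1 on the scalar element type.
  return Op.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(Op) + 1;
}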
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
index 4ee58333495b..6a3d76be0ed6 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGAddressAnalysis.h
@@ -50,6 +50,7 @@ public:
SDValue getIndex() { return Index; }
SDValue getIndex() const { return Index; }
bool hasValidOffset() const { return Offset.hasValue(); }
+ int64_t getOffset() const { return *Offset; }
// Returns true if `Other` and `*this` are both some offset from the same base
// pointer. In that case, `Off` is set to the offset between `*this` and
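Usage note for the new accessor (illustrative): it dereferences the underlying Optional, so callers should check hasValidOffset() first.

#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"

// Return the decomposed constant offset, or 0 when none is known.
static int64_t offsetOrZero(const llvm::BaseIndexOffset &B) {
  return B.hasValidOffset() ? B.getOffset() : 0;
}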
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index deeca98af3f3..2855e1f1e587 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -58,7 +58,6 @@ namespace llvm {
class APInt;
class Constant;
-template <typename T> struct DenseMapInfo;
class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
@@ -509,15 +508,19 @@ BEGIN_TWO_BYTE_PACK()
class LSBaseSDNodeBitfields {
friend class LSBaseSDNode;
+ friend class VPLoadStoreSDNode;
friend class MaskedLoadStoreSDNode;
friend class MaskedGatherScatterSDNode;
+ friend class VPGatherScatterSDNode;
uint16_t : NumMemSDNodeBits;
// This storage is shared between disparate class hierarchies to hold an
// enumeration specific to the class hierarchy in use.
// LSBaseSDNode => enum ISD::MemIndexedMode
+ // VPLoadStoreBaseSDNode => enum ISD::MemIndexedMode
// MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
+ // VPGatherScatterSDNode => enum ISD::MemIndexType
// MaskedGatherScatterSDNode => enum ISD::MemIndexType
uint16_t AddressingMode : 3;
};
@@ -525,8 +528,10 @@ BEGIN_TWO_BYTE_PACK()
class LoadSDNodeBitfields {
friend class LoadSDNode;
+ friend class VPLoadSDNode;
friend class MaskedLoadSDNode;
friend class MaskedGatherSDNode;
+ friend class VPGatherSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -536,8 +541,10 @@ BEGIN_TWO_BYTE_PACK()
class StoreSDNodeBitfields {
friend class StoreSDNode;
+ friend class VPStoreSDNode;
friend class MaskedStoreSDNode;
friend class MaskedScatterSDNode;
+ friend class VPScatterSDNode;
uint16_t : NumLSBaseSDNodeBits;
@@ -1353,7 +1360,9 @@ public:
const SDValue &getBasePtr() const {
switch (getOpcode()) {
case ISD::STORE:
+ case ISD::VP_STORE:
case ISD::MSTORE:
+ case ISD::VP_SCATTER:
return getOperand(2);
case ISD::MGATHER:
case ISD::MSCATTER:
@@ -1393,6 +1402,10 @@ public:
case ISD::MSTORE:
case ISD::MGATHER:
case ISD::MSCATTER:
+ case ISD::VP_LOAD:
+ case ISD::VP_STORE:
+ case ISD::VP_GATHER:
+ case ISD::VP_SCATTER:
return true;
default:
return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
@@ -1563,8 +1576,12 @@ public:
Align getAlignValue() const { return Value->getAlignValue(); }
bool isOne() const { return Value->isOne(); }
- bool isNullValue() const { return Value->isZero(); }
- bool isAllOnesValue() const { return Value->isMinusOne(); }
+ bool isZero() const { return Value->isZero(); }
+ // NOTE: This is soft-deprecated. Please use `isZero()` instead.
+ bool isNullValue() const { return isZero(); }
+ bool isAllOnes() const { return Value->isMinusOne(); }
+ // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
+ bool isAllOnesValue() const { return isAllOnes(); }
bool isMaxSignedValue() const { return Value->isMaxValue(true); }
bool isMinSignedValue() const { return Value->isMinValue(true); }
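A small sketch of the renamed predicates in client code; the *Value() spellings remain only as soft-deprecated aliases:

#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isZeroOrAllOnesConstant(const llvm::SDNode *N) {
  if (const auto *C = llvm::dyn_cast<llvm::ConstantSDNode>(N))
    return C->isZero() || C->isAllOnes();
  return false;
}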
@@ -2031,8 +2048,25 @@ public:
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
uint32_t BitWidth) const;
+ /// Extract the raw bit data from a build vector of Undef, Constant or
+ /// ConstantFP node elements. Each raw bit element will be \p
+ /// DstEltSizeInBits wide, undef elements are treated as zero, and entirely
+ /// undefined elements are flagged in \p UndefElements.
+ bool getConstantRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements,
+ BitVector &UndefElements) const;
+
bool isConstant() const;
+ /// Recast bit data \p SrcBitElements to \p DstEltSizeInBits wide elements.
+ /// Undef elements are treated as zero, and entirely undefined elements are
+ /// flagged in \p DstUndefElements.
+ static void recastRawBits(bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements);
+
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::BUILD_VECTOR;
}
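Illustrative caller for the new raw-bit extraction (the 32-bit lane width is an arbitrary choice for the example):

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"

// Extract 32-bit raw lanes from a BUILD_VECTOR; lanes that are entirely undef
// are reported through Undefs rather than through the bit values.
static bool extractRawLanes(const llvm::BuildVectorSDNode *BV,
                            llvm::SmallVectorImpl<llvm::APInt> &Bits,
                            llvm::BitVector &Undefs) {
  return BV->getConstantRawBits(/*IsLittleEndian=*/true,
                                /*DstEltSizeInBits=*/32, Bits, Undefs);
}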
@@ -2318,6 +2352,116 @@ public:
}
};
+/// This base class is used to represent VP_LOAD and VP_STORE nodes
+class VPLoadStoreSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = AM;
+ assert(getAddressingMode() == AM && "Value truncated");
+ }
+
+ // VPLoadSDNode (Chain, Ptr, Offset, Mask, EVL)
+ // VPStoreSDNode (Chain, Data, Ptr, Offset, Mask, EVL)
+ // Mask is a vector of i1 elements;
+ // the type of EVL is TLI.getVPExplicitVectorLengthTy().
+ const SDValue &getOffset() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 2 : 3);
+ }
+ const SDValue &getBasePtr() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 1 : 2);
+ }
+ const SDValue &getMask() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 3 : 4);
+ }
+ const SDValue &getVectorLength() const {
+ return getOperand(getOpcode() == ISD::VP_LOAD ? 4 : 5);
+ }
+
+ /// Return the addressing mode for this load or store:
+ /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
+ ISD::MemIndexedMode getAddressingMode() const {
+ return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
+ }
+
+ /// Return true if this is a pre/post inc/dec load/store.
+ bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }
+
+ /// Return true if this is NOT a pre/post inc/dec load/store.
+ bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_LOAD || N->getOpcode() == ISD::VP_STORE;
+ }
+};
+
+/// This class is used to represent a VP_LOAD node
+class VPLoadSDNode : public VPLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+ ISD::MemIndexedMode AM, ISD::LoadExtType ETy, bool isExpanding,
+ EVT MemVT, MachineMemOperand *MMO)
+ : VPLoadStoreSDNode(ISD::VP_LOAD, Order, dl, VTs, AM, MemVT, MMO) {
+ LoadSDNodeBits.ExtTy = ETy;
+ LoadSDNodeBits.IsExpanding = isExpanding;
+ }
+
+ ISD::LoadExtType getExtensionType() const {
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
+ }
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getOffset() const { return getOperand(2); }
+ const SDValue &getMask() const { return getOperand(3); }
+ const SDValue &getVectorLength() const { return getOperand(4); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_LOAD;
+ }
+ bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
+};
+
+/// This class is used to represent a VP_STORE node
+class VPStoreSDNode : public VPLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
+ ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
+ EVT MemVT, MachineMemOperand *MMO)
+ : VPLoadStoreSDNode(ISD::VP_STORE, Order, dl, VTs, AM, MemVT, MMO) {
+ StoreSDNodeBits.IsTruncating = isTrunc;
+ StoreSDNodeBits.IsCompressing = isCompressing;
+ }
+
+ /// Return true if this is a truncating store.
+ /// For integers this is the same as doing a TRUNCATE and storing the result.
+ /// For floats, it is the same as doing an FP_ROUND and storing the result.
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
+
+ /// Returns true if the op does a compression to the vector before storing.
+ /// The node contiguously stores the active elements (integers or floats)
+ /// in src (those with their respective bit set in writemask k) to unaligned
+ /// memory at base_addr.
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
+
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+ const SDValue &getOffset() const { return getOperand(3); }
+ const SDValue &getMask() const { return getOperand(4); }
+ const SDValue &getVectorLength() const { return getOperand(5); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_STORE;
+ }
+};
+
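A sketch of client code (assumed to include llvm/CodeGen/SelectionDAGNodes.h) dispatching over the new VP memory nodes:

// Return the mask operand of a VP load/store, or an empty SDValue otherwise.
static llvm::SDValue getVPMemMask(const llvm::SDNode *N) {
  if (const auto *Ld = llvm::dyn_cast<llvm::VPLoadSDNode>(N))
    return Ld->getMask();
  if (const auto *St = llvm::dyn_cast<llvm::VPStoreSDNode>(N))
    return St->getMask();
  return llvm::SDValue();
}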
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
@@ -2424,6 +2568,94 @@ public:
};
/// This is a base class used to represent
+/// VP_GATHER and VP_SCATTER nodes
+///
+class VPGatherScatterSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ LSBaseSDNodeBits.AddressingMode = IndexType;
+ assert(getIndexType() == IndexType && "Value truncated");
+ }
+
+ /// How is Index applied to BasePtr when computing addresses.
+ ISD::MemIndexType getIndexType() const {
+ return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
+ }
+ bool isIndexScaled() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::UNSIGNED_SCALED);
+ }
+ bool isIndexSigned() const {
+ return (getIndexType() == ISD::SIGNED_SCALED) ||
+ (getIndexType() == ISD::SIGNED_UNSCALED);
+ }
+
+  // Operand layout for the two nodes:
+ // VPGatherSDNode (Chain, base, index, scale, mask, vlen)
+ // VPScatterSDNode (Chain, value, base, index, scale, mask, vlen)
+ // Mask is a vector of i1 elements
+ const SDValue &getBasePtr() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 1 : 2);
+ }
+ const SDValue &getIndex() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 2 : 3);
+ }
+ const SDValue &getScale() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 3 : 4);
+ }
+ const SDValue &getMask() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 4 : 5);
+ }
+ const SDValue &getVectorLength() const {
+ return getOperand((getOpcode() == ISD::VP_GATHER) ? 5 : 6);
+ }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_GATHER ||
+ N->getOpcode() == ISD::VP_SCATTER;
+ }
+};
+
+/// This class is used to represent a VP_GATHER node
+///
+class VPGatherSDNode : public VPGatherScatterSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : VPGatherScatterSDNode(ISD::VP_GATHER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_GATHER;
+ }
+};
+
+/// This class is used to represent a VP_SCATTER node
+///
+class VPScatterSDNode : public VPGatherScatterSDNode {
+public:
+ friend class SelectionDAG;
+
+ VPScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO, ISD::MemIndexType IndexType)
+ : VPGatherScatterSDNode(ISD::VP_SCATTER, Order, dl, VTs, MemVT, MMO,
+ IndexType) {}
+
+ const SDValue &getValue() const { return getOperand(1); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::VP_SCATTER;
+ }
+};
+
+/// This is a base class used to represent
/// MGATHER and MSCATTER nodes
///
class MaskedGatherScatterSDNode : public MemSDNode {
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 51f1d7d6fd21..bc22d7789856 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -183,12 +183,12 @@ struct JumpTableHeader {
const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
- bool OmitRangeCheck;
+ bool FallthroughUnreachable;
JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
bool E = false)
: First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E), OmitRangeCheck(false) {}
+ Emitted(E), FallthroughUnreachable(false) {}
};
using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
@@ -218,14 +218,14 @@ struct BitTestBlock {
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
- bool OmitRangeCheck;
+ bool FallthroughUnreachable;
BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, bool E,
bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
BitTestInfo C, BranchProbability Pr)
: First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr), OmitRangeCheck(false) {}
+ Cases(std::move(C)), Prob(Pr), FallthroughUnreachable(false) {}
};
/// Return the range of values within a range.
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 29e644898f6b..7713dd0800c0 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -247,11 +247,11 @@ namespace ISD {
unsigned PartOffset;
OutputArg() = default;
- OutputArg(ArgFlagsTy flags, EVT vt, EVT argvt, bool isfixed,
+ OutputArg(ArgFlagsTy flags, MVT vt, EVT argvt, bool isfixed,
unsigned origIdx, unsigned partOffs)
- : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx),
- PartOffset(partOffs) {
- VT = vt.getSimpleVT();
+ : Flags(flags), IsFixed(isfixed), OrigArgIndex(origIdx),
+ PartOffset(partOffs) {
+ VT = vt;
ArgVT = argvt;
}
};
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 05d0591f1e5d..8bc730a3eda5 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -411,9 +411,12 @@ public:
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the last new instruction.
///
- virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const {
+ /// If \p LIS is not nullptr, the LiveIntervals info should be updated for
+ /// replacing \p MI with new instructions, even though this function does not
+ /// remove MI.
+ virtual MachineInstr *convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
return nullptr;
}
@@ -583,15 +586,14 @@ public:
}
/// Insert an unconditional indirect branch at the end of \p MBB to \p
- /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
+  /// NewDestBB. Optionally, insert code to restore clobbered registers in \p
+  /// RestoreBB. \p BrOffset indicates the offset of \p NewDestBB relative to
/// the offset of the position to insert the new branch.
- ///
- /// \returns The number of bytes added to the block.
- virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset = 0,
- RegScavenger *RS = nullptr) const {
+ virtual void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset = 0,
+ RegScavenger *RS = nullptr) const {
llvm_unreachable("target did not implement");
}
@@ -1537,7 +1539,8 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
virtual bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
return false;
}
@@ -1545,7 +1548,8 @@ public:
/// into something more efficient. E.g., on ARM most instructions can set the
/// flags register, obviating the need for a separate CMP.
virtual bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int Mask, int Value,
+ Register SrcReg2, int64_t Mask,
+ int64_t Value,
const MachineRegisterInfo *MRI) const {
return false;
}
@@ -1624,9 +1628,6 @@ public:
unsigned defaultDefLatency(const MCSchedModel &SchedModel,
const MachineInstr &DefMI) const;
- int computeDefOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI) const;
-
/// Return true if this opcode has high latency to its result.
virtual bool isHighLatencyDef(int opc) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 692dc4d7d4cf..87f5168ec48f 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,6 +30,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -371,10 +372,18 @@ public:
return getPointerTy(DL);
}
- /// EVT is not used in-tree, but is used by out-of-tree target.
- /// A documentation for this function would be nice...
+ /// Return the type to use for a scalar shift opcode, given the shifted amount
+ /// type. Targets should return a legal type if the input type is legal.
+ /// Targets can return a type that is too small if the input type is illegal.
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
+ /// Returns the type for the shift amount of a shift opcode. For vectors,
+ /// returns the input type. For scalars, behavior depends on \p LegalTypes. If
+ /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses
+ /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent
+ /// all possible shift amounts, returns MVT::i32. In general, \p LegalTypes
+ /// should be set to true for calls during type legalization and after type
+ /// legalization has been completed.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
bool LegalTypes = true) const;
@@ -591,7 +600,7 @@ public:
/// Returns if it's reasonable to merge stores to MemVT size.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
return true;
}
@@ -1396,6 +1405,11 @@ public:
return NVT;
}
+ virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
+ bool AllowUnknown = false) const {
+ return getValueType(DL, Ty, AllowUnknown);
+ }
+
/// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
/// operations except for the pointer size. If AllowUnknown is true, this
/// will return MVT::Other for types with no EVT counterpart (e.g. structs),
@@ -1448,7 +1462,7 @@ public:
/// Return the desired alignment for ByVal or InAlloca aggregate function
/// arguments in the caller parameter area. This is the actual alignment, not
/// its logarithm.
- virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
+ virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
/// Return the type of registers that this ValueType will eventually require.
MVT getRegisterType(MVT VT) const {
@@ -1763,9 +1777,7 @@ public:
Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
/// Return the preferred loop alignment.
- virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
- return PrefLoopAlignment;
- }
+ virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const;
/// Should loops be aligned even when the function is marked OptSize (but not
/// MinSize).
@@ -2077,6 +2089,20 @@ public:
return false;
}
+ /// Return true if it may be profitable to transform
+ /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
+ /// This may not be true if c1 and c2 can be represented as immediates but
+ /// c1*c2 cannot, for example.
+ /// The target should check if c1, c2 and c1*c2 can be represented as
+  /// immediates, or have to be materialized into registers. If the target is
+  /// unsure about some cases, it can return true by default and let the
+  /// DAGCombiner decide.
+ /// AddNode is (add x, c1), and ConstNode is c2.
+ virtual bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const {
+ return true;
+ }
+
/// Return true if it is more correct/profitable to use strict FP_TO_INT
/// conversion operations - canonicalizing the FP source value instead of
/// converting all cases and then selecting based on value.
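A worked instance of the fold this hook guards (illustrative constants):

//   (mul (add x, 3), 5)  -->  (add (mul x, 5), 15)
// A target would return false here if 3 and 5 fit its immediate encodings but
// 15 would have to be materialized into a register.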
@@ -2177,8 +2203,7 @@ protected:
/// Indicate that the specified operation does not work with the specified
/// type and indicate what to do about it. Note that VT may refer to either
/// the type of a result or that of an operand of Op.
- void setOperationAction(unsigned Op, MVT VT,
- LegalizeAction Action) {
+ void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
OpActions[(unsigned)VT.SimpleTy][Op] = Action;
}
@@ -2197,8 +2222,7 @@ protected:
/// Indicate that the specified truncating store does not work with the
/// specified type and indicate what to do about it.
- void setTruncStoreAction(MVT ValVT, MVT MemVT,
- LegalizeAction Action) {
+ void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) {
assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
}
@@ -2506,8 +2530,11 @@ public:
return false;
}
- virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
- return false;
+ virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
+ virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
+ LLVMContext &Ctx) const {
+ return isTruncateFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
+ getApproximateEVTForLLT(ToTy, DL, Ctx));
}
virtual bool isProfitableToHoist(Instruction *I) const { return true; }
@@ -2583,8 +2610,11 @@ public:
return false;
}
- virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
- return false;
+ virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
+ virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL,
+ LLVMContext &Ctx) const {
+ return isZExtFree(getApproximateEVTForLLT(FromTy, DL, Ctx),
+ getApproximateEVTForLLT(ToTy, DL, Ctx));
}
/// Return true if sign-extension from FromTy to ToTy is cheaper than
@@ -3807,7 +3837,7 @@ public:
RetSExt = Call.hasRetAttr(Attribute::SExt);
RetZExt = Call.hasRetAttr(Attribute::ZExt);
NoMerge = Call.hasFnAttr(Attribute::NoMerge);
-
+
Callee = Target;
CallConv = Call.getCallingConv();
@@ -4424,33 +4454,29 @@ public:
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
- /// \param Result output after conversion
- /// \returns True, if the expansion was successful, false otherwise
- bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;
/// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
- /// \param Result output after conversion
- /// \returns True, if the expansion was successful, false otherwise
- bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
- /// \param Result output after conversion
- /// \returns True, if the expansion was successful, false otherwise
- bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;
/// Expand ABS nodes. Expands vector/scalar ABS nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
/// \param N Node to expand
- /// \param Result output after conversion
/// \param IsNegative indicate negated abs
- /// \returns True, if the expansion was successful, false otherwise
- bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG,
- bool IsNegative = false) const;
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative = false) const;
/// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
/// scalar types. Returns SDValue() if expand fails.
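A sketch of the call-site pattern the new signatures imply: the expansion is returned directly and an empty SDValue() signals failure, replacing the bool plus out-parameter form.

#include "llvm/CodeGen/TargetLowering.h"

static llvm::SDValue tryExpandCTPOP(const llvm::TargetLowering &TLI,
                                    llvm::SDNode *N, llvm::SelectionDAG &DAG) {
  if (llvm::SDValue Res = TLI.expandCTPOP(N, DAG))
    return Res;
  return llvm::SDValue(); // caller falls back to default legalization
}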
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 11138039a3c5..9b13b61fc9de 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -187,8 +187,7 @@ public:
void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID);
/// Insert InsertedPassID pass after TargetPassID pass.
- void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter = true);
+ void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID);
/// Allow the target to enable a specific standard pass by default.
void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); }
@@ -323,8 +322,7 @@ public:
/// Add standard passes after a pass that has just been added. For example,
/// the MachineVerifier if it is enabled.
- void addMachinePostPasses(const std::string &Banner, bool AllowVerify = true,
- bool AllowStrip = true);
+ void addMachinePostPasses(const std::string &Banner);
/// Check whether or not GlobalISel should abort on error.
/// When this is disabled, GlobalISel will fall back on SDISel instead of
@@ -449,16 +447,12 @@ protected:
/// Add a CodeGen pass at this point in the pipeline after checking overrides.
/// Return the pass that was added, or zero if no pass was added.
- /// @p verifyAfter if true and adding a machine function pass add an extra
- /// machine verification pass afterwards.
- AnalysisID addPass(AnalysisID PassID, bool verifyAfter = true);
+ AnalysisID addPass(AnalysisID PassID);
/// Add a pass to the PassManager if that pass is supposed to be run, as
/// determined by the StartAfter and StopAfter options. Takes ownership of the
/// pass.
- /// @p verifyAfter if true and adding a machine function pass add an extra
- /// machine verification pass afterwards.
- void addPass(Pass *P, bool verifyAfter = true);
+ void addPass(Pass *P);
/// addMachinePasses helper to create the target-selected or overriden
/// regalloc pass.
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 92ce5b737090..8483d078ca74 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -57,6 +57,8 @@ public:
/// Classes with a higher priority value are assigned first by register
/// allocators using a greedy heuristic. The value is in the range [0,63].
const uint8_t AllocationPriority;
+ /// Configurable target specific flags.
+ const uint8_t TSFlags;
/// Whether the class supports two (or more) disjunct subregister indices.
const bool HasDisjunctSubRegs;
/// Whether a combination of subregisters can cover every register in the
@@ -871,10 +873,6 @@ public:
/// (3) Bottom-up allocation is no longer guaranteed to optimally color.
virtual bool reverseLocalAssignment() const { return false; }
- /// Add the allocation priority to global and split ranges as well as the
- /// local ranges when registers are added to the queue.
- virtual bool addAllocPriorityToGlobalRanges() const { return false; }
-
/// Allow the target to override the cost of using a callee-saved register for
/// the first time. Default value of 0 means we will use a callee-saved
/// register if it is available.
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index aa6b82e14aa6..049ede89ab46 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -15,7 +15,6 @@
#ifndef LLVM_CODEGEN_TARGETSCHEDULE_H
#define LLVM_CODEGEN_TARGETSCHEDULE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 0e88e705e16b..7f989e08e9bf 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -216,6 +216,7 @@ def untyped : ValueType<8, 174>; // Produces an untyped value
def funcref : ValueType<0, 175>; // WebAssembly's funcref type
def externref : ValueType<0, 176>; // WebAssembly's externref type
def x86amx : ValueType<8192, 177>; // X86 AMX value
+def i64x8 : ValueType<512, 178>; // 8 Consecutive GPRs (AArch64)
def token : ValueType<0, 248>; // TokenTy
@@ -243,7 +244,7 @@ def Any : ValueType<0, 255>;
/// This class is for targets that want to use pointer types in patterns
/// with the GlobalISelEmitter. Targets must define their own pointer
/// derived from this class. The scalar argument should be an
-/// integer type with the same bit size as the ponter.
+/// integer type with the same bit size as the pointer.
/// e.g. def p0 : PtrValueType <i64, 0>;
class PtrValueType <ValueType scalar, int addrspace> :
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index 7b89c9f66f86..1c6d0b1ead86 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -80,7 +80,7 @@ public:
CompileUnit::DIEInfo &Info) = 0;
/// Apply the valid relocations to the buffer \p Data, taking into
- /// account that Data is at \p BaseOffset in the debug_info section.
+ /// account that Data is at \p BaseOffset in the .debug_info section.
///
/// \returns true whether any reloc has been applied.
virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
@@ -109,7 +109,7 @@ public:
/// Emit section named SecName with data SecData.
virtual void emitSectionContents(StringRef SecData, StringRef SecName) = 0;
- /// Emit the abbreviation table \p Abbrevs to the debug_abbrev section.
+ /// Emit the abbreviation table \p Abbrevs to the .debug_abbrev section.
virtual void
emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
unsigned DwarfVersion) = 0;
@@ -137,7 +137,7 @@ public:
virtual void
emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &Table) = 0;
- /// Emit debug_ranges for \p FuncRange by translating the
+ /// Emit .debug_ranges for \p FuncRange by translating the
/// original \p Entries.
virtual void emitRangesEntries(
int64_t UnitPcOffset, uint64_t OrigLowPc,
@@ -145,17 +145,17 @@ public:
const std::vector<DWARFDebugRangeList::RangeListEntry> &Entries,
unsigned AddressSize) = 0;
- /// Emit debug_aranges entries for \p Unit and if \p DoRangesSection is true,
- /// also emit the debug_ranges entries for the DW_TAG_compile_unit's
+ /// Emit .debug_aranges entries for \p Unit and if \p DoRangesSection is true,
+ /// also emit the .debug_ranges entries for the DW_TAG_compile_unit's
/// DW_AT_ranges attribute.
virtual void emitUnitRangesEntries(CompileUnit &Unit,
bool DoRangesSection) = 0;
- /// Copy the debug_line over to the updated binary while unobfuscating the
+ /// Copy the .debug_line over to the updated binary while unobfuscating the
/// file names and directories.
virtual void translateLineTable(DataExtractor LineData, uint64_t Offset) = 0;
- /// Emit the line table described in \p Rows into the debug_line section.
+ /// Emit the line table described in \p Rows into the .debug_line section.
virtual void emitLineTableForUnit(MCDwarfLineTableParams Params,
StringRef PrologueBytes,
unsigned MinInstLength,
@@ -175,7 +175,7 @@ public:
virtual void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint32_t Address,
StringRef Bytes) = 0;
- /// Emit the debug_loc contribution for \p Unit by copying the entries from
+ /// Emit the .debug_loc contribution for \p Unit by copying the entries from
/// \p Dwarf and offsetting them. Update the location attributes to point to
/// the new entries.
virtual void emitLocationsForUnit(
@@ -184,7 +184,7 @@ public:
ProcessExpr) = 0;
/// Emit the compilation unit header for \p Unit in the
- /// debug_info section.
+ /// .debug_info section.
///
/// As a side effect, this also switches the current Dwarf version
/// of the MC layer to the one of U.getOrigUnit().
@@ -695,7 +695,7 @@ private:
/// Assign an abbreviation number to \p Abbrev
void assignAbbrev(DIEAbbrev &Abbrev);
- /// Compute and emit debug_ranges section for \p Unit, and
+ /// Compute and emit .debug_ranges section for \p Unit, and
/// patch the attributes referencing it.
void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf,
const DWARFFile &File) const;
@@ -706,7 +706,7 @@ private:
/// Extract the line tables from the original dwarf, extract the relevant
/// parts according to the linked function ranges and emit the result in the
- /// debug_line section.
+ /// .debug_line section.
void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf,
const DWARFFile &File);
@@ -753,7 +753,7 @@ private:
StringMap<uint32_t> EmittedCIEs;
/// Offset of the last CIE that has been emitted in the output
- /// debug_frame section.
+ /// .debug_frame section.
uint32_t LastCIEOffset = 0;
/// Apple accelerator tables.
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index 18392e3608e7..99de8ebef812 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -10,7 +10,6 @@
#define LLVM_DEBUGINFO_CODEVIEW_CVRECORD_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
index 48ea7e52c172..4cee3abdde87 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeViewRegisters.def
@@ -373,7 +373,7 @@ CV_REGISTER(AMD64_K7, 765)
CV_REGISTER(ARM_NOREG, 0)
-// General purpose 32-bit integer regisers
+// General purpose 32-bit integer registers
CV_REGISTER(ARM_R0, 10)
CV_REGISTER(ARM_R1, 11)
diff --git a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index bdc6cf46509b..226a436c0930 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -18,6 +18,7 @@
namespace llvm {
class ScopedPrinter;
+class StringRef;
namespace codeview {
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index dcb26f12b13e..cdf3f60f88be 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -144,6 +144,27 @@ public:
const dwarf::Attribute Attr,
const DWARFUnit &U) const;
+ /// Compute an offset from a DIE specified by DIE offset and attribute index.
+ ///
+ /// \param AttrIndex an index of DWARF attribute.
+ /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation
+ /// code in the .debug_info data.
+ /// \param U the DWARFUnit that contains the DIE.
+ /// \returns an offset of the attribute.
+ uint64_t getAttributeOffsetFromIndex(uint32_t AttrIndex, uint64_t DIEOffset,
+ const DWARFUnit &U) const;
+
+ /// Extract a DWARF form value from a DIE specified by attribute index and
+ /// its offset.
+ ///
+ /// \param AttrIndex an index of DWARF attribute.
+ /// \param Offset offset of the attribute.
+ /// \param U the DWARFUnit that contains the DIE.
+ /// \returns Optional DWARF form value if the attribute was extracted.
+ Optional<DWARFFormValue>
+ getAttributeValueFromOffset(uint32_t AttrIndex, uint64_t Offset,
+ const DWARFUnit &U) const;
+
bool extract(DataExtractor Data, uint64_t* OffsetPtr);
void dump(raw_ostream &OS) const;
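A minimal sketch of how the two accessors added above might be combined: first compute the attribute's offset from its index, then decode the form value stored there. The helper name readAttr and its surrounding includes are illustrative; only the two member functions shown in the hunk are taken from the header.

    #include "llvm/ADT/Optional.h"
    #include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
    #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
    #include "llvm/DebugInfo/DWARF/DWARFUnit.h"
    #include <cstdint>

    // Resolve one attribute of a DIE in two steps, as the new API allows.
    static llvm::Optional<llvm::DWARFFormValue>
    readAttr(const llvm::DWARFAbbreviationDeclaration &Abbrev, uint32_t AttrIndex,
             uint64_t DIEOffset, const llvm::DWARFUnit &U) {
      // Step 1: skip the preceding attributes to find this attribute's offset.
      uint64_t Offset = Abbrev.getAttributeOffsetFromIndex(AttrIndex, DIEOffset, U);
      // Step 2: extract the form value stored at that offset.
      return Abbrev.getAttributeValueFromOffset(AttrIndex, Offset, U);
    }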
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
index 154f7893aa17..537a03ec11fc 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAddressRange.h
@@ -39,6 +39,8 @@ struct DWARFAddressRange {
/// Returns true if [LowPC, HighPC) intersects with [RHS.LowPC, RHS.HighPC).
bool intersects(const DWARFAddressRange &RHS) const {
assert(valid() && RHS.valid());
+ if (SectionIndex != RHS.SectionIndex)
+ return false;
// Empty ranges can't intersect.
if (LowPC == HighPC || RHS.LowPC == RHS.HighPC)
return false;
@@ -69,12 +71,12 @@ struct DWARFAddressRange {
inline bool operator<(const DWARFAddressRange &LHS,
const DWARFAddressRange &RHS) {
- return std::tie(LHS.LowPC, LHS.HighPC) < std::tie(RHS.LowPC, RHS.HighPC);
+ return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) < std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC);
}
inline bool operator==(const DWARFAddressRange &LHS,
const DWARFAddressRange &RHS) {
- return std::tie(LHS.LowPC, LHS.HighPC) == std::tie(RHS.LowPC, RHS.HighPC);
+ return std::tie(LHS.SectionIndex, LHS.LowPC, LHS.HighPC) == std::tie(RHS.SectionIndex, RHS.LowPC, RHS.HighPC);
}
raw_ostream &operator<<(raw_ostream &OS, const DWARFAddressRange &R);
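A hedged illustration of folding SectionIndex into the comparisons above: two ranges with identical [LowPC, HighPC) but different section indices now compare unequal and never intersect. It assumes the three-argument constructor this struct provides for unit tests.

    #include "llvm/DebugInfo/DWARF/DWARFAddressRange.h"
    #include <cassert>

    void sectionAwareRanges() {
      llvm::DWARFAddressRange A(/*LowPC=*/0x1000, /*HighPC=*/0x2000, /*SectionIndex=*/1);
      llvm::DWARFAddressRange B(/*LowPC=*/0x1000, /*HighPC=*/0x2000, /*SectionIndex=*/2);
      assert(!(A == B) && "same PC range, different sections: not equal");
      assert(!A.intersects(B) && "ranges in different sections never intersect");
    }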
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 75b2280658f1..902973ff5722 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -243,6 +243,7 @@ public:
}
DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash);
+ DWARFTypeUnit *getTypeUnitForHash(uint16_t Version, uint64_t Hash, bool IsDWO);
/// Return the compile unit that includes an offset (relative to .debug_info).
DWARFCompileUnit *getCompileUnitForOffset(uint64_t Offset);
@@ -373,8 +374,24 @@ public:
return {2, 4, 8};
}
static bool isAddressSizeSupported(unsigned AddressSize) {
- return llvm::any_of(getSupportedAddressSizes(),
- [=](auto Elem) { return Elem == AddressSize; });
+ return llvm::is_contained(getSupportedAddressSizes(), AddressSize);
+ }
+ template <typename... Ts>
+ static Error checkAddressSizeSupported(unsigned AddressSize,
+ std::error_code EC, char const *Fmt,
+ const Ts &...Vals) {
+ if (isAddressSizeSupported(AddressSize))
+ return Error::success();
+ std::string Buffer;
+ raw_string_ostream Stream(Buffer);
+ Stream << format(Fmt, Vals...)
+ << " has unsupported address size: " << AddressSize
+ << " (supported are ";
+ ListSeparator LS;
+ for (unsigned Size : DWARFContext::getSupportedAddressSizes())
+ Stream << LS << Size;
+ Stream << ')';
+ return make_error<StringError>(Stream.str(), EC);
}
std::shared_ptr<DWARFContext> getDWOContext(StringRef AbsolutePath);
@@ -387,9 +404,12 @@ public:
function_ref<void(Error)> getWarningHandler() { return WarningHandler; }
+ enum class ProcessDebugRelocations { Process, Ignore };
+
static std::unique_ptr<DWARFContext>
- create(const object::ObjectFile &Obj, const LoadedObjectInfo *L = nullptr,
- std::string DWPName = "",
+ create(const object::ObjectFile &Obj,
+ ProcessDebugRelocations RelocAction = ProcessDebugRelocations::Process,
+ const LoadedObjectInfo *L = nullptr, std::string DWPName = "",
std::function<void(Error)> RecoverableErrorHandler =
WithColor::defaultErrorHandler,
std::function<void(Error)> WarningHandler =
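A short sketch of calling the reworked factory and the new address-size check. The relocation policy now comes directly after the object file, ahead of the LoadedObjectInfo pointer, so skipping relocation processing is a one-argument change; checkAddressSizeSupported() wraps the old predicate in a formatted Error. The format string and error code below are illustrative, not taken from the header.

    #include "llvm/DebugInfo/DWARF/DWARFContext.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/Errc.h"

    // Build a context without applying debug relocations.
    std::unique_ptr<llvm::DWARFContext>
    makeUnrelocatedContext(const llvm::object::ObjectFile &Obj) {
      return llvm::DWARFContext::create(
          Obj, llvm::DWARFContext::ProcessDebugRelocations::Ignore);
    }

    // Reject an unsupported address size with a formatted error message.
    llvm::Error checkUnitAddressSize(unsigned AddrSize, uint64_t UnitOffset) {
      return llvm::DWARFContext::checkAddressSizeSupported(
          AddrSize, llvm::errc::invalid_argument,
          "compile unit at offset 0x%8.8llx", (unsigned long long)UnitOffset);
    }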
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index 0bfe9f376f46..c4370cb54113 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -24,9 +24,11 @@ class DWARFDebugInfoEntry {
/// Offset within the .debug_info of the start of this entry.
uint64_t Offset = 0;
- /// The integer depth of this DIE within the compile unit DIEs where the
- /// compile/type unit DIE has a depth of zero.
- uint32_t Depth = 0;
+ /// Index of the parent DIE. UINT32_MAX if there is no parent.
+ uint32_t ParentIdx = UINT32_MAX;
+
+ /// Index of the sibling DIE. Zero if there is no sibling.
+ uint32_t SiblingIdx = 0;
const DWARFAbbreviationDeclaration *AbbrevDecl = nullptr;
@@ -36,15 +38,31 @@ public:
/// Extracts a debug info entry, which is a child of a given unit,
/// starting at a given offset. If DIE can't be extracted, returns false and
/// doesn't change OffsetPtr.
- bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr);
-
/// High performance extraction should use this call.
bool extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
const DWARFDataExtractor &DebugInfoData, uint64_t UEndOffset,
- uint32_t Depth);
+ uint32_t ParentIdx);
uint64_t getOffset() const { return Offset; }
- uint32_t getDepth() const { return Depth; }
+
+ /// Returns the index of the parent DIE.
+ Optional<uint32_t> getParentIdx() const {
+ if (ParentIdx == UINT32_MAX)
+ return None;
+
+ return ParentIdx;
+ }
+
+ /// Returns the index of the sibling DIE.
+ Optional<uint32_t> getSiblingIdx() const {
+ if (SiblingIdx == 0)
+ return None;
+
+ return SiblingIdx;
+ }
+
+ /// Set index of sibling.
+ void setSiblingIdx(uint32_t Idx) { SiblingIdx = Idx; }
dwarf::Tag getTag() const {
return AbbrevDecl ? AbbrevDecl->getTag() : dwarf::DW_TAG_null;
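A minimal sketch of walking the new index-based links. The flat DieArray vector and the way indices map into it are assumptions about the unit's internal storage; only getSiblingIdx() comes from the header above.

    #include "llvm/ADT/Optional.h"
    #include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
    #include <vector>

    // Count how many siblings follow the DIE at index Idx in a flat DIE array.
    uint32_t countFollowingSiblings(
        const std::vector<llvm::DWARFDebugInfoEntry> &DieArray, uint32_t Idx) {
      uint32_t N = 0;
      // getSiblingIdx() returns None once the chain ends (stored index 0).
      while (llvm::Optional<uint32_t> Next = DieArray[Idx].getSiblingIdx()) {
        Idx = *Next;
        ++N;
      }
      return N;
    }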
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index d1d65372740b..ee15b6d4112d 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -110,10 +110,6 @@ public:
/// Length of the prologue in bytes.
uint64_t getLength() const;
- int32_t getMaxLineIncrementForSpecialOpcode() const {
- return LineBase + (int8_t)LineRange - 1;
- }
-
/// Get DWARF-version aware access to the file name entry at the provided
/// index.
const llvm::DWARFDebugLine::FileNameEntry &
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
index 2f72c642a2d5..0d9f37c5610b 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h
@@ -49,12 +49,7 @@ public:
/// 2. An address, which defines the appropriate base address for
/// use in interpreting the beginning and ending address offsets of
/// subsequent entries of the location list.
- bool isBaseAddressSelectionEntry(uint8_t AddressSize) const {
- assert(AddressSize == 4 || AddressSize == 8);
- if (AddressSize == 4)
- return StartAddress == -1U;
- return StartAddress == -1ULL;
- }
+ bool isBaseAddressSelectionEntry(uint8_t AddressSize) const;
};
private:
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
index 1903bab5e73f..8f93ebc4ebc0 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -182,6 +182,8 @@ public:
DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const;
DWARFDie getAttributeValueAsReferencedDie(const DWARFFormValue &V) const;
+ DWARFDie resolveTypeUnitReference() const;
+
/// Extract the range base attribute from this DIE as absolute section offset.
///
/// This is a utility function that checks for either the DW_AT_rnglists_base
@@ -220,16 +222,6 @@ public:
/// information is available.
Expected<DWARFAddressRangesVector> getAddressRanges() const;
- /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any
- /// of its children.
- ///
- /// Get the hi/low PC range if both attributes are available or exrtracts the
- /// non-contiguous address ranges from the DW_AT_ranges attribute for this DIE
- /// and all children.
- ///
- /// \param Ranges the addres range vector to fill in.
- void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const;
-
bool addressRangeContainsAddress(const uint64_t Address) const;
Expected<DWARFLocationExpressionsVector>
@@ -246,6 +238,8 @@ public:
/// for ShortName if LinkageName is not found.
/// Returns null if no name is found.
const char *getName(DINameKind Kind) const;
+ void getFullName(raw_string_ostream &,
+ std::string *OriginalFullName = nullptr) const;
/// Return the DIE short name resolving DW_AT_specification or
/// DW_AT_abstract_origin references if necessary. Returns null if no name
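A small, hedged example of the stream-based getFullName() added above. The wrapper name is made up; OriginalFullName is optional and its exact semantics are defined in the implementation file, so the comment only restates the signature.

    #include "llvm/DebugInfo/DWARF/DWARFDie.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Render the full (qualified) name of a DIE into a std::string.
    std::string fullName(const llvm::DWARFDie &Die) {
      std::string Buf;
      llvm::raw_string_ostream OS(Buf);
      Die.getFullName(OS); // OriginalFullName defaults to nullptr.
      return OS.str();
    }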
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 794e859bfe72..b694eeacfd9d 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -86,24 +86,30 @@ public:
uint64_t OperandEndOffsets[2];
public:
- Description &getDescription() { return Desc; }
- uint8_t getCode() { return Opcode; }
- uint64_t getRawOperand(unsigned Idx) { return Operands[Idx]; }
- uint64_t getOperandEndOffset(unsigned Idx) { return OperandEndOffsets[Idx]; }
- uint64_t getEndOffset() { return EndOffset; }
- bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset,
- Optional<dwarf::DwarfFormat> Format);
- bool isError() { return Error; }
+ const Description &getDescription() const { return Desc; }
+ uint8_t getCode() const { return Opcode; }
+ uint64_t getRawOperand(unsigned Idx) const { return Operands[Idx]; }
+ uint64_t getOperandEndOffset(unsigned Idx) const {
+ return OperandEndOffsets[Idx];
+ }
+ uint64_t getEndOffset() const { return EndOffset; }
+ bool isError() const { return Error; }
bool print(raw_ostream &OS, DIDumpOptions DumpOpts,
const DWARFExpression *Expr, const MCRegisterInfo *RegInfo,
- DWARFUnit *U, bool isEH);
- bool verify(DWARFUnit *U);
+ DWARFUnit *U, bool isEH) const;
+
+ /// Verify \p Op. Does not affect the return of \a isError().
+ static bool verify(const Operation &Op, DWARFUnit *U);
+
+ private:
+ bool extract(DataExtractor Data, uint8_t AddressSize, uint64_t Offset,
+ Optional<dwarf::DwarfFormat> Format);
};
/// An iterator to go through the expression operations.
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag,
- Operation> {
+ const Operation> {
friend class DWARFExpression;
const DWARFExpression *Expr;
uint64_t Offset;
@@ -116,19 +122,17 @@ public:
}
public:
- class Operation &operator++() {
+ iterator &operator++() {
Offset = Op.isError() ? Expr->Data.getData().size() : Op.EndOffset;
Op.Error =
Offset >= Expr->Data.getData().size() ||
!Op.extract(Expr->Data, Expr->AddressSize, Offset, Expr->Format);
- return Op;
+ return *this;
}
- class Operation &operator*() {
- return Op;
- }
+ const Operation &operator*() const { return Op; }
- iterator skipBytes(uint64_t Add) {
+ iterator skipBytes(uint64_t Add) const {
return iterator(Expr, Op.EndOffset + Add);
}
@@ -159,6 +163,8 @@ public:
bool operator==(const DWARFExpression &RHS) const;
+ StringRef getData() const { return Data.getData(); }
+
private:
DataExtractor Data;
uint8_t AddressSize;
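A hedged sketch of iterating an expression with the const-qualified Operation interface shown above; verify() is now a static helper that takes the operation explicitly. The loop assumes DWARFExpression's usual begin()/end() iteration.

    #include "llvm/DebugInfo/DWARF/DWARFExpression.h"

    // Check that every operation of an expression decodes and verifies cleanly.
    bool allOperationsValid(const llvm::DWARFExpression &Expr, llvm::DWARFUnit *U) {
      for (const llvm::DWARFExpression::Operation &Op : Expr) {
        if (Op.isError())
          return false;
        if (!llvm::DWARFExpression::Operation::verify(Op, U))
          return false;
      }
      return true;
    }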
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 43be024f0d37..3c051c3ea018 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -102,10 +102,6 @@ public:
return extractValue(Data, OffsetPtr, FormParams, nullptr, U);
}
- bool isInlinedCStr() const {
- return Value.data != nullptr && Value.data == (const uint8_t *)Value.cstr;
- }
-
/// getAsFoo functions below return the extracted value as Foo if only
/// DWARFFormValue has form class is suitable for representing Foo.
Optional<uint64_t> getAsReference() const;
@@ -123,6 +119,19 @@ public:
Optional<ArrayRef<uint8_t>> getAsBlock() const;
Optional<uint64_t> getAsCStringOffset() const;
Optional<uint64_t> getAsReferenceUVal() const;
+ /// Correctly extract any file paths from a form value.
+ ///
+ /// These values can come from DW_AT_decl_file or DW_AT_call_file
+ /// attributes. We need to use the file index in the correct DWARFUnit's line
+ /// table prologue, and each DWARFFormValue has the DWARFUnit the form value
+ /// was extracted from.
+ ///
+ /// \param Kind The kind of path to extract.
+ ///
+ /// \returns A valid string value on success, or llvm::None if the form class
+ /// is not FC_Constant, or if the file index is not valid.
+ Optional<std::string>
+ getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const;
/// Skip a form's value in \p DebugInfoData at the offset specified by
/// \p OffsetPtr.
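A short sketch of the new getAsFile(): because the form value remembers its DWARFUnit, resolving a DW_AT_decl_file index to a path needs only the desired path kind. The wrapper function is illustrative.

    #include "llvm/ADT/Optional.h"
    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
    #include <string>

    // Turn a DW_AT_decl_file / DW_AT_call_file value into an absolute path.
    llvm::Optional<std::string> declFilePath(const llvm::DWARFFormValue &V) {
      return V.getAsFile(
          llvm::DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
    }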
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index 93d7e2b563fd..d471b80c7fe1 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -49,8 +49,6 @@ public:
DieRangeInfo(std::vector<DWARFAddressRange> Ranges)
: Ranges(std::move(Ranges)) {}
- typedef std::vector<DWARFAddressRange>::const_iterator
- address_range_iterator;
typedef std::set<DieRangeInfo>::const_iterator die_range_info_iterator;
/// Inserts the address range. If the range overlaps with an existing
@@ -62,16 +60,6 @@ public:
/// children address ranges must all be contained in.
Optional<DWARFAddressRange> insert(const DWARFAddressRange &R);
- /// Finds an address range in the sorted vector of ranges.
- address_range_iterator findRange(const DWARFAddressRange &R) const {
- auto Begin = Ranges.begin();
- auto End = Ranges.end();
- auto Iter = std::upper_bound(Begin, End, R);
- if (Iter != Begin)
- --Iter;
- return Iter;
- }
-
/// Inserts the address range info. If any of its ranges overlaps with a
/// range in an existing range info, the range info is *not* added and an
/// iterator to the overlapping range info.
@@ -91,14 +79,11 @@ private:
raw_ostream &OS;
DWARFContext &DCtx;
DIDumpOptions DumpOpts;
- /// A map that tracks all references (converted absolute references) so we
- /// can verify each reference points to a valid DIE and not an offset that
- /// lies between to valid DIEs.
- std::map<uint64_t, std::set<uint64_t>> ReferenceToDIEOffsets;
uint32_t NumDebugLineErrors = 0;
// Used to relax some checks that do not currently work portably
bool IsObjectFile;
bool IsMachOObject;
+ using ReferenceMap = std::map<uint64_t, std::set<uint64_t>>;
raw_ostream &error() const;
raw_ostream &warn() const;
@@ -140,6 +125,7 @@ private:
bool verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
uint64_t *Offset, unsigned UnitIndex, uint8_t &UnitType,
bool &isUnitDWARF64);
+ bool verifyName(const DWARFDie &Die);
/// Verifies the header of a unit in a .debug_info or .debug_types section.
///
@@ -156,7 +142,9 @@ private:
/// \param Unit The DWARF Unit to verify.
///
/// \returns The number of errors that occurred during verification.
- unsigned verifyUnitContents(DWARFUnit &Unit);
+ unsigned verifyUnitContents(DWARFUnit &Unit,
+ ReferenceMap &UnitLocalReferences,
+ ReferenceMap &CrossUnitReferences);
/// Verifies the unit headers and contents in a .debug_info or .debug_types
/// section.
@@ -208,7 +196,9 @@ private:
///
/// \returns NumErrors The number of errors occurred during verification of
/// attributes' forms in a unit
- unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue);
+ unsigned verifyDebugInfoForm(const DWARFDie &Die, DWARFAttribute &AttrValue,
+ ReferenceMap &UnitLocalReferences,
+ ReferenceMap &CrossUnitReferences);
/// Verifies the all valid references that were found when iterating through
/// all of the DIE attributes.
@@ -220,7 +210,9 @@ private:
///
/// \returns NumErrors The number of errors occurred during verification of
/// references for the .debug_info and .debug_types sections
- unsigned verifyDebugInfoReferences();
+ unsigned verifyDebugInfoReferences(
+ const ReferenceMap &,
+ llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForDieOffset);
/// Verify the DW_AT_stmt_list encoding and value and ensure that no
/// compile units that have the same DW_AT_stmt_list value.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
index f7f800d01647..045c9e3f3ebd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/StringTable.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
#define LLVM_DEBUGINFO_GSYM_STRINGTABLE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/GSYM/Range.h"
#include <stdint.h>
diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
index 83331b14b8af..a922839a999d 100644
--- a/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
+++ b/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
@@ -93,6 +93,9 @@ inline bool isValidBlockSize(uint32_t Size) {
case 1024:
case 2048:
case 4096:
+ case 8192:
+ case 16384:
+ case 32768:
return true;
}
return false;
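A one-line check, under the obvious assumption that callers gate large-block MSF files on this predicate: 8 KiB, 16 KiB, and 32 KiB blocks are now accepted alongside the original 512-4096 range.

    #include "llvm/DebugInfo/MSF/MSFCommon.h"

    // Newly valid since the switch above gained the 8K/16K/32K cases.
    bool supportsLargeBlocks() { return llvm::msf::isValidBlockSize(32768); }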
diff --git a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index 473c89e8106f..296a4840b779 100644
--- a/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -58,12 +58,12 @@ public:
return support::little;
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override;
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override;
- uint32_t getLength() override;
+ uint64_t getLength() override;
BumpPtrAllocator &getAllocator() { return Allocator; }
@@ -79,10 +79,10 @@ protected:
private:
const MSFStreamLayout &getStreamLayout() const { return StreamLayout; }
- void fixCacheAfterWrite(uint32_t Offset, ArrayRef<uint8_t> Data) const;
+ void fixCacheAfterWrite(uint64_t Offset, ArrayRef<uint8_t> Data) const;
- Error readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer);
- bool tryReadContiguously(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, MutableArrayRef<uint8_t> Buffer);
+ bool tryReadContiguously(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer);
const uint32_t BlockSize;
@@ -125,13 +125,13 @@ public:
return support::little;
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override;
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override;
- uint32_t getLength() override;
+ uint64_t getLength() override;
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override;
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override;
Error commit() override;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
index 5fb13ad30e91..de5b46f21672 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h
@@ -31,9 +31,7 @@ struct FileInfoSubstreamHeader;
class DbiModuleSourceFilesIterator
: public iterator_facade_base<DbiModuleSourceFilesIterator,
std::random_access_iterator_tag, StringRef> {
- using BaseType =
- iterator_facade_base<DbiModuleSourceFilesIterator,
- std::random_access_iterator_tag, StringRef>;
+ using BaseType = typename DbiModuleSourceFilesIterator::iterator_facade_base;
public:
DbiModuleSourceFilesIterator(const DbiModuleList &Modules, uint32_t Modi,
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
index 95c0a89551ed..474bd796b2b3 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -38,6 +38,7 @@ class HashTableIterator
: public iterator_facade_base<HashTableIterator<ValueT>,
std::forward_iterator_tag,
const std::pair<uint32_t, ValueT>> {
+ using BaseT = typename HashTableIterator::iterator_facade_base;
friend HashTable<ValueT>;
HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index,
@@ -76,9 +77,7 @@ public:
// Implement postfix op++ in terms of prefix op++ by using the superclass
// implementation.
- using iterator_facade_base<HashTableIterator<ValueT>,
- std::forward_iterator_tag,
- const std::pair<uint32_t, ValueT>>::operator++;
+ using BaseT::operator++;
HashTableIterator &operator++() {
while (Index < Map->Buckets.size()) {
++Index;
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
index 1df059ffa9fd..f110e90b3f90 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NAMEDSTREAMMAP_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
index 5dedc70f11ba..be0ddf0a063a 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVELINENUMBER_H
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
index 8f1834d0a2c2..90b5d8068959 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEFUNCTIONSIG_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
index 4ae8f1471781..21995ca665c1 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h
@@ -9,7 +9,6 @@
#ifndef LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
#define LLVM_DEBUGINFO_PDB_NATIVE_NATIVETYPEVTSHAPE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/PDB/Native/NativeRawSymbol.h"
diff --git a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
index 3c414e7a9005..004d005280d4 100644
--- a/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
+++ b/llvm/include/llvm/DebugInfo/PDB/Native/PDBFileBuilder.h
@@ -10,7 +10,6 @@
#define LLVM_DEBUGINFO_PDB_NATIVE_PDBFILEBUILDER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index c396a1dc5dd3..3150e049320b 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -31,7 +31,6 @@ enum : int {
char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
int *status);
-
enum MSDemangleFlags {
MSDF_None = 0,
MSDF_DumpBackrefs = 1 << 0,
@@ -39,6 +38,7 @@ enum MSDemangleFlags {
MSDF_NoCallingConvention = 1 << 2,
MSDF_NoReturnType = 1 << 3,
MSDF_NoMemberType = 1 << 4,
+ MSDF_NoVariableType = 1 << 5,
};
/// Demangles the Microsoft symbol pointed at by mangled_name and returns it.
@@ -53,13 +53,16 @@ enum MSDemangleFlags {
/// receives the size of the demangled string on output if n_buf is not nullptr.
/// status receives one of the demangle_ enum entries above if it's not nullptr.
/// Flags controls various details of the demangled representation.
-char *microsoftDemangle(const char *mangled_name, size_t *n_read,
- char *buf, size_t *n_buf,
- int *status, MSDemangleFlags Flags = MSDF_None);
+char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
+ size_t *n_buf, int *status,
+ MSDemangleFlags Flags = MSDF_None);
// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle.
char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
+// Demangles a D mangled symbol.
+char *dlangDemangle(const char *MangledName);
+
/// Attempt to demangle a string using different demangling schemes.
/// The function uses heuristics to determine which demangling scheme to use.
/// \param MangledName - reference to string to demangle.
@@ -67,6 +70,8 @@ char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
/// demangling occurred.
std::string demangle(const std::string &MangledName);
+bool nonMicrosoftDemangle(const char *MangledName, std::string &Result);
+
/// "Partial" demangler. This supports demangling a string into an AST
/// (typically an intermediate stage in itaniumDemangle) and querying certain
/// properties or partially printing the demangled name.
@@ -118,6 +123,7 @@ struct ItaniumPartialDemangler {
bool isSpecialName() const;
~ItaniumPartialDemangler();
+
private:
void *RootNode;
void *Context;
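A hedged sketch of the new free functions: nonMicrosoftDemangle() reports success through its bool result and fills Result in place, so it composes easily with a fallback. Which schemes it tries (presumably Itanium, Rust, and D, given the declarations above) is an assumption, and the printing helper is made up.

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <string>

    // Print a demangled name if one of the non-Microsoft schemes matches,
    // otherwise print the mangled name unchanged.
    void printDemangled(const char *Mangled) {
      std::string Result;
      if (llvm::nonMicrosoftDemangle(Mangled, Result))
        std::printf("%s\n", Result.c_str());
      else
        std::printf("%s\n", Mangled);
    }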
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 9163b713d118..86f5c992b63d 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -57,6 +57,7 @@
X(LocalName) \
X(VectorType) \
X(PixelVectorType) \
+ X(BinaryFPType) \
X(SyntheticTemplateParamName) \
X(TypeTemplateParamDecl) \
X(NonTypeTemplateParamDecl) \
@@ -109,6 +110,126 @@
DEMANGLE_NAMESPACE_BEGIN
+template <class T, size_t N> class PODSmallVector {
+ static_assert(std::is_pod<T>::value,
+ "T is required to be a plain old data type");
+
+ T *First = nullptr;
+ T *Last = nullptr;
+ T *Cap = nullptr;
+ T Inline[N] = {0};
+
+ bool isInline() const { return First == Inline; }
+
+ void clearInline() {
+ First = Inline;
+ Last = Inline;
+ Cap = Inline + N;
+ }
+
+ void reserve(size_t NewCap) {
+ size_t S = size();
+ if (isInline()) {
+ auto *Tmp = static_cast<T *>(std::malloc(NewCap * sizeof(T)));
+ if (Tmp == nullptr)
+ std::terminate();
+ std::copy(First, Last, Tmp);
+ First = Tmp;
+ } else {
+ First = static_cast<T *>(std::realloc(First, NewCap * sizeof(T)));
+ if (First == nullptr)
+ std::terminate();
+ }
+ Last = First + S;
+ Cap = First + NewCap;
+ }
+
+public:
+ PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
+
+ PODSmallVector(const PODSmallVector &) = delete;
+ PODSmallVector &operator=(const PODSmallVector &) = delete;
+
+ PODSmallVector(PODSmallVector &&Other) : PODSmallVector() {
+ if (Other.isInline()) {
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return;
+ }
+
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ }
+
+ PODSmallVector &operator=(PODSmallVector &&Other) {
+ if (Other.isInline()) {
+ if (!isInline()) {
+ std::free(First);
+ clearInline();
+ }
+ std::copy(Other.begin(), Other.end(), First);
+ Last = First + Other.size();
+ Other.clear();
+ return *this;
+ }
+
+ if (isInline()) {
+ First = Other.First;
+ Last = Other.Last;
+ Cap = Other.Cap;
+ Other.clearInline();
+ return *this;
+ }
+
+ std::swap(First, Other.First);
+ std::swap(Last, Other.Last);
+ std::swap(Cap, Other.Cap);
+ Other.clear();
+ return *this;
+ }
+
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void push_back(const T &Elem) {
+ if (Last == Cap)
+ reserve(size() * 2);
+ *Last++ = Elem;
+ }
+
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void pop_back() {
+ assert(Last != First && "Popping empty vector!");
+ --Last;
+ }
+
+ void dropBack(size_t Index) {
+ assert(Index <= size() && "dropBack() can't expand!");
+ Last = First + Index;
+ }
+
+ T *begin() { return First; }
+ T *end() { return Last; }
+
+ bool empty() const { return First == Last; }
+ size_t size() const { return static_cast<size_t>(Last - First); }
+ T &back() {
+ assert(Last != First && "Calling back() on empty vector!");
+ return *(Last - 1);
+ }
+ T &operator[](size_t Index) {
+ assert(Index < size() && "Invalid access!");
+ return *(begin() + Index);
+ }
+ void clear() { Last = First; }
+
+ ~PODSmallVector() {
+ if (!isInline())
+ std::free(First);
+ }
+};
+
// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
@@ -155,50 +276,48 @@ public:
// would construct an equivalent node.
//template<typename Fn> void match(Fn F) const;
- bool hasRHSComponent(OutputStream &S) const {
+ bool hasRHSComponent(OutputBuffer &OB) const {
if (RHSComponentCache != Cache::Unknown)
return RHSComponentCache == Cache::Yes;
- return hasRHSComponentSlow(S);
+ return hasRHSComponentSlow(OB);
}
- bool hasArray(OutputStream &S) const {
+ bool hasArray(OutputBuffer &OB) const {
if (ArrayCache != Cache::Unknown)
return ArrayCache == Cache::Yes;
- return hasArraySlow(S);
+ return hasArraySlow(OB);
}
- bool hasFunction(OutputStream &S) const {
+ bool hasFunction(OutputBuffer &OB) const {
if (FunctionCache != Cache::Unknown)
return FunctionCache == Cache::Yes;
- return hasFunctionSlow(S);
+ return hasFunctionSlow(OB);
}
Kind getKind() const { return K; }
- virtual bool hasRHSComponentSlow(OutputStream &) const { return false; }
- virtual bool hasArraySlow(OutputStream &) const { return false; }
- virtual bool hasFunctionSlow(OutputStream &) const { return false; }
+ virtual bool hasRHSComponentSlow(OutputBuffer &) const { return false; }
+ virtual bool hasArraySlow(OutputBuffer &) const { return false; }
+ virtual bool hasFunctionSlow(OutputBuffer &) const { return false; }
// Dig through "glue" nodes like ParameterPack and ForwardTemplateReference to
// get at a node that actually represents some concrete syntax.
- virtual const Node *getSyntaxNode(OutputStream &) const {
- return this;
- }
+ virtual const Node *getSyntaxNode(OutputBuffer &) const { return this; }
- void print(OutputStream &S) const {
- printLeft(S);
+ void print(OutputBuffer &OB) const {
+ printLeft(OB);
if (RHSComponentCache != Cache::No)
- printRight(S);
+ printRight(OB);
}
- // Print the "left" side of this Node into OutputStream.
- virtual void printLeft(OutputStream &) const = 0;
+ // Print the "left" side of this Node into OutputBuffer.
+ virtual void printLeft(OutputBuffer &) const = 0;
// Print the "right". This distinction is necessary to represent C++ types
// that appear on the RHS of their subtype, such as arrays or functions.
// Since most types don't have such a component, provide a default
// implementation.
- virtual void printRight(OutputStream &) const {}
+ virtual void printRight(OutputBuffer &) const {}
virtual StringView getBaseName() const { return StringView(); }
@@ -227,19 +346,19 @@ public:
Node *operator[](size_t Idx) const { return Elements[Idx]; }
- void printWithComma(OutputStream &S) const {
+ void printWithComma(OutputBuffer &OB) const {
bool FirstElement = true;
for (size_t Idx = 0; Idx != NumElements; ++Idx) {
- size_t BeforeComma = S.getCurrentPosition();
+ size_t BeforeComma = OB.getCurrentPosition();
if (!FirstElement)
- S += ", ";
- size_t AfterComma = S.getCurrentPosition();
- Elements[Idx]->print(S);
+ OB += ", ";
+ size_t AfterComma = OB.getCurrentPosition();
+ Elements[Idx]->print(OB);
// Elements[Idx] is an empty parameter pack expansion, we should erase the
// comma we just printed.
- if (AfterComma == S.getCurrentPosition()) {
- S.setCurrentPosition(BeforeComma);
+ if (AfterComma == OB.getCurrentPosition()) {
+ OB.setCurrentPosition(BeforeComma);
continue;
}
@@ -254,9 +373,7 @@ struct NodeArrayNode : Node {
template<typename Fn> void match(Fn F) const { F(Array); }
- void printLeft(OutputStream &S) const override {
- Array.printWithComma(S);
- }
+ void printLeft(OutputBuffer &OB) const override { Array.printWithComma(OB); }
};
class DotSuffix final : public Node {
@@ -269,11 +386,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Prefix, Suffix); }
- void printLeft(OutputStream &s) const override {
- Prefix->print(s);
- s += " (";
- s += Suffix;
- s += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ Prefix->print(OB);
+ OB += " (";
+ OB += Suffix;
+ OB += ")";
}
};
@@ -288,12 +405,12 @@ public:
template <typename Fn> void match(Fn F) const { F(Ty, Ext, TA); }
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += " ";
- S += Ext;
+ void printLeft(OutputBuffer &OB) const override {
+ Ty->print(OB);
+ OB += " ";
+ OB += Ext;
if (TA != nullptr)
- TA->print(S);
+ TA->print(OB);
}
};
@@ -319,13 +436,13 @@ protected:
const Qualifiers Quals;
const Node *Child;
- void printQuals(OutputStream &S) const {
+ void printQuals(OutputBuffer &OB) const {
if (Quals & QualConst)
- S += " const";
+ OB += " const";
if (Quals & QualVolatile)
- S += " volatile";
+ OB += " volatile";
if (Quals & QualRestrict)
- S += " restrict";
+ OB += " restrict";
}
public:
@@ -336,22 +453,22 @@ public:
template<typename Fn> void match(Fn F) const { F(Child, Quals); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Child->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return Child->hasRHSComponent(OB);
}
- bool hasArraySlow(OutputStream &S) const override {
- return Child->hasArray(S);
+ bool hasArraySlow(OutputBuffer &OB) const override {
+ return Child->hasArray(OB);
}
- bool hasFunctionSlow(OutputStream &S) const override {
- return Child->hasFunction(S);
+ bool hasFunctionSlow(OutputBuffer &OB) const override {
+ return Child->hasFunction(OB);
}
- void printLeft(OutputStream &S) const override {
- Child->printLeft(S);
- printQuals(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Child->printLeft(OB);
+ printQuals(OB);
}
- void printRight(OutputStream &S) const override { Child->printRight(S); }
+ void printRight(OutputBuffer &OB) const override { Child->printRight(OB); }
};
class ConversionOperatorType final : public Node {
@@ -363,9 +480,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty); }
- void printLeft(OutputStream &S) const override {
- S += "operator ";
- Ty->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "operator ";
+ Ty->print(OB);
}
};
@@ -379,9 +496,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Postfix); }
- void printLeft(OutputStream &s) const override {
- Ty->printLeft(s);
- s += Postfix;
+ void printLeft(OutputBuffer &OB) const override {
+ Ty->printLeft(OB);
+ OB += Postfix;
}
};
@@ -396,7 +513,7 @@ public:
StringView getName() const { return Name; }
StringView getBaseName() const override { return Name; }
- void printLeft(OutputStream &s) const override { s += Name; }
+ void printLeft(OutputBuffer &OB) const override { OB += Name; }
};
class ElaboratedTypeSpefType : public Node {
@@ -408,10 +525,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Kind, Child); }
- void printLeft(OutputStream &S) const override {
- S += Kind;
- S += ' ';
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Kind;
+ OB += ' ';
+ Child->print(OB);
}
};
@@ -426,11 +543,11 @@ struct AbiTagAttr : Node {
template<typename Fn> void match(Fn F) const { F(Base, Tag); }
- void printLeft(OutputStream &S) const override {
- Base->printLeft(S);
- S += "[abi:";
- S += Tag;
- S += "]";
+ void printLeft(OutputBuffer &OB) const override {
+ Base->printLeft(OB);
+ OB += "[abi:";
+ OB += Tag;
+ OB += "]";
}
};
@@ -442,10 +559,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Conditions); }
- void printLeft(OutputStream &S) const override {
- S += " [enable_if:";
- Conditions.printWithComma(S);
- S += ']';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += " [enable_if:";
+ Conditions.printWithComma(OB);
+ OB += ']';
}
};
@@ -466,11 +583,11 @@ public:
static_cast<const NameType *>(Ty)->getName() == "objc_object";
}
- void printLeft(OutputStream &S) const override {
- Ty->print(S);
- S += "<";
- S += Protocol;
- S += ">";
+ void printLeft(OutputBuffer &OB) const override {
+ Ty->print(OB);
+ OB += "<";
+ OB += Protocol;
+ OB += ">";
}
};
@@ -484,34 +601,34 @@ public:
template<typename Fn> void match(Fn F) const { F(Pointee); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return Pointee->hasRHSComponent(OB);
}
- void printLeft(OutputStream &s) const override {
+ void printLeft(OutputBuffer &OB) const override {
// We rewrite objc_object<SomeProtocol>* into id<SomeProtocol>.
if (Pointee->getKind() != KObjCProtoName ||
!static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- Pointee->printLeft(s);
- if (Pointee->hasArray(s))
- s += " ";
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += "(";
- s += "*";
+ Pointee->printLeft(OB);
+ if (Pointee->hasArray(OB))
+ OB += " ";
+ if (Pointee->hasArray(OB) || Pointee->hasFunction(OB))
+ OB += "(";
+ OB += "*";
} else {
const auto *objcProto = static_cast<const ObjCProtoName *>(Pointee);
- s += "id<";
- s += objcProto->Protocol;
- s += ">";
+ OB += "id<";
+ OB += objcProto->Protocol;
+ OB += ">";
}
}
- void printRight(OutputStream &s) const override {
+ void printRight(OutputBuffer &OB) const override {
if (Pointee->getKind() != KObjCProtoName ||
!static_cast<const ObjCProtoName *>(Pointee)->isObjCObject()) {
- if (Pointee->hasArray(s) || Pointee->hasFunction(s))
- s += ")";
- Pointee->printRight(s);
+ if (Pointee->hasArray(OB) || Pointee->hasFunction(OB))
+ OB += ")";
+ Pointee->printRight(OB);
}
}
};
@@ -531,15 +648,30 @@ class ReferenceType : public Node {
// Dig through any refs to refs, collapsing the ReferenceTypes as we go. The
// rule here is rvalue ref to rvalue ref collapses to a rvalue ref, and any
// other combination collapses to a lvalue ref.
- std::pair<ReferenceKind, const Node *> collapse(OutputStream &S) const {
+ //
+ // A combination of a TemplateForwardReference and a back-ref Substitution
+ // from an ill-formed string may have created a cycle; use cycle detection to
+ // avoid looping forever.
+ std::pair<ReferenceKind, const Node *> collapse(OutputBuffer &OB) const {
auto SoFar = std::make_pair(RK, Pointee);
+ // Track the chain of nodes for Floyd's 'tortoise and hare'
+ // cycle-detection algorithm, since getSyntaxNode(OB) is impure
+ PODSmallVector<const Node *, 8> Prev;
for (;;) {
- const Node *SN = SoFar.second->getSyntaxNode(S);
+ const Node *SN = SoFar.second->getSyntaxNode(OB);
if (SN->getKind() != KReferenceType)
break;
auto *RT = static_cast<const ReferenceType *>(SN);
SoFar.second = RT->Pointee;
SoFar.first = std::min(SoFar.first, RT->RK);
+
+ // The middle of Prev is the 'slow' pointer moving at half speed
+ Prev.push_back(SoFar.second);
+ if (Prev.size() > 1 && SoFar.second == Prev[(Prev.size() - 1) / 2]) {
+ // Cycle detected
+ SoFar.second = nullptr;
+ break;
+ }
}
return SoFar;
}
@@ -551,31 +683,35 @@ public:
template<typename Fn> void match(Fn F) const { F(Pointee, RK); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return Pointee->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return Pointee->hasRHSComponent(OB);
}
- void printLeft(OutputStream &s) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- Collapsed.second->printLeft(s);
- if (Collapsed.second->hasArray(s))
- s += " ";
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += "(";
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB);
+ if (!Collapsed.second)
+ return;
+ Collapsed.second->printLeft(OB);
+ if (Collapsed.second->hasArray(OB))
+ OB += " ";
+ if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB))
+ OB += "(";
- s += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
+ OB += (Collapsed.first == ReferenceKind::LValue ? "&" : "&&");
}
- void printRight(OutputStream &s) const override {
+ void printRight(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- std::pair<ReferenceKind, const Node *> Collapsed = collapse(s);
- if (Collapsed.second->hasArray(s) || Collapsed.second->hasFunction(s))
- s += ")";
- Collapsed.second->printRight(s);
+ std::pair<ReferenceKind, const Node *> Collapsed = collapse(OB);
+ if (!Collapsed.second)
+ return;
+ if (Collapsed.second->hasArray(OB) || Collapsed.second->hasFunction(OB))
+ OB += ")";
+ Collapsed.second->printRight(OB);
}
};
@@ -590,24 +726,24 @@ public:
template<typename Fn> void match(Fn F) const { F(ClassType, MemberType); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- return MemberType->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ return MemberType->hasRHSComponent(OB);
}
- void printLeft(OutputStream &s) const override {
- MemberType->printLeft(s);
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += "(";
+ void printLeft(OutputBuffer &OB) const override {
+ MemberType->printLeft(OB);
+ if (MemberType->hasArray(OB) || MemberType->hasFunction(OB))
+ OB += "(";
else
- s += " ";
- ClassType->print(s);
- s += "::*";
+ OB += " ";
+ ClassType->print(OB);
+ OB += "::*";
}
- void printRight(OutputStream &s) const override {
- if (MemberType->hasArray(s) || MemberType->hasFunction(s))
- s += ")";
- MemberType->printRight(s);
+ void printRight(OutputBuffer &OB) const override {
+ if (MemberType->hasArray(OB) || MemberType->hasFunction(OB))
+ OB += ")";
+ MemberType->printRight(OB);
}
};
@@ -624,19 +760,19 @@ public:
template<typename Fn> void match(Fn F) const { F(Base, Dimension); }
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasArraySlow(OutputStream &) const override { return true; }
+ bool hasRHSComponentSlow(OutputBuffer &) const override { return true; }
+ bool hasArraySlow(OutputBuffer &) const override { return true; }
- void printLeft(OutputStream &S) const override { Base->printLeft(S); }
+ void printLeft(OutputBuffer &OB) const override { Base->printLeft(OB); }
- void printRight(OutputStream &S) const override {
- if (S.back() != ']')
- S += " ";
- S += "[";
+ void printRight(OutputBuffer &OB) const override {
+ if (OB.back() != ']')
+ OB += " ";
+ OB += "[";
if (Dimension)
- Dimension->print(S);
- S += "]";
- Base->printRight(S);
+ Dimension->print(OB);
+ OB += "]";
+ Base->printRight(OB);
}
};
@@ -660,8 +796,8 @@ public:
F(Ret, Params, CVQuals, RefQual, ExceptionSpec);
}
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
+ bool hasRHSComponentSlow(OutputBuffer &) const override { return true; }
+ bool hasFunctionSlow(OutputBuffer &) const override { return true; }
// Handle C++'s ... quirky decl grammar by using the left & right
// distinction. Consider:
@@ -670,32 +806,32 @@ public:
// that takes a char and returns an int. If we're trying to print f, start
// by printing out the return types's left, then print our parameters, then
// finally print right of the return type.
- void printLeft(OutputStream &S) const override {
- Ret->printLeft(S);
- S += " ";
+ void printLeft(OutputBuffer &OB) const override {
+ Ret->printLeft(OB);
+ OB += " ";
}
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
- Ret->printRight(S);
+ void printRight(OutputBuffer &OB) const override {
+ OB += "(";
+ Params.printWithComma(OB);
+ OB += ")";
+ Ret->printRight(OB);
if (CVQuals & QualConst)
- S += " const";
+ OB += " const";
if (CVQuals & QualVolatile)
- S += " volatile";
+ OB += " volatile";
if (CVQuals & QualRestrict)
- S += " restrict";
+ OB += " restrict";
if (RefQual == FrefQualLValue)
- S += " &";
+ OB += " &";
else if (RefQual == FrefQualRValue)
- S += " &&";
+ OB += " &&";
if (ExceptionSpec != nullptr) {
- S += ' ';
- ExceptionSpec->print(S);
+ OB += ' ';
+ ExceptionSpec->print(OB);
}
}
};
@@ -707,10 +843,10 @@ public:
template<typename Fn> void match(Fn F) const { F(E); }
- void printLeft(OutputStream &S) const override {
- S += "noexcept(";
- E->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "noexcept(";
+ E->print(OB);
+ OB += ")";
}
};
@@ -722,10 +858,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Types); }
- void printLeft(OutputStream &S) const override {
- S += "throw(";
- Types.printWithComma(S);
- S += ')';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "throw(";
+ Types.printWithComma(OB);
+ OB += ')';
}
};
@@ -756,41 +892,41 @@ public:
NodeArray getParams() const { return Params; }
const Node *getReturnType() const { return Ret; }
- bool hasRHSComponentSlow(OutputStream &) const override { return true; }
- bool hasFunctionSlow(OutputStream &) const override { return true; }
+ bool hasRHSComponentSlow(OutputBuffer &) const override { return true; }
+ bool hasFunctionSlow(OutputBuffer &) const override { return true; }
const Node *getName() const { return Name; }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Ret) {
- Ret->printLeft(S);
- if (!Ret->hasRHSComponent(S))
- S += " ";
+ Ret->printLeft(OB);
+ if (!Ret->hasRHSComponent(OB))
+ OB += " ";
}
- Name->print(S);
+ Name->print(OB);
}
- void printRight(OutputStream &S) const override {
- S += "(";
- Params.printWithComma(S);
- S += ")";
+ void printRight(OutputBuffer &OB) const override {
+ OB += "(";
+ Params.printWithComma(OB);
+ OB += ")";
if (Ret)
- Ret->printRight(S);
+ Ret->printRight(OB);
if (CVQuals & QualConst)
- S += " const";
+ OB += " const";
if (CVQuals & QualVolatile)
- S += " volatile";
+ OB += " volatile";
if (CVQuals & QualRestrict)
- S += " restrict";
+ OB += " restrict";
if (RefQual == FrefQualLValue)
- S += " &";
+ OB += " &";
else if (RefQual == FrefQualRValue)
- S += " &&";
+ OB += " &&";
if (Attrs != nullptr)
- Attrs->print(S);
+ Attrs->print(OB);
}
};
@@ -803,9 +939,9 @@ public:
template<typename Fn> void match(Fn F) const { F(OpName); }
- void printLeft(OutputStream &S) const override {
- S += "operator\"\" ";
- OpName->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "operator\"\" ";
+ OpName->print(OB);
}
};
@@ -819,9 +955,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Special, Child); }
- void printLeft(OutputStream &S) const override {
- S += Special;
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Special;
+ Child->print(OB);
}
};
@@ -836,11 +972,11 @@ public:
template<typename Fn> void match(Fn F) const { F(FirstType, SecondType); }
- void printLeft(OutputStream &S) const override {
- S += "construction vtable for ";
- FirstType->print(S);
- S += "-in-";
- SecondType->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "construction vtable for ";
+ FirstType->print(OB);
+ OB += "-in-";
+ SecondType->print(OB);
}
};
@@ -855,10 +991,10 @@ struct NestedName : Node {
StringView getBaseName() const override { return Name->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- Qual->print(S);
- S += "::";
- Name->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Qual->print(OB);
+ OB += "::";
+ Name->print(OB);
}
};
@@ -871,10 +1007,10 @@ struct LocalName : Node {
template<typename Fn> void match(Fn F) const { F(Encoding, Entity); }
- void printLeft(OutputStream &S) const override {
- Encoding->print(S);
- S += "::";
- Entity->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Encoding->print(OB);
+ OB += "::";
+ Entity->print(OB);
}
};
@@ -891,10 +1027,10 @@ public:
StringView getBaseName() const override { return Name->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- Qualifier->print(S);
- S += "::";
- Name->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Qualifier->print(OB);
+ OB += "::";
+ Name->print(OB);
}
};
@@ -909,12 +1045,12 @@ public:
template<typename Fn> void match(Fn F) const { F(BaseType, Dimension); }
- void printLeft(OutputStream &S) const override {
- BaseType->print(S);
- S += " vector[";
+ void printLeft(OutputBuffer &OB) const override {
+ BaseType->print(OB);
+ OB += " vector[";
if (Dimension)
- Dimension->print(S);
- S += "]";
+ Dimension->print(OB);
+ OB += "]";
}
};
@@ -927,11 +1063,26 @@ public:
template<typename Fn> void match(Fn F) const { F(Dimension); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
// FIXME: This should demangle as "vector pixel".
- S += "pixel vector[";
- Dimension->print(S);
- S += "]";
+ OB += "pixel vector[";
+ Dimension->print(OB);
+ OB += "]";
+ }
+};
+
+class BinaryFPType final : public Node {
+ const Node *Dimension;
+
+public:
+ BinaryFPType(const Node *Dimension_)
+ : Node(KBinaryFPType), Dimension(Dimension_) {}
+
+ template<typename Fn> void match(Fn F) const { F(Dimension); }
+
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "_Float";
+ Dimension->print(OB);
}
};
@@ -953,20 +1104,20 @@ public:
template<typename Fn> void match(Fn F) const { F(Kind, Index); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
switch (Kind) {
case TemplateParamKind::Type:
- S += "$T";
+ OB += "$T";
break;
case TemplateParamKind::NonType:
- S += "$N";
+ OB += "$N";
break;
case TemplateParamKind::Template:
- S += "$TT";
+ OB += "$TT";
break;
}
if (Index > 0)
- S << Index - 1;
+ OB << Index - 1;
}
};
@@ -980,13 +1131,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Name); }
- void printLeft(OutputStream &S) const override {
- S += "typename ";
- }
+ void printLeft(OutputBuffer &OB) const override { OB += "typename "; }
- void printRight(OutputStream &S) const override {
- Name->print(S);
- }
+ void printRight(OutputBuffer &OB) const override { Name->print(OB); }
};
/// A non-type template parameter declaration, 'int N'.
@@ -1000,15 +1147,15 @@ public:
template<typename Fn> void match(Fn F) const { F(Name, Type); }
- void printLeft(OutputStream &S) const override {
- Type->printLeft(S);
- if (!Type->hasRHSComponent(S))
- S += " ";
+ void printLeft(OutputBuffer &OB) const override {
+ Type->printLeft(OB);
+ if (!Type->hasRHSComponent(OB))
+ OB += " ";
}
- void printRight(OutputStream &S) const override {
- Name->print(S);
- Type->printRight(S);
+ void printRight(OutputBuffer &OB) const override {
+ Name->print(OB);
+ Type->printRight(OB);
}
};
@@ -1025,15 +1172,13 @@ public:
template<typename Fn> void match(Fn F) const { F(Name, Params); }
- void printLeft(OutputStream &S) const override {
- S += "template<";
- Params.printWithComma(S);
- S += "> typename ";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "template<";
+ Params.printWithComma(OB);
+ OB += "> typename ";
}
- void printRight(OutputStream &S) const override {
- Name->print(S);
- }
+ void printRight(OutputBuffer &OB) const override { Name->print(OB); }
};
/// A template parameter pack declaration, 'typename ...T'.
@@ -1046,14 +1191,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Param); }
- void printLeft(OutputStream &S) const override {
- Param->printLeft(S);
- S += "...";
+ void printLeft(OutputBuffer &OB) const override {
+ Param->printLeft(OB);
+ OB += "...";
}
- void printRight(OutputStream &S) const override {
- Param->printRight(S);
- }
+ void printRight(OutputBuffer &OB) const override { Param->printRight(OB); }
};
/// An unexpanded parameter pack (either in the expression or type context). If
@@ -1067,11 +1210,11 @@ public:
class ParameterPack final : public Node {
NodeArray Data;
- // Setup OutputStream for a pack expansion unless we're already expanding one.
- void initializePackExpansion(OutputStream &S) const {
- if (S.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
- S.CurrentPackMax = static_cast<unsigned>(Data.size());
- S.CurrentPackIndex = 0;
+ // Setup OutputBuffer for a pack expansion unless we're already expanding one.
+ void initializePackExpansion(OutputBuffer &OB) const {
+ if (OB.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
+ OB.CurrentPackMax = static_cast<unsigned>(Data.size());
+ OB.CurrentPackIndex = 0;
}
}
@@ -1094,38 +1237,38 @@ public:
template<typename Fn> void match(Fn F) const { F(Data); }
- bool hasRHSComponentSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasRHSComponent(S);
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasRHSComponent(OB);
}
- bool hasArraySlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasArray(S);
+ bool hasArraySlow(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasArray(OB);
}
- bool hasFunctionSlow(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() && Data[Idx]->hasFunction(S);
+ bool hasFunctionSlow(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() && Data[Idx]->hasFunction(OB);
}
- const Node *getSyntaxNode(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
- return Idx < Data.size() ? Data[Idx]->getSyntaxNode(S) : this;
+ const Node *getSyntaxNode(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
+ return Idx < Data.size() ? Data[Idx]->getSyntaxNode(OB) : this;
}
- void printLeft(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
+ void printLeft(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
if (Idx < Data.size())
- Data[Idx]->printLeft(S);
+ Data[Idx]->printLeft(OB);
}
- void printRight(OutputStream &S) const override {
- initializePackExpansion(S);
- size_t Idx = S.CurrentPackIndex;
+ void printRight(OutputBuffer &OB) const override {
+ initializePackExpansion(OB);
+ size_t Idx = OB.CurrentPackIndex;
if (Idx < Data.size())
- Data[Idx]->printRight(S);
+ Data[Idx]->printRight(OB);
}
};
@@ -1144,8 +1287,8 @@ public:
NodeArray getElements() const { return Elements; }
- void printLeft(OutputStream &S) const override {
- Elements.printWithComma(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Elements.printWithComma(OB);
}
};
@@ -1162,35 +1305,35 @@ public:
const Node *getChild() const { return Child; }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
constexpr unsigned Max = std::numeric_limits<unsigned>::max();
- SwapAndRestore<unsigned> SavePackIdx(S.CurrentPackIndex, Max);
- SwapAndRestore<unsigned> SavePackMax(S.CurrentPackMax, Max);
- size_t StreamPos = S.getCurrentPosition();
+ SwapAndRestore<unsigned> SavePackIdx(OB.CurrentPackIndex, Max);
+ SwapAndRestore<unsigned> SavePackMax(OB.CurrentPackMax, Max);
+ size_t StreamPos = OB.getCurrentPosition();
// Print the first element in the pack. If Child contains a ParameterPack,
// it will set up OB.CurrentPackMax and print the first element.
- Child->print(S);
+ Child->print(OB);
// No ParameterPack was found in Child. This can occur if we've found a pack
// expansion on a <function-param>.
- if (S.CurrentPackMax == Max) {
- S += "...";
+ if (OB.CurrentPackMax == Max) {
+ OB += "...";
return;
}
// We found a ParameterPack, but it has no elements. Erase whatever we may
// have printed.
- if (S.CurrentPackMax == 0) {
- S.setCurrentPosition(StreamPos);
+ if (OB.CurrentPackMax == 0) {
+ OB.setCurrentPosition(StreamPos);
return;
}
// Else, iterate through the rest of the elements in the pack.
- for (unsigned I = 1, E = S.CurrentPackMax; I < E; ++I) {
- S += ", ";
- S.CurrentPackIndex = I;
- Child->print(S);
+ for (unsigned I = 1, E = OB.CurrentPackMax; I < E; ++I) {
+ OB += ", ";
+ OB.CurrentPackIndex = I;
+ Child->print(OB);
}
}
};
@@ -1205,12 +1348,12 @@ public:
NodeArray getParams() { return Params; }
- void printLeft(OutputStream &S) const override {
- S += "<";
- Params.printWithComma(S);
- if (S.back() == '>')
- S += " ";
- S += ">";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "<";
+ Params.printWithComma(OB);
+ if (OB.back() == '>')
+ OB += " ";
+ OB += ">";
}
};
@@ -1252,42 +1395,42 @@ struct ForwardTemplateReference : Node {
// special handling.
template<typename Fn> void match(Fn F) const = delete;
- bool hasRHSComponentSlow(OutputStream &S) const override {
+ bool hasRHSComponentSlow(OutputBuffer &OB) const override {
if (Printing)
return false;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasRHSComponent(S);
+ return Ref->hasRHSComponent(OB);
}
- bool hasArraySlow(OutputStream &S) const override {
+ bool hasArraySlow(OutputBuffer &OB) const override {
if (Printing)
return false;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasArray(S);
+ return Ref->hasArray(OB);
}
- bool hasFunctionSlow(OutputStream &S) const override {
+ bool hasFunctionSlow(OutputBuffer &OB) const override {
if (Printing)
return false;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->hasFunction(S);
+ return Ref->hasFunction(OB);
}
- const Node *getSyntaxNode(OutputStream &S) const override {
+ const Node *getSyntaxNode(OutputBuffer &OB) const override {
if (Printing)
return this;
SwapAndRestore<bool> SavePrinting(Printing, true);
- return Ref->getSyntaxNode(S);
+ return Ref->getSyntaxNode(OB);
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printLeft(S);
+ Ref->printLeft(OB);
}
- void printRight(OutputStream &S) const override {
+ void printRight(OutputBuffer &OB) const override {
if (Printing)
return;
SwapAndRestore<bool> SavePrinting(Printing, true);
- Ref->printRight(S);
+ Ref->printRight(OB);
}
};
@@ -1303,9 +1446,9 @@ struct NameWithTemplateArgs : Node {
StringView getBaseName() const override { return Name->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- Name->print(S);
- TemplateArgs->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ Name->print(OB);
+ TemplateArgs->print(OB);
}
};
@@ -1320,9 +1463,9 @@ public:
StringView getBaseName() const override { return Child->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- S += "::";
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "::";
+ Child->print(OB);
}
};
@@ -1335,9 +1478,9 @@ struct StdQualifiedName : Node {
StringView getBaseName() const override { return Child->getBaseName(); }
- void printLeft(OutputStream &S) const override {
- S += "std::";
- Child->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "std::";
+ Child->print(OB);
}
};
@@ -1377,26 +1520,26 @@ public:
DEMANGLE_UNREACHABLE;
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
switch (SSK) {
case SpecialSubKind::allocator:
- S += "std::allocator";
+ OB += "std::allocator";
break;
case SpecialSubKind::basic_string:
- S += "std::basic_string";
+ OB += "std::basic_string";
break;
case SpecialSubKind::string:
- S += "std::basic_string<char, std::char_traits<char>, "
- "std::allocator<char> >";
+ OB += "std::basic_string<char, std::char_traits<char>, "
+ "std::allocator<char> >";
break;
case SpecialSubKind::istream:
- S += "std::basic_istream<char, std::char_traits<char> >";
+ OB += "std::basic_istream<char, std::char_traits<char> >";
break;
case SpecialSubKind::ostream:
- S += "std::basic_ostream<char, std::char_traits<char> >";
+ OB += "std::basic_ostream<char, std::char_traits<char> >";
break;
case SpecialSubKind::iostream:
- S += "std::basic_iostream<char, std::char_traits<char> >";
+ OB += "std::basic_iostream<char, std::char_traits<char> >";
break;
}
}
@@ -1429,25 +1572,25 @@ public:
DEMANGLE_UNREACHABLE;
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
switch (SSK) {
case SpecialSubKind::allocator:
- S += "std::allocator";
+ OB += "std::allocator";
break;
case SpecialSubKind::basic_string:
- S += "std::basic_string";
+ OB += "std::basic_string";
break;
case SpecialSubKind::string:
- S += "std::string";
+ OB += "std::string";
break;
case SpecialSubKind::istream:
- S += "std::istream";
+ OB += "std::istream";
break;
case SpecialSubKind::ostream:
- S += "std::ostream";
+ OB += "std::ostream";
break;
case SpecialSubKind::iostream:
- S += "std::iostream";
+ OB += "std::iostream";
break;
}
}
@@ -1465,10 +1608,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Basename, IsDtor, Variant); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsDtor)
- S += "~";
- S += Basename->getBaseName();
+ OB += "~";
+ OB += Basename->getBaseName();
}
};
@@ -1480,9 +1623,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Base); }
- void printLeft(OutputStream &S) const override {
- S += "~";
- Base->printLeft(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "~";
+ Base->printLeft(OB);
}
};
@@ -1494,10 +1637,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Count); }
- void printLeft(OutputStream &S) const override {
- S += "'unnamed";
- S += Count;
- S += "\'";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "'unnamed";
+ OB += Count;
+ OB += "\'";
}
};
@@ -1516,22 +1659,22 @@ public:
F(TemplateParams, Params, Count);
}
- void printDeclarator(OutputStream &S) const {
+ void printDeclarator(OutputBuffer &OB) const {
if (!TemplateParams.empty()) {
- S += "<";
- TemplateParams.printWithComma(S);
- S += ">";
+ OB += "<";
+ TemplateParams.printWithComma(OB);
+ OB += ">";
}
- S += "(";
- Params.printWithComma(S);
- S += ")";
+ OB += "(";
+ Params.printWithComma(OB);
+ OB += ")";
}
- void printLeft(OutputStream &S) const override {
- S += "\'lambda";
- S += Count;
- S += "\'";
- printDeclarator(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "\'lambda";
+ OB += Count;
+ OB += "\'";
+ printDeclarator(OB);
}
};
@@ -1543,10 +1686,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Bindings); }
- void printLeft(OutputStream &S) const override {
- S += '[';
- Bindings.printWithComma(S);
- S += ']';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += '[';
+ Bindings.printWithComma(OB);
+ OB += ']';
}
};
@@ -1564,22 +1707,22 @@ public:
template<typename Fn> void match(Fn F) const { F(LHS, InfixOperator, RHS); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
// If the operator is '>', this might be a template argument expression, so
// we need to disambiguate with parens.
if (InfixOperator == ">")
- S += "(";
+ OB += "(";
- S += "(";
- LHS->print(S);
- S += ") ";
- S += InfixOperator;
- S += " (";
- RHS->print(S);
- S += ")";
+ OB += "(";
+ LHS->print(OB);
+ OB += ") ";
+ OB += InfixOperator;
+ OB += " (";
+ RHS->print(OB);
+ OB += ")";
if (InfixOperator == ">")
- S += ")";
+ OB += ")";
}
};
@@ -1593,12 +1736,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Op1, Op2); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Op1->print(S);
- S += ")[";
- Op2->print(S);
- S += "]";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Op1->print(OB);
+ OB += ")[";
+ Op2->print(OB);
+ OB += "]";
}
};
@@ -1612,11 +1755,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Child, Operator); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Child->print(S);
- S += ")";
- S += Operator;
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Child->print(OB);
+ OB += ")";
+ OB += Operator;
}
};
@@ -1631,14 +1774,14 @@ public:
template<typename Fn> void match(Fn F) const { F(Cond, Then, Else); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Cond->print(S);
- S += ") ? (";
- Then->print(S);
- S += ") : (";
- Else->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Cond->print(OB);
+ OB += ") ? (";
+ Then->print(OB);
+ OB += ") : (";
+ Else->print(OB);
+ OB += ")";
}
};
@@ -1653,10 +1796,10 @@ public:
template<typename Fn> void match(Fn F) const { F(LHS, Kind, RHS); }
- void printLeft(OutputStream &S) const override {
- LHS->print(S);
- S += Kind;
- RHS->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ LHS->print(OB);
+ OB += Kind;
+ RHS->print(OB);
}
};
@@ -1677,20 +1820,20 @@ public:
F(Type, SubExpr, Offset, UnionSelectors, OnePastTheEnd);
}
- void printLeft(OutputStream &S) const override {
- SubExpr->print(S);
- S += ".<";
- Type->print(S);
- S += " at offset ";
+ void printLeft(OutputBuffer &OB) const override {
+ SubExpr->print(OB);
+ OB += ".<";
+ Type->print(OB);
+ OB += " at offset ";
if (Offset.empty()) {
- S += "0";
+ OB += "0";
} else if (Offset[0] == 'n') {
- S += "-";
- S += Offset.dropFront();
+ OB += "-";
+ OB += Offset.dropFront();
} else {
- S += Offset;
+ OB += Offset;
}
- S += ">";
+ OB += ">";
}
};
@@ -1706,10 +1849,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Prefix, Infix, Postfix); }
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- Infix->print(S);
- S += Postfix;
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Prefix;
+ Infix->print(OB);
+ OB += Postfix;
}
};
@@ -1725,13 +1868,13 @@ public:
template<typename Fn> void match(Fn F) const { F(CastKind, To, From); }
- void printLeft(OutputStream &S) const override {
- S += CastKind;
- S += "<";
- To->printLeft(S);
- S += ">(";
- From->printLeft(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += CastKind;
+ OB += "<";
+ To->printLeft(OB);
+ OB += ">(";
+ From->printLeft(OB);
+ OB += ")";
}
};
@@ -1744,11 +1887,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Pack); }
- void printLeft(OutputStream &S) const override {
- S += "sizeof...(";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "sizeof...(";
ParameterPackExpansion PPE(Pack);
- PPE.printLeft(S);
- S += ")";
+ PPE.printLeft(OB);
+ OB += ")";
}
};
@@ -1762,11 +1905,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Callee, Args); }
- void printLeft(OutputStream &S) const override {
- Callee->print(S);
- S += "(";
- Args.printWithComma(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ Callee->print(OB);
+ OB += "(";
+ Args.printWithComma(OB);
+ OB += ")";
}
};
@@ -1787,25 +1930,24 @@ public:
F(ExprList, Type, InitList, IsGlobal, IsArray);
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsGlobal)
- S += "::operator ";
- S += "new";
+ OB += "::operator ";
+ OB += "new";
if (IsArray)
- S += "[]";
- S += ' ';
+ OB += "[]";
+ OB += ' ';
if (!ExprList.empty()) {
- S += "(";
- ExprList.printWithComma(S);
- S += ")";
+ OB += "(";
+ ExprList.printWithComma(OB);
+ OB += ")";
}
- Type->print(S);
+ Type->print(OB);
if (!InitList.empty()) {
- S += "(";
- InitList.printWithComma(S);
- S += ")";
+ OB += "(";
+ InitList.printWithComma(OB);
+ OB += ")";
}
-
}
};
@@ -1820,13 +1962,13 @@ public:
template<typename Fn> void match(Fn F) const { F(Op, IsGlobal, IsArray); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsGlobal)
- S += "::";
- S += "delete";
+ OB += "::";
+ OB += "delete";
if (IsArray)
- S += "[] ";
- Op->print(S);
+ OB += "[] ";
+ Op->print(OB);
}
};
@@ -1840,11 +1982,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Prefix, Child); }
- void printLeft(OutputStream &S) const override {
- S += Prefix;
- S += "(";
- Child->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Prefix;
+ OB += "(";
+ Child->print(OB);
+ OB += ")";
}
};
@@ -1856,9 +1998,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Number); }
- void printLeft(OutputStream &S) const override {
- S += "fp";
- S += Number;
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "fp";
+ OB += Number;
}
};
@@ -1872,12 +2014,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Type, Expressions); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Type->print(S);
- S += ")(";
- Expressions.printWithComma(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Type->print(OB);
+ OB += ")(";
+ Expressions.printWithComma(OB);
+ OB += ")";
}
};
@@ -1894,12 +2036,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Type, SubExpr, Offset); }
- void printLeft(OutputStream &S) const override {
- S += "(";
- Type->print(S);
- S += ")(";
- SubExpr->print(S);
- S += ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "(";
+ Type->print(OB);
+ OB += ")(";
+ SubExpr->print(OB);
+ OB += ")";
}
};
@@ -1912,12 +2054,12 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Inits); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Ty)
- Ty->print(S);
- S += '{';
- Inits.printWithComma(S);
- S += '}';
+ Ty->print(OB);
+ OB += '{';
+ Inits.printWithComma(OB);
+ OB += '}';
}
};
@@ -1931,18 +2073,18 @@ public:
template<typename Fn> void match(Fn F) const { F(Elem, Init, IsArray); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (IsArray) {
- S += '[';
- Elem->print(S);
- S += ']';
+ OB += '[';
+ Elem->print(OB);
+ OB += ']';
} else {
- S += '.';
- Elem->print(S);
+ OB += '.';
+ Elem->print(OB);
}
if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
+ OB += " = ";
+ Init->print(OB);
}
};
@@ -1956,15 +2098,15 @@ public:
template<typename Fn> void match(Fn F) const { F(First, Last, Init); }
- void printLeft(OutputStream &S) const override {
- S += '[';
- First->print(S);
- S += " ... ";
- Last->print(S);
- S += ']';
+ void printLeft(OutputBuffer &OB) const override {
+ OB += '[';
+ First->print(OB);
+ OB += " ... ";
+ Last->print(OB);
+ OB += ']';
if (Init->getKind() != KBracedExpr && Init->getKind() != KBracedRangeExpr)
- S += " = ";
- Init->print(S);
+ OB += " = ";
+ Init->print(OB);
}
};
@@ -1983,43 +2125,43 @@ public:
F(IsLeftFold, OperatorName, Pack, Init);
}
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
auto PrintPack = [&] {
- S += '(';
- ParameterPackExpansion(Pack).print(S);
- S += ')';
+ OB += '(';
+ ParameterPackExpansion(Pack).print(OB);
+ OB += ')';
};
- S += '(';
+ OB += '(';
if (IsLeftFold) {
// init op ... op pack
if (Init != nullptr) {
- Init->print(S);
- S += ' ';
- S += OperatorName;
- S += ' ';
+ Init->print(OB);
+ OB += ' ';
+ OB += OperatorName;
+ OB += ' ';
}
// ... op pack
- S += "... ";
- S += OperatorName;
- S += ' ';
+ OB += "... ";
+ OB += OperatorName;
+ OB += ' ';
PrintPack();
} else { // !IsLeftFold
// pack op ...
PrintPack();
- S += ' ';
- S += OperatorName;
- S += " ...";
+ OB += ' ';
+ OB += OperatorName;
+ OB += " ...";
// pack op ... op init
if (Init != nullptr) {
- S += ' ';
- S += OperatorName;
- S += ' ';
- Init->print(S);
+ OB += ' ';
+ OB += OperatorName;
+ OB += ' ';
+ Init->print(OB);
}
}
- S += ')';
+ OB += ')';
}
};
@@ -2031,9 +2173,9 @@ public:
template<typename Fn> void match(Fn F) const { F(Op); }
- void printLeft(OutputStream &S) const override {
- S += "throw ";
- Op->print(S);
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "throw ";
+ Op->print(OB);
}
};
@@ -2045,8 +2187,8 @@ public:
template<typename Fn> void match(Fn F) const { F(Value); }
- void printLeft(OutputStream &S) const override {
- S += Value ? StringView("true") : StringView("false");
+ void printLeft(OutputBuffer &OB) const override {
+ OB += Value ? StringView("true") : StringView("false");
}
};
@@ -2058,10 +2200,10 @@ public:
template<typename Fn> void match(Fn F) const { F(Type); }
- void printLeft(OutputStream &S) const override {
- S += "\"<";
- Type->print(S);
- S += ">\"";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "\"<";
+ Type->print(OB);
+ OB += ">\"";
}
};
@@ -2073,11 +2215,11 @@ public:
template<typename Fn> void match(Fn F) const { F(Type); }
- void printLeft(OutputStream &S) const override {
- S += "[]";
+ void printLeft(OutputBuffer &OB) const override {
+ OB += "[]";
if (Type->getKind() == KClosureTypeName)
- static_cast<const ClosureTypeName *>(Type)->printDeclarator(S);
- S += "{...}";
+ static_cast<const ClosureTypeName *>(Type)->printDeclarator(OB);
+ OB += "{...}";
}
};
@@ -2092,15 +2234,15 @@ public:
template<typename Fn> void match(Fn F) const { F(Ty, Integer); }
- void printLeft(OutputStream &S) const override {
- S << "(";
- Ty->print(S);
- S << ")";
+ void printLeft(OutputBuffer &OB) const override {
+ OB << "(";
+ Ty->print(OB);
+ OB << ")";
if (Integer[0] == 'n')
- S << "-" << Integer.dropFront(1);
+ OB << "-" << Integer.dropFront(1);
else
- S << Integer;
+ OB << Integer;
}
};
@@ -2114,21 +2256,21 @@ public:
template<typename Fn> void match(Fn F) const { F(Type, Value); }
- void printLeft(OutputStream &S) const override {
+ void printLeft(OutputBuffer &OB) const override {
if (Type.size() > 3) {
- S += "(";
- S += Type;
- S += ")";
+ OB += "(";
+ OB += Type;
+ OB += ")";
}
if (Value[0] == 'n') {
- S += "-";
- S += Value.dropFront(1);
+ OB += "-";
+ OB += Value.dropFront(1);
} else
- S += Value;
+ OB += Value;
if (Type.size() <= 3)
- S += Type;
+ OB += Type;
}
};
@@ -2158,7 +2300,7 @@ public:
template<typename Fn> void match(Fn F) const { F(Contents); }
- void printLeft(OutputStream &s) const override {
+ void printLeft(OutputBuffer &OB) const override {
const char *first = Contents.begin();
const char *last = Contents.end() + 1;
@@ -2184,7 +2326,7 @@ public:
#endif
char num[FloatData<Float>::max_demangled_size] = {0};
int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
- s += StringView(num, num + n);
+ OB += StringView(num, num + n);
}
}
};
@@ -2217,125 +2359,6 @@ FOR_EACH_NODE_KIND(SPECIALIZATION)
#undef FOR_EACH_NODE_KIND
-template <class T, size_t N>
-class PODSmallVector {
- static_assert(std::is_pod<T>::value,
- "T is required to be a plain old data type");
-
- T* First = nullptr;
- T* Last = nullptr;
- T* Cap = nullptr;
- T Inline[N] = {0};
-
- bool isInline() const { return First == Inline; }
-
- void clearInline() {
- First = Inline;
- Last = Inline;
- Cap = Inline + N;
- }
-
- void reserve(size_t NewCap) {
- size_t S = size();
- if (isInline()) {
- auto* Tmp = static_cast<T*>(std::malloc(NewCap * sizeof(T)));
- if (Tmp == nullptr)
- std::terminate();
- std::copy(First, Last, Tmp);
- First = Tmp;
- } else {
- First = static_cast<T*>(std::realloc(First, NewCap * sizeof(T)));
- if (First == nullptr)
- std::terminate();
- }
- Last = First + S;
- Cap = First + NewCap;
- }
-
-public:
- PODSmallVector() : First(Inline), Last(First), Cap(Inline + N) {}
-
- PODSmallVector(const PODSmallVector&) = delete;
- PODSmallVector& operator=(const PODSmallVector&) = delete;
-
- PODSmallVector(PODSmallVector&& Other) : PODSmallVector() {
- if (Other.isInline()) {
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return;
- }
-
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- }
-
- PODSmallVector& operator=(PODSmallVector&& Other) {
- if (Other.isInline()) {
- if (!isInline()) {
- std::free(First);
- clearInline();
- }
- std::copy(Other.begin(), Other.end(), First);
- Last = First + Other.size();
- Other.clear();
- return *this;
- }
-
- if (isInline()) {
- First = Other.First;
- Last = Other.Last;
- Cap = Other.Cap;
- Other.clearInline();
- return *this;
- }
-
- std::swap(First, Other.First);
- std::swap(Last, Other.Last);
- std::swap(Cap, Other.Cap);
- Other.clear();
- return *this;
- }
-
- void push_back(const T& Elem) {
- if (Last == Cap)
- reserve(size() * 2);
- *Last++ = Elem;
- }
-
- void pop_back() {
- assert(Last != First && "Popping empty vector!");
- --Last;
- }
-
- void dropBack(size_t Index) {
- assert(Index <= size() && "dropBack() can't expand!");
- Last = First + Index;
- }
-
- T* begin() { return First; }
- T* end() { return Last; }
-
- bool empty() const { return First == Last; }
- size_t size() const { return static_cast<size_t>(Last - First); }
- T& back() {
- assert(Last != First && "Calling back() on empty vector!");
- return *(Last - 1);
- }
- T& operator[](size_t Index) {
- assert(Index < size() && "Invalid access!");
- return *(begin() + Index);
- }
- void clear() { Last = First; }
-
- ~PODSmallVector() {
- if (!isInline())
- std::free(First);
- }
-};
-
template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
const char *Last;
@@ -3884,6 +3907,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
case 'h':
First += 2;
return make<NameType>("half");
+ // ::= DF <number> _ # ISO/IEC TS 18661 binary floating point (N bits)
+ case 'F': {
+ First += 2;
+ Node *DimensionNumber = make<NameType>(parseNumber());
+ if (!DimensionNumber)
+ return nullptr;
+ if (!consumeIf('_'))
+ return nullptr;
+ return make<BinaryFPType>(DimensionNumber);
+ }
// ::= Di # char32_t
case 'i':
First += 2;
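Aside (illustrative, not part of the patch): with this addition, an encoding such as "DF16_" inside a mangled name parses into a BinaryFPType node whose dimension is the NameType "16". How that node is printed is decided by BinaryFPType::printLeft, which lies outside this hunk.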
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 77446e9b0f07..46daa3885a06 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -21,11 +21,11 @@
namespace llvm {
namespace itanium_demangle {
-class OutputStream;
+class OutputBuffer;
}
}
-using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::OutputBuffer;
using llvm::itanium_demangle::StringView;
namespace llvm {
@@ -80,6 +80,7 @@ enum OutputFlags {
OF_NoAccessSpecifier = 4,
OF_NoMemberType = 8,
OF_NoReturnType = 16,
+ OF_NoVariableType = 32,
};
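Aside (illustrative, not part of the patch): a sketch of how the new flag composes with the existing OutputFlags bits when rendering a node; Sym stands in for a SymbolNode obtained from the Microsoft demangler.

    #include "llvm/Demangle/MicrosoftDemangleNodes.h"
    #include <string>

    using namespace llvm::ms_demangle;

    std::string renderForDiagnostics(SymbolNode *Sym) {
      // Suppress the return type and, with the new bit, the variable type.
      auto Flags = OutputFlags(OF_NoReturnType | OF_NoVariableType);
      return Sym->toString(Flags);
    }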
// Types
@@ -261,7 +262,7 @@ struct Node {
NodeKind kind() const { return Kind; }
- virtual void output(OutputStream &OS, OutputFlags Flags) const = 0;
+ virtual void output(OutputBuffer &OB, OutputFlags Flags) const = 0;
std::string toString(OutputFlags Flags = OF_Default) const;
@@ -300,12 +301,12 @@ struct SpecialTableSymbolNode;
struct TypeNode : public Node {
explicit TypeNode(NodeKind K) : Node(K) {}
- virtual void outputPre(OutputStream &OS, OutputFlags Flags) const = 0;
- virtual void outputPost(OutputStream &OS, OutputFlags Flags) const = 0;
+ virtual void outputPre(OutputBuffer &OB, OutputFlags Flags) const = 0;
+ virtual void outputPost(OutputBuffer &OB, OutputFlags Flags) const = 0;
- void output(OutputStream &OS, OutputFlags Flags) const override {
- outputPre(OS, Flags);
- outputPost(OS, Flags);
+ void output(OutputBuffer &OB, OutputFlags Flags) const override {
+ outputPre(OB, Flags);
+ outputPost(OB, Flags);
}
Qualifiers Quals = Q_None;
@@ -315,8 +316,8 @@ struct PrimitiveTypeNode : public TypeNode {
explicit PrimitiveTypeNode(PrimitiveKind K)
: TypeNode(NodeKind::PrimitiveType), PrimKind(K) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override {}
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override {}
PrimitiveKind PrimKind;
};
@@ -325,8 +326,8 @@ struct FunctionSignatureNode : public TypeNode {
explicit FunctionSignatureNode(NodeKind K) : TypeNode(K) {}
FunctionSignatureNode() : TypeNode(NodeKind::FunctionSignature) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
// Valid if this FunctionTypeNode is the Pointee of a PointerType or
// MemberPointerType.
@@ -359,13 +360,13 @@ struct IdentifierNode : public Node {
NodeArrayNode *TemplateParams = nullptr;
protected:
- void outputTemplateParameters(OutputStream &OS, OutputFlags Flags) const;
+ void outputTemplateParameters(OutputBuffer &OB, OutputFlags Flags) const;
};
struct VcallThunkIdentifierNode : public IdentifierNode {
VcallThunkIdentifierNode() : IdentifierNode(NodeKind::VcallThunkIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
uint64_t OffsetInVTable = 0;
};
@@ -374,7 +375,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode {
DynamicStructorIdentifierNode()
: IdentifierNode(NodeKind::DynamicStructorIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
VariableSymbolNode *Variable = nullptr;
QualifiedNameNode *Name = nullptr;
@@ -384,7 +385,7 @@ struct DynamicStructorIdentifierNode : public IdentifierNode {
struct NamedIdentifierNode : public IdentifierNode {
NamedIdentifierNode() : IdentifierNode(NodeKind::NamedIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StringView Name;
};
@@ -394,7 +395,7 @@ struct IntrinsicFunctionIdentifierNode : public IdentifierNode {
: IdentifierNode(NodeKind::IntrinsicFunctionIdentifier),
Operator(Operator) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
IntrinsicFunctionKind Operator;
};
@@ -403,7 +404,7 @@ struct LiteralOperatorIdentifierNode : public IdentifierNode {
LiteralOperatorIdentifierNode()
: IdentifierNode(NodeKind::LiteralOperatorIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StringView Name;
};
@@ -412,7 +413,7 @@ struct LocalStaticGuardIdentifierNode : public IdentifierNode {
LocalStaticGuardIdentifierNode()
: IdentifierNode(NodeKind::LocalStaticGuardIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
bool IsThread = false;
uint32_t ScopeIndex = 0;
@@ -422,7 +423,7 @@ struct ConversionOperatorIdentifierNode : public IdentifierNode {
ConversionOperatorIdentifierNode()
: IdentifierNode(NodeKind::ConversionOperatorIdentifier) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
// The type that this operator converts to.
TypeNode *TargetType = nullptr;
@@ -434,7 +435,7 @@ struct StructorIdentifierNode : public IdentifierNode {
: IdentifierNode(NodeKind::StructorIdentifier),
IsDestructor(IsDestructor) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
// The name of the class that this is a structor of.
IdentifierNode *Class = nullptr;
@@ -444,8 +445,8 @@ struct StructorIdentifierNode : public IdentifierNode {
struct ThunkSignatureNode : public FunctionSignatureNode {
ThunkSignatureNode() : FunctionSignatureNode(NodeKind::ThunkSignature) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
struct ThisAdjustor {
uint32_t StaticOffset = 0;
@@ -459,8 +460,8 @@ struct ThunkSignatureNode : public FunctionSignatureNode {
struct PointerTypeNode : public TypeNode {
PointerTypeNode() : TypeNode(NodeKind::PointerType) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
// Is this a pointer, reference, or rvalue-reference?
PointerAffinity Affinity = PointerAffinity::None;
@@ -476,8 +477,8 @@ struct PointerTypeNode : public TypeNode {
struct TagTypeNode : public TypeNode {
explicit TagTypeNode(TagKind Tag) : TypeNode(NodeKind::TagType), Tag(Tag) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
QualifiedNameNode *QualifiedName = nullptr;
TagKind Tag;
@@ -486,11 +487,11 @@ struct TagTypeNode : public TypeNode {
struct ArrayTypeNode : public TypeNode {
ArrayTypeNode() : TypeNode(NodeKind::ArrayType) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
- void outputDimensionsImpl(OutputStream &OS, OutputFlags Flags) const;
- void outputOneDimension(OutputStream &OS, OutputFlags Flags, Node *N) const;
+ void outputDimensionsImpl(OutputBuffer &OB, OutputFlags Flags) const;
+ void outputOneDimension(OutputBuffer &OB, OutputFlags Flags, Node *N) const;
// A list of array dimensions. e.g. [3,4,5] in `int Foo[3][4][5]`
NodeArrayNode *Dimensions = nullptr;
@@ -501,14 +502,14 @@ struct ArrayTypeNode : public TypeNode {
struct IntrinsicNode : public TypeNode {
IntrinsicNode() : TypeNode(NodeKind::IntrinsicType) {}
- void output(OutputStream &OS, OutputFlags Flags) const override {}
+ void output(OutputBuffer &OB, OutputFlags Flags) const override {}
};
struct CustomTypeNode : public TypeNode {
CustomTypeNode() : TypeNode(NodeKind::Custom) {}
- void outputPre(OutputStream &OS, OutputFlags Flags) const override;
- void outputPost(OutputStream &OS, OutputFlags Flags) const override;
+ void outputPre(OutputBuffer &OB, OutputFlags Flags) const override;
+ void outputPost(OutputBuffer &OB, OutputFlags Flags) const override;
IdentifierNode *Identifier = nullptr;
};
@@ -516,9 +517,9 @@ struct CustomTypeNode : public TypeNode {
struct NodeArrayNode : public Node {
NodeArrayNode() : Node(NodeKind::NodeArray) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
- void output(OutputStream &OS, OutputFlags Flags, StringView Separator) const;
+ void output(OutputBuffer &OB, OutputFlags Flags, StringView Separator) const;
Node **Nodes = nullptr;
size_t Count = 0;
@@ -527,7 +528,7 @@ struct NodeArrayNode : public Node {
struct QualifiedNameNode : public Node {
QualifiedNameNode() : Node(NodeKind::QualifiedName) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
NodeArrayNode *Components = nullptr;
@@ -541,7 +542,7 @@ struct TemplateParameterReferenceNode : public Node {
TemplateParameterReferenceNode()
: Node(NodeKind::TemplateParameterReference) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
SymbolNode *Symbol = nullptr;
@@ -556,7 +557,7 @@ struct IntegerLiteralNode : public Node {
IntegerLiteralNode(uint64_t Value, bool IsNegative)
: Node(NodeKind::IntegerLiteral), Value(Value), IsNegative(IsNegative) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
uint64_t Value = 0;
bool IsNegative = false;
@@ -566,7 +567,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode {
RttiBaseClassDescriptorNode()
: IdentifierNode(NodeKind::RttiBaseClassDescriptor) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
uint32_t NVOffset = 0;
int32_t VBPtrOffset = 0;
@@ -576,7 +577,7 @@ struct RttiBaseClassDescriptorNode : public IdentifierNode {
struct SymbolNode : public Node {
explicit SymbolNode(NodeKind K) : Node(K) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
QualifiedNameNode *Name = nullptr;
};
@@ -584,7 +585,7 @@ struct SpecialTableSymbolNode : public SymbolNode {
explicit SpecialTableSymbolNode()
: SymbolNode(NodeKind::SpecialTableSymbol) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
QualifiedNameNode *TargetName = nullptr;
Qualifiers Quals = Qualifiers::Q_None;
};
@@ -593,7 +594,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode {
LocalStaticGuardVariableNode()
: SymbolNode(NodeKind::LocalStaticGuardVariable) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
bool IsVisible = false;
};
@@ -601,7 +602,7 @@ struct LocalStaticGuardVariableNode : public SymbolNode {
struct EncodedStringLiteralNode : public SymbolNode {
EncodedStringLiteralNode() : SymbolNode(NodeKind::EncodedStringLiteral) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StringView DecodedString;
bool IsTruncated = false;
@@ -611,7 +612,7 @@ struct EncodedStringLiteralNode : public SymbolNode {
struct VariableSymbolNode : public SymbolNode {
VariableSymbolNode() : SymbolNode(NodeKind::VariableSymbol) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
StorageClass SC = StorageClass::None;
TypeNode *Type = nullptr;
@@ -620,7 +621,7 @@ struct VariableSymbolNode : public SymbolNode {
struct FunctionSymbolNode : public SymbolNode {
FunctionSymbolNode() : SymbolNode(NodeKind::FunctionSymbol) {}
- void output(OutputStream &OS, OutputFlags Flags) const override;
+ void output(OutputBuffer &OB, OutputFlags Flags) const override;
FunctionSignatureNode *Signature = nullptr;
};
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 04ff65a35aed..4fea9351a4bf 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -24,7 +24,7 @@ DEMANGLE_NAMESPACE_BEGIN
// Stream that AST nodes write their string representation into after the AST
// has been parsed.
-class OutputStream {
+class OutputBuffer {
char *Buffer = nullptr;
size_t CurrentPosition = 0;
size_t BufferCapacity = 0;
@@ -63,9 +63,9 @@ class OutputStream {
}
public:
- OutputStream(char *StartBuf, size_t Size)
+ OutputBuffer(char *StartBuf, size_t Size)
: Buffer(StartBuf), CurrentPosition(0), BufferCapacity(Size) {}
- OutputStream() = default;
+ OutputBuffer() = default;
void reset(char *Buffer_, size_t BufferCapacity_) {
CurrentPosition = 0;
Buffer = Buffer_;
@@ -77,7 +77,7 @@ public:
unsigned CurrentPackIndex = std::numeric_limits<unsigned>::max();
unsigned CurrentPackMax = std::numeric_limits<unsigned>::max();
- OutputStream &operator+=(StringView R) {
+ OutputBuffer &operator+=(StringView R) {
size_t Size = R.size();
if (Size == 0)
return *this;
@@ -87,17 +87,28 @@ public:
return *this;
}
- OutputStream &operator+=(char C) {
+ OutputBuffer &operator+=(char C) {
grow(1);
Buffer[CurrentPosition++] = C;
return *this;
}
- OutputStream &operator<<(StringView R) { return (*this += R); }
+ OutputBuffer &operator<<(StringView R) { return (*this += R); }
- OutputStream &operator<<(char C) { return (*this += C); }
+ OutputBuffer prepend(StringView R) {
+ size_t Size = R.size();
+
+ grow(Size);
+ std::memmove(Buffer + Size, Buffer, CurrentPosition);
+ std::memcpy(Buffer, R.begin(), Size);
+ CurrentPosition += Size;
- OutputStream &operator<<(long long N) {
+ return *this;
+ }
+
+ OutputBuffer &operator<<(char C) { return (*this += C); }
+
+ OutputBuffer &operator<<(long long N) {
if (N < 0)
writeUnsigned(static_cast<unsigned long long>(-N), true);
else
@@ -105,27 +116,37 @@ public:
return *this;
}
- OutputStream &operator<<(unsigned long long N) {
+ OutputBuffer &operator<<(unsigned long long N) {
writeUnsigned(N, false);
return *this;
}
- OutputStream &operator<<(long N) {
+ OutputBuffer &operator<<(long N) {
return this->operator<<(static_cast<long long>(N));
}
- OutputStream &operator<<(unsigned long N) {
+ OutputBuffer &operator<<(unsigned long N) {
return this->operator<<(static_cast<unsigned long long>(N));
}
- OutputStream &operator<<(int N) {
+ OutputBuffer &operator<<(int N) {
return this->operator<<(static_cast<long long>(N));
}
- OutputStream &operator<<(unsigned int N) {
+ OutputBuffer &operator<<(unsigned int N) {
return this->operator<<(static_cast<unsigned long long>(N));
}
+ void insert(size_t Pos, const char *S, size_t N) {
+ assert(Pos <= CurrentPosition);
+ if (N == 0)
+ return;
+ grow(N);
+ std::memmove(Buffer + Pos + N, Buffer + Pos, CurrentPosition - Pos);
+ std::memcpy(Buffer + Pos, S, N);
+ CurrentPosition += N;
+ }
+
size_t getCurrentPosition() const { return CurrentPosition; }
void setCurrentPosition(size_t NewPos) { CurrentPosition = NewPos; }
@@ -171,7 +192,7 @@ public:
SwapAndRestore &operator=(const SwapAndRestore &) = delete;
};
-inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
+inline bool initializeOutputBuffer(char *Buf, size_t *N, OutputBuffer &OB,
size_t InitSize) {
size_t BufferSize;
if (Buf == nullptr) {
@@ -182,7 +203,7 @@ inline bool initializeOutputStream(char *Buf, size_t *N, OutputStream &S,
} else
BufferSize = *N;
- S.reset(Buf, BufferSize);
+ OB.reset(Buf, BufferSize);
return true;
}
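Aside (illustrative, not part of the diff): a minimal sketch of driving the renamed OutputBuffer and the new insert()/prepend() helpers shown above. The getBuffer() accessor used at the end is assumed from the existing class (it falls outside this hunk), and all other names are placeholders.

    #include "llvm/Demangle/Utility.h"
    #include <cstdlib>

    using namespace llvm::itanium_demangle;

    void outputBufferSketch() {
      OutputBuffer OB;
      size_t Size = 0;
      if (!initializeOutputBuffer(nullptr, &Size, OB, 128))
        return;                  // malloc failed
      OB << "foo(";              // operator<<(StringView)
      OB << 42;                  // integer overloads route through writeUnsigned
      OB += ')';                 // operator+=(char)
      OB.insert(0, "ns::", 4);   // splice "ns::" in front of "foo(42)"
      OB.prepend("::");          // same idea, always at position 0
      OB += '\0';
      std::free(OB.getBuffer()); // caller owns the (possibly realloc'd) buffer
    }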
diff --git a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index 2e386518f0bf..43c91fb5f988 100644
--- a/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -21,7 +21,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/Binary.h"
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h
new file mode 100644
index 000000000000..50eb598139ea
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h
@@ -0,0 +1,39 @@
+//===--- ELF_aarch64.h - JIT link functions for ELF/aarch64 --*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for ELF/aarch64.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Create a LinkGraph from an ELF/aarch64 relocatable object
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer);
+
+/// jit-link the given object buffer, which must be an ELF aarch64 relocatable
+/// object file.
+void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH64_H
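Aside (illustrative, not part of the patch): a sketch of building a LinkGraph from an aarch64 relocatable object with the new entry point; the file path is a placeholder, and dump(raw_ostream &) is the method declared in JITLink.h in this same patch.

    #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::Error inspectObject(llvm::StringRef Path) {
      auto BufOrErr = llvm::MemoryBuffer::getFile(Path);
      if (!BufOrErr)
        return llvm::errorCodeToError(BufOrErr.getError());
      // The graph only references the buffer, so keep it alive while G is used.
      auto G = llvm::jitlink::createLinkGraphFromELFObject_aarch64(
          (*BufOrErr)->getMemBufferRef());
      if (!G)
        return G.takeError();
      (*G)->dump(llvm::outs());
      return llvm::Error::success();
    }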
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
index 1339ab51cbb9..5a8b186a2c3e 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
@@ -35,4 +35,4 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
} // end namespace jitlink
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV64_H
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_RISCV_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
index d8ed953363e6..f5fa9e96c594 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h
@@ -21,29 +21,17 @@ namespace jitlink {
namespace ELF_x86_64_Edges {
enum ELFX86RelocationKind : Edge::Kind {
Branch32 = Edge::FirstRelocation,
- Branch32ToStub,
- Pointer32,
+ Pointer32Signed,
Pointer64,
- Pointer64Anon,
PCRel32,
- PCRel64,
- PCRel32Minus1,
- PCRel32Minus2,
- PCRel32Minus4,
- PCRel32Anon,
- PCRel32Minus1Anon,
- PCRel32Minus2Anon,
- PCRel32Minus4Anon,
PCRel32GOTLoad,
- PCRel32GOT,
+ PCRel32GOTLoadRelaxable,
+ PCRel32REXGOTLoadRelaxable,
+ PCRel32TLV,
PCRel64GOT,
GOTOFF64,
GOT64,
- PCRel32TLV,
- Delta32,
Delta64,
- NegDelta32,
- NegDelta64,
};
} // end namespace ELF_x86_64_Edges
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index 6162a675ec12..83d85953fce6 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -13,19 +13,19 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
#define LLVM_EXECUTIONENGINE_JITLINK_JITLINK_H
-#include "JITLinkMemoryManager.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include <map>
@@ -225,7 +225,7 @@ public:
/// Get the content for this block. Block must not be a zero-fill block.
ArrayRef<char> getContent() const {
- assert(Data && "Section does not contain content");
+ assert(Data && "Block does not contain content");
return ArrayRef<char>(Data, Size);
}
@@ -233,6 +233,7 @@ public:
/// Caller is responsible for ensuring the underlying bytes are not
/// deallocated while pointed to by this block.
void setContent(ArrayRef<char> Content) {
+ assert(Content.data() && "Setting null content");
Data = Content.data();
Size = Content.size();
ContentMutable = false;
@@ -251,6 +252,7 @@ public:
/// to call this on a block with immutable content -- consider using
/// getMutableContent instead.
MutableArrayRef<char> getAlreadyMutableContent() {
+ assert(Data && "Block does not contain content");
assert(ContentMutable && "Content is not mutable");
return MutableArrayRef<char>(const_cast<char *>(Data), Size);
}
@@ -260,6 +262,7 @@ public:
/// The caller is responsible for ensuring that the memory pointed to by
/// MutableContent is not deallocated while pointed to by this block.
void setMutableContent(MutableArrayRef<char> MutableContent) {
+ assert(MutableContent.data() && "Setting null content");
Data = MutableContent.data();
Size = MutableContent.size();
ContentMutable = true;
@@ -295,6 +298,7 @@ public:
/// Add an edge to this block.
void addEdge(Edge::Kind K, Edge::OffsetT Offset, Symbol &Target,
Edge::AddendT Addend) {
+ assert(!isZeroFill() && "Adding edge to zero-fill block?");
Edges.push_back(Edge(K, Offset, Target, Addend));
}
@@ -339,6 +343,12 @@ private:
std::vector<Edge> Edges;
};
+// Align a JITTargetAddress to conform with block alignment requirements.
+inline JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) {
+ uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment();
+ return Addr + Delta;
+}
+
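Aside (illustrative, not part of the patch): a worked example of the helper above, assuming the usual unsigned JITTargetAddress arithmetic.

    // For a block with getAlignment() == 16 and getAlignmentOffset() == 4,
    // aligning the address 0x1003 gives Delta = (4 - 0x1003) % 16 == 1 in
    // unsigned arithmetic, so alignToBlock(0x1003, B) returns 0x1004 -- the
    // smallest address >= 0x1003 that is congruent to 4 modulo 16.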
/// Describes symbol linkage. This can be used to resolve definition
/// clashes.
enum class Linkage : uint8_t {
@@ -640,8 +650,7 @@ class Section {
friend class LinkGraph;
private:
- Section(StringRef Name, sys::Memory::ProtectionFlags Prot,
- SectionOrdinal SecOrdinal)
+ Section(StringRef Name, MemProt Prot, SectionOrdinal SecOrdinal)
: Name(Name), Prot(Prot), SecOrdinal(SecOrdinal) {}
using SymbolSet = DenseSet<Symbol *>;
@@ -666,12 +675,16 @@ public:
StringRef getName() const { return Name; }
/// Returns the protection flags for this section.
- sys::Memory::ProtectionFlags getProtectionFlags() const { return Prot; }
+ MemProt getMemProt() const { return Prot; }
/// Set the protection flags for this section.
- void setProtectionFlags(sys::Memory::ProtectionFlags Prot) {
- this->Prot = Prot;
- }
+ void setMemProt(MemProt Prot) { this->Prot = Prot; }
+
+ /// Get the deallocation policy for this section.
+ MemDeallocPolicy getMemDeallocPolicy() const { return MDP; }
+
+ /// Set the deallocation policy for this section.
+ void setMemDeallocPolicy(MemDeallocPolicy MDP) { this->MDP = MDP; }
/// Returns the ordinal for this section.
SectionOrdinal getOrdinal() const { return SecOrdinal; }
@@ -686,6 +699,7 @@ public:
return make_range(Blocks.begin(), Blocks.end());
}
+ /// Returns the number of blocks in this section.
BlockSet::size_type blocks_size() const { return Blocks.size(); }
/// Returns an iterator over the symbols defined in this section.
@@ -734,7 +748,8 @@ private:
}
StringRef Name;
- sys::Memory::ProtectionFlags Prot;
+ MemProt Prot;
+ MemDeallocPolicy MDP = MemDeallocPolicy::Standard;
SectionOrdinal SecOrdinal = 0;
BlockSet Blocks;
SymbolSet Symbols;
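Aside (illustrative, not part of the patch): a sketch of creating and configuring a section with the new MemProt / MemDeallocPolicy accessors; the section name is a placeholder and the MemProt enumerators are assumed to be the Read/Write/Exec bits declared in MemoryFlags.h.

    #include "llvm/ExecutionEngine/JITLink/JITLink.h"

    using namespace llvm::jitlink;

    void addTextSection(LinkGraph &G) {
      // Read + execute protection, expressed with MemProt's bitmask operators.
      Section &Text = G.createSection("__text", MemProt::Read | MemProt::Exec);
      // The default MemDeallocPolicy::Standard keeps the memory alive until the
      // allocation is deallocated; memory needed only while finalize-time
      // allocation actions run would use the other policy via setMemDeallocPolicy.
      (void)Text;
    }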
@@ -916,6 +931,11 @@ public:
: Name(std::move(Name)), TT(TT), PointerSize(PointerSize),
Endianness(Endianness), GetEdgeKindName(std::move(GetEdgeKindName)) {}
+ LinkGraph(const LinkGraph &) = delete;
+ LinkGraph &operator=(const LinkGraph &) = delete;
+ LinkGraph(LinkGraph &&) = delete;
+ LinkGraph &operator=(LinkGraph &&) = delete;
+
/// Returns the name of this graph (usually the name of the original
/// underlying MemoryBuffer).
const std::string &getName() const { return Name; }
@@ -962,7 +982,7 @@ public:
}
/// Create a section with the given name, protection flags, and alignment.
- Section &createSection(StringRef Name, sys::Memory::ProtectionFlags Prot) {
+ Section &createSection(StringRef Name, MemProt Prot) {
assert(llvm::find_if(Sections,
[&](std::unique_ptr<Section> &Sec) {
return Sec->getName() == Name;
@@ -1100,10 +1120,10 @@ public:
Symbol &addDefinedSymbol(Block &Content, JITTargetAddress Offset,
StringRef Name, JITTargetAddress Size, Linkage L,
Scope S, bool IsCallable, bool IsLive) {
- assert(llvm::count_if(defined_symbols(),
- [&](const Symbol *Sym) {
- return Sym->getName() == Name;
- }) == 0 &&
+ assert((S == Scope::Local || llvm::count_if(defined_symbols(),
+ [&](const Symbol *Sym) {
+ return Sym->getName() == Name;
+ }) == 0) &&
"Duplicate defined symbol");
auto &Sym =
Symbol::constructNamedDef(Allocator.Allocate<Symbol>(), Content, Offset,
@@ -1237,6 +1257,7 @@ public:
void transferDefinedSymbol(Symbol &Sym, Block &DestBlock,
JITTargetAddress NewOffset,
Optional<JITTargetAddress> ExplicitNewSize) {
+ auto &OldSection = Sym.getBlock().getSection();
Sym.setBlock(DestBlock);
Sym.setOffset(NewOffset);
if (ExplicitNewSize)
@@ -1246,6 +1267,10 @@ public:
if (Sym.getSize() > RemainingBlockSize)
Sym.setSize(RemainingBlockSize);
}
+ if (&DestBlock.getSection() != &OldSection) {
+ OldSection.removeSymbol(Sym);
+ DestBlock.getSection().addSymbol(Sym);
+ }
}
/// Transfers the given Block and all Symbols pointing to it to the given
@@ -1280,6 +1305,8 @@ public:
bool PreserveSrcSection = false) {
if (&DstSection == &SrcSection)
return;
+ for (auto *B : SrcSection.blocks())
+ B->setSection(DstSection);
SrcSection.transferContentTo(DstSection);
if (!PreserveSrcSection)
removeSection(SrcSection);
@@ -1345,6 +1372,13 @@ public:
Sections.erase(I);
}
+ /// Accessor for the AllocActions object for this graph. This can be used to
+ /// register allocation action calls prior to finalization.
+ ///
+ /// Accessing this object after finalization will result in undefined
+ /// behavior.
+ JITLinkMemoryManager::AllocActions &allocActions() { return AAs; }
+
/// Dump the graph.
void dump(raw_ostream &OS);
@@ -1361,6 +1395,7 @@ private:
SectionList Sections;
ExternalSymbolSet ExternalSymbols;
ExternalSymbolSet AbsoluteSymbols;
+ JITLinkMemoryManager::AllocActions AAs;
};
inline MutableArrayRef<char> Block::getMutableContent(LinkGraph &G) {
@@ -1650,8 +1685,7 @@ public:
/// finalized (i.e. emitted to memory and memory permissions set). If all of
/// this objects dependencies have also been finalized then the code is ready
/// to run.
- virtual void
- notifyFinalized(std::unique_ptr<JITLinkMemoryManager::Allocation> A) = 0;
+ virtual void notifyFinalized(JITLinkMemoryManager::FinalizedAlloc Alloc) = 0;
/// Called by JITLink prior to linking to determine whether default passes for
/// the target should be added. The default implementation returns true.
@@ -1683,6 +1717,36 @@ Error markAllSymbolsLive(LinkGraph &G);
Error makeTargetOutOfRangeError(const LinkGraph &G, const Block &B,
const Edge &E);
+/// Base case for edge-visitors where the visitor-list is empty.
+inline void visitEdge(LinkGraph &G, Block *B, Edge &E) {}
+
+/// Applies the first visitor in the list to the given edge. If the visitor's
+/// visitEdge method returns true then we return immediately, otherwise we
+/// apply the next visitor.
+template <typename VisitorT, typename... VisitorTs>
+void visitEdge(LinkGraph &G, Block *B, Edge &E, VisitorT &&V,
+ VisitorTs &&...Vs) {
+ if (!V.visitEdge(G, B, E))
+ visitEdge(G, B, E, std::forward<VisitorTs>(Vs)...);
+}
+
+/// For each edge in the given graph, apply a list of visitors to the edge,
+/// stopping when the first visitor's visitEdge method returns true.
+///
+/// Only visits edges that were in the graph at call time: if any visitor
+/// adds new edges those will not be visited. Visitors are not allowed to
+/// remove edges (though they can change their kind, target, and addend).
+template <typename... VisitorTs>
+void visitExistingEdges(LinkGraph &G, VisitorTs &&...Vs) {
+ // We may add new blocks during this process, but we don't want to iterate
+ // over them, so build a worklist.
+ std::vector<Block *> Worklist(G.blocks().begin(), G.blocks().end());
+
+ for (auto *B : Worklist)
+ for (auto &E : B->edges())
+ visitEdge(G, B, E, std::forward<VisitorTs>(Vs)...);
+}
+
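Aside (illustrative, not part of the patch): per the comments above, a visitor is anything exposing bool visitEdge(LinkGraph &, Block *, Edge &); returning true stops the remaining visitors for that edge. The edge kind used here is a placeholder chosen by the caller.

    #include "llvm/ExecutionEngine/JITLink/JITLink.h"

    using namespace llvm::jitlink;

    struct CountEdgesOfKind {
      Edge::Kind Kind = Edge::FirstRelocation;
      size_t Count = 0;
      bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
        if (E.getKind() != Kind)
          return false; // not handled; later visitors still see this edge
        ++Count;
        return true;    // handled; skip any remaining visitors
      }
    };

    // Usage:
    //   CountEdgesOfKind Counter;
    //   Counter.Kind = SomeTargetEdgeKind; // e.g. an ELF_x86_64_Edges value
    //   visitExistingEdges(G, Counter);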
/// Create a LinkGraph from the given object buffer.
///
/// Note: The graph does not take ownership of the underlying buffer, nor copy
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index cee7d6b09c48..62c271dfc0b2 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -13,106 +13,416 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
#define LLVM_EXECUTIONENGINE_JITLINK_JITLINKMEMORYMANAGER_H
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/RecyclingAllocator.h"
#include <cstdint>
#include <future>
+#include <mutex>
namespace llvm {
namespace jitlink {
+class Block;
+class LinkGraph;
+class Section;
+
/// Manages allocations of JIT memory.
///
/// Instances of this class may be accessed concurrently from multiple threads
/// and their implementations should include any necessary synchronization.
class JITLinkMemoryManager {
public:
- using ProtectionFlags = sys::Memory::ProtectionFlags;
+ /// Represents a call to a graph-memory-management support function in the
+ /// executor.
+ ///
+ /// Support functions are called as:
+ ///
+ /// auto *Result =
+ /// ((char*(*)(const void*, size_t))FnAddr)(
+ /// (const void*)CtxAddr, (size_t)CtxSize)
+ ///
+ /// A null result is interpreted as success.
+ ///
+ /// A non-null result is interpreted as a heap-allocated string containing
+ /// an error message to report to the allocator (the allocator's
+ /// executor-side implementation code is responsible for freeing the error
+ /// string).
+ struct AllocActionCall {
+ JITTargetAddress FnAddr = 0;
+ JITTargetAddress CtxAddr = 0;
+ JITTargetAddress CtxSize = 0;
+ };
+
+ /// A pair of AllocActionCalls, one to be run at finalization time, one to be
+ /// run at deallocation time.
+ ///
+ /// AllocActionCallPairs should be constructed for paired operations (e.g.
+ /// __register_ehframe and __deregister_ehframe for eh-frame registration).
+ /// See comments for AllocActions for execution ordering.
+ ///
+ /// For unpaired operations one or the other member can be left unused, as
+ /// AllocActionCalls with an FnAddr of zero will be skipped.
+ struct AllocActionCallPair {
+ AllocActionCall Finalize;
+ AllocActionCall Dealloc;
+ };
+
+ /// A vector of allocation actions to be run for this allocation.
+ ///
+ /// Finalize allocations will be run in order at finalize time. Dealloc
+ /// actions will be run in reverse order at deallocation time.
+ using AllocActions = std::vector<AllocActionCallPair>;
+
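Aside (illustrative, not part of the patch): an executor-side support function written against the calling convention documented above; the function name and its behaviour are placeholders. A null return reports success; a non-null return must be a heap-allocated error string, which the executor-side allocator code frees.

    #include <cstdlib>
    #include <cstring>

    extern "C" char *myRegisterSection(const void *CtxAddr, size_t CtxSize) {
      if (CtxSize == 0) {
        // Error path: hand back a heap-allocated, null-terminated message.
        const char Msg[] = "myRegisterSection: empty context";
        char *Err = static_cast<char *>(std::malloc(sizeof(Msg)));
        if (Err)
          std::memcpy(Err, Msg, sizeof(Msg));
        return Err;
      }
      // ... act on the CtxSize bytes at CtxAddr (e.g. register an eh-frame) ...
      return nullptr; // success
    }

    // An AllocActionCall would then carry this function's executor address in
    // FnAddr and the address/size of its context blob in CtxAddr/CtxSize.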
+ /// Represents a finalized allocation.
+ ///
+ /// Finalized allocations must be passed to the
+ /// JITLinkMemoryManager::deallocate method prior to being destroyed.
+ ///
+ /// The interpretation of the Address associated with the finalized allocation
+ /// is up to the memory manager implementation. Common options are using the
+ /// base address of the allocation, or the address of a memory management
+ /// object that tracks the allocation.
+ class FinalizedAlloc {
+ friend class JITLinkMemoryManager;
- class SegmentRequest {
public:
- SegmentRequest() = default;
- SegmentRequest(uint64_t Alignment, size_t ContentSize,
- uint64_t ZeroFillSize)
- : Alignment(Alignment), ContentSize(ContentSize),
- ZeroFillSize(ZeroFillSize) {
- assert(isPowerOf2_32(Alignment) && "Alignment must be power of 2");
+ static constexpr JITTargetAddress InvalidAddr = ~JITTargetAddress(0);
+
+ FinalizedAlloc() = default;
+ explicit FinalizedAlloc(JITTargetAddress A) : A(A) {
+ assert(A != 0 && "Explicitly creating an invalid allocation?");
+ }
+ FinalizedAlloc(const FinalizedAlloc &) = delete;
+ FinalizedAlloc(FinalizedAlloc &&Other) : A(Other.A) {
+ Other.A = InvalidAddr;
+ }
+ FinalizedAlloc &operator=(const FinalizedAlloc &) = delete;
+ FinalizedAlloc &operator=(FinalizedAlloc &&Other) {
+ assert(A == InvalidAddr &&
+ "Cannot overwrite active finalized allocation");
+ std::swap(A, Other.A);
+ return *this;
+ }
+ ~FinalizedAlloc() {
+ assert(A == InvalidAddr && "Finalized allocation was not deallocated");
+ }
+
+    /// FinalizedAllocs convert to false for default-constructed values, and
+    /// to true otherwise. Default-constructed allocs need not be deallocated.
+ explicit operator bool() const { return A != InvalidAddr; }
+
+ /// Returns the address associated with this finalized allocation.
+ /// The allocation is unmodified.
+ JITTargetAddress getAddress() const { return A; }
+
+ /// Returns the address associated with this finalized allocation and
+ /// resets this object to the default state.
+ /// This should only be used by allocators when deallocating memory.
+ JITTargetAddress release() {
+ JITTargetAddress Tmp = A;
+ A = InvalidAddr;
+ return Tmp;
}
- uint64_t getAlignment() const { return Alignment; }
- size_t getContentSize() const { return ContentSize; }
- uint64_t getZeroFillSize() const { return ZeroFillSize; }
+
private:
- uint64_t Alignment = 0;
- size_t ContentSize = 0;
- uint64_t ZeroFillSize = 0;
+ JITTargetAddress A = InvalidAddr;
};
- using SegmentsRequestMap = DenseMap<unsigned, SegmentRequest>;
-
- /// Represents an allocation created by the memory manager.
+ /// Represents an allocation which has not been finalized yet.
///
- /// An allocation object is responsible for allocating and owning jit-linker
- /// working and target memory, and for transfering from working to target
- /// memory.
+ /// InFlightAllocs manage both executor memory allocations and working
+ /// memory allocations.
///
- class Allocation {
+ /// On finalization, the InFlightAlloc should transfer the content of
+ /// working memory into executor memory, apply memory protections, and
+ /// run any finalization functions.
+ ///
+ /// Working memory should be kept alive at least until one of the following
+ /// happens: (1) the InFlightAlloc instance is destroyed, (2) the
+ /// InFlightAlloc is abandoned, (3) finalized target memory is destroyed.
+ ///
+ /// If abandon is called then working memory and executor memory should both
+ /// be freed.
+ class InFlightAlloc {
public:
- using FinalizeContinuation = std::function<void(Error)>;
-
- virtual ~Allocation();
+ using OnFinalizedFunction = unique_function<void(Expected<FinalizedAlloc>)>;
+ using OnAbandonedFunction = unique_function<void(Error)>;
- /// Should return the address of linker working memory for the segment with
- /// the given protection flags.
- virtual MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) = 0;
+ virtual ~InFlightAlloc();
- /// Should return the final address in the target process where the segment
- /// will reside.
- virtual JITTargetAddress getTargetMemory(ProtectionFlags Seg) = 0;
+ /// Called prior to finalization if the allocation should be abandoned.
+ virtual void abandon(OnAbandonedFunction OnAbandoned) = 0;
- /// Should transfer from working memory to target memory, and release
- /// working memory.
- virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0;
+ /// Called to transfer working memory to the target and apply finalization.
+ virtual void finalize(OnFinalizedFunction OnFinalized) = 0;
- /// Calls finalizeAsync and waits for completion.
- Error finalize() {
- std::promise<MSVCPError> FinalizeResultP;
+ /// Synchronous convenience version of finalize.
+ Expected<FinalizedAlloc> finalize() {
+ std::promise<MSVCPExpected<FinalizedAlloc>> FinalizeResultP;
auto FinalizeResultF = FinalizeResultP.get_future();
- finalizeAsync(
- [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); });
+ finalize([&](Expected<FinalizedAlloc> Result) {
+ FinalizeResultP.set_value(std::move(Result));
+ });
return FinalizeResultF.get();
}
-
- /// Should deallocate target memory.
- virtual Error deallocate() = 0;
};
+ /// Typedef for the argument to be passed to OnAllocatedFunction.
+ using AllocResult = Expected<std::unique_ptr<InFlightAlloc>>;
+
+ /// Called when allocation has been completed.
+ using OnAllocatedFunction = unique_function<void(AllocResult)>;
+
+ /// Called when deallocation has completed.
+ using OnDeallocatedFunction = unique_function<void(Error)>;
+
virtual ~JITLinkMemoryManager();
- /// Create an Allocation object.
+ /// Start the allocation process.
///
- /// The JD argument represents the target JITLinkDylib, and can be used by
- /// JITLinkMemoryManager implementers to manage per-dylib allocation pools
- /// (e.g. one pre-reserved address space slab per dylib to ensure that all
- /// allocations for the dylib are within a certain range). The JD argument
- /// may be null (representing an allocation not associated with any
- /// JITDylib.
+ /// If the initial allocation is successful then the OnAllocated function will
+  /// be called with a std::unique_ptr<InFlightAlloc> value. If the allocation
+ /// is unsuccessful then the OnAllocated function will be called with an
+ /// Error.
+ virtual void allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) = 0;
+
+ /// Convenience function for blocking allocation.
+ AllocResult allocate(const JITLinkDylib *JD, LinkGraph &G) {
+ std::promise<MSVCPExpected<std::unique_ptr<InFlightAlloc>>> AllocResultP;
+ auto AllocResultF = AllocResultP.get_future();
+ allocate(JD, G, [&](AllocResult Alloc) {
+ AllocResultP.set_value(std::move(Alloc));
+ });
+ return AllocResultF.get();
+ }
+
+ /// Deallocate a list of allocation objects.
///
- /// The request argument describes the segment sizes and permisssions being
- /// requested.
- virtual Expected<std::unique_ptr<Allocation>>
- allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) = 0;
+ /// Dealloc actions will be run in reverse order (from the end of the vector
+ /// to the start).
+ virtual void deallocate(std::vector<FinalizedAlloc> Allocs,
+ OnDeallocatedFunction OnDeallocated) = 0;
+
+ /// Convenience function for deallocation of a single alloc.
+ void deallocate(FinalizedAlloc Alloc, OnDeallocatedFunction OnDeallocated) {
+ std::vector<FinalizedAlloc> Allocs;
+ Allocs.push_back(std::move(Alloc));
+ deallocate(std::move(Allocs), std::move(OnDeallocated));
+ }
+
+ /// Convenience function for blocking deallocation.
+ Error deallocate(std::vector<FinalizedAlloc> Allocs) {
+ std::promise<MSVCPError> DeallocResultP;
+ auto DeallocResultF = DeallocResultP.get_future();
+ deallocate(std::move(Allocs),
+ [&](Error Err) { DeallocResultP.set_value(std::move(Err)); });
+ return DeallocResultF.get();
+ }
+
+ /// Convenience function for blocking deallocation of a single alloc.
+ Error deallocate(FinalizedAlloc Alloc) {
+ std::vector<FinalizedAlloc> Allocs;
+ Allocs.push_back(std::move(Alloc));
+ return deallocate(std::move(Allocs));
+ }
+};
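A minimal sketch of the blocking convenience flow declared above, assuming a JITLinkMemoryManager, JITLinkDylib and fully-built LinkGraph already exist (error handling abbreviated):

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"

using namespace llvm;
using namespace llvm::jitlink;

static Error runBlockingAllocationFlow(JITLinkMemoryManager &MemMgr,
                                       const JITLinkDylib *JD, LinkGraph &G) {
  auto Alloc = MemMgr.allocate(JD, G); // blocking allocate
  if (!Alloc)
    return Alloc.takeError();
  // ... copy block content into the allocation's working memory here ...
  auto FA = (*Alloc)->finalize();      // blocking finalize
  if (!FA)
    return FA.takeError();
  // Finalized allocations must be handed back before destruction.
  return MemMgr.deallocate(std::move(*FA));
}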
+
+/// BasicLayout simplifies the implementation of JITLinkMemoryManagers.
+///
+/// BasicLayout groups Sections into Segments based on their memory protection
+/// and deallocation policies. JITLinkMemoryManagers can construct a BasicLayout
+/// from a Graph, and then assign working memory and addresses to each of the
+/// Segments. These addresses will be mapped back onto the Graph blocks in
+/// the apply method.
+class BasicLayout {
+public:
+ /// The Alignment, ContentSize and ZeroFillSize of each segment will be
+ /// pre-filled from the Graph. Clients must set the Addr and WorkingMem fields
+ /// prior to calling apply.
+ //
+ // FIXME: The C++98 initializer is an attempt to work around compile failures
+ // due to http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397.
+ // We should be able to switch this back to member initialization once that
+ // issue is fixed.
+ class Segment {
+ friend class BasicLayout;
+
+ public:
+ Segment()
+ : ContentSize(0), ZeroFillSize(0), Addr(0), WorkingMem(nullptr),
+ NextWorkingMemOffset(0) {}
+ Align Alignment;
+ size_t ContentSize;
+ uint64_t ZeroFillSize;
+ JITTargetAddress Addr;
+ char *WorkingMem = nullptr;
+
+ private:
+ size_t NextWorkingMemOffset;
+ std::vector<Block *> ContentBlocks, ZeroFillBlocks;
+ };
+
+ /// A convenience class that further groups segments based on memory
+ /// deallocation policy. This allows clients to make two slab allocations:
+ /// one for all standard segments, and one for all finalize segments.
+ struct ContiguousPageBasedLayoutSizes {
+ uint64_t StandardSegs = 0;
+ uint64_t FinalizeSegs = 0;
+
+ uint64_t total() const { return StandardSegs + FinalizeSegs; }
+ };
+
+private:
+ using SegmentMap = AllocGroupSmallMap<Segment>;
+
+public:
+ BasicLayout(LinkGraph &G);
+
+ /// Return a reference to the graph this allocation was created from.
+ LinkGraph &getGraph() { return G; }
+
+  /// Returns the total number of bytes required to allocate all segments
+  /// (with each segment padded out to page size) for all standard segments,
+  /// and all finalize segments.
+ ///
+ /// This is a convenience function for the common case where the segments will
+ /// be allocated contiguously.
+ ///
+ /// This function will return an error if any segment has an alignment that
+ /// is higher than a page.
+ Expected<ContiguousPageBasedLayoutSizes>
+ getContiguousPageBasedLayoutSizes(uint64_t PageSize);
+
+ /// Returns an iterator over the segments of the layout.
+ iterator_range<SegmentMap::iterator> segments() {
+ return {Segments.begin(), Segments.end()};
+ }
+
+ /// Apply the layout to the graph.
+ Error apply();
+
+ /// Returns a reference to the AllocActions in the graph.
+ /// This convenience function saves callers from having to #include
+ /// LinkGraph.h if all they need are allocation actions.
+ JITLinkMemoryManager::AllocActions &graphAllocActions();
+
+private:
+ LinkGraph &G;
+ SegmentMap Segments;
+};
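A hedged sketch of how a JITLinkMemoryManager implementation might drive BasicLayout, following the comments above. reserveSlab is a hypothetical stand-in for the implementation's own reservation logic, and per-segment alignment handling is elided; alignTo and pointerToJITTargetAddress come from llvm/Support/MathExtras.h and llvm/ExecutionEngine/JITSymbol.h respectively:

static Error layoutIntoSlab(jitlink::LinkGraph &G, uint64_t PageSize,
                            char *(*reserveSlab)(uint64_t NumBytes)) {
  jitlink::BasicLayout BL(G);

  auto Sizes = BL.getContiguousPageBasedLayoutSizes(PageSize);
  if (!Sizes)
    return Sizes.takeError();

  // Hypothetical: reserve one contiguous, page-aligned slab for everything.
  char *SlabBase = reserveSlab(Sizes->total());

  uint64_t Offset = 0;
  for (auto &KV : BL.segments()) {
    auto &Seg = KV.second;
    Seg.WorkingMem = SlabBase + Offset;
    Seg.Addr = pointerToJITTargetAddress(SlabBase + Offset);
    Offset += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
  }

  // Map the assigned addresses and working memory back onto the graph blocks.
  return BL.apply();
}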
+
+/// A utility class for making simple allocations using JITLinkMemoryManager.
+///
+/// SimpleSegmentAlloc takes a mapping of AllocGroups to Segments and uses
+/// this to create a LinkGraph with one Section (containing one Block) per
+/// Segment. Clients can obtain a pointer to the working memory and executor
+/// address of that block using the Segment's AllocGroup. Once memory has been
+/// populated, clients can call finalize to finalize the memory.
+class SimpleSegmentAlloc {
+public:
+ /// Describes a segment to be allocated.
+ struct Segment {
+ Segment() = default;
+ Segment(size_t ContentSize, Align ContentAlign)
+ : ContentSize(ContentSize), ContentAlign(ContentAlign) {}
+
+ size_t ContentSize = 0;
+ Align ContentAlign;
+ };
+
+ /// Describes the segment working memory and executor address.
+ struct SegmentInfo {
+ JITTargetAddress Addr = 0;
+ MutableArrayRef<char> WorkingMem;
+ };
+
+ using SegmentMap = AllocGroupSmallMap<Segment>;
+
+ using OnCreatedFunction = unique_function<void(Expected<SimpleSegmentAlloc>)>;
+
+ using OnFinalizedFunction =
+ JITLinkMemoryManager::InFlightAlloc::OnFinalizedFunction;
+
+ static void Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ SegmentMap Segments, OnCreatedFunction OnCreated);
+
+ static Expected<SimpleSegmentAlloc> Create(JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD,
+ SegmentMap Segments);
+
+ SimpleSegmentAlloc(SimpleSegmentAlloc &&);
+ SimpleSegmentAlloc &operator=(SimpleSegmentAlloc &&);
+ ~SimpleSegmentAlloc();
+
+ /// Returns the SegmentInfo for the given group.
+ SegmentInfo getSegInfo(AllocGroup AG);
+
+ /// Finalize all groups (async version).
+ void finalize(OnFinalizedFunction OnFinalized) {
+ Alloc->finalize(std::move(OnFinalized));
+ }
+
+ /// Finalize all groups.
+ Expected<JITLinkMemoryManager::FinalizedAlloc> finalize() {
+ return Alloc->finalize();
+ }
+
+private:
+ SimpleSegmentAlloc(
+ std::unique_ptr<LinkGraph> G, AllocGroupSmallMap<Block *> ContentBlocks,
+ std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc);
+
+ std::unique_ptr<LinkGraph> G;
+ AllocGroupSmallMap<Block *> ContentBlocks;
+ std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc;
};
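A minimal sketch of the usage pattern described above: one read/exec segment and one read-only segment, populated through the blocking Create/finalize path. MemMgr is assumed to be an existing JITLinkMemoryManager, and the returned FinalizedAlloc must later be passed to MemMgr.deallocate:

#include <cstring>

static Expected<jitlink::JITLinkMemoryManager::FinalizedAlloc>
emitTwoSegments(jitlink::JITLinkMemoryManager &MemMgr, ArrayRef<char> Code,
                ArrayRef<char> ROData) {
  using namespace jitlink;
  AllocGroup CodeGroup(MemProt::Read | MemProt::Exec);
  AllocGroup RODataGroup(MemProt::Read);

  auto Alloc = SimpleSegmentAlloc::Create(
      MemMgr, /*JD=*/nullptr,
      {{CodeGroup, SimpleSegmentAlloc::Segment(Code.size(), Align(16))},
       {RODataGroup, SimpleSegmentAlloc::Segment(ROData.size(), Align(8))}});
  if (!Alloc)
    return Alloc.takeError();

  // Copy content into the working memory of each segment.
  auto CodeSeg = Alloc->getSegInfo(CodeGroup);
  memcpy(CodeSeg.WorkingMem.data(), Code.data(), Code.size());
  auto RODataSeg = Alloc->getSegInfo(RODataGroup);
  memcpy(RODataSeg.WorkingMem.data(), ROData.data(), ROData.size());

  return Alloc->finalize(); // blocking finalize, yields a FinalizedAlloc
}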
/// A JITLinkMemoryManager that allocates in-process memory.
class InProcessMemoryManager : public JITLinkMemoryManager {
public:
- Expected<std::unique_ptr<Allocation>>
- allocate(const JITLinkDylib *JD, const SegmentsRequestMap &Request) override;
+ class IPInFlightAlloc;
+
+ /// Attempts to auto-detect the host page size.
+ static Expected<std::unique_ptr<InProcessMemoryManager>> Create();
+
+ /// Create an instance using the given page size.
+ InProcessMemoryManager(uint64_t PageSize) : PageSize(PageSize) {}
+
+ void allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::allocate;
+
+ void deallocate(std::vector<FinalizedAlloc> Alloc,
+ OnDeallocatedFunction OnDeallocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::deallocate;
+
+private:
+ // FIXME: Use an in-place array instead of a vector for DeallocActions.
+ // There shouldn't need to be a heap alloc for this.
+ struct FinalizedAllocInfo {
+ sys::MemoryBlock StandardSegments;
+ std::vector<AllocActionCall> DeallocActions;
+ };
+
+ FinalizedAlloc
+ createFinalizedAlloc(sys::MemoryBlock StandardSegments,
+ std::vector<AllocActionCall> DeallocActions);
+
+ uint64_t PageSize;
+ std::mutex FinalizedAllocsMutex;
+ RecyclingAllocator<BumpPtrAllocator, FinalizedAllocInfo> FinalizedAllocInfos;
};
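A small sketch of constructing the in-process manager via the auto-detecting factory declared above and returning it through the base interface:

static Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>
createDefaultMemMgr() {
  auto MemMgr = jitlink::InProcessMemoryManager::Create();
  if (!MemMgr)
    return MemMgr.takeError();
  return std::move(*MemMgr); // unique_ptr<InProcessMemoryManager> upcasts
}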
} // end namespace jitlink
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
index ecbc93e1467d..aee14c0d1fe5 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h
@@ -29,6 +29,8 @@ enum MachOARM64RelocationKind : Edge::Kind {
PageOffset12,
GOTPage21,
GOTPageOffset12,
+ TLVPage21,
+ TLVPageOffset12,
PointerToGOT,
PairedAddend,
LDRLiteral19,
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
new file mode 100644
index 000000000000..8fdce93ebc56
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
@@ -0,0 +1,225 @@
+//===-------- MemoryFlags.h - Memory allocation flags -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines types and operations related to memory protection and allocation
+// lifetimes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H
+#define LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H
+
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Describes Read/Write/Exec permissions for memory.
+enum class MemProt {
+ None = 0,
+ Read = 1U << 0,
+ Write = 1U << 1,
+ Exec = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue = */ Exec)
+};
+
+/// Print a MemProt as an RWX triple.
+raw_ostream &operator<<(raw_ostream &OS, MemProt MP);
+
+/// Convert a MemProt value to a corresponding sys::Memory::ProtectionFlags
+/// value.
+inline sys::Memory::ProtectionFlags toSysMemoryProtectionFlags(MemProt MP) {
+ std::underlying_type_t<sys::Memory::ProtectionFlags> PF = 0;
+ if ((MP & MemProt::Read) != MemProt::None)
+ PF |= sys::Memory::MF_READ;
+ if ((MP & MemProt::Write) != MemProt::None)
+ PF |= sys::Memory::MF_WRITE;
+ if ((MP & MemProt::Exec) != MemProt::None)
+ PF |= sys::Memory::MF_EXEC;
+ return static_cast<sys::Memory::ProtectionFlags>(PF);
+}
+
+/// Convert a sys::Memory::ProtectionFlags value to a corresponding MemProt
+/// value.
+inline MemProt fromSysMemoryProtectionFlags(sys::Memory::ProtectionFlags PF) {
+ MemProt MP = MemProt::None;
+ if (PF & sys::Memory::MF_READ)
+ MP |= MemProt::Read;
+ if (PF & sys::Memory::MF_WRITE)
+ MP |= MemProt::Write;
+ if (PF & sys::Memory::MF_EXEC)
+    MP |= MemProt::Exec;
+ return MP;
+}
+
+/// Describes a memory deallocation policy for memory to be allocated by a
+/// JITLinkMemoryManager.
+///
+/// All memory allocated by a call to JITLinkMemoryManager::allocate should be
+/// deallocated if a call is made to
+/// JITLinkMemoryManager::InFlightAlloc::abandon. The policies below apply
+/// to finalized allocations.
+enum class MemDeallocPolicy {
+ /// Standard memory should be deallocated when the deallocate method is called
+ /// for the finalized allocation.
+ Standard,
+
+ /// Finalize memory should be overwritten and then deallocated after all
+ /// finalization functions have been run.
+ Finalize
+};
+
+/// Print a MemDeallocPolicy.
+raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP);
+
+/// A pair of memory protections and allocation policies.
+///
+/// Optimized for use as a small map key.
+class AllocGroup {
+ friend struct llvm::DenseMapInfo<AllocGroup>;
+
+ using underlying_type = uint8_t;
+ static constexpr unsigned BitsForProt = 3;
+ static constexpr unsigned BitsForDeallocPolicy = 1;
+ static constexpr unsigned MaxIdentifiers =
+ 1U << (BitsForProt + BitsForDeallocPolicy);
+
+public:
+ static constexpr unsigned NumGroups = MaxIdentifiers;
+
+ /// Create a default AllocGroup. No memory protections, standard
+ /// deallocation policy.
+ AllocGroup() = default;
+
+  /// Create an AllocGroup from a MemProt only -- uses
+  /// MemDeallocPolicy::Standard.
+ AllocGroup(MemProt MP) : Id(static_cast<underlying_type>(MP)) {}
+
+  /// Create an AllocGroup from a MemProt and a MemDeallocPolicy.
+ AllocGroup(MemProt MP, MemDeallocPolicy MDP)
+ : Id(static_cast<underlying_type>(MP) |
+ (static_cast<underlying_type>(MDP) << BitsForProt)) {}
+
+ /// Returns the MemProt for this group.
+ MemProt getMemProt() const {
+ return static_cast<MemProt>(Id & ((1U << BitsForProt) - 1));
+ }
+
+  /// Returns the MemDeallocPolicy for this group.
+ MemDeallocPolicy getMemDeallocPolicy() const {
+ return static_cast<MemDeallocPolicy>(Id >> BitsForProt);
+ }
+
+ friend bool operator==(const AllocGroup &LHS, const AllocGroup &RHS) {
+ return LHS.Id == RHS.Id;
+ }
+
+ friend bool operator!=(const AllocGroup &LHS, const AllocGroup &RHS) {
+ return !(LHS == RHS);
+ }
+
+ friend bool operator<(const AllocGroup &LHS, const AllocGroup &RHS) {
+ return LHS.Id < RHS.Id;
+ }
+
+private:
+ AllocGroup(underlying_type RawId) : Id(RawId) {}
+ underlying_type Id = 0;
+};
+
+/// A specialized small-map for AllocGroups.
+///
+/// Iteration order is guaranteed to match key ordering.
+template <typename T> class AllocGroupSmallMap {
+private:
+ using ElemT = std::pair<AllocGroup, T>;
+ using VectorTy = SmallVector<ElemT, 4>;
+
+ static bool compareKey(const ElemT &E, const AllocGroup &G) {
+ return E.first < G;
+ }
+
+public:
+ using iterator = typename VectorTy::iterator;
+
+ AllocGroupSmallMap() = default;
+ AllocGroupSmallMap(std::initializer_list<std::pair<AllocGroup, T>> Inits) {
+ Elems.reserve(Inits.size());
+ for (const auto &E : Inits)
+ Elems.push_back(E);
+ llvm::sort(Elems, [](const ElemT &LHS, const ElemT &RHS) {
+ return LHS.first < RHS.first;
+ });
+ }
+
+ iterator begin() { return Elems.begin(); }
+ iterator end() { return Elems.end(); }
+ iterator find(AllocGroup G) {
+ auto I = lower_bound(Elems, G, compareKey);
+    return (I != end() && I->first == G) ? I : end();
+ }
+
+ bool empty() const { return Elems.empty(); }
+ size_t size() const { return Elems.size(); }
+
+ T &operator[](AllocGroup G) {
+ auto I = lower_bound(Elems, G, compareKey);
+ if (I == Elems.end() || I->first != G)
+ I = Elems.insert(I, std::make_pair(G, T()));
+ return I->second;
+ }
+
+private:
+ VectorTy Elems;
+};
+
+/// Print an AllocGroup.
+raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG);
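A brief sketch of how these types combine, per the comments above (the sizes are illustrative only):

static void demoAllocGroups() {
  using namespace llvm;
  using namespace llvm::jitlink;

  AllocGroup RX(MemProt::Read | MemProt::Exec);
  AllocGroup RWFinalize(MemProt::Read | MemProt::Write,
                        MemDeallocPolicy::Finalize);

  AllocGroupSmallMap<uint64_t> SegSizes;
  SegSizes[RX] = 0x1000;        // operator[] inserts, keeping keys ordered
  SegSizes[RWFinalize] = 0x200;

  for (auto &KV : SegSizes)     // iteration order matches key order
    errs() << KV.first << ": " << KV.second << "\n";
}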
+
+} // end namespace jitlink
+
+template <> struct DenseMapInfo<jitlink::MemProt> {
+ static inline jitlink::MemProt getEmptyKey() {
+ return jitlink::MemProt(~uint8_t(0));
+ }
+ static inline jitlink::MemProt getTombstoneKey() {
+ return jitlink::MemProt(~uint8_t(0) - 1);
+ }
+ static unsigned getHashValue(const jitlink::MemProt &Val) {
+ using UT = std::underlying_type_t<jitlink::MemProt>;
+ return DenseMapInfo<UT>::getHashValue(static_cast<UT>(Val));
+ }
+ static bool isEqual(const jitlink::MemProt &LHS,
+ const jitlink::MemProt &RHS) {
+ return LHS == RHS;
+ }
+};
+
+template <> struct DenseMapInfo<jitlink::AllocGroup> {
+ static inline jitlink::AllocGroup getEmptyKey() {
+ return jitlink::AllocGroup(~uint8_t(0));
+ }
+ static inline jitlink::AllocGroup getTombstoneKey() {
+ return jitlink::AllocGroup(~uint8_t(0) - 1);
+ }
+ static unsigned getHashValue(const jitlink::AllocGroup &Val) {
+ return DenseMapInfo<jitlink::AllocGroup::underlying_type>::getHashValue(
+ Val.Id);
+ }
+ static bool isEqual(const jitlink::AllocGroup &LHS,
+ const jitlink::AllocGroup &RHS) {
+ return LHS == RHS;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_MEMORYFLAGS_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
new file mode 100644
index 000000000000..c20f62d515ec
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
@@ -0,0 +1,63 @@
+//===---------------------- TableManager.h ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Fixes up edges that need a table entry to reference the target symbol.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H
+#define LLVM_EXECUTIONENGINE_JITLINK_TABLEMANAGER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/Debug.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// A CRTP base for tables that are built on demand, e.g. Global Offset Tables
+/// and Procedure Linkage Tables.
+/// The getEntryForTarget function returns the table entry corresponding to the
+/// given target, calling down to the implementation class to build an entry if
+/// one does not already exist.
+template <typename TableManagerImplT> class TableManager {
+public:
+  /// Return the table entry for the given target, constructing a new entry
+  /// (via the implementation's createEntry) in graph G if one does not
+  /// already exist.
+ Symbol &getEntryForTarget(LinkGraph &G, Symbol &Target) {
+ assert(Target.hasName() && "Edge cannot point to anonymous target");
+
+ auto EntryI = Entries.find(Target.getName());
+
+ // Build the entry if it doesn't exist.
+ if (EntryI == Entries.end()) {
+ auto &Entry = impl().createEntry(G, Target);
+ DEBUG_WITH_TYPE("jitlink", {
+        dbgs() << "  Created " << impl().getSectionName() << " entry for "
+ << Target.getName() << ": " << Entry << "\n";
+ });
+ EntryI = Entries.insert(std::make_pair(Target.getName(), &Entry)).first;
+ }
+
+ assert(EntryI != Entries.end() && "Could not get entry symbol");
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Using " << impl().getSectionName() << " entry "
+ << *EntryI->second << "\n";
+ });
+ return *EntryI->second;
+ }
+
+private:
+ TableManagerImplT &impl() { return static_cast<TableManagerImplT &>(*this); }
+ DenseMap<StringRef, Symbol *> Entries;
+};
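A hedged sketch of the CRTP contract: the implementation class provides createEntry (and, by the convention used later in this patch for the x86-64 GOT and PLT managers, a getSectionName and a visitEdge pass hook). buildEntryBlockFor is a hypothetical helper, not part of this patch:

// Hypothetical helper that creates an entry block in Sec reaching Target and
// returns an anonymous symbol covering it.
Symbol &buildEntryBlockFor(LinkGraph &G, Section &Sec, Symbol &Target);

class MyStubTableManager : public TableManager<MyStubTableManager> {
public:
  static StringRef getSectionName() { return "$__MY_STUBS"; }

  // Called by getEntryForTarget when no entry exists for Target yet.
  Symbol &createEntry(LinkGraph &G, Symbol &Target) {
    if (!StubsSection)
      StubsSection =
          &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
    return buildEntryBlockFor(G, *StubsSection, Target);
  }

private:
  Section *StubsSection = nullptr;
};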
+
+} // namespace jitlink
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
new file mode 100644
index 000000000000..994ce783b058
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
@@ -0,0 +1,38 @@
+//=== aarch64.h - Generic JITLink aarch64 edge kinds, utilities -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing aarch64 objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm {
+namespace jitlink {
+namespace aarch64 {
+
+/// Represents aarch64 fixups.
+enum EdgeKind_aarch64 : Edge::Kind {
+
+ /// Set a CALL immediate field to bits [27:2] of X = Target - Fixup + Addend
+ R_AARCH64_CALL26 = Edge::FirstRelocation,
+
+};
+
+/// Returns a string name for the given aarch64 edge. For debugging purposes
+/// only
+const char *getEdgeKindName(Edge::Kind K);
+
+} // namespace aarch64
+} // namespace jitlink
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH64_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index a4509f3888a4..b8d08d88c1c9 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -70,7 +70,19 @@ enum EdgeKind_riscv : Edge::Kind {
///
/// Fixup expression:
/// Fixup <- (Target - Fixup + Addend)
- R_RISCV_CALL
+ R_RISCV_CALL,
+
+ /// PC relative GOT offset
+ ///
+ /// Fixup expression:
+ /// Fixup <- (GOT - Fixup + Addend) >> 12
+ R_RISCV_GOT_HI20,
+
+ /// PC relative call by PLT
+ ///
+ /// Fixup expression:
+ /// Fixup <- (Target - Fixup + Addend)
+ R_RISCV_CALL_PLT
};
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
index 006d983537e9..3130ea381534 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_JITLINK_X86_64_H
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include <limits>
@@ -42,6 +43,16 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
Pointer32,
+ /// A signed 32-bit pointer value relocation
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target + Addend : int32
+ ///
+ /// Errors:
+  ///   - The target must reside in the signed 32-bit range
+  ///     ([-2**31, 2**31 - 1]) of the address space, otherwise an
+  ///     out-of-range error will be returned.
+ Pointer32Signed,
+
/// A 64-bit delta.
///
/// Delta from the fixup to the target.
@@ -85,6 +96,18 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// an out-of-range error will be returned.
NegDelta32,
+ /// A 64-bit GOT delta.
+ ///
+ /// Delta from the global offset table to the target
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - GOTSymbol + Addend : int64
+ ///
+ /// Errors:
+  ///   - *ASSERTION* A null GOTSymbol (i.e. the GOT section symbol has not
+  ///     been defined) will trigger an assertion.
+ Delta64FromGOT,
+
/// A 32-bit PC-relative branch.
///
/// Represents a PC-relative call or branch to a target. This can be used to
@@ -120,7 +143,7 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// This edge kind has the same fixup expression as BranchPCRel32, but further
/// identifies the call/branch as being to a pointer jump stub. For edges of
/// this kind the jump stub should not be bypassed (use
- /// BranchPCRel32ToPtrJumpStubRelaxable for that), but the pointer location
+ /// BranchPCRel32ToPtrJumpStubBypassable for that), but the pointer location
/// target may be recorded to allow manipulation at runtime.
///
/// Fixup expression:
@@ -136,7 +159,8 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
/// The edge kind has the same fixup expression as BranchPCRel32ToPtrJumpStub,
/// but identifies the call/branch as being to a pointer jump stub that may be
- /// bypassed if the ultimate target is within range of the fixup location.
+ /// bypassed with a direct jump to the ultimate target if the ultimate target
+ /// is within range of the fixup location.
///
/// Fixup expression:
/// Fixup <- Target - Fixup + Addend - 4: int32
@@ -145,7 +169,7 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - The result of the fixup expression must fit into an int32, otherwise
/// an out-of-range error will be returned.
///
- BranchPCRel32ToPtrJumpStubRelaxable,
+ BranchPCRel32ToPtrJumpStubBypassable,
/// A GOT entry getter/constructor, transformed to Delta32 pointing at the GOT
/// entry for the original target.
@@ -167,7 +191,62 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
RequestGOTAndTransformToDelta32,
- /// A PC-relative reference to a GOT entry, relaxable if GOT entry target
+ /// A GOT entry getter/constructor, transformed to Delta64 pointing at the GOT
+ /// entry for the original target.
+ ///
+ /// Indicates that this edge should be transformed into a Delta64 targeting
+ /// the GOT entry for the edge's current target, maintaining the same addend.
+ /// A GOT entry for the target should be created if one does not already
+ /// exist.
+ ///
+ /// Edges of this kind are usually handled by a GOT builder pass inserted by
+ /// default.
+ ///
+ /// Fixup expression:
+ /// NONE
+ ///
+ /// Errors:
+ /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
+ /// phase will result in an assert/unreachable during the fixup phase.
+ ///
+ RequestGOTAndTransformToDelta64,
+
+  /// A getter/constructor for the offset of a GOT entry within the GOT,
+  /// transformed to Delta64FromGOT pointing at the GOT entry for the
+  /// original target.
+  ///
+  /// Indicates that this edge should be transformed into a Delta64FromGOT
+  /// targeting the GOT entry for the edge's current target, maintaining the
+  /// same addend. A GOT entry for the target should be created if one does
+  /// not already exist.
+  ///
+  /// Edges of this kind are usually handled by a GOT builder pass inserted by
+  /// default.
+ ///
+ /// Fixup expression:
+ /// NONE
+ ///
+ /// Errors:
+ /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
+ /// phase will result in an assert/unreachable during the fixup phase
+ RequestGOTAndTransformToDelta64FromGOT,
+
+ /// A PC-relative load of a GOT entry, relaxable if GOT entry target is
+ /// in-range of the fixup
+ ///
+ /// TODO: Explain the optimization
+ ///
+ /// Fixup expression
+ /// Fixup <- Target - (Fixup + 4) + Addend : int32
+ ///
+ /// Errors:
+ /// - The result of the fixup expression must fit into an int32, otherwise
+ /// an out-of-range error will be returned.
+ //
+ PCRel32GOTLoadRelaxable,
+
+ /// A PC-relative REX load of a GOT entry, relaxable if GOT entry target
/// is in-range of the fixup.
///
/// If the GOT entry target is in-range of the fixup then the load from the
@@ -180,17 +259,39 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - The result of the fixup expression must fit into an int32, otherwise
/// an out-of-range error will be returned.
///
- PCRel32GOTLoadRelaxable,
+ PCRel32GOTLoadREXRelaxable,
- /// A GOT entry getter/constructor, transformed to PCRel32ToGOTLoadRelaxable
- /// pointing at the GOT entry for the original target.
+ /// A GOT entry getter/constructor, transformed to
+ /// PCRel32ToGOTLoadREXRelaxable pointing at the GOT entry for the original
+ /// target.
///
- /// Indicates that this edge should be transformed into a
- /// PC32ToGOTLoadRelaxable targeting the GOT entry for the edge's current
- /// target, maintaining the same addend. A GOT entry for the target should be
- /// created if one does not already exist.
+ /// Indicates that this edge should be lowered to a PC32ToGOTLoadREXRelaxable
+ /// targeting the GOT entry for the edge's current target, maintaining the
+ /// same addend. A GOT entry for the target should be created if one does not
+ /// already exist.
///
- /// Edges of this kind are usually handled by a GOT builder pass inserted by
+ /// Edges of this kind are usually lowered by a GOT builder pass inserted by
+ /// default.
+ ///
+ /// Fixup expression:
+ /// NONE
+ ///
+ /// Errors:
+ /// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
+ /// phase will result in an assert/unreachable during the fixup phase.
+ ///
+ RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable,
+
+ /// A GOT entry getter/constructor, transformed to
+ /// PCRel32ToGOTLoadRelaxable pointing at the GOT entry for the original
+ /// target.
+ ///
+ /// Indicates that this edge should be lowered to a PC32ToGOTLoadRelaxable
+ /// targeting the GOT entry for the edge's current target, maintaining the
+ /// same addend. A GOT entry for the target should be created if one does not
+ /// already exist.
+ ///
+ /// Edges of this kind are usually lowered by a GOT builder pass inserted by
/// default.
///
/// Fixup expression:
@@ -202,10 +303,10 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
RequestGOTAndTransformToPCRel32GOTLoadRelaxable,
- /// A PC-relative reference to a Thread Local Variable Pointer (TLVP) entry,
+ /// A PC-relative REX load of a Thread Local Variable Pointer (TLVP) entry,
/// relaxable if the TLVP entry target is in-range of the fixup.
///
- /// If the TLVP entry target is in-range of the fixup then the load frmo the
+ /// If the TLVP entry target is in-range of the fixup then the load from the
/// TLVP may be replaced with a direct memory address calculation.
///
/// The target of this edge must be a thread local variable entry of the form
@@ -222,15 +323,18 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - The target must be either external, or a TLV entry of the required
/// form, otherwise a malformed TLV entry error will be returned.
///
- PCRel32TLVPLoadRelaxable,
+ PCRel32TLVPLoadREXRelaxable,
+
+ /// TODO: Explain the generic edge kind
+ RequestTLSDescInGOTAndTransformToDelta32,
/// A TLVP entry getter/constructor, transformed to
- /// Delta32ToTLVPLoadRelaxable.
+ /// Delta32ToTLVPLoadREXRelaxable.
///
/// Indicates that this edge should be transformed into a
- /// Delta32ToTLVPLoadRelaxable targeting the TLVP entry for the edge's current
- /// target. A TLVP entry for the target should be created if one does not
- /// already exist.
+ /// Delta32ToTLVPLoadREXRelaxable targeting the TLVP entry for the edge's
+ /// current target. A TLVP entry for the target should be created if one does
+ /// not already exist.
///
/// Fixup expression:
/// NONE
@@ -239,7 +343,7 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
/// phase will result in an assert/unreachable during the fixup phase.
///
- RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable
+ RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable
};
/// Returns a string name for the given x86-64 edge. For debugging purposes
@@ -258,7 +362,8 @@ inline bool isInRangeForImmS32(int64_t Value) {
}
/// Apply fixup expression for edge to block content.
-inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
+ const Symbol *GOTSymbol) {
using namespace support;
char *BlockWorkingMem = B.getAlreadyMutableContent().data();
@@ -281,12 +386,21 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
return makeTargetOutOfRangeError(G, B, E);
break;
}
+ case Pointer32Signed: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ if (LLVM_LIKELY(isInRangeForImmS32(Value)))
+ *(little32_t *)FixupPtr = Value;
+ else
+ return makeTargetOutOfRangeError(G, B, E);
+ break;
+ }
case BranchPCRel32:
case BranchPCRel32ToPtrJumpStub:
- case BranchPCRel32ToPtrJumpStubRelaxable:
+ case BranchPCRel32ToPtrJumpStubBypassable:
case PCRel32GOTLoadRelaxable:
- case PCRel32TLVPLoadRelaxable: {
+ case PCRel32GOTLoadREXRelaxable:
+ case PCRel32TLVPLoadREXRelaxable: {
int64_t Value =
E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
if (LLVM_LIKELY(isInRangeForImmS32(Value)))
@@ -325,6 +439,13 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
return makeTargetOutOfRangeError(G, B, E);
break;
}
+ case Delta64FromGOT: {
+ assert(GOTSymbol && "No GOT section symbol");
+ int64_t Value =
+ E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend();
+ *(little64_t *)FixupPtr = Value;
+ break;
+ }
default: {
// If you hit this you should check that *constructor and other non-fixup
@@ -395,6 +516,114 @@ inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G,
false);
}
+/// Global Offset Table Builder.
+class GOTTableManager : public TableManager<GOTTableManager> {
+public:
+ static StringRef getSectionName() { return "$__GOT"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ Edge::Kind KindToSet = Edge::Invalid;
+ switch (E.getKind()) {
+ case x86_64::Delta64FromGOT: {
+      // We need to make sure that the GOT section exists, but we don't
+      // otherwise need to fix up this edge.
+ getGOTSection(G);
+ return false;
+ }
+ case x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable:
+ KindToSet = x86_64::PCRel32GOTLoadREXRelaxable;
+ break;
+ case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable:
+ KindToSet = x86_64::PCRel32GOTLoadRelaxable;
+ break;
+ case x86_64::RequestGOTAndTransformToDelta64:
+ KindToSet = x86_64::Delta64;
+ break;
+ case x86_64::RequestGOTAndTransformToDelta64FromGOT:
+ KindToSet = x86_64::Delta64FromGOT;
+ break;
+ case x86_64::RequestGOTAndTransformToDelta32:
+ KindToSet = x86_64::Delta32;
+ break;
+ default:
+ return false;
+ }
+ assert(KindToSet != Edge::Invalid &&
+ "Fell through switch, but no new kind to set");
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << formatv("{0:x}", B->getFixupAddress(E)) << " ("
+ << formatv("{0:x}", B->getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setKind(KindToSet);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointer(G, getGOTSection(G), &Target);
+ }
+
+private:
+ Section &getGOTSection(LinkGraph &G) {
+ if (!GOTSection)
+ GOTSection = &G.createSection(getSectionName(), MemProt::Read);
+ return *GOTSection;
+ }
+
+ Section *GOTSection = nullptr;
+};
+
+/// Procedure Linkage Table Builder.
+class PLTTableManager : public TableManager<PLTTableManager> {
+public:
+ PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {}
+
+ static StringRef getSectionName() { return "$__STUBS"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getKind() == x86_64::BranchPCRel32 && !E.getTarget().isDefined()) {
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << formatv("{0:x}", B->getFixupAddress(E)) << " ("
+ << formatv("{0:x}", B->getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+      // Set the edge kind to BranchPCRel32ToPtrJumpStubBypassable so that it
+      // can be optimized to a direct branch when the target is in-range.
+ E.setKind(x86_64::BranchPCRel32ToPtrJumpStubBypassable);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ return false;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointerJumpStub(G, getStubsSection(G),
+ GOT.getEntryForTarget(G, Target));
+ }
+
+public:
+ Section &getStubsSection(LinkGraph &G) {
+ if (!PLTSection)
+ PLTSection =
+ &G.createSection(getSectionName(), MemProt::Read | MemProt::Exec);
+ return *PLTSection;
+ }
+
+ GOTTableManager &GOT;
+ Section *PLTSection = nullptr;
+};
+
+/// Optimize the GOT and stub relocations if the edge target address is in
+/// range:
+/// 1. PCRel32GOTLoadRelaxable: if the target is in range, replace the GOT
+///    load with a lea.
+/// 2. BranchPCRel32ToPtrJumpStubBypassable: if the target is in range,
+///    replace the indirect jump via the PLT stub with a direct jump to the
+///    target.
+Error optimizeGOTAndStubAccesses(LinkGraph &G);
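A hedged sketch of wiring the two table managers above into a link-graph pass, assuming the visitExistingEdges helper from JITLink.h (whose edge-visiting loop appears earlier in this patch), followed by the relaxation pass after layout. The free-function wrappers are illustrative only:

static Error buildTables_x86_64(LinkGraph &G) {
  x86_64::GOTTableManager GOT;
  x86_64::PLTTableManager PLT(GOT);
  // Routes every existing edge through GOT.visitEdge and PLT.visitEdge,
  // creating GOT entries and PLT stubs on demand.
  visitExistingEdges(G, GOT, PLT);
  return Error::success();
}

// After layout, edges whose targets turned out to be in range can be relaxed:
static Error relaxTables_x86_64(LinkGraph &G) {
  return x86_64::optimizeGOTAndStubAccesses(G);
}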
+
} // namespace x86_64
} // end namespace jitlink
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/MCJIT.h b/llvm/include/llvm/ExecutionEngine/MCJIT.h
index 8253bf98963b..adce98f380c5 100644
--- a/llvm/include/llvm/ExecutionEngine/MCJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/MCJIT.h
@@ -26,6 +26,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index e832d8d57dfa..5cac65b49a05 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -21,7 +21,7 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
-#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
+#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ExtensibleRTTI.h"
@@ -434,13 +434,16 @@ class SymbolsNotFound : public ErrorInfo<SymbolsNotFound> {
public:
static char ID;
- SymbolsNotFound(SymbolNameSet Symbols);
- SymbolsNotFound(SymbolNameVector Symbols);
+ SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP, SymbolNameSet Symbols);
+ SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameVector Symbols);
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const SymbolNameVector &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
SymbolNameVector Symbols;
};
@@ -449,12 +452,15 @@ class SymbolsCouldNotBeRemoved : public ErrorInfo<SymbolsCouldNotBeRemoved> {
public:
static char ID;
- SymbolsCouldNotBeRemoved(SymbolNameSet Symbols);
+ SymbolsCouldNotBeRemoved(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameSet Symbols);
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const SymbolNameSet &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
SymbolNameSet Symbols;
};
@@ -466,13 +472,17 @@ class MissingSymbolDefinitions : public ErrorInfo<MissingSymbolDefinitions> {
public:
static char ID;
- MissingSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols)
- : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {}
+ MissingSymbolDefinitions(std::shared_ptr<SymbolStringPool> SSP,
+ std::string ModuleName, SymbolNameVector Symbols)
+ : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)),
+ Symbols(std::move(Symbols)) {}
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const std::string &getModuleName() const { return ModuleName; }
const SymbolNameVector &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
std::string ModuleName;
SymbolNameVector Symbols;
};
@@ -485,13 +495,17 @@ class UnexpectedSymbolDefinitions : public ErrorInfo<UnexpectedSymbolDefinitions
public:
static char ID;
- UnexpectedSymbolDefinitions(std::string ModuleName, SymbolNameVector Symbols)
- : ModuleName(std::move(ModuleName)), Symbols(std::move(Symbols)) {}
+ UnexpectedSymbolDefinitions(std::shared_ptr<SymbolStringPool> SSP,
+ std::string ModuleName, SymbolNameVector Symbols)
+ : SSP(std::move(SSP)), ModuleName(std::move(ModuleName)),
+ Symbols(std::move(Symbols)) {}
std::error_code convertToErrorCode() const override;
void log(raw_ostream &OS) const override;
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() { return SSP; }
const std::string &getModuleName() const { return ModuleName; }
const SymbolNameVector &getSymbols() const { return Symbols; }
private:
+ std::shared_ptr<SymbolStringPool> SSP;
std::string ModuleName;
SymbolNameVector Symbols;
};
@@ -1241,21 +1255,6 @@ public:
const DenseMap<JITDylib *, SymbolLookupSet> &InitSyms);
};
-/// Represents an abstract task for ORC to run.
-class Task : public RTTIExtends<Task, RTTIRoot> {
-public:
- static char ID;
-
- /// Description of the task to be performed. Used for logging.
- virtual void printDescription(raw_ostream &OS) = 0;
-
- /// Run the task.
- virtual void run() = 0;
-
-private:
- void anchor() override;
-};
-
/// A materialization task.
class MaterializationTask : public RTTIExtends<MaterializationTask, Task> {
public:
@@ -1285,13 +1284,16 @@ public:
/// For reporting errors.
using ErrorReporter = std::function<void(Error)>;
+ /// Send a result to the remote.
+ using SendResultFunction = unique_function<void(shared::WrapperFunctionResult)>;
+
/// For dispatching ORC tasks (typically materialization tasks).
using DispatchTaskFunction = unique_function<void(std::unique_ptr<Task> T)>;
/// An asynchronous wrapper-function callable from the executor via
/// jit-dispatch.
using JITDispatchHandlerFunction = unique_function<void(
- ExecutorProcessControl::SendResultFunction SendResult,
+ SendResultFunction SendResult,
const char *ArgData, size_t ArgSize)>;
/// A map associating tag names with asynchronous wrapper function
@@ -1303,13 +1305,19 @@ public:
/// object.
ExecutionSession(std::unique_ptr<ExecutorProcessControl> EPC);
- /// End the session. Closes all JITDylibs.
+ /// End the session. Closes all JITDylibs and disconnects from the
+ /// executor.
Error endSession();
/// Get the ExecutorProcessControl object associated with this
/// ExecutionSession.
ExecutorProcessControl &getExecutorProcessControl() { return *EPC; }
+ /// Get the SymbolStringPool for this instance.
+ std::shared_ptr<SymbolStringPool> getSymbolStringPool() {
+ return EPC->getSymbolStringPool();
+ }
+
/// Add a symbol name to the SymbolStringPool and return a pointer to it.
SymbolStringPtr intern(StringRef SymName) { return EPC->intern(SymName); }
@@ -1462,10 +1470,9 @@ public:
/// \endcode{.cpp}
///
/// The given OnComplete function will be called to return the result.
- void callWrapperAsync(ExecutorProcessControl::SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
- ArrayRef<char> ArgBuffer) {
- EPC->callWrapperAsync(std::move(OnComplete), WrapperFnAddr, ArgBuffer);
+ template <typename... ArgTs>
+ void callWrapperAsync(ArgTs &&... Args) {
+ EPC->callWrapperAsync(std::forward<ArgTs>(Args)...);
}
/// Run a wrapper function in the executor. The wrapper function should be
@@ -1474,30 +1481,18 @@ public:
/// \code{.cpp}
/// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
/// \endcode{.cpp}
- shared::WrapperFunctionResult callWrapper(JITTargetAddress WrapperFnAddr,
+ shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr,
ArrayRef<char> ArgBuffer) {
- std::promise<shared::WrapperFunctionResult> RP;
- auto RF = RP.get_future();
- callWrapperAsync(
- [&](shared::WrapperFunctionResult R) { RP.set_value(std::move(R)); },
- WrapperFnAddr, ArgBuffer);
- return RF.get();
+ return EPC->callWrapper(WrapperFnAddr, ArgBuffer);
}
/// Run a wrapper function using SPS to serialize the arguments and
/// deserialize the results.
template <typename SPSSignature, typename SendResultT, typename... ArgTs>
- void callSPSWrapperAsync(SendResultT &&SendResult,
- JITTargetAddress WrapperFnAddr,
+ void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult,
const ArgTs &...Args) {
- shared::WrapperFunction<SPSSignature>::callAsync(
- [this,
- WrapperFnAddr](ExecutorProcessControl::SendResultFunction SendResult,
- const char *ArgData, size_t ArgSize) {
- callWrapperAsync(std::move(SendResult), WrapperFnAddr,
- ArrayRef<char>(ArgData, ArgSize));
- },
- std::move(SendResult), Args...);
+ EPC->callSPSWrapperAsync<SPSSignature, SendResultT, ArgTs...>(
+ WrapperFnAddr, std::forward<SendResultT>(SendResult), Args...);
}
/// Run a wrapper function using SPS to serialize the arguments and
@@ -1506,13 +1501,10 @@ public:
/// If SPSSignature is a non-void function signature then the second argument
/// (the first in the Args list) should be a reference to a return value.
template <typename SPSSignature, typename... WrapperCallArgTs>
- Error callSPSWrapper(JITTargetAddress WrapperFnAddr,
+ Error callSPSWrapper(ExecutorAddr WrapperFnAddr,
WrapperCallArgTs &&...WrapperCallArgs) {
- return shared::WrapperFunction<SPSSignature>::call(
- [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) {
- return callWrapper(WrapperFnAddr, ArrayRef<char>(ArgData, ArgSize));
- },
- std::forward<WrapperCallArgTs>(WrapperCallArgs)...);
+ return EPC->callSPSWrapper<SPSSignature, WrapperCallArgTs...>(
+ WrapperFnAddr, std::forward<WrapperCallArgTs>(WrapperCallArgs)...);
}
/// Wrap a handler that takes concrete argument types (and a sender for a
@@ -1525,7 +1517,7 @@ public:
template <typename SPSSignature, typename HandlerT>
static JITDispatchHandlerFunction wrapAsyncWithSPS(HandlerT &&H) {
return [H = std::forward<HandlerT>(H)](
- ExecutorProcessControl::SendResultFunction SendResult,
+ SendResultFunction SendResult,
const char *ArgData, size_t ArgSize) mutable {
shared::WrapperFunction<SPSSignature>::handleAsync(ArgData, ArgSize, H,
std::move(SendResult));
@@ -1564,7 +1556,7 @@ public:
/// This should be called by the ExecutorProcessControl instance in response
/// to incoming jit-dispatch requests from the executor.
void
- runJITDispatchHandler(ExecutorProcessControl::SendResultFunction SendResult,
+ runJITDispatchHandler(SendResultFunction SendResult,
JITTargetAddress HandlerFnTagAddr,
ArrayRef<char> ArgBuffer);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
new file mode 100644
index 000000000000..af092b3287d3
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h
@@ -0,0 +1,64 @@
+//===-- DebuggerSupportPlugin.h -- Utils for debugger support ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generates debug objects and registers them using the jit-loader-gdb protocol.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H
+#define LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+
+namespace llvm {
+namespace orc {
+
+/// For each object containing debug info, installs JITLink passes to synthesize
+/// a debug object and then register it via the GDB JIT-registration interface.
+///
+/// Currently MachO only. For ELF use DebugObjectManagerPlugin. These two
+/// plugins will be merged in the near future.
+class GDBJITDebugInfoRegistrationPlugin : public ObjectLinkingLayer::Plugin {
+public:
+ class DebugSectionSynthesizer {
+ public:
+ virtual ~DebugSectionSynthesizer() {}
+ virtual Error startSynthesis() = 0;
+ virtual Error completeSynthesisAndRegister() = 0;
+ };
+
+ static Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>>
+ Create(ExecutionSession &ES, JITDylib &ProcessJD, const Triple &TT);
+
+ GDBJITDebugInfoRegistrationPlugin(ExecutorAddr RegisterActionAddr)
+ : RegisterActionAddr(RegisterActionAddr) {}
+
+ Error notifyFailed(MaterializationResponsibility &MR) override;
+ Error notifyRemovingResources(ResourceKey K) override;
+
+ void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) override;
+
+ void modifyPassConfig(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig) override;
+
+private:
+ void modifyPassConfigForMachO(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig);
+
+ ExecutorAddr RegisterActionAddr;
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_DEBUGGERSUPPORT_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
new file mode 100644
index 000000000000..20da3e3b89eb
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
@@ -0,0 +1,330 @@
+//===-- ELFNixPlatform.h -- Utilities for executing ELF in Orc --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Linux/BSD support for executing JIT'd ELF in Orc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H
+#define LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+#include <future>
+#include <thread>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+struct ELFPerObjectSectionsToRegister {
+ ExecutorAddrRange EHFrameSection;
+ ExecutorAddrRange ThreadDataSection;
+};
+
+struct ELFNixJITDylibInitializers {
+ using SectionList = std::vector<ExecutorAddrRange>;
+
+ ELFNixJITDylibInitializers(std::string Name, ExecutorAddr DSOHandleAddress)
+ : Name(std::move(Name)), DSOHandleAddress(std::move(DSOHandleAddress)) {}
+
+ std::string Name;
+ ExecutorAddr DSOHandleAddress;
+
+ StringMap<SectionList> InitSections;
+};
+
+class ELFNixJITDylibDeinitializers {};
+
+using ELFNixJITDylibInitializerSequence =
+ std::vector<ELFNixJITDylibInitializers>;
+
+using ELFNixJITDylibDeinitializerSequence =
+ std::vector<ELFNixJITDylibDeinitializers>;
+
+/// Mediates between ELFNix initialization and ExecutionSession state.
+class ELFNixPlatform : public Platform {
+public:
+  /// Try to create an ELFNixPlatform instance, adding the ORC runtime to the
+ /// given JITDylib.
+ ///
+ /// The ORC runtime requires access to a number of symbols in
+  /// libc++. It is up to the caller to ensure that the required
+ /// symbols can be referenced by code added to PlatformJD. The
+ /// standard way to achieve this is to first attach dynamic library
+ /// search generators for either the given process, or for the
+ /// specific required libraries, to PlatformJD, then to create the
+ /// platform instance:
+ ///
+ /// \code{.cpp}
+ /// auto &PlatformJD = ES.createBareJITDylib("stdlib");
+ /// PlatformJD.addGenerator(
+ /// ExitOnErr(EPCDynamicLibrarySearchGenerator
+ /// ::GetForTargetProcess(EPC)));
+ /// ES.setPlatform(
+ /// ExitOnErr(ELFNixPlatform::Create(ES, ObjLayer, EPC, PlatformJD,
+ /// "/path/to/orc/runtime")));
+ /// \endcode
+ ///
+ /// Alternatively, these symbols could be added to another JITDylib that
+ /// PlatformJD links against.
+ ///
+ /// Clients are also responsible for ensuring that any JIT'd code that
+ /// depends on runtime functions (including any code using TLV or static
+ /// destructors) can reference the runtime symbols. This is usually achieved
+ /// by linking any JITDylibs containing regular code against
+ /// PlatformJD.
+ ///
+ /// By default, ELFNixPlatform will add the set of aliases returned by the
+ /// standardPlatformAliases function. This includes both required aliases
+ /// (e.g. __cxa_atexit -> __orc_rt_elf_cxa_atexit for static destructor
+ /// support), and optional aliases that provide JIT versions of common
+ /// functions (e.g. dlopen -> __orc_rt_elf_jit_dlopen). Clients can
+ /// override these defaults by passing a non-None value for the
+ /// RuntimeAliases function, in which case the client is responsible for
+ /// setting up all aliases (including the required ones).
+ static Expected<std::unique_ptr<ELFNixPlatform>>
+ Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ Optional<SymbolAliasMap> RuntimeAliases = None);
+
+ ExecutionSession &getExecutionSession() const { return ES; }
+ ObjectLinkingLayer &getObjectLinkingLayer() const { return ObjLinkingLayer; }
+
+ Error setupJITDylib(JITDylib &JD) override;
+ Error notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) override;
+ Error notifyRemoving(ResourceTracker &RT) override;
+
+ /// Returns an AliasMap containing the default aliases for the ELFNixPlatform.
+ /// This can be modified by clients when constructing the platform to add
+ /// or remove aliases.
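+ ///
+ /// A minimal usage sketch (the names, path, and error handling below are
+ /// illustrative only, not part of this API):
+ ///
+ /// \code{.cpp}
+ ///   auto Aliases = ELFNixPlatform::standardPlatformAliases(ES);
+ ///   // ... client-specific additions/removals on Aliases here ...
+ ///   ES.setPlatform(
+ ///     ExitOnErr(ELFNixPlatform::Create(ES, ObjLayer, PlatformJD,
+ ///                                      "/path/to/orc/runtime",
+ ///                                      std::move(Aliases))));
+ /// \endcode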
+ static SymbolAliasMap standardPlatformAliases(ExecutionSession &ES);
+
+ /// Returns the array of required CXX aliases.
+ static ArrayRef<std::pair<const char *, const char *>> requiredCXXAliases();
+
+ /// Returns the array of standard runtime utility aliases for ELF.
+ static ArrayRef<std::pair<const char *, const char *>>
+ standardRuntimeUtilityAliases();
+
+ /// Returns true if the given section name is an initializer section.
+ static bool isInitializerSection(StringRef SecName);
+
+private:
+ // The ELFNixPlatformPlugin scans/modifies LinkGraphs to support ELF
+ // platform features including initializers, exceptions, TLV, and language
+ // runtime registration.
+ class ELFNixPlatformPlugin : public ObjectLinkingLayer::Plugin {
+ public:
+ ELFNixPlatformPlugin(ELFNixPlatform &MP) : MP(MP) {}
+
+ void modifyPassConfig(MaterializationResponsibility &MR,
+ jitlink::LinkGraph &G,
+ jitlink::PassConfiguration &Config) override;
+
+ SyntheticSymbolDependenciesMap
+ getSyntheticSymbolDependencies(MaterializationResponsibility &MR) override;
+
+ // FIXME: We should be tentatively tracking scraped sections and discarding
+ // if the MR fails.
+ Error notifyFailed(MaterializationResponsibility &MR) override {
+ return Error::success();
+ }
+
+ Error notifyRemovingResources(ResourceKey K) override {
+ return Error::success();
+ }
+
+ void notifyTransferringResources(ResourceKey DstKey,
+ ResourceKey SrcKey) override {}
+
+ private:
+ using InitSymbolDepMap =
+ DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>;
+
+ void addInitializerSupportPasses(MaterializationResponsibility &MR,
+ jitlink::PassConfiguration &Config);
+
+ void addDSOHandleSupportPasses(MaterializationResponsibility &MR,
+ jitlink::PassConfiguration &Config);
+
+ void addEHAndTLVSupportPasses(MaterializationResponsibility &MR,
+ jitlink::PassConfiguration &Config);
+
+ Error preserveInitSections(jitlink::LinkGraph &G,
+ MaterializationResponsibility &MR);
+
+ Error registerInitSections(jitlink::LinkGraph &G, JITDylib &JD);
+
+ Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD);
+
+ std::mutex PluginMutex;
+ ELFNixPlatform &MP;
+ InitSymbolDepMap InitSymbolDeps;
+ };
+
+ using SendInitializerSequenceFn =
+ unique_function<void(Expected<ELFNixJITDylibInitializerSequence>)>;
+
+ using SendDeinitializerSequenceFn =
+ unique_function<void(Expected<ELFNixJITDylibDeinitializerSequence>)>;
+
+ using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>;
+
+ static bool supportedTarget(const Triple &TT);
+
+ ELFNixPlatform(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator,
+ Error &Err);
+
+ // Associate ELFNixPlatform JIT-side runtime support functions with handlers.
+ Error associateRuntimeSupportFunctions(JITDylib &PlatformJD);
+
+ void getInitializersBuildSequencePhase(SendInitializerSequenceFn SendResult,
+ JITDylib &JD,
+ std::vector<JITDylibSP> DFSLinkOrder);
+
+ void getInitializersLookupPhase(SendInitializerSequenceFn SendResult,
+ JITDylib &JD);
+
+ void rt_getInitializers(SendInitializerSequenceFn SendResult,
+ StringRef JDName);
+
+ void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
+ ExecutorAddr Handle);
+
+ void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle,
+ StringRef SymbolName);
+
+ // Records the addresses of runtime symbols used by the platform.
+ Error bootstrapELFNixRuntime(JITDylib &PlatformJD);
+
+ Error registerInitInfo(JITDylib &JD,
+ ArrayRef<jitlink::Section *> InitSections);
+
+ Error registerPerObjectSections(const ELFPerObjectSectionsToRegister &POSR);
+
+ Expected<uint64_t> createPThreadKey();
+
+ ExecutionSession &ES;
+ ObjectLinkingLayer &ObjLinkingLayer;
+
+ SymbolStringPtr DSOHandleSymbol;
+ std::atomic<bool> RuntimeBootstrapped{false};
+
+ ExecutorAddr orc_rt_elfnix_platform_bootstrap;
+ ExecutorAddr orc_rt_elfnix_platform_shutdown;
+ ExecutorAddr orc_rt_elfnix_register_object_sections;
+ ExecutorAddr orc_rt_elfnix_create_pthread_key;
+
+ DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
+
+ // InitSeqs gets its own mutex to avoid locking the whole session when
+ // aggregating data from JITLink.
+ std::mutex PlatformMutex;
+ DenseMap<JITDylib *, ELFNixJITDylibInitializers> InitSeqs;
+ std::vector<ELFPerObjectSectionsToRegister> BootstrapPOSRs;
+
+ DenseMap<JITTargetAddress, JITDylib *> HandleAddrToJITDylib;
+ DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey;
+};
+
+namespace shared {
+
+using SPSELFPerObjectSectionsToRegister =
+ SPSTuple<SPSExecutorAddrRange, SPSExecutorAddrRange>;
+
+template <>
+class SPSSerializationTraits<SPSELFPerObjectSectionsToRegister,
+ ELFPerObjectSectionsToRegister> {
+
+public:
+ static size_t size(const ELFPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFPerObjectSectionsToRegister::AsArgList::size(
+ MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ELFPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFPerObjectSectionsToRegister::AsArgList::serialize(
+ OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ ELFPerObjectSectionsToRegister &MOPOSR) {
+ return SPSELFPerObjectSectionsToRegister::AsArgList::deserialize(
+ IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
+ }
+};
+
+using SPSNamedExecutorAddrRangeSequenceMap =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
+
+using SPSELFNixJITDylibInitializers =
+ SPSTuple<SPSString, SPSExecutorAddr, SPSNamedExecutorAddrRangeSequenceMap>;
+
+using SPSELFNixJITDylibInitializerSequence =
+ SPSSequence<SPSELFNixJITDylibInitializers>;
+
+/// Serialization traits for ELFNixJITDylibInitializers.
+template <>
+class SPSSerializationTraits<SPSELFNixJITDylibInitializers,
+ ELFNixJITDylibInitializers> {
+public:
+ static size_t size(const ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::size(
+ MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::serialize(
+ OB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ ELFNixJITDylibInitializers &MOJDIs) {
+ return SPSELFNixJITDylibInitializers::AsArgList::deserialize(
+ IB, MOJDIs.Name, MOJDIs.DSOHandleAddress, MOJDIs.InitSections);
+ }
+};
+
+using SPSELFJITDylibDeinitializers = SPSEmpty;
+
+using SPSELFJITDylibDeinitializerSequence =
+ SPSSequence<SPSELFJITDylibDeinitializers>;
+
+template <>
+class SPSSerializationTraits<SPSELFJITDylibDeinitializers,
+ ELFNixJITDylibDeinitializers> {
+public:
+ static size_t size(const ELFNixJITDylibDeinitializers &MOJDDs) { return 0; }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ELFNixJITDylibDeinitializers &MOJDDs) {
+ return true;
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ ELFNixJITDylibDeinitializers &MOJDDs) {
+ MOJDDs = ELFNixJITDylibDeinitializers();
+ return true;
+ }
+};
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_ELFNIXPLATFORM_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
index 410a202b3296..940d0d28ae83 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_ORC_EPCDEBUGOBJECTREGISTRAR_H
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Memory.h"
@@ -32,7 +33,7 @@ class ExecutionSession;
/// Abstract interface for registering debug objects in the executor process.
class DebugObjectRegistrar {
public:
- virtual Error registerDebugObject(sys::MemoryBlock) = 0;
+ virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0;
virtual ~DebugObjectRegistrar() {}
};
@@ -40,14 +41,14 @@ public:
/// executor process.
class EPCDebugObjectRegistrar : public DebugObjectRegistrar {
public:
- EPCDebugObjectRegistrar(ExecutionSession &ES, JITTargetAddress RegisterFn)
+ EPCDebugObjectRegistrar(ExecutionSession &ES, ExecutorAddr RegisterFn)
: ES(ES), RegisterFn(RegisterFn) {}
- Error registerDebugObject(sys::MemoryBlock TargetMem) override;
+ Error registerDebugObject(ExecutorAddrRange TargetMem) override;
private:
ExecutionSession &ES;
- JITTargetAddress RegisterFn;
+ ExecutorAddr RegisterFn;
};
/// Create a ExecutorProcessControl-based DebugObjectRegistrar that emits debug
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
index 8cd6e9319a28..6d113a7bdf1a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
@@ -14,6 +14,7 @@
#define LLVM_EXECUTIONENGINE_ORC_EPCEHFRAMEREGISTRAR_H
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
namespace llvm {
namespace orc {
@@ -33,8 +34,8 @@ public:
/// Create a EPCEHFrameRegistrar with the given ExecutorProcessControl
/// object and registration/deregistration function addresses.
EPCEHFrameRegistrar(ExecutionSession &ES,
- JITTargetAddress RegisterEHFrameWrapperFnAddr,
- JITTargetAddress DeregisterEHFRameWrapperFnAddr)
+ ExecutorAddr RegisterEHFrameWrapperFnAddr,
+ ExecutorAddr DeregisterEHFRameWrapperFnAddr)
: ES(ES), RegisterEHFrameWrapperFnAddr(RegisterEHFrameWrapperFnAddr),
DeregisterEHFrameWrapperFnAddr(DeregisterEHFRameWrapperFnAddr) {}
@@ -45,8 +46,8 @@ public:
private:
ExecutionSession &ES;
- JITTargetAddress RegisterEHFrameWrapperFnAddr;
- JITTargetAddress DeregisterEHFrameWrapperFnAddr;
+ ExecutorAddr RegisterEHFrameWrapperFnAddr;
+ ExecutorAddr DeregisterEHFrameWrapperFnAddr;
};
} // end namespace orc
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h
new file mode 100644
index 000000000000..02e580c86f54
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h
@@ -0,0 +1,67 @@
+//===- EPCGenericDylibManager.h -- Generic EPC Dylib management -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements dylib loading and searching by making calls to
+// ExecutorProcessControl::callWrapper.
+//
+// This simplifies the implementation of new ExecutorProcessControl instances,
+// as this implementation will always work (at the cost of some performance
+// overhead for the calls).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H
+
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+
+namespace llvm {
+namespace orc {
+
+class SymbolLookupSet;
+
+class EPCGenericDylibManager {
+public:
+ /// Function addresses for dylib management.
+ struct SymbolAddrs {
+ ExecutorAddr Instance;
+ ExecutorAddr Open;
+ ExecutorAddr Lookup;
+ };
+
+ /// Create an EPCGenericDylibManager instance using the default bootstrap
+ /// symbol names to find the required function addresses in the executor.
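+ ///
+ /// A usage sketch (the library path, mode value, and error handling below
+ /// are illustrative assumptions):
+ ///
+ /// \code{.cpp}
+ ///   auto DylibMgr = cantFail(
+ ///       EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(EPC));
+ ///   auto H = cantFail(DylibMgr.open("/usr/lib/libm.so", 0));
+ ///   SymbolLookupSet Syms;
+ ///   Syms.add(EPC.getSymbolStringPool()->intern("sin"));
+ ///   auto Addrs = cantFail(DylibMgr.lookup(H, Syms));
+ /// \endcode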
+ static Expected<EPCGenericDylibManager>
+ CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC);
+
+ /// Create an EPCGenericDylibManager instance from a given set of
+ /// function addrs.
+ EPCGenericDylibManager(ExecutorProcessControl &EPC, SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+ /// Loads the dylib at the given path.
+ Expected<tpctypes::DylibHandle> open(StringRef Path, uint64_t Mode);
+
+ /// Looks up symbols within the given dylib.
+ Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H,
+ const SymbolLookupSet &Lookup);
+
+ /// Looks up symbols within the given dylib.
+ Expected<std::vector<ExecutorAddr>>
+ lookup(tpctypes::DylibHandle H, const RemoteSymbolLookupSet &Lookup);
+
+private:
+ ExecutorProcessControl &EPC;
+ SymbolAddrs SAs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICDYLIBMANAGER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
new file mode 100644
index 000000000000..b9825f17ec17
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h
@@ -0,0 +1,97 @@
+//===- EPCGenericJITLinkMemoryManager.h - EPC-based mem manager -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements JITLinkMemoryManager by making remote calls via
+// ExecutorProcessControl::callWrapperAsync.
+//
+// This simplifies the implementation of new ExecutorProcessControl instances,
+// as this implementation will always work (at the cost of some performance
+// overhead for the calls).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+
+namespace llvm {
+namespace orc {
+
+class EPCGenericJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
+public:
+ /// Function addresses for memory access.
+ struct SymbolAddrs {
+ ExecutorAddr Allocator;
+ ExecutorAddr Reserve;
+ ExecutorAddr Finalize;
+ ExecutorAddr Deallocate;
+ };
+
+ /// Create an EPCGenericJITLinkMemoryManager instance from a given set of
+ /// function addrs.
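+ ///
+ /// A construction sketch; the bootstrap symbol names below are placeholders
+ /// chosen for illustration, not the names registered by the ORC runtime:
+ ///
+ /// \code{.cpp}
+ ///   EPCGenericJITLinkMemoryManager::SymbolAddrs SAs;
+ ///   if (auto Err = EPC.getBootstrapSymbols(
+ ///           {{SAs.Allocator, "__example_allocator_instance"},
+ ///            {SAs.Reserve, "__example_reserve"},
+ ///            {SAs.Finalize, "__example_finalize"},
+ ///            {SAs.Deallocate, "__example_deallocate"}}))
+ ///     return std::move(Err);
+ ///   auto MemMgr =
+ ///       std::make_unique<EPCGenericJITLinkMemoryManager>(EPC, SAs);
+ /// \endcode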
+ EPCGenericJITLinkMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs)
+ : EPC(EPC), SAs(SAs) {}
+
+ void allocate(const jitlink::JITLinkDylib *JD, jitlink::LinkGraph &G,
+ OnAllocatedFunction OnAllocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::allocate;
+
+ void deallocate(std::vector<FinalizedAlloc> Allocs,
+ OnDeallocatedFunction OnDeallocated) override;
+
+ // Use overloads from base class.
+ using JITLinkMemoryManager::deallocate;
+
+private:
+ class InFlightAlloc;
+
+ void completeAllocation(ExecutorAddr AllocAddr, jitlink::BasicLayout BL,
+ OnAllocatedFunction OnAllocated);
+
+ ExecutorProcessControl &EPC;
+ SymbolAddrs SAs;
+};
+
+namespace shared {
+
+/// FIXME: This specialization should be moved into TargetProcessControlTypes.h
+/// (or wherever those types get merged to) once ORC depends on JITLink.
+template <>
+class SPSSerializationTraits<SPSExecutorAddr,
+ jitlink::JITLinkMemoryManager::FinalizedAlloc> {
+public:
+ static size_t size(const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) {
+ return SPSArgList<SPSExecutorAddr>::size(ExecutorAddr(FA.getAddress()));
+ }
+
+ static bool
+ serialize(SPSOutputBuffer &OB,
+ const jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) {
+ return SPSArgList<SPSExecutorAddr>::serialize(
+ OB, ExecutorAddr(FA.getAddress()));
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ jitlink::JITLinkMemoryManager::FinalizedAlloc &FA) {
+ ExecutorAddr A;
+ if (!SPSArgList<SPSExecutorAddr>::deserialize(IB, A))
+ return false;
+ FA = jitlink::JITLinkMemoryManager::FinalizedAlloc(A.getValue());
+ return true;
+ }
+};
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICJITLINKMEMORYMANAGER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h
new file mode 100644
index 000000000000..8c1d457d06ab
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h
@@ -0,0 +1,85 @@
+//===- EPCGenericMemoryAccess.h - Generic EPC MemoryAccess impl -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements ExecutorProcessControl::MemoryAccess by making calls to
+// ExecutorProcessControl::callWrapperAsync.
+//
+// This simplifies the implementation of new ExecutorProcessControl instances,
+// as this implementation will always work (at the cost of some performance
+// overhead for the calls).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+
+namespace llvm {
+namespace orc {
+
+class EPCGenericMemoryAccess : public ExecutorProcessControl::MemoryAccess {
+public:
+ /// Function addresses for memory access.
+ struct FuncAddrs {
+ ExecutorAddr WriteUInt8s;
+ ExecutorAddr WriteUInt16s;
+ ExecutorAddr WriteUInt32s;
+ ExecutorAddr WriteUInt64s;
+ ExecutorAddr WriteBuffers;
+ };
+
+ /// Create an EPCGenericMemoryAccess instance from a given set of
+ /// function addrs.
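+ ///
+ /// A minimal sketch, assuming FAs has already been populated (e.g. via
+ /// ExecutorProcessControl::getBootstrapSymbols):
+ ///
+ /// \code{.cpp}
+ ///   EPCGenericMemoryAccess MemAccess(EPC, FAs);
+ ///   // The blocking helpers inherited from MemoryAccess (writeUInt8s,
+ ///   // writeBuffers, etc.) can then be used alongside the *Async methods.
+ /// \endcode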
+ EPCGenericMemoryAccess(ExecutorProcessControl &EPC, FuncAddrs FAs)
+ : EPC(EPC), FAs(FAs) {}
+
+ void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt8Write>)>(
+ FAs.WriteUInt8s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt16Write>)>(
+ FAs.WriteUInt16s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt32Write>)>(
+ FAs.WriteUInt32s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessUInt64Write>)>(
+ FAs.WriteUInt64s, std::move(OnWriteComplete), Ws);
+ }
+
+ void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) override {
+ using namespace shared;
+ EPC.callSPSWrapperAsync<void(SPSSequence<SPSMemoryAccessBufferWrite>)>(
+ FAs.WriteBuffers, std::move(OnWriteComplete), Ws);
+ }
+
+private:
+ ExecutorProcessControl &EPC;
+ FuncAddrs FAs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICMEMORYACCESS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
new file mode 100644
index 000000000000..b6fdfb92ced3
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h
@@ -0,0 +1,133 @@
+//===---- EPCGenericRTDyldMemoryManager.h - EPC-based MemMgr ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a RuntimeDyld::MemoryManager that uses EPC and the ORC runtime
+// bootstrap functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H
+
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+/// Remote-mapped RuntimeDyld-compatible memory manager.
+class EPCGenericRTDyldMemoryManager : public RuntimeDyld::MemoryManager {
+public:
+ /// Symbol addresses for memory access.
+ struct SymbolAddrs {
+ ExecutorAddr Instance;
+ ExecutorAddr Reserve;
+ ExecutorAddr Finalize;
+ ExecutorAddr Deallocate;
+ ExecutorAddr RegisterEHFrame;
+ ExecutorAddr DeregisterEHFrame;
+ };
+
+ /// Create an EPCGenericRTDyldMemoryManager using the given EPC, looking up
+ /// the default symbol names in the bootstrap symbol set.
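+ ///
+ /// A usage sketch (error handling elided; Resolver stands in for an assumed
+ /// JITSymbolResolver implementation):
+ ///
+ /// \code{.cpp}
+ ///   auto MemMgr = cantFail(
+ ///       EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
+ ///           EPC));
+ ///   RuntimeDyld RTDyld(*MemMgr, Resolver);
+ /// \endcode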
+ static Expected<std::unique_ptr<EPCGenericRTDyldMemoryManager>>
+ CreateWithDefaultBootstrapSymbols(ExecutorProcessControl &EPC);
+
+ /// Create an EPCGenericRTDyldMemoryManager using the given EPC and symbol
+ /// addrs.
+ EPCGenericRTDyldMemoryManager(ExecutorProcessControl &EPC, SymbolAddrs SAs);
+
+ EPCGenericRTDyldMemoryManager(const EPCGenericRTDyldMemoryManager &) = delete;
+ EPCGenericRTDyldMemoryManager &
+ operator=(const EPCGenericRTDyldMemoryManager &) = delete;
+ EPCGenericRTDyldMemoryManager(EPCGenericRTDyldMemoryManager &&) = delete;
+ EPCGenericRTDyldMemoryManager &
+ operator=(EPCGenericRTDyldMemoryManager &&) = delete;
+ ~EPCGenericRTDyldMemoryManager();
+
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override;
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override;
+
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize, uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) override;
+
+ bool needsToReserveAllocationSpace() override;
+
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) override;
+
+ void deregisterEHFrames() override;
+
+ void notifyObjectLoaded(RuntimeDyld &Dyld,
+ const object::ObjectFile &Obj) override;
+
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override;
+
+private:
+ struct Alloc {
+ public:
+ Alloc(uint64_t Size, unsigned Align)
+ : Size(Size), Align(Align),
+ Contents(std::make_unique<uint8_t[]>(Size + Align - 1)) {}
+
+ uint64_t Size;
+ unsigned Align;
+ std::unique_ptr<uint8_t[]> Contents;
+ ExecutorAddr RemoteAddr;
+ };
+
+ struct EHFrame {
+ ExecutorAddr Addr;
+ uint64_t Size;
+ };
+
+ // Group of section allocations to be allocated together in the executor. The
+ // RemoteCode address will stand in as the id of the group for deallocation
+ // purposes.
+ struct AllocGroup {
+ AllocGroup() = default;
+ AllocGroup(const AllocGroup &) = delete;
+ AllocGroup &operator=(const AllocGroup &) = delete;
+ AllocGroup(AllocGroup &&) = default;
+ AllocGroup &operator=(AllocGroup &&) = default;
+
+ ExecutorAddrRange RemoteCode;
+ ExecutorAddrRange RemoteROData;
+ ExecutorAddrRange RemoteRWData;
+ std::vector<EHFrame> UnfinalizedEHFrames;
+ std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs;
+ };
+
+ // Maps all allocations in Allocs to aligned blocks
+ void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector<Alloc> &Allocs,
+ ExecutorAddr NextAddr);
+
+ ExecutorProcessControl &EPC;
+ SymbolAddrs SAs;
+
+ std::mutex M;
+ std::vector<AllocGroup> Unmapped;
+ std::vector<AllocGroup> Unfinalized;
+ std::vector<ExecutorAddr> FinalizedAllocs;
+ std::string ErrMsg;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif // LLVM_EXECUTIONENGINE_ORC_EPCGENERICRTDYLDMEMORYMANAGER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
index 64f16d507c97..92de5882bafe 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
@@ -126,7 +126,7 @@ public:
}
private:
- using Allocation = jitlink::JITLinkMemoryManager::Allocation;
+ using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
struct IndirectStubInfo {
IndirectStubInfo() = default;
@@ -149,12 +149,12 @@ private:
ExecutorProcessControl &EPC;
std::unique_ptr<ABISupport> ABI;
JITTargetAddress ResolverBlockAddr;
- std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation> ResolverBlock;
+ FinalizedAlloc ResolverBlock;
std::unique_ptr<TrampolinePool> TP;
std::unique_ptr<LazyCallThroughManager> LCTM;
std::vector<IndirectStubInfo> AvailableIndirectStubs;
- std::vector<std::unique_ptr<Allocation>> IndirectStubAllocs;
+ std::vector<FinalizedAlloc> IndirectStubAllocs;
};
/// This will call writeResolver on the given EPCIndirectionUtils instance
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
index d540d0cd0608..105dac8e8d04 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
@@ -13,7 +13,6 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H
#define LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
@@ -21,6 +20,7 @@
#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
+#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
@@ -37,11 +37,65 @@ class SymbolLookupSet;
/// ExecutorProcessControl supports interaction with a JIT target process.
class ExecutorProcessControl {
friend class ExecutionSession;
-
public:
- /// Sender to return the result of a WrapperFunction executed in the JIT.
- using SendResultFunction =
- unique_function<void(shared::WrapperFunctionResult)>;
+
+ /// A handler for incoming WrapperFunctionResults -- either return values from
+ /// callWrapper* calls, or incoming JIT-dispatch requests.
+ ///
+ /// IncomingWFRHandlers are constructible from
+ /// unique_function<void(shared::WrapperFunctionResult)>s using the
+ /// the RunInPlace or RunAsTask utilities below.
+ class IncomingWFRHandler {
+ friend class ExecutorProcessControl;
+ public:
+ IncomingWFRHandler() = default;
+ explicit operator bool() const { return !!H; }
+ void operator()(shared::WrapperFunctionResult WFR) { H(std::move(WFR)); }
+ private:
+ template <typename FnT> IncomingWFRHandler(FnT &&Fn)
+ : H(std::forward<FnT>(Fn)) {}
+
+ unique_function<void(shared::WrapperFunctionResult)> H;
+ };
+
+ /// Constructs an IncomingWFRHandler from a function object that is callable
+ /// as void(shared::WrapperFunctionResult). The function object will be called
+ /// directly. This should be used with care as it may block listener threads
+ /// in remote EPCs. It is only suitable for simple tasks (e.g. setting a
+ /// future), or for performing some quick analysis before dispatching "real"
+ /// work as a Task.
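+ ///
+ /// For example, given an ExecutorProcessControl &EPC, a caller can block on
+ /// the result via a promise (WrapperFnAddr and ArgBuffer are assumed to be
+ /// in scope; this mirrors the blocking callWrapper implementation below):
+ ///
+ /// \code{.cpp}
+ ///   std::promise<shared::WrapperFunctionResult> RP;
+ ///   auto RF = RP.get_future();
+ ///   EPC.callWrapperAsync(
+ ///       RunInPlace(), WrapperFnAddr,
+ ///       [&](shared::WrapperFunctionResult R) { RP.set_value(std::move(R)); },
+ ///       ArgBuffer);
+ ///   auto Result = RF.get();
+ /// \endcode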
+ class RunInPlace {
+ public:
+ template <typename FnT>
+ IncomingWFRHandler operator()(FnT &&Fn) {
+ return IncomingWFRHandler(std::forward<FnT>(Fn));
+ }
+ };
+
+ /// Constructs an IncomingWFRHandler from a function object by creating a new
+ /// function object that dispatches the original using a TaskDispatcher,
+ /// wrapping the original as a GenericNamedTask.
+ ///
+ /// This is the default approach for running WFR handlers.
+ class RunAsTask {
+ public:
+ RunAsTask(TaskDispatcher &D) : D(D) {}
+
+ template <typename FnT>
+ IncomingWFRHandler operator()(FnT &&Fn) {
+ return IncomingWFRHandler(
+ [&D = this->D, Fn = std::move(Fn)]
+ (shared::WrapperFunctionResult WFR) mutable {
+ D.dispatch(
+ makeGenericNamedTask(
+ [Fn = std::move(Fn), WFR = std::move(WFR)]() mutable {
+ Fn(std::move(WFR));
+ }, "WFR handler task"));
+ });
+ }
+ private:
+ TaskDispatcher &D;
+ };
/// APIs for manipulating memory in the target process.
class MemoryAccess {
@@ -51,53 +105,58 @@ public:
virtual ~MemoryAccess();
- virtual void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) = 0;
- virtual void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
- WriteResultFn OnWriteComplete) = 0;
+ virtual void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) = 0;
Error writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt8s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt8sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt16s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt16sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt32s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt32sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeUInt64s(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeUInt64sAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
Error writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws) {
std::promise<MSVCPError> ResultP;
auto ResultF = ResultP.get_future();
- writeBuffers(Ws, [&](Error Err) { ResultP.set_value(std::move(Err)); });
+ writeBuffersAsync(Ws,
+ [&](Error Err) { ResultP.set_value(std::move(Err)); });
return ResultF.get();
}
};
@@ -113,10 +172,14 @@ public:
/// Contains the address of the dispatch function and context that the ORC
/// runtime can use to call functions in the JIT.
struct JITDispatchInfo {
- ExecutorAddress JITDispatchFunctionAddress;
- ExecutorAddress JITDispatchContextAddress;
+ ExecutorAddr JITDispatchFunction;
+ ExecutorAddr JITDispatchContext;
};
+ ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<TaskDispatcher> D)
+ : SSP(std::move(SSP)), D(std::move(D)) {}
+
virtual ~ExecutorProcessControl();
/// Return the ExecutionSession associated with this instance.
@@ -132,6 +195,8 @@ public:
/// Return a shared pointer to the SymbolStringPool for this instance.
std::shared_ptr<SymbolStringPool> getSymbolStringPool() const { return SSP; }
+ TaskDispatcher &getDispatcher() { return *D; }
+
/// Return the Triple for the target process.
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -153,6 +218,29 @@ public:
return *MemMgr;
}
+ /// Returns the bootstrap symbol map.
+ const StringMap<ExecutorAddr> &getBootstrapSymbolsMap() const {
+ return BootstrapSymbols;
+ }
+
+ /// For each (ExecutorAddr&, StringRef) pair, looks up the string in the
+ /// bootstrap symbols map and writes its address to the ExecutorAddr if
+ /// found. If any symbol is not found then the function returns an error.
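+ ///
+ /// For illustration (the symbol names below are hypothetical):
+ ///
+ /// \code{.cpp}
+ ///   ExecutorAddr Reserve, Finalize;
+ ///   if (auto Err = EPC.getBootstrapSymbols(
+ ///           {{Reserve, "__example_reserve"},
+ ///            {Finalize, "__example_finalize"}}))
+ ///     return Err;
+ /// \endcode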
+ Error getBootstrapSymbols(
+ ArrayRef<std::pair<ExecutorAddr &, StringRef>> Pairs) const {
+ for (auto &KV : Pairs) {
+ auto I = BootstrapSymbols.find(KV.second);
+ if (I == BootstrapSymbols.end())
+ return make_error<StringError>("Symbol \"" + KV.second +
+ "\" not found "
+ "in bootstrap symbols map",
+ inconvertibleErrorCode());
+
+ KV.first = I->second;
+ }
+ return Error::success();
+ }
+
/// Load the dynamic library at the given path and return a handle to it.
/// If LibraryPath is null this function will return the global handle for
/// the target process.
@@ -163,44 +251,119 @@ public:
/// The result of the lookup is a 2-dimensional array of target addresses
/// that correspond to the lookup order. If a required symbol is not
/// found then this method will return an error. If a weakly referenced
- /// symbol is not found then it be assigned a '0' value in the result.
- /// that correspond to the lookup order.
+ /// symbol is not found then it will be assigned a '0' value.
virtual Expected<std::vector<tpctypes::LookupResult>>
lookupSymbols(ArrayRef<LookupRequest> Request) = 0;
/// Run function with a main-like signature.
- virtual Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ virtual Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) = 0;
- /// Run a wrapper function in the executor.
+ /// Run a wrapper function in the executor. The given WFRHandler will be
+ /// called on the result when it is returned.
///
/// The wrapper function should be callable as:
///
/// \code{.cpp}
/// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
/// \endcode{.cpp}
- ///
- /// The given OnComplete function will be called to return the result.
- virtual void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
+ virtual void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
ArrayRef<char> ArgBuffer) = 0;
+ /// Run a wrapper function in the executor using the given Runner to dispatch
+ /// OnComplete when the result is ready.
+ template <typename RunPolicyT, typename FnT>
+ void callWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr,
+ FnT &&OnComplete, ArrayRef<char> ArgBuffer) {
+ callWrapperAsync(
+ WrapperFnAddr, Runner(std::forward<FnT>(OnComplete)), ArgBuffer);
+ }
+
+ /// Run a wrapper function in the executor. OnComplete will be dispatched
+ /// as a GenericNamedTask using this instance's TaskDispatch object.
+ template <typename FnT>
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr, FnT &&OnComplete,
+ ArrayRef<char> ArgBuffer) {
+ callWrapperAsync(RunAsTask(*D), WrapperFnAddr,
+ std::forward<FnT>(OnComplete), ArgBuffer);
+ }
+
+ /// Run a wrapper function in the executor. The wrapper function should be
+ /// callable as:
+ ///
+ /// \code{.cpp}
+ /// CWrapperFunctionResult fn(uint8_t *Data, uint64_t Size);
+ /// \endcode
+ shared::WrapperFunctionResult callWrapper(ExecutorAddr WrapperFnAddr,
+ ArrayRef<char> ArgBuffer) {
+ std::promise<shared::WrapperFunctionResult> RP;
+ auto RF = RP.get_future();
+ callWrapperAsync(
+ RunInPlace(), WrapperFnAddr,
+ [&](shared::WrapperFunctionResult R) {
+ RP.set_value(std::move(R));
+ }, ArgBuffer);
+ return RF.get();
+ }
+
+ /// Run a wrapper function using SPS to serialize the arguments and
+ /// deserialize the results.
+ template <typename SPSSignature, typename RunPolicyT, typename SendResultT,
+ typename... ArgTs>
+ void callSPSWrapperAsync(RunPolicyT &&Runner, ExecutorAddr WrapperFnAddr,
+ SendResultT &&SendResult, const ArgTs &...Args) {
+ shared::WrapperFunction<SPSSignature>::callAsync(
+ [this, WrapperFnAddr, Runner = std::move(Runner)]
+ (auto &&SendResult, const char *ArgData, size_t ArgSize) mutable {
+ this->callWrapperAsync(std::move(Runner), WrapperFnAddr,
+ std::move(SendResult),
+ ArrayRef<char>(ArgData, ArgSize));
+ },
+ std::forward<SendResultT>(SendResult), Args...);
+ }
+
+ /// Run a wrapper function using SPS to serialize the arguments and
+ /// deserialize the results.
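+ ///
+ /// For example, calling a wrapper with a void SPS signature (the function
+ /// address and argument range are assumed to be in scope; the handler
+ /// receives an Error describing the outcome):
+ ///
+ /// \code{.cpp}
+ ///   EPC.callSPSWrapperAsync<void(shared::SPSExecutorAddrRange)>(
+ ///       RegisterFnAddr,
+ ///       [](Error Err) { cantFail(std::move(Err)); },
+ ///       SectionRange);
+ /// \endcode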
+ template <typename SPSSignature, typename SendResultT, typename... ArgTs>
+ void callSPSWrapperAsync(ExecutorAddr WrapperFnAddr, SendResultT &&SendResult,
+ const ArgTs &...Args) {
+ callSPSWrapperAsync<SPSSignature>(RunAsTask(*D), WrapperFnAddr,
+ std::forward<SendResultT>(SendResult),
+ Args...);
+ }
+
+ /// Run a wrapper function using SPS to serialize the arguments and
+ /// deserialize the results.
+ ///
+ /// If SPSSignature is a non-void function signature then the second argument
+ /// (the first in the Args list) should be a reference to a return value.
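+ ///
+ /// For example, for a wrapper with SPS signature int32_t(int32_t) (the
+ /// function address FnAddr is an assumed value already in scope):
+ ///
+ /// \code{.cpp}
+ ///   int32_t Result = 0;
+ ///   if (auto Err = EPC.callSPSWrapper<int32_t(int32_t)>(FnAddr, Result, 42))
+ ///     return Err;
+ ///   // Result now holds the deserialized return value.
+ /// \endcode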
+ template <typename SPSSignature, typename... WrapperCallArgTs>
+ Error callSPSWrapper(ExecutorAddr WrapperFnAddr,
+ WrapperCallArgTs &&...WrapperCallArgs) {
+ return shared::WrapperFunction<SPSSignature>::call(
+ [this, WrapperFnAddr](const char *ArgData, size_t ArgSize) {
+ return callWrapper(WrapperFnAddr, ArrayRef<char>(ArgData, ArgSize));
+ },
+ std::forward<WrapperCallArgTs>(WrapperCallArgs)...);
+ }
+
/// Disconnect from the target process.
///
/// This should be called after the JIT session is shut down.
virtual Error disconnect() = 0;
protected:
- ExecutorProcessControl(std::shared_ptr<SymbolStringPool> SSP)
- : SSP(std::move(SSP)) {}
std::shared_ptr<SymbolStringPool> SSP;
+ std::unique_ptr<TaskDispatcher> D;
ExecutionSession *ES = nullptr;
Triple TargetTriple;
unsigned PageSize = 0;
JITDispatchInfo JDI;
MemoryAccess *MemAccess = nullptr;
jitlink::JITLinkMemoryManager *MemMgr = nullptr;
+ StringMap<ExecutorAddr> BootstrapSymbols;
};
/// A ExecutorProcessControl instance that asserts if any of its methods are
@@ -210,9 +373,12 @@ class UnsupportedExecutorProcessControl : public ExecutorProcessControl {
public:
UnsupportedExecutorProcessControl(
std::shared_ptr<SymbolStringPool> SSP = nullptr,
+ std::unique_ptr<TaskDispatcher> D = nullptr,
const std::string &TT = "", unsigned PageSize = 0)
: ExecutorProcessControl(SSP ? std::move(SSP)
- : std::make_shared<SymbolStringPool>()) {
+ : std::make_shared<SymbolStringPool>(),
+ D ? std::move(D)
+ : std::make_unique<InPlaceTaskDispatcher>()) {
this->TargetTriple = Triple(TT);
this->PageSize = PageSize;
}
@@ -226,13 +392,13 @@ public:
llvm_unreachable("Unsupported");
}
- Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) override {
llvm_unreachable("Unsupported");
}
- void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
ArrayRef<char> ArgBuffer) override {
llvm_unreachable("Unsupported");
}
@@ -246,8 +412,9 @@ class SelfExecutorProcessControl
private ExecutorProcessControl::MemoryAccess {
public:
SelfExecutorProcessControl(
- std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple,
- unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
+ std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D,
+ Triple TargetTriple, unsigned PageSize,
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr);
/// Create a SelfExecutorProcessControl with the given symbol string pool and
/// memory manager.
@@ -256,6 +423,7 @@ public:
/// be created and used by default.
static Expected<std::unique_ptr<SelfExecutorProcessControl>>
Create(std::shared_ptr<SymbolStringPool> SSP = nullptr,
+ std::unique_ptr<TaskDispatcher> D = nullptr,
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr = nullptr);
Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override;
@@ -263,32 +431,32 @@ public:
Expected<std::vector<tpctypes::LookupResult>>
lookupSymbols(ArrayRef<LookupRequest> Request) override;
- Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
+ Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) override;
- void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
ArrayRef<char> ArgBuffer) override;
Error disconnect() override;
private:
- void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt8sAsync(ArrayRef<tpctypes::UInt8Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt16sAsync(ArrayRef<tpctypes::UInt16Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt32sAsync(ArrayRef<tpctypes::UInt32Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeUInt64sAsync(ArrayRef<tpctypes::UInt64Write> Ws,
+ WriteResultFn OnWriteComplete) override;
- void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
- WriteResultFn OnWriteComplete) override;
+ void writeBuffersAsync(ArrayRef<tpctypes::BufferWrite> Ws,
+ WriteResultFn OnWriteComplete) override;
- static shared::detail::CWrapperFunctionResult
+ static shared::CWrapperFunctionResult
jitDispatchViaWrapperFunctionManager(void *Ctx, const void *FnTag,
const char *Data, size_t Size);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 78e3ceef50e2..4d6d46595fc3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -45,6 +45,13 @@ class PointerType;
class Triple;
class Twine;
class Value;
+class MCDisassembler;
+class MCInstrAnalysis;
+
+namespace jitlink {
+class LinkGraph;
+class Symbol;
+} // namespace jitlink
namespace orc {
@@ -557,6 +564,33 @@ GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
ValueToValueMapTy &VMap);
+/// Introduce relocations to \p Sym in its own definition if there are any
+/// pointers formed via PC-relative address that do not already have a
+/// relocation.
+///
+/// This is useful when introducing indirection via a stub function at link time
+/// without compiler support. If a function pointer is formed without a
+/// relocation, e.g. in the definition of \c foo
+///
+/// \code
+/// _foo:
+/// leaq -7(%rip), %rax # form pointer to _foo without relocation
+/// _bar:
+/// leaq (%rip), %rax # uses X86_64_RELOC_SIGNED to '_foo'
+/// \endcode
+///
+/// the pointer to \c _foo computed by \c _foo and \c _bar may differ if we
+/// introduce a stub for _foo. If the pointer is used as a key, this may be
+/// observable to the program. This pass will attempt to introduce the missing
+/// "self-relocation" on the leaq instruction.
+///
+/// This is based on disassembly and should be considered "best effort". It may
+/// silently fail to add relocations.
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
+ jitlink::LinkGraph &G,
+ MCDisassembler &Disassembler,
+ MCInstrAnalysis &MIA);
+
} // end namespace orc
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h b/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h
deleted file mode 100644
index f3d616deae8f..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===-- LLVMSPSSerializers.h - SPS serialization for LLVM types -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// SPS Serialization for common LLVM types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H
-#define LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H
-
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-template <typename SPSValueT, typename ValueT>
-class SPSSerializationTraits<SPSSequence<SPSTuple<SPSString, SPSValueT>>,
- StringMap<ValueT>> {
-public:
- static size_t size(const StringMap<ValueT> &M) {
- size_t Sz = SPSArgList<uint64_t>::size(static_cast<uint64_t>(M.size()));
- for (auto &E : M)
- Sz += SPSArgList<SPSString, SPSValueT>::size(E.first(), E.second);
- return Sz;
- }
-
- static bool serialize(SPSOutputBuffer &OB, const StringMap<ValueT> &M) {
- if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(M.size())))
- return false;
-
- for (auto &E : M)
- if (!SPSArgList<SPSString, SPSValueT>::serialize(OB, E.first(), E.second))
- return false;
-
- return true;
- }
-
- static bool deserialize(SPSInputBuffer &IB, StringMap<ValueT> &M) {
- uint64_t Size;
- assert(M.empty() && "M already contains elements");
-
- if (!SPSArgList<uint64_t>::deserialize(IB, Size))
- return false;
-
- while (Size--) {
- StringRef S;
- ValueT V;
- if (!SPSArgList<SPSString, SPSValueT>::deserialize(IB, S, V))
- return false;
- if (!M.insert(std::make_pair(S, V)).second)
- return false;
- }
-
- return true;
- }
-};
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LLVMSPSSERIALIZERS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h
new file mode 100644
index 000000000000..a598405ee4f6
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h
@@ -0,0 +1,70 @@
+//===-- LookupAndRecordAddrs.h - Symbol lookup support utility --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Record the addresses of a set of symbols into ExecutorAddr objects.
+//
+// This can be used to avoid repeated lookup (via ExecutionSession::lookup) of
+// the given symbols.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
+#define LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
+
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// Record addresses of the given symbols in the given ExecutorAddrs.
+///
+/// Useful for making permanent records of symbol addresses to call or
+/// access in the executor (e.g. runtime support functions in Platform
+/// subclasses).
+///
+/// By default the symbols are looked up using
+/// SymbolLookupFlags::RequiredSymbol, and an error will be generated if any of
+/// the requested symbols are not defined.
+///
+/// If SymbolLookupFlags::WeaklyReferencedSymbol is used then any missing
+/// symbols will have their corresponding address objects set to zero, and
+/// this function will never generate an error (the caller will need to check
+/// addresses before using them).
+///
+/// Asynchronous version.
+void lookupAndRecordAddrs(
+ unique_function<void(Error)> OnRecorded, ExecutionSession &ES, LookupKind K,
+ const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+/// Record addresses of the given symbols in the given ExecutorAddrs.
+///
+/// Blocking version.
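+///
+/// A sketch of recording two runtime symbols (the symbol names below are
+/// illustrative placeholders):
+///
+/// \code{.cpp}
+///   ExecutorAddr Bootstrap, Shutdown;
+///   if (auto Err = lookupAndRecordAddrs(
+///           ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
+///           {{ES.intern("__example_bootstrap"), &Bootstrap},
+///            {ES.intern("__example_shutdown"), &Shutdown}}))
+///     return Err;
+/// \endcode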
+Error lookupAndRecordAddrs(
+ ExecutionSession &ES, LookupKind K, const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+/// Record addresses of given symbols in the given ExecutorAddrs.
+///
+/// ExecutorProcessControl lookup version. Lookups are always implicitly
+/// weak.
+Error lookupAndRecordAddrs(
+ ExecutorProcessControl &EPC, tpctypes::DylibHandle H,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags = SymbolLookupFlags::RequiredSymbol);
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_LOOKUPANDRECORDADDRS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index f77dfd208413..d7b5e2eda6ee 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
-#include "llvm/ExecutionEngine/Orc/LLVMSPSSerializers.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
@@ -27,22 +26,16 @@
namespace llvm {
namespace orc {
-struct MachOPerObjectSectionsToRegister {
- ExecutorAddressRange EHFrameSection;
- ExecutorAddressRange ThreadDataSection;
-};
-
struct MachOJITDylibInitializers {
- using SectionList = std::vector<ExecutorAddressRange>;
+ using SectionList = std::vector<ExecutorAddrRange>;
- MachOJITDylibInitializers(std::string Name,
- ExecutorAddress MachOHeaderAddress)
+ MachOJITDylibInitializers(std::string Name, ExecutorAddr MachOHeaderAddress)
: Name(std::move(Name)),
MachOHeaderAddress(std::move(MachOHeaderAddress)) {}
std::string Name;
- ExecutorAddress MachOHeaderAddress;
- ExecutorAddress ObjCImageInfoAddress;
+ ExecutorAddr MachOHeaderAddress;
+ ExecutorAddr ObjCImageInfoAddress;
StringMap<SectionList> InitSections;
};
@@ -155,15 +148,12 @@ private:
using InitSymbolDepMap =
DenseMap<MaterializationResponsibility *, JITLinkSymbolSet>;
- void addInitializerSupportPasses(MaterializationResponsibility &MR,
- jitlink::PassConfiguration &Config);
-
- void addMachOHeaderSupportPasses(MaterializationResponsibility &MR,
- jitlink::PassConfiguration &Config);
-
void addEHAndTLVSupportPasses(MaterializationResponsibility &MR,
jitlink::PassConfiguration &Config);
+ Error associateJITDylibHeaderSymbol(jitlink::LinkGraph &G,
+ MaterializationResponsibility &MR);
+
Error preserveInitSections(jitlink::LinkGraph &G,
MaterializationResponsibility &MR);
@@ -174,6 +164,10 @@ private:
Error fixTLVSectionsAndEdges(jitlink::LinkGraph &G, JITDylib &JD);
+ Error registerEHAndTLVSections(jitlink::LinkGraph &G);
+
+ Error registerEHSectionsPhase1(jitlink::LinkGraph &G);
+
std::mutex PluginMutex;
MachOPlatform &MP;
DenseMap<JITDylib *, std::pair<uint32_t, uint32_t>> ObjCImageInfos;
@@ -186,7 +180,7 @@ private:
using SendDeinitializerSequenceFn =
unique_function<void(Expected<MachOJITDylibDeinitializerSequence>)>;
- using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddress>)>;
+ using SendSymbolAddressFn = unique_function<void(Expected<ExecutorAddr>)>;
static bool supportedTarget(const Triple &TT);
@@ -209,31 +203,34 @@ private:
StringRef JDName);
void rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
- ExecutorAddress Handle);
+ ExecutorAddr Handle);
- void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddress Handle,
+ void rt_lookupSymbol(SendSymbolAddressFn SendResult, ExecutorAddr Handle,
StringRef SymbolName);
// Records the addresses of runtime symbols used by the platform.
Error bootstrapMachORuntime(JITDylib &PlatformJD);
- Error registerInitInfo(JITDylib &JD, ExecutorAddress ObjCImageInfoAddr,
+ Error registerInitInfo(JITDylib &JD, ExecutorAddr ObjCImageInfoAddr,
ArrayRef<jitlink::Section *> InitSections);
- Error registerPerObjectSections(const MachOPerObjectSectionsToRegister &POSR);
-
Expected<uint64_t> createPThreadKey();
+ enum PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized };
+
ExecutionSession &ES;
ObjectLinkingLayer &ObjLinkingLayer;
SymbolStringPtr MachOHeaderStartSymbol;
- std::atomic<bool> RuntimeBootstrapped{false};
+ std::atomic<PlatformState> State{BootstrapPhase1};
- ExecutorAddress orc_rt_macho_platform_bootstrap;
- ExecutorAddress orc_rt_macho_platform_shutdown;
- ExecutorAddress orc_rt_macho_register_object_sections;
- ExecutorAddress orc_rt_macho_create_pthread_key;
+ ExecutorAddr orc_rt_macho_platform_bootstrap;
+ ExecutorAddr orc_rt_macho_platform_shutdown;
+ ExecutorAddr orc_rt_macho_register_ehframe_section;
+ ExecutorAddr orc_rt_macho_deregister_ehframe_section;
+ ExecutorAddr orc_rt_macho_register_thread_data_section;
+ ExecutorAddr orc_rt_macho_deregister_thread_data_section;
+ ExecutorAddr orc_rt_macho_create_pthread_key;
DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
@@ -241,7 +238,6 @@ private:
// aggregating data from the jitlink.
std::mutex PlatformMutex;
DenseMap<JITDylib *, MachOJITDylibInitializers> InitSeqs;
- std::vector<MachOPerObjectSectionsToRegister> BootstrapPOSRs;
DenseMap<JITTargetAddress, JITDylib *> HeaderAddrToJITDylib;
DenseMap<JITDylib *, uint64_t> JITDylibToPThreadKey;
@@ -249,38 +245,12 @@ private:
namespace shared {
-using SPSMachOPerObjectSectionsToRegister =
- SPSTuple<SPSExecutorAddressRange, SPSExecutorAddressRange>;
-
-template <>
-class SPSSerializationTraits<SPSMachOPerObjectSectionsToRegister,
- MachOPerObjectSectionsToRegister> {
-
-public:
- static size_t size(const MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::size(
- MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-
- static bool serialize(SPSOutputBuffer &OB,
- const MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::serialize(
- OB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-
- static bool deserialize(SPSInputBuffer &IB,
- MachOPerObjectSectionsToRegister &MOPOSR) {
- return SPSMachOPerObjectSectionsToRegister::AsArgList::deserialize(
- IB, MOPOSR.EHFrameSection, MOPOSR.ThreadDataSection);
- }
-};
-
-using SPSNamedExecutorAddressRangeSequenceMap =
- SPSSequence<SPSTuple<SPSString, SPSExecutorAddressRangeSequence>>;
+using SPSNamedExecutorAddrRangeSequenceMap =
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddrRangeSequence>>;
using SPSMachOJITDylibInitializers =
- SPSTuple<SPSString, SPSExecutorAddress, SPSExecutorAddress,
- SPSNamedExecutorAddressRangeSequenceMap>;
+ SPSTuple<SPSString, SPSExecutorAddr, SPSExecutorAddr,
+ SPSNamedExecutorAddrRangeSequenceMap>;
using SPSMachOJITDylibInitializerSequence =
SPSSequence<SPSMachOJITDylibInitializers>;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index 5632118eee4e..109922a46e26 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -184,13 +184,13 @@ public:
}
private:
- using AllocPtr = std::unique_ptr<jitlink::JITLinkMemoryManager::Allocation>;
+ using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
void modifyPassConfig(MaterializationResponsibility &MR,
jitlink::LinkGraph &G,
jitlink::PassConfiguration &PassConfig);
void notifyLoaded(MaterializationResponsibility &MR);
- Error notifyEmitted(MaterializationResponsibility &MR, AllocPtr Alloc);
+ Error notifyEmitted(MaterializationResponsibility &MR, FinalizedAlloc FA);
Error handleRemoveResources(ResourceKey K) override;
void handleTransferResources(ResourceKey DstKey, ResourceKey SrcKey) override;
@@ -201,7 +201,7 @@ private:
bool OverrideObjectFlags = false;
bool AutoClaimObjectSymbols = false;
ReturnObjectBufferFunction ReturnObjectBuffer;
- DenseMap<ResourceKey, std::vector<AllocPtr>> Allocs;
+ DenseMap<ResourceKey, std::vector<FinalizedAlloc>> Allocs;
std::vector<std::unique_ptr<Plugin>> Plugins;
};
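The AllocPtr to FinalizedAlloc change above keeps the same bookkeeping shape: allocation handles grouped per resource key, handed back or transferred wholesale when resources are removed or re-keyed. A minimal sketch of that bookkeeping with standard containers follows; Handle, AllocTracker, and ResourceKey are stand-ins rather than the ObjectLinkingLayer types, and treating the handle as move-only is an assumption of the sketch, not something this diff states.

#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

// Illustrative stand-in for a move-only finalized-allocation handle.
struct Handle {
  explicit Handle(uint64_t Addr) : Addr(Addr) {}
  Handle(Handle &&) = default;
  Handle &operator=(Handle &&) = default;
  Handle(const Handle &) = delete;
  uint64_t Addr;
};

using ResourceKey = uintptr_t;

class AllocTracker {
public:
  void record(ResourceKey K, Handle H) { Allocs[K].push_back(std::move(H)); }

  // In the spirit of handleTransferResources: fold SrcKey's handles into
  // DstKey and forget SrcKey.
  void transfer(ResourceKey DstKey, ResourceKey SrcKey) {
    auto I = Allocs.find(SrcKey);
    if (I == Allocs.end())
      return;
    std::vector<Handle> Src = std::move(I->second);
    Allocs.erase(I);
    auto &Dst = Allocs[DstKey];
    for (auto &H : Src)
      Dst.push_back(std::move(H));
  }

  // In the spirit of handleRemoveResources: hand back everything owned by K.
  std::vector<Handle> take(ResourceKey K) {
    std::vector<Handle> Out;
    auto I = Allocs.find(K);
    if (I != Allocs.end()) {
      Out = std::move(I->second);
      Allocs.erase(I);
    }
    return Out;
  }

private:
  std::unordered_map<ResourceKey, std::vector<Handle>> Allocs;
};

Grouping handles per key keeps removal and ownership transfer cheap: both operate on whole vectors rather than on individual allocations.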
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
deleted file mode 100644
index 4310ba9ce9e0..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRPCExecutorProcessControl.h
+++ /dev/null
@@ -1,436 +0,0 @@
-//===-- OrcRPCExecutorProcessControl.h - Remote target control --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Executor control via ORC RPC.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H
-
-#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-#include "llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
-
-namespace llvm {
-namespace orc {
-
-/// JITLinkMemoryManager implementation for a process connected via an ORC RPC
-/// endpoint.
-template <typename OrcRPCEPCImplT>
-class OrcRPCEPCJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
-private:
- struct HostAlloc {
- std::unique_ptr<char[]> Mem;
- uint64_t Size;
- };
-
- struct TargetAlloc {
- JITTargetAddress Address = 0;
- uint64_t AllocatedSize = 0;
- };
-
- using HostAllocMap = DenseMap<int, HostAlloc>;
- using TargetAllocMap = DenseMap<int, TargetAlloc>;
-
-public:
- class OrcRPCAllocation : public Allocation {
- public:
- OrcRPCAllocation(OrcRPCEPCJITLinkMemoryManager<OrcRPCEPCImplT> &Parent,
- HostAllocMap HostAllocs, TargetAllocMap TargetAllocs)
- : Parent(Parent), HostAllocs(std::move(HostAllocs)),
- TargetAllocs(std::move(TargetAllocs)) {
- assert(HostAllocs.size() == TargetAllocs.size() &&
- "HostAllocs size should match TargetAllocs");
- }
-
- ~OrcRPCAllocation() override {
- assert(TargetAllocs.empty() && "failed to deallocate");
- }
-
- MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
- auto I = HostAllocs.find(Seg);
- assert(I != HostAllocs.end() && "No host allocation for segment");
- auto &HA = I->second;
- return {HA.Mem.get(), static_cast<size_t>(HA.Size)};
- }
-
- JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
- auto I = TargetAllocs.find(Seg);
- assert(I != TargetAllocs.end() && "No target allocation for segment");
- return I->second.Address;
- }
-
- void finalizeAsync(FinalizeContinuation OnFinalize) override {
-
- std::vector<tpctypes::BufferWrite> BufferWrites;
- orcrpctpc::ReleaseOrFinalizeMemRequest FMR;
-
- for (auto &KV : HostAllocs) {
- assert(TargetAllocs.count(KV.first) &&
- "No target allocation for buffer");
- auto &HA = KV.second;
- auto &TA = TargetAllocs[KV.first];
- BufferWrites.push_back({TA.Address, StringRef(HA.Mem.get(), HA.Size)});
- FMR.push_back({orcrpctpc::toWireProtectionFlags(
- static_cast<sys::Memory::ProtectionFlags>(KV.first)),
- TA.Address, TA.AllocatedSize});
- }
-
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "finalizeAsync " << (void *)this << ":\n";
- auto FMRI = FMR.begin();
- for (auto &B : BufferWrites) {
- auto Prot = FMRI->Prot;
- ++FMRI;
- dbgs() << " Writing " << formatv("{0:x16}", B.Buffer.size())
- << " bytes to " << ((Prot & orcrpctpc::WPF_Read) ? 'R' : '-')
- << ((Prot & orcrpctpc::WPF_Write) ? 'W' : '-')
- << ((Prot & orcrpctpc::WPF_Exec) ? 'X' : '-')
- << " segment: local " << (const void *)B.Buffer.data()
- << " -> target " << formatv("{0:x16}", B.Address) << "\n";
- }
- });
- if (auto Err =
- Parent.Parent.getMemoryAccess().writeBuffers(BufferWrites)) {
- OnFinalize(std::move(Err));
- return;
- }
-
- DEBUG_WITH_TYPE("orc", dbgs() << " Applying permissions...\n");
- if (auto Err =
- Parent.getEndpoint().template callAsync<orcrpctpc::FinalizeMem>(
- [OF = std::move(OnFinalize)](Error Err2) {
- // FIXME: Dispatch to work queue.
- std::thread([OF = std::move(OF),
- Err3 = std::move(Err2)]() mutable {
- DEBUG_WITH_TYPE(
- "orc", { dbgs() << " finalizeAsync complete\n"; });
- OF(std::move(Err3));
- }).detach();
- return Error::success();
- },
- FMR)) {
- DEBUG_WITH_TYPE("orc", dbgs() << " failed.\n");
- Parent.getEndpoint().abandonPendingResponses();
- Parent.reportError(std::move(Err));
- }
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Leaving finalizeAsync (finalization may continue in "
- "background)\n";
- });
- }
-
- Error deallocate() override {
- orcrpctpc::ReleaseOrFinalizeMemRequest RMR;
- for (auto &KV : TargetAllocs)
- RMR.push_back({orcrpctpc::toWireProtectionFlags(
- static_cast<sys::Memory::ProtectionFlags>(KV.first)),
- KV.second.Address, KV.second.AllocatedSize});
- TargetAllocs.clear();
-
- return Parent.getEndpoint().template callB<orcrpctpc::ReleaseMem>(RMR);
- }
-
- private:
- OrcRPCEPCJITLinkMemoryManager<OrcRPCEPCImplT> &Parent;
- HostAllocMap HostAllocs;
- TargetAllocMap TargetAllocs;
- };
-
- OrcRPCEPCJITLinkMemoryManager(OrcRPCEPCImplT &Parent) : Parent(Parent) {}
-
- Expected<std::unique_ptr<Allocation>>
- allocate(const jitlink::JITLinkDylib *JD,
- const SegmentsRequestMap &Request) override {
- orcrpctpc::ReserveMemRequest RMR;
- HostAllocMap HostAllocs;
-
- for (auto &KV : Request) {
- assert(KV.second.getContentSize() <= std::numeric_limits<size_t>::max() &&
- "Content size is out-of-range for host");
-
- RMR.push_back({orcrpctpc::toWireProtectionFlags(
- static_cast<sys::Memory::ProtectionFlags>(KV.first)),
- KV.second.getContentSize() + KV.second.getZeroFillSize(),
- KV.second.getAlignment()});
- HostAllocs[KV.first] = {
- std::make_unique<char[]>(KV.second.getContentSize()),
- KV.second.getContentSize()};
- }
-
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Orc remote memmgr got request:\n";
- for (auto &KV : Request)
- dbgs() << " permissions: "
- << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-')
- << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-')
- << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-')
- << ", content size: "
- << formatv("{0:x16}", KV.second.getContentSize())
- << " + zero-fill-size: "
- << formatv("{0:x16}", KV.second.getZeroFillSize())
- << ", align: " << KV.second.getAlignment() << "\n";
- });
-
- // FIXME: LLVM RPC needs to be fixed to support alt
- // serialization/deserialization on return types. For now just
- // translate from std::map to DenseMap manually.
- auto TmpTargetAllocs =
- Parent.getEndpoint().template callB<orcrpctpc::ReserveMem>(RMR);
- if (!TmpTargetAllocs)
- return TmpTargetAllocs.takeError();
-
- if (TmpTargetAllocs->size() != RMR.size())
- return make_error<StringError>(
- "Number of target allocations does not match request",
- inconvertibleErrorCode());
-
- TargetAllocMap TargetAllocs;
- for (auto &E : *TmpTargetAllocs)
- TargetAllocs[orcrpctpc::fromWireProtectionFlags(E.Prot)] = {
- E.Address, E.AllocatedSize};
-
- DEBUG_WITH_TYPE("orc", {
- auto HAI = HostAllocs.begin();
- for (auto &KV : TargetAllocs)
- dbgs() << " permissions: "
- << ((KV.first & sys::Memory::MF_READ) ? 'R' : '-')
- << ((KV.first & sys::Memory::MF_WRITE) ? 'W' : '-')
- << ((KV.first & sys::Memory::MF_EXEC) ? 'X' : '-')
- << " assigned local " << (void *)HAI->second.Mem.get()
- << ", target " << formatv("{0:x16}", KV.second.Address) << "\n";
- });
-
- return std::make_unique<OrcRPCAllocation>(*this, std::move(HostAllocs),
- std::move(TargetAllocs));
- }
-
-private:
- void reportError(Error Err) { Parent.reportError(std::move(Err)); }
-
- decltype(std::declval<OrcRPCEPCImplT>().getEndpoint()) getEndpoint() {
- return Parent.getEndpoint();
- }
-
- OrcRPCEPCImplT &Parent;
-};
-
-/// ExecutorProcessControl::MemoryAccess implementation for a process connected
-/// via an ORC RPC endpoint.
-template <typename OrcRPCEPCImplT>
-class OrcRPCEPCMemoryAccess : public ExecutorProcessControl::MemoryAccess {
-public:
- OrcRPCEPCMemoryAccess(OrcRPCEPCImplT &Parent) : Parent(Parent) {}
-
- void writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt8s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeUInt16s(ArrayRef<tpctypes::UInt16Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt16s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeUInt32s(ArrayRef<tpctypes::UInt32Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt32s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeUInt64s(ArrayRef<tpctypes::UInt64Write> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteUInt64s>(Ws, std::move(OnWriteComplete));
- }
-
- void writeBuffers(ArrayRef<tpctypes::BufferWrite> Ws,
- WriteResultFn OnWriteComplete) override {
- writeViaRPC<orcrpctpc::WriteBuffers>(Ws, std::move(OnWriteComplete));
- }
-
-private:
- template <typename WriteRPCFunction, typename WriteElementT>
- void writeViaRPC(ArrayRef<WriteElementT> Ws, WriteResultFn OnWriteComplete) {
- if (auto Err = Parent.getEndpoint().template callAsync<WriteRPCFunction>(
- [OWC = std::move(OnWriteComplete)](Error Err2) mutable -> Error {
- OWC(std::move(Err2));
- return Error::success();
- },
- Ws)) {
- Parent.reportError(std::move(Err));
- Parent.getEndpoint().abandonPendingResponses();
- }
- }
-
- OrcRPCEPCImplT &Parent;
-};
-
-// ExecutorProcessControl for a process connected via an ORC RPC Endpoint.
-template <typename RPCEndpointT>
-class OrcRPCExecutorProcessControlBase : public ExecutorProcessControl {
-public:
- using ErrorReporter = unique_function<void(Error)>;
-
- using OnCloseConnectionFunction = unique_function<Error(Error)>;
-
- OrcRPCExecutorProcessControlBase(std::shared_ptr<SymbolStringPool> SSP,
- RPCEndpointT &EP, ErrorReporter ReportError)
- : ExecutorProcessControl(std::move(SSP)),
- ReportError(std::move(ReportError)), EP(EP) {
- using ThisT = OrcRPCExecutorProcessControlBase<RPCEndpointT>;
- EP.template addAsyncHandler<orcrpctpc::RunWrapper>(*this,
- &ThisT::runWrapperInJIT);
- }
-
- void reportError(Error Err) { ReportError(std::move(Err)); }
-
- RPCEndpointT &getEndpoint() { return EP; }
-
- Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override {
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Loading dylib \"" << (DylibPath ? DylibPath : "") << "\" ";
- if (!DylibPath)
- dbgs() << "(process symbols)";
- dbgs() << "\n";
- });
- if (!DylibPath)
- DylibPath = "";
- auto H = EP.template callB<orcrpctpc::LoadDylib>(DylibPath);
- DEBUG_WITH_TYPE("orc", {
- if (H)
- dbgs() << " got handle " << formatv("{0:x16}", *H) << "\n";
- else
- dbgs() << " error, unable to load\n";
- });
- return H;
- }
-
- Expected<std::vector<tpctypes::LookupResult>>
- lookupSymbols(ArrayRef<LookupRequest> Request) override {
- std::vector<orcrpctpc::RemoteLookupRequest> RR;
- for (auto &E : Request) {
- RR.push_back({});
- RR.back().first = E.Handle;
- for (auto &KV : E.Symbols)
- RR.back().second.push_back(
- {(*KV.first).str(),
- KV.second == SymbolLookupFlags::WeaklyReferencedSymbol});
- }
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Compound lookup:\n";
- for (auto &R : Request) {
- dbgs() << " In " << formatv("{0:x16}", R.Handle) << ": {";
- bool First = true;
- for (auto &KV : R.Symbols) {
- dbgs() << (First ? "" : ",") << " " << *KV.first;
- First = false;
- }
- dbgs() << " }\n";
- }
- });
- return EP.template callB<orcrpctpc::LookupSymbols>(RR);
- }
-
- Expected<int32_t> runAsMain(JITTargetAddress MainFnAddr,
- ArrayRef<std::string> Args) override {
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Running as main: " << formatv("{0:x16}", MainFnAddr)
- << ", args = [";
- for (unsigned I = 0; I != Args.size(); ++I)
- dbgs() << (I ? "," : "") << " \"" << Args[I] << "\"";
- dbgs() << "]\n";
- });
- auto Result = EP.template callB<orcrpctpc::RunMain>(MainFnAddr, Args);
- DEBUG_WITH_TYPE("orc", {
- dbgs() << " call to " << formatv("{0:x16}", MainFnAddr);
- if (Result)
- dbgs() << " returned result " << *Result << "\n";
- else
- dbgs() << " failed\n";
- });
- return Result;
- }
-
- void callWrapperAsync(SendResultFunction OnComplete,
- JITTargetAddress WrapperFnAddr,
- ArrayRef<char> ArgBuffer) override {
- DEBUG_WITH_TYPE("orc", {
- dbgs() << "Running as wrapper function "
- << formatv("{0:x16}", WrapperFnAddr) << " with "
- << formatv("{0:x16}", ArgBuffer.size()) << " argument buffer\n";
- });
- auto Result = EP.template callB<orcrpctpc::RunWrapper>(
- WrapperFnAddr,
- ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(ArgBuffer.data()),
- ArgBuffer.size()));
-
- if (!Result)
- OnComplete(shared::WrapperFunctionResult::createOutOfBandError(
- toString(Result.takeError())));
- OnComplete(std::move(*Result));
- }
-
- Error closeConnection(OnCloseConnectionFunction OnCloseConnection) {
- DEBUG_WITH_TYPE("orc", dbgs() << "Closing connection to remote\n");
- return EP.template callAsync<orcrpctpc::CloseConnection>(
- std::move(OnCloseConnection));
- }
-
- Error closeConnectionAndWait() {
- std::promise<MSVCPError> P;
- auto F = P.get_future();
- if (auto Err = closeConnection([&](Error Err2) -> Error {
- P.set_value(std::move(Err2));
- return Error::success();
- })) {
- EP.abandonAllPendingResponses();
- return joinErrors(std::move(Err), F.get());
- }
- return F.get();
- }
-
-protected:
- /// Subclasses must call this during construction to initialize the
- /// TargetTriple and PageSize members.
- Error initializeORCRPCEPCBase() {
- if (auto EPI = EP.template callB<orcrpctpc::GetExecutorProcessInfo>()) {
- this->TargetTriple = Triple(EPI->Triple);
- this->PageSize = PageSize;
- this->JDI = {ExecutorAddress(EPI->DispatchFuncAddr),
- ExecutorAddress(EPI->DispatchCtxAddr)};
- return Error::success();
- } else
- return EPI.takeError();
- }
-
-private:
- Error runWrapperInJIT(
- std::function<Error(Expected<shared::WrapperFunctionResult>)> SendResult,
- JITTargetAddress FunctionTag, std::vector<uint8_t> ArgBuffer) {
-
- getExecutionSession().runJITDispatchHandler(
- [this, SendResult = std::move(SendResult)](
- Expected<shared::WrapperFunctionResult> R) {
- if (auto Err = SendResult(std::move(R)))
- ReportError(std::move(Err));
- },
- FunctionTag,
- {reinterpret_cast<const char *>(ArgBuffer.data()), ArgBuffer.size()});
- return Error::success();
- }
-
- ErrorReporter ReportError;
- RPCEndpointT &EP;
-};
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCRPCEXECUTORPROCESSCONTROL_H
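One small pattern worth noting from the header deleted above: closeConnectionAndWait turns an asynchronous close into a blocking call by parking the result in a promise and waiting on the matching future. A reduced version using only standard types follows; the error type is simplified to a string, and closeAsync is a made-up placeholder for the RPC call.

#include <cassert>
#include <functional>
#include <future>
#include <string>
#include <utility>

using Status = std::string; // empty string means success; stands in for Error
using OnClosed = std::function<void(Status)>;

// Placeholder for an API that closes a connection asynchronously and reports
// the outcome through a callback. Here it simply succeeds at once.
void closeAsync(OnClosed Callback) { Callback(Status()); }

// Block until the asynchronous close completes, mirroring the shape of the
// deleted closeConnectionAndWait (which used std::promise<MSVCPError>).
Status closeAndWait() {
  std::promise<Status> P;
  auto F = P.get_future();
  closeAsync([&P](Status S) { P.set_value(std::move(S)); });
  return F.get();
}

int main() {
  assert(closeAndWait().empty());
  return 0;
}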
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
deleted file mode 100644
index 3d139740d677..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ /dev/null
@@ -1,925 +0,0 @@
-//===- OrcRemoteTargetClient.h - Orc Remote-target Client -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the OrcRemoteTargetClient class and helpers. This class
-// can be used to communicate over an RawByteChannel with an
-// OrcRemoteTargetServer instance to support remote-JITing.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
-
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
-#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstdint>
-#include <memory>
-#include <string>
-#include <tuple>
-#include <utility>
-#include <vector>
-
-#define DEBUG_TYPE "orc-remote"
-
-namespace llvm {
-namespace orc {
-namespace remote {
-
-/// This class provides utilities (including memory manager, indirect stubs
-/// manager, and compile callback manager types) that support remote JITing
-/// in ORC.
-///
-/// Each of the utility classes talks to a JIT server (an instance of the
-/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out
-/// its actions.
-class OrcRemoteTargetClient
- : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
-public:
- /// Remote-mapped RuntimeDyld-compatible memory manager.
- class RemoteRTDyldMemoryManager : public RuntimeDyld::MemoryManager {
- friend class OrcRemoteTargetClient;
-
- public:
- ~RemoteRTDyldMemoryManager() {
- Client.destroyRemoteAllocator(Id);
- LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n");
- }
-
- RemoteRTDyldMemoryManager(const RemoteRTDyldMemoryManager &) = delete;
- RemoteRTDyldMemoryManager &
- operator=(const RemoteRTDyldMemoryManager &) = delete;
- RemoteRTDyldMemoryManager(RemoteRTDyldMemoryManager &&) = default;
- RemoteRTDyldMemoryManager &operator=(RemoteRTDyldMemoryManager &&) = delete;
-
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override {
- Unmapped.back().CodeAllocs.emplace_back(Size, Alignment);
- uint8_t *Alloc = reinterpret_cast<uint8_t *>(
- Unmapped.back().CodeAllocs.back().getLocalAddress());
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated code for "
- << SectionName << ": " << Alloc << " (" << Size
- << " bytes, alignment " << Alignment << ")\n");
- return Alloc;
- }
-
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override {
- if (IsReadOnly) {
- Unmapped.back().RODataAllocs.emplace_back(Size, Alignment);
- uint8_t *Alloc = reinterpret_cast<uint8_t *>(
- Unmapped.back().RODataAllocs.back().getLocalAddress());
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for "
- << SectionName << ": " << Alloc << " (" << Size
- << " bytes, alignment " << Alignment << ")\n");
- return Alloc;
- } // else...
-
- Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment);
- uint8_t *Alloc = reinterpret_cast<uint8_t *>(
- Unmapped.back().RWDataAllocs.back().getLocalAddress());
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for "
- << SectionName << ": " << Alloc << " (" << Size
- << " bytes, alignment " << Alignment << ")\n");
- return Alloc;
- }
-
- void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
- uintptr_t RODataSize, uint32_t RODataAlign,
- uintptr_t RWDataSize,
- uint32_t RWDataAlign) override {
- Unmapped.push_back(ObjectAllocs());
-
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " reserved:\n");
-
- if (CodeSize != 0) {
- Unmapped.back().RemoteCodeAddr =
- Client.reserveMem(Id, CodeSize, CodeAlign);
-
- LLVM_DEBUG(
- dbgs() << " code: "
- << format("0x%016" PRIx64, Unmapped.back().RemoteCodeAddr)
- << " (" << CodeSize << " bytes, alignment " << CodeAlign
- << ")\n");
- }
-
- if (RODataSize != 0) {
- Unmapped.back().RemoteRODataAddr =
- Client.reserveMem(Id, RODataSize, RODataAlign);
-
- LLVM_DEBUG(
- dbgs() << " ro-data: "
- << format("0x%016" PRIx64, Unmapped.back().RemoteRODataAddr)
- << " (" << RODataSize << " bytes, alignment " << RODataAlign
- << ")\n");
- }
-
- if (RWDataSize != 0) {
- Unmapped.back().RemoteRWDataAddr =
- Client.reserveMem(Id, RWDataSize, RWDataAlign);
-
- LLVM_DEBUG(
- dbgs() << " rw-data: "
- << format("0x%016" PRIx64, Unmapped.back().RemoteRWDataAddr)
- << " (" << RWDataSize << " bytes, alignment " << RWDataAlign
- << ")\n");
- }
- }
-
- bool needsToReserveAllocationSpace() override { return true; }
-
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {
- UnfinalizedEHFrames.push_back({LoadAddr, Size});
- }
-
- void deregisterEHFrames() override {
- for (auto &Frame : RegisteredEHFrames) {
- // FIXME: Add error poll.
- Client.deregisterEHFrames(Frame.Addr, Frame.Size);
- }
- }
-
- void notifyObjectLoaded(RuntimeDyld &Dyld,
- const object::ObjectFile &Obj) override {
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n");
- for (auto &ObjAllocs : Unmapped) {
- mapAllocsToRemoteAddrs(Dyld, ObjAllocs.CodeAllocs,
- ObjAllocs.RemoteCodeAddr);
- mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RODataAllocs,
- ObjAllocs.RemoteRODataAddr);
- mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RWDataAllocs,
- ObjAllocs.RemoteRWDataAddr);
- Unfinalized.push_back(std::move(ObjAllocs));
- }
- Unmapped.clear();
- }
-
- bool finalizeMemory(std::string *ErrMsg = nullptr) override {
- LLVM_DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n");
-
- for (auto &ObjAllocs : Unfinalized) {
- if (copyAndProtect(ObjAllocs.CodeAllocs, ObjAllocs.RemoteCodeAddr,
- sys::Memory::MF_READ | sys::Memory::MF_EXEC))
- return true;
-
- if (copyAndProtect(ObjAllocs.RODataAllocs, ObjAllocs.RemoteRODataAddr,
- sys::Memory::MF_READ))
- return true;
-
- if (copyAndProtect(ObjAllocs.RWDataAllocs, ObjAllocs.RemoteRWDataAddr,
- sys::Memory::MF_READ | sys::Memory::MF_WRITE))
- return true;
- }
- Unfinalized.clear();
-
- for (auto &EHFrame : UnfinalizedEHFrames) {
- if (auto Err = Client.registerEHFrames(EHFrame.Addr, EHFrame.Size)) {
- // FIXME: Replace this once finalizeMemory can return an Error.
- handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
- if (ErrMsg) {
- raw_string_ostream ErrOut(*ErrMsg);
- EIB.log(ErrOut);
- }
- });
- return false;
- }
- }
- RegisteredEHFrames = std::move(UnfinalizedEHFrames);
- UnfinalizedEHFrames = {};
-
- return false;
- }
-
- private:
- class Alloc {
- public:
- Alloc(uint64_t Size, unsigned Align)
- : Size(Size), Align(Align), Contents(new char[Size + Align - 1]) {}
-
- Alloc(const Alloc &) = delete;
- Alloc &operator=(const Alloc &) = delete;
- Alloc(Alloc &&) = default;
- Alloc &operator=(Alloc &&) = default;
-
- uint64_t getSize() const { return Size; }
-
- unsigned getAlign() const { return Align; }
-
- char *getLocalAddress() const {
- uintptr_t LocalAddr = reinterpret_cast<uintptr_t>(Contents.get());
- LocalAddr = alignTo(LocalAddr, Align);
- return reinterpret_cast<char *>(LocalAddr);
- }
-
- void setRemoteAddress(JITTargetAddress RemoteAddr) {
- this->RemoteAddr = RemoteAddr;
- }
-
- JITTargetAddress getRemoteAddress() const { return RemoteAddr; }
-
- private:
- uint64_t Size;
- unsigned Align;
- std::unique_ptr<char[]> Contents;
- JITTargetAddress RemoteAddr = 0;
- };
-
- struct ObjectAllocs {
- ObjectAllocs() = default;
- ObjectAllocs(const ObjectAllocs &) = delete;
- ObjectAllocs &operator=(const ObjectAllocs &) = delete;
- ObjectAllocs(ObjectAllocs &&) = default;
- ObjectAllocs &operator=(ObjectAllocs &&) = default;
-
- JITTargetAddress RemoteCodeAddr = 0;
- JITTargetAddress RemoteRODataAddr = 0;
- JITTargetAddress RemoteRWDataAddr = 0;
- std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs;
- };
-
- RemoteRTDyldMemoryManager(OrcRemoteTargetClient &Client,
- ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {
- LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
- }
-
- // Maps all allocations in Allocs to aligned blocks
- void mapAllocsToRemoteAddrs(RuntimeDyld &Dyld, std::vector<Alloc> &Allocs,
- JITTargetAddress NextAddr) {
- for (auto &Alloc : Allocs) {
- NextAddr = alignTo(NextAddr, Alloc.getAlign());
- Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextAddr);
- LLVM_DEBUG(
- dbgs() << " " << static_cast<void *>(Alloc.getLocalAddress())
- << " -> " << format("0x%016" PRIx64, NextAddr) << "\n");
- Alloc.setRemoteAddress(NextAddr);
-
- // Only advance NextAddr if it was non-null to begin with,
- // otherwise leave it as null.
- if (NextAddr)
- NextAddr += Alloc.getSize();
- }
- }
-
- // Copies data for each alloc in the list, then set permissions on the
- // segment.
- bool copyAndProtect(const std::vector<Alloc> &Allocs,
- JITTargetAddress RemoteSegmentAddr,
- unsigned Permissions) {
- if (RemoteSegmentAddr) {
- assert(!Allocs.empty() && "No sections in allocated segment");
-
- for (auto &Alloc : Allocs) {
- LLVM_DEBUG(dbgs() << " copying section: "
- << static_cast<void *>(Alloc.getLocalAddress())
- << " -> "
- << format("0x%016" PRIx64, Alloc.getRemoteAddress())
- << " (" << Alloc.getSize() << " bytes)\n";);
-
- if (Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
- Alloc.getSize()))
- return true;
- }
-
- LLVM_DEBUG(dbgs() << " setting "
- << (Permissions & sys::Memory::MF_READ ? 'R' : '-')
- << (Permissions & sys::Memory::MF_WRITE ? 'W' : '-')
- << (Permissions & sys::Memory::MF_EXEC ? 'X' : '-')
- << " permissions on block: "
- << format("0x%016" PRIx64, RemoteSegmentAddr)
- << "\n");
- if (Client.setProtections(Id, RemoteSegmentAddr, Permissions))
- return true;
- }
- return false;
- }
-
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- std::vector<ObjectAllocs> Unmapped;
- std::vector<ObjectAllocs> Unfinalized;
-
- struct EHFrame {
- JITTargetAddress Addr;
- uint64_t Size;
- };
- std::vector<EHFrame> UnfinalizedEHFrames;
- std::vector<EHFrame> RegisteredEHFrames;
- };
-
- class RPCMMAlloc : public jitlink::JITLinkMemoryManager::Allocation {
- using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
- using FinalizeContinuation =
- jitlink::JITLinkMemoryManager::Allocation::FinalizeContinuation;
- using ProtectionFlags = sys::Memory::ProtectionFlags;
- using SegmentsRequestMap =
- DenseMap<unsigned, jitlink::JITLinkMemoryManager::SegmentRequest>;
-
- RPCMMAlloc(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {}
-
- public:
- static Expected<std::unique_ptr<RPCMMAlloc>>
- Create(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id,
- const SegmentsRequestMap &Request) {
- auto *MM = new RPCMMAlloc(Client, Id);
-
- if (Error Err = MM->allocateHostBlocks(Request))
- return std::move(Err);
-
- if (Error Err = MM->allocateTargetBlocks())
- return std::move(Err);
-
- return std::unique_ptr<RPCMMAlloc>(MM);
- }
-
- MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
- assert(HostSegBlocks.count(Seg) && "No allocation for segment");
- return {static_cast<char *>(HostSegBlocks[Seg].base()),
- HostSegBlocks[Seg].allocatedSize()};
- }
-
- JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
- assert(TargetSegBlocks.count(Seg) && "No allocation for segment");
- return pointerToJITTargetAddress(TargetSegBlocks[Seg].base());
- }
-
- void finalizeAsync(FinalizeContinuation OnFinalize) override {
- // Host allocations (working memory) remain ReadWrite.
- OnFinalize(copyAndProtect());
- }
-
- Error deallocate() override {
- // TODO: Cannot release target allocation. RPCAPI has no function
- // symmetric to reserveMem(). Add RPC call like freeMem()?
- return errorCodeToError(sys::Memory::releaseMappedMemory(HostAllocation));
- }
-
- private:
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- AllocationMap HostSegBlocks;
- AllocationMap TargetSegBlocks;
- JITTargetAddress TargetSegmentAddr;
- sys::MemoryBlock HostAllocation;
-
- Error allocateHostBlocks(const SegmentsRequestMap &Request) {
- unsigned TargetPageSize = Client.getPageSize();
-
- if (!isPowerOf2_64(static_cast<uint64_t>(TargetPageSize)))
- return make_error<StringError>("Host page size is not a power of 2",
- inconvertibleErrorCode());
-
- auto TotalSize = calcTotalAllocSize(Request, TargetPageSize);
- if (!TotalSize)
- return TotalSize.takeError();
-
- // Allocate one slab to cover all the segments.
- const sys::Memory::ProtectionFlags ReadWrite =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
- std::error_code EC;
- HostAllocation =
- sys::Memory::allocateMappedMemory(*TotalSize, nullptr, ReadWrite, EC);
- if (EC)
- return errorCodeToError(EC);
-
- char *SlabAddr = static_cast<char *>(HostAllocation.base());
-#ifndef NDEBUG
- char *SlabAddrEnd = SlabAddr + HostAllocation.allocatedSize();
-#endif
-
- // Allocate segment memory from the slab.
- for (auto &KV : Request) {
- const auto &Seg = KV.second;
-
- uint64_t SegmentSize = Seg.getContentSize() + Seg.getZeroFillSize();
- uint64_t AlignedSegmentSize = alignTo(SegmentSize, TargetPageSize);
-
- // Zero out zero-fill memory.
- char *ZeroFillBegin = SlabAddr + Seg.getContentSize();
- memset(ZeroFillBegin, 0, Seg.getZeroFillSize());
-
- // Record the block for this segment.
- HostSegBlocks[KV.first] =
- sys::MemoryBlock(SlabAddr, AlignedSegmentSize);
-
- SlabAddr += AlignedSegmentSize;
- assert(SlabAddr <= SlabAddrEnd && "Out of range");
- }
-
- return Error::success();
- }
-
- Error allocateTargetBlocks() {
- // Reserve memory for all blocks on the target. We need as much space on
- // the target as we allocated on the host.
- TargetSegmentAddr = Client.reserveMem(Id, HostAllocation.allocatedSize(),
- Client.getPageSize());
- if (!TargetSegmentAddr)
- return make_error<StringError>("Failed to reserve memory on the target",
- inconvertibleErrorCode());
-
- // Map memory blocks into the allocation, that match the host allocation.
- JITTargetAddress TargetAllocAddr = TargetSegmentAddr;
- for (const auto &KV : HostSegBlocks) {
- size_t TargetAllocSize = KV.second.allocatedSize();
-
- TargetSegBlocks[KV.first] =
- sys::MemoryBlock(jitTargetAddressToPointer<void *>(TargetAllocAddr),
- TargetAllocSize);
-
- TargetAllocAddr += TargetAllocSize;
- assert(TargetAllocAddr - TargetSegmentAddr <=
- HostAllocation.allocatedSize() &&
- "Out of range on target");
- }
-
- return Error::success();
- }
-
- Error copyAndProtect() {
- unsigned Permissions = 0u;
-
- // Copy segments one by one.
- for (auto &KV : TargetSegBlocks) {
- Permissions |= KV.first;
-
- const sys::MemoryBlock &TargetBlock = KV.second;
- const sys::MemoryBlock &HostBlock = HostSegBlocks.lookup(KV.first);
-
- size_t TargetAllocSize = TargetBlock.allocatedSize();
- auto TargetAllocAddr = pointerToJITTargetAddress(TargetBlock.base());
- auto *HostAllocBegin = static_cast<const char *>(HostBlock.base());
-
- bool CopyErr =
- Client.writeMem(TargetAllocAddr, HostAllocBegin, TargetAllocSize);
- if (CopyErr)
- return createStringError(inconvertibleErrorCode(),
- "Failed to copy %d segment to the target",
- KV.first);
- }
-
- // Set permission flags for all segments at once.
- bool ProtectErr =
- Client.setProtections(Id, TargetSegmentAddr, Permissions);
- if (ProtectErr)
- return createStringError(inconvertibleErrorCode(),
- "Failed to apply permissions for %d segment "
- "on the target",
- Permissions);
- return Error::success();
- }
-
- static Expected<size_t>
- calcTotalAllocSize(const SegmentsRequestMap &Request,
- unsigned TargetPageSize) {
- size_t TotalSize = 0;
- for (const auto &KV : Request) {
- const auto &Seg = KV.second;
-
- if (Seg.getAlignment() > TargetPageSize)
- return make_error<StringError>("Cannot request alignment higher than "
- "page alignment on target",
- inconvertibleErrorCode());
-
- TotalSize = alignTo(TotalSize, TargetPageSize);
- TotalSize += Seg.getContentSize();
- TotalSize += Seg.getZeroFillSize();
- }
-
- return TotalSize;
- }
- };
-
- class RemoteJITLinkMemoryManager : public jitlink::JITLinkMemoryManager {
- public:
- RemoteJITLinkMemoryManager(OrcRemoteTargetClient &Client,
- ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {}
-
- RemoteJITLinkMemoryManager(const RemoteJITLinkMemoryManager &) = delete;
- RemoteJITLinkMemoryManager(RemoteJITLinkMemoryManager &&) = default;
-
- RemoteJITLinkMemoryManager &
- operator=(const RemoteJITLinkMemoryManager &) = delete;
- RemoteJITLinkMemoryManager &
- operator=(RemoteJITLinkMemoryManager &&) = delete;
-
- ~RemoteJITLinkMemoryManager() {
- Client.destroyRemoteAllocator(Id);
- LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n");
- }
-
- Expected<std::unique_ptr<Allocation>>
- allocate(const jitlink::JITLinkDylib *JD,
- const SegmentsRequestMap &Request) override {
- return RPCMMAlloc::Create(Client, Id, Request);
- }
-
- private:
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- };
-
- /// Remote indirect stubs manager.
- class RemoteIndirectStubsManager : public IndirectStubsManager {
- public:
- RemoteIndirectStubsManager(OrcRemoteTargetClient &Client,
- ResourceIdMgr::ResourceId Id)
- : Client(Client), Id(Id) {}
-
- ~RemoteIndirectStubsManager() override {
- Client.destroyIndirectStubsManager(Id);
- }
-
- Error createStub(StringRef StubName, JITTargetAddress StubAddr,
- JITSymbolFlags StubFlags) override {
- if (auto Err = reserveStubs(1))
- return Err;
-
- return createStubInternal(StubName, StubAddr, StubFlags);
- }
-
- Error createStubs(const StubInitsMap &StubInits) override {
- if (auto Err = reserveStubs(StubInits.size()))
- return Err;
-
- for (auto &Entry : StubInits)
- if (auto Err = createStubInternal(Entry.first(), Entry.second.first,
- Entry.second.second))
- return Err;
-
- return Error::success();
- }
-
- JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
- auto I = StubIndexes.find(Name);
- if (I == StubIndexes.end())
- return nullptr;
- auto Key = I->second.first;
- auto Flags = I->second.second;
- auto StubSymbol = JITEvaluatedSymbol(getStubAddr(Key), Flags);
- if (ExportedStubsOnly && !StubSymbol.getFlags().isExported())
- return nullptr;
- return StubSymbol;
- }
-
- JITEvaluatedSymbol findPointer(StringRef Name) override {
- auto I = StubIndexes.find(Name);
- if (I == StubIndexes.end())
- return nullptr;
- auto Key = I->second.first;
- auto Flags = I->second.second;
- return JITEvaluatedSymbol(getPtrAddr(Key), Flags);
- }
-
- Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
- auto I = StubIndexes.find(Name);
- assert(I != StubIndexes.end() && "No stub pointer for symbol");
- auto Key = I->second.first;
- return Client.writePointer(getPtrAddr(Key), NewAddr);
- }
-
- private:
- struct RemoteIndirectStubsInfo {
- JITTargetAddress StubBase;
- JITTargetAddress PtrBase;
- unsigned NumStubs;
- };
-
- using StubKey = std::pair<uint16_t, uint16_t>;
-
- Error reserveStubs(unsigned NumStubs) {
- if (NumStubs <= FreeStubs.size())
- return Error::success();
-
- unsigned NewStubsRequired = NumStubs - FreeStubs.size();
- JITTargetAddress StubBase;
- JITTargetAddress PtrBase;
- unsigned NumStubsEmitted;
-
- if (auto StubInfoOrErr = Client.emitIndirectStubs(Id, NewStubsRequired))
- std::tie(StubBase, PtrBase, NumStubsEmitted) = *StubInfoOrErr;
- else
- return StubInfoOrErr.takeError();
-
- unsigned NewBlockId = RemoteIndirectStubsInfos.size();
- RemoteIndirectStubsInfos.push_back({StubBase, PtrBase, NumStubsEmitted});
-
- for (unsigned I = 0; I < NumStubsEmitted; ++I)
- FreeStubs.push_back(std::make_pair(NewBlockId, I));
-
- return Error::success();
- }
-
- Error createStubInternal(StringRef StubName, JITTargetAddress InitAddr,
- JITSymbolFlags StubFlags) {
- auto Key = FreeStubs.back();
- FreeStubs.pop_back();
- StubIndexes[StubName] = std::make_pair(Key, StubFlags);
- return Client.writePointer(getPtrAddr(Key), InitAddr);
- }
-
- JITTargetAddress getStubAddr(StubKey K) {
- assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 &&
- "Missing stub address");
- return RemoteIndirectStubsInfos[K.first].StubBase +
- K.second * Client.getIndirectStubSize();
- }
-
- JITTargetAddress getPtrAddr(StubKey K) {
- assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 &&
- "Missing pointer address");
- return RemoteIndirectStubsInfos[K.first].PtrBase +
- K.second * Client.getPointerSize();
- }
-
- OrcRemoteTargetClient &Client;
- ResourceIdMgr::ResourceId Id;
- std::vector<RemoteIndirectStubsInfo> RemoteIndirectStubsInfos;
- std::vector<StubKey> FreeStubs;
- StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes;
- };
-
- class RemoteTrampolinePool : public TrampolinePool {
- public:
- RemoteTrampolinePool(OrcRemoteTargetClient &Client) : Client(Client) {}
-
- private:
- Error grow() override {
- JITTargetAddress BlockAddr = 0;
- uint32_t NumTrampolines = 0;
- if (auto TrampolineInfoOrErr = Client.emitTrampolineBlock())
- std::tie(BlockAddr, NumTrampolines) = *TrampolineInfoOrErr;
- else
- return TrampolineInfoOrErr.takeError();
-
- uint32_t TrampolineSize = Client.getTrampolineSize();
- for (unsigned I = 0; I < NumTrampolines; ++I)
- AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize));
-
- return Error::success();
- }
-
- OrcRemoteTargetClient &Client;
- };
-
- /// Remote compile callback manager.
- class RemoteCompileCallbackManager : public JITCompileCallbackManager {
- public:
- RemoteCompileCallbackManager(OrcRemoteTargetClient &Client,
- ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
- : JITCompileCallbackManager(
- std::make_unique<RemoteTrampolinePool>(Client), ES,
- ErrorHandlerAddress) {}
- };
-
- /// Create an OrcRemoteTargetClient.
- /// Channel is the ChannelT instance to communicate on. It is assumed that
- /// the channel is ready to be read from and written to.
- static Expected<std::unique_ptr<OrcRemoteTargetClient>>
- Create(shared::RawByteChannel &Channel, ExecutionSession &ES) {
- Error Err = Error::success();
- auto Client = std::unique_ptr<OrcRemoteTargetClient>(
- new OrcRemoteTargetClient(Channel, ES, Err));
- if (Err)
- return std::move(Err);
- return std::move(Client);
- }
-
- /// Call the int(void) function at the given address in the target and return
- /// its result.
- Expected<int> callIntVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(void) "
- << format("0x%016" PRIx64, Addr) << "\n");
- return callB<exec::CallIntVoid>(Addr);
- }
-
- /// Call the int(int) function at the given address in the target and return
- /// its result.
- Expected<int> callIntInt(JITTargetAddress Addr, int Arg) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(int) " << format("0x%016" PRIx64, Addr)
- << "\n");
- return callB<exec::CallIntInt>(Addr, Arg);
- }
-
- /// Call the int(int, char*[]) function at the given address in the target and
- /// return its result.
- Expected<int> callMain(JITTargetAddress Addr,
- const std::vector<std::string> &Args) {
- LLVM_DEBUG(dbgs() << "Calling int(*)(int, char*[]) "
- << format("0x%016" PRIx64, Addr) << "\n");
- return callB<exec::CallMain>(Addr, Args);
- }
-
- /// Call the void() function at the given address in the target and wait for
- /// it to finish.
- Error callVoidVoid(JITTargetAddress Addr) {
- LLVM_DEBUG(dbgs() << "Calling void(*)(void) "
- << format("0x%016" PRIx64, Addr) << "\n");
- return callB<exec::CallVoidVoid>(Addr);
- }
-
- /// Create an RCMemoryManager which will allocate its memory on the remote
- /// target.
- Expected<std::unique_ptr<RemoteRTDyldMemoryManager>>
- createRemoteMemoryManager() {
- auto Id = AllocatorIds.getNext();
- if (auto Err = callB<mem::CreateRemoteAllocator>(Id))
- return std::move(Err);
- return std::unique_ptr<RemoteRTDyldMemoryManager>(
- new RemoteRTDyldMemoryManager(*this, Id));
- }
-
- /// Create a JITLink-compatible memory manager which will allocate working
- /// memory on the host and target memory on the remote target.
- Expected<std::unique_ptr<RemoteJITLinkMemoryManager>>
- createRemoteJITLinkMemoryManager() {
- auto Id = AllocatorIds.getNext();
- if (auto Err = callB<mem::CreateRemoteAllocator>(Id))
- return std::move(Err);
- LLVM_DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
- return std::unique_ptr<RemoteJITLinkMemoryManager>(
- new RemoteJITLinkMemoryManager(*this, Id));
- }
-
- /// Create an RCIndirectStubsManager that will allocate stubs on the remote
- /// target.
- Expected<std::unique_ptr<RemoteIndirectStubsManager>>
- createIndirectStubsManager() {
- auto Id = IndirectStubOwnerIds.getNext();
- if (auto Err = callB<stubs::CreateIndirectStubsOwner>(Id))
- return std::move(Err);
- return std::make_unique<RemoteIndirectStubsManager>(*this, Id);
- }
-
- Expected<RemoteCompileCallbackManager &>
- enableCompileCallbacks(JITTargetAddress ErrorHandlerAddress) {
- assert(!CallbackManager && "CallbackManager already obtained");
-
- // Emit the resolver block on the JIT server.
- if (auto Err = callB<stubs::EmitResolverBlock>())
- return std::move(Err);
-
- // Create the callback manager.
- CallbackManager.emplace(*this, ES, ErrorHandlerAddress);
- RemoteCompileCallbackManager &Mgr = *CallbackManager;
- return Mgr;
- }
-
- /// Search for symbols in the remote process. Note: This should be used by
- /// symbol resolvers *after* they've searched the local symbol table in the
- /// JIT stack.
- Expected<JITTargetAddress> getSymbolAddress(StringRef Name) {
- return callB<utils::GetSymbolAddress>(Name);
- }
-
- /// Get the triple for the remote target.
- const std::string &getTargetTriple() const { return RemoteTargetTriple; }
-
- Error terminateSession() { return callB<utils::TerminateSession>(); }
-
-private:
- OrcRemoteTargetClient(shared::RawByteChannel &Channel, ExecutionSession &ES,
- Error &Err)
- : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel,
- true),
- ES(ES) {
- ErrorAsOutParameter EAO(&Err);
-
- addHandler<utils::RequestCompile>(
- [this](JITTargetAddress Addr) -> JITTargetAddress {
- if (CallbackManager)
- return CallbackManager->executeCompileCallback(Addr);
- return 0;
- });
-
- if (auto RIOrErr = callB<utils::GetRemoteInfo>()) {
- std::tie(RemoteTargetTriple, RemotePointerSize, RemotePageSize,
- RemoteTrampolineSize, RemoteIndirectStubSize) = *RIOrErr;
- Err = Error::success();
- } else
- Err = RIOrErr.takeError();
- }
-
- void deregisterEHFrames(JITTargetAddress Addr, uint32_t Size) {
- if (auto Err = callB<eh::RegisterEHFrames>(Addr, Size))
- ES.reportError(std::move(Err));
- }
-
- void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- if (auto Err = callB<mem::DestroyRemoteAllocator>(Id)) {
- // FIXME: This will be triggered by a removeModuleSet call: Propagate
- // error return up through that.
- llvm_unreachable("Failed to destroy remote allocator.");
- AllocatorIds.release(Id);
- }
- }
-
- void destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) {
- IndirectStubOwnerIds.release(Id);
- if (auto Err = callB<stubs::DestroyIndirectStubsOwner>(Id))
- ES.reportError(std::move(Err));
- }
-
- Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>>
- emitIndirectStubs(ResourceIdMgr::ResourceId Id, uint32_t NumStubsRequired) {
- return callB<stubs::EmitIndirectStubs>(Id, NumStubsRequired);
- }
-
- Expected<std::tuple<JITTargetAddress, uint32_t>> emitTrampolineBlock() {
- return callB<stubs::EmitTrampolineBlock>();
- }
-
- uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; }
- uint32_t getPageSize() const { return RemotePageSize; }
- uint32_t getPointerSize() const { return RemotePointerSize; }
-
- uint32_t getTrampolineSize() const { return RemoteTrampolineSize; }
-
- Expected<std::vector<uint8_t>> readMem(char *Dst, JITTargetAddress Src,
- uint64_t Size) {
- return callB<mem::ReadMem>(Src, Size);
- }
-
- Error registerEHFrames(JITTargetAddress &RAddr, uint32_t Size) {
- // FIXME: Duplicate error and report it via ReportError too?
- return callB<eh::RegisterEHFrames>(RAddr, Size);
- }
-
- JITTargetAddress reserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size,
- uint32_t Align) {
- if (auto AddrOrErr = callB<mem::ReserveMem>(Id, Size, Align))
- return *AddrOrErr;
- else {
- ES.reportError(AddrOrErr.takeError());
- return 0;
- }
- }
-
- bool setProtections(ResourceIdMgr::ResourceId Id,
- JITTargetAddress RemoteSegAddr, unsigned ProtFlags) {
- if (auto Err = callB<mem::SetProtections>(Id, RemoteSegAddr, ProtFlags)) {
- ES.reportError(std::move(Err));
- return true;
- } else
- return false;
- }
-
- bool writeMem(JITTargetAddress Addr, const char *Src, uint64_t Size) {
- if (auto Err = callB<mem::WriteMem>(DirectBufferWriter(Src, Addr, Size))) {
- ES.reportError(std::move(Err));
- return true;
- } else
- return false;
- }
-
- Error writePointer(JITTargetAddress Addr, JITTargetAddress PtrVal) {
- return callB<mem::WritePtr>(Addr, PtrVal);
- }
-
- static Error doNothing() { return Error::success(); }
-
- ExecutionSession &ES;
- std::function<void(Error)> ReportError;
- std::string RemoteTargetTriple;
- uint32_t RemotePointerSize = 0;
- uint32_t RemotePageSize = 0;
- uint32_t RemoteTrampolineSize = 0;
- uint32_t RemoteIndirectStubSize = 0;
- ResourceIdMgr AllocatorIds, IndirectStubOwnerIds;
- Optional<RemoteCompileCallbackManager> CallbackManager;
-};
-
-} // end namespace remote
-} // end namespace orc
-} // end namespace llvm
-
-#undef DEBUG_TYPE
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
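The indirect-stub bookkeeping in the client removed above boils down to address arithmetic: a stub is identified by a (block, index) pair and resolved as block base plus index times entry size, once for the stub and once for its pointer slot. A self-contained sketch of that lookup follows; the sizes and base addresses in main are invented for the example, not values obtained from any target.

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

using TargetAddress = uint64_t;
using StubKey = std::pair<uint16_t, uint16_t>; // (block index, entry index)

struct StubBlock {
  TargetAddress StubBase; // address of the first stub in the block
  TargetAddress PtrBase;  // address of the first pointer slot in the block
  unsigned NumStubs;
};

// Mirrors the arithmetic of the deleted getStubAddr/getPtrAddr helpers.
TargetAddress stubAddr(const std::vector<StubBlock> &Blocks, StubKey K,
                       uint32_t StubSize) {
  return Blocks[K.first].StubBase + K.second * StubSize;
}

TargetAddress ptrAddr(const std::vector<StubBlock> &Blocks, StubKey K,
                      uint32_t PtrSize) {
  return Blocks[K.first].PtrBase + K.second * PtrSize;
}

int main() {
  // Hypothetical layout: one block of four stubs at 0x1000, with the
  // corresponding pointer slots at 0x2000.
  std::vector<StubBlock> Blocks = {{0x1000, 0x2000, 4}};
  assert(stubAddr(Blocks, {0, 2}, 16) == 0x1020); // third stub
  assert(ptrAddr(Blocks, {0, 2}, 8) == 0x2010);   // its pointer slot
  return 0;
}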
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
deleted file mode 100644
index 367bfb369191..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ /dev/null
@@ -1,386 +0,0 @@
-//===- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Orc remote-target RPC API. It should not be used
-// directly, but is used by the RemoteTargetClient and RemoteTargetServer
-// classes.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
-
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-
-namespace llvm {
-namespace orc {
-
-namespace remote {
-
-/// Template error for missing resources.
-template <typename ResourceIdT>
-class ResourceNotFound
- : public ErrorInfo<ResourceNotFound<ResourceIdT>> {
-public:
- static char ID;
-
- ResourceNotFound(ResourceIdT ResourceId,
- std::string ResourceDescription = "")
- : ResourceId(std::move(ResourceId)),
- ResourceDescription(std::move(ResourceDescription)) {}
-
- std::error_code convertToErrorCode() const override {
- return orcError(OrcErrorCode::UnknownResourceHandle);
- }
-
- void log(raw_ostream &OS) const override {
- OS << (ResourceDescription.empty()
- ? "Remote resource with id "
- : ResourceDescription)
- << " " << ResourceId << " not found";
- }
-
-private:
- ResourceIdT ResourceId;
- std::string ResourceDescription;
-};
-
-template <typename ResourceIdT>
-char ResourceNotFound<ResourceIdT>::ID = 0;
-
-class DirectBufferWriter {
-public:
- DirectBufferWriter() = default;
- DirectBufferWriter(const char *Src, JITTargetAddress Dst, uint64_t Size)
- : Src(Src), Dst(Dst), Size(Size) {}
-
- const char *getSrc() const { return Src; }
- JITTargetAddress getDst() const { return Dst; }
- uint64_t getSize() const { return Size; }
-
-private:
- const char *Src;
- JITTargetAddress Dst;
- uint64_t Size;
-};
-
-} // end namespace remote
-
-namespace shared {
-
-template <> class SerializationTypeName<JITSymbolFlags> {
-public:
- static const char *getName() { return "JITSymbolFlags"; }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, JITSymbolFlags> {
-public:
-
- static Error serialize(ChannelT &C, const JITSymbolFlags &Flags) {
- return serializeSeq(C, Flags.getRawFlagsValue(), Flags.getTargetFlags());
- }
-
- static Error deserialize(ChannelT &C, JITSymbolFlags &Flags) {
- JITSymbolFlags::UnderlyingType JITFlags;
- JITSymbolFlags::TargetFlagsType TargetFlags;
- if (auto Err = deserializeSeq(C, JITFlags, TargetFlags))
- return Err;
- Flags = JITSymbolFlags(static_cast<JITSymbolFlags::FlagNames>(JITFlags),
- TargetFlags);
- return Error::success();
- }
-};
-
-template <> class SerializationTypeName<remote::DirectBufferWriter> {
-public:
- static const char *getName() { return "DirectBufferWriter"; }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, remote::DirectBufferWriter, remote::DirectBufferWriter,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, const remote::DirectBufferWriter &DBW) {
- if (auto EC = serializeSeq(C, DBW.getDst()))
- return EC;
- if (auto EC = serializeSeq(C, DBW.getSize()))
- return EC;
- return C.appendBytes(DBW.getSrc(), DBW.getSize());
- }
-
- static Error deserialize(ChannelT &C, remote::DirectBufferWriter &DBW) {
- JITTargetAddress Dst;
- if (auto EC = deserializeSeq(C, Dst))
- return EC;
- uint64_t Size;
- if (auto EC = deserializeSeq(C, Size))
- return EC;
- char *Addr = reinterpret_cast<char *>(static_cast<uintptr_t>(Dst));
-
- DBW = remote::DirectBufferWriter(nullptr, Dst, Size);
-
- return C.readBytes(Addr, Size);
- }
-};
-
-} // end namespace shared
-
-namespace remote {
-
-class ResourceIdMgr {
-public:
- using ResourceId = uint64_t;
- static const ResourceId InvalidId = ~0U;
-
- ResourceIdMgr() = default;
- explicit ResourceIdMgr(ResourceId FirstValidId)
- : NextId(std::move(FirstValidId)) {}
-
- ResourceId getNext() {
- if (!FreeIds.empty()) {
- ResourceId I = FreeIds.back();
- FreeIds.pop_back();
- return I;
- }
- assert(NextId + 1 != ~0ULL && "All ids allocated");
- return NextId++;
- }
-
- void release(ResourceId I) { FreeIds.push_back(I); }
-
-private:
- ResourceId NextId = 1;
- std::vector<ResourceId> FreeIds;
-};
-
-/// Registers EH frames on the remote.
-namespace eh {
-
- /// Registers EH frames on the remote.
-class RegisterEHFrames
- : public shared::RPCFunction<RegisterEHFrames,
- void(JITTargetAddress Addr, uint32_t Size)> {
-public:
- static const char *getName() { return "RegisterEHFrames"; }
-};
-
- /// Deregisters EH frames on the remote.
-class DeregisterEHFrames
- : public shared::RPCFunction<DeregisterEHFrames,
- void(JITTargetAddress Addr, uint32_t Size)> {
-public:
- static const char *getName() { return "DeregisterEHFrames"; }
-};
-
-} // end namespace eh
-
-/// RPC functions for executing remote code.
-namespace exec {
-
- /// Call an 'int32_t()'-type function on the remote, returns the called
- /// function's return value.
-class CallIntVoid
- : public shared::RPCFunction<CallIntVoid, int32_t(JITTargetAddress Addr)> {
-public:
- static const char *getName() { return "CallIntVoid"; }
-};
-
- /// Call an 'int32_t(int32_t)'-type function on the remote, returns the called
- /// function's return value.
-class CallIntInt
- : public shared::RPCFunction<CallIntInt,
- int32_t(JITTargetAddress Addr, int)> {
-public:
- static const char *getName() { return "CallIntInt"; }
-};
-
- /// Call an 'int32_t(int32_t, char**)'-type function on the remote, returns the
- /// called function's return value.
-class CallMain
- : public shared::RPCFunction<CallMain,
- int32_t(JITTargetAddress Addr,
- std::vector<std::string> Args)> {
-public:
- static const char *getName() { return "CallMain"; }
-};
-
- /// Calls a 'void()'-type function on the remote, returns when the called
- /// function completes.
-class CallVoidVoid
- : public shared::RPCFunction<CallVoidVoid, void(JITTargetAddress FnAddr)> {
-public:
- static const char *getName() { return "CallVoidVoid"; }
-};
-
-} // end namespace exec
-
-/// RPC functions for remote memory management / inspection / modification.
-namespace mem {
-
- /// Creates a memory allocator on the remote.
-class CreateRemoteAllocator
- : public shared::RPCFunction<CreateRemoteAllocator,
- void(ResourceIdMgr::ResourceId AllocatorID)> {
-public:
- static const char *getName() { return "CreateRemoteAllocator"; }
-};
-
- /// Destroys a remote allocator, freeing any memory allocated by it.
-class DestroyRemoteAllocator
- : public shared::RPCFunction<DestroyRemoteAllocator,
- void(ResourceIdMgr::ResourceId AllocatorID)> {
-public:
- static const char *getName() { return "DestroyRemoteAllocator"; }
-};
-
- /// Read a remote memory block.
-class ReadMem
- : public shared::RPCFunction<
- ReadMem, std::vector<uint8_t>(JITTargetAddress Src, uint64_t Size)> {
-public:
- static const char *getName() { return "ReadMem"; }
-};
-
- /// Reserve a block of memory on the remote via the given allocator.
-class ReserveMem
- : public shared::RPCFunction<
- ReserveMem, JITTargetAddress(ResourceIdMgr::ResourceId AllocID,
- uint64_t Size, uint32_t Align)> {
-public:
- static const char *getName() { return "ReserveMem"; }
-};
-
- /// Set the memory protection on a memory block.
-class SetProtections
- : public shared::RPCFunction<
- SetProtections, void(ResourceIdMgr::ResourceId AllocID,
- JITTargetAddress Dst, uint32_t ProtFlags)> {
-public:
- static const char *getName() { return "SetProtections"; }
-};
-
- /// Write to a remote memory block.
-class WriteMem
- : public shared::RPCFunction<WriteMem,
- void(remote::DirectBufferWriter DB)> {
-public:
- static const char *getName() { return "WriteMem"; }
-};
-
- /// Write to a remote pointer.
-class WritePtr
- : public shared::RPCFunction<WritePtr, void(JITTargetAddress Dst,
- JITTargetAddress Val)> {
-public:
- static const char *getName() { return "WritePtr"; }
-};
-
-} // end namespace mem
-
-/// RPC functions for remote stub and trampoline management.
-namespace stubs {
-
- /// Creates an indirect stub owner on the remote.
-class CreateIndirectStubsOwner
- : public shared::RPCFunction<CreateIndirectStubsOwner,
- void(ResourceIdMgr::ResourceId StubOwnerID)> {
-public:
- static const char *getName() { return "CreateIndirectStubsOwner"; }
-};
-
- /// RPC function for destroying an indirect stubs owner.
-class DestroyIndirectStubsOwner
- : public shared::RPCFunction<DestroyIndirectStubsOwner,
- void(ResourceIdMgr::ResourceId StubsOwnerID)> {
-public:
- static const char *getName() { return "DestroyIndirectStubsOwner"; }
-};
-
- /// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted).
-class EmitIndirectStubs
- : public shared::RPCFunction<
- EmitIndirectStubs,
- std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>(
- ResourceIdMgr::ResourceId StubsOwnerID,
- uint32_t NumStubsRequired)> {
-public:
- static const char *getName() { return "EmitIndirectStubs"; }
-};
-
- /// RPC function to emit the resolver block and return its address.
-class EmitResolverBlock
- : public shared::RPCFunction<EmitResolverBlock, void()> {
-public:
- static const char *getName() { return "EmitResolverBlock"; }
-};
-
- /// EmitTrampolineBlock result is (BlockAddr, NumTrampolines).
-class EmitTrampolineBlock
- : public shared::RPCFunction<EmitTrampolineBlock,
- std::tuple<JITTargetAddress, uint32_t>()> {
-public:
- static const char *getName() { return "EmitTrampolineBlock"; }
-};
-
-} // end namespace stubs
-
-/// Miscellaneous RPC functions for dealing with remotes.
-namespace utils {
-
- /// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize,
- /// IndirectStubsSize).
-class GetRemoteInfo
- : public shared::RPCFunction<
- GetRemoteInfo,
- std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> {
-public:
- static const char *getName() { return "GetRemoteInfo"; }
-};
-
- /// Get the address of a remote symbol.
-class GetSymbolAddress
- : public shared::RPCFunction<GetSymbolAddress,
- JITTargetAddress(std::string SymbolName)> {
-public:
- static const char *getName() { return "GetSymbolAddress"; }
-};
-
- /// Request that the host execute a compile callback.
-class RequestCompile
- : public shared::RPCFunction<
- RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> {
-public:
- static const char *getName() { return "RequestCompile"; }
-};
-
- /// Notify the remote and terminate the session.
-class TerminateSession : public shared::RPCFunction<TerminateSession, void()> {
-public:
- static const char *getName() { return "TerminateSession"; }
-};
-
-} // namespace utils
-
-class OrcRemoteTargetRPCAPI
- : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
-public:
- // FIXME: Remove constructors once MSVC supports synthesizing move-ops.
- OrcRemoteTargetRPCAPI(shared::RawByteChannel &C)
- : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(C, true) {}
-};
-
-} // end namespace remote
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
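
For orientation (not part of the patch itself): every operation in the header deleted above follows the same pattern, a class deriving from shared::RPCFunction with the wire signature as a template argument and a getName() string used during function-id negotiation. A minimal sketch of that pattern, using a hypothetical AddInts function and assuming the RPC machinery this patch removes elsewhere (RPCUtils.h) is still available:

// Sketch only. AddInts is hypothetical; RPCFunction, callB and addHandler come
// from the RPC utilities that this patch deletes.
#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"

namespace example {

class AddInts
    : public llvm::orc::shared::RPCFunction<AddInts, int32_t(int32_t, int32_t)> {
public:
  static const char *getName() { return "AddInts"; }
};

} // end namespace example

// A client endpoint would issue a blocking call:
//   Expected<int32_t> Sum = Client.callB<example::AddInts>(2, 3);
// and a server would install a handler for it (as the server header below does
// in its constructor):
//   addHandler<example::AddInts>([](int32_t A, int32_t B) { return A + B; });
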
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
deleted file mode 100644
index ce9bf064303d..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
+++ /dev/null
@@ -1,464 +0,0 @@
-//===- OrcRemoteTargetServer.h - Orc Remote-target Server -------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the OrcRemoteTargetServer class. It can be used to build a
-// JIT server that can execute code sent from an OrcRemoteTargetClient.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
-
-#include "llvm/ExecutionEngine/JITSymbol.h"
-#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
-#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cassert>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <map>
-#include <memory>
-#include <string>
-#include <system_error>
-#include <tuple>
-#include <type_traits>
-#include <vector>
-
-#define DEBUG_TYPE "orc-remote"
-
-namespace llvm {
-namespace orc {
-namespace remote {
-
-template <typename ChannelT, typename TargetT>
-class OrcRemoteTargetServer
- : public shared::SingleThreadedRPCEndpoint<shared::RawByteChannel> {
-public:
- using SymbolLookupFtor =
- std::function<JITTargetAddress(const std::string &Name)>;
-
- using EHFrameRegistrationFtor =
- std::function<void(uint8_t *Addr, uint32_t Size)>;
-
- OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup,
- EHFrameRegistrationFtor EHFramesRegister,
- EHFrameRegistrationFtor EHFramesDeregister)
- : shared::SingleThreadedRPCEndpoint<shared::RawByteChannel>(Channel,
- true),
- SymbolLookup(std::move(SymbolLookup)),
- EHFramesRegister(std::move(EHFramesRegister)),
- EHFramesDeregister(std::move(EHFramesDeregister)) {
- using ThisT = std::remove_reference_t<decltype(*this)>;
- addHandler<exec::CallIntVoid>(*this, &ThisT::handleCallIntVoid);
- addHandler<exec::CallIntInt>(*this, &ThisT::handleCallIntInt);
- addHandler<exec::CallMain>(*this, &ThisT::handleCallMain);
- addHandler<exec::CallVoidVoid>(*this, &ThisT::handleCallVoidVoid);
- addHandler<mem::CreateRemoteAllocator>(*this,
- &ThisT::handleCreateRemoteAllocator);
- addHandler<mem::DestroyRemoteAllocator>(
- *this, &ThisT::handleDestroyRemoteAllocator);
- addHandler<mem::ReadMem>(*this, &ThisT::handleReadMem);
- addHandler<mem::ReserveMem>(*this, &ThisT::handleReserveMem);
- addHandler<mem::SetProtections>(*this, &ThisT::handleSetProtections);
- addHandler<mem::WriteMem>(*this, &ThisT::handleWriteMem);
- addHandler<mem::WritePtr>(*this, &ThisT::handleWritePtr);
- addHandler<eh::RegisterEHFrames>(*this, &ThisT::handleRegisterEHFrames);
- addHandler<eh::DeregisterEHFrames>(*this, &ThisT::handleDeregisterEHFrames);
- addHandler<stubs::CreateIndirectStubsOwner>(
- *this, &ThisT::handleCreateIndirectStubsOwner);
- addHandler<stubs::DestroyIndirectStubsOwner>(
- *this, &ThisT::handleDestroyIndirectStubsOwner);
- addHandler<stubs::EmitIndirectStubs>(*this,
- &ThisT::handleEmitIndirectStubs);
- addHandler<stubs::EmitResolverBlock>(*this,
- &ThisT::handleEmitResolverBlock);
- addHandler<stubs::EmitTrampolineBlock>(*this,
- &ThisT::handleEmitTrampolineBlock);
- addHandler<utils::GetSymbolAddress>(*this, &ThisT::handleGetSymbolAddress);
- addHandler<utils::GetRemoteInfo>(*this, &ThisT::handleGetRemoteInfo);
- addHandler<utils::TerminateSession>(*this, &ThisT::handleTerminateSession);
- }
-
- // FIXME: Remove move/copy ops once MSVC supports synthesizing move ops.
- OrcRemoteTargetServer(const OrcRemoteTargetServer &) = delete;
- OrcRemoteTargetServer &operator=(const OrcRemoteTargetServer &) = delete;
-
- OrcRemoteTargetServer(OrcRemoteTargetServer &&Other) = default;
- OrcRemoteTargetServer &operator=(OrcRemoteTargetServer &&) = delete;
-
- Expected<JITTargetAddress> requestCompile(JITTargetAddress TrampolineAddr) {
- return callB<utils::RequestCompile>(TrampolineAddr);
- }
-
- bool receivedTerminate() const { return TerminateFlag; }
-
-private:
- struct Allocator {
- Allocator() = default;
- Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {}
-
- Allocator &operator=(Allocator &&Other) {
- Allocs = std::move(Other.Allocs);
- return *this;
- }
-
- ~Allocator() {
- for (auto &Alloc : Allocs)
- sys::Memory::releaseMappedMemory(Alloc.second);
- }
-
- Error allocate(void *&Addr, size_t Size, uint32_t Align) {
- std::error_code EC;
- sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
- Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
- if (EC)
- return errorCodeToError(EC);
-
- Addr = MB.base();
- assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc");
- Allocs[MB.base()] = std::move(MB);
- return Error::success();
- }
-
- Error setProtections(void *block, unsigned Flags) {
- auto I = Allocs.find(block);
- if (I == Allocs.end())
-        return errorCodeToError(
-            orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized));
- return errorCodeToError(
- sys::Memory::protectMappedMemory(I->second, Flags));
- }
-
- private:
- std::map<void *, sys::MemoryBlock> Allocs;
- };
-
- static Error doNothing() { return Error::success(); }
-
- static JITTargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) {
- auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr);
- auto AddrOrErr = T->requestCompile(static_cast<JITTargetAddress>(
- reinterpret_cast<uintptr_t>(TrampolineAddr)));
- // FIXME: Allow customizable failure substitution functions.
- assert(AddrOrErr && "Compile request failed");
- return *AddrOrErr;
- }
-
- Expected<int32_t> handleCallIntVoid(JITTargetAddress Addr) {
- using IntVoidFnTy = int (*)();
-
- IntVoidFnTy Fn =
- reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr));
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n");
- int Result = Fn();
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
-
- return Result;
- }
-
- Expected<int32_t> handleCallIntInt(JITTargetAddress Addr, int Arg) {
- using IntIntFnTy = int (*)(int);
-
- IntIntFnTy Fn = reinterpret_cast<IntIntFnTy>(static_cast<uintptr_t>(Addr));
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr)
- << " with argument " << Arg << "\n");
- int Result = Fn(Arg);
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
-
- return Result;
- }
-
- Expected<int32_t> handleCallMain(JITTargetAddress Addr,
- std::vector<std::string> Args) {
- using MainFnTy = int (*)(int, const char *[]);
-
- MainFnTy Fn = reinterpret_cast<MainFnTy>(static_cast<uintptr_t>(Addr));
- int ArgC = Args.size() + 1;
- int Idx = 1;
- std::unique_ptr<const char *[]> ArgV(new const char *[ArgC + 1]);
- ArgV[0] = "<jit process>";
- for (auto &Arg : Args)
- ArgV[Idx++] = Arg.c_str();
- ArgV[ArgC] = 0;
- LLVM_DEBUG(for (int Idx = 0; Idx < ArgC; ++Idx) {
- llvm::dbgs() << "Arg " << Idx << ": " << ArgV[Idx] << "\n";
- });
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n");
- int Result = Fn(ArgC, ArgV.get());
- LLVM_DEBUG(dbgs() << " Result = " << Result << "\n");
-
- return Result;
- }
-
- Error handleCallVoidVoid(JITTargetAddress Addr) {
- using VoidVoidFnTy = void (*)();
-
- VoidVoidFnTy Fn =
- reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr));
-
- LLVM_DEBUG(dbgs() << " Calling " << format("0x%016x", Addr) << "\n");
- Fn();
- LLVM_DEBUG(dbgs() << " Complete.\n");
-
- return Error::success();
- }
-
- Error handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- auto I = Allocators.find(Id);
- if (I != Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse));
- LLVM_DEBUG(dbgs() << " Created allocator " << Id << "\n");
- Allocators[Id] = Allocator();
- return Error::success();
- }
-
- Error handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) {
- auto I = IndirectStubsOwners.find(Id);
- if (I != IndirectStubsOwners.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse));
- LLVM_DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n");
- IndirectStubsOwners[Id] = ISBlockOwnerList();
- return Error::success();
- }
-
- Error handleDeregisterEHFrames(JITTargetAddress TAddr, uint32_t Size) {
- uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr));
-    LLVM_DEBUG(dbgs() << "  Deregistering EH frames at "
- << format("0x%016x", TAddr) << ", Size = " << Size
- << " bytes\n");
- EHFramesDeregister(Addr, Size);
- return Error::success();
- }
-
- Error handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- auto I = Allocators.find(Id);
- if (I == Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorDoesNotExist));
- Allocators.erase(I);
- LLVM_DEBUG(dbgs() << " Destroyed allocator " << Id << "\n");
- return Error::success();
- }
-
- Error handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) {
- auto I = IndirectStubsOwners.find(Id);
- if (I == IndirectStubsOwners.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist));
- IndirectStubsOwners.erase(I);
- return Error::success();
- }
-
- Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>>
- handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id,
- uint32_t NumStubsRequired) {
- LLVM_DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired
- << " stubs.\n");
-
- auto StubOwnerItr = IndirectStubsOwners.find(Id);
- if (StubOwnerItr == IndirectStubsOwners.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist));
-
- auto IS = LocalIndirectStubsInfo<TargetT>::create(
- NumStubsRequired, sys::Process::getPageSizeEstimate());
- if (!IS)
- return IS.takeError();
-
- JITTargetAddress StubsBase = pointerToJITTargetAddress(IS->getStub(0));
- JITTargetAddress PtrsBase = pointerToJITTargetAddress(IS->getPtr(0));
- uint32_t NumStubsEmitted = IS->getNumStubs();
-
- auto &BlockList = StubOwnerItr->second;
- BlockList.push_back(std::move(*IS));
-
- return std::make_tuple(StubsBase, PtrsBase, NumStubsEmitted);
- }
-
- Error handleEmitResolverBlock() {
- std::error_code EC;
- ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- TargetT::ResolverCodeSize, nullptr,
- sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
- if (EC)
- return errorCodeToError(EC);
-
- TargetT::writeResolverCode(static_cast<char *>(ResolverBlock.base()),
- pointerToJITTargetAddress(ResolverBlock.base()),
- pointerToJITTargetAddress(&reenter),
- pointerToJITTargetAddress(this));
-
- return errorCodeToError(sys::Memory::protectMappedMemory(
- ResolverBlock.getMemoryBlock(),
- sys::Memory::MF_READ | sys::Memory::MF_EXEC));
- }
-
- Expected<std::tuple<JITTargetAddress, uint32_t>> handleEmitTrampolineBlock() {
- std::error_code EC;
- auto TrampolineBlock =
- sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
- sys::Process::getPageSizeEstimate(), nullptr,
- sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
- if (EC)
- return errorCodeToError(EC);
-
- uint32_t NumTrampolines =
- (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) /
- TargetT::TrampolineSize;
-
- char *TrampolineMem = static_cast<char *>(TrampolineBlock.base());
- TargetT::writeTrampolines(
- TrampolineMem, pointerToJITTargetAddress(TrampolineMem),
- pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines);
-
- EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(),
- sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
-
- TrampolineBlocks.push_back(std::move(TrampolineBlock));
-
- return std::make_tuple(pointerToJITTargetAddress(TrampolineMem),
- NumTrampolines);
- }
-
- Expected<JITTargetAddress> handleGetSymbolAddress(const std::string &Name) {
- JITTargetAddress Addr = SymbolLookup(Name);
- LLVM_DEBUG(dbgs() << " Symbol '" << Name
- << "' = " << format("0x%016x", Addr) << "\n");
- return Addr;
- }
-
- Expected<std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>>
- handleGetRemoteInfo() {
- std::string ProcessTriple = sys::getProcessTriple();
- uint32_t PointerSize = TargetT::PointerSize;
- uint32_t PageSize = sys::Process::getPageSizeEstimate();
- uint32_t TrampolineSize = TargetT::TrampolineSize;
- uint32_t IndirectStubSize = TargetT::StubSize;
- LLVM_DEBUG(dbgs() << " Remote info:\n"
- << " triple = '" << ProcessTriple << "'\n"
- << " pointer size = " << PointerSize << "\n"
- << " page size = " << PageSize << "\n"
- << " trampoline size = " << TrampolineSize << "\n"
- << " indirect stub size = " << IndirectStubSize
- << "\n");
- return std::make_tuple(ProcessTriple, PointerSize, PageSize, TrampolineSize,
- IndirectStubSize);
- }
-
- Expected<std::vector<uint8_t>> handleReadMem(JITTargetAddress RSrc,
- uint64_t Size) {
- uint8_t *Src = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(RSrc));
-
- LLVM_DEBUG(dbgs() << " Reading " << Size << " bytes from "
- << format("0x%016x", RSrc) << "\n");
-
- std::vector<uint8_t> Buffer;
- Buffer.resize(Size);
- for (uint8_t *P = Src; Size != 0; --Size)
- Buffer.push_back(*P++);
-
- return Buffer;
- }
-
- Error handleRegisterEHFrames(JITTargetAddress TAddr, uint32_t Size) {
- uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr));
- LLVM_DEBUG(dbgs() << " Registering EH frames at "
- << format("0x%016x", TAddr) << ", Size = " << Size
- << " bytes\n");
- EHFramesRegister(Addr, Size);
- return Error::success();
- }
-
- Expected<JITTargetAddress> handleReserveMem(ResourceIdMgr::ResourceId Id,
- uint64_t Size, uint32_t Align) {
- auto I = Allocators.find(Id);
- if (I == Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorDoesNotExist));
- auto &Allocator = I->second;
- void *LocalAllocAddr = nullptr;
- if (auto Err = Allocator.allocate(LocalAllocAddr, Size, Align))
- return std::move(Err);
-
- LLVM_DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr
- << " (" << Size << " bytes, alignment " << Align
- << ")\n");
-
- JITTargetAddress AllocAddr = static_cast<JITTargetAddress>(
- reinterpret_cast<uintptr_t>(LocalAllocAddr));
-
- return AllocAddr;
- }
-
- Error handleSetProtections(ResourceIdMgr::ResourceId Id,
- JITTargetAddress Addr, uint32_t Flags) {
- auto I = Allocators.find(Id);
- if (I == Allocators.end())
- return errorCodeToError(
- orcError(OrcErrorCode::RemoteAllocatorDoesNotExist));
- auto &Allocator = I->second;
- void *LocalAddr = reinterpret_cast<void *>(static_cast<uintptr_t>(Addr));
- LLVM_DEBUG(dbgs() << " Allocator " << Id << " set permissions on "
- << LocalAddr << " to "
- << (Flags & sys::Memory::MF_READ ? 'R' : '-')
- << (Flags & sys::Memory::MF_WRITE ? 'W' : '-')
- << (Flags & sys::Memory::MF_EXEC ? 'X' : '-') << "\n");
- return Allocator.setProtections(LocalAddr, Flags);
- }
-
- Error handleTerminateSession() {
- TerminateFlag = true;
- return Error::success();
- }
-
- Error handleWriteMem(DirectBufferWriter DBW) {
- LLVM_DEBUG(dbgs() << " Writing " << DBW.getSize() << " bytes to "
- << format("0x%016x", DBW.getDst()) << "\n");
- return Error::success();
- }
-
- Error handleWritePtr(JITTargetAddress Addr, JITTargetAddress PtrVal) {
- LLVM_DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr)
- << " = " << format("0x%016x", PtrVal) << "\n");
- uintptr_t *Ptr =
- reinterpret_cast<uintptr_t *>(static_cast<uintptr_t>(Addr));
- *Ptr = static_cast<uintptr_t>(PtrVal);
- return Error::success();
- }
-
- SymbolLookupFtor SymbolLookup;
- EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister;
- std::map<ResourceIdMgr::ResourceId, Allocator> Allocators;
- using ISBlockOwnerList = std::vector<LocalIndirectStubsInfo<TargetT>>;
- std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners;
- sys::OwningMemoryBlock ResolverBlock;
- std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
- bool TerminateFlag = false;
-};
-
-} // end namespace remote
-} // end namespace orc
-} // end namespace llvm
-
-#undef DEBUG_TYPE
-
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
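
For orientation (again, not part of the patch): the server deleted above binds each RPC function to a handleXXX member in its constructor and exposes receivedTerminate() so a host program can run a simple dispatch loop. A sketch of such a loop, assuming the FDRawByteChannel and RPC endpoint removed elsewhere in this patch (handleOne() is the endpoint's receive-and-dispatch primitive) and using OrcX86_64_SysV as a stand-in ABI support class; serve() is a made-up name and error handling is minimal:

// Sketch only.
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"

llvm::Error serve(int InFD, int OutFD) {
  using namespace llvm;
  using namespace llvm::orc;

  shared::FDRawByteChannel Channel(InFD, OutFD);
  remote::OrcRemoteTargetServer<shared::FDRawByteChannel, OrcX86_64_SysV>
      Server(
          Channel,
          [](const std::string &Name) -> JITTargetAddress {
            return 0; // Executor-side symbol lookup would go here.
          },
          [](uint8_t *Addr, uint32_t Size) { /* register EH frames */ },
          [](uint8_t *Addr, uint32_t Size) { /* deregister EH frames */ });

  // Serve requests until the client sends TerminateSession.
  while (!Server.receivedTerminate())
    if (auto Err = Server.handleOne())
      return Err;
  return Error::success();
}
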
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
index 78a6623d7594..3c0b2b9edd52 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
@@ -34,25 +34,26 @@ private:
};
/// Represents an address in the executor process.
-class ExecutorAddress {
+class ExecutorAddr {
public:
- ExecutorAddress() = default;
- explicit ExecutorAddress(uint64_t Addr) : Addr(Addr) {}
+ ExecutorAddr() = default;
- /// Create an ExecutorAddress from the given pointer.
+ /// Create an ExecutorAddr from the given value.
+ explicit ExecutorAddr(uint64_t Addr) : Addr(Addr) {}
+
+ /// Create an ExecutorAddr from the given pointer.
/// Warning: This should only be used when JITing in-process.
- template <typename T> static ExecutorAddress fromPtr(T *Value) {
- return ExecutorAddress(
+ template <typename T> static ExecutorAddr fromPtr(T *Value) {
+ return ExecutorAddr(
static_cast<uint64_t>(reinterpret_cast<uintptr_t>(Value)));
}
- /// Cast this ExecutorAddress to a pointer of the given type.
- /// Warning: This should only be esude when JITing in-process.
+ /// Cast this ExecutorAddr to a pointer of the given type.
+ /// Warning: This should only be used when JITing in-process.
template <typename T> T toPtr() const {
static_assert(std::is_pointer<T>::value, "T must be a pointer type");
uintptr_t IntPtr = static_cast<uintptr_t>(Addr);
- assert(IntPtr == Addr &&
- "JITTargetAddress value out of range for uintptr_t");
+ assert(IntPtr == Addr && "ExecutorAddr value out of range for uintptr_t");
return reinterpret_cast<T>(IntPtr);
}
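
Aside (not part of the patch): the rename to ExecutorAddr keeps the pointer-bridging helpers shown above. A tiny illustrative round trip, only valid when JITing in-process as the warning in the header notes; roundTripExample is a made-up name:

// Illustrative only.
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include <cstdint>

void roundTripExample() {
  int X = 42;
  auto A = llvm::orc::ExecutorAddr::fromPtr(&X); // wrap &X as a 64-bit address
  int *P = A.toPtr<int *>();   // recover the pointer; asserts if the value
                               // does not fit in uintptr_t
  uint64_t Raw = A.getValue(); // raw 64-bit representation for the wire
  (void)P;
  (void)Raw;
}
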
@@ -62,53 +63,47 @@ public:
explicit operator bool() const { return Addr != 0; }
- friend bool operator==(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator==(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr == RHS.Addr;
}
- friend bool operator!=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator!=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr != RHS.Addr;
}
- friend bool operator<(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator<(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr < RHS.Addr;
}
- friend bool operator<=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator<=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr <= RHS.Addr;
}
- friend bool operator>(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator>(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr > RHS.Addr;
}
- friend bool operator>=(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+ friend bool operator>=(const ExecutorAddr &LHS, const ExecutorAddr &RHS) {
return LHS.Addr >= RHS.Addr;
}
- ExecutorAddress &operator++() {
+ ExecutorAddr &operator++() {
++Addr;
return *this;
}
- ExecutorAddress &operator--() {
+ ExecutorAddr &operator--() {
--Addr;
return *this;
}
- ExecutorAddress operator++(int) { return ExecutorAddress(Addr++); }
- ExecutorAddress operator--(int) { return ExecutorAddress(Addr++); }
+ ExecutorAddr operator++(int) { return ExecutorAddr(Addr++); }
+ ExecutorAddr operator--(int) { return ExecutorAddr(Addr--); }
- ExecutorAddress &operator+=(const ExecutorAddrDiff Delta) {
+ ExecutorAddr &operator+=(const ExecutorAddrDiff Delta) {
Addr += Delta.getValue();
return *this;
}
- ExecutorAddress &operator-=(const ExecutorAddrDiff Delta) {
+ ExecutorAddr &operator-=(const ExecutorAddrDiff Delta) {
Addr -= Delta.getValue();
return *this;
}
@@ -118,83 +113,98 @@ private:
};
/// Subtracting two addresses yields an offset.
-inline ExecutorAddrDiff operator-(const ExecutorAddress &LHS,
- const ExecutorAddress &RHS) {
+inline ExecutorAddrDiff operator-(const ExecutorAddr &LHS,
+ const ExecutorAddr &RHS) {
return ExecutorAddrDiff(LHS.getValue() - RHS.getValue());
}
/// Adding an offset and an address yields an address.
-inline ExecutorAddress operator+(const ExecutorAddress &LHS,
- const ExecutorAddrDiff &RHS) {
- return ExecutorAddress(LHS.getValue() + RHS.getValue());
+inline ExecutorAddr operator+(const ExecutorAddr &LHS,
+ const ExecutorAddrDiff &RHS) {
+ return ExecutorAddr(LHS.getValue() + RHS.getValue());
}
/// Adding an address and an offset yields an address.
-inline ExecutorAddress operator+(const ExecutorAddrDiff &LHS,
- const ExecutorAddress &RHS) {
- return ExecutorAddress(LHS.getValue() + RHS.getValue());
+inline ExecutorAddr operator+(const ExecutorAddrDiff &LHS,
+ const ExecutorAddr &RHS) {
+ return ExecutorAddr(LHS.getValue() + RHS.getValue());
}
/// Represents an address range in the executor process.
-struct ExecutorAddressRange {
- ExecutorAddressRange() = default;
- ExecutorAddressRange(ExecutorAddress StartAddress, ExecutorAddress EndAddress)
- : StartAddress(StartAddress), EndAddress(EndAddress) {}
+struct ExecutorAddrRange {
+ ExecutorAddrRange() = default;
+ ExecutorAddrRange(ExecutorAddr Start, ExecutorAddr End)
+ : Start(Start), End(End) {}
+ ExecutorAddrRange(ExecutorAddr Start, ExecutorAddrDiff Size)
+ : Start(Start), End(Start + Size) {}
- bool empty() const { return StartAddress == EndAddress; }
- ExecutorAddrDiff size() const { return EndAddress - StartAddress; }
+ bool empty() const { return Start == End; }
+ ExecutorAddrDiff size() const { return End - Start; }
- ExecutorAddress StartAddress;
- ExecutorAddress EndAddress;
+ friend bool operator==(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start == RHS.Start && LHS.End == RHS.End;
+ }
+ friend bool operator!=(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return !(LHS == RHS);
+ }
+ bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; }
+  bool overlaps(const ExecutorAddrRange &Other) const {
+ return !(Other.End <= Start || End <= Other.Start);
+ }
+
+ ExecutorAddr Start;
+ ExecutorAddr End;
};
namespace shared {
-/// SPS serializatior for ExecutorAddress.
-template <> class SPSSerializationTraits<SPSExecutorAddress, ExecutorAddress> {
+class SPSExecutorAddr {};
+
+/// SPS serialization traits for ExecutorAddr.
+template <> class SPSSerializationTraits<SPSExecutorAddr, ExecutorAddr> {
public:
- static size_t size(const ExecutorAddress &EA) {
+ static size_t size(const ExecutorAddr &EA) {
return SPSArgList<uint64_t>::size(EA.getValue());
}
- static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddress &EA) {
+ static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddr &EA) {
return SPSArgList<uint64_t>::serialize(BOB, EA.getValue());
}
- static bool deserialize(SPSInputBuffer &BIB, ExecutorAddress &EA) {
+ static bool deserialize(SPSInputBuffer &BIB, ExecutorAddr &EA) {
uint64_t Tmp;
if (!SPSArgList<uint64_t>::deserialize(BIB, Tmp))
return false;
- EA = ExecutorAddress(Tmp);
+ EA = ExecutorAddr(Tmp);
return true;
}
};
-using SPSExecutorAddressRange =
- SPSTuple<SPSExecutorAddress, SPSExecutorAddress>;
+using SPSExecutorAddrRange = SPSTuple<SPSExecutorAddr, SPSExecutorAddr>;
/// Serialization traits for address ranges.
template <>
-class SPSSerializationTraits<SPSExecutorAddressRange, ExecutorAddressRange> {
+class SPSSerializationTraits<SPSExecutorAddrRange, ExecutorAddrRange> {
public:
- static size_t size(const ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::size(
- Value.StartAddress, Value.EndAddress);
+ static size_t size(const ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::size(Value.Start,
+ Value.End);
}
- static bool serialize(SPSOutputBuffer &BOB,
- const ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::serialize(
- BOB, Value.StartAddress, Value.EndAddress);
+ static bool serialize(SPSOutputBuffer &BOB, const ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::serialize(
+ BOB, Value.Start, Value.End);
}
- static bool deserialize(SPSInputBuffer &BIB, ExecutorAddressRange &Value) {
- return SPSArgList<SPSExecutorAddress, SPSExecutorAddress>::deserialize(
- BIB, Value.StartAddress, Value.EndAddress);
+ static bool deserialize(SPSInputBuffer &BIB, ExecutorAddrRange &Value) {
+ return SPSArgList<SPSExecutorAddr, SPSExecutorAddr>::deserialize(
+ BIB, Value.Start, Value.End);
}
};
-using SPSExecutorAddressRangeSequence = SPSSequence<SPSExecutorAddressRange>;
+using SPSExecutorAddrRangeSequence = SPSSequence<SPSExecutorAddrRange>;
} // End namespace shared.
} // End namespace orc.
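
Aside (not part of the patch): besides the rename, the hunks above give ExecutorAddrRange a size-based constructor plus contains() and overlaps() queries. A small illustrative use, with arbitrary addresses and a made-up function name:

// Illustrative only.
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include <cstdint>

void rangeExample() {
  using namespace llvm::orc;

  ExecutorAddrRange Text(ExecutorAddr(0x1000), ExecutorAddrDiff(0x800));
  ExecutorAddrRange Data(ExecutorAddr(0x1400), ExecutorAddr(0x2000));

  bool InText = Text.contains(ExecutorAddr(0x17ff)); // true: 0x1000 <= a < 0x1800
  bool Clash = Text.overlaps(Data);                  // true: [0x1400, 0x1800) is shared
  uint64_t Bytes = Text.size().getValue();           // 0x800
  (void)InText;
  (void)Clash;
  (void)Bytes;
}
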
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h
deleted file mode 100644
index 3f96fe3da49d..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===- FDRawByteChannel.h - File descriptor based byte-channel -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// File descriptor based RawByteChannel.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
-
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#else
-#include <io.h>
-#endif
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-/// Serialization channel that reads from and writes to file descriptors.
-class FDRawByteChannel final : public RawByteChannel {
-public:
- FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
-
- llvm::Error readBytes(char *Dst, unsigned Size) override {
- assert(Dst && "Attempt to read into null.");
- ssize_t Completed = 0;
- while (Completed < static_cast<ssize_t>(Size)) {
- ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed);
- if (Read <= 0) {
- auto ErrNo = errno;
- if (ErrNo == EAGAIN || ErrNo == EINTR)
- continue;
- else
- return llvm::errorCodeToError(
- std::error_code(errno, std::generic_category()));
- }
- Completed += Read;
- }
- return llvm::Error::success();
- }
-
- llvm::Error appendBytes(const char *Src, unsigned Size) override {
- assert(Src && "Attempt to append from null.");
- ssize_t Completed = 0;
- while (Completed < static_cast<ssize_t>(Size)) {
- ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed);
- if (Written < 0) {
- auto ErrNo = errno;
- if (ErrNo == EAGAIN || ErrNo == EINTR)
- continue;
- else
- return llvm::errorCodeToError(
- std::error_code(errno, std::generic_category()));
- }
- Completed += Written;
- }
- return llvm::Error::success();
- }
-
- llvm::Error send() override { return llvm::Error::success(); }
-
-private:
- int InFD, OutFD;
-};
-
-} // namespace shared
-} // namespace orc
-} // namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_FDRAWBYTECHANNEL_H
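
Aside (not part of the patch): the essential behaviour of the deleted channel is its retry discipline, loop until the full byte count has been transferred, retry on EAGAIN/EINTR, and surface any other errno as an llvm::Error. A standalone sketch of the read side (POSIX only; readAll is a made-up name, and unlike the original it treats end-of-file as an error rather than retrying):

// Sketch of the retry loop the deleted readBytes() implemented (POSIX only).
#include "llvm/Support/Error.h"
#include <cerrno>
#include <system_error>
#include <unistd.h>

static llvm::Error readAll(int FD, char *Dst, unsigned Size) {
  ssize_t Completed = 0;
  while (Completed < static_cast<ssize_t>(Size)) {
    ssize_t Read = ::read(FD, Dst + Completed, Size - Completed);
    if (Read == 0) // end of file before Size bytes arrived
      return llvm::errorCodeToError(
          std::error_code(EPIPE, std::generic_category()));
    if (Read < 0) {
      int ErrNo = errno;
      if (ErrNo == EAGAIN || ErrNo == EINTR)
        continue; // transient; retry the read
      return llvm::errorCodeToError(
          std::error_code(ErrNo, std::generic_category()));
    }
    Completed += Read;
  }
  return llvm::Error::success();
}
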
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
new file mode 100644
index 000000000000..3ef43f33d84c
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h
@@ -0,0 +1,68 @@
+//===---- OrcRTBridge.h -- Utils for interacting with orc-rt ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Declares types and symbol names provided by the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+
+namespace llvm {
+namespace orc {
+namespace rt {
+
+extern const char *SimpleExecutorDylibManagerInstanceName;
+extern const char *SimpleExecutorDylibManagerOpenWrapperName;
+extern const char *SimpleExecutorDylibManagerLookupWrapperName;
+
+extern const char *SimpleExecutorMemoryManagerInstanceName;
+extern const char *SimpleExecutorMemoryManagerReserveWrapperName;
+extern const char *SimpleExecutorMemoryManagerFinalizeWrapperName;
+extern const char *SimpleExecutorMemoryManagerDeallocateWrapperName;
+
+extern const char *MemoryWriteUInt8sWrapperName;
+extern const char *MemoryWriteUInt16sWrapperName;
+extern const char *MemoryWriteUInt32sWrapperName;
+extern const char *MemoryWriteUInt64sWrapperName;
+extern const char *MemoryWriteBuffersWrapperName;
+
+extern const char *RegisterEHFrameSectionCustomDirectWrapperName;
+extern const char *DeregisterEHFrameSectionCustomDirectWrapperName;
+
+extern const char *RunAsMainWrapperName;
+
+using SPSSimpleExecutorDylibManagerOpenSignature =
+ shared::SPSExpected<uint64_t>(shared::SPSExecutorAddr, shared::SPSString,
+ uint64_t);
+
+using SPSSimpleExecutorDylibManagerLookupSignature =
+ shared::SPSExpected<shared::SPSSequence<shared::SPSExecutorAddr>>(
+ shared::SPSExecutorAddr, uint64_t, shared::SPSRemoteSymbolLookupSet);
+
+using SPSSimpleExecutorMemoryManagerReserveSignature =
+ shared::SPSExpected<shared::SPSExecutorAddr>(shared::SPSExecutorAddr,
+ uint64_t);
+using SPSSimpleExecutorMemoryManagerFinalizeSignature =
+ shared::SPSError(shared::SPSExecutorAddr, shared::SPSFinalizeRequest);
+using SPSSimpleExecutorMemoryManagerDeallocateSignature = shared::SPSError(
+ shared::SPSExecutorAddr, shared::SPSSequence<shared::SPSExecutorAddr>);
+
+using SPSRunAsMainSignature = int64_t(shared::SPSExecutorAddr,
+ shared::SPSSequence<shared::SPSString>);
+
+} // end namespace rt
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_ORCRTBRIDGE_H
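
A note on the new header (not part of the patch): it only declares the names and SPS wire signatures that the controller and the executor-side orc-rt must agree on; the string values themselves live in the corresponding .cpp. One way a controller-side component might gather these bootstrap symbols is a simple name-to-address table, sketched below with placeholder (null) addresses and a made-up function name; how the real addresses are discovered is outside this header:

// Sketch only: keying executor addresses by the wrapper-function names
// declared above. The ExecutorAddr() values are placeholders.
#include "llvm/ADT/StringMap.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"

llvm::StringMap<llvm::orc::ExecutorAddr> collectBootstrapSymbols() {
  using namespace llvm::orc;

  llvm::StringMap<ExecutorAddr> Syms;
  Syms[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr();
  Syms[rt::SimpleExecutorMemoryManagerReserveWrapperName] = ExecutorAddr();
  Syms[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] = ExecutorAddr();
  Syms[rt::RunAsMainWrapperName] = ExecutorAddr();
  // A caller would then invoke, e.g., the reserve wrapper with arguments
  // serialized per SPSSimpleExecutorMemoryManagerReserveSignature.
  return Syms;
}
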
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h
deleted file mode 100644
index 1ff47ce42758..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RPCUtils.h
+++ /dev/null
@@ -1,1659 +0,0 @@
-//===- RPCUtils.h - Utilities for building RPC APIs -------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Utilities to support construction of simple RPC APIs.
-//
-// The RPC utilities aim for ease of use (minimal conceptual overhead) for C++
-// programmers, high performance, low memory overhead, and efficient use of the
-// communications channel.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
-
-#include <map>
-#include <thread>
-#include <vector>
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
-#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
-#include "llvm/Support/MSVCErrorWorkarounds.h"
-
-#include <future>
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-/// Base class of all fatal RPC errors (those that necessarily result in the
-/// termination of the RPC session).
-class RPCFatalError : public ErrorInfo<RPCFatalError> {
-public:
- static char ID;
-};
-
-/// ConnectionClosed is returned from RPC operations if the RPC connection
-/// has already been closed due to either an error or graceful disconnection.
-class ConnectionClosed : public ErrorInfo<ConnectionClosed> {
-public:
- static char ID;
- std::error_code convertToErrorCode() const override;
- void log(raw_ostream &OS) const override;
-};
-
-/// BadFunctionCall is returned from handleOne when the remote makes a call with
-/// an unrecognized function id.
-///
-/// This error is fatal because Orc RPC needs to know how to parse a function
-/// call to know where the next call starts, and if it doesn't recognize the
-/// function id it cannot parse the call.
-template <typename FnIdT, typename SeqNoT>
-class BadFunctionCall
- : public ErrorInfo<BadFunctionCall<FnIdT, SeqNoT>, RPCFatalError> {
-public:
- static char ID;
-
- BadFunctionCall(FnIdT FnId, SeqNoT SeqNo)
- : FnId(std::move(FnId)), SeqNo(std::move(SeqNo)) {}
-
- std::error_code convertToErrorCode() const override {
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- }
-
- void log(raw_ostream &OS) const override {
- OS << "Call to invalid RPC function id '" << FnId
- << "' with "
- "sequence number "
- << SeqNo;
- }
-
-private:
- FnIdT FnId;
- SeqNoT SeqNo;
-};
-
-template <typename FnIdT, typename SeqNoT>
-char BadFunctionCall<FnIdT, SeqNoT>::ID = 0;
-
-/// InvalidSequenceNumberForResponse is returned from handleOne when a response
-/// call arrives with a sequence number that doesn't correspond to any in-flight
-/// function call.
-///
-/// This error is fatal because Orc RPC needs to know how to parse the rest of
-/// the response call to know where the next call starts, and if it doesn't have
-/// a result parser for this sequence number it can't do that.
-template <typename SeqNoT>
-class InvalidSequenceNumberForResponse
- : public ErrorInfo<InvalidSequenceNumberForResponse<SeqNoT>,
- RPCFatalError> {
-public:
- static char ID;
-
- InvalidSequenceNumberForResponse(SeqNoT SeqNo) : SeqNo(std::move(SeqNo)) {}
-
- std::error_code convertToErrorCode() const override {
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- };
-
- void log(raw_ostream &OS) const override {
- OS << "Response has unknown sequence number " << SeqNo;
- }
-
-private:
- SeqNoT SeqNo;
-};
-
-template <typename SeqNoT>
-char InvalidSequenceNumberForResponse<SeqNoT>::ID = 0;
-
-/// This non-fatal error will be passed to asynchronous result handlers in place
-/// of a result if the connection goes down before a result returns, or if the
-/// function to be called cannot be negotiated with the remote.
-class ResponseAbandoned : public ErrorInfo<ResponseAbandoned> {
-public:
- static char ID;
-
- std::error_code convertToErrorCode() const override;
- void log(raw_ostream &OS) const override;
-};
-
-/// This error is returned if the remote does not have a handler installed for
-/// the given RPC function.
-class CouldNotNegotiate : public ErrorInfo<CouldNotNegotiate> {
-public:
- static char ID;
-
- CouldNotNegotiate(std::string Signature);
- std::error_code convertToErrorCode() const override;
- void log(raw_ostream &OS) const override;
- const std::string &getSignature() const { return Signature; }
-
-private:
- std::string Signature;
-};
-
-template <typename DerivedFunc, typename FnT> class RPCFunction;
-
-// RPC Function class.
-// DerivedFunc should be a user defined class with a static 'getName()' method
-// returning a const char* representing the function's name.
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-class RPCFunction<DerivedFunc, RetT(ArgTs...)> {
-public:
- /// User defined function type.
- using Type = RetT(ArgTs...);
-
- /// Return type.
- using ReturnType = RetT;
-
- /// Returns the full function prototype as a string.
- static const char *getPrototype() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << SerializationTypeName<RetT>::getName() << " "
- << DerivedFunc::getName() << "("
- << SerializationTypeNameSequence<ArgTs...>() << ")";
- return Name;
- }();
- return Name.data();
- }
-};
-
-/// Allocates RPC function ids during autonegotiation.
-/// Specializations of this class must provide four members:
-///
-/// static T getInvalidId():
-/// Should return a reserved id that will be used to represent missing
-/// functions during autonegotiation.
-///
-/// static T getResponseId():
-/// Should return a reserved id that will be used to send function responses
-/// (return values).
-///
-/// static T getNegotiateId():
-/// Should return a reserved id for the negotiate function, which will be used
-/// to negotiate ids for user defined functions.
-///
-/// template <typename Func> T allocate():
-/// Allocate a unique id for function Func.
-template <typename T, typename = void> class RPCFunctionIdAllocator;
-
-/// This specialization of RPCFunctionIdAllocator provides a default
-/// implementation for integral types.
-template <typename T>
-class RPCFunctionIdAllocator<T, std::enable_if_t<std::is_integral<T>::value>> {
-public:
- static T getInvalidId() { return T(0); }
- static T getResponseId() { return T(1); }
- static T getNegotiateId() { return T(2); }
-
- template <typename Func> T allocate() { return NextId++; }
-
-private:
- T NextId = 3;
-};
-
-namespace detail {
-
-/// Provides a typedef for a tuple containing the decayed argument types.
-template <typename T> class RPCFunctionArgsTuple;
-
-template <typename RetT, typename... ArgTs>
-class RPCFunctionArgsTuple<RetT(ArgTs...)> {
-public:
- using Type = std::tuple<std::decay_t<std::remove_reference_t<ArgTs>>...>;
-};
-
-// ResultTraits provides typedefs and utilities specific to the return type
-// of functions.
-template <typename RetT> class ResultTraits {
-public:
- // The return type wrapped in llvm::Expected.
- using ErrorReturnType = Expected<RetT>;
-
-#ifdef _MSC_VER
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<MSVCPExpected<RetT>>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<MSVCPExpected<RetT>>;
-#else
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<ErrorReturnType>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<ErrorReturnType>;
-#endif
-
- // Create a 'blank' value of the ErrorReturnType, ready and safe to
- // overwrite.
- static ErrorReturnType createBlankErrorReturnValue() {
- return ErrorReturnType(RetT());
- }
-
- // Consume an abandoned ErrorReturnType.
- static void consumeAbandoned(ErrorReturnType RetOrErr) {
- consumeError(RetOrErr.takeError());
- }
-
- static ErrorReturnType returnError(Error Err) { return std::move(Err); }
-};
-
-// ResultTraits specialization for void functions.
-template <> class ResultTraits<void> {
-public:
- // For void functions, ErrorReturnType is llvm::Error.
- using ErrorReturnType = Error;
-
-#ifdef _MSC_VER
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<MSVCPError>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<MSVCPError>;
-#else
- // The ErrorReturnType wrapped in a std::promise.
- using ReturnPromiseType = std::promise<ErrorReturnType>;
-
- // The ErrorReturnType wrapped in a std::future.
- using ReturnFutureType = std::future<ErrorReturnType>;
-#endif
-
- // Create a 'blank' value of the ErrorReturnType, ready and safe to
- // overwrite.
- static ErrorReturnType createBlankErrorReturnValue() {
- return ErrorReturnType::success();
- }
-
- // Consume an abandoned ErrorReturnType.
- static void consumeAbandoned(ErrorReturnType Err) {
- consumeError(std::move(Err));
- }
-
- static ErrorReturnType returnError(Error Err) { return Err; }
-};
-
-// ResultTraits<Error> is equivalent to ResultTraits<void>. This allows
-// handlers for void RPC functions to return either void (in which case they
-// implicitly succeed) or Error (in which case their error return is
-// propagated). See usage in HandlerTraits::runHandlerHelper.
-template <> class ResultTraits<Error> : public ResultTraits<void> {};
-
-// ResultTraits<Expected<T>> is equivalent to ResultTraits<T>. This allows
-// handlers for RPC functions returning a T to return either a T (in which
-// case they implicitly succeed) or Expected<T> (in which case their error
-// return is propagated). See usage in HandlerTraits::runHandlerHelper.
-template <typename RetT>
-class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {};
-
-// Determines whether an RPC function's defined error return type supports
-// error return value.
-template <typename T> class SupportsErrorReturn {
-public:
- static const bool value = false;
-};
-
-template <> class SupportsErrorReturn<Error> {
-public:
- static const bool value = true;
-};
-
-template <typename T> class SupportsErrorReturn<Expected<T>> {
-public:
- static const bool value = true;
-};
-
-// RespondHelper packages return values based on whether or not the declared
-// RPC function return type supports error returns.
-template <bool FuncSupportsErrorReturn> class RespondHelper;
-
-// RespondHelper specialization for functions that support error returns.
-template <> class RespondHelper<true> {
-public:
- // Send Expected<T>.
- template <typename WireRetT, typename HandlerRetT, typename ChannelT,
- typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo,
- Expected<HandlerRetT> ResultOrErr) {
- if (!ResultOrErr && ResultOrErr.template errorIsA<RPCFatalError>())
- return ResultOrErr.takeError();
-
- // Open the response message.
- if (auto Err = C.startSendMessage(ResponseId, SeqNo))
- return Err;
-
- // Serialize the result.
- if (auto Err =
- SerializationTraits<ChannelT, WireRetT, Expected<HandlerRetT>>::
- serialize(C, std::move(ResultOrErr)))
- return Err;
-
- // Close the response message.
- if (auto Err = C.endSendMessage())
- return Err;
- return C.send();
- }
-
- template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo, Error Err) {
- if (Err && Err.isA<RPCFatalError>())
- return Err;
- if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
- return Err2;
- if (auto Err2 = serializeSeq(C, std::move(Err)))
- return Err2;
- if (auto Err2 = C.endSendMessage())
- return Err2;
- return C.send();
- }
-};
-
-// RespondHelper specialization for functions that do not support error returns.
-template <> class RespondHelper<false> {
-public:
- template <typename WireRetT, typename HandlerRetT, typename ChannelT,
- typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo,
- Expected<HandlerRetT> ResultOrErr) {
- if (auto Err = ResultOrErr.takeError())
- return Err;
-
- // Open the response message.
- if (auto Err = C.startSendMessage(ResponseId, SeqNo))
- return Err;
-
- // Serialize the result.
- if (auto Err =
- SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize(
- C, *ResultOrErr))
- return Err;
-
- // End the response message.
- if (auto Err = C.endSendMessage())
- return Err;
-
- return C.send();
- }
-
- template <typename ChannelT, typename FunctionIdT, typename SequenceNumberT>
- static Error sendResult(ChannelT &C, const FunctionIdT &ResponseId,
- SequenceNumberT SeqNo, Error Err) {
- if (Err)
- return Err;
- if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
- return Err2;
- if (auto Err2 = C.endSendMessage())
- return Err2;
- return C.send();
- }
-};
-
-// Send a response of the given wire return type (WireRetT) over the
-// channel, with the given sequence number.
-template <typename WireRetT, typename HandlerRetT, typename ChannelT,
- typename FunctionIdT, typename SequenceNumberT>
-Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo,
- Expected<HandlerRetT> ResultOrErr) {
- return RespondHelper<SupportsErrorReturn<WireRetT>::value>::
- template sendResult<WireRetT>(C, ResponseId, SeqNo,
- std::move(ResultOrErr));
-}
-
-// Send an empty response message on the given channel to indicate that
-// the handler ran.
-template <typename WireRetT, typename ChannelT, typename FunctionIdT,
- typename SequenceNumberT>
-Error respond(ChannelT &C, const FunctionIdT &ResponseId, SequenceNumberT SeqNo,
- Error Err) {
- return RespondHelper<SupportsErrorReturn<WireRetT>::value>::sendResult(
- C, ResponseId, SeqNo, std::move(Err));
-}
-
-// Converts a given type to the equivalent error return type.
-template <typename T> class WrappedHandlerReturn {
-public:
- using Type = Expected<T>;
-};
-
-template <typename T> class WrappedHandlerReturn<Expected<T>> {
-public:
- using Type = Expected<T>;
-};
-
-template <> class WrappedHandlerReturn<void> {
-public:
- using Type = Error;
-};
-
-template <> class WrappedHandlerReturn<Error> {
-public:
- using Type = Error;
-};
-
-template <> class WrappedHandlerReturn<ErrorSuccess> {
-public:
- using Type = Error;
-};
-
-// Traits class that strips the response function from the list of handler
-// arguments.
-template <typename FnT> class AsyncHandlerTraits;
-
-template <typename ResultT, typename... ArgTs>
-class AsyncHandlerTraits<Error(std::function<Error(Expected<ResultT>)>,
- ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Expected<ResultT>;
-};
-
-template <typename... ArgTs>
-class AsyncHandlerTraits<Error(std::function<Error(Error)>, ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Error;
-};
-
-template <typename... ArgTs>
-class AsyncHandlerTraits<ErrorSuccess(std::function<Error(Error)>, ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Error;
-};
-
-template <typename... ArgTs>
-class AsyncHandlerTraits<void(std::function<Error(Error)>, ArgTs...)> {
-public:
- using Type = Error(ArgTs...);
- using ResultType = Error;
-};
-
-template <typename ResponseHandlerT, typename... ArgTs>
-class AsyncHandlerTraits<Error(ResponseHandlerT, ArgTs...)>
- : public AsyncHandlerTraits<Error(std::decay_t<ResponseHandlerT>,
- ArgTs...)> {};
-
-// This template class provides utilities related to RPC function handlers.
-// The base case applies to non-function types (the template class is
-// specialized for function types) and inherits from the appropriate
-// specialization for the given non-function type's call operator.
-template <typename HandlerT>
-class HandlerTraits
- : public HandlerTraits<
- decltype(&std::remove_reference<HandlerT>::type::operator())> {};
-
-// Traits for handlers with a given function type.
-template <typename RetT, typename... ArgTs>
-class HandlerTraits<RetT(ArgTs...)> {
-public:
- // Function type of the handler.
- using Type = RetT(ArgTs...);
-
- // Return type of the handler.
- using ReturnType = RetT;
-
- // Call the given handler with the given arguments.
- template <typename HandlerT, typename... TArgTs>
- static typename WrappedHandlerReturn<RetT>::Type
- unpackAndRun(HandlerT &Handler, std::tuple<TArgTs...> &Args) {
- return unpackAndRunHelper(Handler, Args,
- std::index_sequence_for<TArgTs...>());
- }
-
- // Call the given handler with the given arguments.
- template <typename HandlerT, typename ResponderT, typename... TArgTs>
- static Error unpackAndRunAsync(HandlerT &Handler, ResponderT &Responder,
- std::tuple<TArgTs...> &Args) {
- return unpackAndRunAsyncHelper(Handler, Responder, Args,
- std::index_sequence_for<TArgTs...>());
- }
-
- // Call the given handler with the given arguments.
- template <typename HandlerT>
- static std::enable_if_t<
- std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value, Error>
- run(HandlerT &Handler, ArgTs &&...Args) {
- Handler(std::move(Args)...);
- return Error::success();
- }
-
- template <typename HandlerT, typename... TArgTs>
- static std::enable_if_t<
- !std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value,
- typename HandlerTraits<HandlerT>::ReturnType>
- run(HandlerT &Handler, TArgTs... Args) {
- return Handler(std::move(Args)...);
- }
-
- // Serialize arguments to the channel.
- template <typename ChannelT, typename... CArgTs>
- static Error serializeArgs(ChannelT &C, const CArgTs... CArgs) {
- return SequenceSerialization<ChannelT, ArgTs...>::serialize(C, CArgs...);
- }
-
- // Deserialize arguments from the channel.
- template <typename ChannelT, typename... CArgTs>
- static Error deserializeArgs(ChannelT &C, std::tuple<CArgTs...> &Args) {
- return deserializeArgsHelper(C, Args, std::index_sequence_for<CArgTs...>());
- }
-
-private:
- template <typename ChannelT, typename... CArgTs, size_t... Indexes>
- static Error deserializeArgsHelper(ChannelT &C, std::tuple<CArgTs...> &Args,
- std::index_sequence<Indexes...> _) {
- return SequenceSerialization<ChannelT, ArgTs...>::deserialize(
- C, std::get<Indexes>(Args)...);
- }
-
- template <typename HandlerT, typename ArgTuple, size_t... Indexes>
- static typename WrappedHandlerReturn<
- typename HandlerTraits<HandlerT>::ReturnType>::Type
- unpackAndRunHelper(HandlerT &Handler, ArgTuple &Args,
- std::index_sequence<Indexes...>) {
- return run(Handler, std::move(std::get<Indexes>(Args))...);
- }
-
- template <typename HandlerT, typename ResponderT, typename ArgTuple,
- size_t... Indexes>
- static typename WrappedHandlerReturn<
- typename HandlerTraits<HandlerT>::ReturnType>::Type
- unpackAndRunAsyncHelper(HandlerT &Handler, ResponderT &Responder,
- ArgTuple &Args, std::index_sequence<Indexes...>) {
- return run(Handler, Responder, std::move(std::get<Indexes>(Args))...);
- }
-};
-
-// Handler traits for free functions.
-template <typename RetT, typename... ArgTs>
-class HandlerTraits<RetT (*)(ArgTs...)> : public HandlerTraits<RetT(ArgTs...)> {
-};
-
-// Handler traits for class methods (especially call operators for lambdas).
-template <typename Class, typename RetT, typename... ArgTs>
-class HandlerTraits<RetT (Class::*)(ArgTs...)>
- : public HandlerTraits<RetT(ArgTs...)> {};
-
-// Handler traits for const class methods (especially call operators for
-// lambdas).
-template <typename Class, typename RetT, typename... ArgTs>
-class HandlerTraits<RetT (Class::*)(ArgTs...) const>
- : public HandlerTraits<RetT(ArgTs...)> {};
-
-// Utility to peel the Expected wrapper off a response handler error type.
-template <typename HandlerT> class ResponseHandlerArg;
-
-template <typename ArgT> class ResponseHandlerArg<Error(Expected<ArgT>)> {
-public:
- using ArgType = Expected<ArgT>;
- using UnwrappedArgType = ArgT;
-};
-
-template <typename ArgT>
-class ResponseHandlerArg<ErrorSuccess(Expected<ArgT>)> {
-public:
- using ArgType = Expected<ArgT>;
- using UnwrappedArgType = ArgT;
-};
-
-template <> class ResponseHandlerArg<Error(Error)> {
-public:
- using ArgType = Error;
-};
-
-template <> class ResponseHandlerArg<ErrorSuccess(Error)> {
-public:
- using ArgType = Error;
-};
-
-// ResponseHandler represents a handler for a not-yet-received function call
-// result.
-template <typename ChannelT> class ResponseHandler {
-public:
- virtual ~ResponseHandler() {}
-
- // Reads the function result off the wire and acts on it. The meaning of
- // "act" will depend on how this method is implemented in any given
- // ResponseHandler subclass but could, for example, mean running a
- // user-specified handler or setting a promise value.
- virtual Error handleResponse(ChannelT &C) = 0;
-
- // Abandons this outstanding result.
- virtual void abandon() = 0;
-
- // Create an error instance representing an abandoned response.
- static Error createAbandonedResponseError() {
- return make_error<ResponseAbandoned>();
- }
-};
-
-// ResponseHandler subclass for RPC functions with non-void returns.
-template <typename ChannelT, typename FuncRetT, typename HandlerT>
-class ResponseHandlerImpl : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result by deserializing it from the channel then passing it
- // to the user defined handler.
- Error handleResponse(ChannelT &C) override {
- using UnwrappedArgType = typename ResponseHandlerArg<
- typename HandlerTraits<HandlerT>::Type>::UnwrappedArgType;
- UnwrappedArgType Result;
- if (auto Err =
- SerializationTraits<ChannelT, FuncRetT,
- UnwrappedArgType>::deserialize(C, Result))
- return Err;
- if (auto Err = C.endReceiveMessage())
- return Err;
- return Handler(std::move(Result));
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-// ResponseHandler subclass for RPC functions with void returns.
-template <typename ChannelT, typename HandlerT>
-class ResponseHandlerImpl<ChannelT, void, HandlerT>
- : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result (no actual value, just a notification that the function
- // has completed on the remote end) by calling the user-defined handler with
- // Error::success().
- Error handleResponse(ChannelT &C) override {
- if (auto Err = C.endReceiveMessage())
- return Err;
- return Handler(Error::success());
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-template <typename ChannelT, typename FuncRetT, typename HandlerT>
-class ResponseHandlerImpl<ChannelT, Expected<FuncRetT>, HandlerT>
- : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result by deserializing it from the channel then passing it
- // to the user defined handler.
- Error handleResponse(ChannelT &C) override {
- using HandlerArgType = typename ResponseHandlerArg<
- typename HandlerTraits<HandlerT>::Type>::ArgType;
- HandlerArgType Result((typename HandlerArgType::value_type()));
-
- if (auto Err = SerializationTraits<ChannelT, Expected<FuncRetT>,
- HandlerArgType>::deserialize(C, Result))
- return Err;
- if (auto Err = C.endReceiveMessage())
- return Err;
- return Handler(std::move(Result));
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-template <typename ChannelT, typename HandlerT>
-class ResponseHandlerImpl<ChannelT, Error, HandlerT>
- : public ResponseHandler<ChannelT> {
-public:
- ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
-
- // Handle the result by deserializing it from the channel then passing it
- // to the user defined handler.
- Error handleResponse(ChannelT &C) override {
- Error Result = Error::success();
- if (auto Err = SerializationTraits<ChannelT, Error, Error>::deserialize(
- C, Result)) {
- consumeError(std::move(Result));
- return Err;
- }
- if (auto Err = C.endReceiveMessage()) {
- consumeError(std::move(Result));
- return Err;
- }
- return Handler(std::move(Result));
- }
-
- // Abandon this response by calling the handler with an 'abandoned response'
- // error.
- void abandon() override {
- if (auto Err = Handler(this->createAbandonedResponseError())) {
- // Handlers should not fail when passed an abandoned response error.
- report_fatal_error(std::move(Err));
- }
- }
-
-private:
- HandlerT Handler;
-};
-
-// Create a ResponseHandler from a given user handler.
-template <typename ChannelT, typename FuncRetT, typename HandlerT>
-std::unique_ptr<ResponseHandler<ChannelT>> createResponseHandler(HandlerT H) {
- return std::make_unique<ResponseHandlerImpl<ChannelT, FuncRetT, HandlerT>>(
- std::move(H));
-}
-
-// Helper for wrapping member functions up as functors. This is useful for
-// installing methods as result handlers.
-template <typename ClassT, typename RetT, typename... ArgTs>
-class MemberFnWrapper {
-public:
- using MethodT = RetT (ClassT::*)(ArgTs...);
- MemberFnWrapper(ClassT &Instance, MethodT Method)
- : Instance(Instance), Method(Method) {}
- RetT operator()(ArgTs &&...Args) {
- return (Instance.*Method)(std::move(Args)...);
- }
-
-private:
- ClassT &Instance;
- MethodT Method;
-};
-
-// Helper that provides a Functor for deserializing arguments.
-template <typename... ArgTs> class ReadArgs {
-public:
- Error operator()() { return Error::success(); }
-};
-
-template <typename ArgT, typename... ArgTs>
-class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> {
-public:
- ReadArgs(ArgT &Arg, ArgTs &...Args) : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
-
- Error operator()(ArgT &ArgVal, ArgTs &...ArgVals) {
- this->Arg = std::move(ArgVal);
- return ReadArgs<ArgTs...>::operator()(ArgVals...);
- }
-
-private:
- ArgT &Arg;
-};
-
-// Manage sequence numbers.
-template <typename SequenceNumberT> class SequenceNumberManager {
-public:
- // Reset, making all sequence numbers available.
- void reset() {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- NextSequenceNumber = 0;
- FreeSequenceNumbers.clear();
- }
-
- // Get the next available sequence number. Will re-use numbers that have
- // been released.
- SequenceNumberT getSequenceNumber() {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- if (FreeSequenceNumbers.empty())
- return NextSequenceNumber++;
- auto SequenceNumber = FreeSequenceNumbers.back();
- FreeSequenceNumbers.pop_back();
- return SequenceNumber;
- }
-
- // Release a sequence number, making it available for re-use.
- void releaseSequenceNumber(SequenceNumberT SequenceNumber) {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- FreeSequenceNumbers.push_back(SequenceNumber);
- }
-
-private:
- std::mutex SeqNoLock;
- SequenceNumberT NextSequenceNumber = 0;
- std::vector<SequenceNumberT> FreeSequenceNumbers;
-};
-
-// Checks that predicate P holds for each corresponding pair of type arguments
-// from T1 and T2 tuple.
-template <template <class, class> class P, typename T1Tuple, typename T2Tuple>
-class RPCArgTypeCheckHelper;
-
-template <template <class, class> class P>
-class RPCArgTypeCheckHelper<P, std::tuple<>, std::tuple<>> {
-public:
- static const bool value = true;
-};
-
-template <template <class, class> class P, typename T, typename... Ts,
- typename U, typename... Us>
-class RPCArgTypeCheckHelper<P, std::tuple<T, Ts...>, std::tuple<U, Us...>> {
-public:
- static const bool value =
- P<T, U>::value &&
- RPCArgTypeCheckHelper<P, std::tuple<Ts...>, std::tuple<Us...>>::value;
-};
-
-template <template <class, class> class P, typename T1Sig, typename T2Sig>
-class RPCArgTypeCheck {
-public:
- using T1Tuple = typename RPCFunctionArgsTuple<T1Sig>::Type;
- using T2Tuple = typename RPCFunctionArgsTuple<T2Sig>::Type;
-
- static_assert(std::tuple_size<T1Tuple>::value >=
- std::tuple_size<T2Tuple>::value,
- "Too many arguments to RPC call");
- static_assert(std::tuple_size<T1Tuple>::value <=
- std::tuple_size<T2Tuple>::value,
- "Too few arguments to RPC call");
-
- static const bool value = RPCArgTypeCheckHelper<P, T1Tuple, T2Tuple>::value;
-};
-
-template <typename ChannelT, typename WireT, typename ConcreteT>
-class CanSerialize {
-private:
- using S = SerializationTraits<ChannelT, WireT, ConcreteT>;
-
- template <typename T>
- static std::true_type check(
- std::enable_if_t<std::is_same<decltype(T::serialize(
- std::declval<ChannelT &>(),
- std::declval<const ConcreteT &>())),
- Error>::value,
- void *>);
-
- template <typename> static std::false_type check(...);
-
-public:
- static const bool value = decltype(check<S>(0))::value;
-};
-
-template <typename ChannelT, typename WireT, typename ConcreteT>
-class CanDeserialize {
-private:
- using S = SerializationTraits<ChannelT, WireT, ConcreteT>;
-
- template <typename T>
- static std::true_type
- check(std::enable_if_t<
- std::is_same<decltype(T::deserialize(std::declval<ChannelT &>(),
- std::declval<ConcreteT &>())),
- Error>::value,
- void *>);
-
- template <typename> static std::false_type check(...);
-
-public:
- static const bool value = decltype(check<S>(0))::value;
-};
-
-/// Contains primitive utilities for defining, calling and handling calls to
-/// remote procedures. ChannelT is a bidirectional stream conforming to the
-/// RPCChannel interface (see RPCChannel.h), FunctionIdT is a procedure
-/// identifier type that must be serializable on ChannelT, and SequenceNumberT
-/// is an integral type that will be used to number in-flight function calls.
-///
-/// These utilities support the construction of very primitive RPC utilities.
-/// Their intent is to ensure correct serialization and deserialization of
-/// procedure arguments, and to keep the client and server's view of the API in
-/// sync.
-template <typename ImplT, typename ChannelT, typename FunctionIdT,
- typename SequenceNumberT>
-class RPCEndpointBase {
-protected:
- class OrcRPCInvalid : public RPCFunction<OrcRPCInvalid, void()> {
- public:
- static const char *getName() { return "__orc_rpc$invalid"; }
- };
-
- class OrcRPCResponse : public RPCFunction<OrcRPCResponse, void()> {
- public:
- static const char *getName() { return "__orc_rpc$response"; }
- };
-
- class OrcRPCNegotiate
- : public RPCFunction<OrcRPCNegotiate, FunctionIdT(std::string)> {
- public:
- static const char *getName() { return "__orc_rpc$negotiate"; }
- };
-
- // Helper predicate for testing for the presence of SerializationTraits
- // serializers.
- template <typename WireT, typename ConcreteT>
- class CanSerializeCheck : detail::CanSerialize<ChannelT, WireT, ConcreteT> {
- public:
- using detail::CanSerialize<ChannelT, WireT, ConcreteT>::value;
-
- static_assert(value, "Missing serializer for argument (Can't serialize the "
- "first template type argument of CanSerializeCheck "
- "from the second)");
- };
-
- // Helper predicate for testing for the presence of SerializationTraits
- // deserializers.
- template <typename WireT, typename ConcreteT>
- class CanDeserializeCheck
- : detail::CanDeserialize<ChannelT, WireT, ConcreteT> {
- public:
- using detail::CanDeserialize<ChannelT, WireT, ConcreteT>::value;
-
- static_assert(value, "Missing deserializer for argument (Can't deserialize "
- "the second template type argument of "
- "CanDeserializeCheck from the first)");
- };
-
-public:
- /// Construct an RPC instance on a channel.
- RPCEndpointBase(ChannelT &C, bool LazyAutoNegotiation)
- : C(C), LazyAutoNegotiation(LazyAutoNegotiation) {
- // Hold ResponseId in a special variable, since we expect Response to be
- // called relatively frequently, and want to avoid the map lookup.
- ResponseId = FnIdAllocator.getResponseId();
- RemoteFunctionIds[OrcRPCResponse::getPrototype()] = ResponseId;
-
- // Register the negotiate function id and handler.
- auto NegotiateId = FnIdAllocator.getNegotiateId();
- RemoteFunctionIds[OrcRPCNegotiate::getPrototype()] = NegotiateId;
- Handlers[NegotiateId] = wrapHandler<OrcRPCNegotiate>(
- [this](const std::string &Name) { return handleNegotiate(Name); });
- }
-
- /// Negotiate a function id for Func with the other end of the channel.
- template <typename Func> Error negotiateFunction(bool Retry = false) {
- return getRemoteFunctionId<Func>(true, Retry).takeError();
- }
-
- /// Append a call to Func; does not call send on the channel.
- /// The first argument specifies a user-defined handler to be run when the
- /// function returns. The handler should take an Expected<Func::ReturnType>,
- /// or an Error (if Func::ReturnType is void). The handler will be called
- /// with an error if the return value is abandoned due to a channel error.
- template <typename Func, typename HandlerT, typename... ArgTs>
- Error appendCallAsync(HandlerT Handler, const ArgTs &...Args) {
-
- static_assert(
- detail::RPCArgTypeCheck<CanSerializeCheck, typename Func::Type,
- void(ArgTs...)>::value,
- "");
-
- // Look up the function ID.
- FunctionIdT FnId;
- if (auto FnIdOrErr = getRemoteFunctionId<Func>(LazyAutoNegotiation, false))
- FnId = *FnIdOrErr;
- else {
- // Negotiation failed. Notify the handler then return the negotiate-failed
- // error.
- cantFail(Handler(make_error<ResponseAbandoned>()));
- return FnIdOrErr.takeError();
- }
-
- SequenceNumberT SeqNo; // initialized in locked scope below.
- {
- // Lock the pending responses map and sequence number manager.
- std::lock_guard<std::mutex> Lock(ResponsesMutex);
-
- // Allocate a sequence number.
- SeqNo = SequenceNumberMgr.getSequenceNumber();
- assert(!PendingResponses.count(SeqNo) &&
- "Sequence number already allocated");
-
- // Install the user handler.
- PendingResponses[SeqNo] =
- detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
- std::move(Handler));
- }
-
- // Open the function call message.
- if (auto Err = C.startSendMessage(FnId, SeqNo)) {
- abandonPendingResponses();
- return Err;
- }
-
- // Serialize the call arguments.
- if (auto Err = detail::HandlerTraits<typename Func::Type>::serializeArgs(
- C, Args...)) {
- abandonPendingResponses();
- return Err;
- }
-
- // Close the function call message.
- if (auto Err = C.endSendMessage()) {
- abandonPendingResponses();
- return Err;
- }
-
- return Error::success();
- }
-
- Error sendAppendedCalls() { return C.send(); };
-
- template <typename Func, typename HandlerT, typename... ArgTs>
- Error callAsync(HandlerT Handler, const ArgTs &...Args) {
- if (auto Err = appendCallAsync<Func>(std::move(Handler), Args...))
- return Err;
- return C.send();
- }
-
- /// Handle one incoming call.
- Error handleOne() {
- FunctionIdT FnId;
- SequenceNumberT SeqNo;
- if (auto Err = C.startReceiveMessage(FnId, SeqNo)) {
- abandonPendingResponses();
- return Err;
- }
- if (FnId == ResponseId)
- return handleResponse(SeqNo);
- auto I = Handlers.find(FnId);
- if (I != Handlers.end())
- return I->second(C, SeqNo);
-
- // else: No handler found. Report error to client?
- return make_error<BadFunctionCall<FunctionIdT, SequenceNumberT>>(FnId,
- SeqNo);
- }
-
- /// Helper for handling setter procedures - this method returns a functor that
- /// sets the variables referred to by Args... to values deserialized from the
- /// channel.
- /// E.g.
- ///
- /// typedef Function<0, bool, int> Func1;
- ///
- /// ...
- /// bool B;
- /// int I;
- /// if (auto Err = expect<Func1>(Channel, readArgs(B, I)))
- /// /* Handle Args */ ;
- ///
- template <typename... ArgTs>
- static detail::ReadArgs<ArgTs...> readArgs(ArgTs &...Args) {
- return detail::ReadArgs<ArgTs...>(Args...);
- }
-
- /// Abandon all outstanding result handlers.
- ///
- /// This will call all currently registered result handlers to receive an
- /// "abandoned" error as their argument. This is used internally by the RPC
- /// in error situations, but can also be called directly by clients who are
- /// disconnecting from the remote and don't or can't expect responses to their
- /// outstanding calls. (Especially for outstanding blocking calls, calling
- /// this function may be necessary to avoid dead threads).
- void abandonPendingResponses() {
- // Lock the pending responses map and sequence number manager.
- std::lock_guard<std::mutex> Lock(ResponsesMutex);
-
- for (auto &KV : PendingResponses)
- KV.second->abandon();
- PendingResponses.clear();
- SequenceNumberMgr.reset();
- }
-
- /// Remove the handler for the given function.
- /// A handler must currently be registered for this function.
- template <typename Func> void removeHandler() {
- auto IdItr = LocalFunctionIds.find(Func::getPrototype());
- assert(IdItr != LocalFunctionIds.end() &&
- "Function does not have a registered handler");
- auto HandlerItr = Handlers.find(IdItr->second);
- assert(HandlerItr != Handlers.end() &&
- "Function does not have a registered handler");
- Handlers.erase(HandlerItr);
- }
-
- /// Clear all handlers.
- void clearHandlers() { Handlers.clear(); }
-
-protected:
- FunctionIdT getInvalidFunctionId() const {
- return FnIdAllocator.getInvalidId();
- }
-
- /// Add the given handler to the handler map and make it available for
- /// autonegotiation and execution.
- template <typename Func, typename HandlerT>
- void addHandlerImpl(HandlerT Handler) {
-
- static_assert(detail::RPCArgTypeCheck<
- CanDeserializeCheck, typename Func::Type,
- typename detail::HandlerTraits<HandlerT>::Type>::value,
- "");
-
- FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>();
- LocalFunctionIds[Func::getPrototype()] = NewFnId;
- Handlers[NewFnId] = wrapHandler<Func>(std::move(Handler));
- }
-
- template <typename Func, typename HandlerT>
- void addAsyncHandlerImpl(HandlerT Handler) {
-
- static_assert(
- detail::RPCArgTypeCheck<
- CanDeserializeCheck, typename Func::Type,
- typename detail::AsyncHandlerTraits<
- typename detail::HandlerTraits<HandlerT>::Type>::Type>::value,
- "");
-
- FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>();
- LocalFunctionIds[Func::getPrototype()] = NewFnId;
- Handlers[NewFnId] = wrapAsyncHandler<Func>(std::move(Handler));
- }
-
- Error handleResponse(SequenceNumberT SeqNo) {
- using Handler = typename decltype(PendingResponses)::mapped_type;
- Handler PRHandler;
-
- {
- // Lock the pending responses map and sequence number manager.
- std::unique_lock<std::mutex> Lock(ResponsesMutex);
- auto I = PendingResponses.find(SeqNo);
-
- if (I != PendingResponses.end()) {
- PRHandler = std::move(I->second);
- PendingResponses.erase(I);
- SequenceNumberMgr.releaseSequenceNumber(SeqNo);
- } else {
- // Unlock the pending results map to prevent recursive lock.
- Lock.unlock();
- abandonPendingResponses();
- return make_error<InvalidSequenceNumberForResponse<SequenceNumberT>>(
- SeqNo);
- }
- }
-
- assert(PRHandler &&
- "If we didn't find a response handler we should have bailed out");
-
- if (auto Err = PRHandler->handleResponse(C)) {
- abandonPendingResponses();
- return Err;
- }
-
- return Error::success();
- }
-
- FunctionIdT handleNegotiate(const std::string &Name) {
- auto I = LocalFunctionIds.find(Name);
- if (I == LocalFunctionIds.end())
- return getInvalidFunctionId();
- return I->second;
- }
-
- // Find the remote FunctionId for the given function.
- template <typename Func>
- Expected<FunctionIdT> getRemoteFunctionId(bool NegotiateIfNotInMap,
- bool NegotiateIfInvalid) {
- bool DoNegotiate;
-
- // Check if we already have a function id...
- auto I = RemoteFunctionIds.find(Func::getPrototype());
- if (I != RemoteFunctionIds.end()) {
- // If it's valid there's nothing left to do.
- if (I->second != getInvalidFunctionId())
- return I->second;
- DoNegotiate = NegotiateIfInvalid;
- } else
- DoNegotiate = NegotiateIfNotInMap;
-
- // We don't have a function id for Func yet, but we're allowed to try to
- // negotiate one.
- if (DoNegotiate) {
- auto &Impl = static_cast<ImplT &>(*this);
- if (auto RemoteIdOrErr =
- Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) {
- RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
- if (*RemoteIdOrErr == getInvalidFunctionId())
- return make_error<CouldNotNegotiate>(Func::getPrototype());
- return *RemoteIdOrErr;
- } else
- return RemoteIdOrErr.takeError();
- }
-
- // No key was available in the map and we weren't allowed to try to
- // negotiate one, so return an unknown function error.
- return make_error<CouldNotNegotiate>(Func::getPrototype());
- }
-
- using WrappedHandlerFn = std::function<Error(ChannelT &, SequenceNumberT)>;
-
- // Wrap the given user handler in the necessary argument-deserialization code,
- // result-serialization code, and call to the launch policy (if present).
- template <typename Func, typename HandlerT>
- WrappedHandlerFn wrapHandler(HandlerT Handler) {
- return [this, Handler](ChannelT &Channel,
- SequenceNumberT SeqNo) mutable -> Error {
- // Start by deserializing the arguments.
- using ArgsTuple = typename detail::RPCFunctionArgsTuple<
- typename detail::HandlerTraits<HandlerT>::Type>::Type;
- auto Args = std::make_shared<ArgsTuple>();
-
- if (auto Err =
- detail::HandlerTraits<typename Func::Type>::deserializeArgs(
- Channel, *Args))
- return Err;
-
- // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning
- // for RPCArgs. Void cast RPCArgs to work around this for now.
- // FIXME: Remove this workaround once we can assume a working GCC version.
- (void)Args;
-
- // End receive message, unlocking the channel for reading.
- if (auto Err = Channel.endReceiveMessage())
- return Err;
-
- using HTraits = detail::HandlerTraits<HandlerT>;
- using FuncReturn = typename Func::ReturnType;
- return detail::respond<FuncReturn>(Channel, ResponseId, SeqNo,
- HTraits::unpackAndRun(Handler, *Args));
- };
- }
-
- // Wrap the given user handler in the necessary argument-deserialization code,
- // result-serialization code, and call to the launch policy (if present).
- template <typename Func, typename HandlerT>
- WrappedHandlerFn wrapAsyncHandler(HandlerT Handler) {
- return [this, Handler](ChannelT &Channel,
- SequenceNumberT SeqNo) mutable -> Error {
- // Start by deserializing the arguments.
- using AHTraits = detail::AsyncHandlerTraits<
- typename detail::HandlerTraits<HandlerT>::Type>;
- using ArgsTuple =
- typename detail::RPCFunctionArgsTuple<typename AHTraits::Type>::Type;
- auto Args = std::make_shared<ArgsTuple>();
-
- if (auto Err =
- detail::HandlerTraits<typename Func::Type>::deserializeArgs(
- Channel, *Args))
- return Err;
-
- // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning
- // for RPCArgs. Void cast RPCArgs to work around this for now.
- // FIXME: Remove this workaround once we can assume a working GCC version.
- (void)Args;
-
- // End receive message, unlocking the channel for reading.
- if (auto Err = Channel.endReceiveMessage())
- return Err;
-
- using HTraits = detail::HandlerTraits<HandlerT>;
- using FuncReturn = typename Func::ReturnType;
- auto Responder = [this,
- SeqNo](typename AHTraits::ResultType RetVal) -> Error {
- return detail::respond<FuncReturn>(C, ResponseId, SeqNo,
- std::move(RetVal));
- };
-
- return HTraits::unpackAndRunAsync(Handler, Responder, *Args);
- };
- }
-
- ChannelT &C;
-
- bool LazyAutoNegotiation;
-
- RPCFunctionIdAllocator<FunctionIdT> FnIdAllocator;
-
- FunctionIdT ResponseId;
- std::map<std::string, FunctionIdT> LocalFunctionIds;
- std::map<const char *, FunctionIdT> RemoteFunctionIds;
-
- std::map<FunctionIdT, WrappedHandlerFn> Handlers;
-
- std::mutex ResponsesMutex;
- detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr;
- std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>>
- PendingResponses;
-};
-
-} // end namespace detail
-
-template <typename ChannelT, typename FunctionIdT = uint32_t,
- typename SequenceNumberT = uint32_t>
-class MultiThreadedRPCEndpoint
- : public detail::RPCEndpointBase<
- MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT> {
-private:
- using BaseClass = detail::RPCEndpointBase<
- MultiThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT>;
-
-public:
- MultiThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
- : BaseClass(C, LazyAutoNegotiation) {}
-
- /// Add a handler for the given RPC function.
- /// This installs the given handler functor for the given RPCFunction, and
- /// makes the RPC function available for negotiation/calling from the remote.
- template <typename Func, typename HandlerT>
- void addHandler(HandlerT Handler) {
- return this->template addHandlerImpl<Func>(std::move(Handler));
- }
-
- /// Add a class-method as a handler.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- template <typename Func, typename HandlerT>
- void addAsyncHandler(HandlerT Handler) {
- return this->template addAsyncHandlerImpl<Func>(std::move(Handler));
- }
-
- /// Add a class-method as a handler.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addAsyncHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- /// Return type for non-blocking call primitives.
- template <typename Func>
- using NonBlockingCallResult = typename detail::ResultTraits<
- typename Func::ReturnType>::ReturnFutureType;
-
- /// Call Func on Channel C. Does not block and does not call send on the
- /// channel. Returns a future that will receive the result, or an
- /// abandonment error if the channel fails before the response arrives.
- ///
- /// The call is only appended to the channel's outgoing buffer; call
- /// sendAppendedCalls (or use callNB, which sends automatically) to flush it.
- template <typename Func, typename... ArgTs>
- Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &...Args) {
- using RTraits = detail::ResultTraits<typename Func::ReturnType>;
- using ErrorReturn = typename RTraits::ErrorReturnType;
- using ErrorReturnPromise = typename RTraits::ReturnPromiseType;
-
- ErrorReturnPromise Promise;
- auto FutureResult = Promise.get_future();
-
- if (auto Err = this->template appendCallAsync<Func>(
- [Promise = std::move(Promise)](ErrorReturn RetOrErr) mutable {
- Promise.set_value(std::move(RetOrErr));
- return Error::success();
- },
- Args...)) {
- RTraits::consumeAbandoned(FutureResult.get());
- return std::move(Err);
- }
- return std::move(FutureResult);
- }
-
- /// The same as appendCallNB, except that it calls C.send() to flush the
- /// channel after serializing the call.
- template <typename Func, typename... ArgTs>
- Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &...Args) {
- auto Result = appendCallNB<Func>(Args...);
- if (!Result)
- return Result;
- if (auto Err = this->C.send()) {
- this->abandonPendingResponses();
- detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
- std::move(Result->get()));
- return std::move(Err);
- }
- return Result;
- }
-
- /// Call Func on Channel C. Blocks waiting for a result. Returns an Error
- /// for void functions or an Expected<T> for functions returning a T.
- ///
- /// This function is for use in threaded code where another thread is
- /// handling responses and incoming calls.
- template <typename Func, typename... ArgTs,
- typename AltRetT = typename Func::ReturnType>
- typename detail::ResultTraits<AltRetT>::ErrorReturnType
- callB(const ArgTs &...Args) {
- if (auto FutureResOrErr = callNB<Func>(Args...))
- return FutureResOrErr->get();
- else
- return FutureResOrErr.takeError();
- }
-
- /// Handle incoming RPC calls.
- Error handlerLoop() {
- while (true)
- if (auto Err = this->handleOne())
- return Err;
- return Error::success();
- }
-};
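// A minimal sketch of how the multi-threaded endpoint above is typically
// wired up. "QueueChannel", "Add", and runAddExample are hypothetical
// stand-ins (any RawByteChannel-derived channel pair and any RPCFunction
// will do); the endpoint members used (addHandler, handlerLoop, callB,
// callAsync) are the ones declared in this class. Assumes <thread> and
// <cassert> are available.
class Add : public RPCFunction<Add, int32_t(int32_t, int32_t)> {
public:
  static const char *getName() { return "Add"; }
};

inline void runAddExample(QueueChannel &ServerChan, QueueChannel &ClientChan) {
  MultiThreadedRPCEndpoint<QueueChannel> Server(ServerChan, true);
  MultiThreadedRPCEndpoint<QueueChannel> Client(ClientChan, true);

  // Install a handler on the server. Handlers may return the plain result
  // type; response serialization is generated from the Func signature.
  Server.addHandler<Add>([](int32_t X, int32_t Y) { return X + Y; });

  // Service incoming messages on both ends: calls on the server, responses
  // (including negotiate replies) on the client.
  std::thread ServerLoop([&]() { consumeError(Server.handlerLoop()); });
  std::thread ClientLoop([&]() { consumeError(Client.handlerLoop()); });

  // Blocking call: returns Expected<int32_t> once the response arrives.
  Expected<int32_t> Sum = Client.callB<Add>(2, 3);
  if (!Sum)
    consumeError(Sum.takeError());
  else
    assert(*Sum == 5 && "unexpected result");

  // Asynchronous call: the handler takes Expected<int32_t> and returns
  // Error, per the appendCallAsync contract documented above.
  cantFail(Client.callAsync<Add>(
      [](Expected<int32_t> R) -> Error {
        if (!R)
          return R.takeError();
        return Error::success();
      },
      40, 2));

  // Sketch only: real code would close the channels to unblock the handler
  // loops and then join the threads.
  ServerLoop.detach();
  ClientLoop.detach();
}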
-
-template <typename ChannelT, typename FunctionIdT = uint32_t,
- typename SequenceNumberT = uint32_t>
-class SingleThreadedRPCEndpoint
- : public detail::RPCEndpointBase<
- SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT> {
-private:
- using BaseClass = detail::RPCEndpointBase<
- SingleThreadedRPCEndpoint<ChannelT, FunctionIdT, SequenceNumberT>,
- ChannelT, FunctionIdT, SequenceNumberT>;
-
-public:
- SingleThreadedRPCEndpoint(ChannelT &C, bool LazyAutoNegotiation)
- : BaseClass(C, LazyAutoNegotiation) {}
-
- template <typename Func, typename HandlerT>
- void addHandler(HandlerT Handler) {
- return this->template addHandlerImpl<Func>(std::move(Handler));
- }
-
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- template <typename Func, typename HandlerT>
- void addAsyncHandler(HandlerT Handler) {
- return this->template addAsyncHandlerImpl<Func>(std::move(Handler));
- }
-
- /// Add a class-method as a handler.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- void addAsyncHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
- addAsyncHandler<Func>(
- detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
- }
-
- template <typename Func, typename... ArgTs,
- typename AltRetT = typename Func::ReturnType>
- typename detail::ResultTraits<AltRetT>::ErrorReturnType
- callB(const ArgTs &...Args) {
- bool ReceivedResponse = false;
- using AltRetTraits = detail::ResultTraits<AltRetT>;
- using ResultType = typename AltRetTraits::ErrorReturnType;
- ResultType Result = AltRetTraits::createBlankErrorReturnValue();
-
- // We have to 'Check' result (which we know is in a success state at this
- // point) so that it can be overwritten in the async handler.
- (void)!!Result;
-
- if (Error Err = this->template appendCallAsync<Func>(
- [&](ResultType R) {
- Result = std::move(R);
- ReceivedResponse = true;
- return Error::success();
- },
- Args...)) {
- AltRetTraits::consumeAbandoned(std::move(Result));
- return AltRetTraits::returnError(std::move(Err));
- }
-
- if (Error Err = this->C.send()) {
- AltRetTraits::consumeAbandoned(std::move(Result));
- return AltRetTraits::returnError(std::move(Err));
- }
-
- while (!ReceivedResponse) {
- if (Error Err = this->handleOne()) {
- AltRetTraits::consumeAbandoned(std::move(Result));
- return AltRetTraits::returnError(std::move(Err));
- }
- }
-
- return Result;
- }
-};
-
-/// Asynchronous dispatch for a function on an RPC endpoint.
-template <typename RPCClass, typename Func> class RPCAsyncDispatch {
-public:
- RPCAsyncDispatch(RPCClass &Endpoint) : Endpoint(Endpoint) {}
-
- template <typename HandlerT, typename... ArgTs>
- Error operator()(HandlerT Handler, const ArgTs &...Args) const {
- return Endpoint.template appendCallAsync<Func>(std::move(Handler), Args...);
- }
-
-private:
- RPCClass &Endpoint;
-};
-
-/// Construct an asynchronous dispatcher from an RPC endpoint and a Func.
-template <typename Func, typename RPCEndpointT>
-RPCAsyncDispatch<RPCEndpointT, Func> rpcAsyncDispatch(RPCEndpointT &Endpoint) {
- return RPCAsyncDispatch<RPCEndpointT, Func>(Endpoint);
-}
-
-/// Allows a set of asynchronous calls to be dispatched, and then
-/// waited on as a group.
-class ParallelCallGroup {
-public:
- ParallelCallGroup() = default;
- ParallelCallGroup(const ParallelCallGroup &) = delete;
- ParallelCallGroup &operator=(const ParallelCallGroup &) = delete;
-
- /// Make an asynchronous call.
- template <typename AsyncDispatcher, typename HandlerT, typename... ArgTs>
- Error call(const AsyncDispatcher &AsyncDispatch, HandlerT Handler,
- const ArgTs &...Args) {
- // Increment the count of outstanding calls. This has to happen before
- // we invoke the call, as the handler may (depending on scheduling)
- // be run immediately on another thread, and we don't want the decrement
- // in the wrapped handler below to run before the increment.
- {
- std::unique_lock<std::mutex> Lock(M);
- ++NumOutstandingCalls;
- }
-
- // Wrap the user handler in a lambda that will decrement the
- // outstanding calls count, then poke the condition variable.
- using ArgType = typename detail::ResponseHandlerArg<
- typename detail::HandlerTraits<HandlerT>::Type>::ArgType;
- auto WrappedHandler = [this, Handler = std::move(Handler)](ArgType Arg) {
- auto Err = Handler(std::move(Arg));
- std::unique_lock<std::mutex> Lock(M);
- --NumOutstandingCalls;
- CV.notify_all();
- return Err;
- };
-
- return AsyncDispatch(std::move(WrappedHandler), Args...);
- }
-
- /// Blocks until all calls have been completed and their return value
- /// handlers run.
- void wait() {
- std::unique_lock<std::mutex> Lock(M);
- while (NumOutstandingCalls > 0)
- CV.wait(Lock);
- }
-
-private:
- std::mutex M;
- std::condition_variable CV;
- uint32_t NumOutstandingCalls = 0;
-};
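// A sketch of batching asynchronous calls through ParallelCallGroup. "Add"
// is the hypothetical RPC function from the sketch above; the endpoint type
// is left generic. Assumes another thread is servicing the endpoint's
// handlerLoop so that responses (which run the wrapped handlers) arrive.
template <typename RPCEndpointT>
Error addManyInParallel(RPCEndpointT &EP, std::vector<int32_t> &Results) {
  ParallelCallGroup PCG;
  Results.resize(3);

  for (int32_t I = 0; I != 3; ++I) {
    // Each wrapped handler stores its result and decrements the group's
    // outstanding-call count when it runs.
    if (auto Err = PCG.call(rpcAsyncDispatch<Add>(EP),
                            [&Results, I](Expected<int32_t> R) -> Error {
                              if (!R)
                                return R.takeError();
                              Results[I] = *R;
                              return Error::success();
                            },
                            I, I))
      return Err;
  }

  // Flush the appended calls, then block until every handler has run.
  if (auto Err = EP.sendAppendedCalls())
    return Err;
  PCG.wait();
  return Error::success();
}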
-
-/// Convenience class for grouping RPCFunctions into APIs that can be
-/// negotiated as a block.
-///
-template <typename... Funcs> class APICalls {
-public:
- /// Test whether this API contains Function F.
- template <typename F> class Contains {
- public:
- static const bool value = false;
- };
-
- /// Negotiate all functions in this API.
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
- return Error::success();
- }
-};
-
-template <typename Func, typename... Funcs> class APICalls<Func, Funcs...> {
-public:
- template <typename F> class Contains {
- public:
- static const bool value = std::is_same<F, Func>::value |
- APICalls<Funcs...>::template Contains<F>::value;
- };
-
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
- if (auto Err = R.template negotiateFunction<Func>())
- return Err;
- return APICalls<Funcs...>::negotiate(R);
- }
-};
-
-template <typename... InnerFuncs, typename... Funcs>
-class APICalls<APICalls<InnerFuncs...>, Funcs...> {
-public:
- template <typename F> class Contains {
- public:
- static const bool value =
- APICalls<InnerFuncs...>::template Contains<F>::value |
- APICalls<Funcs...>::template Contains<F>::value;
- };
-
- template <typename RPCEndpoint> static Error negotiate(RPCEndpoint &R) {
- if (auto Err = APICalls<InnerFuncs...>::negotiate(R))
- return Err;
- return APICalls<Funcs...>::negotiate(R);
- }
-};
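// A sketch of grouping functions into an API that can be negotiated as a
// block. "Add" is the hypothetical function from the earlier sketch and
// "Sub" is another hypothetical one; negotiate() simply calls
// negotiateFunction for each member, so the same response-servicing
// requirements as for blocking calls apply.
class Sub : public RPCFunction<Sub, int32_t(int32_t, int32_t)> {
public:
  static const char *getName() { return "Sub"; }
};

using ArithmeticAPI = APICalls<Add, Sub>;

template <typename RPCEndpointT>
Error negotiateArithmetic(RPCEndpointT &EP) {
  // Contains<F> can back static_asserts documenting what an API provides.
  static_assert(ArithmeticAPI::Contains<Add>::value, "Add must be in the API");
  // Resolve remote function ids for every member up front rather than
  // lazily on first call.
  return ArithmeticAPI::negotiate(EP);
}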
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RPCUTILS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h
deleted file mode 100644
index 2ee471939251..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h
+++ /dev/null
@@ -1,183 +0,0 @@
-//===- RawByteChannel.h -----------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ExecutionEngine/Orc/Shared/Serialization.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-#include <mutex>
-#include <string>
-#include <type_traits>
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-/// Interface for byte-streams to be used with ORC Serialization.
-class RawByteChannel {
-public:
- virtual ~RawByteChannel() = default;
-
- /// Read Size bytes from the stream into *Dst.
- virtual Error readBytes(char *Dst, unsigned Size) = 0;
-
- /// Append Size bytes from *Src to the stream.
- virtual Error appendBytes(const char *Src, unsigned Size) = 0;
-
- /// Flush the stream if possible.
- virtual Error send() = 0;
-
- /// Notify the channel that we're starting a message send.
- /// Locks the channel for writing.
- template <typename FunctionIdT, typename SequenceIdT>
- Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
- writeLock.lock();
- if (auto Err = serializeSeq(*this, FnId, SeqNo)) {
- writeLock.unlock();
- return Err;
- }
- return Error::success();
- }
-
- /// Notify the channel that we're ending a message send.
- /// Unlocks the channel for writing.
- Error endSendMessage() {
- writeLock.unlock();
- return Error::success();
- }
-
- /// Notify the channel that we're starting a message receive.
- /// Locks the channel for reading.
- template <typename FunctionIdT, typename SequenceNumberT>
- Error startReceiveMessage(FunctionIdT &FnId, SequenceNumberT &SeqNo) {
- readLock.lock();
- if (auto Err = deserializeSeq(*this, FnId, SeqNo)) {
- readLock.unlock();
- return Err;
- }
- return Error::success();
- }
-
- /// Notify the channel that we're ending a message receive.
- /// Unlocks the channel for reading.
- Error endReceiveMessage() {
- readLock.unlock();
- return Error::success();
- }
-
- /// Get the lock for stream reading.
- std::mutex &getReadLock() { return readLock; }
-
- /// Get the lock for stream writing.
- std::mutex &getWriteLock() { return writeLock; }
-
-private:
- std::mutex readLock, writeLock;
-};
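// A sketch of a concrete channel: an in-memory buffer implementing the three
// primitives RawByteChannel requires. "InMemoryChannel" is hypothetical and
// single-threaded; a real channel would wrap a socket, pipe, or queue and
// block in readBytes until data arrives. Assumes <vector> and <cstring>.
class InMemoryChannel : public RawByteChannel {
public:
  Error readBytes(char *Dst, unsigned Size) override {
    if (Buffer.size() - ReadIdx < Size)
      return make_error<StringError>("short read on InMemoryChannel",
                                     inconvertibleErrorCode());
    std::memcpy(Dst, Buffer.data() + ReadIdx, Size);
    ReadIdx += Size;
    return Error::success();
  }

  Error appendBytes(const char *Src, unsigned Size) override {
    Buffer.insert(Buffer.end(), Src, Src + Size);
    return Error::success();
  }

  // Nothing to flush for an in-memory buffer.
  Error send() override { return Error::success(); }

private:
  std::vector<char> Buffer;
  size_t ReadIdx = 0;
};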
-
-template <typename ChannelT, typename T>
-class SerializationTraits<
- ChannelT, T, T,
- std::enable_if_t<
- std::is_base_of<RawByteChannel, ChannelT>::value &&
- (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value ||
- std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
- std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
- std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
- std::is_same<T, char>::value)>> {
-public:
- static Error serialize(ChannelT &C, T V) {
- support::endian::byte_swap<T, support::big>(V);
- return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T));
- };
-
- static Error deserialize(ChannelT &C, T &V) {
- if (auto Err = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T)))
- return Err;
- support::endian::byte_swap<T, support::big>(V);
- return Error::success();
- };
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, bool, bool,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, bool V) {
- uint8_t Tmp = V ? 1 : 0;
- if (auto Err = C.appendBytes(reinterpret_cast<const char *>(&Tmp), 1))
- return Err;
- return Error::success();
- }
-
- static Error deserialize(ChannelT &C, bool &V) {
- uint8_t Tmp = 0;
- if (auto Err = C.readBytes(reinterpret_cast<char *>(&Tmp), 1))
- return Err;
- V = Tmp != 0;
- return Error::success();
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, std::string, StringRef,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- /// Serialize a StringRef as a std::string on the channel.
- static Error serialize(RawByteChannel &C, StringRef S) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
- return Err;
- return C.appendBytes((const char *)S.data(), S.size());
- }
-};
-
-template <typename ChannelT, typename T>
-class SerializationTraits<
- ChannelT, std::string, T,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value &&
- (std::is_same<T, const char *>::value ||
- std::is_same<T, char *>::value)>> {
-public:
- static Error serialize(RawByteChannel &C, const char *S) {
- return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
- S);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, std::string, std::string,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- /// Serialize a std::string on the channel.
- static Error serialize(RawByteChannel &C, const std::string &S) {
- return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
- S);
- }
-
- /// Deserialize a std::string from the channel.
- static Error deserialize(RawByteChannel &C, std::string &S) {
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
- S.resize(Count);
- return C.readBytes(&S[0], Count);
- }
-};
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_RAWBYTECHANNEL_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h
deleted file mode 100644
index 0ea483ba2abb..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/Serialization.h
+++ /dev/null
@@ -1,769 +0,0 @@
-//===- Serialization.h ------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
-#define LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
-#include "llvm/Support/thread.h"
-#include <map>
-#include <mutex>
-#include <set>
-#include <sstream>
-#include <string>
-#include <vector>
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-template <typename T> class SerializationTypeName;
-
-/// SerializationTypeNameSequence is a utility for rendering sequences of
-/// types to a string by rendering each type, separated by ", ".
-template <typename... ArgTs> class SerializationTypeNameSequence {};
-
-/// Render an empty SerializationTypeNameSequence to an ostream.
-template <typename OStream>
-OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<> &V) {
- return OS;
-}
-
-/// Render a SerializationTypeNameSequence of a single type to an ostream.
-template <typename OStream, typename ArgT>
-OStream &operator<<(OStream &OS, const SerializationTypeNameSequence<ArgT> &V) {
- OS << SerializationTypeName<ArgT>::getName();
- return OS;
-}
-
-/// Render a SerializationTypeNameSequence of more than one type to an ostream.
-template <typename OStream, typename ArgT1, typename ArgT2, typename... ArgTs>
-OStream &
-operator<<(OStream &OS,
- const SerializationTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) {
- OS << SerializationTypeName<ArgT1>::getName() << ", "
- << SerializationTypeNameSequence<ArgT2, ArgTs...>();
- return OS;
-}
-
-template <> class SerializationTypeName<void> {
-public:
- static const char *getName() { return "void"; }
-};
-
-template <> class SerializationTypeName<int8_t> {
-public:
- static const char *getName() { return "int8_t"; }
-};
-
-template <> class SerializationTypeName<uint8_t> {
-public:
- static const char *getName() { return "uint8_t"; }
-};
-
-template <> class SerializationTypeName<int16_t> {
-public:
- static const char *getName() { return "int16_t"; }
-};
-
-template <> class SerializationTypeName<uint16_t> {
-public:
- static const char *getName() { return "uint16_t"; }
-};
-
-template <> class SerializationTypeName<int32_t> {
-public:
- static const char *getName() { return "int32_t"; }
-};
-
-template <> class SerializationTypeName<uint32_t> {
-public:
- static const char *getName() { return "uint32_t"; }
-};
-
-template <> class SerializationTypeName<int64_t> {
-public:
- static const char *getName() { return "int64_t"; }
-};
-
-template <> class SerializationTypeName<uint64_t> {
-public:
- static const char *getName() { return "uint64_t"; }
-};
-
-template <> class SerializationTypeName<bool> {
-public:
- static const char *getName() { return "bool"; }
-};
-
-template <> class SerializationTypeName<std::string> {
-public:
- static const char *getName() { return "std::string"; }
-};
-
-template <> class SerializationTypeName<Error> {
-public:
- static const char *getName() { return "Error"; }
-};
-
-template <typename T> class SerializationTypeName<Expected<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "Expected<" << SerializationTypeNameSequence<T>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T1, typename T2>
-class SerializationTypeName<std::pair<T1, T2>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::pair<" << SerializationTypeNameSequence<T1, T2>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename... ArgTs> class SerializationTypeName<std::tuple<ArgTs...>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::tuple<" << SerializationTypeNameSequence<ArgTs...>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T> class SerializationTypeName<Optional<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "Optional<" << SerializationTypeName<T>::getName() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T> class SerializationTypeName<std::vector<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::vector<" << SerializationTypeName<T>::getName() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename T> class SerializationTypeName<std::set<T>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::set<" << SerializationTypeName<T>::getName() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-template <typename K, typename V> class SerializationTypeName<std::map<K, V>> {
-public:
- static const char *getName() {
- static std::string Name = [] {
- std::string Name;
- raw_string_ostream(Name)
- << "std::map<" << SerializationTypeNameSequence<K, V>() << ">";
- return Name;
- }();
- return Name.data();
- }
-};
-
-/// The SerializationTraits<ChannelT, T> class describes how to serialize and
-/// deserialize an instance of type T to/from an abstract channel of type
-/// ChannelT. It also provides a representation of the type's name via the
-/// getName method.
-///
-/// Specializations of this class should provide the following functions:
-///
-/// @code{.cpp}
-///
-/// static const char* getName();
-/// static Error serialize(ChannelT&, const T&);
-/// static Error deserialize(ChannelT&, T&);
-///
-/// @endcode
-///
-/// The third argument of SerializationTraits is intended to support SFINAE.
-/// E.g.:
-///
-/// @code{.cpp}
-///
-/// class MyVirtualChannel { ... };
-///
-/// template <typename DerivedChannelT>
-/// class SerializationTraits<DerivedChannelT, bool,
-/// std::enable_if_t<
-/// std::is_base_of<MyVirtualChannel, DerivedChannelT>::value
-/// >> {
-/// public:
-/// static const char* getName() { ... };
-/// }
-///
-/// @endcode
-template <typename ChannelT, typename WireType,
- typename ConcreteType = WireType, typename = void>
-class SerializationTraits;
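// A sketch of a user-supplied specialization for a hypothetical "Point"
// type, following the contract described above. It assumes ChannelT can
// already serialize int32_t (e.g. any RawByteChannel-derived channel) and
// leans on serializeSeq/deserializeSeq, declared later in this header; in
// real code the specialization would live in the user's own headers.
struct Point {
  int32_t X = 0;
  int32_t Y = 0;
};

template <typename ChannelT>
class SerializationTraits<ChannelT, Point> {
public:
  static const char *getName() { return "Point"; }

  static Error serialize(ChannelT &C, const Point &P) {
    // Wire format: X then Y, via the primitive integer serializers.
    return serializeSeq(C, P.X, P.Y);
  }

  static Error deserialize(ChannelT &C, Point &P) {
    return deserializeSeq(C, P.X, P.Y);
  }
};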
-
-template <typename ChannelT> class SequenceTraits {
-public:
- static Error emitSeparator(ChannelT &C) { return Error::success(); }
- static Error consumeSeparator(ChannelT &C) { return Error::success(); }
-};
-
-/// Utility class for serializing sequences of values of varying types.
-/// Specializations of this class contain 'serialize' and 'deserialize' methods
-/// for the given channel. The ArgTs... list will determine the "over-the-wire"
-/// types to be serialized. The serialize and deserialize methods take a list
-/// CArgTs... ("caller arg types") which must be the same length as ArgTs...,
-/// but may be different types from ArgTs, provided that for each CArgT there
-/// is a SerializationTraits specialization
-/// SerializationTraits<ChannelT, ArgT, CArgT> with methods that can serialize
-/// the caller argument to the over-the-wire value.
-template <typename ChannelT, typename... ArgTs> class SequenceSerialization;
-
-template <typename ChannelT> class SequenceSerialization<ChannelT> {
-public:
- static Error serialize(ChannelT &C) { return Error::success(); }
- static Error deserialize(ChannelT &C) { return Error::success(); }
-};
-
-template <typename ChannelT, typename ArgT>
-class SequenceSerialization<ChannelT, ArgT> {
-public:
- template <typename CArgT> static Error serialize(ChannelT &C, CArgT &&CArg) {
- return SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize(
- C, std::forward<CArgT>(CArg));
- }
-
- template <typename CArgT> static Error deserialize(ChannelT &C, CArgT &CArg) {
- return SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg);
- }
-};
-
-template <typename ChannelT, typename ArgT, typename... ArgTs>
-class SequenceSerialization<ChannelT, ArgT, ArgTs...> {
-public:
- template <typename CArgT, typename... CArgTs>
- static Error serialize(ChannelT &C, CArgT &&CArg, CArgTs &&...CArgs) {
- if (auto Err =
- SerializationTraits<ChannelT, ArgT, std::decay_t<CArgT>>::serialize(
- C, std::forward<CArgT>(CArg)))
- return Err;
- if (auto Err = SequenceTraits<ChannelT>::emitSeparator(C))
- return Err;
- return SequenceSerialization<ChannelT, ArgTs...>::serialize(
- C, std::forward<CArgTs>(CArgs)...);
- }
-
- template <typename CArgT, typename... CArgTs>
- static Error deserialize(ChannelT &C, CArgT &CArg, CArgTs &...CArgs) {
- if (auto Err =
- SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg))
- return Err;
- if (auto Err = SequenceTraits<ChannelT>::consumeSeparator(C))
- return Err;
- return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, CArgs...);
- }
-};
-
-template <typename ChannelT, typename... ArgTs>
-Error serializeSeq(ChannelT &C, ArgTs &&...Args) {
- return SequenceSerialization<ChannelT, std::decay_t<ArgTs>...>::serialize(
- C, std::forward<ArgTs>(Args)...);
-}
-
-template <typename ChannelT, typename... ArgTs>
-Error deserializeSeq(ChannelT &C, ArgTs &...Args) {
- return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, Args...);
-}
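// A sketch of serializeSeq/deserializeSeq round-tripping a mixed sequence of
// values. ChannelT is assumed to provide the primitive SerializationTraits
// used here (any RawByteChannel-derived channel qualifies), and the read is
// assumed to see the bytes written by the preceding writes.
template <typename ChannelT> Error roundTripExample(ChannelT &C) {
  // Writes a uint32_t, a bool and a std::string, each via its
  // SerializationTraits specialization, separated per SequenceTraits.
  if (auto Err = serializeSeq(C, uint32_t(42), true, std::string("hello")))
    return Err;

  uint32_t N = 0;
  bool Flag = false;
  std::string S;
  // Reads the values back in the same order and with the same wire types.
  if (auto Err = deserializeSeq(C, N, Flag, S))
    return Err;

  if (N != 42 || !Flag || S != "hello")
    return make_error<StringError>("round-trip mismatch",
                                   inconvertibleErrorCode());
  return Error::success();
}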
-
-template <typename ChannelT> class SerializationTraits<ChannelT, Error> {
-public:
- using WrappedErrorSerializer =
- std::function<Error(ChannelT &C, const ErrorInfoBase &)>;
-
- using WrappedErrorDeserializer =
- std::function<Error(ChannelT &C, Error &Err)>;
-
- template <typename ErrorInfoT, typename SerializeFtor,
- typename DeserializeFtor>
- static void registerErrorType(std::string Name, SerializeFtor Serialize,
- DeserializeFtor Deserialize) {
- assert(!Name.empty() &&
- "The empty string is reserved for the Success value");
-
- const std::string *KeyName = nullptr;
- {
- // We're abusing the stability of std::map here: We take a reference to
- // the key of the deserializers map to save us from duplicating the string
- // in the serializer. This should be changed to use a stringpool if we
- // switch to a map type that may move keys in memory.
- std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex);
- auto I = Deserializers.insert(
- Deserializers.begin(),
- std::make_pair(std::move(Name), std::move(Deserialize)));
- KeyName = &I->first;
- }
-
- {
- assert(KeyName != nullptr && "No keyname pointer");
- std::lock_guard<std::recursive_mutex> Lock(SerializersMutex);
- Serializers[ErrorInfoT::classID()] =
- [KeyName, Serialize = std::move(Serialize)](
- ChannelT &C, const ErrorInfoBase &EIB) -> Error {
- assert(EIB.dynamicClassID() == ErrorInfoT::classID() &&
- "Serializer called for wrong error type");
- if (auto Err = serializeSeq(C, *KeyName))
- return Err;
- return Serialize(C, static_cast<const ErrorInfoT &>(EIB));
- };
- }
- }
-
- static Error serialize(ChannelT &C, Error &&Err) {
- std::lock_guard<std::recursive_mutex> Lock(SerializersMutex);
-
- if (!Err)
- return serializeSeq(C, std::string());
-
- return handleErrors(std::move(Err), [&C](const ErrorInfoBase &EIB) {
- auto SI = Serializers.find(EIB.dynamicClassID());
- if (SI == Serializers.end())
- return serializeAsStringError(C, EIB);
- return (SI->second)(C, EIB);
- });
- }
-
- static Error deserialize(ChannelT &C, Error &Err) {
- std::lock_guard<std::recursive_mutex> Lock(DeserializersMutex);
-
- std::string Key;
- if (auto Err = deserializeSeq(C, Key))
- return Err;
-
- if (Key.empty()) {
- ErrorAsOutParameter EAO(&Err);
- Err = Error::success();
- return Error::success();
- }
-
- auto DI = Deserializers.find(Key);
- assert(DI != Deserializers.end() && "No deserializer for error type");
- return (DI->second)(C, Err);
- }
-
-private:
- static Error serializeAsStringError(ChannelT &C, const ErrorInfoBase &EIB) {
- std::string ErrMsg;
- {
- raw_string_ostream ErrMsgStream(ErrMsg);
- EIB.log(ErrMsgStream);
- }
- return serialize(C, make_error<StringError>(std::move(ErrMsg),
- inconvertibleErrorCode()));
- }
-
- static std::recursive_mutex SerializersMutex;
- static std::recursive_mutex DeserializersMutex;
- static std::map<const void *, WrappedErrorSerializer> Serializers;
- static std::map<std::string, WrappedErrorDeserializer> Deserializers;
-};
-
-template <typename ChannelT>
-std::recursive_mutex SerializationTraits<ChannelT, Error>::SerializersMutex;
-
-template <typename ChannelT>
-std::recursive_mutex SerializationTraits<ChannelT, Error>::DeserializersMutex;
-
-template <typename ChannelT>
-std::map<const void *,
- typename SerializationTraits<ChannelT, Error>::WrappedErrorSerializer>
- SerializationTraits<ChannelT, Error>::Serializers;
-
-template <typename ChannelT>
-std::map<std::string, typename SerializationTraits<
- ChannelT, Error>::WrappedErrorDeserializer>
- SerializationTraits<ChannelT, Error>::Deserializers;
-
-/// Registers a serializer and deserializer for the given error type on the
-/// given channel type.
-template <typename ChannelT, typename ErrorInfoT, typename SerializeFtor,
- typename DeserializeFtor>
-void registerErrorSerialization(std::string Name, SerializeFtor &&Serialize,
- DeserializeFtor &&Deserialize) {
- SerializationTraits<ChannelT, Error>::template registerErrorType<ErrorInfoT>(
- std::move(Name), std::forward<SerializeFtor>(Serialize),
- std::forward<DeserializeFtor>(Deserialize));
-}
-
-/// Registers serialization/deserialization for StringError.
-template <typename ChannelT> void registerStringError() {
- static bool AlreadyRegistered = false;
- if (!AlreadyRegistered) {
- registerErrorSerialization<ChannelT, StringError>(
- "StringError",
- [](ChannelT &C, const StringError &SE) {
- return serializeSeq(C, SE.getMessage());
- },
- [](ChannelT &C, Error &Err) -> Error {
- ErrorAsOutParameter EAO(&Err);
- std::string Msg;
- if (auto E2 = deserializeSeq(C, Msg))
- return E2;
- Err = make_error<StringError>(
- std::move(Msg),
- orcError(OrcErrorCode::UnknownErrorCodeFromRemote));
- return Error::success();
- });
- AlreadyRegistered = true;
- }
-}
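// A sketch of registering serialization for a hypothetical custom error
// type, mirroring the registerStringError pattern above. MyToolError is not
// an LLVM type; it follows the usual ErrorInfo conventions so that its
// payload (an int code) can be shipped over the wire and reconstructed on
// the other side. Assumes raw_ostream is available for log().
class MyToolError : public ErrorInfo<MyToolError> {
public:
  static char ID; // Must be defined in exactly one translation unit.

  MyToolError(int32_t Code) : Code(Code) {}
  int32_t getCode() const { return Code; }

  void log(raw_ostream &OS) const override { OS << "MyToolError " << Code; }
  std::error_code convertToErrorCode() const override {
    return inconvertibleErrorCode();
  }

private:
  int32_t Code;
};

template <typename ChannelT> void registerMyToolError() {
  registerErrorSerialization<ChannelT, MyToolError>(
      "MyToolError",
      [](ChannelT &C, const MyToolError &E) {
        return serializeSeq(C, E.getCode());
      },
      [](ChannelT &C, Error &Err) -> Error {
        ErrorAsOutParameter EAO(&Err);
        int32_t Code = 0;
        if (auto E2 = deserializeSeq(C, Code))
          return E2;
        Err = make_error<MyToolError>(Code);
        return Error::success();
      });
}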
-
-/// SerializationTraits for Expected<T1> from an Expected<T2>.
-template <typename ChannelT, typename T1, typename T2>
-class SerializationTraits<ChannelT, Expected<T1>, Expected<T2>> {
-public:
- static Error serialize(ChannelT &C, Expected<T2> &&ValOrErr) {
- if (ValOrErr) {
- if (auto Err = serializeSeq(C, true))
- return Err;
- return SerializationTraits<ChannelT, T1, T2>::serialize(C, *ValOrErr);
- }
- if (auto Err = serializeSeq(C, false))
- return Err;
- return serializeSeq(C, ValOrErr.takeError());
- }
-
- static Error deserialize(ChannelT &C, Expected<T2> &ValOrErr) {
- ExpectedAsOutParameter<T2> EAO(&ValOrErr);
- bool HasValue;
- if (auto Err = deserializeSeq(C, HasValue))
- return Err;
- if (HasValue)
- return SerializationTraits<ChannelT, T1, T2>::deserialize(C, *ValOrErr);
- Error Err = Error::success();
- if (auto E2 = deserializeSeq(C, Err))
- return E2;
- ValOrErr = std::move(Err);
- return Error::success();
- }
-};
-
-/// SerializationTraits for Expected<T1> from a T2.
-template <typename ChannelT, typename T1, typename T2>
-class SerializationTraits<ChannelT, Expected<T1>, T2> {
-public:
- static Error serialize(ChannelT &C, T2 &&Val) {
- return serializeSeq(C, Expected<T2>(std::forward<T2>(Val)));
- }
-};
-
-/// SerializationTraits for Expected<T1> from an Error.
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, Expected<T>, Error> {
-public:
- static Error serialize(ChannelT &C, Error &&Err) {
- return serializeSeq(C, Expected<T>(std::move(Err)));
- }
-};
-
-/// SerializationTraits default specialization for std::pair.
-template <typename ChannelT, typename T1, typename T2, typename T3, typename T4>
-class SerializationTraits<ChannelT, std::pair<T1, T2>, std::pair<T3, T4>> {
-public:
- static Error serialize(ChannelT &C, const std::pair<T3, T4> &V) {
- if (auto Err = SerializationTraits<ChannelT, T1, T3>::serialize(C, V.first))
- return Err;
- return SerializationTraits<ChannelT, T2, T4>::serialize(C, V.second);
- }
-
- static Error deserialize(ChannelT &C, std::pair<T3, T4> &V) {
- if (auto Err =
- SerializationTraits<ChannelT, T1, T3>::deserialize(C, V.first))
- return Err;
- return SerializationTraits<ChannelT, T2, T4>::deserialize(C, V.second);
- }
-};
-
-/// SerializationTraits default specialization for std::tuple.
-template <typename ChannelT, typename... ArgTs>
-class SerializationTraits<ChannelT, std::tuple<ArgTs...>> {
-public:
- /// RPC channel serialization for std::tuple.
- static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) {
- return serializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
- }
-
- /// RPC channel deserialization for std::tuple.
- static Error deserialize(ChannelT &C, std::tuple<ArgTs...> &V) {
- return deserializeTupleHelper(C, V, std::index_sequence_for<ArgTs...>());
- }
-
-private:
- // Serialization helper for std::tuple.
- template <size_t... Is>
- static Error serializeTupleHelper(ChannelT &C, const std::tuple<ArgTs...> &V,
- std::index_sequence<Is...> _) {
- return serializeSeq(C, std::get<Is>(V)...);
- }
-
- // Deserialization helper for std::tuple.
- template <size_t... Is>
- static Error deserializeTupleHelper(ChannelT &C, std::tuple<ArgTs...> &V,
- std::index_sequence<Is...> _) {
- return deserializeSeq(C, std::get<Is>(V)...);
- }
-};
-
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, Optional<T>> {
-public:
- /// Serialize an Optional<T>.
- static Error serialize(ChannelT &C, const Optional<T> &O) {
- if (auto Err = serializeSeq(C, O != None))
- return Err;
- if (O)
- if (auto Err = serializeSeq(C, *O))
- return Err;
- return Error::success();
- }
-
- /// Deserialize an Optional<T>.
- static Error deserialize(ChannelT &C, Optional<T> &O) {
- bool HasValue = false;
- if (auto Err = deserializeSeq(C, HasValue))
- return Err;
- if (HasValue) {
- O.emplace();
- if (auto Err = deserializeSeq(C, *O))
- return Err;
- }
- return Error::success();
- };
-};
-
-/// SerializationTraits default specialization for std::vector.
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, std::vector<T>> {
-public:
- /// Serialize a std::vector<T> from std::vector<T>.
- static Error serialize(ChannelT &C, const std::vector<T> &V) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
- return Err;
-
- for (const auto &E : V)
- if (auto Err = serializeSeq(C, E))
- return Err;
-
- return Error::success();
- }
-
- /// Deserialize a std::vector<T> to a std::vector<T>.
- static Error deserialize(ChannelT &C, std::vector<T> &V) {
- assert(V.empty() &&
- "Expected default-constructed vector to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- V.resize(Count);
- for (auto &E : V)
- if (auto Err = deserializeSeq(C, E))
- return Err;
-
- return Error::success();
- }
-};
-
-/// Enable vector serialization from an ArrayRef.
-template <typename ChannelT, typename T>
-class SerializationTraits<ChannelT, std::vector<T>, ArrayRef<T>> {
-public:
- static Error serialize(ChannelT &C, ArrayRef<T> V) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
- return Err;
-
- for (const auto &E : V)
- if (auto Err = serializeSeq(C, E))
- return Err;
-
- return Error::success();
- }
-};
-
-template <typename ChannelT, typename T, typename T2>
-class SerializationTraits<ChannelT, std::set<T>, std::set<T2>> {
-public:
- /// Serialize a std::set<T> from std::set<T2>.
- static Error serialize(ChannelT &C, const std::set<T2> &S) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
- return Err;
-
- for (const auto &E : S)
- if (auto Err = SerializationTraits<ChannelT, T, T2>::serialize(C, E))
- return Err;
-
- return Error::success();
- }
-
- /// Deserialize a std::set<T> to a std::set<T>.
- static Error deserialize(ChannelT &C, std::set<T2> &S) {
- assert(S.empty() && "Expected default-constructed set to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- while (Count-- != 0) {
- T2 Val;
- if (auto Err = SerializationTraits<ChannelT, T, T2>::deserialize(C, Val))
- return Err;
-
- auto Added = S.insert(Val).second;
- if (!Added)
- return make_error<StringError>("Duplicate element in deserialized set",
- orcError(OrcErrorCode::UnknownORCError));
- }
-
- return Error::success();
- }
-};
-
-template <typename ChannelT, typename K, typename V, typename K2, typename V2>
-class SerializationTraits<ChannelT, std::map<K, V>, std::map<K2, V2>> {
-public:
- /// Serialize a std::map<K, V> from std::map<K2, V2>.
- static Error serialize(ChannelT &C, const std::map<K2, V2> &M) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size())))
- return Err;
-
- for (const auto &E : M) {
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::serialize(C, E.first))
- return Err;
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::serialize(C, E.second))
- return Err;
- }
-
- return Error::success();
- }
-
- /// Deserialize a std::map<K, V> to a std::map<K, V>.
- static Error deserialize(ChannelT &C, std::map<K2, V2> &M) {
- assert(M.empty() && "Expected default-constructed map to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- while (Count-- != 0) {
- std::pair<K2, V2> Val;
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first))
- return Err;
-
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second))
- return Err;
-
- auto Added = M.insert(Val).second;
- if (!Added)
- return make_error<StringError>("Duplicate element in deserialized map",
- orcError(OrcErrorCode::UnknownORCError));
- }
-
- return Error::success();
- }
-};
-
-template <typename ChannelT, typename K, typename V, typename K2, typename V2>
-class SerializationTraits<ChannelT, std::map<K, V>, DenseMap<K2, V2>> {
-public:
- /// Serialize a std::map<K, V> from DenseMap<K2, V2>.
- static Error serialize(ChannelT &C, const DenseMap<K2, V2> &M) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(M.size())))
- return Err;
-
- for (auto &E : M) {
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::serialize(C, E.first))
- return Err;
-
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::serialize(C, E.second))
- return Err;
- }
-
- return Error::success();
- }
-
- /// Deserialize to a DenseMap<K2, V2>.
- static Error deserialize(ChannelT &C, DenseMap<K2, V2> &M) {
- assert(M.empty() && "Expected default-constructed map to deserialize into");
-
- uint64_t Count = 0;
- if (auto Err = deserializeSeq(C, Count))
- return Err;
-
- while (Count-- != 0) {
- std::pair<K2, V2> Val;
- if (auto Err =
- SerializationTraits<ChannelT, K, K2>::deserialize(C, Val.first))
- return Err;
-
- if (auto Err =
- SerializationTraits<ChannelT, V, V2>::deserialize(C, Val.second))
- return Err;
-
- auto Added = M.insert(Val).second;
- if (!Added)
- return make_error<StringError>("Duplicate element in deserialized map",
- orcError(OrcErrorCode::UnknownORCError));
- }
-
- return Error::success();
- }
-};
-
-} // namespace shared
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SERIALIZATION_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
index 854f1098d5af..9ac13a493e9d 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h
@@ -33,10 +33,12 @@
#define LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEPACKEDSERIALIZATION_H
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SwapByteOrder.h"
+#include <limits>
#include <string>
#include <tuple>
#include <type_traits>
@@ -193,13 +195,6 @@ template <typename SPSElementTagT> class SPSSequence;
/// SPS tag type for strings, which are equivalent to sequences of chars.
using SPSString = SPSSequence<char>;
-/// SPS tag type for executor addresses.
-class SPSExecutorAddress {};
-
-template <>
-class SPSSerializationTraits<SPSExecutorAddress, uint64_t>
- : public SPSSerializationTraits<uint64_t, uint64_t> {};
-
/// SPS tag type for maps.
///
/// SPS maps are just sequences of (Key, Value) tuples.
@@ -289,6 +284,40 @@ public:
}
};
+/// Trivial ArrayRef<T> -> SPSSequence<SPSElementTagT> serialization.
+template <typename SPSElementTagT, typename T>
+class TrivialSPSSequenceSerialization<SPSElementTagT, ArrayRef<T>> {
+public:
+ static constexpr bool available = true;
+};
+
+/// Specialized SPSSequence<char> -> ArrayRef<char> serialization.
+///
+/// On deserialize, points directly into the input buffer.
+template <> class SPSSerializationTraits<SPSSequence<char>, ArrayRef<char>> {
+public:
+ static size_t size(const ArrayRef<char> &A) {
+ return SPSArgList<uint64_t>::size(static_cast<uint64_t>(A.size())) +
+ A.size();
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const ArrayRef<char> &A) {
+ if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(A.size())))
+ return false;
+ return OB.write(A.data(), A.size());
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, ArrayRef<char> &A) {
+ uint64_t Size;
+ if (!SPSArgList<uint64_t>::deserialize(IB, Size))
+ return false;
+ if (Size > std::numeric_limits<size_t>::max())
+ return false;
+ A = {IB.data(), static_cast<size_t>(Size)};
+ return IB.skip(Size);
+ }
+};
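+
+// Example (illustrative sketch; buffer and variable names are arbitrary):
+// round-tripping an ArrayRef<char> through SPS. Note that on deserialization
+// the resulting ArrayRef points directly into the input buffer, so the buffer
+// must outlive it.
+//
+// @code{.cpp}
+//   ArrayRef<char> In("hello", 5);
+//   std::vector<char> Blob(SPSArgList<SPSSequence<char>>::size(In));
+//   SPSOutputBuffer OB(Blob.data(), Blob.size());
+//   bool SerOK = SPSArgList<SPSSequence<char>>::serialize(OB, In);
+//
+//   SPSInputBuffer IB(Blob.data(), Blob.size());
+//   ArrayRef<char> Out;
+//   bool DeserOK = SPSArgList<SPSSequence<char>>::deserialize(IB, Out);
+// @endcode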
+
/// 'Trivial' sequence serialization: Sequence is serialized as a uint64_t size
followed by a for-each loop over the elements of the sequence to serialize
/// each of them.
@@ -330,6 +359,44 @@ public:
}
};
+/// SPSTuple serialization for std::tuple.
+template <typename... SPSTagTs, typename... Ts>
+class SPSSerializationTraits<SPSTuple<SPSTagTs...>, std::tuple<Ts...>> {
+private:
+ using TupleArgList = typename SPSTuple<SPSTagTs...>::AsArgList;
+ using ArgIndices = std::make_index_sequence<sizeof...(Ts)>;
+
+ template <std::size_t... I>
+ static size_t size(const std::tuple<Ts...> &T, std::index_sequence<I...>) {
+ return TupleArgList::size(std::get<I>(T)...);
+ }
+
+ template <std::size_t... I>
+ static bool serialize(SPSOutputBuffer &OB, const std::tuple<Ts...> &T,
+ std::index_sequence<I...>) {
+ return TupleArgList::serialize(OB, std::get<I>(T)...);
+ }
+
+ template <std::size_t... I>
+ static bool deserialize(SPSInputBuffer &IB, std::tuple<Ts...> &T,
+ std::index_sequence<I...>) {
+ return TupleArgList::deserialize(IB, std::get<I>(T)...);
+ }
+
+public:
+ static size_t size(const std::tuple<Ts...> &T) {
+ return size(T, ArgIndices{});
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const std::tuple<Ts...> &T) {
+ return serialize(OB, T, ArgIndices{});
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, std::tuple<Ts...> &T) {
+ return deserialize(IB, T, ArgIndices{});
+ }
+};
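+
+// Example (illustrative sketch; the SPS tag and variable names are chosen for
+// the example only): serializing a std::tuple using the specialization above.
+//
+// @code{.cpp}
+//   using SPSSig = SPSTuple<SPSString, uint64_t>;
+//   std::tuple<std::string, uint64_t> T("foo", 42);
+//   std::vector<char> Blob(SPSArgList<SPSSig>::size(T));
+//   SPSOutputBuffer OB(Blob.data(), Blob.size());
+//   bool SerOK = SPSArgList<SPSSig>::serialize(OB, T);
+// @endcode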
+
/// SPSTuple serialization for std::pair.
template <typename SPSTagT1, typename SPSTagT2, typename T1, typename T2>
class SPSSerializationTraits<SPSTuple<SPSTagT1, SPSTagT2>, std::pair<T1, T2>> {
@@ -380,6 +447,49 @@ public:
}
};
+/// Serialization for StringMap<ValueT>s.
+template <typename SPSValueT, typename ValueT>
+class SPSSerializationTraits<SPSSequence<SPSTuple<SPSString, SPSValueT>>,
+ StringMap<ValueT>> {
+public:
+ static size_t size(const StringMap<ValueT> &M) {
+ size_t Sz = SPSArgList<uint64_t>::size(static_cast<uint64_t>(M.size()));
+ for (auto &E : M)
+ Sz += SPSArgList<SPSString, SPSValueT>::size(E.first(), E.second);
+ return Sz;
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const StringMap<ValueT> &M) {
+ if (!SPSArgList<uint64_t>::serialize(OB, static_cast<uint64_t>(M.size())))
+ return false;
+
+ for (auto &E : M)
+ if (!SPSArgList<SPSString, SPSValueT>::serialize(OB, E.first(), E.second))
+ return false;
+
+ return true;
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, StringMap<ValueT> &M) {
+ uint64_t Size;
+ assert(M.empty() && "M already contains elements");
+
+ if (!SPSArgList<uint64_t>::deserialize(IB, Size))
+ return false;
+
+ while (Size--) {
+ StringRef S;
+ ValueT V;
+ if (!SPSArgList<SPSString, SPSValueT>::deserialize(IB, S, V))
+ return false;
+ if (!M.insert(std::make_pair(S, V)).second)
+ return false;
+ }
+
+ return true;
+ }
+};
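+
+// Example (illustrative sketch; uint64_t stands in for any SPS-serializable
+// value type, names are arbitrary): serializing a StringMap via the
+// specialization above.
+//
+// @code{.cpp}
+//   StringMap<uint64_t> M;
+//   M["foo"] = 1;
+//   using SPSTag = SPSSequence<SPSTuple<SPSString, uint64_t>>;
+//   std::vector<char> Blob(SPSArgList<SPSTag>::size(M));
+//   SPSOutputBuffer OB(Blob.data(), Blob.size());
+//   bool SerOK = SPSArgList<SPSTag>::serialize(OB, M);
+// @endcode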
+
/// SPS tag type for errors.
class SPSError;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
new file mode 100644
index 000000000000..9e074ed1f931
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
@@ -0,0 +1,235 @@
+//===--- SimpleRemoteEPCUtils.h - Utils for Simple Remote EPC ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Message definitions and other utilities for SimpleRemoteEPC and
+// SimpleRemoteEPCServer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
+#include "llvm/Support/Error.h"
+
+#include <atomic>
+#include <mutex>
+#include <string>
+#include <thread>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+namespace SimpleRemoteEPCDefaultBootstrapSymbolNames {
+extern const char *ExecutorSessionObjectName;
+extern const char *DispatchFnName;
+} // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames
+
+enum class SimpleRemoteEPCOpcode : uint8_t {
+ Setup,
+ Hangup,
+ Result,
+ CallWrapper,
+ LastOpC = CallWrapper
+};
+
+struct SimpleRemoteEPCExecutorInfo {
+ std::string TargetTriple;
+ uint64_t PageSize;
+ StringMap<ExecutorAddr> BootstrapSymbols;
+};
+
+using SimpleRemoteEPCArgBytesVector = SmallVector<char, 128>;
+
+class SimpleRemoteEPCTransportClient {
+public:
+ enum HandleMessageAction { ContinueSession, EndSession };
+
+ virtual ~SimpleRemoteEPCTransportClient();
+
+ /// Handle receipt of a message.
+ ///
+ /// Returns an Error if the message cannot be handled, 'EndSession' if the
+ /// client will not accept any further messages, and 'ContinueSession'
+ /// otherwise.
+ virtual Expected<HandleMessageAction>
+ handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) = 0;
+
+ /// Handle a disconnection from the underlying transport. No further messages
+ /// should be sent to handleMessage after this is called.
+ /// Err may contain an Error value indicating unexpected disconnection. This
+ /// allows clients to log such errors, but no attempt should be made at
+ /// recovery (which should be handled inside the transport class, if it is
+ /// supported at all).
+ virtual void handleDisconnect(Error Err) = 0;
+};
+
+class SimpleRemoteEPCTransport {
+public:
+ virtual ~SimpleRemoteEPCTransport();
+
+ /// Called during setup of the client to indicate that the client is ready
+ /// to receive messages.
+ ///
+ /// Transport objects should not access the client until this method is
+ /// called.
+ virtual Error start() = 0;
+
+ /// Send a SimpleRemoteEPC message.
+ ///
+ /// This function may be called concurrently. Subclasses should implement
+ /// locking if required for the underlying transport.
+ virtual Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes) = 0;
+
+ /// Trigger disconnection from the transport. The implementation should
+ /// respond by calling handleDisconnect on the client once disconnection
+ /// is complete. May be called more than once and from different threads.
+ virtual void disconnect() = 0;
+};
+
+/// Uses read/write on file descriptors for transport.
+class FDSimpleRemoteEPCTransport : public SimpleRemoteEPCTransport {
+public:
+ /// Create a FDSimpleRemoteEPCTransport using the given FDs for
+ /// reading (InFD) and writing (OutFD).
+ static Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
+ Create(SimpleRemoteEPCTransportClient &C, int InFD, int OutFD);
+
+ /// Create a FDSimpleRemoteEPCTransport using the given FD for both
+ /// reading and writing.
+ static Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
+ Create(SimpleRemoteEPCTransportClient &C, int FD) {
+ return Create(C, FD, FD);
+ }
+
+ ~FDSimpleRemoteEPCTransport() override;
+
+ Error start() override;
+
+ Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes) override;
+
+ void disconnect() override;
+
+private:
+ FDSimpleRemoteEPCTransport(SimpleRemoteEPCTransportClient &C, int InFD,
+ int OutFD)
+ : C(C), InFD(InFD), OutFD(OutFD) {}
+
+ Error readBytes(char *Dst, size_t Size, bool *IsEOF = nullptr);
+ int writeBytes(const char *Src, size_t Size);
+ void listenLoop();
+
+ std::mutex M;
+ SimpleRemoteEPCTransportClient &C;
+ std::thread ListenerThread;
+ int InFD, OutFD;
+ std::atomic<bool> Disconnected{false};
+};
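+
+// Example (illustrative sketch): creating a transport over a pair of file
+// descriptors for a client `EPC` implementing SimpleRemoteEPCTransportClient.
+// The FD values and the client object are assumptions for the example only.
+//
+// @code{.cpp}
+//   int InFD = 0, OutFD = 1; // e.g. ends of a pipe pair
+//   auto T = FDSimpleRemoteEPCTransport::Create(EPC, InFD, OutFD);
+//   if (!T)
+//     logAllUnhandledErrors(T.takeError(), errs(), "transport: ");
+// @endcode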
+
+struct RemoteSymbolLookupSetElement {
+ std::string Name;
+ bool Required;
+};
+
+using RemoteSymbolLookupSet = std::vector<RemoteSymbolLookupSetElement>;
+
+struct RemoteSymbolLookup {
+ uint64_t H;
+ RemoteSymbolLookupSet Symbols;
+};
+
+namespace shared {
+
+using SPSRemoteSymbolLookupSetElement = SPSTuple<SPSString, bool>;
+
+using SPSRemoteSymbolLookupSet = SPSSequence<SPSRemoteSymbolLookupSetElement>;
+
+using SPSRemoteSymbolLookup = SPSTuple<uint64_t, SPSRemoteSymbolLookupSet>;
+
+/// Tuple containing target triple, page size, and bootstrap symbols.
+using SPSSimpleRemoteEPCExecutorInfo =
+ SPSTuple<SPSString, uint64_t,
+ SPSSequence<SPSTuple<SPSString, SPSExecutorAddr>>>;
+
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookupSetElement,
+ RemoteSymbolLookupSetElement> {
+public:
+ static size_t size(const RemoteSymbolLookupSetElement &V) {
+ return SPSArgList<SPSString, bool>::size(V.Name, V.Required);
+ }
+
+ static size_t serialize(SPSOutputBuffer &OB,
+ const RemoteSymbolLookupSetElement &V) {
+ return SPSArgList<SPSString, bool>::serialize(OB, V.Name, V.Required);
+ }
+
+ static size_t deserialize(SPSInputBuffer &IB,
+ RemoteSymbolLookupSetElement &V) {
+ return SPSArgList<SPSString, bool>::deserialize(IB, V.Name, V.Required);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookup, RemoteSymbolLookup> {
+public:
+ static size_t size(const RemoteSymbolLookup &V) {
+ return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::size(V.H, V.Symbols);
+ }
+
+ static size_t serialize(SPSOutputBuffer &OB, const RemoteSymbolLookup &V) {
+ return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::serialize(OB, V.H,
+ V.Symbols);
+ }
+
+ static size_t deserialize(SPSInputBuffer &IB, RemoteSymbolLookup &V) {
+ return SPSArgList<uint64_t, SPSRemoteSymbolLookupSet>::deserialize(
+ IB, V.H, V.Symbols);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSSimpleRemoteEPCExecutorInfo,
+ SimpleRemoteEPCExecutorInfo> {
+public:
+ static size_t size(const SimpleRemoteEPCExecutorInfo &SI) {
+ return SPSSimpleRemoteEPCExecutorInfo::AsArgList::size(
+ SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const SimpleRemoteEPCExecutorInfo &SI) {
+ return SPSSimpleRemoteEPCExecutorInfo::AsArgList::serialize(
+ OB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, SimpleRemoteEPCExecutorInfo &SI) {
+ return SPSSimpleRemoteEPCExecutorInfo::AsArgList::deserialize(
+ IB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ }
+};
+
+using SPSLoadDylibSignature = SPSExpected<SPSExecutorAddr>(SPSExecutorAddr,
+ SPSString, uint64_t);
+
+using SPSLookupSymbolsSignature =
+ SPSExpected<SPSSequence<SPSSequence<SPSExecutorAddr>>>(
+ SPSExecutorAddr, SPSSequence<SPSRemoteSymbolLookup>);
+
+} // end namespace shared
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_SIMPLEREMOTEEPCUTILS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
index a44bcd4c8064..0e8b7e7d345a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
@@ -17,6 +17,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/Support/Memory.h"
#include <vector>
@@ -24,12 +28,108 @@ namespace llvm {
namespace orc {
namespace tpctypes {
+enum WireProtectionFlags : uint8_t {
+ WPF_None = 0,
+ WPF_Read = 1U << 0,
+ WPF_Write = 1U << 1,
+ WPF_Exec = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec)
+};
+
+/// Convert from sys::Memory::ProtectionFlags
+inline WireProtectionFlags
+toWireProtectionFlags(sys::Memory::ProtectionFlags PF) {
+ WireProtectionFlags WPF = WPF_None;
+ if (PF & sys::Memory::MF_READ)
+ WPF |= WPF_Read;
+ if (PF & sys::Memory::MF_WRITE)
+ WPF |= WPF_Write;
+ if (PF & sys::Memory::MF_EXEC)
+ WPF |= WPF_Exec;
+ return WPF;
+}
+
+inline sys::Memory::ProtectionFlags
+fromWireProtectionFlags(WireProtectionFlags WPF) {
+ int PF = 0;
+ if (WPF & WPF_Read)
+ PF |= sys::Memory::MF_READ;
+ if (WPF & WPF_Write)
+ PF |= sys::Memory::MF_WRITE;
+ if (WPF & WPF_Exec)
+ PF |= sys::Memory::MF_EXEC;
+ return static_cast<sys::Memory::ProtectionFlags>(PF);
+}
+
+inline std::string getWireProtectionFlagsStr(WireProtectionFlags WPF) {
+ std::string Result;
+ Result += (WPF & WPF_Read) ? 'R' : '-';
+ Result += (WPF & WPF_Write) ? 'W' : '-';
+ Result += (WPF & WPF_Exec) ? 'X' : '-';
+ return Result;
+}
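+
+// Example (sketch): converting protection flags to and from the wire format
+// using the helpers above.
+//
+// @code{.cpp}
+//   auto PF = static_cast<sys::Memory::ProtectionFlags>(
+//       sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+//   WireProtectionFlags WPF = toWireProtectionFlags(PF);
+//   std::string S = getWireProtectionFlagsStr(WPF); // "R-X"
+//   sys::Memory::ProtectionFlags Back = fromWireProtectionFlags(WPF);
+// @endcode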
+
+struct WrapperFunctionCall {
+ ExecutorAddr Func;
+ ExecutorAddrRange ArgData;
+
+ WrapperFunctionCall() = default;
+ WrapperFunctionCall(ExecutorAddr Func, ExecutorAddr ArgData,
+ ExecutorAddrDiff ArgSize)
+ : Func(Func), ArgData(ArgData, ArgSize) {}
+ WrapperFunctionCall(ExecutorAddr Func, ExecutorAddrRange ArgData)
+ : Func(Func), ArgData(ArgData) {}
+
+ shared::WrapperFunctionResult run() {
+ using FnTy =
+ shared::CWrapperFunctionResult(const char *ArgData, size_t ArgSize);
+ return shared::WrapperFunctionResult(
+ Func.toPtr<FnTy *>()(ArgData.Start.toPtr<const char *>(),
+ static_cast<size_t>(ArgData.size().getValue())));
+ }
+
+ /// Run call and deserialize result using SPS.
+ template <typename SPSRetT, typename RetT> Error runWithSPSRet(RetT &RetVal) {
+ auto WFR = run();
+ if (const char *ErrMsg = WFR.getOutOfBandError())
+ return make_error<StringError>(ErrMsg, inconvertibleErrorCode());
+ shared::SPSInputBuffer IB(WFR.data(), WFR.size());
+ if (!shared::SPSSerializationTraits<SPSRetT, RetT>::deserialize(IB, RetVal))
+ return make_error<StringError>("Could not deserialize result from "
+ "serialized wrapper function call",
+ inconvertibleErrorCode());
+ return Error::success();
+ }
+
+ /// Overload for SPS functions returning void.
+ Error runWithSPSRet() {
+ shared::SPSEmpty E;
+ return runWithSPSRet<shared::SPSEmpty>(E);
+ }
+};
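+
+// Example (illustrative sketch): running a wrapper function in-process and
+// deserializing an int32_t result via SPS. FnAddr, ArgAddr and ArgSize are
+// assumed to describe a valid wrapper function and its serialized arguments.
+//
+// @code{.cpp}
+//   WrapperFunctionCall WFC(FnAddr, ArgAddr, ArgSize);
+//   int32_t Result = 0;
+//   if (Error Err = WFC.runWithSPSRet<int32_t>(Result))
+//     logAllUnhandledErrors(std::move(Err), errs(), "wrapper call: ");
+// @endcode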
+
+struct AllocationActionsPair {
+ WrapperFunctionCall Finalize;
+ WrapperFunctionCall Deallocate;
+};
+
+struct SegFinalizeRequest {
+ WireProtectionFlags Prot;
+ ExecutorAddr Addr;
+ uint64_t Size;
+ ArrayRef<char> Content;
+};
+
+struct FinalizeRequest {
+ std::vector<SegFinalizeRequest> Segments;
+ std::vector<AllocationActionsPair> Actions;
+};
+
template <typename T> struct UIntWrite {
UIntWrite() = default;
- UIntWrite(JITTargetAddress Address, T Value)
- : Address(Address), Value(Value) {}
+ UIntWrite(ExecutorAddr Addr, T Value) : Addr(Addr), Value(Value) {}
- JITTargetAddress Address = 0;
+ ExecutorAddr Addr;
T Value = 0;
};
@@ -49,10 +149,10 @@ using UInt64Write = UIntWrite<uint64_t>;
/// For use with TargetProcessControl::MemoryAccess objects.
struct BufferWrite {
BufferWrite() = default;
- BufferWrite(JITTargetAddress Address, StringRef Buffer)
- : Address(Address), Buffer(Buffer) {}
+ BufferWrite(ExecutorAddr Addr, StringRef Buffer)
+ : Addr(Addr), Buffer(Buffer) {}
- JITTargetAddress Address = 0;
+ ExecutorAddr Addr;
StringRef Buffer;
};
@@ -62,6 +162,180 @@ using DylibHandle = JITTargetAddress;
using LookupResult = std::vector<JITTargetAddress>;
} // end namespace tpctypes
+
+namespace shared {
+
+class SPSMemoryProtectionFlags {};
+
+using SPSWrapperFunctionCall = SPSTuple<SPSExecutorAddr, SPSExecutorAddrRange>;
+
+using SPSSegFinalizeRequest =
+ SPSTuple<SPSMemoryProtectionFlags, SPSExecutorAddr, uint64_t,
+ SPSSequence<char>>;
+
+using SPSAllocationActionsPair =
+ SPSTuple<SPSWrapperFunctionCall, SPSWrapperFunctionCall>;
+
+using SPSFinalizeRequest = SPSTuple<SPSSequence<SPSSegFinalizeRequest>,
+ SPSSequence<SPSAllocationActionsPair>>;
+
+template <typename T>
+using SPSMemoryAccessUIntWrite = SPSTuple<SPSExecutorAddr, T>;
+
+using SPSMemoryAccessUInt8Write = SPSMemoryAccessUIntWrite<uint8_t>;
+using SPSMemoryAccessUInt16Write = SPSMemoryAccessUIntWrite<uint16_t>;
+using SPSMemoryAccessUInt32Write = SPSMemoryAccessUIntWrite<uint32_t>;
+using SPSMemoryAccessUInt64Write = SPSMemoryAccessUIntWrite<uint64_t>;
+
+using SPSMemoryAccessBufferWrite = SPSTuple<SPSExecutorAddr, SPSSequence<char>>;
+
+template <>
+class SPSSerializationTraits<SPSMemoryProtectionFlags,
+ tpctypes::WireProtectionFlags> {
+public:
+ static size_t size(const tpctypes::WireProtectionFlags &WPF) {
+ return SPSArgList<uint8_t>::size(static_cast<uint8_t>(WPF));
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::WireProtectionFlags &WPF) {
+ return SPSArgList<uint8_t>::serialize(OB, static_cast<uint8_t>(WPF));
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::WireProtectionFlags &WPF) {
+ uint8_t Val;
+ if (!SPSArgList<uint8_t>::deserialize(IB, Val))
+ return false;
+ WPF = static_cast<tpctypes::WireProtectionFlags>(Val);
+ return true;
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSWrapperFunctionCall,
+ tpctypes::WrapperFunctionCall> {
+ using AL = SPSWrapperFunctionCall::AsArgList;
+
+public:
+ static size_t size(const tpctypes::WrapperFunctionCall &WFC) {
+ return AL::size(WFC.Func, WFC.ArgData);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::WrapperFunctionCall &WFC) {
+ return AL::serialize(OB, WFC.Func, WFC.ArgData);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::WrapperFunctionCall &WFC) {
+ return AL::deserialize(IB, WFC.Func, WFC.ArgData);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSAllocationActionsPair,
+ tpctypes::AllocationActionsPair> {
+ using AL = SPSAllocationActionsPair::AsArgList;
+
+public:
+ static size_t size(const tpctypes::AllocationActionsPair &AAP) {
+ return AL::size(AAP.Finalize, AAP.Deallocate);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::AllocationActionsPair &AAP) {
+ return AL::serialize(OB, AAP.Finalize, AAP.Deallocate);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::AllocationActionsPair &AAP) {
+ return AL::deserialize(IB, AAP.Finalize, AAP.Deallocate);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSSegFinalizeRequest,
+ tpctypes::SegFinalizeRequest> {
+ using SFRAL = SPSSegFinalizeRequest::AsArgList;
+
+public:
+ static size_t size(const tpctypes::SegFinalizeRequest &SFR) {
+ return SFRAL::size(SFR.Prot, SFR.Addr, SFR.Size, SFR.Content);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::SegFinalizeRequest &SFR) {
+ return SFRAL::serialize(OB, SFR.Prot, SFR.Addr, SFR.Size, SFR.Content);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB,
+ tpctypes::SegFinalizeRequest &SFR) {
+ return SFRAL::deserialize(IB, SFR.Prot, SFR.Addr, SFR.Size, SFR.Content);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSFinalizeRequest, tpctypes::FinalizeRequest> {
+ using FRAL = SPSFinalizeRequest::AsArgList;
+
+public:
+ static size_t size(const tpctypes::FinalizeRequest &FR) {
+ return FRAL::size(FR.Segments, FR.Actions);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::FinalizeRequest &FR) {
+ return FRAL::serialize(OB, FR.Segments, FR.Actions);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::FinalizeRequest &FR) {
+ return FRAL::deserialize(IB, FR.Segments, FR.Actions);
+ }
+};
+
+template <typename T>
+class SPSSerializationTraits<SPSMemoryAccessUIntWrite<T>,
+ tpctypes::UIntWrite<T>> {
+public:
+ static size_t size(const tpctypes::UIntWrite<T> &W) {
+ return SPSTuple<SPSExecutorAddr, T>::AsArgList::size(W.Addr, W.Value);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const tpctypes::UIntWrite<T> &W) {
+ return SPSTuple<SPSExecutorAddr, T>::AsArgList::serialize(OB, W.Addr,
+ W.Value);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::UIntWrite<T> &W) {
+ return SPSTuple<SPSExecutorAddr, T>::AsArgList::deserialize(IB, W.Addr,
+ W.Value);
+ }
+};
+
+template <>
+class SPSSerializationTraits<SPSMemoryAccessBufferWrite,
+ tpctypes::BufferWrite> {
+public:
+ static size_t size(const tpctypes::BufferWrite &W) {
+ return SPSTuple<SPSExecutorAddr, SPSSequence<char>>::AsArgList::size(
+ W.Addr, W.Buffer);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB, const tpctypes::BufferWrite &W) {
+ return SPSTuple<SPSExecutorAddr, SPSSequence<char>>::AsArgList::serialize(
+ OB, W.Addr, W.Buffer);
+ }
+
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::BufferWrite &W) {
+ return SPSTuple<SPSExecutorAddr,
+ SPSSequence<char>>::AsArgList::deserialize(IB, W.Addr,
+ W.Buffer);
+ }
+};
+
+} // end namespace shared
} // end namespace orc
} // end namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h
index 2f14a1c76332..bf841b1f706b 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h
@@ -10,9 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H
-#define LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h"
#include "llvm/Support/Error.h"
@@ -22,24 +23,18 @@ namespace llvm {
namespace orc {
namespace shared {
-namespace detail {
-
-// DO NOT USE DIRECTLY.
// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
union CWrapperFunctionResultDataUnion {
char *ValuePtr;
char Value[sizeof(ValuePtr)];
};
-// DO NOT USE DIRECTLY.
// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
typedef struct {
CWrapperFunctionResultDataUnion Data;
size_t Size;
} CWrapperFunctionResult;
-} // end namespace detail
-
/// C++ wrapper function result: Same as CWrapperFunctionResult but
/// auto-releases memory.
class WrapperFunctionResult {
@@ -48,11 +43,11 @@ public:
WrapperFunctionResult() { init(R); }
/// Create a WrapperFunctionResult by taking ownership of a
- /// detail::CWrapperFunctionResult.
+ /// CWrapperFunctionResult.
///
/// Warning: This should only be used by clients writing wrapper-function
/// caller utilities (like TargetProcessControl).
- WrapperFunctionResult(detail::CWrapperFunctionResult R) : R(R) {
+ WrapperFunctionResult(CWrapperFunctionResult R) : R(R) {
// Reset R.
init(R);
}
@@ -77,18 +72,25 @@ public:
free(R.Data.ValuePtr);
}
- /// Release ownership of the contained detail::CWrapperFunctionResult.
+ /// Release ownership of the contained CWrapperFunctionResult.
/// Warning: Do not use -- this method will be removed in the future. It only
/// exists to temporarily support some code that will eventually be moved to
/// the ORC runtime.
- detail::CWrapperFunctionResult release() {
- detail::CWrapperFunctionResult Tmp;
+ CWrapperFunctionResult release() {
+ CWrapperFunctionResult Tmp;
init(Tmp);
std::swap(R, Tmp);
return Tmp;
}
/// Get a pointer to the data contained in this instance.
+ char *data() {
+ assert((R.Size != 0 || R.Data.ValuePtr == nullptr) &&
+ "Cannot get data for out-of-band error value");
+ return R.Size > sizeof(R.Data.Value) ? R.Data.ValuePtr : R.Data.Value;
+ }
+
+ /// Get a const pointer to the data contained in this instance.
const char *data() const {
assert((R.Size != 0 || R.Data.ValuePtr == nullptr) &&
"Cannot get data for out-of-band error value");
@@ -108,24 +110,19 @@ public:
/// Create a WrapperFunctionResult with the given size.
- static char *allocate(WrapperFunctionResult &WFR, size_t Size) {
+ static WrapperFunctionResult allocate(size_t Size) {
// Reset.
- WFR = WrapperFunctionResult();
+ WrapperFunctionResult WFR;
WFR.R.Size = Size;
- char *DataPtr;
- if (WFR.R.Size > sizeof(WFR.R.Data.Value)) {
- DataPtr = (char *)malloc(WFR.R.Size);
- WFR.R.Data.ValuePtr = DataPtr;
- } else
- DataPtr = WFR.R.Data.Value;
- return DataPtr;
+ if (WFR.R.Size > sizeof(WFR.R.Data.Value))
+ WFR.R.Data.ValuePtr = (char *)malloc(WFR.R.Size);
+ return WFR;
}
/// Copy from the given char range.
static WrapperFunctionResult copyFrom(const char *Source, size_t Size) {
- WrapperFunctionResult WFR;
- char *DataPtr = allocate(WFR, Size);
- memcpy(DataPtr, Source, Size);
+ auto WFR = allocate(Size);
+ memcpy(WFR.data(), Source, Size);
return WFR;
}
@@ -161,12 +158,12 @@ public:
}
private:
- static void init(detail::CWrapperFunctionResult &R) {
+ static void init(CWrapperFunctionResult &R) {
R.Data.ValuePtr = nullptr;
R.Size = 0;
}
- detail::CWrapperFunctionResult R;
+ CWrapperFunctionResult R;
};
namespace detail {
@@ -174,10 +171,8 @@ namespace detail {
template <typename SPSArgListT, typename... ArgTs>
WrapperFunctionResult
serializeViaSPSToWrapperFunctionResult(const ArgTs &...Args) {
- WrapperFunctionResult Result;
- char *DataPtr =
- WrapperFunctionResult::allocate(Result, SPSArgListT::size(Args...));
- SPSOutputBuffer OB(DataPtr, Result.size());
+ auto Result = WrapperFunctionResult::allocate(SPSArgListT::size(Args...));
+ SPSOutputBuffer OB(Result.data(), Result.size());
if (!SPSArgListT::serialize(OB, Args...))
return WrapperFunctionResult::createOutOfBandError(
"Error serializing arguments to blob in call");
@@ -315,6 +310,7 @@ private:
static void callAsync(HandlerT &&H,
SerializeAndSendResultT &&SerializeAndSendResult,
ArgTupleT Args, std::index_sequence<I...>) {
+ (void)Args; // Silence a buggy GCC warning.
return std::forward<HandlerT>(H)(std::move(SerializeAndSendResult),
std::move(std::get<I>(Args))...);
}
@@ -486,10 +482,16 @@ public:
}
auto SendSerializedResult = [SDR = std::move(SendDeserializedResult)](
- WrapperFunctionResult R) {
+ WrapperFunctionResult R) mutable {
RetT RetVal = detail::ResultDeserializer<SPSRetTagT, RetT>::makeValue();
detail::ResultDeserializer<SPSRetTagT, RetT>::makeSafe(RetVal);
+ if (auto *ErrMsg = R.getOutOfBandError()) {
+ SDR(make_error<StringError>(ErrMsg, inconvertibleErrorCode()),
+ std::move(RetVal));
+ return;
+ }
+
SPSInputBuffer IB(R.data(), R.size());
if (auto Err = detail::ResultDeserializer<SPSRetTagT, RetT>::deserialize(
RetVal, R.data(), R.size()))
@@ -547,12 +549,68 @@ public:
return WrapperFunction<SPSEmpty(SPSTagTs...)>::call(Caller, BE, Args...);
}
+ template <typename AsyncCallerFn, typename SendDeserializedResultFn,
+ typename... ArgTs>
+ static void callAsync(AsyncCallerFn &&Caller,
+ SendDeserializedResultFn &&SendDeserializedResult,
+ const ArgTs &...Args) {
+ WrapperFunction<SPSEmpty(SPSTagTs...)>::callAsync(
+ std::forward<AsyncCallerFn>(Caller),
+ [SDR = std::move(SendDeserializedResult)](Error SerializeErr,
+ SPSEmpty E) mutable {
+ SDR(std::move(SerializeErr));
+ },
+ Args...);
+ }
+
using WrapperFunction<SPSEmpty(SPSTagTs...)>::handle;
using WrapperFunction<SPSEmpty(SPSTagTs...)>::handleAsync;
};
+/// A function object that takes an ExecutorAddr as its first argument,
+/// casts that address to a ClassT*, then calls the given method on that
+/// pointer passing in the remaining function arguments. This utility
+/// removes some of the boilerplate from writing wrappers for method calls.
+///
+/// @code{.cpp}
+/// class MyClass {
+/// public:
+/// void myMethod(uint32_t, bool) { ... }
+/// };
+///
+/// // SPS Method signature -- note MyClass object address as first argument.
+/// using SPSMyMethodWrapperSignature =
+/// SPSTuple<SPSExecutorAddr, uint32_t, bool>;
+///
+/// WrapperFunctionResult
+/// myMethodCallWrapper(const char *ArgData, size_t ArgSize) {
+/// return WrapperFunction<SPSMyMethodWrapperSignature>::handle(
+/// ArgData, ArgSize, makeMethodWrapperHandler(&MyClass::myMethod));
+/// }
+/// @endcode
+///
+template <typename RetT, typename ClassT, typename... ArgTs>
+class MethodWrapperHandler {
+public:
+ using MethodT = RetT (ClassT::*)(ArgTs...);
+ MethodWrapperHandler(MethodT M) : M(M) {}
+ RetT operator()(ExecutorAddr ObjAddr, ArgTs &...Args) {
+ return (ObjAddr.toPtr<ClassT*>()->*M)(std::forward<ArgTs>(Args)...);
+ }
+
+private:
+ MethodT M;
+};
+
+/// Create a MethodWrapperHandler object from the given method pointer.
+template <typename RetT, typename ClassT, typename... ArgTs>
+MethodWrapperHandler<RetT, ClassT, ArgTs...>
+makeMethodWrapperHandler(RetT (ClassT::*Method)(ArgTs...)) {
+ return MethodWrapperHandler<RetT, ClassT, ArgTs...>(Method);
+}
+
} // end namespace shared
} // end namespace orc
} // end namespace llvm
-#endif // LLVM_EXECUTIONENGINE_ORC_WRAPPERFUNCTIONUTILS_H
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_WRAPPERFUNCTIONUTILS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h b/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h
new file mode 100644
index 000000000000..bd72e4535325
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h
@@ -0,0 +1,140 @@
+//===---- SimpleRemoteEPC.h - Simple remote executor control ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple remote executor process control.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H
+#define LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h"
+#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MSVCErrorWorkarounds.h"
+
+#include <condition_variable>
+#include <future>
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+class SimpleRemoteEPC : public ExecutorProcessControl,
+ public SimpleRemoteEPCTransportClient {
+public:
+ /// A setup object containing callbacks to construct a memory manager and
+ /// memory access object. Both are optional. If not specified,
+ /// EPCGenericJITLinkMemoryManager and EPCGenericMemoryAccess will be used.
+ struct Setup {
+ using CreateMemoryManagerFn =
+ Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>(
+ SimpleRemoteEPC &);
+ using CreateMemoryAccessFn =
+ Expected<std::unique_ptr<MemoryAccess>>(SimpleRemoteEPC &);
+
+ unique_function<CreateMemoryManagerFn> CreateMemoryManager;
+ unique_function<CreateMemoryAccessFn> CreateMemoryAccess;
+ };
+
+ /// Create a SimpleRemoteEPC using the given transport type and args.
+ template <typename TransportT, typename... TransportTCtorArgTs>
+ static Expected<std::unique_ptr<SimpleRemoteEPC>>
+ Create(std::unique_ptr<TaskDispatcher> D, Setup S,
+ TransportTCtorArgTs &&...TransportTCtorArgs) {
+ std::unique_ptr<SimpleRemoteEPC> SREPC(
+ new SimpleRemoteEPC(std::make_shared<SymbolStringPool>(),
+ std::move(D)));
+ auto T = TransportT::Create(
+ *SREPC, std::forward<TransportTCtorArgTs>(TransportTCtorArgs)...);
+ if (!T)
+ return T.takeError();
+ SREPC->T = std::move(*T);
+ if (auto Err = SREPC->setup(std::move(S)))
+ return joinErrors(std::move(Err), SREPC->disconnect());
+ return std::move(SREPC);
+ }
+
+ SimpleRemoteEPC(const SimpleRemoteEPC &) = delete;
+ SimpleRemoteEPC &operator=(const SimpleRemoteEPC &) = delete;
+ SimpleRemoteEPC(SimpleRemoteEPC &&) = delete;
+ SimpleRemoteEPC &operator=(SimpleRemoteEPC &&) = delete;
+ ~SimpleRemoteEPC();
+
+ Expected<tpctypes::DylibHandle> loadDylib(const char *DylibPath) override;
+
+ Expected<std::vector<tpctypes::LookupResult>>
+ lookupSymbols(ArrayRef<LookupRequest> Request) override;
+
+ Expected<int32_t> runAsMain(ExecutorAddr MainFnAddr,
+ ArrayRef<std::string> Args) override;
+
+ void callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
+ ArrayRef<char> ArgBuffer) override;
+
+ Error disconnect() override;
+
+ Expected<HandleMessageAction>
+ handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) override;
+
+ void handleDisconnect(Error Err) override;
+
+private:
+ SimpleRemoteEPC(std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<TaskDispatcher> D)
+ : ExecutorProcessControl(std::move(SSP), std::move(D)) {}
+
+ static Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>
+ createDefaultMemoryManager(SimpleRemoteEPC &SREPC);
+ static Expected<std::unique_ptr<MemoryAccess>>
+ createDefaultMemoryAccess(SimpleRemoteEPC &SREPC);
+
+ Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes);
+
+ Error handleSetup(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ Error setup(Setup S);
+
+ Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ void handleCallWrapper(uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ Error handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes);
+
+ uint64_t getNextSeqNo() { return NextSeqNo++; }
+ void releaseSeqNo(uint64_t SeqNo) {}
+
+ using PendingCallWrapperResultsMap =
+ DenseMap<uint64_t, IncomingWFRHandler>;
+
+ std::mutex SimpleRemoteEPCMutex;
+ std::condition_variable DisconnectCV;
+ bool Disconnected = false;
+ Error DisconnectErr = Error::success();
+
+ std::unique_ptr<SimpleRemoteEPCTransport> T;
+ std::unique_ptr<jitlink::JITLinkMemoryManager> OwnedMemMgr;
+ std::unique_ptr<MemoryAccess> OwnedMemAccess;
+
+ std::unique_ptr<EPCGenericDylibManager> DylibMgr;
+ ExecutorAddr RunAsMainAddr;
+
+ uint64_t NextSeqNo = 0;
+ PendingCallWrapperResultsMap PendingCallWrapperResults;
+};
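+
+// Example (illustrative sketch): connecting to an executor over a pair of
+// pipe file descriptors. `D` (a TaskDispatcher instance) and the FD values
+// are assumptions for the example; passing a default-constructed Setup
+// selects the default memory manager and memory access implementations.
+//
+// @code{.cpp}
+//   auto EPC = SimpleRemoteEPC::Create<FDSimpleRemoteEPCTransport>(
+//       std::move(D), SimpleRemoteEPC::Setup(), FromExecutorFD, ToExecutorFD);
+//   if (!EPC)
+//     return EPC.takeError();
+// @endcode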
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SIMPLEREMOTEEPC_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h
new file mode 100644
index 000000000000..32c127634b25
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h
@@ -0,0 +1,36 @@
+//===- ExecutorBootstrapService.h - Bootstrap symbol provider --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Provides a service by supplying some set of bootstrap symbols.
+//
+// FIXME: The functionality in this file should be moved to the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+namespace llvm {
+namespace orc {
+
+class ExecutorBootstrapService {
+public:
+ virtual ~ExecutorBootstrapService();
+
+ virtual void
+ addBootstrapSymbols(StringMap<ExecutorAddr> &BootstrapSymbols) = 0;
+ virtual Error shutdown() = 0;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_EXECUTORBOOTSTRAPSERVICE_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h
index 3fad98b5f178..cfb951178da6 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h
@@ -16,7 +16,7 @@
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include <cstdint>
-extern "C" llvm::orc::shared::detail::CWrapperFunctionResult
+extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size);
#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_JITLOADERGDB_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h
deleted file mode 100644
index 96e4341fce68..000000000000
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/OrcRPCTPCServer.h
+++ /dev/null
@@ -1,660 +0,0 @@
-//===-- OrcRPCTPCServer.h -- OrcRPCTargetProcessControl Server --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// OrcRPCTargetProcessControl server class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
-#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
-
-#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/ExecutionEngine/Orc/Shared/RawByteChannel.h"
-#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
-#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
-#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Process.h"
-
-#include <atomic>
-
-namespace llvm {
-namespace orc {
-
-namespace orcrpctpc {
-
-enum WireProtectionFlags : uint8_t {
- WPF_None = 0,
- WPF_Read = 1U << 0,
- WPF_Write = 1U << 1,
- WPF_Exec = 1U << 2,
- LLVM_MARK_AS_BITMASK_ENUM(WPF_Exec)
-};
-
-struct ExecutorProcessInfo {
- std::string Triple;
- unsigned PageSize;
- JITTargetAddress DispatchFuncAddr;
- JITTargetAddress DispatchCtxAddr;
-};
-
-/// Convert from sys::Memory::ProtectionFlags
-inline WireProtectionFlags
-toWireProtectionFlags(sys::Memory::ProtectionFlags PF) {
- WireProtectionFlags WPF = WPF_None;
- if (PF & sys::Memory::MF_READ)
- WPF |= WPF_Read;
- if (PF & sys::Memory::MF_WRITE)
- WPF |= WPF_Write;
- if (PF & sys::Memory::MF_EXEC)
- WPF |= WPF_Exec;
- return WPF;
-}
-
-inline sys::Memory::ProtectionFlags
-fromWireProtectionFlags(WireProtectionFlags WPF) {
- int PF = 0;
- if (WPF & WPF_Read)
- PF |= sys::Memory::MF_READ;
- if (WPF & WPF_Write)
- PF |= sys::Memory::MF_WRITE;
- if (WPF & WPF_Exec)
- PF |= sys::Memory::MF_EXEC;
- return static_cast<sys::Memory::ProtectionFlags>(PF);
-}
-
-struct ReserveMemRequestElement {
- WireProtectionFlags Prot = WPF_None;
- uint64_t Size = 0;
- uint64_t Alignment = 0;
-};
-
-using ReserveMemRequest = std::vector<ReserveMemRequestElement>;
-
-struct ReserveMemResultElement {
- WireProtectionFlags Prot = WPF_None;
- JITTargetAddress Address = 0;
- uint64_t AllocatedSize = 0;
-};
-
-using ReserveMemResult = std::vector<ReserveMemResultElement>;
-
-struct ReleaseOrFinalizeMemRequestElement {
- WireProtectionFlags Prot = WPF_None;
- JITTargetAddress Address = 0;
- uint64_t Size = 0;
-};
-
-using ReleaseOrFinalizeMemRequest =
- std::vector<ReleaseOrFinalizeMemRequestElement>;
-
-} // end namespace orcrpctpc
-
-namespace shared {
-
-template <> class SerializationTypeName<WrapperFunctionResult> {
-public:
- static const char *getName() { return "WrapperFunctionResult"; }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, WrapperFunctionResult, WrapperFunctionResult,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, const WrapperFunctionResult &E) {
- if (auto Err = serializeSeq(C, static_cast<uint64_t>(E.size())))
- return Err;
- if (E.size() == 0)
- return Error::success();
- return C.appendBytes(E.data(), E.size());
- }
-
- static Error deserialize(ChannelT &C, WrapperFunctionResult &E) {
- uint64_t Size;
- if (auto Err = deserializeSeq(C, Size))
- return Err;
-
- WrapperFunctionResult Tmp;
- char *Data = WrapperFunctionResult::allocate(Tmp, Size);
-
- if (auto Err = C.readBytes(Data, Size))
- return Err;
-
- E = std::move(Tmp);
-
- return Error::success();
- }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt8Write> {
-public:
- static const char *getName() { return "UInt8Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt16Write> {
-public:
- static const char *getName() { return "UInt16Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt32Write> {
-public:
- static const char *getName() { return "UInt32Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::UInt64Write> {
-public:
- static const char *getName() { return "UInt64Write"; }
-};
-
-template <> class SerializationTypeName<tpctypes::BufferWrite> {
-public:
- static const char *getName() { return "BufferWrite"; }
-};
-
-template <> class SerializationTypeName<orcrpctpc::ReserveMemRequestElement> {
-public:
- static const char *getName() { return "ReserveMemRequestElement"; }
-};
-
-template <> class SerializationTypeName<orcrpctpc::ReserveMemResultElement> {
-public:
- static const char *getName() { return "ReserveMemResultElement"; }
-};
-
-template <>
-class SerializationTypeName<orcrpctpc::ReleaseOrFinalizeMemRequestElement> {
-public:
- static const char *getName() { return "ReleaseOrFinalizeMemRequestElement"; }
-};
-
-template <> class SerializationTypeName<orcrpctpc::ExecutorProcessInfo> {
-public:
- static const char *getName() { return "ExecutorProcessInfo"; }
-};
-
-template <typename ChannelT, typename WriteT>
-class SerializationTraits<
- ChannelT, WriteT, WriteT,
- std::enable_if_t<std::is_same<WriteT, tpctypes::UInt8Write>::value ||
- std::is_same<WriteT, tpctypes::UInt16Write>::value ||
- std::is_same<WriteT, tpctypes::UInt32Write>::value ||
- std::is_same<WriteT, tpctypes::UInt64Write>::value>> {
-public:
- static Error serialize(ChannelT &C, const WriteT &W) {
- return serializeSeq(C, W.Address, W.Value);
- }
- static Error deserialize(ChannelT &C, WriteT &W) {
- return deserializeSeq(C, W.Address, W.Value);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<
- ChannelT, tpctypes::BufferWrite, tpctypes::BufferWrite,
- std::enable_if_t<std::is_base_of<RawByteChannel, ChannelT>::value>> {
-public:
- static Error serialize(ChannelT &C, const tpctypes::BufferWrite &W) {
- uint64_t Size = W.Buffer.size();
- if (auto Err = serializeSeq(C, W.Address, Size))
- return Err;
-
- return C.appendBytes(W.Buffer.data(), Size);
- }
- static Error deserialize(ChannelT &C, tpctypes::BufferWrite &W) {
- JITTargetAddress Address;
- uint64_t Size;
-
- if (auto Err = deserializeSeq(C, Address, Size))
- return Err;
-
- char *Buffer = jitTargetAddressToPointer<char *>(Address);
-
- if (auto Err = C.readBytes(Buffer, Size))
- return Err;
-
- W = {Address, StringRef(Buffer, Size)};
- return Error::success();
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, orcrpctpc::ReserveMemRequestElement> {
-public:
- static Error serialize(ChannelT &C,
- const orcrpctpc::ReserveMemRequestElement &E) {
- return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Size, E.Alignment);
- }
-
- static Error deserialize(ChannelT &C,
- orcrpctpc::ReserveMemRequestElement &E) {
- return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Size,
- E.Alignment);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, orcrpctpc::ReserveMemResultElement> {
-public:
- static Error serialize(ChannelT &C,
- const orcrpctpc::ReserveMemResultElement &E) {
- return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address,
- E.AllocatedSize);
- }
-
- static Error deserialize(ChannelT &C, orcrpctpc::ReserveMemResultElement &E) {
- return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address,
- E.AllocatedSize);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT,
- orcrpctpc::ReleaseOrFinalizeMemRequestElement> {
-public:
- static Error
- serialize(ChannelT &C,
- const orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) {
- return serializeSeq(C, static_cast<uint8_t>(E.Prot), E.Address, E.Size);
- }
-
- static Error deserialize(ChannelT &C,
- orcrpctpc::ReleaseOrFinalizeMemRequestElement &E) {
- return deserializeSeq(C, *reinterpret_cast<uint8_t *>(&E.Prot), E.Address,
- E.Size);
- }
-};
-
-template <typename ChannelT>
-class SerializationTraits<ChannelT, orcrpctpc::ExecutorProcessInfo> {
-public:
- static Error serialize(ChannelT &C,
- const orcrpctpc::ExecutorProcessInfo &EPI) {
- return serializeSeq(C, EPI.Triple, EPI.PageSize, EPI.DispatchFuncAddr,
- EPI.DispatchCtxAddr);
- }
-
- static Error deserialize(ChannelT &C, orcrpctpc::ExecutorProcessInfo &EPI) {
- return deserializeSeq(C, EPI.Triple, EPI.PageSize, EPI.DispatchFuncAddr,
- EPI.DispatchCtxAddr);
- }
-};
-
-} // end namespace shared
-
-namespace orcrpctpc {
-
-using RemoteSymbolLookupSet = std::vector<std::pair<std::string, bool>>;
-using RemoteLookupRequest =
- std::pair<tpctypes::DylibHandle, RemoteSymbolLookupSet>;
-
-class GetExecutorProcessInfo
- : public shared::RPCFunction<GetExecutorProcessInfo,
- orcrpctpc::ExecutorProcessInfo()> {
-public:
- static const char *getName() { return "GetJITDispatchInfo"; }
-};
-
-class ReserveMem
- : public shared::RPCFunction<ReserveMem, Expected<ReserveMemResult>(
- ReserveMemRequest)> {
-public:
- static const char *getName() { return "ReserveMem"; }
-};
-
-class FinalizeMem
- : public shared::RPCFunction<FinalizeMem,
- Error(ReleaseOrFinalizeMemRequest)> {
-public:
- static const char *getName() { return "FinalizeMem"; }
-};
-
-class ReleaseMem
- : public shared::RPCFunction<ReleaseMem,
- Error(ReleaseOrFinalizeMemRequest)> {
-public:
- static const char *getName() { return "ReleaseMem"; }
-};
-
-class WriteUInt8s
- : public shared::RPCFunction<WriteUInt8s,
- Error(std::vector<tpctypes::UInt8Write>)> {
-public:
- static const char *getName() { return "WriteUInt8s"; }
-};
-
-class WriteUInt16s
- : public shared::RPCFunction<WriteUInt16s,
- Error(std::vector<tpctypes::UInt16Write>)> {
-public:
- static const char *getName() { return "WriteUInt16s"; }
-};
-
-class WriteUInt32s
- : public shared::RPCFunction<WriteUInt32s,
- Error(std::vector<tpctypes::UInt32Write>)> {
-public:
- static const char *getName() { return "WriteUInt32s"; }
-};
-
-class WriteUInt64s
- : public shared::RPCFunction<WriteUInt64s,
- Error(std::vector<tpctypes::UInt64Write>)> {
-public:
- static const char *getName() { return "WriteUInt64s"; }
-};
-
-class WriteBuffers
- : public shared::RPCFunction<WriteBuffers,
- Error(std::vector<tpctypes::BufferWrite>)> {
-public:
- static const char *getName() { return "WriteBuffers"; }
-};
-
-class LoadDylib
- : public shared::RPCFunction<LoadDylib, Expected<tpctypes::DylibHandle>(
- std::string DylibPath)> {
-public:
- static const char *getName() { return "LoadDylib"; }
-};
-
-class LookupSymbols
- : public shared::RPCFunction<LookupSymbols,
- Expected<std::vector<tpctypes::LookupResult>>(
- std::vector<RemoteLookupRequest>)> {
-public:
- static const char *getName() { return "LookupSymbols"; }
-};
-
-class RunMain
- : public shared::RPCFunction<RunMain,
- int64_t(JITTargetAddress MainAddr,
- std::vector<std::string> Args)> {
-public:
- static const char *getName() { return "RunMain"; }
-};
-
-class RunWrapper
- : public shared::RPCFunction<RunWrapper,
- shared::WrapperFunctionResult(
- JITTargetAddress, std::vector<uint8_t>)> {
-public:
- static const char *getName() { return "RunWrapper"; }
-};
-
-class CloseConnection : public shared::RPCFunction<CloseConnection, void()> {
-public:
- static const char *getName() { return "CloseConnection"; }
-};
-
-} // end namespace orcrpctpc
-
-/// TargetProcessControl for a process connected via an ORC RPC Endpoint.
-template <typename RPCEndpointT> class OrcRPCTPCServer {
-private:
- using ThisT = OrcRPCTPCServer<RPCEndpointT>;
-
-public:
- /// Create an OrcRPCTPCServer from the given endpoint.
- OrcRPCTPCServer(RPCEndpointT &EP) : EP(EP) {
-
- TripleStr = sys::getProcessTriple();
- PageSize = sys::Process::getPageSizeEstimate();
-
- EP.template addHandler<orcrpctpc::GetExecutorProcessInfo>(
- *this, &ThisT::getExecutorProcessInfo);
- EP.template addHandler<orcrpctpc::ReserveMem>(*this, &ThisT::reserveMemory);
- EP.template addHandler<orcrpctpc::FinalizeMem>(*this,
- &ThisT::finalizeMemory);
- EP.template addHandler<orcrpctpc::ReleaseMem>(*this, &ThisT::releaseMemory);
-
- EP.template addHandler<orcrpctpc::WriteUInt8s>(
- handleWriteUInt<tpctypes::UInt8Write>);
- EP.template addHandler<orcrpctpc::WriteUInt16s>(
- handleWriteUInt<tpctypes::UInt16Write>);
- EP.template addHandler<orcrpctpc::WriteUInt32s>(
- handleWriteUInt<tpctypes::UInt32Write>);
- EP.template addHandler<orcrpctpc::WriteUInt64s>(
- handleWriteUInt<tpctypes::UInt64Write>);
- EP.template addHandler<orcrpctpc::WriteBuffers>(handleWriteBuffer);
-
- EP.template addHandler<orcrpctpc::LoadDylib>(*this, &ThisT::loadDylib);
- EP.template addHandler<orcrpctpc::LookupSymbols>(*this,
- &ThisT::lookupSymbols);
-
- EP.template addHandler<orcrpctpc::RunMain>(*this, &ThisT::runMain);
- EP.template addHandler<orcrpctpc::RunWrapper>(*this, &ThisT::runWrapper);
-
- EP.template addHandler<orcrpctpc::CloseConnection>(*this,
- &ThisT::closeConnection);
- }
-
- /// Set the ProgramName to be used as the first argv element when running
- /// functions via runAsMain.
- void setProgramName(Optional<std::string> ProgramName = None) {
- this->ProgramName = std::move(ProgramName);
- }
-
- /// Get the RPC endpoint for this server.
- RPCEndpointT &getEndpoint() { return EP; }
-
- /// Run the server loop.
- Error run() {
- while (!Finished) {
- if (auto Err = EP.handleOne())
- return Err;
- }
- return Error::success();
- }
-
- Expected<shared::WrapperFunctionResult>
- runWrapperInJIT(JITTargetAddress FunctionId, ArrayRef<char> ArgBuffer) {
- return EP.template callB<orcrpctpc::RunWrapper>(
- FunctionId,
- ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(ArgBuffer.data()),
- ArgBuffer.size()));
- }
-
-private:
- static shared::detail::CWrapperFunctionResult
- jitDispatchViaOrcRPCTPCServer(void *Ctx, const void *FnTag, const char *Data,
- size_t Size) {
- assert(Ctx && "Attempt to dispatch with null context ptr");
- auto R = static_cast<ThisT *>(Ctx)->runWrapperInJIT(
- pointerToJITTargetAddress(FnTag), {Data, Size});
- if (!R) {
- auto ErrMsg = toString(R.takeError());
- return shared::WrapperFunctionResult::createOutOfBandError(ErrMsg.data())
- .release();
- }
- return R->release();
- }
-
- orcrpctpc::ExecutorProcessInfo getExecutorProcessInfo() {
- return {TripleStr, static_cast<uint32_t>(PageSize),
- pointerToJITTargetAddress(jitDispatchViaOrcRPCTPCServer),
- pointerToJITTargetAddress(this)};
- }
-
- template <typename WriteT>
- static void handleWriteUInt(const std::vector<WriteT> &Ws) {
- using ValueT = decltype(std::declval<WriteT>().Value);
- for (auto &W : Ws)
- *jitTargetAddressToPointer<ValueT *>(W.Address) = W.Value;
- }
-
- std::string getProtStr(orcrpctpc::WireProtectionFlags WPF) {
- std::string Result;
- Result += (WPF & orcrpctpc::WPF_Read) ? 'R' : '-';
- Result += (WPF & orcrpctpc::WPF_Write) ? 'W' : '-';
- Result += (WPF & orcrpctpc::WPF_Exec) ? 'X' : '-';
- return Result;
- }
-
- static void handleWriteBuffer(const std::vector<tpctypes::BufferWrite> &Ws) {
- for (auto &W : Ws) {
- memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(),
- W.Buffer.size());
- }
- }
-
- Expected<orcrpctpc::ReserveMemResult>
- reserveMemory(const orcrpctpc::ReserveMemRequest &Request) {
- orcrpctpc::ReserveMemResult Allocs;
- auto PF = sys::Memory::MF_READ | sys::Memory::MF_WRITE;
-
- uint64_t TotalSize = 0;
-
- for (const auto &E : Request) {
- uint64_t Size = alignTo(E.Size, PageSize);
- uint16_t Align = E.Alignment;
-
- if ((Align > PageSize) || (PageSize % Align))
- return make_error<StringError>(
- "Page alignmen does not satisfy requested alignment",
- inconvertibleErrorCode());
-
- TotalSize += Size;
- }
-
- // Allocate memory slab.
- std::error_code EC;
- auto MB = sys::Memory::allocateMappedMemory(TotalSize, nullptr, PF, EC);
- if (EC)
- return make_error<StringError>("Unable to allocate memory: " +
- EC.message(),
- inconvertibleErrorCode());
-
- // Zero-fill the whole thing.
- memset(MB.base(), 0, MB.allocatedSize());
-
- // Carve up sections to return.
- uint64_t SectionBase = 0;
- for (const auto &E : Request) {
- uint64_t SectionSize = alignTo(E.Size, PageSize);
- Allocs.push_back({E.Prot,
- pointerToJITTargetAddress(MB.base()) + SectionBase,
- SectionSize});
- SectionBase += SectionSize;
- }
-
- return Allocs;
- }
-
- Error finalizeMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &FMR) {
- for (const auto &E : FMR) {
- sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size);
-
- auto PF = orcrpctpc::fromWireProtectionFlags(E.Prot);
- if (auto EC =
- sys::Memory::protectMappedMemory(MB, static_cast<unsigned>(PF)))
- return make_error<StringError>("error protecting memory: " +
- EC.message(),
- inconvertibleErrorCode());
- }
- return Error::success();
- }
-
- Error releaseMemory(const orcrpctpc::ReleaseOrFinalizeMemRequest &RMR) {
- for (const auto &E : RMR) {
- sys::MemoryBlock MB(jitTargetAddressToPointer<void *>(E.Address), E.Size);
-
- if (auto EC = sys::Memory::releaseMappedMemory(MB))
- return make_error<StringError>("error release memory: " + EC.message(),
- inconvertibleErrorCode());
- }
- return Error::success();
- }
-
- Expected<tpctypes::DylibHandle> loadDylib(const std::string &Path) {
- std::string ErrMsg;
- const char *DLPath = !Path.empty() ? Path.c_str() : nullptr;
- auto DL = sys::DynamicLibrary::getPermanentLibrary(DLPath, &ErrMsg);
- if (!DL.isValid())
- return make_error<StringError>(std::move(ErrMsg),
- inconvertibleErrorCode());
-
- tpctypes::DylibHandle H = Dylibs.size();
- Dylibs[H] = std::move(DL);
- return H;
- }
-
- Expected<std::vector<tpctypes::LookupResult>>
- lookupSymbols(const std::vector<orcrpctpc::RemoteLookupRequest> &Request) {
- std::vector<tpctypes::LookupResult> Result;
-
- for (const auto &E : Request) {
- auto I = Dylibs.find(E.first);
- if (I == Dylibs.end())
- return make_error<StringError>("Unrecognized handle",
- inconvertibleErrorCode());
- auto &DL = I->second;
- Result.push_back({});
-
- for (const auto &KV : E.second) {
- auto &SymString = KV.first;
- bool WeakReference = KV.second;
-
- const char *Sym = SymString.c_str();
-#ifdef __APPLE__
- if (*Sym == '_')
- ++Sym;
-#endif
-
- void *Addr = DL.getAddressOfSymbol(Sym);
- if (!Addr && !WeakReference)
- return make_error<StringError>(Twine("Missing definition for ") + Sym,
- inconvertibleErrorCode());
-
- Result.back().push_back(pointerToJITTargetAddress(Addr));
- }
- }
-
- return Result;
- }
-
- int64_t runMain(JITTargetAddress MainFnAddr,
- const std::vector<std::string> &Args) {
- Optional<StringRef> ProgramNameOverride;
- if (ProgramName)
- ProgramNameOverride = *ProgramName;
-
- return runAsMain(
- jitTargetAddressToFunction<int (*)(int, char *[])>(MainFnAddr), Args,
- ProgramNameOverride);
- }
-
- shared::WrapperFunctionResult
- runWrapper(JITTargetAddress WrapperFnAddr,
- const std::vector<uint8_t> &ArgBuffer) {
- using WrapperFnTy = shared::detail::CWrapperFunctionResult (*)(
- const char *Data, uint64_t Size);
- auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr);
- return WrapperFn(reinterpret_cast<const char *>(ArgBuffer.data()),
- ArgBuffer.size());
- }
-
- void closeConnection() { Finished = true; }
-
- std::string TripleStr;
- uint64_t PageSize = 0;
- Optional<std::string> ProgramName;
- RPCEndpointT &EP;
- std::atomic<bool> Finished{false};
- DenseMap<tpctypes::DylibHandle, sys::DynamicLibrary> Dylibs;
-};
-
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRPCTPCSERVER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
index 3b4aabb90371..735aa53e41fd 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h
@@ -33,10 +33,26 @@ Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
} // end namespace orc
} // end namespace llvm
-extern "C" llvm::orc::shared::detail::CWrapperFunctionResult
+/// An eh-frame registration utility suitable for use as a support function
+/// call. This function expects the direct address and size of the eh-frame
+/// section to register as its arguments (it does not treat its arguments as
+/// pointers to an SPS-serialized arg buffer).
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size);
+
+/// An eh-frame deregistration utility suitable for use as a support function
+/// call. This function expects the direct address and size of the eh-frame
+/// section to deregister as its arguments (it does not treat its arguments as
+/// pointers to an SPS-serialized arg buffer).
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_deregisterEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size);
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_registerEHFrameSectionWrapper(const char *Data, uint64_t Size);
-extern "C" llvm::orc::shared::detail::CWrapperFunctionResult
+extern "C" llvm::orc::shared::CWrapperFunctionResult
llvm_orc_deregisterEHFrameSectionWrapper(const char *Data, uint64_t Size);
#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_REGISTEREHFRAMES_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h
new file mode 100644
index 000000000000..cbab234f8a2d
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h
@@ -0,0 +1,64 @@
+//===--------------- SimpleExecutorDylibManager.h ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple dynamic library management class. Allows dynamic libraries to be
+// loaded and searched.
+//
+// FIXME: The functionality in this file should be moved to the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Error.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+/// Simple page-based allocator.
+class SimpleExecutorDylibManager : public ExecutorBootstrapService {
+public:
+ virtual ~SimpleExecutorDylibManager();
+
+ Expected<tpctypes::DylibHandle> open(const std::string &Path, uint64_t Mode);
+ Expected<std::vector<ExecutorAddr>> lookup(tpctypes::DylibHandle H,
+ const RemoteSymbolLookupSet &L);
+
+ Error shutdown() override;
+ void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override;
+
+private:
+ using DylibsMap = DenseMap<uint64_t, sys::DynamicLibrary>;
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ openWrapper(const char *ArgData, size_t ArgSize);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ lookupWrapper(const char *ArgData, size_t ArgSize);
+
+ std::mutex M;
+ uint64_t NextId = 0;
+ DylibsMap Dylibs;
+};
+
+} // end namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORDYLIBMANAGER_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
new file mode 100644
index 000000000000..6858f6d4db6e
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h
@@ -0,0 +1,70 @@
+//===---------------- SimpleExecutorMemoryManager.h -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple allocator class suitable for basic remote-JIT use.
+//
+// FIXME: The functionality in this file should be moved to the ORC runtime.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/Support/Error.h"
+
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+/// Simple page-based allocator.
+class SimpleExecutorMemoryManager : public ExecutorBootstrapService {
+public:
+ virtual ~SimpleExecutorMemoryManager();
+
+ Expected<ExecutorAddr> allocate(uint64_t Size);
+ Error finalize(tpctypes::FinalizeRequest &FR);
+ Error deallocate(const std::vector<ExecutorAddr> &Bases);
+
+ Error shutdown() override;
+ void addBootstrapSymbols(StringMap<ExecutorAddr> &M) override;
+
+private:
+ struct Allocation {
+ size_t Size = 0;
+ std::vector<tpctypes::WrapperFunctionCall> DeallocationActions;
+ };
+
+ using AllocationsMap = DenseMap<void *, Allocation>;
+
+ Error deallocateImpl(void *Base, Allocation &A);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ reserveWrapper(const char *ArgData, size_t ArgSize);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ finalizeWrapper(const char *ArgData, size_t ArgSize);
+
+ static llvm::orc::shared::CWrapperFunctionResult
+ deallocateWrapper(const char *ArgData, size_t ArgSize);
+
+ std::mutex M;
+ AllocationsMap Allocations;
+};
+
+} // end namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEEXECUTORMEMORYMANAGER_H
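
The executor-side lifecycle this class supports is: allocate a block, finalize it (copying content and applying protections via a tpctypes::FinalizeRequest), and later deallocate it by base address, with shutdown() presumably cleaning up anything still live. A minimal sketch of that flow against the declarations above, with the finalize step elided (illustrative only, not code from this patch):

#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"

using namespace llvm;
using namespace llvm::orc;

// Allocate one page in the executor process, then release it again.
static Error allocateAndRelease() {
  rt_bootstrap::SimpleExecutorMemoryManager MemMgr;

  Expected<ExecutorAddr> Base = MemMgr.allocate(4096);
  if (!Base)
    return Base.takeError();

  // A tpctypes::FinalizeRequest would normally be applied here to copy in
  // content and set final memory protections; this sketch goes straight to
  // deallocation.
  if (Error Err = MemMgr.deallocate({*Base}))
    return Err;

  // shutdown() is expected to release any allocations that are still live.
  return MemMgr.shutdown();
}
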
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
new file mode 100644
index 000000000000..afd3d39dbb53
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
@@ -0,0 +1,182 @@
+//===---- SimpleRemoteEPCServer.h - EPC over abstract channel ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// EPC over simple abstract channel.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H
+#define LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FunctionExtras.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Error.h"
+
+#include <condition_variable>
+#include <future>
+#include <memory>
+#include <mutex>
+
+namespace llvm {
+namespace orc {
+
+/// A simple EPC server implementation.
+class SimpleRemoteEPCServer : public SimpleRemoteEPCTransportClient {
+public:
+ using ReportErrorFunction = unique_function<void(Error)>;
+
+ /// Dispatches calls to runWrapper.
+ class Dispatcher {
+ public:
+ virtual ~Dispatcher();
+ virtual void dispatch(unique_function<void()> Work) = 0;
+ virtual void shutdown() = 0;
+ };
+
+#if LLVM_ENABLE_THREADS
+ class ThreadDispatcher : public Dispatcher {
+ public:
+ void dispatch(unique_function<void()> Work) override;
+ void shutdown() override;
+
+ private:
+ std::mutex DispatchMutex;
+ bool Running = true;
+ size_t Outstanding = 0;
+ std::condition_variable OutstandingCV;
+ };
+#endif
+
+ class Setup {
+ friend class SimpleRemoteEPCServer;
+
+ public:
+ SimpleRemoteEPCServer &server() { return S; }
+ StringMap<ExecutorAddr> &bootstrapSymbols() { return BootstrapSymbols; }
+ std::vector<std::unique_ptr<ExecutorBootstrapService>> &services() {
+ return Services;
+ }
+ void setDispatcher(std::unique_ptr<Dispatcher> D) { S.D = std::move(D); }
+ void setErrorReporter(unique_function<void(Error)> ReportError) {
+ S.ReportError = std::move(ReportError);
+ }
+
+ private:
+ Setup(SimpleRemoteEPCServer &S) : S(S) {}
+ SimpleRemoteEPCServer &S;
+ StringMap<ExecutorAddr> BootstrapSymbols;
+ std::vector<std::unique_ptr<ExecutorBootstrapService>> Services;
+ };
+
+ static StringMap<ExecutorAddr> defaultBootstrapSymbols();
+
+ template <typename TransportT, typename... TransportTCtorArgTs>
+ static Expected<std::unique_ptr<SimpleRemoteEPCServer>>
+ Create(unique_function<Error(Setup &S)> SetupFunction,
+ TransportTCtorArgTs &&...TransportTCtorArgs) {
+ auto Server = std::make_unique<SimpleRemoteEPCServer>();
+ Setup S(*Server);
+ if (auto Err = SetupFunction(S))
+ return std::move(Err);
+
+    // Set ReportError up-front so that it can be used if the construction
+ // process fails.
+ if (!Server->ReportError)
+ Server->ReportError = [](Error Err) {
+ logAllUnhandledErrors(std::move(Err), errs(), "SimpleRemoteEPCServer ");
+ };
+
+ // Attempt to create transport.
+ auto T = TransportT::Create(
+ *Server, std::forward<TransportTCtorArgTs>(TransportTCtorArgs)...);
+ if (!T)
+ return T.takeError();
+ Server->T = std::move(*T);
+ if (auto Err = Server->T->start())
+ return std::move(Err);
+
+ // If transport creation succeeds then start up services.
+ Server->Services = std::move(S.services());
+ Server->Services.push_back(
+ std::make_unique<rt_bootstrap::SimpleExecutorDylibManager>());
+ for (auto &Service : Server->Services)
+ Service->addBootstrapSymbols(S.bootstrapSymbols());
+
+ if (auto Err = Server->sendSetupMessage(std::move(S.BootstrapSymbols)))
+ return std::move(Err);
+ return std::move(Server);
+ }
+
+ /// Set an error reporter for this server.
+ void setErrorReporter(ReportErrorFunction ReportError) {
+ this->ReportError = std::move(ReportError);
+ }
+
+ /// Call to handle an incoming message.
+ ///
+  /// Returns 'Disconnect' if the message is a 'detach' message from the remote
+  /// and 'Continue' otherwise. If the server has moved to an error state,
+ /// returns an error, which should be reported and treated as a 'Disconnect'.
+ Expected<HandleMessageAction>
+ handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) override;
+
+ Error waitForDisconnect();
+
+ void handleDisconnect(Error Err) override;
+
+private:
+ Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr, ArrayRef<char> ArgBytes);
+
+ Error sendSetupMessage(StringMap<ExecutorAddr> BootstrapSymbols);
+
+ Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+ void handleCallWrapper(uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes);
+
+ shared::WrapperFunctionResult
+ doJITDispatch(const void *FnTag, const char *ArgData, size_t ArgSize);
+
+ static shared::CWrapperFunctionResult jitDispatchEntry(void *DispatchCtx,
+ const void *FnTag,
+ const char *ArgData,
+ size_t ArgSize);
+
+ uint64_t getNextSeqNo() { return NextSeqNo++; }
+ void releaseSeqNo(uint64_t) {}
+
+ using PendingJITDispatchResultsMap =
+ DenseMap<uint64_t, std::promise<shared::WrapperFunctionResult> *>;
+
+ std::mutex ServerStateMutex;
+ std::condition_variable ShutdownCV;
+ enum { ServerRunning, ServerShuttingDown, ServerShutDown } RunState;
+ Error ShutdownErr = Error::success();
+ std::unique_ptr<SimpleRemoteEPCTransport> T;
+ std::unique_ptr<Dispatcher> D;
+ std::vector<std::unique_ptr<ExecutorBootstrapService>> Services;
+ ReportErrorFunction ReportError;
+
+ uint64_t NextSeqNo = 0;
+ PendingJITDispatchResultsMap PendingJITDispatchResults;
+ std::vector<sys::DynamicLibrary> Dylibs;
+};
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TARGETPROCESS_SIMPLEREMOTEEPCSERVER_H
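
To make the intended wiring concrete, here is a sketch of executor-side startup (not part of the patch). TransportT is a hypothetical placeholder: per the Create() body above it only needs a static Create(SimpleRemoteEPCTransportClient &, ...) factory returning an Expected pointer convertible to std::unique_ptr<SimpleRemoteEPCTransport>; the file-descriptor arguments are purely illustrative.

#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"

using namespace llvm;
using namespace llvm::orc;

// TransportT is a hypothetical concrete SimpleRemoteEPCTransport (e.g. one
// built over a pipe); its Create factory must accept the trailing arguments
// forwarded by SimpleRemoteEPCServer::Create.
template <typename TransportT>
Error runExecutorEndpoint(int InFD, int OutFD) {
  auto Server = SimpleRemoteEPCServer::Create<TransportT>(
      [](SimpleRemoteEPCServer::Setup &S) -> Error {
        // Publish the default bootstrap symbols to the controller.
        S.bootstrapSymbols() = SimpleRemoteEPCServer::defaultBootstrapSymbols();
#if LLVM_ENABLE_THREADS
        // Handle incoming wrapper-function calls on separate threads.
        S.setDispatcher(
            std::make_unique<SimpleRemoteEPCServer::ThreadDispatcher>());
#endif
        return Error::success();
      },
      InFD, OutFD);
  if (!Server)
    return Server.takeError();

  // Serve requests until the controller detaches or an error occurs.
  return (*Server)->waitForDisconnect();
}
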
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
new file mode 100644
index 000000000000..c57264e59655
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h
@@ -0,0 +1,131 @@
+//===--------- TaskDispatch.h - ORC task dispatch utils ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Task and TaskDispatch classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H
+#define LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ExtensibleRTTI.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cassert>
+#include <string>
+
+#if LLVM_ENABLE_THREADS
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#endif
+
+namespace llvm {
+namespace orc {
+
+/// Represents an abstract task for ORC to run.
+class Task : public RTTIExtends<Task, RTTIRoot> {
+public:
+ static char ID;
+
+ virtual ~Task() {}
+
+ /// Description of the task to be performed. Used for logging.
+ virtual void printDescription(raw_ostream &OS) = 0;
+
+ /// Run the task.
+ virtual void run() = 0;
+
+private:
+ void anchor() override;
+};
+
+/// Base class for generic tasks.
+class GenericNamedTask : public RTTIExtends<GenericNamedTask, Task> {
+public:
+ static char ID;
+ static const char *DefaultDescription;
+};
+
+/// Generic task implementation.
+template <typename FnT> class GenericNamedTaskImpl : public GenericNamedTask {
+public:
+ GenericNamedTaskImpl(FnT &&Fn, std::string DescBuffer)
+ : Fn(std::forward<FnT>(Fn)), Desc(DescBuffer.c_str()),
+ DescBuffer(std::move(DescBuffer)) {}
+ GenericNamedTaskImpl(FnT &&Fn, const char *Desc)
+ : Fn(std::forward<FnT>(Fn)), Desc(Desc) {
+ assert(Desc && "Description cannot be null");
+ }
+ void printDescription(raw_ostream &OS) override { OS << Desc; }
+ void run() override { Fn(); }
+
+private:
+ FnT Fn;
+ const char *Desc;
+ std::string DescBuffer;
+};
+
+/// Create a generic named task from a std::string description.
+template <typename FnT>
+std::unique_ptr<GenericNamedTask> makeGenericNamedTask(FnT &&Fn,
+ std::string Desc) {
+ return std::make_unique<GenericNamedTaskImpl<FnT>>(std::forward<FnT>(Fn),
+ std::move(Desc));
+}
+
+/// Create a generic named task from a const char * description.
+template <typename FnT>
+std::unique_ptr<GenericNamedTask>
+makeGenericNamedTask(FnT &&Fn, const char *Desc = nullptr) {
+ if (!Desc)
+ Desc = GenericNamedTask::DefaultDescription;
+ return std::make_unique<GenericNamedTaskImpl<FnT>>(std::forward<FnT>(Fn),
+ Desc);
+}
+
+/// Abstract base for classes that dispatch ORC Tasks.
+class TaskDispatcher {
+public:
+ virtual ~TaskDispatcher();
+
+ /// Run the given task.
+ virtual void dispatch(std::unique_ptr<Task> T) = 0;
+
+ /// Called by ExecutionSession. Waits until all tasks have completed.
+ virtual void shutdown() = 0;
+};
+
+/// Runs all tasks on the current thread.
+class InPlaceTaskDispatcher : public TaskDispatcher {
+public:
+ void dispatch(std::unique_ptr<Task> T) override;
+ void shutdown() override;
+};
+
+#if LLVM_ENABLE_THREADS
+
+class DynamicThreadPoolTaskDispatcher : public TaskDispatcher {
+public:
+ void dispatch(std::unique_ptr<Task> T) override;
+ void shutdown() override;
+
+private:
+ std::mutex DispatchMutex;
+ bool Running = true;
+ size_t Outstanding = 0;
+ std::condition_variable OutstandingCV;
+};
+
+#endif // LLVM_ENABLE_THREADS
+
+} // End namespace orc
+} // End namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_TASKDISPATCH_H
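
A short usage sketch for these utilities (illustrative, not from the patch): wrap a callable in a GenericNamedTask and hand it to a dispatcher. InPlaceTaskDispatcher runs the task synchronously on the calling thread; the threaded dispatcher's shutdown() is expected to block until outstanding tasks drain.

#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

int main() {
  InPlaceTaskDispatcher Dispatcher;

  // makeGenericNamedTask attaches a description that can be printed via
  // printDescription(), e.g. for logging which task is being dispatched.
  Dispatcher.dispatch(makeGenericNamedTask(
      [] { outs() << "running an ORC task\n"; }, "example task"));

  // Waits until all dispatched tasks have completed (trivially true here).
  Dispatcher.shutdown();
  return 0;
}
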
diff --git a/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h b/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
deleted file mode 100644
index 6cca1933f39f..000000000000
--- a/llvm/include/llvm/ExecutionEngine/OrcMCJITReplacement.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===---- OrcMCJITReplacement.h - Orc-based MCJIT replacement ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file forces OrcMCJITReplacement to link in on certain operating systems.
-// (Windows).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORCMCJITREPLACEMENT_H
-#define LLVM_EXECUTIONENGINE_ORCMCJITREPLACEMENT_H
-
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include <cstdlib>
-
-extern "C" void LLVMLinkInOrcMCJITReplacement();
-
-namespace {
- struct ForceOrcMCJITReplacementLinking {
- ForceOrcMCJITReplacementLinking() {
- // We must reference OrcMCJITReplacement in such a way that compilers will
- // not delete it all as dead code, even with whole program optimization,
- // yet is effectively a NO-OP. As the compiler isn't smart enough to know
- // that getenv() never returns -1, this will do the job.
- if (std::getenv("bar") != (char*) -1)
- return;
-
- LLVMLinkInOrcMCJITReplacement();
- }
- } ForceOrcMCJITReplacementLinking;
-}
-
-#endif
diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index 128c9967a596..c434b45077a3 100644
--- a/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -112,6 +112,20 @@ public:
StringRef SectionName,
bool IsReadOnly) = 0;
+ /// An allocated TLS section
+ struct TLSSection {
+ /// The pointer to the initialization image
+ uint8_t *InitializationImage;
+ /// The TLS offset
+ intptr_t Offset;
+ };
+
+ /// Allocate a memory block of (at least) the given size to be used for
+ /// thread-local storage (TLS).
+ virtual TLSSection allocateTLSSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName);
+
/// Inform the memory manager about the total amount of memory required to
/// allocate all sections to be loaded:
/// \p CodeSize - the total size of all code sections
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 3dc6194c7830..5ee379b7fcad 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -144,6 +144,26 @@ def OMPC_Schedule : Clause<"schedule"> {
];
}
+def OMP_MEMORY_ORDER_SeqCst : ClauseVal<"seq_cst", 1, 1> {}
+def OMP_MEMORY_ORDER_AcqRel : ClauseVal<"acq_rel", 2, 1> {}
+def OMP_MEMORY_ORDER_Acquire : ClauseVal<"acquire", 3, 1> {}
+def OMP_MEMORY_ORDER_Release : ClauseVal<"release", 4, 1> {}
+def OMP_MEMORY_ORDER_Relaxed : ClauseVal<"relaxed", 5, 1> {}
+def OMP_MEMORY_ORDER_Default : ClauseVal<"default", 6, 0> {
+ let isDefault = 1;
+}
+def OMPC_MemoryOrder : Clause<"memory_order"> {
+ let enumClauseValue = "MemoryOrderKind";
+ let allowedClauseValues = [
+ OMP_MEMORY_ORDER_SeqCst,
+ OMP_MEMORY_ORDER_AcqRel,
+ OMP_MEMORY_ORDER_Acquire,
+ OMP_MEMORY_ORDER_Release,
+ OMP_MEMORY_ORDER_Relaxed,
+ OMP_MEMORY_ORDER_Default
+ ];
+}
+
def OMPC_Ordered : Clause<"ordered"> {
let clangClass = "OMPOrderedClause";
let flangClass = "ScalarIntConstantExpr";
@@ -261,13 +281,17 @@ def OMPC_Allocate : Clause<"allocate"> {
}
def OMPC_NonTemporal : Clause<"nontemporal"> {
let clangClass = "OMPNontemporalClause";
+ let flangClass = "Name";
+ let isValueList = true;
}
-def OMP_ORDER_concurrent : ClauseVal<"default",2,0> { let isDefault = 1; }
+def OMP_ORDER_concurrent : ClauseVal<"concurrent",1,1> {}
+def OMP_ORDER_unknown : ClauseVal<"unknown",2,0> { let isDefault = 1; }
def OMPC_Order : Clause<"order"> {
let clangClass = "OMPOrderClause";
let enumClauseValue = "OrderKind";
let allowedClauseValues = [
+ OMP_ORDER_unknown,
OMP_ORDER_concurrent
];
}
@@ -312,6 +336,8 @@ def OMPC_Uniform : Clause<"uniform"> {
}
def OMPC_DeviceType : Clause<"device_type"> {}
def OMPC_Match : Clause<"match"> {}
+def OMPC_AdjustArgs : Clause<"adjust_args"> { }
+def OMPC_AppendArgs : Clause<"append_args"> { }
def OMPC_Depobj : Clause<"depobj"> {
let clangClass = "OMPDepobjClause";
let isImplicit = true;
@@ -337,6 +363,14 @@ def OMPC_Filter : Clause<"filter"> {
let clangClass = "OMPFilterClause";
let flangClass = "ScalarIntExpr";
}
+def OMPC_Align : Clause<"align"> {
+ let clangClass = "OMPAlignClause";
+}
+def OMPC_When : Clause<"when"> {}
+
+def OMPC_Bind : Clause<"bind"> {
+ let clangClass = "OMPBindClause";
+}
//===----------------------------------------------------------------------===//
// Definition of OpenMP directives
@@ -473,8 +507,8 @@ def OMP_TaskWait : Directive<"taskwait"> {
}
def OMP_TaskGroup : Directive<"taskgroup"> {
let allowedClauses = [
- VersionedClause<OMPC_TaskReduction>,
- VersionedClause<OMPC_Allocate>
+ VersionedClause<OMPC_TaskReduction, 50>,
+ VersionedClause<OMPC_Allocate, 50>
];
}
def OMP_Flush : Directive<"flush"> {
@@ -489,10 +523,12 @@ def OMP_Flush : Directive<"flush"> {
}
def OMP_Ordered : Directive<"ordered"> {
let allowedClauses = [
- VersionedClause<OMPC_Threads>,
- VersionedClause<OMPC_Simd>,
VersionedClause<OMPC_Depend>
];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Threads>,
+ VersionedClause<OMPC_Simd>
+ ];
}
def OMP_Atomic : Directive<"atomic"> {
let allowedClauses = [
@@ -1506,13 +1542,18 @@ def OMP_TargetTeamsDistributeSimd :
}
def OMP_Allocate : Directive<"allocate"> {
let allowedOnceClauses = [
- VersionedClause<OMPC_Allocator>
+ VersionedClause<OMPC_Allocator>,
+ VersionedClause<OMPC_Align, 51>
];
}
def OMP_DeclareVariant : Directive<"declare variant"> {
let allowedClauses = [
VersionedClause<OMPC_Match>
];
+ let allowedExclusiveClauses = [
+ VersionedClause<OMPC_AdjustArgs, 51>,
+ VersionedClause<OMPC_AppendArgs, 51>
+ ];
}
def OMP_MasterTaskloop : Directive<"master taskloop"> {
let allowedClauses = [
@@ -1699,6 +1740,22 @@ def OMP_masked : Directive<"masked"> {
VersionedClause<OMPC_Filter>
];
}
+def OMP_loop : Directive<"loop"> {
+ let allowedClauses = [
+ VersionedClause<OMPC_LastPrivate>,
+ VersionedClause<OMPC_Private>,
+ VersionedClause<OMPC_Reduction>,
+ ];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_Bind, 50>,
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Order>,
+ ];
+}
+def OMP_Metadirective : Directive<"metadirective"> {
+ let allowedClauses = [VersionedClause<OMPC_When>];
+ let allowedOnceClauses = [VersionedClause<OMPC_Default>];
+}
def OMP_Unknown : Directive<"unknown"> {
let isDefault = true;
}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index d174cc8992dd..2fec3e7e4230 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -128,6 +128,14 @@ enum class OMPScheduleType {
LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ ModifierMask)
};
+enum OMPTgtExecModeFlags : int8_t {
+ OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
+ OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
+ OMP_TGT_EXEC_MODE_GENERIC_SPMD =
+ OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
+ LLVM_MARK_AS_BITMASK_ENUM(/* LargestValue */ OMP_TGT_EXEC_MODE_GENERIC_SPMD)
+};
+
} // end namespace omp
} // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 0b6aed1e9e12..89f5de229b3b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -29,100 +29,89 @@ namespace omp {
///
/// Example usage in clang:
/// const unsigned slot_size =
-/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+/// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// #else
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
-enum GVIDX {
- /// The maximum number of workers in a kernel.
- /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
- GV_Threads,
+
+struct GV {
/// The size reserved for data in a shared memory slot.
- GV_Slot_Size,
+ const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
- GV_Warp_Size,
- /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
- /// for NVPTX.
- GV_Warp_Size_32,
- /// The number of bits required to represent the max number of threads in warp
- GV_Warp_Size_Log2,
- /// GV_Warp_Size * GV_Slot_Size,
- GV_Warp_Slot_Size,
+ const unsigned GV_Warp_Size;
+
+ constexpr unsigned warpSlotSize() const {
+ return GV_Warp_Size * GV_Slot_Size;
+ }
+
/// the maximum number of teams.
- GV_Max_Teams,
- /// Global Memory Alignment
- GV_Mem_Align,
- /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_Mask,
+ const unsigned GV_Max_Teams;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
- GV_SimpleBufferSize,
+ const unsigned GV_SimpleBufferSize;
// The absolute maximum team size for a working group
- GV_Max_WG_Size,
+ const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
- GV_Default_WG_Size,
- // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
- GV_Max_Warp_Number,
- /// The slot size that should be reserved for a working warp.
- /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_MaskL
+ const unsigned GV_Default_WG_Size;
+
+ constexpr unsigned maxWarpNumber() const {
+ return GV_Max_WG_Size / GV_Warp_Size;
+ }
};
/// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
- 448, // GV_Threads
- 256, // GV_Slot_Size
- 64, // GV_Warp_Size
- 32, // GV_Warp_Size_32
- 6, // GV_Warp_Size_Log2
- 64 * 256, // GV_Warp_Slot_Size
- 128, // GV_Max_Teams
- 256, // GV_Mem_Align
- 63, // GV_Warp_Size_Log2_Mask
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size,
- 256, // GV_Defaut_WG_Size
- 1024 / 64, // GV_Max_WG_Size / GV_WarpSize
- 63 // GV_Warp_Size_Log2_MaskL
+static constexpr GV AMDGPUGridValues64 = {
+ 256, // GV_Slot_Size
+ 64, // GV_Warp_Size
+ 128, // GV_Max_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Default_WG_Size
};
+static constexpr GV AMDGPUGridValues32 = {
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
+ 128, // GV_Max_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Default_WG_Size
+};
+
+template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
+ static_assert(wavesize == 32 || wavesize == 64, "");
+ return wavesize == 32 ? AMDGPUGridValues32 : AMDGPUGridValues64;
+}
+
/// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
- 992, // GV_Threads
- 256, // GV_Slot_Size
- 32, // GV_Warp_Size
- 32, // GV_Warp_Size_32
- 5, // GV_Warp_Size_Log2
- 32 * 256, // GV_Warp_Slot_Size
- 1024, // GV_Max_Teams
- 256, // GV_Mem_Align
- (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size
- 128, // GV_Defaut_WG_Size
- 1024 / 32, // GV_Max_WG_Size / GV_WarpSize
- 31 // GV_Warp_Size_Log2_MaskL
+static constexpr GV NVPTXGridValues = {
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
+ 1024, // GV_Max_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size
+ 128, // GV_Default_WG_Size
};
} // namespace omp
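
Since the refactor replaces the enum-indexed arrays with a struct of named fields, derived quantities that used to be separate table entries (GV_Warp_Slot_Size, GV_Max_Warp_Number) become constexpr helpers. A few illustrative compile-time checks against the values defined above (not part of the patch):

#include "llvm/Frontend/OpenMP/OMPGridValues.h"

// Named-field access replaces the old GVIDX-based lookups.
static_assert(llvm::omp::NVPTXGridValues.GV_Warp_Size == 32, "");
static_assert(llvm::omp::getAMDGPUGridValues<64>().GV_Warp_Size == 64, "");

// Derived values are computed from the named fields instead of being stored.
static_assert(llvm::omp::NVPTXGridValues.warpSlotSize() == 32 * 256, "");
static_assert(llvm::omp::getAMDGPUGridValues<64>().maxWarpNumber() ==
                  1024 / 64, "");
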
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 8144f1527a06..563e0eed1762 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -257,18 +257,17 @@ public:
///
/// * Sign of the step and the comparison operator might disagree:
///
- /// for (int i = 0; i < 42; --i)
+ /// for (int i = 0; i < 42; i -= 1u)
///
//
/// \param Loc The insert and source location description.
/// \param BodyGenCB Callback that will generate the loop body code.
/// \param Start Value of the loop counter for the first iterations.
- /// \param Stop Loop counter values past this will stop the the
- /// iterations.
+ /// \param Stop Loop counter values past this will stop the loop.
/// \param Step Loop counter increment after each iteration; negative
- /// means counting down. \param IsSigned Whether Start, Stop
- /// and Stop are signed integers.
- /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+ /// means counting down.
+ /// \param IsSigned Whether Start, Stop and Step are signed integers.
+ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
/// counter.
/// \param ComputeIP Insertion point for instructions computing the trip
/// count. Can be used to ensure the trip count is available
@@ -335,7 +334,7 @@ public:
/// has a trip count of 0). This is permitted by the OpenMP specification.
///
/// \param DL Debug location for instructions added for collapsing,
- /// such as instructions to compute derive the input loop's
+ /// such as instructions to compute/derive the input loop's
/// induction variables.
/// \param Loops Loops in the loop nest to collapse. Loops are specified
/// from outermost-to-innermost and every control flow of a
@@ -358,8 +357,16 @@ public:
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
- /// \param Loc The source location description, the insertion location
- /// is not used.
+ /// TODO: Workshare loops with static scheduling may contain up to two loops
+ /// that fulfill the requirements of an OpenMP canonical loop. One for
+ /// iterating over all iterations of a chunk and another one for iterating
+ /// over all chunks that are executed on the same thread. Returning
+ /// CanonicalLoopInfo objects representing them may eventually be useful for
+ /// the apply clause planned in OpenMP 6.0, but currently whether these are
+ /// canonical loops is irrelevant.
+ ///
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
/// preheader of the loop.
@@ -368,12 +375,11 @@ public:
/// \param Chunk The size of loop chunk considered as a unit when
/// scheduling. If \p nullptr, defaults to 1.
///
- /// \returns Updated CanonicalLoopInfo.
- CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
- CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier,
+ Value *Chunk = nullptr);
/// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
///
@@ -382,8 +388,9 @@ public:
/// turn it into a workshare loop. In particular, it calls to an OpenMP
/// runtime function in the preheader to obtain, and then in each iteration
/// to update the loop counter.
- /// \param Loc The source location description, the insertion location
- /// is not used.
+ ///
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
/// preheader of the loop.
@@ -393,13 +400,12 @@ public:
/// \param Chunk The size of loop chunk considered as a unit when
/// scheduling. If \p nullptr, defaults to 1.
///
- /// \returns Point where to insert code after the loop.
- InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
- CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- omp::OMPScheduleType SchedType,
- bool NeedsBarrier,
- Value *Chunk = nullptr);
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ omp::OMPScheduleType SchedType,
+ bool NeedsBarrier,
+ Value *Chunk = nullptr);
/// Modifies the canonical loop to be a workshare loop.
///
@@ -410,19 +416,17 @@ public:
/// the current thread, updates the relevant instructions in the canonical
/// loop and calls to an OpenMP runtime finalization function after the loop.
///
- /// \param Loc The source location description, the insertion location
- /// is not used.
+ /// \param DL Debug location for instructions added for the
+ /// workshare-loop construct itself.
/// \param CLI A descriptor of the canonical loop to workshare.
/// \param AllocaIP An insertion point for Alloca instructions usable in the
/// preheader of the loop.
/// \param NeedsBarrier Indicates whether a barrier must be inserted after
/// the loop.
///
- /// \returns Updated CanonicalLoopInfo.
- CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
- CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP,
- bool NeedsBarrier);
+ /// \returns Point where to insert code after the workshare construct.
+ InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, bool NeedsBarrier);
/// Tile a loop nest.
///
@@ -471,6 +475,48 @@ public:
tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
ArrayRef<Value *> TileSizes);
+ /// Fully unroll a loop.
+ ///
+ /// Instead of unrolling the loop immediately (and duplicating its body
+ /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
+ /// metadata.
+ ///
+ /// \param DL Debug location for instructions added by unrolling.
+ /// \param Loop The loop to unroll. The loop will be invalidated.
+ void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop);
+
+ /// Fully or partially unroll a loop. How the loop is unrolled is determined
+ /// using LLVM's LoopUnrollPass.
+ ///
+ /// \param DL Debug location for instructions added by unrolling.
+ /// \param Loop The loop to unroll. The loop will be invalidated.
+ void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop);
+
+ /// Partially unroll a loop.
+ ///
+ /// The CanonicalLoopInfo of the unrolled loop for use with chained
+ /// loop-associated directive can be requested using \p UnrolledCLI. Not
+ /// needing the CanonicalLoopInfo allows more efficient code generation by
+ /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
+ /// A loop-associated directive applied to the unrolled loop needs to know the
+ /// new trip count which means that if using a heuristically determined unroll
+ /// factor (\p Factor == 0), that factor must be computed immediately. We are
+ /// using the same logic as the LoopUnrollPass to derived the unroll factor,
+ /// but which assumes that some canonicalization has taken place (e.g.
+ /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
+ /// better when the unrolled loop's CanonicalLoopInfo is not needed.
+ ///
+ /// \param DL Debug location for instructions added by unrolling.
+ /// \param Loop The loop to unroll. The loop will be invalidated.
+ /// \param Factor The factor to unroll the loop by. A factor of 0
+ /// indicates that a heuristic should be used to determine
+ /// the unroll-factor.
+ /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
+ /// partially unrolled loop. Otherwise, uses loop metadata
+ /// to defer unrolling to the LoopUnrollPass.
+ void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
+ CanonicalLoopInfo **UnrolledCLI);
+
/// Generator for '#omp flush'
///
/// \param Loc The location where the flush directive was encountered
@@ -486,6 +532,115 @@ public:
/// \param Loc The location where the taskyield directive was encountered.
void createTaskyield(const LocationDescription &Loc);
+ /// Functions used to generate reductions. Such functions take two Values
+ /// representing LHS and RHS of the reduction, respectively, and a reference
+ /// to the value that is updated to refer to the reduction result.
+ using ReductionGenTy =
+ function_ref<InsertPointTy(InsertPointTy, Value *, Value *, Value *&)>;
+
+ /// Functions used to generate atomic reductions. Such functions take two
+ /// Values representing pointers to LHS and RHS of the reduction. They are
+ /// expected to atomically update the LHS to the reduced value.
+ using AtomicReductionGenTy =
+ function_ref<InsertPointTy(InsertPointTy, Value *, Value *)>;
+
+ /// Information about an OpenMP reduction.
+ struct ReductionInfo {
+ ReductionInfo(Value *Variable, Value *PrivateVariable,
+ ReductionGenTy ReductionGen,
+ AtomicReductionGenTy AtomicReductionGen)
+ : Variable(Variable), PrivateVariable(PrivateVariable),
+ ReductionGen(ReductionGen), AtomicReductionGen(AtomicReductionGen) {}
+
+ /// Returns the type of the element being reduced.
+ Type *getElementType() const {
+ return Variable->getType()->getPointerElementType();
+ }
+
+ /// Reduction variable of pointer type.
+ Value *Variable;
+
+ /// Thread-private partial reduction variable.
+ Value *PrivateVariable;
+
+ /// Callback for generating the reduction body. The IR produced by this will
+ /// be used to combine two values in a thread-safe context, e.g., under
+ /// lock or within the same thread, and therefore need not be atomic.
+ ReductionGenTy ReductionGen;
+
+ /// Callback for generating the atomic reduction body, may be null. The IR
+ /// produced by this will be used to atomically combine two values during
+ /// reduction. If null, the implementation will use the non-atomic version
+ /// along with the appropriate synchronization mechanisms.
+ AtomicReductionGenTy AtomicReductionGen;
+ };
+
+ // TODO: provide atomic and non-atomic reduction generators for reduction
+ // operators defined by the OpenMP specification.
+
+ /// Generator for '#omp reduction'.
+ ///
+ /// Emits the IR instructing the runtime to perform the specific kind of
+ /// reductions. Expects reduction variables to have been privatized and
+ /// initialized to reduction-neutral values separately. Emits the calls to
+ /// runtime functions as well as the reduction function and the basic blocks
+ /// performing the reduction atomically and non-atomically.
+ ///
+ /// The code emitted for the following:
+ ///
+ /// \code
+ /// type var_1;
+ /// type var_2;
+ /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
+ /// /* body */;
+ /// \endcode
+ ///
+ /// corresponds to the following sketch.
+ ///
+ /// \code
+ /// void _outlined_par() {
+ /// // N is the number of different reductions.
+ /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
+ /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
+ /// _omp_reduction_func,
+ /// _gomp_critical_user.reduction.var)) {
+ /// case 1: {
+ /// var_1 = var_1 <reduction-op> privatized_var_1;
+ /// var_2 = var_2 <reduction-op> privatized_var_2;
+ /// // ...
+ /// __kmpc_end_reduce(...);
+ /// break;
+ /// }
+ /// case 2: {
+ /// _Atomic<ReductionOp>(var_1, privatized_var_1);
+ /// _Atomic<ReductionOp>(var_2, privatized_var_2);
+ /// // ...
+ /// break;
+ /// }
+ /// default: break;
+ /// }
+ /// }
+ ///
+ /// void _omp_reduction_func(void **lhs, void **rhs) {
+ /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
+ /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
+ /// // ...
+ /// }
+ /// \endcode
+ ///
+ /// \param Loc The location where the reduction was
+  ///                            encountered. Must be within the associated
+ /// directive and after the last local access to the
+ /// reduction variables.
+ /// \param AllocaIP An insertion point suitable for allocas usable
+ /// in reductions.
+ /// \param ReductionInfos A list of info on each reduction variable.
+ /// \param IsNoWait A flag set if the reduction is marked as nowait.
+ InsertPointTy createReductions(const LocationDescription &Loc,
+ InsertPointTy AllocaIP,
+ ArrayRef<ReductionInfo> ReductionInfos,
+ bool IsNoWait = false);
+
///}
/// Return the insertion point used by the underlying IRBuilder.
@@ -515,6 +670,10 @@ public:
Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
unsigned Line, unsigned Column);
+ /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
+ /// fallback if \p DL does not specify the function name.
+ Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr);
+
/// Return the (LLVM-IR) string describing the source location \p Loc.
Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
@@ -524,8 +683,8 @@ public:
omp::IdentFlag Flags = omp::IdentFlag(0),
unsigned Reserve2Flags = 0);
- // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
- Type *getLanemaskType();
+  /// Create a global flag \p Name in the module with initial value \p Value.
+ GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
/// Generate control flow and cleanup for cancellation.
///
@@ -651,11 +810,11 @@ public:
/// \param Loc The source location description.
/// \param MapperFunc Function to be called.
/// \param SrcLocInfo Source location information global.
- /// \param MaptypesArgs
- /// \param MapnamesArg
+  /// \param MaptypesArg The map types array argument.
+  /// \param MapnamesArg The map names array argument.
/// \param MapperAllocas The AllocaInst used for the call.
/// \param DeviceID Device ID for the call.
- /// \param TotalNbOperand Number of operand in the call.
+ /// \param NumOperands Number of operands in the call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
struct MapperAllocas &MapperAllocas, int64_t DeviceID,
@@ -705,7 +864,7 @@ public:
/// \param BodyGenCB Callback that will generate the region code.
/// \param FiniCB Callback to finialize variable copies.
///
- /// \returns The insertion position *after* the master.
+ /// \returns The insertion position *after* the masked.
InsertPointTy createMasked(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB, Value *Filter);
@@ -718,12 +877,41 @@ public:
/// \param CriticalName name of the lock used by the critical directive
/// \param HintInst Hint Instruction for hint clause associated with critical
///
- /// \returns The insertion position *after* the master.
+ /// \returns The insertion position *after* the critical.
InsertPointTy createCritical(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
FinalizeCallbackTy FiniCB,
StringRef CriticalName, Value *HintInst);
+ /// Generator for '#omp ordered depend (source | sink)'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
+ /// \param NumLoops The number of loops in depend clause.
+  /// \param StoreValues The values to be stored in the dependence vector.
+  /// \param Name The name of the alloca instruction.
+ /// \param IsDependSource If true, depend source; otherwise, depend sink.
+ ///
+ /// \return The insertion position *after* the ordered.
+ InsertPointTy createOrderedDepend(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, unsigned NumLoops,
+ ArrayRef<llvm::Value *> StoreValues,
+ const Twine &Name, bool IsDependSource);
+
+ /// Generator for '#omp ordered [threads | simd]'
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the region code.
+ /// \param FiniCB Callback to finalize variable copies.
+  /// \param IsThreads If true, the construct has a threads clause or no
+  ///                   clause; otherwise, it has a simd clause.
+ ///
+ /// \returns The insertion position *after* the ordered.
+ InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
+ BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB,
+ bool IsThreads);
+
/// Generator for '#omp sections'
///
/// \param Loc The insert and source location description.
@@ -816,14 +1004,16 @@ public:
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+ InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+ bool RequiresFullRuntime);
/// Create a runtime call for kmpc_target_deinit
///
/// \param Loc The insert and source location description.
/// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
/// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
- void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
+ void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD,
+ bool RequiresFullRuntime);
///}
@@ -1121,7 +1311,25 @@ public:
/// The control-flow structure is standardized for easy consumption by
/// directives associated with loops. For instance, the worksharing-loop
/// construct may change this control flow such that each loop iteration is
-/// executed on only one thread.
+/// executed on only one thread. The constraints of a canonical loop in brief
+/// are:
+///
+/// * The number of loop iterations must have been computed before entering the
+/// loop.
+///
+/// * Has an (unsigned) logical induction variable that starts at zero and
+/// increments by one.
+///
+/// * The loop's CFG itself has no side-effects. The OpenMP specification
+/// itself allows side-effects, but the order in which they happen, including
+/// how often or whether at all, is unspecified. We expect that the frontend
+/// will emit those side-effect instructions somewhere (e.g. before the loop)
+/// such that the CanonicalLoopInfo itself can be side-effect free.
+///
+/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
+/// execution of a loop body that satisfies these constraints. It does NOT
+/// represent arbitrary SESE regions that happen to contain a loop. Do not use
+/// CanonicalLoopInfo for such purposes.
///
/// The control flow can be described as follows:
///
@@ -1141,73 +1349,149 @@ public:
/// |
/// After
///
-/// Code in the header, condition block, latch and exit block must not have any
-/// side-effect. The body block is the single entry point into the loop body,
-/// which may contain arbitrary control flow as long as all control paths
-/// eventually branch to the latch block.
+/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
+/// inclusive) and end at AfterIP (at the After's first instruction, exclusive).
+/// That is, instructions in the Preheader and After blocks (except the
+/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
+/// side-effects. Typically, the Preheader is used to compute the loop's trip
+/// count. The instructions from BodyIP (at the Body block's first instruction,
+/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
+/// control and thus can have side-effects. The body block is the single entry
+/// point into the loop body, which may contain arbitrary control flow as long
+/// as all control paths eventually branch to the Latch block.
+///
+/// TODO: Consider adding another standardized BasicBlock between Body CFG and
+/// Latch to guarantee that there is only a single edge to the latch. It would
+/// make loop transformations easier by removing the need to consider multiple
+/// predecessors of the latch (see redirectAllPredecessorsTo) and would give us
+/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
+/// executes after each body iteration.
+///
+/// There must be no loop-carried dependencies through llvm::Values. This is
+/// equivalent to the Latch having no PHINode and the Header's only PHINode
+/// being for the induction variable.
+///
+/// All code in Header, Cond, Latch and Exit (plus the terminator of the
+/// Preheader) is CanonicalLoopInfo's responsibility and its build-up is checked
+/// by assertOK(). It is expected not to be modified unless explicitly
+/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
+/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
+/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
+/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
+/// anymore as its underlying control flow may not exist anymore.
+/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
+/// may also return a new CanonicalLoopInfo that can be passed to other
+/// loop-associated construct implementing methods. These loop-transforming
+/// methods may either create a new CanonicalLoopInfo usually using
+/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
+/// modify one of the input CanonicalLoopInfo and return it as representing the
+/// modified loop. What is done is an implementation detail of
+/// transformation-implementing method and callers should always assume that the
+/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
+/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
+/// created by createCanonicalLoop, such that transforming methods do not have
+/// to special case where the CanonicalLoopInfo originated from.
+///
+/// Generally, methods consuming CanonicalLoopInfo do not need an
+/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
+/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
+/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
+/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
+/// any InsertPoint in the Preheader, After or Body block can still be used
+/// after
+/// calling such a method.
///
-/// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
-/// classes.
+/// TODO: Provide mechanisms for exception handling and cancellation points.
+///
+/// Defined outside OpenMPIRBuilder because nested classes cannot be
+/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
class CanonicalLoopInfo {
friend class OpenMPIRBuilder;
private:
- /// Whether this object currently represents a loop.
- bool IsValid = false;
-
- BasicBlock *Preheader;
- BasicBlock *Header;
- BasicBlock *Cond;
- BasicBlock *Body;
- BasicBlock *Latch;
- BasicBlock *Exit;
- BasicBlock *After;
+ BasicBlock *Preheader = nullptr;
+ BasicBlock *Header = nullptr;
+ BasicBlock *Cond = nullptr;
+ BasicBlock *Body = nullptr;
+ BasicBlock *Latch = nullptr;
+ BasicBlock *Exit = nullptr;
+ BasicBlock *After = nullptr;
/// Add the control blocks of this loop to \p BBs.
///
  /// This does not include any block from the body, not even the one returned
/// by getBody().
+ ///
+ /// FIXME: This currently includes the Preheader and After blocks even though
+ /// their content is (mostly) not under CanonicalLoopInfo's control.
+  /// Re-evaluate whether this makes sense.
void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
public:
+ /// Returns whether this object currently represents the IR of a loop. If
+ /// returning false, it may have been consumed by a loop transformation or not
+  /// been initialized. Do not use it in this case.
+ bool isValid() const { return Header; }
+
/// The preheader ensures that there is only a single edge entering the loop.
  /// Code that must be executed before any loop iteration can be emitted here,
/// such as computing the loop trip count and begin lifetime markers. Code in
/// the preheader is not considered part of the canonical loop.
- BasicBlock *getPreheader() const { return Preheader; }
+ BasicBlock *getPreheader() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Preheader;
+ }
/// The header is the entry for each iteration. In the canonical control flow,
/// it only contains the PHINode for the induction variable.
- BasicBlock *getHeader() const { return Header; }
+ BasicBlock *getHeader() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Header;
+ }
/// The condition block computes whether there is another loop iteration. If
/// yes, branches to the body; otherwise to the exit block.
- BasicBlock *getCond() const { return Cond; }
+ BasicBlock *getCond() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Cond;
+ }
/// The body block is the single entry for a loop iteration and not controlled
/// by CanonicalLoopInfo. It can contain arbitrary control flow but must
/// eventually branch to the \p Latch block.
- BasicBlock *getBody() const { return Body; }
+ BasicBlock *getBody() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Body;
+ }
/// Reaching the latch indicates the end of the loop body code. In the
/// canonical control flow, it only contains the increment of the induction
/// variable.
- BasicBlock *getLatch() const { return Latch; }
+ BasicBlock *getLatch() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Latch;
+ }
/// Reaching the exit indicates no more iterations are being executed.
- BasicBlock *getExit() const { return Exit; }
+ BasicBlock *getExit() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Exit;
+ }
/// The after block is intended for clean-up code such as lifetime end
  /// markers. It is separate from the exit block to ensure that, analogous to
  /// the preheader, it has just a single entry edge and is free from PHI
  /// nodes should there be multiple loop exits (such as from break
/// statements/cancellations).
- BasicBlock *getAfter() const { return After; }
+ BasicBlock *getAfter() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return After;
+ }
/// Returns the llvm::Value containing the number of loop iterations. It must
/// be valid in the preheader and always interpreted as an unsigned integer of
/// any bit-width.
Value *getTripCount() const {
+ assert(isValid() && "Requires a valid canonical loop");
Instruction *CmpI = &Cond->front();
assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
return CmpI->getOperand(1);
@@ -1216,33 +1500,47 @@ public:
/// Returns the instruction representing the current logical induction
/// variable. Always unsigned, always starting at 0 with an increment of one.
Instruction *getIndVar() const {
+ assert(isValid() && "Requires a valid canonical loop");
Instruction *IndVarPHI = &Header->front();
assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
return IndVarPHI;
}
/// Return the type of the induction variable (and the trip count).
- Type *getIndVarType() const { return getIndVar()->getType(); }
+ Type *getIndVarType() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return getIndVar()->getType();
+ }
/// Return the insertion point for user code before the loop.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
+ assert(isValid() && "Requires a valid canonical loop");
return {Preheader, std::prev(Preheader->end())};
};
/// Return the insertion point for user code in the body.
OpenMPIRBuilder::InsertPointTy getBodyIP() const {
+ assert(isValid() && "Requires a valid canonical loop");
return {Body, Body->begin()};
};
/// Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const {
+ assert(isValid() && "Requires a valid canonical loop");
return {After, After->begin()};
};
- Function *getFunction() const { return Header->getParent(); }
+ Function *getFunction() const {
+ assert(isValid() && "Requires a valid canonical loop");
+ return Header->getParent();
+ }
/// Consistency self-check.
void assertOK() const;
+
+ /// Invalidate this loop. That is, the underlying IR does not fulfill the
+ /// requirements of an OpenMP canonical loop anymore.
+ void invalidate();
};
} // end namespace llvm
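
// Hedged usage sketch (not part of the patch): emit per-iteration user code
// into a canonical loop using only the accessors declared above. CLI is
// assumed to come from somewhere like OpenMPIRBuilder::createCanonicalLoop;
// Builder and ArrayBase (assumed to be an i32*) are assumed to exist as well.
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"
#include <cassert>

static void emitLoopBody(llvm::IRBuilder<> &Builder,
                         llvm::CanonicalLoopInfo *CLI,
                         llvm::Value *ArrayBase) {
  using namespace llvm;
  assert(CLI->isValid() && "loop must not have been consumed yet");

  // Code emitted at BodyIP runs once per iteration; it is outside
  // CanonicalLoopInfo's control, so side-effects are fine here.
  Builder.restoreIP(CLI->getBodyIP());
  Value *IV = CLI->getIndVar(); // unsigned logical IV: 0, 1, 2, ...
  Value *Elem = Builder.CreateGEP(Builder.getInt32Ty(), ArrayBase, IV);
  Builder.CreateStore(Builder.getInt32(0), Elem);

  // Code that must run after the last iteration belongs at AfterIP.
  Builder.restoreIP(CLI->getAfterIP());
}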
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index eb673b199fc4..8e4f7568fb9c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -39,7 +39,6 @@ __OMP_TYPE(Int32Ptr)
__OMP_TYPE(Int64Ptr)
OMP_TYPE(SizeTy, M.getDataLayout().getIntPtrType(Ctx))
-OMP_TYPE(LanemaskTy, getLanemaskType())
#define __OMP_PTR_TYPE(NAME, BASE) OMP_TYPE(NAME, BASE->getPointerTo())
@@ -272,6 +271,15 @@ __OMP_RTL(__kmpc_for_static_init_8, false, Void, IdentPtr, Int32, Int32,
__OMP_RTL(__kmpc_for_static_init_8u, false, Void, IdentPtr, Int32, Int32,
Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64)
__OMP_RTL(__kmpc_for_static_fini, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_distribute_static_init_4, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32)
+__OMP_RTL(__kmpc_distribute_static_init_4u, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int32Ptr, Int32Ptr, Int32Ptr, Int32, Int32)
+__OMP_RTL(__kmpc_distribute_static_init_8, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64)
+__OMP_RTL(__kmpc_distribute_static_init_8u, false, Void, IdentPtr, Int32, Int32,
+ Int32Ptr, Int64Ptr, Int64Ptr, Int64Ptr, Int64, Int64)
+__OMP_RTL(__kmpc_distribute_static_fini, false, Void, IdentPtr, Int32)
__OMP_RTL(__kmpc_dist_dispatch_init_4, false, Void, IdentPtr, Int32, Int32,
Int32Ptr, Int32, Int32, Int32, Int32)
__OMP_RTL(__kmpc_dist_dispatch_init_4u, false, Void, IdentPtr, Int32, Int32,
@@ -415,8 +423,8 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
/* Int */ Int32, /* kmp_task_t */ VoidPtr)
/// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int1, Int1, Int1)
-__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int1, Int1)
+__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int8, Int1, Int1)
+__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int8, Int1)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
@@ -442,9 +450,12 @@ __OMP_RTL(__kmpc_get_shared_variables, false, Void, VoidPtrPtrPtr)
__OMP_RTL(__kmpc_parallel_level, false, Int8, )
__OMP_RTL(__kmpc_is_spmd_exec_mode, false, Int8, )
__OMP_RTL(__kmpc_barrier_simple_spmd, false, Void, IdentPtr, Int32)
+__OMP_RTL(__kmpc_barrier_simple_generic, false, Void, IdentPtr, Int32)
-__OMP_RTL(__kmpc_warp_active_thread_mask, false, LanemaskTy,)
-__OMP_RTL(__kmpc_syncwarp, false, Void, LanemaskTy)
+__OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
+__OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
+
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
__OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
@@ -510,6 +521,11 @@ __OMP_ATTRS_SET(NoCaptureAttrs,
? AttributeSet(EnumAttr(NoCapture))
: AttributeSet(EnumAttr(NoCapture)))
+__OMP_ATTRS_SET(AlwaysInlineAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(AlwaysInline))
+ : AttributeSet(EnumAttr(AlwaysInline)))
+
#if 0
__OMP_ATTRS_SET(InaccessibleOnlyAttrs,
OptimisticAttributes
@@ -535,6 +551,11 @@ __OMP_ATTRS_SET(ReadOnlyPtrAttrs,
EnumAttr(NoCapture))
: AttributeSet())
+__OMP_ATTRS_SET(DeviceAllocAttrs,
+ OptimisticAttributes
+ ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync))
+ : AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync)))
+
#if 0
__OMP_ATTRS_SET(WriteOnlyPtrAttrs,
OptimisticAttributes
@@ -575,6 +596,8 @@ __OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_barrier_simple_spmd, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_barrier_simple_generic, BarrierAttrs, AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_warp_active_thread_mask, BarrierAttrs, AttributeSet(),
ParamAttrs())
__OMP_RTL_ATTRS(__kmpc_syncwarp, BarrierAttrs, AttributeSet(), ParamAttrs())
@@ -703,6 +726,28 @@ __OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(),
AttributeSet(), AttributeSet()))
__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs,
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_4u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_init_8u, GetterArgWriteAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs,
+ AttributeSet(), AttributeSet()))
+__OMP_RTL_ATTRS(__kmpc_distribute_static_fini, InaccessibleArgOnlyAttrs,
+ AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs,
AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(),
@@ -854,9 +899,9 @@ __OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
-__OMP_RTL_ATTRS(__kmpc_alloc_shared, DefaultAttrs, ReturnPtrAttrs,
+__OMP_RTL_ATTRS(__kmpc_alloc_shared, DeviceAllocAttrs, ReturnPtrAttrs,
ParamAttrs())
-__OMP_RTL_ATTRS(__kmpc_free_shared, AllocAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(__kmpc_free_shared, DeviceAllocAttrs, AttributeSet(),
ParamAttrs(NoCaptureAttrs))
__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, ParamAttrs())
@@ -897,6 +942,9 @@ __OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(),
__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs))
+__OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
+ ParamAttrs())
+
#undef __OMP_RTL_ATTRS
#undef OMP_RTL_ATTRS
#undef AttributeSet
@@ -920,6 +968,7 @@ __OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs,
OMP_IDENT_FLAG(OMP_IDENT_FLAG_##Name, #Name, Value)
__OMP_IDENT_FLAG(KMPC, 0x02)
+__OMP_IDENT_FLAG(ATOMIC_REDUCE, 0x10)
__OMP_IDENT_FLAG(BARRIER_EXPL, 0x20)
__OMP_IDENT_FLAG(BARRIER_IMPL, 0x0040)
__OMP_IDENT_FLAG(BARRIER_IMPL_MASK, 0x01C0)
diff --git a/llvm/include/llvm/IR/AbstractCallSite.h b/llvm/include/llvm/IR/AbstractCallSite.h
index e8cf05001542..31df4c75b6e7 100644
--- a/llvm/include/llvm/IR/AbstractCallSite.h
+++ b/llvm/include/llvm/IR/AbstractCallSite.h
@@ -153,7 +153,7 @@ public:
/// Return the number of parameters of the callee.
unsigned getNumArgOperands() const {
if (isDirectCall())
- return CB->getNumArgOperands();
+ return CB->arg_size();
// Subtract 1 for the callee encoding.
return CI.ParameterEncoding.size() - 1;
}
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index dcf658f439b4..396ab6a9d01d 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -97,7 +97,7 @@ public:
/// If this is a byval or inalloca argument, return its alignment.
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
- unsigned getParamAlignment() const;
+ uint64_t getParamAlignment() const;
/// If this is a byval or inalloca argument, return its alignment.
MaybeAlign getParamAlign() const;
diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h
index f64616c25d87..08e6c8b6f1e0 100644
--- a/llvm/include/llvm/IR/Assumptions.h
+++ b/llvm/include/llvm/IR/Assumptions.h
@@ -15,12 +15,14 @@
#ifndef LLVM_IR_ASSUMPTIONS_H
#define LLVM_IR_ASSUMPTIONS_H
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
namespace llvm {
class Function;
+class CallBase;
/// The key we use for assumption attributes.
constexpr StringRef AssumptionAttrKey = "llvm.assume";
@@ -43,7 +45,25 @@ private:
};
/// Return true if \p F has the assumption \p AssumptionStr attached.
-bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr);
+bool hasAssumption(const Function &F,
+ const KnownAssumptionString &AssumptionStr);
+
+/// Return true if \p CB or the callee has the assumption \p AssumptionStr
+/// attached.
+bool hasAssumption(const CallBase &CB,
+ const KnownAssumptionString &AssumptionStr);
+
+/// Return the set of all assumptions for the function \p F.
+DenseSet<StringRef> getAssumptions(const Function &F);
+
+/// Return the set of all assumptions for the call \p CB.
+DenseSet<StringRef> getAssumptions(const CallBase &CB);
+
+/// Appends the set of assumptions \p Assumptions to \p F.
+bool addAssumptions(Function &F, const DenseSet<StringRef> &Assumptions);
+
+/// Appends the set of assumptions \p Assumptions to \p CB.
+bool addAssumptions(CallBase &CB, const DenseSet<StringRef> &Assumptions);
} // namespace llvm
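
// Hedged sketch (not part of the patch): a purely mechanical use of the new
// overloads above. Caller and Callee are assumed to be existing functions;
// whether copying assumptions between them is semantically legal is left to
// the caller of this helper.
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Function.h"

static bool copyAssumptions(llvm::Function &Caller,
                            const llvm::Function &Callee) {
  // Collect every "llvm.assume" string attached to the callee ...
  llvm::DenseSet<llvm::StringRef> CalleeAssumptions =
      llvm::getAssumptions(Callee);
  // ... and append it to the caller; returns true if anything changed.
  return llvm::addAssumptions(Caller, CalleeAssumptions);
}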
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index d7bd3edb3d4c..282be640d8be 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -37,7 +37,6 @@ class AttrBuilder;
class AttributeImpl;
class AttributeListImpl;
class AttributeSetNode;
-template<typename T> struct DenseMapInfo;
class FoldingSetNodeID;
class Function;
class LLVMContext;
@@ -78,6 +77,7 @@ public:
TombstoneKey, ///< Use as Tombstone key for DenseMap of AttrKind
};
+ static const unsigned NumIntAttrKinds = LastIntAttr - FirstIntAttr + 1;
static const unsigned NumTypeAttrKinds = LastTypeAttr - FirstTypeAttr + 1;
static bool isEnumAttrKind(AttrKind Kind) {
@@ -265,7 +265,7 @@ inline Attribute unwrap(LLVMAttributeRef Attr) {
/// and removing string or integer attributes involves a FoldingSet lookup.
class AttributeSet {
friend AttributeListImpl;
- template <typename Ty> friend struct DenseMapInfo;
+ template <typename Ty, typename Enable> friend struct DenseMapInfo;
// TODO: Extract AvailableAttrs from AttributeSetNode and store them here.
// This will allow an efficient implementation of addAttribute and
@@ -366,7 +366,7 @@ public:
//===----------------------------------------------------------------------===//
/// \class
/// Provide DenseMapInfo for AttributeSet.
-template <> struct DenseMapInfo<AttributeSet> {
+template <> struct DenseMapInfo<AttributeSet, void> {
static AttributeSet getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
@@ -408,7 +408,7 @@ private:
friend class AttributeListImpl;
friend class AttributeSet;
friend class AttributeSetNode;
- template <typename Ty> friend struct DenseMapInfo;
+ template <typename Ty, typename Enable> friend struct DenseMapInfo;
/// The attributes that we are managing. This can be null to represent
/// the empty attributes list.
@@ -432,8 +432,8 @@ private:
static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets);
- AttributeList setAttributes(LLVMContext &C, unsigned Index,
- AttributeSet Attrs) const;
+ AttributeList setAttributesAtIndex(LLVMContext &C, unsigned Index,
+ AttributeSet Attrs) const;
public:
AttributeList() = default;
@@ -454,32 +454,84 @@ public:
static AttributeList get(LLVMContext &C, unsigned Index,
const AttrBuilder &B);
+ // TODO: remove non-AtIndex versions of these methods.
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList addAttributeAtIndex(
+ LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
- addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
- StringRef Value = StringRef()) const;
+ addAttributeAtIndex(LLVMContext &C, unsigned Index, StringRef Kind,
+ StringRef Value = StringRef()) const;
/// Add an attribute to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
- Attribute A) const;
+ LLVM_NODISCARD AttributeList addAttributeAtIndex(LLVMContext &C,
+ unsigned Index,
+ Attribute A) const;
/// Add attributes to the attribute set at the given index.
/// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const;
+ LLVM_NODISCARD AttributeList addAttributesAtIndex(LLVMContext &C,
+ unsigned Index,
+ const AttrBuilder &B) const;
+
+ /// Add a function attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const {
+ return addAttributeAtIndex(C, FunctionIndex, Kind);
+ }
+
+ /// Add a function attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttribute(LLVMContext &C,
+ Attribute Attr) const {
+ return addAttributeAtIndex(C, FunctionIndex, Attr);
+ }
+
+ /// Add a function attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttribute(
+ LLVMContext &C, StringRef Kind, StringRef Value = StringRef()) const {
+ return addAttributeAtIndex(C, FunctionIndex, Kind, Value);
+ }
+
+  /// Add function attributes to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addFnAttributes(LLVMContext &C,
+ const AttrBuilder &B) const {
+ return addAttributesAtIndex(C, FunctionIndex, B);
+ }
+
+ /// Add a return value attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addRetAttribute(LLVMContext &C,
+ Attribute::AttrKind Kind) const {
+ return addAttributeAtIndex(C, ReturnIndex, Kind);
+ }
+
+ /// Add a return value attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addRetAttribute(LLVMContext &C,
+ Attribute Attr) const {
+ return addAttributeAtIndex(C, ReturnIndex, Attr);
+ }
+
+ /// Add a return value attribute to the list. Returns a new list because
+ /// attribute lists are immutable.
+ LLVM_NODISCARD AttributeList addRetAttributes(LLVMContext &C,
+ const AttrBuilder &B) const {
+ return addAttributesAtIndex(C, ReturnIndex, B);
+ }
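
// Hedged sketch (not part of the patch): the Fn/Ret helpers above replace
// explicit addAttributeAtIndex(C, FunctionIndex/ReturnIndex, ...) calls. F is
// assumed to be an existing function whose return type is a pointer.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"

static void markNoUnwindWithNonNullRet(llvm::Function &F) {
  llvm::LLVMContext &Ctx = F.getContext();
  llvm::AttributeList AL = F.getAttributes();
  AL = AL.addFnAttribute(Ctx, llvm::Attribute::NoUnwind); // function index
  AL = AL.addRetAttribute(Ctx, llvm::Attribute::NonNull); // return index
  F.setAttributes(AL);
}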
/// Add an argument attribute to the list. Returns a new list because
/// attribute lists are immutable.
LLVM_NODISCARD AttributeList addParamAttribute(
LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
- return addAttribute(C, ArgNo + FirstArgIndex, Kind);
+ return addAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind);
}
/// Add an argument attribute to the list. Returns a new list because
@@ -487,7 +539,7 @@ public:
LLVM_NODISCARD AttributeList
addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind,
StringRef Value = StringRef()) const {
- return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
+ return addAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind, Value);
}
/// Add an attribute to the attribute list at the given arg indices. Returns a
@@ -501,34 +553,87 @@ public:
LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C,
unsigned ArgNo,
const AttrBuilder &B) const {
- return addAttributes(C, ArgNo + FirstArgIndex, B);
+ return addAttributesAtIndex(C, ArgNo + FirstArgIndex, B);
}
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const;
+ LLVM_NODISCARD AttributeList removeAttributeAtIndex(
+ LLVMContext &C, unsigned Index, Attribute::AttrKind Kind) const;
/// Remove the specified attribute at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeAttributeAtIndex(LLVMContext &C,
+ unsigned Index,
+ StringRef Kind) const;
LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const;
+ StringRef Kind) const {
+ return removeAttributeAtIndex(C, Index, Kind);
+ }
/// Remove the specified attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList removeAttributes(
+ LLVM_NODISCARD AttributeList removeAttributesAtIndex(
LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const;
/// Remove all attributes at the specified index from this
/// attribute list. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C,
- unsigned Index) const;
+ LLVM_NODISCARD AttributeList removeAttributesAtIndex(LLVMContext &C,
+ unsigned Index) const;
+
+ /// Remove the specified attribute at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeFnAttribute(LLVMContext &C, Attribute::AttrKind Kind) const {
+ return removeAttributeAtIndex(C, FunctionIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeFnAttribute(LLVMContext &C,
+ StringRef Kind) const {
+ return removeAttributeAtIndex(C, FunctionIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeFnAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const {
+ return removeAttributesAtIndex(C, FunctionIndex, AttrsToRemove);
+ }
+
+ /// Remove the attributes at the function index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeFnAttributes(LLVMContext &C) const {
+ return removeAttributesAtIndex(C, FunctionIndex);
+ }
+
+ /// Remove the specified attribute at the return value index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const {
+ return removeAttributeAtIndex(C, ReturnIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the return value index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList removeRetAttribute(LLVMContext &C,
+ StringRef Kind) const {
+ return removeAttributeAtIndex(C, ReturnIndex, Kind);
+ }
+
+ /// Remove the specified attribute at the return value index from this
+ /// attribute list. Returns a new list because attribute lists are immutable.
+ LLVM_NODISCARD AttributeList
+ removeRetAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const {
+ return removeAttributesAtIndex(C, ReturnIndex, AttrsToRemove);
+ }
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttribute(
LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
- return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+ return removeAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
@@ -536,80 +641,55 @@ public:
LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C,
unsigned ArgNo,
StringRef Kind) const {
- return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+ return removeAttributeAtIndex(C, ArgNo + FirstArgIndex, Kind);
}
/// Remove the specified attribute at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttributes(
LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const {
- return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
+ return removeAttributesAtIndex(C, ArgNo + FirstArgIndex, AttrsToRemove);
}
/// Remove all attributes at the specified arg index from this
/// attribute list. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C,
unsigned ArgNo) const {
- return removeAttributes(C, ArgNo + FirstArgIndex);
+ return removeAttributesAtIndex(C, ArgNo + FirstArgIndex);
}
  /// Replace the type contained by attribute \p AttrKind at index \p ArgNo with
/// \p ReplacementTy, preserving all other attributes.
- LLVM_NODISCARD AttributeList replaceAttributeType(LLVMContext &C,
- unsigned ArgNo,
- Attribute::AttrKind Kind,
- Type *ReplacementTy) const {
- Attribute Attr = getAttribute(ArgNo, Kind);
- auto Attrs = removeAttribute(C, ArgNo, Kind);
- return Attrs.addAttribute(C, ArgNo, Attr.getWithNewType(C, ReplacementTy));
+ LLVM_NODISCARD AttributeList replaceAttributeTypeAtIndex(
+ LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind,
+ Type *ReplacementTy) const {
+ Attribute Attr = getAttributeAtIndex(ArgNo, Kind);
+ auto Attrs = removeAttributeAtIndex(C, ArgNo, Kind);
+ return Attrs.addAttributeAtIndex(C, ArgNo,
+ Attr.getWithNewType(C, ReplacementTy));
}
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// index. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
- unsigned Index,
- uint64_t Bytes) const;
+ LLVM_NODISCARD AttributeList addDereferenceableRetAttr(LLVMContext &C,
+ uint64_t Bytes) const;
/// \brief Add the dereferenceable attribute to the attribute set at the given
/// arg index. Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList addDereferenceableParamAttr(
- LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
- return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
- }
-
- /// Add the dereferenceable_or_null attribute to the attribute set at
- /// the given index. Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
- LLVMContext &C, unsigned Index, uint64_t Bytes) const;
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const;
/// Add the dereferenceable_or_null attribute to the attribute set at
/// the given arg index. Returns a new list because attribute lists are
/// immutable.
LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
- LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
- return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
- }
-
- /// Add the allocsize attribute to the attribute set at the given index.
- /// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList
- addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg);
+ LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const;
/// Add the allocsize attribute to the attribute set at the given arg index.
/// Returns a new list because attribute lists are immutable.
LLVM_NODISCARD AttributeList
addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
- return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
- }
-
- /// Add the vscale_range attribute to the attribute set at the given index.
- /// Returns a new list because attribute lists are immutable.
- LLVM_NODISCARD AttributeList addVScaleRangeAttr(LLVMContext &C,
- unsigned Index,
- unsigned MinValue,
- unsigned MaxValue);
+ const Optional<unsigned> &NumElemsArg);
//===--------------------------------------------------------------------===//
// AttributeList Accessors
@@ -620,48 +700,59 @@ public:
/// The attributes for the argument or parameter at the given index are
/// returned.
- AttributeSet getParamAttributes(unsigned ArgNo) const;
+ AttributeSet getParamAttrs(unsigned ArgNo) const;
/// The attributes for the ret value are returned.
- AttributeSet getRetAttributes() const;
+ AttributeSet getRetAttrs() const;
/// The function attributes are returned.
- AttributeSet getFnAttributes() const;
+ AttributeSet getFnAttrs() const;
/// Return true if the attribute exists at the given index.
- bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+ bool hasAttributeAtIndex(unsigned Index, Attribute::AttrKind Kind) const;
/// Return true if the attribute exists at the given index.
- bool hasAttribute(unsigned Index, StringRef Kind) const;
+ bool hasAttributeAtIndex(unsigned Index, StringRef Kind) const;
/// Return true if attribute exists at the given index.
- bool hasAttributes(unsigned Index) const;
+ bool hasAttributesAtIndex(unsigned Index) const;
/// Return true if the attribute exists for the given argument
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return hasAttribute(ArgNo + FirstArgIndex, Kind);
+ return hasAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
}
/// Return true if the attribute exists for the given argument
bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
- return hasAttribute(ArgNo + FirstArgIndex, Kind);
+ return hasAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
}
/// Return true if attributes exists for the given argument
bool hasParamAttrs(unsigned ArgNo) const {
- return hasAttributes(ArgNo + FirstArgIndex);
+ return hasAttributesAtIndex(ArgNo + FirstArgIndex);
+ }
+
+ /// Return true if the attribute exists for the return value.
+ bool hasRetAttr(Attribute::AttrKind Kind) const {
+ return hasAttributeAtIndex(ReturnIndex, Kind);
}
- /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
- /// may be faster.
- bool hasFnAttribute(Attribute::AttrKind Kind) const;
+ /// Return true if the attribute exists for the return value.
+ bool hasRetAttr(StringRef Kind) const {
+ return hasAttributeAtIndex(ReturnIndex, Kind);
+ }
+
+ /// Return true if attributes exist for the return value.
+ bool hasRetAttrs() const { return hasAttributesAtIndex(ReturnIndex); }
+
+ /// Return true if the attribute exists for the function.
+ bool hasFnAttr(Attribute::AttrKind Kind) const;
- /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
- /// may be faster.
- bool hasFnAttribute(StringRef Kind) const;
+ /// Return true if the attribute exists for the function.
+ bool hasFnAttr(StringRef Kind) const;
- /// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
- bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
+  /// Return true if attributes exist for the function.
+ bool hasFnAttrs() const { return hasAttributesAtIndex(FunctionIndex); }
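
// Hedged sketch (not part of the patch): querying with the renamed helpers
// instead of hasFnAttribute and manual ReturnIndex checks. AL is assumed to be
// an existing AttributeList.
static bool isInlinableWithNonNullRet(const llvm::AttributeList &AL) {
  return !AL.hasFnAttr(llvm::Attribute::NoInline) &&
         AL.hasRetAttr(llvm::Attribute::NonNull);
}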
/// Return true if the specified attribute is set for at least one
/// parameter or for the return value. If Index is not nullptr, the index
@@ -670,19 +761,29 @@ public:
unsigned *Index = nullptr) const;
/// Return the attribute object that exists at the given index.
- Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+ Attribute getAttributeAtIndex(unsigned Index, Attribute::AttrKind Kind) const;
/// Return the attribute object that exists at the given index.
- Attribute getAttribute(unsigned Index, StringRef Kind) const;
+ Attribute getAttributeAtIndex(unsigned Index, StringRef Kind) const;
/// Return the attribute object that exists at the arg index.
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return getAttribute(ArgNo + FirstArgIndex, Kind);
+ return getAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
}
/// Return the attribute object that exists at the given index.
Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
- return getAttribute(ArgNo + FirstArgIndex, Kind);
+ return getAttributeAtIndex(ArgNo + FirstArgIndex, Kind);
+ }
+
+ /// Return the attribute object that exists for the function.
+ Attribute getFnAttr(Attribute::AttrKind Kind) const {
+ return getAttributeAtIndex(FunctionIndex, Kind);
+ }
+
+ /// Return the attribute object that exists for the function.
+ Attribute getFnAttr(StringRef Kind) const {
+ return getAttributeAtIndex(FunctionIndex, Kind);
}
/// Return the alignment of the return value.
@@ -712,34 +813,26 @@ public:
/// Return the elementtype type for the specified function parameter.
Type *getParamElementType(unsigned ArgNo) const;
- /// Get the stack alignment.
- MaybeAlign getStackAlignment(unsigned Index) const;
+ /// Get the stack alignment of the function.
+ MaybeAlign getFnStackAlignment() const;
- /// Get the number of dereferenceable bytes (or zero if unknown).
- uint64_t getDereferenceableBytes(unsigned Index) const;
+ /// Get the stack alignment of the return value.
+ MaybeAlign getRetStackAlignment() const;
- /// Get the number of dereferenceable bytes (or zero if unknown) of an
- /// arg.
- uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
- return getDereferenceableBytes(ArgNo + FirstArgIndex);
- }
+ /// Get the number of dereferenceable bytes (or zero if unknown) of the return
+ /// value.
+ uint64_t getRetDereferenceableBytes() const;
- /// Get the number of dereferenceable_or_null bytes (or zero if
- /// unknown).
- uint64_t getDereferenceableOrNullBytes(unsigned Index) const;
+ /// Get the number of dereferenceable bytes (or zero if unknown) of an arg.
+ uint64_t getParamDereferenceableBytes(unsigned Index) const;
- /// Get the number of dereferenceable_or_null bytes (or zero if
- /// unknown) of an arg.
- uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
- return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex);
- }
+ /// Get the number of dereferenceable_or_null bytes (or zero if unknown) of
+ /// the return value.
+ uint64_t getRetDereferenceableOrNullBytes() const;
- /// Get the allocsize argument numbers (or pair(0, 0) if unknown).
- std::pair<unsigned, Optional<unsigned>>
- getAllocSizeArgs(unsigned Index) const;
-
- /// Get the vscale_range argument numbers (or pair(0, 0) if unknown).
- std::pair<unsigned, unsigned> getVScaleRangeArgs(unsigned Index) const;
+ /// Get the number of dereferenceable_or_null bytes (or zero if unknown) of an
+ /// arg.
+ uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;
/// Return the attributes at the index as a string.
std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
@@ -758,9 +851,32 @@ public:
unsigned getNumAttrSets() const;
- /// Use these to iterate over the valid attribute indices.
- unsigned index_begin() const { return AttributeList::FunctionIndex; }
- unsigned index_end() const { return getNumAttrSets() - 1; }
+ // Implementation of indexes(). Produces iterators that wrap an index. Mostly
+ // to hide the awkwardness of unsigned wrapping when iterating over valid
+ // indexes.
+ struct index_iterator {
+ unsigned NumAttrSets;
+ index_iterator(int NumAttrSets) : NumAttrSets(NumAttrSets) {}
+ struct int_wrapper {
+ int_wrapper(unsigned i) : i(i) {}
+ unsigned i;
+ unsigned operator*() { return i; }
+ bool operator!=(const int_wrapper &Other) { return i != Other.i; }
+ int_wrapper &operator++() {
+ // This is expected to undergo unsigned wrapping since FunctionIndex is
+ // ~0 and that's where we start.
+ ++i;
+ return *this;
+ }
+ };
+
+ int_wrapper begin() { return int_wrapper(AttributeList::FunctionIndex); }
+
+ int_wrapper end() { return int_wrapper(NumAttrSets - 1); }
+ };
+
+ /// Use this to iterate over the valid attribute indexes.
+ index_iterator indexes() const { return index_iterator(getNumAttrSets()); }
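
// Hedged sketch (not part of the patch): iterate every valid attribute index
// (function, return value, then each parameter) without writing the unsigned
// wrap-around by hand. AL is assumed to be an existing AttributeList.
#include "llvm/Support/raw_ostream.h"

static void dumpAttributeIndexes(const llvm::AttributeList &AL) {
  for (unsigned Idx : AL.indexes())
    if (AL.hasAttributesAtIndex(Idx))
      llvm::errs() << "index " << Idx << ": " << AL.getAsString(Idx) << "\n";
}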
/// operator==/!= - Provide equality predicates.
bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; }
@@ -782,7 +898,7 @@ public:
//===----------------------------------------------------------------------===//
/// \class
/// Provide DenseMapInfo for AttributeList.
-template <> struct DenseMapInfo<AttributeList> {
+template <> struct DenseMapInfo<AttributeList, void> {
static AttributeList getEmptyKey() {
auto Val = static_cast<uintptr_t>(-1);
Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
@@ -814,14 +930,10 @@ template <> struct DenseMapInfo<AttributeList> {
class AttrBuilder {
std::bitset<Attribute::EndAttrKinds> Attrs;
std::map<SmallString<32>, SmallString<32>, std::less<>> TargetDepAttrs;
- MaybeAlign Alignment;
- MaybeAlign StackAlignment;
- uint64_t DerefBytes = 0;
- uint64_t DerefOrNullBytes = 0;
- uint64_t AllocSizeArgs = 0;
- uint64_t VScaleRangeArgs = 0;
+ std::array<uint64_t, Attribute::NumIntAttrKinds> IntAttrs = {};
std::array<Type *, Attribute::NumTypeAttrKinds> TypeAttrs = {};
+ Optional<unsigned> kindToIntIndex(Attribute::AttrKind Kind) const;
Optional<unsigned> kindToTypeIndex(Attribute::AttrKind Kind) const;
public:
@@ -891,19 +1003,31 @@ public:
/// Return true if the builder has an alignment attribute.
bool hasAlignmentAttr() const;
+ /// Return raw (possibly packed/encoded) value of integer attribute or 0 if
+ /// not set.
+ uint64_t getRawIntAttr(Attribute::AttrKind Kind) const;
+
/// Retrieve the alignment attribute, if it exists.
- MaybeAlign getAlignment() const { return Alignment; }
+ MaybeAlign getAlignment() const {
+ return MaybeAlign(getRawIntAttr(Attribute::Alignment));
+ }
/// Retrieve the stack alignment attribute, if it exists.
- MaybeAlign getStackAlignment() const { return StackAlignment; }
+ MaybeAlign getStackAlignment() const {
+ return MaybeAlign(getRawIntAttr(Attribute::StackAlignment));
+ }
/// Retrieve the number of dereferenceable bytes, if the
/// dereferenceable attribute exists (zero is returned otherwise).
- uint64_t getDereferenceableBytes() const { return DerefBytes; }
+ uint64_t getDereferenceableBytes() const {
+ return getRawIntAttr(Attribute::Dereferenceable);
+ }
/// Retrieve the number of dereferenceable_or_null bytes, if the
/// dereferenceable_or_null attribute exists (zero is returned otherwise).
- uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
+ uint64_t getDereferenceableOrNullBytes() const {
+ return getRawIntAttr(Attribute::DereferenceableOrNull);
+ }
/// Retrieve type for the given type attribute.
Type *getTypeAttr(Attribute::AttrKind Kind) const;
@@ -933,6 +1057,9 @@ public:
/// it doesn't exist, pair(0, 0) is returned.
std::pair<unsigned, unsigned> getVScaleRangeArgs() const;
+ /// Add integer attribute with raw value (packed/encoded if necessary).
+ AttrBuilder &addRawIntAttr(Attribute::AttrKind Kind, uint64_t Value);
+
/// This turns an alignment into the form used internally in Attribute.
/// This call has no effect if Align is not set.
AttrBuilder &addAlignmentAttr(MaybeAlign Align);
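
// Hedged sketch (not part of the patch): the integer attributes now share one
// uniform raw storage slot per kind, and the typed getters above decode it.
#include "llvm/IR/Attributes.h"
#include "llvm/Support/Alignment.h"

static void intAttrRoundTrip() {
  llvm::AttrBuilder B;
  B.addAlignmentAttr(llvm::Align(16));
  B.addDereferenceableAttr(32);

  llvm::MaybeAlign A = B.getAlignment();        // decoded view
  uint64_t Bytes = B.getDereferenceableBytes(); // decoded view: 32
  uint64_t Raw = B.getRawIntAttr(llvm::Attribute::Alignment); // raw storage
  (void)A; (void)Bytes; (void)Raw;
}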
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 99b474161df7..de25b51a6292 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -86,6 +86,9 @@ def Dereferenceable : IntAttr<"dereferenceable", [ParamAttr, RetAttr]>;
def DereferenceableOrNull : IntAttr<"dereferenceable_or_null",
[ParamAttr, RetAttr]>;
+/// Do not instrument function with sanitizers.
+def DisableSanitizerInstrumentation: EnumAttr<"disable_sanitizer_instrumentation", [FnAttr]>;
+
/// Provide pointer element type to intrinsic.
def ElementType : TypeAttr<"elementtype", [ParamAttr]>;
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 0af4ec4ef138..184ddfc01c29 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -167,8 +167,8 @@ public:
/// Returns a pointer to the first instruction in this block that is not a
/// PHINode or a debug intrinsic, or any pseudo operation if \c SkipPseudoOp
/// is true.
- const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) const;
- Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = false) {
+ const Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) const;
+ Instruction *getFirstNonPHIOrDbg(bool SkipPseudoOp = true) {
return const_cast<Instruction *>(
static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbg(
SkipPseudoOp));
@@ -178,8 +178,8 @@ public:
/// PHINode, a debug intrinsic, or a lifetime intrinsic, or any pseudo
/// operation if \c SkipPseudoOp is true.
const Instruction *
- getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) const;
- Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = false) {
+ getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) const;
+ Instruction *getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp = true) {
return const_cast<Instruction *>(
static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrLifetime(
SkipPseudoOp));
@@ -200,14 +200,14 @@ public:
/// SkipPseudoOp is true.
iterator_range<filter_iterator<BasicBlock::const_iterator,
std::function<bool(const Instruction &)>>>
- instructionsWithoutDebug(bool SkipPseudoOp = false) const;
+ instructionsWithoutDebug(bool SkipPseudoOp = true) const;
/// Return an iterator range over the instructions in the block, skipping any
  /// debug instructions. Skip any pseudo operations as well if \c
/// SkipPseudoOp is true.
iterator_range<
filter_iterator<BasicBlock::iterator, std::function<bool(Instruction &)>>>
- instructionsWithoutDebug(bool SkipPseudoOp = false);
+ instructionsWithoutDebug(bool SkipPseudoOp = true);
/// Return the size of the basic block ignoring debug instructions
filter_iterator<BasicBlock::const_iterator,
diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h
index 4e2022b36e30..c8999b71f3d1 100644
--- a/llvm/include/llvm/IR/Constant.h
+++ b/llvm/include/llvm/IR/Constant.h
@@ -198,6 +198,12 @@ public:
/// hanging off of the globals.
void removeDeadConstantUsers() const;
+ /// Return true if the constant has exactly one live use.
+ ///
+ /// This returns the same result as calling Value::hasOneUse after
+ /// Constant::removeDeadConstantUsers, but doesn't remove dead constants.
+ bool hasOneLiveUse() const;
+
const Constant *stripPointerCasts() const {
return cast<Constant>(Value::stripPointerCasts());
}
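
// Hedged sketch (not part of the patch): GV is assumed to be an existing
// global. hasOneLiveUse() answers the same question as calling
// removeDeadConstantUsers() followed by hasOneUse(), but leaves the dead
// constant users in place.
#include "llvm/IR/GlobalVariable.h"

static bool hasSingleRealUser(const llvm::GlobalVariable &GV) {
  return GV.hasOneLiveUse();
}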
diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index 44b8c395c89e..fea4d0da1d0d 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -128,6 +128,28 @@ public:
/// NOTE: false does not mean that inverse predicate holds!
bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const;
+ /// Return true iff CR1 ult CR2 is equivalent to CR1 slt CR2.
+ /// Does not depend on strictness/direction of the predicate.
+ static bool
+ areInsensitiveToSignednessOfICmpPredicate(const ConstantRange &CR1,
+ const ConstantRange &CR2);
+
+ /// Return true iff CR1 ult CR2 is equivalent to CR1 sge CR2.
+ /// Does not depend on strictness/direction of the predicate.
+ static bool
+ areInsensitiveToSignednessOfInvertedICmpPredicate(const ConstantRange &CR1,
+ const ConstantRange &CR2);
+
+  /// If the comparison between the constant ranges \p CR1 and \p CR2
+ /// is insensitive to the signedness of the comparison predicate,
+ /// return a predicate equivalent to \p Pred, with flipped signedness
+ /// (i.e. unsigned instead of signed or vice versa), and maybe inverted,
+ /// otherwise returns CmpInst::Predicate::BAD_ICMP_PREDICATE.
+ static CmpInst::Predicate
+ getEquivalentPredWithFlippedSignedness(CmpInst::Predicate Pred,
+ const ConstantRange &CR1,
+ const ConstantRange &CR2);
+
/// Produce the largest range containing all X such that "X BinOp Y" is
/// guaranteed not to wrap (overflow) for *all* Y in Other. However, there may
/// be *some* Y in Other for which additional X not contained in the result
@@ -167,6 +189,11 @@ public:
/// successful.
bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const;
+ /// Set up \p Pred, \p RHS and \p Offset such that (V + Offset) Pred RHS
+ /// is true iff V is in the range. Prefers using Offset == 0 if possible.
+ void
+ getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS, APInt &Offset) const;
+
/// Return the lower value for this range.
const APInt &getLower() const { return Lower; }
@@ -305,6 +332,14 @@ public:
ConstantRange unionWith(const ConstantRange &CR,
PreferredRangeType Type = Smallest) const;
+ /// Intersect the two ranges and return the result if it can be represented
+ /// exactly, otherwise return None.
+ Optional<ConstantRange> exactIntersectWith(const ConstantRange &CR) const;
+
+ /// Union the two ranges and return the result if it can be represented
+ /// exactly, otherwise return None.
+ Optional<ConstantRange> exactUnionWith(const ConstantRange &CR) const;
+
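// Hedged sketch (not part of the patch): combine ranges only when the result
// is exactly representable; the 8-bit constants are purely illustrative.
#include "llvm/ADT/Optional.h"
#include "llvm/IR/ConstantRange.h"

static void exactSetOps() {
  using namespace llvm;
  ConstantRange A(APInt(8, 0), APInt(8, 10)); // [0, 10)
  ConstantRange B(APInt(8, 5), APInt(8, 20)); // [5, 20)

  // [0, 20) is a single contiguous range, so the union is exact here.
  Optional<ConstantRange> U = A.exactUnionWith(B);
  // [5, 10) likewise, so the intersection is exact as well.
  Optional<ConstantRange> I = A.exactIntersectWith(B);
  (void)U; (void)I;
}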
/// Return a new range representing the possible values resulting
/// from an application of the specified cast operator to this range. \p
/// BitWidth is the target bitwidth of the cast. For casts which don't
@@ -383,6 +418,11 @@ public:
/// treating both this and \p Other as unsigned ranges.
ConstantRange multiply(const ConstantRange &Other) const;
+ /// Return range of possible values for a signed multiplication of this and
+ /// \p Other. However, if overflow is possible always return a full range
+ /// rather than trying to determine a more precise result.
+ ConstantRange smul_fast(const ConstantRange &Other) const;
+
/// Return a new range representing the possible values resulting
/// from a signed maximum of a value in this range and a value in \p Other.
ConstantRange smax(const ConstantRange &Other) const;
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 1f716a45b70f..71414d95d9a3 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -191,19 +191,19 @@ public:
/// This is just a convenience method to make client code smaller for a
  /// common case. It also correctly performs the comparison without the
/// potential for an assertion from getZExtValue().
- bool isZero() const { return Val.isNullValue(); }
+ bool isZero() const { return Val.isZero(); }
/// This is just a convenience method to make client code smaller for a
/// common case. It also correctly performs the comparison without the
/// potential for an assertion from getZExtValue().
/// Determine if the value is one.
- bool isOne() const { return Val.isOneValue(); }
+ bool isOne() const { return Val.isOne(); }
/// This function will return true iff every bit in this constant is set
/// to true.
/// @returns true iff this constant's bits are all set to true.
/// Determine if the value is all ones.
- bool isMinusOne() const { return Val.isAllOnesValue(); }
+ bool isMinusOne() const { return Val.isAllOnes(); }
/// This function will return true iff this constant represents the largest
/// value that may be represented by the constant's type.
@@ -1287,10 +1287,6 @@ public:
/// Return a string representation for an opcode.
const char *getOpcodeName() const;
- /// Return a constant expression identical to this one, but with the specified
- /// operand set to the specified value.
- Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const;
-
/// This returns the current constant expression with the operands replaced
/// with the specified values. The specified array must have the same number
/// of operands as our current one.
@@ -1312,13 +1308,14 @@ public:
Type *SrcTy = nullptr) const;
/// Returns an Instruction which implements the same operation as this
- /// ConstantExpr. The instruction is not linked to any basic block.
+ /// ConstantExpr. If \p InsertBefore is not null, the new instruction is
+ /// inserted before it, otherwise it is not inserted into any basic block.
///
/// A better approach to this could be to have a constructor for Instruction
/// which would take a ConstantExpr parameter, but that would have spread
/// implementation details of ConstantExpr outside of Constants.cpp, which
/// would make it harder to remove ConstantExprs altogether.
- Instruction *getAsInstruction() const;
+ Instruction *getAsInstruction(Instruction *InsertBefore = nullptr) const;
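
// Hedged sketch (not part of the patch): CE and InsertPt are assumed to be
// existing, valid pointers; passing nullptr keeps the previous behaviour of
// returning an unlinked instruction.
static llvm::Instruction *expandConstantExpr(llvm::ConstantExpr *CE,
                                             llvm::Instruction *InsertPt) {
  return CE->getAsInstruction(InsertPt); // already inserted before InsertPt
}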
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index 23ac47ca4d81..61c6dd885980 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -181,7 +181,7 @@ namespace llvm {
DIFile *File);
/// Create a single enumerator value.
- DIEnumerator *createEnumerator(StringRef Name, APSInt Value);
+ DIEnumerator *createEnumerator(StringRef Name, const APSInt &Value);
DIEnumerator *createEnumerator(StringRef Name, uint64_t Val,
bool IsUnsigned = false);
@@ -219,11 +219,12 @@ namespace llvm {
/// \param AlignInBits Alignment. (optional)
/// \param DWARFAddressSpace DWARF address space. (optional)
/// \param Name Pointer type name. (optional)
- DIDerivedType *createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
- uint32_t AlignInBits = 0,
- Optional<unsigned> DWARFAddressSpace =
- None,
- StringRef Name = "");
+ /// \param Annotations Member annotations.
+ DIDerivedType *
+ createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
+ uint32_t AlignInBits = 0,
+ Optional<unsigned> DWARFAddressSpace = None,
+ StringRef Name = "", DINodeArray Annotations = nullptr);
/// Create debugging information entry for a pointer to member.
/// \param PointeeTy Type pointed to by this pointer.
@@ -250,9 +251,11 @@ namespace llvm {
/// \param LineNo Line number.
/// \param Context The surrounding context for the typedef.
/// \param AlignInBits Alignment. (optional)
+ /// \param Annotations Annotations. (optional)
DIDerivedType *createTypedef(DIType *Ty, StringRef Name, DIFile *File,
unsigned LineNo, DIScope *Context,
- uint32_t AlignInBits = 0);
+ uint32_t AlignInBits = 0,
+ DINodeArray Annotations = nullptr);
/// Create debugging information entry for a 'friend'.
DIDerivedType *createFriend(DIType *Ty, DIType *FriendTy);
@@ -279,12 +282,13 @@ namespace llvm {
/// \param OffsetInBits Member offset.
/// \param Flags Flags to encode member attribute, e.g. private
/// \param Ty Parent type.
+ /// \param Annotations Member annotations.
DIDerivedType *createMemberType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNo,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
+ uint64_t SizeInBits, uint32_t AlignInBits,
uint64_t OffsetInBits,
- DINode::DIFlags Flags, DIType *Ty);
+ DINode::DIFlags Flags, DIType *Ty,
+ DINodeArray Annotations = nullptr);
/// Create debugging information entry for a variant. A variant
/// normally should be a member of a variant part.
@@ -317,10 +321,14 @@ namespace llvm {
/// \param StorageOffsetInBits Member storage offset.
/// \param Flags Flags to encode member attribute.
/// \param Ty Parent type.
- DIDerivedType *createBitFieldMemberType(
- DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo,
- uint64_t SizeInBits, uint64_t OffsetInBits,
- uint64_t StorageOffsetInBits, DINode::DIFlags Flags, DIType *Ty);
+ /// \param Annotations Member annotations.
+ DIDerivedType *createBitFieldMemberType(DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned LineNo,
+ uint64_t SizeInBits,
+ uint64_t OffsetInBits,
+ uint64_t StorageOffsetInBits,
+ DINode::DIFlags Flags, DIType *Ty,
+ DINodeArray Annotations = nullptr);
/// Create debugging information entry for a
/// C++ static data member.
@@ -586,7 +594,7 @@ namespace llvm {
unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang = 0, uint64_t SizeInBits = 0,
uint32_t AlignInBits = 0, DINode::DIFlags Flags = DINode::FlagFwdDecl,
- StringRef UniqueIdentifier = "");
+ StringRef UniqueIdentifier = "", DINodeArray Annotations = nullptr);
/// Retain DIScope* in a module even if it is not referenced
/// through debug info anchors.
@@ -636,7 +644,8 @@ namespace llvm {
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DIType *Ty, bool IsLocalToUnit, bool isDefined = true,
DIExpression *Expr = nullptr, MDNode *Decl = nullptr,
- MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0);
+ MDTuple *TemplateParams = nullptr, uint32_t AlignInBits = 0,
+ DINodeArray Annotations = nullptr);
/// Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
@@ -682,7 +691,8 @@ namespace llvm {
createParameterVariable(DIScope *Scope, StringRef Name, unsigned ArgNo,
DIFile *File, unsigned LineNo, DIType *Ty,
bool AlwaysPreserve = false,
- DINode::DIFlags Flags = DINode::FlagZero);
+ DINode::DIFlags Flags = DINode::FlagZero,
+ DINodeArray Annotations = nullptr);
/// Create a new descriptor for the specified
/// variable which has a complex address expression for its address.
@@ -711,6 +721,7 @@ namespace llvm {
/// \param SPFlags Additional flags specific to subprograms.
/// \param TParams Function template parameters.
/// \param ThrownTypes Exception types this function may throw.
+ /// \param Annotations Attribute Annotations.
DISubprogram *
createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName,
DIFile *File, unsigned LineNo, DISubroutineType *Ty,
@@ -718,7 +729,8 @@ namespace llvm {
DISubprogram::DISPFlags SPFlags = DISubprogram::SPFlagZero,
DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr,
- DITypeArray ThrownTypes = nullptr);
+ DITypeArray ThrownTypes = nullptr,
+ DINodeArray Annotations = nullptr);
/// Identical to createFunction,
/// except that the resulting DbgNode is meant to be RAUWed.
@@ -818,29 +830,35 @@ namespace llvm {
unsigned Line, unsigned Col);
/// Create a descriptor for an imported module.
- /// \param Context The scope this module is imported into
- /// \param NS The namespace being imported here.
- /// \param File File where the declaration is located.
- /// \param Line Line number of the declaration.
+ /// \param Context The scope this module is imported into.
+ /// \param NS The namespace being imported here.
+ /// \param File File where the declaration is located.
+ /// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedModule(DIScope *Context, DINamespace *NS,
- DIFile *File, unsigned Line);
+ DIFile *File, unsigned Line,
+ DINodeArray Elements = nullptr);
/// Create a descriptor for an imported module.
/// \param Context The scope this module is imported into.
/// \param NS An aliased namespace.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedModule(DIScope *Context,
DIImportedEntity *NS, DIFile *File,
- unsigned Line);
+ unsigned Line,
+ DINodeArray Elements = nullptr);
/// Create a descriptor for an imported module.
- /// \param Context The scope this module is imported into.
- /// \param M The module being imported here
- /// \param File File where the declaration is located.
- /// \param Line Line number of the declaration.
+ /// \param Context The scope this module is imported into.
+ /// \param M The module being imported here.
+ /// \param File File where the declaration is located.
+ /// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedModule(DIScope *Context, DIModule *M,
- DIFile *File, unsigned Line);
+ DIFile *File, unsigned Line,
+ DINodeArray Elements = nullptr);
/// Create a descriptor for an imported function.
/// \param Context The scope this module is imported into.
@@ -848,9 +866,11 @@ namespace llvm {
/// variable.
/// \param File File where the declaration is located.
/// \param Line Line number of the declaration.
+ /// \param Elements Renamed elements.
DIImportedEntity *createImportedDeclaration(DIScope *Context, DINode *Decl,
DIFile *File, unsigned Line,
- StringRef Name = "");
+ StringRef Name = "",
+ DINodeArray Elements = nullptr);
/// Insert a new llvm.dbg.declare intrinsic call.
/// \param Storage llvm::Value of the variable
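
The DIBuilder hunks above thread an optional DINodeArray of annotations through the type, variable, function and import-creation APIs. Below is a minimal sketch of passing one through the new createMemberType overload; emitAnnotatedMember, the "field" name and the "btf_decl_tag"/Tag strings are illustrative assumptions, and the two-string tuple layout is just one plausible encoding, not something the interface mandates.

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Hypothetical helper: emit one struct member that carries an annotation.
static DIDerivedType *emitAnnotatedMember(DIBuilder &DIB, LLVMContext &Ctx,
                                          DIScope *Scope, DIFile *File,
                                          DIType *MemberTy, StringRef Tag) {
  // One annotation element: a (name, value) tuple of strings. The element
  // format is frontend-defined; this particular layout is an assumption.
  Metadata *Pair[] = {MDString::get(Ctx, "btf_decl_tag"),
                      MDString::get(Ctx, Tag)};
  DINodeArray Annotations = DIB.getOrCreateArray({MDNode::get(Ctx, Pair)});
  return DIB.createMemberType(Scope, "field", File, /*LineNo=*/1,
                              /*SizeInBits=*/32, /*AlignInBits=*/32,
                              /*OffsetInBits=*/0, DINode::FlagZero, MemberTy,
                              Annotations);
}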
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 300f73c12df0..46acd403bef1 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -19,6 +19,7 @@
#ifndef LLVM_IR_DATALAYOUT_H
#define LLVM_IR_DATALAYOUT_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -135,6 +136,7 @@ private:
MM_MachO,
MM_WinCOFF,
MM_WinCOFFX86,
+ MM_GOFF,
MM_Mips,
MM_XCOFF
};
@@ -316,6 +318,7 @@ public:
switch (ManglingMode) {
case MM_None:
case MM_ELF:
+ case MM_GOFF:
case MM_Mips:
case MM_WinCOFF:
case MM_XCOFF:
@@ -334,6 +337,8 @@ public:
case MM_ELF:
case MM_WinCOFF:
return ".L";
+ case MM_GOFF:
+ return "@";
case MM_Mips:
return "$";
case MM_MachO:
@@ -372,8 +377,8 @@ public:
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
- /// Returns the maximum pointer size over all address spaces.
- unsigned getMaxPointerSize() const;
+ /// Returns the maximum index size over all address spaces.
+ unsigned getMaxIndexSize() const;
// Index size used for address calculation.
unsigned getIndexSize(unsigned AS) const;
@@ -405,9 +410,9 @@ public:
return getPointerSize(AS) * 8;
}
- /// Returns the maximum pointer size over all address spaces.
- unsigned getMaxPointerSizeInBits() const {
- return getMaxPointerSize() * 8;
+ /// Returns the maximum index size over all address spaces.
+ unsigned getMaxIndexSizeInBits() const {
+ return getMaxIndexSize() * 8;
}
/// Size in bits of index used for address calculation in getelementptr.
@@ -514,7 +519,7 @@ public:
/// Returns the minimum ABI-required alignment for the specified type.
/// FIXME: Deprecate this function once migration to Align is over.
- unsigned getABITypeAlignment(Type *Ty) const;
+ uint64_t getABITypeAlignment(Type *Ty) const;
/// Returns the minimum ABI-required alignment for the specified type.
Align getABITypeAlign(Type *Ty) const;
@@ -537,7 +542,7 @@ public:
///
/// This is always at least as good as the ABI alignment.
/// FIXME: Deprecate this function once migration to Align is over.
- unsigned getPrefTypeAlignment(Type *Ty) const;
+ uint64_t getPrefTypeAlignment(Type *Ty) const;
/// Returns the preferred stack/global alignment for the specified
/// type.
@@ -579,6 +584,10 @@ public:
/// This is used to implement getelementptr.
int64_t getIndexedOffsetInType(Type *ElemTy, ArrayRef<Value *> Indices) const;
+ /// Get GEP indices to access Offset inside ElemTy. ElemTy is updated to be
+ /// the result element type and Offset to be the residual offset.
+ SmallVector<APInt> getGEPIndicesForOffset(Type *&ElemTy, APInt &Offset) const;
+
/// Returns a StructLayout object, indicating the alignment of the
/// struct, its size, and the offsets of its fields.
///
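
Besides the GOFF mangling mode and the max-pointer-size to max-index-size rename, the DataLayout hunk adds getGEPIndicesForOffset. The sketch below shows the intended use under the assumption that the caller wants a structured inbounds GEP for a plain byte offset; gepAtOffset and its parameters are invented for illustration, and the residual offset is only asserted on rather than handled.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include <cassert>

using namespace llvm;

// Hypothetical helper: convert a byte offset into GEP indices.
static Value *gepAtOffset(IRBuilder<> &B, const DataLayout &DL, Value *Base,
                          Type *SrcTy, uint64_t ByteOffset) {
  APInt Offset(DL.getIndexTypeSizeInBits(Base->getType()), ByteOffset);
  Type *ResultTy = SrcTy; // updated in place to the element type reached
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ResultTy, Offset);
  assert(Offset == 0 && "residual offset not handled in this sketch");

  SmallVector<Value *> IdxValues;
  for (const APInt &Idx : Indices)
    IdxValues.push_back(B.getInt(Idx));
  return B.CreateInBoundsGEP(SrcTy, Base, IdxValues);
}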
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index eba422a9fde6..730c69d0c622 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -106,8 +106,6 @@ public:
void reset();
private:
- void InitializeTypeMap(const Module &M);
-
void processCompileUnit(DICompileUnit *CU);
void processScope(DIScope *Scope);
void processType(DIType *DT);
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index 20a032f04909..c04f07c534af 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -256,11 +256,13 @@ class GenericDINode : public DINode {
public:
unsigned getHash() const { return SubclassData32; }
- DEFINE_MDNODE_GET(GenericDINode, (unsigned Tag, StringRef Header,
- ArrayRef<Metadata *> DwarfOps),
+ DEFINE_MDNODE_GET(GenericDINode,
+ (unsigned Tag, StringRef Header,
+ ArrayRef<Metadata *> DwarfOps),
(Tag, Header, DwarfOps))
- DEFINE_MDNODE_GET(GenericDINode, (unsigned Tag, MDString *Header,
- ArrayRef<Metadata *> DwarfOps),
+ DEFINE_MDNODE_GET(GenericDINode,
+ (unsigned Tag, MDString *Header,
+ ArrayRef<Metadata *> DwarfOps),
(Tag, Header, DwarfOps))
/// Return a (temporary) clone of this.
@@ -324,7 +326,7 @@ public:
DEFINE_MDNODE_GET(DISubrange, (int64_t Count, int64_t LowerBound = 0),
(Count, LowerBound))
- DEFINE_MDNODE_GET(DISubrange, (Metadata *CountNode, int64_t LowerBound = 0),
+ DEFINE_MDNODE_GET(DISubrange, (Metadata * CountNode, int64_t LowerBound = 0),
(CountNode, LowerBound))
DEFINE_MDNODE_GET(DISubrange,
@@ -334,9 +336,7 @@ public:
TempDISubrange clone() const { return cloneImpl(); }
- Metadata *getRawCountNode() const {
- return getOperand(0).get();
- }
+ Metadata *getRawCountNode() const { return getOperand(0).get(); }
Metadata *getRawLowerBound() const { return getOperand(1).get(); }
@@ -548,14 +548,13 @@ public:
};
/// A single checksum, represented by a \a Kind and a \a Value (a string).
- template <typename T>
- struct ChecksumInfo {
+ template <typename T> struct ChecksumInfo {
/// The kind of checksum which \a Value encodes.
ChecksumKind Kind;
/// The string value of the checksum.
T Value;
- ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) { }
+ ChecksumInfo(ChecksumKind Kind, T Value) : Kind(Kind), Value(Value) {}
~ChecksumInfo() = default;
bool operator==(const ChecksumInfo<T> &X) const {
return Kind == X.Kind && Value == X.Value;
@@ -578,15 +577,17 @@ private:
static DIFile *getImpl(LLVMContext &Context, StringRef Filename,
StringRef Directory,
Optional<ChecksumInfo<StringRef>> CS,
- Optional<StringRef> Source,
- StorageType Storage, bool ShouldCreate = true) {
+ Optional<StringRef> Source, StorageType Storage,
+ bool ShouldCreate = true) {
Optional<ChecksumInfo<MDString *>> MDChecksum;
if (CS)
MDChecksum.emplace(CS->Kind, getCanonicalMDString(Context, CS->Value));
- return getImpl(Context, getCanonicalMDString(Context, Filename),
- getCanonicalMDString(Context, Directory), MDChecksum,
- Source ? Optional<MDString *>(getCanonicalMDString(Context, *Source)) : None,
- Storage, ShouldCreate);
+ return getImpl(
+ Context, getCanonicalMDString(Context, Filename),
+ getCanonicalMDString(Context, Directory), MDChecksum,
+ Source ? Optional<MDString *>(getCanonicalMDString(Context, *Source))
+ : None,
+ Storage, ShouldCreate);
}
static DIFile *getImpl(LLVMContext &Context, MDString *Filename,
MDString *Directory,
@@ -600,13 +601,15 @@ private:
}
public:
- DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory,
- Optional<ChecksumInfo<StringRef>> CS = None,
- Optional<StringRef> Source = None),
+ DEFINE_MDNODE_GET(DIFile,
+ (StringRef Filename, StringRef Directory,
+ Optional<ChecksumInfo<StringRef>> CS = None,
+ Optional<StringRef> Source = None),
(Filename, Directory, CS, Source))
- DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory,
- Optional<ChecksumInfo<MDString *>> CS = None,
- Optional<MDString *> Source = None),
+ DEFINE_MDNODE_GET(DIFile,
+ (MDString * Filename, MDString *Directory,
+ Optional<ChecksumInfo<MDString *>> CS = None,
+ Optional<MDString *> Source = None),
(Filename, Directory, CS, Source))
TempDIFile clone() const { return cloneImpl(); }
@@ -707,7 +710,6 @@ public:
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
-
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -936,47 +938,48 @@ class DIDerivedType : public DIType {
unsigned Line, DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
- Metadata *ExtraData, StorageType Storage, bool ShouldCreate = true) {
+ Metadata *ExtraData, DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
Line, Scope, BaseType, SizeInBits, AlignInBits, OffsetInBits,
- DWARFAddressSpace, Flags, ExtraData, Storage, ShouldCreate);
- }
- static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
- MDString *Name, Metadata *File, unsigned Line,
- Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Optional<unsigned> DWARFAddressSpace,
- DIFlags Flags, Metadata *ExtraData,
- StorageType Storage, bool ShouldCreate = true);
+ DWARFAddressSpace, Flags, ExtraData, Annotations.get(),
+ Storage, ShouldCreate);
+ }
+ static DIDerivedType *
+ getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Scope, Metadata *BaseType,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
+ Metadata *ExtraData, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate = true);
TempDIDerivedType cloneImpl() const {
- return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(),
- getScope(), getBaseType(), getSizeInBits(),
- getAlignInBits(), getOffsetInBits(),
- getDWARFAddressSpace(), getFlags(), getExtraData());
+ return getTemporary(
+ getContext(), getTag(), getName(), getFile(), getLine(), getScope(),
+ getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(),
+ getDWARFAddressSpace(), getFlags(), getExtraData(), getAnnotations());
}
public:
- DEFINE_MDNODE_GET(DIDerivedType,
- (unsigned Tag, MDString *Name, Metadata *File,
- unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits,
- Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
- Metadata *ExtraData = nullptr),
- (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
- AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
- ExtraData))
+ DEFINE_MDNODE_GET(
+ DIDerivedType,
+ (unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
+ Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
+ Metadata *ExtraData = nullptr, Metadata *Annotations = nullptr),
+ (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
+ OffsetInBits, DWARFAddressSpace, Flags, ExtraData, Annotations))
DEFINE_MDNODE_GET(DIDerivedType,
(unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
DIScope *Scope, DIType *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags,
- Metadata *ExtraData = nullptr),
+ Metadata *ExtraData = nullptr,
+ DINodeArray Annotations = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
- ExtraData))
+ ExtraData, Annotations))
TempDIDerivedType clone() const { return cloneImpl(); }
@@ -999,6 +1002,12 @@ public:
Metadata *getExtraData() const { return getRawExtraData(); }
Metadata *getRawExtraData() const { return getOperand(4); }
+ /// Get annotations associated with this derived type.
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
+ Metadata *getRawAnnotations() const { return getOperand(5); }
+
/// Get casted version of extra data.
/// @{
DIType *getClassType() const {
@@ -1065,8 +1074,8 @@ class DICompositeType : public DIType {
/// Change fields in place.
void mutate(unsigned Tag, unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits, uint32_t AlignInBits,
- uint64_t OffsetInBits, DIFlags Flags) {
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags) {
assert(isDistinct() && "Only distinct nodes can mutate");
assert(getRawIdentifier() && "Only ODR-uniqued nodes should mutate");
this->RuntimeLang = RuntimeLang;
@@ -1081,13 +1090,14 @@ class DICompositeType : public DIType {
DITemplateParameterArray TemplateParams, StringRef Identifier,
DIDerivedType *Discriminator, Metadata *DataLocation,
Metadata *Associated, Metadata *Allocated, Metadata *Rank,
- StorageType Storage, bool ShouldCreate = true) {
+ DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(
Context, Tag, getCanonicalMDString(Context, Name), File, Line, Scope,
BaseType, SizeInBits, AlignInBits, OffsetInBits, Flags, Elements.get(),
RuntimeLang, VTableHolder, TemplateParams.get(),
getCanonicalMDString(Context, Identifier), Discriminator, DataLocation,
- Associated, Allocated, Rank, Storage, ShouldCreate);
+ Associated, Allocated, Rank, Annotations.get(), Storage, ShouldCreate);
}
static DICompositeType *
getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
@@ -1097,16 +1107,16 @@ class DICompositeType : public DIType {
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, Metadata *Discriminator, Metadata *DataLocation,
Metadata *Associated, Metadata *Allocated, Metadata *Rank,
- StorageType Storage, bool ShouldCreate = true);
+ Metadata *Annotations, StorageType Storage, bool ShouldCreate = true);
TempDICompositeType cloneImpl() const {
- return getTemporary(getContext(), getTag(), getName(), getFile(), getLine(),
- getScope(), getBaseType(), getSizeInBits(),
- getAlignInBits(), getOffsetInBits(), getFlags(),
- getElements(), getRuntimeLang(), getVTableHolder(),
- getTemplateParams(), getIdentifier(),
- getDiscriminator(), getRawDataLocation(),
- getRawAssociated(), getRawAllocated(), getRawRank());
+ return getTemporary(
+ getContext(), getTag(), getName(), getFile(), getLine(), getScope(),
+ getBaseType(), getSizeInBits(), getAlignInBits(), getOffsetInBits(),
+ getFlags(), getElements(), getRuntimeLang(), getVTableHolder(),
+ getTemplateParams(), getIdentifier(), getDiscriminator(),
+ getRawDataLocation(), getRawAssociated(), getRawAllocated(),
+ getRawRank(), getAnnotations());
}
public:
@@ -1119,10 +1129,12 @@ public:
DITemplateParameterArray TemplateParams = nullptr,
StringRef Identifier = "", DIDerivedType *Discriminator = nullptr,
Metadata *DataLocation = nullptr, Metadata *Associated = nullptr,
- Metadata *Allocated = nullptr, Metadata *Rank = nullptr),
+ Metadata *Allocated = nullptr, Metadata *Rank = nullptr,
+ DINodeArray Annotations = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation, Associated, Allocated, Rank))
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank,
+ Annotations))
DEFINE_MDNODE_GET(
DICompositeType,
(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
@@ -1132,10 +1144,11 @@ public:
Metadata *TemplateParams = nullptr, MDString *Identifier = nullptr,
Metadata *Discriminator = nullptr, Metadata *DataLocation = nullptr,
Metadata *Associated = nullptr, Metadata *Allocated = nullptr,
- Metadata *Rank = nullptr),
+ Metadata *Rank = nullptr, Metadata *Annotations = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation, Associated, Allocated, Rank))
+ Identifier, Discriminator, DataLocation, Associated, Allocated, Rank,
+ Annotations))
TempDICompositeType clone() const { return cloneImpl(); }
@@ -1154,7 +1167,7 @@ public:
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank);
+ Metadata *Rank, Metadata *Annotations);
static DICompositeType *getODRTypeIfExists(LLVMContext &Context,
MDString &Identifier);
@@ -1175,7 +1188,7 @@ public:
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated,
- Metadata *Allocated, Metadata *Rank);
+ Metadata *Allocated, Metadata *Rank, Metadata *Annotations);
DIType *getBaseType() const { return cast_or_null<DIType>(getRawBaseType()); }
DINodeArray getElements() const {
@@ -1196,7 +1209,9 @@ public:
Metadata *getRawTemplateParams() const { return getOperand(6); }
MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
Metadata *getRawDiscriminator() const { return getOperand(8); }
- DIDerivedType *getDiscriminator() const { return getOperandAs<DIDerivedType>(8); }
+ DIDerivedType *getDiscriminator() const {
+ return getOperandAs<DIDerivedType>(8);
+ }
Metadata *getRawDataLocation() const { return getOperand(9); }
DIVariable *getDataLocation() const {
return dyn_cast_or_null<DIVariable>(getRawDataLocation());
@@ -1228,6 +1243,11 @@ public:
return dyn_cast_or_null<DIExpression>(getRawRank());
}
+ Metadata *getRawAnnotations() const { return getOperand(13); }
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
+
/// Replace operands.
///
/// If this \a isUniqued() and not \a isResolved(), on a uniquing collision
@@ -1507,9 +1527,7 @@ public:
void replaceEnumTypes(DICompositeTypeArray N) {
replaceOperandWith(4, N.get());
}
- void replaceRetainedTypes(DITypeArray N) {
- replaceOperandWith(5, N.get());
- }
+ void replaceRetainedTypes(DITypeArray N) { replaceOperandWith(5, N.get()); }
void replaceGlobalVariables(DIGlobalVariableExpressionArray N) {
replaceOperandWith(6, N.get());
}
@@ -1691,7 +1709,8 @@ public:
/// base discriminator is set in the new DILocation, the other encoded values
/// are elided.
/// If the discriminator cannot be encoded, the function returns None.
- inline Optional<const DILocation *> cloneWithBaseDiscriminator(unsigned BD) const;
+ inline Optional<const DILocation *>
+ cloneWithBaseDiscriminator(unsigned BD) const;
/// Returns the duplication factor stored in the discriminator, or 1 if no
/// duplication factor (or 0) is encoded.
@@ -1707,7 +1726,8 @@ public:
/// duplication factor encoded in the discriminator. The current duplication
/// factor is as defined by getDuplicationFactor().
/// Returns None if encoding failed.
- inline Optional<const DILocation *> cloneByMultiplyingDuplicationFactor(unsigned DF) const;
+ inline Optional<const DILocation *>
+ cloneByMultiplyingDuplicationFactor(unsigned DF) const;
/// When two instructions are combined into a single instruction we also
/// need to combine the original locations into a single location.
@@ -1730,8 +1750,8 @@ public:
/// This function applies getMergedLocation() repeatedly left-to-right.
///
/// \p Locs: The locations to be merged.
- static
- const DILocation *getMergedLocations(ArrayRef<const DILocation *> Locs);
+ static const DILocation *
+ getMergedLocations(ArrayRef<const DILocation *> Locs);
/// Return the masked discriminator value for an input discriminator value D
/// (i.e. zero out the (B+1)-th and above bits for D, where B is 0-based).
@@ -1755,13 +1775,18 @@ public:
/// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor
/// have certain special case behavior (e.g. treating empty duplication factor
/// as the value '1').
- /// This API, in conjunction with cloneWithDiscriminator, may be used to encode
- /// the raw values provided. \p BD: base discriminator \p DF: duplication factor
+ /// This API, in conjunction with cloneWithDiscriminator, may be used to
+ /// encode the raw values provided.
+ ///
+ /// \p BD: base discriminator
+ /// \p DF: duplication factor
/// \p CI: copy index
+ ///
/// The return is None if the values cannot be encoded in 32 bits - for
- /// example, values for BD or DF larger than 12 bits. Otherwise, the return
- /// is the encoded value.
- static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI);
+ /// example, values for BD or DF larger than 12 bits. Otherwise, the return is
+ /// the encoded value.
+ static Optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF,
+ unsigned CI);
/// Raw decoder for values in an encoded discriminator D.
static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
@@ -1781,11 +1806,10 @@ public:
/// Returns the copy identifier for a given encoded discriminator \p D.
static unsigned getCopyIdentifierFromDiscriminator(unsigned D) {
- return getUnsignedFromPrefixEncoding(getNextComponentInDiscriminator(
- getNextComponentInDiscriminator(D)));
+ return getUnsignedFromPrefixEncoding(
+ getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
}
-
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawInlinedAt() const {
if (getNumOperands() == 2)
@@ -1839,10 +1863,10 @@ public:
unsigned Virtuality = SPFlagNonvirtual,
bool IsMainSubprogram = false) {
// We're assuming virtuality is the low-order field.
- static_assert(
- int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
- int(SPFlagPureVirtual) == int(dwarf::DW_VIRTUALITY_pure_virtual),
- "Virtuality constant mismatch");
+ static_assert(int(SPFlagVirtual) == int(dwarf::DW_VIRTUALITY_virtual) &&
+ int(SPFlagPureVirtual) ==
+ int(dwarf::DW_VIRTUALITY_pure_virtual),
+ "Virtuality constant mismatch");
return static_cast<DISPFlags>(
(Virtuality & SPFlagVirtuality) |
(IsLocalToUnit ? SPFlagLocalToUnit : SPFlagZero) |
@@ -1874,23 +1898,23 @@ private:
DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
DINodeArray RetainedNodes, DITypeArray ThrownTypes,
- StorageType Storage, bool ShouldCreate = true) {
+ DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
ScopeLine, ContainingType, VirtualIndex, ThisAdjustment,
Flags, SPFlags, Unit, TemplateParams.get(), Declaration,
- RetainedNodes.get(), ThrownTypes.get(), Storage,
- ShouldCreate);
+ RetainedNodes.get(), ThrownTypes.get(), Annotations.get(),
+ Storage, ShouldCreate);
}
- static DISubprogram *getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, MDString *LinkageName,
- Metadata *File, unsigned Line, Metadata *Type,
- unsigned ScopeLine, Metadata *ContainingType,
- unsigned VirtualIndex, int ThisAdjustment,
- DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
- Metadata *TemplateParams, Metadata *Declaration,
- Metadata *RetainedNodes, Metadata *ThrownTypes,
- StorageType Storage, bool ShouldCreate = true);
+ static DISubprogram *
+ getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
+ unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
+ int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
+ Metadata *TemplateParams, Metadata *Declaration,
+ Metadata *RetainedNodes, Metadata *ThrownTypes, Metadata *Annotations,
+ StorageType Storage, bool ShouldCreate = true);
TempDISubprogram cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
@@ -1898,7 +1922,7 @@ private:
getContainingType(), getVirtualIndex(),
getThisAdjustment(), getFlags(), getSPFlags(),
getUnit(), getTemplateParams(), getDeclaration(),
- getRetainedNodes(), getThrownTypes());
+ getRetainedNodes(), getThrownTypes(), getAnnotations());
}
public:
@@ -1910,10 +1934,10 @@ public:
DIFlags Flags, DISPFlags SPFlags, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams = nullptr,
DISubprogram *Declaration = nullptr, DINodeArray RetainedNodes = nullptr,
- DITypeArray ThrownTypes = nullptr),
+ DITypeArray ThrownTypes = nullptr, DINodeArray Annotations = nullptr),
(Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
- Declaration, RetainedNodes, ThrownTypes))
+ Declaration, RetainedNodes, ThrownTypes, Annotations))
DEFINE_MDNODE_GET(
DISubprogram,
@@ -1922,10 +1946,11 @@ public:
Metadata *ContainingType, unsigned VirtualIndex, int ThisAdjustment,
DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr,
- Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr),
+ Metadata *RetainedNodes = nullptr, Metadata *ThrownTypes = nullptr,
+ Metadata *Annotations = nullptr),
(Scope, Name, LinkageName, File, Line, Type, ScopeLine, ContainingType,
VirtualIndex, ThisAdjustment, Flags, SPFlags, Unit, TemplateParams,
- Declaration, RetainedNodes, ThrownTypes))
+ Declaration, RetainedNodes, ThrownTypes, Annotations))
TempDISubprogram clone() const { return cloneImpl(); }
@@ -1942,7 +1967,10 @@ public:
unsigned getVirtualIndex() const { return VirtualIndex; }
int getThisAdjustment() const { return ThisAdjustment; }
unsigned getScopeLine() const { return ScopeLine; }
- void setScopeLine(unsigned L) { assert(isDistinct()); ScopeLine = L; }
+ void setScopeLine(unsigned L) {
+ assert(isDistinct());
+ ScopeLine = L;
+ }
DIFlags getFlags() const { return Flags; }
DISPFlags getSPFlags() const { return SPFlags; }
bool isLocalToUnit() const { return getSPFlags() & SPFlagLocalToUnit; }
@@ -2028,6 +2056,9 @@ public:
DITypeArray getThrownTypes() const {
return cast_or_null<MDTuple>(getRawThrownTypes());
}
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
@@ -2045,6 +2076,9 @@ public:
Metadata *getRawThrownTypes() const {
return getNumOperands() > 10 ? getOperandAs<Metadata>(10) : nullptr;
}
+ Metadata *getRawAnnotations() const {
+ return getNumOperands() > 11 ? getOperandAs<Metadata>(11) : nullptr;
+ }
void replaceRawLinkageName(MDString *LinkageName) {
replaceOperandWith(3, LinkageName);
@@ -2112,11 +2146,13 @@ class DILexicalBlock : public DILexicalBlockBase {
}
public:
- DEFINE_MDNODE_GET(DILexicalBlock, (DILocalScope * Scope, DIFile *File,
- unsigned Line, unsigned Column),
+ DEFINE_MDNODE_GET(DILexicalBlock,
+ (DILocalScope * Scope, DIFile *File, unsigned Line,
+ unsigned Column),
(Scope, File, Line, Column))
- DEFINE_MDNODE_GET(DILexicalBlock, (Metadata * Scope, Metadata *File,
- unsigned Line, unsigned Column),
+ DEFINE_MDNODE_GET(DILexicalBlock,
+ (Metadata * Scope, Metadata *File, unsigned Line,
+ unsigned Column),
(Scope, File, Line, Column))
TempDILexicalBlock clone() const { return cloneImpl(); }
@@ -2161,8 +2197,9 @@ class DILexicalBlockFile : public DILexicalBlockBase {
}
public:
- DEFINE_MDNODE_GET(DILexicalBlockFile, (DILocalScope * Scope, DIFile *File,
- unsigned Discriminator),
+ DEFINE_MDNODE_GET(DILexicalBlockFile,
+ (DILocalScope * Scope, DIFile *File,
+ unsigned Discriminator),
(Scope, File, Discriminator))
DEFINE_MDNODE_GET(DILexicalBlockFile,
(Metadata * Scope, Metadata *File, unsigned Discriminator),
@@ -2212,7 +2249,8 @@ unsigned DILocation::getCopyIdentifier() const {
return getCopyIdentifierFromDiscriminator(getDiscriminator());
}
-Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
+Optional<const DILocation *>
+DILocation::cloneWithBaseDiscriminator(unsigned D) const {
unsigned BD, DF, CI;
if (EnableFSDiscriminator) {
@@ -2230,7 +2268,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
return None;
}
-Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *>
+DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
DF *= getDuplicationFactor();
@@ -2274,10 +2313,10 @@ class DINamespace : public DIScope {
public:
DEFINE_MDNODE_GET(DINamespace,
- (DIScope *Scope, StringRef Name, bool ExportSymbols),
+ (DIScope * Scope, StringRef Name, bool ExportSymbols),
(Scope, Name, ExportSymbols))
DEFINE_MDNODE_GET(DINamespace,
- (Metadata *Scope, MDString *Name, bool ExportSymbols),
+ (Metadata * Scope, MDString *Name, bool ExportSymbols),
(Scope, Name, ExportSymbols))
TempDINamespace clone() const { return cloneImpl(); }
@@ -2426,7 +2465,7 @@ public:
(StringRef Name, DIType *Type, bool IsDefault),
(Name, Type, IsDefault))
DEFINE_MDNODE_GET(DITemplateTypeParameter,
- (MDString *Name, Metadata *Type, bool IsDefault),
+ (MDString * Name, Metadata *Type, bool IsDefault),
(Name, Type, IsDefault))
TempDITemplateTypeParameter clone() const { return cloneImpl(); }
@@ -2819,7 +2858,8 @@ public:
/// \param OffsetInBits Offset of the piece in bits.
/// \param SizeInBits Size of the piece in bits.
/// \return Creating a fragment expression may fail if \c Expr
- /// contains arithmetic operations that would be truncated.
+ /// contains arithmetic operations that would be
+ /// truncated.
static Optional<DIExpression *>
createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits,
unsigned SizeInBits);
@@ -2876,6 +2916,12 @@ public:
return getNumElements() > 0 &&
getElement(0) == dwarf::DW_OP_LLVM_entry_value;
}
+
+ /// Try to shorten an expression with an initial constant operand.
+ /// Returns a new expression and constant on success, or the original
+ /// expression and constant on failure.
+ std::pair<DIExpression *, const ConstantInt *>
+ constantFold(const ConstantInt *CI);
};
inline bool operator==(const DIExpression::FragmentInfo &A,
@@ -2927,46 +2973,47 @@ class DIGlobalVariable : public DIVariable {
StringRef LinkageName, DIFile *File, unsigned Line, DIType *Type,
bool IsLocalToUnit, bool IsDefinition,
DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
- uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true) {
+ uint32_t AlignInBits, DINodeArray Annotations, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
getCanonicalMDString(Context, LinkageName), File, Line, Type,
IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration,
- cast_or_null<Metadata>(TemplateParams), AlignInBits, Storage,
- ShouldCreate);
+ cast_or_null<Metadata>(TemplateParams), AlignInBits,
+ Annotations.get(), Storage, ShouldCreate);
}
static DIGlobalVariable *
getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
- uint32_t AlignInBits, StorageType Storage, bool ShouldCreate = true);
+ uint32_t AlignInBits, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate = true);
TempDIGlobalVariable cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
getFile(), getLine(), getType(), isLocalToUnit(),
isDefinition(), getStaticDataMemberDeclaration(),
- getTemplateParams(), getAlignInBits());
+ getTemplateParams(), getAlignInBits(),
+ getAnnotations());
}
public:
- DEFINE_MDNODE_GET(DIGlobalVariable,
- (DIScope * Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DIType *Type,
- bool IsLocalToUnit, bool IsDefinition,
- DIDerivedType *StaticDataMemberDeclaration,
- MDTuple *TemplateParams, uint32_t AlignInBits),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, TemplateParams,
- AlignInBits))
- DEFINE_MDNODE_GET(DIGlobalVariable,
- (Metadata * Scope, MDString *Name, MDString *LinkageName,
- Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition,
- Metadata *StaticDataMemberDeclaration,
- Metadata *TemplateParams, uint32_t AlignInBits),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, StaticDataMemberDeclaration, TemplateParams,
- AlignInBits))
+ DEFINE_MDNODE_GET(
+ DIGlobalVariable,
+ (DIScope * Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned Line, DIType *Type, bool IsLocalToUnit, bool IsDefinition,
+ DIDerivedType *StaticDataMemberDeclaration, MDTuple *TemplateParams,
+ uint32_t AlignInBits, DINodeArray Annotations),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations))
+ DEFINE_MDNODE_GET(
+ DIGlobalVariable,
+ (Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
+ unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
+ Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
+ uint32_t AlignInBits, Metadata *Annotations),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations))
TempDIGlobalVariable clone() const { return cloneImpl(); }
@@ -2977,11 +3024,15 @@ public:
DIDerivedType *getStaticDataMemberDeclaration() const {
return cast_or_null<DIDerivedType>(getRawStaticDataMemberDeclaration());
}
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(6); }
Metadata *getRawTemplateParams() const { return getOperand(7); }
MDTuple *getTemplateParams() const { return getOperandAs<MDTuple>(7); }
+ Metadata *getRawAnnotations() const { return getOperand(8); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIGlobalVariableKind;
@@ -2997,20 +3048,20 @@ class DICommonBlock : public DIScope {
DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo,
ArrayRef<Metadata *> Ops)
: DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block,
- Ops), LineNo(LineNo) {}
+ Ops),
+ LineNo(LineNo) {}
static DICommonBlock *getImpl(LLVMContext &Context, DIScope *Scope,
DIGlobalVariable *Decl, StringRef Name,
DIFile *File, unsigned LineNo,
- StorageType Storage,
- bool ShouldCreate = true) {
+ StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Scope, Decl, getCanonicalMDString(Context, Name),
File, LineNo, Storage, ShouldCreate);
}
static DICommonBlock *getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *Decl, MDString *Name, Metadata *File,
- unsigned LineNo,
- StorageType Storage, bool ShouldCreate = true);
+ unsigned LineNo, StorageType Storage,
+ bool ShouldCreate = true);
TempDICommonBlock cloneImpl() const {
return getTemporary(getContext(), getScope(), getDecl(), getName(),
@@ -3019,11 +3070,11 @@ class DICommonBlock : public DIScope {
public:
DEFINE_MDNODE_GET(DICommonBlock,
- (DIScope *Scope, DIGlobalVariable *Decl, StringRef Name,
+ (DIScope * Scope, DIGlobalVariable *Decl, StringRef Name,
DIFile *File, unsigned LineNo),
(Scope, Decl, Name, File, LineNo))
DEFINE_MDNODE_GET(DICommonBlock,
- (Metadata *Scope, Metadata *Decl, MDString *Name,
+ (Metadata * Scope, Metadata *Decl, MDString *Name,
Metadata *File, unsigned LineNo),
(Scope, Decl, Name, File, LineNo))
@@ -3069,34 +3120,39 @@ class DILocalVariable : public DIVariable {
static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
StringRef Name, DIFile *File, unsigned Line,
DIType *Type, unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits, StorageType Storage,
+ uint32_t AlignInBits, DINodeArray Annotations,
+ StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
- Line, Type, Arg, Flags, AlignInBits, Storage, ShouldCreate);
+ Line, Type, Arg, Flags, AlignInBits, Annotations.get(),
+ Storage, ShouldCreate);
}
static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, Metadata *File, unsigned Line,
Metadata *Type, unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits, StorageType Storage,
+ uint32_t AlignInBits, Metadata *Annotations,
+ StorageType Storage,
bool ShouldCreate = true);
TempDILocalVariable cloneImpl() const {
return getTemporary(getContext(), getScope(), getName(), getFile(),
getLine(), getType(), getArg(), getFlags(),
- getAlignInBits());
+ getAlignInBits(), getAnnotations());
}
public:
DEFINE_MDNODE_GET(DILocalVariable,
(DILocalScope * Scope, StringRef Name, DIFile *File,
unsigned Line, DIType *Type, unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits),
- (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
+ uint32_t AlignInBits, DINodeArray Annotations),
+ (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits,
+ Annotations))
DEFINE_MDNODE_GET(DILocalVariable,
(Metadata * Scope, MDString *Name, Metadata *File,
- unsigned Line, Metadata *Type, unsigned Arg,
- DIFlags Flags, uint32_t AlignInBits),
- (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
+ unsigned Line, Metadata *Type, unsigned Arg, DIFlags Flags,
+ uint32_t AlignInBits, Metadata *Annotations),
+ (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits,
+ Annotations))
TempDILocalVariable clone() const { return cloneImpl(); }
@@ -3111,6 +3167,11 @@ public:
unsigned getArg() const { return Arg; }
DIFlags getFlags() const { return Flags; }
+ DINodeArray getAnnotations() const {
+ return cast_or_null<MDTuple>(getRawAnnotations());
+ }
+ Metadata *getRawAnnotations() const { return getOperand(4); }
+
bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
@@ -3141,16 +3202,14 @@ class DILabel : public DINode {
: DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {}
~DILabel() = default;
- static DILabel *getImpl(LLVMContext &Context, DIScope *Scope,
- StringRef Name, DIFile *File, unsigned Line,
- StorageType Storage,
+ static DILabel *getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
+ DIFile *File, unsigned Line, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
Line, Storage, ShouldCreate);
}
- static DILabel *getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File, unsigned Line,
- StorageType Storage,
+ static DILabel *getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate = true);
TempDILabel cloneImpl() const {
@@ -3295,31 +3354,33 @@ class DIImportedEntity : public DINode {
static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
DIScope *Scope, DINode *Entity, DIFile *File,
unsigned Line, StringRef Name,
- StorageType Storage,
+ DINodeArray Elements, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, Scope, Entity, File, Line,
- getCanonicalMDString(Context, Name), Storage, ShouldCreate);
+ getCanonicalMDString(Context, Name), Elements.get(), Storage,
+ ShouldCreate);
}
- static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag,
- Metadata *Scope, Metadata *Entity,
- Metadata *File, unsigned Line,
- MDString *Name, StorageType Storage,
- bool ShouldCreate = true);
+ static DIImportedEntity *
+ getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, Metadata *Entity,
+ Metadata *File, unsigned Line, MDString *Name, Metadata *Elements,
+ StorageType Storage, bool ShouldCreate = true);
TempDIImportedEntity cloneImpl() const {
return getTemporary(getContext(), getTag(), getScope(), getEntity(),
- getFile(), getLine(), getName());
+ getFile(), getLine(), getName(), getElements());
}
public:
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, DIScope *Scope, DINode *Entity, DIFile *File,
- unsigned Line, StringRef Name = ""),
- (Tag, Scope, Entity, File, Line, Name))
+ unsigned Line, StringRef Name = "",
+ DINodeArray Elements = nullptr),
+ (Tag, Scope, Entity, File, Line, Name, Elements))
DEFINE_MDNODE_GET(DIImportedEntity,
(unsigned Tag, Metadata *Scope, Metadata *Entity,
- Metadata *File, unsigned Line, MDString *Name),
- (Tag, Scope, Entity, File, Line, Name))
+ Metadata *File, unsigned Line, MDString *Name,
+ Metadata *Elements = nullptr),
+ (Tag, Scope, Entity, File, Line, Name, Elements))
TempDIImportedEntity clone() const { return cloneImpl(); }
@@ -3328,11 +3389,15 @@ public:
DINode *getEntity() const { return cast_or_null<DINode>(getRawEntity()); }
StringRef getName() const { return getStringOperand(2); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
+ DINodeArray getElements() const {
+ return cast_or_null<MDTuple>(getRawElements());
+ }
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawEntity() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
Metadata *getRawFile() const { return getOperand(3); }
+ Metadata *getRawElements() const { return getOperand(4); }
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIImportedEntityKind;
@@ -3457,11 +3522,13 @@ class DIMacro : public DIMacroNode {
}
public:
- DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name,
- StringRef Value = ""),
+ DEFINE_MDNODE_GET(DIMacro,
+ (unsigned MIType, unsigned Line, StringRef Name,
+ StringRef Value = ""),
(MIType, Line, Name, Value))
- DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name,
- MDString *Value),
+ DEFINE_MDNODE_GET(DIMacro,
+ (unsigned MIType, unsigned Line, MDString *Name,
+ MDString *Value),
(MIType, Line, Name, Value))
TempDIMacro clone() const { return cloneImpl(); }
@@ -3508,11 +3575,13 @@ class DIMacroFile : public DIMacroNode {
}
public:
- DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File,
- DIMacroNodeArray Elements),
+ DEFINE_MDNODE_GET(DIMacroFile,
+ (unsigned MIType, unsigned Line, DIFile *File,
+ DIMacroNodeArray Elements),
(MIType, Line, File, Elements))
- DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line,
- Metadata *File, Metadata *Elements),
+ DEFINE_MDNODE_GET(DIMacroFile,
+ (unsigned MIType, unsigned Line, Metadata *File,
+ Metadata *Elements),
(MIType, Line, File, Elements))
TempDIMacroFile clone() const { return cloneImpl(); }
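
On the metadata side, the annotations land as a trailing operand with a getAnnotations()/getRawAnnotations() pair on DIDerivedType, DICompositeType, DISubprogram, DIGlobalVariable and DILocalVariable, and DIImportedEntity gains an analogous Elements operand. A small sketch that dumps annotations from a DISubprogram; printAnnotations is invented, and the tuple-of-MDStrings element shape is an assumption mirroring the builder sketch earlier, with any other shape simply skipped.

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: print every string found in a subprogram's annotations.
static void printAnnotations(const DISubprogram *SP, raw_ostream &OS) {
  DINodeArray Annotations = SP->getAnnotations();
  if (!Annotations)
    return; // operand absent (e.g. metadata built without annotations)
  for (const MDOperand &Op : Annotations->operands())
    if (const auto *Tuple = dyn_cast<MDTuple>(Op))
      for (const MDOperand &Elt : Tuple->operands())
        if (const auto *Str = dyn_cast<MDString>(Elt))
          OS << Str->getString() << ' ';
  OS << '\n';
}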
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index b68a912b5f70..8a1b26e699e3 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -49,10 +49,11 @@ public:
/// This enum is just used to hold constants we need for IntegerType.
enum {
MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified
- MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified
+ MAX_INT_BITS = (1<<23) ///< Maximum number of bits that can be specified
///< Note that bit width is stored in the Type class's SubclassData field
- ///< which has 24 bits. This yields a maximum bit width of 16,777,215
- ///< bits.
+ ///< which has 24 bits. SelectionDAG type legalization can require a
+ ///< power of 2 IntegerType, so limit to the largest representable power
+ ///< of 2, 8388608.
};
/// This static method is the primary way of constructing an IntegerType.
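
Since the ceiling drops from (1<<24)-1 to 1<<23 bits, code that builds arbitrary-width integers may want an explicit range check before calling IntegerType::get. A trivial, hypothetical guard (getIntTypeOrNull is not an LLVM API):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Hypothetical guard around the tightened width limit (now 8,388,608 bits).
static IntegerType *getIntTypeOrNull(LLVMContext &Ctx, unsigned Bits) {
  if (Bits < IntegerType::MIN_INT_BITS || Bits > IntegerType::MAX_INT_BITS)
    return nullptr; // outside the supported range
  return IntegerType::get(Ctx, Bits);
}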
diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h
index 5064f4f4edf7..73b0be43e136 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -33,6 +33,7 @@ namespace llvm {
// Forward declarations.
class DiagnosticPrinter;
+class CallInst;
class Function;
class Instruction;
class InstructionCost;
@@ -79,6 +80,7 @@ enum DiagnosticKind {
DK_PGOProfile,
DK_Unsupported,
DK_SrcMgr,
+ DK_DontCall,
DK_FirstPluginKind // Must be last value to work with
// getNextAvailablePluginDiagnosticKind
};
@@ -194,10 +196,9 @@ public:
/// \p Fn The function to which this diagnostic applies.
/// \p ResourceSize The computed size of the consumed resource.
DiagnosticInfoResourceLimit(const Function &Fn, const char *ResourceName,
- uint64_t ResourceSize,
+ uint64_t ResourceSize, uint64_t ResourceLimit,
DiagnosticSeverity Severity = DS_Warning,
- DiagnosticKind Kind = DK_ResourceLimit,
- uint64_t ResourceLimit = 0)
+ DiagnosticKind Kind = DK_ResourceLimit)
: DiagnosticInfo(Kind, Severity), Fn(Fn), ResourceName(ResourceName),
ResourceSize(ResourceSize), ResourceLimit(ResourceLimit) {}
@@ -218,10 +219,10 @@ class DiagnosticInfoStackSize : public DiagnosticInfoResourceLimit {
void anchor() override;
public:
DiagnosticInfoStackSize(const Function &Fn, uint64_t StackSize,
- DiagnosticSeverity Severity = DS_Warning,
- uint64_t StackLimit = 0)
- : DiagnosticInfoResourceLimit(Fn, "stack frame size", StackSize, Severity,
- DK_StackSize, StackLimit) {}
+ uint64_t StackLimit,
+ DiagnosticSeverity Severity = DS_Warning)
+ : DiagnosticInfoResourceLimit(Fn, "stack frame size", StackSize,
+ StackLimit, Severity, DK_StackSize) {}
uint64_t getStackSize() const { return getResourceSize(); }
uint64_t getStackLimit() const { return getResourceLimit(); }
@@ -1070,6 +1071,27 @@ public:
}
};
+void diagnoseDontCall(const CallInst &CI);
+
+class DiagnosticInfoDontCall : public DiagnosticInfo {
+ StringRef CalleeName;
+ StringRef Note;
+ unsigned LocCookie;
+
+public:
+ DiagnosticInfoDontCall(StringRef CalleeName, StringRef Note,
+ DiagnosticSeverity DS, unsigned LocCookie)
+ : DiagnosticInfo(DK_DontCall, DS), CalleeName(CalleeName), Note(Note),
+ LocCookie(LocCookie) {}
+ StringRef getFunctionName() const { return CalleeName; }
+ StringRef getNote() const { return Note; }
+ unsigned getLocCookie() const { return LocCookie; }
+ void print(DiagnosticPrinter &DP) const override;
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_DontCall;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_IR_DIAGNOSTICINFO_H
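
The new DK_DontCall kind, raised through diagnoseDontCall above, routes must-not-call reports through the regular diagnostic machinery. Below is a sketch of a handler that recognizes it; handleDiagnostic is a stand-in for whatever callback a client installs, and the message wording is invented.

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical diagnostic callback that special-cases the new kind.
static void handleDiagnostic(const DiagnosticInfo &DI, void * /*Context*/) {
  if (const auto *DC = dyn_cast<DiagnosticInfoDontCall>(&DI)) {
    errs() << "call to '" << DC->getFunctionName() << "' was flagged";
    if (!DC->getNote().empty())
      errs() << ": " << DC->getNote();
    errs() << '\n';
  }
}

Registering it with LLVMContext::setDiagnosticHandlerCallBack(handleDiagnostic) before running codegen would route DK_DontCall reports through this path.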
diff --git a/llvm/include/llvm/IR/DiagnosticPrinter.h b/llvm/include/llvm/IR/DiagnosticPrinter.h
index 102932ceefa5..2df6fc3dfe73 100644
--- a/llvm/include/llvm/IR/DiagnosticPrinter.h
+++ b/llvm/include/llvm/IR/DiagnosticPrinter.h
@@ -1,4 +1,4 @@
-//===- llvm/Support/DiagnosticPrinter.h - Diagnostic Printer ----*- C++ -*-===//
+//===- llvm/IR/DiagnosticPrinter.h - Diagnostic Printer ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index 4d140c3ad0f2..475355af5647 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -277,6 +277,12 @@ struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
+/// Enables verification of dominator trees.
+///
+/// This check is expensive and is disabled by default. `-verify-dom-info`
+/// allows selectively enabling the check without needing to recompile.
+extern bool VerifyDomInfo;
+
/// Legacy analysis pass which computes a \c DominatorTree.
class DominatorTreeWrapperPass : public FunctionPass {
DominatorTree DT;
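
With the VerifyDomInfo declaration exported, code outside the analysis can consult whether -verify-dom-info was requested. A sketch of the intended pattern after a CFG update; maybeVerifyDomTree is an invented name, and DominatorTree::verify() is the existing recompute-and-compare check the flag gates.

#include "llvm/IR/Dominators.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Hypothetical post-update check, active only under -verify-dom-info.
static void maybeVerifyDomTree(const DominatorTree &DT) {
  if (VerifyDomInfo && !DT.verify())
    report_fatal_error("dominator tree is out of date after CFG update");
}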
diff --git a/llvm/include/llvm/IR/FPEnv.h b/llvm/include/llvm/IR/FPEnv.h
index 621540000b5c..bf435ec6d109 100644
--- a/llvm/include/llvm/IR/FPEnv.h
+++ b/llvm/include/llvm/IR/FPEnv.h
@@ -39,24 +39,30 @@ enum ExceptionBehavior : uint8_t {
/// Returns a valid RoundingMode enumerator when given a string
/// that is valid as input in constrained intrinsic rounding mode
/// metadata.
-Optional<RoundingMode> StrToRoundingMode(StringRef);
+Optional<RoundingMode> convertStrToRoundingMode(StringRef);
/// For any RoundingMode enumerator, returns a string valid as input in
/// constrained intrinsic rounding mode metadata.
-Optional<StringRef> RoundingModeToStr(RoundingMode);
+Optional<StringRef> convertRoundingModeToStr(RoundingMode);
/// Returns a valid ExceptionBehavior enumerator when given a string
/// valid as input in constrained intrinsic exception behavior metadata.
-Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef);
+Optional<fp::ExceptionBehavior> convertStrToExceptionBehavior(StringRef);
/// For any ExceptionBehavior enumerator, returns a string valid as
/// input in constrained intrinsic exception behavior metadata.
-Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior);
+Optional<StringRef> convertExceptionBehaviorToStr(fp::ExceptionBehavior);
/// Returns true if the exception handling behavior and rounding mode
/// match what is used in the default floating point environment.
inline bool isDefaultFPEnvironment(fp::ExceptionBehavior EB, RoundingMode RM) {
return EB == fp::ebIgnore && RM == RoundingMode::NearestTiesToEven;
}
+
+/// Returns true if the rounding mode \p RM may be equal to \p QRM at compile
+/// time or at run time.
+inline bool canRoundingModeBe(RoundingMode RM, RoundingMode QRM) {
+ return RM == QRM || RM == RoundingMode::Dynamic;
+}
}
#endif
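
The string/enum helpers gain a convert prefix, and canRoundingModeBe answers whether a possibly Dynamic mode can resolve to a given mode at run time. A small sketch combining both; mightRoundToNearest is an invented name.

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/FPEnv.h"

using namespace llvm;

// Hypothetical query over a constrained-intrinsic rounding-mode string.
static bool mightRoundToNearest(StringRef MDText) {
  Optional<RoundingMode> RM = convertStrToRoundingMode(MDText);
  if (!RM)
    return false; // not a recognized metadata string
  // Dynamic answers true as well, since the mode is only known at run time.
  return canRoundingModeBe(*RM, RoundingMode::NearestTiesToEven);
}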
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index e0094e2afff2..669418eacbb0 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -48,6 +48,7 @@ typedef unsigned ID;
class AssemblyAnnotationWriter;
class Constant;
+struct DenormalMode;
class DISubprogram;
class LLVMContext;
class Module;
@@ -58,7 +59,8 @@ class User;
class BranchProbabilityInfo;
class BlockFrequencyInfo;
-class Function : public GlobalObject, public ilist_node<Function> {
+class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject,
+ public ilist_node<Function> {
public:
using BasicBlockListType = SymbolTableList<BasicBlock>;
@@ -245,72 +247,22 @@ public:
setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4));
}
- /// Return the attribute list for this Function.
- AttributeList getAttributes() const { return AttributeSets; }
-
- /// Set the attribute list for this Function.
- void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
-
- /// Add function attributes to this function.
- void addFnAttr(Attribute::AttrKind Kind) {
- addAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Add function attributes to this function.
- void addFnAttr(StringRef Kind, StringRef Val = StringRef()) {
- addAttribute(AttributeList::FunctionIndex,
- Attribute::get(getContext(), Kind, Val));
- }
-
- /// Add function attributes to this function.
- void addFnAttr(Attribute Attr) {
- addAttribute(AttributeList::FunctionIndex, Attr);
- }
-
- /// Remove function attributes from this function.
- void removeFnAttr(Attribute::AttrKind Kind) {
- removeAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Remove function attribute from this function.
- void removeFnAttr(StringRef Kind) {
- setAttributes(getAttributes().removeAttribute(
- getContext(), AttributeList::FunctionIndex, Kind));
- }
-
- /// A function will have the "coroutine.presplit" attribute if it's
- /// a coroutine and has not gone through full CoroSplit pass.
- bool isPresplitCoroutine() const {
- return hasFnAttribute("coroutine.presplit");
- }
-
- enum ProfileCountType { PCT_Invalid, PCT_Real, PCT_Synthetic };
+ enum ProfileCountType { PCT_Real, PCT_Synthetic };
/// Class to represent profile counts.
///
/// This class represents both real and synthetic profile counts.
class ProfileCount {
private:
- uint64_t Count;
- ProfileCountType PCT;
- static ProfileCount Invalid;
+ uint64_t Count = 0;
+ ProfileCountType PCT = PCT_Real;
public:
- ProfileCount() : Count(-1), PCT(PCT_Invalid) {}
ProfileCount(uint64_t Count, ProfileCountType PCT)
: Count(Count), PCT(PCT) {}
- bool hasValue() const { return PCT != PCT_Invalid; }
uint64_t getCount() const { return Count; }
ProfileCountType getType() const { return PCT; }
bool isSynthetic() const { return PCT == PCT_Synthetic; }
- explicit operator bool() { return hasValue(); }
- bool operator!() const { return !hasValue(); }
- // Update the count retaining the same profile count type.
- ProfileCount &setCount(uint64_t C) {
- Count = C;
- return *this;
- }
- static ProfileCount getInvalid() { return ProfileCount(-1, PCT_Invalid); }
};
/// Set the entry count for this function.
@@ -330,7 +282,7 @@ public:
///
/// Entry count is the number of times the function was executed.
/// When AllowSynthetic is false, only pgo_data will be returned.
- ProfileCount getEntryCount(bool AllowSynthetic = false) const;
+ Optional<ProfileCount> getEntryCount(bool AllowSynthetic = false) const;
/// Return true if the function is annotated with profile data.
///
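
ProfileCount loses its invalid state, so getEntryCount now returns Optional<ProfileCount> instead of a sentinel value. A sketch of the updated caller pattern; getEntryCountOrZero is an invented helper.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Function.h"

using namespace llvm;

// Hypothetical caller updated for the Optional-based return value.
static uint64_t getEntryCountOrZero(const Function &F) {
  if (Optional<Function::ProfileCount> PC = F.getEntryCount())
    return PC->getCount();
  return 0; // no real or synthetic entry count attached
}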
@@ -351,43 +303,6 @@ public:
/// Get the section prefix for this function.
Optional<StringRef> getSectionPrefix() const;
- /// Return true if the function has the attribute.
- bool hasFnAttribute(Attribute::AttrKind Kind) const {
- return AttributeSets.hasFnAttribute(Kind);
- }
-
- /// Return true if the function has the attribute.
- bool hasFnAttribute(StringRef Kind) const {
- return AttributeSets.hasFnAttribute(Kind);
- }
-
- /// Return the attribute for the given attribute kind.
- Attribute getFnAttribute(Attribute::AttrKind Kind) const {
- return getAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Return the attribute for the given attribute kind.
- Attribute getFnAttribute(StringRef Kind) const {
- return getAttribute(AttributeList::FunctionIndex, Kind);
- }
-
- /// Return the stack alignment for the function.
- unsigned getFnStackAlignment() const {
- if (!hasFnAttribute(Attribute::StackAlignment))
- return 0;
- if (const auto MA =
- AttributeSets.getStackAlignment(AttributeList::FunctionIndex))
- return MA->value();
- return 0;
- }
-
- /// Return the stack alignment for the function.
- MaybeAlign getFnStackAlign() const {
- if (!hasFnAttribute(Attribute::StackAlignment))
- return None;
- return AttributeSets.getStackAlignment(AttributeList::FunctionIndex);
- }
-
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
/// to use during code generation.
bool hasGC() const {
@@ -397,17 +312,36 @@ public:
void setGC(std::string Str);
void clearGC();
- /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
- bool hasStackProtectorFnAttr() const;
+ /// Return the attribute list for this Function.
+ AttributeList getAttributes() const { return AttributeSets; }
- /// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute::AttrKind Kind);
+ /// Set the attribute list for this Function.
+ void setAttributes(AttributeList Attrs) { AttributeSets = Attrs; }
+ // TODO: remove non-AtIndex versions of these methods.
/// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute Attr);
+ void addAttributeAtIndex(unsigned i, Attribute Attr);
+
+ /// Add function attributes to this function.
+ void addFnAttr(Attribute::AttrKind Kind);
+
+ /// Add function attributes to this function.
+ void addFnAttr(StringRef Kind, StringRef Val = StringRef());
+
+ /// Add function attributes to this function.
+ void addFnAttr(Attribute Attr);
+
+ /// Add function attributes to this function.
+ void addFnAttrs(const AttrBuilder &Attrs);
- /// adds the attributes to the list of attributes.
- void addAttributes(unsigned i, const AttrBuilder &Attrs);
+ /// Add return value attributes to this function.
+ void addRetAttr(Attribute::AttrKind Kind);
+
+ /// Add return value attributes to this function.
+ void addRetAttr(Attribute Attr);
+
+ /// Add return value attributes to this function.
+ void addRetAttrs(const AttrBuilder &Attrs);
/// adds the attribute to the list of attributes for the given arg.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
@@ -419,13 +353,27 @@ public:
void addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute::AttrKind Kind);
+ void removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind);
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, StringRef Kind);
+ void removeAttributeAtIndex(unsigned i, StringRef Kind);
+
+ /// Remove function attributes from this function.
+ void removeFnAttr(Attribute::AttrKind Kind);
+
+ /// Remove function attribute from this function.
+ void removeFnAttr(StringRef Kind);
+
+ void removeFnAttrs(const AttrBuilder &Attrs);
- /// removes the attributes from the list of attributes.
- void removeAttributes(unsigned i, const AttrBuilder &Attrs);
+ /// removes the attribute from the return value list of attributes.
+ void removeRetAttr(Attribute::AttrKind Kind);
+
+ /// removes the attribute from the return value list of attributes.
+ void removeRetAttr(StringRef Kind);
+
+ /// removes the attributes from the return value list of attributes.
+ void removeRetAttrs(const AttrBuilder &Attrs);
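The Fn/Ret/Param entry points above replace raw AttributeList indices on Function. A short sketch of the new surface; the helper name markNoInlineNonNull and the particular attributes chosen are illustrative:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Illustrative: adjust attributes through the per-position API rather
    // than addAttribute(AttributeList::FunctionIndex, ...).
    void markNoInlineNonNull(Function &F) {
      F.addFnAttr(Attribute::NoInline);          // function-level enum attribute
      F.addFnAttr("frame-pointer", "all");       // function-level string attribute
      F.addRetAttr(Attribute::NonNull);          // return-value attribute
      if (F.hasFnAttribute(Attribute::OptimizeNone))
        F.removeFnAttr(Attribute::OptimizeNone); // removal mirrors addition
    }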
/// removes the attribute from the list of attributes.
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind);
@@ -436,54 +384,57 @@ public:
/// removes the attribute from the list of attributes.
void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs);
- /// removes noundef and other attributes that imply undefined behavior if a
- /// `undef` or `poison` value is passed from the list of attributes.
- void removeParamUndefImplyingAttrs(unsigned ArgNo);
+ /// Return true if the function has the attribute.
+ bool hasFnAttribute(Attribute::AttrKind Kind) const;
- /// check if an attributes is in the list of attributes.
- bool hasAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().hasAttribute(i, Kind);
- }
+ /// Return true if the function has the attribute.
+ bool hasFnAttribute(StringRef Kind) const;
- /// check if an attributes is in the list of attributes.
- bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return getAttributes().hasParamAttribute(ArgNo, Kind);
- }
+ /// check if an attribute is in the list of attributes for the return value.
+ bool hasRetAttribute(Attribute::AttrKind Kind) const;
- /// gets the specified attribute from the list of attributes.
- Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const {
- return getAttributes().getParamAttr(ArgNo, Kind);
- }
+ /// check if an attribute is in the list of attributes.
+ bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
/// gets the attribute from the list of attributes.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return AttributeSets.getAttribute(i, Kind);
- }
+ Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const;
/// gets the attribute from the list of attributes.
- Attribute getAttribute(unsigned i, StringRef Kind) const {
- return AttributeSets.getAttribute(i, Kind);
+ Attribute getAttributeAtIndex(unsigned i, StringRef Kind) const;
+
+ /// Return the attribute for the given attribute kind.
+ Attribute getFnAttribute(Attribute::AttrKind Kind) const;
+
+ /// Return the attribute for the given attribute kind.
+ Attribute getFnAttribute(StringRef Kind) const;
+
+ /// gets the specified attribute from the list of attributes.
+ Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
+
+ /// removes noundef and other attributes that imply undefined behavior if a
+ /// `undef` or `poison` value is passed from the list of attributes.
+ void removeParamUndefImplyingAttrs(unsigned ArgNo);
+
+ /// Return the stack alignment for the function.
+ MaybeAlign getFnStackAlign() const {
+ return AttributeSets.getFnStackAlignment();
}
- /// adds the dereferenceable attribute to the list of attributes.
- void addDereferenceableAttr(unsigned i, uint64_t Bytes);
+ /// Returns true if the function has ssp, sspstrong, or sspreq fn attrs.
+ bool hasStackProtectorFnAttr() const;
/// adds the dereferenceable attribute to the list of attributes for
/// the given arg.
void addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes);
/// adds the dereferenceable_or_null attribute to the list of
- /// attributes.
- void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
-
- /// adds the dereferenceable_or_null attribute to the list of
/// attributes for the given arg.
void addDereferenceableOrNullParamAttr(unsigned ArgNo, uint64_t Bytes);
/// Extract the alignment for a call or parameter (0=unknown).
/// FIXME: Remove this function once transition to Align is over.
/// Use getParamAlign() instead.
- unsigned getParamAlignment(unsigned ArgNo) const {
+ uint64_t getParamAlignment(unsigned ArgNo) const {
if (const auto MA = getParamAlign(ArgNo))
return MA->value();
return 0;
@@ -517,11 +468,9 @@ public:
return AttributeSets.getParamByRefType(ArgNo);
}
- /// Extract the number of dereferenceable bytes for a call or
- /// parameter (0=unknown).
- /// @param i AttributeList index, referring to a return value or argument.
- uint64_t getDereferenceableBytes(unsigned i) const {
- return AttributeSets.getDereferenceableBytes(i);
+ /// Extract the preallocated type for a parameter.
+ Type *getParamPreallocatedType(unsigned ArgNo) const {
+ return AttributeSets.getParamPreallocatedType(ArgNo);
}
/// Extract the number of dereferenceable bytes for a parameter.
@@ -530,13 +479,6 @@ public:
return AttributeSets.getParamDereferenceableBytes(ArgNo);
}
- /// Extract the number of dereferenceable_or_null bytes for a call or
- /// parameter (0=unknown).
- /// @param i AttributeList index, referring to a return value or argument.
- uint64_t getDereferenceableOrNullBytes(unsigned i) const {
- return AttributeSets.getDereferenceableOrNullBytes(i);
- }
-
/// Extract the number of dereferenceable_or_null bytes for a
/// parameter.
/// @param ArgNo AttributeList ArgNo, referring to an argument.
@@ -544,6 +486,12 @@ public:
return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
}
+ /// A function will have the "coroutine.presplit" attribute if it's
+ /// a coroutine and has not yet gone through the full CoroSplit pass.
+ bool isPresplitCoroutine() const {
+ return hasFnAttribute("coroutine.presplit");
+ }
+
/// Determine if the function does not access memory.
bool doesNotAccessMemory() const {
return hasFnAttribute(Attribute::ReadNone);
@@ -692,19 +640,16 @@ public:
/// Determine if the function returns a structure through first
/// or second pointer argument.
bool hasStructRetAttr() const {
- return AttributeSets.hasParamAttribute(0, Attribute::StructRet) ||
- AttributeSets.hasParamAttribute(1, Attribute::StructRet);
+ return AttributeSets.hasParamAttr(0, Attribute::StructRet) ||
+ AttributeSets.hasParamAttr(1, Attribute::StructRet);
}
/// Determine if the parameter or return value is marked with NoAlias
/// attribute.
bool returnDoesNotAlias() const {
- return AttributeSets.hasAttribute(AttributeList::ReturnIndex,
- Attribute::NoAlias);
- }
- void setReturnDoesNotAlias() {
- addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ return AttributeSets.hasRetAttr(Attribute::NoAlias);
}
+ void setReturnDoesNotAlias() { addRetAttr(Attribute::NoAlias); }
/// Do not optimize this function (-O0).
bool hasOptNone() const { return hasFnAttribute(Attribute::OptimizeNone); }
@@ -904,13 +849,14 @@ public:
/// hasAddressTaken - returns true if there are any uses of this function
/// other than direct calls or invokes to it, or blockaddress expressions.
/// Optionally passes back an offending user for diagnostic purposes,
- /// ignores callback uses, assume like pointer annotation calls, and
- /// references in llvm.used and llvm.compiler.used variables.
- ///
+ /// ignores callback uses, assume-like pointer annotation calls, references in
+ /// llvm.used and llvm.compiler.used variables, and operand bundle
+ /// "clang.arc.attachedcall".
bool hasAddressTaken(const User ** = nullptr,
bool IgnoreCallbackUses = false,
bool IgnoreAssumeLikeCalls = true,
- bool IngoreLLVMUsed = false) const;
+ bool IngoreLLVMUsed = false,
+ bool IgnoreARCAttachedCall = false) const;
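A sketch of the widened hasAddressTaken() query above; the helper name isOnlyCalledDirectly and the particular flag settings are illustrative:

    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Illustrative: treat llvm.used references and "clang.arc.attachedcall"
    // bundle uses as not taking the address of F.
    bool isOnlyCalledDirectly(const Function &F) {
      const User *Offender = nullptr;
      return !F.hasAddressTaken(&Offender,
                                /*IgnoreCallbackUses=*/false,
                                /*IgnoreAssumeLikeCalls=*/true,
                                /*IngoreLLVMUsed=*/true,
                                /*IgnoreARCAttachedCall=*/true);
    }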
/// isDefTriviallyDead - Return true if it is trivially safe to remove
/// this function definition from the module (because it isn't externally
diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h
index a69958d596c6..4fa8e3a8dcf4 100644
--- a/llvm/include/llvm/IR/GCStrategy.h
+++ b/llvm/include/llvm/IR/GCStrategy.h
@@ -131,6 +131,9 @@ public:
/// GCMetadataPrinterRegistery as well.
using GCRegistry = Registry<GCStrategy>;
+/// Lookup the GCStrategy object associated with the given gc name.
+std::unique_ptr<GCStrategy> getGCStrategy(const StringRef Name);
+
} // end namespace llvm
#endif // LLVM_IR_GCSTRATEGY_H
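A small sketch of the new by-name lookup; the wrapper name lookupStrategyFor is illustrative, and the handling of an unregistered name is assumed to be done by getGCStrategy itself:

    #include <memory>
    #include "llvm/IR/Function.h"
    #include "llvm/IR/GCStrategy.h"
    using namespace llvm;

    // Illustrative: fetch the GCStrategy registered under F's gc name.
    std::unique_ptr<GCStrategy> lookupStrategyFor(const Function &F) {
      if (!F.hasGC())
        return nullptr;
      // Assumption: an unknown strategy name is diagnosed by getGCStrategy.
      return getGCStrategy(F.getGC());
    }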
diff --git a/llvm/include/llvm/IR/GlobalAlias.h b/llvm/include/llvm/IR/GlobalAlias.h
index f2d9b9676ec9..01134448a8fa 100644
--- a/llvm/include/llvm/IR/GlobalAlias.h
+++ b/llvm/include/llvm/IR/GlobalAlias.h
@@ -15,7 +15,8 @@
#define LLVM_IR_GLOBALALIAS_H
#include "llvm/ADT/ilist_node.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Value.h"
namespace llvm {
@@ -24,8 +25,7 @@ class Twine;
class Module;
template <typename ValueSubClass> class SymbolTableListTraits;
-class GlobalAlias : public GlobalIndirectSymbol,
- public ilist_node<GlobalAlias> {
+class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> {
friend class SymbolTableListTraits<GlobalAlias>;
GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
@@ -58,6 +58,17 @@ public:
// Linkage, Type, Parent and AddressSpace taken from the Aliasee.
static GlobalAlias *create(const Twine &Name, GlobalValue *Aliasee);
+ // allocate space for exactly one operand
+ void *operator new(size_t S) { return User::operator new(S, 1); }
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ void copyAttributesFrom(const GlobalAlias *Src) {
+ GlobalValue::copyAttributesFrom(Src);
+ }
+
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
///
@@ -71,10 +82,14 @@ public:
/// These methods retrieve and set alias target.
void setAliasee(Constant *Aliasee);
const Constant *getAliasee() const {
- return getIndirectSymbol();
+ return static_cast<Constant *>(Op<0>().get());
}
- Constant *getAliasee() {
- return getIndirectSymbol();
+ Constant *getAliasee() { return static_cast<Constant *>(Op<0>().get()); }
+
+ const GlobalObject *getAliaseeObject() const;
+ GlobalObject *getAliaseeObject() {
+ return const_cast<GlobalObject *>(
+ static_cast<const GlobalAlias *>(this)->getAliaseeObject());
}
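A brief sketch of resolving an alias through the renamed accessor; the wrapper name resolveAlias is illustrative:

    #include "llvm/IR/GlobalAlias.h"
    #include "llvm/IR/GlobalObject.h"
    using namespace llvm;

    // Illustrative: getAliaseeObject() replaces the old getBaseObject() /
    // getIndirectSymbol() pair; it may return null when the aliasee does
    // not strip back to a single GlobalObject.
    const GlobalObject *resolveAlias(const GlobalAlias &GA) {
      return GA.getAliaseeObject();
    }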
static bool isValidLinkage(LinkageTypes L) {
@@ -88,6 +103,12 @@ public:
}
};
+template <>
+struct OperandTraits<GlobalAlias>
+ : public FixedNumOperandTraits<GlobalAlias, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Constant)
+
} // end namespace llvm
#endif // LLVM_IR_GLOBALALIAS_H
diff --git a/llvm/include/llvm/IR/GlobalIFunc.h b/llvm/include/llvm/IR/GlobalIFunc.h
index ddd29c8a4a19..10088ee2fff4 100644
--- a/llvm/include/llvm/IR/GlobalIFunc.h
+++ b/llvm/include/llvm/IR/GlobalIFunc.h
@@ -18,7 +18,9 @@
#define LLVM_IR_GLOBALIFUNC_H
#include "llvm/ADT/ilist_node.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Value.h"
namespace llvm {
@@ -29,8 +31,7 @@ class Module;
// Traits class for using GlobalIFunc in symbol table in Module.
template <typename ValueSubClass> class SymbolTableListTraits;
-class GlobalIFunc final : public GlobalIndirectSymbol,
- public ilist_node<GlobalIFunc> {
+class GlobalIFunc final : public GlobalObject, public ilist_node<GlobalIFunc> {
friend class SymbolTableListTraits<GlobalIFunc>;
GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
@@ -46,6 +47,17 @@ public:
LinkageTypes Linkage, const Twine &Name,
Constant *Resolver, Module *Parent);
+ // allocate space for exactly one operand
+ void *operator new(size_t S) { return User::operator new(S, 1); }
+ void operator delete(void *Ptr) { User::operator delete(Ptr); }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
+
+ void copyAttributesFrom(const GlobalIFunc *Src) {
+ GlobalObject::copyAttributesFrom(Src);
+ }
+
/// This method unlinks 'this' from the containing module, but does not
/// delete it.
void removeFromParent();
@@ -54,14 +66,22 @@ public:
void eraseFromParent();
/// These methods retrieve and set ifunc resolver function.
- void setResolver(Constant *Resolver) {
- setIndirectSymbol(Resolver);
- }
+ void setResolver(Constant *Resolver) { Op<0>().set(Resolver); }
const Constant *getResolver() const {
- return getIndirectSymbol();
+ return static_cast<Constant *>(Op<0>().get());
}
- Constant *getResolver() {
- return getIndirectSymbol();
+ Constant *getResolver() { return static_cast<Constant *>(Op<0>().get()); }
+
+ // Return the resolver function after peeling off potential ConstantExpr
+ // indirection.
+ const Function *getResolverFunction() const;
+ Function *getResolverFunction() {
+ return const_cast<Function *>(
+ static_cast<const GlobalIFunc *>(this)->getResolverFunction());
+ }
+
+ static FunctionType *getResolverFunctionType(Type *IFuncValTy) {
+ return FunctionType::get(IFuncValTy->getPointerTo(), false);
}
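A minimal sketch of the new resolver helpers; the wrapper names are illustrative:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/GlobalIFunc.h"
    using namespace llvm;

    // Illustrative: the resolver function behind an ifunc, looking through
    // any ConstantExpr casts wrapping it.
    Function *resolverOf(GlobalIFunc &IF) { return IF.getResolverFunction(); }

    // Illustrative: the type every resolver for IF must have, i.e. a
    // zero-argument function returning a pointer to IF's value type.
    FunctionType *expectedResolverType(GlobalIFunc &IF) {
      return GlobalIFunc::getResolverFunctionType(IF.getValueType());
    }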
// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -70,6 +90,12 @@ public:
}
};
+template <>
+struct OperandTraits<GlobalIFunc>
+ : public FixedNumOperandTraits<GlobalIFunc, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIFunc, Constant)
+
} // end namespace llvm
#endif // LLVM_IR_GLOBALIFUNC_H
diff --git a/llvm/include/llvm/IR/GlobalIndirectSymbol.h b/llvm/include/llvm/IR/GlobalIndirectSymbol.h
deleted file mode 100644
index e45c7529885d..000000000000
--- a/llvm/include/llvm/IR/GlobalIndirectSymbol.h
+++ /dev/null
@@ -1,93 +0,0 @@
-//===- llvm/GlobalIndirectSymbol.h - GlobalIndirectSymbol class -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the GlobalIndirectSymbol class, which
-// is a base class for GlobalAlias and GlobalIFunc. It contains all common code
-// for aliases and ifuncs.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_IR_GLOBALINDIRECTSYMBOL_H
-#define LLVM_IR_GLOBALINDIRECTSYMBOL_H
-
-#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/OperandTraits.h"
-#include "llvm/IR/User.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Casting.h"
-#include <cstddef>
-
-namespace llvm {
-
-class GlobalIndirectSymbol : public GlobalValue {
-protected:
- GlobalIndirectSymbol(Type *Ty, ValueTy VTy, unsigned AddressSpace,
- LinkageTypes Linkage, const Twine &Name, Constant *Symbol);
-
-public:
- GlobalIndirectSymbol(const GlobalIndirectSymbol &) = delete;
- GlobalIndirectSymbol &operator=(const GlobalIndirectSymbol &) = delete;
-
- // allocate space for exactly one operand
- void *operator new(size_t S) { return User::operator new(S, 1); }
- void operator delete(void *Ptr) { User::operator delete(Ptr); }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
-
- void copyAttributesFrom(const GlobalValue *Src) {
- GlobalValue::copyAttributesFrom(Src);
- }
-
- /// These methods set and retrieve indirect symbol.
- void setIndirectSymbol(Constant *Symbol) {
- setOperand(0, Symbol);
- }
- const Constant *getIndirectSymbol() const {
- return getOperand(0);
- }
- Constant *getIndirectSymbol() {
- return const_cast<Constant *>(
- static_cast<const GlobalIndirectSymbol *>(this)->getIndirectSymbol());
- }
-
- const GlobalObject *getBaseObject() const;
- GlobalObject *getBaseObject() {
- return const_cast<GlobalObject *>(
- static_cast<const GlobalIndirectSymbol *>(this)->getBaseObject());
- }
-
- const GlobalObject *getBaseObject(const DataLayout &DL, APInt &Offset) const {
- return dyn_cast<GlobalObject>(
- getIndirectSymbol()->stripAndAccumulateInBoundsConstantOffsets(DL,
- Offset));
- }
- GlobalObject *getBaseObject(const DataLayout &DL, APInt &Offset) {
- return const_cast<GlobalObject *>(
- static_cast<const GlobalIndirectSymbol *>(this)
- ->getBaseObject(DL, Offset));
- }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const Value *V) {
- return V->getValueID() == Value::GlobalAliasVal ||
- V->getValueID() == Value::GlobalIFuncVal;
- }
-};
-
-template <>
-struct OperandTraits<GlobalIndirectSymbol> :
- public FixedNumOperandTraits<GlobalIndirectSymbol, 1> {
-};
-
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIndirectSymbol, Constant)
-
-} // end namespace llvm
-
-#endif // LLVM_IR_GLOBALINDIRECTSYMBOL_H
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 341fbec66080..e15cf718bb10 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -51,7 +51,7 @@ protected:
Comdat *ObjComdat;
enum {
- LastAlignmentBit = 4,
+ LastAlignmentBit = 5,
HasSectionHashEntryBit,
GlobalObjectBits,
@@ -68,7 +68,7 @@ public:
GlobalObject(const GlobalObject &) = delete;
/// FIXME: Remove this function once transition to Align is over.
- unsigned getAlignment() const {
+ uint64_t getAlignment() const {
MaybeAlign Align = getAlign();
return Align ? Align->value() : 0;
}
@@ -153,7 +153,8 @@ public:
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == Value::FunctionVal ||
- V->getValueID() == Value::GlobalVariableVal;
+ V->getValueID() == Value::GlobalVariableVal ||
+ V->getValueID() == Value::GlobalIFuncVal;
}
private:
diff --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index cf704d1f2374..1818f2a8f3cc 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -302,11 +302,14 @@ public:
static bool isAvailableExternallyLinkage(LinkageTypes Linkage) {
return Linkage == AvailableExternallyLinkage;
}
+ static bool isLinkOnceAnyLinkage(LinkageTypes Linkage) {
+ return Linkage == LinkOnceAnyLinkage;
+ }
static bool isLinkOnceODRLinkage(LinkageTypes Linkage) {
return Linkage == LinkOnceODRLinkage;
}
static bool isLinkOnceLinkage(LinkageTypes Linkage) {
- return Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage;
+ return isLinkOnceAnyLinkage(Linkage) || isLinkOnceODRLinkage(Linkage);
}
static bool isWeakAnyLinkage(LinkageTypes Linkage) {
return Linkage == WeakAnyLinkage;
@@ -433,6 +436,9 @@ public:
return isAvailableExternallyLinkage(getLinkage());
}
bool hasLinkOnceLinkage() const { return isLinkOnceLinkage(getLinkage()); }
+ bool hasLinkOnceAnyLinkage() const {
+ return isLinkOnceAnyLinkage(getLinkage());
+ }
bool hasLinkOnceODRLinkage() const {
return isLinkOnceODRLinkage(getLinkage());
}
@@ -548,10 +554,10 @@ public:
return !(isDeclarationForLinker() || isWeakForLinker());
}
- const GlobalObject *getBaseObject() const;
- GlobalObject *getBaseObject() {
+ const GlobalObject *getAliaseeObject() const;
+ GlobalObject *getAliaseeObject() {
return const_cast<GlobalObject *>(
- static_cast<const GlobalValue *>(this)->getBaseObject());
+ static_cast<const GlobalValue *>(this)->getAliaseeObject());
}
/// Returns whether this is a reference to an absolute symbol.
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 8998ad0f94a9..b4e099e4ec20 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -316,7 +316,7 @@ public:
/// Set the exception handling to be used with constrained floating point
void setDefaultConstrainedExcept(fp::ExceptionBehavior NewExcept) {
#ifndef NDEBUG
- Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(NewExcept);
+ Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(NewExcept);
assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
#endif
DefaultConstrainedExcept = NewExcept;
@@ -325,7 +325,7 @@ public:
/// Set the rounding mode handling to be used with constrained floating point
void setDefaultConstrainedRounding(RoundingMode NewRounding) {
#ifndef NDEBUG
- Optional<StringRef> RoundingStr = RoundingModeToStr(NewRounding);
+ Optional<StringRef> RoundingStr = convertRoundingModeToStr(NewRounding);
assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
#endif
DefaultConstrainedRounding = NewRounding;
@@ -351,7 +351,7 @@ public:
}
void setConstrainedFPCallAttr(CallBase *I) {
- I->addAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
+ I->addFnAttr(Attribute::StrictFP);
}
void setDefaultOperandBundles(ArrayRef<OperandBundleDef> OpBundles) {
@@ -697,12 +697,16 @@ public:
MDNode *TBAAStructTag = nullptr, MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
- /// Create a vector fadd reduction intrinsic of the source vector.
- /// The first parameter is a scalar accumulator value for ordered reductions.
+ /// Create a sequential vector fadd reduction intrinsic of the source vector.
+ /// The first parameter is a scalar accumulator value. An unordered reduction
+ /// can be created by adding the reassoc fast-math flag to the resulting
+ /// sequential reduction.
CallInst *CreateFAddReduce(Value *Acc, Value *Src);
- /// Create a vector fmul reduction intrinsic of the source vector.
- /// The first parameter is a scalar accumulator value for ordered reductions.
+ /// Create a sequential vector fmul reduction intrinsic of the source vector.
+ /// The first parameter is a scalar accumulator value. An unordered reduction
+ /// can be created by adding the reassoc fast-math flag to the resulting
+ /// sequential reduction.
CallInst *CreateFMulReduce(Value *Acc, Value *Src);
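As the reworded comments say, the emitted reduction is sequential, and an unordered form is obtained by tagging the call with the reassoc fast-math flag. A sketch, assuming Acc is a scalar float and Src a matching float vector; the helper name buildFAddReduction is illustrative:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Illustrative: build an ordered or reassociable fadd reduction of Src
    // seeded with Acc.
    Value *buildFAddReduction(IRBuilder<> &B, Value *Acc, Value *Src,
                              bool Unordered) {
      CallInst *Red = B.CreateFAddReduce(Acc, Src);
      if (Unordered) {
        // 'reassoc' relaxes the sequential semantics of the intrinsic call.
        FastMathFlags FMF = Red->getFastMathFlags();
        FMF.setAllowReassoc();
        Red->setFastMathFlags(FMF);
      }
      return Red;
    }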
/// Create a vector int add reduction intrinsic of the source vector.
@@ -1172,7 +1176,7 @@ private:
if (Rounding.hasValue())
UseRounding = Rounding.getValue();
- Optional<StringRef> RoundingStr = RoundingModeToStr(UseRounding);
+ Optional<StringRef> RoundingStr = convertRoundingModeToStr(UseRounding);
assert(RoundingStr.hasValue() && "Garbage strict rounding mode!");
auto *RoundingMDS = MDString::get(Context, RoundingStr.getValue());
@@ -1185,7 +1189,7 @@ private:
if (Except.hasValue())
UseExcept = Except.getValue();
- Optional<StringRef> ExceptStr = ExceptionBehaviorToStr(UseExcept);
+ Optional<StringRef> ExceptStr = convertExceptionBehaviorToStr(UseExcept);
assert(ExceptStr.hasValue() && "Garbage strict exception behavior!");
auto *ExceptMDS = MDString::get(Context, ExceptStr.getValue());
@@ -2448,6 +2452,16 @@ public:
return CreateExtractElement(Vec, getInt64(Idx), Name);
}
+ Value *CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx,
+ const Twine &Name = "") {
+ return CreateInsertElement(PoisonValue::get(VecTy), NewElt, Idx, Name);
+ }
+
+ Value *CreateInsertElement(Type *VecTy, Value *NewElt, uint64_t Idx,
+ const Twine &Name = "") {
+ return CreateInsertElement(PoisonValue::get(VecTy), NewElt, Idx, Name);
+ }
+
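The two overloads above start from a poison base vector instead of an explicit first operand. A short sketch, with an assumed <4 x float> element type; the name seedLaneZero is chosen for illustration:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Illustrative: begin a <4 x float> value from poison and fill lane 0,
    // leaving the remaining lanes for later inserts or a splat shuffle.
    Value *seedLaneZero(IRBuilder<> &B, Value *Scalar) {
      Type *VecTy = FixedVectorType::get(B.getFloatTy(), 4);
      return B.CreateInsertElement(VecTy, Scalar, uint64_t(0), "vec.seed");
    }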
Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
const Twine &Name = "") {
if (auto *VC = dyn_cast<Constant>(Vec))
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index ef2c279ed455..143a87f4997d 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
@@ -755,6 +756,20 @@ public:
using PredicateField =
Bitfield::Element<Predicate, 0, 6, LAST_ICMP_PREDICATE>;
+ /// Returns the sequence of all FCmp predicates.
+ static auto FCmpPredicates() {
+ return enum_seq_inclusive(Predicate::FIRST_FCMP_PREDICATE,
+ Predicate::LAST_FCMP_PREDICATE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ /// Returns the sequence of all ICmp predicates.
+ static auto ICmpPredicates() {
+ return enum_seq_inclusive(Predicate::FIRST_ICMP_PREDICATE,
+ Predicate::LAST_ICMP_PREDICATE,
+ force_iteration_on_noniterable_enum);
+ }
+
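The two helpers return iterable sequences built with enum_seq_inclusive, so predicate loops no longer need manual integer casts over the enum range. A sketch; the function name dumpICmpPredicates is illustrative:

    #include "llvm/IR/InstrTypes.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Illustrative: walk every integer comparison predicate by name.
    void dumpICmpPredicates() {
      for (CmpInst::Predicate Pred : CmpInst::ICmpPredicates())
        errs() << CmpInst::getPredicateName(Pred) << "\n";
    }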
protected:
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name = "",
@@ -1325,33 +1340,23 @@ public:
bool arg_empty() const { return arg_end() == arg_begin(); }
unsigned arg_size() const { return arg_end() - arg_begin(); }
- // Legacy API names that duplicate the above and will be removed once users
- // are migrated.
- iterator_range<User::op_iterator> arg_operands() {
- return make_range(arg_begin(), arg_end());
- }
- iterator_range<User::const_op_iterator> arg_operands() const {
- return make_range(arg_begin(), arg_end());
- }
- unsigned getNumArgOperands() const { return arg_size(); }
-
Value *getArgOperand(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
return getOperand(i);
}
void setArgOperand(unsigned i, Value *v) {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
setOperand(i, v);
}
/// Wrappers for getting the \c Use of a call argument.
const Use &getArgOperandUse(unsigned i) const {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
return User::getOperandUse(i);
}
Use &getArgOperandUse(unsigned i) {
- assert(i < getNumArgOperands() && "Out of bounds!");
+ assert(i < arg_size() && "Out of bounds!");
return User::getOperandUse(i);
}
@@ -1485,92 +1490,104 @@ public:
/// the attribute is allowed for the call.
bool hasFnAttr(StringRef Kind) const { return hasFnAttrImpl(Kind); }
+ // TODO: remove non-AtIndex versions of these methods.
/// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+ void addAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) {
+ Attrs = Attrs.addAttributeAtIndex(getContext(), i, Kind);
}
/// adds the attribute to the list of attributes.
- void addAttribute(unsigned i, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Attr);
- setAttributes(PAL);
+ void addAttributeAtIndex(unsigned i, Attribute Attr) {
+ Attrs = Attrs.addAttributeAtIndex(getContext(), i, Attr);
+ }
+
+ /// Adds the attribute to the function.
+ void addFnAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.addFnAttribute(getContext(), Kind);
+ }
+
+ /// Adds the attribute to the function.
+ void addFnAttr(Attribute Attr) {
+ Attrs = Attrs.addFnAttribute(getContext(), Attr);
+ }
+
+ /// Adds the attribute to the return value.
+ void addRetAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.addRetAttribute(getContext(), Kind);
+ }
+
+ /// Adds the attribute to the return value.
+ void addRetAttr(Attribute Attr) {
+ Attrs = Attrs.addRetAttribute(getContext(), Attr);
}
/// Adds the attribute to the indicated argument
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.addParamAttribute(getContext(), ArgNo, Kind);
}
/// Adds the attribute to the indicated argument
void addParamAttr(unsigned ArgNo, Attribute Attr) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.addParamAttribute(getContext(), ArgNo, Attr);
}
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+ void removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) {
+ Attrs = Attrs.removeAttributeAtIndex(getContext(), i, Kind);
}
/// removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+ void removeAttributeAtIndex(unsigned i, StringRef Kind) {
+ Attrs = Attrs.removeAttributeAtIndex(getContext(), i, Kind);
+ }
+
+ /// Removes the attributes from the function
+ void removeFnAttrs(const AttrBuilder &AttrsToRemove) {
+ Attrs = Attrs.removeFnAttributes(getContext(), AttrsToRemove);
+ }
+
+ /// Removes the attribute from the function
+ void removeFnAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.removeFnAttribute(getContext(), Kind);
}
- void removeAttributes(unsigned i, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttributes(getContext(), i, Attrs);
- setAttributes(PAL);
+ /// Removes the attribute from the return value
+ void removeRetAttr(Attribute::AttrKind Kind) {
+ Attrs = Attrs.removeRetAttribute(getContext(), Kind);
+ }
+
+ /// Removes the attributes from the return value
+ void removeRetAttrs(const AttrBuilder &AttrsToRemove) {
+ Attrs = Attrs.removeRetAttributes(getContext(), AttrsToRemove);
}
/// Removes the attribute from the given argument
void removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.removeParamAttribute(getContext(), ArgNo, Kind);
}
/// Removes the attribute from the given argument
void removeParamAttr(unsigned ArgNo, StringRef Kind) {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ assert(ArgNo < arg_size() && "Out of bounds");
+ Attrs = Attrs.removeParamAttribute(getContext(), ArgNo, Kind);
}
/// Removes the attributes from the given argument
- void removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs);
- setAttributes(PAL);
+ void removeParamAttrs(unsigned ArgNo, const AttrBuilder &AttrsToRemove) {
+ Attrs = Attrs.removeParamAttributes(getContext(), ArgNo, AttrsToRemove);
}
/// adds the dereferenceable attribute to the list of attributes.
- void addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+ void addDereferenceableParamAttr(unsigned i, uint64_t Bytes) {
+ Attrs = Attrs.addDereferenceableParamAttr(getContext(), i, Bytes);
}
- /// adds the dereferenceable_or_null attribute to the list of
- /// attributes.
- void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+ /// adds the dereferenceable attribute to the list of attributes.
+ void addDereferenceableRetAttr(uint64_t Bytes) {
+ Attrs = Attrs.addDereferenceableRetAttr(getContext(), Bytes);
}
/// Determine whether the return value has the given attribute.
@@ -1584,24 +1601,34 @@ public:
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const;
/// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().getAttribute(i, Kind);
+ Attribute getAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) const {
+ return getAttributes().getAttributeAtIndex(i, Kind);
}
/// Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, StringRef Kind) const {
- return getAttributes().getAttribute(i, Kind);
+ Attribute getAttributeAtIndex(unsigned i, StringRef Kind) const {
+ return getAttributes().getAttributeAtIndex(i, Kind);
+ }
+
+ /// Get the attribute of a given kind for the function.
+ Attribute getFnAttr(StringRef Kind) const {
+ return getAttributes().getFnAttr(Kind);
+ }
+
+ /// Get the attribute of a given kind for the function.
+ Attribute getFnAttr(Attribute::AttrKind Kind) const {
+ return getAttributes().getFnAttr(Kind);
}
/// Get the attribute of a given kind from a given arg
Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ assert(ArgNo < arg_size() && "Out of bounds");
return getAttributes().getParamAttr(ArgNo, Kind);
}
/// Get the attribute of a given kind from a given arg
Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
- assert(ArgNo < getNumArgOperands() && "Out of bounds");
+ assert(ArgNo < arg_size() && "Out of bounds");
return getAttributes().getParamAttr(ArgNo, Kind);
}
@@ -1609,42 +1636,35 @@ public:
/// A.
///
/// Data operands include call arguments and values used in operand bundles,
- /// but does not include the callee operand. This routine dispatches to the
- /// underlying AttributeList or the OperandBundleUser as appropriate.
+ /// but does not include the callee operand.
///
/// The index \p i is interpreted as
///
- /// \p i == Attribute::ReturnIndex -> the return value
- /// \p i in [1, arg_size + 1) -> argument number (\p i - 1)
- /// \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
- /// (\p i - 1) in the operand list.
+ /// \p i in [0, arg_size) -> argument number (\p i)
+ /// \p i in [arg_size, data_operand_size) -> bundle operand at index
+ /// (\p i) in the operand list.
bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const {
 // Data operand indices are zero-based: arguments come first, then bundle
 // operands.
- assert(i < (getNumArgOperands() + getNumTotalBundleOperands() + 1) &&
+ assert(i < arg_size() + getNumTotalBundleOperands() &&
"Data operand index out of bounds!");
// The attribute A can either be directly specified, if the operand in
// question is a call argument; or be indirectly implied by the kind of its
// containing operand bundle, if the operand is a bundle operand.
- if (i == AttributeList::ReturnIndex)
- return hasRetAttr(Kind);
-
- // FIXME: Avoid these i - 1 calculations and update the API to use
- // zero-based indices.
- if (i < (getNumArgOperands() + 1))
- return paramHasAttr(i - 1, Kind);
+ if (i < arg_size())
+ return paramHasAttr(i, Kind);
- assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) &&
+ assert(hasOperandBundles() && i >= getBundleOperandsStartIndex() &&
"Must be either a call argument or an operand bundle!");
- return bundleOperandHasAttr(i - 1, Kind);
+ return bundleOperandHasAttr(i, Kind);
}
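With the hunk above, data-operand indices are zero-based and no longer reserve slot 0 for the return value, so argument numbers pass through unchanged. A sketch; the wrapper name argIsNoCapture is illustrative:

    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // Illustrative: is the ArgNo'th call argument known not to be captured,
    // either via a parameter attribute or its operand bundle?
    bool argIsNoCapture(const CallBase &CB, unsigned ArgNo) {
      // Arguments occupy [0, arg_size()); bundle operands follow them.
      return CB.dataOperandHasImpliedAttr(ArgNo, Attribute::NoCapture);
    }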
/// Determine whether this data operand is not captured.
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool doesNotCapture(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::NoCapture);
}
/// Determine whether this argument is passed by value.
@@ -1685,21 +1705,21 @@ public:
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool doesNotAccessMemory(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone);
}
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool onlyReadsMemory(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) ||
- dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::ReadOnly) ||
+ dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone);
}
// FIXME: Once this API is no longer duplicated in `CallSite`, rename this to
// better indicate that this may return a conservative answer.
bool doesNotReadMemory(unsigned OpNo) const {
- return dataOperandHasImpliedAttr(OpNo + 1, Attribute::WriteOnly) ||
- dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone);
+ return dataOperandHasImpliedAttr(OpNo, Attribute::WriteOnly) ||
+ dataOperandHasImpliedAttr(OpNo, Attribute::ReadNone);
}
/// Extract the alignment of the return value.
@@ -1743,14 +1763,26 @@ public:
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
- uint64_t getDereferenceableBytes(unsigned i) const {
- return Attrs.getDereferenceableBytes(i);
+ uint64_t getRetDereferenceableBytes() const {
+ return Attrs.getRetDereferenceableBytes();
+ }
+
+ /// Extract the number of dereferenceable bytes for a call or
+ /// parameter (0=unknown).
+ uint64_t getParamDereferenceableBytes(unsigned i) const {
+ return Attrs.getParamDereferenceableBytes(i);
}
- /// Extract the number of dereferenceable_or_null bytes for a call or
+ /// Extract the number of dereferenceable_or_null bytes for a call
+ /// (0=unknown).
+ uint64_t getRetDereferenceableOrNullBytes() const {
+ return Attrs.getRetDereferenceableOrNullBytes();
+ }
+
+ /// Extract the number of dereferenceable_or_null bytes for a
/// parameter (0=unknown).
- uint64_t getDereferenceableOrNullBytes(unsigned i) const {
- return Attrs.getDereferenceableOrNullBytes(i);
+ uint64_t getParamDereferenceableOrNullBytes(unsigned i) const {
+ return Attrs.getParamDereferenceableOrNullBytes(i);
}
/// Return true if the return value is known to be not null.
@@ -1760,7 +1792,7 @@ public:
/// Determine if the return value is marked with NoAlias attribute.
bool returnDoesNotAlias() const {
- return Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ return Attrs.hasRetAttr(Attribute::NoAlias);
}
/// If one of the arguments has the 'returned' attribute, returns its
@@ -1779,40 +1811,30 @@ public:
/// Return true if the call should not be inlined.
bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
- void setIsNoInline() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
- }
+ void setIsNoInline() { addFnAttr(Attribute::NoInline); }
/// Determine if the call does not access memory.
bool doesNotAccessMemory() const { return hasFnAttr(Attribute::ReadNone); }
- void setDoesNotAccessMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
- }
+ void setDoesNotAccessMemory() { addFnAttr(Attribute::ReadNone); }
/// Determine if the call does not access or only reads memory.
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
}
- void setOnlyReadsMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReadOnly);
- }
+ void setOnlyReadsMemory() { addFnAttr(Attribute::ReadOnly); }
/// Determine if the call does not access or only writes memory.
bool doesNotReadMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
}
- void setDoesNotReadMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::WriteOnly);
- }
+ void setDoesNotReadMemory() { addFnAttr(Attribute::WriteOnly); }
 /// Determine if the call can access memory only using pointers based
/// on its arguments.
bool onlyAccessesArgMemory() const {
return hasFnAttr(Attribute::ArgMemOnly);
}
- void setOnlyAccessesArgMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ArgMemOnly);
- }
+ void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); }
/// Determine if the function may only access memory that is
/// inaccessible from the IR.
@@ -1820,7 +1842,7 @@ public:
return hasFnAttr(Attribute::InaccessibleMemOnly);
}
void setOnlyAccessesInaccessibleMemory() {
- addAttribute(AttributeList::FunctionIndex, Attribute::InaccessibleMemOnly);
+ addFnAttr(Attribute::InaccessibleMemOnly);
}
/// Determine if the function may only access memory that is
@@ -1829,49 +1851,36 @@ public:
return hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
void setOnlyAccessesInaccessibleMemOrArgMem() {
- addAttribute(AttributeList::FunctionIndex,
- Attribute::InaccessibleMemOrArgMemOnly);
+ addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
}
/// Determine if the call cannot return.
bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
- void setDoesNotReturn() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
- }
+ void setDoesNotReturn() { addFnAttr(Attribute::NoReturn); }
/// Determine if the call should not perform indirect branch tracking.
bool doesNoCfCheck() const { return hasFnAttr(Attribute::NoCfCheck); }
/// Determine if the call cannot unwind.
bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
- void setDoesNotThrow() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
- }
+ void setDoesNotThrow() { addFnAttr(Attribute::NoUnwind); }
/// Determine if the invoke cannot be duplicated.
bool cannotDuplicate() const { return hasFnAttr(Attribute::NoDuplicate); }
- void setCannotDuplicate() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoDuplicate);
- }
+ void setCannotDuplicate() { addFnAttr(Attribute::NoDuplicate); }
/// Determine if the call cannot be tail merged.
bool cannotMerge() const { return hasFnAttr(Attribute::NoMerge); }
- void setCannotMerge() {
- addAttribute(AttributeList::FunctionIndex, Attribute::NoMerge);
- }
+ void setCannotMerge() { addFnAttr(Attribute::NoMerge); }
/// Determine if the invoke is convergent
bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
- void setConvergent() {
- addAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
- void setNotConvergent() {
- removeAttribute(AttributeList::FunctionIndex, Attribute::Convergent);
- }
+ void setConvergent() { addFnAttr(Attribute::Convergent); }
+ void setNotConvergent() { removeFnAttr(Attribute::Convergent); }
/// Determine if the call returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {
- if (getNumArgOperands() == 0)
+ if (arg_empty())
return false;
// Be friendly and also check the callee.
@@ -1918,6 +1927,13 @@ public:
Idx < getBundleOperandsEndIndex();
}
+ /// Return true if the operand at index \p Idx is a bundle operand that has
+ /// tag ID \p ID.
+ bool isOperandBundleOfType(uint32_t ID, unsigned Idx) const {
+ return isBundleOperand(Idx) &&
+ getOperandBundleForOperand(Idx).getTagID() == ID;
+ }
+
/// Returns true if the use is a bundle operand.
bool isBundleOperand(const Use *U) const {
assert(this == U->getUser() &&
@@ -2258,7 +2274,7 @@ private:
bool hasFnAttrOnCalledFunction(StringRef Kind) const;
template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
- if (Attrs.hasFnAttribute(Kind))
+ if (Attrs.hasFnAttr(Kind))
return true;
// Operand bundles override attributes on the called function, but don't
@@ -2272,12 +2288,12 @@ private:
/// Determine whether the return value has the given attribute. Supports
/// Attribute::AttrKind and StringRef as \p AttrKind types.
template <typename AttrKind> bool hasRetAttrImpl(AttrKind Kind) const {
- if (Attrs.hasAttribute(AttributeList::ReturnIndex, Kind))
+ if (Attrs.hasRetAttr(Kind))
return true;
// Look at the callee, if available.
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeList::ReturnIndex, Kind);
+ return F->getAttributes().hasRetAttr(Kind);
return false;
}
};
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index deb85cf277fe..9878082ffffa 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -59,11 +59,11 @@ protected:
 // Template alias so that all Instructions storing an alignment use the same
 // definition.
// Valid alignments are powers of two from 2^0 to 2^MaxAlignmentExponent =
- // 2^29. We store them as Log2(Alignment), so we need 5 bits to encode the 30
+ // 2^32. We store them as Log2(Alignment), so we need 6 bits to encode the 33
// possible values.
template <unsigned Offset>
using AlignmentBitfieldElementT =
- typename Bitfield::Element<unsigned, Offset, 5,
+ typename Bitfield::Element<unsigned, Offset, 6,
Value::MaxAlignmentExponent>;
template <unsigned Offset>
@@ -307,11 +307,6 @@ public:
Value::getAllMetadata(MDs);
}
- /// Fills the AAMDNodes structure with AA metadata from this instruction.
- /// When Merge is true, the existing AA metadata is merged with that from this
- /// instruction providing the most-general result.
- void getAAMetadata(AAMDNodes &N, bool Merge = false) const;
-
/// Set the metadata of the specified kind to the specified node. This updates
/// or replaces metadata if already present, or removes it if Node is null.
void setMetadata(unsigned KindID, MDNode *Node);
@@ -352,7 +347,10 @@ public:
/// to the existing node.
void addAnnotationMetadata(StringRef Annotation);
- /// Sets the metadata on this instruction from the AAMDNodes structure.
+ /// Returns the AA metadata for this instruction.
+ AAMDNodes getAAMetadata() const;
+
+ /// Sets the AA metadata on this instruction from the AAMDNodes structure.
void setAAMetadata(const AAMDNodes &N);
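getAAMetadata() now returns the AAMDNodes by value rather than filling an out-parameter, and the merging variant is gone. A minimal sketch of copying AA metadata between instructions; the helper name copyAAMetadata is illustrative:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Illustrative: propagate tbaa/scope/noalias metadata from From to To.
    void copyAAMetadata(const Instruction &From, Instruction &To) {
      AAMDNodes AANodes = From.getAAMetadata(); // returned by value now
      To.setAAMetadata(AANodes);
    }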
/// Retrieve the raw weight values of a conditional branch or select.
@@ -389,6 +387,10 @@ public:
/// Determine whether the no signed wrap flag is set.
bool hasNoSignedWrap() const;
+ /// Return true if this operator has flags which may cause this instruction
+ /// to evaluate to poison despite having non-poison inputs.
+ bool hasPoisonGeneratingFlags() const;
+
/// Drops flags that may cause this instruction to evaluate to poison despite
/// having non-poison inputs.
void dropPoisonGeneratingFlags();
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 0c43a56daa33..6d32a898b668 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -126,7 +126,7 @@ public:
}
// FIXME: Remove this one transition to Align is over.
- unsigned getAlignment() const { return getAlign().value(); }
+ uint64_t getAlignment() const { return getAlign().value(); }
/// Return true if this alloca is in the entry block of the function and is a
/// constant size. If so, the code generator will fold it into the
@@ -217,7 +217,7 @@ public:
/// Return the alignment of the access that is being performed.
/// FIXME: Remove this function once transition to Align is over.
/// Use getAlign() instead.
- unsigned getAlignment() const { return getAlign().value(); }
+ uint64_t getAlignment() const { return getAlign().value(); }
/// Return the alignment of the access that is being performed.
Align getAlign() const {
@@ -348,7 +348,7 @@ public:
/// Return the alignment of the access that is being performed
/// FIXME: Remove this function once transition to Align is over.
/// Use getAlign() instead.
- unsigned getAlignment() const { return getAlign().value(); }
+ uint64_t getAlignment() const { return getAlign().value(); }
Align getAlign() const {
return Align(1ULL << (getSubclassData<AlignmentField>()));
@@ -1339,6 +1339,10 @@ public:
return P == ICMP_SLE || P == ICMP_ULE;
}
+ /// Returns the sequence of all ICmp predicates.
+ ///
+ static auto predicates() { return ICmpPredicates(); }
+
/// Exchange the two operands to this instruction in such a way that it does
/// not modify the semantics of the instruction. The predicate value may be
/// changed to retain the same result if the predicate is order dependent
@@ -1349,6 +1353,10 @@ public:
Op<0>().swap(Op<1>());
}
+ /// Return result of `LHS Pred RHS` comparison.
+ static bool compare(const APInt &LHS, const APInt &RHS,
+ ICmpInst::Predicate Pred);
+
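The static compare() helper evaluates an integer predicate directly on APInt constants. A sketch; the wrapper name isSignedLess is illustrative:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Illustrative: fold `L slt R` for two 32-bit signed constants.
    bool isSignedLess(int32_t L, int32_t R) {
      APInt LHS(32, L, /*isSigned=*/true);
      APInt RHS(32, R, /*isSigned=*/true);
      return ICmpInst::compare(LHS, RHS, ICmpInst::ICMP_SLT);
    }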
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ICmp;
@@ -1457,6 +1465,10 @@ public:
Op<0>().swap(Op<1>());
}
+ /// Returns the sequence of all FCmp predicates.
+ ///
+ static auto predicates() { return FCmpPredicates(); }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::FCmp;
@@ -1685,9 +1697,7 @@ public:
/// Return true if the call can return twice
bool canReturnTwice() const { return hasFnAttr(Attribute::ReturnsTwice); }
- void setCanReturnTwice() {
- addAttribute(AttributeList::FunctionIndex, Attribute::ReturnsTwice);
- }
+ void setCanReturnTwice() { addFnAttr(Attribute::ReturnsTwice); }
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
@@ -2019,6 +2029,14 @@ protected:
ShuffleVectorInst *cloneImpl() const;
public:
+ ShuffleVectorInst(Value *V1, Value *Mask, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr);
+ ShuffleVectorInst(Value *V1, Value *Mask, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
+ ShuffleVectorInst(Value *V1, ArrayRef<int> Mask, const Twine &NameStr = "",
+ Instruction *InsertBefore = nullptr);
+ ShuffleVectorInst(Value *V1, ArrayRef<int> Mask, const Twine &NameStr,
+ BasicBlock *InsertAtEnd);
ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &NameStr = "",
Instruction *InsertBefor = nullptr);
@@ -2306,6 +2324,57 @@ public:
return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index);
}
+ /// Return true if this shuffle mask is an insert subvector mask.
+ /// A valid insert subvector mask inserts the lowest elements of a second
+ /// source operand into an in-place first source operand.
+ /// Both the subvector width and the insertion index are returned.
+ static bool isInsertSubvectorMask(ArrayRef<int> Mask, int NumSrcElts,
+ int &NumSubElts, int &Index);
+ static bool isInsertSubvectorMask(const Constant *Mask, int NumSrcElts,
+ int &NumSubElts, int &Index) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isInsertSubvectorMask(MaskAsInts, NumSrcElts, NumSubElts, Index);
+ }
+
+ /// Return true if this shuffle mask is an insert subvector mask.
+ bool isInsertSubvectorMask(int &NumSubElts, int &Index) const {
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ int NumSrcElts =
+ cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ return isInsertSubvectorMask(ShuffleMask, NumSrcElts, NumSubElts, Index);
+ }
+
+ /// Return true if this shuffle mask replicates each of the \p VF elements
+ /// in a vector \p ReplicationFactor times.
+ /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
+ /// <0,0,0,1,1,1,2,2,2,3,3,3>
+ static bool isReplicationMask(ArrayRef<int> Mask, int &ReplicationFactor,
+ int &VF);
+ static bool isReplicationMask(const Constant *Mask, int &ReplicationFactor,
+ int &VF) {
+ assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant.");
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(Mask->getType()))
+ return false;
+ SmallVector<int, 16> MaskAsInts;
+ getShuffleMask(Mask, MaskAsInts);
+ return isReplicationMask(MaskAsInts, ReplicationFactor, VF);
+ }
+
+ /// Return true if this shuffle mask is a replication mask.
+ bool isReplicationMask(int &ReplicationFactor, int &VF) const;
+
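A short sketch of querying the replication-mask predicate, matching the <0,0,0,1,1,1,2,2,2,3,3,3> example in the comment above; the helper name describeShuffle is illustrative:

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Illustrative: report whether Shuf merely replicates each input lane.
    void describeShuffle(const ShuffleVectorInst &Shuf) {
      int ReplicationFactor = 0, VF = 0;
      if (Shuf.isReplicationMask(ReplicationFactor, VF))
        errs() << "replicates " << VF << " lanes " << ReplicationFactor
               << " times\n";
    }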
/// Change values in a shuffle permute mask assuming the two vector operands
/// of length InVecNumElts have swapped position.
static void commuteShuffleMask(MutableArrayRef<int> Mask,
@@ -3281,14 +3350,14 @@ public:
CaseHandle(SwitchInst *SI, ptrdiff_t Index) : CaseHandleImpl(SI, Index) {}
/// Sets the new value for current case.
- void setValue(ConstantInt *V) {
+ void setValue(ConstantInt *V) const {
assert((unsigned)Index < SI->getNumCases() &&
"Index out the number of cases.");
SI->setOperand(2 + Index*2, reinterpret_cast<Value*>(V));
}
/// Sets the new successor for current case.
- void setSuccessor(BasicBlock *S) {
+ void setSuccessor(BasicBlock *S) const {
SI->setSuccessor(getSuccessorIndex(), S);
}
};
@@ -3297,7 +3366,7 @@ public:
class CaseIteratorImpl
: public iterator_facade_base<CaseIteratorImpl<CaseHandleT>,
std::random_access_iterator_tag,
- CaseHandleT> {
+ const CaseHandleT> {
using SwitchInstT = typename CaseHandleT::SwitchInstType;
CaseHandleT Case;
@@ -3356,7 +3425,6 @@ public:
assert(Case.SI == RHS.Case.SI && "Incompatible operators.");
return Case.Index < RHS.Case.Index;
}
- CaseHandleT &operator*() { return Case; }
const CaseHandleT &operator*() const { return Case; }
};
@@ -3446,15 +3514,12 @@ public:
/// default case iterator to indicate that it is handled by the default
/// handler.
CaseIt findCaseValue(const ConstantInt *C) {
- CaseIt I = llvm::find_if(
- cases(), [C](CaseHandle &Case) { return Case.getCaseValue() == C; });
- if (I != case_end())
- return I;
-
- return case_default();
+ return CaseIt(
+ this,
+ const_cast<const SwitchInst *>(this)->findCaseValue(C)->getCaseIndex());
}
ConstCaseIt findCaseValue(const ConstantInt *C) const {
- ConstCaseIt I = llvm::find_if(cases(), [C](ConstCaseHandle &Case) {
+ ConstCaseIt I = llvm::find_if(cases(), [C](const ConstCaseHandle &Case) {
return Case.getCaseValue() == C;
});
if (I != case_end())
@@ -4069,14 +4134,12 @@ public:
///
Value *getIndirectDestLabel(unsigned i) const {
assert(i < getNumIndirectDests() && "Out of bounds!");
- return getOperand(i + getNumArgOperands() + getNumTotalBundleOperands() +
- 1);
+ return getOperand(i + arg_size() + getNumTotalBundleOperands() + 1);
}
Value *getIndirectDestLabelUse(unsigned i) const {
assert(i < getNumIndirectDests() && "Out of bounds!");
- return getOperandUse(i + getNumArgOperands() + getNumTotalBundleOperands() +
- 1);
+ return getOperandUse(i + arg_size() + getNumTotalBundleOperands() + 1);
}
// Return the destination basic blocks...
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 6b42cb949050..d186029db8cf 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -448,6 +448,28 @@ public:
static Optional<unsigned> getFunctionalOpcodeForVP(Intrinsic::ID ID);
};
+/// This represents vector predication reduction intrinsics.
+class VPReductionIntrinsic : public VPIntrinsic {
+public:
+ static bool isVPReduction(Intrinsic::ID ID);
+
+ unsigned getStartParamPos() const;
+ unsigned getVectorParamPos() const;
+
+ static Optional<unsigned> getStartParamPos(Intrinsic::ID ID);
+ static Optional<unsigned> getVectorParamPos(Intrinsic::ID ID);
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// @{
+ static bool classof(const IntrinsicInst *I) {
+ return VPReductionIntrinsic::isVPReduction(I->getIntrinsicID());
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+ /// @}
+};
+
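A sketch of dispatching on the new class and pulling out its operands; the helper name getVPReductionOperands is illustrative:

    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    // Illustrative: extract the start value and vector operand of a
    // vector-predicated reduction call, if I is one.
    bool getVPReductionOperands(const Instruction &I, Value *&Start,
                                Value *&Vec) {
      if (const auto *VPRed = dyn_cast<VPReductionIntrinsic>(&I)) {
        Start = VPRed->getArgOperand(VPRed->getStartParamPos());
        Vec = VPRed->getArgOperand(VPRed->getVectorParamPos());
        return true;
      }
      return false;
    }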
/// This is the common base class for constrained floating point intrinsics.
class ConstrainedFPIntrinsic : public IntrinsicInst {
public:
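To illustrate the new VPReductionIntrinsic class above, a short hedged sketch of how a pass might query it; only the class and its accessors come from this patch, while the surrounding helper is hypothetical:

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Pick out the scalar start value and the vector operand of a VP reduction.
static void inspectVPReduction(Instruction &I) {
  if (auto *VPR = dyn_cast<VPReductionIntrinsic>(&I)) {
    Value *Start = VPR->getArgOperand(VPR->getStartParamPos());
    Value *Vec = VPR->getArgOperand(VPR->getVectorParamPos());
    (void)Start;
    (void)Vec;
  }
}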
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 80a2f5a8cd3e..2ff48380ac28 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -140,7 +140,8 @@ namespace Intrinsic {
Subdivide2Argument,
Subdivide4Argument,
VecOfBitcastsToInt,
- AMX
+ AMX,
+ PPCQuad,
} Kind;
union {
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 28fcc13266b1..637e6d8f6cf5 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -312,6 +312,8 @@ def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128
def llvm_v2f16_ty : LLVMType<v2f16>; // 2 x half (__fp16)
def llvm_v4f16_ty : LLVMType<v4f16>; // 4 x half (__fp16)
def llvm_v8f16_ty : LLVMType<v8f16>; // 8 x half (__fp16)
+def llvm_v16f16_ty : LLVMType<v16f16>; // 16 x half (__fp16)
+def llvm_v32f16_ty : LLVMType<v32f16>; // 32 x half (__fp16)
def llvm_v2bf16_ty : LLVMType<v2bf16>; // 2 x bfloat (__bf16)
def llvm_v4bf16_ty : LLVMType<v4bf16>; // 4 x bfloat (__bf16)
def llvm_v8bf16_ty : LLVMType<v8bf16>; // 8 x bfloat (__bf16)
@@ -1329,10 +1331,10 @@ def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, IntrWillReturn]>;
// The pseudoprobe intrinsic works as a placeholder for the block it probes.
-// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
-// optimizer as having opaque side effects so that it won't be get rid of or moved
+// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
+// optimizer as having opaque side effects so that it won't be removed or moved
// out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+def int_pseudoprobe : DefaultAttrsIntrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOnly, IntrWillReturn]>;
// Arithmetic fence intrinsic.
@@ -1497,12 +1499,96 @@ let IntrProperties =
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty]>;
}
+// Shuffles.
+def int_vp_select : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty]>;
+
+// Reductions
+let IntrProperties = [IntrSpeculatable, IntrNoMem, IntrNoSync, IntrWillReturn] in {
+ def int_vp_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fmul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_add : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_mul : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_and : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_or : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_xor : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_smax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_smin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_umax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_umin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fmax : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+ def int_vp_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [LLVMVectorElementType<0>,
+ llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty]>;
+}
def int_get_active_lane_mask:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, IntrNoSync, IntrWillReturn]>;
+def int_experimental_vp_splice:
+ DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_i32_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
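As a concrete illustration of the VP reduction signatures defined above (scalar start value, vector, mask, i32 explicit vector length), a hedged IRBuilder sketch; the operand values are assumed to exist and to have matching types:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Emit llvm.vp.reduce.add, overloaded on the vector operand's type.
static Value *emitVPReduceAdd(IRBuilder<> &B, Module *M, Value *Start,
                              Value *Vec, Value *Mask, Value *EVL) {
  Function *Decl = Intrinsic::getDeclaration(M, Intrinsic::vp_reduce_add,
                                             {Vec->getType()});
  return B.CreateCall(Decl, {Start, Vec, Mask, EVL});
}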
//===-------------------------- Masked Intrinsics -------------------------===//
//
def int_masked_load:
@@ -1558,12 +1644,15 @@ def int_icall_branch_funnel : DefaultAttrsIntrinsic<[], [llvm_vararg_ty], []>;
def int_load_relative: DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
+def int_asan_check_memaccess :
+ Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty], [ImmArg<ArgIndex<1>>]>;
+
def int_hwasan_check_memaccess :
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>;
+ [ImmArg<ArgIndex<2>>]>;
def int_hwasan_check_memaccess_shortgranules :
Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_i32_ty],
- [IntrInaccessibleMemOnly, ImmArg<ArgIndex<2>>]>;
+ [ImmArg<ArgIndex<2>>]>;
// Xray intrinsics
//===----------------------------------------------------------------------===//
@@ -1658,7 +1747,7 @@ def int_matrix_multiply
def int_matrix_column_major_load
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMPointerToElt<0>, llvm_i64_ty, llvm_i1_ty,
+ [LLVMPointerToElt<0>, llvm_anyint_ty, llvm_i1_ty,
llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem,
NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
@@ -1667,7 +1756,7 @@ def int_matrix_column_major_load
def int_matrix_column_major_store
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMPointerToElt<0>,
- llvm_i64_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty],
+ llvm_anyint_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem,
WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>]>;
@@ -1761,6 +1850,61 @@ def int_experimental_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+//===----------------- Pointer Authentication Intrinsics ------------------===//
+//
+
+// Sign an unauthenticated pointer using the specified key and discriminator,
+// passed in that order.
+// Returns the first argument, with some known bits replaced with a signature.
+def int_ptrauth_sign : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+// Authenticate a signed pointer, using the specified key and discriminator.
+// Returns the first argument, with the signature bits removed.
+// The signature must be valid.
+def int_ptrauth_auth : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem,ImmArg<ArgIndex<1>>]>;
+
+// Authenticate a signed pointer and resign it.
+// The second (key) and third (discriminator) arguments specify the signing
+// schema used for authenticating.
+// The fourth and fifth arguments specify the schema used for signing.
+// The signature must be valid.
+// This is a combined form of @llvm.ptrauth.sign and @llvm.ptrauth.auth, with
+// an additional integrity guarantee on the intermediate value.
+def int_ptrauth_resign : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<3>>]>;
+
+// Strip the embedded signature out of a signed pointer.
+// The second argument specifies the key.
+// This behaves like @llvm.ptrauth.auth, but doesn't require the signature to
+// be valid.
+def int_ptrauth_strip : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+// Blend a small integer discriminator with an address discriminator, producing
+// a new discriminator value.
+def int_ptrauth_blend : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
+// Compute the signature of a value, using a given discriminator.
+// This differs from @llvm.ptrauth.sign in that it doesn't embed the computed
+// signature in the pointer, but instead returns the signature as a value.
+// That allows it to be used to sign non-pointer data: in that sense, it is
+// generic. There is no generic @llvm.ptrauth.auth: instead, the signature
+// can be computed using @llvm.ptrauth.sign_generic, and compared with icmp.
+def int_ptrauth_sign_generic : Intrinsic<[llvm_i64_ty],
+ [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+
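To make the intended use of the pointer-authentication intrinsics above concrete, a hedged C++ sketch that signs an i64 value and then strips the signature again; the key and discriminator constants are placeholders, not a recommended schema:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Sign PtrAsI64 with key 0 and discriminator 0, then strip the signature.
static Value *signAndStrip(IRBuilder<> &B, Module *M, Value *PtrAsI64) {
  Value *Key = B.getInt32(0);   // immediate key operand (ImmArg)
  Value *Disc = B.getInt64(0);  // discriminator operand
  Function *Sign = Intrinsic::getDeclaration(M, Intrinsic::ptrauth_sign);
  Function *Strip = Intrinsic::getDeclaration(M, Intrinsic::ptrauth_strip);
  Value *Signed = B.CreateCall(Sign, {PtrAsI64, Key, Disc});
  return B.CreateCall(Strip, {Signed, Key});
}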
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 87e0f83f85b7..c586af45f34d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -962,6 +962,25 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
LLVMPointerToElt<0>],
[IntrReadMem, IntrArgMemOnly]>;
+ class AdvSIMD_2Vec_PredLoad_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ class AdvSIMD_3Vec_PredLoad_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ class AdvSIMD_4Vec_PredLoad_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMPointerToElt<0>],
+ [IntrReadMem, IntrArgMemOnly]>;
+
class AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1365,7 +1384,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
// This class of intrinsics is not intended to be useful within LLVM IR but
// is instead here to support some of the more rigid parts of the ACLE.
- class Builtin_SVCVT<string name, LLVMType OUT, LLVMType PRED, LLVMType IN>
+ class Builtin_SVCVT<LLVMType OUT, LLVMType PRED, LLVMType IN>
: DefaultAttrsIntrinsic<[OUT], [OUT, PRED, IN], [IntrNoMem]>;
}
@@ -1535,6 +1554,10 @@ def int_aarch64_sve_ld2 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
def int_aarch64_sve_ld3 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
def int_aarch64_sve_ld4 : AdvSIMD_ManyVec_PredLoad_Intrinsic;
+def int_aarch64_sve_ld2_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
+def int_aarch64_sve_ld3_sret : AdvSIMD_3Vec_PredLoad_Intrinsic;
+def int_aarch64_sve_ld4_sret : AdvSIMD_4Vec_PredLoad_Intrinsic;
+
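A hedged sketch of how the new structured-return SVE loads above might be emitted; the element type, predicate, and base pointer are assumptions, and the call returns the two-vector struct described by AdvSIMD_2Vec_PredLoad_Intrinsic:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include <utility>
using namespace llvm;

// Emit llvm.aarch64.sve.ld2.sret for <vscale x 4 x i32> and unpack the two
// result vectors from the returned struct.
static std::pair<Value *, Value *> emitLd2Sret(IRBuilder<> &B, Module *M,
                                               Value *Pred, Value *BasePtr) {
  auto *VecTy = ScalableVectorType::get(B.getInt32Ty(), 4);
  Function *Ld2 =
      Intrinsic::getDeclaration(M, Intrinsic::aarch64_sve_ld2_sret, {VecTy});
  Value *Ret = B.CreateCall(Ld2, {Pred, BasePtr});
  return {B.CreateExtractValue(Ret, 0), B.CreateExtractValue(Ret, 1)};
}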
def int_aarch64_sve_ldnt1 : AdvSIMD_1Vec_PredLoad_Intrinsic;
def int_aarch64_sve_ldnf1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
def int_aarch64_sve_ldff1 : AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic;
@@ -1957,44 +1980,44 @@ def int_aarch64_sve_fcmpgt : AdvSIMD_SVE_Compare_Intrinsic;
def int_aarch64_sve_fcmpne : AdvSIMD_SVE_Compare_Intrinsic;
def int_aarch64_sve_fcmpuo : AdvSIMD_SVE_Compare_Intrinsic;
-def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<"svcvt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<"svcvtnt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<"svcvt_u64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtzu_i64f32 : Builtin_SVCVT<llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<"svcvt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<"svcvt_f16_f64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<"svcvt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_f16f64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<"svcvt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<"svcvt_f64_f16_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<"svcvt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f16 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<"svcvtlt_f32_f16_m", llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
-def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<"svcvtlt_f64_f32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<"svcvtnt_f16_f32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
-def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<"svcvtnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtlt_f32f16 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
+def int_aarch64_sve_fcvtlt_f64f32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f16f32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4f32_ty>;
+def int_aarch64_sve_fcvtnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<"svcvtx_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<"svcvtxnt_f32_f64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtx_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
+def int_aarch64_sve_fcvtxnt_f32f64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
-def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_s32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
-def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_s64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_s64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_s32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_scvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_scvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
-def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<"svcvt_f16_u32_m", llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
-def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<"svcvt_f16_u64_m", llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<"svcvt_f32_u64_m", llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
-def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<"svcvt_f64_u32_m", llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_ucvtf_f16i32 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv4i1_ty, llvm_nxv4i32_ty>;
+def int_aarch64_sve_ucvtf_f16i64 : Builtin_SVCVT<llvm_nxv8f16_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f32i64 : Builtin_SVCVT<llvm_nxv4f32_ty, llvm_nxv2i1_ty, llvm_nxv2i64_ty>;
+def int_aarch64_sve_ucvtf_f64i32 : Builtin_SVCVT<llvm_nxv2f64_ty, llvm_nxv2i1_ty, llvm_nxv4i32_ty>;
//
// Predicate creation
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 46a7aeb39c9a..0a44670de76e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -684,7 +684,14 @@ class AMDGPUDimAtomicProfile<string opmod,
let IsAtomic = true;
}
-class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim> : AMDGPUDimProfile<"GET_RESINFO", dim> {
+class AMDGPUDimAtomicFloatProfile<string opmod, AMDGPUDimProps dim,
+ list<AMDGPUArg> dataargs>
+ : AMDGPUDimAtomicProfile<opmod, dim, dataargs> {
+ let RetTypes = [llvm_anyfloat_ty];
+}
+
+class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
+ : AMDGPUDimProfile<"GET_RESINFO", dim> {
let RetTypes = [llvm_anyfloat_ty];
let DataArgs = [];
let AddrArgs = [AMDGPUArg<llvm_anyint_ty, "mip">];
@@ -860,17 +867,24 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
// atomic intrinsics
//////////////////////////////////////////////////////////////////////////
defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
- multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs> {
- foreach dim = AMDGPUDims.All in {
- def !strconcat(NAME, "_", dim.Name)
- : AMDGPUImageDimIntrinsic<
- AMDGPUDimAtomicProfile<opmod, dim, dataargs>,
- [], [SDNPMemOperand]>;
- }
+ multiclass AMDGPUImageDimAtomicX<string opmod, list<AMDGPUArg> dataargs,
+ int isFloat = 0> {
+ foreach dim = AMDGPUDims.All in {
+ def !strconcat(NAME, "_", dim.Name): AMDGPUImageDimIntrinsic<
+ !if (isFloat, AMDGPUDimAtomicFloatProfile<opmod, dim, dataargs>,
+ AMDGPUDimAtomicProfile<opmod, dim, dataargs>),
+ [], [SDNPMemOperand]>;
+ }
+ }
+
+ multiclass AMDGPUImageDimAtomic<string opmod, int isFloat = 0> {
+ defm ""
+ : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">],
+ isFloat>;
}
- multiclass AMDGPUImageDimAtomic<string opmod> {
- defm "" : AMDGPUImageDimAtomicX<opmod, [AMDGPUArg<LLVMMatchType<0>, "vdata">]>;
+ multiclass AMDGPUImageDimFloatAtomic<string opmod> {
+ defm "" : AMDGPUImageDimAtomic<opmod, 1 /*isFloat*/>;
}
defm int_amdgcn_image_atomic_swap : AMDGPUImageDimAtomic<"ATOMIC_SWAP">;
@@ -878,8 +892,10 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
defm int_amdgcn_image_atomic_sub : AMDGPUImageDimAtomic<"ATOMIC_SUB">;
defm int_amdgcn_image_atomic_smin : AMDGPUImageDimAtomic<"ATOMIC_SMIN">;
defm int_amdgcn_image_atomic_umin : AMDGPUImageDimAtomic<"ATOMIC_UMIN">;
+ defm int_amdgcn_image_atomic_fmin : AMDGPUImageDimFloatAtomic<"ATOMIC_FMIN">;
defm int_amdgcn_image_atomic_smax : AMDGPUImageDimAtomic<"ATOMIC_SMAX">;
defm int_amdgcn_image_atomic_umax : AMDGPUImageDimAtomic<"ATOMIC_UMAX">;
+ defm int_amdgcn_image_atomic_fmax : AMDGPUImageDimFloatAtomic<"ATOMIC_FMAX">;
defm int_amdgcn_image_atomic_and : AMDGPUImageDimAtomic<"ATOMIC_AND">;
defm int_amdgcn_image_atomic_or : AMDGPUImageDimAtomic<"ATOMIC_OR">;
defm int_amdgcn_image_atomic_xor : AMDGPUImageDimAtomic<"ATOMIC_XOR">;
@@ -1015,8 +1031,10 @@ def int_amdgcn_raw_buffer_atomic_add : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_sub : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_smin : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umin : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_atomic_smax : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_umax : AMDGPURawBufferAtomic;
+def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_atomic_and : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_or : AMDGPURawBufferAtomic;
def int_amdgcn_raw_buffer_atomic_xor : AMDGPURawBufferAtomic;
@@ -1036,10 +1054,6 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-// gfx90a intrinsics
-def int_amdgcn_raw_buffer_atomic_fmin : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-def int_amdgcn_raw_buffer_atomic_fmax : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
!if(NoRtn, [], [data_ty]),
[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
@@ -1521,6 +1535,16 @@ def int_amdgcn_mul_u24 : Intrinsic<[llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]
>;
+def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+>;
+
+def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, IntrWillReturn]
+>;
+
// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
//
// bar_val is the total number of waves that will wait on this
diff --git a/llvm/include/llvm/IR/IntrinsicsBPF.td b/llvm/include/llvm/IR/IntrinsicsBPF.td
index 4b4dd94b1599..a6bd6f841aab 100644
--- a/llvm/include/llvm/IR/IntrinsicsBPF.td
+++ b/llvm/include/llvm/IR/IntrinsicsBPF.td
@@ -34,4 +34,7 @@ let TargetPrefix = "bpf" in { // All intrinsics start with "llvm.bpf."
[IntrNoMem]>;
def int_bpf_passthrough : GCCBuiltin<"__builtin_bpf_passthrough">,
Intrinsic<[llvm_any_ty], [llvm_i32_ty, llvm_any_ty], [IntrNoMem]>;
+ def int_bpf_compare : GCCBuiltin<"__builtin_bpf_compare">,
+ Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_anyint_ty, llvm_anyint_ty],
+ [IntrNoMem]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index cc43d23bec1c..6f55d1ef730e 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -43,7 +43,7 @@ def llvm_shared_i64ptr_ty : LLVMQualPointerType<llvm_i64_ty, 3>; // (shared)i64*
// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
// Geom: m<M>n<N>k<K>. E.g. m8n32k16
-// Frag: [abcd]
+// Frag: [a|b|c|d] ([x1|x2|x4] for ldmatrix)
// PtxEltType: PTX type for the element.
class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
string geom = Geom;
@@ -190,6 +190,11 @@ class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
!eq(gft,"m16n8k256:b:b1") : !listsplat(llvm_i32_ty, 2),
!eq(gft,"m16n8k256:c:s32") : !listsplat(llvm_i32_ty, 4),
!eq(gft,"m16n8k256:d:s32") : !listsplat(llvm_i32_ty, 4),
+
+ // ldmatrix b16 -> s32 @ m8n8
+ !eq(gft,"m8n8:x1:b16") : !listsplat(llvm_i32_ty, 1),
+ !eq(gft,"m8n8:x2:b16") : !listsplat(llvm_i32_ty, 2),
+ !eq(gft,"m8n8:x4:b16") : !listsplat(llvm_i32_ty, 4),
);
}
@@ -256,6 +261,17 @@ class MMA_NAME<string ALayout, string BLayout, int Satfinite, string b1op,
!subst("llvm.", "int_", llvm));
}
+class LDMATRIX_NAME<WMMA_REGS Frag, int Trans> {
+ string intr = "llvm.nvvm.ldmatrix.sync.aligned"
+ # "." # Frag.geom
+ # "." # Frag.frag
+ # !if(Trans, ".trans", "")
+ # "." # Frag.ptx_elt_type
+ ;
+ string record = !subst(".", "_",
+ !subst("llvm.", "int_", intr));
+}
+
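For example, following the concatenation above, the x2 fragment at geometry m8n8 with element type b16 and Trans = 0 expands to the intrinsic name "llvm.nvvm.ldmatrix.sync.aligned.m8n8.x2.b16" and the record name "int_nvvm_ldmatrix_sync_aligned_m8n8_x2_b16"; with Trans set, ".trans" is inserted before the element type.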
// Generates list of 4-tuples of WMMA_REGS representing a valid MMA op.
// Geom: list of supported geometries.
// TypeN: PTX type of the corresponding fragment's element.
@@ -286,9 +302,19 @@ class MMA_LDST_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
list<string> ops = !foreach(x, ret, x.gft);
}
-// Creates list of valid combinations of fragments. This is the master list that
+class LDMATRIX_OPS<list<string> Geom, list<string> Frags, list<string> Types> {
+ list<WMMA_REGS> ret =
+ !foldl([]<WMMA_REGS>, Geom, t1, geom, !listconcat(t1,
+ !foldl([]<WMMA_REGS>, Frags, t2, frag, !listconcat(t2,
+ !foldl([]<WMMA_REGS>, Types, t3, type, !listconcat(t3,
+ [WMMA_REGS<geom, frag, type>]))))));
+ // Debugging aid for readable representation of the list above.
+ list<string> ops = !foreach(x, ret, x.gft);
+}
+
+// Creates list of valid combinations of fragments. This is the main list that
// drives generation of corresponding intrinsics and instructions.
-class NVVM_MMA_OPS<int _ = 0> {
+class NVVM_MMA_OPS {
list<list<WMMA_REGS>> tf32_wmma_ops = MMA_OPS<
["m16n16k8"],
["tf32"], [], ["f32"], []>.ret;
@@ -370,11 +396,14 @@ class NVVM_MMA_OPS<int _ = 0> {
// Separate A/B/C fragments (loads) from D (stores).
list<WMMA_REGS> all_ld_ops = !filter(op, all_ldst_ops, !ne(op.frag, "d"));
list<WMMA_REGS> all_st_ops = !filter(op, all_ldst_ops, !eq(op.frag, "d"));
+
+ list<WMMA_REGS> ldmatrix_b16_ops = LDMATRIX_OPS<
+ ["m8n8"], ["x1", "x2", "x4"], ["b16"]>.ret;
+ list<WMMA_REGS> all_ldmatrix_ops = ldmatrix_b16_ops;
}
def NVVM_MMA_OPS : NVVM_MMA_OPS;
-
// Returns true if this combination of fragment and layout for WMMA load/store
// ops is supported; false otherwise.
// E.g.
@@ -489,6 +518,23 @@ class NVVM_MMA_SUPPORTED<list<WMMA_REGS> frags, string layout_a, string layout_b
);
}
+// Returns true if the fragment is valid for ldmatrix ops;
+// false otherwise.
+// E.g.
+// if NVVM_LDMATRIX_SUPPORTED<...>.ret then
+// def : FOO<>; // The record will only be defined for supported ops.
+//
+class NVVM_LDMATRIX_SUPPORTED<WMMA_REGS frag> {
+ string g = frag.geom;
+ string t = frag.ptx_elt_type;
+
+ bit ret = !cond(
+ // Only currently support m8n8 and b16
+ !and(!eq(g, "m8n8"), !eq(t, "b16")): true,
+ true: false
+ );
+}
+
class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
string Suffix = !if(sync, "sync_", "")
# mode # "_"
@@ -511,7 +557,7 @@ class SHFL_INFO<bit sync, string mode, string type, bit return_pred> {
let TargetPrefix = "nvvm" in {
def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
//
@@ -519,150 +565,150 @@ let TargetPrefix = "nvvm" in {
//
def int_nvvm_fmin_f : GCCBuiltin<"__nvvm_fmin_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmin_ftz_f : GCCBuiltin<"__nvvm_fmin_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmax_f : GCCBuiltin<"__nvvm_fmax_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
- , [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty]
+ , [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmax_ftz_f : GCCBuiltin<"__nvvm_fmax_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmin_d : GCCBuiltin<"__nvvm_fmin_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_fmax_d : GCCBuiltin<"__nvvm_fmax_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
//
// Multiplication
//
def int_nvvm_mulhi_i : GCCBuiltin<"__nvvm_mulhi_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mulhi_ui : GCCBuiltin<"__nvvm_mulhi_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mulhi_ll : GCCBuiltin<"__nvvm_mulhi_ll">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mulhi_ull : GCCBuiltin<"__nvvm_mulhi_ull">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rn_ftz_f : GCCBuiltin<"__nvvm_mul_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rn_f : GCCBuiltin<"__nvvm_mul_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rz_ftz_f : GCCBuiltin<"__nvvm_mul_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rz_f : GCCBuiltin<"__nvvm_mul_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rm_ftz_f : GCCBuiltin<"__nvvm_mul_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rm_f : GCCBuiltin<"__nvvm_mul_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rp_ftz_f : GCCBuiltin<"__nvvm_mul_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rp_f : GCCBuiltin<"__nvvm_mul_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rn_d : GCCBuiltin<"__nvvm_mul_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rz_d : GCCBuiltin<"__nvvm_mul_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rm_d : GCCBuiltin<"__nvvm_mul_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul_rp_d : GCCBuiltin<"__nvvm_mul_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul24_i : GCCBuiltin<"__nvvm_mul24_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_mul24_ui : GCCBuiltin<"__nvvm_mul24_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
//
// Div
//
def int_nvvm_div_approx_ftz_f : GCCBuiltin<"__nvvm_div_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_approx_f : GCCBuiltin<"__nvvm_div_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rn_ftz_f : GCCBuiltin<"__nvvm_div_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rn_f : GCCBuiltin<"__nvvm_div_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rz_ftz_f : GCCBuiltin<"__nvvm_div_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rz_f : GCCBuiltin<"__nvvm_div_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rm_ftz_f : GCCBuiltin<"__nvvm_div_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rm_f : GCCBuiltin<"__nvvm_div_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rp_ftz_f : GCCBuiltin<"__nvvm_div_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rp_f : GCCBuiltin<"__nvvm_div_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rn_d : GCCBuiltin<"__nvvm_div_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rz_d : GCCBuiltin<"__nvvm_div_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rm_d : GCCBuiltin<"__nvvm_div_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
def int_nvvm_div_rp_d : GCCBuiltin<"__nvvm_div_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
//
// Sad
//
def int_nvvm_sad_i : GCCBuiltin<"__nvvm_sad_i">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
def int_nvvm_sad_ui : GCCBuiltin<"__nvvm_sad_ui">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, Commutative]>;
//
@@ -670,493 +716,493 @@ let TargetPrefix = "nvvm" in {
//
def int_nvvm_floor_ftz_f : GCCBuiltin<"__nvvm_floor_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_floor_f : GCCBuiltin<"__nvvm_floor_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_floor_d : GCCBuiltin<"__nvvm_floor_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ceil_ftz_f : GCCBuiltin<"__nvvm_ceil_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ceil_f : GCCBuiltin<"__nvvm_ceil_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ceil_d : GCCBuiltin<"__nvvm_ceil_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Abs
//
def int_nvvm_fabs_ftz_f : GCCBuiltin<"__nvvm_fabs_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fabs_f : GCCBuiltin<"__nvvm_fabs_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fabs_d : GCCBuiltin<"__nvvm_fabs_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Round
//
def int_nvvm_round_ftz_f : GCCBuiltin<"__nvvm_round_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_round_f : GCCBuiltin<"__nvvm_round_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_round_d : GCCBuiltin<"__nvvm_round_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Trunc
//
def int_nvvm_trunc_ftz_f : GCCBuiltin<"__nvvm_trunc_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_trunc_f : GCCBuiltin<"__nvvm_trunc_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_trunc_d : GCCBuiltin<"__nvvm_trunc_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Saturate
//
def int_nvvm_saturate_ftz_f : GCCBuiltin<"__nvvm_saturate_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_saturate_f : GCCBuiltin<"__nvvm_saturate_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_saturate_d : GCCBuiltin<"__nvvm_saturate_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
//
// Exp2 Log2
//
def int_nvvm_ex2_approx_ftz_f : GCCBuiltin<"__nvvm_ex2_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_f : GCCBuiltin<"__nvvm_ex2_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_d : GCCBuiltin<"__nvvm_ex2_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_ftz_f : GCCBuiltin<"__nvvm_lg2_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_f : GCCBuiltin<"__nvvm_lg2_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_d : GCCBuiltin<"__nvvm_lg2_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sin Cos
//
def int_nvvm_sin_approx_ftz_f : GCCBuiltin<"__nvvm_sin_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sin_approx_f : GCCBuiltin<"__nvvm_sin_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_cos_approx_ftz_f : GCCBuiltin<"__nvvm_cos_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
//
// Fma
//
def int_nvvm_fma_rn_ftz_f : GCCBuiltin<"__nvvm_fma_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rn_f : GCCBuiltin<"__nvvm_fma_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rz_ftz_f : GCCBuiltin<"__nvvm_fma_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rz_f : GCCBuiltin<"__nvvm_fma_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rm_ftz_f : GCCBuiltin<"__nvvm_fma_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rm_f : GCCBuiltin<"__nvvm_fma_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rp_ftz_f : GCCBuiltin<"__nvvm_fma_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rp_f : GCCBuiltin<"__nvvm_fma_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rn_d : GCCBuiltin<"__nvvm_fma_rn_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rz_d : GCCBuiltin<"__nvvm_fma_rz_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rm_d : GCCBuiltin<"__nvvm_fma_rm_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_fma_rp_d : GCCBuiltin<"__nvvm_fma_rp_d">,
- Intrinsic<[llvm_double_ty],
+ DefaultAttrsIntrinsic<[llvm_double_ty],
[llvm_double_ty, llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ [IntrNoMem, IntrSpeculatable]>;
//
// Rcp
//
def int_nvvm_rcp_rn_ftz_f : GCCBuiltin<"__nvvm_rcp_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rn_f : GCCBuiltin<"__nvvm_rcp_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_ftz_f : GCCBuiltin<"__nvvm_rcp_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_f : GCCBuiltin<"__nvvm_rcp_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_ftz_f : GCCBuiltin<"__nvvm_rcp_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_f : GCCBuiltin<"__nvvm_rcp_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_ftz_f : GCCBuiltin<"__nvvm_rcp_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_f : GCCBuiltin<"__nvvm_rcp_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rcp_rn_d : GCCBuiltin<"__nvvm_rcp_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rz_d : GCCBuiltin<"__nvvm_rcp_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rm_d : GCCBuiltin<"__nvvm_rcp_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_rp_d : GCCBuiltin<"__nvvm_rcp_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_rcp_approx_ftz_d : GCCBuiltin<"__nvvm_rcp_approx_ftz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Sqrt
//
def int_nvvm_sqrt_f : GCCBuiltin<"__nvvm_sqrt_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_ftz_f : GCCBuiltin<"__nvvm_sqrt_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_f : GCCBuiltin<"__nvvm_sqrt_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_ftz_f : GCCBuiltin<"__nvvm_sqrt_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_f : GCCBuiltin<"__nvvm_sqrt_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_ftz_f : GCCBuiltin<"__nvvm_sqrt_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_f : GCCBuiltin<"__nvvm_sqrt_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_ftz_f : GCCBuiltin<"__nvvm_sqrt_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_f : GCCBuiltin<"__nvvm_sqrt_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_approx_ftz_f : GCCBuiltin<"__nvvm_sqrt_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_approx_f : GCCBuiltin<"__nvvm_sqrt_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rn_d : GCCBuiltin<"__nvvm_sqrt_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rz_d : GCCBuiltin<"__nvvm_sqrt_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rm_d : GCCBuiltin<"__nvvm_sqrt_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_nvvm_sqrt_rp_d : GCCBuiltin<"__nvvm_sqrt_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Rsqrt
//
def int_nvvm_rsqrt_approx_ftz_f : GCCBuiltin<"__nvvm_rsqrt_approx_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rsqrt_approx_f : GCCBuiltin<"__nvvm_rsqrt_approx_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_rsqrt_approx_d : GCCBuiltin<"__nvvm_rsqrt_approx_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
//
// Add
//
def int_nvvm_add_rn_ftz_f : GCCBuiltin<"__nvvm_add_rn_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rn_f : GCCBuiltin<"__nvvm_add_rn_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rz_ftz_f : GCCBuiltin<"__nvvm_add_rz_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rz_f : GCCBuiltin<"__nvvm_add_rz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rm_ftz_f : GCCBuiltin<"__nvvm_add_rm_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rm_f : GCCBuiltin<"__nvvm_add_rm_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rp_ftz_f : GCCBuiltin<"__nvvm_add_rp_ftz_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rp_f : GCCBuiltin<"__nvvm_add_rp_f">,
- Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rn_d : GCCBuiltin<"__nvvm_add_rn_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rz_d : GCCBuiltin<"__nvvm_add_rz_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rm_d : GCCBuiltin<"__nvvm_add_rm_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_add_rp_d : GCCBuiltin<"__nvvm_add_rp_d">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
//
// Convert
//
def int_nvvm_d2f_rn_ftz : GCCBuiltin<"__nvvm_d2f_rn_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rn : GCCBuiltin<"__nvvm_d2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rz_ftz : GCCBuiltin<"__nvvm_d2f_rz_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rz : GCCBuiltin<"__nvvm_d2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rm_ftz : GCCBuiltin<"__nvvm_d2f_rm_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rm : GCCBuiltin<"__nvvm_d2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rp_ftz : GCCBuiltin<"__nvvm_d2f_rp_ftz">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2f_rp : GCCBuiltin<"__nvvm_d2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rn : GCCBuiltin<"__nvvm_d2i_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rz : GCCBuiltin<"__nvvm_d2i_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rm : GCCBuiltin<"__nvvm_d2i_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_rp : GCCBuiltin<"__nvvm_d2i_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rn : GCCBuiltin<"__nvvm_d2ui_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rz : GCCBuiltin<"__nvvm_d2ui_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rm : GCCBuiltin<"__nvvm_d2ui_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ui_rp : GCCBuiltin<"__nvvm_d2ui_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rn : GCCBuiltin<"__nvvm_i2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rz : GCCBuiltin<"__nvvm_i2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rm : GCCBuiltin<"__nvvm_i2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2d_rp : GCCBuiltin<"__nvvm_i2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rn : GCCBuiltin<"__nvvm_ui2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rz : GCCBuiltin<"__nvvm_ui2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rm : GCCBuiltin<"__nvvm_ui2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2d_rp : GCCBuiltin<"__nvvm_ui2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rn_ftz : GCCBuiltin<"__nvvm_f2i_rn_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rn : GCCBuiltin<"__nvvm_f2i_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rz_ftz : GCCBuiltin<"__nvvm_f2i_rz_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rz : GCCBuiltin<"__nvvm_f2i_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rm_ftz : GCCBuiltin<"__nvvm_f2i_rm_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rm : GCCBuiltin<"__nvvm_f2i_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rp_ftz : GCCBuiltin<"__nvvm_f2i_rp_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2i_rp : GCCBuiltin<"__nvvm_f2i_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rn_ftz : GCCBuiltin<"__nvvm_f2ui_rn_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rn : GCCBuiltin<"__nvvm_f2ui_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rz_ftz : GCCBuiltin<"__nvvm_f2ui_rz_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rz : GCCBuiltin<"__nvvm_f2ui_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rm_ftz : GCCBuiltin<"__nvvm_f2ui_rm_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rm : GCCBuiltin<"__nvvm_f2ui_rm">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rp_ftz : GCCBuiltin<"__nvvm_f2ui_rp_ftz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ui_rp : GCCBuiltin<"__nvvm_f2ui_rp">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rn : GCCBuiltin<"__nvvm_i2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rz : GCCBuiltin<"__nvvm_i2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rm : GCCBuiltin<"__nvvm_i2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_i2f_rp : GCCBuiltin<"__nvvm_i2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rn : GCCBuiltin<"__nvvm_ui2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rz : GCCBuiltin<"__nvvm_ui2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rm : GCCBuiltin<"__nvvm_ui2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ui2f_rp : GCCBuiltin<"__nvvm_ui2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_lohi_i2d : GCCBuiltin<"__nvvm_lohi_i2d">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, Commutative]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, Commutative]>;
def int_nvvm_d2i_lo : GCCBuiltin<"__nvvm_d2i_lo">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2i_hi : GCCBuiltin<"__nvvm_d2i_hi">,
- Intrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rn_ftz : GCCBuiltin<"__nvvm_f2ll_rn_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rn : GCCBuiltin<"__nvvm_f2ll_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rz_ftz : GCCBuiltin<"__nvvm_f2ll_rz_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rz : GCCBuiltin<"__nvvm_f2ll_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rm_ftz : GCCBuiltin<"__nvvm_f2ll_rm_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rm : GCCBuiltin<"__nvvm_f2ll_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rp_ftz : GCCBuiltin<"__nvvm_f2ll_rp_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ll_rp : GCCBuiltin<"__nvvm_f2ll_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rn_ftz : GCCBuiltin<"__nvvm_f2ull_rn_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rn : GCCBuiltin<"__nvvm_f2ull_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rz_ftz : GCCBuiltin<"__nvvm_f2ull_rz_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rz : GCCBuiltin<"__nvvm_f2ull_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rm_ftz : GCCBuiltin<"__nvvm_f2ull_rm_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rm : GCCBuiltin<"__nvvm_f2ull_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rp_ftz : GCCBuiltin<"__nvvm_f2ull_rp_ftz">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2ull_rp : GCCBuiltin<"__nvvm_f2ull_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rn : GCCBuiltin<"__nvvm_d2ll_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rz : GCCBuiltin<"__nvvm_d2ll_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rm : GCCBuiltin<"__nvvm_d2ll_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ll_rp : GCCBuiltin<"__nvvm_d2ll_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rn : GCCBuiltin<"__nvvm_d2ull_rn">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rz : GCCBuiltin<"__nvvm_d2ull_rz">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rm : GCCBuiltin<"__nvvm_d2ull_rm">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_d2ull_rp : GCCBuiltin<"__nvvm_d2ull_rp">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rn : GCCBuiltin<"__nvvm_ll2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rz : GCCBuiltin<"__nvvm_ll2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rm : GCCBuiltin<"__nvvm_ll2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2f_rp : GCCBuiltin<"__nvvm_ll2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rn : GCCBuiltin<"__nvvm_ull2f_rn">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rz : GCCBuiltin<"__nvvm_ull2f_rz">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rm : GCCBuiltin<"__nvvm_ull2f_rm">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2f_rp : GCCBuiltin<"__nvvm_ull2f_rp">,
- Intrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rn : GCCBuiltin<"__nvvm_ll2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rz : GCCBuiltin<"__nvvm_ll2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rm : GCCBuiltin<"__nvvm_ll2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ll2d_rp : GCCBuiltin<"__nvvm_ll2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rn : GCCBuiltin<"__nvvm_ull2d_rn">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rz : GCCBuiltin<"__nvvm_ull2d_rz">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rm : GCCBuiltin<"__nvvm_ull2d_rm">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_ull2d_rp : GCCBuiltin<"__nvvm_ull2d_rp">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2h_rn_ftz : GCCBuiltin<"__nvvm_f2h_rn_ftz">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_f2h_rn : GCCBuiltin<"__nvvm_f2h_rn">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
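// The convert intrinsics above follow the PTX cvt suffix scheme: rn rounds to
// nearest even, rz toward zero, rm toward -inf, rp toward +inf, and ftz
// flushes subnormals to zero. As an illustrative sketch (standard name
// mangling assumed), __nvvm_d2i_rz lowers to:
//   %i = call i32 @llvm.nvvm.d2i.rz(double %d)
// Tagging these DefaultAttrsIntrinsic with IntrSpeculatable records that they
// are pure, non-trapping value conversions, so they may be speculated/hoisted.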
//
// Bitcast
//
def int_nvvm_bitcast_f2i : GCCBuiltin<"__nvvm_bitcast_f2i">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_bitcast_i2f : GCCBuiltin<"__nvvm_bitcast_i2f">,
- Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_bitcast_ll2d : GCCBuiltin<"__nvvm_bitcast_ll2d">,
- Intrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i64_ty], [IntrNoMem, IntrSpeculatable]>;
def int_nvvm_bitcast_d2ll : GCCBuiltin<"__nvvm_bitcast_d2ll">,
- Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem, IntrSpeculatable]>;
// FNS
def int_nvvm_fns : GCCBuiltin<"__nvvm_fns">,
- Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>;
// Atomics not available as llvm intrinsics.
@@ -1385,37 +1431,37 @@ def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
// - This complements the llvm bitcast, which can be used to cast one type
// of pointer to another type of pointer, while the address space remains
// the same.
-def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_local_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.local.to.gen">;
-def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_shared_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.shared.to.gen">;
-def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_global_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.global.to.gen">;
-def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_constant_to_gen: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.constant.to.gen">;
-def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_global: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.global">;
-def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_shared: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.shared">;
-def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_local: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.local">;
-def int_nvvm_ptr_gen_to_constant: Intrinsic<[llvm_anyptr_ty],
- [llvm_anyptr_ty], [IntrNoMem],
+def int_nvvm_ptr_gen_to_constant: DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+ [llvm_anyptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.constant">;
// Used in nvvm internally to help address space opt and ptx code generation
// This is for params that are passed to kernel functions by pointer by-val.
def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],
[llvm_anyptr_ty],
- [IntrNoMem],
+ [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.ptr.gen.to.param">;
// Move intrinsics, used in nvvm internally
@@ -1453,149 +1499,149 @@ def int_nvvm_reflect :
// isspacep.{const, global, local, shared}
def int_nvvm_isspacep_const
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.const">,
GCCBuiltin<"__nvvm_isspacep_const">;
def int_nvvm_isspacep_global
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.global">,
GCCBuiltin<"__nvvm_isspacep_global">;
def int_nvvm_isspacep_local
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.local">,
GCCBuiltin<"__nvvm_isspacep_local">;
def int_nvvm_isspacep_shared
- : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.isspacep.shared">,
GCCBuiltin<"__nvvm_isspacep_shared">;
// Environment register read
def int_nvvm_read_ptx_sreg_envreg0
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg0">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
def int_nvvm_read_ptx_sreg_envreg1
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg1">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
def int_nvvm_read_ptx_sreg_envreg2
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg2">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
def int_nvvm_read_ptx_sreg_envreg3
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg3">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
def int_nvvm_read_ptx_sreg_envreg4
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg4">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
def int_nvvm_read_ptx_sreg_envreg5
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg5">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
def int_nvvm_read_ptx_sreg_envreg6
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg6">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
def int_nvvm_read_ptx_sreg_envreg7
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg7">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
def int_nvvm_read_ptx_sreg_envreg8
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg8">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
def int_nvvm_read_ptx_sreg_envreg9
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg9">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
def int_nvvm_read_ptx_sreg_envreg10
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg10">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
def int_nvvm_read_ptx_sreg_envreg11
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg11">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
def int_nvvm_read_ptx_sreg_envreg12
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg12">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
def int_nvvm_read_ptx_sreg_envreg13
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg13">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
def int_nvvm_read_ptx_sreg_envreg14
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg14">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
def int_nvvm_read_ptx_sreg_envreg15
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg15">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
def int_nvvm_read_ptx_sreg_envreg16
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg16">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
def int_nvvm_read_ptx_sreg_envreg17
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg17">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
def int_nvvm_read_ptx_sreg_envreg18
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg18">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
def int_nvvm_read_ptx_sreg_envreg19
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg19">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
def int_nvvm_read_ptx_sreg_envreg20
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg20">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
def int_nvvm_read_ptx_sreg_envreg21
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg21">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
def int_nvvm_read_ptx_sreg_envreg22
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg22">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
def int_nvvm_read_ptx_sreg_envreg23
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg23">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
def int_nvvm_read_ptx_sreg_envreg24
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg24">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
def int_nvvm_read_ptx_sreg_envreg25
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg25">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
def int_nvvm_read_ptx_sreg_envreg26
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg26">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
def int_nvvm_read_ptx_sreg_envreg27
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg27">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
def int_nvvm_read_ptx_sreg_envreg28
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg28">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
def int_nvvm_read_ptx_sreg_envreg29
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg29">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
def int_nvvm_read_ptx_sreg_envreg30
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg30">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
def int_nvvm_read_ptx_sreg_envreg31
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
"llvm.nvvm.read.ptx.sreg.envreg31">,
GCCBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
@@ -4200,49 +4246,49 @@ def int_nvvm_sust_p_3d_v4i32_trap
def int_nvvm_rotate_b32
- : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem], "llvm.nvvm.rotate.b32">,
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b32">,
GCCBuiltin<"__nvvm_rotate_b32">;
def int_nvvm_rotate_b64
- :Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem], "llvm.nvvm.rotate.b64">,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.b64">,
GCCBuiltin<"__nvvm_rotate_b64">;
def int_nvvm_rotate_right_b64
- : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem], "llvm.nvvm.rotate.right.b64">,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.rotate.right.b64">,
GCCBuiltin<"__nvvm_rotate_right_b64">;
def int_nvvm_swap_lo_hi_b64
- : Intrinsic<[llvm_i64_ty], [llvm_i64_ty],
- [IntrNoMem], "llvm.nvvm.swap.lo.hi.b64">,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem, IntrSpeculatable], "llvm.nvvm.swap.lo.hi.b64">,
GCCBuiltin<"__nvvm_swap_lo_hi_b64">;
// Accessing special registers.
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
-// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
+// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
// FIXME: Enable this once v4i32 support is enabled in back-end.
-// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
+// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
- def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
- def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
- def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
- def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
}
class PTXReadSRegIntrinsic_r32<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadSRegIntrinsic_r64<string name>
- : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>,
GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// Intrinsics to read registers with non-constant values. E.g. the values that
@@ -4519,4 +4565,20 @@ foreach layout_a = ["row", "col"] in {
} // layout_b
} // layout_a
+// LDMATRIX
+class NVVM_LDMATRIX<WMMA_REGS Frag, int Transposed>
+ : Intrinsic<Frag.regs, [llvm_anyptr_ty],
+ [IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
+ NoCapture<ArgIndex<0>>],
+ LDMATRIX_NAME<Frag, Transposed>.intr>;
+
+foreach transposed = [0, 1] in {
+ foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in {
+ if NVVM_LDMATRIX_SUPPORTED<frag>.ret then {
+ def LDMATRIX_NAME<frag, transposed>.record
+ : NVVM_LDMATRIX<frag, transposed>;
+ }
+ }
+}
+
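// The foreach above emits one ldmatrix intrinsic per supported
// fragment/transposed pair, with the name taken from
// LDMATRIX_NAME<frag, transposed>.intr. The attribute set (IntrReadMem,
// IntrArgMemOnly, ReadOnly/NoCapture on the pointer) states that each load
// only reads through its pointer argument, so unrelated stores may be
// reordered across it.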
} // let TargetPrefix = "nvvm"
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 92d3bdea37ed..8290342c0d51 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -31,10 +31,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Get content from current FPSCR register
def int_ppc_readflm : GCCBuiltin<"__builtin_readflm">,
- Intrinsic<[llvm_double_ty], [], [IntrNoMem]>;
+ Intrinsic<[llvm_double_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
// Set FPSCR register, and return previous content
def int_ppc_setflm : GCCBuiltin<"__builtin_setflm">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty], []>;
+ Intrinsic<[llvm_double_ty], [llvm_double_ty],
+ [IntrHasSideEffects]>;
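// Illustrative consequence of the attribute change: readflm and setflm access
// the FPSCR, whose status bits change as FP instructions execute, so identical
// reads must not be merged:
//   %a = call double @llvm.ppc.readflm()
//   ... floating-point code that may set FPSCR status bits ...
//   %b = call double @llvm.ppc.readflm()   ; kept distinct by IntrNoMerge
// The old [IntrNoMem] attribute would have allowed CSE of the two calls.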
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : GCCBuiltin<"__builtin_divwe">,
@@ -50,6 +52,15 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
[IntrNoMem]>;
+ def int_ppc_unpack_longdouble : GCCBuiltin<"__builtin_unpack_longdouble">,
+ Intrinsic<[llvm_double_ty],
+ [llvm_ppcf128_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_pack_longdouble : GCCBuiltin<"__builtin_pack_longdouble">,
+ Intrinsic<[llvm_ppcf128_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+
// Generate a random number
def int_ppc_darn : GCCBuiltin<"__builtin_darn">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
@@ -1042,6 +1053,9 @@ let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vbpermq : GCCBuiltin<"__builtin_altivec_vbpermq">,
Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vbpermd : GCCBuiltin<"__builtin_altivec_vbpermd">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
}
def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">;
@@ -1626,8 +1640,7 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
// load
def int_ppc_load2r
- : GCCBuiltin<"__builtin_ppc_load2r">,
- Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+ : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_load4r
: GCCBuiltin<"__builtin_ppc_load4r">,
Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
@@ -1706,7 +1719,10 @@ let TargetPrefix = "ppc" in {
def int_ppc_fres
: GCCBuiltin<"__builtin_ppc_fres">,
Intrinsic <[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-
+ def int_ppc_addex
+ : GCCBuiltin<"__builtin_ppc_addex">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<2>>]>;
def int_ppc_fsel : GCCBuiltin<"__builtin_ppc_fsel">,
Intrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty,
llvm_double_ty], [IntrNoMem]>;
@@ -1717,6 +1733,33 @@ let TargetPrefix = "ppc" in {
Intrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
def int_ppc_frsqrtes : GCCBuiltin<"__builtin_ppc_frsqrtes">,
Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
+ def int_ppc_compare_exp_uo : GCCBuiltin<"__builtin_ppc_compare_exp_uo">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_compare_exp_lt : GCCBuiltin<"__builtin_ppc_compare_exp_lt">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_compare_exp_gt : GCCBuiltin<"__builtin_ppc_compare_exp_gt">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_compare_exp_eq : GCCBuiltin<"__builtin_ppc_compare_exp_eq">,
+ Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_double_ty],
+ [IntrNoMem]>;
+ def int_ppc_test_data_class_d : Intrinsic<[llvm_i32_ty],
+ [llvm_double_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_test_data_class_f : Intrinsic<[llvm_i32_ty],
+ [llvm_float_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ def int_ppc_convert_f128_to_ppcf128
+ : Intrinsic<[llvm_ppcf128_ty], [llvm_f128_ty], [IntrNoMem]>;
+ def int_ppc_convert_ppcf128_to_f128
+ : Intrinsic<[llvm_f128_ty], [llvm_ppcf128_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -1738,4 +1781,11 @@ let TargetPrefix = "ppc" in {
llvm_i64_ty, llvm_i64_ty,
llvm_i64_ty, llvm_i64_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ def int_ppc_atomic_load_i128 :
+ Intrinsic<[llvm_i64_ty, llvm_i64_ty],
+ [llvm_ptr_ty],
+ [IntrArgMemOnly, IntrReadMem, NoCapture<ArgIndex<0>>]>;
+ def int_ppc_atomic_store_i128 :
+ Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, IntrWriteMem, NoCapture<ArgIndex<2>>]>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index a46709bf09d1..3ceb347e97bf 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -159,16 +159,17 @@ let TargetPrefix = "riscv" in {
[NoCapture<ArgIndex<0>>]>,
RISCVVIntrinsic;
// For unit stride load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
class RISCVUSLoadMask
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>, IntrReadMem]>,
+ RISCVVIntrinsic;
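// Illustrative call with the new trailing tail-policy operand; the overload
// mangling below is assumed, and the operand must be an immediate (here 1,
// i.e. tail agnostic):
//   %v = call <vscale x 2 x i32> @llvm.riscv.vle.mask.nxv2i32.i64(
//            <vscale x 2 x i32> %maskedoff, <vscale x 2 x i32>* %ptr,
//            <vscale x 2 x i1> %mask, i64 %vl, i64 1)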
// For unit stride fault-only-first load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
@@ -177,8 +178,8 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<1>],
- [NoCapture<ArgIndex<1>>]>, RISCVVIntrinsic;
+ LLVMMatchType<1>, LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic;
// For strided load
// Input: (pointer, stride, vl)
class RISCVSLoad
@@ -187,13 +188,15 @@ let TargetPrefix = "riscv" in {
llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For strided load with mask
- // Input: (maskedoff, pointer, stride, mask, vl)
+ // Input: (maskedoff, pointer, stride, mask, vl, ta)
class RISCVSLoadMask
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>,
+ LLVMMatchType<1>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For indexed load
// Input: (pointer, index, vl)
class RISCVILoad
@@ -202,13 +205,15 @@ let TargetPrefix = "riscv" in {
llvm_anyvector_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For indexed load with mask
- // Input: (maskedoff, pointer, index, mask, vl)
+ // Input: (maskedoff, pointer, index, mask, vl, ta)
class RISCVILoadMask
: Intrinsic<[llvm_anyvector_ty ],
[LLVMMatchType<0>,
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride store
// Input: (vector_in, pointer, vl)
class RISCVUSStore
@@ -265,10 +270,16 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector (with mask).
- // Input: (vector_in, mask, vl)
+ // Input: (vector_in, mask, vl, ta)
class RISCVUnaryAAMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic;
+ class RISCVUnaryAAMaskNoTA
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
@@ -284,12 +295,13 @@ let TargetPrefix = "riscv" in {
[LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, vector_in, int_vector_in, vl)
+ // Input: (vector_in, vector_in, int_vector_in, vl, ta)
class RISCVRGatherVVMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMVectorOfBitcastsToInt<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// Input: (vector_in, int16_vector_in, vl)
class RISCVRGatherEI16VVNoMask
: Intrinsic<[llvm_anyvector_ty],
@@ -297,13 +309,14 @@ let TargetPrefix = "riscv" in {
llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
- // Input: (vector_in, vector_in, int16_vector_in, vl)
+ // Input: (vector_in, vector_in, int16_vector_in, vl, ta)
class RISCVRGatherEI16VVMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i16_ty>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector, and the
// second operand is XLen.
// Input: (vector_in, xlen_in, vl)
@@ -314,12 +327,13 @@ let TargetPrefix = "riscv" in {
}
// For destination vector type is the same as first source vector (with mask).
// Second operand is XLen.
- // Input: (maskedoff, vector_in, xlen_in, mask, vl)
+ // Input: (maskedoff, vector_in, xlen_in, mask, vl, ta)
class RISCVGatherVXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>,
+ LLVMMatchType<1>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
}
// For destination vector type is the same as first source vector.
// Input: (vector_in, vector_in/scalar_in, vl)
@@ -330,12 +344,13 @@ let TargetPrefix = "riscv" in {
let SplatOperand = 2;
}
// For destination vector type is the same as first source vector (with mask).
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryAAXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let SplatOperand = 3;
}
// For destination vector type is the same as first source vector. The
@@ -347,12 +362,13 @@ let TargetPrefix = "riscv" in {
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first source vector (with mask).
// The second source operand must match the destination type or be an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryAAShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is NOT the same as first source vector.
// Input: (vector_in, vector_in/scalar_in, vl)
class RISCVBinaryABXNoMask
@@ -362,12 +378,13 @@ let TargetPrefix = "riscv" in {
let SplatOperand = 2;
}
// For destination vector type is NOT the same as first source vector (with mask).
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryABXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
let SplatOperand = 3;
}
// For destination vector type is NOT the same as first source vector. The
@@ -379,12 +396,13 @@ let TargetPrefix = "riscv" in {
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is NOT the same as first source vector (with mask).
// The second source operand must match the destination type or be an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVBinaryABShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic;
// For binary operations with V0 as input.
// Input: (vector_in, vector_in/scalar_in, V0, vl)
class RISCVBinaryWithV0
@@ -461,12 +479,13 @@ let TargetPrefix = "riscv" in {
}
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVSaturatingBinaryAAXMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
let SplatOperand = 3;
}
// For Saturating binary operations.
@@ -480,12 +499,13 @@ let TargetPrefix = "riscv" in {
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVSaturatingBinaryAAShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
// For Saturating binary operations.
// The destination vector type is NOT the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
@@ -497,12 +517,13 @@ let TargetPrefix = "riscv" in {
// For Saturating binary operations with mask.
// The destination vector type is NOT the same as first source vector (with mask).
// The second source operand matches the destination type or is an XLen scalar.
- // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, ta)
class RISCVSaturatingBinaryABShiftMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>],
+ [ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic;
class RISCVTernaryAAAXNoMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
@@ -579,13 +600,13 @@ let TargetPrefix = "riscv" in {
[llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is NOT the same as source vector (with mask).
- // Input: (maskedoff, vector_in, mask, vl)
+ // Input: (maskedoff, vector_in, mask, vl, ta)
class RISCVUnaryABMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
- llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic;
// For unary operations with the same vector type in/out without mask
// Output: (vector)
// Input: (vector_in, vl)
@@ -614,12 +635,13 @@ let TargetPrefix = "riscv" in {
[llvm_anyvector_ty, llvm_anyint_ty],
[IntrNoMem]>, RISCVVIntrinsic;
// For Conversion unary operations with mask.
- // Input: (maskedoff, vector_in, mask, vl)
+ // Input: (maskedoff, vector_in, mask, vl, ta)
class RISCVConversionMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
- [IntrNoMem]>, RISCVVIntrinsic;
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic;
// For atomic operations without mask
// Input: (base, index, value, vl)
class RISCVAMONoMask
@@ -643,15 +665,16 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For unit stride segment load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
class RISCVUSSegLoadMask<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
[LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty]),
- [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<1>]),
+ [ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride fault-only-first segment load
// Input: (pointer, vl)
@@ -664,7 +687,7 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>]>, RISCVVIntrinsic;
// For unit stride fault-only-first segment load with mask
- // Input: (maskedoff, pointer, mask, vl)
+ // Input: (maskedoff, pointer, mask, vl, ta)
// Output: (data, vl)
// NOTE: We model this with default memory properties since we model writing
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
@@ -674,8 +697,9 @@ let TargetPrefix = "riscv" in {
!listconcat(!listsplat(LLVMMatchType<0>, nf),
[LLVMPointerToElt<0>,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<1>]),
- [NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic;
+ LLVMMatchType<1>, LLVMMatchType<1>]),
+ [ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>]>,
+ RISCVVIntrinsic;
// For stride segment load
// Input: (pointer, offset, vl)
@@ -685,7 +709,7 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For stride segment load with mask
- // Input: (maskedoff, pointer, offset, mask, vl)
+ // Input: (maskedoff, pointer, offset, mask, vl, ta)
class RISCVSSegLoadMask<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
@@ -693,8 +717,9 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>,
llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<1>]),
- [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+ LLVMMatchType<1>, LLVMMatchType<1>]),
+ [ImmArg<ArgIndex<!add(nf, 4)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For indexed segment load
// Input: (pointer, index, vl)
@@ -704,7 +729,7 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic;
// For indexed segment load with mask
- // Input: (maskedoff, pointer, index, mask, vl)
+ // Input: (maskedoff, pointer, index, mask, vl, ta)
class RISCVISegLoadMask<int nf>
: Intrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
@@ -712,8 +737,9 @@ let TargetPrefix = "riscv" in {
[LLVMPointerToElt<0>,
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_anyint_ty]),
- [NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic;
+ llvm_anyint_ty, LLVMMatchType<2>]),
+ [ImmArg<ArgIndex<!add(nf, 4)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
+ RISCVVIntrinsic;
// For unit stride segment store
// Input: (value, pointer, vl)
@@ -947,8 +973,8 @@ let TargetPrefix = "riscv" in {
defm vsoxei : RISCVIStore;
defm vsuxei : RISCVIStore;
- def int_riscv_vle1 : RISCVUSLoad;
- def int_riscv_vse1 : RISCVUSStore;
+ def int_riscv_vlm : RISCVUSLoad;
+ def int_riscv_vsm : RISCVUSStore;
defm vamoswap : RISCVAMO;
defm vamoadd : RISCVAMO;
@@ -1049,7 +1075,7 @@ let TargetPrefix = "riscv" in {
defm vssubu : RISCVSaturatingBinaryAAX;
defm vssub : RISCVSaturatingBinaryAAX;
- def int_riscv_vmerge : RISCVBinaryWithV0;
+ defm vmerge : RISCVBinaryWithV0;
def int_riscv_vmv_v_v : Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, llvm_anyint_ty],
@@ -1124,7 +1150,7 @@ let TargetPrefix = "riscv" in {
defm vrgather_vx : RISCVRGatherVX;
defm vrgatherei16_vv : RISCVRGatherEI16VV;
- def "int_riscv_vcompress" : RISCVUnaryAAMask;
+ def "int_riscv_vcompress" : RISCVUnaryAAMaskNoTA;
defm vaaddu : RISCVSaturatingBinaryAAX;
defm vaadd : RISCVSaturatingBinaryAAX;
@@ -1159,25 +1185,25 @@ let TargetPrefix = "riscv" in {
defm vwredsum : RISCVReduction;
defm vfredosum : RISCVReduction;
- defm vfredsum : RISCVReduction;
+ defm vfredusum : RISCVReduction;
defm vfredmin : RISCVReduction;
defm vfredmax : RISCVReduction;
- defm vfwredsum : RISCVReduction;
+ defm vfwredusum : RISCVReduction;
defm vfwredosum : RISCVReduction;
def int_riscv_vmand: RISCVBinaryAAANoMask;
def int_riscv_vmnand: RISCVBinaryAAANoMask;
- def int_riscv_vmandnot: RISCVBinaryAAANoMask;
+ def int_riscv_vmandn: RISCVBinaryAAANoMask;
def int_riscv_vmxor: RISCVBinaryAAANoMask;
def int_riscv_vmor: RISCVBinaryAAANoMask;
def int_riscv_vmnor: RISCVBinaryAAANoMask;
- def int_riscv_vmornot: RISCVBinaryAAANoMask;
+ def int_riscv_vmorn: RISCVBinaryAAANoMask;
def int_riscv_vmxnor: RISCVBinaryAAANoMask;
def int_riscv_vmclr : RISCVNullaryIntrinsic;
def int_riscv_vmset : RISCVNullaryIntrinsic;
- defm vpopc : RISCVMaskUnarySOut;
+ defm vcpop : RISCVMaskUnarySOut;
defm vfirst : RISCVMaskUnarySOut;
defm vmsbf : RISCVMaskUnaryMOut;
defm vmsof : RISCVMaskUnaryMOut;
@@ -1245,4 +1271,15 @@ let TargetPrefix = "riscv" in {
defm vsuxseg # nf : RISCVISegStore<nf>;
}
+ // Strided loads/stores for fixed vectors.
+ def int_riscv_masked_strided_load
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyptr_ty,
+ llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [NoCapture<ArgIndex<1>>, IntrReadMem]>;
+ def int_riscv_masked_strided_store
+ : Intrinsic<[],
+ [llvm_anyvector_ty, llvm_anyptr_ty,
+ llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
+ [NoCapture<ArgIndex<1>>, IntrWriteMem]>;
} // TargetPrefix = "riscv"
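
The two fixed-vector strided intrinsics added above are overloaded on the result vector, the pointer, and the stride integer. A minimal caller-side sketch, assuming the generated Intrinsic::riscv_masked_strided_load ID and the usual getDeclaration/IRBuilder flow; the helper name and operand values are illustrative, not part of the patch:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Sketch: emit riscv.masked.strided.load for a fixed-width vector.
// Operand order follows the TableGen declaration above: passthru, pointer,
// stride, mask; the overload types are the result vector, pointer and
// stride integer types.
static Value *emitMaskedStridedLoad(IRBuilder<> &B, Module &M, Value *PassThru,
                                    Value *Ptr, Value *Stride, Value *Mask) {
  Function *Fn = Intrinsic::getDeclaration(
      &M, Intrinsic::riscv_masked_strided_load,
      {PassThru->getType(), Ptr->getType(), Stride->getType()});
  return B.CreateCall(Fn, {PassThru, Ptr, Stride, Mask});
}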
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 81435e98bea0..a149b571072c 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -144,7 +144,7 @@ multiclass SystemZBinaryCCBHF {
def fs : SystemZBinaryCC<llvm_v4i32_ty>;
}
-multiclass SystemZCompareBHFG<string name> {
+multiclass SystemZCompareBHFG {
def bs : SystemZBinaryCC<llvm_v16i8_ty>;
def hs : SystemZBinaryCC<llvm_v8i16_ty>;
def fs : SystemZBinaryCC<llvm_v4i32_ty>;
@@ -341,9 +341,9 @@ let TargetPrefix = "s390" in {
def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>;
- defm int_s390_vceq : SystemZCompareBHFG<"vceq">;
- defm int_s390_vch : SystemZCompareBHFG<"vch">;
- defm int_s390_vchl : SystemZCompareBHFG<"vchl">;
+ defm int_s390_vceq : SystemZCompareBHFG;
+ defm int_s390_vch : SystemZCompareBHFG;
+ defm int_s390_vchl : SystemZCompareBHFG;
defm int_s390_vfae : SystemZTernaryIntBHF<"vfae">;
defm int_s390_vfae : SystemZTernaryIntCCBHF;
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index 11990554037d..6a8e6c797f85 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -50,7 +50,8 @@ def int_wasm_trunc_saturate_unsigned : Intrinsic<[llvm_anyint_ty],
//===----------------------------------------------------------------------===//
// throw / rethrow
-// The immediate argument is an index to a tag, which is 0 for C++.
+// The first immediate argument is an index to a tag, which is 0 for C++
+// exception. The second argument is the thrown exception pointer.
def int_wasm_throw : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty],
[Throws, IntrNoReturn, ImmArg<ArgIndex<0>>]>;
def int_wasm_rethrow : Intrinsic<[], [], [Throws, IntrNoReturn]>;
@@ -63,8 +64,9 @@ def int_wasm_get_ehselector : Intrinsic<[llvm_i32_ty], [llvm_token_ty],
[IntrHasSideEffects]>;
// wasm.catch returns the pointer to the exception object caught by wasm 'catch'
-// instruction. This returns a single pointer, which is sufficient for C++
-// support. The immediate argument is an index to for a tag, which is 0 for C++.
+// instruction. This returns a single pointer, which is the case for C++
+// exceptions. The immediate argument is an index to a tag, which is 0 for
+// C++ exceptions.
def int_wasm_catch : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty],
[IntrHasSideEffects, ImmArg<ArgIndex<0>>]>;
@@ -162,6 +164,15 @@ def int_wasm_q15mulr_sat_signed :
[llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem, IntrSpeculatable]>;
+def int_wasm_pmin :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_pmax :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
def int_wasm_extadd_pairwise_signed :
Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
@@ -172,6 +183,59 @@ def int_wasm_extadd_pairwise_unsigned :
[IntrNoMem, IntrSpeculatable]>;
//===----------------------------------------------------------------------===//
+// Relaxed SIMD intrinsics (experimental)
+//===----------------------------------------------------------------------===//
+
+def int_wasm_fma :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_fms :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_laneselect :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_swizzle :
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_min :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+def int_wasm_relaxed_max :
+ Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_signed:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_unsigned:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4f32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_zero_signed:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+def int_wasm_relaxed_trunc_zero_unsigned:
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v2f64_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
+
+//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5848356b5b1a..8de737a1c7a5 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -792,7 +792,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i8_ty],
- [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
// Test instruction with bitwise comparison.
@@ -1779,7 +1779,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i8_ty], [IntrNoMem]>;
def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_i8_ty], [IntrNoMem, Commutative, ImmArg<ArgIndex<2>>]>;
+ llvm_i8_ty], [IntrNoMem, ImmArg<ArgIndex<2>>]>;
}
//===----------------------------------------------------------------------===//
@@ -5093,6 +5093,10 @@ let TargetPrefix = "x86" in {
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
+ def int_x86_cast_vector_to_tile:
+ Intrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+ def int_x86_cast_tile_to_vector:
+ Intrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -5108,3 +5112,757 @@ let TargetPrefix = "x86" in {
def int_x86_senduipi : GCCBuiltin<"__builtin_ia32_senduipi">,
Intrinsic<[], [llvm_i64_ty], []>;
}
+
+//===----------------------------------------------------------------------===//
+// avx512_fp16: vaddph
+let TargetPrefix = "x86" in {
+ def int_x86_avx512fp16_add_ph_512
+ : GCCBuiltin<"__builtin_ia32_addph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_sub_ph_512
+ : GCCBuiltin<"__builtin_ia32_subph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_mul_ph_512
+ : GCCBuiltin<"__builtin_ia32_mulph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_div_ph_512
+ : GCCBuiltin<"__builtin_ia32_divph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_max_ph_128
+ : GCCBuiltin<"__builtin_ia32_maxph128">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_max_ph_256
+ : GCCBuiltin<"__builtin_ia32_maxph256">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_max_ph_512
+ : GCCBuiltin<"__builtin_ia32_maxph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_min_ph_128
+ : GCCBuiltin<"__builtin_ia32_minph128">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_min_ph_256
+ : GCCBuiltin<"__builtin_ia32_minph256">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_min_ph_512
+ : GCCBuiltin<"__builtin_ia32_minph512">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+
+ def int_x86_avx512fp16_mask_cmp_ph_512
+ : Intrinsic<[ llvm_v32i1_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_v32i1_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_cmp_ph_256
+ : Intrinsic<[ llvm_v16i1_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i32_ty, llvm_v16i1_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_mask_cmp_ph_128
+ : Intrinsic<[ llvm_v8i1_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8i1_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+
+ def int_x86_avx512fp16_mask_add_sh_round
+ : GCCBuiltin<"__builtin_ia32_addsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_sub_sh_round
+ : GCCBuiltin<"__builtin_ia32_subsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_mul_sh_round
+ : GCCBuiltin<"__builtin_ia32_mulsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_div_sh_round
+ : GCCBuiltin<"__builtin_ia32_divsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_min_sh_round
+ : GCCBuiltin<"__builtin_ia32_minsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_max_sh_round
+ : GCCBuiltin<"__builtin_ia32_maxsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_cmp_sh
+ : GCCBuiltin<"__builtin_ia32_cmpsh_mask">,
+ Intrinsic<[ llvm_i8_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_vcomi_sh
+ : GCCBuiltin<"__builtin_ia32_vcomish">,
+ Intrinsic<[ llvm_i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2psx_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2psx_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f16_ty, llvm_v8f32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2psx_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2psx512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_128
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_256
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtps2phx_512
+ : GCCBuiltin<"__builtin_ia32_vcvtps2phx512_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f32_ty, llvm_v16f16_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4f64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtpd2ph_512
+ : GCCBuiltin<"__builtin_ia32_vcvtpd2ph512_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f64_ty, llvm_v8f16_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd128_mask">,
+ Intrinsic<[ llvm_v2f64_ty ],
+ [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd256_mask">,
+ Intrinsic<[ llvm_v4f64_ty ],
+ [ llvm_v8f16_ty, llvm_v4f64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2pd_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2pd512_mask">,
+ Intrinsic<[ llvm_v8f64_ty ],
+ [ llvm_v8f16_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsh2ss_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2ss_round_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v8f16_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtss2sh_round
+ : GCCBuiltin<"__builtin_ia32_vcvtss2sh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v4f32_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsd2sh_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsd2sh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v2f64_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vcvtsh2sd_round
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2sd_round_mask">,
+ Intrinsic<[ llvm_v2f64_ty ],
+ [ llvm_v2f64_ty, llvm_v8f16_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2w_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2w_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2w_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2w512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2w_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2w512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uw_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uw512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw128_mask">,
+ Intrinsic<[ llvm_v8i16_ty ],
+ [ llvm_v8f16_ty, llvm_v8i16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw256_mask">,
+ Intrinsic<[ llvm_v16i16_ty ],
+ [ llvm_v16f16_ty, llvm_v16i16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uw_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uw512_mask">,
+ Intrinsic<[ llvm_v32i16_ty ],
+ [ llvm_v32f16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtph2dq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2dq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2dq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2dq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2udq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2udq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtdq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtdq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtudq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtudq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i32_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2dq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2dq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq128_mask">,
+ Intrinsic<[ llvm_v4i32_ty ],
+ [ llvm_v8f16_ty, llvm_v4i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq256_mask">,
+ Intrinsic<[ llvm_v8i32_ty ],
+ [ llvm_v8f16_ty, llvm_v8i32_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2udq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2udq512_mask">,
+ Intrinsic<[ llvm_v16i32_ty ],
+ [ llvm_v16f16_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vcvtqq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtqq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtqq2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtqq2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2qq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2qq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvtuqq2ph_128
+ : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v2i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtuqq2ph_256
+ : GCCBuiltin<"__builtin_ia32_vcvtuqq2ph256_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v4i64_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_128
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_256
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvtph2uqq_512
+ : GCCBuiltin<"__builtin_ia32_vcvtph2uqq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2qq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2qq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_128
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq128_mask">,
+ Intrinsic<[ llvm_v2i64_ty ],
+ [ llvm_v8f16_ty, llvm_v2i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_256
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq256_mask">,
+ Intrinsic<[ llvm_v4i64_ty ],
+ [ llvm_v8f16_ty, llvm_v4i64_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vcvttph2uqq_512
+ : GCCBuiltin<"__builtin_ia32_vcvttph2uqq512_mask">,
+ Intrinsic<[ llvm_v8i64_ty ],
+ [ llvm_v8f16_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_vcvtsh2si32
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2si32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2usi32
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2usi32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2si64
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2si64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtsh2usi64
+ : GCCBuiltin<"__builtin_ia32_vcvtsh2usi64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvtusi2sh
+ : GCCBuiltin<"__builtin_ia32_vcvtusi2sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtusi642sh
+ : GCCBuiltin<"__builtin_ia32_vcvtusi642sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtsi2sh
+ : GCCBuiltin<"__builtin_ia32_vcvtsi2sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvtsi642sh
+ : GCCBuiltin<"__builtin_ia32_vcvtsi642sh">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i64_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>> ]>;
+ def int_x86_avx512fp16_vcvttsh2si32
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2si32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2si64
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2si64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2usi32
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2usi32">,
+ Intrinsic<[ llvm_i32_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_vcvttsh2usi64
+ : GCCBuiltin<"__builtin_ia32_vcvttsh2usi64">,
+ Intrinsic<[ llvm_i64_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+
+ def int_x86_avx512fp16_sqrt_ph_512
+ : Intrinsic<[ llvm_v32f16_ty ], [ llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_sqrt_sh
+ : Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_rsqrt_ph_128
+ : GCCBuiltin<"__builtin_ia32_rsqrtph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rsqrt_ph_256
+ : GCCBuiltin<"__builtin_ia32_rsqrtph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rsqrt_ph_512
+ : GCCBuiltin<"__builtin_ia32_rsqrtph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rsqrt_sh
+ : GCCBuiltin<"__builtin_ia32_rsqrtsh_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_ph_128
+ : GCCBuiltin<"__builtin_ia32_rcpph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_ph_256
+ : GCCBuiltin<"__builtin_ia32_rcpph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_ph_512
+ : GCCBuiltin<"__builtin_ia32_rcpph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_rcp_sh
+ : GCCBuiltin<"__builtin_ia32_rcpsh_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_reduce_ph_128
+ : GCCBuiltin<"__builtin_ia32_reduceph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_reduce_ph_256
+ : GCCBuiltin<"__builtin_ia32_reduceph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_reduce_ph_512
+ : GCCBuiltin<"__builtin_ia32_reduceph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_reduce_sh
+ : GCCBuiltin<"__builtin_ia32_reducesh_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>;
+ def int_x86_avx512fp16_fpclass_ph_128
+ : Intrinsic<[ llvm_v8i1_ty ], [ llvm_v8f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_fpclass_ph_256
+ : Intrinsic<[ llvm_v16i1_ty ], [ llvm_v16f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_fpclass_ph_512
+ : Intrinsic<[ llvm_v32i1_ty ], [ llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_fpclass_sh
+ : GCCBuiltin<"__builtin_ia32_fpclasssh_mask">,
+ Intrinsic<[ llvm_i8_ty ], [ llvm_v8f16_ty, llvm_i32_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_getexp_ph_128
+ : GCCBuiltin<"__builtin_ia32_getexpph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ], [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_getexp_ph_256
+ : GCCBuiltin<"__builtin_ia32_getexpph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_getexp_ph_512
+ : GCCBuiltin<"__builtin_ia32_getexpph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_mask_getexp_sh
+ : GCCBuiltin<"__builtin_ia32_getexpsh128_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_getmant_ph_128
+ : GCCBuiltin<"__builtin_ia32_getmantph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_getmant_ph_256
+ : GCCBuiltin<"__builtin_ia32_getmantph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_getmant_ph_512
+ : GCCBuiltin<"__builtin_ia32_getmantph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_getmant_sh
+ : GCCBuiltin<"__builtin_ia32_getmantsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty,
+ llvm_i8_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_ph_128
+ : GCCBuiltin<"__builtin_ia32_rndscaleph_128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_i32_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_ph_256
+ : GCCBuiltin<"__builtin_ia32_rndscaleph_256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_i32_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_ph_512
+ : GCCBuiltin<"__builtin_ia32_rndscaleph_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_i32_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_rndscale_sh
+ : GCCBuiltin<"__builtin_ia32_rndscalesh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>> ]>;
+ def int_x86_avx512fp16_mask_scalef_ph_128
+ : GCCBuiltin<"__builtin_ia32_scalefph128_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_scalef_ph_256
+ : GCCBuiltin<"__builtin_ia32_scalefph256_mask">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty, llvm_i16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_scalef_ph_512
+ : GCCBuiltin<"__builtin_ia32_scalefph512_mask">,
+ Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_scalef_sh
+ : GCCBuiltin<"__builtin_ia32_scalefsh_round_mask">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+
+ def int_x86_avx512fp16_vfmadd_ph_512
+ : Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_vfmaddsub_ph_128
+ : GCCBuiltin<"__builtin_ia32_vfmaddsubph">,
+ Intrinsic<[ llvm_v8f16_ty ],
+ [ llvm_v8f16_ty, llvm_v8f16_ty, llvm_v8f16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_vfmaddsub_ph_256
+ : GCCBuiltin<"__builtin_ia32_vfmaddsubph256">,
+ Intrinsic<[ llvm_v16f16_ty ],
+ [ llvm_v16f16_ty, llvm_v16f16_ty, llvm_v16f16_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_vfmaddsub_ph_512
+ : Intrinsic<[ llvm_v32f16_ty ],
+ [ llvm_v32f16_ty, llvm_v32f16_ty, llvm_v32f16_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+ def int_x86_avx512fp16_vfmadd_f16
+ : Intrinsic<[ llvm_half_ty ],
+ [ llvm_half_ty, llvm_half_ty, llvm_half_ty, llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<3>> ]>;
+
+ def int_x86_avx512fp16_mask_vfcmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph128_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph256_maskz">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_maskz">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph128_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph256_maskz">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask3">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph512_maskz">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfmaddcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfmaddcsh_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfcmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_maskz_vfcmadd_csh
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcsh_maskz">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmul_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfmulcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmul_cph_128
+ : GCCBuiltin<"__builtin_ia32_vfcmulcph128_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmul_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfmulcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfcmul_cph_256
+ : GCCBuiltin<"__builtin_ia32_vfcmulcph256_mask">,
+ Intrinsic<[ llvm_v8f32_ty ],
+ [ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
+ [ IntrNoMem ]>;
+ def int_x86_avx512fp16_mask_vfmul_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfmulcph512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfcmul_cph_512
+ : GCCBuiltin<"__builtin_ia32_vfcmulcph512_mask">,
+ Intrinsic<[ llvm_v16f32_ty ],
+ [ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfmul_csh
+ : GCCBuiltin<"__builtin_ia32_vfmulcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+ def int_x86_avx512fp16_mask_vfcmul_csh
+ : GCCBuiltin<"__builtin_ia32_vfcmulcsh_mask">,
+ Intrinsic<[ llvm_v4f32_ty ],
+ [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty ],
+ [ IntrNoMem, ImmArg<ArgIndex<4>> ]>;
+}
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index bc605f108340..1c902ebce5ad 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -305,6 +305,10 @@ public:
/// LLVMContext is used by compilation.
void setOptPassGate(OptPassGate&);
+ /// Enable opaque pointers. Can only be called before creating the first
+ /// pointer type.
+ void enableOpaquePointers() const;
+
/// Whether typed pointers are supported. If false, all pointers are opaque.
bool supportsTypedPointers() const;
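
A minimal usage sketch for the new hook, based only on the declarations shown here; the key constraint is ordering, since the call must happen before the context creates its first pointer type:

#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Sketch: opt a context into opaque pointers before building any IR in it.
static void setUpContext(LLVMContext &Ctx) {
  Ctx.enableOpaquePointers();   // must precede the first pointer type
  // supportsTypedPointers() should now report false for this context.
}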
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index b14127df2182..6cc5797269e2 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -74,7 +74,7 @@ public:
Value *Ops[] = {DataPtr, Stride, B.getInt1(IsVolatile), B.getInt32(Rows),
B.getInt32(Columns)};
- Type *OverloadedTypes[] = {RetType};
+ Type *OverloadedTypes[] = {RetType, Stride->getType()};
Function *TheFn = Intrinsic::getDeclaration(
getModule(), Intrinsic::matrix_column_major_load, OverloadedTypes);
@@ -82,7 +82,7 @@ public:
CallInst *Call = B.CreateCall(TheFn->getFunctionType(), TheFn, Ops, Name);
Attribute AlignAttr =
Attribute::getWithAlignment(Call->getContext(), Alignment);
- Call->addAttribute(1, AlignAttr);
+ Call->addParamAttr(0, AlignAttr);
return Call;
}
@@ -97,7 +97,7 @@ public:
Value *Ops[] = {Matrix, Ptr,
Stride, B.getInt1(IsVolatile),
B.getInt32(Rows), B.getInt32(Columns)};
- Type *OverloadedTypes[] = {Matrix->getType()};
+ Type *OverloadedTypes[] = {Matrix->getType(), Stride->getType()};
Function *TheFn = Intrinsic::getDeclaration(
getModule(), Intrinsic::matrix_column_major_store, OverloadedTypes);
@@ -105,7 +105,7 @@ public:
CallInst *Call = B.CreateCall(TheFn->getFunctionType(), TheFn, Ops, Name);
Attribute AlignAttr =
Attribute::getWithAlignment(Call->getContext(), Alignment);
- Call->addAttribute(2, AlignAttr);
+ Call->addParamAttr(1, AlignAttr);
return Call;
}
@@ -231,9 +231,23 @@ public:
: (IsUnsigned ? B.CreateUDiv(LHS, RHS) : B.CreateSDiv(LHS, RHS));
}
- /// Extracts the element at (\p RowIdx, \p ColumnIdx) from \p Matrix.
- Value *CreateExtractElement(Value *Matrix, Value *RowIdx, Value *ColumnIdx,
- unsigned NumRows, Twine const &Name = "") {
+ /// Create an assumption that \p Idx is less than \p NumElements.
+ void CreateIndexAssumption(Value *Idx, unsigned NumElements,
+ Twine const &Name = "") {
+
+ Value *NumElts =
+ B.getIntN(Idx->getType()->getScalarSizeInBits(), NumElements);
+ auto *Cmp = B.CreateICmpULT(Idx, NumElts);
+ if (auto *ConstCond = dyn_cast<ConstantInt>(Cmp))
+ assert(ConstCond->isOne() && "Index must be valid!");
+ else
+ B.CreateAssumption(Cmp);
+ }
+
+ /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from
+ /// a matrix with \p NumRows embedded in a vector.
+ Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows,
+ Twine const &Name = "") {
unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
ColumnIdx->getType()->getScalarSizeInBits());
@@ -241,9 +255,7 @@ public:
RowIdx = B.CreateZExt(RowIdx, IntTy);
ColumnIdx = B.CreateZExt(ColumnIdx, IntTy);
Value *NumRowsV = B.getIntN(MaxWidth, NumRows);
- return B.CreateExtractElement(
- Matrix, B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx),
- "matext");
+ return B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx);
}
};
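
With the extract folded out of the builder, an element read is now composed from an index computation plus a plain extractelement, and callers can emit the bounds assumption themselves. A sketch of the intended call pattern, assuming the template form of MatrixBuilder at this revision and illustrative value names:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MatrixBuilder.h"

using namespace llvm;

// Sketch: read element (Row, Col) of a Rows x Cols matrix embedded in a
// flat vector. CreateIndex builds Col * Rows + Row (column-major layout);
// CreateIndexAssumption records that the flattened index stays in bounds.
static Value *loadMatrixElement(IRBuilder<> &B, Value *MatrixVec, Value *Row,
                                Value *Col, unsigned Rows, unsigned Cols) {
  MatrixBuilder<IRBuilder<>> MB(B);
  Value *Idx = MB.CreateIndex(Row, Col, Rows);
  MB.CreateIndexAssumption(Idx, Rows * Cols);
  return B.CreateExtractElement(MatrixVec, Idx, "matext");
}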
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index c5840564454e..26d70b4db2d5 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -707,6 +707,15 @@ struct AAMDNodes {
Result.NoAlias = NoAlias;
return Result;
}
+
+ /// Given two sets of AAMDNodes applying to potentially different locations,
+ /// determine the best AAMDNodes that apply to both.
+ AAMDNodes merge(const AAMDNodes &Other) const;
+
+ /// Determine the best AAMDNodes after concatenating two different locations
+ /// together. Different from `merge`, where different locations should
+ /// overlap each other, `concat` puts non-overlapping locations together.
+ AAMDNodes concat(const AAMDNodes &Other) const;
};
// Specialize DenseMapInfo for AAMDNodes.
@@ -897,6 +906,7 @@ struct TempMDNodeDeleter {
class MDNode : public Metadata {
friend class ReplaceableMetadataImpl;
friend class LLVMContextImpl;
+ friend class DIArgList;
unsigned NumOperands;
unsigned NumUnresolved;
@@ -1028,6 +1038,31 @@ public:
return cast<T>(N.release()->replaceWithDistinctImpl());
}
+ /// Print in tree shape.
+ ///
+ /// Prints definition of \c this in tree shape.
+ ///
+ /// If \c M is provided, metadata nodes will be numbered canonically;
+ /// otherwise, pointer addresses are substituted.
+ /// @{
+ void printTree(raw_ostream &OS, const Module *M = nullptr) const;
+ void printTree(raw_ostream &OS, ModuleSlotTracker &MST,
+ const Module *M = nullptr) const;
+ /// @}
+
+ /// User-friendly dump in tree shape.
+ ///
+ /// If \c M is provided, metadata nodes will be numbered canonically;
+ /// otherwise, pointer addresses are substituted.
+ ///
+ /// Note: this uses an explicit overload instead of default arguments so that
+ /// the nullptr version is easy to call from a debugger.
+ ///
+ /// @{
+ void dumpTree() const;
+ void dumpTree(const Module *M) const;
+ /// @}
+
private:
MDNode *replaceWithPermanentImpl();
MDNode *replaceWithUniquedImpl();
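
A small sketch of how the new tree printer might be used while debugging a pass; the metadata kind chosen here is just an example:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch: print an instruction's !alias.scope metadata as a tree, with
// nodes numbered canonically because the module is passed in.
static void debugPrintAliasScope(const Instruction &I) {
  if (const MDNode *Scope = I.getMetadata(LLVMContext::MD_alias_scope))
    Scope->printTree(errs(), I.getModule());
}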
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index 81e29d9b86e8..bd3a196c7181 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -64,9 +64,9 @@ class VersionTuple;
/// constant references to global variables in the module. When a global
/// variable is destroyed, it should have no entries in the GlobalValueRefMap.
/// The main container class for the LLVM Intermediate Representation.
-class Module {
-/// @name Types And Enumerations
-/// @{
+class LLVM_EXTERNAL_VISIBILITY Module {
+ /// @name Types And Enumerations
+ /// @{
public:
/// The type for the list of global variables.
using GlobalListType = SymbolTableList<GlobalVariable>;
@@ -324,6 +324,9 @@ public:
/// name is not found.
GlobalValue *getNamedValue(StringRef Name) const;
+ /// Return the number of global values in the module.
+ unsigned getNumNamedValues() const;
+
/// Return a unique non-zero ID for the specified metadata kind. This ID is
/// uniqued across modules in the current LLVMContext.
unsigned getMDKindID(StringRef Name) const;
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 4b84f6b0408d..e00b78d45c63 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -572,6 +572,50 @@ public:
unsigned NoInline : 1;
// Indicate if function should be always inlined.
unsigned AlwaysInline : 1;
+ // Indicate if function never raises an exception. Can be modified during
+ // thinlink function attribute propagation
+ unsigned NoUnwind : 1;
+ // Indicate if function contains instructions that mayThrow
+ unsigned MayThrow : 1;
+
+ // If there are calls to unknown targets (e.g. indirect)
+ unsigned HasUnknownCall : 1;
+
+ FFlags &operator&=(const FFlags &RHS) {
+ this->ReadNone &= RHS.ReadNone;
+ this->ReadOnly &= RHS.ReadOnly;
+ this->NoRecurse &= RHS.NoRecurse;
+ this->ReturnDoesNotAlias &= RHS.ReturnDoesNotAlias;
+ this->NoInline &= RHS.NoInline;
+ this->AlwaysInline &= RHS.AlwaysInline;
+ this->NoUnwind &= RHS.NoUnwind;
+ this->MayThrow &= RHS.MayThrow;
+ this->HasUnknownCall &= RHS.HasUnknownCall;
+ return *this;
+ }
+
+ bool anyFlagSet() {
+ return this->ReadNone | this->ReadOnly | this->NoRecurse |
+ this->ReturnDoesNotAlias | this->NoInline | this->AlwaysInline |
+ this->NoUnwind | this->MayThrow | this->HasUnknownCall;
+ }
+
+ operator std::string() {
+ std::string Output;
+ raw_string_ostream OS(Output);
+ OS << "funcFlags: (";
+ OS << "readNone: " << this->ReadNone;
+ OS << ", readOnly: " << this->ReadOnly;
+ OS << ", noRecurse: " << this->NoRecurse;
+ OS << ", returnDoesNotAlias: " << this->ReturnDoesNotAlias;
+ OS << ", noInline: " << this->NoInline;
+ OS << ", alwaysInline: " << this->AlwaysInline;
+ OS << ", noUnwind: " << this->NoUnwind;
+ OS << ", mayThrow: " << this->MayThrow;
+ OS << ", hasUnknownCall: " << this->HasUnknownCall;
+ OS << ")";
+ return OS.str();
+ }
};
/// Describes the uses of a parameter by the function.
@@ -688,6 +732,10 @@ public:
/// Get function summary flags.
FFlags fflags() const { return FunFlags; }
+ void setNoRecurse() { FunFlags.NoRecurse = true; }
+
+ void setNoUnwind() { FunFlags.NoUnwind = true; }
+
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
@@ -700,6 +748,8 @@ public:
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
+ std::vector<EdgeTy> &mutableCalls() { return CallGraphEdgeList; }
+
void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); }
/// Returns the list of type identifiers used by this function in
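
The new operator&= is meant for intersecting flags across call edges during the thin-link attribute propagation mentioned in the comments above; a hedged sketch of that pattern (the helper name is illustrative):

#include "llvm/IR/ModuleSummaryIndex.h"

using namespace llvm;

// Sketch: fold a callee's flags into an accumulated set. Each flag survives
// only if it holds for both summaries; anyFlagSet() tells callers when no
// further propagation is worthwhile.
static FunctionSummary::FFlags
intersectCalleeFlags(FunctionSummary::FFlags Acc,
                     const FunctionSummary &Callee) {
  Acc &= Callee.fflags();
  return Acc;
}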
diff --git a/llvm/include/llvm/IR/Operator.h b/llvm/include/llvm/IR/Operator.h
index d0bce742cc96..b83d83f0d0ab 100644
--- a/llvm/include/llvm/IR/Operator.h
+++ b/llvm/include/llvm/IR/Operator.h
@@ -59,6 +59,10 @@ public:
static bool classof(const Value *V) {
return isa<Instruction>(V) || isa<ConstantExpr>(V);
}
+
+ /// Return true if this operator has flags which may cause this operator
+ /// to evaluate to poison despite having non-poison inputs.
+ bool hasPoisonGeneratingFlags() const;
};
/// Utility class for integer operators which may exhibit overflow - Add, Sub,
@@ -243,6 +247,9 @@ public:
void operator|=(const FastMathFlags &OtherFlags) {
Flags |= OtherFlags.Flags;
}
+ bool operator!=(const FastMathFlags &OtherFlags) const {
+ return Flags != OtherFlags.Flags;
+ }
};
/// Utility class for floating point operations which can have
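
A typical consumer of the new query is speculation or hoisting code that must strip flags such as nsw/nuw/exact before moving an instruction; a sketch, with Instruction::dropPoisonGeneratingFlags assumed as the companion API rather than part of this patch:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Sketch: before hoisting I past a guard, drop any flags that could turn
// non-poison inputs into a poison result.
static void prepareForHoist(Instruction &I) {
  if (cast<Operator>(&I)->hasPoisonGeneratingFlags())
    I.dropPoisonGeneratingFlags();
}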
diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h
index 6c2a1b01d897..63fd98073b51 100644
--- a/llvm/include/llvm/IR/OptBisect.h
+++ b/llvm/include/llvm/IR/OptBisect.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ManagedStatic.h"
+#include <limits>
namespace llvm {
@@ -43,14 +44,12 @@ public:
/// optimization-related problems.
class OptBisect : public OptPassGate {
public:
- /// Default constructor, initializes the OptBisect state based on the
- /// -opt-bisect-limit command line argument.
- ///
- /// By default, bisection is disabled.
- ///
+ /// Default constructor. Initializes the state to "disabled". The bisection
+ /// will be enabled by the cl::opt call-back when the command line option
+ /// is processed.
/// Clients should not instantiate this class directly. All access should go
/// through LLVMContext.
- OptBisect();
+ OptBisect() = default;
virtual ~OptBisect() = default;
@@ -60,7 +59,14 @@ public:
bool shouldRunPass(const Pass *P, StringRef IRDescription) override;
/// isEnabled() should return true before calling shouldRunPass().
- bool isEnabled() const override { return BisectEnabled; }
+ bool isEnabled() const override { return BisectLimit != Disabled; }
+
+ /// Set the new optimization limit and reset the counter. Passing
+ /// OptBisect::Disabled disables the limiting.
+ void setLimit(int Limit) {
+ BisectLimit = Limit;
+ LastBisectNum = 0;
+ }
/// Checks the bisect limit to determine if the specified pass should run.
///
@@ -75,9 +81,11 @@ public:
/// instance, function passes should call FunctionPass::skipFunction().
bool checkPass(const StringRef PassName, const StringRef TargetDesc);
+ static const int Disabled = std::numeric_limits<int>::max();
+
private:
- bool BisectEnabled = false;
- unsigned LastBisectNum = 0;
+ int BisectLimit = Disabled;
+ int LastBisectNum = 0;
};
/// Singleton instance of the OptBisect class, so multiple pass managers don't
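
Since the limit is now settable at runtime, a tool can install its own gate on a context instead of relying solely on the -opt-bisect-limit option; a minimal sketch combining setLimit with the setOptPassGate hook shown earlier in this diff:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"

using namespace llvm;

// Sketch: cap optional passes at N executions for IR built in Ctx.
// Passing OptBisect::Disabled through setLimit turns the gating back off.
static void limitOptionalPasses(LLVMContext &Ctx, OptBisect &Gate, int N) {
  Gate.setLimit(N);
  Ctx.setOptPassGate(Gate);
}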
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index 8e592bfb0c78..e88d2233daba 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -377,10 +377,16 @@ template <typename DerivedT> struct PassInfoMixin {
static_assert(std::is_base_of<PassInfoMixin, DerivedT>::value,
"Must pass the derived type as the template argument!");
StringRef Name = getTypeName<DerivedT>();
- if (Name.startswith("llvm::"))
- Name = Name.drop_front(strlen("llvm::"));
+ Name.consume_front("llvm::");
return Name;
}
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ StringRef ClassName = DerivedT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << PassName;
+ }
};
/// A CRTP mix-in that provides informational APIs needed for analysis passes.
@@ -480,6 +486,16 @@ public:
return *this;
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
+ auto *P = Passes[Idx].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ if (Idx + 1 < Size)
+ OS << ",";
+ }
+ }
+
/// Run all of the passes in this manager over the given unit of IR.
/// ExtraArgs are passed to each pass.
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
@@ -520,12 +536,6 @@ public:
// Finally, intersect the preserved analyses to compute the aggregate
// preserved set for this pass manager.
PA.intersect(std::move(PassPA));
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- //IR.getContext().yield();
}
// Invalidation was handled after each pass in the above loop for the
@@ -538,13 +548,16 @@ public:
}
template <typename PassT>
- std::enable_if_t<!std::is_same<PassT, PassManager>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<!std::is_same<PassT, PassManager>::value>
+ addPass(PassT &&Pass) {
using PassModelT =
detail::PassModel<IRUnitT, PassT, PreservedAnalyses, AnalysisManagerT,
ExtraArgTs...>;
-
- Passes.emplace_back(new PassModelT(std::forward<PassT>(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ Passes.push_back(std::unique_ptr<PassConceptT>(
+ new PassModelT(std::forward<PassT>(Pass))));
}
/// When adding a pass manager pass that has the same type as this pass
@@ -553,10 +566,11 @@ public:
/// implementation complexity and avoid potential invalidation issues that may
/// happen with nested pass managers of the same type.
template <typename PassT>
- std::enable_if_t<std::is_same<PassT, PassManager>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<std::is_same<PassT, PassManager>::value>
+ addPass(PassT &&Pass) {
for (auto &P : Pass.Passes)
- Passes.emplace_back(std::move(P));
+ Passes.push_back(std::move(P));
}
/// Returns if the pass manager contains any passes.
@@ -1190,29 +1204,37 @@ class ModuleToFunctionPassAdaptor
public:
using PassConceptT = detail::PassConcept<Function, FunctionAnalysisManager>;
- explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass)
- : Pass(std::move(Pass)) {}
+ explicit ModuleToFunctionPassAdaptor(std::unique_ptr<PassConceptT> Pass,
+ bool EagerlyInvalidate)
+ : Pass(std::move(Pass)), EagerlyInvalidate(EagerlyInvalidate) {}
/// Runs the function pass across every function in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
private:
std::unique_ptr<PassConceptT> Pass;
+ bool EagerlyInvalidate;
};
/// A function to deduce a function pass type and wrap it in the
/// templated adaptor.
template <typename FunctionPassT>
ModuleToFunctionPassAdaptor
-createModuleToFunctionPassAdaptor(FunctionPassT &&Pass) {
+createModuleToFunctionPassAdaptor(FunctionPassT &&Pass,
+ bool EagerlyInvalidate = false) {
using PassModelT =
detail::PassModel<Function, FunctionPassT, PreservedAnalyses,
FunctionAnalysisManager>;
-
+ // Do not use make_unique; it causes too many template instantiations,
+ // leading to terrible compile times.
return ModuleToFunctionPassAdaptor(
- std::make_unique<PassModelT>(std::forward<FunctionPassT>(Pass)));
+ std::unique_ptr<ModuleToFunctionPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<FunctionPassT>(Pass))),
+ EagerlyInvalidate);
}
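A hedged usage sketch of the new EagerlyInvalidate parameter; the no-op function pass and the buildPipeline helper are hypothetical, and the flag's exact effect lives in the adaptor's run() implementation rather than in this header.

#include "llvm/IR/PassManager.h"

namespace {
// Hypothetical trivial function pass used only for this sketch.
struct NoopFunctionPass : llvm::PassInfoMixin<NoopFunctionPass> {
  llvm::PreservedAnalyses run(llvm::Function &, llvm::FunctionAnalysisManager &) {
    return llvm::PreservedAnalyses::all();
  }
};
} // namespace

void buildPipeline(llvm::ModulePassManager &MPM) {
  // Run the function pass over every function in the module; the new flag asks
  // the adaptor to invalidate function analyses eagerly after each function.
  MPM.addPass(llvm::createModuleToFunctionPassAdaptor(
      NoopFunctionPass(), /*EagerlyInvalidate=*/true));
}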
/// A utility pass template to force an analysis result to be available.
@@ -1243,6 +1265,12 @@ struct RequireAnalysisPass
return PreservedAnalyses::all();
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "require<" << PassName << ">";
+ }
static bool isRequired() { return true; }
};
@@ -1263,6 +1291,12 @@ struct InvalidateAnalysisPass
PA.abandon<AnalysisT>();
return PA;
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "invalidate<" << PassName << ">";
+ }
};
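Illustrative note (not from the patch): these overrides emit the same require<...>/invalidate<...> syntax that the textual pipeline parser accepts; the analysis name below is only an example.

// Hedged sketch of the emitted text, assuming an analysis whose mapped
// pipeline name is "globals-aa":
//   RequireAnalysisPass<...>    prints  require<globals-aa>
//   InvalidateAnalysisPass<...> prints  invalidate<globals-aa>
// e.g. usable as: opt -passes='require<globals-aa>,invalidate<globals-aa>'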
/// A utility pass that does nothing, but preserves no analyses.
@@ -1312,6 +1346,13 @@ public:
return PA;
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "repeat<" << Count << ">(";
+ P.printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+ }
+
private:
int Count;
PassT P;
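Illustrative note (not from the patch): the repeating wrapper composes with whatever it wraps; the pass name and count below are only examples.

// Hedged sketch: a repeated pass with Count == 2 wrapping a pass whose mapped
// pipeline name is "instcombine" prints itself as
//   repeat<2>(instcombine)
// i.e. it emits "repeat<Count>(", delegates to the wrapped pass's
// printPipeline, and closes with ")".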
diff --git a/llvm/include/llvm/IR/PassManagerInternal.h b/llvm/include/llvm/IR/PassManagerInternal.h
index 8f42e69f3063..29b55a8172e6 100644
--- a/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/llvm/include/llvm/IR/PassManagerInternal.h
@@ -46,6 +46,9 @@ struct PassConcept {
virtual PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
ExtraArgTs... ExtraArgs) = 0;
+ virtual void
+ printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) = 0;
/// Polymorphic method to access the name of a pass.
virtual StringRef name() const = 0;
@@ -85,6 +88,12 @@ struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
return Pass.run(IR, AM, ExtraArgs...);
}
+ void printPipeline(
+ raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) override {
+ Pass.printPipeline(OS, MapClassName2PassName);
+ }
+
StringRef name() const override { return PassT::name(); }
template <typename T>
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index cbd429f84ee4..b858733530e3 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -438,7 +438,7 @@ inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
}
struct is_all_ones {
- bool isValue(const APInt &C) { return C.isAllOnesValue(); }
+ bool isValue(const APInt &C) { return C.isAllOnes(); }
};
/// Match an integer or vector with all bits set.
/// For vectors, this includes constants with undefined elements.
@@ -506,7 +506,7 @@ inline cst_pred_ty<is_nonpositive> m_NonPositive() {
inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
struct is_one {
- bool isValue(const APInt &C) { return C.isOneValue(); }
+ bool isValue(const APInt &C) { return C.isOne(); }
};
/// Match an integer 1 or a vector with all elements equal to 1.
/// For vectors, this includes constants with undefined elements.
@@ -515,7 +515,7 @@ inline cst_pred_ty<is_one> m_One() {
}
struct is_zero_int {
- bool isValue(const APInt &C) { return C.isNullValue(); }
+ bool isValue(const APInt &C) { return C.isZero(); }
};
/// Match an integer 0 or a vector with all elements equal to 0.
/// For vectors, this includes constants with undefined elements.
@@ -549,7 +549,7 @@ inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
}
struct is_negated_power2 {
- bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
+ bool isValue(const APInt &C) { return C.isNegatedPowerOf2(); }
};
/// Match an integer or vector negated power-of-2.
/// For vectors, this includes constants with undefined elements.
@@ -593,32 +593,7 @@ inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
struct icmp_pred_with_threshold {
ICmpInst::Predicate Pred;
const APInt *Thr;
- bool isValue(const APInt &C) {
- switch (Pred) {
- case ICmpInst::Predicate::ICMP_EQ:
- return C.eq(*Thr);
- case ICmpInst::Predicate::ICMP_NE:
- return C.ne(*Thr);
- case ICmpInst::Predicate::ICMP_UGT:
- return C.ugt(*Thr);
- case ICmpInst::Predicate::ICMP_UGE:
- return C.uge(*Thr);
- case ICmpInst::Predicate::ICMP_ULT:
- return C.ult(*Thr);
- case ICmpInst::Predicate::ICMP_ULE:
- return C.ule(*Thr);
- case ICmpInst::Predicate::ICMP_SGT:
- return C.sgt(*Thr);
- case ICmpInst::Predicate::ICMP_SGE:
- return C.sge(*Thr);
- case ICmpInst::Predicate::ICMP_SLT:
- return C.slt(*Thr);
- case ICmpInst::Predicate::ICMP_SLE:
- return C.sle(*Thr);
- default:
- llvm_unreachable("Unhandled ICmp predicate");
- }
- }
+ bool isValue(const APInt &C) { return ICmpInst::compare(C, *Thr, Pred); }
};
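The hand-written predicate switch above is replaced by a single call to ICmpInst::compare; a minimal hedged sketch of that helper, using made-up constants:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instructions.h"

// Hedged sketch: ICmpInst::compare evaluates an integer predicate directly on
// two APInts, which is what the matcher now defers to.
bool sketchCompare() {
  llvm::APInt Five(32, 5), Three(32, 3);
  return llvm::ICmpInst::compare(Five, Three, llvm::ICmpInst::ICMP_SGT); // true
}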
/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
/// to Threshold. For vectors, this includes constants with undefined elements.
@@ -988,20 +963,22 @@ struct BinaryOp_match {
// The LHS is always matched first.
BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
- template <typename OpTy> bool match(OpTy *V) {
- if (V->getValueID() == Value::InstructionVal + Opcode) {
+ template <typename OpTy> inline bool match(unsigned Opc, OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opc) {
auto *I = cast<BinaryOperator>(V);
return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
(Commutable && L.match(I->getOperand(1)) &&
R.match(I->getOperand(0)));
}
if (auto *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode() == Opcode &&
+ return CE->getOpcode() == Opc &&
((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
(Commutable && L.match(CE->getOperand(1)) &&
R.match(CE->getOperand(0))));
return false;
}
+
+ template <typename OpTy> bool match(OpTy *V) { return match(Opcode, V); }
};
template <typename LHS, typename RHS>
@@ -1246,6 +1223,26 @@ m_NUWShl(const LHS &L, const RHS &R) {
L, R);
}
+template <typename LHS_t, typename RHS_t, bool Commutable = false>
+struct SpecificBinaryOp_match
+ : public BinaryOp_match<LHS_t, RHS_t, 0, Commutable> {
+ unsigned Opcode;
+
+ SpecificBinaryOp_match(unsigned Opcode, const LHS_t &LHS, const RHS_t &RHS)
+ : BinaryOp_match<LHS_t, RHS_t, 0, Commutable>(LHS, RHS), Opcode(Opcode) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ return BinaryOp_match<LHS_t, RHS_t, 0, Commutable>::match(Opcode, V);
+ }
+};
+
+/// Matches a binary operator with the specified (runtime) opcode.
+template <typename LHS, typename RHS>
+inline SpecificBinaryOp_match<LHS, RHS> m_BinOp(unsigned Opcode, const LHS &L,
+ const RHS &R) {
+ return SpecificBinaryOp_match<LHS, RHS>(Opcode, L, R);
+}
+
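A hedged usage sketch of the new opcode-parameterized matcher; the function and the bound values are placeholders:

#include "llvm/IR/PatternMatch.h"

// Hedged sketch: match a binary operator whose opcode is only known at run
// time, binding both operands.
bool matchesOpcode(llvm::Value *V, unsigned Opc) {
  using namespace llvm::PatternMatch;
  llvm::Value *A, *B;
  return match(V, m_BinOp(Opc, m_Value(A), m_Value(B)));
}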
//===----------------------------------------------------------------------===//
// Class that matches a group of binary opcodes.
//
@@ -2223,6 +2220,13 @@ m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
R);
}
+/// Matches a binary operator with the specified (runtime) opcode, with LHS
+/// and RHS in either order.
+template <typename LHS, typename RHS>
+inline SpecificBinaryOp_match<LHS, RHS, true>
+m_c_BinOp(unsigned Opcode, const LHS &L, const RHS &R) {
+ return SpecificBinaryOp_match<LHS, RHS, true>(Opcode, L, R);
+}
+
/// Matches a Add with LHS and RHS in either order.
template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
@@ -2456,7 +2460,7 @@ inline VScaleVal_match m_VScale(const DataLayout &DL) {
return VScaleVal_match(DL);
}
-template <typename LHS, typename RHS, unsigned Opcode>
+template <typename LHS, typename RHS, unsigned Opcode, bool Commutable = false>
struct LogicalOp_match {
LHS L;
RHS R;
@@ -2464,27 +2468,32 @@ struct LogicalOp_match {
LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {}
template <typename T> bool match(T *V) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (!I->getType()->isIntOrIntVectorTy(1))
- return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || !I->getType()->isIntOrIntVectorTy(1))
+ return false;
- if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
- R.match(I->getOperand(1)))
- return true;
+ if (I->getOpcode() == Opcode) {
+ auto *Op0 = I->getOperand(0);
+ auto *Op1 = I->getOperand(1);
+ return (L.match(Op0) && R.match(Op1)) ||
+ (Commutable && L.match(Op1) && R.match(Op0));
+ }
- if (auto *SI = dyn_cast<SelectInst>(I)) {
- if (Opcode == Instruction::And) {
- if (const auto *C = dyn_cast<Constant>(SI->getFalseValue()))
- if (C->isNullValue() && L.match(SI->getCondition()) &&
- R.match(SI->getTrueValue()))
- return true;
- } else {
- assert(Opcode == Instruction::Or);
- if (const auto *C = dyn_cast<Constant>(SI->getTrueValue()))
- if (C->isOneValue() && L.match(SI->getCondition()) &&
- R.match(SI->getFalseValue()))
- return true;
- }
+ if (auto *Select = dyn_cast<SelectInst>(I)) {
+ auto *Cond = Select->getCondition();
+ auto *TVal = Select->getTrueValue();
+ auto *FVal = Select->getFalseValue();
+ if (Opcode == Instruction::And) {
+ auto *C = dyn_cast<Constant>(FVal);
+ if (C && C->isNullValue())
+ return (L.match(Cond) && R.match(TVal)) ||
+ (Commutable && L.match(TVal) && R.match(Cond));
+ } else {
+ assert(Opcode == Instruction::Or);
+ auto *C = dyn_cast<Constant>(TVal);
+ if (C && C->isOneValue())
+ return (L.match(Cond) && R.match(FVal)) ||
+ (Commutable && L.match(FVal) && R.match(Cond));
}
}
@@ -2503,6 +2512,13 @@ m_LogicalAnd(const LHS &L, const RHS &R) {
/// Matches L && R where L and R are arbitrary values.
inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); }
+/// Matches L && R with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline LogicalOp_match<LHS, RHS, Instruction::And, true>
+m_c_LogicalAnd(const LHS &L, const RHS &R) {
+ return LogicalOp_match<LHS, RHS, Instruction::And, true>(L, R);
+}
+
/// Matches L || R either in the form of L | R or L ? true : R.
/// Note that the latter form is poison-blocking.
template <typename LHS, typename RHS>
@@ -2512,8 +2528,13 @@ m_LogicalOr(const LHS &L, const RHS &R) {
}
/// Matches L || R where L and R are arbitrary values.
-inline auto m_LogicalOr() {
- return m_LogicalOr(m_Value(), m_Value());
+inline auto m_LogicalOr() { return m_LogicalOr(m_Value(), m_Value()); }
+
+/// Matches L || R with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline LogicalOp_match<LHS, RHS, Instruction::Or, true>
+m_c_LogicalOr(const LHS &L, const RHS &R) {
+ return LogicalOp_match<LHS, RHS, Instruction::Or, true>(L, R);
}
} // end namespace PatternMatch
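A hedged sketch of the new commutative logical matchers; the helper and its arguments are placeholders:

#include "llvm/IR/PatternMatch.h"

// Hedged sketch: the commutative form accepts the operands in either order,
// for both the plain i1 'and' and the poison-blocking select form.
bool usesAsLogicalAndOperand(llvm::Value *V, llvm::Value *X) {
  using namespace llvm::PatternMatch;
  // Matches 'and i1 X, Y', 'and i1 Y, X', 'select i1 X, i1 Y, i1 false', or
  // 'select i1 Y, i1 X, i1 false' for any Y.
  return match(V, m_c_LogicalAnd(m_Specific(X), m_Value()));
}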
diff --git a/llvm/include/llvm/IR/ProfileSummary.h b/llvm/include/llvm/IR/ProfileSummary.h
index 889568e7946b..4bb6bb8d4a40 100644
--- a/llvm/include/llvm/IR/ProfileSummary.h
+++ b/llvm/include/llvm/IR/ProfileSummary.h
@@ -31,9 +31,9 @@ class raw_ostream;
// number of counts needed to reach this target and the minimum among these
// counts.
struct ProfileSummaryEntry {
- uint32_t Cutoff; ///< The required percentile of counts.
- uint64_t MinCount; ///< The minimum count for this percentile.
- uint64_t NumCounts; ///< Number of counts >= the minimum count.
+ const uint32_t Cutoff; ///< The required percentile of counts.
+ const uint64_t MinCount; ///< The minimum count for this percentile.
+ const uint64_t NumCounts; ///< Number of counts >= the minimum count.
ProfileSummaryEntry(uint32_t TheCutoff, uint64_t TheMinCount,
uint64_t TheNumCounts)
@@ -48,9 +48,9 @@ public:
private:
const Kind PSK;
- SummaryEntryVector DetailedSummary;
- uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
- uint32_t NumCounts, NumFunctions;
+ const SummaryEntryVector DetailedSummary;
+ const uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount;
+ const uint32_t NumCounts, NumFunctions;
/// If 'Partial' is false, it means the profile being used to optimize
/// a target is collected from the same target.
/// If 'Partial' is true, it means the profile is for common/shared
@@ -61,14 +61,14 @@ private:
/// of the program being built to the number of profile counters in the
/// partial sample profile. When 'Partial' is false, it is undefined. This is
/// currently only available under thin LTO mode.
- double PartialProfileRatio = 0;
+ double PartialProfileRatio = 0.0;
/// Return detailed summary as metadata.
Metadata *getDetailedSummaryMD(LLVMContext &Context);
public:
static const int Scale = 1000000;
- ProfileSummary(Kind K, SummaryEntryVector DetailedSummary,
+ ProfileSummary(Kind K, const SummaryEntryVector &DetailedSummary,
uint64_t TotalCount, uint64_t MaxCount,
uint64_t MaxInternalCount, uint64_t MaxFunctionCount,
uint32_t NumCounts, uint32_t NumFunctions,
@@ -85,22 +85,22 @@ public:
bool AddPartialProfileRatioField = true);
/// Construct profile summary from metadata.
static ProfileSummary *getFromMD(Metadata *MD);
- SummaryEntryVector &getDetailedSummary() { return DetailedSummary; }
- uint32_t getNumFunctions() { return NumFunctions; }
- uint64_t getMaxFunctionCount() { return MaxFunctionCount; }
- uint32_t getNumCounts() { return NumCounts; }
- uint64_t getTotalCount() { return TotalCount; }
- uint64_t getMaxCount() { return MaxCount; }
- uint64_t getMaxInternalCount() { return MaxInternalCount; }
+ const SummaryEntryVector &getDetailedSummary() { return DetailedSummary; }
+ uint32_t getNumFunctions() const { return NumFunctions; }
+ uint64_t getMaxFunctionCount() const { return MaxFunctionCount; }
+ uint32_t getNumCounts() const { return NumCounts; }
+ uint64_t getTotalCount() const { return TotalCount; }
+ uint64_t getMaxCount() const { return MaxCount; }
+ uint64_t getMaxInternalCount() const { return MaxInternalCount; }
void setPartialProfile(bool PP) { Partial = PP; }
- bool isPartialProfile() { return Partial; }
- double getPartialProfileRatio() { return PartialProfileRatio; }
+ bool isPartialProfile() const { return Partial; }
+ double getPartialProfileRatio() const { return PartialProfileRatio; }
void setPartialProfileRatio(double R) {
assert(isPartialProfile() && "Unexpected when not partial profile");
PartialProfileRatio = R;
}
- void printSummary(raw_ostream &OS);
- void printDetailedSummary(raw_ostream &OS);
+ void printSummary(raw_ostream &OS) const;
+ void printDetailedSummary(raw_ostream &OS) const;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index 53100f049910..51ba7e675efe 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -27,10 +27,6 @@ constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
-enum class PseudoProbeAttributes {
- Reserved = 0x1, // Reserved for future use.
-};
-
// The saturated distribution factor representing 100% for block probes.
constexpr static uint64_t PseudoProbeFullDistributionFactor =
std::numeric_limits<uint64_t>::max();
diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h
index 4d95143a4bd2..5ad1d0a6f920 100644
--- a/llvm/include/llvm/IR/ReplaceConstant.h
+++ b/llvm/include/llvm/IR/ReplaceConstant.h
@@ -21,10 +21,6 @@
namespace llvm {
-/// Create a replacement instruction for constant expression \p CE and insert
-/// it before \p Instr.
-Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr);
-
/// The given instruction \p I contains the given constant expression \p CE as one
/// of its operands, possibly nested within constant expression trees. Convert
/// all reachable paths from constant expression operands of \p I to \p CE into
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index c73172612b1e..62d67308114f 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -287,6 +287,7 @@ HANDLE_LIBCALL(FPEXT_F80_F128, "__extendxftf2")
HANDLE_LIBCALL(FPEXT_F64_F128, "__extenddftf2")
HANDLE_LIBCALL(FPEXT_F32_F128, "__extendsftf2")
HANDLE_LIBCALL(FPEXT_F16_F128, "__extendhftf2")
+HANDLE_LIBCALL(FPEXT_F16_F80, "__extendhfxf2")
HANDLE_LIBCALL(FPEXT_F32_F64, "__extendsfdf2")
HANDLE_LIBCALL(FPEXT_F16_F64, "__extendhfdf2")
HANDLE_LIBCALL(FPEXT_F16_F32, "__gnu_h2f_ieee")
@@ -375,6 +376,8 @@ HANDLE_LIBCALL(UINTTOFP_I128_F64, "__floatuntidf")
HANDLE_LIBCALL(UINTTOFP_I128_F80, "__floatuntixf")
HANDLE_LIBCALL(UINTTOFP_I128_F128, "__floatuntitf")
HANDLE_LIBCALL(UINTTOFP_I128_PPCF128, "__floatuntitf")
+HANDLE_LIBCALL(CONVERT_F128_PPCF128, "__extendkftf2")
+HANDLE_LIBCALL(CONVERT_PPCF128_F128, "__trunctfkf2")
// Comparison
HANDLE_LIBCALL(OEQ_F32, "__eqsf2")
@@ -431,6 +434,7 @@ HANDLE_LIBCALL(MEMSET_ELEMENT_UNORDERED_ATOMIC_16, "__llvm_memset_element_unorde
// Exception handling
HANDLE_LIBCALL(UNWIND_RESUME, "_Unwind_Resume")
+HANDLE_LIBCALL(CXA_END_CLEANUP, "__cxa_end_cleanup")
// Note: there are two sets of atomics libcalls; see
// <https://llvm.org/docs/Atomics.html> for more info on the
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 430bc34a47e7..47431adc6fac 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -14,7 +14,6 @@
#ifndef LLVM_IR_TYPE_H
#define LLVM_IR_TYPE_H
-#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CBindingWrapping.h"
@@ -29,6 +28,7 @@
namespace llvm {
class IntegerType;
+struct fltSemantics;
class LLVMContext;
class PointerType;
class raw_ostream;
@@ -166,18 +166,7 @@ public:
getTypeID() == PPC_FP128TyID;
}
- const fltSemantics &getFltSemantics() const {
- switch (getTypeID()) {
- case HalfTyID: return APFloat::IEEEhalf();
- case BFloatTyID: return APFloat::BFloat();
- case FloatTyID: return APFloat::IEEEsingle();
- case DoubleTyID: return APFloat::IEEEdouble();
- case X86_FP80TyID: return APFloat::x87DoubleExtended();
- case FP128TyID: return APFloat::IEEEquad();
- case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
- default: llvm_unreachable("Invalid floating type");
- }
- }
+ const fltSemantics &getFltSemantics() const;
/// Return true if this is X86 MMX.
bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
@@ -312,7 +301,7 @@ public:
/// Return whether the type is IEEE compatible, as defined by the eponymous
/// method in APFloat.
- bool isIEEE() const { return APFloat::getZero(getFltSemantics()).isIEEE(); }
+ bool isIEEE() const;
/// If this is a vector type, return the element type, otherwise return
/// 'this'.
@@ -443,26 +432,7 @@ public:
}
llvm_unreachable("Unsupported type in Type::getScalarTy");
}
- static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S) {
- Type *Ty;
- if (&S == &APFloat::IEEEhalf())
- Ty = Type::getHalfTy(C);
- else if (&S == &APFloat::BFloat())
- Ty = Type::getBFloatTy(C);
- else if (&S == &APFloat::IEEEsingle())
- Ty = Type::getFloatTy(C);
- else if (&S == &APFloat::IEEEdouble())
- Ty = Type::getDoubleTy(C);
- else if (&S == &APFloat::x87DoubleExtended())
- Ty = Type::getX86_FP80Ty(C);
- else if (&S == &APFloat::IEEEquad())
- Ty = Type::getFP128Ty(C);
- else {
- assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format");
- Ty = Type::getPPC_FP128Ty(C);
- }
- return Ty;
- }
+ static Type *getFloatingPointTy(LLVMContext &C, const fltSemantics &S);
//===--------------------------------------------------------------------===//
// Convenience methods for getting pointer types with one of the above builtin
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 92e2cd3a2783..361d6357b303 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -111,6 +111,21 @@ END_REGISTER_VP_SDNODE(SDOPC)
#define HANDLE_VP_IS_MEMOP(VPID, POINTERPOS, DATAPOS)
#endif
+// Map this VP reduction intrinsic to its reduction operand positions.
+#ifndef HANDLE_VP_REDUCTION
+#define HANDLE_VP_REDUCTION(ID, STARTPOS, VECTORPOS)
+#endif
+
+// A property to infer VP binary-op SDNode opcodes automatically.
+#ifndef PROPERTY_VP_BINARYOP_SDNODE
+#define PROPERTY_VP_BINARYOP_SDNODE(ID)
+#endif
+
+// A property to infer VP reduction SDNode opcodes automatically.
+#ifndef PROPERTY_VP_REDUCTION_SDNODE
+#define PROPERTY_VP_REDUCTION_SDNODE(ID)
+#endif
+
/// } Property Macros
///// Integer Arithmetic {
@@ -122,6 +137,7 @@ END_REGISTER_VP_SDNODE(SDOPC)
#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDOPC, OPC) \
BEGIN_REGISTER_VP(INTRIN, 2, 3, SDOPC, -1) \
HANDLE_VP_TO_OPC(OPC) \
+PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
END_REGISTER_VP(INTRIN, SDOPC)
@@ -181,6 +197,7 @@ HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor)
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, SDOPC, -1) \
HANDLE_VP_TO_OPC(OPC) \
HANDLE_VP_TO_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
+ PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
END_REGISTER_VP(vp_##OPSUFFIX, SDOPC)
// llvm.vp.fadd(x,y,mask,vlen)
@@ -204,33 +221,146 @@ HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
///// Memory Operations {
// llvm.vp.store(ptr,val,mask,vlen)
-BEGIN_REGISTER_VP(vp_store, 2, 3, VP_STORE, 0)
+BEGIN_REGISTER_VP_INTRINSIC(vp_store, 2, 3)
+// chain = VP_STORE chain,val,base,offset,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_STORE, 0, vp_store, 4, 5)
HANDLE_VP_TO_OPC(Store)
HANDLE_VP_TO_INTRIN(masked_store)
HANDLE_VP_IS_MEMOP(vp_store, 1, 0)
END_REGISTER_VP(vp_store, VP_STORE)
// llvm.vp.scatter(ptr,val,mask,vlen)
-BEGIN_REGISTER_VP(vp_scatter, 2, 3, VP_SCATTER, 0)
+BEGIN_REGISTER_VP_INTRINSIC(vp_scatter, 2, 3)
+// chain = VP_SCATTER chain,val,base,indices,scale,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_SCATTER, -1, vp_scatter, 5, 6)
HANDLE_VP_TO_INTRIN(masked_scatter)
HANDLE_VP_IS_MEMOP(vp_scatter, 1, 0)
END_REGISTER_VP(vp_scatter, VP_SCATTER)
// llvm.vp.load(ptr,mask,vlen)
-BEGIN_REGISTER_VP(vp_load, 1, 2, VP_LOAD, -1)
+BEGIN_REGISTER_VP_INTRINSIC(vp_load, 1, 2)
+// val,chain = VP_LOAD chain,base,offset,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_LOAD, -1, vp_load, 3, 4)
HANDLE_VP_TO_OPC(Load)
HANDLE_VP_TO_INTRIN(masked_load)
HANDLE_VP_IS_MEMOP(vp_load, 0, None)
END_REGISTER_VP(vp_load, VP_LOAD)
// llvm.vp.gather(ptr,mask,vlen)
-BEGIN_REGISTER_VP(vp_gather, 1, 2, VP_GATHER, -1)
+BEGIN_REGISTER_VP_INTRINSIC(vp_gather, 1, 2)
+// val,chain = VP_GATHER chain,base,indices,scale,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_GATHER, -1, vp_gather, 4, 5)
HANDLE_VP_TO_INTRIN(masked_gather)
HANDLE_VP_IS_MEMOP(vp_gather, 0, None)
END_REGISTER_VP(vp_gather, VP_GATHER)
///// } Memory Operations
+///// Reductions {
+
+// Specialized helper macro for VP reductions (%start, %x, %mask, %evl).
+#ifdef HELPER_REGISTER_REDUCTION_VP
+#error "The internal helper macro HELPER_REGISTER_REDUCTION_VP is already defined!"
+#endif
+#define HELPER_REGISTER_REDUCTION_VP(VPINTRIN, SDOPC, INTRIN) \
+BEGIN_REGISTER_VP(VPINTRIN, 2, 3, SDOPC, -1) \
+HANDLE_VP_TO_INTRIN(INTRIN) \
+HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
+PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+END_REGISTER_VP(VPINTRIN, SDOPC)
+
+// llvm.vp.reduce.add(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_add, VP_REDUCE_ADD,
+ experimental_vector_reduce_add)
+
+// llvm.vp.reduce.mul(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_mul, VP_REDUCE_MUL,
+ experimental_vector_reduce_mul)
+
+// llvm.vp.reduce.and(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_and, VP_REDUCE_AND,
+ experimental_vector_reduce_and)
+
+// llvm.vp.reduce.or(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_or, VP_REDUCE_OR,
+ experimental_vector_reduce_or)
+
+// llvm.vp.reduce.xor(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_xor, VP_REDUCE_XOR,
+ experimental_vector_reduce_xor)
+
+// llvm.vp.reduce.smax(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_smax, VP_REDUCE_SMAX,
+ experimental_vector_reduce_smax)
+
+// llvm.vp.reduce.smin(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_smin, VP_REDUCE_SMIN,
+ experimental_vector_reduce_smin)
+
+// llvm.vp.reduce.umax(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_umax, VP_REDUCE_UMAX,
+ experimental_vector_reduce_umax)
+
+// llvm.vp.reduce.umin(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_umin, VP_REDUCE_UMIN,
+ experimental_vector_reduce_umin)
+
+// llvm.vp.reduce.fmax(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmax, VP_REDUCE_FMAX,
+ experimental_vector_reduce_fmax)
+
+// llvm.vp.reduce.fmin(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_VP(vp_reduce_fmin, VP_REDUCE_FMIN,
+ experimental_vector_reduce_fmin)
+
+#undef HELPER_REGISTER_REDUCTION_VP
+
+// Specialized helper macro for VP reductions as above but with two forms:
+// sequential and reassociative. These manifest as the presence of 'reassoc'
+// fast-math flags in the IR and as two distinct ISD opcodes in the
+// SelectionDAG.
+#ifdef HELPER_REGISTER_REDUCTION_SEQ_VP
+#error "The internal helper macro HELPER_REGISTER_REDUCTION_SEQ_VP is already defined!"
+#endif
+#define HELPER_REGISTER_REDUCTION_SEQ_VP(VPINTRIN, SDOPC, SEQ_SDOPC, INTRIN) \
+BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, 2, 3) \
+BEGIN_REGISTER_VP_SDNODE(SDOPC, -1, VPINTRIN, 2, 3) \
+END_REGISTER_VP_SDNODE(SDOPC) \
+BEGIN_REGISTER_VP_SDNODE(SEQ_SDOPC, -1, VPINTRIN, 2, 3) \
+END_REGISTER_VP_SDNODE(SEQ_SDOPC) \
+HANDLE_VP_TO_INTRIN(INTRIN) \
+HANDLE_VP_REDUCTION(VPINTRIN, 0, 1) \
+PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+PROPERTY_VP_REDUCTION_SDNODE(SEQ_SDOPC) \
+END_REGISTER_VP_INTRINSIC(VPINTRIN)
+
+// llvm.vp.reduce.fadd(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fadd, VP_REDUCE_FADD,
+ VP_REDUCE_SEQ_FADD,
+ experimental_vector_reduce_fadd)
+
+// llvm.vp.reduce.fmul(start,x,mask,vlen)
+HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
+ VP_REDUCE_SEQ_FMUL,
+ experimental_vector_reduce_fmul)
+
+#undef HELPER_REGISTER_REDUCTION_SEQ_VP
+
+///// } Reduction
+
+///// Shuffles {
+
+// llvm.vp.select(mask,on_true,on_false,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_select, 0, 3)
+// BEGIN_REGISTER_VP_SDNODE(VP_SELECT, -1, vp_select, 0, 4)
+// END_REGISTER_CASES(vp_select, VP_SELECT)
+END_REGISTER_VP_INTRINSIC(vp_select)
+
+BEGIN_REGISTER_VP(experimental_vp_splice, 3, 5,
+ EXPERIMENTAL_VP_SPLICE, -1)
+END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
+
+///// } Shuffles
#undef BEGIN_REGISTER_VP
#undef BEGIN_REGISTER_VP_INTRINSIC
@@ -242,3 +372,6 @@ END_REGISTER_VP(vp_gather, VP_GATHER)
#undef HANDLE_VP_TO_CONSTRAINEDFP
#undef HANDLE_VP_TO_INTRIN
#undef HANDLE_VP_IS_MEMOP
+#undef HANDLE_VP_REDUCTION
+#undef PROPERTY_VP_BINARYOP_SDNODE
+#undef PROPERTY_VP_REDUCTION_SDNODE
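A hedged sketch of how a consumer is expected to use the new property macro via the usual .def X-macro include; the table name is hypothetical:

#include "llvm/ADT/StringRef.h"
#include <vector>

// Hedged sketch: collect the names of every SDNode tagged as a VP binary op.
// All other macros in the .def default to no-ops, so only the
// PROPERTY_VP_BINARYOP_SDNODE expansions contribute entries.
static const std::vector<llvm::StringRef> VPBinaryOpSDNodes = {
#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) #SDOPC,
#include "llvm/IR/VPIntrinsics.def"
};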
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index 2ad1c9e8c300..fc2ed00d770f 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -37,7 +37,6 @@ class DataLayout;
class Function;
class GlobalAlias;
class GlobalIFunc;
-class GlobalIndirectSymbol;
class GlobalObject;
class GlobalValue;
class GlobalVariable;
@@ -454,14 +453,18 @@ public:
/// Return the single use of this value that cannot be dropped, if there is
/// exactly one such use.
- ///
- /// This is specialized because it is a common request and does not require
- /// traversing the whole use list.
Use *getSingleUndroppableUse();
const Use *getSingleUndroppableUse() const {
return const_cast<Value *>(this)->getSingleUndroppableUse();
}
+ /// Return the unique user of this value that cannot be dropped, if there is
+ /// exactly one such user (that user can have multiple uses of this value).
+ User *getUniqueUndroppableUser();
+ const User *getUniqueUndroppableUser() const {
+ return const_cast<Value *>(this)->getUniqueUndroppableUser();
+ }
+
/// Return true if this value has the given number of undroppable uses.
///
/// This is specialized because it is a common request and does not require
@@ -690,6 +693,9 @@ public:
/// If \p AllowNonInbounds is true, offsets in GEPs are stripped and
/// accumulated even if the GEP is not "inbounds".
///
+ /// If \p AllowInvariantGroup is true then this method also looks through
+ /// strip.invariant.group and launder.invariant.group intrinsics.
+ ///
/// If \p ExternalAnalysis is provided, it will be used to calculate an offset
/// when an operand of the GEP is not constant.
/// For example, for a value \p ExternalAnalysis might try to calculate a
@@ -705,13 +711,15 @@ public:
/// is unchanged.
const Value *stripAndAccumulateConstantOffsets(
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+ bool AllowInvariantGroup = false,
function_ref<bool(Value &Value, APInt &Offset)> ExternalAnalysis =
nullptr) const;
Value *stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset,
- bool AllowNonInbounds) {
+ bool AllowNonInbounds,
+ bool AllowInvariantGroup = false) {
return const_cast<Value *>(
static_cast<const Value *>(this)->stripAndAccumulateConstantOffsets(
- DL, Offset, AllowNonInbounds));
+ DL, Offset, AllowNonInbounds, AllowInvariantGroup));
}
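A hedged usage sketch of the new AllowInvariantGroup flag; the helper name and its parameters are placeholders:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

// Hedged sketch: accumulate constant GEP offsets into Offset while also
// looking through llvm.strip.invariant.group / llvm.launder.invariant.group.
const llvm::Value *stripWithInvariantGroup(const llvm::Value *Ptr,
                                           const llvm::DataLayout &DL) {
  llvm::APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
  return Ptr->stripAndAccumulateConstantOffsets(
      DL, Offset, /*AllowNonInbounds=*/true, /*AllowInvariantGroup=*/true);
}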
/// This is a wrapper around stripAndAccumulateConstantOffsets with the
@@ -781,8 +789,8 @@ public:
///
/// This is the greatest alignment value supported by load, store, and alloca
/// instructions, and global values.
- static const unsigned MaxAlignmentExponent = 29;
- static const unsigned MaximumAlignment = 1u << MaxAlignmentExponent;
+ static constexpr unsigned MaxAlignmentExponent = 32;
+ static constexpr uint64_t MaximumAlignment = 1ULL << MaxAlignmentExponent;
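With the exponent raised to 32, the maximum alignment becomes 2^32 bytes, which no longer fits in the previous 32-bit unsigned constant; a small hedged check:

#include "llvm/IR/Value.h"
#include <cstdint>

// Hedged sketch: the new limit is 4 GiB and therefore needs a 64-bit type.
static_assert(llvm::Value::MaximumAlignment == (uint64_t)1 << 32,
              "maximum alignment is 2^32 bytes");
static_assert(llvm::Value::MaximumAlignment > UINT32_MAX,
              "not representable as a 32-bit unsigned value");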
/// Mutate the type of this Value to be of the specified type.
///
@@ -1012,21 +1020,16 @@ template <> struct isa_impl<GlobalIFunc, Value> {
}
};
-template <> struct isa_impl<GlobalIndirectSymbol, Value> {
- static inline bool doit(const Value &Val) {
- return isa<GlobalAlias>(Val) || isa<GlobalIFunc>(Val);
- }
-};
-
template <> struct isa_impl<GlobalValue, Value> {
static inline bool doit(const Value &Val) {
- return isa<GlobalObject>(Val) || isa<GlobalIndirectSymbol>(Val);
+ return isa<GlobalObject>(Val) || isa<GlobalAlias>(Val);
}
};
template <> struct isa_impl<GlobalObject, Value> {
static inline bool doit(const Value &Val) {
- return isa<GlobalVariable>(Val) || isa<Function>(Val);
+ return isa<GlobalVariable>(Val) || isa<Function>(Val) ||
+ isa<GlobalIFunc>(Val);
}
};
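A hedged sketch of the visible effect of dropping GlobalIndirectSymbol from the class hierarchy; the function and its argument are placeholders:

#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"

// Hedged sketch: an ifunc now classifies as a GlobalObject, while aliases
// remain the only non-object GlobalValues.
bool classifiesAsObject(const llvm::GlobalIFunc *IF) {
  return llvm::isa<llvm::GlobalObject>(IF) &&
         llvm::isa<llvm::GlobalValue>(IF) && !llvm::isa<llvm::GlobalAlias>(IF);
}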
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 365240de321a..845d7dcdebd2 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -64,6 +64,7 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
void initializeAAResultsWrapperPassPass(PassRegistry&);
void initializeADCELegacyPassPass(PassRegistry&);
void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
+void initializeAddFSDiscriminatorsPass(PassRegistry &);
void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
@@ -183,6 +184,7 @@ void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeHardwareLoopsPass(PassRegistry&);
+void initializeMIRProfileLoaderPassPass(PassRegistry &);
void initializeMemProfilerLegacyPassPass(PassRegistry &);
void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
@@ -234,7 +236,8 @@ void initializeLiveIntervalsPass(PassRegistry&);
void initializeLiveRangeShrinkPass(PassRegistry&);
void initializeLiveRegMatrixPass(PassRegistry&);
void initializeLiveStacksPass(PassRegistry&);
-void initializeLiveVariablesPass(PassRegistry&);
+void initializeLiveVariablesPass(PassRegistry &);
+void initializeLoadStoreOptPass(PassRegistry &);
void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&);
void initializeLoaderPassPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/InterfaceStub/IFSHandler.h b/llvm/include/llvm/InterfaceStub/IFSHandler.h
index de627492366f..6ae6a421318e 100644
--- a/llvm/include/llvm/InterfaceStub/IFSHandler.h
+++ b/llvm/include/llvm/InterfaceStub/IFSHandler.h
@@ -51,6 +51,9 @@ Error validateIFSTarget(IFSStub &Stub, bool ParseTriple);
void stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch,
bool StripEndianness, bool StripBitWidth);
+/// Strips undefined symbols from the IFS symbol table.
+void stripIFSUndefinedSymbols(IFSStub &Stub);
+
/// Parse llvm triple string into a IFSTarget struct.
IFSTarget parseTriple(StringRef TripleStr);
diff --git a/llvm/include/llvm/LTO/Caching.h b/llvm/include/llvm/LTO/Caching.h
deleted file mode 100644
index 43b978328b74..000000000000
--- a/llvm/include/llvm/LTO/Caching.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- Caching.h - LLVM Link Time Optimizer Configuration -----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the localCache function, which allows clients to add a
-// filesystem cache to ThinLTO.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LTO_CACHING_H
-#define LLVM_LTO_CACHING_H
-
-#include "llvm/LTO/LTO.h"
-
-namespace llvm {
-namespace lto {
-
-/// This type defines the callback to add a pre-existing native object file
-/// (e.g. in a cache).
-///
-/// Buffer callbacks must be thread safe.
-using AddBufferFn =
- std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
-
-/// Create a local file system cache which uses the given cache directory and
-/// file callback. This function also creates the cache directory if it does not
-/// already exist.
-Expected<NativeObjectCache> localCache(StringRef CacheDirectoryPath,
- AddBufferFn AddBuffer);
-
-} // namespace lto
-} // namespace llvm
-
-#endif
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 5fd3c9f408f3..eb793d62907e 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -70,6 +70,9 @@ struct Config {
/// Run PGO context sensitive IR instrumentation.
bool RunCSIRInstr = false;
+ /// Turn on/off the warning about a hash mismatch in the PGO profile data.
+ bool PGOWarnMismatch = true;
+
/// Asserts whether we can assume whole program visibility during the LTO
/// link.
bool HasWholeProgramVisibility = false;
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index ea1dea2d6f42..d2b0fef1ca47 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -21,8 +21,10 @@
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/LTO/Config.h"
#include "llvm/Object/IRSymtab.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/thread.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
namespace llvm {
@@ -38,7 +40,7 @@ class ToolOutputFile;
/// Resolve linkage for prevailing symbols in the \p Index. Linkage changes
/// recorded in the index and the ThinLTO backends must apply the changes to
-/// the module via thinLTOResolvePrevailingInModule.
+/// the module via thinLTOFinalizeInModule.
///
/// This is done for correctness (if value exported, ensure we always
/// emit a copy), and compile-time optimization (allow drop of duplicates).
@@ -186,47 +188,13 @@ private:
}
};
-/// This class wraps an output stream for a native object. Most clients should
-/// just be able to return an instance of this base class from the stream
-/// callback, but if a client needs to perform some action after the stream is
-/// written to, that can be done by deriving from this class and overriding the
-/// destructor.
-class NativeObjectStream {
-public:
- NativeObjectStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
- std::unique_ptr<raw_pwrite_stream> OS;
- virtual ~NativeObjectStream() = default;
-};
-
-/// This type defines the callback to add a native object that is generated on
-/// the fly.
-///
-/// Stream callbacks must be thread safe.
-using AddStreamFn =
- std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>;
-
-/// This is the type of a native object cache. To request an item from the
-/// cache, pass a unique string as the Key. For hits, the cached file will be
-/// added to the link and this function will return AddStreamFn(). For misses,
-/// the cache will return a stream callback which must be called at most once to
-/// produce content for the stream. The native object stream produced by the
-/// stream callback will add the file to the link after the stream is written
-/// to.
-///
-/// Clients generally look like this:
-///
-/// if (AddStreamFn AddStream = Cache(Task, Key))
-/// ProduceContent(AddStream);
-using NativeObjectCache =
- std::function<AddStreamFn(unsigned Task, StringRef Key)>;
-
/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
/// The details of this type definition aren't important; clients can only
/// create a ThinBackend using one of the create*ThinBackend() functions below.
using ThinBackend = std::function<std::unique_ptr<ThinBackendProc>(
const Config &C, ModuleSummaryIndex &CombinedIndex,
StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache)>;
+ AddStreamFn AddStream, FileCache Cache)>;
/// This ThinBackend runs the individual backend jobs in-process.
/// The default value means to use one job per hardware core (not hyper-thread).
@@ -299,7 +267,7 @@ public:
///
/// The client will receive at most one callback (via either AddStream or
/// Cache) for each task identifier.
- Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
+ Error run(AddStreamFn AddStream, FileCache Cache = nullptr);
/// Static method that returns a list of libcall symbols that can be generated
/// by LTO but might not be visible from bitcode symbol table.
@@ -431,7 +399,7 @@ private:
const SymbolResolution *&ResI, const SymbolResolution *ResE);
Error runRegularLTO(AddStreamFn AddStream);
- Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ Error runThinLTO(AddStreamFn AddStream, FileCache Cache,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
Error checkPartiallySplit();
@@ -444,6 +412,9 @@ private:
// Identify symbols exported dynamically, and that therefore could be
// referenced by a shared library not visible to the linker.
DenseSet<GlobalValue::GUID> DynamicExportSymbols;
+
+ // Diagnostic optimization remarks file
+ std::unique_ptr<ToolOutputFile> DiagnosticOutputFile;
};
/// The resolution for a symbol. The linker must provide a SymbolResolution for
diff --git a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
index 6697c821a5ea..508ab2587ac5 100644
--- a/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
+++ b/llvm/include/llvm/LTO/SummaryBasedOptimizations.h
@@ -10,6 +10,8 @@
#define LLVM_LTO_SUMMARYBASEDOPTIMIZATIONS_H
namespace llvm {
class ModuleSummaryIndex;
+
+/// Compute synthetic function entry counts.
void computeSyntheticCounts(ModuleSummaryIndex &Index);
} // namespace llvm
diff --git a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index 31688e43e174..333f483f29c5 100644
--- a/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -176,7 +176,7 @@ struct LTOCodeGenerator {
/// created using the \p AddStream callback. Returns true on success.
///
/// Calls \a verifyMergedModuleOnce().
- bool compileOptimized(lto::AddStreamFn AddStream, unsigned ParallelismLevel);
+ bool compileOptimized(AddStreamFn AddStream, unsigned ParallelismLevel);
/// Enable the Freestanding mode: indicate that the optimizer should not
/// assume builtins are present on the target.
diff --git a/llvm/include/llvm/LTO/legacy/LTOModule.h b/llvm/include/llvm/LTO/legacy/LTOModule.h
index 2a25dab58ada..01e63db4bab3 100644
--- a/llvm/include/llvm/LTO/legacy/LTOModule.h
+++ b/llvm/include/llvm/LTO/legacy/LTOModule.h
@@ -167,6 +167,10 @@ public:
Expected<uint32_t> getMachOCPUSubType() const;
+ /// Returns true if the module has either the @llvm.global_ctors or the
+ /// @llvm.global_dtors symbol. Otherwise returns false.
+ bool hasCtorDtor() const;
+
private:
/// Parse metadata from the module
// FIXME: it only parses "llvm.linker.options" metadata at the moment
diff --git a/llvm/include/llvm/LinkAllIR.h b/llvm/include/llvm/LinkAllIR.h
index 4b0aabeee701..ceed784d557d 100644
--- a/llvm/include/llvm/LinkAllIR.h
+++ b/llvm/include/llvm/LinkAllIR.h
@@ -38,6 +38,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
llvm::LLVMContext Context;
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index 45978828a8ce..c8b9aaeed76a 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -64,6 +64,9 @@ namespace {
// delete it all as dead code, even with whole program optimization,
// yet is effectively a NO-OP. As the compiler isn't smart enough
// to know that getenv() never returns -1, this will do the job.
+ // This is so that globals in the translation units where these functions
+ // are defined are forced to be initialized, populating various
+ // registries.
if (std::getenv("bar") != (char*) -1)
return;
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 08739d51f751..bb57c3453d10 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -55,7 +55,8 @@ public:
/// Give the target a chance to manipulate state related to instruction
/// alignment (e.g. padding for optimization), instruction relaxability, etc.
/// before and after actually emitting the instruction.
- virtual void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) {}
+ virtual void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
+ const MCSubtargetInfo &STI) {}
virtual void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) {}
/// lifetime management
@@ -185,13 +186,16 @@ public:
/// Returns the maximum size of a nop in bytes on this target.
///
- virtual unsigned getMaximumNopSize() const { return 0; }
+ virtual unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const {
+ return 0;
+ }
/// Write an (optimal) nop sequence of Count bytes to the given output. If the
/// target cannot generate such a sequence, it should return false.
///
/// \return - True on success.
- virtual bool writeNopData(raw_ostream &OS, uint64_t Count) const = 0;
+ virtual bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const = 0;
/// Give backend an opportunity to finish layout after relaxation
virtual void finishLayout(MCAssembler const &Asm,
diff --git a/llvm/include/llvm/MC/MCAsmInfoGOFF.h b/llvm/include/llvm/MC/MCAsmInfoGOFF.h
new file mode 100644
index 000000000000..1f3b26311b37
--- /dev/null
+++ b/llvm/include/llvm/MC/MCAsmInfoGOFF.h
@@ -0,0 +1,29 @@
+//===- MCAsmInfoGOFF.h - GOFF Asm Info Fields -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines certain target specific asm properties for GOFF (z/OS)
+/// based targets.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMINFOGOFF_H
+#define LLVM_MC_MCASMINFOGOFF_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+class MCAsmInfoGOFF : public MCAsmInfo {
+ virtual void anchor();
+
+protected:
+ MCAsmInfoGOFF();
+};
+} // end namespace llvm
+
+#endif // LLVM_MC_MCASMINFOGOFF_H
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 877b2dc4ac92..bde750759a0b 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -817,7 +817,7 @@ namespace llvm {
// Unrecoverable error has occurred. Display the best diagnostic we can
// and bail via exit(1). For now, most MC backend errors are unrecoverable.
// FIXME: We should really do something about that.
- LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, const Twine &Msg);
+ [[noreturn]] void reportFatalError(SMLoc L, const Twine &Msg);
const MCAsmMacro *lookupMacro(StringRef Name) {
StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index 23efdc70609b..7e72d56f3097 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSection.h"
+#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"
#include <cassert>
@@ -34,7 +35,6 @@ namespace llvm {
template <typename T> class ArrayRef;
class MCAsmBackend;
class MCContext;
-class MCDwarfLineStr;
class MCObjectStreamer;
class MCStreamer;
class MCSymbol;
@@ -47,6 +47,24 @@ namespace mcdwarf {
MCSymbol *emitListsTableHeaderStart(MCStreamer &S);
} // namespace mcdwarf
+/// Manage the .debug_line_str section contents, if we use it.
+class MCDwarfLineStr {
+ MCSymbol *LineStrLabel = nullptr;
+ StringTableBuilder LineStrings{StringTableBuilder::DWARF};
+ bool UseRelocs = false;
+
+public:
+ /// Construct an instance that can emit .debug_line_str (for use in a normal
+ /// v5 line table).
+ explicit MCDwarfLineStr(MCContext &Ctx);
+
+ /// Emit a reference to the string.
+ void emitRef(MCStreamer *MCOS, StringRef Path);
+
+ /// Emit the .debug_line_str section if appropriate.
+ void emitSection(MCStreamer *MCOS);
+};
+
/// Instances of this class represent the name of the dwarf .file directive and
/// its associated dwarf file number in the MC file. MCDwarfFile's are created
/// and uniqued by the MCContext class. In Dwarf 4 file numbers start from 1;
@@ -170,6 +188,15 @@ public:
MCSymbol *getLabel() const { return Label; }
+ // This indicates the line entry is synthesized for an end entry.
+ bool IsEndEntry = false;
+
+ // Override the label with the given EndLabel.
+ void setEndLabel(MCSymbol *EndLabel) {
+ Label = EndLabel;
+ IsEndEntry = true;
+ }
+
// This is called when an instruction is assembled into the specified
// section; if there is information from the last .loc directive that has not
// yet had a line entry made for it, one is made here.
@@ -187,6 +214,10 @@ public:
MCLineDivisions[Sec].push_back(LineEntry);
}
+ // Add an end entry by cloning the last entry, if one exists, for the section
+ // the given EndLabel belongs to. The label is replaced by the given EndLabel.
+ void addEndEntry(MCSymbol *EndLabel);
+
using MCDwarfLineEntryCollection = std::vector<MCDwarfLineEntry>;
using iterator = MCDwarfLineEntryCollection::iterator;
using const_iterator = MCDwarfLineEntryCollection::const_iterator;
@@ -317,6 +348,11 @@ public:
void emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
Optional<MCDwarfLineStr> &LineStr) const;
+ // This emits a single line table associated with a given Section.
+ static void
+ emitOne(MCStreamer *MCOS, MCSection *Section,
+ const MCLineSection::MCDwarfLineEntryCollection &LineEntries);
+
Expected<unsigned> tryGetFile(StringRef &Directory, StringRef &FileName,
Optional<MD5::MD5Result> Checksum,
Optional<StringRef> Source,
diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h
index 9f4b8de7947b..fa17759bc21a 100644
--- a/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -78,6 +78,8 @@ public:
case Triple::PS4:
case Triple::FreeBSD:
return ELF::ELFOSABI_FREEBSD;
+ case Triple::Solaris:
+ return ELF::ELFOSABI_SOLARIS;
default:
return ELF::ELFOSABI_NONE;
}
diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h
index 8c1e22a14702..8f2b176862c8 100644
--- a/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/llvm/include/llvm/MC/MCELFStreamer.h
@@ -39,7 +39,7 @@ public:
/// \name MCStreamer Interface
/// @{
- void InitSections(bool NoExecStack) override;
+ void initSections(bool NoExecStack, const MCSubtargetInfo &STI) override;
void changeSection(MCSection *Section, const MCExpr *Subsection) override;
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F,
diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index 38cca2413e1e..bf1f32bb91ba 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -200,6 +200,7 @@ public:
VK_GOTREL,
VK_PCREL,
VK_GOTPCREL,
+ VK_GOTPCREL_NORELAX,
VK_GOTTPOFF,
VK_INDNTPOFF,
VK_NTPOFF,
@@ -328,6 +329,7 @@ public:
VK_WASM_TLSREL, // Memory address relative to __tls_base
VK_WASM_MBREL, // Memory address relative to __memory_base
VK_WASM_TBREL, // Table index relative to __table_base
+ VK_WASM_GOT_TLS, // Wasm global index of TLS symbol.
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index f3a785fb09b7..736fdd992063 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -311,6 +311,9 @@ class MCAlignFragment : public MCFragment {
/// cannot be satisfied in this width then this fragment is ignored.
unsigned MaxBytesToEmit;
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo *STI;
+
public:
MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit, MCSection *Sec = nullptr)
@@ -326,7 +329,12 @@ public:
unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; }
bool hasEmitNops() const { return EmitNops; }
- void setEmitNops(bool Value) { EmitNops = Value; }
+ void setEmitNops(bool Value, const MCSubtargetInfo *STI) {
+ EmitNops = Value;
+ this->STI = STI;
+ }
+
+ const MCSubtargetInfo *getSubtargetInfo() const { return STI; }
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Align;
@@ -369,17 +377,22 @@ class MCNopsFragment : public MCFragment {
/// Source location of the directive that this fragment was created for.
SMLoc Loc;
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo &STI;
+
public:
MCNopsFragment(int64_t NumBytes, int64_t ControlledNopLength, SMLoc L,
- MCSection *Sec = nullptr)
+ const MCSubtargetInfo &STI, MCSection *Sec = nullptr)
: MCFragment(FT_Nops, false, Sec), Size(NumBytes),
- ControlledNopLength(ControlledNopLength), Loc(L) {}
+ ControlledNopLength(ControlledNopLength), Loc(L), STI(STI) {}
int64_t getNumBytes() const { return Size; }
int64_t getControlledNopLength() const { return ControlledNopLength; }
SMLoc getLoc() const { return Loc; }
+ const MCSubtargetInfo *getSubtargetInfo() const { return &STI; }
+
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Nops;
}
@@ -572,10 +585,14 @@ class MCBoundaryAlignFragment : public MCFragment {
/// is not meaningful before that.
uint64_t Size = 0;
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo &STI;
+
public:
- MCBoundaryAlignFragment(Align AlignBoundary, MCSection *Sec = nullptr)
- : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary) {
- }
+ MCBoundaryAlignFragment(Align AlignBoundary, const MCSubtargetInfo &STI,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_BoundaryAlign, false, Sec), AlignBoundary(AlignBoundary),
+ STI(STI) {}
uint64_t getSize() const { return Size; }
void setSize(uint64_t Value) { Size = Value; }
@@ -589,6 +606,8 @@ public:
LastFragment = F;
}
+ const MCSubtargetInfo *getSubtargetInfo() const { return &STI; }
+
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_BoundaryAlign;
}
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 898ca47b13b8..632a7d8f820e 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -154,9 +154,14 @@ public:
/// Given an instruction, tries to get the address of a memory operand. Returns
/// the address on success.
- virtual Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
- uint64_t Addr,
- uint64_t Size) const;
+ virtual Optional<uint64_t>
+ evaluateMemoryOperandAddress(const MCInst &Inst, const MCSubtargetInfo *STI,
+ uint64_t Addr, uint64_t Size) const;
+
+ /// Given an instruction with a memory operand that could require relocation,
+ /// returns the offset within the instruction of that relocation.
+ virtual Optional<uint64_t>
+ getMemoryOperandRelocationOffset(const MCInst &Inst, uint64_t Size) const;
/// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
virtual std::vector<std::pair<uint64_t, uint64_t>>
diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h
index 0e6b677098e8..e8ffd29170e6 100644
--- a/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/llvm/include/llvm/MC/MCInstrDesc.h
@@ -76,7 +76,7 @@ enum OperandType {
OPERAND_FIRST_TARGET = 13,
};
-}
+} // namespace MCOI
/// This holds information about one operand of a machine instruction,
/// indicating the register class for register operands, etc.
@@ -185,7 +185,7 @@ enum Flag {
VariadicOpsAreDefs,
Authenticated,
};
-}
+} // namespace MCID
/// Describe properties that are true of each instruction in the target
/// description file. This captures information about side effects, register
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 8ae86ef2a574..ba7450ac64f1 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -225,10 +225,13 @@ protected:
// XCOFF specific sections
MCSection *TOCBaseSection = nullptr;
+ MCSection *ReadOnly8Section = nullptr;
+ MCSection *ReadOnly16Section = nullptr;
public:
void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel = false);
+ virtual ~MCObjectFileInfo();
MCContext &getContext() const { return *Ctx; }
bool getSupportsWeakOmittedEHFrame() const {
@@ -251,6 +254,7 @@ public:
return CompactUnwindDwarfEHFrameOnly;
}
+ virtual unsigned getTextSectionAlignment() const { return 4; }
MCSection *getTextSection() const { return TextSection; }
MCSection *getDataSection() const { return DataSection; }
MCSection *getBSSSection() const { return BSSSection; }
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index dcdee2b5774b..9d6416e4a18d 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -137,7 +137,7 @@ public:
void emitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) override;
- void emitCodeAlignment(unsigned ByteAlignment,
+ void emitCodeAlignment(unsigned ByteAlignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0) override;
void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) override;
@@ -181,8 +181,8 @@ public:
SMLoc Loc = SMLoc()) override;
void emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr,
SMLoc Loc = SMLoc()) override;
- void emitNops(int64_t NumBytes, int64_t ControlledNopLength,
- SMLoc Loc) override;
+ void emitNops(int64_t NumBytes, int64_t ControlledNopLength, SMLoc Loc,
+ const MCSubtargetInfo &STI) override;
void emitFileDirective(StringRef Filename) override;
void emitFileDirective(StringRef Filename, StringRef CompilerVerion,
StringRef TimeStamp, StringRef Description) override;
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index 1c6926b9a9e6..abc9705f0851 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -44,17 +44,26 @@
#ifndef LLVM_MC_MCPSEUDOPROBE_H
#define LLVM_MC_MCPSEUDOPROBE_H
-#include "llvm/ADT/MapVector.h"
-#include "llvm/MC/MCSection.h"
-#include <functional>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/PseudoProbe.h"
+#include "llvm/Support/ErrorOr.h"
+#include <list>
#include <map>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
#include <vector>
namespace llvm {
+class MCSection;
class MCStreamer;
class MCSymbol;
class MCObjectStreamer;
+class raw_ostream;
enum class MCPseudoProbeFlag {
// If set, indicates that the probe is encoded as an address delta
@@ -62,69 +71,211 @@ enum class MCPseudoProbeFlag {
AddressDelta = 0x1,
};
+// Function descriptor decoded from .pseudo_probe_desc section
+struct MCPseudoProbeFuncDesc {
+ uint64_t FuncGUID = 0;
+ uint64_t FuncHash = 0;
+ std::string FuncName;
+
+ MCPseudoProbeFuncDesc(uint64_t GUID, uint64_t Hash, StringRef Name)
+ : FuncGUID(GUID), FuncHash(Hash), FuncName(Name){};
+
+ void print(raw_ostream &OS);
+};
+
+class MCPseudoProbe;
+class MCDecodedPseudoProbe;
+
+// An inline frame has the form <Guid, ProbeID>
+using InlineSite = std::tuple<uint64_t, uint32_t>;
+using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
+// GUID to PseudoProbeFuncDesc map
+using GUIDProbeFunctionMap =
+ std::unordered_map<uint64_t, MCPseudoProbeFuncDesc>;
+// Address to pseudo probes map.
+using AddressProbesMap =
+ std::unordered_map<uint64_t, std::list<MCDecodedPseudoProbe>>;
+
+class MCPseudoProbeInlineTree;
+class MCDecodedPseudoProbeInlineTree;
+
+class MCPseudoProbeBase {
+protected:
+ uint64_t Guid;
+ uint64_t Index;
+ uint8_t Attributes;
+ uint8_t Type;
+ // The value should be equal to PseudoProbeReservedId::Last + 1 which is
+ // defined in SampleProfileProbe.h. The header file is not included here to
+ // reduce the dependency from MC to IPO.
+ const static uint32_t PseudoProbeFirstId = 1;
+
+public:
+ MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T)
+ : Guid(G), Index(I), Attributes(At), Type(T) {}
+
+ bool isEntry() const { return Index == PseudoProbeFirstId; }
+
+ uint64_t getGuid() const { return Guid; }
+
+ uint64_t getIndex() const { return Index; }
+
+ uint8_t getAttributes() const { return Attributes; }
+
+ uint8_t getType() const { return Type; }
+
+ bool isBlock() const {
+ return Type == static_cast<uint8_t>(PseudoProbeType::Block);
+ }
+
+ bool isIndirectCall() const {
+ return Type == static_cast<uint8_t>(PseudoProbeType::IndirectCall);
+ }
+
+ bool isDirectCall() const {
+ return Type == static_cast<uint8_t>(PseudoProbeType::DirectCall);
+ }
+
+ bool isCall() const { return isIndirectCall() || isDirectCall(); }
+
+ void setAttributes(uint8_t Attr) { Attributes = Attr; }
+};
+
/// Instances of this class represent a pseudo probe instance for a pseudo probe
/// table entry, which is created while a machine instruction is assembled and
/// uses an address from a temporary label created at the current address in the
/// current section.
-class MCPseudoProbe {
+class MCPseudoProbe : public MCPseudoProbeBase {
MCSymbol *Label;
- uint64_t Guid;
- uint64_t Index;
- uint8_t Type;
- uint8_t Attributes;
public:
MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attributes)
- : Label(Label), Guid(Guid), Index(Index), Type(Type),
- Attributes(Attributes) {
+ : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) {
assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
assert(Attributes <= 0xFF &&
"Probe attributes too big to encode, exceeding 2^16");
}
MCSymbol *getLabel() const { return Label; }
+ void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
+};
- uint64_t getGuid() const { return Guid; }
+// Represents a callsite with caller function name and probe id
+using MCPseduoProbeFrameLocation = std::pair<StringRef, uint32_t>;
- uint64_t getIndex() const { return Index; }
+class MCDecodedPseudoProbe : public MCPseudoProbeBase {
+ uint64_t Address;
+ MCDecodedPseudoProbeInlineTree *InlineTree;
- uint8_t getType() const { return Type; }
+public:
+ MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
+ uint8_t At, MCDecodedPseudoProbeInlineTree *Tree)
+ : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad),
+ InlineTree(Tree){};
- uint8_t getAttributes() const { return Attributes; }
+ uint64_t getAddress() const { return Address; }
- void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *LastProbe) const;
+ void setAddress(uint64_t Addr) { Address = Addr; }
+
+ MCDecodedPseudoProbeInlineTree *getInlineTreeNode() const {
+ return InlineTree;
+ }
+
+ // Get the inlined context by traversing the current inline tree backwards;
+ // each tree node has its InlineSite, which is taken as the context.
+ // \p ContextStack is populated in root-to-leaf order.
+ void
+ getInlineContext(SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
+ const GUIDProbeFunctionMap &GUID2FuncMAP) const;
+
+ // Helper function to get the string from context stack
+ std::string
+ getInlineContextStr(const GUIDProbeFunctionMap &GUID2FuncMAP) const;
+
+ // Print pseudo probe while disassembling
+ void print(raw_ostream &OS, const GUIDProbeFunctionMap &GUID2FuncMAP,
+ bool ShowName) const;
};
-// An inline frame has the form <Guid, ProbeID>
-using InlineSite = std::tuple<uint64_t, uint32_t>;
-using MCPseudoProbeInlineStack = SmallVector<InlineSite, 8>;
+template <typename ProbeType, typename DerivedProbeInlineTreeType>
+class MCPseudoProbeInlineTreeBase {
+ struct InlineSiteHash {
+ uint64_t operator()(const InlineSite &Site) const {
+ return std::get<0>(Site) ^ std::get<1>(Site);
+ }
+ };
-// A Tri-tree based data structure to group probes by inline stack.
-// A tree is allocated for a standalone .text section. A fake
-// instance is created as the root of a tree.
-// A real instance of this class is created for each function, either an
-// unlined function that has code in .text section or an inlined function.
-class MCPseudoProbeInlineTree {
- uint64_t Guid;
+protected:
+ // Track children (e.g. inlinees) of current context
+ using InlinedProbeTreeMap = std::unordered_map<
+ InlineSite, std::unique_ptr<DerivedProbeInlineTreeType>, InlineSiteHash>;
+ InlinedProbeTreeMap Children;
// Set of probes that come with the function.
- std::vector<MCPseudoProbe> Probes;
- // Use std::map for a deterministic output.
- std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
+ std::vector<ProbeType> Probes;
+ MCPseudoProbeInlineTreeBase() {
+ static_assert(std::is_base_of<MCPseudoProbeInlineTreeBase,
+ DerivedProbeInlineTreeType>::value,
+ "DerivedProbeInlineTreeType must be subclass of "
+ "MCPseudoProbeInlineTreeBase");
+ }
+
+public:
+ uint64_t Guid = 0;
// Root node has a GUID 0.
- bool isRoot() { return Guid == 0; }
- MCPseudoProbeInlineTree *getOrAddNode(InlineSite Site);
+ bool isRoot() const { return Guid == 0; }
+ InlinedProbeTreeMap &getChildren() { return Children; }
+ const InlinedProbeTreeMap &getChildren() const { return Children; }
+ std::vector<ProbeType> &getProbes() { return Probes; }
+ void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
+ // Caller node of the inline site
+ MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent;
+ DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
+ auto Ret = Children.emplace(
+ Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
+ Ret.first->second->Parent = this;
+ return Ret.first->second.get();
+ };
+};
+// A trie-based data structure to group probes by inline stack.
+// A tree is allocated for a standalone .text section. A fake
+// instance is created as the root of a tree.
+// A real instance of this class is created for each function, either a
+// non-inlined function that has code in the .text section or an inlined
+// function.
+class MCPseudoProbeInlineTree
+ : public MCPseudoProbeInlineTreeBase<MCPseudoProbe,
+ MCPseudoProbeInlineTree> {
public:
MCPseudoProbeInlineTree() = default;
- MCPseudoProbeInlineTree(uint64_t Guid) : Guid(Guid) {}
- ~MCPseudoProbeInlineTree();
+ MCPseudoProbeInlineTree(uint64_t Guid) { this->Guid = Guid; }
+ MCPseudoProbeInlineTree(const InlineSite &Site) {
+ this->Guid = std::get<0>(Site);
+ }
+
+ // MCPseudoProbeInlineTree method based on Inlinees
void addPseudoProbe(const MCPseudoProbe &Probe,
const MCPseudoProbeInlineStack &InlineStack);
void emit(MCObjectStreamer *MCOS, const MCPseudoProbe *&LastProbe);
};
+// inline tree node for the decoded pseudo probe
+class MCDecodedPseudoProbeInlineTree
+ : public MCPseudoProbeInlineTreeBase<MCDecodedPseudoProbe *,
+ MCDecodedPseudoProbeInlineTree> {
+public:
+ InlineSite ISite;
+ // Used for decoding
+ uint32_t ChildrenToProcess = 0;
+
+ MCDecodedPseudoProbeInlineTree(){};
+ MCDecodedPseudoProbeInlineTree(const InlineSite &Site) : ISite(Site){};
+
+ // Return false if it's a dummy inline site
+ bool hasInlineSite() const { return std::get<0>(ISite) != 0; }
+};
+
/// Instances of this class represent the pseudo probes inserted into a compile
/// unit.
class MCPseudoProbeSection {
@@ -172,6 +323,83 @@ public:
static int DdgPrintIndent;
#endif
};
+
+class MCPseudoProbeDecoder {
+ // GUID to PseudoProbeFuncDesc map.
+ GUIDProbeFunctionMap GUID2FuncDescMap;
+
+ // Address to probes map.
+ AddressProbesMap Address2ProbesMap;
+
+ // The dummy root of the inline trie; all outlined functions are direct
+ // children of the dummy root, and all inlined functions are children of
+ // their inliner. So the relation would be like:
+ // DummyRoot --> OutlinedFunc --> InlinedFunc1 --> InlinedFunc2
+ MCDecodedPseudoProbeInlineTree DummyInlineRoot;
+
+ /// Points to the current location in the buffer.
+ const uint8_t *Data = nullptr;
+
+ /// Points to the end of the buffer.
+ const uint8_t *End = nullptr;
+
+ // Decoding helper function
+ template <typename T> ErrorOr<T> readUnencodedNumber();
+ template <typename T> ErrorOr<T> readUnsignedNumber();
+ template <typename T> ErrorOr<T> readSignedNumber();
+ ErrorOr<StringRef> readString(uint32_t Size);
+
+public:
+ // Decode pseudo_probe_desc section to build GUID to PseudoProbeFuncDesc map.
+ bool buildGUID2FuncDescMap(const uint8_t *Start, std::size_t Size);
+
+ // Decode pseudo_probe section to build address to probes map.
+ bool buildAddress2ProbeMap(const uint8_t *Start, std::size_t Size);
+
+ // Print pseudo_probe_desc section info
+ void printGUID2FuncDescMap(raw_ostream &OS);
+
+ // Print pseudo_probe section info, used along with show-disassembly
+ void printProbeForAddress(raw_ostream &OS, uint64_t Address);
+
+ // do printProbeForAddress for all addresses
+ void printProbesForAllAddresses(raw_ostream &OS);
+
+ // Look up the probe of a call for the input address
+ const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const;
+
+ const MCPseudoProbeFuncDesc *getFuncDescForGUID(uint64_t GUID) const;
+
+ // Helper function to populate one probe's inline stack into
+ // \p InlineContextStack.
+ // Current leaf location info will be added if IncludeLeaf is true
+ // Example:
+ // Current probe(bar:3) inlined at foo:2 then inlined at main:1
+ // IncludeLeaf = true, Output: [main:1, foo:2, bar:3]
+ // IncludeLeaf = false, Output: [main:1, foo:2]
+ void getInlineContextForProbe(
+ const MCDecodedPseudoProbe *Probe,
+ SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
+ bool IncludeLeaf) const;
+
+ const AddressProbesMap &getAddress2ProbesMap() const {
+ return Address2ProbesMap;
+ }
+
+ AddressProbesMap &getAddress2ProbesMap() { return Address2ProbesMap; }
+
+ const GUIDProbeFunctionMap &getGUID2FuncDescMap() const {
+ return GUID2FuncDescMap;
+ }
+
+ const MCPseudoProbeFuncDesc *
+ getInlinerDescForProbe(const MCDecodedPseudoProbe *Probe) const;
+
+ const MCDecodedPseudoProbeInlineTree &getDummyInlineRoot() const {
+ return DummyInlineRoot;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_MC_MCPSEUDOPROBE_H
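
To make the decoder-side additions concrete, here is a hedged usage sketch. It assumes the raw bytes of the .pseudo_probe_desc and .pseudo_probe sections have already been extracted elsewhere, and it only calls methods declared in the header above:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/MC/MCPseudoProbe.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  void dumpProbeContext(const uint8_t *DescStart, size_t DescSize,
                        const uint8_t *ProbeStart, size_t ProbeSize,
                        uint64_t Address) {
    MCPseudoProbeDecoder Decoder;
    // Build the two maps from the raw section contents.
    if (!Decoder.buildGUID2FuncDescMap(DescStart, DescSize) ||
        !Decoder.buildAddress2ProbeMap(ProbeStart, ProbeSize))
      return;

    if (const MCDecodedPseudoProbe *Probe =
            Decoder.getCallProbeForAddr(Address)) {
      // Spelling of the frame-location type follows the header above.
      SmallVector<MCPseduoProbeFrameLocation, 4> Context;
      // IncludeLeaf = true gives e.g. [main:1, foo:2, bar:3].
      Decoder.getInlineContextForProbe(Probe, Context, /*IncludeLeaf=*/true);
      for (const auto &Frame : Context)
        errs() << Frame.first << ":" << Frame.second << "\n";
    }
  }
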
diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h
index 72507b7d8ee4..1e8c747785eb 100644
--- a/llvm/include/llvm/MC/MCRegister.h
+++ b/llvm/include/llvm/MC/MCRegister.h
@@ -10,6 +10,7 @@
#define LLVM_MC_MCREGISTER_H
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
#include <cassert>
#include <limits>
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index acfbfd387ff3..6dffc158af50 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -14,7 +14,6 @@
#ifndef LLVM_MC_MCSCHEDULE_H
#define LLVM_MC_MCSCHEDULE_H
-#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index fd326ff18712..e00f50f617fa 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -123,6 +123,8 @@ public:
/// This is used to emit bytes in \p Data as sequence of .byte directives.
virtual void emitRawBytes(StringRef Data);
+ virtual void emitConstantPools();
+
virtual void finish();
};
@@ -165,7 +167,7 @@ public:
virtual void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value);
- void finish() override;
+ void emitConstantPools() override;
/// Reset any state between object emissions, i.e. the equivalent of
/// MCStreamer's reset method.
@@ -445,7 +447,7 @@ public:
}
/// Create the default sections and set the initial one.
- virtual void InitSections(bool NoExecStack);
+ virtual void initSections(bool NoExecStack, const MCSubtargetInfo &STI);
MCSymbol *endSection(MCSection *Section);
@@ -797,7 +799,7 @@ public:
SMLoc Loc = SMLoc());
virtual void emitNops(int64_t NumBytes, int64_t ControlledNopLength,
- SMLoc Loc);
+ SMLoc Loc, const MCSubtargetInfo& STI);
/// Emit NumBytes worth of zeros.
/// This function properly handles data in virtual sections.
@@ -831,10 +833,12 @@ public:
///
/// \param ByteAlignment - The alignment to reach. This must be a power of
/// two on some targets.
+ /// \param STI - The MCSubtargetInfo in operation when padding is emitted.
/// \param MaxBytesToEmit - The maximum numbers of bytes to emit, or 0. If
/// the alignment cannot be reached in this many bytes, no bytes are
/// emitted.
virtual void emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0);
/// Emit some number of copies of \p Value until the byte offset \p
diff --git a/llvm/include/llvm/MC/MCSymbolWasm.h b/llvm/include/llvm/MC/MCSymbolWasm.h
index 852ab678e616..5a4852e0e895 100644
--- a/llvm/include/llvm/MC/MCSymbolWasm.h
+++ b/llvm/include/llvm/MC/MCSymbolWasm.h
@@ -27,7 +27,6 @@ class MCSymbolWasm : public MCSymbol {
wasm::WasmSignature *Signature = nullptr;
Optional<wasm::WasmGlobalType> GlobalType;
Optional<wasm::WasmTableType> TableType;
- Optional<wasm::WasmTagType> TagType;
/// An expression describing how to calculate the size of a symbol. If a
/// symbol has no size this field will be NULL.
@@ -67,6 +66,11 @@ public:
modifyFlags(wasm::WASM_SYMBOL_NO_STRIP, wasm::WASM_SYMBOL_NO_STRIP);
}
+ bool isTLS() const { return getFlags() & wasm::WASM_SYMBOL_TLS; }
+ void setTLS() const {
+ modifyFlags(wasm::WASM_SYMBOL_TLS, wasm::WASM_SYMBOL_TLS);
+ }
+
bool isWeak() const { return IsWeak; }
void setWeak(bool isWeak) { IsWeak = isWeak; }
@@ -142,12 +146,6 @@ public:
wasm::WasmLimits Limits = {wasm::WASM_LIMITS_FLAG_NONE, 0, 0};
setTableType({uint8_t(VT), Limits});
}
-
- const wasm::WasmTagType &getTagType() const {
- assert(TagType.hasValue());
- return TagType.getValue();
- }
- void setTagType(wasm::WasmTagType ET) { TagType = ET; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCWasmStreamer.h b/llvm/include/llvm/MC/MCWasmStreamer.h
index 6651f071f799..818f59e5ab3e 100644
--- a/llvm/include/llvm/MC/MCWasmStreamer.h
+++ b/llvm/include/llvm/MC/MCWasmStreamer.h
@@ -41,6 +41,9 @@ public:
/// @{
void changeSection(MCSection *Section, const MCExpr *Subsection) override;
+ void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
+ void emitLabelAtPos(MCSymbol *Symbol, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) override;
void emitAssemblerFlag(MCAssemblerFlag Flag) override;
void emitThumbFunc(MCSymbol *Func) override;
void emitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
@@ -68,6 +71,8 @@ private:
void emitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &) override;
void emitInstToData(const MCInst &Inst, const MCSubtargetInfo &) override;
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
/// Merge the content of the fragment \p EF into the fragment \p DF.
void mergeFragment(MCDataFragment *, MCDataFragment *);
diff --git a/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index 53b2ef0bd96e..af1ed6faf753 100644
--- a/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -39,7 +39,7 @@ public:
/// \name MCStreamer interface
/// \{
- void InitSections(bool NoExecStack) override;
+ void initSections(bool NoExecStack, const MCSubtargetInfo &STI) override;
void emitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void emitAssemblerFlag(MCAssemblerFlag Flag) override;
void emitThumbFunc(MCSymbol *Func) override;
diff --git a/llvm/include/llvm/Support/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index e661ae26cb4e..da9a9269edbf 100644
--- a/llvm/include/llvm/Support/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -1,4 +1,4 @@
-//===- Support/TargetRegistry.h - Target Registration -----------*- C++ -*-===//
+//===- MC/TargetRegistry.h - Target Registration ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,8 +15,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_TARGETREGISTRY_H
-#define LLVM_SUPPORT_TARGETREGISTRY_H
+#ifndef LLVM_MC_TARGETREGISTRY_H
+#define LLVM_MC_TARGETREGISTRY_H
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/ADT/Optional.h"
@@ -59,6 +59,11 @@ class raw_ostream;
class raw_pwrite_stream;
class TargetMachine;
class TargetOptions;
+namespace mca {
+class CustomBehaviour;
+class InstrPostProcess;
+class SourceMgr;
+} // namespace mca
MCStreamer *createNullStreamer(MCContext &Ctx);
// Takes ownership of \p TAB and \p CE.
@@ -114,6 +119,13 @@ MCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo);
+mca::CustomBehaviour *createCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII);
+
+mca::InstrPostProcess *createInstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII);
+
/// Target - Wrapper for Target specific information.
///
/// For registration purposes, this is a POD type so that targets can be
@@ -206,6 +218,15 @@ public:
LLVMSymbolLookupCallback SymbolLookUp, void *DisInfo, MCContext *Ctx,
std::unique_ptr<MCRelocationInfo> &&RelInfo);
+ using CustomBehaviourCtorTy =
+ mca::CustomBehaviour *(*)(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII);
+
+ using InstrPostProcessCtorTy =
+ mca::InstrPostProcess *(*)(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII);
+
private:
/// Next - The next registered target in the linked list, maintained by the
/// TargetRegistry.
@@ -305,6 +326,14 @@ private:
/// MCSymbolizer, if registered (default = llvm::createMCSymbolizer)
MCSymbolizerCtorTy MCSymbolizerCtorFn = nullptr;
+ /// CustomBehaviourCtorFn - Construction function for this target's
+ /// CustomBehaviour, if registered (default = nullptr).
+ CustomBehaviourCtorTy CustomBehaviourCtorFn = nullptr;
+
+ /// InstrPostProcessCtorFn - Construction function for this target's
+ /// InstrPostProcess, if registered (default = nullptr).
+ InstrPostProcessCtorTy InstrPostProcessCtorFn = nullptr;
+
public:
Target() = default;
@@ -623,6 +652,25 @@ public:
std::move(RelInfo));
}
+ /// createCustomBehaviour - Create a target specific CustomBehaviour.
+ /// This class is used by llvm-mca and requires backend functionality.
+ mca::CustomBehaviour *createCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII) const {
+ if (CustomBehaviourCtorFn)
+ return CustomBehaviourCtorFn(STI, SrcMgr, MCII);
+ return nullptr;
+ }
+
+ /// createInstrPostProcess - Create a target specific InstrPostProcess.
+ /// This class is used by llvm-mca and requires backend functionality.
+ mca::InstrPostProcess *createInstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII) const {
+ if (InstrPostProcessCtorFn)
+ return InstrPostProcessCtorFn(STI, MCII);
+ return nullptr;
+ }
+
/// @}
};
@@ -959,6 +1007,34 @@ struct TargetRegistry {
T.MCSymbolizerCtorFn = Fn;
}
+ /// RegisterCustomBehaviour - Register a CustomBehaviour
+ /// implementation for the given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a CustomBehaviour for the target.
+ static void RegisterCustomBehaviour(Target &T,
+ Target::CustomBehaviourCtorTy Fn) {
+ T.CustomBehaviourCtorFn = Fn;
+ }
+
+ /// RegisterInstrPostProcess - Register an InstrPostProcess
+ /// implementation for the given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct an InstrPostProcess for the target.
+ static void RegisterInstrPostProcess(Target &T,
+ Target::InstrPostProcessCtorTy Fn) {
+ T.InstrPostProcessCtorFn = Fn;
+ }
+
/// @}
};
@@ -1294,4 +1370,4 @@ private:
} // end namespace llvm
-#endif // LLVM_SUPPORT_TARGETREGISTRY_H
+#endif // LLVM_MC_TARGETREGISTRY_H
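
A hedged sketch of how a backend would use the two new hooks; everything prefixed "MyTarget" is illustrative, the factories return base-class instances only to keep the sketch short, and only the registration and creation entry points come from the declarations above. In practice the registration call would live in the target's LLVMInitialize<Target>TargetMC(), and llvm-mca goes through Target::createCustomBehaviour()/createInstrPostProcess(), which fall back to nullptr when nothing is registered.

  #include "llvm/MC/TargetRegistry.h"
  #include "llvm/MCA/CustomBehaviour.h"

  using namespace llvm;

  // Illustrative factories; a real target would return its own subclasses.
  static mca::CustomBehaviour *
  createMyTargetCustomBehaviour(const MCSubtargetInfo &STI,
                                const mca::SourceMgr &SrcMgr,
                                const MCInstrInfo &MCII) {
    return new mca::CustomBehaviour(STI, SrcMgr, MCII);
  }

  static mca::InstrPostProcess *
  createMyTargetInstrPostProcess(const MCSubtargetInfo &STI,
                                 const MCInstrInfo &MCII) {
    return new mca::InstrPostProcess(STI, MCII);
  }

  void registerMyTargetMCA(Target &T) {
    TargetRegistry::RegisterCustomBehaviour(T, createMyTargetCustomBehaviour);
    TargetRegistry::RegisterInstrPostProcess(T, createMyTargetInstrPostProcess);
  }
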
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index 655a9c49c599..395b07cf722b 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -22,6 +22,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/SourceMgr.h"
+#include "llvm/MCA/View.h"
namespace llvm {
namespace mca {
@@ -55,29 +56,53 @@ public:
class CustomBehaviour {
protected:
const MCSubtargetInfo &STI;
- const SourceMgr &SrcMgr;
+ const mca::SourceMgr &SrcMgr;
const MCInstrInfo &MCII;
public:
- CustomBehaviour(const MCSubtargetInfo &STI, const SourceMgr &SrcMgr,
+ CustomBehaviour(const MCSubtargetInfo &STI, const mca::SourceMgr &SrcMgr,
const MCInstrInfo &MCII)
: STI(STI), SrcMgr(SrcMgr), MCII(MCII) {}
virtual ~CustomBehaviour();
- // Before the llvm-mca pipeline dispatches an instruction, it first checks
- // for any register or resource dependencies / hazards. If it doesn't find
- // any, this method will be invoked to determine if there are any custom
- // hazards that the instruction needs to wait for.
- // The return value of this method is the number of cycles that the
- // instruction needs to wait for.
- // It's safe to underestimate the number of cycles to wait for since these
- // checks will be invoked again before the intruction gets dispatched.
- // However, it's not safe (accurate) to overestimate the number of cycles
- // to wait for since the instruction will wait for AT LEAST that number of
- // cycles before attempting to be dispatched again.
+ /// Before the llvm-mca pipeline dispatches an instruction, it first checks
+ /// for any register or resource dependencies / hazards. If it doesn't find
+ /// any, this method will be invoked to determine if there are any custom
+ /// hazards that the instruction needs to wait for.
+ /// The return value of this method is the number of cycles that the
+ /// instruction needs to wait for.
+ /// It's safe to underestimate the number of cycles to wait for since these
+ /// checks will be invoked again before the instruction gets dispatched.
+ /// However, it's not safe (accurate) to overestimate the number of cycles
+ /// to wait for since the instruction will wait for AT LEAST that number of
+ /// cycles before attempting to be dispatched again.
virtual unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst,
const InstRef &IR);
+
+ // Functions that target CBs can override to return a list of target-specific
+ // Views that need to live within /lib/Target/ so that they can benefit from
+ // the target CB or from backend functionality that is not already exposed
+ // through MC-layer classes. Keep in mind how these functions are used: they
+ // are called from within llvm-mca.cpp, and each returned unique_ptr<View> is
+ // passed into PipelinePrinter::addView(), which std::moves the View into its
+ // own vector of Views. So any CB that overrides these functions must not rely
+ // on the current address or reference of the View unique_ptrs. If the CB and
+ // a View do need to communicate with each other, consider giving the View a
+ // reference or pointer to the CB when the View is constructed; the View can
+ // then query the CB for information when it needs it.
+ /// Return a vector of Views that will be added before all other Views.
+ virtual std::vector<std::unique_ptr<View>>
+ getStartViews(llvm::MCInstPrinter &IP, llvm::ArrayRef<llvm::MCInst> Insts);
+ /// Return a vector of Views that will be added after the InstructionInfoView.
+ virtual std::vector<std::unique_ptr<View>>
+ getPostInstrInfoViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts);
+ /// Return a vector of Views that will be added after all other Views.
+ virtual std::vector<std::unique_ptr<View>>
+ getEndViews(llvm::MCInstPrinter &IP, llvm::ArrayRef<llvm::MCInst> Insts);
};
} // namespace mca
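
As a hedged illustration of the comments above, a target CustomBehaviour might look like the sketch below; the class name is hypothetical, only the base-class signatures come from this header, and the View hook returns an empty vector just to show the ownership convention.

  #include "llvm/MCA/CustomBehaviour.h"
  #include <memory>
  #include <vector>

  using namespace llvm;

  class MyTargetCustomBehaviour : public mca::CustomBehaviour {
  public:
    using mca::CustomBehaviour::CustomBehaviour; // (STI, SrcMgr, MCII)

    // Report no extra stall cycles; underestimating is safe because the check
    // runs again before the instruction is dispatched.
    unsigned checkCustomHazard(ArrayRef<mca::InstRef> IssuedInst,
                               const mca::InstRef &IR) override {
      return 0;
    }

    // Views returned here are std::moved into PipelinePrinter's own vector,
    // so the CB must not hold on to their addresses afterwards.
    std::vector<std::unique_ptr<mca::View>>
    getStartViews(MCInstPrinter &IP, ArrayRef<MCInst> Insts) override {
      return {};
    }
  };
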
diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h
index 988cddcbe013..3eb32186d551 100644
--- a/llvm/include/llvm/MCA/Instruction.h
+++ b/llvm/include/llvm/MCA/Instruction.h
@@ -46,7 +46,7 @@ class MCAOperand {
kSFPImmediate, ///< Single-floating-point immediate operand.
kDFPImmediate, ///< Double-Floating-point immediate operand.
};
- MCAOperandType Kind = kInvalid;
+ MCAOperandType Kind;
union {
unsigned RegVal;
@@ -62,7 +62,7 @@ class MCAOperand {
unsigned Index;
public:
- MCAOperand() : FPImmVal(0) {}
+ MCAOperand() : Kind(kInvalid), FPImmVal(), Index() {}
bool isValid() const { return Kind != kInvalid; }
bool isReg() const { return Kind == kRegister; }
diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
index b7006e761647..42f386a13d85 100644
--- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -21,6 +21,7 @@
namespace llvm {
namespace mca {
+class LSUnit;
class RegisterFile;
struct StallInfo {
@@ -29,6 +30,7 @@ struct StallInfo {
REGISTER_DEPS,
DISPATCH,
DELAY,
+ LOAD_STORE,
CUSTOM_STALL
};
@@ -54,6 +56,7 @@ class InOrderIssueStage final : public Stage {
RegisterFile &PRF;
ResourceManager RM;
CustomBehaviour &CB;
+ LSUnit &LSU;
/// Instructions that were issued, but not executed yet.
SmallVector<InstRef, 4> IssuedInst;
@@ -110,7 +113,7 @@ class InOrderIssueStage final : public Stage {
public:
InOrderIssueStage(const MCSubtargetInfo &STI, RegisterFile &PRF,
- CustomBehaviour &CB);
+ CustomBehaviour &CB, LSUnit &LSU);
unsigned getIssueWidth() const;
bool isAvailable(const InstRef &) const override;
diff --git a/llvm/tools/llvm-mca/Views/View.h b/llvm/include/llvm/MCA/View.h
index c604733d4ec9..ff8fc1ceb3f1 100644
--- a/llvm/tools/llvm-mca/Views/View.h
+++ b/llvm/include/llvm/MCA/View.h
@@ -12,8 +12,8 @@
///
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_MCA_VIEW_H
-#define LLVM_TOOLS_LLVM_MCA_VIEW_H
+#ifndef LLVM_MCA_VIEW_H
+#define LLVM_MCA_VIEW_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MCA/HWEventListener.h"
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index c5f966891bd0..37f23c435ae1 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -81,10 +81,6 @@ getElfArchType(StringRef Object) {
(uint8_t)Object[ELF::EI_DATA]);
}
-static inline Error createError(const Twine &Err) {
- return make_error<StringError>(Err, object_error::parse_failed);
-}
-
enum PPCInstrMasks : uint64_t {
PADDI_R12_NO_DISP = 0x0610000039800000,
ADDIS_R12_TO_R2_NO_DISP = 0x3D820000,
@@ -392,8 +388,7 @@ public:
Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr &Sec) const;
Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const;
Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const;
- Expected<std::vector<Elf_BBAddrMap>>
- decodeBBAddrMap(const Elf_Shdr &Sec) const;
+ Expected<std::vector<BBAddrMap>> decodeBBAddrMap(const Elf_Shdr &Sec) const;
};
using ELF32LEFile = ELFFile<ELF32LE>;
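
decodeBBAddrMap() now returns the plain BBAddrMap struct (see the ELFTypes.h hunk further down) instead of a per-ELFT type. A hedged caller sketch, assuming an ELFFile<ELF64LE> and the SHT_LLVM_BB_ADDR_MAP section header have been obtained elsewhere:

  #include "llvm/Object/ELF.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;
  using namespace llvm::object;

  // BBAddrMap no longer depends on ELFT, so the loop body is shared across
  // 32/64-bit and endianness variants.
  Error printBBAddrMapFunctions(const ELFFile<ELF64LE> &Obj,
                                const ELF64LE::Shdr &Sec, raw_ostream &OS) {
    Expected<std::vector<BBAddrMap>> MapsOrErr = Obj.decodeBBAddrMap(Sec);
    if (!MapsOrErr)
      return MapsOrErr.takeError();
    for (const BBAddrMap &Map : *MapsOrErr)
      OS << "function at address " << Map.Addr << "\n";
    return Error::success();
  }
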
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index c87a09f86fae..716b94d92d03 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -96,6 +96,10 @@ public:
std::vector<std::pair<Optional<DataRefImpl>, uint64_t>>
getPltAddresses() const;
+
+ /// Returns a vector containing a symbol version for each dynamic symbol.
+ /// Returns an empty vector if version sections do not exist.
+ Expected<std::vector<VersionEntry>> readDynsymVersions() const;
};
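
A hedged sketch of the new hook; it assumes VersionEntry carries a Name string and an IsVerDef flag as defined in Object/ELF.h at this revision, and that the object file has been loaded elsewhere:

  #include "llvm/Object/ELFObjectFile.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;
  using namespace llvm::object;

  // Print one line per dynamic symbol; the vector is empty when the file has
  // no version sections.
  Error printDynsymVersions(const ELFObjectFileBase &Obj, raw_ostream &OS) {
    Expected<std::vector<VersionEntry>> VersOrErr = Obj.readDynsymVersions();
    if (!VersOrErr)
      return VersOrErr.takeError();
    for (const VersionEntry &Ver : *VersOrErr)
      OS << Ver.Name << (Ver.IsVerDef ? " (verdef)" : " (verneed)") << "\n";
    return Error::success();
  }
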
class ELFSectionRef : public SectionRef {
@@ -407,7 +411,8 @@ public:
const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
auto RelSecOrErr = EF.getSection(Rel.d.a);
if (!RelSecOrErr)
- report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message());
+ report_fatal_error(
+ Twine(errorToErrorCode(RelSecOrErr.takeError()).message()));
return *RelSecOrErr;
}
@@ -728,7 +733,8 @@ Expected<uint32_t> ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
} else if (EF.getHeader().e_machine == ELF::EM_ARM) {
if (Expected<StringRef> NameOrErr = getSymbolName(Sym)) {
StringRef Name = *NameOrErr;
- if (Name.startswith("$d") || Name.startswith("$t") ||
+ // TODO Investigate why empty name symbols need to be marked.
+ if (Name.empty() || Name.startswith("$d") || Name.startswith("$t") ||
Name.startswith("$a"))
Result |= SymbolRef::SF_FormatSpecific;
} else {
@@ -966,7 +972,8 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const {
// Error check sh_link here so that getRelocationSymbol can just use it.
auto SymSecOrErr = EF.getSection(RelSec->sh_link);
if (!SymSecOrErr)
- report_fatal_error(errorToErrorCode(SymSecOrErr.takeError()).message());
+ report_fatal_error(
+ Twine(errorToErrorCode(SymSecOrErr.takeError()).message()));
RelData.d.b += S->sh_size / S->sh_entsize;
return relocation_iterator(RelocationRef(RelData, this));
@@ -1055,7 +1062,7 @@ ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const {
assert(getRelSection(Rel)->sh_type == ELF::SHT_REL);
auto Ret = EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message()));
return *Ret;
}
@@ -1065,7 +1072,7 @@ ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
assert(getRelSection(Rela)->sh_type == ELF::SHT_RELA);
auto Ret = EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message()));
return *Ret;
}
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 54ebd751d8d2..e59a63d93989 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -44,7 +44,6 @@ template <class ELFT> struct Elf_Nhdr_Impl;
template <class ELFT> class Elf_Note_Impl;
template <class ELFT> class Elf_Note_Iterator_Impl;
template <class ELFT> struct Elf_CGProfile_Impl;
-template <class ELFT> struct Elf_BBAddrMap_Impl;
template <endianness E, bool Is64> struct ELFType {
private:
@@ -76,7 +75,6 @@ public:
using Note = Elf_Note_Impl<ELFType<E, Is64>>;
using NoteIterator = Elf_Note_Iterator_Impl<ELFType<E, Is64>>;
using CGProfile = Elf_CGProfile_Impl<ELFType<E, Is64>>;
- using BBAddrMap = Elf_BBAddrMap_Impl<ELFType<E, Is64>>;
using DynRange = ArrayRef<Dyn>;
using ShdrRange = ArrayRef<Shdr>;
using SymRange = ArrayRef<Sym>;
@@ -131,7 +129,6 @@ using ELF64BE = ELFType<support::big, true>;
using Elf_Note = typename ELFT::Note; \
using Elf_Note_Iterator = typename ELFT::NoteIterator; \
using Elf_CGProfile = typename ELFT::CGProfile; \
- using Elf_BBAddrMap = typename ELFT::BBAddrMap; \
using Elf_Dyn_Range = typename ELFT::DynRange; \
using Elf_Shdr_Range = typename ELFT::ShdrRange; \
using Elf_Sym_Range = typename ELFT::SymRange; \
@@ -797,9 +794,8 @@ template <class ELFT> struct Elf_Mips_ABIFlags {
};
// Struct representing the BBAddrMap for one function.
-template <class ELFT> struct Elf_BBAddrMap_Impl {
- LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- uintX_t Addr; // Function address
+struct BBAddrMap {
+ uint64_t Addr; // Function address
// Struct representing the BBAddrMap information for one basic block.
struct BBEntry {
uint32_t Offset; // Offset of basic block relative to function start.
diff --git a/llvm/include/llvm/Object/Error.h b/llvm/include/llvm/Object/Error.h
index 07744188444a..1fc1f6603a36 100644
--- a/llvm/include/llvm/Object/Error.h
+++ b/llvm/include/llvm/Object/Error.h
@@ -82,6 +82,10 @@ private:
/// error() function needs to called on the llvm::Error.
Error isNotObjectErrorInvalidFileType(llvm::Error Err);
+inline Error createError(const Twine &Err) {
+ return make_error<StringError>(Err, object_error::parse_failed);
+}
+
} // end namespace object.
} // end namespace llvm.
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index d2ad12e98deb..ca5d63e4074f 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -311,6 +311,9 @@ public:
bool isSectionBitcode(DataRefImpl Sec) const override;
bool isDebugSection(DataRefImpl Sec) const override;
+ /// Return the raw contents of an entire segment.
+ ArrayRef<uint8_t> getSegmentContents(StringRef SegmentName) const;
+
/// When dsymutil generates the companion file, it strips all unnecessary
/// sections (e.g. everything in the _TEXT segment) by omitting their body
/// and setting the offset in their corresponding load command to zero.
diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h
index 2cea950fcf25..e4802c087b8b 100644
--- a/llvm/include/llvm/Object/Wasm.h
+++ b/llvm/include/llvm/Object/Wasm.h
@@ -9,7 +9,7 @@
// This file declares the WasmObjectFile class, which implements the ObjectFile
// interface for Wasm files.
//
-// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+// See: https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
//
//===----------------------------------------------------------------------===//
@@ -37,15 +37,13 @@ public:
WasmSymbol(const wasm::WasmSymbolInfo &Info,
const wasm::WasmGlobalType *GlobalType,
const wasm::WasmTableType *TableType,
- const wasm::WasmTagType *TagType,
const wasm::WasmSignature *Signature)
: Info(Info), GlobalType(GlobalType), TableType(TableType),
- TagType(TagType), Signature(Signature) {}
+ Signature(Signature) {}
const wasm::WasmSymbolInfo &Info;
const wasm::WasmGlobalType *GlobalType;
const wasm::WasmTableType *TableType;
- const wasm::WasmTagType *TagType;
const wasm::WasmSignature *Signature;
bool isTypeFunction() const {
@@ -138,7 +136,6 @@ public:
return TargetFeatures;
}
ArrayRef<wasm::WasmSignature> types() const { return Signatures; }
- ArrayRef<uint32_t> functionTypes() const { return FunctionTypes; }
ArrayRef<wasm::WasmImport> imports() const { return Imports; }
ArrayRef<wasm::WasmTable> tables() const { return Tables; }
ArrayRef<wasm::WasmLimits> memories() const { return Memories; }
@@ -260,6 +257,7 @@ private:
// Custom section types
Error parseDylinkSection(ReadContext &Ctx);
+ Error parseDylink0Section(ReadContext &Ctx);
Error parseNameSection(ReadContext &Ctx);
Error parseLinkingSection(ReadContext &Ctx);
Error parseLinkingSectionSymtab(ReadContext &Ctx);
@@ -274,7 +272,6 @@ private:
wasm::WasmProducerInfo ProducerInfo;
std::vector<wasm::WasmFeatureEntry> TargetFeatures;
std::vector<wasm::WasmSignature> Signatures;
- std::vector<uint32_t> FunctionTypes;
std::vector<wasm::WasmTable> Tables;
std::vector<wasm::WasmLimits> Memories;
std::vector<wasm::WasmGlobal> Globals;
diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h
index 7d024fbc3eae..94136afc45ea 100644
--- a/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -51,6 +51,101 @@ struct XCOFFFileHeader64 {
support::ubig32_t NumberOfSymTableEntries;
};
+template <typename T> struct XCOFFAuxiliaryHeader {
+ static constexpr uint8_t AuxiHeaderFlagMask = 0xF0;
+ static constexpr uint8_t AuxiHeaderTDataAlignmentMask = 0x0F;
+
+public:
+ uint8_t getFlag() const {
+ return static_cast<const T *>(this)->FlagAndTDataAlignment &
+ AuxiHeaderFlagMask;
+ }
+ uint8_t getTDataAlignment() const {
+ return static_cast<const T *>(this)->FlagAndTDataAlignment &
+ AuxiHeaderTDataAlignmentMask;
+ }
+};
+
+struct XCOFFAuxiliaryHeader32 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> {
+ support::ubig16_t
+ AuxMagic; ///< If the value of the o_vstamp field is greater than 1, the
+ ///< o_mflags field is reserved for future use and it should
+ ///< contain 0. Otherwise, this field is not used.
+ support::ubig16_t
+ Version; ///< The valid values are 1 and 2. When the o_vstamp field is 2
+ ///< in an XCOFF32 file, the new interpretation of the n_type
+ ///< field in the symbol table entry is used.
+ support::ubig32_t TextSize;
+ support::ubig32_t InitDataSize;
+ support::ubig32_t BssDataSize;
+ support::ubig32_t EntryPointAddr;
+ support::ubig32_t TextStartAddr;
+ support::ubig32_t DataStartAddr;
+ support::ubig32_t TOCAnchorAddr;
+ support::ubig16_t SecNumOfEntryPoint;
+ support::ubig16_t SecNumOfText;
+ support::ubig16_t SecNumOfData;
+ support::ubig16_t SecNumOfTOC;
+ support::ubig16_t SecNumOfLoader;
+ support::ubig16_t SecNumOfBSS;
+ support::ubig16_t MaxAlignOfText;
+ support::ubig16_t MaxAlignOfData;
+ support::ubig16_t ModuleType;
+ uint8_t CpuFlag;
+ uint8_t CpuType;
+ support::ubig32_t MaxStackSize; ///< If the value is 0, the system default
+ ///< maximum stack size is used.
+ support::ubig32_t MaxDataSize; ///< If the value is 0, the system default
+ ///< maximum data size is used.
+ support::ubig32_t
+ ReservedForDebugger; ///< This field should contain 0. When a loaded
+ ///< program is being debugged, the memory image of
+ ///< this field may be modified by a debugger to
+ ///< insert a trap instruction.
+ uint8_t TextPageSize; ///< Specifies the size of pages for the exec text. The
+ ///< default value is 0 (system-selected page size).
+ uint8_t DataPageSize; ///< Specifies the size of pages for the exec data. The
+ ///< default value is 0 (system-selected page size).
+ uint8_t StackPageSize; ///< Specifies the size of pages for the stack. The
+ ///< default value is 0 (system-selected page size).
+ uint8_t FlagAndTDataAlignment;
+ support::ubig16_t SecNumOfTData;
+ support::ubig16_t SecNumOfTBSS;
+};
+
+struct XCOFFAuxiliaryHeader64 : XCOFFAuxiliaryHeader<XCOFFAuxiliaryHeader32> {
+ support::ubig16_t AuxMagic;
+ support::ubig16_t Version;
+ support::ubig32_t ReservedForDebugger;
+ support::ubig64_t TextStartAddr;
+ support::ubig64_t DataStartAddr;
+ support::ubig64_t TOCAnchorAddr;
+ support::ubig16_t SecNumOfEntryPoint;
+ support::ubig16_t SecNumOfText;
+ support::ubig16_t SecNumOfData;
+ support::ubig16_t SecNumOfTOC;
+ support::ubig16_t SecNumOfLoader;
+ support::ubig16_t SecNumOfBSS;
+ support::ubig16_t MaxAlignOfText;
+ support::ubig16_t MaxAlignOfData;
+ support::ubig16_t ModuleType;
+ uint8_t CpuFlag;
+ uint8_t CpuType;
+ uint8_t TextPageSize;
+ uint8_t DataPageSize;
+ uint8_t StackPageSize;
+ uint8_t FlagAndTDataAlignment;
+ support::ubig64_t TextSize;
+ support::ubig64_t InitDataSize;
+ support::ubig64_t BssDataSize;
+ support::ubig64_t EntryPointAddr;
+ support::ubig64_t MaxStackSize;
+ support::ubig64_t MaxDataSize;
+ support::ubig16_t SecNumOfTData;
+ support::ubig16_t SecNumOfTBSS;
+ support::ubig16_t XCOFF64Flag;
+};
+
template <typename T> struct XCOFFSectionHeader {
// Least significant 3 bits are reserved.
static constexpr unsigned SectionFlagsReservedMask = 0x7;
@@ -97,6 +192,31 @@ struct XCOFFSectionHeader64 : XCOFFSectionHeader<XCOFFSectionHeader64> {
char Padding[4];
};
+struct LoaderSectionHeader32 {
+ support::ubig32_t Version;
+ support::ubig32_t NumberOfSymTabEnt;
+ support::ubig32_t NumberOfRelTabEnt;
+ support::ubig32_t LengthOfImpidStrTbl;
+ support::ubig32_t NumberOfImpid;
+ support::big32_t OffsetToImpid;
+ support::ubig32_t LengthOfStrTbl;
+ support::big32_t OffsetToStrTbl;
+};
+
+struct LoaderSectionHeader64 {
+ support::ubig32_t Version;
+ support::ubig32_t NumberOfSymTabEnt;
+ support::ubig32_t NumberOfRelTabEnt;
+ support::ubig32_t LengthOfImpidStrTbl;
+ support::ubig32_t NumberOfImpid;
+ support::ubig32_t LengthOfStrTbl;
+ support::big64_t OffsetToImpid;
+ support::big64_t OffsetToStrTbl;
+ support::big64_t OffsetToSymTbl;
+ char Padding[16];
+ support::big32_t OffsetToRelEnt;
+};
+
struct XCOFFStringTable {
uint32_t Size;
const char *Data;
@@ -228,7 +348,7 @@ struct XCOFFSectAuxEntForStat {
uint8_t Pad[10];
}; // 32-bit XCOFF file only.
-struct XCOFFRelocation32 {
+template <typename AddressType> struct XCOFFRelocation {
// Masks for packing/unpacking the r_rsize field of relocations.
// The msb is used to indicate if the bits being relocated are signed or
@@ -244,7 +364,7 @@ struct XCOFFRelocation32 {
static constexpr uint8_t XR_BIASED_LENGTH_MASK = 0x3f;
public:
- support::ubig32_t VirtualAddress;
+ AddressType VirtualAddress;
support::ubig32_t SymbolIndex;
// Packed field, see XR_* masks for details of packing.
@@ -260,11 +380,18 @@ public:
uint8_t getRelocatedLength() const;
};
+extern template struct XCOFFRelocation<llvm::support::ubig32_t>;
+extern template struct XCOFFRelocation<llvm::support::ubig64_t>;
+
+struct XCOFFRelocation32 : XCOFFRelocation<llvm::support::ubig32_t> {};
+struct XCOFFRelocation64 : XCOFFRelocation<llvm::support::ubig64_t> {};
+
class XCOFFSymbolRef;
class XCOFFObjectFile : public ObjectFile {
private:
const void *FileHeader = nullptr;
+ const void *AuxiliaryHeader = nullptr;
const void *SectionHeaderTable = nullptr;
const void *SymbolTblPtr = nullptr;
@@ -275,6 +402,7 @@ private:
const XCOFFSectionHeader32 *sectionHeaderTable32() const;
const XCOFFSectionHeader64 *sectionHeaderTable64() const;
+ template <typename T> const T *sectionHeaderTable() const;
size_t getFileHeaderSize() const;
size_t getSectionHeaderSize() const;
@@ -283,6 +411,7 @@ private:
const XCOFFSectionHeader64 *toSection64(DataRefImpl Ref) const;
uintptr_t getSectionHeaderTableAddress() const;
uintptr_t getEndOfSymbolTableAddress() const;
+ Expected<uintptr_t> getLoaderSectionAddress() const;
// This returns a pointer to the start of the storage for the name field of
// the 32-bit or 64-bit SectionHeader struct. This string is *not* necessarily
@@ -322,6 +451,7 @@ public:
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
+ uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
@@ -368,6 +498,9 @@ public:
// Below here is the non-inherited interface.
bool is64Bit() const;
+ const XCOFFAuxiliaryHeader32 *auxiliaryHeader32() const;
+ const XCOFFAuxiliaryHeader64 *auxiliaryHeader64() const;
+
const void *getPointerToSymbolTable() const { return SymbolTblPtr; }
Expected<StringRef> getSymbolSectionName(XCOFFSymbolRef Ref) const;
@@ -398,6 +531,11 @@ public:
uint32_t getNumberOfSymbolTableEntries() const;
uint32_t getSymbolIndex(uintptr_t SymEntPtr) const;
+ uint64_t getSymbolSize(DataRefImpl Symb) const;
+ uintptr_t getSymbolByIndex(uint32_t Idx) const {
+ return reinterpret_cast<uintptr_t>(SymbolTblPtr) +
+ XCOFF::SymbolTableEntrySize * Idx;
+ }
uintptr_t getSymbolEntryAddressByIndex(uint32_t SymbolTableIndex) const;
Expected<StringRef> getSymbolNameByIndex(uint32_t SymbolTableIndex) const;
@@ -415,11 +553,15 @@ public:
void checkSymbolEntryPointer(uintptr_t SymbolEntPtr) const;
// Relocation-related interfaces.
+ template <typename T>
Expected<uint32_t>
- getLogicalNumberOfRelocationEntries(const XCOFFSectionHeader32 &Sec) const;
+ getNumberOfRelocationEntries(const XCOFFSectionHeader<T> &Sec) const;
- Expected<ArrayRef<XCOFFRelocation32>>
- relocations(const XCOFFSectionHeader32 &) const;
+ template <typename Shdr, typename Reloc>
+ Expected<ArrayRef<Reloc>> relocations(const Shdr &Sec) const;
+
+ // Loader section related interfaces.
+ Expected<StringRef> getImportFileTable() const;
// This function returns string table entry.
Expected<StringRef> getStringTableEntry(uint32_t Offset) const;
@@ -572,6 +714,7 @@ class XCOFFTracebackTable {
Optional<uint8_t> ExtensionTable;
XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err);
+
public:
/// Parse an XCOFF Traceback Table from \a Ptr with \a Size bytes.
/// Returns an XCOFFTracebackTable upon successful parsing, otherwise an
diff --git a/llvm/include/llvm/ObjectYAML/MachOYAML.h b/llvm/include/llvm/ObjectYAML/MachOYAML.h
index 5d1d3ee23594..ee89f4eac61f 100644
--- a/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -131,6 +131,7 @@ struct Object {
std::vector<LoadCommand> LoadCommands;
std::vector<Section> Sections;
LinkEditData LinkEdit;
+ Optional<llvm::yaml::BinaryRef> RawLinkEditSegment;
DWARFYAML::Data DWARF;
};
diff --git a/llvm/include/llvm/ObjectYAML/WasmYAML.h b/llvm/include/llvm/ObjectYAML/WasmYAML.h
index 661e06fba8bd..e3a1ba0d58a6 100644
--- a/llvm/include/llvm/ObjectYAML/WasmYAML.h
+++ b/llvm/include/llvm/ObjectYAML/WasmYAML.h
@@ -77,12 +77,6 @@ struct Global {
wasm::WasmInitExpr InitExpr;
};
-struct Tag {
- uint32_t Index;
- uint32_t Attribute;
- uint32_t SigIndex;
-};
-
struct Import {
StringRef Module;
StringRef Field;
@@ -92,7 +86,7 @@ struct Import {
Global GlobalImport;
Table TableImport;
Limits Memory;
- Tag TagImport;
+ uint32_t TagIndex;
};
};
@@ -199,12 +193,23 @@ struct CustomSection : Section {
yaml::BinaryRef Payload;
};
+struct DylinkImportInfo {
+ StringRef Module;
+ StringRef Field;
+ SymbolFlags Flags;
+};
+
+struct DylinkExportInfo {
+ StringRef Name;
+ SymbolFlags Flags;
+};
+
struct DylinkSection : CustomSection {
- DylinkSection() : CustomSection("dylink") {}
+ DylinkSection() : CustomSection("dylink.0") {}
static bool classof(const Section *S) {
auto C = dyn_cast<CustomSection>(S);
- return C && C->Name == "dylink";
+ return C && C->Name == "dylink.0";
}
uint32_t MemorySize;
@@ -212,6 +217,8 @@ struct DylinkSection : CustomSection {
uint32_t TableSize;
uint32_t TableAlignment;
std::vector<StringRef> Needed;
+ std::vector<DylinkImportInfo> ImportInfo;
+ std::vector<DylinkExportInfo> ExportInfo;
};
struct NameSection : CustomSection {
@@ -323,7 +330,7 @@ struct TagSection : Section {
return S->Type == wasm::WASM_SEC_TAG;
}
- std::vector<Tag> Tags;
+ std::vector<uint32_t> TagTypes;
};
struct GlobalSection : Section {
@@ -425,7 +432,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::SymbolInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::InitFunction)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::ComdatEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Comdat)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::Tag)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::DylinkImportInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::WasmYAML::DylinkExportInfo)
namespace llvm {
namespace yaml {
@@ -570,8 +578,12 @@ template <> struct ScalarEnumerationTraits<WasmYAML::RelocType> {
static void enumeration(IO &IO, WasmYAML::RelocType &Kind);
};
-template <> struct MappingTraits<WasmYAML::Tag> {
- static void mapping(IO &IO, WasmYAML::Tag &Tag);
+template <> struct MappingTraits<WasmYAML::DylinkImportInfo> {
+ static void mapping(IO &IO, WasmYAML::DylinkImportInfo &Info);
+};
+
+template <> struct MappingTraits<WasmYAML::DylinkExportInfo> {
+ static void mapping(IO &IO, WasmYAML::DylinkExportInfo &Info);
};
} // end namespace yaml
diff --git a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
index 2630175642c4..aa1bc396f134 100644
--- a/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/XCOFFYAML.h
@@ -24,11 +24,43 @@ struct FileHeader {
uint16_t NumberOfSections;
int32_t TimeStamp;
llvm::yaml::Hex64 SymbolTableOffset;
- uint32_t NumberOfSymTableEntries;
+ int32_t NumberOfSymTableEntries;
uint16_t AuxHeaderSize;
llvm::yaml::Hex16 Flags;
};
+struct AuxiliaryHeader {
+ Optional<llvm::yaml::Hex16> Magic;
+ Optional<llvm::yaml::Hex16> Version;
+ Optional<llvm::yaml::Hex64> TextStartAddr;
+ Optional<llvm::yaml::Hex64> DataStartAddr;
+ Optional<llvm::yaml::Hex64> TOCAnchorAddr;
+ Optional<uint16_t> SecNumOfEntryPoint;
+ Optional<uint16_t> SecNumOfText;
+ Optional<uint16_t> SecNumOfData;
+ Optional<uint16_t> SecNumOfTOC;
+ Optional<uint16_t> SecNumOfLoader;
+ Optional<uint16_t> SecNumOfBSS;
+ Optional<llvm::yaml::Hex16> MaxAlignOfText;
+ Optional<llvm::yaml::Hex16> MaxAlignOfData;
+ Optional<llvm::yaml::Hex16> ModuleType;
+ Optional<llvm::yaml::Hex8> CpuFlag;
+ Optional<llvm::yaml::Hex8> CpuType;
+ Optional<llvm::yaml::Hex8> TextPageSize;
+ Optional<llvm::yaml::Hex8> DataPageSize;
+ Optional<llvm::yaml::Hex8> StackPageSize;
+ Optional<llvm::yaml::Hex8> FlagAndTDataAlignment;
+ Optional<llvm::yaml::Hex64> TextSize;
+ Optional<llvm::yaml::Hex64> InitDataSize;
+ Optional<llvm::yaml::Hex64> BssDataSize;
+ Optional<llvm::yaml::Hex64> EntryPointAddr;
+ Optional<llvm::yaml::Hex64> MaxStackSize;
+ Optional<llvm::yaml::Hex64> MaxDataSize;
+ Optional<uint16_t> SecNumOfTData;
+ Optional<uint16_t> SecNumOfTBSS;
+ Optional<llvm::yaml::Hex16> Flag;
+};
+
struct Relocation {
llvm::yaml::Hex64 VirtualAddress;
llvm::yaml::Hex64 SymbolIndex;
@@ -53,16 +85,27 @@ struct Section {
struct Symbol {
StringRef SymbolName;
llvm::yaml::Hex64 Value; // Symbol value; storage class-dependent.
- StringRef SectionName;
+ Optional<StringRef> SectionName;
+ Optional<uint16_t> SectionIndex;
llvm::yaml::Hex16 Type;
XCOFF::StorageClass StorageClass;
uint8_t NumberOfAuxEntries;
};
+struct StringTable {
+ Optional<uint32_t> ContentSize; // The total size of the string table.
+ Optional<uint32_t> Length; // The value of the length field for the first
+ // 4 bytes of the table.
+ Optional<std::vector<StringRef>> Strings;
+ Optional<yaml::BinaryRef> RawContent;
+};
+
struct Object {
FileHeader Header;
+ Optional<AuxiliaryHeader> AuxHeader;
std::vector<Section> Sections;
std::vector<Symbol> Symbols;
+ StringTable StrTbl;
Object();
};
} // namespace XCOFFYAML
@@ -87,6 +130,9 @@ template <> struct MappingTraits<XCOFFYAML::FileHeader> {
static void mapping(IO &IO, XCOFFYAML::FileHeader &H);
};
+template <> struct MappingTraits<XCOFFYAML::AuxiliaryHeader> {
+ static void mapping(IO &IO, XCOFFYAML::AuxiliaryHeader &AuxHdr);
+};
template <> struct MappingTraits<XCOFFYAML::Symbol> {
static void mapping(IO &IO, XCOFFYAML::Symbol &S);
@@ -100,6 +146,10 @@ template <> struct MappingTraits<XCOFFYAML::Section> {
static void mapping(IO &IO, XCOFFYAML::Section &Sec);
};
+template <> struct MappingTraits<XCOFFYAML::StringTable> {
+ static void mapping(IO &IO, XCOFFYAML::StringTable &Str);
+};
+
template <> struct MappingTraits<XCOFFYAML::Object> {
static void mapping(IO &IO, XCOFFYAML::Object &Obj);
};
diff --git a/llvm/include/llvm/Option/Arg.h b/llvm/include/llvm/Option/Arg.h
index 22e2bcf06a6e..4be254ccdab4 100644
--- a/llvm/include/llvm/Option/Arg.h
+++ b/llvm/include/llvm/Option/Arg.h
@@ -118,10 +118,7 @@ public:
const SmallVectorImpl<const char *> &getValues() const { return Values; }
bool containsValue(StringRef Value) const {
- for (unsigned i = 0, e = getNumValues(); i != e; ++i)
- if (Values[i] == Value)
- return true;
- return false;
+ return llvm::is_contained(Values, Value);
}
/// Append the argument onto the given array as strings.
diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td
index 96014b505d0f..9c73f478db5e 100644
--- a/llvm/include/llvm/Option/OptParser.td
+++ b/llvm/include/llvm/Option/OptParser.td
@@ -214,7 +214,7 @@ class MarshallingInfoBitfieldFlag<KeyPathAndMacro kpm, code value>
}
// Implementation detail of BoolOption.
-class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value, code name,
+class MarshallingInfoBooleanFlag<KeyPathAndMacro kpm, code defaultvalue, code value,
code other_value, code other_name>
: MarshallingInfoFlag<kpm, defaultvalue> {
code Normalizer = "makeBooleanOptionNormalizer("#value#", "#other_value#", OPT_"#other_name#")";
diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h
index ca2013ee6f04..07d9870f71b3 100644
--- a/llvm/include/llvm/Option/OptTable.h
+++ b/llvm/include/llvm/Option/OptTable.h
@@ -64,8 +64,8 @@ private:
bool GroupedShortOptions = false;
const char *EnvVar = nullptr;
- unsigned TheInputOptionID = 0;
- unsigned TheUnknownOptionID = 0;
+ unsigned InputOptionID = 0;
+ unsigned UnknownOptionID = 0;
/// The index of the first option which can be parsed (i.e., is not a
/// special option like 'input' or 'unknown', and is not an option group).
@@ -83,7 +83,8 @@ private:
return OptionInfos[id - 1];
}
- Arg *parseOneArgGrouped(InputArgList &Args, unsigned &Index) const;
+ std::unique_ptr<Arg> parseOneArgGrouped(InputArgList &Args,
+ unsigned &Index) const;
protected:
OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase = false);
@@ -199,9 +200,9 @@ public:
/// \return The parsed argument, or 0 if the argument is missing values
/// (in which case Index still points at the conceptual next argument string
/// to parse).
- Arg *ParseOneArg(const ArgList &Args, unsigned &Index,
- unsigned FlagsToInclude = 0,
- unsigned FlagsToExclude = 0) const;
+ std::unique_ptr<Arg> ParseOneArg(const ArgList &Args, unsigned &Index,
+ unsigned FlagsToInclude = 0,
+ unsigned FlagsToExclude = 0) const;
/// Parse a list of arguments into an InputArgList.
///
diff --git a/llvm/include/llvm/Option/Option.h b/llvm/include/llvm/Option/Option.h
index 196cf656355d..106f6863fca1 100644
--- a/llvm/include/llvm/Option/Option.h
+++ b/llvm/include/llvm/Option/Option.h
@@ -205,9 +205,9 @@ public:
/// always be false.
bool matches(OptSpecifier ID) const;
- /// accept - Potentially accept the current argument, returning a
- /// new Arg instance, or 0 if the option does not accept this
- /// argument (or the argument is missing values).
+ /// Potentially accept the current argument, returning a new Arg instance,
+ /// or 0 if the option does not accept this argument (or the argument is
+ /// missing values).
///
/// If the option accepts the current argument, accept() sets
/// Index to the position where argument parsing should resume
@@ -217,12 +217,12 @@ public:
/// underlying storage to represent a Joined argument.
/// \p GroupedShortOption If true, we are handling the fallback case of
/// parsing a prefix of the current argument as a short option.
- Arg *accept(const ArgList &Args, StringRef CurArg, bool GroupedShortOption,
- unsigned &Index) const;
+ std::unique_ptr<Arg> accept(const ArgList &Args, StringRef CurArg,
+ bool GroupedShortOption, unsigned &Index) const;
private:
- Arg *acceptInternal(const ArgList &Args, StringRef CurArg,
- unsigned &Index) const;
+ std::unique_ptr<Arg> acceptInternal(const ArgList &Args, StringRef CurArg,
+ unsigned &Index) const;
public:
void print(raw_ostream &O) const;
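
With ParseOneArg() and accept() now returning std::unique_ptr<Arg>, callers own each parsed argument explicitly. A hedged caller-side sketch (the function is illustrative; an OptTable and an argument list built elsewhere are assumed):

  #include "llvm/Option/Arg.h"
  #include "llvm/Option/ArgList.h"
  #include "llvm/Option/OptTable.h"
  #include <memory>

  using namespace llvm::opt;

  // Walk the raw argument strings; each parsed Arg is owned by the returned
  // unique_ptr and is released automatically at the end of the iteration.
  void walkArgs(const OptTable &Opts, const ArgList &Args) {
    unsigned Index = 0;
    while (Index < Args.getNumInputArgStrings()) {
      std::unique_ptr<Arg> A = Opts.ParseOneArg(Args, Index);
      if (!A)
        break; // missing values; Index still points at the next argument
    }
  }
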
diff --git a/llvm/include/llvm/Passes/OptimizationLevel.h b/llvm/include/llvm/Passes/OptimizationLevel.h
new file mode 100644
index 000000000000..d2c3fde4935f
--- /dev/null
+++ b/llvm/include/llvm/Passes/OptimizationLevel.h
@@ -0,0 +1,127 @@
+//===-------- LLVM-provided High-Level Optimization levels -*- C++ -*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This header enumerates the LLVM-provided high-level optimization levels.
+/// Each level has a specific goal and rationale.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSES_OPTIMIZATIONLEVEL_H
+#define LLVM_PASSES_OPTIMIZATIONLEVEL_H
+
+#include <assert.h>
+
+namespace llvm {
+
+class OptimizationLevel final {
+ unsigned SpeedLevel = 2;
+ unsigned SizeLevel = 0;
+ OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
+ : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
+ // Check that only valid combinations are passed.
+ assert(SpeedLevel <= 3 &&
+ "Optimization level for speed should be 0, 1, 2, or 3");
+ assert(SizeLevel <= 2 &&
+ "Optimization level for size should be 0, 1, or 2");
+ assert((SizeLevel == 0 || SpeedLevel == 2) &&
+ "Optimize for size should be encoded with speedup level == 2");
+ }
+
+public:
+ OptimizationLevel() = default;
+ /// Disable as many optimizations as possible. This doesn't completely
+ /// disable the optimizer in all cases, for example always_inline functions
+ /// can be required to be inlined for correctness.
+ static const OptimizationLevel O0;
+
+ /// Optimize quickly without destroying debuggability.
+ ///
+ /// This level is tuned to produce a result from the optimizer as quickly
+ /// as possible and to avoid destroying debuggability. This tends to result
+ /// in a very good development mode where the compiled code will be
+ /// immediately executed as part of testing. As a consequence, where
+ /// possible, we would like to produce efficient-to-execute code, but not
+ /// if it significantly slows down compilation or would prevent even basic
+ /// debugging of the resulting binary.
+ ///
+ /// As an example, complex loop transformations such as versioning,
+ /// vectorization, or fusion don't make sense here due to the degree to
+ /// which the executed code differs from the source code, and the compile
+ /// time cost.
+ static const OptimizationLevel O1;
+ /// Optimize for fast execution as much as possible without triggering
+ /// significant incremental compile time or code size growth.
+ ///
+ /// The key idea is that optimizations at this level should "pay for
+ /// themselves". So if an optimization increases compile time by 5% or
+ /// increases code size by 5% for a particular benchmark, that benchmark
+ /// should also be one which sees a 5% runtime improvement. If the compile
+ /// time or code size penalties happen on average across a diverse range of
+ /// LLVM users' benchmarks, then the improvements should as well.
+ ///
+ /// And no matter what, the compile time needs to not grow superlinearly
+ /// with the size of input to LLVM so that users can control the runtime of
+ /// the optimizer in this mode.
+ ///
+ /// This is expected to be a good default optimization level for the vast
+ /// majority of users.
+ static const OptimizationLevel O2;
+ /// Optimize for fast execution as much as possible.
+ ///
+ /// This mode is significantly more aggressive in trading off compile time
+ /// and code size to get execution time improvements. The core idea is that
+ /// this mode should include any optimization that helps execution time on
+ /// balance across a diverse collection of benchmarks, even if it increases
+ /// code size or compile time for some benchmarks without corresponding
+ /// improvements to execution time.
+ ///
+ /// Despite being willing to trade more compile time off to get improved
+ /// execution time, this mode still tries to avoid superlinear growth in
+ /// order to make even significantly slower compile times at least scale
+ /// reasonably. This does not preclude very substantial constant factor
+ /// costs though.
+ static const OptimizationLevel O3;
+ /// Similar to \c O2 but tries to optimize for small code size instead of
+ /// fast execution without triggering significant incremental execution
+ /// time slowdowns.
+ ///
+ /// The logic here is exactly the same as \c O2, but with code size and
+ /// execution time metrics swapped.
+ ///
+ /// A consequence of the different core goal is that this should in general
+ /// produce substantially smaller executables that still run in
+ /// a reasonable amount of time.
+ static const OptimizationLevel Os;
+ /// A very specialized mode that will optimize for code size at any and all
+ /// costs.
+ ///
+ /// This is useful primarily when there are absolute size limitations and
+ /// any effort taken to reduce the size is worth it regardless of the
+ /// execution time impact. You should expect this level to produce rather
+ /// slow, but very small, code.
+ static const OptimizationLevel Oz;
+
+ bool isOptimizingForSpeed() const { return SizeLevel == 0 && SpeedLevel > 0; }
+
+ bool isOptimizingForSize() const { return SizeLevel > 0; }
+
+ bool operator==(const OptimizationLevel &Other) const {
+ return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
+ }
+ bool operator!=(const OptimizationLevel &Other) const {
+ return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
+ }
+
+ unsigned getSpeedupLevel() const { return SpeedLevel; }
+
+ unsigned getSizeLevel() const { return SizeLevel; }
+};
+} // namespace llvm
+
+#endif
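
A short usage sketch of the new header: the predefined levels and query methods are exactly those declared above, and the helper itself is only illustrative.

  #include "llvm/Passes/OptimizationLevel.h"

  using llvm::OptimizationLevel;

  // Map an optimization level onto a human-readable description.
  static const char *describe(OptimizationLevel Level) {
    if (Level == OptimizationLevel::O0)
      return "optimizations mostly disabled";
    if (Level.isOptimizingForSize())
      return Level.getSizeLevel() == 2 ? "minimum size (Oz)" : "small size (Os)";
    return Level.getSpeedupLevel() == 3 ? "maximum speed (O3)"
                                        : "balanced speed (O1/O2)";
  }
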
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index fae3e2cd2e0b..7c7883e98183 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -18,9 +18,12 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include <vector>
@@ -31,57 +34,6 @@ class AAManager;
class TargetMachine;
class ModuleSummaryIndex;
-/// A struct capturing PGO tunables.
-struct PGOOptions {
- enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
- enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
- PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
- std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
- CSPGOAction CSAction = NoCSAction,
- bool DebugInfoForProfiling = false,
- bool PseudoProbeForProfiling = false)
- : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
- ProfileRemappingFile(ProfileRemappingFile), Action(Action),
- CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
- (Action == SampleUse &&
- !PseudoProbeForProfiling)),
- PseudoProbeForProfiling(PseudoProbeForProfiling) {
- // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
- // callback with IRUse action without ProfileFile.
-
- // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
- assert(this->CSAction == NoCSAction ||
- (this->Action != IRInstr && this->Action != SampleUse));
-
- // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
- assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
-
- // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
- // a profile.
- assert(this->CSAction != CSIRUse || this->Action == IRUse);
-
- // If neither Action nor CSAction, DebugInfoForProfiling or
- // PseudoProbeForProfiling needs to be true.
- assert(this->Action != NoAction || this->CSAction != NoCSAction ||
- this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
-
- // Pseudo probe emission does not work with -fdebug-info-for-profiling since
- // they both use the discriminator field of debug lines but for different
- // purposes.
- if (this->DebugInfoForProfiling && this->PseudoProbeForProfiling) {
- report_fatal_error(
- "Pseudo probes cannot be used with -debug-info-for-profiling", false);
- }
- }
- std::string ProfileFile;
- std::string CSProfileGenFile;
- std::string ProfileRemappingFile;
- PGOAction Action;
- CSPGOAction CSAction;
- bool DebugInfoForProfiling;
- bool PseudoProbeForProfiling;
-};
-
/// Tunable parameters for passes in the default pipelines.
class PipelineTuningOptions {
public:
@@ -122,6 +74,15 @@ public:
/// Tuning option to enable/disable function merging. Its default value is
/// false.
bool MergeFunctions;
+
+ // Experimental option to eagerly invalidate more analyses. This has the
+ // potential to decrease max memory usage in exchange for more compile time.
+ // This may affect codegen due to either passes using analyses only when
+ // cached, or invalidating and recalculating an analysis that was
+ // stale/imprecise but still valid. Currently this invalidates all function
+ // analyses after various module->function or cgscc->function adaptors in the
+ // default pipelines.
+ bool EagerlyInvalidateAnalyses;
};
/// This class provides access to building LLVM's passes.
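
A hedged sketch of enabling the new tuning knob when constructing a PassBuilder; everything other than the field itself is ordinary new-pass-manager setup, and the function is illustrative only.

  #include "llvm/Passes/PassBuilder.h"

  using namespace llvm;

  // Construct a PassBuilder that eagerly invalidates function analyses,
  // trading extra compile time for lower peak memory usage.
  static void makeEagerPassBuilder() {
    PipelineTuningOptions PTO;
    PTO.EagerlyInvalidateAnalyses = true;
    PassBuilder PB(/*TM=*/nullptr, PTO);
    (void)PB;
  }
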
@@ -150,116 +111,6 @@ public:
std::vector<PipelineElement> InnerPipeline;
};
- /// LLVM-provided high-level optimization levels.
- ///
- /// This enumerates the LLVM-provided high-level optimization levels. Each
- /// level has a specific goal and rationale.
- class OptimizationLevel final {
- unsigned SpeedLevel = 2;
- unsigned SizeLevel = 0;
- OptimizationLevel(unsigned SpeedLevel, unsigned SizeLevel)
- : SpeedLevel(SpeedLevel), SizeLevel(SizeLevel) {
- // Check that only valid combinations are passed.
- assert(SpeedLevel <= 3 &&
- "Optimization level for speed should be 0, 1, 2, or 3");
- assert(SizeLevel <= 2 &&
- "Optimization level for size should be 0, 1, or 2");
- assert((SizeLevel == 0 || SpeedLevel == 2) &&
- "Optimize for size should be encoded with speedup level == 2");
- }
-
- public:
- OptimizationLevel() = default;
- /// Disable as many optimizations as possible. This doesn't completely
- /// disable the optimizer in all cases, for example always_inline functions
- /// can be required to be inlined for correctness.
- static const OptimizationLevel O0;
-
- /// Optimize quickly without destroying debuggability.
- ///
- /// This level is tuned to produce a result from the optimizer as quickly
- /// as possible and to avoid destroying debuggability. This tends to result
- /// in a very good development mode where the compiled code will be
- /// immediately executed as part of testing. As a consequence, where
- /// possible, we would like to produce efficient-to-execute code, but not
- /// if it significantly slows down compilation or would prevent even basic
- /// debugging of the resulting binary.
- ///
- /// As an example, complex loop transformations such as versioning,
- /// vectorization, or fusion don't make sense here due to the degree to
- /// which the executed code differs from the source code, and the compile
- /// time cost.
- static const OptimizationLevel O1;
- /// Optimize for fast execution as much as possible without triggering
- /// significant incremental compile time or code size growth.
- ///
- /// The key idea is that optimizations at this level should "pay for
- /// themselves". So if an optimization increases compile time by 5% or
- /// increases code size by 5% for a particular benchmark, that benchmark
- /// should also be one which sees a 5% runtime improvement. If the compile
- /// time or code size penalties happen on average across a diverse range of
- /// LLVM users' benchmarks, then the improvements should as well.
- ///
- /// And no matter what, the compile time needs to not grow superlinearly
- /// with the size of input to LLVM so that users can control the runtime of
- /// the optimizer in this mode.
- ///
- /// This is expected to be a good default optimization level for the vast
- /// majority of users.
- static const OptimizationLevel O2;
- /// Optimize for fast execution as much as possible.
- ///
- /// This mode is significantly more aggressive in trading off compile time
- /// and code size to get execution time improvements. The core idea is that
- /// this mode should include any optimization that helps execution time on
- /// balance across a diverse collection of benchmarks, even if it increases
- /// code size or compile time for some benchmarks without corresponding
- /// improvements to execution time.
- ///
- /// Despite being willing to trade more compile time off to get improved
- /// execution time, this mode still tries to avoid superlinear growth in
- /// order to make even significantly slower compile times at least scale
- /// reasonably. This does not preclude very substantial constant factor
- /// costs though.
- static const OptimizationLevel O3;
- /// Similar to \c O2 but tries to optimize for small code size instead of
- /// fast execution without triggering significant incremental execution
- /// time slowdowns.
- ///
- /// The logic here is exactly the same as \c O2, but with code size and
- /// execution time metrics swapped.
- ///
- /// A consequence of the different core goal is that this should in general
- /// produce substantially smaller executables that still run in
- /// a reasonable amount of time.
- static const OptimizationLevel Os;
- /// A very specialized mode that will optimize for code size at any and all
- /// costs.
- ///
- /// This is useful primarily when there are absolute size limitations and
- /// any effort taken to reduce the size is worth it regardless of the
- /// execution time impact. You should expect this level to produce rather
- /// slow, but very small, code.
- static const OptimizationLevel Oz;
-
- bool isOptimizingForSpeed() const {
- return SizeLevel == 0 && SpeedLevel > 0;
- }
-
- bool isOptimizingForSize() const { return SizeLevel > 0; }
-
- bool operator==(const OptimizationLevel &Other) const {
- return SizeLevel == Other.SizeLevel && SpeedLevel == Other.SpeedLevel;
- }
- bool operator!=(const OptimizationLevel &Other) const {
- return SizeLevel != Other.SizeLevel || SpeedLevel != Other.SpeedLevel;
- }
-
- unsigned getSpeedupLevel() const { return SpeedLevel; }
-
- unsigned getSizeLevel() const { return SizeLevel; }
- };
-
explicit PassBuilder(TargetMachine *TM = nullptr,
PipelineTuningOptions PTO = PipelineTuningOptions(),
Optional<PGOOptions> PGOOpt = None,
@@ -346,6 +197,11 @@ public:
ModuleInlinerWrapperPass buildInlinerPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase);
+ /// Construct the module pipeline that performs inlining with the
+ /// module inliner pass.
+ ModuleInlinerPass buildModuleInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase);
+
/// Construct the core LLVM module optimization pipeline.
///
/// This pipeline focuses on optimizing the execution speed of the IR. It
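
With OptimizationLevel split into its own header and PGOOptions moved to llvm/Support/PGOOptions.h, a typical pipeline setup stays the same apart from the spelling of the level type. A minimal sketch using the usual new-pass-manager registration boilerplate (nothing below beyond the declarations above is specific to this patch):

  #include "llvm/IR/Module.h"
  #include "llvm/Passes/PassBuilder.h"

  using namespace llvm;

  // Build and run the default O2 pipeline over a module.
  static void runDefaultO2(Module &M) {
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;

    PassBuilder PB;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    ModulePassManager MPM =
        PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
    MPM.run(M, MAM);
  }
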
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 2f573585e766..6cab4ce7d138 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -215,8 +215,6 @@ protected:
virtual void handleFiltered(StringRef PassID, std::string &Name) = 0;
// Called when an ignored pass is encountered.
virtual void handleIgnored(StringRef PassID, std::string &Name) = 0;
- // Called to compare the before and after representations of the IR.
- virtual bool same(const IRUnitT &Before, const IRUnitT &After) = 0;
// Stack of IRs before passes.
std::vector<IRUnitT> BeforeStack;
@@ -269,50 +267,47 @@ protected:
void handleAfter(StringRef PassID, std::string &Name,
const std::string &Before, const std::string &After,
Any) override;
- // Called to compare the before and after representations of the IR.
- bool same(const std::string &Before, const std::string &After) override;
};
-// The following classes hold a representation of the IR for a change
-// reporter that uses string comparisons of the basic blocks
-// that are created using print (ie, similar to dump()).
-// These classes respect the filtering of passes and functions using
-// -filter-passes and -filter-print-funcs.
-//
// Information that needs to be saved for a basic block in order to compare
// before and after the pass to determine if it was changed by a pass.
-class ChangedBlockData {
+template <typename T> class BlockDataT {
public:
- ChangedBlockData(const BasicBlock &B);
-
- bool operator==(const ChangedBlockData &That) const {
- return Body == That.Body;
- }
- bool operator!=(const ChangedBlockData &That) const {
- return Body != That.Body;
+ BlockDataT(const BasicBlock &B) : Label(B.getName().str()), Data(B) {
+ raw_string_ostream SS(Body);
+ B.print(SS, nullptr, true, true);
}
+ bool operator==(const BlockDataT &That) const { return Body == That.Body; }
+ bool operator!=(const BlockDataT &That) const { return Body != That.Body; }
+
// Return the label of the represented basic block.
StringRef getLabel() const { return Label; }
// Return the string representation of the basic block.
StringRef getBody() const { return Body; }
+ // Return the associated data
+ const T &getData() const { return Data; }
+
protected:
std::string Label;
std::string Body;
+
+ // Extra data associated with a basic block
+ T Data;
};
-template <typename IRData> class OrderedChangedData {
+template <typename T> class OrderedChangedData {
public:
// Return the names in the order they were saved
std::vector<std::string> &getOrder() { return Order; }
const std::vector<std::string> &getOrder() const { return Order; }
// Return a map of names to saved representations
- StringMap<IRData> &getData() { return Data; }
- const StringMap<IRData> &getData() const { return Data; }
+ StringMap<T> &getData() { return Data; }
+ const StringMap<T> &getData() const { return Data; }
- bool operator==(const OrderedChangedData<IRData> &That) const {
+ bool operator==(const OrderedChangedData<T> &That) const {
return Data == That.getData();
}
@@ -321,55 +316,64 @@ public:
// with ones that are only in \p Before interspersed based on where they
// occur in \p Before. This is used to present the output in an order
// based on how the data is ordered in LLVM.
- static void
- report(const OrderedChangedData &Before, const OrderedChangedData &After,
- function_ref<void(const IRData *, const IRData *)> HandlePair);
+ static void report(const OrderedChangedData &Before,
+ const OrderedChangedData &After,
+ function_ref<void(const T *, const T *)> HandlePair);
protected:
std::vector<std::string> Order;
- StringMap<IRData> Data;
+ StringMap<T> Data;
+};
+
+// No extra information is needed for the patch-style change reporter.
+class EmptyData {
+public:
+ EmptyData(const BasicBlock &) {}
};
// The data saved for comparing functions.
-using ChangedFuncData = OrderedChangedData<ChangedBlockData>;
+template <typename T>
+class FuncDataT : public OrderedChangedData<BlockDataT<T>> {
+public:
+ FuncDataT(std::string S) : EntryBlockName(S) {}
+
+ // Return the name of the entry block
+ std::string getEntryBlockName() const { return EntryBlockName; }
+
+protected:
+ std::string EntryBlockName;
+};
-// A map of names to the saved data.
-using ChangedIRData = OrderedChangedData<ChangedFuncData>;
+// The data saved for comparing IRs.
+template <typename T>
+class IRDataT : public OrderedChangedData<FuncDataT<T>> {};
-// A class that compares two IRs and does a diff between them. The
-// added lines are prefixed with a '+', the removed lines are prefixed
-// with a '-' and unchanged lines are prefixed with a space (to have
-// things line up).
-class ChangedIRComparer {
+// Abstract template base class for a class that compares two IRs. The
+// class is created with the 2 IRs to compare and then compare is called.
+// The static function analyzeIR is used to build up the IR representation.
+template <typename T> class IRComparer {
public:
- ChangedIRComparer(raw_ostream &OS, const ChangedIRData &Before,
- const ChangedIRData &After, bool ColourMode)
- : Before(Before), After(After), Out(OS), UseColour(ColourMode) {}
+ IRComparer(const IRDataT<T> &Before, const IRDataT<T> &After)
+ : Before(Before), After(After) {}
- // Compare the 2 IRs.
- void compare(Any IR, StringRef Prefix, StringRef PassID, StringRef Name);
+ // Compare the 2 IRs. \p CompareFunc is called to handle the comparison of
+ // a function. When \p InModule is set, the function is being handled as
+ // part of comparing a module.
+ void compare(
+ bool CompareModule,
+ std::function<void(bool InModule, unsigned Minor,
+ const FuncDataT<T> &Before, const FuncDataT<T> &After)>
+ CompareFunc);
// Analyze \p IR and build the IR representation in \p Data.
- static void analyzeIR(Any IR, ChangedIRData &Data);
+ static void analyzeIR(Any IR, IRDataT<T> &Data);
protected:
- // Return the module when that is the appropriate level of
- // comparison for \p IR.
- static const Module *getModuleForComparison(Any IR);
-
// Generate the data for \p F into \p Data.
- static bool generateFunctionData(ChangedIRData &Data, const Function &F);
+ static bool generateFunctionData(IRDataT<T> &Data, const Function &F);
- // Called to handle the compare of a function. When \p InModule is set,
- // this function is being handled as part of comparing a module.
- void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID,
- bool InModule, const ChangedFuncData &Before,
- const ChangedFuncData &After);
-
- const ChangedIRData &Before;
- const ChangedIRData &After;
- raw_ostream &Out;
- bool UseColour;
+ const IRDataT<T> &Before;
+ const IRDataT<T> &After;
};
// A change printer that prints out in-line differences in the basic
@@ -378,25 +382,28 @@ protected:
// and added, respectively. Changes to the IR that do not affect basic
// blocks are not reported as having changed the IR. The option
// -print-module-scope does not affect this change reporter.
-class InLineChangePrinter : public TextChangeReporter<ChangedIRData> {
+class InLineChangePrinter : public TextChangeReporter<IRDataT<EmptyData>> {
public:
InLineChangePrinter(bool VerboseMode, bool ColourMode)
- : TextChangeReporter<ChangedIRData>(VerboseMode), UseColour(ColourMode) {}
+ : TextChangeReporter<IRDataT<EmptyData>>(VerboseMode),
+ UseColour(ColourMode) {}
~InLineChangePrinter() override;
void registerCallbacks(PassInstrumentationCallbacks &PIC);
protected:
// Create a representation of the IR.
virtual void generateIRRepresentation(Any IR, StringRef PassID,
- ChangedIRData &Output) override;
+ IRDataT<EmptyData> &Output) override;
// Called when an interesting IR has changed.
virtual void handleAfter(StringRef PassID, std::string &Name,
- const ChangedIRData &Before,
- const ChangedIRData &After, Any) override;
- // Called to compare the before and after representations of the IR.
- virtual bool same(const ChangedIRData &Before,
- const ChangedIRData &After) override;
+ const IRDataT<EmptyData> &Before,
+ const IRDataT<EmptyData> &After, Any) override;
+
+ void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID,
+ StringRef Divider, bool InModule, unsigned Minor,
+ const FuncDataT<EmptyData> &Before,
+ const FuncDataT<EmptyData> &After);
bool UseColour;
};
@@ -409,6 +416,81 @@ public:
void registerCallbacks(PassInstrumentationCallbacks &PIC);
};
+// Class that holds transitions between basic blocks. The transitions
+// are contained in a map of values to names of basic blocks.
+class DCData {
+public:
+ // Fill the map with the transitions from basic block \p B.
+ DCData(const BasicBlock &B);
+
+ // Return an iterator to the names of the successor blocks.
+ StringMap<std::string>::const_iterator begin() const {
+ return Successors.begin();
+ }
+ StringMap<std::string>::const_iterator end() const {
+ return Successors.end();
+ }
+
+ // Return the label of the basic block reached on a transition on \p S.
+ const StringRef getSuccessorLabel(StringRef S) const {
+ assert(Successors.count(S) == 1 && "Expected to find successor.");
+ return Successors.find(S)->getValue();
+ }
+
+protected:
+ // Add a transition to \p Succ on \p Label
+ void addSuccessorLabel(StringRef Succ, StringRef Label) {
+ std::pair<std::string, std::string> SS{Succ.str(), Label.str()};
+ Successors.insert(SS);
+ }
+
+ StringMap<std::string> Successors;
+};
+
+// A change reporter that builds a website with links to pdf files showing
+// dot control flow graphs with changed instructions shown in colour.
+class DotCfgChangeReporter : public ChangeReporter<IRDataT<DCData>> {
+public:
+ DotCfgChangeReporter(bool Verbose);
+ ~DotCfgChangeReporter() override;
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+protected:
+ // Initialize the HTML file and output the header.
+ bool initializeHTML();
+
+ // Called on the first IR processed.
+ void handleInitialIR(Any IR) override;
+ // Called before and after a pass to get the representation of the IR.
+ void generateIRRepresentation(Any IR, StringRef PassID,
+ IRDataT<DCData> &Output) override;
+ // Called when the pass is not interesting.
+ void omitAfter(StringRef PassID, std::string &Name) override;
+ // Called when an interesting IR has changed.
+ void handleAfter(StringRef PassID, std::string &Name,
+ const IRDataT<DCData> &Before, const IRDataT<DCData> &After,
+ Any) override;
+ // Called when an interesting pass is invalidated.
+ void handleInvalidated(StringRef PassID) override;
+ // Called when the IR or pass is not interesting.
+ void handleFiltered(StringRef PassID, std::string &Name) override;
+ // Called when an ignored pass is encountered.
+ void handleIgnored(StringRef PassID, std::string &Name) override;
+
+ // Generate the pdf file into \p Dir / \p PDFFileName using \p DotFile as
+ // input and return the html <a> tag with \p Text as the content.
+ static std::string genHTML(StringRef Text, StringRef DotFile,
+ StringRef PDFFileName);
+
+ void handleFunctionCompare(StringRef Name, StringRef Prefix, StringRef PassID,
+ StringRef Divider, bool InModule, unsigned Minor,
+ const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After);
+
+ unsigned N = 0;
+ std::unique_ptr<raw_fd_ostream> HTML;
+};
+
/// This class provides an interface to register all the standard pass
/// instrumentations and manages their state (if any).
class StandardInstrumentations {
@@ -421,6 +503,7 @@ class StandardInstrumentations {
IRChangedPrinter PrintChangedIR;
PseudoProbeVerifier PseudoProbeVerification;
InLineChangePrinter PrintChangedDiff;
+ DotCfgChangeReporter WebsiteChangeReporter;
VerifyInstrumentation Verify;
bool VerifyEach;
@@ -440,8 +523,12 @@ public:
extern template class ChangeReporter<std::string>;
extern template class TextChangeReporter<std::string>;
-extern template class ChangeReporter<ChangedIRData>;
-extern template class TextChangeReporter<ChangedIRData>;
+extern template class BlockDataT<EmptyData>;
+extern template class FuncDataT<EmptyData>;
+extern template class IRDataT<EmptyData>;
+extern template class ChangeReporter<IRDataT<EmptyData>>;
+extern template class TextChangeReporter<IRDataT<EmptyData>>;
+extern template class IRComparer<EmptyData>;
} // namespace llvm
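
A small sketch tying the new templates together: BlockDataT<DCData> captures both the printed body of a block and its successor transitions, as declared above; the printing loop itself is illustrative.

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/Passes/StandardInstrumentations.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  // Dump the CFG transitions recorded for a single basic block.
  static void dumpTransitions(const BasicBlock &BB) {
    BlockDataT<DCData> BD(BB);
    for (const auto &Entry : BD.getData())
      errs() << BD.getLabel() << " -[" << Entry.getKey() << "]-> "
             << BD.getData().getSuccessorLabel(Entry.getKey()) << "\n";
  }
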
diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 8f336c13af61..d3a5d44ce8dd 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -693,8 +693,9 @@ public:
/// An iterator over the \c LineCoverageStats objects for lines described by
/// a \c CoverageData instance.
class LineCoverageIterator
- : public iterator_facade_base<
- LineCoverageIterator, std::forward_iterator_tag, LineCoverageStats> {
+ : public iterator_facade_base<LineCoverageIterator,
+ std::forward_iterator_tag,
+ const LineCoverageStats> {
public:
LineCoverageIterator(const CoverageData &CD)
: LineCoverageIterator(CD, CD.begin()->Line) {}
@@ -711,8 +712,6 @@ public:
const LineCoverageStats &operator*() const { return Stats; }
- LineCoverageStats &operator*() { return Stats; }
-
LineCoverageIterator &operator++();
LineCoverageIterator getEnd() const {
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 08a934e6985f..4395c2abb33e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -205,9 +205,9 @@ StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName,
StringRef FileName = "<unknown>");
/// Given a vector of strings (function PGO names) \c NameStrs, the
-/// method generates a combined string \c Result thatis ready to be
+/// method generates a combined string \c Result that is ready to be
/// serialized. The \c Result string is comprised of three fields:
-/// The first field is the legnth of the uncompressed strings, and the
+/// The first field is the length of the uncompressed strings, and the
/// the second field is the length of the zlib-compressed string.
/// Both fields are encoded in ULEB128. If \c doCompress is false, the
/// third field is the uncompressed strings; otherwise it is the
@@ -308,7 +308,8 @@ inline std::error_code make_error_code(instrprof_error E) {
class InstrProfError : public ErrorInfo<InstrProfError> {
public:
- InstrProfError(instrprof_error Err) : Err(Err) {
+ InstrProfError(instrprof_error Err, const Twine &ErrStr = Twine())
+ : Err(Err), Msg(ErrStr.str()) {
assert(Err != instrprof_error::success && "Not an error");
}
@@ -321,6 +322,7 @@ public:
}
instrprof_error get() const { return Err; }
+ const std::string &getMessage() const { return Msg; }
/// Consume an Error and return the raw enum value contained within it. The
/// Error must either be a success value, or contain a single InstrProfError.
@@ -337,6 +339,7 @@ public:
private:
instrprof_error Err;
+ std::string Msg;
};
class SoftInstrProfErrors {
@@ -474,7 +477,8 @@ public:
/// is used by the raw and text profile readers.
Error addFuncName(StringRef FuncName) {
if (FuncName.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "function name is empty");
auto Ins = NameTab.insert(FuncName);
if (Ins.second) {
MD5NameMap.push_back(std::make_pair(
@@ -1104,6 +1108,8 @@ namespace RawInstrProf {
// Version 5: Bit 60 of FuncHash is reserved for the flag for the context
// sensitive records.
// Version 6: Added binary id.
+// Version 7: Reorder binary id and include version in signature.
+// Version 8: Use relative counter pointer.
const uint64_t Version = INSTR_PROF_RAW_VERSION;
template <class IntPtrT> inline uint64_t getMagic();
@@ -1142,8 +1148,8 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
-void createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled);
+GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
+ bool InstrEntryBBEnabled);
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
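
The new Twine parameter lets a producer attach context to an error and a consumer read it back. A minimal sketch mirroring the addFuncName change above; the function is illustrative.

  #include "llvm/ProfileData/InstrProf.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  // Create a malformed-profile error with a message, then consume it and
  // recover both the enum value and the attached text.
  static void reportMalformed() {
    Error E = make_error<InstrProfError>(instrprof_error::malformed,
                                         "function name is empty");
    handleAllErrors(std::move(E), [](const InstrProfError &IPE) {
      errs() << "error " << static_cast<int>(IPE.get()) << ": "
             << IPE.getMessage() << "\n";
    });
  }
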
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 08a642469627..008b8dde5820 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -75,9 +75,7 @@ INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \
INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \
ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \
Inc->getHash()->getZExtValue()))
-INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \
- ConstantExpr::getBitCast(CounterPtr, \
- llvm::Type::getInt64PtrTy(Ctx)))
+INSTR_PROF_DATA(const IntPtrT, IntPtrTy, CounterPtr, RelativeCounterPtr)
/* This is used to map function pointers for the indirect call targets to
* function name hashes during the conversion from raw to merged profile
* data.
@@ -129,15 +127,16 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::Type::getInt8PtrTy(Ctx), Next, \
#endif
INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic())
INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version())
+INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesBeforeCounters, PaddingBytesBeforeCounters)
INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize)
INSTR_PROF_RAW_HEADER(uint64_t, PaddingBytesAfterCounters, PaddingBytesAfterCounters)
INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize)
-INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin)
+INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta,
+ (uintptr_t)CountersBegin - (uintptr_t)DataBegin)
INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin)
INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last)
-INSTR_PROF_RAW_HEADER(uint64_t, BinaryIdsSize, __llvm_write_binary_ids(NULL))
#undef INSTR_PROF_RAW_HEADER
/* INSTR_PROF_RAW_HEADER end */
@@ -646,7 +645,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
(uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
/* Raw profile format version (start from 1). */
-#define INSTR_PROF_RAW_VERSION 6
+#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
#define INSTR_PROF_INDEX_VERSION 7
/* Coverage mapping format version (start from 0). */
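
Version 8 stores CountersDelta as a section-relative value instead of an absolute address; the arithmetic is exactly the expression in the macro above, written out here as an illustrative helper (the helper name is an assumption, not part of the patch).

  #include <cstdint>

  // CountersDelta is the distance from the start of the data section to the
  // start of the counter section, so no absolute pointer is serialized.
  static uint64_t computeCountersDelta(const void *DataBegin,
                                       const void *CountersBegin) {
    return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(CountersBegin) -
                                 reinterpret_cast<uintptr_t>(DataBegin));
  }
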
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 501c6f011d53..b62d4ff044a3 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -71,6 +71,7 @@ public:
/// format. Provides an iterator over NamedInstrProfRecords.
class InstrProfReader {
instrprof_error LastError = instrprof_error::success;
+ std::string LastErrorMsg;
public:
InstrProfReader() = default;
@@ -114,14 +115,21 @@ protected:
std::unique_ptr<InstrProfSymtab> Symtab;
/// Set the current error and return same.
- Error error(instrprof_error Err) {
+ Error error(instrprof_error Err, const std::string &ErrMsg = "") {
LastError = Err;
+ LastErrorMsg = ErrMsg;
if (Err == instrprof_error::success)
return Error::success();
- return make_error<InstrProfError>(Err);
+ return make_error<InstrProfError>(Err, ErrMsg);
}
- Error error(Error &&E) { return error(InstrProfError::take(std::move(E))); }
+ Error error(Error &&E) {
+ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ LastError = IPE.get();
+ LastErrorMsg = IPE.getMessage();
+ });
+ return make_error<InstrProfError>(LastError, LastErrorMsg);
+ }
/// Clear the current error and return a successful one.
Error success() { return error(instrprof_error::success); }
@@ -136,7 +144,7 @@ public:
/// Get the current error.
Error getError() {
if (hasError())
- return make_error<InstrProfError>(LastError);
+ return make_error<InstrProfError>(LastError, LastErrorMsg);
return Error::success();
}
@@ -197,7 +205,7 @@ public:
/// Reader for the raw instrprof binary format from runtime.
///
-/// This format is a raw memory dump of the instrumentation-baed profiling data
+/// This format is a raw memory dump of the instrumentation-based profiling data
/// from the runtime. It has no index.
///
/// Templated on the unsigned type whose size matches pointers on the platform
diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index f2d9ccc45fdc..ad92af22d92e 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -66,9 +66,9 @@ public:
/// Find the summary entry for a desired percentile of counts.
static const ProfileSummaryEntry &
- getEntryForPercentile(SummaryEntryVector &DS, uint64_t Percentile);
- static uint64_t getHotCountThreshold(SummaryEntryVector &DS);
- static uint64_t getColdCountThreshold(SummaryEntryVector &DS);
+ getEntryForPercentile(const SummaryEntryVector &DS, uint64_t Percentile);
+ static uint64_t getHotCountThreshold(const SummaryEntryVector &DS);
+ static uint64_t getColdCountThreshold(const SummaryEntryVector &DS);
};
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
@@ -92,8 +92,8 @@ public:
void addRecord(const sampleprof::FunctionSamples &FS,
bool isCallsiteSample = false);
- std::unique_ptr<ProfileSummary> computeSummaryForProfiles(
- const StringMap<sampleprof::FunctionSamples> &Profiles);
+ std::unique_ptr<ProfileSummary>
+ computeSummaryForProfiles(const sampleprof::SampleProfileMap &Profiles);
std::unique_ptr<ProfileSummary> getSummary();
};
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 2f71bbc6bbbe..7ac9eccf8ac2 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -29,10 +29,13 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
+#include <list>
#include <map>
#include <set>
+#include <sstream>
#include <string>
#include <system_error>
+#include <unordered_map>
#include <utility>
namespace llvm {
@@ -104,10 +107,10 @@ static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
/// current Format uses MD5 to represent the string.
static inline StringRef getRepInFormat(StringRef Name, bool UseMD5,
std::string &GUIDBuf) {
- if (Name.empty())
+ if (Name.empty() || !UseMD5)
return Name;
GUIDBuf = std::to_string(Function::getGUID(Name));
- return UseMD5 ? StringRef(GUIDBuf) : Name;
+ return GUIDBuf;
}
static inline uint64_t SPVersion() { return 103; }
@@ -122,13 +125,14 @@ enum SecType {
SecProfileSymbolList = 3,
SecFuncOffsetTable = 4,
SecFuncMetadata = 5,
+ SecCSNameTable = 6,
// marker for the first type of profile.
SecFuncProfileFirst = 32,
SecLBRProfile = SecFuncProfileFirst
};
static inline std::string getSecName(SecType Type) {
- switch (Type) {
+ switch ((int)Type) { // Avoid -Wcovered-switch-default
case SecInValid:
return "InvalidSection";
case SecProfSummary:
@@ -141,10 +145,13 @@ static inline std::string getSecName(SecType Type) {
return "FuncOffsetTableSection";
case SecFuncMetadata:
return "FunctionMetadata";
+ case SecCSNameTable:
+ return "CSNameTableSection";
case SecLBRProfile:
return "LBRProfileSection";
+ default:
+ return "UnknownSection";
}
- llvm_unreachable("A SecType has no name for output");
}
// Entry type of section header table used by SampleProfileExtBinaryBaseReader
@@ -202,6 +209,13 @@ enum class SecFuncMetadataFlags : uint32_t {
SecFlagHasAttribute = (1 << 1)
};
+enum class SecFuncOffsetFlags : uint32_t {
+ SecFlagInvalid = 0,
+ // Store function offsets in an order of contexts. The order ensures that
+ // callee contexts of a given context laid out next to it.
+ SecFlagOrdered = (1 << 0),
+};
+
// Verify section specific flag is used for the correct section.
template <class SecFlagType>
static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
@@ -222,6 +236,8 @@ static inline void verifySecFlag(SecType Type, SecFlagType Flag) {
IsFlagLegal = std::is_same<SecFuncMetadataFlags, SecFlagType>();
break;
default:
+ case SecFuncOffsetTable:
+ IsFlagLegal = std::is_same<SecFuncOffsetFlags, SecFlagType>();
break;
}
if (!IsFlagLegal)
@@ -396,54 +412,123 @@ enum ContextAttributeMask {
ContextShouldBeInlined = 0x2, // Leaf of context should be inlined
};
+// Represents a context frame with function name and line location
+struct SampleContextFrame {
+ StringRef FuncName;
+ LineLocation Location;
+
+ SampleContextFrame() : Location(0, 0) {}
+
+ SampleContextFrame(StringRef FuncName, LineLocation Location)
+ : FuncName(FuncName), Location(Location) {}
+
+ bool operator==(const SampleContextFrame &That) const {
+ return Location == That.Location && FuncName == That.FuncName;
+ }
+
+ bool operator!=(const SampleContextFrame &That) const {
+ return !(*this == That);
+ }
+
+ std::string toString(bool OutputLineLocation) const {
+ std::ostringstream OContextStr;
+ OContextStr << FuncName.str();
+ if (OutputLineLocation) {
+ OContextStr << ":" << Location.LineOffset;
+ if (Location.Discriminator)
+ OContextStr << "." << Location.Discriminator;
+ }
+ return OContextStr.str();
+ }
+};
+
+static inline hash_code hash_value(const SampleContextFrame &arg) {
+ return hash_combine(arg.FuncName, arg.Location.LineOffset,
+ arg.Location.Discriminator);
+}
+
+using SampleContextFrameVector = SmallVector<SampleContextFrame, 10>;
+using SampleContextFrames = ArrayRef<SampleContextFrame>;
+
+struct SampleContextFrameHash {
+ uint64_t operator()(const SampleContextFrameVector &S) const {
+ return hash_combine_range(S.begin(), S.end());
+ }
+};
+
// Sample context for FunctionSamples. It consists of the calling context,
// the function name and context state. Internally sample context is represented
-// using StringRef, which is also the input for constructing a `SampleContext`.
+// using ArrayRef, which is also the input for constructing a `SampleContext`.
// It can accept and represent both full context string as well as context-less
// function name.
-// Example of full context string (note the wrapping `[]`):
-// `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
-// Example of context-less function name (same as AutoFDO):
-// `_Z8funcLeafi`
+// For a CS profile, a full context vector can look like:
+// `main:3 _Z5funcAi:1 _Z8funcLeafi`
+// For a base CS profile without calling context, the context vector should only
+// contain the leaf frame name.
+// For a non-CS profile, the context vector should be empty.
class SampleContext {
public:
SampleContext() : State(UnknownContext), Attributes(ContextNone) {}
- SampleContext(StringRef ContextStr, ContextStateMask CState = UnknownContext)
- : Attributes(ContextNone) {
- setContext(ContextStr, CState);
- }
- // Promote context by removing top frames (represented by `ContextStrToRemove`).
- // Note that with string representation of context, the promotion is effectively
- // a substr operation with `ContextStrToRemove` removed from left.
- void promoteOnPath(StringRef ContextStrToRemove) {
- assert(FullContext.startswith(ContextStrToRemove));
+ SampleContext(StringRef Name)
+ : Name(Name), State(UnknownContext), Attributes(ContextNone) {}
- // Remove leading context and frame separator " @ ".
- FullContext = FullContext.substr(ContextStrToRemove.size() + 3);
- CallingContext = CallingContext.substr(ContextStrToRemove.size() + 3);
+ SampleContext(SampleContextFrames Context,
+ ContextStateMask CState = RawContext)
+ : Attributes(ContextNone) {
+ assert(!Context.empty() && "Context is empty");
+ setContext(Context, CState);
}
- // Split the top context frame (left-most substr) from context.
- static std::pair<StringRef, StringRef>
- splitContextString(StringRef ContextStr) {
- return ContextStr.split(" @ ");
+ // Given a context string, decode and populate internal state such as the
+ // function name, calling context and context state. Example of input
+ // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
+ SampleContext(StringRef ContextStr,
+ std::list<SampleContextFrameVector> &CSNameTable,
+ ContextStateMask CState = RawContext)
+ : Attributes(ContextNone) {
+ assert(!ContextStr.empty());
+ // Note that `[]` wrapped input indicates a full context string, otherwise
+ // it's treated as context-less function name only.
+ bool HasContext = ContextStr.startswith("[");
+ if (!HasContext) {
+ State = UnknownContext;
+ Name = ContextStr;
+ } else {
+ CSNameTable.emplace_back();
+ SampleContextFrameVector &Context = CSNameTable.back();
+ createCtxVectorFromStr(ContextStr, Context);
+ setContext(Context, CState);
+ }
}
- // Reconstruct a new context with the last k frames, return the context-less
- // name if K = 1
- StringRef getContextWithLastKFrames(uint32_t K) {
- if (K == 1)
- return getNameWithoutContext();
-
- size_t I = FullContext.size();
- while (K--) {
- I = FullContext.find_last_of(" @ ", I);
- if (I == StringRef::npos)
- return FullContext;
- I -= 2;
+ /// Create a context vector from a given context string and save it in
+ /// `Context`.
+ static void createCtxVectorFromStr(StringRef ContextStr,
+ SampleContextFrameVector &Context) {
+ // Remove encapsulating '[' and ']' if any
+ ContextStr = ContextStr.substr(1, ContextStr.size() - 2);
+ StringRef ContextRemain = ContextStr;
+ StringRef ChildContext;
+ StringRef CalleeName;
+ while (!ContextRemain.empty()) {
+ auto ContextSplit = ContextRemain.split(" @ ");
+ ChildContext = ContextSplit.first;
+ ContextRemain = ContextSplit.second;
+ LineLocation CallSiteLoc(0, 0);
+ decodeContextString(ChildContext, CalleeName, CallSiteLoc);
+ Context.emplace_back(CalleeName, CallSiteLoc);
}
- return FullContext.slice(I + 3, StringRef::npos);
+ }
+
+ // Promote the context by removing the top `ContextFramesToRemove` frames.
+ // Note that with the array representation of the context, the promotion is
+ // effectively a slice operation that drops the first
+ // `ContextFramesToRemove` elements from the left.
+ void promoteOnPath(uint32_t ContextFramesToRemove) {
+ assert(ContextFramesToRemove <= FullContext.size() &&
+ "Cannot remove more than the whole context");
+ FullContext = FullContext.drop_front(ContextFramesToRemove);
}
// Decode context string for a frame to get function name and location.
@@ -469,7 +554,7 @@ public:
}
}
- operator StringRef() const { return FullContext; }
+ operator SampleContextFrames() const { return FullContext; }
bool hasAttribute(ContextAttributeMask A) { return Attributes & (uint32_t)A; }
void setAttribute(ContextAttributeMask A) { Attributes |= (uint32_t)A; }
uint32_t getAllAttributes() { return Attributes; }
@@ -478,60 +563,114 @@ public:
void setState(ContextStateMask S) { State |= (uint32_t)S; }
void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
bool hasContext() const { return State != UnknownContext; }
- bool isBaseContext() const { return CallingContext.empty(); }
- StringRef getNameWithoutContext() const { return Name; }
- StringRef getCallingContext() const { return CallingContext; }
- StringRef getNameWithContext() const { return FullContext; }
-
-private:
- // Give a context string, decode and populate internal states like
- // Function name, Calling context and context state. Example of input
- // `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
- void setContext(StringRef ContextStr, ContextStateMask CState) {
- assert(!ContextStr.empty());
- // Note that `[]` wrapped input indicates a full context string, otherwise
- // it's treated as context-less function name only.
- bool HasContext = ContextStr.startswith("[");
- if (!HasContext && CState == UnknownContext) {
- State = UnknownContext;
- Name = FullContext = ContextStr;
- } else {
- // Assume raw context profile if unspecified
- if (CState == UnknownContext)
- State = RawContext;
- else
- State = CState;
-
- // Remove encapsulating '[' and ']' if any
- if (HasContext)
- FullContext = ContextStr.substr(1, ContextStr.size() - 2);
- else
- FullContext = ContextStr;
-
- // Caller is to the left of callee in context string
- auto NameContext = FullContext.rsplit(" @ ");
- if (NameContext.second.empty()) {
- Name = NameContext.first;
- CallingContext = NameContext.second;
- } else {
- Name = NameContext.second;
- CallingContext = NameContext.first;
+ bool isBaseContext() const { return FullContext.size() == 1; }
+ StringRef getName() const { return Name; }
+ SampleContextFrames getContextFrames() const { return FullContext; }
+
+ static std::string getContextString(SampleContextFrames Context,
+ bool IncludeLeafLineLocation = false) {
+ std::ostringstream OContextStr;
+ for (uint32_t I = 0; I < Context.size(); I++) {
+ if (OContextStr.str().size()) {
+ OContextStr << " @ ";
}
+ OContextStr << Context[I].toString(I != Context.size() - 1 ||
+ IncludeLeafLineLocation);
}
+ return OContextStr.str();
+ }
+
+ std::string toString() const {
+ if (!hasContext())
+ return Name.str();
+ return getContextString(FullContext, false);
+ }
+
+ uint64_t getHashCode() const {
+ return hasContext() ? hash_value(getContextFrames())
+ : hash_value(getName());
+ }
+
+ /// Set the name of the function.
+ void setName(StringRef FunctionName) {
+ assert(FullContext.empty() &&
+ "setName should only be called for non-CS profile");
+ Name = FunctionName;
+ }
+
+ void setContext(SampleContextFrames Context,
+ ContextStateMask CState = RawContext) {
+ assert(CState != UnknownContext);
+ FullContext = Context;
+ Name = Context.back().FuncName;
+ State = CState;
+ }
+
+ bool operator==(const SampleContext &That) const {
+ return State == That.State && Name == That.Name &&
+ FullContext == That.FullContext;
+ }
+
+ bool operator!=(const SampleContext &That) const { return !(*this == That); }
+
+ bool operator<(const SampleContext &That) const {
+ if (State != That.State)
+ return State < That.State;
+
+ if (!hasContext()) {
+ return (Name.compare(That.Name)) == -1;
+ }
+
+ uint64_t I = 0;
+ while (I < std::min(FullContext.size(), That.FullContext.size())) {
+ auto &Context1 = FullContext[I];
+ auto &Context2 = That.FullContext[I];
+ auto V = Context1.FuncName.compare(Context2.FuncName);
+ if (V)
+ return V == -1;
+ if (Context1.Location != Context2.Location)
+ return Context1.Location < Context2.Location;
+ I++;
+ }
+
+ return FullContext.size() < That.FullContext.size();
+ }
+
+ struct Hash {
+ uint64_t operator()(const SampleContext &Context) const {
+ return Context.getHashCode();
+ }
+ };
+
+ bool IsPrefixOf(const SampleContext &That) const {
+ auto ThisContext = FullContext;
+ auto ThatContext = That.FullContext;
+ if (ThatContext.size() < ThisContext.size())
+ return false;
+ ThatContext = ThatContext.take_front(ThisContext.size());
+ // Compare Leaf frame first
+ if (ThisContext.back().FuncName != ThatContext.back().FuncName)
+ return false;
+ // Compare leading context
+ return ThisContext.drop_back() == ThatContext.drop_back();
}
- // Full context string including calling context and leaf function name
- StringRef FullContext;
- // Function name for the associated sample profile
+private:
+ /// Mangled name of the function.
StringRef Name;
- // Calling context (leaf function excluded) for the associated sample profile
- StringRef CallingContext;
+ // Full context including calling context and leaf function name
+ SampleContextFrames FullContext;
// State of the associated sample profile
uint32_t State;
// Attribute of the associated sample profile
uint32_t Attributes;
};
+static inline hash_code hash_value(const SampleContext &arg) {
+ return arg.hasContext() ? hash_value(arg.getContextFrames())
+ : hash_value(arg.getName());
+}
+
class FunctionSamples;
class SampleProfileReaderItaniumRemapper;
@@ -592,6 +731,20 @@ public:
return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
}
+ // Accumulate all body samples to set total samples.
+ void updateTotalSamples() {
+ setTotalSamples(0);
+ for (const auto &I : BodySamples)
+ addTotalSamples(I.second.getSamples());
+
+ for (auto &I : CallsiteSamples) {
+ for (auto &CS : I.second) {
+ CS.second.updateTotalSamples();
+ addTotalSamples(CS.second.getTotalSamples());
+ }
+ }
+ }
+
/// Return the number of samples collected at the given location.
/// Each location is specified by \p LineOffset and \p Discriminator.
/// If the location is not found in profile, return error.
@@ -709,10 +862,9 @@ public:
/// Optionally scale samples by \p Weight.
sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) {
sampleprof_error Result = sampleprof_error::success;
- Name = Other.getName();
if (!GUIDToFuncNameMap)
GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
- if (Context.getNameWithContext().empty())
+ if (Context.getName().empty())
Context = Other.getContext();
if (FunctionHash == 0) {
// Set the function hash code for the target profile.
@@ -758,7 +910,7 @@ public:
};
if (isDeclaration(SymbolMap.lookup(getFuncName()))) {
// Add to the import list only when it's defined out of module.
- S.insert(getGUID(Name));
+ S.insert(getGUID(getName()));
}
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
@@ -775,18 +927,13 @@ public:
}
/// Set the name of the function.
- void setName(StringRef FunctionName) { Name = FunctionName; }
+ void setName(StringRef FunctionName) { Context.setName(FunctionName); }
/// Return the function name.
- StringRef getName() const { return Name; }
-
- /// Return function name with context.
- StringRef getNameWithContext() const {
- return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
- }
+ StringRef getName() const { return Context.getName(); }
/// Return the original function name.
- StringRef getFuncName() const { return getFuncName(Name); }
+ StringRef getFuncName() const { return getFuncName(getName()); }
void setFunctionHash(uint64_t Hash) { FunctionHash = Hash; }
@@ -913,9 +1060,6 @@ public:
void findAllNames(DenseSet<StringRef> &NameSet) const;
private:
- /// Mangled name of the function.
- StringRef Name;
-
/// CFG hash value for the function.
uint64_t FunctionHash = 0;
@@ -961,6 +1105,14 @@ private:
raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
+using SampleProfileMap =
+ std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;
+
+using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;
+
+void sortFuncProfiles(const SampleProfileMap &ProfileMap,
+ std::vector<NameFunctionSamples> &SortedProfiles);
+
/// Sort a LocationT->SampleT map by LocationT.
///
/// It produces a sorted list of <LocationT, SampleT> records by ascending
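
A minimal sketch of consuming the new map type; sortFuncProfiles is the declaration just above, and the accumulation loop is illustrative.

  #include "llvm/ProfileData/SampleProf.h"
  #include <vector>

  using namespace llvm::sampleprof;

  // Sum the samples of every context-keyed profile, then produce a
  // deterministic ordering for output.
  static uint64_t totalSamples(const SampleProfileMap &Profiles) {
    uint64_t Total = 0;
    for (const auto &Entry : Profiles)
      Total += Entry.second.getTotalSamples();

    std::vector<NameFunctionSamples> Sorted;
    sortFuncProfiles(Profiles, Sorted);
    return Total;
  }
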
@@ -989,18 +1141,24 @@ private:
/// sure ProfileMap's key is consistent with FunctionSample's name/context.
class SampleContextTrimmer {
public:
- SampleContextTrimmer(StringMap<FunctionSamples> &Profiles)
- : ProfileMap(Profiles){};
- // Trim and merge cold context profile when requested.
+ SampleContextTrimmer(SampleProfileMap &Profiles) : ProfileMap(Profiles){};
+ // Trim and merge cold context profiles when requested. TrimBaseProfileOnly
+ // is only effective when TrimColdContext is true. On top of
+ // TrimColdContext, TrimBaseProfileOnly can be used to specify whether to
+ // trim all cold profiles or only cold base profiles. Trimming base profiles
+ // only is mainly to honor the preinliner decision. Note that when
+ // MergeColdContext is true, the preinliner decision is not honored anyway,
+ // so TrimBaseProfileOnly will be ignored.
void trimAndMergeColdContextProfiles(uint64_t ColdCountThreshold,
bool TrimColdContext,
bool MergeColdContext,
- uint32_t ColdContextFrameLength);
+ uint32_t ColdContextFrameLength,
+ bool TrimBaseProfileOnly);
// Canonicalize context profile name and attributes.
void canonicalizeContextProfiles();
private:
- StringMap<FunctionSamples> &ProfileMap;
+ SampleProfileMap &ProfileMap;
};
/// ProfileSymbolList records the list of function symbols shown up
@@ -1045,6 +1203,22 @@ private:
};
} // end namespace sampleprof
+
+using namespace sampleprof;
+// Provide DenseMapInfo for SampleContext.
+template <> struct DenseMapInfo<SampleContext> {
+ static inline SampleContext getEmptyKey() { return SampleContext(); }
+
+ static inline SampleContext getTombstoneKey() { return SampleContext("@"); }
+
+ static unsigned getHashValue(const SampleContext &Val) {
+ return Val.getHashCode();
+ }
+
+ static bool isEqual(const SampleContext &LHS, const SampleContext &RHS) {
+ return LHS == RHS;
+ }
+};
} // end namespace llvm
#endif // LLVM_PROFILEDATA_SAMPLEPROF_H
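
The DenseMapInfo specialization above is what allows SampleContext to key a DenseMap directly, which the reader's FuncOffsetTable below relies on; a small illustrative sketch (the offset value is made up).

  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ProfileData/SampleProf.h"

  using namespace llvm;
  using namespace llvm::sampleprof;

  // Map each context to a file offset, mirroring the reader's FuncOffsetTable.
  static void demoOffsetTable() {
    DenseMap<SampleContext, uint64_t> FuncOffsetTable;
    FuncOffsetTable[SampleContext("_Z8funcLeafi")] = 128;
    auto It = FuncOffsetTable.find(SampleContext("_Z8funcLeafi"));
    (void)It;
  }
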
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 2d5925bdb2b4..e6d31f1b9098 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -242,9 +242,11 @@
#include "llvm/Support/SymbolRemappingReader.h"
#include <algorithm>
#include <cstdint>
+#include <list>
#include <memory>
#include <string>
#include <system_error>
+#include <unordered_set>
#include <vector>
namespace llvm {
@@ -380,8 +382,8 @@ public:
/// The implementaion to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
- /// Print the profile for \p FName on stream \p OS.
- void dumpFunctionProfile(StringRef FName, raw_ostream &OS = dbgs());
+ /// Print the profile for \p FContext on stream \p OS.
+ void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());
/// Collect functions with definitions in Module M. For reader which
/// support loading function profiles on demand, return true when the
@@ -407,6 +409,13 @@ public:
std::string FGUID;
StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
+ auto It = Profiles.find(CanonName);
+ if (It != Profiles.end())
+ return &It->second;
+ if (!FGUID.empty()) {
+ assert(useMD5() && "New name should only be generated for md5 profile");
+ CanonName = *MD5NameBuffer.insert(FGUID).first;
+ }
return &Profiles[CanonName];
}
@@ -429,7 +438,7 @@ public:
}
/// Return all the profiles.
- StringMap<FunctionSamples> &getProfiles() { return Profiles; }
+ SampleProfileMap &getProfiles() { return Profiles; }
/// Report a parse error message.
void reportError(int64_t LineNumber, const Twine &Msg) const {
@@ -495,7 +504,7 @@ protected:
/// The profile of every function executed at runtime is collected
/// in the structure FunctionSamples. This maps function objects
/// to their corresponding profiles.
- StringMap<FunctionSamples> Profiles;
+ SampleProfileMap Profiles;
/// LLVM context used to emit diagnostics.
LLVMContext &Ctx;
@@ -503,6 +512,10 @@ protected:
/// Memory buffer holding the profile file.
std::unique_ptr<MemoryBuffer> Buffer;
+ /// Extra name buffer holding names created on demand.
+ /// This should only be needed for md5 profiles.
+ std::unordered_set<std::string> MD5NameBuffer;
+
/// Profile summary information.
std::unique_ptr<ProfileSummary> Summary;
@@ -555,6 +568,11 @@ public:
/// Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
+
+private:
+ /// CSNameTable is used to save full context vectors. This serves as an
+ /// underlying immutable buffer for all clients.
+ std::list<SampleContextFrameVector> CSNameTable;
};
class SampleProfileReaderBinary : public SampleProfileReader {
@@ -626,6 +644,7 @@ protected:
/// Read a string indirectly via the name table.
virtual ErrorOr<StringRef> readStringFromTable();
+ virtual ErrorOr<SampleContext> readSampleContextFromTable();
private:
std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
@@ -683,6 +702,7 @@ protected:
std::error_code readFuncProfiles();
std::error_code readMD5NameTable();
std::error_code readNameTableSec(bool IsMD5);
+ std::error_code readCSNameTableSec();
std::error_code readProfileSymbolList();
virtual std::error_code readHeader() override;
@@ -692,12 +712,19 @@ protected:
// placeholder for subclasses to dispatch their own section readers.
virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
virtual ErrorOr<StringRef> readStringFromTable() override;
+ virtual ErrorOr<SampleContext> readSampleContextFromTable() override;
+ ErrorOr<SampleContextFrames> readContextFromTable();
std::unique_ptr<ProfileSymbolList> ProfSymList;
- /// The table mapping from function name to the offset of its FunctionSample
- /// towards file start.
- DenseMap<StringRef, uint64_t> FuncOffsetTable;
+ /// The table mapping from function context to the offset of its
+ /// FunctionSample towards file start.
+ DenseMap<SampleContext, uint64_t> FuncOffsetTable;
+
+ /// Function offset mapping ordered by contexts.
+ std::unique_ptr<std::vector<std::pair<SampleContext, uint64_t>>>
+ OrderedFuncOffsets;
+
/// The set containing the functions to use when compiling a module.
DenseSet<StringRef> FuncsToUse;
@@ -716,10 +743,16 @@ protected:
/// the lifetime of MD5StringBuf is not shorter than that of NameTable.
std::unique_ptr<std::vector<std::string>> MD5StringBuf;
+ /// CSNameTable is used to save full context vectors. This serves as an
+ /// underlying immutable buffer for all clients.
+ std::unique_ptr<const std::vector<SampleContextFrameVector>> CSNameTable;
+
/// If SkipFlatProf is true, skip the sections with
/// SecFlagFlat flag.
bool SkipFlatProf = false;
+ bool FuncOffsetsOrdered = false;
+
public:
SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
LLVMContext &C, SampleProfileFormat Format)
@@ -753,6 +786,8 @@ private:
virtual std::error_code verifySPMagic(uint64_t Magic) override;
virtual std::error_code
readCustomSection(const SecHdrTableEntry &Entry) override {
+ // Update the data reader pointer to the end of the section.
+ Data = End;
return sampleprof_error::success;
};
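A hedged sketch of a reader client after this change: getProfiles() now hands back a SampleProfileMap keyed by SampleContext rather than a StringMap keyed by name, and dumpFunctionProfile takes a SampleContext. The create() factory and read() call are the long-standing reader entry points; error handling is abbreviated.

#include "llvm/IR/LLVMContext.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::sampleprof;

static void dumpAllProfiles(const std::string &Path) {
  LLVMContext Ctx;
  auto ReaderOrErr = SampleProfileReader::create(Path, Ctx);
  if (std::error_code EC = ReaderOrErr.getError())
    return;
  std::unique_ptr<SampleProfileReader> Reader = std::move(ReaderOrErr.get());
  if (std::error_code EC = Reader->read())
    return;
  // Keys are SampleContexts now, not bare function names.
  for (auto &Entry : Reader->getProfiles())
    Reader->dumpFunctionProfile(Entry.first, errs());
}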
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index 107f7a730a3c..773beac24ebc 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -52,7 +52,7 @@ public:
/// Write all the sample profiles in the given map of samples.
///
/// \returns status code of the file update operation.
- virtual std::error_code write(const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code write(const SampleProfileMap &ProfileMap);
raw_ostream &getOutputStream() { return *OutputStream; }
@@ -78,12 +78,10 @@ protected:
: OutputStream(std::move(OS)) {}
/// Write a file header for the profile file.
- virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) = 0;
+ virtual std::error_code writeHeader(const SampleProfileMap &ProfileMap) = 0;
// Write function profiles to the profile file.
- virtual std::error_code
- writeFuncProfiles(const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap);
/// Output stream where to emit the profile to.
std::unique_ptr<raw_ostream> OutputStream;
@@ -92,7 +90,7 @@ protected:
std::unique_ptr<ProfileSummary> Summary;
/// Compute summary for this profile.
- void computeSummary(const StringMap<FunctionSamples> &ProfileMap);
+ void computeSummary(const SampleProfileMap &ProfileMap);
/// Profile format.
SampleProfileFormat Format = SPF_None;
@@ -107,8 +105,7 @@ protected:
SampleProfileWriterText(std::unique_ptr<raw_ostream> &OS)
: SampleProfileWriter(OS), Indent(0) {}
- std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override {
+ std::error_code writeHeader(const SampleProfileMap &ProfileMap) override {
return sampleprof_error::success;
}
@@ -132,19 +129,22 @@ public:
virtual std::error_code writeSample(const FunctionSamples &S) override;
protected:
+ virtual MapVector<StringRef, uint32_t> &getNameTable() { return NameTable; }
virtual std::error_code writeMagicIdent(SampleProfileFormat Format);
virtual std::error_code writeNameTable();
virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeHeader(const SampleProfileMap &ProfileMap) override;
std::error_code writeSummary();
- std::error_code writeNameIdx(StringRef FName, bool IsContextName = false);
+ virtual std::error_code writeContextIdx(const SampleContext &Context);
+ std::error_code writeNameIdx(StringRef FName);
std::error_code writeBody(const FunctionSamples &S);
- inline void stablizeNameTable(std::set<StringRef> &V);
+ inline void stablizeNameTable(MapVector<StringRef, uint32_t> &NameTable,
+ std::set<StringRef> &V);
MapVector<StringRef, uint32_t> NameTable;
- std::unordered_set<std::string> BracketedContextStr;
- void addName(StringRef FName, bool IsContextName = false);
+ void addName(StringRef FName);
+ virtual void addContext(const SampleContext &Context);
void addNames(const FunctionSamples &S);
private:
@@ -168,6 +168,7 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
// DefaultLayout
SmallVector<SecHdrTableEntry, 8>({{SecProfSummary, 0, 0, 0, 0},
{SecNameTable, 0, 0, 0, 0},
+ {SecCSNameTable, 0, 0, 0, 0},
{SecFuncOffsetTable, 0, 0, 0, 0},
{SecLBRProfile, 0, 0, 0, 0},
{SecProfileSymbolList, 0, 0, 0, 0},
@@ -190,8 +191,7 @@ const std::array<SmallVector<SecHdrTableEntry, 8>, NumOfLayout>
class SampleProfileWriterExtBinaryBase : public SampleProfileWriterBinary {
using SampleProfileWriterBinary::SampleProfileWriterBinary;
public:
- virtual std::error_code
- write(const StringMap<FunctionSamples> &ProfileMap) override;
+ virtual std::error_code write(const SampleProfileMap &ProfileMap) override;
virtual void setToCompressAllSections() override;
void setToCompressSection(SecType Type);
@@ -246,29 +246,32 @@ protected:
addSecFlag(SectionHdrLayout[SectionIdx], Flag);
}
+ virtual void addContext(const SampleContext &Context) override;
+
// placeholder for subclasses to dispatch their own section writers.
virtual std::error_code writeCustomSection(SecType Type) = 0;
// Verify the SecLayout is supported by the format.
virtual void verifySecLayout(SectionLayout SL) = 0;
// specify the order to write sections.
- virtual std::error_code
- writeSections(const StringMap<FunctionSamples> &ProfileMap) = 0;
+ virtual std::error_code writeSections(const SampleProfileMap &ProfileMap) = 0;
// Dispatch section writer for each section. \p LayoutIdx is the sequence
// number indicating where the section is located in SectionHdrLayout.
- virtual std::error_code
- writeOneSection(SecType Type, uint32_t LayoutIdx,
- const StringMap<FunctionSamples> &ProfileMap);
+ virtual std::error_code writeOneSection(SecType Type, uint32_t LayoutIdx,
+ const SampleProfileMap &ProfileMap);
// Helper function to write name table.
virtual std::error_code writeNameTable() override;
+ virtual std::error_code
+ writeContextIdx(const SampleContext &Context) override;
+ std::error_code writeCSNameIdx(const SampleContext &Context);
+ std::error_code writeCSNameTableSection();
- std::error_code writeFuncMetadata(const StringMap<FunctionSamples> &Profiles);
+ std::error_code writeFuncMetadata(const SampleProfileMap &Profiles);
// Functions to write various kinds of sections.
- std::error_code
- writeNameTableSection(const StringMap<FunctionSamples> &ProfileMap);
+ std::error_code writeNameTableSection(const SampleProfileMap &ProfileMap);
std::error_code writeFuncOffsetTable();
std::error_code writeProfileSymbolListSection();
@@ -289,7 +292,7 @@ private:
void allocSecHdrTable();
std::error_code writeSecHdrTable();
virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeHeader(const SampleProfileMap &ProfileMap) override;
std::error_code compressAndOutput();
// We will swap the raw_ostream held by LocalBufStream and that
@@ -312,12 +315,16 @@ private:
// be read.
std::vector<SecHdrTableEntry> SecHdrTable;
- // FuncOffsetTable maps function name to its profile offset in SecLBRProfile
- // section. It is used to load function profile on demand.
- MapVector<StringRef, uint64_t> FuncOffsetTable;
+ // FuncOffsetTable maps function context to its profile offset in
+ // SecLBRProfile section. It is used to load function profile on demand.
+ MapVector<SampleContext, uint64_t> FuncOffsetTable;
// Whether to use MD5 to represent string.
bool UseMD5 = false;
+ /// CSNameTable maps function context to its offset in SecCSNameTable section.
+ /// The offset will be used everywhere where the context is referenced.
+ MapVector<SampleContext, uint32_t> CSNameTable;
+
ProfileSymbolList *ProfSymList = nullptr;
};
@@ -327,13 +334,11 @@ public:
: SampleProfileWriterExtBinaryBase(OS) {}
private:
- std::error_code
- writeDefaultLayout(const StringMap<FunctionSamples> &ProfileMap);
- std::error_code
- writeCtxSplitLayout(const StringMap<FunctionSamples> &ProfileMap);
+ std::error_code writeDefaultLayout(const SampleProfileMap &ProfileMap);
+ std::error_code writeCtxSplitLayout(const SampleProfileMap &ProfileMap);
virtual std::error_code
- writeSections(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeSections(const SampleProfileMap &ProfileMap) override;
virtual std::error_code writeCustomSection(SecType Type) override {
return sampleprof_error::success;
@@ -380,8 +385,7 @@ class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary {
public:
virtual std::error_code writeSample(const FunctionSamples &S) override;
- virtual std::error_code
- write(const StringMap<FunctionSamples> &ProfileMap) override;
+ virtual std::error_code write(const SampleProfileMap &ProfileMap) override;
protected:
/// The table mapping from function name to the offset of its FunctionSample
@@ -392,7 +396,7 @@ protected:
uint64_t TableOffset;
virtual std::error_code writeNameTable() override;
virtual std::error_code
- writeHeader(const StringMap<FunctionSamples> &ProfileMap) override;
+ writeHeader(const SampleProfileMap &ProfileMap) override;
std::error_code writeFuncOffsetTable();
};
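A matching sketch for the writer side: write() now consumes a SampleProfileMap. The create(Filename, Format) factory is the existing writer entry point; SPF_Ext_Binary is just one possible format choice for the example.

#include "llvm/ProfileData/SampleProfWriter.h"

using namespace llvm;
using namespace llvm::sampleprof;

static std::error_code emitProfiles(StringRef OutPath,
                                    const SampleProfileMap &ProfileMap) {
  auto WriterOrErr = SampleProfileWriter::create(OutPath, SPF_Ext_Binary);
  if (std::error_code EC = WriterOrErr.getError())
    return EC;
  // The whole map, keyed by context, goes through the new overloads.
  return WriterOrErr.get()->write(ProfileMap);
}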
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.def b/llvm/include/llvm/Support/AArch64TargetParser.def
index ae2fc673c54e..b3cfb71601f1 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -58,6 +58,24 @@ AARCH64_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a",
AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
AArch64::AEK_SM4 | AArch64::AEK_SHA3 | AArch64::AEK_BF16 |
AArch64::AEK_SHA2 | AArch64::AEK_AES | AArch64::AEK_I8MM))
+AARCH64_ARCH("armv9-a", ARMV9A, "9-A", "v9a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SVE2))
+AARCH64_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SVE2))
+AARCH64_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (AArch64::AEK_CRC | AArch64::AEK_FP |
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE |
+ AArch64::AEK_RDM | AArch64::AEK_RCPC | AArch64::AEK_DOTPROD |
+ AArch64::AEK_SVE2))
// For v8-R, we do not enable crypto and align with GCC that enables a more
// minimal set of optional architecture extensions.
AARCH64_ARCH("armv8-r", ARMV8R, "8-R", "v8r",
@@ -126,6 +144,11 @@ AARCH64_CPU_NAME("cortex-a53", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a55", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC))
+AARCH64_CPU_NAME("cortex-a510", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
+ AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("cortex-a57", ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_CRC))
AARCH64_CPU_NAME("cortex-a65", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
@@ -155,11 +178,20 @@ AARCH64_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
AARCH64_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM |
+ AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML |
+ AArch64::AEK_SVE2BITPERM | AArch64::AEK_BF16))
AARCH64_CPU_NAME("cortex-r82", ARMV8R, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_LSE))
AARCH64_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_FP16 | AArch64::AEK_DOTPROD | AArch64::AEK_RCPC |
AArch64::AEK_SSBS))
+AARCH64_CPU_NAME("cortex-x2", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
+ AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
+ AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
+ AArch64::AEK_FP16FML))
AARCH64_CPU_NAME("neoverse-e1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_DOTPROD | AArch64::AEK_FP16 | AArch64::AEK_RAS |
AArch64::AEK_RCPC | AArch64::AEK_SSBS))
@@ -172,6 +204,10 @@ AARCH64_CPU_NAME("neoverse-n2", ARMV8_5A, FK_CRYPTO_NEON_FP_ARMV8, false,
AArch64::AEK_I8MM | AArch64::AEK_MTE | AArch64::AEK_RAS |
AArch64::AEK_RCPC | AArch64::AEK_SB | AArch64::AEK_SSBS |
AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM))
+AARCH64_CPU_NAME("neoverse-512tvb", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
+ AArch64::AEK_DOTPROD))
AARCH64_CPU_NAME("neoverse-v1", ARMV8_4A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_RAS | AArch64::AEK_SVE | AArch64::AEK_SSBS |
AArch64::AEK_RCPC | AArch64::AEK_FP16 | AArch64::AEK_BF16 |
diff --git a/llvm/include/llvm/Support/ARMTargetParser.def b/llvm/include/llvm/Support/ARMTargetParser.def
index 14b169a6e111..fd08f3e6960c 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/llvm/include/llvm/Support/ARMTargetParser.def
@@ -122,6 +122,21 @@ ARM_ARCH("armv8.7-a", ARMV8_7A, "8.7-A", "v8.7a",
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
+ARM_ARCH("armv9-a", ARMV9A, "9-A", "v9a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD))
+ARM_ARCH("armv9.1-a", ARMV9_1A, "9.1-A", "v9.1a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
+ARM_ARCH("armv9.2-a", ARMV9_2A, "9.2-A", "v9.2a",
+ ARMBuildAttrs::CPUArch::v8_A, FK_NEON_FP_ARMV8,
+ (ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
+ ARM::AEK_HWDIVTHUMB | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS |
+ ARM::AEK_DOTPROD | ARM::AEK_BF16 | ARM::AEK_I8MM))
ARM_ARCH("armv8-r", ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
FK_NEON_FP_ARMV8,
(ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIVTHUMB |
@@ -296,6 +311,9 @@ ARM_CPU_NAME("cortex-a78", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("cortex-a78c", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
ARM::AEK_FP16 | ARM::AEK_DOTPROD)
+ARM_CPU_NAME("cortex-a710", ARMV9A, FK_NEON_FP_ARMV8, false,
+ (ARM::AEK_DOTPROD | ARM::AEK_FP16FML | ARM::AEK_BF16 | ARM::AEK_SB |
+ ARM::AEK_I8MM))
ARM_CPU_NAME("cortex-x1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
(ARM::AEK_FP16 | ARM::AEK_DOTPROD))
ARM_CPU_NAME("neoverse-n1", ARMV8_2A, FK_CRYPTO_NEON_FP_ARMV8, false,
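The two .def files above are X-macro tables, so the new armv9 and Cortex entries become visible to any client that defines the table macro before including the file. A small illustrative sketch; the variadic macro simply ignores the fields this client does not care about:

#include "llvm/Support/raw_ostream.h"

// Print every AArch64 architecture name known to the .def table, which now
// includes "armv9-a", "armv9.1-a" and "armv9.2-a". The .def file #undefs
// AARCH64_ARCH itself after expansion.
static void printAArch64ArchNames() {
#define AARCH64_ARCH(NAME, ...) llvm::outs() << NAME << "\n";
#include "llvm/Support/AArch64TargetParser.def"
}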
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index 245432debce6..9e8ce4e36197 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -277,7 +277,7 @@ public:
size_t TotalMemory = 0;
for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I)
TotalMemory += computeSlabSize(std::distance(Slabs.begin(), I));
- for (auto &PtrAndSize : CustomSizedSlabs)
+ for (const auto &PtrAndSize : CustomSizedSlabs)
TotalMemory += PtrAndSize.second;
return TotalMemory;
}
diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h
index 27ca825cef46..1a0d108300bc 100644
--- a/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/llvm/include/llvm/Support/AtomicOrdering.h
@@ -133,6 +133,16 @@ inline bool isReleaseOrStronger(AtomicOrdering AO) {
return isAtLeastOrStrongerThan(AO, AtomicOrdering::Release);
}
+/// Return a single atomic ordering that is at least as strong as both the \p AO
+/// and \p Other orderings for an atomic operation.
+inline AtomicOrdering getMergedAtomicOrdering(AtomicOrdering AO,
+ AtomicOrdering Other) {
+ if ((AO == AtomicOrdering::Acquire && Other == AtomicOrdering::Release) ||
+ (AO == AtomicOrdering::Release && Other == AtomicOrdering::Acquire))
+ return AtomicOrdering::AcquireRelease;
+ return isStrongerThan(AO, Other) ? AO : Other;
+}
+
inline AtomicOrderingCABI toCABI(AtomicOrdering AO) {
static const AtomicOrderingCABI lookup[8] = {
/* NotAtomic */ AtomicOrderingCABI::relaxed,
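A small usage sketch (not part of the patch) for the new getMergedAtomicOrdering helper, e.g. when folding the orderings of two atomic accesses into one operation:

#include "llvm/Support/AtomicOrdering.h"
#include <cassert>

using namespace llvm;

static AtomicOrdering mergedLoadStoreOrdering() {
  AtomicOrdering A = AtomicOrdering::Acquire;
  AtomicOrdering B = AtomicOrdering::Release;
  AtomicOrdering Merged = getMergedAtomicOrdering(A, B);
  assert(Merged == AtomicOrdering::AcquireRelease &&
         "acquire + release fold to acq_rel");
  return Merged;
}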
diff --git a/llvm/include/llvm/Support/BinaryByteStream.h b/llvm/include/llvm/Support/BinaryByteStream.h
index ca5bb5abecfc..7d8b6d2dc43d 100644
--- a/llvm/include/llvm/Support/BinaryByteStream.h
+++ b/llvm/include/llvm/Support/BinaryByteStream.h
@@ -38,7 +38,7 @@ public:
llvm::support::endianness getEndian() const override { return Endian; }
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForRead(Offset, Size))
return EC;
@@ -46,7 +46,7 @@ public:
return Error::success();
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForRead(Offset, 1))
return EC;
@@ -54,7 +54,7 @@ public:
return Error::success();
}
- uint32_t getLength() override { return Data.size(); }
+ uint64_t getLength() override { return Data.size(); }
ArrayRef<uint8_t> data() const { return Data; }
@@ -97,19 +97,19 @@ public:
return ImmutableStream.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return ImmutableStream.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return ImmutableStream.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return ImmutableStream.getLength(); }
+ uint64_t getLength() override { return ImmutableStream.getLength(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override {
if (Buffer.empty())
return Error::success();
@@ -145,7 +145,7 @@ public:
llvm::support::endianness getEndian() const override { return Endian; }
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForWrite(Offset, Buffer.size()))
return EC;
@@ -154,11 +154,11 @@ public:
return Error::success();
}
- void insert(uint32_t Offset, ArrayRef<uint8_t> Bytes) {
+ void insert(uint64_t Offset, ArrayRef<uint8_t> Bytes) {
Data.insert(Data.begin() + Offset, Bytes.begin(), Bytes.end());
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
if (auto EC = checkOffsetForWrite(Offset, 1))
return EC;
@@ -167,9 +167,9 @@ public:
return Error::success();
}
- uint32_t getLength() override { return Data.size(); }
+ uint64_t getLength() override { return Data.size(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Buffer) override {
if (Buffer.empty())
return Error::success();
@@ -182,7 +182,7 @@ public:
if (Offset > getLength())
return make_error<BinaryStreamError>(stream_error_code::invalid_offset);
- uint32_t RequiredSize = Offset + Buffer.size();
+ uint64_t RequiredSize = Offset + Buffer.size();
if (RequiredSize > Data.size())
Data.resize(RequiredSize);
@@ -240,19 +240,19 @@ public:
return Impl.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return Impl.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return Impl.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return Impl.getLength(); }
+ uint64_t getLength() override { return Impl.getLength(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) override {
return Impl.writeBytes(Offset, Data);
}
diff --git a/llvm/include/llvm/Support/BinaryItemStream.h b/llvm/include/llvm/Support/BinaryItemStream.h
index 4d27013ce368..eb512bf4721a 100644
--- a/llvm/include/llvm/Support/BinaryItemStream.h
+++ b/llvm/include/llvm/Support/BinaryItemStream.h
@@ -38,7 +38,7 @@ public:
llvm::support::endianness getEndian() const override { return Endian; }
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
auto ExpectedIndex = translateOffsetIndex(Offset);
if (!ExpectedIndex)
@@ -52,7 +52,7 @@ public:
return Error::success();
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
auto ExpectedIndex = translateOffsetIndex(Offset);
if (!ExpectedIndex)
@@ -66,7 +66,7 @@ public:
computeItemOffsets();
}
- uint32_t getLength() override {
+ uint64_t getLength() override {
return ItemEndOffsets.empty() ? 0 : ItemEndOffsets.back();
}
@@ -74,16 +74,16 @@ private:
void computeItemOffsets() {
ItemEndOffsets.clear();
ItemEndOffsets.reserve(Items.size());
- uint32_t CurrentOffset = 0;
+ uint64_t CurrentOffset = 0;
for (const auto &Item : Items) {
- uint32_t Len = Traits::length(Item);
+ uint64_t Len = Traits::length(Item);
assert(Len > 0 && "no empty items");
CurrentOffset += Len;
ItemEndOffsets.push_back(CurrentOffset);
}
}
- Expected<uint32_t> translateOffsetIndex(uint32_t Offset) {
+ Expected<uint32_t> translateOffsetIndex(uint64_t Offset) {
// Make sure the offset is somewhere in our items array.
if (Offset >= getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
@@ -98,7 +98,7 @@ private:
ArrayRef<T> Items;
// Sorted vector of offsets to accelerate lookup.
- std::vector<uint32_t> ItemEndOffsets;
+ std::vector<uint64_t> ItemEndOffsets;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/BinaryStream.h b/llvm/include/llvm/Support/BinaryStream.h
index fcf4398550ee..e87129d8c201 100644
--- a/llvm/include/llvm/Support/BinaryStream.h
+++ b/llvm/include/llvm/Support/BinaryStream.h
@@ -41,22 +41,22 @@ public:
/// Given an offset into the stream and a number of bytes, attempt to
/// read the bytes and set the output ArrayRef to point to data owned by the
/// stream.
- virtual Error readBytes(uint32_t Offset, uint32_t Size,
+ virtual Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) = 0;
/// Given an offset into the stream, read as much as possible without
/// copying any data.
- virtual Error readLongestContiguousChunk(uint32_t Offset,
+ virtual Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) = 0;
/// Return the number of bytes of data in this stream.
- virtual uint32_t getLength() = 0;
+ virtual uint64_t getLength() = 0;
/// Return the properties of this stream.
virtual BinaryStreamFlags getFlags() const { return BSF_None; }
protected:
- Error checkOffsetForRead(uint32_t Offset, uint32_t DataSize) {
+ Error checkOffsetForRead(uint64_t Offset, uint64_t DataSize) {
if (Offset > getLength())
return make_error<BinaryStreamError>(stream_error_code::invalid_offset);
if (getLength() < DataSize + Offset)
@@ -77,7 +77,7 @@ public:
/// Attempt to write the given bytes into the stream at the desired
/// offset. This will always necessitate a copy. Cannot shrink or grow the
/// stream, only writes into existing allocated space.
- virtual Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) = 0;
+ virtual Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) = 0;
/// For buffered streams, commits changes to the backing store.
virtual Error commit() = 0;
@@ -86,7 +86,7 @@ public:
BinaryStreamFlags getFlags() const override { return BSF_Write; }
protected:
- Error checkOffsetForWrite(uint32_t Offset, uint32_t DataSize) {
+ Error checkOffsetForWrite(uint64_t Offset, uint64_t DataSize) {
if (!(getFlags() & BSF_Append))
return checkOffsetForRead(Offset, DataSize);
diff --git a/llvm/include/llvm/Support/BinaryStreamArray.h b/llvm/include/llvm/Support/BinaryStreamArray.h
index 148ab85169f2..85d29be26ca9 100644
--- a/llvm/include/llvm/Support/BinaryStreamArray.h
+++ b/llvm/include/llvm/Support/BinaryStreamArray.h
@@ -153,7 +153,7 @@ private:
template <typename ValueType, typename Extractor>
class VarStreamArrayIterator
: public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
- std::forward_iterator_tag, ValueType> {
+ std::forward_iterator_tag, const ValueType> {
typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
typedef VarStreamArray<ValueType, Extractor> ArrayType;
@@ -197,11 +197,6 @@ public:
return ThisValue;
}
- ValueType &operator*() {
- assert(Array && !HasError);
- return ThisValue;
- }
-
IterType &operator+=(unsigned N) {
for (unsigned I = 0; I < N; ++I) {
// We are done with the current record, discard it so that we are
diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h
index 9ad98a89aaf9..29b4b09b848c 100644
--- a/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -251,16 +251,16 @@ public:
}
bool empty() const { return bytesRemaining() == 0; }
- void setOffset(uint32_t Off) { Offset = Off; }
- uint32_t getOffset() const { return Offset; }
- uint32_t getLength() const { return Stream.getLength(); }
- uint32_t bytesRemaining() const { return getLength() - getOffset(); }
+ void setOffset(uint64_t Off) { Offset = Off; }
+ uint64_t getOffset() const { return Offset; }
+ uint64_t getLength() const { return Stream.getLength(); }
+ uint64_t bytesRemaining() const { return getLength() - getOffset(); }
/// Advance the stream's offset by \p Amount bytes.
///
/// \returns a success error code if at least \p Amount bytes remain in the
/// stream, otherwise returns an appropriate error code.
- Error skip(uint32_t Amount);
+ Error skip(uint64_t Amount);
/// Examine the next byte of the underlying stream without advancing the
/// stream's offset. If the stream is empty the behavior is undefined.
@@ -271,11 +271,11 @@ public:
Error padToAlignment(uint32_t Align);
std::pair<BinaryStreamReader, BinaryStreamReader>
- split(uint32_t Offset) const;
+ split(uint64_t Offset) const;
private:
BinaryStreamRef Stream;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
};
} // namespace llvm
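A hedged sketch of a reader client under the widened interface: offsets and lengths are now uint64_t end to end, so very large streams no longer truncate. readInteger() and the ArrayRef constructor are pre-existing reader facilities.

#include "llvm/Support/BinaryStreamReader.h"

using namespace llvm;

static Error readHeaderWord(ArrayRef<uint8_t> Bytes, uint32_t &Word) {
  BinaryStreamReader Reader(Bytes, support::little);
  if (Error E = Reader.readInteger(Word))
    return E;
  // bytesRemaining(), getOffset() and getLength() all return uint64_t now.
  uint64_t Remaining = Reader.bytesRemaining();
  (void)Remaining;
  return Error::success();
}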
diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h
index ba4c3873586d..e0aaab82ffab 100644
--- a/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -30,12 +30,12 @@ protected:
Length = BorrowedImpl.getLength();
}
- BinaryStreamRefBase(std::shared_ptr<StreamType> SharedImpl, uint32_t Offset,
- Optional<uint32_t> Length)
+ BinaryStreamRefBase(std::shared_ptr<StreamType> SharedImpl, uint64_t Offset,
+ Optional<uint64_t> Length)
: SharedImpl(SharedImpl), BorrowedImpl(SharedImpl.get()),
ViewOffset(Offset), Length(Length) {}
- BinaryStreamRefBase(StreamType &BorrowedImpl, uint32_t Offset,
- Optional<uint32_t> Length)
+ BinaryStreamRefBase(StreamType &BorrowedImpl, uint64_t Offset,
+ Optional<uint64_t> Length)
: BorrowedImpl(&BorrowedImpl), ViewOffset(Offset), Length(Length) {}
BinaryStreamRefBase(const BinaryStreamRefBase &Other) = default;
BinaryStreamRefBase &operator=(const BinaryStreamRefBase &Other) = default;
@@ -48,7 +48,7 @@ public:
return BorrowedImpl->getEndian();
}
- uint32_t getLength() const {
+ uint64_t getLength() const {
if (Length.hasValue())
return *Length;
@@ -58,7 +58,7 @@ public:
/// Return a new BinaryStreamRef with the first \p N elements removed. If
/// this BinaryStreamRef is length-tracking, then the resulting one will be
/// too.
- RefType drop_front(uint32_t N) const {
+ RefType drop_front(uint64_t N) const {
if (!BorrowedImpl)
return RefType();
@@ -76,7 +76,7 @@ public:
/// Return a new BinaryStreamRef with the last \p N elements removed. If
/// this BinaryStreamRef is length-tracking and \p N is greater than 0, then
/// this BinaryStreamRef will no longer length-track.
- RefType drop_back(uint32_t N) const {
+ RefType drop_back(uint64_t N) const {
if (!BorrowedImpl)
return RefType();
@@ -96,26 +96,26 @@ public:
}
/// Return a new BinaryStreamRef with only the first \p N elements remaining.
- RefType keep_front(uint32_t N) const {
+ RefType keep_front(uint64_t N) const {
assert(N <= getLength());
return drop_back(getLength() - N);
}
/// Return a new BinaryStreamRef with only the last \p N elements remaining.
- RefType keep_back(uint32_t N) const {
+ RefType keep_back(uint64_t N) const {
assert(N <= getLength());
return drop_front(getLength() - N);
}
/// Return a new BinaryStreamRef with the first and last \p N elements
/// removed.
- RefType drop_symmetric(uint32_t N) const {
+ RefType drop_symmetric(uint64_t N) const {
return drop_front(N).drop_back(N);
}
/// Return a new BinaryStreamRef with the first \p Offset elements removed,
/// and retaining exactly \p Len elements.
- RefType slice(uint32_t Offset, uint32_t Len) const {
+ RefType slice(uint64_t Offset, uint64_t Len) const {
return drop_front(Offset).keep_front(Len);
}
@@ -132,7 +132,7 @@ public:
}
protected:
- Error checkOffsetForRead(uint32_t Offset, uint32_t DataSize) const {
+ Error checkOffsetForRead(uint64_t Offset, uint64_t DataSize) const {
if (Offset > getLength())
return make_error<BinaryStreamError>(stream_error_code::invalid_offset);
if (getLength() < DataSize + Offset)
@@ -142,8 +142,8 @@ protected:
std::shared_ptr<StreamType> SharedImpl;
StreamType *BorrowedImpl = nullptr;
- uint32_t ViewOffset = 0;
- Optional<uint32_t> Length;
+ uint64_t ViewOffset = 0;
+ Optional<uint64_t> Length;
};
/// BinaryStreamRef is to BinaryStream what ArrayRef is to an Array. It
@@ -157,15 +157,15 @@ class BinaryStreamRef
: public BinaryStreamRefBase<BinaryStreamRef, BinaryStream> {
friend BinaryStreamRefBase<BinaryStreamRef, BinaryStream>;
friend class WritableBinaryStreamRef;
- BinaryStreamRef(std::shared_ptr<BinaryStream> Impl, uint32_t ViewOffset,
- Optional<uint32_t> Length)
+ BinaryStreamRef(std::shared_ptr<BinaryStream> Impl, uint64_t ViewOffset,
+ Optional<uint64_t> Length)
: BinaryStreamRefBase(Impl, ViewOffset, Length) {}
public:
BinaryStreamRef() = default;
BinaryStreamRef(BinaryStream &Stream);
- BinaryStreamRef(BinaryStream &Stream, uint32_t Offset,
- Optional<uint32_t> Length);
+ BinaryStreamRef(BinaryStream &Stream, uint64_t Offset,
+ Optional<uint64_t> Length);
explicit BinaryStreamRef(ArrayRef<uint8_t> Data,
llvm::support::endianness Endian);
explicit BinaryStreamRef(StringRef Data, llvm::support::endianness Endian);
@@ -176,8 +176,8 @@ public:
BinaryStreamRef &operator=(BinaryStreamRef &&Other) = default;
// Use BinaryStreamRef.slice() instead.
- BinaryStreamRef(BinaryStreamRef &S, uint32_t Offset,
- uint32_t Length) = delete;
+ BinaryStreamRef(BinaryStreamRef &S, uint64_t Offset,
+ uint64_t Length) = delete;
/// Given an Offset into this StreamRef and a Size, return a reference to a
/// buffer owned by the stream.
@@ -185,7 +185,7 @@ public:
/// \returns a success error code if the entire range of data is within the
/// bounds of this BinaryStreamRef's view and the implementation could read
/// the data, and an appropriate error code otherwise.
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) const;
/// Given an Offset into this BinaryStreamRef, return a reference to the
@@ -193,29 +193,28 @@ public:
///
/// \returns a success error code if implementation could read the data,
/// and an appropriate error code otherwise.
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) const;
};
struct BinarySubstreamRef {
- uint32_t Offset = 0; // Offset in the parent stream
+ uint64_t Offset = 0; // Offset in the parent stream
BinaryStreamRef StreamData; // Stream Data
- BinarySubstreamRef slice(uint32_t Off, uint32_t Size) const {
+ BinarySubstreamRef slice(uint64_t Off, uint64_t Size) const {
BinaryStreamRef SubSub = StreamData.slice(Off, Size);
return {Off + Offset, SubSub};
}
- BinarySubstreamRef drop_front(uint32_t N) const {
+ BinarySubstreamRef drop_front(uint64_t N) const {
return slice(N, size() - N);
}
- BinarySubstreamRef keep_front(uint32_t N) const { return slice(0, N); }
+ BinarySubstreamRef keep_front(uint64_t N) const { return slice(0, N); }
- std::pair<BinarySubstreamRef, BinarySubstreamRef>
- split(uint32_t Off) const {
+ std::pair<BinarySubstreamRef, BinarySubstreamRef> split(uint64_t Off) const {
return std::make_pair(keep_front(Off), drop_front(Off));
}
- uint32_t size() const { return StreamData.getLength(); }
+ uint64_t size() const { return StreamData.getLength(); }
bool empty() const { return size() == 0; }
};
@@ -224,10 +223,10 @@ class WritableBinaryStreamRef
WritableBinaryStream> {
friend BinaryStreamRefBase<WritableBinaryStreamRef, WritableBinaryStream>;
WritableBinaryStreamRef(std::shared_ptr<WritableBinaryStream> Impl,
- uint32_t ViewOffset, Optional<uint32_t> Length)
+ uint64_t ViewOffset, Optional<uint64_t> Length)
: BinaryStreamRefBase(Impl, ViewOffset, Length) {}
- Error checkOffsetForWrite(uint32_t Offset, uint32_t DataSize) const {
+ Error checkOffsetForWrite(uint64_t Offset, uint64_t DataSize) const {
if (!(BorrowedImpl->getFlags() & BSF_Append))
return checkOffsetForRead(Offset, DataSize);
@@ -239,8 +238,8 @@ class WritableBinaryStreamRef
public:
WritableBinaryStreamRef() = default;
WritableBinaryStreamRef(WritableBinaryStream &Stream);
- WritableBinaryStreamRef(WritableBinaryStream &Stream, uint32_t Offset,
- Optional<uint32_t> Length);
+ WritableBinaryStreamRef(WritableBinaryStream &Stream, uint64_t Offset,
+ Optional<uint64_t> Length);
explicit WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
llvm::support::endianness Endian);
WritableBinaryStreamRef(const WritableBinaryStreamRef &Other) = default;
@@ -251,8 +250,8 @@ public:
WritableBinaryStreamRef &operator=(WritableBinaryStreamRef &&Other) = default;
// Use WritableBinaryStreamRef.slice() instead.
- WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint32_t Offset,
- uint32_t Length) = delete;
+ WritableBinaryStreamRef(WritableBinaryStreamRef &S, uint64_t Offset,
+ uint64_t Length) = delete;
/// Given an Offset into this WritableBinaryStreamRef and some input data,
/// writes the data to the underlying stream.
@@ -260,7 +259,7 @@ public:
/// \returns a success error code if the data could fit within the underlying
/// stream at the specified location and the implementation could write the
/// data, and an appropriate error code otherwise.
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const;
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) const;
/// Convert this WritableBinaryStreamRef to a read-only BinaryStreamRef.
operator BinaryStreamRef() const;
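Illustrative only: the slicing helpers on BinaryStreamRef keep their shape but now take and return 64-bit quantities.

#include "llvm/Support/BinaryStreamRef.h"

using namespace llvm;

// Return a view that drops the first and last quarter of the stream; all of
// the offset arithmetic stays in uint64_t.
static BinaryStreamRef middleView(ArrayRef<uint8_t> Data) {
  BinaryStreamRef Ref(Data, support::little);
  uint64_t Len = Ref.getLength();
  return Ref.slice(Len / 4, Len / 2);
}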
diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h
index ceba792e6b26..3054f4ac7ef0 100644
--- a/llvm/include/llvm/Support/BinaryStreamWriter.h
+++ b/llvm/include/llvm/Support/BinaryStreamWriter.h
@@ -124,7 +124,7 @@ public:
///
/// \returns a success error code if the data was successfully written,
/// otherwise returns an appropriate error code.
- Error writeStreamRef(BinaryStreamRef Ref, uint32_t Size);
+ Error writeStreamRef(BinaryStreamRef Ref, uint64_t Size);
/// Writes the object \p Obj to the underlying stream, as if by using memcpy.
/// It is up to the caller to ensure that type of \p Obj can be safely copied
@@ -178,17 +178,17 @@ public:
}
/// Splits the Writer into two Writers at a given offset.
- std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint32_t Off) const;
+ std::pair<BinaryStreamWriter, BinaryStreamWriter> split(uint64_t Off) const;
- void setOffset(uint32_t Off) { Offset = Off; }
- uint32_t getOffset() const { return Offset; }
- uint32_t getLength() const { return Stream.getLength(); }
- uint32_t bytesRemaining() const { return getLength() - getOffset(); }
+ void setOffset(uint64_t Off) { Offset = Off; }
+ uint64_t getOffset() const { return Offset; }
+ uint64_t getLength() const { return Stream.getLength(); }
+ uint64_t bytesRemaining() const { return getLength() - getOffset(); }
Error padToAlignment(uint32_t Align);
protected:
WritableBinaryStreamRef Stream;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
new file mode 100644
index 000000000000..1e5fea17f708
--- /dev/null
+++ b/llvm/include/llvm/Support/Caching.h
@@ -0,0 +1,71 @@
+//===- Caching.h - LLVM Local File Cache ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CachedFileStream and the localCache function, which
+// simplifies caching files on the local filesystem in a directory whose
+// contents are managed by a CachePruningPolicy.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CACHING_H
+#define LLVM_SUPPORT_CACHING_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+
+/// This class wraps an output stream for a file. Most clients should just be
+/// able to return an instance of this base class from the stream callback, but
+/// if a client needs to perform some action after the stream is written to,
+/// that can be done by deriving from this class and overriding the destructor.
+class CachedFileStream {
+public:
+ CachedFileStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
+ std::unique_ptr<raw_pwrite_stream> OS;
+ virtual ~CachedFileStream() = default;
+};
+
+/// This type defines the callback to add a file that is generated on the fly.
+///
+/// Stream callbacks must be thread safe.
+using AddStreamFn =
+ std::function<Expected<std::unique_ptr<CachedFileStream>>(unsigned Task)>;
+
+/// This is the type of a file cache. To request an item from the cache, pass a
+/// unique string as the Key. For hits, the cached file will be added to the
+/// link and this function will return AddStreamFn(). For misses, the cache will
+/// return a stream callback which must be called at most once to produce
+/// content for the stream. The file stream produced by the stream callback will
+/// add the file to the link after the stream is written to.
+///
+/// Clients generally look like this:
+///
+/// if (AddStreamFn AddStream = Cache(Task, Key))
+/// ProduceContent(AddStream);
+using FileCache =
+ std::function<Expected<AddStreamFn>(unsigned Task, StringRef Key)>;
+
+/// This type defines the callback to add a pre-existing file (e.g. in a cache).
+///
+/// Buffer callbacks must be thread safe.
+using AddBufferFn =
+ std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
+
+/// Create a local file system cache which uses the given cache name, temporary
+/// file prefix, cache directory and file callback. This function also creates
+/// the cache directory if it does not already exist. The cache name appears in
+/// error messages for errors during caching. The temporary file prefix is used
+/// in the temporary file naming scheme used when writing files atomically.
+Expected<FileCache> localCache(Twine CacheNameRef, Twine TempFilePrefixRef,
+ Twine CacheDirectoryPathRef,
+ AddBufferFn AddBuffer);
+} // namespace llvm
+
+#endif
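A hedged client sketch for the new FileCache interface, expanding the abbreviated pattern in the header comment. The cache name, directory, and the content written to the stream are placeholders; Expected error handling is spelled out but kept minimal.

#include "llvm/Support/Caching.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static Error compileWithCache(unsigned Task, StringRef Key) {
  // Called for cache hits and after a miss's stream has been written.
  AddBufferFn AddBuffer = [](unsigned T, std::unique_ptr<MemoryBuffer> MB) {
    // Hand the file to the link here (placeholder).
  };
  Expected<FileCache> CacheOrErr =
      localCache("MyCache", "Tmp", "/tmp/my-cache", AddBuffer);
  if (!CacheOrErr)
    return CacheOrErr.takeError();
  Expected<AddStreamFn> AddStreamOrErr = (*CacheOrErr)(Task, Key);
  if (!AddStreamOrErr)
    return AddStreamOrErr.takeError();
  // A null callback means a cache hit; the cached buffer was already added.
  if (AddStreamFn AddStream = *AddStreamOrErr) {
    Expected<std::unique_ptr<CachedFileStream>> FileOrErr = AddStream(Task);
    if (!FileOrErr)
      return FileOrErr.takeError();
    *(*FileOrErr)->OS << "...object file bytes..."; // produce content
  }
  return Error::success();
}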
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 14d7e21f78b2..2ee02010ff1d 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -316,9 +316,7 @@ public:
}
bool isInAllSubCommands() const {
- return any_of(Subs, [](const SubCommand *SC) {
- return SC == &*AllSubCommands;
- });
+ return llvm::is_contained(Subs, &*AllSubCommands);
}
//-------------------------------------------------------------------------===
@@ -926,6 +924,9 @@ public:
//--------------------------------------------------
// parser<bool>
//
+
+extern template class basic_parser<bool>;
+
template <> class parser<bool> : public basic_parser<bool> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -949,10 +950,11 @@ public:
void anchor() override;
};
-extern template class basic_parser<bool>;
-
//--------------------------------------------------
// parser<boolOrDefault>
+
+extern template class basic_parser<boolOrDefault>;
+
template <> class parser<boolOrDefault> : public basic_parser<boolOrDefault> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -974,11 +976,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<boolOrDefault>;
-
//--------------------------------------------------
// parser<int>
//
+
+extern template class basic_parser<int>;
+
template <> class parser<int> : public basic_parser<int> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -996,11 +999,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<int>;
-
//--------------------------------------------------
// parser<long>
//
+
+extern template class basic_parser<long>;
+
template <> class parser<long> final : public basic_parser<long> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1018,11 +1022,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<long>;
-
//--------------------------------------------------
// parser<long long>
//
+
+extern template class basic_parser<long long>;
+
template <> class parser<long long> : public basic_parser<long long> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1040,11 +1045,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<long long>;
-
//--------------------------------------------------
// parser<unsigned>
//
+
+extern template class basic_parser<unsigned>;
+
template <> class parser<unsigned> : public basic_parser<unsigned> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1062,11 +1068,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<unsigned>;
-
//--------------------------------------------------
// parser<unsigned long>
//
+
+extern template class basic_parser<unsigned long>;
+
template <>
class parser<unsigned long> final : public basic_parser<unsigned long> {
public:
@@ -1085,11 +1092,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<unsigned long>;
-
//--------------------------------------------------
// parser<unsigned long long>
//
+
+extern template class basic_parser<unsigned long long>;
+
template <>
class parser<unsigned long long> : public basic_parser<unsigned long long> {
public:
@@ -1109,11 +1117,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<unsigned long long>;
-
//--------------------------------------------------
// parser<double>
//
+
+extern template class basic_parser<double>;
+
template <> class parser<double> : public basic_parser<double> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1131,11 +1140,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<double>;
-
//--------------------------------------------------
// parser<float>
//
+
+extern template class basic_parser<float>;
+
template <> class parser<float> : public basic_parser<float> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1153,11 +1163,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<float>;
-
//--------------------------------------------------
// parser<std::string>
//
+
+extern template class basic_parser<std::string>;
+
template <> class parser<std::string> : public basic_parser<std::string> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1178,11 +1189,12 @@ public:
void anchor() override;
};
-extern template class basic_parser<std::string>;
-
//--------------------------------------------------
// parser<char>
//
+
+extern template class basic_parser<char>;
+
template <> class parser<char> : public basic_parser<char> {
public:
parser(Option &O) : basic_parser(O) {}
@@ -1203,8 +1215,6 @@ public:
void anchor() override;
};
-extern template class basic_parser<char>;
-
//--------------------------------------------------
// PrintOptionDiff
//
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index 57052b596edb..c5318137ed3d 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -97,7 +97,7 @@
/// Sadly, this is separate from just rvalue reference support because GCC
/// and MSVC implemented this later than everything else. This appears to be
/// corrected in MSVC 2019 but not MSVC 2017.
-#if __has_feature(cxx_rvalue_references) || LLVM_GNUC_PREREQ(4, 8, 1) || \
+#if __has_feature(cxx_rvalue_references) || defined(__GNUC__) || \
LLVM_MSC_PREREQ(1920)
#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
#else
@@ -123,8 +123,8 @@
/// LLVM_EXTERNAL_VISIBILITY - classes, functions, and variables marked with
/// this attribute will be made public and visible outside of any shared library
/// they are linked in to.
-#if (__has_attribute(visibility) || LLVM_GNUC_PREREQ(4, 0, 0)) && \
- !defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32)
+#if __has_attribute(visibility) && !defined(__MINGW32__) && \
+ !defined(__CYGWIN__) && !defined(_WIN32)
#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
#define LLVM_EXTERNAL_VISIBILITY __attribute__ ((visibility("default")))
#else
@@ -138,7 +138,7 @@
#define LLVM_PREFETCH(addr, rw, locality)
#endif
-#if __has_attribute(used) || LLVM_GNUC_PREREQ(3, 1, 0)
+#if __has_attribute(used)
#define LLVM_ATTRIBUTE_USED __attribute__((__used__))
#else
#define LLVM_ATTRIBUTE_USED
@@ -182,15 +182,15 @@
// more portable solution:
// (void)unused_var_name;
// Prefer cast-to-void wherever it is sufficient.
-#if __has_attribute(unused) || LLVM_GNUC_PREREQ(3, 1, 0)
+#if __has_attribute(unused)
#define LLVM_ATTRIBUTE_UNUSED __attribute__((__unused__))
#else
#define LLVM_ATTRIBUTE_UNUSED
#endif
// FIXME: Provide this for PE/COFF targets.
-#if (__has_attribute(weak) || LLVM_GNUC_PREREQ(4, 0, 0)) && \
- (!defined(__MINGW32__) && !defined(__CYGWIN__) && !defined(_WIN32))
+#if __has_attribute(weak) && !defined(__MINGW32__) && !defined(__CYGWIN__) && \
+ !defined(_WIN32)
#define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__))
#else
#define LLVM_ATTRIBUTE_WEAK
@@ -212,7 +212,13 @@
#define LLVM_READONLY
#endif
-#if __has_builtin(__builtin_expect) || LLVM_GNUC_PREREQ(4, 0, 0)
+#if __has_attribute(minsize)
+#define LLVM_ATTRIBUTE_MINSIZE __attribute__((minsize))
+#else
+#define LLVM_ATTRIBUTE_MINSIZE
+#endif
+
+#if __has_builtin(__builtin_expect) || defined(__GNUC__)
#define LLVM_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
#define LLVM_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
#else
@@ -222,7 +228,7 @@
/// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so,
/// mark a method "not for inlining".
-#if __has_attribute(noinline) || LLVM_GNUC_PREREQ(3, 4, 0)
+#if __has_attribute(noinline)
#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline)
@@ -231,10 +237,8 @@
#endif
/// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do
-/// so, mark a method "always inline" because it is performance sensitive. GCC
-/// 3.4 supported this but is buggy in various cases and produces unimplemented
-/// errors, just use it in GCC 4.0 and later.
-#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0)
+/// so, mark a method "always inline" because it is performance sensitive.
+#if __has_attribute(always_inline)
#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline
@@ -242,15 +246,16 @@
#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline
#endif
-#ifdef __GNUC__
-#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn))
-#elif defined(_MSC_VER)
-#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn)
+/// LLVM_ATTRIBUTE_NODEBUG - On compilers where we have a directive to do
+/// so, mark a method "no debug" because debug info makes the debugger
+/// experience worse.
+#if __has_attribute(nodebug)
+#define LLVM_ATTRIBUTE_NODEBUG __attribute__((nodebug))
#else
-#define LLVM_ATTRIBUTE_NORETURN
+#define LLVM_ATTRIBUTE_NODEBUG
#endif
-#if __has_attribute(returns_nonnull) || LLVM_GNUC_PREREQ(4, 9, 0)
+#if __has_attribute(returns_nonnull)
#define LLVM_ATTRIBUTE_RETURNS_NONNULL __attribute__((returns_nonnull))
#elif defined(_MSC_VER)
#define LLVM_ATTRIBUTE_RETURNS_NONNULL _Ret_notnull_
@@ -322,15 +327,17 @@
/// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands
/// to an expression which states that it is undefined behavior for the
/// compiler to reach this point. Otherwise is not defined.
-#if __has_builtin(__builtin_unreachable) || LLVM_GNUC_PREREQ(4, 5, 0)
+#if __has_builtin(__builtin_unreachable) || defined(__GNUC__)
# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable()
#elif defined(_MSC_VER)
# define LLVM_BUILTIN_UNREACHABLE __assume(false)
+#else
+# define LLVM_BUILTIN_UNREACHABLE
#endif
/// LLVM_BUILTIN_TRAP - On compilers which support it, expands to an expression
/// which causes the program to exit abnormally.
-#if __has_builtin(__builtin_trap) || LLVM_GNUC_PREREQ(4, 3, 0)
+#if __has_builtin(__builtin_trap) || defined(__GNUC__)
# define LLVM_BUILTIN_TRAP __builtin_trap()
#elif defined(_MSC_VER)
// The __debugbreak intrinsic is supported by MSVC, does not require forward
@@ -361,7 +368,7 @@
/// \macro LLVM_ASSUME_ALIGNED
/// Returns a pointer with an assumed alignment.
-#if __has_builtin(__builtin_assume_aligned) || LLVM_GNUC_PREREQ(4, 7, 0)
+#if __has_builtin(__builtin_assume_aligned) || defined(__GNUC__)
# define LLVM_ASSUME_ALIGNED(p, a) __builtin_assume_aligned(p, a)
#elif defined(LLVM_BUILTIN_UNREACHABLE)
# define LLVM_ASSUME_ALIGNED(p, a) \
@@ -549,4 +556,13 @@ void AnnotateIgnoreWritesEnd(const char *file, int line);
#define LLVM_ENABLE_EXCEPTIONS 1
#endif
+/// \macro LLVM_NO_PROFILE_INSTRUMENT_FUNCTION
+/// Disable the profile instrument for a function.
+#if __has_attribute(no_profile_instrument_function)
+#define LLVM_NO_PROFILE_INSTRUMENT_FUNCTION \
+ __attribute__((no_profile_instrument_function))
+#else
+#define LLVM_NO_PROFILE_INSTRUMENT_FUNCTION
+#endif
+
#endif
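Illustrative usage of the two newly added attribute macros; on compilers lacking the underlying attributes both expand to nothing, so the declarations degrade gracefully. The function names are made up for the example.

#include "llvm/Support/Compiler.h"

// Prefer code size over speed for a cold helper when 'minsize' is available.
LLVM_ATTRIBUTE_MINSIZE static int coldSlowPath(int X) { return X * 37; }

// Keep a very hot hook out of -fprofile-generate counters when the
// 'no_profile_instrument_function' attribute is available.
LLVM_NO_PROFILE_INSTRUMENT_FUNCTION static void perEventHook() {}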
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index 498690655fd1..2604ccb38431 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -99,8 +99,7 @@ public:
/// Explicitly trigger a crash recovery in the current process, and
/// return failure from RunSafely(). This function does not return.
- LLVM_ATTRIBUTE_NORETURN
- void HandleExit(int RetCode);
+ [[noreturn]] void HandleExit(int RetCode);
/// Throw again a signal or an exception, after it was catched once by a
/// CrashRecoveryContext.
diff --git a/llvm/include/llvm/Support/DOTGraphTraits.h b/llvm/include/llvm/Support/DOTGraphTraits.h
index a73538fa1462..ffa9abe328c8 100644
--- a/llvm/include/llvm/Support/DOTGraphTraits.h
+++ b/llvm/include/llvm/Support/DOTGraphTraits.h
@@ -65,6 +65,11 @@ public:
return false;
}
+ // renderNodesUsingHTML - If this function returns true, nodes will be
+ // rendered using HTML-like labels, which allow colors and other styling in
+ // the nodes and the edge source labels.
+ static bool renderNodesUsingHTML() { return false; }
+
/// getNodeLabel - Given a node and a pointer to the top level graph, return
/// the label to print in the node.
template<typename GraphType>
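A hedged sketch of opting a graph printer into the new HTML-like rendering hook added above; MyGraph is a hypothetical client graph type and only the new hook is shown.

#include "llvm/Support/DOTGraphTraits.h"

struct MyGraph; // hypothetical client graph type

namespace llvm {
template <>
struct DOTGraphTraits<const MyGraph *> : public DefaultDOTGraphTraits {
  explicit DOTGraphTraits(bool IsSimple = false)
      : DefaultDOTGraphTraits(IsSimple) {}

  // Opt in to HTML-like labels so node labels may carry colors, tables, etc.
  static bool renderNodesUsingHTML() { return true; }
};
} // namespace llvm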
diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index f9335c161563..f4f5905d4bcc 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -70,6 +70,9 @@ public:
/// the position of the Cursor before the first error was encountered.
uint64_t tell() const { return Offset; }
+ /// Set the cursor to the new offset. This does not impact the error state.
+ void seek(uint64_t NewOffSet) { Offset = NewOffSet; }
+
/// Return error contained inside this Cursor, if any. Clears the internal
/// Cursor state.
Error takeError() { return std::move(Err); }
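A small sketch of the new Cursor::seek() in terms of the existing DataExtractor API: jump past a region while the cursor keeps accumulating any error state.

#include "llvm/Support/DataExtractor.h"

using namespace llvm;

static uint32_t readTrailerWord(StringRef Bytes, uint64_t TrailerOffset) {
  DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
  DataExtractor::Cursor C(0);
  uint32_t Magic = DE.getU32(C); // read the leading word
  (void)Magic;
  C.seek(TrailerOffset);         // jump straight to the trailer
  uint32_t Trailer = DE.getU32(C);
  if (Error E = C.takeError()) { // errors from either read surface here
    consumeError(std::move(E));
    return 0;
  }
  return Trailer;
}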
diff --git a/llvm/include/llvm/Support/Debug.h b/llvm/include/llvm/Support/Debug.h
index 64b730951bda..2ff978476c79 100644
--- a/llvm/include/llvm/Support/Debug.h
+++ b/llvm/include/llvm/Support/Debug.h
@@ -78,27 +78,6 @@ void setCurrentDebugTypes(const char **Types, unsigned Count);
///
extern bool DebugFlag;
-/// \name Verification flags.
-///
-/// These flags turns on/off that are expensive and are turned off by default,
-/// unless macro EXPENSIVE_CHECKS is defined. The flags allow selectively
-/// turning the checks on without need to recompile.
-/// \{
-
-/// Enables verification of dominator trees.
-///
-extern bool VerifyDomInfo;
-
-/// Enables verification of loop info.
-///
-extern bool VerifyLoopInfo;
-
-/// Enables verification of MemorySSA.
-///
-extern bool VerifyMemorySSA;
-
-///\}
-
/// EnableDebugBuffering - This defaults to false. If true, the debug
/// stream will install signal handlers to dump any buffered debug
/// output. It allows clients to selectively allow the debug stream
diff --git a/llvm/include/llvm/Support/DivisionByConstantInfo.h b/llvm/include/llvm/Support/DivisionByConstantInfo.h
new file mode 100644
index 000000000000..5bb326178c3e
--- /dev/null
+++ b/llvm/include/llvm/Support/DivisionByConstantInfo.h
@@ -0,0 +1,38 @@
+//== llvm/Support/DivisionByConstantInfo.h - division by constant -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements support for optimizing divisions by a constant.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H
+#define LLVM_SUPPORT_DIVISON_BY_CONSTANT_INFO_H
+
+#include "llvm/ADT/APInt.h"
+
+namespace llvm {
+
+/// Magic data for optimizing signed division by a constant.
+struct SignedDivisionByConstantInfo {
+ static SignedDivisionByConstantInfo get(const APInt &D);
+ APInt Magic; ///< magic number
+ unsigned ShiftAmount; ///< shift amount
+};
+
+/// Magic data for optimizing unsigned division by a constant.
+struct UnsignedDivisonByConstantInfo {
+ static UnsignedDivisonByConstantInfo get(const APInt &D,
+ unsigned LeadingZeros = 0);
+ APInt Magic; ///< magic number
+ bool IsAdd; ///< add indicator
+ unsigned ShiftAmount; ///< shift amount
+};
+
+} // namespace llvm
+
+#endif
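For context, this is how a client might query the magic constants declared above (a sketch, not part of the patch; the chosen divisor is arbitrary):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/DivisionByConstantInfo.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // Constants that let a backend rewrite sdiv-by-7 as mulhs + shifts.
      APInt Divisor(32, 7, /*isSigned=*/true);
      SignedDivisionByConstantInfo Info =
          SignedDivisionByConstantInfo::get(Divisor);
      outs() << "magic=" << Info.Magic << " shift=" << Info.ShiftAmount << "\n";
      return 0;
    }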
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index e8f340e452ef..e2002b89ada2 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -257,8 +257,7 @@ private:
// of debug prints can cause the function to be too large for inlining. So
// it's important that we define this function out of line so that it can't be
// inlined.
- LLVM_ATTRIBUTE_NORETURN
- void fatalUncheckedError() const;
+ [[noreturn]] void fatalUncheckedError() const;
#endif
void assertIsChecked() {
@@ -314,7 +313,7 @@ private:
}
friend raw_ostream &operator<<(raw_ostream &OS, const Error &E) {
- if (auto P = E.getPtr())
+ if (auto *P = E.getPtr())
P->log(OS);
else
OS << "success";
@@ -374,7 +373,7 @@ class ErrorList final : public ErrorInfo<ErrorList> {
public:
void log(raw_ostream &OS) const override {
OS << "Multiple errors:\n";
- for (auto &ErrPayload : Payloads) {
+ for (const auto &ErrPayload : Payloads) {
ErrPayload->log(OS);
OS << "\n";
}
@@ -578,6 +577,16 @@ public:
return const_cast<Expected<T> *>(this)->get();
}
+ /// Returns \a takeError() after moving the held T (if any) into \p Value.
+ template <class OtherT>
+ Error moveInto(OtherT &Value,
+ std::enable_if_t<std::is_assignable<OtherT &, T &&>::value> * =
+ nullptr) && {
+ if (*this)
+ Value = std::move(get());
+ return takeError();
+ }
+
/// Check that this Expected<T> is an error of type ErrT.
template <typename ErrT> bool errorIsA() const {
return HasError && (*getErrorStorage())->template isA<ErrT>();
@@ -688,9 +697,7 @@ private:
}
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- LLVM_ATTRIBUTE_NORETURN
- LLVM_ATTRIBUTE_NOINLINE
- void fatalUncheckedExpected() const {
+ [[noreturn]] LLVM_ATTRIBUTE_NOINLINE void fatalUncheckedExpected() const {
dbgs() << "Expected<T> must be checked before access or destruction.\n";
if (HasError) {
dbgs() << "Unchecked Expected<T> contained error:\n";
@@ -722,8 +729,7 @@ private:
/// Report a serious error, calling any installed error handler. See
/// ErrorHandling.h.
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err,
- bool gen_crash_diag = true);
+[[noreturn]] void report_fatal_error(Error Err, bool gen_crash_diag = true);
/// Report a fatal error if Err is a failure value.
///
@@ -1159,7 +1165,7 @@ protected:
/// It should only be used in this situation, and should never be used where a
/// sensible conversion to std::error_code is available, as attempts to convert
/// to/from this error will result in a fatal error. (i.e. it is a programmatic
-///error to try to convert such a value).
+/// error to try to convert such a value).
std::error_code inconvertibleErrorCode();
/// Helper for converting an std::error_code to a Error.
@@ -1263,13 +1269,20 @@ class FileError final : public ErrorInfo<FileError> {
public:
void log(raw_ostream &OS) const override {
- assert(Err && !FileName.empty() && "Trying to log after takeError().");
+ assert(Err && "Trying to log after takeError().");
OS << "'" << FileName << "': ";
if (Line.hasValue())
OS << "line " << Line.getValue() << ": ";
Err->log(OS);
}
+ std::string messageWithoutFileInfo() const {
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+ Err->log(OS);
+ return OS.str();
+ }
+
StringRef getFileName() { return FileName; }
Error takeError() { return Error(std::move(Err)); }
@@ -1283,8 +1296,6 @@ private:
FileError(const Twine &F, Optional<size_t> LineNum,
std::unique_ptr<ErrorInfoBase> E) {
assert(E && "Cannot create FileError from Error success value.");
- assert(!F.isTriviallyEmpty() &&
- "The file name provided to FileError must not be empty.");
FileName = F.str();
Err = std::move(E);
Line = std::move(LineNum);
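The new `Expected<T>::moveInto()` collapses the usual take-value-or-propagate-error dance into one statement. A sketch (the `readName` producer is hypothetical):

    #include "llvm/Support/Error.h"
    #include <string>

    using namespace llvm;

    static Expected<std::string> readName(bool Fail) {
      if (Fail)
        return createStringError(inconvertibleErrorCode(), "no name available");
      return std::string("widget");
    }

    static Error useName() {
      std::string Name;
      // On success the value is moved into Name; either way the Error is
      // returned, so one `if` both assigns and propagates.
      if (Error E = readName(false).moveInto(Name))
        return E;
      // ... use Name ...
      return Error::success();
    }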
diff --git a/llvm/include/llvm/Support/ErrorHandling.h b/llvm/include/llvm/Support/ErrorHandling.h
index 0ec0242d569d..f980510d37f0 100644
--- a/llvm/include/llvm/Support/ErrorHandling.h
+++ b/llvm/include/llvm/Support/ErrorHandling.h
@@ -15,15 +15,14 @@
#define LLVM_SUPPORT_ERRORHANDLING_H
#include "llvm/Support/Compiler.h"
-#include <string>
namespace llvm {
-class StringRef;
+ class StringRef;
class Twine;
/// An error handler callback.
typedef void (*fatal_error_handler_t)(void *user_data,
- const std::string& reason,
+ const char *reason,
bool gen_crash_diag);
/// install_fatal_error_handler - Installs a new error handler to be used
@@ -68,14 +67,13 @@ class StringRef;
/// standard error, followed by a newline.
/// After the error handler is called this function will call abort(), it
/// does not return.
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason,
- bool gen_crash_diag = true);
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason,
- bool gen_crash_diag = true);
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason,
- bool gen_crash_diag = true);
-LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason,
- bool gen_crash_diag = true);
+/// NOTE: The std::string variant was removed to avoid a <string> dependency.
+[[noreturn]] void report_fatal_error(const char *reason,
+ bool gen_crash_diag = true);
+[[noreturn]] void report_fatal_error(StringRef reason,
+ bool gen_crash_diag = true);
+[[noreturn]] void report_fatal_error(const Twine &reason,
+ bool gen_crash_diag = true);
/// Installs a new bad alloc error handler that should be used whenever a
/// bad alloc error, e.g. failing malloc/calloc, is encountered by LLVM.
@@ -113,13 +111,13 @@ void install_out_of_memory_new_handler();
/// If no error handler is installed (default), throws a bad_alloc exception
/// if LLVM is compiled with exception support. Otherwise prints the error
/// to standard error and calls abort().
-LLVM_ATTRIBUTE_NORETURN void report_bad_alloc_error(const char *Reason,
- bool GenCrashDiag = true);
+[[noreturn]] void report_bad_alloc_error(const char *Reason,
+ bool GenCrashDiag = true);
/// This function calls abort(), and prints the optional message to stderr.
/// Use the llvm_unreachable macro (that adds location info), instead of
/// calling this function directly.
-LLVM_ATTRIBUTE_NORETURN void
+[[noreturn]] void
llvm_unreachable_internal(const char *msg = nullptr, const char *file = nullptr,
unsigned line = 0);
}
diff --git a/llvm/include/llvm/Support/ExtensibleRTTI.h b/llvm/include/llvm/Support/ExtensibleRTTI.h
index 6b8510ce759f..21055247e932 100644
--- a/llvm/include/llvm/Support/ExtensibleRTTI.h
+++ b/llvm/include/llvm/Support/ExtensibleRTTI.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/ExtensibleRTTI.h - ExtensibleRTTI support --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h
index 38779ef4a3af..1a049533b82b 100644
--- a/llvm/include/llvm/Support/FileSystem.h
+++ b/llvm/include/llvm/Support/FileSystem.h
@@ -772,7 +772,8 @@ enum OpenFlags : unsigned {
/// The file should be opened in append mode.
OF_Append = 4,
- /// Delete the file on close. Only makes a difference on windows.
+ /// The returned handle can be used for deleting the file. Only makes a
+ /// difference on Windows.
OF_Delete = 8,
/// When a child process is launched, this file should remain open in the
@@ -865,6 +866,11 @@ public:
// The open file descriptor.
int FD = -1;
+#ifdef _WIN32
+ // Whether we need to manually remove the file on close.
+ bool RemoveOnClose = false;
+#endif
+
// Keep this with the given name.
Error keep(const Twine &Name);
diff --git a/llvm/include/llvm/Support/FileSystem/UniqueID.h b/llvm/include/llvm/Support/FileSystem/UniqueID.h
index 229410c8292e..0d5367236e8d 100644
--- a/llvm/include/llvm/Support/FileSystem/UniqueID.h
+++ b/llvm/include/llvm/Support/FileSystem/UniqueID.h
@@ -14,7 +14,10 @@
#ifndef LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
#define LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
#include <cstdint>
+#include <utility>
namespace llvm {
namespace sys {
@@ -47,6 +50,30 @@ public:
} // end namespace fs
} // end namespace sys
+
+// Support UniqueIDs as DenseMap keys.
+template <> struct DenseMapInfo<llvm::sys::fs::UniqueID> {
+ static inline llvm::sys::fs::UniqueID getEmptyKey() {
+ auto EmptyKey = DenseMapInfo<std::pair<uint64_t, uint64_t>>::getEmptyKey();
+ return {EmptyKey.first, EmptyKey.second};
+ }
+
+ static inline llvm::sys::fs::UniqueID getTombstoneKey() {
+ auto TombstoneKey =
+ DenseMapInfo<std::pair<uint64_t, uint64_t>>::getTombstoneKey();
+ return {TombstoneKey.first, TombstoneKey.second};
+ }
+
+ static hash_code getHashValue(const llvm::sys::fs::UniqueID &Tag) {
+ return hash_value(std::make_pair(Tag.getDevice(), Tag.getFile()));
+ }
+
+ static bool isEqual(const llvm::sys::fs::UniqueID &LHS,
+ const llvm::sys::fs::UniqueID &RHS) {
+ return LHS == RHS;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_SUPPORT_FILESYSTEM_UNIQUEID_H
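With the specialization above, `sys::fs::UniqueID` can be used directly as a DenseMap key, for example to deduplicate files reached through different paths (a sketch; the map's payload is arbitrary):

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/Support/FileSystem/UniqueID.h"

    using namespace llvm;

    // Returns true the first time a given (device, inode) pair is seen.
    static bool recordFile(DenseMap<sys::fs::UniqueID, unsigned> &Seen,
                           sys::fs::UniqueID ID) {
      return Seen.try_emplace(ID, Seen.size()).second;
    }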
diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h
index 094b054f773f..89575f01b717 100644
--- a/llvm/include/llvm/Support/FormatVariadic.h
+++ b/llvm/include/llvm/Support/FormatVariadic.h
@@ -94,7 +94,7 @@ public:
continue;
}
- auto W = Adapters[R.Index];
+ auto *W = Adapters[R.Index];
FmtAlign Align(*W, R.Where, R.Align, R.Pad);
Align.format(S, R.Options);
diff --git a/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index d306ebe99bc1..e504a0eddeba 100644
--- a/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -78,7 +78,7 @@ struct SemiNCAInfo {
using UpdateT = typename DomTreeT::UpdateType;
using UpdateKind = typename DomTreeT::UpdateKind;
struct BatchUpdateInfo {
- // Note: Updates inside PreViewCFG are aleady legalized.
+ // Note: Updates inside PreViewCFG are already legalized.
BatchUpdateInfo(GraphDiffT &PreViewCFG, GraphDiffT *PostViewCFG = nullptr)
: PreViewCFG(PreViewCFG), PostViewCFG(PostViewCFG),
NumLegalized(PreViewCFG.getNumLegalizedUpdates()) {}
@@ -430,7 +430,6 @@ struct SemiNCAInfo {
// is unreachable. This is because we are still going to only visit each
// unreachable node once, we may just visit it in two directions,
// depending on how lucky we get.
- SmallPtrSet<NodePtr, 4> ConnectToExitBlock;
for (const NodePtr I : nodes(DT.Parent)) {
if (SNCA.NodeToInfo.count(I) == 0) {
LLVM_DEBUG(dbgs()
@@ -457,7 +456,6 @@ struct SemiNCAInfo {
LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node "
<< "(non-trivial root): "
<< BlockNamePrinter(FurthestAway) << "\n");
- ConnectToExitBlock.insert(FurthestAway);
Roots.push_back(FurthestAway);
LLVM_DEBUG(dbgs() << "\t\t\tPrev DFSNum: " << Num << ", new DFSNum: "
<< NewNum << "\n\t\t\tRemoving DFS info\n");
diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h
index b886bf45f474..11a31bf40160 100644
--- a/llvm/include/llvm/Support/GraphWriter.h
+++ b/llvm/include/llvm/Support/GraphWriter.h
@@ -66,6 +66,7 @@ template<typename GraphType>
class GraphWriter {
raw_ostream &O;
const GraphType &G;
+ bool RenderUsingHTML = false;
using DOTTraits = DOTGraphTraits<GraphType>;
using GTraits = GraphTraits<GraphType>;
@@ -86,6 +87,9 @@ class GraphWriter {
child_iterator EE = GTraits::child_end(Node);
bool hasEdgeSourceLabels = false;
+ if (RenderUsingHTML)
+ O << "</tr><tr>";
+
for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) {
std::string label = DTraits.getEdgeSourceLabel(Node, EI);
@@ -94,14 +98,22 @@ class GraphWriter {
hasEdgeSourceLabels = true;
- if (i)
- O << "|";
+ if (RenderUsingHTML)
+ O << "<td colspan=\"1\" port=\"s" << i << "\">" << label << "</td>";
+ else {
+ if (i)
+ O << "|";
- O << "<s" << i << ">" << DOT::EscapeString(label);
+ O << "<s" << i << ">" << DOT::EscapeString(label);
+ }
}
- if (EI != EE && hasEdgeSourceLabels)
- O << "|<s64>truncated...";
+ if (EI != EE && hasEdgeSourceLabels) {
+ if (RenderUsingHTML)
+ O << "<td colspan=\"1\" port=\"s64\">truncated...</td>";
+ else
+ O << "|<s64>truncated...";
+ }
return hasEdgeSourceLabels;
}
@@ -109,6 +121,7 @@ class GraphWriter {
public:
GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) {
DTraits = DOTTraits(SN);
+ RenderUsingHTML = DTraits.renderNodesUsingHTML();
}
void writeGraph(const std::string &Title = "") {
@@ -163,12 +176,39 @@ public:
void writeNode(NodeRef Node) {
std::string NodeAttributes = DTraits.getNodeAttributes(Node, G);
- O << "\tNode" << static_cast<const void*>(Node) << " [shape=record,";
+ O << "\tNode" << static_cast<const void *>(Node) << " [shape=";
+ if (RenderUsingHTML)
+ O << "none,";
+ else
+ O << "record,";
+
if (!NodeAttributes.empty()) O << NodeAttributes << ",";
- O << "label=\"{";
+ O << "label=";
+
+ if (RenderUsingHTML) {
+ // Count the number of edges out of the node to determine how
+ // many columns to span (max 64)
+ unsigned ColSpan = 0;
+ child_iterator EI = GTraits::child_begin(Node);
+ child_iterator EE = GTraits::child_end(Node);
+ for (; EI != EE && ColSpan != 64; ++EI, ++ColSpan)
+ ;
+ if (ColSpan == 0)
+ ColSpan = 1;
+ // Include truncated messages when counting.
+ if (EI != EE)
+ ++ColSpan;
+ O << "<<table border=\"0\" cellborder=\"1\" cellspacing=\"0\""
+ << " cellpadding=\"0\"><tr><td align=\"text\" colspan=\"" << ColSpan
+ << "\">";
+ } else
+ O << "\"{";
if (!DTraits.renderGraphFromBottomUp()) {
- O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+ if (RenderUsingHTML)
+ O << DTraits.getNodeLabel(Node, G) << "</td>";
+ else
+ O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
// If we should include the address of the node in the label, do so now.
std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
@@ -185,15 +225,25 @@ public:
bool hasEdgeSourceLabels = getEdgeSourceLabels(EdgeSourceLabels, Node);
if (hasEdgeSourceLabels) {
- if (!DTraits.renderGraphFromBottomUp()) O << "|";
-
- O << "{" << EdgeSourceLabels.str() << "}";
-
- if (DTraits.renderGraphFromBottomUp()) O << "|";
+ if (!DTraits.renderGraphFromBottomUp())
+ if (!RenderUsingHTML)
+ O << "|";
+
+ if (RenderUsingHTML)
+ O << EdgeSourceLabels.str();
+ else
+ O << "{" << EdgeSourceLabels.str() << "}";
+
+ if (DTraits.renderGraphFromBottomUp())
+ if (!RenderUsingHTML)
+ O << "|";
}
if (DTraits.renderGraphFromBottomUp()) {
- O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+ if (RenderUsingHTML)
+ O << DTraits.getNodeLabel(Node, G);
+ else
+ O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
// If we should include the address of the node in the label, do so now.
std::string Id = DTraits.getNodeIdentifierLabel(Node, G);
@@ -215,12 +265,17 @@ public:
<< DOT::EscapeString(DTraits.getEdgeDestLabel(Node, i));
}
- if (i != e)
- O << "|<d64>truncated...";
- O << "}";
+ if (RenderUsingHTML)
+ O << "<td colspan=\"1\">... truncated</td>";
+ else if (i != e)
+ O << "|<d64>truncated...}";
}
- O << "}\"];\n"; // Finish printing the "node" line
+ if (RenderUsingHTML)
+ O << "</tr></table>>";
+ else
+ O << "}\"";
+ O << "];\n"; // Finish printing the "node" line
// Output all of the edges now
child_iterator EI = GTraits::child_begin(Node);
diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
new file mode 100644
index 000000000000..bf93a0d22da7
--- /dev/null
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -0,0 +1,438 @@
+//===- llvm/Support/HashBuilder.h - Convenient hashing interface-*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an interface for conveniently building hashes of
+// various data types, without relying on the underlying hasher type to know
+// about hashed data types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_HASHBUILDER_H
+#define LLVM_SUPPORT_HASHBUILDER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/type_traits.h"
+
+#include <iterator>
+#include <utility>
+
+namespace llvm {
+
+namespace hashbuilder_detail {
+/// Trait to indicate whether a type's bits can be hashed directly (after
+/// endianness correction).
+template <typename U>
+struct IsHashableData
+ : std::integral_constant<bool, is_integral_or_enum<U>::value> {};
+
+} // namespace hashbuilder_detail
+
+/// Declares the hasher member, and functions forwarding directly to the hasher.
+template <typename HasherT> class HashBuilderBase {
+public:
+ HasherT &getHasher() { return Hasher; }
+
+ /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
+ ///
+ /// This may not take the size of `Data` into account.
+ /// Users of this function should take care to respect endianness
+ /// constraints.
+ void update(ArrayRef<uint8_t> Data) { this->getHasher().update(Data); }
+
+ /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
+ ///
+ /// This may not take the size of `Data` into account.
+ /// Users of this function should take care to respect endianness
+ /// constraints.
+ void update(StringRef Data) {
+ update(makeArrayRef(reinterpret_cast<const uint8_t *>(Data.data()),
+ Data.size()));
+ }
+
+ /// Forward to `HasherT::final()` if available.
+ template <typename HasherT_ = HasherT> StringRef final() {
+ return this->getHasher().final();
+ }
+
+ /// Forward to `HasherT::result()` if available.
+ template <typename HasherT_ = HasherT> StringRef result() {
+ return this->getHasher().result();
+ }
+
+protected:
+ explicit HashBuilderBase(HasherT &Hasher) : Hasher(Hasher) {}
+
+ template <typename... ArgTypes>
+ explicit HashBuilderBase(ArgTypes &&...Args)
+ : OptionalHasher(in_place, std::forward<ArgTypes>(Args)...),
+ Hasher(*OptionalHasher) {}
+
+private:
+ Optional<HasherT> OptionalHasher;
+ HasherT &Hasher;
+};
+
+/// Implementation of the `HashBuilder` interface.
+///
+/// `support::endianness::native` is not supported. `HashBuilder` is
+/// expected to canonicalize `support::endianness::native` to one of
+/// `support::endianness::big` or `support::endianness::little`.
+template <typename HasherT, support::endianness Endianness>
+class HashBuilderImpl : public HashBuilderBase<HasherT> {
+ static_assert(Endianness != support::endianness::native,
+ "HashBuilder should canonicalize endianness");
+
+public:
+ explicit HashBuilderImpl(HasherT &Hasher)
+ : HashBuilderBase<HasherT>(Hasher) {}
+ template <typename... ArgTypes>
+ explicit HashBuilderImpl(ArgTypes &&...Args)
+ : HashBuilderBase<HasherT>(Args...) {}
+
+ /// Implement hashing for hashable data types, e.g. integral or enum values.
+ template <typename T>
+ std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value,
+ HashBuilderImpl &>
+ add(T Value) {
+ return adjustForEndiannessAndAdd(Value);
+ }
+
+ /// Support hashing `ArrayRef`.
+ ///
+ /// `Value.size()` is taken into account to ensure cases like
+ /// ```
+ /// builder.add({1});
+ /// builder.add({2, 3});
+ /// ```
+ /// and
+ /// ```
+ /// builder.add({1, 2});
+ /// builder.add({3});
+ /// ```
+ /// do not collide.
+ template <typename T> HashBuilderImpl &add(ArrayRef<T> Value) {
+ // As of implementation time, simply calling `addRange(Value)` would also go
+ // through the `update` fast path. But that would rely on the implementation
+ // details of `ArrayRef::begin()` and `ArrayRef::end()`. Explicitly call
+ // `update` to guarantee the fast path.
+ add(Value.size());
+ if (hashbuilder_detail::IsHashableData<T>::value &&
+ Endianness == support::endian::system_endianness()) {
+ this->update(
+ makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
+ Value.size() * sizeof(T)));
+ } else {
+ for (auto &V : Value)
+ add(V);
+ }
+ return *this;
+ }
+
+ /// Support hashing `StringRef`.
+ ///
+ /// `Value.size()` is taken into account to ensure cases like
+ /// ```
+ /// builder.add("a");
+ /// builder.add("bc");
+ /// ```
+ /// and
+ /// ```
+ /// builder.add("ab");
+ /// builder.add("c");
+ /// ```
+ /// do not collide.
+ HashBuilderImpl &add(StringRef Value) {
+ // As of implementation time, simply calling `addRange(Value)` would also go
+ // through `update`. But that would rely on the implementation of
+ // `StringRef::begin()` and `StringRef::end()`. Explicitly call `update` to
+ // guarantee the fast path.
+ add(Value.size());
+ this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(Value.begin()),
+ Value.size()));
+ return *this;
+ }
+
+ template <typename T>
+ using HasAddHashT =
+ decltype(addHash(std::declval<HashBuilderImpl &>(), std::declval<T &>()));
+ /// Implement hashing for user-defined `struct`s.
+ ///
+ /// Any user-defined `struct` can participate in hashing via `HashBuilder` by
+ /// providing an `addHash` templated function.
+ ///
+ /// ```
+ /// template <typename HasherT, support::endianness Endianness>
+ /// void addHash(HashBuilder<HasherT, Endianness> &HBuilder,
+ /// const UserDefinedStruct &Value);
+ /// ```
+ ///
+ /// For example:
+ /// ```
+ /// struct SimpleStruct {
+ /// char c;
+ /// int i;
+ /// };
+ ///
+ /// template <typename HasherT, support::endianness Endianness>
+ /// void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ /// const SimpleStruct &Value) {
+ /// HBuilder.add(Value.c);
+ /// HBuilder.add(Value.i);
+ /// }
+ /// ```
+ ///
+ /// To avoid endianness issues, specializations of `addHash` should
+ /// generally rely on existing `add`, `addRange`, and `addRangeElements`
+ /// functions. If directly using `update`, an implementation must correctly
+ /// handle endianness.
+ ///
+ /// ```
+ /// struct __attribute__ ((packed)) StructWithFastHash {
+ /// int I;
+ /// char C;
+ ///
+ /// // If possible, we want to hash both `I` and `C` in a single
+ /// // `update` call for performance concerns.
+ /// template <typename HasherT, support::endianness Endianness>
+ /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ /// const StructWithFastHash &Value) {
+ /// if (Endianness == support::endian::system_endianness()) {
+ /// HBuilder.update(makeArrayRef(
+ /// reinterpret_cast<const uint8_t *>(&Value), sizeof(Value)));
+ /// } else {
+ /// // Rely on existing `add` methods to handle endianness.
+ /// HBuilder.add(Value.I);
+ /// HBuilder.add(Value.C);
+ /// }
+ /// }
+ /// };
+ /// ```
+ ///
+ /// To avoid collisions, specialization of `addHash` for variable-size
+ /// types must take the size into account.
+ ///
+ /// For example:
+ /// ```
+ /// struct CustomContainer {
+ /// private:
+ /// size_t Size;
+ /// int Elements[100];
+ ///
+ /// public:
+ /// CustomContainer(size_t Size) : Size(Size) {
+ /// for (size_t I = 0; I != Size; ++I)
+ /// Elements[I] = I;
+ /// }
+ /// template <typename HasherT, support::endianness Endianness>
+ /// friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ /// const CustomContainer &Value) {
+ /// if (Endianness == support::endian::system_endianness()) {
+ /// HBuilder.update(makeArrayRef(
+ /// reinterpret_cast<const uint8_t *>(&Value.Size),
+ /// sizeof(Value.Size) + Value.Size * sizeof(Value.Elements[0])));
+ /// } else {
+ /// // `addRange` will take care of encoding the size.
+ /// HBuilder.addRange(&Value.Elements[0], &Value.Elements[0] +
+ /// Value.Size);
+ /// }
+ /// }
+ /// };
+ /// ```
+ template <typename T>
+ std::enable_if_t<is_detected<HasAddHashT, T>::value &&
+ !hashbuilder_detail::IsHashableData<T>::value,
+ HashBuilderImpl &>
+ add(const T &Value) {
+ addHash(*this, Value);
+ return *this;
+ }
+
+ template <typename T1, typename T2>
+ HashBuilderImpl &add(const std::pair<T1, T2> &Value) {
+ add(Value.first);
+ add(Value.second);
+ return *this;
+ }
+
+ template <typename... Ts> HashBuilderImpl &add(const std::tuple<Ts...> &Arg) {
+ return addTupleHelper(Arg, typename std::index_sequence_for<Ts...>());
+ }
+
+ /// A convenience variadic helper.
+ /// It simply iterates over its arguments, in order.
+ /// ```
+ /// add(Arg1, Arg2);
+ /// ```
+ /// is equivalent to
+ /// ```
+ /// add(Arg1)
+ /// add(Arg2)
+ /// ```
+ template <typename T, typename... Ts>
+ typename std::enable_if<(sizeof...(Ts) >= 1), HashBuilderImpl &>::type
+ add(const T &FirstArg, const Ts &...Args) {
+ add(FirstArg);
+ add(Args...);
+ return *this;
+ }
+
+ template <typename ForwardIteratorT>
+ HashBuilderImpl &addRange(ForwardIteratorT First, ForwardIteratorT Last) {
+ add(std::distance(First, Last));
+ return addRangeElements(First, Last);
+ }
+
+ template <typename RangeT> HashBuilderImpl &addRange(const RangeT &Range) {
+ return addRange(adl_begin(Range), adl_end(Range));
+ }
+
+ template <typename ForwardIteratorT>
+ HashBuilderImpl &addRangeElements(ForwardIteratorT First,
+ ForwardIteratorT Last) {
+ return addRangeElementsImpl(
+ First, Last,
+ typename std::iterator_traits<ForwardIteratorT>::iterator_category());
+ }
+
+ template <typename RangeT>
+ HashBuilderImpl &addRangeElements(const RangeT &Range) {
+ return addRangeElements(adl_begin(Range), adl_end(Range));
+ }
+
+ template <typename T>
+ using HasByteSwapT = decltype(support::endian::byte_swap(
+ std::declval<T &>(), support::endianness::little));
+ /// Adjust `Value` for the target endianness and add it to the hash.
+ template <typename T>
+ std::enable_if_t<is_detected<HasByteSwapT, T>::value, HashBuilderImpl &>
+ adjustForEndiannessAndAdd(const T &Value) {
+ T SwappedValue = support::endian::byte_swap(Value, Endianness);
+ this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(&SwappedValue),
+ sizeof(SwappedValue)));
+ return *this;
+ }
+
+private:
+ template <typename... Ts, std::size_t... Indices>
+ HashBuilderImpl &addTupleHelper(const std::tuple<Ts...> &Arg,
+ std::index_sequence<Indices...>) {
+ add(std::get<Indices>(Arg)...);
+ return *this;
+ }
+
+ // FIXME: Once available, specialize this function for `contiguous_iterator`s,
+ // and use it for `ArrayRef` and `StringRef`.
+ template <typename ForwardIteratorT>
+ HashBuilderImpl &addRangeElementsImpl(ForwardIteratorT First,
+ ForwardIteratorT Last,
+ std::forward_iterator_tag) {
+ for (auto It = First; It != Last; ++It)
+ add(*It);
+ return *this;
+ }
+
+ template <typename T>
+ std::enable_if_t<hashbuilder_detail::IsHashableData<T>::value &&
+ Endianness == support::endian::system_endianness(),
+ HashBuilderImpl &>
+ addRangeElementsImpl(T *First, T *Last, std::forward_iterator_tag) {
+ this->update(makeArrayRef(reinterpret_cast<const uint8_t *>(First),
+ (Last - First) * sizeof(T)));
+ return *this;
+ }
+};
+
+/// Interface to help hash various types through a hasher type.
+///
+/// Via provided specializations of `add`, `addRange`, and `addRangeElements`
+/// functions, various types (e.g. `ArrayRef`, `StringRef`, etc.) can be hashed
+/// without requiring any knowledge of hashed types from the hasher type.
+///
+/// The only method expected from the templated hasher type `HasherT` is:
+/// * void update(ArrayRef<uint8_t> Data)
+///
+/// Additionally, the following methods will be forwarded to the hasher type:
+/// * decltype(std::declval<HasherT &>().final()) final()
+/// * decltype(std::declval<HasherT &>().result()) result()
+///
+/// From a user point of view, the interface provides the following:
+/// * `template<typename T> add(const T &Value)`
+/// The `add` function implements hashing of various types.
+/// * `template <typename ItT> void addRange(ItT First, ItT Last)`
+/// The `addRange` function is designed to aid hashing a range of values.
+/// It explicitly adds the size of the range in the hash.
+/// * `template <typename ItT> void addRangeElements(ItT First, ItT Last)`
+/// The `addRangeElements` function is also designed to aid hashing a range of
+/// values. In contrast to `addRange`, it **ignores** the size of the range,
+/// behaving as if elements were added one at a time with `add`.
+///
+/// User-defined `struct` types can participate in this interface by providing
+/// an `addHash` templated function. See the associated template specialization
+/// for details.
+///
+/// This interface does not impose requirements on the hasher
+/// `update(ArrayRef<uint8_t> Data)` method. We want to avoid collisions for
+/// variable-size types; for example for
+/// ```
+/// builder.add({1});
+/// builder.add({2, 3});
+/// ```
+/// and
+/// ```
+/// builder.add({1, 2});
+/// builder.add({3});
+/// ```
+/// . Thus, specializations of `add` and `addHash` for variable-size types must
+/// not assume that the hasher type considers the size as part of the hash; they
+/// must explicitly add the size to the hash. See for example specializations
+/// for `ArrayRef` and `StringRef`.
+///
+/// Additionally, since types are eventually forwarded to the hasher's
+/// `void update(ArrayRef<uint8_t>)` method, endianness plays a role in the hash
+/// computation (for example when computing `add((int)123)`).
+/// Specifying a non-`native` `Endianness` template parameter allows computing a
+/// stable hash across platforms with different endianness.
+template <class HasherT, support::endianness Endianness>
+using HashBuilder =
+ HashBuilderImpl<HasherT, (Endianness == support::endianness::native
+ ? support::endian::system_endianness()
+ : Endianness)>;
+
+namespace hashbuilder_detail {
+class HashCodeHasher {
+public:
+ HashCodeHasher() : Code(0) {}
+ void update(ArrayRef<uint8_t> Data) {
+ hash_code DataCode = hash_value(Data);
+ Code = hash_combine(Code, DataCode);
+ }
+ hash_code Code;
+};
+
+using HashCodeHashBuilder = HashBuilder<hashbuilder_detail::HashCodeHasher,
+ support::endianness::native>;
+} // namespace hashbuilder_detail
+
+/// Provide a default implementation of `hash_value` when `addHash(const T &)`
+/// is supported.
+template <typename T>
+std::enable_if_t<
+ is_detected<hashbuilder_detail::HashCodeHashBuilder::HasAddHashT, T>::value,
+ hash_code>
+hash_value(const T &Value) {
+ hashbuilder_detail::HashCodeHashBuilder HBuilder;
+ HBuilder.add(Value);
+ return HBuilder.getHasher().Code;
+}
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_HASHBUILDER_H
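To make the interface concrete, here is a sketch of hashing a small user-defined struct with the MD5 hasher (which gains compatible `final()`/`result()` overloads elsewhere in this patch); `FileKey` and its fields are hypothetical:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/HashBuilder.h"
    #include "llvm/Support/MD5.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    using namespace llvm;

    struct FileKey {
      uint64_t Size;
      std::string Path;

      // Customization point: makes FileKey hashable through HashBuilder.
      template <typename HasherT, support::endianness Endianness>
      friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
                          const FileKey &K) {
        HBuilder.add(K.Size, StringRef(K.Path));
      }
    };

    int main() {
      // Fixing little-endian keeps the digest stable across hosts.
      HashBuilder<MD5, support::endianness::little> HBuilder;
      HBuilder.add(FileKey{4096, "a.o"});

      MD5::MD5Result Digest;
      HBuilder.getHasher().final(Digest);
      SmallString<32> Hex;
      MD5::stringifyResult(Digest, Hex);
      outs() << Hex << "\n";
      return 0;
    }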
diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h
index c753cee60ec1..469f50be40e0 100644
--- a/llvm/include/llvm/Support/JSON.h
+++ b/llvm/include/llvm/Support/JSON.h
@@ -234,7 +234,7 @@ inline bool operator!=(const Array &L, const Array &R) { return !(L == R); }
/// Each Value is one of the JSON kinds:
/// null (nullptr_t)
/// boolean (bool)
-/// number (double or int64)
+/// number (double, int64 or uint64)
/// string (StringRef)
/// array (json::Array)
/// object (json::Object)
@@ -342,9 +342,20 @@ public:
Value(T B) : Type(T_Boolean) {
create<bool>(B);
}
- // Integers (except boolean). Must be non-narrowing convertible to int64_t.
+
+ // Unsigned 64-bit long integers.
+ template <typename T,
+ typename = std::enable_if_t<std::is_same<T, uint64_t>::value>,
+ bool = false, bool = false>
+ Value(T V) : Type(T_UINT64) {
+ create<uint64_t>(uint64_t{V});
+ }
+
+ // Integers (except boolean and uint64_t).
+ // Must be non-narrowing convertible to int64_t.
template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
- typename = std::enable_if_t<!std::is_same<T, bool>::value>>
+ typename = std::enable_if_t<!std::is_same<T, bool>::value>,
+ typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>>
Value(T I) : Type(T_Integer) {
create<int64_t>(int64_t{I});
}
@@ -382,6 +393,7 @@ public:
return Boolean;
case T_Double:
case T_Integer:
+ case T_UINT64:
return Number;
case T_String:
case T_StringRef:
@@ -410,6 +422,8 @@ public:
return as<double>();
if (LLVM_LIKELY(Type == T_Integer))
return as<int64_t>();
+ if (LLVM_LIKELY(Type == T_UINT64))
+ return as<uint64_t>();
return llvm::None;
}
// Succeeds if the Value is a Number, and exactly representable as int64_t.
@@ -425,6 +439,16 @@ public:
}
return llvm::None;
}
+ llvm::Optional<uint64_t> getAsUINT64() const {
+ if (Type == T_UINT64)
+ return as<uint64_t>();
+ else if (Type == T_Integer) {
+ int64_t N = as<int64_t>();
+ if (N >= 0)
+ return as<uint64_t>();
+ }
+ return llvm::None;
+ }
llvm::Optional<llvm::StringRef> getAsString() const {
if (Type == T_String)
return llvm::StringRef(as<std::string>());
@@ -467,11 +491,12 @@ private:
friend class OStream;
- enum ValueType : char {
+ enum ValueType : char16_t {
T_Null,
T_Boolean,
T_Double,
T_Integer,
+ T_UINT64,
T_StringRef,
T_String,
T_Object,
@@ -479,8 +504,9 @@ private:
};
// All members mutable, see moveFrom().
mutable ValueType Type;
- mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, llvm::StringRef,
- std::string, json::Array, json::Object>
+ mutable llvm::AlignedCharArrayUnion<bool, double, int64_t, uint64_t,
+ llvm::StringRef, std::string, json::Array,
+ json::Object>
Union;
friend bool operator==(const Value &, const Value &);
};
@@ -683,6 +709,14 @@ inline bool fromJSON(const Value &E, bool &Out, Path P) {
P.report("expected boolean");
return false;
}
+inline bool fromJSON(const Value &E, uint64_t &Out, Path P) {
+ if (auto S = E.getAsUINT64()) {
+ Out = *S;
+ return true;
+ }
+ P.report("expected uint64_t");
+ return false;
+}
inline bool fromJSON(const Value &E, std::nullptr_t &Out, Path P) {
if (auto S = E.getAsNull()) {
Out = *S;
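A short sketch of the new 64-bit unsigned support in action (values and names are arbitrary):

    #include "llvm/Support/JSON.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // Values above INT64_MAX now round-trip through json::Value.
      uint64_t Big = 0x8000000000000001ULL;
      json::Value V = Big;
      if (Optional<uint64_t> U = V.getAsUINT64())
        outs() << "stored: " << *U << "\n";

      // fromJSON gains a matching overload for deserialization.
      uint64_t Out = 0;
      json::Path::Root Root;
      if (fromJSON(V, Out, Root))
        outs() << "parsed: " << Out << "\n";
      return 0;
    }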
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index cfec5796493f..1f32760a6fd1 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -60,7 +60,7 @@ public:
}
/// Returns true if we don't know any bits.
- bool isUnknown() const { return Zero.isNullValue() && One.isNullValue(); }
+ bool isUnknown() const { return Zero.isZero() && One.isZero(); }
/// Resets the known state of all bits.
void resetAll() {
@@ -71,13 +71,13 @@ public:
/// Returns true if value is all zero.
bool isZero() const {
assert(!hasConflict() && "KnownBits conflict!");
- return Zero.isAllOnesValue();
+ return Zero.isAllOnes();
}
/// Returns true if value is all one bits.
bool isAllOnes() const {
assert(!hasConflict() && "KnownBits conflict!");
- return One.isAllOnesValue();
+ return One.isAllOnes();
}
/// Make all bits known to be zero and discard any previous information.
@@ -99,10 +99,12 @@ public:
bool isNonNegative() const { return Zero.isSignBitSet(); }
/// Returns true if this value is known to be non-zero.
- bool isNonZero() const { return !One.isNullValue(); }
+ bool isNonZero() const { return !One.isZero(); }
/// Returns true if this value is known to be positive.
- bool isStrictlyPositive() const { return Zero.isSignBitSet() && !One.isNullValue(); }
+ bool isStrictlyPositive() const {
+ return Zero.isSignBitSet() && !One.isZero();
+ }
/// Make this value negative.
void makeNegative() {
@@ -280,6 +282,10 @@ public:
return getBitWidth() - Zero.countPopulation();
}
+ unsigned countMaxActiveBits() const {
+ return getBitWidth() - countMinLeadingZeros();
+ }
+
/// Create known bits from a known constant.
static KnownBits makeConstant(const APInt &C) {
return KnownBits(~C, C);
@@ -292,7 +298,7 @@ public:
/// Return true if LHS and RHS have no common bits set.
static bool haveNoCommonBitsSet(const KnownBits &LHS, const KnownBits &RHS) {
- return (LHS.Zero | RHS.Zero).isAllOnesValue();
+ return (LHS.Zero | RHS.Zero).isAllOnes();
}
/// Compute known bits resulting from adding LHS, RHS and a 1-bit Carry.
@@ -304,7 +310,8 @@ public:
KnownBits RHS);
/// Compute known bits resulting from multiplying LHS and RHS.
- static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS);
+ static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS,
+ bool SelfMultiply = false);
/// Compute known bits from sign-extended multiply-hi.
static KnownBits mulhs(const KnownBits &LHS, const KnownBits &RHS);
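A sketch of the two additions visible here, `countMaxActiveBits()` and the `SelfMultiply` hint to `mul()` (the sample constant is arbitrary):

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // For a fully known constant, the maximum active bits are exact.
      KnownBits K = KnownBits::makeConstant(APInt(32, 42));
      outs() << "max active bits: " << K.countMaxActiveBits() << "\n"; // 6

      // SelfMultiply tells mul() both operands are the same value, which
      // lets it prove more bits of the square.
      KnownBits Square = KnownBits::mul(K, K, /*SelfMultiply=*/true);
      outs() << "known-zero bits of 42*42: " << Square.Zero.countPopulation()
             << "\n";
      return 0;
    }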
diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h
index 3b2d5b974d0b..3b960cd4fd88 100644
--- a/llvm/include/llvm/Support/MD5.h
+++ b/llvm/include/llvm/Support/MD5.h
@@ -39,18 +39,6 @@ template <unsigned N> class SmallString;
template <typename T> class ArrayRef;
class MD5 {
- // Any 32-bit or wider unsigned integer data type will do.
- typedef uint32_t MD5_u32plus;
-
- MD5_u32plus a = 0x67452301;
- MD5_u32plus b = 0xefcdab89;
- MD5_u32plus c = 0x98badcfe;
- MD5_u32plus d = 0x10325476;
- MD5_u32plus hi = 0;
- MD5_u32plus lo = 0;
- uint8_t buffer[64];
- MD5_u32plus block[16];
-
public:
struct MD5Result {
std::array<uint8_t, 16> Bytes;
@@ -90,6 +78,14 @@ public:
/// Finishes off the hash and puts the result in result.
void final(MD5Result &Result);
+ /// Finishes off the hash, and returns a reference to the 16-byte hash data.
+ StringRef final();
+
+ /// Finishes off the hash, and returns a reference to the 16-byte hash data.
+ /// This is suitable for getting the MD5 at any time without invalidating the
+ /// internal state, so that more calls can be made into `update`.
+ StringRef result();
+
/// Translates the bytes in \p Res to a hex string that is
/// deposited into \p Str. The result will be of length 32.
static void stringifyResult(MD5Result &Result, SmallString<32> &Str);
@@ -98,6 +94,23 @@ public:
static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data);
private:
+ // Any 32-bit or wider unsigned integer data type will do.
+ typedef uint32_t MD5_u32plus;
+
+ // Internal State
+ struct {
+ MD5_u32plus a = 0x67452301;
+ MD5_u32plus b = 0xefcdab89;
+ MD5_u32plus c = 0x98badcfe;
+ MD5_u32plus d = 0x10325476;
+ MD5_u32plus hi = 0;
+ MD5_u32plus lo = 0;
+ uint8_t buffer[64];
+ MD5_u32plus block[16];
+ } InternalState;
+
+ MD5Result Result;
+
const uint8_t *body(ArrayRef<uint8_t> Data);
};
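The new `StringRef`-returning overloads make one-shot digests shorter to write; a sketch (the input string is arbitrary, and `result()` could be used instead to peek at the digest without finalizing the stream):

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/Support/MD5.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      MD5 Hash;
      Hash.update("hello world");
      // final() now hands back the 16 raw digest bytes directly.
      StringRef Digest = Hash.final();
      outs() << toHex(Digest, /*LowerCase=*/true) << "\n";
      return 0;
    }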
diff --git a/llvm/include/llvm/Support/MSP430AttributeParser.h b/llvm/include/llvm/Support/MSP430AttributeParser.h
new file mode 100644
index 000000000000..bc9b21494470
--- /dev/null
+++ b/llvm/include/llvm/Support/MSP430AttributeParser.h
@@ -0,0 +1,44 @@
+//===-- MSP430AttributeParser.h - MSP430 Attribute Parser -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains support routines for parsing MSP430 ELF build attributes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MSP430ATTRIBUTEPARSER_H
+#define LLVM_SUPPORT_MSP430ATTRIBUTEPARSER_H
+
+#include "llvm/Support/ELFAttributeParser.h"
+#include "llvm/Support/MSP430Attributes.h"
+
+namespace llvm {
+class MSP430AttributeParser : public ELFAttributeParser {
+ struct DisplayHandler {
+ MSP430Attrs::AttrType Attribute;
+ Error (MSP430AttributeParser::*Routine)(MSP430Attrs::AttrType);
+ };
+ static const std::array<DisplayHandler, 4> DisplayRoutines;
+
+ Error parseISA(MSP430Attrs::AttrType Tag);
+ Error parseCodeModel(MSP430Attrs::AttrType Tag);
+ Error parseDataModel(MSP430Attrs::AttrType Tag);
+ Error parseEnumSize(MSP430Attrs::AttrType Tag);
+
+ Error handler(uint64_t Tag, bool &Handled) override;
+
+public:
+ MSP430AttributeParser(ScopedPrinter *SW)
+ : ELFAttributeParser(SW, MSP430Attrs::getMSP430AttributeTags(),
+ "mspabi") {}
+ MSP430AttributeParser()
+ : ELFAttributeParser(MSP430Attrs::getMSP430AttributeTags(), "mspabi") {}
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Support/MSP430Attributes.h b/llvm/include/llvm/Support/MSP430Attributes.h
new file mode 100644
index 000000000000..fccd65e844c3
--- /dev/null
+++ b/llvm/include/llvm/Support/MSP430Attributes.h
@@ -0,0 +1,44 @@
+//===-- MSP430Attributes.h - MSP430 Attributes ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains enumerations for MSP430 ELF build attributes as
+/// defined in the MSP430 ELF psABI specification.
+///
+/// MSP430 ELF psABI specification
+///
+/// https://www.ti.com/lit/pdf/slaa534
+///
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_MSP430ATTRIBUTES_H
+#define LLVM_SUPPORT_MSP430ATTRIBUTES_H
+
+#include "llvm/Support/ELFAttributes.h"
+
+namespace llvm {
+namespace MSP430Attrs {
+
+const TagNameMap &getMSP430AttributeTags();
+
+enum AttrType : unsigned {
+ // Attribute types in ELF/.MSP430.attributes.
+ TagISA = 4,
+ TagCodeModel = 6,
+ TagDataModel = 8,
+ TagEnumSize = 10
+};
+
+enum ISA { ISAMSP430 = 1, ISAMSP430X = 2 };
+enum CodeModel { CMSmall = 1, CMLarge = 2 };
+enum DataModel { DMSmall = 1, DMLarge = 2, DMRestricted = 3 };
+enum EnumSize { ESSmall = 1, ESInteger = 2, ESDontCare = 3 };
+
+} // namespace MSP430Attrs
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 31f2d5a48183..ce10a4c58dfe 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -270,9 +270,10 @@ namespace llvm {
funcref = 175, // WebAssembly's funcref type
externref = 176, // WebAssembly's externref type
x86amx = 177, // This is an X86 AMX value
+ i64x8 = 178, // 8 Consecutive GPRs (AArch64)
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = x86amx, // This always remains at the end of the list.
+ LAST_VALUETYPE = i64x8, // This always remains at the end of the list.
VALUETYPE_SIZE = LAST_VALUETYPE + 1,
// This is the current maximum for LAST_VALUETYPE.
@@ -987,6 +988,7 @@ namespace llvm {
case nxv16f16:
case nxv8f32:
case nxv4f64: return TypeSize::Scalable(256);
+ case i64x8:
case v512i1:
case v64i8:
case v32i16:
@@ -1403,51 +1405,61 @@ namespace llvm {
/// SimpleValueType Iteration
/// @{
static auto all_valuetypes() {
- return seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto integer_valuetypes() {
- return seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
- MVT::LAST_INTEGER_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
+ MVT::LAST_INTEGER_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fp_valuetypes() {
- return seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
- MVT::LAST_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
+ MVT::LAST_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fixedlen_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto scalable_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto integer_fixedlen_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fp_fixedlen_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto integer_scalable_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
static auto fp_scalable_vector_valuetypes() {
- return seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE);
+ return enum_seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
}
/// @}
};
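Client code iterating these ranges is unchanged; only the helpers now go through `enum_seq_inclusive` with the explicit opt-in tag. An illustrative sketch:

    #include "llvm/Support/MachineValueType.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      unsigned Count = 0;
      for (MVT VT : MVT::fp_valuetypes()) {
        (void)VT;
        ++Count;
      }
      outs() << "floating-point MVTs: " << Count << "\n";
      return 0;
    }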
diff --git a/llvm/include/llvm/Support/Memory.h b/llvm/include/llvm/Support/Memory.h
index 31e0abbcdb61..d7d60371d315 100644
--- a/llvm/include/llvm/Support/Memory.h
+++ b/llvm/include/llvm/Support/Memory.h
@@ -37,7 +37,7 @@ namespace sys {
/// The size as it was allocated. This is always greater or equal to the
/// size that was originally requested.
size_t allocatedSize() const { return AllocatedSize; }
-
+
private:
void *Address; ///< Address of first byte of memory area
size_t AllocatedSize; ///< Size, in bytes of the memory area
@@ -148,13 +148,22 @@ namespace sys {
return *this;
}
~OwningMemoryBlock() {
- Memory::releaseMappedMemory(M);
+ if (M.base())
+ Memory::releaseMappedMemory(M);
}
void *base() const { return M.base(); }
/// The size as it was allocated. This is always greater or equal to the
/// size that was originally requested.
size_t allocatedSize() const { return M.allocatedSize(); }
MemoryBlock getMemoryBlock() const { return M; }
+ std::error_code release() {
+ std::error_code EC;
+ if (M.base()) {
+ EC = Memory::releaseMappedMemory(M);
+ M = MemoryBlock();
+ }
+ return EC;
+ }
private:
MemoryBlock M;
};
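The new `release()` frees the mapping eagerly and surfaces the error code that the destructor would otherwise swallow. A sketch (the page size and protection flags are arbitrary):

    #include "llvm/Support/Memory.h"

    using namespace llvm;

    static std::error_code useScratchPage() {
      std::error_code EC;
      sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
          4096, /*NearBlock=*/nullptr,
          sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
      if (EC)
        return EC;
      sys::OwningMemoryBlock Owned(MB);
      // ... write through Owned.base(), up to Owned.allocatedSize() bytes ...
      return Owned.release(); // explicit, error-checked unmap
    }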
diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h
new file mode 100644
index 000000000000..2141e2159c0c
--- /dev/null
+++ b/llvm/include/llvm/Support/PGOOptions.h
@@ -0,0 +1,65 @@
+//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Define option tunables for PGO.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PGOOPTIONS_H
+#define LLVM_SUPPORT_PGOOPTIONS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+/// A struct capturing PGO tunables.
+struct PGOOptions {
+ enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
+ enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+ PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
+ std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
+ CSPGOAction CSAction = NoCSAction,
+ bool DebugInfoForProfiling = false,
+ bool PseudoProbeForProfiling = false)
+ : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+ ProfileRemappingFile(ProfileRemappingFile), Action(Action),
+ CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
+ (Action == SampleUse &&
+ !PseudoProbeForProfiling)),
+ PseudoProbeForProfiling(PseudoProbeForProfiling) {
+ // Note: we do allow ProfileFile.empty() for Action=IRUse, since LTO may
+ // call back with the IRUse action without a ProfileFile.
+
+ // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+ assert(this->CSAction == NoCSAction ||
+ (this->Action != IRInstr && this->Action != SampleUse));
+
+ // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+ assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+ // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+ // a profile.
+ assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+ // If neither Action nor CSAction, DebugInfoForProfiling or
+ // PseudoProbeForProfiling needs to be true.
+ assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+ this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+ }
+ std::string ProfileFile;
+ std::string CSProfileGenFile;
+ std::string ProfileRemappingFile;
+ PGOAction Action;
+ CSPGOAction CSAction;
+ bool DebugInfoForProfiling;
+ bool PseudoProbeForProfiling;
+};
+} // namespace llvm
+
+#endif
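A sketch of constructing a valid profile-use configuration; the asserts in the constructor enforce the combinations spelled out in the comments (the profile file name is arbitrary):

    #include "llvm/Support/PGOOptions.h"

    using namespace llvm;

    static PGOOptions makeIRUseOptions() {
      // IRUse with a plain profile; no context-sensitive PGO action.
      return PGOOptions("default.profdata", /*CSProfileGenFile=*/"",
                        /*ProfileRemappingFile=*/"", PGOOptions::IRUse,
                        PGOOptions::NoCSAction);
    }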
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 28d171d45256..5c3b26d5754c 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -40,7 +40,10 @@ class Latch {
public:
explicit Latch(uint32_t Count = 0) : Count(Count) {}
- ~Latch() { sync(); }
+ ~Latch() {
+ // Ensure at least that sync() was called.
+ assert(Count == 0);
+ }
void inc() {
std::lock_guard<std::mutex> lock(Mutex);
diff --git a/llvm/include/llvm/Support/Path.h b/llvm/include/llvm/Support/Path.h
index af70e086a1b6..da5095714f48 100644
--- a/llvm/include/llvm/Support/Path.h
+++ b/llvm/include/llvm/Support/Path.h
@@ -25,7 +25,29 @@ namespace llvm {
namespace sys {
namespace path {
-enum class Style { windows, posix, native };
+enum class Style {
+ native,
+ posix,
+ windows_slash,
+ windows_backslash,
+ windows = windows_backslash, // deprecated
+};
+
+/// Check if \p S uses POSIX path rules.
+constexpr bool is_style_posix(Style S) {
+ if (S == Style::posix)
+ return true;
+ if (S != Style::native)
+ return false;
+#if defined(_WIN32)
+ return false;
+#else
+ return true;
+#endif
+}
+
+/// Check if \p S uses Windows path rules.
+constexpr bool is_style_windows(Style S) { return !is_style_posix(S); }
/// @name Lexical Component Iterator
/// @{
@@ -174,6 +196,21 @@ bool replace_path_prefix(SmallVectorImpl<char> &Path, StringRef OldPrefix,
StringRef NewPrefix,
Style style = Style::native);
+/// Remove redundant leading "./" pieces and consecutive separators.
+///
+/// @param path Input path.
+/// @result The cleaned-up \a path.
+StringRef remove_leading_dotslash(StringRef path, Style style = Style::native);
+
+/// In-place remove any './' and optionally '../' components from a path.
+///
+/// @param path processed path
+/// @param remove_dot_dot specify if '../' (except for leading "../") should be
+/// removed
+/// @result True if path was changed
+bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
+ Style style = Style::native);
+
/// Append to path.
///
/// @code
@@ -212,7 +249,7 @@ void append(SmallVectorImpl<char> &path, const_iterator begin,
/// Convert path to the native form. This is used to give paths to users and
/// operating system calls in the platform's normal way. For example, on Windows
-/// all '/' are converted to '\'.
+/// all '/' are converted to '\'. On Unix, all '\' are converted to '/'.
///
/// @param path A path that is transformed to native format.
/// @param result Holds the result of the transformation.
@@ -226,6 +263,17 @@ void native(const Twine &path, SmallVectorImpl<char> &result,
/// @param path A path that is transformed to native format.
void native(SmallVectorImpl<char> &path, Style style = Style::native);
+/// For Windows path styles, convert path to use the preferred path separators.
+/// For other styles, do nothing.
+///
+/// @param path A path that is transformed to preferred format.
+inline void make_preferred(SmallVectorImpl<char> &path,
+ Style style = Style::native) {
+ if (!is_style_windows(style))
+ return;
+ native(path, style);
+}
+
/// Replaces backslashes with slashes if Windows.
///
/// @param path processed path
@@ -499,21 +547,6 @@ bool is_absolute_gnu(const Twine &path, Style style = Style::native);
/// @result True if the path is relative, false if it is not.
bool is_relative(const Twine &path, Style style = Style::native);
-/// Remove redundant leading "./" pieces and consecutive separators.
-///
-/// @param path Input path.
-/// @result The cleaned-up \a path.
-StringRef remove_leading_dotslash(StringRef path, Style style = Style::native);
-
-/// In-place remove any './' and optionally '../' components from a path.
-///
-/// @param path processed path
-/// @param remove_dot_dot specify if '../' (except for leading "../") should be
-/// removed
-/// @result True if path was changed
-bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
- Style style = Style::native);
-
} // end namespace path
} // end namespace sys
} // end namespace llvm
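A sketch of the new style predicates together with the relocated lexical helpers (the sample path is arbitrary):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      SmallString<128> P("subdir/./..//file.txt");
      // remove_dots() folds away "./" pieces and, when asked, "../" pieces.
      sys::path::remove_dots(P, /*remove_dot_dot=*/true,
                             sys::path::Style::posix);
      outs() << P << "\n"; // file.txt

      // make_preferred() is a no-op for POSIX styles; for Windows styles it
      // rewrites separators to the preferred form.
      if (sys::path::is_style_windows(sys::path::Style::native))
        sys::path::make_preferred(P);
      return 0;
    }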
diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h
index 6687e5e7ff9a..ee03efeed9b2 100644
--- a/llvm/include/llvm/Support/Process.h
+++ b/llvm/include/llvm/Support/Process.h
@@ -214,12 +214,10 @@ public:
/// In that case, the control flow will resume after RunSafely(), like for a
/// crash, rather than exiting the current process.
/// Use \arg NoCleanup for calling _exit() instead of exit().
- LLVM_ATTRIBUTE_NORETURN
- static void Exit(int RetCode, bool NoCleanup = false);
+ [[noreturn]] static void Exit(int RetCode, bool NoCleanup = false);
private:
- LLVM_ATTRIBUTE_NORETURN
- static void ExitNoCleanup(int RetCode);
+ [[noreturn]] static void ExitNoCleanup(int RetCode);
};
}
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
new file mode 100644
index 000000000000..7110de601123
--- /dev/null
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -0,0 +1,89 @@
+//===-- RISCVISAInfo.h - RISCV ISA Information ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RISCVISAINFO_H
+#define LLVM_SUPPORT_RISCVISAINFO_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+struct RISCVExtensionInfo {
+ std::string ExtName;
+ unsigned MajorVersion;
+ unsigned MinorVersion;
+};
+
+class RISCVISAInfo {
+public:
+ RISCVISAInfo(const RISCVISAInfo &) = delete;
+ RISCVISAInfo &operator=(const RISCVISAInfo &) = delete;
+
+ static bool compareExtension(const std::string &LHS, const std::string &RHS);
+
+ /// Helper class for OrderedExtensionMap.
+ struct ExtensionComparator {
+ bool operator()(const std::string &LHS, const std::string &RHS) const {
+ return compareExtension(LHS, RHS);
+ }
+ };
+
+ /// OrderedExtensionMap is a std::map specialized to keep entries in the
+ /// canonical order of extensions.
+ typedef std::map<std::string, RISCVExtensionInfo, ExtensionComparator>
+ OrderedExtensionMap;
+
+ /// Parse RISCV ISA info from arch string.
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ parseArchString(StringRef Arch, bool EnableExperimentalExtension,
+ bool ExperimentalExtensionVersionCheck = true);
+
+ /// Parse RISCV ISA info from feature vector.
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ parseFeatures(unsigned XLen, const std::vector<std::string> &Features);
+
+ /// Convert RISCV ISA info to a feature vector.
+ void toFeatures(std::vector<StringRef> &Features,
+ std::function<StringRef(const Twine &)> StrAlloc) const;
+
+ const OrderedExtensionMap &getExtensions() const { return Exts; }
+
+ unsigned getXLen() const { return XLen; }
+ unsigned getFLen() const { return FLen; }
+
+ bool hasExtension(StringRef Ext) const;
+ std::string toString() const;
+
+ static bool isSupportedExtensionFeature(StringRef Ext);
+ static bool isSupportedExtension(StringRef Ext);
+ static bool isSupportedExtension(StringRef Ext, unsigned MajorVersion,
+ unsigned MinorVersion);
+
+private:
+ RISCVISAInfo(unsigned XLen) : XLen(XLen), FLen(0) {}
+
+ unsigned XLen;
+ unsigned FLen;
+
+ OrderedExtensionMap Exts;
+
+ void addExtension(StringRef ExtName, unsigned MajorVersion,
+ unsigned MinorVersion);
+
+ void updateFLen();
+};
+
+} // namespace llvm
+
+#endif
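A hedged usage sketch for the new RISCVISAInfo API; the arch string and the error handling are illustrative, not taken from this patch:

#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"

static void dumpISAInfo() {
  auto ParseResult = llvm::RISCVISAInfo::parseArchString(
      "rv64imafdc", /*EnableExperimentalExtension=*/false);
  if (!ParseResult) {
    // parseArchString reports malformed arch strings through llvm::Expected.
    llvm::errs() << llvm::toString(ParseResult.takeError()) << "\n";
    return;
  }
  const std::unique_ptr<llvm::RISCVISAInfo> &ISAInfo = *ParseResult;
  llvm::outs() << ISAInfo->toString() << " XLen=" << ISAInfo->getXLen() << "\n";
}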
diff --git a/llvm/include/llvm/Support/RISCVTargetParser.def b/llvm/include/llvm/Support/RISCVTargetParser.def
index 6a06f9258105..f658cdb91c6b 100644
--- a/llvm/include/llvm/Support/RISCVTargetParser.def
+++ b/llvm/include/llvm/Support/RISCVTargetParser.def
@@ -19,9 +19,17 @@ PROC(ROCKET_RV32, {"rocket-rv32"}, FK_NONE, {""})
PROC(ROCKET_RV64, {"rocket-rv64"}, FK_64BIT, {""})
PROC(SIFIVE_732, {"sifive-7-rv32"}, FK_NONE, {""})
PROC(SIFIVE_764, {"sifive-7-rv64"}, FK_64BIT, {""})
+PROC(SIFIVE_E20, {"sifive-e20"}, FK_NONE, {"rv32imc"})
+PROC(SIFIVE_E21, {"sifive-e21"}, FK_NONE, {"rv32imac"})
+PROC(SIFIVE_E24, {"sifive-e24"}, FK_NONE, {"rv32imafc"})
PROC(SIFIVE_E31, {"sifive-e31"}, FK_NONE, {"rv32imac"})
-PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_E34, {"sifive-e34"}, FK_NONE, {"rv32imafc"})
PROC(SIFIVE_E76, {"sifive-e76"}, FK_NONE, {"rv32imafc"})
+PROC(SIFIVE_S21, {"sifive-s21"}, FK_64BIT, {"rv64imac"})
+PROC(SIFIVE_S51, {"sifive-s51"}, FK_64BIT, {"rv64imac"})
+PROC(SIFIVE_S54, {"sifive-s54"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_S76, {"sifive-s76"}, FK_64BIT, {"rv64gc"})
+PROC(SIFIVE_U54, {"sifive-u54"}, FK_64BIT, {"rv64gc"})
PROC(SIFIVE_U74, {"sifive-u74"}, FK_64BIT, {"rv64gc"})
#undef PROC
diff --git a/llvm/include/llvm/Support/Signposts.h b/llvm/include/llvm/Support/Signposts.h
index bc6abba0a0e1..dabbba6f89d1 100644
--- a/llvm/include/llvm/Support/Signposts.h
+++ b/llvm/include/llvm/Support/Signposts.h
@@ -1,9 +1,8 @@
//===-- llvm/Support/Signposts.h - Interval debug annotations ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,17 +17,8 @@
#define LLVM_SUPPORT_SIGNPOSTS_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/Config/llvm-config.h"
#include <memory>
-#if LLVM_SUPPORT_XCODE_SIGNPOSTS
-#include <Availability.h>
-#include <os/signpost.h>
-#endif
-
-#define SIGNPOSTS_AVAILABLE() \
- __builtin_available(macos 10.14, iOS 12, tvOS 12, watchOS 5, *)
-
namespace llvm {
class SignpostEmitterImpl;
@@ -45,33 +35,8 @@ public:
/// Begin a signposted interval for a given object.
void startInterval(const void *O, StringRef Name);
-
-#if LLVM_SUPPORT_XCODE_SIGNPOSTS
- os_log_t &getLogger() const;
- os_signpost_id_t getSignpostForObject(const void *O);
-#endif
-
- /// A macro to take advantage of the special format string handling
- /// in the os_signpost API. The format string substitution is
- /// deferred to the log consumer and done outside of the
- /// application.
-#if LLVM_SUPPORT_XCODE_SIGNPOSTS
-#define SIGNPOST_EMITTER_START_INTERVAL(SIGNPOST_EMITTER, O, ...) \
- do { \
- if ((SIGNPOST_EMITTER).isEnabled()) \
- if (SIGNPOSTS_AVAILABLE()) \
- os_signpost_interval_begin((SIGNPOST_EMITTER).getLogger(), \
- (SIGNPOST_EMITTER).getSignpostForObject(O), \
- "LLVM Timers", __VA_ARGS__); \
- } while (0)
-#else
-#define SIGNPOST_EMITTER_START_INTERVAL(SIGNPOST_EMITTER, O, ...) \
- do { \
- } while (0)
-#endif
-
/// End a signposted interval for a given object.
- void endInterval(const void *O);
+ void endInterval(const void *O, StringRef Name);
};
} // end namespace llvm
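With the name now required on endInterval() as well, a balanced pair looks roughly like this; the handle and interval name are illustrative, and the default-constructed emitter mirrors how llvm::Timer uses the class:

static llvm::SignpostEmitter Signposts;

static void runTimedRegion(const void *Handle) {
  Signposts.startInterval(Handle, "LLVM Timers");
  // ... the work being measured ...
  Signposts.endInterval(Handle, "LLVM Timers");
}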
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index fbe0d1a55bfc..b34b885ddc35 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -652,6 +652,9 @@ HANDLE_TARGET_OPCODE(G_UMAX)
/// Generic integer absolute value.
HANDLE_TARGET_OPCODE(G_ABS)
+HANDLE_TARGET_OPCODE(G_LROUND)
+HANDLE_TARGET_OPCODE(G_LLROUND)
+
/// Generic BRANCH instruction. This is an unconditional branch.
HANDLE_TARGET_OPCODE(G_BR)
diff --git a/llvm/include/llvm/Support/TargetSelect.h b/llvm/include/llvm/Support/TargetSelect.h
index 9ffb84c4a570..e57614cea758 100644
--- a/llvm/include/llvm/Support/TargetSelect.h
+++ b/llvm/include/llvm/Support/TargetSelect.h
@@ -41,6 +41,10 @@ extern "C" {
#define LLVM_DISASSEMBLER(TargetName) \
void LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/Disassemblers.def"
+
+// Declare all of the available TargetMCA initialization functions.
+#define LLVM_TARGETMCA(TargetName) void LLVMInitialize##TargetName##TargetMCA();
+#include "llvm/Config/TargetMCAs.def"
}
namespace llvm {
@@ -159,6 +163,14 @@ namespace llvm {
return true;
#endif
}
+
+ /// InitializeAllTargetMCAs - The main program should call
+ /// this function to initialize the target CustomBehaviour and
+ /// InstrPostProcess classes.
+ inline void InitializeAllTargetMCAs() {
+#define LLVM_TARGETMCA(TargetName) LLVMInitialize##TargetName##TargetMCA();
+#include "llvm/Config/TargetMCAs.def"
+ }
}
#endif
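Sketch of a typical tool startup sequence; only the InitializeAllTargetMCAs() call is new here, the other initializers already exist:

int main(int argc, char **argv) {
  llvm::InitializeAllTargetInfos();
  llvm::InitializeAllTargetMCs();
  llvm::InitializeAllDisassemblers();
  // New: registers each target's CustomBehaviour/InstrPostProcess support.
  llvm::InitializeAllTargetMCAs();
  // ... run the MCA pipeline ...
  return 0;
}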
diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 30bbbd7db8c9..7d1274735a37 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -229,7 +229,6 @@ public:
bool isZero() const { return !Value; }
bool isNonZero() const { return !isZero(); }
explicit operator bool() const { return isNonZero(); }
- ScalarTy getValue() const { return Value; }
ScalarTy getValue(unsigned Dim) const {
return Dim == UnivariateDim ? Value : 0;
}
@@ -250,7 +249,7 @@ public:
//===----------------------------------------------------------------------===//
// LinearPolySize - base class for fixed- or scalable sizes.
-// ^ ^
+// ^ ^
// | |
// | +----- ElementCount - Leaf class to represent an element count
// | (vscale x unsigned)
@@ -294,7 +293,7 @@ public:
static LeafTy getNull() { return get(0, false); }
/// Returns the minimum value this size can represent.
- ScalarTy getKnownMinValue() const { return this->getValue(); }
+ ScalarTy getKnownMinValue() const { return this->Value; }
/// Returns whether the size is scaled by a runtime quantity (vscale).
bool isScalable() const { return this->UnivariateDim == ScalableDim; }
/// A return value of true indicates we know at compile time that the number
@@ -500,8 +499,7 @@ inline raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
-template <typename T> struct DenseMapInfo;
-template <> struct DenseMapInfo<ElementCount> {
+template <> struct DenseMapInfo<ElementCount, void> {
static inline ElementCount getEmptyKey() {
return ElementCount::getScalable(~0U);
}
diff --git a/llvm/include/llvm/Support/VersionTuple.h b/llvm/include/llvm/Support/VersionTuple.h
index a48ae0bf52bd..1a1072d228f1 100644
--- a/llvm/include/llvm/Support/VersionTuple.h
+++ b/llvm/include/llvm/Support/VersionTuple.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/Support/HashBuilder.h"
#include <string>
#include <tuple>
@@ -164,6 +165,12 @@ public:
return llvm::hash_combine(VT.Major, VT.Minor, VT.Subminor, VT.Build);
}
+ template <typename HasherT, llvm::support::endianness Endianness>
+ friend void addHash(HashBuilderImpl<HasherT, Endianness> &HBuilder,
+ const VersionTuple &VT) {
+ HBuilder.add(VT.Major, VT.Minor, VT.Subminor, VT.Build);
+ }
+
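A hedged sketch of how the new addHash friend is reached through llvm::HashBuilder; llvm::MD5 is assumed as the hasher type, and any hasher HashBuilder accepts should work the same way:

#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/VersionTuple.h"

static void addVersionToHash(
    llvm::HashBuilder<llvm::MD5, llvm::support::endianness::little> &HB,
    const llvm::VersionTuple &VT) {
  // Dispatches to the addHash friend above; Major, Minor, Subminor and Build
  // are all folded into the running hash.
  HB.add(VT);
}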
/// Retrieve a string representation of the version number.
std::string getAsString() const;
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 323e6719645d..10d2389ee079 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -121,6 +121,14 @@ public:
/// Closes the file.
virtual std::error_code close() = 0;
+
+ // Get the same file with a different path.
+ static ErrorOr<std::unique_ptr<File>>
+ getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P);
+
+protected:
+ // Set the file's underlying path.
+ virtual void setPath(const Twine &Path) {}
};
/// A member of a directory, yielded by a directory_iterator.
@@ -596,6 +604,17 @@ class RedirectingFileSystemParser;
/// contain multiple path components (e.g. /path/to/file). However, any
/// directory in such a path that contains more than one child must be uniquely
/// represented by a 'directory' entry.
+///
+/// When the 'use-external-name' field is set, calls to \a vfs::File::status()
+/// give the external (remapped) filesystem name instead of the name the file
+/// was accessed by. This is an intentional leak through the \a
+/// RedirectingFileSystem abstraction layer. It enables clients to discover
+/// (and use) the external file location when communicating with users or tools
+/// that don't use the same VFS overlay.
+///
+/// FIXME: 'use-external-name' causes behaviour that's inconsistent with how
+/// "real" filesystems behave. Maybe there should be a separate channel for
+/// this information.
class RedirectingFileSystem : public vfs::FileSystem {
public:
enum EntryKind { EK_Directory, EK_DirectoryRemap, EK_File };
@@ -746,6 +765,12 @@ private:
/// with the given error code on a path associated with the provided Entry.
bool shouldFallBackToExternalFS(std::error_code EC, Entry *E = nullptr) const;
+ /// Get the File status, or error, from the underlying external file system.
+ /// This returns the status with the originally requested name, while looking
+ /// up the entry using the canonical path.
+ ErrorOr<Status> getExternalStatus(const Twine &CanonicalPath,
+ const Twine &OriginalPath) const;
+
// In a RedirectingFileSystem, keys can be specified in Posix or Windows
// style (or even a mixture of both), so this comparison helper allows
// slashes (representing a root) to match backslashes (and vice versa). Note
@@ -777,12 +802,7 @@ private:
/// Whether to perform case-sensitive comparisons.
///
/// Currently, case-insensitive matching only works correctly with ASCII.
- bool CaseSensitive =
-#ifdef _WIN32
- false;
-#else
- true;
-#endif
+ bool CaseSensitive = is_style_posix(sys::path::Style::native);
/// IsRelativeOverlay marks whether a ExternalContentsPrefixDir path must
/// be prefixed in every 'external-contents' when reading from YAML files.
@@ -808,7 +828,8 @@ private:
Entry *From) const;
/// Get the status for a path with the provided \c LookupResult.
- ErrorOr<Status> status(const Twine &Path, const LookupResult &Result);
+ ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
+ const LookupResult &Result);
public:
/// Looks up \p Path in \c Roots and returns a LookupResult giving the
diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h
index a45eeaba4ad5..917822678e97 100644
--- a/llvm/include/llvm/Support/Windows/WindowsSupport.h
+++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h
@@ -68,10 +68,10 @@ llvm::VersionTuple GetWindowsOSVersion();
bool MakeErrMsg(std::string *ErrMsg, const std::string &prefix);
// Include GetLastError() in a fatal error message.
-LLVM_ATTRIBUTE_NORETURN inline void ReportLastErrorFatal(const char *Msg) {
+[[noreturn]] inline void ReportLastErrorFatal(const char *Msg) {
std::string ErrMsg;
MakeErrMsg(&ErrMsg, Msg);
- llvm::report_fatal_error(ErrMsg);
+ llvm::report_fatal_error(Twine(ErrMsg));
}
template <typename HandleTraits>
diff --git a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
index 757a3c0c8a71..aca717a9f6cb 100644
--- a/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
+++ b/llvm/include/llvm/Support/X86DisassemblerDecoderCommon.h
@@ -31,6 +31,8 @@ namespace X86Disassembler {
#define XOP9_MAP_SYM x86DisassemblerXOP9Opcodes
#define XOPA_MAP_SYM x86DisassemblerXOPAOpcodes
#define THREEDNOW_MAP_SYM x86Disassembler3DNowOpcodes
+#define MAP5_SYM x86DisassemblerMap5Opcodes
+#define MAP6_SYM x86DisassemblerMap6Opcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
@@ -42,6 +44,8 @@ namespace X86Disassembler {
#define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes"
#define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes"
#define THREEDNOW_MAP_STR "x86Disassembler3DNowOpcodes"
+#define MAP5_STR "x86DisassemblerMap5Opcodes"
+#define MAP6_STR "x86DisassemblerMap6Opcodes"
// Attributes of an instruction that must be known before the opcode can be
// processed correctly. Most of these indicate the presence of particular
@@ -292,7 +296,9 @@ enum OpcodeType {
XOP8_MAP = 4,
XOP9_MAP = 5,
XOPA_MAP = 6,
- THREEDNOW_MAP = 7
+ THREEDNOW_MAP = 7,
+ MAP5 = 8,
+ MAP6 = 9
};
// The following structs are used for the hierarchical decode table. After
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
index ffcc2238e3ce..4443d822d3e8 100644
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ b/llvm/include/llvm/Support/X86TargetParser.def
@@ -91,54 +91,59 @@ X86_CPU_SUBTYPE(AMDFAM19H_ZNVER3, "znver3")
X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake")
#undef X86_CPU_SUBTYPE
-
-// This macro is used for cpu types present in compiler-rt/libgcc.
+// This macro is used for CPU types present in compiler-rt/libgcc. The third
+// parameter, PRIORITY, is the priority used by 'target' attribute checking. Note
+// that not all are supported/prioritized by GCC, so synchronization with GCC's
+// implementation may require changing some existing values.
+//
+// We cannot just re-sort the list though because its order is dictated by the
+// order of bits in CodeGenFunction::GetX86CpuSupportsMask.
#ifndef X86_FEATURE_COMPAT
-#define X86_FEATURE_COMPAT(ENUM, STR) X86_FEATURE(ENUM, STR)
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR)
#endif
#ifndef X86_FEATURE
#define X86_FEATURE(ENUM, STR)
#endif
-X86_FEATURE_COMPAT(CMOV, "cmov")
-X86_FEATURE_COMPAT(MMX, "mmx")
-X86_FEATURE_COMPAT(POPCNT, "popcnt")
-X86_FEATURE_COMPAT(SSE, "sse")
-X86_FEATURE_COMPAT(SSE2, "sse2")
-X86_FEATURE_COMPAT(SSE3, "sse3")
-X86_FEATURE_COMPAT(SSSE3, "ssse3")
-X86_FEATURE_COMPAT(SSE4_1, "sse4.1")
-X86_FEATURE_COMPAT(SSE4_2, "sse4.2")
-X86_FEATURE_COMPAT(AVX, "avx")
-X86_FEATURE_COMPAT(AVX2, "avx2")
-X86_FEATURE_COMPAT(SSE4_A, "sse4a")
-X86_FEATURE_COMPAT(FMA4, "fma4")
-X86_FEATURE_COMPAT(XOP, "xop")
-X86_FEATURE_COMPAT(FMA, "fma")
-X86_FEATURE_COMPAT(AVX512F, "avx512f")
-X86_FEATURE_COMPAT(BMI, "bmi")
-X86_FEATURE_COMPAT(BMI2, "bmi2")
-X86_FEATURE_COMPAT(AES, "aes")
-X86_FEATURE_COMPAT(PCLMUL, "pclmul")
-X86_FEATURE_COMPAT(AVX512VL, "avx512vl")
-X86_FEATURE_COMPAT(AVX512BW, "avx512bw")
-X86_FEATURE_COMPAT(AVX512DQ, "avx512dq")
-X86_FEATURE_COMPAT(AVX512CD, "avx512cd")
-X86_FEATURE_COMPAT(AVX512ER, "avx512er")
-X86_FEATURE_COMPAT(AVX512PF, "avx512pf")
-X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi")
-X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma")
-X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw")
-X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps")
-X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq")
-X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2")
-X86_FEATURE_COMPAT(GFNI, "gfni")
-X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq")
-X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni")
-X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg")
-X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16")
-X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect")
+X86_FEATURE_COMPAT(CMOV, "cmov", 0)
+X86_FEATURE_COMPAT(MMX, "mmx", 1)
+X86_FEATURE_COMPAT(POPCNT, "popcnt", 9)
+X86_FEATURE_COMPAT(SSE, "sse", 2)
+X86_FEATURE_COMPAT(SSE2, "sse2", 3)
+X86_FEATURE_COMPAT(SSE3, "sse3", 4)
+X86_FEATURE_COMPAT(SSSE3, "ssse3", 5)
+X86_FEATURE_COMPAT(SSE4_1, "sse4.1", 7)
+X86_FEATURE_COMPAT(SSE4_2, "sse4.2", 8)
+X86_FEATURE_COMPAT(AVX, "avx", 12)
+X86_FEATURE_COMPAT(AVX2, "avx2", 18)
+X86_FEATURE_COMPAT(SSE4_A, "sse4a", 6)
+X86_FEATURE_COMPAT(FMA4, "fma4", 14)
+X86_FEATURE_COMPAT(XOP, "xop", 15)
+X86_FEATURE_COMPAT(FMA, "fma", 16)
+X86_FEATURE_COMPAT(AVX512F, "avx512f", 19)
+X86_FEATURE_COMPAT(BMI, "bmi", 13)
+X86_FEATURE_COMPAT(BMI2, "bmi2", 17)
+X86_FEATURE_COMPAT(AES, "aes", 10)
+X86_FEATURE_COMPAT(PCLMUL, "pclmul", 11)
+X86_FEATURE_COMPAT(AVX512VL, "avx512vl", 20)
+X86_FEATURE_COMPAT(AVX512BW, "avx512bw", 21)
+X86_FEATURE_COMPAT(AVX512DQ, "avx512dq", 22)
+X86_FEATURE_COMPAT(AVX512CD, "avx512cd", 23)
+X86_FEATURE_COMPAT(AVX512ER, "avx512er", 24)
+X86_FEATURE_COMPAT(AVX512PF, "avx512pf", 25)
+X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi", 26)
+X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma", 27)
+X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw", 28)
+X86_FEATURE_COMPAT(AVX5124FMAPS, "avx5124fmaps", 29)
+X86_FEATURE_COMPAT(AVX512VPOPCNTDQ, "avx512vpopcntdq", 30)
+X86_FEATURE_COMPAT(AVX512VBMI2, "avx512vbmi2", 31)
+X86_FEATURE_COMPAT(GFNI, "gfni", 32)
+X86_FEATURE_COMPAT(VPCLMULQDQ, "vpclmulqdq", 33)
+X86_FEATURE_COMPAT(AVX512VNNI, "avx512vnni", 34)
+X86_FEATURE_COMPAT(AVX512BITALG, "avx512bitalg", 35)
+X86_FEATURE_COMPAT(AVX512BF16, "avx512bf16", 36)
+X86_FEATURE_COMPAT(AVX512VP2INTERSECT, "avx512vp2intersect", 37)
// Features below here are not in libgcc/compiler-rt.
X86_FEATURE (3DNOW, "3dnow")
X86_FEATURE (3DNOWA, "3dnowa")
@@ -153,6 +158,7 @@ X86_FEATURE (CLWB, "clwb")
X86_FEATURE (CLZERO, "clzero")
X86_FEATURE (CMPXCHG16B, "cx16")
X86_FEATURE (CMPXCHG8B, "cx8")
+X86_FEATURE (CRC32, "crc32")
X86_FEATURE (ENQCMD, "enqcmd")
X86_FEATURE (F16C, "f16c")
X86_FEATURE (FSGSBASE, "fsgsbase")
@@ -193,6 +199,7 @@ X86_FEATURE (XSAVEC, "xsavec")
X86_FEATURE (XSAVEOPT, "xsaveopt")
X86_FEATURE (XSAVES, "xsaves")
X86_FEATURE (HRESET, "hreset")
+X86_FEATURE (AVX512FP16, "avx512fp16")
X86_FEATURE (AVXVNNI, "avxvnni")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
@@ -202,3 +209,49 @@ X86_FEATURE (LVI_CFI, "lvi-cfi")
X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening")
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
+
+#ifndef CPU_SPECIFIC
+#define CPU_SPECIFIC(NAME, MANGLING, FEATURES)
+#endif
+
+#ifndef CPU_SPECIFIC_ALIAS
+#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME)
+#endif
+
+CPU_SPECIFIC("generic", 'A', "")
+CPU_SPECIFIC("pentium", 'B', "")
+CPU_SPECIFIC("pentium_pro", 'C', "+cmov")
+CPU_SPECIFIC("pentium_mmx", 'D', "+mmx")
+CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx")
+CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse")
+CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii")
+CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
+CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2")
+CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
+CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
+CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
+CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
+CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
+CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
+CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
+CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx")
+CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge")
+CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx")
+CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge")
+CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
+CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell")
+CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
+CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
+CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell")
+CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
+CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd")
+CPU_SPECIFIC_ALIAS("mic_avx512", "knl")
+CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx")
+CPU_SPECIFIC("skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb")
+CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi")
+CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq")
+
+#undef CPU_SPECIFIC_ALIAS
+#undef CPU_SPECIFIC
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
index ed02066933a7..bfa3e23dbd9d 100644
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -13,6 +13,7 @@
#ifndef LLVM_SUPPORT_X86TARGETPARSER_H
#define LLVM_SUPPORT_X86TARGETPARSER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
@@ -154,6 +155,9 @@ void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features);
void updateImpliedFeatures(StringRef Feature, bool Enabled,
StringMap<bool> &Features);
+uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
+unsigned getFeaturePriority(ProcessorFeatures Feat);
+
} // namespace X86
} // namespace llvm
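Illustrative calls into the two new helpers; the feature strings mirror the X86_FEATURE_COMPAT entries in X86TargetParser.def, and FEATURE_AVX2 is assumed to be the matching ProcessorFeatures enumerator:

#include "llvm/Support/X86TargetParser.h"

static void queryX86Features() {
  uint64_t Mask = llvm::X86::getCpuSupportsMask({"avx2", "sse4.2"});
  unsigned Prio = llvm::X86::getFeaturePriority(llvm::X86::FEATURE_AVX2);
  (void)Mask;
  (void)Prio;
}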
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 9ac9eb300983..bea232e6e000 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -1641,7 +1641,7 @@ void IO::processKeyWithDefault(const char *Key, Optional<T> &Val,
// usually None.
bool IsNone = false;
if (!outputting())
- if (auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode()))
+ if (const auto *Node = dyn_cast<ScalarNode>(((Input *)this)->getCurrentNode()))
// We use rtrim to ignore possible white spaces that might exist when a
// comment is present on the same line.
IsNone = Node->getRawValue().rtrim(' ') == "<none>";
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index c669c2babad9..98c26ef0b1e5 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -330,6 +330,8 @@ public:
// changeColor() has no effect until enable_colors(true) is called.
virtual void enable_colors(bool enable) { ColorEnabled = enable; }
+ bool colors_enabled() const { return ColorEnabled; }
+
/// Tie this stream to the specified stream. Replaces any existing tied-to
/// stream. Specifying a nullptr unties the stream.
void tie(raw_ostream *TieTo) { TiedStream = TieTo; }
@@ -719,7 +721,11 @@ class buffer_unique_ostream : public raw_svector_ostream {
public:
buffer_unique_ostream(std::unique_ptr<raw_ostream> OS)
- : raw_svector_ostream(Buffer), OS(std::move(OS)) {}
+ : raw_svector_ostream(Buffer), OS(std::move(OS)) {
+ // Turn off buffering on OS, which we now own, to avoid allocating a buffer
+ // when the destructor writes only to be immediately flushed again.
+ this->OS->SetUnbuffered();
+ }
~buffer_unique_ostream() override { *OS << str(); }
};
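Sketch of a client checking the new colors_enabled() accessor before emitting color changes; assumes colors were turned on earlier via enable_colors():

static void emitColoredError(llvm::raw_ostream &OS) {
  if (OS.colors_enabled())
    OS.changeColor(llvm::raw_ostream::RED, /*Bold=*/true);
  OS << "error: something went wrong\n";
  if (OS.colors_enabled())
    OS.resetColor();
}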
diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h
index 5c4a736eb107..d73b9ae49235 100644
--- a/llvm/include/llvm/TableGen/DirectiveEmitter.h
+++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h
@@ -152,7 +152,7 @@ public:
}
return C;
});
- N.erase(std::remove(N.begin(), N.end(), '_'), N.end());
+ llvm::erase_value(N, '_');
return N;
}
diff --git a/llvm/include/llvm/TableGen/Error.h b/llvm/include/llvm/TableGen/Error.h
index a0e23aca211e..da0132b10f4f 100644
--- a/llvm/include/llvm/TableGen/Error.h
+++ b/llvm/include/llvm/TableGen/Error.h
@@ -22,13 +22,10 @@ namespace llvm {
void PrintNote(const Twine &Msg);
void PrintNote(ArrayRef<SMLoc> NoteLoc, const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const Record *Rec,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalNote(const RecordVal *RecVal,
- const Twine &Msg);
+[[noreturn]] void PrintFatalNote(const Twine &Msg);
+[[noreturn]] void PrintFatalNote(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg);
+[[noreturn]] void PrintFatalNote(const Record *Rec, const Twine &Msg);
+[[noreturn]] void PrintFatalNote(const RecordVal *RecVal, const Twine &Msg);
void PrintWarning(const Twine &Msg);
void PrintWarning(ArrayRef<SMLoc> WarningLoc, const Twine &Msg);
@@ -40,13 +37,10 @@ void PrintError(const char *Loc, const Twine &Msg);
void PrintError(const Record *Rec, const Twine &Msg);
void PrintError(const RecordVal *RecVal, const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(ArrayRef<SMLoc> ErrorLoc,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const Record *Rec,
- const Twine &Msg);
-LLVM_ATTRIBUTE_NORETURN void PrintFatalError(const RecordVal *RecVal,
- const Twine &Msg);
+[[noreturn]] void PrintFatalError(const Twine &Msg);
+[[noreturn]] void PrintFatalError(ArrayRef<SMLoc> ErrorLoc, const Twine &Msg);
+[[noreturn]] void PrintFatalError(const Record *Rec, const Twine &Msg);
+[[noreturn]] void PrintFatalError(const RecordVal *RecVal, const Twine &Msg);
void CheckAssert(SMLoc Loc, Init *Condition, Init *Message);
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 713d9375448c..5869a5cf0423 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -39,6 +39,9 @@
#include <vector>
namespace llvm {
+namespace detail {
+struct RecordContext;
+} // namespace detail
class ListRecTy;
struct MultiClass;
@@ -100,7 +103,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) {
/// 'bit' - Represent a single bit
class BitRecTy : public RecTy {
- static BitRecTy Shared;
+ friend detail::RecordContext;
BitRecTy() : RecTy(BitRecTyKind) {}
@@ -109,7 +112,7 @@ public:
return RT->getRecTyKind() == BitRecTyKind;
}
- static BitRecTy *get() { return &Shared; }
+ static BitRecTy *get();
std::string getAsString() const override { return "bit"; }
@@ -140,7 +143,7 @@ public:
/// 'int' - Represent an integer value of no particular size
class IntRecTy : public RecTy {
- static IntRecTy Shared;
+ friend detail::RecordContext;
IntRecTy() : RecTy(IntRecTyKind) {}
@@ -149,7 +152,7 @@ public:
return RT->getRecTyKind() == IntRecTyKind;
}
- static IntRecTy *get() { return &Shared; }
+ static IntRecTy *get();
std::string getAsString() const override { return "int"; }
@@ -158,7 +161,7 @@ public:
/// 'string' - Represent an string value
class StringRecTy : public RecTy {
- static StringRecTy Shared;
+ friend detail::RecordContext;
StringRecTy() : RecTy(StringRecTyKind) {}
@@ -167,7 +170,7 @@ public:
return RT->getRecTyKind() == StringRecTyKind;
}
- static StringRecTy *get() { return &Shared; }
+ static StringRecTy *get();
std::string getAsString() const override;
@@ -200,7 +203,7 @@ public:
/// 'dag' - Represent a dag fragment
class DagRecTy : public RecTy {
- static DagRecTy Shared;
+ friend detail::RecordContext;
DagRecTy() : RecTy(DagRecTyKind) {}
@@ -209,7 +212,7 @@ public:
return RT->getRecTyKind() == DagRecTyKind;
}
- static DagRecTy *get() { return &Shared; }
+ static DagRecTy *get();
std::string getAsString() const override;
};
@@ -221,6 +224,7 @@ public:
class RecordRecTy final : public RecTy, public FoldingSetNode,
public TrailingObjects<RecordRecTy, Record *> {
friend class Record;
+ friend detail::RecordContext;
unsigned NumClasses;
@@ -437,6 +441,8 @@ public:
/// '?' - Represents an uninitialized value.
class UnsetInit : public Init {
+ friend detail::RecordContext;
+
UnsetInit() : Init(IK_UnsetInit) {}
public:
@@ -468,9 +474,11 @@ public:
/// 'true'/'false' - Represent a concrete initializer for a bit.
class BitInit final : public TypedInit {
+ friend detail::RecordContext;
+
bool Value;
- explicit BitInit(bool V) : TypedInit(IK_BitInit, BitRecTy::get()), Value(V) {}
+ explicit BitInit(bool V, RecTy *T) : TypedInit(IK_BitInit, T), Value(V) {}
public:
BitInit(const BitInit &) = delete;
@@ -637,7 +645,7 @@ public:
}
StringRef getValue() const { return Value; }
- StringFormat getFormat() const { return Format; }
+ StringFormat getFormat() const { return Format; }
bool hasCodeFormat() const { return Format == SF_Code; }
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -1414,6 +1422,7 @@ private:
SMLoc Loc; // Source location of definition of name.
PointerIntPair<RecTy *, 2, FieldKind> TyAndKind;
Init *Value;
+ bool IsUsed = false;
public:
RecordVal(Init *N, RecTy *T, FieldKind K);
@@ -1458,6 +1467,11 @@ public:
/// Set the value and source location of the field.
bool setValue(Init *V, SMLoc NewLoc);
+ /// Whether this value is used. Useful for reporting warnings, for example
+ /// when a template argument is unused.
+ void setUsed(bool Used) { IsUsed = Used; }
+ bool isUsed() const { return IsUsed; }
+
void dump() const;
/// Print the value to an output stream, possibly with a semicolon.
@@ -1483,8 +1497,6 @@ public:
};
private:
- static unsigned LastID;
-
Init *Name;
// Location where record was instantiated, followed by the location of
// multiclass prototypes used.
@@ -1515,8 +1527,8 @@ public:
// Constructs a record.
explicit Record(Init *N, ArrayRef<SMLoc> locs, RecordKeeper &records,
bool Anonymous = false, bool Class = false)
- : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records),
- ID(LastID++), IsAnonymous(Anonymous), IsClass(Class) {
+ : Name(N), Locs(locs.begin(), locs.end()), TrackedRecords(records),
+ ID(getNewUID()), IsAnonymous(Anonymous), IsClass(Class) {
checkName();
}
@@ -1528,12 +1540,12 @@ public:
// ID number. Don't copy CorrespondingDefInit either, since it's owned by the
// original record. All other fields can be copied normally.
Record(const Record &O)
- : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
- Values(O.Values), Assertions(O.Assertions), SuperClasses(O.SuperClasses),
- TrackedRecords(O.TrackedRecords), ID(LastID++),
- IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) { }
+ : Name(O.Name), Locs(O.Locs), TemplateArgs(O.TemplateArgs),
+ Values(O.Values), Assertions(O.Assertions),
+ SuperClasses(O.SuperClasses), TrackedRecords(O.TrackedRecords),
+ ID(getNewUID()), IsAnonymous(O.IsAnonymous), IsClass(O.IsClass) {}
- static unsigned getNewUID() { return LastID++; }
+ static unsigned getNewUID();
unsigned getID() const { return ID; }
@@ -1632,6 +1644,7 @@ public:
}
void checkRecordAssertions();
+ void checkUnusedTemplateArgs();
bool isSubClassOf(const Record *R) const {
for (const auto &SCPair : SuperClasses)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index e3e1d5fc3c65..72c974834a2f 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -225,6 +225,18 @@ def G_FREEZE : GenericInstruction {
let hasSideEffects = false;
}
+def G_LROUND: GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
+def G_LLROUND: GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+
//------------------------------------------------------------------------------
// Binary ops.
//------------------------------------------------------------------------------
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f35156d59849..e2d3dbdda88a 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -130,7 +130,13 @@ def extending_loads : GICombineRule<
(match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD):$root,
[{ return Helper.matchCombineExtendingLoads(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineExtendingLoads(*${root}, ${matchinfo}); }])>;
-def combines_for_extload: GICombineGroup<[extending_loads]>;
+
+def load_and_mask : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchCombineLoadWithAndMask(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+def combines_for_extload: GICombineGroup<[extending_loads, load_and_mask]>;
def sext_trunc_sextload : GICombineRule<
(defs root:$d),
@@ -197,6 +203,12 @@ def reduce_shl_of_extend : GICombineRule<
[{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>;
+def narrow_binop_feeding_and : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchNarrowBinopFeedingAnd(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// [us]itofp(undef) = 0, because the result value is bounded.
def undef_to_fp_zero : GICombineRule<
(defs root:$root),
@@ -275,7 +287,7 @@ def select_constant_cmp: GICombineRule<
def right_identity_zero: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR,
- G_PTR_ADD):$root,
+ G_PTR_ADD, G_ROTL, G_ROTR):$root,
[{ return Helper.matchConstantOp(${root}->getOperand(2), 0); }]),
(apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
@@ -507,6 +519,13 @@ def fabs_fabs_fold: GICombineRule<
(apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
+// Fold (fabs (fneg x)) -> (fabs x).
+def fabs_fneg_fold: GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FABS):$root,
+ [{ return Helper.matchCombineFAbsOfFNeg(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// Fold (unmerge cst) -> cst1, cst2, ...
def unmerge_cst_matchinfo : GIDefMatchData<"SmallVector<APInt, 8>">;
def unmerge_cst : GICombineRule<
@@ -588,6 +607,14 @@ def load_or_combine : GICombineRule<
[{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
+def truncstore_merge : GICombineRule<
+ (defs root:$root, truncstore_merge_matcdata:$info),
+ (match (wip_match_opcode G_STORE):$root,
+ [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
+ (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
+
def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
def extend_through_phis : GICombineRule<
(defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -638,6 +665,18 @@ def icmp_to_true_false_known_bits : GICombineRule<
[{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def icmp_to_lhs_known_bits : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return Helper.matchICmpToLHSKnownBits(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def and_or_disjoint_mask : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_AND):$root,
+ [{ return Helper.matchAndOrDisjointMask(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${info}); }])>;
+
def bitfield_extract_from_and : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -652,8 +691,31 @@ def bitfield_extract_from_sext_inreg : GICombineRule<
[{ return Helper.matchBitfieldExtractFromSExtInReg(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+def bitfield_extract_from_shr : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchBitfieldExtractFromShr(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
+def bitfield_extract_from_shr_and : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchBitfieldExtractFromShrAnd(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
def form_bitfield_extract : GICombineGroup<[bitfield_extract_from_sext_inreg,
- bitfield_extract_from_and]>;
+ bitfield_extract_from_and,
+ bitfield_extract_from_shr,
+ bitfield_extract_from_shr_and]>;
+
+def udiv_by_const : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UDIV):$root,
+ [{ return Helper.matchUDivByConst(*${root}); }]),
+ (apply [{ Helper.applyUDivByConst(*${root}); }])>;
+
+def intdiv_combines : GICombineGroup<[udiv_by_const]>;
+
def reassoc_ptradd : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_PTR_ADD):$root,
@@ -669,6 +731,26 @@ def constant_fold : GICombineRule<
[{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def mulo_by_2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+ [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
+def mulh_to_lshr : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_UMULH):$root,
+ [{ return Helper.matchUMulHToLShr(*${root}); }]),
+ (apply [{ Helper.applyUMulHToLShr(*${root}); }])>;
+
+def mulh_combines : GICombineGroup<[mulh_to_lshr]>;
+
+def redundant_neg_operands: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMAD, G_FMA):$root,
+ [{ return Helper.matchRedundantNegOperands(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -685,13 +767,14 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
fneg_fneg_fold, right_identity_one]>;
def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
- overlapping_and]>;
+ overlapping_and, mulo_by_2]>;
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits]>;
+ zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
-def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
+def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
+ narrow_binop_feeding_and]>;
def phi_combines : GICombineGroup<[extend_through_phis]>;
@@ -713,8 +796,10 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shl,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
- div_rem_to_divrem, funnel_shift_combines, form_bitfield_extract,
- constant_fold]>;
+ truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+ form_bitfield_extract, constant_fold, fabs_fneg_fold,
+ intdiv_combines, mulh_combines, redundant_neg_operands,
+ and_or_disjoint_mask ]>;
// A combine group used to for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 8a5052401e9b..12eee24b578f 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -144,6 +144,8 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
def : GINodeEquiv<G_ROTR, rotr>;
def : GINodeEquiv<G_ROTL, rotl>;
+def : GINodeEquiv<G_LROUND, lround>;
+def : GINodeEquiv<G_LLROUND, llround>;
def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index e9720d765167..7ae690b83770 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -306,6 +306,9 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// the assembly matcher will provide a function to map from diagnostic types
// to message strings.
string DiagnosticString = "";
+
+ // Target-specific flags. This becomes the TSFlags field in TargetRegisterClass.
+ bits<8> TSFlags = 0;
}
// The memberList in a RegisterClass is a dag of set operations. TableGen
@@ -650,6 +653,25 @@ class Instruction : InstructionEncoding {
/// instruction selection predicates. FastISel cannot handle such cases, but
/// SelectionDAG can.
bit FastISelShouldIgnore = false;
+
+ /// HasPositionOrder: Tell TableGen to sort the instructions by record ID,
+ /// so that an instruction defined earlier is sorted earlier in the
+ /// assembly matching table.
+ bit HasPositionOrder = false;
+}
+
+/// Defines a Pat match between compressed and uncompressed instruction.
+/// The relationship and helper function generation are handled by
+/// CompressInstEmitter backend.
+class CompressPat<dag input, dag output, list<Predicate> predicates = []> {
+ /// Uncompressed instruction description.
+ dag Input = input;
+ /// Compressed instruction description.
+ dag Output = output;
+ /// Predicates that must be true for this to match.
+ list<Predicate> Predicates = predicates;
+ /// Duplicate the match when only the tied operand differs.
+ bit isCompressOnly = false;
}
/// Defines an additional encoding that disassembles to the given instruction
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 93bfdd20e082..752032d3d04d 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegister.h"
#include <cstdint>
namespace llvm {
@@ -219,6 +220,14 @@ public:
return SupportDebugThreadLocalLocation;
}
+ /// Returns the register used as static base in RWPI variants.
+ virtual const MCRegister getStaticBase() const { return MCRegister::NoRegister; }
+
+ /// Get the target specific RWPI relocation.
+ virtual const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const {
+ return nullptr;
+ }
+
/// Get the target specific PC relative GOT entry relocation
virtual const MCExpr *getIndirectSymViaGOTPCRel(const GlobalValue *GV,
const MCSymbol *Sym,
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index dd17af4a642a..acfb265a9ff9 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -13,6 +13,7 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
@@ -20,9 +21,11 @@
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
+#include <utility>
namespace llvm {
@@ -110,6 +113,9 @@ protected: // Can only create subclasses.
unsigned RequireStructuredCFG : 1;
unsigned O0WantsFastISel : 1;
+ // PGO related tunables.
+ Optional<PGOOptions> PGOOption = None;
+
public:
const TargetOptions DefaultOptions;
mutable TargetOptions Options;
@@ -303,6 +309,9 @@ public:
return false;
}
+ void setPGOOption(Optional<PGOOptions> PGOOpt) { PGOOption = PGOOpt; }
+ const Optional<PGOOptions> &getPGOOption() const { return PGOOption; }
+
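A hedged sketch of the new PGO hook; how the Optional<PGOOptions> is built is a frontend concern and out of scope for this patch:

static void configurePGO(llvm::TargetMachine &TM,
                         llvm::Optional<llvm::PGOOptions> PGOOpt) {
  // Stash the frontend's PGO configuration on the TargetMachine.
  TM.setPGOOption(std::move(PGOOpt));
  // Later consumers read it back through the accessor.
  bool HasPGO = TM.getPGOOption().hasValue();
  (void)HasPGO;
}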
/// If the specified generic pointer could be assumed as a pointer to a
/// specific address space, return that address space.
///
@@ -311,6 +320,18 @@ public:
/// properties.
virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
+ /// If the specified predicate checks whether a generic pointer falls within
+ /// a specified address space, return that generic pointer and the address
+ /// space being queried.
+ ///
+ /// Such predicates could be specified in @llvm.assume intrinsics for the
+ /// optimizer to assume that the given generic pointer always falls within
+ /// the address space based on that predicate.
+ virtual std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const {
+ return std::make_pair(nullptr, -1);
+ }
+
/// Get a \c TargetIRAnalysis appropriate for the target.
///
/// This is used to construct the new pass manager's target IR analysis pass,
@@ -464,6 +485,10 @@ public:
virtual bool useIPRA() const {
return false;
}
+
+ /// The default variant to use in unqualified `asm` instructions.
+ /// If this returns 0, `asm "$(foo$|bar$)"` will evaluate to `asm "foo"`.
+ virtual int unqualifiedInlineAsmVariant() const { return 0; }
};
/// Helper method for getting the code model, returning Default if
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index e5bea9041479..912f6d1c153a 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -110,12 +110,23 @@ namespace llvm {
DisableWithDiag // Disable the abort but emit a diagnostic on failure.
};
+ /// Indicates when and how the Swift async frame pointer bit should be set.
+ enum class SwiftAsyncFramePointerMode {
+ /// Determine whether to set the bit statically or dynamically based
+ /// on the deployment target.
+ DeploymentBased,
+ /// Always set the bit.
+ Always,
+ /// Never set the bit.
+ Never,
+ };
+
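A hedged sketch of selecting the new mode; the SwiftAsyncFramePointer field it assigns is declared further down in this patch:

static llvm::TargetOptions makeOptions() {
  llvm::TargetOptions Opts;
  // Never set the Swift async frame pointer bit, regardless of deployment target.
  Opts.SwiftAsyncFramePointer = llvm::SwiftAsyncFramePointerMode::Never;
  return Opts;
}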
class TargetOptions {
public:
TargetOptions()
: UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
NoTrappingFPMath(true), NoSignedZerosFPMath(false),
- EnableAIXExtendedAltivecABI(false),
+ ApproxFuncFPMath(false), EnableAIXExtendedAltivecABI(false),
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
@@ -129,7 +140,7 @@ namespace llvm {
EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
EmitAddrsig(false), EmitCallSiteInfo(false),
SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
- PseudoProbeForProfiling(false), ValueTrackingVariableLocations(false),
+ ValueTrackingVariableLocations(false),
ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
DebugStrictDwarf(false),
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
@@ -172,9 +183,15 @@ namespace llvm {
/// argument or result as insignificant.
unsigned NoSignedZerosFPMath : 1;
+ /// ApproxFuncFPMath - This flag is enabled when the
+ /// -enable-approx-func-fp-math option is specified on the command line. It
+ /// allows optimizations to substitute math functions with approximate
+ /// calculations.
+ unsigned ApproxFuncFPMath : 1;
+
/// EnableAIXExtendedAltivecABI - This flag returns true when -vec-extabi is
/// specified. The code generator is then able to use both volatile and
- /// nonvolitle vector regisers. When false, the code generator only uses
+ /// non-volatile vector registers. When false, the code generator only uses
/// volatile vector registers which is the default setting on AIX.
unsigned EnableAIXExtendedAltivecABI : 1;
@@ -219,6 +236,11 @@ namespace llvm {
/// selection fails to lower/select an instruction.
GlobalISelAbortMode GlobalISelAbort = GlobalISelAbortMode::Enable;
+ /// Control when and how the Swift async frame pointer bit should
+ /// be set.
+ SwiftAsyncFramePointerMode SwiftAsyncFramePointer =
+ SwiftAsyncFramePointerMode::Always;
+
/// UseInitArray - Use .init_array instead of .ctors for static
/// constructors.
unsigned UseInitArray : 1;
@@ -305,9 +327,6 @@ namespace llvm {
/// production.
bool ShouldEmitDebugEntryValues() const;
- /// Emit pseudo probes into the binary for sample profiling
- unsigned PseudoProbeForProfiling : 1;
-
// When set to true, use experimental new debug variable location tracking,
// which seeks to follow the values of variables rather than their location,
// post isel.
@@ -328,6 +347,9 @@ namespace llvm {
/// passed on the command line.
std::string StackUsageOutput;
+ /// If greater than 0, override TargetLoweringBase::PrefLoopAlignment.
+ unsigned LoopAlignment = 0;
+
/// FloatABIType - This setting is set when the -float-abi=xxx option is specified
/// on the command line. This setting may either be Default, Soft, or Hard.
/// Default selects the target's default behavior. Soft selects the ABI for
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 44ec2250a9c5..d8ef7c49a5f9 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -297,10 +297,6 @@ def SDTAtomicLoad : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisPtrTy<1>
]>;
-def SDTConvertOp : SDTypeProfile<1, 5, [ //cvtss, su, us, uu, ff, fs, fu, sf, su
- SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>, SDTCisPtrTy<4>, SDTCisPtrTy<5>
-]>;
-
class SDCallSeqStart<list<SDTypeConstraint> constraints> :
SDTypeProfile<0, 2, constraints>;
class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
@@ -1050,6 +1046,10 @@ def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = true;
let ScalarMemoryVT = i32;
}
+def extloadvf16 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let IsLoad = true;
+ let ScalarMemoryVT = f16;
+}
def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
let IsLoad = true;
let ScalarMemoryVT = f32;
@@ -1472,7 +1472,7 @@ def any_fsetccs : PatFrags<(ops node:$lhs, node:$rhs, node:$pred),
[(strict_fsetccs node:$lhs, node:$rhs, node:$pred),
(setcc node:$lhs, node:$rhs, node:$pred)]>;
-multiclass binary_atomic_op_ord<SDNode atomic_op> {
+multiclass binary_atomic_op_ord {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$val)> {
let IsAtomic = true;
@@ -1500,7 +1500,7 @@ multiclass binary_atomic_op_ord<SDNode atomic_op> {
}
}
-multiclass ternary_atomic_op_ord<SDNode atomic_op> {
+multiclass ternary_atomic_op_ord {
def NAME#_monotonic : PatFrag<(ops node:$ptr, node:$cmp, node:$val),
(!cast<SDPatternOperator>(NAME) node:$ptr, node:$cmp, node:$val)> {
let IsAtomic = true;
@@ -1550,10 +1550,10 @@ multiclass binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
let MemoryVT = !if(IsInt, i64, f64);
}
- defm NAME#_8 : binary_atomic_op_ord<atomic_op>;
- defm NAME#_16 : binary_atomic_op_ord<atomic_op>;
- defm NAME#_32 : binary_atomic_op_ord<atomic_op>;
- defm NAME#_64 : binary_atomic_op_ord<atomic_op>;
+ defm NAME#_8 : binary_atomic_op_ord;
+ defm NAME#_16 : binary_atomic_op_ord;
+ defm NAME#_32 : binary_atomic_op_ord;
+ defm NAME#_64 : binary_atomic_op_ord;
}
multiclass ternary_atomic_op<SDNode atomic_op> {
@@ -1578,10 +1578,10 @@ multiclass ternary_atomic_op<SDNode atomic_op> {
let MemoryVT = i64;
}
- defm NAME#_8 : ternary_atomic_op_ord<atomic_op>;
- defm NAME#_16 : ternary_atomic_op_ord<atomic_op>;
- defm NAME#_32 : ternary_atomic_op_ord<atomic_op>;
- defm NAME#_64 : ternary_atomic_op_ord<atomic_op>;
+ defm NAME#_8 : ternary_atomic_op_ord;
+ defm NAME#_16 : ternary_atomic_op_ord;
+ defm NAME#_32 : ternary_atomic_op_ord;
+ defm NAME#_64 : ternary_atomic_op_ord;
}
defm atomic_load_add : binary_atomic_op<atomic_load_add>;
diff --git a/llvm/include/llvm/TextAPI/Architecture.h b/llvm/include/llvm/TextAPI/Architecture.h
index 3cd8a3a19e96..978359995074 100644
--- a/llvm/include/llvm/TextAPI/Architecture.h
+++ b/llvm/include/llvm/TextAPI/Architecture.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
-#define LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+#ifndef LLVM_TEXTAPI_ARCHITECTURE_H
+#define LLVM_TEXTAPI_ARCHITECTURE_H
#include <cstdint>
#include <utility>
@@ -54,4 +54,4 @@ raw_ostream &operator<<(raw_ostream &OS, Architecture Arch);
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURE_H
+#endif // LLVM_TEXTAPI_ARCHITECTURE_H
diff --git a/llvm/include/llvm/TextAPI/ArchitectureSet.h b/llvm/include/llvm/TextAPI/ArchitectureSet.h
index e9b374e4f69f..f17cb74c9183 100644
--- a/llvm/include/llvm/TextAPI/ArchitectureSet.h
+++ b/llvm/include/llvm/TextAPI/ArchitectureSet.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H
-#define LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H
+#ifndef LLVM_TEXTAPI_ARCHITECTURESET_H
+#define LLVM_TEXTAPI_ARCHITECTURESET_H
#include "llvm/TextAPI/Architecture.h"
#include <cstddef>
@@ -168,4 +168,4 @@ raw_ostream &operator<<(raw_ostream &OS, ArchitectureSet Set);
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_ARCHITECTURESET_H
+#endif // LLVM_TEXTAPI_ARCHITECTURESET_H
diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h
index d17c0c1c5b47..03a541454e1a 100644
--- a/llvm/include/llvm/TextAPI/InterfaceFile.h
+++ b/llvm/include/llvm/TextAPI/InterfaceFile.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_INTERFACEFILE_H
-#define LLVM_TEXTAPI_MACHO_INTERFACEFILE_H
+#ifndef LLVM_TEXTAPI_INTERFACEFILE_H
+#define LLVM_TEXTAPI_INTERFACEFILE_H
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/DenseMap.h"
@@ -445,7 +445,7 @@ bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
KeyInfoT, BucketT> &RHS) {
if (LHS.size() != RHS.size())
return false;
- for (auto KV : LHS) {
+ for (const auto &KV : LHS) {
auto I = RHS.find(KV.first);
if (I == RHS.end() || *I->second != *KV.second)
return false;
@@ -456,4 +456,4 @@ bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_INTERFACEFILE_H
+#endif // LLVM_TEXTAPI_INTERFACEFILE_H
diff --git a/llvm/include/llvm/TextAPI/PackedVersion.h b/llvm/include/llvm/TextAPI/PackedVersion.h
index e3d2bd5ae2e5..24bec2ebe8fc 100644
--- a/llvm/include/llvm/TextAPI/PackedVersion.h
+++ b/llvm/include/llvm/TextAPI/PackedVersion.h
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_PACKEDVERSION_H
-#define LLVM_TEXTAPI_MACHO_PACKEDVERSION_H
+#ifndef LLVM_TEXTAPI_PACKEDVERSION_H
+#define LLVM_TEXTAPI_PACKEDVERSION_H
#include <cstdint>
#include <utility>
@@ -64,4 +64,4 @@ inline raw_ostream &operator<<(raw_ostream &OS, const PackedVersion &Version) {
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_PACKEDVERSION_H
+#endif // LLVM_TEXTAPI_PACKEDVERSION_H
diff --git a/llvm/include/llvm/TextAPI/Platform.h b/llvm/include/llvm/TextAPI/Platform.h
index 3f052b7b8624..f7affc3ae980 100644
--- a/llvm/include/llvm/TextAPI/Platform.h
+++ b/llvm/include/llvm/TextAPI/Platform.h
@@ -9,8 +9,8 @@
// Defines the Platforms supported by Tapi and helpers.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_PLATFORM_H
-#define LLVM_TEXTAPI_MACHO_PLATFORM_H
+#ifndef LLVM_TEXTAPI_PLATFORM_H
+#define LLVM_TEXTAPI_PLATFORM_H
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -46,4 +46,4 @@ std::string getOSAndEnvironmentName(PlatformKind Platform,
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_PLATFORM_H
+#endif // LLVM_TEXTAPI_PLATFORM_H
diff --git a/llvm/include/llvm/TextAPI/Symbol.h b/llvm/include/llvm/TextAPI/Symbol.h
index 02f184d2502f..dfc84908bba2 100644
--- a/llvm/include/llvm/TextAPI/Symbol.h
+++ b/llvm/include/llvm/TextAPI/Symbol.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_SYMBOL_H
-#define LLVM_TEXTAPI_MACHO_SYMBOL_H
+#ifndef LLVM_TEXTAPI_SYMBOL_H
+#define LLVM_TEXTAPI_SYMBOL_H
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringRef.h"
@@ -132,4 +132,4 @@ private:
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_SYMBOL_H
+#endif // LLVM_TEXTAPI_SYMBOL_H
diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h
index 53f56a6ee7b0..c2588b9d5a21 100644
--- a/llvm/include/llvm/TextAPI/Target.h
+++ b/llvm/include/llvm/TextAPI/Target.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_TARGET_H
-#define LLVM_TEXTAPI_MACHO_TARGET_H
+#ifndef LLVM_TEXTAPI_TARGET_H
+#define LLVM_TEXTAPI_TARGET_H
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Error.h"
@@ -67,4 +67,4 @@ raw_ostream &operator<<(raw_ostream &OS, const Target &Target);
} // namespace MachO
} // namespace llvm
-#endif // LLVM_TEXTAPI_MACHO_TARGET_H
+#endif // LLVM_TEXTAPI_TARGET_H
diff --git a/llvm/include/llvm/TextAPI/TextAPIReader.h b/llvm/include/llvm/TextAPI/TextAPIReader.h
index a403bab8465d..389335312a74 100644
--- a/llvm/include/llvm/TextAPI/TextAPIReader.h
+++ b/llvm/include/llvm/TextAPI/TextAPIReader.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H
-#define LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H
+#ifndef LLVM_TEXTAPI_TEXTAPIREADER_H
+#define LLVM_TEXTAPI_TEXTAPIREADER_H
#include "llvm/Support/Error.h"
@@ -30,4 +30,4 @@ public:
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_TEXTAPIREADER_H
+#endif // LLVM_TEXTAPI_TEXTAPIREADER_H
diff --git a/llvm/include/llvm/TextAPI/TextAPIWriter.h b/llvm/include/llvm/TextAPI/TextAPIWriter.h
index 763805168ae6..f9857a806f60 100644
--- a/llvm/include/llvm/TextAPI/TextAPIWriter.h
+++ b/llvm/include/llvm/TextAPI/TextAPIWriter.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H
-#define LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H
+#ifndef LLVM_TEXTAPI_TEXTAPIWRITER_H
+#define LLVM_TEXTAPI_TEXTAPIWRITER_H
namespace llvm {
@@ -28,4 +28,4 @@ public:
} // end namespace MachO.
} // end namespace llvm.
-#endif // LLVM_TEXTAPI_MACHO_TEXTAPIWRITER_H
+#endif // LLVM_TEXTAPI_TEXTAPIWRITER_H
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index c93b8adcc890..d4cbc9bd20b7 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -101,6 +101,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
@@ -591,7 +592,7 @@ struct IRPosition {
LLVMContext &Ctx = getAnchorValue().getContext();
for (Attribute::AttrKind AK : AKs)
- AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK);
+ AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK);
if (CB)
CB->setAttributes(AttrList);
@@ -1150,8 +1151,6 @@ struct Attributor {
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions.
/// \param RewriteSignatures Whether to rewrite function signatures.
- /// \param MaxFixedPointIterations Maximum number of iterations to run until
- /// fixpoint.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
CallGraphUpdater &CGUpdater,
DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
@@ -1169,8 +1168,9 @@ struct Attributor {
/// \param CGUpdater Helper to update an underlying call graph.
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions
- /// \param MaxFixedPointIterations Maximum number of iterations to run until
- /// fixpoint.
+ /// \param RewriteSignatures Whether to rewrite function signatures.
+ /// \param MaxFixpointIterations Maximum number of iterations to run until
+ /// fixpoint.
/// \param OREGetter A callback function that returns an ORE object from a
/// Function pointer.
/// \param PassName The name of the pass emitting remarks.
@@ -1855,6 +1855,10 @@ public:
///
static void createShallowWrapper(Function &F);
+  /// Returns true if the function \p F can be internalized, i.e. it has a
+ /// compatible linkage.
+ static bool isInternalizable(Function &F);
+
/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function to the copied one
@@ -1870,6 +1874,22 @@ public:
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);
+ /// Make copies of each function in the set \p FnSet such that the copied
+ /// version has internal linkage afterwards and can be analysed. Then we
+ /// replace all uses of the original function to the copied one. The map
+ /// \p FnMap contains a mapping of functions to their internalized versions.
+ ///
+ /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
+ /// linkage can be internalized because these linkages guarantee that other
+ /// definitions with the same name have the same semantics as this one.
+ ///
+ /// This version will internalize all the functions in the set \p FnSet at
+  /// once and then replace the uses. This prevents internalized functions from
+  /// being called by external functions when there is an internalized version
+  /// in the module.
+ static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+ DenseMap<Function *, Function *> &FnMap);
+
/// Return the data layout associated with the anchor scope.
const DataLayout &getDataLayout() const { return InfoCache.DL; }
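For illustration, a minimal sketch of how a caller might drive the new batch internalization entry point declared above; the helper name, the candidate filter, and the follow-up comment are assumptions, not something the header prescribes.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

// Collect every definition with an internalizable linkage and internalize the
// whole batch at once, so calls between the copies stay inside the module.
static void internalizeCandidates(Module &M) {
  SmallPtrSet<Function *, 16> FnSet;
  for (Function &F : M)
    if (!F.isDeclaration() && Attributor::isInternalizable(F))
      FnSet.insert(&F);

  DenseMap<Function *, Function *> InternalizedMap;
  if (Attributor::internalizeFunctions(FnSet, InternalizedMap)) {
    // InternalizedMap now maps each original function to its internal copy;
    // subsequent analysis would be seeded with the copies instead.
  }
}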
@@ -2492,6 +2512,139 @@ struct IntegerRangeState : public AbstractState {
return *this;
}
};
+
+/// Simple state for a set.
+///
+/// This represents a state containing a set of values. The interface supports
+/// modelling sets that contain all possible elements. The state's internal
+/// value is modified using union or intersection operations.
+template <typename BaseTy> struct SetState : public AbstractState {
+ /// A wrapper around a set that has semantics for handling unions and
+ /// intersections with a "universal" set that contains all elements.
+ struct SetContents {
+    /// Creates either a universal set (implicitly containing all elements) or
+    /// an empty set, depending on \p Universal.
+ SetContents(bool Universal) : Universal(Universal) {}
+
+ /// Creates a non-universal set with concrete values.
+ SetContents(const DenseSet<BaseTy> &Assumptions)
+ : Universal(false), Set(Assumptions) {}
+
+ SetContents(bool Universal, const DenseSet<BaseTy> &Assumptions)
+ : Universal(Universal), Set(Assumptions) {}
+
+ const DenseSet<BaseTy> &getSet() const { return Set; }
+
+ bool isUniversal() const { return Universal; }
+
+ bool empty() const { return Set.empty() && !Universal; }
+
+ /// Finds A := A ^ B where A or B could be the "Universal" set which
+ /// contains every possible attribute. Returns true if changes were made.
+ bool getIntersection(const SetContents &RHS) {
+ bool IsUniversal = Universal;
+ unsigned Size = Set.size();
+
+ // A := A ^ U = A
+ if (RHS.isUniversal())
+ return false;
+
+ // A := U ^ B = B
+ if (Universal)
+ Set = RHS.getSet();
+ else
+ set_intersect(Set, RHS.getSet());
+
+ Universal &= RHS.isUniversal();
+ return IsUniversal != Universal || Size != Set.size();
+ }
+
+ /// Finds A := A u B where A or B could be the "Universal" set which
+    /// contains every possible attribute. Returns true if changes were made.
+ bool getUnion(const SetContents &RHS) {
+ bool IsUniversal = Universal;
+ unsigned Size = Set.size();
+
+ // A := A u U = U = U u B
+ if (!RHS.isUniversal() && !Universal)
+ set_union(Set, RHS.getSet());
+
+ Universal |= RHS.isUniversal();
+ return IsUniversal != Universal || Size != Set.size();
+ }
+
+ private:
+ /// Indicates if this set is "universal", containing every possible element.
+ bool Universal;
+
+ /// The set of currently active assumptions.
+ DenseSet<BaseTy> Set;
+ };
+
+ SetState() : Known(false), Assumed(true), IsAtFixedpoint(false) {}
+
+ /// Initializes the known state with an initial set and initializes the
+ /// assumed state as universal.
+ SetState(const DenseSet<BaseTy> &Known)
+ : Known(Known), Assumed(true), IsAtFixedpoint(false) {}
+
+ /// See AbstractState::isValidState()
+ bool isValidState() const override { return !Assumed.empty(); }
+
+ /// See AbstractState::isAtFixpoint()
+ bool isAtFixpoint() const override { return IsAtFixedpoint; }
+
+ /// See AbstractState::indicateOptimisticFixpoint(...)
+ ChangeStatus indicateOptimisticFixpoint() override {
+ IsAtFixedpoint = true;
+ Known = Assumed;
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractState::indicatePessimisticFixpoint(...)
+ ChangeStatus indicatePessimisticFixpoint() override {
+ IsAtFixedpoint = true;
+ Assumed = Known;
+ return ChangeStatus::CHANGED;
+ }
+
+ /// Return the known state encoding.
+ const SetContents &getKnown() const { return Known; }
+
+ /// Return the assumed state encoding.
+ const SetContents &getAssumed() const { return Assumed; }
+
+ /// Returns if the set state contains the element.
+ bool setContains(const BaseTy &Elem) const {
+ return Assumed.getSet().contains(Elem) || Known.getSet().contains(Elem);
+ }
+
+ /// Performs the set intersection between this set and \p RHS. Returns true if
+ /// changes were made.
+ bool getIntersection(const SetContents &RHS) {
+ unsigned SizeBefore = Assumed.getSet().size();
+
+ // Get intersection and make sure that the known set is still a proper
+ // subset of the assumed set. A := K u (A ^ R).
+ Assumed.getIntersection(RHS);
+ Assumed.getUnion(Known);
+
+ return SizeBefore != Assumed.getSet().size();
+ }
+
+ /// Performs the set union between this set and \p RHS. Returns true if
+ /// changes were made.
+ bool getUnion(const SetContents &RHS) { return Assumed.getUnion(RHS); }
+
+private:
+ /// The set of values known for this state.
+ SetContents Known;
+
+ /// The set of assumed values for this state.
+ SetContents Assumed;
+
+ bool IsAtFixedpoint;
+};
+
/// Helper struct necessary as the modular build fails if the virtual method
/// IRAttribute::manifest is defined in the Attributor.cpp.
struct IRAttributeManifest {
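A short sketch of the known/assumed semantics of the SetState added above, using StringRef elements as the new assumption tracking does; the function name and the concrete strings are illustrative assumptions.

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

static void setStateExample() {
  // The known side starts from a concrete set; the assumed side starts as the
  // universal set (it may still contain anything).
  SetState<StringRef> S(DenseSet<StringRef>({StringRef("no-openmp")}));

  // Intersecting the assumed side with a concrete set drops universality:
  // Assumed := Known u (Assumed ^ RHS).
  DenseSet<StringRef> RHS({StringRef("no-openmp"), StringRef("no-exceptions")});
  S.getIntersection(SetState<StringRef>::SetContents(RHS));

  // Membership checks consult both the known and the assumed encoding.
  bool HasIt = S.setContains(StringRef("no-openmp")); // true
  (void)HasIt;
}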
@@ -3394,7 +3547,7 @@ struct AADereferenceable
};
using AAAlignmentStateType =
- IncIntegerState<uint32_t, Value::MaximumAlignment, 1>;
+ IncIntegerState<uint64_t, Value::MaximumAlignment, 1>;
/// An abstract interface for all align attributes.
struct AAAlign : public IRAttribute<
Attribute::Alignment,
@@ -3402,10 +3555,10 @@ struct AAAlign : public IRAttribute<
AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return assumed alignment.
- unsigned getAssumedAlign() const { return getAssumed(); }
+ uint64_t getAssumedAlign() const { return getAssumed(); }
/// Return known alignment.
- unsigned getKnownAlign() const { return getKnown(); }
+ uint64_t getKnownAlign() const { return getKnown(); }
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAlign"; }
@@ -3776,7 +3929,7 @@ struct AAMemoryLocation
  /// Return true if we assume that the associated function has no observable
/// accesses.
bool isAssumedReadNone() const {
- return isAssumed(NO_LOCATIONS) | isAssumedStackOnly();
+ return isAssumed(NO_LOCATIONS) || isAssumedStackOnly();
}
  /// Return true if we know that the associated function has at most
@@ -3920,19 +4073,19 @@ struct AAValueConstantRange
static AAValueConstantRange &createForPosition(const IRPosition &IRP,
Attributor &A);
- /// Return an assumed range for the assocaited value a program point \p CtxI.
+  /// Return an assumed range for the associated value at a program point \p CtxI.
/// If \p I is nullptr, simply return an assumed range.
virtual ConstantRange
getAssumedConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const = 0;
- /// Return a known range for the assocaited value at a program point \p CtxI.
+ /// Return a known range for the associated value at a program point \p CtxI.
/// If \p I is nullptr, simply return a known range.
virtual ConstantRange
getKnownConstantRange(Attributor &A,
const Instruction *CtxI = nullptr) const = 0;
- /// Return an assumed constant for the assocaited value a program point \p
+  /// Return an assumed constant for the associated value at a program point \p
/// CtxI.
Optional<ConstantInt *>
getAssumedConstantInt(Attributor &A,
@@ -4435,6 +4588,9 @@ struct AAFunctionReachability
/// If the function represented by this possition can reach \p Fn.
virtual bool canReach(Attributor &A, Function *Fn) const = 0;
+ /// Can \p CB reach \p Fn
+ virtual bool canReach(Attributor &A, CallBase &CB, Function *Fn) const = 0;
+
/// Create an abstract attribute view for the position \p IRP.
static AAFunctionReachability &createForPosition(const IRPosition &IRP,
Attributor &A);
@@ -4587,6 +4743,40 @@ struct AAPointerInfo : public AbstractAttribute {
static const char ID;
};
+/// An abstract attribute for getting assumption information.
+struct AAAssumptionInfo
+ : public StateWrapper<SetState<StringRef>, AbstractAttribute,
+ DenseSet<StringRef>> {
+ using Base =
+ StateWrapper<SetState<StringRef>, AbstractAttribute, DenseSet<StringRef>>;
+
+ AAAssumptionInfo(const IRPosition &IRP, Attributor &A,
+ const DenseSet<StringRef> &Known)
+ : Base(IRP, Known) {}
+
+ /// Returns true if the assumption set contains the assumption \p Assumption.
+ virtual bool hasAssumption(const StringRef Assumption) const = 0;
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAssumptionInfo &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAAssumptionInfo"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAssumptionInfo
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
/// Run options, used by the pass manager.
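A hedged sketch of consuming the new AAAssumptionInfo interface once an instance has been obtained from an Attributor run; how the attribute is created and seeded is elided, and the helper name is an assumption.

#include "llvm/ADT/StringRef.h"
#include "llvm/Transforms/IPO/Attributor.h"

using namespace llvm;

// Returns true if the queried position is assumed (or known) to carry the
// named assumption string.
static bool positionAssumes(const AAAssumptionInfo &AA, StringRef Assumption) {
  return AA.hasAssumption(Assumption);
}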
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index ce61eea05c79..0b6734a3929d 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -17,6 +17,7 @@
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -38,6 +39,13 @@ enum MemoryAccessKind {
/// Returns the memory access properties of this copy of the function.
MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR);
+/// Propagate function attributes for function summaries along the index's
+/// callgraph during the thin link.
+bool thinLTOPropagateFunctionAttrs(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing);
+
/// Computes function attributes in post-order over the call graph.
///
/// By operating in post-order, this pass computes precise attributes for
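A minimal sketch of invoking the new thin-link propagation declared above; the wrapper function and the always-prevailing callback are assumptions made for illustration (real drivers consult their symbol resolution), and the bool result is forwarded as declared.

#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"

using namespace llvm;

static bool propagateAttrsAtThinLink(ModuleSummaryIndex &Index) {
  // Assuming everything prevails keeps the sketch self-contained.
  auto IsPrevailing = [](GlobalValue::GUID /*G*/,
                         const GlobalValueSummary * /*S*/) { return true; };
  return thinLTOPropagateFunctionAttrs(Index, IsPrevailing);
}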
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index aad938d48570..c5bafb89fcb5 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -167,16 +167,24 @@ void ComputeCrossModuleImportForModuleFromIndex(
FunctionImporter::ImportMapTy &ImportList);
/// PrevailingType enum used as a return type of callback passed
-/// to computeDeadSymbols. Yes and No values used when status explicitly
-/// set by symbols resolution, otherwise status is Unknown.
+/// to computeDeadSymbolsAndUpdateIndirectCalls. Yes and No values used when
+/// status explicitly set by symbols resolution, otherwise status is Unknown.
enum class PrevailingType { Yes, No, Unknown };
+/// Update call edges for indirect calls to local functions added from
+/// SamplePGO when needed. Normally this is done during
+/// computeDeadSymbolsAndUpdateIndirectCalls, but can be called standalone
+/// when that is not called (e.g. during testing).
+void updateIndirectCalls(ModuleSummaryIndex &Index);
+
/// Compute all the symbols that are "dead": i.e these that can't be reached
/// in the graph from any of the given symbols listed in
/// \p GUIDPreservedSymbols. Non-prevailing symbols are symbols without a
/// prevailing copy anywhere in IR and are normally dead, \p isPrevailing
/// predicate returns status of symbol.
-void computeDeadSymbols(
+/// Also update call edges for indirect calls to local functions added from
+/// SamplePGO when needed.
+void computeDeadSymbolsAndUpdateIndirectCalls(
ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing);
@@ -214,12 +222,15 @@ std::error_code EmitImportsFiles(
StringRef ModulePath, StringRef OutputFilename,
const std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
-/// Resolve prevailing symbol linkages and constrain visibility (1. CanAutoHide,
-/// 2. consider visibility from other definitions for ELF) in \p TheModule based
-/// on the information recorded in the summaries during global summary-based
-/// analysis.
-void thinLTOResolvePrevailingInModule(Module &TheModule,
- const GVSummaryMapTy &DefinedGlobals);
+/// Based on the information recorded in the summaries during global
+/// summary-based analysis:
+/// 1. Resolve prevailing symbol linkages and constrain visibility (CanAutoHide
+/// and consider visibility from other definitions for ELF) in \p TheModule
+/// 2. (optional) Apply propagated function attributes to \p TheModule if
+/// PropagateAttrs is true
+void thinLTOFinalizeInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals,
+ bool PropagateAttrs);
/// Internalize \p TheModule based on the information recorded in the summaries
/// during global summary-based analysis.
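A sketch of how a ThinLTO driver might call the renamed entry points above; the function, its parameters, and the Unknown-returning callback are illustrative assumptions.

#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Transforms/IPO/FunctionImport.h"

using namespace llvm;

static void finalizeThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
                                  const DenseSet<GlobalValue::GUID> &Preserved,
                                  const GVSummaryMapTy &DefinedGlobals) {
  // Dead-symbol computation now also fixes up indirect-call edges added from
  // SamplePGO; standalone callers can use updateIndirectCalls() instead.
  computeDeadSymbolsAndUpdateIndirectCalls(
      Index, Preserved,
      [](GlobalValue::GUID) { return PrevailingType::Unknown; });

  // PropagateAttrs applies the attributes propagated during the thin link.
  thinLTOFinalizeInModule(TheModule, DefinedGlobals, /*PropagateAttrs=*/true);
}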
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 442a8ec1d2e2..110c0b4dcf16 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -86,6 +86,15 @@ struct OutlinableRegion {
DenseMap<unsigned, unsigned> ExtractedArgToAgg;
DenseMap<unsigned, unsigned> AggArgToExtracted;
+ /// Marks whether we need to change the order of the arguments when mapping
+ /// the old extracted function call to the new aggregate outlined function
+ /// call.
+ bool ChangedArgOrder = false;
+
+  /// Marks whether this region ends in a branch; in that case, special
+  /// handling is required for the following basic blocks.
+ bool EndsInBranch = false;
+
/// Mapping of the argument number in the deduplicated function
/// to a given constant, which is used when creating the arguments to the call
/// to the newly created deduplicated function. This is handled separately
@@ -147,6 +156,14 @@ struct OutlinableRegion {
/// containing the called function.
void reattachCandidate();
+ /// Find a corresponding value for \p V in similar OutlinableRegion \p Other.
+ ///
+ /// \param Other [in] - The OutlinableRegion to find the corresponding Value
+ /// in.
+ /// \param V [in] - The Value to look for in the other region.
+ /// \return The corresponding Value to \p V if it exists, otherwise nullptr.
+ Value *findCorrespondingValueIn(const OutlinableRegion &Other, Value *V);
+
/// Get the size of the code removed from the region.
///
/// \param [in] TTI - The TargetTransformInfo for the parent function.
@@ -176,6 +193,16 @@ private:
/// \returns The number of Functions created.
unsigned doOutline(Module &M);
+ /// Check whether an OutlinableRegion is incompatible with code already
+  /// outlined. OutlinableRegions are incompatible when there are overlapping
+  /// instructions, or when code that has not been recorded has been added to
+  /// the instructions.
+ ///
+ /// \param [in] Region - The OutlinableRegion to check for conflicts with
+ /// already outlined code.
+ /// \returns whether the region can safely be outlined.
+ bool isCompatibleWithAlreadyOutlinedCode(const OutlinableRegion &Region);
+
/// Remove all the IRSimilarityCandidates from \p CandidateVec that have
/// instructions contained in a previously outlined region and put the
/// remaining regions in \p CurrentGroup.
@@ -301,8 +328,9 @@ private:
struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
InstructionAllowed() {}
- // TODO: Determine a scheme to resolve when the label is similar enough.
- bool visitBranchInst(BranchInst &BI) { return false; }
+ bool visitBranchInst(BranchInst &BI) {
+ return EnableBranches;
+ }
// TODO: Determine a scheme to resolve when the labels are similar enough.
bool visitPHINode(PHINode &PN) { return false; }
// TODO: Handle allocas.
@@ -341,6 +369,10 @@ private:
// TODO: Handle interblock similarity.
bool visitTerminator(Instruction &I) { return false; }
bool visitInstruction(Instruction &I) { return true; }
+
+ // The flag variable that marks whether we should allow branch instructions
+ // to be outlined.
+ bool EnableBranches = false;
};
/// An InstVisitor used to exclude certain instructions from being outlined.
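A small sketch of the new correspondence helper on OutlinableRegion; the wrapper and its name are assumptions, and both regions are presumed to come from the same similarity group.

#include "llvm/Transforms/IPO/IROutliner.h"

using namespace llvm;

// Map a value from region A into the similar region B; returns nullptr when
// no corresponding value exists.
static Value *mapAcrossRegions(OutlinableRegion &A, const OutlinableRegion &B,
                               Value *VInA) {
  return A.findCorrespondingValueIn(B, VInA);
}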
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
index 23a39d7f2e2b..a7060943c4c0 100644
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -14,7 +14,6 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
#include "llvm/IR/PassManager.h"
#include <utility>
@@ -103,6 +102,9 @@ public:
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
private:
InlineAdvisor &getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M);
@@ -130,17 +132,27 @@ public:
/// before run is called, as part of pass pipeline building.
CGSCCPassManager &getPM() { return PM; }
- /// Allow adding module-level passes benefiting the contained CGSCC passes.
+ /// Add a module pass that runs before the CGSCC passes.
template <class T> void addModulePass(T Pass) {
MPM.addPass(std::move(Pass));
}
+ /// Add a module pass that runs after the CGSCC passes.
+ template <class T> void addLateModulePass(T Pass) {
+ AfterCGMPM.addPass(std::move(Pass));
+ }
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
private:
const InlineParams Params;
const InliningAdvisorMode Mode;
const unsigned MaxDevirtIterations;
+ // TODO: Clean this up so we only have one ModulePassManager.
CGSCCPassManager PM;
ModulePassManager MPM;
+ ModulePassManager AfterCGMPM;
};
} // end namespace llvm
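A sketch of how a pipeline might use the new late-module hook; the helper and the choice of GlobalDCE as the late pass are assumptions, not something this header mandates.

#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Inliner.h"

using namespace llvm;

static ModuleInlinerWrapperPass buildInlinerPipeline(InlineParams Params) {
  ModuleInlinerWrapperPass MIWP(Params);
  // CGSCC passes run interleaved with the inliner on each SCC.
  MIWP.getPM().addPass(InlinerPass());
  // Passes added here now run once, after the whole CGSCC walk finishes.
  MIWP.addLateModulePass(GlobalDCEPass());
  return MIWP;
}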
diff --git a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
index def3c5943919..aa697484d0e9 100644
--- a/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
+++ b/llvm/include/llvm/Transforms/IPO/LoopExtractor.h
@@ -23,6 +23,8 @@ namespace llvm {
struct LoopExtractorPass : public PassInfoMixin<LoopExtractorPass> {
LoopExtractorPass(unsigned NumLoops = ~0) : NumLoops(NumLoops) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
private:
unsigned NumLoops;
diff --git a/llvm/include/llvm/Transforms/IPO/ModuleInliner.h b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
new file mode 100644
index 000000000000..963d74d71003
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/ModuleInliner.h
@@ -0,0 +1,51 @@
+//===- ModuleInliner.h - Module level Inliner pass --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_MODULEINLINER_H
+#define LLVM_TRANSFORMS_IPO_MODULEINLINER_H
+
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
+#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/IR/PassManager.h"
+#include <utility>
+
+namespace llvm {
+
+class AssumptionCacheTracker;
+class ProfileSummaryInfo;
+
+/// The module inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a module pass. Unlike the SCC inliner, it considers every
+/// call in every function in the whole module and tries to inline if
+/// profitable. With this module-level inliner, it is possible to evaluate more
+/// heuristics at the module level, such as PriorityInlineOrder. It can be
+/// tuned with a number of parameters to control what cost model is used and
+/// what tradeoffs are made when making the decision.
+class ModuleInlinerPass : public PassInfoMixin<ModuleInlinerPass> {
+public:
+ ModuleInlinerPass(InlineParams Params = getInlineParams(),
+ InliningAdvisorMode Mode = InliningAdvisorMode::Default)
+ : Params(Params), Mode(Mode){};
+ ModuleInlinerPass(ModuleInlinerPass &&Arg) = default;
+
+ PreservedAnalyses run(Module &, ModuleAnalysisManager &);
+
+private:
+ InlineAdvisor &getAdvisor(const ModuleAnalysisManager &MAM,
+ FunctionAnalysisManager &FAM, Module &M);
+ std::unique_ptr<InlineAdvisor> OwnedAdvisor;
+ const InlineParams Params;
+ const InliningAdvisorMode Mode;
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_MODULEINLINER_H
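A minimal sketch of scheduling the new pass on a module pipeline; the helper and the parameter choices are assumptions for illustration.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/ModuleInliner.h"

using namespace llvm;

static void addModuleInliner(ModulePassManager &MPM) {
  // Unlike InlinerPass, this runs directly at module scope, so no CGSCC
  // adaptor is needed.
  MPM.addPass(ModuleInlinerPass(getInlineParams(/*OptLevel=*/2,
                                                /*SizeOptLevel=*/0)));
}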
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 4f941d26df4c..7f321a688aff 100644
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -154,7 +154,6 @@ public:
/// tests.
const ModuleSummaryIndex *ImportSummary = nullptr;
- bool DisableTailCalls;
bool DisableUnrollLoops;
bool CallGraphProfile;
bool SLPVectorize;
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 0adaa1b16d54..6e45f8f6fb05 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDCALLGRAPH_H
-#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDCALLGRAPH_H
+#ifndef LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
+#define LLVM_TRANSFORMS_IPO_PROFILEDCALLGRAPH_H
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/StringMap.h"
@@ -42,7 +42,7 @@ public:
using iterator = std::set<ProfiledCallGraphNode *>::iterator;
// Constructor for non-CS profile.
- ProfiledCallGraph(StringMap<FunctionSamples> &ProfileMap) {
+ ProfiledCallGraph(SampleProfileMap &ProfileMap) {
assert(!FunctionSamples::ProfileIsCS && "CS profile is not handled here");
for (const auto &Samples : ProfileMap) {
addProfiledCalls(Samples.second);
@@ -56,7 +56,7 @@ public:
std::queue<ContextTrieNode *> Queue;
for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) {
ContextTrieNode *Callee = &Child.second;
- addProfiledFunction(Callee->getFuncName());
+ addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
}
@@ -72,9 +72,10 @@ public:
// context-based one, which may in turn block context-based inlining.
for (auto &Child : Caller->getAllChildContext()) {
ContextTrieNode *Callee = &Child.second;
- addProfiledFunction(Callee->getFuncName());
+ addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
- addProfiledCall(Caller->getFuncName(), Callee->getFuncName());
+ addProfiledCall(ContextTracker.getFuncNameFor(Caller),
+ ContextTracker.getFuncNameFor(Callee));
}
}
}
diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 94f7796298db..5d80da407d7e 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -42,31 +42,34 @@ public:
: ParentContext(Parent), FuncName(FName), FuncSamples(FSamples),
CallSiteLoc(CallLoc){};
ContextTrieNode *getChildContext(const LineLocation &CallSite,
- StringRef CalleeName);
+ StringRef ChildName);
ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
- StringRef CalleeName,
+ StringRef ChildName,
bool AllowCreate = true);
ContextTrieNode &moveToChildContext(const LineLocation &CallSite,
ContextTrieNode &&NodeToMove,
- StringRef ContextStrToRemove,
+ uint32_t ContextFramesToRemove,
bool DeleteNode = true);
- void removeChildContext(const LineLocation &CallSite, StringRef CalleeName);
- std::map<uint32_t, ContextTrieNode> &getAllChildContext();
+ void removeChildContext(const LineLocation &CallSite, StringRef ChildName);
+ std::map<uint64_t, ContextTrieNode> &getAllChildContext();
StringRef getFuncName() const;
FunctionSamples *getFunctionSamples() const;
void setFunctionSamples(FunctionSamples *FSamples);
+ Optional<uint32_t> getFunctionSize() const;
+ void addFunctionSize(uint32_t FSize);
LineLocation getCallSiteLoc() const;
ContextTrieNode *getParentContext() const;
void setParentContext(ContextTrieNode *Parent);
- void dump();
+ void dumpNode();
+ void dumpTree();
private:
- static uint32_t nodeHash(StringRef ChildName, const LineLocation &Callsite);
+ static uint64_t nodeHash(StringRef ChildName, const LineLocation &Callsite);
// Map line+discriminator location to child context
- std::map<uint32_t, ContextTrieNode> AllChildContext;
+ std::map<uint64_t, ContextTrieNode> AllChildContext;
// Link to parent context node
ContextTrieNode *ParentContext;
@@ -77,6 +80,9 @@ private:
// Function Samples for current context
FunctionSamples *FuncSamples;
+ // Function size for current context
+ Optional<uint32_t> FuncSize;
+
// Callsite location in parent context
LineLocation CallSiteLoc;
};
@@ -90,9 +96,22 @@ private:
// calling context and the context is identified by path from root to the node.
class SampleContextTracker {
public:
- using ContextSamplesTy = SmallVector<FunctionSamples *, 16>;
-
- SampleContextTracker(StringMap<FunctionSamples> &Profiles);
+ struct ProfileComparer {
+ bool operator()(FunctionSamples *A, FunctionSamples *B) const {
+ // Sort function profiles by the number of total samples and their
+ // contexts.
+ if (A->getTotalSamples() == B->getTotalSamples())
+ return A->getContext() < B->getContext();
+ return A->getTotalSamples() > B->getTotalSamples();
+ }
+ };
+
+ // Keep profiles of a function sorted so that they will be processed/promoted
+ // deterministically.
+ using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>;
+
+ SampleContextTracker(SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap);
// Query context profile for a specific callee with given name at a given
// call-site. The full context is identified by location of call instruction.
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
@@ -116,6 +135,8 @@ public:
FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true);
// Retrieve the context trie node for given profile context
ContextTrieNode *getContextFor(const SampleContext &Context);
+ // Get real function name for a given trie node.
+ StringRef getFuncNameFor(ContextTrieNode *Node) const;
// Mark a context profile as inlined when function is inlined.
// This makes sure that inlined context profile will be excluded in
// function's base profile.
@@ -136,14 +157,18 @@ private:
ContextTrieNode &addTopLevelContextNode(StringRef FName);
ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &NodeToPromo);
void mergeContextNode(ContextTrieNode &FromNode, ContextTrieNode &ToNode,
- StringRef ContextStrToRemove);
- ContextTrieNode &promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
- ContextTrieNode &ToNodeParent,
- StringRef ContextStrToRemove);
+ uint32_t ContextFramesToRemove);
+ ContextTrieNode &
+ promoteMergeContextSamplesTree(ContextTrieNode &FromNode,
+ ContextTrieNode &ToNodeParent,
+ uint32_t ContextFramesToRemove);
// Map from function name to context profiles (excluding base profile)
StringMap<ContextSamplesTy> FuncToCtxtProfiles;
+ // Map from function guid to real function names. Only used in md5 mode.
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap;
+
// Root node for context trie tree
ContextTrieNode RootContext;
};
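A brief sketch of the updated constructor; passing nullptr for the GUID-to-name map is the assumed non-md5 case, and the helper itself is illustrative.

#include "llvm/Transforms/IPO/SampleContextTracker.h"

using namespace llvm;
using namespace llvm::sampleprof;

static void trackContexts(SampleProfileMap &Profiles) {
  // The second argument is only consulted when profiles use md5 names.
  SampleContextTracker Tracker(Profiles, /*GUIDToFuncNameMap=*/nullptr);

  // Context profiles per function are now kept in a deterministic order
  // (most total samples first, ties broken by context) via ProfileComparer.
  (void)Tracker.getBaseSamplesFor("main");
}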
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index af6d2a18a25a..6dee38c83b36 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -18,12 +18,14 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
+
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
namespace llvm {
class InstCombinePass : public PassInfoMixin<InstCombinePass> {
- InstCombineWorklist Worklist;
+ InstructionWorklist Worklist;
const unsigned MaxIterations;
public:
@@ -38,7 +40,7 @@ public:
/// This is a basic whole-function wrapper around the instcombine utility. It
/// will try to combine all instructions in the function.
class InstructionCombiningPass : public FunctionPass {
- InstCombineWorklist Worklist;
+ InstructionWorklist Worklist;
const unsigned MaxIterations;
public:
@@ -67,4 +69,6 @@ FunctionPass *createInstructionCombiningPass();
FunctionPass *createInstructionCombiningPass(unsigned MaxIterations);
}
+#undef DEBUG_TYPE
+
#endif
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index ba0d41f9b748..c6aee439b5a0 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -25,10 +25,10 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include <cassert>
#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
namespace llvm {
@@ -43,7 +43,9 @@ class TargetTransformInfo;
/// This class provides both the logic to recursively visit instructions and
/// combine them.
class LLVM_LIBRARY_VISIBILITY InstCombiner {
- /// Only used to call target specific inst combining.
+ /// Only used to call target specific intrinsic combining.
+ /// It must **NOT** be used for any other purpose, as InstCombine is a
+ /// target-independent canonicalization transform.
TargetTransformInfo &TTI;
public:
@@ -57,7 +59,7 @@ public:
protected:
/// A worklist of the instructions that need to be simplified.
- InstCombineWorklist &Worklist;
+ InstructionWorklist &Worklist;
// Mode in which we are running the combiner.
const bool MinimizeSize;
@@ -81,7 +83,7 @@ protected:
bool MadeIRChange = false;
public:
- InstCombiner(InstCombineWorklist &Worklist, BuilderTy &Builder,
+ InstCombiner(InstructionWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
@@ -165,16 +167,16 @@ public:
switch (Pred) {
case ICmpInst::ICMP_SLT: // True if LHS s< 0
TrueIfSigned = true;
- return RHS.isNullValue();
+ return RHS.isZero();
case ICmpInst::ICMP_SLE: // True if LHS s<= -1
TrueIfSigned = true;
- return RHS.isAllOnesValue();
+ return RHS.isAllOnes();
case ICmpInst::ICMP_SGT: // True if LHS s> -1
TrueIfSigned = false;
- return RHS.isAllOnesValue();
+ return RHS.isAllOnes();
case ICmpInst::ICMP_SGE: // True if LHS s>= 0
TrueIfSigned = false;
- return RHS.isNullValue();
+ return RHS.isZero();
case ICmpInst::ICMP_UGT:
// True if LHS u> RHS and RHS == sign-bit-mask - 1
TrueIfSigned = true;
@@ -246,12 +248,13 @@ public:
// If `V` is of the form `A + Constant` then `-1 - V` can be folded into
// `(-1 - Constant) - A` if we are willing to invert all of the uses.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if (BO->getOpcode() == Instruction::Add ||
- BO->getOpcode() == Instruction::Sub)
- if (match(BO, PatternMatch::m_c_BinOp(PatternMatch::m_Value(),
- PatternMatch::m_ImmConstant())))
- return WillInvertAllUses;
+ if (match(V, m_Add(PatternMatch::m_Value(), PatternMatch::m_ImmConstant())))
+ return WillInvertAllUses;
+
+ // If `V` is of the form `Constant - A` then `-1 - V` can be folded into
+ // `A + (-1 - Constant)` if we are willing to invert all of the uses.
+ if (match(V, m_Sub(PatternMatch::m_ImmConstant(), PatternMatch::m_Value())))
+ return WillInvertAllUses;
// Selects with invertible operands are freely invertible
if (match(V,
@@ -259,6 +262,12 @@ public:
m_Not(PatternMatch::m_Value()))))
return WillInvertAllUses;
+ // Min/max may be in the form of intrinsics, so handle those identically
+ // to select patterns.
+ if (match(V, m_MaxOrMin(m_Not(PatternMatch::m_Value()),
+ m_Not(PatternMatch::m_Value()))))
+ return WillInvertAllUses;
+
return false;
}
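The two folds documented above can be spot-checked on ordinary integers; the snippet below is only a sanity check of the algebra, not InstCombine code (the 8-bit type stands in for wrapping IR arithmetic, and the concrete values are assumptions).

#include <cstdint>

// -1 - (A + C)  ==  (-1 - C) - A      with A = 5, C = 3
static_assert(static_cast<int8_t>(-1 - (5 + 3)) ==
                  static_cast<int8_t>((-1 - 3) - 5),
              "add form");
// -1 - (C - A)  ==  A + (-1 - C)      with A = 5, C = 3
static_assert(static_cast<int8_t>(-1 - (3 - 5)) ==
                  static_cast<int8_t>(5 + (-1 - 3)),
              "sub form");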
@@ -354,14 +363,6 @@ public:
return ConstantVector::get(Out);
}
- /// Create and insert the idiom we use to indicate a block is unreachable
- /// without having to rewrite the CFG from within InstCombine.
- static void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
- auto &Ctx = InsertAt->getContext();
- new StoreInst(ConstantInt::getTrue(Ctx),
- UndefValue::get(Type::getInt1PtrTy(Ctx)), InsertAt);
- }
-
void addToWorklist(Instruction *I) { Worklist.push(I); }
AssumptionCache &getAssumptionCache() const { return AC; }
@@ -479,6 +480,11 @@ public:
return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
}
+ unsigned ComputeMinSignedBits(const Value *Op, unsigned Depth = 0,
+ const Instruction *CxtI = nullptr) const {
+ return llvm::ComputeMinSignedBits(Op, DL, Depth, &AC, CxtI, &DT);
+ }
+
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
const Instruction *CxtI) const {
diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h
index 03108bacb0da..a288a3972c3d 100644
--- a/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation.h
@@ -78,7 +78,7 @@ struct GCOVOptions {
ModulePass *createGCOVProfilerPass(const GCOVOptions &Options =
GCOVOptions::getDefault());
-// PGO Instrumention. Parameter IsCS indicates if this is the context senstive
+// PGO Instrumentation. Parameter IsCS indicates if this is the context sensitive
// instrumentation.
ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false);
ModulePass *
@@ -138,7 +138,7 @@ struct InstrProfOptions {
};
/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if
-// this is the context senstive instrumentation.
+// this is the context sensitive instrumentation.
ModulePass *createInstrProfilingLegacyPass(
const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false);
@@ -169,6 +169,8 @@ struct SanitizerCoverageOptions {
bool PCTable = false;
bool NoPrune = false;
bool StackDepth = false;
+ bool TraceLoads = false;
+ bool TraceStores = false;
SanitizerCoverageOptions() = default;
};
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
index 3781253d2694..c13407a44091 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h
@@ -1,9 +1,8 @@
//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -90,6 +89,14 @@ private:
static AnalysisKey Key;
};
+struct AddressSanitizerOptions {
+ bool CompileKernel = false;
+ bool Recover = false;
+ bool UseAfterScope = false;
+ AsanDetectStackUseAfterReturnMode UseAfterReturn =
+ AsanDetectStackUseAfterReturnMode::Runtime;
+};
+
/// Public interface to the address sanitizer pass for instrumenting code to
/// check for various memory errors at runtime.
///
@@ -99,19 +106,15 @@ private:
/// surrounding requested memory to be checked for invalid accesses.
class AddressSanitizerPass : public PassInfoMixin<AddressSanitizerPass> {
public:
- explicit AddressSanitizerPass(
- bool CompileKernel = false, bool Recover = false,
- bool UseAfterScope = false,
- AsanDetectStackUseAfterReturnMode UseAfterReturn =
- AsanDetectStackUseAfterReturnMode::Runtime);
+ AddressSanitizerPass(const AddressSanitizerOptions &Options)
+ : Options(Options){};
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
private:
- bool CompileKernel;
- bool Recover;
- bool UseAfterScope;
- AsanDetectStackUseAfterReturnMode UseAfterReturn;
+ AddressSanitizerOptions Options;
};
/// Public interface to the address sanitizer module pass for instrumenting code
@@ -122,16 +125,17 @@ private:
class ModuleAddressSanitizerPass
: public PassInfoMixin<ModuleAddressSanitizerPass> {
public:
- explicit ModuleAddressSanitizerPass(
- bool CompileKernel = false, bool Recover = false, bool UseGlobalGC = true,
+ ModuleAddressSanitizerPass(
+ const AddressSanitizerOptions &Options, bool UseGlobalGC = true,
bool UseOdrIndicator = false,
AsanDtorKind DestructorKind = AsanDtorKind::Global);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
private:
- bool CompileKernel;
- bool Recover;
+ AddressSanitizerOptions Options;
bool UseGlobalGC;
bool UseOdrIndicator;
AsanDtorKind DestructorKind;
@@ -148,6 +152,16 @@ ModulePass *createModuleAddressSanitizerLegacyPassPass(
bool UseOdrIndicator = true,
AsanDtorKind DestructorKind = AsanDtorKind::Global);
+struct ASanAccessInfo {
+ const int32_t Packed;
+ const uint8_t AccessSizeIndex;
+ const bool IsWrite;
+ const bool CompileKernel;
+
+ explicit ASanAccessInfo(int32_t Packed);
+ ASanAccessInfo(bool IsWrite, bool CompileKernel, uint8_t AccessSizeIndex);
+};
+
} // namespace llvm
#endif
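A sketch of constructing the sanitizer passes with the new options struct; the helper and the specific option values are assumptions.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"

using namespace llvm;

static void addAddressSanitizer(ModulePassManager &MPM,
                                FunctionPassManager &FPM) {
  AddressSanitizerOptions Opts;
  Opts.Recover = true;
  Opts.UseAfterScope = true;

  // Function-level instrumentation plus the module pass for globals and ctors.
  FPM.addPass(AddressSanitizerPass(Opts));
  MPM.addPass(ModuleAddressSanitizerPass(Opts, /*UseGlobalGC=*/true));
}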
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 0228992af874..6c351e3f8e1f 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -1,9 +1,8 @@
//===--------- Definition of the AddressSanitizer class ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
namespace llvm {
@@ -26,7 +26,6 @@ class InterestingMemoryOperand {
public:
Use *PtrUse;
bool IsWrite;
- Type *OpType;
uint64_t TypeSize;
MaybeAlign Alignment;
// The mask Value, if we're looking at a masked load/store.
@@ -35,8 +34,7 @@ public:
InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
class Type *OpType, MaybeAlign Alignment,
Value *MaybeMask = nullptr)
- : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
- MaybeMask(MaybeMask) {
+ : IsWrite(IsWrite), Alignment(Alignment), MaybeMask(MaybeMask) {
const DataLayout &DL = I->getModule()->getDataLayout();
TypeSize = DL.getTypeStoreSizeInBits(OpType);
PtrUse = &I->getOperandUse(OperandNo);
@@ -47,47 +45,56 @@ public:
Value *getPtr() { return PtrUse->get(); }
};
-// For an alloca valid between lifetime markers Start and End, call the
+// For an alloca valid between lifetime markers Start and Ends, call the
// Callback for all possible exits out of the lifetime in the containing
// function, which can return from the instructions in RetVec.
//
-// Returns whether End was the only possible exit. If it wasn't, the caller
-// should remove End to ensure that work done at the other exits does not
-// happen outside of the lifetime.
+// Returns whether Ends covered all possible exits. If they did not,
+// the caller should remove Ends to ensure that work done at the other
+// exits does not happen outside of the lifetime.
template <typename F>
bool forAllReachableExits(const DominatorTree &DT, const PostDominatorTree &PDT,
- const Instruction *Start, Instruction *End,
+ const Instruction *Start,
+ const SmallVectorImpl<IntrinsicInst *> &Ends,
const SmallVectorImpl<Instruction *> &RetVec,
F Callback) {
- // We need to ensure that if we tag some object, we certainly untag it
- // before the function exits.
- if (PDT.dominates(End, Start)) {
- Callback(End);
- } else {
- SmallVector<Instruction *, 8> ReachableRetVec;
- unsigned NumCoveredExits = 0;
- for (auto &RI : RetVec) {
- if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
- continue;
- ReachableRetVec.push_back(RI);
- if (DT.dominates(End, RI))
- ++NumCoveredExits;
- }
- // If there's a mix of covered and non-covered exits, just put the untag
- // on exits, so we avoid the redundancy of untagging twice.
- if (NumCoveredExits == ReachableRetVec.size()) {
+ if (Ends.size() == 1 && PDT.dominates(Ends[0], Start)) {
+ Callback(Ends[0]);
+ return true;
+ }
+ SmallVector<Instruction *, 8> ReachableRetVec;
+ unsigned NumCoveredExits = 0;
+ for (auto &RI : RetVec) {
+ if (!isPotentiallyReachable(Start, RI, nullptr, &DT))
+ continue;
+ ReachableRetVec.push_back(RI);
+ // TODO(fmayer): We don't support diamond shapes, where multiple lifetime
+ // ends together dominate the RI, but none of them does by itself.
+ // Check how often this happens and decide whether to support this here.
+ if (std::any_of(Ends.begin(), Ends.end(),
+ [&](Instruction *End) { return DT.dominates(End, RI); }))
+ ++NumCoveredExits;
+ }
+ // If there's a mix of covered and non-covered exits, just put the untag
+ // on exits, so we avoid the redundancy of untagging twice.
+ if (NumCoveredExits == ReachableRetVec.size()) {
+ for (auto *End : Ends)
Callback(End);
- } else {
- for (auto &RI : ReachableRetVec)
- Callback(RI);
- // We may have inserted untag outside of the lifetime interval.
- // Signal the caller to remove the lifetime end call for this alloca.
- return false;
- }
+ } else {
+ for (auto &RI : ReachableRetVec)
+ Callback(RI);
+ // We may have inserted untag outside of the lifetime interval.
+ // Signal the caller to remove the lifetime end call for this alloca.
+ return false;
}
return true;
}
+// Get AddressSanitizer parameters.
+void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset);
+
} // namespace llvm
#endif
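A sketch of calling the generalized helper with several lifetime ends; the wrapper, its parameters, and the comments about what would be inserted at each exit are assumptions about how a sanitizer pass uses it.

#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"

using namespace llvm;

static void instrumentLifetimeExits(
    const DominatorTree &DT, const PostDominatorTree &PDT,
    const IntrinsicInst *LifetimeStart,
    const SmallVectorImpl<IntrinsicInst *> &LifetimeEnds,
    const SmallVectorImpl<Instruction *> &RetVec) {
  bool EndsCoverAllExits = forAllReachableExits(
      DT, PDT, LifetimeStart, LifetimeEnds, RetVec, [](Instruction *Exit) {
        // A sanitizer would emit its untag/poison code right before Exit.
      });
  if (!EndsCoverAllExits) {
    // Work was emitted at returns outside the lifetime; the caller is then
    // expected to drop the lifetime.end markers for this alloca.
  }
}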
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
index 029b3fc4b788..f019d1c00a35 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h
@@ -1,9 +1,8 @@
//===--------- Definition of the AddressSanitizer options -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file defines data types used to set Address Sanitizer options.
diff --git a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
index 2e4f3338030a..3118a3762935 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h
@@ -1,9 +1,8 @@
//===--------- Definition of the HWAddressSanitizer class -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,21 +18,32 @@
namespace llvm {
+struct HWAddressSanitizerOptions {
+ HWAddressSanitizerOptions()
+ : HWAddressSanitizerOptions(false, false, false){};
+ HWAddressSanitizerOptions(bool CompileKernel, bool Recover,
+ bool DisableOptimization)
+ : CompileKernel(CompileKernel), Recover(Recover),
+ DisableOptimization(DisableOptimization){};
+ bool CompileKernel;
+ bool Recover;
+ bool DisableOptimization;
+};
+
/// This is a public interface to the hardware address sanitizer pass for
/// instrumenting code to check for various memory errors at runtime, similar to
/// AddressSanitizer but based on partial hardware assistance.
class HWAddressSanitizerPass : public PassInfoMixin<HWAddressSanitizerPass> {
public:
- explicit HWAddressSanitizerPass(bool CompileKernel = false,
- bool Recover = false,
- bool DisableOptimization = false);
+ explicit HWAddressSanitizerPass(HWAddressSanitizerOptions Options)
+ : Options(Options){};
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
static bool isRequired() { return true; }
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
private:
- bool CompileKernel;
- bool Recover;
- bool DisableOptimization;
+ HWAddressSanitizerOptions Options;
};
FunctionPass *
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h b/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
index f0f375e0acf6..e3d75f675c93 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h
@@ -1,9 +1,8 @@
//===- InstrOrderFile.h ---- Late IR instrumentation for order file ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index ac6a07d299a6..f4d1b1d90e6f 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -1,9 +1,8 @@
//===--------- Definition of the MemProfiler class --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
index f5f9ec7829bd..d47beb93397e 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemorySanitizer.h
@@ -40,6 +40,23 @@ struct MemorySanitizerPass : public PassInfoMixin<MemorySanitizerPass> {
MemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+ static bool isRequired() { return true; }
+
+private:
+ MemorySanitizerOptions Options;
+};
+
+/// A module pass for msan instrumentation.
+///
+/// Instruments functions to detect uninitialized reads. This pass inserts
+/// calls to runtime library functions. If the functions aren't declared yet,
+/// the pass inserts the declarations. Otherwise the existing globals are used.
+struct ModuleMemorySanitizerPass : public PassInfoMixin<ModuleMemorySanitizerPass> {
+ ModuleMemorySanitizerPass(MemorySanitizerOptions Options) : Options(Options) {}
+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
static bool isRequired() { return true; }
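A hedged sketch of how the function/module split might be driven from a new-PM pipeline; relying on the default-constructed MemorySanitizerOptions is an assumption here:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
    using namespace llvm;

    void addMSan(ModulePassManager &MPM) {
      MemorySanitizerOptions Opts;
      // Module-level setup (ctor, globals) ...
      MPM.addPass(ModuleMemorySanitizerPass(Opts));
      // ... plus the per-function shadow instrumentation.
      FunctionPassManager FPM;
      FPM.addPass(MemorySanitizerPass(Opts));
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
    }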
diff --git a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
index f9c507624e6d..e795043630d5 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/ThreadSanitizer.h
@@ -27,6 +27,14 @@ FunctionPass *createThreadSanitizerLegacyPassPass();
/// yet, the pass inserts the declarations. Otherwise the existing globals are
struct ThreadSanitizerPass : public PassInfoMixin<ThreadSanitizerPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+/// A module pass for tsan instrumentation.
+///
+/// Create ctor and init functions.
+struct ModuleThreadSanitizerPass
+ : public PassInfoMixin<ModuleThreadSanitizerPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
static bool isRequired() { return true; }
};
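The same pattern applies to tsan; a sketch assuming the usual module-to-function adaptor:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
    using namespace llvm;

    void addTSan(ModulePassManager &MPM) {
      MPM.addPass(ModuleThreadSanitizerPass()); // ctor/init emitted once per module
      FunctionPassManager FPM;
      FPM.addPass(ThreadSanitizerPass());       // per-function load/store instrumentation
      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
    }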
diff --git a/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h b/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
index 1e7fd71dcbf4..877d8145e746 100644
--- a/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -32,6 +32,8 @@ struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> {
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
bool UseMemorySSA;
};
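As a rough sketch (not necessarily the committed implementation), a printPipeline override for an option-carrying pass typically delegates to the PassInfoMixin default and then appends its parameters:

    void EarlyCSEPass::printPipeline(
        raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
      static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
          OS, MapClassName2PassName);
      if (UseMemorySSA)
        OS << "<memssa>"; // e.g. "early-cse<memssa>" under -print-pipeline-passes
    }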
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 5c29b289d158..cbe5057b9cde 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -115,17 +115,20 @@ struct GVNOptions {
///
/// FIXME: We should have a good summary of the GVN algorithm implemented by
/// this particular pass here.
-class GVN : public PassInfoMixin<GVN> {
+class GVNPass : public PassInfoMixin<GVNPass> {
GVNOptions Options;
public:
struct Expression;
- GVN(GVNOptions Options = {}) : Options(Options) {}
+ GVNPass(GVNOptions Options = {}) : Options(Options) {}
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
/// This removes the specified instruction from
/// our various maps and marks it for deletion.
void markInstructionForDeletion(Instruction *I) {
@@ -179,11 +182,11 @@ public:
Expression createExtractvalueExpr(ExtractValueInst *EI);
uint32_t lookupOrAddCall(CallInst *C);
uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn);
+ uint32_t Num, GVNPass &Gvn);
bool areCallValsEqual(uint32_t Num, uint32_t NewNum, const BasicBlock *Pred,
- const BasicBlock *PhiBlock, GVN &Gvn);
+ const BasicBlock *PhiBlock, GVNPass &Gvn);
std::pair<uint32_t, bool> assignExpNewValueNum(Expression &exp);
- bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn);
+ bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVNPass &Gvn);
public:
ValueTable();
@@ -197,7 +200,7 @@ public:
uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred,
Value *LHS, Value *RHS);
uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn);
+ uint32_t Num, GVNPass &Gvn);
void eraseTranslateCacheEntry(uint32_t Num, const BasicBlock &CurrBlock);
bool exists(Value *V) const;
void add(Value *V, uint32_t num);
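Callers now spell the pass GVNPass rather than GVN; a minimal usage sketch (the setPRE knob on GVNOptions is assumed):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/GVN.h"
    using namespace llvm;

    void addGVN(FunctionPassManager &FPM) {
      FPM.addPass(GVNPass());                           // default GVNOptions
      FPM.addPass(GVNPass(GVNOptions().setPRE(false))); // assumed option setter
    }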
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 816ea1071e52..0ac7d7c62b7a 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -44,6 +44,7 @@ class PHINode;
class SelectInst;
class SwitchInst;
class TargetLibraryInfo;
+class TargetTransformInfo;
class Value;
/// A private "module" namespace for types and utilities used by
@@ -78,6 +79,7 @@ enum ConstantPreference { WantInteger, WantBlockAddress };
/// revectored to the false side of the second if.
class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
TargetLibraryInfo *TLI;
+ TargetTransformInfo *TTI;
LazyValueInfo *LVI;
AAResults *AA;
DomTreeUpdater *DTU;
@@ -99,9 +101,9 @@ public:
JumpThreadingPass(bool InsertFreezeWhenUnfoldingSelect = false, int T = -1);
// Glue for old PM.
- bool runImpl(Function &F, TargetLibraryInfo *TLI, LazyValueInfo *LVI,
- AAResults *AA, DomTreeUpdater *DTU, bool HasProfileData,
- std::unique_ptr<BlockFrequencyInfo> BFI,
+ bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU,
+ bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI,
std::unique_ptr<BranchProbabilityInfo> BPI);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index 020cfb9a6c85..419729271a23 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -94,6 +94,8 @@ public:
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
/// Add either a loop pass or a loop-nest pass to the pass manager. Append \p
/// Pass to the list of loop passes if it has a dedicated \fn run() method for
/// loops and to the list of loop-nest passes if the \fn run() method is for
@@ -101,51 +103,65 @@ public:
/// to the end of \var IsLoopNestPass so we can easily identify the types of
/// passes in the pass manager later.
template <typename PassT>
- std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(PassT &&Pass) {
using LoopPassModelT =
detail::PassModel<Loop, PassT, PreservedAnalyses, LoopAnalysisManager,
LoopStandardAnalysisResults &, LPMUpdater &>;
IsLoopNestPass.push_back(false);
- LoopPasses.emplace_back(new LoopPassModelT(std::forward<PassT>(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopPasses.push_back(std::unique_ptr<LoopPassConceptT>(
+ new LoopPassModelT(std::forward<PassT>(Pass))));
}
template <typename PassT>
- std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
- addPass(PassT &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(PassT &&Pass) {
using LoopNestPassModelT =
detail::PassModel<LoopNest, PassT, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
IsLoopNestPass.push_back(true);
- LoopNestPasses.emplace_back(
- new LoopNestPassModelT(std::forward<PassT>(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopNestPasses.push_back(std::unique_ptr<LoopNestPassConceptT>(
+ new LoopNestPassModelT(std::forward<PassT>(Pass))));
}
// Specializations of `addPass` for `RepeatedPass`. These are necessary since
// `RepeatedPass` has a templated `run` method that will result in incorrect
// detection of `HasRunOnLoopT`.
template <typename PassT>
- std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
- addPass(RepeatedPass<PassT> &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(RepeatedPass<PassT> &&Pass) {
using RepeatedLoopPassModelT =
detail::PassModel<Loop, RepeatedPass<PassT>, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
IsLoopNestPass.push_back(false);
- LoopPasses.emplace_back(new RepeatedLoopPassModelT(std::move(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopPasses.push_back(std::unique_ptr<LoopPassConceptT>(
+ new RepeatedLoopPassModelT(std::move(Pass))));
}
template <typename PassT>
- std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
- addPass(RepeatedPass<PassT> &&Pass) {
+ LLVM_ATTRIBUTE_MINSIZE
+ std::enable_if_t<!is_detected<HasRunOnLoopT, PassT>::value>
+ addPass(RepeatedPass<PassT> &&Pass) {
using RepeatedLoopNestPassModelT =
detail::PassModel<LoopNest, RepeatedPass<PassT>, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
IsLoopNestPass.push_back(true);
- LoopNestPasses.emplace_back(
- new RepeatedLoopNestPassModelT(std::move(Pass)));
+ // Do not use make_unique or emplace_back, they cause too many template
+ // instantiations, causing terrible compile times.
+ LoopNestPasses.push_back(std::unique_ptr<LoopNestPassConceptT>(
+ new RepeatedLoopNestPassModelT(std::move(Pass))));
}
bool isEmpty() const { return LoopPasses.empty() && LoopNestPasses.empty(); }
@@ -215,6 +231,12 @@ struct RequireAnalysisPass<AnalysisT, Loop, LoopAnalysisManager,
(void)AM.template getResult<AnalysisT>(L, AR);
return PreservedAnalyses::all();
}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ auto ClassName = AnalysisT::name();
+ auto PassName = MapClassName2PassName(ClassName);
+ OS << "require<" << PassName << ">";
+ }
};
/// An alias template to easily name a require analysis loop pass.
@@ -259,8 +281,6 @@ public:
/// state, this routine will mark that the current loop should be skipped by
/// the rest of the pass management infrastructure.
void markLoopAsDeleted(Loop &L, llvm::StringRef Name) {
- assert((!LoopNestMode || CurrentL == &L) &&
- "L should be a top-level loop in loop-nest mode.");
LAM.clear(L, Name);
assert((&L == CurrentL || CurrentL->contains(&L)) &&
"Cannot delete a loop outside of the "
@@ -413,10 +433,12 @@ public:
explicit FunctionToLoopPassAdaptor(std::unique_ptr<PassConceptT> Pass,
bool UseMemorySSA = false,
bool UseBlockFrequencyInfo = false,
+ bool UseBranchProbabilityInfo = false,
bool LoopNestMode = false)
: Pass(std::move(Pass)), LoopCanonicalizationFPM(),
UseMemorySSA(UseMemorySSA),
UseBlockFrequencyInfo(UseBlockFrequencyInfo),
+ UseBranchProbabilityInfo(UseBranchProbabilityInfo),
LoopNestMode(LoopNestMode) {
LoopCanonicalizationFPM.addPass(LoopSimplifyPass());
LoopCanonicalizationFPM.addPass(LCSSAPass());
@@ -424,6 +446,8 @@ public:
/// Runs the loop passes across every loop in the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
@@ -436,6 +460,7 @@ private:
bool UseMemorySSA = false;
bool UseBlockFrequencyInfo = false;
+ bool UseBranchProbabilityInfo = false;
const bool LoopNestMode;
};
@@ -447,13 +472,17 @@ template <typename LoopPassT>
inline std::enable_if_t<is_detected<HasRunOnLoopT, LoopPassT>::value,
FunctionToLoopPassAdaptor>
createFunctionToLoopPassAdaptor(LoopPassT &&Pass, bool UseMemorySSA = false,
- bool UseBlockFrequencyInfo = false) {
+ bool UseBlockFrequencyInfo = false,
+ bool UseBranchProbabilityInfo = false) {
using PassModelT =
detail::PassModel<Loop, LoopPassT, PreservedAnalyses, LoopAnalysisManager,
LoopStandardAnalysisResults &, LPMUpdater &>;
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
return FunctionToLoopPassAdaptor(
- std::make_unique<PassModelT>(std::forward<LoopPassT>(Pass)), UseMemorySSA,
- UseBlockFrequencyInfo, false);
+ std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
+ new PassModelT(std::forward<LoopPassT>(Pass))),
+ UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, false);
}
/// If \p Pass is a loop-nest pass, \p Pass will first be wrapped into a
@@ -462,24 +491,29 @@ template <typename LoopNestPassT>
inline std::enable_if_t<!is_detected<HasRunOnLoopT, LoopNestPassT>::value,
FunctionToLoopPassAdaptor>
createFunctionToLoopPassAdaptor(LoopNestPassT &&Pass, bool UseMemorySSA = false,
- bool UseBlockFrequencyInfo = false) {
+ bool UseBlockFrequencyInfo = false,
+ bool UseBranchProbabilityInfo = false) {
LoopPassManager LPM;
LPM.addPass(std::forward<LoopNestPassT>(Pass));
using PassModelT =
detail::PassModel<Loop, LoopPassManager, PreservedAnalyses,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
- return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)),
- UseMemorySSA, UseBlockFrequencyInfo, true);
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
+ return FunctionToLoopPassAdaptor(
+ std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
+ new PassModelT(std::move(LPM))),
+ UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo, true);
}
/// If \p Pass is an instance of \c LoopPassManager, the returned adaptor will
/// be in loop-nest mode if the pass manager contains only loop-nest passes.
template <>
inline FunctionToLoopPassAdaptor
-createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM,
- bool UseMemorySSA,
- bool UseBlockFrequencyInfo) {
+createFunctionToLoopPassAdaptor<LoopPassManager>(
+ LoopPassManager &&LPM, bool UseMemorySSA, bool UseBlockFrequencyInfo,
+ bool UseBranchProbabilityInfo) {
// Check if LPM contains any loop pass and if it does not, returns an adaptor
// in loop-nest mode.
using PassModelT =
@@ -487,9 +521,13 @@ createFunctionToLoopPassAdaptor<LoopPassManager>(LoopPassManager &&LPM,
LoopAnalysisManager, LoopStandardAnalysisResults &,
LPMUpdater &>;
bool LoopNestMode = (LPM.getNumLoopPasses() == 0);
- return FunctionToLoopPassAdaptor(std::make_unique<PassModelT>(std::move(LPM)),
- UseMemorySSA, UseBlockFrequencyInfo,
- LoopNestMode);
+ // Do not use make_unique, it causes too many template instantiations,
+ // causing terrible compile times.
+ return FunctionToLoopPassAdaptor(
+ std::unique_ptr<FunctionToLoopPassAdaptor::PassConceptT>(
+ new PassModelT(std::move(LPM))),
+ UseMemorySSA, UseBlockFrequencyInfo, UseBranchProbabilityInfo,
+ LoopNestMode);
}
/// Pass for printing a loop's contents as textual IR.
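A sketch of building the adaptor with the new UseBranchProbabilityInfo parameter, using LoopRotatePass as a stand-in loop pass:

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/LoopPassManager.h"
    #include "llvm/Transforms/Scalar/LoopRotation.h"
    using namespace llvm;

    void addLoopPipeline(FunctionPassManager &FPM) {
      LoopPassManager LPM;
      LPM.addPass(LoopRotatePass());
      FPM.addPass(createFunctionToLoopPassAdaptor(
          std::move(LPM), /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true,
          /*UseBranchProbabilityInfo=*/false));
    }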
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
index 30cc08cb42ae..6afe7ecd2a5d 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -140,6 +140,8 @@ public:
: UnrollOpts(UnrollOpts) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
index dd574e4f32c6..d44d297dd4ff 100644
--- a/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h
@@ -23,6 +23,8 @@ class LowerMatrixIntrinsicsPass
public:
LowerMatrixIntrinsicsPass(bool Minimal = false) : Minimal(Minimal) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
static bool isRequired() { return true; }
};
} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 635b706d0bef..3a4db13d670a 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -31,7 +31,6 @@ class Instruction;
class LoadInst;
class MemCpyInst;
class MemMoveInst;
-class MemoryDependenceResults;
class MemorySSA;
class MemorySSAUpdater;
class MemSetInst;
@@ -40,7 +39,6 @@ class TargetLibraryInfo;
class Value;
class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> {
- MemoryDependenceResults *MD = nullptr;
TargetLibraryInfo *TLI = nullptr;
AAResults *AA = nullptr;
AssumptionCache *AC = nullptr;
@@ -54,9 +52,8 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
// Glue for the old PM.
- bool runImpl(Function &F, MemoryDependenceResults *MD, TargetLibraryInfo *TLI,
- AAResults *AA, AssumptionCache *AC, DominatorTree *DT,
- MemorySSA *MSSA);
+ bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA,
+ AssumptionCache *AC, DominatorTree *DT, MemorySSA *MSSA);
private:
// Helper functions
@@ -65,7 +62,7 @@ private:
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
- Value *cpyDst, Value *cpySrc, uint64_t cpyLen,
+ Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
Align cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);
diff --git a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index c5f6d6e0e8bd..256d03675a07 100644
--- a/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -48,6 +48,8 @@ public:
MergedLoadStoreMotionPass(const MergedLoadStoreMotionOptions &PassOptions)
: Options(PassOptions) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
}
diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h
index 6ef7c6b22c0b..f1a43435d89a 100644
--- a/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -62,7 +62,7 @@ class SROALegacyPass;
/// onto insert and extract operations on a vector value, and convert them to
/// this form. By doing so, it will enable promotion of vector aggregates to
/// SSA vector values.
-class SROA : public PassInfoMixin<SROA> {
+class SROAPass : public PassInfoMixin<SROAPass> {
LLVMContext *C = nullptr;
DominatorTree *DT = nullptr;
AssumptionCache *AC = nullptr;
@@ -110,7 +110,7 @@ class SROA : public PassInfoMixin<SROA> {
SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects;
public:
- SROA() = default;
+ SROAPass() = default;
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
index c1a9ab475ead..dfb1619c7f2a 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h
@@ -69,6 +69,9 @@ public:
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
/// Create the legacy pass object for the simple loop unswitcher.
diff --git a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
index 7c5393851ae6..67db5031a443 100644
--- a/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -41,6 +41,9 @@ public:
/// Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
};
}
diff --git a/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index 0b570c0d1342..f87588db4ee2 100644
--- a/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -33,7 +33,7 @@ struct ASanStackVariableDescription {
uint64_t Size; // Size of the variable in bytes.
size_t LifetimeSize; // Size in bytes to use for lifetime analysis check.
// Will be rounded up to Granularity.
- size_t Alignment; // Alignment of the variable (power of 2).
+ uint64_t Alignment; // Alignment of the variable (power of 2).
AllocaInst *AI; // The actual AllocaInst.
size_t Offset; // Offset from the beginning of the frame;
// set by ComputeASanStackFrameLayout.
@@ -42,20 +42,20 @@ struct ASanStackVariableDescription {
// Output data struct for ComputeASanStackFrameLayout.
struct ASanStackFrameLayout {
- size_t Granularity; // Shadow granularity.
- size_t FrameAlignment; // Alignment for the entire frame.
- size_t FrameSize; // Size of the frame in bytes.
+ uint64_t Granularity; // Shadow granularity.
+ uint64_t FrameAlignment; // Alignment for the entire frame.
+ uint64_t FrameSize; // Size of the frame in bytes.
};
ASanStackFrameLayout ComputeASanStackFrameLayout(
// The array of stack variables. The elements may get reordered and changed.
SmallVectorImpl<ASanStackVariableDescription> &Vars,
// AddressSanitizer's shadow granularity. Usually 8, may also be 16, 32, 64.
- size_t Granularity,
+ uint64_t Granularity,
// The minimal size of the left-most redzone (header).
// At least 4 pointer sizes, power of 2, and >= Granularity.
// The resulting FrameSize should be multiple of MinHeaderSize.
- size_t MinHeaderSize);
+ uint64_t MinHeaderSize);
// Compute frame description, see DescribeAddressIfStack in ASan runtime.
SmallString<64> ComputeASanStackFrameDescription(
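A sketch of calling the API with the widened integer types; the concrete granularity and header size are illustrative only:

    #include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
    using namespace llvm;

    ASanStackFrameLayout
    layoutFrame(SmallVectorImpl<ASanStackVariableDescription> &Vars) {
      // Granularity and MinHeaderSize are now uint64_t instead of size_t.
      return ComputeASanStackFrameLayout(Vars, /*Granularity=*/8,
                                         /*MinHeaderSize=*/32);
    }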
diff --git a/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h b/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
index f512c6c06331..0aee2fe95cad 100644
--- a/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
+++ b/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
@@ -24,6 +24,7 @@ class Function;
class AddDiscriminatorsPass : public PassInfoMixin<AddDiscriminatorsPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index b45c1820bb20..8970afb3aeaa 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -129,6 +129,13 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
/// To. Copies DebugLoc from BI to I, if I doesn't already have a DebugLoc.
void ReplaceInstWithInst(Instruction *From, Instruction *To);
+/// Check if we can prove that all paths starting from this block converge
+/// to a block that either has a @llvm.experimental.deoptimize call
+/// prior to its terminating return instruction or is terminated by unreachable.
+/// All blocks in the traversed sequence must have a unique successor, except
+/// possibly the last one.
+bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB);
+
/// Option class for critical edge splitting.
///
/// This provides a builder interface for overriding the default options used
@@ -214,29 +221,6 @@ BasicBlock *SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
CriticalEdgeSplittingOptions(),
const Twine &BBName = "");
-inline BasicBlock *
-SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
- const CriticalEdgeSplittingOptions &Options =
- CriticalEdgeSplittingOptions()) {
- return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(),
- Options);
-}
-
-/// If the edge from *PI to BB is not critical, return false. Otherwise, split
-/// all edges between the two blocks and return true. This updates all of the
-/// same analyses as the other SplitCriticalEdge function. If P is specified, it
-/// updates the analyses described above.
-inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI,
- const CriticalEdgeSplittingOptions &Options =
- CriticalEdgeSplittingOptions()) {
- bool MadeChange = false;
- Instruction *TI = (*PI)->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (TI->getSuccessor(i) == Succ)
- MadeChange |= !!SplitCriticalEdge(TI, i, Options);
- return MadeChange;
-}
-
/// If an edge from Src to Dst is critical, split the edge and return true,
/// otherwise return false. This method requires that there be an edge between
/// the two blocks. It updates the analyses passed in the options struct
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index e7d41933a6c9..87d33b9b11b7 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -54,12 +54,6 @@ namespace llvm {
/// 'i8*' type.
Value *emitStrDup(Value *Ptr, IRBuilderBase &B, const TargetLibraryInfo *TLI);
- /// Emit a call to the strnlen function to the builder, for the specified
- /// pointer. Ptr is required to be some pointer type, MaxLen must be of size_t
- /// type, and the return value has 'intptr_t' type.
- Value *emitStrNLen(Value *Ptr, Value *MaxLen, IRBuilderBase &B,
- const DataLayout &DL, const TargetLibraryInfo *TLI);
-
/// Emit a call to the strchr function to the builder, for the specified
/// pointer and character. Ptr is required to be some pointer type, and the
/// return value has 'i8*' type.
@@ -205,8 +199,8 @@ namespace llvm {
const TargetLibraryInfo *TLI);
/// Emit a call to the calloc function.
- Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilderBase &B, const TargetLibraryInfo &TLI);
+ Value *emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo &TLI);
}
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index f4fb265c25e0..5a1f322b2054 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -296,10 +296,10 @@ BasicBlock *DuplicateInstructionsInSplitBetween(BasicBlock *BB,
DomTreeUpdater &DTU);
/// Updates profile information by adjusting the entry count by adding
-/// entryDelta then scaling callsite information by the new count divided by the
+/// EntryDelta then scaling callsite information by the new count divided by the
/// old count. VMap is used during inlining to also update the new clone.
void updateProfileCallee(
- Function *Callee, int64_t entryDelta,
+ Function *Callee, int64_t EntryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 1d9f2d135488..f08173e45a5b 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -100,6 +100,10 @@ public:
unsigned NumExitBlocks = std::numeric_limits<unsigned>::max();
Type *RetTy;
+ // Mapping from the original exit blocks to the new blocks inside
+ // the function.
+ SmallVector<BasicBlock *, 4> OldTargets;
+
// Suffix to use when creating extracted function (appended to the original
// function name + "."). If empty, the default is to use the entry block
// label, if non-empty, otherwise "extracted".
@@ -139,6 +143,20 @@ public:
/// returns false.
Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC);
+ /// Perform the extraction, returning the new function and providing an
+ /// interface to see what was categorized as inputs and outputs.
+ ///
+ /// \param CEAC - Cache to speed up operations for the CodeExtractor when
+ /// hoisting, and extracting lifetime values and assumes.
+ /// \param Inputs [out] - filled with values marked as inputs to the
+ /// newly outlined function.
+ /// \param Outputs [out] - filled with values marked as outputs to the
+ /// newly outlined function.
+ /// \returns zero when called on a CodeExtractor instance where isEligible
+ /// returns false.
+ Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &Inputs, ValueSet &Outputs);
+
/// Verify that assumption cache isn't stale after a region is extracted.
/// Returns true when verifier finds errors. AssumptionCache is passed as
/// parameter to make this function stateless.
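A hedged sketch of the new overload; the caller is assumed to have collected the region blocks already:

    #include "llvm/Transforms/Utils/CodeExtractor.h"
    using namespace llvm;

    Function *outlineRegion(ArrayRef<BasicBlock *> Blocks, Function &F) {
      CodeExtractorAnalysisCache CEAC(F);
      CodeExtractor CE(Blocks);
      if (!CE.isEligible())
        return nullptr;
      CodeExtractor::ValueSet Inputs, Outputs;
      // Inputs/Outputs are filled with the values crossing the region boundary.
      return CE.extractCodeRegion(CEAC, Inputs, Outputs);
    }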
diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 630f936471f2..0f32a97f9fcc 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -40,7 +40,8 @@ bool isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
DominatorTree &DT,
const PostDominatorTree *PDT = nullptr,
- DependenceInfo *DI = nullptr);
+ DependenceInfo *DI = nullptr,
+ bool CheckForEntireBlock = false);
/// Return true if all instructions (except the terminator) in \p BB can be
/// safely moved before \p InsertPoint.
@@ -62,6 +63,19 @@ void moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
DominatorTree &DT, const PostDominatorTree &PDT,
DependenceInfo &DI);
+/// If the two blocks \p ThisBlock and \p OtherBlock are control flow
+/// equivalent but neither strictly dominates and post-dominates the
+/// other, determine whether \p ThisBlock is reached after \p OtherBlock
+/// in the control flow.
+bool nonStrictlyPostDominate(const BasicBlock *ThisBlock,
+ const BasicBlock *OtherBlock,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT);
+
+// Check if I0 is reached before I1 in the control flow.
+bool isReachedBefore(const Instruction *I0, const Instruction *I1,
+ const DominatorTree *DT, const PostDominatorTree *PDT);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CODEMOVERUTILS_H
diff --git a/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h b/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
index 31c023019272..f2b038494a5d 100644
--- a/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
+++ b/llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h
@@ -27,6 +27,9 @@ struct EntryExitInstrumenterPass
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
bool PostInlining;
static bool isRequired() { return true; }
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 024d84a7abc8..749b7b2bb5d8 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -130,9 +130,6 @@ bool renameModuleForThinLTO(
bool ClearDSOLocalOnDeclarations,
SetVector<GlobalValue *> *GlobalsToImport = nullptr);
-/// Compute synthetic function entry counts.
-void computeSyntheticCounts(ModuleSummaryIndex &Index);
-
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
index 519593c96766..78d7845c4353 100644
--- a/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
+++ b/llvm/include/llvm/Transforms/Utils/GlobalStatus.h
@@ -9,6 +9,7 @@
#ifndef LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
#define LLVM_TRANSFORMS_UTILS_GLOBALSTATUS_H
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/AtomicOrdering.h"
namespace llvm {
@@ -45,7 +46,7 @@ struct GlobalStatus {
/// This global is stored to, but only its initializer and one other value
/// is ever stored to it. If this global isStoredOnce, we track the value
- /// stored to it in StoredOnceValue below. This is only tracked for scalar
+ /// stored to it via StoredOnceStore below. This is only tracked for scalar
/// globals.
StoredOnce,
@@ -55,8 +56,16 @@ struct GlobalStatus {
} StoredType = NotStored;
/// If only one value (besides the initializer constant) is ever stored to
- /// this global, keep track of what value it is.
- Value *StoredOnceValue = nullptr;
+ /// this global, keep track of what value it is via the store instruction.
+ const StoreInst *StoredOnceStore = nullptr;
+
+ /// If only one value (besides the initializer constant) is ever stored to
+ /// this global, return the stored value.
+ Value *getStoredOnceValue() const {
+ return (StoredType == StoredOnce && StoredOnceStore)
+ ? StoredOnceStore->getOperand(0)
+ : nullptr;
+ }
/// These start out null/false. When the first accessing function is noticed,
/// it is recorded. When a second different accessing function is noticed,
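A sketch of how a client such as GlobalOpt might consume the new accessor; treating a true return from analyzeGlobal as "not analyzable" is an assumption:

    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/Transforms/Utils/GlobalStatus.h"
    using namespace llvm;

    bool getSingleStoredValue(GlobalVariable &GV, Value *&Stored) {
      GlobalStatus GS;
      if (GlobalStatus::analyzeGlobal(&GV, GS))
        return false; // global could not be analyzed
      Stored = GS.getStoredOnceValue(); // non-null only in the StoredOnce state
      return Stored != nullptr;
    }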
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
index 25aabe199d0f..a318c2cd28bb 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
@@ -1,4 +1,4 @@
-//===- InstCombineWorklist.h - Worklist for InstCombine pass ----*- C++ -*-===//
+//=== InstructionWorklist.h - Worklist for InstCombine & others -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
-#define LLVM_TRANSFORMS_INSTCOMBINE_INSTCOMBINEWORKLIST_H
+#ifndef LLVM_TRANSFORMS_UTILS_INSTRUCTIONWORKLIST_H
+#define LLVM_TRANSFORMS_UTILS_INSTRUCTIONWORKLIST_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -18,13 +18,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#define DEBUG_TYPE "instcombine"
-
namespace llvm {
-/// InstCombineWorklist - This is the worklist management logic for
-/// InstCombine.
-class InstCombineWorklist {
+/// InstructionWorklist - This is the worklist management logic for
+/// InstCombine and other simplification passes.
+class InstructionWorklist {
SmallVector<Instruction *, 256> Worklist;
DenseMap<Instruction *, unsigned> WorklistMap;
/// These instructions will be added in reverse order after the current
@@ -33,10 +31,10 @@ class InstCombineWorklist {
SmallSetVector<Instruction *, 16> Deferred;
public:
- InstCombineWorklist() = default;
+ InstructionWorklist() = default;
- InstCombineWorklist(InstCombineWorklist &&) = default;
- InstCombineWorklist &operator=(InstCombineWorklist &&) = default;
+ InstructionWorklist(InstructionWorklist &&) = default;
+ InstructionWorklist &operator=(InstructionWorklist &&) = default;
bool isEmpty() const { return Worklist.empty() && Deferred.empty(); }
@@ -45,7 +43,7 @@ public:
/// You likely want to use this method.
void add(Instruction *I) {
if (Deferred.insert(I))
- LLVM_DEBUG(dbgs() << "IC: ADD DEFERRED: " << *I << '\n');
+ LLVM_DEBUG(dbgs() << "ADD DEFERRED: " << *I << '\n');
}
/// Add value to the worklist if it is an instruction.
@@ -62,7 +60,7 @@ public:
assert(I->getParent() && "Instruction not inserted yet?");
if (WorklistMap.insert(std::make_pair(I, Worklist.size())).second) {
- LLVM_DEBUG(dbgs() << "IC: ADD: " << *I << '\n');
+ LLVM_DEBUG(dbgs() << "ADD: " << *I << '\n');
Worklist.push_back(I);
}
}
@@ -85,7 +83,7 @@ public:
/// Remove I from the worklist if it exists.
void remove(Instruction *I) {
- DenseMap<Instruction*, unsigned>::iterator It = WorklistMap.find(I);
+ DenseMap<Instruction *, unsigned>::iterator It = WorklistMap.find(I);
if (It != WorklistMap.end()) {
// Don't bother moving everything down, just null out the slot.
Worklist[It->second] = nullptr;
@@ -110,7 +108,6 @@ public:
push(cast<Instruction>(U));
}
-
/// Check that the worklist is empty and nuke the backing store for the map.
void zap() {
assert(WorklistMap.empty() && "Worklist empty, but map not?");
@@ -123,6 +120,4 @@ public:
} // end namespace llvm.
-#undef DEBUG_TYPE
-
#endif
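A minimal usage sketch of the renamed worklist outside of InstCombine; trySimplify is a hypothetical client-provided rewrite step, not part of this header:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Transforms/Utils/InstructionWorklist.h"
    using namespace llvm;

    bool trySimplify(Instruction &I); // hypothetical, supplied by the client

    void runToFixpoint(ArrayRef<Instruction *> Seeds) {
      InstructionWorklist WL;
      for (Instruction *I : Seeds)
        WL.push(I); // immediate add; add() defers instead
      while (Instruction *I = WL.removeOne()) {
        if (trySimplify(*I))
          WL.pushUsersToWorkList(*I); // revisit users only when I changed
      }
      WL.zap(); // assert-empty and drop the backing map
    }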
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 0102aa9ef3cc..72cb606eb51a 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -55,6 +55,7 @@ class MDNode;
class MemorySSAUpdater;
class PHINode;
class StoreInst;
+class SwitchInst;
class TargetLibraryInfo;
class TargetTransformInfo;
@@ -78,7 +79,8 @@ bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions = false,
//
/// Return true if the result produced by the instruction is not used, and the
-/// instruction has no side effects.
+/// instruction will return. Certain side-effecting instructions are also
+/// considered dead if there are no uses of the instruction.
bool isInstructionTriviallyDead(Instruction *I,
const TargetLibraryInfo *TLI = nullptr);
@@ -236,6 +238,10 @@ CallInst *createCallMatchingInvoke(InvokeInst *II);
/// This function converts the specified invoke into a normal call.
void changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);
+/// This function removes the default destination from the specified switch.
+void createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU = nullptr);
+
///===---------------------------------------------------------------------===//
/// Dbg Intrinsic utilities
///
@@ -292,14 +298,30 @@ void salvageDebugInfo(Instruction &I);
void salvageDebugInfoForDbgValues(Instruction &I,
ArrayRef<DbgVariableIntrinsic *> Insns);
-/// Given an instruction \p I and DIExpression \p DIExpr operating on it, write
-/// the effects of \p I into the returned DIExpression, or return nullptr if
-/// it cannot be salvaged. \p StackVal: whether DW_OP_stack_value should be
-/// appended to the expression. \p LocNo: the index of the location operand to
-/// which \p I applies, should be 0 for debug info without a DIArgList.
-DIExpression *salvageDebugInfoImpl(Instruction &I, DIExpression *DIExpr,
- bool StackVal, unsigned LocNo,
- SmallVectorImpl<Value *> &AdditionalValues);
+/// Given an instruction \p I and DIExpression \p DIExpr operating on
+/// it, append the effects of \p I to the DIExpression operand list
+/// \p Ops, or return \p nullptr if it cannot be salvaged.
+/// \p CurrentLocOps is the number of SSA values referenced by the
+/// incoming \p Ops. \return the first non-constant operand
+/// implicitly referred to by Ops. If \p I references more than one
+/// non-constant operand, any additional operands are added to
+/// \p AdditionalValues.
+///
+/// \example
+///
+/// I = add %a, i32 1
+///
+/// Return = %a
+/// Ops = llvm::dwarf::DW_OP_lit1 llvm::dwarf::DW_OP_add
+///
+/// I = add %a, %b
+///
+/// Return = %a
+/// Ops = llvm::dwarf::DW_OP_LLVM_arg0 llvm::dwarf::DW_OP_add
+/// AdditionalValues = %b
+Value *salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Ops,
+ SmallVectorImpl<Value *> &AdditionalValues);
/// Point debug users of \p From to \p To or salvage them. Use this function
/// only when replacing all uses of \p From with \p To, with a guarantee that
diff --git a/llvm/include/llvm/Transforms/Utils/LoopPeel.h b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
index 8f857e1e5c21..6f1b4a880457 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopPeel.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopPeel.h
@@ -32,8 +32,8 @@ gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
void computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE,
- unsigned Threshold = UINT_MAX);
+ unsigned &TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, unsigned Threshold = UINT_MAX);
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 247b911b7c8f..30c3f71e0947 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -147,11 +147,22 @@ protected:
/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
/// instructions of the loop and loop safety information as
/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p CurLoop is a loop to do sinking on. \p OutermostLoop is used only when
+/// this function is called by \p sinkRegionForLoopNest.
bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *,
- TargetTransformInfo *, Loop *, AliasSetTracker *,
- MemorySSAUpdater *, ICFLoopSafetyInfo *,
- SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
+ TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater *,
+ ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr);
+
+/// Call sinkRegion on loops contained within the specified loop
+/// in order from innermost to outermost.
+bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *,
+ DominatorTree *, BlockFrequencyInfo *,
+ TargetLibraryInfo *, TargetTransformInfo *, Loop *,
+ MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ SinkAndHoistLICMFlags &,
+ OptimizationRemarkEmitter *);
/// Walk the specified region of the CFG (defined by all blocks
/// dominated by the specified block, and that are in the current loop) in depth
@@ -163,9 +174,8 @@ bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
/// Diagnostics is emitted via \p ORE. It returns changed status.
bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
- AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
- ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
- OptimizationRemarkEmitter *, bool);
+ MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
+ SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is infact dead.
@@ -199,7 +209,7 @@ bool promoteLoopAccessesToScalars(
const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
- Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
+ Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
OptimizationRemarkEmitter *);
/// Does a BFS from a given node to all of its children inside a given loop.
@@ -338,6 +348,18 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
SinkAndHoistLICMFlags *LICMFlags = nullptr,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Returns the comparison predicate used when expanding a min/max reduction.
+CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
+
+/// See RecurrenceDescriptor::isSelectCmpPattern for a description of the
+/// pattern we are trying to match. In this pattern we are only ever selecting
+/// between two values: 1) an initial PHI start value, and 2) a loop invariant
+/// value. This function uses \p LoopExitInst to determine 2), which we then use
+/// to select between \p Left and \p Right. Any lane value in \p Left that
+/// matches 2) will be merged into \p Right.
+Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK,
+ Value *Left, Value *Right);
+
/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
/// The Builder's fast-math-flags must be set to propagate the expected values.
Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
@@ -365,12 +387,22 @@ Value *createSimpleTargetReduction(IRBuilderBase &B,
RecurKind RdxKind,
ArrayRef<Value *> RedOps = None);
+/// Create a target reduction of the given vector \p Src for a reduction of the
+/// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation
+/// is described by \p Desc.
+Value *createSelectCmpTargetReduction(IRBuilderBase &B,
+ const TargetTransformInfo *TTI,
+ Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi);
+
/// Create a generic target reduction using a recurrence descriptor \p Desc
/// The target is queried to determine if intrinsics or shuffle sequences are
/// required to implement the reduction.
/// Fast-math-flags are propagated using the RecurrenceDescriptor.
Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
- const RecurrenceDescriptor &Desc, Value *Src);
+ const RecurrenceDescriptor &Desc, Value *Src,
+ PHINode *OrigPhi = nullptr);
/// Create an ordered reduction intrinsic using the given recurrence
/// descriptor \p Desc.
@@ -463,12 +495,8 @@ Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
LoopInfo *LI, LPPassManager *LPM);
/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
-/// overlap.
-///
-/// Returns a pair of instructions where the first element is the first
-/// instruction generated in possibly a sequence of instructions and the
-/// second value is the final comparator value or NULL if no check is needed.
-std::pair<Instruction *, Instruction *>
+/// overlap. Returns the final comparator value or NULL if no check is needed.
+Value *
addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Expander);
diff --git a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h
index 7b4a1cdbf4fd..e5f8a46eaf23 100644
--- a/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h
+++ b/llvm/include/llvm/Transforms/Utils/MemoryOpRemark.h
@@ -1,9 +1,8 @@
//===- MemoryOpRemark.h - Memory operation remark analysis -*- C++ ------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
index c4030735d965..c922476ac79d 100644
--- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
+++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h
@@ -51,11 +51,13 @@
#define LLVM_TRANSFORMS_UTILS_PREDICATEINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -176,7 +178,7 @@ public:
class PredicateInfo {
public:
PredicateInfo(Function &, DominatorTree &, AssumptionCache &);
- ~PredicateInfo() = default;
+ ~PredicateInfo();
void verifyPredicateInfo() const;
@@ -203,6 +205,8 @@ private:
// the Predicate Info, they belong to the ValueInfo structs in the ValueInfos
// vector.
DenseMap<const Value *, const PredicateBase *> PredicateMap;
+ // The set of ssa_copy declarations we created with our custom mangling.
+ SmallSet<AssertingVH<Function>, 20> CreatedDeclarations;
};
// This pass does eager building and then printing of PredicateInfo. It is used
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 3a78e22b7e94..5de575aed059 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -70,10 +70,6 @@ public:
/// rewritten value when RewriteAllUses is called.
void AddUse(unsigned Var, Use *U);
- /// Return true if the SSAUpdater already has a value for the specified
- /// variable in the specified block.
- bool HasValueForBlock(unsigned Var, BasicBlock *BB);
-
/// Perform all the necessary updates, including new PHI-nodes insertion and
/// the requested uses update.
///
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index e0759d359dbe..6a2f0acf46f3 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -56,27 +56,28 @@ template <> struct IRTraits<BasicBlock> {
using FunctionT = Function;
using BlockFrequencyInfoT = BlockFrequencyInfo;
using LoopT = Loop;
- using LoopInfoT = LoopInfo;
+ using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
+ using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
+ using PostDominatorTreeT = PostDominatorTree;
+ using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
using OptRemarkEmitterT = OptimizationRemarkEmitter;
using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
- using DominatorTreeT = DominatorTree;
- using PostDominatorTreeT = PostDominatorTree;
+ using PredRangeT = pred_range;
+ using SuccRangeT = succ_range;
static Function &getFunction(Function &F) { return F; }
static const BasicBlock *getEntryBB(const Function *F) {
return &F->getEntryBlock();
}
+ static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
+ static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
};
} // end namespace afdo_detail
-extern cl::opt<unsigned> SampleProfileMaxPropagateIterations;
-extern cl::opt<unsigned> SampleProfileRecordCoverage;
-extern cl::opt<unsigned> SampleProfileSampleCoverage;
-extern cl::opt<bool> NoWarnSampleUnused;
-
template <typename BT> class SampleProfileLoaderBaseImpl {
public:
- SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {}
+ SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
+ : Filename(Name), RemappingFilename(RemapName) {}
void dump() { Reader->dump(); }
using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
@@ -85,14 +86,19 @@ public:
typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
- using LoopInfoT = typename afdo_detail::IRTraits<BT>::LoopInfoT;
+ using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
+ using DominatorTreePtrT =
+ typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
+ using PostDominatorTreePtrT =
+ typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
+ using PostDominatorTreeT =
+ typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
using OptRemarkEmitterT =
typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
using OptRemarkAnalysisT =
typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
- using DominatorTreeT = typename afdo_detail::IRTraits<BT>::DominatorTreeT;
- using PostDominatorTreeT =
- typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
+ using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
+ using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
using EquivalenceClassMap =
@@ -112,6 +118,12 @@ protected:
const BasicBlockT *getEntryBB(const FunctionT *F) {
return afdo_detail::IRTraits<BT>::getEntryBB(F);
}
+ PredRangeT getPredecessors(BasicBlockT *BB) {
+ return afdo_detail::IRTraits<BT>::getPredecessors(BB);
+ }
+ SuccRangeT getSuccessors(BasicBlockT *BB) {
+ return afdo_detail::IRTraits<BT>::getSuccessors(BB);
+ }
unsigned getFunctionLoc(FunctionT &Func);
virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
@@ -129,12 +141,11 @@ protected:
void findEquivalencesFor(BasicBlockT *BB1,
ArrayRef<BasicBlockT *> Descendants,
PostDominatorTreeT *DomTree);
-
void propagateWeights(FunctionT &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(FunctionT &F);
bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
- void clearFunctionData();
+ void clearFunctionData(bool ResetDT = true);
void computeDominanceAndLoopInfo(FunctionT &F);
bool
computeAndPropagateWeights(FunctionT &F,
@@ -168,9 +179,9 @@ protected:
EquivalenceClassMap EquivalenceClass;
/// Dominance, post-dominance and loop information.
- std::unique_ptr<DominatorTreeT> DT;
- std::unique_ptr<PostDominatorTreeT> PDT;
- std::unique_ptr<LoopInfoT> LI;
+ DominatorTreePtrT DT;
+ PostDominatorTreePtrT PDT;
+ LoopInfoPtrT LI;
/// Predecessors for each basic block in the CFG.
BlockEdgeMap Predecessors;
@@ -190,6 +201,9 @@ protected:
/// Name of the profile file to load.
std::string Filename;
+ /// Name of the profile remapping file to load.
+ std::string RemappingFilename;
+
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
@@ -199,15 +213,17 @@ protected:
/// Clear all the per-function data used to load samples and propagate weights.
template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::clearFunctionData() {
+void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
BlockWeights.clear();
EdgeWeights.clear();
VisitedBlocks.clear();
VisitedEdges.clear();
EquivalenceClass.clear();
- DT = nullptr;
- PDT = nullptr;
- LI = nullptr;
+ if (ResetDT) {
+ DT = nullptr;
+ PDT = nullptr;
+ LI = nullptr;
+ }
Predecessors.clear();
Successors.clear();
CoverageTracker.clear();
@@ -475,7 +491,7 @@ void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
// class by making BB2's equivalence class be BB1.
DominatedBBs.clear();
DT->getDescendants(BB1, DominatedBBs);
- findEquivalencesFor(BB1, DominatedBBs, PDT.get());
+ findEquivalencesFor(BB1, DominatedBBs, &*PDT);
LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
}
@@ -692,7 +708,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
SmallPtrSet<BasicBlockT *, 16> Visited;
if (!Predecessors[B1].empty())
llvm_unreachable("Found a stale predecessors list in a basic block.");
- for (BasicBlockT *B2 : predecessors(B1))
+ for (auto *B2 : getPredecessors(B1))
if (Visited.insert(B2).second)
Predecessors[B1].push_back(B2);
@@ -700,7 +716,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
Visited.clear();
if (!Successors[B1].empty())
llvm_unreachable("Found a stale successors list in a basic block.");
- for (BasicBlockT *B2 : successors(B1))
+ for (auto *B2 : getSuccessors(B1))
if (Visited.insert(B2).second)
Successors[B1].push_back(B2);
}
@@ -911,12 +927,12 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
template <typename BT>
void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
FunctionT &F) {
- DT.reset(new DominatorTreeT);
+ DT.reset(new DominatorTree);
DT->recalculate(F);
PDT.reset(new PostDominatorTree(F));
- LI.reset(new LoopInfoT);
+ LI.reset(new LoopInfo);
LI->analyze(*DT);
}
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 59bf3a342caa..efc3cc775e11 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -32,8 +32,10 @@ extern cl::opt<unsigned> SCEVCheapExpansionBudget;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate anywhere their operands are
-/// defined.
-bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
+/// defined, and the expander is capable of expanding the expression.
+/// CanonicalMode indicates whether the expander will be used in canonical mode.
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE,
+ bool CanonicalMode = true);
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are defined and safe to speculate at the specified
@@ -489,9 +491,6 @@ private:
Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L, Type *ExpandTy,
Type *IntTy, bool useSubtract);
- void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi);
-
void fixupInsertPoints(Instruction *I);
/// If required, create LCSSA PHIs for \p Users' operand \p OpIdx. If new
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 8703434e1696..a88e72fc9ba8 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -132,8 +132,6 @@ private:
eraseFromParent(I);
}
- Value *foldMallocMemset(CallInst *Memset, IRBuilderBase &B);
-
public:
LibCallSimplifier(
const DataLayout &DL, const TargetLibraryInfo *TLI,
diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index d95ead2def3d..320c36b36924 100644
--- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -117,7 +117,8 @@ MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ llvm::OptimizationRemarkEmitter &ORE, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount);
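gatherUnrollingPreferences() now threads an OptimizationRemarkEmitter through to the unroll heuristics. A hedged call-site sketch under the new signature; every argument is assumed to be supplied by the enclosing pass and the names are illustrative:

```cpp
#include "llvm/Transforms/Utils/UnrollLoop.h"
using namespace llvm;

// Hedged sketch: forward the new ORE parameter and leave all user overrides
// unset (None), matching the declaration in the hunk above.
static TargetTransformInfo::UnrollingPreferences
getUnrollPrefs(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
               BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
               OptimizationRemarkEmitter &ORE) {
  return gatherUnrollingPreferences(L, SE, TTI, BFI, PSI, ORE, /*OptLevel=*/2,
                                    None, None, None, None, None, None);
}
```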
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 4245f51cc1e2..95fd0b14dd51 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -22,7 +22,6 @@ namespace llvm {
class Constant;
class Function;
-class GlobalIndirectSymbol;
class GlobalVariable;
class Instruction;
class MDNode;
@@ -122,7 +121,8 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
/// instance:
/// - \a scheduleMapGlobalInitializer()
/// - \a scheduleMapAppendingVariable()
-/// - \a scheduleMapGlobalIndirectSymbol()
+/// - \a scheduleMapGlobalAlias()
+/// - \a scheduleMapGlobalIFunc()
/// - \a scheduleRemapFunction()
///
/// Sometimes a callback needs a different mapping context. Such a context can
@@ -182,9 +182,10 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MappingContextID = 0);
- void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
- Constant &Target,
- unsigned MappingContextID = 0);
+ void scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MappingContextID = 0);
+ void scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver,
+ unsigned MappingContextID = 0);
void scheduleRemapFunction(Function &F, unsigned MappingContextID = 0);
};
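scheduleMapGlobalIndirectSymbol() is split into alias- and ifunc-specific entry points. A hedged sketch of the two replacement calls, using only the signatures shown above; the mapper and globals are assumed to come from the surrounding IR-linking code:

```cpp
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Hedged sketch: what used to be one scheduleMapGlobalIndirectSymbol() call
// becomes one call per kind of indirect symbol.
static void scheduleIndirectSymbols(ValueMapper &VM, GlobalAlias &GA,
                                    Constant &Aliasee, GlobalIFunc &GI,
                                    Constant &Resolver) {
  VM.scheduleMapGlobalAlias(GA, Aliasee);
  VM.scheduleMapGlobalIFunc(GI, Resolver);
}
```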
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index e7dcdda8af89..ed9e0beb0339 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -340,7 +340,7 @@ public:
/// -1 - Address is consecutive, and decreasing.
/// NOTE: This method must only be used before modifying the original scalar
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
- int isConsecutivePtr(Value *Ptr) const;
+ int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index ad6a4b561a9b..d105496ad47f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -153,6 +153,8 @@ public:
ProfileSummaryInfo *PSI;
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
// Shim for old PM.
LoopVectorizeResult
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index f416a592d683..cd605aacb52d 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -94,9 +94,11 @@ private:
bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
/// Try to vectorize a list of operands.
+ /// \param LimitForRegisterSize Vectorize only using maximal allowed register
+ /// size.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
- bool AllowReorder = false);
+ bool LimitForRegisterSize = false);
/// Try to vectorize a chain that may start at the operands of \p I.
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
diff --git a/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h b/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h
index b7809aa24cae..a32f9fba967f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h
+++ b/llvm/include/llvm/Transforms/Vectorize/VectorCombine.h
@@ -20,10 +20,16 @@
namespace llvm {
/// Optimize scalar/vector interactions in IR using target cost models.
-struct VectorCombinePass : public PassInfoMixin<VectorCombinePass> {
+class VectorCombinePass : public PassInfoMixin<VectorCombinePass> {
+ /// If true, only perform scalarization combines and do not introduce new
+ /// vector operations.
+ bool ScalarizationOnly;
+
public:
+ VectorCombinePass(bool ScalarizationOnly = false)
+ : ScalarizationOnly(ScalarizationOnly) {}
+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
};
-
}
#endif // LLVM_TRANSFORMS_VECTORIZE_VECTORCOMBINE_H
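VectorCombinePass now carries a ScalarizationOnly flag, so a pipeline can run a restricted scalarization-only instance separately from the full combine. A hedged sketch of registering both, assuming the usual new-pass-manager FunctionPassManager; the pipeline order is illustrative only:

```cpp
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Vectorize/VectorCombine.h"
using namespace llvm;

// Hedged sketch: one scalarization-only instance and one default (full)
// instance of the pass, per the constructor added in the hunk above.
static void addVectorCombinePasses(FunctionPassManager &FPM) {
  FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
  // ... other function passes would normally run here ...
  FPM.addPass(VectorCombinePass());
}
```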
diff --git a/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h b/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h
index 31f4daeb7019..2da74bb9dce8 100644
--- a/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h
+++ b/llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h
@@ -30,6 +30,7 @@
namespace llvm {
class MemoryBuffer;
+class MemoryBufferRef;
namespace windows_manifest {
@@ -49,7 +50,7 @@ class WindowsManifestMerger {
public:
WindowsManifestMerger();
~WindowsManifestMerger();
- Error merge(const MemoryBuffer &Manifest);
+ Error merge(MemoryBufferRef Manifest);
// Returns vector containing merged xml manifest, or uninitialized vector for
// empty manifest.
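WindowsManifestMerger::merge() now accepts a non-owning MemoryBufferRef, so callers no longer need to hand over a whole MemoryBuffer. A hedged sketch of a caller that already holds the manifest bytes; the XML string and buffer identifier are dummies:

```cpp
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/WindowsManifest/WindowsManifestMerger.h"
using namespace llvm;
using namespace llvm::windows_manifest;

// Hedged sketch: wrap in-memory XML in a MemoryBufferRef and feed it to the
// merger, propagating any Error to the caller.
static Error mergeManifestText(WindowsManifestMerger &Merger, StringRef Xml) {
  return Merger.merge(MemoryBufferRef(Xml, "<in-memory manifest>"));
}
```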
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
index 848fb266374e..6cbbb9a4028e 100644
--- a/llvm/include/llvm/module.modulemap
+++ b/llvm/include/llvm/module.modulemap
@@ -181,21 +181,9 @@ module LLVM_ExecutionEngine {
// translation unit (or none) and aren't part of this module.
exclude header "ExecutionEngine/MCJIT.h"
exclude header "ExecutionEngine/Interpreter.h"
- exclude header "ExecutionEngine/OrcMCJITReplacement.h"
-
- // FIXME: These exclude directives were added as a workaround for
- // <rdar://problem/29247092> and should be removed once it is fixed.
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetClient.h"
- exclude header "ExecutionEngine/Orc/OrcRemoteTargetServer.h"
- exclude header "ExecutionEngine/Orc/RemoteObjectLayer.h"
// Exclude headers from LLVM_OrcSupport.
exclude header "ExecutionEngine/Orc/Shared/OrcError.h"
- exclude header "ExecutionEngine/Orc/RPC/RPCUtils.h"
- exclude header "ExecutionEngine/Orc/RPC/RPCSerialization.h"
- exclude header "ExecutionEngine/Orc/RPC/RawByteChannel.h"
-
}
module LLVM_FileCheck {
@@ -221,9 +209,6 @@ module LLVM_OrcSupport {
requires cplusplus
header "ExecutionEngine/Orc/Shared/OrcError.h"
- header "ExecutionEngine/Orc/Shared/RPCUtils.h"
- header "ExecutionEngine/Orc/Shared/Serialization.h"
- header "ExecutionEngine/Orc/Shared/RawByteChannel.h"
export *
}
@@ -389,6 +374,9 @@ module LLVM_Transforms {
umbrella "Transforms"
module * { export * }
+
+ // Requires DEBUG_TYPE to be defined by including file.
+ exclude header "Transforms/Utils/InstructionWorklist.h"
}
extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap"
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index e7445e225d52..d030f74481cf 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -119,7 +119,7 @@ bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
AliasResult AAResults::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return alias(LocA, LocB, AAQIP);
}
@@ -162,7 +162,7 @@ AliasResult AAResults::alias(const MemoryLocation &LocA,
bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
bool OrLocal) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return pointsToConstantMemory(Loc, AAQIP, OrLocal);
}
@@ -190,7 +190,7 @@ ModRefInfo AAResults::getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
}
ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(I, Call2, AAQIP);
}
@@ -200,25 +200,24 @@ ModRefInfo AAResults::getModRefInfo(Instruction *I, const CallBase *Call2,
if (const auto *Call1 = dyn_cast<CallBase>(I)) {
// Check if the two calls modify the same memory.
return getModRefInfo(Call1, Call2, AAQI);
- } else if (I->isFenceLike()) {
- // If this is a fence, just return ModRef.
- return ModRefInfo::ModRef;
- } else {
- // Otherwise, check if the call modifies or references the
- // location this memory access defines. The best we can say
- // is that if the call references what this instruction
- // defines, it must be clobbered by this location.
- const MemoryLocation DefLoc = MemoryLocation::get(I);
- ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
- if (isModOrRefSet(MR))
- return setModAndRef(MR);
}
+ // If this is a fence, just return ModRef.
+ if (I->isFenceLike())
+ return ModRefInfo::ModRef;
+ // Otherwise, check if the call modifies or references the
+ // location this memory access defines. The best we can say
+ // is that if the call references what this instruction
+ // defines, it must be clobbered by this location.
+ const MemoryLocation DefLoc = MemoryLocation::get(I);
+ ModRefInfo MR = getModRefInfo(Call2, DefLoc, AAQI);
+ if (isModOrRefSet(MR))
+ return setModAndRef(MR);
return ModRefInfo::NoModRef;
}
ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(Call, Loc, AAQIP);
}
@@ -285,7 +284,7 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
ModRefInfo AAResults::getModRefInfo(const CallBase *Call1,
const CallBase *Call2) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(Call1, Call2, AAQIP);
}
@@ -475,7 +474,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) {
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(L, Loc, AAQIP);
}
ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
@@ -500,7 +499,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L,
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(S, Loc, AAQIP);
}
ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
@@ -532,7 +531,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S,
}
ModRefInfo AAResults::getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(S, Loc, AAQIP);
}
@@ -548,7 +547,7 @@ ModRefInfo AAResults::getModRefInfo(const FenceInst *S,
ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(V, Loc, AAQIP);
}
@@ -578,7 +577,7 @@ ModRefInfo AAResults::getModRefInfo(const VAArgInst *V,
ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(CatchPad, Loc, AAQIP);
}
@@ -598,7 +597,7 @@ ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad,
ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(CatchRet, Loc, AAQIP);
}
@@ -618,7 +617,7 @@ ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet,
ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(CX, Loc, AAQIP);
}
@@ -646,7 +645,7 @@ ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
const MemoryLocation &Loc) {
- AAQueryInfo AAQIP;
+ SimpleAAQueryInfo AAQIP;
return getModRefInfo(RMW, Loc, AAQIP);
}
@@ -746,7 +745,7 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!Call->doesNotCapture(ArgNo) && ArgNo < Call->getNumArgOperands() &&
+ (!Call->doesNotCapture(ArgNo) && ArgNo < Call->arg_size() &&
!Call->isByValArgument(ArgNo)))
continue;
@@ -808,11 +807,6 @@ AAResults::Concept::~Concept() = default;
// Provide a definition for the static object used to identify passes.
AnalysisKey AAManager::Key;
-namespace {
-
-
-} // end anonymous namespace
-
ExternalAAWrapperPass::ExternalAAWrapperPass() : ImmutablePass(ID) {
initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index dee044346f02..9d4fe1225b33 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -84,7 +84,7 @@ void llvm::fillMapFromAssume(AssumeInst &Assume, RetainedKnowledgeMap &Result) {
getValueFromBundleOpInfo(Assume, Bundles, ABA_Argument));
if (!CI)
continue;
- unsigned Val = CI->getZExtValue();
+ uint64_t Val = CI->getZExtValue();
auto Lookup = Result.find(Key);
if (Lookup == Result.end() || !Lookup->second.count(&Assume)) {
Result[Key][&Assume] = {Val, Val};
@@ -102,7 +102,7 @@ llvm::getKnowledgeFromBundle(AssumeInst &Assume,
Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey());
if (bundleHasArgument(BOI, ABA_WasOn))
Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn);
- auto GetArgOr1 = [&](unsigned Idx) -> unsigned {
+ auto GetArgOr1 = [&](unsigned Idx) -> uint64_t {
if (auto *ConstInt = dyn_cast<ConstantInt>(
getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx)))
return ConstInt->getZExtValue();
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index 0d95b33601f9..3e0214e21ecd 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -56,7 +57,7 @@ AssumptionCache::getOrInsertAffectedValues(Value *V) {
}
static void
-findAffectedValues(CallBase *CI,
+findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
SmallVectorImpl<AssumptionCache::ResultElem> &Affected) {
// Note: This code must be kept in-sync with the code in
// computeKnownBitsFromAssume in ValueTracking.
@@ -124,24 +125,32 @@ findAffectedValues(CallBase *CI,
match(B, m_ConstantInt()))
AddAffected(X);
}
+
+ if (TTI) {
+ const Value *Ptr;
+ unsigned AS;
+ std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(Cond);
+ if (Ptr)
+ AddAffected(const_cast<Value *>(Ptr->stripInBoundsOffsets()));
+ }
}
void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
SmallVector<AssumptionCache::ResultElem, 16> Affected;
- findAffectedValues(CI, Affected);
+ findAffectedValues(CI, TTI, Affected);
for (auto &AV : Affected) {
auto &AVV = getOrInsertAffectedValues(AV.Assume);
- if (std::find_if(AVV.begin(), AVV.end(), [&](ResultElem &Elem) {
+ if (llvm::none_of(AVV, [&](ResultElem &Elem) {
return Elem.Assume == CI && Elem.Index == AV.Index;
- }) == AVV.end())
+ }))
AVV.push_back({CI, AV.Index});
}
}
void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
SmallVector<AssumptionCache::ResultElem, 16> Affected;
- findAffectedValues(CI, Affected);
+ findAffectedValues(CI, TTI, Affected);
for (auto &AV : Affected) {
auto AVI = AffectedValues.find_as(AV.Assume);
@@ -248,6 +257,12 @@ void AssumptionCache::registerAssumption(AssumeInst *CI) {
updateAffectedValues(CI);
}
+AssumptionCache AssumptionAnalysis::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+ return AssumptionCache(F, &TTI);
+}
+
AnalysisKey AssumptionAnalysis::Key;
PreservedAnalyses AssumptionPrinterPass::run(Function &F,
@@ -278,10 +293,13 @@ AssumptionCache &AssumptionCacheTracker::getAssumptionCache(Function &F) {
if (I != AssumptionCaches.end())
return *I->second;
+ auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+ auto *TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
+
// Ok, build a new cache by scanning the function, insert it and the value
// handle into our map, and return the newly populated cache.
auto IP = AssumptionCaches.insert(std::make_pair(
- FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F)));
+ FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F, TTI)));
assert(IP.second && "Scanning function already in the map?");
return *IP.first->second;
}
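Besides threading TargetTransformInfo into the cache, the hunk above replaces a std::find_if(...) == end() test with llvm::none_of. A small standalone illustration of that idiom (unrelated to AssumptionCache itself; the element type is chosen only for brevity):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Hedged sketch: push a value only if no existing element matches, written
// with llvm::none_of in the same style as the refactor above.
static void insertUnique(llvm::SmallVectorImpl<int> &Vec, int V) {
  if (llvm::none_of(Vec, [&](int Elem) { return Elem == V; }))
    Vec.push_back(V);
}
```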
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 357772c9c4f2..88b0f37b1d48 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -68,15 +69,6 @@ using namespace llvm;
static cl::opt<bool> EnableRecPhiAnalysis("basic-aa-recphi", cl::Hidden,
cl::init(true));
-/// By default, even on 32-bit architectures we use 64-bit integers for
-/// calculations. This will allow us to more-aggressively decompose indexing
-/// expressions calculated using i64 values (e.g., long long in C) which is
-/// common enough to worry about.
-static cl::opt<bool> ForceAtLeast64Bits("basic-aa-force-at-least-64b",
- cl::Hidden, cl::init(true));
-static cl::opt<bool> DoubleCalcBits("basic-aa-double-calc-bits",
- cl::Hidden, cl::init(false));
-
/// SearchLimitReached / SearchTimes shows how often the limit of
/// to decompose GEPs is reached. It will affect the precision
/// of basic alias analysis.
@@ -91,8 +83,7 @@ STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// The max limit of the search depth in DecomposeGEPExpression() and
-// getUnderlyingObject(), both functions need to use the same search
-// depth otherwise the algorithm in aliasGEP will assert.
+// getUnderlyingObject().
static const unsigned MaxLookupSearchDepth = 6;
bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
@@ -120,9 +111,6 @@ static bool isEscapeSource(const Value *V) {
if (isa<CallBase>(V))
return true;
- if (isa<Argument>(V))
- return true;
-
// The load case works because isNonEscapingLocalObject considers all
// stores to be escapes (it passes true for the StoreCaptures argument
// to PointerMayBeCaptured).
@@ -206,12 +194,12 @@ static uint64_t getMinimalExtentFrom(const Value &V,
bool NullIsValidLoc) {
// If we have dereferenceability information we know a lower bound for the
// extent as accesses for a lower offset would be valid. We need to exclude
- // the "or null" part if null is a valid pointer.
+ // the "or null" part if null is a valid pointer. We can ignore frees, as an
+ // access after free would be undefined behavior.
bool CanBeNull, CanBeFreed;
uint64_t DerefBytes =
V.getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
DerefBytes = (CanBeNull && NullIsValidLoc) ? 0 : DerefBytes;
- DerefBytes = CanBeFreed ? 0 : DerefBytes;
// If queried with a precise location size, we assume that location size to be
// accessed, thus valid.
if (LocSize.isPrecise())
@@ -227,82 +215,163 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
}
//===----------------------------------------------------------------------===//
+// CaptureInfo implementations
+//===----------------------------------------------------------------------===//
+
+CaptureInfo::~CaptureInfo() = default;
+
+bool SimpleCaptureInfo::isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) {
+ return isNonEscapingLocalObject(Object, &IsCapturedCache);
+}
+
+bool EarliestEscapeInfo::isNotCapturedBeforeOrAt(const Value *Object,
+ const Instruction *I) {
+ if (!isIdentifiedFunctionLocal(Object))
+ return false;
+
+ auto Iter = EarliestEscapes.insert({Object, nullptr});
+ if (Iter.second) {
+ Instruction *EarliestCapture = FindEarliestCapture(
+ Object, *const_cast<Function *>(I->getFunction()),
+ /*ReturnCaptures=*/false, /*StoreCaptures=*/true, DT);
+ if (EarliestCapture) {
+ auto Ins = Inst2Obj.insert({EarliestCapture, {}});
+ Ins.first->second.push_back(Object);
+ }
+ Iter.first->second = EarliestCapture;
+ }
+
+ // No capturing instruction.
+ if (!Iter.first->second)
+ return true;
+
+ return I != Iter.first->second &&
+ !isPotentiallyReachable(Iter.first->second, I, nullptr, &DT, &LI);
+}
+
+void EarliestEscapeInfo::removeInstruction(Instruction *I) {
+ auto Iter = Inst2Obj.find(I);
+ if (Iter != Inst2Obj.end()) {
+ for (const Value *Obj : Iter->second)
+ EarliestEscapes.erase(Obj);
+ Inst2Obj.erase(I);
+ }
+}
+
+//===----------------------------------------------------------------------===//
// GetElementPtr Instruction Decomposition and Analysis
//===----------------------------------------------------------------------===//
namespace {
-/// Represents zext(sext(V)).
-struct ExtendedValue {
+/// Represents zext(sext(trunc(V))).
+struct CastedValue {
const Value *V;
- unsigned ZExtBits;
- unsigned SExtBits;
+ unsigned ZExtBits = 0;
+ unsigned SExtBits = 0;
+ unsigned TruncBits = 0;
- explicit ExtendedValue(const Value *V, unsigned ZExtBits = 0,
- unsigned SExtBits = 0)
- : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits) {}
+ explicit CastedValue(const Value *V) : V(V) {}
+ explicit CastedValue(const Value *V, unsigned ZExtBits, unsigned SExtBits,
+ unsigned TruncBits)
+ : V(V), ZExtBits(ZExtBits), SExtBits(SExtBits), TruncBits(TruncBits) {}
unsigned getBitWidth() const {
- return V->getType()->getPrimitiveSizeInBits() + ZExtBits + SExtBits;
+ return V->getType()->getPrimitiveSizeInBits() - TruncBits + ZExtBits +
+ SExtBits;
}
- ExtendedValue withValue(const Value *NewV) const {
- return ExtendedValue(NewV, ZExtBits, SExtBits);
+ CastedValue withValue(const Value *NewV) const {
+ return CastedValue(NewV, ZExtBits, SExtBits, TruncBits);
}
- ExtendedValue withZExtOfValue(const Value *NewV) const {
+ /// Replace V with zext(NewV)
+ CastedValue withZExtOfValue(const Value *NewV) const {
unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
NewV->getType()->getPrimitiveSizeInBits();
+ if (ExtendBy <= TruncBits)
+ return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
+
// zext(sext(zext(NewV))) == zext(zext(zext(NewV)))
- return ExtendedValue(NewV, ZExtBits + SExtBits + ExtendBy, 0);
+ ExtendBy -= TruncBits;
+ return CastedValue(NewV, ZExtBits + SExtBits + ExtendBy, 0, 0);
}
- ExtendedValue withSExtOfValue(const Value *NewV) const {
+ /// Replace V with sext(NewV)
+ CastedValue withSExtOfValue(const Value *NewV) const {
unsigned ExtendBy = V->getType()->getPrimitiveSizeInBits() -
NewV->getType()->getPrimitiveSizeInBits();
+ if (ExtendBy <= TruncBits)
+ return CastedValue(NewV, ZExtBits, SExtBits, TruncBits - ExtendBy);
+
// zext(sext(sext(NewV)))
- return ExtendedValue(NewV, ZExtBits, SExtBits + ExtendBy);
+ ExtendBy -= TruncBits;
+ return CastedValue(NewV, ZExtBits, SExtBits + ExtendBy, 0);
}
APInt evaluateWith(APInt N) const {
assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
"Incompatible bit width");
+ if (TruncBits) N = N.trunc(N.getBitWidth() - TruncBits);
if (SExtBits) N = N.sext(N.getBitWidth() + SExtBits);
if (ZExtBits) N = N.zext(N.getBitWidth() + ZExtBits);
return N;
}
+ ConstantRange evaluateWith(ConstantRange N) const {
+ assert(N.getBitWidth() == V->getType()->getPrimitiveSizeInBits() &&
+ "Incompatible bit width");
+ if (TruncBits) N = N.truncate(N.getBitWidth() - TruncBits);
+ if (SExtBits) N = N.signExtend(N.getBitWidth() + SExtBits);
+ if (ZExtBits) N = N.zeroExtend(N.getBitWidth() + ZExtBits);
+ return N;
+ }
+
bool canDistributeOver(bool NUW, bool NSW) const {
// zext(x op<nuw> y) == zext(x) op<nuw> zext(y)
// sext(x op<nsw> y) == sext(x) op<nsw> sext(y)
+ // trunc(x op y) == trunc(x) op trunc(y)
return (!ZExtBits || NUW) && (!SExtBits || NSW);
}
+
+ bool hasSameCastsAs(const CastedValue &Other) const {
+ return ZExtBits == Other.ZExtBits && SExtBits == Other.SExtBits &&
+ TruncBits == Other.TruncBits;
+ }
};
-/// Represents zext(sext(V)) * Scale + Offset.
+/// Represents zext(sext(trunc(V))) * Scale + Offset.
struct LinearExpression {
- ExtendedValue Val;
+ CastedValue Val;
APInt Scale;
APInt Offset;
/// True if all operations in this expression are NSW.
bool IsNSW;
- LinearExpression(const ExtendedValue &Val, const APInt &Scale,
+ LinearExpression(const CastedValue &Val, const APInt &Scale,
const APInt &Offset, bool IsNSW)
: Val(Val), Scale(Scale), Offset(Offset), IsNSW(IsNSW) {}
- LinearExpression(const ExtendedValue &Val) : Val(Val), IsNSW(true) {
+ LinearExpression(const CastedValue &Val) : Val(Val), IsNSW(true) {
unsigned BitWidth = Val.getBitWidth();
Scale = APInt(BitWidth, 1);
Offset = APInt(BitWidth, 0);
}
+
+ LinearExpression mul(const APInt &Other, bool MulIsNSW) const {
+ // The check for zero offset is necessary, because generally
+ // (X +nsw Y) *nsw Z does not imply (X *nsw Z) +nsw (Y *nsw Z).
+ bool NSW = IsNSW && (Other.isOne() || (MulIsNSW && Offset.isZero()));
+ return LinearExpression(Val, Scale * Other, Offset * Other, NSW);
+ }
};
}
/// Analyzes the specified value as a linear expression: "A*V + B", where A and
/// B are constant integers.
static LinearExpression GetLinearExpression(
- const ExtendedValue &Val, const DataLayout &DL, unsigned Depth,
+ const CastedValue &Val, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, DominatorTree *DT) {
// Limit our recursion depth.
if (Depth == 6)
@@ -325,6 +394,11 @@ static LinearExpression GetLinearExpression(
if (!Val.canDistributeOver(NUW, NSW))
return Val;
+ // While we can distribute over trunc, we cannot preserve nowrap flags
+ // in that case.
+ if (Val.TruncBits)
+ NUW = NSW = false;
+
LinearExpression E(Val);
switch (BOp->getOpcode()) {
default:
@@ -353,14 +427,11 @@ static LinearExpression GetLinearExpression(
E.IsNSW &= NSW;
break;
}
- case Instruction::Mul: {
+ case Instruction::Mul:
E = GetLinearExpression(Val.withValue(BOp->getOperand(0)), DL,
- Depth + 1, AC, DT);
- E.Offset *= RHS;
- E.Scale *= RHS;
- E.IsNSW &= NSW;
+ Depth + 1, AC, DT)
+ .mul(RHS, NSW);
break;
- }
case Instruction::Shl:
// We're trying to linearize an expression of the kind:
// shl i8 -128, 36
@@ -394,25 +465,75 @@ static LinearExpression GetLinearExpression(
return Val;
}
-/// To ensure a pointer offset fits in an integer of size PointerSize
-/// (in bits) when that size is smaller than the maximum pointer size. This is
+/// To ensure a pointer offset fits in an integer of size IndexSize
+/// (in bits) when that size is smaller than the maximum index size. This is
/// an issue, for example, in particular for 32b pointers with negative indices
/// that rely on two's complement wrap-arounds for precise alias information
-/// where the maximum pointer size is 64b.
-static APInt adjustToPointerSize(const APInt &Offset, unsigned PointerSize) {
- assert(PointerSize <= Offset.getBitWidth() && "Invalid PointerSize!");
- unsigned ShiftBits = Offset.getBitWidth() - PointerSize;
+/// where the maximum index size is 64b.
+static APInt adjustToIndexSize(const APInt &Offset, unsigned IndexSize) {
+ assert(IndexSize <= Offset.getBitWidth() && "Invalid IndexSize!");
+ unsigned ShiftBits = Offset.getBitWidth() - IndexSize;
return (Offset << ShiftBits).ashr(ShiftBits);
}
-static unsigned getMaxPointerSize(const DataLayout &DL) {
- unsigned MaxPointerSize = DL.getMaxPointerSizeInBits();
- if (MaxPointerSize < 64 && ForceAtLeast64Bits) MaxPointerSize = 64;
- if (DoubleCalcBits) MaxPointerSize *= 2;
+namespace {
+// A linear transformation of a Value; this class represents
+// ZExt(SExt(Trunc(V, TruncBits), SExtBits), ZExtBits) * Scale.
+struct VariableGEPIndex {
+ CastedValue Val;
+ APInt Scale;
+
+ // Context instruction to use when querying information about this index.
+ const Instruction *CxtI;
+
+ /// True if all operations in this expression are NSW.
+ bool IsNSW;
- return MaxPointerSize;
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ void print(raw_ostream &OS) const {
+ OS << "(V=" << Val.V->getName()
+ << ", zextbits=" << Val.ZExtBits
+ << ", sextbits=" << Val.SExtBits
+ << ", truncbits=" << Val.TruncBits
+ << ", scale=" << Scale << ")";
+ }
+};
}
+// Represents the internal structure of a GEP, decomposed into a base pointer,
+// constant offsets, and variable scaled indices.
+struct BasicAAResult::DecomposedGEP {
+ // Base pointer of the GEP
+ const Value *Base;
+ // Total constant offset from base.
+ APInt Offset;
+ // Scaled variable (non-constant) indices.
+ SmallVector<VariableGEPIndex, 4> VarIndices;
+ // Are all operations inbounds GEPs or non-indexing operations?
+ // (None iff expression doesn't involve any geps)
+ Optional<bool> InBounds;
+
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ void print(raw_ostream &OS) const {
+ OS << "(DecomposedGEP Base=" << Base->getName()
+ << ", Offset=" << Offset
+ << ", VarIndices=[";
+ for (size_t i = 0; i < VarIndices.size(); i++) {
+ if (i != 0)
+ OS << ", ";
+ VarIndices[i].print(OS);
+ }
+ OS << "])";
+ }
+};
+
+
/// If V is a symbolic pointer expression, decompose it into a base pointer
/// with a constant offset and a number of scaled symbolic offsets.
///
@@ -420,11 +541,6 @@ static unsigned getMaxPointerSize(const DataLayout &DL) {
/// in the VarIndices vector) are Value*'s that are known to be scaled by the
/// specified amount, but which may have other unrepresented high bits. As
/// such, the gep cannot necessarily be reconstructed from its decomposed form.
-///
-/// This function is capable of analyzing everything that getUnderlyingObject
-/// can look through. To be able to do that getUnderlyingObject and
-/// DecomposeGEPExpression must use the same search depth
-/// (MaxLookupSearchDepth).
BasicAAResult::DecomposedGEP
BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
AssumptionCache *AC, DominatorTree *DT) {
@@ -433,10 +549,9 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
SearchTimes++;
const Instruction *CxtI = dyn_cast<Instruction>(V);
- unsigned MaxPointerSize = getMaxPointerSize(DL);
+ unsigned MaxIndexSize = DL.getMaxIndexSizeInBits();
DecomposedGEP Decomposed;
- Decomposed.Offset = APInt(MaxPointerSize, 0);
- Decomposed.HasCompileTimeConstantScale = true;
+ Decomposed.Offset = APInt(MaxIndexSize, 0);
do {
// See if this is a bitcast or GEP.
const Operator *Op = dyn_cast<Operator>(V);
@@ -493,24 +608,19 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
else if (!GEPOp->isInBounds())
Decomposed.InBounds = false;
- // Don't attempt to analyze GEPs over unsized objects.
- if (!GEPOp->getSourceElementType()->isSized()) {
- Decomposed.Base = V;
- return Decomposed;
- }
+ assert(GEPOp->getSourceElementType()->isSized() && "GEP must be sized");
// Don't attempt to analyze GEPs if index scale is not a compile-time
// constant.
if (isa<ScalableVectorType>(GEPOp->getSourceElementType())) {
Decomposed.Base = V;
- Decomposed.HasCompileTimeConstantScale = false;
return Decomposed;
}
unsigned AS = GEPOp->getPointerAddressSpace();
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
- unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ unsigned IndexSize = DL.getIndexSizeInBits(AS);
// Assume all GEP operands are constants until proven otherwise.
bool GepHasConstantOffset = true;
for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
@@ -533,49 +643,34 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
continue;
Decomposed.Offset +=
DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize() *
- CIdx->getValue().sextOrTrunc(MaxPointerSize);
+ CIdx->getValue().sextOrTrunc(MaxIndexSize);
continue;
}
GepHasConstantOffset = false;
- APInt Scale(MaxPointerSize,
- DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize());
- // If the integer type is smaller than the pointer size, it is implicitly
- // sign extended to pointer size.
+ // If the integer type is smaller than the index size, it is implicitly
+ // sign extended or truncated to index size.
unsigned Width = Index->getType()->getIntegerBitWidth();
- unsigned SExtBits = PointerSize > Width ? PointerSize - Width : 0;
+ unsigned SExtBits = IndexSize > Width ? IndexSize - Width : 0;
+ unsigned TruncBits = IndexSize < Width ? Width - IndexSize : 0;
LinearExpression LE = GetLinearExpression(
- ExtendedValue(Index, 0, SExtBits), DL, 0, AC, DT);
-
- // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
- // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-
- // It can be the case that, even through C1*V+C2 does not overflow for
- // relevant values of V, (C2*Scale) can overflow. In that case, we cannot
- // decompose the expression in this way.
- //
- // FIXME: C1*Scale and the other operations in the decomposed
- // (C1*Scale)*V+C2*Scale can also overflow. We should check for this
- // possibility.
- bool Overflow;
- APInt ScaledOffset = LE.Offset.sextOrTrunc(MaxPointerSize)
- .smul_ov(Scale, Overflow);
- if (Overflow) {
- LE = LinearExpression(ExtendedValue(Index, 0, SExtBits));
- } else {
- Decomposed.Offset += ScaledOffset;
- Scale *= LE.Scale.sextOrTrunc(MaxPointerSize);
- }
+ CastedValue(Index, 0, SExtBits, TruncBits), DL, 0, AC, DT);
+
+ // Scale by the type size.
+ unsigned TypeSize =
+ DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize();
+ LE = LE.mul(APInt(IndexSize, TypeSize), GEPOp->isInBounds());
+ Decomposed.Offset += LE.Offset.sextOrSelf(MaxIndexSize);
+ APInt Scale = LE.Scale.sextOrSelf(MaxIndexSize);
// If we already had an occurrence of this index variable, merge this
// scale into it. For example, we want to handle:
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].V == LE.Val.V &&
- Decomposed.VarIndices[i].ZExtBits == LE.Val.ZExtBits &&
- Decomposed.VarIndices[i].SExtBits == LE.Val.SExtBits) {
+ if (Decomposed.VarIndices[i].Val.V == LE.Val.V &&
+ Decomposed.VarIndices[i].Val.hasSameCastsAs(LE.Val)) {
Scale += Decomposed.VarIndices[i].Scale;
Decomposed.VarIndices.erase(Decomposed.VarIndices.begin() + i);
break;
@@ -583,19 +678,18 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
}
// Make sure that we have a scale that makes sense for this target's
- // pointer size.
- Scale = adjustToPointerSize(Scale, PointerSize);
+ // index size.
+ Scale = adjustToIndexSize(Scale, IndexSize);
if (!!Scale) {
- VariableGEPIndex Entry = {
- LE.Val.V, LE.Val.ZExtBits, LE.Val.SExtBits, Scale, CxtI, LE.IsNSW};
+ VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW};
Decomposed.VarIndices.push_back(Entry);
}
}
// Take care of wrap-arounds
if (GepHasConstantOffset)
- Decomposed.Offset = adjustToPointerSize(Decomposed.Offset, PointerSize);
+ Decomposed.Offset = adjustToIndexSize(Decomposed.Offset, IndexSize);
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -838,7 +932,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// then the call can not mod/ref the pointer unless the call takes the pointer
// as an argument, and itself doesn't capture it.
if (!isa<Constant>(Object) && Call != Object &&
- isNonEscapingLocalObject(Object, &AAQI.IsCapturedCache)) {
+ AAQI.CI->isNotCapturedBeforeOrAt(Object, Call)) {
// Optimistically assume that call doesn't touch Object and check this
// assumption in the following loop.
@@ -852,8 +946,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!Call->doesNotCapture(OperandNo) &&
- OperandNo < Call->getNumArgOperands() &&
+ (!Call->doesNotCapture(OperandNo) && OperandNo < Call->arg_size() &&
!Call->isByValArgument(OperandNo)))
continue;
@@ -1046,20 +1139,13 @@ AliasResult BasicAAResult::aliasGEP(
DecomposedGEP DecompGEP1 = DecomposeGEPExpression(GEP1, DL, &AC, DT);
DecomposedGEP DecompGEP2 = DecomposeGEPExpression(V2, DL, &AC, DT);
- // Don't attempt to analyze the decomposed GEP if index scale is not a
- // compile-time constant.
- if (!DecompGEP1.HasCompileTimeConstantScale ||
- !DecompGEP2.HasCompileTimeConstantScale)
+ // Bail if we were not able to decompose anything.
+ if (DecompGEP1.Base == GEP1 && DecompGEP2.Base == V2)
return AliasResult::MayAlias;
- assert(DecompGEP1.Base == UnderlyingV1 && DecompGEP2.Base == UnderlyingV2 &&
- "DecomposeGEPExpression returned a result different from "
- "getUnderlyingObject");
-
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
- DecompGEP1.Offset -= DecompGEP2.Offset;
- GetIndexDifference(DecompGEP1.VarIndices, DecompGEP2.VarIndices);
+ subtractDecomposedGEPs(DecompGEP1, DecompGEP2);
// If an inbounds GEP would have to start from an out of bounds address
// for the two to alias, then we can assume noalias.
@@ -1079,14 +1165,14 @@ AliasResult BasicAAResult::aliasGEP(
// For GEPs with identical offsets, we can preserve the size and AAInfo
// when performing the alias check on the underlying objects.
if (DecompGEP1.Offset == 0 && DecompGEP1.VarIndices.empty())
- return getBestAAResults().alias(
- MemoryLocation(UnderlyingV1, V1Size),
- MemoryLocation(UnderlyingV2, V2Size), AAQI);
+ return getBestAAResults().alias(MemoryLocation(DecompGEP1.Base, V1Size),
+ MemoryLocation(DecompGEP2.Base, V2Size),
+ AAQI);
// Do the base pointers alias?
AliasResult BaseAlias = getBestAAResults().alias(
- MemoryLocation::getBeforeOrAfter(UnderlyingV1),
- MemoryLocation::getBeforeOrAfter(UnderlyingV2), AAQI);
+ MemoryLocation::getBeforeOrAfter(DecompGEP1.Base),
+ MemoryLocation::getBeforeOrAfter(DecompGEP2.Base), AAQI);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
@@ -1100,7 +1186,7 @@ AliasResult BasicAAResult::aliasGEP(
// is less than the size of the associated memory object, then we know
// that the objects are partially overlapping. If the difference is
// greater, we know they do not overlap.
- if (DecompGEP1.Offset != 0 && DecompGEP1.VarIndices.empty()) {
+ if (DecompGEP1.VarIndices.empty()) {
APInt &Off = DecompGEP1.Offset;
// Initialize for Off >= 0 (V2 <= GEP1) case.
@@ -1122,133 +1208,124 @@ AliasResult BasicAAResult::aliasGEP(
Off = -Off;
}
- if (VLeftSize.hasValue()) {
- const uint64_t LSize = VLeftSize.getValue();
- if (Off.ult(LSize)) {
- // Conservatively drop processing if a phi was visited and/or offset is
- // too big.
- AliasResult AR = AliasResult::PartialAlias;
- if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
- (Off + VRightSize.getValue()).ule(LSize)) {
- // Memory referenced by right pointer is nested. Save the offset in
- // cache. Note that originally offset estimated as GEP1-V2, but
- // AliasResult contains the shift that represents GEP1+Offset=V2.
- AR.setOffset(-Off.getSExtValue());
- AR.swap(Swapped);
- }
- return AR;
+ if (!VLeftSize.hasValue())
+ return AliasResult::MayAlias;
+
+ const uint64_t LSize = VLeftSize.getValue();
+ if (Off.ult(LSize)) {
+ // Conservatively drop processing if a phi was visited and/or offset is
+ // too big.
+ AliasResult AR = AliasResult::PartialAlias;
+ if (VRightSize.hasValue() && Off.ule(INT32_MAX) &&
+ (Off + VRightSize.getValue()).ule(LSize)) {
+ // Memory referenced by right pointer is nested. Save the offset in
+ // cache. Note that originally offset estimated as GEP1-V2, but
+ // AliasResult contains the shift that represents GEP1+Offset=V2.
+ AR.setOffset(-Off.getSExtValue());
+ AR.swap(Swapped);
}
- return AliasResult::NoAlias;
+ return AR;
}
+ return AliasResult::NoAlias;
}
- if (!DecompGEP1.VarIndices.empty()) {
- APInt GCD;
- bool AllNonNegative = DecompGEP1.Offset.isNonNegative();
- bool AllNonPositive = DecompGEP1.Offset.isNonPositive();
- for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
- APInt Scale = DecompGEP1.VarIndices[i].Scale;
- APInt ScaleForGCD = DecompGEP1.VarIndices[i].Scale;
- if (!DecompGEP1.VarIndices[i].IsNSW)
- ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
- Scale.countTrailingZeros());
-
- if (i == 0)
- GCD = ScaleForGCD.abs();
- else
- GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
-
- if (AllNonNegative || AllNonPositive) {
- // If the Value could change between cycles, then any reasoning about
- // the Value this cycle may not hold in the next cycle. We'll just
- // give up if we can't determine conditions that hold for every cycle:
- const Value *V = DecompGEP1.VarIndices[i].V;
- const Instruction *CxtI = DecompGEP1.VarIndices[i].CxtI;
-
- KnownBits Known = computeKnownBits(V, DL, 0, &AC, CxtI, DT);
- bool SignKnownZero = Known.isNonNegative();
- bool SignKnownOne = Known.isNegative();
-
- // Zero-extension widens the variable, and so forces the sign
- // bit to zero.
- bool IsZExt = DecompGEP1.VarIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
- SignKnownZero |= IsZExt;
- SignKnownOne &= !IsZExt;
-
- AllNonNegative &= (SignKnownZero && Scale.isNonNegative()) ||
- (SignKnownOne && Scale.isNonPositive());
- AllNonPositive &= (SignKnownZero && Scale.isNonPositive()) ||
- (SignKnownOne && Scale.isNonNegative());
- }
- }
+ // We need to know both access sizes for all the following heuristics.
+ if (!V1Size.hasValue() || !V2Size.hasValue())
+ return AliasResult::MayAlias;
- // We now have accesses at two offsets from the same base:
- // 1. (...)*GCD + DecompGEP1.Offset with size V1Size
- // 2. 0 with size V2Size
- // Using arithmetic modulo GCD, the accesses are at
- // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
- // into the range [V2Size..GCD), then we know they cannot overlap.
- APInt ModOffset = DecompGEP1.Offset.srem(GCD);
- if (ModOffset.isNegative())
- ModOffset += GCD; // We want mod, not rem.
- if (V1Size.hasValue() && V2Size.hasValue() &&
- ModOffset.uge(V2Size.getValue()) &&
- (GCD - ModOffset).uge(V1Size.getValue()))
- return AliasResult::NoAlias;
+ APInt GCD;
+ ConstantRange OffsetRange = ConstantRange(DecompGEP1.Offset);
+ for (unsigned i = 0, e = DecompGEP1.VarIndices.size(); i != e; ++i) {
+ const VariableGEPIndex &Index = DecompGEP1.VarIndices[i];
+ const APInt &Scale = Index.Scale;
+ APInt ScaleForGCD = Scale;
+ if (!Index.IsNSW)
+ ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
+ Scale.countTrailingZeros());
+
+ if (i == 0)
+ GCD = ScaleForGCD.abs();
+ else
+ GCD = APIntOps::GreatestCommonDivisor(GCD, ScaleForGCD.abs());
+
+ ConstantRange CR =
+ computeConstantRange(Index.Val.V, true, &AC, Index.CxtI);
+ KnownBits Known =
+ computeKnownBits(Index.Val.V, DL, 0, &AC, Index.CxtI, DT);
+ CR = CR.intersectWith(
+ ConstantRange::fromKnownBits(Known, /* Signed */ true),
+ ConstantRange::Signed);
+ CR = Index.Val.evaluateWith(CR).sextOrTrunc(OffsetRange.getBitWidth());
+
+ assert(OffsetRange.getBitWidth() == Scale.getBitWidth() &&
+ "Bit widths are normalized to MaxIndexSize");
+ if (Index.IsNSW)
+ OffsetRange = OffsetRange.add(CR.smul_sat(ConstantRange(Scale)));
+ else
+ OffsetRange = OffsetRange.add(CR.smul_fast(ConstantRange(Scale)));
+ }
- // If we know all the variables are non-negative, then the total offset is
- // also non-negative and >= DecompGEP1.Offset. We have the following layout:
- // [0, V2Size) ... [TotalOffset, TotalOffer+V1Size]
- // If DecompGEP1.Offset >= V2Size, the accesses don't alias.
- if (AllNonNegative && V2Size.hasValue() &&
- DecompGEP1.Offset.uge(V2Size.getValue()))
- return AliasResult::NoAlias;
- // Similarly, if the variables are non-positive, then the total offset is
- // also non-positive and <= DecompGEP1.Offset. We have the following layout:
- // [TotalOffset, TotalOffset+V1Size) ... [0, V2Size)
- // If -DecompGEP1.Offset >= V1Size, the accesses don't alias.
- if (AllNonPositive && V1Size.hasValue() &&
- (-DecompGEP1.Offset).uge(V1Size.getValue()))
- return AliasResult::NoAlias;
+ // We now have accesses at two offsets from the same base:
+ // 1. (...)*GCD + DecompGEP1.Offset with size V1Size
+ // 2. 0 with size V2Size
+ // Using arithmetic modulo GCD, the accesses are at
+ // [ModOffset..ModOffset+V1Size) and [0..V2Size). If the first access fits
+ // into the range [V2Size..GCD), then we know they cannot overlap.
+ APInt ModOffset = DecompGEP1.Offset.srem(GCD);
+ if (ModOffset.isNegative())
+ ModOffset += GCD; // We want mod, not rem.
+ if (ModOffset.uge(V2Size.getValue()) &&
+ (GCD - ModOffset).uge(V1Size.getValue()))
+ return AliasResult::NoAlias;
- if (V1Size.hasValue() && V2Size.hasValue()) {
- // Try to determine whether abs(VarIndex) > 0.
- Optional<APInt> MinAbsVarIndex;
- if (DecompGEP1.VarIndices.size() == 1) {
- // VarIndex = Scale*V. If V != 0 then abs(VarIndex) >= abs(Scale).
- const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
- if (isKnownNonZero(Var.V, DL, 0, &AC, Var.CxtI, DT))
- MinAbsVarIndex = Var.Scale.abs();
- } else if (DecompGEP1.VarIndices.size() == 2) {
- // VarIndex = Scale*V0 + (-Scale)*V1.
- // If V0 != V1 then abs(VarIndex) >= abs(Scale).
- // Check that VisitedPhiBBs is empty, to avoid reasoning about
- // inequality of values across loop iterations.
- const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
- const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
- if (Var0.Scale == -Var1.Scale && Var0.ZExtBits == Var1.ZExtBits &&
- Var0.SExtBits == Var1.SExtBits && VisitedPhiBBs.empty() &&
- isKnownNonEqual(Var0.V, Var1.V, DL, &AC, /* CxtI */ nullptr, DT))
- MinAbsVarIndex = Var0.Scale.abs();
- }
+ // Compute ranges of potentially accessed bytes for both accesses. If the
+ // intersection is empty, there can be no overlap.
+ unsigned BW = OffsetRange.getBitWidth();
+ ConstantRange Range1 = OffsetRange.add(
+ ConstantRange(APInt(BW, 0), APInt(BW, V1Size.getValue())));
+ ConstantRange Range2 =
+ ConstantRange(APInt(BW, 0), APInt(BW, V2Size.getValue()));
+ if (Range1.intersectWith(Range2).isEmptySet())
+ return AliasResult::NoAlias;
- if (MinAbsVarIndex) {
- // The constant offset will have added at least +/-MinAbsVarIndex to it.
- APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex;
- APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex;
- // Check that an access at OffsetLo or lower, and an access at OffsetHi
- // or higher both do not alias.
- if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
- OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
- return AliasResult::NoAlias;
- }
+ // Try to determine the range of values for VarIndex such that
+ // VarIndex <= -MinAbsVarIndex || MinAbsVarIndex <= VarIndex.
+ Optional<APInt> MinAbsVarIndex;
+ if (DecompGEP1.VarIndices.size() == 1) {
+ // VarIndex = Scale*V.
+ const VariableGEPIndex &Var = DecompGEP1.VarIndices[0];
+ if (Var.Val.TruncBits == 0 &&
+ isKnownNonZero(Var.Val.V, DL, 0, &AC, Var.CxtI, DT)) {
+ // If V != 0 then abs(VarIndex) >= abs(Scale).
+ MinAbsVarIndex = Var.Scale.abs();
}
+ } else if (DecompGEP1.VarIndices.size() == 2) {
+ // VarIndex = Scale*V0 + (-Scale)*V1.
+ // If V0 != V1 then abs(VarIndex) >= abs(Scale).
+ // Check that VisitedPhiBBs is empty, to avoid reasoning about
+ // inequality of values across loop iterations.
+ const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
+ const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
+ if (Var0.Scale == -Var1.Scale && Var0.Val.TruncBits == 0 &&
+ Var0.Val.hasSameCastsAs(Var1.Val) && VisitedPhiBBs.empty() &&
+ isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
+ DT))
+ MinAbsVarIndex = Var0.Scale.abs();
+ }
- if (constantOffsetHeuristic(DecompGEP1.VarIndices, V1Size, V2Size,
- DecompGEP1.Offset, &AC, DT))
+ if (MinAbsVarIndex) {
+ // The constant offset will have added at least +/-MinAbsVarIndex to it.
+ APInt OffsetLo = DecompGEP1.Offset - *MinAbsVarIndex;
+ APInt OffsetHi = DecompGEP1.Offset + *MinAbsVarIndex;
+ // We know that Offset <= OffsetLo || Offset >= OffsetHi
+ if (OffsetLo.isNegative() && (-OffsetLo).uge(V1Size.getValue()) &&
+ OffsetHi.isNonNegative() && OffsetHi.uge(V2Size.getValue()))
return AliasResult::NoAlias;
}
+ if (constantOffsetHeuristic(DecompGEP1, V1Size, V2Size, &AC, DT))
+ return AliasResult::NoAlias;
+
// Statically, we can see that the base objects are the same, but the
// pointers have dynamic offsets which we can't resolve. And none of our
// little tricks above worked.
@@ -1517,10 +1594,10 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
// location if that memory location doesn't escape. Or it may pass a
// nocapture value to other functions as long as they don't capture it.
if (isEscapeSource(O1) &&
- isNonEscapingLocalObject(O2, &AAQI.IsCapturedCache))
+ AAQI.CI->isNotCapturedBeforeOrAt(O2, cast<Instruction>(O1)))
return AliasResult::NoAlias;
if (isEscapeSource(O2) &&
- isNonEscapingLocalObject(O1, &AAQI.IsCapturedCache))
+ AAQI.CI->isNotCapturedBeforeOrAt(O1, cast<Instruction>(O2)))
return AliasResult::NoAlias;
}
@@ -1692,62 +1769,54 @@ bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
}
/// Computes the symbolic difference between two de-composed GEPs.
-///
-/// Dest and Src are the variable indices from two decomposed GetElementPtr
-/// instructions GEP1 and GEP2 which have common base pointers.
-void BasicAAResult::GetIndexDifference(
- SmallVectorImpl<VariableGEPIndex> &Dest,
- const SmallVectorImpl<VariableGEPIndex> &Src) {
- if (Src.empty())
- return;
-
- for (unsigned i = 0, e = Src.size(); i != e; ++i) {
- const Value *V = Src[i].V;
- unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
- APInt Scale = Src[i].Scale;
-
+void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
+ const DecomposedGEP &SrcGEP) {
+ DestGEP.Offset -= SrcGEP.Offset;
+ for (const VariableGEPIndex &Src : SrcGEP.VarIndices) {
// Find V in Dest. This is N^2, but pointer indices almost never have more
// than a few variable indexes.
- for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
- if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
- Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
+ bool Found = false;
+ for (auto I : enumerate(DestGEP.VarIndices)) {
+ VariableGEPIndex &Dest = I.value();
+ if (!isValueEqualInPotentialCycles(Dest.Val.V, Src.Val.V) ||
+ !Dest.Val.hasSameCastsAs(Src.Val))
continue;
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
- if (Dest[j].Scale != Scale) {
- Dest[j].Scale -= Scale;
- Dest[j].IsNSW = false;
- } else
- Dest.erase(Dest.begin() + j);
- Scale = 0;
+ if (Dest.Scale != Src.Scale) {
+ Dest.Scale -= Src.Scale;
+ Dest.IsNSW = false;
+ } else {
+ DestGEP.VarIndices.erase(DestGEP.VarIndices.begin() + I.index());
+ }
+ Found = true;
break;
}
// If we didn't consume this entry, add it to the end of the Dest list.
- if (!!Scale) {
- VariableGEPIndex Entry = {V, ZExtBits, SExtBits,
- -Scale, Src[i].CxtI, Src[i].IsNSW};
- Dest.push_back(Entry);
+ if (!Found) {
+ VariableGEPIndex Entry = {Src.Val, -Src.Scale, Src.CxtI, Src.IsNSW};
+ DestGEP.VarIndices.push_back(Entry);
}
}
}
bool BasicAAResult::constantOffsetHeuristic(
- const SmallVectorImpl<VariableGEPIndex> &VarIndices,
- LocationSize MaybeV1Size, LocationSize MaybeV2Size, const APInt &BaseOffset,
- AssumptionCache *AC, DominatorTree *DT) {
- if (VarIndices.size() != 2 || !MaybeV1Size.hasValue() ||
+ const DecomposedGEP &GEP, LocationSize MaybeV1Size,
+ LocationSize MaybeV2Size, AssumptionCache *AC, DominatorTree *DT) {
+ if (GEP.VarIndices.size() != 2 || !MaybeV1Size.hasValue() ||
!MaybeV2Size.hasValue())
return false;
const uint64_t V1Size = MaybeV1Size.getValue();
const uint64_t V2Size = MaybeV2Size.getValue();
- const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+ const VariableGEPIndex &Var0 = GEP.VarIndices[0], &Var1 = GEP.VarIndices[1];
- if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
- Var0.Scale != -Var1.Scale || Var0.V->getType() != Var1.V->getType())
+ if (Var0.Val.TruncBits != 0 || !Var0.Val.hasSameCastsAs(Var1.Val) ||
+ Var0.Scale != -Var1.Scale ||
+ Var0.Val.V->getType() != Var1.Val.V->getType())
return false;
// We'll strip off the Extensions of Var0 and Var1 and do another round
@@ -1755,11 +1824,10 @@ bool BasicAAResult::constantOffsetHeuristic(
// is zext(%x + 1) we should get V1 == %x and V1Offset == 1.
LinearExpression E0 =
- GetLinearExpression(ExtendedValue(Var0.V), DL, 0, AC, DT);
+ GetLinearExpression(CastedValue(Var0.Val.V), DL, 0, AC, DT);
LinearExpression E1 =
- GetLinearExpression(ExtendedValue(Var1.V), DL, 0, AC, DT);
- if (E0.Scale != E1.Scale || E0.Val.ZExtBits != E1.Val.ZExtBits ||
- E0.Val.SExtBits != E1.Val.SExtBits ||
+ GetLinearExpression(CastedValue(Var1.Val.V), DL, 0, AC, DT);
+ if (E0.Scale != E1.Scale || !E0.Val.hasSameCastsAs(E1.Val) ||
!isValueEqualInPotentialCycles(E0.Val.V, E1.Val.V))
return false;
@@ -1779,8 +1847,8 @@ bool BasicAAResult::constantOffsetHeuristic(
// arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
// values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
// V2Size can fit in the MinDiffBytes gap.
- return MinDiffBytes.uge(V1Size + BaseOffset.abs()) &&
- MinDiffBytes.uge(V2Size + BaseOffset.abs());
+ return MinDiffBytes.uge(V1Size + GEP.Offset.abs()) &&
+ MinDiffBytes.uge(V2Size + GEP.Offset.abs());
}
//===----------------------------------------------------------------------===//
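The rewritten aliasGEP path above reasons about the constant offset modulo the GCD of the index scales: the two accesses live at [ModOffset, ModOffset+V1Size) and [0, V2Size) modulo GCD, and cannot overlap when the first range fits entirely into [V2Size, GCD). A standalone, hedged restatement of just that check with plain integers (GCD is assumed non-zero; this is not the BasicAA implementation, which works on APInt at the maximum index width):

```cpp
#include <cstdint>

// Hedged sketch of the modulo-GCD disjointness test from the hunk above.
// Off is the constant byte offset between the two accesses, S1/S2 their sizes.
static bool noOverlapModuloGCD(int64_t Off, uint64_t GCD, uint64_t S1,
                               uint64_t S2) {
  int64_t Mod = Off % static_cast<int64_t>(GCD);
  if (Mod < 0)
    Mod += static_cast<int64_t>(GCD); // we want mod, not rem
  uint64_t ModOffset = static_cast<uint64_t>(Mod);
  // No overlap if [ModOffset, ModOffset+S1) fits inside [S2, GCD).
  return ModOffset >= S2 && GCD - ModOffset >= S1;
}

// Example: GCD = 8, Off = 4, S1 = S2 = 4 -> ranges [4,8) and [0,4) mod 8,
// so the accesses cannot overlap and the sketch returns true.
```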
diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index e4e45b3076be..2a5e1f65d731 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -602,7 +602,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
- APInt BlockCount(128, EntryCount.getCount());
+ APInt BlockCount(128, EntryCount->getCount());
APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index aa6b93fe3f07..33fdc8b628c5 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -190,7 +190,7 @@ void BranchProbabilityInfo::SccInfo::getSccExitBlocks(
if (isSCCExitingBlock(BB, SccNum))
for (const auto *Succ : successors(BB))
if (getSCCNum(Succ) != SccNum)
- Exits.push_back(const_cast<BasicBlock *>(BB));
+ Exits.push_back(const_cast<BasicBlock *>(Succ));
}
}
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index 253cc0b0a579..c60b70ae5b69 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -38,12 +38,13 @@ using namespace llvm;
// Explicit template instantiations and specialization definitions for core
// template typedefs.
namespace llvm {
-
static cl::opt<bool> AbortOnMaxDevirtIterationsReached(
"abort-on-max-devirt-iterations-reached",
cl::desc("Abort when the max iterations for devirtualization CGSCC repeat "
"pass is reached"));
+AnalysisKey ShouldNotRunFunctionPassesAnalysis::Key;
+
// Explicit instantiations for the core proxy templates.
template class AllAnalysesOn<LazyCallGraph::SCC>;
template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
@@ -119,12 +120,6 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// Finally, we intersect the final preserved analyses to compute the
// aggregate preserved set for this pass manager.
PA.intersect(std::move(PassPA));
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
// Before we mark all of *this* SCC's analyses as preserved below, intersect
@@ -547,6 +542,9 @@ PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C,
Function &F = N->getFunction();
+ if (NoRerun && FAM.getCachedResult<ShouldNotRunFunctionPassesAnalysis>(F))
+ continue;
+
PassInstrumentation PI = FAM.getResult<PassInstrumentationAnalysis>(F);
if (!PI.runBeforePass<Function>(*Pass, F))
continue;
@@ -562,7 +560,9 @@ PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C,
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- FAM.invalidate(F, PassPA);
+ FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA);
+ if (NoRerun)
+ (void)FAM.getResult<ShouldNotRunFunctionPassesAnalysis>(F);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
@@ -863,7 +863,7 @@ incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
// split-off SCCs.
// We know however that this will preserve any FAM proxy so go ahead and mark
// that.
- PreservedAnalyses PA;
+ auto PA = PreservedAnalyses::allInSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
AM.invalidate(*OldC, PA);
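The NoRerun handling added above boils down to a per-function "already processed" marker kept in the analysis cache: skip the pipeline when the marker is present, set it after running. A conceptual sketch of that pattern with plain containers follows; the Func type, runOnSCC, and Pipeline names are hypothetical stand-ins, not the LLVM pass-manager API:

// Conceptual sketch only (hypothetical types, not LLVM's API): NoRerun mode
// amounts to consulting a per-function marker before running the pipeline and
// setting it afterwards.
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

struct Func { std::string Name; };

void runOnSCC(std::vector<Func *> &SCC, std::unordered_set<Func *> &Processed,
              bool NoRerun, const std::function<void(Func &)> &Pipeline) {
  for (Func *F : SCC) {
    if (NoRerun && Processed.count(F))
      continue;               // cached marker present: skip the pipeline
    Pipeline(*F);             // run the function pipeline
    if (NoRerun)
      Processed.insert(F);    // mark so later adaptor runs skip this function
  }
}

int main() {
  Func A{"a"}, B{"b"};
  std::vector<Func *> SCC{&A, &B};
  std::unordered_set<Func *> Processed;
  auto Pipeline = [](Func &F) { std::cout << "optimizing " << F.Name << "\n"; };
  runOnSCC(SCC, Processed, /*NoRerun=*/true, Pipeline);
  runOnSCC(SCC, Processed, /*NoRerun=*/true, Pipeline); // prints nothing
  return 0;
}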
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index 5fe4f9befc86..8955658cb9e7 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -98,10 +98,10 @@ namespace {
/// as the given instruction and the use.
struct CapturesBefore : public CaptureTracker {
- CapturesBefore(bool ReturnCaptures, const Instruction *I, const DominatorTree *DT,
- bool IncludeI)
- : BeforeHere(I), DT(DT),
- ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}
+ CapturesBefore(bool ReturnCaptures, const Instruction *I,
+ const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
+ : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
+ IncludeI(IncludeI), Captured(false), LI(LI) {}
void tooManyUses() override { Captured = true; }
@@ -115,7 +115,7 @@ namespace {
return true;
// Check whether there is a path from I to BeforeHere.
- return !isPotentiallyReachable(I, BeforeHere, nullptr, DT);
+ return !isPotentiallyReachable(I, BeforeHere, nullptr, DT, LI);
}
bool captured(const Use *U) override {
@@ -140,6 +140,68 @@ namespace {
bool IncludeI;
bool Captured;
+
+ const LoopInfo *LI;
+ };
+
+ /// Find the 'earliest' instruction before which the pointer is known not to
+ /// be captured. Here an instruction A is considered earlier than instruction
+ /// B, if A dominates B. If 2 escapes do not dominate each other, the
+ /// terminator of the common dominator is chosen. If not all uses cannot be
+ /// analyzed, the earliest escape is set to the first instruction in the
+ /// function entry block.
+ // NOTE: Users have to make sure instructions compared against the earliest
+ // escape are not in a cycle.
+ struct EarliestCaptures : public CaptureTracker {
+
+ EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
+ : DT(DT), ReturnCaptures(ReturnCaptures), Captured(false), F(F) {}
+
+ void tooManyUses() override {
+ Captured = true;
+ EarliestCapture = &*F.getEntryBlock().begin();
+ }
+
+ bool captured(const Use *U) override {
+ Instruction *I = cast<Instruction>(U->getUser());
+ if (isa<ReturnInst>(I) && !ReturnCaptures)
+ return false;
+
+ if (!EarliestCapture) {
+ EarliestCapture = I;
+ } else if (EarliestCapture->getParent() == I->getParent()) {
+ if (I->comesBefore(EarliestCapture))
+ EarliestCapture = I;
+ } else {
+ BasicBlock *CurrentBB = I->getParent();
+ BasicBlock *EarliestBB = EarliestCapture->getParent();
+ if (DT.dominates(EarliestBB, CurrentBB)) {
+ // EarliestCapture already comes before the current use.
+ } else if (DT.dominates(CurrentBB, EarliestBB)) {
+ EarliestCapture = I;
+ } else {
+ // Otherwise find the nearest common dominator and use its terminator.
+ auto *NearestCommonDom =
+ DT.findNearestCommonDominator(CurrentBB, EarliestBB);
+ EarliestCapture = NearestCommonDom->getTerminator();
+ }
+ }
+ Captured = true;
+
+ // Return false to continue analysis; we need to see all potential
+ // captures.
+ return false;
+ }
+
+ Instruction *EarliestCapture = nullptr;
+
+ const DominatorTree &DT;
+
+ bool ReturnCaptures;
+
+ bool Captured;
+
+ Function &F;
};
}
@@ -183,7 +245,8 @@ bool llvm::PointerMayBeCaptured(const Value *V,
bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
bool StoreCaptures, const Instruction *I,
const DominatorTree *DT, bool IncludeI,
- unsigned MaxUsesToExplore) {
+ unsigned MaxUsesToExplore,
+ const LoopInfo *LI) {
assert(!isa<GlobalValue>(V) &&
"It doesn't make sense to ask whether a global is captured.");
@@ -194,7 +257,7 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
// TODO: See comment in PointerMayBeCaptured regarding what could be done
// with StoreCaptures.
- CapturesBefore CB(ReturnCaptures, I, DT, IncludeI);
+ CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, LI);
PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
if (CB.Captured)
++NumCapturedBefore;
@@ -203,6 +266,22 @@ bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,
return CB.Captured;
}
+Instruction *llvm::FindEarliestCapture(const Value *V, Function &F,
+ bool ReturnCaptures, bool StoreCaptures,
+ const DominatorTree &DT,
+ unsigned MaxUsesToExplore) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ EarliestCaptures CB(ReturnCaptures, F, DT);
+ PointerMayBeCaptured(V, &CB, MaxUsesToExplore);
+ if (CB.Captured)
+ ++NumCapturedBefore;
+ else
+ ++NumNotCapturedBefore;
+ return CB.EarliestCapture;
+}
+
void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker,
unsigned MaxUsesToExplore) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
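The captured() callback of EarliestCaptures merges capture points with a three-way rule: same block, keep the earlier instruction; one block dominates the other, keep the dominating capture; otherwise, fall back to the terminator of the nearest common dominator. A standalone sketch of that rule on a toy CFG follows; the CapturePoint type and the immediate-dominator table are illustrative stand-ins, not LLVM's DominatorTree API:

// Standalone sketch (toy types): how two capture points are merged into a
// single "earliest" point using dominance.
#include <cassert>
#include <climits>
#include <vector>

struct CapturePoint { int Block; int Pos; };   // Pos: index within the block

// Immediate-dominator table for a small CFG; Idom[entry] == entry.
static bool dominates(const std::vector<int> &Idom, int A, int B) {
  while (B != A && B != Idom[B])
    B = Idom[B];
  return B == A;
}

static int nearestCommonDominator(const std::vector<int> &Idom, int A, int B) {
  while (!dominates(Idom, A, B))
    A = Idom[A];
  return A;
}

static CapturePoint merge(const std::vector<int> &Idom, CapturePoint Earliest,
                          CapturePoint New) {
  if (Earliest.Block == New.Block)                 // same block: earlier wins
    return New.Pos < Earliest.Pos ? New : Earliest;
  if (dominates(Idom, Earliest.Block, New.Block))  // Earliest already first
    return Earliest;
  if (dominates(Idom, New.Block, Earliest.Block))  // New is strictly earlier
    return New;
  // Neither dominates: use the terminator of the nearest common dominator.
  return {nearestCommonDominator(Idom, Earliest.Block, New.Block), INT_MAX};
}

int main() {
  // CFG: 0 -> {1, 2}, 1 -> 3, 2 -> 3. Immediate dominators: all are block 0.
  std::vector<int> Idom = {0, 0, 0, 0};
  CapturePoint R = merge(Idom, {1, 2}, {2, 5});   // captures in blocks 1 and 2
  assert(R.Block == 0 && R.Pos == INT_MAX);       // terminator of block 0
  CapturePoint S = merge(Idom, {0, 7}, {3, 1});   // block 0 dominates block 3
  assert(S.Block == 0 && S.Pos == 7);
  return 0;
}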
diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp
index a5757be2c4f4..5b951980a0aa 100644
--- a/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -77,28 +77,28 @@ bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
return false;
case ICmpInst::ICMP_SLT:
// X < 0 is equivalent to (X & SignMask) != 0.
- if (!C->isNullValue())
+ if (!C->isZero())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_NE;
break;
case ICmpInst::ICMP_SLE:
// X <= -1 is equivalent to (X & SignMask) != 0.
- if (!C->isAllOnesValue())
+ if (!C->isAllOnes())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_NE;
break;
case ICmpInst::ICMP_SGT:
// X > -1 is equivalent to (X & SignMask) == 0.
- if (!C->isAllOnesValue())
+ if (!C->isAllOnes())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_EQ;
break;
case ICmpInst::ICMP_SGE:
// X >= 0 is equivalent to (X & SignMask) == 0.
- if (!C->isNullValue())
+ if (!C->isZero())
return false;
Mask = APInt::getSignMask(C->getBitWidth());
Pred = ICmpInst::ICMP_EQ;
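The four rewrites above all rest on the same sign-bit identities; a short standalone check that verifies them exhaustively for 8-bit integers (the bit width is chosen only to keep the loop small):

// Standalone sketch (not part of the patch): exhaustively verifies the four
// sign-bit identities decomposeBitTestICmp relies on, for 8-bit integers.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t SignMask = 0x80;
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    uint8_t U = static_cast<uint8_t>(X);
    assert((X < 0)   == ((U & SignMask) != 0));  // slt 0  -> (X & SignMask) != 0
    assert((X <= -1) == ((U & SignMask) != 0));  // sle -1 -> (X & SignMask) != 0
    assert((X > -1)  == ((U & SignMask) == 0));  // sgt -1 -> (X & SignMask) == 0
    assert((X >= 0)  == ((U & SignMask) == 0));  // sge 0  -> (X & SignMask) == 0
  }
  return 0;
}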
diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp
index 8c8e2ee6627f..27c52506352f 100644
--- a/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/llvm/lib/Analysis/CodeMetrics.cpp
@@ -34,8 +34,9 @@ appendSpeculatableOperands(const Value *V,
for (const Value *Operand : U->operands())
if (Visited.insert(Operand).second)
- if (isSafeToSpeculativelyExecute(Operand))
- Worklist.push_back(Operand);
+ if (const auto *I = dyn_cast<Instruction>(Operand))
+ if (!I->mayHaveSideEffects() && !I->isTerminator())
+ Worklist.push_back(I);
}
static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited,
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index b28a0d6c78cd..3ed3b8902343 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -63,11 +63,6 @@
using namespace llvm;
namespace {
-Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
- ArrayRef<Constant *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool ForLoadOperand);
//===----------------------------------------------------------------------===//
// Constant Folding internal helper functions
@@ -357,9 +352,9 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
const DataLayout &DL) {
do {
Type *SrcTy = C->getType();
- uint64_t DestSize = DL.getTypeSizeInBits(DestTy);
- uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy);
- if (SrcSize < DestSize)
+ TypeSize DestSize = DL.getTypeSizeInBits(DestTy);
+ TypeSize SrcSize = DL.getTypeSizeInBits(SrcTy);
+ if (!TypeSize::isKnownGE(SrcSize, DestSize))
return nullptr;
// Catch the obvious splat cases (since all-zeros can coerce non-integral
@@ -550,19 +545,16 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
return false;
}
-Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
- const DataLayout &DL) {
+Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
+ int64_t Offset, const DataLayout &DL) {
// Bail out early. We do not expect to load from a scalable global variable.
if (isa<ScalableVectorType>(LoadTy))
return nullptr;
- auto *PTy = cast<PointerType>(C->getType());
auto *IntType = dyn_cast<IntegerType>(LoadTy);
// If this isn't an integer load we can't fold it directly.
if (!IntType) {
- unsigned AS = PTy->getAddressSpace();
-
// If this is a float/double load, we can try folding it as an int32/64 load
// and then bitcast the result. This can be useful for union cases. Note
// that address spaces don't matter here since we're not going to result in
@@ -580,8 +572,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
} else
return nullptr;
- C = FoldBitCast(C, MapTy->getPointerTo(AS), DL);
- if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, MapTy, DL)) {
+ if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
!LoadTy->isX86_AMXTy())
// Materializing a zero can be done trivially without a bitcast
@@ -607,19 +598,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
if (BytesLoaded > 32 || BytesLoaded == 0)
return nullptr;
- GlobalValue *GVal;
- APInt OffsetAI;
- if (!IsConstantOffsetFromGlobal(C, GVal, OffsetAI, DL))
- return nullptr;
-
- auto *GV = dyn_cast<GlobalVariable>(GVal);
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- !GV->getInitializer()->getType()->isSized())
- return nullptr;
-
- int64_t Offset = OffsetAI.getSExtValue();
- int64_t InitializerSize =
- DL.getTypeAllocSize(GV->getInitializer()->getType()).getFixedSize();
+ int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
// If we're not accessing anything in this constant, the result is undefined.
if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
@@ -640,7 +619,7 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
Offset = 0;
}
- if (!ReadDataFromGlobal(GV->getInitializer(), Offset, CurPtr, BytesLeft, DL))
+ if (!ReadDataFromGlobal(C, Offset, CurPtr, BytesLeft, DL))
return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
@@ -661,111 +640,70 @@ Constant *FoldReinterpretLoadFromConstPtr(Constant *C, Type *LoadTy,
return ConstantInt::get(IntType->getContext(), ResultVal);
}
-Constant *ConstantFoldLoadThroughBitcastExpr(ConstantExpr *CE, Type *DestTy,
- const DataLayout &DL) {
- auto *SrcPtr = CE->getOperand(0);
- if (!SrcPtr->getType()->isPointerTy())
+/// If this Offset points exactly to the start of an aggregate element, return
+/// that element, otherwise return nullptr.
+Constant *getConstantAtOffset(Constant *Base, APInt Offset,
+ const DataLayout &DL) {
+ if (Offset.isZero())
+ return Base;
+
+ if (!isa<ConstantAggregate>(Base) && !isa<ConstantDataSequential>(Base))
+ return nullptr;
+
+ Type *ElemTy = Base->getType();
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
+ if (!Offset.isZero() || !Indices[0].isZero())
return nullptr;
- return ConstantFoldLoadFromConstPtr(SrcPtr, DestTy, DL);
+ Constant *C = Base;
+ for (const APInt &Index : drop_begin(Indices)) {
+ if (Index.isNegative() || Index.getActiveBits() >= 32)
+ return nullptr;
+
+ C = C->getAggregateElement(Index.getZExtValue());
+ if (!C)
+ return nullptr;
+ }
+
+ return C;
}
} // end anonymous namespace
-Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
- const DataLayout &DL) {
- // First, try the easy cases:
- if (auto *GV = dyn_cast<GlobalVariable>(C))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- return ConstantFoldLoadThroughBitcast(GV->getInitializer(), Ty, DL);
+Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
+ const APInt &Offset,
+ const DataLayout &DL) {
+ if (Constant *AtOffset = getConstantAtOffset(C, Offset, DL))
+ if (Constant *Result = ConstantFoldLoadThroughBitcast(AtOffset, Ty, DL))
+ return Result;
- if (auto *GA = dyn_cast<GlobalAlias>(C))
- if (GA->getAliasee() && !GA->isInterposable())
- return ConstantFoldLoadFromConstPtr(GA->getAliasee(), Ty, DL);
+ // Try hard to fold loads from bitcasted strange and non-type-safe things.
+ if (Offset.getMinSignedBits() <= 64)
+ return FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL);
- // If the loaded value isn't a constant expr, we can't handle it.
- auto *CE = dyn_cast<ConstantExpr>(C);
- if (!CE)
- return nullptr;
+ return nullptr;
+}
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- if (auto *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(
- GV->getInitializer(), CE, Ty, DL))
- return V;
- }
- } else {
- // Try to simplify GEP if the pointer operand wasn't a GlobalVariable.
- // SymbolicallyEvaluateGEP() with `ForLoadOperand = true` can potentially
- // simplify the GEP more than it normally would have been, but should only
- // be used for const folding loads.
- SmallVector<Constant *> Ops;
- for (unsigned I = 0, E = CE->getNumOperands(); I != E; ++I)
- Ops.push_back(cast<Constant>(CE->getOperand(I)));
- if (auto *Simplified = dyn_cast_or_null<ConstantExpr>(
- SymbolicallyEvaluateGEP(cast<GEPOperator>(CE), Ops, DL, nullptr,
- /*ForLoadOperand*/ true))) {
- // If the symbolically evaluated GEP is another GEP, we can only const
- // fold it if the resulting pointer operand is a GlobalValue. Otherwise
- // there is nothing else to simplify since the GEP is already in the
- // most simplified form.
- if (isa<GEPOperator>(Simplified)) {
- if (auto *GV = dyn_cast<GlobalVariable>(Simplified->getOperand(0))) {
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(
- GV->getInitializer(), Simplified, Ty, DL))
- return V;
- }
- }
- } else {
- return ConstantFoldLoadFromConstPtr(Simplified, Ty, DL);
- }
- }
- }
- }
+Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
+ const DataLayout &DL) {
+ return ConstantFoldLoadFromConst(C, Ty, APInt(64, 0), DL);
+}
- if (CE->getOpcode() == Instruction::BitCast)
- if (Constant *LoadedC = ConstantFoldLoadThroughBitcastExpr(CE, Ty, DL))
- return LoadedC;
-
- // Instead of loading constant c string, use corresponding integer value
- // directly if string length is small enough.
- StringRef Str;
- if (getConstantStringInfo(CE, Str) && !Str.empty()) {
- size_t StrLen = Str.size();
- unsigned NumBits = Ty->getPrimitiveSizeInBits();
- // Replace load with immediate integer if the result is an integer or fp
- // value.
- if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
- (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
- APInt StrVal(NumBits, 0);
- APInt SingleChar(NumBits, 0);
- if (DL.isLittleEndian()) {
- for (unsigned char C : reverse(Str.bytes())) {
- SingleChar = static_cast<uint64_t>(C);
- StrVal = (StrVal << 8) | SingleChar;
- }
- } else {
- for (unsigned char C : Str.bytes()) {
- SingleChar = static_cast<uint64_t>(C);
- StrVal = (StrVal << 8) | SingleChar;
- }
- // Append NULL at the end.
- SingleChar = 0;
- StrVal = (StrVal << 8) | SingleChar;
- }
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
+ APInt Offset,
+ const DataLayout &DL) {
+ C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true));
- Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
- if (Ty->isFloatingPointTy())
- Res = ConstantExpr::getBitCast(Res, Ty);
- return Res;
- }
- }
+ if (auto *GV = dyn_cast<GlobalVariable>(C))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer())
+ if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
+ Offset, DL))
+ return Result;
// If this load comes from anywhere in a constant global, and if the global
// is all undef or zero, we know what it loads.
- if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(CE))) {
+ if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
if (GV->getInitializer()->isNullValue())
return Constant::getNullValue(Ty);
@@ -774,8 +712,13 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
}
}
- // Try hard to fold loads from bitcasted strange and non-type-safe things.
- return FoldReinterpretLoadFromConstPtr(CE, Ty, DL);
+ return nullptr;
+}
+
+Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
+ const DataLayout &DL) {
+ APInt Offset(DL.getIndexTypeSizeInBits(C->getType()), 0);
+ return ConstantFoldLoadFromConstPtr(C, Ty, Offset, DL);
}
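The new entry point above first strips the pointer down to a base constant plus an accumulated byte offset, then tries to look the loaded element up inside the initializer. A standalone sketch of the offset-to-indices arithmetic for a simple nested array follows; the layout and values are made up, and the real code relies on DataLayout::getGEPIndicesForOffset rather than this hand-rolled division:

// Standalone sketch (illustrative layout): turning a byte offset into
// aggregate indices, which is the idea behind getConstantAtOffset.
// Assumed layout: int32_t A[2][3], 4-byte elements, no padding.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t A[2][3] = {{10, 11, 12}, {20, 21, 22}};
  uint64_t Offset = 16;                         // byte offset into the constant
  const uint64_t RowSize = 3 * sizeof(int32_t);
  const uint64_t ElemSize = sizeof(int32_t);
  // The offset must land exactly on an element start, otherwise folding bails.
  assert(Offset % ElemSize == 0);
  uint64_t Row = Offset / RowSize;              // first index: 16 / 12 = 1
  uint64_t Col = (Offset % RowSize) / ElemSize; // second index: 4 / 4 = 1
  assert(Row == 1 && Col == 1);
  assert(A[Row][Col] == 21);                    // the load folds to 21
  return 0;
}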
namespace {
@@ -795,11 +738,11 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
if (Opc == Instruction::And) {
KnownBits Known0 = computeKnownBits(Op0, DL);
KnownBits Known1 = computeKnownBits(Op1, DL);
- if ((Known1.One | Known0.Zero).isAllOnesValue()) {
+ if ((Known1.One | Known0.Zero).isAllOnes()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
}
- if ((Known0.One | Known1.Zero).isAllOnesValue()) {
+ if ((Known0.One | Known1.Zero).isAllOnes()) {
// All the bits of Op1 that the 'and' could be masking are already zero.
return Op1;
}
@@ -867,17 +810,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
}
/// Strip the pointer casts, but preserve the address space information.
-Constant *StripPtrCastKeepAS(Constant *Ptr, bool ForLoadOperand) {
+Constant *StripPtrCastKeepAS(Constant *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
auto *OldPtrTy = cast<PointerType>(Ptr->getType());
Ptr = cast<Constant>(Ptr->stripPointerCasts());
- if (ForLoadOperand) {
- while (isa<GlobalAlias>(Ptr) && !cast<GlobalAlias>(Ptr)->isInterposable() &&
- !cast<GlobalAlias>(Ptr)->getBaseObject()->isInterposable()) {
- Ptr = cast<GlobalAlias>(Ptr)->getAliasee();
- }
- }
-
auto *NewPtrTy = cast<PointerType>(Ptr->getType());
// Preserve the address space number of the pointer.
@@ -893,8 +829,7 @@ Constant *StripPtrCastKeepAS(Constant *Ptr, bool ForLoadOperand) {
Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool ForLoadOperand) {
+ const TargetLibraryInfo *TLI) {
const GEPOperator *InnermostGEP = GEP;
bool InBounds = GEP->isInBounds();
@@ -939,7 +874,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
DL.getIndexedOffsetInType(
SrcElemTy,
makeArrayRef((Value * const *)Ops.data() + 1, Ops.size() - 1)));
- Ptr = StripPtrCastKeepAS(Ptr, ForLoadOperand);
+ Ptr = StripPtrCastKeepAS(Ptr);
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
@@ -961,7 +896,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Ptr = cast<Constant>(GEP->getOperand(0));
SrcElemTy = GEP->getSourceElementType();
Offset += APInt(BitWidth, DL.getIndexedOffsetInType(SrcElemTy, NestedOps));
- Ptr = StripPtrCastKeepAS(Ptr, ForLoadOperand);
+ Ptr = StripPtrCastKeepAS(Ptr);
}
// If the base value for this address is a literal integer value, fold the
@@ -985,72 +920,41 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
- SmallVector<Constant *, 32> NewIdxs;
- Type *Ty = PTy;
- SrcElemTy = PTy->getElementType();
- do {
- if (!Ty->isStructTy()) {
- if (Ty->isPointerTy()) {
- // The only pointer indexing we'll do is on the first index of the GEP.
- if (!NewIdxs.empty())
- break;
+ // For GEPs of GlobalValues, use the value type even for opaque pointers.
+ // Otherwise use an i8 GEP.
+ if (auto *GV = dyn_cast<GlobalValue>(Ptr))
+ SrcElemTy = GV->getValueType();
+ else if (!PTy->isOpaque())
+ SrcElemTy = PTy->getElementType();
+ else
+ SrcElemTy = Type::getInt8Ty(Ptr->getContext());
- Ty = SrcElemTy;
+ if (!SrcElemTy->isSized())
+ return nullptr;
- // Only handle pointers to sized types, not pointers to functions.
- if (!Ty->isSized())
- return nullptr;
- } else {
- Type *NextTy = GetElementPtrInst::getTypeAtIndex(Ty, (uint64_t)0);
- if (!NextTy)
- break;
- Ty = NextTy;
- }
+ Type *ElemTy = SrcElemTy;
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(ElemTy, Offset);
+ if (Offset != 0)
+ return nullptr;
- // Determine which element of the array the offset points into.
- APInt ElemSize(BitWidth, DL.getTypeAllocSize(Ty));
- if (ElemSize == 0) {
- // The element size is 0. This may be [0 x Ty]*, so just use a zero
- // index for this level and proceed to the next level to see if it can
- // accommodate the offset.
- NewIdxs.push_back(ConstantInt::get(IntIdxTy, 0));
- } else {
- // The element size is non-zero divide the offset by the element
- // size (rounding down), to compute the index at this level.
- bool Overflow;
- APInt NewIdx = Offset.sdiv_ov(ElemSize, Overflow);
- if (Overflow)
- break;
- Offset -= NewIdx * ElemSize;
- NewIdxs.push_back(ConstantInt::get(IntIdxTy, NewIdx));
- }
- } else {
- auto *STy = cast<StructType>(Ty);
- // If we end up with an offset that isn't valid for this struct type, we
- // can't re-form this GEP in a regular form, so bail out. The pointer
- // operand likely went through casts that are necessary to make the GEP
- // sensible.
- const StructLayout &SL = *DL.getStructLayout(STy);
- if (Offset.isNegative() || Offset.uge(SL.getSizeInBytes()))
- break;
+ // Try to add additional zero indices to reach the desired result element
+ // type.
+ // TODO: Should we avoid extra zero indices if ResElemTy can't be reached and
+ // we'll have to insert a bitcast anyway?
+ while (ElemTy != ResElemTy) {
+ Type *NextTy = GetElementPtrInst::getTypeAtIndex(ElemTy, (uint64_t)0);
+ if (!NextTy)
+ break;
- // Determine which field of the struct the offset points into. The
- // getZExtValue is fine as we've already ensured that the offset is
- // within the range representable by the StructLayout API.
- unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
- NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
- ElIdx));
- Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
- Ty = STy->getTypeAtIndex(ElIdx);
- }
- } while (Ty != ResElemTy);
+ Indices.push_back(APInt::getZero(isa<StructType>(ElemTy) ? 32 : BitWidth));
+ ElemTy = NextTy;
+ }
- // If we haven't used up the entire offset by descending the static
- // type, then the offset is pointing into the middle of an indivisible
- // member, so we can't simplify it.
- if (Offset != 0)
- return nullptr;
+ SmallVector<Constant *, 32> NewIdxs;
+ for (const APInt &Index : Indices)
+ NewIdxs.push_back(ConstantInt::get(
+ Type::getIntNTy(Ptr->getContext(), Index.getBitWidth()), Index));
// Preserve the inrange index from the innermost GEP if possible. We must
// have calculated the same indices up to and including the inrange index.
@@ -1067,8 +971,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// Create a GEP.
Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
InBounds, InRangeIndex);
- assert(C->getType()->getPointerElementType() == Ty &&
- "Computed GetElementPtr has unexpected type!");
+ assert(
+ cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) &&
+ "Computed GetElementPtr has unexpected type!");
// If we ended up indexing a member with a type that doesn't match
// the type of what the original indices indexed, add a cast.
@@ -1099,8 +1004,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
- if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI,
- /*ForLoadOperand*/ false))
+ if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
@@ -1375,21 +1279,31 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C,
default:
llvm_unreachable("Missing case");
case Instruction::PtrToInt:
- // If the input is a inttoptr, eliminate the pair. This requires knowing
- // the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ Constant *FoldedValue = nullptr;
+ // If the input is a inttoptr, eliminate the pair. This requires knowing
+ // the width of a pointer, so it can't be done in ConstantExpr::getCast.
if (CE->getOpcode() == Instruction::IntToPtr) {
- Constant *Input = CE->getOperand(0);
- unsigned InWidth = Input->getType()->getScalarSizeInBits();
- unsigned PtrWidth = DL.getPointerTypeSizeInBits(CE->getType());
- if (PtrWidth < InWidth) {
- Constant *Mask =
- ConstantInt::get(CE->getContext(),
- APInt::getLowBitsSet(InWidth, PtrWidth));
- Input = ConstantExpr::getAnd(Input, Mask);
+ // zext/trunc the inttoptr to pointer size.
+ FoldedValue = ConstantExpr::getIntegerCast(
+ CE->getOperand(0), DL.getIntPtrType(CE->getType()),
+ /*IsSigned=*/false);
+ } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) {
+ // If we have a GEP, we can perform the following folds:
+ // (ptrtoint (gep null, x)) -> x
+ // (ptrtoint (gep (gep null, x), y)) -> x + y, etc.
+ unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
+ APInt BaseOffset(BitWidth, 0);
+ auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets(
+ DL, BaseOffset, /*AllowNonInbounds=*/true));
+ if (Base->isNullValue()) {
+ FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset);
}
- // Do a zext or trunc to get to the dest size.
- return ConstantExpr::getIntegerCast(Input, DestTy, false);
+ }
+ if (FoldedValue) {
+ // Do a zext or trunc to get to the ptrtoint dest size.
+ return ConstantExpr::getIntegerCast(FoldedValue, DestTy,
+ /*IsSigned=*/false);
}
}
return ConstantExpr::getCast(Opcode, C, DestTy);
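The new ptrtoint fold treats a GEP chain rooted at null as plain offset arithmetic. A standalone sketch with a toy GEP representation follows; GEPStep and foldPtrToInt are illustrative names, and only chains with constant indices are modeled:

// Standalone sketch (toy representation, not LLVM's API): ptrtoint of a GEP
// chain rooted at null folds to the accumulated constant byte offset.
#include <cassert>
#include <cstdint>
#include <vector>

struct GEPStep { uint64_t ElemSize; int64_t Index; }; // constant indices only

// Mirrors the role of stripAndAccumulateConstantOffsets for this toy form:
// walk the chain and sum Index * ElemSize for every step.
static bool foldPtrToInt(bool BaseIsNull, const std::vector<GEPStep> &Steps,
                         int64_t &FoldedValue) {
  if (!BaseIsNull)
    return false;                 // only (gep null, ...) chains fold this way
  int64_t Offset = 0;
  for (const GEPStep &S : Steps)
    Offset += S.Index * static_cast<int64_t>(S.ElemSize);
  FoldedValue = Offset;
  return true;
}

int main() {
  // (ptrtoint (gep i32, (gep i32, null, 5), 3)) -> 5*4 + 3*4 = 32
  int64_t Folded = 0;
  bool DidFold = foldPtrToInt(true, {{4, 5}, {4, 3}}, Folded);
  assert(DidFold && Folded == 32);
  return 0;
}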
@@ -1446,19 +1360,6 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
return ConstantFoldLoadThroughBitcast(C, Ty, DL);
}
-Constant *
-llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
- ArrayRef<Constant *> Indices) {
- // Loop over all of the operands, tracking down which value we are
- // addressing.
- for (Constant *Index : Indices) {
- C = C->getAggregateElement(Index);
- if (!C)
- return nullptr;
- }
- return C;
-}
-
//===----------------------------------------------------------------------===//
// Constant Folding for Calls
//
@@ -1879,7 +1780,7 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI,
// know that its evaluation does not raise exceptions, so side effect
// is absent. To allow removing the call, mark it as not accessing memory.
if (EB && *EB != fp::ExceptionBehavior::ebIgnore)
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ CI->addFnAttr(Attribute::ReadNone);
return true;
}
@@ -2112,7 +2013,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
/// the host native double versions. Float versions are not called
/// directly but for all these it is true (float)(f((double)arg)) ==
/// f(arg). Long double not supported yet.
- APFloat APF = Op->getValueAPF();
+ const APFloat &APF = Op->getValueAPF();
switch (IntrinsicID) {
default: break;
@@ -2163,7 +2064,9 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
return nullptr;
LibFunc Func = NotLibFunc;
- TLI->getLibFunc(Name, Func);
+ if (!TLI->getLibFunc(Name, Func))
+ return nullptr;
+
switch (Func) {
default:
break;
@@ -2416,12 +2319,12 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (const auto *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isFloatingPointTy())
return nullptr;
- APFloat Op1V = Op1->getValueAPF();
+ const APFloat &Op1V = Op1->getValueAPF();
if (const auto *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return nullptr;
- APFloat Op2V = Op2->getValueAPF();
+ const APFloat &Op2V = Op2->getValueAPF();
if (const auto *ConstrIntr = dyn_cast<ConstrainedFPIntrinsic>(Call)) {
RoundingMode RM = getEvaluationRoundingMode(ConstrIntr);
@@ -2487,7 +2390,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return nullptr;
LibFunc Func = NotLibFunc;
- TLI->getLibFunc(Name, Func);
+ if (!TLI->getLibFunc(Name, Func))
+ return nullptr;
+
switch (Func) {
default:
break;
@@ -2671,7 +2576,7 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
assert(C1 && "Must be constant int");
// cttz(0, 1) and ctlz(0, 1) are undef.
- if (C1->isOneValue() && (!C0 || C0->isNullValue()))
+ if (C1->isOne() && (!C0 || C0->isZero()))
return UndefValue::get(Ty);
if (!C0)
return Constant::getNullValue(Ty);
@@ -2683,11 +2588,11 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
case Intrinsic::abs:
// Undef or minimum val operand with poison min --> undef
assert(C1 && "Must be constant int");
- if (C1->isOneValue() && (!C0 || C0->isMinSignedValue()))
+ if (C1->isOne() && (!C0 || C0->isMinSignedValue()))
return UndefValue::get(Ty);
// Undef operand with no poison min --> 0 (sign bit must be clear)
- if (C1->isNullValue() && !C0)
+ if (C1->isZero() && !C0)
return Constant::getNullValue(Ty);
return ConstantInt::get(Ty, C0->abs());
@@ -3191,7 +3096,7 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
if (!TLI || !TLI->getLibFunc(*F, Func))
return false;
- if (Call->getNumArgOperands() == 1) {
+ if (Call->arg_size() == 1) {
if (ConstantFP *OpC = dyn_cast<ConstantFP>(Call->getArgOperand(0))) {
const APFloat &Op = OpC->getValueAPF();
switch (Func) {
@@ -3280,7 +3185,7 @@ bool llvm::isMathLibCallNoop(const CallBase *Call,
}
}
- if (Call->getNumArgOperands() == 2) {
+ if (Call->arg_size() == 2) {
ConstantFP *Op0C = dyn_cast<ConstantFP>(Call->getArgOperand(0));
ConstantFP *Op1C = dyn_cast<ConstantFP>(Call->getArgOperand(1));
if (Op0C && Op1C) {
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index 83b7d5cbfc3e..f407ec0d017a 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -16,10 +16,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/CostModel.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -113,3 +115,23 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
}
}
}
+
+PreservedAnalyses CostModelPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ OS << "Cost Model for function '" << F.getName() << "'\n";
+ for (BasicBlock &B : F) {
+ for (Instruction &Inst : B) {
+ // TODO: Use a pass parameter instead of cl::opt CostKind to determine
+ // which cost kind to print.
+ InstructionCost Cost = TTI.getInstructionCost(&Inst, CostKind);
+ if (auto CostVal = Cost.getValue())
+ OS << "Cost Model: Found an estimated cost of " << *CostVal;
+ else
+ OS << "Cost Model: Invalid cost";
+
+ OS << " for instruction: " << Inst << "\n";
+ }
+ }
+ return PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp
index 448e970e9bcc..670532c6d9a8 100644
--- a/llvm/lib/Analysis/Delinearization.cpp
+++ b/llvm/lib/Analysis/Delinearization.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionDivision.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -36,6 +37,492 @@ using namespace llvm;
#define DL_NAME "delinearize"
#define DEBUG_TYPE DL_NAME
+// Return true when S contains at least one undef value.
+static inline bool containsUndefs(const SCEV *S) {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+ return isa<UndefValue>(SU->getValue());
+ return false;
+ });
+}
+
+namespace {
+
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+ ScalarEvolution &SE;
+ SmallVectorImpl<const SCEV *> &Strides;
+
+ SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+ : SE(SE), Strides(S) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ Strides.push_back(AR->getStepRecurrence(SE));
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+ SmallVectorImpl<const SCEV *> &Terms;
+
+ SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
+ isa<SCEVSignExtendExpr>(S)) {
+ if (!containsUndefs(S))
+ Terms.push_back(S);
+
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+ bool &ContainsAddRec;
+
+ SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+ ContainsAddRec = false;
+ }
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVAddRecExpr>(S)) {
+ ContainsAddRec = true;
+
+ // Stop recursion: we found an AddRec, there is no need to walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
+// parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that are
+// spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+ SmallVectorImpl<const SCEV *> &Terms;
+ ScalarEvolution &SE;
+
+ SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
+ ScalarEvolution &SE)
+ : Terms(T), SE(SE) {}
+
+ bool follow(const SCEV *S) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ bool HasAddRec = false;
+ SmallVector<const SCEV *, 0> Operands;
+ for (auto Op : Mul->operands()) {
+ const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
+ if (Unknown && !isa<CallInst>(Unknown->getValue())) {
+ Operands.push_back(Op);
+ } else if (Unknown) {
+ HasAddRec = true;
+ } else {
+ bool ContainsAddRec = false;
+ SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
+ visitAll(Op, ContainsAddRecVisitor);
+ HasAddRec |= ContainsAddRec;
+ }
+ }
+ if (Operands.size() == 0)
+ return true;
+
+ if (!HasAddRec)
+ return false;
+
+ Terms.push_back(SE.getMulExpr(Operands));
+ // Stop recursion: once we collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+
+ bool isDone() const { return false; }
+};
+
+} // end anonymous namespace
+
+/// Find parametric terms in this SCEVAddRecExpr. We first look for parameters in
+/// two places:
+/// 1) The strides of AddRec expressions.
+/// 2) Unknowns that are multiplied with AddRec expressions.
+void llvm::collectParametricTerms(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Terms) {
+ SmallVector<const SCEV *, 4> Strides;
+ SCEVCollectStrides StrideCollector(SE, Strides);
+ visitAll(Expr, StrideCollector);
+
+ LLVM_DEBUG({
+ dbgs() << "Strides:\n";
+ for (const SCEV *S : Strides)
+ dbgs() << *S << "\n";
+ });
+
+ for (const SCEV *S : Strides) {
+ SCEVCollectTerms TermCollector(Terms);
+ visitAll(S, TermCollector);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ SCEVCollectAddRecMultiplies MulCollector(Terms, SE);
+ visitAll(Expr, MulCollector);
+}
+
+static bool findArrayDimensionsRec(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ int Last = Terms.size() - 1;
+ const SCEV *Step = Terms[Last];
+
+ // End of recursion.
+ if (Last == 0) {
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
+ SmallVector<const SCEV *, 2> Qs;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Qs.push_back(Op);
+
+ Step = SE.getMulExpr(Qs);
+ }
+
+ Sizes.push_back(Step);
+ return true;
+ }
+
+ for (const SCEV *&Term : Terms) {
+ // Normalize the terms before the next call to findArrayDimensionsRec.
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, Step, &Q, &R);
+
+ // Bail out when GCD does not evenly divide one of the terms.
+ if (!R->isZero())
+ return false;
+
+ Term = Q;
+ }
+
+ // Remove all SCEVConstants.
+ erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
+
+ if (Terms.size() > 0)
+ if (!findArrayDimensionsRec(SE, Terms, Sizes))
+ return false;
+
+ Sizes.push_back(Step);
+ return true;
+}
+
+// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
+static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+ for (const SCEV *T : Terms)
+ if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
+ return true;
+
+ return false;
+}
+
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+ if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+ return Expr->getNumOperands();
+ return 1;
+}
+
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+ if (isa<SCEVConstant>(T))
+ return nullptr;
+
+ if (isa<SCEVUnknown>(T))
+ return T;
+
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+ SmallVector<const SCEV *, 2> Factors;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Factors.push_back(Op);
+
+ return SE.getMulExpr(Factors);
+ }
+
+ return T;
+}
+
+void llvm::findArrayDimensions(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) {
+ if (Terms.size() < 1 || !ElementSize)
+ return;
+
+ // Early return when Terms do not contain parameters: we do not delinearize
+ // non parametric SCEVs.
+ if (!containsParameters(Terms))
+ return;
+
+ LLVM_DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+
+ // Remove duplicates.
+ array_pod_sort(Terms.begin(), Terms.end());
+ Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
+
+ // Put larger terms first.
+ llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
+ return numberOfTerms(LHS) > numberOfTerms(RHS);
+ });
+
+ // Try to divide all terms by the element size. If a term is not divisible
+ // by the element size, proceed with the original term.
+ for (const SCEV *&Term : Terms) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ if (!Q->isZero())
+ Term = Q;
+ }
+
+ SmallVector<const SCEV *, 4> NewTerms;
+
+ // Remove constant factors.
+ for (const SCEV *T : Terms)
+ if (const SCEV *NewT = removeConstantFactors(SE, T))
+ NewTerms.push_back(NewT);
+
+ LLVM_DEBUG({
+ dbgs() << "Terms after sorting:\n";
+ for (const SCEV *T : NewTerms)
+ dbgs() << *T << "\n";
+ });
+
+ if (NewTerms.empty() || !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+ Sizes.clear();
+ return;
+ }
+
+ // The last element to be pushed into Sizes is the size of an element.
+ Sizes.push_back(ElementSize);
+
+ LLVM_DEBUG({
+ dbgs() << "Sizes:\n";
+ for (const SCEV *S : Sizes)
+ dbgs() << *S << "\n";
+ });
+}
+
+void llvm::computeAccessFunctions(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (Sizes.empty())
+ return;
+
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
+ if (!AR->isAffine())
+ return;
+
+ const SCEV *Res = Expr;
+ int Last = Sizes.size() - 1;
+ for (int i = Last; i >= 0; i--) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+
+ LLVM_DEBUG({
+ dbgs() << "Res: " << *Res << "\n";
+ dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+ dbgs() << "Res divided by Sizes[i]:\n";
+ dbgs() << "Quotient: " << *Q << "\n";
+ dbgs() << "Remainder: " << *R << "\n";
+ });
+
+ Res = Q;
+
+ // Do not record the last subscript corresponding to the size of elements in
+ // the array.
+ if (i == Last) {
+
+ // Bail out if the byte offset is non-zero.
+ if (!R->isZero()) {
+ Subscripts.clear();
+ Sizes.clear();
+ return;
+ }
+
+ continue;
+ }
+
+ // Record the access function for the current subscript.
+ Subscripts.push_back(R);
+ }
+
+ // Also push, in last position, the quotient of the last division: after the
+ // reversal below it becomes the access function of the outermost dimension.
+ Subscripts.push_back(Res);
+
+ std::reverse(Subscripts.begin(), Subscripts.end());
+
+ LLVM_DEBUG({
+ dbgs() << "Subscripts:\n";
+ for (const SCEV *S : Subscripts)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
+/// sizes of an array access. The SCEV->delinearize algorithm computes the
+/// multiples of SCEV coefficients: that is a pattern matching of sub
+/// expressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, Subscripts and Sizes are left empty.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+/// void foo(long n, long m, long o, double A[n][m][o]) {
+///
+/// for (long i = 0; i < n; i++)
+/// for (long j = 0; j < m; j++)
+/// for (long k = 0; k < o; k++)
+/// A[i][j][k] = 1.0;
+/// }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+/// CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double)
+/// bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identifying the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the known
+/// dimensions: %m * %o * 8.
+///
+/// Finally delinearize provides the access functions for the array reference
+/// that does correspond to A[i][j][k] of the above C testcase:
+///
+/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+void llvm::delinearize(ScalarEvolution &SE, const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) {
+ // First step: collect parametric terms.
+ SmallVector<const SCEV *, 4> Terms;
+ collectParametricTerms(SE, Expr, Terms);
+
+ if (Terms.empty())
+ return;
+
+ // Second step: find subscript sizes.
+ findArrayDimensions(SE, Terms, Sizes, ElementSize);
+
+ if (Sizes.empty())
+ return;
+
+ // Third step: compute the access functions for each subscript.
+ computeAccessFunctions(SE, Expr, Subscripts, Sizes);
+
+ if (Subscripts.empty())
+ return;
+
+ LLVM_DEBUG({
+ dbgs() << "succeeded to delinearize " << *Expr << "\n";
+ dbgs() << "ArrayDecl[UnknownSize]";
+ for (const SCEV *S : Sizes)
+ dbgs() << "[" << *S << "]";
+
+ dbgs() << "\nArrayRef";
+ for (const SCEV *S : Subscripts)
+ dbgs() << "[" << *S << "]";
+ dbgs() << "\n";
+ });
+}
+
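The division loop in computeAccessFunctions above peels off one subscript per division, innermost size first. A standalone sketch that reproduces the arithmetic for the A[i][j][k] example with made-up sizes (m = 100, o = 50, 8-byte elements):

// Standalone sketch (illustrative sizes): recovering the subscripts of
// A[i][j][k] from the flat byte offset by repeated division, as
// computeAccessFunctions does.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t M = 100, O = 50, ElemSize = 8;  // array sizes, element size
  const uint64_t I = 2, J = 3, K = 4;            // the subscripts to recover
  uint64_t Offset = ElemSize * (I * M * O + J * O + K); // flat offset = 81232

  uint64_t Res = Offset;
  assert(Res % ElemSize == 0);      // divide by the element size first
  Res /= ElemSize;                  // 10154
  uint64_t SubK = Res % O;          // remainder of the division by o -> k
  Res /= O;                         // 203
  uint64_t SubJ = Res % M;          // remainder of the division by m -> j
  Res /= M;                         // final quotient -> i (outermost)
  uint64_t SubI = Res;

  assert(SubI == I && SubJ == J && SubK == K);
  return 0;
}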
+bool llvm::getIndexExpressionsFromGEP(ScalarEvolution &SE,
+ const GetElementPtrInst *GEP,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<int> &Sizes) {
+ assert(Subscripts.empty() && Sizes.empty() &&
+ "Expected output lists to be empty on entry to this function.");
+ assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
+ Type *Ty = nullptr;
+ bool DroppedFirstDim = false;
+ for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
+ const SCEV *Expr = SE.getSCEV(GEP->getOperand(i));
+ if (i == 1) {
+ Ty = GEP->getSourceElementType();
+ if (auto *Const = dyn_cast<SCEVConstant>(Expr))
+ if (Const->getValue()->isZero()) {
+ DroppedFirstDim = true;
+ continue;
+ }
+ Subscripts.push_back(Expr);
+ continue;
+ }
+
+ auto *ArrayTy = dyn_cast<ArrayType>(Ty);
+ if (!ArrayTy) {
+ Subscripts.clear();
+ Sizes.clear();
+ return false;
+ }
+
+ Subscripts.push_back(Expr);
+ if (!(DroppedFirstDim && i == 2))
+ Sizes.push_back(ArrayTy->getNumElements());
+
+ Ty = ArrayTy->getElementType();
+ }
+ return !Subscripts.empty();
+}
+
namespace {
class Delinearization : public FunctionPass {
@@ -84,7 +571,7 @@ void printDelinearization(raw_ostream &O, Function *F, LoopInfo *LI,
O << "AccessFunction: " << *AccessFn << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
- SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
+ delinearize(*SE, AccessFn, Subscripts, Sizes, SE->getElementSize(&Inst));
if (Subscripts.size() == 0 || Sizes.size() == 0 ||
Subscripts.size() != Sizes.size()) {
O << "failed to delinearize\n";
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index ca6d58fac825..117b12fc0701 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -362,7 +362,7 @@ void DemandedBits::performAnalysis() {
if (Instruction *J = dyn_cast<Instruction>(OI)) {
Type *T = J->getType();
if (T->isIntOrIntVectorTy())
- AliveBits[J] = APInt::getAllOnesValue(T->getScalarSizeInBits());
+ AliveBits[J] = APInt::getAllOnes(T->getScalarSizeInBits());
else
Visited.insert(J);
Worklist.insert(J);
@@ -407,7 +407,7 @@ void DemandedBits::performAnalysis() {
Type *T = OI->getType();
if (T->isIntOrIntVectorTy()) {
unsigned BitWidth = T->getScalarSizeInBits();
- APInt AB = APInt::getAllOnesValue(BitWidth);
+ APInt AB = APInt::getAllOnes(BitWidth);
if (InputIsKnownDead) {
AB = APInt(BitWidth, 0);
} else {
@@ -417,7 +417,7 @@ void DemandedBits::performAnalysis() {
Known, Known2, KnownBitsComputed);
// Keep track of uses which have no demanded bits.
- if (AB.isNullValue())
+ if (AB.isZero())
DeadUses.insert(&OI);
else
DeadUses.erase(&OI);
@@ -448,8 +448,7 @@ APInt DemandedBits::getDemandedBits(Instruction *I) {
return Found->second;
const DataLayout &DL = I->getModule()->getDataLayout();
- return APInt::getAllOnesValue(
- DL.getTypeSizeInBits(I->getType()->getScalarType()));
+ return APInt::getAllOnes(DL.getTypeSizeInBits(I->getType()->getScalarType()));
}
APInt DemandedBits::getDemandedBits(Use *U) {
@@ -461,7 +460,7 @@ APInt DemandedBits::getDemandedBits(Use *U) {
// We only track integer uses, everything else produces a mask with all bits
// set
if (!T->isIntOrIntVectorTy())
- return APInt::getAllOnesValue(BitWidth);
+ return APInt::getAllOnes(BitWidth);
if (isUseDead(U))
return APInt(BitWidth, 0);
@@ -469,7 +468,7 @@ APInt DemandedBits::getDemandedBits(Use *U) {
performAnalysis();
APInt AOut = getDemandedBits(UserI);
- APInt AB = APInt::getAllOnesValue(BitWidth);
+ APInt AB = APInt::getAllOnes(BitWidth);
KnownBits Known, Known2;
bool KnownBitsComputed = false;
@@ -504,7 +503,7 @@ bool DemandedBits::isUseDead(Use *U) {
// is dead. These uses might not be explicitly present in the DeadUses map.
if (UserI->getType()->isIntOrIntVectorTy()) {
auto Found = AliveBits.find(UserI);
- if (Found != AliveBits.end() && Found->second.isNullValue())
+ if (Found != AliveBits.end() && Found->second.isZero())
return true;
}
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 9564cfb2aa45..f827f74d5367 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -53,6 +53,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -119,6 +120,11 @@ static cl::opt<bool> DisableDelinearizationChecks(
"dependence vectors for languages that allow the subscript of one "
"dimension to underflow or overflow into another dimension."));
+static cl::opt<unsigned> MIVMaxLevelThreshold(
+ "da-miv-max-level-threshold", cl::init(7), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Maximum depth allowed for the recursive algorithm used to "
+ "explore MIV direction vectors."));
+
//===----------------------------------------------------------------------===//
// basics
@@ -2319,7 +2325,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
LLVM_DEBUG(dbgs() << "starting gcd\n");
++GCDapplications;
unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
- APInt RunningGCD = APInt::getNullValue(BitWidth);
+ APInt RunningGCD = APInt::getZero(BitWidth);
// Examine Src coefficients.
// Compute running GCD and record source constant.
@@ -2359,7 +2365,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst,
}
const SCEV *DstConst = Coefficients;
- APInt ExtraGCD = APInt::getNullValue(BitWidth);
+ APInt ExtraGCD = APInt::getZero(BitWidth);
const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
LLVM_DEBUG(dbgs() << " Delta = " << *Delta << "\n");
const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
@@ -2602,6 +2608,19 @@ unsigned DependenceInfo::exploreDirections(unsigned Level, CoefficientInfo *A,
const SmallBitVector &Loops,
unsigned &DepthExpanded,
const SCEV *Delta) const {
+ // This algorithm has worst case complexity of O(3^n), where 'n' is the number
+ // of common loop levels. To avoid excessive compile-time, pessimize all the
+ // results and immediately return when the number of common levels is beyond
+ // the given threshold.
+ if (CommonLevels > MIVMaxLevelThreshold) {
+ LLVM_DEBUG(dbgs() << "Number of common levels exceeded the threshold. MIV "
+ "direction exploration is terminated.\n");
+ for (unsigned K = 1; K <= CommonLevels; ++K)
+ if (Loops[K])
+ Bound[K].DirSet = Dependence::DVEntry::ALL;
+ return 1;
+ }
+
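As the comment above notes, each common loop level can take one of three directions, so the exploration can visit up to 3^Levels direction vectors; the cl::init(7) default caps that at 2187. A trivial standalone sketch of the growth:

// Standalone sketch (not part of the patch): each level contributes a factor
// of three ('<', '=', '>'), so the search space is 3^Levels.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Combinations = 1;
  for (unsigned Levels = 1; Levels <= 10; ++Levels) {
    Combinations *= 3;
    std::printf("levels=%2u -> up to %llu direction vectors\n", Levels,
                (unsigned long long)Combinations);
  }
  return 0; // levels=7 -> 2187, levels=10 -> 59049
}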
if (Level > CommonLevels) {
// record result
LLVM_DEBUG(dbgs() << "\t[");
@@ -3320,8 +3339,8 @@ bool DependenceInfo::tryDelinearizeFixedSize(
return false;
SmallVector<int, 4> SrcSizes, DstSizes;
- SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
- SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);
+ getIndexExpressionsFromGEP(*SE, SrcGEP, SrcSubscripts, SrcSizes);
+ getIndexExpressionsFromGEP(*SE, DstGEP, DstSubscripts, DstSizes);
// Check that the two size arrays are non-empty and equal in length and
// value.
@@ -3421,16 +3440,16 @@ bool DependenceInfo::tryDelinearizeParametricSize(
// First step: collect parametric terms in both array references.
SmallVector<const SCEV *, 4> Terms;
- SE->collectParametricTerms(SrcAR, Terms);
- SE->collectParametricTerms(DstAR, Terms);
+ collectParametricTerms(*SE, SrcAR, Terms);
+ collectParametricTerms(*SE, DstAR, Terms);
// Second step: find subscript sizes.
SmallVector<const SCEV *, 4> Sizes;
- SE->findArrayDimensions(Terms, Sizes, ElementSize);
+ findArrayDimensions(*SE, Terms, Sizes, ElementSize);
// Third step: compute the access functions for each subscript.
- SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
- SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
+ computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes);
+ computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes);
// Fail when there is only a subscript: that's a linearized access function.
if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index ecfefa36918c..d87fa849d839 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -1,9 +1,8 @@
//===- DevelopmentModeInlineAdvisor.cpp - runtime-loadable model runner --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -228,6 +227,8 @@ private:
(*CallerSizeEstimateBefore + *CalleeSizeEstimateBefore);
getAdvisor()->updateNativeSizeEstimate(Reward);
log(Reward, /*Success=*/true);
+ } else {
+ log(NoReward, /*Success=*/true);
}
}
@@ -377,7 +378,7 @@ void TrainingLogger::logInlineEvent(const InlineEvent &Event,
void TrainingLogger::print() {
std::error_code EC;
raw_fd_ostream OutFile(LogFileName, EC);
- L->print(OutFile);
+ L->flush(OutFile);
}
DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
diff --git a/llvm/lib/Analysis/HeatUtils.cpp b/llvm/lib/Analysis/HeatUtils.cpp
index a1a11be5fee3..0057de322cac 100644
--- a/llvm/lib/Analysis/HeatUtils.cpp
+++ b/llvm/lib/Analysis/HeatUtils.cpp
@@ -1,9 +1,8 @@
//===-- HeatUtils.cpp - Utility for printing heat colors --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index a6298afb66f5..f22c6aa04f5e 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -23,13 +23,23 @@
using namespace llvm;
using namespace IRSimilarity;
+cl::opt<bool>
+ DisableBranches("no-ir-sim-branch-matching", cl::init(false),
+ cl::ReallyHidden,
+ cl::desc("disable similarity matching, and outlining, "
+ "across branches for debugging purposes."));
+
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
IRInstructionDataList &IDList)
: Inst(&I), Legal(Legality), IDL(&IDList) {
+ initializeInstruction();
+}
+
+void IRInstructionData::initializeInstruction() {
// We check whether we have a comparison instruction. If so, we
// find the "less than" version of the predicate for consistency for
// comparison instructions throughout the program.
- if (CmpInst *C = dyn_cast<CmpInst>(&I)) {
+ if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
CmpInst::Predicate Predicate = predicateForConsistency(C);
if (Predicate != C->getPredicate())
RevisedPredicate = Predicate;
@@ -37,8 +47,8 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
// Here we collect the operands and their types for determining whether
// the structure of the operand use matches between two different candidates.
- for (Use &OI : I.operands()) {
- if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) {
+ for (Use &OI : Inst->operands()) {
+ if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
// If we have a CmpInst where the predicate is reversed, it means the
// operands must be reversed as well.
OperVals.insert(OperVals.begin(), OI.get());
@@ -49,6 +59,33 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
}
}
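To make the operand handling above concrete (hypothetical value names): an icmp sgt %a, %b is canonicalized to its "less than" form, and because the predicate was reversed the operands are recorded in reverse as well, so OperVals ends up holding {%b, %a} rather than {%a, %b}.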
+IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
+ : Inst(nullptr), Legal(false), IDL(&IDList) {}
+
+void IRInstructionData::setBranchSuccessors(
+ DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
+ assert(isa<BranchInst>(Inst) && "Instruction must be branch");
+
+ BranchInst *BI = cast<BranchInst>(Inst);
+ DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
+
+ BBNumIt = BasicBlockToInteger.find(BI->getParent());
+ assert(BBNumIt != BasicBlockToInteger.end() &&
+ "Could not find location for BasicBlock!");
+
+ int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
+
+ for (BasicBlock *Successor : BI->successors()) {
+ BBNumIt = BasicBlockToInteger.find(Successor);
+ assert(BBNumIt != BasicBlockToInteger.end() &&
+ "Could not find number for BasicBlock!");
+ int OtherBlockNumber = static_cast<int>(BBNumIt->second);
+
+ int Relative = OtherBlockNumber - CurrentBlockNumber;
+ RelativeBlockLocations.push_back(Relative);
+ }
+}
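To make the relative numbering above concrete, a minimal standalone sketch (plain std:: containers and hypothetical block names stand in for the LLVM types used in this patch):

  #include <iostream>
  #include <map>
  #include <string>
  #include <vector>

  int main() {
    // The mapper is assumed to have already assigned every block an integer.
    std::map<std::string, unsigned> BlockNumber = {
        {"entry", 0}, {"then", 1}, {"else", 2}, {"exit", 3}};

    // A conditional branch that lives in "entry" and targets "then"/"else".
    std::string Parent = "entry";
    std::vector<std::string> Successors = {"then", "else"};

    std::vector<int> RelativeBlockLocations;
    int Current = static_cast<int>(BlockNumber.at(Parent));
    for (const std::string &S : Successors)
      RelativeBlockLocations.push_back(static_cast<int>(BlockNumber.at(S)) -
                                       Current);

    // Prints "1 2": the successors sit one and two blocks after the branch.
    for (int R : RelativeBlockLocations)
      std::cout << R << ' ';
    std::cout << '\n';
  }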
+
CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
switch (CI->getPredicate()) {
case CmpInst::FCMP_OGT:
@@ -143,6 +180,10 @@ bool IRSimilarity::isClose(const IRInstructionData &A,
return false;
}
+ if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
+ A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
+ return false;
+
return true;
}
@@ -156,10 +197,6 @@ void IRInstructionMapper::convertToUnsignedVec(
std::vector<unsigned> IntegerMappingForBB;
std::vector<IRInstructionData *> InstrListForBB;
- HaveLegalRange = false;
- CanCombineWithPrevInstr = false;
- AddedIllegalLastTime = true;
-
for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
switch (InstClassifier.visit(*It)) {
case InstrType::Legal:
@@ -175,7 +212,8 @@ void IRInstructionMapper::convertToUnsignedVec(
}
if (HaveLegalRange) {
- mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
+ if (AddedIllegalLastTime)
+ mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
for (IRInstructionData *ID : InstrListForBB)
this->IDL->push_back(*ID);
llvm::append_range(InstrList, InstrListForBB);
@@ -203,6 +241,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
InstrListForBB.push_back(ID);
+ if (isa<BranchInst>(*It))
+ ID->setBranchSuccessors(BasicBlockToInteger);
+
// Add to the instruction list
bool WasInserted;
DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
@@ -235,6 +276,11 @@ IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
}
+IRInstructionData *
+IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
+ return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
+}
+
IRInstructionDataList *
IRInstructionMapper::allocateIRInstructionDataList() {
return new (IDLAllocator->Allocate()) IRInstructionDataList();
@@ -255,6 +301,8 @@ unsigned IRInstructionMapper::mapToIllegalUnsigned(
IRInstructionData *ID = nullptr;
if (!End)
ID = allocateIRInstructionData(*It, false, *IDL);
+ else
+ ID = allocateIRInstructionData(*IDL);
InstrListForBB.push_back(ID);
// Remember that we added an illegal number last time.
@@ -563,8 +611,50 @@ bool IRSimilarityCandidate::compareCommutativeOperandMapping(
return true;
}
+bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
+ RelativeLocMapping B) {
+ // Get the basic blocks the label refers to.
+ BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
+ BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
+
+ // Get the basic blocks contained in each region.
+ DenseSet<BasicBlock *> BasicBlockA;
+ DenseSet<BasicBlock *> BasicBlockB;
+ A.IRSC.getBasicBlocks(BasicBlockA);
+ B.IRSC.getBasicBlocks(BasicBlockB);
+
+ // Determine if the block is contained in the region.
+ bool AContained = BasicBlockA.contains(ABB);
+ bool BContained = BasicBlockB.contains(BBB);
+
+ // Both blocks need to be contained in the region, or both need to be outside
+ // the region.
+ if (AContained != BContained)
+ return false;
+
+ // If both are contained, then we need to make sure that the relative
+ // distances to the target blocks are the same.
+ if (AContained)
+ return A.RelativeLocation == B.RelativeLocation;
+ return true;
+}
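A hedged, standalone illustration of the containment rule above (plain bools and ints replace the candidate and region types):

  // Both targets inside their regions: the relative offsets must agree.
  // Both targets outside: accepted here, matched later via value numbering.
  // One inside, one outside: never a match.
  static bool relativeLocationsMatch(bool AContained, bool BContained,
                                     int RelA, int RelB) {
    if (AContained != BContained)
      return false;
    if (AContained)
      return RelA == RelB;
    return true;
  }

  int main() {
    bool SameOffset = relativeLocationsMatch(true, true, +1, +1);    // true
    bool DiffOffset = relativeLocationsMatch(true, true, +1, +2);    // false
    bool Mixed = relativeLocationsMatch(true, false, +1, +3);        // false
    bool BothOutside = relativeLocationsMatch(false, false, -2, +5); // true
    return (SameOffset && !DiffOffset && !Mixed && BothOutside) ? 0 : 1;
  }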
+
bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
+ DenseMap<unsigned, DenseSet<unsigned>> MappingA;
+ DenseMap<unsigned, DenseSet<unsigned>> MappingB;
+ return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
+}
+
+typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
+ SmallVector<int, 4> &, ArrayRef<Value *> &,
+ ArrayRef<Value *> &>
+ ZippedRelativeLocationsT;
+
+bool IRSimilarityCandidate::compareStructure(
+ const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
if (A.getLength() != B.getLength())
return false;
@@ -574,15 +664,12 @@ bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
iterator ItA = A.begin();
iterator ItB = B.begin();
- // These sets create a create a mapping between the values in one candidate
- // to values in the other candidate. If we create a set with one element,
- // and that same element maps to the original element in the candidate
- // we have a good mapping.
- DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
- DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
+ // These ValueNumber mapping sets create a mapping from the values in one
+ // candidate to the values in the other candidate. If we create a set with
+ // one element, and that same element maps to the original element in the
+ // candidate, we have a good mapping.
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
- bool WasInserted;
// Iterate over the instructions contained in each candidate
unsigned SectionLength = A.getStartIdx() + A.getLength();
@@ -605,6 +692,7 @@ bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
unsigned InstValA = A.ValueToNumber.find(IA)->second;
unsigned InstValB = B.ValueToNumber.find(IB)->second;
+ bool WasInserted;
// Ensure that the mappings for the instructions exists.
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
@@ -632,6 +720,37 @@ bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
{A, OperValsA, ValueNumberMappingA},
{B, OperValsB, ValueNumberMappingB}))
return false;
+
+ // Here we check that, between two corresponding instructions, references
+ // to a basic block inside the region have the same relative locations,
+ // and that references to basic blocks outside the region land in the
+ // same corresponding locations.
+
+ // We are able to make this assumption about blocks outside of the region
+ // because the target block labels are considered values and follow the
+ // same value-number matching that we defined for the other instructions
+ // in the region. So wherever we target a specific block outside the
+ // region, we are targeting a corresponding block in each analogous
+ // location of the region we are comparing to.
+ if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
+ !(isa<PHINode>(IA) && isa<PHINode>(IB)))
+ continue;
+
+ SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
+ SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
+ if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
+ OperValsA.size() != OperValsB.size())
+ return false;
+
+ ZippedRelativeLocationsT ZippedRelativeLocations =
+ zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
+ if (any_of(ZippedRelativeLocations,
+ [&A, &B](std::tuple<int, int, Value *, Value *> R) {
+ return !checkRelativeLocations(
+ {A, std::get<0>(R), std::get<2>(R)},
+ {B, std::get<1>(R), std::get<3>(R)});
+ }))
+ return false;
}
return true;
}
@@ -657,6 +776,8 @@ void IRSimilarityIdentifier::populateMapper(
std::vector<unsigned> IntegerMappingForModule;
// Iterate over the functions in the module to map each Instruction in each
// BasicBlock to an unsigned integer.
+ Mapper.initializeForBBs(M);
+
for (Function &F : M) {
if (F.empty())
@@ -664,15 +785,18 @@ void IRSimilarityIdentifier::populateMapper(
for (BasicBlock &BB : F) {
- if (BB.sizeWithoutDebug() < 2)
- continue;
-
// Map each Instruction in the BasicBlock to an unsigned integer so that the
// block can be checked for similarity against regions in other blocks.
Mapper.convertToUnsignedVec(BB, InstrListForModule,
IntegerMappingForModule);
}
+
+ BasicBlock::iterator It = F.begin()->end();
+ Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
+ true);
+ if (InstrListForModule.size() > 0)
+ Mapper.IDL->push_back(*InstrListForModule.back());
}
// Insert the InstrListForModule at the end of the overall InstrList so that
@@ -707,6 +831,8 @@ static void createCandidatesFromSuffixTree(
std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
unsigned StringLen = RS.Length;
+ if (StringLen < 2)
+ return;
// Create an IRSimilarityCandidate for each instance of this subsequence \p RS.
for (const unsigned &StartIdx : RS.StartIndices) {
@@ -739,6 +865,84 @@ static void createCandidatesFromSuffixTree(
}
}
+void IRSimilarityCandidate::createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand,
+ DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+ DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
+ assert(SourceCand.CanonNumToNumber.size() != 0 &&
+ "Base canonical relationship is empty!");
+ assert(SourceCand.NumberToCanonNum.size() != 0 &&
+ "Base canonical relationship is empty!");
+
+ assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
+ assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
+
+ DenseSet<unsigned> UsedGVNs;
+ // Iterate over the mappings provided from this candidate to SourceCand. We
+ // are then able to map the GVN in this candidate to the same canonical number
+ // given to the corresponding GVN in SourceCand.
+ for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
+ unsigned SourceGVN = GVNMapping.first;
+
+ assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
+
+ unsigned ResultGVN;
+ // We need special handling if we have more than one potential value. This
+ // means that there are at least two GVNs that could correspond to this GVN.
+ // This could lead to potential swapping later on, so we make a decision
+ // here to ensure a one-to-one mapping.
+ if (GVNMapping.second.size() > 1) {
+ bool Found = false;
+ for (unsigned Val : GVNMapping.second) {
+ // We make sure the target value number hasn't already been reserved.
+ if (UsedGVNs.contains(Val))
+ continue;
+
+ // We make sure that the opposite mapping is still consistent.
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
+ FromSourceMapping.find(Val);
+
+ if (!It->second.contains(SourceGVN))
+ continue;
+
+ // We pick the first item that satisfies these conditions.
+ Found = true;
+ ResultGVN = Val;
+ break;
+ }
+
+ assert(Found && "Could not find matching value for source GVN");
+ (void)Found;
+
+ } else
+ ResultGVN = *GVNMapping.second.begin();
+
+ // Whatever GVN is found, we mark it as used.
+ UsedGVNs.insert(ResultGVN);
+
+ unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
+ CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
+ NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
+ }
+}
+
+void IRSimilarityCandidate::createCanonicalMappingFor(
+ IRSimilarityCandidate &CurrCand) {
+ assert(CurrCand.CanonNumToNumber.size() == 0 &&
+ "Canonical Relationship is non-empty");
+ assert(CurrCand.NumberToCanonNum.size() == 0 &&
+ "Canonical Relationship is non-empty");
+
+ unsigned CanonNum = 0;
+ // Iterate over the value numbers found; the order does not matter in this
+ // case.
+ for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
+ CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
+ CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
+ CanonNum++;
+ }
+}
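For intuition, a minimal sketch of the canonical numbering produced above (hypothetical global value numbers; std::map stands in for DenseMap, and as the comment notes the iteration order is irrelevant):

  #include <cassert>
  #include <map>

  int main() {
    // Suppose the candidate's global value numbers are 7, 9 and 12.
    std::map<unsigned, const char *> NumberToValue = {
        {7, "%a"}, {9, "%b"}, {12, "%cmp"}};

    std::map<unsigned, unsigned> NumberToCanonNum, CanonNumToNumber;
    unsigned CanonNum = 0;
    for (const auto &NumToVal : NumberToValue) {
      NumberToCanonNum[NumToVal.first] = CanonNum;
      CanonNumToNumber[CanonNum] = NumToVal.first;
      ++CanonNum;
    }

    // 7 -> 0, 9 -> 1, 12 -> 2, and the inverse map round-trips.
    assert(NumberToCanonNum[12] == 2 && CanonNumToNumber[2] == 12);
    return 0;
  }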
+
/// From the list of IRSimilarityCandidates, perform a comparison between each
/// IRSimilarityCandidate to determine if there are overlapping
/// IRInstructionData, or if they do not have the same structure.
@@ -774,6 +978,8 @@ static void findCandidateStructures(
// Iterate over the candidates to determine its structural and overlapping
// compatibility with other instructions
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
+ DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
for (CandIt = CandsForRepSubstring.begin(),
CandEndIt = CandsForRepSubstring.end();
CandIt != CandEndIt; CandIt++) {
@@ -792,9 +998,11 @@ static void findCandidateStructures(
// Check if we already have a list of IRSimilarityCandidates for the current
// structural group. Create one if one does not exist.
CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
- if (CurrentGroupPair == StructuralGroups.end())
+ if (CurrentGroupPair == StructuralGroups.end()) {
+ IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
+ }
// Iterate over the IRSimilarityCandidates following the current
// IRSimilarityCandidate in the list to determine whether the two
@@ -811,11 +1019,15 @@ static void findCandidateStructures(
// Otherwise we determine if they have the same structure and add it to
// vector if they match.
- SameStructure =
- IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt);
+ ValueNumberMappingA.clear();
+ ValueNumberMappingB.clear();
+ SameStructure = IRSimilarityCandidate::compareStructure(
+ *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
if (!SameStructure)
continue;
+ InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
+ ValueNumberMappingB);
CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
CurrentGroupPair->second.push_back(*InnerCandIt);
}
@@ -862,6 +1074,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
+ Mapper.InstClassifier.EnableBranches = this->EnableBranches;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
@@ -871,6 +1084,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
resetSimilarityCandidates();
+ Mapper.InstClassifier.EnableBranches = this->EnableBranches;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
@@ -891,7 +1105,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
}
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
- IRSI.reset(new IRSimilarityIdentifier());
+ IRSI.reset(new IRSimilarityIdentifier(!DisableBranches));
return false;
}
@@ -907,9 +1121,9 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
- ModuleAnalysisManager &) {
+ ModuleAnalysisManager &) {
- auto IRSI = IRSimilarityIdentifier();
+ auto IRSI = IRSimilarityIdentifier(!DisableBranches);
IRSI.findSimilarity(M);
return IRSI;
}
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index fc6051b35efc..c4b7239b43ab 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -43,8 +43,8 @@ using namespace llvm::PatternMatch;
bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
SmallPtrSetImpl<Instruction *> &Set) {
- for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
- if (!Set.count(dyn_cast<Instruction>(*Use)))
+ for (const Use &Use : I->operands())
+ if (!Set.count(dyn_cast<Instruction>(Use)))
return false;
return true;
}
@@ -62,6 +62,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
return true;
}
return false;
@@ -144,12 +146,9 @@ static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
// meaning that we will use sext instructions instead of zext
// instructions to restore the original type.
IsSigned = true;
- if (!Bits.isNegative())
- // If the value is not known to be negative, we don't known what the
- // upper bit is, and therefore, we don't know what kind of extend we
- // will need. In this case, just increase the bit width by one bit and
- // use sext.
- ++MaxBitWidth;
+ // Make sure at least one sign bit is included in the result, so it
+ // will get properly sign-extended.
+ ++MaxBitWidth;
}
}
if (!isPowerOf2_64(MaxBitWidth))
@@ -199,7 +198,10 @@ static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
if (Kind != RecurKind::FAdd)
return false;
- if (Exit->getOpcode() != Instruction::FAdd || Exit != ExactFPMathInst)
+ // Ensure the exit instruction is an FAdd, and that it only has one user
+ // other than the reduction PHI
+ if (Exit->getOpcode() != Instruction::FAdd || Exit->hasNUsesOrMore(3) ||
+ Exit != ExactFPMathInst)
return false;
// The only pattern accepted is the one in which the reduction PHI
@@ -272,7 +274,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
} else if (RecurrenceType->isIntegerTy()) {
if (!isIntegerRecurrenceKind(Kind))
return false;
- if (isArithmeticRecurrenceKind(Kind))
+ if (!isMinMaxRecurrenceKind(Kind))
Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
} else {
// Pointer min/max may exist, but it is not supported as a reduction op.
@@ -327,7 +329,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).
if (Cur != Start) {
- ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, FuncFMF);
+ ReduxDesc =
+ isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
if (!ReduxDesc.isRecurrence())
return false;
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -360,6 +363,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
// A reduction operation must only have one use of the reduction value.
if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
+ !isSelectCmpRecurrenceKind(Kind) &&
hasMultipleUsesOf(Cur, VisitedInsts, 1))
return false;
@@ -367,10 +371,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
return false;
- if (isIntMinMaxRecurrenceKind(Kind) &&
+ if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
(isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
- if (isFPMinMaxRecurrenceKind(Kind) &&
+ if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
(isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
++NumCmpSelectPatternInst;
@@ -423,7 +427,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
!isa<SelectInst>(UI)) ||
(!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
- !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())))
+ !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
+ .isRecurrence() &&
+ !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
return false;
// Remember that we completed the cycle.
@@ -435,8 +441,13 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
}
// This means we have seen one but not the other instruction of the
- // pattern or more than just a select and cmp.
- if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2)
+ // pattern or more than just a select and cmp. Zero implies that we saw a
+ // llvm.min/max intrinsic, which is always OK.
+ if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
+ NumCmpSelectPatternInst != 0)
+ return false;
+
+ if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
return false;
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
@@ -505,11 +516,70 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
return true;
}
+// We are looking for loops that do something like this:
+// int r = 0;
+// for (int i = 0; i < n; i++) {
+// if (src[i] > 3)
+// r = 3;
+// }
+// where the reduction value (r) only has two states, in this example 0 or 3.
+// The generated LLVM IR for this type of loop will be like this:
+// for.body:
+// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
+// ...
+// %cmp = icmp sgt i32 %5, 3
+// %spec.select = select i1 %cmp, i32 3, i32 %r
+// ...
+// In general we can support vectorization of loops where 'r' flips between
+// any two non-constants, provided they are loop invariant. The only thing
+// we actually care about at the end of the loop is whether or not any lane
+// in the selected vector is different from the start value. The final
+// across-vector reduction after the loop simply involves choosing the start
+// value if nothing changed (0 in the example above) or the other selected
+// value (3 in the example above).
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
- const InstDesc &Prev) {
- assert((isa<CmpInst>(I) || isa<SelectInst>(I)) &&
- "Expected a cmp or select instruction");
+RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
+ Instruction *I, InstDesc &Prev) {
+ // We must handle the select(cmp(),x,y) as a single instruction. Advance to
+ // the select.
+ CmpInst::Predicate Pred;
+ if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
+ if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
+ return InstDesc(Select, Prev.getRecKind());
+ }
+
+ // Only match select with single use cmp condition.
+ if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
+ m_Value())))
+ return InstDesc(false, I);
+
+ SelectInst *SI = cast<SelectInst>(I);
+ Value *NonPhi = nullptr;
+
+ if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
+ NonPhi = SI->getFalseValue();
+ else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
+ NonPhi = SI->getTrueValue();
+ else
+ return InstDesc(false, I);
+
+ // We are looking for selects of the form:
+ // select(cmp(), phi, loop_invariant) or
+ // select(cmp(), loop_invariant, phi)
+ if (!Loop->isLoopInvariant(NonPhi))
+ return InstDesc(false, I);
+
+ return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
+ : RecurKind::SelectFCmp);
+}
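For contrast with the example in the comment above, a hedged sketch of a loop this pattern rejects (hypothetical helper; the point is the loop-invariance check on the non-phi select operand):

  // Not recognized as a SelectICmp reduction: the selected value src[i]
  // changes every iteration, so the recurrence no longer flips between the
  // start value and a single loop-invariant value.
  int lastAboveThree(const int *src, int n) {
    int r = 0;
    for (int i = 0; i < n; i++)
      if (src[i] > 3)
        r = src[i];
    return r;
  }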
+
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
+ const InstDesc &Prev) {
+ assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
+ "Expected a cmp or select or call instruction");
+ if (!isMinMaxRecurrenceKind(Kind))
+ return InstDesc(false, I);
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
@@ -519,28 +589,33 @@ RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
return InstDesc(Select, Prev.getRecKind());
}
- // Only match select with single use cmp condition.
- if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
+ // Only match select with single use cmp condition, or a min/max intrinsic.
+ if (!isa<IntrinsicInst>(I) &&
+ !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
m_Value())))
return InstDesc(false, I);
// Look for a min/max pattern.
if (match(I, m_UMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::UMin);
+ return InstDesc(Kind == RecurKind::UMin, I);
if (match(I, m_UMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::UMax);
+ return InstDesc(Kind == RecurKind::UMax, I);
if (match(I, m_SMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::SMax);
+ return InstDesc(Kind == RecurKind::SMax, I);
if (match(I, m_SMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::SMin);
+ return InstDesc(Kind == RecurKind::SMin, I);
if (match(I, m_OrdFMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMin);
+ return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_OrdFMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMax);
+ return InstDesc(Kind == RecurKind::FMax, I);
if (match(I, m_UnordFMin(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMin);
+ return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_UnordFMax(m_Value(), m_Value())))
- return InstDesc(I, RecurKind::FMax);
+ return InstDesc(Kind == RecurKind::FMax, I);
+ if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMin, I);
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMax, I);
return InstDesc(false, I);
}
@@ -592,8 +667,10 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
}
RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
- InstDesc &Prev, FastMathFlags FMF) {
+RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
+ Instruction *I, RecurKind Kind,
+ InstDesc &Prev, FastMathFlags FuncFMF) {
+ assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
switch (I->getOpcode()) {
default:
return InstDesc(false, I);
@@ -624,9 +701,15 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
LLVM_FALLTHROUGH;
case Instruction::FCmp:
case Instruction::ICmp:
+ case Instruction::Call:
+ if (isSelectCmpRecurrenceKind(Kind))
+ return isSelectCmpPattern(L, OrigPhi, I, Prev);
if (isIntMinMaxRecurrenceKind(Kind) ||
- (FMF.noNaNs() && FMF.noSignedZeros() && isFPMinMaxRecurrenceKind(Kind)))
- return isMinMaxSelectCmpPattern(I, Prev);
+ (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
+ (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
+ I->hasNoSignedZeros())) &&
+ isFPMinMaxRecurrenceKind(Kind)))
+ return isMinMaxPattern(I, Kind, Prev);
return InstDesc(false, I);
}
}
@@ -649,7 +732,6 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RecurrenceDescriptor &RedDes,
DemandedBits *DB, AssumptionCache *AC,
DominatorTree *DT) {
-
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
FastMathFlags FMF;
@@ -694,6 +776,12 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
+ DT)) {
+ LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
+ << *Phi << "\n");
+ return true;
+ }
if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
@@ -710,6 +798,12 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
+ DT)) {
+ LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
+ << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
@@ -816,8 +910,8 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
/// This function returns the identity element (or neutral element) for
/// the operation K.
-Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
- FastMathFlags FMF) {
+Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
+ FastMathFlags FMF) {
switch (K) {
case RecurKind::Xor:
case RecurKind::Add:
@@ -857,6 +951,10 @@ Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
return ConstantFP::getInfinity(Tp, true);
case RecurKind::FMax:
return ConstantFP::getInfinity(Tp, false);
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
+ return getRecurrenceStartValue();
+ break;
default:
llvm_unreachable("Unknown recurrence kind");
}
@@ -882,9 +980,11 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
+ case RecurKind::SelectICmp:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
+ case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:
llvm_unreachable("Unknown recurrence operation");
@@ -963,8 +1063,10 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
const SCEV *Step, BinaryOperator *BOp,
+ Type *ElementType,
SmallVectorImpl<Instruction *> *Casts)
- : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
+ ElementType(ElementType) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
@@ -992,6 +1094,11 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
InductionBinOp->getOpcode() == Instruction::FSub))) &&
"Binary opcode should be specified for FP induction");
+ if (IK == IK_PtrInduction)
+ assert(ElementType && "Pointer induction must have element type");
+ else
+ assert(!ElementType && "Non-pointer induction cannot have element type");
+
if (Casts) {
for (auto &Inst : *Casts) {
RedundantCasts.push_back(Inst);
@@ -1239,8 +1346,6 @@ bool InductionDescriptor::isInductionPHI(
BasicBlock *Latch = AR->getLoop()->getLoopLatch();
if (!Latch)
return false;
- BinaryOperator *BOp =
- dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
@@ -1250,8 +1355,10 @@ bool InductionDescriptor::isInductionPHI(
return false;
if (PhiTy->isIntegerTy()) {
+ BinaryOperator *BOp =
+ dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
- CastsToIgnore);
+ /* ElementType */ nullptr, CastsToIgnore);
return true;
}
@@ -1260,15 +1367,16 @@ bool InductionDescriptor::isInductionPHI(
if (!ConstStep)
return false;
- ConstantInt *CV = ConstStep->getValue();
- Type *PointerElementType = PhiTy->getPointerElementType();
- // The pointer stride cannot be determined if the pointer element type is not
- // sized.
- if (!PointerElementType->isSized())
+ // Always use i8 element type for opaque pointer inductions.
+ PointerType *PtrTy = cast<PointerType>(PhiTy);
+ Type *ElementType = PtrTy->isOpaque() ? Type::getInt8Ty(PtrTy->getContext())
+ : PtrTy->getElementType();
+ if (!ElementType->isSized())
return false;
+ ConstantInt *CV = ConstStep->getValue();
const DataLayout &DL = Phi->getModule()->getDataLayout();
- int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
+ int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(ElementType));
if (!Size)
return false;
@@ -1277,6 +1385,7 @@ bool InductionDescriptor::isInductionPHI(
return false;
auto *StepValue =
SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
+ D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
+ /* BinOp */ nullptr, ElementType);
return true;
}
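A worked example of the stride computation above, assuming a pointer induction that advances by 16 bytes per iteration (CVSize = 16): with a typed i32* phi, Size = DL.getTypeAllocSize(i32) = 4, so StepValue = CVSize / Size = 4 elements; with an opaque pointer the element type defaults to i8, Size = 1, and StepValue = 16, i.e. the same stride expressed in i8 units.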
diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp
index db6cff720642..d7b202f83189 100644
--- a/llvm/lib/Analysis/IVUsers.cpp
+++ b/llvm/lib/Analysis/IVUsers.cpp
@@ -90,34 +90,6 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
return false;
}
-/// Return true if all loop headers that dominate this block are in simplified
-/// form.
-static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
- const LoopInfo *LI,
- SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
- Loop *NearestLoop = nullptr;
- for (DomTreeNode *Rung = DT->getNode(BB);
- Rung; Rung = Rung->getIDom()) {
- BasicBlock *DomBB = Rung->getBlock();
- Loop *DomLoop = LI->getLoopFor(DomBB);
- if (DomLoop && DomLoop->getHeader() == DomBB) {
- // If we have already checked this loop nest, stop checking.
- if (SimpleLoopNests.count(DomLoop))
- break;
- // If the domtree walk reaches a loop with no preheader, return false.
- if (!DomLoop->isLoopSimplifyForm())
- return false;
- // If we have not already checked this loop nest, remember the loop
- // header nearest to BB. The nearest loop may not contain BB.
- if (!NearestLoop)
- NearestLoop = DomLoop;
- }
- }
- if (NearestLoop)
- SimpleLoopNests.insert(NearestLoop);
- return true;
-}
-
/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression
/// and now we need to decide whether the user should use the preinc or post-inc
/// value. If this user should use the post-inc version of the IV, return true.
@@ -162,11 +134,10 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
return true;
}
-/// AddUsersImpl - Inspect the specified instruction. If it is a
-/// reducible SCEV, recursively add its users to the IVUsesByStride set and
-/// return true. Otherwise, return false.
-bool IVUsers::AddUsersImpl(Instruction *I,
- SmallPtrSetImpl<Loop*> &SimpleLoopNests) {
+/// Inspect the specified instruction. If it is a reducible SCEV, recursively
+/// add its users to the IVUsesByStride set and return true. Otherwise, return
+/// false.
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
const DataLayout &DL = I->getModule()->getDataLayout();
// Add this IV user to the Processed set before returning false to ensure that
@@ -213,18 +184,6 @@ bool IVUsers::AddUsersImpl(Instruction *I,
if (isa<PHINode>(User) && Processed.count(User))
continue;
- // Only consider IVUsers that are dominated by simplified loop
- // headers. Otherwise, SCEVExpander will crash.
- BasicBlock *UseBB = User->getParent();
- // A phi's use is live out of its predecessor block.
- if (PHINode *PHI = dyn_cast<PHINode>(User)) {
- unsigned OperandNo = U.getOperandNo();
- unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
- UseBB = PHI->getIncomingBlock(ValNo);
- }
- if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
- return false;
-
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
@@ -234,12 +193,12 @@ bool IVUsers::AddUsersImpl(Instruction *I,
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersImpl(User, SimpleLoopNests)) {
+ !AddUsersIfInteresting(User)) {
LLVM_DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
- } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
+ } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
LLVM_DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -288,15 +247,6 @@ bool IVUsers::AddUsersImpl(Instruction *I,
return true;
}
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
- // SCEVExpander can only handle users that are dominated by simplified loop
- // entries. Keep track of all loops that are only dominated by other simple
- // loops so we don't traverse the domtree for each user.
- SmallPtrSet<Loop*,16> SimpleLoopNests;
-
- return AddUsersImpl(I, SimpleLoopNests);
-}
-
IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index a8ad2d6696bf..73d1eff1b968 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -49,6 +49,42 @@ static cl::opt<int>
extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
+namespace {
+using namespace llvm::ore;
+class MandatoryInlineAdvice : public InlineAdvice {
+public:
+ MandatoryInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
+ OptimizationRemarkEmitter &ORE,
+ bool IsInliningMandatory)
+ : InlineAdvice(Advisor, CB, ORE, IsInliningMandatory) {}
+
+private:
+ void recordInliningWithCalleeDeletedImpl() override { recordInliningImpl(); }
+
+ void recordInliningImpl() override {
+ if (IsInliningRecommended)
+ emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, IsInliningRecommended,
+ [&](OptimizationRemark &Remark) {
+ Remark << ": always inline attribute";
+ });
+ }
+
+ void recordUnsuccessfulInliningImpl(const InlineResult &Result) override {
+ if (IsInliningRecommended)
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << "'" << NV("Callee", Callee) << "' is not AlwaysInline into '"
+ << NV("Caller", Caller)
+ << "': " << NV("Reason", Result.getFailureReason());
+ });
+ }
+
+ void recordUnattemptedInliningImpl() override {
+ assert(!IsInliningRecommended && "Expected to attempt inlining");
+ }
+};
+} // namespace
+
void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
const InlineResult &Result) {
using namespace ore;
@@ -56,20 +92,20 @@ void DefaultInlineAdvice::recordUnsuccessfulInliningImpl(
"; " + inlineCostStr(*OIC));
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller) << ": "
- << NV("Reason", Result.getFailureReason());
+ << "'" << NV("Callee", Callee) << "' is not inlined into '"
+ << NV("Caller", Caller)
+ << "': " << NV("Reason", Result.getFailureReason());
});
}
void DefaultInlineAdvice::recordInliningWithCalleeDeletedImpl() {
if (EmitRemarks)
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
}
void DefaultInlineAdvice::recordInliningImpl() {
if (EmitRemarks)
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
}
llvm::Optional<llvm::InlineCost> static getDefaultInlineAdvice(
@@ -151,9 +187,9 @@ void InlineAdvice::recordInliningWithCalleeDeleted() {
AnalysisKey InlineAdvisorAnalysis::Key;
-bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
- InliningAdvisorMode Mode,
- StringRef ReplayFile) {
+bool InlineAdvisorAnalysis::Result::tryCreate(
+ InlineParams Params, InliningAdvisorMode Mode,
+ const ReplayInlinerSettings &ReplaySettings) {
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
switch (Mode) {
case InliningAdvisorMode::Default:
@@ -161,10 +197,10 @@ bool InlineAdvisorAnalysis::Result::tryCreate(InlineParams Params,
Advisor.reset(new DefaultInlineAdvisor(M, FAM, Params));
// Restrict replay to default advisor, ML advisors are stateful so
// replay will need augmentations to interleave with them correctly.
- if (!ReplayFile.empty()) {
- Advisor = std::make_unique<ReplayInlineAdvisor>(
- M, FAM, M.getContext(), std::move(Advisor), ReplayFile,
- /* EmitRemarks =*/true);
+ if (!ReplaySettings.ReplayFile.empty()) {
+ Advisor = llvm::getReplayInlineAdvisor(M, FAM, M.getContext(),
+ std::move(Advisor), ReplaySettings,
+ /* EmitRemarks =*/true);
}
break;
case InliningAdvisorMode::Development:
@@ -313,7 +349,7 @@ void llvm::setInlineRemark(CallBase &CB, StringRef Message) {
return;
Attribute Attr = Attribute::get(CB.getContext(), "inline-remark", Message);
- CB.addAttribute(AttributeList::FunctionIndex, Attr);
+ CB.addFnAttr(Attr);
}
/// Return the cost only if the inliner should attempt to inline at the given
@@ -343,15 +379,15 @@ llvm::shouldInline(CallBase &CB,
if (IC.isNever()) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because it should never be inlined "
- << IC;
+ << "'" << NV("Callee", Callee) << "' not inlined into '"
+ << NV("Caller", Caller)
+ << "' because it should never be inlined " << IC;
});
} else {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call)
- << NV("Callee", Callee) << " not inlined into "
- << NV("Caller", Caller) << " because too costly to inline "
+ << "'" << NV("Callee", Callee) << "' not inlined into '"
+ << NV("Caller", Caller) << "' because too costly to inline "
<< IC;
});
}
@@ -368,9 +404,9 @@ llvm::shouldInline(CallBase &CB,
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "IncreaseCostInOtherContexts",
Call)
- << "Not inlining. Cost of inlining " << NV("Callee", Callee)
- << " increases the cost of inlining " << NV("Caller", Caller)
- << " in other contexts";
+ << "Not inlining. Cost of inlining '" << NV("Callee", Callee)
+ << "' increases the cost of inlining '" << NV("Caller", Caller)
+ << "' in other contexts";
});
setInlineRemark(CB, "deferred");
// IC does not bool() to false, so get an InlineCost that will.
@@ -383,7 +419,8 @@ llvm::shouldInline(CallBase &CB,
return IC;
}
-std::string llvm::getCallSiteLocation(DebugLoc DLoc) {
+std::string llvm::formatCallSiteLocation(DebugLoc DLoc,
+ const CallSiteFormat &Format) {
std::string Buffer;
raw_string_ostream CallSiteLoc(Buffer);
bool First = true;
@@ -399,9 +436,10 @@ std::string llvm::getCallSiteLocation(DebugLoc DLoc) {
StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
if (Name.empty())
Name = DIL->getScope()->getSubprogram()->getName();
- CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset) << ":"
- << llvm::utostr(DIL->getColumn());
- if (Discriminator)
+ CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset);
+ if (Format.outputColumn())
+ CallSiteLoc << ":" << llvm::utostr(DIL->getColumn());
+ if (Format.outputDiscriminator() && Discriminator)
CallSiteLoc << "." << llvm::utostr(Discriminator);
First = false;
}
@@ -435,25 +473,38 @@ void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) {
Remark << ";";
}
-void llvm::emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc,
- const BasicBlock *Block, const Function &Callee,
- const Function &Caller, const InlineCost &IC,
- bool ForProfileContext, const char *PassName) {
+void llvm::emitInlinedInto(
+ OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block,
+ const Function &Callee, const Function &Caller, bool AlwaysInline,
+ function_ref<void(OptimizationRemark &)> ExtraContext,
+ const char *PassName) {
ORE.emit([&]() {
- bool AlwaysInline = IC.isAlways();
StringRef RemarkName = AlwaysInline ? "AlwaysInline" : "Inlined";
OptimizationRemark Remark(PassName ? PassName : DEBUG_TYPE, RemarkName,
DLoc, Block);
- Remark << ore::NV("Callee", &Callee) << " inlined into ";
- Remark << ore::NV("Caller", &Caller);
- if (ForProfileContext)
- Remark << " to match profiling context";
- Remark << " with " << IC;
+ Remark << "'" << ore::NV("Callee", &Callee) << "' inlined into '"
+ << ore::NV("Caller", &Caller) << "'";
+ if (ExtraContext)
+ ExtraContext(Remark);
addLocationToRemarks(Remark, DLoc);
return Remark;
});
}
+void llvm::emitInlinedIntoBasedOnCost(
+ OptimizationRemarkEmitter &ORE, DebugLoc DLoc, const BasicBlock *Block,
+ const Function &Callee, const Function &Caller, const InlineCost &IC,
+ bool ForProfileContext, const char *PassName) {
+ llvm::emitInlinedInto(
+ ORE, DLoc, Block, Callee, Caller, IC.isAlways(),
+ [&](OptimizationRemark &Remark) {
+ if (ForProfileContext)
+ Remark << " to match profiling context";
+ Remark << " with " << IC;
+ },
+ PassName);
+}
+
InlineAdvisor::InlineAdvisor(Module &M, FunctionAnalysisManager &FAM)
: M(M), FAM(FAM) {
if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) {
@@ -475,7 +526,8 @@ InlineAdvisor::~InlineAdvisor() {
std::unique_ptr<InlineAdvice> InlineAdvisor::getMandatoryAdvice(CallBase &CB,
bool Advice) {
- return std::make_unique<InlineAdvice>(this, CB, getCallerORE(CB), Advice);
+ return std::make_unique<MandatoryInlineAdvice>(this, CB, getCallerORE(CB),
+ Advice);
}
InlineAdvisor::MandatoryInliningKind
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 4c2413e14435..ff31e81aad08 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -135,6 +135,31 @@ static cl::opt<bool> DisableGEPConstOperand(
namespace {
class InlineCostCallAnalyzer;
+/// This function behaves more like CallBase::hasFnAttr: when it looks for the
+/// requested attribute, it checks both the call instruction and the called
+/// function (if it's available and operand bundles don't prohibit that).
+Attribute getFnAttr(CallBase &CB, StringRef AttrKind) {
+ Attribute CallAttr = CB.getFnAttr(AttrKind);
+ if (CallAttr.isValid())
+ return CallAttr;
+
+ // Operand bundles override attributes on the called function, but don't
+ // override attributes directly present on the call instruction.
+ if (!CB.isFnAttrDisallowedByOpBundle(AttrKind))
+ if (const Function *F = CB.getCalledFunction())
+ return F->getFnAttribute(AttrKind);
+
+ return {};
+}
+
+Optional<int> getStringFnAttrAsInt(CallBase &CB, StringRef AttrKind) {
+ Attribute Attr = getFnAttr(CB, AttrKind);
+ int AttrValue;
+ if (Attr.getValueAsString().getAsInteger(10, AttrValue))
+ return None;
+ return AttrValue;
+}
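A minimal standalone sketch of the parse-or-None behaviour of getStringFnAttrAsInt (std::optional and std::from_chars stand in for Optional and StringRef::getAsInteger): an unset or non-numeric attribute string yields no value rather than a bogus cost. In-tree, such a string attribute would presumably be attached to the call site with Attribute::get plus CallBase::addFnAttr, the same pair this patch uses for "inline-remark".

  #include <charconv>
  #include <optional>
  #include <string>

  static std::optional<int> parseIntAttr(const std::string &Val) {
    int Out = 0;
    auto [Ptr, Ec] = std::from_chars(Val.data(), Val.data() + Val.size(), Out);
    if (Ec != std::errc() || Ptr != Val.data() + Val.size())
      return std::nullopt; // missing or malformed attribute: no override
    return Out;
  }

  int main() {
    return (parseIntAttr("42").value_or(-1) == 42 &&
            !parseIntAttr("not-a-number").has_value())
               ? 0
               : 1;
  }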
+
// This struct is used to store information about inline cost of a
// particular instruction
struct InstructionCostDetail {
@@ -235,6 +260,10 @@ protected:
/// Called when the analysis engine determines load elimination won't happen.
virtual void onDisableLoadElimination() {}
+ /// Called when we visit a CallBase, before the analysis starts. Return false
+ /// to stop further processing of the instruction.
+ virtual bool onCallBaseVisitStart(CallBase &Call) { return true; }
+
/// Called to account for a call.
virtual void onCallPenalty() {}
@@ -333,6 +362,10 @@ protected:
/// whenever we simplify away the stores that would otherwise cause them to be
/// loads.
bool EnableLoadElimination;
+
+ /// Whether we allow inlining of recursive calls.
+ bool AllowRecursiveCall;
+
SmallPtrSet<Value *, 16> LoadAddrSet;
AllocaInst *getSROAArgForValueOrNull(Value *V) const {
@@ -354,6 +387,7 @@ protected:
bool simplifyCallSite(Function *F, CallBase &Call);
template <typename Callable>
bool simplifyInstruction(Instruction &I, Callable Evaluate);
+ bool simplifyIntrinsicCallIsConstant(CallBase &CB);
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
/// Return true if the given argument to the function being considered for
@@ -421,7 +455,8 @@ public:
OptimizationRemarkEmitter *ORE = nullptr)
: TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
- CandidateCall(Call), EnableLoadElimination(true) {}
+ CandidateCall(Call), EnableLoadElimination(true),
+ AllowRecursiveCall(false) {}
InlineResult analyze();
@@ -510,6 +545,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// sense that it's not weighted by profile counts at all.
int ColdSize = 0;
+ // Whether inlining is decided by cost-threshold analysis.
+ bool DecidedByCostThreshold = false;
+
// Whether inlining is decided by cost-benefit analysis.
bool DecidedByCostBenefit = false;
@@ -558,6 +596,22 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
addCost(LoadEliminationCost);
LoadEliminationCost = 0;
}
+
+ bool onCallBaseVisitStart(CallBase &Call) override {
+ if (Optional<int> AttrCallThresholdBonus =
+ getStringFnAttrAsInt(Call, "call-threshold-bonus"))
+ Threshold += *AttrCallThresholdBonus;
+
+ if (Optional<int> AttrCallCost =
+ getStringFnAttrAsInt(Call, "call-inline-cost")) {
+ addCost(*AttrCallCost);
+ // Prevent further processing of the call since we want to override its
+ // inline cost, not just add to it.
+ return false;
+ }
+ return true;
+ }
+
void onCallPenalty() override { addCost(CallPenalty); }
void onCallArgumentSetup(const CallBase &Call) override {
// Pay the price of the argument setup. We account for the average 1
@@ -717,7 +771,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Make sure we have a nonzero entry count.
auto EntryCount = F.getEntryCount();
- if (!EntryCount || !EntryCount.getCount())
+ if (!EntryCount || !EntryCount->getCount())
return false;
BlockFrequencyInfo *CalleeBFI = &(GetBFI(F));
@@ -763,7 +817,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
if (BranchInst *BI = dyn_cast<BranchInst>(&I)) {
// Count a conditional branch as savings if it becomes unconditional.
if (BI->isConditional() &&
- dyn_cast_or_null<ConstantInt>(
+ isa_and_nonnull<ConstantInt>(
SimplifiedValues.lookup(BI->getCondition()))) {
CurrentSavings += InlineConstants::InstrCost;
}
@@ -783,8 +837,8 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
// Compute the cycle savings per call.
auto EntryProfileCount = F.getEntryCount();
- assert(EntryProfileCount.hasValue() && EntryProfileCount.getCount());
- auto EntryCount = EntryProfileCount.getCount();
+ assert(EntryProfileCount.hasValue() && EntryProfileCount->getCount());
+ auto EntryCount = EntryProfileCount->getCount();
CycleSavings += EntryCount / 2;
CycleSavings = CycleSavings.udiv(EntryCount);
@@ -847,6 +901,14 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= VectorBonus / 2;
+ if (Optional<int> AttrCost =
+ getStringFnAttrAsInt(CandidateCall, "function-inline-cost"))
+ Cost = *AttrCost;
+
+ if (Optional<int> AttrThreshold =
+ getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
+ Threshold = *AttrThreshold;
+
if (auto Result = costBenefitAnalysis()) {
DecidedByCostBenefit = true;
if (Result.getValue())
@@ -855,14 +917,24 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
return InlineResult::failure("Cost over threshold.");
}
- if (IgnoreThreshold || Cost < std::max(1, Threshold))
+ if (IgnoreThreshold)
return InlineResult::success();
- return InlineResult::failure("Cost over threshold.");
+
+ DecidedByCostThreshold = true;
+ return Cost < std::max(1, Threshold)
+ ? InlineResult::success()
+ : InlineResult::failure("Cost over threshold.");
}
+
bool shouldStop() override {
+ if (IgnoreThreshold || ComputeFullInlineCost)
+ return false;
// Bail out the moment we cross the threshold. This means we'll under-count
// the cost, but only when undercounting doesn't matter.
- return !IgnoreThreshold && Cost >= Threshold && !ComputeFullInlineCost;
+ if (Cost < Threshold)
+ return false;
+ DecidedByCostThreshold = true;
+ return true;
}
void onLoadEliminationOpportunity() override {
@@ -930,7 +1002,9 @@ public:
Params(Params), Threshold(Params.DefaultThreshold),
BoostIndirectCalls(BoostIndirect), IgnoreThreshold(IgnoreThreshold),
CostBenefitAnalysisEnabled(isCostBenefitAnalysisEnabled()),
- Writer(this) {}
+ Writer(this) {
+ AllowRecursiveCall = Params.AllowRecursiveCall.getValue();
+ }
/// Annotation Writer for instruction details
InlineCostAnnotationWriter Writer;
@@ -939,7 +1013,7 @@ public:
// Prints the same analysis as dump(), but its definition is not dependent
// on the build.
- void print();
+ void print(raw_ostream &OS);
Optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
@@ -952,6 +1026,7 @@ public:
int getCost() const { return Cost; }
Optional<CostBenefitPair> getCostBenefitPair() { return CostBenefit; }
bool wasDecidedByCostBenefit() const { return DecidedByCostBenefit; }
+ bool wasDecidedByCostThreshold() const { return DecidedByCostThreshold; }
};
class InlineCostFeaturesAnalyzer final : public CallAnalyzer {
@@ -1310,7 +1385,7 @@ bool CallAnalyzer::visitPHI(PHINode &I) {
// Or could we skip the getPointerSizeInBits call completely? As far as I can
// see the ZeroOffset is used as a dummy value, so we can probably use any
// bit width for the ZeroOffset?
- APInt ZeroOffset = APInt::getNullValue(DL.getPointerSizeInBits(0));
+ APInt ZeroOffset = APInt::getZero(DL.getPointerSizeInBits(0));
bool CheckSROA = I.getType()->isPointerTy();
// Track the constant or pointer with constant offset we've seen so far.
@@ -1471,6 +1546,27 @@ bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
return true;
}
+/// Try to simplify a call to llvm.is.constant.
+///
+/// Duplicate the argument checking from CallAnalyzer::simplifyCallSite since
+/// we expect calls of this specific intrinsic to be infrequent.
+///
+/// FIXME: Given that we know the caller of CB's parent (F), namely
+/// CandidateCall->getParent()->getParent(), we might be able to determine
+/// whether inlining F into F's caller would change how the call to
+/// llvm.is.constant would evaluate.
+bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
+ Value *Arg = CB.getArgOperand(0);
+ auto *C = dyn_cast<Constant>(Arg);
+
+ if (!C)
+ C = dyn_cast_or_null<Constant>(SimplifiedValues.lookup(Arg));
+
+ Type *RT = CB.getFunctionType()->getReturnType();
+ SimplifiedValues[&CB] = ConstantInt::get(RT, C ? 1 : 0);
+ return true;
+}
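A hedged illustration of why this fold matters, assuming (as is commonly the case) that Clang lowers __builtin_constant_p to llvm.is.constant:

  // Hypothetical out-of-line fallback; only the declaration is needed here.
  int expensive_runtime_scale(int x);

  static inline int scale(int x) {
    // __builtin_constant_p(x) folds to 1 once x is a compile-time constant,
    // leaving only the cheap multiply; otherwise the runtime fallback stays.
    if (__builtin_constant_p(x))
      return x * 4;
    return expensive_runtime_scale(x);
  }

  // When the analyzer simulates inlining scale() here, x simplifies to the
  // literal 8, the intrinsic folds to 1, and the expensive branch is not
  // charged against the inline cost.
  int caller(void) { return scale(8); }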
+
bool CallAnalyzer::visitBitCast(BitCastInst &I) {
// Propagate constants through bitcasts.
if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
@@ -1799,8 +1895,8 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
SingleBBBonus = Threshold * SingleBBBonusPercent / 100;
VectorBonus = Threshold * VectorBonusPercent / 100;
- bool OnlyOneCallAndLocalLinkage =
- F.hasLocalLinkage() && F.hasOneUse() && &F == Call.getCalledFunction();
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
+ &F == Call.getCalledFunction();
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically. It may seem odd to update
// Cost in updateThreshold, but the bonus depends on the logic in this method.
@@ -2029,6 +2125,9 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallBase &Call) {
}
bool CallAnalyzer::visitCallBase(CallBase &Call) {
+ if (!onCallBaseVisitStart(Call))
+ return true;
+
if (Call.hasFnAttr(Attribute::ReturnsTwice) &&
!F.hasFnAttribute(Attribute::ReturnsTwice)) {
// This aborts the entire analysis.
@@ -2091,6 +2190,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
if (auto *SROAArg = getSROAArgForValueOrNull(II->getOperand(0)))
SROAArgValues[II] = SROAArg;
return true;
+ case Intrinsic::is_constant:
+ return simplifyIntrinsicCallIsConstant(Call);
}
}
@@ -2098,7 +2199,8 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) {
// This flag will fully abort the analysis, so don't bother with anything
// else.
IsRecursiveCall = true;
- return false;
+ if (!AllowRecursiveCall)
+ return false;
}
if (TTI.isLoweredToCall(F)) {
@@ -2123,7 +2225,7 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
// inliner more regular and predictable. Interestingly, conditional branches
// which will fold away are also free.
return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
- dyn_cast_or_null<ConstantInt>(
+ isa_and_nonnull<ConstantInt>(
SimplifiedValues.lookup(BI.getCondition()));
}
@@ -2305,11 +2407,8 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
// inlining due to debug symbols. Eventually, the number of unsimplified
// instructions shouldn't factor into the cost computation, but until then,
// hack around it here.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Skip pseudo-probes.
- if (isa<PseudoProbeInst>(I))
+ // Similarly, skip pseudo-probes.
+ if (I.isDebugOrPseudoInst())
continue;
// Skip ephemeral values.
@@ -2336,7 +2435,7 @@ CallAnalyzer::analyzeBlock(BasicBlock *BB,
using namespace ore;
// If the visit this instruction detected an uninlinable pattern, abort.
InlineResult IR = InlineResult::success();
- if (IsRecursiveCall)
+ if (IsRecursiveCall && !AllowRecursiveCall)
IR = InlineResult::failure("recursive");
else if (ExposesReturnsTwice)
IR = InlineResult::failure("exposes returns twice");
@@ -2398,7 +2497,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
unsigned AS = V->getType()->getPointerAddressSpace();
unsigned IntPtrWidth = DL.getIndexSizeInBits(AS);
- APInt Offset = APInt::getNullValue(IntPtrWidth);
+ APInt Offset = APInt::getZero(IntPtrWidth);
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
@@ -2601,7 +2700,7 @@ InlineResult CallAnalyzer::analyze() {
onBlockAnalyzed(BB);
}
- bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
+ bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneLiveUse() &&
&F == CandidateCall.getCalledFunction();
// If this is a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the caller (so the instruction
@@ -2612,10 +2711,10 @@ InlineResult CallAnalyzer::analyze() {
return finalizeAnalysis();
}
-void InlineCostCallAnalyzer::print() {
-#define DEBUG_PRINT_STAT(x) dbgs() << " " #x ": " << x << "\n"
+void InlineCostCallAnalyzer::print(raw_ostream &OS) {
+#define DEBUG_PRINT_STAT(x) OS << " " #x ": " << x << "\n"
if (PrintInstructionComments)
- F.print(dbgs(), &Writer);
+ F.print(OS, &Writer);
DEBUG_PRINT_STAT(NumConstantArgs);
DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
DEBUG_PRINT_STAT(NumAllocaArgs);
@@ -2634,7 +2733,7 @@ void InlineCostCallAnalyzer::print() {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Dump stats about this call's analysis.
-LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(); }
+LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
#endif
/// Test that there are no attribute conflicts between Caller and Callee
@@ -2849,13 +2948,13 @@ InlineCost llvm::getInlineCost(
return InlineCost::getNever("cost over benefit", CA.getCostBenefitPair());
}
- // Check if there was a reason to force inlining or no inlining.
- if (!ShouldInline.isSuccess() && CA.getCost() < CA.getThreshold())
- return InlineCost::getNever(ShouldInline.getFailureReason());
- if (ShouldInline.isSuccess() && CA.getCost() >= CA.getThreshold())
- return InlineCost::getAlways("empty function");
+ if (CA.wasDecidedByCostThreshold())
+ return InlineCost::get(CA.getCost(), CA.getThreshold());
- return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
+ // No details on how the decision was made, simply return always or never.
+ return ShouldInline.isSuccess()
+ ? InlineCost::getAlways("empty function")
+ : InlineCost::getNever(ShouldInline.getFailureReason());
}
InlineResult llvm::isInlineViable(Function &F) {
@@ -3028,7 +3127,8 @@ InlineCostAnnotationPrinterPass::run(Function &F,
ICCA.analyze();
OS << " Analyzing call of " << CalledFunction->getName()
<< "... (caller:" << CI->getCaller()->getName() << ")\n";
- ICCA.print();
+ ICCA.print(OS);
+ OS << "\n";
}
}
}
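The llvm.is.constant handling added in this file is easiest to picture from the source construct that produces the intrinsic. A minimal standalone sketch, not taken from the patch (safe_div is a made-up helper): Clang lowers __builtin_constant_p to llvm.is.constant, so when the call site being costed passes a literal, the new simplifyIntrinsicCallIsConstant path can record the intrinsic as folding to 1 and the guarded branch can be priced as folded away.
// Illustrative sketch only; compiles with Clang/GCC (uses the
// __builtin_constant_p extension, which is lowered to llvm.is.constant).
#include <cstdio>
static inline long safe_div(long a, long b) {
  // When b is a literal at the call site being analyzed for inlining, the
  // intrinsic is treated as true, so only the plain division is charged.
  if (__builtin_constant_p(b) && b != 0)
    return a / b;
  return b != 0 ? a / b : 0;
}
int main() {
  std::printf("%ld\n", safe_div(42, 7)); // b is a compile-time constant here
  return 0;
}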
diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
index 3c90e82fb952..a2e231e2d0f4 100644
--- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
+++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -1,9 +1,8 @@
//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
index 7d1e630e6e80..9fee57c54b85 100644
--- a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -19,11 +19,15 @@
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
+#define DEBUG_TYPE "ipt"
+STATISTIC(NumInstScanned, "Number of insts scanned while updating ibt");
+
#ifndef NDEBUG
static cl::opt<bool> ExpensiveAsserts(
"ipt-expensive-asserts",
@@ -64,11 +68,13 @@ bool InstructionPrecedenceTracking::isPreceededBySpecialInstruction(
void InstructionPrecedenceTracking::fill(const BasicBlock *BB) {
FirstSpecialInsts.erase(BB);
- for (auto &I : *BB)
+ for (auto &I : *BB) {
+ NumInstScanned++;
if (isSpecialInstruction(&I)) {
FirstSpecialInsts[BB] = &I;
return;
}
+ }
// Mark this block as having no special instructions.
FirstSpecialInsts[BB] = nullptr;
@@ -107,8 +113,10 @@ void InstructionPrecedenceTracking::insertInstructionTo(const Instruction *Inst,
}
void InstructionPrecedenceTracking::removeInstruction(const Instruction *Inst) {
- if (isSpecialInstruction(Inst))
- FirstSpecialInsts.erase(Inst->getParent());
+ auto *BB = Inst->getParent();
+ assert(BB && "must be called before instruction is actually removed");
+ if (FirstSpecialInsts.count(BB) && FirstSpecialInsts[BB] == Inst)
+ FirstSpecialInsts.erase(BB);
}
void InstructionPrecedenceTracking::removeUsersOf(const Instruction *Inst) {
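The tightened invalidation in removeInstruction is a small cache-consistency point: a block's cached first special instruction only needs to be dropped when that exact instruction is removed. A minimal sketch of the policy, with made-up names and std::map standing in for the LLVM containers:
#include <cassert>
#include <map>
#include <string>
struct FirstSpecialCache {
  std::map<std::string, const int *> FirstSpecial; // block -> cached instruction
  void removeInstruction(const std::string &BB, const int *Inst) {
    auto It = FirstSpecial.find(BB);
    if (It != FirstSpecial.end() && It->second == Inst)
      FirstSpecial.erase(It); // invalidate only when the cached entry itself dies
  }
};
int main() {
  int I1 = 0, I2 = 0;
  FirstSpecialCache C;
  C.FirstSpecial["entry"] = &I1;
  C.removeInstruction("entry", &I2); // unrelated removal keeps the cache
  assert(C.FirstSpecial.count("entry") == 1);
  C.removeInstruction("entry", &I1); // removing the cached instruction invalidates
  assert(C.FirstSpecial.count("entry") == 0);
  return 0;
}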
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 23083bc8178e..864eeea4f8bf 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -70,8 +70,8 @@ static Value *SimplifyOrInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const SimplifyQuery &, unsigned);
static Value *SimplifyCastInst(unsigned, Value *, Type *,
const SimplifyQuery &, unsigned);
-static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, const SimplifyQuery &,
- unsigned);
+static Value *SimplifyGEPInst(Type *, ArrayRef<Value *>, bool,
+ const SimplifyQuery &, unsigned);
static Value *SimplifySelectInst(Value *, Value *, Value *,
const SimplifyQuery &, unsigned);
@@ -698,13 +698,12 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &DL, Value *&V,
bool AllowNonInbounds = false) {
assert(V->getType()->isPtrOrPtrVectorTy());
- Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
- APInt Offset = APInt::getNullValue(IntIdxTy->getIntegerBitWidth());
+ APInt Offset = APInt::getZero(DL.getIndexTypeSizeInBits(V->getType()));
V = V->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds);
// As that strip may trace through `addrspacecast`, need to sext or trunc
// the offset calculated.
- IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
+ Type *IntIdxTy = DL.getIndexType(V->getType())->getScalarType();
Offset = Offset.sextOrTrunc(IntIdxTy->getIntegerBitWidth());
Constant *OffsetIntPtr = ConstantInt::get(IntIdxTy, Offset);
@@ -1407,8 +1406,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
match(Op0, m_c_Or(m_NUWShl(m_Value(X), m_APInt(ShLAmt)), m_Value(Y))) &&
*ShRAmt == *ShLAmt) {
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- const unsigned Width = Op0->getType()->getScalarSizeInBits();
- const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (ShRAmt->uge(EffWidthY))
return X;
}
@@ -1429,9 +1427,11 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
MaxRecurse))
return V;
- // all ones >>a X -> -1
+ // -1 >>a X --> -1
+ // (-1 << X) a>> X --> -1
// Do not return Op0 because it may contain undef elements if it's a vector.
- if (match(Op0, m_AllOnes()))
+ if (match(Op0, m_AllOnes()) ||
+ match(Op0, m_Shl(m_AllOnes(), m_Specific(Op1))))
return Constant::getAllOnesValue(Op0->getType());
// (X << A) >> A -> X
@@ -1765,7 +1765,7 @@ static Value *simplifyAndOrOfICmpsWithLimitConst(ICmpInst *Cmp0, ICmpInst *Cmp1,
if (match(Cmp0->getOperand(1), m_APInt(C)))
MinMaxC = HasNotOp ? ~*C : *C;
else if (isa<ConstantPointerNull>(Cmp0->getOperand(1)))
- MinMaxC = APInt::getNullValue(8);
+ MinMaxC = APInt::getZero(8);
else
return nullptr;
@@ -2040,24 +2040,32 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
if (match(Op1, m_c_Or(m_Specific(Op0), m_Value())))
return Op0;
+ // (X | Y) & (X | ~Y) --> X (commuted 8 ways)
+ Value *X, *Y;
+ if (match(Op0, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Op1, m_c_Or(m_Deferred(X), m_Deferred(Y))))
+ return X;
+ if (match(Op1, m_c_Or(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Op0, m_c_Or(m_Deferred(X), m_Deferred(Y))))
+ return X;
+
if (Value *V = simplifyLogicOfAddSub(Op0, Op1, Instruction::And))
return V;
// A mask that only clears known zeros of a shifted value is a no-op.
- Value *X;
const APInt *Mask;
const APInt *ShAmt;
if (match(Op1, m_APInt(Mask))) {
// If all bits in the inverted and shifted mask are clear:
// and (shl X, ShAmt), Mask --> shl X, ShAmt
if (match(Op0, m_Shl(m_Value(X), m_APInt(ShAmt))) &&
- (~(*Mask)).lshr(*ShAmt).isNullValue())
+ (~(*Mask)).lshr(*ShAmt).isZero())
return Op0;
// If all bits in the inverted and shifted mask are clear:
// and (lshr X, ShAmt), Mask --> lshr X, ShAmt
if (match(Op0, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
- (~(*Mask)).shl(*ShAmt).isNullValue())
+ (~(*Mask)).shl(*ShAmt).isZero())
return Op0;
}
@@ -2141,7 +2149,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
// if Mask = ((1 << effective_width_of(X)) - 1) << A
// SimplifyDemandedBits in InstCombine can optimize the general case.
// This pattern aims to help other passes for a common case.
- Value *Y, *XShifted;
+ Value *XShifted;
if (match(Op1, m_APInt(Mask)) &&
match(Op0, m_c_Or(m_CombineAnd(m_NUWShl(m_Value(X), m_APInt(ShAmt)),
m_Value(XShifted)),
@@ -2149,11 +2157,11 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
const unsigned Width = Op0->getType()->getScalarSizeInBits();
const unsigned ShftCnt = ShAmt->getLimitedValue(Width);
const KnownBits YKnown = computeKnownBits(Y, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- const unsigned EffWidthY = Width - YKnown.countMinLeadingZeros();
+ const unsigned EffWidthY = YKnown.countMaxActiveBits();
if (EffWidthY <= ShftCnt) {
const KnownBits XKnown = computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI,
Q.DT);
- const unsigned EffWidthX = Width - XKnown.countMinLeadingZeros();
+ const unsigned EffWidthX = XKnown.countMaxActiveBits();
const APInt EffBitsY = APInt::getLowBitsSet(Width, EffWidthY);
const APInt EffBitsX = APInt::getLowBitsSet(Width, EffWidthX) << ShftCnt;
// If the mask is extracting all bits from X or Y as is, we can skip
@@ -2257,6 +2265,19 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_c_Xor(m_Not(m_Specific(A)), m_Specific(B)))))
return Op0;
+ // (A | B) | (A ^ B) --> A | B
+ // (B | A) | (A ^ B) --> B | A
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op0, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Op0;
+
+ // Commute the outer 'or' operands.
+ // (A ^ B) | (A | B) --> A | B
+ // (A ^ B) | (B | A) --> B | A
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
+ return Op1;
+
// (~A & B) | ~(A | B) --> ~A
// (~A & B) | ~(B | A) --> ~A
// (B & ~A) | ~(A | B) --> ~A
@@ -2276,6 +2297,23 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
match(Op0, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))))
return NotA;
+ // Rotated -1 is still -1:
+ // (-1 << X) | (-1 >> (C - X)) --> -1
+ // (-1 >> X) | (-1 << (C - X)) --> -1
+ // ...with C <= bitwidth (and commuted variants).
+ Value *X, *Y;
+ if ((match(Op0, m_Shl(m_AllOnes(), m_Value(X))) &&
+ match(Op1, m_LShr(m_AllOnes(), m_Value(Y)))) ||
+ (match(Op1, m_Shl(m_AllOnes(), m_Value(X))) &&
+ match(Op0, m_LShr(m_AllOnes(), m_Value(Y))))) {
+ const APInt *C;
+ if ((match(X, m_Sub(m_APInt(C), m_Specific(Y))) ||
+ match(Y, m_Sub(m_APInt(C), m_Specific(X)))) &&
+ C->ule(X->getType()->getScalarSizeInBits())) {
+ return ConstantInt::getAllOnesValue(X->getType());
+ }
+ }
+
if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
return V;
@@ -3090,7 +3128,7 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
// - C isn't zero.
if (Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(LBO)) ||
- match(LHS, m_Shl(m_One(), m_Value())) || !C->isNullValue()) {
+ match(LHS, m_Shl(m_One(), m_Value())) || !C->isZero()) {
if (Pred == ICmpInst::ICMP_EQ)
return ConstantInt::getFalse(GetCompareTy(RHS));
if (Pred == ICmpInst::ICMP_NE)
@@ -3640,30 +3678,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
CRHS->getPointerOperand(), Q))
return C;
- if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
- if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
- if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
- GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() &&
- (ICmpInst::isEquality(Pred) ||
- (GLHS->isInBounds() && GRHS->isInBounds() &&
- Pred == ICmpInst::getSignedPredicate(Pred)))) {
- // The bases are equal and the indices are constant. Build a constant
- // expression GEP with the same indices and a null base pointer to see
- // what constant folding can make out of it.
- Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
- SmallVector<Value *, 4> IndicesLHS(GLHS->indices());
- Constant *NewLHS = ConstantExpr::getGetElementPtr(
- GLHS->getSourceElementType(), Null, IndicesLHS);
-
- SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
- Constant *NewRHS = ConstantExpr::getGetElementPtr(
- GLHS->getSourceElementType(), Null, IndicesRHS);
- Constant *NewICmp = ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
- return ConstantFoldConstant(NewICmp, Q.DL);
- }
- }
- }
-
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
@@ -3966,7 +3980,8 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
return PreventSelfSimplify(SimplifyGEPInst(GEP->getSourceElementType(),
- NewOps, Q, MaxRecurse - 1));
+ NewOps, GEP->isInBounds(), Q,
+ MaxRecurse - 1));
if (isa<SelectInst>(I))
return PreventSelfSimplify(
@@ -4080,6 +4095,22 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
std::swap(TrueVal, FalseVal);
}
+ // Check for integer min/max with a limit constant:
+ // X > MIN_INT ? X : MIN_INT --> X
+ // X < MAX_INT ? X : MAX_INT --> X
+ if (TrueVal->getType()->isIntOrIntVectorTy()) {
+ Value *X, *Y;
+ SelectPatternFlavor SPF =
+ matchDecomposedSelectPattern(cast<ICmpInst>(CondVal), TrueVal, FalseVal,
+ X, Y).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF) && Pred == getMinMaxPred(SPF)) {
+ APInt LimitC = getMinMaxLimit(getInverseMinMaxFlavor(SPF),
+ X->getType()->getScalarSizeInBits());
+ if (match(Y, m_SpecificInt(LimitC)))
+ return X;
+ }
+ }
+
if (Pred == ICmpInst::ICMP_EQ && match(CmpRHS, m_Zero())) {
Value *X;
const APInt *Y;
@@ -4210,14 +4241,27 @@ static Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
return FalseVal;
}
- // select i1 Cond, i1 true, i1 false --> i1 Cond
assert(Cond->getType()->isIntOrIntVectorTy(1) &&
"Select must have bool or bool vector condition");
assert(TrueVal->getType() == FalseVal->getType() &&
"Select must have same types for true/false ops");
- if (Cond->getType() == TrueVal->getType() &&
- match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
- return Cond;
+
+ if (Cond->getType() == TrueVal->getType()) {
+ // select i1 Cond, i1 true, i1 false --> i1 Cond
+ if (match(TrueVal, m_One()) && match(FalseVal, m_ZeroInt()))
+ return Cond;
+
+ // (X || Y) && (X || !Y) --> X (commuted 8 ways)
+ Value *X, *Y;
+ if (match(FalseVal, m_ZeroInt())) {
+ if (match(Cond, m_c_LogicalOr(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(TrueVal, m_c_LogicalOr(m_Specific(X), m_Specific(Y))))
+ return X;
+ if (match(TrueVal, m_c_LogicalOr(m_Value(X), m_Not(m_Value(Y)))) &&
+ match(Cond, m_c_LogicalOr(m_Specific(X), m_Specific(Y))))
+ return X;
+ }
+ }
// select ?, X, X -> X
if (TrueVal == FalseVal)
@@ -4295,7 +4339,7 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
/// Given operands for an GetElementPtrInst, see if we can fold the result.
/// If not, this returns null.
-static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds,
const SimplifyQuery &Q, unsigned) {
// The type of the GEP pointer operand.
unsigned AS =
@@ -4396,14 +4440,14 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
// gep (gep V, C), (sub 0, V) -> C
if (match(Ops.back(),
m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr)))) &&
- !BasePtrOffset.isNullValue()) {
+ !BasePtrOffset.isZero()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
// gep (gep V, C), (xor V, -1) -> C-1
if (match(Ops.back(),
m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes())) &&
- !BasePtrOffset.isOneValue()) {
+ !BasePtrOffset.isOne()) {
auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
return ConstantExpr::getIntToPtr(CI, GEPTy);
}
@@ -4415,13 +4459,13 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
return nullptr;
auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ops[0]),
- Ops.slice(1));
+ Ops.slice(1), InBounds);
return ConstantFoldConstant(CE, Q.DL);
}
-Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
+Value *llvm::SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops, bool InBounds,
const SimplifyQuery &Q) {
- return ::SimplifyGEPInst(SrcTy, Ops, Q, RecursionLimit);
+ return ::SimplifyGEPInst(SrcTy, Ops, InBounds, Q, RecursionLimit);
}
/// Given operands for an InsertValueInst, see if we can fold the result.
@@ -4891,6 +4935,11 @@ static Constant *simplifyFPOp(ArrayRef<Value *> Ops, FastMathFlags FMF,
return nullptr;
}
+// TODO: Move this out to a header file:
+static inline bool canIgnoreSNaN(fp::ExceptionBehavior EB, FastMathFlags FMF) {
+ return (EB == fp::ebIgnore || FMF.noNaNs());
+}
+
/// Given operands for an FAdd, see if we can fold the result. If not, this
/// returns null.
static Value *
@@ -4905,17 +4954,25 @@ SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (Constant *C = simplifyFPOp({Op0, Op1}, FMF, Q, ExBehavior, Rounding))
return C;
- if (!isDefaultFPEnvironment(ExBehavior, Rounding))
- return nullptr;
-
// fadd X, -0 ==> X
- if (match(Op1, m_NegZeroFP()))
- return Op0;
+ // With strict/constrained FP, we have these possible edge cases that do
+ // not simplify to Op0:
+ // fadd SNaN, -0.0 --> QNaN
+ // fadd +0.0, -0.0 --> -0.0 (but only with round toward negative)
+ if (canIgnoreSNaN(ExBehavior, FMF) &&
+ (!canRoundingModeBe(Rounding, RoundingMode::TowardNegative) ||
+ FMF.noSignedZeros()))
+ if (match(Op1, m_NegZeroFP()))
+ return Op0;
// fadd X, 0 ==> X, when we know X is not -0
- if (match(Op1, m_PosZeroFP()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
- return Op0;
+ if (canIgnoreSNaN(ExBehavior, FMF))
+ if (match(Op1, m_PosZeroFP()) &&
+ (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ return Op0;
+
+ if (!isDefaultFPEnvironment(ExBehavior, Rounding))
+ return nullptr;
// With nnan: -X + X --> 0.0 (and commuted variant)
// We don't have to explicitly exclude infinities (ninf): INF + -INF == NaN.
@@ -5457,6 +5514,9 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
if (match(Op0,
m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
return X;
+ // experimental.vector.reverse(splat(X)) -> splat(X)
+ if (isSplatValue(Op0))
+ return Op0;
break;
default:
break;
@@ -5772,13 +5832,32 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
- // Intrinsics with no operands have some kind of side effect. Don't simplify.
- unsigned NumOperands = Call->getNumArgOperands();
- if (!NumOperands)
- return nullptr;
-
+ unsigned NumOperands = Call->arg_size();
Function *F = cast<Function>(Call->getCalledFunction());
Intrinsic::ID IID = F->getIntrinsicID();
+
+ // Most of the intrinsics with no operands have some kind of side effect.
+ // Don't simplify.
+ if (!NumOperands) {
+ switch (IID) {
+ case Intrinsic::vscale: {
+ // Call may not be inserted into the IR yet at point of calling simplify.
+ if (!Call->getParent() || !Call->getParent()->getParent())
+ return nullptr;
+ auto Attr = Call->getFunction()->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return nullptr;
+ unsigned VScaleMin, VScaleMax;
+ std::tie(VScaleMin, VScaleMax) = Attr.getVScaleRangeArgs();
+ if (VScaleMin == VScaleMax && VScaleMax != 0)
+ return ConstantInt::get(F->getReturnType(), VScaleMin);
+ return nullptr;
+ }
+ default:
+ return nullptr;
+ }
+ }
+
if (NumOperands == 1)
return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
@@ -5814,9 +5893,18 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
- if (ShAmtC->urem(BitWidth).isNullValue())
+ if (ShAmtC->urem(BitWidth).isZero())
return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
}
+
+ // Rotating zero by anything is zero.
+ if (match(Op0, m_Zero()) && match(Op1, m_Zero()))
+ return ConstantInt::getNullValue(F->getReturnType());
+
+ // Rotating -1 by anything is -1.
+ if (match(Op0, m_AllOnes()) && match(Op1, m_AllOnes()))
+ return ConstantInt::getAllOnesValue(F->getReturnType());
+
return nullptr;
}
case Intrinsic::experimental_constrained_fma: {
@@ -5939,7 +6027,7 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
- unsigned NumArgs = Call->getNumArgOperands();
+ unsigned NumArgs = Call->arg_size();
ConstantArgs.reserve(NumArgs);
for (auto &Arg : Call->args()) {
Constant *C = dyn_cast<Constant>(&Arg);
@@ -5990,73 +6078,27 @@ Value *llvm::SimplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
return ::SimplifyFreezeInst(Op0, Q);
}
-static Constant *ConstructLoadOperandConstant(Value *Op) {
- SmallVector<Value *, 4> Worklist;
- // Invalid IR in unreachable code may contain self-referential values. Don't infinitely loop.
- SmallPtrSet<Value *, 4> Visited;
- Worklist.push_back(Op);
- while (true) {
- Value *CurOp = Worklist.back();
- if (!Visited.insert(CurOp).second)
- return nullptr;
- if (isa<Constant>(CurOp))
- break;
- if (auto *BC = dyn_cast<BitCastOperator>(CurOp)) {
- Worklist.push_back(BC->getOperand(0));
- } else if (auto *GEP = dyn_cast<GEPOperator>(CurOp)) {
- for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
- if (!isa<Constant>(GEP->getOperand(I)))
- return nullptr;
- }
- Worklist.push_back(GEP->getOperand(0));
- } else if (auto *II = dyn_cast<IntrinsicInst>(CurOp)) {
- if (II->isLaunderOrStripInvariantGroup())
- Worklist.push_back(II->getOperand(0));
- else
- return nullptr;
- } else {
- return nullptr;
- }
- }
-
- Constant *NewOp = cast<Constant>(Worklist.pop_back_val());
- while (!Worklist.empty()) {
- Value *CurOp = Worklist.pop_back_val();
- if (isa<BitCastOperator>(CurOp)) {
- NewOp = ConstantExpr::getBitCast(NewOp, CurOp->getType());
- } else if (auto *GEP = dyn_cast<GEPOperator>(CurOp)) {
- SmallVector<Constant *> Idxs;
- Idxs.reserve(GEP->getNumOperands() - 1);
- for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
- Idxs.push_back(cast<Constant>(GEP->getOperand(I)));
- }
- NewOp = ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), NewOp,
- Idxs, GEP->isInBounds(),
- GEP->getInRangeIndex());
- } else {
- assert(isa<IntrinsicInst>(CurOp) &&
- cast<IntrinsicInst>(CurOp)->isLaunderOrStripInvariantGroup() &&
- "expected invariant group intrinsic");
- NewOp = ConstantExpr::getBitCast(NewOp, CurOp->getType());
- }
- }
- return NewOp;
-}
-
static Value *SimplifyLoadInst(LoadInst *LI, Value *PtrOp,
const SimplifyQuery &Q) {
if (LI->isVolatile())
return nullptr;
- // Try to make the load operand a constant, specifically handle
- // invariant.group intrinsics.
+ APInt Offset(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
auto *PtrOpC = dyn_cast<Constant>(PtrOp);
- if (!PtrOpC)
- PtrOpC = ConstructLoadOperandConstant(PtrOp);
+ // Try to convert operand into a constant by stripping offsets while looking
+ // through invariant.group intrinsics. Don't bother if the underlying object
+ // is not constant, as calculating GEP offsets is expensive.
+ if (!PtrOpC && isa<Constant>(getUnderlyingObject(PtrOp))) {
+ PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
+ Q.DL, Offset, /* AllowNonInbounds */ true,
+ /* AllowInvariantGroup */ true);
+ // Index size may have changed due to address space casts.
+ Offset = Offset.sextOrTrunc(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()));
+ PtrOpC = dyn_cast<Constant>(PtrOp);
+ }
if (PtrOpC)
- return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Q.DL);
-
+ return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Offset, Q.DL);
return nullptr;
}
@@ -6156,8 +6198,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
Result = SimplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
break;
case Instruction::GetElementPtr: {
- Result = SimplifyGEPInst(cast<GetElementPtrInst>(I)->getSourceElementType(),
- NewOps, Q);
+ auto *GEPI = cast<GetElementPtrInst>(I);
+ Result = SimplifyGEPInst(GEPI->getSourceElementType(), NewOps,
+ GEPI->isInBounds(), Q);
break;
}
case Instruction::InsertValue: {
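Several of the folds added in this file are plain bit identities. A short standalone check over 32-bit values (plain C++ rather than LLVM IR; C++20 for std::rotl) makes them easy to verify:
#include <bit>
#include <cassert>
#include <cstdint>
int main() {
  const uint32_t Samples[] = {0u, 1u, 0x80000000u, 0xdeadbeefu, 0xffffffffu};
  for (uint32_t X : Samples)
    for (uint32_t Y : Samples) {
      // (X | Y) & (X | ~Y) --> X
      assert(((X | Y) & (X | ~Y)) == X);
      // (A | B) | (A ^ B) --> A | B
      assert(((X | Y) | (X ^ Y)) == (X | Y));
    }
  // fshl/fshr special case: rotating -1 by anything is still -1.
  for (int K = 0; K < 32; ++K)
    assert(std::rotl(0xffffffffu, K) == 0xffffffffu);
  return 0;
}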
diff --git a/llvm/lib/Analysis/LazyCallGraph.cpp b/llvm/lib/Analysis/LazyCallGraph.cpp
index 8f87552fca1f..0007c54b16d0 100644
--- a/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -220,8 +220,7 @@ bool LazyCallGraph::invalidate(Module &, const PreservedAnalyses &PA,
// Check whether the analysis, all analyses on functions, or the function's
// CFG have been preserved.
auto PAC = PA.getChecker<llvm::LazyCallGraphAnalysis>();
- return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>() ||
- PAC.preservedSet<CFGAnalyses>());
+ return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>());
}
LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
@@ -1962,6 +1961,29 @@ void LazyCallGraph::buildRefSCCs() {
});
}
+void LazyCallGraph::visitReferences(SmallVectorImpl<Constant *> &Worklist,
+ SmallPtrSetImpl<Constant *> &Visited,
+ function_ref<void(Function &)> Callback) {
+ while (!Worklist.empty()) {
+ Constant *C = Worklist.pop_back_val();
+
+ if (Function *F = dyn_cast<Function>(C)) {
+ if (!F->isDeclaration())
+ Callback(*F);
+ continue;
+ }
+
+ // blockaddresses are weird and don't participate in the call graph anyway,
+ // skip them.
+ if (isa<BlockAddress>(C))
+ continue;
+
+ for (Value *Op : C->operand_values())
+ if (Visited.insert(cast<Constant>(Op)).second)
+ Worklist.push_back(cast<Constant>(Op));
+ }
+}
+
AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
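The new visitReferences helper is a standard worklist-plus-visited walk over constant operands, calling back on defined functions and skipping blockaddresses. A non-LLVM sketch of the pattern, with integer node ids standing in for Constant pointers:
#include <cassert>
#include <set>
#include <vector>
int main() {
  // Tiny operand graph with a shared node: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}.
  std::vector<std::vector<int>> Operands = {{1, 2}, {3}, {3}, {}};
  std::vector<int> Worklist = {0};
  std::set<int> Visited = {0};
  int Processed = 0;
  while (!Worklist.empty()) {
    int C = Worklist.back();
    Worklist.pop_back();
    ++Processed;
    for (int Op : Operands[C])
      if (Visited.insert(Op).second) // enqueue each operand at most once
        Worklist.push_back(Op);
  }
  assert(Processed == 4); // the shared node 3 is processed exactly once
  return 0;
}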
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 1dababafb8a6..50fa169c2081 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -126,7 +126,7 @@ static ValueLatticeElement intersect(const ValueLatticeElement &A,
// Note: An empty range is implicitly converted to unknown or undef depending
// on MayIncludeUndef internally.
return ValueLatticeElement::getRange(
- std::move(Range), /*MayIncludeUndef=*/A.isConstantRangeIncludingUndef() |
+ std::move(Range), /*MayIncludeUndef=*/A.isConstantRangeIncludingUndef() ||
B.isConstantRangeIncludingUndef());
}
@@ -832,7 +832,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect(
};
}();
return ValueLatticeElement::getRange(
- ResultCR, TrueVal.isConstantRangeIncludingUndef() |
+ ResultCR, TrueVal.isConstantRangeIncludingUndef() ||
FalseVal.isConstantRangeIncludingUndef());
}
@@ -846,7 +846,7 @@ Optional<ValueLatticeElement> LazyValueInfoImpl::solveBlockValueSelect(
}
if (SPR.Flavor == SPF_NABS) {
- ConstantRange Zero(APInt::getNullValue(TrueCR.getBitWidth()));
+ ConstantRange Zero(APInt::getZero(TrueCR.getBitWidth()));
if (LHS == SI->getTrueValue())
return ValueLatticeElement::getRange(
Zero.sub(TrueCR.abs()), FalseVal.isConstantRangeIncludingUndef());
@@ -1117,12 +1117,11 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
}
// If (Val & Mask) != 0 then the value must be larger than the lowest set
// bit of Mask.
- if (EdgePred == ICmpInst::ICMP_NE && !Mask->isNullValue() &&
- C->isNullValue()) {
+ if (EdgePred == ICmpInst::ICMP_NE && !Mask->isZero() && C->isZero()) {
unsigned BitWidth = Ty->getIntegerBitWidth();
return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()),
- APInt::getNullValue(BitWidth)));
+ APInt::getZero(BitWidth)));
}
}
@@ -1780,62 +1779,62 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
// We could consider extending this to search further backwards through the
// CFG and/or value graph, but there are non-obvious compile time vs quality
// tradeoffs.
- if (CxtI) {
- BasicBlock *BB = CxtI->getParent();
-
- // Function entry or an unreachable block. Bail to avoid confusing
- // analysis below.
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- if (PI == PE)
- return Unknown;
-
- // If V is a PHI node in the same block as the context, we need to ask
- // questions about the predicate as applied to the incoming value along
- // each edge. This is useful for eliminating cases where the predicate is
- // known along all incoming edges.
- if (auto *PHI = dyn_cast<PHINode>(V))
- if (PHI->getParent() == BB) {
- Tristate Baseline = Unknown;
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
- Value *Incoming = PHI->getIncomingValue(i);
- BasicBlock *PredBB = PHI->getIncomingBlock(i);
- // Note that PredBB may be BB itself.
- Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB,
- CxtI);
-
- // Keep going as long as we've seen a consistent known result for
- // all inputs.
- Baseline = (i == 0) ? Result /* First iteration */
- : (Baseline == Result ? Baseline : Unknown); /* All others */
- if (Baseline == Unknown)
- break;
- }
- if (Baseline != Unknown)
- return Baseline;
+ BasicBlock *BB = CxtI->getParent();
+
+ // Function entry or an unreachable block. Bail to avoid confusing
+ // analysis below.
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (PI == PE)
+ return Unknown;
+
+ // If V is a PHI node in the same block as the context, we need to ask
+ // questions about the predicate as applied to the incoming value along
+ // each edge. This is useful for eliminating cases where the predicate is
+ // known along all incoming edges.
+ if (auto *PHI = dyn_cast<PHINode>(V))
+ if (PHI->getParent() == BB) {
+ Tristate Baseline = Unknown;
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) {
+ Value *Incoming = PHI->getIncomingValue(i);
+ BasicBlock *PredBB = PHI->getIncomingBlock(i);
+ // Note that PredBB may be BB itself.
+ Tristate Result =
+ getPredicateOnEdge(Pred, Incoming, C, PredBB, BB, CxtI);
+
+ // Keep going as long as we've seen a consistent known result for
+ // all inputs.
+ Baseline = (i == 0) ? Result /* First iteration */
+ : (Baseline == Result ? Baseline
+ : Unknown); /* All others */
+ if (Baseline == Unknown)
+ break;
}
+ if (Baseline != Unknown)
+ return Baseline;
+ }
- // For a comparison where the V is outside this block, it's possible
- // that we've branched on it before. Look to see if the value is known
- // on all incoming edges.
- if (!isa<Instruction>(V) ||
- cast<Instruction>(V)->getParent() != BB) {
- // For predecessor edge, determine if the comparison is true or false
- // on that edge. If they're all true or all false, we can conclude
- // the value of the comparison in this block.
- Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
- if (Baseline != Unknown) {
- // Check that all remaining incoming values match the first one.
- while (++PI != PE) {
- Tristate Ret = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
- if (Ret != Baseline) break;
- }
- // If we terminated early, then one of the values didn't match.
- if (PI == PE) {
- return Baseline;
- }
+ // For a comparison where the V is outside this block, it's possible
+ // that we've branched on it before. Look to see if the value is known
+ // on all incoming edges.
+ if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB) {
+ // For predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can conclude
+ // the value of the comparison in this block.
+ Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Baseline != Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ Tristate Ret = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);
+ if (Ret != Baseline)
+ break;
+ }
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ return Baseline;
}
}
}
+
return Unknown;
}
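The range built above for the (Val & Mask) != 0, C == 0 case encodes a simple fact: any value that shares a set bit with Mask is at least Mask's lowest set bit. A quick exhaustive check in plain C++ over 8-bit values:
#include <cassert>
int main() {
  const unsigned Mask = 0b00101000u; // lowest set bit is 1 << 3 == 8
  for (unsigned Val = 0; Val < 256; ++Val)
    if ((Val & Mask) != 0)
      assert(Val >= 8u); // matches the range starting at the lowest set bit
  return 0;
}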
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index 4de5e1e06c7e..f9a7a5bdf434 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -235,7 +235,7 @@ void Lint::visitCallBase(CallBase &I) {
for (auto BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack so we're not really passing the pointer anyway.
- if (PAL.hasParamAttribute(ArgNo, Attribute::ByVal))
+ if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
continue;
// If both arguments are readonly, they have no dependence.
if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
@@ -268,7 +268,7 @@ void Lint::visitCallBase(CallBase &I) {
for (Value *Arg : I.args()) {
// Skip ByVal arguments since they will be memcpy'd to the callee's
// stack anyway.
- if (PAL.hasParamAttribute(ArgNo++, Attribute::ByVal))
+ if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
continue;
Value *Obj = findValue(Arg, /*OffsetOk=*/true);
Assert(!isa<AllocaInst>(Obj),
@@ -715,6 +715,7 @@ PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) {
return PreservedAnalyses::all();
}
+namespace {
class LintLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
@@ -733,6 +734,7 @@ public:
}
void print(raw_ostream &O, const Module *M) const override {}
};
+} // namespace
char LintLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 1c55f485aa76..0fbf1db0685d 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -147,7 +147,7 @@ static bool isDereferenceableAndAlignedPointer(
Alignment, Size, DL, CtxI, DT,
TLI, Visited, MaxDepth);
- if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V))
+ if (const AddrSpaceCastOperator *ASC = dyn_cast<AddrSpaceCastOperator>(V))
return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Alignment,
Size, DL, CtxI, DT, TLI,
Visited, MaxDepth);
@@ -451,8 +451,8 @@ static bool areNonOverlapSameBaseLoadAndStore(const Value *LoadPtr,
const Value *StorePtr,
Type *StoreTy,
const DataLayout &DL) {
- APInt LoadOffset(DL.getTypeSizeInBits(LoadPtr->getType()), 0);
- APInt StoreOffset(DL.getTypeSizeInBits(StorePtr->getType()), 0);
+ APInt LoadOffset(DL.getIndexTypeSizeInBits(LoadPtr->getType()), 0);
+ APInt StoreOffset(DL.getIndexTypeSizeInBits(StorePtr->getType()), 0);
const Value *LoadBase = LoadPtr->stripAndAccumulateConstantOffsets(
DL, LoadOffset, /* AllowNonInbounds */ false);
const Value *StoreBase = StorePtr->stripAndAccumulateConstantOffsets(
@@ -511,8 +511,11 @@ static Value *getAvailableLoadStore(Instruction *Inst, const Value *Ptr,
if (CastInst::isBitOrNoopPointerCastable(Val->getType(), AccessTy, DL))
return Val;
- if (auto *C = dyn_cast<Constant>(Val))
- return ConstantFoldLoadThroughBitcast(C, AccessTy, DL);
+ TypeSize StoreSize = DL.getTypeStoreSize(Val->getType());
+ TypeSize LoadSize = DL.getTypeStoreSize(AccessTy);
+ if (TypeSize::isKnownLE(LoadSize, StoreSize))
+ if (auto *C = dyn_cast<Constant>(Val))
+ return ConstantFoldLoadFromConst(C, AccessTy, DL);
}
return nullptr;
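The TypeSize guard added to getAvailableLoadStore captures a byte-level rule: a stored constant can only be forwarded to a load that reads no more bytes than the store wrote. A small sketch of why, using memcpy in place of IR load/store:
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  unsigned char Buf[8] = {};
  const uint32_t Stored = 0x01020304u;
  std::memcpy(Buf, &Stored, sizeof(Stored)); // 4-byte "store"
  uint32_t Load32;
  std::memcpy(&Load32, Buf, sizeof(Load32)); // 4-byte load: fully covered
  assert(Load32 == Stored);
  uint64_t Load64;
  std::memcpy(&Load64, Buf, sizeof(Load64)); // 8-byte load: the extra bytes come
  (void)Load64;                              // from elsewhere in the buffer, so it
  return 0;                                  // cannot simply be replaced by Stored
}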
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index a239928ecf38..f9bd7167317f 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -142,13 +142,12 @@ Value *llvm::stripIntegerCast(Value *V) {
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr) {
+ Value *Ptr) {
const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
// If there is an entry in the map return the SCEV of the pointer with the
// symbolic stride replaced by one.
- ValueToValueMap::const_iterator SI =
- PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
+ ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
if (SI == PtrToStride.end())
// For a non-symbolic stride, just return the original expression.
return OrigSCEV;
@@ -659,7 +658,8 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;
- int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
+ Type *AccessTy = Ptr->getType()->getPointerElementType();
+ int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;
@@ -1026,15 +1026,17 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}
/// Check whether the access through \p Ptr has a constant stride.
-int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
- const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume, bool ShouldCheckWrap) {
+int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
+ Value *Ptr, const Loop *Lp,
+ const ValueToValueMap &StridesMap, bool Assume,
+ bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
+ unsigned AddrSpace = Ty->getPointerAddressSpace();
- // Make sure that the pointer does not point to aggregate types.
- auto *PtrTy = cast<PointerType>(Ty);
- if (PtrTy->getElementType()->isAggregateType()) {
+ // Make sure we're not accessing an aggregate type.
+ // TODO: Why? This doesn't make any sense.
+ if (AccessTy->isAggregateType()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
@@ -1071,8 +1073,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
if (!IsNoWrapAddRec && !IsInBoundsGEP &&
- NullPointerIsDefined(Lp->getHeader()->getParent(),
- PtrTy->getAddressSpace())) {
+ NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
IsNoWrapAddRec = true;
@@ -1100,7 +1101,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
}
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
- int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+ int64_t Size = DL.getTypeAllocSize(AccessTy);
const APInt &APStepVal = C->getAPInt();
// Huge step value - give up.
@@ -1120,7 +1121,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// zero we know that this won't happen without triggering undefined behavior.
if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
(IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
- PtrTy->getAddressSpace()))) {
+ AddrSpace))) {
if (Assume) {
// We can avoid this case by adding a run-time check.
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
@@ -1262,6 +1263,47 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return Diff && *Diff == 1;
}
+static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
+ function_ref<void(Value *)> AddPointer) {
+ SmallPtrSet<Value *, 8> Visited;
+ SmallVector<Value *> WorkList;
+ WorkList.push_back(StartPtr);
+
+ while (!WorkList.empty()) {
+ Value *Ptr = WorkList.pop_back_val();
+ if (!Visited.insert(Ptr).second)
+ continue;
+ auto *PN = dyn_cast<PHINode>(Ptr);
+ // SCEV does not look through non-header PHIs inside the loop. Such phis
+ // can be analyzed by adding separate accesses for each incoming pointer
+ // value.
+ if (PN && InnermostLoop.contains(PN->getParent()) &&
+ PN->getParent() != InnermostLoop.getHeader()) {
+ for (const Use &Inc : PN->incoming_values())
+ WorkList.push_back(Inc);
+ } else
+ AddPointer(Ptr);
+ }
+}
+
+void MemoryDepChecker::addAccess(StoreInst *SI) {
+ visitPointers(SI->getPointerOperand(), *InnermostLoop,
+ [this, SI](Value *Ptr) {
+ Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
+ InstMap.push_back(SI);
+ ++AccessIdx;
+ });
+}
+
+void MemoryDepChecker::addAccess(LoadInst *LI) {
+ visitPointers(LI->getPointerOperand(), *InnermostLoop,
+ [this, LI](Value *Ptr) {
+ Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
+ InstMap.push_back(LI);
+ ++AccessIdx;
+ });
+}
+
MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
switch (Type) {
@@ -1478,6 +1520,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
+ Type *ATy = APtr->getType()->getPointerElementType();
+ Type *BTy = BPtr->getType()->getPointerElementType();
// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1488,8 +1532,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;
- int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
- int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
+ int64_t StrideAPtr =
+ getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
+ int64_t StrideBPtr =
+ getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);
const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1498,6 +1544,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// dependence.
if (StrideAPtr < 0) {
std::swap(APtr, BPtr);
+ std::swap(ATy, BTy);
std::swap(Src, Sink);
std::swap(AIsWrite, BIsWrite);
std::swap(AIdx, BIdx);
@@ -1519,8 +1566,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
- Type *ATy = APtr->getType()->getPointerElementType();
- Type *BTy = BPtr->getType()->getPointerElementType();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
uint64_t Stride = std::abs(StrideAPtr);
@@ -1958,7 +2003,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
- Accesses.addStore(Loc);
+ visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+ [&Accesses, Loc](Value *Ptr) {
+ MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
+ Accesses.addStore(NewLoc);
+ });
}
}
@@ -1982,7 +2031,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// words may be written to the same address.
bool IsReadOnlyPtr = false;
if (Seen.insert(Ptr).second ||
- !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
+ !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
}
@@ -2002,7 +2051,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
Loc.AATags.TBAA = nullptr;
- Accesses.addLoad(Loc, IsReadOnlyPtr);
+ visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
+ [&Accesses, Loc, IsReadOnlyPtr](Value *Ptr) {
+ MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
+ Accesses.addLoad(NewLoc, IsReadOnlyPtr);
+ });
}
// If we write (or read-write) to a single destination and there are no
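The new visitPointers walk targets loops where the address used by an access is merged from several pointers inside the loop body. An illustrative C++ shape (whether it lowers to a pointer PHI in a non-header block or to a select depends on later simplification):
#include <cstddef>
void split_store(float *A, float *B, const float *C, std::size_t N) {
  for (std::size_t I = 0; I < N; ++I) {
    float *Dst;
    if (C[I] > 0.0f)   // the two incoming pointers meet in a block that is
      Dst = A + I;     // not the loop header, so SCEV cannot look through
    else               // the merge; the analysis now registers an access
      Dst = B + I;     // for each incoming pointer instead
    *Dst = C[I] * 2.0f;
  }
}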
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 8a613647bbea..7b895d8a5dc2 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -30,6 +30,7 @@
#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -290,8 +291,8 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
const SCEV *Coeff = getLastCoefficient();
const SCEV *ElemSize = Sizes.back();
const SCEV *Stride = SE.getMulExpr(Coeff, ElemSize);
- const SCEV *CacheLineSize = SE.getConstant(Stride->getType(), CLS);
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
+ const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
if (SE.isKnownNegative(Stride))
Stride = SE.getNegativeSCEV(Stride);
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
@@ -344,8 +345,8 @@ bool IndexedReference::delinearize(const LoopInfo &LI) {
LLVM_DEBUG(dbgs().indent(2) << "In Loop '" << L->getName()
<< "', AccessFn: " << *AccessFn << "\n");
- SE.delinearize(AccessFn, Subscripts, Sizes,
- SE.getElementSize(&StoreOrLoadInst));
+ llvm::delinearize(SE, AccessFn, Subscripts, Sizes,
+ SE.getElementSize(&StoreOrLoadInst));
if (Subscripts.empty() || Sizes.empty() ||
Subscripts.size() != Sizes.size()) {
@@ -425,9 +426,7 @@ bool IndexedReference::isConsecutive(const Loop &L, unsigned CLS) const {
const SCEV *IndexedReference::getLastCoefficient() const {
const SCEV *LastSubscript = getLastSubscript();
- assert(isa<SCEVAddRecExpr>(LastSubscript) &&
- "Expecting a SCEV add recurrence expression");
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LastSubscript);
+ auto *AR = cast<SCEVAddRecExpr>(LastSubscript);
return AR->getStepRecurrence(SE);
}
@@ -522,10 +521,9 @@ void CacheCost::calculateCacheFootprint() {
LLVM_DEBUG(dbgs() << "COMPUTING LOOP CACHE COSTS\n");
for (const Loop *L : Loops) {
- assert((std::find_if(LoopCosts.begin(), LoopCosts.end(),
- [L](const LoopCacheCostTy &LCC) {
- return LCC.first == L;
- }) == LoopCosts.end()) &&
+ assert(llvm::none_of(
+ LoopCosts,
+ [L](const LoopCacheCostTy &LCC) { return LCC.first == L; }) &&
"Should not add duplicate element");
CacheCostTy LoopCost = computeLoopCacheCost(*L, RefGroups);
LoopCosts.push_back(std::make_pair(L, LoopCost));
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 66aab4c195c8..b35fb2a190f6 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -301,15 +301,16 @@ PHINode *Loop::getInductionVariable(ScalarEvolution &SE) const {
if (!CmpInst)
return nullptr;
- Instruction *LatchCmpOp0 = dyn_cast<Instruction>(CmpInst->getOperand(0));
- Instruction *LatchCmpOp1 = dyn_cast<Instruction>(CmpInst->getOperand(1));
+ Value *LatchCmpOp0 = CmpInst->getOperand(0);
+ Value *LatchCmpOp1 = CmpInst->getOperand(1);
for (PHINode &IndVar : Header->phis()) {
InductionDescriptor IndDesc;
if (!InductionDescriptor::isInductionPHI(&IndVar, this, &SE, IndDesc))
continue;
- Instruction *StepInst = IndDesc.getInductionBinOp();
+ BasicBlock *Latch = getLoopLatch();
+ Value *StepInst = IndVar.getIncomingValueForBlock(Latch);
// case 1:
// IndVar = phi[{InitialValue, preheader}, {StepInst, latch}]
@@ -1102,6 +1103,11 @@ llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
return IntMD->getSExtValue();
}
+int llvm::getIntLoopAttribute(const Loop *TheLoop, StringRef Name,
+ int Default) {
+ return getOptionalIntLoopAttribute(TheLoop, Name).getValueOr(Default);
+}
+
static const char *LLVMLoopMustProgress = "llvm.loop.mustprogress";
bool llvm::hasMustProgress(const Loop *L) {
diff --git a/llvm/lib/Analysis/LoopNestAnalysis.cpp b/llvm/lib/Analysis/LoopNestAnalysis.cpp
index 2649ed60f762..675bb7a7749c 100644
--- a/llvm/lib/Analysis/LoopNestAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopNestAnalysis.cpp
@@ -50,8 +50,66 @@ std::unique_ptr<LoopNest> LoopNest::getLoopNest(Loop &Root,
return std::make_unique<LoopNest>(Root, SE);
}
+static CmpInst *getOuterLoopLatchCmp(const Loop &OuterLoop) {
+
+ const BasicBlock *Latch = OuterLoop.getLoopLatch();
+ assert(Latch && "Expecting a valid loop latch");
+
+ const BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
+ assert(BI && BI->isConditional() &&
+ "Expecting loop latch terminator to be a branch instruction");
+
+ CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(BI->getCondition());
+ DEBUG_WITH_TYPE(
+ VerboseDebug, if (OuterLoopLatchCmp) {
+ dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp
+ << "\n";
+ });
+ return OuterLoopLatchCmp;
+}
+
+static CmpInst *getInnerLoopGuardCmp(const Loop &InnerLoop) {
+
+ BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch();
+ CmpInst *InnerLoopGuardCmp =
+ (InnerGuard) ? dyn_cast<CmpInst>(InnerGuard->getCondition()) : nullptr;
+
+ DEBUG_WITH_TYPE(
+ VerboseDebug, if (InnerLoopGuardCmp) {
+ dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp
+ << "\n";
+ });
+ return InnerLoopGuardCmp;
+}
+
+static bool checkSafeInstruction(const Instruction &I,
+ const CmpInst *InnerLoopGuardCmp,
+ const CmpInst *OuterLoopLatchCmp,
+ Optional<Loop::LoopBounds> OuterLoopLB) {
+
+ bool IsAllowed =
+ isSafeToSpeculativelyExecute(&I) || isa<PHINode>(I) || isa<BranchInst>(I);
+ if (!IsAllowed)
+ return false;
+ // The only binary instruction allowed is the outer loop step instruction,
+ // the only comparison instructions allowed are the inner loop guard
+ // compare instruction and the outer loop latch compare instruction.
+ if ((isa<BinaryOperator>(I) && &I != &OuterLoopLB->getStepInst()) ||
+ (isa<CmpInst>(I) && &I != OuterLoopLatchCmp && &I != InnerLoopGuardCmp)) {
+ return false;
+ }
+ return true;
+}
+
bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
ScalarEvolution &SE) {
+ return (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE) ==
+ PerfectLoopNest);
+}
+
+LoopNest::LoopNestEnum LoopNest::analyzeLoopNestForPerfectNest(
+ const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) {
+
assert(!OuterLoop.isInnermost() && "Outer loop should have subloops");
assert(!InnerLoop.isOutermost() && "Inner loop should have a parent");
LLVM_DEBUG(dbgs() << "Checking whether loop '" << OuterLoop.getName()
@@ -66,7 +124,7 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
// the outer loop latch.
if (!checkLoopsStructure(OuterLoop, InnerLoop, SE)) {
LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure.\n");
- return false;
+ return InvalidLoopStructure;
}
// Bail out if we cannot retrieve the outer loop bounds.
@@ -74,33 +132,11 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
if (OuterLoopLB == None) {
LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: "
<< OuterLoop << "\n";);
- return false;
+ return OuterLoopLowerBoundUnknown;
}
- // Identify the outer loop latch comparison instruction.
- const BasicBlock *Latch = OuterLoop.getLoopLatch();
- assert(Latch && "Expecting a valid loop latch");
- const BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
- assert(BI && BI->isConditional() &&
- "Expecting loop latch terminator to be a branch instruction");
-
- const CmpInst *OuterLoopLatchCmp = dyn_cast<CmpInst>(BI->getCondition());
- DEBUG_WITH_TYPE(
- VerboseDebug, if (OuterLoopLatchCmp) {
- dbgs() << "Outer loop latch compare instruction: " << *OuterLoopLatchCmp
- << "\n";
- });
-
- // Identify the inner loop guard instruction.
- BranchInst *InnerGuard = InnerLoop.getLoopGuardBranch();
- const CmpInst *InnerLoopGuardCmp =
- (InnerGuard) ? dyn_cast<CmpInst>(InnerGuard->getCondition()) : nullptr;
-
- DEBUG_WITH_TYPE(
- VerboseDebug, if (InnerLoopGuardCmp) {
- dbgs() << "Inner loop guard compare instruction: " << *InnerLoopGuardCmp
- << "\n";
- });
+ CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop);
+ CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop);
// Determine whether instructions in a basic block are one of:
// - the inner loop guard comparison
@@ -109,29 +145,15 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
// - a phi node, a cast or a branch
auto containsOnlySafeInstructions = [&](const BasicBlock &BB) {
return llvm::all_of(BB, [&](const Instruction &I) {
- bool isAllowed = isSafeToSpeculativelyExecute(&I) || isa<PHINode>(I) ||
- isa<BranchInst>(I);
- if (!isAllowed) {
- DEBUG_WITH_TYPE(VerboseDebug, {
- dbgs() << "Instruction: " << I << "\nin basic block: " << BB
- << " is considered unsafe.\n";
- });
- return false;
- }
-
- // The only binary instruction allowed is the outer loop step instruction,
- // the only comparison instructions allowed are the inner loop guard
- // compare instruction and the outer loop latch compare instruction.
- if ((isa<BinaryOperator>(I) && &I != &OuterLoopLB->getStepInst()) ||
- (isa<CmpInst>(I) && &I != OuterLoopLatchCmp &&
- &I != InnerLoopGuardCmp)) {
+ bool IsSafeInstr = checkSafeInstruction(I, InnerLoopGuardCmp,
+ OuterLoopLatchCmp, OuterLoopLB);
+ if (IsSafeInstr) {
DEBUG_WITH_TYPE(VerboseDebug, {
dbgs() << "Instruction: " << I << "\nin basic block:" << BB
<< "is unsafe.\n";
});
- return false;
}
- return true;
+ return IsSafeInstr;
});
};
@@ -148,13 +170,72 @@ bool LoopNest::arePerfectlyNested(const Loop &OuterLoop, const Loop &InnerLoop,
!containsOnlySafeInstructions(*InnerLoop.getExitBlock())) {
LLVM_DEBUG(dbgs() << "Not perfectly nested: code surrounding inner loop is "
"unsafe\n";);
- return false;
+ return ImperfectLoopNest;
}
LLVM_DEBUG(dbgs() << "Loop '" << OuterLoop.getName() << "' and '"
<< InnerLoop.getName() << "' are perfectly nested.\n");
- return true;
+ return PerfectLoopNest;
+}
+
+LoopNest::InstrVectorTy LoopNest::getInterveningInstructions(
+ const Loop &OuterLoop, const Loop &InnerLoop, ScalarEvolution &SE) {
+ InstrVectorTy Instr;
+ switch (analyzeLoopNestForPerfectNest(OuterLoop, InnerLoop, SE)) {
+ case PerfectLoopNest:
+ LLVM_DEBUG(dbgs() << "The loop Nest is Perfect, returning empty "
+ "instruction vector. \n";);
+ return Instr;
+
+ case InvalidLoopStructure:
+ LLVM_DEBUG(dbgs() << "Not perfectly nested: invalid loop structure. "
+ "Instruction vector is empty.\n";);
+ return Instr;
+
+ case OuterLoopLowerBoundUnknown:
+ LLVM_DEBUG(dbgs() << "Cannot compute loop bounds of OuterLoop: "
+ << OuterLoop << "\nInstruction vector is empty.\n";);
+ return Instr;
+
+ case ImperfectLoopNest:
+ break;
+ }
+
+ // Identify the outer loop latch comparison instruction.
+ auto OuterLoopLB = OuterLoop.getBounds(SE);
+
+ CmpInst *OuterLoopLatchCmp = getOuterLoopLatchCmp(OuterLoop);
+ CmpInst *InnerLoopGuardCmp = getInnerLoopGuardCmp(InnerLoop);
+
+ auto GetUnsafeInstructions = [&](const BasicBlock &BB) {
+ for (const Instruction &I : BB) {
+ if (!checkSafeInstruction(I, InnerLoopGuardCmp, OuterLoopLatchCmp,
+ OuterLoopLB)) {
+ Instr.push_back(&I);
+ DEBUG_WITH_TYPE(VerboseDebug, {
+ dbgs() << "Instruction: " << I << "\nin basic block:" << BB
+ << "is unsafe.\n";
+ });
+ }
+ }
+ };
+
+ // Check the code surrounding the inner loop for instructions that are deemed
+ // unsafe.
+ const BasicBlock *OuterLoopHeader = OuterLoop.getHeader();
+ const BasicBlock *OuterLoopLatch = OuterLoop.getLoopLatch();
+ const BasicBlock *InnerLoopPreHeader = InnerLoop.getLoopPreheader();
+ const BasicBlock *InnerLoopExitBlock = InnerLoop.getExitBlock();
+
+ GetUnsafeInstructions(*OuterLoopHeader);
+ GetUnsafeInstructions(*OuterLoopLatch);
+ GetUnsafeInstructions(*InnerLoopExitBlock);
+
+ if (InnerLoopPreHeader != OuterLoopHeader) {
+ GetUnsafeInstructions(*InnerLoopPreHeader);
+ }
+ return Instr;
}
SmallVector<LoopVectorTy, 4>
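
The new LoopNest::getInterveningInstructions entry point reuses the same checkSafeInstruction predicate as arePerfectlyNested, but instead of a yes/no answer it collects the offending instructions. A minimal sketch of how a client might consume it, assuming InstrVectorTy and the static entry point are exposed by the matching header change (the helper below is illustrative, not part of the patch):

    #include "llvm/Analysis/LoopNestAnalysis.h"
    #include "llvm/Support/Debug.h"

    using namespace llvm;

    // Dump whatever prevents {OuterLoop, InnerLoop} from being a perfect nest.
    // An empty result means the nest is perfect or could not be analyzed
    // (invalid structure / unknown outer-loop bounds).
    static void reportNestBlockers(const Loop &OuterLoop, const Loop &InnerLoop,
                                   ScalarEvolution &SE) {
      LoopNest::InstrVectorTy Blockers =
          LoopNest::getInterveningInstructions(OuterLoop, InnerLoop, SE);
      for (const Instruction *I : Blockers)
        dbgs() << "intervening instruction: " << *I << "\n";
    }
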
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 5b95ed223fd9..6fc4c42bdd71 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -116,6 +116,8 @@ MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
void MLInlineAdvisor::onPassEntry() {
// Function passes executed between InlinerPass runs may have changed the
// module-wide features.
+ if (!Invalid)
+ return;
NodeCount = 0;
EdgeCount = 0;
for (auto &F : M)
@@ -123,6 +125,7 @@ void MLInlineAdvisor::onPassEntry() {
++NodeCount;
EdgeCount += getLocalCalls(F);
}
+ Invalid = false;
}
int64_t MLInlineAdvisor::getLocalCalls(Function &F) {
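
onPassEntry now guards the module-wide recount behind the Invalid flag, so repeated inliner entries reuse the cached node and edge counts until something marks them stale. Reduced to a self-contained sketch (everything except the Invalid/NodeCount/EdgeCount names is a placeholder):

    struct CachedModuleCounts {
      bool Invalid = true; // Start stale so the first refresh always recomputes.
      unsigned NodeCount = 0;
      unsigned EdgeCount = 0;

      void refresh() {
        if (!Invalid)
          return; // Cached values are still good; skip the expensive walk.
        NodeCount = 0;
        EdgeCount = 0;
        // ... walk the module and recount nodes/edges here ...
        Invalid = false;
      }

      void markStale() { Invalid = true; } // Call when the module changes.
    };
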
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 68e997656d84..4f2b5b34304d 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -111,7 +111,7 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_reallocf, {ReallocLike, 2, 1, -1}},
{LibFunc_strdup, {StrDupLike, 1, -1, -1}},
{LibFunc_strndup, {StrDupLike, 2, 1, -1}},
- {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1}}
+ {LibFunc___kmpc_alloc_shared, {MallocLike, 1, 0, -1}},
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
@@ -135,9 +135,8 @@ static const Function *getCalledFunction(const Value *V, bool LookThroughBitCast
return nullptr;
}
-/// Returns the allocation data for the given value if it's either a call to a
-/// known allocation function, or a call to a function with the allocsize
-/// attribute.
+/// Returns the allocation data for the given value if it's a call to a known
+/// allocation function.
static Optional<AllocFnsTy>
getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
const TargetLibraryInfo *TLI) {
@@ -610,7 +609,7 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout &DL,
SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) {
IntTyBits = DL.getIndexTypeSizeInBits(V->getType());
- Zero = APInt::getNullValue(IntTyBits);
+ Zero = APInt::getZero(IntTyBits);
V = V->stripPointerCasts();
if (Instruction *I = dyn_cast<Instruction>(V)) {
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index ef9cda37ce35..7f2d04c49565 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -35,54 +35,44 @@ void LocationSize::print(raw_ostream &OS) const {
}
MemoryLocation MemoryLocation::get(const LoadInst *LI) {
- AAMDNodes AATags;
- LI->getAAMetadata(AATags);
const auto &DL = LI->getModule()->getDataLayout();
return MemoryLocation(
LI->getPointerOperand(),
- LocationSize::precise(DL.getTypeStoreSize(LI->getType())), AATags);
+ LocationSize::precise(DL.getTypeStoreSize(LI->getType())),
+ LI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const StoreInst *SI) {
- AAMDNodes AATags;
- SI->getAAMetadata(AATags);
const auto &DL = SI->getModule()->getDataLayout();
return MemoryLocation(SI->getPointerOperand(),
LocationSize::precise(DL.getTypeStoreSize(
SI->getValueOperand()->getType())),
- AATags);
+ SI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const VAArgInst *VI) {
- AAMDNodes AATags;
- VI->getAAMetadata(AATags);
-
return MemoryLocation(VI->getPointerOperand(),
- LocationSize::afterPointer(), AATags);
+ LocationSize::afterPointer(), VI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const AtomicCmpXchgInst *CXI) {
- AAMDNodes AATags;
- CXI->getAAMetadata(AATags);
const auto &DL = CXI->getModule()->getDataLayout();
return MemoryLocation(CXI->getPointerOperand(),
LocationSize::precise(DL.getTypeStoreSize(
CXI->getCompareOperand()->getType())),
- AATags);
+ CXI->getAAMetadata());
}
MemoryLocation MemoryLocation::get(const AtomicRMWInst *RMWI) {
- AAMDNodes AATags;
- RMWI->getAAMetadata(AATags);
const auto &DL = RMWI->getModule()->getDataLayout();
return MemoryLocation(RMWI->getPointerOperand(),
LocationSize::precise(DL.getTypeStoreSize(
RMWI->getValOperand()->getType())),
- AATags);
+ RMWI->getAAMetadata());
}
Optional<MemoryLocation> MemoryLocation::getOrNone(const Instruction *Inst) {
@@ -117,10 +107,7 @@ MemoryLocation MemoryLocation::getForSource(const AnyMemTransferInst *MTI) {
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- AAMDNodes AATags;
- MTI->getAAMetadata(AATags);
-
- return MemoryLocation(MTI->getRawSource(), Size, AATags);
+ return MemoryLocation(MTI->getRawSource(), Size, MTI->getAAMetadata());
}
MemoryLocation MemoryLocation::getForDest(const MemIntrinsic *MI) {
@@ -138,17 +125,13 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) {
// memcpy/memmove can have AA tags. For memcpy, they apply
// to both the source and the destination.
- AAMDNodes AATags;
- MI->getAAMetadata(AATags);
-
- return MemoryLocation(MI->getRawDest(), Size, AATags);
+ return MemoryLocation(MI->getRawDest(), Size, MI->getAAMetadata());
}
MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
unsigned ArgIdx,
const TargetLibraryInfo *TLI) {
- AAMDNodes AATags;
- Call->getAAMetadata(AATags);
+ AAMDNodes AATags = Call->getAAMetadata();
const Value *Arg = Call->getArgOperand(ArgIdx);
// We may be able to produce an exact size for known intrinsics.
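
Every hunk in this file rides on one interface change: Instruction::getAAMetadata() now returns the AAMDNodes by value instead of filling an out-parameter, so the local temporary disappears. A condensed sketch of the before/after call shape (the free function is hypothetical):

    #include "llvm/Analysis/MemoryLocation.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    MemoryLocation locationForLoad(const LoadInst *LI) {
      const DataLayout &DL = LI->getModule()->getDataLayout();
      // Before: AAMDNodes AATags; LI->getAAMetadata(AATags); then pass AATags.
      // After: the tags are fetched inline as a value.
      return MemoryLocation(
          LI->getPointerOperand(),
          LocationSize::precise(DL.getTypeStoreSize(LI->getType())),
          LI->getAAMetadata());
    }
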
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index b402b0467f5d..ac20e20f0c0d 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -90,22 +90,18 @@ bool llvm::VerifyMemorySSA = true;
#else
bool llvm::VerifyMemorySSA = false;
#endif
-/// Enables memory ssa as a dependency for loop passes in legacy pass manager.
-cl::opt<bool> llvm::EnableMSSALoopDependency(
- "enable-mssa-loop-dependency", cl::Hidden, cl::init(true),
- cl::desc("Enable MemorySSA dependency for loop pass manager"));
static cl::opt<bool, true>
VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA),
cl::Hidden, cl::desc("Enable verification of MemorySSA."));
-namespace llvm {
+const static char LiveOnEntryStr[] = "liveOnEntry";
+
+namespace {
/// An assembly annotator class to print Memory SSA information in
/// comments.
class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter {
- friend class MemorySSA;
-
const MemorySSA *MSSA;
public:
@@ -124,7 +120,34 @@ public:
}
};
-} // end namespace llvm
+/// An assembly annotator class to print Memory SSA information in
+/// comments.
+class MemorySSAWalkerAnnotatedWriter : public AssemblyAnnotationWriter {
+ MemorySSA *MSSA;
+ MemorySSAWalker *Walker;
+
+public:
+ MemorySSAWalkerAnnotatedWriter(MemorySSA *M)
+ : MSSA(M), Walker(M->getWalker()) {}
+
+ void emitInstructionAnnot(const Instruction *I,
+ formatted_raw_ostream &OS) override {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+ MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MA);
+ OS << "; " << *MA;
+ if (Clobber) {
+ OS << " - clobbered by ";
+ if (MSSA->isLiveOnEntryDef(Clobber))
+ OS << LiveOnEntryStr;
+ else
+ OS << *Clobber;
+ }
+ OS << "\n";
+ }
+ }
+};
+
+} // namespace
namespace {
@@ -286,6 +309,7 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
case Intrinsic::invariant_end:
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::pseudoprobe:
return {false, AliasResult(AliasResult::NoAlias)};
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare:
@@ -1016,7 +1040,8 @@ public:
// updated if a new clobber is found by this SkipSelf search. If this
// additional query becomes heavily used we may decide to cache the result.
// Walker instantiations will decide how to set the SkipSelf bool.
- MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool);
+ MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, unsigned &, bool,
+ bool UseInvariantGroup = true);
};
/// A MemorySSAWalker that does AA walks to disambiguate accesses. It no
@@ -1041,6 +1066,11 @@ public:
unsigned &UWL) {
return Walker->getClobberingMemoryAccessBase(MA, Loc, UWL);
}
+ // This method is not accessible outside of this file.
+ MemoryAccess *getClobberingMemoryAccessWithoutInvariantGroup(MemoryAccess *MA,
+ unsigned &UWL) {
+ return Walker->getClobberingMemoryAccessBase(MA, UWL, false, false);
+ }
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *MA) override {
unsigned UpwardWalkLimit = MaxCheckLimit;
@@ -1437,10 +1467,13 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
unsigned UpwardWalkLimit = MaxCheckLimit;
while (UpperBound > LocInfo.LowerBound) {
if (isa<MemoryPhi>(VersionStack[UpperBound])) {
- // For phis, use the walker, see where we ended up, go there
+ // For phis, use the walker, see where we ended up, go there.
+ // The invariant.group handling in MemorySSA is ad-hoc and doesn't
+ // support updates, so don't use it to optimize uses.
MemoryAccess *Result =
- Walker->getClobberingMemoryAccess(MU, UpwardWalkLimit);
- // We are guaranteed to find it or something is wrong
+ Walker->getClobberingMemoryAccessWithoutInvariantGroup(
+ MU, UpwardWalkLimit);
+ // We are guaranteed to find it or something is wrong.
while (VersionStack[UpperBound] != Result) {
assert(UpperBound != 0);
--UpperBound;
@@ -1750,6 +1783,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
break;
case Intrinsic::assume:
case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::pseudoprobe:
return nullptr;
}
}
@@ -1864,10 +1898,17 @@ void MemorySSA::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void MemorySSA::dump() const { print(dbgs()); }
#endif
-void MemorySSA::verifyMemorySSA() const {
- verifyOrderingDominationAndDefUses(F);
+void MemorySSA::verifyMemorySSA(VerificationLevel VL) const {
+#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
+ VL = VerificationLevel::Full;
+#endif
+
+#ifndef NDEBUG
+ verifyOrderingDominationAndDefUses(F, VL);
verifyDominationNumbers(F);
- verifyPrevDefInPhis(F);
+ if (VL == VerificationLevel::Full)
+ verifyPrevDefInPhis(F);
+#endif
// Previously, the verification used to also verify that the clobberingAccess
// cached by MemorySSA is the same as the clobberingAccess found at a later
// query to AA. This does not hold true in general due to the current fragility
@@ -1881,7 +1922,6 @@ void MemorySSA::verifyMemorySSA() const {
}
void MemorySSA::verifyPrevDefInPhis(Function &F) const {
-#if !defined(NDEBUG) && defined(EXPENSIVE_CHECKS)
for (const BasicBlock &BB : F) {
if (MemoryPhi *Phi = getMemoryAccess(&BB)) {
for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
@@ -1896,6 +1936,8 @@ void MemorySSA::verifyPrevDefInPhis(Function &F) const {
auto *LastAcc = &*(--DefList->end());
assert(LastAcc == IncAcc &&
"Incorrect incoming access into phi.");
+ (void)IncAcc;
+ (void)LastAcc;
break;
}
DTNode = DTNode->getIDom();
@@ -1911,13 +1953,11 @@ void MemorySSA::verifyPrevDefInPhis(Function &F) const {
}
}
}
-#endif
}
/// Verify that all of the blocks we believe to have valid domination numbers
/// actually have valid domination numbers.
void MemorySSA::verifyDominationNumbers(const Function &F) const {
-#ifndef NDEBUG
if (BlockNumberingValid.empty())
return;
@@ -1943,13 +1983,13 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
unsigned long ThisNumber = ThisNumberIter->second;
assert(ThisNumber > LastNumber &&
"Domination numbers should be strictly increasing!");
+ (void)LastNumber;
LastNumber = ThisNumber;
}
}
assert(ValidBlocks.empty() &&
"All valid BasicBlocks should exist in F -- dangling pointers?");
-#endif
}
/// Verify ordering: the order and existence of MemoryAccesses matches the
@@ -1958,8 +1998,8 @@ void MemorySSA::verifyDominationNumbers(const Function &F) const {
/// Verify def-uses: the immediate use information - walk all the memory
/// accesses and verifying that, for each use, it appears in the appropriate
/// def's use list
-void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
-#if !defined(NDEBUG)
+void MemorySSA::verifyOrderingDominationAndDefUses(Function &F,
+ VerificationLevel VL) const {
// Walk all the blocks, comparing what the lookups think and what the access
// lists think, as well as the order in the blocks vs the order in the access
// lists.
@@ -1974,19 +2014,21 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
ActualAccesses.push_back(Phi);
ActualDefs.push_back(Phi);
// Verify domination
- for (const Use &U : Phi->uses())
+ for (const Use &U : Phi->uses()) {
assert(dominates(Phi, U) && "Memory PHI does not dominate its uses");
-#if defined(EXPENSIVE_CHECKS)
- // Verify def-uses.
- assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
- pred_begin(&B), pred_end(&B))) &&
- "Incomplete MemoryPhi Node");
- for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
- verifyUseInDefs(Phi->getIncomingValue(I), Phi);
- assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
- "Incoming phi block not a block predecessor");
+ (void)U;
+ }
+ // Verify def-uses for full verify.
+ if (VL == VerificationLevel::Full) {
+ assert(Phi->getNumOperands() == static_cast<unsigned>(std::distance(
+ pred_begin(&B), pred_end(&B))) &&
+ "Incomplete MemoryPhi Node");
+ for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
+ verifyUseInDefs(Phi->getIncomingValue(I), Phi);
+ assert(is_contained(predecessors(&B), Phi->getIncomingBlock(I)) &&
+ "Incoming phi block not a block predecessor");
+ }
}
-#endif
}
for (Instruction &I : B) {
@@ -2002,14 +2044,15 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
// Verify ordering.
ActualDefs.push_back(MA);
// Verify domination.
- for (const Use &U : MD->uses())
+ for (const Use &U : MD->uses()) {
assert(dominates(MD, U) &&
"Memory Def does not dominate its uses");
+ (void)U;
+ }
}
-#if defined(EXPENSIVE_CHECKS)
- // Verify def-uses.
- verifyUseInDefs(MA->getDefiningAccess(), MA);
-#endif
+ // Verify def-uses for full verify.
+ if (VL == VerificationLevel::Full)
+ verifyUseInDefs(MA->getDefiningAccess(), MA);
}
}
// Either we hit the assert, really have no accesses, or we have both
@@ -2044,13 +2087,11 @@ void MemorySSA::verifyOrderingDominationAndDefUses(Function &F) const {
}
ActualDefs.clear();
}
-#endif
}
/// Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
-#ifndef NDEBUG
// The live on entry use may cause us to get a NULL def here
if (!Def)
assert(isLiveOnEntryDef(Use) &&
@@ -2058,7 +2099,6 @@ void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
else
assert(is_contained(Def->users(), Use) &&
"Did not find use in def's use list");
-#endif
}
/// Perform a local numbering on blocks so that instruction ordering can be
@@ -2138,8 +2178,6 @@ bool MemorySSA::dominates(const MemoryAccess *Dominator,
return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}
-const static char LiveOnEntryStr[] = "liveOnEntry";
-
void MemoryAccess::print(raw_ostream &OS) const {
switch (getValueID()) {
case MemoryPhiVal: return static_cast<const MemoryPhi *>(this)->print(OS);
@@ -2355,6 +2393,16 @@ PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
return PreservedAnalyses::all();
}
+PreservedAnalyses MemorySSAWalkerPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ OS << "MemorySSA (walker) for function: " << F.getName() << "\n";
+ MemorySSAWalkerAnnotatedWriter Writer(&MSSA);
+ F.print(OS, &Writer);
+
+ return PreservedAnalyses::all();
+}
+
PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();
@@ -2438,15 +2486,88 @@ MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
return Clobber;
}
+static const Instruction *
+getInvariantGroupClobberingInstruction(Instruction &I, DominatorTree &DT) {
+ if (!I.hasMetadata(LLVMContext::MD_invariant_group) || I.isVolatile())
+ return nullptr;
+
+ // We consider bitcasts and zero GEPs to be the same pointer value. Start by
+ // stripping bitcasts and zero GEPs, then we will recursively look at loads
+ // and stores through bitcasts and zero GEPs.
+ Value *PointerOperand = getLoadStorePointerOperand(&I)->stripPointerCasts();
+
+ // It's not safe to walk the use list of a global value because function
+ // passes aren't allowed to look outside their functions.
+ // FIXME: this could be fixed by filtering instructions from outside of
+ // the current function.
+ if (isa<Constant>(PointerOperand))
+ return nullptr;
+
+ // Queue to process all pointers that are equivalent to load operand.
+ SmallVector<const Value *, 8> PointerUsesQueue;
+ PointerUsesQueue.push_back(PointerOperand);
+
+ const Instruction *MostDominatingInstruction = &I;
+
+ // FIXME: This loop is O(n^2) because dominates can be O(n) and in worst case
+ // we will see all the instructions. It may not matter in practice. If it
+ // does, we will have to support MemorySSA construction and updates.
+ while (!PointerUsesQueue.empty()) {
+ const Value *Ptr = PointerUsesQueue.pop_back_val();
+ assert(Ptr && !isa<GlobalValue>(Ptr) &&
+ "Null or GlobalValue should not be inserted");
+
+ for (const User *Us : Ptr->users()) {
+ auto *U = dyn_cast<Instruction>(Us);
+ if (!U || U == &I || !DT.dominates(U, MostDominatingInstruction))
+ continue;
+
+ // Add bitcasts and zero GEPs to queue.
+ if (isa<BitCastInst>(U)) {
+ PointerUsesQueue.push_back(U);
+ continue;
+ }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEP->hasAllZeroIndices())
+ PointerUsesQueue.push_back(U);
+ continue;
+ }
+
+ // If we hit a load/store with an invariant.group metadata and the same
+ // pointer operand, we can assume that value pointed to by the pointer
+ // operand didn't change.
+ if (U->hasMetadata(LLVMContext::MD_invariant_group) &&
+ getLoadStorePointerOperand(U) == Ptr && !U->isVolatile()) {
+ MostDominatingInstruction = U;
+ }
+ }
+ }
+ return MostDominatingInstruction == &I ? nullptr : MostDominatingInstruction;
+}
+
template <typename AliasAnalysisType>
MemoryAccess *
MemorySSA::ClobberWalkerBase<AliasAnalysisType>::getClobberingMemoryAccessBase(
- MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf) {
+ MemoryAccess *MA, unsigned &UpwardWalkLimit, bool SkipSelf,
+ bool UseInvariantGroup) {
auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
// If this is a MemoryPhi, we can't do anything.
if (!StartingAccess)
return MA;
+ if (UseInvariantGroup) {
+ if (auto *I = getInvariantGroupClobberingInstruction(
+ *StartingAccess->getMemoryInst(), MSSA->getDomTree())) {
+ assert(isa<LoadInst>(I) || isa<StoreInst>(I));
+
+ auto *ClobberMA = MSSA->getMemoryAccess(I);
+ assert(ClobberMA);
+ if (isa<MemoryUse>(ClobberMA))
+ return ClobberMA->getDefiningAccess();
+ return ClobberMA;
+ }
+ }
+
bool IsOptimized = false;
// If this is an already optimized use or def, return the optimized result.
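
The verification changes replace the compile-time #ifdef gating with a run-time VerificationLevel: the ordering/domination and domination-number checks run at the default level, while verifyPrevDefInPhis and the MemoryPhi operand checks run only at VerificationLevel::Full (EXPENSIVE_CHECKS builds still force Full, and NDEBUG builds compile the body out). A sketch of how a caller might choose a level; the existence of a defaulted, cheaper level on the parameter is assumed from the matching header change:

    #include "llvm/Analysis/MemorySSA.h"

    using namespace llvm;

    // Hypothetical helper: run the cheap checks routinely, the full def-use
    // checks only right after an update that could have broken use lists.
    void verifyAfterUpdate(MemorySSA &MSSA, bool DidStructuralUpdate) {
      if (DidStructuralUpdate)
        MSSA.verifyMemorySSA(VerificationLevel::Full);
      else
        MSSA.verifyMemorySSA(); // Assumed defaulted, lighter verification level.
    }
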
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index 616864f360bf..9c841883de6d 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -296,9 +296,8 @@ static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB,
assert(i != -1 && "Should have found the basic block in the phi");
// We can't just compare i against getNumOperands since one is signed and the
// other not. So use it to index into the block iterator.
- for (auto BBIter = MP->block_begin() + i; BBIter != MP->block_end();
- ++BBIter) {
- if (*BBIter != BB)
+ for (const BasicBlock *BlockBB : llvm::drop_begin(MP->blocks(), i)) {
+ if (BlockBB != BB)
break;
MP->setIncomingValue(i, NewDef);
++i;
@@ -491,8 +490,7 @@ void MemorySSAUpdater::fixupDefs(const SmallVectorImpl<WeakVH> &Vars) {
}
while (!Worklist.empty()) {
- const BasicBlock *FixupBlock = Worklist.back();
- Worklist.pop_back();
+ const BasicBlock *FixupBlock = Worklist.pop_back_val();
// Get the first def in the block that isn't a phi node.
if (auto *Defs = MSSA->getWritableBlockDefs(FixupBlock)) {
@@ -822,25 +820,30 @@ void MemorySSAUpdater::applyUpdates(ArrayRef<CFGUpdate> Updates,
}
if (!DeleteUpdates.empty()) {
- if (!UpdateDT) {
- SmallVector<CFGUpdate, 0> Empty;
- // Deletes are reversed applied, because this CFGView is pretending the
- // deletes did not happen yet, hence the edges still exist.
- DT.applyUpdates(Empty, RevDeleteUpdates);
+ if (!InsertUpdates.empty()) {
+ if (!UpdateDT) {
+ SmallVector<CFGUpdate, 0> Empty;
+ // Deletes are applied in reverse, because this CFGView is pretending the
+ // deletes did not happen yet, hence the edges still exist.
+ DT.applyUpdates(Empty, RevDeleteUpdates);
+ } else {
+ // Apply all updates, with the RevDeleteUpdates as PostCFGView.
+ DT.applyUpdates(Updates, RevDeleteUpdates);
+ }
+
+ // Note: the MSSA update below doesn't distinguish between a GD with
+ // (RevDelete,false) and (Delete, true), but this matters for the DT
+ // updates above; for "children" purposes they are equivalent; but the
+ // updates themselves convey the desired update, used inside DT only.
+ GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
+ applyInsertUpdates(InsertUpdates, DT, &GD);
+ // Update DT to redelete edges; this matches the real CFG so we can
+ // perform the standard update without a postview of the CFG.
+ DT.applyUpdates(DeleteUpdates);
} else {
- // Apply all updates, with the RevDeleteUpdates as PostCFGView.
- DT.applyUpdates(Updates, RevDeleteUpdates);
+ if (UpdateDT)
+ DT.applyUpdates(DeleteUpdates);
}
-
- // Note: the MSSA update below doesn't distinguish between a GD with
- // (RevDelete,false) and (Delete, true), but this matters for the DT
- // updates above; for "children" purposes they are equivalent; but the
- // updates themselves convey the desired update, used inside DT only.
- GraphDiff<BasicBlock *> GD(RevDeleteUpdates);
- applyInsertUpdates(InsertUpdates, DT, &GD);
- // Update DT to redelete edges; this matches the real CFG so we can perform
- // the standard update without a postview of the CFG.
- DT.applyUpdates(DeleteUpdates);
} else {
if (UpdateDT)
DT.applyUpdates(Updates);
@@ -1131,11 +1134,7 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
if (auto DefsList = MSSA->getWritableBlockDefs(BlockWithDefsToReplace)) {
for (auto &DefToReplaceUses : *DefsList) {
BasicBlock *DominatingBlock = DefToReplaceUses.getBlock();
- Value::use_iterator UI = DefToReplaceUses.use_begin(),
- E = DefToReplaceUses.use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(DefToReplaceUses.uses())) {
MemoryAccess *Usr = cast<MemoryAccess>(U.getUser());
if (MemoryPhi *UsrPhi = dyn_cast<MemoryPhi>(Usr)) {
BasicBlock *DominatedBlock = UsrPhi->getIncomingBlock(U);
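
Two of the cleanups above are pure range-helper conversions: llvm::drop_begin replaces the manual "block_begin() + i" loop, and llvm::make_early_inc_range replaces the hand-rolled "save the use, bump the iterator, then mutate" idiom. A self-contained illustration of both helpers on standard containers (example functions only):

    #include "llvm/ADT/STLExtras.h"
    #include <map>
    #include <string>
    #include <vector>

    // make_early_inc_range pre-increments its internal iterator before yielding
    // each element, so erasing the element just yielded cannot invalidate the
    // traversal (map erase only invalidates iterators to the erased node).
    void pruneZeroEntries(std::map<std::string, int> &M) {
      for (auto &Entry : llvm::make_early_inc_range(M))
        if (Entry.second == 0) {
          std::string Key = Entry.first; // Copy so the lookup key outlives the node.
          M.erase(Key);
        }
    }

    // drop_begin(Range, N) iterates Range starting at its N-th element
    // (assumes the range has at least N elements).
    int sumTail(const std::vector<int> &V) {
      int Sum = 0;
      for (int X : llvm::drop_begin(V, 2))
        Sum += X;
      return Sum;
    }
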
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index e43553222128..d80814852e19 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -264,11 +264,27 @@ static void computeFunctionSummary(
std::vector<const Instruction *> NonVolatileStores;
bool HasInlineAsmMaybeReferencingInternal = false;
- for (const BasicBlock &BB : F)
+ bool HasIndirBranchToBlockAddress = false;
+ bool HasUnknownCall = false;
+ bool MayThrow = false;
+ for (const BasicBlock &BB : F) {
+ // We don't allow inlining of a function with an indirect branch to a
+ // blockaddress. If the blockaddress escapes the function, e.g., via a global
+ // variable, inlining may lead to an invalid cross-function reference, so we
+ // shouldn't import such a function either.
+ if (BB.hasAddressTaken()) {
+ for (User *U : BlockAddress::get(const_cast<BasicBlock *>(&BB))->users())
+ if (!isa<CallBrInst>(*U)) {
+ HasIndirBranchToBlockAddress = true;
+ break;
+ }
+ }
+
for (const Instruction &I : BB) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
++NumInsts;
+
// Regular LTO module doesn't participate in ThinLTO import,
// so no reference from it can be read/writeonly, since this
// would require importing variable as local copy
@@ -300,8 +316,11 @@ static void computeFunctionSummary(
}
findRefEdges(Index, &I, RefEdges, Visited);
const auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
+ if (!CB) {
+ if (I.mayThrow())
+ MayThrow = true;
continue;
+ }
const auto *CI = dyn_cast<CallInst>(&I);
// Since we don't know exactly which local values are referenced in inline
@@ -323,7 +342,7 @@ static void computeFunctionSummary(
// called aliasee for the checks below.
if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
assert(!CalledFunction && "Expected null called function in callsite for alias");
- CalledFunction = dyn_cast<Function>(GA->getBaseObject());
+ CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
}
// Check if this is a direct call to a known function or a known
// intrinsic, or an indirect call with profile data.
@@ -357,6 +376,7 @@ static void computeFunctionSummary(
ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
}
} else {
+ HasUnknownCall = true;
// Skip inline assembly calls.
if (CI && CI->isInlineAsm())
continue;
@@ -386,6 +406,7 @@ static void computeFunctionSummary(
.updateHotness(getHotness(Candidate.Count, PSI));
}
}
+ }
Index.addBlockCount(F.size());
std::vector<ValueInfo> Refs;
@@ -452,8 +473,9 @@ static void computeFunctionSummary(
: CalleeInfo::HotnessType::Critical);
bool NonRenamableLocal = isNonRenamableLocal(F);
- bool NotEligibleForImport =
- NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
+ bool NotEligibleForImport = NonRenamableLocal ||
+ HasInlineAsmMaybeReferencingInternal ||
+ HasIndirBranchToBlockAddress;
GlobalValueSummary::GVFlags Flags(
F.getLinkage(), F.getVisibility(), NotEligibleForImport,
/* Live = */ false, F.isDSOLocal(),
@@ -464,8 +486,9 @@ static void computeFunctionSummary(
F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
// FIXME: refactor this to use the same code that inliner is using.
// Don't try to import functions with noinline attribute.
- F.getAttributes().hasFnAttribute(Attribute::NoInline),
- F.hasFnAttribute(Attribute::AlwaysInline)};
+ F.getAttributes().hasFnAttr(Attribute::NoInline),
+ F.hasFnAttribute(Attribute::AlwaysInline),
+ F.hasFnAttribute(Attribute::NoUnwind), MayThrow, HasUnknownCall};
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
if (auto *SSI = GetSSICallback(F))
ParamAccesses = SSI->getParamAccesses(Index);
@@ -622,7 +645,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
/* Live = */ false, A.isDSOLocal(),
A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
auto AS = std::make_unique<AliasSummary>(Flags);
- auto *Aliasee = A.getBaseObject();
+ auto *Aliasee = A.getAliaseeObject();
auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
assert(AliaseeVI && "Alias expects aliasee summary to be available");
assert(AliaseeVI.getSummaryList().size() == 1 &&
@@ -711,7 +734,10 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
F->hasFnAttribute(Attribute::NoRecurse),
F->returnDoesNotAlias(),
/* NoInline = */ false,
- F->hasFnAttribute(Attribute::AlwaysInline)},
+ F->hasFnAttribute(Attribute::AlwaysInline),
+ F->hasFnAttribute(Attribute::NoUnwind),
+ /* MayThrow */ true,
+ /* HasUnknownCall */ true},
/*EntryCount=*/0, ArrayRef<ValueInfo>{},
ArrayRef<FunctionSummary::EdgeTy>{},
ArrayRef<GlobalValue::GUID>{},
diff --git a/llvm/lib/Analysis/ObjCARCInstKind.cpp b/llvm/lib/Analysis/ObjCARCInstKind.cpp
index 704d15f3280d..f74a9f7f104f 100644
--- a/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -296,9 +296,8 @@ ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) {
// operand isn't actually being dereferenced, it is being stored to
// memory where we can no longer track who might read it and dereference
// it, so we have to consider it potentially used.
- for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
- OI != OE; ++OI)
- if (IsPotentialRetainableObjPtr(*OI))
+ for (const Use &U : I->operands())
+ if (IsPotentialRetainableObjPtr(U))
return ARCInstKind::User;
}
}
diff --git a/llvm/lib/Analysis/OverflowInstAnalysis.cpp b/llvm/lib/Analysis/OverflowInstAnalysis.cpp
index 9f17d5b2064d..87a85e6a7364 100644
--- a/llvm/lib/Analysis/OverflowInstAnalysis.cpp
+++ b/llvm/lib/Analysis/OverflowInstAnalysis.cpp
@@ -69,4 +69,4 @@ bool llvm::isCheckForZeroAndMulWithOverflow(Value *Op0, Value *Op1,
bool IsAnd) {
Use *Y;
return isCheckForZeroAndMulWithOverflow(Op0, Op1, IsAnd, Y);
-} \ No newline at end of file
+}
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index 7f77ab146c4c..c73e1fd82915 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -226,8 +226,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(),
- GEPOps, {DL, TLI, DT, AC})) {
+ if (Value *V = SimplifyGEPInst(GEP->getSourceElementType(), GEPOps,
+ GEP->isInBounds(), {DL, TLI, DT, AC})) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 6dda0bf0a1b4..268ed9d04741 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -103,7 +103,7 @@ bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const {
// FIXME: The heuristic used below for determining hotness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return FunctionCount && isHotCount(FunctionCount.getCount());
+ return FunctionCount && isHotCount(FunctionCount->getCount());
}
/// Returns true if the function contains hot code. This can include a hot
@@ -116,7 +116,7 @@ bool ProfileSummaryInfo::isFunctionHotInCallGraph(
if (!F || !hasProfileSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
- if (isHotCount(FunctionCount.getCount()))
+ if (isHotCount(FunctionCount->getCount()))
return true;
if (hasSampleProfile()) {
@@ -145,7 +145,7 @@ bool ProfileSummaryInfo::isFunctionColdInCallGraph(
if (!F || !hasProfileSummary())
return false;
if (auto FunctionCount = F->getEntryCount())
- if (!isColdCount(FunctionCount.getCount()))
+ if (!isColdCount(FunctionCount->getCount()))
return false;
if (hasSampleProfile()) {
@@ -176,10 +176,10 @@ bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
return false;
if (auto FunctionCount = F->getEntryCount()) {
if (isHot &&
- isHotCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
+ isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
return true;
if (!isHot &&
- !isColdCountNthPercentile(PercentileCutoff, FunctionCount.getCount()))
+ !isColdCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
return false;
}
if (hasSampleProfile()) {
@@ -230,7 +230,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const {
// FIXME: The heuristic used below for determining coldness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isColdCount.
- return FunctionCount && isColdCount(FunctionCount.getCount());
+ return FunctionCount && isColdCount(FunctionCount->getCount());
}
/// Compute the hot and cold thresholds.
@@ -316,11 +316,11 @@ bool ProfileSummaryInfo::isColdCountNthPercentile(int PercentileCutoff,
}
uint64_t ProfileSummaryInfo::getOrCompHotCountThreshold() const {
- return HotCountThreshold ? HotCountThreshold.getValue() : UINT64_MAX;
+ return HotCountThreshold.getValueOr(UINT64_MAX);
}
uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const {
- return ColdCountThreshold ? ColdCountThreshold.getValue() : 0;
+ return ColdCountThreshold.getValueOr(0);
}
bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB,
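
The mechanical edits in this file follow from Function::getEntryCount() apparently now returning its result wrapped in an Optional (hence the -> dereference), plus the getValueOr shorthand on the cached thresholds. A self-contained sketch of both patterns (the Count struct is a stand-in for ProfileCount):

    #include "llvm/ADT/Optional.h"
    #include <cstdint>

    struct Count {
      uint64_t Value;
      uint64_t getCount() const { return Value; }
    };

    // Optional::operator-> forwards to the wrapped value, hence the switch from
    // FunctionCount.getCount() to FunctionCount->getCount() in the hunks above.
    bool isHotEntry(llvm::Optional<Count> FunctionCount, uint64_t HotThreshold) {
      return FunctionCount && FunctionCount->getCount() >= HotThreshold;
    }

    // getValueOr collapses the "X ? X.getValue() : Default" spelling.
    uint64_t thresholdOrDefault(llvm::Optional<uint64_t> Threshold) {
      return Threshold.getValueOr(UINT64_MAX);
    }
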
diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index b9dac2f3ff11..f83d8b0fd230 100644
--- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -17,18 +17,21 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/LineIterator.h"
+#include <memory>
using namespace llvm;
-#define DEBUG_TYPE "inline-replay"
+#define DEBUG_TYPE "replay-inline"
ReplayInlineAdvisor::ReplayInlineAdvisor(
Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
- std::unique_ptr<InlineAdvisor> OriginalAdvisor, StringRef RemarksFile,
- bool EmitRemarks)
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
: InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
- HasReplayRemarks(false), EmitRemarks(EmitRemarks) {
- auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile);
+ HasReplayRemarks(false), ReplaySettings(ReplaySettings),
+ EmitRemarks(EmitRemarks) {
+
+ auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile);
std::error_code EC = BufferOrErr.getError();
if (EC) {
Context.emitError("Could not open remarks file: " + EC.message());
@@ -36,47 +39,112 @@ ReplayInlineAdvisor::ReplayInlineAdvisor(
}
// Example for inline remarks to parse:
- // main:3:1.1: _Z3subii inlined into main at callsite sum:1 @ main:3:1.1
+ // main:3:1.1: '_Z3subii' inlined into 'main' at callsite sum:1 @
+ // main:3:1.1;
// We use the callsite string after `at callsite` to replay inlining.
line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true);
+ const std::string PositiveRemark = "' inlined into '";
+ const std::string NegativeRemark = "' will not be inlined into '";
+
for (; !LineIt.is_at_eof(); ++LineIt) {
StringRef Line = *LineIt;
auto Pair = Line.split(" at callsite ");
- auto Callee = Pair.first.split(" inlined into").first.rsplit(": ").second;
+ bool IsPositiveRemark = true;
+ if (Pair.first.contains(NegativeRemark))
+ IsPositiveRemark = false;
+
+ auto CalleeCaller =
+ Pair.first.split(IsPositiveRemark ? PositiveRemark : NegativeRemark);
+
+ StringRef Callee = CalleeCaller.first.rsplit(": '").second;
+ StringRef Caller = CalleeCaller.second.rsplit("'").first;
auto CallSite = Pair.second.split(";").first;
- if (Callee.empty() || CallSite.empty())
- continue;
+ if (Callee.empty() || Caller.empty() || CallSite.empty()) {
+ Context.emitError("Invalid remark format: " + Line);
+ return;
+ }
std::string Combined = (Callee + CallSite).str();
- InlineSitesFromRemarks.insert(Combined);
+ InlineSitesFromRemarks[Combined] = IsPositiveRemark;
+ if (ReplaySettings.ReplayScope == ReplayInlinerSettings::Scope::Function)
+ CallersToReplay.insert(Caller);
}
HasReplayRemarks = true;
}
+std::unique_ptr<InlineAdvisor> llvm::getReplayInlineAdvisor(
+ Module &M, FunctionAnalysisManager &FAM, LLVMContext &Context,
+ std::unique_ptr<InlineAdvisor> OriginalAdvisor,
+ const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks) {
+ auto Advisor = std::make_unique<ReplayInlineAdvisor>(
+ M, FAM, Context, std::move(OriginalAdvisor), ReplaySettings, EmitRemarks);
+ if (!Advisor->areReplayRemarksLoaded())
+ Advisor.reset();
+ return Advisor;
+}
+
std::unique_ptr<InlineAdvice> ReplayInlineAdvisor::getAdviceImpl(CallBase &CB) {
assert(HasReplayRemarks);
Function &Caller = *CB.getCaller();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
- if (InlineSitesFromRemarks.empty())
- return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
- EmitRemarks);
+ // Decision not made by replay system
+ if (!hasInlineAdvice(*CB.getFunction())) {
+ // If there's a registered original advisor, return its decision
+ if (OriginalAdvisor)
+ return OriginalAdvisor->getAdvice(CB);
- std::string CallSiteLoc = getCallSiteLocation(CB.getDebugLoc());
+ // If no decision is made above, return non-decision
+ return {};
+ }
+
+ std::string CallSiteLoc =
+ formatCallSiteLocation(CB.getDebugLoc(), ReplaySettings.ReplayFormat);
StringRef Callee = CB.getCalledFunction()->getName();
std::string Combined = (Callee + CallSiteLoc).str();
- auto Iter = InlineSitesFromRemarks.find(Combined);
- Optional<InlineCost> InlineRecommended = None;
+ // Replay decision, if it has one
+ auto Iter = InlineSitesFromRemarks.find(Combined);
if (Iter != InlineSitesFromRemarks.end()) {
- InlineRecommended = llvm::InlineCost::getAlways("found in replay");
+ if (InlineSitesFromRemarks[Combined]) {
+ LLVM_DEBUG(dbgs() << "Replay Inliner: Inlined " << Callee << " @ "
+ << CallSiteLoc << "\n");
+ return std::make_unique<DefaultInlineAdvice>(
+ this, CB, llvm::InlineCost::getAlways("previously inlined"), ORE,
+ EmitRemarks);
+ } else {
+ LLVM_DEBUG(dbgs() << "Replay Inliner: Not Inlined " << Callee << " @ "
+ << CallSiteLoc << "\n");
+ // A negative inline is conveyed by "None" Optional<InlineCost>
+ return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
+ EmitRemarks);
+ }
+ }
+
+ // Fallback decisions
+ if (ReplaySettings.ReplayFallback ==
+ ReplayInlinerSettings::Fallback::AlwaysInline)
+ return std::make_unique<DefaultInlineAdvice>(
+ this, CB, llvm::InlineCost::getAlways("AlwaysInline Fallback"), ORE,
+ EmitRemarks);
+ else if (ReplaySettings.ReplayFallback ==
+ ReplayInlinerSettings::Fallback::NeverInline)
+ // A negative inline is conveyed by "None" Optional<InlineCost>
+ return std::make_unique<DefaultInlineAdvice>(this, CB, None, ORE,
+ EmitRemarks);
+ else {
+ assert(ReplaySettings.ReplayFallback ==
+ ReplayInlinerSettings::Fallback::Original);
+ // If there's a registered original advisor, return its decision
+ if (OriginalAdvisor)
+ return OriginalAdvisor->getAdvice(CB);
}
- return std::make_unique<DefaultInlineAdvice>(this, CB, InlineRecommended, ORE,
- EmitRemarks);
+ // If no decision is made above, return non-decision
+ return {};
}
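
The constructor now parses both positive and negative remarks and remembers the caller so replay can be scoped per function; the parsing itself is plain StringRef surgery keyed to the quoted remark format shown in the comment above. The same logic as a standalone sketch (the struct and function names are illustrative):

    #include "llvm/ADT/StringRef.h"

    struct ParsedRemark {
      llvm::StringRef Callee, Caller, CallSite;
      bool IsInlined;
    };

    // Parses "<loc>: '<callee>' inlined into '<caller>' at callsite <site>;"
    // and the corresponding "will not be inlined into" negative form.
    bool parseReplayRemark(llvm::StringRef Line, ParsedRemark &Out) {
      const llvm::StringRef Positive = "' inlined into '";
      const llvm::StringRef Negative = "' will not be inlined into '";
      auto Pair = Line.split(" at callsite ");
      Out.IsInlined = !Pair.first.contains(Negative);
      auto CalleeCaller = Pair.first.split(Out.IsInlined ? Positive : Negative);
      Out.Callee = CalleeCaller.first.rsplit(": '").second;
      Out.Caller = CalleeCaller.second.rsplit("'").first;
      Out.CallSite = Pair.second.split(";").first;
      return !Out.Callee.empty() && !Out.Caller.empty() && !Out.CallSite.empty();
    }
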
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f22d834b5e57..f7c22cfb0310 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -139,8 +139,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "scalar-evolution"
-STATISTIC(NumArrayLenItCounts,
- "Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
"Number of loops with predictable loop counts");
STATISTIC(NumTripCountsNotComputed,
@@ -1100,7 +1098,7 @@ const SCEV *ScalarEvolution::getLosslessPtrToIntExpr(const SCEV *Op,
SCEV *S = new (SCEVAllocator)
SCEVPtrToIntExpr(ID.Intern(SCEVAllocator), Op, IntPtrTy);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1220,7 +1218,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
SCEV *S =
new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1274,7 +1272,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1603,7 +1601,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1872,7 +1870,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -1911,7 +1909,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -2108,7 +2106,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Op);
return S;
}
@@ -2390,6 +2388,24 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,
}
}
+ // <0,+,nonnegative><nw> is also nuw
+ // TODO: Add corresponding nsw case
+ if (Type == scAddRecExpr && ScalarEvolution::hasFlags(Flags, SCEV::FlagNW) &&
+ !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) && Ops.size() == 2 &&
+ Ops[0]->isZero() && IsKnownNonNegative(Ops[1]))
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+
+ // both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW
+ if (Type == scMulExpr && !ScalarEvolution::hasFlags(Flags, SCEV::FlagNUW) &&
+ Ops.size() == 2) {
+ if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[0]))
+ if (UDiv->getOperand(1) == Ops[1])
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ if (auto *UDiv = dyn_cast<SCEVUDivExpr>(Ops[1]))
+ if (UDiv->getOperand(1) == Ops[0])
+ Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW);
+ }
+
return Flags;
}
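
The new multiply rule ("both (udiv X, Y) * Y and Y * (udiv X, Y) are always NUW") rests on the same identity the addition canonicalization below exploits: for nonzero Y,

    (X udiv Y) * Y == X - (X urem Y) <= X,

so the product never exceeds X in the same bit width and cannot wrap in the unsigned sense (a zero Y makes the udiv poison, so the flag is irrelevant in that case).
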
@@ -2449,7 +2465,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateAddExpr(Ops, ComputeFlags(Ops));
- if (SCEV *S = std::get<0>(findExistingSCEVInCache(scAddExpr, Ops))) {
+ if (SCEV *S = findExistingSCEVInCache(scAddExpr, Ops)) {
// Don't strengthen flags if we have no new information.
SCEVAddExpr *Add = static_cast<SCEVAddExpr *>(S);
if (Add->getNoWrapFlags(OrigFlags) != OrigFlags)
@@ -2562,8 +2578,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
APInt ConstAdd = C1 + C2;
auto AddFlags = AddExpr->getNoWrapFlags();
// Adding a smaller constant is NUW if the original AddExpr was NUW.
- if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNUW) ==
- SCEV::FlagNUW &&
+ if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNUW) &&
ConstAdd.ule(C1)) {
PreservedFlags =
ScalarEvolution::setFlags(PreservedFlags, SCEV::FlagNUW);
@@ -2571,8 +2586,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// Adding a constant with the same sign and small magnitude is NSW, if the
// original AddExpr was NSW.
- if (ScalarEvolution::maskFlags(AddFlags, SCEV::FlagNSW) ==
- SCEV::FlagNSW &&
+ if (ScalarEvolution::hasFlags(AddFlags, SCEV::FlagNSW) &&
C1.isSignBitSet() == ConstAdd.isSignBitSet() &&
ConstAdd.abs().ule(C1.abs())) {
PreservedFlags =
@@ -2580,14 +2594,26 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
}
if (PreservedFlags != SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 4> NewOps(AddExpr->op_begin(),
- AddExpr->op_end());
+ SmallVector<const SCEV *, 4> NewOps(AddExpr->operands());
NewOps[0] = getConstant(ConstAdd);
return getAddExpr(NewOps, PreservedFlags);
}
}
}
+ // Canonicalize (-1 * urem X, Y) + X --> (Y * X/Y)
+ if (Ops.size() == 2) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[0]);
+ if (Mul && Mul->getNumOperands() == 2 &&
+ Mul->getOperand(0)->isAllOnesValue()) {
+ const SCEV *X;
+ const SCEV *Y;
+ if (matchURem(Mul->getOperand(1), X, Y) && X == Ops[1]) {
+ return getMulExpr(Y, getUDivExpr(X, Y));
+ }
+ }
+ }
+
// Skip past any other cast SCEVs.
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
++Idx;
@@ -2766,7 +2792,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// Compute nowrap flags for the addition of the loop-invariant ops and
- // the addrec. Temporarily push it as an operand for that purpose.
+ // the addrec. Temporarily push it as an operand for that purpose. These
+ // flags are valid in the scope of the addrec only.
LIOps.push_back(AddRec);
SCEV::NoWrapFlags Flags = ComputeFlags(LIOps);
LIOps.pop_back();
@@ -2775,10 +2802,25 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
LIOps.push_back(AddRec->getStart());
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
- // This follows from the fact that the no-wrap flags on the outer add
- // expression are applicable on the 0th iteration, when the add recurrence
- // will be equal to its start value.
- AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
+
+ // It is not in general safe to propagate flags valid on an add within
+ // the addrec scope to one outside it. We must prove that the inner
+ // scope is guaranteed to execute if the outer one does to be able to
+ // safely propagate. We know the program is undefined if poison is
+ // produced on the inner scoped addrec. We also know that *for this use*
+ // the outer scoped add can't overflow (because of the flags we just
+ // computed for the inner scoped add) without the program being undefined.
+ // Proving that entry to the outer scope necessitates entry to the inner
+ // scope, thus proves the program undefined if the flags would be violated
+ // in the outer scope.
+ SCEV::NoWrapFlags AddFlags = Flags;
+ if (AddFlags != SCEV::FlagAnyWrap) {
+ auto *DefI = getDefiningScopeBound(LIOps);
+ auto *ReachI = &*AddRecLoop->getHeader()->begin();
+ if (!isGuaranteedToTransferExecutionTo(DefI, ReachI))
+ AddFlags = SCEV::FlagAnyWrap;
+ }
+ AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1);
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer add and the inner addrec are guaranteed to have no overflow.
@@ -2862,7 +2904,7 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
S = new (SCEVAllocator)
SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Ops);
}
S->setNoWrapFlags(Flags);
return S;
@@ -2885,7 +2927,8 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
S = new (SCEVAllocator)
SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ LoopUsers[L].push_back(S);
+ registerUser(S, Ops);
}
setNoWrapFlags(S, Flags);
return S;
@@ -2907,7 +2950,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator),
O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Ops);
}
S->setNoWrapFlags(Flags);
return S;
@@ -3022,7 +3065,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Depth > MaxArithDepth || hasHugeExpression(Ops))
return getOrCreateMulExpr(Ops, ComputeFlags(Ops));
- if (SCEV *S = std::get<0>(findExistingSCEVInCache(scMulExpr, Ops))) {
+ if (SCEV *S = findExistingSCEVInCache(scMulExpr, Ops)) {
// Don't strengthen flags if we have no new information.
SCEVMulExpr *Mul = static_cast<SCEVMulExpr *>(S);
if (Mul->getNoWrapFlags(OrigFlags) != OrigFlags)
@@ -3416,7 +3459,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, {LHS, RHS});
return S;
}
@@ -3593,13 +3636,21 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType());
- // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP
- // instruction to its SCEV, because the Instruction may be guarded by control
- // flow and the no-overflow bits may not be valid for the expression in any
- // context. This can be fixed similarly to how these flags are handled for
- // adds.
+ const bool AssumeInBoundsFlags = [&]() {
+ if (!GEP->isInBounds())
+ return false;
+
+ // We'd like to propagate flags from the IR to the corresponding SCEV nodes,
+ // but to do that, we have to ensure that said flag is valid in the entire
+ // defined scope of the SCEV.
+ auto *GEPI = dyn_cast<Instruction>(GEP);
+ // TODO: non-instructions have global scope. We might be able to prove
+ // some global scope cases
+ return GEPI && isSCEVExprNeverPoison(GEPI);
+ }();
+
SCEV::NoWrapFlags OffsetWrap =
- GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+ AssumeInBoundsFlags ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
Type *CurTy = GEP->getType();
bool FirstIter = true;
@@ -3645,21 +3696,22 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
// Add the base address and the offset. We cannot use the nsw flag, as the
// base address is unsigned. However, if we know that the offset is
// non-negative, we can use nuw.
- SCEV::NoWrapFlags BaseWrap = GEP->isInBounds() && isKnownNonNegative(Offset)
+ SCEV::NoWrapFlags BaseWrap = AssumeInBoundsFlags && isKnownNonNegative(Offset)
? SCEV::FlagNUW : SCEV::FlagAnyWrap;
- return getAddExpr(BaseExpr, Offset, BaseWrap);
+ auto *GEPExpr = getAddExpr(BaseExpr, Offset, BaseWrap);
+ assert(BaseExpr->getType() == GEPExpr->getType() &&
+ "GEP should not change type mid-flight.");
+ return GEPExpr;
}
-std::tuple<SCEV *, FoldingSetNodeID, void *>
-ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
- ArrayRef<const SCEV *> Ops) {
+SCEV *ScalarEvolution::findExistingSCEVInCache(SCEVTypes SCEVType,
+ ArrayRef<const SCEV *> Ops) {
FoldingSetNodeID ID;
- void *IP = nullptr;
ID.AddInteger(SCEVType);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- return std::tuple<SCEV *, FoldingSetNodeID, void *>(
- UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP);
+ void *IP = nullptr;
+ return UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
}
const SCEV *ScalarEvolution::getAbsExpr(const SCEV *Op, bool IsNSW) {
@@ -3689,7 +3741,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
GroupByComplexity(Ops, &LI, DT);
// Check if we have created the same expression before.
- if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) {
+ if (const SCEV *S = findExistingSCEVInCache(Kind, Ops)) {
return S;
}
@@ -3787,10 +3839,12 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
// Okay, it looks like we really DO need an expr. Check to see if we
// already have one, otherwise create a new one.
- const SCEV *ExistingSCEV;
FoldingSetNodeID ID;
- void *IP;
- std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops);
+ ID.AddInteger(Kind);
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+ ID.AddPointer(Ops[i]);
+ void *IP = nullptr;
+ const SCEV *ExistingSCEV = UniqueSCEVs.FindNodeOrInsertPos(ID, IP);
if (ExistingSCEV)
return ExistingSCEV;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
@@ -3799,7 +3853,7 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
SCEVMinMaxExpr(ID.Intern(SCEVAllocator), Kind, O, Ops.size());
UniqueSCEVs.InsertNode(S, IP);
- addToLoopUseLists(S);
+ registerUser(S, Ops);
return S;
}
@@ -3943,6 +3997,21 @@ Type *ScalarEvolution::getWiderType(Type *T1, Type *T2) const {
return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? T1 : T2;
}
+bool ScalarEvolution::instructionCouldExistWitthOperands(const SCEV *A,
+ const SCEV *B) {
+ /// For a valid use point to exist, the defining scope of one operand
+ /// must dominate the other.
+ bool PreciseA, PreciseB;
+ auto *ScopeA = getDefiningScopeBound({A}, PreciseA);
+ auto *ScopeB = getDefiningScopeBound({B}, PreciseB);
+ if (!PreciseA || !PreciseB)
+ // Can't tell.
+ return false;
+ return (ScopeA == ScopeB) || DT.dominates(ScopeA, ScopeB) ||
+ DT.dominates(ScopeB, ScopeA);
+}
+
+
const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
@@ -4025,24 +4094,6 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
}
}
-/// Check whether value has nuw/nsw/exact set but SCEV does not.
-/// TODO: In reality it is better to check the poison recursively
-/// but this is better than nothing.
-static bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) {
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (isa<OverflowingBinaryOperator>(I)) {
- if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
- if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
- return true;
- if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
- return true;
- }
- } else if (isa<PossiblyExactOperator>(I) && I->isExact())
- return true;
- }
- return false;
-}
-
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4056,7 +4107,7 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
// ValueExprMap before insert S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second && !SCEVLostPoisonFlags(S, V)) {
+ if (Pair.second) {
ExprValueMap[S].insert({V, nullptr});
// If S == Stripped + Offset, add Stripped -> {V, Offset} into
@@ -4120,6 +4171,8 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) {
/// Return a SCEV corresponding to ~V = -1-V
const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+ assert(!V->getType()->isPointerTy() && "Can't negate pointer");
+
if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
return getConstant(
cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
@@ -4146,17 +4199,16 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
return getMinusSCEV(getMinusOne(Ty), V);
}
-/// Compute an expression equivalent to S - getPointerBase(S).
-static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) {
+const SCEV *ScalarEvolution::removePointerBase(const SCEV *P) {
assert(P->getType()->isPointerTy());
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(P)) {
// The base of an AddRec is the first operand.
SmallVector<const SCEV *> Ops{AddRec->operands()};
- Ops[0] = removePointerBase(SE, Ops[0]);
+ Ops[0] = removePointerBase(Ops[0]);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if pointer operand of the AddRec is a SCEVUnknown).
- return SE->getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
+ return getAddRecExpr(Ops, AddRec->getLoop(), SCEV::FlagAnyWrap);
}
if (auto *Add = dyn_cast<SCEVAddExpr>(P)) {
// The base of an Add is the pointer operand.
@@ -4164,21 +4216,17 @@ static const SCEV *removePointerBase(ScalarEvolution *SE, const SCEV *P) {
const SCEV **PtrOp = nullptr;
for (const SCEV *&AddOp : Ops) {
if (AddOp->getType()->isPointerTy()) {
- // If we find an Add with multiple pointer operands, treat it as a
- // pointer base to be consistent with getPointerBase. Eventually
- // we should be able to assert this is impossible.
- if (PtrOp)
- return SE->getZero(P->getType());
+ assert(!PtrOp && "Cannot have multiple pointer ops");
PtrOp = &AddOp;
}
}
- *PtrOp = removePointerBase(SE, *PtrOp);
+ *PtrOp = removePointerBase(*PtrOp);
// Don't try to transfer nowrap flags for now. We could in some cases
// (for example, if the pointer operand of the Add is a SCEVUnknown).
- return SE->getAddExpr(Ops);
+ return getAddExpr(Ops);
}
// Any other expression must be a pointer base.
- return SE->getZero(P->getType());
+ return getZero(P->getType());
}
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
@@ -4195,8 +4243,8 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
if (!LHS->getType()->isPointerTy() ||
getPointerBase(LHS) != getPointerBase(RHS))
return getCouldNotCompute();
- LHS = removePointerBase(this, LHS);
- RHS = removePointerBase(this, RHS);
+ LHS = removePointerBase(LHS);
+ RHS = removePointerBase(RHS);
}
// We represent LHS - RHS as LHS + (-1)*RHS. This transformation
@@ -4204,7 +4252,7 @@ const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
auto AddFlags = SCEV::FlagAnyWrap;
const bool RHSIsNotMinSigned =
!getSignedRangeMin(RHS).isMinSignedValue();
- if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) {
+ if (hasFlags(Flags, SCEV::FlagNSW)) {
// Let M be the minimum representable signed value. Then (-1)*RHS
// signed-wraps if and only if RHS is M. That can happen even for
// a NSW subtraction because e.g. (-1)*M signed-wraps even though
@@ -4359,14 +4407,11 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
const SCEV *PtrOp = nullptr;
for (const SCEV *AddOp : Add->operands()) {
if (AddOp->getType()->isPointerTy()) {
- // Cannot find the base of an expression with multiple pointer ops.
- if (PtrOp)
- return V;
+ assert(!PtrOp && "Cannot have multiple pointer ops");
PtrOp = AddOp;
}
}
- if (!PtrOp) // All operands were non-pointer.
- return V;
+ assert(PtrOp && "Must have pointer op");
V = PtrOp;
} else // Not something we can look further into.
return V;
@@ -4374,24 +4419,25 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
}
/// Push users of the given Instruction onto the given Worklist.
-static void
-PushDefUseChildren(Instruction *I,
- SmallVectorImpl<Instruction *> &Worklist) {
+static void PushDefUseChildren(Instruction *I,
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSetImpl<Instruction *> &Visited) {
// Push the def-use children onto the Worklist stack.
- for (User *U : I->users())
- Worklist.push_back(cast<Instruction>(U));
+ for (User *U : I->users()) {
+ auto *UserInsn = cast<Instruction>(U);
+ if (Visited.insert(UserInsn).second)
+ Worklist.push_back(UserInsn);
+ }
}
void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
- PushDefUseChildren(PN, Worklist);
-
SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<const SCEV *, 8> ToForget;
Visited.insert(PN);
+ Worklist.push_back(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
auto It = ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
@@ -4413,12 +4459,13 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(Old);
+ ToForget.push_back(Old);
}
}
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(I, Worklist, Visited);
}
+ forgetMemoizedResults(ToForget);
}
namespace {
@@ -6109,7 +6156,7 @@ ScalarEvolution::getRangeRef(const SCEV *S,
// initial value.
if (AddRec->hasNoUnsignedWrap()) {
APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart());
- if (!UnsignedMinValue.isNullValue())
+ if (!UnsignedMinValue.isZero())
ConservativeResult = ConservativeResult.intersectWith(
ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType);
}
@@ -6211,9 +6258,9 @@ ScalarEvolution::getRangeRef(const SCEV *S,
if (NS > 1) {
// If we know any of the sign bits, we know all of the sign bits.
- if (!Known.Zero.getHiBits(NS).isNullValue())
+ if (!Known.Zero.getHiBits(NS).isZero())
Known.Zero.setHighBits(NS);
- if (!Known.One.getHiBits(NS).isNullValue())
+ if (!Known.One.getHiBits(NS).isZero())
Known.One.setHighBits(NS);
}
@@ -6549,17 +6596,99 @@ SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) {
return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap;
}
-bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
- // Here we check that I is in the header of the innermost loop containing I,
- // since we only deal with instructions in the loop header. The actual loop we
- // need to check later will come from an add recurrence, but getting that
- // requires computing the SCEV of the operands, which can be expensive. This
- // check we can do cheaply to rule out some cases early.
- Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent());
- if (InnermostContainingLoop == nullptr ||
- InnermostContainingLoop->getHeader() != I->getParent())
- return false;
+const Instruction *
+ScalarEvolution::getNonTrivialDefiningScopeBound(const SCEV *S) {
+ if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S))
+ return &*AddRec->getLoop()->getHeader()->begin();
+ if (auto *U = dyn_cast<SCEVUnknown>(S))
+ if (auto *I = dyn_cast<Instruction>(U->getValue()))
+ return I;
+ return nullptr;
+}
+/// Fills \p Ops with unique operands of \p S, if it has operands. If not,
+/// \p Ops remains unmodified.
+static void collectUniqueOps(const SCEV *S,
+ SmallVectorImpl<const SCEV *> &Ops) {
+ SmallPtrSet<const SCEV *, 4> Unique;
+ auto InsertUnique = [&](const SCEV *S) {
+ if (Unique.insert(S).second)
+ Ops.push_back(S);
+ };
+ if (auto *S2 = dyn_cast<SCEVCastExpr>(S))
+ for (auto *Op : S2->operands())
+ InsertUnique(Op);
+ else if (auto *S2 = dyn_cast<SCEVNAryExpr>(S))
+ for (auto *Op : S2->operands())
+ InsertUnique(Op);
+ else if (auto *S2 = dyn_cast<SCEVUDivExpr>(S))
+ for (auto *Op : S2->operands())
+ InsertUnique(Op);
+}
+
+const Instruction *
+ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops,
+ bool &Precise) {
+ Precise = true;
+ // Do a bounded search of the def relation of the requested SCEVs.
+ SmallSet<const SCEV *, 16> Visited;
+ SmallVector<const SCEV *> Worklist;
+ auto pushOp = [&](const SCEV *S) {
+ if (!Visited.insert(S).second)
+ return;
+ // Threshold of 30 here is arbitrary.
+ if (Visited.size() > 30) {
+ Precise = false;
+ return;
+ }
+ Worklist.push_back(S);
+ };
+
+ for (auto *S : Ops)
+ pushOp(S);
+
+ const Instruction *Bound = nullptr;
+ while (!Worklist.empty()) {
+ auto *S = Worklist.pop_back_val();
+ if (auto *DefI = getNonTrivialDefiningScopeBound(S)) {
+ if (!Bound || DT.dominates(Bound, DefI))
+ Bound = DefI;
+ } else {
+ SmallVector<const SCEV *, 4> Ops;
+ collectUniqueOps(S, Ops);
+ for (auto *Op : Ops)
+ pushOp(Op);
+ }
+ }
+ return Bound ? Bound : &*F.getEntryBlock().begin();
+}
+
+const Instruction *
+ScalarEvolution::getDefiningScopeBound(ArrayRef<const SCEV *> Ops) {
+ bool Discard;
+ return getDefiningScopeBound(Ops, Discard);
+}
+
+bool ScalarEvolution::isGuaranteedToTransferExecutionTo(const Instruction *A,
+ const Instruction *B) {
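+  // Case 1: A and B are in the same block, and every instruction from A up
+  // to B transfers execution to its successor.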
+ if (A->getParent() == B->getParent() &&
+ isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
+ B->getIterator()))
+ return true;
+
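+  // Case 2: A is in the preheader of the loop whose header contains B. If
+  // execution is guaranteed to reach the end of the preheader from A, and to
+  // reach B from the start of the header, then reaching A guarantees
+  // reaching B.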
+ auto *BLoop = LI.getLoopFor(B->getParent());
+ if (BLoop && BLoop->getHeader() == B->getParent() &&
+ BLoop->getLoopPreheader() == A->getParent() &&
+ isGuaranteedToTransferExecutionToSuccessor(A->getIterator(),
+ A->getParent()->end()) &&
+ isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(),
+ B->getIterator()))
+ return true;
+ return false;
+}
+
+bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
// Only proceed if we can prove that I does not yield poison.
if (!programUndefinedIfPoison(I))
return false;
@@ -6570,39 +6699,20 @@ bool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) {
// instructions can map to the same SCEV. If we apply NSW or NUW from I to
// the SCEV, we must guarantee no wrapping for that SCEV also when it is
// derived from other instructions that map to the same SCEV. We cannot make
- // that guarantee for cases where I is not executed. So we need to find the
- // loop that I is considered in relation to and prove that I is executed for
- // every iteration of that loop. That implies that the value that I
- // calculates does not wrap anywhere in the loop, so then we can apply the
- // flags to the SCEV.
- //
- // We check isLoopInvariant to disambiguate in case we are adding recurrences
- // from different loops, so that we know which loop to prove that I is
- // executed in.
- for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
+  // that guarantee for cases where I is not executed. So we need to find an
+  // upper bound on the defining scope for the SCEV, and prove that I is
+ // executed every time we enter that scope. When the bounding scope is a
+ // loop (the common case), this is equivalent to proving I executes on every
+ // iteration of that loop.
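+  //
+  // Illustrative sketch (names are hypothetical): for %add = add nsw %iv,
+  // %inv, where %iv is an AddRec of this loop and %inv is defined outside it,
+  // the defining scope bound is the start of the loop header, so nsw is only
+  // transferred if %add is guaranteed to execute whenever the header is
+  // entered.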
+ SmallVector<const SCEV *> SCEVOps;
+ for (const Use &Op : I->operands()) {
// I could be an extractvalue from a call to an overflow intrinsic.
// TODO: We can do better here in some cases.
- if (!isSCEVable(I->getOperand(OpIndex)->getType()))
- return false;
- const SCEV *Op = getSCEV(I->getOperand(OpIndex));
- if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
- bool AllOtherOpsLoopInvariant = true;
- for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands();
- ++OtherOpIndex) {
- if (OtherOpIndex != OpIndex) {
- const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex));
- if (!isLoopInvariant(OtherOp, AddRec->getLoop())) {
- AllOtherOpsLoopInvariant = false;
- break;
- }
- }
- }
- if (AllOtherOpsLoopInvariant &&
- isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop()))
- return true;
- }
+ if (isSCEVable(Op->getType()))
+ SCEVOps.push_back(getSCEV(Op));
}
- return false;
+ auto *DefI = getDefiningScopeBound(SCEVOps);
+ return isGuaranteedToTransferExecutionTo(DefI, I);
}
bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
@@ -7144,10 +7254,21 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
-const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) {
- // Get the trip count from the BE count by adding 1. Overflow, results
- // in zero which means "unknown".
- return getAddExpr(ExitCount, getOne(ExitCount->getType()));
+const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount,
+ bool Extend) {
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ return getCouldNotCompute();
+
+ auto *ExitCountType = ExitCount->getType();
+ assert(ExitCountType->isIntegerTy());
+
+ if (!Extend)
+ return getAddExpr(ExitCount, getOne(ExitCountType));
+
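+  // Worked example (illustrative): an i8 exit count of 255 would wrap to 0
+  // when incremented in-type; zero-extending to i9 first yields the correct
+  // trip count of 256.
+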
+ auto *WiderType = Type::getIntNTy(ExitCountType->getContext(),
+ 1 + ExitCountType->getScalarSizeInBits());
+ return getAddExpr(getNoopOrZeroExtend(ExitCount, WiderType),
+ getOne(WiderType));
}
static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
@@ -7186,6 +7307,131 @@ unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
return getConstantTripCount(MaxExitCount);
}
+const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
+  // We can't infer from arrays accessed in irregular loops.
+  // FIXME: It's hard to infer a loop bound from arrays accessed in nested
+  // loops.
+ if (!L->isLoopSimplifyForm() || !L->isInnermost())
+ return getCouldNotCompute();
+
+  // FIXME: To keep the common case simple, we only analyze loops that have
+  // one exiting block and that block must be the latch. This makes it easier
+  // to capture loops whose memory accesses are executed in each iteration.
+ const BasicBlock *LoopLatch = L->getLoopLatch();
+ assert(LoopLatch && "See defination of simplify form loop.");
+ if (L->getExitingBlock() != LoopLatch)
+ return getCouldNotCompute();
+
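+  // Illustrative example (names are hypothetical):
+  //
+  //   char Buf[16];
+  //   for (int I = 0; Cond; ++I)
+  //     Buf[I] = 0;
+  //
+  // The store cannot execute more than 16 times without immediate UB, and one
+  // extra entry into the header is allowed, so a constant max trip count of
+  // 17 can be inferred.
+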
+ const DataLayout &DL = getDataLayout();
+ SmallVector<const SCEV *> InferCountColl;
+ for (auto *BB : L->getBlocks()) {
+    // At this point we know the loop has a single exiting block and is in
+    // simplified form. Only infer from memory operations in BBs that must be
+    // executed on every iteration. As a first step, require that MemAccessBB
+    // dominates the latch, so the maximum execution count of MemAccessBB
+    // matches the latch's maximum execution count.
+    // If MemAccessBB does not dominate the latch, skip.
+ // Entry
+ // │
+ // ┌─────▼─────┐
+ // │Loop Header◄─────┐
+ // └──┬──────┬─┘ │
+ // │ │ │
+ // ┌────────▼──┐ ┌─▼─────┐ │
+ // │MemAccessBB│ │OtherBB│ │
+ // └────────┬──┘ └─┬─────┘ │
+ // │ │ │
+ // ┌─▼──────▼─┐ │
+ // │Loop Latch├─────┘
+ // └────┬─────┘
+ // ▼
+ // Exit
+ if (!DT.dominates(BB, LoopLatch))
+ continue;
+
+ for (Instruction &Inst : *BB) {
+ // Find Memory Operation Instruction.
+ auto *GEP = getLoadStorePointerOperand(&Inst);
+ if (!GEP)
+ continue;
+
+ auto *ElemSize = dyn_cast<SCEVConstant>(getElementSize(&Inst));
+      // Do not infer from a scalar type, e.g. "ElemSize = sizeof()".
+ if (!ElemSize)
+ continue;
+
+      // Use an existing polynomial recurrence on the trip count.
+ auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(GEP));
+ if (!AddRec)
+ continue;
+ auto *ArrBase = dyn_cast<SCEVUnknown>(getPointerBase(AddRec));
+ auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this));
+ if (!ArrBase || !Step)
+ continue;
+ assert(isLoopInvariant(ArrBase, L) && "See addrec definition");
+
+      // Only handle { %array + step }.
+      // FIXME: {(SCEVAddRecExpr) + step } cannot be analyzed here.
+ if (AddRec->getStart() != ArrBase)
+ continue;
+
+      // Skip memory operation patterns that have gaps, repeat a memory
+      // operation, or whose GEP index wraps around.
+ if (Step->getAPInt().getActiveBits() > 32 ||
+ Step->getAPInt().getZExtValue() !=
+ ElemSize->getAPInt().getZExtValue() ||
+ Step->isZero() || Step->getAPInt().isNegative())
+ continue;
+
+      // Only infer from a stack array whose size is known.
+      // Make sure the alloca instruction is not executed inside the loop.
+ AllocaInst *AllocateInst = dyn_cast<AllocaInst>(ArrBase->getValue());
+ if (!AllocateInst || L->contains(AllocateInst->getParent()))
+ continue;
+
+      // Make sure we only handle a normal array.
+ auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType());
+ auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
+ if (!Ty || !ArrSize || !ArrSize->isOne())
+ continue;
+      // Also make sure the step matches the size of the allocated element
+      // type.
+ const PointerType *GEPT = dyn_cast<PointerType>(GEP->getType());
+ if (Ty->getElementType() != GEPT->getElementType())
+ continue;
+
+      // FIXME: Since GEP indices are silently zext to the indexing type,
+      // we may have a narrow GEP index which wraps around rather than
+      // increasing strictly; we should ensure that the step increases
+      // strictly with each loop iteration.
+ // Now we can infer a max execution time by MemLength/StepLength.
+ const SCEV *MemSize =
+ getConstant(Step->getType(), DL.getTypeAllocSize(Ty));
+ auto *MaxExeCount =
+ dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step));
+ if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32)
+ continue;
+
+      // If the loop reaches the maximum number of executions, we cannot
+      // access bytes starting outside the statically allocated size without
+      // immediate UB. It is still allowed to enter the loop header one more
+      // time.
+ auto *InferCount = dyn_cast<SCEVConstant>(
+ getAddExpr(MaxExeCount, getOne(MaxExeCount->getType())));
+      // Discard the inferred count if it does not fit in 32 bits.
+ if (!InferCount || InferCount->getAPInt().getActiveBits() > 32)
+ continue;
+
+ InferCountColl.push_back(InferCount);
+ }
+ }
+
+ if (InferCountColl.size() == 0)
+ return getCouldNotCompute();
+
+ return getUMinFromMismatchedTypes(InferCountColl);
+}
+
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -7287,13 +7533,15 @@ bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
}
/// Push PHI nodes in the header of the given loop onto the given Worklist.
-static void
-PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+static void PushLoopPHIs(const Loop *L,
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSetImpl<Instruction *> &Visited) {
BasicBlock *Header = L->getHeader();
// Push all Loop-header PHIs onto the Worklist stack.
for (PHINode &PN : Header->phis())
- Worklist.push_back(&PN);
+ if (Visited.insert(&PN).second)
+ Worklist.push_back(&PN);
}
const ScalarEvolution::BackedgeTakenInfo &
@@ -7354,9 +7602,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// it handles SCEVUnknown PHI nodes specially.
if (Result.hasAnyInfo()) {
SmallVector<Instruction *, 16> Worklist;
- PushLoopPHIs(L, Worklist);
-
SmallPtrSet<Instruction *, 8> Discovered;
+ SmallVector<const SCEV *, 8> ToForget;
+ PushLoopPHIs(L, Worklist, Discovered);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
@@ -7373,7 +7621,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(Old);
+ ToForget.push_back(Old);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -7405,6 +7653,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
Worklist.push_back(I);
}
}
+ forgetMemoizedResults(ToForget);
}
// Re-lookup the insert position, since the call to
@@ -7441,6 +7690,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
SmallVector<const Loop *, 16> LoopWorklist(1, L);
SmallVector<Instruction *, 32> Worklist;
SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<const SCEV *, 16> ToForget;
// Iterate over all the loops and sub-loops to drop SCEV information.
while (!LoopWorklist.empty()) {
@@ -7462,29 +7712,27 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
auto LoopUsersItr = LoopUsers.find(CurrL);
if (LoopUsersItr != LoopUsers.end()) {
- for (auto *S : LoopUsersItr->second)
- forgetMemoizedResults(S);
+ ToForget.insert(ToForget.end(), LoopUsersItr->second.begin(),
+ LoopUsersItr->second.end());
LoopUsers.erase(LoopUsersItr);
}
// Drop information about expressions based on loop-header PHIs.
- PushLoopPHIs(CurrL, Worklist);
+ PushLoopPHIs(CurrL, Worklist, Visited);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(It->second);
+ ToForget.push_back(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(I, Worklist, Visited);
}
LoopPropertiesCache.erase(CurrL);
@@ -7492,6 +7740,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
// ValuesAtScopes map.
LoopWorklist.append(CurrL->begin(), CurrL->end());
}
+ forgetMemoizedResults(ToForget);
}
void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
@@ -7506,25 +7755,25 @@ void ScalarEvolution::forgetValue(Value *V) {
// Drop information about expressions based on loop-header PHIs.
SmallVector<Instruction *, 16> Worklist;
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallVector<const SCEV *, 8> ToForget;
Worklist.push_back(I);
+ Visited.insert(I);
- SmallPtrSet<Instruction *, 8> Visited;
while (!Worklist.empty()) {
I = Worklist.pop_back_val();
- if (!Visited.insert(I).second)
- continue;
-
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
eraseValueFromMap(It->first);
- forgetMemoizedResults(It->second);
+ ToForget.push_back(It->second);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(I, Worklist, Visited);
}
+ forgetMemoizedResults(ToForget);
}
void ScalarEvolution::forgetLoopDispositions(const Loop *L) {
@@ -7598,7 +7847,7 @@ ScalarEvolution::BackedgeTakenInfo::getConstantMax(ScalarEvolution *SE) const {
return !ENT.hasAlwaysTruePredicate();
};
- if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getConstantMax())
+ if (!getConstantMax() || any_of(ExitNotTaken, PredicateNotAlwaysTrue))
return SE->getCouldNotCompute();
assert((isa<SCEVCouldNotCompute>(getConstantMax()) ||
@@ -7635,6 +7884,12 @@ ScalarEvolution::ExitLimit::ExitLimit(
const SCEV *E, const SCEV *M, bool MaxOrZero,
ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
: ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
+ // If we prove the max count is zero, so is the symbolic bound. This happens
+ // in practice due to differences in a) how context sensitive we've chosen
+  // to be and b) how we reason about bounds implied by UB.
+ if (MaxNotTaken->isZero())
+ ExactNotTaken = MaxNotTaken;
+
assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
!isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
"Exact is not allowed to be less precise than Max");
@@ -7740,7 +7995,7 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
- if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne()))
+ if (ExitIfTrue == CI->isZero())
continue;
}
@@ -8030,15 +8285,6 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
Pred = ExitCond->getInversePredicate();
const ICmpInst::Predicate OriginalPred = Pred;
- // Handle common loops like: for (X = "string"; *X; ++X)
- if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
- if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- ExitLimit ItCnt =
- computeLoadConstantCompareExitLimit(LI, RHS, L, Pred);
- if (ItCnt.hasAnyInfo())
- return ItCnt;
- }
-
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
@@ -8070,6 +8316,32 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
}
+ // If this loop must exit based on this condition (or execute undefined
+ // behaviour), and we can prove the test sequence produced must repeat
+ // the same values on self-wrap of the IV, then we can infer that IV
+ // doesn't self wrap because if it did, we'd have an infinite (undefined)
+ // loop.
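+  //
+  // For instance (illustrative): a loop
+  //   for (i8 iv = start; iv != n; iv += 8)
+  // with loop-invariant n visits at most 32 distinct values; a self-wrapped
+  // iv would revisit exactly those values, none of which took the exit, so
+  // the loop would be infinite, contradicting the finiteness assumption.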
+ if (ControlsExit && isLoopInvariant(RHS, L) && loopHasNoAbnormalExits(L) &&
+ loopIsFiniteByAssumption(L)) {
+
+ // TODO: We can peel off any functions which are invertible *in L*. Loop
+ // invariant terms are effectively constants for our purposes here.
+ auto *InnerLHS = LHS;
+ if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS))
+ InnerLHS = ZExt->getOperand();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(InnerLHS)) {
+ auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
+ if (!AR->hasNoSelfWrap() && AR->getLoop() == L && AR->isAffine() &&
+ StrideC && StrideC->getAPInt().isPowerOf2()) {
+ auto Flags = AR->getNoWrapFlags();
+ Flags = setFlags(Flags, SCEV::FlagNW);
+ SmallVector<const SCEV*> Operands{AR->operands()};
+ Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags);
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ }
+ }
+ }
+
switch (Pred) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
@@ -8169,85 +8441,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
return cast<SCEVConstant>(Val)->getValue();
}
-/// Given an exit condition of 'icmp op load X, cst', try to see if we can
-/// compute the backedge execution count.
-ScalarEvolution::ExitLimit
-ScalarEvolution::computeLoadConstantCompareExitLimit(
- LoadInst *LI,
- Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate predicate) {
- if (LI->isVolatile()) return getCouldNotCompute();
-
- // Check to see if the loaded pointer is a getelementptr of a global.
- // TODO: Use SCEV instead of manually grubbing with GEPs.
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
- if (!GEP) return getCouldNotCompute();
-
- // Make sure that it is really a constant global we are gepping, with an
- // initializer, and make sure the first IDX is really 0.
- GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
- GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) ||
- !cast<Constant>(GEP->getOperand(1))->isNullValue())
- return getCouldNotCompute();
-
- // Okay, we allow one non-constant index into the GEP instruction.
- Value *VarIdx = nullptr;
- std::vector<Constant*> Indexes;
- unsigned VarIdxNum = 0;
- for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
- Indexes.push_back(CI);
- } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
- if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
- VarIdx = GEP->getOperand(i);
- VarIdxNum = i-2;
- Indexes.push_back(nullptr);
- }
-
- // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
- if (!VarIdx)
- return getCouldNotCompute();
-
- // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
- // Check to see if X is a loop variant variable value now.
- const SCEV *Idx = getSCEV(VarIdx);
- Idx = getSCEVAtScope(Idx, L);
-
- // We can only recognize very limited forms of loop index expressions, in
- // particular, only affine AddRec's like {C1,+,C2}<L>.
- const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
- if (!IdxExpr || IdxExpr->getLoop() != L || !IdxExpr->isAffine() ||
- isLoopInvariant(IdxExpr, L) ||
- !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
- !isa<SCEVConstant>(IdxExpr->getOperand(1)))
- return getCouldNotCompute();
-
- unsigned MaxSteps = MaxBruteForceIterations;
- for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
- ConstantInt *ItCst = ConstantInt::get(
- cast<IntegerType>(IdxExpr->getType()), IterationNum);
- ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
-
- // Form the GEP offset.
- Indexes[VarIdxNum] = Val;
-
- Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
- Indexes);
- if (!Result) break; // Cannot compute!
-
- // Evaluate the condition for this iteration.
- Result = ConstantExpr::getICmp(predicate, Result, RHS);
- if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure
- if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
- ++NumArrayLenItCounts;
- return getConstant(ItCst); // Found terminating iteration!
- }
- }
- return getCouldNotCompute();
-}
-
ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) {
ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV);
@@ -9160,7 +9353,7 @@ GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
APInt L = LC->getAPInt();
APInt M = MC->getAPInt();
APInt N = NC->getAPInt();
- assert(!N.isNullValue() && "This is not a quadratic addrec");
+ assert(!N.isZero() && "This is not a quadratic addrec");
unsigned BitWidth = LC->getAPInt().getBitWidth();
unsigned NewWidth = BitWidth + 1;
@@ -9486,9 +9679,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// N = Distance (as unsigned)
if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
APInt MaxBECount = getUnsignedRangeMax(applyLoopGuards(Distance, L));
- APInt MaxBECountBase = getUnsignedRangeMax(Distance);
- if (MaxBECountBase.ult(MaxBECount))
- MaxBECount = MaxBECountBase;
+ MaxBECount = APIntOps::umin(MaxBECount, getUnsignedRangeMax(Distance));
// When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is rotated,
// we end up with a loop whose backedge-taken count is n - 1. Detect this
@@ -9521,11 +9712,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
const SCEV *Max = getCouldNotCompute();
if (Exact != getCouldNotCompute()) {
APInt MaxInt = getUnsignedRangeMax(applyLoopGuards(Exact, L));
- APInt BaseMaxInt = getUnsignedRangeMax(Exact);
- if (BaseMaxInt.ult(MaxInt))
- Max = getConstant(BaseMaxInt);
- else
- Max = getConstant(MaxInt);
+ Max = getConstant(APIntOps::umin(MaxInt, getUnsignedRangeMax(Exact)));
}
return ExitLimit(Exact, Max, false, Predicates);
}
@@ -9533,9 +9720,12 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Solve the general equation.
const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
getNegativeSCEV(Start), *this);
- const SCEV *M = E == getCouldNotCompute()
- ? E
- : getConstant(getUnsignedRangeMax(E));
+
+ const SCEV *M = E;
+ if (E != getCouldNotCompute()) {
+ APInt MaxWithGuards = getUnsignedRangeMax(applyLoopGuards(E, L));
+ M = getConstant(APIntOps::umin(MaxWithGuards, getUnsignedRangeMax(E)));
+ }
return ExitLimit(E, M, false, Predicates);
}
@@ -9911,23 +10101,23 @@ Optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred,
bool ScalarEvolution::isKnownPredicateAt(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
// TODO: Analyze guards and assumes from Context's block.
return isKnownPredicate(Pred, LHS, RHS) ||
- isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS);
+ isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS);
}
-Optional<bool>
-ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS,
- const Instruction *Context) {
+Optional<bool> ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred,
+ const SCEV *LHS,
+ const SCEV *RHS,
+ const Instruction *CtxI) {
Optional<bool> KnownWithoutContext = evaluatePredicate(Pred, LHS, RHS);
if (KnownWithoutContext)
return KnownWithoutContext;
- if (isBasicBlockEntryGuardedByCond(Context->getParent(), Pred, LHS, RHS))
+ if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS))
return true;
- else if (isBasicBlockEntryGuardedByCond(Context->getParent(),
+ else if (isBasicBlockEntryGuardedByCond(CtxI->getParent(),
ICmpInst::getInversePredicate(Pred),
LHS, RHS))
return false;
@@ -10057,7 +10247,7 @@ ScalarEvolution::getLoopInvariantPredicate(ICmpInst::Predicate Pred,
Optional<ScalarEvolution::LoopInvariantPredicate>
ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
- const Instruction *Context, const SCEV *MaxIter) {
+ const Instruction *CtxI, const SCEV *MaxIter) {
// Try to prove the following set of facts:
// - The predicate is monotonic in the iteration space.
// - If the check does not fail on the 1st iteration:
@@ -10111,7 +10301,7 @@ ScalarEvolution::getLoopInvariantExitCondDuringFirstIterations(
if (Step == MinusOne)
NoOverflowPred = CmpInst::getSwappedPredicate(NoOverflowPred);
const SCEV *Start = AR->getStart();
- if (!isKnownPredicateAt(NoOverflowPred, Start, Last, Context))
+ if (!isKnownPredicateAt(NoOverflowPred, Start, Last, CtxI))
return None;
// Everything is fine.
@@ -10448,12 +10638,12 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
// Try to prove (Pred, LHS, RHS) using isImpliedCond.
auto ProveViaCond = [&](const Value *Condition, bool Inverse) {
- const Instruction *Context = &BB->front();
- if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, Context))
+ const Instruction *CtxI = &BB->front();
+ if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse, CtxI))
return true;
if (ProvingStrictComparison) {
auto ProofFn = [&](ICmpInst::Predicate P) {
- return isImpliedCond(P, LHS, RHS, Condition, Inverse, Context);
+ return isImpliedCond(P, LHS, RHS, Condition, Inverse, CtxI);
};
if (SplitAndProve(ProofFn))
return true;
@@ -10525,7 +10715,7 @@ bool ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
const Value *FoundCondValue, bool Inverse,
- const Instruction *Context) {
+ const Instruction *CtxI) {
// False conditions implies anything. Do not bother analyzing it further.
if (FoundCondValue ==
ConstantInt::getBool(FoundCondValue->getContext(), Inverse))
@@ -10541,12 +10731,12 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const Value *Op0, *Op1;
if (match(FoundCondValue, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
if (!Inverse)
- return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
- isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
+ return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) ||
+ isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI);
} else if (match(FoundCondValue, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
if (Inverse)
- return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, Context) ||
- isImpliedCond(Pred, LHS, RHS, Op1, Inverse, Context);
+ return isImpliedCond(Pred, LHS, RHS, Op0, Inverse, CtxI) ||
+ isImpliedCond(Pred, LHS, RHS, Op1, Inverse, CtxI);
}
const ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
@@ -10563,14 +10753,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
- return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, Context);
+ return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS, CtxI);
}
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
const SCEV *RHS,
ICmpInst::Predicate FoundPred,
const SCEV *FoundLHS, const SCEV *FoundRHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
// Balance the types.
if (getTypeSizeInBits(LHS->getType()) <
getTypeSizeInBits(FoundLHS->getType())) {
@@ -10583,12 +10773,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
auto BitWidth = getTypeSizeInBits(NarrowType);
const SCEV *MaxValue = getZeroExtendExpr(
getConstant(APInt::getMaxValue(BitWidth)), WideType);
- if (isKnownPredicate(ICmpInst::ICMP_ULE, FoundLHS, MaxValue) &&
- isKnownPredicate(ICmpInst::ICMP_ULE, FoundRHS, MaxValue)) {
+ if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundLHS,
+ MaxValue) &&
+ isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, FoundRHS,
+ MaxValue)) {
const SCEV *TruncFoundLHS = getTruncateExpr(FoundLHS, NarrowType);
const SCEV *TruncFoundRHS = getTruncateExpr(FoundRHS, NarrowType);
if (isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, TruncFoundLHS,
- TruncFoundRHS, Context))
+ TruncFoundRHS, CtxI))
return true;
}
}
@@ -10615,13 +10807,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
}
}
return isImpliedCondBalancedTypes(Pred, LHS, RHS, FoundPred, FoundLHS,
- FoundRHS, Context);
+ FoundRHS, CtxI);
}
bool ScalarEvolution::isImpliedCondBalancedTypes(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
ICmpInst::Predicate FoundPred, const SCEV *FoundLHS, const SCEV *FoundRHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
assert(getTypeSizeInBits(LHS->getType()) ==
getTypeSizeInBits(FoundLHS->getType()) &&
"Types should be balanced!");
@@ -10647,7 +10839,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// Check whether the found predicate is the same as the desired predicate.
if (FoundPred == Pred)
- return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
// Check whether swapping the found predicate makes it the same as the
// desired predicate.
@@ -10663,27 +10855,70 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// do this if it would break canonical constant/addrec ordering.
if (!isa<SCEVConstant>(RHS) && !isa<SCEVAddRecExpr>(LHS))
return isImpliedCondOperands(FoundPred, RHS, LHS, FoundLHS, FoundRHS,
- Context);
+ CtxI);
if (!isa<SCEVConstant>(FoundRHS) && !isa<SCEVAddRecExpr>(FoundLHS))
- return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, Context);
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS, CtxI);
+
+  // There's no clear preference between forms 3. and 4.; try both. Avoid
+ // forming getNotSCEV of pointer values as the resulting subtract is
+ // not legal.
+ if (!LHS->getType()->isPointerTy() && !RHS->getType()->isPointerTy() &&
+ isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
+ FoundLHS, FoundRHS, CtxI))
+ return true;
- // Don't try to getNotSCEV pointers.
- if (LHS->getType()->isPointerTy() || FoundLHS->getType()->isPointerTy())
- return false;
+ if (!FoundLHS->getType()->isPointerTy() &&
+ !FoundRHS->getType()->isPointerTy() &&
+ isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
+ getNotSCEV(FoundRHS), CtxI))
+ return true;
- // There's no clear preference between forms 3. and 4., try both.
- return isImpliedCondOperands(FoundPred, getNotSCEV(LHS), getNotSCEV(RHS),
- FoundLHS, FoundRHS, Context) ||
- isImpliedCondOperands(Pred, LHS, RHS, getNotSCEV(FoundLHS),
- getNotSCEV(FoundRHS), Context);
+ return false;
}
- // Unsigned comparison is the same as signed comparison when both the operands
- // are non-negative.
- if (CmpInst::isUnsigned(FoundPred) &&
- CmpInst::getSignedPredicate(FoundPred) == Pred &&
- isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
- return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context);
+ auto IsSignFlippedPredicate = [](CmpInst::Predicate P1,
+ CmpInst::Predicate P2) {
+ assert(P1 != P2 && "Handled earlier!");
+ return CmpInst::isRelational(P2) &&
+ P1 == CmpInst::getFlippedSignednessPredicate(P2);
+ };
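+  // Illustrative: for i8 values, 3 <u 200 holds but 3 <s 200 does not (200 is
+  // -56 as a signed i8), so flipping signedness is only sound under the sign
+  // constraints checked below.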
+ if (IsSignFlippedPredicate(Pred, FoundPred)) {
+ // Unsigned comparison is the same as signed comparison when both the
+ // operands are non-negative or negative.
+ if ((isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) ||
+ (isKnownNegative(FoundLHS) && isKnownNegative(FoundRHS)))
+ return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI);
+ // Create local copies that we can freely swap and canonicalize our
+ // conditions to "le/lt".
+ ICmpInst::Predicate CanonicalPred = Pred, CanonicalFoundPred = FoundPred;
+ const SCEV *CanonicalLHS = LHS, *CanonicalRHS = RHS,
+ *CanonicalFoundLHS = FoundLHS, *CanonicalFoundRHS = FoundRHS;
+ if (ICmpInst::isGT(CanonicalPred) || ICmpInst::isGE(CanonicalPred)) {
+ CanonicalPred = ICmpInst::getSwappedPredicate(CanonicalPred);
+ CanonicalFoundPred = ICmpInst::getSwappedPredicate(CanonicalFoundPred);
+ std::swap(CanonicalLHS, CanonicalRHS);
+ std::swap(CanonicalFoundLHS, CanonicalFoundRHS);
+ }
+ assert((ICmpInst::isLT(CanonicalPred) || ICmpInst::isLE(CanonicalPred)) &&
+ "Must be!");
+ assert((ICmpInst::isLT(CanonicalFoundPred) ||
+ ICmpInst::isLE(CanonicalFoundPred)) &&
+ "Must be!");
+ if (ICmpInst::isSigned(CanonicalPred) && isKnownNonNegative(CanonicalRHS))
+ // Use implication:
+ // x <u y && y >=s 0 --> x <s y.
+ // If we can prove the left part, the right part is also proven.
+ return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
+ CanonicalRHS, CanonicalFoundLHS,
+ CanonicalFoundRHS);
+ if (ICmpInst::isUnsigned(CanonicalPred) && isKnownNegative(CanonicalRHS))
+ // Use implication:
+ // x <s y && y <s 0 --> x <u y.
+ // If we can prove the left part, the right part is also proven.
+ return isImpliedCondOperands(CanonicalFoundPred, CanonicalLHS,
+ CanonicalRHS, CanonicalFoundLHS,
+ CanonicalFoundRHS);
+ }
// Check if we can make progress by sharpening ranges.
if (FoundPred == ICmpInst::ICMP_NE &&
@@ -10721,7 +10956,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// We know V `Pred` SharperMin. If this implies LHS `Pred`
// RHS, we're done.
if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(SharperMin),
- Context))
+ CtxI))
return true;
LLVM_FALLTHROUGH;
@@ -10736,8 +10971,7 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
//
// If V `Pred` Min implies LHS `Pred` RHS, we're done.
- if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min),
- Context))
+ if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min), CtxI))
return true;
break;
@@ -10745,14 +10979,14 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
case ICmpInst::ICMP_SLE:
case ICmpInst::ICMP_ULE:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
- LHS, V, getConstant(SharperMin), Context))
+ LHS, V, getConstant(SharperMin), CtxI))
return true;
LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT:
if (isImpliedCondOperands(CmpInst::getSwappedPredicate(Pred), RHS,
- LHS, V, getConstant(Min), Context))
+ LHS, V, getConstant(Min), CtxI))
return true;
break;
@@ -10766,12 +11000,11 @@ bool ScalarEvolution::isImpliedCondBalancedTypes(
// Check whether the actual condition is beyond sufficient.
if (FoundPred == ICmpInst::ICMP_EQ)
if (ICmpInst::isTrueWhenEqual(Pred))
- if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, Context))
+ if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
return true;
if (Pred == ICmpInst::ICMP_NE)
if (!ICmpInst::isTrueWhenEqual(FoundPred))
- if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS,
- Context))
+ if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS, CtxI))
return true;
// Otherwise assume the worst.
@@ -10852,7 +11085,7 @@ Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *Context) {
+ const SCEV *FoundLHS, const SCEV *FoundRHS, const Instruction *CtxI) {
// Try to recognize the following pattern:
//
// FoundRHS = ...
@@ -10866,9 +11099,9 @@ bool ScalarEvolution::isImpliedCondOperandsViaAddRecStart(
// each iteration of this loop, including the first iteration. Therefore, in
// this case, `FoundLHS Pred FoundRHS` implies `Start Pred FoundRHS`. Try to
// prove the original pred using this fact.
- if (!Context)
+ if (!CtxI)
return false;
- const BasicBlock *ContextBB = Context->getParent();
+ const BasicBlock *ContextBB = CtxI->getParent();
// Make sure AR varies in the context block.
if (auto *AR = dyn_cast<SCEVAddRecExpr>(FoundLHS)) {
const Loop *L = AR->getLoop();
@@ -11090,7 +11323,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
const SCEV *FoundLHS,
const SCEV *FoundRHS,
- const Instruction *Context) {
+ const Instruction *CtxI) {
if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
return true;
@@ -11098,7 +11331,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
return true;
if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
- Context))
+ CtxI))
return true;
return isImpliedCondOperandsHelper(Pred, LHS, RHS,
@@ -11534,6 +11767,12 @@ const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
if (IsSigned && BitWidth == 1)
return getZero(Stride->getType());
+ // This code has only been closely audited for negative strides in the
+  // unsigned comparison case; it may be correct for signed comparison, but
+ // that needs to be established.
+ assert((!IsSigned || !isKnownNonPositive(Stride)) &&
+ "Stride is expected strictly positive for signed case!");
+
// Calculate the maximum backedge count based on the range of values
// permitted by Start, End, and Stride.
APInt MinStart =
@@ -11576,6 +11815,80 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
bool PredicatedIV = false;
+ auto canAssumeNoSelfWrap = [&](const SCEVAddRecExpr *AR) {
+ // Can we prove this loop *must* be UB if overflow of IV occurs?
+ // Reasoning goes as follows:
+ // * Suppose the IV did self wrap.
+ // * If Stride evenly divides the iteration space, then once wrap
+ // occurs, the loop must revisit the same values.
+ // * We know that RHS is invariant, and that none of those values
+ // caused this exit to be taken previously. Thus, this exit is
+ // dynamically dead.
+ // * If this is the sole exit, then a dead exit implies the loop
+ // must be infinite if there are no abnormal exits.
+ // * If the loop were infinite, then it must either not be mustprogress
+ // or have side effects. Otherwise, it must be UB.
+ // * It can't (by assumption), be UB so we have contradicted our
+ // premise and can conclude the IV did not in fact self-wrap.
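+    //
+    // Illustrative shape: an i8 IV with constant stride 4 cycles through at
+    // most 64 values; once it self-wraps it can only revisit values that
+    // already failed to take this exit, so the exit would never be taken.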
+ if (!isLoopInvariant(RHS, L))
+ return false;
+
+ auto *StrideC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this));
+ if (!StrideC || !StrideC->getAPInt().isPowerOf2())
+ return false;
+
+ if (!ControlsExit || !loopHasNoAbnormalExits(L))
+ return false;
+
+ return loopIsFiniteByAssumption(L);
+ };
+
+ if (!IV) {
+ if (auto *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS)) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ZExt->getOperand());
+ if (AR && AR->getLoop() == L && AR->isAffine()) {
+ auto canProveNUW = [&]() {
+ if (!isLoopInvariant(RHS, L))
+ return false;
+
+ if (!isKnownNonZero(AR->getStepRecurrence(*this)))
+ // We need the sequence defined by AR to strictly increase in the
+ // unsigned integer domain for the logic below to hold.
+ return false;
+
+ const unsigned InnerBitWidth = getTypeSizeInBits(AR->getType());
+ const unsigned OuterBitWidth = getTypeSizeInBits(RHS->getType());
+ // If RHS <=u Limit, then there must exist a value V in the sequence
+ // defined by AR (e.g. {Start,+,Step}) such that V >u RHS, and
+ // V <=u UINT_MAX. Thus, we must exit the loop before unsigned
+ // overflow occurs. This limit also implies that a signed comparison
+ // (in the wide bitwidth) is equivalent to an unsigned comparison as
+ // the high bits on both sides must be zero.
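+            // Worked numbers (illustrative): for an i8 AddRec with a maximum
+            // stride of 2, Limit is 255 - (2 - 1) = 254; if the wide RHS is
+            // known to be <=u 254, the loop must exit before the narrow IV
+            // can wrap past 255.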
+ APInt StrideMax = getUnsignedRangeMax(AR->getStepRecurrence(*this));
+ APInt Limit = APInt::getMaxValue(InnerBitWidth) - (StrideMax - 1);
+ Limit = Limit.zext(OuterBitWidth);
+ return getUnsignedRangeMax(applyLoopGuards(RHS, L)).ule(Limit);
+ };
+ auto Flags = AR->getNoWrapFlags();
+ if (!hasFlags(Flags, SCEV::FlagNUW) && canProveNUW())
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), Flags);
+ if (AR->hasNoUnsignedWrap()) {
+ // Emulate what getZeroExtendExpr would have done during construction
+ // if we'd been able to infer the fact just above at that time.
+ const SCEV *Step = AR->getStepRecurrence(*this);
+ Type *Ty = ZExt->getType();
+ auto *S = getAddRecExpr(
+ getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 0),
+ getZeroExtendExpr(Step, Ty, 0), L, AR->getNoWrapFlags());
+ IV = dyn_cast<SCEVAddRecExpr>(S);
+ }
+ }
+ }
+ }
+
if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
@@ -11626,32 +11939,29 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
//
// a) IV is either nuw or nsw depending upon signedness (indicated by the
// NoWrap flag).
- // b) loop is single exit with no side effects.
- //
+ // b) the loop is guaranteed to be finite (e.g. is mustprogress and has
+ // no side effects within the loop)
+  //  c) the loop has a single static exit (with no abnormal exits)
//
// Precondition a) implies that if the stride is negative, this is a single
// trip loop. The backedge taken count formula reduces to zero in this case.
//
- // Precondition b) implies that if rhs is invariant in L, then unknown
- // stride being zero means the backedge can't be taken without UB.
+  // Preconditions b) and c) combine to imply that if rhs is invariant in L,
+ // then a zero stride means the backedge can't be taken without executing
+ // undefined behavior.
//
// The positive stride case is the same as isKnownPositive(Stride) returning
// true (original behavior of the function).
//
- // We want to make sure that the stride is truly unknown as there are edge
- // cases where ScalarEvolution propagates no wrap flags to the
- // post-increment/decrement IV even though the increment/decrement operation
- // itself is wrapping. The computed backedge taken count may be wrong in
- // such cases. This is prevented by checking that the stride is not known to
- // be either positive or non-positive. For example, no wrap flags are
- // propagated to the post-increment IV of this loop with a trip count of 2 -
- //
- // unsigned char i;
- // for(i=127; i<128; i+=129)
- // A[i] = i;
- //
- if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
- !loopIsFiniteByAssumption(L))
+ if (PredicatedIV || !NoWrap || !loopIsFiniteByAssumption(L) ||
+ !loopHasNoAbnormalExits(L))
+ return getCouldNotCompute();
+
+ // This bailout is protecting the logic in computeMaxBECountForLT which
+  // has not yet been sufficiently audited or tested with negative strides.
+  // We used to filter out all known-non-positive cases here; we're in the
+  // process of becoming less restrictive bit by bit.
+ if (IsSigned && isKnownNonPositive(Stride))
return getCouldNotCompute();
if (!isKnownNonZero(Stride)) {
@@ -11687,37 +11997,12 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
}
} else if (!Stride->isOne() && !NoWrap) {
auto isUBOnWrap = [&]() {
- // Can we prove this loop *must* be UB if overflow of IV occurs?
- // Reasoning goes as follows:
- // * Suppose the IV did self wrap.
- // * If Stride evenly divides the iteration space, then once wrap
- // occurs, the loop must revisit the same values.
- // * We know that RHS is invariant, and that none of those values
- // caused this exit to be taken previously. Thus, this exit is
- // dynamically dead.
- // * If this is the sole exit, then a dead exit implies the loop
- // must be infinite if there are no abnormal exits.
- // * If the loop were infinite, then it must either not be mustprogress
- // or have side effects. Otherwise, it must be UB.
- // * It can't (by assumption), be UB so we have contradicted our
- // premise and can conclude the IV did not in fact self-wrap.
// From no-self-wrap, we need to then prove no-(un)signed-wrap. This
// follows trivially from the fact that every (un)signed-wrapped, but
// not self-wrapped value must be LT than the last value before
// (un)signed wrap. Since we know that last value didn't exit, nor
// will any smaller one.
-
- if (!isLoopInvariant(RHS, L))
- return false;
-
- auto *StrideC = dyn_cast<SCEVConstant>(Stride);
- if (!StrideC || !StrideC->getAPInt().isPowerOf2())
- return false;
-
- if (!ControlsExit || !loopHasNoAbnormalExits(L))
- return false;
-
- return loopIsFiniteByAssumption(L);
+ return canAssumeNoSelfWrap(IV);
};
// Avoid proven overflow cases: this will ensure that the backedge taken
@@ -11740,7 +12025,9 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Start = IV->getStart();
// Preserve pointer-typed Start/RHS to pass to isLoopEntryGuardedByCond.
- // Use integer-typed versions for actual computation.
+ // If we convert to integers, isLoopEntryGuardedByCond will miss some cases.
+ // Use integer-typed versions for actual computation; we can't subtract
+ // pointers in general.
const SCEV *OrigStart = Start;
const SCEV *OrigRHS = RHS;
if (Start->getType()->isPointerTy()) {
@@ -11771,10 +12058,13 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// is End and so the result is as above, and if not max(End,Start) is Start
// so we get a backedge count of zero.
const SCEV *BECount = nullptr;
- auto *StartMinusStride = getMinusSCEV(OrigStart, Stride);
+ auto *OrigStartMinusStride = getMinusSCEV(OrigStart, Stride);
+ assert(isAvailableAtLoopEntry(OrigStartMinusStride, L) && "Must be!");
+ assert(isAvailableAtLoopEntry(OrigStart, L) && "Must be!");
+ assert(isAvailableAtLoopEntry(OrigRHS, L) && "Must be!");
// Can we prove (max(RHS,Start) > Start - Stride?
- if (isLoopEntryGuardedByCond(L, Cond, StartMinusStride, Start) &&
- isLoopEntryGuardedByCond(L, Cond, StartMinusStride, RHS)) {
+ if (isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigStart) &&
+ isLoopEntryGuardedByCond(L, Cond, OrigStartMinusStride, OrigRHS)) {
// In this case, we can use a refined formula for computing backedge taken
// count. The general formula remains:
// "End-Start /uceiling Stride" where "End = max(RHS,Start)"
@@ -11795,10 +12085,8 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// Our preconditions trivially imply no overflow in that form.
const SCEV *MinusOne = getMinusOne(Stride->getType());
const SCEV *Numerator =
- getMinusSCEV(getAddExpr(RHS, MinusOne), StartMinusStride);
- if (!isa<SCEVCouldNotCompute>(Numerator)) {
- BECount = getUDivExpr(Numerator, Stride);
- }
+ getMinusSCEV(getAddExpr(RHS, MinusOne), getMinusSCEV(Start, Stride));
+ BECount = getUDivExpr(Numerator, Stride);
}
const SCEV *BECountIfBackedgeTaken = nullptr;
@@ -12141,7 +12429,7 @@ SCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const {
}
// Return true when S contains at least an undef value.
-static inline bool containsUndefs(const SCEV *S) {
+bool ScalarEvolution::containsUndefs(const SCEV *S) const {
return SCEVExprContains(S, [](const SCEV *S) {
if (const auto *SU = dyn_cast<SCEVUnknown>(S))
return isa<UndefValue>(SU->getValue());
@@ -12149,237 +12437,6 @@ static inline bool containsUndefs(const SCEV *S) {
});
}
-namespace {
-
-// Collect all steps of SCEV expressions.
-struct SCEVCollectStrides {
- ScalarEvolution &SE;
- SmallVectorImpl<const SCEV *> &Strides;
-
- SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
- : SE(SE), Strides(S) {}
-
- bool follow(const SCEV *S) {
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- Strides.push_back(AR->getStepRecurrence(SE));
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-// Collect all SCEVUnknown and SCEVMulExpr expressions.
-struct SCEVCollectTerms {
- SmallVectorImpl<const SCEV *> &Terms;
-
- SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
- isa<SCEVSignExtendExpr>(S)) {
- if (!containsUndefs(S))
- Terms.push_back(S);
-
- // Stop recursion: once we collected a term, do not walk its operands.
- return false;
- }
-
- // Keep looking.
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-// Check if a SCEV contains an AddRecExpr.
-struct SCEVHasAddRec {
- bool &ContainsAddRec;
-
- SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
- ContainsAddRec = false;
- }
-
- bool follow(const SCEV *S) {
- if (isa<SCEVAddRecExpr>(S)) {
- ContainsAddRec = true;
-
- // Stop recursion: once we collected a term, do not walk its operands.
- return false;
- }
-
- // Keep looking.
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-// Find factors that are multiplied with an expression that (possibly as a
-// subexpression) contains an AddRecExpr. In the expression:
-//
-// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop))
-//
-// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
-// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size
-// parameters as they form a product with an induction variable.
-//
-// This collector expects all array size parameters to be in the same MulExpr.
-// It might be necessary to later add support for collecting parameters that are
-// spread over different nested MulExpr.
-struct SCEVCollectAddRecMultiplies {
- SmallVectorImpl<const SCEV *> &Terms;
- ScalarEvolution &SE;
-
- SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
- : Terms(T), SE(SE) {}
-
- bool follow(const SCEV *S) {
- if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
- bool HasAddRec = false;
- SmallVector<const SCEV *, 0> Operands;
- for (auto Op : Mul->operands()) {
- const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op);
- if (Unknown && !isa<CallInst>(Unknown->getValue())) {
- Operands.push_back(Op);
- } else if (Unknown) {
- HasAddRec = true;
- } else {
- bool ContainsAddRec = false;
- SCEVHasAddRec ContiansAddRec(ContainsAddRec);
- visitAll(Op, ContiansAddRec);
- HasAddRec |= ContainsAddRec;
- }
- }
- if (Operands.size() == 0)
- return true;
-
- if (!HasAddRec)
- return false;
-
- Terms.push_back(SE.getMulExpr(Operands));
- // Stop recursion: once we collected a term, do not walk its operands.
- return false;
- }
-
- // Keep looking.
- return true;
- }
-
- bool isDone() const { return false; }
-};
-
-} // end anonymous namespace
-
-/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in
-/// two places:
-/// 1) The strides of AddRec expressions.
-/// 2) Unknowns that are multiplied with AddRec expressions.
-void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Terms) {
- SmallVector<const SCEV *, 4> Strides;
- SCEVCollectStrides StrideCollector(*this, Strides);
- visitAll(Expr, StrideCollector);
-
- LLVM_DEBUG({
- dbgs() << "Strides:\n";
- for (const SCEV *S : Strides)
- dbgs() << *S << "\n";
- });
-
- for (const SCEV *S : Strides) {
- SCEVCollectTerms TermCollector(Terms);
- visitAll(S, TermCollector);
- }
-
- LLVM_DEBUG({
- dbgs() << "Terms:\n";
- for (const SCEV *T : Terms)
- dbgs() << *T << "\n";
- });
-
- SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
- visitAll(Expr, MulCollector);
-}
-
-static bool findArrayDimensionsRec(ScalarEvolution &SE,
- SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes) {
- int Last = Terms.size() - 1;
- const SCEV *Step = Terms[Last];
-
- // End of recursion.
- if (Last == 0) {
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
- SmallVector<const SCEV *, 2> Qs;
- for (const SCEV *Op : M->operands())
- if (!isa<SCEVConstant>(Op))
- Qs.push_back(Op);
-
- Step = SE.getMulExpr(Qs);
- }
-
- Sizes.push_back(Step);
- return true;
- }
-
- for (const SCEV *&Term : Terms) {
- // Normalize the terms before the next call to findArrayDimensionsRec.
- const SCEV *Q, *R;
- SCEVDivision::divide(SE, Term, Step, &Q, &R);
-
- // Bail out when GCD does not evenly divide one of the terms.
- if (!R->isZero())
- return false;
-
- Term = Q;
- }
-
- // Remove all SCEVConstants.
- erase_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); });
-
- if (Terms.size() > 0)
- if (!findArrayDimensionsRec(SE, Terms, Sizes))
- return false;
-
- Sizes.push_back(Step);
- return true;
-}
-
-// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
-static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
- for (const SCEV *T : Terms)
- if (SCEVExprContains(T, [](const SCEV *S) { return isa<SCEVUnknown>(S); }))
- return true;
-
- return false;
-}
-
-// Return the number of product terms in S.
-static inline int numberOfTerms(const SCEV *S) {
- if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
- return Expr->getNumOperands();
- return 1;
-}
-
-static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
- if (isa<SCEVConstant>(T))
- return nullptr;
-
- if (isa<SCEVUnknown>(T))
- return T;
-
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
- SmallVector<const SCEV *, 2> Factors;
- for (const SCEV *Op : M->operands())
- if (!isa<SCEVConstant>(Op))
- Factors.push_back(Op);
-
- return SE.getMulExpr(Factors);
- }
-
- return T;
-}
-
/// Return the size of an element read or written by Inst.
const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
Type *Ty;
@@ -12394,248 +12451,6 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
return getSizeOfExpr(ETy, Ty);
}
-void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize) {
- if (Terms.size() < 1 || !ElementSize)
- return;
-
- // Early return when Terms do not contain parameters: we do not delinearize
- // non parametric SCEVs.
- if (!containsParameters(Terms))
- return;
-
- LLVM_DEBUG({
- dbgs() << "Terms:\n";
- for (const SCEV *T : Terms)
- dbgs() << *T << "\n";
- });
-
- // Remove duplicates.
- array_pod_sort(Terms.begin(), Terms.end());
- Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
-
- // Put larger terms first.
- llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
- return numberOfTerms(LHS) > numberOfTerms(RHS);
- });
-
- // Try to divide all terms by the element size. If term is not divisible by
- // element size, proceed with the original term.
- for (const SCEV *&Term : Terms) {
- const SCEV *Q, *R;
- SCEVDivision::divide(*this, Term, ElementSize, &Q, &R);
- if (!Q->isZero())
- Term = Q;
- }
-
- SmallVector<const SCEV *, 4> NewTerms;
-
- // Remove constant factors.
- for (const SCEV *T : Terms)
- if (const SCEV *NewT = removeConstantFactors(*this, T))
- NewTerms.push_back(NewT);
-
- LLVM_DEBUG({
- dbgs() << "Terms after sorting:\n";
- for (const SCEV *T : NewTerms)
- dbgs() << *T << "\n";
- });
-
- if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
- Sizes.clear();
- return;
- }
-
- // The last element to be pushed into Sizes is the size of an element.
- Sizes.push_back(ElementSize);
-
- LLVM_DEBUG({
- dbgs() << "Sizes:\n";
- for (const SCEV *S : Sizes)
- dbgs() << *S << "\n";
- });
-}
-
-void ScalarEvolution::computeAccessFunctions(
- const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes) {
- // Early exit in case this SCEV is not an affine multivariate function.
- if (Sizes.empty())
- return;
-
- if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
- if (!AR->isAffine())
- return;
-
- const SCEV *Res = Expr;
- int Last = Sizes.size() - 1;
- for (int i = Last; i >= 0; i--) {
- const SCEV *Q, *R;
- SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);
-
- LLVM_DEBUG({
- dbgs() << "Res: " << *Res << "\n";
- dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
- dbgs() << "Res divided by Sizes[i]:\n";
- dbgs() << "Quotient: " << *Q << "\n";
- dbgs() << "Remainder: " << *R << "\n";
- });
-
- Res = Q;
-
- // Do not record the last subscript corresponding to the size of elements in
- // the array.
- if (i == Last) {
-
- // Bail out if the remainder is too complex.
- if (isa<SCEVAddRecExpr>(R)) {
- Subscripts.clear();
- Sizes.clear();
- return;
- }
-
- continue;
- }
-
- // Record the access function for the current subscript.
- Subscripts.push_back(R);
- }
-
- // Also push in last position the remainder of the last division: it will be
- // the access function of the innermost dimension.
- Subscripts.push_back(Res);
-
- std::reverse(Subscripts.begin(), Subscripts.end());
-
- LLVM_DEBUG({
- dbgs() << "Subscripts:\n";
- for (const SCEV *S : Subscripts)
- dbgs() << *S << "\n";
- });
-}
-
-/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
-/// sizes of an array access. Returns the remainder of the delinearization that
-/// is the offset start of the array. The SCEV->delinearize algorithm computes
-/// the multiples of SCEV coefficients: that is a pattern matching of sub
-/// expressions in the stride and base of a SCEV corresponding to the
-/// computation of a GCD (greatest common divisor) of base and stride. When
-/// SCEV->delinearize fails, it returns the SCEV unchanged.
-///
-/// For example: when analyzing the memory access A[i][j][k] in this loop nest
-///
-/// void foo(long n, long m, long o, double A[n][m][o]) {
-///
-/// for (long i = 0; i < n; i++)
-/// for (long j = 0; j < m; j++)
-/// for (long k = 0; k < o; k++)
-/// A[i][j][k] = 1.0;
-/// }
-///
-/// the delinearization input is the following AddRec SCEV:
-///
-/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
-///
-/// From this SCEV, we are able to say that the base offset of the access is %A
-/// because it appears as an offset that does not divide any of the strides in
-/// the loops:
-///
-/// CHECK: Base offset: %A
-///
-/// and then SCEV->delinearize determines the size of some of the dimensions of
-/// the array as these are the multiples by which the strides are happening:
-///
-/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
-///
-/// Note that the outermost dimension remains of UnknownSize because there are
-/// no strides that would help identifying the size of the last dimension: when
-/// the array has been statically allocated, one could compute the size of that
-/// dimension by dividing the overall size of the array by the size of the known
-/// dimensions: %m * %o * 8.
-///
-/// Finally delinearize provides the access functions for the array reference
-/// that does correspond to A[i][j][k] of the above C testcase:
-///
-/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
-///
-/// The testcases are checking the output of a function pass:
-/// DelinearizationPass that walks through all loads and stores of a function
-/// asking for the SCEV of the memory access with respect to all enclosing
-/// loops, calling SCEV->delinearize on that and printing the results.
-void ScalarEvolution::delinearize(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize) {
- // First step: collect parametric terms.
- SmallVector<const SCEV *, 4> Terms;
- collectParametricTerms(Expr, Terms);
-
- if (Terms.empty())
- return;
-
- // Second step: find subscript sizes.
- findArrayDimensions(Terms, Sizes, ElementSize);
-
- if (Sizes.empty())
- return;
-
- // Third step: compute the access functions for each subscript.
- computeAccessFunctions(Expr, Subscripts, Sizes);
-
- if (Subscripts.empty())
- return;
-
- LLVM_DEBUG({
- dbgs() << "succeeded to delinearize " << *Expr << "\n";
- dbgs() << "ArrayDecl[UnknownSize]";
- for (const SCEV *S : Sizes)
- dbgs() << "[" << *S << "]";
-
- dbgs() << "\nArrayRef";
- for (const SCEV *S : Subscripts)
- dbgs() << "[" << *S << "]";
- dbgs() << "\n";
- });
-}
-
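For orientation, here is a minimal sketch of how the delinearization entry points removed in this hunk were typically driven from client code. The wrapper name and the printing body are invented for illustration; only the ScalarEvolution calls reflect the pre-patch API.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

// Sketch of a client of the (removed) ScalarEvolution::delinearize interface.
static void sketchDelinearize(ScalarEvolution &SE, Instruction *Access,
                              const SCEV *AccessFn) {
  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  const SCEV *ElementSize = SE.getElementSize(Access);
  // The three steps documented above: collect parametric terms, derive the
  // array dimensions, then compute one access function per dimension.
  SE.delinearize(AccessFn, Subscripts, Sizes, ElementSize);
  if (Subscripts.empty() || Sizes.empty())
    return; // Delinearization failed; keep the linearized access function.
  for (const SCEV *S : Subscripts)
    dbgs() << "[" << *S << "]";
  dbgs() << "\n";
}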
-bool ScalarEvolution::getIndexExpressionsFromGEP(
- const GetElementPtrInst *GEP, SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<int> &Sizes) {
- assert(Subscripts.empty() && Sizes.empty() &&
- "Expected output lists to be empty on entry to this function.");
- assert(GEP && "getIndexExpressionsFromGEP called with a null GEP");
- Type *Ty = nullptr;
- bool DroppedFirstDim = false;
- for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
- const SCEV *Expr = getSCEV(GEP->getOperand(i));
- if (i == 1) {
- Ty = GEP->getSourceElementType();
- if (auto *Const = dyn_cast<SCEVConstant>(Expr))
- if (Const->getValue()->isZero()) {
- DroppedFirstDim = true;
- continue;
- }
- Subscripts.push_back(Expr);
- continue;
- }
-
- auto *ArrayTy = dyn_cast<ArrayType>(Ty);
- if (!ArrayTy) {
- Subscripts.clear();
- Sizes.clear();
- return false;
- }
-
- Subscripts.push_back(Expr);
- if (!(DroppedFirstDim && i == 2))
- Sizes.push_back(ArrayTy->getNumElements());
-
- Ty = ArrayTy->getElementType();
- }
- return !Subscripts.empty();
-}
-
//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//
@@ -12722,6 +12537,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
LoopDispositions(std::move(Arg.LoopDispositions)),
LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
+ SCEVUsers(std::move(Arg.SCEVUsers)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
@@ -12934,7 +12750,7 @@ ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
Values.emplace_back(L, LoopVariant);
LoopDisposition D = computeLoopDisposition(S, L);
auto &Values2 = LoopDispositions[S];
- for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ for (auto &V : llvm::reverse(Values2)) {
if (V.getPointer() == L) {
V.setInt(D);
break;
@@ -13042,7 +12858,7 @@ ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
Values.emplace_back(BB, DoesNotDominateBlock);
BlockDisposition D = computeBlockDisposition(S, BB);
auto &Values2 = BlockDispositions[S];
- for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
+ for (auto &V : llvm::reverse(Values2)) {
if (V.getPointer() == BB) {
V.setInt(D);
break;
@@ -13130,41 +12946,58 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
-void
-ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
- ValuesAtScopes.erase(S);
- LoopDispositions.erase(S);
- BlockDispositions.erase(S);
- UnsignedRanges.erase(S);
- SignedRanges.erase(S);
- ExprValueMap.erase(S);
- HasRecMap.erase(S);
- MinTrailingZerosCache.erase(S);
+void ScalarEvolution::forgetMemoizedResults(ArrayRef<const SCEV *> SCEVs) {
+ SmallPtrSet<const SCEV *, 8> ToForget(SCEVs.begin(), SCEVs.end());
+ SmallVector<const SCEV *, 8> Worklist(ToForget.begin(), ToForget.end());
+
+ while (!Worklist.empty()) {
+ const SCEV *Curr = Worklist.pop_back_val();
+ auto Users = SCEVUsers.find(Curr);
+ if (Users != SCEVUsers.end())
+ for (auto *User : Users->second)
+ if (ToForget.insert(User).second)
+ Worklist.push_back(User);
+ }
+
+ for (auto *S : ToForget)
+ forgetMemoizedResultsImpl(S);
for (auto I = PredicatedSCEVRewrites.begin();
I != PredicatedSCEVRewrites.end();) {
std::pair<const SCEV *, const Loop *> Entry = I->first;
- if (Entry.first == S)
+ if (ToForget.count(Entry.first))
PredicatedSCEVRewrites.erase(I++);
else
++I;
}
- auto RemoveSCEVFromBackedgeMap =
- [S](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
+ auto RemoveSCEVFromBackedgeMap = [&ToForget](
+ DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
for (auto I = Map.begin(), E = Map.end(); I != E;) {
BackedgeTakenInfo &BEInfo = I->second;
- if (BEInfo.hasOperand(S))
+ if (any_of(ToForget,
+ [&BEInfo](const SCEV *S) { return BEInfo.hasOperand(S); }))
Map.erase(I++);
else
++I;
}
- };
+ };
RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}
+void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
+ ValuesAtScopes.erase(S);
+ LoopDispositions.erase(S);
+ BlockDispositions.erase(S);
+ UnsignedRanges.erase(S);
+ SignedRanges.erase(S);
+ ExprValueMap.erase(S);
+ HasRecMap.erase(S);
+ MinTrailingZerosCache.erase(S);
+}
+
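The rewritten forgetMemoizedResults above first takes the transitive closure over SCEVUsers before dropping any cached data, so expressions built on top of a forgotten SCEV are forgotten as well. A standalone sketch of that worklist pattern, using plain standard-library containers rather than the LLVM types (all names hypothetical):

#include <unordered_map>
#include <unordered_set>
#include <vector>

// Collect Seeds plus everything that transitively uses them, mirroring the
// ToForget/Worklist loop in forgetMemoizedResults.
template <typename Node>
std::unordered_set<const Node *> collectTransitiveUsers(
    const std::unordered_map<const Node *, std::vector<const Node *>> &Users,
    const std::vector<const Node *> &Seeds) {
  std::unordered_set<const Node *> ToForget(Seeds.begin(), Seeds.end());
  std::vector<const Node *> Worklist(ToForget.begin(), ToForget.end());
  while (!Worklist.empty()) {
    const Node *Curr = Worklist.back();
    Worklist.pop_back();
    auto It = Users.find(Curr);
    if (It == Users.end())
      continue;
    for (const Node *User : It->second)
      if (ToForget.insert(User).second) // First time we see this dependent.
        Worklist.push_back(User);
  }
  return ToForget;
}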
void
ScalarEvolution::getUsedLoops(const SCEV *S,
SmallPtrSetImpl<const Loop *> &LoopsUsed) {
@@ -13185,13 +13018,6 @@ ScalarEvolution::getUsedLoops(const SCEV *S,
SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
-void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
- SmallPtrSet<const Loop *, 8> LoopsUsed;
- getUsedLoops(S, LoopsUsed);
- for (auto *L : LoopsUsed)
- LoopUsers[L].push_back(S);
-}
-
void ScalarEvolution::verify() const {
ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
ScalarEvolution SE2(F, TLI, AC, DT, LI);
@@ -13282,6 +13108,23 @@ void ScalarEvolution::verify() const {
assert(ValidLoops.contains(AR->getLoop()) &&
"AddRec references invalid loop");
}
+
+ // Verify integrity of SCEV users.
+ for (const auto &S : UniqueSCEVs) {
+ SmallVector<const SCEV *, 4> Ops;
+ collectUniqueOps(&S, Ops);
+ for (const auto *Op : Ops) {
+ // We do not store dependencies of constants.
+ if (isa<SCEVConstant>(Op))
+ continue;
+ auto It = SCEVUsers.find(Op);
+ if (It != SCEVUsers.end() && It->second.count(&S))
+ continue;
+ dbgs() << "Use of operand " << *Op << " by user " << S
+ << " is not being tracked!\n";
+ std::abort();
+ }
+ }
}
bool ScalarEvolution::invalidate(
@@ -13685,6 +13528,16 @@ PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE,
Loop &L)
: SE(SE), L(L) {}
+void ScalarEvolution::registerUser(const SCEV *User,
+ ArrayRef<const SCEV *> Ops) {
+ for (auto *Op : Ops)
+ // We do not expect that forgetting cached data for SCEVConstants will ever
+ // open any prospects for sharpening or introduce any correctness issues,
+ // so we don't bother storing their dependencies.
+ if (!isa<SCEVConstant>(Op))
+ SCEVUsers[Op].insert(User);
+}
+
const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
const SCEV *Expr = SE.getSCEV(V);
RewriteEntry &Entry = RewriteMap[Expr];
@@ -13897,52 +13750,51 @@ ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
return getUMinFromMismatchedTypes(ExitCounts);
}
-/// This rewriter is similar to SCEVParameterRewriter (it replaces SCEVUnknown
-/// components following the Map (Value -> SCEV)), but skips AddRecExpr because
-/// we cannot guarantee that the replacement is loop invariant in the loop of
-/// the AddRec.
+/// A rewriter that replaces SCEV expressions matching the keys of Map with the
+/// corresponding mapped entries. It skips AddRecExpr because we cannot guarantee that the
+/// replacement is loop invariant in the loop of the AddRec.
+///
+/// At the moment only rewriting SCEVUnknown and SCEVZeroExtendExpr is
+/// supported.
class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
- ValueToSCEVMapTy &Map;
+ const DenseMap<const SCEV *, const SCEV *> &Map;
public:
- SCEVLoopGuardRewriter(ScalarEvolution &SE, ValueToSCEVMapTy &M)
+ SCEVLoopGuardRewriter(ScalarEvolution &SE,
+ DenseMap<const SCEV *, const SCEV *> &M)
: SCEVRewriteVisitor(SE), Map(M) {}
const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { return Expr; }
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- auto I = Map.find(Expr->getValue());
+ auto I = Map.find(Expr);
if (I == Map.end())
return Expr;
return I->second;
}
+
+ const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitZeroExtendExpr(
+ Expr);
+ return I->second;
+ }
};
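As a concrete illustration of what this rewriter is used for (the guard value and names below are invented): if a loop's entry is dominated by a check such as %n u< 16, the condition collection that follows records a umin-based replacement for %n, and applyLoopGuards returns the queried expression with that replacement applied.

// Hypothetical caller of the public entry point; with a dominating guard
// "%n u< 16" the rewrite map roughly contains { %n -> (15 umin %n) }, so the
// expression returned here is the guard-tightened form of %n.
static const SCEV *tightenWithGuards(ScalarEvolution &SE, Value *N,
                                     const Loop *L) {
  const SCEV *Expr = SE.getSCEV(N);
  return SE.applyLoopGuards(Expr, L);
}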
const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
+ SmallVector<const SCEV *> ExprsToRewrite;
auto CollectCondition = [&](ICmpInst::Predicate Predicate, const SCEV *LHS,
- const SCEV *RHS, ValueToSCEVMapTy &RewriteMap) {
- // If we have LHS == 0, check if LHS is computing a property of some unknown
- // SCEV %v which we can rewrite %v to express explicitly.
- const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
- if (Predicate == CmpInst::ICMP_EQ && RHSC &&
- RHSC->getValue()->isNullValue()) {
- // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
- // explicitly express that.
- const SCEV *URemLHS = nullptr;
- const SCEV *URemRHS = nullptr;
- if (matchURem(LHS, URemLHS, URemRHS)) {
- if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
- Value *V = LHSUnknown->getValue();
- auto Multiple =
- getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS,
- (SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNSW));
- RewriteMap[V] = Multiple;
- return;
- }
- }
- }
-
- if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
+ const SCEV *RHS,
+ DenseMap<const SCEV *, const SCEV *>
+ &RewriteMap) {
+ // WARNING: It is generally unsound to apply any wrap flags to the proposed
+ // replacement SCEV which isn't directly implied by the structure of that
+ // SCEV. In particular, using contextual facts to imply flags is *NOT*
+ // legal. See the scoping rules for flags in the header to understand why.
+
+ // If LHS is a constant, apply information to the other expression.
+ if (isa<SCEVConstant>(LHS)) {
std::swap(LHS, RHS);
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
@@ -13950,7 +13802,8 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
// Check for a condition of the form (-C1 + X < C2). InstCombine will
// create this form when combining two checks of the form (X u< C2 + C1) and
// (X >=u C1).
- auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap]() {
+ auto MatchRangeCheckIdiom = [this, Predicate, LHS, RHS, &RewriteMap,
+ &ExprsToRewrite]() {
auto *AddExpr = dyn_cast<SCEVAddExpr>(LHS);
if (!AddExpr || AddExpr->getNumOperands() != 2)
return false;
@@ -13968,26 +13821,55 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
// Bail out, unless we have a non-wrapping, monotonic range.
if (ExactRegion.isWrappedSet() || ExactRegion.isFullSet())
return false;
- auto I = RewriteMap.find(LHSUnknown->getValue());
- const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
- RewriteMap[LHSUnknown->getValue()] = getUMaxExpr(
+ auto I = RewriteMap.find(LHSUnknown);
+ const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHSUnknown;
+ RewriteMap[LHSUnknown] = getUMaxExpr(
getConstant(ExactRegion.getUnsignedMin()),
getUMinExpr(RewrittenLHS, getConstant(ExactRegion.getUnsignedMax())));
+ ExprsToRewrite.push_back(LHSUnknown);
return true;
};
if (MatchRangeCheckIdiom())
return;
- // For now, limit to conditions that provide information about unknown
- // expressions. RHS also cannot contain add recurrences.
- auto *LHSUnknown = dyn_cast<SCEVUnknown>(LHS);
- if (!LHSUnknown || containsAddRecurrence(RHS))
+ // If we have LHS == 0, check if LHS is computing a property of some unknown
+ // SCEV %v which we can rewrite %v to express explicitly.
+ const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
+ if (Predicate == CmpInst::ICMP_EQ && RHSC &&
+ RHSC->getValue()->isNullValue()) {
+ // If LHS is A % B, i.e. A % B == 0, rewrite A to (A /u B) * B to
+ // explicitly express that.
+ const SCEV *URemLHS = nullptr;
+ const SCEV *URemRHS = nullptr;
+ if (matchURem(LHS, URemLHS, URemRHS)) {
+ if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
+ auto Multiple = getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS);
+ RewriteMap[LHSUnknown] = Multiple;
+ ExprsToRewrite.push_back(LHSUnknown);
+ return;
+ }
+ }
+ }
+
+ // Do not apply information for constants or if RHS contains an AddRec.
+ if (isa<SCEVConstant>(LHS) || containsAddRecurrence(RHS))
+ return;
+
+ // If RHS is SCEVUnknown, make sure the information is applied to it.
+ if (!isa<SCEVUnknown>(LHS) && isa<SCEVUnknown>(RHS)) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+
+ // Limit to expressions that can be rewritten.
+ if (!isa<SCEVUnknown>(LHS) && !isa<SCEVZeroExtendExpr>(LHS))
return;
// Check whether LHS has already been rewritten. In that case we want to
// chain further rewrites onto the already rewritten value.
- auto I = RewriteMap.find(LHSUnknown->getValue());
+ auto I = RewriteMap.find(LHS);
const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
+
const SCEV *RewrittenRHS = nullptr;
switch (Predicate) {
case CmpInst::ICMP_ULT:
@@ -14031,14 +13913,17 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
break;
}
- if (RewrittenRHS)
- RewriteMap[LHSUnknown->getValue()] = RewrittenRHS;
+ if (RewrittenRHS) {
+ RewriteMap[LHS] = RewrittenRHS;
+ if (LHS == RewrittenLHS)
+ ExprsToRewrite.push_back(LHS);
+ }
};
// Starting at the loop predecessor, climb up the predecessor chain, as long
// as there are predecessors that can be found that have unique successors
// leading to the original header.
// TODO: share this logic with isLoopEntryGuardedByCond.
- ValueToSCEVMapTy RewriteMap;
+ DenseMap<const SCEV *, const SCEV *> RewriteMap;
for (std::pair<const BasicBlock *, const BasicBlock *> Pair(
L->getLoopPredecessor(), L->getHeader());
Pair.first; Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
@@ -14088,6 +13973,19 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
if (RewriteMap.empty())
return Expr;
+
+ // Now that all rewrite information is collected, rewrite the collected
+ // expressions with the information in the map. This applies information to
+ // sub-expressions.
+ if (ExprsToRewrite.size() > 1) {
+ for (const SCEV *Expr : ExprsToRewrite) {
+ const SCEV *RewriteTo = RewriteMap[Expr];
+ RewriteMap.erase(Expr);
+ SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
+ RewriteMap.insert({Expr, Rewriter.visit(RewriteTo)});
+ }
+ }
+
SCEVLoopGuardRewriter Rewriter(*this, RewriteMap);
return Rewriter.visit(Expr);
}
diff --git a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 2262fc9d7913..f4fa159d1ec7 100644
--- a/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -23,6 +23,15 @@
#include "llvm/InitializePasses.h"
using namespace llvm;
+static bool canComputePointerDiff(ScalarEvolution &SE,
+ const SCEV *A, const SCEV *B) {
+ if (SE.getEffectiveSCEVType(A->getType()) !=
+ SE.getEffectiveSCEVType(B->getType()))
+ return false;
+
+ return SE.instructionCouldExistWitthOperands(A, B);
+}
+
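The new helper gates the pointer-difference reasoning in the hunk below. A compressed, purely illustrative restatement of that reasoning for the simple case where the difference folds to a constant (names invented, pointer wrapping ignored for brevity):

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

// If B - A is a known constant at least as large as ASize (or A - B is at
// least BSize), the two accesses cannot overlap and NoAlias is justified.
static bool provablyDisjoint(ScalarEvolution &SE, const SCEV *A, const SCEV *B,
                             int64_t ASize, int64_t BSize) {
  if (!canComputePointerDiff(SE, A, B)) // the file-local helper above
    return false;
  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(B, A));
  if (!Diff)
    return false;
  const APInt &D = Diff->getAPInt();
  return D.sge(ASize) || (-D).sge(BSize);
}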
AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB, AAQueryInfo &AAQI) {
// If either of the memory references is empty, it doesn't matter what the
@@ -41,8 +50,7 @@ AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
// If something is known about the difference between the two addresses,
// see if it's enough to prove a NoAlias.
- if (SE.getEffectiveSCEVType(AS->getType()) ==
- SE.getEffectiveSCEVType(BS->getType())) {
+ if (canComputePointerDiff(SE, AS, BS)) {
unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
APInt ASizeInt(BitWidth, LocA.Size.hasValue()
? LocA.Size.getValue()
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index ab5f2db7d1cd..9056cc01484d 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -257,14 +257,12 @@ void StackLifetime::calculateLiveIntervals() {
unsigned AllocaNo = It.second.AllocaNo;
if (IsStart) {
- assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart);
if (!Started.test(AllocaNo)) {
Started.set(AllocaNo);
Ended.reset(AllocaNo);
Start[AllocaNo] = InstNo;
}
} else {
- assert(!Ended.test(AllocaNo));
if (Started.test(AllocaNo)) {
LiveRanges[AllocaNo].addRange(Start[AllocaNo], InstNo);
Started.reset(AllocaNo);
@@ -400,3 +398,19 @@ PreservedAnalyses StackLifetimePrinterPass::run(Function &F,
SL.print(OS);
return PreservedAnalyses::all();
}
+
+void StackLifetimePrinterPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<StackLifetimePrinterPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ switch (Type) {
+ case StackLifetime::LivenessType::May:
+ OS << "may";
+ break;
+ case StackLifetime::LivenessType::Must:
+ OS << "must";
+ break;
+ }
+ OS << ">";
+}
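This override lets the printer report its liveness-type parameter when pass pipelines are printed back out. Assuming the pass is registered in PassRegistry.def under the name print<stack-lifetime> (not shown in this diff) and that the constructor takes the stream and liveness type in that order, scheduling it from C++ looks roughly like:

#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

static void schedulePrinter(FunctionPassManager &FPM) {
  // Textual pipeline form produced by printPipeline: "print<stack-lifetime><may>".
  FPM.addPass(
      StackLifetimePrinterPass(dbgs(), StackLifetime::LivenessType::May));
}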
diff --git a/llvm/lib/Analysis/StackSafetyAnalysis.cpp b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
index 76f195fedf31..74cc39b7f2c0 100644
--- a/llvm/lib/Analysis/StackSafetyAnalysis.cpp
+++ b/llvm/lib/Analysis/StackSafetyAnalysis.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
+#include <tuple>
using namespace llvm;
@@ -116,6 +117,7 @@ template <typename CalleeTy> struct UseInfo {
// Access range of the address (alloca or parameter).
// It is allowed to be empty-set when there are no known accesses.
ConstantRange Range;
+ std::map<const Instruction *, ConstantRange> Accesses;
// List of calls which pass address as an argument.
// Value is offset range of address from base address (alloca or calling
@@ -129,6 +131,12 @@ template <typename CalleeTy> struct UseInfo {
UseInfo(unsigned PointerSize) : Range{PointerSize, false} {}
void updateRange(const ConstantRange &R) { Range = unionNoWrap(Range, R); }
+ void addRange(const Instruction *I, const ConstantRange &R) {
+ auto Ins = Accesses.emplace(I, R);
+ if (!Ins.second)
+ Ins.first->second = unionNoWrap(Ins.first->second, R);
+ updateRange(R);
+ }
};
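To make the new bookkeeping concrete: repeated calls for the same instruction are unioned into one conservative per-instruction range, and each call also widens the summary Range. UseInfo is local to this file, so the fragment below is illustrative only (I stands for an assumed const Instruction *, with 64-bit pointers assumed).

// Two 4-byte accesses through the same instruction, at offsets 0 and 8:
UseInfo<GlobalValue> UI(/*PointerSize=*/64);
UI.addRange(I, ConstantRange(APInt(64, 0), APInt(64, 4)));
UI.addRange(I, ConstantRange(APInt(64, 8), APInt(64, 12)));
// UI.Accesses[I] is now the no-wrap union [0, 12), and UI.Range has been
// widened to cover both accesses as well.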
template <typename CalleeTy>
@@ -146,7 +154,7 @@ raw_ostream &operator<<(raw_ostream &OS, const UseInfo<CalleeTy> &U) {
ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) {
const DataLayout &DL = AI.getModule()->getDataLayout();
TypeSize TS = DL.getTypeAllocSize(AI.getAllocatedType());
- unsigned PointerSize = DL.getMaxPointerSizeInBits();
+ unsigned PointerSize = DL.getPointerTypeSizeInBits(AI.getType());
// Fallback to empty range for alloca size.
ConstantRange R = ConstantRange::getEmpty(PointerSize);
if (TS.isScalable())
@@ -167,7 +175,7 @@ ConstantRange getStaticAllocaSizeRange(const AllocaInst &AI) {
if (Overflow)
return R;
}
- R = ConstantRange(APInt::getNullValue(PointerSize), APSize);
+ R = ConstantRange(APInt::getZero(PointerSize), APSize);
assert(!isUnsafe(R));
return R;
}
@@ -208,7 +216,6 @@ template <typename CalleeTy> struct FunctionInfo {
} else {
assert(Allocas.empty());
}
- O << "\n";
}
};
@@ -223,6 +230,7 @@ struct StackSafetyInfo::InfoTy {
struct StackSafetyGlobalInfo::InfoTy {
GVToSSI Info;
SmallPtrSet<const AllocaInst *, 8> SafeAllocas;
+ std::map<const Instruction *, bool> AccessIsUnsafe;
};
namespace {
@@ -242,7 +250,7 @@ class StackSafetyLocalAnalysis {
ConstantRange getMemIntrinsicAccessRange(const MemIntrinsic *MI, const Use &U,
Value *Base);
- bool analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS,
+ void analyzeAllUses(Value *Ptr, UseInfo<GlobalValue> &AS,
const StackLifetime &SL);
public:
@@ -297,8 +305,8 @@ ConstantRange StackSafetyLocalAnalysis::getAccessRange(Value *Addr, Value *Base,
APInt APSize(PointerSize, Size.getFixedSize(), true);
if (APSize.isNegative())
return UnknownRange;
- return getAccessRange(
- Addr, Base, ConstantRange(APInt::getNullValue(PointerSize), APSize));
+ return getAccessRange(Addr, Base,
+ ConstantRange(APInt::getZero(PointerSize), APSize));
}
ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
@@ -321,14 +329,13 @@ ConstantRange StackSafetyLocalAnalysis::getMemIntrinsicAccessRange(
if (Sizes.getUpper().isNegative() || isUnsafe(Sizes))
return UnknownRange;
Sizes = Sizes.sextOrTrunc(PointerSize);
- ConstantRange SizeRange(APInt::getNullValue(PointerSize),
- Sizes.getUpper() - 1);
+ ConstantRange SizeRange(APInt::getZero(PointerSize), Sizes.getUpper() - 1);
return getAccessRange(U, Base, SizeRange);
}
/// The function analyzes all local uses of Ptr (alloca or argument) and
/// calculates local access range and all function calls where it was used.
-bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
+void StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
UseInfo<GlobalValue> &US,
const StackLifetime &SL) {
SmallPtrSet<const Value *, 16> Visited;
@@ -349,11 +356,11 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
switch (I->getOpcode()) {
case Instruction::Load: {
if (AI && !SL.isAliveAfter(AI, I)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
- US.updateRange(
- getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
+ US.addRange(I,
+ getAccessRange(UI, Ptr, DL.getTypeStoreSize(I->getType())));
break;
}
@@ -363,15 +370,16 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
case Instruction::Store: {
if (V == I->getOperand(0)) {
// Stored the pointer - conservatively assume it may be unsafe.
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
if (AI && !SL.isAliveAfter(AI, I)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
- US.updateRange(getAccessRange(
- UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
+ US.addRange(
+ I, getAccessRange(
+ UI, Ptr, DL.getTypeStoreSize(I->getOperand(0)->getType())));
break;
}
@@ -379,8 +387,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
// Information leak.
// FIXME: Process parameters correctly. This is a leak only if we return
// alloca.
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
case Instruction::Call:
case Instruction::Invoke: {
@@ -388,25 +396,31 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
break;
if (AI && !SL.isAliveAfter(AI, I)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
- US.updateRange(getMemIntrinsicAccessRange(MI, UI, Ptr));
+ US.addRange(I, getMemIntrinsicAccessRange(MI, UI, Ptr));
break;
}
const auto &CB = cast<CallBase>(*I);
+ if (CB.getReturnedArgOperand() == V) {
+ if (Visited.insert(I).second)
+ WorkList.push_back(cast<const Instruction>(I));
+ }
+
if (!CB.isArgOperand(&UI)) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
unsigned ArgNo = CB.getArgOperandNo(&UI);
if (CB.isByValArgument(ArgNo)) {
- US.updateRange(getAccessRange(
- UI, Ptr, DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
+ US.addRange(I, getAccessRange(
+ UI, Ptr,
+ DL.getTypeStoreSize(CB.getParamByValType(ArgNo))));
break;
}
@@ -416,8 +430,8 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
const GlobalValue *Callee =
dyn_cast<GlobalValue>(CB.getCalledOperand()->stripPointerCasts());
if (!Callee) {
- US.updateRange(UnknownRange);
- return false;
+ US.addRange(I, UnknownRange);
+ break;
}
assert(isa<Function>(Callee) || isa<GlobalAlias>(Callee));
@@ -435,8 +449,6 @@ bool StackSafetyLocalAnalysis::analyzeAllUses(Value *Ptr,
}
}
}
-
- return true;
}
FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() {
@@ -468,7 +480,7 @@ FunctionInfo<GlobalValue> StackSafetyLocalAnalysis::run() {
}
LLVM_DEBUG(Info.print(dbgs(), F.getName(), &F));
- LLVM_DEBUG(dbgs() << "[StackSafety] done\n");
+ LLVM_DEBUG(dbgs() << "\n[StackSafety] done\n");
return Info;
}
@@ -588,8 +600,7 @@ void StackSafetyDataFlowAnalysis<CalleeTy>::runDataFlow() {
updateAllNodes();
while (!WorkList.empty()) {
- const CalleeTy *Callee = WorkList.back();
- WorkList.pop_back();
+ const CalleeTy *Callee = WorkList.pop_back_val();
updateOneNode(Callee);
}
}
@@ -674,7 +685,7 @@ const Function *findCalleeInModule(const GlobalValue *GV) {
const GlobalAlias *A = dyn_cast<GlobalAlias>(GV);
if (!A)
return nullptr;
- GV = A->getBaseObject();
+ GV = A->getAliaseeObject();
if (GV == A)
return nullptr;
}
@@ -741,10 +752,8 @@ GVToSSI createGlobalStackSafetyInfo(
KV.second.Calls.clear();
}
- uint32_t PointerSize = Copy.begin()
- ->first->getParent()
- ->getDataLayout()
- .getMaxPointerSizeInBits();
+ uint32_t PointerSize =
+ Copy.begin()->first->getParent()->getDataLayout().getPointerSizeInBits();
StackSafetyDataFlowAnalysis<GlobalValue> SSDFA(PointerSize, std::move(Copy));
for (auto &F : SSDFA.run()) {
@@ -794,6 +803,7 @@ const StackSafetyInfo::InfoTy &StackSafetyInfo::getInfo() const {
void StackSafetyInfo::print(raw_ostream &O) const {
getInfo().Info.print(O, F->getName(), dyn_cast<Function>(F));
+ O << "\n";
}
const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
@@ -806,17 +816,22 @@ const StackSafetyGlobalInfo::InfoTy &StackSafetyGlobalInfo::getInfo() const {
}
}
Info.reset(new InfoTy{
- createGlobalStackSafetyInfo(std::move(Functions), Index), {}});
+ createGlobalStackSafetyInfo(std::move(Functions), Index), {}, {}});
+
for (auto &FnKV : Info->Info) {
for (auto &KV : FnKV.second.Allocas) {
++NumAllocaTotal;
const AllocaInst *AI = KV.first;
- if (getStaticAllocaSizeRange(*AI).contains(KV.second.Range)) {
+ auto AIRange = getStaticAllocaSizeRange(*AI);
+ if (AIRange.contains(KV.second.Range)) {
Info->SafeAllocas.insert(AI);
++NumAllocaStackSafe;
}
+ for (const auto &A : KV.second.Accesses)
+ Info->AccessIsUnsafe[A.first] |= !AIRange.contains(A.second);
}
}
+
if (StackSafetyPrint)
print(errs());
}
@@ -886,6 +901,15 @@ bool StackSafetyGlobalInfo::isSafe(const AllocaInst &AI) const {
return Info.SafeAllocas.count(&AI);
}
+bool StackSafetyGlobalInfo::stackAccessIsSafe(const Instruction &I) const {
+ const auto &Info = getInfo();
+ auto It = Info.AccessIsUnsafe.find(&I);
+ if (It == Info.AccessIsUnsafe.end()) {
+ return true;
+ }
+ return !It->second;
+}
+
void StackSafetyGlobalInfo::print(raw_ostream &O) const {
auto &SSI = getInfo().Info;
if (SSI.empty())
@@ -894,6 +918,16 @@ void StackSafetyGlobalInfo::print(raw_ostream &O) const {
for (auto &F : M.functions()) {
if (!F.isDeclaration()) {
SSI.find(&F)->second.print(O, F.getName(), &F);
+ O << " safe accesses:"
+ << "\n";
+ for (const auto &I : instructions(F)) {
+ const CallInst *Call = dyn_cast<CallInst>(&I);
+ if ((isa<StoreInst>(I) || isa<LoadInst>(I) || isa<MemIntrinsic>(I) ||
+ (Call && Call->hasByValArgument())) &&
+ stackAccessIsSafe(I)) {
+ O << " " << I << "\n";
+ }
+ }
O << "\n";
}
}
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
index e93dc303ae63..3d10479c4544 100644
--- a/llvm/lib/Analysis/TFUtils.cpp
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -1,9 +1,8 @@
//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -262,29 +261,58 @@ private:
class LoggerDataImpl {
const std::vector<LoggedFeatureSpec> LoggedFeatureSpecs;
const TensorSpec RewardSpec;
+ const bool IncludeReward;
+
+ std::vector<tensorflow::FeatureList> FeatureLists;
+ tensorflow::FeatureList Reward;
+
+ bool isSelfConsistent(const tensorflow::SequenceExample &SE,
+ size_t NrRecords) const {
+ bool Ret = true;
+ for (const auto &TSpecs : LoggedFeatureSpecs) {
+ const auto &Name = TSpecs.getLoggingName();
+ const auto &FL = SE.feature_lists().feature_list().at(Name).feature();
+ if (NrRecords != static_cast<size_t>(FL.size())) {
+ dbgs() << "[TF-UTILS]: " << Name << " has missing records. Expected "
+ << NrRecords << " got " << FL.size() << "\n";
+ Ret = false;
+ }
+ }
+ if (IncludeReward && static_cast<size_t>(SE.feature_lists()
+ .feature_list()
+ .at(RewardSpec.name())
+ .feature()
+ .size()) != NrRecords) {
+ dbgs() << "[TF-UTILS]: reward is missing records.\n";
+ Ret = false;
+ }
+ return Ret;
+ }
- tensorflow::SequenceExample SE;
- std::vector<tensorflow::FeatureList *> FeatureLists;
- tensorflow::FeatureList *Reward = nullptr;
-
-public:
- LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
- : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec) {
+ void transferLog(tensorflow::SequenceExample &SE) {
auto *FL = SE.mutable_feature_lists()->mutable_feature_list();
if (IncludeReward)
- Reward = &(*FL)[RewardSpec.name()];
- // Allocate first the map entries, then capture their address. We will not
- // mutate the set of features after this (i.e. the pointers won't dangle).
- for (const auto &LFS : LoggedSpecs) {
- (*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()] = {};
+ (*FL)[RewardSpec.name()] = std::move(Reward);
+ assert(FeatureLists.size() == LoggedFeatureSpecs.size());
+ for (size_t I = 0; I < FeatureLists.size(); ++I) {
+ const auto &LFS = LoggedFeatureSpecs[I];
+ (*FL)[LFS.getLoggingName()] = std::move(FeatureLists[I]);
}
- for (const auto &LFS : LoggedSpecs)
- FeatureLists.push_back(
- &(*FL)[LFS.LoggingName ? *LFS.LoggingName : LFS.Spec.name()]);
}
- void print(raw_ostream &OS) {
+public:
+ LoggerDataImpl(const std::vector<LoggedFeatureSpec> &LoggedSpecs,
+ const TensorSpec &RewardSpec, bool IncludeReward)
+ : LoggedFeatureSpecs(LoggedSpecs), RewardSpec(RewardSpec),
+ IncludeReward(IncludeReward), FeatureLists(LoggedFeatureSpecs.size()) {}
+
+ // flush the logged info to a stream and clear the log contents.
+ void flush(raw_ostream &OS) {
+ size_t NrRecords = getNrRecords();
+ (void)NrRecords;
+ tensorflow::SequenceExample SE;
+ transferLog(SE);
+ assert(isSelfConsistent(SE, NrRecords));
std::string OutStr;
if (ProtobufTextMode)
google::protobuf::TextFormat::PrintToString(SE, &OutStr);
@@ -298,14 +326,14 @@ public:
const auto &Spec = LoggedFeatureSpecs[FeatureID].Spec;
if (Spec.isElementType<float>()) {
auto *RF = FeatureLists[FeatureID]
- ->add_feature()
+ .add_feature()
->mutable_float_list()
->mutable_value();
RF->Resize(Spec.getElementCount(), 0.0);
return reinterpret_cast<char *>(RF->mutable_data());
} else if (Spec.isElementType<int32_t>() || Spec.isElementType<int64_t>()) {
auto *RF = FeatureLists[FeatureID]
- ->add_feature()
+ .add_feature()
->mutable_int64_list()
->mutable_value();
RF->Resize(Spec.getElementCount(), 0);
@@ -315,17 +343,18 @@ public:
}
template <typename T> void logReward(T Value) {
+ assert(IncludeReward);
if (RewardSpec.isElementType<float>())
- Reward->add_feature()->mutable_float_list()->add_value(Value);
+ Reward.add_feature()->mutable_float_list()->add_value(Value);
else if (RewardSpec.isElementType<int32_t>() ||
RewardSpec.isElementType<int64_t>())
- Reward->add_feature()->mutable_int64_list()->add_value(Value);
+ Reward.add_feature()->mutable_int64_list()->add_value(Value);
else
llvm_unreachable("Unsupported tensor type.");
}
size_t getNrRecords() const {
- return FeatureLists.empty() ? 0 : FeatureLists[0]->feature().size();
+ return FeatureLists.empty() ? 0 : FeatureLists[0].feature().size();
}
};
} // namespace llvm
@@ -538,5 +567,5 @@ char *Logger::addEntryAndGetFloatOrInt64Buffer(size_t FeatureID) {
return reinterpret_cast<char *>(LoggerData->addNewTensor(FeatureID));
}
-void Logger::print(raw_ostream &OS) { LoggerData->print(OS); }
+void Logger::flush(raw_ostream &OS) { LoggerData->flush(OS); }
#endif // defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 4a8818f2e2a8..7326ba74c071 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -123,6 +123,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// Set IO unlocked variants as unavailable
// Set them as available per system below
+ TLI.setUnavailable(LibFunc_getc_unlocked);
TLI.setUnavailable(LibFunc_getchar_unlocked);
TLI.setUnavailable(LibFunc_putc_unlocked);
TLI.setUnavailable(LibFunc_putchar_unlocked);
@@ -156,15 +157,10 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// isn't true for a target those defaults should be overridden below.
TLI.setIntSize(T.isArch16Bit() ? 16 : 32);
- if (T.isAMDGPU())
- TLI.disableAllFunctions();
-
- // There are no library implementations of memcpy and memset for AMD gpus and
- // these can be difficult to lower in the backend.
+ // There is really no runtime library on AMDGPU, apart from
+ // __kmpc_alloc/free_shared.
if (T.isAMDGPU()) {
- TLI.setUnavailable(LibFunc_memcpy);
- TLI.setUnavailable(LibFunc_memset);
- TLI.setUnavailable(LibFunc_memset_pattern16);
+ TLI.disableAllFunctions();
TLI.setAvailable(llvm::LibFunc___kmpc_alloc_shared);
TLI.setAvailable(llvm::LibFunc___kmpc_free_shared);
return;
@@ -418,6 +414,65 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_utimes);
}
+ // Pick just one set of new/delete variants.
+ if (T.isOSMSVCRT()) {
+ // MSVC doesn't have the Itanium new/delete.
+ TLI.setUnavailable(LibFunc_ZdaPv);
+ TLI.setUnavailable(LibFunc_ZdaPvRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdaPvSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdaPvj);
+ TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdaPvm);
+ TLI.setUnavailable(LibFunc_ZdaPvmSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPv);
+ TLI.setUnavailable(LibFunc_ZdlPvRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdlPvSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZdlPvj);
+ TLI.setUnavailable(LibFunc_ZdlPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPvm);
+ TLI.setUnavailable(LibFunc_ZdlPvmSt11align_val_t);
+ TLI.setUnavailable(LibFunc_Znaj);
+ TLI.setUnavailable(LibFunc_ZnajRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znam);
+ TLI.setUnavailable(LibFunc_ZnamRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnamSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnamSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwj);
+ TLI.setUnavailable(LibFunc_ZnwjRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwm);
+ TLI.setUnavailable(LibFunc_ZnwmRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwmSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t);
+ } else {
+ // Not MSVC, assume it's Itanium.
+ TLI.setUnavailable(LibFunc_msvc_new_int);
+ TLI.setUnavailable(LibFunc_msvc_new_int_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_new_longlong);
+ TLI.setUnavailable(LibFunc_msvc_new_longlong_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr32);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr32_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr32_int);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr64);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr64_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_ptr64_longlong);
+ TLI.setUnavailable(LibFunc_msvc_new_array_int);
+ TLI.setUnavailable(LibFunc_msvc_new_array_int_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_new_array_longlong);
+ TLI.setUnavailable(LibFunc_msvc_new_array_longlong_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr32);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr32_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr32_int);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr64);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr64_nothrow);
+ TLI.setUnavailable(LibFunc_msvc_delete_array_ptr64_longlong);
+ }
+
switch (T.getOS()) {
case Triple::MacOSX:
// exp10 and exp10f are not available on OS X until 10.9 and iOS until 7.0
@@ -572,6 +627,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_sinh_finite);
TLI.setUnavailable(LibFunc_sinhf_finite);
TLI.setUnavailable(LibFunc_sinhl_finite);
+ TLI.setUnavailable(LibFunc_sqrt_finite);
+ TLI.setUnavailable(LibFunc_sqrtf_finite);
+ TLI.setUnavailable(LibFunc_sqrtl_finite);
}
if ((T.isOSLinux() && T.isGNUEnvironment()) ||
@@ -589,6 +647,140 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setAvailable(LibFunc_fgets_unlocked);
}
+ if (T.isAndroid() && T.isAndroidVersionLT(21)) {
+ TLI.setUnavailable(LibFunc_stpcpy);
+ TLI.setUnavailable(LibFunc_stpncpy);
+ }
+
+ if (T.isPS4()) {
+ // PS4 does have memalign.
+ TLI.setAvailable(LibFunc_memalign);
+
+ // PS4 does not have new/delete with "unsigned int" size parameter;
+ // it only has the "unsigned long" versions.
+ TLI.setUnavailable(LibFunc_ZdaPvj);
+ TLI.setUnavailable(LibFunc_ZdaPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZdlPvj);
+ TLI.setUnavailable(LibFunc_ZdlPvjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_Znaj);
+ TLI.setUnavailable(LibFunc_ZnajRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnajSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwj);
+ TLI.setUnavailable(LibFunc_ZnwjRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_t);
+ TLI.setUnavailable(LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t);
+
+ // None of the *_chk functions.
+ TLI.setUnavailable(LibFunc_memccpy_chk);
+ TLI.setUnavailable(LibFunc_memcpy_chk);
+ TLI.setUnavailable(LibFunc_memmove_chk);
+ TLI.setUnavailable(LibFunc_mempcpy_chk);
+ TLI.setUnavailable(LibFunc_memset_chk);
+ TLI.setUnavailable(LibFunc_snprintf_chk);
+ TLI.setUnavailable(LibFunc_sprintf_chk);
+ TLI.setUnavailable(LibFunc_stpcpy_chk);
+ TLI.setUnavailable(LibFunc_stpncpy_chk);
+ TLI.setUnavailable(LibFunc_strcat_chk);
+ TLI.setUnavailable(LibFunc_strcpy_chk);
+ TLI.setUnavailable(LibFunc_strlcat_chk);
+ TLI.setUnavailable(LibFunc_strlcpy_chk);
+ TLI.setUnavailable(LibFunc_strlen_chk);
+ TLI.setUnavailable(LibFunc_strncat_chk);
+ TLI.setUnavailable(LibFunc_strncpy_chk);
+ TLI.setUnavailable(LibFunc_vsnprintf_chk);
+ TLI.setUnavailable(LibFunc_vsprintf_chk);
+
+ // Various Posix system functions.
+ TLI.setUnavailable(LibFunc_access);
+ TLI.setUnavailable(LibFunc_chmod);
+ TLI.setUnavailable(LibFunc_chown);
+ TLI.setUnavailable(LibFunc_closedir);
+ TLI.setUnavailable(LibFunc_ctermid);
+ TLI.setUnavailable(LibFunc_execl);
+ TLI.setUnavailable(LibFunc_execle);
+ TLI.setUnavailable(LibFunc_execlp);
+ TLI.setUnavailable(LibFunc_execv);
+ TLI.setUnavailable(LibFunc_execvP);
+ TLI.setUnavailable(LibFunc_execve);
+ TLI.setUnavailable(LibFunc_execvp);
+ TLI.setUnavailable(LibFunc_execvpe);
+ TLI.setUnavailable(LibFunc_fork);
+ TLI.setUnavailable(LibFunc_fstat);
+ TLI.setUnavailable(LibFunc_fstatvfs);
+ TLI.setUnavailable(LibFunc_getenv);
+ TLI.setUnavailable(LibFunc_getitimer);
+ TLI.setUnavailable(LibFunc_getlogin_r);
+ TLI.setUnavailable(LibFunc_getpwnam);
+ TLI.setUnavailable(LibFunc_gettimeofday);
+ TLI.setUnavailable(LibFunc_lchown);
+ TLI.setUnavailable(LibFunc_lstat);
+ TLI.setUnavailable(LibFunc_mkdir);
+ TLI.setUnavailable(LibFunc_open);
+ TLI.setUnavailable(LibFunc_opendir);
+ TLI.setUnavailable(LibFunc_pclose);
+ TLI.setUnavailable(LibFunc_popen);
+ TLI.setUnavailable(LibFunc_pread);
+ TLI.setUnavailable(LibFunc_pwrite);
+ TLI.setUnavailable(LibFunc_read);
+ TLI.setUnavailable(LibFunc_readlink);
+ TLI.setUnavailable(LibFunc_realpath);
+ TLI.setUnavailable(LibFunc_rename);
+ TLI.setUnavailable(LibFunc_rmdir);
+ TLI.setUnavailable(LibFunc_setitimer);
+ TLI.setUnavailable(LibFunc_stat);
+ TLI.setUnavailable(LibFunc_statvfs);
+ TLI.setUnavailable(LibFunc_system);
+ TLI.setUnavailable(LibFunc_times);
+ TLI.setUnavailable(LibFunc_tmpfile);
+ TLI.setUnavailable(LibFunc_unlink);
+ TLI.setUnavailable(LibFunc_uname);
+ TLI.setUnavailable(LibFunc_unsetenv);
+ TLI.setUnavailable(LibFunc_utime);
+ TLI.setUnavailable(LibFunc_utimes);
+ TLI.setUnavailable(LibFunc_valloc);
+ TLI.setUnavailable(LibFunc_write);
+
+ // Miscellaneous other functions not provided.
+ TLI.setUnavailable(LibFunc_atomic_load);
+ TLI.setUnavailable(LibFunc_atomic_store);
+ TLI.setUnavailable(LibFunc___kmpc_alloc_shared);
+ TLI.setUnavailable(LibFunc___kmpc_free_shared);
+ TLI.setUnavailable(LibFunc_dunder_strndup);
+ TLI.setUnavailable(LibFunc_bcmp);
+ TLI.setUnavailable(LibFunc_bcopy);
+ TLI.setUnavailable(LibFunc_bzero);
+ TLI.setUnavailable(LibFunc_cabs);
+ TLI.setUnavailable(LibFunc_cabsf);
+ TLI.setUnavailable(LibFunc_cabsl);
+ TLI.setUnavailable(LibFunc_ffs);
+ TLI.setUnavailable(LibFunc_flockfile);
+ TLI.setUnavailable(LibFunc_fseeko);
+ TLI.setUnavailable(LibFunc_ftello);
+ TLI.setUnavailable(LibFunc_ftrylockfile);
+ TLI.setUnavailable(LibFunc_funlockfile);
+ TLI.setUnavailable(LibFunc_htonl);
+ TLI.setUnavailable(LibFunc_htons);
+ TLI.setUnavailable(LibFunc_isascii);
+ TLI.setUnavailable(LibFunc_memccpy);
+ TLI.setUnavailable(LibFunc_mempcpy);
+ TLI.setUnavailable(LibFunc_memrchr);
+ TLI.setUnavailable(LibFunc_ntohl);
+ TLI.setUnavailable(LibFunc_ntohs);
+ TLI.setUnavailable(LibFunc_reallocf);
+ TLI.setUnavailable(LibFunc_roundeven);
+ TLI.setUnavailable(LibFunc_roundevenf);
+ TLI.setUnavailable(LibFunc_roundevenl);
+ TLI.setUnavailable(LibFunc_stpcpy);
+ TLI.setUnavailable(LibFunc_stpncpy);
+ TLI.setUnavailable(LibFunc_strlcat);
+ TLI.setUnavailable(LibFunc_strlcpy);
+ TLI.setUnavailable(LibFunc_strndup);
+ TLI.setUnavailable(LibFunc_strnlen);
+ TLI.setUnavailable(LibFunc_toascii);
+ }
+
// As currently implemented in clang, NVPTX code has no standard library to
// speak of. Headers provide a standard-ish library implementation, but many
// of the signatures are wrong -- for example, many libm functions are not
@@ -691,7 +883,7 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&
static StringRef sanitizeFunctionName(StringRef funcName) {
// Filter out empty names and names containing null bytes, those can't be in
// our table.
- if (funcName.empty() || funcName.find('\0') != StringRef::npos)
+ if (funcName.empty() || funcName.contains('\0'))
return StringRef();
// Check for \01 prefix that is used to mangle __asm declarations and
@@ -716,12 +908,12 @@ bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName, LibFunc &F) const {
bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
LibFunc F,
- const DataLayout *DL) const {
- LLVMContext &Ctx = FTy.getContext();
- Type *SizeTTy = DL ? DL->getIntPtrType(Ctx, /*AddressSpace=*/0) : nullptr;
- auto IsSizeTTy = [SizeTTy](Type *Ty) {
- return SizeTTy ? Ty == SizeTTy : Ty->isIntegerTy();
- };
+ const Module &M) const {
+ // FIXME: There is really no guarantee that sizeof(size_t) is equal to
+ // sizeof(int*) for every target. So the assumption used here to derive the
+ // SizeTBits based on the size of an integer pointer in address space zero
+ // isn't always valid.
+ unsigned SizeTBits = M.getDataLayout().getPointerSizeInBits(/*AddrSpace=*/0);
unsigned NumParams = FTy.getNumParams();
switch (F) {
@@ -745,12 +937,12 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getReturnType()->isIntegerTy(32));
case LibFunc_strlen_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strlen:
- return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getReturnType()->isIntegerTy());
+ return NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(SizeTBits);
case LibFunc_strchr:
case LibFunc_strrchr:
@@ -770,7 +962,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
FTy.getParamType(1)->isPointerTy());
case LibFunc_strcat_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strcat:
@@ -780,19 +972,19 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_strncat_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strncat:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
FTy.getParamType(1) == FTy.getReturnType() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_strcpy_chk:
case LibFunc_stpcpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strcpy:
@@ -804,20 +996,20 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_strlcat_chk:
case LibFunc_strlcpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strlcat:
case LibFunc_strlcpy:
- return NumParams == 3 && IsSizeTTy(FTy.getReturnType()) &&
+ return NumParams == 3 && FTy.getReturnType()->isIntegerTy(SizeTBits) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(2));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits);
case LibFunc_strncpy_chk:
case LibFunc_stpncpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_strncpy:
@@ -825,7 +1017,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0) == FTy.getParamType(1) &&
FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_strxfrm:
return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
@@ -840,7 +1032,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(0) == FTy.getParamType(1) &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_strspn:
case LibFunc_strcspn:
@@ -888,20 +1080,21 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_sprintf_chk:
return NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(2)) &&
+ FTy.getParamType(2)->isIntegerTy(SizeTBits) &&
FTy.getParamType(3)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32);
case LibFunc_snprintf:
- return (NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(2)->isPointerTy() &&
- FTy.getReturnType()->isIntegerTy(32));
+ return NumParams == 3 && FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isIntegerTy(SizeTBits) &&
+ FTy.getParamType(2)->isPointerTy() &&
+ FTy.getReturnType()->isIntegerTy(32);
case LibFunc_snprintf_chk:
return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)) &&
+ FTy.getParamType(1)->isIntegerTy(SizeTBits) &&
FTy.getParamType(2)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(3)) &&
+ FTy.getParamType(3)->isIntegerTy(SizeTBits) &&
FTy.getParamType(4)->isPointerTy() &&
FTy.getReturnType()->isIntegerTy(32);
@@ -915,16 +1108,17 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vec_malloc:
return (NumParams == 1 && FTy.getReturnType()->isPointerTy());
case LibFunc_memcmp:
- return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
- FTy.getParamType(0)->isPointerTy() &&
- FTy.getParamType(1)->isPointerTy());
+ return NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isPointerTy() &&
+ FTy.getParamType(1)->isPointerTy() &&
+ FTy.getParamType(2)->isIntegerTy(SizeTBits);
case LibFunc_memchr:
case LibFunc_memrchr:
return (NumParams == 3 && FTy.getReturnType()->isPointerTy() &&
FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(1)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_modf:
case LibFunc_modff:
case LibFunc_modfl:
@@ -934,7 +1128,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_mempcpy_chk:
case LibFunc_memmove_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_memcpy:
@@ -943,22 +1137,22 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_memset_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy() &&
- IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_memccpy_chk:
--NumParams;
- if (!IsSizeTTy(FTy.getParamType(NumParams)))
+ if (!FTy.getParamType(NumParams)->isIntegerTy(SizeTBits))
return false;
LLVM_FALLTHROUGH;
case LibFunc_memccpy:
@@ -970,7 +1164,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vec_realloc:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
FTy.getParamType(0) == FTy.getReturnType() &&
- IsSizeTTy(FTy.getParamType(1)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits));
case LibFunc_read:
return (NumParams == 3 && FTy.getParamType(1)->isPointerTy());
case LibFunc_rewind:
@@ -1051,7 +1245,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
return (NumParams != 0 && FTy.getParamType(0)->isPointerTy());
case LibFunc___kmpc_free_shared:
return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits));
case LibFunc_fopen:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
@@ -1141,14 +1335,14 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_vsprintf_chk:
return NumParams == 5 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(2)) && FTy.getParamType(3)->isPointerTy();
+ FTy.getParamType(2)->isIntegerTy(SizeTBits) && FTy.getParamType(3)->isPointerTy();
case LibFunc_vsnprintf:
return (NumParams == 4 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isPointerTy());
case LibFunc_vsnprintf_chk:
return NumParams == 6 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(2)->isIntegerTy(32) &&
- IsSizeTTy(FTy.getParamType(3)) && FTy.getParamType(4)->isPointerTy();
+ FTy.getParamType(3)->isIntegerTy(SizeTBits) && FTy.getParamType(4)->isPointerTy();
case LibFunc_open:
return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy());
case LibFunc_opendir:
@@ -1560,12 +1754,13 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc_strnlen:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(1) &&
FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits));
case LibFunc_posix_memalign:
return (NumParams == 3 && FTy.getReturnType()->isIntegerTy(32) &&
FTy.getParamType(0)->isPointerTy() &&
- IsSizeTTy(FTy.getParamType(1)) && IsSizeTTy(FTy.getParamType(2)));
+ FTy.getParamType(1)->isIntegerTy(SizeTBits) &&
+ FTy.getParamType(2)->isIntegerTy(SizeTBits));
case LibFunc_wcslen:
return (NumParams == 1 && FTy.getParamType(0)->isPointerTy() &&
@@ -1605,10 +1800,11 @@ bool TargetLibraryInfoImpl::getLibFunc(const Function &FDecl,
// avoid string normalization and comparison.
if (FDecl.isIntrinsic()) return false;
- const DataLayout *DL =
- FDecl.getParent() ? &FDecl.getParent()->getDataLayout() : nullptr;
+ const Module *M = FDecl.getParent();
+ assert(M && "Expecting FDecl to be connected to a Module.");
+
return getLibFunc(FDecl.getName(), F) &&
- isValidProtoForLibFunc(*FDecl.getFunctionType(), F, DL);
+ isValidProtoForLibFunc(*FDecl.getFunctionType(), F, *M);
}
void TargetLibraryInfoImpl::disableAllFunctions() {
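A minimal sketch of where the SizeTBits value used in the checks above can come from, now that getLibFunc passes the Module down: assuming size_t matches the pointer width in address space 0 (true for the common targets, but stated here as an assumption), the width falls out of the DataLayout. The helper name below is illustrative, not part of this patch.

#include "llvm/IR/Module.h"

static unsigned getSizeTBits(const llvm::Module &M) {
  // Assumption: size_t is as wide as a default-address-space pointer.
  return M.getDataLayout().getPointerSizeInBits(/*AddrSpace=*/0);
}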
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 304d24fe8e4a..5067f493f02d 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -167,11 +167,7 @@ bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
// Note that this block may not be the loop latch block, even if the loop
// has a latch block.
ExitBlock = BB;
- TripCount = SE.getAddExpr(EC, SE.getOne(EC->getType()));
-
- if (!EC->getType()->isPointerTy() && EC->getType() != CountType)
- TripCount = SE.getZeroExtendExpr(TripCount, CountType);
-
+ ExitCount = EC;
break;
}
@@ -263,10 +259,20 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
+bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
+ unsigned AS) const {
+ return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
+}
+
unsigned TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
return TTIImpl->getAssumedAddrSpace(V);
}
+std::pair<const Value *, unsigned>
+TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
+ return TTIImpl->getPredicatedAddrSpace(V);
+}
+
Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
IntrinsicInst *II, Value *OldV, Value *NewV) const {
return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
@@ -317,8 +323,9 @@ Optional<Value *> TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic(
}
void TargetTransformInfo::getUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
- return TTIImpl->getUnrollingPreferences(L, SE, UP);
+ Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
+ return TTIImpl->getUnrollingPreferences(L, SE, UP, ORE);
}
void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
@@ -409,6 +416,10 @@ bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
return TTIImpl->isLegalMaskedExpandLoad(DataType);
}
+bool TargetTransformInfo::enableOrderedReductions() const {
+ return TTIImpl->enableOrderedReductions();
+}
+
bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
return TTIImpl->hasDivRemOp(DataType, IsSigned);
}
@@ -598,6 +609,10 @@ Optional<unsigned> TargetTransformInfo::getMaxVScale() const {
return TTIImpl->getMaxVScale();
}
+Optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
+ return TTIImpl->getVScaleForTuning();
+}
+
bool TargetTransformInfo::shouldMaximizeVectorBandwidth() const {
return TTIImpl->shouldMaximizeVectorBandwidth();
}
@@ -818,6 +833,15 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode,
return Cost;
}
+InstructionCost TargetTransformInfo::getReplicationShuffleCost(
+ Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = TTIImpl->getReplicationShuffleCost(
+ EltTy, ReplicationFactor, VF, DemandedDstElts, CostKind);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
InstructionCost TargetTransformInfo::getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, const Instruction *I) const {
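For context on the new getReplicationShuffleCost hook: a replication shuffle repeats each of the VF source elements ReplicationFactor times, so VF = 2 with ReplicationFactor = 3 corresponds to the mask <0,0,0,1,1,1>. A hedged sketch of building such a mask (the helper name is illustrative, not from this patch):

#include "llvm/ADT/SmallVector.h"

static llvm::SmallVector<int, 16> buildReplicationMask(int ReplicationFactor,
                                                       int VF) {
  llvm::SmallVector<int, 16> Mask;
  for (int Elt = 0; Elt < VF; ++Elt)
    Mask.append(ReplicationFactor, Elt); // repeat source index Elt
  return Mask;
}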
diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 20d718f4fad3..23dbb32f38de 100644
--- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -521,21 +521,21 @@ static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
return Ret;
}
-void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
- if (Merge) {
- N.TBAA =
- MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
- N.TBAAStruct = nullptr;
- N.Scope = MDNode::getMostGenericAliasScope(
- N.Scope, getMetadata(LLVMContext::MD_alias_scope));
- N.NoAlias =
- MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
- } else {
- N.TBAA = getMetadata(LLVMContext::MD_tbaa);
- N.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct);
- N.Scope = getMetadata(LLVMContext::MD_alias_scope);
- N.NoAlias = getMetadata(LLVMContext::MD_noalias);
- }
+AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
+ AAMDNodes Result;
+ Result.TBAA = MDNode::getMostGenericTBAA(TBAA, Other.TBAA);
+ Result.TBAAStruct = nullptr;
+ Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
+ Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ return Result;
+}
+
+AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
+ AAMDNodes Result;
+ Result.TBAA = Result.TBAAStruct = nullptr;
+ Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
+ Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
+ return Result;
}
static const MDNode *createAccessTag(const MDNode *AccessType) {
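A brief usage sketch for the new value-style AAMDNodes API: merge() replaces the old Merge == true path (most-generic TBAA, intersected scope and noalias, TBAAStruct dropped), while concat(), per its definition above, also drops the TBAA tags for a combined access that cannot carry either one.

#include "llvm/IR/Metadata.h"

// Combine the tags of two accesses that are being folded into one.
static llvm::AAMDNodes combineTags(const llvm::AAMDNodes &A,
                                   const llvm::AAMDNodes &B) {
  return A.merge(B); // A.concat(B) when the TBAA tag must not survive
}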
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index f015ba9a09ca..80051fd5f7c1 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -126,7 +126,8 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
Offset->getZExtValue(), CI, DT);
}
-Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
+Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
+ Constant *TopLevelGlobal) {
if (I->getType()->isPointerTy()) {
if (Offset == 0)
return I;
@@ -142,7 +143,8 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
unsigned Op = SL->getElementContainingOffset(Offset);
return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset - SL->getElementOffset(Op), M);
+ Offset - SL->getElementOffset(Op), M,
+ TopLevelGlobal);
}
if (auto *C = dyn_cast<ConstantArray>(I)) {
ArrayType *VTableTy = C->getType();
@@ -153,7 +155,62 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M) {
return nullptr;
return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
- Offset % ElemSize, M);
+ Offset % ElemSize, M, TopLevelGlobal);
+ }
+
+ // (Swift-specific) relative-pointer support starts here.
+ if (auto *CI = dyn_cast<ConstantInt>(I)) {
+ if (Offset == 0 && CI->getZExtValue() == 0) {
+ return I;
+ }
+ }
+ if (auto *C = dyn_cast<ConstantExpr>(I)) {
+ switch (C->getOpcode()) {
+ case Instruction::Trunc:
+ case Instruction::PtrToInt:
+ return getPointerAtOffset(cast<Constant>(C->getOperand(0)), Offset, M,
+ TopLevelGlobal);
+ case Instruction::Sub: {
+ auto *Operand0 = cast<Constant>(C->getOperand(0));
+ auto *Operand1 = cast<Constant>(C->getOperand(1));
+
+ auto StripGEP = [](Constant *C) {
+ auto *CE = dyn_cast<ConstantExpr>(C);
+ if (!CE)
+ return C;
+ if (CE->getOpcode() != Instruction::GetElementPtr)
+ return C;
+ return CE->getOperand(0);
+ };
+ auto *Operand1TargetGlobal = StripGEP(getPointerAtOffset(Operand1, 0, M));
+
+ // Check that in the "sub (@a, @b)" expression, @b points back to the top
+ // level global (or a GEP thereof) that we're processing. Otherwise bail.
+ if (Operand1TargetGlobal != TopLevelGlobal)
+ return nullptr;
+
+ return getPointerAtOffset(Operand0, Offset, M, TopLevelGlobal);
+ }
+ default:
+ return nullptr;
+ }
}
return nullptr;
}
+
+void llvm::replaceRelativePointerUsersWithZero(Function *F) {
+ for (auto *U : F->users()) {
+ auto *PtrExpr = dyn_cast<ConstantExpr>(U);
+ if (!PtrExpr || PtrExpr->getOpcode() != Instruction::PtrToInt)
+ continue;
+
+ for (auto *PtrToIntUser : PtrExpr->users()) {
+ auto *SubExpr = dyn_cast<ConstantExpr>(PtrToIntUser);
+ if (!SubExpr || SubExpr->getOpcode() != Instruction::Sub)
+ continue;
+
+ SubExpr->replaceNonMetadataUsesWith(
+ ConstantInt::get(SubExpr->getType(), 0));
+ }
+ }
+}
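A hedged usage sketch for the extended getPointerAtOffset: relative vtable entries have the shape trunc(sub(ptrtoint @target, ptrtoint @vtable)), and passing the vtable itself as TopLevelGlobal lets the Sub case above verify that the offset is taken against that vtable. The function and variable names below are illustrative, not from this patch.

#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

static llvm::Constant *resolveVTableSlot(llvm::GlobalVariable *VTable,
                                         uint64_t SlotOffset, llvm::Module &M) {
  llvm::Constant *Init = VTable->getInitializer();
  // Returns nullptr if the slot is not a (possibly relative) pointer constant,
  // or if the relative offset is computed against some other global.
  return llvm::getPointerAtOffset(Init, SlotOffset, M,
                                  /*TopLevelGlobal=*/VTable);
}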
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 522d21812c6a..1c41c77a8cfb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -84,6 +84,17 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+// According to the LangRef, branching on a poison condition is absolutely
+// immediate full UB. However, historically we haven't implemented that
+// consistently as we have an important transformation (non-trivial unswitch)
+// which introduces instances of branch on poison/undef to otherwise well
+// defined programs. This flag exists to let us test optimization benefit
+// of exploiting the specified behavior (in combination with enabling the
+// unswitch fix.)
+static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub",
+ cl::Hidden, cl::init(false));
+
+
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -165,8 +176,8 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
int NumElts =
cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements();
- DemandedLHS = DemandedRHS = APInt::getNullValue(NumElts);
- if (DemandedElts.isNullValue())
+ DemandedLHS = DemandedRHS = APInt::getZero(NumElts);
+ if (DemandedElts.isZero())
return true;
// Simple case of a shuffle with zeroinitializer.
if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) {
@@ -206,7 +217,7 @@ static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
- FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
computeKnownBits(V, DemandedElts, Known, Depth, Q);
}
@@ -279,16 +290,11 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
}
-bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) {
- for (const User *U : CxtI->users()) {
- if (const ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (IC->isEquality())
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- return false;
- }
- return true;
+bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
+ return !I->user_empty() && all_of(I->users(), [](const User *U) {
+ ICmpInst::Predicate P;
+ return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
+ });
}
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
@@ -378,7 +384,7 @@ static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
- FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
return ComputeNumSignBits(V, DemandedElts, Depth, Q);
}
@@ -390,6 +396,14 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
}
+unsigned llvm::ComputeMinSignedBits(const Value *V, const DataLayout &DL,
+ unsigned Depth, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
+ unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
+ return V->getType()->getScalarSizeInBits() - SignBits + 1;
+}
+
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
bool NSW, const APInt &DemandedElts,
KnownBits &KnownOut, KnownBits &Known2,
@@ -499,7 +513,9 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
if (V == E)
return true;
- if (V == I || isSafeToSpeculativelyExecute(V)) {
+ if (V == I || (isa<Instruction>(V) &&
+ !cast<Instruction>(V)->mayHaveSideEffects() &&
+ !cast<Instruction>(V)->isTerminator())) {
EphValues.insert(V);
if (const User *U = dyn_cast<User>(V))
append_range(WorkSet, U->operands());
@@ -547,10 +563,9 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
// We limit the scan distance between the assume and its context instruction
// to avoid a compile-time explosion. This limit is chosen arbitrarily, so
// it can be adjusted if needed (could be turned into a cl::opt).
- unsigned ScanLimit = 15;
- for (BasicBlock::const_iterator I(CxtI), IE(Inv); I != IE; ++I)
- if (!isGuaranteedToTransferExecutionToSuccessor(&*I) || --ScanLimit == 0)
- return false;
+ auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
+ if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
+ return false;
return !isEphemeralValueOf(Inv, CxtI);
}
@@ -582,7 +597,7 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
return false;
ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
- return !TrueValues.contains(APInt::getNullValue(C->getBitWidth()));
+ return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
}
static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
@@ -641,7 +656,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (V->getType()->isPointerTy()) {
if (RetainedKnowledge RK = getKnowledgeValidInContext(
V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
- Known.Zero.setLowBits(Log2_32(RK.ArgValue));
+ Known.Zero.setLowBits(Log2_64(RK.ArgValue));
}
}
@@ -1210,7 +1225,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// (dependent on endian) to form the full result of known bits.
unsigned NumElts = DemandedElts.getBitWidth();
unsigned SubScale = BitWidth / SubBitWidth;
- APInt SubDemandedElts = APInt::getNullValue(NumElts * SubScale);
+ APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i])
SubDemandedElts.setBit(i * SubScale);
@@ -1383,7 +1398,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
Known = KnownBits::computeForAddSub(
/*Add=*/true, /*NSW=*/false, Known, IndexBits);
}
- if (!Known.isUnknown() && !AccConstIndices.isNullValue()) {
+ if (!Known.isUnknown() && !AccConstIndices.isZero()) {
KnownBits Index = KnownBits::makeConstant(AccConstIndices);
Known = KnownBits::computeForAddSub(
/*Add=*/true, /*NSW=*/false, Known, Index);
@@ -1512,7 +1527,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// taking conservative care to avoid excessive recursion.
if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) {
// Skip if every incoming value references to ourself.
- if (dyn_cast_or_null<UndefValue>(P->hasConstantValue()))
+ if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
Known.Zero.setAllBits();
@@ -1689,6 +1704,33 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (BitWidth >= 32)
Known.Zero.setBitsFrom(31);
break;
+ case Intrinsic::vscale: {
+ if (!II->getParent() || !II->getFunction() ||
+ !II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
+ break;
+
+ auto VScaleRange = II->getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs();
+
+ if (VScaleRange.second == 0)
+ break;
+
+ // If vscale min = max then we know the exact value at compile time
+ // and hence we know the exact bits.
+ if (VScaleRange.first == VScaleRange.second) {
+ Known.One = VScaleRange.first;
+ Known.Zero = VScaleRange.first;
+ Known.Zero.flipAllBits();
+ break;
+ }
+
+ unsigned FirstZeroHighBit = 32 - countLeadingZeros(VScaleRange.second);
+ if (FirstZeroHighBit < BitWidth)
+ Known.Zero.setBitsFrom(FirstZeroHighBit);
+
+ break;
+ }
}
}
break;
@@ -1763,7 +1805,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
- APInt DemandedVecElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedVecElts = APInt::getAllOnes(NumElts);
if (CIdx && CIdx->getValue().ult(NumElts))
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
@@ -2248,7 +2290,7 @@ static bool isNonZeroRecurrence(const PHINode *PN) {
Value *Start = nullptr, *Step = nullptr;
const APInt *StartC, *StepC;
if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
- !match(Start, m_APInt(StartC)) || StartC->isNullValue())
+ !match(Start, m_APInt(StartC)) || StartC->isZero())
return false;
switch (BO->getOpcode()) {
@@ -2260,7 +2302,7 @@ static bool isNonZeroRecurrence(const PHINode *PN) {
StartC->isNegative() == StepC->isNegative());
case Instruction::Mul:
return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
- match(Step, m_APInt(StepC)) && !StepC->isNullValue();
+ match(Step, m_APInt(StepC)) && !StepC->isZero();
case Instruction::Shl:
return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
case Instruction::AShr:
@@ -2532,7 +2574,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
auto *CIdx = dyn_cast<ConstantInt>(Idx);
if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
unsigned NumElts = VecTy->getNumElements();
- APInt DemandedVecElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedVecElts = APInt::getAllOnes(NumElts);
if (CIdx && CIdx->getValue().ult(NumElts))
DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
@@ -2559,7 +2601,7 @@ bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) {
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
- FVTy ? APInt::getAllOnesValue(FVTy->getNumElements()) : APInt(1, 1);
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
return isKnownNonZero(V, DemandedElts, Depth, Q);
}
@@ -2694,8 +2736,7 @@ static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
const APInt *C;
return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
(OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
- !C->isNullValue() && !C->isOneValue() &&
- isKnownNonZero(V1, Depth + 1, Q);
+ !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q);
}
return false;
}
@@ -2708,7 +2749,7 @@ static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
const APInt *C;
return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
(OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
- !C->isNullValue() && isKnownNonZero(V1, Depth + 1, Q);
+ !C->isZero() && isKnownNonZero(V1, Depth + 1, Q);
}
return false;
}
@@ -3051,7 +3092,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// If the input is known to be 0 or 1, the output is 0/-1, which is
// all sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return TyBits;
// If we are subtracting one from a positive number, there is no carry
@@ -3075,7 +3116,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
// If the input is known to be 0 or 1, the output is 0/-1, which is
// all sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return TyBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -4533,6 +4574,12 @@ AllocaInst *llvm::findAllocaForValue(Value *V, bool OffsetZero) {
if (OffsetZero && !GEP->hasAllZeroIndices())
return nullptr;
AddWork(GEP->getPointerOperand());
+ } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
+ Value *Returned = CB->getReturnedArgOperand();
+ if (Returned)
+ AddWork(Returned);
+ else
+ return nullptr;
} else {
return nullptr;
}
@@ -4614,7 +4661,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
if (*Denominator == 0)
return false;
// It's safe to hoist if the denominator is not 0 or -1.
- if (!Denominator->isAllOnesValue())
+ if (!Denominator->isAllOnes())
return true;
// At this point we know that the denominator is -1. It is safe to hoist as
// long we know that the numerator is not INT_MIN.
@@ -4922,15 +4969,14 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch);
}
-static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
- // See whether I has flags that may create poison
- if (const auto *OvOp = dyn_cast<OverflowingBinaryOperator>(Op)) {
- if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap())
- return true;
- }
- if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(Op))
- if (ExactOp->isExact())
- return true;
+static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
+ bool ConsiderFlags) {
+
+ if (ConsiderFlags && Op->hasPoisonGeneratingFlags())
+ return true;
+
+ // TODO: this should really be under the ConsiderFlags block, but currently
+ // these are not dropped by dropPoisonGeneratingFlags
if (const auto *FP = dyn_cast<FPMathOperator>(Op)) {
auto FMF = FP->getFastMathFlags();
if (FMF.noNaNs() || FMF.noInfs())
@@ -5019,10 +5065,10 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
case Instruction::ICmp:
case Instruction::FCmp:
return false;
- case Instruction::GetElementPtr: {
- const auto *GEP = cast<GEPOperator>(Op);
- return GEP->isInBounds();
- }
+ case Instruction::GetElementPtr:
+ // inbounds is handled above
+ // TODO: what about inrange on constexpr?
+ return false;
default: {
const auto *CE = dyn_cast<ConstantExpr>(Op);
if (isa<CastInst>(Op) || (CE && CE->isCast()))
@@ -5035,12 +5081,12 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) {
}
}
-bool llvm::canCreateUndefOrPoison(const Operator *Op) {
- return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false);
+bool llvm::canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlags) {
+ return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false, ConsiderFlags);
}
-bool llvm::canCreatePoison(const Operator *Op) {
- return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true);
+bool llvm::canCreatePoison(const Operator *Op, bool ConsiderFlags) {
+ return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true, ConsiderFlags);
}
static bool directlyImpliesPoison(const Value *ValAssumedPoison,
@@ -5068,7 +5114,7 @@ static bool directlyImpliesPoison(const Value *ValAssumedPoison,
const WithOverflowInst *II;
if (match(I, m_ExtractValue(m_WithOverflowInst(II))) &&
(match(ValAssumedPoison, m_ExtractValue(m_Specific(II))) ||
- llvm::is_contained(II->arg_operands(), ValAssumedPoison)))
+ llvm::is_contained(II->args(), ValAssumedPoison)))
return true;
}
return false;
@@ -5225,8 +5271,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
Dominator = Dominator->getIDom();
}
- SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NoUndef};
- if (getKnowledgeValidInContext(V, AttrKinds, CtxI, DT, AC))
+ if (getKnowledgeValidInContext(V, {Attribute::NoUndef}, CtxI, DT, AC))
return true;
return false;
@@ -5304,6 +5349,27 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const BasicBlock *BB) {
return true;
}
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(
+ BasicBlock::const_iterator Begin, BasicBlock::const_iterator End,
+ unsigned ScanLimit) {
+ return isGuaranteedToTransferExecutionToSuccessor(make_range(Begin, End),
+ ScanLimit);
+}
+
+bool llvm::isGuaranteedToTransferExecutionToSuccessor(
+ iterator_range<BasicBlock::const_iterator> Range, unsigned ScanLimit) {
+ assert(ScanLimit && "scan limit must be non-zero");
+ for (const Instruction &I : Range) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (--ScanLimit == 0)
+ return false;
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ return false;
+ }
+ return true;
+}
+
bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I,
const Loop *L) {
// The loop header is guaranteed to be executed for every iteration.
@@ -5391,7 +5457,10 @@ void llvm::getGuaranteedWellDefinedOps(
}
break;
}
-
+ case Instruction::Ret:
+ if (I->getFunction()->hasRetAttribute(Attribute::NoUndef))
+ Operands.insert(I->getOperand(0));
+ break;
default:
break;
}
@@ -5408,7 +5477,16 @@ void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
case Instruction::SRem:
Operands.insert(I->getOperand(1));
break;
-
+ case Instruction::Switch:
+ if (BranchOnPoisonAsUB)
+ Operands.insert(cast<SwitchInst>(I)->getCondition());
+ break;
+ case Instruction::Br: {
+ auto *BR = cast<BranchInst>(I);
+ if (BranchOnPoisonAsUB && BR->isConditional())
+ Operands.insert(BR->getCondition());
+ break;
+ }
default:
break;
}
@@ -5835,15 +5913,13 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// Is the sign bit set?
// (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
// (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
- if (Pred == CmpInst::ICMP_SLT && C1->isNullValue() &&
- C2->isMaxSignedValue())
+ if (Pred == CmpInst::ICMP_SLT && C1->isZero() && C2->isMaxSignedValue())
return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
// Is the sign bit clear?
// (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
// (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
- if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
- C2->isMinSignedValue())
+ if (Pred == CmpInst::ICMP_SGT && C1->isAllOnes() && C2->isMinSignedValue())
return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
}
@@ -6253,6 +6329,16 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
+APInt llvm::getMinMaxLimit(SelectPatternFlavor SPF, unsigned BitWidth) {
+ switch (SPF) {
+ case SPF_SMAX: return APInt::getSignedMaxValue(BitWidth);
+ case SPF_SMIN: return APInt::getSignedMinValue(BitWidth);
+ case SPF_UMAX: return APInt::getMaxValue(BitWidth);
+ case SPF_UMIN: return APInt::getMinValue(BitWidth);
+ default: llvm_unreachable("Unexpected flavor");
+ }
+}
+
std::pair<Intrinsic::ID, bool>
llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
// Check if VL contains select instructions that can be folded into a min/max
@@ -6681,7 +6767,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
const APInt *C;
switch (BO.getOpcode()) {
case Instruction::Add:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
// FIXME: If we have both nuw and nsw, we should reduce the range further.
if (IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(&BO))) {
// 'add nuw x, C' produces [C, UINT_MAX].
@@ -6719,7 +6805,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
Upper = APInt::getSignedMaxValue(Width).ashr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
+ if (!C->isZero() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
if (C->isNegative()) {
// 'ashr C, x' produces [C, C >> (Width-1)]
@@ -6736,11 +6822,11 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
case Instruction::LShr:
if (match(BO.getOperand(1), m_APInt(C)) && C->ult(Width)) {
// 'lshr x, C' produces [0, UINT_MAX >> C].
- Upper = APInt::getAllOnesValue(Width).lshr(*C) + 1;
+ Upper = APInt::getAllOnes(Width).lshr(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
// 'lshr C, x' produces [C >> (Width-1), C].
unsigned ShiftAmount = Width - 1;
- if (!C->isNullValue() && IIQ.isExact(&BO))
+ if (!C->isZero() && IIQ.isExact(&BO))
ShiftAmount = C->countTrailingZeros();
Lower = C->lshr(ShiftAmount);
Upper = *C + 1;
@@ -6773,7 +6859,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
if (match(BO.getOperand(1), m_APInt(C))) {
APInt IntMin = APInt::getSignedMinValue(Width);
APInt IntMax = APInt::getSignedMaxValue(Width);
- if (C->isAllOnesValue()) {
+ if (C->isAllOnes()) {
// 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
// where C != -1 and C != 0 and C != 1
Lower = IntMin + 1;
@@ -6802,7 +6888,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
break;
case Instruction::UDiv:
- if (match(BO.getOperand(1), m_APInt(C)) && !C->isNullValue()) {
+ if (match(BO.getOperand(1), m_APInt(C)) && !C->isZero()) {
// 'udiv x, C' produces [0, UINT_MAX / C].
Upper = APInt::getMaxValue(Width).udiv(*C) + 1;
} else if (match(BO.getOperand(0), m_APInt(C))) {
@@ -6946,7 +7032,7 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
// If the negation part of the abs (in RHS) has the NSW flag,
// then the result of abs(X) is [0..SIGNED_MAX],
// otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
- Lower = APInt::getNullValue(BitWidth);
+ Lower = APInt::getZero(BitWidth);
if (match(RHS, m_Neg(m_Specific(LHS))) &&
IIQ.hasNoSignedWrap(cast<Instruction>(RHS)))
Upper = APInt::getSignedMaxValue(BitWidth) + 1;
@@ -6986,9 +7072,27 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
}
}
+static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
+ // The maximum representable value of a half is 65504. For floats the maximum
+ // value is 3.4e38 which requires roughly 129 bits.
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ if (!I->getOperand(0)->getType()->getScalarType()->isHalfTy())
+ return;
+ if (isa<FPToSIInst>(I) && BitWidth >= 17) {
+ Lower = APInt(BitWidth, -65504);
+ Upper = APInt(BitWidth, 65505);
+ }
+
+ if (isa<FPToUIInst>(I) && BitWidth >= 16) {
+ // For a fptoui the lower limit is left as 0.
+ Upper = APInt(BitWidth, 65505);
+ }
+}
+
ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
AssumptionCache *AC,
const Instruction *CtxI,
+ const DominatorTree *DT,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
@@ -7009,6 +7113,8 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
setLimitsForIntrinsic(*II, Lower, Upper);
else if (auto *SI = dyn_cast<SelectInst>(V))
setLimitsForSelectPattern(*SI, Lower, Upper, IIQ);
+ else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V))
+ setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
@@ -7027,7 +7133,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
"must be an assume intrinsic");
- if (!isValidAssumeForContext(I, CtxI, nullptr))
+ if (!isValidAssumeForContext(I, CtxI, DT))
continue;
Value *Arg = I->getArgOperand(0);
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -7035,9 +7141,9 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool UseInstrInfo,
if (!Cmp || Cmp->getOperand(0) != V)
continue;
ConstantRange RHS = computeConstantRange(Cmp->getOperand(1), UseInstrInfo,
- AC, I, Depth + 1);
+ AC, I, DT, Depth + 1);
CR = CR.intersectWith(
- ConstantRange::makeSatisfyingICmpRegion(Cmp->getPredicate(), RHS));
+ ConstantRange::makeAllowedICmpRegion(Cmp->getPredicate(), RHS));
}
}
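One clarifying note on the hunk above: in assume(icmp pred %V, %X), the recursively computed range for %X over-approximates its possible values, and %V only has to satisfy the predicate against the single value %X actually takes. makeAllowedICmpRegion returns every value that can satisfy the predicate with some value in that range, so intersecting with it remains conservative, whereas makeSatisfyingICmpRegion (values satisfying the predicate against all of the range) can, for a non-singleton range, exclude feasible values of %V.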
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 0a14a1432934..655c248907f6 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -331,6 +331,12 @@ Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
if (Elt->isNullValue())
return findScalarElement(Val, EltNo);
+ // If the vector is a splat then we can trivially find the scalar element.
+ if (isa<ScalableVectorType>(VTy))
+ if (Value *Splat = getSplatValue(V))
+ if (EltNo < VTy->getElementCount().getKnownMinValue())
+ return Splat;
+
// Otherwise, we don't know.
return nullptr;
}
@@ -824,6 +830,23 @@ llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
return Mask;
}
+llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
+ unsigned NumElts) {
+ // Avoid casts in the loop and make sure we have a reasonable number.
+ int NumEltsSigned = NumElts;
+ assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
+
+ // If the mask chooses an element from operand 1, reduce it to choose from the
+ // corresponding element of operand 0. Undef mask elements are unchanged.
+ SmallVector<int, 16> UnaryMask;
+ for (int MaskElt : Mask) {
+ assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
+ int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
+ UnaryMask.push_back(UnaryElt);
+ }
+ return UnaryMask;
+}
+
/// A helper function for concatenating vectors. This function concatenates two
/// vectors having the same element type. If the second vector has fewer
/// elements than the first, it is padded with undefs.
@@ -940,7 +963,7 @@ APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
const unsigned VWidth =
cast<FixedVectorType>(Mask->getType())->getNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+ APInt DemandedElts = APInt::getAllOnes(VWidth);
if (auto *CV = dyn_cast<ConstantVector>(Mask))
for (unsigned i = 0; i < VWidth; i++)
if (CV->getAggregateElement(i)->isNullValue())
@@ -980,7 +1003,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
@@ -1193,15 +1216,24 @@ void InterleavedAccessInfo::analyzeInterleaving(
} // Iteration over A accesses.
} // Iteration over B accesses.
- // Remove interleaved store groups with gaps.
- for (auto *Group : StoreGroups)
- if (Group->getNumMembers() != Group->getFactor()) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved store group due "
- "to gaps.\n");
- releaseGroup(Group);
- }
- // Remove interleaved groups with gaps (currently only loads) whose memory
+ auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
+ int Index,
+ std::string FirstOrLast) -> bool {
+ Instruction *Member = Group->getMember(Index);
+ assert(Member && "Group member does not exist");
+ Value *MemberPtr = getLoadStorePointerOperand(Member);
+ Type *AccessTy = getLoadStoreType(Member);
+ if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
+ /*Assume=*/false, /*ShouldCheckWrap=*/true))
+ return false;
+ LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ << FirstOrLast
+ << " group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ return true;
+ };
+
+ // Remove interleaved groups with gaps whose memory
// accesses may wrap around. We have to revisit the getPtrStride analysis,
// this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
// not check wrapping (see documentation there).
@@ -1227,26 +1259,12 @@ void InterleavedAccessInfo::analyzeInterleaving(
// So we check only group member 0 (which is always guaranteed to exist),
// and group member Factor - 1; If the latter doesn't exist we rely on
// peeling (if it is a non-reversed access -- see Case 3).
- Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
- if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "first group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
+ if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
continue;
- }
- Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
- if (LastMember) {
- Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
- if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
- /*ShouldCheckWrap=*/true)) {
- LLVM_DEBUG(
- dbgs() << "LV: Invalidate candidate interleaved group due to "
- "last group member potentially pointer-wrapping.\n");
- releaseGroup(Group);
- }
- } else {
+ if (Group->getMember(Group->getFactor() - 1))
+ InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
+ std::string("last"));
+ else {
// Case 3: A non-reversed interleaved load group with gaps: We need
// to execute at least one scalar epilogue iteration. This will ensure
// we don't speculatively access memory out-of-bounds. We only need
@@ -1264,6 +1282,39 @@ void InterleavedAccessInfo::analyzeInterleaving(
RequiresScalarEpilogue = true;
}
}
+
+ for (auto *Group : StoreGroups) {
+ // Case 1: A full group. Can Skip the checks; For full groups, if the wide
+ // store would wrap around the address space we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Interleave-store-group with gaps is implemented using masked wide store.
+ // Remove interleaved store groups with gaps if
+ // masked-interleaved-accesses are not enabled by the target.
+ if (!EnablePredicatedInterleavedMemAccesses) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Invalidate candidate interleaved store group due "
+ "to gaps.\n");
+ releaseGroup(Group);
+ continue;
+ }
+
+ // Case 2: If first and last members of the group don't wrap this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and the last group member. Case 3 (scalar epilog) is not relevant for
+ // stores with gaps, which are implemented with masked-store (rather than
+ // speculative access, as in loads).
+ if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
+ continue;
+ for (int Index = Group->getFactor() - 1; Index > 0; Index--)
+ if (Group->getMember(Index)) {
+ InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
+ break;
+ }
+ }
}
void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
@@ -1325,9 +1376,7 @@ std::string VFABI::mangleTLIVectorName(StringRef VectorName,
void VFABI::getVectorVariantNames(
const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
- const StringRef S =
- CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName)
- .getValueAsString();
+ const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
if (S.empty())
return;
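A small worked example for the createUnaryMask helper added above: indices that select from the second shuffle operand (those >= NumElts) are folded back onto the corresponding element of the first operand, and undef (-1) elements pass through unchanged.

#include "llvm/Analysis/VectorUtils.h"

static llvm::SmallVector<int, 16> exampleUnaryMask() {
  // {0, 5, 2, 7} with NumElts = 4 becomes {0, 1, 2, 3}.
  return llvm::createUnaryMask({0, 5, 2, 7}, /*NumElts=*/4);
}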
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 4f72c6f9921a..41fb0b9008be 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -643,6 +643,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(convergent);
KEYWORD(dereferenceable);
KEYWORD(dereferenceable_or_null);
+ KEYWORD(disable_sanitizer_instrumentation);
KEYWORD(elementtype);
KEYWORD(inaccessiblememonly);
KEYWORD(inaccessiblemem_or_argmemonly);
@@ -769,6 +770,9 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(returnDoesNotAlias);
KEYWORD(noInline);
KEYWORD(alwaysInline);
+ KEYWORD(noUnwind);
+ KEYWORD(mayThrow);
+ KEYWORD(hasUnknownCall);
KEYWORD(calls);
KEYWORD(callee);
KEYWORD(params);
@@ -848,7 +852,15 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
TYPEKEYWORD("token", Type::getTokenTy(Context));
- TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
+
+ if (Keyword == "ptr") {
+ if (Context.supportsTypedPointers()) {
+ Warning("ptr type is only supported in -opaque-pointers mode");
+ return lltok::Error;
+ }
+ TyVal = PointerType::getUnqual(Context);
+ return lltok::Type;
+ }
#undef TYPEKEYWORD
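In effect, ptr is now accepted as a type keyword only when the LLVMContext is in opaque-pointers mode (the mode named in the warning text); while typed pointers are still supported, the lexer emits the warning and an error token instead of unconditionally lexing ptr as a type the way the removed TYPEKEYWORD entry did.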
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 799cb03c8c8c..5bce1eaa59a0 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -140,8 +140,8 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
if (Function *Fn = dyn_cast<Function>(V)) {
AttributeList AS = Fn->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
@@ -152,32 +152,28 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
FnAttrs.removeAttribute(Attribute::Alignment);
}
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
Fn->setAttributes(AS);
} else if (CallInst *CI = dyn_cast<CallInst>(V)) {
AttributeList AS = CI->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
CI->setAttributes(AS);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(V)) {
AttributeList AS = II->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
II->setAttributes(AS);
} else if (CallBrInst *CBI = dyn_cast<CallBrInst>(V)) {
AttributeList AS = CBI->getAttributes();
- AttrBuilder FnAttrs(AS.getFnAttributes());
- AS = AS.removeAttributes(Context, AttributeList::FunctionIndex);
+ AttrBuilder FnAttrs(AS.getFnAttrs());
+ AS = AS.removeFnAttributes(Context);
FnAttrs.merge(B);
- AS = AS.addAttributes(Context, AttributeList::FunctionIndex,
- AttributeSet::get(Context, FnAttrs));
+ AS = AS.addFnAttributes(Context, AttributeSet::get(Context, FnAttrs));
CBI->setAttributes(AS);
} else if (auto *GV = dyn_cast<GlobalVariable>(V)) {
AttrBuilder Attrs(GV->getAttributes());
@@ -239,18 +235,18 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
}
- // Look for intrinsic functions and CallInst that need to be upgraded
- for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
- UpgradeCallsToIntrinsic(&*FI++); // must be post-increment, as we remove
+ // Look for intrinsic functions and CallInst that need to be upgraded. We use
+ // make_early_inc_range here because we may remove some functions.
+ for (Function &F : llvm::make_early_inc_range(*M))
+ UpgradeCallsToIntrinsic(&F);
// Some types could be renamed during loading if several modules are
// loaded in the same LLVMContext (LTO scenario). In this case we should
// remangle intrinsics names as well.
- for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) {
- Function *F = &*FI++;
- if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) {
- F->replaceAllUsesWith(Remangled.getValue());
- F->eraseFromParent();
+ for (Function &F : llvm::make_early_inc_range(*M)) {
+ if (auto Remangled = Intrinsic::remangleIntrinsicFunction(&F)) {
+ F.replaceAllUsesWith(Remangled.getValue());
+ F.eraseFromParent();
}
}
@@ -605,12 +601,15 @@ bool LLParser::parseUnnamedGlobal() {
parseOptionalThreadLocal(TLM) || parseOptionalUnnamedAddr(UnnamedAddr))
return true;
- if (Lex.getKind() != lltok::kw_alias && Lex.getKind() != lltok::kw_ifunc)
+ switch (Lex.getKind()) {
+ default:
return parseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
-
- return parseIndirectSymbol(Name, NameLoc, Linkage, Visibility,
+ case lltok::kw_alias:
+ case lltok::kw_ifunc:
+ return parseAliasOrIFunc(Name, NameLoc, Linkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
+ }
}
/// parseNamedGlobal:
@@ -635,12 +634,15 @@ bool LLParser::parseNamedGlobal() {
parseOptionalThreadLocal(TLM) || parseOptionalUnnamedAddr(UnnamedAddr))
return true;
- if (Lex.getKind() != lltok::kw_alias && Lex.getKind() != lltok::kw_ifunc)
+ switch (Lex.getKind()) {
+ default:
return parseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
-
- return parseIndirectSymbol(Name, NameLoc, Linkage, Visibility,
+ case lltok::kw_alias:
+ case lltok::kw_ifunc:
+ return parseAliasOrIFunc(Name, NameLoc, Linkage, Visibility,
DLLStorageClass, DSOLocal, TLM, UnnamedAddr);
+ }
}
bool LLParser::parseComdat() {
@@ -913,25 +915,25 @@ static std::string typeComparisonErrorMessage(StringRef Message, Type *Ty1,
return ErrOS.str();
}
-/// parseIndirectSymbol:
+/// parseAliasOrIFunc:
/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
/// OptionalThreadLocal OptionalUnnamedAddr
-/// 'alias|ifunc' IndirectSymbol IndirectSymbolAttr*
+/// 'alias|ifunc' AliaseeOrResolver SymbolAttrs*
///
-/// IndirectSymbol
+/// AliaseeOrResolver
/// ::= TypeAndValue
///
-/// IndirectSymbolAttr
+/// SymbolAttrs
/// ::= ',' 'partition' StringConstant
///
/// Everything through OptionalUnnamedAddr has already been parsed.
///
-bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
- unsigned L, unsigned Visibility,
- unsigned DLLStorageClass, bool DSOLocal,
- GlobalVariable::ThreadLocalMode TLM,
- GlobalVariable::UnnamedAddr UnnamedAddr) {
+bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
+ unsigned L, unsigned Visibility,
+ unsigned DLLStorageClass, bool DSOLocal,
+ GlobalVariable::ThreadLocalMode TLM,
+ GlobalVariable::UnnamedAddr UnnamedAddr) {
bool IsAlias;
if (Lex.getKind() == lltok::kw_alias)
IsAlias = true;
@@ -1013,21 +1015,26 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
}
}
- // Okay, create the alias but do not insert it into the module yet.
- std::unique_ptr<GlobalIndirectSymbol> GA;
- if (IsAlias)
+ // Okay, create the alias/ifunc but do not insert it into the module yet.
+ std::unique_ptr<GlobalAlias> GA;
+ std::unique_ptr<GlobalIFunc> GI;
+ GlobalValue *GV;
+ if (IsAlias) {
GA.reset(GlobalAlias::create(Ty, AddrSpace,
(GlobalValue::LinkageTypes)Linkage, Name,
Aliasee, /*Parent*/ nullptr));
- else
- GA.reset(GlobalIFunc::create(Ty, AddrSpace,
+ GV = GA.get();
+ } else {
+ GI.reset(GlobalIFunc::create(Ty, AddrSpace,
(GlobalValue::LinkageTypes)Linkage, Name,
Aliasee, /*Parent*/ nullptr));
- GA->setThreadLocalMode(TLM);
- GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
- GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
- GA->setUnnamedAddr(UnnamedAddr);
- maybeSetDSOLocal(DSOLocal, *GA);
+ GV = GI.get();
+ }
+ GV->setThreadLocalMode(TLM);
+ GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+ GV->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
+ GV->setUnnamedAddr(UnnamedAddr);
+ maybeSetDSOLocal(DSOLocal, *GV);
// At this point we've parsed everything except for the IndirectSymbolAttrs.
// Now parse them if there are any.
@@ -1036,7 +1043,7 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
if (Lex.getKind() == lltok::kw_partition) {
Lex.Lex();
- GA->setPartition(Lex.getStrVal());
+ GV->setPartition(Lex.getStrVal());
if (parseToken(lltok::StringConstant, "expected partition string"))
return true;
} else {
@@ -1045,30 +1052,27 @@ bool LLParser::parseIndirectSymbol(const std::string &Name, LocTy NameLoc,
}
if (Name.empty())
- NumberedVals.push_back(GA.get());
+ NumberedVals.push_back(GV);
if (GVal) {
// Verify that types agree.
- if (GVal->getType() != GA->getType())
+ if (GVal->getType() != GV->getType())
return error(
ExplicitTypeLoc,
"forward reference and definition of alias have different types");
// If they agree, just RAUW the old value with the alias and remove the
// forward ref info.
- GVal->replaceAllUsesWith(GA.get());
+ GVal->replaceAllUsesWith(GV);
GVal->eraseFromParent();
}
// Insert into the module, we know its name won't collide now.
if (IsAlias)
- M->getAliasList().push_back(cast<GlobalAlias>(GA.get()));
+ M->getAliasList().push_back(GA.release());
else
- M->getIFuncList().push_back(cast<GlobalIFunc>(GA.get()));
- assert(GA->getName() == Name && "Should not be a name conflict!");
-
- // The module owns this now
- GA.release();
+ M->getIFuncList().push_back(GI.release());
+ assert(GV->getName() == Name && "Should not be a name conflict!");
return false;
}
@@ -1408,14 +1412,10 @@ static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy) {
}
Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
- Value *Val, bool IsCall) {
+ Value *Val) {
Type *ValTy = Val->getType();
if (ValTy == Ty)
return Val;
- // For calls, we also allow opaque pointers.
- if (IsCall && ValTy == PointerType::get(Ty->getContext(),
- Ty->getPointerAddressSpace()))
- return Val;
if (Ty->isLabelTy())
error(Loc, "'" + Name + "' is not a basic block");
else
@@ -1429,7 +1429,7 @@ Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
- LocTy Loc, bool IsCall) {
+ LocTy Loc) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
error(Loc, "global variable reference must have pointer type");
@@ -1451,7 +1451,7 @@ GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
return cast_or_null<GlobalValue>(
- checkValidVariableType(Loc, "@" + Name, Ty, Val, IsCall));
+ checkValidVariableType(Loc, "@" + Name, Ty, Val));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy);
@@ -1459,8 +1459,7 @@ GlobalValue *LLParser::getGlobalVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
- bool IsCall) {
+GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
if (!PTy) {
error(Loc, "global variable reference must have pointer type");
@@ -1480,7 +1479,7 @@ GlobalValue *LLParser::getGlobalVal(unsigned ID, Type *Ty, LocTy Loc,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
return cast_or_null<GlobalValue>(
- checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val, IsCall));
+ checkValidVariableType(Loc, "@" + Twine(ID), Ty, Val));
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal = createGlobalFwdRef(M, PTy);
@@ -1936,7 +1935,7 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
if (!EatIfPresent(lltok::kw_align))
return false;
LocTy AlignLoc = Lex.getLoc();
- uint32_t Value = 0;
+ uint64_t Value = 0;
LocTy ParenLoc = Lex.getLoc();
bool HaveParens = false;
@@ -1945,13 +1944,13 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) {
HaveParens = true;
}
- if (parseUInt32(Value))
+ if (parseUInt64(Value))
return true;
if (HaveParens && !EatIfPresent(lltok::rparen))
return error(ParenLoc, "expected ')'");
- if (!isPowerOf2_32(Value))
+ if (!isPowerOf2_64(Value))
return error(AlignLoc, "alignment is not a power of two");
if (Value > Value::MaximumAlignment)
return error(AlignLoc, "huge alignments are not supported yet");
@@ -2221,6 +2220,26 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
// Type ::= 'float' | 'void' (etc)
Result = Lex.getTyVal();
Lex.Lex();
+
+ // Handle "ptr" opaque pointer type.
+ //
+ // Type ::= ptr ('addrspace' '(' uint32 ')')?
+ if (Result->isOpaquePointerTy()) {
+ unsigned AddrSpace;
+ if (parseOptionalAddrSpace(AddrSpace))
+ return true;
+ Result = PointerType::get(getContext(), AddrSpace);
+
+ // Give a nice error for 'ptr*'.
+ if (Lex.getKind() == lltok::star)
+ return tokError("ptr* is invalid - use ptr instead");
+
+ // Fall through to parsing the type suffixes only if this 'ptr' is a
+ // function return. Otherwise, return success, implicitly rejecting other
+ // suffixes.
+ if (Lex.getKind() != lltok::lparen)
+ return false;
+ }
break;
case lltok::lbrace:
// Type ::= StructType
@@ -2274,26 +2293,6 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
}
}
- // Handle (explicit) opaque pointer types (not --force-opaque-pointers).
- //
- // Type ::= ptr ('addrspace' '(' uint32 ')')?
- if (Result->isOpaquePointerTy()) {
- unsigned AddrSpace;
- if (parseOptionalAddrSpace(AddrSpace))
- return true;
- Result = PointerType::get(getContext(), AddrSpace);
-
- // Give a nice error for 'ptr*'.
- if (Lex.getKind() == lltok::star)
- return tokError("ptr* is invalid - use ptr instead");
-
- // Fall through to parsing the type suffixes only if this 'ptr' is a
- // function return. Otherwise, return success, implicitly rejecting other
- // suffixes.
- if (Lex.getKind() != lltok::lparen)
- return false;
- }
-
// parse the type suffixes.
while (true) {
switch (Lex.getKind()) {
@@ -2798,7 +2797,7 @@ bool LLParser::PerFunctionState::finishFunction() {
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
- LocTy Loc, bool IsCall) {
+ LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = F.getValueSymbolTable()->lookup(Name);
@@ -2812,7 +2811,7 @@ Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
- return P.checkValidVariableType(Loc, "%" + Name, Ty, Val, IsCall);
+ return P.checkValidVariableType(Loc, "%" + Name, Ty, Val);
// Don't make placeholders with invalid type.
if (!Ty->isFirstClassType()) {
@@ -2832,8 +2831,7 @@ Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
return FwdVal;
}
-Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc,
- bool IsCall) {
+Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc) {
// Look this name up in the normal function symbol table.
Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
@@ -2847,7 +2845,7 @@ Value *LLParser::PerFunctionState::getVal(unsigned ID, Type *Ty, LocTy Loc,
// If we have the value in the symbol table or fwd-ref table, return it.
if (Val)
- return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val, IsCall);
+ return P.checkValidVariableType(Loc, "%" + Twine(ID), Ty, Val);
if (!Ty->isFirstClassType()) {
P.error(Loc, "invalid use of a non-first-class type");
@@ -2934,12 +2932,12 @@ bool LLParser::PerFunctionState::setInstName(int NameID,
BasicBlock *LLParser::PerFunctionState::getBB(const std::string &Name,
LocTy Loc) {
return dyn_cast_or_null<BasicBlock>(
- getVal(Name, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
+ getVal(Name, Type::getLabelTy(F.getContext()), Loc));
}
BasicBlock *LLParser::PerFunctionState::getBB(unsigned ID, LocTy Loc) {
return dyn_cast_or_null<BasicBlock>(
- getVal(ID, Type::getLabelTy(F.getContext()), Loc, /*IsCall=*/false));
+ getVal(ID, Type::getLabelTy(F.getContext()), Loc));
}
/// defineBB - Define the specified basic block, which is either named or
@@ -3652,7 +3650,7 @@ bool LLParser::parseGlobalValue(Type *Ty, Constant *&C) {
ValID ID;
Value *V = nullptr;
bool Parsed = parseValID(ID, /*PFS=*/nullptr, Ty) ||
- convertValIDToValue(Ty, ID, V, nullptr, /*IsCall=*/false);
+ convertValIDToValue(Ty, ID, V, nullptr);
if (V && !(C = dyn_cast<Constant>(V)))
return error(ID.Loc, "global values must be constants");
return Parsed;
@@ -3876,10 +3874,6 @@ struct MDField : public MDFieldImpl<Metadata *> {
MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {}
};
-struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> {
- MDConstant() : ImplTy(nullptr) {}
-};
-
struct MDStringField : public MDFieldImpl<MDString *> {
bool AllowEmpty;
MDStringField(bool AllowEmpty = true)
@@ -3914,22 +3908,6 @@ struct MDSignedOrMDField : MDEitherFieldImpl<MDSignedField, MDField> {
}
};
-struct MDSignedOrUnsignedField
- : MDEitherFieldImpl<MDSignedField, MDUnsignedField> {
- MDSignedOrUnsignedField() : ImplTy(MDSignedField(0), MDUnsignedField(0)) {}
-
- bool isMDSignedField() const { return WhatIs == IsTypeA; }
- bool isMDUnsignedField() const { return WhatIs == IsTypeB; }
- int64_t getMDSignedValue() const {
- assert(isMDSignedField() && "Wrong field type");
- return A.Val;
- }
- uint64_t getMDUnsignedValue() const {
- assert(isMDUnsignedField() && "Wrong field type");
- return B.Val;
- }
-};
-
} // end anonymous namespace
namespace llvm {
@@ -4578,7 +4556,8 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(extraData, MDField, ); \
- OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX));
+ OPTIONAL(dwarfAddressSpace, MDUnsignedField, (UINT32_MAX, UINT32_MAX)); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4590,7 +4569,7 @@ bool LLParser::parseDIDerivedType(MDNode *&Result, bool IsDistinct) {
(Context, tag.Val, name.Val, file.Val, line.Val,
scope.Val, baseType.Val, size.Val, align.Val,
offset.Val, DWARFAddressSpace, flags.Val,
- extraData.Val));
+ extraData.Val, annotations.Val));
return false;
}
@@ -4615,7 +4594,8 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(dataLocation, MDField, ); \
OPTIONAL(associated, MDField, ); \
OPTIONAL(allocated, MDField, ); \
- OPTIONAL(rank, MDSignedOrMDField, );
+ OPTIONAL(rank, MDSignedOrMDField, ); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4633,7 +4613,7 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
scope.Val, baseType.Val, size.Val, align.Val, offset.Val, flags.Val,
elements.Val, runtimeLang.Val, vtableHolder.Val, templateParams.Val,
discriminator.Val, dataLocation.Val, associated.Val, allocated.Val,
- Rank)) {
+ Rank, annotations.Val)) {
Result = CT;
return false;
}
@@ -4645,8 +4625,8 @@ bool LLParser::parseDICompositeType(MDNode *&Result, bool IsDistinct) {
(Context, tag.Val, name.Val, file.Val, line.Val, scope.Val, baseType.Val,
size.Val, align.Val, offset.Val, flags.Val, elements.Val,
runtimeLang.Val, vtableHolder.Val, templateParams.Val, identifier.Val,
- discriminator.Val, dataLocation.Val, associated.Val, allocated.Val,
- Rank));
+ discriminator.Val, dataLocation.Val, associated.Val, allocated.Val, Rank,
+ annotations.Val));
return false;
}
@@ -4746,7 +4726,8 @@ bool LLParser::parseDICompileUnit(MDNode *&Result, bool IsDistinct) {
/// virtuality: DW_VIRTUALITY_pure_virtual,
/// virtualIndex: 10, thisAdjustment: 4, flags: 11,
/// spFlags: 10, isOptimized: false, templateParams: !4,
-/// declaration: !5, retainedNodes: !6, thrownTypes: !7)
+/// declaration: !5, retainedNodes: !6, thrownTypes: !7,
+/// annotations: !8)
bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
auto Loc = Lex.getLoc();
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
@@ -4770,7 +4751,8 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
OPTIONAL(retainedNodes, MDField, ); \
- OPTIONAL(thrownTypes, MDField, );
+ OPTIONAL(thrownTypes, MDField, ); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4789,7 +4771,7 @@ bool LLParser::parseDISubprogram(MDNode *&Result, bool IsDistinct) {
(Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val,
type.Val, scopeLine.Val, containingType.Val, virtualIndex.Val,
thisAdjustment.Val, flags.Val, SPFlags, unit.Val, templateParams.Val,
- declaration.Val, retainedNodes.Val, thrownTypes.Val));
+ declaration.Val, retainedNodes.Val, thrownTypes.Val, annotations.Val));
return false;
}
@@ -4966,7 +4948,8 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(isDefinition, MDBoolField, (true)); \
OPTIONAL(templateParams, MDField, ); \
OPTIONAL(declaration, MDField, ); \
- OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -4974,7 +4957,8 @@ bool LLParser::parseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
GET_OR_DISTINCT(DIGlobalVariable,
(Context, scope.Val, name.Val, linkageName.Val, file.Val,
line.Val, type.Val, isLocal.Val, isDefinition.Val,
- declaration.Val, templateParams.Val, align.Val));
+ declaration.Val, templateParams.Val, align.Val,
+ annotations.Val));
return false;
}
@@ -4994,13 +4978,15 @@ bool LLParser::parseDILocalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(line, LineField, ); \
OPTIONAL(type, MDField, ); \
OPTIONAL(flags, DIFlagField, ); \
- OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
+ OPTIONAL(annotations, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DILocalVariable,
(Context, scope.Val, name.Val, file.Val, line.Val,
- type.Val, arg.Val, flags.Val, align.Val));
+ type.Val, arg.Val, flags.Val, align.Val,
+ annotations.Val));
return false;
}
@@ -5136,7 +5122,7 @@ bool LLParser::parseDIObjCProperty(MDNode *&Result, bool IsDistinct) {
/// parseDIImportedEntity:
/// ::= !DIImportedEntity(tag: DW_TAG_imported_module, scope: !0, entity: !1,
-/// line: 7, name: "foo")
+/// line: 7, name: "foo", elements: !2)
bool LLParser::parseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(tag, DwarfTagField, ); \
@@ -5144,13 +5130,14 @@ bool LLParser::parseDIImportedEntity(MDNode *&Result, bool IsDistinct) {
OPTIONAL(entity, MDField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
- OPTIONAL(name, MDStringField, );
+ OPTIONAL(name, MDStringField, ); \
+ OPTIONAL(elements, MDField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(
- DIImportedEntity,
- (Context, tag.Val, scope.Val, entity.Val, file.Val, line.Val, name.Val));
+ Result = GET_OR_DISTINCT(DIImportedEntity,
+ (Context, tag.Val, scope.Val, entity.Val, file.Val,
+ line.Val, name.Val, elements.Val));
return false;
}
@@ -5254,7 +5241,7 @@ bool LLParser::parseMetadata(Metadata *&MD, PerFunctionState *PFS) {
//===----------------------------------------------------------------------===//
bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
- PerFunctionState *PFS, bool IsCall) {
+ PerFunctionState *PFS) {
if (Ty->isFunctionTy())
return error(ID.Loc, "functions are not values, refer to them as pointers");
@@ -5262,12 +5249,12 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
case ValID::t_LocalID:
if (!PFS)
return error(ID.Loc, "invalid use of function-local name");
- V = PFS->getVal(ID.UIntVal, Ty, ID.Loc, IsCall);
+ V = PFS->getVal(ID.UIntVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_LocalName:
if (!PFS)
return error(ID.Loc, "invalid use of function-local name");
- V = PFS->getVal(ID.StrVal, Ty, ID.Loc, IsCall);
+ V = PFS->getVal(ID.StrVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_InlineAsm: {
if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2))
@@ -5278,10 +5265,10 @@ bool LLParser::convertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return false;
}
case ValID::t_GlobalName:
- V = getGlobalVal(ID.StrVal, Ty, ID.Loc, IsCall);
+ V = getGlobalVal(ID.StrVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_GlobalID:
- V = getGlobalVal(ID.UIntVal, Ty, ID.Loc, IsCall);
+ V = getGlobalVal(ID.UIntVal, Ty, ID.Loc);
return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
@@ -5405,7 +5392,7 @@ bool LLParser::parseConstantValue(Type *Ty, Constant *&C) {
case ValID::t_ConstantStruct:
case ValID::t_PackedConstantStruct: {
Value *V;
- if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr, /*IsCall=*/false))
+ if (convertValIDToValue(Ty, ID, V, /*PFS=*/nullptr))
return true;
assert(isa<Constant>(V) && "Expected a constant value");
C = cast<Constant>(V);
@@ -5423,7 +5410,7 @@ bool LLParser::parseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
V = nullptr;
ValID ID;
return parseValID(ID, PFS, Ty) ||
- convertValIDToValue(Ty, ID, V, PFS, /*IsCall=*/false);
+ convertValIDToValue(Ty, ID, V, PFS);
}
bool LLParser::parseTypeAndValue(Value *&V, PerFunctionState *PFS) {
@@ -5571,7 +5558,7 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
AttributeList::get(Context, AttributeSet::get(Context, FuncAttrs),
AttributeSet::get(Context, RetAttrs), Attrs);
- if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy())
+ if (PAL.hasParamAttr(0, Attribute::StructRet) && !RetType->isVoidTy())
return error(RetTypeLoc, "functions with 'sret' argument must return void");
FunctionType *FT = FunctionType::get(RetType, ParamTypeList, IsVarArg);
@@ -5718,7 +5705,7 @@ bool LLParser::PerFunctionState::resolveForwardRefBlockAddresses() {
Value *ResolvedVal = BlockAddress::get(&F, BB);
ResolvedVal = P.checkValidVariableType(BBID.Loc, BBID.StrVal, GV->getType(),
- ResolvedVal, false);
+ ResolvedVal);
if (!ResolvedVal)
return true;
GV->replaceAllUsesWith(ResolvedVal);
@@ -6287,7 +6274,7 @@ bool LLParser::parseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
if (convertValIDToValue(PointerType::get(Ty, InvokeAddrSpace), CalleeID,
- Callee, &PFS, /*IsCall=*/true))
+ Callee, &PFS))
return true;
// Set up the Attribute for the function.
@@ -6612,8 +6599,7 @@ bool LLParser::parseCallBr(Instruction *&Inst, PerFunctionState &PFS) {
// Look up the callee.
Value *Callee;
- if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS,
- /*IsCall=*/true))
+ if (convertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
return true;
// Set up the Attribute for the function.
@@ -7019,7 +7005,7 @@ bool LLParser::parseCall(Instruction *&Inst, PerFunctionState &PFS,
// Look up the callee.
Value *Callee;
if (convertValIDToValue(PointerType::get(Ty, CallAddrSpace), CalleeID, Callee,
- &PFS, /*IsCall=*/true))
+ &PFS))
return true;
// Set up the Attribute for the function.
@@ -8543,12 +8529,15 @@ bool LLParser::parseFlag(unsigned &Val) {
/// [',' 'returnDoesNotAlias' ':' Flag]? ')'
/// [',' 'noInline' ':' Flag]? ')'
/// [',' 'alwaysInline' ':' Flag]? ')'
+/// [',' 'noUnwind' ':' Flag]? ')'
+/// [',' 'mayThrow' ':' Flag]? ')'
+/// [',' 'hasUnknownCall' ':' Flag]? ')'
bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
assert(Lex.getKind() == lltok::kw_funcFlags);
Lex.Lex();
- if (parseToken(lltok::colon, "expected ':' in funcFlags") |
+ if (parseToken(lltok::colon, "expected ':' in funcFlags") ||
parseToken(lltok::lparen, "expected '(' in funcFlags"))
return true;
@@ -8591,6 +8580,24 @@ bool LLParser::parseOptionalFFlags(FunctionSummary::FFlags &FFlags) {
return true;
FFlags.AlwaysInline = Val;
break;
+ case lltok::kw_noUnwind:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.NoUnwind = Val;
+ break;
+ case lltok::kw_mayThrow:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.MayThrow = Val;
+ break;
+ case lltok::kw_hasUnknownCall:
+ Lex.Lex();
+ if (parseToken(lltok::colon, "expected ':'") || parseFlag(Val))
+ return true;
+ FFlags.HasUnknownCall = Val;
+ break;
default:
return error(Lex.getLoc(), "expected function flag type");
}
@@ -8610,7 +8617,7 @@ bool LLParser::parseOptionalCalls(std::vector<FunctionSummary::EdgeTy> &Calls) {
assert(Lex.getKind() == lltok::kw_calls);
Lex.Lex();
- if (parseToken(lltok::colon, "expected ':' in calls") |
+ if (parseToken(lltok::colon, "expected ':' in calls") ||
parseToken(lltok::lparen, "expected '(' in calls"))
return true;
@@ -8702,7 +8709,7 @@ bool LLParser::parseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
assert(Lex.getKind() == lltok::kw_vTableFuncs);
Lex.Lex();
- if (parseToken(lltok::colon, "expected ':' in vTableFuncs") |
+ if (parseToken(lltok::colon, "expected ':' in vTableFuncs") ||
parseToken(lltok::lparen, "expected '(' in vTableFuncs"))
return true;
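
The funcFlags, calls, and vTableFuncs hunks above switch the chained parseToken calls from '|' to '||'. A minimal standalone sketch of why that matters, using a hypothetical parseToken stand-in rather than the real LLParser helper: with bitwise '|' both calls always run, so the second check keeps consuming input and can emit a misleading second diagnostic after the first check has already failed, while '||' short-circuits.

// sketch.cpp -- hypothetical stand-in, not LLParser code
#include <cstdio>

// Returns true on error, matching the parser convention in the hunks above.
static bool parseToken(bool Present, const char *Msg) {
  if (!Present) {
    std::fprintf(stderr, "error: %s\n", Msg);
    return true;
  }
  return false;
}

static bool parseHeader(bool HasColon, bool HasParen) {
  // '||' short-circuits: once the ':' check fails, the '(' check never runs.
  return parseToken(HasColon, "expected ':'") ||
         parseToken(HasParen, "expected '('");
}

int main() {
  // Missing ':' -> exactly one diagnostic, then bail out.
  return parseHeader(/*HasColon=*/false, /*HasParen=*/true) ? 1 : 0;
}
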
diff --git a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
index 1d9c81ef8ebc..3de3dccce0c6 100644
--- a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp
@@ -1,9 +1,8 @@
//===-- MsgPackDocumentYAML.cpp - MsgPack Document YAML interface -------*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index f577d3886e01..2723105b092f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -529,10 +529,9 @@ Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent,
if (R.AtEndOfStream())
return reportError("bad length");
- Expected<uint32_t> MaybeSize = R.ReadVBR(6);
- if (!MaybeSize)
- return MaybeSize.takeError();
- uint32_t Size = MaybeSize.get();
+ uint32_t Size;
+ if (Error E = R.ReadVBR(6).moveInto(Size))
+ return E;
if (Strings.size() < Size)
return reportError("truncated chars");
@@ -555,11 +554,8 @@ BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer,
Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
Optional<StringRef> CheckHash) {
- Expected<CurStreamTypeType> MaybeType = analyzeHeader(O, Stream);
- if (!MaybeType)
- return MaybeType.takeError();
- else
- CurStreamType = *MaybeType;
+ if (Error E = analyzeHeader(O, Stream).moveInto(CurStreamType))
+ return E;
Stream.setBlockInfo(&BlockInfo);
@@ -567,9 +563,8 @@ Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
// The block info must be a top-level block.
if (BlockInfoStream) {
BitstreamCursor BlockInfoCursor(*BlockInfoStream);
- Expected<CurStreamTypeType> H = analyzeHeader(O, BlockInfoCursor);
- if (!H)
- return H.takeError();
+ if (Error E = analyzeHeader(O, BlockInfoCursor).takeError())
+ return E;
while (!BlockInfoCursor.AtEndOfStream()) {
Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode();
@@ -582,12 +577,11 @@ Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O,
if (!MaybeBlockID)
return MaybeBlockID.takeError();
if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) {
- Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
- BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
- if (!MaybeNewBlockInfo)
- return MaybeNewBlockInfo.takeError();
- Optional<BitstreamBlockInfo> NewBlockInfo =
- std::move(MaybeNewBlockInfo.get());
+ Optional<BitstreamBlockInfo> NewBlockInfo;
+ if (Error E =
+ BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
+ .moveInto(NewBlockInfo))
+ return E;
if (!NewBlockInfo)
return reportError("Malformed BlockInfoBlock in block info file");
BlockInfo = std::move(*NewBlockInfo);
@@ -744,22 +738,20 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
// BLOCKINFO is a special part of the stream.
bool DumpRecords = O.hasValue();
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- if (O)
+ if (O && !O->DumpBlockinfo)
O->OS << Indent << "<BLOCKINFO_BLOCK/>\n";
- Expected<Optional<BitstreamBlockInfo>> MaybeNewBlockInfo =
- Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
- if (!MaybeNewBlockInfo)
- return MaybeNewBlockInfo.takeError();
- Optional<BitstreamBlockInfo> NewBlockInfo =
- std::move(MaybeNewBlockInfo.get());
+ Optional<BitstreamBlockInfo> NewBlockInfo;
+ if (Error E = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true)
+ .moveInto(NewBlockInfo))
+ return E;
if (!NewBlockInfo)
return reportError("Malformed BlockInfoBlock");
BlockInfo = std::move(*NewBlockInfo);
if (Error Err = Stream.JumpToBit(BlockBitStart))
return Err;
// It's not really interesting to dump the contents of the blockinfo
- // block.
- DumpRecords = false;
+ // block, so only do it if the user explicitly requests it.
+ DumpRecords = O && O->DumpBlockinfo;
}
unsigned NumWords = 0;
@@ -796,11 +788,10 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
uint64_t RecordStartBit = Stream.GetCurrentBitNo();
- Expected<BitstreamEntry> MaybeEntry =
- Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs)
+ .moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -847,10 +838,9 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
StringRef Blob;
uint64_t CurrentRecordPos = Stream.GetCurrentBitNo();
- Expected<unsigned> MaybeCode = Stream.readRecord(Entry.ID, Record, &Blob);
- if (!MaybeCode)
- return MaybeCode.takeError();
- unsigned Code = MaybeCode.get();
+ unsigned Code;
+ if (Error E = Stream.readRecord(Entry.ID, Record, &Blob).moveInto(Code))
+ return E;
// Increment the # occurrences of this code.
if (BlockStats.CodeFreq.size() <= Code)
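
Most of the BitcodeAnalyzer hunks above collapse the usual Expected<T> unwrapping (check, takeError, get) into a single Expected<T>::moveInto call. A minimal sketch of that pattern, assuming only LLVM's Support headers; readLength is a made-up producer, not a Bitstream API.

// moveinto_sketch.cpp -- illustrative only, links against LLVMSupport
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>
using namespace llvm;

// Made-up producer that either yields a length or fails.
static Expected<uint32_t> readLength(bool Ok) {
  if (!Ok)
    return createStringError(inconvertibleErrorCode(), "bad length");
  return 42;
}

static Error consumeLength(bool Ok) {
  uint32_t Size;
  // moveInto() assigns the value on success and hands back the Error on
  // failure, replacing the Maybe / !Maybe / takeError() / get() sequence.
  if (Error E = readLength(Ok).moveInto(Size))
    return E;
  // ... use Size ...
  (void)Size;
  return Error::success();
}

int main() {
  if (Error E = consumeLength(/*Ok=*/false)) {
    logAllUnhandledErrors(std::move(E), errs(), "demo: ");
    return 1;
  }
  return 0;
}
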
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index d5e366c21f7d..c568461e62b0 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -41,7 +41,6 @@
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -180,10 +179,8 @@ static Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
while (true) {
BitstreamEntry Entry;
- if (Expected<BitstreamEntry> Res = Stream.advance())
- Entry = Res.get();
- else
- return Res.takeError();
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
default:
@@ -227,10 +224,8 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
return "";
BitstreamEntry Entry;
- if (Expected<BitstreamEntry> Res = Stream.advance())
- Entry = std::move(Res.get());
- else
- return Res.takeError();
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::EndBlock:
@@ -246,10 +241,9 @@ static Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
return std::move(Err);
continue;
case BitstreamEntry::Record:
- if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
- continue;
- else
- return Skipped.takeError();
+ if (Error E = Stream.skipRecord(Entry.ID).takeError())
+ return std::move(E);
+ continue;
}
}
}
@@ -306,10 +300,8 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
// need to understand them all.
while (true) {
BitstreamEntry Entry;
- if (Expected<BitstreamEntry> Res = Stream.advance())
- Entry = std::move(Res.get());
- else
- return Res.takeError();
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::Error:
@@ -327,10 +319,9 @@ static Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
continue;
case BitstreamEntry::Record:
- if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID))
- continue;
- else
- return Skipped.takeError();
+ if (Error E = Stream.skipRecord(Entry.ID).takeError())
+ return std::move(E);
+ continue;
}
}
}
@@ -500,10 +491,15 @@ class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
SmallVector<Instruction *, 64> InstructionList;
std::vector<std::pair<GlobalVariable *, unsigned>> GlobalInits;
- std::vector<std::pair<GlobalIndirectSymbol *, unsigned>> IndirectSymbolInits;
- std::vector<std::pair<Function *, unsigned>> FunctionPrefixes;
- std::vector<std::pair<Function *, unsigned>> FunctionPrologues;
- std::vector<std::pair<Function *, unsigned>> FunctionPersonalityFns;
+ std::vector<std::pair<GlobalValue *, unsigned>> IndirectSymbolInits;
+
+ struct FunctionOperandInfo {
+ Function *F;
+ unsigned PersonalityFn;
+ unsigned Prefix;
+ unsigned Prologue;
+ };
+ std::vector<FunctionOperandInfo> FunctionOperands;
/// The set of attributes by index. Index zero in the file is for null, and
/// is thus not represented here. As such all indices are off by one.
@@ -933,6 +929,9 @@ static FunctionSummary::FFlags getDecodedFFlags(uint64_t RawFlags) {
Flags.ReturnDoesNotAlias = (RawFlags >> 3) & 0x1;
Flags.NoInline = (RawFlags >> 4) & 0x1;
Flags.AlwaysInline = (RawFlags >> 5) & 0x1;
+ Flags.NoUnwind = (RawFlags >> 6) & 0x1;
+ Flags.MayThrow = (RawFlags >> 7) & 0x1;
+ Flags.HasUnknownCall = (RawFlags >> 8) & 0x1;
return Flags;
}
@@ -1388,6 +1387,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::Cold;
case bitc::ATTR_KIND_CONVERGENT:
return Attribute::Convergent;
+ case bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION:
+ return Attribute::DisableSanitizerInstrumentation;
case bitc::ATTR_KIND_ELEMENTTYPE:
return Attribute::ElementType;
case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY:
@@ -1785,6 +1786,9 @@ Error BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_OPAQUE_POINTER: { // OPAQUE_POINTER: [addrspace]
if (Record.size() != 1)
return error("Invalid record");
+ if (Context.supportsTypedPointers())
+ return error(
+ "Opaque pointers are only supported in -opaque-pointers mode");
unsigned AddressSpace = Record[0];
ResultTy = PointerType::get(Context, AddressSpace);
break;
@@ -1913,7 +1917,7 @@ Error BitcodeReader::parseTypeTableBody() {
if (Record[0] == 0)
return error("Invalid vector length");
ResultTy = getTypeByID(Record[1]);
- if (!ResultTy || !StructType::isValidElementType(ResultTy))
+ if (!ResultTy || !VectorType::isValidElementType(ResultTy))
return error("Invalid type");
bool Scalable = Record.size() > 2 ? Record[2] : false;
ResultTy = VectorType::get(ResultTy, Record[0], Scalable);
@@ -2240,17 +2244,12 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
/// Resolve all of the initializers for global values and aliases that we can.
Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
std::vector<std::pair<GlobalVariable *, unsigned>> GlobalInitWorklist;
- std::vector<std::pair<GlobalIndirectSymbol *, unsigned>>
- IndirectSymbolInitWorklist;
- std::vector<std::pair<Function *, unsigned>> FunctionPrefixWorklist;
- std::vector<std::pair<Function *, unsigned>> FunctionPrologueWorklist;
- std::vector<std::pair<Function *, unsigned>> FunctionPersonalityFnWorklist;
+ std::vector<std::pair<GlobalValue *, unsigned>> IndirectSymbolInitWorklist;
+ std::vector<FunctionOperandInfo> FunctionOperandWorklist;
GlobalInitWorklist.swap(GlobalInits);
IndirectSymbolInitWorklist.swap(IndirectSymbolInits);
- FunctionPrefixWorklist.swap(FunctionPrefixes);
- FunctionPrologueWorklist.swap(FunctionPrologues);
- FunctionPersonalityFnWorklist.swap(FunctionPersonalityFns);
+ FunctionOperandWorklist.swap(FunctionOperands);
while (!GlobalInitWorklist.empty()) {
unsigned ValID = GlobalInitWorklist.back().second;
@@ -2274,51 +2273,59 @@ Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]);
if (!C)
return error("Expected a constant");
- GlobalIndirectSymbol *GIS = IndirectSymbolInitWorklist.back().first;
- if (isa<GlobalAlias>(GIS) && C->getType() != GIS->getType())
- return error("Alias and aliasee types don't match");
- GIS->setIndirectSymbol(C);
+ GlobalValue *GV = IndirectSymbolInitWorklist.back().first;
+ if (auto *GA = dyn_cast<GlobalAlias>(GV)) {
+ if (C->getType() != GV->getType())
+ return error("Alias and aliasee types don't match");
+ GA->setAliasee(C);
+ } else if (auto *GI = dyn_cast<GlobalIFunc>(GV)) {
+ Type *ResolverFTy =
+ GlobalIFunc::getResolverFunctionType(GI->getValueType());
+ // Transparently fix up the type for compatibility with older bitcode
+ GI->setResolver(
+ ConstantExpr::getBitCast(C, ResolverFTy->getPointerTo()));
+ } else {
+ return error("Expected an alias or an ifunc");
+ }
}
IndirectSymbolInitWorklist.pop_back();
}
- while (!FunctionPrefixWorklist.empty()) {
- unsigned ValID = FunctionPrefixWorklist.back().second;
- if (ValID >= ValueList.size()) {
- FunctionPrefixes.push_back(FunctionPrefixWorklist.back());
- } else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- FunctionPrefixWorklist.back().first->setPrefixData(C);
- else
- return error("Expected a constant");
+ while (!FunctionOperandWorklist.empty()) {
+ FunctionOperandInfo &Info = FunctionOperandWorklist.back();
+ if (Info.PersonalityFn) {
+ unsigned ValID = Info.PersonalityFn - 1;
+ if (ValID < ValueList.size()) {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ Info.F->setPersonalityFn(C);
+ else
+ return error("Expected a constant");
+ Info.PersonalityFn = 0;
+ }
}
- FunctionPrefixWorklist.pop_back();
- }
-
- while (!FunctionPrologueWorklist.empty()) {
- unsigned ValID = FunctionPrologueWorklist.back().second;
- if (ValID >= ValueList.size()) {
- FunctionPrologues.push_back(FunctionPrologueWorklist.back());
- } else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- FunctionPrologueWorklist.back().first->setPrologueData(C);
- else
- return error("Expected a constant");
+ if (Info.Prefix) {
+ unsigned ValID = Info.Prefix - 1;
+ if (ValID < ValueList.size()) {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ Info.F->setPrefixData(C);
+ else
+ return error("Expected a constant");
+ Info.Prefix = 0;
+ }
}
- FunctionPrologueWorklist.pop_back();
- }
-
- while (!FunctionPersonalityFnWorklist.empty()) {
- unsigned ValID = FunctionPersonalityFnWorklist.back().second;
- if (ValID >= ValueList.size()) {
- FunctionPersonalityFns.push_back(FunctionPersonalityFnWorklist.back());
- } else {
- if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- FunctionPersonalityFnWorklist.back().first->setPersonalityFn(C);
- else
- return error("Expected a constant");
+ if (Info.Prologue) {
+ unsigned ValID = Info.Prologue - 1;
+ if (ValID < ValueList.size()) {
+ if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
+ Info.F->setPrologueData(C);
+ else
+ return error("Expected a constant");
+ Info.Prologue = 0;
+ }
}
- FunctionPersonalityFnWorklist.pop_back();
+ if (Info.PersonalityFn || Info.Prefix || Info.Prologue)
+ FunctionOperands.push_back(Info);
+ FunctionOperandWorklist.pop_back();
}
return Error::success();
@@ -2351,6 +2358,15 @@ Error BitcodeReader::parseConstants() {
unsigned CstNo;
};
std::vector<DelayedShufTy> DelayedShuffles;
+ struct DelayedSelTy {
+ Type *OpTy;
+ uint64_t Op0Idx;
+ uint64_t Op1Idx;
+ uint64_t Op2Idx;
+ unsigned CstNo;
+ };
+ std::vector<DelayedSelTy> DelayedSelectors;
+
while (true) {
Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
if (!MaybeEntry)
@@ -2387,6 +2403,27 @@ Error BitcodeReader::parseConstants() {
Value *V = ConstantExpr::getShuffleVector(Op0, Op1, Mask);
ValueList.assignValue(V, CstNo);
}
+ for (auto &DelayedSelector : DelayedSelectors) {
+ Type *OpTy = DelayedSelector.OpTy;
+ Type *SelectorTy = Type::getInt1Ty(Context);
+ uint64_t Op0Idx = DelayedSelector.Op0Idx;
+ uint64_t Op1Idx = DelayedSelector.Op1Idx;
+ uint64_t Op2Idx = DelayedSelector.Op2Idx;
+ uint64_t CstNo = DelayedSelector.CstNo;
+ Constant *Op1 = ValueList.getConstantFwdRef(Op1Idx, OpTy);
+ Constant *Op2 = ValueList.getConstantFwdRef(Op2Idx, OpTy);
+ // The selector might be an i1 or an <n x i1>
+ // Get the type from the ValueList before getting a forward ref.
+ if (VectorType *VTy = dyn_cast<VectorType>(OpTy)) {
+ Value *V = ValueList[Op0Idx];
+ assert(V);
+ if (SelectorTy != V->getType())
+ SelectorTy = VectorType::get(SelectorTy, VTy->getElementCount());
+ }
+ Constant *Op0 = ValueList.getConstantFwdRef(Op0Idx, SelectorTy);
+ Value *V = ConstantExpr::getSelect(Op0, Op1, Op2);
+ ValueList.assignValue(V, CstNo);
+ }
if (NextCstNo != ValueList.size())
return error("Invalid constant reference");
@@ -2683,21 +2720,11 @@ Error BitcodeReader::parseConstants() {
if (Record.size() < 3)
return error("Invalid record");
- Type *SelectorTy = Type::getInt1Ty(Context);
-
- // The selector might be an i1, an <n x i1>, or a <vscale x n x i1>
- // Get the type from the ValueList before getting a forward ref.
- if (VectorType *VTy = dyn_cast<VectorType>(CurTy))
- if (Value *V = ValueList[Record[0]])
- if (SelectorTy != V->getType())
- SelectorTy = VectorType::get(SelectorTy,
- VTy->getElementCount());
-
- V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0],
- SelectorTy),
- ValueList.getConstantFwdRef(Record[1],CurTy),
- ValueList.getConstantFwdRef(Record[2],CurTy));
- break;
+ DelayedSelectors.push_back(
+ {CurTy, Record[0], Record[1], Record[2], NextCstNo});
+ (void)ValueList.getConstantFwdRef(NextCstNo, CurTy);
+ ++NextCstNo;
+ continue;
}
case bitc::CST_CODE_CE_EXTRACTELT
: { // CE_EXTRACTELT: [opty, opval, opty, opval]
@@ -3091,8 +3118,7 @@ Error BitcodeReader::globalCleanup() {
// Force deallocation of memory for these vectors to favor the client that
// wants lazy deserialization.
std::vector<std::pair<GlobalVariable *, unsigned>>().swap(GlobalInits);
- std::vector<std::pair<GlobalIndirectSymbol *, unsigned>>().swap(
- IndirectSymbolInits);
+ std::vector<std::pair<GlobalValue *, unsigned>>().swap(IndirectSymbolInits);
return Error::success();
}
@@ -3270,7 +3296,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
}
if (Record.size() > 12) {
- auto AS = getAttributes(Record[12]).getFnAttributes();
+ auto AS = getAttributes(Record[12]).getFnAttrs();
NewGV->setAttributes(AS);
}
@@ -3383,8 +3409,10 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
if (Record.size() > 9)
UnnamedAddr = getDecodedUnnamedAddrType(Record[9]);
Func->setUnnamedAddr(UnnamedAddr);
- if (Record.size() > 10 && Record[10] != 0)
- FunctionPrologues.push_back(std::make_pair(Func, Record[10] - 1));
+
+ FunctionOperandInfo OperandInfo = {Func, 0, 0, 0};
+ if (Record.size() > 10)
+ OperandInfo.Prologue = Record[10];
if (Record.size() > 11)
Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11]));
@@ -3401,11 +3429,11 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
Func->setComdat(reinterpret_cast<Comdat *>(1));
}
- if (Record.size() > 13 && Record[13] != 0)
- FunctionPrefixes.push_back(std::make_pair(Func, Record[13] - 1));
+ if (Record.size() > 13)
+ OperandInfo.Prefix = Record[13];
- if (Record.size() > 14 && Record[14] != 0)
- FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1));
+ if (Record.size() > 14)
+ OperandInfo.PersonalityFn = Record[14];
if (Record.size() > 15) {
Func->setDSOLocal(getDecodedDSOLocal(Record[15]));
@@ -3423,6 +3451,9 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
ValueList.push_back(Func);
+ if (OperandInfo.PersonalityFn || OperandInfo.Prefix || OperandInfo.Prologue)
+ FunctionOperands.push_back(OperandInfo);
+
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with them later.
if (!isProto) {
@@ -3467,7 +3498,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
auto Val = Record[OpNum++];
auto Linkage = Record[OpNum++];
- GlobalIndirectSymbol *NewGA;
+ GlobalValue *NewGA;
if (BitCode == bitc::MODULE_CODE_ALIAS ||
BitCode == bitc::MODULE_CODE_ALIAS_OLD)
NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name,
@@ -4898,8 +4929,10 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
MaybeAlign Align;
- if (Error Err =
- parseAlignmentValue(Bitfield::get<APV::Align>(Rec), Align)) {
+ uint64_t AlignExp =
+ Bitfield::get<APV::AlignLower>(Rec) |
+ (Bitfield::get<APV::AlignUpper>(Rec) << APV::AlignLower::Bits);
+ if (Error Err = parseAlignmentValue(AlignExp, Align)) {
return Err;
}
if (!Ty || !Size)
@@ -5505,21 +5538,16 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
// Upgrade any old intrinsic calls in the function.
for (auto &I : UpgradedIntrinsics) {
- for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
- UI != UE;) {
- User *U = *UI;
- ++UI;
+ for (User *U : llvm::make_early_inc_range(I.first->materialized_users()))
if (CallInst *CI = dyn_cast<CallInst>(U))
UpgradeIntrinsicCall(CI, I.second);
- }
}
// Update calls to the remangled intrinsics
for (auto &I : RemangledIntrinsics)
- for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end();
- UI != UE;)
+ for (User *U : llvm::make_early_inc_range(I.first->materialized_users()))
// Don't expect any other users than call sites
- cast<CallBase>(*UI++)->setCalledFunction(I.second);
+ cast<CallBase>(U)->setCalledFunction(I.second);
// Finish fn->subprogram upgrade for materialized functions.
if (DISubprogram *SP = MDLoader->lookupSubprogramForFunction(F))
@@ -5567,9 +5595,8 @@ Error BitcodeReader::materialize(GlobalValue *GV) {
// Remove incompatible attributes on function calls.
if (auto *CI = dyn_cast<CallBase>(&I)) {
- CI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(
- CI->getFunctionType()->getReturnType()));
+ CI->removeRetAttrs(AttributeFuncs::typeIncompatible(
+ CI->getFunctionType()->getReturnType()));
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ++ArgNo)
CI->removeParamAttrs(ArgNo, AttributeFuncs::typeIncompatible(
@@ -6742,10 +6769,9 @@ llvm::getBitcodeFileContents(MemoryBufferRef Buffer) {
continue;
}
case BitstreamEntry::Record:
- if (Expected<unsigned> StreamFailed = Stream.skipRecord(Entry.ID))
- continue;
- else
- return StreamFailed.takeError();
+ if (Error E = Stream.skipRecord(Entry.ID).takeError())
+ return std::move(E);
+ continue;
}
}
}
@@ -6768,12 +6794,9 @@ BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
if (IdentificationBit != -1ull) {
if (Error JumpFailed = Stream.JumpToBit(IdentificationBit))
return std::move(JumpFailed);
- Expected<std::string> ProducerIdentificationOrErr =
- readIdentificationBlock(Stream);
- if (!ProducerIdentificationOrErr)
- return ProducerIdentificationOrErr.takeError();
-
- ProducerIdentification = *ProducerIdentificationOrErr;
+ if (Error E =
+ readIdentificationBlock(Stream).moveInto(ProducerIdentification))
+ return std::move(E);
}
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
@@ -6847,10 +6870,9 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
SmallVector<uint64_t, 64> Record;
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -6895,10 +6917,9 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
return std::move(Err);
while (true) {
- Expected<llvm::BitstreamEntry> MaybeEntry = Stream.advance();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- llvm::BitstreamEntry Entry = MaybeEntry.get();
+ llvm::BitstreamEntry Entry;
+ if (Error E = Stream.advance().moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::Error:
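
getDecodedFFlags above starts reading three new bits for NoUnwind, MayThrow, and HasUnknownCall, and the matching shifts appear in getEncodedFFlags further down in BitcodeWriter.cpp. A small round-trip sketch of that packing, using a plain struct rather than the real FunctionSummary::FFlags; the bit positions (6-8) are the only detail taken from the patch.

// fflags_sketch.cpp -- illustrative only
#include <cassert>
#include <cstdint>

// Plain stand-in for FunctionSummary::FFlags, reduced to the three new flags.
struct Flags {
  unsigned NoUnwind : 1;
  unsigned MayThrow : 1;
  unsigned HasUnknownCall : 1;
};

static uint64_t encode(Flags F) {
  uint64_t Raw = 0;
  Raw |= uint64_t(F.NoUnwind) << 6;       // same shifts as getEncodedFFlags
  Raw |= uint64_t(F.MayThrow) << 7;
  Raw |= uint64_t(F.HasUnknownCall) << 8;
  return Raw;
}

static Flags decode(uint64_t Raw) {
  Flags F{};                              // same shifts as getDecodedFFlags
  F.NoUnwind = (Raw >> 6) & 0x1;
  F.MayThrow = (Raw >> 7) & 0x1;
  F.HasUnknownCall = (Raw >> 8) & 0x1;
  return F;
}

int main() {
  Flags In{1, 0, 1};
  Flags Out = decode(encode(In));
  assert(Out.NoUnwind == 1 && Out.MayThrow == 0 && Out.HasUnknownCall == 1);
  return 0;
}
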
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 8493eb7a28b2..6df5a4a64d51 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -21,8 +21,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
@@ -40,7 +40,6 @@
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -363,7 +362,8 @@ class PlaceholderQueue {
public:
~PlaceholderQueue() {
- assert(empty() && "PlaceholderQueue hasn't been flushed before being destroyed");
+ assert(empty() &&
+ "PlaceholderQueue hasn't been flushed before being destroyed");
}
bool empty() const { return PHs.empty(); }
DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
@@ -546,7 +546,7 @@ class MetadataLoader::MetadataLoaderImpl {
if (auto *DDI = dyn_cast<DbgDeclareInst>(&I))
if (auto *DIExpr = DDI->getExpression())
if (DIExpr->startsWithDeref() &&
- dyn_cast_or_null<Argument>(DDI->getAddress())) {
+ isa_and_nonnull<Argument>(DDI->getAddress())) {
SmallVector<uint64_t, 8> Ops;
Ops.append(std::next(DIExpr->elements_begin()),
DIExpr->elements_end());
@@ -604,7 +604,7 @@ class MetadataLoader::MetadataLoaderImpl {
// If the expression is malformed, make sure we don't
// copy more elements than we should.
HistoricSize = std::min(SubExpr.size(), HistoricSize);
- ArrayRef<uint64_t> Args = SubExpr.slice(1, HistoricSize-1);
+ ArrayRef<uint64_t> Args = SubExpr.slice(1, HistoricSize - 1);
switch (SubExpr.front()) {
case dwarf::DW_OP_plus:
@@ -698,11 +698,12 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// Get the abbrevs, and preload record positions to make them lazy-loadable.
while (true) {
uint64_t SavedPos = IndexCursor.GetCurrentBitNo();
- Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks(
- BitstreamCursor::AF_DontPopBlockAtEnd);
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E =
+ IndexCursor
+ .advanceSkippingSubblocks(BitstreamCursor::AF_DontPopBlockAtEnd)
+ .moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -715,10 +716,9 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() {
// The interesting case.
++NumMDRecordLoaded;
uint64_t CurrentPos = IndexCursor.GetCurrentBitNo();
- Expected<unsigned> MaybeCode = IndexCursor.skipRecord(Entry.ID);
- if (!MaybeCode)
- return MaybeCode.takeError();
- unsigned Code = MaybeCode.get();
+ unsigned Code;
+ if (Error E = IndexCursor.skipRecord(Entry.ID).moveInto(Code))
+ return std::move(E);
switch (Code) {
case bitc::METADATA_STRINGS: {
// Rewind and parse the strings.
@@ -905,11 +905,12 @@ Expected<bool> MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() {
if (Error Err = TempCursor.JumpToBit(GlobalDeclAttachmentPos))
return std::move(Err);
while (true) {
- Expected<BitstreamEntry> MaybeEntry = TempCursor.advanceSkippingSubblocks(
- BitstreamCursor::AF_DontPopBlockAtEnd);
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E =
+ TempCursor
+ .advanceSkippingSubblocks(BitstreamCursor::AF_DontPopBlockAtEnd)
+ .moveInto(Entry))
+ return std::move(E);
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1025,10 +1026,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
// Read all the records.
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -1081,22 +1081,22 @@ void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata(
if (Error Err = IndexCursor.JumpToBit(
GlobalMetadataBitPosIndex[ID - MDStringRef.size()]))
report_fatal_error("lazyLoadOneMetadata failed jumping: " +
- toString(std::move(Err)));
- Expected<BitstreamEntry> MaybeEntry = IndexCursor.advanceSkippingSubblocks();
- if (!MaybeEntry)
+ Twine(toString(std::move(Err))));
+ BitstreamEntry Entry;
+ if (Error E = IndexCursor.advanceSkippingSubblocks().moveInto(Entry))
// FIXME this drops the error on the floor.
report_fatal_error("lazyLoadOneMetadata failed advanceSkippingSubblocks: " +
- toString(MaybeEntry.takeError()));
- BitstreamEntry Entry = MaybeEntry.get();
+ Twine(toString(std::move(E))));
++NumMDRecordLoaded;
if (Expected<unsigned> MaybeCode =
IndexCursor.readRecord(Entry.ID, Record, &Blob)) {
if (Error Err =
parseOneMetadata(Record, MaybeCode.get(), Placeholders, Blob, ID))
report_fatal_error("Can't lazyload MD, parseOneMetadata: " +
- toString(std::move(Err)));
+ Twine(toString(std::move(Err))));
} else
- report_fatal_error("Can't lazyload MD: " + toString(MaybeCode.takeError()));
+ report_fatal_error("Can't lazyload MD: " +
+ Twine(toString(MaybeCode.takeError())));
}
/// Ensure that all forward-references and placeholders are resolved.
@@ -1193,10 +1193,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// Read name of the named metadata.
SmallString<8> Name(Record.begin(), Record.end());
Record.clear();
- Expected<unsigned> MaybeCode = Stream.ReadCode();
- if (!MaybeCode)
- return MaybeCode.takeError();
- Code = MaybeCode.get();
+ if (Error E = Stream.ReadCode().moveInto(Code))
+ return E;
++NumMDRecordLoaded;
if (Expected<unsigned> MaybeNextBitCode = Stream.readRecord(Code, Record)) {
@@ -1411,8 +1409,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
return error("Invalid record");
IsDistinct = Record[0];
- DINode::DIFlags Flags = (Record.size() > 6) ?
- static_cast<DINode::DIFlags>(Record[6]) : DINode::FlagZero;
+ DINode::DIFlags Flags = (Record.size() > 6)
+ ? static_cast<DINode::DIFlags>(Record[6])
+ : DINode::FlagZero;
MetadataList.assignValue(
GET_OR_DISTINCT(DIBasicType,
@@ -1437,7 +1436,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_DERIVED_TYPE: {
- if (Record.size() < 12 || Record.size() > 13)
+ if (Record.size() < 12 || Record.size() > 14)
return error("Invalid record");
// DWARF address space is encoded as N->getDWARFAddressSpace() + 1. 0 means
@@ -1446,6 +1445,10 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() > 12 && Record[12])
DWARFAddressSpace = Record[12] - 1;
+ Metadata *Annotations = nullptr;
+ if (Record.size() > 13 && Record[13])
+ Annotations = getMDOrNull(Record[13]);
+
IsDistinct = Record[0];
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[10]);
MetadataList.assignValue(
@@ -1455,13 +1458,13 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
getDITypeRefOrNull(Record[5]),
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
Record[9], DWARFAddressSpace, Flags,
- getDITypeRefOrNull(Record[11]))),
+ getDITypeRefOrNull(Record[11]), Annotations)),
NextMetadataNo);
NextMetadataNo++;
break;
}
case bitc::METADATA_COMPOSITE_TYPE: {
- if (Record.size() < 16 || Record.size() > 21)
+ if (Record.size() < 16 || Record.size() > 22)
return error("Invalid record");
// If we have a UUID and this is not a forward declaration, lookup the
@@ -1489,6 +1492,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Metadata *Associated = nullptr;
Metadata *Allocated = nullptr;
Metadata *Rank = nullptr;
+ Metadata *Annotations = nullptr;
auto *Identifier = getMDString(Record[15]);
// If this module is being parsed so that it can be ThinLTO imported
// into another module, composite types only need to be imported
@@ -1520,6 +1524,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Record.size() > 20) {
Rank = getMDOrNull(Record[20]);
}
+ if (Record.size() > 21) {
+ Annotations = getMDOrNull(Record[21]);
+ }
}
DICompositeType *CT = nullptr;
if (Identifier)
@@ -1527,7 +1534,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
Context, *Identifier, Tag, Name, File, Line, Scope, BaseType,
SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, Discriminator, DataLocation, Associated,
- Allocated, Rank);
+ Allocated, Rank, Annotations);
// Create a node if we didn't get a lazy ODR type.
if (!CT)
@@ -1536,7 +1543,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SizeInBits, AlignInBits, OffsetInBits, Flags,
Elements, RuntimeLang, VTableHolder, TemplateParams,
Identifier, Discriminator, DataLocation, Associated,
- Allocated, Rank));
+ Allocated, Rank, Annotations));
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
@@ -1665,9 +1672,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
SPFlags |= DISubprogram::SPFlagMainSubprogram;
else if (!HasSPFlags)
SPFlags = DISubprogram::toSPFlags(
- /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
- /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11],
- /*DIFlagMainSubprogram*/HasOldMainSubprogramFlag);
+ /*IsLocalToUnit=*/Record[7], /*IsDefinition=*/Record[8],
+ /*IsOptimized=*/Record[14], /*Virtuality=*/Record[11],
+ /*DIFlagMainSubprogram=*/HasOldMainSubprogramFlag);
// All definitions should be distinct.
IsDistinct = (Record[0] & 1) || (SPFlags & DISubprogram::SPFlagDefinition);
@@ -1685,6 +1692,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
bool HasFn = false;
bool HasThisAdj = true;
bool HasThrownTypes = true;
+ bool HasAnnotations = false;
unsigned OffsetA = 0;
unsigned OffsetB = 0;
if (!HasSPFlags) {
@@ -1696,29 +1704,33 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
}
HasThisAdj = Record.size() >= 20;
HasThrownTypes = Record.size() >= 21;
+ } else {
+ HasAnnotations = Record.size() >= 19;
}
Metadata *CUorFn = getMDOrNull(Record[12 + OffsetB]);
DISubprogram *SP = GET_OR_DISTINCT(
DISubprogram,
(Context,
- getDITypeRefOrNull(Record[1]), // scope
- getMDString(Record[2]), // name
- getMDString(Record[3]), // linkageName
- getMDOrNull(Record[4]), // file
- Record[5], // line
- getMDOrNull(Record[6]), // type
- Record[7 + OffsetA], // scopeLine
- getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
- Record[10 + OffsetA], // virtualIndex
- HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
- Flags, // flags
- SPFlags, // SPFlags
- HasUnit ? CUorFn : nullptr, // unit
- getMDOrNull(Record[13 + OffsetB]), // templateParams
- getMDOrNull(Record[14 + OffsetB]), // declaration
- getMDOrNull(Record[15 + OffsetB]), // retainedNodes
+ getDITypeRefOrNull(Record[1]), // scope
+ getMDString(Record[2]), // name
+ getMDString(Record[3]), // linkageName
+ getMDOrNull(Record[4]), // file
+ Record[5], // line
+ getMDOrNull(Record[6]), // type
+ Record[7 + OffsetA], // scopeLine
+ getDITypeRefOrNull(Record[8 + OffsetA]), // containingType
+ Record[10 + OffsetA], // virtualIndex
+ HasThisAdj ? Record[16 + OffsetB] : 0, // thisAdjustment
+ Flags, // flags
+ SPFlags, // SPFlags
+ HasUnit ? CUorFn : nullptr, // unit
+ getMDOrNull(Record[13 + OffsetB]), // templateParams
+ getMDOrNull(Record[14 + OffsetB]), // declaration
+ getMDOrNull(Record[15 + OffsetB]), // retainedNodes
HasThrownTypes ? getMDOrNull(Record[17 + OffsetB])
- : nullptr // thrownTypes
+ : nullptr, // thrownTypes
+ HasAnnotations ? getMDOrNull(Record[18 + OffsetB])
+ : nullptr // annotations
));
MetadataList.assignValue(SP, NextMetadataNo);
NextMetadataNo++;
@@ -1860,13 +1872,18 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
unsigned Version = Record[0] >> 1;
if (Version == 2) {
+ Metadata *Annotations = nullptr;
+ if (Record.size() > 12)
+ Annotations = getMDOrNull(Record[12]);
+
MetadataList.assignValue(
- GET_OR_DISTINCT(
- DIGlobalVariable,
- (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
- getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
- getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[9]), getMDOrNull(Record[10]), Record[11])),
+ GET_OR_DISTINCT(DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]), getMDString(Record[3]),
+ getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ Record[11], Annotations)),
NextMetadataNo);
NextMetadataNo++;
@@ -1874,12 +1891,12 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
// No upgrade necessary. A null field will be introduced to indicate
// that no parameter information is available.
MetadataList.assignValue(
- GET_OR_DISTINCT(DIGlobalVariable,
- (Context, getMDOrNull(Record[1]),
- getMDString(Record[2]), getMDString(Record[3]),
- getMDOrNull(Record[4]), Record[5],
- getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), nullptr, Record[11])),
+ GET_OR_DISTINCT(
+ DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[10]), nullptr, Record[11], nullptr)),
NextMetadataNo);
NextMetadataNo++;
@@ -1912,7 +1929,7 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
(Context, getMDOrNull(Record[1]), getMDString(Record[2]),
getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[10]), nullptr, AlignInBits));
+ getMDOrNull(Record[10]), nullptr, AlignInBits, nullptr));
DIGlobalVariableExpression *DGVE = nullptr;
if (Attach || Expr)
@@ -1942,18 +1959,22 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
bool HasTag = !HasAlignment && Record.size() > 8;
DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[7 + HasTag]);
uint32_t AlignInBits = 0;
+ Metadata *Annotations = nullptr;
if (HasAlignment) {
- if (Record[8 + HasTag] > (uint64_t)std::numeric_limits<uint32_t>::max())
+ if (Record[8] > (uint64_t)std::numeric_limits<uint32_t>::max())
return error("Alignment value is too large");
- AlignInBits = Record[8 + HasTag];
+ AlignInBits = Record[8];
+ if (Record.size() > 9)
+ Annotations = getMDOrNull(Record[9]);
}
+
MetadataList.assignValue(
GET_OR_DISTINCT(DILocalVariable,
(Context, getMDOrNull(Record[1 + HasTag]),
getMDString(Record[2 + HasTag]),
getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
getDITypeRefOrNull(Record[5 + HasTag]),
- Record[6 + HasTag], Flags, AlignInBits)),
+ Record[6 + HasTag], Flags, AlignInBits, Annotations)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1964,10 +1985,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
IsDistinct = Record[0] & 1;
MetadataList.assignValue(
- GET_OR_DISTINCT(DILabel,
- (Context, getMDOrNull(Record[1]),
- getMDString(Record[2]),
- getMDOrNull(Record[3]), Record[4])),
+ GET_OR_DISTINCT(DILabel, (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]),
+ getMDOrNull(Record[3]), Record[4])),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -1984,8 +2004,8 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
if (Error Err = upgradeDIExpression(Version, Elts, Buffer))
return Err;
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIExpression, (Context, Elts)), NextMetadataNo);
+ MetadataList.assignValue(GET_OR_DISTINCT(DIExpression, (Context, Elts)),
+ NextMetadataNo);
NextMetadataNo++;
break;
}
@@ -2020,17 +2040,19 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
- if (Record.size() != 6 && Record.size() != 7)
+ if (Record.size() < 6 || Record.size() > 8)
return error("Invalid record");
IsDistinct = Record[0];
- bool HasFile = (Record.size() == 7);
+ bool HasFile = (Record.size() >= 7);
+ bool HasElements = (Record.size() >= 8);
MetadataList.assignValue(
GET_OR_DISTINCT(DIImportedEntity,
(Context, Record[1], getMDOrNull(Record[2]),
getDITypeRefOrNull(Record[3]),
HasFile ? getMDOrNull(Record[6]) : nullptr,
- HasFile ? Record[4] : 0, getMDString(Record[5]))),
+ HasFile ? Record[4] : 0, getMDString(Record[5]),
+ HasElements ? getMDOrNull(Record[7]) : nullptr)),
NextMetadataNo);
NextMetadataNo++;
break;
@@ -2121,10 +2143,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
if (R.AtEndOfStream())
return error("Invalid record: metadata strings bad length");
- Expected<uint32_t> MaybeSize = R.ReadVBR(6);
- if (!MaybeSize)
- return MaybeSize.takeError();
- uint32_t Size = MaybeSize.get();
+ uint32_t Size;
+ if (Error E = R.ReadVBR(6).moveInto(Size))
+ return E;
if (Strings.size() < Size)
return error("Invalid record: metadata strings truncated chars");
@@ -2161,10 +2182,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
PlaceholderQueue Placeholders;
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
@@ -2265,10 +2285,9 @@ Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
// Read all the records.
while (true) {
- Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
- if (!MaybeEntry)
- return MaybeEntry.takeError();
- BitstreamEntry Entry = MaybeEntry.get();
+ BitstreamEntry Entry;
+ if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+ return E;
switch (Entry.Kind) {
case BitstreamEntry::SubBlock: // Handled for us already.
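
The MetadataLoader hunks above all use the same backwards-compatibility idiom for the new annotations and elements operands: the field is appended at the end of the record and only read when Record.size() shows the producer emitted it, so older bitcode keeps parsing. A reduced sketch of the idiom with a hypothetical two-field record, not the real METADATA_* layouts.

// record_sketch.cpp -- illustrative only
#include <cstdint>
#include <cstdio>
#include <vector>

struct Node {
  uint64_t Line = 0;
  uint64_t Annotations = 0; // 0 means "absent", mirroring getMDOrNull(0)
};

static Node parseRecord(const std::vector<uint64_t> &Record) {
  Node N;
  N.Line = Record[0];
  // Trailing fields are optional: gate on the record size so records written
  // before the field existed still decode unchanged.
  if (Record.size() > 1 && Record[1])
    N.Annotations = Record[1];
  return N;
}

int main() {
  Node Old = parseRecord({7});     // old-style record, no annotations operand
  Node New = parseRecord({7, 42}); // new-style record with annotations
  std::printf("old=%llu new=%llu\n", (unsigned long long)Old.Annotations,
              (unsigned long long)New.Annotations);
  return 0;
}
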
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 0a202c376981..1e9a9197aed7 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRSymtab.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -67,7 +68,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SHA1.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -142,7 +142,6 @@ public:
: Stream(Stream), StrtabBuilder(StrtabBuilder) {}
protected:
- void writeBitcodeHeader();
void writeModuleVersion();
};
@@ -374,7 +373,6 @@ private:
void writeModuleMetadata();
void writeFunctionMetadata(const Function &F);
void writeFunctionMetadataAttachment(const Function &F);
- void writeGlobalVariableMetadataAttachment(const GlobalVariable &GV);
void pushGlobalMetadataAttachment(SmallVectorImpl<uint64_t> &Record,
const GlobalObject &GO);
void writeModuleMetadataKinds();
@@ -628,6 +626,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_IN_ALLOCA;
case Attribute::Cold:
return bitc::ATTR_KIND_COLD;
+ case Attribute::DisableSanitizerInstrumentation:
+ return bitc::ATTR_KIND_DISABLE_SANITIZER_INSTRUMENTATION;
case Attribute::Hot:
return bitc::ATTR_KIND_HOT;
case Attribute::ElementType:
@@ -835,7 +835,7 @@ void ModuleBitcodeWriter::writeAttributeTable() {
SmallVector<uint64_t, 64> Record;
for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
AttributeList AL = Attrs[i];
- for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) {
+ for (unsigned i : AL.indexes()) {
AttributeSet AS = AL.getAttributes(i);
if (AS.hasAttributes())
Record.push_back(VE.getAttributeGroupID({i, AS}));
@@ -973,9 +973,8 @@ void ModuleBitcodeWriter::writeTypeTable() {
// STRUCT: [ispacked, eltty x N]
TypeVals.push_back(ST->isPacked());
// Output all of the element types.
- for (StructType::element_iterator I = ST->element_begin(),
- E = ST->element_end(); I != E; ++I)
- TypeVals.push_back(VE.getTypeID(*I));
+ for (Type *ET : ST->elements())
+ TypeVals.push_back(VE.getTypeID(ET));
if (ST->isLiteral()) {
Code = bitc::TYPE_CODE_STRUCT_ANON;
@@ -1066,6 +1065,9 @@ static uint64_t getEncodedFFlags(FunctionSummary::FFlags Flags) {
RawFlags |= (Flags.ReturnDoesNotAlias << 3);
RawFlags |= (Flags.NoInline << 4);
RawFlags |= (Flags.AlwaysInline << 5);
+ RawFlags |= (Flags.NoUnwind << 6);
+ RawFlags |= (Flags.MayThrow << 7);
+ RawFlags |= (Flags.HasUnknownCall << 8);
return RawFlags;
}
@@ -1687,6 +1689,8 @@ void ModuleBitcodeWriter::writeDIDerivedType(const DIDerivedType *N,
else
Record.push_back(0);
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
+
Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev);
Record.clear();
}
@@ -1716,6 +1720,7 @@ void ModuleBitcodeWriter::writeDICompositeType(
Record.push_back(VE.getMetadataOrNullID(N->getRawAssociated()));
Record.push_back(VE.getMetadataOrNullID(N->getRawAllocated()));
Record.push_back(VE.getMetadataOrNullID(N->getRawRank()));
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev);
Record.clear();
@@ -1811,6 +1816,7 @@ void ModuleBitcodeWriter::writeDISubprogram(const DISubprogram *N,
Record.push_back(VE.getMetadataOrNullID(N->getRetainedNodes().get()));
Record.push_back(N->getThisAdjustment());
Record.push_back(VE.getMetadataOrNullID(N->getThrownTypes().get()));
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev);
Record.clear();
@@ -1958,6 +1964,7 @@ void ModuleBitcodeWriter::writeDIGlobalVariable(
Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams()));
Record.push_back(N->getAlignInBits());
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
Record.clear();
@@ -1989,6 +1996,7 @@ void ModuleBitcodeWriter::writeDILocalVariable(
Record.push_back(N->getArg());
Record.push_back(N->getFlags());
Record.push_back(N->getAlignInBits());
+ Record.push_back(VE.getMetadataOrNullID(N->getAnnotations().get()));
Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
Record.clear();
@@ -2056,6 +2064,7 @@ void ModuleBitcodeWriter::writeDIImportedEntity(
Record.push_back(N->getLine());
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getRawFile()));
+ Record.push_back(VE.getMetadataOrNullID(N->getElements().get()));
Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev);
Record.clear();
@@ -2907,8 +2916,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = II->getNumArgOperands();
- i != e; ++i)
+ for (unsigned i = FTy->getNumParams(), e = II->arg_size(); i != e; ++i)
pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
}
break;
@@ -2989,8 +2997,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = CBI->getNumArgOperands();
- i != e; ++i)
+ for (unsigned i = FTy->getNumParams(), e = CBI->arg_size(); i != e; ++i)
pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
}
break;
@@ -3047,7 +3054,11 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
using APV = AllocaPackedValues;
unsigned Record = 0;
- Bitfield::set<APV::Align>(Record, getEncodedAlign(AI.getAlign()));
+ unsigned EncodedAlign = getEncodedAlign(AI.getAlign());
+ Bitfield::set<APV::AlignLower>(
+ Record, EncodedAlign & ((1 << APV::AlignLower::Bits) - 1));
+ Bitfield::set<APV::AlignUpper>(Record,
+ EncodedAlign >> APV::AlignLower::Bits);
Bitfield::set<APV::UsedWithInAlloca>(Record, AI.isUsedWithInAlloca());
Bitfield::set<APV::ExplicitType>(Record, true);
Bitfield::set<APV::SwiftError>(Record, AI.isSwiftError());
@@ -3154,8 +3165,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
- i != e; ++i)
+ for (unsigned i = FTy->getNumParams(), e = CI.arg_size(); i != e; ++i)
pushValueAndType(CI.getArgOperand(i), InstID, Vals); // varargs
}
break;
@@ -4028,7 +4038,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
FSModVTableRefsAbbrev);
for (const GlobalAlias &A : M.aliases()) {
- auto *Aliasee = A.getBaseObject();
+ auto *Aliasee = A.getAliaseeObject();
if (!Aliasee->hasName())
// Nameless functions don't have an entry in the summary, skip it.
continue;
@@ -4141,7 +4151,14 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
// For local linkage, we also emit the original name separately
// immediately after the record.
auto MaybeEmitOriginalName = [&](GlobalValueSummary &S) {
- if (!GlobalValue::isLocalLinkage(S.linkage()))
+ // We don't need to emit the original name if we are writing the index for
+ // distributed backends (in which case ModuleToSummariesForIndex is
+ // non-null). The original name is only needed during the thin link, since
+ // for SamplePGO the indirect call targets for local functions have
+ // the original name annotated in profile.
+ // Continue to emit it when writing out the entire combined index, which is
+ // used in testing the thin link via llvm-lto.
+ if (ModuleToSummariesForIndex || !GlobalValue::isLocalLinkage(S.linkage()))
return;
NameVals.push_back(S.getOriginalName());
Stream.EmitRecord(bitc::FS_COMBINED_ORIGINAL_NAME, NameVals);
@@ -4194,33 +4211,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
- GlobalValue::GUID GUID = VI.getGUID();
- Optional<unsigned> CallValueId = getValueId(GUID);
- if (CallValueId)
- return CallValueId;
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- GUID = Index.getGUIDFromOriginalID(GUID);
- if (!GUID)
- return None;
- CallValueId = getValueId(GUID);
- if (!CallValueId)
- return None;
- // The mapping from OriginalId to GUID may return a GUID
- // that corresponds to a static variable. Filter it out here.
- // This can happen when
- // 1) There is a call to a library function which does not have
- // a CallValidId;
- // 2) There is a static variable with the OriginalGUID identical
- // to the GUID of the library function in 1);
- // When this happens, the logic for SamplePGO kicks in and
- // the static variable in 2) will be found, which needs to be
- // filtered out.
- auto *GVSum = Index.getGlobalValueSummary(GUID, false);
- if (GVSum && GVSum->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
- return None;
- return CallValueId;
+ return getValueId(VI.getGUID());
};
auto *FS = cast<FunctionSummary>(S);
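Aside: the alloca change above splits the encoded alignment across an AlignLower and an AlignUpper bitfield because the value no longer fits the original field. A small standalone sketch of that split; the field layout here is made up for illustration and is not the real AllocaPackedValues layout.

#include <cstdint>

// Illustrative layout: five low alignment bits at [0,5), three unrelated flag
// bits at [5,8), and one alignment overflow bit at bit 8.
constexpr unsigned AlignLowerBits = 5;
constexpr unsigned AlignUpperShift = 8;

uint64_t packAlign(uint64_t Record, unsigned EncodedAlign) {
  Record |= EncodedAlign & ((1u << AlignLowerBits) - 1);                // low bits
  Record |= uint64_t(EncodedAlign >> AlignLowerBits) << AlignUpperShift; // overflow bit
  return Record;
}

unsigned unpackAlign(uint64_t Record) {
  unsigned Lower = Record & ((1u << AlignLowerBits) - 1);
  unsigned Upper = unsigned(Record >> AlignUpperShift) & 0x1;
  return (Upper << AlignLowerBits) | Lower; // reassemble the encoded alignment
}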
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index d86db61ee1f4..9465a3b11c8f 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -229,8 +229,11 @@ static void predictValueUseListOrderImpl(const Value *V, const Function *F,
// have been read (despite having earlier IDs). Rather than awkwardly
// modeling this behaviour here, orderModule() has assigned IDs to
// initializers of GlobalValues before GlobalValues themselves.
- if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID))
+ if (OM.isGlobalValue(LID) && OM.isGlobalValue(RID)) {
+ if (LID == RID)
+ return LU->getOperandNo() > RU->getOperandNo();
return LID < RID;
+ }
// If ID is 4, then expect: 7 6 5 1 2 3.
if (LID < RID) {
@@ -1036,7 +1039,7 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) {
}
// Do lookups for all attribute groups.
- for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) {
+ for (unsigned i : PAL.indexes()) {
AttributeSet AS = PAL.getAttributes(i);
if (!AS.hasAttributes())
continue;
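For clarity, a simplified sketch of the tie-break added to predictValueUseListOrderImpl above: when both uses belong to global values with the same ID, the comparison falls back to the operand number so the predicted order is fully determined. The struct below is invented for the example and is not LLVM's representation.

#include <cstdint>

struct UseInfo {
  unsigned UserID;     // position of the user in the predicted order
  unsigned OperandNo;  // which operand of that user holds this use
  bool UserIsGlobal;
};

bool comesBefore(const UseInfo &L, const UseInfo &R) {
  if (L.UserIsGlobal && R.UserIsGlobal) {
    if (L.UserID == R.UserID)
      return L.OperandNo > R.OperandNo; // same global: later operand first
    return L.UserID < R.UserID;
  }
  return L.UserID < R.UserID; // remaining heuristics of the real code elided
}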
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index e5d576d879b5..7d8a73e12d3a 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -221,9 +221,6 @@ ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
}
}
-/// getICmpCondCode - Return the ISD condition code corresponding to
-/// the given LLVM IR integer condition code.
-///
ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
switch (Pred) {
case ICmpInst::ICMP_EQ: return ISD::SETEQ;
@@ -241,6 +238,33 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
}
}
+ICmpInst::Predicate llvm::getICmpCondCode(ISD::CondCode Pred) {
+ switch (Pred) {
+ case ISD::SETEQ:
+ return ICmpInst::ICMP_EQ;
+ case ISD::SETNE:
+ return ICmpInst::ICMP_NE;
+ case ISD::SETLE:
+ return ICmpInst::ICMP_SLE;
+ case ISD::SETULE:
+ return ICmpInst::ICMP_ULE;
+ case ISD::SETGE:
+ return ICmpInst::ICMP_SGE;
+ case ISD::SETUGE:
+ return ICmpInst::ICMP_UGE;
+ case ISD::SETLT:
+ return ICmpInst::ICMP_SLT;
+ case ISD::SETULT:
+ return ICmpInst::ICMP_ULT;
+ case ISD::SETGT:
+ return ICmpInst::ICMP_SGT;
+ case ISD::SETUGT:
+ return ICmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("Invalid ISD integer condition code!");
+ }
+}
+
static bool isNoopBitcast(Type *T1, Type *T2,
const TargetLoweringBase& TLI) {
return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) ||
@@ -524,10 +548,8 @@ bool llvm::isInTailCallPosition(const CallBase &Call, const TargetMachine &TM) {
if (&*BBI == &Call)
break;
// Debug info intrinsics do not get in the way of tail call optimization.
- if (isa<DbgInfoIntrinsic>(BBI))
- continue;
// Pseudo probe intrinsics do not block tail call optimization either.
- if (isa<PseudoProbeInst>(BBI))
+ if (BBI->isDebugOrPseudoInst())
continue;
// A lifetime end, assume or noalias.decl intrinsic should not stop tail
// call optimization.
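Usage note, assuming the new overload is declared alongside the existing getICmpCondCode in llvm/CodeGen/Analysis.h: integer predicates map one-to-one onto ISD condition codes, so converting there and back is the identity. A hedged sketch:

#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/Instructions.h"
#include <cassert>

static void roundTrip() {
  llvm::ICmpInst::Predicate P = llvm::ICmpInst::ICMP_ULT;
  llvm::ISD::CondCode CC = llvm::getICmpCondCode(P); // existing helper
  assert(llvm::getICmpCondCode(CC) == P);            // new overload above
  (void)CC;
}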
diff --git a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index db4215e92d44..223840c21d8b 100644
--- a/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -75,7 +75,6 @@ void ARMException::endFunction(const MachineFunction *MF) {
// Emit references to personality.
if (Per) {
MCSymbol *PerSym = Asm->getSymbol(Per);
- Asm->OutStreamer->emitSymbolAttribute(PerSym, MCSA_Global);
ATS.emitPersonality(PerSym);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e528d33b5f8c..cc848d28a9a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -71,7 +71,6 @@
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -102,6 +101,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkFormat.h"
@@ -115,7 +115,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -275,7 +274,7 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.getModuleMetadata(M);
- OutStreamer->InitSections(false);
+ OutStreamer->initSections(false, *TM.getMCSubtargetInfo());
if (DisableDebugInfoPrinting)
MMI->setDebugInfoAvailability(false);
@@ -326,16 +325,10 @@ bool AsmPrinter::doInitialization(Module &M) {
// Emit module-level inline asm if it exists.
if (!M.getModuleInlineAsm().empty()) {
- // We're at the module level. Construct MCSubtarget from the default CPU
- // and target triple.
- std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
- TM.getTargetTriple().str(), TM.getTargetCPU(),
- TM.getTargetFeatureString()));
- assert(STI && "Unable to create subtarget info");
OutStreamer->AddComment("Start of file scope inline assembly");
OutStreamer->AddBlankLine();
- emitInlineAsm(M.getModuleInlineAsm() + "\n",
- OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions);
+ emitInlineAsm(M.getModuleInlineAsm() + "\n", *TM.getMCSubtargetInfo(),
+ TM.Options.MCOptions);
OutStreamer->AddComment("End of file scope inline assembly");
OutStreamer->AddBlankLine();
}
@@ -1422,7 +1415,7 @@ void AsmPrinter::emitFunctionBody() {
});
R << "BasicBlock: " << ore::NV("BasicBlock", MBB.getName()) << "\n";
for (auto &KV : MnemonicVec) {
- auto Name = (Twine("INST_") + KV.first.trim()).str();
+ auto Name = (Twine("INST_") + getToken(KV.first.trim()).first).str();
R << KV.first << ": " << ore::NV(Name, KV.second) << "\n";
}
ORE->emit(R);
@@ -1610,14 +1603,13 @@ void AsmPrinter::emitGlobalGOTEquivs() {
emitGlobalVariable(GV);
}
-void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
- const GlobalIndirectSymbol& GIS) {
- MCSymbol *Name = getSymbol(&GIS);
- bool IsFunction = GIS.getValueType()->isFunctionTy();
+void AsmPrinter::emitGlobalAlias(Module &M, const GlobalAlias &GA) {
+ MCSymbol *Name = getSymbol(&GA);
+ bool IsFunction = GA.getValueType()->isFunctionTy();
// Treat bitcasts of functions as functions also. This is important at least
// on WebAssembly where object and function addresses can't alias each other.
if (!IsFunction)
- if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+ if (auto *CE = dyn_cast<ConstantExpr>(GA.getAliasee()))
if (CE->getOpcode() == Instruction::BitCast)
IsFunction =
CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
@@ -1627,61 +1619,80 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
// point, all the extra label is emitted, we just have to emit linkage for
// those labels.
if (TM.getTargetTriple().isOSBinFormatXCOFF()) {
- assert(!isa<GlobalIFunc>(GIS) && "IFunc is not supported on AIX.");
assert(MAI->hasVisibilityOnlyWithLinkage() &&
"Visibility should be handled with emitLinkage() on AIX.");
- emitLinkage(&GIS, Name);
+ emitLinkage(&GA, Name);
// If it's a function, also emit linkage for aliases of function entry
// point.
if (IsFunction)
- emitLinkage(&GIS,
- getObjFileLowering().getFunctionEntryPointSymbol(&GIS, TM));
+ emitLinkage(&GA,
+ getObjFileLowering().getFunctionEntryPointSymbol(&GA, TM));
return;
}
- if (GIS.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ if (GA.hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
- else if (GIS.hasWeakLinkage() || GIS.hasLinkOnceLinkage())
+ else if (GA.hasWeakLinkage() || GA.hasLinkOnceLinkage())
OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
else
- assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+ assert(GA.hasLocalLinkage() && "Invalid alias linkage");
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
if (IsFunction)
- OutStreamer->emitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
- ? MCSA_ELF_TypeIndFunction
- : MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
- emitVisibility(Name, GIS.getVisibility());
+ emitVisibility(Name, GA.getVisibility());
- const MCExpr *Expr = lowerConstant(GIS.getIndirectSymbol());
+ const MCExpr *Expr = lowerConstant(GA.getAliasee());
- if (isa<GlobalAlias>(&GIS) && MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
+ if (MAI->hasAltEntry() && isa<MCBinaryExpr>(Expr))
OutStreamer->emitSymbolAttribute(Name, MCSA_AltEntry);
// Emit the directives as assignments aka .set:
OutStreamer->emitAssignment(Name, Expr);
- MCSymbol *LocalAlias = getSymbolPreferLocal(GIS);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GA);
if (LocalAlias != Name)
OutStreamer->emitAssignment(LocalAlias, Expr);
- if (auto *GA = dyn_cast<GlobalAlias>(&GIS)) {
- // If the aliasee does not correspond to a symbol in the output, i.e. the
- // alias is not of an object or the aliased object is private, then set the
- // size of the alias symbol from the type of the alias. We don't do this in
- // other situations as the alias and aliasee having differing types but same
- // size may be intentional.
- const GlobalObject *BaseObject = GA->getBaseObject();
- if (MAI->hasDotTypeDotSizeDirective() && GA->getValueType()->isSized() &&
- (!BaseObject || BaseObject->hasPrivateLinkage())) {
- const DataLayout &DL = M.getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
- OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
- }
+ // If the aliasee does not correspond to a symbol in the output, i.e. the
+ // alias is not of an object or the aliased object is private, then set the
+ // size of the alias symbol from the type of the alias. We don't do this in
+ // other situations as the alias and aliasee having differing types but same
+ // size may be intentional.
+ const GlobalObject *BaseObject = GA.getAliaseeObject();
+ if (MAI->hasDotTypeDotSizeDirective() && GA.getValueType()->isSized() &&
+ (!BaseObject || BaseObject->hasPrivateLinkage())) {
+ const DataLayout &DL = M.getDataLayout();
+ uint64_t Size = DL.getTypeAllocSize(GA.getValueType());
+ OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
}
}
+void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) {
+ assert(!TM.getTargetTriple().isOSBinFormatXCOFF() &&
+ "IFunc is not supported on AIX.");
+
+ MCSymbol *Name = getSymbol(&GI);
+
+ if (GI.hasExternalLinkage() || !MAI->getWeakRefDirective())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_Global);
+ else if (GI.hasWeakLinkage() || GI.hasLinkOnceLinkage())
+ OutStreamer->emitSymbolAttribute(Name, MCSA_WeakReference);
+ else
+ assert(GI.hasLocalLinkage() && "Invalid ifunc linkage");
+
+ OutStreamer->emitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
+ emitVisibility(Name, GI.getVisibility());
+
+ // Emit the directives as assignments aka .set:
+ const MCExpr *Expr = lowerConstant(GI.getResolver());
+ OutStreamer->emitAssignment(Name, Expr);
+ MCSymbol *LocalAlias = getSymbolPreferLocal(GI);
+ if (LocalAlias != Name)
+ OutStreamer->emitAssignment(LocalAlias, Expr);
+}
+
void AsmPrinter::emitRemarksSection(remarks::RemarkStreamer &RS) {
if (!RS.needsSection())
return;
@@ -1815,6 +1826,11 @@ bool AsmPrinter::doFinalization(Module &M) {
}
}
+ // This needs to happen before emitting debug information since that can end
+ // arbitrary sections.
+ if (auto *TS = OutStreamer->getTargetStreamer())
+ TS->emitConstantPools();
+
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
@@ -1857,11 +1873,11 @@ bool AsmPrinter::doFinalization(Module &M) {
AliasStack.push_back(Cur);
}
for (const GlobalAlias *AncestorAlias : llvm::reverse(AliasStack))
- emitGlobalIndirectSymbol(M, *AncestorAlias);
+ emitGlobalAlias(M, *AncestorAlias);
AliasStack.clear();
}
for (const auto &IFunc : M.ifuncs())
- emitGlobalIndirectSymbol(M, IFunc);
+ emitGlobalIFunc(M, IFunc);
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -2455,9 +2471,14 @@ void AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV) const {
if (Alignment == Align(1))
return; // 1-byte aligned: no need to emit alignment.
- if (getCurrentSection()->getKind().isText())
- OutStreamer->emitCodeAlignment(Alignment.value());
- else
+ if (getCurrentSection()->getKind().isText()) {
+ const MCSubtargetInfo *STI = nullptr;
+ if (this->MF)
+ STI = &getSubtargetInfo();
+ else
+ STI = TM.getMCSubtargetInfo();
+ OutStreamer->emitCodeAlignment(Alignment.value(), STI);
+ } else
OutStreamer->emitValueToAlignment(Alignment.value());
}
@@ -2513,7 +2534,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::GetElementPtr: {
// Generate a symbolic expression for the byte address
@@ -3265,21 +3286,21 @@ void AsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
// the references were generated.
+ const BasicBlock *BB = MBB.getBasicBlock();
if (MBB.hasAddressTaken()) {
- const BasicBlock *BB = MBB.getBasicBlock();
if (isVerbose())
OutStreamer->AddComment("Block address taken");
// MBBs can have their address taken as part of CodeGen without having
// their corresponding BB's address taken in IR
- if (BB->hasAddressTaken())
+ if (BB && BB->hasAddressTaken())
for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB))
OutStreamer->emitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
- if (const BasicBlock *BB = MBB.getBasicBlock()) {
+ if (BB) {
if (BB->hasName()) {
BB->printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, BB->getModule());
@@ -3538,7 +3559,7 @@ void AsmPrinter::emitXRayTable() {
// pointers. This should work for both 32-bit and 64-bit platforms.
if (FnSledIndex) {
OutStreamer->SwitchSection(FnSledIndex);
- OutStreamer->emitCodeAlignment(2 * WordSizeBytes);
+ OutStreamer->emitCodeAlignment(2 * WordSizeBytes, &getSubtargetInfo());
OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
OutStreamer->SwitchSection(PrevSection);
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 4a93181f5439..ef1abc47701a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -30,10 +30,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -129,13 +129,16 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, AsmPrinter *AP,
- uint64_t LocCookie, raw_ostream &OS) {
+ MachineModuleInfo *MMI, const MCAsmInfo *MAI,
+ AsmPrinter *AP, uint64_t LocCookie,
+ raw_ostream &OS) {
// Switch to the inline assembly variant.
OS << "\t.intel_syntax\n\t";
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
+ int AsmPrinterVariant = 1; // X86MCAsmInfo.cpp's AsmWriterFlavorTy::Intel.
while (*LastEmitted) {
switch (*LastEmitted) {
@@ -145,8 +148,8 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
*LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
++LiteralEnd;
-
- OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
LastEmitted = LiteralEnd;
break;
}
@@ -164,6 +167,27 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
case '$':
++LastEmitted; // Consume second '$' character.
break;
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
+ if (CurVariant != -1)
+ report_fatal_error("Nested variants found in inline asm string: '" +
+ Twine(AsmStr) + "'");
+ CurVariant = 0; // We're in the first variant now.
+ break;
+ case '|':
+ ++LastEmitted; // Consume '|' character.
+ if (CurVariant == -1)
+ OS << '|'; // This is gcc's behavior for | outside a variant.
+ else
+ ++CurVariant; // We're in the next variant.
+ break;
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
+ if (CurVariant == -1)
+ OS << '}'; // This is gcc's behavior for } outside a variant.
+ else
+ CurVariant = -1;
+ break;
}
if (Done) break;
@@ -176,16 +200,15 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
- if (HasCurlyBraces && LastEmitted[0] == ':') {
+ if (HasCurlyBraces && *LastEmitted == ':') {
++LastEmitted;
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
- std::string Val(StrStart, StrEnd);
- AP->PrintSpecial(MI, OS, Val.c_str());
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
LastEmitted = StrEnd+1;
break;
}
@@ -201,7 +224,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
- if (Val >= NumOperands-1)
+ if (Val >= NumOperands - 1)
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
@@ -228,40 +251,50 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// Okay, we finally have a value number. Ask the target to print this
// operand!
- unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
- bool Error = false;
+ bool Error = false;
- // Scan to find the machine operand number for the operand.
- for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands()) break;
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
- }
+ // Scan to find the machine operand number for the operand.
+ for (; Val; --Val) {
+ if (OpNo >= MI->getNumOperands())
+ break;
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+ }
- // We may have a location metadata attached to the end of the
- // instruction, and at no point should see metadata at any
- // other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
- Error = true;
- } else {
- unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
-
- if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(
- MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
+ Error = true;
} else {
- Error = AP->PrintAsmOperand(MI, OpNo,
- Modifier[0] ? Modifier : nullptr, OS);
+ unsigned OpFlags = MI->getOperand(OpNo).getImm();
+ ++OpNo; // Skip over the ID number.
+
+ // FIXME: Shouldn't arch-independent output template handling go into
+ // PrintAsmOperand?
+ // Labels are target independent.
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
+ } else if (InlineAsm::isMemKind(OpFlags)) {
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
+ } else {
+ Error = AP->PrintAsmOperand(MI, OpNo,
+ Modifier[0] ? Modifier : nullptr, OS);
+ }
+ }
+ if (Error) {
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+ MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
- }
- if (Error) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "invalid operand in inline asm: '" << AsmStr << "'";
- MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
}
break;
}
@@ -274,10 +307,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
MachineModuleInfo *MMI, const MCAsmInfo *MAI,
AsmPrinter *AP, uint64_t LocCookie,
raw_ostream &OS) {
- int CurVariant = -1; // The number of the {.|.|.} region we are in.
+ int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
- int AsmPrinterVariant = MAI->getAssemblerDialect();
+ int AsmPrinterVariant = MMI->getTarget().unqualifiedInlineAsmVariant();
if (MAI->getEmitGNUAsmStartIndentationMarker())
OS << '\t';
@@ -291,7 +324,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
*LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
++LiteralEnd;
if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
- OS.write(LastEmitted, LiteralEnd-LastEmitted);
+ OS.write(LastEmitted, LiteralEnd - LastEmitted);
LastEmitted = LiteralEnd;
break;
}
@@ -311,24 +344,24 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
OS << '$';
++LastEmitted; // Consume second '$' character.
break;
- case '(': // $( -> same as GCC's { character.
- ++LastEmitted; // Consume '(' character.
+ case '(': // $( -> same as GCC's { character.
+ ++LastEmitted; // Consume '(' character.
if (CurVariant != -1)
report_fatal_error("Nested variants found in inline asm string: '" +
Twine(AsmStr) + "'");
- CurVariant = 0; // We're in the first variant now.
+ CurVariant = 0; // We're in the first variant now.
break;
case '|':
- ++LastEmitted; // consume '|' character.
+ ++LastEmitted; // Consume '|' character.
if (CurVariant == -1)
- OS << '|'; // this is gcc's behavior for | outside a variant
+ OS << '|'; // This is gcc's behavior for | outside a variant.
else
- ++CurVariant; // We're in the next variant.
+ ++CurVariant; // We're in the next variant.
break;
- case ')': // $) -> same as GCC's } char.
- ++LastEmitted; // consume ')' character.
+ case ')': // $) -> same as GCC's } char.
+ ++LastEmitted; // Consume ')' character.
if (CurVariant == -1)
- OS << '}'; // this is gcc's behavior for } outside a variant
+ OS << '}'; // This is gcc's behavior for } outside a variant.
else
CurVariant = -1;
break;
@@ -351,9 +384,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
- std::string Val(StrStart, StrEnd);
- AP->PrintSpecial(MI, OS, Val.c_str());
+ if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+ AP->PrintSpecial(MI, OS, StringRef(StrStart, StrEnd - StrStart));
LastEmitted = StrEnd+1;
break;
}
@@ -369,6 +401,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
+ if (Val >= NumOperands - 1)
+ report_fatal_error("Invalid $ operand number in inline asm string: '" +
+ Twine(AsmStr) + "'");
+
char Modifier[2] = { 0, 0 };
if (HasCurlyBraces) {
@@ -390,10 +426,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++LastEmitted; // Consume '}' character.
}
- if (Val >= NumOperands-1)
- report_fatal_error("Invalid $ operand number in inline asm string: '" +
- Twine(AsmStr) + "'");
-
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
@@ -403,7 +435,8 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// Scan to find the machine operand number for the operand.
for (; Val; --Val) {
- if (OpNo >= MI->getNumOperands()) break;
+ if (OpNo >= MI->getNumOperands())
+ break;
unsigned OpFlags = MI->getOperand(OpNo).getImm();
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
@@ -411,12 +444,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
// We may have a location metadata attached to the end of the
// instruction, and at no point should see metadata at any
// other point while processing. It's an error if so.
- if (OpNo >= MI->getNumOperands() ||
- MI->getOperand(OpNo).isMetadata()) {
+ if (OpNo >= MI->getNumOperands() || MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
- ++OpNo; // Skip over the ID number.
+ ++OpNo; // Skip over the ID number.
// FIXME: Shouldn't arch-independent output template handling go into
// PrintAsmOperand?
@@ -429,8 +461,6 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
- } else if (Modifier[0] == 'l') {
- Error = true;
} else if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(
MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
@@ -506,7 +536,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
EmitGCCInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, MAI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might lead to undefined behaviour.
@@ -540,7 +570,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
"preserved across the asm statement, and clobbering them may "
"lead to undefined behaviour.";
MMI->getModule()->getContext().diagnose(DiagnosticInfoInlineAsm(
- LocCookie, Msg.c_str(), DiagnosticSeverity::DS_Warning));
+ LocCookie, Msg, DiagnosticSeverity::DS_Warning));
MMI->getModule()->getContext().diagnose(
DiagnosticInfoInlineAsm(LocCookie, Note, DiagnosticSeverity::DS_Note));
}
@@ -560,13 +590,13 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const {
/// syntax used is ${:comment}. Targets can override this to add support
/// for their own strange codes.
void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
- const char *Code) const {
- if (!strcmp(Code, "private")) {
+ StringRef Code) const {
+ if (Code == "private") {
const DataLayout &DL = MF->getDataLayout();
OS << DL.getPrivateGlobalPrefix();
- } else if (!strcmp(Code, "comment")) {
+ } else if (Code == "comment") {
OS << MAI->getCommentString();
- } else if (!strcmp(Code, "uid")) {
+ } else if (Code == "uid") {
// Comparing the address of MI isn't sufficient, because machineinstrs may
// be allocated to the same address across functions.
@@ -582,7 +612,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
raw_string_ostream Msg(msg);
Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
}
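As an illustration of what EmitMSInlineAsmStr now shares with the GCC path above: text outside any $( ... $| ... $) region is always emitted, while inside a region only the alternative matching the printer's dialect survives. The function below is a stripped-down standalone sketch, not LLVM code, and ignores operand references.

#include <string>

std::string filterVariants(const std::string &Asm, int AsmPrinterVariant) {
  std::string Out;
  int CurVariant = -1; // -1: not inside a variant region
  for (size_t I = 0; I < Asm.size(); ++I) {
    char C = Asm[I];
    if (C == '$' && I + 1 < Asm.size() &&
        (Asm[I + 1] == '(' || Asm[I + 1] == '|' || Asm[I + 1] == ')')) {
      char K = Asm[++I];
      if (K == '(') {
        CurVariant = 0;    // first alternative
      } else if (K == '|') {
        if (CurVariant == -1)
          Out += '|';      // gcc's behavior for $| outside a region
        else
          ++CurVariant;    // next alternative
      } else {             // K == ')'
        if (CurVariant == -1)
          Out += '}';      // gcc's behavior for $) outside a region
        else
          CurVariant = -1; // leave the region
      }
      continue;
    }
    if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
      Out += C;
  }
  return Out;
}
// e.g. filterVariants("mov $(%eax$|eax$)", 1) yields "mov eax".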
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index bbb0504550c3..85ff84484ced 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -341,7 +341,16 @@ std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) {
TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
// No scope means global scope and that uses the zero index.
- if (!Scope || isa<DIFile>(Scope))
+ //
+ // We also use zero index when the scope is a DISubprogram
+ // to suppress the emission of LF_STRING_ID for the function,
+ // which can trigger a link-time error with the linker in
+ // VS2019 version 16.11.2 or newer.
+ // Note, however, skipping the debug info emission for the DISubprogram
+ // is a temporary fix. The root issue here is that we need to figure out
+ // the proper way to encode a function nested in another function
+ // (as introduced by the Fortran 'contains' keyword) in CodeView.
+ if (!Scope || isa<DIFile>(Scope) || isa<DISubprogram>(Scope))
return TypeIndex();
assert(!isa<DIType>(Scope) && "shouldn't make a namespace scope for a type");
@@ -561,6 +570,44 @@ void CodeViewDebug::emitCodeViewMagicVersion() {
OS.emitInt32(COFF::DEBUG_SECTION_MAGIC);
}
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+ switch (DWLang) {
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::C;
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ return SourceLanguage::Cpp;
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran95:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ return SourceLanguage::Fortran;
+ case dwarf::DW_LANG_Pascal83:
+ return SourceLanguage::Pascal;
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ return SourceLanguage::Cobol;
+ case dwarf::DW_LANG_Java:
+ return SourceLanguage::Java;
+ case dwarf::DW_LANG_D:
+ return SourceLanguage::D;
+ case dwarf::DW_LANG_Swift:
+ return SourceLanguage::Swift;
+ default:
+ // There's no CodeView representation for this language, and CV doesn't
+ // have an "unknown" option for the language field, so we'll use MASM,
+ // as it's very low level.
+ return SourceLanguage::Masm;
+ }
+}
+
void CodeViewDebug::beginModule(Module *M) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
@@ -574,6 +621,13 @@ void CodeViewDebug::beginModule(Module *M) {
TheCPU = mapArchToCVCPUType(Triple(M->getTargetTriple()).getArch());
+ // Get the current source language.
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ CurrentSourceLanguage = MapDWLangToCVLang(CU->getSourceLanguage());
+
collectGlobalVariableInfo();
// Check if we should emit type record hashes.
@@ -731,43 +785,6 @@ void CodeViewDebug::emitTypeGlobalHashes() {
}
}
-static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
- switch (DWLang) {
- case dwarf::DW_LANG_C:
- case dwarf::DW_LANG_C89:
- case dwarf::DW_LANG_C99:
- case dwarf::DW_LANG_C11:
- case dwarf::DW_LANG_ObjC:
- return SourceLanguage::C;
- case dwarf::DW_LANG_C_plus_plus:
- case dwarf::DW_LANG_C_plus_plus_03:
- case dwarf::DW_LANG_C_plus_plus_11:
- case dwarf::DW_LANG_C_plus_plus_14:
- return SourceLanguage::Cpp;
- case dwarf::DW_LANG_Fortran77:
- case dwarf::DW_LANG_Fortran90:
- case dwarf::DW_LANG_Fortran03:
- case dwarf::DW_LANG_Fortran08:
- return SourceLanguage::Fortran;
- case dwarf::DW_LANG_Pascal83:
- return SourceLanguage::Pascal;
- case dwarf::DW_LANG_Cobol74:
- case dwarf::DW_LANG_Cobol85:
- return SourceLanguage::Cobol;
- case dwarf::DW_LANG_Java:
- return SourceLanguage::Java;
- case dwarf::DW_LANG_D:
- return SourceLanguage::D;
- case dwarf::DW_LANG_Swift:
- return SourceLanguage::Swift;
- default:
- // There's no CodeView representation for this language, and CV doesn't
- // have an "unknown" option for the language field, so we'll use MASM,
- // as it's very low level.
- return SourceLanguage::Masm;
- }
-}
-
namespace {
struct Version {
int Part[4];
@@ -797,12 +814,8 @@ void CodeViewDebug::emitCompilerInformation() {
MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_COMPILE3);
uint32_t Flags = 0;
- NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
- const MDNode *Node = *CUs->operands().begin();
- const auto *CU = cast<DICompileUnit>(Node);
-
// The low byte of the flags indicates the source language.
- Flags = MapDWLangToCVLang(CU->getSourceLanguage());
+ Flags = CurrentSourceLanguage;
// TODO: Figure out which other flags need to be set.
if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
@@ -814,6 +827,10 @@ void CodeViewDebug::emitCompilerInformation() {
OS.AddComment("CPUType");
OS.emitInt16(static_cast<uint64_t>(TheCPU));
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
StringRef CompilerVersion = CU->getProducer();
Version FrontVer = parseVersion(CompilerVersion);
OS.AddComment("Frontend version");
@@ -1573,6 +1590,8 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypeClass(cast<DICompositeType>(Ty));
case dwarf::DW_TAG_union_type:
return lowerTypeUnion(cast<DICompositeType>(Ty));
+ case dwarf::DW_TAG_string_type:
+ return lowerTypeString(cast<DIStringType>(Ty));
case dwarf::DW_TAG_unspecified_type:
if (Ty->getName() == "decltype(nullptr)")
return TypeIndex::NullptrT();
@@ -1617,14 +1636,19 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
const DISubrange *Subrange = cast<DISubrange>(Element);
int64_t Count = -1;
- // Calculate the count if either LowerBound is absent or is zero and
- // either of Count or UpperBound are constant.
- auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
- if (!Subrange->getRawLowerBound() || (LI && (LI->getSExtValue() == 0))) {
- if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt*>())
- Count = CI->getSExtValue();
- else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt*>())
- Count = UI->getSExtValue() + 1; // LowerBound is zero
+
+ // If Subrange has a Count field, use it.
+ // Otherwise, if it has an upperbound, use (upperbound - lowerbound + 1),
+ // where lowerbound is from the LowerBound field of the Subrange,
+ // or the language default lowerbound if that field is unspecified.
+ if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>())
+ Count = CI->getSExtValue();
+ else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) {
+ // Fortran uses 1 as the default lowerbound; other languages use 0.
+ int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
+ auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
+ Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
+ Count = UI->getSExtValue() - Lowerbound + 1;
}
// Forward declarations of arrays without a size and VLAs use a count of -1.
@@ -1650,6 +1674,26 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
return ElementTypeIndex;
}
+// This function lowers a Fortran character type (DIStringType).
+// Note that it handles only the character*n variant (using SizeInBits
+// field in DIString to describe the type size) at the moment.
+// Other variants (leveraging the StringLength and StringLengthExp
+// fields in DIStringType) remain TBD.
+TypeIndex CodeViewDebug::lowerTypeString(const DIStringType *Ty) {
+ TypeIndex CharType = TypeIndex(SimpleTypeKind::NarrowCharacter);
+ uint64_t ArraySize = Ty->getSizeInBits() >> 3;
+ StringRef Name = Ty->getName();
+ // IndexType is size_t, which depends on the bitness of the target.
+ TypeIndex IndexType = getPointerSizeInBytes() == 8
+ ? TypeIndex(SimpleTypeKind::UInt64Quad)
+ : TypeIndex(SimpleTypeKind::UInt32Long);
+
+ // Create a type of character array of ArraySize.
+ ArrayRecord AR(CharType, IndexType, ArraySize, Name);
+
+ return TypeTable.writeLeafType(AR);
+}
+
TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
TypeIndex Index;
dwarf::TypeKind Kind;
@@ -1728,9 +1772,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
}
// Apply some fixups based on the source-level type name.
- if (STK == SimpleTypeKind::Int32 && Ty->getName() == "long int")
+ // Include some amount of canonicalization from an old naming scheme Clang
+ // used to use for integer types (in an outdated effort to be compatible with
+ // GCC's debug info/GDB's behavior, which has since been addressed).
+ if (STK == SimpleTypeKind::Int32 &&
+ (Ty->getName() == "long int" || Ty->getName() == "long"))
STK = SimpleTypeKind::Int32Long;
- if (STK == SimpleTypeKind::UInt32 && Ty->getName() == "long unsigned int")
+ if (STK == SimpleTypeKind::UInt32 && (Ty->getName() == "long unsigned int" ||
+ Ty->getName() == "unsigned long"))
STK = SimpleTypeKind::UInt32Long;
if (STK == SimpleTypeKind::UInt16Short &&
(Ty->getName() == "wchar_t" || Ty->getName() == "__wchar_t"))
@@ -2177,6 +2226,7 @@ void CodeViewDebug::clear() {
TypeIndices.clear();
CompleteTypeIndices.clear();
ScopeGlobals.clear();
+ CVGlobalVariableOffsets.clear();
}
void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
@@ -3062,6 +3112,15 @@ void CodeViewDebug::collectGlobalVariableInfo() {
const DIGlobalVariable *DIGV = GVE->getVariable();
const DIExpression *DIE = GVE->getExpression();
+ if ((DIE->getNumElements() == 2) &&
+ (DIE->getElement(0) == dwarf::DW_OP_plus_uconst))
+ // Record the constant offset for the variable.
+ //
+ // A Fortran common block uses this idiom to encode the offset
+ // of a variable from the common block's starting address.
+ CVGlobalVariableOffsets.insert(
+ std::make_pair(DIGV, DIE->getElement(1)));
+
// Emit constant global variables in a global symbol section.
if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
CVGlobalVariable CVGV = {DIGV, DIE};
@@ -3226,7 +3285,11 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
DIGV->getRawStaticDataMemberDeclaration()))
Scope = MemberDecl->getScope();
- std::string QualifiedName = getFullyQualifiedName(Scope, DIGV->getName());
+ // For Fortran, the scoping portion is elided in its name so that we can
+ // reference the variable in the command line of the VS debugger.
+ std::string QualifiedName =
+ (moduleIsInFortran()) ? std::string(DIGV->getName())
+ : getFullyQualifiedName(Scope, DIGV->getName());
if (const GlobalVariable *GV =
CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
@@ -3242,7 +3305,13 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
OS.AddComment("Type");
OS.emitInt32(getCompleteTypeIndex(DIGV->getType()).getIndex());
OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+
+ uint64_t Offset = 0;
+ if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
+ // Use the offset seen while collecting info on globals.
+ Offset = CVGlobalVariableOffsets[DIGV];
+ OS.EmitCOFFSecRel32(GVSym, Offset);
+
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index d133474ee5aa..6f88e15ee8fe 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -186,6 +186,13 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
};
FunctionInfo *CurFn = nullptr;
+ codeview::SourceLanguage CurrentSourceLanguage =
+ codeview::SourceLanguage::Masm;
+
+ // This map records the constant offset in DIExpression of the
+ // DIGlobalVariableExpression referencing the DIGlobalVariable.
+ DenseMap<const DIGlobalVariable *, uint64_t> CVGlobalVariableOffsets;
+
// Map used to separate variables according to the lexical scope they belong
// in. This is populated by recordLocalVariable() before
// collectLexicalBlocks() separates the variables between the FunctionInfo
@@ -400,6 +407,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerType(const DIType *Ty, const DIType *ClassTy);
codeview::TypeIndex lowerTypeAlias(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeArray(const DICompositeType *Ty);
+ codeview::TypeIndex lowerTypeString(const DIStringType *Ty);
codeview::TypeIndex lowerTypeBasic(const DIBasicType *Ty);
codeview::TypeIndex lowerTypePointer(
const DIDerivedType *Ty,
@@ -464,6 +472,11 @@ protected:
/// Gather post-function debug information.
void endFunctionImpl(const MachineFunction *) override;
+ /// Check if the current module is in Fortran.
+ bool moduleIsInFortran() {
+ return CurrentSourceLanguage == codeview::SourceLanguage::Fortran;
+ }
+
public:
CodeViewDebug(AsmPrinter *AP);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 802f0e880514..5f4ee747fcca 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -93,19 +93,15 @@ void DIEHash::addParentContext(const DIE &Parent) {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
- E = Parents.rend();
- I != E; ++I) {
- const DIE &Die = **I;
-
+ for (const DIE *Die : llvm::reverse(Parents)) {
// ... Append the letter "C" to the sequence...
addULEB128('C');
// ... Followed by the DWARF tag of the construct...
- addULEB128(Die.getTag());
+ addULEB128(Die->getTag());
// ... Then the name, taken from the DW_AT_name attribute.
- StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+ StringRef Name = getDIEStringAttr(*Die, dwarf::DW_AT_name);
LLVM_DEBUG(dbgs() << "... adding context: " << Name << "\n");
if (!Name.empty())
addString(Name);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index bb24f1414ef1..dd795079ac1a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -252,8 +252,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// Now actually remove the entries. Iterate backwards so that our remaining
// ToRemove indices are valid after each erase.
- for (auto Itr = ToRemove.rbegin(), End = ToRemove.rend(); Itr != End; ++Itr)
- HistoryMapEntries.erase(HistoryMapEntries.begin() + *Itr);
+ for (EntryIndex Idx : llvm::reverse(ToRemove))
+ HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
}
}
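Both loops above are mechanical conversions to llvm::reverse (from llvm/ADT/STLExtras.h). A minimal standalone equivalent of the trimLocationRanges case, using a plain vector for illustration:

#include "llvm/ADT/STLExtras.h"
#include <vector>

void eraseMarked(std::vector<int> &Entries,
                 const std::vector<unsigned> &ToRemove) {
  // Walk the indices backwards so earlier erasures don't invalidate the
  // indices that remain to be processed.
  for (unsigned Idx : llvm::reverse(ToRemove))
    Entries.erase(Entries.begin() + Idx);
}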
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index c81288c0e460..4df34d2c9402 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -174,21 +174,26 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
}
bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
- // SROA may generate dbg value intrinsics to assign an unsigned value to a
- // Fortran CHARACTER(1) type variables. Make them as unsigned.
if (isa<DIStringType>(Ty)) {
- assert((Ty->getSizeInBits()) == 8 && "Not a valid unsigned type!");
+ // Some transformations (e.g. instcombine) may decide to turn a Fortran
+ // character object into an integer, and later ones (e.g. SROA) may
+ // further inject a constant integer in a llvm.dbg.value call to track
+ // the object's value. Here we trust the transformations are doing the
+ // right thing, and treat the constant as unsigned to preserve that value
+ // (i.e. avoid sign extension).
return true;
}
- if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- // FIXME: Enums without a fixed underlying type have unknown signedness
- // here, leading to incorrectly emitted constants.
- if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
- return false;
- // (Pieces of) aggregate types that get hacked apart by SROA may be
- // represented by a constant. Encode them as unsigned bytes.
- return true;
+ if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) {
+ if (!(Ty = CTy->getBaseType()))
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ return false;
+ } else
+ // (Pieces of) aggregate types that get hacked apart by SROA may be
+ // represented by a constant. Encode them as unsigned bytes.
+ return true;
}
if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 62ebadaf3cbe..d7ab2091967f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -158,7 +158,7 @@ public:
friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const {
- for (DbgValueLocEntry DV : ValueLocEntries)
+ for (const DbgValueLocEntry &DV : ValueLocEntries)
DV.dump();
if (Expression)
Expression->dump();
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index faa14dca1c3f..922c91840520 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -143,8 +143,6 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
auto *GVContext = GV->getScope();
const DIType *GTy = GV->getType();
- // Construct the context before querying for the existence of the DIE in
- // case such construction creates the DIE.
auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
: getOrCreateContextDIE(GVContext);
@@ -183,6 +181,8 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+ addAnnotation(*VariableDIE, GV->getAnnotations());
+
if (uint32_t AlignInBytes = GV->getAlignInBytes())
addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
@@ -260,14 +260,14 @@ void DwarfCompileUnit::addLocationAttribute(
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
if (Global->isThreadLocal()) {
if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a constNu of the appropriate pointer size
@@ -290,6 +290,24 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
+ } else if (Asm->TM.getRelocationModel() == Reloc::RWPI ||
+ Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) {
+ // Constant
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // Relocation offset
+ addExpr(*Loc, PointerSize == 4 ? dwarf::DW_FORM_data4
+ : dwarf::DW_FORM_data8,
+ Asm->getObjFileLowering().getIndirectSymViaRWPI(Sym));
+ // Base register
+ Register BaseReg = Asm->getObjFileLowering().getStaticBase();
+ BaseReg = Asm->TM.getMCRegisterInfo()->getDwarfRegNum(BaseReg, false);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + BaseReg);
+ // Offset from base register
+ addSInt(*Loc, dwarf::DW_FORM_sdata, 0);
+ // Operation
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
addOpAddress(*Loc, Sym);
@@ -331,12 +349,10 @@ void DwarfCompileUnit::addLocationAttribute(
DIE *DwarfCompileUnit::getOrCreateCommonBlock(
const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
- // Construct the context before querying for the existence of the DIE in case
- // such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
-
+ // Check for pre-existence.
if (DIE *NDie = getDIE(CB))
return NDie;
+ DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
addString(NDie, dwarf::DW_AT_name, Name);
@@ -351,7 +367,8 @@ DIE *DwarfCompileUnit::getOrCreateCommonBlock(
void DwarfCompileUnit::addRange(RangeSpan Range) {
DD->insertSectionLabel(Range.Begin);
- bool SameAsPrevCU = this == DD->getPrevCU();
+ auto *PrevCU = DD->getPrevCU();
+ bool SameAsPrevCU = this == PrevCU;
DD->setPrevCU(this);
// If we have no current ranges just add the range and return, otherwise,
// check the current section and CU against the previous section and CU we
@@ -360,6 +377,9 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
if (CURanges.empty() || !SameAsPrevCU ||
(&CURanges.back().End->getSection() !=
&Range.End->getSection())) {
+ // Before a new range is added, always terminate the prior line table.
+ if (PrevCU)
+ DD->terminateLineTable(PrevCU);
CURanges.push_back(Range);
return;
}
@@ -470,7 +490,6 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
if (!isDwoUnit()) {
addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
- DD->addArangeLabel(SymbolCU(this, SPSym));
} else {
// FIXME: when writing dwo, we need to avoid relocations. Probably
// the "right" solution is to treat globals the way func and data
@@ -961,9 +980,7 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
bool visitedAllDependencies = Item.getInt();
WorkList.pop_back();
- // Dependency is in a different lexical scope or a global.
- if (!Var)
- continue;
+ assert(Var);
// Already handled.
if (Visited.count(Var))
@@ -987,8 +1004,10 @@ sortLocalVars(SmallVectorImpl<DbgVariable *> &Input) {
// visited again after all of its dependencies are handled.
WorkList.push_back({Var, 1});
for (auto *Dependency : dependencies(Var)) {
- auto Dep = dyn_cast_or_null<const DILocalVariable>(Dependency);
- WorkList.push_back({DbgVar[Dep], 0});
+ // Don't add dependency if it is in a different lexical scope or a global.
+ if (const auto *Dep = dyn_cast<const DILocalVariable>(Dependency))
+ if (DbgVariable *Var = DbgVar.lookup(Dep))
+ WorkList.push_back({Var, 0});
}
}
return Result;
@@ -1103,9 +1122,10 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
// shouldn't be found by lookup.
AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
-
- if (!ContextCU->includeMinimalInlineScopes())
- ContextCU->addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline,
+ DD->getDwarfVersion() <= 4 ? Optional<dwarf::Form>()
+ : dwarf::DW_FORM_implicit_const,
+ dwarf::DW_INL_inlined);
if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
@@ -1162,7 +1182,7 @@ DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
}
DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
- DIE *CalleeDIE,
+ const DISubprogram *CalleeSP,
bool IsTail,
const MCSymbol *PCAddr,
const MCSymbol *CallAddr,
@@ -1176,7 +1196,8 @@ DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
MachineLocation(CallReg));
} else {
- assert(CalleeDIE && "No DIE for call site entry origin");
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
*CalleeDIE);
}
@@ -1265,6 +1286,16 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
if (!Name.empty())
addString(*IMDie, dwarf::DW_AT_name, Name);
+ // This is for imported module with renamed entities (such as variables and
+ // subprograms).
+ DINodeArray Elements = Module->getElements();
+ for (const auto *Element : Elements) {
+ if (!Element)
+ continue;
+ IMDie->addChild(
+ constructImportedEntityDIE(cast<DIImportedEntity>(Element)));
+ }
+
return IMDie;
}
@@ -1479,10 +1510,12 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
const auto *DIVar = Var.getVariable();
- if (DIVar)
+ if (DIVar) {
if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
AlignInBytes);
+ addAnnotation(VariableDie, DIVar->getAnnotations());
+ }
addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
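
The reordered getOrCreateCommonBlock above now checks the CU's DIE map before building the context DIE. A rough standalone sketch of that create-or-reuse shape (Node, Die and the map here are placeholders, not the real DICommonBlock/DIE classes):

    #include <map>
    #include <memory>
    #include <vector>

    struct Node { const Node *Scope = nullptr; };            // stand-in for a DI node
    struct Die  { std::vector<std::unique_ptr<Die>> Children; };

    struct Unit {
      Die Root;
      std::map<const Node *, Die *> MDNodeToDie;             // stand-in for the CU's DIE map

      Die *getOrCreateDie(const Node *N) {
        if (!N)
          return &Root;
        // Check for pre-existence before constructing anything, mirroring the
        // reordered getOrCreateCommonBlock.
        if (Die *Existing = MDNodeToDie[N])
          return Existing;
        Die *Context = getOrCreateDie(N->Scope);             // parent first
        Context->Children.push_back(std::make_unique<Die>());
        return MDNodeToDie[N] = Context->Children.back().get();
      }
    };
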
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 6d8186a5ee2b..6e9261087686 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -249,16 +249,14 @@ public:
dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeDIE.
- /// \p CalleeDIE is a declaration or definition subprogram DIE for the callee.
- /// For indirect calls \p CalleeDIE is set to nullptr.
+ /// callee described by \p CalleeSP.
/// \p IsTail specifies whether the call is a tail call.
/// \p PCAddr points to the PC value after the call instruction.
/// \p CallAddr points to the PC value at the call instruction (or is null).
/// \p CallReg is a register location for an indirect call. For direct calls
/// the \p CallReg is set to 0.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, DIE *CalleeDIE, bool IsTail,
- const MCSymbol *PCAddr,
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
const MCSymbol *CallAddr, unsigned CallReg);
/// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
/// were collected by the \ref collectCallSiteParameters.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee14423ca3d0..047676d4c11e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -480,7 +480,7 @@ static bool hasObjCCategory(StringRef Name) {
if (!isObjCClass(Name))
return false;
- return Name.find(") ") != StringRef::npos;
+ return Name.contains(") ");
}
static void getObjCClassCategory(StringRef In, StringRef &Class,
@@ -587,14 +587,6 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
-DIE &DwarfDebug::constructSubprogramDefinitionDIE(const DISubprogram *SP) {
- DICompileUnit *Unit = SP->getUnit();
- assert(SP->isDefinition() && "Subprogram not a definition");
- assert(Unit && "Subprogram definition without parent unit");
- auto &CU = getOrCreateDwarfCompileUnit(Unit);
- return *CU.getOrCreateSubprogramDIE(SP);
-}
-
/// Represents a parameter whose call site value can be described by applying a
/// debug expression to a register in the forwarded register worklist.
struct FwdRegParamInfo {
@@ -945,7 +937,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
continue;
unsigned CallReg = 0;
- DIE *CalleeDIE = nullptr;
+ const DISubprogram *CalleeSP = nullptr;
const Function *CalleeDecl = nullptr;
if (CalleeOp.isReg()) {
CallReg = CalleeOp.getReg();
@@ -955,19 +947,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
if (!CalleeDecl || !CalleeDecl->getSubprogram())
continue;
- const DISubprogram *CalleeSP = CalleeDecl->getSubprogram();
-
- if (CalleeSP->isDefinition()) {
- // Ensure that a subprogram DIE for the callee is available in the
- // appropriate CU.
- CalleeDIE = &constructSubprogramDefinitionDIE(CalleeSP);
- } else {
- // Create the declaration DIE if it is missing. This is required to
- // support compilation of old bitcode with an incomplete list of
- // retained metadata.
- CalleeDIE = CU.getOrCreateSubprogramDIE(CalleeSP);
- }
- assert(CalleeDIE && "Must have a DIE for the callee");
+ CalleeSP = CalleeDecl->getSubprogram();
}
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
@@ -1004,7 +984,7 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
<< (IsTail ? " [IsTail]" : "") << "\n");
DIE &CallSiteDIE = CU.constructCallSiteEntryDIE(
- ScopeDIE, CalleeDIE, IsTail, PCAddr, CallAddr, CallReg);
+ ScopeDIE, CalleeSP, IsTail, PCAddr, CallAddr, CallReg);
// Optionally emit call-site-param debug info.
if (emitDebugEntryValues()) {
@@ -1427,6 +1407,10 @@ void DwarfDebug::finalizeModuleInfo() {
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
+ PrevCU = nullptr;
assert(CurFn == nullptr);
assert(CurMI == nullptr);
@@ -2102,12 +2086,22 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
- for (const auto &MBB : *MF)
- for (const auto &MI : MBB)
+ DebugLoc LineZeroLoc;
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc())
- return MI.getDebugLoc();
- return DebugLoc();
+ MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The prologue_end
+ // marks the first breakpoint in the function after the frame setup, and
+ // a compiler-generated line 0 location is not a meaningful breakpoint.
+ // If none is found, return the first location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return MI.getDebugLoc();
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ }
+ }
+ return LineZeroLoc;
}
/// Register a source line with debug info. Returns the unique label that was
@@ -2162,24 +2156,42 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
+ Asm->OutStreamer->getContext().setDwarfCompileUnitID(
+ getDwarfCompileUnitIDForLineTable(CU));
+
+ // Record beginning of function.
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+}
+
+unsigned
+DwarfDebug::getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU) {
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
// belongs to so that we add to the correct per-cu line table in the
// non-asm case.
if (Asm->OutStreamer->hasRawTextSupport())
// Use a single line table if we are generating assembly.
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
+ return 0;
else
- Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
+ return CU.getUniqueID();
+}
- // Record beginning of function.
- PrologEndLoc = emitInitialLocDirective(
- *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
+void DwarfDebug::terminateLineTable(const DwarfCompileUnit *CU) {
+ const auto &CURanges = CU->getRanges();
+ auto &LineTable = Asm->OutStreamer->getContext().getMCDwarfLineTable(
+ getDwarfCompileUnitIDForLineTable(*CU));
+ // Add the last range label for the given CU.
+ LineTable.getMCLineSections().addEndEntry(
+ const_cast<MCSymbol *>(CURanges.back().End));
}
void DwarfDebug::skippedNonDebugFunction() {
// If we don't have a subprogram for this function then there will be a hole
// in the range information. Keep note of this by setting the previously used
// section to nullptr.
+ // Terminate the pending line table.
+ if (PrevCU)
+ terminateLineTable(PrevCU);
PrevCU = nullptr;
CurFn = nullptr;
}
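
The findPrologueEndLoc change above prefers the first post-frame-setup location with a non-zero line and only falls back to a line-0 location when nothing better exists. A simplified standalone model of that scan (the Inst struct and its fields are illustrative, not the MachineInstr API):

    #include <optional>
    #include <vector>

    struct Inst {
      bool IsMeta = false;          // DBG_VALUE and similar meta instructions
      bool IsFrameSetup = false;
      std::optional<unsigned> Line; // empty == no debug location attached
    };

    // Pick the line to attach prologue_end to: the first located, non-meta,
    // non-frame-setup instruction with a non-zero line, else a line-0 fallback.
    std::optional<unsigned> findPrologueEndLine(const std::vector<Inst> &Body) {
      std::optional<unsigned> LineZero;
      for (const Inst &I : Body) {
        if (I.IsMeta || I.IsFrameSetup || !I.Line)
          continue;
        if (*I.Line != 0)
          return I.Line;     // a meaningful breakpoint location
        LineZero = I.Line;   // remember a line-0 location as a fallback
      }
      return LineZero;
    }
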
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6356a65b50d3..4e1a1b1e068d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -65,19 +65,21 @@ class Module;
/// such that it could leverage polymorphism to extract common code for
/// DbgVariable and DbgLabel.
class DbgEntity {
- const DINode *Entity;
- const DILocation *InlinedAt;
- DIE *TheDIE = nullptr;
- unsigned SubclassID;
-
public:
enum DbgEntityKind {
DbgVariableKind,
DbgLabelKind
};
- DbgEntity(const DINode *N, const DILocation *IA, unsigned ID)
- : Entity(N), InlinedAt(IA), SubclassID(ID) {}
+private:
+ const DINode *Entity;
+ const DILocation *InlinedAt;
+ DIE *TheDIE = nullptr;
+ const DbgEntityKind SubclassID;
+
+public:
+ DbgEntity(const DINode *N, const DILocation *IA, DbgEntityKind ID)
+ : Entity(N), InlinedAt(IA), SubclassID(ID) {}
virtual ~DbgEntity() {}
/// Accessors.
@@ -85,19 +87,18 @@ public:
const DINode *getEntity() const { return Entity; }
const DILocation *getInlinedAt() const { return InlinedAt; }
DIE *getDIE() const { return TheDIE; }
- unsigned getDbgEntityID() const { return SubclassID; }
+ DbgEntityKind getDbgEntityID() const { return SubclassID; }
/// @}
void setDIE(DIE &D) { TheDIE = &D; }
static bool classof(const DbgEntity *N) {
switch (N->getDbgEntityID()) {
- default:
- return false;
case DbgVariableKind:
case DbgLabelKind:
return true;
}
+ llvm_unreachable("Invalid DbgEntityKind");
}
};
@@ -471,9 +472,6 @@ private:
/// Construct a DIE for this abstract scope.
void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, LexicalScope *Scope);
- /// Construct a DIE for the subprogram definition \p SP and return it.
- DIE &constructSubprogramDefinitionDIE(const DISubprogram *SP);
-
/// Construct DIEs for call site entries describing the calls in \p MF.
void constructCallSiteEntryDIEs(const DISubprogram &SP, DwarfCompileUnit &CU,
DIE &ScopeDIE, const MachineFunction &MF);
@@ -615,7 +613,7 @@ private:
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
- /// function that describe the same variable. If the resulting
+ /// function that describe the same variable. If the resulting
/// list has only one entry that is valid for entire variable's
/// scope return true.
bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
@@ -635,6 +633,9 @@ protected:
/// Gather and emit post-function debug information.
void endFunctionImpl(const MachineFunction *MF) override;
+ /// Get Dwarf compile unit ID for line table.
+ unsigned getDwarfCompileUnitIDForLineTable(const DwarfCompileUnit &CU);
+
void skippedNonDebugFunction() override;
public:
@@ -781,6 +782,9 @@ public:
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; }
+ /// Terminate the line table by adding the last range label.
+ void terminateLineTable(const DwarfCompileUnit *CU);
+
/// Returns the entries for the .debug_loc section.
const DebugLocStream &getDebugLocs() const { return DebugLocs; }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 344d30fad347..976e35905144 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -186,9 +186,8 @@ int64_t DwarfUnit::getDefaultLowerBound() const {
/// Check whether the DIE for this MDNode can be shared across CUs.
bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
- // When the MDNode can be part of the type system (this includes subprogram
- // declarations *and* subprogram definitions, even local definitions), the
- // DIE must be shared across CUs.
+ // When the MDNode can be part of the type system, the DIE can be shared
+ // across CUs.
// Combining type units and cross-CU DIE sharing is lower value (since
// cross-CU DIE sharing is used in LTO and removes type redundancy at that
// level already) but may be implementable for some value in projects
@@ -196,7 +195,9 @@ bool DwarfUnit::isShareableAcrossCUs(const DINode *D) const {
// together.
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
return false;
- return (isa<DIType>(D) || isa<DISubprogram>(D)) && !DD->generateTypeUnits();
+ return (isa<DIType>(D) ||
+ (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) &&
+ !DD->generateTypeUnits();
}
DIE *DwarfUnit::getDIE(const DINode *D) const {
@@ -671,7 +672,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
// Reverse iterate over our list to go from the outermost construct to the
// innermost.
- for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
+ for (const DIScope *Ctx : llvm::reverse(Parents)) {
StringRef Name = Ctx->getName();
if (Name.empty() && isa<DINamespace>(Ctx))
Name = "(anonymous namespace)";
@@ -753,6 +754,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ addAnnotation(Buffer, DTy->getAnnotations());
+
// If alignment is specified for a typedef , create and insert DW_AT_alignment
// attribute in DW_TAG_typedef DIE.
if (Tag == dwarf::DW_TAG_typedef && DD->getDwarfVersion() >= 5) {
@@ -832,6 +835,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
}
+void DwarfUnit::addAnnotation(DIE &Buffer, DINodeArray Annotations) {
+ if (!Annotations)
+ return;
+
+ for (const Metadata *Annotation : Annotations->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotation);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+
+ // Currently, only MDString is supported with btf_decl_tag attribute.
+ const MDString *Value = cast<MDString>(MD->getOperand(1));
+
+ DIE &AnnotationDie = createAndAddDIE(dwarf::DW_TAG_LLVM_annotation, Buffer);
+ addString(AnnotationDie, dwarf::DW_AT_name, Name->getString());
+ addString(AnnotationDie, dwarf::DW_AT_const_value, Value->getString());
+ }
+}
+
void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// Add name if not anonymous or intermediate type.
StringRef Name = CTy->getName();
@@ -849,7 +869,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
case dwarf::DW_TAG_variant_part:
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
- case dwarf::DW_TAG_class_type: {
+ case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_namelist: {
// Emit the discriminator for a variant part.
DIDerivedType *Discriminator = nullptr;
if (Tag == dwarf::DW_TAG_variant_part) {
@@ -918,6 +939,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &VariantPart = createAndAddDIE(Composite->getTag(), Buffer);
constructTypeDIE(VariantPart, Composite);
}
+ } else if (Tag == dwarf::DW_TAG_namelist) {
+ auto *Var = dyn_cast<DINode>(Element);
+ auto *VarDIE = getDIE(Var);
+ if (VarDIE) {
+ DIE &ItemDie = createAndAddDIE(dwarf::DW_TAG_namelist_item, Buffer);
+ addDIEEntry(ItemDie, dwarf::DW_AT_namelist_item, *VarDIE);
+ }
}
}
@@ -960,6 +988,8 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (!Name.empty())
addString(Buffer, dwarf::DW_AT_name, Name);
+ addAnnotation(Buffer, CTy->getAnnotations());
+
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
Tag == dwarf::DW_TAG_union_type) {
@@ -1196,6 +1226,8 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (!SP->getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP->getName());
+ addAnnotation(SPDie, SP->getAnnotations());
+
if (!SkipSPSourceLocation)
addSourceLine(SPDie, SP);
@@ -1546,6 +1578,8 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
+ addAnnotation(MemberDie, DT->getAnnotations());
+
if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
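
addAnnotation above emits one DW_TAG_LLVM_annotation child per (name, value) metadata pair, typically produced by the btf_decl_tag attribute, carrying DW_AT_name and DW_AT_const_value. The same shape in a standalone sketch (plain structs and strings stand in for DIE and the MDString operands):

    #include <string>
    #include <utility>
    #include <vector>

    struct AnnotationDie {
      std::string Name;   // becomes DW_AT_name, e.g. "btf_decl_tag"
      std::string Value;  // becomes DW_AT_const_value
    };

    struct Die {
      std::vector<AnnotationDie> Annotations; // DW_TAG_LLVM_annotation children
    };

    // Mirrors DwarfUnit::addAnnotation: one child DIE per (name, value) pair.
    void addAnnotations(Die &Buffer,
                        const std::vector<std::pair<std::string, std::string>> &Anns) {
      for (const auto &A : Anns)
        Buffer.Annotations.push_back({A.first, A.second});
    }
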
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 4d31dd0daf59..8140279adaef 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -294,6 +294,9 @@ public:
void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label, const MCSymbol *Sec);
+ /// Add DW_TAG_LLVM_annotation.
+ void addAnnotation(DIE &Buffer, DINodeArray Annotations);
+
/// Get context owner's DIE.
DIE *createTypeDIE(const DICompositeType *Ty);
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e589c2e64abd..150f19324834 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -812,8 +812,7 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding, MCSymbol *TTBaseLabel) {
Entry = TypeInfos.size();
}
- for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
- TypeInfos.rend())) {
+ for (const GlobalValue *GV : llvm::reverse(TypeInfos)) {
if (VerboseAsm)
Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
Asm->emitTTypeReference(GV, TTypeEncoding);
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 35a830f416f6..9e6f1a537de3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
+PseudoProbeHandler::~PseudoProbeHandler() = default;
+
void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
uint64_t Type, uint64_t Attr,
const DILocation *DebugLoc) {
@@ -35,7 +37,10 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
auto Name = SP->getLinkageName();
if (Name.empty())
Name = SP->getName();
- uint64_t CallerGuid = Function::getGUID(Name);
+ // Use caching to avoid redundant md5 computation for build speed.
+ uint64_t &CallerGuid = NameGuidMap[Name];
+ if (!CallerGuid)
+ CallerGuid = Function::getGUID(Name);
uint64_t CallerProbeId = PseudoProbeDwarfDiscriminator::extractProbeIndex(
InlinedAt->getDiscriminator());
ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
index f2026a118bf5..7d5e51218693 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h
@@ -26,9 +26,12 @@ class DILocation;
class PseudoProbeHandler : public AsmPrinterHandler {
// Target of pseudo probe emission.
AsmPrinter *Asm;
+ // Name to GUID map, used as caching/memoization for speed.
+ DenseMap<StringRef, uint64_t> NameGuidMap;
public:
PseudoProbeHandler(AsmPrinter *A) : Asm(A){};
+ ~PseudoProbeHandler() override;
void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
uint64_t Attr, const DILocation *DebugLoc);
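
The NameGuidMap cache above leans on DenseMap::operator[] value-initialising new slots to 0 and on a real GUID never being 0. The same memoization pattern in plain C++ (std::hash stands in for the MD5-based Function::getGUID; that substitution is an assumption of this sketch only):

    #include <cstdint>
    #include <functional>
    #include <string>
    #include <unordered_map>

    class GuidCache {
      std::unordered_map<std::string, uint64_t> Map;

      static uint64_t computeGuid(const std::string &Name) {
        // Stand-in for the MD5-based hash; only the caching pattern matters.
        return std::hash<std::string>{}(Name) | 1; // keep 0 reserved for "unset"
      }

    public:
      uint64_t get(const std::string &Name) {
        uint64_t &Slot = Map[Name]; // value-initialised to 0 on first access
        if (!Slot)
          Slot = computeGuid(Name); // computed once, reused on later calls
        return Slot;
      }
    };
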
diff --git a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
index 352a33e8639d..a17a2ca2790e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -18,16 +18,25 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'catch' instruction to denote this
- // is a C++ exception. This symbol has to be emitted somewhere once in the
- // module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'catch' instruction in the module, and emit the symbol
- // only if so.
- SmallString<60> NameStr;
- Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
- if (Asm->OutContext.lookupSymbol(NameStr)) {
- MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol("__cpp_exception");
- Asm->OutStreamer->emitLabel(ExceptionSym);
+ // These are symbols used to throw/catch C++ exceptions and C longjmps. These
+ // symbols have to be emitted somewhere once in the module. Check if each of
+ // the symbols has already been created, i.e., we have at least one 'throw' or
+ // 'catch' instruction with the symbol in the module, and emit the symbol only
+ // if so.
+ //
+ // But in dynamic linking, it is in general not possible to come up with a
+ // module instantiating order in which tag-defining modules are loaded before
+ // the importing modules. So we make them undefined symbols here, define tags
+ // in the JS side, and feed them to each importing module.
+ if (!Asm->isPositionIndependent()) {
+ for (const char *SymName : {"__cpp_exception", "__c_longjmp"}) {
+ SmallString<60> NameStr;
+ Mangler::getNameWithPrefix(NameStr, SymName, Asm->getDataLayout());
+ if (Asm->OutContext.lookupSymbol(NameStr)) {
+ MCSymbol *ExceptionSym = Asm->GetExternalSymbolSymbol(SymName);
+ Asm->OutStreamer->emitLabel(ExceptionSym);
+ }
+ }
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index b30d9cc12abc..ef57031c7294 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -43,6 +43,7 @@ WinException::WinException(AsmPrinter *A) : EHStreamer(A) {
// platforms use an imagerel32 relocation to refer to symbols.
useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64);
isAArch64 = Asm->TM.getTargetTriple().isAArch64();
+ isThumb = Asm->TM.getTargetTriple().isThumb();
}
WinException::~WinException() {}
@@ -330,10 +331,12 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
}
const MCExpr *WinException::getLabel(const MCSymbol *Label) {
- if (isAArch64)
- return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
- Asm->OutContext);
- return MCBinaryExpr::createAdd(create32bitRef(Label),
+ return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32,
+ Asm->OutContext);
+}
+
+const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) {
+ return MCBinaryExpr::createAdd(getLabel(Label),
MCConstantExpr::create(1, Asm->OutContext),
Asm->OutContext);
}
@@ -561,8 +564,8 @@ InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
/// struct Table {
/// int NumEntries;
/// struct Entry {
-/// imagerel32 LabelStart;
-/// imagerel32 LabelEnd;
+/// imagerel32 LabelStart; // Inclusive
+/// imagerel32 LabelEnd; // Exclusive
/// imagerel32 FilterOrFinally; // One means catch-all.
/// imagerel32 LabelLPad; // Zero means __finally.
/// } Entries[NumEntries];
@@ -664,7 +667,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
AddComment("LabelStart");
OS.emitValue(getLabel(BeginLabel), 4);
AddComment("LabelEnd");
- OS.emitValue(getLabel(EndLabel), 4);
+ OS.emitValue(getLabelPlusOne(EndLabel), 4);
AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction"
: "CatchAll");
OS.emitValue(FilterOrFinally, 4);
@@ -949,8 +952,15 @@ void WinException::computeIP2StateTable(
if (!ChangeLabel)
ChangeLabel = StateChange.PreviousEndLabel;
// Emit an entry indicating that PCs after 'Label' have this EH state.
+ // NOTE: On ARM architectures, the StateFromIp automatically takes into
+ // account that the return address is after the call instruction (whose EH
+ // state we should be using), but on other platforms we need to +1 to the
+ // label so that we are using the correct EH state.
+ const MCExpr *LabelExpression = (isAArch64 || isThumb)
+ ? getLabel(ChangeLabel)
+ : getLabelPlusOne(ChangeLabel);
IPToStateTable.push_back(
- std::make_pair(getLabel(ChangeLabel), StateChange.NewState));
+ std::make_pair(LabelExpression, StateChange.NewState));
// FIXME: assert that NewState is between CatchLow and CatchHigh.
}
}
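
The IP-to-state change above biases each change label by one byte on non-ARM targets: the unwinder queries the table with a return address, which points just past the call, so without the bias a state change placed immediately after a call would shadow the call's own state. A simplified model of the consumer-side lookup (the table layout and the -1 "no handler" state are assumptions of the sketch):

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Each entry means: "instruction pointers at or above First have state Second".
    using IPToStateTable = std::vector<std::pair<uint64_t, int>>;

    // Walk the (address-sorted) table and keep the state of the last entry whose
    // start address does not exceed the queried IP.
    int stateForIP(const IPToStateTable &Table, uint64_t IP) {
      int State = -1; // assumed "no active handler" state
      for (const auto &Entry : Table) {
        if (Entry.first > IP)
          break;
        State = Entry.second;
      }
      return State;
    }

    // With the +1 bias applied by the producer (getLabelPlusOne), a return
    // address equal to the raw change label still resolves to the call's state;
    // on ARM/Thumb the runtime adjusts the IP instead, so the raw label is used.
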
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.h b/llvm/lib/CodeGen/AsmPrinter/WinException.h
index feea05ba63ad..638589adf0dd 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -39,6 +39,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// True if we are generating exception handling on Windows for ARM64.
bool isAArch64 = false;
+ /// True if we are generating exception handling on Windows for ARM (Thumb).
+ bool isThumb = false;
+
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
@@ -77,6 +80,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
const MCExpr *create32bitRef(const MCSymbol *Value);
const MCExpr *create32bitRef(const GlobalValue *GV);
const MCExpr *getLabel(const MCSymbol *Label);
+ const MCExpr *getLabelPlusOne(const MCSymbol *Label);
const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
const MCSymbol *OffsetFrom);
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 125a3be585cb..4838f6da750d 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -179,11 +180,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
// Changing control-flow while iterating through it is a bad idea, so gather a
// list of all atomic instructions before we start.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
- if (I->isAtomic() && !isa<FenceInst>(I))
- AtomicInsts.push_back(I);
- }
+ for (Instruction &I : instructions(F))
+ if (I.isAtomic() && !isa<FenceInst>(&I))
+ AtomicInsts.push_back(&I);
bool MadeChange = false;
for (auto I : AtomicInsts) {
@@ -570,7 +569,9 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
- switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+ LLVMContext &Ctx = AI->getModule()->getContext();
+ TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+ switch (Kind) {
case TargetLoweringBase::AtomicExpansionKind::None:
return false;
case TargetLoweringBase::AtomicExpansionKind::LLSC: {
@@ -600,6 +601,18 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[AI->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[AI->getSyncScopeID()];
+ OptimizationRemarkEmitter ORE(AI->getFunction());
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
+ << "A compare and swap loop was generated for an atomic "
+ << AI->getOperationName(AI->getOperation()) << " operation at "
+ << MemScope << " memory scope";
+ });
expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
}
return true;
@@ -1850,7 +1863,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// Now, the return type.
if (CASExpected) {
ResultTy = Type::getInt1Ty(Ctx);
- Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
+ Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
} else if (HasResult && UseSizedLibcall)
ResultTy = SizedIntTy;
else
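
The new remark above fires when tryExpandAtomicRMW lowers an atomicrmw to a compare-and-swap loop. The control flow of that expansion, sketched with std::atomic rather than the IR the pass actually emits:

    #include <atomic>
    #include <cstdint>

    // Expand an atomic read-modify-write (here: add) into a CAS loop, the same
    // structure expandAtomicRMWToCmpXchg builds in IR.
    uint32_t atomicAddViaCas(std::atomic<uint32_t> &Addr, uint32_t Val) {
      uint32_t Loaded = Addr.load(std::memory_order_relaxed);
      uint32_t Desired;
      do {
        Desired = Loaded + Val; // the RMW operation being expanded
        // On failure, compare_exchange_weak refreshes Loaded with the current value.
      } while (!Addr.compare_exchange_weak(Loaded, Desired,
                                           std::memory_order_seq_cst,
                                           std::memory_order_relaxed));
      return Loaded; // atomicrmw yields the old value
    }
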
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index 1a6eed272ca2..c1901bc46d72 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -21,9 +21,21 @@
// clusters of basic blocks. Every cluster will be emitted into a separate
// section with its basic blocks sequenced in the given order. To get the
// optimized performance, the clusters must form an optimal BB layout for the
-// function. Every cluster's section is labeled with a symbol to allow the
-// linker to reorder the sections in any arbitrary sequence. A global order of
-// these sections would encapsulate the function layout.
+// function. We insert a symbol at the beginning of every cluster's section to
+// allow the linker to reorder the sections in any arbitrary sequence. A global
+// order of these sections would encapsulate the function layout.
+// For example, consider the following clusters for a function foo (consisting
+// of 6 basic blocks 0, 1, ..., 5).
+//
+// 0 2
+// 1 3 5
+//
+// * Basic blocks 0 and 2 are placed in one section with symbol `foo`
+// referencing the beginning of this section.
+// * Basic blocks 1, 3, 5 are placed in a separate section. A new symbol
+// `foo.__part.1` will reference the beginning of this section.
+// * Basic block 4 (note that it is not referenced in the list) is placed in
+// one section, and a new symbol `foo.cold` will point to it.
//
// There are a couple of challenges to be addressed:
//
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 65e7e92fe152..5ac8f49a9522 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -611,7 +611,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// there are fallthroughs, and we don't know until after layout.
if (AfterPlacement && FullBlockTail1 && FullBlockTail2) {
auto BothFallThrough = [](MachineBasicBlock *MBB) {
- if (MBB->succ_size() != 0 && !MBB->canFallThrough())
+ if (!MBB->succ_empty() && !MBB->canFallThrough())
return false;
MachineFunction::iterator I(MBB);
MachineFunction *MF = MBB->getParent();
@@ -1198,14 +1198,13 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
// Renumbering blocks alters EH scope membership, recalculate it.
EHScopeMembership = getEHScopeMembership(MF);
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ) {
- MachineBasicBlock *MBB = &*I++;
- MadeChange |= OptimizeBlock(MBB);
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(MF))) {
+ MadeChange |= OptimizeBlock(&MBB);
// If it is dead, remove it.
- if (MBB->pred_empty()) {
- RemoveDeadBlock(MBB);
+ if (MBB.pred_empty()) {
+ RemoveDeadBlock(&MBB);
MadeChange = true;
++NumDeadBlocks;
}
@@ -1753,10 +1752,8 @@ ReoptimizeBlock:
bool BranchFolder::HoistCommonCode(MachineFunction &MF) {
bool MadeChange = false;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) {
- MachineBasicBlock *MBB = &*I++;
- MadeChange |= HoistCommonCodeInSuccs(MBB);
- }
+ for (MachineBasicBlock &MBB : llvm::make_early_inc_range(MF))
+ MadeChange |= HoistCommonCodeInSuccs(&MBB);
return MadeChange;
}
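
Both loops above switch to llvm::make_early_inc_range, which advances the iterator before the body runs so the current element may be erased. The hand-written equivalent it replaces, shown on a std::list for illustration:

    #include <list>

    // Drop "dead" elements (modelled here as non-positive values) while walking
    // the list; Cur is saved and the iterator advanced before any erase happens.
    void pruneDead(std::list<int> &Blocks) {
      for (auto It = Blocks.begin(), End = Blocks.end(); It != End;) {
        auto Cur = It++;      // early increment, as make_early_inc_range does
        if (*Cur <= 0)
          Blocks.erase(Cur);  // erasing Cur leaves It valid
      }
    }
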
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 366c303614d6..50825ccf9bac 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -463,10 +463,48 @@ bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
DebugLoc DL = MI.getDebugLoc();
MI.eraseFromParent();
- BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
- *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+ // Create the optional restore block and, initially, place it at the end of
+ // function. That block will be placed later if it's used; otherwise, it will
+ // be erased.
+ MachineBasicBlock *RestoreBB = createNewBlockAfter(MF->back());
+
+ TII->insertIndirectBranch(*BranchBB, *DestBB, *RestoreBB, DL,
+ DestOffset - SrcOffset, RS.get());
+
+ BlockInfo[BranchBB->getNumber()].Size = computeBlockSize(*BranchBB);
adjustBlockOffsets(*MBB);
+
+ // If RestoreBB is required, try to place just before DestBB.
+ if (!RestoreBB->empty()) {
+ // TODO: For multiple far branches to the same destination, there are
+ // chances that some restore blocks could be shared if they clobber the
+ // same registers and share the same restore sequence. So far, those
+ // restore blocks are just duplicated for each far branch.
+ assert(!DestBB->isEntryBlock());
+ MachineBasicBlock *PrevBB = &*std::prev(DestBB->getIterator());
+ if (auto *FT = PrevBB->getFallThrough()) {
+ assert(FT == DestBB);
+ TII->insertUnconditionalBranch(*PrevBB, FT, DebugLoc());
+ // Recalculate the block size.
+ BlockInfo[PrevBB->getNumber()].Size = computeBlockSize(*PrevBB);
+ }
+ // Now, RestoreBB could be placed directly before DestBB.
+ MF->splice(DestBB->getIterator(), RestoreBB->getIterator());
+ // Update successors and predecessors.
+ RestoreBB->addSuccessor(DestBB);
+ BranchBB->replaceSuccessor(DestBB, RestoreBB);
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeAndAddLiveIns(LiveRegs, *RestoreBB);
+ // Compute the restore block size.
+ BlockInfo[RestoreBB->getNumber()].Size = computeBlockSize(*RestoreBB);
+ // Update the offset starting from the previous block.
+ adjustBlockOffsets(*PrevBB);
+ } else {
+ // Remove restore block if it's not required.
+ MF->erase(RestoreBB);
+ }
+
return true;
}
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index b11db3e65770..558700bd9b3b 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -244,7 +244,7 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
MachineInstr *UndefMI = UndefReads.back().first;
unsigned OpIdx = UndefReads.back().second;
- for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) {
+ for (MachineInstr &I : llvm::reverse(*MBB)) {
// Update liveness, including the current instruction's defs.
LiveRegSet.stepBackward(I);
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
new file mode 100644
index 000000000000..877aa69c3e58
--- /dev/null
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -0,0 +1,169 @@
+//===-- CodeGenCommonISel.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines common utilities that are shared between SelectionDAG and
+// GlobalISel frameworks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/CodeGenCommonISel.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+using namespace llvm;
+
+/// Add a successor MBB to ParentMBB, creating a new MachineBB for BB if SuccMBB
+/// is 0.
+MachineBasicBlock *
+StackProtectorDescriptor::addSuccessorMBB(
+ const BasicBlock *BB, MachineBasicBlock *ParentMBB, bool IsLikely,
+ MachineBasicBlock *SuccMBB) {
+ // If SuccBB has not been created yet, create it.
+ if (!SuccMBB) {
+ MachineFunction *MF = ParentMBB->getParent();
+ MachineFunction::iterator BBI(ParentMBB);
+ SuccMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(++BBI, SuccMBB);
+ }
+ // Add it as a successor of ParentMBB.
+ ParentMBB->addSuccessor(
+ SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
+ return SuccMBB;
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A Terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
+ // If we do not have a copy or an implicit def, we return true if and only if
+ // MI is a debug value.
+ if (!MI.isCopy() && !MI.isImplicitDef()) {
+ // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+ // physical registers if there is debug info associated with the terminator
+ // of our mbb. We want to include said debug info in our terminator
+ // sequence, so we return true in that case.
+ if (MI.isDebugInstr())
+ return true;
+
+ // For GlobalISel, we may have extension instructions for arguments within
+ // copy sequences. Allow these.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // We have left the terminator sequence if we are not doing one of the
+ // following:
+ //
+ // 1. Copying a vreg into a physical register.
+ // 2. Copying a vreg into a vreg.
+ // 3. Defining a register via an implicit def.
+
+ // OPI should always be a register definition...
+ MachineInstr::const_mop_iterator OPI = MI.operands_begin();
+ if (!OPI->isReg() || !OPI->isDef())
+ return false;
+
+ // Defining any register via an implicit def is always ok.
+ if (MI.isImplicitDef())
+ return true;
+
+ // Grab the copy source...
+ MachineInstr::const_mop_iterator OPI2 = OPI;
+ ++OPI2;
+ assert(OPI2 != MI.operands_end()
+ && "Should have a copy implying we should have 2 arguments.");
+
+ // Make sure that the copy dest is not a vreg when the copy source is a
+ // physical register.
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
+ return false;
+
+ return true;
+}
+
+/// Find the split point at which to splice the end of BB into its successor stack
+/// protector check machine basic block.
+///
+/// On many platforms, due to ABI constraints, terminators, even before register
+/// allocation, use physical registers. This creates an issue for us since
+/// physical registers at this point can not travel across basic
+/// blocks. Luckily, selectiondag always moves physical registers into vregs
+/// when they enter functions and moves them through a sequence of copies back
+/// into the physical registers right before the terminator creating a
+/// ``Terminator Sequence''. This function is searching for the beginning of the
+/// terminator sequence so that we can ensure that we splice off not just the
+/// terminator, but additionally the copies that move the vregs into the
+/// physical registers.
+MachineBasicBlock::iterator
+llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
+ if (SplitPoint == BB->begin())
+ return SplitPoint;
+
+ MachineBasicBlock::iterator Start = BB->begin();
+ MachineBasicBlock::iterator Previous = SplitPoint;
+ --Previous;
+
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // Call frames cannot be nested, so if this frame is describing the tail
+ // call itself, then we must insert before the sequence even starts. For
+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail
+ // call has to register moves of its own and should be the split point. For
+ // example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
+ while (MIIsInTerminatorSequence(*Previous)) {
+ SplitPoint = Previous;
+ if (Previous == Start)
+ break;
+ --Previous;
+ }
+
+ return SplitPoint;
+}
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 77ce3d2fb563..ac4180c4c3ab 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -530,10 +530,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
while (MadeChange) {
MadeChange = false;
DT.reset();
- for (Function::iterator I = F.begin(); I != F.end(); ) {
- BasicBlock *BB = &*I++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
+ MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration);
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
@@ -660,12 +659,8 @@ void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
return;
auto &GEPVector = VecI->second;
- const auto &I =
- llvm::find_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
- if (I == GEPVector.end())
- return;
+ llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
- GEPVector.erase(I);
if (GEPVector.empty())
LargeOffsetGEPMap.erase(VecI);
}
@@ -2037,7 +2032,7 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
// Only handle legal scalar cases. Anything else requires too much work.
Type *Ty = CountZeros->getType();
- unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
+ unsigned SizeInBits = Ty->getScalarSizeInBits();
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
@@ -2108,7 +2103,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// idea
unsigned MinSize, PrefAlign;
if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
// We want to align both objects whose address is used directly and
// objects whose address is used in casts and GEPs, though it only makes
// sense for GEPs if the offset is a multiple of the desired alignment and
@@ -2159,7 +2154,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
// into their uses. TODO: generalize this to work over profiling data
if (CI->hasFnAttr(Attribute::Cold) &&
!OptSize && !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
if (!Arg->getType()->isPointerTy())
continue;
unsigned AS = Arg->getType()->getPointerAddressSpace();
@@ -3718,7 +3713,8 @@ private:
// Traverse all Phis until we found equivalent or fail to do that.
bool IsMatched = false;
for (auto &P : PHI->getParent()->phis()) {
- if (&P == PHI)
+ // Skip new Phi nodes.
+ if (PhiNodesToMatch.count(&P))
continue;
if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
break;
@@ -4187,7 +4183,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
if (Inst->getOpcode() == Instruction::Xor) {
const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
// Make sure it is not a NOT.
- if (Cst && !Cst->getValue().isAllOnesValue())
+ if (Cst && !Cst->getValue().isAllOnes())
return true;
}
@@ -4858,10 +4854,9 @@ static constexpr int MaxMemoryUsesToScan = 20;
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
-/// Add the ultimately found memory instructions to MemoryUses.
+/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
- Instruction *I,
- SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
+ Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, int SeenInsts = 0) {
@@ -4882,31 +4877,28 @@ static bool FindAllMemoryUses(
Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
- MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
+ MemoryUses.push_back({U.get(), LI->getType()});
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != StoreInst::getPointerOperandIndex())
+ if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, opNo));
+ MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
continue;
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(RMW, opNo));
+ MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
continue;
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
- unsigned opNo = U.getOperandNo();
- if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
continue;
}
@@ -5016,7 +5008,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// we can remove the addressing mode and effectively trade one live register
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
- SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
+ SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
PSI, BFI))
@@ -5032,18 +5024,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
SmallVector<Instruction*, 32> MatchedAddrModeInsts;
- for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
- Instruction *User = MemoryUses[i].first;
- unsigned OpNo = MemoryUses[i].second;
-
- // Get the access type of this use. If the use isn't a pointer, we don't
- // know what it accesses.
- Value *Address = User->getOperand(OpNo);
- PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
- if (!AddrTy)
- return false;
- Type *AddressAccessTy = AddrTy->getElementType();
- unsigned AS = AddrTy->getAddressSpace();
+ for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first;
+ Type *AddressAccessTy = Pair.second;
+ unsigned AS = Address->getType()->getPointerAddressSpace();
// Do a match against the root of this address, ignoring profitability. This
// will tell us if the addressing mode for the memory operation will
@@ -5124,8 +5108,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
- Value *V = worklist.back();
- worklist.pop_back();
+ Value *V = worklist.pop_back_val();
// We allow traversing cyclic Phi nodes.
// In case of success after this loop we ensure that traversing through
@@ -6477,8 +6460,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
APInt WidestAndBits(BitWidth, 0);
while (!WorkList.empty()) {
- Instruction *I = WorkList.back();
- WorkList.pop_back();
+ Instruction *I = WorkList.pop_back_val();
// Break use-def graph loops.
if (!Visited.insert(I).second)
@@ -6950,16 +6932,26 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
BasicBlock *TargetBB = I->getParent();
bool Changed = false;
SmallVector<Use *, 4> ToReplace;
+ Instruction *InsertPoint = I;
+ DenseMap<const Instruction *, unsigned long> InstOrdering;
+ unsigned long InstNumber = 0;
+ for (const auto &I : *TargetBB)
+ InstOrdering[&I] = InstNumber++;
+
for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
- if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+ if (isa<PHINode>(UI))
continue;
+ if (UI->getParent() == TargetBB) {
+ if (InstOrdering[UI] < InstOrdering[InsertPoint])
+ InsertPoint = UI;
+ continue;
+ }
ToReplace.push_back(U);
}
SetVector<Instruction *> MaybeDead;
DenseMap<Instruction *, Instruction *> NewInstructions;
- Instruction *InsertPoint = I;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
@@ -7863,8 +7855,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
- if (BinOp && (BinOp->getOpcode() == Instruction::And) && EnableAndCmpSinking)
- return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
+ sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
+ return true;
// TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
@@ -8030,9 +8023,8 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
DominatorTree DT(F);
for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- Instruction *Insn = &*BI++;
- DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
+ for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
+ DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
if (!DVI)
continue;
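
tryToSinkFreeOperands above numbers the instructions of TargetBB once, then uses that numbering to move the insert point to the earliest operand definition already living in the block. The ordering trick in isolation (plain pointers and a vector stand in for the basic block):

    #include <algorithm>
    #include <cstddef>
    #include <unordered_map>
    #include <vector>

    struct Instr { int Id; };

    // Number the block once, then pick the earliest position among the current
    // insert point and a set of instructions that are already in the block.
    std::size_t earliestInsertPoint(const std::vector<const Instr *> &Block,
                                    std::size_t CurrentPos,
                                    const std::vector<const Instr *> &InBlock) {
      std::unordered_map<const Instr *, std::size_t> Order;
      for (std::size_t I = 0, E = Block.size(); I != E; ++I)
        Order[Block[I]] = I;             // one linear pass, O(1) queries after

      std::size_t Pos = CurrentPos;
      for (const Instr *In : InBlock)
        Pos = std::min(Pos, Order[In]);  // mirrors the InstOrdering comparison
      return Pos;
    }
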
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index f3cba6225107..a1ff02178ffa 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -65,6 +65,7 @@ CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math)
CGOPT(bool, EnableHonorSignDependentRoundingFPMath)
CGOPT(FloatABI::ABIType, FloatABIForCalls)
CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps)
+CGOPT(SwiftAsyncFramePointerMode, SwiftAsyncFramePointer)
CGOPT(bool, DontPlaceZerosInBSS)
CGOPT(bool, EnableGuaranteedTailCallOpt)
CGOPT(bool, DisableTailCalls)
@@ -89,11 +90,11 @@ CGOPT(bool, EnableAddrsig)
CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
-CGOPT(bool, PseudoProbeForProfiling)
CGOPT(bool, ValueTrackingVariableLocations)
CGOPT(bool, ForceDwarfFrameSection)
CGOPT(bool, XRayOmitFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
+CGOPT(unsigned, AlignLoops)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -277,6 +278,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"Only fuse FP ops when the result won't be affected.")));
CGBINDOPT(FuseFPOps);
+ static cl::opt<SwiftAsyncFramePointerMode> SwiftAsyncFramePointer(
+ "swift-async-fp",
+ cl::desc("Determine when the Swift async frame pointer should be set"),
+ cl::init(SwiftAsyncFramePointerMode::Always),
+ cl::values(clEnumValN(SwiftAsyncFramePointerMode::DeploymentBased, "auto",
+ "Determine based on deployment target"),
+ clEnumValN(SwiftAsyncFramePointerMode::Always, "always",
+ "Always set the bit"),
+ clEnumValN(SwiftAsyncFramePointerMode::Never, "never",
+ "Never set the bit")));
+ CGBINDOPT(SwiftAsyncFramePointer);
+
static cl::opt<bool> DontPlaceZerosInBSS(
"nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -420,11 +433,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableDebugEntryValues);
- static cl::opt<bool> PseudoProbeForProfiling(
- "pseudo-probe-for-profiling", cl::desc("Emit pseudo probes for AutoFDO"),
- cl::init(false));
- CGBINDOPT(PseudoProbeForProfiling);
-
static cl::opt<bool> ValueTrackingVariableLocations(
"experimental-debug-variable-locations",
cl::desc("Use experimental new value-tracking variable locations"),
@@ -452,6 +460,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
"strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
CGBINDOPT(DebugStrictDwarf);
+ static cl::opt<unsigned> AlignLoops("align-loops",
+ cl::desc("Default alignment for loops"));
+ CGBINDOPT(AlignLoops);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -522,18 +534,18 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitAddrsig = getEnableAddrsig();
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
- Options.PseudoProbeForProfiling = getPseudoProbeForProfiling();
Options.ValueTrackingVariableLocations = getValueTrackingVariableLocations();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
+ Options.LoopAlignment = getAlignLoops();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
Options.ThreadModel = getThreadModel();
Options.EABIVersion = getEABIVersion();
Options.DebuggerTuning = getDebuggerTuningOpt();
-
+ Options.SwiftAsyncFramePointer = getSwiftAsyncFramePointer();
return Options;
}
@@ -666,13 +678,11 @@ void codegen::setFunctionAttributes(StringRef CPU, StringRef Features,
if (const auto *F = Call->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::debugtrap ||
F->getIntrinsicID() == Intrinsic::trap)
- Call->addAttribute(
- AttributeList::FunctionIndex,
+ Call->addFnAttr(
Attribute::get(Ctx, "trap-func-name", getTrapFuncName()));
// Let NewAttrs override Attrs.
- F.setAttributes(
- Attrs.addAttributes(Ctx, AttributeList::FunctionIndex, NewAttrs));
+ F.setAttributes(Attrs.addFnAttributes(Ctx, NewAttrs));
}
/// Set function attributes of functions in Module M based on CPU,
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index c56c8c87734f..981f5973fee8 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -212,6 +212,21 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
+ if (MO.isUse() && Special) {
+ if (!KeepRegs.test(Reg)) {
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+ SubRegs.isValid(); ++SubRegs)
+ KeepRegs.set(*SubRegs);
+ }
+ }
+ }
+
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg()) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isValid())
+ continue;
// If this reg is tied and live (Classes[Reg] is set to -1), we can't change
// it or any of its sub or super regs. We need to use KeepRegs to mark the
// reg because not all uses of the same reg within an instruction are
@@ -222,7 +237,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// of a register? In the above 'xor' example, the uses of %eax are undef, so
// earlier instructions could still replace %eax even though the 'xor'
// itself can't be changed.
- if (MI.isRegTiedToUseOperand(i) &&
+ if (MI.isRegTiedToUseOperand(I) &&
Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
@@ -233,14 +248,6 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
KeepRegs.set(*SuperRegs);
}
}
-
- if (MO.isUse() && Special) {
- if (!KeepRegs.test(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- KeepRegs.set(*SubRegs);
- }
- }
}
}
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6e7db95b5c2a..c6c0b79cd7e7 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -138,26 +138,22 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
- for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
- MIE = MBB->rend();
- MII != MIE;) {
- MachineInstr *MI = &*MII++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// If the instruction is dead, delete it!
- if (isDead(MI)) {
- LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI);
+ if (isDead(&MI)) {
+ LLVM_DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << MI);
// It is possible that some DBG_VALUE instructions refer to this
// instruction. They get marked as undef and will be deleted
// in the live debug variable analysis.
- MI->eraseFromParentAndMarkDBGValuesForRemoval();
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
continue;
}
// Record the physreg defs.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
@@ -175,8 +171,8 @@ bool DeadMachineInstructionElim::eliminateDeadMI(MachineFunction &MF) {
}
// Record the physreg uses, after the defs, in case a physreg is
// both defined and used in the same instruction.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isUse()) {
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index 5ca1e91cc5f4..fb8a3e383950 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/EHPersonalities.h"
@@ -54,13 +55,11 @@ namespace {
class DwarfEHPrepare {
CodeGenOpt::Level OptLevel;
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee &RewindFunction;
-
Function &F;
const TargetLowering &TLI;
DomTreeUpdater *DTU;
const TargetTransformInfo *TTI;
+ const Triple &TargetTriple;
/// Return the exception object from the value passed into
/// the 'resume' instruction (typically an aggregate). Clean up any dead
@@ -78,11 +77,11 @@ class DwarfEHPrepare {
bool InsertUnwindResumeCalls();
public:
- DwarfEHPrepare(CodeGenOpt::Level OptLevel_, FunctionCallee &RewindFunction_,
- Function &F_, const TargetLowering &TLI_, DomTreeUpdater *DTU_,
- const TargetTransformInfo *TTI_)
- : OptLevel(OptLevel_), RewindFunction(RewindFunction_), F(F_), TLI(TLI_),
- DTU(DTU_), TTI(TTI_) {}
+ DwarfEHPrepare(CodeGenOpt::Level OptLevel_, Function &F_,
+ const TargetLowering &TLI_, DomTreeUpdater *DTU_,
+ const TargetTransformInfo *TTI_, const Triple &TargetTriple_)
+ : OptLevel(OptLevel_), F(F_), TLI(TLI_), DTU(DTU_), TTI(TTI_),
+ TargetTriple(TargetTriple_) {}
bool run();
};
@@ -211,13 +210,28 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
if (ResumesLeft == 0)
return true; // We pruned them all.
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- FunctionType *FTy =
+ // RewindFunction - _Unwind_Resume or the target equivalent.
+ FunctionCallee RewindFunction;
+ CallingConv::ID RewindFunctionCallingConv;
+ FunctionType *FTy;
+ const char *RewindName;
+ bool DoesRewindFunctionNeedExceptionObject;
+
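+ // Note: EH-ABI-compatible targets using a GNU C++ personality resume
+ // unwinding through __cxa_end_cleanup(), which takes no arguments; all other
+ // configurations call the target's _Unwind_Resume equivalent and pass it the
+ // exception object.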
+ if ((Pers == EHPersonality::GNU_CXX || Pers == EHPersonality::GNU_CXX_SjLj) &&
+ TargetTriple.isTargetEHABICompatible()) {
+ RewindName = TLI.getLibcallName(RTLIB::CXA_END_CLEANUP);
+ FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
+ RewindFunctionCallingConv =
+ TLI.getLibcallCallingConv(RTLIB::CXA_END_CLEANUP);
+ DoesRewindFunctionNeedExceptionObject = false;
+ } else {
+ RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
+ FTy =
FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI.getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
+ RewindFunctionCallingConv = TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME);
+ DoesRewindFunctionNeedExceptionObject = true;
}
+ RewindFunction = F.getParent()->getOrInsertFunction(RewindName, FTy);
// Create the basic block where the _Unwind_Resume call will live.
if (ResumesLeft == 1) {
@@ -226,10 +240,14 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
ResumeInst *RI = Resumes.front();
BasicBlock *UnwindBB = RI->getParent();
Value *ExnObj = GetExceptionObject(RI);
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(ExnObj);
- // Call the _Unwind_Resume function.
- CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
- CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ // Call the rewind function.
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
// We never expect _Unwind_Resume to return.
CI->setDoesNotReturn();
@@ -240,6 +258,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
std::vector<DominatorTree::UpdateType> Updates;
Updates.reserve(Resumes.size());
+ llvm::SmallVector<Value *, 1> RewindFunctionArgs;
+
BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &F);
PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, "exn.obj",
UnwindBB);
@@ -257,9 +277,13 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
++NumResumesLowered;
}
+ if (DoesRewindFunctionNeedExceptionObject)
+ RewindFunctionArgs.push_back(PN);
+
// Call the function.
- CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB);
- CI->setCallingConv(TLI.getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+ CallInst *CI =
+ CallInst::Create(RewindFunction, RewindFunctionArgs, "", UnwindBB);
+ CI->setCallingConv(RewindFunctionCallingConv);
// We never expect _Unwind_Resume to return.
CI->setDoesNotReturn();
@@ -277,22 +301,20 @@ bool DwarfEHPrepare::run() {
return Changed;
}
-static bool prepareDwarfEH(CodeGenOpt::Level OptLevel,
- FunctionCallee &RewindFunction, Function &F,
+static bool prepareDwarfEH(CodeGenOpt::Level OptLevel, Function &F,
const TargetLowering &TLI, DominatorTree *DT,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI,
+ const Triple &TargetTriple) {
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
- return DwarfEHPrepare(OptLevel, RewindFunction, F, TLI, DT ? &DTU : nullptr,
- TTI)
+ return DwarfEHPrepare(OptLevel, F, TLI, DT ? &DTU : nullptr, TTI,
+ TargetTriple)
.run();
}
namespace {
class DwarfEHPrepareLegacyPass : public FunctionPass {
- // RewindFunction - _Unwind_Resume or the target equivalent.
- FunctionCallee RewindFunction = nullptr;
CodeGenOpt::Level OptLevel;
@@ -315,7 +337,7 @@ public:
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
}
- return prepareDwarfEH(OptLevel, RewindFunction, F, TLI, DT, TTI);
+ return prepareDwarfEH(OptLevel, F, TLI, DT, TTI, TM.getTargetTriple());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 50fdc2114780..d0c2b8c267ff 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -348,17 +348,17 @@ void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
ConstantInt::get(Diff->getType(), 0));
BranchInst *CmpBr =
BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
+ Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates(
{{DominatorTree::Insert, BB, EndBlock},
{DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
- Builder.Insert(CmpBr);
} else {
// The last block has an unconditional branch to EndBlock.
BranchInst *CmpBr = BranchInst::Create(EndBlock);
+ Builder.Insert(CmpBr);
if (DTU)
DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
- Builder.Insert(CmpBr);
}
}
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index d909d6aa5b0a..7300ea6b50ee 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -189,12 +189,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator mi = MBB.begin(), me = MBB.end();
- mi != me;) {
- MachineInstr &MI = *mi;
- // Advance iterator here because MI may be erased.
- ++mi;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Only expand pseudos.
if (!MI.isPseudo())
continue;
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index a8d4d4ebe8bd..bb8d2b3e9a78 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -158,6 +158,11 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// \brief Lower this VP reduction to a call to an unpredicated reduction
+ /// intrinsic.
+ Value *expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &PI);
+
/// \brief Query TTI and expand the vector predication in \p P accordingly.
Value *expandPredication(VPIntrinsic &PI);
@@ -248,6 +253,136 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
+ Type *EltTy) {
+ bool Negative = false;
+ unsigned EltBits = EltTy->getScalarSizeInBits();
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Expecting a VP reduction intrinsic");
+ case Intrinsic::vp_reduce_add:
+ case Intrinsic::vp_reduce_or:
+ case Intrinsic::vp_reduce_xor:
+ case Intrinsic::vp_reduce_umax:
+ return Constant::getNullValue(EltTy);
+ case Intrinsic::vp_reduce_mul:
+ return ConstantInt::get(EltTy, 1, /*IsSigned*/ false);
+ case Intrinsic::vp_reduce_and:
+ case Intrinsic::vp_reduce_umin:
+ return ConstantInt::getAllOnesValue(EltTy);
+ case Intrinsic::vp_reduce_smin:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMaxValue(EltBits));
+ case Intrinsic::vp_reduce_smax:
+ return ConstantInt::get(EltTy->getContext(),
+ APInt::getSignedMinValue(EltBits));
+ case Intrinsic::vp_reduce_fmax:
+ Negative = true;
+ LLVM_FALLTHROUGH;
+ case Intrinsic::vp_reduce_fmin: {
+ FastMathFlags Flags = VPI.getFastMathFlags();
+ const fltSemantics &Semantics = EltTy->getFltSemantics();
+ return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative)
+ : !Flags.noInfs()
+ ? ConstantFP::getInfinity(EltTy, Negative)
+ : ConstantFP::get(EltTy,
+ APFloat::getLargest(Semantics, Negative));
+ }
+ case Intrinsic::vp_reduce_fadd:
+ return ConstantFP::getNegativeZero(EltTy);
+ case Intrinsic::vp_reduce_fmul:
+ return ConstantFP::get(EltTy, 1.0);
+ }
+}
+
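+// Illustrative sketch of the expansion below, in textual IR: a masked
+//   %r = call i32 @llvm.vp.reduce.add.v4i32(i32 %start, <4 x i32> %v,
+//                                           <4 x i1> %m, i32 %evl)
+// becomes, roughly,
+//   %sel = select <4 x i1> %m, <4 x i32> %v, <4 x i32> zeroinitializer
+//   %red = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %sel)
+//   %r   = add i32 %red, %start
+// (the %evl operand is handled separately by discardEVLParameter).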
+Value *
+CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
+ VPReductionIntrinsic &VPI) {
+ assert((isSafeToSpeculativelyExecute(&VPI) ||
+ VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ Value *Mask = VPI.getMaskParam();
+ Value *RedOp = VPI.getOperand(VPI.getVectorParamPos());
+
+ // Insert neutral element in masked-out positions
+ if (Mask && !isAllTrueMask(Mask)) {
+ auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType());
+ auto *NeutralVector = Builder.CreateVectorSplat(
+ cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt);
+ RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector);
+ }
+
+ Value *Reduction;
+ Value *Start = VPI.getOperand(VPI.getStartParamPos());
+
+ switch (VPI.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Impossible reduction kind");
+ case Intrinsic::vp_reduce_add:
+ Reduction = Builder.CreateAddReduce(RedOp);
+ Reduction = Builder.CreateAdd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_mul:
+ Reduction = Builder.CreateMulReduce(RedOp);
+ Reduction = Builder.CreateMul(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_and:
+ Reduction = Builder.CreateAndReduce(RedOp);
+ Reduction = Builder.CreateAnd(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_or:
+ Reduction = Builder.CreateOrReduce(RedOp);
+ Reduction = Builder.CreateOr(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_xor:
+ Reduction = Builder.CreateXorReduce(RedOp);
+ Reduction = Builder.CreateXor(Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_smin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umax:
+ Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_umin:
+ Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmax:
+ Reduction = Builder.CreateFPMaxReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fmin:
+ Reduction = Builder.CreateFPMinReduce(RedOp);
+ transferDecorations(*Reduction, VPI);
+ Reduction =
+ Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start);
+ break;
+ case Intrinsic::vp_reduce_fadd:
+ Reduction = Builder.CreateFAddReduce(Start, RedOp);
+ break;
+ case Intrinsic::vp_reduce_fmul:
+ Reduction = Builder.CreateFMulReduce(Start, RedOp);
+ break;
+ }
+
+ replaceOperation(*Reduction, VPI);
+ return Reduction;
+}
+
void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
@@ -321,6 +456,9 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
if (OC && Instruction::isBinaryOp(*OC))
return expandPredicationInBinaryOperator(Builder, VPI);
+ if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI))
+ return expandPredicationInReduction(Builder, *VPRI);
+
return &VPI;
}
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index e3c4e86d203b..ec6bf18b2769 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -1,9 +1,8 @@
//===-- FixupStatepointCallerSaved.cpp - Fixup caller saved registers ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/llvm/lib/CodeGen/GCMetadata.cpp b/llvm/lib/CodeGen/GCMetadata.cpp
index 8fae798b31d9..af5515cc6bfd 100644
--- a/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/llvm/lib/CodeGen/GCMetadata.cpp
@@ -145,24 +145,9 @@ GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) {
if (NMI != GCStrategyMap.end())
return NMI->getValue();
- for (auto& Entry : GCRegistry::entries()) {
- if (Name == Entry.getName()) {
- std::unique_ptr<GCStrategy> S = Entry.instantiate();
- S->Name = std::string(Name);
- GCStrategyMap[Name] = S.get();
- GCStrategyList.push_back(std::move(S));
- return GCStrategyList.back().get();
- }
- }
-
- if (GCRegistry::begin() == GCRegistry::end()) {
- // In normal operation, the registry should not be empty. There should
- // be the builtin GCs if nothing else. The most likely scenario here is
- // that we got here without running the initializers used by the Registry
- // itself and it's registration mechanism.
- const std::string error = ("unsupported GC: " + Name).str() +
- " (did you remember to link and initialize the CodeGen library?)";
- report_fatal_error(error);
- } else
- report_fatal_error(std::string("unsupported GC: ") + Name);
+ std::unique_ptr<GCStrategy> S = llvm::getGCStrategy(Name);
+ S->Name = std::string(Name);
+ GCStrategyMap[Name] = S.get();
+ GCStrategyList.push_back(std::move(S));
+ return GCStrategyList.back().get();
}
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 58269e172c57..637a877810a1 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -193,8 +193,8 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
bool MadeChange = false;
for (BasicBlock &BB : F)
- for (BasicBlock::iterator II = BB.begin(), E = BB.end(); II != E;) {
- IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++);
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
+ IntrinsicInst *CI = dyn_cast<IntrinsicInst>(&I);
if (!CI)
continue;
@@ -271,16 +271,15 @@ void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
- for (MachineBasicBlock::iterator MI = MBB.begin(), ME = MBB.end();
- MI != ME; ++MI)
- if (MI->isCall()) {
+ for (MachineInstr &MI : MBB)
+ if (MI.isCall()) {
// Do not treat tail or sibling call sites as safe points. This is
// legal since any arguments passed to the callee which live in the
// remnants of the caller's frame will be owned and updated by the
// callee if required.
- if (MI->isTerminator())
+ if (MI.isTerminator())
continue;
- VisitCallPoint(MI);
+ VisitCallPoint(&MI);
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index dd560e8ff145..2676becdd807 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -13,6 +13,8 @@
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
@@ -187,6 +189,14 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
// Try to constant fold these.
assert(SrcOps.size() == 2 && "Invalid sources");
assert(DstOps.size() == 1 && "Invalid dsts");
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
+ // Try to constant fold vector constants.
+ auto VecCst = ConstantFoldVectorBinop(
+ Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
+ if (VecCst)
+ return MachineInstrBuilder(getMF(), *VecCst);
+ break;
+ }
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
SrcOps[1].getReg(), *getMRI()))
return buildConstant(DstOps[0], *Cst);
@@ -213,6 +223,22 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildFConstant(DstOps[0], *Cst);
break;
}
+ case TargetOpcode::G_CTLZ: {
+ assert(SrcOps.size() == 1 && "Expected one source");
+ assert(DstOps.size() == 1 && "Expected one dest");
+ auto MaybeCsts = ConstantFoldCTLZ(SrcOps[0].getReg(), *getMRI());
+ if (!MaybeCsts)
+ break;
+ if (MaybeCsts->size() == 1)
+ return buildConstant(DstOps[0], (*MaybeCsts)[0]);
+ // This was a vector constant. Build a G_BUILD_VECTOR for them.
+ SmallVector<Register> ConstantRegs;
+ LLT VecTy = DstOps[0].getLLTTy(*getMRI());
+ for (unsigned Cst : *MaybeCsts)
+ ConstantRegs.emplace_back(
+ buildConstant(VecTy.getScalarType(), Cst).getReg(0));
+ return buildBuildVector(DstOps[0], ConstantRegs);
+ }
}
bool CanCopy = checkCopyToDefsPossible(DstOps);
if (!canPerformCSEForOpc(Opc))
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d2cda9ece31a..17094a8e44f8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -73,7 +74,7 @@ void CallLowering::addArgFlagsFromAttributes(ISD::ArgFlagsTy &Flags,
const AttributeList &Attrs,
unsigned OpIdx) const {
addFlagsUsingAttrFn(Flags, [&Attrs, &OpIdx](Attribute::AttrKind Attr) {
- return Attrs.hasAttribute(OpIdx, Attr);
+ return Attrs.hasAttributeAtIndex(OpIdx, Attr);
});
}
@@ -139,6 +140,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
if (!Info.OrigRet.Ty->isVoidTy())
setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
+ Info.CB = &CB;
Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
Info.CallConv = CallConv;
Info.SwiftErrorVReg = SwiftErrorVReg;
@@ -165,18 +167,21 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
Align MemAlign = DL.getABITypeAlign(Arg.Ty);
if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
assert(OpIdx >= AttributeList::FirstArgIndex);
- Type *ElementTy = PtrTy->getElementType();
+ unsigned ParamIdx = OpIdx - AttributeList::FirstArgIndex;
- auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
- Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+ Type *ElementTy = FuncInfo.getParamByValType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamInAllocaType(ParamIdx);
+ if (!ElementTy)
+ ElementTy = FuncInfo.getParamPreallocatedType(ParamIdx);
+ assert(ElementTy && "Must have byval, inalloca or preallocated type");
+ Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
- if (auto ParamAlign =
- FuncInfo.getParamStackAlign(OpIdx - AttributeList::FirstArgIndex))
+ if (auto ParamAlign = FuncInfo.getParamStackAlign(ParamIdx))
MemAlign = *ParamAlign;
- else if ((ParamAlign =
- FuncInfo.getParamAlign(OpIdx - AttributeList::FirstArgIndex)))
+ else if ((ParamAlign = FuncInfo.getParamAlign(ParamIdx)))
MemAlign = *ParamAlign;
else
MemAlign = Align(getTLI()->getByValTypeAlignment(ElementTy, DL));
@@ -613,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const unsigned NumArgs = Args.size();
+ // Stores thunks for outgoing register assignments. This is used so we delay
+ // generating register copies until mem loc assignments are done. We do this
+ // so that if the target is using the delayed stack protector feature, we can
+ // find the split point of the block accurately. E.g. if we have:
+ // G_STORE %val, %memloc
+ // $x0 = COPY %foo
+ // $x1 = COPY %bar
+ // CALL func
+ // ... then the split point for the block will correctly be at, and including,
+ // the copy to $x0. If instead the G_STORE instruction immediately precedes
+ // the CALL, then we'd prematurely choose the CALL as the split point, thus
+ // generating a split block with a CALL that uses undefined physregs.
+ SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;
+
for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");
if (VA.needsCustom()) {
- unsigned NumArgRegs =
- Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
+ std::function<void()> Thunk;
+ unsigned NumArgRegs = Handler.assignCustomValue(
+ Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk);
+ if (Thunk)
+ DelayedOutgoingRegAssignments.emplace_back(Thunk);
if (!NumArgRegs)
return false;
j += NumArgRegs;
@@ -739,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ });
+ }
}
// Now that all pieces have been assigned, re-pack the register typed values
@@ -753,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
j += NumParts - 1;
}
+ for (auto &Fn : DelayedOutgoingRegAssignments)
+ Fn();
return true;
}
@@ -1153,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {
void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 6f103bca6892..381c6df5c97a 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -130,16 +130,15 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
WrapperObserver.addObserver(CSEInfo);
RAIIDelegateInstaller DelInstall(MF, &WrapperObserver);
for (MachineBasicBlock *MBB : post_order(&MF)) {
- for (auto MII = MBB->rbegin(), MIE = MBB->rend(); MII != MIE;) {
- MachineInstr *CurMI = &*MII;
- ++MII;
+ for (MachineInstr &CurMI :
+ llvm::make_early_inc_range(llvm::reverse(*MBB))) {
// Erase dead insts before even adding to the list.
- if (isTriviallyDead(*CurMI, *MRI)) {
- LLVM_DEBUG(dbgs() << *CurMI << "Is dead; erasing.\n");
- CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
+ if (isTriviallyDead(CurMI, *MRI)) {
+ LLVM_DEBUG(dbgs() << CurMI << "Is dead; erasing.\n");
+ CurMI.eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
- WorkList.deferred_insert(CurMI);
+ WorkList.deferred_insert(&CurMI);
}
}
WorkList.finalize();
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 06d827de2e96..3a52959d54bf 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -12,9 +12,11 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,8 +28,10 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetMachine.h"
#include <tuple>
#define DEBUG_TYPE "gi-combiner"
@@ -46,8 +50,9 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B, GISelKnownBits *KB,
MachineDominatorTree *MDT,
const LegalizerInfo *LI)
- : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB), MDT(MDT), LI(LI) {
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
+ MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
+ TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
(void)this->KB;
}
@@ -64,6 +69,16 @@ static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
return I;
}
+/// Determines the LogBase2 value for a non-null input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
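+/// For example, with 32-bit elements and V = 8, ctlz(8) = 28, so
+/// LogBase2 = 31 - 28 = 3.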
+static Register buildLogBase2(Register V, MachineIRBuilder &MIB) {
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(V);
+ auto Ctlz = MIB.buildCTLZ(Ty, V);
+ auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
+ return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
+}
+
/// \returns The big endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
@@ -143,6 +158,24 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
Observer.changedInstr(*FromRegOp.getParent());
}
+void CombinerHelper::replaceOpcodeWith(MachineInstr &FromMI,
+ unsigned ToOpcode) const {
+ Observer.changingInstr(FromMI);
+
+ FromMI.setDesc(Builder.getTII().get(ToOpcode));
+
+ Observer.changedInstr(FromMI);
+}
+
+const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
+ return RBI->getRegBank(Reg, MRI, *TRI);
+}
+
+void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
+ if (RegBank)
+ MRI.setRegBank(Reg, *RegBank);
+}
+
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
if (matchCombineCopy(MI)) {
applyCombineCopy(MI);
@@ -486,10 +519,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
continue;
// Check for legality.
if (LI) {
- LegalityQuery::MemDesc MMDesc;
- MMDesc.MemoryTy = MMO.getMemoryType();
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
+ LegalityQuery::MemDesc MMDesc(MMO);
LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
@@ -623,13 +653,83 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
+bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // If we have the following code:
+ // %mask = G_CONSTANT 255
+ // %ld = G_LOAD %ptr, (load s16)
+ // %and = G_AND %ld, %mask
+ //
+ // Try to fold it into
+ // %ld = G_ZEXTLOAD %ptr, (load s8)
+
+ Register Dst = MI.getOperand(0).getReg();
+ if (MRI.getType(Dst).isVector())
+ return false;
+
+ auto MaybeMask =
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ if (!MaybeMask)
+ return false;
+
+ APInt MaskVal = MaybeMask->Value;
+
+ if (!MaskVal.isMask())
+ return false;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ GAnyLoad *LoadMI = getOpcodeDef<GAnyLoad>(SrcReg, MRI);
+ if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()) ||
+ !LoadMI->isSimple())
+ return false;
+
+ Register LoadReg = LoadMI->getDstReg();
+ LLT LoadTy = MRI.getType(LoadReg);
+ Register PtrReg = LoadMI->getPointerReg();
+ uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
+ unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+
+ // The mask may not be larger than the in-memory type, as it might cover
+ // sign-extended bits.
+ if (MaskSizeBits > LoadSizeBits)
+ return false;
+
+ // If the mask covers the whole destination register, there's nothing to
+ // extend
+ if (MaskSizeBits >= LoadTy.getSizeInBits())
+ return false;
+
+ // Most targets cannot deal with loads of size < 8 and need to re-legalize to
+ // at least byte loads. Avoid creating such loads here
+ if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
+ return false;
+
+ const MachineMemOperand &MMO = LoadMI->getMMO();
+ LegalityQuery::MemDesc MemDesc(MMO);
+ MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ZEXTLOAD, {LoadTy, MRI.getType(PtrReg)}, {MemDesc}}))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.setInstrAndDebugLoc(*LoadMI);
+ auto &MF = B.getMF();
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MaskSizeBits / 8);
+ B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
+ };
+ return true;
+}
+
bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
const MachineInstr &UseMI) {
assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
"shouldn't consider debug uses");
assert(DefMI.getParent() == UseMI.getParent());
if (&DefMI == &UseMI)
- return false;
+ return true;
const MachineBasicBlock &MBB = *DefMI.getParent();
auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
return &MI == &DefMI || &MI == &UseMI;
@@ -711,6 +811,16 @@ bool CombinerHelper::matchSextInRegOfLoad(
// anyway for most targets.
if (!isPowerOf2_32(NewSizeBits))
return false;
+
+ const MachineMemOperand &MMO = LoadDef->getMMO();
+ LegalityQuery::MemDesc MMDesc(MMO);
+ MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
+ {MRI.getType(LoadDef->getDstReg()),
+ MRI.getType(LoadDef->getPointerReg())},
+ {MMDesc}}))
+ return false;
+
MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
return true;
}
@@ -1093,81 +1203,6 @@ void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
Observer.changedInstr(*BrCond);
}
-static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
- // On Darwin, -Os means optimize for size without hurting performance, so
- // only really optimize for size when -Oz (MinSize) is used.
- if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().hasMinSize();
- return MF.getFunction().hasOptSize();
-}
-
-// Returns a list of types to use for memory op lowering in MemOps. A partial
-// port of findOptimalMemOpLowering in TargetLowering.
-static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
- unsigned Limit, const MemOp &Op,
- unsigned DstAS, unsigned SrcAS,
- const AttributeList &FuncAttributes,
- const TargetLowering &TLI) {
- if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
- return false;
-
- LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
-
- if (Ty == LLT()) {
- // Use the largest scalar type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater or
- // equal to DstAlign (or zero).
- Ty = LLT::scalar(64);
- if (Op.isFixedDstAlign())
- while (Op.getDstAlign() < Ty.getSizeInBytes() &&
- !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
- Ty = LLT::scalar(Ty.getSizeInBytes());
- assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
- // FIXME: check for the largest legal type we can load/store to.
- }
-
- unsigned NumMemOps = 0;
- uint64_t Size = Op.size();
- while (Size) {
- unsigned TySize = Ty.getSizeInBytes();
- while (TySize > Size) {
- // For now, only use non-vector load / store's for the left-over pieces.
- LLT NewTy = Ty;
- // FIXME: check for mem op safety and legality of the types. Not all of
- // SDAGisms map cleanly to GISel concepts.
- if (NewTy.isVector())
- NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
- unsigned NewTySize = NewTy.getSizeInBytes();
- assert(NewTySize > 0 && "Could not find appropriate type");
-
- // If the new LLT cannot cover all of the remaining bits, then consider
- // issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
- // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
- MVT VT = getMVTForLLT(Ty);
- if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
- TLI.allowsMisalignedMemoryAccesses(
- VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
- MachineMemOperand::MONone, &Fast) &&
- Fast)
- TySize = Size;
- else {
- Ty = NewTy;
- TySize = NewTySize;
- }
- }
-
- if (++NumMemOps > Limit)
- return false;
-
- MemOps.push_back(Ty);
- Size -= TySize;
- }
-
- return true;
-}
-
static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
if (Ty.isVector())
return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
@@ -1175,460 +1210,20 @@ static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
return IntegerType::get(C, Ty.getSizeInBits());
}
-// Get a vectorized representation of the memset value operand, GISel edition.
-static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
- MachineRegisterInfo &MRI = *MIB.getMRI();
- unsigned NumBits = Ty.getScalarSizeInBits();
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- if (!Ty.isVector() && ValVRegAndVal) {
- APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
- APInt SplatVal = APInt::getSplat(NumBits, Scalar);
- return MIB.buildConstant(Ty, SplatVal).getReg(0);
- }
-
- // Extend the byte value to the larger type, and then multiply by a magic
- // value 0x010101... in order to replicate it across every byte.
- // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
- if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
- return MIB.buildConstant(Ty, 0).getReg(0);
- }
-
- LLT ExtType = Ty.getScalarType();
- auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
- if (NumBits > 8) {
- APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
- auto MagicMI = MIB.buildConstant(ExtType, Magic);
- Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
- }
-
- // For vector types create a G_BUILD_VECTOR.
- if (Ty.isVector())
- Val = MIB.buildSplatVector(Ty, Val).getReg(0);
-
- return Val;
-}
-
-bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
- Register Val, uint64_t KnownLen,
- Align Alignment, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memset length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
-
- auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
- bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
-
- if (!findGISelOptimalMemOpLowering(MemOps, Limit,
- MemOp::Set(KnownLen, DstAlignCanChange,
- Alignment,
- /*IsZeroMemset=*/IsZeroVal,
- /*IsVolatile=*/IsVolatile),
- DstPtrInfo.getAddrSpace(), ~0u,
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- MachineIRBuilder MIB(MI);
- // Find the largest store and generate the bit pattern for it.
- LLT LargestTy = MemOps[0];
- for (unsigned i = 1; i < MemOps.size(); i++)
- if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
- LargestTy = MemOps[i];
-
- // The memset stored value is always defined as an s8, so in order to make it
- // work with larger store types we need to repeat the bit pattern across the
- // wider type.
- Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
-
- if (!MemSetValue)
- return false;
-
- // Generate the stores. For each store type in the list, we generate the
- // matching store of that type to the destination address.
- LLT PtrTy = MRI.getType(Dst);
- unsigned DstOff = 0;
- unsigned Size = KnownLen;
- for (unsigned I = 0; I < MemOps.size(); I++) {
- LLT Ty = MemOps[I];
- unsigned TySize = Ty.getSizeInBytes();
- if (TySize > Size) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- assert(I == MemOps.size() - 1 && I != 0);
- DstOff -= TySize - Size;
- }
-
- // If this store is smaller than the largest store see whether we can get
- // the smaller value for free with a truncate.
- Register Value = MemSetValue;
- if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
- MVT VT = getMVTForLLT(Ty);
- MVT LargestVT = getMVTForLLT(LargestTy);
- if (!LargestTy.isVector() && !Ty.isVector() &&
- TLI.isTruncateFree(LargestVT, VT))
- Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
- else
- Value = getMemsetValue(Val, Ty, MIB);
- if (!Value)
- return false;
- }
-
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
-
- Register Ptr = Dst;
- if (DstOff != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
- Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
-
- MIB.buildStore(Value, Ptr, *StoreMMO);
- DstOff += Ty.getSizeInBytes();
- Size -= TySize;
- }
-
- MI.eraseFromParent();
- return true;
-}
-
bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
-
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- const auto *MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
- bool IsVolatile = MemOp->isVolatile();
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- // FIXME: support dynamically sized G_MEMCPY_INLINE
- assert(LenVRegAndVal.hasValue() &&
- "inline memcpy with dynamic size is not yet supported");
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- Align DstAlign = DstMMO.getBaseAlign();
- Align SrcAlign = SrcMMO.getBaseAlign();
-
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-}
-
-bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- return optimizeMemcpy(MI, Dst, Src, KnownLen,
- std::numeric_limits<uint64_t>::max(), DstAlign,
- SrcAlign, IsVolatile);
-}
-
-bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- uint64_t Limit, Align DstAlign,
- Align SrcAlign, bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memcpy length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- // FIXME: infer better src pointer alignment like SelectionDAG does here.
- // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
- // if the memcpy is in a tail call position.
-
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- IsVolatile),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Now we need to emit a pair of load and stores for each of the types we've
- // collected. I.e. for each type, generate a load from the source pointer of
- // that type width, and then generate a corresponding store to the dest buffer
- // of that value loaded. This can result in a sequence of loads and stores
- // mixed types, depending on what the target specifies as good types to use.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- unsigned Size = KnownLen;
- for (auto CopyTy : MemOps) {
- // Issuing an unaligned load / store pair that overlaps with the previous
- // pair. Adjust the offset accordingly.
- if (CopyTy.getSizeInBytes() > Size)
- CurrOffset -= CopyTy.getSizeInBytes() - Size;
-
- // Construct MMOs for the accesses.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- Register Offset;
- if (CurrOffset != 0) {
- Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
- .getReg(0);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
-
- // Create the store.
- Register StorePtr =
- CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- MIB.buildStore(LdVal, StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- Size -= CopyTy.getSizeInBytes();
- }
-
- MI.eraseFromParent();
- return true;
-}
-
-bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
- Register Src, uint64_t KnownLen,
- Align DstAlign, Align SrcAlign,
- bool IsVolatile) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- auto &DL = MF.getDataLayout();
- LLVMContext &C = MF.getFunction().getContext();
-
- assert(KnownLen != 0 && "Have a zero length memmove length!");
-
- bool DstAlignCanChange = false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- Align Alignment = commonAlignment(DstAlign, SrcAlign);
-
- MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
- if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
- DstAlignCanChange = true;
-
- unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
- std::vector<LLT> MemOps;
-
- const auto &DstMMO = **MI.memoperands_begin();
- const auto &SrcMMO = **std::next(MI.memoperands_begin());
- MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
- MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
-
- // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
- // to a bug in it's findOptimalMemOpLowering implementation. For now do the
- // same thing here.
- if (!findGISelOptimalMemOpLowering(
- MemOps, Limit,
- MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
- /*IsVolatile*/ true),
- DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
- MF.getFunction().getAttributes(), TLI))
- return false;
-
- if (DstAlignCanChange) {
- // Get an estimate of the type from the LLT.
- Type *IRTy = getTypeForLLT(MemOps[0], C);
- Align NewAlign = DL.getABITypeAlign(IRTy);
-
- // Don't promote to an alignment that would require dynamic stack
- // realignment.
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- if (!TRI->hasStackRealignment(MF))
- while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign = NewAlign / 2;
-
- if (NewAlign > Alignment) {
- Alignment = NewAlign;
- unsigned FI = FIDef->getOperand(1).getIndex();
- // Give the stack frame object a larger alignment if needed.
- if (MFI.getObjectAlign(FI) < Alignment)
- MFI.setObjectAlignment(FI, Alignment);
- }
- }
-
- LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
-
- MachineIRBuilder MIB(MI);
- // Memmove requires that we perform the loads first before issuing the stores.
- // Apart from that, this loop is pretty much doing the same thing as the
- // memcpy codegen function.
- unsigned CurrOffset = 0;
- LLT PtrTy = MRI.getType(Src);
- SmallVector<Register, 16> LoadVals;
- for (auto CopyTy : MemOps) {
- // Construct MMO for the load.
- auto *LoadMMO =
- MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- // Create the load.
- Register LoadPtr = Src;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
- }
- LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
- CurrOffset += CopyTy.getSizeInBytes();
- }
-
- CurrOffset = 0;
- for (unsigned I = 0; I < MemOps.size(); ++I) {
- LLT CopyTy = MemOps[I];
- // Now store the values loaded.
- auto *StoreMMO =
- MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
-
- Register StorePtr = Dst;
- if (CurrOffset != 0) {
- auto Offset =
- MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
- StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
- }
- MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
- CurrOffset += CopyTy.getSizeInBytes();
- }
- MI.eraseFromParent();
- return true;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemcpyInline(MI) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
- const unsigned Opc = MI.getOpcode();
- // This combine is fairly complex so it's not written with a separate
- // matcher function.
- assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
- Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
-
- auto MMOIt = MI.memoperands_begin();
- const MachineMemOperand *MemOp = *MMOIt;
-
- Align DstAlign = MemOp->getBaseAlign();
- Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
-
- if (Opc != TargetOpcode::G_MEMSET) {
- assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
- MemOp = *(++MMOIt);
- SrcAlign = MemOp->getBaseAlign();
- }
-
- // See if this is a constant length copy
- auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
- if (!LenVRegAndVal)
- return false; // Leave it to the legalizer to lower it to a libcall.
- uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
-
- if (KnownLen == 0) {
- MI.eraseFromParent();
- return true;
- }
-
- bool IsVolatile = MemOp->isVolatile();
- if (Opc == TargetOpcode::G_MEMCPY_INLINE)
- return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
- IsVolatile);
-
- // Don't try to optimize volatile.
- if (IsVolatile)
- return false;
-
- if (MaxLen && KnownLen > MaxLen)
- return false;
-
- if (Opc == TargetOpcode::G_MEMCPY) {
- auto &MF = *MI.getParent()->getParent();
- const auto &TLI = *MF.getSubtarget().getTargetLowering();
- bool OptSize = shouldLowerMemFuncForSize(MF);
- uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
- return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
- IsVolatile);
- }
- if (Opc == TargetOpcode::G_MEMMOVE)
- return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
- if (Opc == TargetOpcode::G_MEMSET)
- return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
- return false;
+ MachineIRBuilder HelperBuilder(MI);
+ GISelObserverWrapper DummyObserver;
+ LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
+ return Helper.lowerMemCpyFamily(MI, MaxLen) ==
+ LegalizerHelper::LegalizeResult::Legalized;
}
static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
@@ -1706,30 +1301,52 @@ bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
Register Add2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
- // Don't do this combine if there multiple uses of the first PTR_ADD,
- // since we may be able to compute the second PTR_ADD as an immediate
- // offset anyway. Folding the first offset into the second may cause us
- // to go beyond the bounds of our legal addressing modes.
- if (!MRI.hasOneNonDBGUse(Add2))
- return false;
-
- MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
+ MachineInstr *Add2Def = MRI.getVRegDef(Add2);
if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
Register Base = Add2Def->getOperand(1).getReg();
Register Imm2 = Add2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
+ // Check if the new combined immediate forms an illegal addressing mode.
+ // Do not combine if it was legal before but would get illegal.
+ // To do so, we need to find a load/store user of the pointer to get
+ // the access type.
+ Type *AccessTy = nullptr;
+ auto &MF = *MI.getMF();
+ for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
+ if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
+ AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
+ MF.getFunction().getContext());
+ break;
+ }
+ }
+ TargetLoweringBase::AddrMode AMNew;
+ APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
+ AMNew.BaseOffs = CombinedImm.getSExtValue();
+ if (AccessTy) {
+ AMNew.HasBaseReg = true;
+ TargetLoweringBase::AddrMode AMOld;
+ AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
+ AMOld.HasBaseReg = true;
+ unsigned AS = MRI.getType(Add2).getAddressSpace();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
+ !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
+ return false;
+ }
+
// Pass the combined immediate to the apply function.
- MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
+ MatchInfo.Imm = AMNew.BaseOffs;
MatchInfo.Base = Base;
+ MatchInfo.Bank = getRegBank(Imm2);
return true;
}
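// [Editorial sketch - not part of this patch] The new check above folds
// (p + C2) + C1 into p + (C1 + C2) only when the combined immediate is still
// a legal addressing-mode offset for a load/store user. A minimal standalone
// illustration, assuming a hypothetical target whose offsets must fit in a
// signed 12-bit immediate (the real test is TLI.isLegalAddressingMode):
#include <cassert>
#include <cstdint>

// Hypothetical legality predicate; stands in for isLegalAddressingMode.
static bool fitsSigned12(int64_t Off) { return Off >= -2048 && Off <= 2047; }

// Returns true if the two G_PTR_ADD immediates may be folded into one.
static bool canFoldPtrAddImms(int64_t OuterImm, int64_t InnerImm) {
  int64_t Combined = OuterImm + InnerImm;
  // Don't combine if the inner offset was legal but the combined one is not.
  if (fitsSigned12(InnerImm) && !fitsSigned12(Combined))
    return false;
  return true;
}

int main() {
  assert(canFoldPtrAddImms(8, 16));       // 24 still fits: fold.
  assert(!canFoldPtrAddImms(4000, 1024)); // 1024 fits, 5024 does not: keep.
  return 0;
}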
@@ -1739,6 +1356,7 @@ void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
MachineIRBuilder MIB(MI);
LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
+ setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
Observer.changingInstr(MI);
MI.getOperand(1).setReg(MatchInfo.Base);
MI.getOperand(2).setReg(NewOffset.getReg(0));
@@ -1762,7 +1380,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Shl2 = MI.getOperand(1).getReg();
Register Imm1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
if (!MaybeImmVal)
return false;
@@ -1772,7 +1390,7 @@ bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
Register Base = Shl2Def->getOperand(1).getReg();
Register Imm2 = Shl2Def->getOperand(2).getReg();
- auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
+ auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
if (!MaybeImm2Val)
return false;
@@ -1856,7 +1474,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Find a matching one-use shift by constant.
const Register C1 = MI.getOperand(2).getReg();
- auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
+ auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
if (!MaybeImmVal)
return false;
@@ -1870,7 +1488,7 @@ bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
// Must be a constant.
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1932,8 +1550,8 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
// These were one use so it's safe to remove them.
- MatchInfo.Shift2->eraseFromParent();
- MatchInfo.Logic->eraseFromParent();
+ MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
+ MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
MI.eraseFromParent();
}
@@ -1942,7 +1560,7 @@ bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -1977,7 +1595,7 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
// TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -2045,26 +1663,23 @@ bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
"Expected an unmerge");
- Register SrcReg =
- peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
+ auto &Unmerge = cast<GUnmerge>(MI);
+ Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
- MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
- if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
- SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
- SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
+ auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
+ if (!SrcInstr)
return false;
// Check the source type of the merge.
- LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
- LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
+ LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
if (SrcMergeTy != Dst0Ty && !SameSize)
return false;
// They are the same now (modulo a bitcast).
// We can collect all the src registers.
- for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
- ++Idx)
- Operands.push_back(SrcInstr->getOperand(Idx).getReg());
+ for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
+ Operands.push_back(SrcInstr->getSourceReg(Idx));
return true;
}
@@ -2241,7 +1856,7 @@ bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
return false;
auto MaybeImmVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeImmVal)
return false;
@@ -2410,12 +2025,12 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register LHS = PtrAdd.getBaseReg();
+ Register RHS = PtrAdd.getOffsetReg();
MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
- if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
+ if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {
int64_t Cst;
if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
NewCst = Cst + *RHSCst;
@@ -2428,12 +2043,12 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
int64_t &NewCst) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
- Register Dst = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register Dst = PtrAdd.getReg(0);
Builder.setInstrAndDebugLoc(MI);
Builder.buildConstant(Dst, NewCst);
- MI.eraseFromParent();
+ PtrAdd.eraseFromParent();
}
bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
@@ -2536,6 +2151,23 @@ bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
}
+bool CombinerHelper::matchCombineFAbsOfFNeg(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
+ Register Src = MI.getOperand(1).getReg();
+ Register NegSrc;
+
+ if (!mi_match(Src, MRI, m_GFNeg(m_Reg(NegSrc))))
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NegSrc);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::matchCombineTruncOfExt(
MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
@@ -2587,7 +2219,7 @@ bool CombinerHelper::matchCombineTruncOfShl(
{DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
KnownBits Known = KB->getKnownBits(ShiftAmt);
unsigned Size = DstTy.getSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
return true;
}
@@ -2644,13 +2276,13 @@ bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
}
bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
- assert(MI.getOpcode() == TargetOpcode::G_SELECT);
- if (auto MaybeCstCmp =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
- OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
- return true;
- }
- return false;
+ GSelect &SelMI = cast<GSelect>(MI);
+ auto Cst =
+ isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
+ if (!Cst)
+ return false;
+ OpIdx = Cst->isZero() ? 3 : 2;
+ return true;
}
bool CombinerHelper::eraseInst(MachineInstr &MI) {
@@ -2662,12 +2294,14 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
if (!MOP1.isReg() || !MOP2.isReg())
return false;
- MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
- if (!I1)
+ auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
+ if (!InstAndDef1)
return false;
- MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
- if (!I2)
+ auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
+ if (!InstAndDef2)
return false;
+ MachineInstr *I1 = InstAndDef1->MI;
+ MachineInstr *I2 = InstAndDef2->MI;
// Handle a case like this:
//
@@ -2727,15 +2361,26 @@ bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
//
// On the off-chance that there's some target instruction feeding into the
// instruction, let's use produceSameValue instead of isIdenticalTo.
- return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
+ if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
+ // Handle instructions with multiple defs that produce same values. Values
+ // are same for operands with same index.
+ // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
+ // I1 and I2 are different instructions but produce same values,
+ // %1 and %6 are same, %1 and %7 are not the same value.
+ return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
+ I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
+ }
+ return false;
}
bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
if (!MOP.isReg())
return false;
- // MIPatternMatch doesn't let us look through G_ZEXT etc.
- auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
- return ValAndVReg && ValAndVReg->Value == C;
+ auto *MI = MRI.getVRegDef(MOP.getReg());
+ auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
+ return MaybeCst.hasValue() && MaybeCst->getBitWidth() <= 64 &&
+ MaybeCst->getSExtValue() == C;
}
bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
@@ -3115,14 +2760,14 @@ bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
//
// Check if we can replace AndDst with the LHS of the G_AND
if (canReplaceReg(AndDst, LHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace AndDst with the RHS of the G_AND
if (canReplaceReg(AndDst, RHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3161,14 +2806,14 @@ bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
//
// Check if we can replace OrDst with the LHS of the G_OR
if (canReplaceReg(OrDst, LHS, MRI) &&
- (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+ (LHSBits.One | RHSBits.Zero).isAllOnes()) {
Replacement = LHS;
return true;
}
// Check if we can replace OrDst with the RHS of the G_OR
if (canReplaceReg(OrDst, RHS, MRI) &&
- (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+ (LHSBits.Zero | RHSBits.One).isAllOnes()) {
Replacement = RHS;
return true;
}
@@ -3346,7 +2991,8 @@ void CombinerHelper::applyXorOfAndWithSameReg(
}
bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Register DstReg = PtrAdd.getReg(0);
LLT Ty = MRI.getType(DstReg);
const DataLayout &DL = Builder.getMF().getDataLayout();
@@ -3354,20 +3000,20 @@ bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
return false;
if (Ty.isPointer()) {
- auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
+ auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
return ConstVal && *ConstVal == 0;
}
assert(Ty.isVector() && "Expecting a vector type");
- const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
return isBuildVectorAllZeros(*VecMI, MRI);
}
void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
- Builder.setInstrAndDebugLoc(MI);
- Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
- MI.eraseFromParent();
+ auto &PtrAdd = cast<GPtrAdd>(MI);
+ Builder.setInstrAndDebugLoc(PtrAdd);
+ Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
+ PtrAdd.eraseFromParent();
}
/// The second source operand is known to be a power of 2.
@@ -3704,10 +3350,8 @@ bool CombinerHelper::matchLoadOrCombine(
// may not use index 0.
Register Ptr = LowestIdxLoad->getPointerReg();
const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
- LegalityQuery::MemDesc MMDesc;
+ LegalityQuery::MemDesc MMDesc(MMO);
MMDesc.MemoryTy = Ty;
- MMDesc.AlignInBits = MMO.getAlign().value() * 8;
- MMDesc.Ordering = MMO.getSuccessOrdering();
if (!isLegalOrBeforeLegalizer(
{TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
return false;
@@ -3732,6 +3376,274 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static Optional<int64_t> getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return None;
+
+ // The shift amount must be a constant multiple of the narrow type's width;
+ // it translates to the index of the stored piece within the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return None;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return None;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return None;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return None;
+ return Offset;
+}
+
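// [Editorial sketch - not part of this patch] getTruncStoreByteOffset maps a
// "store (trunc (lshr %wide, ShiftAmt))" to the index of the narrow piece
// inside %wide: Offset = ShiftAmt / NarrowBits, and the shift must be a whole
// multiple of the narrow width. A standalone check of that arithmetic for a
// 32-bit value stored as four 8-bit pieces:
#include <cassert>
#include <cstdint>
#include <optional>

static std::optional<unsigned> byteIndexForShift(unsigned ShiftAmt,
                                                 unsigned NarrowBits) {
  if (ShiftAmt % NarrowBits != 0)
    return std::nullopt; // Not a clean piece boundary; no merge.
  return ShiftAmt / NarrowBits;
}

int main() {
  uint32_t Wide = 0x11223344;
  for (unsigned Idx = 0; Idx < 4; ++Idx) {
    unsigned ShiftAmt = Idx * 8;
    assert(byteIndexForShift(ShiftAmt, 8) == Idx);
    // The piece stored at index Idx is exactly byte Idx of the wide value.
    assert(uint8_t(Wide >> ShiftAmt) == uint8_t(Wide >> (8 * Idx)));
  }
  assert(!byteIndexForShift(12, 8)); // Mid-byte shifts never match.
  return 0;
}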
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+ auto &StoreMI = cast<GStore>(MI);
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ // Search the block up for more stores.
+ // We use a search threshold of 10 instructions here because the combiner
+ // works top-down within a block, and we don't want to search an unbounded
+ // number of predecessor instructions trying to find matching stores.
+ // If we moved this optimization into a separate pass then we could probably
+ // use a more efficient search without having a hard-coded threshold.
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset within the combined value to the store's memory offset,
+ // and bail out if this position has already been filled.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ return false;
+ }
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed = getTargetLowering().allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresRequired; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
+ return false;
+
+ MatchInfo.NeedBSwap = NeedBswap;
+ MatchInfo.NeedRotate = NeedRotate;
+ MatchInfo.LowestIdxStore = LowestIdxStore;
+ MatchInfo.WideSrcVal = WideSrcVal;
+ MatchInfo.FoundStores = std::move(FoundStores);
+ return true;
+}
+
+void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
+ MergeTruncStoresInfo &MatchInfo) {
+
+ Builder.setInstrAndDebugLoc(MI);
+ Register WideSrcVal = MatchInfo.WideSrcVal;
+ LLT WideStoreTy = MRI.getType(WideSrcVal);
+
+ if (MatchInfo.NeedBSwap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (MatchInfo.NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
+ MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
+ MatchInfo.LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : MatchInfo.FoundStores)
+ ST->eraseFromParent();
+}
+
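// [Editorial sketch - not part of this patch] The merge above replaces N
// narrow stores with one wide store, optionally preceded by G_BSWAP (pieces
// in reversed byte order) or G_ROTR by half the width (two halves swapped).
// A standalone demonstration of those two identities, assuming a
// little-endian host (mirroring the DL.isLittleEndian() check above):
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Val = 0x11223344;

  // Bytes stored in reversed order == one store of __builtin_bswap32(Val).
  uint8_t RevBytes[4], Swapped[4];
  for (int I = 0; I < 4; ++I)
    RevBytes[I] = uint8_t(Val >> (24 - 8 * I)); // p[0]=MSB ... p[3]=LSB
  uint32_t BSwapped = __builtin_bswap32(Val);
  std::memcpy(Swapped, &BSwapped, 4);
  assert(std::memcmp(RevBytes, Swapped, 4) == 0);

  // Two 16-bit halves stored swapped == one store of rotr(Val, 16).
  uint16_t Halves[2] = {uint16_t(Val >> 16), uint16_t(Val)};
  uint32_t Rotated = (Val >> 16) | (Val << 16);
  assert(std::memcmp(Halves, &Rotated, 4) == 0);
  return 0;
}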
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -3844,7 +3756,7 @@ bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
{TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
return false;
- auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
return false;
@@ -3917,7 +3829,7 @@ bool CombinerHelper::matchExtractAllEltsFromBuildVector(
MRI.use_instr_nodbg_end())) {
if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
return false;
- auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
unsigned Idx = Cst.getValue().getZExtValue();
@@ -4064,6 +3976,78 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchICmpToLHSKnownBits(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // Given:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP ne %x, 0
+ //
+ // Or:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP eq %x, 1
+ //
+ // We can replace %cmp with %x assuming true is 1 on the target.
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (!CmpInst::isEquality(Pred))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+ /* IsFP = */ false) != 1)
+ return false;
+ int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+ return false;
+ Register LHS = MI.getOperand(2).getReg();
+ auto KnownLHS = KB->getKnownBits(LHS);
+ if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+ return false;
+ // Make sure replacing Dst with the LHS is a legal operation.
+ LLT LHSTy = MRI.getType(LHS);
+ unsigned LHSSize = LHSTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Op = TargetOpcode::COPY;
+ if (DstSize != LHSSize)
+ Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+ if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+ return true;
+}
+
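// [Editorial sketch - not part of this patch] The combine above rewrites
// "icmp ne %x, 0" / "icmp eq %x, 1" to %x itself when %x is known to be 0 or
// 1 and the target's "true" value is 1. A standalone check of that identity:
#include <cassert>

int main() {
  for (int X = 0; X <= 1; ++X) { // X is known to be 0 or 1.
    assert((X != 0) == X);
    assert((X == 1) == X);
  }
  return 0;
}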
+// Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
+bool CombinerHelper::matchAndOrDisjointMask(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+
+ // Ignore vector types to simplify matching the two constants.
+ // TODO: do this for vectors and scalars via a demanded bits analysis.
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (Ty.isVector())
+ return false;
+
+ Register Src;
+ int64_t MaskAnd;
+ int64_t MaskOr;
+ if (!mi_match(MI, MRI,
+ m_GAnd(m_GOr(m_Reg(Src), m_ICst(MaskOr)), m_ICst(MaskAnd))))
+ return false;
+
+ // Check if MaskOr could turn on any bits in Src.
+ if (MaskAnd & MaskOr)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Src);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
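// [Editorial sketch - not part of this patch] The disjoint-mask combine drops
// the inner OR: (x | c1) & c2 == x & c2 whenever c1 & c2 == 0, because no bit
// turned on by c1 survives the AND. A small exhaustive check over 8-bit
// values:
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t MaskOr = 0xF0, MaskAnd = 0x0F; // Disjoint masks.
  static_assert((MaskOr & MaskAnd) == 0, "masks must be disjoint");
  for (unsigned X = 0; X < 256; ++X)
    assert(((X | MaskOr) & MaskAnd) == (X & MaskAnd));
  return 0;
}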
/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
@@ -4130,6 +4114,104 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
return true;
}
+bool CombinerHelper::matchBitfieldExtractFromShr(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+
+ const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
+ ? TargetOpcode::G_SBFX
+ : TargetOpcode::G_UBFX;
+
+ // Check if the type we would use for the extract is legal
+ LLT Ty = MRI.getType(Dst);
+ LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
+ return false;
+
+ Register ShlSrc;
+ int64_t ShrAmt;
+ int64_t ShlAmt;
+ const unsigned Size = Ty.getScalarSizeInBits();
+
+ // Try to match shr (shl x, c1), c2
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ // Make sure that the shift sizes can fit a bitfield extract
+ if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
+ return false;
+
+ // Skip this combine if the G_SEXT_INREG combine could handle it
+ if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
+ return false;
+
+ // Calculate start position and width of the extract
+ const int64_t Pos = ShrAmt - ShlAmt;
+ const int64_t Width = Size - ShrAmt;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(ExtractTy, Width);
+ auto PosCst = B.buildConstant(ExtractTy, Pos);
+ B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
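// [Editorial sketch - not part of this patch] For the shift pair matched
// above, lshr(shl(x, ShlAmt), ShrAmt) extracts Width = Size - ShrAmt bits
// starting at Pos = ShrAmt - ShlAmt, which is exactly what G_UBFX computes.
// A standalone check of that equivalence on 32-bit values:
#include <cassert>
#include <cstdint>

// Reference semantics of an unsigned bitfield extract (G_UBFX).
static uint32_t ubfx(uint32_t X, unsigned Pos, unsigned Width) {
  return (X >> Pos) & ((Width == 32) ? ~0u : ((1u << Width) - 1));
}

int main() {
  const unsigned Size = 32, ShlAmt = 4, ShrAmt = 12; // ShlAmt <= ShrAmt < Size
  const unsigned Pos = ShrAmt - ShlAmt, Width = Size - ShrAmt;
  for (uint32_t X : {0x00000000u, 0x12345678u, 0xDEADBEEFu, 0xFFFFFFFFu})
    assert(((X << ShlAmt) >> ShrAmt) == ubfx(X, Pos, Width));
  return 0;
}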
+bool CombinerHelper::matchBitfieldExtractFromShrAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ const unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
+
+ const Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
+ TargetOpcode::G_UBFX, Ty, Ty))
+ return false;
+
+ // Try to match shr (and x, c1), c2
+ Register AndSrc;
+ int64_t ShrAmt;
+ int64_t SMask;
+ if (!mi_match(Dst, MRI,
+ m_BinOp(Opcode,
+ m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
+ m_ICst(ShrAmt))))
+ return false;
+
+ const unsigned Size = Ty.getScalarSizeInBits();
+ if (ShrAmt < 0 || ShrAmt >= Size)
+ return false;
+
+ // Check that ubfx can do the extraction, with no holes in the mask.
+ uint64_t UMask = SMask;
+ UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
+ UMask &= maskTrailingOnes<uint64_t>(Size);
+ if (!isMask_64(UMask))
+ return false;
+
+ // Calculate start position and width of the extract.
+ const int64_t Pos = ShrAmt;
+ const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+
+ // It's preferable to keep the shift, rather than form G_SBFX.
+ // TODO: remove the G_AND via demanded bits analysis.
+ if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto WidthCst = B.buildConstant(Ty, Width);
+ auto PosCst = B.buildConstant(Ty, Pos);
+ B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
+ };
+ return true;
+}
+
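// [Editorial sketch - not part of this patch] The and+shift pattern above is
// a bitfield extract when the mask, after filling in the low ShrAmt bits, is
// contiguous: lshr(and(x, Mask), ShrAmt) == ubfx(x, ShrAmt, Width) with
// Width = trailing_ones(Mask | low_mask(ShrAmt)) - ShrAmt. A standalone check:
#include <cassert>
#include <cstdint>

static uint32_t ubfx(uint32_t X, unsigned Pos, unsigned Width) {
  return (X >> Pos) & ((1u << Width) - 1);
}

int main() {
  const uint32_t Mask = 0x0000FF00; // Bits 8..15.
  const unsigned ShrAmt = 8;
  // UMask = Mask | ones(ShrAmt) = 0x0000FFFF, a contiguous 16-bit mask.
  const unsigned Width = 16 - ShrAmt; // trailing_ones(UMask) - ShrAmt == 8.
  for (uint32_t X : {0x00000000u, 0x12345678u, 0xA5A5A5A5u, 0xFFFFFFFFu})
    assert(((X & Mask) >> ShrAmt) == ubfx(X, ShrAmt, Width));
  return 0;
}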
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
MachineInstr &PtrAdd) {
assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);
@@ -4144,10 +4226,10 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
if (MRI.hasOneNonDBGUse(Src1Reg))
return false;
- auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
+ auto C1 = getIConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
if (!C1)
return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
if (!C2)
return false;
@@ -4198,9 +4280,91 @@ bool CombinerHelper::reassociationCanBreakAddressingModePattern(
return false;
}
-bool CombinerHelper::matchReassocPtrAdd(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
- assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
+bool CombinerHelper::matchReassocConstantInnerRHS(GPtrAdd &MI,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
+ Register Src1Reg = MI.getOperand(1).getReg();
+ if (RHS->getOpcode() != TargetOpcode::G_ADD)
+ return false;
+ auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+
+ auto NewBase =
+ Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(NewBase.getReg(0));
+ MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocConstantInnerLHS(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
+ // if and only if (G_PTR_ADD X, C) has one use.
+ Register LHSBase;
+ Optional<ValueAndVReg> LHSCstOff;
+ if (!mi_match(MI.getBaseReg(), MRI,
+ m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
+ return false;
+
+ auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ // When we change LHSPtrAdd's offset register we might cause it to use a reg
+ // before its def. Sink the instruction right before the outer PTR_ADD to
+ // ensure this doesn't happen.
+ LHSPtrAdd->moveBefore(&MI);
+ Register RHSReg = MI.getOffsetReg();
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(LHSCstOff->VReg);
+ Observer.changedInstr(MI);
+ Observer.changingInstr(*LHSPtrAdd);
+ LHSPtrAdd->getOperand(2).setReg(RHSReg);
+ Observer.changedInstr(*LHSPtrAdd);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
+bool CombinerHelper::matchReassocFoldConstantsInSubTree(GPtrAdd &MI,
+ MachineInstr *LHS,
+ MachineInstr *RHS,
+ BuildFnTy &MatchInfo) {
+ // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
+ auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
+ if (!LHSPtrAdd)
+ return false;
+
+ Register Src2Reg = MI.getOperand(2).getReg();
+ Register LHSSrc1 = LHSPtrAdd->getBaseReg();
+ Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
+ auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
+ if (!C1)
+ return false;
+ auto C2 = getIConstantVRegVal(Src2Reg, MRI);
+ if (!C2)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(LHSSrc1);
+ MI.getOperand(2).setReg(NewCst.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return !reassociationCanBreakAddressingModePattern(MI);
+}
+
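// [Editorial sketch - not part of this patch] Folding the two constant
// offsets, G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1 + C2), is
// plain pointer-arithmetic reassociation. A standalone check using char*
// arithmetic in place of G_PTR_ADD:
#include <cassert>
#include <cstdint>

int main() {
  alignas(8) char Buf[64] = {};
  char *Base = Buf;
  const int64_t C1 = 8, C2 = 24;
  assert((Base + C1) + C2 == Base + (C1 + C2));
  return 0;
}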
+bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ auto &PtrAdd = cast<GPtrAdd>(MI);
// We're trying to match a few pointer computation patterns here for
// re-association opportunities.
// 1) Isolating a constant operand to be on the RHS, e.g.:
@@ -4209,49 +4373,26 @@ bool CombinerHelper::matchReassocPtrAdd(
// 2) Folding two constants in each sub-tree as long as such folding
// doesn't break a legal addressing mode.
// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
- MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
- MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
-
- if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
- // Try to match example 1).
- if (RHS->getOpcode() != TargetOpcode::G_ADD)
- return false;
- auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
- if (!C2)
- return false;
+ //
+ // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
+ // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
+ // iff (G_PTR_ADD X, C) has one use.
+ MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
+ MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
+
+ // Try to match example 2.
+ if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- MatchInfo = [=,&MI](MachineIRBuilder &B) {
- LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
+ // Try to match example 3.
+ if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
+ return true;
- auto NewBase =
- Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(NewBase.getReg(0));
- MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
- Observer.changedInstr(MI);
- };
- } else {
- // Try to match example 2.
- Register LHSSrc1 = LHS->getOperand(1).getReg();
- Register LHSSrc2 = LHS->getOperand(2).getReg();
- auto C1 = getConstantVRegVal(LHSSrc2, MRI);
- if (!C1)
- return false;
- auto C2 = getConstantVRegVal(Src2Reg, MRI);
- if (!C2)
- return false;
+ // Try to match example 1.
+ if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
+ return true;
- MatchInfo = [=, &MI](MachineIRBuilder &B) {
- auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
- Observer.changingInstr(MI);
- MI.getOperand(1).setReg(LHSSrc1);
- MI.getOperand(2).setReg(NewCst.getReg(0));
- Observer.changedInstr(MI);
- };
- }
- return !reassociationCanBreakAddressingModePattern(MI);
+ return false;
}
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
@@ -4264,6 +4405,361 @@ bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
return true;
}
+bool CombinerHelper::matchNarrowBinopFeedingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ // Look for a binop feeding into an AND with a mask:
+ //
+ // %add = G_ADD %lhs, %rhs
+ // %and = G_AND %add, 000...11111111
+ //
+ // Check if it's possible to perform the binop at a narrower width and zext
+ // back to the original width like so:
+ //
+ // %narrow_lhs = G_TRUNC %lhs
+ // %narrow_rhs = G_TRUNC %rhs
+ // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
+ // %new_add = G_ZEXT %narrow_add
+ // %and = G_AND %new_add, 000...11111111
+ //
+ // This can allow later combines to eliminate the G_AND if it turns out
+ // that the mask is irrelevant.
+ assert(MI.getOpcode() == TargetOpcode::G_AND);
+ Register Dst = MI.getOperand(0).getReg();
+ Register AndLHS = MI.getOperand(1).getReg();
+ Register AndRHS = MI.getOperand(2).getReg();
+ LLT WideTy = MRI.getType(Dst);
+
+ // If the potential binop has more than one use, then it's possible that one
+ // of those uses will need its full width.
+ if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
+ return false;
+
+ // Check if the LHS feeding the AND is impacted by the high bits that we're
+ // masking out.
+ //
+ // e.g. for 64-bit x, y:
+ //
+ // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
+ MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
+ if (!LHSInst)
+ return false;
+ unsigned LHSOpc = LHSInst->getOpcode();
+ switch (LHSOpc) {
+ default:
+ return false;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ break;
+ }
+
+ // Find the mask on the RHS.
+ auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
+ if (!Cst)
+ return false;
+ auto Mask = Cst->Value;
+ if (!Mask.isMask())
+ return false;
+
+ // No point in combining if there's nothing to truncate.
+ unsigned NarrowWidth = Mask.countTrailingOnes();
+ if (NarrowWidth == WideTy.getSizeInBits())
+ return false;
+ LLT NarrowTy = LLT::scalar(NarrowWidth);
+
+ // Check if adding the zext + truncates could be harmful.
+ auto &MF = *MI.getMF();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
+ !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
+ return false;
+ Register BinOpLHS = LHSInst->getOperand(1).getReg();
+ Register BinOpRHS = LHSInst->getOperand(2).getReg();
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
+ auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
+ auto NarrowBinOp =
+ Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
+ auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(Ext.getReg(0));
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
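// [Editorial sketch - not part of this patch] The narrowing above relies on
// the masked-out high bits never influencing the low bits of these binops:
// (x + y) & 0xFFFF == zext(trunc16(x) + trunc16(y)). A standalone spot-check
// for G_ADD and G_XOR on a few 64-bit inputs:
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Samples[] = {0x0, 0x1234567890ABCDEFull,
                              0xFFFFFFFFFFFFFFFFull, 0x00000000FFFF0001ull};
  for (uint64_t X : Samples)
    for (uint64_t Y : Samples) {
      // Wide op then mask vs. narrow op then zero-extend, for a 16-bit mask.
      assert(((X + Y) & 0xFFFF) ==
             uint64_t(uint16_t(uint16_t(X) + uint16_t(Y))));
      assert(((X ^ Y) & 0xFFFF) == uint64_t(uint16_t(X) ^ uint16_t(Y)));
    }
  return 0;
}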
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+ // Check for a constant 2 or a splat of 2 on the RHS.
+ auto RHS = MI.getOperand(3).getReg();
+ bool IsVector = MRI.getType(RHS).isVector();
+ if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
+ return false;
+ if (IsVector) {
+ // FIXME: There's no mi_match pattern for this yet.
+ auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+ if (!RHSDef)
+ return false;
+ auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+ if (!Splat || *Splat != 2)
+ return false;
+ }
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+ : TargetOpcode::G_SADDO;
+ MI.setDesc(Builder.getTII().get(NewOpc));
+ MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
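// [Editorial sketch - not part of this patch] Multiplying by 2 with overflow
// detection is the same operation as adding the value to itself: both the
// result and the overflow flag of G_UMULO/G_SMULO by 2 match G_UADDO/G_SADDO.
// A standalone check using the GCC/Clang overflow builtins:
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu}) {
    uint32_t MulRes, AddRes;
    bool MulOvf = __builtin_mul_overflow(X, 2u, &MulRes);
    bool AddOvf = __builtin_add_overflow(X, X, &AddRes);
    assert(MulRes == AddRes && MulOvf == AddOvf);

    int32_t SX = int32_t(X), SMulRes, SAddRes;
    bool SMulOvf = __builtin_mul_overflow(SX, 2, &SMulRes);
    bool SAddOvf = __builtin_add_overflow(SX, SX, &SAddRes);
    assert(SMulRes == SAddRes && SMulOvf == SAddOvf);
  }
  return 0;
}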
+MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ auto &UDiv = cast<GenericMachineInstr>(MI);
+ Register Dst = UDiv.getReg(0);
+ Register LHS = UDiv.getReg(1);
+ Register RHS = UDiv.getReg(2);
+ LLT Ty = MRI.getType(Dst);
+ LLT ScalarTy = Ty.getScalarType();
+ const unsigned EltBits = ScalarTy.getScalarSizeInBits();
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
+ auto &MIB = Builder;
+ MIB.setInstrAndDebugLoc(MI);
+
+ bool UseNPQ = false;
+ SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
+
+ auto BuildUDIVPattern = [&](const Constant *C) {
+ auto *CI = cast<ConstantInt>(C);
+ const APInt &Divisor = CI->getValue();
+ UnsignedDivisonByConstantInfo magics =
+ UnsignedDivisonByConstantInfo::get(Divisor);
+ unsigned PreShift = 0, PostShift = 0;
+
+ // If the divisor is even, we can avoid using the expensive fixup by
+ // shifting the divided value upfront.
+ if (magics.IsAdd != 0 && !Divisor[0]) {
+ PreShift = Divisor.countTrailingZeros();
+ // Get magic number for the shifted divisor.
+ magics =
+ UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
+ }
+
+ APInt Magic = magics.Magic;
+
+ bool SelNPQ;
+ if (magics.IsAdd == 0 || Divisor.isOneValue()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
+ "We shouldn't generate an undefined shift!");
+ PostShift = magics.ShiftAmount;
+ SelNPQ = false;
+ } else {
+ PostShift = magics.ShiftAmount - 1;
+ SelNPQ = true;
+ }
+
+ PreShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
+ MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
+ NPQFactors.push_back(
+ MIB.buildConstant(ScalarTy,
+ SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
+ : APInt::getZero(EltBits))
+ .getReg(0));
+ PostShifts.push_back(
+ MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
+ UseNPQ |= SelNPQ;
+ return true;
+ };
+
+ // Collect the shifts/magic values from each element.
+ bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
+ (void)Matched;
+ assert(Matched && "Expected unary predicate match to succeed");
+
+ Register PreShift, PostShift, MagicFactor, NPQFactor;
+ auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
+ if (RHSDef) {
+ PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
+ MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
+ NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
+ PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
+ } else {
+ assert(MRI.getType(RHS).isScalar() &&
+ "Non-build_vector operation should have been a scalar");
+ PreShift = PreShifts[0];
+ MagicFactor = MagicFactors[0];
+ PostShift = PostShifts[0];
+ }
+
+ Register Q = LHS;
+ Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
+
+ // Multiply the numerator (operand 0) by the magic value.
+ Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
+
+ if (UseNPQ) {
+ Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
+
+ // For vectors we might have a mix of non-NPQ/NPQ paths, so use
+ // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
+ if (Ty.isVector())
+ NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
+ else
+ NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
+
+ Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
+ }
+
+ Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
+ auto One = MIB.buildConstant(Ty, 1);
+ auto IsOne = MIB.buildICmp(
+ CmpInst::Predicate::ICMP_EQ,
+ Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
+ return MIB.buildSelect(Ty, IsOne, LHS, Q);
+}
+
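// [Editorial sketch - not part of this patch] The expansion above replaces
// G_UDIV by a constant with a multiply-high plus shifts. A standalone
// demonstration of the underlying identity for division by 3 of a 32-bit
// value, using the well-known magic constant 0xAAAAAAAB = ceil(2^33 / 3);
// the "umulh + shift" is modelled here with a 64-bit product:
#include <cassert>
#include <cstdint>

static uint32_t udiv3(uint32_t X) {
  // q = floor((X * ceil(2^33/3)) / 2^33) == X / 3 for all 32-bit X.
  return uint32_t((uint64_t(X) * 0xAAAAAAABull) >> 33);
}

int main() {
  for (uint32_t X :
       {0u, 1u, 2u, 3u, 100u, 0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu})
    assert(udiv3(X) == X / 3);
  return 0;
}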
+bool CombinerHelper::matchUDivByConst(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UDIV);
+ Register Dst = MI.getOperand(0).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ auto *RHSDef = MRI.getVRegDef(RHS);
+ if (!isConstantOrConstantVector(*RHSDef, MRI))
+ return false;
+
+ auto &MF = *MI.getMF();
+ AttributeList Attr = MF.getFunction().getAttributes();
+ const auto &TLI = getTargetLowering();
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ auto &DL = MF.getDataLayout();
+ if (TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, DL, Ctx), Attr))
+ return false;
+
+ // Don't do this for minsize because the instruction sequence is usually
+ // larger.
+ if (MF.getFunction().hasMinSize())
+ return false;
+
+ // Don't do this if the types are not going to be legal.
+ if (LI) {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
+ return false;
+ if (!isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ICMP,
+ {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
+ DstTy}}))
+ return false;
+ }
+
+ auto CheckEltValue = [&](const Constant *C) {
+ if (auto *CI = dyn_cast_or_null<ConstantInt>(C))
+ return !CI->isZero();
+ return false;
+ };
+ return matchUnaryPredicate(MRI, RHS, CheckEltValue);
+}
+
+void CombinerHelper::applyUDivByConst(MachineInstr &MI) {
+ auto *NewMI = buildUDivUsingMul(MI);
+ replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
+}
+
+bool CombinerHelper::matchUMulHToLShr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_UMULH);
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ auto MatchPow2ExceptOne = [&](const Constant *C) {
+ if (auto *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
+ return false;
+ };
+ if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
+ return false;
+ return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}});
+}
+
+void CombinerHelper::applyUMulHToLShr(MachineInstr &MI) {
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Dst = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(Dst);
+ LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
+ unsigned NumEltBits = Ty.getScalarSizeInBits();
+
+ Builder.setInstrAndDebugLoc(MI);
+ auto LogBase2 = buildLogBase2(RHS, Builder);
+ auto ShiftAmt =
+ Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
+ auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
+ Builder.buildLShr(Dst, LHS, Trunc);
+ MI.eraseFromParent();
+}
+
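// [Editorial sketch - not part of this patch] G_UMULH by a power of two 2^K
// returns the top half of X * 2^K, which is just X shifted right by
// (BitWidth - K). A standalone check for 32-bit values:
#include <cassert>
#include <cstdint>

// Reference G_UMULH: high 32 bits of the 64-bit product.
static uint32_t umulh32(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * uint64_t(B)) >> 32);
}

int main() {
  for (unsigned K = 1; K < 32; ++K) // Powers of two other than 1 (K != 0).
    for (uint32_t X : {0u, 1u, 0x12345678u, 0xFFFFFFFFu})
      assert(umulh32(X, 1u << K) == (X >> (32 - K)));
  return 0;
}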
+bool CombinerHelper::matchRedundantNegOperands(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
+ Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ Register Y = MI.getOperand(2).getReg();
+ LLT Type = MRI.getType(Dst);
+
+ // fold (fadd x, fneg(y)) -> (fsub x, y)
+ // fold (fadd fneg(y), x) -> (fsub x, y)
+ // G_FADD is commutative so both cases are checked by m_GFAdd
+ if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
+ Opc = TargetOpcode::G_FSUB;
+ }
+ /// fold (fsub x, fneg(y)) -> (fadd x, y)
+ else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
+ isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
+ Opc = TargetOpcode::G_FADD;
+ }
+ // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
+ // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
+ // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
+ // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
+ else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
+ Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
+ mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
+ mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
+ // no opcode change
+ } else
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(Opc));
+ MI.getOperand(1).setReg(X);
+ MI.getOperand(2).setReg(Y);
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
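// [Editorial sketch - not part of this patch] The rewrites above rely on IEEE
// negation identities: fadd(x, fneg(y)) == fsub(x, y), fsub(x, fneg(y)) ==
// fadd(x, y), and the two negations cancel for fmul/fdiv/fma. A standalone
// spot-check with ordinary finite doubles (NaN payload and sign-of-zero
// subtleties are not exercised here):
#include <cassert>

int main() {
  const double Vals[] = {-2.5, -0.5, 1.0, 3.25, 1024.0};
  for (double X : Vals)
    for (double Y : Vals) {
      assert(X + (-Y) == X - Y);
      assert(X - (-Y) == X + Y);
      assert((-X) * (-Y) == X * Y);
      assert((-X) / (-Y) == X / Y);
    }
  return 0;
}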
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 8146a67d4dfb..306af808659a 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -9,7 +9,7 @@
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
-//===------------------
+//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -57,7 +57,7 @@ KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
KnownBits GISelKnownBits::getKnownBits(Register R) {
const LLT Ty = MRI.getType(R);
APInt DemandedElts =
- Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return getKnownBits(R, DemandedElts);
}
@@ -198,8 +198,8 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
case TargetOpcode::COPY:
case TargetOpcode::G_PHI:
case TargetOpcode::PHI: {
- Known.One = APInt::getAllOnesValue(BitWidth);
- Known.Zero = APInt::getAllOnesValue(BitWidth);
+ Known.One = APInt::getAllOnes(BitWidth);
+ Known.Zero = APInt::getAllOnes(BitWidth);
// Destination registers should not have subregisters at this
// point of the pipeline, otherwise the main live-range will be
// defined more than once, which is against SSA.
@@ -245,7 +245,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
break;
}
case TargetOpcode::G_CONSTANT: {
- auto CstVal = getConstantVRegVal(R, MRI);
+ auto CstVal = getIConstantVRegVal(R, MRI);
if (!CstVal)
break;
Known = KnownBits::makeConstant(*CstVal);
@@ -510,6 +510,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Known = Known.reverseBits();
break;
}
+ case TargetOpcode::G_CTPOP: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // We can bound the space the count needs. Also, bits known to be zero can't
+ // contribute to the population.
+ unsigned BitsPossiblySet = Known2.countMaxPopulation();
+ unsigned LowBits = Log2_32(BitsPossiblySet) + 1;
+ Known.Zero.setBitsFrom(LowBits);
+ // TODO: we could bound Known.One using the lower bound on the number of
+ // bits which might be set provided by popcnt KnownOne2.
+ break;
+ }
case TargetOpcode::G_UBFX: {
KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
@@ -676,9 +688,8 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
LLT Ty = MRI.getType(R);
- APInt DemandedElts = Ty.isVector()
- ? APInt::getAllOnesValue(Ty.getNumElements())
- : APInt(1, 1);
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
return computeNumSignBits(R, DemandedElts, Depth);
}
diff --git a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index e0391e6f6467..252b931602c6 100644
--- a/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -18,6 +18,7 @@ using namespace llvm;
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
initializeLegalizerPass(Registry);
+ initializeLoadStoreOptPass(Registry);
initializeLocalizerPass(Registry);
initializeRegBankSelectPass(Registry);
initializeInstructionSelectPass(Registry);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 73b763710fdf..87cc60d51bc2 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,6 +33,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -47,6 +49,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -114,7 +117,7 @@ static void reportTranslationError(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (TPC.isGlobalISelAbortEnabled())
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
else
ORE.emit(R);
}
@@ -566,7 +569,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
if (BrInst.isUnconditional()) {
// If the unconditional target is the layout successor, fallthrough.
- if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+ if (OptLevel == CodeGenOpt::None || !CurMBB.isLayoutSuccessor(Succ0MBB))
MIRBuilder.buildBr(*Succ0MBB);
// Link successors.
@@ -739,8 +742,7 @@ bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
// FIXME: At the moment we don't do any splitting optimizations here like
// SelectionDAG does, so this worklist only has one entry.
while (!WorkList.empty()) {
- SwitchWorkListItem W = WorkList.back();
- WorkList.pop_back();
+ SwitchWorkListItem W = WorkList.pop_back_val();
if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
return false;
}
@@ -784,7 +786,7 @@ bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
JT.Reg = Sub.getReg(0);
- if (JTH.OmitRangeCheck) {
+ if (JTH.FallthroughUnreachable) {
if (JT.MBB != HeaderBB->getNextNode())
MIB.buildBr(*JT.MBB);
return true;
@@ -936,11 +938,10 @@ bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
}
}
- // Skip the range check if the fallthrough block is unreachable.
if (FallthroughUnreachable)
- JTH->OmitRangeCheck = true;
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -1004,14 +1005,22 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
- // Ensure that the type will fit the mask value.
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
LLT MaskTy = SwitchOpTy;
- for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
- if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
- // Switch table case range are encoded into series of masks.
- // Just use pointer type, it's guaranteed to fit.
- MaskTy = LLT::scalar(64);
- break;
+ if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
+ !isPowerOf2_32(MaskTy.getSizeInBits()))
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ else {
+ // Ensure that the type will fit the mask value.
+ for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
+ if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ MaskTy = LLT::scalar(PtrTy.getSizeInBits());
+ break;
+ }
}
}
Register SubReg = RangeSub.getReg(0);
@@ -1023,13 +1032,13 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
MachineBasicBlock *MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
@@ -1129,10 +1138,8 @@ bool IRTranslator::lowerBitTestWorkItem(
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
@@ -1297,11 +1304,9 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
Align BaseAlign = getMemOpAlign(LI);
- AAMDNodes AAMetadata;
- LI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Regs[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
+ commonAlignment(BaseAlign, Offsets[i] / 8), LI.getAAMetadata(), Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -1339,11 +1344,9 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
Align BaseAlign = getMemOpAlign(SI);
- AAMDNodes AAMetadata;
- SI.getAAMetadata(AAMetadata);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, MRI->getType(Vals[i]),
- commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
+ commonAlignment(BaseAlign, Offsets[i] / 8), SI.getAAMetadata(), nullptr,
SI.getSyncScopeID(), SI.getOrdering());
MIRBuilder.buildStore(Vals[i], Addr, *MMO);
}
@@ -1590,8 +1593,7 @@ bool IRTranslator::translateMemFunc(const CallInst &CI,
Align DstAlign;
Align SrcAlign;
unsigned IsVol =
- cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
- ->getZExtValue();
+ cast<ConstantInt>(CI.getArgOperand(CI.arg_size() - 1))->getZExtValue();
if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
DstAlign = MCI->getDestAlign().valueOrOne();
@@ -1763,6 +1765,10 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_VECREDUCE_UMAX;
case Intrinsic::vector_reduce_umin:
return TargetOpcode::G_VECREDUCE_UMIN;
+ case Intrinsic::lround:
+ return TargetOpcode::G_LROUND;
+ case Intrinsic::llround:
+ return TargetOpcode::G_LLROUND;
}
return Intrinsic::not_intrinsic;
}
@@ -1779,7 +1785,7 @@ bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
// Yes. Let's translate it.
SmallVector<llvm::SrcOp, 4> VRegs;
- for (auto &Arg : CI.arg_operands())
+ for (auto &Arg : CI.args())
VRegs.push_back(getOrCreateVReg(*Arg));
MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
@@ -2172,7 +2178,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = CI.arg_size(); Idx < E; ++Idx) {
Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -2228,6 +2234,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::trap:
+ case Intrinsic::debugtrap:
+ case Intrinsic::ubsantrap: {
+ StringRef TrapFuncName =
+ CI.getAttributes().getFnAttr("trap-func-name").getValueAsString();
+ if (TrapFuncName.empty())
+ break; // Use the default handling.
+ CallLowering::CallLoweringInfo Info;
+ if (ID == Intrinsic::ubsantrap) {
+ Info.OrigArgs.push_back({getOrCreateVRegs(*CI.getArgOperand(0)),
+ CI.getArgOperand(0)->getType(), 0});
+ }
+ Info.Callee = MachineOperand::CreateES(TrapFuncName.data());
+ Info.CB = &CI;
+ Info.OrigRet = {Register(), Type::getVoidTy(CI.getContext()), 0};
+ return CLI->lowerCall(MIRBuilder, Info);
+ }
#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \
case Intrinsic::INTRINSIC:
#include "llvm/IR/ConstrainedOps.def"
@@ -2321,6 +2344,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (CI.isInlineAsm())
return translateInlineAsm(CI, MIRBuilder);
+ diagnoseDontCall(CI);
+
Intrinsic::ID ID = Intrinsic::not_intrinsic;
if (F && F->isIntrinsic()) {
ID = F->getIntrinsicID();
@@ -2347,7 +2372,7 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
- for (auto &Arg : enumerate(CI.arg_operands())) {
+ for (auto &Arg : enumerate(CI.args())) {
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
@@ -2360,10 +2385,15 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
- } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
- auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
- if (!MDN) // This was probably an MDString.
- return false;
+ } else if (auto *MDVal = dyn_cast<MetadataAsValue>(Arg.value())) {
+ auto *MD = MDVal->getMetadata();
+ auto *MDN = dyn_cast<MDNode>(MD);
+ if (!MDN) {
+ if (auto *ConstMD = dyn_cast<ConstantAsMetadata>(MD))
+ MDN = MDNode::get(MF->getFunction().getContext(), ConstMD);
+ else // This was probably an MDString.
+ return false;
+ }
MIB.addMetadata(MDN);
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
@@ -2472,32 +2502,19 @@ bool IRTranslator::translateInvoke(const User &U,
if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
return false;
- bool LowerInlineAsm = false;
- if (I.isInlineAsm()) {
- const InlineAsm *IA = cast<InlineAsm>(I.getCalledOperand());
- if (!IA->canThrow()) {
- // Fast path without emitting EH_LABELs.
-
- if (!translateInlineAsm(I, MIRBuilder))
- return false;
-
- MachineBasicBlock *InvokeMBB = &MIRBuilder.getMBB(),
- *ReturnMBB = &getMBB(*ReturnBB);
-
- // Update successor info.
- addSuccessorWithProb(InvokeMBB, ReturnMBB, BranchProbability::getOne());
-
- MIRBuilder.buildBr(*ReturnMBB);
- return true;
- } else {
- LowerInlineAsm = true;
- }
- }
+ bool LowerInlineAsm = I.isInlineAsm();
+ bool NeedEHLabel = true;
+ // If it can't throw then use a fast-path without emitting EH labels.
+ if (LowerInlineAsm)
+ NeedEHLabel = (cast<InlineAsm>(I.getCalledOperand()))->canThrow();
// Emit the actual call, bracketed by EH_LABELs so that the MF knows about
// the region covered by the try.
- MCSymbol *BeginSymbol = Context.createTempSymbol();
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ MCSymbol *BeginSymbol = nullptr;
+ if (NeedEHLabel) {
+ BeginSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+ }
if (LowerInlineAsm) {
if (!translateInlineAsm(I, MIRBuilder))
@@ -2505,8 +2522,11 @@ bool IRTranslator::translateInvoke(const User &U,
} else if (!translateCallBase(I, MIRBuilder))
return false;
- MCSymbol *EndSymbol = Context.createTempSymbol();
- MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ MCSymbol *EndSymbol = nullptr;
+ if (NeedEHLabel) {
+ EndSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+ }
SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -2528,7 +2548,12 @@ bool IRTranslator::translateInvoke(const User &U,
}
InvokeMBB->normalizeSuccProbs();
- MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ if (NeedEHLabel) {
+ assert(BeginSymbol && "Expected a begin symbol!");
+ assert(EndSymbol && "Expected an end symbol!");
+ MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ }
+
MIRBuilder.buildBr(ReturnMBB);
return true;
}
@@ -2670,6 +2695,28 @@ bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
+bool IRTranslator::translateUnreachable(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ if (!MF->getTarget().Options.TrapUnreachable)
+ return true;
+
+ auto &UI = cast<UnreachableInst>(U);
+ // We may be able to ignore unreachable behind a noreturn call.
+ if (MF->getTarget().Options.NoTrapAfterNoreturn) {
+ const BasicBlock &BB = *UI.getParent();
+ if (&UI != &BB.front()) {
+ BasicBlock::const_iterator PredI =
+ std::prev(BasicBlock::const_iterator(UI));
+ if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
+ if (Call->doesNotReturn())
+ return true;
+ }
+ }
+ }
+
+ MIRBuilder.buildIntrinsic(Intrinsic::trap, ArrayRef<Register>(), true);
+ return true;
+}
+
bool IRTranslator::translateInsertElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
@@ -2757,14 +2804,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Register Cmp = getOrCreateVReg(*I.getCompareOperand());
Register NewVal = getOrCreateVReg(*I.getNewValOperand());
- AAMDNodes AAMetadata;
- I.getAAMetadata(AAMetadata);
-
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
*MF->getMachineMemOperand(
MachinePointerInfo(I.getPointerOperand()), Flags, MRI->getType(Cmp),
- getMemOpAlign(I), AAMetadata, nullptr, I.getSyncScopeID(),
+ getMemOpAlign(I), I.getAAMetadata(), nullptr, I.getSyncScopeID(),
I.getSuccessOrdering(), I.getFailureOrdering()));
return true;
}
@@ -2824,14 +2868,11 @@ bool IRTranslator::translateAtomicRMW(const User &U,
break;
}
- AAMDNodes AAMetadata;
- I.getAAMetadata(AAMetadata);
-
MIRBuilder.buildAtomicRMW(
Opcode, Res, Addr, Val,
*MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
Flags, MRI->getType(Val), getMemOpAlign(I),
- AAMetadata, nullptr, I.getSyncScopeID(),
+ I.getAAMetadata(), nullptr, I.getSyncScopeID(),
I.getOrdering()));
return true;
}
@@ -2985,7 +3026,8 @@ bool IRTranslator::translate(const Constant &C, Register Reg) {
return true;
}
-void IRTranslator::finalizeBasicBlock() {
+bool IRTranslator::finalizeBasicBlock(const BasicBlock &BB,
+ MachineBasicBlock &MBB) {
for (auto &BTB : SL->BitTestCases) {
// Emit header first, if it wasn't already emitted.
if (!BTB.Emitted)
@@ -3005,7 +3047,7 @@ void IRTranslator::finalizeBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Second-to-last bit-test with contiguous range: fall through to the
// target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
@@ -3019,7 +3061,7 @@ void IRTranslator::finalizeBasicBlock() {
emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// We need to record the replacement phi edge here that normally
// happens in emitBitTestCase before we delete the case, otherwise the
// phi edge will be lost.
@@ -3054,6 +3096,176 @@ void IRTranslator::finalizeBasicBlock() {
for (auto &SwCase : SL->SwitchCases)
emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
SL->SwitchCases.clear();
+
+ // Check if we need to generate stack-protector guard checks.
+ StackProtector &SP = getAnalysis<StackProtector>();
+ if (SP.shouldEmitSDCheck(BB)) {
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ bool FunctionBasedInstrumentation =
+ TLI.getSSPStackGuardCheck(*MF->getFunction().getParent());
+ SPDescriptor.initialize(&BB, &MBB, FunctionBasedInstrumentation);
+ }
+ // Handle stack protector.
+ if (SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+ LLVM_DEBUG(dbgs() << "Unimplemented stack protector case\n");
+ return false;
+ } else if (SPDescriptor.shouldEmitStackProtector()) {
+ MachineBasicBlock *ParentMBB = SPDescriptor.getParentMBB();
+ MachineBasicBlock *SuccessMBB = SPDescriptor.getSuccessMBB();
+
+ // Find the split point to split the parent mbb. At the same time copy all
+ // physical registers used in the tail of parent mbb into virtual registers
+ // before the split point and back into physical registers after the split
+ // point. This prevents us from having to deal with Live-ins and many other
+ // register allocation issues caused by us splitting the parent mbb. The
+ // register allocator will clean up said virtual copies later on.
+ MachineBasicBlock::iterator SplitPoint = findSplitPointForStackProtector(
+ ParentMBB, *MF->getSubtarget().getInstrInfo());
+
+ // Splice the terminator of ParentMBB into SuccessMBB.
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
+ ParentMBB->end());
+
+ // Add compare/jump on neq/jump to the parent BB.
+ if (!emitSPDescriptorParent(SPDescriptor, ParentMBB))
+ return false;
+
+ // CodeGen Failure MBB if we have not codegened it yet.
+ MachineBasicBlock *FailureMBB = SPDescriptor.getFailureMBB();
+ if (FailureMBB->empty()) {
+ if (!emitSPDescriptorFailure(SPDescriptor, FailureMBB))
+ return false;
+ }
+
+ // Clear the Per-BB State.
+ SPDescriptor.resetPerBBState();
+ }
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorParent(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *ParentBB) {
+ CurBuilder->setInsertPt(*ParentBB, ParentBB->end());
+ // First create the loads to the guard/stack slot for the comparison.
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+ LLT PtrMemTy = getLLTForMVT(TLI.getPointerMemTy(*DL));
+
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
+
+ Register Guard;
+ Register StackSlotPtr = CurBuilder->buildFrameIndex(PtrTy, FI).getReg(0);
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
+ Align Align = DL->getPrefTypeAlign(Type::getInt8PtrTy(M.getContext()));
+
+ // Generate code to load the content of the guard slot.
+ Register GuardVal =
+ CurBuilder
+ ->buildLoad(PtrMemTy, StackSlotPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile)
+ .getReg(0);
+
+ if (TLI.useStackGuardXorFP()) {
+ LLVM_DEBUG(dbgs() << "Stack protector xor'ing with FP not yet implemented");
+ return false;
+ }
+
+ // Retrieve guard check function, nullptr if instrumentation is inlined.
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
+ // This path is currently untestable on GlobalISel, since the only platform
+ // that needs this seems to be Windows, and we fall back on that currently.
+ // The code still lives here in case that changes.
+ // Silence warning about unused variable until the code below that uses
+ // 'GuardCheckFn' is enabled.
+ (void)GuardCheckFn;
+ return false;
+#if 0
+ // The target provides a guard check function to validate the guard value.
+ // Generate a call to that function with the content of the guard slot as
+ // argument.
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
+ assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+ ISD::ArgFlagsTy Flags;
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ Flags.setInReg();
+ CallLowering::ArgInfo GuardArgInfo(
+ {GuardVal, FnTy->getParamType(0), {Flags}});
+
+ CallLowering::CallLoweringInfo Info;
+ Info.OrigArgs.push_back(GuardArgInfo);
+ Info.CallConv = GuardCheckFn->getCallingConv();
+ Info.Callee = MachineOperand::CreateGA(GuardCheckFn, 0);
+ Info.OrigRet = {Register(), FnTy->getReturnType()};
+ if (!CLI->lowerCall(MIRBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector check\n");
+ return false;
+ }
+ return true;
+#endif
+ }
+
+ // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
+ // Otherwise, emit a volatile load to retrieve the stack guard value.
+ if (TLI.useLoadStackGuardNode()) {
+ Guard =
+ MRI->createGenericVirtualRegister(LLT::scalar(PtrTy.getSizeInBits()));
+ getStackGuard(Guard, *CurBuilder);
+ } else {
+ // TODO: test using android subtarget when we support @llvm.thread.pointer.
+ const Value *IRGuard = TLI.getSDagStackGuard(M);
+ Register GuardPtr = getOrCreateVReg(*IRGuard);
+
+ Guard = CurBuilder
+ ->buildLoad(PtrMemTy, GuardPtr,
+ MachinePointerInfo::getFixedStack(*MF, FI), Align,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOVolatile)
+ .getReg(0);
+ }
+
+ // Perform the comparison.
+ auto Cmp =
+ CurBuilder->buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Guard, GuardVal);
+ // If the guard/stackslot do not equal, branch to failure MBB.
+ CurBuilder->buildBrCond(Cmp, *SPD.getFailureMBB());
+ // Otherwise branch to success MBB.
+ CurBuilder->buildBr(*SPD.getSuccessMBB());
+ return true;
+}
+
+bool IRTranslator::emitSPDescriptorFailure(StackProtectorDescriptor &SPD,
+ MachineBasicBlock *FailureBB) {
+ CurBuilder->setInsertPt(*FailureBB, FailureBB->end());
+ const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
+
+ const RTLIB::Libcall Libcall = RTLIB::STACKPROTECTOR_CHECK_FAIL;
+ const char *Name = TLI.getLibcallName(Libcall);
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = {Register(), Type::getVoidTy(MF->getFunction().getContext()),
+ 0};
+ if (!CLI->lowerCall(*CurBuilder, Info)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower call to stack protector fail\n");
+ return false;
+ }
+
+ // On PS4, the "return address" must still be within the calling function,
+ // even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ // WebAssembly needs an unreachable instruction after a non-returning call,
+ // because the function return type can be different from __stack_chk_fail's
+ // return type (void).
+ const TargetMachine &TM = MF->getTarget();
+ if (TM.getTargetTriple().isPS4CPU() || TM.getTargetTriple().isWasm()) {
+ LLVM_DEBUG(dbgs() << "Unhandled trap emission for stack protector fail\n");
+ return false;
+ }
+ return true;
}
void IRTranslator::finalizeFunction() {
@@ -3069,6 +3281,7 @@ void IRTranslator::finalizeFunction() {
EntryBuilder.reset();
CurBuilder.reset();
FuncInfo.clear();
+ SPDescriptor.resetPerFunctionState();
}
/// Returns true if a BasicBlock \p BB within a variadic function contains a
@@ -3079,7 +3292,7 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
// Walk the block backwards, because tail calls usually only appear at the end
// of a block.
- return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
+ return llvm::any_of(llvm::reverse(BB), [](const Instruction &I) {
const auto *CI = dyn_cast<CallInst>(&I);
return CI && CI->isMustTailCall();
});
@@ -3088,8 +3301,6 @@ static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF = &CurMF;
const Function &F = MF->getFunction();
- if (F.empty())
- return false;
GISelCSEAnalysisWrapper &Wrapper =
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
// Set the CSEConfig and run the analysis.
@@ -3257,7 +3468,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
- finalizeBasicBlock();
+ if (!finalizeBasicBlock(*BB, MBB))
+ return false;
}
#ifndef NDEBUG
WrapperObserver.removeObserver(&Verifier);
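The diagnoseDontCall(CI) call added to translateCall reports calls to functions carrying the "dontcall-error"/"dontcall-warn" IR attributes, matching what the SelectionDAG path already does. A minimal sketch of source that would exercise it, assuming Clang's error function attribute (which lowers to dontcall-error); the function names are illustrative only, not part of this patch:

    // Sketch only, under the assumptions above.
    __attribute__((error("do not call"))) void forbidden();
    void caller() {
      forbidden(); // now also diagnosed when the function is selected via GlobalISel
    }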
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index bb4d41cfd69f..4ae427484945 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -325,7 +325,8 @@ bool InlineAsmLowering::lowerInlineAsm(
return false;
}
- OpInfo.ConstraintVT = TLI->getValueType(DL, OpTy, true).getSimpleVT();
+ OpInfo.ConstraintVT =
+ TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT();
} else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
@@ -334,13 +335,17 @@ bool InlineAsmLowering::lowerInlineAsm(
TLI->getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = TLI->getSimpleValueType(DL, Call.getType());
+ OpInfo.ConstraintVT =
+ TLI->getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
} else {
OpInfo.ConstraintVT = MVT::Other;
}
+ if (OpInfo.ConstraintVT == MVT::i64x8)
+ return false;
+
// Compute the constraint code and ConstraintType to use.
computeConstraintToUse(TLI, OpInfo);
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 75a8f03fcb3f..9b2692486384 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -30,9 +30,9 @@
#include "llvm/Config/config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "instruction-select"
@@ -130,9 +130,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
#endif
+ // Keep track of selected blocks, so we can delete unreachable ones later.
+ DenseSet<MachineBasicBlock *> SelectedBlocks;
for (MachineBasicBlock *MBB : post_order(&MF)) {
ISel->CurMBB = MBB;
+ SelectedBlocks.insert(MBB);
if (MBB->empty())
continue;
@@ -205,6 +208,15 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
if (MBB.empty())
continue;
+ if (!SelectedBlocks.contains(&MBB)) {
+ // This is an unreachable block and therefore hasn't been selected, since
+ // the main selection loop above uses a postorder block traversal.
+ // We delete all the instructions in this block since it's unreachable.
+ MBB.clear();
+ // Don't delete the block in case the block has its address taken or is
+ // still being referenced by a phi somewhere.
+ continue;
+ }
// Try to find redundant copies b/w vregs of the same register class.
bool ReachedBegin = false;
for (auto MII = std::prev(MBB.end()), Begin = MBB.begin(); !ReachedBegin;) {
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 4fec9e628ddb..dc5a4d8f85aa 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -37,7 +37,7 @@ bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
return VRegVal->Value.getSExtValue() == Value;
return false;
}
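getIConstantVRegValWithLookThrough, used in isOperandImmEqual above and throughout the LegalizerHelper changes below, is the integer-only replacement for getConstantVRegValWithLookThrough: it looks through copies and extensions and succeeds only for values defined by G_CONSTANT. A minimal usage sketch, assuming a Register Reg and a MachineRegisterInfo MRI are in scope:

    // Sketch only: fold against a known integer constant if one feeds Reg.
    if (auto VRegVal = getIConstantVRegValWithLookThrough(Reg, MRI)) {
      int64_t Imm = VRegVal->Value.getSExtValue(); // Value is an APInt
      (void)Imm; // ... use the constant to fold or select ...
    }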
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 7c5e4e52ca3e..1f0738a8d9d2 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -153,6 +153,14 @@ LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
};
}
+LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isScalar() && QueryTy.getSizeInBits() % Size != 0;
+ };
+}
+
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index fc2570ae4b8e..75b7fcb5663a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -63,6 +63,16 @@ LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
};
}
+LegalizeMutation
+LegalizeMutations::widenScalarOrEltToNextMultipleOf(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits = alignTo(Ty.getScalarSizeInBits(), Size);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
+ };
+}
+
LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
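Together with the sizeNotMultipleOf predicate added in LegalityPredicates.cpp above, this mutation lets a target round scalar (or vector element) types up to the next multiple of a given size. A hypothetical rule sketch, assumed to live in a target's LegalizerInfo constructor; the opcode and the multiple of 32 are illustrative, not taken from this patch:

    // Sketch only: widen scalars whose size is not a multiple of 32 bits.
    using namespace LegalityPredicates;
    using namespace LegalizeMutations;
    getActionDefinitionsBuilder(TargetOpcode::G_ADD)
        .legalFor({LLT::scalar(32), LLT::scalar(64)})
        .widenScalarIf(sizeNotMultipleOf(0, 32),
                       widenScalarOrEltToNextMultipleOf(0, 32));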
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 635b1445ee07..0ab4a7f64840 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -218,9 +218,6 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
- auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
- WrapperObserver.erasingInstr(*DeadMI);
- };
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
do {
@@ -232,9 +229,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
- LocObserver.checkpoint(false);
+ eraseInstr(MI, MRI, &LocObserver);
continue;
}
@@ -281,10 +276,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
assert(isPreISelGenericOpcode(MI.getOpcode()) &&
"Expecting generic opcode");
if (isTriviallyDead(MI, MRI)) {
- LLVM_DEBUG(dbgs() << MI << "Is dead\n");
- RemoveDeadInstFromLists(&MI);
- MI.eraseFromParentAndMarkDBGValuesForRemoval();
- LocObserver.checkpoint(false);
+ eraseInstr(MI, MRI, &LocObserver);
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
@@ -292,11 +284,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
WrapperObserver)) {
WorkListObserver.printNewInstrs();
- for (auto *DeadMI : DeadInstructions) {
- LLVM_DEBUG(dbgs() << "Is dead: " << *DeadMI);
- RemoveDeadInstFromLists(DeadMI);
- DeadMI->eraseFromParentAndMarkDBGValuesForRemoval();
- }
+ eraseInstrs(DeadInstructions, MRI, &LocObserver);
LocObserver.checkpoint(
VerifyDebugLocs ==
DebugLocVerifyLevel::LegalizationsAndArtifactCombiners);
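Both dead-instruction paths above now go through the eraseInstr/eraseInstrs helpers (from GlobalISel/Utils.h), which erase the instructions and take care of the LostDebugLocObserver checkpointing that was previously open-coded at each call site. A sketch of the resulting call-site shape, assuming MRI and LocObserver are in scope:

    // Sketch only: mirrors the two call sites in the hunks above.
    if (isTriviallyDead(MI, MRI))
      eraseInstr(MI, MRI, &LocObserver);                // single dead instruction
    // ...
    eraseInstrs(DeadInstructions, MRI, &LocObserver);   // batch from the combiner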
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index c1e0d2549c42..c74bec7dfc0d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "legalizer"
@@ -497,8 +498,8 @@ static bool isLibCallInTailPosition(MachineInstr &MI,
return false;
// It's not safe to eliminate the sign / zero extension of the return value.
- if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
+ CallerAttrs.hasRetAttr(Attribute::SExt))
return false;
// Only tail call if the following instruction is a standard return or if we
@@ -2051,10 +2052,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Register SrcReg = MI.getOperand(1).getReg();
- // First ZEXT the input.
- auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+ // First extend the input.
+ unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
+ MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
+ ? TargetOpcode::G_ANYEXT
+ : TargetOpcode::G_ZEXT;
+ auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
LLT CurTy = MRI.getType(SrcReg);
- if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
+ unsigned NewOpc = MI.getOpcode();
+ if (NewOpc == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
@@ -2062,10 +2068,12 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
MIBSrc = MIRBuilder.buildOr(
WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
+ // Now we know the operand is non-zero, so use the more relaxed opcode.
+ NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
}
// Perform the operation at the larger size.
- auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
+ auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
@@ -2427,7 +2435,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(
MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
- TargetOpcode::G_SEXT);
+ TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy, 0);
Observer.changedInstr(MI);
@@ -2662,7 +2670,7 @@ static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
// Now figure out the amount we need to shift to get the target bits.
auto OffsetMask = B.buildConstant(
- IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
+ IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
return B.buildShl(IdxTy, OffsetIdx,
B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
@@ -2886,13 +2894,14 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
MachineMemOperand &MMO = LoadMI.getMMO();
LLT MemTy = MMO.getMemoryType();
MachineFunction &MF = MIRBuilder.getMF();
- if (MemTy.isVector())
- return UnableToLegalize;
unsigned MemSizeInBits = MemTy.getSizeInBits();
unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
if (MemSizeInBits != MemStoreSizeInBits) {
+ if (MemTy.isVector())
+ return UnableToLegalize;
+
// Promote to a byte-sized load if not loading an integral number of
// bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
@@ -2928,16 +2937,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
return Legalized;
}
- // This load needs splitting into power of 2 sized loads.
- if (DstTy.isVector())
- return UnableToLegalize;
- if (isPowerOf2_32(MemSizeInBits))
- return UnableToLegalize; // Don't know what we're being asked to do.
-
// Big endian lowering not implemented.
if (MIRBuilder.getDataLayout().isBigEndian())
return UnableToLegalize;
+ // This load needs splitting into power of 2 sized loads.
+ //
// Our strategy here is to generate anyextending loads for the smaller
// types up to next power-2 result type, and then combine the two larger
// result values together, before truncating back down to the non-pow-2
@@ -2950,8 +2955,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
// v1 = i24 trunc v5
// By doing this we generate the correct truncate which should get
// combined away as an artifact with a matching extend.
- uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
- uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
+
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ // This load needs splitting into power of 2 sized loads.
+ LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ SmallSplitSize = MemSizeInBits - LargeSplitSize;
+ } else {
+ // This is already a power of 2, but we still need to split this in half.
+ //
+ // Assume we're being asked to decompose an unaligned load.
+ // TODO: If this requires multiple splits, handle them all at once.
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize;
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
+
+ if (MemTy.isVector()) {
+ // TODO: Handle vector extloads
+ if (MemTy != DstTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
+ }
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -2976,9 +3007,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (AnyExtTy == DstTy)
MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
- else {
+ else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
MIRBuilder.buildTrunc(DstReg, {Or});
+ } else {
+ assert(DstTy.isPointer() && "expected pointer");
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+
+ // FIXME: We currently consider this to be illegal for non-integral address
+ // spaces, but we still need a way to reinterpret the bits.
+ MIRBuilder.buildIntToPtr(DstReg, Or);
}
LoadMI.eraseFromParent();
@@ -2999,13 +3037,13 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
MachineMemOperand &MMO = **StoreMI.memoperands_begin();
LLT MemTy = MMO.getMemoryType();
- if (SrcTy.isVector())
- return UnableToLegalize;
-
unsigned StoreWidth = MemTy.getSizeInBits();
unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
if (StoreWidth != StoreSizeInBits) {
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+
// Promote to a byte-sized store with upper bits zero if not
// storing an integral number of bytes. For example, promote
// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
@@ -3026,18 +3064,44 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
return Legalized;
}
- if (isPowerOf2_32(MemTy.getSizeInBits()))
- return UnableToLegalize; // Don't know what we're being asked to do.
+ if (MemTy.isVector()) {
+ // TODO: Handle vector trunc stores
+ if (MemTy != SrcTy)
+ return UnableToLegalize;
+
+ // TODO: We can do better than scalarizing the vector and at least split it
+ // in half.
+ return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
+ }
+
+ unsigned MemSizeInBits = MemTy.getSizeInBits();
+ uint64_t LargeSplitSize, SmallSplitSize;
+
+ if (!isPowerOf2_32(MemSizeInBits)) {
+ LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
+ } else {
+ auto &Ctx = MF.getFunction().getContext();
+ if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
+ }
// Extend to the next pow-2. If this store was itself the result of lowering,
// e.g. an s56 store being broken into s32 + s24, we might have a stored type
- // that's wider the stored size.
- const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
+ // that's wider than the stored size.
+ unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
+ const LLT NewSrcTy = LLT::scalar(AnyExtSize);
+
+ if (SrcTy.isPointer()) {
+ const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
+ SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
+ }
+
auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
// Obtain the smaller value by shifting away the larger value.
- uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
- uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
@@ -3045,9 +3109,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
LLT PtrTy = MRI.getType(PtrReg);
auto OffsetCst = MIRBuilder.buildConstant(
LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
- Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
auto SmallPtr =
- MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
+ MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
MachineMemOperand *LargeMMO =
MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
@@ -3424,6 +3487,14 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case G_ROTL:
case G_ROTR:
return lowerRotate(MI);
+ case G_MEMSET:
+ case G_MEMCPY:
+ case G_MEMMOVE:
+ return lowerMemCpyFamily(MI);
+ case G_MEMCPY_INLINE:
+ return lowerMemcpyInline(MI);
+ GISEL_VECREDUCE_CASES_NONSEQ
+ return lowerVectorReduction(MI);
}
}
@@ -4004,9 +4075,7 @@ LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
// If the index is a constant, we can really break this down as you would
// expect, and index into the target size pieces.
int64_t IdxVal;
- auto MaybeCst =
- getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
- /*HandleFConstants*/ false);
+ auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
if (MaybeCst) {
IdxVal = MaybeCst->Value.getSExtValue();
// Avoid out of bounds indexing the pieces.
@@ -4363,6 +4432,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FMAXIMUM:
case G_FSHL:
case G_FSHR:
+ case G_ROTL:
+ case G_ROTR:
case G_FREEZE:
case G_SADDSAT:
case G_SSUBSAT:
@@ -4572,35 +4643,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
return Legalized;
}
-LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
- MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
- unsigned Opc = MI.getOpcode();
- assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
- Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
- "Sequential reductions not expected");
-
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- // The semantics of the normal non-sequential reductions allow us to freely
- // re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
-
- if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
- return UnableToLegalize;
-
- SmallVector<Register> SplitSrcs;
- const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
- extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
- SmallVector<Register> PartialReductions;
- for (unsigned Part = 0; Part < NumParts; ++Part) {
- PartialReductions.push_back(
- MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
- }
-
+static unsigned getScalarOpcForReduction(unsigned Opc) {
unsigned ScalarOpc;
switch (Opc) {
case TargetOpcode::G_VECREDUCE_FADD:
@@ -4643,10 +4686,81 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
ScalarOpc = TargetOpcode::G_UMIN;
break;
default:
- LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
+ llvm_unreachable("Unhandled reduction");
+ }
+ return ScalarOpc;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
+ MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
+ Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
+ "Sequential reductions not expected");
+
+ if (TypeIdx != 1)
return UnableToLegalize;
+
+ // The semantics of the normal non-sequential reductions allow us to freely
+ // re-associate the operation.
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ if (NarrowTy.isVector() &&
+ (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
+ return UnableToLegalize;
+
+ unsigned ScalarOpc = getScalarOpcForReduction(Opc);
+ SmallVector<Register> SplitSrcs;
+ // If NarrowTy is a scalar then we're being asked to scalarize.
+ const unsigned NumParts =
+ NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
+ : SrcTy.getNumElements();
+
+ extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
+ if (NarrowTy.isScalar()) {
+ if (DstTy != NarrowTy)
+ return UnableToLegalize; // FIXME: handle implicit extensions.
+
+ if (isPowerOf2_32(NumParts)) {
+ // Generate a tree of scalar operations to reduce the critical path.
+ SmallVector<Register> PartialResults;
+ unsigned NumPartsLeft = NumParts;
+ while (NumPartsLeft > 1) {
+ for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
+ PartialResults.emplace_back(
+ MIRBuilder
+ .buildInstr(ScalarOpc, {NarrowTy},
+ {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
+ .getReg(0));
+ }
+ SplitSrcs = PartialResults;
+ PartialResults.clear();
+ NumPartsLeft = SplitSrcs.size();
+ }
+ assert(SplitSrcs.size() == 1);
+ MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ // If we can't generate a tree, then just do sequential operations.
+ Register Acc = SplitSrcs[0];
+ for (unsigned Idx = 1; Idx < NumParts; ++Idx)
+ Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
+ .getReg(0);
+ MIRBuilder.buildCopy(DstReg, Acc);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ SmallVector<Register> PartialReductions;
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ PartialReductions.push_back(
+ MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
}
+
// If the types involved are powers of 2, we can generate intermediate vector
// ops, before generating a final reduction operation.
if (isPowerOf2_32(SrcTy.getNumElements()) &&
@@ -4706,7 +4820,7 @@ LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
Register InH = MRI.createGenericVirtualRegister(HalfTy);
MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
- if (Amt.isNullValue()) {
+ if (Amt.isZero()) {
MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
MI.eraseFromParent();
return Legalized;
@@ -4815,10 +4929,9 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
const LLT HalfTy = LLT::scalar(NewBitSize);
const LLT CondTy = LLT::scalar(1);
- if (const MachineInstr *KShiftAmt =
- getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
- return narrowScalarShiftByConstant(
- MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+ if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
+ return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
+ ShiftAmtTy);
}
// TODO: Expand with known bits.
@@ -5224,26 +5337,23 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
if (Ty.isVector())
return UnableToLegalize;
- unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
- unsigned DstSize = Ty.getSizeInBits();
+ unsigned Size = Ty.getSizeInBits();
unsigned NarrowSize = NarrowTy.getSizeInBits();
- if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
+ if (Size % NarrowSize != 0)
return UnableToLegalize;
- unsigned NumDstParts = DstSize / NarrowSize;
- unsigned NumSrcParts = SrcSize / NarrowSize;
+ unsigned NumParts = Size / NarrowSize;
bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
- unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
+ unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
SmallVector<Register, 2> Src1Parts, Src2Parts;
SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
- extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
- extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+ extractParts(Src1, NarrowTy, NumParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumParts, Src2Parts);
multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
// Take only high half of registers if this is high mul.
- ArrayRef<Register> DstRegs(
- IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
+ ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
MIRBuilder.buildMerge(DstReg, DstRegs);
MI.eraseFromParent();
return Legalized;
@@ -5951,7 +6061,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
Register Src = MI.getOperand(1).getReg();
Register Amt = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
LLT AmtTy = MRI.getType(Amt);
unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
@@ -5965,6 +6075,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
isPowerOf2_32(EltSizeInBits))
return lowerRotateWithReverseRotate(MI);
+ // If a funnel shift is supported, use it.
+ unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
+ bool IsFShLegal = false;
+ if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
+ LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
+ auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
+ Register R3) {
+ MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
+ MI.eraseFromParent();
+ return Legalized;
+ };
+ // If a funnel shift in the other direction is supported, use it.
+ if (IsFShLegal) {
+ return buildFunnelShift(FShOpc, Dst, Src, Amt);
+ } else if (isPowerOf2_32(EltSizeInBits)) {
+ Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
+ return buildFunnelShift(RevFsh, Dst, Src, Amt);
+ }
+ }
+
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
@@ -6150,7 +6281,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
: APFloat::IEEEdouble(),
- APInt::getNullValue(SrcTy.getSizeInBits()));
+ APInt::getZero(SrcTy.getSizeInBits()));
TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
@@ -7293,3 +7424,563 @@ LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // The source could be a scalar if the IR type was <1 x sN>.
+ if (SrcTy.isScalar()) {
+ if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
+ return UnableToLegalize; // FIXME: handle extension.
+ // This can be just a plain copy.
+ Observer.changingInstr(MI);
+ MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ return UnableToLegalize;
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
+// Returns a list of types to use for memory op lowering in MemOps. A partial
+// port of findOptimalMemOpLowering in TargetLowering.
+static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
+ unsigned Limit, const MemOp &Op,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes,
+ const TargetLowering &TLI) {
+ if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
+ return false;
+
+ LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
+
+ if (Ty == LLT()) {
+ // Use the largest scalar type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater than or
+ // equal to DstAlign (or zero).
+ Ty = LLT::scalar(64);
+ if (Op.isFixedDstAlign())
+ while (Op.getDstAlign() < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
+ assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
+ // FIXME: check for the largest legal type we can load/store to.
+ }
+
+ unsigned NumMemOps = 0;
+ uint64_t Size = Op.size();
+ while (Size) {
+ unsigned TySize = Ty.getSizeInBytes();
+ while (TySize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ LLT NewTy = Ty;
+ // FIXME: check for mem op safety and legality of the types. Not all of
+ // SDAGisms map cleanly to GISel concepts.
+ if (NewTy.isVector())
+ NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
+ NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ unsigned NewTySize = NewTy.getSizeInBytes();
+ assert(NewTySize > 0 && "Could not find appropriate type");
+
+ // If the new LLT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+ // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
+ MVT VT = getMVTForLLT(Ty);
+ if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(
+ VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ TySize = Size;
+ else {
+ Ty = NewTy;
+ TySize = NewTySize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(Ty);
+ Size -= TySize;
+ }
+
+ return true;
+}
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+// Get a vectorized representation of the memset value operand, GISel edition.
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NumBits = Ty.getScalarSizeInBits();
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ if (!Ty.isVector() && ValVRegAndVal) {
+ APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
+ APInt SplatVal = APInt::getSplat(NumBits, Scalar);
+ return MIB.buildConstant(Ty, SplatVal).getReg(0);
+ }
+
+ // Extend the byte value to the larger type, and then multiply by a magic
+ // value 0x010101... in order to replicate it across every byte.
+ // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
+ if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
+ return MIB.buildConstant(Ty, 0).getReg(0);
+ }
+
+ LLT ExtType = Ty.getScalarType();
+ auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
+ if (NumBits > 8) {
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ auto MagicMI = MIB.buildConstant(ExtType, Magic);
+ Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
+ }
+
+ // For vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ Val = MIB.buildSplatVector(Ty, Val).getReg(0);
+
+ return Val;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
+ uint64_t KnownLen, Align Alignment,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memset length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+
+ auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
+ bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
+
+ if (!findGISelOptimalMemOpLowering(MemOps, Limit,
+ MemOp::Set(KnownLen, DstAlignCanChange,
+ Alignment,
+ /*IsZeroMemset=*/IsZeroVal,
+ /*IsVolatile=*/IsVolatile),
+ DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ MachineIRBuilder MIB(MI);
+ // Find the largest store and generate the bit pattern for it.
+ LLT LargestTy = MemOps[0];
+ for (unsigned i = 1; i < MemOps.size(); i++)
+ if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
+ LargestTy = MemOps[i];
+
+ // The memset stored value is always defined as an s8, so in order to make it
+ // work with larger store types we need to repeat the bit pattern across the
+ // wider type.
+ Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
+
+ if (!MemSetValue)
+ return UnableToLegalize;
+
+ // Generate the stores. For each store type in the list, we generate the
+ // matching store of that type to the destination address.
+ LLT PtrTy = MRI.getType(Dst);
+ unsigned DstOff = 0;
+ unsigned Size = KnownLen;
+ for (unsigned I = 0; I < MemOps.size(); I++) {
+ LLT Ty = MemOps[I];
+ unsigned TySize = Ty.getSizeInBytes();
+ if (TySize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(I == MemOps.size() - 1 && I != 0);
+ DstOff -= TySize - Size;
+ }
+
+ // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
+ Register Value = MemSetValue;
+ if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
+ MVT VT = getMVTForLLT(Ty);
+ MVT LargestVT = getMVTForLLT(LargestTy);
+ if (!LargestTy.isVector() && !Ty.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
+ else
+ Value = getMemsetValue(Val, Ty, MIB);
+ if (!Value)
+ return UnableToLegalize;
+ }
+
+ auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
+
+ Register Ptr = Dst;
+ if (DstOff != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
+ Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+
+ MIB.buildStore(Value, Ptr, *StoreMMO);
+ DstOff += Ty.getSizeInBytes();
+ Size -= TySize;
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
+ const auto *MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ // FIXME: support dynamically sized G_MEMCPY_INLINE
+ assert(LenVRegAndVal.hasValue() &&
+ "inline memcpy with dynamic size is not yet supported");
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
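+ // A zero-length copy is a no-op; just delete the instruction.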
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ Align DstAlign = DstMMO.getBaseAlign();
+ Align SrcAlign = SrcMMO.getBaseAlign();
+
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
+ return lowerMemcpy(MI, Dst, Src, KnownLen,
+ std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
+ IsVolatile);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, uint64_t Limit, Align DstAlign,
+ Align SrcAlign, bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ // FIXME: infer better src pointer alignment like SelectionDAG does here.
+ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+ // if the memcpy is in a tail call position.
+
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ IsVolatile),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Now we need to emit a load/store pair for each of the types we've
+ // collected. I.e. for each type, generate a load from the source pointer of
+ // that type width, and then generate a corresponding store of the loaded
+ // value to the dest buffer. This can result in a sequence of loads and
+ // stores of mixed types, depending on what the target specifies as good
+ // types to use.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ unsigned Size = KnownLen;
+ for (auto CopyTy : MemOps) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ if (CopyTy.getSizeInBytes() > Size)
+ CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+ // Construct MMOs for the accesses.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ Register Offset;
+ if (CurrOffset != 0) {
+ Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ .getReg(0);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ }
+ auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+ // Create the store.
+ Register StorePtr =
+ CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ Size -= CopyTy.getSizeInBytes();
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
+ uint64_t KnownLen, Align DstAlign, Align SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memmove length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ Align Alignment = commonAlignment(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+ // to a bug in its findOptimalMemOpLowering implementation. For now, do the
+ // same thing here.
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit,
+ MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
+ /*IsVolatile*/ true),
+ DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes(), TLI))
+ return UnableToLegalize;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ Align NewAlign = DL.getABITypeAlign(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->hasStackRealignment(MF))
+ while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
+ NewAlign = NewAlign / 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlign(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Memmove requires that we perform all the loads before issuing the stores.
+ // Apart from that, this loop is pretty much doing the same thing as the
+ // memcpy codegen function.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ SmallVector<Register, 16> LoadVals;
+ for (auto CopyTy : MemOps) {
+ // Construct MMO for the load.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+ }
+ LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+
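+ // Second pass: emit the stores only after every load has been issued, so
+ // that overlapping source/destination buffers are handled correctly.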
+ CurrOffset = 0;
+ for (unsigned I = 0; I < MemOps.size(); ++I) {
+ LLT CopyTy = MemOps[I];
+ // Now store the values loaded.
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ const unsigned Opc = MI.getOpcode();
+ // This combine is fairly complex so it's not written with a separate
+ // matcher function.
+ assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
+ Opc == TargetOpcode::G_MEMSET) &&
+ "Expected memcpy like instruction");
+
+ auto MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+
+ Align DstAlign = MemOp->getBaseAlign();
+ Align SrcAlign;
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register Len = MI.getOperand(2).getReg();
+
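+ // G_MEMSET carries a single memory operand (the destination). Copies and
+ // moves also carry a source memory operand, from which we take the source
+ // alignment.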
+ if (Opc != TargetOpcode::G_MEMSET) {
+ assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
+ MemOp = *(++MMOIt);
+ SrcAlign = MemOp->getBaseAlign();
+ }
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
+ if (!LenVRegAndVal)
+ return UnableToLegalize;
+ uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
+
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ bool IsVolatile = MemOp->isVolatile();
+ if (Opc == TargetOpcode::G_MEMCPY_INLINE)
+ return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+ IsVolatile);
+
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return UnableToLegalize;
+
+ if (MaxLen && KnownLen > MaxLen)
+ return UnableToLegalize;
+
+ if (Opc == TargetOpcode::G_MEMCPY) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
+ IsVolatile);
+ }
+ if (Opc == TargetOpcode::G_MEMMOVE)
+ return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
+ if (Opc == TargetOpcode::G_MEMSET)
+ return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
+ return UnableToLegalize;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 3e3141657e87..30697913a6a4 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -352,8 +352,7 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
- MemDescrs.push_back({MMO->getMemoryType(), 8 * MMO->getAlign().value(),
- MMO->getSuccessOrdering()});
+ MemDescrs.push_back({*MMO});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
new file mode 100644
index 000000000000..03dda806cb1e
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -0,0 +1,669 @@
+//===- LoadStoreOpt.cpp ----------- Generic memory optimizations -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the LoadStoreOpt optimization pass.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+
+#define DEBUG_TYPE "loadstore-opt"
+
+using namespace llvm;
+using namespace ore;
+using namespace MIPatternMatch;
+
+STATISTIC(NumStoresMerged, "Number of stores merged");
+
+const unsigned MaxStoreSizeToForm = 128;
+
+char LoadStoreOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+INITIALIZE_PASS_END(LoadStoreOpt, DEBUG_TYPE, "Generic memory optimizations",
+ false, false)
+
+LoadStoreOpt::LoadStoreOpt(std::function<bool(const MachineFunction &)> F)
+ : MachineFunctionPass(ID), DoNotRunPass(F) {}
+
+LoadStoreOpt::LoadStoreOpt()
+ : LoadStoreOpt([](const MachineFunction &) { return false; }) {}
+
+void LoadStoreOpt::init(MachineFunction &MF) {
+ this->MF = &MF;
+ MRI = &MF.getRegInfo();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+ TLI = MF.getSubtarget().getTargetLowering();
+ LI = MF.getSubtarget().getLegalizerInfo();
+ Builder.setMF(MF);
+ IsPreLegalizer = !MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ InstsToErase.clear();
+}
+
+void LoadStoreOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AAResultsWrapperPass>();
+ getSelectionDAGFallbackAnalysisUsage(AU);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+BaseIndexOffset GISelAddressing::getPointerInfo(Register Ptr,
+ MachineRegisterInfo &MRI) {
+ BaseIndexOffset Info;
+ Register PtrAddRHS;
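+ // Try to decompose the pointer into a G_PTR_ADD of a base register and an
+ // offset; otherwise treat the whole register as the base with no index.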
+ if (!mi_match(Ptr, MRI, m_GPtrAdd(m_Reg(Info.BaseReg), m_Reg(PtrAddRHS)))) {
+ Info.BaseReg = Ptr;
+ Info.IndexReg = Register();
+ Info.IsIndexSignExt = false;
+ return Info;
+ }
+
+ auto RHSCst = getIConstantVRegValWithLookThrough(PtrAddRHS, MRI);
+ if (RHSCst)
+ Info.Offset = RHSCst->Value.getSExtValue();
+
+ // Just recognize a simple case for now. In the future we'll need to match
+ // indexing patterns for base + index + constant.
+ Info.IndexReg = PtrAddRHS;
+ Info.IsIndexSignExt = false;
+ return Info;
+}
+
+bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
+ const MachineInstr &MI2,
+ bool &IsAlias,
+ MachineRegisterInfo &MRI) {
+ auto *LdSt1 = dyn_cast<GLoadStore>(&MI1);
+ auto *LdSt2 = dyn_cast<GLoadStore>(&MI2);
+ if (!LdSt1 || !LdSt2)
+ return false;
+
+ BaseIndexOffset BasePtr0 = getPointerInfo(LdSt1->getPointerReg(), MRI);
+ BaseIndexOffset BasePtr1 = getPointerInfo(LdSt2->getPointerReg(), MRI);
+
+ if (!BasePtr0.BaseReg.isValid() || !BasePtr1.BaseReg.isValid())
+ return false;
+
+ int64_t Size1 = LdSt1->getMemSize();
+ int64_t Size2 = LdSt2->getMemSize();
+
+ int64_t PtrDiff;
+ if (BasePtr0.BaseReg == BasePtr1.BaseReg) {
+ PtrDiff = BasePtr1.Offset - BasePtr0.Offset;
+ // If the size of the memory access is unknown, do not use it in the
+ // analysis. One example of an unknown-size memory access is a load/store
+ // of a scalable vector object on the stack.
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ if (PtrDiff >= 0 &&
+ Size1 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ // [----BasePtr0----]
+ //                         [---BasePtr1--]
+ // ========PtrDiff========>
+ IsAlias = !(Size1 <= PtrDiff);
+ return true;
+ }
+ if (PtrDiff < 0 &&
+ Size2 != static_cast<int64_t>(MemoryLocation::UnknownSize)) {
+ //                     [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ IsAlias = !((PtrDiff + Size2) <= 0);
+ return true;
+ }
+ return false;
+ }
+
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ auto *Base0Def = getDefIgnoringCopies(BasePtr0.BaseReg, MRI);
+ auto *Base1Def = getDefIgnoringCopies(BasePtr1.BaseReg, MRI);
+ if (!Base0Def || !Base1Def)
+ return false; // Couldn't tell anything.
+
+ if (Base0Def->getOpcode() != Base1Def->getOpcode())
+ return false;
+
+ if (Base0Def->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ MachineFrameInfo &MFI = Base0Def->getMF()->getFrameInfo();
+ // If the bases are different frame index defs and at least one is not a
+ // fixed object (i.e. it comes from an alloca), the stack objects cannot
+ // overlap, so report no alias.
+ if (Base0Def != Base1Def &&
+ (!MFI.isFixedObjectIndex(Base0Def->getOperand(1).getIndex()) ||
+ !MFI.isFixedObjectIndex(Base1Def->getOperand(1).getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // This implementation is a lot more primitive than the SDAG one for now.
+ // FIXME: what about constant pools?
+ if (Base0Def->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
+ auto GV0 = Base0Def->getOperand(1).getGlobal();
+ auto GV1 = Base1Def->getOperand(1).getGlobal();
+ if (GV0 != GV1) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ // Can't tell anything about aliasing.
+ return false;
+}
+
+bool GISelAddressing::instMayAlias(const MachineInstr &MI,
+ const MachineInstr &Other,
+ MachineRegisterInfo &MRI,
+ AliasAnalysis *AA) {
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ bool IsAtomic;
+ Register BasePtr;
+ int64_t Offset;
+ uint64_t NumBytes;
+ MachineMemOperand *MMO;
+ };
+
+ auto getCharacteristics =
+ [&](const MachineInstr *MI) -> MemUseCharacteristics {
+ if (const auto *LS = dyn_cast<GLoadStore>(MI)) {
+ Register BaseReg;
+ int64_t Offset = 0;
+ // No pre/post-inc addressing modes are considered here, unlike in SDAG.
+ if (!mi_match(LS->getPointerReg(), MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(Offset)))) {
+ BaseReg = LS->getPointerReg();
+ Offset = 0;
+ }
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(
+ LS->getMMO().getMemoryType().getSizeInBytes());
+ return {LS->isVolatile(), LS->isAtomic(), BaseReg,
+ Offset /*base offset*/, Size, &LS->getMMO()};
+ }
+ // FIXME: support recognizing lifetime instructions.
+ // Default.
+ return {false /*isvolatile*/,
+ /*isAtomic*/ false, Register(),
+ (int64_t)0 /*offset*/, 0 /*size*/,
+ (MachineMemOperand *)nullptr};
+ };
+ MemUseCharacteristics MUC0 = getCharacteristics(&MI),
+ MUC1 = getCharacteristics(&Other);
+
+ // If they are to the same address, then they must be aliases.
+ if (MUC0.BasePtr.isValid() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
+
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias.
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+ }
+
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+ return IsAlias;
+
+ // The following all rely on MMO0 and MMO1 being valid.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // FIXME: port the alignment based alias analysis from SDAG's isAlias().
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ uint64_t Size0 = MUC0.NumBytes;
+ uint64_t Size1 = MUC1.NumBytes;
+ if (AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() &&
+ Size0 != MemoryLocation::UnknownSize &&
+ Size1 != MemoryLocation::UnknownSize) {
+ // Use alias analysis information.
+ int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
+ int64_t Overlap0 = Size0 + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = Size1 + SrcValOffset1 - MinOffset;
+ if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ MUC0.MMO->getAAInfo()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ MUC1.MMO->getAAInfo())))
+ return false;
+ }
+
+ // Otherwise we have to assume they alias.
+ return true;
+}
+
+/// Returns true if the instruction creates an unavoidable hazard that
+/// forces a boundary between store merge candidates.
+static bool isInstHardMergeHazard(MachineInstr &MI) {
+ return MI.hasUnmodeledSideEffects() || MI.hasOrderedMemoryRef();
+}
+
+bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
+ // Try to merge all the stores in the vector, splitting into separate segments
+ // as necessary.
+ assert(StoresToMerge.size() > 1 && "Expected multiple stores to merge");
+ LLT OrigTy = MRI->getType(StoresToMerge[0]->getValueReg());
+ LLT PtrTy = MRI->getType(StoresToMerge[0]->getPointerReg());
+ unsigned AS = PtrTy.getAddressSpace();
+ // Ensure the legal store info is computed for this address space.
+ initializeStoreMergeTargetInfo(AS);
+ const auto &LegalSizes = LegalStoreSizes[AS];
+
+#ifndef NDEBUG
+ for (auto StoreMI : StoresToMerge)
+ assert(MRI->getType(StoreMI->getValueReg()) == OrigTy);
+#endif
+
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ bool AnyMerged = false;
+ do {
+ unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedSize();
+ // Compute the biggest store we can generate to handle the number of stores.
+ unsigned MergeSizeBits;
+ for (MergeSizeBits = MaxSizeBits; MergeSizeBits > 1; MergeSizeBits /= 2) {
+ LLT StoreTy = LLT::scalar(MergeSizeBits);
+ EVT StoreEVT =
+ getApproximateEVTForLLT(StoreTy, DL, MF->getFunction().getContext());
+ if (LegalSizes.size() > MergeSizeBits && LegalSizes[MergeSizeBits] &&
+ TLI->canMergeStoresTo(AS, StoreEVT, *MF) &&
+ (TLI->isTypeLegal(StoreEVT)))
+ break; // We can generate a MergeSize bits store.
+ }
+ if (MergeSizeBits <= OrigTy.getSizeInBits())
+ return AnyMerged; // No greater merge.
+
+ unsigned NumStoresToMerge = MergeSizeBits / OrigTy.getSizeInBits();
+ // Perform the actual merging.
+ SmallVector<GStore *, 8> SingleMergeStores(
+ StoresToMerge.begin(), StoresToMerge.begin() + NumStoresToMerge);
+ AnyMerged |= doSingleStoreMerge(SingleMergeStores);
+ StoresToMerge.erase(StoresToMerge.begin(),
+ StoresToMerge.begin() + NumStoresToMerge);
+ } while (StoresToMerge.size() > 1);
+ return AnyMerged;
+}
+
+bool LoadStoreOpt::isLegalOrBeforeLegalizer(const LegalityQuery &Query,
+ MachineFunction &MF) const {
+ auto Action = LI->getAction(Query).Action;
+ // If the instruction is unsupported, it can't be legalized at all.
+ if (Action == LegalizeActions::Unsupported)
+ return false;
+ return IsPreLegalizer || Action == LegalizeAction::Legal;
+}
+
+bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
+ assert(Stores.size() > 1);
+ // We know that all the stores are consecutive and there are no aliasing
+ // operations in the range. However, the values that are being stored may be
+ // generated anywhere before each store. To ensure we have the values
+ // available, we materialize the wide value and new store at the place of the
+ // final store in the merge sequence.
+ GStore *FirstStore = Stores[0];
+ const unsigned NumStores = Stores.size();
+ LLT SmallTy = MRI->getType(FirstStore->getValueReg());
+ LLT WideValueTy =
+ LLT::scalar(NumStores * SmallTy.getSizeInBits().getFixedSize());
+
+ // For each store, compute pairwise merged debug locs.
+ DebugLoc MergedLoc;
+ for (unsigned AIdx = 0, BIdx = 1; BIdx < NumStores; ++AIdx, ++BIdx)
+ MergedLoc = DILocation::getMergedLocation(Stores[AIdx]->getDebugLoc(),
+ Stores[BIdx]->getDebugLoc());
+ Builder.setInstr(*Stores.back());
+ Builder.setDebugLoc(MergedLoc);
+
+ // If all of the store values are constants, then create a wide constant
+ // directly. Otherwise, we need to generate some instructions to merge the
+ // existing values together into a wider type.
+ SmallVector<APInt, 8> ConstantVals;
+ for (auto Store : Stores) {
+ auto MaybeCst =
+ getIConstantVRegValWithLookThrough(Store->getValueReg(), *MRI);
+ if (!MaybeCst) {
+ ConstantVals.clear();
+ break;
+ }
+ ConstantVals.emplace_back(MaybeCst->Value);
+ }
+
+ Register WideReg;
+ auto *WideMMO =
+ MF->getMachineMemOperand(&FirstStore->getMMO(), 0, WideValueTy);
+ if (ConstantVals.empty()) {
+ // Mimic the SDAG behaviour here and don't try to do anything for unknown
+ // values. In the future, we should also support the cases of loads and
+ // extracted vector elements.
+ return false;
+ }
+
+ assert(ConstantVals.size() == NumStores);
+ // Check if our wide constant is legal.
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {WideValueTy}}, *MF))
+ return false;
+ APInt WideConst(WideValueTy.getSizeInBits(), 0);
+ for (unsigned Idx = 0; Idx < ConstantVals.size(); ++Idx) {
+ // Insert the smaller constant into the corresponding position in the
+ // wider one.
+ WideConst.insertBits(ConstantVals[Idx], Idx * SmallTy.getSizeInBits());
+ }
+ WideReg = Builder.buildConstant(WideValueTy, WideConst).getReg(0);
+ auto NewStore =
+ Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
+ (void) NewStore;
+ LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
+ NumStoresMerged += Stores.size();
+
+ MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
+ MORE.emit([&]() {
+ MachineOptimizationRemark R(DEBUG_TYPE, "MergedStore",
+ FirstStore->getDebugLoc(),
+ FirstStore->getParent());
+ R << "Merged " << NV("NumMerged", Stores.size()) << " stores of "
+ << NV("OrigWidth", SmallTy.getSizeInBytes())
+ << " bytes into a single store of "
+ << NV("NewWidth", WideValueTy.getSizeInBytes()) << " bytes";
+ return R;
+ });
+
+ for (auto MI : Stores)
+ InstsToErase.insert(MI);
+ return true;
+}
+
+bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
+ if (C.Stores.size() < 2) {
+ C.reset();
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "Checking store merge candidate with " << C.Stores.size()
+ << " stores, starting with " << *C.Stores[0]);
+ // We know that the stores in the candidate are adjacent.
+ // Now we need to check if any potential aliasing instructions recorded
+ // during the search alias with load/stores added to the candidate after.
+ // For example, if we have the candidate:
+ // C.Stores = [ST1, ST2, ST3, ST4]
+ // and after seeing ST2 we saw a load LD1, which did not alias with ST1 or
+ // ST2, then we would have recorded it into the PotentialAliases structure
+ // with the associated index value of "1". Then we see ST3 and ST4 and add
+ // them to the candidate group. We know that LD1 does not alias with ST1 or
+ // ST2, since we already did that check. However we don't yet know if it
+ // may alias ST3 and ST4, so we perform those checks now.
+ SmallVector<GStore *> StoresToMerge;
+
+ auto DoesStoreAliasWithPotential = [&](unsigned Idx, GStore &CheckStore) {
+ for (auto AliasInfo : reverse(C.PotentialAliases)) {
+ MachineInstr *PotentialAliasOp = AliasInfo.first;
+ unsigned PreCheckedIdx = AliasInfo.second;
+ if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
+ } else {
+ // Once our store index is lower than the index associated with the
+ // potential alias, we know that we've already checked for this alias
+ // and all of the earlier potential aliases too.
+ return false;
+ }
+ }
+ return false;
+ };
+ // Start from the last store in the group, and check if it aliases with any
+ // of the potential aliasing operations in the list.
+ for (int StoreIdx = C.Stores.size() - 1; StoreIdx >= 0; --StoreIdx) {
+ auto *CheckStore = C.Stores[StoreIdx];
+ if (DoesStoreAliasWithPotential(StoreIdx, *CheckStore))
+ continue;
+ StoresToMerge.emplace_back(CheckStore);
+ }
+
+ LLVM_DEBUG(dbgs() << StoresToMerge.size()
+ << " stores remaining after alias checks. Merging...\n");
+
+ // Now we've checked for aliasing hazards, merge any stores left.
+ C.reset();
+ if (StoresToMerge.size() < 2)
+ return false;
+ return mergeStores(StoresToMerge);
+}
+
+bool LoadStoreOpt::operationAliasesWithCandidate(MachineInstr &MI,
+ StoreMergeCandidate &C) {
+ if (C.Stores.empty())
+ return false;
+ return llvm::any_of(C.Stores, [&](MachineInstr *OtherMI) {
+ return instMayAlias(MI, *OtherMI, *MRI, AA);
+ });
+}
+
+void LoadStoreOpt::StoreMergeCandidate::addPotentialAlias(MachineInstr &MI) {
+ PotentialAliases.emplace_back(std::make_pair(&MI, Stores.size() - 1));
+}
+
+bool LoadStoreOpt::addStoreToCandidate(GStore &StoreMI,
+ StoreMergeCandidate &C) {
+ // Check if the given store writes to an adjacent address, and other
+ // requirements.
+ LLT ValueTy = MRI->getType(StoreMI.getValueReg());
+ LLT PtrTy = MRI->getType(StoreMI.getPointerReg());
+
+ // Only handle scalars.
+ if (!ValueTy.isScalar())
+ return false;
+
+ // Don't allow truncating stores for now.
+ if (StoreMI.getMemSizeInBits() != ValueTy.getSizeInBits())
+ return false;
+
+ Register StoreAddr = StoreMI.getPointerReg();
+ auto BIO = getPointerInfo(StoreAddr, *MRI);
+ Register StoreBase = BIO.BaseReg;
+ uint64_t StoreOffCst = BIO.Offset;
+ if (C.Stores.empty()) {
+ // This is the first store of the candidate.
+ // If the offset can't possibly allow for a lower addressed store with the
+ // same base, don't bother adding it.
+ if (StoreOffCst < ValueTy.getSizeInBytes())
+ return false;
+ C.BasePtr = StoreBase;
+ C.CurrentLowestOffset = StoreOffCst;
+ C.Stores.emplace_back(&StoreMI);
+ LLVM_DEBUG(dbgs() << "Starting a new merge candidate group with: "
+ << StoreMI);
+ return true;
+ }
+
+ // Check the store is the same size as the existing ones in the candidate.
+ if (MRI->getType(C.Stores[0]->getValueReg()).getSizeInBits() !=
+ ValueTy.getSizeInBits())
+ return false;
+
+ if (MRI->getType(C.Stores[0]->getPointerReg()).getAddressSpace() !=
+ PtrTy.getAddressSpace())
+ return false;
+
+ // There are other stores in the candidate. Check that this store writes to
+ // the next lowest adjacent address.
+ if (C.BasePtr != StoreBase)
+ return false;
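+ // The block is walked bottom-up, so each newly added store must sit exactly
+ // one value-width below the candidate's current lowest offset.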
+ if ((C.CurrentLowestOffset - ValueTy.getSizeInBytes()) !=
+ static_cast<uint64_t>(StoreOffCst))
+ return false;
+
+ // This writes to an adjacent address. Allow it.
+ C.Stores.emplace_back(&StoreMI);
+ C.CurrentLowestOffset = C.CurrentLowestOffset - ValueTy.getSizeInBytes();
+ LLVM_DEBUG(dbgs() << "Candidate added store: " << StoreMI);
+ return true;
+}
+
+bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ // Walk through the block bottom-up, looking for merging candidates.
+ StoreMergeCandidate Candidate;
+ for (auto II = MBB.rbegin(), IE = MBB.rend(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+ if (InstsToErase.contains(&MI))
+ continue;
+
+ if (auto StoreMI = dyn_cast<GStore>(&*II)) {
+ // We have a G_STORE. Add it to the candidate if it writes to an adjacent
+ // address.
+ if (!addStoreToCandidate(*StoreMI, Candidate)) {
+ // Store wasn't eligible to be added. May need to record it as a
+ // potential alias.
+ if (operationAliasesWithCandidate(*StoreMI, Candidate)) {
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+ Candidate.addPotentialAlias(*StoreMI);
+ }
+ continue;
+ }
+
+ // If we don't have any stores yet, this instruction can't pose a problem.
+ if (Candidate.Stores.empty())
+ continue;
+
+ // We're dealing with some other kind of instruction.
+ if (isInstHardMergeHazard(MI)) {
+ Changed |= processMergeCandidate(Candidate);
+ Candidate.Stores.clear();
+ continue;
+ }
+
+ if (!MI.mayLoadOrStore())
+ continue;
+
+ if (operationAliasesWithCandidate(MI, Candidate)) {
+ // We have a potential alias, so process the current candidate if we can
+ // and then continue looking for a new candidate.
+ Changed |= processMergeCandidate(Candidate);
+ continue;
+ }
+
+ // Record this instruction as a potential alias for future stores that are
+ // added to the candidate.
+ Candidate.addPotentialAlias(MI);
+ }
+
+ // Process any candidate left after finishing searching the entire block.
+ Changed |= processMergeCandidate(Candidate);
+
+ // Erase instructions now that we're no longer iterating over the block.
+ for (auto *MI : InstsToErase)
+ MI->eraseFromParent();
+ InstsToErase.clear();
+ return Changed;
+}
+
+bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
+ bool Changed = false;
+ for (auto &BB : MF) {
+ Changed |= mergeBlockStores(BB);
+ }
+ return Changed;
+}
+
+void LoadStoreOpt::initializeStoreMergeTargetInfo(unsigned AddrSpace) {
+ // Query the legalizer info to record what store types are legal.
+ // We record this because we don't want to bother trying to merge stores
+ // into illegal sizes, which would just get split again.
+
+ if (LegalStoreSizes.count(AddrSpace)) {
+ assert(LegalStoreSizes[AddrSpace].any());
+ return; // Already cached sizes for this address space.
+ }
+
+ // Need to reserve at least MaxStoreSizeToForm + 1 bits.
+ BitVector LegalSizes(MaxStoreSizeToForm * 2);
+ const auto &LI = *MF->getSubtarget().getLegalizerInfo();
+ const auto &DL = MF->getFunction().getParent()->getDataLayout();
+ Type *IntPtrIRTy =
+ DL.getIntPtrType(MF->getFunction().getContext(), AddrSpace);
+ LLT PtrTy = getLLTForType(*IntPtrIRTy->getPointerTo(AddrSpace), DL);
+ // We assume that we're not going to be generating any stores wider than
+ // MaxStoreSizeToForm bits for now.
+ for (unsigned Size = 2; Size <= MaxStoreSizeToForm; Size *= 2) {
+ LLT Ty = LLT::scalar(Size);
+ SmallVector<LegalityQuery::MemDesc, 2> MemDescrs(
+ {{Ty, Ty.getSizeInBits(), AtomicOrdering::NotAtomic}});
+ SmallVector<LLT> StoreTys({Ty, PtrTy});
+ LegalityQuery Q(TargetOpcode::G_STORE, StoreTys, MemDescrs);
+ LegalizeActionStep ActionStep = LI.getAction(Q);
+ if (ActionStep.Action == LegalizeActions::Legal)
+ LegalSizes.set(Size);
+ }
+ assert(LegalSizes.any() && "Expected some store sizes to be legal!");
+ LegalStoreSizes[AddrSpace] = LegalSizes;
+}
+
+bool LoadStoreOpt::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Begin memory optimizations for: " << MF.getName()
+ << '\n');
+
+ init(MF);
+ bool Changed = false;
+ Changed |= mergeFunctionStores(MF);
+
+ LegalStoreSizes.clear();
+ return Changed;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index d45fdae43f01..a1acc4195840 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -92,9 +92,8 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
// Check if all the users of MI are local.
// We are going to invalidate the list of use operands, so we
// can't use range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
+ for (MachineOperand &MOUse :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
// Check if the use is already local.
MachineBasicBlock *InsertMBB;
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 54ac62793b08..fb5ed35c1f72 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -673,7 +673,8 @@ MachineInstrBuilder MachineIRBuilder::buildShuffleVector(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
LLT Src1Ty = Src1.getLLTTy(*getMRI());
LLT Src2Ty = Src2.getLLTTy(*getMRI());
- assert(Src1Ty.getNumElements() + Src2Ty.getNumElements() >= Mask.size());
+ assert((size_t)(Src1Ty.getNumElements() + Src2Ty.getNumElements()) >=
+ Mask.size());
assert(DstTy.getElementType() == Src1Ty.getElementType() &&
DstTy.getElementType() == Src2Ty.getElementType());
(void)DstTy;
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 644a81d8021e..937d94764be1 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -699,11 +699,11 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// Set a sensible insertion point so that subsequent calls to
// MIRBuilder insert into this basic block.
MIRBuilder.setMBB(*MBB);
- for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
- MII != End;) {
- // MI might be invalidated by the assignment, so move the
- // iterator before hand.
- MachineInstr &MI = *MII++;
+ SmallVector<MachineInstr *> WorkList(
+ make_pointer_range(reverse(MBB->instrs())));
+
+ while (!WorkList.empty()) {
+ MachineInstr &MI = *WorkList.pop_back_val();
// Ignore target-specific post-isel instructions: they should use proper
// regclasses.
@@ -728,18 +728,6 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
"unable to map instruction", MI);
return false;
}
-
- // It's possible the mapping changed control flow, and moved the following
- // instruction to a new block, so figure out the new parent.
- if (MII != End) {
- MachineBasicBlock *NextInstBB = MII->getParent();
- if (NextInstBB != MBB) {
- LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
- MBB = NextInstBB;
- MIRBuilder.setMBB(*MBB);
- End = MBB->end();
- }
- }
}
}
diff --git a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index e2a963747101..1a2102e3ef21 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -570,7 +570,7 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
assert((ValueMask & PartMapMask) == PartMapMask &&
"Some partial mappings overlap");
}
- assert(ValueMask.isAllOnesValue() && "Value is not fully mapped");
+ assert(ValueMask.isAllOnes() && "Value is not fully mapped");
return true;
}
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index f64e41b9dccc..1a440c064a59 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -15,7 +15,9 @@
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -60,6 +62,8 @@ Register llvm::constrainOperandRegClass(
if (ConstrainedReg != Reg) {
MachineBasicBlock::iterator InsertIt(&InsertPt);
MachineBasicBlock &MBB = *InsertPt.getParent();
+ // FIXME: The copy needs to have the classes constrained for its operands.
+ // Use operand's regbank to get the class for old register (Reg).
if (RegMO.isUse()) {
BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
TII.get(TargetOpcode::COPY), ConstrainedReg)
@@ -99,19 +103,25 @@ Register llvm::constrainOperandRegClass(
// Assume physical registers are properly constrained.
assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
- const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+ const TargetRegisterClass *OpRC = TII.getRegClass(II, OpIdx, &TRI, MF);
// Some of the target independent instructions, like COPY, may not impose any
// register class constraints on some of their operands: If it's a use, we can
// skip constraining as the instruction defining the register would constrain
// it.
- // We can't constrain unallocatable register classes, because we can't create
- // virtual registers for these classes, so we need to let targets handled this
- // case.
- if (RegClass && !RegClass->isAllocatable())
- RegClass = TRI.getConstrainedRegClassForOperand(RegMO, MRI);
+ if (OpRC) {
+ // Obtain the RC from the incoming regbank if it is a proper sub-class.
+ // Operands of a superclass that combines different register types (e.g.,
+ // AMDGPU's VGPR and AGPR) can map to multiple regbanks. The regbank
+ // ambiguity resolved by targets during regbankselect should not be
+ // overridden.
+ if (const auto *SubRC = TRI.getCommonSubClass(
+ OpRC, TRI.getConstrainedRegClassForOperand(RegMO, MRI)))
+ OpRC = SubRC;
- if (!RegClass) {
+ OpRC = TRI.getAllocatableClass(OpRC);
+ }
+
+ if (!OpRC) {
assert((!isTargetSpecificOpcode(II.getOpcode()) || RegMO.isUse()) &&
"Register class constraint is required unless either the "
"instruction is target independent or the operand is a use");
@@ -127,7 +137,7 @@ Register llvm::constrainOperandRegClass(
// and they never reach this function.
return Reg;
}
- return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *OpRC,
RegMO);
}
@@ -236,7 +246,7 @@ static void reportGISelDiagnostic(DiagnosticSeverity Severity,
R << (" (in function: " + MF.getName() + ")").str();
if (IsFatal)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
else
MORE.emit(R);
}
@@ -267,10 +277,10 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
reportGISelFailure(MF, TPC, MORE, R);
}
-Optional<APInt> llvm::getConstantVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<ValueAndVReg> ValAndVReg =
- getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
+Optional<APInt> llvm::getIConstantVRegVal(Register VReg,
+ const MachineRegisterInfo &MRI) {
+ Optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
+ VReg, MRI, /*LookThroughInstrs*/ false);
assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
"Value found while looking through instrs");
if (!ValAndVReg)
@@ -278,41 +288,27 @@ Optional<APInt> llvm::getConstantVRegVal(Register VReg,
return ValAndVReg->Value;
}
-Optional<int64_t> llvm::getConstantVRegSExtVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- Optional<APInt> Val = getConstantVRegVal(VReg, MRI);
+Optional<int64_t>
+llvm::getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI) {
+ Optional<APInt> Val = getIConstantVRegVal(VReg, MRI);
if (Val && Val->getBitWidth() <= 64)
return Val->getSExtValue();
return None;
}
-Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
- Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
- bool HandleFConstant, bool LookThroughAnyExt) {
+namespace {
+
+typedef std::function<bool(const MachineInstr *)> IsOpcodeFn;
+typedef std::function<Optional<APInt>(const MachineInstr *MI)> GetAPCstFn;
+
+Optional<ValueAndVReg> getConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, IsOpcodeFn IsConstantOpcode,
+ GetAPCstFn getAPCstValue, bool LookThroughInstrs = true,
+ bool LookThroughAnyExt = false) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
- auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
- return Opcode == TargetOpcode::G_CONSTANT ||
- (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT);
- };
- auto GetImmediateValue = [HandleFConstant,
- &MRI](const MachineInstr &MI) -> Optional<APInt> {
- const MachineOperand &CstVal = MI.getOperand(1);
- if (!CstVal.isImm() && !CstVal.isCImm() &&
- (!HandleFConstant || !CstVal.isFPImm()))
- return None;
- if (!CstVal.isFPImm()) {
- unsigned BitWidth =
- MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
- : CstVal.getCImm()->getValue();
- assert(Val.getBitWidth() == BitWidth &&
- "Value bitwidth doesn't match definition type");
- return Val;
- }
- return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
- };
- while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
+
+ while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI) &&
LookThroughInstrs) {
switch (MI->getOpcode()) {
case TargetOpcode::G_ANYEXT:
@@ -339,10 +335,10 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return None;
}
}
- if (!MI || !IsConstantOpcode(MI->getOpcode()))
+ if (!MI || !IsConstantOpcode(MI))
return None;
- Optional<APInt> MaybeVal = GetImmediateValue(*MI);
+ Optional<APInt> MaybeVal = getAPCstValue(MI);
if (!MaybeVal)
return None;
APInt &Val = *MaybeVal;
@@ -365,12 +361,65 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return ValueAndVReg{Val, VReg};
}
-const ConstantInt *llvm::getConstantIntVRegVal(Register VReg,
- const MachineRegisterInfo &MRI) {
- MachineInstr *MI = MRI.getVRegDef(VReg);
- if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
- return nullptr;
- return MI->getOperand(1).getCImm();
+bool isIConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_CONSTANT;
+}
+
+bool isFConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ return MI->getOpcode() == TargetOpcode::G_FCONSTANT;
+}
+
+bool isAnyConstant(const MachineInstr *MI) {
+ if (!MI)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ return Opc == TargetOpcode::G_CONSTANT || Opc == TargetOpcode::G_FCONSTANT;
+}
+
+Optional<APInt> getCImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ return None;
+}
+
+Optional<APInt> getCImmOrFPImmAsAPInt(const MachineInstr *MI) {
+ const MachineOperand &CstVal = MI->getOperand(1);
+ if (CstVal.isCImm())
+ return CstVal.getCImm()->getValue();
+ if (CstVal.isFPImm())
+ return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ return None;
+}
+
+} // end anonymous namespace
+
+Optional<ValueAndVReg> llvm::getIConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ return getConstantVRegValWithLookThrough(VReg, MRI, isIConstant,
+ getCImmAsAPInt, LookThroughInstrs);
+}
+
+Optional<ValueAndVReg> llvm::getAnyConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ bool LookThroughAnyExt) {
+ return getConstantVRegValWithLookThrough(
+ VReg, MRI, isAnyConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs,
+ LookThroughAnyExt);
+}
+
+Optional<FPValueAndVReg> llvm::getFConstantVRegValWithLookThrough(
+ Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ auto Reg = getConstantVRegValWithLookThrough(
+ VReg, MRI, isFConstant, getCImmOrFPImmAsAPInt, LookThroughInstrs);
+ if (!Reg)
+ return None;
+ return FPValueAndVReg{getConstantFPVRegVal(Reg->VReg, MRI)->getValueAPF(),
+ Reg->VReg};
}
const ConstantFP *
@@ -437,16 +486,16 @@ APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI) {
- auto MaybeOp2Cst = getConstantVRegVal(Op2, MRI);
+ auto MaybeOp2Cst = getAnyConstantVRegValWithLookThrough(Op2, MRI, false);
if (!MaybeOp2Cst)
return None;
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp1Cst = getAnyConstantVRegValWithLookThrough(Op1, MRI, false);
if (!MaybeOp1Cst)
return None;
- const APInt &C1 = *MaybeOp1Cst;
- const APInt &C2 = *MaybeOp2Cst;
+ const APInt &C1 = MaybeOp1Cst->Value;
+ const APInt &C2 = MaybeOp2Cst->Value;
switch (Opcode) {
default:
break;
@@ -543,6 +592,35 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}
+Optional<MachineInstr *>
+llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
+ const Register Op2,
+ const MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
+ auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
+ if (!SrcVec1)
+ return None;
+ auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
+ if (!SrcVec2)
+ return None;
+
+ const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));
+
+ SmallVector<Register, 16> FoldedElements;
+ for (unsigned Idx = 0, E = SrcVec1->getNumSources(); Idx < E; ++Idx) {
+ auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
+ SrcVec2->getSourceReg(Idx), MRI);
+ if (!MaybeCst)
+ return None;
+ auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
+ FoldedElements.emplace_back(FoldedCstReg);
+ }
+ // Create the new vector constant.
+ auto CstVec =
+ MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
+ return &*CstVec;
+}
+
bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
bool SNaN) {
const MachineInstr *DefMI = MRI.getVRegDef(Val);
@@ -659,7 +737,7 @@ Register llvm::getFunctionLiveInPhysReg(MachineFunction &MF,
Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm,
const MachineRegisterInfo &MRI) {
- auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ auto MaybeOp1Cst = getIConstantVRegVal(Op1, MRI);
if (MaybeOp1Cst) {
switch (Opcode) {
default:
@@ -677,7 +755,7 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
Register Src,
const MachineRegisterInfo &MRI) {
assert(Opcode == TargetOpcode::G_SITOFP || Opcode == TargetOpcode::G_UITOFP);
- if (auto MaybeSrcVal = getConstantVRegVal(Src, MRI)) {
+ if (auto MaybeSrcVal = getIConstantVRegVal(Src, MRI)) {
APFloat DstVal(getFltSemanticForLLT(DstTy));
DstVal.convertFromAPInt(*MaybeSrcVal, Opcode == TargetOpcode::G_SITOFP,
APFloat::rmNearestTiesToEven);
@@ -686,6 +764,37 @@ Optional<APFloat> llvm::ConstantFoldIntToFloat(unsigned Opcode, LLT DstTy,
return None;
}
+Optional<SmallVector<unsigned>>
+llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
+ LLT Ty = MRI.getType(Src);
+ SmallVector<unsigned> FoldedCTLZs;
+ auto tryFoldScalar = [&](Register R) -> Optional<unsigned> {
+ auto MaybeCst = getIConstantVRegVal(R, MRI);
+ if (!MaybeCst)
+ return None;
+ return MaybeCst->countLeadingZeros();
+ };
+ if (Ty.isVector()) {
+ // Try to constant fold each element.
+ auto *BV = getOpcodeDef<GBuildVector>(Src, MRI);
+ if (!BV)
+ return None;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (auto MaybeFold = tryFoldScalar(BV->getSourceReg(SrcIdx))) {
+ FoldedCTLZs.emplace_back(*MaybeFold);
+ continue;
+ }
+ return None;
+ }
+ return FoldedCTLZs;
+ }
+ if (auto MaybeCst = tryFoldScalar(Src)) {
+ FoldedCTLZs.emplace_back(*MaybeCst);
+ return FoldedCTLZs;
+ }
+ return None;
+}
+
bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
GISelKnownBits *KB) {
Optional<DefinitionAndSourceRegister> DefSrcReg =
@@ -707,7 +816,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// shifting the bit off the end is undefined.
// TODO: Constant splat
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
if (*ConstLHS == 1)
return true;
}
@@ -715,7 +824,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
break;
}
case TargetOpcode::G_LSHR: {
- if (auto ConstLHS = getConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
+ if (auto ConstLHS = getIConstantVRegVal(MI.getOperand(1).getReg(), MRI)) {
if (ConstLHS->isSignMask())
return true;
}
@@ -737,7 +846,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Register Reg, const MachineRegisterInfo &MRI,
// zeros is greater than the truncation amount.
const unsigned BitWidth = Ty.getScalarSizeInBits();
for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
- auto Const = getConstantVRegVal(MI.getOperand(I).getReg(), MRI);
+ auto Const = getIConstantVRegVal(MI.getOperand(I).getReg(), MRI);
if (!Const || !Const->zextOrTrunc(BitWidth).isPowerOf2())
return false;
}
@@ -885,53 +994,81 @@ static bool isBuildVectorOp(unsigned Opcode) {
Opcode == TargetOpcode::G_BUILD_VECTOR_TRUNC;
}
-// TODO: Handle mixed undef elements.
-static bool isBuildVectorConstantSplat(const MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- int64_t SplatValue) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return false;
+namespace {
- const unsigned NumOps = MI.getNumOperands();
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- if (!mi_match(Element, MRI, m_SpecificICst(SplatValue)))
- return false;
+Optional<ValueAndVReg> getAnyConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ MachineInstr *MI = getDefIgnoringCopies(VReg, MRI);
+ if (!MI)
+ return None;
+
+ if (!isBuildVectorOp(MI->getOpcode()))
+ return None;
+
+ Optional<ValueAndVReg> SplatValAndReg = None;
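+ // Walk the build_vector's source operands (MI->uses() skips the def).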
+ for (MachineOperand &Op : MI->uses()) {
+ Register Element = Op.getReg();
+ auto ElementValAndReg =
+ getAnyConstantVRegValWithLookThrough(Element, MRI, true, true);
+
+ // If AllowUndef, treat undef as a value that will result in a constant splat.
+ if (!ElementValAndReg) {
+ if (AllowUndef && isa<GImplicitDef>(MRI.getVRegDef(Element)))
+ continue;
+ return None;
+ }
+
+ // Record splat value
+ if (!SplatValAndReg)
+ SplatValAndReg = ElementValAndReg;
+
+ // Different constant than the one already recorded; not a constant splat.
+ if (SplatValAndReg->Value != ElementValAndReg->Value)
+ return None;
}
- return true;
+ return SplatValAndReg;
}
+bool isBuildVectorConstantSplat(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ int64_t SplatValue, bool AllowUndef) {
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, AllowUndef))
+ return mi_match(SplatValAndReg->VReg, MRI, m_SpecificICst(SplatValue));
+ return false;
+}
+
+} // end anonymous namespace
+
Optional<int64_t>
llvm::getBuildVectorConstantSplat(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
- if (!isBuildVectorOp(MI.getOpcode()))
- return None;
-
- const unsigned NumOps = MI.getNumOperands();
- Optional<int64_t> Scalar;
- for (unsigned I = 1; I != NumOps; ++I) {
- Register Element = MI.getOperand(I).getReg();
- int64_t ElementValue;
- if (!mi_match(Element, MRI, m_ICst(ElementValue)))
- return None;
- if (!Scalar)
- Scalar = ElementValue;
- else if (*Scalar != ElementValue)
- return None;
- }
+ if (auto SplatValAndReg =
+ getAnyConstantSplat(MI.getOperand(0).getReg(), MRI, false))
+ return getIConstantVRegSExtVal(SplatValAndReg->VReg, MRI);
+ return None;
+}
- return Scalar;
+Optional<FPValueAndVReg> llvm::getFConstantSplat(Register VReg,
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ if (auto SplatValAndReg = getAnyConstantSplat(VReg, MRI, AllowUndef))
+ return getFConstantVRegValWithLookThrough(SplatValAndReg->VReg, MRI);
+ return None;
}
bool llvm::isBuildVectorAllZeros(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, 0);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, 0, AllowUndef);
}
bool llvm::isBuildVectorAllOnes(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
- return isBuildVectorConstantSplat(MI, MRI, -1);
+ const MachineRegisterInfo &MRI,
+ bool AllowUndef) {
+ return isBuildVectorConstantSplat(MI, MRI, -1, AllowUndef);
}
Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
@@ -948,6 +1085,36 @@ Optional<RegOrConstant> llvm::getVectorSplat(const MachineInstr &MI,
return RegOrConstant(Reg);
}
+bool llvm::isConstantOrConstantVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return true;
+ GBuildVector *BV = dyn_cast<GBuildVector>(&MI);
+ if (!BV)
+ return false;
+ for (unsigned SrcIdx = 0; SrcIdx < BV->getNumSources(); ++SrcIdx) {
+ if (getIConstantVRegValWithLookThrough(BV->getSourceReg(SrcIdx), MRI) ||
+ getOpcodeDef<GImplicitDef>(BV->getSourceReg(SrcIdx), MRI))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Optional<APInt>
+llvm::isConstantOrConstantSplatVector(MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ Register Def = MI.getOperand(0).getReg();
+ if (auto C = getIConstantVRegValWithLookThrough(Def, MRI))
+ return C->Value;
+ auto MaybeCst = getBuildVectorConstantSplat(MI, MRI);
+ if (!MaybeCst)
+ return None;
+ const unsigned ScalarSize = MRI.getType(Def).getScalarSizeInBits();
+ return APInt(ScalarSize, *MaybeCst, true);
+}
+
bool llvm::matchUnaryPredicate(
const MachineRegisterInfo &MRI, Register Reg,
std::function<bool(const Constant *ConstVal)> Match, bool AllowUndefs) {
@@ -1011,3 +1178,59 @@ bool llvm::shouldOptForSize(const MachineBasicBlock &MBB,
return F.hasOptSize() || F.hasMinSize() ||
llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI);
}
+
+/// These artifacts generally don't have any debug users because they don't
+/// directly originate from IR instructions, but instead usually come from
+/// legalization. Skipping the check for debug users improves compile time.
+/// Note that truncates or extends aren't included because they have IR
+/// counterparts which can have debug users after translation.
+static bool shouldSkipDbgValueFor(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_UNMERGE_VALUES:
+ case TargetOpcode::G_MERGE_VALUES:
+ case TargetOpcode::G_CONCAT_VECTORS:
+ case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
+ case TargetOpcode::G_INSERT:
+ return true;
+ default:
+ return false;
+ }
+}
+
+void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver,
+ SmallInstListTy &DeadInstChain) {
+ for (MachineOperand &Op : MI.uses()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DeadInstChain.insert(MRI.getVRegDef(Op.getReg()));
+ }
+ LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n");
+ DeadInstChain.remove(&MI);
+ if (shouldSkipDbgValueFor(MI))
+ MI.eraseFromParent();
+ else
+ MI.eraseFromParentAndMarkDBGValuesForRemoval();
+ if (LocObserver)
+ LocObserver->checkpoint(false);
+}
+
+void llvm::eraseInstrs(ArrayRef<MachineInstr *> DeadInstrs,
+ MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ SmallInstListTy DeadInstChain;
+ for (MachineInstr *MI : DeadInstrs)
+ saveUsesAndErase(*MI, MRI, LocObserver, DeadInstChain);
+
+ while (!DeadInstChain.empty()) {
+ MachineInstr *Inst = DeadInstChain.pop_back_val();
+ if (!isTriviallyDead(*Inst, MRI))
+ continue;
+ saveUsesAndErase(*Inst, MRI, LocObserver, DeadInstChain);
+ }
+}
+
+void llvm::eraseInstr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ LostDebugLocObserver *LocObserver) {
+ return eraseInstrs({&MI}, MRI, LocObserver);
+}
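
For context, an illustrative-only sketch (not part of this commit) of how the new splat helpers compose in a caller; the matcher name is hypothetical, and the helpers are assumed to be declared in llvm/CodeGen/GlobalISel/Utils.h as added above:

#include "llvm/CodeGen/GlobalISel/Utils.h"

// Hypothetical matcher: accept an instruction that is either an all-ones
// splat (undef lanes tolerated) or any scalar constant / constant splat.
static bool matchAllOnesOrConstantSplat(llvm::MachineInstr &MI,
                                        const llvm::MachineRegisterInfo &MRI) {
  if (llvm::isBuildVectorAllOnes(MI, MRI, /*AllowUndef=*/true))
    return true;
  // The returned APInt, if any, is the splat constant sign-extended to the
  // scalar element width (see isConstantOrConstantSplatVector above).
  return llvm::isConstantOrConstantSplatVector(MI, MRI).hasValue();
}
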
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 4316034371a5..83b8c2d0eacb 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -187,7 +187,7 @@ namespace {
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
- TripCount(Info.TripCount),
+ ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
LoopDecrement(Info.LoopDecrement),
@@ -202,7 +202,7 @@ namespace {
OptimizationRemarkEmitter *ORE = nullptr;
Loop *L = nullptr;
Module *M = nullptr;
- const SCEV *TripCount = nullptr;
+ const SCEV *ExitCount = nullptr;
Type *CountType = nullptr;
BranchInst *ExitBranch = nullptr;
Value *LoopDecrement = nullptr;
@@ -296,7 +296,7 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
}
assert(
- (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.TripCount) &&
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
"Hardware Loop must have set exit info.");
BasicBlock *Preheader = L->getLoopPreheader();
@@ -365,7 +365,13 @@ static bool CanGenerateTest(Loop *L, Value *Count) {
return false;
};
- if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
+ // Check if Count is a zext.
+ Value *CountBefZext =
+ isa<ZExtInst>(Count) ? cast<ZExtInst>(Count)->getOperand(0) : nullptr;
+
+ if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1) &&
+ !IsCompareZero(ICmp, CountBefZext, 0) &&
+ !IsCompareZero(ICmp, CountBefZext, 1))
return false;
unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
@@ -381,13 +387,18 @@ Value *HardwareLoop::InitLoopCount() {
// loop counter and tests that is not zero?
SCEVExpander SCEVE(SE, DL, "loopcnt");
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
+
+ ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
// If we're trying to use the 'test and set' form of the intrinsic, we need
// to replace a conditional branch that is controlling entry to the loop. It
// is likely (guaranteed?) that the preheader has an unconditional branch to
// the loop header, so also check if it has a single predecessor.
- if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, TripCount,
- SE.getZero(TripCount->getType()))) {
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+ SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
} else
@@ -399,19 +410,19 @@ Value *HardwareLoop::InitLoopCount() {
BasicBlock *Predecessor = BB->getSinglePredecessor();
// If it's not safe to create a while loop then don't force it and create a
// do-while loop instead
- if (!isSafeToExpandAt(TripCount, Predecessor->getTerminator(), SE))
+ if (!isSafeToExpandAt(ExitCount, Predecessor->getTerminator(), SE))
UseLoopGuard = false;
else
BB = Predecessor;
}
- if (!isSafeToExpandAt(TripCount, BB->getTerminator(), SE)) {
- LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand TripCount " << *TripCount
- << "\n");
+ if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
+ << *ExitCount << "\n");
return nullptr;
}
- Value *Count = SCEVE.expandCodeFor(TripCount, CountType,
+ Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
BB->getTerminator());
// FIXME: We've expanded Count where we hope to insert the counter setting
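
As a rough, stand-alone illustration (not taken from this patch) of the guard shape the extended CanGenerateTest check can now recognise, where the compared count is a zero-extension of a narrower exit count; the function name and the i32 width are purely illustrative:

#include "llvm/IR/IRBuilder.h"

// Build `icmp ne (zext iN %narrowCount to i32), 0`; the updated check looks
// through the zext when matching this compare against the expanded count.
// NarrowCount is assumed to be narrower than i32.
static llvm::Value *buildZExtGuard(llvm::IRBuilder<> &Builder,
                                   llvm::Value *NarrowCount) {
  llvm::Value *Count = Builder.CreateZExt(NarrowCount, Builder.getInt32Ty());
  return Builder.CreateICmpNE(Count, Builder.getInt32(0));
}
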
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index 71e91b445d9a..64e1f4351456 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -341,9 +341,8 @@ void InlineSpiller::collectRegsToSpill() {
if (Original == Reg)
return;
- for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) {
- MachineInstr &MI = *RI++;
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
Register SnipReg = isFullCopyOf(MI, Reg);
if (!isSibling(SnipReg))
continue;
@@ -465,10 +464,8 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
LLVM_DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n');
// Find all spills and copies of VNI.
- for (MachineRegisterInfo::use_instr_nodbg_iterator
- UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end();
- UI != E; ) {
- MachineInstr &MI = *UI++;
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
if (!MI.isCopy() && !MI.mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
@@ -676,11 +673,7 @@ void InlineSpiller::reMaterializeAll() {
bool anyRemat = false;
for (Register Reg : RegsToSpill) {
LiveInterval &LI = LIS.getInterval(Reg);
- for (MachineRegisterInfo::reg_bundle_iterator
- RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
- RegI != E; ) {
- MachineInstr &MI = *RegI++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
// Debug values are not allowed to affect codegen.
if (MI.isDebugValue())
continue;
@@ -928,6 +921,39 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
// Update the call site info.
if (MI->isCandidateForCallSiteEntry())
MI->getMF()->moveCallSiteInfo(MI, FoldMI);
+
+ // If we've folded a store into an instruction labelled with debug-info,
+ // record a substitution from the old operand to the memory operand. Handle
+ // the simple common case where operand 0 is the one being folded, plus when
+ // the destination operand is also a tied def. More values could be
+ // substituted / preserved with more analysis.
+ if (MI->peekDebugInstrNum() && Ops[0].second == 0) {
+ // Helper lambda.
+ auto MakeSubstitution = [this,FoldMI,MI,&Ops]() {
+ // Substitute old operand zero to the new instruction's memory operand.
+ unsigned OldOperandNum = Ops[0].second;
+ unsigned NewNum = FoldMI->getDebugInstrNum();
+ unsigned OldNum = MI->getDebugInstrNum();
+ MF.makeDebugValueSubstitution({OldNum, OldOperandNum},
+ {NewNum, MachineFunction::DebugOperandMemNumber});
+ };
+
+ const MachineOperand &Op0 = MI->getOperand(Ops[0].second);
+ if (Ops.size() == 1 && Op0.isDef()) {
+ MakeSubstitution();
+ } else if (Ops.size() == 2 && Op0.isDef() && MI->getOperand(1).isTied() &&
+ Op0.getReg() == MI->getOperand(1).getReg()) {
+ MakeSubstitution();
+ }
+ } else if (MI->peekDebugInstrNum()) {
+ // This is a debug-labelled instruction, but the operand being folded isn't
+ // at operand zero. Most likely this means it's a load being folded in.
+ // Substitute any register defs from operand zero up to the one being
+ // folded -- past that point, we don't know what the new operand indexes
+ // will be.
+ MF.substituteDebugValuesForInst(*MI, *FoldMI, Ops[0].second);
+ }
+
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -1038,57 +1064,53 @@ void InlineSpiller::spillAroundUses(Register Reg) {
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
- for (MachineRegisterInfo::reg_bundle_iterator
- RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end();
- RegI != E; ) {
- MachineInstr *MI = &*(RegI++);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
// Debug values are not allowed to affect codegen.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
// Modify DBG_VALUE now that the value is in a spill slot.
- MachineBasicBlock *MBB = MI->getParent();
- LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << *MI);
- buildDbgValueForSpill(*MBB, MI, *MI, StackSlot, Reg);
+ MachineBasicBlock *MBB = MI.getParent();
+ LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:\t" << MI);
+ buildDbgValueForSpill(*MBB, &MI, MI, StackSlot, Reg);
MBB->erase(MI);
continue;
}
- assert(!MI->isDebugInstr() && "Did not expect to find a use in debug "
+ assert(!MI.isDebugInstr() && "Did not expect to find a use in debug "
"instruction that isn't a DBG_VALUE");
// Ignore copies to/from snippets. We'll delete them.
- if (SnippetCopies.count(MI))
+ if (SnippetCopies.count(&MI))
continue;
// Stack slot accesses may coalesce away.
- if (coalesceStackAccess(MI, Reg))
+ if (coalesceStackAccess(&MI, Reg))
continue;
// Analyze instruction.
SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
- VirtRegInfo RI = AnalyzeVirtRegInBundle(*MI, Reg, &Ops);
+ VirtRegInfo RI = AnalyzeVirtRegInBundle(MI, Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(*MI).getRegSlot();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
// Check for a sibling copy.
- Register SibReg = isFullCopyOf(*MI, Reg);
+ Register SibReg = isFullCopyOf(MI, Reg);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
- LLVM_DEBUG(dbgs() << "Found new snippet copy: " << *MI);
- SnippetCopies.insert(MI);
+ LLVM_DEBUG(dbgs() << "Found new snippet copy: " << MI);
+ SnippetCopies.insert(&MI);
continue;
}
if (RI.Writes) {
- if (hoistSpillInsideBB(OldLI, *MI)) {
+ if (hoistSpillInsideBB(OldLI, MI)) {
// This COPY is now dead, the value is already in the stack slot.
- MI->getOperand(0).setIsDead();
- DeadDefs.push_back(MI);
+ MI.getOperand(0).setIsDead();
+ DeadDefs.push_back(&MI);
continue;
}
} else {
@@ -1108,7 +1130,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
Register NewVReg = Edit->createFrom(Reg);
if (RI.Reads)
- insertReload(NewVReg, Idx, MI);
+ insertReload(NewVReg, Idx, &MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
@@ -1123,12 +1145,12 @@ void InlineSpiller::spillAroundUses(Register Reg) {
hasLiveDef = true;
}
}
- LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n');
+ LLVM_DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << MI << '\n');
// FIXME: Use a second vreg if instruction has no tied ops.
if (RI.Writes)
if (hasLiveDef)
- insertSpill(NewVReg, true, MI);
+ insertSpill(NewVReg, true, &MI);
}
}
@@ -1163,10 +1185,8 @@ void InlineSpiller::spillAll() {
// Finally delete the SnippetCopies.
for (Register Reg : RegsToSpill) {
- for (MachineRegisterInfo::reg_instr_iterator
- RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end();
- RI != E; ) {
- MachineInstr &MI = *(RI++);
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
LIS.RemoveMachineInstrFromMaps(MI);
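
A minimal, self-contained sketch (not from this commit) of the iteration idiom the loops above switch to: make_early_inc_range advances the underlying iterator before the loop body runs, so the current instruction can be erased without invalidating the traversal. The function name is hypothetical:

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Erase every non-debug instruction that uses Reg while iterating its uses.
static void eraseAllUsers(llvm::MachineRegisterInfo &MRI, llvm::Register Reg) {
  for (llvm::MachineInstr &MI :
       llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg)))
    MI.eraseFromParent();
}
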
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 24a57cc21c57..5a20580e5479 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -95,7 +95,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.setPreservesCFG();
}
private:
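
The switch from addPreserved<DominatorTreeWrapperPass>() to setPreservesCFG() declares that the pass leaves the CFG untouched, which lets the legacy pass manager preserve all CFG-only analyses (the dominator tree among them) instead of one named pass. For readability, the consolidated hook after this hunk applies:

void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.setPreservesCFG();
}
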
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 71bfb1d87d66..9fabcfb1f326 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -308,12 +308,12 @@ public:
}
// Multiplying by one is a no-op.
- if (C.isOneValue()) {
+ if (C.isOne()) {
return *this;
}
// Multiplying by zero removes the coefficient B and defines all bits.
- if (C.isNullValue()) {
+ if (C.isZero()) {
ErrorMSBs = 0;
deleteB();
}
@@ -464,7 +464,7 @@ public:
return *this;
}
- if (C.isNullValue())
+ if (C.isZero())
return *this;
// Test if the result will be zero
@@ -571,7 +571,7 @@ public:
bool isProvenEqualTo(const Polynomial &o) {
// Subtract both polynomials and test if it is fully defined and zero.
Polynomial r = *this - o;
- return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isNullValue());
+ return (r.ErrorMSBs == 0) && (!r.isFirstOrder()) && (r.A.isZero());
}
/// Print the polynomial into a stream.
@@ -1131,6 +1131,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
InstructionCost InterleavedCost;
InstructionCost InstructionCost = 0;
+ const TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency;
// Get the interleave factor
unsigned Factor = InterleavedLoad.size();
@@ -1158,8 +1159,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
// be expected. Also sum the cost of the Instructions being left dead.
for (auto &I : Is) {
// Compute the old cost
- InstructionCost +=
- TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+ InstructionCost += TTI.getInstructionCost(I, CostKind);
// The final SVIs are allowed not to be dead, all uses will be replaced
if (SVIs.find(I) != SVIs.end())
@@ -1212,7 +1212,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
Indices.push_back(i);
InterleavedCost = TTI.getInterleavedMemoryOpCost(
Instruction::Load, ILTy, Factor, Indices, InsertionPoint->getAlign(),
- InsertionPoint->getPointerAddressSpace());
+ InsertionPoint->getPointerAddressSpace(), CostKind);
if (InterleavedCost >= InstructionCost) {
return false;
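
A tiny stand-alone sketch (independent of this file) of the APInt predicate renames adopted in this hunk; the new names are direct replacements for the older *Value() forms:

#include "llvm/ADT/APInt.h"

// Returns true for the constants the hunks above special-case.
static bool isZeroOrOne(const llvm::APInt &C) {
  return C.isZero() || C.isOne();
}
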
diff --git a/llvm/lib/CodeGen/IntrinsicLowering.cpp b/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 55089d3b90d0..808a79d9792a 100644
--- a/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -453,8 +453,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
// Verify this is a simple bswap.
- if (CI->getNumArgOperands() != 1 ||
- CI->getType() != CI->getArgOperand(0)->getType() ||
+ if (CI->arg_size() != 1 || CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
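
A brief, self-contained sketch (not part of this commit) of the accessor rename used above: CallBase::arg_size() counts only the call's argument operands, excluding the callee and any operand bundles, which is the same quantity getNumArgOperands() reported:

#include "llvm/IR/Instructions.h"

// True when the call site passes exactly one argument.
static bool hasSingleArgument(const llvm::CallInst *CI) {
  return CI->arg_size() == 1;
}
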
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 37c0b44ea2b2..0d3685d4141c 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -25,10 +25,10 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index dc9907058340..a4eb3094612b 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -11,114 +11,48 @@
/// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information.
///
/// This pass propagates variable locations between basic blocks, resolving
-/// control flow conflicts between them. The problem is much like SSA
-/// construction, where each DBG_VALUE instruction assigns the *value* that
-/// a variable has, and every instruction where the variable is in scope uses
-/// that variable. The resulting map of instruction-to-value is then translated
-/// into a register (or spill) location for each variable over each instruction.
+/// control flow conflicts between them. The problem is SSA construction, where
+/// each debug instruction assigns the *value* that a variable has, and every
+/// instruction where the variable is in scope uses that variable. The resulting
+/// map of instruction-to-value is then translated into a register (or spill)
+/// location for each variable over each instruction.
///
-/// This pass determines which DBG_VALUE dominates which instructions, or if
-/// none do, where values must be merged (like PHI nodes). The added
-/// complication is that because codegen has already finished, a PHI node may
-/// be needed for a variable location to be correct, but no register or spill
-/// slot merges the necessary values. In these circumstances, the variable
-/// location is dropped.
+/// The primary difference from normal SSA construction is that we cannot
+/// _create_ PHI values that contain variable values. CodeGen has already
+/// completed, and we can't alter it just to make debug-info complete. Thus:
+/// we can identify function positions where we would like a PHI value for a
+/// variable, but must search the MachineFunction to see whether such a PHI is
+/// available. If no such PHI exists, the variable location must be dropped.
///
-/// What makes this analysis non-trivial is loops: we cannot tell in advance
-/// whether a variable location is live throughout a loop, or whether its
-/// location is clobbered (or redefined by another DBG_VALUE), without
-/// exploring all the way through.
-///
-/// To make this simpler we perform two kinds of analysis. First, we identify
+/// To achieve this, we perform two kinds of analysis. First, we identify
/// every value defined by every instruction (ignoring those that only move
-/// another value), then compute a map of which values are available for each
-/// instruction. This is stronger than a reaching-def analysis, as we create
-/// PHI values where other values merge.
-///
-/// Secondly, for each variable, we effectively re-construct SSA using each
-/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the
-/// first analysis from the location they refer to. We can then compute the
-/// dominance frontiers of where a variable has a value, and create PHI nodes
-/// where they merge.
-/// This isn't precisely SSA-construction though, because the function shape
-/// is pre-defined. If a variable location requires a PHI node, but no
-/// PHI for the relevant values is present in the function (as computed by the
-/// first analysis), the location must be dropped.
-///
-/// Once both are complete, we can pass back over all instructions knowing:
-/// * What _value_ each variable should contain, either defined by an
-/// instruction or where control flow merges
-/// * What the location of that value is (if any).
-/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when
-/// a value moves location. After this pass runs, all variable locations within
-/// a block should be specified by DBG_VALUEs within that block, allowing
-/// DbgEntityHistoryCalculator to focus on individual blocks.
-///
-/// This pass is able to go fast because the size of the first
-/// reaching-definition analysis is proportional to the working-set size of
-/// the function, which the compiler tries to keep small. (It's also
-/// proportional to the number of blocks). Additionally, we repeatedly perform
-/// the second reaching-definition analysis with only the variables and blocks
-/// in a single lexical scope, exploiting their locality.
-///
-/// Determining where PHIs happen is trickier with this approach, and it comes
-/// to a head in the major problem for LiveDebugValues: is a value live-through
-/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of
-/// facts about a function, however this analysis needs to generate new value
-/// numbers at joins.
-///
-/// To do this, consider a lattice of all definition values, from instructions
-/// and from PHIs. Each PHI is characterised by the RPO number of the block it
-/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B):
-/// with non-PHI values at the top, and any PHI value in the last block (by RPO
-/// order) at the bottom.
-///
-/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below,
-/// "rank" always refers to the former).
-///
-/// At any join, for each register, we consider:
-/// * All incoming values, and
-/// * The PREVIOUS live-in value at this join.
-/// If all incoming values agree: that's the live-in value. If they do not, the
-/// incoming values are ranked according to the partial order, and the NEXT
-/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of
-/// the same rank are ignored as conflicting). If there are no candidate values,
-/// or if the rank of the live-in would be lower than the rank of the current
-/// blocks PHIs, create a new PHI value.
-///
-/// Intuitively: if it's not immediately obvious what value a join should result
-/// in, we iteratively descend from instruction-definitions down through PHI
-/// values, getting closer to the current block each time. If the current block
-/// is a loop head, this ordering is effectively searching outer levels of
-/// loops, to find a value that's live-through the current loop.
+/// another value), then re-compute an SSA-form representation of the
+/// MachineFunction, using value propagation to eliminate any un-necessary
+/// PHI values. This gives us a map of every value computed in the function,
+/// and its location within the register file / stack.
///
-/// If there is no value that's live-through this loop, a PHI is created for
-/// this location instead. We can't use a lower-ranked PHI because by definition
-/// it doesn't dominate the current block. We can't create a PHI value any
-/// earlier, because we risk creating a PHI value at a location where values do
-/// not in fact merge, thus misrepresenting the truth, and not making the true
-/// live-through value for variable locations.
+/// Secondly, for each variable we perform the same analysis, where each debug
+/// instruction is considered a def, and every instruction where the variable
+/// is in lexical scope counts as a use. Value propagation is used again to
+/// eliminate any unnecessary PHIs. This gives us a map of each variable to
+/// the value it should have in a block.
///
-/// This algorithm applies to both calculating the availability of values in
-/// the first analysis, and the location of variables in the second. However
-/// for the second we add an extra dimension of pain: creating a variable
-/// location PHI is only valid if, for each incoming edge,
-/// * There is a value for the variable on the incoming edge, and
-/// * All the edges have that value in the same register.
-/// Or put another way: we can only create a variable-location PHI if there is
-/// a matching machine-location PHI, each input to which is the variables value
-/// in the predecessor block.
+/// Once both are complete, we have two maps for each block:
+/// * Variables to the values they should have,
+/// * Values to the register / spill slot they are located in.
+/// After which we can marry up variable values with a location, and emit
+/// DBG_VALUE instructions specifying those locations. Variable locations may
+/// be dropped in this process due to the desired variable value not being
+/// resident in any machine location, or because there is no PHI value in any
+/// location that accurately represents the desired value. The building of
+/// location lists for each block is left to DbgEntityHistoryCalculator.
///
-/// To accommodate this difference, each point on the lattice is split in
-/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately
-/// have a location determined are "definite" PHIs, and no further work is
-/// needed. Otherwise, a location that all non-backedge predecessors agree
-/// on is picked and propagated as a "proposed" PHI value. If that PHI value
-/// is truly live-through, it'll appear on the loop backedges on the next
-/// dataflow iteration, after which the block live-in moves to be a "definite"
-/// PHI. If it's not truly live-through, the variable value will be downgraded
-/// further as we explore the lattice, or remains "proposed" and is considered
-/// invalid once dataflow completes.
+/// This pass is kept efficient because the size of the first SSA problem
+/// is proportional to the working-set size of the function, which the compiler
+/// tries to keep small. (It's also proportional to the number of blocks).
+/// Additionally, we repeatedly perform the second SSA problem analysis with
+/// only the variables and blocks in a single lexical scope, exploiting their
+/// locality.
///
/// ### Terminology
///
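
To make the per-block join described in this rewritten header comment concrete, here is a purely illustrative sketch under assumed map shapes; the names joinVariableLocations, VariableValues and ValueToLoc are hypothetical, and the real logic lives in TransferTracker further down this file:

#include <map>

// Marry each variable's desired value to a machine location for one block,
// dropping any variable whose value is not resident in any location.
template <typename VarT, typename ValueT, typename LocT, typename EmitFn>
static void joinVariableLocations(const std::map<VarT, ValueT> &VariableValues,
                                  const std::map<ValueT, LocT> &ValueToLoc,
                                  EmitFn EmitDbgValue) {
  for (const auto &P : VariableValues) {
    auto It = ValueToLoc.find(P.second);
    if (It == ValueToLoc.end())
      continue; // No register or spill slot holds the value: drop the location.
    EmitDbgValue(P.first, It->second); // Emit a live-in DBG_VALUE here.
  }
}
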
@@ -128,15 +62,13 @@
/// contain the appropriate variable value. A value that is a PHI node is
/// occasionally called an mphi.
///
-/// The first dataflow problem is the "machine value location" problem,
+/// The first SSA problem is the "machine value location" problem,
/// because we're determining which machine locations contain which values.
/// The "locations" are constant: what's unknown is what value they contain.
///
-/// The second dataflow problem (the one for variables) is the "variable value
+/// The second SSA problem (the one for variables) is the "variable value
/// problem", because it's determining what values a variable has, rather than
-/// what location those values are placed in. Unfortunately, it's not that
-/// simple, because producing a PHI value always involves picking a location.
-/// This is an imperfection that we just have to accept, at least for now.
+/// what location those values are placed in.
///
/// TODO:
/// Overlapping fragments
@@ -153,9 +85,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -192,16 +125,18 @@
#include <cassert>
#include <cstdint>
#include <functional>
+#include <limits.h>
+#include <limits>
#include <queue>
#include <tuple>
#include <utility>
#include <vector>
-#include <limits.h>
-#include <limits>
+#include "InstrRefBasedImpl.h"
#include "LiveDebugValues.h"
using namespace llvm;
+using namespace LiveDebugValues;
// SSAUpdaterImpl sets DEBUG_TYPE, change it.
#undef DEBUG_TYPE
@@ -213,730 +148,6 @@ static cl::opt<bool> EmulateOldLDV("emulate-old-livedebugvalues", cl::Hidden,
cl::desc("Act like old LiveDebugValues did"),
cl::init(false));
-namespace {
-
-// The location at which a spilled value resides. It consists of a register and
-// an offset.
-struct SpillLoc {
- unsigned SpillBase;
- StackOffset SpillOffset;
- bool operator==(const SpillLoc &Other) const {
- return std::make_pair(SpillBase, SpillOffset) ==
- std::make_pair(Other.SpillBase, Other.SpillOffset);
- }
- bool operator<(const SpillLoc &Other) const {
- return std::make_tuple(SpillBase, SpillOffset.getFixed(),
- SpillOffset.getScalable()) <
- std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
- Other.SpillOffset.getScalable());
- }
-};
-
-class LocIdx {
- unsigned Location;
-
- // Default constructor is private, initializing to an illegal location number.
- // Use only for "not an entry" elements in IndexedMaps.
- LocIdx() : Location(UINT_MAX) { }
-
-public:
- #define NUM_LOC_BITS 24
- LocIdx(unsigned L) : Location(L) {
- assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
- }
-
- static LocIdx MakeIllegalLoc() {
- return LocIdx();
- }
-
- bool isIllegal() const {
- return Location == UINT_MAX;
- }
-
- uint64_t asU64() const {
- return Location;
- }
-
- bool operator==(unsigned L) const {
- return Location == L;
- }
-
- bool operator==(const LocIdx &L) const {
- return Location == L.Location;
- }
-
- bool operator!=(unsigned L) const {
- return !(*this == L);
- }
-
- bool operator!=(const LocIdx &L) const {
- return !(*this == L);
- }
-
- bool operator<(const LocIdx &Other) const {
- return Location < Other.Location;
- }
-};
-
-class LocIdxToIndexFunctor {
-public:
- using argument_type = LocIdx;
- unsigned operator()(const LocIdx &L) const {
- return L.asU64();
- }
-};
-
-/// Unique identifier for a value defined by an instruction, as a value type.
-/// Casts back and forth to a uint64_t. Probably replacable with something less
-/// bit-constrained. Each value identifies the instruction and machine location
-/// where the value is defined, although there may be no corresponding machine
-/// operand for it (ex: regmasks clobbering values). The instructions are
-/// one-based, and definitions that are PHIs have instruction number zero.
-///
-/// The obvious limits of a 1M block function or 1M instruction blocks are
-/// problematic; but by that point we should probably have bailed out of
-/// trying to analyse the function.
-class ValueIDNum {
- uint64_t BlockNo : 20; /// The block where the def happens.
- uint64_t InstNo : 20; /// The Instruction where the def happens.
- /// One based, is distance from start of block.
- uint64_t LocNo : NUM_LOC_BITS; /// The machine location where the def happens.
-
-public:
- // XXX -- temporarily enabled while the live-in / live-out tables are moved
- // to something more type-y
- ValueIDNum() : BlockNo(0xFFFFF),
- InstNo(0xFFFFF),
- LocNo(0xFFFFFF) { }
-
- ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc)
- : BlockNo(Block), InstNo(Inst), LocNo(Loc) { }
-
- ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc)
- : BlockNo(Block), InstNo(Inst), LocNo(Loc.asU64()) { }
-
- uint64_t getBlock() const { return BlockNo; }
- uint64_t getInst() const { return InstNo; }
- uint64_t getLoc() const { return LocNo; }
- bool isPHI() const { return InstNo == 0; }
-
- uint64_t asU64() const {
- uint64_t TmpBlock = BlockNo;
- uint64_t TmpInst = InstNo;
- return TmpBlock << 44ull | TmpInst << NUM_LOC_BITS | LocNo;
- }
-
- static ValueIDNum fromU64(uint64_t v) {
- uint64_t L = (v & 0x3FFF);
- return {v >> 44ull, ((v >> NUM_LOC_BITS) & 0xFFFFF), L};
- }
-
- bool operator<(const ValueIDNum &Other) const {
- return asU64() < Other.asU64();
- }
-
- bool operator==(const ValueIDNum &Other) const {
- return std::tie(BlockNo, InstNo, LocNo) ==
- std::tie(Other.BlockNo, Other.InstNo, Other.LocNo);
- }
-
- bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
-
- std::string asString(const std::string &mlocname) const {
- return Twine("Value{bb: ")
- .concat(Twine(BlockNo).concat(
- Twine(", inst: ")
- .concat((InstNo ? Twine(InstNo) : Twine("live-in"))
- .concat(Twine(", loc: ").concat(Twine(mlocname)))
- .concat(Twine("}")))))
- .str();
- }
-
- static ValueIDNum EmptyValue;
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
-/// the the value, and Boolean of whether or not it's indirect.
-class DbgValueProperties {
-public:
- DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
- : DIExpr(DIExpr), Indirect(Indirect) {}
-
- /// Extract properties from an existing DBG_VALUE instruction.
- DbgValueProperties(const MachineInstr &MI) {
- assert(MI.isDebugValue());
- DIExpr = MI.getDebugExpression();
- Indirect = MI.getOperand(1).isImm();
- }
-
- bool operator==(const DbgValueProperties &Other) const {
- return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
- }
-
- bool operator!=(const DbgValueProperties &Other) const {
- return !(*this == Other);
- }
-
- const DIExpression *DIExpr;
- bool Indirect;
-};
-
-/// Tracker for what values are in machine locations. Listens to the Things
-/// being Done by various instructions, and maintains a table of what machine
-/// locations have what values (as defined by a ValueIDNum).
-///
-/// There are potentially a much larger number of machine locations on the
-/// target machine than the actual working-set size of the function. On x86 for
-/// example, we're extremely unlikely to want to track values through control
-/// or debug registers. To avoid doing so, MLocTracker has several layers of
-/// indirection going on, with two kinds of ``location'':
-/// * A LocID uniquely identifies a register or spill location, with a
-/// predictable value.
-/// * A LocIdx is a key (in the database sense) for a LocID and a ValueIDNum.
-/// Whenever a location is def'd or used by a MachineInstr, we automagically
-/// create a new LocIdx for a location, but not otherwise. This ensures we only
-/// account for locations that are actually used or defined. The cost is another
-/// vector lookup (of LocID -> LocIdx) over any other implementation. This is
-/// fairly cheap, and the compiler tries to reduce the working-set at any one
-/// time in the function anyway.
-///
-/// Register mask operands completely blow this out of the water; I've just
-/// piled hacks on top of hacks to get around that.
-class MLocTracker {
-public:
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const TargetLowering &TLI;
-
- /// IndexedMap type, mapping from LocIdx to ValueIDNum.
- using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
-
- /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
- /// packed, entries only exist for locations that are being tracked.
- LocToValueType LocIdxToIDNum;
-
- /// "Map" of machine location IDs (i.e., raw register or spill number) to the
- /// LocIdx key / number for that location. There are always at least as many
- /// as the number of registers on the target -- if the value in the register
- /// is not being tracked, then the LocIdx value will be zero. New entries are
- /// appended if a new spill slot begins being tracked.
- /// This, and the corresponding reverse map persist for the analysis of the
- /// whole function, and is necessarying for decoding various vectors of
- /// values.
- std::vector<LocIdx> LocIDToLocIdx;
-
- /// Inverse map of LocIDToLocIdx.
- IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
-
- /// Unique-ification of spill slots. Used to number them -- their LocID
- /// number is the index in SpillLocs minus one plus NumRegs.
- UniqueVector<SpillLoc> SpillLocs;
-
- // If we discover a new machine location, assign it an mphi with this
- // block number.
- unsigned CurBB;
-
- /// Cached local copy of the number of registers the target has.
- unsigned NumRegs;
-
- /// Collection of register mask operands that have been observed. Second part
- /// of pair indicates the instruction that they happened in. Used to
- /// reconstruct where defs happened if we start tracking a location later
- /// on.
- SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
-
- /// Iterator for locations and the values they contain. Dereferencing
- /// produces a struct/pair containing the LocIdx key for this location,
- /// and a reference to the value currently stored. Simplifies the process
- /// of seeking a particular location.
- class MLocIterator {
- LocToValueType &ValueMap;
- LocIdx Idx;
-
- public:
- class value_type {
- public:
- value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) { }
- const LocIdx Idx; /// Read-only index of this location.
- ValueIDNum &Value; /// Reference to the stored value at this location.
- };
-
- MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
- : ValueMap(ValueMap), Idx(Idx) { }
-
- bool operator==(const MLocIterator &Other) const {
- assert(&ValueMap == &Other.ValueMap);
- return Idx == Other.Idx;
- }
-
- bool operator!=(const MLocIterator &Other) const {
- return !(*this == Other);
- }
-
- void operator++() {
- Idx = LocIdx(Idx.asU64() + 1);
- }
-
- value_type operator*() {
- return value_type(Idx, ValueMap[LocIdx(Idx)]);
- }
- };
-
- MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI, const TargetLowering &TLI)
- : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
- LocIdxToIDNum(ValueIDNum::EmptyValue),
- LocIdxToLocID(0) {
- NumRegs = TRI.getNumRegs();
- reset();
- LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
- assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
-
- // Always track SP. This avoids the implicit clobbering caused by regmasks
- // from affectings its values. (LiveDebugValues disbelieves calls and
- // regmasks that claim to clobber SP).
- Register SP = TLI.getStackPointerRegisterToSaveRestore();
- if (SP) {
- unsigned ID = getLocID(SP, false);
- (void)lookupOrTrackRegister(ID);
- }
- }
-
- /// Produce location ID number for indexing LocIDToLocIdx. Takes the register
- /// or spill number, and flag for whether it's a spill or not.
- unsigned getLocID(Register RegOrSpill, bool isSpill) {
- return (isSpill) ? RegOrSpill.id() + NumRegs - 1 : RegOrSpill.id();
- }
-
- /// Accessor for reading the value at Idx.
- ValueIDNum getNumAtPos(LocIdx Idx) const {
- assert(Idx.asU64() < LocIdxToIDNum.size());
- return LocIdxToIDNum[Idx];
- }
-
- unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
-
- /// Reset all locations to contain a PHI value at the designated block. Used
- /// sometimes for actual PHI values, othertimes to indicate the block entry
- /// value (before any more information is known).
- void setMPhis(unsigned NewCurBB) {
- CurBB = NewCurBB;
- for (auto Location : locations())
- Location.Value = {CurBB, 0, Location.Idx};
- }
-
- /// Load values for each location from array of ValueIDNums. Take current
- /// bbnum just in case we read a value from a hitherto untouched register.
- void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
- CurBB = NewCurBB;
- // Iterate over all tracked locations, and load each locations live-in
- // value into our local index.
- for (auto Location : locations())
- Location.Value = Locs[Location.Idx.asU64()];
- }
-
- /// Wipe any un-necessary location records after traversing a block.
- void reset(void) {
- // We could reset all the location values too; however either loadFromArray
- // or setMPhis should be called before this object is re-used. Just
- // clear Masks, they're definitely not needed.
- Masks.clear();
- }
-
- /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
- /// the information in this pass uninterpretable.
- void clear(void) {
- reset();
- LocIDToLocIdx.clear();
- LocIdxToLocID.clear();
- LocIdxToIDNum.clear();
- //SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from 0
- SpillLocs = decltype(SpillLocs)();
-
- LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
- }
-
- /// Set a locaiton to a certain value.
- void setMLoc(LocIdx L, ValueIDNum Num) {
- assert(L.asU64() < LocIdxToIDNum.size());
- LocIdxToIDNum[L] = Num;
- }
-
- /// Create a LocIdx for an untracked register ID. Initialize it to either an
- /// mphi value representing a live-in, or a recent register mask clobber.
- LocIdx trackRegister(unsigned ID) {
- assert(ID != 0);
- LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
- LocIdxToIDNum.grow(NewIdx);
- LocIdxToLocID.grow(NewIdx);
-
- // Default: it's an mphi.
- ValueIDNum ValNum = {CurBB, 0, NewIdx};
- // Was this reg ever touched by a regmask?
- for (const auto &MaskPair : reverse(Masks)) {
- if (MaskPair.first->clobbersPhysReg(ID)) {
- // There was an earlier def we skipped.
- ValNum = {CurBB, MaskPair.second, NewIdx};
- break;
- }
- }
-
- LocIdxToIDNum[NewIdx] = ValNum;
- LocIdxToLocID[NewIdx] = ID;
- return NewIdx;
- }
-
- LocIdx lookupOrTrackRegister(unsigned ID) {
- LocIdx &Index = LocIDToLocIdx[ID];
- if (Index.isIllegal())
- Index = trackRegister(ID);
- return Index;
- }
-
- /// Record a definition of the specified register at the given block / inst.
- /// This doesn't take a ValueIDNum, because the definition and its location
- /// are synonymous.
- void defReg(Register R, unsigned BB, unsigned Inst) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- ValueIDNum ValueID = {BB, Inst, Idx};
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- /// Set a register to a value number. To be used if the value number is
- /// known in advance.
- void setReg(Register R, ValueIDNum ValueID) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- ValueIDNum readReg(Register R) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = lookupOrTrackRegister(ID);
- return LocIdxToIDNum[Idx];
- }
-
- /// Reset a register value to zero / empty. Needed to replicate the
- /// VarLoc implementation where a copy to/from a register effectively
- /// clears the contents of the source register. (Values can only have one
- /// machine location in VarLocBasedImpl).
- void wipeRegister(Register R) {
- unsigned ID = getLocID(R, false);
- LocIdx Idx = LocIDToLocIdx[ID];
- LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
- }
-
- /// Determine the LocIdx of an existing register.
- LocIdx getRegMLoc(Register R) {
- unsigned ID = getLocID(R, false);
- return LocIDToLocIdx[ID];
- }
-
- /// Record a RegMask operand being executed. Defs any register we currently
- /// track, stores a pointer to the mask in case we have to account for it
- /// later.
- void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID) {
- // Ensure SP exists, so that we don't override it later.
- Register SP = TLI.getStackPointerRegisterToSaveRestore();
-
- // Def any register we track have that isn't preserved. The regmask
- // terminates the liveness of a register, meaning its value can't be
- // relied upon -- we represent this by giving it a new value.
- for (auto Location : locations()) {
- unsigned ID = LocIdxToLocID[Location.Idx];
- // Don't clobber SP, even if the mask says it's clobbered.
- if (ID < NumRegs && ID != SP && MO->clobbersPhysReg(ID))
- defReg(ID, CurBB, InstID);
- }
- Masks.push_back(std::make_pair(MO, InstID));
- }
-
- /// Find LocIdx for SpillLoc \p L, creating a new one if it's not tracked.
- LocIdx getOrTrackSpillLoc(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0) {
- SpillID = SpillLocs.insert(L);
- unsigned L = getLocID(SpillID, true);
- LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
- LocIdxToIDNum.grow(Idx);
- LocIdxToLocID.grow(Idx);
- LocIDToLocIdx.push_back(Idx);
- LocIdxToLocID[Idx] = L;
- return Idx;
- } else {
- unsigned L = getLocID(SpillID, true);
- LocIdx Idx = LocIDToLocIdx[L];
- return Idx;
- }
- }
-
- /// Set the value stored in a spill slot.
- void setSpill(SpillLoc L, ValueIDNum ValueID) {
- LocIdx Idx = getOrTrackSpillLoc(L);
- LocIdxToIDNum[Idx] = ValueID;
- }
-
- /// Read whatever value is in a spill slot, or None if it isn't tracked.
- Optional<ValueIDNum> readSpill(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0)
- return None;
-
- unsigned LocID = getLocID(SpillID, true);
- LocIdx Idx = LocIDToLocIdx[LocID];
- return LocIdxToIDNum[Idx];
- }
-
- /// Determine the LocIdx of a spill slot. Return None if it previously
- /// hasn't had a value assigned.
- Optional<LocIdx> getSpillMLoc(SpillLoc L) {
- unsigned SpillID = SpillLocs.idFor(L);
- if (SpillID == 0)
- return None;
- unsigned LocNo = getLocID(SpillID, true);
- return LocIDToLocIdx[LocNo];
- }
-
- /// Return true if Idx is a spill machine location.
- bool isSpill(LocIdx Idx) const {
- return LocIdxToLocID[Idx] >= NumRegs;
- }
-
- MLocIterator begin() {
- return MLocIterator(LocIdxToIDNum, 0);
- }
-
- MLocIterator end() {
- return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
- }
-
- /// Return a range over all locations currently tracked.
- iterator_range<MLocIterator> locations() {
- return llvm::make_range(begin(), end());
- }
-
- std::string LocIdxToName(LocIdx Idx) const {
- unsigned ID = LocIdxToLocID[Idx];
- if (ID >= NumRegs)
- return Twine("slot ").concat(Twine(ID - NumRegs)).str();
- else
- return TRI.getRegAsmName(ID).str();
- }
-
- std::string IDAsString(const ValueIDNum &Num) const {
- std::string DefName = LocIdxToName(Num.getLoc());
- return Num.asString(DefName);
- }
-
- LLVM_DUMP_METHOD
- void dump() {
- for (auto Location : locations()) {
- std::string MLocName = LocIdxToName(Location.Value.getLoc());
- std::string DefName = Location.Value.asString(MLocName);
- dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
- }
- }
-
- LLVM_DUMP_METHOD
- void dump_mloc_map() {
- for (auto Location : locations()) {
- std::string foo = LocIdxToName(Location.Idx);
- dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
- }
- }
-
- /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
- /// information in \pProperties, for variable Var. Don't insert it anywhere,
- /// just return the builder for it.
- MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
- const DbgValueProperties &Properties) {
- DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
- Var.getVariable()->getScope(),
- const_cast<DILocation *>(Var.getInlinedAt()));
- auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
-
- const DIExpression *Expr = Properties.DIExpr;
- if (!MLoc) {
- // No location -> DBG_VALUE $noreg
- MIB.addReg(0, RegState::Debug);
- MIB.addReg(0, RegState::Debug);
- } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
- unsigned LocID = LocIdxToLocID[*MLoc];
- const SpillLoc &Spill = SpillLocs[LocID - NumRegs + 1];
-
- auto *TRI = MF.getSubtarget().getRegisterInfo();
- Expr = TRI->prependOffsetExpression(Expr, DIExpression::ApplyOffset,
- Spill.SpillOffset);
- unsigned Base = Spill.SpillBase;
- MIB.addReg(Base, RegState::Debug);
- MIB.addImm(0);
- } else {
- unsigned LocID = LocIdxToLocID[*MLoc];
- MIB.addReg(LocID, RegState::Debug);
- if (Properties.Indirect)
- MIB.addImm(0);
- else
- MIB.addReg(0, RegState::Debug);
- }
-
- MIB.addMetadata(Var.getVariable());
- MIB.addMetadata(Expr);
- return MIB;
- }
-};
-
-/// Class recording the (high level) _value_ of a variable. Identifies either
-/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
-/// This class also stores meta-information about how the value is qualified.
-/// Used to reason about variable values when performing the second
-/// (DebugVariable specific) dataflow analysis.
-class DbgValue {
-public:
- union {
- /// If Kind is Def, the value number that this value is based on.
- ValueIDNum ID;
- /// If Kind is Const, the MachineOperand defining this value.
- MachineOperand MO;
- /// For a NoVal DbgValue, which block it was generated in.
- unsigned BlockNo;
- };
- /// Qualifiers for the ValueIDNum above.
- DbgValueProperties Properties;
-
- typedef enum {
- Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
- Def, // This value is defined by an inst, or is a PHI value.
- Const, // A constant value contained in the MachineOperand field.
- Proposed, // This is a tentative PHI value, which may be confirmed or
- // invalidated later.
- NoVal // Empty DbgValue, generated during dataflow. BlockNo stores
- // which block this was generated in.
- } KindT;
- /// Discriminator for whether this is a constant or an in-program value.
- KindT Kind;
-
- DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
- : ID(Val), Properties(Prop), Kind(Kind) {
- assert(Kind == Def || Kind == Proposed);
- }
-
- DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
- : BlockNo(BlockNo), Properties(Prop), Kind(Kind) {
- assert(Kind == NoVal);
- }
-
- DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
- : MO(MO), Properties(Prop), Kind(Kind) {
- assert(Kind == Const);
- }
-
- DbgValue(const DbgValueProperties &Prop, KindT Kind)
- : Properties(Prop), Kind(Kind) {
- assert(Kind == Undef &&
- "Empty DbgValue constructor must pass in Undef kind");
- }
-
- void dump(const MLocTracker *MTrack) const {
- if (Kind == Const) {
- MO.dump();
- } else if (Kind == NoVal) {
- dbgs() << "NoVal(" << BlockNo << ")";
- } else if (Kind == Proposed) {
- dbgs() << "VPHI(" << MTrack->IDAsString(ID) << ")";
- } else {
- assert(Kind == Def);
- dbgs() << MTrack->IDAsString(ID);
- }
- if (Properties.Indirect)
- dbgs() << " indir";
- if (Properties.DIExpr)
- dbgs() << " " << *Properties.DIExpr;
- }
-
- bool operator==(const DbgValue &Other) const {
- if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
- return false;
- else if (Kind == Proposed && ID != Other.ID)
- return false;
- else if (Kind == Def && ID != Other.ID)
- return false;
- else if (Kind == NoVal && BlockNo != Other.BlockNo)
- return false;
- else if (Kind == Const)
- return MO.isIdenticalTo(Other.MO);
-
- return true;
- }
-
- bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
-};
-
-/// Types for recording sets of variable fragments that overlap. For a given
-/// local variable, we record all other fragments of that variable that could
-/// overlap it, to reduce search time.
-using FragmentOfVar =
- std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
-using OverlapMap =
- DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
-
-/// Collection of DBG_VALUEs observed when traversing a block. Records each
-/// variable and the value the DBG_VALUE refers to. Requires the machine value
-/// location dataflow algorithm to have run already, so that values can be
-/// identified.
-class VLocTracker {
-public:
- /// Map DebugVariable to the latest Value it's defined to have.
- /// Needs to be a MapVector because we determine order-in-the-input-MIR from
- /// the order in this container.
- /// We only retain the last DbgValue in each block for each variable, to
- /// determine the blocks live-out variable value. The Vars container forms the
- /// transfer function for this block, as part of the dataflow analysis. The
- /// movement of values between locations inside of a block is handled at a
- /// much later stage, in the TransferTracker class.
- MapVector<DebugVariable, DbgValue> Vars;
- DenseMap<DebugVariable, const DILocation *> Scopes;
- MachineBasicBlock *MBB;
-
-public:
- VLocTracker() {}
-
- void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
- Optional<ValueIDNum> ID) {
- assert(MI.isDebugValue() || MI.isDebugRef());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
- : DbgValue(Properties, DbgValue::Undef);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
- }
-
- void defVar(const MachineInstr &MI, const MachineOperand &MO) {
- // Only DBG_VALUEs can define constant-valued variables.
- assert(MI.isDebugValue());
- DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
- MI.getDebugLoc()->getInlinedAt());
- DbgValueProperties Properties(MI);
- DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
-
- // Attempt insertion; overwrite if it's already mapped.
- auto Result = Vars.insert(std::make_pair(Var, Rec));
- if (!Result.second)
- Result.first->second = Rec;
- Scopes[Var] = MI.getDebugLoc().get();
- }
-};
-
/// Tracker for converting machine value locations and variable values into
/// variable locations (the output of LiveDebugValues), recorded as DBG_VALUEs
/// specifying block live-in locations and transfers within blocks.
@@ -985,12 +196,12 @@ public:
/// between TransferTrackers view of variable locations and MLocTrackers. For
/// example, MLocTracker observes all clobbers, but TransferTracker lazily
/// does not.
- std::vector<ValueIDNum> VarLocs;
+ SmallVector<ValueIDNum, 32> VarLocs;
/// Map from LocIdxes to which DebugVariables are based on that location.
/// Maintained while stepping through the block. Not accurate if
/// VarLocs[Idx] != MTracker->LocIdxToIDNum[Idx].
- std::map<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
+ DenseMap<LocIdx, SmallSet<DebugVariable, 4>> ActiveMLocs;
/// Map from DebugVariable to its current location and qualifying meta
/// information. To be used in conjunction with ActiveMLocs to construct
@@ -1062,6 +273,8 @@ public:
// Map of the preferred location for each value.
std::map<ValueIDNum, LocIdx> ValueToLoc;
+ ActiveMLocs.reserve(VLocs.size());
+ ActiveVLocs.reserve(VLocs.size());
// Produce a map of value numbers to the current machine locs they live
// in. When emulating VarLocBasedImpl, there should only be one
@@ -1088,7 +301,7 @@ public:
for (auto Var : VLocs) {
if (Var.second.Kind == DbgValue::Const) {
PendingDbgValues.push_back(
- emitMOLoc(Var.second.MO, Var.first, Var.second.Properties));
+ emitMOLoc(*Var.second.MO, Var.first, Var.second.Properties));
continue;
}
@@ -1142,7 +355,7 @@ public:
// instruction or similar with an instruction number, where it doesn't
// actually define a new value, instead it moves a value. In case this
// happens, discard.
- if (MTracker->LocIdxToIDNum[L] != Use.ID)
+ if (MTracker->readMLoc(L) != Use.ID)
continue;
// If a different debug instruction defined the variable value / location
@@ -1220,7 +433,6 @@ public:
DIExpression::prepend(Prop.DIExpr, DIExpression::EntryValue);
Register Reg = MTracker->LocIdxToLocID[Num.getLoc()];
MachineOperand MO = MachineOperand::CreateReg(Reg, false);
- MO.setIsDebug(true);
PendingDbgValues.push_back(emitMOLoc(MO, Var, {NewExpr, Prop.Indirect}));
return true;
@@ -1274,12 +486,12 @@ public:
// Check whether our local copy of values-by-location in #VarLocs is out of
// date. Wipe old tracking data for the location if it's been clobbered in
// the meantime.
- if (MTracker->getNumAtPos(NewLoc) != VarLocs[NewLoc.asU64()]) {
+ if (MTracker->readMLoc(NewLoc) != VarLocs[NewLoc.asU64()]) {
for (auto &P : ActiveMLocs[NewLoc]) {
ActiveVLocs.erase(P);
}
ActiveMLocs[NewLoc.asU64()].clear();
- VarLocs[NewLoc.asU64()] = MTracker->getNumAtPos(NewLoc);
+ VarLocs[NewLoc.asU64()] = MTracker->readMLoc(NewLoc);
}
ActiveMLocs[NewLoc].insert(Var);
@@ -1358,6 +570,8 @@ public:
flushDbgValues(Pos, nullptr);
+    // Re-find ActiveMLocIt, as the iterator could have been invalidated.
+ ActiveMLocIt = ActiveMLocs.find(MLoc);
ActiveMLocIt->second.clear();
}
@@ -1367,21 +581,23 @@ public:
void transferMlocs(LocIdx Src, LocIdx Dst, MachineBasicBlock::iterator Pos) {
// Does Src still contain the value num we expect? If not, it's been
// clobbered in the meantime, and our variable locations are stale.
- if (VarLocs[Src.asU64()] != MTracker->getNumAtPos(Src))
+ if (VarLocs[Src.asU64()] != MTracker->readMLoc(Src))
return;
// assert(ActiveMLocs[Dst].size() == 0);
//^^^ Legitimate scenario on account of un-clobbered slot being assigned to?
- ActiveMLocs[Dst] = ActiveMLocs[Src];
+
+ // Move set of active variables from one location to another.
+ auto MovingVars = ActiveMLocs[Src];
+ ActiveMLocs[Dst] = MovingVars;
VarLocs[Dst.asU64()] = VarLocs[Src.asU64()];
// For each variable based on Src; create a location at Dst.
- for (auto &Var : ActiveMLocs[Src]) {
+ for (auto &Var : MovingVars) {
auto ActiveVLocIt = ActiveVLocs.find(Var);
assert(ActiveVLocIt != ActiveVLocs.end());
ActiveVLocIt->second.Loc = Dst;
- assert(Dst != 0);
MachineInstr *MI =
MTracker->emitLoc(Dst, Var, ActiveVLocIt->second.Properties);
PendingDbgValues.push_back(MI);
@@ -1413,306 +629,245 @@ public:
}
};
-class InstrRefBasedLDV : public LDVImpl {
-private:
- using FragmentInfo = DIExpression::FragmentInfo;
- using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
-
- // Helper while building OverlapMap, a map of all fragments seen for a given
- // DILocalVariable.
- using VarToFragments =
- DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
-
- /// Machine location/value transfer function, a mapping of which locations
- /// are assigned which new values.
- using MLocTransferMap = std::map<LocIdx, ValueIDNum>;
-
- /// Live in/out structure for the variable values: a per-block map of
- /// variables to their values. XXX, better name?
- using LiveIdxT =
- DenseMap<const MachineBasicBlock *, DenseMap<DebugVariable, DbgValue> *>;
-
- using VarAndLoc = std::pair<DebugVariable, DbgValue>;
-
- /// Type for a live-in value: the predecessor block, and its value.
- using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
-
- /// Vector (per block) of a collection (inner smallvector) of live-ins.
- /// Used as the result type for the variable value dataflow problem.
- using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
-
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const TargetFrameLowering *TFI;
- const MachineFrameInfo *MFI;
- BitVector CalleeSavedRegs;
- LexicalScopes LS;
- TargetPassConfig *TPC;
-
- /// Object to track machine locations as we step through a block. Could
- /// probably be a field rather than a pointer, as it's always used.
- MLocTracker *MTracker;
+//===----------------------------------------------------------------------===//
+// Implementation
+//===----------------------------------------------------------------------===//
- /// Number of the current block LiveDebugValues is stepping through.
- unsigned CurBB;
+ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ValueIDNum ValueIDNum::TombstoneValue = {UINT_MAX, UINT_MAX, UINT_MAX - 1};
- /// Number of the current instruction LiveDebugValues is evaluating.
- unsigned CurInst;
+#ifndef NDEBUG
+void DbgValue::dump(const MLocTracker *MTrack) const {
+ if (Kind == Const) {
+ MO->dump();
+ } else if (Kind == NoVal) {
+ dbgs() << "NoVal(" << BlockNo << ")";
+ } else if (Kind == VPHI) {
+ dbgs() << "VPHI(" << BlockNo << "," << MTrack->IDAsString(ID) << ")";
+ } else {
+ assert(Kind == Def);
+ dbgs() << MTrack->IDAsString(ID);
+ }
+ if (Properties.Indirect)
+ dbgs() << " indir";
+ if (Properties.DIExpr)
+ dbgs() << " " << *Properties.DIExpr;
+}
+#endif
- /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
- /// steps through a block. Reads the values at each location from the
- /// MLocTracker object.
- VLocTracker *VTracker;
+MLocTracker::MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const TargetLowering &TLI)
+ : MF(MF), TII(TII), TRI(TRI), TLI(TLI),
+ LocIdxToIDNum(ValueIDNum::EmptyValue), LocIdxToLocID(0) {
+ NumRegs = TRI.getNumRegs();
+ reset();
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ assert(NumRegs < (1u << NUM_LOC_BITS)); // Detect bit packing failure
+
+ // Always track SP. This avoids the implicit clobbering caused by regmasks
+  // from affecting its values. (LiveDebugValues disbelieves calls and
+ // regmasks that claim to clobber SP).
+ Register SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (SP) {
+ unsigned ID = getLocID(SP);
+ (void)lookupOrTrackRegister(ID);
+
+ for (MCRegAliasIterator RAI(SP, &TRI, true); RAI.isValid(); ++RAI)
+ SPAliases.insert(*RAI);
+ }
+
+ // Build some common stack positions -- full registers being spilt to the
+ // stack.
+ StackSlotIdxes.insert({{8, 0}, 0});
+ StackSlotIdxes.insert({{16, 0}, 1});
+ StackSlotIdxes.insert({{32, 0}, 2});
+ StackSlotIdxes.insert({{64, 0}, 3});
+ StackSlotIdxes.insert({{128, 0}, 4});
+ StackSlotIdxes.insert({{256, 0}, 5});
+ StackSlotIdxes.insert({{512, 0}, 6});
+
+ // Traverse all the subregister idxes, and ensure there's an index for them.
+ // Duplicates are no problem: we're interested in their position in the
+  // stack slot; we don't want to type the slot.
+ for (unsigned int I = 1; I < TRI.getNumSubRegIndices(); ++I) {
+ unsigned Size = TRI.getSubRegIdxSize(I);
+ unsigned Offs = TRI.getSubRegIdxOffset(I);
+ unsigned Idx = StackSlotIdxes.size();
+
+ // Some subregs have -1, -2 and so forth fed into their fields, to mean
+ // special backend things. Ignore those.
+ if (Size > 60000 || Offs > 60000)
+ continue;
- /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
- /// between locations during stepping, creates new DBG_VALUEs when values move
- /// location.
- TransferTracker *TTracker;
+ StackSlotIdxes.insert({{Size, Offs}, Idx});
+ }
- /// Blocks which are artificial, i.e. blocks which exclusively contain
- /// instructions without DebugLocs, or with line 0 locations.
- SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+ for (auto &Idx : StackSlotIdxes)
+ StackIdxesToPos[Idx.second] = Idx.first;
- // Mapping of blocks to and from their RPOT order.
- DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
- DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
- DenseMap<unsigned, unsigned> BBNumToRPO;
+ NumSlotIdxes = StackSlotIdxes.size();
+}
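
For illustration, a minimal standalone sketch (not part of the patch itself) of the (size-in-bits, offset) to slot-index interning performed by the constructor above. StackSlotIdxes and StackIdxesToPos here are plain std::map stand-ins for the tracker's members, seeded with the same power-of-two sizes; re-inserting a known position is a no-op, which is why duplicates from the subregister walk are harmless.

#include <cassert>
#include <initializer_list>
#include <map>
#include <utility>

using StackSlotPos = std::pair<unsigned, unsigned>; // {size in bits, offset}

int main() {
  std::map<StackSlotPos, unsigned> StackSlotIdxes;  // position -> dense index
  std::map<unsigned, StackSlotPos> StackIdxesToPos; // inverse map

  // Seed the common full-register spill sizes, as the constructor does.
  for (unsigned Size : {8u, 16u, 32u, 64u, 128u, 256u, 512u})
    StackSlotIdxes.insert({{Size, 0}, (unsigned)StackSlotIdxes.size()});

  // Subregister positions get fresh indexes; re-inserting a known position
  // is a no-op, so duplicates keep their original index.
  StackSlotIdxes.insert({{32, 32}, (unsigned)StackSlotIdxes.size()});
  StackSlotIdxes.insert({{32, 0}, (unsigned)StackSlotIdxes.size()}); // no-op

  for (auto &P : StackSlotIdxes)
    StackIdxesToPos[P.second] = P.first;

  assert(StackSlotIdxes.size() == 8);
  assert(StackIdxesToPos[2] == StackSlotPos(32, 0)); // 32-bit, offset-0 piece
  return 0;
}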
- /// Pair of MachineInstr, and its 1-based offset into the containing block.
- using InstAndNum = std::pair<const MachineInstr *, unsigned>;
- /// Map from debug instruction number to the MachineInstr labelled with that
- /// number, and its location within the function. Used to transform
- /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
- std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+LocIdx MLocTracker::trackRegister(unsigned ID) {
+ assert(ID != 0);
+ LocIdx NewIdx = LocIdx(LocIdxToIDNum.size());
+ LocIdxToIDNum.grow(NewIdx);
+ LocIdxToLocID.grow(NewIdx);
+
+ // Default: it's an mphi.
+ ValueIDNum ValNum = {CurBB, 0, NewIdx};
+ // Was this reg ever touched by a regmask?
+ for (const auto &MaskPair : reverse(Masks)) {
+ if (MaskPair.first->clobbersPhysReg(ID)) {
+ // There was an earlier def we skipped.
+ ValNum = {CurBB, MaskPair.second, NewIdx};
+ break;
+ }
+ }
- /// Record of where we observed a DBG_PHI instruction.
- class DebugPHIRecord {
- public:
- uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
- MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
- ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
- LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+ LocIdxToIDNum[NewIdx] = ValNum;
+ LocIdxToLocID[NewIdx] = ID;
+ return NewIdx;
+}
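
For illustration, a toy model (not part of the patch itself) of the lazy-tracking detail in trackRegister above: a register first touched mid-block defaults to the block's live-in (PHI) value, unless an earlier regmask in the block already clobbered it, in which case its value dates from that regmask's instruction. Value and initialValueFor are simplified stand-ins, not LLVM APIs.

#include <cassert>
#include <functional>
#include <utility>
#include <vector>

struct Value { unsigned Block, Inst; }; // stand-in for ValueIDNum

// Masks is the list of (clobber-test, instruction number) pairs seen so far
// in the block, most recent last.
Value initialValueFor(
    unsigned Reg, unsigned CurBB,
    const std::vector<std::pair<std::function<bool(unsigned)>, unsigned>>
        &Masks) {
  Value V{CurBB, 0}; // default: the block live-in "PHI" value
  for (auto It = Masks.rbegin(); It != Masks.rend(); ++It) {
    if (It->first(Reg)) {      // did this regmask clobber Reg?
      V = {CurBB, It->second}; // yes: the value dates from that instruction
      break;
    }
  }
  return V;
}

int main() {
  auto ClobbersAll = [](unsigned) { return true; };
  auto ClobbersNone = [](unsigned) { return false; };
  // No masks seen yet: the register holds the block live-in value.
  assert(initialValueFor(5, 1, {}).Inst == 0);
  // A call at instruction 4 clobbered everything before tracking started.
  assert(initialValueFor(5, 1, {{ClobbersNone, 2}, {ClobbersAll, 4}}).Inst == 4);
  return 0;
}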
- operator unsigned() const { return InstrNum; }
- };
+void MLocTracker::writeRegMask(const MachineOperand *MO, unsigned CurBB,
+ unsigned InstID) {
+  // Def any register we track that isn't preserved. The regmask
+ // terminates the liveness of a register, meaning its value can't be
+ // relied upon -- we represent this by giving it a new value.
+ for (auto Location : locations()) {
+ unsigned ID = LocIdxToLocID[Location.Idx];
+ // Don't clobber SP, even if the mask says it's clobbered.
+ if (ID < NumRegs && !SPAliases.count(ID) && MO->clobbersPhysReg(ID))
+ defReg(ID, CurBB, InstID);
+ }
+ Masks.push_back(std::make_pair(MO, InstID));
+}
- /// Map from instruction numbers defined by DBG_PHIs to a record of what that
- /// DBG_PHI read and where. Populated and edited during the machine value
- /// location problem -- we use LLVMs SSA Updater to fix changes by
- /// optimizations that destroy PHI instructions.
- SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
-
- // Map of overlapping variable fragments.
- OverlapMap OverlapFragments;
- VarToFragments SeenFragments;
-
- /// Tests whether this instruction is a spill to a stack slot.
- bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
-
- /// Decide if @MI is a spill instruction and return true if it is. We use 2
- /// criteria to make this decision:
- /// - Is this instruction a store to a spill slot?
- /// - Is there a register operand that is both used and killed?
- /// TODO: Store optimization can fold spills into other stores (including
- /// other spills). We do not handle this yet (more than one memory operand).
- bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
- unsigned &Reg);
-
- /// If a given instruction is identified as a spill, return the spill slot
- /// and set \p Reg to the spilled register.
- Optional<SpillLoc> isRestoreInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg);
-
- /// Given a spill instruction, extract the register and offset used to
- /// address the spill slot in a target independent way.
- SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
-
- /// Observe a single instruction while stepping through a block.
- void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
- ValueIDNum **MLiveIns = nullptr);
-
- /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferDebugValue(const MachineInstr &MI);
-
- /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns);
-
- /// Stores value-information about where this PHI occurred, and what
- /// instruction number is associated with it.
- /// \returns true if MI was recognized and processed.
- bool transferDebugPHI(MachineInstr &MI);
-
- /// Examines whether \p MI is copy instruction, and notifies trackers.
- /// \returns true if MI was recognized and processed.
- bool transferRegisterCopy(MachineInstr &MI);
-
- /// Examines whether \p MI is stack spill or restore instruction, and
- /// notifies trackers. \returns true if MI was recognized and processed.
- bool transferSpillOrRestoreInst(MachineInstr &MI);
-
- /// Examines \p MI for any registers that it defines, and notifies trackers.
- void transferRegisterDef(MachineInstr &MI);
-
- /// Copy one location to the other, accounting for movement of subregisters
- /// too.
- void performCopy(Register Src, Register Dst);
-
- void accumulateFragmentMap(MachineInstr &MI);
-
- /// Determine the machine value number referred to by (potentially several)
- /// DBG_PHI instructions. Block duplication and tail folding can duplicate
- /// DBG_PHIs, shifting the position where values in registers merge, and
- /// forming another mini-ssa problem to solve.
- /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
- /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
- /// \returns The machine value number at position Here, or None.
- Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
- ValueIDNum **MLiveOuts,
- ValueIDNum **MLiveIns, MachineInstr &Here,
- uint64_t InstrNum);
-
- /// Step through the function, recording register definitions and movements
- /// in an MLocTracker. Convert the observations into a per-block transfer
- /// function in \p MLocTransfer, suitable for using with the machine value
- /// location dataflow problem.
- void
- produceMLocTransferFunction(MachineFunction &MF,
- SmallVectorImpl<MLocTransferMap> &MLocTransfer,
- unsigned MaxNumBlocks);
-
- /// Solve the machine value location dataflow problem. Takes as input the
- /// transfer functions in \p MLocTransfer. Writes the output live-in and
- /// live-out arrays to the (initialized to zero) multidimensional arrays in
- /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
- /// number, the inner by LocIdx.
- void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
- SmallVectorImpl<MLocTransferMap> &MLocTransfer);
-
- /// Perform a control flow join (lattice value meet) of the values in machine
- /// locations at \p MBB. Follows the algorithm described in the file-comment,
- /// reading live-outs of predecessors from \p OutLocs, the current live ins
- /// from \p InLocs, and assigning the newly computed live ins back into
- /// \p InLocs. \returns two bools -- the first indicates whether a change
- /// was made, the second whether a lattice downgrade occurred. If the latter
- /// is true, revisiting this block is necessary.
- std::tuple<bool, bool>
- mlocJoin(MachineBasicBlock &MBB,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs);
-
- /// Solve the variable value dataflow problem, for a single lexical scope.
- /// Uses the algorithm from the file comment to resolve control flow joins,
- /// although there are extra hacks, see vlocJoin. Reads the
- /// locations of values from the \p MInLocs and \p MOutLocs arrays (see
- /// mlocDataflow) and reads the variable values transfer function from
- /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally,
- /// with the live-ins permanently stored to \p Output once the fixedpoint is
- /// reached.
- /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
- /// that we should be tracking.
- /// \p AssignBlocks contains the set of blocks that aren't in \p Scope, but
- /// which do contain DBG_VALUEs, which VarLocBasedImpl tracks locations
- /// through.
- void vlocDataflow(const LexicalScope *Scope, const DILocation *DILoc,
- const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
- SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
- LiveInsT &Output, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- SmallVectorImpl<VLocTracker> &AllTheVLocs);
-
- /// Compute the live-ins to a block, considering control flow merges according
- /// to the method in the file comment. Live out and live in variable values
- /// are stored in \p VLOCOutLocs and \p VLOCInLocs. The live-ins for \p MBB
- /// are computed and stored into \p VLOCInLocs. \returns true if the live-ins
- /// are modified.
- /// \p InLocsT Output argument, storage for calculated live-ins.
- /// \returns two bools -- the first indicates whether a change
- /// was made, the second whether a lattice downgrade occurred. If the latter
- /// is true, revisiting this block is necessary.
- std::tuple<bool, bool>
- vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
- SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited,
- unsigned BBNum, const SmallSet<DebugVariable, 4> &AllVars,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
- SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
- DenseMap<DebugVariable, DbgValue> &InLocsT);
-
- /// Continue exploration of the variable-value lattice, as explained in the
- /// file-level comment. \p OldLiveInLocation contains the current
- /// exploration position, from which we need to descend further. \p Values
- /// contains the set of live-in values, \p CurBlockRPONum the RPO number of
- /// the current block, and \p CandidateLocations a set of locations that
- /// should be considered as PHI locations, if we reach the bottom of the
- /// lattice. \returns true if we should downgrade; the value is the agreeing
- /// value number in a non-backedge predecessor.
- bool vlocDowngradeLattice(const MachineBasicBlock &MBB,
- const DbgValue &OldLiveInLocation,
- const SmallVectorImpl<InValueT> &Values,
- unsigned CurBlockRPONum);
-
- /// For the given block and live-outs feeding into it, try to find a
- /// machine location where they all join. If a solution for all predecessors
- /// can't be found, a location where all non-backedge-predecessors join
- /// will be returned instead. While this method finds a join location, this
- /// says nothing as to whether it should be used.
- /// \returns Pair of value ID if found, and true when the correct value
- /// is available on all predecessor edges, or false if it's only available
- /// for non-backedge predecessors.
- std::tuple<Optional<ValueIDNum>, bool>
- pickVPHILoc(MachineBasicBlock &MBB, const DebugVariable &Var,
- const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
- const SmallVectorImpl<MachineBasicBlock *> &BlockOrders);
-
- /// Given the solutions to the two dataflow problems, machine value locations
- /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
- /// TransferTracker class over the function to produce live-in and transfer
- /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
- /// order given by AllVarsNumbering -- this could be any stable order, but
- /// right now "order of appearence in function, when explored in RPO", so
- /// that we can compare explictly against VarLocBasedImpl.
- void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
- const TargetPassConfig &TPC);
-
- /// Boilerplate computation of some initial sets, artifical blocks and
- /// RPOT block ordering.
- void initialSetup(MachineFunction &MF);
-
- bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+SpillLocationNo MLocTracker::getOrTrackSpillLoc(SpillLoc L) {
+ SpillLocationNo SpillID(SpillLocs.idFor(L));
+ if (SpillID.id() == 0) {
+ // Spill location is untracked: create record for this one, and all
+ // subregister slots too.
+ SpillID = SpillLocationNo(SpillLocs.insert(L));
+ for (unsigned StackIdx = 0; StackIdx < NumSlotIdxes; ++StackIdx) {
+ unsigned L = getSpillIDWithIdx(SpillID, StackIdx);
+ LocIdx Idx = LocIdx(LocIdxToIDNum.size()); // New idx
+ LocIdxToIDNum.grow(Idx);
+ LocIdxToLocID.grow(Idx);
+ LocIDToLocIdx.push_back(Idx);
+ LocIdxToLocID[Idx] = L;
+ // Initialize to PHI value; corresponds to the location's live-in value
+ // during transfer function construction.
+ LocIdxToIDNum[Idx] = ValueIDNum(CurBB, 0, Idx);
+ }
+ }
+ return SpillID;
+}
-public:
- /// Default construct and initialize the pass.
- InstrRefBasedLDV();
+std::string MLocTracker::LocIdxToName(LocIdx Idx) const {
+ unsigned ID = LocIdxToLocID[Idx];
+ if (ID >= NumRegs) {
+ StackSlotPos Pos = locIDToSpillIdx(ID);
+ ID -= NumRegs;
+ unsigned Slot = ID / NumSlotIdxes;
+ return Twine("slot ")
+ .concat(Twine(Slot).concat(Twine(" sz ").concat(Twine(Pos.first)
+ .concat(Twine(" offs ").concat(Twine(Pos.second))))))
+ .str();
+ } else {
+ return TRI.getRegAsmName(ID).str();
+ }
+}
- LLVM_DUMP_METHOD
- void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+std::string MLocTracker::IDAsString(const ValueIDNum &Num) const {
+ std::string DefName = LocIdxToName(Num.getLoc());
+ return Num.asString(DefName);
+}
- bool isCalleeSaved(LocIdx L) {
- unsigned Reg = MTracker->LocIdxToLocID[L];
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- if (CalleeSavedRegs.test(*RAI))
- return true;
- return false;
+#ifndef NDEBUG
+LLVM_DUMP_METHOD void MLocTracker::dump() {
+ for (auto Location : locations()) {
+ std::string MLocName = LocIdxToName(Location.Value.getLoc());
+ std::string DefName = Location.Value.asString(MLocName);
+ dbgs() << LocIdxToName(Location.Idx) << " --> " << DefName << "\n";
}
-};
+}
-} // end anonymous namespace
+LLVM_DUMP_METHOD void MLocTracker::dump_mloc_map() {
+ for (auto Location : locations()) {
+ std::string foo = LocIdxToName(Location.Idx);
+ dbgs() << "Idx " << Location.Idx.asU64() << " " << foo << "\n";
+ }
+}
+#endif
-//===----------------------------------------------------------------------===//
-// Implementation
-//===----------------------------------------------------------------------===//
+MachineInstrBuilder MLocTracker::emitLoc(Optional<LocIdx> MLoc,
+ const DebugVariable &Var,
+ const DbgValueProperties &Properties) {
+ DebugLoc DL = DILocation::get(Var.getVariable()->getContext(), 0, 0,
+ Var.getVariable()->getScope(),
+ const_cast<DILocation *>(Var.getInlinedAt()));
+ auto MIB = BuildMI(MF, DL, TII.get(TargetOpcode::DBG_VALUE));
+
+ const DIExpression *Expr = Properties.DIExpr;
+ if (!MLoc) {
+ // No location -> DBG_VALUE $noreg
+ MIB.addReg(0);
+ MIB.addReg(0);
+ } else if (LocIdxToLocID[*MLoc] >= NumRegs) {
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ SpillLocationNo SpillID = locIDToSpill(LocID);
+ StackSlotPos StackIdx = locIDToSpillIdx(LocID);
+ unsigned short Offset = StackIdx.second;
+
+ // TODO: support variables that are located in spill slots, with non-zero
+ // offsets from the start of the spill slot. It would require some more
+ // complex DIExpression calculations. This doesn't seem to be produced by
+ // LLVM right now, so don't try and support it.
+ // Accept no-subregister slots and subregisters where the offset is zero.
+ // The consumer should already have type information to work out how large
+ // the variable is.
+ if (Offset == 0) {
+ const SpillLoc &Spill = SpillLocs[SpillID.id()];
+ Expr = TRI.prependOffsetExpression(Expr, DIExpression::ApplyOffset,
+ Spill.SpillOffset);
+ unsigned Base = Spill.SpillBase;
+ MIB.addReg(Base);
+ MIB.addImm(0);
+ } else {
+ // This is a stack location with a weird subregister offset: emit an undef
+ // DBG_VALUE instead.
+ MIB.addReg(0);
+ MIB.addReg(0);
+ }
+ } else {
+ // Non-empty, non-stack slot, must be a plain register.
+ unsigned LocID = LocIdxToLocID[*MLoc];
+ MIB.addReg(LocID);
+ if (Properties.Indirect)
+ MIB.addImm(0);
+ else
+ MIB.addReg(0);
+ }
-ValueIDNum ValueIDNum::EmptyValue = {UINT_MAX, UINT_MAX, UINT_MAX};
+ MIB.addMetadata(Var.getVariable());
+ MIB.addMetadata(Expr);
+ return MIB;
+}
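
For illustration, a simplified sketch (not part of the patch itself) of the three-way decision emitLoc makes above: no location, or a spill piece at an unsupported non-zero in-slot offset, becomes an undef DBG_VALUE; a zero-offset spill piece becomes an indirect location through the spill base register; anything else is a plain register operand. Loc and describeDbgValue are hypothetical stand-ins -- the real method builds a MachineInstr, not a string.

#include <cassert>
#include <optional>
#include <string>

struct Loc {
  bool IsSpill;
  unsigned Reg;          // register number, or spill base register
  unsigned OffsetInSlot; // only meaningful for spills
};

std::string describeDbgValue(std::optional<Loc> L, bool Indirect) {
  if (!L)
    return "undef";   // DBG_VALUE $noreg
  if (L->IsSpill) {
    if (L->OffsetInSlot != 0)
      return "undef"; // unsupported subregister offset within the slot
    return "indirect through base reg " + std::to_string(L->Reg);
  }
  return "reg " + std::to_string(L->Reg) + (Indirect ? " (indirect)" : "");
}

int main() {
  assert(describeDbgValue(std::nullopt, false) == "undef");
  assert(describeDbgValue(Loc{true, 7, 32}, false) == "undef");
  assert(describeDbgValue(Loc{false, 5, 0}, true) == "reg 5 (indirect)");
  return 0;
}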
/// Default construct and initialize the pass.
InstrRefBasedLDV::InstrRefBasedLDV() {}
+bool InstrRefBasedLDV::isCalleeSaved(LocIdx L) const {
+ unsigned Reg = MTracker->LocIdxToLocID[L];
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ if (CalleeSavedRegs.test(*RAI))
+ return true;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
@@ -1722,7 +877,7 @@ InstrRefBasedLDV::InstrRefBasedLDV() {}
// void InstrRefBasedLDV::printVarLocInMBB(..)
#endif
-SpillLoc
+SpillLocationNo
InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
@@ -1734,7 +889,28 @@ InstrRefBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
const MachineBasicBlock *MBB = MI.getParent();
Register Reg;
StackOffset Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
- return {Reg, Offset};
+ return MTracker->getOrTrackSpillLoc({Reg, Offset});
+}
+
+Optional<LocIdx> InstrRefBasedLDV::findLocationForMemOperand(const MachineInstr &MI) {
+ SpillLocationNo SpillLoc = extractSpillBaseRegAndOffset(MI);
+
+ // Where in the stack slot is this value defined -- i.e., what size of value
+ // is this? An important question, because it could be loaded into a register
+ // from the stack at some point. Happily the memory operand will tell us
+ // the size written to the stack.
+ auto *MemOperand = *MI.memoperands_begin();
+ unsigned SizeInBits = MemOperand->getSizeInBits();
+
+ // Find that position in the stack indexes we're tracking.
+ auto IdxIt = MTracker->StackSlotIdxes.find({SizeInBits, 0});
+ if (IdxIt == MTracker->StackSlotIdxes.end())
+    // That index is not tracked. This is surprising, and unlikely to ever
+ // occur, but the safe action is to indicate the variable is optimised out.
+ return None;
+
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillLoc, IdxIt->second);
+ return MTracker->getSpillMLoc(SpillID);
}
/// End all previous ranges related to @MI and start a new range from @MI
@@ -1759,6 +935,17 @@ bool InstrRefBasedLDV::transferDebugValue(const MachineInstr &MI) {
if (Scope == nullptr)
return true; // handled it; by doing nothing
+ // For now, ignore DBG_VALUE_LISTs when extending ranges. Allow it to
+ // contribute to locations in this block, but don't propagate further.
+ // Interpret it like a DBG_VALUE $noreg.
+ if (MI.isDebugValueList()) {
+ if (VTracker)
+ VTracker->defVar(MI, Properties, None);
+ if (TTracker)
+ TTracker->redefVar(MI, Properties, None);
+ return true;
+ }
+
const MachineOperand &MO = MI.getOperand(0);
// MLocTracker needs to know that this register is read, even if it's only
@@ -1852,16 +1039,25 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
const MachineInstr &TargetInstr = *InstrIt->second.first;
uint64_t BlockNo = TargetInstr.getParent()->getNumber();
- // Pick out the designated operand.
- assert(OpNo < TargetInstr.getNumOperands());
- const MachineOperand &MO = TargetInstr.getOperand(OpNo);
-
- // Today, this can only be a register.
- assert(MO.isReg() && MO.isDef());
-
- unsigned LocID = MTracker->getLocID(MO.getReg(), false);
- LocIdx L = MTracker->LocIDToLocIdx[LocID];
- NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ // Pick out the designated operand. It might be a memory reference, if
+ // a register def was folded into a stack store.
+ if (OpNo == MachineFunction::DebugOperandMemNumber &&
+ TargetInstr.hasOneMemOperand()) {
+ Optional<LocIdx> L = findLocationForMemOperand(TargetInstr);
+ if (L)
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, *L);
+ } else if (OpNo != MachineFunction::DebugOperandMemNumber) {
+ assert(OpNo < TargetInstr.getNumOperands());
+ const MachineOperand &MO = TargetInstr.getOperand(OpNo);
+
+ // Today, this can only be a register.
+ assert(MO.isReg() && MO.isDef());
+
+ unsigned LocID = MTracker->getLocID(MO.getReg());
+ LocIdx L = MTracker->LocIDToLocIdx[LocID];
+ NewID = ValueIDNum(BlockNo, InstrIt->second.second, L);
+ }
+ // else: NewID is left as None.
} else if (PHIIt != DebugPHINumToValue.end() && PHIIt->InstrNum == InstNo) {
// It's actually a PHI value. Which value it is might not be obvious, use
// the resolver helper to find out.
@@ -1957,7 +1153,7 @@ bool InstrRefBasedLDV::transferDebugInstrRef(MachineInstr &MI,
Optional<LocIdx> FoundLoc = None;
for (auto Location : MTracker->locations()) {
LocIdx CurL = Location.Idx;
- ValueIDNum ID = MTracker->LocIdxToIDNum[CurL];
+ ValueIDNum ID = MTracker->readMLoc(CurL);
if (NewID && ID == NewID) {
// If this is the first location with that value, pick it. Otherwise,
// consider whether it's a "longer term" location.
@@ -2016,6 +1212,10 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
auto PHIRec = DebugPHIRecord(
{InstrNum, MI.getParent(), Num, MTracker->lookupOrTrackRegister(Reg)});
DebugPHINumToValue.push_back(PHIRec);
+
+ // Ensure this register is tracked.
+ for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
+ MTracker->lookupOrTrackRegister(*RAI);
} else {
// The value is whatever's in this stack slot.
assert(MO.isFI());
@@ -2026,19 +1226,46 @@ bool InstrRefBasedLDV::transferDebugPHI(MachineInstr &MI) {
if (MFI->isDeadObjectIndex(FI))
return true;
- // Identify this spill slot.
+ // Identify this spill slot, ensure it's tracked.
Register Base;
StackOffset Offs = TFI->getFrameIndexReference(*MI.getMF(), FI, Base);
SpillLoc SL = {Base, Offs};
- Optional<ValueIDNum> Num = MTracker->readSpill(SL);
+ SpillLocationNo SpillNo = MTracker->getOrTrackSpillLoc(SL);
+
+ // Problem: what value should we extract from the stack? LLVM does not
+ // record what size the last store to the slot was, and it would become
+ // sketchy after stack slot colouring anyway. Take a look at what values
+ // are stored on the stack, and pick the largest one that wasn't def'd
+ // by a spill (i.e., the value most likely to have been def'd in a register
+    // and then spilt).
+ std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
+ Optional<ValueIDNum> Result = None;
+ Optional<LocIdx> SpillLoc = None;
+ for (unsigned int I = 0; I < CandidateSizes.size(); ++I) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {CandidateSizes[I], 0});
+ SpillLoc = MTracker->getSpillMLoc(SpillID);
+ ValueIDNum Val = MTracker->readMLoc(*SpillLoc);
+      // If this value was defined in its own position, then it was probably
+ // an aliasing index of a small value that was spilt.
+ if (Val.getLoc() != SpillLoc->asU64()) {
+ Result = Val;
+ break;
+ }
+ }
- if (!Num)
- // Nothing ever writes to this slot. Curious, but nothing we can do.
- return true;
+ // If we didn't find anything, we're probably looking at a PHI, or a memory
+    // store folded into an instruction. FIXME: Take a guess that it's 64
+ // bits. This isn't ideal, but tracking the size that the spill is
+ // "supposed" to be is more complex, and benefits a small number of
+ // locations.
+ if (!Result) {
+ unsigned SpillID = MTracker->getLocID(SpillNo, {64, 0});
+ SpillLoc = MTracker->getSpillMLoc(SpillID);
+ Result = MTracker->readMLoc(*SpillLoc);
+ }
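
For illustration, the size-guessing heuristic described above reduced to a standalone sketch (not part of the patch itself): walk the candidate sizes from largest to smallest and accept the first slot index whose value was not def'd in its own position, since a self-def merely means that index carries its own live-in. Value, SizeToLoc and LocToValue are simplified stand-ins for ValueIDNum and the tracker's maps.

#include <array>
#include <cassert>
#include <map>
#include <optional>

struct Value { unsigned DefLoc; }; // only the defining location matters here

std::optional<unsigned>
pickSpilledSize(const std::map<unsigned, unsigned> &SizeToLoc,
                const std::map<unsigned, Value> &LocToValue) {
  const std::array<unsigned, 4> CandidateSizes = {64, 32, 16, 8};
  for (unsigned Size : CandidateSizes) {
    unsigned Loc = SizeToLoc.at(Size);
    if (LocToValue.at(Loc).DefLoc != Loc)
      return Size; // something real was stored at this width
  }
  return std::nullopt; // caller falls back to assuming 64 bits
}

int main() {
  // Slot indexes 10..13 back the 64/32/16/8-bit views of one stack slot.
  std::map<unsigned, unsigned> SizeToLoc = {{64, 10}, {32, 11}, {16, 12}, {8, 13}};
  // Only the 32-bit view was written by a real store (def'd at location 3).
  std::map<unsigned, Value> LocToValue = {
      {10, {10}}, {11, {3}}, {12, {12}}, {13, {13}}};
  assert(pickSpilledSize(SizeToLoc, LocToValue) == std::optional<unsigned>(32));
  return 0;
}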
// Record this DBG_PHI for later analysis.
- auto DbgPHI = DebugPHIRecord(
- {InstrNum, MI.getParent(), *Num, *MTracker->getSpillMLoc(SL)});
+ auto DbgPHI = DebugPHIRecord({InstrNum, MI.getParent(), *Result, *SpillLoc});
DebugPHINumToValue.push_back(DbgPHI);
}
@@ -2061,10 +1288,6 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
} else if (MI.isMetaInstruction())
return;
- MachineFunction *MF = MI.getMF();
- const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
- Register SP = TLI->getStackPointerRegisterToSaveRestore();
-
// Find the regs killed by MI, and find regmasks of preserved regs.
// Max out the number of statically allocated elements in `DeadRegs`, as this
// prevents fallback to std::set::count() operations.
@@ -2075,7 +1298,7 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
// Determine whether the operand is a register def.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
Register::isPhysicalRegister(MO.getReg()) &&
- !(MI.isCall() && MO.getReg() == SP)) {
+ !(MI.isCall() && MTracker->SPAliases.count(MO.getReg()))) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
@@ -2093,6 +1316,16 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
for (auto *MO : RegMaskPtrs)
MTracker->writeRegMask(MO, CurBB, CurInst);
+ // If this instruction writes to a spill slot, def that slot.
+ if (hasFoldedStackStore(MI)) {
+ SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(L, ValueIDNum(CurBB, CurInst, L));
+ }
+ }
+
if (!TTracker)
return;
@@ -2118,32 +1351,27 @@ void InstrRefBasedLDV::transferRegisterDef(MachineInstr &MI) {
if (MO->clobbersPhysReg(Reg))
TTracker->clobberMloc(L.Idx, MI.getIterator(), false);
}
+
+ // Tell TTracker about any folded stack store.
+ if (hasFoldedStackStore(MI)) {
+ SpillLocationNo SpillNo = extractSpillBaseRegAndOffset(MI);
+ for (unsigned int I = 0; I < MTracker->NumSlotIdxes; ++I) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(SpillNo, I);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ TTracker->clobberMloc(L, MI.getIterator(), true);
+ }
+ }
}
void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
- ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
+ // In all circumstances, re-def all aliases. It's definitely a new value now.
+ for (MCRegAliasIterator RAI(DstRegNum, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+ ValueIDNum SrcValue = MTracker->readReg(SrcRegNum);
MTracker->setReg(DstRegNum, SrcValue);
- // In all circumstances, re-def the super registers. It's definitely a new
- // value now. This doesn't uniquely identify the composition of subregs, for
- // example, two identical values in subregisters composed in different
- // places would not get equal value numbers.
- for (MCSuperRegIterator SRI(DstRegNum, TRI); SRI.isValid(); ++SRI)
- MTracker->defReg(*SRI, CurBB, CurInst);
-
- // If we're emulating VarLocBasedImpl, just define all the subregisters.
- // DBG_VALUEs of them will expect to be tracked from the DBG_VALUE, not
- // through prior copies.
- if (EmulateOldLDV) {
- for (MCSubRegIndexIterator DRI(DstRegNum, TRI); DRI.isValid(); ++DRI)
- MTracker->defReg(DRI.getSubReg(), CurBB, CurInst);
- return;
- }
-
- // Otherwise, actually copy subregisters from one location to another.
- // XXX: in addition, any subregisters of DstRegNum that don't line up with
- // the source register should be def'd.
+ // Copy subregisters from one location to another.
for (MCSubRegIndexIterator SRI(SrcRegNum, TRI); SRI.isValid(); ++SRI) {
unsigned SrcSubReg = SRI.getSubReg();
unsigned SubRegIdx = SRI.getSubRegIndex();
@@ -2154,15 +1382,13 @@ void InstrRefBasedLDV::performCopy(Register SrcRegNum, Register DstRegNum) {
// Do copy. There are two matching subregisters, the source value should
// have been def'd when the super-reg was, the latter might not be tracked
// yet.
- // This will force SrcSubReg to be tracked, if it isn't yet.
- (void)MTracker->readReg(SrcSubReg);
- LocIdx SrcL = MTracker->getRegMLoc(SrcSubReg);
- assert(SrcL.asU64());
- (void)MTracker->readReg(DstSubReg);
- LocIdx DstL = MTracker->getRegMLoc(DstSubReg);
- assert(DstL.asU64());
+ // This will force SrcSubReg to be tracked, if it isn't yet. Will read
+ // mphi values if it wasn't tracked.
+ LocIdx SrcL = MTracker->lookupOrTrackRegister(SrcSubReg);
+ LocIdx DstL = MTracker->lookupOrTrackRegister(DstSubReg);
+ (void)SrcL;
(void)DstL;
- ValueIDNum CpyValue = {SrcValue.getBlock(), SrcValue.getInst(), SrcL};
+ ValueIDNum CpyValue = MTracker->readReg(SrcSubReg);
MTracker->setReg(DstSubReg, CpyValue);
}
@@ -2174,6 +1400,12 @@ bool InstrRefBasedLDV::isSpillInstruction(const MachineInstr &MI,
if (!MI.hasOneMemOperand())
return false;
+ // Reject any memory operand that's aliased -- we can't guarantee its value.
+ auto MMOI = MI.memoperands_begin();
+ const PseudoSourceValue *PVal = (*MMOI)->getPseudoValue();
+ if (PVal->isAliased(MFI))
+ return false;
+
if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
return false; // This is not a spill instruction, since no valid size was
// returned from either function.
@@ -2191,7 +1423,7 @@ bool InstrRefBasedLDV::isLocationSpill(const MachineInstr &MI,
return Reg != 0;
}
-Optional<SpillLoc>
+Optional<SpillLocationNo>
InstrRefBasedLDV::isRestoreInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
if (!MI.hasOneMemOperand())
@@ -2213,84 +1445,117 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
if (EmulateOldLDV)
return false;
+ // Strictly limit ourselves to plain loads and stores, not all instructions
+ // that can access the stack.
+ int DummyFI = -1;
+ if (!TII->isStoreToStackSlotPostFE(MI, DummyFI) &&
+ !TII->isLoadFromStackSlotPostFE(MI, DummyFI))
+ return false;
+
MachineFunction *MF = MI.getMF();
unsigned Reg;
- Optional<SpillLoc> Loc;
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
// First, if there are any DBG_VALUEs pointing at a spill slot that is
// written to, terminate that variable location. The value in memory
// will have changed. DbgEntityHistoryCalculator doesn't try to detect this.
if (isSpillInstruction(MI, MF)) {
- Loc = extractSpillBaseRegAndOffset(MI);
-
- if (TTracker) {
- Optional<LocIdx> MLoc = MTracker->getSpillMLoc(*Loc);
- if (MLoc) {
- // Un-set this location before clobbering, so that we don't salvage
- // the variable location back to the same place.
- MTracker->setMLoc(*MLoc, ValueIDNum::EmptyValue);
+ SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
+
+ // Un-set this location and clobber, so that earlier locations don't
+ // continue past this store.
+ for (unsigned SlotIdx = 0; SlotIdx < MTracker->NumSlotIdxes; ++SlotIdx) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Loc, SlotIdx);
+ Optional<LocIdx> MLoc = MTracker->getSpillMLoc(SpillID);
+ if (!MLoc)
+ continue;
+
+ // We need to over-write the stack slot with something (here, a def at
+ // this instruction) to ensure no values are preserved in this stack slot
+ // after the spill. It also prevents TTracker from trying to recover the
+ // location and re-installing it in the same place.
+ ValueIDNum Def(CurBB, CurInst, *MLoc);
+ MTracker->setMLoc(*MLoc, Def);
+ if (TTracker)
TTracker->clobberMloc(*MLoc, MI.getIterator());
- }
}
}
// Try to recognise spill and restore instructions that may transfer a value.
if (isLocationSpill(MI, MF, Reg)) {
- Loc = extractSpillBaseRegAndOffset(MI);
- auto ValueID = MTracker->readReg(Reg);
+ SpillLocationNo Loc = extractSpillBaseRegAndOffset(MI);
- // If the location is empty, produce a phi, signify it's the live-in value.
- if (ValueID.getLoc() == 0)
- ValueID = {CurBB, 0, MTracker->getRegMLoc(Reg)};
+ auto DoTransfer = [&](Register SrcReg, unsigned SpillID) {
+ auto ReadValue = MTracker->readReg(SrcReg);
+ LocIdx DstLoc = MTracker->getSpillMLoc(SpillID);
+ MTracker->setMLoc(DstLoc, ReadValue);
+
+ if (TTracker) {
+ LocIdx SrcLoc = MTracker->getRegMLoc(SrcReg);
+ TTracker->transferMlocs(SrcLoc, DstLoc, MI.getIterator());
+ }
+ };
- MTracker->setSpill(*Loc, ValueID);
- auto OptSpillLocIdx = MTracker->getSpillMLoc(*Loc);
- assert(OptSpillLocIdx && "Spill slot set but has no LocIdx?");
- LocIdx SpillLocIdx = *OptSpillLocIdx;
+ // Then, transfer subreg bits.
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ // Ensure this reg is tracked,
+      // Ensure this reg is tracked.
+ unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
+ DoTransfer(*SRI, SpillID);
+ }
- // Tell TransferTracker about this spill, produce DBG_VALUEs for it.
- if (TTracker)
- TTracker->transferMlocs(MTracker->getRegMLoc(Reg), SpillLocIdx,
- MI.getIterator());
+    // Directly look up the size of the main source reg, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
} else {
- if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ Optional<SpillLocationNo> OptLoc = isRestoreInstruction(MI, MF, Reg);
+ if (!OptLoc)
return false;
+ SpillLocationNo Loc = *OptLoc;
- // Is there a value to be restored?
- auto OptValueID = MTracker->readSpill(*Loc);
- if (OptValueID) {
- ValueIDNum ValueID = *OptValueID;
- LocIdx SpillLocIdx = *MTracker->getSpillMLoc(*Loc);
- // XXX -- can we recover sub-registers of this value? Until we can, first
- // overwrite all defs of the register being restored to.
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- MTracker->defReg(*RAI, CurBB, CurInst);
+ // Assumption: we're reading from the base of the stack slot, not some
+ // offset into it. It seems very unlikely LLVM would ever generate
+ // restores where this wasn't true. This then becomes a question of what
+ // subregisters in the destination register line up with positions in the
+ // stack slot.
- // Now override the reg we're restoring to.
- MTracker->setReg(Reg, ValueID);
+ // Def all registers that alias the destination.
+ for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
+ MTracker->defReg(*RAI, CurBB, CurInst);
+
+ // Now find subregisters within the destination register, and load values
+ // from stack slot positions.
+ auto DoTransfer = [&](Register DestReg, unsigned SpillID) {
+ LocIdx SrcIdx = MTracker->getSpillMLoc(SpillID);
+ auto ReadValue = MTracker->readMLoc(SrcIdx);
+ MTracker->setReg(DestReg, ReadValue);
+
+ if (TTracker) {
+ LocIdx DstLoc = MTracker->getRegMLoc(DestReg);
+ TTracker->transferMlocs(SrcIdx, DstLoc, MI.getIterator());
+ }
+ };
- // Report this restore to the transfer tracker too.
- if (TTracker)
- TTracker->transferMlocs(SpillLocIdx, MTracker->getRegMLoc(Reg),
- MI.getIterator());
- } else {
- // There isn't anything in the location; not clear if this is a code path
- // that still runs. Def this register anyway just in case.
- for (MCRegAliasIterator RAI(Reg, TRI, true); RAI.isValid(); ++RAI)
- MTracker->defReg(*RAI, CurBB, CurInst);
-
- // Force the spill slot to be tracked.
- LocIdx L = MTracker->getOrTrackSpillLoc(*Loc);
-
- // Set the restored value to be a machine phi number, signifying that it's
- // whatever the spills live-in value is in this block. Definitely has
- // a LocIdx due to the setSpill above.
- ValueIDNum ValueID = {CurBB, 0, L};
- MTracker->setReg(Reg, ValueID);
- MTracker->setSpill(*Loc, ValueID);
+ for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ unsigned SpillID = MTracker->getLocID(Loc, Subreg);
+ DoTransfer(*SRI, SpillID);
}
+
+    // Directly look up this register's slot idx by size, and transfer.
+ unsigned Size = TRI->getRegSizeInBits(Reg, *MRI);
+ unsigned SpillID = MTracker->getLocID(Loc, {Size, 0});
+ DoTransfer(Reg, SpillID);
}
return true;
}
@@ -2510,12 +1775,11 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
}
// Compute a bitvector of all the registers that are tracked in this block.
- const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
- Register SP = TLI->getStackPointerRegisterToSaveRestore();
BitVector UsedRegs(TRI->getNumRegs());
for (auto Location : MTracker->locations()) {
unsigned ID = MTracker->LocIdxToLocID[Location.Idx];
- if (ID >= TRI->getNumRegs() || ID == SP)
+ // Ignore stack slots, and aliases of the stack pointer.
+ if (ID >= TRI->getNumRegs() || MTracker->SPAliases.count(ID))
continue;
UsedRegs.set(ID);
}
@@ -2531,7 +1795,7 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
// they're all clobbered or at least set in the designated transfer
// elem.
for (unsigned Bit : BV.set_bits()) {
- unsigned ID = MTracker->getLocID(Bit, false);
+ unsigned ID = MTracker->getLocID(Bit);
LocIdx Idx = MTracker->LocIDToLocIdx[ID];
auto &TransferMap = MLocTransfer[I];
@@ -2553,23 +1817,20 @@ void InstrRefBasedLDV::produceMLocTransferFunction(
}
}
-std::tuple<bool, bool>
-InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
- SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- ValueIDNum **OutLocs, ValueIDNum *InLocs) {
+bool InstrRefBasedLDV::mlocJoin(
+ MachineBasicBlock &MBB, SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
- bool DowngradeOccurred = false;
- // Collect predecessors that have been visited. Anything that hasn't been
- // visited yet is a backedge on the first iteration, and the meet of it's
- // lattice value for all locations will be unaffected.
+ // Handle value-propagation when control flow merges on entry to a block. For
+ // any location without a PHI already placed, the location has the same value
+ // as its predecessors. If a PHI is placed, test to see whether it's now a
+ // redundant PHI that we can eliminate.
+
SmallVector<const MachineBasicBlock *, 8> BlockOrders;
- for (auto Pred : MBB.predecessors()) {
- if (Visited.count(Pred)) {
- BlockOrders.push_back(Pred);
- }
- }
+ for (auto Pred : MBB.predecessors())
+ BlockOrders.push_back(Pred);
// Visit predecessors in RPOT order.
auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
@@ -2579,83 +1840,216 @@ InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB,
// Skip entry block.
if (BlockOrders.size() == 0)
- return std::tuple<bool, bool>(false, false);
+ return false;
- // Step through all machine locations, then look at each predecessor and
- // detect disagreements.
- unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second;
+ // Step through all machine locations, look at each predecessor and test
+ // whether we can eliminate redundant PHIs.
for (auto Location : MTracker->locations()) {
LocIdx Idx = Location.Idx;
+
// Pick out the first predecessors live-out value for this location. It's
- // guaranteed to be not a backedge, as we order by RPO.
- ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+ // guaranteed to not be a backedge, as we order by RPO.
+ ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()];
+
+ // If we've already eliminated a PHI here, do no further checking, just
+ // propagate the first live-in value into this block.
+ if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) {
+ if (InLocs[Idx.asU64()] != FirstVal) {
+ InLocs[Idx.asU64()] = FirstVal;
+ Changed |= true;
+ }
+ continue;
+ }
- // Some flags for whether there's a disagreement, and whether it's a
- // disagreement with a backedge or not.
+    // We're now examining a PHI to see whether it's unnecessary. Loop around
+ // the other live-in values and test whether they're all the same.
bool Disagree = false;
- bool NonBackEdgeDisagree = false;
-
- // Loop around everything that wasn't 'base'.
for (unsigned int I = 1; I < BlockOrders.size(); ++I) {
- auto *MBB = BlockOrders[I];
- if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) {
- // Live-out of a predecessor disagrees with the first predecessor.
- Disagree = true;
-
- // Test whether it's a disagreemnt in the backedges or not.
- if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e
- NonBackEdgeDisagree = true;
- }
- }
+ const MachineBasicBlock *PredMBB = BlockOrders[I];
+ const ValueIDNum &PredLiveOut =
+ OutLocs[PredMBB->getNumber()][Idx.asU64()];
- bool OverRide = false;
- if (Disagree && !NonBackEdgeDisagree) {
- // Only the backedges disagree. Consider demoting the livein
- // lattice value, as per the file level comment. The value we consider
- // demoting to is the value that the non-backedge predecessors agree on.
- // The order of values is that non-PHIs are \top, a PHI at this block
- // \bot, and phis between the two are ordered by their RPO number.
- // If there's no agreement, or we've already demoted to this PHI value
- // before, replace with a PHI value at this block.
-
- // Calculate order numbers: zero means normal def, nonzero means RPO
- // number.
- unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1;
- if (!BaseVal.isPHI())
- BaseBlockRPONum = 0;
-
- ValueIDNum &InLocID = InLocs[Idx.asU64()];
- unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1;
- if (!InLocID.isPHI())
- InLocRPONum = 0;
-
- // Should we ignore the disagreeing backedges, and override with the
- // value the other predecessors agree on (in "base")?
- unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1;
- if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) {
- // Override.
- OverRide = true;
- DowngradeOccurred = true;
- }
+ // Incoming values agree, continue trying to eliminate this PHI.
+ if (FirstVal == PredLiveOut)
+ continue;
+
+ // We can also accept a PHI value that feeds back into itself.
+ if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx))
+ continue;
+
+ // Live-out of a predecessor disagrees with the first predecessor.
+ Disagree = true;
}
- // else: if we disagree in the non-backedges, then this is definitely
- // a control flow merge where different values merge. Make it a PHI.
- // Generate a phi...
- ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx};
- ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal;
- if (InLocs[Idx.asU64()] != NewVal) {
+ // No disagreement? No PHI. Otherwise, leave the PHI in live-ins.
+ if (!Disagree) {
+ InLocs[Idx.asU64()] = FirstVal;
Changed |= true;
- InLocs[Idx.asU64()] = NewVal;
}
}
// TODO: Reimplement NumInserted and NumRemoved.
- return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+ return Changed;
+}
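
For illustration, the redundancy test at the heart of the new mlocJoin as a standalone sketch (not part of the patch itself): a block-entry PHI for a location is dropped when every predecessor live-out is either one common value or the PHI feeding back into itself. It is deliberately simplified -- the real code compares against the live-out of the first predecessor in RPO order, whereas this sketch accepts any single common value.

#include <cassert>
#include <optional>
#include <vector>

using Value = int; // stand-in for a (block, inst, loc) machine value number

std::optional<Value> simplifyPHI(Value PhiVal,
                                 const std::vector<Value> &PredLiveOuts) {
  std::optional<Value> Common;
  for (Value V : PredLiveOuts) {
    if (V == PhiVal)
      continue;            // the PHI feeding back along a backedge is fine
    if (Common && *Common != V)
      return std::nullopt; // genuine disagreement: the PHI is necessary
    Common = V;
  }
  return Common; // the single agreed-upon live-in value, if any
}

int main() {
  // Loop header: one predecessor brings value 7, the backedge brings the PHI.
  assert(simplifyPHI(/*PhiVal=*/100, {7, 100}) == std::optional<Value>(7));
  // Two different incoming values: the PHI must stay.
  assert(!simplifyPHI(100, {7, 8}).has_value());
  return 0;
}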
+
+void InstrRefBasedLDV::findStackIndexInterference(
+ SmallVectorImpl<unsigned> &Slots) {
+  // We could spend a bit of time finding the exact, minimal set of stack
+ // indexes that interfere with each other, much like reg units. Or, we can
+ // rely on the fact that:
+ // * The smallest / lowest index will interfere with everything at zero
+ // offset, which will be the largest set of registers,
+ // * Most indexes with non-zero offset will end up being interference units
+ // anyway.
+ // So just pick those out and return them.
+
+ // We can rely on a single-byte stack index existing already, because we
+ // initialize them in MLocTracker.
+ auto It = MTracker->StackSlotIdxes.find({8, 0});
+ assert(It != MTracker->StackSlotIdxes.end());
+ Slots.push_back(It->second);
+
+ // Find anything that has a non-zero offset and add that too.
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ // Is offset zero? If so, ignore.
+ if (!Pair.first.second)
+ continue;
+ Slots.push_back(Pair.second);
+ }
}
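
For illustration, the interference-unit selection above restated as a standalone sketch (not part of the patch itself) over a plain std::map stand-in for StackSlotIdxes: keep the single-byte zero-offset index, which overlaps every zero-offset piece, plus every index with a non-zero offset.

#include <cassert>
#include <map>
#include <utility>
#include <vector>

using StackSlotPos = std::pair<unsigned, unsigned>; // {size in bits, offset}

std::vector<unsigned>
stackInterferenceUnits(const std::map<StackSlotPos, unsigned> &Idxes) {
  std::vector<unsigned> Units;
  Units.push_back(Idxes.at({8, 0})); // always present: seeded at construction
  for (const auto &P : Idxes)
    if (P.first.second != 0)         // non-zero offset -> its own unit
      Units.push_back(P.second);
  return Units;
}

int main() {
  std::map<StackSlotPos, unsigned> Idxes = {
      {{8, 0}, 0}, {{32, 0}, 1}, {{64, 0}, 2}, {{32, 32}, 3}};
  std::vector<unsigned> Units = stackInterferenceUnits(Idxes);
  assert(Units.size() == 2 && Units[0] == 0 && Units[1] == 3);
  return 0;
}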
-void InstrRefBasedLDV::mlocDataflow(
- ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
+void InstrRefBasedLDV::placeMLocPHIs(
+ MachineFunction &MF, SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ ValueIDNum **MInLocs, SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
+ SmallVector<unsigned, 4> StackUnits;
+ findStackIndexInterference(StackUnits);
+
+ // To avoid repeatedly running the PHI placement algorithm, leverage the
+  // fact that a def of a register MUST also def its register units. Find the
+  // units for registers, place PHIs for them, and then replicate them for
+  // aliasing registers. Some inputs that are never def'd (DBG_PHIs of
+  // arguments) don't lead to register units being tracked; just place PHIs for
+  // those registers directly. Stack slots have their own form of "unit";
+ // store them to one side.
+ SmallSet<Register, 32> RegUnitsToPHIUp;
+ SmallSet<LocIdx, 32> NormalLocsToPHI;
+ SmallSet<SpillLocationNo, 32> StackSlots;
+ for (auto Location : MTracker->locations()) {
+ LocIdx L = Location.Idx;
+ if (MTracker->isSpill(L)) {
+ StackSlots.insert(MTracker->locIDToSpill(MTracker->LocIdxToLocID[L]));
+ continue;
+ }
+
+ Register R = MTracker->LocIdxToLocID[L];
+ SmallSet<Register, 8> FoundRegUnits;
+ bool AnyIllegal = false;
+ for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) {
+      for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot) {
+ if (!MTracker->isRegisterTracked(*URoot)) {
+ // Not all roots were loaded into the tracking map: this register
+ // isn't actually def'd anywhere, we only read from it. Generate PHIs
+ // for this reg, but don't iterate units.
+ AnyIllegal = true;
+ } else {
+ FoundRegUnits.insert(*URoot);
+ }
+ }
+ }
+
+ if (AnyIllegal) {
+ NormalLocsToPHI.insert(L);
+ continue;
+ }
+
+ RegUnitsToPHIUp.insert(FoundRegUnits.begin(), FoundRegUnits.end());
+ }
+
+ // Lambda to fetch PHIs for a given location, and write into the PHIBlocks
+ // collection.
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
+ auto CollectPHIsForLoc = [&](LocIdx L) {
+ // Collect the set of defs.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
+ MachineBasicBlock *MBB = OrderToBB[I];
+ const auto &TransferFunc = MLocTransfer[MBB->getNumber()];
+ if (TransferFunc.find(L) != TransferFunc.end())
+ DefBlocks.insert(MBB);
+ }
+
+ // The entry block defs the location too: it's the live-in / argument value.
+ // Only insert if there are other defs though; everything is trivially live
+ // through otherwise.
+ if (!DefBlocks.empty())
+ DefBlocks.insert(&*MF.begin());
+
+ // Ask the SSA construction algorithm where we should put PHIs. Clear
+ // anything that might have been hanging around from earlier.
+ PHIBlocks.clear();
+ BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks);
+ };
+
+ auto InstallPHIsAtLoc = [&PHIBlocks, &MInLocs](LocIdx L) {
+ for (const MachineBasicBlock *MBB : PHIBlocks)
+ MInLocs[MBB->getNumber()][L.asU64()] = ValueIDNum(MBB->getNumber(), 0, L);
+ };
+
+ // For locations with no reg units, just place PHIs.
+ for (LocIdx L : NormalLocsToPHI) {
+ CollectPHIsForLoc(L);
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+ }
+
+ // For stack slots, calculate PHIs for the equivalent of the units, then
+ // install for each index.
+ for (SpillLocationNo Slot : StackSlots) {
+ for (unsigned Idx : StackUnits) {
+ unsigned SpillID = MTracker->getSpillIDWithIdx(Slot, Idx);
+ LocIdx L = MTracker->getSpillMLoc(SpillID);
+ CollectPHIsForLoc(L);
+ InstallPHIsAtLoc(L);
+
+ // Find anything that aliases this stack index, install PHIs for it too.
+ unsigned Size, Offset;
+ std::tie(Size, Offset) = MTracker->StackIdxesToPos[Idx];
+ for (auto &Pair : MTracker->StackSlotIdxes) {
+ unsigned ThisSize, ThisOffset;
+ std::tie(ThisSize, ThisOffset) = Pair.first;
+ if (ThisSize + ThisOffset <= Offset || Size + Offset <= ThisOffset)
+ continue;
+
+ unsigned ThisID = MTracker->getSpillIDWithIdx(Slot, Pair.second);
+ LocIdx ThisL = MTracker->getSpillMLoc(ThisID);
+ InstallPHIsAtLoc(ThisL);
+ }
+ }
+ }
+
+ // For reg units, place PHIs, and then place them for any aliasing registers.
+ for (Register R : RegUnitsToPHIUp) {
+ LocIdx L = MTracker->lookupOrTrackRegister(R);
+ CollectPHIsForLoc(L);
+
+ // Install those PHI values into the live-in value array.
+ InstallPHIsAtLoc(L);
+
+ // Now find aliases and install PHIs for those.
+ for (MCRegAliasIterator RAI(R, TRI, true); RAI.isValid(); ++RAI) {
+ // Super-registers that are "above" the largest register read/written by
+ // the function will alias, but will not be tracked.
+ if (!MTracker->isRegisterTracked(*RAI))
+ continue;
+
+ LocIdx AliasLoc = MTracker->lookupOrTrackRegister(*RAI);
+ InstallPHIsAtLoc(AliasLoc);
+ }
+ }
+}
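
For illustration, a toy sketch (not part of the patch itself) of the deduplication idea behind placeMLocPHIs: run the expensive PHI placement once per register unit, then copy the result to every register that aliases that unit instead of re-running placement per register. The register/unit numbering and the per-unit PHI block sets below are invented example data, not LLVM's.

#include <cassert>
#include <map>
#include <set>
#include <vector>

int main() {
  // Hypothetical aliasing scheme: registers -> the units they cover.
  std::map<int, std::vector<int>> RegToUnits = {
      {/*AX*/ 1, {0}}, {/*EAX*/ 2, {0}}, {/*RAX*/ 3, {0}}, {/*RBX*/ 4, {1}}};
  // Pretend output of the SSA placement algorithm, computed per unit only.
  std::map<int, std::set<int>> UnitPHIBlocks = {{0, {2, 5}}, {1, {3}}};

  // Replicate each unit's PHI blocks to every aliasing register.
  std::map<int, std::set<int>> RegPHIBlocks;
  for (const auto &P : RegToUnits) {
    std::set<int> Blocks;
    for (int Unit : P.second)
      Blocks.insert(UnitPHIBlocks[Unit].begin(), UnitPHIBlocks[Unit].end());
    RegPHIBlocks[P.first] = Blocks;
  }

  // All aliases of unit 0 get identical PHI positions without running the
  // placement algorithm three times.
  assert(RegPHIBlocks[1] == RegPHIBlocks[2] && RegPHIBlocks[2] == RegPHIBlocks[3]);
  assert(RegPHIBlocks[4] == std::set<int>({3}));
  return 0;
}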
+
+void InstrRefBasedLDV::buildMLocValueMap(
+ MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs,
SmallVectorImpl<MLocTransferMap> &MLocTransfer) {
std::priority_queue<unsigned int, std::vector<unsigned int>,
std::greater<unsigned int>>
@@ -2666,20 +2060,34 @@ void InstrRefBasedLDV::mlocDataflow(
// but this is probably not worth it.
SmallPtrSet<MachineBasicBlock *, 16> OnPending, OnWorklist;
- // Initialize worklist with every block to be visited.
+ // Initialize worklist with every block to be visited. Also produce list of
+ // all blocks.
+ SmallPtrSet<MachineBasicBlock *, 32> AllBlocks;
for (unsigned int I = 0; I < BBToOrder.size(); ++I) {
Worklist.push(I);
OnWorklist.insert(OrderToBB[I]);
+ AllBlocks.insert(OrderToBB[I]);
}
- MTracker->reset();
-
- // Set inlocs for entry block -- each as a PHI at the entry block. Represents
- // the incoming value to the function.
- MTracker->setMPhis(0);
+ // Initialize entry block to PHIs. These represent arguments.
for (auto Location : MTracker->locations())
- MInLocs[0][Location.Idx.asU64()] = Location.Value;
+ MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx);
+ MTracker->reset();
+
+  // Start by placing PHIs, using the usual SSA construction algorithm. Consider
+ // any machine-location that isn't live-through a block to be def'd in that
+ // block.
+ placeMLocPHIs(MF, AllBlocks, MInLocs, MLocTransfer);
+
+ // Propagate values to eliminate redundant PHIs. At the same time, this
+ // produces the table of Block x Location => Value for the entry to each
+ // block.
+ // The kind of PHIs we can eliminate are, for example, where one path in a
+ // conditional spills and restores a register, and the register still has
+  // the same value once control flow joins, unbeknownst to the PHI placement
+  // code. Propagating values allows us to identify such unnecessary PHIs and
+ // remove them.
SmallPtrSet<const MachineBasicBlock *, 16> Visited;
while (!Worklist.empty() || !Pending.empty()) {
// Vector for storing the evaluated block transfer function.
@@ -2691,16 +2099,10 @@ void InstrRefBasedLDV::mlocDataflow(
Worklist.pop();
// Join the values in all predecessor blocks.
- bool InLocsChanged, DowngradeOccurred;
- std::tie(InLocsChanged, DowngradeOccurred) =
- mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
+ bool InLocsChanged;
+ InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]);
InLocsChanged |= Visited.insert(MBB).second;
- // If a downgrade occurred, book us in for re-examination on the next
- // iteration.
- if (DowngradeOccurred && OnPending.insert(MBB).second)
- Pending.push(BBToOrder[MBB]);
-
// Don't examine transfer function if we've visited this loc at least
// once, and inlocs haven't changed.
if (!InLocsChanged)
@@ -2715,7 +2117,7 @@ void InstrRefBasedLDV::mlocDataflow(
for (auto &P : MLocTransfer[CurBB]) {
if (P.second.getBlock() == CurBB && P.second.isPHI()) {
// This is a movement of whatever was live in. Read it.
- ValueIDNum NewID = MTracker->getNumAtPos(P.second.getLoc());
+ ValueIDNum NewID = MTracker->readMLoc(P.second.getLoc());
ToRemap.push_back(std::make_pair(P.first, NewID));
} else {
// It's a def. Just set it.
@@ -2745,8 +2147,8 @@ void InstrRefBasedLDV::mlocDataflow(
continue;
// All successors should be visited: put any back-edges on the pending
- // list for the next dataflow iteration, and any other successors to be
- // visited this iteration, if they're not going to be already.
+ // list for the next pass-through, and any other successors to be
+ // visited this pass, if they're not going to be already.
for (auto s : MBB->successors()) {
// Does branching to this successor represent a back-edge?
if (BBToOrder[s] > BBToOrder[MBB]) {
@@ -2769,170 +2171,169 @@ void InstrRefBasedLDV::mlocDataflow(
assert(Pending.empty() && "Pending should be empty");
}
- // Once all the live-ins don't change on mlocJoin(), we've reached a
- // fixedpoint.
+ // Once all the live-ins don't change on mlocJoin(), we've eliminated all
+ // redundant PHIs.
}
-bool InstrRefBasedLDV::vlocDowngradeLattice(
- const MachineBasicBlock &MBB, const DbgValue &OldLiveInLocation,
- const SmallVectorImpl<InValueT> &Values, unsigned CurBlockRPONum) {
- // Ranking value preference: see file level comment, the highest rank is
- // a plain def, followed by PHI values in reverse post-order. Numerically,
- // we assign all defs the rank '0', all PHIs their blocks RPO number plus
- // one, and consider the lowest value the highest ranked.
- int OldLiveInRank = BBNumToRPO[OldLiveInLocation.ID.getBlock()] + 1;
- if (!OldLiveInLocation.ID.isPHI())
- OldLiveInRank = 0;
-
- // Allow any unresolvable conflict to be over-ridden.
- if (OldLiveInLocation.Kind == DbgValue::NoVal) {
- // Although if it was an unresolvable conflict from _this_ block, then
- // all other seeking of downgrades and PHIs must have failed before hand.
- if (OldLiveInLocation.BlockNo == (unsigned)MBB.getNumber())
- return false;
- OldLiveInRank = INT_MIN;
- }
-
- auto &InValue = *Values[0].second;
+// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide
+// template specialisations for graph traits and a successor enumerator.
+namespace llvm {
+template <> struct GraphTraits<MachineBasicBlock> {
+ using NodeRef = MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::succ_iterator;
- if (InValue.Kind == DbgValue::Const || InValue.Kind == DbgValue::NoVal)
- return false;
+ static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
- unsigned ThisRPO = BBNumToRPO[InValue.ID.getBlock()];
- int ThisRank = ThisRPO + 1;
- if (!InValue.ID.isPHI())
- ThisRank = 0;
+template <> struct GraphTraits<const MachineBasicBlock> {
+ using NodeRef = const MachineBasicBlock *;
+ using ChildIteratorType = MachineBasicBlock::const_succ_iterator;
- // Too far down the lattice?
- if (ThisRPO >= CurBlockRPONum)
- return false;
+ static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
+};
- // Higher in the lattice than what we've already explored?
- if (ThisRank <= OldLiveInRank)
- return false;
+using MachineDomTreeBase = DomTreeBase<MachineBasicBlock>::NodeType;
+using MachineDomTreeChildGetter =
+ typename IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false>;
- return true;
+namespace IDFCalculatorDetail {
+template <>
+typename MachineDomTreeChildGetter::ChildrenTy
+MachineDomTreeChildGetter::get(const NodeRef &N) {
+ return {N->succ_begin(), N->succ_end()};
+}
+} // namespace IDFCalculatorDetail
+} // namespace llvm
+
+void InstrRefBasedLDV::BlockPHIPlacement(
+ const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks) {
+ // Apply IDF calculator to the designated set of location defs, storing
+ // required PHIs into PHIBlocks. Uses the dominator tree stored in the
+ // InstrRefBasedLDV object.
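+ // For example, if a location is def'd in two blocks that both branch to a
+ // common successor, the IDF of those def blocks contains that successor,
+ // so a PHI is requested there -- the standard SSA construction rule.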
+ IDFCalculatorDetail::ChildrenGetterTy<MachineDomTreeBase, false> ChildGetter;
+ IDFCalculatorBase<MachineDomTreeBase, false> IDF(DomTree->getBase(),
+ ChildGetter);
+
+ IDF.setLiveInBlocks(AllBlocks);
+ IDF.setDefiningBlocks(DefBlocks);
+ IDF.calculate(PHIBlocks);
}
-std::tuple<Optional<ValueIDNum>, bool> InstrRefBasedLDV::pickVPHILoc(
- MachineBasicBlock &MBB, const DebugVariable &Var, const LiveIdxT &LiveOuts,
- ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
- const SmallVectorImpl<MachineBasicBlock *> &BlockOrders) {
+Optional<ValueIDNum> InstrRefBasedLDV::pickVPHILoc(
+ const MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders) {
// Collect a set of locations from predecessor where its live-out value can
// be found.
SmallVector<SmallVector<LocIdx, 4>, 8> Locs;
+ SmallVector<const DbgValueProperties *, 4> Properties;
unsigned NumLocs = MTracker->getNumLocs();
- unsigned BackEdgesStart = 0;
- for (auto p : BlockOrders) {
- // Pick out where backedges start in the list of predecessors. Relies on
- // BlockOrders being sorted by RPO.
- if (BBToOrder[p] < BBToOrder[&MBB])
- ++BackEdgesStart;
+ // No predecessors means no PHIs.
+ if (BlockOrders.empty())
+ return None;
- // For each predecessor, create a new set of locations.
- Locs.resize(Locs.size() + 1);
+ for (auto p : BlockOrders) {
unsigned ThisBBNum = p->getNumber();
- auto LiveOutMap = LiveOuts.find(p);
- if (LiveOutMap == LiveOuts.end())
- // This predecessor isn't in scope, it must have no live-in/live-out
- // locations.
- continue;
-
- auto It = LiveOutMap->second->find(Var);
- if (It == LiveOutMap->second->end())
- // There's no value recorded for this variable in this predecessor,
- // leave an empty set of locations.
- continue;
-
- const DbgValue &OutVal = It->second;
+ auto OutValIt = LiveOuts.find(p);
+ if (OutValIt == LiveOuts.end())
+ // If we have a predecessor not in scope, we'll never find a PHI position.
+ return None;
+ const DbgValue &OutVal = *OutValIt->second;
if (OutVal.Kind == DbgValue::Const || OutVal.Kind == DbgValue::NoVal)
// Consts and no-values cannot have locations we can join on.
- continue;
+ return None;
- assert(OutVal.Kind == DbgValue::Proposed || OutVal.Kind == DbgValue::Def);
- ValueIDNum ValToLookFor = OutVal.ID;
+ Properties.push_back(&OutVal.Properties);
+
+ // Create new empty vector of locations.
+ Locs.resize(Locs.size() + 1);
- // Search the live-outs of the predecessor for the specified value.
- for (unsigned int I = 0; I < NumLocs; ++I) {
- if (MOutLocs[ThisBBNum][I] == ValToLookFor)
- Locs.back().push_back(LocIdx(I));
+ // If the predecessor's live-out value is a def, find the locations where
+ // that value is present. Do the same for VPHIs where we know the VPHI value.
+ if (OutVal.Kind == DbgValue::Def ||
+ (OutVal.Kind == DbgValue::VPHI && OutVal.BlockNo != MBB.getNumber() &&
+ OutVal.ID != ValueIDNum::EmptyValue)) {
+ ValueIDNum ValToLookFor = OutVal.ID;
+ // Search the live-outs of the predecessor for the specified value.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ if (MOutLocs[ThisBBNum][I] == ValToLookFor)
+ Locs.back().push_back(LocIdx(I));
+ }
+ } else {
+ assert(OutVal.Kind == DbgValue::VPHI);
+ // For VPHIs where we don't know the location, we definitely can't find
+ // a join loc.
+ if (OutVal.BlockNo != MBB.getNumber())
+ return None;
+
+ // Otherwise: this is a VPHI on a backedge feeding back into itself, i.e.
+ // a value that's live-through the whole loop. (It has to be a backedge:
+ // the only way this block's VPHI can reach one of its own predecessors is
+ // by going around a loop.) We can accept as a PHI location
+ // any location where the other predecessors agree, _and_ the machine
+ // locations feed back into themselves. Therefore, add all self-looping
+ // machine-value PHI locations.
+ for (unsigned int I = 0; I < NumLocs; ++I) {
+ ValueIDNum MPHI(MBB.getNumber(), 0, LocIdx(I));
+ if (MOutLocs[ThisBBNum][I] == MPHI)
+ Locs.back().push_back(LocIdx(I));
+ }
}
}
- // If there were no locations at all, return an empty result.
- if (Locs.empty())
- return std::tuple<Optional<ValueIDNum>, bool>(None, false);
-
- // Lambda for seeking a common location within a range of location-sets.
- using LocsIt = SmallVector<SmallVector<LocIdx, 4>, 8>::iterator;
- auto SeekLocation =
- [&Locs](llvm::iterator_range<LocsIt> SearchRange) -> Optional<LocIdx> {
- // Starting with the first set of locations, take the intersection with
- // subsequent sets.
- SmallVector<LocIdx, 4> base = Locs[0];
- for (auto &S : SearchRange) {
- SmallVector<LocIdx, 4> new_base;
- std::set_intersection(base.begin(), base.end(), S.begin(), S.end(),
- std::inserter(new_base, new_base.begin()));
- base = new_base;
- }
- if (base.empty())
- return None;
+ // We should have found locations for all predecessors, or returned.
+ assert(Locs.size() == BlockOrders.size());
- // We now have a set of LocIdxes that contain the right output value in
- // each of the predecessors. Pick the lowest; if there's a register loc,
- // that'll be it.
- return *base.begin();
- };
+ // Check that all properties are the same. We can't pick a location if they're
+ // not.
+ const DbgValueProperties *Properties0 = Properties[0];
+ for (auto *Prop : Properties)
+ if (*Prop != *Properties0)
+ return None;
- // Search for a common location for all predecessors. If we can't, then fall
- // back to only finding a common location between non-backedge predecessors.
- bool ValidForAllLocs = true;
- auto TheLoc = SeekLocation(Locs);
- if (!TheLoc) {
- ValidForAllLocs = false;
- TheLoc =
- SeekLocation(make_range(Locs.begin(), Locs.begin() + BackEdgesStart));
- }
+ // Starting with the first set of locations, take the intersection with
+ // subsequent sets.
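+ // For example, if one predecessor has the required value in both $rax and
+ // a stack slot, but another has it only in $rax, the intersection leaves
+ // $rax as the sole candidate PHI location.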
+ SmallVector<LocIdx, 4> CandidateLocs = Locs[0];
+ for (unsigned int I = 1; I < Locs.size(); ++I) {
+ auto &LocVec = Locs[I];
+ SmallVector<LocIdx, 4> NewCandidates;
+ std::set_intersection(CandidateLocs.begin(), CandidateLocs.end(),
+ LocVec.begin(), LocVec.end(),
+ std::inserter(NewCandidates, NewCandidates.begin()));
+ CandidateLocs = NewCandidates;
+ }
+ if (CandidateLocs.empty())
+ return None;
- if (!TheLoc)
- return std::tuple<Optional<ValueIDNum>, bool>(None, false);
+ // We now have a set of LocIdxes that contain the right output value in
+ // each of the predecessors. Pick the lowest; if there's a register loc,
+ // that'll be it.
+ LocIdx L = *CandidateLocs.begin();
// Return a PHI-value-number for the found location.
- LocIdx L = *TheLoc;
ValueIDNum PHIVal = {(unsigned)MBB.getNumber(), 0, L};
- return std::tuple<Optional<ValueIDNum>, bool>(PHIVal, ValidForAllLocs);
+ return PHIVal;
}
-std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
- MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs, LiveIdxT &VLOCInLocs,
- SmallPtrSet<const MachineBasicBlock *, 16> *VLOCVisited, unsigned BBNum,
- const SmallSet<DebugVariable, 4> &AllVars, ValueIDNum **MOutLocs,
- ValueIDNum **MInLocs,
+bool InstrRefBasedLDV::vlocJoin(
+ MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
- DenseMap<DebugVariable, DbgValue> &InLocsT) {
- bool DowngradeOccurred = false;
-
+ DbgValue &LiveIn) {
// To emulate VarLocBasedImpl, process this block if it's not in scope but
// _does_ assign a variable value. No live-ins for this scope are transferred
// in though, so we can return immediately.
- if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB)) {
- if (VLOCVisited)
- return std::tuple<bool, bool>(true, false);
- return std::tuple<bool, bool>(false, false);
- }
+ if (InScopeBlocks.count(&MBB) == 0 && !ArtificialBlocks.count(&MBB))
+ return false;
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
- // Find any live-ins computed in a prior iteration.
- auto ILSIt = VLOCInLocs.find(&MBB);
- assert(ILSIt != VLOCInLocs.end());
- auto &ILS = *ILSIt->second;
-
// Order predecessors by RPOT order, for exploring them in that order.
SmallVector<MachineBasicBlock *, 8> BlockOrders(MBB.predecessors());
@@ -2944,244 +2345,102 @@ std::tuple<bool, bool> InstrRefBasedLDV::vlocJoin(
unsigned CurBlockRPONum = BBToOrder[&MBB];
- // Force a re-visit to loop heads in the first dataflow iteration.
- // FIXME: if we could "propose" Const values this wouldn't be needed,
- // because they'd need to be confirmed before being emitted.
- if (!BlockOrders.empty() &&
- BBToOrder[BlockOrders[BlockOrders.size() - 1]] >= CurBlockRPONum &&
- VLOCVisited)
- DowngradeOccurred = true;
-
- auto ConfirmValue = [&InLocsT](const DebugVariable &DV, DbgValue VR) {
- auto Result = InLocsT.insert(std::make_pair(DV, VR));
- (void)Result;
- assert(Result.second);
- };
-
- auto ConfirmNoVal = [&ConfirmValue, &MBB](const DebugVariable &Var, const DbgValueProperties &Properties) {
- DbgValue NoLocPHIVal(MBB.getNumber(), Properties, DbgValue::NoVal);
-
- ConfirmValue(Var, NoLocPHIVal);
- };
+ // Collect all the incoming DbgValues for this variable, from predecessor
+ // live-out values.
+ SmallVector<InValueT, 8> Values;
+ bool Bail = false;
+ int BackEdgesStart = 0;
+ for (auto p : BlockOrders) {
+ // If the predecessor isn't in scope / to be explored, we'll never be
+ // able to join any locations.
+ if (!BlocksToExplore.contains(p)) {
+ Bail = true;
+ break;
+ }
- // Attempt to join the values for each variable.
- for (auto &Var : AllVars) {
- // Collect all the DbgValues for this variable.
- SmallVector<InValueT, 8> Values;
- bool Bail = false;
- unsigned BackEdgesStart = 0;
- for (auto p : BlockOrders) {
- // If the predecessor isn't in scope / to be explored, we'll never be
- // able to join any locations.
- if (!BlocksToExplore.contains(p)) {
- Bail = true;
- break;
- }
+ // All Live-outs will have been initialized.
+ DbgValue &OutLoc = *VLOCOutLocs.find(p)->second;
- // Don't attempt to handle unvisited predecessors: they're implicitly
- // "unknown"s in the lattice.
- if (VLOCVisited && !VLOCVisited->count(p))
- continue;
+ // Keep track of where back-edges begin in the Values vector. Relies on
+ // BlockOrders being sorted by RPO.
+ unsigned ThisBBRPONum = BBToOrder[p];
+ if (ThisBBRPONum < CurBlockRPONum)
+ ++BackEdgesStart;
- // If the predecessors OutLocs is absent, there's not much we can do.
- auto OL = VLOCOutLocs.find(p);
- if (OL == VLOCOutLocs.end()) {
- Bail = true;
- break;
- }
+ Values.push_back(std::make_pair(p, &OutLoc));
+ }
- // No live-out value for this predecessor also means we can't produce
- // a joined value.
- auto VIt = OL->second->find(Var);
- if (VIt == OL->second->end()) {
- Bail = true;
- break;
- }
+ // If there were no values, or one of the predecessors couldn't have a
+ // value, then give up immediately. It's not safe to produce a live-in
+ // value. Leave as whatever it was before.
+ if (Bail || Values.size() == 0)
+ return false;
- // Keep track of where back-edges begin in the Values vector. Relies on
- // BlockOrders being sorted by RPO.
- unsigned ThisBBRPONum = BBToOrder[p];
- if (ThisBBRPONum < CurBlockRPONum)
- ++BackEdgesStart;
+ // All (non-entry) blocks have at least one non-backedge predecessor.
+ // Pick the variable value from the first of these, to compare against
+ // all others.
+ const DbgValue &FirstVal = *Values[0].second;
+
+ // If the old live-in value is not a PHI then either a) no PHI is needed
+ // here, or b) we eliminated the PHI that was here. If so, we can just
+ // propagate in the first predecessor's incoming value.
+ if (LiveIn.Kind != DbgValue::VPHI || LiveIn.BlockNo != MBB.getNumber()) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ }
+
+ // Scan for variable values that can never be resolved: if they have
+ // different DIExpressions, different indirectness, or are mixed constants /
+ // non-constants.
+ for (auto &V : Values) {
+ if (V.second->Properties != FirstVal.Properties)
+ return false;
+ if (V.second->Kind == DbgValue::NoVal)
+ return false;
+ if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
+ return false;
+ }
- Values.push_back(std::make_pair(p, &VIt->second));
- }
+ // Try to eliminate this PHI. Do the incoming values all agree?
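+ // For example, a variable assigned the same value on both sides of a
+ // diamond: a VPHI was placed at the join block, but every predecessor's
+ // live-out carries that one value, so the VPHI is dropped and the value is
+ // treated as live-through.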
+ bool Disagree = false;
+ for (auto &V : Values) {
+ if (*V.second == FirstVal)
+ continue; // No disagreement.
- // If there were no values, or one of the predecessors couldn't have a
- // value, then give up immediately. It's not safe to produce a live-in
- // value.
- if (Bail || Values.size() == 0)
+ // Eliminate if a backedge feeds a VPHI back into itself.
+ if (V.second->Kind == DbgValue::VPHI &&
+ V.second->BlockNo == MBB.getNumber() &&
+ // Is this a backedge?
+ std::distance(Values.begin(), &V) >= BackEdgesStart)
continue;
- // Enumeration identifying the current state of the predecessors values.
- enum {
- Unset = 0,
- Agreed, // All preds agree on the variable value.
- PropDisagree, // All preds agree, but the value kind is Proposed in some.
- BEDisagree, // Only back-edges disagree on variable value.
- PHINeeded, // Non-back-edge predecessors have conflicing values.
- NoSolution // Conflicting Value metadata makes solution impossible.
- } OurState = Unset;
-
- // All (non-entry) blocks have at least one non-backedge predecessor.
- // Pick the variable value from the first of these, to compare against
- // all others.
- const DbgValue &FirstVal = *Values[0].second;
- const ValueIDNum &FirstID = FirstVal.ID;
-
- // Scan for variable values that can't be resolved: if they have different
- // DIExpressions, different indirectness, or are mixed constants /
- // non-constants.
- for (auto &V : Values) {
- if (V.second->Properties != FirstVal.Properties)
- OurState = NoSolution;
- if (V.second->Kind == DbgValue::Const && FirstVal.Kind != DbgValue::Const)
- OurState = NoSolution;
- }
-
- // Flags diagnosing _how_ the values disagree.
- bool NonBackEdgeDisagree = false;
- bool DisagreeOnPHINess = false;
- bool IDDisagree = false;
- bool Disagree = false;
- if (OurState == Unset) {
- for (auto &V : Values) {
- if (*V.second == FirstVal)
- continue; // No disagreement.
-
- Disagree = true;
-
- // Flag whether the value number actually diagrees.
- if (V.second->ID != FirstID)
- IDDisagree = true;
-
- // Distinguish whether disagreement happens in backedges or not.
- // Relies on Values (and BlockOrders) being sorted by RPO.
- unsigned ThisBBRPONum = BBToOrder[V.first];
- if (ThisBBRPONum < CurBlockRPONum)
- NonBackEdgeDisagree = true;
-
- // Is there a difference in whether the value is definite or only
- // proposed?
- if (V.second->Kind != FirstVal.Kind &&
- (V.second->Kind == DbgValue::Proposed ||
- V.second->Kind == DbgValue::Def) &&
- (FirstVal.Kind == DbgValue::Proposed ||
- FirstVal.Kind == DbgValue::Def))
- DisagreeOnPHINess = true;
- }
-
- // Collect those flags together and determine an overall state for
- // what extend the predecessors agree on a live-in value.
- if (!Disagree)
- OurState = Agreed;
- else if (!IDDisagree && DisagreeOnPHINess)
- OurState = PropDisagree;
- else if (!NonBackEdgeDisagree)
- OurState = BEDisagree;
- else
- OurState = PHINeeded;
- }
-
- // An extra indicator: if we only disagree on whether the value is a
- // Def, or proposed, then also flag whether that disagreement happens
- // in backedges only.
- bool PropOnlyInBEs = Disagree && !IDDisagree && DisagreeOnPHINess &&
- !NonBackEdgeDisagree && FirstVal.Kind == DbgValue::Def;
-
- const auto &Properties = FirstVal.Properties;
-
- auto OldLiveInIt = ILS.find(Var);
- const DbgValue *OldLiveInLocation =
- (OldLiveInIt != ILS.end()) ? &OldLiveInIt->second : nullptr;
-
- bool OverRide = false;
- if (OurState == BEDisagree && OldLiveInLocation) {
- // Only backedges disagree: we can consider downgrading. If there was a
- // previous live-in value, use it to work out whether the current
- // incoming value represents a lattice downgrade or not.
- OverRide =
- vlocDowngradeLattice(MBB, *OldLiveInLocation, Values, CurBlockRPONum);
- }
-
- // Use the current state of predecessor agreement and other flags to work
- // out what to do next. Possibilities include:
- // * Accept a value all predecessors agree on, or accept one that
- // represents a step down the exploration lattice,
- // * Use a PHI value number, if one can be found,
- // * Propose a PHI value number, and see if it gets confirmed later,
- // * Emit a 'NoVal' value, indicating we couldn't resolve anything.
- if (OurState == Agreed) {
- // Easiest solution: all predecessors agree on the variable value.
- ConfirmValue(Var, FirstVal);
- } else if (OurState == BEDisagree && OverRide) {
- // Only backedges disagree, and the other predecessors have produced
- // a new live-in value further down the exploration lattice.
- DowngradeOccurred = true;
- ConfirmValue(Var, FirstVal);
- } else if (OurState == PropDisagree) {
- // Predecessors agree on value, but some say it's only a proposed value.
- // Propagate it as proposed: unless it was proposed in this block, in
- // which case we're able to confirm the value.
- if (FirstID.getBlock() == (uint64_t)MBB.getNumber() && FirstID.isPHI()) {
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
- } else if (PropOnlyInBEs) {
- // If only backedges disagree, a higher (in RPO) block confirmed this
- // location, and we need to propagate it into this loop.
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Def));
- } else {
- // Otherwise; a Def meeting a Proposed is still a Proposed.
- ConfirmValue(Var, DbgValue(FirstID, Properties, DbgValue::Proposed));
- }
- } else if ((OurState == PHINeeded || OurState == BEDisagree)) {
- // Predecessors disagree and can't be downgraded: this can only be
- // solved with a PHI. Use pickVPHILoc to go look for one.
- Optional<ValueIDNum> VPHI;
- bool AllEdgesVPHI = false;
- std::tie(VPHI, AllEdgesVPHI) =
- pickVPHILoc(MBB, Var, VLOCOutLocs, MOutLocs, MInLocs, BlockOrders);
-
- if (VPHI && AllEdgesVPHI) {
- // There's a PHI value that's valid for all predecessors -- we can use
- // it. If any of the non-backedge predecessors have proposed values
- // though, this PHI is also only proposed, until the predecessors are
- // confirmed.
- DbgValue::KindT K = DbgValue::Def;
- for (unsigned int I = 0; I < BackEdgesStart; ++I)
- if (Values[I].second->Kind == DbgValue::Proposed)
- K = DbgValue::Proposed;
-
- ConfirmValue(Var, DbgValue(*VPHI, Properties, K));
- } else if (VPHI) {
- // There's a PHI value, but it's only legal for backedges. Leave this
- // as a proposed PHI value: it might come back on the backedges,
- // and allow us to confirm it in the future.
- DbgValue NoBEValue = DbgValue(*VPHI, Properties, DbgValue::Proposed);
- ConfirmValue(Var, NoBEValue);
- } else {
- ConfirmNoVal(Var, Properties);
- }
- } else {
- // Otherwise: we don't know. Emit a "phi but no real loc" phi.
- ConfirmNoVal(Var, Properties);
- }
+ Disagree = true;
}
- // Store newly calculated in-locs into VLOCInLocs, if they've changed.
- Changed = ILS != InLocsT;
- if (Changed)
- ILS = InLocsT;
-
- return std::tuple<bool, bool>(Changed, DowngradeOccurred);
+ // No disagreement -> live-through value.
+ if (!Disagree) {
+ Changed = LiveIn != FirstVal;
+ if (Changed)
+ LiveIn = FirstVal;
+ return Changed;
+ } else {
+ // Otherwise use a VPHI.
+ DbgValue VPHI(MBB.getNumber(), FirstVal.Properties, DbgValue::VPHI);
+ Changed = LiveIn != VPHI;
+ if (Changed)
+ LiveIn = VPHI;
+ return Changed;
+ }
}
-void InstrRefBasedLDV::vlocDataflow(
- const LexicalScope *Scope, const DILocation *DILoc,
+void InstrRefBasedLDV::buildVLocValueMap(const DILocation *DILoc,
const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks, LiveInsT &Output,
ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
SmallVectorImpl<VLocTracker> &AllTheVLocs) {
- // This method is much like mlocDataflow: but focuses on a single
+ // This method is much like buildMLocValueMap, but focuses on a single
// LexicalScope at a time. Pick out a set of blocks and variables that are
// to have their value assignments solved, then run our dataflow algorithm
// until a fixedpoint is reached.
@@ -3235,8 +2494,8 @@ void InstrRefBasedLDV::vlocDataflow(
continue;
if (!ArtificialBlocks.count(succ))
continue;
- DFS.push_back(std::make_pair(succ, succ->succ_begin()));
ToAdd.insert(succ);
+ DFS.push_back(std::make_pair(succ, succ->succ_begin()));
}
// Search all those blocks, depth first.
@@ -3252,8 +2511,8 @@ void InstrRefBasedLDV::vlocDataflow(
// If the current successor is artificial and unexplored, descend into
// it.
if (!ToAdd.count(*CurSucc) && ArtificialBlocks.count(*CurSucc)) {
- DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
ToAdd.insert(*CurSucc);
+ DFS.push_back(std::make_pair(*CurSucc, (*CurSucc)->succ_begin()));
continue;
}
@@ -3278,6 +2537,13 @@ void InstrRefBasedLDV::vlocDataflow(
if (BlocksToExplore.size() == 1)
return;
+ // Convert a const set to a non-const set. LexicalScopes
+ // getMachineBasicBlocks returns const MBB pointers, IDF wants mutable ones.
+ // (Neither of them mutates anything.)
+ SmallPtrSet<MachineBasicBlock *, 8> MutBlocksToExplore;
+ for (const auto *MBB : BlocksToExplore)
+ MutBlocksToExplore.insert(const_cast<MachineBasicBlock *>(MBB));
+
// Pick out the relevant blocks in RPO order and sort them.
for (auto *MBB : BlocksToExplore)
BlockOrders.push_back(const_cast<MachineBasicBlock *>(MBB));
@@ -3286,9 +2552,18 @@ void InstrRefBasedLDV::vlocDataflow(
unsigned NumBlocks = BlockOrders.size();
// Allocate some vectors for storing the live ins and live outs. Large.
- SmallVector<DenseMap<DebugVariable, DbgValue>, 32> LiveIns, LiveOuts;
- LiveIns.resize(NumBlocks);
- LiveOuts.resize(NumBlocks);
+ SmallVector<DbgValue, 32> LiveIns, LiveOuts;
+ LiveIns.reserve(NumBlocks);
+ LiveOuts.reserve(NumBlocks);
+
+ // Initialize all values to start as NoVals. This signifies "it's live
+ // through, but we don't know what it is".
+ DbgValueProperties EmptyProperties(EmptyExpr, false);
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns.push_back(EmptyDbgValue);
+ LiveOuts.push_back(EmptyDbgValue);
+ }
// Produce by-MBB indexes of live-in/live-outs, to ease lookup within
// vlocJoin.
@@ -3300,108 +2575,164 @@ void InstrRefBasedLDV::vlocDataflow(
LiveInIdx[BlockOrders[I]] = &LiveIns[I];
}
- for (auto *MBB : BlockOrders) {
- Worklist.push(BBToOrder[MBB]);
- OnWorklist.insert(MBB);
- }
+ // Loop over each variable and place PHIs for it, then propagate values
+ // between blocks. This keeps the locality of working on one lexical scope
+ // at a time, but avoids re-processing variable values because some other
+ // variable has been assigned.
+ for (auto &Var : VarsWeCareAbout) {
+ // Re-initialize live-ins and live-outs, to clear the remains of previous
+ // variables' live-ins / live-outs.
+ for (unsigned int I = 0; I < NumBlocks; ++I) {
+ DbgValue EmptyDbgValue(I, EmptyProperties, DbgValue::NoVal);
+ LiveIns[I] = EmptyDbgValue;
+ LiveOuts[I] = EmptyDbgValue;
+ }
- // Iterate over all the blocks we selected, propagating variable values.
- bool FirstTrip = true;
- SmallPtrSet<const MachineBasicBlock *, 16> VLOCVisited;
- while (!Worklist.empty() || !Pending.empty()) {
- while (!Worklist.empty()) {
- auto *MBB = OrderToBB[Worklist.top()];
- CurBB = MBB->getNumber();
- Worklist.pop();
+ // Place PHIs for variable values, using the LLVM IDF calculator.
+ // Collect the set of blocks where variables are def'd.
+ SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
+ for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
+ auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
+ if (TransferFunc.find(Var) != TransferFunc.end())
+ DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
+ }
- DenseMap<DebugVariable, DbgValue> JoinedInLocs;
+ SmallVector<MachineBasicBlock *, 32> PHIBlocks;
- // Join values from predecessors. Updates LiveInIdx, and writes output
- // into JoinedInLocs.
- bool InLocsChanged, DowngradeOccurred;
- std::tie(InLocsChanged, DowngradeOccurred) = vlocJoin(
- *MBB, LiveOutIdx, LiveInIdx, (FirstTrip) ? &VLOCVisited : nullptr,
- CurBB, VarsWeCareAbout, MOutLocs, MInLocs, InScopeBlocks,
- BlocksToExplore, JoinedInLocs);
+ // Request the set of PHIs we should insert for this variable.
+ BlockPHIPlacement(MutBlocksToExplore, DefBlocks, PHIBlocks);
- bool FirstVisit = VLOCVisited.insert(MBB).second;
+ // Insert PHIs into the per-block live-in tables for this variable.
+ for (MachineBasicBlock *PHIMBB : PHIBlocks) {
+ unsigned BlockNo = PHIMBB->getNumber();
+ DbgValue *LiveIn = LiveInIdx[PHIMBB];
+ *LiveIn = DbgValue(BlockNo, EmptyProperties, DbgValue::VPHI);
+ }
- // Always explore transfer function if inlocs changed, or if we've not
- // visited this block before.
- InLocsChanged |= FirstVisit;
+ for (auto *MBB : BlockOrders) {
+ Worklist.push(BBToOrder[MBB]);
+ OnWorklist.insert(MBB);
+ }
- // If a downgrade occurred, book us in for re-examination on the next
- // iteration.
- if (DowngradeOccurred && OnPending.insert(MBB).second)
- Pending.push(BBToOrder[MBB]);
+ // Iterate over all the blocks we selected, propagating the variable's value.
+ // This loop does two things:
+ // * Eliminates unnecessary VPHIs in vlocJoin,
+ // * Evaluates the block's transfer function (i.e. variable assignments) and
+ // stores the result to the block's live-outs.
+ // Always evaluate the transfer function on the first iteration, and when
+ // the live-ins change thereafter.
+ bool FirstTrip = true;
+ while (!Worklist.empty() || !Pending.empty()) {
+ while (!Worklist.empty()) {
+ auto *MBB = OrderToBB[Worklist.top()];
+ CurBB = MBB->getNumber();
+ Worklist.pop();
+
+ auto LiveInsIt = LiveInIdx.find(MBB);
+ assert(LiveInsIt != LiveInIdx.end());
+ DbgValue *LiveIn = LiveInsIt->second;
+
+ // Join values from predecessors: this updates the live-in value for this
+ // block, in place.
+ bool InLocsChanged =
+ vlocJoin(*MBB, LiveOutIdx, InScopeBlocks, BlocksToExplore, *LiveIn);
+
+ SmallVector<const MachineBasicBlock *, 8> Preds;
+ for (const auto *Pred : MBB->predecessors())
+ Preds.push_back(Pred);
+
+ // If this block's live-in value is a VPHI, try to pick a machine-value
+ // for it. This makes the machine-value available and propagated
+ // through all blocks by the time value propagation finishes. We can't
+ // do this any earlier as it needs to read the block live-outs.
+ if (LiveIn->Kind == DbgValue::VPHI && LiveIn->BlockNo == (int)CurBB) {
+ // There's a small possibility that on a preceding path, a VPHI is
+ // eliminated and transitions from VPHI-with-location to
+ // live-through-value. As a result, the selected location of any VPHI
+ // might change, so we need to re-compute it on each iteration.
+ Optional<ValueIDNum> ValueNum =
+ pickVPHILoc(*MBB, Var, LiveOutIdx, MOutLocs, Preds);
+
+ if (ValueNum) {
+ InLocsChanged |= LiveIn->ID != *ValueNum;
+ LiveIn->ID = *ValueNum;
+ }
+ }
- if (!InLocsChanged)
- continue;
+ if (!InLocsChanged && !FirstTrip)
+ continue;
+
+ DbgValue *LiveOut = LiveOutIdx[MBB];
+ bool OLChanged = false;
- // Do transfer function.
- auto &VTracker = AllTheVLocs[MBB->getNumber()];
- for (auto &Transfer : VTracker.Vars) {
- // Is this var we're mangling in this scope?
- if (VarsWeCareAbout.count(Transfer.first)) {
+ // Do transfer function.
+ auto &VTracker = AllTheVLocs[MBB->getNumber()];
+ auto TransferIt = VTracker.Vars.find(Var);
+ if (TransferIt != VTracker.Vars.end()) {
// Erase on empty transfer (DBG_VALUE $noreg).
- if (Transfer.second.Kind == DbgValue::Undef) {
- JoinedInLocs.erase(Transfer.first);
+ if (TransferIt->second.Kind == DbgValue::Undef) {
+ DbgValue NewVal(MBB->getNumber(), EmptyProperties, DbgValue::NoVal);
+ if (*LiveOut != NewVal) {
+ *LiveOut = NewVal;
+ OLChanged = true;
+ }
} else {
// Insert new variable value; or overwrite.
- auto NewValuePair = std::make_pair(Transfer.first, Transfer.second);
- auto Result = JoinedInLocs.insert(NewValuePair);
- if (!Result.second)
- Result.first->second = Transfer.second;
+ if (*LiveOut != TransferIt->second) {
+ *LiveOut = TransferIt->second;
+ OLChanged = true;
+ }
+ }
+ } else {
+ // Just copy live-ins to live-outs, for anything not transferred.
+ if (*LiveOut != *LiveIn) {
+ *LiveOut = *LiveIn;
+ OLChanged = true;
}
}
- }
-
- // Did the live-out locations change?
- bool OLChanged = JoinedInLocs != *LiveOutIdx[MBB];
-
- // If they haven't changed, there's no need to explore further.
- if (!OLChanged)
- continue;
- // Commit to the live-out record.
- *LiveOutIdx[MBB] = JoinedInLocs;
-
- // We should visit all successors. Ensure we'll visit any non-backedge
- // successors during this dataflow iteration; book backedge successors
- // to be visited next time around.
- for (auto s : MBB->successors()) {
- // Ignore out of scope / not-to-be-explored successors.
- if (LiveInIdx.find(s) == LiveInIdx.end())
+ // If no live-out value changed, there's no need to explore further.
+ if (!OLChanged)
continue;
- if (BBToOrder[s] > BBToOrder[MBB]) {
- if (OnWorklist.insert(s).second)
- Worklist.push(BBToOrder[s]);
- } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
- Pending.push(BBToOrder[s]);
+ // We should visit all successors. Ensure we'll visit any non-backedge
+ // successors during this dataflow iteration; book backedge successors
+ // to be visited next time around.
+ for (auto s : MBB->successors()) {
+ // Ignore out of scope / not-to-be-explored successors.
+ if (LiveInIdx.find(s) == LiveInIdx.end())
+ continue;
+
+ if (BBToOrder[s] > BBToOrder[MBB]) {
+ if (OnWorklist.insert(s).second)
+ Worklist.push(BBToOrder[s]);
+ } else if (OnPending.insert(s).second && (FirstTrip || OLChanged)) {
+ Pending.push(BBToOrder[s]);
+ }
}
}
+ Worklist.swap(Pending);
+ std::swap(OnWorklist, OnPending);
+ OnPending.clear();
+ assert(Pending.empty());
+ FirstTrip = false;
}
- Worklist.swap(Pending);
- std::swap(OnWorklist, OnPending);
- OnPending.clear();
- assert(Pending.empty());
- FirstTrip = false;
- }
-
- // Dataflow done. Now what? Save live-ins. Ignore any that are still marked
- // as being variable-PHIs, because those did not have their machine-PHI
- // value confirmed. Such variable values are places that could have been
- // PHIs, but are not.
- for (auto *MBB : BlockOrders) {
- auto &VarMap = *LiveInIdx[MBB];
- for (auto &P : VarMap) {
- if (P.second.Kind == DbgValue::Proposed ||
- P.second.Kind == DbgValue::NoVal)
+
+ // Save live-ins to output vector. Ignore any that are still marked as being
+ // VPHIs with no location -- those are variables that we know the value of,
+ // but are not actually available in the register file.
+ for (auto *MBB : BlockOrders) {
+ DbgValue *BlockLiveIn = LiveInIdx[MBB];
+ if (BlockLiveIn->Kind == DbgValue::NoVal)
continue;
- Output[MBB->getNumber()].push_back(P);
+ if (BlockLiveIn->Kind == DbgValue::VPHI &&
+ BlockLiveIn->ID == ValueIDNum::EmptyValue)
+ continue;
+ if (BlockLiveIn->Kind == DbgValue::VPHI)
+ BlockLiveIn->Kind = DbgValue::Def;
+ Output[MBB->getNumber()].push_back(std::make_pair(Var, *BlockLiveIn));
}
- }
+ } // Per-variable loop.
BlockOrders.clear();
BlocksToExplore.clear();
@@ -3485,6 +2816,10 @@ void InstrRefBasedLDV::emitLocations(
void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
// Build some useful data structures.
+
+ LLVMContext &Context = MF.getFunction().getContext();
+ EmptyExpr = DIExpression::get(Context, {});
+
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
if (const DebugLoc &DL = MI.getDebugLoc())
return DL.getLine() != 0;
@@ -3524,7 +2859,10 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
- TargetPassConfig *TPC) {
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC,
+ unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
// No subprogram means this function contains no debuginfo.
if (!MF.getFunction().getSubprogram())
return false;
@@ -3532,7 +2870,9 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
this->TPC = TPC;
+ this->DomTree = DomTree;
TRI = MF.getSubtarget().getRegisterInfo();
+ MRI = &MF.getRegInfo();
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
TFI->getCalleeSaves(MF, CalleeSavedRegs);
@@ -3569,6 +2909,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks];
unsigned NumLocs = MTracker->getNumLocs();
for (int i = 0; i < MaxNumBlocks; ++i) {
+ // These all auto-initialize to ValueIDNum::EmptyValue
MOutLocs[i] = new ValueIDNum[NumLocs];
MInLocs[i] = new ValueIDNum[NumLocs];
}
@@ -3577,7 +2918,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// storing the computed live-ins / live-outs into the array-of-arrays. We use
// both live-ins and live-outs for decision making in the variable value
// dataflow problem.
- mlocDataflow(MInLocs, MOutLocs, MLocTransfer);
+ buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer);
// Patch up debug phi numbers, turning unknown block-live-in values into
// either live-through machine values, or PHIs.
@@ -3626,6 +2967,7 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
// To mirror old LiveDebugValues, enumerate variables in RPOT order. Otherwise
// the order is unimportant, it just has to be stable.
+ unsigned VarAssignCount = 0;
for (unsigned int I = 0; I < OrderToBB.size(); ++I) {
auto *MBB = OrderToBB[I];
auto *VTracker = &vlocs[MBB->getNumber()];
@@ -3643,24 +2985,42 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
ScopeToVars[Scope].insert(Var);
ScopeToBlocks[Scope].insert(VTracker->MBB);
ScopeToDILocation[Scope] = ScopeLoc;
+ ++VarAssignCount;
}
}
- // OK. Iterate over scopes: there might be something to be said for
- // ordering them by size/locality, but that's for the future. For each scope,
- // solve the variable value problem, producing a map of variables to values
- // in SavedLiveIns.
- for (auto &P : ScopeToVars) {
- vlocDataflow(P.first, ScopeToDILocation[P.first], P.second,
- ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
- vlocs);
- }
+ bool Changed = false;
+
+ // If we have an extremely large number of variable assignments and blocks,
+ // bail out at this point. We've burnt some time doing analysis already,
+ // but we should cut our losses.
+ if ((unsigned)MaxNumBlocks > InputBBLimit &&
+ VarAssignCount > InputDbgValLimit) {
+ LLVM_DEBUG(dbgs() << "Disabling InstrRefBasedLDV: " << MF.getName()
+ << " has " << MaxNumBlocks << " basic blocks and "
+ << VarAssignCount
+ << " variable assignments, exceeding limits.\n");
+ } else {
+ // Compute the extended ranges, iterating over scopes. There might be
+ // something to be said for ordering them by size/locality, but that's for
+ // the future. For each scope, solve the variable value problem, producing
+ // a map of variables to values in SavedLiveIns.
+ for (auto &P : ScopeToVars) {
+ buildVLocValueMap(ScopeToDILocation[P.first], P.second,
+ ScopeToBlocks[P.first], SavedLiveIns, MOutLocs, MInLocs,
+ vlocs);
+ }
+
+ // Using the computed value locations and variable values for each block,
+ // create the DBG_VALUE instructions representing the extended variable
+ // locations.
+ emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
- // Using the computed value locations and variable values for each block,
- // create the DBG_VALUE instructions representing the extended variable
- // locations.
- emitLocations(MF, SavedLiveIns, MOutLocs, MInLocs, AllVarsNumbering, *TPC);
+ // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
+ Changed = TTracker->Transfers.size() != 0;
+ }
+ // Common clean-up of memory.
for (int Idx = 0; Idx < MaxNumBlocks; ++Idx) {
delete[] MOutLocs[Idx];
delete[] MInLocs[Idx];
@@ -3668,9 +3028,6 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
delete[] MOutLocs;
delete[] MInLocs;
- // Did we actually make any changes? If we created any DBG_VALUEs, then yes.
- bool Changed = TTracker->Transfers.size() != 0;
-
delete MTracker;
delete TTracker;
MTracker = nullptr;
@@ -3883,10 +3240,8 @@ public:
/// vector.
static void FindPredecessorBlocks(LDVSSABlock *BB,
SmallVectorImpl<LDVSSABlock *> *Preds) {
- for (MachineBasicBlock::pred_iterator PI = BB->BB.pred_begin(),
- E = BB->BB.pred_end();
- PI != E; ++PI)
- Preds->push_back(BB->Updater.getSSALDVBlock(*PI));
+ for (MachineBasicBlock *Pred : BB->BB.predecessors())
+ Preds->push_back(BB->Updater.getSSALDVBlock(Pred));
}
/// GetUndefVal - Normally creates an IMPLICIT_DEF instruction with a new
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
new file mode 100644
index 000000000000..d96ef6d4f6e5
--- /dev/null
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -0,0 +1,1051 @@
+//===- InstrRefBasedImpl.h - Tracking Debug Value MIs ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+
+#include "LiveDebugValues.h"
+
+class TransferTracker;
+
+// Forward declaration of the unit test class, so that we can peer into the
+// LDV object.
+class InstrRefLDVTest;
+
+namespace LiveDebugValues {
+
+class MLocTracker;
+
+using namespace llvm;
+
+/// Handle-class for a particular "location". This value-type uniquely
+/// symbolises a register or stack location, allowing manipulation of locations
+/// without concern for where that location is. Practically, this allows us to
+/// treat the state of the machine at a particular point as an array of values,
+/// rather than a map of values.
+class LocIdx {
+ unsigned Location;
+
+ // Default constructor is private, initializing to an illegal location number.
+ // Use only for "not an entry" elements in IndexedMaps.
+ LocIdx() : Location(UINT_MAX) {}
+
+public:
+#define NUM_LOC_BITS 24
+ LocIdx(unsigned L) : Location(L) {
+ assert(L < (1 << NUM_LOC_BITS) && "Machine locations must fit in 24 bits");
+ }
+
+ static LocIdx MakeIllegalLoc() { return LocIdx(); }
+ static LocIdx MakeTombstoneLoc() {
+ LocIdx L = LocIdx();
+ --L.Location;
+ return L;
+ }
+
+ bool isIllegal() const { return Location == UINT_MAX; }
+
+ uint64_t asU64() const { return Location; }
+
+ bool operator==(unsigned L) const { return Location == L; }
+
+ bool operator==(const LocIdx &L) const { return Location == L.Location; }
+
+ bool operator!=(unsigned L) const { return !(*this == L); }
+
+ bool operator!=(const LocIdx &L) const { return !(*this == L); }
+
+ bool operator<(const LocIdx &Other) const {
+ return Location < Other.Location;
+ }
+};
+
+// The location at which a spilled value resides. It consists of a register and
+// an offset.
+struct SpillLoc {
+ unsigned SpillBase;
+ StackOffset SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return std::make_pair(SpillBase, SpillOffset) ==
+ std::make_pair(Other.SpillBase, Other.SpillOffset);
+ }
+ bool operator<(const SpillLoc &Other) const {
+ return std::make_tuple(SpillBase, SpillOffset.getFixed(),
+ SpillOffset.getScalable()) <
+ std::make_tuple(Other.SpillBase, Other.SpillOffset.getFixed(),
+ Other.SpillOffset.getScalable());
+ }
+};
+
+/// Unique identifier for a value defined by an instruction, as a value type.
+/// Casts back and forth to a uint64_t. Probably replaceable with something less
+/// bit-constrained. Each value identifies the instruction and machine location
+/// where the value is defined, although there may be no corresponding machine
+/// operand for it (ex: regmasks clobbering values). The instructions are
+/// one-based, and definitions that are PHIs have instruction number zero.
+///
+/// The obvious limits of a 1M block function or 1M instruction blocks are
+/// problematic; but by that point we should probably have bailed out of
+/// trying to analyse the function.
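+///
+/// For example, {bb: 1, inst: 7, loc: 2} names the value defined by the
+/// seventh instruction of block one into machine location two, while
+/// {bb: 1, inst: 0, loc: 2} names the live-in (PHI) value of that location
+/// in block one.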
+class ValueIDNum {
+ union {
+ struct {
+ uint64_t BlockNo : 20; /// The block where the def happens.
+ uint64_t InstNo : 20; /// The Instruction where the def happens.
+ /// One based, is distance from start of block.
+ uint64_t LocNo
+ : NUM_LOC_BITS; /// The machine location where the def happens.
+ } s;
+ uint64_t Value;
+ } u;
+
+ static_assert(sizeof(u) == 8, "Badly packed ValueIDNum?");
+
+public:
+ // Default-initialize to EmptyValue. This is necessary to make IndexedMaps
+ // of values work.
+ ValueIDNum() { u.Value = EmptyValue.asU64(); }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, uint64_t Loc) {
+ u.s = {Block, Inst, Loc};
+ }
+
+ ValueIDNum(uint64_t Block, uint64_t Inst, LocIdx Loc) {
+ u.s = {Block, Inst, Loc.asU64()};
+ }
+
+ uint64_t getBlock() const { return u.s.BlockNo; }
+ uint64_t getInst() const { return u.s.InstNo; }
+ uint64_t getLoc() const { return u.s.LocNo; }
+ bool isPHI() const { return u.s.InstNo == 0; }
+
+ uint64_t asU64() const { return u.Value; }
+
+ static ValueIDNum fromU64(uint64_t v) {
+ ValueIDNum Val;
+ Val.u.Value = v;
+ return Val;
+ }
+
+ bool operator<(const ValueIDNum &Other) const {
+ return asU64() < Other.asU64();
+ }
+
+ bool operator==(const ValueIDNum &Other) const {
+ return u.Value == Other.u.Value;
+ }
+
+ bool operator!=(const ValueIDNum &Other) const { return !(*this == Other); }
+
+ std::string asString(const std::string &mlocname) const {
+ return Twine("Value{bb: ")
+ .concat(Twine(u.s.BlockNo)
+ .concat(Twine(", inst: ")
+ .concat((u.s.InstNo ? Twine(u.s.InstNo)
+ : Twine("live-in"))
+ .concat(Twine(", loc: ").concat(
+ Twine(mlocname)))
+ .concat(Twine("}")))))
+ .str();
+ }
+
+ static ValueIDNum EmptyValue;
+ static ValueIDNum TombstoneValue;
+};
+
+/// Thin wrapper around an integer -- designed to give more type safety to
+/// spill location numbers.
+class SpillLocationNo {
+public:
+ explicit SpillLocationNo(unsigned SpillNo) : SpillNo(SpillNo) {}
+ unsigned SpillNo;
+ unsigned id() const { return SpillNo; }
+
+ bool operator<(const SpillLocationNo &Other) const {
+ return SpillNo < Other.SpillNo;
+ }
+
+ bool operator==(const SpillLocationNo &Other) const {
+ return SpillNo == Other.SpillNo;
+ }
+ bool operator!=(const SpillLocationNo &Other) const {
+ return !(*this == Other);
+ }
+};
+
+/// Meta qualifiers for a value. Pair of whatever expression is used to qualify
+/// the value, and a Boolean for whether or not it's indirect.
+class DbgValueProperties {
+public:
+ DbgValueProperties(const DIExpression *DIExpr, bool Indirect)
+ : DIExpr(DIExpr), Indirect(Indirect) {}
+
+ /// Extract properties from an existing DBG_VALUE instruction.
+ DbgValueProperties(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ DIExpr = MI.getDebugExpression();
+ Indirect = MI.getOperand(1).isImm();
+ }
+
+ bool operator==(const DbgValueProperties &Other) const {
+ return std::tie(DIExpr, Indirect) == std::tie(Other.DIExpr, Other.Indirect);
+ }
+
+ bool operator!=(const DbgValueProperties &Other) const {
+ return !(*this == Other);
+ }
+
+ const DIExpression *DIExpr;
+ bool Indirect;
+};
+
+/// Class recording the (high level) _value_ of a variable. Identifies either
+/// the value of the variable as a ValueIDNum, or a constant MachineOperand.
+/// This class also stores meta-information about how the value is qualified.
+/// Used to reason about variable values when performing the second
+/// (DebugVariable specific) dataflow analysis.
+class DbgValue {
+public:
+ /// If Kind is Def, the value number that this value is based on. VPHIs set
+ /// this field to EmptyValue if there is no machine-value for this VPHI, or
+ /// the corresponding machine-value if there is one.
+ ValueIDNum ID;
+ /// If Kind is Const, the MachineOperand defining this value.
+ Optional<MachineOperand> MO;
+ /// For a NoVal or VPHI DbgValue, which block it was generated in.
+ int BlockNo;
+
+ /// Qualifiers for the ValueIDNum above.
+ DbgValueProperties Properties;
+
+ typedef enum {
+ Undef, // Represents a DBG_VALUE $noreg in the transfer function only.
+ Def, // This value is defined by an inst, or is a PHI value.
+ Const, // A constant value contained in the MachineOperand field.
+ VPHI, // Incoming values to BlockNo differ, those values must be joined by
+ // a PHI in this block.
+ NoVal, // Empty DbgValue indicating an unknown value. Used as initializer,
+ // before dominating blocks values are propagated in.
+ } KindT;
+ /// Discriminator for whether this is a constant or an in-program value.
+ KindT Kind;
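+
+ // For example: a DBG_VALUE of a register becomes a Def whose ID names the
+ // value currently in that register; a DBG_VALUE of an immediate becomes a
+ // Const carrying that MachineOperand; and a DBG_VALUE $noreg becomes an
+ // Undef entry in the transfer function.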
+
+ DbgValue(const ValueIDNum &Val, const DbgValueProperties &Prop, KindT Kind)
+ : ID(Val), MO(None), BlockNo(0), Properties(Prop), Kind(Kind) {
+ assert(Kind == Def);
+ }
+
+ DbgValue(unsigned BlockNo, const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(BlockNo),
+ Properties(Prop), Kind(Kind) {
+ assert(Kind == NoVal || Kind == VPHI);
+ }
+
+ DbgValue(const MachineOperand &MO, const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(MO), BlockNo(0), Properties(Prop),
+ Kind(Kind) {
+ assert(Kind == Const);
+ }
+
+ DbgValue(const DbgValueProperties &Prop, KindT Kind)
+ : ID(ValueIDNum::EmptyValue), MO(None), BlockNo(0), Properties(Prop),
+ Kind(Kind) {
+ assert(Kind == Undef &&
+ "Empty DbgValue constructor must pass in Undef kind");
+ }
+
+#ifndef NDEBUG
+ void dump(const MLocTracker *MTrack) const;
+#endif
+
+ bool operator==(const DbgValue &Other) const {
+ if (std::tie(Kind, Properties) != std::tie(Other.Kind, Other.Properties))
+ return false;
+ else if (Kind == Def && ID != Other.ID)
+ return false;
+ else if (Kind == NoVal && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == Const)
+ return MO->isIdenticalTo(*Other.MO);
+ else if (Kind == VPHI && BlockNo != Other.BlockNo)
+ return false;
+ else if (Kind == VPHI && ID != Other.ID)
+ return false;
+
+ return true;
+ }
+
+ bool operator!=(const DbgValue &Other) const { return !(*this == Other); }
+};
+
+class LocIdxToIndexFunctor {
+public:
+ using argument_type = LocIdx;
+ unsigned operator()(const LocIdx &L) const { return L.asU64(); }
+};
+
+/// Tracker for what values are in machine locations. Listens to the Things
+/// being Done by various instructions, and maintains a table of what machine
+/// locations have what values (as defined by a ValueIDNum).
+///
+/// There are potentially a much larger number of machine locations on the
+/// target machine than the actual working-set size of the function. On x86 for
+/// example, we're extremely unlikely to want to track values through control
+/// or debug registers. To avoid doing so, MLocTracker has several layers of
+/// indirection going on, described below, to avoid unnecessarily tracking
+/// any location.
+///
+/// Here's a sort of diagram of the indexes, read from the bottom up:
+///
+/// Size on stack Offset on stack
+/// \ /
+/// Stack Idx (Where in slot is this?)
+/// /
+/// /
+/// Slot Num (%stack.0) /
+/// FrameIdx => SpillNum /
+/// \ /
+/// SpillID (int) Register number (int)
+/// \ /
+/// LocationID => LocIdx
+/// |
+/// LocIdx => ValueIDNum
+///
+/// The aim here is that the LocIdx => ValueIDNum vector is just an array of
+/// values in numbered locations, so that later analyses can ignore whether the
+/// location is a register or otherwise. To map a register / spill location to
+/// a LocIdx, you have to use the (sparse) LocationID => LocIdx map. And to
+/// build a LocationID for a stack slot, you need to combine identifiers for
+/// which stack slot it is and where within that slot is being described.
+///
+/// Register mask operands cause trouble by technically defining every register;
+/// various hacks are used to avoid tracking registers that are never read and
+/// only written by regmasks.
+class MLocTracker {
+public:
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const TargetLowering &TLI;
+
+ /// IndexedMap type, mapping from LocIdx to ValueIDNum.
+ using LocToValueType = IndexedMap<ValueIDNum, LocIdxToIndexFunctor>;
+
+ /// Map of LocIdxes to the ValueIDNums that they store. This is tightly
+ /// packed, entries only exist for locations that are being tracked.
+ LocToValueType LocIdxToIDNum;
+
+ /// "Map" of machine location IDs (i.e., raw register or spill number) to the
+ /// LocIdx key / number for that location. There are always at least as many
+ /// as the number of registers on the target -- if the value in the register
+ /// is not being tracked, then the LocIdx value will be zero. New entries are
+ /// appended if a new spill slot begins being tracked.
+ /// This, and the corresponding reverse map, persist for the analysis of the
+ /// whole function, and are necessary for decoding various vectors of
+ /// values.
+ std::vector<LocIdx> LocIDToLocIdx;
+
+ /// Inverse map of LocIDToLocIdx.
+ IndexedMap<unsigned, LocIdxToIndexFunctor> LocIdxToLocID;
+
+ /// When clobbering register masks, we choose not to believe the machine model
+ /// and don't clobber SP. Do the same for SP aliases, and for efficiency,
+ /// keep a set of them here.
+ SmallSet<Register, 8> SPAliases;
+
+ /// Unique-ification of spill locations. Used to number them -- their LocID
+ /// number is the index in SpillLocs minus one plus NumRegs.
+ UniqueVector<SpillLoc> SpillLocs;
+
+ // If we discover a new machine location, assign it an mphi with this
+ // block number.
+ unsigned CurBB;
+
+ /// Cached local copy of the number of registers the target has.
+ unsigned NumRegs;
+
+ /// Number of slot indexes the target has -- distinct segments of a stack
+ /// slot that can take on the value of a subregister, when a super-register
+ /// is written to the stack.
+ unsigned NumSlotIdxes;
+
+ /// Collection of register mask operands that have been observed. Second part
+ /// of pair indicates the instruction that they happened in. Used to
+ /// reconstruct where defs happened if we start tracking a location later
+ /// on.
+ SmallVector<std::pair<const MachineOperand *, unsigned>, 32> Masks;
+
+ /// Pair for describing a position within a stack slot -- first the size in
+ /// bits, then the offset.
+ typedef std::pair<unsigned short, unsigned short> StackSlotPos;
+
+ /// Map from a size/offset pair describing a position in a stack slot, to a
+ /// numeric identifier for that position. Allows easier identification of
+ /// individual positions.
+ DenseMap<StackSlotPos, unsigned> StackSlotIdxes;
+
+ /// Inverse of StackSlotIdxes.
+ DenseMap<unsigned, StackSlotPos> StackIdxesToPos;
+
+ /// Iterator for locations and the values they contain. Dereferencing
+ /// produces a struct/pair containing the LocIdx key for this location,
+ /// and a reference to the value currently stored. Simplifies the process
+ /// of seeking a particular location.
+ class MLocIterator {
+ LocToValueType &ValueMap;
+ LocIdx Idx;
+
+ public:
+ class value_type {
+ public:
+ value_type(LocIdx Idx, ValueIDNum &Value) : Idx(Idx), Value(Value) {}
+ const LocIdx Idx; /// Read-only index of this location.
+ ValueIDNum &Value; /// Reference to the stored value at this location.
+ };
+
+ MLocIterator(LocToValueType &ValueMap, LocIdx Idx)
+ : ValueMap(ValueMap), Idx(Idx) {}
+
+ bool operator==(const MLocIterator &Other) const {
+ assert(&ValueMap == &Other.ValueMap);
+ return Idx == Other.Idx;
+ }
+
+ bool operator!=(const MLocIterator &Other) const {
+ return !(*this == Other);
+ }
+
+ void operator++() { Idx = LocIdx(Idx.asU64() + 1); }
+
+ value_type operator*() { return value_type(Idx, ValueMap[LocIdx(Idx)]); }
+ };
+
+ MLocTracker(MachineFunction &MF, const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI, const TargetLowering &TLI);
+
+ /// Produce location ID number for a Register. Provides some small amount of
+ /// type safety.
+ /// \param Reg The register we're looking up.
+ unsigned getLocID(Register Reg) { return Reg.id(); }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+ /// \param SpillSubReg Subregister within the spill we're addressing.
+ unsigned getLocID(SpillLocationNo Spill, unsigned SpillSubReg) {
+ unsigned short Size = TRI.getSubRegIdxSize(SpillSubReg);
+ unsigned short Offs = TRI.getSubRegIdxOffset(SpillSubReg);
+ return getLocID(Spill, {Size, Offs});
+ }
+
+ /// Produce location ID number for a spill position.
+ /// \param Spill The number of the spill we're fetching the location for.
+ /// \param Idx Size/offset within the spill slot to be addressed.
+ unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+ SlotNo += StackSlotIdxes[Idx];
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
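+ // Illustrative worked example (the numbers are hypothetical, not fixed by
+ // the interface): with NumRegs == 100 and NumSlotIdxes == 4, the position
+ // with StackSlotIdxes[Idx] == 1 inside the second spill slot
+ // (Spill.id() == 2) is assigned LocID (2 - 1) * 4 + 1 + 100 == 105.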
+
+ /// Given a spill number, and a slot within the spill, calculate the ID number
+ /// for that location.
+ unsigned getSpillIDWithIdx(SpillLocationNo Spill, unsigned Idx) {
+ unsigned SlotNo = Spill.id() - 1;
+ SlotNo *= NumSlotIdxes;
+ SlotNo += Idx;
+ SlotNo += NumRegs;
+ return SlotNo;
+ }
+
+ /// Return the spill number that a location ID corresponds to.
+ SpillLocationNo locIDToSpill(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ // Truncate away the index part, leaving only the spill number.
+ ID /= NumSlotIdxes;
+ return SpillLocationNo(ID + 1); // The UniqueVector is one-based.
+ }
+
+ /// Returns the spill-slot size/offset that a location ID corresponds to.
+ StackSlotPos locIDToSpillIdx(unsigned ID) const {
+ assert(ID >= NumRegs);
+ ID -= NumRegs;
+ unsigned Idx = ID % NumSlotIdxes;
+ return StackIdxesToPos.find(Idx)->second;
+ }
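+ // Continuing the hypothetical example above: locIDToSpill(105) computes
+ // (105 - 100) / 4 + 1 == 2, recovering the spill number, and
+ // locIDToSpillIdx(105) computes (105 - 100) % 4 == 1, which is looked up in
+ // StackIdxesToPos to recover the size/offset pair.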
+
+ unsigned getNumLocs(void) const { return LocIdxToIDNum.size(); }
+
+ /// Reset all locations to contain a PHI value at the designated block. Used
+ /// sometimes for actual PHI values, at other times to indicate the block
+ /// entry value (before any more information is known).
+ void setMPhis(unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ for (auto Location : locations())
+ Location.Value = {CurBB, 0, Location.Idx};
+ }
+
+ /// Load values for each location from an array of ValueIDNums. Takes the
+ /// current block number in case we read a value from a hitherto untouched
+ /// register.
+ void loadFromArray(ValueIDNum *Locs, unsigned NewCurBB) {
+ CurBB = NewCurBB;
+ // Iterate over all tracked locations, and load each locations live-in
+ // value into our local index.
+ for (auto Location : locations())
+ Location.Value = Locs[Location.Idx.asU64()];
+ }
+
+ /// Wipe any unnecessary location records after traversing a block.
+ void reset(void) {
+ // We could reset all the location values too; however either loadFromArray
+ // or setMPhis should be called before this object is re-used. Just
+ // clear Masks, they're definitely not needed.
+ Masks.clear();
+ }
+
+ /// Clear all data. Destroys the LocID <=> LocIdx map, which makes most of
+ /// the information in this pass uninterpretable.
+ void clear(void) {
+ reset();
+ LocIDToLocIdx.clear();
+ LocIdxToLocID.clear();
+ LocIdxToIDNum.clear();
+ // SpillLocs.reset(); XXX UniqueVector::reset assumes a SpillLoc casts from
+ // 0
+ SpillLocs = decltype(SpillLocs)();
+ StackSlotIdxes.clear();
+ StackIdxesToPos.clear();
+
+ LocIDToLocIdx.resize(NumRegs, LocIdx::MakeIllegalLoc());
+ }
+
+ /// Set a location to a certain value.
+ void setMLoc(LocIdx L, ValueIDNum Num) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ LocIdxToIDNum[L] = Num;
+ }
+
+ /// Read the value of a particular location.
+ ValueIDNum readMLoc(LocIdx L) {
+ assert(L.asU64() < LocIdxToIDNum.size());
+ return LocIdxToIDNum[L];
+ }
+
+ /// Create a LocIdx for an untracked register ID. Initialize it to either an
+ /// mphi value representing a live-in, or a recent register mask clobber.
+ LocIdx trackRegister(unsigned ID);
+
+ LocIdx lookupOrTrackRegister(unsigned ID) {
+ LocIdx &Index = LocIDToLocIdx[ID];
+ if (Index.isIllegal())
+ Index = trackRegister(ID);
+ return Index;
+ }
+
+ /// Is register R currently tracked by MLocTracker?
+ bool isRegisterTracked(Register R) {
+ LocIdx &Index = LocIDToLocIdx[R];
+ return !Index.isIllegal();
+ }
+
+ /// Record a definition of the specified register at the given block / inst.
+ /// This doesn't take a ValueIDNum, because the definition and its location
+ /// are synonymous.
+ void defReg(Register R, unsigned BB, unsigned Inst) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ ValueIDNum ValueID = {BB, Inst, Idx};
+ LocIdxToIDNum[Idx] = ValueID;
+ }
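+ // For example (hypothetical numbers): defReg(R, /*BB=*/4, /*Inst=*/7) makes
+ // R's location hold the value {4, 7, Idx}, i.e. "whatever instruction 7 of
+ // block 4 wrote here" -- the def's position and location identify the value.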
+
+ /// Set a register to a value number. To be used if the value number is
+ /// known in advance.
+ void setReg(Register R, ValueIDNum ValueID) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ LocIdxToIDNum[Idx] = ValueID;
+ }
+
+ ValueIDNum readReg(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = lookupOrTrackRegister(ID);
+ return LocIdxToIDNum[Idx];
+ }
+
+ /// Reset a register value to zero / empty. Needed to replicate the
+ /// VarLoc implementation where a copy to/from a register effectively
+ /// clears the contents of the source register. (Values can only have one
+ /// machine location in VarLocBasedImpl).
+ void wipeRegister(Register R) {
+ unsigned ID = getLocID(R);
+ LocIdx Idx = LocIDToLocIdx[ID];
+ LocIdxToIDNum[Idx] = ValueIDNum::EmptyValue;
+ }
+
+ /// Determine the LocIdx of an existing register.
+ LocIdx getRegMLoc(Register R) {
+ unsigned ID = getLocID(R);
+ assert(ID < LocIDToLocIdx.size());
+ assert(LocIDToLocIdx[ID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[ID];
+ }
+
+ /// Record a RegMask operand being executed. Defs any register we currently
+ /// track, and stores a pointer to the mask in case we have to account for it
+ /// later.
+ void writeRegMask(const MachineOperand *MO, unsigned CurBB, unsigned InstID);
+
+ /// Find the spill number for SpillLoc \p L, creating a new one if it's not
+ /// tracked.
+ SpillLocationNo getOrTrackSpillLoc(SpillLoc L);
+
+ /// Get LocIdx of a spill ID.
+ LocIdx getSpillMLoc(unsigned SpillID) {
+ assert(LocIDToLocIdx[SpillID] != UINT_MAX); // Sentinel for IndexedMap.
+ return LocIDToLocIdx[SpillID];
+ }
+
+ /// Return true if Idx is a spill machine location.
+ bool isSpill(LocIdx Idx) const { return LocIdxToLocID[Idx] >= NumRegs; }
+
+ MLocIterator begin() { return MLocIterator(LocIdxToIDNum, 0); }
+
+ MLocIterator end() {
+ return MLocIterator(LocIdxToIDNum, LocIdxToIDNum.size());
+ }
+
+ /// Return a range over all locations currently tracked.
+ iterator_range<MLocIterator> locations() {
+ return llvm::make_range(begin(), end());
+ }
+
+ std::string LocIdxToName(LocIdx Idx) const;
+
+ std::string IDAsString(const ValueIDNum &Num) const;
+
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump();
+
+ LLVM_DUMP_METHOD void dump_mloc_map();
+#endif
+
+ /// Create a DBG_VALUE based on machine location \p MLoc. Qualify it with the
+ /// information in \p Properties, for variable \p Var. Don't insert it anywhere,
+ /// just return the builder for it.
+ MachineInstrBuilder emitLoc(Optional<LocIdx> MLoc, const DebugVariable &Var,
+ const DbgValueProperties &Properties);
+};
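+// A rough sketch of how the tracker above is driven while stepping through a
+// block (illustrative only; the real driver is InstrRefBasedLDV below):
+//   MTracker->setMPhis(BBNum);              // live-ins become block PHIs
+//   for each instruction, for each def of register R:
+//     MTracker->defReg(R, BBNum, InstNum);  // record the newly defined value
+//   ValueIDNum V = MTracker->readReg(R);    // later: what does R hold now?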
+
+/// Collection of DBG_VALUEs observed when traversing a block. Records each
+/// variable and the value the DBG_VALUE refers to. Requires the machine value
+/// location dataflow algorithm to have run already, so that values can be
+/// identified.
+class VLocTracker {
+public:
+ /// Map DebugVariable to the latest Value it's defined to have.
+ /// Needs to be a MapVector because we determine order-in-the-input-MIR from
+ /// the order in this container.
+ /// We only retain the last DbgValue in each block for each variable, to
+ /// determine the block's live-out variable value. The Vars container forms the
+ /// transfer function for this block, as part of the dataflow analysis. The
+ /// movement of values between locations inside of a block is handled at a
+ /// much later stage, in the TransferTracker class.
+ MapVector<DebugVariable, DbgValue> Vars;
+ DenseMap<DebugVariable, const DILocation *> Scopes;
+ MachineBasicBlock *MBB = nullptr;
+
+public:
+ VLocTracker() {}
+
+ void defVar(const MachineInstr &MI, const DbgValueProperties &Properties,
+ Optional<ValueIDNum> ID) {
+ assert(MI.isDebugValue() || MI.isDebugRef());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValue Rec = (ID) ? DbgValue(*ID, Properties, DbgValue::Def)
+ : DbgValue(Properties, DbgValue::Undef);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
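+ // Illustrative example: if a block contains two DBG_VALUEs for the same
+ // variable "x", first reading value V1 and then V2, the second defVar call
+ // overwrites the first, so Vars maps "x" only to V2 -- exactly the
+ // last-assignment-per-block transfer function described above.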
+
+ void defVar(const MachineInstr &MI, const MachineOperand &MO) {
+ // Only DBG_VALUEs can define constant-valued variables.
+ assert(MI.isDebugValue());
+ DebugVariable Var(MI.getDebugVariable(), MI.getDebugExpression(),
+ MI.getDebugLoc()->getInlinedAt());
+ DbgValueProperties Properties(MI);
+ DbgValue Rec = DbgValue(MO, Properties, DbgValue::Const);
+
+ // Attempt insertion; overwrite if it's already mapped.
+ auto Result = Vars.insert(std::make_pair(Var, Rec));
+ if (!Result.second)
+ Result.first->second = Rec;
+ Scopes[Var] = MI.getDebugLoc().get();
+ }
+};
+
+/// Types for recording sets of variable fragments that overlap. For a given
+/// local variable, we record all other fragments of that variable that could
+/// overlap it, to reduce search time.
+using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
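+// Illustrative example (hypothetical fragments): if variable "x" has been seen
+// with fragment A covering bits [0, 32) and fragment B covering bits [16, 48),
+// the two overlap, so OverlapMap[{x, A}] contains B and OverlapMap[{x, B}]
+// contains A; an assignment to one fragment can then quickly invalidate the
+// other.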
+
+/// The instruction-referencing implementation of LiveDebugValues. Solves the
+/// machine value location and variable value dataflow problems (see
+/// buildMLocValueMap and buildVLocValueMap below), then emits the computed
+/// variable locations as DBG_VALUEs.
+class InstrRefBasedLDV : public LDVImpl {
+public:
+ friend class ::InstrRefLDVTest;
+
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
+ /// Machine location/value transfer function, a mapping of which locations
+ /// are assigned which new values.
+ using MLocTransferMap = SmallDenseMap<LocIdx, ValueIDNum>;
+
+ /// Live in/out structure for the variable values: a per-block map of
+ /// variables to their values.
+ using LiveIdxT = DenseMap<const MachineBasicBlock *, DbgValue *>;
+
+ using VarAndLoc = std::pair<DebugVariable, DbgValue>;
+
+ /// Type for a live-in value: the predecessor block, and its value.
+ using InValueT = std::pair<MachineBasicBlock *, DbgValue *>;
+
+ /// Vector (per block) of a collection (inner smallvector) of live-ins.
+ /// Used as the result type for the variable value dataflow problem.
+ using LiveInsT = SmallVector<SmallVector<VarAndLoc, 8>, 8>;
+
+private:
+ MachineDominatorTree *DomTree;
+ const TargetRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ const TargetFrameLowering *TFI;
+ const MachineFrameInfo *MFI;
+ BitVector CalleeSavedRegs;
+ LexicalScopes LS;
+ TargetPassConfig *TPC;
+
+ /// An empty DIExpression. Used for default / placeholder DbgValueProperties
+ /// objects, as we can't have null expressions.
+ const DIExpression *EmptyExpr;
+
+ /// Object to track machine locations as we step through a block. Could
+ /// probably be a field rather than a pointer, as it's always used.
+ MLocTracker *MTracker = nullptr;
+
+ /// Number of the current block LiveDebugValues is stepping through.
+ unsigned CurBB;
+
+ /// Number of the current instruction LiveDebugValues is evaluating.
+ unsigned CurInst;
+
+ /// Variable tracker -- listens to DBG_VALUEs occurring as InstrRefBasedImpl
+ /// steps through a block. Reads the values at each location from the
+ /// MLocTracker object.
+ VLocTracker *VTracker = nullptr;
+
+ /// Tracker for transfers, listens to DBG_VALUEs and transfers of values
+ /// between locations during stepping, creates new DBG_VALUEs when values move
+ /// location.
+ TransferTracker *TTracker = nullptr;
+
+ /// Blocks which are artificial, i.e. blocks which exclusively contain
+ /// instructions without DebugLocs, or with line 0 locations.
+ SmallPtrSet<const MachineBasicBlock *, 16> ArtificialBlocks;
+
+ // Mapping of blocks to and from their RPOT order.
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<const MachineBasicBlock *, unsigned int> BBToOrder;
+ DenseMap<unsigned, unsigned> BBNumToRPO;
+
+ /// Pair of MachineInstr, and its 1-based offset into the containing block.
+ using InstAndNum = std::pair<const MachineInstr *, unsigned>;
+ /// Map from debug instruction number to the MachineInstr labelled with that
+ /// number, and its location within the function. Used to transform
+ /// instruction numbers in DBG_INSTR_REFs into machine value numbers.
+ std::map<uint64_t, InstAndNum> DebugInstrNumToInstr;
+
+ /// Record of where we observed a DBG_PHI instruction.
+ class DebugPHIRecord {
+ public:
+ uint64_t InstrNum; ///< Instruction number of this DBG_PHI.
+ MachineBasicBlock *MBB; ///< Block where DBG_PHI occurred.
+ ValueIDNum ValueRead; ///< The value number read by the DBG_PHI.
+ LocIdx ReadLoc; ///< Register/Stack location the DBG_PHI reads.
+
+ operator unsigned() const { return InstrNum; }
+ };
+
+ /// Map from instruction numbers defined by DBG_PHIs to a record of what that
+ /// DBG_PHI read and where. Populated and edited during the machine value
+ /// location problem -- we use LLVM's SSA Updater to fix up changes made by
+ /// optimizations that destroy PHI instructions.
+ SmallVector<DebugPHIRecord, 32> DebugPHINumToValue;
+
+ // Map of overlapping variable fragments.
+ OverlapMap OverlapFragments;
+ VarToFragments SeenFragments;
+
+ /// Tests whether this instruction is a spill to a stack slot.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if \p MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not handle this yet (more than one memory operand).
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
+ /// If a given instruction is identified as a restore from a stack slot,
+ /// return the spill location and set \p Reg to the restored register.
+ Optional<SpillLocationNo> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg);
+
+ /// Given a spill instruction, extract the spill slot information, ensure it's
+ /// tracked, and return the spill number.
+ SpillLocationNo extractSpillBaseRegAndOffset(const MachineInstr &MI);
+
+ /// Observe a single instruction while stepping through a block.
+ void process(MachineInstr &MI, ValueIDNum **MLiveOuts = nullptr,
+ ValueIDNum **MLiveIns = nullptr);
+
+ /// Examines whether \p MI is a DBG_VALUE and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugValue(const MachineInstr &MI);
+
+ /// Examines whether \p MI is a DBG_INSTR_REF and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugInstrRef(MachineInstr &MI, ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns);
+
+ /// Stores value-information about where this PHI occurred, and what
+ /// instruction number is associated with it.
+ /// \returns true if MI was recognized and processed.
+ bool transferDebugPHI(MachineInstr &MI);
+
+ /// Examines whether \p MI is a copy instruction, and notifies trackers.
+ /// \returns true if MI was recognized and processed.
+ bool transferRegisterCopy(MachineInstr &MI);
+
+ /// Examines whether \p MI is a stack spill or restore instruction, and
+ /// notifies trackers. \returns true if MI was recognized and processed.
+ bool transferSpillOrRestoreInst(MachineInstr &MI);
+
+ /// Examines \p MI for any registers that it defines, and notifies trackers.
+ void transferRegisterDef(MachineInstr &MI);
+
+ /// Copy one location to the other, accounting for movement of subregisters
+ /// too.
+ void performCopy(Register Src, Register Dst);
+
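+ /// Accumulate the variable fragment described by \p MI into the
+ /// OverlapFragments and SeenFragments maps, recording which other fragments
+ /// of the same variable it overlaps.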
+ void accumulateFragmentMap(MachineInstr &MI);
+
+ /// Determine the machine value number referred to by (potentially several)
+ /// DBG_PHI instructions. Block duplication and tail folding can duplicate
+ /// DBG_PHIs, shifting the position where values in registers merge, and
+ /// forming another mini-ssa problem to solve.
+ /// \p Here the position of a DBG_INSTR_REF seeking a machine value number
+ /// \p InstrNum Debug instruction number defined by DBG_PHI instructions.
+ /// \returns The machine value number at position Here, or None.
+ Optional<ValueIDNum> resolveDbgPHIs(MachineFunction &MF,
+ ValueIDNum **MLiveOuts,
+ ValueIDNum **MLiveIns, MachineInstr &Here,
+ uint64_t InstrNum);
+
+ /// Step through the function, recording register definitions and movements
+ /// in an MLocTracker. Convert the observations into a per-block transfer
+ /// function in \p MLocTransfer, suitable for using with the machine value
+ /// location dataflow problem.
+ void
+ produceMLocTransferFunction(MachineFunction &MF,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer,
+ unsigned MaxNumBlocks);
+
+ /// Solve the machine value location dataflow problem. Takes as input the
+ /// transfer functions in \p MLocTransfer. Writes the output live-in and
+ /// live-out arrays to the (initialized to zero) multidimensional arrays in
+ /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block
+ /// number, the inner by LocIdx.
+ void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs,
+ ValueIDNum **MOutLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Examine the stack indexes (i.e. offsets within the stack) to find the
+ /// basic units of interference -- like reg units, but for the stack.
+ void findStackIndexInterference(SmallVectorImpl<unsigned> &Slots);
+
+ /// Install PHI values into the live-in array for each block, according to
+ /// the IDF of each register.
+ void placeMLocPHIs(MachineFunction &MF,
+ SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<MLocTransferMap> &MLocTransfer);
+
+ /// Calculate the iterated-dominance-frontier for a set of defs, using the
+ /// existing LLVM facilities for this. Works for a single "value" or
+ /// machine/variable location.
+ /// \p AllBlocks Set of blocks where we might consume the value.
+ /// \p DefBlocks Set of blocks where the value/location is defined.
+ /// \p PHIBlocks Output set of blocks where PHIs must be placed.
+ void BlockPHIPlacement(const SmallPtrSetImpl<MachineBasicBlock *> &AllBlocks,
+ const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+ SmallVectorImpl<MachineBasicBlock *> &PHIBlocks);
+
+ /// Perform a control flow join (lattice value meet) of the values in machine
+ /// locations at \p MBB. Follows the algorithm described in the file-comment,
+ /// reading live-outs of predecessors from \p OutLocs, the current live ins
+ /// from \p InLocs, and assigning the newly computed live ins back into
+ /// \p InLocs. \returns true if the newly computed live-in values differ from
+ /// the old ones, meaning the dataflow iteration has not yet converged for
+ /// this block.
+ bool mlocJoin(MachineBasicBlock &MBB,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
+ ValueIDNum **OutLocs, ValueIDNum *InLocs);
+
+ /// Solve the variable value dataflow problem, for a single lexical scope.
+ /// Uses the algorithm from the file comment to resolve control flow joins
+ /// using PHI placement and value propagation. Reads the locations of machine
+ /// values from the \p MInLocs and \p MOutLocs arrays (see buildMLocValueMap)
+ /// and reads the variable values transfer function from \p AllTheVlocs.
+ /// Live-in and Live-out variable values are stored locally, with the live-ins
+ /// permanently stored to \p Output once a fixed point is reached.
+ /// \p VarsWeCareAbout contains a collection of the variables in \p Scope
+ /// that we should be tracking.
+ /// \p AssignBlocks contains the set of blocks that aren't in \p DILoc's
+ /// scope, but which do contain DBG_VALUEs, which VarLocBasedImpl tracks
+ /// locations through.
+ void buildVLocValueMap(const DILocation *DILoc,
+ const SmallSet<DebugVariable, 4> &VarsWeCareAbout,
+ SmallPtrSetImpl<MachineBasicBlock *> &AssignBlocks,
+ LiveInsT &Output, ValueIDNum **MOutLocs,
+ ValueIDNum **MInLocs,
+ SmallVectorImpl<VLocTracker> &AllTheVLocs);
+
+ /// Attempt to eliminate unnecessary PHIs on entry to a block. Examines the
+ /// live-in values coming from the predecessors' live-outs, and replaces any
+ /// PHIs already present in this block's live-ins with a live-through value
+ /// if the PHI isn't needed.
+ /// \p LiveIn Old live-in value, overwritten with new one if live-in changes.
+ /// \returns true if any live-ins change value, either from value propagation
+ /// or PHI elimination.
+ bool vlocJoin(MachineBasicBlock &MBB, LiveIdxT &VLOCOutLocs,
+ SmallPtrSet<const MachineBasicBlock *, 8> &InScopeBlocks,
+ SmallPtrSet<const MachineBasicBlock *, 8> &BlocksToExplore,
+ DbgValue &LiveIn);
+
+ /// For the given block and live-outs feeding into it, try to find a
+ /// machine location where all the variable values join together.
+ /// \returns Value ID of a machine PHI if an appropriate one is available.
+ Optional<ValueIDNum>
+ pickVPHILoc(const MachineBasicBlock &MBB, const DebugVariable &Var,
+ const LiveIdxT &LiveOuts, ValueIDNum **MOutLocs,
+ const SmallVectorImpl<const MachineBasicBlock *> &BlockOrders);
+
+ /// Given the solutions to the two dataflow problems, machine value locations
+ /// in \p MInLocs and live-in variable values in \p SavedLiveIns, runs the
+ /// TransferTracker class over the function to produce live-in and transfer
+ /// DBG_VALUEs, then inserts them. Groups of DBG_VALUEs are inserted in the
+ /// order given by \p AllVarsNumbering -- this could be any stable order, but
+ /// right now it is the order of appearance in the function, when explored in
+ /// RPO, so that we can compare explicitly against VarLocBasedImpl.
+ void emitLocations(MachineFunction &MF, LiveInsT SavedLiveIns,
+ ValueIDNum **MOutLocs, ValueIDNum **MInLocs,
+ DenseMap<DebugVariable, unsigned> &AllVarsNumbering,
+ const TargetPassConfig &TPC);
+
+ /// Boilerplate computation of some initial sets, artificial blocks and
+ /// RPOT block ordering.
+ void initialSetup(MachineFunction &MF);
+
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
+
+public:
+ /// Default construct and initialize the pass.
+ InstrRefBasedLDV();
+
+ LLVM_DUMP_METHOD
+ void dump_mloc_transfer(const MLocTransferMap &mloc_transfer) const;
+
+ bool isCalleeSaved(LocIdx L) const;
+
+ bool hasFoldedStackStore(const MachineInstr &MI) {
+ // Instruction must have a memory operand that's a stack slot, and isn't
+ // aliased, meaning it's a spill from regalloc instead of a variable.
+ // If it's aliased, we can't guarantee its value.
+ if (!MI.hasOneMemOperand())
+ return false;
+ auto *MemOperand = *MI.memoperands_begin();
+ return MemOperand->isStore() &&
+ MemOperand->getPseudoValue() &&
+ MemOperand->getPseudoValue()->kind() == PseudoSourceValue::FixedStack
+ && !MemOperand->getPseudoValue()->isAliased(MFI);
+ }
+
+ Optional<LocIdx> findLocationForMemOperand(const MachineInstr &MI);
+};
+
+} // namespace LiveDebugValues
+
+namespace llvm {
+using namespace LiveDebugValues;
+
+template <> struct DenseMapInfo<LocIdx> {
+ static inline LocIdx getEmptyKey() { return LocIdx::MakeIllegalLoc(); }
+ static inline LocIdx getTombstoneKey() { return LocIdx::MakeTombstoneLoc(); }
+
+ static unsigned getHashValue(const LocIdx &Loc) { return Loc.asU64(); }
+
+ static bool isEqual(const LocIdx &A, const LocIdx &B) { return A == B; }
+};
+
+template <> struct DenseMapInfo<ValueIDNum> {
+ static inline ValueIDNum getEmptyKey() { return ValueIDNum::EmptyValue; }
+ static inline ValueIDNum getTombstoneKey() {
+ return ValueIDNum::TombstoneValue;
+ }
+
+ static unsigned getHashValue(const ValueIDNum &Val) { return Val.asU64(); }
+
+ static bool isEqual(const ValueIDNum &A, const ValueIDNum &B) {
+ return A == B;
+ }
+};
+
+} // end namespace llvm
+
+#endif /* LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_INSTRREFBASEDLDV_H */
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 38e803d1abb5..691977dc34e6 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -40,6 +40,19 @@ static cl::opt<bool>
"normal DBG_VALUE inputs"),
cl::init(false));
+// Options to prevent pathological compile-time behavior. If InputBBLimit and
+// InputDbgValueLimit are both exceeded, range extension is disabled.
+static cl::opt<unsigned> InputBBLimit(
+ "livedebugvalues-input-bb-limit",
+ cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
+ cl::init(10000), cl::Hidden);
+static cl::opt<unsigned> InputDbgValueLimit(
+ "livedebugvalues-input-dbg-value-limit",
+ cl::desc(
+ "Maximum input DBG_VALUE insts supported by debug range extension"),
+ cl::init(50000), cl::Hidden);
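+// Both limits are ordinary (hidden) flags, so they can be adjusted when
+// investigating compile-time problems, e.g. (illustrative invocation only):
+//   llc -livedebugvalues-input-bb-limit=20000 \
+//       -livedebugvalues-input-dbg-value-limit=100000 foo.ll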
+
+namespace {
/// Generic LiveDebugValues pass. Calls through to VarLocBasedLDV or
/// InstrRefBasedLDV to perform location propagation, via the LDVImpl
/// base class.
@@ -48,10 +61,7 @@ public:
static char ID;
LiveDebugValues();
- ~LiveDebugValues() {
- if (TheImpl)
- delete TheImpl;
- }
+ ~LiveDebugValues() {}
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -67,9 +77,12 @@ public:
}
private:
- LDVImpl *TheImpl;
+ std::unique_ptr<LDVImpl> InstrRefImpl;
+ std::unique_ptr<LDVImpl> VarLocImpl;
TargetPassConfig *TPC;
+ MachineDominatorTree MDT;
};
+} // namespace
char LiveDebugValues::ID = 0;
@@ -81,27 +94,26 @@ INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false,
/// Default construct and initialize the pass.
LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
- TheImpl = nullptr;
+ InstrRefImpl =
+ std::unique_ptr<LDVImpl>(llvm::makeInstrRefBasedLiveDebugValues());
+ VarLocImpl = std::unique_ptr<LDVImpl>(llvm::makeVarLocBasedLiveDebugValues());
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
- if (!TheImpl) {
- TPC = getAnalysisIfAvailable<TargetPassConfig>();
-
- bool InstrRefBased = false;
- if (TPC) {
- auto &TM = TPC->getTM<TargetMachine>();
- InstrRefBased = TM.Options.ValueTrackingVariableLocations;
- }
-
- // Allow the user to force selection of InstrRef LDV.
- InstrRefBased |= ForceInstrRefLDV;
-
- if (InstrRefBased)
- TheImpl = llvm::makeInstrRefBasedLiveDebugValues();
- else
- TheImpl = llvm::makeVarLocBasedLiveDebugValues();
+ bool InstrRefBased = MF.useDebugInstrRef();
+ // Allow the user to force selection of InstrRef LDV.
+ InstrRefBased |= ForceInstrRefLDV;
+
+ TPC = getAnalysisIfAvailable<TargetPassConfig>();
+ LDVImpl *TheImpl = &*VarLocImpl;
+
+ MachineDominatorTree *DomTree = nullptr;
+ if (InstrRefBased) {
+ DomTree = &MDT;
+ MDT.calculate(MF);
+ TheImpl = &*InstrRefImpl;
}
- return TheImpl->ExtendRanges(MF, TPC);
+ return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit,
+ InputDbgValueLimit);
}
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
index 9c910f180b9f..a5936c8a96f0 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h
@@ -9,6 +9,7 @@
#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
#define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -23,7 +24,9 @@ inline namespace SharedLiveDebugValues {
// implementation.
class LDVImpl {
public:
- virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) = 0;
+ virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) = 0;
virtual ~LDVImpl() {}
};
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index 1e6d65c18953..a632d3d9ce76 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -155,6 +155,7 @@
#include <cassert>
#include <cstdint>
#include <functional>
+#include <map>
#include <queue>
#include <tuple>
#include <utility>
@@ -166,18 +167,6 @@ using namespace llvm;
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
-// Options to prevent pathological compile-time behavior. If InputBBLimit and
-// InputDbgValueLimit are both exceeded, range extension is disabled.
-static cl::opt<unsigned> InputBBLimit(
- "livedebugvalues-input-bb-limit",
- cl::desc("Maximum input basic blocks before DBG_VALUE limit applies"),
- cl::init(10000), cl::Hidden);
-static cl::opt<unsigned> InputDbgValueLimit(
- "livedebugvalues-input-dbg-value-limit",
- cl::desc(
- "Maximum input DBG_VALUE insts supported by debug range extension"),
- cl::init(50000), cl::Hidden);
-
/// If \p Op is a stack or frame register return true, otherwise return false.
/// This is used to avoid basing the debug entry values on the registers, since
/// we do not support it at the moment.
@@ -296,6 +285,8 @@ private:
LexicalScopes LS;
VarLocSet::Allocator Alloc;
+ const MachineInstr *LastNonDbgMI;
+
enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
using FragmentInfo = DIExpression::FragmentInfo;
@@ -555,7 +546,6 @@ private:
EVKind == EntryValueLocKind::EntryValueKind ? Orig.getReg()
: Register(Loc.RegNo),
false));
- MOs.back().setIsDebug();
break;
case MachineLocKind::SpillLocKind: {
// Spills are indirect DBG_VALUEs, with a base register and offset.
@@ -565,9 +555,10 @@ private:
unsigned Base = Loc.SpillLocation.SpillBase;
auto *TRI = MF.getSubtarget().getRegisterInfo();
if (MI.isNonListDebugValue()) {
- DIExpr =
- TRI->prependOffsetExpression(DIExpr, DIExpression::ApplyOffset,
- Loc.SpillLocation.SpillOffset);
+ auto Deref = Indirect ? DIExpression::DerefAfter : 0;
+ DIExpr = TRI->prependOffsetExpression(
+ DIExpr, DIExpression::ApplyOffset | Deref,
+ Loc.SpillLocation.SpillOffset);
Indirect = true;
} else {
SmallVector<uint64_t, 4> Ops;
@@ -576,7 +567,6 @@ private:
DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, I);
}
MOs.push_back(MachineOperand::CreateReg(Base, false));
- MOs.back().setIsDebug();
break;
}
case MachineLocKind::ImmediateKind: {
@@ -626,7 +616,7 @@ private:
unsigned getRegIdx(Register Reg) const {
for (unsigned Idx = 0; Idx < Locs.size(); ++Idx)
if (Locs[Idx].Kind == MachineLocKind::RegisterKind &&
- Locs[Idx].Value.RegNo == Reg)
+ Register{static_cast<unsigned>(Locs[Idx].Value.RegNo)} == Reg)
return Idx;
llvm_unreachable("Could not find given Reg in Locs");
}
@@ -635,7 +625,7 @@ private:
/// add each of them to \p Regs and return true.
bool getDescribingRegs(SmallVectorImpl<uint32_t> &Regs) const {
bool AnyRegs = false;
- for (auto Loc : Locs)
+ for (const auto &Loc : Locs)
if (Loc.Kind == MachineLocKind::RegisterKind) {
Regs.push_back(Loc.Value.RegNo);
AnyRegs = true;
@@ -801,6 +791,10 @@ private:
LocIndex LocationID; ///< Location number for the transfer dest.
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
+ // Types for recording Entry Var Locations emitted by a single MachineInstr,
+ // as well as recording the MachineInstr which last defined a register.
+ using InstToEntryLocMap = std::multimap<const MachineInstr *, LocIndex>;
+ using RegDefToInstMap = DenseMap<Register, MachineInstr *>;
// Types for recording sets of variable fragments that overlap. For a given
// local variable, we record all other fragments of that variable that could
@@ -974,13 +968,22 @@ private:
Register NewReg = Register());
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs);
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
- bool removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, const VarLoc &EntryVL);
+ void cleanupEntryValueTransfers(const MachineInstr *MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers);
+ void removeEntryValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
VarLocsInRange &KillSet);
void recordEntryValue(const MachineInstr &MI,
const DefinedRegsSet &DefinedRegs,
@@ -988,12 +991,16 @@ private:
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
OverlapMap &OLapMap);
@@ -1007,7 +1014,9 @@ private:
/// had their instruction creation deferred.
void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
- bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) override;
+ bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) override;
public:
/// Default construct and initialize the pass.
@@ -1225,62 +1234,100 @@ VarLocBasedLDV::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
return {Reg, Offset};
}
+/// Clean up \p EntryValTransfers by removing the transfer that was created by
+/// \p TRInst and that uses the to-be-deleted \p EntryVL.
+void VarLocBasedLDV::cleanupEntryValueTransfers(
+ const MachineInstr *TRInst, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL, InstToEntryLocMap &EntryValTransfers) {
+ if (EntryValTransfers.empty() || TRInst == nullptr)
+ return;
+
+ auto TransRange = EntryValTransfers.equal_range(TRInst);
+ for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
+ const VarLoc &EmittedEV = VarLocIDs[TDPair.second];
+ if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) ==
+ std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo,
+ EmittedEV.Expr)) {
+ OpenRanges.erase(EmittedEV);
+ EntryValTransfers.erase(TRInst);
+ break;
+ }
+ }
+}
+
/// Try to salvage the debug entry value if we encounter a new debug value
/// describing the same parameter, otherwise stop tracking the value. Return
-/// true if we should stop tracking the entry value, otherwise return false.
-bool VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs,
- const VarLoc &EntryVL) {
+/// true if we should stop tracking the entry value and do the cleanup of
+/// emitted Entry Value Transfers, otherwise return false.
+void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ const VarLoc &EntryVL,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
// Skip the DBG_VALUE which is the debug entry value itself.
- if (MI.isIdenticalTo(EntryVL.MI))
- return false;
+ if (&MI == &EntryVL.MI)
+ return;
// If the parameter's location is not register location, we can not track
- // the entry value any more. In addition, if the debug expression from the
- // DBG_VALUE is not empty, we can assume the parameter's value has changed
- // indicating that we should stop tracking its entry value as well.
- if (!MI.getDebugOperand(0).isReg() ||
- MI.getDebugExpression()->getNumElements() != 0)
- return true;
-
- // If the DBG_VALUE comes from a copy instruction that copies the entry value,
- // it means the parameter's value has not changed and we should be able to use
- // its entry value.
+ // the entry value any more. There is no TransferInst that defines the
+ // register, so no Entry Value Transfers have been emitted already.
+ if (!MI.getDebugOperand(0).isReg())
+ return;
+
+ // Try to get non-debug instruction responsible for the DBG_VALUE.
+ const MachineInstr *TransferInst = nullptr;
Register Reg = MI.getDebugOperand(0).getReg();
- auto I = std::next(MI.getReverseIterator());
- const MachineOperand *SrcRegOp, *DestRegOp;
- if (I != MI.getParent()->rend()) {
+ if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end())
+ TransferInst = RegSetInstrs.find(Reg)->second;
+
+ // Case of the parameter's DBG_VALUE at the start of entry MBB.
+ if (!TransferInst && !LastNonDbgMI && MI.getParent()->isEntryBlock())
+ return;
+ // If the debug expression from the DBG_VALUE is not empty, we can assume the
+ // parameter's value has changed indicating that we should stop tracking its
+ // entry value as well.
+ if (MI.getDebugExpression()->getNumElements() == 0 && TransferInst) {
+ // If the DBG_VALUE comes from a copy instruction that copies the entry
+ // value, it means the parameter's value has not changed and we should be
+ // able to use its entry value.
// TODO: Try to keep tracking of an entry value if we encounter a propagated
// DBG_VALUE describing the copy of the entry value. (Propagated entry value
// does not indicate the parameter modification.)
- auto DestSrc = TII->isCopyInstr(*I);
- if (!DestSrc)
- return true;
-
- SrcRegOp = DestSrc->Source;
- DestRegOp = DestSrc->Destination;
- if (Reg != DestRegOp->getReg())
- return true;
-
- for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
- const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
- if (VL.isEntryValueCopyBackupReg(Reg) &&
- // Entry Values should not be variadic.
- VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
- return false;
+ auto DestSrc = TII->isCopyInstr(*TransferInst);
+ if (DestSrc) {
+ const MachineOperand *SrcRegOp, *DestRegOp;
+ SrcRegOp = DestSrc->Source;
+ DestRegOp = DestSrc->Destination;
+ if (Reg == DestRegOp->getReg()) {
+ for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
+ const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
+ if (VL.isEntryValueCopyBackupReg(Reg) &&
+ // Entry Values should not be variadic.
+ VL.MI.getDebugOperand(0).getReg() == SrcRegOp->getReg())
+ return;
+ }
+ }
}
}
- return true;
+ LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
+ MI.print(dbgs(), /*IsStandalone*/ false,
+ /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
+ /*AddNewLine*/ true, TII));
+ cleanupEntryValueTransfers(TransferInst, OpenRanges, VarLocIDs, EntryVL,
+ EntryValTransfers);
+ OpenRanges.erase(EntryVL);
}
/// End all previous ranges related to @MI and start a new range from @MI
/// if it is a DBG_VALUE instr.
void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs) {
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
if (!MI.isDebugValue())
return;
const DILocalVariable *Var = MI.getDebugVariable();
@@ -1297,13 +1344,8 @@ void VarLocBasedLDV::transferDebugValue(const MachineInstr &MI,
auto EntryValBackupID = OpenRanges.getEntryValueBackup(V);
if (Var->isParameter() && EntryValBackupID) {
const VarLoc &EntryVL = VarLocIDs[EntryValBackupID->back()];
- if (removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL)) {
- LLVM_DEBUG(dbgs() << "Deleting a DBG entry value because of: ";
- MI.print(dbgs(), /*IsStandalone*/ false,
- /*SkipOpers*/ false, /*SkipDebugLoc*/ false,
- /*AddNewLine*/ true, TII));
- OpenRanges.erase(EntryVL);
- }
+ removeEntryValue(MI, OpenRanges, VarLocIDs, EntryVL, EntryValTransfers,
+ RegSetInstrs);
}
if (all_of(MI.debug_operands(), [](const MachineOperand &MO) {
@@ -1351,7 +1393,7 @@ void VarLocBasedLDV::collectAllVarLocs(SmallVectorImpl<VarLoc> &Collected,
void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs,
- TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
VarLocsInRange &KillSet) {
// Do not insert entry value locations after a terminator.
if (MI.isTerminator())
@@ -1377,7 +1419,9 @@ void VarLocBasedLDV::emitEntryValues(MachineInstr &MI,
VarLoc EntryLoc = VarLoc::CreateEntryLoc(EntryVL.MI, LS, EntryVL.Expr,
EntryVL.Locs[0].Value.RegNo);
LocIndices EntryValueIDs = VarLocIDs.insert(EntryLoc);
- Transfers.push_back({&MI, EntryValueIDs.back()});
+ assert(EntryValueIDs.size() == 1 &&
+ "EntryValue loc should not be variadic");
+ EntryValTransfers.insert({&MI, EntryValueIDs.back()});
OpenRanges.insert(EntryValueIDs, EntryLoc);
}
}
@@ -1454,9 +1498,11 @@ void VarLocBasedLDV::insertTransferDebugPair(
}
/// A definition of a register may mark the end of a range.
-void VarLocBasedLDV::transferRegisterDef(
- MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
- TransferMap &Transfers) {
+void VarLocBasedLDV::transferRegisterDef(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
// Meta Instructions do not affect the debug liveness of any register they
// define.
@@ -1479,6 +1525,8 @@ void VarLocBasedLDV::transferRegisterDef(
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
// FIXME: Can we break out of this loop early if no insertion occurs?
DeadRegs.insert(*RAI);
+ RegSetInstrs.erase(MO.getReg());
+ RegSetInstrs.insert({MO.getReg(), &MI});
} else if (MO.isRegMask()) {
RegMasks.push_back(MO.getRegMask());
}
@@ -1505,6 +1553,10 @@ void VarLocBasedLDV::transferRegisterDef(
});
if (AnyRegMaskKillsReg)
DeadRegs.insert(Reg);
+ if (AnyRegMaskKillsReg) {
+ RegSetInstrs.erase(Reg);
+ RegSetInstrs.insert({Reg, &MI});
+ }
}
}
@@ -1518,7 +1570,7 @@ void VarLocBasedLDV::transferRegisterDef(
if (TPC) {
auto &TM = TPC->getTM<TargetMachine>();
if (TM.Options.ShouldEmitDebugEntryValues())
- emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, KillSet);
+ emitEntryValues(MI, OpenRanges, VarLocIDs, EntryValTransfers, KillSet);
}
}
@@ -1851,9 +1903,15 @@ void VarLocBasedLDV::accumulateFragmentMap(MachineInstr &MI,
/// This routine creates OpenRanges.
void VarLocBasedLDV::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers) {
- transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ InstToEntryLocMap &EntryValTransfers,
+ RegDefToInstMap &RegSetInstrs) {
+ if (!MI.isDebugInstr())
+ LastNonDbgMI = &MI;
+ transferDebugValue(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, EntryValTransfers,
+ RegSetInstrs);
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
@@ -2048,7 +2106,11 @@ void VarLocBasedLDV::recordEntryValue(const MachineInstr &MI,
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
-bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
+bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF,
+ MachineDominatorTree *DomTree,
+ TargetPassConfig *TPC, unsigned InputBBLimit,
+ unsigned InputDbgValLimit) {
+ (void)DomTree;
LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n");
if (!MF.getFunction().getSubprogram())
@@ -2079,6 +2141,10 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with transfers (such as
// spills, copies and restores).
+ // Map responsible MI to attached Transfer emitted from Backup Entry Value.
+ InstToEntryLocMap EntryValTransfers;
+ // Map a Register to the last MI which clobbered it.
+ RegDefToInstMap RegSetInstrs;
VarToFragments SeenFragments;
@@ -2141,7 +2207,7 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
for (auto &MI : MBB)
if (MI.isDebugValue())
++NumInputDbgValues;
- if (NumInputDbgValues > InputDbgValueLimit) {
+ if (NumInputDbgValues > InputDbgValLimit) {
LLVM_DEBUG(dbgs() << "Disabling VarLocBasedLDV: " << MF.getName()
<< " has " << RPONumber << " basic blocks and "
<< NumInputDbgValues
@@ -2175,8 +2241,11 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
// operate with registers that correspond to user variables.
// First load any pending inlocs.
OpenRanges.insertFromLocSet(getVarLocsInMBB(MBB, InLocs), VarLocIDs);
+ LastNonDbgMI = nullptr;
+ RegSetInstrs.clear();
for (auto &MI : *MBB)
- process(MI, OpenRanges, VarLocIDs, Transfers);
+ process(MI, OpenRanges, VarLocIDs, Transfers, EntryValTransfers,
+ RegSetInstrs);
OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
@@ -2210,6 +2279,18 @@ bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC) {
}
Transfers.clear();
+ // Add DBG_VALUEs created using Backup Entry Value location.
+ for (auto &TR : EntryValTransfers) {
+ MachineInstr *TRInst = const_cast<MachineInstr *>(TR.first);
+ assert(!TRInst->isTerminator() &&
+ "Cannot insert DBG_VALUE after terminator");
+ MachineBasicBlock *MBB = TRInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.second];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TRInst->getIterator(), MI);
+ }
+ EntryValTransfers.clear();
+
// Deferred inlocs will not have had any DBG_VALUE insts created; do
// that now.
flushPendingLocs(InLocs, VarLocIDs);
diff --git a/llvm/lib/CodeGen/LiveDebugVariables.cpp b/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 54058a547928..dcd546f9c6db 100644
--- a/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -417,7 +417,7 @@ public:
void addDef(SlotIndex Idx, ArrayRef<MachineOperand> LocMOs, bool IsIndirect,
bool IsList, const DIExpression &Expr) {
SmallVector<unsigned> Locs;
- for (MachineOperand Op : LocMOs)
+ for (const MachineOperand &Op : LocMOs)
Locs.push_back(getLocationNo(Op));
DbgVariableValue DbgValue(Locs, IsIndirect, IsList, Expr);
// Add a singular (Idx,Idx) -> value mapping.
@@ -1294,13 +1294,9 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf, bool InstrRef) {
static void removeDebugInstrs(MachineFunction &mf) {
for (MachineBasicBlock &MBB : mf) {
- for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) {
- if (!MBBI->isDebugInstr()) {
- ++MBBI;
- continue;
- }
- MBBI = MBB.erase(MBBI);
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB))
+ if (MI.isDebugInstr())
+ MBB.erase(&MI);
}
}
@@ -1314,12 +1310,7 @@ bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
// Have we been asked to track variable locations using instruction
// referencing?
- bool InstrRef = false;
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (TPC) {
- auto &TM = TPC->getTM<TargetMachine>();
- InstrRef = TM.Options.ValueTrackingVariableLocations;
- }
+ bool InstrRef = mf.useDebugInstrRef();
if (!pImpl)
pImpl = new LDVImpl(this);
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 1eed0ec5bbbe..9ded0fb6ae0a 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -592,21 +592,10 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
- if (RemoveDeadValNo) {
- // Check if val# is dead.
- bool isDead = true;
- for (const_iterator II = begin(), EE = end(); II != EE; ++II)
- if (II != I && II->valno == ValNo) {
- isDead = false;
- break;
- }
- if (isDead) {
- // Now that ValNo is dead, remove it.
- markValNoForDeletion(ValNo);
- }
- }
-
segments.erase(I); // Removed the whole Segment.
+
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
} else
I->start = End;
return;
@@ -627,13 +616,25 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
segments.insert(std::next(I), Segment(End, OldEnd, ValNo));
}
+LiveRange::iterator LiveRange::removeSegment(iterator I, bool RemoveDeadValNo) {
+ VNInfo *ValNo = I->valno;
+ I = segments.erase(I);
+ if (RemoveDeadValNo)
+ removeValNoIfDead(ValNo);
+ return I;
+}
+
+void LiveRange::removeValNoIfDead(VNInfo *ValNo) {
+ if (none_of(*this, [=](const Segment &S) { return S.valno == ValNo; }))
+ markValNoForDeletion(ValNo);
+}
+
/// removeValNo - Remove all the segments defined by the specified value#.
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- segments.erase(remove_if(*this, [ValNo](const Segment &S) {
- return S.valno == ValNo;
- }), end());
+ llvm::erase_if(segments,
+ [ValNo](const Segment &S) { return S.valno == ValNo; });
// Now that ValNo is dead, remove it.
markValNoForDeletion(ValNo);
}
@@ -1019,7 +1020,7 @@ void LiveRange::print(raw_ostream &OS) const {
// Print value number info.
if (getNumValNums()) {
- OS << " ";
+ OS << ' ';
unsigned vnum = 0;
for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e;
++i, ++vnum) {
@@ -1038,8 +1039,8 @@ void LiveRange::print(raw_ostream &OS) const {
}
void LiveInterval::SubRange::print(raw_ostream &OS) const {
- OS << " L" << PrintLaneMask(LaneMask) << ' '
- << static_cast<const LiveRange&>(*this);
+ OS << " L" << PrintLaneMask(LaneMask) << ' '
+ << static_cast<const LiveRange &>(*this);
}
void LiveInterval::print(raw_ostream &OS) const {
@@ -1048,7 +1049,7 @@ void LiveInterval::print(raw_ostream &OS) const {
// Print subranges
for (const SubRange &SR : subranges())
OS << SR;
- OS << " weight:" << Weight;
+ OS << " weight:" << Weight;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index dfa523d4bf41..50b31e1eb247 100644
--- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
- return is_contained(*InterferingVRegs, VirtReg);
+ return is_contained(InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
@@ -124,14 +124,11 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
// 2. SeenAllInterferences == true: InterferingVRegs complete, iterators unused.
// 3. Iterators left at the last seen intersection.
//
-unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs) {
- if (!InterferingVRegs)
- InterferingVRegs.emplace();
-
+unsigned
+LiveIntervalUnion::Query::collectInterferingVRegs(unsigned MaxInterferingRegs) {
// Fast path return if we already have the desired information.
- if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
- return InterferingVRegs->size();
+ if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
// Set up iterators on the first call.
if (!CheckedFirstInterference) {
@@ -160,14 +157,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveInterval *VReg = LiveUnionI.value();
if (VReg != RecentReg && !isSeenInterference(VReg)) {
RecentReg = VReg;
- InterferingVRegs->push_back(VReg);
- if (InterferingVRegs->size() >= MaxInterferingRegs)
- return InterferingVRegs->size();
+ InterferingVRegs.push_back(VReg);
+ if (InterferingVRegs.size() >= MaxInterferingRegs)
+ return InterferingVRegs.size();
}
// This LiveUnion segment is no longer interesting.
if (!(++LiveUnionI).valid()) {
SeenAllInterferences = true;
- return InterferingVRegs->size();
+ return InterferingVRegs.size();
}
}
@@ -188,7 +185,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
LiveUnionI.advanceTo(LRI->start);
}
SeenAllInterferences = true;
- return InterferingVRegs->size();
+ return InterferingVRegs.size();
}
void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 23036c2b115f..2f97386b6d18 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1571,15 +1571,14 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(EndIdx);
SlotIndex lastUseIdx;
- if (LII == LR.begin()) {
- // This happens when the function is called for a subregister that only
- // occurs _after_ the range that is to be repaired.
- return;
- }
- if (LII != LR.end() && LII->start < EndIdx)
+ if (LII != LR.end() && LII->start < EndIdx) {
lastUseIdx = LII->end;
- else
+ } else if (LII == LR.begin()) {
+ // We may not have a liverange at all if this is a subregister untouched
+ // between \p Begin and \p End.
+ } else {
--LII;
+ }
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
@@ -1593,10 +1592,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
// FIXME: This doesn't currently handle early-clobber or multiple removed
// defs inside of the region to repair.
- for (MachineInstr::mop_iterator OI = MI.operands_begin(),
- OE = MI.operands_end();
- OI != OE; ++OI) {
- const MachineOperand &MO = *OI;
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.getReg() != Reg)
continue;
@@ -1608,17 +1604,9 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
if (MO.isDef()) {
if (!isStartValid) {
if (LII->end.isDead()) {
- SlotIndex prevStart;
+ LII = LR.removeSegment(LII, true);
if (LII != LR.begin())
- prevStart = std::prev(LII)->start;
-
- // FIXME: This could be more efficient if there was a
- // removeSegment method that returned an iterator.
- LR.removeSegment(*LII, true);
- if (prevStart.isValid())
- LII = LR.find(prevStart);
- else
- LII = LR.begin();
+ --LII;
} else {
LII->start = instrIdx.getRegSlot();
LII->valno->def = instrIdx.getRegSlot();
@@ -1656,6 +1644,10 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
}
}
}
+
+ bool isStartValid = getInstructionFromIndex(LII->start);
+ if (!isStartValid && LII->end.isDead())
+ LR.removeSegment(*LII, true);
}
void
@@ -1678,22 +1670,33 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
Indexes->repairIndexesInRange(MBB, Begin, End);
+ // Make sure a live interval exists for all register operands in the range.
+ SmallVector<Register> RegsToRepair(OrigRegs.begin(), OrigRegs.end());
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
if (MI.isDebugOrPseudoInstr())
continue;
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) &&
- !hasInterval(MOI->getReg())) {
- createAndComputeVirtRegInterval(MOI->getReg());
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg().isVirtual()) {
+ Register Reg = MO.getReg();
+ // If the new instructions refer to subregs but the old instructions did
+ // not, throw away any old live interval so it will be recomputed with
+ // subranges.
+ if (MO.getSubReg() && hasInterval(Reg) &&
+ !getInterval(Reg).hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ removeInterval(Reg);
+ if (!hasInterval(Reg)) {
+ createAndComputeVirtRegInterval(Reg);
+ // Don't bother to repair a freshly calculated live interval.
+ erase_value(RegsToRepair, Reg);
+ }
}
}
}
- for (Register Reg : OrigRegs) {
+ for (Register Reg : RegsToRepair) {
if (!Reg.isVirtual())
continue;
@@ -1704,6 +1707,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (LiveInterval::SubRange &S : LI.subranges())
repairOldRegInRange(Begin, End, EndIdx, S, Reg, S.LaneMask);
+ LI.removeEmptySubRanges();
repairOldRegInRange(Begin, End, EndIdx, LI, Reg);
}
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index c0c7848139e4..d4848f16dcf2 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -81,22 +81,24 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
SmallVectorImpl<std::pair<MCPhysReg, const MachineOperand*>> &Clobbers) {
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
- if (O->isReg() && !O->isDebug()) {
+ if (O->isReg()) {
+ if (O->isDebug())
+ continue;
Register Reg = O->getReg();
- if (!Register::isPhysicalRegister(Reg))
+ if (!Reg.isPhysical())
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
// handle them.
Clobbers.push_back(std::make_pair(Reg, &*O));
} else {
- if (!O->isKill())
- continue;
assert(O->isUse());
- removeReg(Reg);
+ if (O->isKill())
+ removeReg(Reg);
}
- } else if (O->isRegMask())
+ } else if (O->isRegMask()) {
removeRegsInMask(*O, &Clobbers);
+ }
}
// Add defs to the set.
@@ -250,7 +252,7 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ for (const MachineInstr &MI : llvm::reverse(MBB))
LiveRegs.stepBackward(MI);
}
@@ -287,7 +289,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
// Recompute dead flags.
for (MIBundleOperands MO(MI); MO.isValid(); ++MO) {
if (!MO->isReg() || !MO->isDef() || MO->isDebug())
@@ -296,7 +298,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(Register::isPhysicalRegister(Reg));
+ assert(Reg.isPhysical());
bool IsNotLive = LiveRegs.available(MRI, Reg);
@@ -325,7 +327,7 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(Register::isPhysicalRegister(Reg));
+ assert(Reg.isPhysical());
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsKill(IsNotLive);
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index 64a2dd275643..d91ff734ad8f 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -107,7 +107,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
SlotIndex UseIdx) const {
OrigIdx = OrigIdx.getRegSlot(true);
- UseIdx = UseIdx.getRegSlot(true);
+ UseIdx = std::max(UseIdx, UseIdx.getRegSlot(true));
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
@@ -305,17 +305,18 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
isOrigDef = SlotIndex::isSameInstr(OrigVNI->def, Idx);
}
+ bool HasLiveVRegUses = false;
+
// Check for live intervals that may shrink
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (!MOI->isReg())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MOI->getReg();
+ Register Reg = MO.getReg();
if (!Register::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
- if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
+ if (Reg && MO.readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
- else if (MOI->isDef())
+ else if (MO.isDef())
LIS.removePhysRegDefAt(Reg.asMCReg(), Idx);
continue;
}
@@ -325,12 +326,14 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) ||
- (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI))))
+ if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
ToShrink.insert(&LI);
+ else if (MO.readsReg())
+ HasLiveVRegUses = true;
// Remove defined value.
- if (MOI->isDef()) {
+ if (MO.isDef()) {
if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr)
TheDelegate->LRE_WillShrinkVirtReg(LI.reg());
LIS.removeVRegDefAt(LI, Idx);
@@ -362,7 +365,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// the inst for remat of other siblings. The inst is saved in
// LiveRangeEdit::DeadRemats and will be deleted after all the
// allocations of the func are done.
- if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
+ // However, immediately delete instructions which have unshrunk virtual
+ // register uses. That may provoke RA to split an interval at the KILL
+ // and later result in an invalid live segment end.
+ if (isOrigDef && DeadRemats && !HasLiveVRegUses &&
+ TII.isTriviallyReMaterializable(*MI, AA)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest, false);
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
@@ -405,8 +412,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr *> &Dead,
break;
// Shrink just one live interval. Then delete new dead defs.
- LiveInterval *LI = ToShrink.back();
- ToShrink.pop_back();
+ LiveInterval *LI = ToShrink.pop_back_val();
if (foldAsLoad(LI, Dead))
continue;
unsigned VReg = LI->reg();
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 7181dbc9c870..51ba4b7e53eb 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -119,8 +119,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
while (!WorkList.empty()) {
- MachineBasicBlock *Pred = WorkList.back();
- WorkList.pop_back();
+ MachineBasicBlock *Pred = WorkList.pop_back_val();
MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
}
}
@@ -484,8 +483,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
- Register Reg = Defs.back();
- Defs.pop_back();
+ Register Reg = Defs.pop_back_val();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
@@ -671,6 +669,86 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
return false;
}
+void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
+ assert(Reg.isVirtual());
+
+ VarInfo &VI = getVarInfo(Reg);
+ VI.AliveBlocks.clear();
+ VI.Kills.clear();
+
+ MachineInstr &DefMI = *MRI->getUniqueVRegDef(Reg);
+ MachineBasicBlock &DefBB = *DefMI.getParent();
+
+ // Handle the case where all uses have been removed.
+ if (MRI->use_nodbg_empty(Reg)) {
+ VI.Kills.push_back(&DefMI);
+ DefMI.addRegisterDead(Reg, nullptr);
+ return;
+ }
+ DefMI.clearRegisterDeads(Reg);
+
+ // Initialize a worklist of BBs that Reg is live-to-end of. (Here
+ // "live-to-end" means Reg is live at the end of a block even if it is only
+ // live because of phi uses in a successor. This is different from isLiveOut()
+ // which does not consider phi uses.)
+ SmallVector<MachineBasicBlock *> LiveToEndBlocks;
+ SparseBitVector<> UseBlocks;
+ for (auto &UseMO : MRI->use_nodbg_operands(Reg)) {
+ UseMO.setIsKill(false);
+ MachineInstr &UseMI = *UseMO.getParent();
+ MachineBasicBlock &UseBB = *UseMI.getParent();
+ UseBlocks.set(UseBB.getNumber());
+ if (UseMI.isPHI()) {
+ // If Reg is used in a phi then it is live-to-end of the corresponding
+ // predecessor.
+ unsigned Idx = UseMI.getOperandNo(&UseMO);
+ LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB());
+ } else if (&UseBB == &DefBB) {
+ // A non-phi use in the same BB as the single def must come after the def.
+ } else {
+ // Otherwise Reg must be live-to-end of all predecessors.
+ LiveToEndBlocks.append(UseBB.pred_begin(), UseBB.pred_end());
+ }
+ }
+
+ // Iterate over the worklist adding blocks to AliveBlocks.
+ bool LiveToEndOfDefBB = false;
+ while (!LiveToEndBlocks.empty()) {
+ MachineBasicBlock &BB = *LiveToEndBlocks.pop_back_val();
+ if (&BB == &DefBB) {
+ LiveToEndOfDefBB = true;
+ continue;
+ }
+ if (VI.AliveBlocks.test(BB.getNumber()))
+ continue;
+ VI.AliveBlocks.set(BB.getNumber());
+ LiveToEndBlocks.append(BB.pred_begin(), BB.pred_end());
+ }
+
+ // Recompute kill flags. For each block in which Reg is used but is not
+ // live-through, find the last instruction that uses Reg. Ignore phi nodes
+ // because they should not be included in Kills.
+ for (unsigned UseBBNum : UseBlocks) {
+ if (VI.AliveBlocks.test(UseBBNum))
+ continue;
+ MachineBasicBlock &UseBB = *MF->getBlockNumbered(UseBBNum);
+ if (&UseBB == &DefBB && LiveToEndOfDefBB)
+ continue;
+ for (auto &MI : reverse(UseBB)) {
+ if (MI.isDebugOrPseudoInstr())
+ continue;
+ if (MI.isPHI())
+ break;
+ if (MI.readsRegister(Reg)) {
+ assert(!MI.killsRegister(Reg));
+ MI.addRegisterKilled(Reg, nullptr);
+ VI.Kills.push_back(&MI);
+ break;
+ }
+ }
+ }
+}
+
/// replaceKillInstruction - Update register kill info by replacing a kill
/// instruction with a new one.
void LiveVariables::replaceKillInstruction(Register Reg, MachineInstr &OldMI,
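
The recomputeForSingleDefVirtReg addition above walks liveness backwards with a worklist: every block that feeds a use (or a phi's incoming edge) becomes live-to-end, and its predecessors are queued until the defining block is reached. Below is a minimal standalone sketch of that worklist propagation over a toy CFG, using only the C++ standard library; the block numbering and helper names are illustrative, not the LLVM API.

// Sketch only: computes the set of blocks where a value is live-to-end,
// given its single defining block and the blocks that feed its uses.
// The CFG is a toy adjacency list of predecessors, indexed by block number.
#include <set>
#include <vector>

using Block = int;

std::set<Block> computeLiveToEnd(const std::vector<std::vector<Block>> &Preds,
                                 Block DefBlock,
                                 const std::vector<Block> &UseFeeders) {
  std::set<Block> LiveToEnd;
  std::vector<Block> Worklist(UseFeeders.begin(), UseFeeders.end());
  while (!Worklist.empty()) {
    Block B = Worklist.back();
    Worklist.pop_back();
    if (B == DefBlock)
      continue;                       // liveness stops at the single def
    if (!LiveToEnd.insert(B).second)
      continue;                       // already visited
    for (Block P : Preds[B])          // keep walking towards the def
      Worklist.push_back(P);
  }
  return LiveToEnd;
}

The real pass additionally records kill flags per use block and treats phi uses and the def block specially; the sketch only shows the core propagation.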
diff --git a/llvm/lib/CodeGen/LoopTraversal.cpp b/llvm/lib/CodeGen/LoopTraversal.cpp
index 9490dfc40a82..0d400253c652 100644
--- a/llvm/lib/CodeGen/LoopTraversal.cpp
+++ b/llvm/lib/CodeGen/LoopTraversal.cpp
@@ -39,8 +39,7 @@ LoopTraversal::TraversalOrder LoopTraversal::traverse(MachineFunction &MF) {
bool Primary = true;
Workqueue.push_back(MBB);
while (!Workqueue.empty()) {
- MachineBasicBlock *ActiveMBB = &*Workqueue.back();
- Workqueue.pop_back();
+ MachineBasicBlock *ActiveMBB = Workqueue.pop_back_val();
bool Done = isBlockDone(ActiveMBB);
MBBTraversalOrder.push_back(TraversedMBBInfo(ActiveMBB, Primary, Done));
for (MachineBasicBlock *Succ : ActiveMBB->successors()) {
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index 62e9c6b629d3..dce64ab9f5ca 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -52,6 +52,16 @@ MVT llvm::getMVTForLLT(LLT Ty) {
Ty.getNumElements());
}
+EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
+ LLVMContext &Ctx) {
+ if (Ty.isVector()) {
+ EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
+ return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
+ }
+
+ return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
+}
+
LLT llvm::getLLTForMVT(MVT Ty) {
if (!Ty.isVector())
return LLT::scalar(Ty.getSizeInBits());
diff --git a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
index 8ef6aca602a1..3ec8c627f131 100644
--- a/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/llvm/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -38,10 +38,6 @@
using namespace llvm;
-namespace llvm {
-extern char &MIRCanonicalizerID;
-} // namespace llvm
-
#define DEBUG_TYPE "mir-canonicalizer"
static cl::opt<unsigned>
@@ -332,8 +328,8 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
continue;
std::vector<MachineOperand *> Uses;
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
- Uses.push_back(&*UI);
+ for (MachineOperand &MO : MRI.use_operands(Dst))
+ Uses.push_back(&MO);
for (auto *MO : Uses)
MO->setReg(Src);
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 87fde7d39a60..0ca820f160aa 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -261,6 +261,8 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("liveout", MIToken::kw_liveout)
.Case("address-taken", MIToken::kw_address_taken)
.Case("landing-pad", MIToken::kw_landing_pad)
+ .Case("inlineasm-br-indirect-target",
+ MIToken::kw_inlineasm_br_indirect_target)
.Case("ehfunclet-entry", MIToken::kw_ehfunclet_entry)
.Case("liveins", MIToken::kw_liveins)
.Case("successors", MIToken::kw_successors)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index 68425b41c3fb..70d17f819ce3 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -116,6 +116,7 @@ struct MIToken {
kw_liveout,
kw_address_taken,
kw_landing_pad,
+ kw_inlineasm_br_indirect_target,
kw_ehfunclet_entry,
kw_liveins,
kw_successors,
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 34e1f9225d42..1a04e1ca56a9 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -498,7 +498,7 @@ public:
MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx);
bool parseOffset(int64_t &Offset);
- bool parseAlignment(unsigned &Alignment);
+ bool parseAlignment(uint64_t &Alignment);
bool parseAddrspace(unsigned &Addrspace);
bool parseSectionID(Optional<MBBSectionID> &SID);
bool parseOperandsOffset(MachineOperand &Op);
@@ -674,9 +674,10 @@ bool MIParser::parseBasicBlockDefinition(
lex();
bool HasAddressTaken = false;
bool IsLandingPad = false;
+ bool IsInlineAsmBrIndirectTarget = false;
bool IsEHFuncletEntry = false;
Optional<MBBSectionID> SectionID;
- unsigned Alignment = 0;
+ uint64_t Alignment = 0;
BasicBlock *BB = nullptr;
if (consumeIfPresent(MIToken::lparen)) {
do {
@@ -690,6 +691,10 @@ bool MIParser::parseBasicBlockDefinition(
IsLandingPad = true;
lex();
break;
+ case MIToken::kw_inlineasm_br_indirect_target:
+ IsInlineAsmBrIndirectTarget = true;
+ lex();
+ break;
case MIToken::kw_ehfunclet_entry:
IsEHFuncletEntry = true;
lex();
@@ -737,6 +742,7 @@ bool MIParser::parseBasicBlockDefinition(
if (HasAddressTaken)
MBB->setHasAddressTaken();
MBB->setIsEHPad(IsLandingPad);
+ MBB->setIsInlineAsmBrIndirectTarget(IsInlineAsmBrIndirectTarget);
MBB->setIsEHFuncletEntry(IsEHFuncletEntry);
if (SectionID.hasValue()) {
MBB->setSectionID(SectionID.getValue());
@@ -1011,10 +1017,6 @@ bool MIParser::parse(MachineInstr *&MI) {
Optional<unsigned> TiedDefIdx;
if (parseMachineOperandAndTargetFlags(OpCode, Operands.size(), MO, TiedDefIdx))
return true;
- if ((OpCode == TargetOpcode::DBG_VALUE ||
- OpCode == TargetOpcode::DBG_VALUE_LIST) &&
- MO.isReg())
- MO.setIsDebug();
Operands.push_back(
ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -2898,16 +2900,16 @@ bool MIParser::parseOffset(int64_t &Offset) {
return false;
}
-bool MIParser::parseAlignment(unsigned &Alignment) {
+bool MIParser::parseAlignment(uint64_t &Alignment) {
assert(Token.is(MIToken::kw_align) || Token.is(MIToken::kw_basealign));
lex();
if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
return error("expected an integer literal after 'align'");
- if (getUnsigned(Alignment))
+ if (getUint64(Alignment))
return true;
lex();
- if (!isPowerOf2_32(Alignment))
+ if (!isPowerOf2_64(Alignment))
return error("expected a power-of-2 literal after 'align'");
return false;
@@ -3261,7 +3263,7 @@ bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) {
if (parseMachinePointerInfo(Ptr))
return true;
}
- unsigned BaseAlignment =
+ uint64_t BaseAlignment =
(Size != MemoryLocation::UnknownSize ? PowerOf2Ceil(Size) : 1);
AAMDNodes AAInfo;
MDNode *Range = nullptr;
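
The parseAlignment change above widens the alignment to uint64_t and switches the check to isPowerOf2_64, so MIR can round-trip alignments larger than what fits in 32 bits. A quick standalone sketch of the power-of-two test involved, assuming the usual bit trick rather than quoting LLVM's MathExtras.h implementation:

#include <cassert>
#include <cstdint>

// Sketch of the power-of-two test the parser relies on; LLVM's own
// isPowerOf2_64 in MathExtras.h is the real implementation.
static bool isPow2_64(uint64_t Value) {
  return Value != 0 && (Value & (Value - 1)) == 0;
}

int main() {
  assert(isPow2_64(1ULL << 32)); // representable only with a 64-bit alignment
  assert(!isPow2_64(3));
  return 0;
}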
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index d77104752880..6221b5929301 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -454,6 +454,9 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.getProperties().set(MachineFunctionProperties::Property::Selected);
if (YamlMF.FailedISel)
MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ if (YamlMF.FailsVerification)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 2a78bb62762a..f1369396e37f 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -217,6 +217,8 @@ void MIRPrinter::print(const MachineFunction &MF) {
MachineFunctionProperties::Property::Selected);
YamlMF.FailedISel = MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel);
+ YamlMF.FailsVerification = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
MachineModuleSlotTracker MST(&MF);
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
new file mode 100644
index 000000000000..90ecc6fc68fc
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -0,0 +1,343 @@
+//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MIRSampleProfile loader, mainly
+// for flow sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRSampleProfile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::sampleprofutil;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "fs-profile-loader"
+
+static cl::opt<bool> ShowFSBranchProb(
+ "show-fs-branchprob", cl::Hidden, cl::init(false),
+ cl::desc("Print setting flow sensitive branch probabilities"));
+static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
+ "fs-profile-debug-prob-diff-threshold", cl::init(10),
+ cl::desc("Only show debug message if the branch probility is greater than "
+ "this value (in percentage)."));
+
+static cl::opt<unsigned> FSProfileDebugBWThreshold(
+ "fs-profile-debug-bw-threshold", cl::init(10000),
+ cl::desc("Only show debug message if the source branch weight is greater "
+ " than this value."));
+
+static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI before MIR loader"));
+static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
+ cl::init(false),
+ cl::desc("View BFI after MIR loader"));
+
+char MIRProfileLoaderPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
+ "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
+ /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
+
+FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
+ std::string RemappingFile,
+ FSDiscriminatorPass P) {
+ return new MIRProfileLoaderPass(File, RemappingFile, P);
+}
+
+namespace llvm {
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi={none | fraction | integer | count}
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+namespace afdo_detail {
+template <> struct IRTraits<MachineBasicBlock> {
+ using InstructionT = MachineInstr;
+ using BasicBlockT = MachineBasicBlock;
+ using FunctionT = MachineFunction;
+ using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
+ using LoopT = MachineLoop;
+ using LoopInfoPtrT = MachineLoopInfo *;
+ using DominatorTreePtrT = MachineDominatorTree *;
+ using PostDominatorTreePtrT = MachinePostDominatorTree *;
+ using PostDominatorTreeT = MachinePostDominatorTree;
+ using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
+ using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
+ using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+ static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
+ static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
+ return GraphTraits<const MachineFunction *>::getEntryNode(F);
+ }
+ static PredRangeT getPredecessors(MachineBasicBlock *BB) {
+ return BB->predecessors();
+ }
+ static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
+ return BB->successors();
+ }
+};
+} // namespace afdo_detail
+
+class MIRProfileLoader final
+ : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+public:
+ void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
+ MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
+ MachineOptimizationRemarkEmitter *MORE) {
+ DT = MDT;
+ PDT = MPDT;
+ LI = MLI;
+ BFI = MBFI;
+ ORE = MORE;
+ }
+ void setFSPass(FSDiscriminatorPass Pass) {
+ P = Pass;
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+ assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+ }
+
+ MIRProfileLoader(StringRef Name, StringRef RemapName)
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
+ }
+
+ void setBranchProbs(MachineFunction &F);
+ bool runOnFunction(MachineFunction &F);
+ bool doInitialization(Module &M);
+ bool isValid() const { return ProfileIsValid; }
+
+protected:
+ friend class SampleCoverageTracker;
+
+ /// Hold the information of the basic block frequency.
+ MachineBlockFrequencyInfo *BFI;
+
+ /// PassNum is the sequence number in which this pass is called, starting from 1.
+ FSDiscriminatorPass P;
+
+ // LowBit in the FS discriminator used by this instance. Note the number is
+ // 0-based. The base discriminator uses bit 0 to bit 11.
+ unsigned LowBit;
+ // HighBit in the FS discriminator used by this instance. Note the number
+ // is 0-based.
+ unsigned HighBit;
+
+ bool ProfileIsValid = true;
+};
+
+template <>
+void SampleProfileLoaderBaseImpl<
+ MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+
+void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
+ LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+ for (auto &BI : F) {
+ MachineBasicBlock *BB = &BI;
+ if (BB->succ_size() < 2)
+ continue;
+ const MachineBasicBlock *EC = EquivalenceClass[BB];
+ uint64_t BBWeight = BlockWeights[EC];
+ uint64_t SumEdgeWeight = 0;
+ for (MachineBasicBlock *Succ : BB->successors()) {
+ Edge E = std::make_pair(BB, Succ);
+ SumEdgeWeight += EdgeWeights[E];
+ }
+
+ if (BBWeight != SumEdgeWeight) {
+ LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
+ << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+ << "\n");
+ BBWeight = SumEdgeWeight;
+ }
+ if (BBWeight == 0) {
+ LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+ continue;
+ }
+
+#ifndef NDEBUG
+ uint64_t BBWeightOrig = BBWeight;
+#endif
+ uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+ uint32_t Factor = 1;
+ if (BBWeight > MaxWeight) {
+ Factor = BBWeight / MaxWeight + 1;
+ BBWeight /= Factor;
+ LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+ }
+
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ MachineBasicBlock *Succ = *SI;
+ Edge E = std::make_pair(BB, Succ);
+ uint64_t EdgeWeight = EdgeWeights[E];
+ EdgeWeight /= Factor;
+
+ assert(BBWeight >= EdgeWeight &&
+ "BBweight is larger than EdgeWeight -- should not happen.\n");
+
+ BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
+ BranchProbability NewProb(EdgeWeight, BBWeight);
+ if (OldProb == NewProb)
+ continue;
+ BB->setSuccProbability(SI, NewProb);
+#ifndef NDEBUG
+ if (!ShowFSBranchProb)
+ continue;
+ bool Show = false;
+ BranchProbability Diff;
+ if (OldProb > NewProb)
+ Diff = OldProb - NewProb;
+ else
+ Diff = NewProb - OldProb;
+ Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
+ Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
+
+ auto DIL = BB->findBranchDebugLoc();
+ auto SuccDIL = Succ->findBranchDebugLoc();
+ if (Show) {
+ dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+ << Succ->getNumber() << "): ";
+ if (DIL)
+ dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+ << DIL->getColumn();
+ if (SuccDIL)
+ dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
+ << ":" << SuccDIL->getColumn();
+ dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb
+ << "\n";
+ }
+#endif
+ }
+ }
+}
+
+bool MIRProfileLoader::doInitialization(Module &M) {
+ auto &Ctx = M.getContext();
+
+ auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
+ RemappingFilename);
+ if (std::error_code EC = ReaderOrErr.getError()) {
+ std::string Msg = "Could not open profile: " + EC.message();
+ Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+ return false;
+ }
+
+ Reader = std::move(ReaderOrErr.get());
+ Reader->setModule(&M);
+ ProfileIsValid = (Reader->read() == sampleprof_error::success);
+ Reader->getSummary();
+
+ return true;
+}
+
+bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+ Function &Func = MF.getFunction();
+ clearFunctionData(false);
+ Samples = Reader->getSamplesFor(Func);
+ if (!Samples || Samples->empty())
+ return false;
+
+ if (getFunctionLoc(MF) == 0)
+ return false;
+
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
+
+ // Set the new BPI, BFI.
+ setBranchProbs(MF);
+
+ return Changed;
+}
+
+} // namespace llvm
+
+MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName,
+ std::string RemappingFileName,
+ FSDiscriminatorPass P)
+ : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
+ MIRSampleLoader(
+ std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+ LowBit = getFSPassBitBegin(P);
+ HighBit = getFSPassBitEnd(P);
+ assert(LowBit < HighBit && "HighBit needs to be greater than LowBit");
+}
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+ if (!MIRSampleLoader->isValid())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+ << MF.getFunction().getName() << "\n");
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MIRSampleLoader->setInitVals(
+ &getAnalysis<MachineDominatorTree>(),
+ &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+ MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+ MF.RenumberBlocks();
+ if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
+ }
+
+ bool Changed = MIRSampleLoader->runOnFunction(MF);
+
+ if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+ (ViewBlockFreqFuncName.empty() ||
+ MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+ MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+ }
+
+ return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+ LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+ << "\n");
+
+ MIRSampleLoader->setFSPass(P);
+ return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachinePostDominatorTree>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
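
setBranchProbs in the new file above has to squeeze 64-bit sample weights into BranchProbability's 32-bit numerator and denominator: when the block weight overflows UINT32_MAX it divides the block weight and every edge weight by the same factor, which keeps the edge/block ratios (the probabilities) intact. A standalone illustration of that scaling, with plain integers standing in for the LLVM types:

#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// Sketch: scale a block weight and its outgoing edge weights so they fit in
// 32 bits while keeping every edge/block ratio (the branch probability) intact.
int main() {
  uint64_t BBWeight = 10000000000ULL;                      // > UINT32_MAX
  std::vector<uint64_t> EdgeWeights = {7500000000ULL, 2500000000ULL};

  const uint64_t MaxWeight = std::numeric_limits<uint32_t>::max();
  uint64_t Factor = 1;
  if (BBWeight > MaxWeight)
    Factor = BBWeight / MaxWeight + 1;                     // same as the pass

  BBWeight /= Factor;
  for (uint64_t &E : EdgeWeights) {
    E /= Factor;
    // Numerator and denominator now both fit in uint32_t; the ratio survives.
    std::printf("prob ~= %f\n", double(E) / double(BBWeight));
  }
  return 0;
}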
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index c6914dcd0e54..23c511aaa056 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -134,9 +134,8 @@ void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::instr_iterator
- I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
- I->AddRegOperandsToUseLists(RegInfo);
+ for (MachineInstr &MI : N->instrs())
+ MI.AddRegOperandsToUseLists(RegInfo);
}
void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
@@ -281,8 +280,8 @@ MachineBasicBlock::getLastNonDebugInstr(bool SkipPseudoOp) {
}
bool MachineBasicBlock::hasEHPadSuccessor() const {
- for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
- if ((*I)->isEHPad())
+ for (const MachineBasicBlock *Succ : successors())
+ if (Succ->isEHPad())
return true;
return false;
}
@@ -517,6 +516,11 @@ void MachineBasicBlock::printName(raw_ostream &os, unsigned printNameFlags,
os << "landing-pad";
hasAttributes = true;
}
+ if (isInlineAsmBrIndirectTarget()) {
+ os << (hasAttributes ? ", " : " (");
+ os << "inlineasm-br-indirect-target";
+ hasAttributes = true;
+ }
if (isEHFuncletEntry()) {
os << (hasAttributes ? ", " : " (");
os << "ehfunclet-entry";
@@ -1037,17 +1041,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
I != E; ++I) {
MachineInstr *MI = &*I;
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || OI->getReg() == 0 ||
- !OI->isUse() || !OI->isKill() || OI->isUndef())
+ for (MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
+ MO.isUndef())
continue;
- Register Reg = OI->getReg();
+ Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
- LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
- OI->setIsKill(false);
+ LLVM_DEBUG(dbgs() << "Removing terminator kill: " << MI);
+ MO.setIsKill(false);
}
}
}
@@ -1058,12 +1061,11 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
I != E; ++I) {
MachineInstr *MI = &*I;
- for (MachineInstr::mop_iterator OI = MI->operands_begin(),
- OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || OI->getReg() == 0)
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.getReg() == 0)
continue;
- Register Reg = OI->getReg();
+ Register Reg = MO.getReg();
if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index f61142d202eb..8a1b4031642d 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1185,7 +1185,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// The integrated tail duplication is really designed for increasing
// fallthrough from predecessors from Succ to its successors. We may need
// other mechanism to handle different cases.
- if (Succ->succ_size() == 0)
+ if (Succ->succ_empty())
return true;
// Plus the already placed predecessor.
@@ -2050,6 +2050,8 @@ MachineBlockPlacement::findBestLoopTopHelper(
BlockChain &HeaderChain = *BlockToChain[OldTop];
if (!LoopBlockSet.count(*HeaderChain.begin()))
return OldTop;
+ if (OldTop != *HeaderChain.begin())
+ return OldTop;
LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
<< "\n");
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index cb2e18e8c813..0fcb07252d0e 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -514,41 +514,38 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
SmallVector<unsigned, 2> ImplicitDefsToUpdate;
SmallVector<unsigned, 2> ImplicitDefs;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
-
- if (!isCSECandidate(MI))
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isCSECandidate(&MI))
continue;
- bool FoundCSE = VNT.count(MI);
+ bool FoundCSE = VNT.count(&MI);
if (!FoundCSE) {
// Using trivial copy propagation to find more CSE opportunities.
- if (PerformTrivialCopyPropagation(MI, MBB)) {
+ if (PerformTrivialCopyPropagation(&MI, MBB)) {
Changed = true;
// After coalescing MI itself may become a copy.
- if (MI->isCopyLike())
+ if (MI.isCopyLike())
continue;
// Try again to see if CSE is possible.
- FoundCSE = VNT.count(MI);
+ FoundCSE = VNT.count(&MI);
}
}
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->isCommutable()) {
- if (MachineInstr *NewMI = TII->commuteInstruction(*MI)) {
+ if (!FoundCSE && MI.isCommutable()) {
+ if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
Commuted = true;
FoundCSE = VNT.count(NewMI);
- if (NewMI != MI) {
+ if (NewMI != &MI) {
// New instruction. It doesn't need to be kept.
NewMI->eraseFromParent();
Changed = true;
} else if (!FoundCSE)
// MI was changed but it didn't help, commute it back!
- (void)TII->commuteInstruction(*MI);
+ (void)TII->commuteInstruction(MI);
}
}
@@ -559,8 +556,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
SmallSet<MCRegister, 8> PhysRefs;
PhysDefVector PhysDefs;
bool PhysUseDef = false;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
- PhysDefs, PhysUseDef)) {
+ if (FoundCSE &&
+ hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
FoundCSE = false;
// ... Unless the CS is local or is in the sole predecessor block
@@ -569,23 +566,23 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// This can never be the case if the instruction both uses and
// defines the same physical register, which was detected above.
if (!PhysUseDef) {
- unsigned CSVN = VNT.lookup(MI);
+ unsigned CSVN = VNT.lookup(&MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
+ if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
FoundCSE = true;
}
}
if (!FoundCSE) {
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
continue;
}
// Found a common subexpression, eliminate it.
- unsigned CSVN = VNT.lookup(MI);
+ unsigned CSVN = VNT.lookup(&MI);
MachineInstr *CSMI = Exps[CSVN];
- LLVM_DEBUG(dbgs() << "Examining: " << *MI);
+ LLVM_DEBUG(dbgs() << "Examining: " << MI);
LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
// Prevent CSE-ing non-local convergent instructions.
@@ -597,20 +594,20 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// definition, so it's necessary to use `isConvergent` to prevent illegally
// CSE-ing the subset of `isConvergent` instructions which do fall into this
// extended definition.
- if (MI->isConvergent() && MI->getParent() != CSMI->getParent()) {
+ if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
"different BBs, avoid CSE!\n");
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
continue;
}
// Check if it's profitable to perform this CSE.
bool DoCSE = true;
- unsigned NumDefs = MI->getNumDefs();
+ unsigned NumDefs = MI.getNumDefs();
- for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
Register OldReg = MO.getReg();
@@ -635,7 +632,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
Register::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
- if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
@@ -674,7 +671,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
for (const auto &PhysDef : PhysDefs)
- if (!MI->getOperand(PhysDef.first).isDead())
+ if (!MI.getOperand(PhysDef.first).isDead())
CSMI->getOperand(PhysDef.first).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
@@ -687,8 +684,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
// Since we eliminated MI, and reused a register imp-def'd by CSMI
// (here %nzcv), that register, if it was killed before MI, should have
// that kill flag removed, because its lifetime was extended.
- if (CSMI->getParent() == MI->getParent()) {
- for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II)
+ if (CSMI->getParent() == MI.getParent()) {
+ for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
for (auto ImplicitDef : ImplicitDefs)
if (MachineOperand *MO = II->findRegisterUseOperand(
ImplicitDef, /*isKill=*/true, TRI))
@@ -711,7 +708,7 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
++NumCrossBBCSEs;
}
- MI->eraseFromParent();
+ MI.eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
++NumPhysCSEs;
@@ -719,8 +716,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
++NumCommutes;
Changed = true;
} else {
- VNT.insert(MI, CurrVN++);
- Exps.push_back(MI);
+ VNT.insert(&MI, CurrVN++);
+ Exps.push_back(&MI);
}
CSEPairs.clear();
ImplicitDefsToUpdate.clear();
@@ -807,19 +804,16 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
- if (!isPRECandidate(MI))
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!isPRECandidate(&MI))
continue;
- if (!PREMap.count(MI)) {
- PREMap[MI] = MBB;
+ if (!PREMap.count(&MI)) {
+ PREMap[&MI] = MBB;
continue;
}
- auto MBB1 = PREMap[MI];
+ auto MBB1 = PREMap[&MI];
assert(
!DT->properlyDominates(MBB, MBB1) &&
"MBB cannot properly dominate MBB1 while DFS through dominators tree!");
@@ -844,17 +838,17 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
// it's necessary to use `isConvergent` to prevent illegally PRE-ing the
// subset of `isConvergent` instructions which do fall into this
// extended definition.
- if (MI->isConvergent() && CMBB != MBB)
+ if (MI.isConvergent() && CMBB != MBB)
continue;
- assert(MI->getOperand(0).isDef() &&
+ assert(MI.getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
- Register VReg = MI->getOperand(0).getReg();
+ Register VReg = MI.getOperand(0).getReg();
Register NewReg = MRI->cloneVirtualRegister(VReg);
- if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
continue;
MachineInstr &NewMI =
- TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);
// When hoisting, make sure we don't carry the debug location of
// the original instruction, as that's not correct and can cause
@@ -864,7 +858,7 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
NewMI.getOperand(0).setReg(NewReg);
- PREMap[MI] = CMBB;
+ PREMap[&MI] = CMBB;
++NumPREs;
Changed = true;
}
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 10b74f5f47f5..7c83bacd80d9 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -414,6 +414,31 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
if (!UseI.isCopy())
return false;
+ const TargetRegisterClass *CopySrcRC =
+ TRI->getMinimalPhysRegClass(CopySrcReg);
+ const TargetRegisterClass *UseDstRC =
+ TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
+ const TargetRegisterClass *CrossCopyRC = TRI->getCrossCopyRegClass(CopySrcRC);
+
+ // If the cross copy register class is not the same as the copy source
+ // register class, then the register cannot be copied directly and a cross
+ // register class copy is required. Forwarding this copy without checking the
+ // register class of UseDst may create additional cross register copies when
+ // expanding the copy instruction in later passes.
+ if (CopySrcRC != CrossCopyRC) {
+ const TargetRegisterClass *CopyDstRC =
+ TRI->getMinimalPhysRegClass(Copy.getOperand(0).getReg());
+
+ // Check if UseDstRC matches the necessary register class to copy from
+ // CopySrc's register class. If so then forwarding the copy will not
+ // introduce any cross-class copies. Else if CopyDstRC matches then keep the
+ // copy and do not forward. If neither UseDstRC nor CopyDstRC matches then
+ // we may need a cross register copy later but we do not worry about it
+ // here.
+ if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
+ return false;
+ }
+
/// COPYs don't have register class constraints, so if the user instruction
/// is a COPY, we just try to avoid introducing additional cross-class
/// COPYs. For example:
@@ -430,9 +455,6 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
///
/// so we have reduced the number of cross-class COPYs and potentially
/// introduced a nop COPY that can be removed.
- const TargetRegisterClass *UseDstRC =
- TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
-
const TargetRegisterClass *SuperRC = UseDstRC;
for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
SuperRC; SuperRC = *SuperRCI++)
@@ -554,6 +576,7 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
MOUse.setReg(CopySrcReg);
if (!CopySrc.isRenamable())
MOUse.setIsRenamable(false);
+ MOUse.setIsUndef(CopySrc.isUndef());
LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
@@ -571,19 +594,16 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName()
<< "\n");
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
// Analyze copies (which don't overlap themselves).
- if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
- MI->getOperand(1).getReg())) {
- assert(MI->getOperand(0).getReg().isPhysical() &&
- MI->getOperand(1).getReg().isPhysical() &&
+ if (MI.isCopy() && !TRI->regsOverlap(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg())) {
+ assert(MI.getOperand(0).getReg().isPhysical() &&
+ MI.getOperand(1).getReg().isPhysical() &&
"MachineCopyPropagation should be run after register allocation!");
- MCRegister Def = MI->getOperand(0).getReg().asMCReg();
- MCRegister Src = MI->getOperand(1).getReg().asMCReg();
+ MCRegister Def = MI.getOperand(0).getReg().asMCReg();
+ MCRegister Src = MI.getOperand(1).getReg().asMCReg();
// The two copies cancel out and the source of the first copy
// hasn't been overridden, eliminate the second one. e.g.
@@ -600,31 +620,31 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// %ecx = COPY %eax
// =>
// %ecx = COPY %eax
- if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
+ if (eraseIfRedundant(MI, Def, Src) || eraseIfRedundant(MI, Src, Def))
continue;
- forwardUses(*MI);
+ forwardUses(MI);
// Src may have been changed by forwardUses()
- Src = MI->getOperand(1).getReg().asMCReg();
+ Src = MI.getOperand(1).getReg().asMCReg();
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- ReadRegister(Src, *MI, RegularUse);
- for (const MachineOperand &MO : MI->implicit_operands()) {
+ ReadRegister(Src, MI, RegularUse);
+ for (const MachineOperand &MO : MI.implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
MCRegister Reg = MO.getReg().asMCReg();
if (!Reg)
continue;
- ReadRegister(Reg, *MI, RegularUse);
+ ReadRegister(Reg, MI, RegularUse);
}
- LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
+ LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI.dump());
// Copy is now a candidate for deletion.
if (!MRI->isReserved(Def))
- MaybeDeadCopies.insert(MI);
+ MaybeDeadCopies.insert(&MI);
// If 'Def' is previously source of another copy, then this earlier copy's
// source is no longer available. e.g.
@@ -634,7 +654,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
// ...
// %xmm2 = copy %xmm9
Tracker.clobberRegister(Def, *TRI);
- for (const MachineOperand &MO : MI->implicit_operands()) {
+ for (const MachineOperand &MO : MI.implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
MCRegister Reg = MO.getReg().asMCReg();
@@ -643,29 +663,29 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clobberRegister(Reg, *TRI);
}
- Tracker.trackCopy(MI, *TRI);
+ Tracker.trackCopy(&MI, *TRI);
continue;
}
// Clobber any earlyclobber regs first.
- for (const MachineOperand &MO : MI->operands())
+ for (const MachineOperand &MO : MI.operands())
if (MO.isReg() && MO.isEarlyClobber()) {
MCRegister Reg = MO.getReg().asMCReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
if (MO.isTied())
- ReadRegister(Reg, *MI, RegularUse);
+ ReadRegister(Reg, MI, RegularUse);
Tracker.clobberRegister(Reg, *TRI);
}
- forwardUses(*MI);
+ forwardUses(MI);
// Not a copy.
SmallVector<Register, 2> Defs;
const MachineOperand *RegMask = nullptr;
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
if (MO.isRegMask())
RegMask = &MO;
if (!MO.isReg())
@@ -681,7 +701,7 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Defs.push_back(Reg.asMCReg());
continue;
} else if (MO.readsReg())
- ReadRegister(Reg.asMCReg(), *MI, MO.isDebug() ? DebugUse : RegularUse);
+ ReadRegister(Reg.asMCReg(), MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
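
The new check in isForwardableRegClassCopy above keeps a copy rather than forwarding it when forwarding would swap a cheap same-class copy for a cross-class one. A schematic of that three-way comparison with placeholder class identifiers; the function name and string-based classes are illustrative only, not the LLVM API:

#include <string>

// Sketch of the forwarding decision. CrossCopyRC stands for the class a value
// of CopySrcRC must pass through when copied across register banks.
bool shouldKeepCopy(const std::string &CopySrcRC, const std::string &CopyDstRC,
                    const std::string &UseDstRC,
                    const std::string &CrossCopyRC) {
  // Copying out of CopySrcRC needs an intermediate cross-class copy.
  if (CopySrcRC != CrossCopyRC) {
    // If the use's destination does not match the cross-copy class but the
    // original copy's destination does, the existing copy is the cheap one:
    // keep it and do not forward.
    if (UseDstRC != CrossCopyRC && CopyDstRC == CrossCopyRC)
      return true;  // keep the copy, do not forward
  }
  return false;     // forwarding is fine (or at least no worse)
}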
diff --git a/llvm/lib/CodeGen/MachineDominators.cpp b/llvm/lib/CodeGen/MachineDominators.cpp
index c8845d838282..28cff2a4f3f3 100644
--- a/llvm/lib/CodeGen/MachineDominators.cpp
+++ b/llvm/lib/CodeGen/MachineDominators.cpp
@@ -73,7 +73,7 @@ void MachineDominatorTree::releaseMemory() {
void MachineDominatorTree::verifyAnalysis() const {
if (DT && VerifyMachineDomInfo)
- if (!DT->verify(DomTreeT::VerificationLevel::Basic)) {
+ if (!DT->verify(MachineDomTree::VerificationLevel::Basic)) {
errs() << "MachineDominatorTree verification failed\n";
abort();
}
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 0a454b68aca3..366d06871245 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -99,6 +99,7 @@ static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
case P::Selected: return "Selected";
case P::TracksLiveness: return "TracksLiveness";
case P::TiedOpsRewritten: return "TiedOpsRewritten";
+ case P::FailsVerification: return "FailsVerification";
}
llvm_unreachable("Invalid machine function property");
}
@@ -129,8 +130,8 @@ void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
static inline unsigned getFnStackAlignment(const TargetSubtargetInfo *STI,
const Function &F) {
- if (F.hasFnAttribute(Attribute::StackAlignment))
- return F.getFnStackAlignment();
+ if (auto MA = F.getFnStackAlign())
+ return MA->value();
return STI->getFrameLowering()->getStackAlign().value();
}
@@ -745,9 +746,8 @@ MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
// Add filters in a list.
auto *CVal = cast<Constant>(Val);
SmallVector<const GlobalValue *, 4> FilterList;
- for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
- II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+ for (const Use &U : CVal->operands())
+ FilterList.push_back(cast<GlobalValue>(U->stripPointerCasts()));
addFilterTypeInfo(LandingPad, FilterList);
}
@@ -973,6 +973,9 @@ void MachineFunction::makeDebugValueSubstitution(DebugInstrOperandPair A,
unsigned Subreg) {
// Catch any accidental self-loops.
assert(A.first != B.first);
+ // Don't allow any substitutions _from_ the memory operand number.
+ assert(A.second != DebugOperandMemNumber);
+
DebugValueSubstitutions.push_back({A, B, Subreg});
}
@@ -1148,17 +1151,17 @@ auto MachineFunction::salvageCopySSA(MachineInstr &MI)
// locations.
;
} else {
- // Assert that this is the entry block. If it isn't, then there is some
- // code construct we don't recognise that deals with physregs across
- // blocks.
+ // Assert that this is the entry block, or an EH pad. If it isn't, then
+ // there is some code construct we don't recognise that deals with physregs
+ // across blocks.
assert(!State.first.isVirtual());
- assert(&*InsertBB.getParent()->begin() == &InsertBB);
+ assert(&*InsertBB.getParent()->begin() == &InsertBB || InsertBB.isEHPad());
}
// Create DBG_PHI for specified physreg.
auto Builder = BuildMI(InsertBB, InsertBB.getFirstNonPHI(), DebugLoc(),
TII.get(TargetOpcode::DBG_PHI));
- Builder.addReg(State.first, RegState::Debug);
+ Builder.addReg(State.first);
unsigned NewNum = getNewDebugInstrNum();
Builder.addImm(NewNum);
return ApplySubregisters({NewNum, 0u});
@@ -1171,10 +1174,9 @@ void MachineFunction::finalizeDebugInstrRefs() {
const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_VALUE);
MI.setDesc(RefII);
MI.getOperand(1).ChangeToRegister(0, false);
- MI.getOperand(0).setIsDebug();
};
- if (!getTarget().Options.ValueTrackingVariableLocations)
+ if (!useDebugInstrRef())
return;
for (auto &MBB : *this) {
@@ -1221,6 +1223,27 @@ void MachineFunction::finalizeDebugInstrRefs() {
}
}
+bool MachineFunction::useDebugInstrRef() const {
+ // Disable instr-ref at -O0: it's very slow (in compile time). We can still
+ // have optimized code inlined into this unoptimized code; however, with
+ // fewer and less aggressive optimizations happening, coverage and accuracy
+ // should not suffer.
+ if (getTarget().getOptLevel() == CodeGenOpt::None)
+ return false;
+
+ // Don't use instr-ref if this function is marked optnone.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return false;
+
+ if (getTarget().Options.ValueTrackingVariableLocations)
+ return true;
+
+ return false;
+}
+
+// Use one million as a high / reserved number.
+const unsigned MachineFunction::DebugOperandMemNumber = 1000000;
+
/// \}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 0707945e7fb7..5c4f75e9ceb9 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -294,6 +294,9 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
NewMO->setIsEarlyClobber(true);
}
+ // Ensure debug instructions set debug flag on register uses.
+ if (NewMO->isUse() && isDebugInstr())
+ NewMO->setIsDebug();
}
}
@@ -2111,11 +2114,11 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- auto MIB = BuildMI(MF, DL, MCID).addReg(Reg, RegState::Debug);
+ auto MIB = BuildMI(MF, DL, MCID).addReg(Reg);
if (IsIndirect)
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Variable).addMetadata(Expr);
}
@@ -2134,7 +2137,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
if (IsIndirect)
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Variable).addMetadata(Expr);
}
@@ -2153,7 +2156,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MIB.addMetadata(Variable).addMetadata(Expr);
for (const MachineOperand &MO : MOs)
if (MO.isReg())
- MIB.addReg(MO.getReg(), RegState::Debug);
+ MIB.addReg(MO.getReg());
else
MIB.add(MO);
return MIB;
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b7..500cf8e0b79b 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
+ bool isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const;
+
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses. Even though it is rematerializable, RA might not
+/// actually rematerialize it in this scenario. In that case we do not want to
+/// hoist such an instruction out of the loop in the belief that RA will sink
+/// it back if needed.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+ AAResults *AA) const {
+ if (!TII->isTriviallyReMaterializable(MI, AA))
+ return false;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ return false;
+ }
+
+ return true;
+}
+
void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
@@ -761,15 +781,11 @@ void MachineLICMBase::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
// Process the block
SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = MBB->begin(), E = MBB->end(); MII != E; ) {
- MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr *MI = &*MII;
- if (!Hoist(MI, Preheader))
- UpdateRegPressure(MI);
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
+ if (!Hoist(&MI, Preheader))
+ UpdateRegPressure(&MI);
// If we have hoisted an instruction that may store, it can only be a
// constant store.
- MII = NextMII;
}
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
@@ -1156,9 +1172,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
return false;
}
- // Rematerializable instructions should always be hoisted since the register
- // allocator can just pull them down again when needed.
- if (TII->isTriviallyReMaterializable(MI, AA))
+ // Rematerializable instructions should always be hoisted, provided the
+ // register allocator can just pull them down again when needed.
+ if (isTriviallyReMaterializable(MI, AA))
return true;
// FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1227,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!TII->isTriviallyReMaterializable(MI, AA) &&
+ if (!isTriviallyReMaterializable(MI, AA) &&
!MI.isDereferenceableInvariantLoad(AA)) {
LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
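The new MachineLICMBase::isTriviallyReMaterializable wrapper only reports an instruction as rematerializable for hoisting when it also has no virtual register uses, since the register allocator may never sink it back otherwise. A minimal sketch of that check under simplified stand-in types (TargetSaysRemat stands in for the TII->isTriviallyReMaterializable query):

```cpp
// Minimal sketch (simplified model, not the LLVM classes): only treat an
// instruction as remat-for-hoisting when the target says it is trivially
// rematerializable *and* it reads no virtual registers.
#include <vector>

struct Op {
  bool IsReg = false;
  bool IsUse = false;
  bool IsVirtual = false;
};

struct Instr {
  bool TargetSaysRemat = false; // stand-in for TII->isTriviallyReMaterializable
  std::vector<Op> Operands;
};

static bool isTriviallyReMaterializableForHoist(const Instr &MI) {
  if (!MI.TargetSaysRemat)
    return false;
  for (const Op &MO : MI.Operands)
    if (MO.IsReg && MO.IsUse && MO.IsVirtual)
      return false; // RA might never sink it back; do not hoist on that hope
  return true;
}
```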
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 8f91a5b698d0..9b96bc5e5e7f 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/InitializePasses.h"
@@ -154,7 +155,9 @@ MachineLoopInfo::findLoopPreheader(MachineLoop *L, bool SpeculativePreheader,
bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
MachineFunction *MF = I.getParent()->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+ const TargetInstrInfo *TII = ST.getInstrInfo();
// The instruction is loop invariant if all of its operands are.
for (const MachineOperand &MO : I.operands()) {
@@ -174,7 +177,8 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I) const {
// However, if the physreg is known to always be caller saved/restored
// then this use is safe to hoist.
if (!MRI->isConstantPhysReg(Reg) &&
- !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())))
+ !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
+ !TII->isIgnorableUse(MO))
return false;
// Otherwise it's safe to move.
continue;
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index b8ba0453d24c..4d080e1a4f82 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -250,6 +250,11 @@ void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
if (RegInfo && WasReg)
RegInfo->removeRegOperandFromUseList(this);
+ // Ensure debug instructions set debug flag on register uses.
+ const MachineInstr *MI = getParent();
+ if (!isDef && MI && MI->isDebugInstr())
+ isDebug = true;
+
// Change this to a register and set the reg#.
assert(!(isDead && !isDef) && "Dead flag on non-def");
assert(!(isKill && isDef) && "Kill flag on def");
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 1d55bd00e033..cfbccebaff3e 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -798,6 +798,7 @@ bool MachineOutliner::outline(Module &M,
Last = std::next(CallInst.getReverse());
Iter != Last; Iter++) {
MachineInstr *MI = &*Iter;
+ SmallSet<Register, 2> InstrUseRegs;
for (MachineOperand &MOP : MI->operands()) {
// Skip over anything that isn't a register.
if (!MOP.isReg())
@@ -806,7 +807,8 @@ bool MachineOutliner::outline(Module &M,
if (MOP.isDef()) {
// Introduce DefRegs set to skip the redundant register.
DefRegs.insert(MOP.getReg());
- if (!MOP.isDead() && UseRegs.count(MOP.getReg()))
+ if (UseRegs.count(MOP.getReg()) &&
+ !InstrUseRegs.count(MOP.getReg()))
// Since the register is modeled as defined,
// it does not need to be put in the use register set.
UseRegs.erase(MOP.getReg());
@@ -814,6 +816,7 @@ bool MachineOutliner::outline(Module &M,
// Any register which is not undefined should
// be put in the use register set.
UseRegs.insert(MOP.getReg());
+ InstrUseRegs.insert(MOP.getReg());
}
}
if (MI->isCandidateForCallSiteEntry())
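The InstrUseRegs set above keeps a register in the pending-use set when the very same instruction both reads and writes it, so a def now only cancels uses recorded by other instructions. A small sketch of the updated bookkeeping with ordinary std::set containers and hypothetical register numbers:

```cpp
// Minimal sketch (hypothetical register ids, simplified operand walk): a def
// only removes a register from the pending-use set if the use came from a
// *different* instruction.
#include <set>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; };
using Instr = std::vector<Operand>;

static void updateUseSet(const Instr &MI, std::set<unsigned> &UseRegs) {
  std::set<unsigned> InstrUseRegs; // uses seen in this instruction only
  for (const Operand &MOP : MI) {
    if (MOP.IsDef) {
      if (UseRegs.count(MOP.Reg) && !InstrUseRegs.count(MOP.Reg))
        UseRegs.erase(MOP.Reg); // the def covers a use from another instruction
    } else {
      UseRegs.insert(MOP.Reg);
      InstrUseRegs.insert(MOP.Reg);
    }
  }
}
```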
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index caa3f8049aeb..e18318386def 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -200,8 +200,7 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
if (!EnableSWP)
return false;
- if (mf.getFunction().getAttributes().hasAttribute(
- AttributeList::FunctionIndex, Attribute::OptimizeForSize) &&
+ if (mf.getFunction().getAttributes().hasFnAttr(Attribute::OptimizeForSize) &&
!EnableSWPOptSize.getPosition())
return false;
@@ -386,7 +385,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
MachineRegisterInfo &MRI = MF->getRegInfo();
SlotIndexes &Slots = *getAnalysis<LiveIntervals>().getSlotIndexes();
- for (MachineInstr &PI : make_range(B.begin(), B.getFirstNonPHI())) {
+ for (MachineInstr &PI : B.phis()) {
MachineOperand &DefOp = PI.getOperand(0);
assert(DefOp.getSubReg() == 0);
auto *RC = MRI.getRegClass(DefOp.getReg());
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 3f6b11e072b4..19bf87d3e290 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -383,9 +383,7 @@ void MachineRegisterInfo::replaceRegWith(Register FromReg, Register ToReg) {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
// TODO: This could be more efficient by bulk changing the operands.
- for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O : llvm::make_early_inc_range(reg_operands(FromReg))) {
if (Register::isPhysicalRegister(ToReg)) {
O.substPhysReg(ToReg, *TRI);
} else {
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4f42a2c8aeff..47d40f0823c8 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -583,7 +583,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
<< " " << MBB->getName() << "\n From: " << *I
<< " To: ";
if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
- else dbgs() << "End";
+ else dbgs() << "End\n";
dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
if (DumpCriticalPathLength) {
errs() << MF->getName();
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index ec98394dca79..30745c7a5583 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -131,7 +131,7 @@ namespace {
// will be split.
SetVector<std::pair<MachineBasicBlock *, MachineBasicBlock *>> ToSplit;
- SparseBitVector<> RegsToClearKillFlags;
+ DenseSet<Register> RegsToClearKillFlags;
using AllSuccsCache =
std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>;
@@ -476,14 +476,13 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
// of a def-use chain, if there is any.
// TODO: Sort the candidates using a cost-model.
unsigned i = 0;
- for (auto It = Candidates.rbegin(); It != Candidates.rend(); ++It) {
+ for (MachineInstr *I : llvm::reverse(Candidates)) {
if (i++ == SinkIntoLoopLimit) {
LLVM_DEBUG(dbgs() << "LoopSink: Limit reached of instructions to "
"be analysed.");
break;
}
- MachineInstr *I = *It;
if (!SinkIntoLoop(L, *I))
break;
EverMadeChange = true;
@@ -683,13 +682,9 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
// There is no need to do this check if all the uses are PHI nodes. PHI
// sources are only defined on the specific predecessor edges.
if (!BreakPHIEdge) {
- for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
- E = ToBB->pred_end(); PI != E; ++PI) {
- if (*PI == FromBB)
- continue;
- if (!DT->dominates(ToBB, *PI))
+ for (MachineBasicBlock *Pred : ToBB->predecessors())
+ if (Pred != FromBB && !DT->dominates(ToBB, Pred))
return false;
- }
}
ToSplit.insert(std::make_pair(FromBB, ToBB));
@@ -1329,7 +1324,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg()) continue;
+ if (!MO.isReg() || MO.isUse())
+ continue;
Register Reg = MO.getReg();
if (Reg == 0 || !Register::isPhysicalRegister(Reg))
continue;
@@ -1439,7 +1435,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// used registers.
for (MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isUse())
- RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags.
+ RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
}
return true;
@@ -1718,10 +1714,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
UsedRegUnits.clear();
SeenDbgInstrs.clear();
- for (auto I = CurBB.rbegin(), E = CurBB.rend(); I != E;) {
- MachineInstr *MI = &*I;
- ++I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(CurBB))) {
// Track the operand index for use in Copy.
SmallVector<unsigned, 2> UsedOpsInCopy;
// Track the register number defed in Copy.
@@ -1729,14 +1722,14 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// We must sink this DBG_VALUE if its operand is sunk. To avoid searching
// for DBG_VALUEs later, record them when they're encountered.
- if (MI->isDebugValue()) {
+ if (MI.isDebugValue()) {
SmallDenseMap<MCRegister, SmallVector<unsigned, 2>, 4> MIUnits;
bool IsValid = true;
- for (MachineOperand &MO : MI->debug_operands()) {
+ for (MachineOperand &MO : MI.debug_operands()) {
if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
IsValid = false;
break;
@@ -1750,28 +1743,28 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
if (IsValid) {
for (auto RegOps : MIUnits)
- SeenDbgInstrs[RegOps.first].push_back({MI, RegOps.second});
+ SeenDbgInstrs[RegOps.first].push_back({&MI, RegOps.second});
}
continue;
}
- if (MI->isDebugOrPseudoInstr())
+ if (MI.isDebugOrPseudoInstr())
continue;
// Do not move any instruction across function call.
- if (MI->isCall())
+ if (MI.isCall())
return false;
- if (!MI->isCopy() || !MI->getOperand(0).isRenamable()) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ if (!MI.isCopy() || !MI.getOperand(0).isRenamable()) {
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
// Don't sink the COPY if it would violate a register dependency.
- if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
+ if (hasRegisterDependency(&MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits)) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
@@ -1782,7 +1775,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Don't sink if we cannot find a single sinkable successor in which Reg
// is live-in.
if (!SuccBB) {
- LiveRegUnits::accumulateUsedDefed(*MI, ModifiedRegUnits, UsedRegUnits,
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
TRI);
continue;
}
@@ -1793,7 +1786,7 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
- for (auto &MO : MI->operands()) {
+ for (auto &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
@@ -1811,10 +1804,10 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
- clearKillFlags(MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
+ clearKillFlags(&MI, CurBB, UsedOpsInCopy, UsedRegUnits, TRI);
MachineBasicBlock::iterator InsertPos = SuccBB->getFirstNonPHI();
- performSink(*MI, *SuccBB, InsertPos, DbgValsToSink);
- updateLiveIn(MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
+ performSink(MI, *SuccBB, InsertPos, DbgValsToSink);
+ updateLiveIn(&MI, SuccBB, UsedOpsInCopy, DefedRegsInCopy);
Changed = true;
++NumPostRACopySink;
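RegsToClearKillFlags switches from a SparseBitVector to a DenseSet<Register>, which only needs membership and iteration and copes better with large, sparse virtual register numbers. A rough sketch of that bookkeeping, with std::unordered_set standing in for llvm::DenseSet:

```cpp
// Minimal sketch (std::unordered_set standing in for llvm::DenseSet): remember
// which registers had an instruction sunk past a kill, then clear the stale
// kill flags once per register afterwards.
#include <cstdint>
#include <unordered_set>

using Register = uint32_t; // assumption: a plain integer id for illustration

struct KillFlagFixups {
  std::unordered_set<Register> RegsToClearKillFlags;

  void noteSunkUse(Register R) { RegsToClearKillFlags.insert(R); }

  template <typename ClearFn> void flush(ClearFn ClearKillFlags) {
    for (Register R : RegsToClearKillFlags)
      ClearKillFlags(R); // e.g. MRI->clearKillFlags(R) in the real pass
    RegsToClearKillFlags.clear();
  }
};
```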
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 584d43b42004..28712d1a816b 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -82,7 +82,7 @@ bool isFunctionColdInCallGraph(
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCount(FunctionCount.getCount()))
+ if (!PSI->isColdCount(FunctionCount->getCount()))
return false;
for (const auto &MBB : *MF)
if (!isColdBlock(&MBB, PSI, &MBFI))
@@ -99,7 +99,7 @@ bool isFunctionHotInCallGraphNthPercentile(
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
if (PSI->isHotCountNthPercentile(PercentileCutoff,
- FunctionCount.getCount()))
+ FunctionCount->getCount()))
return true;
for (const auto &MBB : *MF)
if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
@@ -112,7 +112,7 @@ bool isFunctionColdInCallGraphNthPercentile(
const MachineBlockFrequencyInfo &MBFI) {
if (auto FunctionCount = MF->getFunction().getEntryCount())
if (!PSI->isColdCountNthPercentile(PercentileCutoff,
- FunctionCount.getCount()))
+ FunctionCount->getCount()))
return false;
for (const auto &MBB : *MF)
if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
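The .getCount() to ->getCount() changes follow the entry count now being handed back behind an optional-like wrapper, so the value is only dereferenced when a profile is actually present. A small analogy using std::optional (not the real ProfileCount API, and the cold threshold is made up):

```cpp
// Minimal sketch (std::optional as an analogy, not the LLVM ProfileCount API):
// the entry count is only consulted when present, and members are reached
// through the wrapper with operator->.
#include <cstdint>
#include <optional>

struct ProfileCount {
  uint64_t Count;
  uint64_t getCount() const { return Count; }
};

static bool isColdCount(uint64_t C) { return C < 100; } // hypothetical threshold

static bool entryLooksCold(std::optional<ProfileCount> FunctionCount) {
  if (FunctionCount)
    return isColdCount(FunctionCount->getCount());
  return false; // unknown profile: make no claim
}
```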
diff --git a/llvm/lib/CodeGen/MachineStripDebug.cpp b/llvm/lib/CodeGen/MachineStripDebug.cpp
index a1cb12f91275..86cf4999d4b0 100644
--- a/llvm/lib/CodeGen/MachineStripDebug.cpp
+++ b/llvm/lib/CodeGen/MachineStripDebug.cpp
@@ -50,29 +50,26 @@ struct StripDebugMachineModule : public ModulePass {
continue;
MachineFunction &MF = *MaybeMF;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E;) {
- if (I->isDebugInstr()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.isDebugInstr()) {
// FIXME: We should remove all of them. However, AArch64 emits an
// invalid `DBG_VALUE $lr` with only one operand instead of
// the usual three and has a test that depends on its
// preservation. Preserve it for now.
- if (I->getNumOperands() > 1) {
- LLVM_DEBUG(dbgs() << "Removing debug instruction " << *I);
- I = MBB.erase(I);
+ if (MI.getNumOperands() > 1) {
+ LLVM_DEBUG(dbgs() << "Removing debug instruction " << MI);
+ MBB.erase(&MI);
Changed |= true;
continue;
}
}
- if (I->getDebugLoc()) {
- LLVM_DEBUG(dbgs() << "Removing location " << *I);
- I->setDebugLoc(DebugLoc());
+ if (MI.getDebugLoc()) {
+ LLVM_DEBUG(dbgs() << "Removing location " << MI);
+ MI.setDebugLoc(DebugLoc());
Changed |= true;
- ++I;
continue;
}
- LLVM_DEBUG(dbgs() << "Keeping " << *I);
- ++I;
+ LLVM_DEBUG(dbgs() << "Keeping " << MI);
}
}
}
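Rewriting the loop over llvm::make_early_inc_range lets the body erase the current instruction without hand-maintaining a saved "next" iterator. The pattern the helper packages up, shown on a plain std::list standing in for a MachineBasicBlock:

```cpp
// Minimal sketch of the erase-while-iterating pattern that
// llvm::make_early_inc_range encapsulates: advance the iterator before the
// current element may be erased.
#include <list>

struct Instr { bool IsDebug = false; };

static void stripDebugInstrs(std::list<Instr> &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    auto Cur = I++;        // "early increment": I already points past Cur
    if (Cur->IsDebug)
      MBB.erase(Cur);      // safe: erasing Cur does not invalidate I
  }
}
```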
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index 7e3198af02cd..d6bb3e7c9e58 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -210,6 +210,11 @@ namespace {
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
+ /// Verify that all of \p MI's virtual register operands are scalars.
+ /// \returns True if all virtual register operands are scalar. False
+ /// otherwise.
+ bool verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI);
bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
@@ -287,6 +292,13 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ // Skip functions that have known verification problems.
+ // FIXME: Remove this mechanism when all problematic passes have been
+ // fixed.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailsVerification))
+ return false;
+
unsigned FoundErrors = MachineVerifier(this, Banner.c_str()).verify(MF);
if (FoundErrors)
report_fatal_error("Found "+Twine(FoundErrors)+" machine code errors.");
@@ -849,6 +861,21 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
}
}
+bool MachineVerifier::verifyAllRegOpsScalar(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ if (none_of(MI.explicit_operands(), [&MRI](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ const auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return false;
+ return !MRI.getType(Reg).isScalar();
+ }))
+ return true;
+ report("All register operands must have scalar types", &MI);
+ return false;
+}
+
/// Check that types are consistent when two operands need to have the same
/// number of vector elements.
/// \return true if the types are valid.
@@ -1392,7 +1419,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
AttributeList Attrs
= Intrinsic::getAttributes(MF->getFunction().getContext(),
static_cast<Intrinsic::ID>(IntrID));
- bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+ bool DeclHasSideEffects = !Attrs.hasFnAttr(Attribute::ReadNone);
if (NoSideEffects && DeclHasSideEffects) {
report("G_INTRINSIC used with intrinsic that accesses memory", MI);
break;
@@ -1570,11 +1597,8 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
case TargetOpcode::G_VECREDUCE_UMAX:
case TargetOpcode::G_VECREDUCE_UMIN: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
- LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (!DstTy.isScalar())
report("Vector reduction requires a scalar destination type", MI);
- if (!SrcTy.isVector())
- report("Vector reduction requires vector source=", MI);
break;
}
@@ -1598,7 +1622,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
}
break;
}
-
+ case TargetOpcode::G_LLROUND:
+ case TargetOpcode::G_LROUND: {
+ verifyAllRegOpsScalar(*MI, *MRI);
+ break;
+ }
default:
break;
}
@@ -1632,6 +1660,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("Unspillable Terminator does not define a reg", MI);
Register Def = MI->getOperand(0).getReg();
if (Def.isVirtual() &&
+ !MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs) &&
std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
report("Unspillable Terminator expected to have at most one use!", MI);
}
@@ -1866,6 +1896,15 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
switch (MO->getType()) {
case MachineOperand::MO_Register: {
+ // Verify debug flag on debug instructions. Check this first because reg0
+ // indicates an undefined debug value.
+ if (MI->isDebugInstr() && MO->isUse()) {
+ if (!MO->isDebug())
+ report("Register operand must be marked debug", MO, MONum);
+ } else if (MO->isDebug()) {
+ report("Register operand must not be marked debug", MO, MONum);
+ }
+
const Register Reg = MO->getReg();
if (!Reg)
return;
@@ -1932,10 +1971,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
}
- if (MI->isDebugValue() && MO->isUse() && !MO->isDebug()) {
- report("Use-reg is not IsDebug in a DBG_VALUE", MO, MONum);
- return;
- }
} else {
// Virtual register.
const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
@@ -2182,14 +2217,30 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const Register Reg = MO->getReg();
+ const unsigned SubRegIdx = MO->getSubReg();
+
+ const LiveInterval *LI = nullptr;
+ if (LiveInts && Reg.isVirtual()) {
+ if (LiveInts->hasInterval(Reg)) {
+ LI = &LiveInts->getInterval(Reg);
+ if (SubRegIdx != 0 && !LI->empty() && !LI->hasSubRanges() &&
+ MRI->shouldTrackSubRegLiveness(Reg))
+ report("Live interval for subreg operand has no subranges", MO, MONum);
+ } else {
+ report("Virtual register has no live interval", MO, MONum);
+ }
+ }
// Both use and def operands can read a register.
if (MO->readsReg()) {
if (MO->isKill())
addRegWithSubRegs(regsKilled, Reg);
- // Check that LiveVars knows this kill.
- if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) {
+ // Check that LiveVars knows this kill (unless we are inside a bundle, in
+ // which case we have already checked that LiveVars knows any kills on the
+ // bundle header instead).
+ if (LiveVars && Reg.isVirtual() && MO->isKill() &&
+ !MI->isBundledWithPred()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
@@ -2209,42 +2260,36 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
- if (Register::isVirtualRegister(Reg)) {
- if (LiveInts->hasInterval(Reg)) {
- // This is a virtual register interval.
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- checkLivenessAtUse(MO, MONum, UseIdx, LI, Reg);
-
- if (LI.hasSubRanges() && !MO->isDef()) {
- unsigned SubRegIdx = MO->getSubReg();
- LaneBitmask MOMask = SubRegIdx != 0
- ? TRI->getSubRegIndexLaneMask(SubRegIdx)
- : MRI->getMaxLaneMaskForVReg(Reg);
- LaneBitmask LiveInMask;
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((MOMask & SR.LaneMask).none())
- continue;
- checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
- LiveQueryResult LRQ = SR.Query(UseIdx);
- if (LRQ.valueIn())
- LiveInMask |= SR.LaneMask;
- }
- // At least parts of the register has to be live at the use.
- if ((LiveInMask & MOMask).none()) {
- report("No live subrange at use", MO, MONum);
- report_context(LI);
- report_context(UseIdx);
- }
+ if (Reg.isVirtual()) {
+ // This is a virtual register interval.
+ checkLivenessAtUse(MO, MONum, UseIdx, *LI, Reg);
+
+ if (LI->hasSubRanges() && !MO->isDef()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ LaneBitmask LiveInMask;
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((MOMask & SR.LaneMask).none())
+ continue;
+ checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
+ LiveQueryResult LRQ = SR.Query(UseIdx);
+ if (LRQ.valueIn())
+ LiveInMask |= SR.LaneMask;
+ }
+ // At least parts of the register have to be live at the use.
+ if ((LiveInMask & MOMask).none()) {
+ report("No live subrange at use", MO, MONum);
+ report_context(*LI);
+ report_context(UseIdx);
}
- } else {
- report("Virtual register has no live interval", MO, MONum);
}
}
}
// Use of a dead register.
if (!regsLive.count(Reg)) {
- if (Register::isPhysicalRegister(Reg)) {
+ if (Reg.isPhysical()) {
// Reserved registers may be used even when 'dead'.
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
@@ -2266,7 +2311,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!Register::isPhysicalRegister(MOP.getReg()))
+ if (!MOP.getReg().isPhysical())
continue;
if (llvm::is_contained(TRI->subregs(MOP.getReg()), Reg))
@@ -2299,7 +2344,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDefined, Reg);
// Verify SSA form.
- if (MRI->isSSA() && Register::isVirtualRegister(Reg) &&
+ if (MRI->isSSA() && Reg.isVirtual() &&
std::next(MRI->def_begin(Reg)) != MRI->def_end())
report("Multiple virtual register defs in SSA form", MO, MONum);
@@ -2308,24 +2353,18 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (Register::isVirtualRegister(Reg)) {
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
-
- if (LI.hasSubRanges()) {
- unsigned SubRegIdx = MO->getSubReg();
- LaneBitmask MOMask = SubRegIdx != 0
- ? TRI->getSubRegIndexLaneMask(SubRegIdx)
- : MRI->getMaxLaneMaskForVReg(Reg);
- for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & MOMask).none())
- continue;
- checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
- }
+ if (Reg.isVirtual()) {
+ checkLivenessAtDef(MO, MONum, DefIdx, *LI, Reg);
+
+ if (LI->hasSubRanges()) {
+ LaneBitmask MOMask = SubRegIdx != 0
+ ? TRI->getSubRegIndexLaneMask(SubRegIdx)
+ : MRI->getMaxLaneMaskForVReg(Reg);
+ for (const LiveInterval::SubRange &SR : LI->subranges()) {
+ if ((SR.LaneMask & MOMask).none())
+ continue;
+ checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, true, SR.LaneMask);
}
- } else {
- report("Virtual register has no Live interval", MO, MONum);
}
}
}
@@ -2918,9 +2957,13 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
}
}
- // A live segment can only end at an early-clobber slot if it is being
- // redefined by an early-clobber def.
- if (S.end.isEarlyClobber()) {
+ // After tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if it is being redefined by an early-clobber def.
+ // TODO: Before tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if the last use is tied to an early-clobber def.
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ S.end.isEarlyClobber()) {
if (I+1 == LR.end() || (I+1)->start != S.end) {
report("Live segment ending at early clobber slot must be "
"redefined by an EC def in the same instruction", EndMBB);
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index d2ee21c8720f..b0760322064c 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -44,15 +44,15 @@ static SUnit *getPredClusterSU(const SUnit &SU) {
return nullptr;
}
-static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+bool llvm::hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
unsigned Num = 1;
const SUnit *CurrentSU = &SU;
while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;
return Num < FuseLimit;
}
-static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
- SUnit &SecondSU) {
+bool llvm::fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
+ SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
// between them.
for (SDep &SI : FirstSU.Succs)
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index b5517c40a28a..8b3cdfab4d42 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -81,10 +81,7 @@ void ModuloScheduleExpander::expand() {
Register Reg = Op.getReg();
unsigned MaxDiff = 0;
bool PhiIsSwapped = false;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- MachineOperand &UseOp = *UI;
+ for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
MachineInstr *UseMI = UseOp.getParent();
int UseStage = Schedule.getStage(UseMI);
unsigned Diff = 0;
@@ -141,13 +138,11 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Copy any terminator instructions to the new kernel, and update
// names as needed.
- for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
- E = BB->instr_end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ for (MachineInstr &MI : BB->terminators()) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
- InstrMap[NewMI] = &*I;
+ InstrMap[NewMI] = &MI;
}
NewKernel = KernelBB;
@@ -334,14 +329,10 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
- E = MRI.use_end();
- I != E;) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
- }
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
}
@@ -350,10 +341,8 @@ static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
/// specified loop.
static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
MachineRegisterInfo &MRI) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
- E = MRI.use_end();
- I != E; ++I)
- if (I->getParent()->getParent() != BB)
+ for (const MachineOperand &MO : MRI.use_operands(Reg))
+ if (MO.getParent()->getParent() != BB)
return true;
return false;
}
@@ -702,11 +691,9 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs) {
// For each epilog block, check that the value defined by each instruction
// is used. If not, delete it.
- for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
- MBE = EpilogBBs.rend();
- MBB != MBE; ++MBB)
- for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
- ME = (*MBB)->instr_rend();
+ for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
+ for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
+ ME = MBB->instr_rend();
MI != ME;) {
// From DeadMachineInstructionElem. Don't delete inline assembly.
if (MI->isInlineAsm()) {
@@ -721,26 +708,22 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
continue;
}
bool used = true;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- Register reg = MOI->getReg();
+ Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
if (Register::isPhysicalRegister(reg)) {
- used = !MOI->isDead();
+ used = !MO.isDead();
if (used)
break;
continue;
}
unsigned realUses = 0;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
+ for (const MachineOperand &U : MRI.use_operands(reg)) {
// Check if there are any uses that occur only in the original
// loop. If so, that's not a real use.
- if (UI->getParent()->getParent() != BB) {
+ if (U.getParent()->getParent() != BB) {
realUses++;
used = true;
break;
@@ -759,15 +742,11 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
}
// In the kernel block, check if we can remove a Phi that generates a value
// used in an instruction removed in the epilog block.
- for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
- BBE = KernelBB->getFirstNonPHI();
- BBI != BBE;) {
- MachineInstr *MI = &*BBI;
- ++BBI;
- Register reg = MI->getOperand(0).getReg();
+ for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
+ Register reg = MI.getOperand(0).getReg();
if (MRI.use_begin(reg) == MRI.use_end()) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
}
}
}
@@ -1145,12 +1124,9 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
int StagePhi = Schedule.getStage(Phi) + PhiNum;
// Rewrite uses that have been scheduled already to use the new
// Phi register.
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
- EI = MRI.use_end();
- UI != EI;) {
- MachineOperand &UseOp = *UI;
+ for (MachineOperand &UseOp :
+ llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
MachineInstr *UseMI = UseOp.getParent();
- ++UI;
if (UseMI->getParent() != BB)
continue;
if (UseMI->isPHI()) {
@@ -1223,8 +1199,7 @@ void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
bool Changed = true;
while (Changed) {
Changed = false;
- for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
assert(MI.isPHI());
if (MRI.use_empty(MI.getOperand(0).getReg())) {
if (LIS)
@@ -1624,32 +1599,32 @@ void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
auto InsertPt = DestBB->getFirstNonPHI();
DenseMap<Register, Register> Remaps;
- for (auto I = SourceBB->getFirstNonPHI(); I != SourceBB->end();) {
- MachineInstr *MI = &*I++;
- if (MI->isPHI()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(
+ llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
+ if (MI.isPHI()) {
// This is an illegal PHI. If we move any instructions using an illegal
// PHI, we need to create a legal Phi.
- if (getStage(MI) != Stage) {
+ if (getStage(&MI) != Stage) {
// The legal Phi is not necessary if the illegal phi's stage
// is being moved.
- Register PhiR = MI->getOperand(0).getReg();
+ Register PhiR = MI.getOperand(0).getReg();
auto RC = MRI.getRegClass(PhiR);
Register NR = MRI.createVirtualRegister(RC);
MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
DebugLoc(), TII->get(TargetOpcode::PHI), NR)
.addReg(PhiR)
.addMBB(SourceBB);
- BlockMIs[{DestBB, CanonicalMIs[MI]}] = NI;
- CanonicalMIs[NI] = CanonicalMIs[MI];
+ BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
+ CanonicalMIs[NI] = CanonicalMIs[&MI];
Remaps[PhiR] = NR;
}
}
- if (getStage(MI) != Stage)
+ if (getStage(&MI) != Stage)
continue;
- MI->removeFromParent();
- DestBB->insert(InsertPt, MI);
- auto *KernelMI = CanonicalMIs[MI];
- BlockMIs[{DestBB, KernelMI}] = MI;
+ MI.removeFromParent();
+ DestBB->insert(InsertPt, &MI);
+ auto *KernelMI = CanonicalMIs[&MI];
+ BlockMIs[{DestBB, KernelMI}] = &MI;
BlockMIs.erase({SourceBB, KernelMI});
}
SmallVector<MachineInstr *, 4> PhiToDelete;
@@ -1768,8 +1743,8 @@ void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
// Keep track at which iteration each phi belongs to. We need it to know
// what version of the variable to use during prologue/epilogue stitching.
EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
- for (auto Phi = B->begin(), IE = B->getFirstNonPHI(); Phi != IE; ++Phi)
- PhiNodeLoopIteration[&*Phi] = Schedule.getNumStages() - I;
+ for (MachineInstr &Phi : B->phis())
+ PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
}
for (size_t I = 0; I < Epilogs.size(); I++) {
LS.reset();
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 54805584dbc1..77a6c37e1362 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -107,6 +107,7 @@ namespace {
using BBVRegPair = std::pair<unsigned, Register>;
using VRegPHIUse = DenseMap<BBVRegPair, unsigned>;
+ // Count the number of non-undef PHI uses of each register in each BB.
VRegPHIUse VRegPHIUseCount;
// Defs of PHI sources which are implicit_def.
@@ -426,9 +427,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
// Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
- --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
- MPhi->getOperand(i).getReg())];
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ if (!MPhi->getOperand(i).isUndef()) {
+ --VRegPHIUseCount[BBVRegPair(
+ MPhi->getOperand(i + 1).getMBB()->getNumber(),
+ MPhi->getOperand(i).getReg())];
+ }
+ }
// Now loop over all of the incoming arguments, changing them to copy into the
// IncomingReg register in the corresponding predecessor basic block.
@@ -461,6 +466,15 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
assert(MRI->use_empty(SrcReg) &&
"Expected a single use from UnspillableTerminator");
SrcRegDef->getOperand(0).setReg(IncomingReg);
+
+ // Update LiveVariables.
+ if (LV) {
+ LiveVariables::VarInfo &SrcVI = LV->getVarInfo(SrcReg);
+ LiveVariables::VarInfo &IncomingVI = LV->getVarInfo(IncomingReg);
+ IncomingVI.AliveBlocks = std::move(SrcVI.AliveBlocks);
+ SrcVI.AliveBlocks.clear();
+ }
+
continue;
}
@@ -515,9 +529,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// case, we should mark the last such terminator as being the killing
// block, not the copy.
MachineBasicBlock::iterator KillInst = opBlock.end();
- MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
- for (MachineBasicBlock::iterator Term = FirstTerm;
- Term != opBlock.end(); ++Term) {
+ for (MachineBasicBlock::iterator Term = InsertPos; Term != opBlock.end();
+ ++Term) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
}
@@ -527,7 +540,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't insert a copy this time.
- KillInst = FirstTerm;
+ KillInst = InsertPos;
while (KillInst != opBlock.begin()) {
--KillInst;
if (KillInst->isDebugInstr())
@@ -574,9 +587,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (!isLiveOut) {
MachineBasicBlock::iterator KillInst = opBlock.end();
- MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator();
- for (MachineBasicBlock::iterator Term = FirstTerm;
- Term != opBlock.end(); ++Term) {
+ for (MachineBasicBlock::iterator Term = InsertPos;
+ Term != opBlock.end(); ++Term) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
}
@@ -586,7 +598,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't just insert a copy.
- KillInst = FirstTerm;
+ KillInst = InsertPos;
while (KillInst != opBlock.begin()) {
--KillInst;
if (KillInst->isDebugInstr())
@@ -623,14 +635,19 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
- for (const auto &MBB : MF)
+ for (const auto &MBB : MF) {
for (const auto &BBI : MBB) {
if (!BBI.isPHI())
break;
- for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
- ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(),
- BBI.getOperand(i).getReg())];
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
+ if (!BBI.getOperand(i).isUndef()) {
+ ++VRegPHIUseCount[BBVRegPair(
+ BBI.getOperand(i + 1).getMBB()->getNumber(),
+ BBI.getOperand(i).getReg())];
+ }
+ }
}
+ }
}
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
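Both the initial count in analyzePHINodes and the later decrement in LowerPHINode now skip undef PHI operands, keeping the per-(block, vreg) counters balanced. A sketch of the counting side, with a std::map standing in for the DenseMap:

```cpp
// Minimal sketch (std::map standing in for the DenseMap): count PHI uses per
// (predecessor block, vreg) pair, skipping undef operands so the increment and
// the matching decrement stay balanced.
#include <map>
#include <utility>
#include <vector>

struct PhiIncoming { int PredBB; unsigned Reg; bool IsUndef; };
using BBVRegPair = std::pair<int, unsigned>;

static void countPhiUses(const std::vector<PhiIncoming> &Incoming,
                         std::map<BBVRegPair, unsigned> &VRegPHIUseCount) {
  for (const PhiIncoming &In : Incoming)
    if (!In.IsUndef)
      ++VRegPHIUseCount[{In.PredBB, In.Reg}];
}
```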
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 49bdba518322..f9b16d2630d6 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -626,7 +626,7 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
Register SrcReg, SrcReg2;
- int CmpMask, CmpValue;
+ int64_t CmpMask, CmpValue;
if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
SrcReg.isPhysical() || SrcReg2.isPhysical())
return false;
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 80c38f3ec341..e3eb3f825851 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
+#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -36,9 +37,8 @@ static bool lowerLoadRelative(Function &F) {
Type *Int32PtrTy = Int32Ty->getPointerTo();
Type *Int8Ty = Type::getInt8Ty(F.getContext());
- for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto CI = dyn_cast<CallInst>(U.getUser());
if (!CI || CI->getCalledOperand() != &F)
continue;
@@ -90,10 +90,22 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
CallInst::TailCallKind OverridingTCK = getOverridingTailCallKind(F);
- for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto *CI = cast<CallInst>(I->getUser());
+ for (Use &U : llvm::make_early_inc_range(F.uses())) {
+ auto *CB = cast<CallBase>(U.getUser());
+
+ if (CB->getCalledFunction() != &F) {
+ objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
+ (void)Kind;
+ assert((Kind == objcarc::ARCInstKind::RetainRV ||
+ Kind == objcarc::ARCInstKind::ClaimRV) &&
+ "use expected to be the argument of operand bundle "
+ "\"clang.arc.attachedcall\"");
+ U.set(FCache.getCallee());
+ continue;
+ }
+
+ auto *CI = cast<CallInst>(CB);
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
- ++I;
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(CI->args());
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 2f65a450fb02..9a4f70a6070f 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -285,7 +285,7 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
(void)Failed;
}
if (StackSize > Threshold) {
- DiagnosticInfoStackSize DiagStackSize(F, StackSize, DS_Warning, Threshold);
+ DiagnosticInfoStackSize DiagStackSize(F, StackSize, Threshold, DS_Warning);
F.getContext().diagnose(DiagStackSize);
}
ORE->emit([&]() {
@@ -395,12 +395,28 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
const MCPhysReg *CSRegs = F.getRegInfo().getCalleeSavedRegs();
+ BitVector CSMask(SavedRegs.size());
+
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ CSMask.set(CSRegs[i]);
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (SavedRegs.test(Reg))
- CSI.push_back(CalleeSavedInfo(Reg));
+ if (SavedRegs.test(Reg)) {
+ bool SavedSuper = false;
+ for (const MCPhysReg &SuperReg : RegInfo->superregs(Reg)) {
+ // Some backends set all aliases for some registers as saved, such as
+ // Mips's $fp, so they appear in SavedRegs but not CSRegs.
+ if (SavedRegs.test(SuperReg) && CSMask.test(SuperReg)) {
+ SavedSuper = true;
+ break;
+ }
+ }
+
+ if (!SavedSuper)
+ CSI.push_back(CalleeSavedInfo(Reg));
+ }
}
const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
@@ -1237,7 +1253,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
StackOffset Offset =
TFI->getFrameIndexReference(MF, FrameIdx, Reg);
Op.ChangeToRegister(Reg, false /*isDef*/);
- Op.setIsDebug();
const DIExpression *DIExpr = MI.getDebugExpression();
diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index a9fb577d5735..5f69f9194125 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -44,7 +44,14 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
+ bool doInitialization(Module &M) override {
+ ShouldRun = M.getNamedMetadata(PseudoProbeDescMetadataName);
+ return false;
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!ShouldRun)
+ return false;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
@@ -129,6 +136,8 @@ private:
Name = SP->getName();
return Function::getGUID(Name);
}
+
+ bool ShouldRun = false;
};
} // namespace
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index d92c6a997f31..d704cf7b3213 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -171,7 +171,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
SmallSet<NodeId,32> Defs;
- // Remove all non-phi defs that are not aliased to RefRR, and segregate
+ // Remove all non-phi defs that are not aliased to RefRR, and separate
// the remaining defs into buckets for containing blocks.
std::map<NodeId, NodeAddr<InstrNode*>> Owners;
std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index c850571da2ed..1264e6021b6e 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -30,16 +30,32 @@ static bool isValidRegUse(const MachineOperand &MO) {
return isValidReg(MO) && MO.isUse();
}
-static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg) {
- return isValidRegUse(MO) && MO.getReg() == PhysReg;
+static bool isValidRegUseOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegUse(MO))
+ return false;
+ if (MO.getReg() == PhysReg)
+ return true;
+ for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
+ if (MO.getReg() == *R)
+ return true;
+ return false;
}
static bool isValidRegDef(const MachineOperand &MO) {
return isValidReg(MO) && MO.isDef();
}
-static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg) {
- return isValidRegDef(MO) && MO.getReg() == PhysReg;
+static bool isValidRegDefOf(const MachineOperand &MO, MCRegister PhysReg,
+ const TargetRegisterInfo *TRI) {
+ if (!isValidRegDef(MO))
+ return false;
+ if (MO.getReg() == PhysReg)
+ return true;
+ for (MCRegAliasIterator R(PhysReg, TRI, false); R.isValid(); ++R)
+ if (MO.getReg() == *R)
+ return true;
+ return false;
}
void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
@@ -337,7 +353,7 @@ void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def,
return;
for (auto &MO : MI->operands()) {
- if (!isValidRegUseOf(MO, PhysReg))
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
continue;
Uses.insert(&*MI);
@@ -353,7 +369,7 @@ bool ReachingDefAnalysis::getLiveInUses(MachineBasicBlock *MBB,
for (MachineInstr &MI :
instructionsWithoutDebug(MBB->instr_begin(), MBB->instr_end())) {
for (auto &MO : MI.operands()) {
- if (!isValidRegUseOf(MO, PhysReg))
+ if (!isValidRegUseOf(MO, PhysReg, TRI))
continue;
if (getReachingDef(&MI, PhysReg) >= 0)
return false;
@@ -381,8 +397,7 @@ void ReachingDefAnalysis::getGlobalUses(MachineInstr *MI, MCRegister PhysReg,
SmallVector<MachineBasicBlock *, 4> ToVisit(MBB->successors());
SmallPtrSet<MachineBasicBlock*, 4>Visited;
while (!ToVisit.empty()) {
- MachineBasicBlock *MBB = ToVisit.back();
- ToVisit.pop_back();
+ MachineBasicBlock *MBB = ToVisit.pop_back_val();
if (Visited.count(MBB) || !MBB->isLiveIn(PhysReg))
continue;
if (getLiveInUses(MBB, PhysReg, Uses))
@@ -419,7 +434,7 @@ void ReachingDefAnalysis::getLiveOuts(MachineBasicBlock *MBB,
VisitedBBs.insert(MBB);
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return;
if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg))
@@ -469,7 +484,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
LiveRegs.addLiveOuts(*MBB);
// Yes if the register is live out of the basic block.
- if (LiveRegs.contains(PhysReg))
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return true;
// Walk backwards through the block to see if the register is live at some
@@ -477,7 +492,7 @@ bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI,
for (MachineInstr &Last :
instructionsWithoutDebug(MBB->instr_rbegin(), MBB->instr_rend())) {
LiveRegs.stepBackward(Last);
- if (LiveRegs.contains(PhysReg))
+ if (!LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return InstIds.lookup(&Last) > InstIds.lookup(MI);
}
return false;
@@ -502,7 +517,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
MachineBasicBlock *MBB = MI->getParent();
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return false;
auto Last = MBB->getLastNonDebugInstr();
@@ -512,7 +527,7 @@ bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI,
// Finally check that the last instruction doesn't redefine the register.
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return false;
return true;
@@ -523,7 +538,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
MCRegister PhysReg) const {
LivePhysRegs LiveRegs(*TRI);
LiveRegs.addLiveOuts(*MBB);
- if (!LiveRegs.contains(PhysReg))
+ if (LiveRegs.available(MBB->getParent()->getRegInfo(), PhysReg))
return nullptr;
auto Last = MBB->getLastNonDebugInstr();
@@ -532,7 +547,7 @@ ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB,
int Def = getReachingDef(&*Last, PhysReg);
for (auto &MO : Last->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return &*Last;
return Def < 0 ? nullptr : getInstFromId(MBB, Def);
@@ -700,7 +715,7 @@ bool ReachingDefAnalysis::isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg,
if (Ignore.count(&*I))
continue;
for (auto &MO : I->operands())
- if (isValidRegDefOf(MO, PhysReg))
+ if (isValidRegDefOf(MO, PhysReg, TRI))
return false;
}
}
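isValidRegUseOf and isValidRegDefOf now also match operands that merely alias the queried physical register. A sketch of that matching with a hypothetical alias table in place of MCRegAliasIterator:

```cpp
// Minimal sketch (hypothetical alias table instead of MCRegAliasIterator): an
// operand matches the queried physreg if it names the register itself or any
// of its aliases.
#include <cstdint>
#include <map>
#include <vector>

using MCRegister = uint16_t;

static const std::map<MCRegister, std::vector<MCRegister>> Aliases = {
    {1, {2, 3}}, // e.g. a full register aliasing its sub-registers
};

static bool matchesRegOrAlias(MCRegister OpReg, MCRegister PhysReg) {
  if (OpReg == PhysReg)
    return true;
  auto It = Aliases.find(PhysReg);
  if (It == Aliases.end())
    return false;
  for (MCRegister A : It->second)
    if (OpReg == A)
      return true;
  return false;
}
```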
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index b65d58077958..a9816b13e798 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -217,9 +217,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, MCRegister PhysReg,
// Collect interferences assigned to any alias of the physical register.
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- Q.collectInterferingVRegs();
- for (unsigned i = Q.interferingVRegs().size(); i; --i) {
- LiveInterval *Intf = Q.interferingVRegs()[i - 1];
+ for (auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
Intfs.push_back(Intf);
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
new file mode 100644
index 000000000000..85fd3207888b
--- /dev/null
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -0,0 +1,90 @@
+//===- RegAllocEvictionAdvisor.h - Interference resolution ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+#define LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
+
+#include "AllocationOrder.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+using SmallVirtRegSet = SmallSet<Register, 16>;
+
+// Live ranges pass through a number of stages as we try to allocate them.
+// Some of the stages may also create new live ranges:
+//
+// - Region splitting.
+// - Per-block splitting.
+// - Local splitting.
+// - Spilling.
+//
+// Ranges produced by one of the stages skip the previous stages when they are
+// dequeued. This improves performance because we can skip interference checks
+// that are unlikely to give any results. It also guarantees that the live
+// range splitting algorithm terminates, something that is otherwise hard to
+// ensure.
+enum LiveRangeStage {
+ /// Newly created live range that has never been queued.
+ RS_New,
+
+ /// Only attempt assignment and eviction. Then requeue as RS_Split.
+ RS_Assign,
+
+ /// Attempt live range splitting if assignment is impossible.
+ RS_Split,
+
+ /// Attempt more aggressive live range splitting that is guaranteed to make
+ /// progress. This is used for split products that may not be making
+ /// progress.
+ RS_Split2,
+
+ /// Live range will be spilled. No more splitting will be attempted.
+ RS_Spill,
+
+ /// Live range is in memory. Because of other evictions, it might get moved
+ /// in a register in the end.
+ RS_Memory,
+
+ /// There is nothing more we can do to this live range. Abort compilation
+ /// if it can't be assigned.
+ RS_Done
+};
+
+/// Cost of evicting interference - used by default advisor, and the eviction
+/// chain heuristic in RegAllocGreedy.
+// FIXME: this can be probably made an implementation detail of the default
+// advisor, if the eviction chain logic can be refactored.
+struct EvictionCost {
+ unsigned BrokenHints = 0; ///< Total number of broken hints.
+ float MaxWeight = 0; ///< Maximum spill weight evicted.
+
+ EvictionCost() = default;
+
+ bool isMax() const { return BrokenHints == ~0u; }
+
+ void setMax() { BrokenHints = ~0u; }
+
+ void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
+
+ bool operator<(const EvictionCost &O) const {
+ return std::tie(BrokenHints, MaxWeight) <
+ std::tie(O.BrokenHints, O.MaxWeight);
+ }
+};
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REGALLOCEVICTIONADVISOR_H
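EvictionCost, now shared through the new header, orders candidates lexicographically: fewer broken hints always wins, and MaxWeight only breaks ties, which std::tie expresses directly. A standalone usage example of that comparison:

```cpp
// Standalone illustration of the EvictionCost ordering from the new header:
// std::tie gives a lexicographic comparison, so fewer broken hints always wins
// and MaxWeight only breaks ties.
#include <cassert>
#include <tuple>

struct EvictionCost {
  unsigned BrokenHints = 0;
  float MaxWeight = 0;
  bool operator<(const EvictionCost &O) const {
    return std::tie(BrokenHints, MaxWeight) <
           std::tie(O.BrokenHints, O.MaxWeight);
  }
};

int main() {
  EvictionCost A{/*BrokenHints=*/0, /*MaxWeight=*/5.0f};
  EvictionCost B{/*BrokenHints=*/1, /*MaxWeight=*/0.5f};
  assert(A < B); // breaking no hints beats a lighter eviction that breaks one
  return 0;
}
```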
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 707161d5a8b0..68920e2e50df 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
@@ -432,7 +433,7 @@ void RegAllocFast::spill(MachineBasicBlock::iterator Before, Register VirtReg,
// every definition of it, meaning we can switch all the DBG_VALUEs over
// to just reference the stack slot.
SmallVectorImpl<MachineOperand *> &LRIDbgOperands = LiveDbgValueMap[VirtReg];
- SmallDenseMap<MachineInstr *, SmallVector<const MachineOperand *>>
+ SmallMapVector<MachineInstr *, SmallVector<const MachineOperand *>, 2>
SpilledOperandsMap;
for (MachineOperand *MO : LRIDbgOperands)
SpilledOperandsMap[MO->getParent()].push_back(MO);
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4eb12aa30ee9..5a93b58e0baf 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
+#include "RegAllocEvictionAdvisor.h"
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -57,6 +58,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -69,7 +71,6 @@
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/IR/DebugInfoMetadata.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -148,7 +149,6 @@ class RAGreedy : public MachineFunctionPass,
// Convenient shortcuts.
using PQueue = std::priority_queue<std::pair<unsigned, unsigned>>;
using SmallLISet = SmallPtrSet<LiveInterval *, 4>;
- using SmallVirtRegSet = SmallSet<Register, 16>;
// context
MachineFunction *MF;
@@ -175,47 +175,6 @@ class RAGreedy : public MachineFunctionPass,
unsigned NextCascade;
std::unique_ptr<VirtRegAuxInfo> VRAI;
- // Live ranges pass through a number of stages as we try to allocate them.
- // Some of the stages may also create new live ranges:
- //
- // - Region splitting.
- // - Per-block splitting.
- // - Local splitting.
- // - Spilling.
- //
- // Ranges produced by one of the stages skip the previous stages when they are
- // dequeued. This improves performance because we can skip interference checks
- // that are unlikely to give any results. It also guarantees that the live
- // range splitting algorithm terminates, something that is otherwise hard to
- // ensure.
- enum LiveRangeStage {
- /// Newly created live range that has never been queued.
- RS_New,
-
- /// Only attempt assignment and eviction. Then requeue as RS_Split.
- RS_Assign,
-
- /// Attempt live range splitting if assignment is impossible.
- RS_Split,
-
- /// Attempt more aggressive live range splitting that is guaranteed to make
- /// progress. This is used for split products that may not be making
- /// progress.
- RS_Split2,
-
- /// Live range will be spilled. No more splitting will be attempted.
- RS_Spill,
-
-
- /// Live range is in memory. Because of other evictions, it might get moved
- /// in a register in the end.
- RS_Memory,
-
- /// There is nothing more we can do to this live range. Abort compilation
- /// if it can't be assigned.
- RS_Done
- };
-
// Enum CutOffStage to keep track of whether the register allocation failed
// because of the cutoffs encountered in last chance recoloring.
// Note: This is used as bitmask. New value should be next power of 2.
@@ -267,25 +226,6 @@ class RAGreedy : public MachineFunctionPass,
}
}
- /// Cost of evicting interference.
- struct EvictionCost {
- unsigned BrokenHints = 0; ///< Total number of broken hints.
- float MaxWeight = 0; ///< Maximum spill weight evicted.
-
- EvictionCost() = default;
-
- bool isMax() const { return BrokenHints == ~0u; }
-
- void setMax() { BrokenHints = ~0u; }
-
- void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }
-
- bool operator<(const EvictionCost &O) const {
- return std::tie(BrokenHints, MaxWeight) <
- std::tie(O.BrokenHints, O.MaxWeight);
- }
- };
-
/// EvictionTrack - Keeps track of past evictions in order to optimize region
/// split decision.
class EvictionTrack {
@@ -488,6 +428,8 @@ private:
MCRegister tryAssign(LiveInterval&, AllocationOrder&,
SmallVectorImpl<Register>&,
const SmallVirtRegSet&);
+ MCRegister tryFindEvictionCandidate(LiveInterval &, const AllocationOrder &,
+ uint8_t, const SmallVirtRegSet &) const;
MCRegister tryEvict(LiveInterval &, AllocationOrder &,
SmallVectorImpl<Register> &, uint8_t,
const SmallVirtRegSet &);
@@ -760,10 +702,9 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// Giant live ranges fall back to the global assignment heuristic, which
// prevents excessive spilling in pathological cases.
bool ReverseLocal = TRI->reverseLocalAssignment();
- bool AddPriorityToGlobal = TRI->addAllocPriorityToGlobalRanges();
const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
bool ForceGlobal = !ReverseLocal &&
- (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
+ (Size / SlotIndex::InstrDist) > (2 * RCI.getNumAllocatableRegs(&RC));
if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
LIS->intervalIsInOneMBB(*LI)) {
@@ -785,8 +726,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// interference. Mark a bit to prioritize global above local ranges.
Prio = (1u << 29) + Size;
- if (AddPriorityToGlobal)
- Prio |= RC.AllocationPriority << 24;
+ Prio |= RC.AllocationPriority << 24;
}
// Mark a higher bit to prioritize global and local above RS_Split.
Prio |= (1u << 31);
@@ -860,7 +800,7 @@ MCRegister RAGreedy::tryAssign(LiveInterval &VirtReg,
return PhysReg;
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
- << Cost << '\n');
+ << (unsigned)Cost << '\n');
MCRegister CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -957,11 +897,12 @@ bool RAGreedy::canEvictInterference(
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there are 10 or more interferences, chances are one is heavier.
- if (Q.collectInterferingVRegs(10) >= 10)
+ const auto &Interferences = Q.interferingVRegs(10);
+ if (Interferences.size() >= 10)
return false;
// Check if any interfering live range is heavier than MaxWeight.
- for (LiveInterval *Intf : reverse(Q.interferingVRegs())) {
+ for (LiveInterval *Intf : reverse(Interferences)) {
assert(Register::isVirtualRegister(Intf->reg()) &&
"Only expecting virtual register interference from query");
@@ -1039,7 +980,6 @@ bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
- Q.collectInterferingVRegs();
// Check if any interfering live range is heavier than MaxWeight.
for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
@@ -1129,7 +1069,6 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, MCRegister PhysReg,
// should be fast, we may need to recalculate it when different physregs
// overlap the same register unit, so we would have different SubRanges queried
// against it.
- Q.collectInterferingVRegs();
ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
Intfs.append(IVR.begin(), IVR.end());
}
@@ -1162,17 +1101,9 @@ bool RAGreedy::isUnusedCalleeSavedReg(MCRegister PhysReg) const {
return !Matrix->isPhysRegUsed(PhysReg);
}
-/// tryEvict - Try to evict all interferences for a physreg.
-/// @param VirtReg Currently unassigned virtual register.
-/// @param Order Physregs to try.
-/// @return Physreg to assign VirtReg, or 0.
-MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<Register> &NewVRegs,
- uint8_t CostPerUseLimit,
- const SmallVirtRegSet &FixedRegisters) {
- NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
- TimePassesIsEnabled);
-
+MCRegister RAGreedy::tryFindEvictionCandidate(
+ LiveInterval &VirtReg, const AllocationOrder &Order,
+ uint8_t CostPerUseLimit, const SmallVirtRegSet &FixedRegisters) const {
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
BestCost.setMax();
@@ -1230,7 +1161,22 @@ MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
if (I.isHint())
break;
}
+ return BestPhys;
+}
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+MCRegister RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<Register> &NewVRegs,
+ uint8_t CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) {
+ NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
+ TimePassesIsEnabled);
+
+ MCRegister BestPhys =
+ tryFindEvictionCandidate(VirtReg, Order, CostPerUseLimit, FixedRegisters);
if (BestPhys.isValid())
evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
@@ -2135,7 +2081,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// the constraints on the virtual register.
// Otherwise, splitting just inserts uncoalescable copies that do not help
// the allocation.
- for (const auto &Use : Uses) {
+ for (const SlotIndex Use : Uses) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use))
if (MI->isFullCopy() ||
SuperRCNumAllocatableRegs ==
@@ -2462,12 +2408,12 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
if (NewGaps >= NumGaps) {
- LLVM_DEBUG(dbgs() << "Tagging non-progress ranges: ");
+ LLVM_DEBUG(dbgs() << "Tagging non-progress ranges:");
assert(!ProgressRequired && "Didn't make progress when it was required.");
for (unsigned I = 0, E = IntvMap.size(); I != E; ++I)
if (IntvMap[I] == 1) {
setStage(LIS->getInterval(LREdit.get(I)), RS_Split2);
- LLVM_DEBUG(dbgs() << printReg(LREdit.get(I)));
+ LLVM_DEBUG(dbgs() << ' ' << printReg(LREdit.get(I)));
}
LLVM_DEBUG(dbgs() << '\n');
}
@@ -2506,17 +2452,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
SA->analyze(&VirtReg);
- // FIXME: SplitAnalysis may repair broken live ranges coming from the
- // coalescer. That may cause the range to become allocatable which means that
- // tryRegionSplit won't be making progress. This check should be replaced with
- // an assertion when the coalescer is fixed.
- if (SA->didRepairRange()) {
- // VirtReg has changed, so all cached queries are invalid.
- Matrix->invalidateVirtRegs();
- if (Register PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
- return PhysReg;
- }
-
// First try to split around a region spanning multiple blocks. RS_Split2
// ranges already made dubious progress with region splitting, so they go
// straight to single block splitting.
@@ -2560,8 +2495,9 @@ bool RAGreedy::mayRecolorAllInterferences(
LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
// If there is LastChanceRecoloringMaxInterference or more interferences,
// chances are one would not be recolorable.
- if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
- LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
+ if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
+ LastChanceRecoloringMaxInterference &&
+ !ExhaustiveSearch) {
LLVM_DEBUG(dbgs() << "Early abort: too many interferences.\n");
CutOffInfo |= CO_Interf;
return false;
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 751f79e66b73..c847068bca90 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -932,12 +932,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
// = B
// Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg()),
- UE = MRI->use_end();
- UI != UE;
- /* ++UI is below because of possible MI removal */) {
- MachineOperand &UseMO = *UI;
- ++UI;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(IntA.reg()))) {
if (UseMO.isUndef())
continue;
MachineInstr *UseMI = UseMO.getParent();
@@ -1573,9 +1569,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
// to describe DstReg instead.
if (MRI->use_nodbg_empty(SrcReg)) {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
- UI != MRI->use_end();) {
- MachineOperand &UseMO = *UI++;
+ for (MachineOperand &UseMO :
+ llvm::make_early_inc_range(MRI->use_operands(SrcReg))) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugInstr()) {
if (Register::isPhysicalRegister(DstReg))
@@ -3708,7 +3703,7 @@ void RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF)
// vreg => DbgValueLoc map.
auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) {
for (auto *X : ToInsert) {
- for (auto Op : X->debug_operands()) {
+ for (const auto &Op : X->debug_operands()) {
if (Op.isReg() && Op.getReg().isVirtual())
DbgVRegToValues[Op.getReg()].push_back({Slot, X});
}
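Both coalescer loops above are rewritten with llvm::make_early_inc_range, which advances the underlying iterator before handing out the current element, so the loop body may erase or reparent the very thing it is visiting. A standalone sketch of the idiom, assuming LLVM's STLExtras header is available (the std::map is just a stand-in for a use-list):

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <map>

int main() {
  std::map<int, int> M = {{1, 1}, {2, 2}, {3, 3}, {4, 4}};
  for (auto &KV : llvm::make_early_inc_range(M))
    if (KV.first % 2 == 0)
      M.erase(KV.first); // safe: the range already moved past this node
  assert(M.size() == 2 && M.count(1) && M.count(3));
  return 0;
}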
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index e35cf7aa6958..c0a07ec4c91d 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -495,21 +495,20 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
// Spill the scavenged register before \p Before.
int FI = Scavenged[SI].FrameIndex;
if (FI < FIB || FI >= FIE) {
- std::string Msg = std::string("Error while trying to spill ") +
- TRI->getName(Reg) + " from class " + TRI->getRegClassName(&RC) +
- ": Cannot scavenge register without an emergency spill slot!";
- report_fatal_error(Msg.c_str());
+ report_fatal_error(Twine("Error while trying to spill ") +
+ TRI->getName(Reg) + " from class " +
+ TRI->getRegClassName(&RC) +
+ ": Cannot scavenge register without an emergency "
+ "spill slot!");
}
- TII->storeRegToStackSlot(*MBB, Before, Reg, true, Scavenged[SI].FrameIndex,
- &RC, TRI);
+ TII->storeRegToStackSlot(*MBB, Before, Reg, true, FI, &RC, TRI);
MachineBasicBlock::iterator II = std::prev(Before);
unsigned FIOperandNum = getFrameIndexOperandNum(*II);
TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, Reg, Scavenged[SI].FrameIndex,
- &RC, TRI);
+ TII->loadRegFromStackSlot(*MBB, UseMI, Reg, FI, &RC, TRI);
II = std::prev(UseMI);
FIOperandNum = getFrameIndexOperandNum(*II);
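The scavenger's error path now builds its message as a Twine instead of concatenating std::strings, so the pieces are chained lazily and only materialized when the consumer needs them. A sketch of the same concatenation, assuming the LLVM ADT/Support headers; the register and class names here are made up:

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include <cstdio>

int main() {
  const char *RegName = "x11";   // hypothetical register name
  const char *ClassName = "GPR"; // hypothetical register class
  llvm::SmallString<128> Buf;
  // A Twine must be consumed in the same expression that builds it.
  llvm::StringRef Msg =
      (llvm::Twine("Error while trying to spill ") + RegName +
       " from class " + ClassName +
       ": Cannot scavenge register without an emergency spill slot!")
          .toStringRef(Buf);
  std::printf("%s\n", Msg.str().c_str());
  return 0;
}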
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 1619381967c4..0ff045fa787e 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -70,7 +70,7 @@ static bool replaceWithTLIFunction(CallInst &CI, const StringRef TLIName) {
// Replace the call to the vector intrinsic with a call
// to the corresponding function from the vector library.
IRBuilder<> IRBuilder(&CI);
- SmallVector<Value *> Args(CI.arg_operands());
+ SmallVector<Value *> Args(CI.args());
// Preserve the operand bundles.
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
@@ -106,7 +106,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// all vector operands have identical vector width.
ElementCount VF = ElementCount::getFixed(0);
SmallVector<Type *> ScalarTypes;
- for (auto Arg : enumerate(CI.arg_operands())) {
+ for (auto Arg : enumerate(CI.args())) {
auto *ArgType = Arg.value()->getType();
// Vector calls to intrinsics can still have
// scalar operands for specific arguments.
diff --git a/llvm/lib/CodeGen/SafeStack.cpp b/llvm/lib/CodeGen/SafeStack.cpp
index 94add920f284..50d9d64bfcfd 100644
--- a/llvm/lib/CodeGen/SafeStack.cpp
+++ b/llvm/lib/CodeGen/SafeStack.cpp
@@ -147,7 +147,7 @@ class SafeStack {
///
/// 16 seems like a reasonable upper bound on the alignment of objects that we
/// might expect to appear on the stack on most common targets.
- enum { StackAlignment = 16 };
+ static constexpr uint64_t StackAlignment = 16;
/// Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -221,6 +221,8 @@ public:
bool run();
};
+constexpr uint64_t SafeStack::StackAlignment;
+
uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType());
if (AI->isArrayAllocation()) {
@@ -519,7 +521,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
StackLayout SSL(StackAlignment);
if (StackGuardSlot) {
Type *Ty = StackGuardSlot->getAllocatedType();
- unsigned Align =
+ uint64_t Align =
std::max(DL.getPrefTypeAlignment(Ty), StackGuardSlot->getAlignment());
SSL.addObject(StackGuardSlot, getStaticAllocaAllocationSize(StackGuardSlot),
Align, SSC.getFullLiveRange());
@@ -532,8 +534,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align = std::max((unsigned)DL.getPrefTypeAlignment(Ty),
- Arg->getParamAlignment());
+ uint64_t Align =
+ std::max(DL.getPrefTypeAlignment(Ty), Arg->getParamAlignment());
SSL.addObject(Arg, Size, Align, SSC.getFullLiveRange());
}
@@ -544,21 +546,20 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
Size = 1; // Don't create zero-sized stack objects.
// Ensure the object is properly aligned.
- unsigned Align =
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment());
+ uint64_t Align = std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment());
SSL.addObject(AI, Size, Align,
ClColoring ? SSC.getLiveRange(AI) : NoColoringRange);
}
SSL.computeLayout();
- unsigned FrameAlignment = SSL.getFrameAlignment();
+ uint64_t FrameAlignment = SSL.getFrameAlignment();
// FIXME: tell SSL that we start at a less-then-MaxAlignment aligned location
// (AlignmentSkew).
if (FrameAlignment > StackAlignment) {
// Re-align the base pointer according to the max requested alignment.
- assert(isPowerOf2_32(FrameAlignment));
+ assert(isPowerOf2_64(FrameAlignment));
IRB.SetInsertPoint(BasePointer->getNextNode());
BasePointer = cast<Instruction>(IRB.CreateIntToPtr(
IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy),
@@ -676,9 +677,9 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
- unsigned Align = std::max(
- std::max((unsigned)DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
- (unsigned)StackAlignment);
+ uint64_t Align =
+ std::max(std::max(DL.getPrefTypeAlignment(Ty), AI->getAlignment()),
+ StackAlignment);
assert(isPowerOf2_32(Align));
Value *NewTop = IRB.CreateIntToPtr(
@@ -701,9 +702,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (!DynamicAllocas.empty()) {
// Now go through the instructions again, replacing stacksave/stackrestore.
- for (inst_iterator It = inst_begin(&F), Ie = inst_end(&F); It != Ie;) {
- Instruction *I = &*(It++);
- auto II = dyn_cast<IntrinsicInst>(I);
+ for (Instruction &I : llvm::make_early_inc_range(instructions(&F))) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II)
continue;
diff --git a/llvm/lib/CodeGen/SafeStackLayout.cpp b/llvm/lib/CodeGen/SafeStackLayout.cpp
index 5d61b3a146b4..7cdda7743c16 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -37,7 +37,7 @@ LLVM_DUMP_METHOD void StackLayout::print(raw_ostream &OS) {
}
}
-void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
+void StackLayout::addObject(const Value *V, unsigned Size, uint64_t Alignment,
const StackLifetime::LiveRange &Range) {
StackObjects.push_back({V, Size, Alignment, Range});
ObjectAlignments[V] = Alignment;
@@ -45,7 +45,7 @@ void StackLayout::addObject(const Value *V, unsigned Size, unsigned Alignment,
}
static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
- unsigned Alignment) {
+ uint64_t Alignment) {
return alignTo(Offset + Size, Alignment) - Size;
}
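AdjustStackOffset rounds the end of each object up to its alignment and derives the start from that. A numeric sketch of the computation, assuming llvm/Support/MathExtras.h for alignTo:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static unsigned AdjustStackOffset(unsigned Offset, unsigned Size,
                                  uint64_t Alignment) {
  return llvm::alignTo(Offset + Size, Alignment) - Size;
}

int main() {
  // 4 bytes already laid out, next object is 8 bytes and 8-byte aligned:
  // its end is rounded from 12 up to 16, so it occupies [8, 16).
  assert(AdjustStackOffset(4, 8, 8) == 8);
  // Already aligned: nothing moves.
  assert(AdjustStackOffset(0, 16, 16) == 0);
  return 0;
}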
diff --git a/llvm/lib/CodeGen/SafeStackLayout.h b/llvm/lib/CodeGen/SafeStackLayout.h
index f0db1b42aa00..b72450e57080 100644
--- a/llvm/lib/CodeGen/SafeStackLayout.h
+++ b/llvm/lib/CodeGen/SafeStackLayout.h
@@ -22,7 +22,7 @@ namespace safestack {
/// Compute the layout of an unsafe stack frame.
class StackLayout {
- unsigned MaxAlignment;
+ uint64_t MaxAlignment;
struct StackRegion {
unsigned Start;
@@ -39,23 +39,24 @@ class StackLayout {
struct StackObject {
const Value *Handle;
- unsigned Size, Alignment;
+ unsigned Size;
+ uint64_t Alignment;
StackLifetime::LiveRange Range;
};
SmallVector<StackObject, 8> StackObjects;
DenseMap<const Value *, unsigned> ObjectOffsets;
- DenseMap<const Value *, unsigned> ObjectAlignments;
+ DenseMap<const Value *, uint64_t> ObjectAlignments;
void layoutObject(StackObject &Obj);
public:
- StackLayout(unsigned StackAlignment) : MaxAlignment(StackAlignment) {}
+ StackLayout(uint64_t StackAlignment) : MaxAlignment(StackAlignment) {}
/// Add an object to the stack frame. Value pointer is opaque and used as a
/// handle to retrieve the object's offset in the frame later.
- void addObject(const Value *V, unsigned Size, unsigned Alignment,
+ void addObject(const Value *V, unsigned Size, uint64_t Alignment,
const StackLifetime::LiveRange &Range);
/// Run the layout computation for all previously added objects.
@@ -65,13 +66,13 @@ public:
unsigned getObjectOffset(const Value *V) { return ObjectOffsets[V]; }
/// Returns the alignment of the object
- unsigned getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
+ uint64_t getObjectAlignment(const Value *V) { return ObjectAlignments[V]; }
/// Returns the size of the entire frame.
unsigned getFrameSize() { return Regions.empty() ? 0 : Regions.back().End; }
/// Returns the alignment of the frame.
- unsigned getFrameAlignment() { return MaxAlignment; }
+ uint64_t getFrameAlignment() { return MaxAlignment; }
void print(raw_ostream &OS);
};
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index 60f8eec1b9bc..ef3afab2b730 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -577,8 +577,7 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
SU = WorkList.back();
WorkList.pop_back();
Visited.set(SU->NodeNum);
- for (const SDep &SuccDep
- : make_range(SU->Succs.rbegin(), SU->Succs.rend())) {
+ for (const SDep &SuccDep : llvm::reverse(SU->Succs)) {
unsigned s = SuccDep.getSUnit()->NodeNum;
// Edges to non-SUnits are allowed but ignored (e.g. ExitSU).
if (s >= Node2Index.size())
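llvm::reverse is the shorthand this hunk switches to for make_range(X.rbegin(), X.rend()). A small standalone sketch, assuming the LLVM ADT headers:

#include "llvm/ADT/STLExtras.h"
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Succs = {1, 2, 3};
  std::vector<int> Seen;
  for (int S : llvm::reverse(Succs)) // same traversal as rbegin()/rend()
    Seen.push_back(S);
  assert((Seen == std::vector<int>{3, 2, 1}));
  return 0;
}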
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index daff3af3bc3c..3f013eb6024e 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -271,15 +271,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
RegUse, UseOp));
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
} else {
Dep.setLatency(0);
- // FIXME: We could always let target to adjustSchedDependency(), and
- // remove this condition, but that currently asserts in Hexagon BE.
- if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle()))
- ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
}
-
+ ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
UseSU->addPred(Dep);
}
}
@@ -1117,7 +1112,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
LiveRegs.addLiveOuts(MBB);
// Examine block from end to start...
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
if (MI.isDebugOrPseudoInstr())
continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b104e995019f..ce400ea43f29 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit(
static cl::opt<bool> EnableReduceLoadOpStoreWidth(
"combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable reducing the width of load/op/store "
+ cl::desc("DAG combiner enable reducing the width of load/op/store "
"sequence"));
static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
"combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
- cl::desc("DAG cominber enable load/<replace bytes>/store with "
+ cl::desc("DAG combiner enable load/<replace bytes>/store with "
"a narrower store"));
namespace {
@@ -319,7 +319,7 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ APInt DemandedBits = APInt::getAllOnes(BitWidth);
return SimplifyDemandedBits(Op, DemandedBits);
}
@@ -345,7 +345,7 @@ namespace {
return false;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return SimplifyDemandedVectorElts(Op, DemandedElts);
}
@@ -436,7 +436,7 @@ namespace {
SDValue visitOR(SDNode *N);
SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitXOR(SDNode *N);
- SDValue SimplifyVBinOp(SDNode *N);
+ SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
SDValue visitSHL(SDNode *N);
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
@@ -515,6 +515,7 @@ namespace {
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
+ SDValue visitVPOp(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
@@ -615,7 +616,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(SDNode *Op0, SDNode *Op1) const;
+ bool mayAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (N0.getOpcode() != Opc)
return SDValue();
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
// Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode =
- DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1}))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
+ return DAG.getNode(Opc, DL, VT, N00, OpNode);
return SDValue();
}
if (N0.hasOneUse()) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+ if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
+ return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ return SDValue();
}
}
return SDValue();
@@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
+#include "llvm/IR/VPIntrinsics.def"
+ return visitVPOp(N);
}
return SDValue();
}
@@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (add x, 0) -> x, vector edition
@@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
N0.getOperand(0));
// fold (add (add (xor a, -1), b), 1) -> (sub b, a)
- if (N0.getOpcode() == ISD::ADD ||
- N0.getOpcode() == ISD::UADDO ||
- N0.getOpcode() == ISD::SADDO) {
+ if (N0.getOpcode() == ISD::ADD) {
SDValue A, Xor;
if (isBitwiseNot(N0.getOperand(0))) {
@@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
IsFlip = Const->isOne();
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- IsFlip = Const->isAllOnesValue();
+ IsFlip = Const->isAllOnes();
break;
case TargetLowering::UndefinedBooleanContent:
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
@@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (sub x, 0) -> x, vector edition
@@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// Convert 0 - abs(x).
- SDValue Result;
if (N1->getOpcode() == ISD::ABS &&
- !TLI.isOperationLegalOrCustom(ISD::ABS, VT) &&
- TLI.expandABS(N1.getNode(), Result, DAG, true))
- return Result;
+ !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
+ if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
+ return Result;
// Fold neg(splat(neg(x)) -> splat(x)
if (VT.isVector()) {
@@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
@@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
// fold (mul x, 0) -> 0
- if (N1IsConst && ConstValue1.isNullValue())
+ if (N1IsConst && ConstValue1.isZero())
return N1;
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue())
+ if (N1IsConst && ConstValue1.isOne())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
// fold (mul x, -1) -> 0-x
- if (N1IsConst && ConstValue1.isAllOnesValue()) {
+ if (N1IsConst && ConstValue1.isAllOnes()) {
SDLoc DL(N);
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT), N0);
@@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
+ if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
@@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SmallBitVector ClearMask;
ClearMask.reserve(NumElts);
auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
- if (!V || V->isNullValue()) {
+ if (!V || V->isZero()) {
ClearMask.push_back(true);
return true;
}
@@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
SDValue combined;
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Op0.getNode()->uses()) {
if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
User->use_empty())
continue;
@@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
// 0 / X -> 0
// 0 % X -> 0
ConstantSDNode *N0C = isConstOrConstSplat(N0);
- if (N0C && N0C->isNullValue())
+ if (N0C && N0C->isZero())
return N0;
// X / X -> 1
@@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (sdiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
return C;
// fold (sdiv X, -1) -> 0-X
- if (N1C && N1C->isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
@@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
// Helper for determining whether a value is a power-2 constant scalar or a
// vector of such elements.
auto IsPowerOfTwo = [](ConstantSDNode *C) {
- if (C->isNullValue() || C->isOpaque())
+ if (C->isZero() || C->isOpaque())
return false;
if (C->getAPIntValue().isPowerOf2())
return true;
- if ((-C->getAPIntValue()).isPowerOf2())
+ if (C->getAPIntValue().isNegatedPowerOf2())
return true;
return false;
};
@@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT CCVT = getSetCCResultType(VT);
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
- SDLoc DL(N);
-
// fold (udiv c1, c2) -> c1/c2
ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
return C;
// fold (udiv X, -1) -> select(X == -1, 1, 0)
- if (N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
@@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
return C;
// fold (urem X, -1) -> select(X == -1, 0, x)
- if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+ if (!isSigned && N1C && N1C->isAllOnes())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(0, DL, VT), N0);
@@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
return C;
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
}
}
+ // Simplify the operands using demanded-bits information.
+ // We don't have demanded bits support for MULHU so this just enables constant
+ // folding based on known bits.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold operation with constant operands.
- if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// canonicalize constant to RHS
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
+ return DAG.getNode(N->getOpcode(), DL, VT, N1, N0);
// If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
// Only do this if the current op isn't legal and the flipped is.
@@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
default: llvm_unreachable("Unknown MINMAX opcode");
}
if (TLI.isOperationLegal(AltOpcode, VT))
- return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+ return DAG.getNode(AltOpcode, DL, VT, N0, N1);
}
// Simplify the operands using demanded-bits information.
@@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
- VT.getSizeInBits() <= 64) {
+ VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
// Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
@@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
return DAG.getZExtOrTrunc(Setcc, DL, VT);
}
+/// For targets that support usubsat, match a bit-hack form of that operation
+/// that ends in 'and' and convert it.
+static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N1.getValueType();
+
+ // Canonicalize SRA as operand 1.
+ if (N0.getOpcode() == ISD::SRA)
+ std::swap(N0, N1);
+
+ // xor/add with SMIN (signmask) are logically equivalent.
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
+ N0.getOperand(0) != N1.getOperand(0))
+ return SDValue();
+
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
+ ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
+ if (!XorC || !XorC->getAPIntValue().isSignMask() ||
+ !SraC || SraC->getAPIntValue() != BitWidth - 1)
+ return SDValue();
+
+ // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
+ // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
+ SDLoc DL(N);
+ SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
+}
+
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
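foldAndToUsubsat relies on a bit-hack identity: xor-ing (or adding) the sign mask and then and-ing with the sign-splat of the same value equals a saturating unsigned subtraction of the sign mask. An exhaustive 8-bit check of that identity in plain C++, assuming the usual two's-complement arithmetic right shift:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t X = static_cast<uint8_t>(v);
    // 0xFF if the sign bit of X is set, 0x00 otherwise (i8 X s>> 7).
    uint8_t Mask = static_cast<uint8_t>(static_cast<int8_t>(X) >> 7);
    uint8_t ViaXor = static_cast<uint8_t>((X ^ 0x80) & Mask);
    uint8_t ViaAdd = static_cast<uint8_t>((X + 0x80) & Mask);
    uint8_t USubSat = X >= 0x80 ? static_cast<uint8_t>(X - 0x80) : 0;
    assert(ViaXor == USubSat && ViaAdd == USubSat);
  }
  return 0;
}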
@@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (and x, 0) -> 0, vector edition
if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
SDLoc(N), N0.getValueType());
if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
@@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// if (and x, c) is known to be zero, return 0
unsigned BitWidth = VT.getScalarSizeInBits();
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(BitWidth)))
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Get the constant (if applicable) the zero'th operand is being ANDed with.
// This can be a pure constant or a vector splat, in which case we treat the
// vector as a scalar and use the splat value.
- APInt Constant = APInt::getNullValue(1);
+ APInt Constant = APInt::getZero(1);
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
Constant = C->getAPIntValue();
} else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
@@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
if ((SplatBitSize % EltBitWidth) == 0) {
- Constant = APInt::getAllOnesValue(EltBitWidth);
+ Constant = APInt::getAllOnes(EltBitWidth);
for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
@@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
case ISD::NON_EXTLOAD: B = true; break;
}
- if (B && Constant.isAllOnesValue()) {
+ if (B && Constant.isAllOnes()) {
// If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
// preserve semantics once we get rid of the AND.
SDValue NewLoad(Load, 0);
@@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (IsAndZeroExtMask(N0, N1))
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
+ if (hasOperation(ISD::USUBSAT, VT))
+ if (SDValue V = foldAndToUsubsat(N, DAG))
+ return V;
+
return SDValue();
}
@@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
// fold (or x, 0) -> x, vector edition
@@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
- // Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
// The target must have at least one rotate/funnel flavor.
+ // We still try to match rotate by constant pre-legalization.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
bool HasFSHL = hasOperation(ISD::FSHL, VT);
bool HasFSHR = hasOperation(ISD::FSHR, VT);
- if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
return SDValue();
// Check for truncated rotate.
@@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
if (LHSShift.getOpcode() == RHSShift.getOpcode())
return SDValue(); // Shifts must disagree.
+ // TODO: Support pre-legalization funnel-shift by constant.
bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0);
if (!IsRotate && !(HasFSHL || HasFSHR))
return SDValue(); // Requires funnel shift support.
@@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
};
if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Res;
- if (IsRotate && (HasROTL || HasROTR))
- Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
- HasROTL ? LHSShiftAmt : RHSShiftAmt);
- else
- Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
- RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt);
+ if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
+ bool UseROTL = !LegalOperations || HasROTL;
+ Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
+ UseROTL ? LHSShiftAmt : RHSShiftAmt);
+ } else {
+ bool UseFSHL = !LegalOperations || HasFSHL;
+ Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
+ RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
+ }
// If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) {
@@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return Res;
}
+ // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
+ // shift.
+ if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
+ return SDValue();
+
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
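The relaxed MatchRotate above now forms rotates by constant even before ROTL/ROTR are legal for the type; the underlying identity is that two shifts of the same value whose constant amounts sum to the bit width compose into a rotate. A standalone sketch:

#include <cassert>
#include <cstdint>

static uint32_t TwoShiftRotl(uint32_t X, unsigned C) {
  // Matches the (shl X, C) | (srl X, BW - C) pattern; C must be in (0, 32).
  return (X << C) | (X >> (32 - C));
}

int main() {
  assert(TwoShiftRotl(0x80000001u, 1) == 0x00000003u);
  assert(TwoShiftRotl(0x12345678u, 8) == 0x34567812u);
  return 0;
}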
@@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// TODO: If there is evidence that running this later would help, this
// limitation could be removed. Legality checks may need to be added
// for the created store and optional bswap/rotate.
- if (LegalOperations)
+ if (LegalOperations || OptLevel == CodeGenOpt::None)
return SDValue();
// We only handle merging simple stores of 1-4 bytes.
@@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// | D |
// Into:
// (x & m) | (y & ~m)
-// If y is a constant, and the 'andn' does not work with immediates,
-// we unfold into a different pattern:
+// If y is a constant, m is not a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
// ~(~x & m) & (m | y)
+// If x is a constant, m is a 'not', and the 'andn' does not work with
+// immediates, we unfold into a different pattern:
+// (x | ~m) & ~(~m & ~y)
// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
// the very least that breaks andnpd / andnps patterns, and because those
// patterns are simplified in IR and shouldn't be created in the DAG
@@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
SDLoc DL(N);
- // If Y is a constant, check that 'andn' works with immediates.
- if (!TLI.hasAndNot(Y)) {
+ // If Y is a constant, check that 'andn' works with immediates. Unless M is
+ // a bitwise not that would already allow ANDN to be used.
+ if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
// If not, we need to do a bit more work to make sure andn is still used.
SDValue NotX = DAG.getNOT(DL, X, VT);
@@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
}
+ // If X is a constant and M is a bitwise not, check that 'andn' works with
+ // immediates.
+ if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
+ assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
+ // If not, we need to do a bit more work to make sure andn is still used.
+ SDValue NotM = M.getOperand(0);
+ SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
+ SDValue NotY = DAG.getNOT(DL, Y, VT);
+ SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
+ SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
+ return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
+ }
+
SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
SDValue NotM = DAG.getNOT(DL, M, VT);
SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
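unfoldMaskedMerge and its new branch rewrite (x & m) | (y & ~m) into forms where the 'not' lands on an operand that ANDN can absorb. An exhaustive 8-bit check in plain C++ that the plain masked merge and both rewritten forms from the comment earlier in this file agree:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned m = 0; m < 256; ++m) {
        uint8_t X = x, Y = y, M = m;
        uint8_t Merge = (X & M) | (Y & ~M);
        uint8_t FormA = ~(~X & M) & (M | Y);   // used when Y is a constant
        uint8_t FormB = (X | ~M) & ~(~M & ~Y); // used when X is a constant
        assert(FormA == Merge && FormB == Merge);
      }
  return 0;
}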
@@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (xor x, 0) -> x, vector edition
@@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
}
// fold (xor undef, undef) -> 0. This is a common idiom (misuse).
- SDLoc DL(N);
if (N0.isUndef() && N1.isUndef())
return DAG.getConstant(0, DL, VT);
@@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// shift has been simplified to undef.
uint64_t ShiftAmt = ShiftC->getLimitedValue();
if (ShiftAmt < BitWidth) {
- APInt Ones = APInt::getAllOnesValue(BitWidth);
+ APInt Ones = APInt::getAllOnes(BitWidth);
Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
if (XorC->getAPIntValue() == Ones) {
// If the xor constant is a shifted -1, do a 'not' before the shift:
@@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
@@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return NewSel;
// if (shl x, c) is known to be zero, return 0
- if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
@@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
// Both operands must be equivalent extend nodes.
SDValue LeftOp = ShiftOperand.getOperand(0);
SDValue RightOp = ShiftOperand.getOperand(1);
+
bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
- if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode())
+ if (!IsSignExt && !IsZeroExt)
return SDValue();
- EVT WideVT1 = LeftOp.getValueType();
- EVT WideVT2 = RightOp.getValueType();
- (void)WideVT2;
+ EVT NarrowVT = LeftOp.getOperand(0).getValueType();
+ unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
+
+ SDValue MulhRightOp;
+ if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
+ unsigned ActiveBits = IsSignExt
+ ? Constant->getAPIntValue().getMinSignedBits()
+ : Constant->getAPIntValue().getActiveBits();
+ if (ActiveBits > NarrowVTSize)
+ return SDValue();
+ MulhRightOp = DAG.getConstant(
+ Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
+ NarrowVT);
+ } else {
+ if (LeftOp.getOpcode() != RightOp.getOpcode())
+ return SDValue();
+ // Check that the two extend nodes are the same type.
+ if (NarrowVT != RightOp.getOperand(0).getValueType())
+ return SDValue();
+ MulhRightOp = RightOp.getOperand(0);
+ }
+
+ EVT WideVT = LeftOp.getValueType();
// Proceed with the transformation if the wide types match.
- assert((WideVT1 == WideVT2) &&
+ assert((WideVT == RightOp.getValueType()) &&
"Cannot have a multiply node with two different operand types.");
- EVT NarrowVT = LeftOp.getOperand(0).getValueType();
- // Check that the two extend nodes are the same type.
- if (NarrowVT != RightOp.getOperand(0).getValueType())
- return SDValue();
-
// Proceed with the transformation if the wide type is twice as large
// as the narrow type.
- unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
- if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize)
+ if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
return SDValue();
// Check the shift amount with the narrow type size.
@@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
return SDValue();
- SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0),
- RightOp.getOperand(0));
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1)
- : DAG.getZExtOrTrunc(Result, DL, WideVT1));
+ SDValue Result =
+ DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
+ return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
+ : DAG.getZExtOrTrunc(Result, DL, WideVT));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
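combineShiftToMULH now also matches a constant RHS whose active bits fit the narrow type, keeping it as a narrow constant instead of requiring a matching extend node. The underlying fact is that the high half of a widened multiply is exactly what a MULHS/MULHU node computes on the narrow type; a scalar sketch, assuming two's-complement arithmetic right shift:

#include <cassert>
#include <cstdint>

static int16_t HighHalfViaWiden(int16_t A, int16_t B) {
  int32_t Wide = int32_t(A) * int32_t(B);  // sext both operands, multiply wide
  return static_cast<int16_t>(Wide >> 16); // shift by the narrow bit width
}

int main() {
  assert(HighHalfViaWiden(-30000, 3) == -2);     // (-90000) >> 16 == -2
  assert(HighHalfViaWiden(0x4000, 0x0004) == 1); // 0x10000 >> 16 == 1
  // A constant such as 3 has fewer active bits than 16, so the combine can
  // keep it as a narrow constant rather than demanding an extend node.
  return 0;
}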
@@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
return FoldedVOp;
ConstantSDNode *N1C = isConstOrConstSplat(N1);
@@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSel;
// if (srl x, c) is known to be zero, return 0
- if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(OpSizeInBits)))
+ if (N1C &&
+ DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
@@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// is also a target-independent combine here in DAGCombiner in the other
// direction for (select Cond, -1, 0) when the condition is not i1.
if (CondVT == MVT::i1 && !LegalOperations) {
- if (C1->isNullValue() && C2->isOne()) {
+ if (C1->isZero() && C2->isOne()) {
// select Cond, 0, 1 --> zext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isNullValue() && C2->isAllOnesValue()) {
+ if (C1->isZero() && C2->isAllOnes()) {
// select Cond, 0, -1 --> sext (!Cond)
SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
if (VT != MVT::i1)
NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
return NotCond;
}
- if (C1->isOne() && C2->isNullValue()) {
+ if (C1->isOne() && C2->isZero()) {
// select Cond, 1, 0 --> zext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return Cond;
}
- if (C1->isAllOnesValue() && C2->isNullValue()) {
+ if (C1->isAllOnes() && C2->isZero()) {
// select Cond, -1, 0 --> sext (Cond)
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
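The renamed predicates above guard the usual boolean-select folds: with an i1 condition, selecting between the constants 0/1 or 0/-1 is just a zero or sign extension of the (possibly inverted) condition. A scalar sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (bool Cond : {false, true}) {
    int32_t ZextNot = static_cast<int32_t>(!Cond);  // select Cond, 0, 1
    int32_t SextNot = -static_cast<int32_t>(!Cond); // select Cond, 0, -1
    int32_t Zext = static_cast<int32_t>(Cond);      // select Cond, 1, 0
    int32_t Sext = -static_cast<int32_t>(Cond);     // select Cond, -1, 0
    assert(ZextNot == (Cond ? 0 : 1));
    assert(SextNot == (Cond ? 0 : -1));
    assert(Zext == (Cond ? 1 : 0));
    assert(Sext == (Cond ? -1 : 0));
  }
  return 0;
}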
@@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
}
// select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
- if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (C1Val.isPowerOf2() && C2Val.isZero()) {
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
@@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
TargetLowering::ZeroOrOneBooleanContent &&
TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
TargetLowering::ZeroOrOneBooleanContent &&
- C1->isNullValue() && C2->isOne()) {
+ C1->isZero() && C2->isOne()) {
SDValue NotCond =
DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
if (VT.bitsEq(CondVT))
@@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
+ return SDValue();
+
+ SDValue Cond0 = N0.getOperand(0);
+ SDValue Cond1 = N0.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (VT != Cond0.getValueType())
+ return SDValue();
+
+ // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
+ // compare is inverted from that pattern ("Cond0 s> -1").
+ if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
+ ; // This is the pattern we are looking for.
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
+ std::swap(N1, N2);
+ else
+ return SDValue();
+
+ // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
+ if (isNullOrNullSplat(N2)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
+ }
+
+ // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
+ if (isAllOnesOrAllOnesSplat(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
+ }
+
+ // If we have to invert the sign bit mask, only do that transform if the
+ // target has a bitwise 'and not' instruction (the invert is free).
+ // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
+ SDLoc DL(N);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
+ SDValue Not = DAG.getNOT(DL, Sra, VT);
+ return DAG.getNode(ISD::AND, DL, VT, Not, N2);
+ }
+
+ // TODO: There's another pattern in this family, but it may require
+ // implementing hasOrNot() to check for profitability:
+ // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
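The new foldVSelectToSignBitSplatMask turns a vector select on a sign-bit compare into shift-and-mask arithmetic. A scalar sketch of the two main patterns, assuming two's-complement arithmetic right shift:

#include <cassert>
#include <cstdint>

int main() {
  int32_t Vals[] = {-7, 0, 42, INT32_MIN};
  for (int32_t X : Vals) {
    int32_t Mask = X >> 31; // all-ones if X s< 0, else zero (sign-bit splat)
    int32_t N1 = 0x1234, N2 = 0x5678;
    assert((Mask & N1) == (X < 0 ? N1 : 0));  // (Cond0 s< 0) ? N1 : 0
    assert((Mask | N2) == (X < 0 ? -1 : N2)); // (Cond0 s< 0) ? -1 : N2
  }
  return 0;
}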
@@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"same value. This should have been addressed before this function.");
return DAG.getNode(
ISD::CONCAT_VECTORS, DL, VT,
- BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
- TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
+ BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
+ TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
}
bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) {
@@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = foldVSelectOfConstants(N))
return V;
+ if (hasOperation(ISD::SRA, VT))
+ if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
AddToWorklist(SCC.getNode());
if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
- if (!SCCC->isNullValue())
+ if (!SCCC->isZero())
return N2; // cond always true -> true val
else
return N3; // cond always false -> false val
@@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// Is 'X Cond C' always true or false?
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isNullValue()) ||
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
(Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnesValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
(Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) ||
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
(Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isNullValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
(Cond == ISD::SETGE && C->isMinSignedValue());
return True || False;
};
@@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
return SDValue();
- if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
return SDValue();
if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
@@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
Known = DAG.computeKnownBits(Op);
- return (Known.Zero | 1).isAllOnesValue();
+ return (Known.Zero | 1).isAllOnes();
}
/// Given an extending node with a pop-count operand, if the target does not
@@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1))
+ if (ExtVTBits >= DAG.ComputeMinSignedBits(N0))
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
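
ComputeMinSignedBits reports the narrowest width that still represents the value as a signed integer, so the sign_extend_inreg check rewritten above can drop the extension whenever the extension width covers it. An editorial scalar model of that condition (minSignedBits and signExtendInReg are illustrative helpers, not LLVM APIs; assumes two's complement and arithmetic right shift):

    #include <cassert>
    #include <cstdint>

    static unsigned minSignedBits(int32_t X) {
      unsigned Bits = 32;
      // Strip redundant copies of the sign bit from the top.
      while (Bits > 1 && ((X >> (Bits - 2)) == 0 || (X >> (Bits - 2)) == -1))
        --Bits;
      return Bits;
    }

    static int32_t signExtendInReg(int32_t X, unsigned W) {
      // Sign-extend the low W bits over the full 32-bit width.
      return (int32_t)((uint32_t)X << (32 - W)) >> (32 - W);
    }

    int main() {
      const int32_t Vals[] = {0, 1, -1, 127, -128, 255, -32768,
                              INT32_MAX, INT32_MIN};
      for (int32_t X : Vals)
        for (unsigned W = minSignedBits(X); W <= 32; ++W)
          assert(signExtendInReg(X, W) == X); // the extension is a no-op
      return 0;
    }
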
@@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
- if ((N00Bits <= ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) &&
+ if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
@@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
if ((N00Bits == ExtVTBits ||
(!IsZext && (N00Bits < ExtVTBits ||
- (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) <
- ExtVTBits))) &&
+ DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) &&
(!LegalOperations ||
TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
@@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
- if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
+ if (Known.countMaxActiveBits() <= Log2_32(Size)) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
assert(N->getOpcode() == ISD::BUILD_PAIR);
- LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
- LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR is always having the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load, we
@@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LD1, LD2);
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
+ !LD1->hasOneUse() || !LD2->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
+
+ bool LD1Fast = false;
EVT LD1VT = LD1->getValueType(0);
unsigned LD1Bytes = LD1VT.getStoreSize();
- if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
- DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
- Align Alignment = LD1->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign <= Alignment &&
- (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
- return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
- LD1->getPointerInfo(), Alignment);
- }
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
+ return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
+ LD1->getPointerInfo(), LD1->getAlign());
return SDValue();
}
@@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
- SDLoc DL(BV);
-
// Okay, we know the src/dst types are both integers of differing types.
- // Handling growing first.
assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
- if (SrcBitSize < DstBitSize) {
- unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
- SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0, e = BV->getNumOperands(); i != e;
- i += NumInputsPerOutput) {
- bool isLE = DAG.getDataLayout().isLittleEndian();
- APInt NewBits = APInt(DstBitSize, 0);
- bool EltIsUndef = true;
- for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
- // Shift the previously computed bits over.
- NewBits <<= SrcBitSize;
- SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
- if (Op.isUndef()) continue;
- EltIsUndef = false;
-
- NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
- zextOrTrunc(SrcBitSize).zext(DstBitSize);
- }
-
- if (EltIsUndef)
- Ops.push_back(DAG.getUNDEF(DstEltVT));
- else
- Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
- }
+ // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
+ // BuildVectorSDNode?
+ auto *BVN = cast<BuildVectorSDNode>(BV);
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getBuildVector(VT, DL, Ops);
- }
+ // Extract the constant raw bit data.
+ BitVector UndefElements;
+ SmallVector<APInt> RawBits;
+ bool IsLE = DAG.getDataLayout().isLittleEndian();
+ if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
+ return SDValue();
- // Finally, this must be the case where we are shrinking elements: each input
- // turns into multiple outputs.
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- NumOutputsPerInput*BV->getNumOperands());
+ SDLoc DL(BV);
SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
+ if (UndefElements[I])
+ Ops.push_back(DAG.getUNDEF(DstEltVT));
+ else
+ Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
+ }
- for (const SDValue &Op : BV->op_values()) {
- if (Op.isUndef()) {
- Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
- continue;
- }
-
- APInt OpVal = cast<ConstantSDNode>(Op)->
- getAPIntValue().zextOrTrunc(SrcBitSize);
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+ return DAG.getBuildVector(VT, DL, Ops);
+}
- for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = OpVal.trunc(DstBitSize);
- Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
- OpVal.lshrInPlace(DstBitSize);
- }
+// Returns true if floating point contraction is allowed on the FMUL-SDValue
+// `N`
+static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
+ assert(N.getOpcode() == ISD::FMUL);
- // For big endian targets, swap the order of the pieces of each element.
- if (DAG.getDataLayout().isBigEndian())
- std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
- }
+ return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
+ N->getFlags().hasAllowContract();
+}
- return DAG.getBuildVector(VT, DL, Ops);
+// Returns true if `N` can assume no infinities involved in its computation.
+static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
+ return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs();
}
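
The constant-fold rewrite earlier in this hunk delegates to BuildVectorSDNode::getConstantRawBits, which packs the element constants into one raw bit string and re-slices it at the destination element width, honouring endianness. A standalone sketch of that repacking for the u8-to-u16 case (repackU8ToU16 is an illustrative helper using plain integers in place of APInt):

    #include <cassert>
    #include <cstdint>
    #include <utility>
    #include <vector>

    static std::vector<uint16_t> repackU8ToU16(const std::vector<uint8_t> &Src,
                                               bool LittleEndian) {
      std::vector<uint16_t> Dst;
      for (size_t I = 0; I + 1 < Src.size(); I += 2) {
        uint8_t Lo = Src[I], Hi = Src[I + 1];
        if (!LittleEndian)
          std::swap(Lo, Hi);              // big-endian: first byte is the MSB
        Dst.push_back(uint16_t(Lo | (unsigned(Hi) << 8)));
      }
      return Dst;
    }

    int main() {
      std::vector<uint8_t> V = {0x11, 0x22, 0x33, 0x44};
      assert((repackU8ToU16(V, /*LittleEndian=*/true) ==
              std::vector<uint16_t>{0x2211, 0x4433}));
      assert((repackU8ToU16(V, /*LittleEndian=*/false) ==
              std::vector<uint16_t>{0x1122, 0x3344}));
      return 0;
    }
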
/// Try to perform FMA combining on a given FADD node.
@@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
@@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
// This requires reassociation because it changes the order of operations.
SDValue FMA, E;
- if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanReassociate && isFusedOp(N0) &&
N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() &&
N0.getOperand(2).hasOneUse()) {
FMA = N0;
E = N1;
- } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode &&
+ } else if (CanReassociate && isFusedOp(N1) &&
N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() &&
N1.getOperand(2).hasOneUse()) {
FMA = N1;
@@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
Z));
};
- if (N0.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd x, (fma y, z, (fpext (fmul u, v)))
// -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N1)) {
SDValue N12 = N1.getOperand(2);
if (N12.getOpcode() == ISD::FP_EXTEND) {
SDValue N120 = N12.getOperand(0);
@@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N1.getOpcode() == ISD::FP_EXTEND) {
SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N10)) {
SDValue N102 = N10.getOperand(2);
if (isContractableFMUL(N102) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return isContractableFMUL(N) && isReassociable(N.getNode());
};
+ auto isFusedOp = [&](SDValue N) {
+ unsigned Opcode = N.getOpcode();
+ return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ };
+
// More folding opportunities when target permits.
if (Aggressive && isReassociable(N)) {
bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
// fold (fsub (fma x, y, (fmul u, v)), z)
// -> (fma x, y (fma u, v, (fneg z)))
- if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
@@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fmul u, v)))
// -> (fma (fneg y), z, (fma (fneg u), v, x))
- if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
+ if (CanFuse && isFusedOp(N1) &&
isContractableAndReassociableFMUL(N1.getOperand(2)) &&
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
@@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode &&
- N0->hasOneUse()) {
+ if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
if (N02.getOpcode() == ISD::FP_EXTEND) {
SDValue N020 = N02.getOperand(0);
@@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
+ if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
// The transforms below are incorrect when x == 0 and y == inf, because the
// intermediate multiplication produces a nan.
- if (!Options.NoInfsFPMath)
+ SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
+ if (!hasNoInfs(Options, FAdd))
return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ isContractableFMUL(Options, SDValue(N, 0)) &&
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
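
HasFMA is now gated on the same contraction test as the FMUL itself, because fusing a multiply-add removes the intermediate rounding and can change results. A small editorial demonstration of that rounding difference (values chosen to expose it; the volatile keeps the compiler from contracting the separate form on its own):

    #include <cassert>
    #include <cmath>

    int main() {
      double A = 1.0 + 0x1p-27;            // chosen so A*B is not exactly
      double B = 1.0 + 0x1p-27;            // representable in a double
      double C = -1.0;
      volatile double Prod = A * B;        // rounded once here...
      double Separate = Prod + C;          // ...and again here
      double Fused = std::fma(A, B, C);    // one rounding only
      assert(Separate == 0x1p-26);         // the 2^-54 term was rounded away
      assert(Fused == 0x1p-26 + 0x1p-54);  // kept by the fused operation
      assert(Fused != Separate);           // so contraction changes results
      return 0;
    }
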
@@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fadd c1, c2) -> c1 + c2
@@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fsub c1, c2) -> c1-c2
@@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
// fold vector ops
if (VT.isVector()) {
// This just handles C1 * C2 for vectors. Other vector folds are below.
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
}
@@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N1CFP && N1CFP->isExactlyValue(+2.0))
return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
- // fold (fmul X, -1.0) -> (fneg X)
- if (N1CFP && N1CFP->isExactlyValue(-1.0))
- if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, DL, VT, N0);
+ // fold (fmul X, -1.0) -> (fsub -0.0, X)
+ if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
+ return DAG.getNode(ISD::FSUB, DL, VT,
+ DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
+ }
+ }
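
The rewritten fold materialises the negation as fsub -0.0, X; the -0.0 constant is what keeps signed zeros matching the multiply. A quick numeric check (editorial, compile without -ffast-math):

    #include <cassert>
    #include <cmath>

    int main() {
      const double Vals[] = {+0.0, -0.0, 1.5, -2.25};
      for (double X : Vals) {
        double Mul = X * -1.0;
        double Sub = -0.0 - X;
        assert(Mul == Sub && std::signbit(Mul) == std::signbit(Sub));
      }
      // The naive constant would be wrong: 0.0 - (+0.0) is +0.0,
      // but (+0.0) * -1.0 is -0.0.
      assert(std::signbit(0.0 * -1.0) && !std::signbit(0.0 - 0.0));
      return 0;
    }
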
// -N0 * -N1 --> N0 * N1
TargetLowering::NegatibleCost CostN0 =
@@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// fold vector ops
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVBinOp(N))
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
return FoldedVOp;
// fold (fdiv c1, c2) -> c1/c2
@@ -16245,11 +16381,12 @@ struct LoadedSlice {
return false;
// Check if it will be merged with the load.
- // 1. Check the alignment constraint.
- Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign(
- ResVT.getTypeForEVT(*DAG->getContext()));
-
- if (RequiredAlignment > getAlign())
+ // 1. Check the alignment / fast memory access constraint.
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
+ Origin->getAddressSpace(), getAlign(),
+ Origin->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return false;
// 2. Check that the load is a legal operation for that type.
@@ -16270,7 +16407,7 @@ struct LoadedSlice {
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
// If all the bits are one, this is dense!
- if (UsedBits.isAllOnesValue())
+ if (UsedBits.isAllOnes())
return true;
// Get rid of the unused bits on the right.
@@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
if (NarrowedUsedBits.countLeadingZeros())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
- return NarrowedUsedBits.isAllOnesValue();
+ return NarrowedUsedBits.isAllOnes();
}
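
areUsedBitsDense accepts only bit patterns that form one contiguous run. An equivalent standalone check on uint32_t (isDense is an illustrative stand-in for the APInt version):

    #include <cassert>
    #include <cstdint>

    static bool isDense(uint32_t UsedBits) {
      if (UsedBits == 0)
        return false;                          // nothing used
      while ((UsedBits & 1) == 0)
        UsedBits >>= 1;                        // drop the unused low bits
      return (UsedBits & (UsedBits + 1)) == 0; // remaining bits are 0..01..1
    }

    int main() {
      assert(isDense(0x000000F0));  // one contiguous byte
      assert(isDense(0xFFFFFFFF));  // everything used
      assert(isDense(0x00000001));
      assert(!isDense(0x00000101)); // two separate runs
      assert(!isDense(0x0000000A)); // 0b1010, hole in the middle
      return 0;
    }
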
/// Check whether or not \p First and \p Second are next to each other
@@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
unsigned BitWidth = N1.getValueSizeInBits();
APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
if (Opc == ISD::AND)
- Imm ^= APInt::getAllOnesValue(BitWidth);
- if (Imm == 0 || Imm.isAllOnesValue())
+ Imm ^= APInt::getAllOnes(BitWidth);
+ if (Imm == 0 || Imm.isAllOnes())
return SDValue();
unsigned ShAmt = Imm.countTrailingZeros();
unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
@@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if ((Imm & Mask) == Imm) {
APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
if (Opc == ISD::AND)
- NewImm ^= APInt::getAllOnesValue(NewBW);
+ NewImm ^= APInt::getAllOnes(NewBW);
uint64_t PtrOff = ShAmt / 8;
// For big endian targets, we need to adjust the offset to the pointer to
// load the correct bytes.
if (DAG.getDataLayout().isBigEndian())
PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+ bool IsFast = false;
Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
- Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
- if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy))
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
+ LD->getAddressSpace(), NewAlign,
+ LD->getMemOperand()->getFlags(), &IsFast) ||
+ !IsFast)
return SDValue();
SDValue NewPtr =
@@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
if (VTSize.isScalable())
return SDValue();
+ bool FastLD = false, FastST = false;
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize());
if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
!TLI.isOperationLegal(ISD::STORE, IntVT) ||
!TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
- !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
- return SDValue();
-
- Align LDAlign = LD->getAlign();
- Align STAlign = ST->getAlign();
- Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
- Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy);
- if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *LD->getMemOperand(), &FastLD) ||
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
+ *ST->getMemOperand(), &FastST) ||
+ !FastLD || !FastST)
return SDValue();
SDValue NewLD =
DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
- LD->getPointerInfo(), LDAlign);
+ LD->getPointerInfo(), LD->getAlign());
SDValue NewST =
DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
- ST->getPointerInfo(), STAlign);
+ ST->getPointerInfo(), ST->getAlign());
AddToWorklist(NewLD.getNode());
AddToWorklist(NewST.getNode());
@@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
SDValue &ConstNode) {
APInt Val;
- // If the add only has one use, this would be OK to do.
- if (AddNode.getNode()->hasOneUse())
+ // If the add only has one use, and the target thinks the folding is
+ // profitable or does not lead to worse code, this would be OK to do.
+ if (AddNode.getNode()->hasOneUse() &&
+ TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
return true;
// Walk all the users of the constant with which we're multiplying.
@@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
unsigned SizeInBits = NumStores * ElementSizeBits;
unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+ Optional<MachineMemOperand::Flags> Flags;
+ AAMDNodes AAInfo;
+ for (unsigned I = 0; I != NumStores; ++I) {
+ StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
+ if (!Flags) {
+ Flags = St->getMemOperand()->getFlags();
+ AAInfo = St->getAAInfo();
+ continue;
+ }
+ // Skip merging if there's an inconsistent flag.
+ if (Flags != St->getMemOperand()->getFlags())
+ return false;
+ // Concatenate AA metadata.
+ AAInfo = AAInfo.concat(St->getAAInfo());
+ }
+
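
The loop added above captures the memory-operand flags from the first store and refuses to merge on any mismatch, while AA metadata is combined across all of the stores. A generic sketch of that capture-then-compare shape (canMerge, StoreInfo and the bitwise-AND merge are illustrative stand-ins, not the LLVM types):

    #include <cassert>
    #include <optional>
    #include <vector>

    struct StoreInfo {
      unsigned Flags;   // must match across all merged stores
      unsigned AATags;  // merged; AND stands in for the metadata merge
    };

    static bool canMerge(const std::vector<StoreInfo> &Stores,
                         unsigned &MergedAA) {
      std::optional<unsigned> Flags;
      unsigned AA = ~0u;
      for (const StoreInfo &S : Stores) {
        if (!Flags) {
          Flags = S.Flags;  // first store establishes the required flags
          AA = S.AATags;
          continue;
        }
        if (*Flags != S.Flags)
          return false;     // inconsistent flag -> give up on merging
        AA &= S.AATags;     // keep only metadata common to every store
      }
      MergedAA = AA;
      return true;
    }

    int main() {
      unsigned AA = 0;
      assert(canMerge({{1, 0b110}, {1, 0b011}}, AA) && AA == 0b010);
      assert(!canMerge({{1, 0b111}, {2, 0b111}}, AA)); // flag mismatch
      return 0;
    }
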
EVT StoreTy;
if (UseVector) {
unsigned Elts = NumStores * NumMemElts;
@@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// make sure we use trunc store if it's necessary to be legal.
SDValue NewStore;
if (!UseTrunc) {
- NewStore =
- DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstInChain->getAlign());
+ NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ FirstInChain->getPointerInfo(),
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ FirstInChain->getAlign(), Flags.getValue(), AAInfo);
}
// Replace all merged stores with the new store.
@@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
SDValue StoredVal = ST->getValue();
bool IsElementZero = false;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
- IsElementZero = C->isNullValue();
+ IsElementZero = C->isZero();
else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
IsElementZero = C->getConstantFPValue()->isNullValue();
if (IsElementZero) {
@@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
break;
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants(
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
@@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants(
unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
- TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts(
if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
break;
- if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
+ if (TLI.isTypeLegal(Ty) &&
+ TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, Ty,
*FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
@@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
bool IsFastSt = false;
bool IsFastLd = false;
- if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ // Don't try vector types if we need a rotate. We may still fail the
+ // legality checks for the integer type, but we can't handle the rotate
+ // case with vectors.
+ // FIXME: We could use a shuffle in place of the rotate.
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
+ DAG.getMachineFunction()) &&
TLI.allowsMemoryAccess(Context, DL, StoreTy,
*FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
@@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
TargetLowering::TypePromoteInteger) {
EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
- TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
+ TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
+ DAG.getMachineFunction()) &&
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
@@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
case ISD::LIFETIME_END:
// We can forward past any lifetime start/end that can be proven not to
// alias the node.
- if (!isAlias(Chain.getNode(), N))
+ if (!mayAlias(Chain.getNode(), N))
Chains.push_back(Chain.getOperand(0));
break;
case ISD::STORE: {
@@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
if (!VecEltVT.isByteSized())
return SDValue();
- Align Alignment = OriginalLoad->getAlign();
- Align NewAlign = DAG.getDataLayout().getABITypeAlign(
- VecEltVT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Alignment ||
- !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
- return SDValue();
-
- ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
- ISD::NON_EXTLOAD : ISD::EXTLOAD;
- if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
+ ISD::LoadExtType ExtTy =
+ ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
+ if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
+ !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
return SDValue();
- Alignment = NewAlign;
-
+ Align Alignment = OriginalLoad->getAlign();
MachinePointerInfo MPI;
SDLoc DL(EVE);
if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
int Elt = ConstEltNo->getZExtValue();
unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
+ Alignment = commonAlignment(Alignment, PtrOff);
} else {
// Discard the pointer info except the address space because the memory
// operand can't represent this new access since the offset is variable.
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
+ Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
}
+
+ bool IsFast = false;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
+ OriginalLoad->getAddressSpace(), Alignment,
+ OriginalLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
InVecVT, EltNo);
@@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
Use->getOperand(0) == VecOp &&
isa<ConstantSDNode>(Use->getOperand(1));
})) {
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (SDNode *Use : VecOp->uses()) {
auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
if (CstElt->getAPIntValue().ult(NumElts))
@@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
AddToWorklist(N);
return SDValue(N, 0);
}
- APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth);
+ APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
// We simplified the vector operand of this extract element. If this
// extract is not dead, visit it again so it is folded properly.
@@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
// Make sure the first element matches
// (zext (extract_vector_elt X, C))
+ // Offset must be a constant multiple of the
+ // known-minimum vector length of the result type.
int64_t Offset = checkElem(Op0);
- if (Offset < 0)
+ if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
return SDValue();
unsigned NumElems = N->getNumOperands();
@@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
}
+// Attempt to merge nested concat_vectors/undefs.
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
+// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
+static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
+ SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
+ EVT SubVT;
+ SDValue FirstConcat;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef())
+ continue;
+ if (Op.getOpcode() != ISD::CONCAT_VECTORS)
+ return SDValue();
+ if (!FirstConcat) {
+ SubVT = Op.getOperand(0).getValueType();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+ FirstConcat = Op;
+ continue;
+ }
+ if (SubVT != Op.getOperand(0).getValueType())
+ return SDValue();
+ }
+ assert(FirstConcat && "Concat of all-undefs found");
+
+ SmallVector<SDValue> ConcatOps;
+ for (const SDValue &Op : N->ops()) {
+ if (Op.isUndef()) {
+ ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
+ continue;
+ }
+ ConcatOps.append(Op->op_begin(), Op->op_end());
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
+}
+
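
A scalar model of the new fold: nested concatenations are flattened in place, and a whole-operand undef expands to a run of undefs of the inner subvector width (concatFlatten and the optional-based "undef" are editorial stand-ins for the DAG types):

    #include <cassert>
    #include <cstddef>
    #include <optional>
    #include <vector>

    using Elt = std::optional<int>;      // nullopt plays the role of undef
    using Vec = std::vector<Elt>;

    static Vec concatFlatten(const std::vector<std::optional<Vec>> &Ops,
                             size_t SubNumElts) {
      Vec Out;
      for (const std::optional<Vec> &Op : Ops) {
        if (!Op) {                       // undef operand
          Out.insert(Out.end(), SubNumElts, std::nullopt);
          continue;
        }
        Out.insert(Out.end(), Op->begin(), Op->end());
      }
      return Out;
    }

    int main() {
      Vec XY = {1, 2, 3, 4};             // concat_vectors(x, y, z, w)
      Vec AB = {5, 6, 7, 8};             // concat_vectors(a, b, c, d)
      Vec R = concatFlatten({XY, std::nullopt, AB}, /*SubNumElts=*/4);
      assert(R.size() == 12 && !R[4] && !R[7] && *R[0] == 1 && *R[11] == 8);
      return 0;
    }
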
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
@@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
+ // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
return V;
- // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
+ // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
+ return V;
+
+ // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
return V;
+ }
if (SDValue V = combineConcatVectorOfCasts(N, DAG))
return V;
@@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
- !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
return SDValue();
// Allow targets to opt-out.
@@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
if (!VT.isByteSized())
return SDValue();
- unsigned Index = ExtIdx->getZExtValue();
+ unsigned Index = Extract->getConstantOperandVal(1);
unsigned NumElts = VT.getVectorMinNumElements();
// The definition of EXTRACT_SUBVECTOR states that the index must be a
@@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// If the concatenated source types match this extract, it's a direct
// simplification:
// extract_subvec (concat V1, V2, ...), i --> Vi
- if (ConcatSrcNumElts == ExtNumElts)
+ if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
return V.getOperand(ConcatOpIdx);
// If the concatenated source vectors are a multiple length of this extract,
@@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// concat operand. Example:
// v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 -->
// v2i8 extract_subvec v8i8 Y, 6
- if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) {
+ if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
+ ConcatSrcNumElts % ExtNumElts == 0) {
SDLoc DL(N);
unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
@@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
// otherwise => (extract_subvec V1, ExtIdx)
uint64_t InsIdx = V.getConstantOperandVal(2);
if (InsIdx * SmallVT.getScalarSizeInBits() ==
- ExtIdx * NVT.getScalarSizeInBits())
+ ExtIdx * NVT.getScalarSizeInBits()) {
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
+ return SDValue();
+
return DAG.getBitcast(NVT, V.getOperand(1));
+ }
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
@@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// Canonicalize shuffle v, v -> v, undef
- if (N0 == N1) {
- SmallVector<int, 8> NewMask;
- for (unsigned i = 0; i != NumElts; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (Idx >= (int)NumElts) Idx -= NumElts;
- NewMask.push_back(Idx);
- }
- return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
- }
+ if (N0 == N1)
+ return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
+ createUnaryMask(SVN->getMask(), NumElts));
// Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
if (N0.isUndef())
@@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
+ // See if we can replace a shuffle with an insert_subvector.
+ // e.g. v2i32 into v8i32:
+ // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
+ // --> insert_subvector(lhs,rhs1,4).
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
+ TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
+ auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
+ // Ensure RHS subvectors are legal.
+ assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
+ EVT SubVT = RHS.getOperand(0).getValueType();
+ int NumSubVecs = RHS.getNumOperands();
+ int NumSubElts = SubVT.getVectorNumElements();
+ assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
+ if (!TLI.isTypeLegal(SubVT))
+ return SDValue();
+
+ // Don't bother if we have a unary shuffle (matches undef + LHS elts).
+ if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
+ return SDValue();
+
+ // Search [NumSubElts] spans for RHS sequence.
+ // TODO: Can we avoid nested loops to increase performance?
+ SmallVector<int> InsertionMask(NumElts);
+ for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
+ for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
+ // Reset mask to identity.
+ std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
+
+ // Add subvector insertion.
+ std::iota(InsertionMask.begin() + SubIdx,
+ InsertionMask.begin() + SubIdx + NumSubElts,
+ NumElts + (SubVec * NumSubElts));
+
+ // See if the shuffle mask matches the reference insertion mask.
+ bool MatchingShuffle = true;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ int ExpectIdx = InsertionMask[i];
+ int ActualIdx = Mask[i];
+ if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
+ MatchingShuffle = false;
+ break;
+ }
+ }
+
+ if (MatchingShuffle)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
+ RHS.getOperand(SubVec),
+ DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
+ }
+ }
+ return SDValue();
+ };
+ ArrayRef<int> Mask = SVN->getMask();
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS)
+ if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
+ return InsertN1;
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
+ SmallVector<int> CommuteMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommuteMask);
+ if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
+ return InsertN0;
+ }
+ }
+
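
The lambda above matches a shuffle whose mask is the identity except for one aligned span that reads a whole subvector of the RHS concat. An editorial re-creation of that mask test on plain index vectors (matchInsertSubvector is illustrative; -1 plays the role of an undef lane):

    #include <cassert>
    #include <numeric>
    #include <vector>

    // Returns the insertion index, or -1 if Mask is not such an insertion.
    static int matchInsertSubvector(const std::vector<int> &Mask, int NumElts,
                                    int NumSubElts) {
      for (int SubVec = 0; SubVec * NumSubElts < NumElts; ++SubVec)
        for (int SubIdx = 0; SubIdx + NumSubElts <= NumElts;
             SubIdx += NumSubElts) {
          std::vector<int> Ref(NumElts);
          std::iota(Ref.begin(), Ref.end(), 0);           // identity (LHS)
          std::iota(Ref.begin() + SubIdx,
                    Ref.begin() + SubIdx + NumSubElts,
                    NumElts + SubVec * NumSubElts);       // inserted RHS span
          bool Match = true;
          for (int I = 0; I < NumElts; ++I)
            if (Mask[I] >= 0 && Mask[I] != Ref[I])        // -1 = undef lane
              Match = false;
          if (Match)
            return SubIdx;
        }
      return -1;
    }

    int main() {
      // The example from the comment: v2i32 spans inside v8i32,
      // shuffle(lhs,concat(rhs0..rhs3),0,1,2,3,10,11,6,7) -> insert at 4.
      assert(matchInsertSubvector({0, 1, 2, 3, 10, 11, 6, 7}, 8, 2) == 4);
      // Not an insertion: the RHS elements are not a whole aligned subvector.
      assert(matchInsertSubvector({0, 1, 2, 3, 11, 12, 6, 7}, 8, 2) == -1);
      return 0;
    }
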
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
@@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVPOp(SDNode *N) {
+ // VP operations in which all vector elements are disabled - either by
+ // determining that the mask is all false or that the EVL is 0 - can be
+ // eliminated.
+ bool AreAllEltsDisabled = false;
+ if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
+ AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
+ if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
+ AreAllEltsDisabled |=
+ ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
+
+ // This is the only generic VP combine we support for now.
+ if (!AreAllEltsDisabled)
+ return SDValue();
+
+ // Binary operations can be replaced by UNDEF.
+ if (ISD::isVPBinaryOp(N->getOpcode()))
+ return DAG.getUNDEF(N->getValueType(0));
+
+ // VP Memory operations can be replaced by either the chain (stores) or the
+ // chain + undef (loads).
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
+ if (MemSD->writeMem())
+ return MemSD->getChain();
+ return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
+ }
+
+ // Reduction operations return the start operand when no elements are active.
+ if (ISD::isVPReduction(N->getOpcode()))
+ return N->getOperand(0);
+
+ return SDValue();
+}
+
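
visitVPOp only fires when no lane is active, i.e. the EVL is zero or the mask is all-false; binary ops then have no observable result and reductions collapse to their start value. A small scalar model of that reasoning (vpReduceAdd is an illustrative stand-in for a VP reduction):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    static int vpReduceAdd(int Start, const std::vector<int> &Vals,
                           const std::vector<bool> &Mask, size_t EVL) {
      int Acc = Start;
      for (size_t I = 0; I < Vals.size() && I < EVL; ++I)
        if (Mask[I])
          Acc += Vals[I];   // only active lanes participate
      return Acc;
    }

    int main() {
      std::vector<int> V = {1, 2, 3, 4};
      std::vector<bool> AllFalse(4, false), AllTrue(4, true);
      assert(vpReduceAdd(10, V, AllTrue, /*EVL=*/0) == 10);  // EVL == 0
      assert(vpReduceAdd(10, V, AllFalse, /*EVL=*/4) == 10); // mask all false
      assert(vpReduceAdd(10, V, AllTrue, /*EVL=*/4) == 20);  // normal case
      return 0;
    }
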
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
else
Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
- if (Bits.isAllOnesValue())
+ if (Bits.isAllOnes())
Indices.push_back(i);
else if (Bits == 0)
Indices.push_back(i + NumSubElts);
@@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
/// If a vector binop is performed on splat values, it may be profitable to
/// extract, scalarize, and insert/splat.
-static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
+ const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
unsigned Opcode = N->getOpcode();
@@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
!TLI.isOperationLegalOrCustom(Opcode, EltVT))
return SDValue();
- SDLoc DL(N);
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
@@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
}
/// Visit a binary vector operation, like ADD.
-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
- assert(N->getValueType(0).isVector() &&
- "SimplifyVBinOp only works on vectors!");
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
- EVT VT = N->getValueType(0);
unsigned Opcode = N->getOpcode();
SDNodeFlags Flags = N->getFlags();
// See if we can constant fold the vector operation.
- if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS),
+ LHS.getValueType(), Ops))
return Fold;
// Move unary shuffles with identical masks after a vector binop:
@@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
(LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
- SDLoc DL(N);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
RHS.getOperand(0), Flags);
SDValue UndefV = LHS.getOperand(1);
@@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat X), (splat C) --> splat (binop X, C)
- SDLoc DL(N);
SDValue X = Shuf0->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
// binop (splat C), (splat X) --> splat (binop C, X)
- SDLoc DL(N);
SDValue X = Shuf1->getOperand(0);
SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
@@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
LegalOperations)) {
// (binop undef, undef) may not return undef, so compute that result.
- SDLoc DL(N);
SDValue VecC =
DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
@@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
EVT NarrowVT = LHS.getOperand(0).getValueType();
if (NarrowVT == RHS.getOperand(0).getValueType() &&
TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
- SDLoc DL(N);
unsigned NumOperands = LHS.getNumOperands();
SmallVector<SDValue, 4> ConcatOps;
for (unsigned i = 0; i != NumOperands; ++i) {
@@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
- if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
return V;
return SDValue();
@@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc))
return SDValue();
- if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode()))
+ // The use checks are intentionally on SDNode because we may be dealing
+ // with opcodes that produce more than one SDValue.
+ // TODO: Do we really need to check N0 (the condition operand of the select)?
+ // But removing that clause could cause an infinite loop...
+ if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
return SDValue();
+ // Binops may include opcodes that return multiple values, so all values
+ // must be created/propagated from the newly created binops below.
+ SDVTList OpVTs = N1->getVTList();
+
// Fold select(cond, binop(x, y), binop(z, y))
// --> binop(select(cond, x, z), y)
if (N1.getOperand(1) == N2.getOperand(1)) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1));
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
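
The fold hoists a common operand out of the two selected binops. The underlying scalar identity, checked over a few values (editorial; the DAG version must additionally respect the use counts and multi-result opcodes discussed in the comments above):

    // select(c, f(x, y), f(z, y)) == f(select(c, x, z), y) for any pure binop f.
    #include <cassert>

    int main() {
      auto f = [](int A, int B) { return A * 7 + B; };   // stand-in binop
      const int Vals[] = {-3, 0, 2, 11};
      for (bool C : {false, true})
        for (int X : Vals)
          for (int Y : Vals)
            for (int Z : Vals)
              assert((C ? f(X, Y) : f(Z, Y)) == f(C ? X : Z, Y));
      return 0;
    }
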
@@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
VT == N2.getOperand(1).getValueType()) {
SDValue NewSel =
DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
- SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel);
+ SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
NewBinOp->setFlags(N1->getFlags());
NewBinOp->intersectFlagsWith(N2->getFlags());
return NewBinOp;
@@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
// fold select_cc true, x, y -> x
// fold select_cc false, x, y -> y
- return !(SCCC->isNullValue()) ? N2 : N3;
+ return !(SCCC->isZero()) ? N2 : N3;
}
}
@@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
// select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
// select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue ValueOnZero = N2;
SDValue Count = N3;
// If the condition is NE instead of E, swap the operands.
@@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
+ // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
+ // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
+ if (!NotExtCompare && N1C && N2C && N3C &&
+ N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
+ ((N1C->isAllOnes() && CC == ISD::SETGT) ||
+ (N1C->isZero() && CC == ISD::SETLT)) &&
+ !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
+ SDValue ASR = DAG.getNode(
+ ISD::SRA, DL, CmpOpVT, N0,
+ DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
+ return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
+ DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
+ }
+
return SDValue();
}
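
A standalone check of the two select_cc patterns added above, on 32-bit scalars (assumes arithmetic right shift; editorial, not part of the patch):

    //   X > -1 ? C : ~C  ==  (X >> 31) ^ C
    //   X <  0 ? C : ~C  ==  (X >> 31) ^ ~C
    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Xs[] = {INT32_MIN, -7, -1, 0, 1, 42, INT32_MAX};
      const int32_t Cs[] = {0, 1, -1, 0x1234, INT32_MIN};
      for (int32_t X : Xs)
        for (int32_t C : Cs) {
          int32_t Sign = X >> 31;              // 0 if X >= 0, -1 if X < 0
          assert((X > -1 ? C : ~C) == (Sign ^ C));
          assert((X < 0 ? C : ~C) == (Sign ^ ~C));
        }
      return 0;
    }
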
@@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
return SDValue();
// Avoid division by zero.
- if (C->isNullValue())
+ if (C->isZero())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
-/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
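
The corrected comment names the function Newton's method is applied to: F(X) = 1/X - A, whose zero is 1/A. Since F'(X) = -1/X^2, the update X_{i+1} = X_i - F(X_i)/F'(X_i) simplifies to X_i + X_i(1 - A*X_i) = X_i(2 - A*X_i), which needs no division. A quick numeric convergence check (editorial):

    #include <cassert>
    #include <cmath>

    int main() {
      double A = 3.0;
      double X = 0.3;                  // crude initial estimate of 1/3
      for (int I = 0; I < 4; ++I)
        X = X * (2.0 - A * X);         // X_{i+1} = X_i * (2 - A * X_i)
      assert(std::fabs(X - 1.0 / A) < 1e-15); // error squares every step
      return 0;
    }
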
@@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (LegalDAG)
return SDValue();
- // TODO: Handle half and/or extended types?
+ // TODO: Handle extended types?
EVT VT = Op.getValueType();
- if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
+ VT.getScalarType() != MVT::f64)
return SDValue();
// If estimates are explicitly disabled for this function, we're done.
@@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
+bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
@@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
// TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
cast<LSBaseSDNode>(C.getNode())->isSimple();
- if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
@@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LIFETIME_END: {
// We can forward past any lifetime start/end that can be proven not to
// alias the memory access.
- if (!isAlias(N, C.getNode())) {
+ if (!mayAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4ca731cfdf62..4d1449bc2751 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -75,6 +75,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
@@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() {
EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
: FuncInfo.MBB->rend();
MachineBasicBlock::reverse_iterator RI(LastLocalValue);
- for (; RI != RE;) {
- MachineInstr &LocalMI = *RI;
- // Increment before erasing what it points to.
- ++RI;
+ for (MachineInstr &LocalMI :
+ llvm::make_early_inc_range(llvm::make_range(RI, RE))) {
Register DefReg = findLocalRegDef(LocalMI);
if (!DefReg)
continue;
@@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
const CallInst *CI, unsigned StartIdx) {
- for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) {
Value *Val = CI->getArgOperand(i);
// Check for constants and encode them with a StackMaps::ConstantOp prefix.
if (const auto *C = dyn_cast<ConstantInt>(Val)) {
@@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
// Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs>
// This includes all meta-operands up to but not including CC.
unsigned NumMetaOpers = PatchPointOpers::CCPos;
- assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs &&
+ assert(I->arg_size() >= NumMetaOpers + NumArgs &&
"Not enough arguments provided to the patchpoint intrinsic");
// For AnyRegCC the arguments are lowered later on manually.
@@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) {
CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI)
.setTailCall(IsTailCall);
+ diagnoseDontCall(*CI);
+
return lowerCallTo(CLI);
}
@@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs. Tack a deref onto
// the expression, we don't have an "indirect" flag in DBG_INSTR_REF.
- if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) {
+ if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
auto *NewExpr =
@@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ // See if there's an expression to constant-fold.
+ DIExpression *Expr = DI->getExpression();
+ if (Expr)
+ std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
.addImm(0U)
.addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ .addMetadata(Expr);
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
@@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// If using instruction referencing, mutate this into a DBG_INSTR_REF,
// to be later patched up by finalizeDebugInstrRefs.
- if (TM.Options.ValueTrackingVariableLocations) {
+ if (FuncInfo.MF->useDebugInstrRef()) {
Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF));
Builder->getOperand(1).ChangeToImmediate(0);
}
@@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I->getAAMetadata();
if (!Alignment) // Ensure that codegen never sees alignment 0.
Alignment = DL.getABITypeAlign(ValTy);
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 348fad6daf8f..c1bb65409282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps(
MIB.addFrameIndex(Op.getFrameIx());
break;
case SDDbgOperand::VREG:
- MIB.addReg(Op.getVReg(), RegState::Debug);
+ MIB.addReg(Op.getVReg());
break;
case SDDbgOperand::SDNODE: {
SDValue V = SDValue(Op.getSDNode(), Op.getResNo());
@@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) {
DebugLoc DL = SD->getDebugLoc();
auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE));
MIB.addReg(0U);
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
return &*MIB;
@@ -872,22 +872,33 @@ MachineInstr *
InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD,
DenseMap<SDValue, Register> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
assert(SD->getLocationOps().size() == 1 &&
"Non variadic dbg_value should have only one location op");
+ // See about constant-folding the expression.
+ // Copy the location operand in case we replace it.
+ SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]);
+ if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) {
+ const Value *V = LocationOps[0].getConst();
+ if (auto *C = dyn_cast<ConstantInt>(V)) {
+ std::tie(Expr, C) = Expr->constantFold(C);
+ LocationOps[0] = SDDbgOperand::fromConst(C);
+ }
+ }
+
// Emit non-variadic dbg_value nodes as DBG_VALUE.
// DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr
auto MIB = BuildMI(*MF, DL, II);
- AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap);
+ AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap);
if (SD->isIndirect())
MIB.addImm(0U);
else
- MIB.addReg(0U, RegState::Debug);
+ MIB.addReg(0U);
return MIB.addMetadata(Var).addMetadata(Expr);
}
@@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb,
TRI(MF->getSubtarget().getRegisterInfo()),
TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb),
InsertPos(insertpos) {
- EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations;
+ EmitDebugInstrRefs = MF->useDebugInstrRef();
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d92b23f56e4d..eb9d2286aeb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::VP_SCATTER:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPScatterSDNode>(Node)->getValue().getValueType());
+ break;
+ case ISD::VP_STORE:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(),
+ cast<VPStoreSDNode>(Node)->getValue().getValueType());
+ break;
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1181,6 +1191,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Node->getOpcode(), Node->getOperand(0).getValueType());
break;
case ISD::VECREDUCE_SEQ_FADD:
+ case ISD::VECREDUCE_SEQ_FMUL:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_FMUL:
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_FMAX:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_SEQ_FMUL:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(1).getValueType());
break;
@@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
Visited.insert(Op.getNode());
Worklist.push_back(Idx.getNode());
SDValue StackPtr, Ch;
- for (SDNode::use_iterator UI = Vec.getNode()->use_begin(),
- UE = Vec.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Vec.getNode()->uses()) {
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) {
if (ST->isIndexed() || ST->isTruncatingStore() ||
ST->getValue() != Vec)
@@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) {
? ISD::FCOS : ISD::FSIN;
SDValue Op0 = Node->getOperand(0);
- for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : Op0.getNode()->uses()) {
if (User == Node)
continue;
// The other user might have been turned into sincos already.
@@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
// If CTPOP is legal, use it. Otherwise use shifts and xor.
SDValue Result;
- if (TLI.isOperationLegal(ISD::CTPOP, VT)) {
+ if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) {
Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
} else {
Result = Op;
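
A minimal scalar sketch of the parity expansion this hunk selects between, assuming a 32-bit value: the CTPOP path masks the population count with 1 (not shown in this hunk), and the fallback folds all bits into bit 0 with a logarithmic shift/xor reduction. Names are illustrative, not part of the patch.

    #include <cstdint>

    // Parity of x: 1 if an odd number of bits are set, else 0.
    // Mirrors the shift/xor reduction used when CTPOP is unavailable.
    uint32_t parity32(uint32_t x) {
      x ^= x >> 16;
      x ^= x >> 8;
      x ^= x >> 4;
      x ^= x >> 2;
      x ^= x >> 1;
      return x & 1;   // equivalent to popcount(x) & 1
    }
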
@@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
bool NeedInvert;
switch (Node->getOpcode()) {
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandABS(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTLZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp1, DAG))
+ if ((Tmp1 = TLI.expandCTTZ(Node, DAG)))
Results.push_back(Tmp1);
break;
case ISD::BITREVERSE:
@@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
"Don't know how to expand this subtraction!");
- Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
- VT));
+ Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT);
Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT));
Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
break;
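
The rewritten expansion relies on the two's-complement identity -b == ~b + 1, so a - b becomes a + ~b + 1 using only XOR/NOT and ADD. A scalar sketch of that identity, for illustration only:

    #include <cstdint>

    // Expand a - b using only NOT (XOR with all-ones) and ADD, as the
    // legalizer does when SUB itself is not legal: a - b == a + (~b + 1).
    uint32_t subViaNotAdd(uint32_t a, uint32_t b) {
      uint32_t notB = ~b;     // corresponds to DAG.getNOT
      return a + notB + 1u;   // two's-complement negation of b, then add
    }
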
@@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
SDValue Op = Node->getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
EVT VT = Node->getValueType(0);
- assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))
- ->isNullValue() &&
+ assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1))->isZero() &&
"Unable to expand as libcall if it is not normal rounding");
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT);
@@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
break;
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FROUND:
case ISD::STRICT_FSIN:
case ISD::STRICT_FCOS:
case ISD::STRICT_FLOG:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 3553f9ec16c2..27f9cede1922 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
+ case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
@@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) {
GetSoftenedFloat(N->getOperand(0)));
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) {
+ EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty,
+ GetSoftenedFloat(N->getOperand(0)));
+ return NewFence;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
@@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- APInt API = APInt::getAllOnesValue(Size);
+ APInt API = APInt::getAllOnes(Size);
API.clearBit(Size - 1);
SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
@@ -820,6 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
+ case ISD::STRICT_FP_TO_FP16:
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
// We actually deal with the partially-softened FP_TO_FP16 node too, which
// returns an i16 so doesn't meet the constraints necessary for FP_ROUND.
assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16 ||
N->getOpcode() == ISD::STRICT_FP_ROUND);
bool IsStrict = N->isStrictFPOpcode();
SDValue Op = N->getOperand(IsStrict ? 1 : 0);
EVT SVT = Op.getValueType();
EVT RVT = N->getValueType(0);
- EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT;
+ EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 ||
+ N->getOpcode() == ISD::STRICT_FP_TO_FP16)
+ ? MVT::f16
+ : RVT;
RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8a3dd014901..1fa4d88fcb4a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
using namespace llvm;
#define DEBUG_TYPE "legalize-types"
@@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FSETCCS:
case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break;
case ISD::SMIN:
- case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SMAX:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UMIN:
case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break;
- case ISD::SHL: Res = PromoteIntRes_SHL(N); break;
+ case ISD::SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ false);
+ break;
case ISD::SIGN_EXTEND_INREG:
Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
- case ISD::SRA: Res = PromoteIntRes_SRA(N); break;
- case ISD::SRL: Res = PromoteIntRes_SRL(N); break;
+ case ISD::SRA:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ false);
+ break;
+ case ISD::SRL:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ false);
+ break;
case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break;
case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break;
case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break;
@@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
- case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
+ case ISD::MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SDIV:
- case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
+ case ISD::SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::UDIV:
- case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break;
+ case ISD::UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false);
+ break;
case ISD::SADDO:
case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
@@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECREDUCE(N);
break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntRes_VP_REDUCE(N);
+ break;
+
case ISD::FREEZE:
Res = PromoteIntRes_FREEZE(N);
break;
@@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = PromoteIntRes_FunnelShift(N);
break;
+
+ case ISD::VP_AND:
+ case ISD::VP_OR:
+ case ISD::VP_XOR:
+ case ISD::VP_ADD:
+ case ISD::VP_SUB:
+ case ISD::VP_MUL:
+ Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SDIV:
+ case ISD::VP_SREM:
+ Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_UDIV:
+ case ISD::VP_UREM:
+ Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_SHL:
+ Res = PromoteIntRes_SHL(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_ASHR:
+ Res = PromoteIntRes_SRA(N, /*IsVP*/ true);
+ break;
+ case ISD::VP_LSHR:
+ Res = PromoteIntRes_SRL(N, /*IsVP*/ true);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
CreateStackStoreLoad(InOp, OutVT));
}
-// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount
-// in the VT returned by getShiftAmountTy and to return a safe VT if we can't.
-static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI,
- SelectionDAG &DAG) {
- EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- // If any possible shift value won't fit in the prefered type, just use
- // something safe. It will be legalized when the shift is expanded.
- if (!ShiftVT.isVector() &&
- ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits()))
- ShiftVT = MVT::i32;
- return ShiftVT;
-}
-
SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) {
SDValue V = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::FREEZE, SDLoc(N),
@@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
}
@@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
return DAG.getNode(ISD::SRL, dl, NVT,
DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
DAG.getConstant(DiffBits, dl, ShiftVT));
@@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+ SDLoc dl(N);
+
+ // If the larger CTLZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) {
+ if (SDValue Result = TLI.expandCTLZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- SDLoc dl(N);
- EVT OVT = N->getValueType(0);
- EVT NVT = Op.getValueType();
Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(
@@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
}
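
The promoted CTLZ path counts in the wider type and then subtracts the extra leading zero bits introduced by the zero-extension. A scalar sketch of that adjustment, assuming a 16-bit value promoted to 32 bits (the compiler builtin is used only for illustration):

    #include <cstdint>

    // Count leading zeros of a 16-bit value by zero-extending to 32 bits,
    // counting there, and subtracting the 16 extra leading zero bits.
    unsigned ctlz16ViaCtlz32(uint16_t x) {
      if (x == 0) return 16;                       // keep the builtin's input nonzero
      unsigned wide = __builtin_clz((uint32_t)x);  // count in the promoted type
      return wide - (32 - 16);                     // drop the extra leading bits
    }
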
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+ // If the larger CTPOP isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type.
+ // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to
+ // TargetLowering.
+ if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) {
+ if (SDValue Result = TLI.expandCTPOP(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result);
+ return Result;
+ }
+ }
+
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
@@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
SDLoc dl(N);
+
+ // If the larger CTTZ isn't supported by the target, try to expand now.
+ // If we expand later we'll end up with more operations since we lost the
+ // original type. Don't expand if we can use CTPOP or CTLZ expansion on the
+ // larger type.
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) &&
+ !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) &&
+ !TLI.isOperationLegal(ISD::CTPOP, NVT) &&
+ !TLI.isOperationLegal(ISD::CTLZ, NVT)) {
+ if (SDValue Result = TLI.expandCTTZ(N, DAG)) {
+ Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result);
+ return Result;
+ }
+ }
+
if (N->getOpcode() == ISD::CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
@@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
+ ISD::LoadExtType ExtType = N->getExtensionType();
+ if (ExtType == ISD::NON_EXTLOAD)
+ ExtType = ISD::EXTLOAD;
+
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getOffset(), N->getMask(), ExtPassThru,
N->getMemoryVT(), N->getMemOperand(),
- N->getAddressingMode(), ISD::EXTLOAD);
+ N->getAddressingMode(), ExtType,
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
unsigned NewBits = PromotedType.getScalarSizeInBits();
if (Opcode == ISD::UADDSAT) {
- APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits);
SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
SDValue Add =
DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
@@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) {
// Shift cannot use a min/max expansion, we can't detect overflow if all of
// the bits have been shifted out.
- if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) {
unsigned ShiftOp;
switch (Opcode) {
case ISD::SADDSAT:
@@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
@@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) {
Op.getValueType(), Op, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) {
// The input may have strange things in the top bits of the registers, but
// these operations don't care. They may have weird bits going out, but
// that too is okay if they are integer operations.
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = GetPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) {
// Sign extend the input.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
@@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) {
// The input value must be properly sign extended.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) {
// The input value must be properly zero extended.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger)
RHS = ZExtPromotedInteger(RHS);
- return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS);
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS,
+ N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) {
@@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
unsigned Shift = SmallVT.getScalarSizeInBits();
- EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG);
+ EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout());
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
@@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
case ISD::SHL:
case ISD::SRA:
@@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ Res = PromoteIntOp_VP_REDUCE(N, OpNo);
+ break;
case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break;
}
@@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
// If the width of OpL/OpR excluding the duplicated sign bits is no greater
// than the width of NewLHS/NewRH, we can avoid inserting real truncate
// instruction, which is redundant eventually.
- unsigned OpLEffectiveBits =
- OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
- unsigned OpREffectiveBits =
- OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL);
+ unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR);
if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() &&
OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) {
NewLHS = OpL;
@@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
unsigned OpNo) {
-
SDValue DataOp = N->getValue();
- EVT DataVT = DataOp.getValueType();
SDValue Mask = N->getMask();
- SDLoc dl(N);
- bool TruncateStore = false;
if (OpNo == 4) {
+ // The Mask. Update in place.
+ EVT DataVT = DataOp.getValueType();
Mask = PromoteTargetBoolean(Mask, DataVT);
- // Update in place.
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
NewOps[4] = Mask;
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
- } else { // Data operand
- assert(OpNo == 1 && "Unexpected operand for promotion");
- DataOp = GetPromotedInteger(DataOp);
- TruncateStore = true;
}
- return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(),
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+ DataOp = GetPromotedInteger(DataOp);
+
+ return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),
N->getOffset(), Mask, N->getMemoryVT(),
N->getMemOperand(), N->getAddressingMode(),
- TruncateStore, N->isCompressingStore());
+ /*IsTruncating*/ true, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
@@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
return SDValue();
}
-SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
- SDLoc dl(N);
- SDValue Op;
+static unsigned getExtendForIntVecReduction(SDNode *N) {
switch (N->getOpcode()) {
- default: llvm_unreachable("Expected integer vector reduction");
+ default:
+ llvm_unreachable("Expected integer vector reduction");
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
- Op = GetPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_MUL:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ return ISD::ANY_EXTEND;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_SMIN:
- Op = SExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_SMIN:
+ return ISD::SIGN_EXTEND;
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
- Op = ZExtPromotedInteger(N->getOperand(0));
- break;
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_UMIN:
+ return ISD::ZERO_EXTEND;
}
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) {
+ switch (getExtendForIntVecReduction(N)) {
+ default:
+ llvm_unreachable("Impossible extension kind for integer reduction");
+ case ISD::ANY_EXTEND:
+ return GetPromotedInteger(V);
+ case ISD::SIGN_EXTEND:
+ return SExtPromotedInteger(V);
+ case ISD::ZERO_EXTEND:
+ return ZExtPromotedInteger(V);
+ }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
EVT EltVT = Op.getValueType().getVectorElementType();
EVT VT = N->getValueType(0);
+
if (VT.bitsGE(EltVT))
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
@@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(OpNo);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ if (OpNo == 2) { // Mask
+ // Update in place.
+ NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType());
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+
+ assert(OpNo == 1 && "Unexpected operand for promotion");
+
+ Op = PromoteIntOpVectorReduction(N, Op);
+
+ NewOps[OpNo] = Op;
+
+ EVT VT = N->getValueType(0);
+ EVT EltVT = Op.getValueType().getScalarType();
+
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps);
+
+ // Result size must be >= element/start-value size. If this is not the case
+ // after promotion, also promote both the start value and result type and
+ // then truncate.
+ NewOps[0] =
+ DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0));
+ SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce);
+}
+
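
The extension kind chosen per reduction opcode matters because the reduction is evaluated in a wider type: unsigned min/max must zero-extend, signed min/max must sign-extend, and bitwise/add/mul reductions can use any extension. A small scalar illustration of the unsigned-min case, assuming i8 lanes promoted to i32 (not taken from the patch):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // An unsigned min over i8 values done in i32 must zero-extend the lanes
    // and the start value, otherwise the ordering changes in the wider type.
    uint8_t uminReduceViaI32(const std::vector<uint8_t> &vals, uint8_t start) {
      uint32_t acc = start;                  // zero-extended start value
      for (uint8_t v : vals)
        acc = std::min<uint32_t>(acc, v);    // operate in the promoted type
      return (uint8_t)acc;                   // truncate back to the original type
    }
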
SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) {
SDValue Op = ZExtPromotedInteger(N->getOperand(1));
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
@@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
report_fatal_error("Do not know how to expand the result of this "
"operator!");
+ case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
@@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
bool HasAddCarry = TLI.isOperationLegalOrCustom(
ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasAddCarry) {
- EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG);
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
SDValue Sign =
DAG.getNode(ISD::SRA, dl, NVT, Hi,
DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy));
@@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
@@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType());
Op = GetSoftPromotedHalf(Op);
Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op);
+ Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op);
+ SplitInteger(Op, Lo, Hi);
+ return;
}
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
@@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask);
EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) {
- // The type from TLI is too small to fit the shift amount we want.
- // Override it with i32. The shift will have to be legalized.
- ShiftAmtTy = MVT::i32;
- }
SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy);
SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift);
SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift);
@@ -3464,8 +3633,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue Zero = DAG.getConstant(0, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs; if the resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
} else {
// For unsigned multiplication, we only need to check the max since we
@@ -3638,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
// Saturate to signed maximum.
APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
- APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnes(NVTSize);
Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
// Saturate to signed minimum.
@@ -3808,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- assert(ShiftTy.getScalarSizeInBits() >=
- Log2_32_Ceil(VT.getScalarSizeInBits()) &&
- "ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
@@ -3857,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
- SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ EVT ShAmtTy =
+ EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
+ SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy);
+ SDValue Ops[2] = {N->getOperand(0), ShAmt};
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(isSigned);
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
@@ -4035,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
LC = RTLIB::MULO_I64;
else if (VT == MVT::i128)
LC = RTLIB::MULO_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ // FIXME: This is not an optimal expansion, but better than crashing.
+ EVT WideVT =
+ EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1));
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ SDValue MulLo, MulHi;
+ SplitInteger(Mul, MulLo, MulHi);
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, dl, VT, MulLo,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT));
+ SDValue Overflow =
+ DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE);
+ SplitInteger(MulLo, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
SDValue Temp = DAG.CreateStackTemporary(PtrVT);
// Temporary for the overflow value, default it to zero.
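
The FIXME expansion above detects signed multiply overflow by doing the multiply in twice the width and checking that the high half is exactly the sign-replication of the low half. A scalar sketch, assuming 32-bit operands widened to 64 bits (names are illustrative):

    #include <cstdint>

    // Signed 32x32 multiply with overflow check via a 64-bit multiply:
    // no overflow iff the high 32 bits equal the sign bits of the low 32.
    bool smul32Overflow(int32_t a, int32_t b, int32_t &lo) {
      int64_t wide = (int64_t)a * (int64_t)b;
      lo = (int32_t)wide;                        // low half of the product
      int32_t hi = (int32_t)(wide >> 32);        // high half
      int32_t signOfLo = lo < 0 ? -1 : 0;        // models the SRA by 31
      return hi != signOfLo;                     // mismatch => overflow
    }
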
@@ -4188,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N,
SDValue &Lo, SDValue &Hi) {
- // Lower the rotate to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG);
+ // Delegate to funnel-shift expansion.
+ SDLoc DL(N);
+ unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR;
+ SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0),
+ N->getOperand(0), N->getOperand(1));
SplitInteger(Res, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
- // Lower the funnel shift to shifts and ORs which can be expanded.
- SDValue Res;
- TLI.expandFunnelShift(N, Res, DAG);
- SplitInteger(Res, Lo, Hi);
+void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // Values numbered from least significant to most significant.
+ SDValue In1, In2, In3, In4;
+ GetExpandedInteger(N->getOperand(0), In3, In4);
+ GetExpandedInteger(N->getOperand(1), In1, In2);
+ EVT HalfVT = In1.getValueType();
+
+ SDLoc DL(N);
+ unsigned Opc = N->getOpcode();
+ SDValue ShAmt = N->getOperand(2);
+ EVT ShAmtVT = ShAmt.getValueType();
+ EVT ShAmtCCVT = getSetCCResultType(ShAmtVT);
+
+ // If the shift amount is at least half the bitwidth, swap the inputs.
+ unsigned HalfVTBits = HalfVT.getScalarSizeInBits();
+ SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt,
+ DAG.getConstant(HalfVTBits, DL, ShAmtVT));
+ SDValue Cond =
+ DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT),
+ Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ);
+
+ // Expand to a pair of funnel shifts.
+ EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
+ SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT);
+
+ SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2);
+ SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3);
+ SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4);
+ Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt);
+ Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}
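
Rotates above are rewritten as funnel shifts of the value with itself (rotl(x, s) == fshl(x, x, s)), and a double-width funnel shift is then expanded into two half-width funnel shifts whose input window is shifted down by one half when the shift amount reaches half the bit width. A scalar model of the double-width case, assuming a 64-bit FSHL built from 32-bit halves (names are illustrative, not part of the patch):

    #include <cstdint>

    // 32-bit funnel shift left: top 32 bits of (hi:lo) shifted left by s % 32.
    static uint32_t fshl32(uint32_t hi, uint32_t lo, unsigned s) {
      s &= 31;
      if (s == 0) return hi;
      return (hi << s) | (lo >> (32 - s));
    }

    // 64-bit FSHL from two 32-bit funnel shifts, mirroring the strategy in
    // ExpandIntRes_FunnelShift: number the four 32-bit halves of the
    // concatenation (y, x) from least to most significant, swap the selected
    // window when the shift amount has the "half width" bit set, then funnel
    // shift adjacent halves by s % 32. Shift amounts behave modulo 64.
    uint64_t fshl64(uint64_t x, uint64_t y, unsigned s) {
      uint32_t in1 = (uint32_t)y, in2 = (uint32_t)(y >> 32);
      uint32_t in3 = (uint32_t)x, in4 = (uint32_t)(x >> 32);
      bool swap = (s & 32) != 0;        // shift amount >= half the width
      uint32_t sel1 = swap ? in1 : in2;
      uint32_t sel2 = swap ? in2 : in3;
      uint32_t sel3 = swap ? in3 : in4;
      uint32_t lo = fshl32(sel2, sel1, s);
      uint32_t hi = fshl32(sel3, sel2, s);
      return ((uint64_t)hi << 32) | lo;
    }
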
void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
@@ -4297,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
if (RHSLo == RHSHi) {
if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
- if (RHSCST->isAllOnesValue()) {
+ if (RHSCST->isAllOnes()) {
// Equality comparison to -1.
NewLHS = DAG.getNode(ISD::AND, dl,
LHSLo.getValueType(), LHSLo, LHSHi);
@@ -4317,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// If this is a comparison of the sign bit, just look at the top part.
// X > -1, x < 0
if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
- if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0
- (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1
+ if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0
+ (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1
NewLHS = LHSHi;
NewRHS = RHSHi;
return;
@@ -4369,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
- if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
- (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
- (LoCmpC && LoCmpC->isNullValue())))) {
+ // FIXME: Is the HiCmpC->isOne() here correct for
+ // ZeroOrNegativeOneBooleanContent?
+ if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) ||
+ (!EqAllowed &&
+ ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) {
// For LE / GE, if high part is known false, ignore the low part.
// For LT / GT: if low part is known false, return the high part.
// if high part is known true, ignore the low part.
@@ -4706,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
+ // Try and extract from a smaller type so that it eventually falls
+ // into the promotion code below.
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector ||
+ getTypeAction(InVT) == TargetLowering::TypeLegal) {
+ EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NElts = NInVT.getVectorMinNumElements();
+ uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue();
+
+ SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0,
+ DAG.getConstant(alignDown(IdxVal, NElts), dl,
+ BaseIdx.getValueType()));
+ SDValue Step2 = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1,
+ DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType()));
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2);
+ }
+
+ // Try and extract from a widened type.
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx};
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops);
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext);
+ }
+
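
The two-step extract above relies on decomposing the index as alignDown(Idx, NElts) plus Idx % NElts: the first extract grabs an aligned half-size chunk, the second picks the requested window inside it. A scalar sketch of the index math (illustrative only):

    #include <cstdint>

    // Decompose a subvector index for a two-step EXTRACT_SUBVECTOR:
    // first extract the aligned chunk starting at alignDown(idx, chunkElts),
    // then extract at the remainder offset within that chunk.
    void splitSubvectorIndex(uint64_t idx, uint64_t chunkElts,
                             uint64_t &chunkStart, uint64_t &innerIdx) {
      chunkStart = (idx / chunkElts) * chunkElts;   // alignDown(idx, chunkElts)
      innerIdx = idx % chunkElts;                   // offset inside the chunk
    }
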
// Promote operands and see if this is handled by target lowering,
// Otherwise, use the BUILD_VECTOR approach below
if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) {
@@ -4873,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ unsigned NumOperands = N->getNumOperands();
+ unsigned NumOutElem = NOutVT.getVectorMinNumElements();
EVT OutElemTy = NOutVT.getVectorElementType();
+ if (OutVT.isScalableVector()) {
+ // Find the largest promoted element type for each of the operands.
+ SDUse *MaxSizedValue = std::max_element(
+ N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) {
+ EVT AVT = A.getValueType().getVectorElementType();
+ EVT BVT = B.getValueType().getVectorElementType();
+ return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits();
+ });
+ EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType();
+
+ // Then promote all vectors to the largest element type.
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned I = 0; I < NumOperands; ++I) {
+ SDValue Op = N->getOperand(I);
+ EVT OpVT = Op.getValueType();
+ if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ else
+ assert(getTypeAction(OpVT) == TargetLowering::TypeLegal &&
+ "Unhandled legalization type");
+
+ if (OpVT.getVectorElementType().getScalarSizeInBits() <
+ MaxElementVT.getScalarSizeInBits())
+ Op = DAG.getAnyExtOrTrunc(Op, dl,
+ OpVT.changeVectorElementType(MaxElementVT));
+ Ops.push_back(Op);
+ }
+
+ // Do the CONCAT on the promoted type and finally truncate to (the promoted)
+ // NOutVT.
+ return DAG.getAnyExtOrTrunc(
+ DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ OutVT.changeVectorElementType(MaxElementVT), Ops),
+ dl, NOutVT);
+ }
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
- unsigned NumOutElem = NOutVT.getVectorNumElements();
- unsigned NumOperands = N->getNumOperands();
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
@@ -4957,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
// we can simply change the result type.
SDLoc dl(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->ops());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) {
+ // The VP_REDUCE result size may be larger than the element size, so we can
+ // simply change the result type. However, the start value and the result
+ // must have the same type.
+ SDLoc DL(N);
+ SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start,
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -4974,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
}
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) {
+ SDLoc dl(N);
+ // The result type is equal to the first input operand's type, so the
+ // type that needs promoting must be the second source vector.
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ SDValue Idx = N->getOperand(2);
+ EVT PromVT = EVT::getVectorVT(*DAG.getContext(),
+ V1.getValueType().getVectorElementType(),
+ V0.getValueType().getVectorElementCount());
+ V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT);
+ SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx);
+ return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 05a974af3b55..1f73c9eea104 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() {
#endif
PerformExpensiveChecks();
- SDNode *N = Worklist.back();
- Worklist.pop_back();
+ SDNode *N = Worklist.pop_back_val();
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 8d17d8fc68b1..da282ecad282 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -289,6 +289,12 @@ private:
return DAG.getZeroExtendInReg(Op, DL, OldVT);
}
+ // Promote the given operand V (vector or scalar) according to N's specific
+ // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns
+ // the operand promoted with the nominal extension for that reduction kind
+ // (ISD::(ANY|ZERO|SIGN)_EXTEND).
+ SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V);
+
// Integer Result Promotion.
void PromoteIntegerResult(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
@@ -332,14 +338,14 @@ private:
SDValue PromoteIntRes_VSELECT(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
SDValue PromoteIntRes_SETCC(SDNode *N);
- SDValue PromoteIntRes_SHL(SDNode *N);
- SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N);
- SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N);
- SDValue PromoteIntRes_SExtIntBinOp(SDNode *N);
+ SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP);
SDValue PromoteIntRes_UMINUMAX(SDNode *N);
SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N);
- SDValue PromoteIntRes_SRA(SDNode *N);
- SDValue PromoteIntRes_SRL(SDNode *N);
+ SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP);
+ SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
@@ -353,6 +359,7 @@ private:
SDValue PromoteIntRes_DIVFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_VP_REDUCE(SDNode *N);
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
@@ -369,6 +376,7 @@ private:
SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
+ SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
@@ -394,6 +402,7 @@ private:
SDValue PromoteIntOp_FIX(SDNode *N);
SDValue PromoteIntOp_FPOWI(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
+ SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -518,6 +527,7 @@ private:
SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N);
SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(SDNode *N);
@@ -816,7 +826,7 @@ private:
// Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
void SplitVectorResult(SDNode *N, unsigned ResNo);
- void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -898,6 +908,7 @@ private:
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+ SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
@@ -912,7 +923,7 @@ private:
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
SDValue WidenVecRes_Ternary(SDNode *N);
- SDValue WidenVecRes_Binary(SDNode *N);
+ SDValue WidenVecRes_Binary(SDNode *N, bool IsVP);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
@@ -972,10 +983,10 @@ private:
LoadSDNode *LD, ISD::LoadExtType ExtType);
/// Helper function to generate a set of stores to store a widen vector into
- /// non-widen memory.
+ /// non-widen memory. Returns true if successful, false otherwise.
/// StChain: list of chains for the stores we have generated
/// ST: store of a widen value
- void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
+ bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST);
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
@@ -1011,6 +1022,7 @@ private:
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
+ void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 81cc2bf10d25..3d3c9a2ad837 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L);
Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H);
}
+
+void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc DL(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L);
+ Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H);
+}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index ebe3bfc4b75a..88a28a3be53e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
return RecursivelyLegalizeResults(Op, ResultVals);
}
-// FIXME: This is very similar to the X86 override of
-// TargetLowering::LowerOperationWrapper. Can we merge them somehow?
+// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
+// merge them somehow?
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
@@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandSETCC(Node, Results);
return;
case ISD::ABS:
- if (TLI.expandABS(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
ExpandBITREVERSE(Node, Results);
return;
case ISD::CTPOP:
- if (TLI.expandCTPOP(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- if (TLI.expandCTLZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- if (TLI.expandCTTZ(Node, Tmp, DAG)) {
- Results.push_back(Tmp);
+ if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
+ Results.push_back(Expanded);
return;
}
break;
@@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
// What is the size of each element in the vector mask.
EVT BitTy = MaskTy.getScalarType();
- Mask = DAG.getSelect(DL, BitTy, Mask,
- DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
- BitTy),
- DAG.getConstant(0, DL, BitTy));
+ Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
+ DAG.getConstant(0, DL, BitTy));
// Broadcast the mask so that the entire vector is all one or all zero.
if (VT.isFixedLengthVector())
@@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
@@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
EVT VT = Node->getValueType(0);
+ // Scalable vectors can't use shuffle expansion.
+ if (VT.isScalableVector())
+ return TLI.expandBSWAP(Node, DAG);
+
// Generate a byte wise shuffle mask for the BSWAP.
SmallVector<int, 16> ShuffleMask;
createBSWAPShuffleMask(VT, ShuffleMask);
EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
// Only emit a shuffle if the mask is legal.
- if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
- return DAG.UnrollVectorOp(Node);
+ if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
+ SDLoc DL(Node);
+ SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ }
- SDLoc DL(Node);
- SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
- Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
- return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+ // If we have the appropriate vector bit operations, it is better to use them
+ // than unrolling and expanding each component.
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+ return TLI.expandBSWAP(Node, DAG);
+
+ // Otherwise unroll.
+ return DAG.UnrollVectorOp(Node);
}
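
The shuffle path above treats the vector as bytes and reverses the bytes within each element, so BSWAP becomes a single byte shuffle when the mask is legal. A sketch of how such a mask can be built for a given element size in bytes (this mirrors what createBSWAPShuffleMask produces; names and types are assumptions):

    #include <vector>

    // Build a byte-wise shuffle mask that byte-swaps every element of a
    // vector with numElts elements of eltBytes bytes each: within each
    // element the byte order is reversed, the elements themselves stay put.
    std::vector<int> bswapShuffleMask(unsigned numElts, unsigned eltBytes) {
      std::vector<int> mask;
      for (unsigned e = 0; e != numElts; ++e)
        for (unsigned b = 0; b != eltBytes; ++b)
          mask.push_back(e * eltBytes + (eltBytes - 1 - b));
      return mask;
    }
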
void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
EVT VT = Node->getValueType(0);
+ // We can't unroll or use shuffles for scalable vectors.
+ if (VT.isScalableVector()) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
+ return;
+ }
+
// If we have the scalar operation, it's probably cheaper to unroll it.
if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) {
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node,
if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
- TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
- // Let LegalizeDAG handle this later.
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) {
+ Results.push_back(TLI.expandBITREVERSE(Node, DAG));
return;
+ }
// Otherwise unroll.
SDValue Tmp = DAG.UnrollVectorOp(Node);
@@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
- SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
- SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
+ SDValue NotMask = DAG.getNOT(DL, Mask, VT);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
@@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
if (Node->getOpcode() == ISD::STRICT_FSETCC ||
Node->getOpcode() == ISD::STRICT_FSETCCS)
ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
- DAG.getConstant(0, dl, EltVT));
+ DAG.getAllOnesConstant(dl, EltVT),
+ DAG.getConstant(0, dl, EltVT));
OpValues.push_back(ScalarResult);
OpChains.push_back(ScalarChain);
@@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(), TmpEltVT),
LHSElem, RHSElem, CC);
- Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
- DAG.getConstant(APInt::getAllOnesValue
- (EltVT.getSizeInBits()), dl, EltVT),
+ Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT),
DAG.getConstant(0, dl, EltVT));
}
return DAG.getBuildVector(VT, dl, Ops);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 91242bbf866f..539c9cb9c256 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue Arg = N->getOperand(2).getOperand(0);
if (Arg.isUndef())
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
- unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
return GetScalarizedVector(N->getOperand(Op));
}
@@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- SplitVecRes_BinOp(N, Lo, Hi);
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false);
break;
case ISD::FMA:
case ISD::FSHL:
@@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true);
+ break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
}
}
-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi,
+ bool IsVP) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
const SDNodeFlags Flags = N->getFlags();
unsigned Opcode = N->getOpcode();
- Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
- Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ if (!IsVP) {
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
+ return;
+ }
+
+ // Split the mask.
+ SDValue MaskLo, MaskHi;
+ SDValue Mask = N->getOperand(2);
+ EVT MaskVT = Mask.getValueType();
+ if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
+
+ // Split the vector length parameter.
+ // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl, %halfnumelts).
+ SDValue EVL = N->getOperand(3);
+ EVT VecVT = N->getValueType(0);
+ EVT EVLVT = EVL.getValueType();
+ assert(VecVT.getVectorElementCount().isKnownEven() &&
+ "Expecting the mask to be an evenly-sized vector");
+ unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2;
+ SDValue HalfNumElts =
+ VecVT.isFixedLengthVector()
+ ? DAG.getConstant(HalfMinNumElts, dl, EVLVT)
+ : DAG.getVScale(dl, EVLVT,
+ APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts));
+ SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts);
+ SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts);
+
+ Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
+ {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
+ Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
+ {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
}
void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
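The EVL split above maps the single explicit vector length onto the two halves: the low half gets umin(%evl, %halfnumelts) and the high half gets the saturating remainder. A standalone sketch of that arithmetic with plain unsigned values (illustrative only):

#include <algorithm>
#include <cstdio>

// Split an explicit vector length EVL across two halves of Half lanes each:
// lo = umin(EVL, Half), hi = usubsat(EVL, Half).
static void splitEVL(unsigned EVL, unsigned Half, unsigned &Lo, unsigned &Hi) {
  Lo = std::min(EVL, Half);
  Hi = EVL > Half ? EVL - Half : 0; // unsigned saturating subtraction
}

int main() {
  unsigned Lo, Hi;
  splitEVL(5, 4, Lo, Hi); // e.g. a v8i32 VP op with EVL=5 split into two v4i32
  std::printf("lo=%u hi=%u\n", Lo, Hi); // lo=4 hi=1
  return 0;
}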
@@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::INSERT_SUBVECTOR:
+ Res = WidenVecRes_INSERT_SUBVECTOR(N);
+ break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
@@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::USHLSAT:
case ISD::ROTL:
case ISD::ROTR:
- Res = WidenVecRes_Binary(N);
+ Res = WidenVecRes_Binary(N, /*IsVP*/ false);
break;
case ISD::FADD:
@@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FSHR:
Res = WidenVecRes_Ternary(N);
break;
+ case ISD::VP_ADD:
+ case ISD::VP_AND:
+ case ISD::VP_MUL:
+ case ISD::VP_OR:
+ case ISD::VP_SUB:
+ case ISD::VP_XOR:
+ case ISD::VP_SHL:
+ case ISD::VP_LSHR:
+ case ISD::VP_ASHR:
+ case ISD::VP_SDIV:
+ case ISD::VP_UDIV:
+ case ISD::VP_SREM:
+ case ISD::VP_UREM:
+ case ISD::VP_FADD:
+ case ISD::VP_FSUB:
+ case ISD::VP_FMUL:
+ case ISD::VP_FDIV:
+ case ISD::VP_FREM:
+ // Vector-predicated binary op widening. Note that -- unlike the
+ // unpredicated versions -- we don't have to worry about trapping on
+ // operations like UDIV, FADD, etc., as we pass on the original vector
+ // length parameter. This means the widened elements containing garbage
+ // aren't active.
+ Res = WidenVecRes_Binary(N, /*IsVP*/ true);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
}
-SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) {
// Binary op widening.
SDLoc dl(N);
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
+ if (!IsVP)
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
+ N->getFlags());
+ // For VP operations, we must also widen the mask. Note that the mask type
+ // may not actually need widening, leading it to be split along with the VP
+ // operation.
+ // FIXME: This could lead to an infinite split/widen loop. We only handle the
+ // case where the mask needs widening to an identically-sized type as the
+ // vector inputs.
+ SDValue Mask = N->getOperand(2);
+ assert(getTypeAction(Mask.getValueType()) ==
+ TargetLowering::TypeWidenVector &&
+ "Unable to widen binary VP op");
+ Mask = GetWidenedVector(Mask);
+ assert(Mask.getValueType().getVectorElementCount() ==
+ WidenVT.getVectorElementCount() &&
+ "Unable to widen binary VP op");
+ return DAG.getNode(N->getOpcode(), dl, WidenVT,
+ {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
}
SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
@@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
EVT InVT = InOp.getValueType();
@@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
- unsigned InVTNumElts = InVT.getVectorNumElements();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
+ ElementCount InVTEC = InVT.getVectorElementCount();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts) {
+ InVTEC = InVT.getVectorElementCount();
+ if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
@@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
// Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
+ unsigned NumConcat =
+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
@@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
- if (InVTNumElts % WidenNumElts == 0) {
+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shorten input vector.
@@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
@@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(WidenVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue InOp = N->getOperand(0);
SDValue Idx = N->getOperand(1);
SDLoc dl(N);
- if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector)
+ auto InOpTypeAction = getTypeAction(InOp.getValueType());
+ if (InOpTypeAction == TargetLowering::TypeWidenVector)
InOp = GetWidenedVector(InOp);
EVT InVT = InOp.getValueType();
@@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
if (IdxVal == 0 && InVT == WidenVT)
return InOp;
- if (VT.isScalableVector())
- report_fatal_error("Don't know how to widen the result of "
- "EXTRACT_SUBVECTOR for scalable vectors");
-
// Check if we can extract from the vector.
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
- unsigned InNumElts = InVT.getVectorNumElements();
+ unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
+ unsigned InNumElts = InVT.getVectorMinNumElements();
if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+ if (VT.isScalableVector()) {
+ // Try to split the operation up into smaller extracts and concat the
+ // results together, e.g.
+ // nxv6i64 extract_subvector(nxv12i64, 6)
+ // <->
+ // nxv8i64 concat(
+ // nxv2i64 extract_subvector(nxv12i64, 6)
+ // nxv2i64 extract_subvector(nxv12i64, 8)
+ // nxv2i64 extract_subvector(nxv12i64, 10)
+ // undef)
+ unsigned VTNElts = VT.getVectorMinNumElements();
+ unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts);
+ assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
+ "down type's element count");
+ EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ ElementCount::getScalable(GCD));
+ // Avoid recursion around e.g. nxv1i8.
+ if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
+ SmallVector<SDValue> Parts;
+ unsigned I = 0;
+ for (; I < VTNElts / GCD; ++I)
+ Parts.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp,
+ DAG.getVectorIdxConstant(IdxVal + I * GCD, dl)));
+ for (; I < WidenNumElts / GCD; ++I)
+ Parts.push_back(DAG.getUNDEF(PartVT));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
+ }
+
+ report_fatal_error("Don't know how to widen the result of "
+ "EXTRACT_SUBVECTOR for scalable vectors");
+ }
+
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
- EVT EltVT = VT.getVectorElementType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
for (i = 0; i < NumElts; ++i)
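The scalable EXTRACT_SUBVECTOR widening above chops the extract into GCD-sized pieces and pads with undef up to the widened element count, as in the nxv6i64-from-nxv12i64 example in the comment. A standalone sketch of that index arithmetic, numbers only:

#include <cstdio>
#include <numeric>

// For a result with VTNElts (min) elements widened to WidenNumElts, starting
// at IdxVal, list the GCD-sized sub-extracts plus trailing undef parts.
static void planExtractParts(unsigned VTNElts, unsigned WidenNumElts,
                             unsigned IdxVal) {
  unsigned GCD = std::gcd(VTNElts, WidenNumElts);
  for (unsigned I = 0; I < VTNElts / GCD; ++I)
    std::printf("extract %u elts at index %u\n", GCD, IdxVal + I * GCD);
  for (unsigned I = VTNElts / GCD; I < WidenNumElts / GCD; ++I)
    std::printf("undef part of %u elts\n", GCD);
}

int main() {
  planExtractParts(6, 8, 6); // three 2-element extracts at 6, 8, 10, then undef
  return 0;
}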
@@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
else
Result = GenWidenVectorLoads(LdChain, LD);
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
- return Result;
+ return Result;
+ }
+
+ // Generate a vector-predicated load if it is custom/legal on the target. To
+ // avoid possible recursion, only do this if the widened mask type is legal.
+ // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ EVT LdVT = LD->getMemoryVT();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ SDLoc DL(N);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = LdVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = LD->getMemOperand();
+ SDValue NewLoad =
+ DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
+ MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),
+ MMO->getAAInfo());
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
+
+ return NewLoad;
+ }
+
+ report_fatal_error("Unable to widen vector load");
}
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
@@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
SDValue Cond1 = N->getOperand(0);
EVT CondVT = Cond1.getValueType();
@@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
}
EVT CondEltVT = CondVT.getVectorElementType();
- EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(),
- CondEltVT, WidenNumElts);
+ EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
Cond1 = GetWidenedVector(Cond1);
@@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
return TLI.scalarizeVectorStore(ST, DAG);
SmallVector<SDValue, 16> StChain;
- GenWidenVectorStores(StChain, ST);
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
- if (StChain.size() == 1)
- return StChain[0];
- else
return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
+ // Generate a vector-predicated store if it is custom/legal on the target.
+ // To avoid possible recursion, only do this if the widened mask type is
+ // legal.
+ // FIXME: Not all targets may support EVL in VP_STORE. These will have been
+ // removed from the IR by the ExpandVectorPredication pass but we're
+ // reintroducing them here.
+ SDValue StVal = ST->getValue();
+ EVT StVT = StVal.getValueType();
+ EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ WideVT.getVectorElementCount());
+ if (WideVT.isScalableVector() &&
+ TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+ TLI.isTypeLegal(WideMaskVT)) {
+ // Widen the value.
+ SDLoc DL(N);
+ StVal = GetWidenedVector(StVal);
+ SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
+ MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
+ unsigned NumVTElts = StVT.getVectorMinNumElements();
+ SDValue EVL =
+ DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ const auto *MMO = ST->getMemOperand();
+ return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask,
+ EVL, MMO->getPointerInfo(), MMO->getAlign(),
+ MMO->getFlags(), MMO->getAAInfo());
+ }
+
+ report_fatal_error("Unable to widen vector store");
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
@@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
// Align: If 0, don't allow use of a wider type
// WidenEx: If Align is not 0, the amount additional we can load/store from.
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
- unsigned Width, EVT WidenVT,
- unsigned Align = 0, unsigned WidenEx = 0) {
+static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
EVT WidenEltVT = WidenVT.getVectorElementType();
const bool Scalable = WidenVT.isScalableVector();
unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize();
@@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
}
}
+ // Using element-wise loads and stores for widening operations is not
+ // supported for scalable vectors.
if (Scalable)
- report_fatal_error("Using element-wise loads and stores for widening "
- "operations is not supported for scalable vectors");
+ return None;
+
return RetVT;
}
@@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
- unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
+ unsigned LdAlign =
+ (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
- EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- TypeSize NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ Optional<EVT> FirstVT =
+ findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
+ WidthDiff.getKnownMinSize());
+
+ if (!FirstVT)
+ return SDValue();
+
+ SmallVector<EVT, 8> MemVTs;
+ TypeSize FirstVTWidth = FirstVT->getSizeInBits();
+
+ // Unless we're able to load in one instruction, we must work out how to load
+ // the remainder.
+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
+ Optional<EVT> NewVT = FirstVT;
+ TypeSize RemainingWidth = LdWidth;
+ TypeSize NewVTWidth = FirstVTWidth;
+ do {
+ RemainingWidth -= NewVTWidth;
+ if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
+ // The current type we are using is too large. Find a better size.
+ NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT,
+ LdAlign, WidthDiff.getKnownMinSize());
+ if (!NewVT)
+ return SDValue();
+ NewVTWidth = NewVT->getSizeInBits();
+ }
+ MemVTs.push_back(*NewVT);
+ } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
+ }
+
+ SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
- if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) {
- if (!NewVT.isVector()) {
- unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ if (MemVTs.empty()) {
+ assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
+ if (!FirstVT->isVector()) {
+ unsigned NumElts =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
- if (NewVT == WidenVT)
+ if (FirstVT == WidenVT)
return LdOp;
// TODO: We don't currently have any tests that exercise this code path.
- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
- unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
+ assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0);
+ unsigned NumConcat =
+ WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
+ SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
@@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
uint64_t ScaledOffset = 0;
MachinePointerInfo MPI = LD->getPointerInfo();
- do {
- LdWidth -= NewVTWidth;
- IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
- &ScaledOffset);
-
- if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) {
- // The current type we are using is too large. Find a better size.
- NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
- WidthDiff.getKnownMinSize());
- NewVTWidth = NewVT.getSizeInBits();
- }
+ // First increment past the first load.
+ IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
+ &ScaledOffset);
+
+ for (EVT MemVT : MemVTs) {
Align NewAlign = ScaledOffset == 0
? LD->getOriginalAlign()
: commonAlignment(LD->getAlign(), ScaledOffset);
SDValue L =
- DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
- LdChain.push_back(L.getValue(1));
+ DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
LdOps.push_back(L);
- LdOp = L;
- } while (TypeSize::isKnownGT(LdWidth, NewVTWidth));
+ LdChain.push_back(L.getValue(1));
+ IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
+ }
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
return DAG.getBuildVector(WidenVT, dl, Ops);
}
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StoreSDNode *ST) {
// The strategy assumes that we can efficiently store power-of-two widths.
// The routine chops the vector into the largest vector stores with the same
@@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MachinePointerInfo MPI = ST->getPointerInfo();
uint64_t ScaledOffset = 0;
+
+ // A breakdown of how to widen this vector store. Each element of the vector
+ // is a memory VT combined with the number of times it is to be stored,
+ // e.g., v5i32 -> {{v2i32,2},{i32,1}}
+ SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;
+
while (StWidth.isNonZero()) {
// Find the largest vector type we can store with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ Optional<EVT> NewVT =
+ findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT);
+ if (!NewVT)
+ return false;
+ MemVTs.push_back({*NewVT, 0});
+ TypeSize NewVTWidth = NewVT->getSizeInBits();
+
+ do {
+ StWidth -= NewVTWidth;
+ MemVTs.back().second++;
+ } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ }
+
+ for (const auto &Pair : MemVTs) {
+ EVT NewVT = Pair.first;
+ unsigned Count = Pair.second;
TypeSize NewVTWidth = NewVT.getSizeInBits();
if (NewVT.isVector()) {
@@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
Idx += NumVTElts;
-
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
&ScaledOffset);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
} else {
// Cast the vector to the scalar type we can store.
unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize();
@@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo);
StChain.push_back(PartStore);
- StWidth -= NewVTWidth;
IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
- } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
+ } while (--Count);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth;
}
}
+
+ return true;
}
/// Modifies a vector input (widen or narrows) to a vector of NVT. The
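GenWidenVectorStores now precomputes its breakdown as (memory VT, repeat count) pairs before emitting any stores, e.g. v5i32 -> {{v2i32,2},{i32,1}} as noted in the hunk above. A standalone sketch of that chopping loop using bit widths only; the candidate widths stand in for whatever findMemType would report as legal on a given target:

#include <cstdio>
#include <utility>
#include <vector>

// Chop StWidth bits into (chunk width, count) pairs, always taking the
// largest candidate chunk that still fits, mirroring the MemVTs loop above.
static std::vector<std::pair<unsigned, unsigned>>
planStores(unsigned StWidth, const std::vector<unsigned> &Chunks) {
  std::vector<std::pair<unsigned, unsigned>> Plan;
  while (StWidth != 0) {
    unsigned W = 0;
    for (unsigned C : Chunks)
      if (C <= StWidth && C > W)
        W = C; // largest chunk that still fits
    if (W == 0)
      break; // nothing fits; the real code gives up (returns false)
    unsigned Count = 0;
    do {
      StWidth -= W;
      ++Count;
    } while (StWidth != 0 && StWidth >= W);
    Plan.push_back({W, Count});
  }
  return Plan;
}

int main() {
  // 160 bits (v5i32) with 64-bit (v2i32) and 32-bit (i32) chunks available:
  for (auto &P : planStores(160, {64, 32}))
    std::printf("%u bits x %u\n", P.first, P.second); // 64x2, then 32x1
  return 0;
}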
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 75b4242a415c..f64b332a7fef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -192,7 +192,7 @@ public:
// Returns the SDNodes which this SDDbgValue depends on.
SmallVector<SDNode *> getSDNodes() const {
SmallVector<SDNode *> Dependencies;
- for (SDDbgOperand DbgOp : getLocationOps())
+ for (const SDDbgOperand &DbgOp : getLocationOps())
if (DbgOp.getKind() == SDDbgOperand::SDNODE)
Dependencies.push_back(DbgOp.getSDNode());
for (SDNode *Node : getAdditionalDependencies())
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0022e5ec31f0..1b89864116cb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -56,9 +56,7 @@ namespace {
SUnit *pop() {
if (empty()) return nullptr;
- SUnit *V = Queue.back();
- Queue.pop_back();
- return V;
+ return Queue.pop_back_val();
}
};
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b2a8c8bdd78c..95f7e43b151d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// There are either zero or one users of the Glue result.
bool HasGlueUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI)
- if (GlueVal.isOperandOf(*UI)) {
+ for (SDNode *U : N->uses())
+ if (GlueVal.isOperandOf(U)) {
HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
- N = *UI;
+ N = U;
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
NodeSUnit->isCall = true;
break;
@@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
/// Returns true if \p DV has any VReg operand locations which don't exist in
/// VRBaseMap.
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) {
- for (SDDbgOperand L : DV->getLocationOps()) {
+ for (const SDDbgOperand &L : DV->getLocationOps()) {
if (L.getKind() == SDDbgOperand::SDNODE &&
VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0)
return true;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2a98464425c4..008665d50233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) {
APInt SplatVal;
- return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue();
+ return isConstantSplatVector(N, SplatVal) && SplatVal.isZero();
}
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) {
}
}
+bool ISD::isVPBinaryOp(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
+bool ISD::isVPReduction(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ return false;
+#define PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \
+ case ISD::SDOPC: \
+ return true;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
/// The operand position of the vector mask.
Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
switch (Opcode) {
@@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
break;
}
+ case ISD::VP_LOAD: {
+ const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N);
+ ID.AddInteger(ELD->getMemoryVT().getRawBits());
+ ID.AddInteger(ELD->getRawSubclassData());
+ ID.AddInteger(ELD->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_STORE: {
+ const VPStoreSDNode *EST = cast<VPStoreSDNode>(N);
+ ID.AddInteger(EST->getMemoryVT().getRawBits());
+ ID.AddInteger(EST->getRawSubclassData());
+ ID.AddInteger(EST->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_GATHER: {
+ const VPGatherSDNode *EG = cast<VPGatherSDNode>(N);
+ ID.AddInteger(EG->getMemoryVT().getRawBits());
+ ID.AddInteger(EG->getRawSubclassData());
+ ID.AddInteger(EG->getPointerInfo().getAddrSpace());
+ break;
+ }
+ case ISD::VP_SCATTER: {
+ const VPScatterSDNode *ES = cast<VPScatterSDNode>(N);
+ ID.AddInteger(ES->getMemoryVT().getRawBits());
+ ID.AddInteger(ES->getRawSubclassData());
+ ID.AddInteger(ES->getPointerInfo().getAddrSpace());
+ break;
+ }
case ISD::MLOAD: {
const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
ID.AddInteger(MLD->getMemoryVT().getRawBits());
@@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.getScalarType();
- SDValue NegOne =
- getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT);
- return getNode(ISD::XOR, DL, VT, Val, NegOne);
+ return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT));
}
SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
@@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
if (SameNumElts)
return N1;
if (auto *C = dyn_cast<ConstantSDNode>(Splat))
- if (C->isNullValue())
+ if (C->isZero())
return N1;
}
@@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &C1 = N1C->getAPIntValue();
- switch (Cond) {
- default: llvm_unreachable("Unknown integer setcc!");
- case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT);
- case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT);
- case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT);
- case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT);
- case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT);
- case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT);
- case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT);
- case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT);
- case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT);
- case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT);
- }
+ return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)),
+ dl, VT, OpVT);
}
}
@@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
return SDValue();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return GetDemandedBits(V, DemandedBits, DemandedElts);
}
@@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
switch (V.getOpcode()) {
case ISD::SPLAT_VECTOR:
UndefElts = V.getOperand(0).isUndef()
- ? APInt::getAllOnesValue(DemandedElts.getBitWidth())
+ ? APInt::getAllOnes(DemandedElts.getBitWidth())
: APInt(DemandedElts.getBitWidth(), 0);
return true;
case ISD::ADD:
@@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
unsigned NumElts = VT.getVectorNumElements();
assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch");
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
switch (V.getOpcode()) {
case ISD::BUILD_VECTOR: {
@@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
// For now we don't support this with scalable vectors.
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
return isSplatValue(V, DemandedElts, UndefElts) &&
(AllowUndefs || !UndefElts);
}
@@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
APInt DemandedElts;
if (!VT.isScalableVector())
- DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (isSplatValue(V, DemandedElts, UndefElts)) {
if (VT.isScalableVector()) {
@@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const {
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return computeKnownBits(Op, DemandedElts, Depth);
}
@@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Known.One.setAllBits();
Known.Zero.setAllBits();
@@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// bits from the overlapping larger input elements and extracting the
// sub sections we actually care about.
unsigned SubScale = SubBitWidth / BitWidth;
- APInt SubDemandedElts(NumElts / SubScale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SubDemandedElts.setBit(i / SubScale);
-
+ APInt SubDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale);
Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1);
Known.Zero.setAllBits(); Known.One.setAllBits();
@@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
}))
return true;
+ // Is the operand of a splat vector a constant power of two?
+ if (Val.getOpcode() == ISD::SPLAT_VECTOR)
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0)))
+ if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2())
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
@@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
return 1;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ComputeNumSignBits(Op, DemandedElts, Depth);
}
@@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
assert(VT.isVector() && "Expected bitcast to vector");
unsigned Scale = SrcBits / VTBits;
- APInt SrcDemandedElts(NumElts / Scale, 0);
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
+ APInt SrcDemandedElts =
+ APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale);
// Fast case - sign splat can be simply split across the small elements.
Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1);
@@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Special case decrementing a value (ADD X, -1):
if (ConstantSDNode *CRHS =
isConstOrConstSplat(Op.getOperand(1), DemandedElts))
- if (CRHS->isAllOnesValue()) {
+ if (CRHS->isAllOnes()) {
KnownBits Known =
computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// Handle NEG.
if (ConstantSDNode *CLHS =
isConstOrConstSplat(Op.getOperand(0), DemandedElts))
- if (CLHS->isNullValue()) {
+ if (CLHS->isZero()) {
KnownBits Known =
computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((Known.Zero | 1).isAllOnesValue())
+ if ((Known.Zero | 1).isAllOnes())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// If we know the element index, just demand that vector element, else for
// an unknown element index, ignore DemandedElts and demand them all.
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts =
@@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedSubElts) {
@@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return std::max(FirstAnswer, Mask.countLeadingOnes());
}
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
+unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op,
+ const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth);
+ return Op.getScalarValueSizeInBits() - SignBits + 1;
+}
+
bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
unsigned Depth) const {
// Early out for FREEZE.
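The new ComputeMinSignedBits helpers above simply invert ComputeNumSignBits: if the top SignBits bits are all copies of the sign bit, the value fits in BitWidth - SignBits + 1 signed bits. A standalone illustration for one concrete 32-bit value (C++20 <bit>, not the SelectionDAG API):

#include <bit>
#include <cstdint>
#include <cstdio>

// Number of leading bits equal to the sign bit (what ComputeNumSignBits
// reports when the value is fully known).
static unsigned numSignBits(int32_t X) {
  uint32_t U = static_cast<uint32_t>(X);
  return X < 0 ? std::countl_one(U) : std::countl_zero(U);
}

int main() {
  int32_t V = -5;                     // 0xFFFFFFFB
  unsigned SignBits = numSignBits(V); // 29 leading sign-bit copies
  unsigned MinSigned = 32 - SignBits + 1;
  std::printf("%u sign bits, fits in %u signed bits\n", SignBits, MinSigned);
  // Prints: 29 sign bits, fits in 4 signed bits (-5 is 0b1011 in 4 bits).
  return 0;
}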
@@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly,
return false;
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth);
}
@@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
case ISD::UNDEF:
return PoisonOnly;
- // TODO: ISD::BUILD_VECTOR handling
+ case ISD::BUILD_VECTOR:
+ // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements -
+ // this shouldn't affect the result.
+ for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly,
+ Depth + 1))
+ return false;
+ }
+ return true;
// TODO: Search for noundef attributes from library functions.
@@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
"Floating point types unsupported - use isKnownNeverZeroFloat");
// If the value is a constant, we can obviously see if it is a zero or not.
- if (ISD::matchUnaryPredicate(
- Op, [](ConstantSDNode *C) { return !C->isNullValue(); }))
+ if (ISD::matchUnaryPredicate(Op,
+ [](ConstantSDNode *C) { return !C->isZero(); }))
return true;
// TODO: Recognize more cases here.
@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
SelectionDAG &DAG) {
- if (cast<ConstantSDNode>(Step)->isNullValue())
+ if (cast<ConstantSDNode>(Step)->isZero())
return DAG.getConstant(0, DL, VT);
return SDValue();
@@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
APFloat apf(EVTToAPFloatSemantics(VT),
- APInt::getNullValue(VT.getSizeInBits()));
+ APInt::getZero(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
@@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
SDValue Ops = {Operand};
- if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
@@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
+ if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
+ return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS)
return SDValue();
- // For now, the array Ops should only contain two values.
- // This enforcement will be removed once this function is merged with
- // FoldConstantVectorArithmetic
- if (Ops.size() != 2)
+ unsigned NumOps = Ops.size();
+ if (NumOps == 0)
return SDValue();
if (isUndef(Opcode, Ops))
return getUNDEF(VT);
- SDNode *N1 = Ops[0].getNode();
- SDNode *N2 = Ops[1].getNode();
-
// Handle the case of two scalars.
- if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
- if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
- if (C1->isOpaque() || C2->isOpaque())
- return SDValue();
-
- Optional<APInt> FoldAttempt =
- FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
- if (!FoldAttempt)
- return SDValue();
-
- SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
- assert((!Folded || !VT.isVector()) &&
- "Can't fold vectors ops with scalar operands");
- return Folded;
- }
- }
-
- // fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
- return FoldSymbolOffset(Opcode, VT, GA, N2);
- if (TLI->isCommutativeBinOp(Opcode))
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
- return FoldSymbolOffset(Opcode, VT, GA, N1);
-
- // For fixed width vectors, extract each constant element and fold them
- // individually. Either input may be an undef value.
- bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR ||
- N1->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV1 && !N1->isUndef())
- return SDValue();
- bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR;
- if (!IsBVOrSV2 && !N2->isUndef())
- return SDValue();
- // If both operands are undef, that's handled the same way as scalars.
- if (!IsBVOrSV1 && !IsBVOrSV2)
- return SDValue();
-
- EVT SVT = VT.getScalarType();
- EVT LegalSVT = SVT;
- if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
- LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
- if (LegalSVT.bitsLT(SVT))
- return SDValue();
- }
-
- SmallVector<SDValue, 4> Outputs;
- unsigned NumOps = 0;
- if (IsBVOrSV1)
- NumOps = std::max(NumOps, N1->getNumOperands());
- if (IsBVOrSV2)
- NumOps = std::max(NumOps, N2->getNumOperands());
- assert(NumOps != 0 && "Expected non-zero operands");
- // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need
- // one iteration for that.
- assert((!VT.isScalableVector() || NumOps == 1) &&
- "Scalable vector should only have one scalar");
-
- for (unsigned I = 0; I != NumOps; ++I) {
- // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need
- // to use operand 0 of the SPLAT_VECTOR for each fixed element.
- SDValue V1;
- if (N1->getOpcode() == ISD::BUILD_VECTOR)
- V1 = N1->getOperand(I);
- else if (N1->getOpcode() == ISD::SPLAT_VECTOR)
- V1 = N1->getOperand(0);
- else
- V1 = getUNDEF(SVT);
-
- SDValue V2;
- if (N2->getOpcode() == ISD::BUILD_VECTOR)
- V2 = N2->getOperand(I);
- else if (N2->getOpcode() == ISD::SPLAT_VECTOR)
- V2 = N2->getOperand(0);
- else
- V2 = getUNDEF(SVT);
-
- if (SVT.isInteger()) {
- if (V1.getValueType().bitsGT(SVT))
- V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
- if (V2.getValueType().bitsGT(SVT))
- V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ if (NumOps == 2) {
+ // TODO: Move foldConstantFPMath here?
+
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) {
+ if (C1->isOpaque() || C2->isOpaque())
+ return SDValue();
+
+ Optional<APInt> FoldAttempt =
+ FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue());
+ if (!FoldAttempt)
+ return SDValue();
+
+ SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT);
+ assert((!Folded || !VT.isVector()) &&
+ "Can't fold vectors ops with scalar operands");
+ return Folded;
+ }
}
- if (V1.getValueType() != SVT || V2.getValueType() != SVT)
- return SDValue();
-
- // Fold one vector element.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
- if (LegalSVT != SVT)
- ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
-
- // Scalar folding only succeeded if the result is a constant or UNDEF.
- if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
- ScalarResult.getOpcode() != ISD::ConstantFP)
- return SDValue();
- Outputs.push_back(ScalarResult);
- }
-
- if (N1->getOpcode() == ISD::BUILD_VECTOR ||
- N2->getOpcode() == ISD::BUILD_VECTOR) {
- assert(VT.getVectorNumElements() == Outputs.size() &&
- "Vector size mismatch!");
-
- // Build a big vector out of the scalar elements we generated.
- return getBuildVector(VT, SDLoc(), Outputs);
+ // fold (add Sym, c) -> Sym+c
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode());
+ if (TLI->isCommutativeBinOp(Opcode))
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1]))
+ return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode());
}
- assert((N1->getOpcode() == ISD::SPLAT_VECTOR ||
- N2->getOpcode() == ISD::SPLAT_VECTOR) &&
- "One operand should be a splat vector");
-
- assert(Outputs.size() == 1 && "Vector size mismatch!");
- return getSplatVector(VT, SDLoc(), Outputs[0]);
-}
-
-// TODO: Merge with FoldConstantArithmetic
-SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
- const SDLoc &DL, EVT VT,
- ArrayRef<SDValue> Ops,
- const SDNodeFlags Flags) {
- // If the opcode is a target-specific ISD node, there's nothing we can
- // do here and the operand rules may not line up with the below, so
- // bail early.
- if (Opcode >= ISD::BUILTIN_OP_END)
- return SDValue();
-
- if (isUndef(Opcode, Ops))
- return getUNDEF(VT);
-
- // We can only fold vectors - maybe merge with FoldConstantArithmetic someday?
+ // This is for vector folding only from here on.
if (!VT.isVector())
return SDValue();
ElementCount NumElts = VT.getVectorElementCount();
+ // See if we can fold through bitcasted integer ops.
+ // TODO: Can we handle undef elements?
+ if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() &&
+ Ops[0].getValueType() == VT && Ops[1].getValueType() == VT &&
+ Ops[0].getOpcode() == ISD::BITCAST &&
+ Ops[1].getOpcode() == ISD::BITCAST) {
+ SDValue N1 = peekThroughBitcasts(Ops[0]);
+ SDValue N2 = peekThroughBitcasts(Ops[1]);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ EVT BVVT = N1.getValueType();
+ if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) {
+ bool IsLE = getDataLayout().isLittleEndian();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SmallVector<APInt> RawBits1, RawBits2;
+ BitVector UndefElts1, UndefElts2;
+ if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) &&
+ BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) &&
+ UndefElts1.none() && UndefElts2.none()) {
+ SmallVector<APInt> RawBits;
+ for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) {
+ Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]);
+ if (!Fold)
+ break;
+ RawBits.push_back(Fold.getValue());
+ }
+ if (RawBits.size() == NumElts.getFixedValue()) {
+ // We have constant folded, but we need to cast the result back to
+ // the original (possibly legalized) type.
+ SmallVector<APInt> DstBits;
+ BitVector DstUndefs;
+ BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(),
+ DstBits, RawBits, DstUndefs,
+ BitVector(RawBits.size(), false));
+ EVT BVEltVT = BV1->getOperand(0).getValueType();
+ unsigned BVEltBits = BVEltVT.getSizeInBits();
+ SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT));
+ for (unsigned I = 0, E = DstBits.size(); I != E; ++I) {
+ if (DstUndefs[I])
+ continue;
+ Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT);
+ }
+ return getBitcast(VT, getBuildVector(BVVT, DL, Ops));
+ }
+ }
+ }
+ }
+
auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
return !Op.getValueType().isVector() ||
Op.getValueType().getVectorElementCount() == NumElts;
};
- auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
- APInt SplatVal;
- BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op);
+ auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
- (BV && BV->isConstant()) ||
- (Op.getOpcode() == ISD::SPLAT_VECTOR &&
- ISD::isConstantSplatVector(Op.getNode(), SplatVal));
+ Op.getOpcode() == ISD::BUILD_VECTOR ||
+ Op.getOpcode() == ISD::SPLAT_VECTOR;
};
// All operands must be vector types with the same number of elements as
- // the result type and must be either UNDEF or a build vector of constant
+ // the result type and must be either UNDEF or a build/splat vector
// or UNDEF scalars.
- if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) ||
+ if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
!llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
@@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// For scalable vector types we know we're dealing with SPLAT_VECTORs. We
// only have one operand to check. For fixed-length vector types we may have
// a combination of BUILD_VECTOR and SPLAT_VECTOR.
- unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
+ unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
// Constant fold each scalar lane separately.
SmallVector<SDValue, 4> ScalarResults;
- for (unsigned I = 0; I != NumOperands; I++) {
+ for (unsigned I = 0; I != NumVectorElts; I++) {
SmallVector<SDValue, 4> ScalarOps;
for (SDValue Op : Ops) {
EVT InSVT = Op.getValueType().getScalarType();
if (Op.getOpcode() != ISD::BUILD_VECTOR &&
Op.getOpcode() != ISD::SPLAT_VECTOR) {
- // We've checked that this is UNDEF or a constant of some kind.
if (Op.isUndef())
ScalarOps.push_back(getUNDEF(InSVT));
else
@@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
}
// Constant fold the scalar operands.
- SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags);
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps);
// Legalize the (integer) scalar constant if necessary.
if (LegalSVT != SVT)
@@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N2;
- if (N2C && N2C->isAllOnesValue()) // X & -1 -> X
+ if (N2C && N2C->isAllOnes()) // X & -1 -> X
return N1;
break;
case ISD::OR:
@@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
VT.getVectorElementType() == MVT::i1)
@@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// size of the value, the shift/rotate count is guaranteed to be zero.
if (VT == MVT::i1)
return N1;
- if (N2C && N2C->isNullValue())
+ if (N2C && N2C->isZero())
return N1;
break;
case ISD::FP_ROUND:
@@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) {
NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
return V;
}
@@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::VECTOR_SPLICE: {
+ if (cast<ConstantSDNode>(N3)->isNullValue())
+ return N1;
+ break;
+ }
case ISD::INSERT_VECTOR_ELT: {
ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except
@@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
ArgChains.push_back(Chain);
// Add a chain value for each stack argument.
- for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(),
- UE = getEntryNode().getNode()->use_end(); U != UE; ++U)
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ for (SDNode *U : getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0)
ArgChains.push_back(SDValue(L, 1));
@@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+ isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
if (!TLI.findOptimalMemOpLowering(
MemOps, TLI.getMaxStoresPerMemset(OptSize),
MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol),
@@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memcpy with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemcpyLoadsAndStores(
@@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memmove with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemmoveLoadsAndStores(
@@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (ConstantSize) {
// Memset with size zero? Just return the original chain.
- if (ConstantSize->isNullValue())
+ if (ConstantSize->isZero())
return Chain;
SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src,
@@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getLoadVP(
+ ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl,
+ SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo,
+ const MDNode *Ranges, bool IsExpanding) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOLoad;
+ assert((MMOFlags & MachineMemOperand::MOStore) == 0);
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
+
+ uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
+ Alignment, AAInfo, Ranges);
+ return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT,
+ MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
+ ISD::LoadExtType ExtType, EVT VT,
+ const SDLoc &dl, SDValue Chain, SDValue Ptr,
+ SDValue Offset, SDValue Mask, SDValue EVL,
+ EVT MemVT, MachineMemOperand *MMO,
+ bool IsExpanding) {
+ if (VT == MemVT) {
+ ExtType = ISD::NON_EXTLOAD;
+ } else if (ExtType == ISD::NON_EXTLOAD) {
+ assert(VT == MemVT && "Non-extending load from different memory type!");
+ } else {
+ // Extending load.
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
+ assert(VT.isInteger() == MemVT.isInteger() &&
+ "Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use an ext load to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == MemVT.getVectorElementCount()) &&
+ "Cannot use an ext load to change the number of vector elements!");
+ }
+
+ bool Indexed = AM != ISD::UNINDEXED;
+ assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!");
+
+ SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other)
+ : getVTList(VT, MVT::Other);
+ SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPLoadSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM,
+ ExtType, IsExpanding, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, const MDNode *Ranges,
+ bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+ Mask, EVL, VT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, EVT MemVT,
+ MaybeAlign Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr,
+ IsExpanding);
+}
+
+SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl,
+ EVT VT, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue EVL, EVT MemVT,
+ MachineMemOperand *MMO, bool IsExpanding) {
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask,
+ EVL, MemVT, MMO, IsExpanding);
+}
+
+SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *LD = cast<VPLoadSDNode>(OrigLoad);
+ assert(LD->getOffset().isUndef() && "Load is already an indexed load!");
+ // Don't propagate the invariant or dereferenceable flags.
+ auto MMOFlags =
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
+ return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+ LD->getChain(), Base, Offset, LD->getMask(),
+ LD->getVectorLength(), LD->getPointerInfo(),
+ LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(),
+ nullptr, LD->isExpandingLoad());
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachinePointerInfo PtrInfo, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ uint64_t Size =
+ MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+}
+
+SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, SDValue EVL,
+ MachineMemOperand *MMO, bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ EVT VT = Val.getValueType();
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, false, IsCompressing, VT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, MachinePointerInfo PtrInfo,
+ EVT SVT, Align Alignment,
+ MachineMemOperand::Flags MMOFlags,
+ const AAMDNodes &AAInfo,
+ bool IsCompressing) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+
+ MMOFlags |= MachineMemOperand::MOStore;
+ assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
+
+ if (PtrInfo.V.isNull())
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
+
+ MachineFunction &MF = getMachineFunction();
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+ Alignment, AAInfo);
+ return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
+ IsCompressing);
+}
+
+SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
+ SDValue Val, SDValue Ptr, SDValue Mask,
+ SDValue EVL, EVT SVT,
+ MachineMemOperand *MMO,
+ bool IsCompressing) {
+ EVT VT = Val.getValueType();
+
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ if (VT == SVT)
+ return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing);
+
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
+ assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!");
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorElementCount() == SVT.getVectorElementCount()) &&
+ "Cannot use trunc store to change the number of vector elements!");
+
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Undef = getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(SVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N =
+ newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ ISD::UNINDEXED, true, IsCompressing, SVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl,
+ SDValue Base, SDValue Offset,
+ ISD::MemIndexedMode AM) {
+ auto *ST = cast<VPStoreSDNode>(OrigStore);
+ assert(ST->getOffset().isUndef() && "Store is already an indexed store!");
+ SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+ SDValue Ops[] = {ST->getChain(), ST->getValue(), Base,
+ Offset, ST->getMask(), ST->getVectorLength()};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops);
+ ID.AddInteger(ST->getMemoryVT().getRawBits());
+ ID.AddInteger(ST->getRawSubclassData());
+ ID.AddInteger(ST->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<VPStoreSDNode>(
+ dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(),
+ ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand());
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 6 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPGatherSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValueType(0).getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValueType(0).getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValueType(0).getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
+ ArrayRef<SDValue> Ops,
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
+ assert(Ops.size() == 7 && "Incompatible number of operands");
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops);
+ ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<VPScatterSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+ auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ VT, MMO, IndexType);
+ createOperands(N, Ops);
+
+ assert(N->getMask().getValueType().getVectorElementCount() ==
+ N->getValue().getValueType().getVectorElementCount() &&
+ "Vector width mismatch between mask and data");
+ assert(
+ N->getIndex().getValueType().getVectorElementCount().isScalable() ==
+ N->getValue().getValueType().getVectorElementCount().isScalable() &&
+ "Scalable flags of index and data do not match");
+ assert(ElementCount::isKnownGE(
+ N->getIndex().getValueType().getVectorElementCount(),
+ N->getValue().getValueType().getVectorElementCount()) &&
+ "Vector width mismatch between index and data");
+ assert(isa<ConstantSDNode>(N->getScale()) &&
+ cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ "Scale should be a constant power of 2");
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Base, SDValue Offset, SDValue Mask,
SDValue PassThru, EVT MemVT,
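A minimal usage sketch (illustration only, not part of the patch): wiring the new simple getLoadVP/getStoreVP overloads from the hunk above together to copy one predicated vector. Chain, the two pointers, Mask, EVL and the MachineMemOperands are placeholders the caller is assumed to already have.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue copyVPVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                            SDValue Chain, SDValue SrcPtr, SDValue DstPtr,
                            SDValue Mask, SDValue EVL,
                            MachineMemOperand *LoadMMO,
                            MachineMemOperand *StoreMMO) {
  // VP load: the loaded value is result 0, the updated chain is result 1.
  SDValue Load = DAG.getLoadVP(VT, DL, Chain, SrcPtr, Mask, EVL, LoadMMO,
                               /*IsExpanding=*/false);
  // Chain the VP store on the load so the two accesses stay ordered.
  return DAG.getStoreVP(Load.getValue(1), DL, Load, DstPtr, Mask, EVL,
                        StoreMMO, /*IsCompressing=*/false);
}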
@@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
// select true, T, F --> T
// select false, T, F --> F
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
- return CondC->isNullValue() ? F : T;
+ return CondC->isZero() ? F : T;
// TODO: This should simplify VSELECT with constant condition using something
// like this (but check boolean contents to be complete?):
@@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
}
#ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence() {
+void SelectionDAG::VerifyDAGDivergence() {
std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
for (auto *N : TopoOrder) {
@@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
// before SortedPos will contain the topological sort index, and the
// Node Id fields for nodes at SortedPos and after will contain the
// count of outstanding operands.
- for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) {
- SDNode *N = &*I++;
- checkForCycles(N, this);
- unsigned Degree = N->getNumOperands();
+ for (SDNode &N : llvm::make_early_inc_range(allnodes())) {
+ checkForCycles(&N, this);
+ unsigned Degree = N.getNumOperands();
if (Degree == 0) {
// A node with no uses, add it to the result array immediately.
- N->setNodeId(DAGSize++);
- allnodes_iterator Q(N);
+ N.setNodeId(DAGSize++);
+ allnodes_iterator Q(&N);
if (Q != SortedPos)
SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q));
assert(SortedPos != AllNodes.end() && "Overran node list");
++SortedPos;
} else {
// Temporarily use the Node Id as scratch space for the degree count.
- N->setNodeId(Degree);
+ N.setNodeId(Degree);
}
}
@@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
std::string ErrorStr;
raw_string_ostream ErrorFormatter(ErrorStr);
-
ErrorFormatter << "Undefined external symbol ";
ErrorFormatter << '"' << Symbol << '"';
- ErrorFormatter.flush();
-
- report_fatal_error(ErrorStr);
+ report_fatal_error(Twine(ErrorFormatter.str()));
}
//===----------------------------------------------------------------------===//
@@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op,
bool llvm::isNullConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
+ return Const != nullptr && Const->isZero();
}
bool llvm::isNullFPConstant(SDValue V) {
@@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) {
bool llvm::isAllOnesConstant(SDValue V) {
ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isAllOnesValue();
+ return Const != nullptr && Const->isAllOnes();
}
bool llvm::isOneConstant(SDValue V) {
@@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
ConstantSDNode *C =
isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true);
- return C && C->isNullValue();
+ return C && C->isZero();
}
bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
@@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
- return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
+ return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth;
}
HandleSDNode::~HandleSDNode() {
@@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
/// isOnlyUserOf - Return true if this node is the only use of N.
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (User == this)
Seen = true;
else
@@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const {
/// Return true if the only users of N are contained in Nodes.
bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
bool Seen = false;
- for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
- SDNode *User = *I;
+ for (const SDNode *User : N->uses()) {
if (llvm::is_contained(Nodes, User))
Seen = true;
else
@@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT,
"Mixing fixed width and scalable vectors when enveloping a type");
EVT LoVT, HiVT;
if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) {
- LoVT = EnvVT;
+ LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts);
*HiIsEmpty = false;
} else {
// Flag that hi type has zero storage size, but return split envelope type
// (this would be easier if vector types with zero elements were allowed).
LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts);
- HiVT = EnvVT;
+ HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts);
*HiIsEmpty = true;
}
return std::make_pair(LoVT, HiVT);
@@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
}
SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getSplatValue(DemandedElts, UndefElements);
}
@@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts,
bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
BitVector *UndefElements) const {
- APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ APInt DemandedElts = APInt::getAllOnes(getNumOperands());
return getRepeatedSequence(DemandedElts, Sequence, UndefElements);
}
@@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
return -1;
}
+bool BuildVectorSDNode::getConstantRawBits(
+ bool IsLittleEndian, unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const {
+ // Early-out if this contains anything but Undef/Constant/ConstantFP.
+ if (!isConstant())
+ return false;
+
+ unsigned NumSrcOps = getNumOperands();
+ unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+
+ // Extract raw src bits.
+ SmallVector<APInt> SrcBitElements(NumSrcOps,
+ APInt::getNullValue(SrcEltSizeInBits));
+ BitVector SrcUndefElements(NumSrcOps, false);
+
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ SDValue Op = getOperand(I);
+ if (Op.isUndef()) {
+ SrcUndefElements.set(I);
+ continue;
+ }
+ auto *CInt = dyn_cast<ConstantSDNode>(Op);
+ auto *CFP = dyn_cast<ConstantFPSDNode>(Op);
+ assert((CInt || CFP) && "Unknown constant");
+ SrcBitElements[I] =
+ CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits)
+ : CFP->getValueAPF().bitcastToAPInt();
+ }
+
+ // Recast to dst width.
+ recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements,
+ SrcBitElements, UndefElements, SrcUndefElements);
+ return true;
+}
+
+void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
+ unsigned DstEltSizeInBits,
+ SmallVectorImpl<APInt> &DstBitElements,
+ ArrayRef<APInt> SrcBitElements,
+ BitVector &DstUndefElements,
+ const BitVector &SrcUndefElements) {
+ unsigned NumSrcOps = SrcBitElements.size();
+ unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth();
+ assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 &&
+ "Invalid bitcast scale");
+ assert(NumSrcOps == SrcUndefElements.size() &&
+ "Vector size mismatch");
+
+ unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
+ DstUndefElements.clear();
+ DstUndefElements.resize(NumDstOps, false);
+ DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));
+
+ // Concatenate src elements constant bits together into dst element.
+ if (SrcEltSizeInBits <= DstEltSizeInBits) {
+ unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits;
+ for (unsigned I = 0; I != NumDstOps; ++I) {
+ DstUndefElements.set(I);
+ APInt &DstBits = DstBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ if (SrcUndefElements[Idx])
+ continue;
+ DstUndefElements.reset(I);
+ const APInt &SrcBits = SrcBitElements[Idx];
+ assert(SrcBits.getBitWidth() == SrcEltSizeInBits &&
+ "Illegal constant bitwidths");
+ DstBits.insertBits(SrcBits, J * SrcEltSizeInBits);
+ }
+ }
+ return;
+ }
+
+ // Split src element constant bits into dst elements.
+ unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits;
+ for (unsigned I = 0; I != NumSrcOps; ++I) {
+ if (SrcUndefElements[I]) {
+ DstUndefElements.set(I * Scale, (I + 1) * Scale);
+ continue;
+ }
+ const APInt &SrcBits = SrcBitElements[I];
+ for (unsigned J = 0; J != Scale; ++J) {
+ unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1));
+ APInt &DstBits = DstBitElements[Idx];
+ DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits);
+ }
+ }
+}
+
bool BuildVectorSDNode::isConstant() const {
for (const SDValue &Op : op_values()) {
unsigned Opc = Op.getOpcode();
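A minimal sketch (illustration only, not part of the patch) of the little-endian wide-to-narrow path in recastRawBits above: splitting one source constant into Scale destination elements using only the public APInt API. The 0xAABBCCDD value is made up for the example.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Split one wide constant into narrower little-endian elements, low bits
// first, matching the SrcEltSizeInBits > DstEltSizeInBits branch above.
static SmallVector<APInt> splitConstantLE(const APInt &Src, unsigned DstBits) {
  SmallVector<APInt> Dst;
  for (unsigned J = 0, Scale = Src.getBitWidth() / DstBits; J != Scale; ++J)
    Dst.push_back(Src.extractBits(DstBits, J * DstBits));
  return Dst;
}
// splitConstantLE(APInt(32, 0xAABBCCDD), 16) yields {0xCCDD, 0xAABB}.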
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 20c7d771bfb6..6d8252046501 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include <cstdint>
@@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
- IsAlias = false;
- return true;
+ if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+ // We can derive NoAlias in case of mismatched base types.
+ if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) {
+ IsAlias = false;
+ return true;
+ }
+ if (IsGV0 && IsGV1) {
+ auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal();
+ auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal();
+ // It doesn't make sense to access one global value using another global
+ // value's address, so we can assume that there is no aliasing between two
+ // different globals (unless we have symbols that may indirectly point to
+ // each other).
+ // FIXME: This is perhaps a bit too defensive. We could try to follow the
+ // aliasee chain of GlobalAlias variables to find out whether the indirect
+ // symbols may alias or not.
+ if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) {
+ IsAlias = false;
+ return true;
+ }
+ }
}
return false; // Cannot determine whether the pointers alias.
}
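A minimal sketch (illustration only, not part of the patch) of the disambiguation rule the new code implements: two distinct globals cannot overlap unless one of them is a GlobalAlias that might ultimately refer to the other symbol.

#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Conservative check mirroring the GV0/GV1 test above: distinct non-alias
// globals occupy disjoint storage, so accesses to them cannot alias.
static bool globalsDefinitelyDontAlias(const GlobalValue *GV0,
                                       const GlobalValue *GV1) {
  return GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1);
}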
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d56d4bcc9169..5d911c165293 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -69,6 +69,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
if (PartEVT.isVector()) {
+ // Vector/Vector bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
// vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
// elements we want.
- if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) {
assert((PartEVT.getVectorElementCount().getKnownMinValue() >
ValueVT.getVectorElementCount().getKnownMinValue()) &&
(PartEVT.getVectorElementCount().isScalable() ==
ValueVT.getVectorElementCount().isScalable()) &&
"Cannot narrow, it would be a lossy transformation");
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
- DAG.getVectorIdxConstant(0, DL));
+ PartEVT =
+ EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val,
+ DAG.getVectorIdxConstant(0, DL));
+ if (PartEVT == ValueVT)
+ return Val;
}
- // Vector/Vector bitcast.
- if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
- return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
-
- assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() &&
- "Cannot handle this kind of promotion");
// Promoted vector extract
return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
-
}
// Trivial bitcast if the types are the same size and the destination
@@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else if (PartEVT.isVector() &&
+ PartEVT.getVectorElementType() !=
+ ValueVT.getVectorElementType() &&
+ TLI.getTypeAction(*DAG.getContext(), ValueVT) ==
+ TargetLowering::TypeWidenVector) {
+ // Combination of widening and promotion.
+ EVT WidenVT =
+ EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
+ PartVT.getVectorElementCount());
+ SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT);
+ Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT);
} else {
if (ValueVT.getVectorElementCount().isScalar()) {
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
@@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
} else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) {
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
- } else if (SDValue Widened =
- widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
- Val = Widened;
- } else if (BuiltVectorTy.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType()) &&
- BuiltVectorTy.getVectorElementCount() ==
- ValueVT.getVectorElementCount()) {
- // Promoted vector extract
- Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy);
+ } else {
+ if (BuiltVectorTy.getVectorElementType().bitsGT(
+ ValueVT.getVectorElementType())) {
+ // Integer promotion.
+ ValueVT = EVT::getVectorVT(*DAG.getContext(),
+ BuiltVectorTy.getVectorElementType(),
+ ValueVT.getVectorElementCount());
+ Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
+ }
+
+ if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) {
+ Val = Widened;
+ }
}
assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type");
@@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
while (isa<Instruction>(V)) {
Instruction &VAsInst = *cast<Instruction>(V);
// Temporary "0", awaiting real implementation.
+ SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 4> AdditionalValues;
- DIExpression *SalvagedExpr =
- salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues);
-
+ V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops,
+ AdditionalValues);
// If we cannot salvage any further, and haven't yet found a suitable debug
// expression, bail out.
+ if (!V)
+ break;
+
// TODO: If AdditionalValues isn't empty, then the salvage can only be
// represented with a DBG_VALUE_LIST, so we give up. When we have support
// here for variadic dbg_values, remove that condition.
- if (!SalvagedExpr || !AdditionalValues.empty())
+ if (!AdditionalValues.empty())
break;
// New value and expr now represent this debuginfo.
- V = VAsInst.getOperand(0);
- Expr = SalvagedExpr;
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue);
// Some kind of simplification occurred: check whether the operand of the
// salvaged debug expression can be encoded in this DAG.
@@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
BitsToDescribe = *VarSize;
if (auto Fragment = Expr->getFragmentInfo())
BitsToDescribe = Fragment->SizeInBits;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ for (const auto &RegAndSize : RFV.getRegsAndSizes()) {
// Bail out if all bits are described already.
if (Offset >= BitsToDescribe)
break;
@@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
/*IsVarArg*/ false, DL);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::SExt))
+ if (F->getAttributes().hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::ZExt))
+ else if (F->getAttributes().hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
LLVMContext &Context = F->getContext();
- bool RetInReg = F->getAttributes().hasAttribute(
- AttributeList::ReturnIndex, Attribute::InReg);
+ bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
EVT VT = ValueVTs[j];
@@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i) {
- Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ Outs.push_back(ISD::OutputArg(Flags,
+ Parts[i].getValueType().getSimpleVT(),
VT, /*isfixed=*/true, 0, 0));
OutVals.push_back(Parts[i]);
}
@@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
- Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
- EVT(TLI.getPointerTy(DL)) /*argvt*/,
- true /*isfixed*/, 1 /*origidx*/,
- 0 /*partOffs*/));
+ Outs.push_back(ISD::OutputArg(
+ Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)),
+ /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
@@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
- if (!JTH.OmitRangeCheck) {
+ if (!JTH.FallthroughUnreachable) {
// Emit the range check for the jump table, and branch to the default block
// for the switch statement if the value being switched on exceeds the
// largest case in the switch.
@@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
- if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
+ if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
@@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- if (!B.OmitRangeCheck)
+ if (!B.FallthroughUnreachable)
addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
SDValue Root = CopyTo;
- if (!B.OmitRangeCheck) {
+ if (!B.FallthroughUnreachable) {
// Conditional branch to the default block.
SDValue RangeCmp = DAG.getSetCC(dl,
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
@@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
+ else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
@@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs, MemVTs;
@@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
const Value *SV = I.getOperand(0);
Type *Ty = I.getType();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
assert(
(!AA ||
!AA->pointsToConstantMemory(MemoryLocation(
SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)),
- AAInfo))) &&
+ I.getAAMetadata()))) &&
"load_from_swift_error should not be constant memory");
SmallVector<EVT, 4> ValueVTs;
@@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
SDLoc dl = getCurSDLoc();
Align Alignment = I.getAlign();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout());
@@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata());
SDValue StoreNode =
DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO,
ISD::UNINDEXED, false /* Truncating */, IsCompressing);
@@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
+ assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
// Handle splat constant pointer.
if (auto *C = dyn_cast<Constant>(Ptr)) {
@@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
-
SDValue Base;
SDValue Index;
ISD::MemIndexType IndexType;
@@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOStore,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata());
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
@@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
+ AAMDNodes AAInfo = I.getAAMetadata();
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
// Do not serialize masked loads of constant memory with anything.
@@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
->getMaybeAlignValue()
.getValueOr(DAG.getEVTAlign(VT.getScalarType()));
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SDValue Root = DAG.getRoot();
@@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
MachinePointerInfo(AS), MachineMemOperand::MOLoad,
// TODO: Make MachineMemOperands aware of scalable
// vectors.
- MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges);
+ MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges);
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
@@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
TLI.getPointerTy(DAG.getDataLayout())));
// Add all operands of the call to the operand list.
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = I.arg_size(); i != e; ++i) {
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
@@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
Result =
DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Info.align, Info.flags, Info.size,
+ I.getAAMetadata());
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
// we've been asked to pursue.
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr,
bool Indirect) {
- if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) {
+ if (Reg.isVirtual() && MF.useDebugInstrRef()) {
// For VRegs, in instruction referencing mode, create a DBG_INSTR_REF
// pointing at the VReg, which will be patched up later.
auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF);
auto MIB = BuildMI(MF, DL, Inst);
- MIB.addReg(Reg, RegState::Debug);
+ MIB.addReg(Reg);
MIB.addImm(0);
MIB.addMetadata(Variable);
auto *NewDIExpr = FragExpr;
@@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>>
SplitRegs) {
unsigned Offset = 0;
- for (auto RegAndSize : SplitRegs) {
+ for (const auto &RegAndSize : SplitRegs) {
// If the expression is already a fragment, the current register
// offset+size might extend beyond the fragment. In this case, only
// the register bits that are inside the fragment are relevant.
@@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
/* AlwaysInline */ false, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
// FIXME: Support passing different dest/src alignments to the memcpy DAG
// node.
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol,
/* AlwaysInline */ true, isTC,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MC);
return;
}
@@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isVol = MSI.isVolatile();
bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget());
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC,
- MachinePointerInfo(I.getArgOperand(0)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(0)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MS);
return;
}
@@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// FIXME: Support passing different dest/src alignments to the memmove DAG
// node.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
updateDAGForMaybeTailCall(MM);
return;
}
@@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
if (Values.empty())
return;
- if (std::count(Values.begin(), Values.end(), nullptr))
+ if (llvm::is_contained(Values, nullptr))
return;
bool IsVariadic = DI.hasArgList();
@@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName =
- I.getAttributes()
- .getAttribute(AttributeList::FunctionIndex, "trap-func-name")
- .getValueAsString();
+ I.getAttributes().getFnAttr("trap-func-name").getValueAsString();
if (TrapFuncName.empty()) {
switch (Intrinsic) {
case Intrinsic::trap:
@@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
// is the same on all targets.
- for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) {
+ for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) {
Value *Arg = I.getArgOperand(Idx)->stripPointerCasts();
if (isa<ConstantPointerNull>(Arg))
continue; // Skip null pointers. They represent a hole in index space.
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
};
SmallVector<BranchFunnelTarget, 8> Targets;
- for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) {
+ for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) {
auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
I.getArgOperand(Op), Offset, DAG.getDataLayout()));
if (ElemBase != Base)
@@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
llvm_unreachable(
"Inconsistency: no SDNode available for this VPIntrinsic!");
+ if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD ||
+ *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) {
+ if (VPIntrin.getFastMathFlags().allowReassoc())
+ return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD
+ : ISD::VP_REDUCE_FMUL;
+ }
+
return ResOPC.getValue();
}
+void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isGather) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(0);
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+ SDValue LD;
+ bool AddToChain = true;
+ if (!isGather) {
+ // Do not serialize variable-length loads of constant memory with
+ // anything.
+ MemoryLocation ML;
+ if (VT.isScalableVector())
+ ML = MemoryLocation::getAfter(PtrOperand);
+ else
+ ML = MemoryLocation(
+ PtrOperand,
+ LocationSize::precise(
+ DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())),
+ AAInfo);
+ AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges);
+ LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+ MMO, false /*IsExpanding */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOLoad,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ LD = DAG.getGatherVP(
+ DAG.getVTList(VT, MVT::Other), VT, DL,
+ {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO,
+ IndexType);
+ }
+ if (AddToChain)
+ PendingLoads.push_back(LD.getValue(1));
+ setValue(&VPIntrin, LD);
+}
+
+void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues,
+ bool isScatter) {
+ SDLoc DL = getCurSDLoc();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Value *PtrOperand = VPIntrin.getArgOperand(1);
+ EVT VT = OpValues[0].getValueType();
+ MaybeAlign Alignment = DAG.getEVTAlign(VT);
+ AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+ SDValue ST;
+ if (!isScatter) {
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore,
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo);
+ ST =
+ DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1],
+ OpValues[2], OpValues[3], MMO, false /* IsTruncating */);
+ } else {
+ unsigned AS =
+ PtrOperand->getType()->getScalarType()->getPointerAddressSpace();
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo(AS), MachineMemOperand::MOStore,
+ MemoryLocation::UnknownSize, *Alignment, AAInfo);
+ SDValue Base, Index, Scale;
+ ISD::MemIndexType IndexType;
+ bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale,
+ this, VPIntrin.getParent());
+ if (!UniformBase) {
+ Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ Index = getValue(PtrOperand);
+ IndexType = ISD::SIGNED_UNSCALED;
+ Scale =
+ DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
+ }
+ EVT IdxVT = Index.getValueType();
+ EVT EltTy = IdxVT.getVectorElementType();
+ if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) {
+ EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy);
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
+ }
+ ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL,
+ {getMemoryRoot(), OpValues[0], Base, Index, Scale,
+ OpValues[2], OpValues[3]},
+ MMO, IndexType);
+ }
+ DAG.setRoot(ST);
+ setValue(&VPIntrin, ST);
+}
+
void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
const VPIntrinsic &VPIntrin) {
SDLoc DL = getCurSDLoc();
@@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
// Request operands.
SmallVector<SDValue, 7> OpValues;
- for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) {
+ for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) {
auto Op = getValue(VPIntrin.getArgOperand(I));
if (I == EVLParamPos)
Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op);
OpValues.push_back(Op);
}
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
- setValue(&VPIntrin, Result);
+ switch (Opcode) {
+ default: {
+ SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ setValue(&VPIntrin, Result);
+ break;
+ }
+ case ISD::VP_LOAD:
+ case ISD::VP_GATHER:
+ visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues,
+ Opcode == ISD::VP_GATHER);
+ break;
+ case ISD::VP_STORE:
+ case ISD::VP_SCATTER:
+ visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER);
+ break;
+ }
}
SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain,
@@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// because the return pointer needs to be adjusted by the size of
// the copied memory.
SDValue Root = isVol ? getRoot() : getMemoryRoot();
- AAMDNodes AAInfo;
- I.getAAMetadata(AAInfo);
SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
- MachinePointerInfo(I.getArgOperand(1)), AAInfo);
+ MachinePointerInfo(I.getArgOperand(1)),
+ I.getAAMetadata());
assert(MC.getNode() != nullptr &&
"** memcpy should not be lowered as TailCall in mempcpy context **");
DAG.setRoot(MC);
@@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
if (Function *F = I.getCalledFunction()) {
+ diagnoseDontCall(I);
+
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID();
@@ -8176,7 +8307,7 @@ public:
}
}
- return TLI.getValueType(DL, OpTy, true);
+ return TLI.getAsmOperandValueType(DL, OpTy, true);
}
};
@@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
///
/// OpInfo describes the operand
/// RefOpInfo describes the matching operand if any, the operand otherwise
-static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
- SDISelAsmOperandInfo &OpInfo,
- SDISelAsmOperandInfo &RefOpInfo) {
+static llvm::Optional<unsigned>
+getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
+ SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &RefOpInfo) {
LLVMContext &Context = *DAG.getContext();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No work to do for memory operations.
if (OpInfo.ConstraintType == TargetLowering::C_Memory)
- return;
+ return None;
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
@@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
&TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT);
// RC is unset only on failure. Return immediately.
if (!RC)
- return;
+ return None;
// Get the actual register value type. This is important, because the user
// may have asked for (e.g.) the AX register in i32 type. We need to
@@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// No need to allocate a matching input constraint since the constraint it's
// matching to has already been allocated.
if (OpInfo.isMatchingInputConstraint())
- return;
+ return None;
EVT ValueVT = OpInfo.ConstraintVT;
if (OpInfo.ConstraintVT == MVT::Other)
@@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
// Do not check for single registers.
if (AssignedReg) {
- for (; *I != AssignedReg; ++I)
- assert(I != RC->end() && "AssignedReg should be member of RC");
+ I = std::find(I, RC->end(), AssignedReg);
+ if (I == RC->end()) {
+ // RC does not contain the selected register, which indicates a
+ // mismatch between the register and the required type/bitwidth.
+ return {AssignedReg};
+ }
}
for (; NumRegs; --NumRegs, ++I) {
@@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+ return None;
}
static unsigned
@@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
if (isa<CallBrInst>(Call) &&
- ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() -
+ ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() -
cast<CallBrInst>(&Call)->getNumIndirectDests() -
NumMatchingOps) &&
(NumMatchingOps == 0 ||
- ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() -
- NumMatchingOps))) {
+ ArgNo - 1 <
+ (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) {
const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
@@ -8479,8 +8616,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
DAG.getDataLayout(), STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT =
- TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType());
+ OpInfo.ConstraintVT = TLI.getAsmOperandValueType(
+ DAG.getDataLayout(), Call.getType()).getSimpleVT();
}
++ResNo;
} else {
@@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
OpInfo.isMatchingInputConstraint()
? ConstraintOperands[OpInfo.getMatchedOperand()]
: OpInfo;
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ const auto RegError =
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+ if (RegError.hasValue()) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const char *RegName = TRI.getName(RegError.getValue());
+ emitInlineAsmError(Call, "register '" + Twine(RegName) +
+ "' allocated for constraint '" +
+ Twine(OpInfo.ConstraintCode) +
+ "' does not match required type");
+ return;
+ }
auto DetectWriteToReservedRegister = [&]() {
const MachineFunction &MF = DAG.getMachineFunction();
@@ -8674,11 +8822,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
- RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
+ auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]);
Register TiedReg = R->getReg();
MVT RegVT = R->getSimpleValueType(0);
- const TargetRegisterClass *RC = TiedReg.isVirtual() ?
- MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg);
+ const TargetRegisterClass *RC =
+ TiedReg.isVirtual() ? MRI.getRegClass(TiedReg)
+ : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT)
+ : TRI.getMinimalPhysRegClass(TiedReg);
unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(MRI.createVirtualRegister(RC));
@@ -9317,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2;
- if (I.getNumArgOperands() > 1)
+ if (I.arg_size() > 1)
Op2 = getValue(I.getArgOperand(1));
SDLoc dl = getCurSDLoc();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
@@ -9671,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// if it isn't first piece, alignment must be 1
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
- ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT,
- i < CLI.NumFixedArgs, i,
- j*Parts[j].getValueType().getStoreSize().getKnownMinSize());
+ ISD::OutputArg MyFlags(
+ Flags, Parts[j].getValueType().getSimpleVT(), VT,
+ i < CLI.NumFixedArgs, i,
+ j * Parts[j].getValueType().getStoreSize().getKnownMinSize());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
@@ -9841,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
None); // This is not an ABI copy.
SDValue Chain = DAG.getEntryNode();
- ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
- FuncInfo.PreferredExtendType.end())
- ? ISD::ANY_EXTEND
- : FuncInfo.PreferredExtendType[V];
+ ISD::NodeType ExtendType = ISD::ANY_EXTEND;
+ auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V);
+ if (PreferredExtendIt != FuncInfo.PreferredExtendType.end())
+ ExtendType = PreferredExtendIt->second;
RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType);
PendingExports.push_back(Chain);
}
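The lookup rewritten above follows a common pattern: a single find() both tests for the key and yields the value, and it avoids operator[], which would insert a default-constructed entry on a miss. A minimal stand-alone sketch, using std::map in place of the map kept by FunctionLoweringInfo:

    #include <cassert>
    #include <map>
    #include <string>

    // One lookup, no accidental insertion on a miss.
    int lookupOr(const std::map<std::string, int> &M, const std::string &K,
                 int Default) {
      auto It = M.find(K);
      return It != M.end() ? It->second : Default;
    }

    int main() {
      std::map<std::string, int> M{{"x", 3}};
      assert(lookupOr(M, "x", -1) == 3);
      assert(lookupOr(M, "y", -1) == -1);
      assert(M.size() == 1); // operator[] would have grown the map here
    }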
@@ -10490,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
ConstantsOut.clear();
}
-/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB
-/// is 0.
-MachineBasicBlock *
-SelectionDAGBuilder::StackProtectorDescriptor::
-AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB) {
- // If SuccBB has not been created yet, create it.
- if (!SuccMBB) {
- MachineFunction *MF = ParentMBB->getParent();
- MachineFunction::iterator BBI(ParentMBB);
- SuccMBB = MF->CreateMachineBasicBlock(BB);
- MF->insert(++BBI, SuccMBB);
- }
- // Add it as a successor of ParentMBB.
- ParentMBB->addSuccessor(
- SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely));
- return SuccMBB;
-}
-
MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator I(MBB);
if (++I == FuncInfo.MF->end())
@@ -10675,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- JTH->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ JTH->FallthroughUnreachable = true;
- if (!JTH->OmitRangeCheck)
+ if (!JTH->FallthroughUnreachable)
addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
@@ -10718,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
- if (FallthroughUnreachable) {
- // Skip the range check if the fallthrough block is unreachable.
- BTB->OmitRangeCheck = true;
- }
+ if (FallthroughUnreachable)
+ BTB->FallthroughUnreachable = true;
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index df5be156821f..d6122aa0a739 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
@@ -180,204 +181,6 @@ private:
SwitchCG::CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb);
- /// A class which encapsulates all of the information needed to generate a
- /// stack protector check and signals to isel via its state being initialized
- /// that a stack protector needs to be generated.
- ///
- /// *NOTE* The following is a high level documentation of SelectionDAG Stack
- /// Protector Generation. The reason that it is placed here is for a lack of
- /// other good places to stick it.
- ///
- /// High Level Overview of SelectionDAG Stack Protector Generation:
- ///
- /// Previously, generation of stack protectors was done exclusively in the
- /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated
- /// splitting basic blocks at the IR level to create the success/failure basic
- /// blocks in the tail of the basic block in question. As a result of this,
- /// calls that would have qualified for the sibling call optimization were no
- /// longer eligible for optimization since said calls were no longer right in
- /// the "tail position" (i.e. the immediate predecessor of a ReturnInst
- /// instruction).
- ///
- /// Then it was noticed that since the sibling call optimization causes the
- /// callee to reuse the caller's stack, if we could delay the generation of
- /// the stack protector check until later in CodeGen after the sibling call
- /// decision was made, we get both the tail call optimization and the stack
- /// protector check!
- ///
- /// A few goals in solving this problem were:
- ///
- /// 1. Preserve the architecture independence of stack protector generation.
- ///
- /// 2. Preserve the normal IR level stack protector check for platforms like
- /// OpenBSD for which we support platform-specific stack protector
- /// generation.
- ///
- /// The main problem that guided the present solution is that one can not
- /// solve this problem in an architecture independent manner at the IR level
- /// only. This is because:
- ///
- /// 1. The decision on whether or not to perform a sibling call on certain
- /// platforms (for instance i386) requires lower level information
- /// related to available registers that can not be known at the IR level.
- ///
- /// 2. Even if the previous point were not true, the decision on whether to
- /// perform a tail call is done in LowerCallTo in SelectionDAG which
- /// occurs after the Stack Protector Pass. As a result, one would need to
- /// put the relevant callinst into the stack protector check success
- /// basic block (where the return inst is placed) and then move it back
- /// later at SelectionDAG/MI time before the stack protector check if the
- /// tail call optimization failed. The MI level option was nixed
- /// immediately since it would require platform-specific pattern
- /// matching. The SelectionDAG level option was nixed because
- /// SelectionDAG only processes one IR level basic block at a time
- /// implying one could not create a DAG Combine to move the callinst.
- ///
- /// To get around this problem a few things were realized:
- ///
- /// 1. While one can not handle multiple IR level basic blocks at the
- /// SelectionDAG Level, one can generate multiple machine basic blocks
- /// for one IR level basic block. This is how we handle bit tests and
- /// switches.
- ///
- /// 2. At the MI level, tail calls are represented via a special return
- /// MIInst called "tcreturn". Thus if we know the basic block in which we
- /// wish to insert the stack protector check, we get the correct behavior
- /// by always inserting the stack protector check right before the return
- /// statement. This is a "magical transformation" since no matter where
- /// the stack protector check intrinsic is, we always insert the stack
- /// protector check code at the end of the BB.
- ///
- /// Given the aforementioned constraints, the following solution was devised:
- ///
- /// 1. On platforms that do not support SelectionDAG stack protector check
- /// generation, allow for the normal IR level stack protector check
- /// generation to continue.
- ///
- /// 2. On platforms that do support SelectionDAG stack protector check
- /// generation:
- ///
- /// a. Use the IR level stack protector pass to decide if a stack
- /// protector is required/which BB we insert the stack protector check
- /// in by reusing the logic already therein. If we wish to generate a
- /// stack protector check in a basic block, we place a special IR
- /// intrinsic called llvm.stackprotectorcheck right before the BB's
- /// returninst or if there is a callinst that could potentially be
- /// sibling call optimized, before the call inst.
- ///
- /// b. Then when a BB with said intrinsic is processed, we codegen the BB
- /// normally via SelectBasicBlock. In said process, when we visit the
- /// stack protector check, we do not actually emit anything into the
- /// BB. Instead, we just initialize the stack protector descriptor
- /// class (which involves stashing information/creating the success
- /// mbbb and the failure mbb if we have not created one for this
- /// function yet) and export the guard variable that we are going to
- /// compare.
- ///
- /// c. After we finish selecting the basic block, in FinishBasicBlock if
- /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is
- /// initialized, we produce the validation code with one of these
- /// techniques:
- /// 1) with a call to a guard check function
- /// 2) with inlined instrumentation
- ///
- /// 1) We insert a call to the check function before the terminator.
- ///
- /// 2) We first find a splice point in the parent basic block
- /// before the terminator and then splice the terminator of said basic
- /// block into the success basic block. Then we code-gen a new tail for
- /// the parent basic block consisting of the two loads, the comparison,
- /// and finally two branches to the success/failure basic blocks. We
- /// conclude by code-gening the failure basic block if we have not
- /// code-gened it already (all stack protector checks we generate in
- /// the same function, use the same failure basic block).
- class StackProtectorDescriptor {
- public:
- StackProtectorDescriptor() = default;
-
- /// Returns true if all fields of the stack protector descriptor are
- /// initialized implying that we should/are ready to emit a stack protector.
- bool shouldEmitStackProtector() const {
- return ParentMBB && SuccessMBB && FailureMBB;
- }
-
- bool shouldEmitFunctionBasedCheckStackProtector() const {
- return ParentMBB && !SuccessMBB && !FailureMBB;
- }
-
- /// Initialize the stack protector descriptor structure for a new basic
- /// block.
- void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
- bool FunctionBasedInstrumentation) {
- // Make sure we are not initialized yet.
- assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
- "already initialized!");
- ParentMBB = MBB;
- if (!FunctionBasedInstrumentation) {
- SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
- FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
- }
- }
-
- /// Reset state that changes when we handle different basic blocks.
- ///
- /// This currently includes:
- ///
- /// 1. The specific basic block we are generating a
- /// stack protector for (ParentMBB).
- ///
- /// 2. The successor machine basic block that will contain the tail of
- /// parent mbb after we create the stack protector check (SuccessMBB). This
- /// BB is visited only on stack protector check success.
- void resetPerBBState() {
- ParentMBB = nullptr;
- SuccessMBB = nullptr;
- }
-
- /// Reset state that only changes when we switch functions.
- ///
- /// This currently includes:
- ///
- /// 1. FailureMBB since we reuse the failure code path for all stack
- /// protector checks created in an individual function.
- ///
- /// 2.The guard variable since the guard variable we are checking against is
- /// always the same.
- void resetPerFunctionState() {
- FailureMBB = nullptr;
- }
-
- MachineBasicBlock *getParentMBB() { return ParentMBB; }
- MachineBasicBlock *getSuccessMBB() { return SuccessMBB; }
- MachineBasicBlock *getFailureMBB() { return FailureMBB; }
-
- private:
- /// The basic block for which we are generating the stack protector.
- ///
- /// As a result of stack protector generation, we will splice the
- /// terminators of this basic block into the successor mbb SuccessMBB and
- /// replace it with a compare/branch to the successor mbbs
- /// SuccessMBB/FailureMBB depending on whether or not the stack protector
- /// was violated.
- MachineBasicBlock *ParentMBB = nullptr;
-
- /// A basic block visited on stack protector check success that contains the
- /// terminators of ParentMBB.
- MachineBasicBlock *SuccessMBB = nullptr;
-
- /// This basic block visited on stack protector check failure that will
- /// contain a call to __stack_chk_fail().
- MachineBasicBlock *FailureMBB = nullptr;
-
- /// Add a successor machine basic block to ParentMBB. If the successor mbb
- /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
- /// block will be created. Assign a large weight if IsLikely is true.
- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
- MachineBasicBlock *ParentMBB,
- bool IsLikely,
- MachineBasicBlock *SuccMBB = nullptr);
- };
-
private:
const TargetMachine &TM;
@@ -764,6 +567,10 @@ private:
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT,
+ SmallVector<SDValue, 7> &OpValues, bool isGather);
+ void visitVPStoreScatter(const VPIntrinsic &VPIntrin,
+ SmallVector<SDValue, 7> &OpValues, bool isScatter);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
void visitVAStart(const CallInst &I);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 40083c614a6c..77e9e53668f9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getBaseName((Intrinsic::ID)IID).str();
- else if (!G)
+ if (!G)
return "Unknown intrinsic";
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
}
@@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO,
if (G) {
const MachineFunction *MF = &G->getMachineFunction();
return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(),
- &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(),
- *G->getContext());
- } else {
- LLVMContext Ctx;
- return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
- /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
+ &MF->getFrameInfo(),
+ G->getSubtarget().getInstrInfo(), *G->getContext());
}
+
+ LLVMContext Ctx;
+ return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr,
+ /*MFI=*/nullptr, /*TII=*/nullptr, Ctx);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
if (!Value.getNode()) {
OS << "<null>";
return false;
- } else if (shouldPrintInline(*Value.getNode(), G)) {
+ }
+
+ if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
return true;
- } else {
- OS << PrintNodeId(*Value.getNode());
- if (unsigned RN = Value.getResNo())
- OS << ':' << RN;
- return false;
}
+
+ OS << PrintNodeId(*Value.getNode());
+ if (unsigned RN = Value.getResNo())
+ OS << ':' << RN;
+ return false;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
N->print(OS, G);
- if (depth < 1)
- return;
-
for (const SDValue &Op : N->op_values()) {
// Don't follow chain operands.
if (Op.getValueType() == MVT::Other)
continue;
OS << '\n';
- printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2);
+ printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1415cce3b1df..c7e37cf8ca14 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
@@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
- bool InstrRef = TM.Options.ValueTrackingVariableLocations;
+ bool InstrRef = MF->useDebugInstrRef();
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1];
assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST &&
@@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF,
R << (" (in function: " + MF.getName() + ")").str();
if (ShouldAbort)
- report_fatal_error(R.getMsg());
+ report_fatal_error(Twine(R.getMsg()));
ORE.emit(R);
}
@@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine1 && MatchFilterBB)
@@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Second step, hack on the DAG until it only uses operations and types that
@@ -840,7 +841,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
// Only allow creation of legal node types.
@@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
{
@@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombineLT && MatchFilterBB)
@@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
}
@@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (ViewDAGCombine2 && MatchFilterBB)
@@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#ifndef NDEBUG
if (TTI.hasBranchDivergence())
- CurDAG->VerifyDAGDiverence();
+ CurDAG->VerifyDAGDivergence();
#endif
if (OptLevel != CodeGenOpt::None)
@@ -1045,25 +1046,25 @@ public:
} // end anonymous namespace
// This function is used to enforce the topological node id property
-// property leveraged during Instruction selection. Before selection all
-// nodes are given a non-negative id such that all nodes have a larger id than
+// leveraged during instruction selection. Before the selection process all
+// nodes are given a non-negative id such that all nodes have a greater id than
// their operands. As this holds transitively we can prune checks that a node N
// is a predecessor of M another by not recursively checking through M's
-// operands if N's ID is larger than M's ID. This is significantly improves
-// performance of for various legality checks (e.g. IsLegalToFold /
-// UpdateChains).
-
-// However, when we fuse multiple nodes into a single node
-// during selection we may induce a predecessor relationship between inputs and
-// outputs of distinct nodes being merged violating the topological property.
-// Should a fused node have a successor which has yet to be selected, our
-// legality checks would be incorrect. To avoid this we mark all unselected
-// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x =>
+// operands if N's ID is larger than M's ID. This significantly improves
+// performance of various legality checks (e.g. IsLegalToFold / UpdateChains).
+
+// However, when we fuse multiple nodes into a single node during the
+// selection we may induce a predecessor relationship between inputs and
+// outputs of distinct nodes being merged, violating the topological property.
+// Should a fused node have a successor which has yet to be selected,
+// our legality checks would be incorrect. To avoid this we mark all unselected
+// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x =>
// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M.
// We use bit-negation to more clearly enforce that node id -1 can only be
-// achieved by selected nodes). As the conversion is reversable the original Id,
-// topological pruning can still be leveraged when looking for unselected nodes.
-// This method is call internally in all ISel replacement calls.
+// achieved by selected nodes. As the conversion is reversible to the original
+// Id, topological pruning can still be leveraged when looking for unselected
+// nodes. This method is called internally in all ISel replacement-related
+// functions.
void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
SmallVector<SDNode *, 4> Nodes;
Nodes.push_back(Node);
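A stand-alone check (illustration only) of the reversible encoding the new comment describes: bit-negation maps a non-negative id x to -(x+1), and applying it twice restores the original id, so the topological ordering is never lost:

    #include <cassert>

    int main() {
      for (int Id : {1, 7, 12345}) {
        int Marked = ~Id;            // mark as "not prunable"
        assert(Marked == -(Id + 1)); // the encoding named in the comment
        assert(~Marked == Id);       // and it is reversible
      }
    }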
@@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) {
}
}
-// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a
+// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a
// NodeId with the equivalent node id which is invalid for topological
// pruning.
void SelectionDAGISel::InvalidateNodeId(SDNode *N) {
@@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB,
bool IsSingleCatchAllClause =
CPI->getNumArgOperands() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue();
- if (!IsSingleCatchAllClause) {
+  // catchpads for longjmp use an empty type list, e.g. catchpad within %0 [],
+  // and they don't need LSDA info
+ bool IsCatchLongjmp = CPI->getNumArgOperands() == 0;
+ if (!IsSingleCatchAllClause && !IsCatchLongjmp) {
// Create a mapping from landing pad label to landing pad index.
bool IntrFound = false;
for (const User *U : CPI->users()) {
@@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
SDB->SPDescriptor.resetPerFunctionState();
}
-/// Given that the input MI is before a partial terminator sequence TSeq, return
-/// true if M + TSeq also a partial terminator sequence.
-///
-/// A Terminator sequence is a sequence of MachineInstrs which at this point in
-/// lowering copy vregs into physical registers, which are then passed into
-/// terminator instructors so we can satisfy ABI constraints. A partial
-/// terminator sequence is an improper subset of a terminator sequence (i.e. it
-/// may be the whole terminator sequence).
-static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
- // If we do not have a copy or an implicit def, we return true if and only if
- // MI is a debug value.
- if (!MI.isCopy() && !MI.isImplicitDef())
- // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the
- // physical registers if there is debug info associated with the terminator
- // of our mbb. We want to include said debug info in our terminator
- // sequence, so we return true in that case.
- return MI.isDebugValue();
-
- // We have left the terminator sequence if we are not doing one of the
- // following:
- //
- // 1. Copying a vreg into a physical register.
- // 2. Copying a vreg into a vreg.
- // 3. Defining a register via an implicit def.
-
- // OPI should always be a register definition...
- MachineInstr::const_mop_iterator OPI = MI.operands_begin();
- if (!OPI->isReg() || !OPI->isDef())
- return false;
-
- // Defining any register via an implicit def is always ok.
- if (MI.isImplicitDef())
- return true;
-
- // Grab the copy source...
- MachineInstr::const_mop_iterator OPI2 = OPI;
- ++OPI2;
- assert(OPI2 != MI.operands_end()
- && "Should have a copy implying we should have 2 arguments.");
-
- // Make sure that the copy dest is not a vreg when the copy source is a
- // physical register.
- if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
- Register::isPhysicalRegister(OPI2->getReg())))
- return false;
-
- return true;
-}
-
-/// Find the split point at which to splice the end of BB into its success stack
-/// protector check machine basic block.
-///
-/// On many platforms, due to ABI constraints, terminators, even before register
-/// allocation, use physical registers. This creates an issue for us since
-/// physical registers at this point can not travel across basic
-/// blocks. Luckily, selectiondag always moves physical registers into vregs
-/// when they enter functions and moves them through a sequence of copies back
-/// into the physical registers right before the terminator creating a
-/// ``Terminator Sequence''. This function is searching for the beginning of the
-/// terminator sequence so that we can ensure that we splice off not just the
-/// terminator, but additionally the copies that move the vregs into the
-/// physical registers.
-static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB,
- const TargetInstrInfo &TII) {
- MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- if (SplitPoint == BB->begin())
- return SplitPoint;
-
- MachineBasicBlock::iterator Start = BB->begin();
- MachineBasicBlock::iterator Previous = SplitPoint;
- --Previous;
-
- if (TII.isTailCall(*SplitPoint) &&
- Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
- // call itself, then we must insert before the sequence even starts. For
- // example:
- // <split point>
- // ADJCALLSTACKDOWN ...
- // <Moves>
- // ADJCALLSTACKUP ...
- // TAILJMP somewhere
- // On the other hand, it could be an unrelated call in which case this tail call
- // has to register moves of its own and should be the split point. For example:
- // ADJCALLSTACKDOWN
- // CALL something_else
- // ADJCALLSTACKUP
- // <split point>
- // TAILJMP somewhere
- do {
- --Previous;
- if (Previous->isCall())
- return SplitPoint;
- } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode());
-
- return Previous;
- }
-
- while (MIIsInTerminatorSequence(*Previous)) {
- SplitPoint = Previous;
- if (Previous == Start)
- break;
- --Previous;
- }
-
- return SplitPoint;
-}
-
void
SelectionDAGISel::FinishBasicBlock() {
LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: "
@@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB, *TII);
+ findSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
@@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() {
// test, and delete the last bit test.
MachineBasicBlock *NextMBB;
- if (BTB.ContiguousRange && j + 2 == ej) {
- // Second-to-last bit-test with contiguous range: fall through to the
- // target of the final bit test.
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
+ // Second-to-last bit-test with contiguous range or omitted range
+ // check: fall through to the target of the final bit test.
NextMBB = BTB.Cases[j + 1].TargetBB;
} else if (j + 1 == ej) {
// For the last bit test, fall through to Default.
@@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() {
SDB->clear();
CodeGenAndEmitDAG();
- if (BTB.ContiguousRange && j + 2 == ej) {
+ if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) {
// Since we're not going to use the final bit test, remove it.
BTB.Cases.pop_back();
break;
@@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
else
Msg << "unknown intrinsic #" << iid;
}
- report_fatal_error(Msg.str());
+ report_fatal_error(Twine(Msg.str()));
}
char SelectionDAGISel::ID = 0;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index a903c2401264..e2db9633bfb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
StatepointLoweringInfo SI(DAG);
unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
- SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee,
ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
false);
if (!VarArgDisallowed)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1c1dae8f953f..e4a69adff05b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
@@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
@@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
@@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ // If we don't demand the inserted subvector, return the base vector.
+ if (DemandedSubElts == 0)
return Vec;
+ // If this simply widens the lowest subvector, see if we can do it earlier.
+ if (Idx == 0 && Vec.isUndef()) {
+ if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), NewSub, Op.getOperand(2));
+ }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
@@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
- APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
@@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
- DemandedBits = APInt::getAllOnesValue(BitWidth);
- DemandedElts = APInt::getAllOnesValue(NumElts);
+ DemandedBits = APInt::getAllOnes(BitWidth);
+ DemandedElts = APInt::getAllOnes(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
@@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
+ !DemandedSrcElts.isAllOnes()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
@@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
@@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue() &&
- DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
@@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If we can't turn this into a 'not', try to shrink the constant.
- if (!C || !C->isAllOnesValue())
+ if (!C || !C->isAllOnes())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
@@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedBits.isOneValue())
+ if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
@@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
@@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits =
- TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
+ unsigned MinSignedBits =
+ TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);
+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
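A small stand-alone illustration of the test above, using an 8-bit value as a stand-in for the in-register type: once the extension width is at least the minimum number of signed bits the value needs, SIGN_EXTEND_INREG is a no-op:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low `Bits` bits of V, mimicking SIGN_EXTEND_INREG on i8.
    static int8_t signExtendInReg(int8_t V, unsigned Bits) {
      unsigned Shift = 8 - Bits;
      return static_cast<int8_t>(static_cast<uint8_t>(V) << Shift) >> Shift;
    }

    int main() {
      const int8_t X = -6; // 0b11111010: needs 4 signed bits
      for (unsigned Width = 4; Width <= 8; ++Width)
        assert(signExtendInReg(X, Width) == X); // no-op once Width >= 4
    }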
@@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Demand the bits from every vector element without a constant index.
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
@@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcBits.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
@@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
@@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
- if (C && !C->isAllOnesValue() && !C->isOne() &&
- (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ if (C && !C->isAllOnes() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnes()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
return SDValue();
};
- APInt KnownUndef = APInt::getNullValue(NumElts);
+ APInt KnownUndef = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
@@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
@@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
- APInt SrcZero, SrcUndef;
- APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+ APInt SrcDemandedElts, SrcZero, SrcUndef;
// Bitcast from 'large element' src vector to 'small element' vector, we
// must demand a source element if any DemandedElt maps to it.
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
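APIntOps::ScaleBitMask replaces the two hand-written scaling loops in this function. A self-contained sketch of the assumed behaviour on fixed widths (narrowing OR-reduces each group of bits, widening replicates each bit), using std::bitset as a stand-in for APInt:

    #include <bitset>
    #include <cassert>
    #include <cstddef>

    template <std::size_t From, std::size_t To>
    std::bitset<To> scaleBitMask(const std::bitset<From> &In) {
      std::bitset<To> Out;
      if (From >= To) {
        std::size_t Scale = From / To; // narrowing: OR-reduce each group
        for (std::size_t I = 0; I < From; ++I)
          if (In[I])
            Out.set(I / Scale);
      } else {
        std::size_t Scale = To / From; // widening: replicate each bit
        for (std::size_t I = 0; I < From; ++I)
          if (In[I])
            for (std::size_t J = 0; J < Scale; ++J)
              Out.set(I * Scale + J);
      }
      return Out;
    }

    int main() {
      // <8 x i16> viewed as <4 x i32>: demanding i16 elements {1,6}
      // demands i32 elements {0,3}.
      std::bitset<8> Demanded;
      Demanded.set(1);
      Demanded.set(6);
      assert((scaleBitMask<8, 4>(Demanded) == std::bitset<4>("1001")));
    }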
@@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
- APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
@@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// of this vector.
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
- if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
KnownZero.setBit(i);
- if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
KnownUndef.setBit(i);
}
}
@@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
@@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
@@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcElts.isAllOnesValue() ||
- !DemandedSubElts.isAllOnesValue()) {
+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
@@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
@@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
}
break;
}
@@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
} else {
KnownBits Known;
- APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
@@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal.isOneValue();
+ return CVal.isOne();
case ZeroOrNegativeOneBooleanContent:
- return CVal.isAllOnesValue();
+ return CVal.isAllOnes();
}
llvm_unreachable("Invalid boolean contents");
@@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
- return CN->isNullValue();
+ return CN->isZero();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
@@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return N->isAllOnesValue() && SExt;
+ return N->isAllOnes() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
@@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isNullValue())
+ if (YConst && YConst->isZero())
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
@@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
- bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
- (!N1C->isNullValue() && Cond == ISD::SETNE);
+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
+ (!N1C->isZero() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
@@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
- } else if ((N1C->isNullValue() || N1C->isOne()) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ } else if ((N1C->isZero() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
@@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
- if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
@@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
+ N1C && N1C->isAllOnes()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, OpVT),
+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
- if (C1.isNullValue())
+ if (C1.isZero())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
@@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isNullValue();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ bool CmpZero = N1C->getAPIntValue().isZero();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
- if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
@@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
@@ -4687,7 +4716,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
getSimpleValueType(DL, STy->getElementType(ResNo));
} else {
assert(ResNo == 0 && "Asm only has one result!");
- OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType());
+ OpInfo.ConstraintVT =
+ getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
}
++ResNo;
break;
@@ -5049,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
@@ -5151,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
const APInt &Divisor = C->getAPIntValue();
- APInt::ms magics = Divisor.magic();
+ SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
- if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
- magics.m = 0;
- magics.s = 0;
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
ShiftMask = 0;
- } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
- } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
- MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
- Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
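For orientation, a hedged scalar mirror of the node sequence BuildSDIV emits from these per-lane constants (illustrative C++ only; the helper name is invented, Magic/Factor/Shift/Mask stand for the values gathered by the lambda above, and INT_MIN signed-wrap subtleties that the DAG handles modulo 2^BW are ignored):

#include <cstdint>
int32_t sdivByConstSketch(int32_t X, int32_t Magic, int32_t Factor,
                          unsigned Shift, int32_t Mask) {
  int32_t Q = (int32_t)(((int64_t)X * Magic) >> 32); // MULHS by the magic value
  Q += X * Factor;                                   // optionally add/subtract the numerator
  Q >>= Shift;                                       // arithmetic shift by ShiftAmount
  int32_t T = (int32_t)((uint32_t)Q >> 31) & Mask;   // extract the sign bit, masked
  return Q + T;                                      // round the quotient toward zero
}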
@@ -5296,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- APInt::mu magics = Divisor.magicu();
+ UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
+ if (magics.IsAdd != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
- magics = Divisor.lshr(PreShift).magicu(PreShift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
}
- APInt Magic = magics.m;
+ APInt Magic = magics.Magic;
unsigned SelNPQ;
- if (magics.a == 0 || Divisor.isOneValue()) {
- assert(magics.s < Divisor.getBitWidth() &&
+ if (magics.IsAdd == 0 || Divisor.isOne()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- PostShift = magics.s;
+ PostShift = magics.ShiftAmount;
SelNPQ = false;
} else {
- PostShift = magics.s - 1;
+ PostShift = magics.ShiftAmount - 1;
SelNPQ = true;
}
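Similarly, a hedged scalar mirror of the shape BuildUDIV produces from PreShift/Magic/PostShift and the NPQ fixup (illustrative C++ only; the helper name is invented, and the multiply by 2^(BW-1) used for the NPQ factor is written as a plain shift):

#include <cstdint>
uint32_t udivByConstSketch(uint32_t X, uint32_t Magic, unsigned PreShift,
                           unsigned PostShift, bool UseNPQ) {
  uint32_t Q = (uint32_t)(((uint64_t)(X >> PreShift) * Magic) >> 32); // MULHU
  if (UseNPQ)                    // expensive fixup when the magic "add" bit is set
    Q = ((X - Q) >> 1) + Q;      // NPQ = (n - q) / 2 + q
  return Q >> PostShift;
}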
@@ -5330,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getNullValue(EltBits),
+ : APInt::getZero(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
@@ -5510,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (CDiv->isNullValue())
+ if (CDiv->isZero())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
- ComparingWithAllZeros &= Cmp.isNullValue();
+ ComparingWithAllZeros &= Cmp.isZero();
// `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
@@ -5528,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// If all lanes are tautological (either all divisors are ones, or divisor
// is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
// If we are comparing with non-zero, we'll need to subtract said
// comparison value from the LHS. But there is no point in doing that if
// every lane where we are comparing with non-zero is tautological.
- if (!Cmp.isNullValue())
+ if (!Cmp.isZero())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5555,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
- APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
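The comments above compress the rotate-based divisibility test; a small self-checking illustration at W = 8 for D = 6 = 3 * 2^1 (illustrative C++ only, names invented; P = 171 is the inverse of 3 mod 256 and Q = floor(255 / 6) = 42):

#include <cassert>
#include <cstdint>
static uint8_t rotr8(uint8_t V, unsigned K) {
  return K ? (uint8_t)((V >> K) | (V << (8 - K))) : V;
}
int main() {
  const unsigned K = 1;
  const uint8_t P = 171, Q = 42;                // inv(3) mod 2^8, (2^8 - 1) / 6
  for (unsigned X = 0; X < 256; ++X)            // x % 6 == 0  <=>  rotr(x * P, K) <= Q
    assert(((X % 6) == 0) == (rotr8((uint8_t)(X * P), K) <= Q));
  return 0;
}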
@@ -5751,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isZero())
return SDValue();
bool HadIntMinDivisor = false;
@@ -5764,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
@@ -5777,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ HadOneDivisor |= D.isOne();
+ AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
@@ -5793,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5801,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
@@ -5817,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
- assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes, those can be set to undef if appropriate.
- if (D.isOneValue()) {
+ if (D.isOne()) {
// Set P, A and K to bogus values so we can try to splat them.
P = 0;
A = -1;
@@ -5950,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
- DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
@@ -6776,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
- APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
@@ -6969,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
-bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
+ assert(VT.isVector() && "Expected vector type");
+ unsigned Len = VT.getScalarSizeInBits();
+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
+}
+
+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -6980,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
- return false;
+ return SDValue();
// Only expand vector types if we have the appropriate vector bit operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
+ return SDValue();
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -7025,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
- Result = Op;
- return true;
+ return Op;
}
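For reference, the scalar 32-bit form of the parallel bit-count this expansion builds (illustrative C++ only; the DAG version splats the same 0x55/0x33/0x0F/0x01 patterns to the element width):

#include <cstdint>
uint32_t ctpop32Sketch(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // 2-bit sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // 4-bit sums
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // 8-bit sums
  return (V * 0x01010101u) >> 24;                   // add the byte sums together
}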
-bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7039,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
@@ -7051,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
- !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
// for now, we do this:
// x = x | (x >> 1);
@@ -7078,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
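A scalar i32 sketch of the fallback path above (illustrative C++ only; __builtin_popcount stands in for the CTPOP node): smearing the leading set bit rightward and popcounting the inverted result yields the leading-zero count.

#include <cstdint>
uint32_t ctlz32Sketch(uint32_t X) {
  X |= X >> 1; X |= X >> 2; X |= X >> 4;
  X |= X >> 8; X |= X >> 16;          // every bit below the MSB is now set
  return __builtin_popcount(~X);      // leading zeros = set bits of the complement
}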
-bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
@@ -7091,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTTZ, VT)) {
- Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTTZ, VT))
+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
@@ -7103,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -7127,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
- Result =
- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
- return true;
+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
}
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
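Likewise for the identity used here (illustrative C++ only): ~x & (x - 1) isolates exactly the bits strictly below the lowest set bit, so counting them gives the trailing-zero count.

#include <cstdint>
uint32_t cttz32Sketch(uint32_t X) {
  uint32_t Tmp = ~X & (X - 1);        // ones strictly below the lowest set bit
  return __builtin_popcount(Tmp);     // CTLZ-only path would compute 32 - ctlz(Tmp)
}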
-bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG, bool IsNegative) const {
+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7148,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// abs(x) -> umin(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// Only expand vector types if we have the appropriate vector operations.
@@ -7177,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- } else {
- // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
}
- return true;
+
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
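The generic path above is the classic sign-splat trick; a scalar i32 sketch (illustrative C++ only, helper names invented, ignoring the INT_MIN wrap case that the DAG nodes handle modulo 2^BW):

#include <cstdint>
int32_t absSketch(int32_t X) {        // abs(x) = (x + s) ^ s
  int32_t S = X >> 31;                // SRA by BW-1: 0 or -1
  return (X + S) ^ S;
}
int32_t negAbsSketch(int32_t X) {     // 0 - abs(x) = s - (x ^ s)
  int32_t S = X >> 31;
  return S - (X ^ S);
}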
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
@@ -7265,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
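A scalar i32 rendering of the revised sequence (illustrative C++ only; __builtin_bswap32 stands in for the BSWAP node): shifting before masking lets all three steps reuse the low-half masks, which is what allows dropping the 0xF0/0xCC/0xAA splats.

#include <cstdint>
uint32_t bitreverse32Sketch(uint32_t V) {
  V = __builtin_bswap32(V);                                  // reverse the bytes
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4);   // swap nibbles
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2);   // swap bit pairs
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1);   // swap adjacent bits
  return V;
}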
@@ -7802,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
- unsigned NumSubElts) {
- if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
- return Idx;
+ ElementCount SubEC) {
+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+ "Cannot index a scalable vector within a fixed-width vector");
- EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
- if (VecVT.isScalableVector()) {
+ unsigned NumSubElts = SubEC.getKnownMinValue();
+ EVT IdxVT = Idx.getValueType();
+
+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {
// If this is a constant index and we know the value plus the number of
// elements in the subvector minus one is less than the minimum number of
// elements then it's safe to return Idx.
@@ -7855,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
-
- // Scalable vectors don't need clamping as these are checked at compile time
- if (SubVecVT.isFixedLengthVector()) {
- assert(SubVecVT.getVectorElementType() == EltVT &&
- "Sub-vector must be a fixed vector with matching element type");
- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
- SubVecVT.getVectorNumElements());
- }
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorElementCount());
EVT IdxVT = Index.getValueType();
+ if (SubVecVT.isScalableVector())
+ Index =
+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
@@ -7920,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
+ if (C->isZero() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
@@ -7948,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
- IsScaledIndex = false;
- IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
- }
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
+ return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
return IndexType;
}
@@ -8072,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
- // SatMax -> Overflow && SumDiff < 0
- // SatMin -> Overflow && SumDiff >= 0
+ // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
APInt MinVal = APInt::getSignedMinValue(BitWidth);
- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
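A worked i32 instance of the new saturation arm (illustrative C++ only; the overflow test shown is the usual sign check, not how the DAG computes it): shifting the wrapped sum right by BW - 1 gives 0 or -1, and XOR with the signed minimum turns that into INT32_MIN or INT32_MAX as required.

#include <cstdint>
int32_t saddSatSketch(int32_t A, int32_t B) {
  int32_t Sum = (int32_t)((uint32_t)A + (uint32_t)B); // wrapping add
  bool Overflow = ((A ^ Sum) & (B ^ Sum)) < 0;        // both inputs disagree with the sum's sign
  int32_t Sat = (Sum >> 31) ^ INT32_MIN;              // INT32_MAX on positive overflow, INT32_MIN on negative
  return Overflow ? Sat : Sum;
}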
@@ -8154,8 +8172,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
APInt MaxVal = APInt::getSignedMaxValue(VTSize);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ // Xor the inputs; if the resulting sign bit is 0 the product will be
+ // positive, else negative.
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
} else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
SDValue Result =
@@ -8390,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
- if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ if (isOperationLegal(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
@@ -8443,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
- WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
- VT.getVectorNumElements());
+ WideVT =
+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
SDValue BottomHalf;
SDValue TopHalf;
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index c70620fd7532..7f9518e4c075 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -50,7 +50,6 @@ STATISTIC(NumFinished, "Number of splits finished");
STATISTIC(NumSimple, "Number of splits that were simple");
STATISTIC(NumCopies, "Number of copies inserted for splitting");
STATISTIC(NumRemats, "Number of rematerialized defs for splitting");
-STATISTIC(NumRepairs, "Number of invalid live ranges repaired");
//===----------------------------------------------------------------------===//
// Last Insert Point Analysis
@@ -160,7 +159,6 @@ void SplitAnalysis::clear() {
UseBlocks.clear();
ThroughBlocks.clear();
CurLI = nullptr;
- DidRepairRange = false;
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -188,20 +186,7 @@ void SplitAnalysis::analyzeUses() {
UseSlots.end());
// Compute per-live block info.
- if (!calcLiveBlockInfo()) {
- // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
- // I am looking at you, RegisterCoalescer!
- DidRepairRange = true;
- ++NumRepairs;
- LLVM_DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
- const_cast<LiveIntervals&>(LIS)
- .shrinkToUses(const_cast<LiveInterval*>(CurLI));
- UseBlocks.clear();
- ThroughBlocks.clear();
- bool fixed = calcLiveBlockInfo();
- (void)fixed;
- assert(fixed && "Couldn't fix broken live interval");
- }
+ calcLiveBlockInfo();
LLVM_DEBUG(dbgs() << "Analyze counted " << UseSlots.size() << " instrs in "
<< UseBlocks.size() << " blocks, through "
@@ -210,11 +195,11 @@ void SplitAnalysis::analyzeUses() {
/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
/// where CurLI is live.
-bool SplitAnalysis::calcLiveBlockInfo() {
+void SplitAnalysis::calcLiveBlockInfo() {
ThroughBlocks.resize(MF.getNumBlockIDs());
NumThroughBlocks = NumGapBlocks = 0;
if (CurLI->empty())
- return true;
+ return;
LiveInterval::const_iterator LVI = CurLI->begin();
LiveInterval::const_iterator LVE = CurLI->end();
@@ -240,8 +225,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
ThroughBlocks.set(BI.MBB->getNumber());
// The range shouldn't end mid-block if there are no uses. This shouldn't
// happen.
- if (LVI->end < Stop)
- return false;
+ assert(LVI->end >= Stop && "range ends mid block with no uses");
} else {
// This block has uses. Find the first and last uses in the block.
BI.FirstInstr = *UseI;
@@ -312,7 +296,6 @@ bool SplitAnalysis::calcLiveBlockInfo() {
}
assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count");
- return true;
}
unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const {
@@ -529,19 +512,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
| getInternalReadRegState(!FirstCopy), SubIdx)
.addReg(FromReg, 0, SubIdx);
- BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
- LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
- DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange &SR) {
- SR.createDeadDef(Def, Allocator);
- },
- Indexes, TRI);
return Def;
}
@@ -549,11 +525,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
MachineInstr *CopyMI =
BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
}
@@ -567,18 +543,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
- SmallVector<unsigned, 8> Indexes;
+ SmallVector<unsigned, 8> SubIndexes;
// Abort if we cannot possibly implement the COPY with the given indexes.
- if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
+ if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))
report_fatal_error("Impossible to implement partial COPY");
SlotIndex Def;
- for (unsigned BestIdx : Indexes) {
+ for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
DestLI, Late, Def);
}
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ DestLI.refineSubRanges(
+ Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
+
return Def;
}
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index fbcffacb49ab..902546fe16d8 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -160,14 +160,11 @@ private:
/// NumThroughBlocks - Number of live-through blocks.
unsigned NumThroughBlocks;
- /// DidRepairRange - analyze was forced to shrinkToUses().
- bool DidRepairRange;
-
// Summarize statistics by counting instructions using CurLI.
void analyzeUses();
/// calcLiveBlockInfo - Compute per-block information about CurLI.
- bool calcLiveBlockInfo();
+ void calcLiveBlockInfo();
public:
SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
@@ -177,11 +174,6 @@ public:
/// split.
void analyze(const LiveInterval *li);
- /// didRepairRange() - Returns true if CurLI was invalid and has been repaired
- /// by analyze(). This really shouldn't happen, but sometimes the coalescer
- /// can create live ranges that end in mid-air.
- bool didRepairRange() const { return DidRepairRange; }
-
/// clear - clear all data structures so SplitAnalysis is ready to analyze a
/// new interval.
void clear();
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 162f3aab024d..623d5da9831e 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -687,6 +687,8 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// Walk the instructions in the block to look for start/end ops.
for (MachineInstr &MI : *MBB) {
+ if (MI.isDebugInstr())
+ continue;
if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
MI.getOpcode() == TargetOpcode::LIFETIME_END) {
int Slot = getStartOrEndSlot(MI);
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 9f229d51b985..7445f77c955d 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -148,10 +148,8 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return false;
bool NeedsProtector = false;
- for (StructType::element_iterator I = ST->element_begin(),
- E = ST->element_end();
- I != E; ++I)
- if (ContainsProtectableArray(*I, IsLarge, Strong, true)) {
+ for (Type *ET : ST->elements())
+ if (ContainsProtectableArray(ET, IsLarge, Strong, true)) {
// If the element is a protectable array and is large (>= SSPBufferSize)
// then we are done. If the protectable array is not large, then
// keep looking in case a subsequent element is a large array.
@@ -436,13 +434,11 @@ bool StackProtector::InsertStackProtectors() {
// protection in SDAG.
bool SupportsSelectionDAGSP =
TLI->useStackGuardXorFP() ||
- (EnableSelectionDAGSP && !TM->Options.EnableFastISel &&
- !TM->Options.EnableGlobalISel);
- AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+ (EnableSelectionDAGSP && !TM->Options.EnableFastISel);
+ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
- for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
- BasicBlock *BB = &*I++;
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+ for (BasicBlock &BB : llvm::make_early_inc_range(*F)) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator());
if (!RI)
continue;
@@ -530,23 +526,23 @@ bool StackProtector::InsertStackProtectors() {
// Split the basic block before the return instruction.
BasicBlock *NewBB =
- BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
+ BB.splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
- if (DT && DT->isReachableFromEntry(BB)) {
- DT->addNewBlock(NewBB, BB);
- DT->addNewBlock(FailBB, BB);
+ if (DT && DT->isReachableFromEntry(&BB)) {
+ DT->addNewBlock(NewBB, &BB);
+ DT->addNewBlock(FailBB, &BB);
}
// Remove default branch instruction to the new BB.
- BB->getTerminator()->eraseFromParent();
+ BB.getTerminator()->eraseFromParent();
// Move the newly created basic block to the point right after the old
// basic block so that it's in the "fall through" position.
- NewBB->moveAfter(BB);
+ NewBB->moveAfter(&BB);
// Generate the stack protector instructions in the old basic block.
- IRBuilder<> B(BB);
+ IRBuilder<> B(&BB);
Value *Guard = getStackGuard(TLI, M, B);
LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
Value *Cmp = B.CreateICmpEQ(Guard, LI2);
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index ebe00bd7402f..9aea5a7a8853 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -169,7 +169,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
if (!LS->hasInterval(FI))
continue;
LiveInterval &li = LS->getInterval(FI);
- if (!MI.isDebugValue())
+ if (!MI.isDebugInstr())
li.incrementWeight(
LiveIntervals::getSpillWeight(false, true, MBFI, MI));
}
diff --git a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
index dfcec32d9537..36a02d5beb4b 100644
--- a/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
+++ b/llvm/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -405,7 +405,7 @@ bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
// Optimize the case where all the case values fit in a word without having
// to subtract minValue. In this case, we can optimize away the subtraction.
- LowBound = APInt::getNullValue(Low.getBitWidth());
+ LowBound = APInt::getZero(Low.getBitWidth());
CmpRange = High;
ContiguousRange = false;
} else {
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index af735f2a0216..943bd18c6c8b 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -70,6 +70,12 @@ static cl::opt<unsigned> TailDupIndirectBranchSize(
"end with indirect branches."), cl::init(20),
cl::Hidden);
+static cl::opt<unsigned> TailDupJmpTableLoopSize(
+ "tail-dup-jmptable-loop-size",
+ cl::desc("Maximum loop latches to consider tail duplication that are "
+ "successors of loop header."),
+ cl::init(128), cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -100,12 +106,11 @@ void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
}
static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
- MBB->pred_end());
- MachineBasicBlock::iterator MI = MBB->begin();
- while (MI != MBB->end()) {
+ for (MachineBasicBlock &MBB : llvm::drop_begin(MF)) {
+ SmallSetVector<MachineBasicBlock *, 8> Preds(MBB.pred_begin(),
+ MBB.pred_end());
+ MachineBasicBlock::iterator MI = MBB.begin();
+ while (MI != MBB.end()) {
if (!MI->isPHI())
break;
for (MachineBasicBlock *PredBB : Preds) {
@@ -118,7 +123,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
if (!Found) {
- dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
<< *MI;
dbgs() << " missing input from predecessor "
<< printMBBReference(*PredBB) << '\n';
@@ -129,14 +134,14 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
if (CheckExtra && !Preds.count(PHIBB)) {
- dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB)
+ dbgs() << "Warning: malformed PHI in " << printMBBReference(MBB)
<< ": " << *MI;
dbgs() << " extra input from predecessor "
<< printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
}
if (PHIBB->getNumber() < 0) {
- dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
+ dbgs() << "Malformed PHI in " << printMBBReference(MBB) << ": "
<< *MI;
dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n';
llvm_unreachable(nullptr);
@@ -279,18 +284,17 @@ bool TailDuplicator::tailDuplicateBlocks() {
VerifyPHIs(*MF, true);
}
- for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
- MachineBasicBlock *MBB = &*I++;
-
+ for (MachineBasicBlock &MBB :
+ llvm::make_early_inc_range(llvm::drop_begin(*MF))) {
if (NumTails == TailDupLimit)
break;
- bool IsSimple = isSimpleBB(MBB);
+ bool IsSimple = isSimpleBB(&MBB);
- if (!shouldTailDuplicate(IsSimple, *MBB))
+ if (!shouldTailDuplicate(IsSimple, MBB))
continue;
- MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr);
}
if (PreRegAlloc && TailDupVerify)
@@ -565,6 +569,29 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
if (TailBB.isSuccessor(&TailBB))
return false;
+ // When doing tail-duplication with jumptable loops like:
+ // 1 -> 2 <-> 3 |
+ // \ <-> 4 |
+ // \ <-> 5 |
+ // \ <-> ... |
+ // \---> rest |
+ // a quadratic number of edges and many more loops are added to the CFG. This
+ // may cause a compile-time regression when the jump table is quite large.
+ // So set a limit on the number of jump table cases.
+ auto isLargeJumpTableLoop = [](const MachineBasicBlock &TailBB) {
+ const SmallPtrSet<const MachineBasicBlock *, 8> Preds(TailBB.pred_begin(),
+ TailBB.pred_end());
+ // Check whether the basic block has a large number of successors, all of
+ // which have only one successor, which is the basic block itself.
+ return llvm::count_if(
+ TailBB.successors(), [&](const MachineBasicBlock *SuccBB) {
+ return Preds.count(SuccBB) && SuccBB->succ_size() == 1;
+ }) > TailDupJmpTableLoopSize;
+ };
+
+ if (isLargeJumpTableLoop(TailBB))
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
@@ -874,18 +901,15 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<Register, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
- I != E; /* empty */) {
- MachineInstr *MI = &*I;
- ++I;
- if (MI->isPHI()) {
+ for (MachineInstr &MI : llvm::make_early_inc_range(*TailBB)) {
+ if (MI.isPHI()) {
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
- processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
} else {
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
- duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
+ duplicateInstruction(&MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
}
}
appendCopies(PredBB, CopyInfos, Copies);
@@ -930,44 +954,56 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// There may be a branch to the layout successor. This is unlikely but it
// happens. The correct thing to do is to remove the branch before
// duplicating the instructions in all cases.
- TII->removeBranch(*PrevBB);
- if (PreRegAlloc) {
- DenseMap<Register, RegSubRegPair> LocalVRMap;
- SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- // Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
- // Replace the uses of the def of the PHI with the register coming
- // from PredBB.
- MachineInstr *MI = &*I++;
- processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
- }
+ bool RemovedBranches = TII->removeBranch(*PrevBB) != 0;
+
+ // If PrevBB still contains terminator instructions, abort the merge.
+ if (PrevBB->getFirstTerminator() == PrevBB->end()) {
+ if (PreRegAlloc) {
+ DenseMap<Register, RegSubRegPair> LocalVRMap;
+ SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->isPHI()) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi,
+ true);
+ }
- // Now copy the non-PHI instructions.
- while (I != TailBB->end()) {
- // Replace def of virtual registers with new registers, and update
- // uses with PHI source register or the new registers.
- MachineInstr *MI = &*I++;
- assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
- duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
- MI->eraseFromParent();
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
+ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
+ MI->eraseFromParent();
+ }
+ appendCopies(PrevBB, CopyInfos, Copies);
+ } else {
+ TII->removeBranch(*PrevBB);
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
}
- appendCopies(PrevBB, CopyInfos, Copies);
- } else {
- TII->removeBranch(*PrevBB);
- // No PHIs to worry about, just splice the instructions over.
- PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
- }
- PrevBB->removeSuccessor(PrevBB->succ_begin());
- assert(PrevBB->succ_empty());
- PrevBB->transferSuccessors(TailBB);
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
- // Update branches in PrevBB based on Tail's layout successor.
- if (ShouldUpdateTerminators)
- PrevBB->updateTerminator(TailBB->getNextNode());
+ // Update branches in PrevBB based on Tail's layout successor.
+ if (ShouldUpdateTerminators)
+ PrevBB->updateTerminator(TailBB->getNextNode());
- TDBBs.push_back(PrevBB);
- Changed = true;
+ TDBBs.push_back(PrevBB);
+ Changed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Abort merging blocks, the predecessor still "
+ "contains terminator instructions");
+ // Return early if no changes were made
+ if (!Changed)
+ return RemovedBranches;
+ }
+ Changed |= RemovedBranches;
}
// If this is after register allocation, there are no phis to fix.
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 2e4a656ea0c8..e74b3195a130 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -366,7 +366,7 @@ bool TargetInstrInfo::hasLoadFromStackSlot(
oe = MI.memoperands_end();
o != oe; ++o) {
if ((*o)->isLoad() &&
- dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
Accesses.push_back(*o);
}
return Accesses.size() != StartSize;
@@ -380,7 +380,7 @@ bool TargetInstrInfo::hasStoreToStackSlot(
oe = MI.memoperands_end();
o != oe; ++o) {
if ((*o)->isStore() &&
- dyn_cast_or_null<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
+ isa_and_nonnull<FixedStackPseudoSourceValue>((*o)->getPseudoValue()))
Accesses.push_back(*o);
}
return Accesses.size() != StartSize;
@@ -1264,22 +1264,6 @@ int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-/// If we can determine the operand latency from the def only, without itinerary
-/// lookup, do so. Otherwise return -1.
-int TargetInstrInfo::computeDefOperandLatency(
- const InstrItineraryData *ItinData, const MachineInstr &DefMI) const {
-
- // Let the target hook getInstrLatency handle missing itineraries.
- if (!ItinData)
- return getInstrLatency(ItinData, DefMI);
-
- if(ItinData->isEmpty())
- return defaultDefLatency(ItinData->SchedModel, DefMI);
-
- // ...operand lookup required
- return -1;
-}
-
bool TargetInstrInfo::getRegSequenceInputs(
const MachineInstr &MI, unsigned DefIdx,
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 3c5dd29036db..c0a7efff9e98 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -52,6 +52,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -236,6 +237,8 @@ RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) {
return FPEXT_F16_F32;
if (RetVT == MVT::f64)
return FPEXT_F16_F64;
+ if (RetVT == MVT::f80)
+ return FPEXT_F16_F80;
if (RetVT == MVT::f128)
return FPEXT_F16_F128;
} else if (OpVT == MVT::f32) {
@@ -659,7 +662,7 @@ RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) {
/// InitCmpLibcallCCs - Set default comparison libcall CC.
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
- memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL);
+ std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
CCs[RTLIB::OEQ_F32] = ISD::SETEQ;
CCs[RTLIB::OEQ_F64] = ISD::SETEQ;
CCs[RTLIB::OEQ_F128] = ISD::SETEQ;
@@ -896,8 +899,6 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
- setOperationAction(ISD::FROUND, VT, Expand);
- setOperationAction(ISD::FROUNDEVEN, VT, Expand);
setOperationAction(ISD::LROUND, VT, Expand);
setOperationAction(ISD::LLROUND, VT, Expand);
setOperationAction(ISD::LRINT, VT, Expand);
@@ -924,8 +925,15 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
if (LHSTy.isVector())
return LHSTy;
- return LegalTypes ? getScalarShiftAmountTy(DL, LHSTy)
- : getPointerTy(DL);
+ MVT ShiftVT =
+ LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL);
+ // If any possible shift value won't fit in the preferred type, just use
+ // something safe. Assume it will be legalized when the shift is expanded.
+ if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits()))
+ ShiftVT = MVT::i32;
+ assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) &&
+ "ShiftVT is still too small!");
+ return ShiftVT;
}
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
@@ -1556,7 +1564,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
// Scalable vectors cannot be scalarized, so handle the legalisation of the
// types as is done elsewhere in SelectionDAG.
- if (VT.isScalableVector() && !isPowerOf2_32(EltCnt.getKnownMinValue())) {
+ if (EltCnt.isScalable()) {
LegalizeKind LK;
EVT PartVT = VT;
do {
@@ -1565,16 +1573,14 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
PartVT = LK.second;
} while (LK.first != TypeLegal);
- NumIntermediates = VT.getVectorElementCount().getKnownMinValue() /
- PartVT.getVectorElementCount().getKnownMinValue();
+ if (!PartVT.isVector()) {
+ report_fatal_error(
+ "Don't know how to legalize this scalable vector type");
+ }
- // FIXME: This code needs to be extended to handle more complex vector
- // breakdowns, like nxv7i64 -> nxv8i64 -> 4 x nxv2i64. Currently the only
- // supported cases are vectors that are broken down into equal parts
- // such as nxv6i64 -> 3 x nxv2i64.
- assert((PartVT.getVectorElementCount() * NumIntermediates) ==
- VT.getVectorElementCount() &&
- "Expected an integer multiple of PartVT");
+ NumIntermediates =
+ divideCeil(VT.getVectorElementCount().getKnownMinValue(),
+ PartVT.getVectorElementCount().getKnownMinValue());
IntermediateVT = PartVT;
RegisterVT = getRegisterType(Context, IntermediateVT);
return NumIntermediates;
@@ -1657,9 +1663,9 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
EVT VT = ValueVTs[j];
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (attr.hasRetAttr(Attribute::SExt))
ExtendKind = ISD::SIGN_EXTEND;
- else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasRetAttr(Attribute::ZExt))
ExtendKind = ISD::ZERO_EXTEND;
// FIXME: C calling convention requires the return type to be promoted to
@@ -1679,13 +1685,13 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::InReg))
+ if (attr.hasRetAttr(Attribute::InReg))
Flags.setInReg();
// Propagate extension type if any
- if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ if (attr.hasRetAttr(Attribute::SExt))
Flags.setSExt();
- else if (attr.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt))
+ else if (attr.hasRetAttr(Attribute::ZExt))
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
@@ -1696,7 +1702,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
-unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
+uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
return DL.getABITypeAlign(Ty).value();
}
@@ -1749,8 +1755,9 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, LLT Ty,
const MachineMemOperand &MMO,
bool *Fast) const {
- return allowsMemoryAccess(Context, DL, getMVTForLLT(Ty), MMO.getAddrSpace(),
- MMO.getAlign(), MMO.getFlags(), Fast);
+ EVT VT = getApproximateEVTForLLT(Ty, DL, Context);
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(), MMO.getAlign(),
+ MMO.getFlags(), Fast);
}
//===----------------------------------------------------------------------===//
@@ -1849,8 +1856,12 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
while (true) {
LegalizeKind LK = getTypeConversion(C, MTy);
- if (LK.first == TypeScalarizeScalableVector)
- return std::make_pair(InstructionCost::getInvalid(), MVT::getVT(Ty));
+ if (LK.first == TypeScalarizeScalableVector) {
+ // Ensure we return a sensible simple VT here, since many callers of this
+ // function require it.
+ MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
+ return std::make_pair(InstructionCost::getInvalid(), VT);
+ }
if (LK.first == TypeLegal)
return std::make_pair(Cost, MTy.getSimpleVT());
@@ -1980,8 +1991,11 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
auto *GV = new GlobalVariable(M, Type::getInt8PtrTy(M.getContext()), false,
GlobalVariable::ExternalLinkage, nullptr,
"__stack_chk_guard");
+
+ // FreeBSD has "__stack_chk_guard" defined externally in libc.so
if (TM.getRelocationModel() == Reloc::Static &&
- !TM.getTargetTriple().isWindowsGNUEnvironment())
+ !TM.getTargetTriple().isWindowsGNUEnvironment() &&
+ !TM.getTargetTriple().isOSFreeBSD())
GV->setDSOLocal(true);
}
}
@@ -2020,6 +2034,12 @@ bool TargetLoweringBase::isJumpTableRelative() const {
return getTargetMachine().isPositionIndependent();
}
+Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const {
+ if (TM.Options.LoopAlignment)
+ return Align(TM.Options.LoopAlignment);
+ return PrefLoopAlignment;
+}
+
//===----------------------------------------------------------------------===//
// Reciprocal Estimates
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index add34eccc1f3..1d3bb286c882 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -677,8 +677,9 @@ calcUniqueIDUpdateFlagsAndSize(const GlobalObject *GO, StringRef SectionName,
}
if (Retain) {
- if (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))
+ if ((Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
+ !TM.getTargetTriple().isOSSolaris())
Flags |= ELF::SHF_GNU_RETAIN;
return NextUniqueID++;
}
@@ -855,8 +856,10 @@ static MCSection *selectELFSectionForGlobal(
EmitUniqueSection = true;
Flags |= ELF::SHF_LINK_ORDER;
}
- if (Retain && (Ctx.getAsmInfo()->useIntegratedAssembler() ||
- Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36))) {
+ if (Retain &&
+ (Ctx.getAsmInfo()->useIntegratedAssembler() ||
+ Ctx.getAsmInfo()->binutilsIsAtLeast(2, 36)) &&
+ !TM.getTargetTriple().isOSSolaris()) {
EmitUniqueSection = true;
Flags |= ELF::SHF_GNU_RETAIN;
}
@@ -1492,7 +1495,7 @@ void TargetLoweringObjectFileMachO::getNameWithPrefix(
SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
bool CannotUsePrivateLabel = true;
- if (auto *GO = GV->getBaseObject()) {
+ if (auto *GO = GV->getAliaseeObject()) {
SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM);
const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);
CannotUsePrivateLabel =
@@ -1563,7 +1566,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
if (const Comdat *C = GV->getComdat()) {
const GlobalValue *ComdatKey = getComdatGVForCOFF(GV);
if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey))
- ComdatKey = GA->getBaseObject();
+ ComdatKey = GA->getAliaseeObject();
if (ComdatKey == GV) {
switch (C->getSelectionKind()) {
case Comdat::Any:
@@ -1942,7 +1945,7 @@ static std::string APIntToHexString(const APInt &AI) {
static std::string scalarConstantToHexString(const Constant *C) {
Type *Ty = C->getType();
if (isa<UndefValue>(C)) {
- return APIntToHexString(APInt::getNullValue(Ty->getPrimitiveSizeInBits()));
+ return APIntToHexString(APInt::getZero(Ty->getPrimitiveSizeInBits()));
} else if (const auto *CFP = dyn_cast<ConstantFP>(C)) {
return APIntToHexString(CFP->getValueAPF().bitcastToAPInt());
} else if (const auto *CI = dyn_cast<ConstantInt>(C)) {
@@ -2414,7 +2417,20 @@ bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
MCSection *TargetLoweringObjectFileXCOFF::getSectionForConstant(
const DataLayout &DL, SectionKind Kind, const Constant *C,
Align &Alignment) const {
- //TODO: Enable emiting constant pool to unique sections when we support it.
+ // TODO: Enable emitting constant pool to unique sections when we support it.
+ if (Alignment > Align(16))
+ report_fatal_error("Alignments greater than 16 not yet supported.");
+
+ if (Alignment == Align(8)) {
+ assert(ReadOnly8Section && "Section should always be initialized.");
+ return ReadOnly8Section;
+ }
+
+ if (Alignment == Align(16)) {
+ assert(ReadOnly16Section && "Section should always be initialized.");
+ return ReadOnly16Section;
+ }
+
return ReadOnlySection;
}
@@ -2443,7 +2459,8 @@ MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
- report_fatal_error("XCOFF not yet implemented.");
+ /* Not implemented yet, but don't crash; just return nullptr. */
+ return nullptr;
}
XCOFF::StorageClass
@@ -2473,12 +2490,12 @@ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(const GlobalValue *GV) {
MCSymbol *TargetLoweringObjectFileXCOFF::getFunctionEntryPointSymbol(
const GlobalValue *Func, const TargetMachine &TM) const {
- assert(
- (isa<Function>(Func) ||
- (isa<GlobalAlias>(Func) &&
- isa_and_nonnull<Function>(cast<GlobalAlias>(Func)->getBaseObject()))) &&
- "Func must be a function or an alias which has a function as base "
- "object.");
+ assert((isa<Function>(Func) ||
+ (isa<GlobalAlias>(Func) &&
+ isa_and_nonnull<Function>(
+ cast<GlobalAlias>(Func)->getAliaseeObject()))) &&
+ "Func must be a function or an alias which has a function as base "
+ "object.");
SmallString<128> NameStr;
NameStr.push_back('.');
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 4024fd452fc4..402e21d3708b 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -172,6 +172,24 @@ static cl::opt<bool>
FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
cl::desc("Do not insert FS-AFDO discriminators before "
"emit."));
+// Disable MIRProfileLoader before RegAlloc. This is for debugging and
+// tuning purposes.
+static cl::opt<bool> DisableRAFSProfileLoader(
+ "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before RegAlloc"));
+// Disable MIRProfileLoader before BlockPlacement. This is for debugging
+// and tuning purposes.
+static cl::opt<bool> DisableLayoutFSProfileLoader(
+ "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+ cl::desc("Disable MIRProfileLoader before BlockPlacement"));
+// Specify FSProfile file name.
+static cl::opt<std::string>
+ FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile file name."), cl::Hidden);
+// Specify Remapping file for FSProfile.
+static cl::opt<std::string> FSRemappingFile(
+ "fs-remapping-file", cl::init(""), cl::value_desc("filename"),
+ cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
@@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
return TargetID;
}
+// Find the FSProfile file name. The internal option takes precedence
+// over the value from TargetMachine.
+static const std::string getFSProfileFile(const TargetMachine *TM) {
+ if (!FSProfileFile.empty())
+ return FSProfileFile.getValue();
+ const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileFile;
+}
+
+// Find the Profile remapping file name. The internal option takes
+// precedence over the value from TargetMachine.
+static const std::string getFSRemappingFile(const TargetMachine *TM) {
+ if (!FSRemappingFile.empty())
+ return FSRemappingFile.getValue();
+ const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+ if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+ return std::string();
+ return PGOOpt->ProfileRemappingFile;
+}
+
//===---------------------------------------------------------------------===//
/// TargetPassConfig
//===---------------------------------------------------------------------===//
@@ -321,12 +361,9 @@ namespace {
struct InsertedPass {
AnalysisID TargetPassID;
IdentifyingPassPtr InsertedPassID;
- bool VerifyAfter;
- InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter)
- : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID),
- VerifyAfter(VerifyAfter) {}
+ InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID)
+ : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID) {}
Pass *getInsertedPass() const {
assert(InsertedPassID.isValid() && "Illegal Pass ID!");
@@ -601,14 +638,13 @@ CodeGenOpt::Level TargetPassConfig::getOptLevel() const {
/// Insert InsertedPassID pass after TargetPassID.
void TargetPassConfig::insertPass(AnalysisID TargetPassID,
- IdentifyingPassPtr InsertedPassID,
- bool VerifyAfter) {
+ IdentifyingPassPtr InsertedPassID) {
assert(((!InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getID()) ||
(InsertedPassID.isInstance() &&
TargetPassID != InsertedPassID.getInstance()->getPassID())) &&
"Insert a pass after itself!");
- Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter);
+ Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID);
}
/// createPassConfig - Create a pass configuration object to be used by
@@ -686,7 +722,7 @@ bool TargetPassConfig::isPassSubstitutedOrOverridden(AnalysisID ID) const {
/// a later pass or that it should stop after an earlier pass, then do not add
/// the pass. Finally, compare the current pass against the StartAfter
/// and StopAfter options and change the Started/Stopped flags accordingly.
-void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
+void TargetPassConfig::addPass(Pass *P) {
assert(!Initialized && "PassConfig is immutable");
// Cache the Pass ID here in case the pass manager finds this pass is
@@ -704,16 +740,16 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
addMachinePrePasses();
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
- if (AddingMachinePasses && verifyAfter)
+ if (AddingMachinePasses)
Banner = std::string("After ") + std::string(P->getPassName());
PM->add(P);
if (AddingMachinePasses)
- addMachinePostPasses(Banner, /*AllowVerify*/ verifyAfter);
+ addMachinePostPasses(Banner);
// Add the passes after the pass P if there are any.
for (const auto &IP : Impl->InsertedPasses) {
if (IP.TargetPassID == PassID)
- addPass(IP.getInsertedPass(), IP.VerifyAfter);
+ addPass(IP.getInsertedPass());
}
} else {
delete P;
@@ -733,7 +769,7 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter) {
///
/// addPass cannot return a pointer to the pass instance because it is internal
/// to the PassManager and the instance we create here may already be freed.
-AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
+AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
@@ -748,7 +784,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter) {
llvm_unreachable("Pass ID not registered");
}
AnalysisID FinalID = P->getPassID();
- addPass(P, verifyAfter); // Ends the lifetime of P.
+ addPass(P); // Ends the lifetime of P.
return FinalID;
}
@@ -792,8 +828,7 @@ void TargetPassConfig::addMachinePrePasses(bool AllowDebugify) {
addDebugifyPass();
}
-void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
- bool AllowVerify, bool AllowStrip) {
+void TargetPassConfig::addMachinePostPasses(const std::string &Banner) {
if (DebugifyIsSafe) {
if (DebugifyCheckAndStripAll == cl::BOU_TRUE) {
addCheckDebugPass();
@@ -801,8 +836,7 @@ void TargetPassConfig::addMachinePostPasses(const std::string &Banner,
} else if (DebugifyAndStripAll == cl::BOU_TRUE)
addStripDebugPass();
}
- if (AllowVerify)
- addVerifyPass(Banner);
+ addVerifyPass(Banner);
}
/// Add common target configurable passes that perform LLVM IR to IR transforms
@@ -1113,6 +1147,18 @@ void TargetPassConfig::addMachinePasses() {
// where it becomes safe again so stop debugifying here.
DebugifyIsSafe = false;
+ // Add an FSDiscriminator pass right before RA, so that we can get
+ // a more precise SampleFDO profile for RA.
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass1));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
+ addPass(
+ createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass1));
+ }
+
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
@@ -1123,7 +1169,7 @@ void TargetPassConfig::addMachinePasses() {
// Run post-ra passes.
addPostRegAlloc();
- addPass(&RemoveRedundantDebugValuesID, false);
+ addPass(&RemoveRedundantDebugValuesID);
addPass(&FixupStatepointCallerSavedID);
@@ -1165,7 +1211,7 @@ void TargetPassConfig::addMachinePasses() {
// GC
if (addGCPasses()) {
if (PrintGCInfo)
- addPass(createGCInfoPrinter(dbgs()), false);
+ addPass(createGCInfoPrinter(dbgs()));
}
// Basic block placement.
@@ -1195,10 +1241,10 @@ void TargetPassConfig::addMachinePasses() {
// FIXME: Some backends are incompatible with running the verifier after
// addPreEmitPass. Maybe only pass "false" here for those targets?
- addPass(&FuncletLayoutID, false);
+ addPass(&FuncletLayoutID);
- addPass(&StackMapLivenessID, false);
- addPass(&LiveDebugValuesID, false);
+ addPass(&StackMapLivenessID);
+ addPass(&LiveDebugValuesID);
if (TM->Options.EnableMachineOutliner && getOptLevel() != CodeGenOpt::None &&
EnableMachineOutliner != RunOutliner::NeverOutline) {
@@ -1224,10 +1270,6 @@ void TargetPassConfig::addMachinePasses() {
// Add passes that directly emit MI after all other MI passes.
addPreEmitPass2();
- // Insert pseudo probe annotation for callsite profiling
- if (TM->Options.PseudoProbeForProfiling)
- addPass(createPseudoProbeInserter());
-
AddingMachinePasses = false;
}
@@ -1369,8 +1411,8 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
void TargetPassConfig::addFastRegAlloc() {
- addPass(&PHIEliminationID, false);
- addPass(&TwoAddressInstructionPassID, false);
+ addPass(&PHIEliminationID);
+ addPass(&TwoAddressInstructionPassID);
addRegAssignAndRewriteFast();
}
@@ -1379,9 +1421,9 @@ void TargetPassConfig::addFastRegAlloc() {
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
void TargetPassConfig::addOptimizedRegAlloc() {
- addPass(&DetectDeadLanesID, false);
+ addPass(&DetectDeadLanesID);
- addPass(&ProcessImplicitDefsID, false);
+ addPass(&ProcessImplicitDefsID);
// LiveVariables currently requires pure SSA form.
//
@@ -1393,18 +1435,18 @@ void TargetPassConfig::addOptimizedRegAlloc() {
// When LiveVariables is removed this has to be removed/moved as well.
// Explicit addition of UnreachableMachineBlockElim allows stopping before or
// after it with -stop-before/-stop-after.
- addPass(&UnreachableMachineBlockElimID, false);
- addPass(&LiveVariablesID, false);
+ addPass(&UnreachableMachineBlockElimID);
+ addPass(&LiveVariablesID);
// Edge splitting is smarter with machine loop info.
- addPass(&MachineLoopInfoID, false);
- addPass(&PHIEliminationID, false);
+ addPass(&MachineLoopInfoID);
+ addPass(&PHIEliminationID);
// Eventually, we want to run LiveIntervals before PHI elimination.
if (EarlyLiveIntervals)
- addPass(&LiveIntervalsID, false);
+ addPass(&LiveIntervalsID);
- addPass(&TwoAddressInstructionPassID, false);
+ addPass(&TwoAddressInstructionPassID);
addPass(&RegisterCoalescerID);
// The machine scheduler may accidentally create disconnected components
@@ -1417,9 +1459,6 @@ void TargetPassConfig::addOptimizedRegAlloc() {
if (addRegAssignAndRewriteOptimized()) {
// Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
addPass(&StackSlotColoringID);
// Allow targets to expand pseudo instructions depending on the choice of
@@ -1459,12 +1498,21 @@ void TargetPassConfig::addMachineLateOptimization() {
/// Add standard GC passes.
bool TargetPassConfig::addGCPasses() {
- addPass(&GCMachineCodeAnalysisID, false);
+ addPass(&GCMachineCodeAnalysisID);
return true;
}
/// Add standard basic block placement passes.
void TargetPassConfig::addBlockPlacement() {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::Pass2));
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
+ addPass(
+ createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass2));
+ }
if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
if (EnableBlockPlacementStats)
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 1664b4dadfec..46cec5407565 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -118,6 +118,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
// registers. e.g. r1 = move v1024.
DenseMap<Register, Register> DstRegMap;
+ void removeClobberedSrcRegMap(MachineInstr *MI);
+
bool isRevCopyChain(Register FromReg, Register ToReg, int Maxlen);
bool noUseAfterLastDef(Register Reg, unsigned Dist, unsigned &LastDef);
@@ -132,7 +134,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool convertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi, Register RegA,
- Register RegB, unsigned Dist);
+ Register RegB, unsigned &Dist);
bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI);
@@ -144,7 +146,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist, bool shouldOnlyCommute);
+ unsigned &Dist, bool shouldOnlyCommute);
bool tryInstructionCommute(MachineInstr *MI,
unsigned DstOpIdx,
@@ -380,7 +382,8 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
return nullptr;
- MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
+ MachineOperand &UseOp = *MRI->use_nodbg_begin(Reg);
+ MachineInstr &UseMI = *UseOp.getParent();
if (UseMI.getParent() != MBB)
return nullptr;
Register SrcReg;
@@ -394,6 +397,18 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
IsDstPhys = DstReg.isPhysical();
return &UseMI;
}
+ if (UseMI.isCommutable()) {
+ unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
+ unsigned Src2 = UseMI.getOperandNo(&UseOp);
+ if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
+ MachineOperand &MO = UseMI.getOperand(Src1);
+ if (MO.isReg() && MO.isUse() &&
+ isTwoAddrUse(UseMI, MO.getReg(), DstReg)) {
+ IsDstPhys = DstReg.isPhysical();
+ return &UseMI;
+ }
+ }
+ }
return nullptr;
}
@@ -422,6 +437,76 @@ static bool regsAreCompatible(Register RegA, Register RegB,
return TRI->regsOverlap(RegA, RegB);
}
+/// From RegMap remove entries mapped to a physical register which overlaps MO.
+static void removeMapRegEntry(const MachineOperand &MO,
+ DenseMap<Register, Register> &RegMap,
+ const TargetRegisterInfo *TRI) {
+ assert(
+ (MO.isReg() || MO.isRegMask()) &&
+ "removeMapRegEntry must be called with a register or regmask operand.");
+
+ SmallVector<Register, 2> Srcs;
+ for (auto SI : RegMap) {
+ Register ToReg = SI.second;
+ if (ToReg.isVirtual())
+ continue;
+
+ if (MO.isReg()) {
+ Register Reg = MO.getReg();
+ if (TRI->regsOverlap(ToReg, Reg))
+ Srcs.push_back(SI.first);
+ } else if (MO.clobbersPhysReg(ToReg))
+ Srcs.push_back(SI.first);
+ }
+
+ for (auto SrcReg : Srcs)
+ RegMap.erase(SrcReg);
+}
+
+/// If a physical register is clobbered, old entries mapped to it should be
+/// deleted. For example
+///
+/// %2:gr64 = COPY killed $rdx
+/// MUL64r %3:gr64, implicit-def $rax, implicit-def $rdx
+///
+/// After the MUL instruction, $rdx contains a different value than in the COPY
+/// instruction. So %2 should not map to $rdx after MUL.
+void TwoAddressInstructionPass::removeClobberedSrcRegMap(MachineInstr *MI) {
+ if (MI->isCopy()) {
+ // If a virtual register is copied to its mapped physical register, it
+ // doesn't change the potential coalescing between them, so we don't remove
+ // entries mapped to the physical register. For example
+ //
+ // %100 = COPY $r8
+ // ...
+ // $r8 = COPY %100
+ //
+ // The first copy constructs SrcRegMap[%100] = $r8, the second copy doesn't
+ // destroy the content of $r8, and should not impact SrcRegMap.
+ Register Dst = MI->getOperand(0).getReg();
+ if (!Dst || Dst.isVirtual())
+ return;
+
+ Register Src = MI->getOperand(1).getReg();
+ if (regsAreCompatible(Dst, getMappedReg(Src, SrcRegMap), TRI))
+ return;
+ }
+
+ for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ continue;
+ }
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg || Reg.isVirtual())
+ continue;
+ removeMapRegEntry(MO, SrcRegMap, TRI);
+ }
+}
+
// Returns true if Reg is equal or aliased to at least one register in Set.
static bool regOverlapsSet(const SmallVectorImpl<Register> &Set, Register Reg,
const TargetRegisterInfo *TRI) {
@@ -589,21 +674,15 @@ bool TwoAddressInstructionPass::isProfitableToConv3Addr(Register RegA,
/// Return true if this transformation was successful.
bool TwoAddressInstructionPass::convertInstTo3Addr(
MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi,
- Register RegA, Register RegB, unsigned Dist) {
- // FIXME: Why does convertToThreeAddress() need an iterator reference?
- MachineFunction::iterator MFI = MBB->getIterator();
- MachineInstr *NewMI = TII->convertToThreeAddress(MFI, *mi, LV);
- assert(MBB->getIterator() == MFI &&
- "convertToThreeAddress changed iterator reference");
+ Register RegA, Register RegB, unsigned &Dist) {
+ MachineInstrSpan MIS(mi, MBB);
+ MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV, LIS);
if (!NewMI)
return false;
LLVM_DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
LLVM_DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
- if (LIS)
- LIS->ReplaceMachineInstrInMaps(*mi, *NewMI);
-
// If the old instruction is debug value tracked, an update is required.
if (auto OldInstrNum = mi->peekDebugInstrNum()) {
// Sanity check.
@@ -624,7 +703,9 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
MBB->erase(mi); // Nuke the old inst.
- DistanceMap.insert(std::make_pair(NewMI, Dist));
+ for (MachineInstr &MI : MIS)
+ DistanceMap.insert(std::make_pair(&MI, Dist++));
+ Dist--;
mi = NewMI;
nmi = std::next(mi);
@@ -656,9 +737,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
VirtRegPairs.push_back(NewReg);
break;
}
- bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
- if (!isNew)
- assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+ SrcRegMap[NewReg] = Reg;
VirtRegPairs.push_back(NewReg);
Reg = NewReg;
}
@@ -667,8 +746,7 @@ void TwoAddressInstructionPass::scanUses(Register DstReg) {
unsigned ToReg = VirtRegPairs.back();
VirtRegPairs.pop_back();
while (!VirtRegPairs.empty()) {
- unsigned FromReg = VirtRegPairs.back();
- VirtRegPairs.pop_back();
+ unsigned FromReg = VirtRegPairs.pop_back_val();
bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
if (!isNew)
assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
@@ -857,12 +935,13 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
nmi = End;
MachineBasicBlock::iterator InsertPos = KillPos;
if (LIS) {
- // We have to move the copies first so that the MBB is still well-formed
- // when calling handleMove().
+ // We have to move the copies (and any interleaved debug instructions)
+ // first so that the MBB is still well-formed when calling handleMove().
for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) {
auto CopyMI = MBBI++;
MBB->splice(InsertPos, MBB, CopyMI);
- LIS->handleMove(*CopyMI);
+ if (!CopyMI->isDebugOrPseudoInstr())
+ LIS->handleMove(*CopyMI);
InsertPos = CopyMI;
}
End = std::next(MachineBasicBlock::iterator(MI));
@@ -1130,7 +1209,7 @@ bool TwoAddressInstructionPass::
tryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
unsigned SrcIdx, unsigned DstIdx,
- unsigned Dist, bool shouldOnlyCommute) {
+ unsigned &Dist, bool shouldOnlyCommute) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -1238,6 +1317,8 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// look "normal" to the transformation logic.
MBB->insert(mi, NewMIs[0]);
MBB->insert(mi, NewMIs[1]);
+ DistanceMap.insert(std::make_pair(NewMIs[0], Dist++));
+ DistanceMap.insert(std::make_pair(NewMIs[1], Dist));
LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
<< "2addr: NEW INST: " << *NewMIs[1]);
@@ -1288,9 +1369,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (MO.isReg())
OrigRegs.push_back(MO.getReg());
}
+
+ LIS->RemoveMachineInstrFromMaps(MI);
}
MI.eraseFromParent();
+ DistanceMap.erase(&MI);
// Update LiveIntervals.
if (LIS) {
@@ -1307,6 +1391,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
+ DistanceMap.erase(NewMIs[0]);
+ DistanceMap.erase(NewMIs[1]);
+ Dist--;
}
}
}
@@ -1320,7 +1407,6 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
// Return true if any tied operands were found, including the trivial ones.
bool TwoAddressInstructionPass::
collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
- const MCInstrDesc &MCID = MI->getDesc();
bool AnyOps = false;
unsigned NumOps = MI->getNumOperands();
@@ -1342,10 +1428,10 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (DstReg.isVirtual())
- if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
- TRI, *MF))
- MRI->constrainRegClass(DstReg, RC);
+ if (DstReg.isVirtual()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ MRI->constrainRegClass(DstReg, RC);
+ }
SrcMO.setReg(DstReg);
SrcMO.setSubReg(0);
LLVM_DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI);
@@ -1434,12 +1520,24 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
+ SlotIndex endIdx =
+ LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
if (RegA.isVirtual()) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
- SlotIndex endIdx =
- LIS->getInstructionIndex(*MI).getRegSlot(IsEarlyClobber);
- LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
+ LI.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ for (auto &S : LI.subranges()) {
+ VNI = S.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ } else {
+ for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) {
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) {
+ VNInfo *VNI =
+ LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
+ LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
+ }
+ }
}
}
@@ -1461,49 +1559,58 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// by SubRegB is compatible with RegA with no subregister. So regardless of
// whether the dest oper writes a subreg, the source oper should not.
MO.setSubReg(0);
-
- // Propagate SrcRegMap.
- SrcRegMap[RegA] = RegB;
}
if (AllUsesCopied) {
- bool ReplacedAllUntiedUses = true;
- if (!IsEarlyClobber) {
- // Replace other (un-tied) uses of regB with LastCopiedReg.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
- if (MO.getSubReg() == SubRegB) {
- if (MO.isKill()) {
- MO.setIsKill(false);
- RemovedKillFlag = true;
- }
- MO.setReg(LastCopiedReg);
- MO.setSubReg(0);
- } else {
- ReplacedAllUntiedUses = false;
+ LaneBitmask RemainingUses = LaneBitmask::getNone();
+ // Replace other (un-tied) uses of regB with LastCopiedReg.
+ for (MachineOperand &MO : MI->operands()) {
+ if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
+ if (MO.isKill()) {
+ MO.setIsKill(false);
+ RemovedKillFlag = true;
}
+ MO.setReg(LastCopiedReg);
+ MO.setSubReg(0);
+ } else {
+ RemainingUses |= TRI->getSubRegIndexLaneMask(MO.getSubReg());
}
}
}
// Update live variables for regB.
- if (RemovedKillFlag && ReplacedAllUntiedUses &&
- LV && LV->getVarInfo(RegB).removeKill(*MI)) {
+ if (RemovedKillFlag && RemainingUses.none() && LV &&
+ LV->getVarInfo(RegB).removeKill(*MI)) {
MachineBasicBlock::iterator PrevMI = MI;
--PrevMI;
LV->addVirtualRegisterKilled(RegB, *PrevMI);
}
+ if (RemovedKillFlag && RemainingUses.none())
+ SrcRegMap[LastCopiedReg] = RegB;
+
// Update LiveIntervals.
if (LIS) {
- LiveInterval &LI = LIS->getInterval(RegB);
- SlotIndex MIIdx = LIS->getInstructionIndex(*MI);
- LiveInterval::const_iterator I = LI.find(MIIdx);
- assert(I != LI.end() && "RegB must be live-in to use.");
+ SlotIndex UseIdx = LIS->getInstructionIndex(*MI);
+ auto Shrink = [=](LiveRange &LR, LaneBitmask LaneMask) {
+ LiveRange::Segment *S = LR.getSegmentContaining(LastCopyIdx);
+ if (!S)
+ return true;
+ if ((LaneMask & RemainingUses).any())
+ return false;
+ if (S->end.getBaseIndex() != UseIdx)
+ return false;
+ S->end = LastCopyIdx;
+ return true;
+ };
- SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
- if (I->end == UseIdx)
- LI.removeSegment(LastCopyIdx, UseIdx);
+ LiveInterval &LI = LIS->getInterval(RegB);
+ bool ShrinkLI = true;
+ for (auto &S : LI.subranges())
+ ShrinkLI &= Shrink(S, S.LaneMask);
+ if (ShrinkLI)
+ Shrink(LI, LaneBitmask::getAll());
}
} else if (RemovedKillFlag) {
// Some tied uses of regB matched their destination registers, so
@@ -1580,6 +1687,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
if (!collectTiedOperands(&*mi, TiedOperands)) {
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
@@ -1604,6 +1712,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
// The tied operands have been eliminated or shifted further down
// the block to ease elimination. Continue processing with 'nmi'.
TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
continue;
}
@@ -1628,18 +1737,44 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
mi->RemoveOperand(1);
mi->setDesc(TII->get(TargetOpcode::COPY));
LLVM_DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+
+ // Update LiveIntervals.
+ if (LIS) {
+ Register Reg = mi->getOperand(0).getReg();
+ LiveInterval &LI = LIS->getInterval(Reg);
+ if (LI.hasSubRanges()) {
+ // The COPY no longer defines subregs of %reg except for
+ // %reg.subidx.
+ LaneBitmask LaneMask =
+ TRI->getSubRegIndexLaneMask(mi->getOperand(0).getSubReg());
+ SlotIndex Idx = LIS->getInstructionIndex(*mi);
+ for (auto &S : LI.subranges()) {
+ if ((S.LaneMask & LaneMask).none()) {
+ LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
+ LiveRange::iterator DefSeg = std::next(UseSeg);
+ S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ }
+ }
+
+ // The COPY no longer has a use of %reg.
+ LIS->shrinkToUses(&LI);
+ } else {
+ // The live interval for Reg did not have subranges but now it needs
+ // them because we have introduced a subreg def. Recompute it.
+ LIS->removeInterval(Reg);
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
}
// Clear TiedOperands here instead of at the top of the loop
// since most instructions do not have tied operands.
TiedOperands.clear();
+ removeClobberedSrcRegMap(&*mi);
mi = nmi;
}
}
- if (LIS)
- MF->verify(this, "After two-address instruction pass");
-
return MadeChange;
}
@@ -1722,6 +1857,9 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
for (int j = MI.getNumOperands() - 1, ee = 0; j > ee; --j)
MI.RemoveOperand(j);
} else {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+
LLVM_DEBUG(dbgs() << "Eliminated: " << MI);
MI.eraseFromParent();
}
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index 2ce6ea1d4212..d042deefd746 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -108,7 +108,7 @@ class IRPromoter {
SetVector<Value*> &Visited;
SetVector<Value*> &Sources;
SetVector<Instruction*> &Sinks;
- SmallVectorImpl<Instruction*> &SafeWrap;
+ SmallPtrSetImpl<Instruction *> &SafeWrap;
IntegerType *ExtTy = nullptr;
SmallPtrSet<Value*, 8> NewInsts;
SmallPtrSet<Instruction*, 4> InstsToRemove;
@@ -116,7 +116,6 @@ class IRPromoter {
SmallPtrSet<Value*, 8> Promoted;
void ReplaceAllUsersOfWith(Value *From, Value *To);
- void PrepareWrappingAdds(void);
void ExtendSources(void);
void ConvertTruncs(void);
void PromoteTree(void);
@@ -125,11 +124,11 @@ class IRPromoter {
public:
IRPromoter(LLVMContext &C, IntegerType *Ty, unsigned Width,
- SetVector<Value*> &visited, SetVector<Value*> &sources,
- SetVector<Instruction*> &sinks,
- SmallVectorImpl<Instruction*> &wrap) :
- Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
- Sources(sources), Sinks(sinks), SafeWrap(wrap) {
+ SetVector<Value *> &visited, SetVector<Value *> &sources,
+ SetVector<Instruction *> &sinks,
+ SmallPtrSetImpl<Instruction *> &wrap)
+ : Ctx(C), OrigTy(Ty), PromotedWidth(Width), Visited(visited),
+ Sources(sources), Sinks(sinks), SafeWrap(wrap) {
ExtTy = IntegerType::get(Ctx, PromotedWidth);
assert(OrigTy->getPrimitiveSizeInBits().getFixedSize() <
ExtTy->getPrimitiveSizeInBits().getFixedSize() &&
@@ -145,7 +144,7 @@ class TypePromotion : public FunctionPass {
unsigned RegisterBitWidth = 0;
SmallPtrSet<Value*, 16> AllVisited;
SmallPtrSet<Instruction*, 8> SafeToPromote;
- SmallVector<Instruction*, 4> SafeWrap;
+ SmallPtrSet<Instruction *, 4> SafeWrap;
// Does V have the same size result type as TypeSize.
bool EqualTypeSize(Value *V);
@@ -183,6 +182,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
+ AU.setPreservesCFG();
}
StringRef getPassName() const override { return PASS_NAME; }
@@ -192,11 +192,8 @@ public:
}
-static bool GenerateSignBits(Value *V) {
- if (!isa<Instruction>(V))
- return false;
-
- unsigned Opc = cast<Instruction>(V)->getOpcode();
+static bool GenerateSignBits(Instruction *I) {
+ unsigned Opc = I->getOpcode();
return Opc == Instruction::AShr || Opc == Instruction::SDiv ||
Opc == Instruction::SRem || Opc == Instruction::SExt;
}
@@ -283,7 +280,7 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
// wrap with respect to itself in the original bitwidth. If it doesn't wrap but
// just underflows the range, the icmp would give the same result whether the
// result has been truncated or not. We calculate this by:
- // - Zero extending both constants, if needed, to 32-bits.
+ // - Zero extending both constants, if needed, to RegisterBitWidth.
// - Take the absolute value of I's constant, adding this to the icmp const.
// - Check that this value is not out of range for small type. If it is, it
// means that it has underflowed enough to wrap around the icmp constant.
@@ -335,53 +332,46 @@ bool TypePromotion::isSafeWrap(Instruction *I) {
if (Opc != Instruction::Add && Opc != Instruction::Sub)
return false;
- if (!I->hasOneUse() ||
- !isa<ICmpInst>(*I->user_begin()) ||
+ if (!I->hasOneUse() || !isa<ICmpInst>(*I->user_begin()) ||
!isa<ConstantInt>(I->getOperand(1)))
return false;
- ConstantInt *OverflowConst = cast<ConstantInt>(I->getOperand(1));
- bool NegImm = OverflowConst->isNegative();
- bool IsDecreasing = ((Opc == Instruction::Sub) && !NegImm) ||
- ((Opc == Instruction::Add) && NegImm);
- if (!IsDecreasing)
- return false;
-
// Don't support an icmp that deals with sign bits.
auto *CI = cast<ICmpInst>(*I->user_begin());
if (CI->isSigned() || CI->isEquality())
return false;
- ConstantInt *ICmpConst = nullptr;
+ ConstantInt *ICmpConstant = nullptr;
if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(0)))
- ICmpConst = Const;
+ ICmpConstant = Const;
else if (auto *Const = dyn_cast<ConstantInt>(CI->getOperand(1)))
- ICmpConst = Const;
+ ICmpConstant = Const;
else
return false;
- // Now check that the result can't wrap on itself.
- APInt Total = ICmpConst->getValue().getBitWidth() < 32 ?
- ICmpConst->getValue().zext(32) : ICmpConst->getValue();
-
- Total += OverflowConst->getValue().getBitWidth() < 32 ?
- OverflowConst->getValue().abs().zext(32) : OverflowConst->getValue().abs();
-
- APInt Max = APInt::getAllOnesValue(TypePromotion::TypeSize);
-
- if (Total.getBitWidth() > Max.getBitWidth()) {
- if (Total.ugt(Max.zext(Total.getBitWidth())))
- return false;
- } else if (Max.getBitWidth() > Total.getBitWidth()) {
- if (Total.zext(Max.getBitWidth()).ugt(Max))
- return false;
- } else if (Total.ugt(Max))
+ const APInt &ICmpConst = ICmpConstant->getValue();
+ APInt OverflowConst = cast<ConstantInt>(I->getOperand(1))->getValue();
+ if (Opc == Instruction::Sub)
+ OverflowConst = -OverflowConst;
+ if (!OverflowConst.isNonPositive())
return false;
- LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for "
- << *I << "\n");
- SafeWrap.push_back(I);
- return true;
+ // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
+ // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
+ // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
+ if (OverflowConst.sgt(ICmpConst)) {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << "\n");
+ SafeWrap.insert(I);
+ return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "IR Promotion: Allowing safe overflow for sext "
+ << "const of " << *I << " and " << *CI << "\n");
+ SafeWrap.insert(I);
+ SafeWrap.insert(CI);
+ return true;
+ }
+ return false;
}
bool TypePromotion::shouldPromote(Value *V) {
@@ -403,17 +393,14 @@ bool TypePromotion::shouldPromote(Value *V) {
/// Return whether we can safely mutate V's type to ExtTy without having to be
/// concerned with zero extending or truncation.
-static bool isPromotedResultSafe(Value *V) {
- if (GenerateSignBits(V))
+static bool isPromotedResultSafe(Instruction *I) {
+ if (GenerateSignBits(I))
return false;
- if (!isa<Instruction>(V))
+ if (!isa<OverflowingBinaryOperator>(I))
return true;
- if (!isa<OverflowingBinaryOperator>(V))
- return true;
-
- return cast<Instruction>(V)->hasNoUnsignedWrap();
+ return I->hasNoUnsignedWrap();
}
void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
@@ -422,7 +409,7 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
bool ReplacedAll = true;
LLVM_DEBUG(dbgs() << "IR Promotion: Replacing " << *From << " with " << *To
- << "\n");
+ << "\n");
for (Use &U : From->uses()) {
auto *User = cast<Instruction>(U.getUser());
@@ -441,39 +428,6 @@ void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) {
InstsToRemove.insert(I);
}
-void IRPromoter::PrepareWrappingAdds() {
- LLVM_DEBUG(dbgs() << "IR Promotion: Prepare wrapping adds.\n");
- IRBuilder<> Builder{Ctx};
-
- // For adds that safely wrap and use a negative immediate as operand 1, we
- // create an equivalent instruction using a positive immediate.
- // That positive immediate can then be zext along with all the other
- // immediates later.
- for (auto *I : SafeWrap) {
- if (I->getOpcode() != Instruction::Add)
- continue;
-
- LLVM_DEBUG(dbgs() << "IR Promotion: Adjusting " << *I << "\n");
- assert((isa<ConstantInt>(I->getOperand(1)) &&
- cast<ConstantInt>(I->getOperand(1))->isNegative()) &&
- "Wrapping should have a negative immediate as the second operand");
-
- auto Const = cast<ConstantInt>(I->getOperand(1));
- auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs());
- Builder.SetInsertPoint(I);
- Value *NewVal = Builder.CreateSub(I->getOperand(0), NewConst);
- if (auto *NewInst = dyn_cast<Instruction>(NewVal)) {
- NewInst->copyIRFlags(I);
- NewInsts.insert(NewInst);
- }
- InstsToRemove.insert(I);
- I->replaceAllUsesWith(NewVal);
- LLVM_DEBUG(dbgs() << "IR Promotion: New equivalent: " << *NewVal << "\n");
- }
- for (auto *I : NewInsts)
- Visited.insert(I);
-}
-
void IRPromoter::ExtendSources() {
IRBuilder<> Builder{Ctx};
@@ -515,8 +469,6 @@ void IRPromoter::ExtendSources() {
void IRPromoter::PromoteTree() {
LLVM_DEBUG(dbgs() << "IR Promotion: Mutating the tree..\n");
- IRBuilder<> Builder{Ctx};
-
// Mutate the types of the instructions within the tree. Here we handle
// constant operands.
for (auto *V : Visited) {
@@ -533,14 +485,16 @@ void IRPromoter::PromoteTree() {
continue;
if (auto *Const = dyn_cast<ConstantInt>(Op)) {
- Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy);
+ Constant *NewConst = SafeWrap.contains(I)
+ ? ConstantExpr::getSExt(Const, ExtTy)
+ : ConstantExpr::getZExt(Const, ExtTy);
I->setOperand(i, NewConst);
} else if (isa<UndefValue>(Op))
I->setOperand(i, UndefValue::get(ExtTy));
}
- // Mutate the result type, unless this is an icmp.
- if (!isa<ICmpInst>(I)) {
+ // Mutate the result type, unless this is an icmp or switch.
+ if (!isa<ICmpInst>(I) && !isa<SwitchInst>(I)) {
I->mutateType(ExtTy);
Promoted.insert(I);
}
@@ -575,7 +529,7 @@ void IRPromoter::TruncateSinks() {
// Handle calls separately as we need to iterate over arg operands.
if (auto *Call = dyn_cast<CallInst>(I)) {
- for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
+ for (unsigned i = 0; i < Call->arg_size(); ++i) {
Value *Arg = Call->getArgOperand(i);
Type *Ty = TruncTysMap[Call][i];
if (Instruction *Trunc = InsertTrunc(Arg, Ty)) {
@@ -678,10 +632,8 @@ void IRPromoter::Mutate() {
// Cache original types of the values that will likely need truncating
for (auto *I : Sinks) {
if (auto *Call = dyn_cast<CallInst>(I)) {
- for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) {
- Value *Arg = Call->getArgOperand(i);
+ for (Value *Arg : Call->args())
TruncTysMap[Call].push_back(Arg->getType());
- }
} else if (auto *Switch = dyn_cast<SwitchInst>(I))
TruncTysMap[I].push_back(Switch->getCondition()->getType());
else {
@@ -696,10 +648,6 @@ void IRPromoter::Mutate() {
TruncTysMap[Trunc].push_back(Trunc->getDestTy());
}
- // Convert adds using negative immediates to equivalent instructions that use
- // positive constants.
- PrepareWrappingAdds();
-
// Insert zext instructions between sources and their users.
ExtendSources();
@@ -798,7 +746,7 @@ bool TypePromotion::isLegalToPromote(Value *V) {
if (SafeToPromote.count(I))
return true;
- if (isPromotedResultSafe(V) || isSafeWrap(I)) {
+ if (isPromotedResultSafe(I) || isSafeWrap(I)) {
SafeToPromote.insert(I);
return true;
}
@@ -815,7 +763,7 @@ bool TypePromotion::TryToPromote(Value *V, unsigned PromotedWidth) {
return false;
LLVM_DEBUG(dbgs() << "IR Promotion: TryToPromote: " << *V << ", from "
- << TypeSize << " bits to " << PromotedWidth << "\n");
+ << TypeSize << " bits to " << PromotedWidth << "\n");
SetVector<Value*> WorkList;
SetVector<Value*> Sources;
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 9daebfd9e63d..4876b9e23717 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -167,6 +167,7 @@ std::string EVT::getEVTString() const {
case MVT::Glue: return "glue";
case MVT::x86mmx: return "x86mmx";
case MVT::x86amx: return "x86amx";
+ case MVT::i64x8: return "i64x8";
case MVT::Metadata: return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::funcref: return "funcref";
@@ -198,6 +199,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
case MVT::x86amx: return Type::getX86_AMXTy(Context);
+ case MVT::i64x8: return IntegerType::get(Context, 512);
case MVT::externref:
return PointerType::get(StructType::create(Context), 10);
case MVT::funcref:
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 0f164e2637a2..069aca742da0 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -541,15 +541,8 @@ void VirtRegRewriter::rewrite() {
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
LLVM_DEBUG(MBBI->print(dbgs(), Indexes));
- for (MachineBasicBlock::instr_iterator
- MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
- MachineInstr *MI = &*MII;
- ++MII;
-
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- MachineOperand &MO = *MOI;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+ for (MachineOperand &MO : MI.operands()) {
// Make sure MRI knows about registers clobbered by regmasks.
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
@@ -574,7 +567,7 @@ void VirtRegRewriter::rewrite() {
// have to add implicit killed operands for the super-register. A
// partial redef always kills and redefines the super-register.
if ((MO.readsReg() && (MO.isDef() || MO.isKill())) ||
- (MO.isDef() && subRegLiveThrough(*MI, PhysReg)))
+ (MO.isDef() && subRegLiveThrough(MI, PhysReg)))
SuperKills.push_back(PhysReg);
if (MO.isDef()) {
@@ -619,20 +612,20 @@ void VirtRegRewriter::rewrite() {
// Add any missing super-register kills after rewriting the whole
// instruction.
while (!SuperKills.empty())
- MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+ MI.addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
while (!SuperDeads.empty())
- MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
+ MI.addRegisterDead(SuperDeads.pop_back_val(), TRI, true);
while (!SuperDefs.empty())
- MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI);
+ MI.addRegisterDefined(SuperDefs.pop_back_val(), TRI);
- LLVM_DEBUG(dbgs() << "> " << *MI);
+ LLVM_DEBUG(dbgs() << "> " << MI);
- expandCopyBundle(*MI);
+ expandCopyBundle(MI);
// We can remove identity copies right now.
- handleIdentityCopy(*MI);
+ handleIdentityCopy(MI);
}
}
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index c4c84cd921fa..c04a7b28eff9 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -29,7 +29,7 @@
// __wasm_lpad_context.lpad_index = index;
// __wasm_lpad_context.lsda = wasm.lsda();
// _Unwind_CallPersonality(exn);
-// selector = __wasm.landingpad_context.selector;
+// selector = __wasm_lpad_context.selector;
// ...
//
//
@@ -329,7 +329,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedPersonality,
OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
- // Pseudocode: int selector = __wasm.landingpad_context.selector;
+ // Pseudocode: int selector = __wasm_lpad_context.selector;
Instruction *Selector =
IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 4449cd8ef555..a3dec6c25e44 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -549,6 +549,7 @@ static void updateChildIncompleteness(const DWARFDie &Die, CompileUnit &CU,
switch (Die.getTag()) {
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_class_type:
+ case dwarf::DW_TAG_union_type:
break;
default:
return;
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 3a9f79e47012..46e7457f2368 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -21,8 +21,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
namespace llvm {
diff --git a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
index 799cffb7116e..c7b1c65f2f9a 100644
--- a/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
+++ b/llvm/lib/DebugInfo/CodeView/ContinuationRecordBuilder.cpp
@@ -103,7 +103,7 @@ void ContinuationRecordBuilder::writeMemberType(RecordType &Record) {
if (getCurrentSegmentLength() > MaxSegmentLength) {
// We need to inject some bytes before the member we just wrote but after
// the previous member. Save off the length of the member we just wrote so
- // that we can do some sanity checking on it.
+ // that we can validate it.
uint32_t MemberLength = SegmentWriter.getOffset() - OriginalOffset;
(void) MemberLength;
insertSegmentEnd(OriginalOffset);
diff --git a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
index ac3b30175956..d963e34628db 100644
--- a/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
+++ b/llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp
@@ -53,7 +53,7 @@ ArrayRef<uint8_t> SimpleTypeSerializer::serialize(T &Record) {
Prefix->RecordKind = CVT.kind();
Prefix->RecordLen = Writer.getOffset() - sizeof(uint16_t);
- return {ScratchBuffer.data(), Writer.getOffset()};
+ return {ScratchBuffer.data(), static_cast<size_t>(Writer.getOffset())};
}
// Explicitly instantiate the member function for each known type so that we can
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index ee1ff5460b9b..1be5a752453a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -147,41 +147,57 @@ DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const {
return None;
}
-Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
- const uint64_t DIEOffset, const dwarf::Attribute Attr,
- const DWARFUnit &U) const {
- // Check if this abbreviation has this attribute without needing to skip
- // any data so we can return quickly if it doesn't.
- Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
- if (!MatchAttrIndex)
- return None;
-
- auto DebugInfoData = U.getDebugInfoExtractor();
+uint64_t DWARFAbbreviationDeclaration::getAttributeOffsetFromIndex(
+ uint32_t AttrIndex, uint64_t DIEOffset, const DWARFUnit &U) const {
+ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
// Add the byte size of the ULEB for the abbrev Code so we can start
// skipping the attribute data.
uint64_t Offset = DIEOffset + CodeByteSize;
- for (uint32_t CurAttrIdx = 0; CurAttrIdx != *MatchAttrIndex; ++CurAttrIdx)
+ for (uint32_t CurAttrIdx = 0; CurAttrIdx != AttrIndex; ++CurAttrIdx)
// Match Offset along until we get to the attribute we want.
if (auto FixedSize = AttributeSpecs[CurAttrIdx].getByteSize(U))
Offset += *FixedSize;
else
DWARFFormValue::skipValue(AttributeSpecs[CurAttrIdx].Form, DebugInfoData,
&Offset, U.getFormParams());
+ return Offset;
+}
+
+Optional<DWARFFormValue>
+DWARFAbbreviationDeclaration::getAttributeValueFromOffset(
+ uint32_t AttrIndex, uint64_t Offset, const DWARFUnit &U) const {
+ assert(AttributeSpecs.size() > AttrIndex &&
+ "Attribute Index is out of bounds.");
// We have arrived at the attribute to extract; extract it from Offset.
- const AttributeSpec &Spec = AttributeSpecs[*MatchAttrIndex];
+ const AttributeSpec &Spec = AttributeSpecs[AttrIndex];
if (Spec.isImplicitConst())
return DWARFFormValue::createFromSValue(Spec.Form,
Spec.getImplicitConstValue());
DWARFFormValue FormValue(Spec.Form);
+ DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
if (FormValue.extractValue(DebugInfoData, &Offset, U.getFormParams(), &U))
return FormValue;
-
return None;
}
+Optional<DWARFFormValue>
+DWARFAbbreviationDeclaration::getAttributeValue(const uint64_t DIEOffset,
+ const dwarf::Attribute Attr,
+ const DWARFUnit &U) const {
+ // Check if this abbreviation has this attribute without needing to skip
+ // any data so we can return quickly if it doesn't.
+ Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
+ if (!MatchAttrIndex)
+ return None;
+
+ uint64_t Offset = getAttributeOffsetFromIndex(*MatchAttrIndex, DIEOffset, U);
+
+ return getAttributeValueFromOffset(*MatchAttrIndex, Offset, U);
+}
+
size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize(
const DWARFUnit &U) const {
size_t ByteSize = NumBytes;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 28d35b609c24..c77d4d4d989c 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -767,7 +767,7 @@ LLVM_DUMP_METHOD void DWARFDebugNames::NameIndex::dump(ScopedPrinter &W) const {
}
W.startLine() << "Hash table not present\n";
- for (NameTableEntry NTE : *this)
+ for (const NameTableEntry &NTE : *this)
dumpName(W, NTE, None);
}
@@ -799,7 +799,7 @@ DWARFDebugNames::ValueIterator::findEntryOffsetInCurrentIndex() {
const Header &Hdr = CurrentIndex->Hdr;
if (Hdr.BucketCount == 0) {
// No Hash Table; we need to search through all names in the Name Index.
- for (NameTableEntry NTE : *CurrentIndex) {
+ for (const NameTableEntry &NTE : *CurrentIndex) {
if (NTE.getString() == Key)
return NTE.getEntryOffset();
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index 4e1cafeb2126..c8331487f282 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -33,6 +33,7 @@
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdint>
@@ -693,6 +693,18 @@ void DWARFContext::dump(
getDebugNames().dump(OS);
}
+DWARFTypeUnit *DWARFContext::getTypeUnitForHash(uint16_t Version, uint64_t Hash,
+ bool IsDWO) {
+ // FIXME: Check for/use the tu_index here, if there is one.
+ for (const auto &U : IsDWO ? dwo_units() : normal_units()) {
+ if (DWARFTypeUnit *TU = dyn_cast<DWARFTypeUnit>(U.get())) {
+ if (TU->getTypeHash() == Hash)
+ return TU;
+ }
+ }
+ return nullptr;
+}
+
DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) {
parseDWOUnits(LazyParse);
@@ -1411,7 +1423,8 @@ DWARFContext::getDWOContext(StringRef AbsolutePath) {
auto S = std::make_shared<DWOFile>();
S->File = std::move(Obj.get());
- S->Context = DWARFContext::create(*S->File.getBinary());
+ S->Context = DWARFContext::create(*S->File.getBinary(),
+ ProcessDebugRelocations::Ignore);
*Entry = S;
auto *Ctxt = S->Context.get();
return std::shared_ptr<DWARFContext>(std::move(S), Ctxt);
@@ -1652,7 +1665,9 @@ public:
}
}
DWARFObjInMemory(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
- function_ref<void(Error)> HandleError, function_ref<void(Error)> HandleWarning )
+ function_ref<void(Error)> HandleError,
+ function_ref<void(Error)> HandleWarning,
+ DWARFContext::ProcessDebugRelocations RelocAction)
: IsLittleEndian(Obj.isLittleEndian()),
AddressSize(Obj.getBytesInAddress()), FileName(Obj.getFileName()),
Obj(&Obj) {
@@ -1735,7 +1750,12 @@ public:
S.Data = Data;
}
- if (RelocatedSection == Obj.section_end())
+ if (RelocatedSection != Obj.section_end() && Name.contains(".dwo"))
+ HandleWarning(
+ createError("Unexpected relocations for dwo section " + Name));
+
+ if (RelocatedSection == Obj.section_end() ||
+ (RelocAction == DWARFContext::ProcessDebugRelocations::Ignore))
continue;
StringRef RelSecName;
@@ -1772,18 +1792,10 @@ public:
if (RelSecName == "debug_info")
Map = &static_cast<DWARFSectionMap &>(InfoSections[*RelocatedSection])
.Relocs;
- else if (RelSecName == "debug_info.dwo")
- Map = &static_cast<DWARFSectionMap &>(
- InfoDWOSections[*RelocatedSection])
- .Relocs;
else if (RelSecName == "debug_types")
Map =
&static_cast<DWARFSectionMap &>(TypesSections[*RelocatedSection])
.Relocs;
- else if (RelSecName == "debug_types.dwo")
- Map = &static_cast<DWARFSectionMap &>(
- TypesDWOSections[*RelocatedSection])
- .Relocs;
else
continue;
}
@@ -1966,12 +1978,13 @@ public:
} // namespace
std::unique_ptr<DWARFContext>
-DWARFContext::create(const object::ObjectFile &Obj, const LoadedObjectInfo *L,
- std::string DWPName,
+DWARFContext::create(const object::ObjectFile &Obj,
+ ProcessDebugRelocations RelocAction,
+ const LoadedObjectInfo *L, std::string DWPName,
std::function<void(Error)> RecoverableErrorHandler,
std::function<void(Error)> WarningHandler) {
- auto DObj =
- std::make_unique<DWARFObjInMemory>(Obj, L, RecoverableErrorHandler, WarningHandler);
+ auto DObj = std::make_unique<DWARFObjInMemory>(
+ Obj, L, RecoverableErrorHandler, WarningHandler, RelocAction);
return std::make_unique<DWARFContext>(std::move(DObj), std::move(DWPName),
RecoverableErrorHandler,
WarningHandler);
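DWARFContext::create now takes a ProcessDebugRelocations argument, and the DWO context built in getDWOContext passes Ignore so split-DWARF sections are not relocated (the loop above also warns if a .dwo section unexpectedly carries relocations). A usage sketch, assuming the declaration in DWARFContext.h defaults the trailing parameters (that header is not part of this hunk):
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ObjectFile.h"
#include <memory>
using namespace llvm;
// Build a context that skips relocation processing, mirroring the DWO path.
static std::unique_ptr<DWARFContext>
makeUnrelocatedContext(const object::ObjectFile &Obj) {
  return DWARFContext::create(Obj,
                              DWARFContext::ProcessDebugRelocations::Ignore);
}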
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
index dcf2aefeb39f..5b1c62e6a259 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAddr.cpp
@@ -8,7 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugAddr.h"
#include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
using namespace llvm;
@@ -18,12 +18,10 @@ Error DWARFDebugAddrTable::extractAddresses(const DWARFDataExtractor &Data,
assert(EndOffset >= *OffsetPtr);
uint64_t DataSize = EndOffset - *OffsetPtr;
assert(Data.isValidOffsetForDataOfSize(*OffsetPtr, DataSize));
- if (AddrSize != 4 && AddrSize != 8)
- return createStringError(errc::not_supported,
- "address table at offset 0x%" PRIx64
- " has unsupported address size %" PRIu8
- " (4 and 8 are supported)",
- Offset, AddrSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ AddrSize, errc::not_supported, "address table at offset 0x%" PRIx64,
+ Offset))
+ return SizeErr;
if (DataSize % AddrSize != 0) {
invalidateLength();
return createStringError(errc::invalid_argument,
@@ -148,8 +146,20 @@ void DWARFDebugAddrTable::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
}
if (Addrs.size() > 0) {
- const char *AddrFmt =
- (AddrSize == 4) ? "0x%8.8" PRIx64 "\n" : "0x%16.16" PRIx64 "\n";
+ const char *AddrFmt;
+ switch (AddrSize) {
+ case 2:
+ AddrFmt = "0x%4.4" PRIx64 "\n";
+ break;
+ case 4:
+ AddrFmt = "0x%8.8" PRIx64 "\n";
+ break;
+ case 8:
+ AddrFmt = "0x%16.16" PRIx64 "\n";
+ break;
+ default:
+ llvm_unreachable("unsupported address size");
+ }
OS << "Addrs: [\n";
for (uint64_t Addr : Addrs)
OS << format(AddrFmt, Addr);
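This hunk and several below replace open-coded "4 or 8" checks with DWARFContext::checkAddressSizeSupported. Its definition is not shown in this diff; a plausible shape, offered only as a sketch of what the call sites rely on:
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <system_error>
using namespace llvm;
// Turn an unsupported address size into a formatted Error; the prefix message
// (table kind and offset) is supplied by the caller via Fmt/Vals.
template <typename... Ts>
static Error checkAddressSizeSupported(unsigned AddressSize, std::error_code EC,
                                       char const *Fmt, const Ts &...Vals) {
  if (DWARFContext::isAddressSizeSupported(AddressSize))
    return Error::success();
  std::string Buffer;
  raw_string_ostream Stream(Buffer);
  Stream << format(Fmt, Vals...) << " has unsupported address size "
         << AddressSize;
  return make_error<StringError>(Stream.str(), EC);
}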
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
index 598e3ecee30e..c60c9d9d7227 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugArangeSet.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Format.h"
@@ -87,12 +88,10 @@ Error DWARFDebugArangeSet::extract(DWARFDataExtractor data,
"the length of address range table at offset "
"0x%" PRIx64 " exceeds section size",
Offset);
- if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
- return createStringError(errc::invalid_argument,
- "address range table at offset 0x%" PRIx64
- " has unsupported address size: %d "
- "(4 and 8 supported)",
- Offset, HeaderData.AddrSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ HeaderData.AddrSize, errc::invalid_argument,
+ "address range table at offset 0x%" PRIx64, Offset))
+ return SizeErr;
if (HeaderData.SegSize != 0)
return createStringError(errc::not_supported,
"non-zero segment selector size in address range "
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 7ebb0092c34a..385bde51e2e7 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -19,18 +19,11 @@
using namespace llvm;
using namespace dwarf;
-bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
- uint64_t *OffsetPtr) {
- DWARFDataExtractor DebugInfoData = U.getDebugInfoExtractor();
- const uint64_t UEndOffset = U.getNextUnitOffset();
- return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
-}
-
bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint64_t *OffsetPtr,
const DWARFDataExtractor &DebugInfoData,
- uint64_t UEndOffset, uint32_t D) {
+ uint64_t UEndOffset, uint32_t ParentIdx) {
Offset = *OffsetPtr;
- Depth = D;
+ this->ParentIdx = ParentIdx;
if (Offset >= UEndOffset) {
U.getContext().getWarningHandler()(
createStringError(errc::invalid_argument,
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
index dc7da5d9348f..cad3dcab8a7e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp
@@ -16,6 +16,12 @@
using namespace llvm;
+bool DWARFDebugRangeList::RangeListEntry::isBaseAddressSelectionEntry(
+ uint8_t AddressSize) const {
+ assert(DWARFContext::isAddressSizeSupported(AddressSize));
+ return StartAddress == dwarf::computeTombstoneAddress(AddressSize);
+}
+
void DWARFDebugRangeList::clear() {
Offset = -1ULL;
AddressSize = 0;
@@ -30,9 +36,10 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
"invalid range list offset 0x%" PRIx64, *offset_ptr);
AddressSize = data.getAddressSize();
- if (AddressSize != 4 && AddressSize != 8)
- return createStringError(errc::invalid_argument,
- "invalid address size: %" PRIu8, AddressSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ AddressSize, errc::invalid_argument,
+ "range list at offset 0x%" PRIx64, *offset_ptr))
+ return SizeErr;
Offset = *offset_ptr;
while (true) {
RangeListEntry Entry;
@@ -58,12 +65,22 @@ Error DWARFDebugRangeList::extract(const DWARFDataExtractor &data,
}
void DWARFDebugRangeList::dump(raw_ostream &OS) const {
- for (const RangeListEntry &RLE : Entries) {
- const char *format_str =
- (AddressSize == 4 ? "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n"
- : "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n");
- OS << format(format_str, Offset, RLE.StartAddress, RLE.EndAddress);
+ const char *AddrFmt;
+ switch (AddressSize) {
+ case 2:
+ AddrFmt = "%08" PRIx64 " %04" PRIx64 " %04" PRIx64 "\n";
+ break;
+ case 4:
+ AddrFmt = "%08" PRIx64 " %08" PRIx64 " %08" PRIx64 "\n";
+ break;
+ case 8:
+ AddrFmt = "%08" PRIx64 " %016" PRIx64 " %016" PRIx64 "\n";
+ break;
+ default:
+ llvm_unreachable("unsupported address size");
}
+ for (const RangeListEntry &RLE : Entries)
+ OS << format(AddrFmt, Offset, RLE.StartAddress, RLE.EndAddress);
OS << format("%08" PRIx64 " <End of list>\n", Offset);
}
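The new isBaseAddressSelectionEntry above compares an entry's start address against dwarf::computeTombstoneAddress, i.e. the largest representable address for the unit's address size, which is how .debug_ranges encodes a base address selection entry. A small sketch of that value, under the assumption that computeTombstoneAddress returns the all-ones address:
#include <cstdint>
// All-ones address for an N-byte address size, e.g. 0xffffffff when N == 4.
static uint64_t allOnesAddress(uint8_t AddressByteSize) {
  return AddressByteSize >= 8 ? UINT64_MAX
                              : (UINT64_C(1) << (AddressByteSize * 8)) - 1;
}
// An entry whose start address equals allOnesAddress(AddrSize) is a base
// address selection entry; its end address carries the new base address.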
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 0501e3ee3f9b..ed50f2635738 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -108,17 +108,41 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue,
return;
}
-/// Dump the name encoded in the type tag.
-static void dumpTypeTagName(raw_ostream &OS, dwarf::Tag T) {
- StringRef TagStr = TagString(T);
- if (!TagStr.startswith("DW_TAG_") || !TagStr.endswith("_type"))
- return;
- OS << TagStr.substr(7, TagStr.size() - 12) << " ";
+static DWARFDie resolveReferencedType(DWARFDie D,
+ dwarf::Attribute Attr = DW_AT_type) {
+ return D.getAttributeValueAsReferencedDie(Attr).resolveTypeUnitReference();
+}
+static DWARFDie resolveReferencedType(DWARFDie D, DWARFFormValue F) {
+ return D.getAttributeValueAsReferencedDie(F).resolveTypeUnitReference();
}
-static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
- for (const DWARFDie &C : D.children())
- if (C.getTag() == DW_TAG_subrange_type) {
+namespace {
+
+// FIXME: We should have pretty printers per language. Currently we print
+// everything as if it was C++ and fall back to the TAG type name.
+struct DWARFTypePrinter {
+ raw_ostream &OS;
+ bool Word = true;
+ bool EndedWithTemplate = false;
+
+ DWARFTypePrinter(raw_ostream &OS) : OS(OS) {}
+
+ /// Dump the name encoded in the type tag.
+ void appendTypeTagName(dwarf::Tag T) {
+ StringRef TagStr = TagString(T);
+ static constexpr StringRef Prefix = "DW_TAG_";
+ static constexpr StringRef Suffix = "_type";
+ if (!TagStr.startswith(Prefix) || !TagStr.endswith(Suffix))
+ return;
+ OS << TagStr.substr(Prefix.size(),
+ TagStr.size() - (Prefix.size() + Suffix.size()))
+ << " ";
+ }
+
+ void appendArrayType(const DWARFDie &D) {
+ for (const DWARFDie &C : D.children()) {
+ if (C.getTag() != DW_TAG_subrange_type)
+ continue;
Optional<uint64_t> LB;
Optional<uint64_t> Count;
Optional<uint64_t> UB;
@@ -159,79 +183,503 @@ static void dumpArrayType(raw_ostream &OS, const DWARFDie &D) {
OS << ")]";
}
}
-}
-
-/// Recursively dump the DIE type name when applicable.
-static void dumpTypeName(raw_ostream &OS, const DWARFDie &D) {
- if (!D.isValid())
- return;
+ EndedWithTemplate = false;
+ }
- if (const char *Name = D.getName(DINameKind::LinkageName)) {
- OS << Name;
- return;
+ DWARFDie skipQualifiers(DWARFDie D) {
+ while (D && (D.getTag() == DW_TAG_const_type ||
+ D.getTag() == DW_TAG_volatile_type))
+ D = resolveReferencedType(D);
+ return D;
}
- // FIXME: We should have pretty printers per language. Currently we print
- // everything as if it was C++ and fall back to the TAG type name.
- const dwarf::Tag T = D.getTag();
- switch (T) {
- case DW_TAG_array_type:
- case DW_TAG_pointer_type:
- case DW_TAG_ptr_to_member_type:
- case DW_TAG_reference_type:
- case DW_TAG_rvalue_reference_type:
- case DW_TAG_subroutine_type:
- break;
- default:
- dumpTypeTagName(OS, T);
+ bool needsParens(DWARFDie D) {
+ D = skipQualifiers(D);
+ return D && (D.getTag() == DW_TAG_subroutine_type || D.getTag() == DW_TAG_array_type);
}
- // Follow the DW_AT_type if possible.
- DWARFDie TypeDie = D.getAttributeValueAsReferencedDie(DW_AT_type);
- dumpTypeName(OS, TypeDie);
+ void appendPointerLikeTypeBefore(DWARFDie D, DWARFDie Inner, StringRef Ptr) {
+ appendQualifiedNameBefore(Inner);
+ if (Word)
+ OS << ' ';
+ if (needsParens(Inner))
+ OS << '(';
+ OS << Ptr;
+ Word = false;
+ EndedWithTemplate = false;
+ }
- switch (T) {
- case DW_TAG_subroutine_type: {
- if (!TypeDie)
+ DWARFDie
+ appendUnqualifiedNameBefore(DWARFDie D,
+ std::string *OriginalFullName = nullptr) {
+ Word = true;
+ if (!D) {
OS << "void";
+ return DWARFDie();
+ }
+ DWARFDie Inner = resolveReferencedType(D);
+ const dwarf::Tag T = D.getTag();
+ switch (T) {
+ case DW_TAG_pointer_type: {
+ appendPointerLikeTypeBefore(D, Inner, "*");
+ break;
+ }
+ case DW_TAG_subroutine_type: {
+ appendQualifiedNameBefore(Inner);
+ if (Word) {
+ OS << ' ';
+ }
+ Word = false;
+ break;
+ }
+ case DW_TAG_array_type: {
+ appendQualifiedNameBefore(Inner);
+ break;
+ }
+ case DW_TAG_reference_type:
+ appendPointerLikeTypeBefore(D, Inner, "&");
+ break;
+ case DW_TAG_rvalue_reference_type:
+ appendPointerLikeTypeBefore(D, Inner, "&&");
+ break;
+ case DW_TAG_ptr_to_member_type: {
+ appendQualifiedNameBefore(Inner);
+ if (needsParens(Inner))
+ OS << '(';
+ else if (Word)
+ OS << ' ';
+ if (DWARFDie Cont = resolveReferencedType(D, DW_AT_containing_type)) {
+ appendQualifiedName(Cont);
+ OS << "::";
+ }
+ OS << "*";
+ Word = false;
+ break;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ appendConstVolatileQualifierBefore(D);
+ break;
+ case DW_TAG_namespace: {
+ if (const char *Name = dwarf::toString(D.find(DW_AT_name), nullptr))
+ OS << Name;
+ else
+ OS << "(anonymous namespace)";
+ break;
+ }
+ case DW_TAG_unspecified_type: {
+ StringRef TypeName = D.getShortName();
+ if (TypeName == "decltype(nullptr)")
+ TypeName = "std::nullptr_t";
+ Word = true;
+ OS << TypeName;
+ EndedWithTemplate = false;
+ break;
+ }
+ /*
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_base_type:
+ */
+ default: {
+ const char *NamePtr = dwarf::toString(D.find(DW_AT_name), nullptr);
+ if (!NamePtr) {
+ appendTypeTagName(D.getTag());
+ return Inner;
+ }
+ Word = true;
+ StringRef Name = NamePtr;
+ static constexpr StringRef MangledPrefix = "_STN";
+ if (Name.startswith(MangledPrefix)) {
+ Name = Name.drop_front(MangledPrefix.size());
+ auto Separator = Name.find('|');
+ assert(Separator != StringRef::npos);
+ StringRef BaseName = Name.substr(0, Separator);
+ StringRef TemplateArgs = Name.substr(Separator + 1);
+ if (OriginalFullName)
+ *OriginalFullName = (BaseName + TemplateArgs).str();
+ Name = BaseName;
+ } else
+ EndedWithTemplate = Name.endswith(">");
+ OS << Name;
+ // This check would be insufficient for operator overloads like
+ // "operator>>" - but for now Clang doesn't try to simplify them, so this
+ // is OK. Add more nuanced operator overload handling here if/when needed.
+ if (Name.endswith(">"))
+ break;
+ if (!appendTemplateParameters(D))
+ break;
+
+ if (EndedWithTemplate)
+ OS << ' ';
+ OS << '>';
+ EndedWithTemplate = true;
+ Word = true;
+ break;
+ }
+ }
+ return Inner;
+ }
+
+ void appendUnqualifiedNameAfter(DWARFDie D, DWARFDie Inner,
+ bool SkipFirstParamIfArtificial = false) {
+ if (!D)
+ return;
+ switch (D.getTag()) {
+ case DW_TAG_subroutine_type: {
+ appendSubroutineNameAfter(D, Inner, SkipFirstParamIfArtificial, false,
+ false);
+ break;
+ }
+ case DW_TAG_array_type: {
+ appendArrayType(D);
+ break;
+ }
+ case DW_TAG_const_type:
+ case DW_TAG_volatile_type:
+ appendConstVolatileQualifierAfter(D);
+ break;
+ case DW_TAG_ptr_to_member_type:
+ case DW_TAG_reference_type:
+ case DW_TAG_rvalue_reference_type:
+ case DW_TAG_pointer_type: {
+ if (needsParens(Inner))
+ OS << ')';
+ appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner),
+ /*SkipFirstParamIfArtificial=*/D.getTag() ==
+ DW_TAG_ptr_to_member_type);
+ break;
+ }
+ /*
+ case DW_TAG_structure_type:
+ case DW_TAG_class_type:
+ case DW_TAG_enumeration_type:
+ case DW_TAG_base_type:
+ case DW_TAG_namespace:
+ */
+ default:
+ break;
+ }
+ }
+
+ void appendQualifiedName(DWARFDie D) {
+ if (D)
+ appendScopes(D.getParent());
+ appendUnqualifiedName(D);
+ }
+ DWARFDie appendQualifiedNameBefore(DWARFDie D) {
+ if (D)
+ appendScopes(D.getParent());
+ return appendUnqualifiedNameBefore(D);
+ }
+ bool appendTemplateParameters(DWARFDie D, bool *FirstParameter = nullptr) {
+ bool FirstParameterValue = true;
+ bool IsTemplate = false;
+ if (!FirstParameter)
+ FirstParameter = &FirstParameterValue;
+ for (const DWARFDie &C : D) {
+ auto Sep = [&] {
+ if (*FirstParameter)
+ OS << '<';
+ else
+ OS << ", ";
+ IsTemplate = true;
+ EndedWithTemplate = false;
+ *FirstParameter = false;
+ };
+ if (C.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
+ IsTemplate = true;
+ appendTemplateParameters(C, FirstParameter);
+ }
+ if (C.getTag() == dwarf::DW_TAG_template_value_parameter) {
+ DWARFDie T = resolveReferencedType(C);
+ Sep();
+ if (T.getTag() == DW_TAG_enumeration_type) {
+ auto V = C.find(DW_AT_const_value);
+ bool FoundEnumerator = false;
+ for (const DWARFDie &Enumerator : T) {
+ auto EV = Enumerator.find(DW_AT_const_value);
+ if (V && EV &&
+ V->getAsSignedConstant() == EV->getAsSignedConstant()) {
+ if (T.find(DW_AT_enum_class)) {
+ appendQualifiedName(T);
+ OS << "::";
+ } else
+ appendScopes(T.getParent());
+ OS << Enumerator.getShortName();
+ FoundEnumerator = true;
+ break;
+ }
+ }
+ if (FoundEnumerator)
+ continue;
+ OS << '(';
+ appendQualifiedName(T);
+ OS << ')';
+ OS << to_string(*V->getAsSignedConstant());
+ continue;
+ }
+ // /Maybe/ we could do pointer type parameters, looking for the
+ // symbol in the ELF symbol table to get back to the variable...
+ // but probably not worth it.
+ if (T.getTag() == DW_TAG_pointer_type)
+ continue;
+ const char *RawName = dwarf::toString(T.find(DW_AT_name), nullptr);
+ assert(RawName);
+ StringRef Name = RawName;
+ auto V = C.find(DW_AT_const_value);
+ bool IsQualifiedChar = false;
+ if (Name == "bool") {
+ OS << (*V->getAsUnsignedConstant() ? "true" : "false");
+ } else if (Name == "short") {
+ OS << "(short)";
+ OS << to_string(*V->getAsSignedConstant());
+ } else if (Name == "unsigned short") {
+ OS << "(unsigned short)";
+ OS << to_string(*V->getAsSignedConstant());
+ } else if (Name == "int")
+ OS << to_string(*V->getAsSignedConstant());
+ else if (Name == "long") {
+ OS << to_string(*V->getAsSignedConstant());
+ OS << "L";
+ } else if (Name == "long long") {
+ OS << to_string(*V->getAsSignedConstant());
+ OS << "LL";
+ } else if (Name == "unsigned int") {
+ OS << to_string(*V->getAsUnsignedConstant());
+ OS << "U";
+ } else if (Name == "unsigned long") {
+ OS << to_string(*V->getAsUnsignedConstant());
+ OS << "UL";
+ } else if (Name == "unsigned long long") {
+ OS << to_string(*V->getAsUnsignedConstant());
+ OS << "ULL";
+ } else if (Name == "char" ||
+ (IsQualifiedChar =
+ (Name == "unsigned char" || Name == "signed char"))) {
+ // FIXME: check T's DW_AT_type to see if it's signed or not (since
+ // char signedness is implementation defined).
+ auto Val = *V->getAsSignedConstant();
+ // Copied/hacked up from Clang's CharacterLiteral::print - incomplete
+ // (doesn't actually support different character types/widths, sign
+ // handling's not done, and doesn't correctly test if a character is
+ // printable or needs to use a numeric escape sequence instead)
+ if (IsQualifiedChar) {
+ OS << '(';
+ OS << Name;
+ OS << ')';
+ }
+ switch (Val) {
+ case '\\':
+ OS << "'\\\\'";
+ break;
+ case '\'':
+ OS << "'\\''";
+ break;
+ case '\a':
+ // TODO: K&R: the meaning of '\\a' is different in traditional C
+ OS << "'\\a'";
+ break;
+ case '\b':
+ OS << "'\\b'";
+ break;
+ case '\f':
+ OS << "'\\f'";
+ break;
+ case '\n':
+ OS << "'\\n'";
+ break;
+ case '\r':
+ OS << "'\\r'";
+ break;
+ case '\t':
+ OS << "'\\t'";
+ break;
+ case '\v':
+ OS << "'\\v'";
+ break;
+ default:
+ if ((Val & ~0xFFu) == ~0xFFu)
+ Val &= 0xFFu;
+ if (Val < 127 && Val >= 32) {
+ OS << "'";
+ OS << (char)Val;
+ OS << "'";
+ } else if (Val < 256)
+ OS << to_string(llvm::format("'\\x%02x'", Val));
+ else if (Val <= 0xFFFF)
+ OS << to_string(llvm::format("'\\u%04x'", Val));
+ else
+ OS << to_string(llvm::format("'\\U%08x'", Val));
+ }
+ }
+ continue;
+ }
+ if (C.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
+ const char *RawName =
+ dwarf::toString(C.find(DW_AT_GNU_template_name), nullptr);
+ assert(RawName);
+ StringRef Name = RawName;
+ Sep();
+ OS << Name;
+ continue;
+ }
+ if (C.getTag() != dwarf::DW_TAG_template_type_parameter)
+ continue;
+ auto TypeAttr = C.find(DW_AT_type);
+ Sep();
+ appendQualifiedName(TypeAttr ? resolveReferencedType(C, *TypeAttr)
+ : DWARFDie());
+ }
+ if (IsTemplate && *FirstParameter && FirstParameter == &FirstParameterValue)
+ OS << '<';
+ return IsTemplate;
+ }
+ void decomposeConstVolatile(DWARFDie &N, DWARFDie &T, DWARFDie &C,
+ DWARFDie &V) {
+ (N.getTag() == DW_TAG_const_type ? C : V) = N;
+ T = resolveReferencedType(N);
+ if (T) {
+ auto Tag = T.getTag();
+ if (Tag == DW_TAG_const_type) {
+ C = T;
+ T = resolveReferencedType(T);
+ } else if (Tag == DW_TAG_volatile_type) {
+ V = T;
+ T = resolveReferencedType(T);
+ }
+ }
+ }
+ void appendConstVolatileQualifierAfter(DWARFDie N) {
+ DWARFDie C;
+ DWARFDie V;
+ DWARFDie T;
+ decomposeConstVolatile(N, T, C, V);
+ if (T && T.getTag() == DW_TAG_subroutine_type)
+ appendSubroutineNameAfter(T, resolveReferencedType(T), false, C.isValid(),
+ V.isValid());
+ else
+ appendUnqualifiedNameAfter(T, resolveReferencedType(T));
+ }
+ void appendConstVolatileQualifierBefore(DWARFDie N) {
+ DWARFDie C;
+ DWARFDie V;
+ DWARFDie T;
+ decomposeConstVolatile(N, T, C, V);
+ bool Subroutine = T && T.getTag() == DW_TAG_subroutine_type;
+ DWARFDie A = T;
+ while (A && A.getTag() == DW_TAG_array_type)
+ A = resolveReferencedType(A);
+ bool Leading =
+ (!A || (A.getTag() != DW_TAG_pointer_type &&
+ A.getTag() != llvm::dwarf::DW_TAG_ptr_to_member_type)) &&
+ !Subroutine;
+ if (Leading) {
+ if (C)
+ OS << "const ";
+ if (V)
+ OS << "volatile ";
+ }
+ appendQualifiedNameBefore(T);
+ if (!Leading && !Subroutine) {
+ Word = true;
+ if (C)
+ OS << "const";
+ if (V) {
+ if (C)
+ OS << ' ';
+ OS << "volatile";
+ }
+ }
+ }
+
+ /// Recursively append the DIE type name when applicable.
+ void appendUnqualifiedName(DWARFDie D,
+ std::string *OriginalFullName = nullptr) {
+ // FIXME: We should have pretty printers per language. Currently we print
+ // everything as if it was C++ and fall back to the TAG type name.
+ DWARFDie Inner = appendUnqualifiedNameBefore(D, OriginalFullName);
+ appendUnqualifiedNameAfter(D, Inner);
+ }
+
+ void appendSubroutineNameAfter(DWARFDie D, DWARFDie Inner,
+ bool SkipFirstParamIfArtificial, bool Const,
+ bool Volatile) {
+ DWARFDie FirstParamIfArtificial;
OS << '(';
+ EndedWithTemplate = false;
bool First = true;
- for (const DWARFDie &C : D.children()) {
- if (C.getTag() == DW_TAG_formal_parameter) {
- if (!First)
- OS << ", ";
- First = false;
- dumpTypeName(OS, C.getAttributeValueAsReferencedDie(DW_AT_type));
+ bool RealFirst = true;
+ for (DWARFDie P : D) {
+ if (P.getTag() != DW_TAG_formal_parameter)
+ return;
+ DWARFDie T = resolveReferencedType(P);
+ if (SkipFirstParamIfArtificial && RealFirst && P.find(DW_AT_artificial)) {
+ FirstParamIfArtificial = T;
+ RealFirst = false;
+ continue;
}
+ if (!First) {
+ OS << ", ";
+ }
+ First = false;
+ appendQualifiedName(T);
}
+ EndedWithTemplate = false;
OS << ')';
- break;
- }
- case DW_TAG_array_type: {
- dumpArrayType(OS, D);
- break;
- }
- case DW_TAG_pointer_type:
- OS << '*';
- break;
- case DW_TAG_ptr_to_member_type:
- if (DWARFDie Cont =
- D.getAttributeValueAsReferencedDie(DW_AT_containing_type)) {
- dumpTypeName(OS << ' ', Cont);
- OS << "::";
+ if (FirstParamIfArtificial) {
+ if (DWARFDie P = FirstParamIfArtificial) {
+ if (P.getTag() == DW_TAG_pointer_type) {
+ DWARFDie C;
+ DWARFDie V;
+ auto CVStep = [&](DWARFDie CV) {
+ if (DWARFDie U = resolveReferencedType(CV)) {
+ if (U.getTag() == DW_TAG_const_type)
+ return C = U;
+ if (U.getTag() == DW_TAG_volatile_type)
+ return V = U;
+ }
+ return DWARFDie();
+ };
+ if (DWARFDie CV = CVStep(P)) {
+ CVStep(CV);
+ }
+ if (C)
+ OS << " const";
+ if (V)
+ OS << " volatile";
+ }
+ }
+ } else {
+ if (Const)
+ OS << " const";
+ if (Volatile)
+ OS << " volatile";
}
- OS << '*';
- break;
- case DW_TAG_reference_type:
- OS << '&';
- break;
- case DW_TAG_rvalue_reference_type:
- OS << "&&";
- break;
- default:
- break;
+ if (D.find(DW_AT_reference))
+ OS << " &";
+ if (D.find(DW_AT_rvalue_reference))
+ OS << " &&";
+ appendUnqualifiedNameAfter(Inner, resolveReferencedType(Inner));
}
-}
+ void appendScopes(DWARFDie D) {
+ if (D.getTag() == DW_TAG_compile_unit)
+ return;
+ if (D.getTag() == DW_TAG_type_unit)
+ return;
+ if (D.getTag() == DW_TAG_skeleton_unit)
+ return;
+ if (D.getTag() == DW_TAG_subprogram)
+ return;
+ D = D.resolveTypeUnitReference();
+ if (DWARFDie P = D.getParent())
+ appendScopes(P);
+ appendUnqualifiedName(D);
+ OS << "::";
+ }
+};
+} // anonymous namespace
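DWARFTypePrinter rebuilds a C++-style type name by walking DW_AT_type chains; getFullName and the verifier changes later in this diff additionally rely on the "_STN<base>|<args>" encoding of simplified template names handled in appendUnqualifiedNameBefore. A self-contained sketch of that string convention (the example input "_STNt1|<int>" is illustrative only):
#include <cassert>
#include <cstddef>
#include <string>
#include <utility>
// Split an "_STN"-prefixed DW_AT_name into its base name and argument list.
static std::pair<std::string, std::string> splitSimplifiedName(std::string Name) {
  static const std::string Prefix = "_STN";
  assert(Name.compare(0, Prefix.size(), Prefix) == 0 && "not an _STN name");
  Name.erase(0, Prefix.size());
  const std::size_t Sep = Name.find('|');
  assert(Sep != std::string::npos && "missing '|' separator");
  return {Name.substr(0, Sep), Name.substr(Sep + 1)};
}
// splitSimplifiedName("_STNt1|<int>") yields {"t1", "<int>"}; the reconstituted
// name the verifier compares against is "t1" + "<int>" == "t1<int>".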
static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
const DWARFAttribute &AttrValue, unsigned Indent,
@@ -316,9 +764,12 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
DINameKind::LinkageName))
OS << Space << "\"" << Name << '\"';
} else if (Attr == DW_AT_type) {
- OS << Space << "\"";
- dumpTypeName(OS, Die.getAttributeValueAsReferencedDie(FormValue));
- OS << '"';
+ DWARFDie D = resolveReferencedType(Die, FormValue);
+ if (D && !D.isNULL()) {
+ OS << Space << "\"";
+ DWARFTypePrinter(OS).appendQualifiedName(D);
+ OS << '"';
+ }
} else if (Attr == DW_AT_APPLE_property_attribute) {
if (Optional<uint64_t> OptVal = FormValue.getAsUnsignedConstant())
dumpApplePropertyAttribute(OS, *OptVal);
@@ -345,6 +796,14 @@ static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
OS << ")\n";
}
+void DWARFDie::getFullName(raw_string_ostream &OS,
+ std::string *OriginalFullName) const {
+ const char *NamePtr = getShortName();
+ if (!NamePtr)
+ return;
+ DWARFTypePrinter(OS).appendUnqualifiedName(*this, OriginalFullName);
+}
+
bool DWARFDie::isSubprogramDIE() const { return getTag() == DW_TAG_subprogram; }
bool DWARFDie::isSubroutineDIE() const {
@@ -417,13 +876,27 @@ DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
DWARFDie
DWARFDie::getAttributeValueAsReferencedDie(const DWARFFormValue &V) const {
+ DWARFDie Result;
if (auto SpecRef = V.getAsRelativeReference()) {
if (SpecRef->Unit)
- return SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() + SpecRef->Offset);
- if (auto SpecUnit = U->getUnitVector().getUnitForOffset(SpecRef->Offset))
- return SpecUnit->getDIEForOffset(SpecRef->Offset);
+ Result = SpecRef->Unit->getDIEForOffset(SpecRef->Unit->getOffset() +
+ SpecRef->Offset);
+ else if (auto SpecUnit =
+ U->getUnitVector().getUnitForOffset(SpecRef->Offset))
+ Result = SpecUnit->getDIEForOffset(SpecRef->Offset);
}
- return DWARFDie();
+ return Result;
+}
+
+DWARFDie DWARFDie::resolveTypeUnitReference() const {
+ if (auto Attr = find(DW_AT_signature)) {
+ if (Optional<uint64_t> Sig = Attr->getAsReferenceUVal()) {
+ if (DWARFTypeUnit *TU = U->getContext().getTypeUnitForHash(
+ U->getVersion(), *Sig, U->isDWOUnit()))
+ return TU->getDIEForOffset(TU->getTypeOffset() + TU->getOffset());
+ }
+ }
+ return *this;
}
Optional<uint64_t> DWARFDie::getRangesBaseAttribute() const {
@@ -483,21 +956,6 @@ Expected<DWARFAddressRangesVector> DWARFDie::getAddressRanges() const {
return DWARFAddressRangesVector();
}
-void DWARFDie::collectChildrenAddressRanges(
- DWARFAddressRangesVector &Ranges) const {
- if (isNULL())
- return;
- if (isSubprogramDIE()) {
- if (auto DIERangesOrError = getAddressRanges())
- llvm::append_range(Ranges, DIERangesOrError.get());
- else
- llvm::consumeError(DIERangesOrError.takeError());
- }
-
- for (auto Child : children())
- Child.collectChildrenAddressRanges(Ranges);
-}
-
bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {
auto RangesOrError = getAddressRanges();
if (!RangesOrError) {
@@ -581,18 +1039,10 @@ uint64_t DWARFDie::getDeclLine() const {
std::string
DWARFDie::getDeclFile(DILineInfoSpecifier::FileLineInfoKind Kind) const {
- auto D = getAttributeValueAsReferencedDie(DW_AT_abstract_origin);
- if (!D)
- D = *this;
- std::string FileName;
- if (auto DeclFile = toUnsigned(D.find(DW_AT_decl_file))) {
- if (const auto *LineTable =
- getDwarfUnit()->getContext().getLineTableForUnit(
- D.getDwarfUnit()->getLinkedUnit()))
- LineTable->getFileNameByIndex(
- *DeclFile, D.getDwarfUnit()->getCompilationDir(), Kind, FileName);
- }
- return FileName;
+ if (auto FormValue = findRecursively(DW_AT_decl_file))
+ if (auto OptString = FormValue->getAsFile(Kind))
+ return *OptString;
+ return {};
}
void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
@@ -641,9 +1091,13 @@ void DWARFDie::dump(raw_ostream &OS, unsigned Indent,
if (AbbrevDecl) {
WithColor(OS, HighlightColor::Tag).get().indent(Indent)
<< formatv("{0}", getTag());
- if (DumpOpts.Verbose)
+ if (DumpOpts.Verbose) {
OS << format(" [%u] %c", abbrCode,
AbbrevDecl->hasChildren() ? '*' : ' ');
+ if (Optional<uint32_t> ParentIdx = Die->getParentIdx())
+ OS << format(" (0x%8.8" PRIx64 ")",
+ U->getDIEAtIndex(*ParentIdx).getOffset());
+ }
OS << '\n';
// Dump all data in the DIE for the attributes.
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 4b9be85f6885..d0fbd702e831 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -207,7 +207,8 @@ bool DWARFExpression::Operation::extract(DataExtractor Data,
}
static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
- DIDumpOptions DumpOpts, uint64_t Operands[2],
+ DIDumpOptions DumpOpts,
+ const uint64_t Operands[2],
unsigned Operand) {
assert(Operand < 2 && "operand out of bounds");
auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
@@ -226,7 +227,7 @@ static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
DIDumpOptions DumpOpts, uint8_t Opcode,
- uint64_t Operands[2],
+ const uint64_t Operands[2],
const MCRegisterInfo *MRI, bool isEH) {
if (!MRI)
return false;
@@ -262,7 +263,7 @@ static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
const DWARFExpression *Expr,
const MCRegisterInfo *RegInfo,
- DWARFUnit *U, bool isEH) {
+ DWARFUnit *U, bool isEH) const {
if (Error) {
OS << "<decoding error>";
return false;
@@ -356,10 +357,9 @@ void DWARFExpression::print(raw_ostream &OS, DIDumpOptions DumpOpts,
}
}
-bool DWARFExpression::Operation::verify(DWARFUnit *U) {
-
+bool DWARFExpression::Operation::verify(const Operation &Op, DWARFUnit *U) {
for (unsigned Operand = 0; Operand < 2; ++Operand) {
- unsigned Size = Desc.Op[Operand];
+ unsigned Size = Op.Desc.Op[Operand];
if (Size == Operation::SizeNA)
break;
@@ -369,13 +369,11 @@ bool DWARFExpression::Operation::verify(DWARFUnit *U) {
// the generic type should be done, so don't look up a base type in that
// case. The same holds for DW_OP_reinterpret, which is currently not
// supported.
- if (Opcode == DW_OP_convert && Operands[Operand] == 0)
+ if (Op.Opcode == DW_OP_convert && Op.Operands[Operand] == 0)
continue;
- auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
- if (!Die || Die.getTag() != dwarf::DW_TAG_base_type) {
- Error = true;
+ auto Die = U->getDIEForOffset(U->getOffset() + Op.Operands[Operand]);
+ if (!Die || Die.getTag() != dwarf::DW_TAG_base_type)
return false;
- }
}
}
@@ -384,7 +382,7 @@ bool DWARFExpression::Operation::verify(DWARFUnit *U) {
bool DWARFExpression::verify(DWARFUnit *U) {
for (auto &Op : *this)
- if (!Op.verify(U))
+ if (!Operation::verify(Op, U))
return false;
return true;
@@ -410,7 +408,7 @@ static bool printCompactDWARFExpr(raw_ostream &OS, DWARFExpression::iterator I,
SmallVector<PrintedExpr, 4> Stack;
while (I != E) {
- DWARFExpression::Operation &Op = *I;
+ const DWARFExpression::Operation &Op = *I;
uint8_t Opcode = Op.getCode();
switch (Opcode) {
case dwarf::DW_OP_regx: {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 2244a69bc121..cea0f63bbf81 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -332,7 +332,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
break;
case DW_FORM_LLVM_addrx_offset:
Value.uval = Data.getULEB128(OffsetPtr, &Err) << 32;
- Value.uval = Data.getU32(OffsetPtr, &Err);
+ Value.uval |= Data.getU32(OffsetPtr, &Err);
break;
case DW_FORM_string:
Value.cstr = Data.getCStr(OffsetPtr, &Err);
@@ -690,7 +690,7 @@ Optional<uint64_t> DWARFFormValue::getAsReference() const {
return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset;
return None;
}
-
+
Optional<DWARFFormValue::UnitOffset> DWARFFormValue::getAsRelativeReference() const {
if (!isFormClass(FC_Reference))
return None;
@@ -762,3 +762,17 @@ Optional<uint64_t> DWARFFormValue::getAsReferenceUVal() const {
return None;
return Value.uval;
}
+
+Optional<std::string>
+DWARFFormValue::getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const {
+ if (U == nullptr || !isFormClass(FC_Constant))
+ return None;
+ DWARFUnit *DLU = const_cast<DWARFUnit *>(U)->getLinkedUnit();
+ if (auto *LT = DLU->getContext().getLineTableForUnit(DLU)) {
+ std::string FileName;
+ if (LT->getFileNameByIndex(Value.uval, DLU->getCompilationDir(), Kind,
+ FileName))
+ return FileName;
+ }
+ return None;
+}
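getAsFile lets DWARFDie::getDeclFile (earlier in this diff) resolve DW_AT_decl_file through the linked unit's line table instead of a hand-rolled lookup. A usage sketch, assuming Die is an already-extracted, valid DWARFDie:
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include <string>
using namespace llvm;
// Internally: findRecursively(DW_AT_decl_file) -> DWARFFormValue::getAsFile().
static std::string declFileOf(const DWARFDie &Die) {
  return Die.getDeclFile(
      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
}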
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
index c876af1e9b51..b73dda3ff9ce 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFListTable.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/DWARF/DWARFListTable.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
@@ -54,11 +55,10 @@ Error DWARFListTableHeader::extract(DWARFDataExtractor Data,
"unrecognised %s table version %" PRIu16
" in table at offset 0x%" PRIx64,
SectionName.data(), HeaderData.Version, HeaderOffset);
- if (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)
- return createStringError(errc::not_supported,
- "%s table at offset 0x%" PRIx64
- " has unsupported address size %" PRIu8,
- SectionName.data(), HeaderOffset, HeaderData.AddrSize);
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ HeaderData.AddrSize, errc::not_supported,
+ "%s table at offset 0x%" PRIx64, SectionName.data(), HeaderOffset))
+ return SizeErr;
if (HeaderData.SegSize != 0)
return createStringError(errc::not_supported,
"%s table at offset 0x%" PRIx64
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index f17dacfce665..82c34f537036 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -315,15 +315,10 @@ bool DWARFUnitHeader::extract(DWARFContext &Context,
return false;
}
- if (!DWARFContext::isAddressSizeSupported(getAddressByteSize())) {
- SmallVector<std::string, 3> Sizes;
- for (auto Size : DWARFContext::getSupportedAddressSizes())
- Sizes.push_back(std::to_string(Size));
- Context.getWarningHandler()(createStringError(
- errc::invalid_argument,
- "DWARF unit at offset 0x%8.8" PRIx64 " "
- "has unsupported address size %" PRIu8 ", supported are %s",
- Offset, getAddressByteSize(), llvm::join(Sizes, ", ").c_str()));
+ if (Error SizeErr = DWARFContext::checkAddressSizeSupported(
+ getAddressByteSize(), errc::invalid_argument,
+ "DWARF unit at offset 0x%8.8" PRIx64, Offset)) {
+ Context.getWarningHandler()(std::move(SizeErr));
return false;
}
@@ -349,29 +344,6 @@ bool DWARFUnitHeader::applyIndexEntry(const DWARFUnitIndex::Entry *Entry) {
return true;
}
-// Parse the rangelist table header, including the optional array of offsets
-// following it (DWARF v5 and later).
-template<typename ListTableType>
-static Expected<ListTableType>
-parseListTableHeader(DWARFDataExtractor &DA, uint64_t Offset,
- DwarfFormat Format) {
- // We are expected to be called with Offset 0 or pointing just past the table
- // header. Correct Offset in the latter case so that it points to the start
- // of the header.
- if (Offset > 0) {
- uint64_t HeaderSize = DWARFListTableHeader::getHeaderSize(Format);
- if (Offset < HeaderSize)
- return createStringError(errc::invalid_argument, "did not detect a valid"
- " list table with base = 0x%" PRIx64 "\n",
- Offset);
- Offset -= HeaderSize;
- }
- ListTableType Table;
- if (Error E = Table.extractHeaderAndOffsets(DA, &Offset))
- return std::move(E);
- return Table;
-}
-
Error DWARFUnit::extractRangeList(uint64_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
@@ -411,11 +383,39 @@ void DWARFUnit::extractDIEsToVector(
DWARFDataExtractor DebugInfoData = getDebugInfoExtractor();
// The end offset has been already checked by DWARFUnitHeader::extract.
assert(DebugInfoData.isValidOffset(NextCUOffset - 1));
- uint32_t Depth = 0;
+ std::vector<uint32_t> Parents;
+ std::vector<uint32_t> PrevSiblings;
bool IsCUDie = true;
- while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
- Depth)) {
+ assert(
+ ((AppendCUDie && Dies.empty()) || (!AppendCUDie && Dies.size() == 1)) &&
+ "Dies array is not empty");
+
+ // Fill Parents and Siblings stacks with initial value.
+ Parents.push_back(UINT32_MAX);
+ if (!AppendCUDie)
+ Parents.push_back(0);
+ PrevSiblings.push_back(0);
+
+ // Start to extract dies.
+ do {
+ assert(Parents.size() > 0 && "Empty parents stack");
+ assert((Parents.back() == UINT32_MAX || Parents.back() <= Dies.size()) &&
+ "Wrong parent index");
+
+ // Extract the DIE. Stop if any error occurred.
+ if (!DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
+ Parents.back()))
+ break;
+
+ // If the previous sibling is remembered, update its SiblingIdx field.
+ if (PrevSiblings.back() > 0) {
+ assert(PrevSiblings.back() < Dies.size() &&
+ "Previous sibling index is out of Dies boundaries");
+ Dies[PrevSiblings.back()].setSiblingIdx(Dies.size());
+ }
+
+ // Store die into the Dies vector.
if (IsCUDie) {
if (AppendCUDie)
Dies.push_back(DIE);
@@ -425,26 +425,36 @@ void DWARFUnit::extractDIEsToVector(
// around 14-20 so let's pre-reserve the needed memory for
// our DIE entries accordingly.
Dies.reserve(Dies.size() + getDebugInfoSize() / 14);
- IsCUDie = false;
} else {
+ // Remember last previous sibling.
+ PrevSiblings.back() = Dies.size();
+
Dies.push_back(DIE);
}
+ // Check for new children scope.
if (const DWARFAbbreviationDeclaration *AbbrDecl =
DIE.getAbbreviationDeclarationPtr()) {
- // Normal DIE
- if (AbbrDecl->hasChildren())
- ++Depth;
- else if (Depth == 0)
- break; // This unit has a single DIE with no children.
+ if (AbbrDecl->hasChildren()) {
+ if (AppendCUDie || !IsCUDie) {
+ assert(Dies.size() > 0 && "Dies does not contain any die");
+ Parents.push_back(Dies.size() - 1);
+ PrevSiblings.push_back(0);
+ }
+ } else if (IsCUDie)
+ // Stop if we have a single compile unit DIE with no children.
+ break;
} else {
- // NULL DIE.
- if (Depth > 0)
- --Depth;
- if (Depth == 0)
- break; // We are done with this compile unit!
+ // NULL DIE: finishes current children scope.
+ Parents.pop_back();
+ PrevSiblings.pop_back();
}
- }
+
+ if (IsCUDie)
+ IsCUDie = false;
+
+ // Stop when compile unit die is removed from the parents stack.
+ } while (Parents.size() > 1);
}
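The rewritten extraction records, for every DIE, the index of its parent and, once the next sibling has been parsed, the index of that sibling, replacing the old depth counter. The getParent/getSibling/getPreviousSibling/getLastChild hunks further down then become index lookups rather than linear scans over DieArray. A toy restatement of the scheme (sketch, not the parser itself):
#include <cstdint>
#include <vector>
// A flattened DIE stores its parent's index (UINT32_MAX for the unit DIE) and
// its next sibling's index (0 when none was recorded).
struct FlatDie {
  uint32_t ParentIdx = UINT32_MAX;
  uint32_t SiblingIdx = 0;
};
static const FlatDie *parentOf(const std::vector<FlatDie> &Dies, uint32_t I) {
  const uint32_t P = Dies[I].ParentIdx;
  return P == UINT32_MAX ? nullptr : &Dies[P];
}
static const FlatDie *siblingOf(const std::vector<FlatDie> &Dies, uint32_t I) {
  const uint32_t S = Dies[I].SiblingIdx;
  return S == 0 ? nullptr : &Dies[S];
}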
void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -600,10 +610,14 @@ bool DWARFUnit::parseDWO() {
}
void DWARFUnit::clearDIEs(bool KeepCUDie) {
- if (DieArray.size() > (unsigned)KeepCUDie) {
- DieArray.resize((unsigned)KeepCUDie);
- DieArray.shrink_to_fit();
- }
+ // Do not use resize() + shrink_to_fit() to free memory occupied by dies.
+ // shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
+ // It depends on the implementation whether the request is fulfilled.
+ // Create a new vector with a small capacity and assign it to the DieArray to
+ // have previous contents freed.
+ DieArray = (KeepCUDie && !DieArray.empty())
+ ? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
+ : std::vector<DWARFDebugInfoEntry>();
}
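As the comment above notes, shrink_to_fit() is only a non-binding request, so clearDIEs now assigns a freshly built vector, which does release the old allocation when the temporary's buffer is adopted. The generic form of the idiom, as a sketch:
#include <vector>
// Drop every element (optionally keeping the first) and free the old buffer.
template <typename T>
static void releaseAllButFront(std::vector<T> &V, bool KeepFront) {
  V = (KeepFront && !V.empty()) ? std::vector<T>{V.front()} : std::vector<T>();
}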
Expected<DWARFAddressRangesVector>
@@ -750,65 +764,65 @@ const DWARFUnitIndex &llvm::getDWARFUnitIndex(DWARFContext &Context,
DWARFDie DWARFUnit::getParent(const DWARFDebugInfoEntry *Die) {
if (!Die)
return DWARFDie();
- const uint32_t Depth = Die->getDepth();
- // Unit DIEs always have a depth of zero and never have parents.
- if (Depth == 0)
- return DWARFDie();
- // Depth of 1 always means parent is the compile/type unit.
- if (Depth == 1)
- return getUnitDIE();
- // Look for previous DIE with a depth that is one less than the Die's depth.
- const uint32_t ParentDepth = Depth - 1;
- for (uint32_t I = getDIEIndex(Die) - 1; I > 0; --I) {
- if (DieArray[I].getDepth() == ParentDepth)
- return DWARFDie(this, &DieArray[I]);
+
+ if (Optional<uint32_t> ParentIdx = Die->getParentIdx()) {
+ assert(*ParentIdx < DieArray.size() &&
+ "ParentIdx is out of DieArray boundaries");
+ return DWARFDie(this, &DieArray[*ParentIdx]);
}
+
return DWARFDie();
}
DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) {
if (!Die)
return DWARFDie();
- uint32_t Depth = Die->getDepth();
- // Unit DIEs always have a depth of zero and never have siblings.
- if (Depth == 0)
- return DWARFDie();
- // NULL DIEs don't have siblings.
- if (Die->getAbbreviationDeclarationPtr() == nullptr)
- return DWARFDie();
- // Find the next DIE whose depth is the same as the Die's depth.
- for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx;
- ++I) {
- if (DieArray[I].getDepth() == Depth)
- return DWARFDie(this, &DieArray[I]);
+ if (Optional<uint32_t> SiblingIdx = Die->getSiblingIdx()) {
+ assert(*SiblingIdx < DieArray.size() &&
+ "SiblingIdx is out of DieArray boundaries");
+ return DWARFDie(this, &DieArray[*SiblingIdx]);
}
+
return DWARFDie();
}
DWARFDie DWARFUnit::getPreviousSibling(const DWARFDebugInfoEntry *Die) {
if (!Die)
return DWARFDie();
- uint32_t Depth = Die->getDepth();
- // Unit DIEs always have a depth of zero and never have siblings.
- if (Depth == 0)
+
+ Optional<uint32_t> ParentIdx = Die->getParentIdx();
+ if (!ParentIdx)
+ // Die is a root die, there is no previous sibling.
return DWARFDie();
- // Find the previous DIE whose depth is the same as the Die's depth.
- for (size_t I = getDIEIndex(Die); I > 0;) {
- --I;
- if (DieArray[I].getDepth() == Depth - 1)
- return DWARFDie();
- if (DieArray[I].getDepth() == Depth)
- return DWARFDie(this, &DieArray[I]);
+ assert(*ParentIdx < DieArray.size() &&
+ "ParentIdx is out of DieArray boundaries");
+ assert(getDIEIndex(Die) > 0 && "Die is a root die");
+
+ uint32_t PrevDieIdx = getDIEIndex(Die) - 1;
+ if (PrevDieIdx == *ParentIdx)
+ // Immediately previous node is parent, there is no previous sibling.
+ return DWARFDie();
+
+ while (DieArray[PrevDieIdx].getParentIdx() != *ParentIdx) {
+ PrevDieIdx = *DieArray[PrevDieIdx].getParentIdx();
+
+ assert(PrevDieIdx < DieArray.size() &&
+ "PrevDieIdx is out of DieArray boundaries");
+ assert(PrevDieIdx >= *ParentIdx &&
+ "PrevDieIdx is not a child of parent of Die");
}
- return DWARFDie();
+
+ return DWARFDie(this, &DieArray[PrevDieIdx]);
}
DWARFDie DWARFUnit::getFirstChild(const DWARFDebugInfoEntry *Die) {
if (!Die->hasChildren())
return DWARFDie();
+ // TODO: Instead of checking here for an invalid DIE, we might reject
+ // invalid DIEs at the parsing stage (DWARFUnit::extractDIEsToVector).
// We do not want access out of bounds when parsing corrupted debug data.
size_t I = getDIEIndex(Die) + 1;
if (I >= DieArray.size())
@@ -820,14 +834,30 @@ DWARFDie DWARFUnit::getLastChild(const DWARFDebugInfoEntry *Die) {
if (!Die->hasChildren())
return DWARFDie();
- uint32_t Depth = Die->getDepth();
- for (size_t I = getDIEIndex(Die) + 1, EndIdx = DieArray.size(); I < EndIdx;
- ++I) {
- if (DieArray[I].getDepth() == Depth + 1 &&
- DieArray[I].getTag() == dwarf::DW_TAG_null)
- return DWARFDie(this, &DieArray[I]);
- assert(DieArray[I].getDepth() > Depth && "Not processing children?");
+ if (Optional<uint32_t> SiblingIdx = Die->getSiblingIdx()) {
+ assert(*SiblingIdx < DieArray.size() &&
+ "SiblingIdx is out of DieArray boundaries");
+ assert(DieArray[*SiblingIdx - 1].getTag() == dwarf::DW_TAG_null &&
+ "Bad end of children marker");
+ return DWARFDie(this, &DieArray[*SiblingIdx - 1]);
+ }
+
+ // If SiblingIdx is set for a non-root DIE, the DWARF is known to be correct
+ // and the "end of children" marker must be present. For the root DIE there is
+ // no such guarantee (parsing of the root DIE may stop early if the "end of
+ // children" marker is missing, and SiblingIdx is always zero for the root
+ // DIE), so we do not assert on the "end of children" marker for the root DIE.
+
+ // TODO: Instead of checking here for an invalid DIE, we might reject
+ // invalid DIEs at the parsing stage (DWARFUnit::extractDIEsToVector).
+ if (getDIEIndex(Die) == 0 && DieArray.size() > 1 &&
+ DieArray.back().getTag() == dwarf::DW_TAG_null) {
+ // For the unit DIE we may take the last item from DieArray.
+ assert(getDIEIndex(Die) == getDIEIndex(getUnitDIE()) && "Bad unit die");
+ return DWARFDie(this, &DieArray.back());
}
+
return DWARFDie();
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index ac624ec8b80f..dcabefb9896e 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFVerifier.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
@@ -50,6 +51,9 @@ DWARFVerifier::DieRangeInfo::insert(const DWARFAddressRange &R) {
DWARFVerifier::DieRangeInfo::die_range_info_iterator
DWARFVerifier::DieRangeInfo::insert(const DieRangeInfo &RI) {
+ if (RI.Ranges.empty())
+ return Children.end();
+
auto End = Children.end();
auto Iter = Children.begin();
while (Iter != End) {
@@ -158,7 +162,30 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
return Success;
}
-unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
+bool DWARFVerifier::verifyName(const DWARFDie &Die) {
+ // FIXME Add some kind of record of which DIE names have already failed and
+ // don't bother checking a DIE that uses an already failed DIE.
+
+ std::string ReconstructedName;
+ raw_string_ostream OS(ReconstructedName);
+ std::string OriginalFullName;
+ Die.getFullName(OS, &OriginalFullName);
+ OS.flush();
+ if (OriginalFullName.empty() || OriginalFullName == ReconstructedName)
+ return 0;
+
+ error() << "Simplified template DW_AT_name could not be reconstituted:\n"
+ << formatv(" original: {0}\n"
+ " reconstituted: {1}\n",
+ OriginalFullName, ReconstructedName);
+ dump(Die) << '\n';
+ dump(Die.getDwarfUnit()->getUnitDIE()) << '\n';
+ return 1;
+}
+
+unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit,
+ ReferenceMap &UnitLocalReferences,
+ ReferenceMap &CrossUnitReferences) {
unsigned NumUnitErrors = 0;
unsigned NumDies = Unit.getNumDIEs();
for (unsigned I = 0; I < NumDies; ++I) {
@@ -169,9 +196,12 @@ unsigned DWARFVerifier::verifyUnitContents(DWARFUnit &Unit) {
for (auto AttrValue : Die.attributes()) {
NumUnitErrors += verifyDebugInfoAttribute(Die, AttrValue);
- NumUnitErrors += verifyDebugInfoForm(Die, AttrValue);
+ NumUnitErrors += verifyDebugInfoForm(Die, AttrValue, UnitLocalReferences,
+ CrossUnitReferences);
}
+ NumUnitErrors += verifyName(Die);
+
if (Die.hasChildren()) {
if (Die.getFirstChild().isValid() &&
Die.getFirstChild().getTag() == DW_TAG_null) {
@@ -299,6 +329,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
bool hasDIE = DebugInfoData.isValidOffset(Offset);
DWARFUnitVector TypeUnitVector;
DWARFUnitVector CompileUnitVector;
+ /// A map that tracks all references (converted absolute references) so we
+ /// can verify each reference points to a valid DIE and not an offset that
+ /// lies between two valid DIEs.
+ ReferenceMap CrossUnitReferences;
while (hasDIE) {
OffsetStart = Offset;
if (!verifyUnitHeader(DebugInfoData, &Offset, UnitIdx, UnitType,
@@ -309,6 +343,7 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
} else {
DWARFUnitHeader Header;
Header.extract(DCtx, DebugInfoData, &OffsetStart, SectionKind);
+ ReferenceMap UnitLocalReferences;
DWARFUnit *Unit;
switch (UnitType) {
case dwarf::DW_UT_type:
@@ -337,7 +372,10 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
}
default: { llvm_unreachable("Invalid UnitType."); }
}
- NumDebugInfoErrors += verifyUnitContents(*Unit);
+ NumDebugInfoErrors +=
+ verifyUnitContents(*Unit, UnitLocalReferences, CrossUnitReferences);
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ UnitLocalReferences, [&](uint64_t Offset) { return Unit; });
}
hasDIE = DebugInfoData.isValidOffset(Offset);
++UnitIdx;
@@ -348,7 +386,14 @@ unsigned DWARFVerifier::verifyUnitSection(const DWARFSection &S,
}
if (!isHeaderChainValid)
++NumDebugInfoErrors;
- NumDebugInfoErrors += verifyDebugInfoReferences();
+ NumDebugInfoErrors += verifyDebugInfoReferences(
+ CrossUnitReferences, [&](uint64_t Offset) -> DWARFUnit * {
+ if (DWARFUnit *U = TypeUnitVector.getUnitForOffset(Offset))
+ return U;
+ if (DWARFUnit *U = CompileUnitVector.getUnitForOffset(Offset))
+ return U;
+ return nullptr;
+ });
return NumDebugInfoErrors;
}
@@ -383,7 +428,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
return NumErrors;
}
- DWARFAddressRangesVector Ranges = RangesOrError.get();
+ const DWARFAddressRangesVector &Ranges = RangesOrError.get();
// Build RI for this DIE and check that ranges within this DIE do not
// overlap.
DieRangeInfo RI(Die);
@@ -409,7 +454,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
if (!IsObjectFile || IsMachOObject || Die.getTag() != DW_TAG_compile_unit) {
bool DumpDieAfterError = false;
- for (auto Range : Ranges) {
+ for (const auto &Range : Ranges) {
if (!Range.valid()) {
++NumErrors;
error() << "Invalid address range " << Range << "\n";
@@ -444,7 +489,7 @@ unsigned DWARFVerifier::verifyDieRanges(const DWARFDie &Die,
}
// Verify that ranges are contained within their parent.
- bool ShouldBeContained = !Ranges.empty() && !ParentRI.Ranges.empty() &&
+ bool ShouldBeContained = !RI.Ranges.empty() && !ParentRI.Ranges.empty() &&
!(Die.getTag() == DW_TAG_subprogram &&
ParentRI.Die.getTag() == DW_TAG_subprogram);
if (ShouldBeContained && !ParentRI.contains(RI)) {
@@ -507,9 +552,10 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
DataExtractor Data(toStringRef(Entry.Expr), DCtx.isLittleEndian(), 0);
DWARFExpression Expression(Data, U->getAddressByteSize(),
U->getFormParams().Format);
- bool Error = any_of(Expression, [](DWARFExpression::Operation &Op) {
- return Op.isError();
- });
+ bool Error =
+ any_of(Expression, [](const DWARFExpression::Operation &Op) {
+ return Op.isError();
+ });
if (Error || !Expression.verify(U))
ReportError("DIE contains invalid DWARF expression:");
}
@@ -587,7 +633,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
}
unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
- DWARFAttribute &AttrValue) {
+ DWARFAttribute &AttrValue,
+ ReferenceMap &LocalReferences,
+ ReferenceMap &CrossUnitReferences) {
const DWARFObject &DObj = DCtx.getDWARFObj();
auto DieCU = Die.getDwarfUnit();
unsigned NumErrors = 0;
@@ -615,7 +663,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
- ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+ LocalReferences[*RefVal].insert(Die.getOffset());
}
}
break;
@@ -634,7 +682,7 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
} else {
// Valid reference, but we will verify it points to an actual
// DIE later.
- ReferenceToDIEOffsets[*RefVal].insert(Die.getOffset());
+ CrossUnitReferences[*RefVal].insert(Die.getOffset());
}
}
break;
@@ -694,20 +742,24 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
return NumErrors;
}
-unsigned DWARFVerifier::verifyDebugInfoReferences() {
- // Take all references and make sure they point to an actual DIE by
- // getting the DIE by offset and emitting an error
- OS << "Verifying .debug_info references...\n";
+unsigned DWARFVerifier::verifyDebugInfoReferences(
+ const ReferenceMap &References,
+ llvm::function_ref<DWARFUnit *(uint64_t)> GetUnitForOffset) {
+ auto GetDIEForOffset = [&](uint64_t Offset) {
+ if (DWARFUnit *U = GetUnitForOffset(Offset))
+ return U->getDIEForOffset(Offset);
+ return DWARFDie();
+ };
unsigned NumErrors = 0;
for (const std::pair<const uint64_t, std::set<uint64_t>> &Pair :
- ReferenceToDIEOffsets) {
- if (DCtx.getDIEForOffset(Pair.first))
+ References) {
+ if (GetDIEForOffset(Pair.first))
continue;
++NumErrors;
error() << "invalid DIE reference " << format("0x%08" PRIx64, Pair.first)
<< ". Offset is in between DIEs:\n";
for (auto Offset : Pair.second)
- dump(DCtx.getDIEForOffset(Offset)) << '\n';
+ dump(GetDIEForOffset(Offset)) << '\n';
OS << "\n";
}
return NumErrors;
@@ -1349,11 +1401,12 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
U->getAddressByteSize());
DWARFExpression Expression(Data, U->getAddressByteSize(),
U->getFormParams().Format);
- bool IsInteresting = any_of(Expression, [](DWARFExpression::Operation &Op) {
- return !Op.isError() && (Op.getCode() == DW_OP_addr ||
- Op.getCode() == DW_OP_form_tls_address ||
- Op.getCode() == DW_OP_GNU_push_tls_address);
- });
+ bool IsInteresting =
+ any_of(Expression, [](const DWARFExpression::Operation &Op) {
+ return !Op.isError() && (Op.getCode() == DW_OP_addr ||
+ Op.getCode() == DW_OP_form_tls_address ||
+ Op.getCode() == DW_OP_GNU_push_tls_address);
+ });
if (IsInteresting)
return true;
}
@@ -1488,7 +1541,7 @@ unsigned DWARFVerifier::verifyDebugNames(const DWARFSection &AccelSection,
if (NumErrors > 0)
return NumErrors;
for (const auto &NI : AccelTable)
- for (DWARFDebugNames::NameTableEntry NTE : NI)
+ for (const DWARFDebugNames::NameTableEntry &NTE : NI)
NumErrors += verifyNameIndexEntries(NI, NTE);
if (NumErrors > 0)
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index cdea0e39486d..b2c43b893cd3 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -260,17 +260,15 @@ static void convertFunctionLineTable(raw_ostream &Log, CUInfo &CUI,
if (!CUI.LineTable->lookupAddressRange(SecAddress, RangeSize, RowVector)) {
// If we have a DW_TAG_subprogram but no line entries, fall back to using
// the DW_AT_decl_file and DW_AT_decl_line if we have both attributes.
- if (auto FileIdx =
- dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_file}))) {
- if (auto Line =
- dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
- LineEntry LE(StartAddress, CUI.DWARFToGSYMFileIndex(Gsym, *FileIdx),
- *Line);
- FI.OptLineTable = LineTable();
- FI.OptLineTable->push(LE);
- // LE.Addr = EndAddress;
- // FI.OptLineTable->push(LE);
- }
+ std::string FilePath = Die.getDeclFile(
+ DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
+ if (FilePath.empty())
+ return;
+ if (auto Line =
+ dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
+ LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
+ FI.OptLineTable = LineTable();
+ FI.OptLineTable->push(LE);
}
return;
}
@@ -394,11 +392,11 @@ void DwarfTransformer::handleDie(raw_ostream &OS, CUInfo &CUI, DWARFDie Die) {
if (Range.LowPC != 0) {
if (!Gsym.isQuiet()) {
// Unexpected invalid address, emit a warning
- Log << "warning: DIE has an address range whose start address is "
- "not in any executable sections ("
- << *Gsym.GetValidTextRanges()
- << ") and will not be processed:\n";
- Die.dump(Log, 0, DIDumpOptions::getForSingleDIE());
+ OS << "warning: DIE has an address range whose start address is "
+ "not in any executable sections ("
+ << *Gsym.GetValidTextRanges()
+ << ") and will not be processed:\n";
+ Die.dump(OS, 0, DIDumpOptions::getForSingleDIE());
}
}
break;
diff --git a/llvm/lib/DebugInfo/GSYM/FileWriter.cpp b/llvm/lib/DebugInfo/GSYM/FileWriter.cpp
index 4b30dcb60a7b..b725f3ac74f5 100644
--- a/llvm/lib/DebugInfo/GSYM/FileWriter.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FileWriter.cpp
@@ -1,9 +1,8 @@
//===- FileWriter.cpp -------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp
index 044ddb8ba1ba..c1e8eccd0daa 100644
--- a/llvm/lib/DebugInfo/GSYM/Range.cpp
+++ b/llvm/lib/DebugInfo/GSYM/Range.cpp
@@ -1,9 +1,8 @@
//===- Range.cpp ------------------------------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp b/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
index 5dc9c86b34fd..00fc70ca5a54 100644
--- a/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -35,7 +35,7 @@ public:
} // end anonymous namespace
-using Interval = std::pair<uint32_t, uint32_t>;
+using Interval = std::pair<uint64_t, uint64_t>;
static Interval intersect(const Interval &I1, const Interval &I2) {
return std::make_pair(std::max(I1.first, I2.first),
@@ -85,7 +85,7 @@ MappedBlockStream::createFpmStream(const MSFLayout &Layout,
return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
-Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+Error MappedBlockStream::readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) {
// Make sure we aren't trying to read beyond the end of the stream.
if (auto EC = checkOffsetForRead(Offset, Size))
@@ -138,7 +138,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
if (Intersection != RequestExtent)
continue;
- uint32_t CacheRangeOffset =
+ uint64_t CacheRangeOffset =
AbsoluteDifference(CachedExtent.first, Intersection.first);
Buffer = CachedAlloc.slice(CacheRangeOffset, Size);
return Error::success();
@@ -163,14 +163,14 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
return Error::success();
}
-Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
+Error MappedBlockStream::readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) {
// Make sure we aren't trying to read beyond the end of the stream.
if (auto EC = checkOffsetForRead(Offset, 1))
return EC;
- uint32_t First = Offset / BlockSize;
- uint32_t Last = First;
+ uint64_t First = Offset / BlockSize;
+ uint64_t Last = First;
while (Last < getNumBlocks() - 1) {
if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1)
@@ -178,13 +178,13 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
++Last;
}
- uint32_t OffsetInFirstBlock = Offset % BlockSize;
- uint32_t BytesFromFirstBlock = BlockSize - OffsetInFirstBlock;
- uint32_t BlockSpan = Last - First + 1;
- uint32_t ByteSpan = BytesFromFirstBlock + (BlockSpan - 1) * BlockSize;
+ uint64_t OffsetInFirstBlock = Offset % BlockSize;
+ uint64_t BytesFromFirstBlock = BlockSize - OffsetInFirstBlock;
+ uint64_t BlockSpan = Last - First + 1;
+ uint64_t ByteSpan = BytesFromFirstBlock + (BlockSpan - 1) * BlockSize;
ArrayRef<uint8_t> BlockData;
- uint32_t MsfOffset = blockToOffset(StreamLayout.Blocks[First], BlockSize);
+ uint64_t MsfOffset = blockToOffset(StreamLayout.Blocks[First], BlockSize);
if (auto EC = MsfData.readBytes(MsfOffset, BlockSize, BlockData))
return EC;
@@ -193,9 +193,9 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset,
return Error::success();
}
-uint32_t MappedBlockStream::getLength() { return StreamLayout.Length; }
+uint64_t MappedBlockStream::getLength() { return StreamLayout.Length; }
-bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
+bool MappedBlockStream::tryReadContiguously(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) {
if (Size == 0) {
Buffer = ArrayRef<uint8_t>();
@@ -206,15 +206,15 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
// all subsequent blocks are contiguous. For example, a 10k read with a 4k
// block size can be filled with a reference if, from the starting offset,
// 3 blocks in a row are contiguous.
- uint32_t BlockNum = Offset / BlockSize;
- uint32_t OffsetInBlock = Offset % BlockSize;
- uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock);
- uint32_t NumAdditionalBlocks =
+ uint64_t BlockNum = Offset / BlockSize;
+ uint64_t OffsetInBlock = Offset % BlockSize;
+ uint64_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock);
+ uint64_t NumAdditionalBlocks =
alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize;
- uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
- uint32_t E = StreamLayout.Blocks[BlockNum];
- for (uint32_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
+ uint64_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
+ uint64_t E = StreamLayout.Blocks[BlockNum];
+ for (uint64_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
if (StreamLayout.Blocks[I + BlockNum] != E)
return false;
}
@@ -225,8 +225,8 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
// cross-block span, explicitly resize the ArrayRef to cover the entire
// request length.
ArrayRef<uint8_t> BlockData;
- uint32_t FirstBlockAddr = StreamLayout.Blocks[BlockNum];
- uint32_t MsfOffset = blockToOffset(FirstBlockAddr, BlockSize);
+ uint64_t FirstBlockAddr = StreamLayout.Blocks[BlockNum];
+ uint64_t MsfOffset = blockToOffset(FirstBlockAddr, BlockSize);
if (auto EC = MsfData.readBytes(MsfOffset, BlockSize, BlockData)) {
consumeError(std::move(EC));
return false;
@@ -236,28 +236,28 @@ bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
return true;
}
-Error MappedBlockStream::readBytes(uint32_t Offset,
+Error MappedBlockStream::readBytes(uint64_t Offset,
MutableArrayRef<uint8_t> Buffer) {
- uint32_t BlockNum = Offset / BlockSize;
- uint32_t OffsetInBlock = Offset % BlockSize;
+ uint64_t BlockNum = Offset / BlockSize;
+ uint64_t OffsetInBlock = Offset % BlockSize;
// Make sure we aren't trying to read beyond the end of the stream.
if (auto EC = checkOffsetForRead(Offset, Buffer.size()))
return EC;
- uint32_t BytesLeft = Buffer.size();
- uint32_t BytesWritten = 0;
+ uint64_t BytesLeft = Buffer.size();
+ uint64_t BytesWritten = 0;
uint8_t *WriteBuffer = Buffer.data();
while (BytesLeft > 0) {
- uint32_t StreamBlockAddr = StreamLayout.Blocks[BlockNum];
+ uint64_t StreamBlockAddr = StreamLayout.Blocks[BlockNum];
ArrayRef<uint8_t> BlockData;
- uint32_t Offset = blockToOffset(StreamBlockAddr, BlockSize);
+ uint64_t Offset = blockToOffset(StreamBlockAddr, BlockSize);
if (auto EC = MsfData.readBytes(Offset, BlockSize, BlockData))
return EC;
const uint8_t *ChunkStart = BlockData.data() + OffsetInBlock;
- uint32_t BytesInChunk = std::min(BytesLeft, BlockSize - OffsetInBlock);
+ uint64_t BytesInChunk = std::min(BytesLeft, BlockSize - OffsetInBlock);
::memcpy(WriteBuffer + BytesWritten, ChunkStart, BytesInChunk);
BytesWritten += BytesInChunk;
@@ -271,7 +271,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset,
void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); }
-void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
+void MappedBlockStream::fixCacheAfterWrite(uint64_t Offset,
ArrayRef<uint8_t> Data) const {
// If this write overlapped a read which previously came from the pool,
// someone may still be holding a pointer to that alloc which is now invalid.
@@ -297,10 +297,10 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
auto Intersection = intersect(WriteInterval, CachedInterval);
assert(Intersection.first <= Intersection.second);
- uint32_t Length = Intersection.second - Intersection.first;
- uint32_t SrcOffset =
+ uint64_t Length = Intersection.second - Intersection.first;
+ uint64_t SrcOffset =
AbsoluteDifference(WriteInterval.first, Intersection.first);
- uint32_t DestOffset =
+ uint64_t DestOffset =
AbsoluteDifference(CachedInterval.first, Intersection.first);
::memcpy(Alloc.data() + DestOffset, Data.data() + SrcOffset, Length);
}
@@ -370,39 +370,39 @@ WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
return createStream(Layout.SB->BlockSize, MinLayout, MsfData, Allocator);
}
-Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+Error WritableMappedBlockStream::readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) {
return ReadInterface.readBytes(Offset, Size, Buffer);
}
Error WritableMappedBlockStream::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) {
+ uint64_t Offset, ArrayRef<uint8_t> &Buffer) {
return ReadInterface.readLongestContiguousChunk(Offset, Buffer);
}
-uint32_t WritableMappedBlockStream::getLength() {
+uint64_t WritableMappedBlockStream::getLength() {
return ReadInterface.getLength();
}
-Error WritableMappedBlockStream::writeBytes(uint32_t Offset,
+Error WritableMappedBlockStream::writeBytes(uint64_t Offset,
ArrayRef<uint8_t> Buffer) {
// Make sure we aren't trying to write beyond the end of the stream.
if (auto EC = checkOffsetForWrite(Offset, Buffer.size()))
return EC;
- uint32_t BlockNum = Offset / getBlockSize();
- uint32_t OffsetInBlock = Offset % getBlockSize();
+ uint64_t BlockNum = Offset / getBlockSize();
+ uint64_t OffsetInBlock = Offset % getBlockSize();
- uint32_t BytesLeft = Buffer.size();
- uint32_t BytesWritten = 0;
+ uint64_t BytesLeft = Buffer.size();
+ uint64_t BytesWritten = 0;
while (BytesLeft > 0) {
- uint32_t StreamBlockAddr = getStreamLayout().Blocks[BlockNum];
- uint32_t BytesToWriteInChunk =
+ uint64_t StreamBlockAddr = getStreamLayout().Blocks[BlockNum];
+ uint64_t BytesToWriteInChunk =
std::min(BytesLeft, getBlockSize() - OffsetInBlock);
const uint8_t *Chunk = Buffer.data() + BytesWritten;
ArrayRef<uint8_t> ChunkData(Chunk, BytesToWriteInChunk);
- uint32_t MsfOffset = blockToOffset(StreamBlockAddr, getBlockSize());
+ uint64_t MsfOffset = blockToOffset(StreamBlockAddr, getBlockSize());
MsfOffset += OffsetInBlock;
if (auto EC = WriteInterface.writeBytes(MsfOffset, ChunkData))
return EC;
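
These widenings all feed the same block arithmetic, which must no longer truncate once stream offsets pass 4 GiB; a standalone sketch of that index math (plain C++, the struct and function names are made up):

    #include <cstdint>

    struct BlockAddr {
      uint64_t BlockNum;       // index into StreamLayout.Blocks
      uint64_t OffsetInBlock;  // byte offset within that block
    };

    // Split a 64-bit stream offset into (block index, offset in block); with
    // uint32_t this silently wrapped for offsets at or beyond 2^32.
    static BlockAddr splitOffset(uint64_t Offset, uint64_t BlockSize) {
      return {Offset / BlockSize, Offset % BlockSize};
    }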
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 98a8acaffd60..0584966a98c5 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -334,8 +334,6 @@ static uint16_t toSecMapFlags(uint32_t Flags) {
Ret |= static_cast<uint16_t>(OMFSegDescFlags::Write);
if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute);
- if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
- Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute);
if (!(Flags & COFF::IMAGE_SCN_MEM_16BIT))
Ret |= static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit);
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
index 7a258acbd7c0..5e6412275063 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -17,8 +17,8 @@ namespace pdb {
namespace {
-Expected<std::string> readStreamData(BinaryStream &Stream, uint32_t Limit) {
- uint32_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
+Expected<std::string> readStreamData(BinaryStream &Stream, uint64_t Limit) {
+ uint64_t Offset = 0, DataLength = std::min(Limit, Stream.getLength());
std::string Result;
Result.reserve(DataLength);
while (Offset < DataLength) {
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index a508f163a2d8..f33125474e3a 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -103,7 +103,7 @@ void PDBFileBuilder::addInjectedSource(StringRef Name,
// table and the hash value is dependent on the exact contents of the string.
// link.exe lowercases a path and converts / to \, so we must do the same.
SmallString<64> VName;
- sys::path::native(Name.lower(), VName);
+ sys::path::native(Name.lower(), VName, sys::path::Style::windows_backslash);
uint32_t NI = getStringTableBuilder().insert(Name);
uint32_t VNI = getStringTableBuilder().insert(VName);
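
A hedged illustration of why the explicit style matters: without it the separator conversion depends on the host, while link.exe's injected-source hashing always expects lowercase, backslash-separated paths (the path literal below is illustrative):

    SmallString<64> VName;
    sys::path::native(StringRef("Foo/Bar.cpp").lower(), VName,
                      sys::path::Style::windows_backslash);
    // VName == "foo\bar.cpp" on every host, matching the hashing convention.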
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 72ca72230507..f3f09584fdc9 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -280,10 +280,7 @@ bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
return false;
for (const SectionRef &Section : Obj->sections()) {
StringRef Name;
- if (Expected<StringRef> NameOrErr = Section.getName())
- Name = *NameOrErr;
- else
- consumeError(NameOrErr.takeError());
+ consumeError(Section.getName().moveInto(Name));
Name = Name.substr(Name.find_first_not_of("._"));
if (Name == "gnu_debuglink") {
@@ -600,7 +597,9 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
}
}
if (!Context)
- Context = DWARFContext::create(*Objects.second, nullptr, Opts.DWPName);
+ Context = DWARFContext::create(
+ *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
+ nullptr, Opts.DWPName);
return createModuleInfo(Objects.first, std::move(Context), ModuleName);
}
@@ -650,18 +649,9 @@ StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
std::string
LLVMSymbolizer::DemangleName(const std::string &Name,
const SymbolizableModule *DbiModuleDescriptor) {
- // We can spoil names of symbols with C linkage, so use an heuristic
- // approach to check if the name should be demangled.
- if (Name.substr(0, 2) == "_Z") {
- int status = 0;
- char *DemangledName =
- itaniumDemangle(Name.c_str(), nullptr, nullptr, &status);
- if (status != 0)
- return Name;
- std::string Result = DemangledName;
- free(DemangledName);
+ std::string Result;
+ if (nonMicrosoftDemangle(Name.c_str(), Result))
return Result;
- }
if (!Name.empty() && Name.front() == '?') {
// Only do MSVC C++ demangling on symbols starting with '?'.
@@ -672,7 +662,7 @@ LLVMSymbolizer::DemangleName(const std::string &Name,
MSDF_NoMemberType | MSDF_NoReturnType));
if (status != 0)
return Name;
- std::string Result = DemangledName;
+ Result = DemangledName;
free(DemangledName);
return Result;
}
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
new file mode 100644
index 000000000000..d2f1bf4323ee
--- /dev/null
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -0,0 +1,45 @@
+//===--- DLangDemangle.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines a demangler for the D programming language as specified
+/// in the ABI specification, available at:
+/// https://dlang.org/spec/abi.html#name_mangling
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/Utility.h"
+
+#include <cstring>
+
+using namespace llvm;
+using llvm::itanium_demangle::OutputBuffer;
+
+char *llvm::dlangDemangle(const char *MangledName) {
+ if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
+ return nullptr;
+
+ OutputBuffer Demangled;
+ if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
+ return nullptr;
+
+ if (strcmp(MangledName, "_Dmain") == 0)
+ Demangled << "D main";
+
+ // OutputBuffer's internal buffer is not null-terminated, so append a
+ // terminator here to return a valid C string.
+ if (Demangled.getCurrentPosition() > 0) {
+ Demangled << '\0';
+ Demangled.setCurrentPosition(Demangled.getCurrentPosition() - 1);
+ return Demangled.getBuffer();
+ }
+
+ free(Demangled.getBuffer());
+ return nullptr;
+}
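
A hedged usage sketch of the new entry point; the demangler heap-allocates the result, so the caller frees it:

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      if (char *Demangled = llvm::dlangDemangle("_Dmain")) {
        std::printf("%s\n", Demangled); // prints "D main"
        std::free(Demangled);
      }
    }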
diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp
index 1851fb77b09e..13aa2864c183 100644
--- a/llvm/lib/Demangle/Demangle.cpp
+++ b/llvm/lib/Demangle/Demangle.cpp
@@ -12,32 +12,53 @@
#include "llvm/Demangle/Demangle.h"
#include <cstdlib>
+#include <cstring>
-static bool isItaniumEncoding(const std::string &MangledName) {
- size_t Pos = MangledName.find_first_not_of('_');
- // A valid Itanium encoding requires 1-4 leading underscores, followed by 'Z'.
- return Pos > 0 && Pos <= 4 && MangledName[Pos] == 'Z';
+static bool isItaniumEncoding(const char *S) {
+ // Itanium encoding requires 1 or 3 leading underscores, followed by 'Z'.
+ return std::strncmp(S, "_Z", 2) == 0 || std::strncmp(S, "___Z", 4) == 0;
}
-static bool isRustEncoding(const std::string &MangledName) {
+static bool isRustEncoding(const char *S) { return S[0] == '_' && S[1] == 'R'; }
+
+static bool isDLangEncoding(const std::string &MangledName) {
return MangledName.size() >= 2 && MangledName[0] == '_' &&
- MangledName[1] == 'R';
+ MangledName[1] == 'D';
}
std::string llvm::demangle(const std::string &MangledName) {
- char *Demangled;
+ std::string Result;
+ const char *S = MangledName.c_str();
+
+ if (nonMicrosoftDemangle(S, Result))
+ return Result;
+
+ if (S[0] == '_' && nonMicrosoftDemangle(S + 1, Result))
+ return Result;
+
+ if (char *Demangled =
+ microsoftDemangle(S, nullptr, nullptr, nullptr, nullptr)) {
+ Result = Demangled;
+ std::free(Demangled);
+ return Result;
+ }
+
+ return MangledName;
+}
+
+bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) {
+ char *Demangled = nullptr;
if (isItaniumEncoding(MangledName))
- Demangled = itaniumDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
+ Demangled = itaniumDemangle(MangledName, nullptr, nullptr, nullptr);
else if (isRustEncoding(MangledName))
- Demangled = rustDemangle(MangledName.c_str(), nullptr, nullptr, nullptr);
- else
- Demangled = microsoftDemangle(MangledName.c_str(), nullptr, nullptr,
- nullptr, nullptr);
+ Demangled = rustDemangle(MangledName, nullptr, nullptr, nullptr);
+ else if (isDLangEncoding(MangledName))
+ Demangled = dlangDemangle(MangledName);
if (!Demangled)
- return MangledName;
+ return false;
- std::string Ret = Demangled;
+ Result = Demangled;
std::free(Demangled);
- return Ret;
+ return true;
}
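
With this change llvm::demangle tries the non-Microsoft demanglers first (Itanium, Rust, D), retries once with a leading underscore stripped, and only then falls back to the Microsoft demangler; a short usage sketch with illustrative symbol strings:

    #include "llvm/Demangle/Demangle.h"
    #include <iostream>

    int main() {
      std::cout << llvm::demangle("_Z3foov") << '\n';      // foo()
      std::cout << llvm::demangle("_Dmain") << '\n';       // D main
      std::cout << llvm::demangle("not_mangled") << '\n';  // returned unchanged
    }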
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index fad9b6b7b63b..3f68f76761ce 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -333,21 +333,21 @@ char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
int InternalStatus = demangle_success;
Demangler Parser(MangledName, MangledName + std::strlen(MangledName));
- OutputStream S;
+ OutputBuffer OB;
Node *AST = Parser.parse();
if (AST == nullptr)
InternalStatus = demangle_invalid_mangled_name;
- else if (!initializeOutputStream(Buf, N, S, 1024))
+ else if (!initializeOutputBuffer(Buf, N, OB, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
assert(Parser.ForwardTemplateRefs.empty());
- AST->print(S);
- S += '\0';
+ AST->print(OB);
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- Buf = S.getBuffer();
+ *N = OB.getCurrentPosition();
+ Buf = OB.getBuffer();
}
if (Status)
@@ -385,14 +385,14 @@ bool ItaniumPartialDemangler::partialDemangle(const char *MangledName) {
}
static char *printNode(const Node *RootNode, char *Buf, size_t *N) {
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
- RootNode->print(S);
- S += '\0';
+ RootNode->print(OB);
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::getFunctionBaseName(char *Buf, size_t *N) const {
@@ -430,8 +430,8 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
return nullptr;
const Node *Name = static_cast<const FunctionEncoding *>(RootNode)->getName();
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
KeepGoingLocalFunction:
@@ -449,25 +449,25 @@ char *ItaniumPartialDemangler::getFunctionDeclContextName(char *Buf,
switch (Name->getKind()) {
case Node::KStdQualifiedName:
- S += "std";
+ OB += "std";
break;
case Node::KNestedName:
- static_cast<const NestedName *>(Name)->Qual->print(S);
+ static_cast<const NestedName *>(Name)->Qual->print(OB);
break;
case Node::KLocalName: {
auto *LN = static_cast<const LocalName *>(Name);
- LN->Encoding->print(S);
- S += "::";
+ LN->Encoding->print(OB);
+ OB += "::";
Name = LN->Entity;
goto KeepGoingLocalFunction;
}
default:
break;
}
- S += '\0';
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::getFunctionName(char *Buf, size_t *N) const {
@@ -483,17 +483,17 @@ char *ItaniumPartialDemangler::getFunctionParameters(char *Buf,
return nullptr;
NodeArray Params = static_cast<FunctionEncoding *>(RootNode)->getParams();
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
- S += '(';
- Params.printWithComma(S);
- S += ')';
- S += '\0';
+ OB += '(';
+ Params.printWithComma(OB);
+ OB += ')';
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::getFunctionReturnType(
@@ -501,18 +501,18 @@ char *ItaniumPartialDemangler::getFunctionReturnType(
if (!isFunction())
return nullptr;
- OutputStream S;
- if (!initializeOutputStream(Buf, N, S, 128))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(Buf, N, OB, 128))
return nullptr;
if (const Node *Ret =
static_cast<const FunctionEncoding *>(RootNode)->getReturnType())
- Ret->print(S);
+ Ret->print(OB);
- S += '\0';
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- return S.getBuffer();
+ *N = OB.getCurrentPosition();
+ return OB.getBuffer();
}
char *ItaniumPartialDemangler::finishDemangle(char *Buf, size_t *N) const {
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index 303207176be7..d8da3b48e25b 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -965,13 +965,13 @@ NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
// Render this class template name into a string buffer so that we can
// memorize it for the purpose of back-referencing.
- OutputStream OS;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
// FIXME: Propagate out-of-memory as an error?
std::terminate();
- Identifier->output(OS, OF_Default);
- OS << '\0';
- char *Name = OS.getBuffer();
+ Identifier->output(OB, OF_Default);
+ OB << '\0';
+ char *Name = OB.getBuffer();
StringView Owned = copyString(Name);
memorizeString(Owned);
@@ -1107,7 +1107,7 @@ static void writeHexDigit(char *Buffer, uint8_t Digit) {
*Buffer = (Digit < 10) ? ('0' + Digit) : ('A' + Digit - 10);
}
-static void outputHex(OutputStream &OS, unsigned C) {
+static void outputHex(OutputBuffer &OB, unsigned C) {
assert (C != 0);
// It's easier to do the math if we can work from right to left, but we need
@@ -1130,43 +1130,43 @@ static void outputHex(OutputStream &OS, unsigned C) {
TempBuffer[Pos--] = 'x';
assert(Pos >= 0);
TempBuffer[Pos--] = '\\';
- OS << StringView(&TempBuffer[Pos + 1]);
+ OB << StringView(&TempBuffer[Pos + 1]);
}
-static void outputEscapedChar(OutputStream &OS, unsigned C) {
+static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
switch (C) {
case '\0': // nul
- OS << "\\0";
+ OB << "\\0";
return;
case '\'': // single quote
- OS << "\\\'";
+ OB << "\\\'";
return;
case '\"': // double quote
- OS << "\\\"";
+ OB << "\\\"";
return;
case '\\': // backslash
- OS << "\\\\";
+ OB << "\\\\";
return;
case '\a': // bell
- OS << "\\a";
+ OB << "\\a";
return;
case '\b': // backspace
- OS << "\\b";
+ OB << "\\b";
return;
case '\f': // form feed
- OS << "\\f";
+ OB << "\\f";
return;
case '\n': // new line
- OS << "\\n";
+ OB << "\\n";
return;
case '\r': // carriage return
- OS << "\\r";
+ OB << "\\r";
return;
case '\t': // tab
- OS << "\\t";
+ OB << "\\t";
return;
case '\v': // vertical tab
- OS << "\\v";
+ OB << "\\v";
return;
default:
break;
@@ -1174,11 +1174,11 @@ static void outputEscapedChar(OutputStream &OS, unsigned C) {
if (C > 0x1F && C < 0x7F) {
// Standard ascii char.
- OS << (char)C;
+ OB << (char)C;
return;
}
- outputHex(OS, C);
+ outputHex(OB, C);
}
static unsigned countTrailingNullBytes(const uint8_t *StringBytes, int Length) {
@@ -1273,7 +1273,7 @@ FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
EncodedStringLiteralNode *
Demangler::demangleStringLiteral(StringView &MangledName) {
// This function uses goto, so declare all variables up front.
- OutputStream OS;
+ OutputBuffer OB;
StringView CRC;
uint64_t StringByteSize;
bool IsWcharT = false;
@@ -1284,7 +1284,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
// Must happen before the first `goto StringLiteralError`.
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
// FIXME: Propagate out-of-memory as an error?
std::terminate();
@@ -1329,7 +1329,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
goto StringLiteralError;
wchar_t W = demangleWcharLiteral(MangledName);
if (StringByteSize != 2 || Result->IsTruncated)
- outputEscapedChar(OS, W);
+ outputEscapedChar(OB, W);
StringByteSize -= 2;
if (Error)
goto StringLiteralError;
@@ -1371,19 +1371,19 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
unsigned NextChar =
decodeMultiByteChar(StringBytes, CharIndex, CharBytes);
if (CharIndex + 1 < NumChars || Result->IsTruncated)
- outputEscapedChar(OS, NextChar);
+ outputEscapedChar(OB, NextChar);
}
}
- OS << '\0';
- ResultBuffer = OS.getBuffer();
+ OB << '\0';
+ ResultBuffer = OB.getBuffer();
Result->DecodedString = copyString(ResultBuffer);
std::free(ResultBuffer);
return Result;
StringLiteralError:
Error = true;
- std::free(OS.getBuffer());
+ std::free(OB.getBuffer());
return nullptr;
}
@@ -1447,16 +1447,16 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
return nullptr;
// Render the parent symbol's name into a buffer.
- OutputStream OS;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
// FIXME: Propagate out-of-memory as an error?
std::terminate();
- OS << '`';
- Scope->output(OS, OF_Default);
- OS << '\'';
- OS << "::`" << Number << "'";
- OS << '\0';
- char *Result = OS.getBuffer();
+ OB << '`';
+ Scope->output(OB, OF_Default);
+ OB << '\'';
+ OB << "::`" << Number << "'";
+ OB << '\0';
+ char *Result = OB.getBuffer();
Identifier->Name = copyString(Result);
std::free(Result);
return Identifier;
@@ -2313,19 +2313,19 @@ void Demangler::dumpBackReferences() {
(int)Backrefs.FunctionParamCount);
// Create an output stream so we can render each type.
- OutputStream OS;
- if (!initializeOutputStream(nullptr, nullptr, OS, 1024))
+ OutputBuffer OB;
+ if (!initializeOutputBuffer(nullptr, nullptr, OB, 1024))
std::terminate();
for (size_t I = 0; I < Backrefs.FunctionParamCount; ++I) {
- OS.setCurrentPosition(0);
+ OB.setCurrentPosition(0);
TypeNode *T = Backrefs.FunctionParams[I];
- T->output(OS, OF_Default);
+ T->output(OB, OF_Default);
- std::printf(" [%d] - %.*s\n", (int)I, (int)OS.getCurrentPosition(),
- OS.getBuffer());
+ std::printf(" [%d] - %.*s\n", (int)I, (int)OB.getCurrentPosition(),
+ OB.getBuffer());
}
- std::free(OS.getBuffer());
+ std::free(OB.getBuffer());
if (Backrefs.FunctionParamCount > 0)
std::printf("\n");
@@ -2342,7 +2342,7 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
char *Buf, size_t *N,
int *Status, MSDemangleFlags Flags) {
Demangler D;
- OutputStream S;
+ OutputBuffer OB;
StringView Name{MangledName};
SymbolNode *AST = D.parse(Name);
@@ -2361,18 +2361,20 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
OF = OutputFlags(OF | OF_NoReturnType);
if (Flags & MSDF_NoMemberType)
OF = OutputFlags(OF | OF_NoMemberType);
+ if (Flags & MSDF_NoVariableType)
+ OF = OutputFlags(OF | OF_NoVariableType);
int InternalStatus = demangle_success;
if (D.Error)
InternalStatus = demangle_invalid_mangled_name;
- else if (!initializeOutputStream(Buf, N, S, 1024))
+ else if (!initializeOutputBuffer(Buf, N, OB, 1024))
InternalStatus = demangle_memory_alloc_failure;
else {
- AST->output(S, OF);
- S += '\0';
+ AST->output(OB, OF);
+ OB += '\0';
if (N != nullptr)
- *N = S.getCurrentPosition();
- Buf = S.getBuffer();
+ *N = OB.getCurrentPosition();
+ Buf = OB.getBuffer();
}
if (Status)
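
A hedged sketch of the new MSDF_NoVariableType flag: with it set, a variable symbol is printed without its type (the mangled name and expected output are assumptions, not taken from the patch):

    int Status = 0;
    char *D = llvm::microsoftDemangle("?x@@3HA", nullptr, nullptr, nullptr,
                                      &Status, llvm::MSDF_NoVariableType);
    // Without the flag this demangles to "int x"; with it, just "x".
    std::free(D);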
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 9fe157bf0d2a..32d8dff66c3f 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -21,97 +21,97 @@ using namespace ms_demangle;
#define OUTPUT_ENUM_CLASS_VALUE(Enum, Value, Desc) \
case Enum::Value: \
- OS << Desc; \
+ OB << Desc; \
break;
// Writes a space if the last token does not end with a punctuation.
-static void outputSpaceIfNecessary(OutputStream &OS) {
- if (OS.empty())
+static void outputSpaceIfNecessary(OutputBuffer &OB) {
+ if (OB.empty())
return;
- char C = OS.back();
+ char C = OB.back();
if (std::isalnum(C) || C == '>')
- OS << " ";
+ OB << " ";
}
-static void outputSingleQualifier(OutputStream &OS, Qualifiers Q) {
+static void outputSingleQualifier(OutputBuffer &OB, Qualifiers Q) {
switch (Q) {
case Q_Const:
- OS << "const";
+ OB << "const";
break;
case Q_Volatile:
- OS << "volatile";
+ OB << "volatile";
break;
case Q_Restrict:
- OS << "__restrict";
+ OB << "__restrict";
break;
default:
break;
}
}
-static bool outputQualifierIfPresent(OutputStream &OS, Qualifiers Q,
+static bool outputQualifierIfPresent(OutputBuffer &OB, Qualifiers Q,
Qualifiers Mask, bool NeedSpace) {
if (!(Q & Mask))
return NeedSpace;
if (NeedSpace)
- OS << " ";
+ OB << " ";
- outputSingleQualifier(OS, Mask);
+ outputSingleQualifier(OB, Mask);
return true;
}
-static void outputQualifiers(OutputStream &OS, Qualifiers Q, bool SpaceBefore,
+static void outputQualifiers(OutputBuffer &OB, Qualifiers Q, bool SpaceBefore,
bool SpaceAfter) {
if (Q == Q_None)
return;
- size_t Pos1 = OS.getCurrentPosition();
- SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Const, SpaceBefore);
- SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Volatile, SpaceBefore);
- SpaceBefore = outputQualifierIfPresent(OS, Q, Q_Restrict, SpaceBefore);
- size_t Pos2 = OS.getCurrentPosition();
+ size_t Pos1 = OB.getCurrentPosition();
+ SpaceBefore = outputQualifierIfPresent(OB, Q, Q_Const, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OB, Q, Q_Volatile, SpaceBefore);
+ SpaceBefore = outputQualifierIfPresent(OB, Q, Q_Restrict, SpaceBefore);
+ size_t Pos2 = OB.getCurrentPosition();
if (SpaceAfter && Pos2 > Pos1)
- OS << " ";
+ OB << " ";
}
-static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
- outputSpaceIfNecessary(OS);
+static void outputCallingConvention(OutputBuffer &OB, CallingConv CC) {
+ outputSpaceIfNecessary(OB);
switch (CC) {
case CallingConv::Cdecl:
- OS << "__cdecl";
+ OB << "__cdecl";
break;
case CallingConv::Fastcall:
- OS << "__fastcall";
+ OB << "__fastcall";
break;
case CallingConv::Pascal:
- OS << "__pascal";
+ OB << "__pascal";
break;
case CallingConv::Regcall:
- OS << "__regcall";
+ OB << "__regcall";
break;
case CallingConv::Stdcall:
- OS << "__stdcall";
+ OB << "__stdcall";
break;
case CallingConv::Thiscall:
- OS << "__thiscall";
+ OB << "__thiscall";
break;
case CallingConv::Eabi:
- OS << "__eabi";
+ OB << "__eabi";
break;
case CallingConv::Vectorcall:
- OS << "__vectorcall";
+ OB << "__vectorcall";
break;
case CallingConv::Clrcall:
- OS << "__clrcall";
+ OB << "__clrcall";
break;
case CallingConv::Swift:
- OS << "__attribute__((__swiftcall__)) ";
+ OB << "__attribute__((__swiftcall__)) ";
break;
case CallingConv::SwiftAsync:
- OS << "__attribute__((__swiftasynccall__)) ";
+ OB << "__attribute__((__swiftasynccall__)) ";
break;
default:
break;
@@ -119,16 +119,16 @@ static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
}
std::string Node::toString(OutputFlags Flags) const {
- OutputStream OS;
- initializeOutputStream(nullptr, nullptr, OS, 1024);
- this->output(OS, Flags);
- OS << '\0';
- std::string Owned(OS.getBuffer());
- std::free(OS.getBuffer());
+ OutputBuffer OB;
+ initializeOutputBuffer(nullptr, nullptr, OB, 1024);
+ this->output(OB, Flags);
+ OB << '\0';
+ std::string Owned(OB.getBuffer());
+ std::free(OB.getBuffer());
return Owned;
}
-void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+void PrimitiveTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
switch (PrimKind) {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Void, "void");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Bool, "bool");
@@ -152,107 +152,107 @@ void PrimitiveTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Ldouble, "long double");
OUTPUT_ENUM_CLASS_VALUE(PrimitiveKind, Nullptr, "std::nullptr_t");
}
- outputQualifiers(OS, Quals, true, false);
+ outputQualifiers(OB, Quals, true, false);
}
-void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags) const {
- output(OS, Flags, ", ");
+void NodeArrayNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ output(OB, Flags, ", ");
}
-void NodeArrayNode::output(OutputStream &OS, OutputFlags Flags,
+void NodeArrayNode::output(OutputBuffer &OB, OutputFlags Flags,
StringView Separator) const {
if (Count == 0)
return;
if (Nodes[0])
- Nodes[0]->output(OS, Flags);
+ Nodes[0]->output(OB, Flags);
for (size_t I = 1; I < Count; ++I) {
- OS << Separator;
- Nodes[I]->output(OS, Flags);
+ OB << Separator;
+ Nodes[I]->output(OB, Flags);
}
}
-void EncodedStringLiteralNode::output(OutputStream &OS,
+void EncodedStringLiteralNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
switch (Char) {
case CharKind::Wchar:
- OS << "L\"";
+ OB << "L\"";
break;
case CharKind::Char:
- OS << "\"";
+ OB << "\"";
break;
case CharKind::Char16:
- OS << "u\"";
+ OB << "u\"";
break;
case CharKind::Char32:
- OS << "U\"";
+ OB << "U\"";
break;
}
- OS << DecodedString << "\"";
+ OB << DecodedString << "\"";
if (IsTruncated)
- OS << "...";
+ OB << "...";
}
-void IntegerLiteralNode::output(OutputStream &OS, OutputFlags Flags) const {
+void IntegerLiteralNode::output(OutputBuffer &OB, OutputFlags Flags) const {
if (IsNegative)
- OS << '-';
- OS << Value;
+ OB << '-';
+ OB << Value;
}
-void TemplateParameterReferenceNode::output(OutputStream &OS,
+void TemplateParameterReferenceNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
if (ThunkOffsetCount > 0)
- OS << "{";
+ OB << "{";
else if (Affinity == PointerAffinity::Pointer)
- OS << "&";
+ OB << "&";
if (Symbol) {
- Symbol->output(OS, Flags);
+ Symbol->output(OB, Flags);
if (ThunkOffsetCount > 0)
- OS << ", ";
+ OB << ", ";
}
if (ThunkOffsetCount > 0)
- OS << ThunkOffsets[0];
+ OB << ThunkOffsets[0];
for (int I = 1; I < ThunkOffsetCount; ++I) {
- OS << ", " << ThunkOffsets[I];
+ OB << ", " << ThunkOffsets[I];
}
if (ThunkOffsetCount > 0)
- OS << "}";
+ OB << "}";
}
-void IdentifierNode::outputTemplateParameters(OutputStream &OS,
+void IdentifierNode::outputTemplateParameters(OutputBuffer &OB,
OutputFlags Flags) const {
if (!TemplateParams)
return;
- OS << "<";
- TemplateParams->output(OS, Flags);
- OS << ">";
+ OB << "<";
+ TemplateParams->output(OB, Flags);
+ OB << ">";
}
-void DynamicStructorIdentifierNode::output(OutputStream &OS,
+void DynamicStructorIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
if (IsDestructor)
- OS << "`dynamic atexit destructor for ";
+ OB << "`dynamic atexit destructor for ";
else
- OS << "`dynamic initializer for ";
+ OB << "`dynamic initializer for ";
if (Variable) {
- OS << "`";
- Variable->output(OS, Flags);
- OS << "''";
+ OB << "`";
+ Variable->output(OB, Flags);
+ OB << "''";
} else {
- OS << "'";
- Name->output(OS, Flags);
- OS << "''";
+ OB << "'";
+ Name->output(OB, Flags);
+ OB << "''";
}
}
-void NamedIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
- OS << Name;
- outputTemplateParameters(OS, Flags);
+void NamedIdentifierNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ OB << Name;
+ outputTemplateParameters(OB, Flags);
}
-void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
+void IntrinsicFunctionIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
switch (Operator) {
OUTPUT_ENUM_CLASS_VALUE(IntrinsicFunctionKind, New, "operator new");
@@ -350,188 +350,188 @@ void IntrinsicFunctionIdentifierNode::output(OutputStream &OS,
case IntrinsicFunctionKind::None:
break;
}
- outputTemplateParameters(OS, Flags);
+ outputTemplateParameters(OB, Flags);
}
-void LocalStaticGuardIdentifierNode::output(OutputStream &OS,
+void LocalStaticGuardIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
if (IsThread)
- OS << "`local static thread guard'";
+ OB << "`local static thread guard'";
else
- OS << "`local static guard'";
+ OB << "`local static guard'";
if (ScopeIndex > 0)
- OS << "{" << ScopeIndex << "}";
+ OB << "{" << ScopeIndex << "}";
}
-void ConversionOperatorIdentifierNode::output(OutputStream &OS,
+void ConversionOperatorIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "operator";
- outputTemplateParameters(OS, Flags);
- OS << " ";
- TargetType->output(OS, Flags);
+ OB << "operator";
+ outputTemplateParameters(OB, Flags);
+ OB << " ";
+ TargetType->output(OB, Flags);
}
-void StructorIdentifierNode::output(OutputStream &OS, OutputFlags Flags) const {
+void StructorIdentifierNode::output(OutputBuffer &OB, OutputFlags Flags) const {
if (IsDestructor)
- OS << "~";
- Class->output(OS, Flags);
- outputTemplateParameters(OS, Flags);
+ OB << "~";
+ Class->output(OB, Flags);
+ outputTemplateParameters(OB, Flags);
}
-void LiteralOperatorIdentifierNode::output(OutputStream &OS,
+void LiteralOperatorIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "operator \"\"" << Name;
- outputTemplateParameters(OS, Flags);
+ OB << "operator \"\"" << Name;
+ outputTemplateParameters(OB, Flags);
}
-void FunctionSignatureNode::outputPre(OutputStream &OS,
+void FunctionSignatureNode::outputPre(OutputBuffer &OB,
OutputFlags Flags) const {
if (!(Flags & OF_NoAccessSpecifier)) {
if (FunctionClass & FC_Public)
- OS << "public: ";
+ OB << "public: ";
if (FunctionClass & FC_Protected)
- OS << "protected: ";
+ OB << "protected: ";
if (FunctionClass & FC_Private)
- OS << "private: ";
+ OB << "private: ";
}
if (!(Flags & OF_NoMemberType)) {
if (!(FunctionClass & FC_Global)) {
if (FunctionClass & FC_Static)
- OS << "static ";
+ OB << "static ";
}
if (FunctionClass & FC_Virtual)
- OS << "virtual ";
+ OB << "virtual ";
if (FunctionClass & FC_ExternC)
- OS << "extern \"C\" ";
+ OB << "extern \"C\" ";
}
if (!(Flags & OF_NoReturnType) && ReturnType) {
- ReturnType->outputPre(OS, Flags);
- OS << " ";
+ ReturnType->outputPre(OB, Flags);
+ OB << " ";
}
if (!(Flags & OF_NoCallingConvention))
- outputCallingConvention(OS, CallConvention);
+ outputCallingConvention(OB, CallConvention);
}
-void FunctionSignatureNode::outputPost(OutputStream &OS,
+void FunctionSignatureNode::outputPost(OutputBuffer &OB,
OutputFlags Flags) const {
if (!(FunctionClass & FC_NoParameterList)) {
- OS << "(";
+ OB << "(";
if (Params)
- Params->output(OS, Flags);
+ Params->output(OB, Flags);
else
- OS << "void";
+ OB << "void";
if (IsVariadic) {
- if (OS.back() != '(')
- OS << ", ";
- OS << "...";
+ if (OB.back() != '(')
+ OB << ", ";
+ OB << "...";
}
- OS << ")";
+ OB << ")";
}
if (Quals & Q_Const)
- OS << " const";
+ OB << " const";
if (Quals & Q_Volatile)
- OS << " volatile";
+ OB << " volatile";
if (Quals & Q_Restrict)
- OS << " __restrict";
+ OB << " __restrict";
if (Quals & Q_Unaligned)
- OS << " __unaligned";
+ OB << " __unaligned";
if (IsNoexcept)
- OS << " noexcept";
+ OB << " noexcept";
if (RefQualifier == FunctionRefQualifier::Reference)
- OS << " &";
+ OB << " &";
else if (RefQualifier == FunctionRefQualifier::RValueReference)
- OS << " &&";
+ OB << " &&";
if (!(Flags & OF_NoReturnType) && ReturnType)
- ReturnType->outputPost(OS, Flags);
+ ReturnType->outputPost(OB, Flags);
}
-void ThunkSignatureNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
- OS << "[thunk]: ";
+void ThunkSignatureNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
+ OB << "[thunk]: ";
- FunctionSignatureNode::outputPre(OS, Flags);
+ FunctionSignatureNode::outputPre(OB, Flags);
}
-void ThunkSignatureNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+void ThunkSignatureNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {
if (FunctionClass & FC_StaticThisAdjust) {
- OS << "`adjustor{" << ThisAdjust.StaticOffset << "}'";
+ OB << "`adjustor{" << ThisAdjust.StaticOffset << "}'";
} else if (FunctionClass & FC_VirtualThisAdjust) {
if (FunctionClass & FC_VirtualThisAdjustEx) {
- OS << "`vtordispex{" << ThisAdjust.VBPtrOffset << ", "
+ OB << "`vtordispex{" << ThisAdjust.VBPtrOffset << ", "
<< ThisAdjust.VBOffsetOffset << ", " << ThisAdjust.VtordispOffset
<< ", " << ThisAdjust.StaticOffset << "}'";
} else {
- OS << "`vtordisp{" << ThisAdjust.VtordispOffset << ", "
+ OB << "`vtordisp{" << ThisAdjust.VtordispOffset << ", "
<< ThisAdjust.StaticOffset << "}'";
}
}
- FunctionSignatureNode::outputPost(OS, Flags);
+ FunctionSignatureNode::outputPost(OB, Flags);
}
-void PointerTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+void PointerTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
if (Pointee->kind() == NodeKind::FunctionSignature) {
// If this is a pointer to a function, don't output the calling convention.
// It needs to go inside the parentheses.
const FunctionSignatureNode *Sig =
static_cast<const FunctionSignatureNode *>(Pointee);
- Sig->outputPre(OS, OF_NoCallingConvention);
+ Sig->outputPre(OB, OF_NoCallingConvention);
} else
- Pointee->outputPre(OS, Flags);
+ Pointee->outputPre(OB, Flags);
- outputSpaceIfNecessary(OS);
+ outputSpaceIfNecessary(OB);
if (Quals & Q_Unaligned)
- OS << "__unaligned ";
+ OB << "__unaligned ";
if (Pointee->kind() == NodeKind::ArrayType) {
- OS << "(";
+ OB << "(";
} else if (Pointee->kind() == NodeKind::FunctionSignature) {
- OS << "(";
+ OB << "(";
const FunctionSignatureNode *Sig =
static_cast<const FunctionSignatureNode *>(Pointee);
- outputCallingConvention(OS, Sig->CallConvention);
- OS << " ";
+ outputCallingConvention(OB, Sig->CallConvention);
+ OB << " ";
}
if (ClassParent) {
- ClassParent->output(OS, Flags);
- OS << "::";
+ ClassParent->output(OB, Flags);
+ OB << "::";
}
switch (Affinity) {
case PointerAffinity::Pointer:
- OS << "*";
+ OB << "*";
break;
case PointerAffinity::Reference:
- OS << "&";
+ OB << "&";
break;
case PointerAffinity::RValueReference:
- OS << "&&";
+ OB << "&&";
break;
default:
assert(false);
}
- outputQualifiers(OS, Quals, false, false);
+ outputQualifiers(OB, Quals, false, false);
}
-void PointerTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
+void PointerTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {
if (Pointee->kind() == NodeKind::ArrayType ||
Pointee->kind() == NodeKind::FunctionSignature)
- OS << ")";
+ OB << ")";
- Pointee->outputPost(OS, Flags);
+ Pointee->outputPost(OB, Flags);
}
-void TagTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
+void TagTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
if (!(Flags & OF_NoTagSpecifier)) {
switch (Tag) {
OUTPUT_ENUM_CLASS_VALUE(TagKind, Class, "class");
@@ -539,59 +539,59 @@ void TagTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
OUTPUT_ENUM_CLASS_VALUE(TagKind, Union, "union");
OUTPUT_ENUM_CLASS_VALUE(TagKind, Enum, "enum");
}
- OS << " ";
+ OB << " ";
}
- QualifiedName->output(OS, Flags);
- outputQualifiers(OS, Quals, true, false);
+ QualifiedName->output(OB, Flags);
+ outputQualifiers(OB, Quals, true, false);
}
-void TagTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+void TagTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {}
-void ArrayTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
- ElementType->outputPre(OS, Flags);
- outputQualifiers(OS, Quals, true, false);
+void ArrayTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
+ ElementType->outputPre(OB, Flags);
+ outputQualifiers(OB, Quals, true, false);
}
-void ArrayTypeNode::outputOneDimension(OutputStream &OS, OutputFlags Flags,
+void ArrayTypeNode::outputOneDimension(OutputBuffer &OB, OutputFlags Flags,
Node *N) const {
assert(N->kind() == NodeKind::IntegerLiteral);
IntegerLiteralNode *ILN = static_cast<IntegerLiteralNode *>(N);
if (ILN->Value != 0)
- ILN->output(OS, Flags);
+ ILN->output(OB, Flags);
}
-void ArrayTypeNode::outputDimensionsImpl(OutputStream &OS,
+void ArrayTypeNode::outputDimensionsImpl(OutputBuffer &OB,
OutputFlags Flags) const {
if (Dimensions->Count == 0)
return;
- outputOneDimension(OS, Flags, Dimensions->Nodes[0]);
+ outputOneDimension(OB, Flags, Dimensions->Nodes[0]);
for (size_t I = 1; I < Dimensions->Count; ++I) {
- OS << "][";
- outputOneDimension(OS, Flags, Dimensions->Nodes[I]);
+ OB << "][";
+ outputOneDimension(OB, Flags, Dimensions->Nodes[I]);
}
}
-void ArrayTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {
- OS << "[";
- outputDimensionsImpl(OS, Flags);
- OS << "]";
+void ArrayTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {
+ OB << "[";
+ outputDimensionsImpl(OB, Flags);
+ OB << "]";
- ElementType->outputPost(OS, Flags);
+ ElementType->outputPost(OB, Flags);
}
-void SymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
- Name->output(OS, Flags);
+void SymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ Name->output(OB, Flags);
}
-void FunctionSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
- Signature->outputPre(OS, Flags);
- outputSpaceIfNecessary(OS);
- Name->output(OS, Flags);
- Signature->outputPost(OS, Flags);
+void FunctionSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ Signature->outputPre(OB, Flags);
+ outputSpaceIfNecessary(OB);
+ Name->output(OB, Flags);
+ Signature->outputPost(OB, Flags);
}
-void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
+void VariableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
const char *AccessSpec = nullptr;
bool IsStatic = true;
switch (SC) {
@@ -609,52 +609,52 @@ void VariableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
break;
}
if (!(Flags & OF_NoAccessSpecifier) && AccessSpec)
- OS << AccessSpec << ": ";
+ OB << AccessSpec << ": ";
if (!(Flags & OF_NoMemberType) && IsStatic)
- OS << "static ";
+ OB << "static ";
- if (Type) {
- Type->outputPre(OS, Flags);
- outputSpaceIfNecessary(OS);
+ if (!(Flags & OF_NoVariableType) && Type) {
+ Type->outputPre(OB, Flags);
+ outputSpaceIfNecessary(OB);
}
- Name->output(OS, Flags);
- if (Type)
- Type->outputPost(OS, Flags);
+ Name->output(OB, Flags);
+ if (!(Flags & OF_NoVariableType) && Type)
+ Type->outputPost(OB, Flags);
}
-void CustomTypeNode::outputPre(OutputStream &OS, OutputFlags Flags) const {
- Identifier->output(OS, Flags);
+void CustomTypeNode::outputPre(OutputBuffer &OB, OutputFlags Flags) const {
+ Identifier->output(OB, Flags);
}
-void CustomTypeNode::outputPost(OutputStream &OS, OutputFlags Flags) const {}
+void CustomTypeNode::outputPost(OutputBuffer &OB, OutputFlags Flags) const {}
-void QualifiedNameNode::output(OutputStream &OS, OutputFlags Flags) const {
- Components->output(OS, Flags, "::");
+void QualifiedNameNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ Components->output(OB, Flags, "::");
}
-void RttiBaseClassDescriptorNode::output(OutputStream &OS,
+void RttiBaseClassDescriptorNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "`RTTI Base Class Descriptor at (";
- OS << NVOffset << ", " << VBPtrOffset << ", " << VBTableOffset << ", "
+ OB << "`RTTI Base Class Descriptor at (";
+ OB << NVOffset << ", " << VBPtrOffset << ", " << VBTableOffset << ", "
<< this->Flags;
- OS << ")'";
+ OB << ")'";
}
-void LocalStaticGuardVariableNode::output(OutputStream &OS,
+void LocalStaticGuardVariableNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- Name->output(OS, Flags);
+ Name->output(OB, Flags);
}
-void VcallThunkIdentifierNode::output(OutputStream &OS,
+void VcallThunkIdentifierNode::output(OutputBuffer &OB,
OutputFlags Flags) const {
- OS << "`vcall'{" << OffsetInVTable << ", {flat}}";
+ OB << "`vcall'{" << OffsetInVTable << ", {flat}}";
}
-void SpecialTableSymbolNode::output(OutputStream &OS, OutputFlags Flags) const {
- outputQualifiers(OS, Quals, false, true);
- Name->output(OS, Flags);
+void SpecialTableSymbolNode::output(OutputBuffer &OB, OutputFlags Flags) const {
+ outputQualifiers(OB, Quals, false, true);
+ Name->output(OB, Flags);
if (TargetName) {
- OS << "{for `";
- TargetName->output(OS, Flags);
- OS << "'}";
+ OB << "{for `";
+ TargetName->output(OB, Flags);
+ OB << "'}";
}
}
diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
index f916300835ce..dcac0bd63859 100644
--- a/llvm/lib/Demangle/RustDemangle.cpp
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -23,7 +23,7 @@
using namespace llvm;
-using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::OutputBuffer;
using llvm::itanium_demangle::StringView;
using llvm::itanium_demangle::SwapAndRestore;
@@ -88,7 +88,7 @@ class Demangler {
public:
// Demangled output.
- OutputStream Output;
+ OutputBuffer Output;
Demangler(size_t MaxRecursionLevel = 500);
@@ -135,6 +135,7 @@ private:
void printDecimalNumber(uint64_t N);
void printBasicType(BasicType);
void printLifetime(uint64_t Index);
+ void printIdentifier(Identifier Ident);
char look() const;
char consume();
@@ -163,7 +164,7 @@ char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
}
Demangler D;
- if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
+ if (!initializeOutputBuffer(nullptr, nullptr, D.Output, 1024)) {
if (Status != nullptr)
*Status = demangle_memory_alloc_failure;
return nullptr;
@@ -283,8 +284,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
switch (consume()) {
case 'C': {
parseOptionalBase62Number('s');
- Identifier Ident = parseIdentifier();
- print(Ident.Name);
+ printIdentifier(parseIdentifier());
break;
}
case 'M': {
@@ -333,7 +333,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
print(NS);
if (!Ident.empty()) {
print(":");
- print(Ident.Name);
+ printIdentifier(Ident);
}
print('#');
printDecimalNumber(Disambiguator);
@@ -342,7 +342,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
// Implementation internal namespaces.
if (!Ident.empty()) {
print("::");
- print(Ident.Name);
+ printIdentifier(Ident);
}
}
break;
@@ -669,6 +669,8 @@ void Demangler::demangleFnSig() {
print("C");
} else {
Identifier Ident = parseIdentifier();
+ if (Ident.Punycode)
+ Error = true;
for (char C : Ident.Name) {
// When mangling ABI string, the "-" is replaced with "_".
if (C == '_')
@@ -1078,6 +1080,172 @@ void Demangler::printLifetime(uint64_t Index) {
}
}
+static inline bool decodePunycodeDigit(char C, size_t &Value) {
+ if (isLower(C)) {
+ Value = C - 'a';
+ return true;
+ }
+
+ if (isDigit(C)) {
+ Value = 26 + (C - '0');
+ return true;
+ }
+
+ return false;
+}
+
+static void removeNullBytes(OutputBuffer &Output, size_t StartIdx) {
+ char *Buffer = Output.getBuffer();
+ char *Start = Buffer + StartIdx;
+ char *End = Buffer + Output.getCurrentPosition();
+ Output.setCurrentPosition(std::remove(Start, End, '\0') - Buffer);
+}
+
+// Encodes CodePoint as UTF-8 and stores the result in Output. Returns false
+// if CodePoint is not a valid Unicode scalar value.
+static inline bool encodeUTF8(size_t CodePoint, char *Output) {
+ if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
+ return false;
+
+ if (CodePoint <= 0x7F) {
+ Output[0] = CodePoint;
+ return true;
+ }
+
+ if (CodePoint <= 0x7FF) {
+ Output[0] = 0xC0 | ((CodePoint >> 6) & 0x3F);
+ Output[1] = 0x80 | (CodePoint & 0x3F);
+ return true;
+ }
+
+ if (CodePoint <= 0xFFFF) {
+ Output[0] = 0xE0 | (CodePoint >> 12);
+ Output[1] = 0x80 | ((CodePoint >> 6) & 0x3F);
+ Output[2] = 0x80 | (CodePoint & 0x3F);
+ return true;
+ }
+
+ if (CodePoint <= 0x10FFFF) {
+ Output[0] = 0xF0 | (CodePoint >> 18);
+ Output[1] = 0x80 | ((CodePoint >> 12) & 0x3F);
+ Output[2] = 0x80 | ((CodePoint >> 6) & 0x3F);
+ Output[3] = 0x80 | (CodePoint & 0x3F);
+ return true;
+ }
+
+ return false;
+}
+
+// Decodes string encoded using punycode and appends results to Output.
+// Returns true if decoding was successful.
+static bool decodePunycode(StringView Input, OutputBuffer &Output) {
+ size_t OutputSize = Output.getCurrentPosition();
+ size_t InputIdx = 0;
+
+ // Rust uses an underscore as a delimiter.
+ size_t DelimiterPos = StringView::npos;
+ for (size_t I = 0; I != Input.size(); ++I)
+ if (Input[I] == '_')
+ DelimiterPos = I;
+
+ if (DelimiterPos != StringView::npos) {
+ // Copy basic code points before the last delimiter to the output.
+ for (; InputIdx != DelimiterPos; ++InputIdx) {
+ char C = Input[InputIdx];
+ if (!isValid(C))
+ return false;
+ // Code points are padded with zeros while decoding is in progress.
+ char UTF8[4] = {C};
+ Output += StringView(UTF8, UTF8 + 4);
+ }
+ // Skip over the delimiter.
+ ++InputIdx;
+ }
+
+ size_t Base = 36;
+ size_t Skew = 38;
+ size_t Bias = 72;
+ size_t N = 0x80;
+ size_t TMin = 1;
+ size_t TMax = 26;
+ size_t Damp = 700;
+
+ auto Adapt = [&](size_t Delta, size_t NumPoints) {
+ Delta /= Damp;
+ Delta += Delta / NumPoints;
+ Damp = 2;
+
+ size_t K = 0;
+ while (Delta > (Base - TMin) * TMax / 2) {
+ Delta /= Base - TMin;
+ K += Base;
+ }
+ return K + (((Base - TMin + 1) * Delta) / (Delta + Skew));
+ };
+
+ // Main decoding loop.
+ for (size_t I = 0; InputIdx != Input.size(); I += 1) {
+ size_t OldI = I;
+ size_t W = 1;
+ size_t Max = std::numeric_limits<size_t>::max();
+ for (size_t K = Base; true; K += Base) {
+ if (InputIdx == Input.size())
+ return false;
+ char C = Input[InputIdx++];
+ size_t Digit = 0;
+ if (!decodePunycodeDigit(C, Digit))
+ return false;
+
+ if (Digit > (Max - I) / W)
+ return false;
+ I += Digit * W;
+
+ size_t T;
+ if (K <= Bias)
+ T = TMin;
+ else if (K >= Bias + TMax)
+ T = TMax;
+ else
+ T = K - Bias;
+
+ if (Digit < T)
+ break;
+
+ if (W > Max / (Base - T))
+ return false;
+ W *= (Base - T);
+ }
+ size_t NumPoints = (Output.getCurrentPosition() - OutputSize) / 4 + 1;
+ Bias = Adapt(I - OldI, NumPoints);
+
+ if (I / NumPoints > Max - N)
+ return false;
+ N += I / NumPoints;
+ I = I % NumPoints;
+
+ // Insert N at position I in the output.
+ char UTF8[4] = {};
+ if (!encodeUTF8(N, UTF8))
+ return false;
+ Output.insert(OutputSize + I * 4, UTF8, 4);
+ }
+
+ removeNullBytes(Output, OutputSize);
+ return true;
+}
+
+void Demangler::printIdentifier(Identifier Ident) {
+ if (Error || !Print)
+ return;
+
+ if (Ident.Punycode) {
+ if (!decodePunycode(Ident.Name, Output))
+ Error = true;
+ } else {
+ print(Ident.Name);
+ }
+}
+
char Demangler::look() const {
if (Error || Position >= Input.size())
return 0;
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index c8bbf0bcdfda..fe3c433bd2c5 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -28,13 +28,13 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index addec6871fa1..672fd7b991c2 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -188,8 +188,7 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
for (auto &F : *Mod) {
auto Attrs = F.getAttributes();
StringRef Value = options.NoFramePointerElim ? "all" : "none";
- Attrs = Attrs.addAttribute(F.getContext(), AttributeList::FunctionIndex,
- "frame-pointer", Value);
+ Attrs = Attrs.addFnAttribute(F.getContext(), "frame-pointer", Value);
F.setAttributes(Attrs);
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
index c85e80b52e5a..4d7d5ce26668 100644
--- a/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp
@@ -1,9 +1,8 @@
//===-------- JITLink_EHFrameSupport.cpp - JITLink eh-frame utils ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -300,7 +299,7 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
if (auto Err = RecordReader.skip(PC.G.getPointerSize()))
return Err;
- // Read and sanity check the code alignment factor.
+ // Read and validate the code alignment factor.
{
uint64_t CodeAlignmentFactor = 0;
if (auto Err = RecordReader.readULEB128(CodeAlignmentFactor))
@@ -311,7 +310,7 @@ Error EHFrameEdgeFixer::processCIE(ParseContext &PC, Block &B,
" (expected 1)");
}
- // Read and sanity check the data alignment factor.
+ // Read and validate the data alignment factor.
{
int64_t DataAlignmentFactor = 0;
if (auto Err = RecordReader.readSLEB128(DataAlignmentFactor))
@@ -665,7 +664,7 @@ EHFrameEdgeFixer::readEncodedPointer(uint8_t PointerEncoding,
EffectiveType = (PointerSize == 8) ? DW_EH_PE_udata8 : DW_EH_PE_udata4;
JITTargetAddress Addr;
- Edge::Kind PointerEdgeKind;
+ Edge::Kind PointerEdgeKind = Edge::Invalid;
switch (EffectiveType) {
case DW_EH_PE_udata4: {
uint32_t Val;
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
index 252e44fe4a74..eb98e4ba4041 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
@@ -1,9 +1,8 @@
//===-------------- ELF.cpp - JIT linker function for ELF -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "llvm/ExecutionEngine/JITLink/ELF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/Object/ELF.h"
@@ -65,6 +65,8 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
return TargetMachineArch.takeError();
switch (*TargetMachineArch) {
+ case ELF::EM_AARCH64:
+ return createLinkGraphFromELFObject_aarch64(ObjectBuffer);
case ELF::EM_RISCV:
return createLinkGraphFromELFObject_riscv(ObjectBuffer);
case ELF::EM_X86_64:
@@ -79,6 +81,9 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
void link_ELF(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx) {
switch (G->getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ link_ELF_aarch64(std::move(G), std::move(Ctx));
+ return;
case Triple::riscv32:
case Triple::riscv64:
link_ELF_riscv(std::move(G), std::move(Ctx));
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 2b2a1a8db4c1..fdc987751286 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -36,11 +36,9 @@ protected:
}
Section &getCommonSection() {
- if (!CommonSection) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_WRITE);
- CommonSection = &G->createSection(CommonSectionName, Prot);
- }
+ if (!CommonSection)
+ CommonSection =
+ &G->createSection(CommonSectionName, MemProt::Read | MemProt::Write);
return *CommonSection;
}
@@ -110,6 +108,31 @@ protected:
Error graphifySections();
Error graphifySymbols();
+ /// Traverse all matching relocation records in the given section. The handler
+ /// function Func should be callable with this signature:
+ /// Error(const typename ELFT::Rela &,
+ /// const typename ELFT::Shdr &, Section &)
+ ///
+ template <typename RelocHandlerFunction>
+ Error forEachRelocation(const typename ELFT::Shdr &RelSect,
+ RelocHandlerFunction &&Func,
+ bool ProcessDebugSections = false);
+
+ /// Traverse all matching relocation records in the given section. Convenience
+ /// wrapper to allow passing a member function for the handler.
+ ///
+ template <typename ClassT, typename RelocHandlerMethod>
+ Error forEachRelocation(const typename ELFT::Shdr &RelSect, ClassT *Instance,
+ RelocHandlerMethod &&Method,
+ bool ProcessDebugSections = false) {
+ return forEachRelocation(
+ RelSect,
+ [Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
+ return (Instance->*Method)(Rel, Target, GS);
+ },
+ ProcessDebugSections);
+ }
+
const ELFFile &Obj;
typename ELFFile::Elf_Shdr_Range Sections;
@@ -170,11 +193,14 @@ ELFLinkGraphBuilder<ELFT>::getSymbolLinkageAndScope(
// Nothing to do here.
break;
case ELF::STB_WEAK:
+ case ELF::STB_GNU_UNIQUE:
L = Linkage::Weak;
break;
default:
- return make_error<StringError>("Unrecognized symbol binding for " + Name,
- inconvertibleErrorCode());
+ return make_error<StringError>(
+ "Unrecognized symbol binding " +
+ Twine(static_cast<int>(Sym.getBinding())) + " for " + Name,
+ inconvertibleErrorCode());
}
switch (Sym.getVisibility()) {
@@ -190,8 +216,10 @@ ELFLinkGraphBuilder<ELFT>::getSymbolLinkageAndScope(
S = Scope::Hidden;
break;
case ELF::STV_INTERNAL:
- return make_error<StringError>("Unrecognized symbol visibility for " + Name,
- inconvertibleErrorCode());
+ return make_error<StringError>(
+ "Unrecognized symbol visibility " +
+ Twine(static_cast<int>(Sym.getVisibility())) + " for " + Name,
+ inconvertibleErrorCode());
}
return std::make_pair(L, S);
@@ -265,13 +293,11 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
});
// Get the section's memory protection flags.
- sys::Memory::ProtectionFlags Prot;
+ MemProt Prot;
if (Sec.sh_flags & ELF::SHF_EXECINSTR)
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
+ Prot = MemProt::Read | MemProt::Exec;
else
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
+ Prot = MemProt::Read | MemProt::Write;
// For now we just use this to skip the "undefined" section, probably need
to revisit.
@@ -374,7 +400,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
if (Sym.isDefined() &&
(Sym.getType() == ELF::STT_NOTYPE || Sym.getType() == ELF::STT_FUNC ||
Sym.getType() == ELF::STT_OBJECT ||
- Sym.getType() == ELF::STT_SECTION)) {
+ Sym.getType() == ELF::STT_SECTION || Sym.getType() == ELF::STT_TLS)) {
// FIXME: Handle extended tables.
if (auto *GraphSec = getGraphSection(Sym.st_shndx)) {
@@ -421,6 +447,54 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
return Error::success();
}
+template <typename ELFT>
+template <typename RelocHandlerFunction>
+Error ELFLinkGraphBuilder<ELFT>::forEachRelocation(
+ const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func,
+ bool ProcessDebugSections) {
+
+ // Only look into sections that store relocation entries.
+ if (RelSect.sh_type != ELF::SHT_RELA && RelSect.sh_type != ELF::SHT_REL)
+ return Error::success();
+
+ // sh_info contains the section header index of the target (FixupSection),
+ // which is the section to which all relocations in RelSect apply.
+ auto FixupSection = Obj.getSection(RelSect.sh_info);
+ if (!FixupSection)
+ return FixupSection.takeError();
+
+ // Target sections have names in valid ELF object files.
+ Expected<StringRef> Name = Obj.getSectionName(**FixupSection);
+ if (!Name)
+ return Name.takeError();
+ LLVM_DEBUG(dbgs() << " " << *Name << ":\n");
+
+ // Skip relocations that target debug sections unless explicitly requested.
+ if (!ProcessDebugSections && isDwarfSection(*Name)) {
+ LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n");
+ return Error::success();
+ }
+
+ // Look up the link-graph section corresponding to the target section name.
+ Section *GraphSect = G->findSectionByName(*Name);
+ if (!GraphSect)
+ return make_error<StringError>(
+ "Refencing a section that wasn't added to the graph: " + *Name,
+ inconvertibleErrorCode());
+
+ auto RelEntries = Obj.relas(RelSect);
+ if (!RelEntries)
+ return RelEntries.takeError();
+
+ // Let the handler process the relocation entries one by one.
+ for (const typename ELFT::Rela &R : *RelEntries)
+ if (Error Err = Func(R, **FixupSection, *GraphSect))
+ return Err;
+
+ LLVM_DEBUG(dbgs() << "\n");
+ return Error::success();
+}
+
} // end namespace jitlink
} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
new file mode 100644
index 000000000000..dc183dfddfae
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -0,0 +1,185 @@
+//===----- ELF_aarch64.cpp - JIT linker implementation for ELF/aarch64 ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF/aarch64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
+#include "ELFLinkGraphBuilder.h"
+#include "JITLinkGeneric.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+#include "llvm/Object/ELFObjectFile.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+
+namespace llvm {
+namespace jitlink {
+
+class ELFJITLinker_aarch64 : public JITLinker<ELFJITLinker_aarch64> {
+ friend class JITLinker<ELFJITLinker_aarch64>;
+
+public:
+ ELFJITLinker_aarch64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G,
+ PassConfiguration PassConfig)
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassConfig)) {}
+
+private:
+ Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+ using namespace aarch64;
+ using namespace llvm::support;
+
+ char *BlockWorkingMem = B.getAlreadyMutableContent().data();
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+ JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
+ switch (E.getKind()) {
+ case aarch64::R_AARCH64_CALL26: {
+ assert((FixupAddress & 0x3) == 0 && "Call-inst is not 32-bit aligned");
+ int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
+
+ if (static_cast<uint64_t>(Value) & 0x3)
+ return make_error<JITLinkError>("Call target is not 32-bit aligned");
+
+ if (!fitsRangeSignedInt<27>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+
+ uint32_t RawInstr = *(little32_t *)FixupPtr;
+ assert((RawInstr & 0x7fffffff) == 0x14000000 &&
+ "RawInstr isn't a B or BR immediate instruction");
+ uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
+ uint32_t FixedInstr = RawInstr | Imm;
+ *(little32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ }
+ return Error::success();
+ }
+
+ template <uint8_t Bits> static bool fitsRangeSignedInt(int64_t Value) {
+ return Value >= -(1ll << Bits) && Value < (1ll << Bits);
+ }
+};
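As a worked example of the CALL26 fixup above (illustrative only): a call whose target lies 8 bytes past the fixup address patches a bare B opcode into b #8, since the low 26 bits encode the word offset.

#include <cassert>
#include <cstdint>

int main() {
  int64_t Value = 8;              // target - fixup address + addend
  uint32_t RawInstr = 0x14000000; // unrelocated B instruction
  uint32_t Imm = (static_cast<uint32_t>(Value) & ((1 << 28) - 1)) >> 2;
  assert((RawInstr | Imm) == 0x14000002u); // encodes b #8
  return 0;
}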
+
+template <typename ELFT>
+class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder<ELFT> {
+private:
+ static Expected<aarch64::EdgeKind_aarch64>
+ getRelocationKind(const uint32_t Type) {
+ using namespace aarch64;
+ switch (Type) {
+ case ELF::R_AARCH64_CALL26:
+ return EdgeKind_aarch64::R_AARCH64_CALL26;
+ }
+
+ return make_error<JITLinkError>("Unsupported aarch64 relocation:" +
+ formatv("{0:d}", Type));
+ }
+
+ Error addRelocations() override {
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
+ using Base = ELFLinkGraphBuilder<ELFT>;
+ using Self = ELFLinkGraphBuilder_aarch64<ELFT>;
+ for (const auto &RelSect : Base::Sections)
+ if (Error Err = Base::forEachRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+
+ return Error::success();
+ }
+
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSect,
+ Section &GraphSection) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ uint32_t Type = Rel.getType(false);
+ Expected<aarch64::EdgeKind_aarch64> Kind = getRelocationKind(Type);
+ if (!Kind)
+ return Kind.takeError();
+
+ int64_t Addend = Rel.r_addend;
+ Block *BlockToFix = *(GraphSection.blocks().begin());
+ JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress();
+ Edge GE(*Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), *BlockToFix, GE, aarch64::getEdgeKindName(*Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix->addEdge(std::move(GE));
+ return Error::success();
+ }
+
+public:
+ ELFLinkGraphBuilder_aarch64(StringRef FileName,
+ const object::ELFFile<ELFT> &Obj, const Triple T)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(T), FileName,
+ aarch64::getEdgeKindName) {}
+};
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer) {
+ LLVM_DEBUG({
+ dbgs() << "Building jitlink graph for new input "
+ << ObjectBuffer.getBufferIdentifier() << "...\n";
+ });
+
+ auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer);
+ if (!ELFObj)
+ return ELFObj.takeError();
+
+ assert((*ELFObj)->getArch() == Triple::aarch64 &&
+ "Only AArch64 (little endian) is supported for now");
+
+ auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
+ return ELFLinkGraphBuilder_aarch64<object::ELF64LE>((*ELFObj)->getFileName(),
+ ELFObjFile.getELFFile(),
+ (*ELFObj)->makeTriple())
+ .buildGraph();
+}
+
+void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ PassConfiguration Config;
+ const Triple &TT = G->getTargetTriple();
+ if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ }
+ if (auto Err = Ctx->modifyPassConfig(*G, Config))
+ return Ctx->notifyFailed(std::move(Err));
+
+ ELFJITLinker_aarch64::link(std::move(Ctx), std::move(G), std::move(Config));
+}
+
+} // namespace jitlink
+} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index d0e65ef1c3ac..b057788ce3ef 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -11,17 +11,117 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
+#include "ELFLinkGraphBuilder.h"
+#include "JITLinkGeneric.h"
+#include "PerGraphGOTAndPLTStubsBuilder.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/riscv.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
-#include "ELFLinkGraphBuilder.h"
-#include "JITLinkGeneric.h"
-
#define DEBUG_TYPE "jitlink"
using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::riscv;
+
+namespace {
+
+class PerGraphGOTAndPLTStubsBuilder_ELF_riscv
+ : public PerGraphGOTAndPLTStubsBuilder<
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv> {
+public:
+ static constexpr size_t StubEntrySize = 16;
+ static const uint8_t NullGOTEntryContent[8];
+ static const uint8_t RV64StubContent[StubEntrySize];
+ static const uint8_t RV32StubContent[StubEntrySize];
+
+ using PerGraphGOTAndPLTStubsBuilder<
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv>::PerGraphGOTAndPLTStubsBuilder;
+
+ bool isRV64() const { return G.getPointerSize() == 8; }
+
+ bool isGOTEdgeToFix(Edge &E) const { return E.getKind() == R_RISCV_GOT_HI20; }
+
+ Symbol &createGOTEntry(Symbol &Target) {
+ Block &GOTBlock = G.createContentBlock(
+ getGOTSection(), getGOTEntryBlockContent(), 0, G.getPointerSize(), 0);
+ GOTBlock.addEdge(isRV64() ? R_RISCV_64 : R_RISCV_32, 0, Target, 0);
+ return G.addAnonymousSymbol(GOTBlock, 0, G.getPointerSize(), false, false);
+ }
+
+ Symbol &createPLTStub(Symbol &Target) {
+ Block &StubContentBlock =
+ G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 4, 0);
+ auto &GOTEntrySymbol = getGOTEntry(Target);
+ StubContentBlock.addEdge(R_RISCV_CALL, 0, GOTEntrySymbol, 0);
+ return G.addAnonymousSymbol(StubContentBlock, 0, StubEntrySize, true,
+ false);
+ }
+
+ void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
+ // Replace the relocation pair (R_RISCV_GOT_HI20, R_RISCV_PCREL_LO12) with
+ // (R_RISCV_PCREL_HI20, R_RISCV_PCREL_LO12): retarget the edge at the GOT
+ // entry and change its kind from R_RISCV_GOT_HI20 to R_RISCV_PCREL_HI20.
+ E.setKind(R_RISCV_PCREL_HI20);
+ E.setTarget(GOTEntry);
+ }
+
+ void fixPLTEdge(Edge &E, Symbol &PLTStubs) {
+ assert(E.getKind() == R_RISCV_CALL_PLT && "Not a R_RISCV_CALL_PLT edge?");
+ E.setKind(R_RISCV_CALL);
+ E.setTarget(PLTStubs);
+ }
+
+ bool isExternalBranchEdge(Edge &E) const {
+ return E.getKind() == R_RISCV_CALL_PLT;
+ }
+
+private:
+ Section &getGOTSection() const {
+ if (!GOTSection)
+ GOTSection = &G.createSection("$__GOT", MemProt::Read);
+ return *GOTSection;
+ }
+
+ Section &getStubsSection() const {
+ if (!StubsSection)
+ StubsSection =
+ &G.createSection("$__STUBS", MemProt::Read | MemProt::Exec);
+ return *StubsSection;
+ }
+
+ ArrayRef<char> getGOTEntryBlockContent() {
+ return {reinterpret_cast<const char *>(NullGOTEntryContent),
+ G.getPointerSize()};
+ }
+
+ ArrayRef<char> getStubBlockContent() {
+ auto StubContent = isRV64() ? RV64StubContent : RV32StubContent;
+ return {reinterpret_cast<const char *>(StubContent), StubEntrySize};
+ }
+
+ mutable Section *GOTSection = nullptr;
+ mutable Section *StubsSection = nullptr;
+};
+const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_riscv::NullGOTEntryContent[8] =
+ {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+const uint8_t
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv::RV64StubContent[StubEntrySize] = {
+ 0x17, 0x0e, 0x00, 0x00, // auipc t3, literal
+ 0x03, 0x3e, 0x0e, 0x00, // ld t3, literal(t3)
+ 0x67, 0x00, 0x0e, 0x00, // jr t3
+ 0x13, 0x00, 0x00, 0x00}; // nop
+
+const uint8_t
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv::RV32StubContent[StubEntrySize] = {
+ 0x17, 0x0e, 0x00, 0x00, // auipc t3, literal
+ 0x03, 0x2e, 0x0e, 0x00, // lw t3, literal(t3)
+ 0x67, 0x00, 0x0e, 0x00, // jr t3
+ 0x13, 0x00, 0x00, 0x00}; // nop
+} // namespace
namespace llvm {
namespace jitlink {
@@ -78,6 +178,16 @@ private:
char *FixupPtr = BlockWorkingMem + E.getOffset();
JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
switch (E.getKind()) {
+ case R_RISCV_32: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(little32_t *)FixupPtr = static_cast<uint32_t>(Value);
+ break;
+ }
+ case R_RISCV_64: {
+ int64_t Value = E.getTarget().getAddress() + E.getAddend();
+ *(little64_t *)FixupPtr = static_cast<uint64_t>(Value);
+ break;
+ }
case R_RISCV_HI20: {
int64_t Value = E.getTarget().getAddress() + E.getAddend();
int32_t Hi = (Value + 0x800) & 0xFFFFF000;
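The + 0x800 in the R_RISCV_HI20 case just above compensates for the sign extension of the paired LO12 immediate. A small standalone check of the split, using an arbitrary address (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  int64_t Value = 0x12345FFF;                // arbitrary target address
  int32_t Hi = (Value + 0x800) & 0xFFFFF000; // 0x12346000, as computed above
  int32_t Lo = Value - Hi;                   // -1, fits the signed 12-bit field
  assert(Hi == 0x12346000 && Lo == -1 && int64_t(Hi) + Lo == Value);
  return 0;
}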
@@ -163,6 +273,10 @@ private:
return EdgeKind_riscv::R_RISCV_PCREL_LO12_I;
case ELF::R_RISCV_PCREL_LO12_S:
return EdgeKind_riscv::R_RISCV_PCREL_LO12_S;
+ case ELF::R_RISCV_GOT_HI20:
+ return EdgeKind_riscv::R_RISCV_GOT_HI20;
+ case ELF::R_RISCV_CALL_PLT:
+ return EdgeKind_riscv::R_RISCV_CALL_PLT;
}
return make_error<JITLinkError>("Unsupported riscv relocation:" +
@@ -170,93 +284,54 @@ private:
}
Error addRelocations() override {
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
using Base = ELFLinkGraphBuilder<ELFT>;
- LLVM_DEBUG(dbgs() << "Adding relocations\n");
-
- // TODO a partern is forming of iterate some sections but only give me
- // ones I am interested, I should abstract that concept some where
- for (auto &SecRef : Base::Sections) {
- if (SecRef.sh_type != ELF::SHT_RELA && SecRef.sh_type != ELF::SHT_REL)
- continue;
- auto RelSectName = Base::Obj.getSectionName(SecRef);
- if (!RelSectName)
- return RelSectName.takeError();
-
- LLVM_DEBUG({
- dbgs() << "Adding relocations from section " << *RelSectName << "\n";
- });
-
- auto UpdateSection = Base::Obj.getSection(SecRef.sh_info);
- if (!UpdateSection)
- return UpdateSection.takeError();
-
- auto UpdateSectionName = Base::Obj.getSectionName(**UpdateSection);
- if (!UpdateSectionName)
- return UpdateSectionName.takeError();
- // Don't process relocations for debug sections.
- if (Base::isDwarfSection(*UpdateSectionName)) {
- LLVM_DEBUG({
- dbgs() << " Target is dwarf section " << *UpdateSectionName
- << ". Skipping.\n";
- });
- continue;
- } else
- LLVM_DEBUG({
- dbgs() << " For target section " << *UpdateSectionName << "\n";
- });
-
- auto *JITSection = Base::G->findSectionByName(*UpdateSectionName);
- if (!JITSection)
- return make_error<llvm::StringError>(
- "Refencing a section that wasn't added to graph" +
- *UpdateSectionName,
- llvm::inconvertibleErrorCode());
-
- auto Relocations = Base::Obj.relas(SecRef);
- if (!Relocations)
- return Relocations.takeError();
-
- for (const auto &Rela : *Relocations) {
- auto Type = Rela.getType(false);
-
- LLVM_DEBUG({
- dbgs() << "Relocation Type: " << Type << "\n"
- << "Name: " << Base::Obj.getRelocationTypeName(Type) << "\n";
- });
-
- auto SymbolIndex = Rela.getSymbol(false);
- auto Symbol = Base::Obj.getRelocationSymbol(Rela, Base::SymTabSec);
- if (!Symbol)
- return Symbol.takeError();
-
- auto BlockToFix = *(JITSection->blocks().begin());
- auto *TargetSymbol = Base::getGraphSymbol(SymbolIndex);
-
- if (!TargetSymbol) {
- return make_error<llvm::StringError>(
- "Could not find symbol at given index, did you add it to "
- "JITSymbolTable? index: " +
- std::to_string(SymbolIndex) + ", shndx: " +
- std::to_string((*Symbol)->st_shndx) + " Size of table: " +
- std::to_string(Base::GraphSymbols.size()),
- llvm::inconvertibleErrorCode());
- }
- int64_t Addend = Rela.r_addend;
- JITTargetAddress FixupAddress =
- (*UpdateSection)->sh_addr + Rela.r_offset;
-
- LLVM_DEBUG({
- dbgs() << "Processing relocation at "
- << format("0x%016" PRIx64, FixupAddress) << "\n";
- });
- auto Kind = getRelocationKind(Type);
- if (!Kind)
- return Kind.takeError();
-
- BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
- *TargetSymbol, Addend);
- }
- }
+ using Self = ELFLinkGraphBuilder_riscv<ELFT>;
+ for (const auto &RelSect : Base::Sections)
+ if (Error Err = Base::forEachRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+
+ return Error::success();
+ }
+
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSect,
+ Section &GraphSection) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ uint32_t Type = Rel.getType(false);
+ Expected<riscv::EdgeKind_riscv> Kind = getRelocationKind(Type);
+ if (!Kind)
+ return Kind.takeError();
+
+ int64_t Addend = Rel.r_addend;
+ Block *BlockToFix = *(GraphSection.blocks().begin());
+ JITTargetAddress FixupAddress = FixupSect.sh_addr + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress();
+ Edge GE(*Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), *BlockToFix, GE, riscv::getEdgeKindName(*Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix->addEdge(std::move(GE));
return Error::success();
}
@@ -304,6 +379,8 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(std::move(MarkLive));
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ Config.PostPrunePasses.push_back(
+ PerGraphGOTAndPLTStubsBuilder_ELF_riscv::asPass);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
return Ctx->notifyFailed(std::move(Err));
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index a5aed6d25200..3ea9ffee6554 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -12,6 +12,7 @@
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/TableManager.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Endian.h"
@@ -20,7 +21,6 @@
#include "EHFrameSupportImpl.h"
#include "ELFLinkGraphBuilder.h"
#include "JITLinkGeneric.h"
-#include "PerGraphGOTAndPLTStubsBuilder.h"
#define DEBUG_TYPE "jitlink"
@@ -30,196 +30,82 @@ using namespace llvm::jitlink::ELF_x86_64_Edges;
namespace {
-constexpr StringRef ELFGOTSectionName = "$__GOT";
constexpr StringRef ELFGOTSymbolName = "_GLOBAL_OFFSET_TABLE_";
+constexpr StringRef ELFTLSInfoSectionName = "$__TLSINFO";
-class PerGraphGOTAndPLTStubsBuilder_ELF_x86_64
- : public PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_ELF_x86_64> {
+class TLSInfoTableManager_ELF_x86_64
+ : public TableManager<TLSInfoTableManager_ELF_x86_64> {
public:
- static const uint8_t NullGOTEntryContent[8];
- static const uint8_t StubContent[6];
-
- using PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_ELF_x86_64>::PerGraphGOTAndPLTStubsBuilder;
-
- bool isGOTEdgeToFix(Edge &E) const {
- if (E.getKind() == GOTOFF64) {
- // We need to make sure that the GOT section exists, but don't otherwise
- // need to fix up this edge.
- getGOTSection();
- return false;
- }
-
- return E.getKind() == PCRel32GOT || E.getKind() == PCRel32GOTLoad ||
- E.getKind() == PCRel64GOT || E.getKind() == GOT64;
- }
+ static const uint8_t TLSInfoEntryContent[16];
- Symbol &createGOTEntry(Symbol &Target) {
- auto &GOTEntryBlock = G.createContentBlock(
- getGOTSection(), getGOTEntryBlockContent(), 0, 8, 0);
- GOTEntryBlock.addEdge(Pointer64, 0, Target, 0);
- return G.addAnonymousSymbol(GOTEntryBlock, 0, 8, false, false);
- }
+ static StringRef getSectionName() { return ELFTLSInfoSectionName; }
- void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- // If this is a PCRel32GOT/PCRel64GOT then change it to an ordinary
- // PCRel32/PCRel64. If it is a PCRel32GOTLoad then leave it as-is for now:
- // We will use the kind to check for GOT optimization opportunities in the
- // optimizeMachO_x86_64_GOTAndStubs pass below.
- // If it's a GOT64 leave it as is.
- switch (E.getKind()) {
- case PCRel32GOT:
- E.setKind(PCRel32);
- break;
- case PCRel64GOT:
- E.setKind(PCRel64);
- break;
- case GOT64:
- break;
- case PCRel32GOTLoad:
- break;
- default:
- llvm_unreachable("Unexpected GOT edge kind");
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getKind() == x86_64::RequestTLSDescInGOTAndTransformToDelta32) {
+ LLVM_DEBUG({
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << formatv("{0:x}", B->getFixupAddress(E)) << " ("
+ << formatv("{0:x}", B->getAddress()) << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setKind(x86_64::Delta32);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
}
-
- E.setTarget(GOTEntry);
- // Leave the edge addend as-is.
+ return false;
}
- bool isExternalBranchEdge(Edge &E) {
- return E.getKind() == Branch32 && !E.getTarget().isDefined();
- }
-
- Symbol &createPLTStub(Symbol &Target) {
- auto &StubContentBlock =
- G.createContentBlock(getStubsSection(), getStubBlockContent(), 0, 1, 0);
- // Re-use GOT entries for stub targets.
- auto &GOTEntrySymbol = getGOTEntry(Target);
- StubContentBlock.addEdge(PCRel32, 2, GOTEntrySymbol, -4);
- return G.addAnonymousSymbol(StubContentBlock, 0, 6, true, false);
- }
-
- void fixPLTEdge(Edge &E, Symbol &Stub) {
- assert(E.getKind() == Branch32 && "Not a Branch32 edge?");
-
- // Set the edge kind to Branch32ToStub. We will use this to check for stub
- // optimization opportunities in the optimize ELF_x86_64_GOTAndStubs pass
- // below.
- E.setKind(Branch32ToStub);
- E.setTarget(Stub);
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ // The TLS info entry's key value will be written by the fixTLVSectionByName
+ // pass, so create mutable content.
+ auto &TLSInfoEntry = G.createMutableContentBlock(
+ getTLSInfoSection(G), G.allocateContent(getTLSInfoEntryContent()), 0, 8,
+ 0);
+ TLSInfoEntry.addEdge(x86_64::Pointer64, 8, Target, 0);
+ return G.addAnonymousSymbol(TLSInfoEntry, 0, 16, false, false);
}
private:
- Section &getGOTSection() const {
- if (!GOTSection)
- GOTSection = &G.createSection(ELFGOTSectionName, sys::Memory::MF_READ);
- return *GOTSection;
- }
-
- Section &getStubsSection() const {
- if (!StubsSection) {
- auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", StubsProt);
- }
- return *StubsSection;
- }
-
- ArrayRef<char> getGOTEntryBlockContent() {
- return {reinterpret_cast<const char *>(NullGOTEntryContent),
- sizeof(NullGOTEntryContent)};
+ Section &getTLSInfoSection(LinkGraph &G) {
+ if (!TLSInfoTable)
+ TLSInfoTable = &G.createSection(ELFTLSInfoSectionName, MemProt::Read);
+ return *TLSInfoTable;
}
- ArrayRef<char> getStubBlockContent() {
- return {reinterpret_cast<const char *>(StubContent), sizeof(StubContent)};
+ ArrayRef<char> getTLSInfoEntryContent() const {
+ return {reinterpret_cast<const char *>(TLSInfoEntryContent),
+ sizeof(TLSInfoEntryContent)};
}
- mutable Section *GOTSection = nullptr;
- mutable Section *StubsSection = nullptr;
+ Section *TLSInfoTable = nullptr;
};
-} // namespace
+const uint8_t TLSInfoTableManager_ELF_x86_64::TLSInfoEntryContent[16] = {
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /*pthread key */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 /*data address*/
+};
-const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::NullGOTEntryContent[8] =
- {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-const uint8_t PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent[6] = {
- 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00};
-
-static Error optimizeELF_x86_64_GOTAndStubs(LinkGraph &G) {
- LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
-
- for (auto *B : G.blocks())
- for (auto &E : B->edges())
- if (E.getKind() == PCRel32GOTLoad) {
- // Replace GOT load with LEA only for MOVQ instructions.
- constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b};
- if (E.getOffset() < 3 ||
- strncmp(B->getContent().data() + E.getOffset() - 3,
- reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0)
- continue;
-
- auto &GOTBlock = E.getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT entry block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT entry should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- // Change the edge kind as we don't go through GOT anymore. This is
- // for formal correctness only. Technically, the two relocation kinds
- // are resolved the same way.
- E.setKind(PCRel32);
- E.setTarget(GOTTarget);
- auto *BlockData = reinterpret_cast<uint8_t *>(
- const_cast<char *>(B->getContent().data()));
- BlockData[E.getOffset() - 2] = 0x8d;
- LLVM_DEBUG({
- dbgs() << " Replaced GOT load wih LEA:\n ";
- printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- } else if (E.getKind() == Branch32ToStub) {
- auto &StubBlock = E.getTarget().getBlock();
- assert(
- StubBlock.getSize() ==
- sizeof(PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::StubContent) &&
- "Stub block should be stub sized");
- assert(StubBlock.edges_size() == 1 &&
- "Stub block should only have one outgoing edge");
-
- auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT block should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- E.setKind(Branch32);
- E.setTarget(GOTTarget);
- LLVM_DEBUG({
- dbgs() << " Replaced stub branch with direct branch:\n ";
- printEdge(dbgs(), *B, E, getELFX86RelocationKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- }
+Error buildTables_ELF_x86_64(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+ x86_64::GOTTableManager GOT;
+ x86_64::PLTTableManager PLT(GOT);
+ TLSInfoTableManager_ELF_x86_64 TLSInfo;
+ visitExistingEdges(G, GOT, PLT, TLSInfo);
return Error::success();
}
+} // namespace
+
+static const char *getELFX86_64RelocName(uint32_t Type) {
+ switch (Type) {
+#define ELF_RELOC(Name, Number) \
+ case Number: \
+ return #Name;
+#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
+#undef ELF_RELOC
+ }
+ return "Unrecognized ELF/x86-64 relocation type";
+}
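getELFX86_64RelocName relies on LLVM's X-macro pattern: ELFRelocs/x86_64.def is a flat list of ELF_RELOC(Name, Number) entries, and including it under the local #define turns each entry into a case label. A self-contained sketch of the same technique, with a hypothetical inline entry list standing in for the .def file:

#include <cstdint>

// Hypothetical two-entry relocation list; the real code re-includes the
// shared .def file instead.
#define EXAMPLE_RELOCS(X)                                                      \
  X(R_EXAMPLE_NONE, 0)                                                         \
  X(R_EXAMPLE_PC32, 2)

static const char *exampleRelocName(uint32_t Type) {
  switch (Type) {
#define ELF_RELOC(Name, Number)                                                \
  case Number:                                                                 \
    return #Name;
    EXAMPLE_RELOCS(ELF_RELOC)
#undef ELF_RELOC
  }
  return "unknown relocation";
}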
namespace llvm {
namespace jitlink {
@@ -228,10 +114,13 @@ namespace jitlink {
// generic
class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder<object::ELF64LE> {
private:
+ using ELFT = object::ELF64LE;
static Expected<ELF_x86_64_Edges::ELFX86RelocationKind>
getRelocationKind(const uint32_t Type) {
switch (Type) {
+ case ELF::R_X86_64_32S:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer32Signed;
case ELF::R_X86_64_PC32:
return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32;
case ELF::R_X86_64_PC64:
@@ -240,9 +129,11 @@ private:
case ELF::R_X86_64_64:
return ELF_x86_64_Edges::ELFX86RelocationKind::Pointer64;
case ELF::R_X86_64_GOTPCREL:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad;
case ELF::R_X86_64_GOTPCRELX:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoadRelaxable;
case ELF::R_X86_64_REX_GOTPCRELX:
- return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32GOTLoad;
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32REXGOTLoadRelaxable;
case ELF::R_X86_64_GOTPCREL64:
return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel64GOT;
case ELF::R_X86_64_GOT64:
@@ -251,109 +142,121 @@ private:
return ELF_x86_64_Edges::ELFX86RelocationKind::GOTOFF64;
case ELF::R_X86_64_PLT32:
return ELF_x86_64_Edges::ELFX86RelocationKind::Branch32;
+ case ELF::R_X86_64_TLSGD:
+ return ELF_x86_64_Edges::ELFX86RelocationKind::PCRel32TLV;
}
- return make_error<JITLinkError>("Unsupported x86-64 relocation:" +
- formatv("{0:d}", Type));
+ return make_error<JITLinkError>("Unsupported x86-64 relocation type " +
+ formatv("{0:d}: ", Type) +
+ getELFX86_64RelocName(Type));
}
Error addRelocations() override {
- LLVM_DEBUG(dbgs() << "Adding relocations\n");
- // TODO a partern is forming of iterate some sections but only give me
- // ones I am interested, i should abstract that concept some where
- for (auto &SecRef : Sections) {
- if (SecRef.sh_type != ELF::SHT_RELA && SecRef.sh_type != ELF::SHT_REL)
- continue;
- // TODO can the elf obj file do this for me?
- if (SecRef.sh_type == ELF::SHT_REL)
- return make_error<llvm::StringError>("Shouldn't have REL in x64",
- llvm::inconvertibleErrorCode());
-
- auto RelSectName = Obj.getSectionName(SecRef);
- if (!RelSectName)
- return RelSectName.takeError();
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
+ using Base = ELFLinkGraphBuilder<ELFT>;
+ using Self = ELFLinkGraphBuilder_x86_64;
+ for (const auto &RelSect : Base::Sections) {
+ // Validate the section to read relocation entries from.
+ if (RelSect.sh_type == ELF::SHT_REL)
+ return make_error<StringError>(
+ "No SHT_REL in valid x64 ELF object files",
+ inconvertibleErrorCode());
+
+ if (Error Err = Base::forEachRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+ }
- LLVM_DEBUG({
- dbgs() << "Adding relocations from section " << *RelSectName << "\n";
- });
+ return Error::success();
+ }
- auto UpdateSection = Obj.getSection(SecRef.sh_info);
- if (!UpdateSection)
- return UpdateSection.takeError();
-
- auto UpdateSectionName = Obj.getSectionName(**UpdateSection);
- if (!UpdateSectionName)
- return UpdateSectionName.takeError();
-
- // Don't process relocations for debug sections.
- if (isDwarfSection(*UpdateSectionName)) {
- LLVM_DEBUG({
- dbgs() << " Target is dwarf section " << *UpdateSectionName
- << ". Skipping.\n";
- });
- continue;
- } else
- LLVM_DEBUG({
- dbgs() << " For target section " << *UpdateSectionName << "\n";
- });
-
- auto JITSection = G->findSectionByName(*UpdateSectionName);
- if (!JITSection)
- return make_error<llvm::StringError>(
- "Refencing a a section that wasn't added to graph" +
- *UpdateSectionName,
- llvm::inconvertibleErrorCode());
-
- auto Relocations = Obj.relas(SecRef);
- if (!Relocations)
- return Relocations.takeError();
-
- for (const auto &Rela : *Relocations) {
- auto Type = Rela.getType(false);
-
- LLVM_DEBUG({
- dbgs() << "Relocation Type: " << Type << "\n"
- << "Name: " << Obj.getRelocationTypeName(Type) << "\n";
- });
- auto SymbolIndex = Rela.getSymbol(false);
- auto Symbol = Obj.getRelocationSymbol(Rela, SymTabSec);
- if (!Symbol)
- return Symbol.takeError();
-
- auto BlockToFix = *(JITSection->blocks().begin());
- auto *TargetSymbol = getGraphSymbol(SymbolIndex);
-
- if (!TargetSymbol) {
- return make_error<llvm::StringError>(
- "Could not find symbol at given index, did you add it to "
- "JITSymbolTable? index: " +
- std::to_string(SymbolIndex) +
- ", shndx: " + std::to_string((*Symbol)->st_shndx) +
- " Size of table: " + std::to_string(GraphSymbols.size()),
- llvm::inconvertibleErrorCode());
- }
- uint64_t Addend = Rela.r_addend;
- JITTargetAddress FixupAddress =
- (*UpdateSection)->sh_addr + Rela.r_offset;
-
- LLVM_DEBUG({
- dbgs() << "Processing relocation at "
- << format("0x%016" PRIx64, FixupAddress) << "\n";
- });
- auto Kind = getRelocationKind(Type);
- if (!Kind)
- return Kind.takeError();
-
- LLVM_DEBUG({
- Edge GE(*Kind, FixupAddress - BlockToFix->getAddress(), *TargetSymbol,
- Addend);
- printEdge(dbgs(), *BlockToFix, GE,
- getELFX86RelocationKindName(*Kind));
- dbgs() << "\n";
- });
- BlockToFix->addEdge(*Kind, FixupAddress - BlockToFix->getAddress(),
- *TargetSymbol, Addend);
- }
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSection,
+ Section &GraphSection) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ // Validate the relocation kind.
+ auto ELFRelocKind = getRelocationKind(Rel.getType(false));
+ if (!ELFRelocKind)
+ return ELFRelocKind.takeError();
+
+ int64_t Addend = Rel.r_addend;
+ Edge::Kind Kind = Edge::Invalid;
+ switch (*ELFRelocKind) {
+ case PCRel32:
+ Kind = x86_64::Delta32;
+ break;
+ case Delta64:
+ Kind = x86_64::Delta64;
+ break;
+ case Pointer32Signed:
+ Kind = x86_64::Pointer32Signed;
+ break;
+ case Pointer64:
+ Kind = x86_64::Pointer64;
+ break;
+ case PCRel32GOTLoad: {
+ Kind = x86_64::RequestGOTAndTransformToDelta32;
+ break;
+ }
+ case PCRel32REXGOTLoadRelaxable: {
+ Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable;
+ Addend = 0;
+ break;
+ }
+ case PCRel32TLV: {
+ Kind = x86_64::RequestTLSDescInGOTAndTransformToDelta32;
+ break;
+ }
+ case PCRel32GOTLoadRelaxable: {
+ Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
+ Addend = 0;
+ break;
+ }
+ case PCRel64GOT: {
+ Kind = x86_64::RequestGOTAndTransformToDelta64;
+ break;
+ }
+ case GOT64: {
+ Kind = x86_64::RequestGOTAndTransformToDelta64FromGOT;
+ break;
+ }
+ case GOTOFF64: {
+ Kind = x86_64::Delta64FromGOT;
+ break;
+ }
+ case Branch32: {
+ Kind = x86_64::BranchPCRel32;
+ Addend = 0;
+ break;
}
+ }
+
+ Block *BlockToFix = *(GraphSection.blocks().begin());
+ JITTargetAddress FixupAddress = FixupSection.sh_addr + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix->getAddress();
+ Edge GE(Kind, Offset, *GraphSymbol, Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), *BlockToFix, GE, getELFX86RelocationKindName(Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix->addEdge(std::move(GE));
return Error::success();
}
@@ -361,7 +264,7 @@ public:
ELFLinkGraphBuilder_x86_64(StringRef FileName,
const object::ELFFile<object::ELF64LE> &Obj)
: ELFLinkGraphBuilder(Obj, Triple("x86_64-unknown-linux"), FileName,
- getELFX86RelocationKindName) {}
+ x86_64::getEdgeKindName) {}
};
class ELFJITLinker_x86_64 : public JITLinker<ELFJITLinker_x86_64> {
@@ -384,7 +287,8 @@ private:
createDefineExternalSectionStartAndEndSymbolsPass(
[&](LinkGraph &LG, Symbol &Sym) -> SectionRangeSymbolDesc {
if (Sym.getName() == ELFGOTSymbolName)
- if (auto *GOTSection = G.findSectionByName(ELFGOTSectionName)) {
+ if (auto *GOTSection = G.findSectionByName(
+ x86_64::GOTTableManager::getSectionName())) {
GOTSymbol = &Sym;
return {*GOTSection, true};
}
@@ -403,7 +307,8 @@ private:
// Otherwise look for a GOT section: If it already has a start symbol we'll
// record it, otherwise we'll create our own.
// If there's a GOT section but we didn't find an external GOT symbol...
- if (auto *GOTSection = G.findSectionByName(ELFGOTSectionName)) {
+ if (auto *GOTSection =
+ G.findSectionByName(x86_64::GOTTableManager::getSectionName())) {
// Check for an existing defined symbol.
for (auto *Sym : GOTSection->symbols())
@@ -427,81 +332,7 @@ private:
}
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- using namespace ELF_x86_64_Edges;
- using namespace llvm::support;
-
- char *BlockWorkingMem = B.getAlreadyMutableContent().data();
- char *FixupPtr = BlockWorkingMem + E.getOffset();
- JITTargetAddress FixupAddress = B.getAddress() + E.getOffset();
- switch (E.getKind()) {
- case ELFX86RelocationKind::Branch32:
- case ELFX86RelocationKind::Branch32ToStub:
- case ELFX86RelocationKind::PCRel32:
- case ELFX86RelocationKind::PCRel32GOTLoad: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- if (LLVM_LIKELY(x86_64::isInRangeForImmS32(Value)))
- *(little32_t *)FixupPtr = Value;
- else
- return makeTargetOutOfRangeError(G, B, E);
- break;
- }
- case ELFX86RelocationKind::PCRel64: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- *(little64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::Pointer64: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend();
- *(ulittle64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::Delta32: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- if (LLVM_LIKELY(x86_64::isInRangeForImmS32(Value)))
- *(little32_t *)FixupPtr = Value;
- else
- return makeTargetOutOfRangeError(G, B, E);
- break;
- }
- case ELFX86RelocationKind::Delta64: {
- int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
- *(little64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::NegDelta32: {
- int64_t Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
- if (LLVM_LIKELY(x86_64::isInRangeForImmS32(Value)))
- *(little32_t *)FixupPtr = Value;
- else
- return makeTargetOutOfRangeError(G, B, E);
- break;
- }
- case ELFX86RelocationKind::NegDelta64: {
- int64_t Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
- *(little64_t *)FixupPtr = Value;
- break;
- }
- case ELFX86RelocationKind::GOT64:
- case ELFX86RelocationKind::GOTOFF64: {
- // GOT64: Offset of GOT entry within GOT.
- // GOTOFF64: Offset from GOT base to target.
- // The expressions are the same in both cases, but in the GOT64 case the
- // edge will have been fixed to point at the GOT entry, and in the
- // GOTOFF64 case it will still point at the original target.
- assert(GOTSymbol && "No GOT section symbol");
- int64_t Value =
- E.getTarget().getAddress() - GOTSymbol->getAddress() + E.getAddend();
- *(little64_t *)FixupPtr = Value;
- break;
- }
- default:
- LLVM_DEBUG({
- dbgs() << "Bad edge: " << getELFX86RelocationKindName(E.getKind())
- << "\n";
- });
- llvm_unreachable("Unsupported relocation");
- }
- return Error::success();
+ return x86_64::applyFixup(G, B, E, GOTSymbol);
}
};
@@ -547,8 +378,9 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
Config.PrePrunePasses.push_back(EHFrameSplitter(".eh_frame"));
- Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
- ".eh_frame", G->getPointerSize(), Delta64, Delta32, NegDelta32));
+ Config.PrePrunePasses.push_back(
+ EHFrameEdgeFixer(".eh_frame", x86_64::PointerSize, x86_64::Delta64,
+ x86_64::Delta32, x86_64::NegDelta32));
Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame"));
// Construct a JITLinker and run the link function.
@@ -558,9 +390,8 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
- // Add an in-place GOT/Stubs pass.
- Config.PostPrunePasses.push_back(
- PerGraphGOTAndPLTStubsBuilder_ELF_x86_64::asPass);
+ // Add an in-place GOT/Stubs/TLSInfoEntry build pass.
+ Config.PostPrunePasses.push_back(buildTables_ELF_x86_64);
// Resolve any external section start / end symbols.
Config.PostAllocationPasses.push_back(
@@ -568,7 +399,7 @@ void link_ELF_x86_64(std::unique_ptr<LinkGraph> G,
identifyELFSectionStartAndEndSymbols));
// Add GOT/Stubs optimizer pass.
- Config.PreFixupPasses.push_back(optimizeELF_x86_64_GOTAndStubs);
+ Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
@@ -580,44 +411,26 @@ const char *getELFX86RelocationKindName(Edge::Kind R) {
switch (R) {
case Branch32:
return "Branch32";
- case Branch32ToStub:
- return "Branch32ToStub";
- case Pointer32:
- return "Pointer32";
+ case Pointer32Signed:
+ return "Pointer32Signed";
case Pointer64:
return "Pointer64";
- case Pointer64Anon:
- return "Pointer64Anon";
case PCRel32:
return "PCRel32";
- case PCRel32Minus1:
- return "PCRel32Minus1";
- case PCRel32Minus2:
- return "PCRel32Minus2";
- case PCRel32Minus4:
- return "PCRel32Minus4";
- case PCRel32Anon:
- return "PCRel32Anon";
- case PCRel32Minus1Anon:
- return "PCRel32Minus1Anon";
- case PCRel32Minus2Anon:
- return "PCRel32Minus2Anon";
- case PCRel32Minus4Anon:
- return "PCRel32Minus4Anon";
case PCRel32GOTLoad:
return "PCRel32GOTLoad";
- case PCRel32GOT:
- return "PCRel32GOT";
- case PCRel32TLV:
- return "PCRel32TLV";
- case Delta32:
- return "Delta32";
+ case PCRel32GOTLoadRelaxable:
+ return "PCRel32GOTLoadRelaxable";
+ case PCRel32REXGOTLoadRelaxable:
+ return "PCRel32REXGOTLoad";
+ case PCRel64GOT:
+ return "PCRel64GOT";
case Delta64:
return "Delta64";
- case NegDelta32:
- return "NegDelta32";
- case NegDelta64:
- return "NegDelta64";
+ case GOT64:
+ return "GOT64";
+ case GOTOFF64:
+ return "GOTOFF64";
}
return getGenericEdgeKindName(static_cast<Edge::Kind>(R));
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index a4976f2f3d27..51dcc1c35fad 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -1,9 +1,8 @@
//===------------- JITLink.cpp - Core Run-time JIT linker APIs ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -214,7 +213,12 @@ Block &LinkGraph::splitBlock(Block &B, size_t SplitIndex,
// Transfer all symbols with offset less than SplitIndex to NewBlock.
while (!BlockSymbols.empty() &&
BlockSymbols.back()->getOffset() < SplitIndex) {
- BlockSymbols.back()->setBlock(NewBlock);
+ auto *Sym = BlockSymbols.back();
+ // If the symbol extends beyond the split, update the size to be within
+ // the new block.
+ if (Sym->getOffset() + Sym->getSize() > SplitIndex)
+ Sym->setSize(SplitIndex - Sym->getOffset());
+ Sym->setBlock(NewBlock);
BlockSymbols.pop_back();
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 5b163ab6316d..706688aba4ec 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -48,12 +48,21 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
if (auto Err = runPasses(Passes.PostPrunePasses))
return Ctx->notifyFailed(std::move(Err));
- // Sort blocks into segments.
- auto Layout = layOutBlocks();
+ Ctx->getMemoryManager().allocate(
+ Ctx->getJITLinkDylib(), *G,
+ [S = std::move(Self)](AllocResult AR) mutable {
+ auto *TmpSelf = S.get();
+ TmpSelf->linkPhase2(std::move(S), std::move(AR));
+ });
+}
- // Allocate memory for segments.
- if (auto Err = allocateSegments(Layout))
- return Ctx->notifyFailed(std::move(Err));
+void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
+ AllocResult AR) {
+
+ if (AR)
+ Alloc = std::move(*AR);
+ else
+ return Ctx->notifyFailed(AR.takeError());
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
@@ -73,16 +82,16 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
auto ExternalSymbols = getExternalSymbolNames();
- // If there are no external symbols then proceed immediately with phase 2.
+ // If there are no external symbols then proceed immediately with phase 3.
if (ExternalSymbols.empty()) {
LLVM_DEBUG({
dbgs() << "No external symbols for " << G->getName()
- << ". Proceeding immediately with link phase 2.\n";
+ << ". Proceeding immediately with link phase 3.\n";
});
// FIXME: Once callee expressions are defined to be sequenced before
// argument expressions (c++17) we can simplify this. See below.
auto &TmpSelf = *Self;
- TmpSelf.linkPhase2(std::move(Self), AsyncLookupResult(), std::move(Layout));
+ TmpSelf.linkPhase3(std::move(Self), AsyncLookupResult());
return;
}
@@ -100,37 +109,31 @@ void JITLinkerBase::linkPhase1(std::unique_ptr<JITLinkerBase> Self) {
//
// Ctx->lookup(std::move(UnresolvedExternals),
// [Self=std::move(Self)](Expected<AsyncLookupResult> Result) {
- // Self->linkPhase2(std::move(Self), std::move(Result));
+ // Self->linkPhase3(std::move(Self), std::move(Result));
// });
- auto *TmpCtx = Ctx.get();
- TmpCtx->lookup(std::move(ExternalSymbols),
- createLookupContinuation(
- [S = std::move(Self), L = std::move(Layout)](
- Expected<AsyncLookupResult> LookupResult) mutable {
- auto &TmpSelf = *S;
- TmpSelf.linkPhase2(std::move(S), std::move(LookupResult),
- std::move(L));
- }));
+ Ctx->lookup(std::move(ExternalSymbols),
+ createLookupContinuation(
+ [S = std::move(Self)](
+ Expected<AsyncLookupResult> LookupResult) mutable {
+ auto &TmpSelf = *S;
+ TmpSelf.linkPhase3(std::move(S), std::move(LookupResult));
+ }));
}
-void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
- Expected<AsyncLookupResult> LR,
- SegmentLayoutMap Layout) {
+void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self,
+ Expected<AsyncLookupResult> LR) {
LLVM_DEBUG({
- dbgs() << "Starting link phase 2 for graph " << G->getName() << "\n";
+ dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n";
});
// If the lookup failed, bail out.
if (!LR)
- return deallocateAndBailOut(LR.takeError());
+ return abandonAllocAndBailOut(std::move(Self), LR.takeError());
// Assign addresses to external addressables.
applyLookupResult(*LR);
- // Copy block content to working memory.
- copyBlockContentToWorkingMemory(Layout, *Alloc);
-
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
<< "\" before pre-fixup passes:\n";
@@ -138,7 +141,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
});
if (auto Err = runPasses(Passes.PreFixupPasses))
- return deallocateAndBailOut(std::move(Err));
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName() << "\" before copy-and-fixup:\n";
@@ -147,7 +150,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
// Fix up block content.
if (auto Err = fixUpBlocks(*G))
- return deallocateAndBailOut(std::move(Err));
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName() << "\" after copy-and-fixup:\n";
@@ -155,27 +158,25 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
});
if (auto Err = runPasses(Passes.PostFixupPasses))
- return deallocateAndBailOut(std::move(Err));
-
- // FIXME: Use move capture once we have c++14.
- auto *UnownedSelf = Self.release();
- auto Phase3Continuation = [UnownedSelf](Error Err) {
- std::unique_ptr<JITLinkerBase> Self(UnownedSelf);
- UnownedSelf->linkPhase3(std::move(Self), std::move(Err));
- };
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
- Alloc->finalizeAsync(std::move(Phase3Continuation));
+ Alloc->finalize([S = std::move(Self)](FinalizeResult FR) mutable {
+ auto *TmpSelf = S.get();
+ TmpSelf->linkPhase4(std::move(S), std::move(FR));
+ });
}
-void JITLinkerBase::linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err) {
+void JITLinkerBase::linkPhase4(std::unique_ptr<JITLinkerBase> Self,
+ FinalizeResult FR) {
LLVM_DEBUG({
- dbgs() << "Starting link phase 3 for graph " << G->getName() << "\n";
+ dbgs() << "Starting link phase 4 for graph " << G->getName() << "\n";
});
- if (Err)
- return deallocateAndBailOut(std::move(Err));
- Ctx->notifyFinalized(std::move(Alloc));
+ if (!FR)
+ return Ctx->notifyFailed(FR.takeError());
+
+ Ctx->notifyFinalized(std::move(*FR));
LLVM_DEBUG({ dbgs() << "Link of graph " << G->getName() << " complete\n"; });
}
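
Each link phase above hands ownership of the linker to the next one: JITLinkerBase owns itself through a std::unique_ptr that is moved into every continuation, and a reference (TmpSelf) is taken before the move so the call never reads through a moved-from pointer. A minimal standalone sketch of that idiom follows; the names (AsyncStep, phaseA/phaseB) are illustrative only and not JITLink API.

#include <functional>
#include <memory>
#include <utility>

struct AsyncStep {
  // Phase 1: hand off to an externally owned continuation (the patch passes
  // such continuations to Ctx->lookup and Alloc->finalize).
  std::function<void()> phaseA(std::unique_ptr<AsyncStep> Self) {
    return [S = std::move(Self)]() mutable {
      // Take a reference before moving S into the argument list so the call
      // never reads a moved-from unique_ptr (the TmpSelf idiom above).
      auto &TmpSelf = *S;
      TmpSelf.phaseB(std::move(S));
    };
  }

  // Phase 2: the parameter Self is now the sole owner; *this is destroyed
  // when it goes out of scope at the end of this call.
  void phaseB(std::unique_ptr<AsyncStep> Self) { (void)Self; }
};

int main() {
  auto Step = std::make_unique<AsyncStep>();
  auto &S = *Step;
  auto Continuation = S.phaseA(std::move(Step));
  Continuation(); // "later": the asynchronous event fires
  return 0;
}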
@@ -187,131 +188,6 @@ Error JITLinkerBase::runPasses(LinkGraphPassList &Passes) {
return Error::success();
}
-JITLinkerBase::SegmentLayoutMap JITLinkerBase::layOutBlocks() {
-
- SegmentLayoutMap Layout;
-
- /// Partition blocks based on permissions and content vs. zero-fill.
- for (auto *B : G->blocks()) {
- auto &SegLists = Layout[B->getSection().getProtectionFlags()];
- if (!B->isZeroFill())
- SegLists.ContentBlocks.push_back(B);
- else
- SegLists.ZeroFillBlocks.push_back(B);
- }
-
- /// Sort blocks within each list.
- for (auto &KV : Layout) {
-
- auto CompareBlocks = [](const Block *LHS, const Block *RHS) {
- // Sort by section, address and size
- if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal())
- return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal();
- if (LHS->getAddress() != RHS->getAddress())
- return LHS->getAddress() < RHS->getAddress();
- return LHS->getSize() < RHS->getSize();
- };
-
- auto &SegLists = KV.second;
- llvm::sort(SegLists.ContentBlocks, CompareBlocks);
- llvm::sort(SegLists.ZeroFillBlocks, CompareBlocks);
- }
-
- LLVM_DEBUG({
- dbgs() << "Computed segment ordering:\n";
- for (auto &KV : Layout) {
- dbgs() << " Segment "
- << static_cast<sys::Memory::ProtectionFlags>(KV.first) << ":\n";
- auto &SL = KV.second;
- for (auto &SIEntry :
- {std::make_pair(&SL.ContentBlocks, "content block"),
- std::make_pair(&SL.ZeroFillBlocks, "zero-fill block")}) {
- dbgs() << " " << SIEntry.second << ":\n";
- for (auto *B : *SIEntry.first)
- dbgs() << " " << *B << "\n";
- }
- }
- });
-
- return Layout;
-}
-
-Error JITLinkerBase::allocateSegments(const SegmentLayoutMap &Layout) {
-
- // Compute segment sizes and allocate memory.
- LLVM_DEBUG(dbgs() << "JIT linker requesting: { ");
- JITLinkMemoryManager::SegmentsRequestMap Segments;
- for (auto &KV : Layout) {
- auto &Prot = KV.first;
- auto &SegLists = KV.second;
-
- uint64_t SegAlign = 1;
-
- // Calculate segment content size.
- size_t SegContentSize = 0;
- for (auto *B : SegLists.ContentBlocks) {
- SegAlign = std::max(SegAlign, B->getAlignment());
- SegContentSize = alignToBlock(SegContentSize, *B);
- SegContentSize += B->getSize();
- }
-
- uint64_t SegZeroFillStart = SegContentSize;
- uint64_t SegZeroFillEnd = SegZeroFillStart;
-
- for (auto *B : SegLists.ZeroFillBlocks) {
- SegAlign = std::max(SegAlign, B->getAlignment());
- SegZeroFillEnd = alignToBlock(SegZeroFillEnd, *B);
- SegZeroFillEnd += B->getSize();
- }
-
- Segments[Prot] = {SegAlign, SegContentSize,
- SegZeroFillEnd - SegZeroFillStart};
-
- LLVM_DEBUG({
- dbgs() << (&KV == &*Layout.begin() ? "" : "; ")
- << static_cast<sys::Memory::ProtectionFlags>(Prot)
- << ": alignment = " << SegAlign
- << ", content size = " << SegContentSize
- << ", zero-fill size = " << (SegZeroFillEnd - SegZeroFillStart);
- });
- }
- LLVM_DEBUG(dbgs() << " }\n");
-
- if (auto AllocOrErr =
- Ctx->getMemoryManager().allocate(Ctx->getJITLinkDylib(), Segments))
- Alloc = std::move(*AllocOrErr);
- else
- return AllocOrErr.takeError();
-
- LLVM_DEBUG({
- dbgs() << "JIT linker got memory (working -> target):\n";
- for (auto &KV : Layout) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(KV.first);
- dbgs() << " " << Prot << ": "
- << (const void *)Alloc->getWorkingMemory(Prot).data() << " -> "
- << formatv("{0:x16}", Alloc->getTargetMemory(Prot)) << "\n";
- }
- });
-
- // Update block target addresses.
- for (auto &KV : Layout) {
- auto &Prot = KV.first;
- auto &SL = KV.second;
-
- JITTargetAddress NextBlockAddr =
- Alloc->getTargetMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
-
- for (auto *SIList : {&SL.ContentBlocks, &SL.ZeroFillBlocks})
- for (auto *B : *SIList) {
- NextBlockAddr = alignToBlock(NextBlockAddr, *B);
- B->setAddress(NextBlockAddr);
- NextBlockAddr += B->getSize();
- }
- }
-
- return Error::success();
-}
-
JITLinkContext::LookupMap JITLinkerBase::getExternalSymbolNames() const {
// Identify unresolved external symbols.
JITLinkContext::LookupMap UnresolvedExternals;
@@ -351,81 +227,13 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
});
}
-void JITLinkerBase::copyBlockContentToWorkingMemory(
- const SegmentLayoutMap &Layout, JITLinkMemoryManager::Allocation &Alloc) {
-
- LLVM_DEBUG(dbgs() << "Copying block content:\n");
- for (auto &KV : Layout) {
- auto &Prot = KV.first;
- auto &SegLayout = KV.second;
-
- auto SegMem =
- Alloc.getWorkingMemory(static_cast<sys::Memory::ProtectionFlags>(Prot));
- char *LastBlockEnd = SegMem.data();
- char *BlockDataPtr = LastBlockEnd;
-
- LLVM_DEBUG({
- dbgs() << " Processing segment "
- << static_cast<sys::Memory::ProtectionFlags>(Prot) << " [ "
- << (const void *)SegMem.data() << " .. "
- << (const void *)((char *)SegMem.data() + SegMem.size())
- << " ]\n Processing content sections:\n";
- });
-
- for (auto *B : SegLayout.ContentBlocks) {
- LLVM_DEBUG(dbgs() << " " << *B << ":\n");
-
- // Pad to alignment/alignment-offset.
- BlockDataPtr = alignToBlock(BlockDataPtr, *B);
-
- LLVM_DEBUG({
- dbgs() << " Bumped block pointer to " << (const void *)BlockDataPtr
- << " to meet block alignment " << B->getAlignment()
- << " and alignment offset " << B->getAlignmentOffset() << "\n";
- });
-
- // Zero pad up to alignment.
- LLVM_DEBUG({
- if (LastBlockEnd != BlockDataPtr)
- dbgs() << " Zero padding from " << (const void *)LastBlockEnd
- << " to " << (const void *)BlockDataPtr << "\n";
- });
-
- while (LastBlockEnd != BlockDataPtr)
- *LastBlockEnd++ = 0;
-
- // Copy initial block content.
- LLVM_DEBUG({
- dbgs() << " Copying block " << *B << " content, "
- << B->getContent().size() << " bytes, from "
- << (const void *)B->getContent().data() << " to "
- << (const void *)BlockDataPtr << "\n";
- });
- memcpy(BlockDataPtr, B->getContent().data(), B->getContent().size());
-
- // Point the block's content to the fixed up buffer.
- B->setMutableContent({BlockDataPtr, B->getContent().size()});
-
- // Update block end pointer.
- LastBlockEnd = BlockDataPtr + B->getContent().size();
- BlockDataPtr = LastBlockEnd;
- }
-
- // Zero pad the rest of the segment.
- LLVM_DEBUG({
- dbgs() << " Zero padding end of segment from "
- << (const void *)LastBlockEnd << " to "
- << (const void *)((char *)SegMem.data() + SegMem.size()) << "\n";
- });
- while (LastBlockEnd != SegMem.data() + SegMem.size())
- *LastBlockEnd++ = 0;
- }
-}
-
-void JITLinkerBase::deallocateAndBailOut(Error Err) {
+void JITLinkerBase::abandonAllocAndBailOut(std::unique_ptr<JITLinkerBase> Self,
+ Error Err) {
assert(Err && "Should not be bailing out on success value");
- assert(Alloc && "can not call deallocateAndBailOut before allocation");
- Ctx->notifyFailed(joinErrors(std::move(Err), Alloc->deallocate()));
+ assert(Alloc && "can not call abandonAllocAndBailOut before allocation");
+ Alloc->abandon([S = std::move(Self), E1 = std::move(Err)](Error E2) mutable {
+ S->Ctx->notifyFailed(joinErrors(std::move(E1), std::move(E2)));
+ });
}
void prune(LinkGraph &G) {
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index 6b815fe4fb31..e4fdda0783a4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -42,14 +42,9 @@ public:
virtual ~JITLinkerBase();
protected:
- struct SegmentLayout {
- using BlocksList = std::vector<Block *>;
-
- BlocksList ContentBlocks;
- BlocksList ZeroFillBlocks;
- };
-
- using SegmentLayoutMap = DenseMap<unsigned, SegmentLayout>;
+ using InFlightAlloc = JITLinkMemoryManager::InFlightAlloc;
+ using AllocResult = Expected<std::unique_ptr<InFlightAlloc>>;
+ using FinalizeResult = Expected<JITLinkMemoryManager::FinalizedAlloc>;
// Returns the PassConfiguration for this instance. This can be used by
// JITLinkerBase implementations to add late passes that reference their
@@ -61,39 +56,27 @@ protected:
// 1.1: Run pre-prune passes
// 1.2: Prune graph
// 1.3: Run post-prune passes
- // 1.4: Sort blocks into segments
- // 1.5: Allocate segment memory, update node vmaddrs to target vmaddrs
- // 1.6: Run post-allocation passes
- // 1.7: Notify context of final assigned symbol addresses
- // 1.8: Identify external symbols and make an async call to resolve
+ // 1.4: Allocate memory.
void linkPhase1(std::unique_ptr<JITLinkerBase> Self);
// Phase 2:
- // 2.1: Apply resolution results
- // 2.2: Run pre-fixup passes
- // 2.3: Fix up block contents
- // 2.4: Run post-fixup passes
- // 2.5: Make an async call to transfer and finalize memory.
- void linkPhase2(std::unique_ptr<JITLinkerBase> Self,
- Expected<AsyncLookupResult> LookupResult,
- SegmentLayoutMap Layout);
+  // 2.1: Run post-allocation passes
+  // 2.2: Notify context of final assigned symbol addresses
+  // 2.3: Identify external symbols and make an async call to resolve
+ void linkPhase2(std::unique_ptr<JITLinkerBase> Self, AllocResult AR);
// Phase 3:
- // 3.1: Call OnFinalized callback, handing off allocation.
- void linkPhase3(std::unique_ptr<JITLinkerBase> Self, Error Err);
-
- // Align a JITTargetAddress to conform with block alignment requirements.
- static JITTargetAddress alignToBlock(JITTargetAddress Addr, Block &B) {
- uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment();
- return Addr + Delta;
- }
-
- // Align a pointer to conform with block alignment requirements.
- static char *alignToBlock(char *P, Block &B) {
- uint64_t PAddr = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(P));
- uint64_t Delta = (B.getAlignmentOffset() - PAddr) % B.getAlignment();
- return P + Delta;
- }
+ // 3.1: Apply resolution results
+ // 3.2: Run pre-fixup passes
+ // 3.3: Fix up block contents
+ // 3.4: Run post-fixup passes
+ // 3.5: Make an async call to transfer and finalize memory.
+ void linkPhase3(std::unique_ptr<JITLinkerBase> Self,
+ Expected<AsyncLookupResult> LookupResult);
+
+ // Phase 4:
+ // 4.1: Call OnFinalized callback, handing off allocation.
+ void linkPhase4(std::unique_ptr<JITLinkerBase> Self, FinalizeResult FR);
private:
// Run all passes in the given pass list, bailing out immediately if any pass
@@ -104,18 +87,14 @@ private:
// Implemented in JITLinker.
virtual Error fixUpBlocks(LinkGraph &G) const = 0;
- SegmentLayoutMap layOutBlocks();
- Error allocateSegments(const SegmentLayoutMap &Layout);
JITLinkContext::LookupMap getExternalSymbolNames() const;
void applyLookupResult(AsyncLookupResult LR);
- void copyBlockContentToWorkingMemory(const SegmentLayoutMap &Layout,
- JITLinkMemoryManager::Allocation &Alloc);
- void deallocateAndBailOut(Error Err);
+ void abandonAllocAndBailOut(std::unique_ptr<JITLinkerBase> Self, Error Err);
std::unique_ptr<JITLinkContext> Ctx;
std::unique_ptr<LinkGraph> G;
PassConfiguration Passes;
- std::unique_ptr<JITLinkMemoryManager::Allocation> Alloc;
+ std::unique_ptr<InFlightAlloc> Alloc;
};
template <typename LinkerImpl> class JITLinker : public JITLinkerBase {
@@ -152,6 +131,8 @@ private:
// Copy Block data and apply fixups.
LLVM_DEBUG(dbgs() << " Applying fixups.\n");
+ assert((!B->isZeroFill() || B->edges_size() == 0) &&
+ "Edges in zero-fill block?");
for (auto &E : B->edges()) {
// Skip non-relocation edges.
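
The typedefs above describe the allocation lifecycle the linker now drives: allocate() yields an in-flight allocation, which is either finalized (producing a FinalizedAlloc handle that must later be handed back for deallocation) or abandoned on error. The toy interface below has the same shape and is included purely for orientation; the names and signatures are illustrative, not the real JITLink API.

#include <functional>
#include <string>

// Toy stand-ins: an empty string plays the role of llvm::Error "success".
using ToyError = std::string;
struct ToyFinalizedAlloc {}; // opaque handle; must eventually be deallocated

class ToyInFlightAlloc {
public:
  virtual ~ToyInFlightAlloc() = default;

  // Success path: apply protections, run finalize actions, then hand a
  // finalized handle (or an error) to the continuation.
  virtual void finalize(
      std::function<void(ToyFinalizedAlloc, ToyError)> OnFinalized) = 0;

  // Failure path: release working memory without finalizing.
  virtual void abandon(std::function<void(ToyError)> OnAbandoned) = 0;
};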
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index 36067ccf2753..831b9b26d2fd 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -1,135 +1,528 @@
//===--- JITLinkMemoryManager.cpp - JITLinkMemoryManager implementation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Process.h"
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm;
+
+namespace {
+
+// FIXME: Remove this copy of CWrapperFunctionResult as soon as JITLink can
+// depend on shared utils from Orc.
+
+// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
+union CWrapperFunctionResultDataUnion {
+ char *ValuePtr;
+ char Value[sizeof(ValuePtr)];
+};
+
+// Must be kept in-sync with compiler-rt/lib/orc/c-api.h.
+typedef struct {
+ CWrapperFunctionResultDataUnion Data;
+ size_t Size;
+} CWrapperFunctionResult;
+
+Error toError(CWrapperFunctionResult R) {
+ bool HasError = false;
+ std::string ErrMsg;
+ if (R.Size) {
+ bool Large = R.Size > sizeof(CWrapperFunctionResultDataUnion);
+ char *Content = Large ? R.Data.ValuePtr : R.Data.Value;
+ if (Content[0]) {
+ HasError = true;
+ constexpr unsigned StrStart = 1 + sizeof(uint64_t);
+ ErrMsg.resize(R.Size - StrStart);
+ memcpy(&ErrMsg[0], Content + StrStart, R.Size - StrStart);
+ }
+ if (Large)
+ free(R.Data.ValuePtr);
+ } else if (R.Data.ValuePtr) {
+ HasError = true;
+ ErrMsg = R.Data.ValuePtr;
+ free(R.Data.ValuePtr);
+ }
+
+ if (HasError)
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+ return Error::success();
+}
+} // namespace
+
namespace llvm {
namespace jitlink {
JITLinkMemoryManager::~JITLinkMemoryManager() = default;
-JITLinkMemoryManager::Allocation::~Allocation() = default;
-
-Expected<std::unique_ptr<JITLinkMemoryManager::Allocation>>
-InProcessMemoryManager::allocate(const JITLinkDylib *JD,
- const SegmentsRequestMap &Request) {
-
- using AllocationMap = DenseMap<unsigned, sys::MemoryBlock>;
-
- // Local class for allocation.
- class IPMMAlloc : public Allocation {
- public:
- IPMMAlloc(AllocationMap SegBlocks) : SegBlocks(std::move(SegBlocks)) {}
- MutableArrayRef<char> getWorkingMemory(ProtectionFlags Seg) override {
- assert(SegBlocks.count(Seg) && "No allocation for segment");
- return {static_cast<char *>(SegBlocks[Seg].base()),
- SegBlocks[Seg].allocatedSize()};
+JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default;
+
+static Error runAllocAction(JITLinkMemoryManager::AllocActionCall &C) {
+ using WrapperFnTy = CWrapperFunctionResult (*)(const void *, size_t);
+ auto *Fn = jitTargetAddressToPointer<WrapperFnTy>(C.FnAddr);
+
+ return toError(Fn(jitTargetAddressToPointer<const void *>(C.CtxAddr),
+ static_cast<size_t>(C.CtxSize)));
+}
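
runAllocAction above invokes finalize/dealloc actions through the C wrapper-function convention that toError decodes. As a rough illustration, a hypothetical action that always succeeds would return an empty result (Size == 0 and a null ValuePtr); written against the local CWrapperFunctionResult definitions at the top of this file, it might look like the sketch below. Real actions are reached through the FnAddr/CtxAddr fields of an AllocActionCall registered in the graph's alloc actions.

// Hypothetical finalize/dealloc action: reports success by returning an empty
// CWrapperFunctionResult (Size == 0 and a null ValuePtr decode to "no error"
// in toError above). ArgData/ArgSize carry the action's CtxAddr/CtxSize and
// are unused here.
static CWrapperFunctionResult noopAllocAction(const void *ArgData,
                                              size_t ArgSize) {
  (void)ArgData;
  (void)ArgSize;
  CWrapperFunctionResult R;
  R.Data.ValuePtr = nullptr;
  R.Size = 0;
  return R;
}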
+
+BasicLayout::BasicLayout(LinkGraph &G) : G(G) {
+
+ for (auto &Sec : G.sections()) {
+ // Skip empty sections.
+ if (empty(Sec.blocks()))
+ continue;
+
+ auto &Seg = Segments[{Sec.getMemProt(), Sec.getMemDeallocPolicy()}];
+ for (auto *B : Sec.blocks())
+ if (LLVM_LIKELY(!B->isZeroFill()))
+ Seg.ContentBlocks.push_back(B);
+ else
+ Seg.ZeroFillBlocks.push_back(B);
+ }
+
+ // Build Segments map.
+ auto CompareBlocks = [](const Block *LHS, const Block *RHS) {
+ // Sort by section, address and size
+ if (LHS->getSection().getOrdinal() != RHS->getSection().getOrdinal())
+ return LHS->getSection().getOrdinal() < RHS->getSection().getOrdinal();
+ if (LHS->getAddress() != RHS->getAddress())
+ return LHS->getAddress() < RHS->getAddress();
+ return LHS->getSize() < RHS->getSize();
+ };
+
+ LLVM_DEBUG(dbgs() << "Generated BasicLayout for " << G.getName() << ":\n");
+ for (auto &KV : Segments) {
+ auto &Seg = KV.second;
+
+ llvm::sort(Seg.ContentBlocks, CompareBlocks);
+ llvm::sort(Seg.ZeroFillBlocks, CompareBlocks);
+
+ for (auto *B : Seg.ContentBlocks) {
+ Seg.ContentSize = alignToBlock(Seg.ContentSize, *B);
+ Seg.ContentSize += B->getSize();
+ Seg.Alignment = std::max(Seg.Alignment, Align(B->getAlignment()));
}
- JITTargetAddress getTargetMemory(ProtectionFlags Seg) override {
- assert(SegBlocks.count(Seg) && "No allocation for segment");
- return pointerToJITTargetAddress(SegBlocks[Seg].base());
+
+ uint64_t SegEndOffset = Seg.ContentSize;
+ for (auto *B : Seg.ZeroFillBlocks) {
+ SegEndOffset = alignToBlock(SegEndOffset, *B);
+ SegEndOffset += B->getSize();
+ Seg.Alignment = std::max(Seg.Alignment, Align(B->getAlignment()));
}
- void finalizeAsync(FinalizeContinuation OnFinalize) override {
- OnFinalize(applyProtections());
+ Seg.ZeroFillSize = SegEndOffset - Seg.ContentSize;
+
+ LLVM_DEBUG({
+ dbgs() << " Seg " << KV.first
+ << ": content-size=" << formatv("{0:x}", Seg.ContentSize)
+ << ", zero-fill-size=" << formatv("{0:x}", Seg.ZeroFillSize)
+ << ", align=" << formatv("{0:x}", Seg.Alignment.value()) << "\n";
+ });
+ }
+}
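
The alignToBlock calls in this constructor bump a running offset to the next position that satisfies a block's alignment and alignment offset; the helper removed from JITLinkGeneric.h earlier in this patch computed it as (AlignmentOffset - Addr) % Alignment. A self-contained restatement with a worked example:

#include <cassert>
#include <cstdint>

// Minimal restatement of alignToBlock's arithmetic: advance Addr to the next
// value congruent to AlignmentOffset modulo Alignment (alignments are nonzero
// powers of two in practice).
static uint64_t alignToBlock(uint64_t Addr, uint64_t Alignment,
                             uint64_t AlignmentOffset) {
  uint64_t Delta = (AlignmentOffset - Addr) % Alignment;
  return Addr + Delta;
}

int main() {
  // A block with 16-byte alignment and a 4-byte alignment offset placed after
  // 0x21 bytes of content lands at 0x24 (0x24 % 16 == 4).
  assert(alignToBlock(0x21, 16, 4) == 0x24);
  return 0;
}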
+
+Expected<BasicLayout::ContiguousPageBasedLayoutSizes>
+BasicLayout::getContiguousPageBasedLayoutSizes(uint64_t PageSize) {
+ ContiguousPageBasedLayoutSizes SegsSizes;
+
+ for (auto &KV : segments()) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ if (Seg.Alignment > PageSize)
+ return make_error<StringError>("Segment alignment greater than page size",
+ inconvertibleErrorCode());
+
+ uint64_t SegSize = alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
+ if (AG.getMemDeallocPolicy() == MemDeallocPolicy::Standard)
+ SegsSizes.StandardSegs += SegSize;
+ else
+ SegsSizes.FinalizeSegs += SegSize;
+ }
+
+ return SegsSizes;
+}
+
+Error BasicLayout::apply() {
+ for (auto &KV : Segments) {
+ auto &Seg = KV.second;
+
+ assert(!(Seg.ContentBlocks.empty() && Seg.ZeroFillBlocks.empty()) &&
+ "Empty section recorded?");
+
+ for (auto *B : Seg.ContentBlocks) {
+ // Align addr and working-mem-offset.
+ Seg.Addr = alignToBlock(Seg.Addr, *B);
+ Seg.NextWorkingMemOffset = alignToBlock(Seg.NextWorkingMemOffset, *B);
+
+ // Update block addr.
+ B->setAddress(Seg.Addr);
+ Seg.Addr += B->getSize();
+
+ // Copy content to working memory, then update content to point at working
+ // memory.
+ memcpy(Seg.WorkingMem + Seg.NextWorkingMemOffset, B->getContent().data(),
+ B->getSize());
+ B->setMutableContent(
+ {Seg.WorkingMem + Seg.NextWorkingMemOffset, B->getSize()});
+ Seg.NextWorkingMemOffset += B->getSize();
}
- Error deallocate() override {
- if (SegBlocks.empty())
- return Error::success();
- void *SlabStart = SegBlocks.begin()->second.base();
- char *SlabEnd = (char *)SlabStart;
- for (auto &KV : SegBlocks) {
- SlabStart = std::min(SlabStart, KV.second.base());
- SlabEnd = std::max(SlabEnd, (char *)(KV.second.base()) +
- KV.second.allocatedSize());
- }
- size_t SlabSize = SlabEnd - (char *)SlabStart;
- assert((SlabSize % sys::Process::getPageSizeEstimate()) == 0 &&
- "Slab size is not a multiple of page size");
- sys::MemoryBlock Slab(SlabStart, SlabSize);
- if (auto EC = sys::Memory::releaseMappedMemory(Slab))
+
+ for (auto *B : Seg.ZeroFillBlocks) {
+ // Align addr.
+ Seg.Addr = alignToBlock(Seg.Addr, *B);
+ // Update block addr.
+ B->setAddress(Seg.Addr);
+ Seg.Addr += B->getSize();
+ }
+
+ Seg.ContentBlocks.clear();
+ Seg.ZeroFillBlocks.clear();
+ }
+
+ return Error::success();
+}
+
+JITLinkMemoryManager::AllocActions &BasicLayout::graphAllocActions() {
+ return G.allocActions();
+}
+
+void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD, SegmentMap Segments,
+ OnCreatedFunction OnCreated) {
+
+ static_assert(AllocGroup::NumGroups == 16,
+ "AllocGroup has changed. Section names below must be updated");
+ StringRef AGSectionNames[] = {
+ "__---.standard", "__R--.standard", "__-W-.standard", "__RW-.standard",
+ "__--X.standard", "__R-X.standard", "__-WX.standard", "__RWX.standard",
+ "__---.finalize", "__R--.finalize", "__-W-.finalize", "__RW-.finalize",
+ "__--X.finalize", "__R-X.finalize", "__-WX.finalize", "__RWX.finalize"};
+
+ auto G =
+ std::make_unique<LinkGraph>("", Triple(), 0, support::native, nullptr);
+ AllocGroupSmallMap<Block *> ContentBlocks;
+
+ JITTargetAddress NextAddr = 0x100000;
+ for (auto &KV : Segments) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ auto AGSectionName =
+ AGSectionNames[static_cast<unsigned>(AG.getMemProt()) |
+ static_cast<bool>(AG.getMemDeallocPolicy()) << 3];
+
+ auto &Sec = G->createSection(AGSectionName, AG.getMemProt());
+ Sec.setMemDeallocPolicy(AG.getMemDeallocPolicy());
+
+ if (Seg.ContentSize != 0) {
+ NextAddr = alignTo(NextAddr, Seg.ContentAlign);
+ auto &B =
+ G->createMutableContentBlock(Sec, G->allocateBuffer(Seg.ContentSize),
+ NextAddr, Seg.ContentAlign.value(), 0);
+ ContentBlocks[AG] = &B;
+ NextAddr += Seg.ContentSize;
+ }
+ }
+
+ // GRef declared separately since order-of-argument-eval isn't specified.
+ auto &GRef = *G;
+ MemMgr.allocate(JD, GRef,
+ [G = std::move(G), ContentBlocks = std::move(ContentBlocks),
+ OnCreated = std::move(OnCreated)](
+ JITLinkMemoryManager::AllocResult Alloc) mutable {
+ if (!Alloc)
+ OnCreated(Alloc.takeError());
+ else
+ OnCreated(SimpleSegmentAlloc(std::move(G),
+ std::move(ContentBlocks),
+ std::move(*Alloc)));
+ });
+}
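
The AGSectionNames lookup above packs the three protection bits and the dealloc-policy bit into a four-bit table index. A standalone restatement with toy enums (the real MemProt/MemDeallocPolicy enums live in the JITLink headers; the bit values here are inferred from the table ordering):

// Toy enums mirroring the table ordering: bit 0 = Read, bit 1 = Write,
// bit 2 = Exec, bit 3 = finalize-lifetime dealloc policy.
enum ToyProt : unsigned { Read = 1, Write = 2, Exec = 4 };
enum class ToyDealloc : unsigned { Standard = 0, Finalize = 1 };

constexpr unsigned groupIndex(unsigned Prot, ToyDealloc DP) {
  return Prot | (static_cast<unsigned>(DP) << 3);
}

static_assert(groupIndex(Read | Write, ToyDealloc::Standard) == 3,
              "index 3 -> __RW-.standard");
static_assert(groupIndex(Read | Exec, ToyDealloc::Finalize) == 13,
              "index 13 -> __R-X.finalize");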
+
+Expected<SimpleSegmentAlloc>
+SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ SegmentMap Segments) {
+ std::promise<MSVCPExpected<SimpleSegmentAlloc>> AllocP;
+ auto AllocF = AllocP.get_future();
+ Create(MemMgr, JD, std::move(Segments),
+ [&](Expected<SimpleSegmentAlloc> Result) {
+ AllocP.set_value(std::move(Result));
+ });
+ return AllocF.get();
+}
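
The blocking overload above is the usual promise/future bridge over a callback-based API. A generic sketch of that idiom (MSVCPExpected in the patch appears to exist only to work around an MSVC std::promise limitation; a plain value type is used here):

#include <future>
#include <string>
#include <utility>

// Start a callback-based operation, fulfil a promise from its completion
// handler, and block on the matching future.
template <typename T, typename AsyncFn>
T runSynchronously(AsyncFn &&Start) {
  std::promise<T> ResultP;
  std::future<T> ResultF = ResultP.get_future();
  std::forward<AsyncFn>(Start)(
      [&ResultP](T Result) { ResultP.set_value(std::move(Result)); });
  return ResultF.get();
}

int main() {
  // "Asynchronous" operation that completes inline for the example.
  auto Answer = runSynchronously<std::string>(
      [](auto OnDone) { OnDone(std::string("done")); });
  return Answer == "done" ? 0 : 1;
}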
+
+SimpleSegmentAlloc::SimpleSegmentAlloc(SimpleSegmentAlloc &&) = default;
+SimpleSegmentAlloc &
+SimpleSegmentAlloc::operator=(SimpleSegmentAlloc &&) = default;
+SimpleSegmentAlloc::~SimpleSegmentAlloc() {}
+
+SimpleSegmentAlloc::SegmentInfo SimpleSegmentAlloc::getSegInfo(AllocGroup AG) {
+ auto I = ContentBlocks.find(AG);
+ if (I != ContentBlocks.end()) {
+ auto &B = *I->second;
+ return {B.getAddress(), B.getAlreadyMutableContent()};
+ }
+ return {};
+}
+
+SimpleSegmentAlloc::SimpleSegmentAlloc(
+ std::unique_ptr<LinkGraph> G, AllocGroupSmallMap<Block *> ContentBlocks,
+ std::unique_ptr<JITLinkMemoryManager::InFlightAlloc> Alloc)
+ : G(std::move(G)), ContentBlocks(std::move(ContentBlocks)),
+ Alloc(std::move(Alloc)) {}
+
+class InProcessMemoryManager::IPInFlightAlloc
+ : public JITLinkMemoryManager::InFlightAlloc {
+public:
+ IPInFlightAlloc(InProcessMemoryManager &MemMgr, LinkGraph &G, BasicLayout BL,
+ sys::MemoryBlock StandardSegments,
+ sys::MemoryBlock FinalizationSegments)
+ : MemMgr(MemMgr), G(G), BL(std::move(BL)),
+ StandardSegments(std::move(StandardSegments)),
+ FinalizationSegments(std::move(FinalizationSegments)) {}
+
+ void finalize(OnFinalizedFunction OnFinalized) override {
+
+ // Apply memory protections to all segments.
+ if (auto Err = applyProtections()) {
+ OnFinalized(std::move(Err));
+ return;
+ }
+
+ // Run finalization actions.
+ // FIXME: Roll back previous successful actions on failure.
+ std::vector<AllocActionCall> DeallocActions;
+ DeallocActions.reserve(G.allocActions().size());
+ for (auto &ActPair : G.allocActions()) {
+ if (ActPair.Finalize.FnAddr)
+ if (auto Err = runAllocAction(ActPair.Finalize)) {
+ OnFinalized(std::move(Err));
+ return;
+ }
+ if (ActPair.Dealloc.FnAddr)
+ DeallocActions.push_back(ActPair.Dealloc);
+ }
+ G.allocActions().clear();
+
+ // Release the finalize segments slab.
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments)) {
+ OnFinalized(errorCodeToError(EC));
+ return;
+ }
+
+ // Continue with finalized allocation.
+ OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
+ std::move(DeallocActions)));
+ }
+
+ void abandon(OnAbandonedFunction OnAbandoned) override {
+ Error Err = Error::success();
+ if (auto EC = sys::Memory::releaseMappedMemory(FinalizationSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+ OnAbandoned(std::move(Err));
+ }
+
+private:
+ Error applyProtections() {
+ for (auto &KV : BL.segments()) {
+ const auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ auto Prot = toSysMemoryProtectionFlags(AG.getMemProt());
+
+ uint64_t SegSize =
+ alignTo(Seg.ContentSize + Seg.ZeroFillSize, MemMgr.PageSize);
+ sys::MemoryBlock MB(Seg.WorkingMem, SegSize);
+ if (auto EC = sys::Memory::protectMappedMemory(MB, Prot))
return errorCodeToError(EC);
- return Error::success();
+ if (Prot & sys::Memory::MF_EXEC)
+ sys::Memory::InvalidateInstructionCache(MB.base(), MB.allocatedSize());
}
+ return Error::success();
+ }
+
+ InProcessMemoryManager &MemMgr;
+ LinkGraph &G;
+ BasicLayout BL;
+ sys::MemoryBlock StandardSegments;
+ sys::MemoryBlock FinalizationSegments;
+};
+
+Expected<std::unique_ptr<InProcessMemoryManager>>
+InProcessMemoryManager::Create() {
+ if (auto PageSize = sys::Process::getPageSize())
+ return std::make_unique<InProcessMemoryManager>(*PageSize);
+ else
+ return PageSize.takeError();
+}
+
+void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G,
+ OnAllocatedFunction OnAllocated) {
+
+ // FIXME: Just check this once on startup.
+ if (!isPowerOf2_64((uint64_t)PageSize)) {
+ OnAllocated(make_error<StringError>("Page size is not a power of 2",
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ BasicLayout BL(G);
+
+  // Scan the layout and calculate the contiguous segment sizes.
+  // Check that no segment requires greater than page alignment.
+ auto SegsSizes = BL.getContiguousPageBasedLayoutSizes(PageSize);
+ if (!SegsSizes) {
+ OnAllocated(SegsSizes.takeError());
+ return;
+ }
+
+ /// Check that the total size requested (including zero fill) is not larger
+ /// than a size_t.
+ if (SegsSizes->total() > std::numeric_limits<size_t>::max()) {
+ OnAllocated(make_error<JITLinkError>(
+ "Total requested size " + formatv("{0:x}", SegsSizes->total()) +
+ " for graph " + G.getName() + " exceeds address space"));
+ return;
+ }
+
+ // Allocate one slab for the whole thing (to make sure everything is
+ // in-range), then partition into standard and finalization blocks.
+ //
+ // FIXME: Make two separate allocations in the future to reduce
+ // fragmentation: finalization segments will usually be a single page, and
+ // standard segments are likely to be more than one page. Where multiple
+ // allocations are in-flight at once (likely) the current approach will leave
+ // a lot of single-page holes.
+ sys::MemoryBlock Slab;
+ sys::MemoryBlock StandardSegsMem;
+ sys::MemoryBlock FinalizeSegsMem;
+ {
+ const sys::Memory::ProtectionFlags ReadWrite =
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE);
+
+ std::error_code EC;
+ Slab = sys::Memory::allocateMappedMemory(SegsSizes->total(), nullptr,
+ ReadWrite, EC);
- private:
- Error applyProtections() {
- for (auto &KV : SegBlocks) {
- auto &Prot = KV.first;
- auto &Block = KV.second;
- if (auto EC = sys::Memory::protectMappedMemory(Block, Prot))
- return errorCodeToError(EC);
- if (Prot & sys::Memory::MF_EXEC)
- sys::Memory::InvalidateInstructionCache(Block.base(),
- Block.allocatedSize());
- }
- return Error::success();
+ if (EC) {
+ OnAllocated(errorCodeToError(EC));
+ return;
}
- AllocationMap SegBlocks;
- };
+ // Zero-fill the whole slab up-front.
+ memset(Slab.base(), 0, Slab.allocatedSize());
+
+ StandardSegsMem = {Slab.base(),
+ static_cast<size_t>(SegsSizes->StandardSegs)};
+ FinalizeSegsMem = {(void *)((char *)Slab.base() + SegsSizes->StandardSegs),
+ static_cast<size_t>(SegsSizes->FinalizeSegs)};
+ }
- if (!isPowerOf2_64((uint64_t)sys::Process::getPageSizeEstimate()))
- return make_error<StringError>("Page size is not a power of 2",
- inconvertibleErrorCode());
+ auto NextStandardSegAddr = pointerToJITTargetAddress(StandardSegsMem.base());
+ auto NextFinalizeSegAddr = pointerToJITTargetAddress(FinalizeSegsMem.base());
- AllocationMap Blocks;
- const sys::Memory::ProtectionFlags ReadWrite =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
+ LLVM_DEBUG({
+ dbgs() << "InProcessMemoryManager allocated:\n";
+ if (SegsSizes->StandardSegs)
+ dbgs() << formatv(" [ {0:x16} -- {1:x16} ]", NextStandardSegAddr,
+ NextStandardSegAddr + StandardSegsMem.allocatedSize())
+             << " to standard segs\n";
+ else
+ dbgs() << " no standard segs\n";
+ if (SegsSizes->FinalizeSegs)
+ dbgs() << formatv(" [ {0:x16} -- {1:x16} ]", NextFinalizeSegAddr,
+ NextFinalizeSegAddr + FinalizeSegsMem.allocatedSize())
+ << " to finalize segs\n";
+ else
+ dbgs() << " no finalize segs\n";
+ });
- // Compute the total number of pages to allocate.
- size_t TotalSize = 0;
- for (auto &KV : Request) {
- const auto &Seg = KV.second;
+ // Build ProtMap, assign addresses.
+ for (auto &KV : BL.segments()) {
+ auto &AG = KV.first;
+ auto &Seg = KV.second;
- if (Seg.getAlignment() > sys::Process::getPageSizeEstimate())
- return make_error<StringError>("Cannot request higher than page "
- "alignment",
- inconvertibleErrorCode());
+ auto &SegAddr = (AG.getMemDeallocPolicy() == MemDeallocPolicy::Standard)
+ ? NextStandardSegAddr
+ : NextFinalizeSegAddr;
- TotalSize = alignTo(TotalSize, sys::Process::getPageSizeEstimate());
- TotalSize += Seg.getContentSize();
- TotalSize += Seg.getZeroFillSize();
+ Seg.WorkingMem = jitTargetAddressToPointer<char *>(SegAddr);
+ Seg.Addr = SegAddr;
+
+ SegAddr += alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
}
- // Allocate one slab to cover all the segments.
- std::error_code EC;
- auto SlabRemaining =
- sys::Memory::allocateMappedMemory(TotalSize, nullptr, ReadWrite, EC);
+ if (auto Err = BL.apply()) {
+ OnAllocated(std::move(Err));
+ return;
+ }
- if (EC)
- return errorCodeToError(EC);
+ OnAllocated(std::make_unique<IPInFlightAlloc>(*this, G, std::move(BL),
+ std::move(StandardSegsMem),
+ std::move(FinalizeSegsMem)));
+}
- // Allocate segment memory from the slab.
- for (auto &KV : Request) {
+void InProcessMemoryManager::deallocate(std::vector<FinalizedAlloc> Allocs,
+ OnDeallocatedFunction OnDeallocated) {
+ std::vector<sys::MemoryBlock> StandardSegmentsList;
+ std::vector<std::vector<AllocActionCall>> DeallocActionsList;
- const auto &Seg = KV.second;
+ {
+ std::lock_guard<std::mutex> Lock(FinalizedAllocsMutex);
+ for (auto &Alloc : Allocs) {
+ auto *FA =
+ jitTargetAddressToPointer<FinalizedAllocInfo *>(Alloc.release());
+ StandardSegmentsList.push_back(std::move(FA->StandardSegments));
+ if (!FA->DeallocActions.empty())
+ DeallocActionsList.push_back(std::move(FA->DeallocActions));
+ FA->~FinalizedAllocInfo();
+ FinalizedAllocInfos.Deallocate(FA);
+ }
+ }
+
+ Error DeallocErr = Error::success();
- uint64_t SegmentSize = alignTo(Seg.getContentSize() + Seg.getZeroFillSize(),
- sys::Process::getPageSizeEstimate());
- assert(SlabRemaining.allocatedSize() >= SegmentSize &&
- "Mapping exceeds allocation");
+ while (!DeallocActionsList.empty()) {
+ auto &DeallocActions = DeallocActionsList.back();
+ auto &StandardSegments = StandardSegmentsList.back();
- sys::MemoryBlock SegMem(SlabRemaining.base(), SegmentSize);
- SlabRemaining = sys::MemoryBlock((char *)SlabRemaining.base() + SegmentSize,
- SlabRemaining.allocatedSize() - SegmentSize);
+ /// Run any deallocate calls.
+ while (!DeallocActions.empty()) {
+ if (auto Err = runAllocAction(DeallocActions.back()))
+ DeallocErr = joinErrors(std::move(DeallocErr), std::move(Err));
+ DeallocActions.pop_back();
+ }
- // Zero out the zero-fill memory.
- memset(static_cast<char *>(SegMem.base()) + Seg.getContentSize(), 0,
- Seg.getZeroFillSize());
+ /// Release the standard segments slab.
+ if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
+ DeallocErr = joinErrors(std::move(DeallocErr), errorCodeToError(EC));
- // Record the block for this segment.
- Blocks[KV.first] = std::move(SegMem);
+ DeallocActionsList.pop_back();
+ StandardSegmentsList.pop_back();
}
- return std::unique_ptr<InProcessMemoryManager::Allocation>(
- new IPMMAlloc(std::move(Blocks)));
+ OnDeallocated(std::move(DeallocErr));
+}
+
+JITLinkMemoryManager::FinalizedAlloc
+InProcessMemoryManager::createFinalizedAlloc(
+ sys::MemoryBlock StandardSegments,
+ std::vector<AllocActionCall> DeallocActions) {
+ std::lock_guard<std::mutex> Lock(FinalizedAllocsMutex);
+ auto *FA = FinalizedAllocInfos.Allocate<FinalizedAllocInfo>();
+ new (FA) FinalizedAllocInfo(
+ {std::move(StandardSegments), std::move(DeallocActions)});
+ return FinalizedAlloc(pointerToJITTargetAddress(FA));
}
} // end namespace jitlink
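
Taken together, the pieces in this file give the following client-side flow. This is an illustrative sketch only, wired up by hand with the signatures as they appear in the hunks above (real clients normally go through JITLinkerBase or ORC rather than calling these directly), and the by-reference captures rely on the in-process manager invoking its callbacks synchronously.

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/Support/Error.h"
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::jitlink;

static void allocateFinalizeAndFree(LinkGraph &G) {
  auto MemMgr = cantFail(InProcessMemoryManager::Create());

  MemMgr->allocate(/*JD=*/nullptr, G,
                   [&](JITLinkMemoryManager::AllocResult AR) {
    if (!AR)
      return consumeError(AR.takeError());

    // Working memory and target addresses are assigned at this point; a real
    // linker resolves externals and runs fixups before finalizing.
    (*AR)->finalize([&](Expected<JITLinkMemoryManager::FinalizedAlloc> FA) {
      if (!FA)
        return consumeError(FA.takeError());

      // A FinalizedAlloc must eventually be handed back to the manager, which
      // runs any registered dealloc actions and releases the memory.
      std::vector<JITLinkMemoryManager::FinalizedAlloc> ToFree;
      ToFree.push_back(std::move(*FA));
      MemMgr->deallocate(std::move(ToFree),
                         [](Error Err) { consumeError(std::move(Err)); });
    });
  });
}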
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
index eda2b8811deb..e49480c78662 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -1,9 +1,8 @@
//===-------------- MachO.cpp - JIT linker function for MachO -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index 03a8b98dff18..d588b63d9e88 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -23,7 +23,7 @@ MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
- // Sanity check: we only operate on relocatable objects.
+ // We only operate on relocatable objects.
if (!Obj.isRelocatableObject())
return make_error<JITLinkError>("Object is not a relocatable MachO");
@@ -107,11 +107,9 @@ MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
}
Section &MachOLinkGraphBuilder::getCommonSection() {
- if (!CommonSection) {
- auto Prot = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_WRITE);
- CommonSection = &G->createSection(CommonSectionName, Prot);
- }
+ if (!CommonSection)
+ CommonSection =
+ &G->createSection(CommonSectionName, MemProt::Read | MemProt::Write);
return *CommonSection;
}
@@ -176,25 +174,16 @@ Error MachOLinkGraphBuilder::createNormalizedSections() {
// Get prot flags.
// FIXME: Make sure this test is correct (it's probably missing cases
// as-is).
- sys::Memory::ProtectionFlags Prot;
+ MemProt Prot;
if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
+ Prot = MemProt::Read | MemProt::Exec;
else
- Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
-
- if (!isDebugSection(NSec)) {
- auto FullyQualifiedName =
- G->allocateString(StringRef(NSec.SegName) + "," + NSec.SectName);
- NSec.GraphSection = &G->createSection(
- StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()),
- Prot);
- } else
- LLVM_DEBUG({
- dbgs() << " " << NSec.SegName << "," << NSec.SectName
- << " is a debug section: No graph section will be created.\n";
- });
+ Prot = MemProt::Read | MemProt::Write;
+
+ auto FullyQualifiedName =
+ G->allocateString(StringRef(NSec.SegName) + "," + NSec.SectName);
+ NSec.GraphSection = &G->createSection(
+ StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()), Prot);
IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
}
@@ -292,15 +281,16 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() {
dbgs() << "\n";
});
- // If this symbol has a section, sanity check that the addresses line up.
+ // If this symbol has a section, verify that the addresses line up.
if (Sect != 0) {
auto NSec = findSectionByIndex(Sect - 1);
if (!NSec)
return NSec.takeError();
if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
- return make_error<JITLinkError>("Symbol address does not fall within "
- "section");
+ return make_error<JITLinkError>("Address " + formatv("{0:x}", Value) +
+ " for symbol " + *Name +
+ " does not fall within section");
if (!NSec->GraphSection) {
LLVM_DEBUG({
@@ -321,16 +311,19 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() {
}
void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
- Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
- uint32_t Alignment, bool IsLive) {
+ unsigned SecIndex, Section &GraphSec, uint64_t Address, const char *Data,
+ uint64_t Size, uint32_t Alignment, bool IsLive) {
Block &B =
Data ? G->createContentBlock(GraphSec, ArrayRef<char>(Data, Size),
Address, Alignment, 0)
: G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
- assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
+ auto SecI = IndexToSection.find(SecIndex);
+ assert(SecI != IndexToSection.end() && "SecIndex invalid");
+ auto &NSec = SecI->second;
+ assert(!NSec.CanonicalSymbols.count(Sym.getAddress()) &&
"Anonymous block start symbol clashes with existing symbol address");
- AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
+ NSec.CanonicalSymbols[Sym.getAddress()] = &Sym;
}
Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
@@ -444,8 +437,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
<< formatv("{0:x16}", NSec.Address) << " -- "
<< formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
});
- addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
- NSec.Size, NSec.Alignment,
+ addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address,
+ NSec.Data, NSec.Size, NSec.Alignment,
SectionIsNoDeadStrip);
} else
LLVM_DEBUG({
@@ -483,8 +476,8 @@ Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
<< formatv("{0:x16}", NSec.Address) << " -- "
<< formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
});
- addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
- AnonBlockSize, NSec.Alignment,
+ addSectionStartSymAndBlock(SecIndex, *NSec.GraphSection, NSec.Address,
+ NSec.Data, AnonBlockSize, NSec.Alignment,
SectionIsNoDeadStrip);
}
@@ -583,7 +576,7 @@ Symbol &MachOLinkGraphBuilder::createStandardGraphSymbol(NormalizedSymbol &NSym,
NSym.GraphSymbol = &Sym;
if (IsCanonical)
- setCanonicalSymbol(Sym);
+ setCanonicalSymbol(getSectionByIndex(NSym.Sect - 1), Sym);
return Sym;
}
@@ -610,7 +603,6 @@ Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
Error MachOLinkGraphBuilder::graphifyCStringSection(
NormalizedSection &NSec, std::vector<NormalizedSymbol *> NSyms) {
-
assert(NSec.GraphSection && "C string literal section missing graph section");
assert(NSec.Data && "C string literal section has no data");
@@ -664,7 +656,7 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
// If there's no symbol at the start of this block then create one.
if (NSyms.empty() || NSyms.back()->Value != B.getAddress()) {
auto &S = G->addAnonymousSymbol(B, 0, BlockSize, false, false);
- setCanonicalSymbol(S);
+ setCanonicalSymbol(NSec, S);
LLVM_DEBUG({
dbgs() << " Adding anonymous symbol for c-string block "
<< formatv("{0:x16} -- {1:x16}", S.getAddress(),
@@ -700,5 +692,119 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
return Error::success();
}
+Error CompactUnwindSplitter::operator()(LinkGraph &G) {
+ auto *CUSec = G.findSectionByName(CompactUnwindSectionName);
+ if (!CUSec)
+ return Error::success();
+
+ if (!G.getTargetTriple().isOSBinFormatMachO())
+ return make_error<JITLinkError>(
+ "Error linking " + G.getName() +
+ ": compact unwind splitting not supported on non-macho target " +
+ G.getTargetTriple().str());
+
+ unsigned CURecordSize = 0;
+ unsigned PersonalityEdgeOffset = 0;
+ unsigned LSDAEdgeOffset = 0;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ case Triple::x86_64:
+ // 64-bit compact-unwind record format:
+ // Range start: 8 bytes.
+ // Range size: 4 bytes.
+ // CU encoding: 4 bytes.
+ // Personality: 8 bytes.
+ // LSDA: 8 bytes.
+ CURecordSize = 32;
+ PersonalityEdgeOffset = 16;
+ LSDAEdgeOffset = 24;
+ break;
+ default:
+ return make_error<JITLinkError>(
+ "Error linking " + G.getName() +
+ ": compact unwind splitting not supported on " +
+ G.getTargetTriple().getArchName());
+ }
+
+ std::vector<Block *> OriginalBlocks(CUSec->blocks().begin(),
+ CUSec->blocks().end());
+ LLVM_DEBUG({
+ dbgs() << "In " << G.getName() << " splitting compact unwind section "
+ << CompactUnwindSectionName << " containing "
+ << OriginalBlocks.size() << " initial blocks...\n";
+ });
+
+ while (!OriginalBlocks.empty()) {
+ auto *B = OriginalBlocks.back();
+ OriginalBlocks.pop_back();
+
+ if (B->getSize() == 0) {
+ LLVM_DEBUG({
+ dbgs() << " Skipping empty block at "
+ << formatv("{0:x16}", B->getAddress()) << "\n";
+ });
+ continue;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << " Splitting block at " << formatv("{0:x16}", B->getAddress())
+ << " into " << (B->getSize() / CURecordSize)
+ << " compact unwind record(s)\n";
+ });
+
+ if (B->getSize() % CURecordSize)
+ return make_error<JITLinkError>(
+ "Error splitting compact unwind record in " + G.getName() +
+ ": block at " + formatv("{0:x}", B->getAddress()) + " has size " +
+ formatv("{0:x}", B->getSize()) +
+ " (not a multiple of CU record size of " +
+ formatv("{0:x}", CURecordSize) + ")");
+
+ unsigned NumBlocks = B->getSize() / CURecordSize;
+ LinkGraph::SplitBlockCache C;
+
+ for (unsigned I = 0; I != NumBlocks; ++I) {
+ auto &CURec = G.splitBlock(*B, CURecordSize, &C);
+ bool AddedKeepAlive = false;
+
+ for (auto &E : CURec.edges()) {
+ if (E.getOffset() == 0) {
+ LLVM_DEBUG({
+ dbgs() << " Updating compact unwind record at "
+ << formatv("{0:x16}", CURec.getAddress()) << " to point to "
+ << (E.getTarget().hasName() ? E.getTarget().getName()
+ : StringRef())
+ << " (at " << formatv("{0:x16}", E.getTarget().getAddress())
+ << ")\n";
+ });
+
+ if (E.getTarget().isExternal())
+ return make_error<JITLinkError>(
+ "Error adding keep-alive edge for compact unwind record at " +
+ formatv("{0:x}", CURec.getAddress()) + ": target " +
+ E.getTarget().getName() + " is an external symbol");
+ auto &TgtBlock = E.getTarget().getBlock();
+ auto &CURecSym =
+ G.addAnonymousSymbol(CURec, 0, CURecordSize, 0, false);
+ TgtBlock.addEdge(Edge::KeepAlive, 0, CURecSym, 0);
+ AddedKeepAlive = true;
+ } else if (E.getOffset() != PersonalityEdgeOffset &&
+ E.getOffset() != LSDAEdgeOffset)
+ return make_error<JITLinkError>("Unexpected edge at offset " +
+ formatv("{0:x}", E.getOffset()) +
+ " in compact unwind record at " +
+ formatv("{0:x}", CURec.getAddress()));
+ }
+
+ if (!AddedKeepAlive)
+ return make_error<JITLinkError>(
+ "Error adding keep-alive edge for compact unwind record at " +
+ formatv("{0:x}", CURec.getAddress()) +
+ ": no outgoing target edge at offset 0");
+ }
+ }
+ return Error::success();
+}
+
} // end namespace jitlink
} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index 90b14c44ff8a..d29732ebdba8 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -77,6 +77,7 @@ protected:
uint32_t Flags = 0;
const char *Data = nullptr;
Section *GraphSection = nullptr;
+ std::map<JITTargetAddress, Symbol *> CanonicalSymbols;
};
using SectionParserFunction = std::function<Error(NormalizedSection &S)>;
@@ -125,30 +126,31 @@ protected:
/// given index is out of range, or if no symbol has been added for the given
/// index.
Expected<NormalizedSymbol &> findSymbolByIndex(uint64_t Index) {
- if (Index >= IndexToSymbol.size())
- return make_error<JITLinkError>("Symbol index out of range");
- auto *Sym = IndexToSymbol[Index];
- if (!Sym)
+ auto I = IndexToSymbol.find(Index);
+ if (I == IndexToSymbol.end())
return make_error<JITLinkError>("No symbol at index " +
formatv("{0:d}", Index));
- return *Sym;
+ assert(I->second && "Null symbol at index");
+ return *I->second;
}
/// Returns the symbol with the highest address not greater than the search
/// address, or null if no such symbol exists.
- Symbol *getSymbolByAddress(JITTargetAddress Address) {
- auto I = AddrToCanonicalSymbol.upper_bound(Address);
- if (I == AddrToCanonicalSymbol.begin())
+ Symbol *getSymbolByAddress(NormalizedSection &NSec,
+ JITTargetAddress Address) {
+ auto I = NSec.CanonicalSymbols.upper_bound(Address);
+ if (I == NSec.CanonicalSymbols.begin())
return nullptr;
return std::prev(I)->second;
}
/// Returns the symbol with the highest address not greater than the search
/// address, or an error if no such symbol exists.
- Expected<Symbol &> findSymbolByAddress(JITTargetAddress Address) {
- auto *Sym = getSymbolByAddress(Address);
+ Expected<Symbol &> findSymbolByAddress(NormalizedSection &NSec,
+ JITTargetAddress Address) {
+ auto *Sym = getSymbolByAddress(NSec, Address);
if (Sym)
- if (Address < Sym->getAddress() + Sym->getSize())
+ if (Address <= Sym->getAddress() + Sym->getSize())
return *Sym;
return make_error<JITLinkError>("No symbol covering address " +
formatv("{0:x16}", Address));
@@ -179,8 +181,8 @@ private:
static unsigned getPointerSize(const object::MachOObjectFile &Obj);
static support::endianness getEndianness(const object::MachOObjectFile &Obj);
- void setCanonicalSymbol(Symbol &Sym) {
- auto *&CanonicalSymEntry = AddrToCanonicalSymbol[Sym.getAddress()];
+ void setCanonicalSymbol(NormalizedSection &NSec, Symbol &Sym) {
+ auto *&CanonicalSymEntry = NSec.CanonicalSymbols[Sym.getAddress()];
// There should be no symbol at this address, or, if there is,
// it should be a zero-sized symbol from an empty section (which
// we can safely override).
@@ -190,9 +192,10 @@ private:
}
Section &getCommonSection();
- void addSectionStartSymAndBlock(Section &GraphSec, uint64_t Address,
- const char *Data, uint64_t Size,
- uint32_t Alignment, bool IsLive);
+ void addSectionStartSymAndBlock(unsigned SecIndex, Section &GraphSec,
+ uint64_t Address, const char *Data,
+ uint64_t Size, uint32_t Alignment,
+ bool IsLive);
Error createNormalizedSections();
Error createNormalizedSymbols();
@@ -227,10 +230,20 @@ private:
Section *CommonSection = nullptr;
DenseMap<uint32_t, NormalizedSymbol *> IndexToSymbol;
- std::map<JITTargetAddress, Symbol *> AddrToCanonicalSymbol;
StringMap<SectionParserFunction> CustomSectionParserFunctions;
};
+/// A pass to split up __LD,__compact_unwind sections.
+class CompactUnwindSplitter {
+public:
+ CompactUnwindSplitter(StringRef CompactUnwindSectionName)
+ : CompactUnwindSectionName(CompactUnwindSectionName) {}
+ Error operator()(LinkGraph &G);
+
+private:
+ StringRef CompactUnwindSectionName;
+};
+
} // end namespace jitlink
} // end namespace llvm
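
getSymbolByAddress above uses the standard upper_bound/prev idiom on the per-section CanonicalSymbols map to find the symbol with the greatest address not greater than the query. The same pattern in isolation:

#include <cstdint>
#include <iterator>
#include <map>

// Return the value with the greatest key not greater than Addr, or nullptr if
// every key is greater.
template <typename V>
V *findFloor(std::map<uint64_t, V> &M, uint64_t Addr) {
  auto I = M.upper_bound(Addr);
  if (I == M.begin())
    return nullptr;
  return &std::prev(I)->second;
}

int main() {
  std::map<uint64_t, char> Symbols{{0x1000, 'a'}, {0x1010, 'b'}};
  return (*findFloor(Symbols, 0x100c) == 'a' &&
          findFloor(Symbols, 0xfff) == nullptr)
             ? 0
             : 1;
}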
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 169e20a1d1d3..f2a029d35cd5 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -81,6 +81,14 @@ private:
if (!RI.r_pcrel && !RI.r_extern && RI.r_length == 2)
return PairedAddend;
break;
+ case MachO::ARM64_RELOC_TLVP_LOAD_PAGE21:
+ if (RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return TLVPage21;
+ break;
+ case MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12:
+ if (!RI.r_pcrel && RI.r_extern && RI.r_length == 2)
+ return TLVPageOffset12;
+ break;
}
return make_error<JITLinkError>(
@@ -152,7 +160,7 @@ private:
auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1);
if (!ToSymbolSec)
return ToSymbolSec.takeError();
- ToSymbol = getSymbolByAddress(ToSymbolSec->Address);
+ ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address);
assert(ToSymbol && "No symbol for section");
FixupValue -= ToSymbol->getAddress();
}
@@ -197,14 +205,18 @@ private:
continue;
}
- // Skip relocations for debug symbols.
+ auto NSec =
+ findSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
+ if (!NSec)
+ return NSec.takeError();
+
+ // Skip relocations for MachO sections without corresponding graph
+ // sections.
{
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- if (!NSec.GraphSection) {
+ if (!NSec->GraphSection) {
LLVM_DEBUG({
dbgs() << " Skipping relocations for MachO section "
- << NSec.SegName << "/" << NSec.SectName
+ << NSec->SegName << "/" << NSec->SectName
<< " which has no associated graph section\n";
});
continue;
@@ -216,25 +228,22 @@ private:
MachO::relocation_info RI = getRelocationInfo(RelItr);
- // Sanity check the relocation kind.
+ // Validate the relocation kind.
auto Kind = getRelocationKind(RI);
if (!Kind)
return Kind.takeError();
// Find the address of the value to fix up.
JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
-
LLVM_DEBUG({
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- dbgs() << " " << NSec.SectName << " + "
+ dbgs() << " " << NSec->SectName << " + "
<< formatv("{0:x8}", RI.r_address) << ":\n";
});
// Find the block that the fixup points to.
Block *BlockToFix = nullptr;
{
- auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+ auto SymbolToFixOrErr = findSymbolByAddress(*NSec, FixupAddress);
if (!SymbolToFixOrErr)
return SymbolToFixOrErr.takeError();
BlockToFix = &SymbolToFixOrErr->getBlock();
@@ -316,7 +325,11 @@ private:
break;
case Pointer64Anon: {
JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -324,6 +337,7 @@ private:
break;
}
case Page21:
+ case TLVPage21:
case GOTPage21: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
@@ -348,6 +362,7 @@ private:
"encoded addend");
break;
}
+ case TLVPageOffset12:
case GOTPageOffset12: {
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
TargetSymbol = TargetSymbolOrErr->GraphSymbol;
@@ -414,6 +429,7 @@ public:
bool isGOTEdgeToFix(Edge &E) const {
return E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
+ E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12 ||
E.getKind() == PointerToGOT;
}
@@ -425,7 +441,8 @@ public:
}
void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12) {
+ if (E.getKind() == GOTPage21 || E.getKind() == GOTPageOffset12 ||
+ E.getKind() == TLVPage21 || E.getKind() == TLVPageOffset12) {
// Update the target, but leave the edge addend as-is.
E.setTarget(GOTEntry);
} else if (E.getKind() == PointerToGOT) {
@@ -457,16 +474,14 @@ public:
private:
Section &getGOTSection() {
if (!GOTSection)
- GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
+ GOTSection = &G.createSection("$__GOT", MemProt::Read);
return *GOTSection;
}
Section &getStubsSection() {
- if (!StubsSection) {
- auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", StubsProt);
- }
+ if (!StubsSection)
+ StubsSection =
+ &G.createSection("$__STUBS", MemProt::Read | MemProt::Exec);
return *StubsSection;
}
@@ -567,6 +582,7 @@ private:
break;
}
case Page21:
+ case TLVPage21:
case GOTPage21: {
assert((E.getKind() != GOTPage21 || E.getAddend() == 0) &&
"GOTPAGE21 with non-zero addend");
@@ -603,6 +619,7 @@ private:
*(ulittle32_t *)FixupPtr = FixedInstr;
break;
}
+ case TLVPageOffset12:
case GOTPageOffset12: {
assert(E.getAddend() == 0 && "GOTPAGEOF12 with non-zero addend");
@@ -629,7 +646,8 @@ private:
if (Delta < -(1 << 20) || Delta > ((1 << 20) - 1))
return makeTargetOutOfRangeError(G, B, E);
- uint32_t EncodedImm = (static_cast<uint32_t>(Delta) >> 2) << 5;
+ uint32_t EncodedImm =
+ ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
uint32_t FixedInstr = RawInstr | EncodedImm;
*(ulittle32_t *)FixupPtr = FixedInstr;
break;
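
The added "& 0x7ffff" matters for negative deltas: the two's-complement high bits survive the shift and, without the mask, would be OR'd into instruction bits above the 19-bit, 4-byte-scaled immediate that this fixup writes at bits [23:5]. A standalone illustration with a hypothetical delta:

#include <cassert>
#include <cstdint>

int main() {
  int32_t Delta = -8; // in range for a 19-bit, 4-byte-scaled PC-relative field
  uint32_t Unmasked = (static_cast<uint32_t>(Delta) >> 2) << 5;
  uint32_t Masked = ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
  assert((Unmasked & ~0xffffe0u) != 0); // clobbers bits outside imm19 at [23:5]
  assert((Masked & ~0xffffe0u) == 0);   // confined to the immediate field
  return 0;
}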
@@ -683,6 +701,10 @@ void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ // Add compact unwind splitter pass.
+ Config.PrePrunePasses.push_back(
+ CompactUnwindSplitter("__LD,__compact_unwind"));
+
// Add an in-place GOT/Stubs pass.
Config.PostPrunePasses.push_back(
PerGraphGOTAndPLTStubsBuilder_MachO_arm64::asPass);
@@ -711,6 +733,10 @@ const char *getMachOARM64RelocationKindName(Edge::Kind R) {
return "GOTPage21";
case GOTPageOffset12:
return "GOTPageOffset12";
+ case TLVPage21:
+ return "TLVPage21";
+ case TLVPageOffset12:
+ return "TLVPageOffset12";
case PointerToGOT:
return "PointerToGOT";
case PairedAddend:
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index 61d5c5e21ff1..a4fcd3b9a5f5 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -170,7 +170,7 @@ private:
auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1);
if (!ToSymbolSec)
return ToSymbolSec.takeError();
- ToSymbol = getSymbolByAddress(ToSymbolSec->Address);
+ ToSymbol = getSymbolByAddress(*ToSymbolSec, ToSymbolSec->Address);
assert(ToSymbol && "No symbol for section");
FixupValue -= ToSymbol->getAddress();
}
@@ -216,14 +216,18 @@ private:
continue;
}
- // Skip relocations for debug symbols.
+ auto NSec =
+ findSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
+ if (!NSec)
+ return NSec.takeError();
+
+ // Skip relocations for MachO sections without corresponding graph
+ // sections.
{
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- if (!NSec.GraphSection) {
+ if (!NSec->GraphSection) {
LLVM_DEBUG({
dbgs() << " Skipping relocations for MachO section "
- << NSec.SegName << "/" << NSec.SectName
+ << NSec->SegName << "/" << NSec->SectName
<< " which has no associated graph section\n";
});
continue;
@@ -240,16 +244,14 @@ private:
JITTargetAddress FixupAddress = SectionAddress + (uint32_t)RI.r_address;
LLVM_DEBUG({
- auto &NSec =
- getSectionByIndex(Obj.getSectionIndex(S.getRawDataRefImpl()));
- dbgs() << " " << NSec.SectName << " + "
+ dbgs() << " " << NSec->SectName << " + "
<< formatv("{0:x8}", RI.r_address) << ":\n";
});
// Find the block that the fixup points to.
Block *BlockToFix = nullptr;
{
- auto SymbolToFixOrErr = findSymbolByAddress(FixupAddress);
+ auto SymbolToFixOrErr = findSymbolByAddress(*NSec, FixupAddress);
if (!SymbolToFixOrErr)
return SymbolToFixOrErr.takeError();
BlockToFix = &SymbolToFixOrErr->getBlock();
@@ -270,7 +272,7 @@ private:
Symbol *TargetSymbol = nullptr;
uint64_t Addend = 0;
- // Sanity check the relocation kind.
+ // Validate the relocation kind.
auto MachORelocKind = getRelocKind(RI);
if (!MachORelocKind)
return MachORelocKind.takeError();
@@ -300,7 +302,7 @@ private:
else
return TargetSymbolOrErr.takeError();
Addend = *(const little32_t *)FixupContent;
- Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
+ Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable;
if (FixupOffset < 3)
return make_error<JITLinkError>("GOTLD at invalid offset " +
formatv("{0}", FixupOffset));
@@ -319,7 +321,10 @@ private:
else
return TargetSymbolOrErr.takeError();
Addend = *(const little32_t *)FixupContent;
- Kind = x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable;
+ Kind = x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable;
+ if (FixupOffset < 3)
+ return make_error<JITLinkError>("TLV at invalid offset " +
+ formatv("{0}", FixupOffset));
break;
case MachOPointer32:
if (auto TargetSymbolOrErr = findSymbolByIndex(RI.r_symbolnum))
@@ -339,7 +344,11 @@ private:
break;
case MachOPointer64Anon: {
JITTargetAddress TargetAddress = *(const ulittle64_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -360,7 +369,11 @@ private:
case MachOPCRel32Anon: {
JITTargetAddress TargetAddress =
FixupAddress + 4 + *(const little32_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -376,7 +389,11 @@ private:
1ULL << (*MachORelocKind - MachOPCRel32Minus1Anon));
JITTargetAddress TargetAddress =
FixupAddress + Delta + *(const little32_t *)FixupContent;
- if (auto TargetSymbolOrErr = findSymbolByAddress(TargetAddress))
+ auto TargetNSec = findSectionByIndex(RI.r_symbolnum - 1);
+ if (!TargetNSec)
+ return TargetNSec.takeError();
+ if (auto TargetSymbolOrErr =
+ findSymbolByAddress(*TargetNSec, TargetAddress))
TargetSymbol = &*TargetSymbolOrErr;
else
return TargetSymbolOrErr.takeError();
@@ -417,157 +434,15 @@ private:
}
};
-class PerGraphGOTAndPLTStubsBuilder_MachO_x86_64
- : public PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_MachO_x86_64> {
-public:
-
- using PerGraphGOTAndPLTStubsBuilder<
- PerGraphGOTAndPLTStubsBuilder_MachO_x86_64>::
- PerGraphGOTAndPLTStubsBuilder;
-
- bool isGOTEdgeToFix(Edge &E) const {
- return E.getKind() == x86_64::RequestGOTAndTransformToDelta32 ||
- E.getKind() ==
- x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
- }
-
- Symbol &createGOTEntry(Symbol &Target) {
- return x86_64::createAnonymousPointer(G, getGOTSection(), &Target);
- }
-
- void fixGOTEdge(Edge &E, Symbol &GOTEntry) {
- // Fix the edge kind.
- switch (E.getKind()) {
- case x86_64::RequestGOTAndTransformToDelta32:
- E.setKind(x86_64::Delta32);
- break;
- case x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable:
- E.setKind(x86_64::PCRel32GOTLoadRelaxable);
- break;
- default:
- llvm_unreachable("Not a GOT transform edge");
- }
- // Fix the target, leave the addend as-is.
- E.setTarget(GOTEntry);
- }
-
- bool isExternalBranchEdge(Edge &E) {
- return E.getKind() == x86_64::BranchPCRel32 && E.getTarget().isExternal();
- }
-
- Symbol &createPLTStub(Symbol &Target) {
- return x86_64::createAnonymousPointerJumpStub(G, getStubsSection(),
- getGOTEntry(Target));
- }
-
- void fixPLTEdge(Edge &E, Symbol &Stub) {
- assert(E.getKind() == x86_64::BranchPCRel32 && "Not a Branch32 edge?");
- assert(E.getAddend() == 0 &&
- "BranchPCRel32 edge has unexpected addend value");
-
- // Set the edge kind to BranchPCRel32ToPtrJumpStubRelaxable. We will use
- // this to check for stub optimization opportunities in the
- // optimizeMachO_x86_64_GOTAndStubs pass below.
- E.setKind(x86_64::BranchPCRel32ToPtrJumpStubRelaxable);
- E.setTarget(Stub);
- }
-
-private:
- Section &getGOTSection() {
- if (!GOTSection)
- GOTSection = &G.createSection("$__GOT", sys::Memory::MF_READ);
- return *GOTSection;
- }
-
- Section &getStubsSection() {
- if (!StubsSection) {
- auto StubsProt = static_cast<sys::Memory::ProtectionFlags>(
- sys::Memory::MF_READ | sys::Memory::MF_EXEC);
- StubsSection = &G.createSection("$__STUBS", StubsProt);
- }
- return *StubsSection;
- }
-
- Section *GOTSection = nullptr;
- Section *StubsSection = nullptr;
-};
-
-} // namespace
-
-static Error optimizeMachO_x86_64_GOTAndStubs(LinkGraph &G) {
- LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
-
- for (auto *B : G.blocks())
- for (auto &E : B->edges())
- if (E.getKind() == x86_64::PCRel32GOTLoadRelaxable) {
- assert(E.getOffset() >= 3 && "GOT edge occurs too early in block");
-
- // Optimize GOT references.
- auto &GOTBlock = E.getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT entry block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT entry should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- // Check that this is a recognized MOV instruction.
- // FIXME: Can we assume this?
- constexpr uint8_t MOVQRIPRel[] = {0x48, 0x8b};
- if (strncmp(B->getContent().data() + E.getOffset() - 3,
- reinterpret_cast<const char *>(MOVQRIPRel), 2) != 0)
- continue;
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- E.setTarget(GOTTarget);
- E.setKind(x86_64::Delta32);
- E.setAddend(E.getAddend() - 4);
- char *BlockData = B->getMutableContent(G).data();
- BlockData[E.getOffset() - 2] = (char)0x8d;
- LLVM_DEBUG({
- dbgs() << " Replaced GOT load wih LEA:\n ";
- printEdge(dbgs(), *B, E, x86_64::getEdgeKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubRelaxable) {
- auto &StubBlock = E.getTarget().getBlock();
- assert(StubBlock.getSize() == sizeof(x86_64::PointerJumpStubContent) &&
- "Stub block should be stub sized");
- assert(StubBlock.edges_size() == 1 &&
- "Stub block should only have one outgoing edge");
-
- auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
- assert(GOTBlock.getSize() == G.getPointerSize() &&
- "GOT block should be pointer sized");
- assert(GOTBlock.edges_size() == 1 &&
- "GOT block should only have one outgoing edge");
-
- auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
- JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
- JITTargetAddress TargetAddr = GOTTarget.getAddress();
-
- int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (Displacement >= std::numeric_limits<int32_t>::min() &&
- Displacement <= std::numeric_limits<int32_t>::max()) {
- E.setKind(x86_64::BranchPCRel32);
- E.setTarget(GOTTarget);
- LLVM_DEBUG({
- dbgs() << " Replaced stub branch with direct branch:\n ";
- printEdge(dbgs(), *B, E, x86_64::getEdgeKindName(E.getKind()));
- dbgs() << "\n";
- });
- }
- }
-
+Error buildGOTAndStubs_MachO_x86_64(LinkGraph &G) {
+ x86_64::GOTTableManager GOT;
+ x86_64::PLTTableManager PLT(GOT);
+ visitExistingEdges(G, GOT, PLT);
return Error::success();
}
+} // namespace
+
namespace llvm {
namespace jitlink {
@@ -582,7 +457,7 @@ public:
private:
Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
- return x86_64::applyFixup(G, B, E);
+ return x86_64::applyFixup(G, B, E, nullptr);
}
};
@@ -604,6 +479,10 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(createEHFrameSplitterPass_MachO_x86_64());
Config.PrePrunePasses.push_back(createEHFrameEdgeFixerPass_MachO_x86_64());
+ // Add compact unwind splitter pass.
+ Config.PrePrunePasses.push_back(
+ CompactUnwindSplitter("__LD,__compact_unwind"));
+
// Add a mark-live pass.
if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
Config.PrePrunePasses.push_back(std::move(MarkLive));
@@ -611,11 +490,10 @@ void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(markAllSymbolsLive);
// Add an in-place GOT/Stubs pass.
- Config.PostPrunePasses.push_back(
- PerGraphGOTAndPLTStubsBuilder_MachO_x86_64::asPass);
+ Config.PostPrunePasses.push_back(buildGOTAndStubs_MachO_x86_64);
// Add GOT/Stubs optimizer pass.
- Config.PreFixupPasses.push_back(optimizeMachO_x86_64_GOTAndStubs);
+ Config.PreFixupPasses.push_back(x86_64::optimizeGOTAndStubAccesses);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
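The MachO/x86-64 hunk above deletes the backend-specific PerGraphGOTAndPLTStubsBuilder and its GOT/stub optimizer, and instead drives the shared x86_64::GOTTableManager and x86_64::PLTTableManager over the graph's existing edges. The following is a minimal, LLVM-independent sketch of that table-manager pattern; Edge, GOTManager and the entry-naming scheme are simplified stand-ins for illustration, not the JITLink API.

// Minimal, LLVM-independent sketch of the "table manager" pattern that
// buildGOTAndStubs_MachO_x86_64 now delegates to. Edge and GOTManager are
// simplified stand-ins, not JITLink types.
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Edge {
  enum Kind { Delta32, RequestGOTAndTransformToDelta32 };
  Kind K;
  std::string Target; // name of the symbol the edge currently points at
};

class GOTManager {
public:
  // Returns true if this manager recognized (and rewrote) the edge.
  bool visitEdge(Edge &E) {
    if (E.K != Edge::RequestGOTAndTransformToDelta32)
      return false;
    // Lazily create one GOT entry per target, then point the edge at the
    // entry and downgrade the kind to a plain delta the fixup code knows.
    E.Target = getOrCreateEntry(E.Target);
    E.K = Edge::Delta32;
    return true;
  }

private:
  std::string getOrCreateEntry(const std::string &Target) {
    auto It = Entries.find(Target);
    if (It == Entries.end())
      It = Entries.emplace(Target, "$__GOT." + Target).first;
    return It->second;
  }
  std::map<std::string, std::string> Entries; // one entry per distinct target
};

int main() {
  std::vector<Edge> Edges = {
      {Edge::RequestGOTAndTransformToDelta32, "foo"},
      {Edge::RequestGOTAndTransformToDelta32, "foo"},
      {Edge::Delta32, "bar"}};
  GOTManager GOT;
  for (auto &E : Edges)
    GOT.visitEdge(E); // visitExistingEdges(G, GOT, PLT) plays this role above
  for (auto &E : Edges)
    std::cout << E.Target << " kind=" << E.K << "\n";
}

Because the managers live in the shared x86_64 support code, the MachO backend can reuse the same GOT/PLT construction and the same optimizeGOTAndStubAccesses pass instead of carrying its own copies.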
diff --git a/llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp b/llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp
new file mode 100644
index 000000000000..b73a310b2910
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp
@@ -0,0 +1,32 @@
+//===------------- MemoryFlags.cpp - Memory allocation flags --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/MemoryFlags.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+
+raw_ostream &operator<<(raw_ostream &OS, MemProt MP) {
+ return OS << (((MP & MemProt::Read) != MemProt::None) ? 'R' : '-')
+ << (((MP & MemProt::Write) != MemProt::None) ? 'W' : '-')
+ << (((MP & MemProt::Exec) != MemProt::None) ? 'X' : '-');
+}
+
+raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP) {
+ return OS << (MDP == MemDeallocPolicy::Standard ? "standard" : "finalize");
+}
+
+raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG) {
+ return OS << '(' << AG.getMemProt() << ", " << AG.getMemDeallocPolicy()
+ << ')';
+}
+
+} // end namespace jitlink
+} // end namespace llvm
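MemProt in the new MemoryFlags.cpp is a scoped bitmask enum, and the printer above renders it like a Unix permission triple. Below is a standalone sketch of the same pattern using a local MemProt stand-in rather than the llvm::jitlink type.

// Standalone sketch of the bitmask-enum printing used above. This MemProt is
// a local stand-in, not the llvm::jitlink type.
#include <cstdint>
#include <iostream>

enum class MemProt : uint8_t { None = 0, Read = 1, Write = 2, Exec = 4 };

constexpr MemProt operator&(MemProt A, MemProt B) {
  return MemProt(uint8_t(A) & uint8_t(B));
}
constexpr MemProt operator|(MemProt A, MemProt B) {
  return MemProt(uint8_t(A) | uint8_t(B));
}

std::ostream &operator<<(std::ostream &OS, MemProt MP) {
  // Same rendering as the JITLink printer: one character per permission.
  return OS << (((MP & MemProt::Read) != MemProt::None) ? 'R' : '-')
            << (((MP & MemProt::Write) != MemProt::None) ? 'W' : '-')
            << (((MP & MemProt::Exec) != MemProt::None) ? 'X' : '-');
}

int main() {
  std::cout << (MemProt::Read | MemProt::Exec) << "\n"; // prints "R-X"
}

Keeping the protection flags in a scoped enum with explicit bitwise operators keeps them distinct from plain integers while still allowing cheap mask tests.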
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
new file mode 100644
index 000000000000..6dccc4811885
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
@@ -0,0 +1,30 @@
+//===---- aarch64.cpp - Generic JITLink aarch64 edge kinds, utilities -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing aarch64 objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+namespace aarch64 {
+
+const char *getEdgeKindName(Edge::Kind K) {
+ switch (K) {
+ case R_AARCH64_CALL26:
+ return "R_AARCH64_CALL26";
+ }
+ return getGenericEdgeKindName(K);
+}
+} // namespace aarch64
+} // namespace jitlink
+} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
index c951ed6d95be..48521280059d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
@@ -24,6 +24,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "Pointer64";
case Pointer32:
return "Pointer32";
+ case Pointer32Signed:
+ return "Pointer32Signed";
case Delta64:
return "Delta64";
case Delta32:
@@ -32,22 +34,32 @@ const char *getEdgeKindName(Edge::Kind K) {
return "NegDelta64";
case NegDelta32:
return "NegDelta32";
+ case Delta64FromGOT:
+ return "Delta64FromGOT";
case BranchPCRel32:
return "BranchPCRel32";
case BranchPCRel32ToPtrJumpStub:
return "BranchPCRel32ToPtrJumpStub";
- case BranchPCRel32ToPtrJumpStubRelaxable:
- return "BranchPCRel32ToPtrJumpStubRelaxable";
+ case BranchPCRel32ToPtrJumpStubBypassable:
+ return "BranchPCRel32ToPtrJumpStubBypassable";
case RequestGOTAndTransformToDelta32:
return "RequestGOTAndTransformToDelta32";
+ case RequestGOTAndTransformToDelta64:
+ return "RequestGOTAndTransformToDelta64";
+ case RequestGOTAndTransformToDelta64FromGOT:
+ return "RequestGOTAndTransformToDelta64FromGOT";
+ case PCRel32GOTLoadREXRelaxable:
+ return "PCRel32GOTLoadREXRelaxable";
+ case RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable:
+ return "RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable";
case PCRel32GOTLoadRelaxable:
return "PCRel32GOTLoadRelaxable";
case RequestGOTAndTransformToPCRel32GOTLoadRelaxable:
return "RequestGOTAndTransformToPCRel32GOTLoadRelaxable";
- case PCRel32TLVPLoadRelaxable:
- return "PCRel32TLVPLoadRelaxable";
- case RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable:
- return "RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable";
+ case PCRel32TLVPLoadREXRelaxable:
+ return "PCRel32TLVPLoadREXRelaxable";
+ case RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable:
+ return "RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable";
default:
return getGenericEdgeKindName(static_cast<Edge::Kind>(K));
}
@@ -59,6 +71,119 @@ const char NullPointerContent[PointerSize] = {0x00, 0x00, 0x00, 0x00,
const char PointerJumpStubContent[6] = {
static_cast<char>(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00};
+Error optimizeGOTAndStubAccesses(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
+
+ for (auto *B : G.blocks())
+ for (auto &E : B->edges()) {
+ if (E.getKind() == x86_64::PCRel32GOTLoadRelaxable ||
+ E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable) {
+#ifndef NDEBUG
+ bool REXPrefix = E.getKind() == x86_64::PCRel32GOTLoadREXRelaxable;
+ assert(E.getOffset() >= (REXPrefix ? 3u : 2u) &&
+ "GOT edge occurs too early in block");
+#endif
+ auto *FixupData = reinterpret_cast<uint8_t *>(
+ const_cast<char *>(B->getContent().data())) +
+ E.getOffset();
+ const uint8_t Op = FixupData[-2];
+ const uint8_t ModRM = FixupData[-1];
+
+ auto &GOTEntryBlock = E.getTarget().getBlock();
+ assert(GOTEntryBlock.getSize() == G.getPointerSize() &&
+ "GOT entry block should be pointer sized");
+ assert(GOTEntryBlock.edges_size() == 1 &&
+ "GOT entry should only have one outgoing edge");
+ auto &GOTTarget = GOTEntryBlock.edges().begin()->getTarget();
+ JITTargetAddress TargetAddr = GOTTarget.getAddress();
+ JITTargetAddress EdgeAddr = B->getFixupAddress(E);
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ bool TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr);
+ bool DisplacementInRangeForImmS32 = isInRangeForImmS32(Displacement);
+
+        // If both the target address and the displacement are out of range,
+        // there is no optimization opportunity.
+ if (!(TargetInRangeForImmU32 || DisplacementInRangeForImmS32))
+ continue;
+
+ // Transform "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".
+ if (Op == 0x8b && DisplacementInRangeForImmS32) {
+ FixupData[-2] = 0x8d;
+ E.setKind(x86_64::Delta32);
+ E.setTarget(GOTTarget);
+ E.setAddend(E.getAddend() - 4);
+ LLVM_DEBUG({
+          dbgs() << "  Replaced GOT load with LEA:\n    ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ continue;
+ }
+
+ // Transform call/jmp instructions
+ if (Op == 0xff && TargetInRangeForImmU32) {
+ if (ModRM == 0x15) {
+            // The ABI says we can convert "call *foo@GOTPCREL(%rip)" to
+            // "nop; call foo", but lld converts it to "addr32 call foo"
+            // because that keeps the result a single instruction.
+ FixupData[-2] = 0x67;
+ FixupData[-1] = 0xe8;
+ LLVM_DEBUG({
+              dbgs() << "  replaced call instruction's memory operand with imm "
+ "operand:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ } else {
+ // Transform "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop"
+ assert(ModRM == 0x25 && "Invalid ModRm for call/jmp instructions");
+ FixupData[-2] = 0xe9;
+ FixupData[3] = 0x90;
+ E.setOffset(E.getOffset() - 1);
+ LLVM_DEBUG({
+              dbgs() << "  replaced jmp instruction's memory operand with imm "
+ "operand:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ E.setKind(x86_64::Pointer32);
+ E.setTarget(GOTTarget);
+ continue;
+ }
+ } else if (E.getKind() == x86_64::BranchPCRel32ToPtrJumpStubBypassable) {
+ auto &StubBlock = E.getTarget().getBlock();
+ assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) &&
+ "Stub block should be stub sized");
+ assert(StubBlock.edges_size() == 1 &&
+ "Stub block should only have one outgoing edge");
+
+ auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
+ assert(GOTBlock.getSize() == G.getPointerSize() &&
+ "GOT block should be pointer sized");
+ assert(GOTBlock.edges_size() == 1 &&
+ "GOT block should only have one outgoing edge");
+
+ auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+ JITTargetAddress EdgeAddr = B->getAddress() + E.getOffset();
+ JITTargetAddress TargetAddr = GOTTarget.getAddress();
+
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ if (isInRangeForImmS32(Displacement)) {
+ E.setKind(x86_64::BranchPCRel32);
+ E.setTarget(GOTTarget);
+ LLVM_DEBUG({
+ dbgs() << " Replaced stub branch with direct branch:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ }
+ }
+
+ return Error::success();
+}
+
} // end namespace x86_64
} // end namespace jitlink
} // end namespace llvm
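The new optimizeGOTAndStubAccesses pass patches instruction bytes in place: a GOT load (opcode 0x8b) becomes an LEA (0x8d) when the displacement to the real target fits a signed 32-bit immediate, and a call or jump through the GOT (opcode 0xff with ModRM 0x15 or 0x25) becomes "addr32 call" or "jmp; nop" against a Pointer32 target when the target address fits an unsigned 32-bit immediate. The sketch below replays those three byte-level rewrites; Fixup points at the 4-byte displacement field, mirroring FixupData above, and the instruction buffer in main is a hand-written example, not data taken from a real link.

// Byte-level sketch of the three relaxations applied above.
#include <cstdint>
#include <cstdio>

// mov foo@GOTPCREL(%rip), %rax  ->  lea foo(%rip), %rax
void relaxGOTLoadToLEA(uint8_t *Fixup) {
  // The opcode byte sits two bytes before the displacement.
  if (Fixup[-2] == 0x8b)
    Fixup[-2] = 0x8d;
}

// call *foo@GOTPCREL(%rip)  ->  addr32 call foo
void relaxGOTCall(uint8_t *Fixup) {
  if (Fixup[-2] == 0xff && Fixup[-1] == 0x15) {
    Fixup[-2] = 0x67; // addr32 prefix keeps the instruction length at 6
    Fixup[-1] = 0xe8; // direct call with a 32-bit immediate
  }
}

// jmp *foo@GOTPCREL(%rip)  ->  jmp foo; nop
void relaxGOTJmp(uint8_t *Fixup) {
  if (Fixup[-2] == 0xff && Fixup[-1] == 0x25) {
    Fixup[-2] = 0xe9; // direct jmp; its immediate starts one byte earlier,
                      // which is why the edge offset moves back by 1 above
    Fixup[3] = 0x90;  // pad the now-unused trailing byte with a nop
  }
}

int main() {
  uint8_t MovInsn[] = {0x48, 0x8b, 0x05, 0, 0, 0, 0}; // movq 0(%rip), %rax
  relaxGOTLoadToLEA(MovInsn + 3);                     // Fixup = disp32 field
  std::printf("opcode after relaxation: %#x\n", MovInsn[1]); // 0x8d (lea)
}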
diff --git a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 144329aa8bea..200f42aec067 100644
--- a/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -218,8 +218,7 @@ void MCJIT::generateCodeForModule(Module *M) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(LoadedObject.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
std::unique_ptr<RuntimeDyld::LoadedObjectInfo> L =
Dyld.loadObject(*LoadedObject.get());
diff --git a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
index 5b73c0e2fbc8..9ff6cec8c6c5 100644
--- a/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/CompileOnDemandLayer.cpp
@@ -184,6 +184,8 @@ void CompileOnDemandLayer::emit(
CompileOnDemandLayer::PerDylibResources &
CompileOnDemandLayer::getPerDylibResources(JITDylib &TargetD) {
+ std::lock_guard<std::mutex> Lock(CODLayerMutex);
+
auto I = DylibResources.find(&TargetD);
if (I == DylibResources.end()) {
auto &ImplD =
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 12a501f7f98c..64e5090e4c53 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -29,7 +29,6 @@ char SymbolsNotFound::ID = 0;
char SymbolsCouldNotBeRemoved::ID = 0;
char MissingSymbolDefinitions::ID = 0;
char UnexpectedSymbolDefinitions::ID = 0;
-char Task::ID = 0;
char MaterializationTask::ID = 0;
RegisterDependenciesFunction NoDependenciesToRegister =
@@ -90,14 +89,17 @@ void FailedToMaterialize::log(raw_ostream &OS) const {
OS << "Failed to materialize symbols: " << *Symbols;
}
-SymbolsNotFound::SymbolsNotFound(SymbolNameSet Symbols) {
+SymbolsNotFound::SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameSet Symbols)
+ : SSP(std::move(SSP)) {
for (auto &Sym : Symbols)
this->Symbols.push_back(Sym);
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
-SymbolsNotFound::SymbolsNotFound(SymbolNameVector Symbols)
- : Symbols(std::move(Symbols)) {
+SymbolsNotFound::SymbolsNotFound(std::shared_ptr<SymbolStringPool> SSP,
+ SymbolNameVector Symbols)
+ : SSP(std::move(SSP)), Symbols(std::move(Symbols)) {
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
@@ -109,8 +111,9 @@ void SymbolsNotFound::log(raw_ostream &OS) const {
OS << "Symbols not found: " << Symbols;
}
-SymbolsCouldNotBeRemoved::SymbolsCouldNotBeRemoved(SymbolNameSet Symbols)
- : Symbols(std::move(Symbols)) {
+SymbolsCouldNotBeRemoved::SymbolsCouldNotBeRemoved(
+ std::shared_ptr<SymbolStringPool> SSP, SymbolNameSet Symbols)
+ : SSP(std::move(SSP)), Symbols(std::move(Symbols)) {
assert(!this->Symbols.empty() && "Can not fail to resolve an empty set");
}
@@ -1333,11 +1336,13 @@ Error JITDylib::remove(const SymbolNameSet &Names) {
// If any of the symbols are not defined, return an error.
if (!Missing.empty())
- return make_error<SymbolsNotFound>(std::move(Missing));
+ return make_error<SymbolsNotFound>(ES.getSymbolStringPool(),
+ std::move(Missing));
// If any of the symbols are currently materializing, return an error.
if (!Materializing.empty())
- return make_error<SymbolsCouldNotBeRemoved>(std::move(Materializing));
+ return make_error<SymbolsCouldNotBeRemoved>(ES.getSymbolStringPool(),
+ std::move(Materializing));
// Remove the symbols.
for (auto &SymbolMaterializerItrPair : SymbolsToRemove) {
@@ -1793,8 +1798,6 @@ void Platform::lookupInitSymbolsAsync(
}
}
-void Task::anchor() {}
-
void MaterializationTask::printDescription(raw_ostream &OS) {
OS << "Materialization task: " << MU->getName() << " in "
<< MR->getTargetJITDylib().getName();
@@ -2086,8 +2089,8 @@ Error ExecutionSession::registerJITDispatchHandlers(
}
void ExecutionSession::runJITDispatchHandler(
- ExecutorProcessControl::SendResultFunction SendResult,
- JITTargetAddress HandlerFnTagAddr, ArrayRef<char> ArgBuffer) {
+ SendResultFunction SendResult, JITTargetAddress HandlerFnTagAddr,
+ ArrayRef<char> ArgBuffer) {
std::shared_ptr<JITDispatchHandlerFunction> F;
{
@@ -2234,7 +2237,8 @@ Error ExecutionSession::IL_updateCandidatesFor(
// weakly referenced" specific error here to reduce confusion.
if (SymI->second.getFlags().hasMaterializationSideEffectsOnly() &&
SymLookupFlags != SymbolLookupFlags::WeaklyReferencedSymbol)
- return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ SymbolNameVector({Name}));
// If we matched against this symbol but it is in the error state
// then bail out and treat it as a failure to materialize.
@@ -2422,7 +2426,7 @@ void ExecutionSession::OL_applyQueryPhase1(
} else {
LLVM_DEBUG(dbgs() << "Phase 1 failed with unresolved symbols.\n");
IPLS->fail(make_error<SymbolsNotFound>(
- IPLS->DefGeneratorCandidates.getSymbolNames()));
+ getSymbolStringPool(), IPLS->DefGeneratorCandidates.getSymbolNames()));
}
}
@@ -2492,7 +2496,8 @@ void ExecutionSession::OL_completeLookup(
dbgs() << "error: "
"required, but symbol is has-side-effects-only\n";
});
- return make_error<SymbolsNotFound>(SymbolNameVector({Name}));
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ SymbolNameVector({Name}));
}
// If we matched against this symbol but it is in the error state
@@ -2594,7 +2599,7 @@ void ExecutionSession::OL_completeLookup(
}
}
- LLVM_DEBUG(dbgs() << "Stripping unmatched weakly-refererced symbols\n");
+ LLVM_DEBUG(dbgs() << "Stripping unmatched weakly-referenced symbols\n");
IPLS->LookupSet.forEachWithRemoval(
[&](const SymbolStringPtr &Name, SymbolLookupFlags SymLookupFlags) {
if (SymLookupFlags == SymbolLookupFlags::WeaklyReferencedSymbol) {
@@ -2606,7 +2611,8 @@ void ExecutionSession::OL_completeLookup(
if (!IPLS->LookupSet.empty()) {
LLVM_DEBUG(dbgs() << "Failing due to unresolved symbols\n");
- return make_error<SymbolsNotFound>(IPLS->LookupSet.getSymbolNames());
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ IPLS->LookupSet.getSymbolNames());
}
// Record whether the query completed.
@@ -2733,7 +2739,8 @@ void ExecutionSession::OL_completeLookupFlags(
if (!IPLS->LookupSet.empty()) {
LLVM_DEBUG(dbgs() << "Failing due to unresolved symbols\n");
- return make_error<SymbolsNotFound>(IPLS->LookupSet.getSymbolNames());
+ return make_error<SymbolsNotFound>(getSymbolStringPool(),
+ IPLS->LookupSet.getSymbolNames());
}
LLVM_DEBUG(dbgs() << "Succeded, result = " << Result << "\n");
@@ -2911,6 +2918,7 @@ void ExecutionSession::dumpDispatchInfo(Task &T) {
runSessionLocked([&]() {
dbgs() << "Dispatching: ";
T.printDescription(dbgs());
+ dbgs() << "\n";
});
}
#endif // NDEBUG
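The Core.cpp changes thread a std::shared_ptr<SymbolStringPool> into SymbolsNotFound and SymbolsCouldNotBeRemoved, presumably so the pooled strings an error refers to remain valid even if the error outlives the session that interned them. A minimal keep-alive sketch of that ownership pattern, using plain stand-in types rather than the ORC classes:

// Keep-alive sketch of the pattern behind the new SymbolsNotFound
// constructors: the error object co-owns the pool that backs its strings.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Pool {
  std::vector<std::string> Interned; // backing storage for pooled names
  ~Pool() { std::cout << "pool destroyed\n"; }
};

class NotFoundError {
public:
  NotFoundError(std::shared_ptr<Pool> SSP, std::vector<std::string> Symbols)
      : SSP(std::move(SSP)), Symbols(std::move(Symbols)) {}
  void log() const {
    std::cout << "Symbols not found:";
    for (auto &S : Symbols)
      std::cout << ' ' << S;
    std::cout << '\n';
  }

private:
  std::shared_ptr<Pool> SSP; // keeps the pool alive as long as the error lives
  std::vector<std::string> Symbols;
};

int main() {
  auto SSP = std::make_shared<Pool>();
  NotFoundError Err(SSP, {"foo", "bar"});
  SSP.reset(); // the session drops its reference...
  Err.log();   // ...but the error still owns the pool
}              // "pool destroyed" prints only here, after the error dies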
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
index 36efc744bf30..fcfe389f82a8 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
@@ -1,10 +1,15 @@
-//===---- DebugObjectManagerPlugin.h - JITLink debug objects ---*- C++ -*-===//
+//===------- DebugObjectManagerPlugin.cpp - JITLink debug objects ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+//
+// FIXME: Update Plugin to poke the debug object into a new JITLink section,
+// rather than creating a new allocation.
+//
+//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
@@ -108,70 +113,77 @@ void ELFDebugObjectSection<ELFT>::dump(raw_ostream &OS, StringRef Name) {
}
}
-static constexpr sys::Memory::ProtectionFlags ReadOnly =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ);
-
enum class Requirement {
// Request final target memory load-addresses for all sections.
ReportFinalSectionLoadAddresses,
};
-/// The plugin creates a debug object from JITLinkContext when JITLink starts
-/// processing the corresponding LinkGraph. It provides access to the pass
-/// configuration of the LinkGraph and calls the finalization function, once
-/// the resulting link artifact was emitted.
+/// The plugin creates a debug object when JITLink starts processing the
+/// corresponding LinkGraph. It provides access to the pass configuration of
+/// the LinkGraph and calls the finalization function once the resulting link
+/// artifact has been emitted.
///
class DebugObject {
public:
- DebugObject(JITLinkContext &Ctx, ExecutionSession &ES) : Ctx(Ctx), ES(ES) {}
+ DebugObject(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ ExecutionSession &ES)
+ : MemMgr(MemMgr), JD(JD), ES(ES) {}
void set(Requirement Req) { Reqs.insert(Req); }
bool has(Requirement Req) const { return Reqs.count(Req) > 0; }
- using FinalizeContinuation = std::function<void(Expected<sys::MemoryBlock>)>;
+ using FinalizeContinuation = std::function<void(Expected<ExecutorAddrRange>)>;
+
void finalizeAsync(FinalizeContinuation OnFinalize);
virtual ~DebugObject() {
- if (Alloc)
- if (Error Err = Alloc->deallocate())
+ if (Alloc) {
+ std::vector<FinalizedAlloc> Allocs;
+ Allocs.push_back(std::move(Alloc));
+ if (Error Err = MemMgr.deallocate(std::move(Allocs)))
ES.reportError(std::move(Err));
+ }
}
virtual void reportSectionTargetMemoryRange(StringRef Name,
SectionRange TargetMem) {}
protected:
- using Allocation = JITLinkMemoryManager::Allocation;
+ using InFlightAlloc = JITLinkMemoryManager::InFlightAlloc;
+ using FinalizedAlloc = JITLinkMemoryManager::FinalizedAlloc;
- virtual Expected<std::unique_ptr<Allocation>>
- finalizeWorkingMemory(JITLinkContext &Ctx) = 0;
+ virtual Expected<SimpleSegmentAlloc> finalizeWorkingMemory() = 0;
+
+ JITLinkMemoryManager &MemMgr;
+ const JITLinkDylib *JD = nullptr;
private:
- JITLinkContext &Ctx;
ExecutionSession &ES;
std::set<Requirement> Reqs;
- std::unique_ptr<Allocation> Alloc{nullptr};
+ FinalizedAlloc Alloc;
};
// Finalize working memory and take ownership of the resulting allocation. Start
// copying memory over to the target and pass on the result once we're done.
// Ownership of the allocation remains with us for the rest of our lifetime.
void DebugObject::finalizeAsync(FinalizeContinuation OnFinalize) {
- assert(Alloc == nullptr && "Cannot finalize more than once");
-
- auto AllocOrErr = finalizeWorkingMemory(Ctx);
- if (!AllocOrErr)
- OnFinalize(AllocOrErr.takeError());
- Alloc = std::move(*AllocOrErr);
-
- Alloc->finalizeAsync([this, OnFinalize](Error Err) {
- if (Err)
- OnFinalize(std::move(Err));
- else
- OnFinalize(sys::MemoryBlock(
- jitTargetAddressToPointer<void *>(Alloc->getTargetMemory(ReadOnly)),
- Alloc->getWorkingMemory(ReadOnly).size()));
- });
+ assert(!Alloc && "Cannot finalize more than once");
+
+ if (auto SimpleSegAlloc = finalizeWorkingMemory()) {
+ auto ROSeg = SimpleSegAlloc->getSegInfo(MemProt::Read);
+ ExecutorAddrRange DebugObjRange(ExecutorAddr(ROSeg.Addr),
+ ExecutorAddrDiff(ROSeg.WorkingMem.size()));
+ SimpleSegAlloc->finalize(
+ [this, DebugObjRange,
+ OnFinalize = std::move(OnFinalize)](Expected<FinalizedAlloc> FA) {
+ if (FA) {
+ Alloc = std::move(*FA);
+ OnFinalize(DebugObjRange);
+ } else
+ OnFinalize(FA.takeError());
+ });
+ } else
+ OnFinalize(SimpleSegAlloc.takeError());
}
/// The current implementation of ELFDebugObject replicates the approach used in
@@ -190,8 +202,7 @@ public:
StringRef getBuffer() const { return Buffer->getMemBufferRef().getBuffer(); }
protected:
- Expected<std::unique_ptr<Allocation>>
- finalizeWorkingMemory(JITLinkContext &Ctx) override;
+ Expected<SimpleSegmentAlloc> finalizeWorkingMemory() override;
template <typename ELFT>
Error recordSection(StringRef Name,
@@ -201,15 +212,16 @@ protected:
private:
template <typename ELFT>
static Expected<std::unique_ptr<ELFDebugObject>>
- CreateArchType(MemoryBufferRef Buffer, JITLinkContext &Ctx,
- ExecutionSession &ES);
+ CreateArchType(MemoryBufferRef Buffer, JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD, ExecutionSession &ES);
static std::unique_ptr<WritableMemoryBuffer>
CopyBuffer(MemoryBufferRef Buffer, Error &Err);
ELFDebugObject(std::unique_ptr<WritableMemoryBuffer> Buffer,
- JITLinkContext &Ctx, ExecutionSession &ES)
- : DebugObject(Ctx, ES), Buffer(std::move(Buffer)) {
+ JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
+ ExecutionSession &ES)
+ : DebugObject(MemMgr, JD, ES), Buffer(std::move(Buffer)) {
set(Requirement::ReportFinalSectionLoadAddresses);
}
@@ -244,13 +256,14 @@ ELFDebugObject::CopyBuffer(MemoryBufferRef Buffer, Error &Err) {
template <typename ELFT>
Expected<std::unique_ptr<ELFDebugObject>>
-ELFDebugObject::CreateArchType(MemoryBufferRef Buffer, JITLinkContext &Ctx,
- ExecutionSession &ES) {
+ELFDebugObject::CreateArchType(MemoryBufferRef Buffer,
+ JITLinkMemoryManager &MemMgr,
+ const JITLinkDylib *JD, ExecutionSession &ES) {
using SectionHeader = typename ELFT::Shdr;
Error Err = Error::success();
std::unique_ptr<ELFDebugObject> DebugObj(
- new ELFDebugObject(CopyBuffer(Buffer, Err), Ctx, ES));
+ new ELFDebugObject(CopyBuffer(Buffer, Err), MemMgr, JD, ES));
if (Err)
return std::move(Err);
@@ -299,23 +312,26 @@ ELFDebugObject::Create(MemoryBufferRef Buffer, JITLinkContext &Ctx,
if (Class == ELF::ELFCLASS32) {
if (Endian == ELF::ELFDATA2LSB)
- return CreateArchType<ELF32LE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF32LE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
if (Endian == ELF::ELFDATA2MSB)
- return CreateArchType<ELF32BE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF32BE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
return nullptr;
}
if (Class == ELF::ELFCLASS64) {
if (Endian == ELF::ELFDATA2LSB)
- return CreateArchType<ELF64LE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF64LE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
if (Endian == ELF::ELFDATA2MSB)
- return CreateArchType<ELF64BE>(Buffer, Ctx, ES);
+ return CreateArchType<ELF64BE>(Buffer, Ctx.getMemoryManager(),
+ Ctx.getJITLinkDylib(), ES);
return nullptr;
}
return nullptr;
}
-Expected<std::unique_ptr<DebugObject::Allocation>>
-ELFDebugObject::finalizeWorkingMemory(JITLinkContext &Ctx) {
+Expected<SimpleSegmentAlloc> ELFDebugObject::finalizeWorkingMemory() {
LLVM_DEBUG({
dbgs() << "Section load-addresses in debug object for \""
<< Buffer->getBufferIdentifier() << "\":\n";
@@ -324,28 +340,21 @@ ELFDebugObject::finalizeWorkingMemory(JITLinkContext &Ctx) {
});
// TODO: This works, but what actual alignment requirements do we have?
- unsigned Alignment = sys::Process::getPageSizeEstimate();
- JITLinkMemoryManager &MemMgr = Ctx.getMemoryManager();
- const JITLinkDylib *JD = Ctx.getJITLinkDylib();
+ unsigned PageSize = sys::Process::getPageSizeEstimate();
size_t Size = Buffer->getBufferSize();
// Allocate working memory for debug object in read-only segment.
- JITLinkMemoryManager::SegmentsRequestMap SingleReadOnlySegment;
- SingleReadOnlySegment[ReadOnly] =
- JITLinkMemoryManager::SegmentRequest(Alignment, Size, 0);
-
- auto AllocOrErr = MemMgr.allocate(JD, SingleReadOnlySegment);
- if (!AllocOrErr)
- return AllocOrErr.takeError();
+ auto Alloc = SimpleSegmentAlloc::Create(
+ MemMgr, JD, {{MemProt::Read, {Size, Align(PageSize)}}});
+ if (!Alloc)
+ return Alloc;
// Initialize working memory with a copy of our object buffer.
- // TODO: Use our buffer as working memory directly.
- std::unique_ptr<Allocation> Alloc = std::move(*AllocOrErr);
- MutableArrayRef<char> WorkingMem = Alloc->getWorkingMemory(ReadOnly);
- memcpy(WorkingMem.data(), Buffer->getBufferStart(), Size);
+ auto SegInfo = Alloc->getSegInfo(MemProt::Read);
+ memcpy(SegInfo.WorkingMem.data(), Buffer->getBufferStart(), Size);
Buffer.reset();
- return std::move(Alloc);
+ return Alloc;
}
void ELFDebugObject::reportSectionTargetMemoryRange(StringRef Name,
@@ -447,7 +456,7 @@ Error DebugObjectManagerPlugin::notifyEmitted(
std::future<MSVCPError> FinalizeErr = FinalizePromise.get_future();
It->second->finalizeAsync(
- [this, &FinalizePromise, &MR](Expected<sys::MemoryBlock> TargetMem) {
+ [this, &FinalizePromise, &MR](Expected<ExecutorAddrRange> TargetMem) {
// Any failure here will fail materialization.
if (!TargetMem) {
FinalizePromise.set_value(TargetMem.takeError());
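The DebugObjectManagerPlugin rewrite moves from the old Allocation interface to SimpleSegmentAlloc: allocate one read-only segment, copy the debug object into its working memory, finalize asynchronously, and keep the resulting FinalizedAlloc for later deallocation. Below is a simplified, LLVM-independent sketch of that allocate/copy/finalize-async flow; Allocator, Segment and FinalizedAlloc here are illustrative stand-ins, not JITLink types.

// Simplified allocate -> copy -> finalize-async flow mirroring the rewritten
// ELFDebugObject::finalizeWorkingMemory / DebugObject::finalizeAsync.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct FinalizedAlloc {
  uint64_t TargetAddr = 0; // handle retained until deallocation
};

struct Segment {
  std::vector<char> WorkingMem; // host-side buffer we can write into
  uint64_t TargetAddr;          // where the bytes will live in the executor

  // JITLink finalization is asynchronous; model it with a callback.
  void finalizeAsync(std::function<void(FinalizedAlloc)> OnFinalize) {
    OnFinalize(FinalizedAlloc{TargetAddr});
  }
};

struct Allocator {
  Segment allocateReadOnly(std::size_t Size) {
    return {std::vector<char>(Size), 0x1000};
  }
};

int main() {
  std::string DebugObj = "ELF...debug-object-bytes...";
  Allocator MemMgr;

  // 1. Allocate read-only working memory sized to the buffer.
  Segment Seg = MemMgr.allocateReadOnly(DebugObj.size());

  // 2. Copy the debug object into the working memory.
  std::memcpy(Seg.WorkingMem.data(), DebugObj.data(), DebugObj.size());

  // 3. Finalize asynchronously and hand the target range to the continuation.
  Seg.finalizeAsync([&](FinalizedAlloc FA) {
    std::cout << "debug object finalized at 0x" << std::hex << FA.TargetAddr
              << std::dec << ", size " << DebugObj.size() << "\n";
  });
}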
diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
new file mode 100644
index 000000000000..8479495623b8
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
@@ -0,0 +1,450 @@
+//===------- DebuggerSupportPlugin.cpp - Utils for debugger support -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/BinaryFormat/MachO.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::orc;
+
+static const char *SynthDebugSectionName = "__jitlink_synth_debug_object";
+
+namespace {
+
+struct MachO64LE {
+ using UIntPtr = uint64_t;
+
+ using Header = MachO::mach_header_64;
+ using SegmentLC = MachO::segment_command_64;
+ using Section = MachO::section_64;
+ using NList = MachO::nlist_64;
+
+ static constexpr support::endianness Endianness = support::little;
+ static constexpr const uint32_t Magic = MachO::MH_MAGIC_64;
+ static constexpr const uint32_t SegmentCmd = MachO::LC_SEGMENT_64;
+};
+
+class MachODebugObjectSynthesizerBase
+ : public GDBJITDebugInfoRegistrationPlugin::DebugSectionSynthesizer {
+public:
+ static bool isDebugSection(Section &Sec) {
+ return Sec.getName().startswith("__DWARF,");
+ }
+
+ MachODebugObjectSynthesizerBase(LinkGraph &G, ExecutorAddr RegisterActionAddr)
+ : G(G), RegisterActionAddr(RegisterActionAddr) {}
+ virtual ~MachODebugObjectSynthesizerBase() {}
+
+ Error preserveDebugSections() {
+ if (G.findSectionByName(SynthDebugSectionName)) {
+ LLVM_DEBUG({
+ dbgs() << "MachODebugObjectSynthesizer skipping graph " << G.getName()
+ << " which contains an unexpected existing "
+ << SynthDebugSectionName << " section.\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "MachODebugObjectSynthesizer visiting graph " << G.getName()
+ << "\n";
+ });
+ for (auto &Sec : G.sections()) {
+ if (!isDebugSection(Sec))
+ continue;
+ // Preserve blocks in this debug section by marking one existing symbol
+ // live for each block, and introducing a new live, anonymous symbol for
+ // each currently unreferenced block.
+ LLVM_DEBUG({
+ dbgs() << " Preserving debug section " << Sec.getName() << "\n";
+ });
+ SmallSet<Block *, 8> PreservedBlocks;
+ for (auto *Sym : Sec.symbols()) {
+ bool NewPreservedBlock =
+ PreservedBlocks.insert(&Sym->getBlock()).second;
+ if (NewPreservedBlock)
+ Sym->setLive(true);
+ }
+ for (auto *B : Sec.blocks())
+ if (!PreservedBlocks.count(B))
+ G.addAnonymousSymbol(*B, 0, 0, false, true);
+ }
+ return Error::success();
+ }
+
+protected:
+ LinkGraph &G;
+ ExecutorAddr RegisterActionAddr;
+};
+
+template <typename MachOTraits>
+class MachODebugObjectSynthesizer : public MachODebugObjectSynthesizerBase {
+private:
+ class MachOStructWriter {
+ public:
+ MachOStructWriter(MutableArrayRef<char> Buffer) : Buffer(Buffer) {}
+
+ size_t getOffset() const { return Offset; }
+
+ template <typename MachOStruct> void write(MachOStruct S) {
+ assert(Offset + sizeof(S) <= Buffer.size() &&
+ "Container block overflow while constructing debug MachO");
+ if (MachOTraits::Endianness != support::endian::system_endianness())
+ MachO::swapStruct(S);
+ memcpy(Buffer.data() + Offset, &S, sizeof(S));
+ Offset += sizeof(S);
+ }
+
+ private:
+ MutableArrayRef<char> Buffer;
+ size_t Offset = 0;
+ };
+
+public:
+ using MachODebugObjectSynthesizerBase::MachODebugObjectSynthesizerBase;
+
+ Error startSynthesis() override {
+ LLVM_DEBUG({
+ dbgs() << "Creating " << SynthDebugSectionName << " for " << G.getName()
+ << "\n";
+ });
+ auto &SDOSec = G.createSection(SynthDebugSectionName, MemProt::Read);
+
+ struct DebugSectionInfo {
+ Section *Sec = nullptr;
+ StringRef SegName;
+ StringRef SecName;
+ JITTargetAddress Alignment = 0;
+ JITTargetAddress StartAddr = 0;
+ uint64_t Size = 0;
+ };
+
+ SmallVector<DebugSectionInfo, 12> DebugSecInfos;
+ size_t NumSections = 0;
+ for (auto &Sec : G.sections()) {
+ if (llvm::empty(Sec.blocks()))
+ continue;
+
+ ++NumSections;
+ if (isDebugSection(Sec)) {
+ size_t SepPos = Sec.getName().find(',');
+ if (SepPos > 16 || (Sec.getName().size() - (SepPos + 1) > 16)) {
+ LLVM_DEBUG({
+ dbgs() << "Skipping debug object synthesis for graph "
+ << G.getName()
+ << ": encountered non-standard DWARF section name \""
+ << Sec.getName() << "\"\n";
+ });
+ return Error::success();
+ }
+ DebugSecInfos.push_back({&Sec, Sec.getName().substr(0, SepPos),
+ Sec.getName().substr(SepPos + 1), 0, 0});
+ } else
+ NonDebugSections.push_back(&Sec);
+ }
+
+ // Create container block.
+ size_t SectionsCmdSize =
+ sizeof(typename MachOTraits::Section) * NumSections;
+ size_t SegmentLCSize =
+ sizeof(typename MachOTraits::SegmentLC) + SectionsCmdSize;
+ size_t ContainerBlockSize =
+ sizeof(typename MachOTraits::Header) + SegmentLCSize;
+ auto ContainerBlockContent = G.allocateBuffer(ContainerBlockSize);
+ MachOContainerBlock =
+ &G.createMutableContentBlock(SDOSec, ContainerBlockContent, 0, 8, 0);
+
+ // Copy debug section blocks and symbols.
+ JITTargetAddress NextBlockAddr = MachOContainerBlock->getSize();
+ for (auto &SI : DebugSecInfos) {
+ assert(!llvm::empty(SI.Sec->blocks()) && "Empty debug info section?");
+
+ // Update addresses in debug section.
+ LLVM_DEBUG({
+ dbgs() << " Appending " << SI.Sec->getName() << " ("
+ << SI.Sec->blocks_size() << " block(s)) at "
+ << formatv("{0:x8}", NextBlockAddr) << "\n";
+ });
+ for (auto *B : SI.Sec->blocks()) {
+ NextBlockAddr = alignToBlock(NextBlockAddr, *B);
+ B->setAddress(NextBlockAddr);
+ NextBlockAddr += B->getSize();
+ }
+
+ auto &FirstBlock = **SI.Sec->blocks().begin();
+ if (FirstBlock.getAlignmentOffset() != 0)
+ return make_error<StringError>(
+ "First block in " + SI.Sec->getName() +
+ " section has non-zero alignment offset",
+ inconvertibleErrorCode());
+ if (FirstBlock.getAlignment() > std::numeric_limits<uint32_t>::max())
+ return make_error<StringError>("First block in " + SI.Sec->getName() +
+ " has alignment >4Gb",
+ inconvertibleErrorCode());
+
+ SI.Alignment = FirstBlock.getAlignment();
+ SI.StartAddr = FirstBlock.getAddress();
+ SI.Size = NextBlockAddr - SI.StartAddr;
+ G.mergeSections(SDOSec, *SI.Sec);
+ SI.Sec = nullptr;
+ }
+ size_t DebugSectionsSize = NextBlockAddr - MachOContainerBlock->getSize();
+
+ // Write MachO header and debug section load commands.
+ MachOStructWriter Writer(MachOContainerBlock->getAlreadyMutableContent());
+ typename MachOTraits::Header Hdr;
+ memset(&Hdr, 0, sizeof(Hdr));
+ Hdr.magic = MachOTraits::Magic;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ Hdr.cputype = MachO::CPU_TYPE_X86_64;
+ Hdr.cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL;
+ break;
+ case Triple::aarch64:
+ Hdr.cputype = MachO::CPU_TYPE_ARM64;
+ Hdr.cpusubtype = MachO::CPU_SUBTYPE_ARM64_ALL;
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+ Hdr.filetype = MachO::MH_OBJECT;
+ Hdr.ncmds = 1;
+ Hdr.sizeofcmds = SegmentLCSize;
+ Hdr.flags = 0;
+ Writer.write(Hdr);
+
+ typename MachOTraits::SegmentLC SegLC;
+ memset(&SegLC, 0, sizeof(SegLC));
+ SegLC.cmd = MachOTraits::SegmentCmd;
+ SegLC.cmdsize = SegmentLCSize;
+ SegLC.vmaddr = ContainerBlockSize;
+ SegLC.vmsize = DebugSectionsSize;
+ SegLC.fileoff = ContainerBlockSize;
+ SegLC.filesize = DebugSectionsSize;
+ SegLC.maxprot =
+ MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
+ SegLC.initprot =
+ MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
+ SegLC.nsects = NumSections;
+ SegLC.flags = 0;
+ Writer.write(SegLC);
+
+ StringSet<> ExistingLongNames;
+ for (auto &SI : DebugSecInfos) {
+ typename MachOTraits::Section Sec;
+ memset(&Sec, 0, sizeof(Sec));
+ memcpy(Sec.sectname, SI.SecName.data(), SI.SecName.size());
+ memcpy(Sec.segname, SI.SegName.data(), SI.SegName.size());
+ Sec.addr = SI.StartAddr;
+ Sec.size = SI.Size;
+ Sec.offset = SI.StartAddr;
+ Sec.align = SI.Alignment;
+ Sec.reloff = 0;
+ Sec.nreloc = 0;
+ Sec.flags = MachO::S_ATTR_DEBUG;
+ Writer.write(Sec);
+ }
+
+ // Set MachOContainerBlock to indicate success to
+ // completeSynthesisAndRegister.
+ NonDebugSectionsStart = Writer.getOffset();
+ return Error::success();
+ }
+
+ Error completeSynthesisAndRegister() override {
+ if (!MachOContainerBlock) {
+ LLVM_DEBUG({
+ dbgs() << "Not writing MachO debug object header for " << G.getName()
+               << " since startSynthesis failed\n";
+ });
+ return Error::success();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "Writing MachO debug object header for " << G.getName() << "\n";
+ });
+
+ MachOStructWriter Writer(
+ MachOContainerBlock->getAlreadyMutableContent().drop_front(
+ NonDebugSectionsStart));
+
+ unsigned LongSectionNameIdx = 0;
+ for (auto *Sec : NonDebugSections) {
+ size_t SepPos = Sec->getName().find(',');
+ StringRef SegName, SecName;
+ std::string CustomSecName;
+
+ if ((SepPos == StringRef::npos && Sec->getName().size() <= 16)) {
+ // No embedded segment name, short section name.
+ SegName = "__JITLINK_CUSTOM";
+ SecName = Sec->getName();
+ } else if (SepPos < 16 && (Sec->getName().size() - (SepPos + 1) <= 16)) {
+ // Canonical embedded segment and section name.
+ SegName = Sec->getName().substr(0, SepPos);
+ SecName = Sec->getName().substr(SepPos + 1);
+ } else {
+ // Long section name that needs to be truncated.
+ assert(Sec->getName().size() > 16 &&
+ "Short section name should have been handled above");
+ SegName = "__JITLINK_CUSTOM";
+ auto IdxStr = std::to_string(++LongSectionNameIdx);
+ CustomSecName = Sec->getName().substr(0, 15 - IdxStr.size()).str();
+ CustomSecName += ".";
+ CustomSecName += IdxStr;
+ SecName = StringRef(CustomSecName.data(), 16);
+ }
+
+ SectionRange R(*Sec);
+ if (R.getFirstBlock()->getAlignmentOffset() != 0)
+ return make_error<StringError>(
+ "While building MachO debug object for " + G.getName() +
+ " first block has non-zero alignment offset",
+ inconvertibleErrorCode());
+
+ typename MachOTraits::Section SecCmd;
+ memset(&SecCmd, 0, sizeof(SecCmd));
+ memcpy(SecCmd.sectname, SecName.data(), SecName.size());
+ memcpy(SecCmd.segname, SegName.data(), SegName.size());
+ SecCmd.addr = R.getStart();
+ SecCmd.size = R.getSize();
+ SecCmd.offset = 0;
+ SecCmd.align = R.getFirstBlock()->getAlignment();
+ SecCmd.reloff = 0;
+ SecCmd.nreloc = 0;
+ SecCmd.flags = 0;
+ Writer.write(SecCmd);
+ }
+
+ SectionRange R(MachOContainerBlock->getSection());
+ G.allocActions().push_back(
+ {{RegisterActionAddr.getValue(), R.getStart(), R.getSize()}, {}});
+ return Error::success();
+ }
+
+private:
+ Block *MachOContainerBlock = nullptr;
+ SmallVector<Section *, 16> NonDebugSections;
+ size_t NonDebugSectionsStart = 0;
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<GDBJITDebugInfoRegistrationPlugin>>
+GDBJITDebugInfoRegistrationPlugin::Create(ExecutionSession &ES,
+ JITDylib &ProcessJD,
+ const Triple &TT) {
+ auto RegisterActionAddr =
+ TT.isOSBinFormatMachO()
+ ? ES.intern("_llvm_orc_registerJITLoaderGDBAllocAction")
+ : ES.intern("llvm_orc_registerJITLoaderGDBAllocAction");
+
+ if (auto Addr = ES.lookup({&ProcessJD}, RegisterActionAddr))
+ return std::make_unique<GDBJITDebugInfoRegistrationPlugin>(
+ ExecutorAddr(Addr->getAddress()));
+ else
+ return Addr.takeError();
+}
+
+Error GDBJITDebugInfoRegistrationPlugin::notifyFailed(
+ MaterializationResponsibility &MR) {
+ return Error::success();
+}
+
+Error GDBJITDebugInfoRegistrationPlugin::notifyRemovingResources(
+ ResourceKey K) {
+ return Error::success();
+}
+
+void GDBJITDebugInfoRegistrationPlugin::notifyTransferringResources(
+ ResourceKey DstKey, ResourceKey SrcKey) {}
+
+void GDBJITDebugInfoRegistrationPlugin::modifyPassConfig(
+ MaterializationResponsibility &MR, LinkGraph &LG,
+ PassConfiguration &PassConfig) {
+
+ if (LG.getTargetTriple().getObjectFormat() == Triple::MachO)
+ modifyPassConfigForMachO(MR, LG, PassConfig);
+ else {
+ LLVM_DEBUG({
+      dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unsupported graph "
+             << LG.getName() << " (triple = " << LG.getTargetTriple().str()
+             << ")\n";
+ });
+ }
+}
+
+void GDBJITDebugInfoRegistrationPlugin::modifyPassConfigForMachO(
+ MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &PassConfig) {
+
+ switch (LG.getTargetTriple().getArch()) {
+ case Triple::x86_64:
+ case Triple::aarch64:
+ // Supported, continue.
+ assert(LG.getPointerSize() == 8 && "Graph has incorrect pointer size");
+ assert(LG.getEndianness() == support::little &&
+ "Graph has incorrect endianness");
+ break;
+ default:
+ // Unsupported.
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin skipping unsupported "
+ << "MachO graph " << LG.getName()
+ << "(triple = " << LG.getTargetTriple().str()
+ << ", pointer size = " << LG.getPointerSize() << ", endianness = "
+ << (LG.getEndianness() == support::big ? "big" : "little")
+ << ")\n";
+ });
+ return;
+ }
+
+ // Scan for debug sections. If we find one then install passes.
+ bool HasDebugSections = false;
+ for (auto &Sec : LG.sections())
+ if (MachODebugObjectSynthesizerBase::isDebugSection(Sec)) {
+ HasDebugSections = true;
+ break;
+ }
+
+ if (HasDebugSections) {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
+ << " contains debug info. Installing debugger support passes.\n";
+ });
+
+ auto MDOS = std::make_shared<MachODebugObjectSynthesizer<MachO64LE>>(
+ LG, RegisterActionAddr);
+ PassConfig.PrePrunePasses.push_back(
+ [=](LinkGraph &G) { return MDOS->preserveDebugSections(); });
+ PassConfig.PostPrunePasses.push_back(
+ [=](LinkGraph &G) { return MDOS->startSynthesis(); });
+ PassConfig.PreFixupPasses.push_back(
+ [=](LinkGraph &G) { return MDOS->completeSynthesisAndRegister(); });
+ } else {
+ LLVM_DEBUG({
+ dbgs() << "GDBJITDebugInfoRegistrationPlugin: Graph " << LG.getName()
+ << " contains no debug info. Skipping.\n";
+ });
+ }
+}
+
+} // namespace orc
+} // namespace llvm
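The MachO synthesizer above reserves a container block and then streams fixed-size MachO structs (header, segment load command, section commands) into it through MachOStructWriter. A standalone sketch of that struct-writer idea follows, with tiny placeholder structs instead of the real MachO::mach_header_64 and segment_command_64 layouts; the magic 0xfeedfacf and command 0x19 correspond to MH_MAGIC_64 and LC_SEGMENT_64.

// Standalone sketch of the MachOStructWriter pattern: append fixed-size POD
// structs to a byte buffer at a running offset, with a bounds check.
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

class StructWriter {
public:
  explicit StructWriter(std::vector<char> &Buffer) : Buffer(Buffer) {}

  template <typename T> void write(const T &S) {
    assert(Offset + sizeof(S) <= Buffer.size() && "container block overflow");
    std::memcpy(Buffer.data() + Offset, &S, sizeof(S));
    Offset += sizeof(S);
  }

  size_t getOffset() const { return Offset; }

private:
  std::vector<char> &Buffer;
  size_t Offset = 0;
};

// Placeholder layouts; the real writer emits full mach_header_64 /
// segment_command_64 / section_64 structs, byte-swapped if needed.
struct Header { uint32_t magic, ncmds, sizeofcmds; };
struct SegmentLC { uint32_t cmd, cmdsize, nsects; };

int main() {
  std::vector<char> Block(sizeof(Header) + sizeof(SegmentLC));
  StructWriter W(Block);
  W.write(Header{0xfeedfacf, 1, uint32_t(sizeof(SegmentLC))}); // header first
  W.write(SegmentLC{0x19, uint32_t(sizeof(SegmentLC)), 0});    // then the LC
  std::cout << "wrote " << W.getOffset() << " bytes\n";
}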
diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
new file mode 100644
index 000000000000..b17d196f01b6
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -0,0 +1,818 @@
+//===------- ELFNixPlatform.cpp - Utilities for executing ELF in Orc ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
+#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
+#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::orc::shared;
+
+namespace {
+
+class DSOHandleMaterializationUnit : public MaterializationUnit {
+public:
+ DSOHandleMaterializationUnit(ELFNixPlatform &ENP,
+ const SymbolStringPtr &DSOHandleSymbol)
+ : MaterializationUnit(createDSOHandleSectionSymbols(ENP, DSOHandleSymbol),
+ DSOHandleSymbol),
+ ENP(ENP) {}
+
+ StringRef getName() const override { return "DSOHandleMU"; }
+
+ void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
+ unsigned PointerSize;
+ support::endianness Endianness;
+ jitlink::Edge::Kind EdgeKind;
+ const auto &TT =
+ ENP.getExecutionSession().getExecutorProcessControl().getTargetTriple();
+
+ switch (TT.getArch()) {
+ case Triple::x86_64:
+ PointerSize = 8;
+ Endianness = support::endianness::little;
+ EdgeKind = jitlink::x86_64::Pointer64;
+ break;
+ default:
+ llvm_unreachable("Unrecognized architecture");
+ }
+
+ // void *__dso_handle = &__dso_handle;
+ auto G = std::make_unique<jitlink::LinkGraph>(
+ "<DSOHandleMU>", TT, PointerSize, Endianness,
+ jitlink::getGenericEdgeKindName);
+ auto &DSOHandleSection =
+ G->createSection(".data.__dso_handle", jitlink::MemProt::Read);
+ auto &DSOHandleBlock = G->createContentBlock(
+ DSOHandleSection, getDSOHandleContent(PointerSize), 0, 8, 0);
+ auto &DSOHandleSymbol = G->addDefinedSymbol(
+ DSOHandleBlock, 0, *R->getInitializerSymbol(), DSOHandleBlock.getSize(),
+ jitlink::Linkage::Strong, jitlink::Scope::Default, false, true);
+ DSOHandleBlock.addEdge(EdgeKind, 0, DSOHandleSymbol, 0);
+
+ ENP.getObjectLinkingLayer().emit(std::move(R), std::move(G));
+ }
+
+ void discard(const JITDylib &JD, const SymbolStringPtr &Sym) override {}
+
+private:
+ static SymbolFlagsMap
+ createDSOHandleSectionSymbols(ELFNixPlatform &ENP,
+ const SymbolStringPtr &DSOHandleSymbol) {
+ SymbolFlagsMap SymbolFlags;
+ SymbolFlags[DSOHandleSymbol] = JITSymbolFlags::Exported;
+ return SymbolFlags;
+ }
+
+ ArrayRef<char> getDSOHandleContent(size_t PointerSize) {
+ static const char Content[8] = {0};
+ assert(PointerSize <= sizeof Content);
+ return {Content, PointerSize};
+ }
+
+ ELFNixPlatform &ENP;
+};
+
+StringRef EHFrameSectionName = ".eh_frame";
+StringRef InitArrayFuncSectionName = ".init_array";
+
+StringRef ThreadBSSSectionName = ".tbss";
+StringRef ThreadDataSectionName = ".tdata";
+
+StringRef InitSectionNames[] = {InitArrayFuncSectionName};
+
+} // end anonymous namespace
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<ELFNixPlatform>>
+ELFNixPlatform::Create(ExecutionSession &ES,
+ ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ Optional<SymbolAliasMap> RuntimeAliases) {
+
+ auto &EPC = ES.getExecutorProcessControl();
+
+ // If the target is not supported then bail out immediately.
+ if (!supportedTarget(EPC.getTargetTriple()))
+ return make_error<StringError>("Unsupported ELFNixPlatform triple: " +
+ EPC.getTargetTriple().str(),
+ inconvertibleErrorCode());
+
+ // Create default aliases if the caller didn't supply any.
+ if (!RuntimeAliases)
+ RuntimeAliases = standardPlatformAliases(ES);
+
+ // Define the aliases.
+ if (auto Err = PlatformJD.define(symbolAliases(std::move(*RuntimeAliases))))
+ return std::move(Err);
+
+ // Add JIT-dispatch function support symbols.
+ if (auto Err = PlatformJD.define(absoluteSymbols(
+ {{ES.intern("__orc_rt_jit_dispatch"),
+ {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
+ JITSymbolFlags::Exported}},
+ {ES.intern("__orc_rt_jit_dispatch_ctx"),
+ {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
+ JITSymbolFlags::Exported}}})))
+ return std::move(Err);
+
+ // Create a generator for the ORC runtime archive.
+ auto OrcRuntimeArchiveGenerator = StaticLibraryDefinitionGenerator::Load(
+ ObjLinkingLayer, OrcRuntimePath, EPC.getTargetTriple());
+ if (!OrcRuntimeArchiveGenerator)
+ return OrcRuntimeArchiveGenerator.takeError();
+
+ // Create the instance.
+ Error Err = Error::success();
+ auto P = std::unique_ptr<ELFNixPlatform>(
+ new ELFNixPlatform(ES, ObjLinkingLayer, PlatformJD,
+ std::move(*OrcRuntimeArchiveGenerator), Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(P);
+}
+
+Error ELFNixPlatform::setupJITDylib(JITDylib &JD) {
+ return JD.define(
+ std::make_unique<DSOHandleMaterializationUnit>(*this, DSOHandleSymbol));
+}
+
+Error ELFNixPlatform::notifyAdding(ResourceTracker &RT,
+ const MaterializationUnit &MU) {
+ auto &JD = RT.getJITDylib();
+ const auto &InitSym = MU.getInitializerSymbol();
+ if (!InitSym)
+ return Error::success();
+
+ RegisteredInitSymbols[&JD].add(InitSym,
+ SymbolLookupFlags::WeaklyReferencedSymbol);
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform: Registered init symbol " << *InitSym
+ << " for MU " << MU.getName() << "\n";
+ });
+ return Error::success();
+}
+
+Error ELFNixPlatform::notifyRemoving(ResourceTracker &RT) {
+ llvm_unreachable("Not supported yet");
+}
+
+static void addAliases(ExecutionSession &ES, SymbolAliasMap &Aliases,
+ ArrayRef<std::pair<const char *, const char *>> AL) {
+ for (auto &KV : AL) {
+ auto AliasName = ES.intern(KV.first);
+ assert(!Aliases.count(AliasName) && "Duplicate symbol name in alias map");
+ Aliases[std::move(AliasName)] = {ES.intern(KV.second),
+ JITSymbolFlags::Exported};
+ }
+}
+
+SymbolAliasMap ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES) {
+ SymbolAliasMap Aliases;
+ addAliases(ES, Aliases, requiredCXXAliases());
+ addAliases(ES, Aliases, standardRuntimeUtilityAliases());
+ return Aliases;
+}
+
+ArrayRef<std::pair<const char *, const char *>>
+ELFNixPlatform::requiredCXXAliases() {
+ static const std::pair<const char *, const char *> RequiredCXXAliases[] = {
+ {"__cxa_atexit", "__orc_rt_elfnix_cxa_atexit"},
+ {"atexit", "__orc_rt_elfnix_atexit"}};
+
+ return ArrayRef<std::pair<const char *, const char *>>(RequiredCXXAliases);
+}
+
+ArrayRef<std::pair<const char *, const char *>>
+ELFNixPlatform::standardRuntimeUtilityAliases() {
+ static const std::pair<const char *, const char *>
+ StandardRuntimeUtilityAliases[] = {
+ {"__orc_rt_run_program", "__orc_rt_elfnix_run_program"},
+ {"__orc_rt_log_error", "__orc_rt_log_error_to_stderr"}};
+
+ return ArrayRef<std::pair<const char *, const char *>>(
+ StandardRuntimeUtilityAliases);
+}
+
+bool ELFNixPlatform::isInitializerSection(StringRef SecName) {
+ for (auto &Name : InitSectionNames) {
+ if (Name.equals(SecName))
+ return true;
+ }
+ return false;
+}
+
+bool ELFNixPlatform::supportedTarget(const Triple &TT) {
+ switch (TT.getArch()) {
+ case Triple::x86_64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ELFNixPlatform::ELFNixPlatform(
+ ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<DefinitionGenerator> OrcRuntimeGenerator, Error &Err)
+ : ES(ES), ObjLinkingLayer(ObjLinkingLayer),
+ DSOHandleSymbol(ES.intern("__dso_handle")) {
+ ErrorAsOutParameter _(&Err);
+
+ ObjLinkingLayer.addPlugin(std::make_unique<ELFNixPlatformPlugin>(*this));
+
+ PlatformJD.addGenerator(std::move(OrcRuntimeGenerator));
+
+ // PlatformJD hasn't been 'set-up' by the platform yet (since we're creating
+ // the platform now), so set it up.
+ if (auto E2 = setupJITDylib(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+
+ RegisteredInitSymbols[&PlatformJD].add(
+ DSOHandleSymbol, SymbolLookupFlags::WeaklyReferencedSymbol);
+
+ // Associate wrapper function tags with JIT-side function implementations.
+ if (auto E2 = associateRuntimeSupportFunctions(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+
+ // Lookup addresses of runtime functions callable by the platform,
+ // call the platform bootstrap function to initialize the platform-state
+ // object in the executor.
+ if (auto E2 = bootstrapELFNixRuntime(PlatformJD)) {
+ Err = std::move(E2);
+ return;
+ }
+}
+
+Error ELFNixPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
+ ExecutionSession::JITDispatchHandlerAssociationMap WFs;
+
+ using GetInitializersSPSSig =
+ SPSExpected<SPSELFNixJITDylibInitializerSequence>(SPSString);
+ WFs[ES.intern("__orc_rt_elfnix_get_initializers_tag")] =
+ ES.wrapAsyncWithSPS<GetInitializersSPSSig>(
+ this, &ELFNixPlatform::rt_getInitializers);
+
+ using GetDeinitializersSPSSig =
+ SPSExpected<SPSELFJITDylibDeinitializerSequence>(SPSExecutorAddr);
+ WFs[ES.intern("__orc_rt_elfnix_get_deinitializers_tag")] =
+ ES.wrapAsyncWithSPS<GetDeinitializersSPSSig>(
+ this, &ELFNixPlatform::rt_getDeinitializers);
+
+ using LookupSymbolSPSSig =
+ SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSString);
+ WFs[ES.intern("__orc_rt_elfnix_symbol_lookup_tag")] =
+ ES.wrapAsyncWithSPS<LookupSymbolSPSSig>(this,
+ &ELFNixPlatform::rt_lookupSymbol);
+
+ return ES.registerJITDispatchHandlers(PlatformJD, std::move(WFs));
+}
+
+void ELFNixPlatform::getInitializersBuildSequencePhase(
+ SendInitializerSequenceFn SendResult, JITDylib &JD,
+ std::vector<JITDylibSP> DFSLinkOrder) {
+ ELFNixJITDylibInitializerSequence FullInitSeq;
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ for (auto &InitJD : reverse(DFSLinkOrder)) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform: Appending inits for \"" << InitJD->getName()
+ << "\" to sequence\n";
+ });
+ auto ISItr = InitSeqs.find(InitJD.get());
+ if (ISItr != InitSeqs.end()) {
+ FullInitSeq.emplace_back(std::move(ISItr->second));
+ InitSeqs.erase(ISItr);
+ }
+ }
+ }
+
+ SendResult(std::move(FullInitSeq));
+}
+
+void ELFNixPlatform::getInitializersLookupPhase(
+ SendInitializerSequenceFn SendResult, JITDylib &JD) {
+
+ auto DFSLinkOrder = JD.getDFSLinkOrder();
+ DenseMap<JITDylib *, SymbolLookupSet> NewInitSymbols;
+ ES.runSessionLocked([&]() {
+ for (auto &InitJD : DFSLinkOrder) {
+ auto RISItr = RegisteredInitSymbols.find(InitJD.get());
+ if (RISItr != RegisteredInitSymbols.end()) {
+ NewInitSymbols[InitJD.get()] = std::move(RISItr->second);
+ RegisteredInitSymbols.erase(RISItr);
+ }
+ }
+ });
+
+ // If there are no further init symbols to look up then move on to the next
+ // phase.
+ if (NewInitSymbols.empty()) {
+ getInitializersBuildSequencePhase(std::move(SendResult), JD,
+ std::move(DFSLinkOrder));
+ return;
+ }
+
+ // Otherwise issue a lookup and re-run this phase when it completes.
+ lookupInitSymbolsAsync(
+ [this, SendResult = std::move(SendResult), &JD](Error Err) mutable {
+ if (Err)
+ SendResult(std::move(Err));
+ else
+ getInitializersLookupPhase(std::move(SendResult), JD);
+ },
+ ES, std::move(NewInitSymbols));
+}
+
+void ELFNixPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult,
+ StringRef JDName) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform::rt_getInitializers(\"" << JDName << "\")\n";
+ });
+
+ JITDylib *JD = ES.getJITDylibByName(JDName);
+ if (!JD) {
+ LLVM_DEBUG({
+ dbgs() << " No such JITDylib \"" << JDName << "\". Sending error.\n";
+ });
+ SendResult(make_error<StringError>("No JITDylib named " + JDName,
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ getInitializersLookupPhase(std::move(SendResult), *JD);
+}
+
+void ELFNixPlatform::rt_getDeinitializers(
+ SendDeinitializerSequenceFn SendResult, ExecutorAddr Handle) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform::rt_getDeinitializers(\""
+ << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ });
+
+ JITDylib *JD = nullptr;
+
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ auto I = HandleAddrToJITDylib.find(Handle.getValue());
+ if (I != HandleAddrToJITDylib.end())
+ JD = I->second;
+ }
+
+ if (!JD) {
+ LLVM_DEBUG({
+ dbgs() << " No JITDylib for handle "
+ << formatv("{0:x}", Handle.getValue()) << "\n";
+ });
+ SendResult(make_error<StringError>("No JITDylib associated with handle " +
+ formatv("{0:x}", Handle.getValue()),
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ SendResult(ELFNixJITDylibDeinitializerSequence());
+}
+
+void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
+ ExecutorAddr Handle,
+ StringRef SymbolName) {
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform::rt_lookupSymbol(\""
+ << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ });
+
+ JITDylib *JD = nullptr;
+
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ auto I = HandleAddrToJITDylib.find(Handle.getValue());
+ if (I != HandleAddrToJITDylib.end())
+ JD = I->second;
+ }
+
+ if (!JD) {
+ LLVM_DEBUG({
+ dbgs() << " No JITDylib for handle "
+ << formatv("{0:x}", Handle.getValue()) << "\n";
+ });
+ SendResult(make_error<StringError>("No JITDylib associated with handle " +
+ formatv("{0:x}", Handle.getValue()),
+ inconvertibleErrorCode()));
+ return;
+ }
+
+ // Use functor class to work around XL build compiler issue on AIX.
+ class RtLookupNotifyComplete {
+ public:
+ RtLookupNotifyComplete(SendSymbolAddressFn &&SendResult)
+ : SendResult(std::move(SendResult)) {}
+ void operator()(Expected<SymbolMap> Result) {
+ if (Result) {
+ assert(Result->size() == 1 && "Unexpected result map count");
+ SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
+ } else {
+ SendResult(Result.takeError());
+ }
+ }
+
+ private:
+ SendSymbolAddressFn SendResult;
+ };
+
+ ES.lookup(
+ LookupKind::DLSym, {{JD, JITDylibLookupFlags::MatchExportedSymbolsOnly}},
+ SymbolLookupSet(ES.intern(SymbolName)), SymbolState::Ready,
+ RtLookupNotifyComplete(std::move(SendResult)), NoDependenciesToRegister);
+}
+
+Error ELFNixPlatform::bootstrapELFNixRuntime(JITDylib &PlatformJD) {
+
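+  // Runtime entry points required by the platform. Their addresses are looked
+  // up below and recorded in the corresponding ExecutorAddr members.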
+ std::pair<const char *, ExecutorAddr *> Symbols[] = {
+ {"__orc_rt_elfnix_platform_bootstrap", &orc_rt_elfnix_platform_bootstrap},
+ {"__orc_rt_elfnix_platform_shutdown", &orc_rt_elfnix_platform_shutdown},
+ {"__orc_rt_elfnix_register_object_sections",
+ &orc_rt_elfnix_register_object_sections},
+ {"__orc_rt_elfnix_create_pthread_key",
+ &orc_rt_elfnix_create_pthread_key}};
+
+ SymbolLookupSet RuntimeSymbols;
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> AddrsToRecord;
+ for (const auto &KV : Symbols) {
+ auto Name = ES.intern(KV.first);
+ RuntimeSymbols.add(Name);
+ AddrsToRecord.push_back({std::move(Name), KV.second});
+ }
+
+ auto RuntimeSymbolAddrs = ES.lookup(
+ {{&PlatformJD, JITDylibLookupFlags::MatchAllSymbols}}, RuntimeSymbols);
+ if (!RuntimeSymbolAddrs)
+ return RuntimeSymbolAddrs.takeError();
+
+ for (const auto &KV : AddrsToRecord) {
+ auto &Name = KV.first;
+ assert(RuntimeSymbolAddrs->count(Name) && "Missing runtime symbol?");
+ KV.second->setValue((*RuntimeSymbolAddrs)[Name].getAddress());
+ }
+
+ auto PJDDSOHandle = ES.lookup(
+ {{&PlatformJD, JITDylibLookupFlags::MatchAllSymbols}}, DSOHandleSymbol);
+ if (!PJDDSOHandle)
+ return PJDDSOHandle.takeError();
+
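+  // Call the runtime's bootstrap entry point, passing the platform JITDylib's
+  // __dso_handle address so the executor can set up its platform state.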
+ if (auto Err = ES.callSPSWrapper<void(uint64_t)>(
+ orc_rt_elfnix_platform_bootstrap, PJDDSOHandle->getAddress()))
+ return Err;
+
+ // FIXME: Ordering is fuzzy here. We're probably best off saying
+ // "behavior is undefined if code that uses the runtime is added before
+ // the platform constructor returns", then move all this to the constructor.
+ RuntimeBootstrapped = true;
+ std::vector<ELFPerObjectSectionsToRegister> DeferredPOSRs;
+ {
+ std::lock_guard<std::mutex> Lock(PlatformMutex);
+ DeferredPOSRs = std::move(BootstrapPOSRs);
+ }
+
+ for (auto &D : DeferredPOSRs)
+ if (auto Err = registerPerObjectSections(D))
+ return Err;
+
+ return Error::success();
+}
+
+Error ELFNixPlatform::registerInitInfo(
+ JITDylib &JD, ArrayRef<jitlink::Section *> InitSections) {
+
+ std::unique_lock<std::mutex> Lock(PlatformMutex);
+
+ ELFNixJITDylibInitializers *InitSeq = nullptr;
+ {
+ auto I = InitSeqs.find(&JD);
+ if (I == InitSeqs.end()) {
+ // If there's no init sequence entry yet then we need to look up the
+ // header symbol to force creation of one.
+ Lock.unlock();
+
+ auto SearchOrder =
+ JD.withLinkOrderDo([](const JITDylibSearchOrder &SO) { return SO; });
+ if (auto Err = ES.lookup(SearchOrder, DSOHandleSymbol).takeError())
+ return Err;
+
+ Lock.lock();
+ I = InitSeqs.find(&JD);
+ assert(I != InitSeqs.end() &&
+ "Entry missing after header symbol lookup?");
+ }
+ InitSeq = &I->second;
+ }
+
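+  // Record the address range of each init section under its section name.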
+ for (auto *Sec : InitSections) {
+ // FIXME: Avoid copy here.
+ jitlink::SectionRange R(*Sec);
+ InitSeq->InitSections[Sec->getName()].push_back(
+ {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())});
+ }
+
+ return Error::success();
+}
+
+Error ELFNixPlatform::registerPerObjectSections(
+ const ELFPerObjectSectionsToRegister &POSR) {
+
+ if (!orc_rt_elfnix_register_object_sections)
+ return make_error<StringError>("Attempting to register per-object "
+ "sections, but runtime support has not "
+ "been loaded yet",
+ inconvertibleErrorCode());
+
+ Error ErrResult = Error::success();
+ if (auto Err = ES.callSPSWrapper<shared::SPSError(
+ SPSELFPerObjectSectionsToRegister)>(
+ orc_rt_elfnix_register_object_sections, ErrResult, POSR))
+ return Err;
+ return ErrResult;
+}
+
+Expected<uint64_t> ELFNixPlatform::createPThreadKey() {
+ if (!orc_rt_elfnix_create_pthread_key)
+ return make_error<StringError>(
+ "Attempting to create pthread key in target, but runtime support has "
+ "not been loaded yet",
+ inconvertibleErrorCode());
+
+ Expected<uint64_t> Result(0);
+ if (auto Err = ES.callSPSWrapper<SPSExpected<uint64_t>(void)>(
+ orc_rt_elfnix_create_pthread_key, Result))
+ return std::move(Err);
+ return Result;
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::modifyPassConfig(
+ MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
+ jitlink::PassConfiguration &Config) {
+
+ // If the initializer symbol is the __dso_handle symbol then just add
+ // the DSO handle support passes.
+ if (MR.getInitializerSymbol() == MP.DSOHandleSymbol) {
+ addDSOHandleSupportPasses(MR, Config);
+ // The DSOHandle materialization unit doesn't require any other
+ // support, so we can bail out early.
+ return;
+ }
+
+ // If the object contains initializers then add passes to record them.
+ if (MR.getInitializerSymbol())
+ addInitializerSupportPasses(MR, Config);
+
+ // Add passes for eh-frame and TLV support.
+ addEHAndTLVSupportPasses(MR, Config);
+}
+
+ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap
+ELFNixPlatform::ELFNixPlatformPlugin::getSyntheticSymbolDependencies(
+ MaterializationResponsibility &MR) {
+ std::lock_guard<std::mutex> Lock(PluginMutex);
+ auto I = InitSymbolDeps.find(&MR);
+ if (I != InitSymbolDeps.end()) {
+ SyntheticSymbolDependenciesMap Result;
+ Result[MR.getInitializerSymbol()] = std::move(I->second);
+ InitSymbolDeps.erase(&MR);
+ return Result;
+ }
+ return SyntheticSymbolDependenciesMap();
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::addInitializerSupportPasses(
+ MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
+
+  // Preserve init sections.
+ Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) -> Error {
+ if (auto Err = preserveInitSections(G, MR))
+ return Err;
+ return Error::success();
+ });
+
+ Config.PostFixupPasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return registerInitSections(G, JD);
+ });
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::addDSOHandleSupportPasses(
+ MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
+
+ Config.PostAllocationPasses.push_back([this, &JD = MR.getTargetJITDylib()](
+ jitlink::LinkGraph &G) -> Error {
+ auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
+ return Sym->getName() == *MP.DSOHandleSymbol;
+ });
+ assert(I != G.defined_symbols().end() && "Missing DSO handle symbol");
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ JITTargetAddress HandleAddr = (*I)->getAddress();
+ MP.HandleAddrToJITDylib[HandleAddr] = &JD;
+ assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
+ MP.InitSeqs.insert(std::make_pair(
+ &JD,
+ ELFNixJITDylibInitializers(JD.getName(), ExecutorAddr(HandleAddr))));
+ }
+ return Error::success();
+ });
+}
+
+void ELFNixPlatform::ELFNixPlatformPlugin::addEHAndTLVSupportPasses(
+ MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
+
+ // Insert TLV lowering at the start of the PostPrunePasses, since we want
+ // it to run before GOT/PLT lowering.
+
+  // TODO: Check that the GOT/PLT build pass has run before the
+  // fixTLVSectionsAndEdges pass, since the TLS descriptor needs to be
+  // allocated in the GOT.
+ Config.PostPrunePasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return fixTLVSectionsAndEdges(G, JD);
+ });
+
+ // Add a pass to register the final addresses of the eh-frame and TLV sections
+ // with the runtime.
+ Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error {
+ ELFPerObjectSectionsToRegister POSR;
+
+ if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
+ jitlink::SectionRange R(*EHFrameSection);
+ if (!R.empty())
+ POSR.EHFrameSection = {ExecutorAddr(R.getStart()),
+ ExecutorAddr(R.getEnd())};
+ }
+
+ // Get a pointer to the thread data section if there is one. It will be used
+ // below.
+ jitlink::Section *ThreadDataSection =
+ G.findSectionByName(ThreadDataSectionName);
+
+ // Handle thread BSS section if there is one.
+ if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+ // If there's already a thread data section in this graph then merge the
+ // thread BSS section content into it, otherwise just treat the thread
+ // BSS section as the thread data section.
+ if (ThreadDataSection)
+ G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
+ else
+ ThreadDataSection = ThreadBSSSection;
+ }
+
+ // Having merged thread BSS (if present) and thread data (if present),
+ // record the resulting section range.
+ if (ThreadDataSection) {
+ jitlink::SectionRange R(*ThreadDataSection);
+ if (!R.empty())
+ POSR.ThreadDataSection = {ExecutorAddr(R.getStart()),
+ ExecutorAddr(R.getEnd())};
+ }
+
+ if (POSR.EHFrameSection.Start || POSR.ThreadDataSection.Start) {
+
+ // If we're still bootstrapping the runtime then just record this
+ // frame for now.
+ if (!MP.RuntimeBootstrapped) {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ MP.BootstrapPOSRs.push_back(POSR);
+ return Error::success();
+ }
+
+ // Otherwise register it immediately.
+ if (auto Err = MP.registerPerObjectSections(POSR))
+ return Err;
+ }
+
+ return Error::success();
+ });
+}
+
+Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
+
+ JITLinkSymbolSet InitSectionSymbols;
+ for (auto &InitSectionName : InitSectionNames) {
+ // Skip non-init sections.
+ auto *InitSection = G.findSectionByName(InitSectionName);
+ if (!InitSection)
+ continue;
+
+ // Make a pass over live symbols in the section: those blocks are already
+ // preserved.
+ DenseSet<jitlink::Block *> AlreadyLiveBlocks;
+ for (auto &Sym : InitSection->symbols()) {
+ auto &B = Sym->getBlock();
+ if (Sym->isLive() && Sym->getOffset() == 0 &&
+ Sym->getSize() == B.getSize() && !AlreadyLiveBlocks.count(&B)) {
+ InitSectionSymbols.insert(Sym);
+ AlreadyLiveBlocks.insert(&B);
+ }
+ }
+
+ // Add anonymous symbols to preserve any not-already-preserved blocks.
+ for (auto *B : InitSection->blocks())
+ if (!AlreadyLiveBlocks.count(B))
+ InitSectionSymbols.insert(
+ &G.addAnonymousSymbol(*B, 0, B->getSize(), false, true));
+ }
+
+ if (!InitSectionSymbols.empty()) {
+ std::lock_guard<std::mutex> Lock(PluginMutex);
+ InitSymbolDeps[&MR] = std::move(InitSectionSymbols);
+ }
+
+ return Error::success();
+}
+
+Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
+ jitlink::LinkGraph &G, JITDylib &JD) {
+
+ SmallVector<jitlink::Section *> InitSections;
+
+ LLVM_DEBUG({ dbgs() << "ELFNixPlatform::registerInitSections\n"; });
+
+ for (auto InitSectionName : InitSectionNames) {
+ if (auto *Sec = G.findSectionByName(InitSectionName)) {
+ InitSections.push_back(Sec);
+ }
+ }
+
+ // Dump the scraped inits.
+ LLVM_DEBUG({
+ dbgs() << "ELFNixPlatform: Scraped " << G.getName() << " init sections:\n";
+ for (auto *Sec : InitSections) {
+ jitlink::SectionRange R(*Sec);
+ dbgs() << " " << Sec->getName() << ": "
+ << formatv("[ {0:x} -- {1:x} ]", R.getStart(), R.getEnd()) << "\n";
+ }
+ });
+
+ return MP.registerInitInfo(JD, InitSections);
+}
+
+Error ELFNixPlatform::ELFNixPlatformPlugin::fixTLVSectionsAndEdges(
+ jitlink::LinkGraph &G, JITDylib &JD) {
+
+  // TODO: Implement full TLV support. For now, redirect __tls_get_addr calls
+  // to the ORC runtime's implementation.
+ for (auto *Sym : G.external_symbols())
+ if (Sym->getName() == "__tls_get_addr") {
+ Sym->setName("___orc_rt_elfnix_tls_get_addr");
+ }
+
+ auto *TLSInfoEntrySection = G.findSectionByName("$__TLSINFO");
+
+ if (TLSInfoEntrySection) {
+ Optional<uint64_t> Key;
+ {
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ auto I = MP.JITDylibToPThreadKey.find(&JD);
+ if (I != MP.JITDylibToPThreadKey.end())
+ Key = I->second;
+ }
+ if (!Key) {
+ if (auto KeyOrErr = MP.createPThreadKey())
+ Key = *KeyOrErr;
+ else
+ return KeyOrErr.takeError();
+ }
+
+ uint64_t PlatformKeyBits =
+ support::endian::byte_swap(*Key, G.getEndianness());
+
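+    // Write the platform's pthread key into the first word of each TLS
+    // descriptor.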
+ for (auto *B : TLSInfoEntrySection->blocks()) {
+      // FIXME: The TLS descriptor byte length may differ between ISAs.
+      assert(B->getSize() == (G.getPointerSize() * 2) &&
+             "TLS descriptor must be two words long");
+ auto TLSInfoEntryContent = B->getMutableContent(G);
+ memcpy(TLSInfoEntryContent.data(), &PlatformKeyBits, G.getPointerSize());
+ }
+ }
+
+ return Error::success();
+}
+
+} // End namespace orc.
+} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
index 5715eda71eee..f3fe0555fa75 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
@@ -39,13 +39,13 @@ createJITLoaderGDBRegistrar(ExecutionSession &ES) {
assert((*Result)[0].size() == 1 &&
"Unexpected number of addresses in result");
- return std::make_unique<EPCDebugObjectRegistrar>(ES, (*Result)[0][0]);
+ return std::make_unique<EPCDebugObjectRegistrar>(
+ ES, ExecutorAddr((*Result)[0][0]));
}
-Error EPCDebugObjectRegistrar::registerDebugObject(sys::MemoryBlock TargetMem) {
- return ES.callSPSWrapper<void(SPSExecutorAddress, uint64_t)>(
- RegisterFn, ExecutorAddress::fromPtr(TargetMem.base()),
- static_cast<uint64_t>(TargetMem.allocatedSize()));
+Error EPCDebugObjectRegistrar::registerDebugObject(
+ ExecutorAddrRange TargetMem) {
+ return ES.callSPSWrapper<void(SPSExecutorAddrRange)>(RegisterFn, TargetMem);
}
} // namespace orc
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
index 8cdda9ab5a15..4c0fab8aa9fa 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
@@ -51,21 +51,22 @@ EPCEHFrameRegistrar::Create(ExecutionSession &ES) {
auto RegisterEHFrameWrapperFnAddr = (*Result)[0][0];
auto DeregisterEHFrameWrapperFnAddr = (*Result)[0][1];
- return std::make_unique<EPCEHFrameRegistrar>(ES, RegisterEHFrameWrapperFnAddr,
- DeregisterEHFrameWrapperFnAddr);
+ return std::make_unique<EPCEHFrameRegistrar>(
+ ES, ExecutorAddr(RegisterEHFrameWrapperFnAddr),
+ ExecutorAddr(DeregisterEHFrameWrapperFnAddr));
}
Error EPCEHFrameRegistrar::registerEHFrames(JITTargetAddress EHFrameSectionAddr,
size_t EHFrameSectionSize) {
- return ES.callSPSWrapper<void(SPSExecutorAddress, uint64_t)>(
- RegisterEHFrameWrapperFnAddr, EHFrameSectionAddr,
+ return ES.callSPSWrapper<void(SPSExecutorAddr, uint64_t)>(
+ RegisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr),
static_cast<uint64_t>(EHFrameSectionSize));
}
Error EPCEHFrameRegistrar::deregisterEHFrames(
JITTargetAddress EHFrameSectionAddr, size_t EHFrameSectionSize) {
- return ES.callSPSWrapper<void(SPSExecutorAddress, uint64_t)>(
- DeregisterEHFrameWrapperFnAddr, EHFrameSectionAddr,
+ return ES.callSPSWrapper<void(SPSExecutorAddr, uint64_t)>(
+ DeregisterEHFrameWrapperFnAddr, ExecutorAddr(EHFrameSectionAddr),
static_cast<uint64_t>(EHFrameSectionSize));
}
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp
new file mode 100644
index 000000000000..6c47c5c5f7bb
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp
@@ -0,0 +1,107 @@
+//===------- EPCGenericDylibManager.cpp -- Dylib management via EPC -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"
+
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+
+namespace llvm {
+namespace orc {
+namespace shared {
+
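+// Serialize SymbolLookupSet elements as (name, is-required-symbol) pairs so
+// that lookup requests can be sent to the executor via SPS.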
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookupSetElement,
+ SymbolLookupSet::value_type> {
+public:
+ static size_t size(const SymbolLookupSet::value_type &V) {
+ return SPSArgList<SPSString, bool>::size(
+ *V.first, V.second == SymbolLookupFlags::RequiredSymbol);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const SymbolLookupSet::value_type &V) {
+ return SPSArgList<SPSString, bool>::serialize(
+ OB, *V.first, V.second == SymbolLookupFlags::RequiredSymbol);
+ }
+};
+
+template <>
+class TrivialSPSSequenceSerialization<SPSRemoteSymbolLookupSetElement,
+ SymbolLookupSet> {
+public:
+ static constexpr bool available = true;
+};
+
+template <>
+class SPSSerializationTraits<SPSRemoteSymbolLookup,
+ ExecutorProcessControl::LookupRequest> {
+ using MemberSerialization =
+ SPSArgList<SPSExecutorAddr, SPSRemoteSymbolLookupSet>;
+
+public:
+ static size_t size(const ExecutorProcessControl::LookupRequest &LR) {
+ return MemberSerialization::size(ExecutorAddr(LR.Handle), LR.Symbols);
+ }
+
+ static bool serialize(SPSOutputBuffer &OB,
+ const ExecutorProcessControl::LookupRequest &LR) {
+ return MemberSerialization::serialize(OB, ExecutorAddr(LR.Handle),
+ LR.Symbols);
+ }
+};
+
+} // end namespace shared
+
+Expected<EPCGenericDylibManager>
+EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(
+ ExecutorProcessControl &EPC) {
+ SymbolAddrs SAs;
+ if (auto Err = EPC.getBootstrapSymbols(
+ {{SAs.Instance, rt::SimpleExecutorDylibManagerInstanceName},
+ {SAs.Open, rt::SimpleExecutorDylibManagerOpenWrapperName},
+ {SAs.Lookup, rt::SimpleExecutorDylibManagerLookupWrapperName}}))
+ return std::move(Err);
+ return EPCGenericDylibManager(EPC, std::move(SAs));
+}
+
+Expected<tpctypes::DylibHandle> EPCGenericDylibManager::open(StringRef Path,
+ uint64_t Mode) {
+ Expected<tpctypes::DylibHandle> H(0);
+ if (auto Err =
+ EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerOpenSignature>(
+ SAs.Open, H, SAs.Instance, Path, Mode))
+ return std::move(Err);
+ return H;
+}
+
+Expected<std::vector<ExecutorAddr>>
+EPCGenericDylibManager::lookup(tpctypes::DylibHandle H,
+ const SymbolLookupSet &Lookup) {
+ Expected<std::vector<ExecutorAddr>> Result((std::vector<ExecutorAddr>()));
+ if (auto Err =
+ EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerLookupSignature>(
+ SAs.Lookup, Result, SAs.Instance, H, Lookup))
+ return std::move(Err);
+ return Result;
+}
+
+Expected<std::vector<ExecutorAddr>>
+EPCGenericDylibManager::lookup(tpctypes::DylibHandle H,
+ const RemoteSymbolLookupSet &Lookup) {
+ Expected<std::vector<ExecutorAddr>> Result((std::vector<ExecutorAddr>()));
+ if (auto Err =
+ EPC.callSPSWrapper<rt::SPSSimpleExecutorDylibManagerLookupSignature>(
+ SAs.Lookup, Result, SAs.Instance, H, Lookup))
+ return std::move(Err);
+ return Result;
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
new file mode 100644
index 000000000000..9b712cb8f7ca
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -0,0 +1,184 @@
+//===---- EPCGenericJITLinkMemoryManager.cpp -- Mem management via EPC ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h"
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+#include <limits>
+
+using namespace llvm::jitlink;
+
+namespace llvm {
+namespace orc {
+
+class EPCGenericJITLinkMemoryManager::InFlightAlloc
+ : public jitlink::JITLinkMemoryManager::InFlightAlloc {
+public:
+
+ // FIXME: The C++98 initializer is an attempt to work around compile failures
+ // due to http://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#1397.
+ // We should be able to switch this back to member initialization once that
+ // issue is fixed.
+ struct SegInfo {
+ SegInfo() : WorkingMem(nullptr), ContentSize(0), ZeroFillSize(0) {}
+
+ char *WorkingMem;
+ ExecutorAddr Addr;
+ uint64_t ContentSize;
+ uint64_t ZeroFillSize;
+ };
+
+ using SegInfoMap = AllocGroupSmallMap<SegInfo>;
+
+ InFlightAlloc(EPCGenericJITLinkMemoryManager &Parent, LinkGraph &G,
+ ExecutorAddr AllocAddr, SegInfoMap Segs)
+ : Parent(Parent), G(G), AllocAddr(AllocAddr), Segs(std::move(Segs)) {}
+
+ void finalize(OnFinalizedFunction OnFinalize) override {
+ tpctypes::FinalizeRequest FR;
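+    // Describe each segment for the executor: protections, target address,
+    // total size (content plus zero-fill, rounded up to the page size), and
+    // the working memory holding its content.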
+ for (auto &KV : Segs) {
+ assert(KV.second.ContentSize <= std::numeric_limits<size_t>::max());
+ FR.Segments.push_back(tpctypes::SegFinalizeRequest{
+ tpctypes::toWireProtectionFlags(
+ toSysMemoryProtectionFlags(KV.first.getMemProt())),
+ KV.second.Addr,
+ alignTo(KV.second.ContentSize + KV.second.ZeroFillSize,
+ Parent.EPC.getPageSize()),
+ {KV.second.WorkingMem, static_cast<size_t>(KV.second.ContentSize)}});
+ }
+
+ // Transfer allocation actions.
+ // FIXME: Merge JITLink and ORC SupportFunctionCall and Action list types,
+ // turn this into a std::swap.
+ FR.Actions.reserve(G.allocActions().size());
+ for (auto &ActPair : G.allocActions())
+ FR.Actions.push_back({{ExecutorAddr(ActPair.Finalize.FnAddr),
+ {ExecutorAddr(ActPair.Finalize.CtxAddr),
+ ExecutorAddrDiff(ActPair.Finalize.CtxSize)}},
+ {ExecutorAddr(ActPair.Dealloc.FnAddr),
+ {ExecutorAddr(ActPair.Dealloc.CtxAddr),
+ ExecutorAddrDiff(ActPair.Dealloc.CtxSize)}}});
+ G.allocActions().clear();
+
+ Parent.EPC.callSPSWrapperAsync<
+ rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
+ Parent.SAs.Finalize,
+ [OnFinalize = std::move(OnFinalize), AllocAddr = this->AllocAddr](
+ Error SerializationErr, Error FinalizeErr) mutable {
+ // FIXME: Release abandoned alloc.
+ if (SerializationErr) {
+ cantFail(std::move(FinalizeErr));
+ OnFinalize(std::move(SerializationErr));
+ } else if (FinalizeErr)
+ OnFinalize(std::move(FinalizeErr));
+ else
+ OnFinalize(FinalizedAlloc(AllocAddr.getValue()));
+ },
+ Parent.SAs.Allocator, std::move(FR));
+ }
+
+ void abandon(OnAbandonedFunction OnAbandoned) override {
+ // FIXME: Return memory to pool instead.
+ Parent.EPC.callSPSWrapperAsync<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ Parent.SAs.Deallocate,
+ [OnAbandoned = std::move(OnAbandoned)](Error SerializationErr,
+ Error DeallocateErr) mutable {
+ if (SerializationErr) {
+ cantFail(std::move(DeallocateErr));
+ OnAbandoned(std::move(SerializationErr));
+ } else
+ OnAbandoned(std::move(DeallocateErr));
+ },
+ Parent.SAs.Allocator, ArrayRef<ExecutorAddr>(AllocAddr));
+ }
+
+private:
+ EPCGenericJITLinkMemoryManager &Parent;
+ LinkGraph &G;
+ ExecutorAddr AllocAddr;
+ SegInfoMap Segs;
+};
+
+void EPCGenericJITLinkMemoryManager::allocate(const JITLinkDylib *JD,
+ LinkGraph &G,
+ OnAllocatedFunction OnAllocated) {
+ BasicLayout BL(G);
+
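+  // Reserve a contiguous block of executor memory large enough for the whole
+  // graph, then complete the allocation once the reservation address is known.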
+ auto Pages = BL.getContiguousPageBasedLayoutSizes(EPC.getPageSize());
+ if (!Pages)
+ return OnAllocated(Pages.takeError());
+
+ EPC.callSPSWrapperAsync<rt::SPSSimpleExecutorMemoryManagerReserveSignature>(
+ SAs.Reserve,
+ [this, BL = std::move(BL), OnAllocated = std::move(OnAllocated)](
+ Error SerializationErr, Expected<ExecutorAddr> AllocAddr) mutable {
+ if (SerializationErr) {
+ cantFail(AllocAddr.takeError());
+ return OnAllocated(std::move(SerializationErr));
+ }
+ if (!AllocAddr)
+ return OnAllocated(AllocAddr.takeError());
+
+ completeAllocation(*AllocAddr, std::move(BL), std::move(OnAllocated));
+ },
+ SAs.Allocator, Pages->total());
+}
+
+void EPCGenericJITLinkMemoryManager::deallocate(
+ std::vector<FinalizedAlloc> Allocs, OnDeallocatedFunction OnDeallocated) {
+ EPC.callSPSWrapperAsync<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+ SAs.Deallocate,
+ [OnDeallocated = std::move(OnDeallocated)](Error SerErr,
+ Error DeallocErr) mutable {
+ if (SerErr) {
+ cantFail(std::move(DeallocErr));
+ OnDeallocated(std::move(SerErr));
+ } else
+ OnDeallocated(std::move(DeallocErr));
+ },
+ SAs.Allocator, Allocs);
+ for (auto &A : Allocs)
+ A.release();
+}
+
+void EPCGenericJITLinkMemoryManager::completeAllocation(
+ ExecutorAddr AllocAddr, BasicLayout BL, OnAllocatedFunction OnAllocated) {
+
+ InFlightAlloc::SegInfoMap SegInfos;
+
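+  // Lay the segments out contiguously within the reserved range, rounding
+  // each segment up to a multiple of the executor's page size.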
+ ExecutorAddr NextSegAddr = AllocAddr;
+ for (auto &KV : BL.segments()) {
+ const auto &AG = KV.first;
+ auto &Seg = KV.second;
+
+ Seg.Addr = NextSegAddr.getValue();
+ KV.second.WorkingMem = BL.getGraph().allocateBuffer(Seg.ContentSize).data();
+ NextSegAddr += ExecutorAddrDiff(
+ alignTo(Seg.ContentSize + Seg.ZeroFillSize, EPC.getPageSize()));
+
+ auto &SegInfo = SegInfos[AG];
+ SegInfo.ContentSize = Seg.ContentSize;
+ SegInfo.ZeroFillSize = Seg.ZeroFillSize;
+ SegInfo.Addr = ExecutorAddr(Seg.Addr);
+ SegInfo.WorkingMem = Seg.WorkingMem;
+ }
+
+ if (auto Err = BL.apply())
+ return OnAllocated(std::move(Err));
+
+ OnAllocated(std::make_unique<InFlightAlloc>(*this, BL.getGraph(), AllocAddr,
+ std::move(SegInfos)));
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
new file mode 100644
index 000000000000..1d98e104a4d7
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -0,0 +1,317 @@
+//===----- EPCGenericRTDyldMemoryManager.cpp - EPC-based MemMgr -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+Expected<std::unique_ptr<EPCGenericRTDyldMemoryManager>>
+EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
+ ExecutorProcessControl &EPC) {
+ SymbolAddrs SAs;
+ if (auto Err = EPC.getBootstrapSymbols(
+ {{SAs.Instance, rt::SimpleExecutorMemoryManagerInstanceName},
+ {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
+ {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
+ {SAs.Deallocate,
+ rt::SimpleExecutorMemoryManagerDeallocateWrapperName},
+ {SAs.RegisterEHFrame,
+ rt::RegisterEHFrameSectionCustomDirectWrapperName},
+ {SAs.DeregisterEHFrame,
+ rt::DeregisterEHFrameSectionCustomDirectWrapperName}}))
+ return std::move(Err);
+ return std::make_unique<EPCGenericRTDyldMemoryManager>(EPC, std::move(SAs));
+}
+
+EPCGenericRTDyldMemoryManager::EPCGenericRTDyldMemoryManager(
+ ExecutorProcessControl &EPC, SymbolAddrs SAs)
+ : EPC(EPC), SAs(std::move(SAs)) {
+ LLVM_DEBUG(dbgs() << "Created remote allocator " << (void *)this << "\n");
+}
+
+EPCGenericRTDyldMemoryManager::~EPCGenericRTDyldMemoryManager() {
+ LLVM_DEBUG(dbgs() << "Destroyed remote allocator " << (void *)this << "\n");
+ if (!ErrMsg.empty())
+ errs() << "Destroying with existing errors:\n" << ErrMsg << "\n";
+
+ Error Err = Error::success();
+ if (auto Err2 = EPC.callSPSWrapper<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>(
+          SAs.Deallocate, Err, SAs.Instance, FinalizedAllocs)) {
+ // FIXME: Report errors through EPC once that functionality is available.
+ logAllUnhandledErrors(std::move(Err2), errs(), "");
+ return;
+ }
+
+ if (Err)
+ logAllUnhandledErrors(std::move(Err), errs(), "");
+}
+
+uint8_t *EPCGenericRTDyldMemoryManager::allocateCodeSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName) {
+ std::lock_guard<std::mutex> Lock(M);
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " allocating code section "
+ << SectionName << ": size = " << formatv("{0:x}", Size)
+ << " bytes, alignment = " << Alignment << "\n";
+ });
+ auto &Seg = Unmapped.back().CodeAllocs;
+ Seg.emplace_back(Size, Alignment);
+ return reinterpret_cast<uint8_t *>(
+ alignAddr(Seg.back().Contents.get(), Align(Alignment)));
+}
+
+uint8_t *EPCGenericRTDyldMemoryManager::allocateDataSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName, bool IsReadOnly) {
+ std::lock_guard<std::mutex> Lock(M);
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " allocating "
+ << (IsReadOnly ? "ro" : "rw") << "-data section " << SectionName
+ << ": size = " << formatv("{0:x}", Size) << " bytes, alignment "
+ << Alignment << ")\n";
+ });
+
+ auto &Seg =
+ IsReadOnly ? Unmapped.back().RODataAllocs : Unmapped.back().RWDataAllocs;
+
+ Seg.emplace_back(Size, Alignment);
+ return reinterpret_cast<uint8_t *>(
+ alignAddr(Seg.back().Contents.get(), Align(Alignment)));
+}
+
+void EPCGenericRTDyldMemoryManager::reserveAllocationSpace(
+ uintptr_t CodeSize, uint32_t CodeAlign, uintptr_t RODataSize,
+ uint32_t RODataAlign, uintptr_t RWDataSize, uint32_t RWDataAlign) {
+
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ // If there's already an error then bail out.
+ if (!ErrMsg.empty())
+ return;
+
+ if (!isPowerOf2_32(CodeAlign) || CodeAlign > EPC.getPageSize()) {
+ ErrMsg = "Invalid code alignment in reserveAllocationSpace";
+ return;
+ }
+ if (!isPowerOf2_32(RODataAlign) || RODataAlign > EPC.getPageSize()) {
+ ErrMsg = "Invalid ro-data alignment in reserveAllocationSpace";
+ return;
+ }
+ if (!isPowerOf2_32(RWDataAlign) || RWDataAlign > EPC.getPageSize()) {
+ ErrMsg = "Invalid rw-data alignment in reserveAllocationSpace";
+ return;
+ }
+ }
+
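+  // Reserve a single contiguous block in the executor, large enough to hold
+  // the code, ro-data, and rw-data segments rounded up to page boundaries.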
+ uint64_t TotalSize = 0;
+ TotalSize += alignTo(CodeSize, EPC.getPageSize());
+ TotalSize += alignTo(RODataSize, EPC.getPageSize());
+ TotalSize += alignTo(RWDataSize, EPC.getPageSize());
+
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " reserving "
+ << formatv("{0:x}", TotalSize) << " bytes.\n";
+ });
+
+ Expected<ExecutorAddr> TargetAllocAddr((ExecutorAddr()));
+ if (auto Err = EPC.callSPSWrapper<
+ rt::SPSSimpleExecutorMemoryManagerReserveSignature>(
+ SAs.Reserve, TargetAllocAddr, SAs.Instance, TotalSize)) {
+ std::lock_guard<std::mutex> Lock(M);
+ ErrMsg = toString(std::move(Err));
+ return;
+ }
+ if (!TargetAllocAddr) {
+ std::lock_guard<std::mutex> Lock(M);
+ ErrMsg = toString(TargetAllocAddr.takeError());
+ return;
+ }
+
+ std::lock_guard<std::mutex> Lock(M);
+ Unmapped.push_back(AllocGroup());
+ Unmapped.back().RemoteCode = {
+ *TargetAllocAddr, ExecutorAddrDiff(alignTo(CodeSize, EPC.getPageSize()))};
+ Unmapped.back().RemoteROData = {
+ Unmapped.back().RemoteCode.End,
+ ExecutorAddrDiff(alignTo(RODataSize, EPC.getPageSize()))};
+ Unmapped.back().RemoteRWData = {
+ Unmapped.back().RemoteROData.End,
+ ExecutorAddrDiff(alignTo(RWDataSize, EPC.getPageSize()))};
+}
+
+bool EPCGenericRTDyldMemoryManager::needsToReserveAllocationSpace() {
+ return true;
+}
+
+void EPCGenericRTDyldMemoryManager::registerEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ LLVM_DEBUG({
+ dbgs() << "Allocator " << (void *)this << " added unfinalized eh-frame "
+ << formatv("[ {0:x} {1:x} ]", LoadAddr, LoadAddr + Size) << "\n";
+ });
+ std::lock_guard<std::mutex> Lock(M);
+ // Bail out early if there's already an error.
+ if (!ErrMsg.empty())
+ return;
+
+ ExecutorAddr LA(LoadAddr);
+ for (auto &Alloc : llvm::reverse(Unfinalized)) {
+ if (Alloc.RemoteCode.contains(LA) || Alloc.RemoteROData.contains(LA) ||
+ Alloc.RemoteRWData.contains(LA)) {
+ Alloc.UnfinalizedEHFrames.push_back({LA, Size});
+ return;
+ }
+ }
+ ErrMsg = "eh-frame does not lie inside unfinalized alloc";
+}
+
+void EPCGenericRTDyldMemoryManager::deregisterEHFrames() {
+ // This is a no-op for us: We've registered a deallocation action for it.
+}
+
+void EPCGenericRTDyldMemoryManager::notifyObjectLoaded(
+ RuntimeDyld &Dyld, const object::ObjectFile &Obj) {
+ std::lock_guard<std::mutex> Lock(M);
+ LLVM_DEBUG(dbgs() << "Allocator " << (void *)this << " applied mappings:\n");
+ for (auto &ObjAllocs : Unmapped) {
+ mapAllocsToRemoteAddrs(Dyld, ObjAllocs.CodeAllocs,
+ ObjAllocs.RemoteCode.Start);
+ mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RODataAllocs,
+ ObjAllocs.RemoteROData.Start);
+ mapAllocsToRemoteAddrs(Dyld, ObjAllocs.RWDataAllocs,
+ ObjAllocs.RemoteRWData.Start);
+ Unfinalized.push_back(std::move(ObjAllocs));
+ }
+ Unmapped.clear();
+}
+
+bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
+ LLVM_DEBUG(dbgs() << "Allocator " << (void *)this << " finalizing:\n");
+
+ // If there's an error then bail out here.
+ std::vector<AllocGroup> Allocs;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ if (ErrMsg && !this->ErrMsg.empty()) {
+ *ErrMsg = std::move(this->ErrMsg);
+ return true;
+ }
+ std::swap(Allocs, Unfinalized);
+ }
+
+ // Loop over unfinalized objects to make finalization requests.
+ for (auto &ObjAllocs : Allocs) {
+
+ tpctypes::WireProtectionFlags SegProts[3] = {
+ tpctypes::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC)),
+ tpctypes::toWireProtectionFlags(sys::Memory::MF_READ),
+ tpctypes::toWireProtectionFlags(
+ static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE))};
+
+ ExecutorAddrRange *RemoteAddrs[3] = {&ObjAllocs.RemoteCode,
+ &ObjAllocs.RemoteROData,
+ &ObjAllocs.RemoteRWData};
+
+ std::vector<Alloc> *SegSections[3] = {&ObjAllocs.CodeAllocs,
+ &ObjAllocs.RODataAllocs,
+ &ObjAllocs.RWDataAllocs};
+
+ tpctypes::FinalizeRequest FR;
+ std::unique_ptr<char[]> AggregateContents[3];
+
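+    // For each segment kind (code, ro-data, rw-data), compute the aligned
+    // total size, then copy the individual section allocations into a single
+    // aggregate buffer to send with the finalize request.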
+ for (unsigned I = 0; I != 3; ++I) {
+ FR.Segments.push_back({});
+ auto &Seg = FR.Segments.back();
+ Seg.Prot = SegProts[I];
+ Seg.Addr = RemoteAddrs[I]->Start;
+ for (auto &SecAlloc : *SegSections[I]) {
+ Seg.Size = alignTo(Seg.Size, SecAlloc.Align);
+ Seg.Size += SecAlloc.Size;
+ }
+ AggregateContents[I] = std::make_unique<char[]>(Seg.Size);
+ size_t SecOffset = 0;
+ for (auto &SecAlloc : *SegSections[I]) {
+ SecOffset = alignTo(SecOffset, SecAlloc.Align);
+ memcpy(&AggregateContents[I][SecOffset],
+ reinterpret_cast<const char *>(
+ alignAddr(SecAlloc.Contents.get(), Align(SecAlloc.Align))),
+ SecAlloc.Size);
+ SecOffset += SecAlloc.Size;
+ // FIXME: Can we reset SecAlloc.Content here, now that it's copied into
+ // the aggregated content?
+ }
+ Seg.Content = {AggregateContents[I].get(), SecOffset};
+ }
+
+ for (auto &Frame : ObjAllocs.UnfinalizedEHFrames)
+ FR.Actions.push_back(
+ {{SAs.RegisterEHFrame,
+ {ExecutorAddr(Frame.Addr), ExecutorAddrDiff(Frame.Size)}},
+ {SAs.DeregisterEHFrame,
+ {ExecutorAddr(Frame.Addr), ExecutorAddrDiff(Frame.Size)}}});
+
+ // We'll also need to make an extra allocation for the eh-frame wrapper call
+ // arguments.
+ Error FinalizeErr = Error::success();
+ if (auto Err = EPC.callSPSWrapper<
+ rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>(
+ SAs.Finalize, FinalizeErr, SAs.Instance, std::move(FR))) {
+ std::lock_guard<std::mutex> Lock(M);
+ this->ErrMsg = toString(std::move(Err));
+ dbgs() << "Serialization error: " << this->ErrMsg << "\n";
+ if (ErrMsg)
+ *ErrMsg = this->ErrMsg;
+ return true;
+ }
+ if (FinalizeErr) {
+ std::lock_guard<std::mutex> Lock(M);
+ this->ErrMsg = toString(std::move(FinalizeErr));
+ dbgs() << "Finalization error: " << this->ErrMsg << "\n";
+ if (ErrMsg)
+ *ErrMsg = this->ErrMsg;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void EPCGenericRTDyldMemoryManager::mapAllocsToRemoteAddrs(
+ RuntimeDyld &Dyld, std::vector<Alloc> &Allocs, ExecutorAddr NextAddr) {
+ for (auto &Alloc : Allocs) {
+ NextAddr.setValue(alignTo(NextAddr.getValue(), Alloc.Align));
+ LLVM_DEBUG({
+ dbgs() << " " << static_cast<void *>(Alloc.Contents.get()) << " -> "
+ << format("0x%016" PRIx64, NextAddr.getValue()) << "\n";
+ });
+ Dyld.mapSectionAddress(reinterpret_cast<const void *>(alignAddr(
+ Alloc.Contents.get(), Align(Alloc.Align))),
+ NextAddr.getValue());
+ Alloc.RemoteAddr = NextAddr;
+ // Only advance NextAddr if it was non-null to begin with,
+ // otherwise leave it as null.
+ if (NextAddr)
+ NextAddr += ExecutorAddrDiff(Alloc.Size);
+ }
+}
+
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index b9c70b0aeb3c..818b6b52ff83 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -43,12 +43,12 @@ public:
protected:
Error grow() override;
- using Allocation = jitlink::JITLinkMemoryManager::Allocation;
+ using FinalizedAlloc = jitlink::JITLinkMemoryManager::FinalizedAlloc;
EPCIndirectionUtils &EPCIU;
unsigned TrampolineSize = 0;
unsigned TrampolinesPerPage = 0;
- std::vector<std::unique_ptr<Allocation>> TrampolineBlocks;
+ std::vector<FinalizedAlloc> TrampolineBlocks;
};
class EPCIndirectStubsManager : public IndirectStubsManager,
@@ -89,12 +89,19 @@ EPCTrampolinePool::EPCTrampolinePool(EPCIndirectionUtils &EPCIU)
Error EPCTrampolinePool::deallocatePool() {
Error Err = Error::success();
- for (auto &Alloc : TrampolineBlocks)
- Err = joinErrors(std::move(Err), Alloc->deallocate());
- return Err;
+ std::promise<MSVCPError> DeallocResultP;
+ auto DeallocResultF = DeallocResultP.get_future();
+
+ EPCIU.getExecutorProcessControl().getMemMgr().deallocate(
+ std::move(TrampolineBlocks),
+ [&](Error Err) { DeallocResultP.set_value(std::move(Err)); });
+
+ return DeallocResultF.get();
}
Error EPCTrampolinePool::grow() {
+ using namespace jitlink;
+
assert(AvailableTrampolines.empty() &&
"Grow called with trampolines still available");
@@ -102,34 +109,26 @@ Error EPCTrampolinePool::grow() {
assert(ResolverAddress && "Resolver address can not be null");
auto &EPC = EPCIU.getExecutorProcessControl();
- constexpr auto TrampolinePagePermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
auto PageSize = EPC.getPageSize();
- jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
- Request[TrampolinePagePermissions] = {PageSize, static_cast<size_t>(PageSize),
- 0};
- auto Alloc = EPC.getMemMgr().allocate(nullptr, Request);
-
+ auto Alloc = SimpleSegmentAlloc::Create(
+ EPC.getMemMgr(), nullptr,
+ {{MemProt::Read | MemProt::Exec, {PageSize, Align(PageSize)}}});
if (!Alloc)
return Alloc.takeError();
unsigned NumTrampolines = TrampolinesPerPage;
- auto WorkingMemory = (*Alloc)->getWorkingMemory(TrampolinePagePermissions);
- auto TargetAddress = (*Alloc)->getTargetMemory(TrampolinePagePermissions);
-
- EPCIU.getABISupport().writeTrampolines(WorkingMemory.data(), TargetAddress,
- ResolverAddress, NumTrampolines);
-
- auto TargetAddr = (*Alloc)->getTargetMemory(TrampolinePagePermissions);
+ auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
+ EPCIU.getABISupport().writeTrampolines(
+ SegInfo.WorkingMem.data(), SegInfo.Addr, ResolverAddress, NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
- AvailableTrampolines.push_back(TargetAddr + (I * TrampolineSize));
+ AvailableTrampolines.push_back(SegInfo.Addr + (I * TrampolineSize));
- if (auto Err = (*Alloc)->finalize())
- return Err;
+ auto FA = Alloc->finalize();
+ if (!FA)
+ return FA.takeError();
- TrampolineBlocks.push_back(std::move(*Alloc));
+ TrampolineBlocks.push_back(std::move(*FA));
return Error::success();
}
@@ -162,16 +161,18 @@ Error EPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt32Write> PtrUpdates;
for (auto &SI : StubInits)
- PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
- static_cast<uint32_t>(SI.second.first)});
+ PtrUpdates.push_back(
+ {ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
+ static_cast<uint32_t>(SI.second.first)});
return MemAccess.writeUInt32s(PtrUpdates);
}
case 8: {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt64Write> PtrUpdates;
for (auto &SI : StubInits)
- PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
- static_cast<uint64_t>(SI.second.first)});
+ PtrUpdates.push_back(
+ {ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
+ static_cast<uint64_t>(SI.second.first)});
return MemAccess.writeUInt64s(PtrUpdates);
}
default:
@@ -213,11 +214,11 @@ Error EPCIndirectStubsManager::updatePointer(StringRef Name,
auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess();
switch (EPCIU.getABISupport().getPointerSize()) {
case 4: {
- tpctypes::UInt32Write PUpdate(PtrAddr, NewAddr);
+ tpctypes::UInt32Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
return MemAccess.writeUInt32s(PUpdate);
}
case 8: {
- tpctypes::UInt64Write PUpdate(PtrAddr, NewAddr);
+ tpctypes::UInt64Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
return MemAccess.writeUInt64s(PUpdate);
}
default:
@@ -267,17 +268,17 @@ EPCIndirectionUtils::Create(ExecutorProcessControl &EPC) {
}
Error EPCIndirectionUtils::cleanup() {
- Error Err = Error::success();
- for (auto &A : IndirectStubAllocs)
- Err = joinErrors(std::move(Err), A->deallocate());
+ auto &MemMgr = EPC.getMemMgr();
+ auto Err = MemMgr.deallocate(std::move(IndirectStubAllocs));
if (TP)
Err = joinErrors(std::move(Err),
static_cast<EPCTrampolinePool &>(*TP).deallocatePool());
if (ResolverBlock)
- Err = joinErrors(std::move(Err), ResolverBlock->deallocate());
+ Err =
+ joinErrors(std::move(Err), MemMgr.deallocate(std::move(ResolverBlock)));
return Err;
}
@@ -285,29 +286,29 @@ Error EPCIndirectionUtils::cleanup() {
Expected<JITTargetAddress>
EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
JITTargetAddress ReentryCtxAddr) {
+ using namespace jitlink;
+
assert(ABI && "ABI can not be null");
- constexpr auto ResolverBlockPermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
auto ResolverSize = ABI->getResolverCodeSize();
- jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
- Request[ResolverBlockPermissions] = {EPC.getPageSize(),
- static_cast<size_t>(ResolverSize), 0};
- auto Alloc = EPC.getMemMgr().allocate(nullptr, Request);
+ auto Alloc =
+ SimpleSegmentAlloc::Create(EPC.getMemMgr(), nullptr,
+ {{MemProt::Read | MemProt::Exec,
+ {ResolverSize, Align(EPC.getPageSize())}}});
+
if (!Alloc)
return Alloc.takeError();
- auto WorkingMemory = (*Alloc)->getWorkingMemory(ResolverBlockPermissions);
- ResolverBlockAddr = (*Alloc)->getTargetMemory(ResolverBlockPermissions);
- ABI->writeResolverCode(WorkingMemory.data(), ResolverBlockAddr, ReentryFnAddr,
+ auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
+ ABI->writeResolverCode(SegInfo.WorkingMem.data(), SegInfo.Addr, ReentryFnAddr,
ReentryCtxAddr);
- if (auto Err = (*Alloc)->finalize())
- return std::move(Err);
+ auto FA = Alloc->finalize();
+ if (!FA)
+ return FA.takeError();
- ResolverBlock = std::move(*Alloc);
- return ResolverBlockAddr;
+ ResolverBlock = std::move(*FA);
+ return SegInfo.Addr;
}
std::unique_ptr<IndirectStubsManager>
@@ -341,6 +342,7 @@ EPCIndirectionUtils::EPCIndirectionUtils(ExecutorProcessControl &EPC,
Expected<EPCIndirectionUtils::IndirectStubInfoVector>
EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
+ using namespace jitlink;
std::lock_guard<std::mutex> Lock(EPCUIMutex);
@@ -350,42 +352,40 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
auto PageSize = EPC.getPageSize();
auto StubBytes = alignTo(NumStubsToAllocate * ABI->getStubSize(), PageSize);
NumStubsToAllocate = StubBytes / ABI->getStubSize();
- auto PointerBytes =
+ auto PtrBytes =
alignTo(NumStubsToAllocate * ABI->getPointerSize(), PageSize);
- constexpr auto StubPagePermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_EXEC);
- constexpr auto PointerPagePermissions =
- static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
- sys::Memory::MF_WRITE);
-
- jitlink::JITLinkMemoryManager::SegmentsRequestMap Request;
- Request[StubPagePermissions] = {PageSize, static_cast<size_t>(StubBytes),
- 0};
- Request[PointerPagePermissions] = {PageSize, 0, PointerBytes};
- auto Alloc = EPC.getMemMgr().allocate(nullptr, Request);
+ auto StubProt = MemProt::Read | MemProt::Exec;
+ auto PtrProt = MemProt::Read | MemProt::Write;
+
+ auto Alloc = SimpleSegmentAlloc::Create(
+ EPC.getMemMgr(), nullptr,
+ {{StubProt, {static_cast<size_t>(StubBytes), Align(PageSize)}},
+ {PtrProt, {static_cast<size_t>(PtrBytes), Align(PageSize)}}});
+
if (!Alloc)
return Alloc.takeError();
- auto StubTargetAddr = (*Alloc)->getTargetMemory(StubPagePermissions);
- auto PointerTargetAddr = (*Alloc)->getTargetMemory(PointerPagePermissions);
+ auto StubSeg = Alloc->getSegInfo(StubProt);
+ auto PtrSeg = Alloc->getSegInfo(PtrProt);
+
+ ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), StubSeg.Addr,
+ PtrSeg.Addr, NumStubsToAllocate);
- ABI->writeIndirectStubsBlock(
- (*Alloc)->getWorkingMemory(StubPagePermissions).data(), StubTargetAddr,
- PointerTargetAddr, NumStubsToAllocate);
+ auto FA = Alloc->finalize();
+ if (!FA)
+ return FA.takeError();
- if (auto Err = (*Alloc)->finalize())
- return std::move(Err);
+ IndirectStubAllocs.push_back(std::move(*FA));
+ auto StubExecutorAddr = StubSeg.Addr;
+ auto PtrExecutorAddr = PtrSeg.Addr;
for (unsigned I = 0; I != NumStubsToAllocate; ++I) {
AvailableIndirectStubs.push_back(
- IndirectStubInfo(StubTargetAddr, PointerTargetAddr));
- StubTargetAddr += ABI->getStubSize();
- PointerTargetAddr += ABI->getPointerSize();
+ IndirectStubInfo(StubExecutorAddr, PtrExecutorAddr));
+ StubExecutorAddr += ABI->getStubSize();
+ PtrExecutorAddr += ABI->getPointerSize();
}
-
- IndirectStubAllocs.push_back(std::move(*Alloc));
}
assert(NumStubs <= AvailableIndirectStubs.size() &&
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 7a76a6ccc122..2ab9ed4f856b 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -12,9 +12,9 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
index 7d86d125d1db..2eb835551adb 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
@@ -24,20 +24,22 @@ ExecutorProcessControl::MemoryAccess::~MemoryAccess() {}
ExecutorProcessControl::~ExecutorProcessControl() {}
SelfExecutorProcessControl::SelfExecutorProcessControl(
- std::shared_ptr<SymbolStringPool> SSP, Triple TargetTriple,
- unsigned PageSize, std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr)
- : ExecutorProcessControl(std::move(SSP)) {
+ std::shared_ptr<SymbolStringPool> SSP, std::unique_ptr<TaskDispatcher> D,
+ Triple TargetTriple, unsigned PageSize,
+ std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr)
+ : ExecutorProcessControl(std::move(SSP), std::move(D)) {
OwnedMemMgr = std::move(MemMgr);
if (!OwnedMemMgr)
- OwnedMemMgr = std::make_unique<jitlink::InProcessMemoryManager>();
+ OwnedMemMgr = std::make_unique<jitlink::InProcessMemoryManager>(
+ sys::Process::getPageSizeEstimate());
this->TargetTriple = std::move(TargetTriple);
this->PageSize = PageSize;
this->MemMgr = OwnedMemMgr.get();
this->MemAccess = this;
- this->JDI = {ExecutorAddress::fromPtr(jitDispatchViaWrapperFunctionManager),
- ExecutorAddress::fromPtr(this)};
+ this->JDI = {ExecutorAddr::fromPtr(jitDispatchViaWrapperFunctionManager),
+ ExecutorAddr::fromPtr(this)};
if (this->TargetTriple.isOSBinFormatMachO())
GlobalManglingPrefix = '_';
}
@@ -45,11 +47,20 @@ SelfExecutorProcessControl::SelfExecutorProcessControl(
Expected<std::unique_ptr<SelfExecutorProcessControl>>
SelfExecutorProcessControl::Create(
std::shared_ptr<SymbolStringPool> SSP,
+ std::unique_ptr<TaskDispatcher> D,
std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr) {
if (!SSP)
SSP = std::make_shared<SymbolStringPool>();
+ if (!D) {
+#if LLVM_ENABLE_THREADS
+ D = std::make_unique<DynamicThreadPoolTaskDispatcher>();
+#else
+ D = std::make_unique<InPlaceTaskDispatcher>();
+#endif
+ }
+
auto PageSize = sys::Process::getPageSize();
if (!PageSize)
return PageSize.takeError();
@@ -57,7 +68,8 @@ SelfExecutorProcessControl::Create(
Triple TT(sys::getProcessTriple());
return std::make_unique<SelfExecutorProcessControl>(
- std::move(SSP), std::move(TT), *PageSize, std::move(MemMgr));
+ std::move(SSP), std::move(D), std::move(TT), *PageSize,
+ std::move(MemMgr));
}
Expected<tpctypes::DylibHandle>
@@ -93,7 +105,7 @@ SelfExecutorProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) {
// FIXME: Collect all failing symbols before erroring out.
SymbolNameVector MissingSymbols;
MissingSymbols.push_back(Sym);
- return make_error<SymbolsNotFound>(std::move(MissingSymbols));
+ return make_error<SymbolsNotFound>(SSP, std::move(MissingSymbols));
}
R.back().push_back(pointerToJITTargetAddress(Addr));
}
@@ -103,60 +115,62 @@ SelfExecutorProcessControl::lookupSymbols(ArrayRef<LookupRequest> Request) {
}
Expected<int32_t>
-SelfExecutorProcessControl::runAsMain(JITTargetAddress MainFnAddr,
+SelfExecutorProcessControl::runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) {
using MainTy = int (*)(int, char *[]);
- return orc::runAsMain(jitTargetAddressToFunction<MainTy>(MainFnAddr), Args);
+ return orc::runAsMain(MainFnAddr.toPtr<MainTy>(), Args);
}
-void SelfExecutorProcessControl::callWrapperAsync(
- SendResultFunction SendResult, JITTargetAddress WrapperFnAddr,
- ArrayRef<char> ArgBuffer) {
+void SelfExecutorProcessControl::callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler SendResult,
+ ArrayRef<char> ArgBuffer) {
using WrapperFnTy =
- shared::detail::CWrapperFunctionResult (*)(const char *Data, size_t Size);
- auto *WrapperFn = jitTargetAddressToFunction<WrapperFnTy>(WrapperFnAddr);
+ shared::CWrapperFunctionResult (*)(const char *Data, size_t Size);
+ auto *WrapperFn = WrapperFnAddr.toPtr<WrapperFnTy>();
SendResult(WrapperFn(ArgBuffer.data(), ArgBuffer.size()));
}
-Error SelfExecutorProcessControl::disconnect() { return Error::success(); }
+Error SelfExecutorProcessControl::disconnect() {
+ D->shutdown();
+ return Error::success();
+}
-void SelfExecutorProcessControl::writeUInt8s(ArrayRef<tpctypes::UInt8Write> Ws,
- WriteResultFn OnWriteComplete) {
+void SelfExecutorProcessControl::writeUInt8sAsync(
+ ArrayRef<tpctypes::UInt8Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint8_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint8_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt16s(
+void SelfExecutorProcessControl::writeUInt16sAsync(
ArrayRef<tpctypes::UInt16Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint16_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint16_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt32s(
+void SelfExecutorProcessControl::writeUInt32sAsync(
ArrayRef<tpctypes::UInt32Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint32_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint32_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeUInt64s(
+void SelfExecutorProcessControl::writeUInt64sAsync(
ArrayRef<tpctypes::UInt64Write> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- *jitTargetAddressToPointer<uint64_t *>(W.Address) = W.Value;
+ *W.Addr.toPtr<uint64_t *>() = W.Value;
OnWriteComplete(Error::success());
}
-void SelfExecutorProcessControl::writeBuffers(
+void SelfExecutorProcessControl::writeBuffersAsync(
ArrayRef<tpctypes::BufferWrite> Ws, WriteResultFn OnWriteComplete) {
for (auto &W : Ws)
- memcpy(jitTargetAddressToPointer<char *>(W.Address), W.Buffer.data(),
- W.Buffer.size());
+ memcpy(W.Addr.toPtr<char *>(), W.Buffer.data(), W.Buffer.size());
OnWriteComplete(Error::success());
}
-shared::detail::CWrapperFunctionResult
+shared::CWrapperFunctionResult
SelfExecutorProcessControl::jitDispatchViaWrapperFunctionManager(
void *Ctx, const void *FnTag, const char *Data, size_t Size) {
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index e8dd1bb90c9a..ee1630a2ffa8 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -9,12 +9,17 @@
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/Support/Format.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <sstream>
+#define DEBUG_TYPE "orc"
+
using namespace llvm;
using namespace llvm::orc;
@@ -372,5 +377,77 @@ void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
Dst.addModuleFlag(MapMetadata(MF, VMap));
}
+Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
+ jitlink::LinkGraph &G,
+ MCDisassembler &Disassembler,
+ MCInstrAnalysis &MIA) {
+ // AArch64 appears to already come with the necessary relocations. Among other
+ // architectures, only x86_64 is currently implemented here.
+ if (G.getTargetTriple().getArch() != Triple::x86_64)
+ return Error::success();
+
+ raw_null_ostream CommentStream;
+ auto &STI = Disassembler.getSubtargetInfo();
+
+ // Determine the function bounds
+ auto &B = Sym.getBlock();
+ assert(!B.isZeroFill() && "expected content block");
+ auto SymAddress = Sym.getAddress();
+ auto SymStartInBlock =
+ (const uint8_t *)B.getContent().data() + Sym.getOffset();
+ auto SymSize = Sym.getSize() ? Sym.getSize() : B.getSize() - Sym.getOffset();
+ auto Content = makeArrayRef(SymStartInBlock, SymSize);
+
+ LLVM_DEBUG(dbgs() << "Adding self-relocations to " << Sym.getName() << "\n");
+
+ SmallDenseSet<uintptr_t, 8> ExistingRelocations;
+ for (auto &E : B.edges()) {
+ if (E.isRelocation())
+ ExistingRelocations.insert(E.getOffset());
+ }
+
+ size_t I = 0;
+ while (I < Content.size()) {
+ MCInst Instr;
+ uint64_t InstrSize = 0;
+ uint64_t InstrStart = SymAddress + I;
+ auto DecodeStatus = Disassembler.getInstruction(
+ Instr, InstrSize, Content.drop_front(I), InstrStart, CommentStream);
+ if (DecodeStatus != MCDisassembler::Success) {
+ LLVM_DEBUG(dbgs() << "Aborting due to disassembly failure at address "
+ << InstrStart);
+ return make_error<StringError>(
+ formatv("failed to disassemble at address {0:x16}", InstrStart),
+ inconvertibleErrorCode());
+ }
+ // Advance to the next instruction.
+ I += InstrSize;
+
+ // Check for a PC-relative address equal to the symbol itself.
+ auto PCRelAddr =
+ MIA.evaluateMemoryOperandAddress(Instr, &STI, InstrStart, InstrSize);
+ if (!PCRelAddr.hasValue() || PCRelAddr.getValue() != SymAddress)
+ continue;
+
+ auto RelocOffInInstr =
+ MIA.getMemoryOperandRelocationOffset(Instr, InstrSize);
+ if (!RelocOffInInstr.hasValue() ||
+ InstrSize - RelocOffInInstr.getValue() != 4) {
+ LLVM_DEBUG(dbgs() << "Skipping unknown self-relocation at "
+ << InstrStart);
+ continue;
+ }
+
+ auto RelocOffInBlock =
+ InstrStart + *RelocOffInInstr - SymAddress + Sym.getOffset();
+ if (ExistingRelocations.contains(RelocOffInBlock))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Adding delta32 self-relocation at " << InstrStart);
+ B.addEdge(jitlink::x86_64::Delta32, RelocOffInBlock, Sym, /*Addend=*/-4);
+ }
+ return Error::success();
+}
+
} // End namespace orc.
} // End namespace llvm.
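The new addFunctionPointerRelocationsToCurrentSymbol pass emits jitlink::x86_64::Delta32 edges with an addend of -4 for instructions whose 4-byte PC-relative displacement field ends at the end of the instruction. A standalone arithmetic sketch (an illustration, not code from the patch; it relies on JITLink's documented Delta32 rule, Fixup <- Target - Fixup + Addend) showing why the -4 addend reproduces the x86-64 RIP-relative displacement:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical layout: a 7-byte instruction at 0x1000 whose final four
      // bytes are a RIP-relative displacement referring back to the symbol,
      // which also starts at 0x1000.
      const uint64_t SymAddr = 0x1000;
      const uint64_t InstrStart = 0x1000;
      const uint64_t InstrSize = 7;
      const uint64_t FixupAddr = InstrStart + InstrSize - 4; // disp. field

      // What the CPU computes: target minus end-of-instruction.
      const int64_t RIPRelDisp =
          int64_t(SymAddr) - int64_t(InstrStart + InstrSize);

      // What a Delta32 edge with Addend = -4 resolves to:
      // Target - Fixup + Addend.
      const int64_t Delta32Value =
          int64_t(SymAddr) - int64_t(FixupAddr) + (-4);

      assert(RIPRelDisp == Delta32Value);
      return 0;
    }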
diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 4257137a2212..0fbf79b8a56d 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -8,8 +8,8 @@
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index 2ac32293e4db..0ab0d7d2e2b6 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -105,16 +105,18 @@ private:
/// llvm.global_ctors.
class GlobalCtorDtorScraper {
public:
-
GlobalCtorDtorScraper(GenericLLVMIRPlatformSupport &PS,
- StringRef InitFunctionPrefix)
- : PS(PS), InitFunctionPrefix(InitFunctionPrefix) {}
+ StringRef InitFunctionPrefix,
+ StringRef DeInitFunctionPrefix)
+ : PS(PS), InitFunctionPrefix(InitFunctionPrefix),
+ DeInitFunctionPrefix(DeInitFunctionPrefix) {}
Expected<ThreadSafeModule> operator()(ThreadSafeModule TSM,
MaterializationResponsibility &R);
private:
GenericLLVMIRPlatformSupport &PS;
StringRef InitFunctionPrefix;
+ StringRef DeInitFunctionPrefix;
};
/// Generic IR Platform Support
@@ -125,12 +127,14 @@ private:
class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport {
public:
GenericLLVMIRPlatformSupport(LLJIT &J)
- : J(J), InitFunctionPrefix(J.mangle("__orc_init_func.")) {
+ : J(J), InitFunctionPrefix(J.mangle("__orc_init_func.")),
+ DeInitFunctionPrefix(J.mangle("__orc_deinit_func.")) {
getExecutionSession().setPlatform(
std::make_unique<GenericLLVMIRPlatform>(*this));
- setInitTransform(J, GlobalCtorDtorScraper(*this, InitFunctionPrefix));
+ setInitTransform(J, GlobalCtorDtorScraper(*this, InitFunctionPrefix,
+ DeInitFunctionPrefix));
SymbolMap StdInterposes;
@@ -203,6 +207,8 @@ public:
InitSymbols[&JD].add(KV.first,
SymbolLookupFlags::WeaklyReferencedSymbol);
InitFunctions[&JD].add(KV.first);
+ } else if ((*KV.first).startswith(DeInitFunctionPrefix)) {
+ DeInitFunctions[&JD].add(KV.first);
}
}
return Error::success();
@@ -256,6 +262,11 @@ public:
});
}
+ void registerDeInitFunc(JITDylib &JD, SymbolStringPtr DeInitName) {
+ getExecutionSession().runSessionLocked(
+ [&]() { DeInitFunctions[&JD].add(DeInitName); });
+ }
+
private:
Expected<std::vector<JITTargetAddress>> getInitializers(JITDylib &JD) {
@@ -438,6 +449,7 @@ private:
LLJIT &J;
std::string InitFunctionPrefix;
+ std::string DeInitFunctionPrefix;
DenseMap<JITDylib *, SymbolLookupSet> InitSymbols;
DenseMap<JITDylib *, SymbolLookupSet> InitFunctions;
DenseMap<JITDylib *, SymbolLookupSet> DeInitFunctions;
@@ -459,40 +471,63 @@ GlobalCtorDtorScraper::operator()(ThreadSafeModule TSM,
auto Err = TSM.withModuleDo([&](Module &M) -> Error {
auto &Ctx = M.getContext();
auto *GlobalCtors = M.getNamedGlobal("llvm.global_ctors");
-
- // If there's no llvm.global_ctors or it's just a decl then skip.
- if (!GlobalCtors || GlobalCtors->isDeclaration())
+ auto *GlobalDtors = M.getNamedGlobal("llvm.global_dtors");
+
+ auto RegisterCOrDtors = [&](GlobalVariable *GlobalCOrDtors,
+ bool isCtor) -> Error {
+ // If there's no llvm.global_ctors/dtors, or it's just a declaration, skip.
+ if (!GlobalCOrDtors || GlobalCOrDtors->isDeclaration())
+ return Error::success();
+ std::string InitOrDeInitFunctionName;
+ if (isCtor)
+ raw_string_ostream(InitOrDeInitFunctionName)
+ << InitFunctionPrefix << M.getModuleIdentifier();
+ else
+ raw_string_ostream(InitOrDeInitFunctionName)
+ << DeInitFunctionPrefix << M.getModuleIdentifier();
+
+ MangleAndInterner Mangle(PS.getExecutionSession(), M.getDataLayout());
+ auto InternedInitOrDeInitName = Mangle(InitOrDeInitFunctionName);
+ if (auto Err = R.defineMaterializing(
+ {{InternedInitOrDeInitName, JITSymbolFlags::Callable}}))
+ return Err;
+
+ auto *InitOrDeInitFunc = Function::Create(
+ FunctionType::get(Type::getVoidTy(Ctx), {}, false),
+ GlobalValue::ExternalLinkage, InitOrDeInitFunctionName, &M);
+ InitOrDeInitFunc->setVisibility(GlobalValue::HiddenVisibility);
+ std::vector<std::pair<Function *, unsigned>> InitsOrDeInits;
+ auto COrDtors = isCtor ? getConstructors(M) : getDestructors(M);
+
+ for (auto E : COrDtors)
+ InitsOrDeInits.push_back(std::make_pair(E.Func, E.Priority));
+ llvm::sort(InitsOrDeInits,
+ [](const std::pair<Function *, unsigned> &LHS,
+ const std::pair<Function *, unsigned> &RHS) {
+ return LHS.first < RHS.first;
+ });
+
+ auto *InitOrDeInitFuncEntryBlock =
+ BasicBlock::Create(Ctx, "entry", InitOrDeInitFunc);
+ IRBuilder<> IB(InitOrDeInitFuncEntryBlock);
+ for (auto &KV : InitsOrDeInits)
+ IB.CreateCall(KV.first);
+ IB.CreateRetVoid();
+
+ if (isCtor)
+ PS.registerInitFunc(R.getTargetJITDylib(), InternedInitOrDeInitName);
+ else
+ PS.registerDeInitFunc(R.getTargetJITDylib(), InternedInitOrDeInitName);
+
+ GlobalCOrDtors->eraseFromParent();
return Error::success();
+ };
- std::string InitFunctionName;
- raw_string_ostream(InitFunctionName)
- << InitFunctionPrefix << M.getModuleIdentifier();
-
- MangleAndInterner Mangle(PS.getExecutionSession(), M.getDataLayout());
- auto InternedName = Mangle(InitFunctionName);
- if (auto Err =
- R.defineMaterializing({{InternedName, JITSymbolFlags::Callable}}))
+ if (auto Err = RegisterCOrDtors(GlobalCtors, true))
+ return Err;
+ if (auto Err = RegisterCOrDtors(GlobalDtors, false))
return Err;
- auto *InitFunc =
- Function::Create(FunctionType::get(Type::getVoidTy(Ctx), {}, false),
- GlobalValue::ExternalLinkage, InitFunctionName, &M);
- InitFunc->setVisibility(GlobalValue::HiddenVisibility);
- std::vector<std::pair<Function *, unsigned>> Inits;
- for (auto E : getConstructors(M))
- Inits.push_back(std::make_pair(E.Func, E.Priority));
- llvm::sort(Inits, [](const std::pair<Function *, unsigned> &LHS,
- const std::pair<Function *, unsigned> &RHS) {
- return LHS.first < RHS.first;
- });
- auto *EntryBlock = BasicBlock::Create(Ctx, "entry", InitFunc);
- IRBuilder<> IB(EntryBlock);
- for (auto &KV : Inits)
- IB.CreateCall(KV.first);
- IB.CreateRetVoid();
-
- PS.registerInitFunc(R.getTargetJITDylib(), InternedName);
- GlobalCtors->eraseFromParent();
return Error::success();
});
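With the GlobalCtorDtorScraper changes above, llvm.global_dtors entries are gathered into a hidden __orc_deinit_func.<module> wrapper alongside the existing __orc_init_func.<module> wrapper. A hedged sketch of the client-side effect, assuming the standard LLJIT API and that the native target has already been initialized:

    #include "llvm/ExecutionEngine/Orc/LLJIT.h"
    #include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;
    using namespace llvm::orc;

    // Sketch only: TSM is assumed to define llvm.global_ctors/llvm.global_dtors.
    Error runWithCtorsAndDtors(ThreadSafeModule TSM) {
      auto J = LLJITBuilder().create();
      if (!J)
        return J.takeError();

      if (auto Err = (*J)->addIRModule(std::move(TSM)))
        return Err;

      // Runs constructors via the __orc_init_func.<module> wrapper.
      if (auto Err = (*J)->initialize((*J)->getMainJITDylib()))
        return Err;

      // ... look up and run JIT'd code here ...

      // Runs destructors via the new __orc_deinit_func.<module> wrapper.
      return (*J)->deinitialize((*J)->getMainJITDylib());
    }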
diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
new file mode 100644
index 000000000000..44cb78c773c9
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
@@ -0,0 +1,82 @@
+//===------- LookupAndRecordAddrs.cpp - Symbol lookup support utility -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+
+#include <future>
+
+namespace llvm {
+namespace orc {
+
+void lookupAndRecordAddrs(
+ unique_function<void(Error)> OnRecorded, ExecutionSession &ES, LookupKind K,
+ const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags) {
+
+ SymbolLookupSet Symbols;
+ for (auto &KV : Pairs)
+ Symbols.add(KV.first, LookupFlags);
+
+ ES.lookup(
+ K, SearchOrder, Symbols, SymbolState::Ready,
+ [Pairs = std::move(Pairs),
+ OnRec = std::move(OnRecorded)](Expected<SymbolMap> Result) mutable {
+ if (!Result)
+ return OnRec(Result.takeError());
+ for (auto &KV : Pairs) {
+ auto I = Result->find(KV.first);
+ KV.second->setValue((I != Result->end()) ? I->second.getAddress()
+ : 0);
+ }
+ OnRec(Error::success());
+ },
+ NoDependenciesToRegister);
+}
+
+Error lookupAndRecordAddrs(
+ ExecutionSession &ES, LookupKind K, const JITDylibSearchOrder &SearchOrder,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags) {
+
+ std::promise<MSVCPError> ResultP;
+ auto ResultF = ResultP.get_future();
+ lookupAndRecordAddrs([&](Error Err) { ResultP.set_value(std::move(Err)); },
+ ES, K, SearchOrder, Pairs, LookupFlags);
+ return ResultF.get();
+}
+
+Error lookupAndRecordAddrs(
+ ExecutorProcessControl &EPC, tpctypes::DylibHandle H,
+ std::vector<std::pair<SymbolStringPtr, ExecutorAddr *>> Pairs,
+ SymbolLookupFlags LookupFlags) {
+
+ SymbolLookupSet Symbols;
+ for (auto &KV : Pairs)
+ Symbols.add(KV.first, LookupFlags);
+
+ ExecutorProcessControl::LookupRequest LR(H, Symbols);
+ auto Result = EPC.lookupSymbols(LR);
+ if (!Result)
+ return Result.takeError();
+
+ if (Result->size() != 1)
+ return make_error<StringError>("Error in lookup result",
+ inconvertibleErrorCode());
+ if (Result->front().size() != Pairs.size())
+ return make_error<StringError>("Error in lookup result elements",
+ inconvertibleErrorCode());
+
+ for (unsigned I = 0; I != Pairs.size(); ++I)
+ Pairs[I].second->setValue(Result->front()[I]);
+
+ return Error::success();
+}
+
+} // End namespace orc.
+} // End namespace llvm.
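The blocking overload above simply wraps the asynchronous one in a promise/future pair. A sketch of the typical call pattern (mirroring the MachOPlatform use later in this patch; the helper name and symbol choice are illustrative only):

    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"

    using namespace llvm;
    using namespace llvm::orc;

    // Populate two ExecutorAddr members with a single blocking lookup.
    static Error recordRuntimeAddrs(ExecutionSession &ES, JITDylib &PlatformJD,
                                    ExecutorAddr &Bootstrap,
                                    ExecutorAddr &Shutdown) {
      return lookupAndRecordAddrs(
          ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
          {{ES.intern("___orc_rt_macho_platform_bootstrap"), &Bootstrap},
           {ES.intern("___orc_rt_macho_platform_shutdown"), &Shutdown}});
    }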
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 66ef835dc34d..46c915dfea9e 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -12,6 +12,7 @@
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Debug.h"
@@ -52,7 +53,7 @@ public:
auto G = std::make_unique<jitlink::LinkGraph>(
"<MachOHeaderMU>", TT, PointerSize, Endianness,
jitlink::getGenericEdgeKindName);
- auto &HeaderSection = G->createSection("__header", sys::Memory::MF_READ);
+ auto &HeaderSection = G->createSection("__header", jitlink::MemProt::Read);
auto &HeaderBlock = createHeaderBlock(*G, HeaderSection);
// Init symbol is header-start symbol.
@@ -135,13 +136,14 @@ StringRef ObjCImageInfoSectionName = "__DATA,__objc_image_info";
StringRef ObjCSelRefsSectionName = "__DATA,__objc_selrefs";
StringRef Swift5ProtoSectionName = "__TEXT,__swift5_proto";
StringRef Swift5ProtosSectionName = "__TEXT,__swift5_protos";
+StringRef Swift5TypesSectionName = "__TEXT,__swift5_types";
StringRef ThreadBSSSectionName = "__DATA,__thread_bss";
StringRef ThreadDataSectionName = "__DATA,__thread_data";
StringRef ThreadVarsSectionName = "__DATA,__thread_vars";
StringRef InitSectionNames[] = {
ModInitFuncSectionName, ObjCSelRefsSectionName, ObjCClassListSectionName,
- Swift5ProtosSectionName, Swift5ProtoSectionName};
+ Swift5ProtosSectionName, Swift5ProtoSectionName, Swift5TypesSectionName};
} // end anonymous namespace
@@ -172,10 +174,10 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
// Add JIT-dispatch function support symbols.
if (auto Err = PlatformJD.define(absoluteSymbols(
{{ES.intern("___orc_rt_jit_dispatch"),
- {EPC.getJITDispatchInfo().JITDispatchFunctionAddress.getValue(),
+ {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
JITSymbolFlags::Exported}},
{ES.intern("___orc_rt_jit_dispatch_ctx"),
- {EPC.getJITDispatchInfo().JITDispatchContextAddress.getValue(),
+ {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
JITSymbolFlags::Exported}}})))
return std::move(Err);
@@ -267,6 +269,7 @@ bool MachOPlatform::isInitializerSection(StringRef SegName,
bool MachOPlatform::supportedTarget(const Triple &TT) {
switch (TT.getArch()) {
+ case Triple::aarch64:
case Triple::x86_64:
return true;
default:
@@ -286,6 +289,19 @@ MachOPlatform::MachOPlatform(
PlatformJD.addGenerator(std::move(OrcRuntimeGenerator));
+ // Force linking of eh-frame registration functions.
+ if (auto Err2 = lookupAndRecordAddrs(
+ ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
+ {{ES.intern("___orc_rt_macho_register_ehframe_section"),
+ &orc_rt_macho_register_ehframe_section},
+ {ES.intern("___orc_rt_macho_deregister_ehframe_section"),
+ &orc_rt_macho_deregister_ehframe_section}})) {
+ Err = std::move(Err2);
+ return;
+ }
+
+ State = BootstrapPhase2;
+
// PlatformJD hasn't been 'set-up' by the platform yet (since we're creating
// the platform now), so set it up.
if (auto E2 = setupJITDylib(PlatformJD)) {
@@ -309,6 +325,8 @@ MachOPlatform::MachOPlatform(
Err = std::move(E2);
return;
}
+
+ State = Initialized;
}
Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
@@ -321,13 +339,13 @@ Error MachOPlatform::associateRuntimeSupportFunctions(JITDylib &PlatformJD) {
this, &MachOPlatform::rt_getInitializers);
using GetDeinitializersSPSSig =
- SPSExpected<SPSMachOJITDylibDeinitializerSequence>(SPSExecutorAddress);
+ SPSExpected<SPSMachOJITDylibDeinitializerSequence>(SPSExecutorAddr);
WFs[ES.intern("___orc_rt_macho_get_deinitializers_tag")] =
ES.wrapAsyncWithSPS<GetDeinitializersSPSSig>(
this, &MachOPlatform::rt_getDeinitializers);
using LookupSymbolSPSSig =
- SPSExpected<SPSExecutorAddress>(SPSExecutorAddress, SPSString);
+ SPSExpected<SPSExecutorAddr>(SPSExecutorAddr, SPSString);
WFs[ES.intern("___orc_rt_macho_symbol_lookup_tag")] =
ES.wrapAsyncWithSPS<LookupSymbolSPSSig>(this,
&MachOPlatform::rt_lookupSymbol);
@@ -411,7 +429,7 @@ void MachOPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult,
}
void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
- ExecutorAddress Handle) {
+ ExecutorAddr Handle) {
LLVM_DEBUG({
dbgs() << "MachOPlatform::rt_getDeinitializers(\""
<< formatv("{0:x}", Handle.getValue()) << "\")\n";
@@ -441,8 +459,7 @@ void MachOPlatform::rt_getDeinitializers(SendDeinitializerSequenceFn SendResult,
}
void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
- ExecutorAddress Handle,
- StringRef SymbolName) {
+ ExecutorAddr Handle, StringRef SymbolName) {
LLVM_DEBUG({
dbgs() << "MachOPlatform::rt_lookupSymbol(\""
<< formatv("{0:x}", Handle.getValue()) << "\")\n";
@@ -476,7 +493,7 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
void operator()(Expected<SymbolMap> Result) {
if (Result) {
assert(Result->size() == 1 && "Unexpected result map count");
- SendResult(ExecutorAddress(Result->begin()->second.getAddress()));
+ SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
} else {
SendResult(Result.takeError());
}
@@ -495,56 +512,25 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
}
Error MachOPlatform::bootstrapMachORuntime(JITDylib &PlatformJD) {
-
- std::pair<const char *, ExecutorAddress *> Symbols[] = {
- {"___orc_rt_macho_platform_bootstrap", &orc_rt_macho_platform_bootstrap},
- {"___orc_rt_macho_platform_shutdown", &orc_rt_macho_platform_shutdown},
- {"___orc_rt_macho_register_object_sections",
- &orc_rt_macho_register_object_sections},
- {"___orc_rt_macho_create_pthread_key", &orc_rt_macho_create_pthread_key}};
-
- SymbolLookupSet RuntimeSymbols;
- std::vector<std::pair<SymbolStringPtr, ExecutorAddress *>> AddrsToRecord;
- for (const auto &KV : Symbols) {
- auto Name = ES.intern(KV.first);
- RuntimeSymbols.add(Name);
- AddrsToRecord.push_back({std::move(Name), KV.second});
- }
-
- auto RuntimeSymbolAddrs = ES.lookup(
- {{&PlatformJD, JITDylibLookupFlags::MatchAllSymbols}}, RuntimeSymbols);
- if (!RuntimeSymbolAddrs)
- return RuntimeSymbolAddrs.takeError();
-
- for (const auto &KV : AddrsToRecord) {
- auto &Name = KV.first;
- assert(RuntimeSymbolAddrs->count(Name) && "Missing runtime symbol?");
- KV.second->setValue((*RuntimeSymbolAddrs)[Name].getAddress());
- }
-
- if (auto Err =
- ES.callSPSWrapper<void()>(orc_rt_macho_platform_bootstrap.getValue()))
+ if (auto Err = lookupAndRecordAddrs(
+ ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
+ {{ES.intern("___orc_rt_macho_platform_bootstrap"),
+ &orc_rt_macho_platform_bootstrap},
+ {ES.intern("___orc_rt_macho_platform_shutdown"),
+ &orc_rt_macho_platform_shutdown},
+ {ES.intern("___orc_rt_macho_register_thread_data_section"),
+ &orc_rt_macho_register_thread_data_section},
+ {ES.intern("___orc_rt_macho_deregister_thread_data_section"),
+ &orc_rt_macho_deregister_thread_data_section},
+ {ES.intern("___orc_rt_macho_create_pthread_key"),
+ &orc_rt_macho_create_pthread_key}}))
return Err;
- // FIXME: Ordering is fuzzy here. We're probably best off saying
- // "behavior is undefined if code that uses the runtime is added before
- // the platform constructor returns", then move all this to the constructor.
- RuntimeBootstrapped = true;
- std::vector<MachOPerObjectSectionsToRegister> DeferredPOSRs;
- {
- std::lock_guard<std::mutex> Lock(PlatformMutex);
- DeferredPOSRs = std::move(BootstrapPOSRs);
- }
-
- for (auto &D : DeferredPOSRs)
- if (auto Err = registerPerObjectSections(D))
- return Err;
-
- return Error::success();
+ return ES.callSPSWrapper<void()>(orc_rt_macho_platform_bootstrap);
}
Error MachOPlatform::registerInitInfo(
- JITDylib &JD, ExecutorAddress ObjCImageInfoAddr,
+ JITDylib &JD, ExecutorAddr ObjCImageInfoAddr,
ArrayRef<jitlink::Section *> InitSections) {
std::unique_lock<std::mutex> Lock(PlatformMutex);
@@ -576,29 +562,12 @@ Error MachOPlatform::registerInitInfo(
// FIXME: Avoid copy here.
jitlink::SectionRange R(*Sec);
InitSeq->InitSections[Sec->getName()].push_back(
- {ExecutorAddress(R.getStart()), ExecutorAddress(R.getEnd())});
+ {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())});
}
return Error::success();
}
-Error MachOPlatform::registerPerObjectSections(
- const MachOPerObjectSectionsToRegister &POSR) {
-
- if (!orc_rt_macho_register_object_sections)
- return make_error<StringError>("Attempting to register per-object "
- "sections, but runtime support has not "
- "been loaded yet",
- inconvertibleErrorCode());
-
- Error ErrResult = Error::success();
- if (auto Err = ES.callSPSWrapper<shared::SPSError(
- SPSMachOPerObjectSectionsToRegister)>(
- orc_rt_macho_register_object_sections.getValue(), ErrResult, POSR))
- return Err;
- return ErrResult;
-}
-
Expected<uint64_t> MachOPlatform::createPThreadKey() {
if (!orc_rt_macho_create_pthread_key)
return make_error<StringError>(
@@ -608,7 +577,7 @@ Expected<uint64_t> MachOPlatform::createPThreadKey() {
Expected<uint64_t> Result(0);
if (auto Err = ES.callSPSWrapper<SPSExpected<uint64_t>(void)>(
- orc_rt_macho_create_pthread_key.getValue(), Result))
+ orc_rt_macho_create_pthread_key, Result))
return std::move(Err);
return Result;
}
@@ -617,21 +586,55 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
MaterializationResponsibility &MR, jitlink::LinkGraph &LG,
jitlink::PassConfiguration &Config) {
- // If the initializer symbol is the MachOHeader start symbol then just add
- // the macho header support passes.
- if (MR.getInitializerSymbol() == MP.MachOHeaderStartSymbol) {
- addMachOHeaderSupportPasses(MR, Config);
- // The header materialization unit doesn't require any other support, so we
- // can bail out early.
+ auto PS = MP.State.load();
+
+ // --- Handle Initializers ---
+ if (auto InitSymbol = MR.getInitializerSymbol()) {
+
+ // If the initializer symbol is the MachOHeader start symbol then just
+ // register it and then bail out -- the header materialization unit
+ // definitely doesn't need any other passes.
+ if (InitSymbol == MP.MachOHeaderStartSymbol) {
+ Config.PostAllocationPasses.push_back([this, &MR](jitlink::LinkGraph &G) {
+ return associateJITDylibHeaderSymbol(G, MR);
+ });
+ return;
+ }
+
+ // If the object contains an init symbol other than the header start symbol
+ // then add passes to preserve, process and register the init
+ // sections/symbols.
+ Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) {
+ if (auto Err = preserveInitSections(G, MR))
+ return Err;
+ return processObjCImageInfo(G, MR);
+ });
+
+ Config.PostFixupPasses.push_back(
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return registerInitSections(G, JD);
+ });
+ }
+
+ // --- Add passes for eh-frame and TLV support ---
+ if (PS == MachOPlatform::BootstrapPhase1) {
+ Config.PostFixupPasses.push_back(
+ [this](jitlink::LinkGraph &G) { return registerEHSectionsPhase1(G); });
return;
}
- // If the object contains initializers then add passes to record them.
- if (MR.getInitializerSymbol())
- addInitializerSupportPasses(MR, Config);
+ // Insert TLV lowering at the start of the PostPrunePasses, since we want
+ // it to run before GOT/PLT lowering.
+ Config.PostPrunePasses.insert(
+ Config.PostPrunePasses.begin(),
+ [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
+ return fixTLVSectionsAndEdges(G, JD);
+ });
- // Add passes for eh-frame and TLV support.
- addEHAndTLVSupportPasses(MR, Config);
+ // Add a pass to register the final addresses of the eh-frame and TLV sections
+ // with the runtime.
+ Config.PostFixupPasses.push_back(
+ [this](jitlink::LinkGraph &G) { return registerEHAndTLVSections(G); });
}
ObjectLinkingLayer::Plugin::SyntheticSymbolDependenciesMap
@@ -648,111 +651,22 @@ MachOPlatform::MachOPlatformPlugin::getSyntheticSymbolDependencies(
return SyntheticSymbolDependenciesMap();
}
-void MachOPlatform::MachOPlatformPlugin::addInitializerSupportPasses(
- MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
-
- /// Preserve init sections.
- Config.PrePrunePasses.push_back([this, &MR](jitlink::LinkGraph &G) {
- if (auto Err = preserveInitSections(G, MR))
- return Err;
- return processObjCImageInfo(G, MR);
- });
-
- Config.PostFixupPasses.push_back(
- [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
- return registerInitSections(G, JD);
- });
-}
-
-void MachOPlatform::MachOPlatformPlugin::addMachOHeaderSupportPasses(
- MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
-
- Config.PostAllocationPasses.push_back([this, &JD = MR.getTargetJITDylib()](
- jitlink::LinkGraph &G) -> Error {
- auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
- return Sym->getName() == *MP.MachOHeaderStartSymbol;
- });
- assert(I != G.defined_symbols().end() &&
- "Missing MachO header start symbol");
- {
- std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
- JITTargetAddress HeaderAddr = (*I)->getAddress();
- MP.HeaderAddrToJITDylib[HeaderAddr] = &JD;
- assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
- MP.InitSeqs.insert(
- std::make_pair(&JD, MachOJITDylibInitializers(
- JD.getName(), ExecutorAddress(HeaderAddr))));
- }
- return Error::success();
- });
-}
-
-void MachOPlatform::MachOPlatformPlugin::addEHAndTLVSupportPasses(
- MaterializationResponsibility &MR, jitlink::PassConfiguration &Config) {
-
- // Insert TLV lowering at the start of the PostPrunePasses, since we want
- // it to run before GOT/PLT lowering.
- Config.PostPrunePasses.insert(
- Config.PostPrunePasses.begin(),
- [this, &JD = MR.getTargetJITDylib()](jitlink::LinkGraph &G) {
- return fixTLVSectionsAndEdges(G, JD);
- });
-
- // Add a pass to register the final addresses of the eh-frame and TLV sections
- // with the runtime.
- Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error {
- MachOPerObjectSectionsToRegister POSR;
-
- if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
- jitlink::SectionRange R(*EHFrameSection);
- if (!R.empty())
- POSR.EHFrameSection = {ExecutorAddress(R.getStart()),
- ExecutorAddress(R.getEnd())};
- }
-
- // Get a pointer to the thread data section if there is one. It will be used
- // below.
- jitlink::Section *ThreadDataSection =
- G.findSectionByName(ThreadDataSectionName);
-
- // Handle thread BSS section if there is one.
- if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
- // If there's already a thread data section in this graph then merge the
- // thread BSS section content into it, otherwise just treat the thread
- // BSS section as the thread data section.
- if (ThreadDataSection)
- G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
- else
- ThreadDataSection = ThreadBSSSection;
- }
-
- // Having merged thread BSS (if present) and thread data (if present),
- // record the resulting section range.
- if (ThreadDataSection) {
- jitlink::SectionRange R(*ThreadDataSection);
- if (!R.empty())
- POSR.ThreadDataSection = {ExecutorAddress(R.getStart()),
- ExecutorAddress(R.getEnd())};
- }
-
- if (POSR.EHFrameSection.StartAddress ||
- POSR.ThreadDataSection.StartAddress) {
-
- // If we're still bootstrapping the runtime then just record this
- // frame for now.
- if (!MP.RuntimeBootstrapped) {
- std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
- MP.BootstrapPOSRs.push_back(POSR);
- return Error::success();
- }
-
- // Otherwise register it immediately.
- if (auto Err = MP.registerPerObjectSections(POSR))
- return Err;
- }
+Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
- return Error::success();
+ auto I = llvm::find_if(G.defined_symbols(), [this](jitlink::Symbol *Sym) {
+ return Sym->getName() == *MP.MachOHeaderStartSymbol;
});
+ assert(I != G.defined_symbols().end() && "Missing MachO header start symbol");
+
+ auto &JD = MR.getTargetJITDylib();
+ std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
+ JITTargetAddress HeaderAddr = (*I)->getAddress();
+ MP.HeaderAddrToJITDylib[HeaderAddr] = &JD;
+ assert(!MP.InitSeqs.count(&JD) && "InitSeq entry for JD already exists");
+ MP.InitSeqs.insert(std::make_pair(
+ &JD, MachOJITDylibInitializers(JD.getName(), ExecutorAddr(HeaderAddr))));
+ return Error::success();
}
Error MachOPlatform::MachOPlatformPlugin::preserveInitSections(
@@ -873,7 +787,7 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
Error MachOPlatform::MachOPlatformPlugin::registerInitSections(
jitlink::LinkGraph &G, JITDylib &JD) {
- ExecutorAddress ObjCImageInfoAddr;
+ ExecutorAddr ObjCImageInfoAddr;
SmallVector<jitlink::Section *> InitSections;
if (auto *ObjCImageInfoSec = G.findSectionByName(ObjCImageInfoSectionName)) {
@@ -950,9 +864,109 @@ Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
for (auto *B : G.blocks())
for (auto &E : B->edges())
if (E.getKind() ==
- jitlink::x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadRelaxable)
- E.setKind(
- jitlink::x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable);
+ jitlink::x86_64::RequestTLVPAndTransformToPCRel32TLVPLoadREXRelaxable)
+ E.setKind(jitlink::x86_64::
+ RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable);
+
+ return Error::success();
+}
+
+Error MachOPlatform::MachOPlatformPlugin::registerEHAndTLVSections(
+ jitlink::LinkGraph &G) {
+
+ // Register the final addresses of the eh-frame and TLV sections with the
+ // runtime.
+ if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
+ jitlink::SectionRange R(*EHFrameSection);
+ if (!R.empty())
+ G.allocActions().push_back(
+ {{MP.orc_rt_macho_register_ehframe_section.getValue(), R.getStart(),
+ R.getSize()},
+ {MP.orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(),
+ R.getSize()}});
+ }
+
+ // Get a pointer to the thread data section if there is one. It will be used
+ // below.
+ jitlink::Section *ThreadDataSection =
+ G.findSectionByName(ThreadDataSectionName);
+
+ // Handle thread BSS section if there is one.
+ if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+ // If there's already a thread data section in this graph then merge the
+ // thread BSS section content into it, otherwise just treat the thread
+ // BSS section as the thread data section.
+ if (ThreadDataSection)
+ G.mergeSections(*ThreadDataSection, *ThreadBSSSection);
+ else
+ ThreadDataSection = ThreadBSSSection;
+ }
+
+ // Having merged thread BSS (if present) and thread data (if present),
+ // record the resulting section range.
+ if (ThreadDataSection) {
+ jitlink::SectionRange R(*ThreadDataSection);
+ if (!R.empty()) {
+ if (MP.State != MachOPlatform::Initialized)
+ return make_error<StringError>("__thread_data section encountered, but "
+ "MachOPlatform has not finished booting",
+ inconvertibleErrorCode());
+
+ G.allocActions().push_back(
+ {{MP.orc_rt_macho_register_thread_data_section.getValue(),
+ R.getStart(), R.getSize()},
+ {MP.orc_rt_macho_deregister_thread_data_section.getValue(),
+ R.getStart(), R.getSize()}});
+ }
+ }
+ return Error::success();
+}
+
+Error MachOPlatform::MachOPlatformPlugin::registerEHSectionsPhase1(
+ jitlink::LinkGraph &G) {
+
+ // If there's no eh-frame there's nothing to do.
+ auto *EHFrameSection = G.findSectionByName(EHFrameSectionName);
+ if (!EHFrameSection)
+ return Error::success();
+
+ // If the eh-frame section is empty there's nothing to do.
+ jitlink::SectionRange R(*EHFrameSection);
+ if (R.empty())
+ return Error::success();
+
+ // Since we're linking the object that contains the registration code now,
+ // the addresses won't be available in the platform yet. We'll have to find
+ // them in this graph instead.
+ ExecutorAddr orc_rt_macho_register_ehframe_section;
+ ExecutorAddr orc_rt_macho_deregister_ehframe_section;
+ for (auto *Sym : G.defined_symbols()) {
+ if (!Sym->hasName())
+ continue;
+ if (Sym->getName() == "___orc_rt_macho_register_ehframe_section")
+ orc_rt_macho_register_ehframe_section = ExecutorAddr(Sym->getAddress());
+ else if (Sym->getName() == "___orc_rt_macho_deregister_ehframe_section")
+ orc_rt_macho_deregister_ehframe_section = ExecutorAddr(Sym->getAddress());
+
+ if (orc_rt_macho_register_ehframe_section &&
+ orc_rt_macho_deregister_ehframe_section)
+ break;
+ }
+
+ // If we failed to find the required functions then bail out.
+ if (!orc_rt_macho_register_ehframe_section ||
+ !orc_rt_macho_deregister_ehframe_section)
+ return make_error<StringError>("Could not find eh-frame registration "
+ "functions during platform bootstrap",
+ inconvertibleErrorCode());
+
+ // Otherwise, add allocation actions to the graph to register eh-frames for
+ // this object.
+ G.allocActions().push_back(
+ {{orc_rt_macho_register_ehframe_section.getValue(), R.getStart(),
+ R.getSize()},
+ {orc_rt_macho_deregister_ehframe_section.getValue(), R.getStart(),
+ R.getSize()}});
return Error::success();
}
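MachOPlatform now tracks an explicit bootstrap state (BootstrapPhase1, BootstrapPhase2, Initialized) and the plugin consults it when building the pass pipeline: phase-1 graphs take the registerEHSectionsPhase1 path, and __thread_data sections are rejected until the platform is fully initialized. A standalone sketch of that gating, with invented names, added purely to make the control flow explicit:

    #include <atomic>
    #include <cassert>

    // Invented mirror of the MachOPlatform state machine used above.
    enum class PlatformState { BootstrapPhase1, BootstrapPhase2, Initialized };

    struct PassPlan {
      bool UsePhase1EHRegistration = false;
      bool AllowThreadData = false;
    };

    static PassPlan planPasses(const std::atomic<PlatformState> &State) {
      PassPlan Plan;
      switch (State.load()) {
      case PlatformState::BootstrapPhase1:
        Plan.UsePhase1EHRegistration = true; // registerEHSectionsPhase1 path
        break;
      case PlatformState::BootstrapPhase2:
        break; // normal eh-frame path; __thread_data is still an error
      case PlatformState::Initialized:
        Plan.AllowThreadData = true; // registerEHAndTLVSections path
        break;
      }
      return Plan;
    }

    int main() {
      std::atomic<PlatformState> State{PlatformState::BootstrapPhase1};
      assert(planPasses(State).UsePhase1EHRegistration);
      State = PlatformState::Initialized;
      assert(planPasses(State).AllowThreadData);
      return 0;
    }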
diff --git a/llvm/lib/ExecutionEngine/Orc/Mangling.cpp b/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
index 14b22880ab7e..7b21e6a684ca 100644
--- a/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Mangling.cpp
@@ -7,9 +7,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/Mangling.h"
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
@@ -83,17 +85,29 @@ void IRSymbolMapper::add(ExecutionSession &ES, const ManglingOptions &MO,
}
}
-Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
-getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
- auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+static SymbolStringPtr addInitSymbol(SymbolFlagsMap &SymbolFlags,
+ ExecutionSession &ES,
+ StringRef ObjFileName) {
+ SymbolStringPtr InitSymbol;
+ size_t Counter = 0;
- if (!Obj)
- return Obj.takeError();
+ do {
+ std::string InitSymString;
+ raw_string_ostream(InitSymString)
+ << "$." << ObjFileName << ".__inits." << Counter++;
+ InitSymbol = ES.intern(InitSymString);
+ } while (SymbolFlags.count(InitSymbol));
- bool IsMachO = isa<object::MachOObjectFile>(Obj->get());
+ SymbolFlags[InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
+ return InitSymbol;
+}
+static Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getMachOObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::MachOObjectFile &Obj) {
SymbolFlagsMap SymbolFlags;
- for (auto &Sym : (*Obj)->symbols()) {
+
+ for (auto &Sym : Obj.symbols()) {
Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
if (!SymFlagsOrErr)
// TODO: Test this error.
@@ -123,48 +137,135 @@ getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
return SymFlags.takeError();
// Strip the 'exported' flag from MachO linker-private symbols.
- if (IsMachO && Name->startswith("l"))
+ if (Name->startswith("l"))
*SymFlags &= ~JITSymbolFlags::Exported;
SymbolFlags[InternedName] = std::move(*SymFlags);
}
SymbolStringPtr InitSymbol;
+ for (auto &Sec : Obj.sections()) {
+ auto SecType = Obj.getSectionType(Sec);
+ if ((SecType & MachO::SECTION_TYPE) == MachO::S_MOD_INIT_FUNC_POINTERS) {
+ InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
+ break;
+ }
+ auto SegName = Obj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
+ auto SecName = cantFail(Obj.getSectionName(Sec.getRawDataRefImpl()));
+ if (MachOPlatform::isInitializerSection(SegName, SecName)) {
+ InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
+ break;
+ }
+ }
- size_t Counter = 0;
- auto AddInitSymbol = [&]() {
- while (true) {
- std::string InitSymString;
- raw_string_ostream(InitSymString)
- << "$." << ObjBuffer.getBufferIdentifier() << ".__inits."
- << Counter++;
- InitSymbol = ES.intern(InitSymString);
- if (SymbolFlags.count(InitSymbol))
+ return std::make_pair(std::move(SymbolFlags), std::move(InitSymbol));
+}
+
+static Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getELFObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::ELFObjectFileBase &Obj) {
+ SymbolFlagsMap SymbolFlags;
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
continue;
- SymbolFlags[InitSymbol] = JITSymbolFlags::MaterializationSideEffectsOnly;
- return;
- }
- };
-
- if (IsMachO) {
- auto &MachOObj = cast<object::MachOObjectFile>(*Obj->get());
- for (auto &Sec : MachOObj.sections()) {
- auto SecType = MachOObj.getSectionType(Sec);
- if ((SecType & MachO::SECTION_TYPE) == MachO::S_MOD_INIT_FUNC_POINTERS) {
- AddInitSymbol();
- break;
- }
- auto SegName =
- MachOObj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
- auto SecName = cantFail(MachOObj.getSectionName(Sec.getRawDataRefImpl()));
- if (MachOPlatform::isInitializerSection(SegName, SecName)) {
- AddInitSymbol();
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ // ELF STB_GNU_UNIQUE should map to Weak for ORC.
+ if (Sym.getBinding() == ELF::STB_GNU_UNIQUE)
+ *SymFlags |= JITSymbolFlags::Weak;
+
+ SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ SymbolStringPtr InitSymbol;
+ for (auto &Sec : Obj.sections()) {
+ if (auto SecName = Sec.getName()) {
+ if (ELFNixPlatform::isInitializerSection(*SecName)) {
+ InitSymbol = addInitSymbol(SymbolFlags, ES, Obj.getFileName());
break;
}
}
}
- return std::make_pair(std::move(SymbolFlags), std::move(InitSymbol));
+ return std::make_pair(std::move(SymbolFlags), InitSymbol);
+}
+
+Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getGenericObjectFileSymbolInfo(ExecutionSession &ES,
+ const object::ObjectFile &Obj) {
+ SymbolFlagsMap SymbolFlags;
+ for (auto &Sym : Obj.symbols()) {
+ Expected<uint32_t> SymFlagsOrErr = Sym.getFlags();
+ if (!SymFlagsOrErr)
+ // TODO: Test this error.
+ return SymFlagsOrErr.takeError();
+
+ // Skip symbols not defined in this object file.
+ if (*SymFlagsOrErr & object::BasicSymbolRef::SF_Undefined)
+ continue;
+
+ // Skip symbols that are not global.
+ if (!(*SymFlagsOrErr & object::BasicSymbolRef::SF_Global))
+ continue;
+
+ // Skip symbols that have type SF_File.
+ if (auto SymType = Sym.getType()) {
+ if (*SymType == object::SymbolRef::ST_File)
+ continue;
+ } else
+ return SymType.takeError();
+
+ auto Name = Sym.getName();
+ if (!Name)
+ return Name.takeError();
+ auto InternedName = ES.intern(*Name);
+ auto SymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
+ if (!SymFlags)
+ return SymFlags.takeError();
+
+ SymbolFlags[InternedName] = std::move(*SymFlags);
+ }
+
+ return std::make_pair(std::move(SymbolFlags), nullptr);
+}
+
+Expected<std::pair<SymbolFlagsMap, SymbolStringPtr>>
+getObjectSymbolInfo(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
+ auto Obj = object::ObjectFile::createObjectFile(ObjBuffer);
+
+ if (!Obj)
+ return Obj.takeError();
+
+ if (auto *MachOObj = dyn_cast<object::MachOObjectFile>(Obj->get()))
+ return getMachOObjectFileSymbolInfo(ES, *MachOObj);
+ else if (auto *ELFObj = dyn_cast<object::ELFObjectFileBase>(Obj->get()))
+ return getELFObjectFileSymbolInfo(ES, *ELFObj);
+
+ return getGenericObjectFileSymbolInfo(ES, **Obj);
}
} // End namespace orc.
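getObjectSymbolInfo is now a thin dispatcher over MachO-, ELF- and generic-object helpers, with ELF gaining init-section detection. A sketch of calling it directly, assuming its declaration in Mangling.h at this revision:

    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/Mangling.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::orc;

    // Sketch only: report the symbol flags and the synthetic init symbol, if
    // any, that the JIT would derive for an object buffer. The MachO / ELF /
    // generic dispatch happens inside getObjectSymbolInfo, as shown above.
    static Error describeObject(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
      auto Info = getObjectSymbolInfo(ES, ObjBuffer);
      if (!Info)
        return Info.takeError();

      errs() << ObjBuffer.getBufferIdentifier() << ": " << Info->first.size()
             << " symbols";
      if (Info->second)
        errs() << ", init symbol " << *Info->second;
      errs() << "\n";
      return Error::success();
    }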
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index fd260089c04b..6f840a079dd1 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -64,9 +64,9 @@ private:
LGI.SymbolFlags[ES.intern(Sym->getName())] = Flags;
}
- if (G.getTargetTriple().isOSBinFormatMachO())
- if (hasMachOInitSection(G))
- LGI.InitSymbol = makeInitSymbol(ES, G);
+ if ((G.getTargetTriple().isOSBinFormatMachO() && hasMachOInitSection(G)) ||
+ (G.getTargetTriple().isOSBinFormatELF() && hasELFInitSection(G)))
+ LGI.InitSymbol = makeInitSymbol(ES, G);
return LGI;
}
@@ -77,11 +77,19 @@ private:
Sec.getName() == "__DATA,__objc_classlist" ||
Sec.getName() == "__TEXT,__swift5_protos" ||
Sec.getName() == "__TEXT,__swift5_proto" ||
+ Sec.getName() == "__TEXT,__swift5_types" ||
Sec.getName() == "__DATA,__mod_init_func")
return true;
return false;
}
+ static bool hasELFInitSection(LinkGraph &G) {
+ for (auto &Sec : G.sections())
+ if (Sec.getName() == ".init_array")
+ return true;
+ return false;
+ }
+
static SymbolStringPtr makeInitSymbol(ExecutionSession &ES, LinkGraph &G) {
std::string InitSymString;
raw_string_ostream(InitSymString)
@@ -272,8 +280,9 @@ public:
// If there were missing symbols then report the error.
if (!MissingSymbols.empty())
- return make_error<MissingSymbolDefinitions>(G.getName(),
- std::move(MissingSymbols));
+ return make_error<MissingSymbolDefinitions>(
+ Layer.getExecutionSession().getSymbolStringPool(), G.getName(),
+ std::move(MissingSymbols));
// If there are more definitions than expected, add them to the
// ExtraSymbols vector.
@@ -286,8 +295,9 @@ public:
// If there were extra definitions then report the error.
if (!ExtraSymbols.empty())
- return make_error<UnexpectedSymbolDefinitions>(G.getName(),
- std::move(ExtraSymbols));
+ return make_error<UnexpectedSymbolDefinitions>(
+ Layer.getExecutionSession().getSymbolStringPool(), G.getName(),
+ std::move(ExtraSymbols));
}
if (auto Err = MR->notifyResolved(InternedResult))
@@ -297,8 +307,7 @@ public:
return Error::success();
}
- void notifyFinalized(
- std::unique_ptr<JITLinkMemoryManager::Allocation> A) override {
+ void notifyFinalized(JITLinkMemoryManager::FinalizedAlloc A) override {
if (auto Err = Layer.notifyEmitted(*MR, std::move(A))) {
Layer.getExecutionSession().reportError(std::move(Err));
MR->failMaterialization();
@@ -414,7 +423,8 @@ private:
std::vector<std::pair<SymbolStringPtr, Symbol *>> NameToSym;
auto ProcessSymbol = [&](Symbol *Sym) {
- if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak) {
+ if (Sym->hasName() && Sym->getLinkage() == Linkage::Weak &&
+ Sym->getScope() != Scope::Local) {
auto Name = ES.intern(Sym->getName());
if (!MR->getSymbols().count(ES.intern(Sym->getName()))) {
JITSymbolFlags SF = JITSymbolFlags::Weak;
@@ -543,8 +553,7 @@ private:
// Propagate block-level dependencies through the block-dependence graph.
while (!WorkList.empty()) {
- auto *B = WorkList.back();
- WorkList.pop_back();
+ auto *B = WorkList.pop_back_val();
auto &BI = BlockInfos[B];
assert(BI.DependenciesChanged &&
@@ -672,7 +681,7 @@ void ObjectLinkingLayer::notifyLoaded(MaterializationResponsibility &MR) {
}
Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
- AllocPtr Alloc) {
+ FinalizedAlloc FA) {
Error Err = Error::success();
for (auto &P : Plugins)
Err = joinErrors(std::move(Err), P->notifyEmitted(MR));
@@ -681,17 +690,20 @@ Error ObjectLinkingLayer::notifyEmitted(MaterializationResponsibility &MR,
return Err;
return MR.withResourceKeyDo(
- [&](ResourceKey K) { Allocs[K].push_back(std::move(Alloc)); });
+ [&](ResourceKey K) { Allocs[K].push_back(std::move(FA)); });
}
Error ObjectLinkingLayer::handleRemoveResources(ResourceKey K) {
- Error Err = Error::success();
-
- for (auto &P : Plugins)
- Err = joinErrors(std::move(Err), P->notifyRemovingResources(K));
+ {
+ Error Err = Error::success();
+ for (auto &P : Plugins)
+ Err = joinErrors(std::move(Err), P->notifyRemovingResources(K));
+ if (Err)
+ return Err;
+ }
- std::vector<AllocPtr> AllocsToRemove;
+ std::vector<FinalizedAlloc> AllocsToRemove;
getExecutionSession().runSessionLocked([&] {
auto I = Allocs.find(K);
if (I != Allocs.end()) {
@@ -700,12 +712,10 @@ Error ObjectLinkingLayer::handleRemoveResources(ResourceKey K) {
}
});
- while (!AllocsToRemove.empty()) {
- Err = joinErrors(std::move(Err), AllocsToRemove.back()->deallocate());
- AllocsToRemove.pop_back();
- }
+ if (AllocsToRemove.empty())
+ return Error::success();
- return Err;
+ return MemMgr.deallocate(std::move(AllocsToRemove));
}
void ObjectLinkingLayer::handleTransferResources(ResourceKey DstKey,
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index d6f73a8b0864..673f7394450f 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -619,6 +619,61 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForProcess(
return LLVMErrorSuccess;
}
+LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, const char *FileName,
+ char GlobalPrefix, LLVMOrcSymbolPredicate Filter, void *FilterCtx) {
+ assert(Result && "Result can not be null");
+ assert(FileName && "FileName can not be null");
+ assert((Filter || !FilterCtx) &&
+ "if Filter is null then FilterCtx must also be null");
+
+ DynamicLibrarySearchGenerator::SymbolPredicate Pred;
+ if (Filter)
+ Pred = [=](const SymbolStringPtr &Name) -> bool {
+ return Filter(FilterCtx, wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(Name)));
+ };
+
+ auto LibrarySymsGenerator =
+ DynamicLibrarySearchGenerator::Load(FileName, GlobalPrefix, Pred);
+
+ if (!LibrarySymsGenerator) {
+ *Result = 0;
+ return wrap(LibrarySymsGenerator.takeError());
+ }
+
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
+}
+
+LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath(
+ LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer,
+ const char *FileName, const char *TargetTriple) {
+ assert(Result && "Result can not be null");
+ assert(FileName && "Filename can not be null");
+ assert(ObjLayer && "ObjectLayer can not be null");
+
+ if (TargetTriple) {
+ auto TT = Triple(TargetTriple);
+ auto LibrarySymsGenerator =
+ StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName, TT);
+ if (!LibrarySymsGenerator) {
+ *Result = 0;
+ return wrap(LibrarySymsGenerator.takeError());
+ }
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
+ } else {
+ auto LibrarySymsGenerator =
+ StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName);
+ if (!LibrarySymsGenerator) {
+ *Result = 0;
+ return wrap(LibrarySymsGenerator.takeError());
+ }
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
+ }
+}
+
LLVMOrcThreadSafeContextRef LLVMOrcCreateNewThreadSafeContext(void) {
return wrap(new ThreadSafeContext(std::make_unique<LLVMContext>()));
}
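The two new C API entry points wrap DynamicLibrarySearchGenerator::Load and StaticLibraryDefinitionGenerator::Load. A hedged usage sketch from C++, assuming the matching declarations land in llvm-c/Orc.h alongside the existing LLJIT C API; the library paths are placeholders:

    #include "llvm-c/Error.h"
    #include "llvm-c/LLJIT.h"
    #include "llvm-c/Orc.h"

    // Sketch only: attach generators for a shared library and a static
    // archive to an LLJIT's main JITDylib.
    static LLVMErrorRef addLibraryGenerators(LLVMOrcLLJITRef J) {
      LLVMOrcJITDylibRef MainJD = LLVMOrcLLJITGetMainJITDylib(J);
      char Prefix = LLVMOrcLLJITGetGlobalPrefix(J);

      LLVMOrcDefinitionGeneratorRef DylibGen;
      if (LLVMErrorRef Err = LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
              &DylibGen, "libfoo.so", Prefix, /*Filter=*/nullptr,
              /*FilterCtx=*/nullptr))
        return Err;
      LLVMOrcJITDylibAddGenerator(MainJD, DylibGen);

      LLVMOrcDefinitionGeneratorRef ArchiveGen;
      if (LLVMErrorRef Err = LLVMOrcCreateStaticLibrarySearchGeneratorForPath(
              &ArchiveGen, LLVMOrcLLJITGetObjLinkingLayer(J), "libbar.a",
              /*TargetTriple=*/nullptr))
        return Err;
      LLVMOrcJITDylibAddGenerator(MainJD, ArchiveGen);

      return LLVMErrorSuccess;
    }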
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
new file mode 100644
index 000000000000..02044e4af29a
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp
@@ -0,0 +1,47 @@
+//===------ OrcRTBridge.cpp - Executor functions for bootstrap -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+
+namespace llvm {
+namespace orc {
+namespace rt {
+
+const char *SimpleExecutorDylibManagerInstanceName =
+ "__llvm_orc_SimpleExecutorDylibManager_Instance";
+const char *SimpleExecutorDylibManagerOpenWrapperName =
+ "__llvm_orc_SimpleExecutorDylibManager_open_wrapper";
+const char *SimpleExecutorDylibManagerLookupWrapperName =
+ "__llvm_orc_SimpleExecutorDylibManager_lookup_wrapper";
+const char *SimpleExecutorMemoryManagerInstanceName =
+ "__llvm_orc_SimpleExecutorMemoryManager_Instance";
+const char *SimpleExecutorMemoryManagerReserveWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_reserve_wrapper";
+const char *SimpleExecutorMemoryManagerFinalizeWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_finalize_wrapper";
+const char *SimpleExecutorMemoryManagerDeallocateWrapperName =
+ "__llvm_orc_SimpleExecutorMemoryManager_deallocate_wrapper";
+const char *MemoryWriteUInt8sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint8s_wrapper";
+const char *MemoryWriteUInt16sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint16s_wrapper";
+const char *MemoryWriteUInt32sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint32s_wrapper";
+const char *MemoryWriteUInt64sWrapperName =
+ "__llvm_orc_bootstrap_mem_write_uint64s_wrapper";
+const char *MemoryWriteBuffersWrapperName =
+ "__llvm_orc_bootstrap_mem_write_buffers_wrapper";
+const char *RegisterEHFrameSectionCustomDirectWrapperName =
+ "__llvm_orc_bootstrap_register_ehframe_section_custom_direct_wrapper";
+const char *DeregisterEHFrameSectionCustomDirectWrapperName =
+ "__llvm_orc_bootstrap_deregister_ehframe_section_custom_direct_wrapper";
+const char *RunAsMainWrapperName = "__llvm_orc_bootstrap_run_as_main_wrapper";
+
+} // end namespace rt
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp
deleted file mode 100644
index a55cb220f218..000000000000
--- a/llvm/lib/ExecutionEngine/Orc/Shared/RPCError.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===--------------- RPCError.cpp - RPCERror implementation ---------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// RPC Error type implmentations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/Orc/Shared/RPCUtils.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <string>
-#include <system_error>
-
-char llvm::orc::shared::RPCFatalError::ID = 0;
-char llvm::orc::shared::ConnectionClosed::ID = 0;
-char llvm::orc::shared::ResponseAbandoned::ID = 0;
-char llvm::orc::shared::CouldNotNegotiate::ID = 0;
-
-namespace llvm {
-namespace orc {
-namespace shared {
-
-std::error_code ConnectionClosed::convertToErrorCode() const {
- return orcError(OrcErrorCode::RPCConnectionClosed);
-}
-
-void ConnectionClosed::log(raw_ostream &OS) const {
- OS << "RPC connection already closed";
-}
-
-std::error_code ResponseAbandoned::convertToErrorCode() const {
- return orcError(OrcErrorCode::RPCResponseAbandoned);
-}
-
-void ResponseAbandoned::log(raw_ostream &OS) const {
- OS << "RPC response abandoned";
-}
-
-CouldNotNegotiate::CouldNotNegotiate(std::string Signature)
- : Signature(std::move(Signature)) {}
-
-std::error_code CouldNotNegotiate::convertToErrorCode() const {
- return orcError(OrcErrorCode::RPCCouldNotNegotiateFunction);
-}
-
-void CouldNotNegotiate::log(raw_ostream &OS) const {
- OS << "Could not negotiate RPC function " << Signature;
-}
-
-} // end namespace shared
-} // end namespace orc
-} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
new file mode 100644
index 000000000000..64fc717b7b56
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp
@@ -0,0 +1,250 @@
+//===------ SimpleRemoteEPCUtils.cpp - Utils for Simple Remote EPC --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Message definitions and other utilities for SimpleRemoteEPC and
+// SimpleRemoteEPCServer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+namespace {
+
+struct FDMsgHeader {
+ static constexpr unsigned MsgSizeOffset = 0;
+ static constexpr unsigned OpCOffset = MsgSizeOffset + sizeof(uint64_t);
+ static constexpr unsigned SeqNoOffset = OpCOffset + sizeof(uint64_t);
+ static constexpr unsigned TagAddrOffset = SeqNoOffset + sizeof(uint64_t);
+ static constexpr unsigned Size = TagAddrOffset + sizeof(uint64_t);
+};
+
+} // namespace
+
+namespace llvm {
+namespace orc {
+namespace SimpleRemoteEPCDefaultBootstrapSymbolNames {
+
+const char *ExecutorSessionObjectName =
+ "__llvm_orc_SimpleRemoteEPC_dispatch_ctx";
+const char *DispatchFnName = "__llvm_orc_SimpleRemoteEPC_dispatch_fn";
+
+} // end namespace SimpleRemoteEPCDefaultBootstrapSymbolNames
+
+SimpleRemoteEPCTransportClient::~SimpleRemoteEPCTransportClient() {}
+SimpleRemoteEPCTransport::~SimpleRemoteEPCTransport() {}
+
+Expected<std::unique_ptr<FDSimpleRemoteEPCTransport>>
+FDSimpleRemoteEPCTransport::Create(SimpleRemoteEPCTransportClient &C, int InFD,
+ int OutFD) {
+#if LLVM_ENABLE_THREADS
+ if (InFD == -1)
+ return make_error<StringError>("Invalid input file descriptor " +
+ Twine(InFD),
+ inconvertibleErrorCode());
+ if (OutFD == -1)
+ return make_error<StringError>("Invalid output file descriptor " +
+ Twine(OutFD),
+ inconvertibleErrorCode());
+ std::unique_ptr<FDSimpleRemoteEPCTransport> FDT(
+ new FDSimpleRemoteEPCTransport(C, InFD, OutFD));
+ return std::move(FDT);
+#else
+ return make_error<StringError>("FD-based SimpleRemoteEPC transport requires "
+ "thread support, but llvm was built with "
+ "LLVM_ENABLE_THREADS=Off",
+ inconvertibleErrorCode());
+#endif
+}
+
+FDSimpleRemoteEPCTransport::~FDSimpleRemoteEPCTransport() {
+#if LLVM_ENABLE_THREADS
+ ListenerThread.join();
+#endif
+}
+
+Error FDSimpleRemoteEPCTransport::start() {
+#if LLVM_ENABLE_THREADS
+ ListenerThread = std::thread([this]() { listenLoop(); });
+ return Error::success();
+#endif
+ llvm_unreachable("Should not be called with LLVM_ENABLE_THREADS=Off");
+}
+
+Error FDSimpleRemoteEPCTransport::sendMessage(SimpleRemoteEPCOpcode OpC,
+ uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ ArrayRef<char> ArgBytes) {
+ char HeaderBuffer[FDMsgHeader::Size];
+
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::MsgSizeOffset)) =
+ FDMsgHeader::Size + ArgBytes.size();
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::OpCOffset)) =
+ static_cast<uint64_t>(OpC);
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::SeqNoOffset)) = SeqNo;
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::TagAddrOffset)) =
+ TagAddr.getValue();
+
+ std::lock_guard<std::mutex> Lock(M);
+ if (Disconnected)
+ return make_error<StringError>("FD-transport disconnected",
+ inconvertibleErrorCode());
+ if (int ErrNo = writeBytes(HeaderBuffer, FDMsgHeader::Size))
+ return errorCodeToError(std::error_code(ErrNo, std::generic_category()));
+ if (int ErrNo = writeBytes(ArgBytes.data(), ArgBytes.size()))
+ return errorCodeToError(std::error_code(ErrNo, std::generic_category()));
+ return Error::success();
+}
+
+void FDSimpleRemoteEPCTransport::disconnect() {
+ if (Disconnected)
+ return; // Return if already disconnected.
+
+ Disconnected = true;
+ bool CloseOutFD = InFD != OutFD;
+
+ // Close InFD.
+ while (close(InFD) == -1) {
+ if (errno == EBADF)
+ break;
+ }
+
+ // Close OutFD.
+ if (CloseOutFD) {
+ while (close(OutFD) == -1) {
+ if (errno == EBADF)
+ break;
+ }
+ }
+}
+
+static Error makeUnexpectedEOFError() {
+ return make_error<StringError>("Unexpected end-of-file",
+ inconvertibleErrorCode());
+}
+
+Error FDSimpleRemoteEPCTransport::readBytes(char *Dst, size_t Size,
+ bool *IsEOF) {
+ assert(Dst && "Attempt to read into null.");
+ ssize_t Completed = 0;
+ while (Completed < static_cast<ssize_t>(Size)) {
+ ssize_t Read = ::read(InFD, Dst + Completed, Size - Completed);
+ if (Read <= 0) {
+ auto ErrNo = errno;
+ if (Read == 0) {
+ if (Completed == 0 && IsEOF) {
+ *IsEOF = true;
+ return Error::success();
+ } else
+ return makeUnexpectedEOFError();
+ } else if (ErrNo == EAGAIN || ErrNo == EINTR)
+ continue;
+ else {
+ std::lock_guard<std::mutex> Lock(M);
+ if (Disconnected && IsEOF) { // disconnect called, pretend this is EOF.
+ *IsEOF = true;
+ return Error::success();
+ }
+ return errorCodeToError(
+ std::error_code(ErrNo, std::generic_category()));
+ }
+ }
+ Completed += Read;
+ }
+ return Error::success();
+}
+
+int FDSimpleRemoteEPCTransport::writeBytes(const char *Src, size_t Size) {
+ assert(Src && "Attempt to append from null.");
+ ssize_t Completed = 0;
+ while (Completed < static_cast<ssize_t>(Size)) {
+ ssize_t Written = ::write(OutFD, Src + Completed, Size - Completed);
+ if (Written < 0) {
+ auto ErrNo = errno;
+ if (ErrNo == EAGAIN || ErrNo == EINTR)
+ continue;
+ else
+ return ErrNo;
+ }
+ Completed += Written;
+ }
+ return 0;
+}
+
+void FDSimpleRemoteEPCTransport::listenLoop() {
+ Error Err = Error::success();
+ do {
+
+ char HeaderBuffer[FDMsgHeader::Size];
+ // Read the header buffer.
+ {
+ bool IsEOF = false;
+ if (auto Err2 = readBytes(HeaderBuffer, FDMsgHeader::Size, &IsEOF)) {
+ Err = joinErrors(std::move(Err), std::move(Err2));
+ break;
+ }
+ if (IsEOF)
+ break;
+ }
+
+ // Decode header buffer.
+ uint64_t MsgSize;
+ SimpleRemoteEPCOpcode OpC;
+ uint64_t SeqNo;
+ ExecutorAddr TagAddr;
+
+ MsgSize =
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::MsgSizeOffset));
+ OpC = static_cast<SimpleRemoteEPCOpcode>(static_cast<uint64_t>(
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::OpCOffset))));
+ SeqNo =
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::SeqNoOffset));
+ TagAddr.setValue(
+ *((support::ulittle64_t *)(HeaderBuffer + FDMsgHeader::TagAddrOffset)));
+
+ if (MsgSize < FDMsgHeader::Size) {
+ Err = joinErrors(std::move(Err),
+ make_error<StringError>("Message size too small",
+ inconvertibleErrorCode()));
+ break;
+ }
+
+ // Read the argument bytes.
+ SimpleRemoteEPCArgBytesVector ArgBytes;
+ ArgBytes.resize(MsgSize - FDMsgHeader::Size);
+ if (auto Err2 = readBytes(ArgBytes.data(), ArgBytes.size())) {
+ Err = joinErrors(std::move(Err), std::move(Err2));
+ break;
+ }
+
+ if (auto Action = C.handleMessage(OpC, SeqNo, TagAddr, ArgBytes)) {
+ if (*Action == SimpleRemoteEPCTransportClient::EndSession)
+ break;
+ } else {
+ Err = joinErrors(std::move(Err), Action.takeError());
+ break;
+ }
+ } while (true);
+
+ // Attempt to close FDs, set Disconnected to true so that subsequent
+ // sendMessage calls fail.
+ disconnect();
+
+ // Call up to the client to handle the disconnection.
+ C.handleDisconnect(std::move(Err));
+}
+
+} // end namespace orc
+} // end namespace llvm
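
The FD transport above frames every message with a small fixed-size little-endian header (message size, opcode, sequence number, tag address) followed by the raw argument bytes. As a rough standalone sketch of that framing, assuming the same four 8-byte fields that FDMsgHeader lays out (the struct name and helper below are hypothetical):

#include <cstdint>
#include <cstring>

// Hypothetical mirror of the FDMsgHeader wire layout: four unsigned 64-bit
// little-endian fields, followed directly by the argument bytes.
struct WireHeader {
  uint64_t MsgSize; // Header size plus argument bytes.
  uint64_t OpC;     // SimpleRemoteEPCOpcode value.
  uint64_t SeqNo;   // Sequence number (zero for Setup and Hangup).
  uint64_t TagAddr; // Wrapper-function tag address (zero unless CallWrapper).
};

// Assumes a little-endian host; the real code uses support::ulittle64_t so
// the encoding is endian-safe regardless of host byte order.
inline void encodeHeader(char (&Buf)[sizeof(WireHeader)], const WireHeader &H) {
  std::memcpy(Buf + 0, &H.MsgSize, sizeof(uint64_t));
  std::memcpy(Buf + 8, &H.OpC, sizeof(uint64_t));
  std::memcpy(Buf + 16, &H.SeqNo, sizeof(uint64_t));
  std::memcpy(Buf + 24, &H.TagAddr, sizeof(uint64_t));
}
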
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
new file mode 100644
index 000000000000..47364a92a451
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -0,0 +1,406 @@
+//===------- SimpleRemoteEPC.cpp -- Simple remote executor control --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h"
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+
+SimpleRemoteEPC::~SimpleRemoteEPC() {
+#ifndef NDEBUG
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ assert(Disconnected && "Destroyed without disconnection");
+#endif // NDEBUG
+}
+
+Expected<tpctypes::DylibHandle>
+SimpleRemoteEPC::loadDylib(const char *DylibPath) {
+ return DylibMgr->open(DylibPath, 0);
+}
+
+Expected<std::vector<tpctypes::LookupResult>>
+SimpleRemoteEPC::lookupSymbols(ArrayRef<LookupRequest> Request) {
+ std::vector<tpctypes::LookupResult> Result;
+
+ for (auto &Element : Request) {
+ if (auto R = DylibMgr->lookup(Element.Handle, Element.Symbols)) {
+ Result.push_back({});
+ Result.back().reserve(R->size());
+ for (auto Addr : *R)
+ Result.back().push_back(Addr.getValue());
+ } else
+ return R.takeError();
+ }
+ return std::move(Result);
+}
+
+Expected<int32_t> SimpleRemoteEPC::runAsMain(ExecutorAddr MainFnAddr,
+ ArrayRef<std::string> Args) {
+ int64_t Result = 0;
+ if (auto Err = callSPSWrapper<rt::SPSRunAsMainSignature>(
+ RunAsMainAddr, Result, ExecutorAddr(MainFnAddr), Args))
+ return std::move(Err);
+ return Result;
+}
+
+void SimpleRemoteEPC::callWrapperAsync(ExecutorAddr WrapperFnAddr,
+ IncomingWFRHandler OnComplete,
+ ArrayRef<char> ArgBuffer) {
+ uint64_t SeqNo;
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ SeqNo = getNextSeqNo();
+ assert(!PendingCallWrapperResults.count(SeqNo) && "SeqNo already in use");
+ PendingCallWrapperResults[SeqNo] = std::move(OnComplete);
+ }
+
+ if (auto Err = sendMessage(SimpleRemoteEPCOpcode::CallWrapper, SeqNo,
+ WrapperFnAddr, ArgBuffer)) {
+ IncomingWFRHandler H;
+
+    // We just registered OnComplete, but there may be a race between this
+    // thread returning from sendMessage and handleDisconnect being called
+    // from the transport's listener thread. If handleDisconnect gets to the
+    // pending-results map first (or has already run) then it will have
+    // failed the handler for us. If we get there first then we need to take
+    // care of it below.
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ auto I = PendingCallWrapperResults.find(SeqNo);
+ if (I != PendingCallWrapperResults.end()) {
+ H = std::move(I->second);
+ PendingCallWrapperResults.erase(I);
+ }
+ }
+
+ if (H)
+ H(shared::WrapperFunctionResult::createOutOfBandError("disconnecting"));
+
+ getExecutionSession().reportError(std::move(Err));
+ }
+}
+
+Error SimpleRemoteEPC::disconnect() {
+ T->disconnect();
+ D->shutdown();
+ std::unique_lock<std::mutex> Lock(SimpleRemoteEPCMutex);
+ DisconnectCV.wait(Lock, [this] { return Disconnected; });
+ return std::move(DisconnectErr);
+}
+
+Expected<SimpleRemoteEPCTransportClient::HandleMessageAction>
+SimpleRemoteEPC::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC::handleMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ dbgs() << "Setup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+
+ using UT = std::underlying_type_t<SimpleRemoteEPCOpcode>;
+ if (static_cast<UT>(OpC) > static_cast<UT>(SimpleRemoteEPCOpcode::LastOpC))
+ return make_error<StringError>("Unexpected opcode",
+ inconvertibleErrorCode());
+
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ if (auto Err = handleSetup(SeqNo, TagAddr, std::move(ArgBytes)))
+ return std::move(Err);
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ T->disconnect();
+ if (auto Err = handleHangup(std::move(ArgBytes)))
+ return std::move(Err);
+ return EndSession;
+ case SimpleRemoteEPCOpcode::Result:
+ if (auto Err = handleResult(SeqNo, TagAddr, std::move(ArgBytes)))
+ return std::move(Err);
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ handleCallWrapper(SeqNo, TagAddr, std::move(ArgBytes));
+ break;
+ }
+ return ContinueSession;
+}
+
+void SimpleRemoteEPC::handleDisconnect(Error Err) {
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC::handleDisconnect: "
+ << (Err ? "failure" : "success") << "\n";
+ });
+
+ PendingCallWrapperResultsMap TmpPending;
+
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ std::swap(TmpPending, PendingCallWrapperResults);
+ }
+
+ for (auto &KV : TmpPending)
+ KV.second(
+ shared::WrapperFunctionResult::createOutOfBandError("disconnecting"));
+
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ DisconnectErr = joinErrors(std::move(DisconnectErr), std::move(Err));
+ Disconnected = true;
+ DisconnectCV.notify_all();
+}
+
+Expected<std::unique_ptr<jitlink::JITLinkMemoryManager>>
+SimpleRemoteEPC::createDefaultMemoryManager(SimpleRemoteEPC &SREPC) {
+ EPCGenericJITLinkMemoryManager::SymbolAddrs SAs;
+ if (auto Err = SREPC.getBootstrapSymbols(
+ {{SAs.Allocator, rt::SimpleExecutorMemoryManagerInstanceName},
+ {SAs.Reserve, rt::SimpleExecutorMemoryManagerReserveWrapperName},
+ {SAs.Finalize, rt::SimpleExecutorMemoryManagerFinalizeWrapperName},
+ {SAs.Deallocate,
+ rt::SimpleExecutorMemoryManagerDeallocateWrapperName}}))
+ return std::move(Err);
+
+ return std::make_unique<EPCGenericJITLinkMemoryManager>(SREPC, SAs);
+}
+
+Expected<std::unique_ptr<ExecutorProcessControl::MemoryAccess>>
+SimpleRemoteEPC::createDefaultMemoryAccess(SimpleRemoteEPC &SREPC) {
+ return nullptr;
+}
+
+Error SimpleRemoteEPC::sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ ArrayRef<char> ArgBytes) {
+ assert(OpC != SimpleRemoteEPCOpcode::Setup &&
+ "SimpleRemoteEPC sending Setup message? That's the wrong direction.");
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC::sendMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ default:
+ llvm_unreachable("Invalid opcode");
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+ auto Err = T->sendMessage(OpC, SeqNo, TagAddr, ArgBytes);
+ LLVM_DEBUG({
+ if (Err)
+ dbgs() << " \\--> SimpleRemoteEPC::sendMessage failed\n";
+ });
+ return Err;
+}
+
+Error SimpleRemoteEPC::handleSetup(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ if (SeqNo != 0)
+ return make_error<StringError>("Setup packet SeqNo not zero",
+ inconvertibleErrorCode());
+
+ if (TagAddr)
+ return make_error<StringError>("Setup packet TagAddr not zero",
+ inconvertibleErrorCode());
+
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ auto I = PendingCallWrapperResults.find(0);
+ assert(PendingCallWrapperResults.size() == 1 &&
+ I != PendingCallWrapperResults.end() &&
+ "Setup message handler not connectly set up");
+ auto SetupMsgHandler = std::move(I->second);
+ PendingCallWrapperResults.erase(I);
+
+ auto WFR =
+ shared::WrapperFunctionResult::copyFrom(ArgBytes.data(), ArgBytes.size());
+ SetupMsgHandler(std::move(WFR));
+ return Error::success();
+}
+
+Error SimpleRemoteEPC::setup(Setup S) {
+ using namespace SimpleRemoteEPCDefaultBootstrapSymbolNames;
+
+ std::promise<MSVCPExpected<SimpleRemoteEPCExecutorInfo>> EIP;
+ auto EIF = EIP.get_future();
+
+ // Prepare a handler for the setup packet.
+ PendingCallWrapperResults[0] =
+ RunInPlace()(
+ [&](shared::WrapperFunctionResult SetupMsgBytes) {
+ if (const char *ErrMsg = SetupMsgBytes.getOutOfBandError()) {
+ EIP.set_value(
+ make_error<StringError>(ErrMsg, inconvertibleErrorCode()));
+ return;
+ }
+ using SPSSerialize =
+ shared::SPSArgList<shared::SPSSimpleRemoteEPCExecutorInfo>;
+ shared::SPSInputBuffer IB(SetupMsgBytes.data(), SetupMsgBytes.size());
+ SimpleRemoteEPCExecutorInfo EI;
+ if (SPSSerialize::deserialize(IB, EI))
+ EIP.set_value(EI);
+ else
+ EIP.set_value(make_error<StringError>(
+ "Could not deserialize setup message", inconvertibleErrorCode()));
+ });
+
+ // Start the transport.
+ if (auto Err = T->start())
+ return Err;
+
+ // Wait for setup packet to arrive.
+ auto EI = EIF.get();
+ if (!EI) {
+ T->disconnect();
+ return EI.takeError();
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPC received setup message:\n"
+ << " Triple: " << EI->TargetTriple << "\n"
+ << " Page size: " << EI->PageSize << "\n"
+ << " Bootstrap symbols:\n";
+ for (const auto &KV : EI->BootstrapSymbols)
+ dbgs() << " " << KV.first() << ": "
+ << formatv("{0:x16}", KV.second.getValue()) << "\n";
+ });
+ TargetTriple = Triple(EI->TargetTriple);
+ PageSize = EI->PageSize;
+ BootstrapSymbols = std::move(EI->BootstrapSymbols);
+
+ if (auto Err = getBootstrapSymbols(
+ {{JDI.JITDispatchContext, ExecutorSessionObjectName},
+ {JDI.JITDispatchFunction, DispatchFnName},
+ {RunAsMainAddr, rt::RunAsMainWrapperName}}))
+ return Err;
+
+ if (auto DM =
+ EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(*this))
+ DylibMgr = std::make_unique<EPCGenericDylibManager>(std::move(*DM));
+ else
+ return DM.takeError();
+
+ // Set a default CreateMemoryManager if none is specified.
+ if (!S.CreateMemoryManager)
+ S.CreateMemoryManager = createDefaultMemoryManager;
+
+ if (auto MemMgr = S.CreateMemoryManager(*this)) {
+ OwnedMemMgr = std::move(*MemMgr);
+ this->MemMgr = OwnedMemMgr.get();
+ } else
+ return MemMgr.takeError();
+
+ // Set a default CreateMemoryAccess if none is specified.
+ if (!S.CreateMemoryAccess)
+ S.CreateMemoryAccess = createDefaultMemoryAccess;
+
+ if (auto MemAccess = S.CreateMemoryAccess(*this)) {
+ OwnedMemAccess = std::move(*MemAccess);
+ this->MemAccess = OwnedMemAccess.get();
+ } else
+ return MemAccess.takeError();
+
+ return Error::success();
+}
+
+Error SimpleRemoteEPC::handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ IncomingWFRHandler SendResult;
+
+ if (TagAddr)
+ return make_error<StringError>("Unexpected TagAddr in result message",
+ inconvertibleErrorCode());
+
+ {
+ std::lock_guard<std::mutex> Lock(SimpleRemoteEPCMutex);
+ auto I = PendingCallWrapperResults.find(SeqNo);
+ if (I == PendingCallWrapperResults.end())
+ return make_error<StringError>("No call for sequence number " +
+ Twine(SeqNo),
+ inconvertibleErrorCode());
+ SendResult = std::move(I->second);
+ PendingCallWrapperResults.erase(I);
+ releaseSeqNo(SeqNo);
+ }
+
+ auto WFR =
+ shared::WrapperFunctionResult::copyFrom(ArgBytes.data(), ArgBytes.size());
+ SendResult(std::move(WFR));
+ return Error::success();
+}
+
+void SimpleRemoteEPC::handleCallWrapper(
+ uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ assert(ES && "No ExecutionSession attached");
+ D->dispatch(makeGenericNamedTask(
+ [this, RemoteSeqNo, TagAddr, ArgBytes = std::move(ArgBytes)]() {
+ ES->runJITDispatchHandler(
+ [this, RemoteSeqNo](shared::WrapperFunctionResult WFR) {
+ if (auto Err =
+ sendMessage(SimpleRemoteEPCOpcode::Result, RemoteSeqNo,
+ ExecutorAddr(), {WFR.data(), WFR.size()}))
+ getExecutionSession().reportError(std::move(Err));
+ },
+ TagAddr.getValue(), ArgBytes);
+ },
+ "callWrapper task"));
+}
+
+Error SimpleRemoteEPC::handleHangup(SimpleRemoteEPCArgBytesVector ArgBytes) {
+ using namespace llvm::orc::shared;
+ auto WFR = WrapperFunctionResult::copyFrom(ArgBytes.data(), ArgBytes.size());
+ if (const char *ErrMsg = WFR.getOutOfBandError())
+ return make_error<StringError>(ErrMsg, inconvertibleErrorCode());
+
+ detail::SPSSerializableError Info;
+ SPSInputBuffer IB(WFR.data(), WFR.size());
+ if (!SPSArgList<SPSError>::deserialize(IB, Info))
+ return make_error<StringError>("Could not deserialize hangup info",
+ inconvertibleErrorCode());
+ return fromSPSSerializable(std::move(Info));
+}
+
+} // end namespace orc
+} // end namespace llvm
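
A note on the sequence-number bookkeeping above: callWrapperAsync stores the completion handler under a fresh SeqNo before sending, and handleResult retrieves and erases it when the executor's Result message arrives, always invoking the handler outside the lock. The same pattern in isolation, with generic stand-ins rather than the ORC types:

#include <cstdint>
#include <functional>
#include <map>
#include <mutex>

// Generic stand-in for the SeqNo -> handler bookkeeping used by
// SimpleRemoteEPC::callWrapperAsync and SimpleRemoteEPC::handleResult.
class PendingCallTable {
public:
  using Handler = std::function<void(const char *Data, size_t Size)>;

  // Record a handler before sending the request.
  uint64_t registerCall(Handler H) {
    std::lock_guard<std::mutex> Lock(M);
    uint64_t SeqNo = NextSeqNo++;
    Pending[SeqNo] = std::move(H);
    return SeqNo;
  }

  // Deliver a result; returns false if no call was registered for SeqNo.
  bool complete(uint64_t SeqNo, const char *Data, size_t Size) {
    Handler H;
    {
      std::lock_guard<std::mutex> Lock(M);
      auto I = Pending.find(SeqNo);
      if (I == Pending.end())
        return false;
      H = std::move(I->second);
      Pending.erase(I);
    }
    H(Data, Size); // Run the handler outside the lock, as the real code does.
    return true;
  }

private:
  std::mutex M;
  uint64_t NextSeqNo = 1;
  std::map<uint64_t, Handler> Pending;
};
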
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
index 43c2a44835fd..4c15e25b1d89 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
@@ -10,6 +10,7 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/ManagedStatic.h"
#include <cstdint>
@@ -64,14 +65,23 @@ LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() {
}
using namespace llvm;
+using namespace llvm::orc;
// Serialize rendezvous with the debugger as well as access to shared data.
ManagedStatic<std::mutex> JITDebugLock;
// Register debug object, return error message or null for success.
-static void registerJITLoaderGDBImpl(JITTargetAddress Addr, uint64_t Size) {
+static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) {
+ LLVM_DEBUG({
+ dbgs() << "Registering debug object with GDB JIT interface "
+ << formatv("([{0:x16} -- {1:x16}])",
+ reinterpret_cast<uintptr_t>(ObjAddr),
+ reinterpret_cast<uintptr_t>(ObjAddr + Size))
+ << "\n";
+ });
+
jit_code_entry *E = new jit_code_entry;
- E->symfile_addr = jitTargetAddressToPointer<const char *>(Addr);
+ E->symfile_addr = ObjAddr;
E->symfile_size = Size;
E->prev_entry = nullptr;
@@ -92,10 +102,26 @@ static void registerJITLoaderGDBImpl(JITTargetAddress Addr, uint64_t Size) {
__jit_debug_register_code();
}
-extern "C" orc::shared::detail::CWrapperFunctionResult
+extern "C" orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size) {
+ using namespace orc::shared;
+ return WrapperFunction<SPSError()>::handle(nullptr, 0,
+ [=]() -> Error {
+ registerJITLoaderGDBImpl(Data,
+ Size);
+ return Error::success();
+ })
+ .release();
+}
+
+extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size) {
using namespace orc::shared;
- return WrapperFunction<void(SPSExecutorAddress, uint64_t)>::handle(
- Data, Size, registerJITLoaderGDBImpl)
+ return WrapperFunction<void(SPSExecutorAddrRange)>::handle(
+ Data, Size,
+ [](ExecutorAddrRange R) {
+ registerJITLoaderGDBImpl(R.Start.toPtr<char *>(),
+ R.size().getValue());
+ })
.release();
}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp
new file mode 100644
index 000000000000..82aa62a0c0d9
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp
@@ -0,0 +1,84 @@
+//===------------------------ OrcRTBootstrap.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OrcRTBootstrap.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm::orc::shared;
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+template <typename WriteT, typename SPSWriteT>
+static llvm::orc::shared::CWrapperFunctionResult
+writeUIntsWrapper(const char *ArgData, size_t ArgSize) {
+ return WrapperFunction<void(SPSSequence<SPSWriteT>)>::handle(
+ ArgData, ArgSize,
+ [](std::vector<WriteT> Ws) {
+ for (auto &W : Ws)
+ *W.Addr.template toPtr<decltype(W.Value) *>() = W.Value;
+ })
+ .release();
+}
+
+static llvm::orc::shared::CWrapperFunctionResult
+writeBuffersWrapper(const char *ArgData, size_t ArgSize) {
+ return WrapperFunction<void(SPSSequence<SPSMemoryAccessBufferWrite>)>::handle(
+ ArgData, ArgSize,
+ [](std::vector<tpctypes::BufferWrite> Ws) {
+ for (auto &W : Ws)
+ memcpy(W.Addr.template toPtr<char *>(), W.Buffer.data(),
+ W.Buffer.size());
+ })
+ .release();
+}
+
+static llvm::orc::shared::CWrapperFunctionResult
+runAsMainWrapper(const char *ArgData, size_t ArgSize) {
+ return WrapperFunction<rt::SPSRunAsMainSignature>::handle(
+ ArgData, ArgSize,
+ [](ExecutorAddr MainAddr,
+ std::vector<std::string> Args) -> int64_t {
+ return runAsMain(MainAddr.toPtr<int (*)(int, char *[])>(), Args);
+ })
+ .release();
+}
+
+void addTo(StringMap<ExecutorAddr> &M) {
+ M[rt::MemoryWriteUInt8sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt8Write,
+ shared::SPSMemoryAccessUInt8Write>);
+ M[rt::MemoryWriteUInt16sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt16Write,
+ shared::SPSMemoryAccessUInt16Write>);
+ M[rt::MemoryWriteUInt32sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt32Write,
+ shared::SPSMemoryAccessUInt32Write>);
+ M[rt::MemoryWriteUInt64sWrapperName] = ExecutorAddr::fromPtr(
+ &writeUIntsWrapper<tpctypes::UInt64Write,
+ shared::SPSMemoryAccessUInt64Write>);
+ M[rt::MemoryWriteBuffersWrapperName] =
+ ExecutorAddr::fromPtr(&writeBuffersWrapper);
+ M[rt::RegisterEHFrameSectionCustomDirectWrapperName] = ExecutorAddr::fromPtr(
+ &llvm_orc_registerEHFrameSectionCustomDirectWrapper);
+ M[rt::DeregisterEHFrameSectionCustomDirectWrapperName] =
+ ExecutorAddr::fromPtr(
+ &llvm_orc_deregisterEHFrameSectionCustomDirectWrapper);
+ M[rt::RunAsMainWrapperName] = ExecutorAddr::fromPtr(&runAsMainWrapper);
+}
+
+} // end namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
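
addTo simply registers the wrapper entry points under their well-known rt:: names; a server builds its bootstrap symbol map this way and ships it to the controller in the Setup message. A minimal lookup sketch using only names that appear in this patch (error handling omitted; OrcRTBootstrap.h is an in-tree private header, included here the same way SimpleRemoteEPCServer.cpp includes it):

#include "OrcRTBootstrap.h" // Private in-tree header providing rt_bootstrap::addTo.
#include "llvm/ADT/StringMap.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"

using namespace llvm;
using namespace llvm::orc;

ExecutorAddr findRunAsMainWrapper() {
  // Populate the executor-side bootstrap symbol map...
  StringMap<ExecutorAddr> BootstrapSymbols;
  rt_bootstrap::addTo(BootstrapSymbols);

  // ...then look up the run-as-main wrapper by its well-known name. The
  // controller later does the same via getBootstrapSymbols.
  auto I = BootstrapSymbols.find(rt::RunAsMainWrapperName);
  return I != BootstrapSymbols.end() ? I->second : ExecutorAddr();
}
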
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h
new file mode 100644
index 000000000000..6b7ff79a3efc
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h
@@ -0,0 +1,36 @@
+//===----------------------- OrcRTBootstrap.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// OrcRTBootstrap provides functions that should be linked into the executor
+// to bootstrap common JIT functionality (e.g. memory allocation and memory
+// access).
+//
+// Call rt_bootstrap::addTo to add these functions to a bootstrap symbols map.
+//
+// FIXME: The functionality in this file should probably be moved to an ORC
+// runtime bootstrap library in compiler-rt.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRTBOOTSTRAP_H
+#define LIB_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRTBOOTSTRAP_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+void addTo(StringMap<ExecutorAddr> &M);
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LIB_EXECUTIONENGINE_ORC_TARGETPROCESS_ORCRTBOOTSTRAP_H
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
index 4a408d61ee38..e331bad84200 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp
@@ -1,9 +1,8 @@
//===--------- RegisterEHFrames.cpp - Register EH frame sections ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -86,11 +85,11 @@ static Error deregisterFrameWrapper(const void *P) {
}
#endif
-#ifdef __APPLE__
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
template <typename HandleFDEFn>
-Error walkAppleEHFrameSection(const char *const SectionStart,
- size_t SectionSize, HandleFDEFn HandleFDE) {
+Error walkLibunwindEHFrameSection(const char *const SectionStart,
+ size_t SectionSize, HandleFDEFn HandleFDE) {
const char *CurCFIRecord = SectionStart;
const char *End = SectionStart + SectionSize;
uint64_t Size = *reinterpret_cast<const uint32_t *>(CurCFIRecord);
@@ -124,16 +123,19 @@ Error walkAppleEHFrameSection(const char *const SectionStart,
return Error::success();
}
-#endif // __APPLE__
+#endif // HAVE_UNW_ADD_DYNAMIC_FDE || __APPLE__
Error registerEHFrameSection(const void *EHFrameSectionAddr,
size_t EHFrameSectionSize) {
-#ifdef __APPLE__
- // On Darwin __register_frame has to be called for each FDE entry.
- return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
- EHFrameSectionSize, registerFrameWrapper);
+ /* libgcc and libunwind __register_frame behave differently. We use the
+ * presence of __unw_add_dynamic_fde to detect libunwind. */
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
+ // With libunwind, __register_frame has to be called for each FDE entry.
+ return walkLibunwindEHFrameSection(
+ static_cast<const char *>(EHFrameSectionAddr), EHFrameSectionSize,
+ registerFrameWrapper);
#else
- // On Linux __register_frame takes a single argument:
+ // With libgcc, __register_frame takes a single argument:
// a pointer to the start of the .eh_frame section.
// How can it find the end? Because crtendS.o is linked
@@ -144,9 +146,10 @@ Error registerEHFrameSection(const void *EHFrameSectionAddr,
Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
size_t EHFrameSectionSize) {
-#ifdef __APPLE__
- return walkAppleEHFrameSection(static_cast<const char *>(EHFrameSectionAddr),
- EHFrameSectionSize, deregisterFrameWrapper);
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
+ return walkLibunwindEHFrameSection(
+ static_cast<const char *>(EHFrameSectionAddr), EHFrameSectionSize,
+ deregisterFrameWrapper);
#else
return deregisterFrameWrapper(EHFrameSectionAddr);
#endif
@@ -155,26 +158,42 @@ Error deregisterEHFrameSection(const void *EHFrameSectionAddr,
} // end namespace orc
} // end namespace llvm
-static Error registerEHFrameWrapper(JITTargetAddress Addr, uint64_t Size) {
- return llvm::orc::registerEHFrameSection(
- jitTargetAddressToPointer<const void *>(Addr), Size);
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_registerEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size) {
+ if (auto Err = registerEHFrameSection(EHFrameSectionAddr, Size))
+ return WrapperFunctionResult::createOutOfBandError(toString(std::move(Err)))
+ .release();
+ return llvm::orc::shared::CWrapperFunctionResult();
+}
+
+extern "C" llvm::orc::shared::CWrapperFunctionResult
+llvm_orc_deregisterEHFrameSectionCustomDirectWrapper(
+ const char *EHFrameSectionAddr, uint64_t Size) {
+ if (auto Err = deregisterEHFrameSection(EHFrameSectionAddr, Size))
+ return WrapperFunctionResult::createOutOfBandError(toString(std::move(Err)))
+ .release();
+ return llvm::orc::shared::CWrapperFunctionResult();
+}
+
+static Error registerEHFrameWrapper(ExecutorAddr Addr, uint64_t Size) {
+ return llvm::orc::registerEHFrameSection(Addr.toPtr<const void *>(), Size);
}
-static Error deregisterEHFrameWrapper(JITTargetAddress Addr, uint64_t Size) {
- return llvm::orc::deregisterEHFrameSection(
- jitTargetAddressToPointer<const void *>(Addr), Size);
+static Error deregisterEHFrameWrapper(ExecutorAddr Addr, uint64_t Size) {
+ return llvm::orc::deregisterEHFrameSection(Addr.toPtr<const void *>(), Size);
}
-extern "C" orc::shared::detail::CWrapperFunctionResult
+extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_registerEHFrameSectionWrapper(const char *Data, uint64_t Size) {
- return WrapperFunction<SPSError(SPSExecutorAddress, uint64_t)>::handle(
+ return WrapperFunction<SPSError(SPSExecutorAddr, uint64_t)>::handle(
Data, Size, registerEHFrameWrapper)
.release();
}
-extern "C" orc::shared::detail::CWrapperFunctionResult
+extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_deregisterEHFrameSectionWrapper(const char *Data, uint64_t Size) {
- return WrapperFunction<SPSError(SPSExecutorAddress, uint64_t)>::handle(
+ return WrapperFunction<SPSError(SPSExecutorAddr, uint64_t)>::handle(
Data, Size, deregisterEHFrameWrapper)
.release();
}
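
The reason for the branch above: libgcc's __register_frame keeps its own object registry and accepts a pointer to the whole .eh_frame section, while libunwind (and the Darwin unwinder) expects one call per CFI record. A rough sketch of the two call shapes, assuming __register_frame is supplied by whichever unwinder is linked in; the record walk ignores the 64-bit extended-length form, and unlike walkLibunwindEHFrameSection it does not distinguish CIEs from FDEs:

#include <cstddef>
#include <cstdint>

extern "C" void __register_frame(void *); // Provided by the unwinder.

// libunwind / Darwin: walk the section and register records one by one.
// Each record begins with a 32-bit length field; a zero length terminates
// the section.
inline void registerWithLibunwind(const char *Section, size_t Size) {
  const char *Cur = Section;
  const char *End = Section + Size;
  while (Cur != End) {
    uint32_t RecordSize = *reinterpret_cast<const uint32_t *>(Cur);
    if (RecordSize == 0)
      break;
    __register_frame(const_cast<char *>(Cur));
    Cur += RecordSize + 4; // The length field does not count itself.
  }
}

// libgcc: a single call covers the whole section; the zero terminator
// emitted by crtendS.o marks its end.
inline void registerWithLibgcc(const char *Section) {
  __register_frame(const_cast<char *>(Section));
}
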
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp
new file mode 100644
index 000000000000..3c9dd21b0832
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp
@@ -0,0 +1,129 @@
+//===--- SimpleExecutorDylibManager.cpp - Executor-side dylib management --===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+SimpleExecutorDylibManager::~SimpleExecutorDylibManager() {
+ assert(Dylibs.empty() && "shutdown not called?");
+}
+
+Expected<tpctypes::DylibHandle>
+SimpleExecutorDylibManager::open(const std::string &Path, uint64_t Mode) {
+ if (Mode != 0)
+ return make_error<StringError>("open: non-zero mode bits not yet supported",
+ inconvertibleErrorCode());
+
+ const char *PathCStr = Path.empty() ? nullptr : Path.c_str();
+ std::string ErrMsg;
+
+ auto DL = sys::DynamicLibrary::getPermanentLibrary(PathCStr, &ErrMsg);
+ if (!DL.isValid())
+ return make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode());
+
+ std::lock_guard<std::mutex> Lock(M);
+ Dylibs[NextId] = std::move(DL);
+ return NextId++;
+}
+
+Expected<std::vector<ExecutorAddr>>
+SimpleExecutorDylibManager::lookup(tpctypes::DylibHandle H,
+ const RemoteSymbolLookupSet &L) {
+ std::vector<ExecutorAddr> Result;
+
+ std::lock_guard<std::mutex> Lock(M);
+ auto I = Dylibs.find(H);
+ if (I == Dylibs.end())
+ return make_error<StringError>("No dylib for handle " + formatv("{0:x}", H),
+ inconvertibleErrorCode());
+ auto &DL = I->second;
+
+ for (const auto &E : L) {
+
+ if (E.Name.empty()) {
+ if (E.Required)
+ return make_error<StringError>("Required address for empty symbol \"\"",
+ inconvertibleErrorCode());
+ else
+ Result.push_back(ExecutorAddr());
+ } else {
+
+ const char *DemangledSymName = E.Name.c_str();
+#ifdef __APPLE__
+ if (E.Name.front() != '_')
+ return make_error<StringError>(Twine("MachO symbol \"") + E.Name +
+ "\" missing leading '_'",
+ inconvertibleErrorCode());
+ ++DemangledSymName;
+#endif
+
+ void *Addr = DL.getAddressOfSymbol(DemangledSymName);
+ if (!Addr && E.Required)
+ return make_error<StringError>(Twine("Missing definition for ") +
+ DemangledSymName,
+ inconvertibleErrorCode());
+
+ Result.push_back(ExecutorAddr::fromPtr(Addr));
+ }
+ }
+
+ return Result;
+}
+
+Error SimpleExecutorDylibManager::shutdown() {
+
+ DylibsMap DM;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ std::swap(DM, Dylibs);
+ }
+
+ // There is no removal of dylibs at the moment, so nothing to do here.
+ return Error::success();
+}
+
+void SimpleExecutorDylibManager::addBootstrapSymbols(
+ StringMap<ExecutorAddr> &M) {
+ M[rt::SimpleExecutorDylibManagerInstanceName] = ExecutorAddr::fromPtr(this);
+ M[rt::SimpleExecutorDylibManagerOpenWrapperName] =
+ ExecutorAddr::fromPtr(&openWrapper);
+ M[rt::SimpleExecutorDylibManagerLookupWrapperName] =
+ ExecutorAddr::fromPtr(&lookupWrapper);
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorDylibManager::openWrapper(const char *ArgData, size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleExecutorDylibManagerOpenSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorDylibManager::open))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorDylibManager::lookupWrapper(const char *ArgData, size_t ArgSize) {
+ return shared::
+ WrapperFunction<rt::SPSSimpleExecutorDylibManagerLookupSignature>::handle(
+ ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorDylibManager::lookup))
+ .release();
+}
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
new file mode 100644
index 000000000000..232340c22a32
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -0,0 +1,261 @@
+//===- SimpleExecutorMemoryManager.cpp - Simple executor-side memory mgmt -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h"
+#include "llvm/Support/FormatVariadic.h"
+
+#define DEBUG_TYPE "orc"
+
+namespace llvm {
+namespace orc {
+namespace rt_bootstrap {
+
+SimpleExecutorMemoryManager::~SimpleExecutorMemoryManager() {
+ assert(Allocations.empty() && "shutdown not called?");
+}
+
+Expected<ExecutorAddr> SimpleExecutorMemoryManager::allocate(uint64_t Size) {
+ std::error_code EC;
+ auto MB = sys::Memory::allocateMappedMemory(
+ Size, 0, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
+ if (EC)
+ return errorCodeToError(EC);
+ std::lock_guard<std::mutex> Lock(M);
+ assert(!Allocations.count(MB.base()) && "Duplicate allocation addr");
+ Allocations[MB.base()].Size = Size;
+ return ExecutorAddr::fromPtr(MB.base());
+}
+
+Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
+ ExecutorAddr Base(~0ULL);
+ std::vector<tpctypes::WrapperFunctionCall> DeallocationActions;
+ size_t SuccessfulFinalizationActions = 0;
+
+ if (FR.Segments.empty()) {
+ // NOTE: Finalizing nothing is currently a no-op. Should it be an error?
+ if (FR.Actions.empty())
+ return Error::success();
+ else
+ return make_error<StringError>("Finalization actions attached to empty "
+ "finalization request",
+ inconvertibleErrorCode());
+ }
+
+ for (auto &Seg : FR.Segments)
+ Base = std::min(Base, Seg.Addr);
+
+ for (auto &ActPair : FR.Actions)
+ if (ActPair.Deallocate.Func)
+ DeallocationActions.push_back(ActPair.Deallocate);
+
+ // Get the Allocation for this finalization.
+ size_t AllocSize = 0;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto I = Allocations.find(Base.toPtr<void *>());
+ if (I == Allocations.end())
+ return make_error<StringError>("Attempt to finalize unrecognized "
+ "allocation " +
+ formatv("{0:x}", Base.getValue()),
+ inconvertibleErrorCode());
+ AllocSize = I->second.Size;
+ I->second.DeallocationActions = std::move(DeallocationActions);
+ }
+ ExecutorAddr AllocEnd = Base + ExecutorAddrDiff(AllocSize);
+
+ // Bail-out function: this will run deallocation actions corresponding to any
+ // completed finalization actions, then deallocate memory.
+ auto BailOut = [&](Error Err) {
+ std::pair<void *, Allocation> AllocToDestroy;
+
+    // Get the allocation to destroy.
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ auto I = Allocations.find(Base.toPtr<void *>());
+
+      // Check for a missing allocation (effectively a double free).
+ if (I == Allocations.end())
+ return joinErrors(
+ std::move(Err),
+ make_error<StringError>("No allocation entry found "
+ "for " +
+ formatv("{0:x}", Base.getValue()),
+ inconvertibleErrorCode()));
+ AllocToDestroy = std::move(*I);
+ Allocations.erase(I);
+ }
+
+ // Run deallocation actions for all completed finalization actions.
+ while (SuccessfulFinalizationActions)
+ Err =
+ joinErrors(std::move(Err), FR.Actions[--SuccessfulFinalizationActions]
+ .Deallocate.runWithSPSRet());
+
+ // Deallocate memory.
+ sys::MemoryBlock MB(AllocToDestroy.first, AllocToDestroy.second.Size);
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+
+ return Err;
+ };
+
+ // Copy content and apply permissions.
+ for (auto &Seg : FR.Segments) {
+
+ // Check segment ranges.
+ if (LLVM_UNLIKELY(Seg.Size < Seg.Content.size()))
+ return BailOut(make_error<StringError>(
+ formatv("Segment {0:x} content size ({1:x} bytes) "
+ "exceeds segment size ({2:x} bytes)",
+ Seg.Addr.getValue(), Seg.Content.size(), Seg.Size),
+ inconvertibleErrorCode()));
+ ExecutorAddr SegEnd = Seg.Addr + ExecutorAddrDiff(Seg.Size);
+ if (LLVM_UNLIKELY(Seg.Addr < Base || SegEnd > AllocEnd))
+ return BailOut(make_error<StringError>(
+ formatv("Segment {0:x} -- {1:x} crosses boundary of "
+ "allocation {2:x} -- {3:x}",
+ Seg.Addr.getValue(), SegEnd.getValue(), Base.getValue(),
+ AllocEnd.getValue()),
+ inconvertibleErrorCode()));
+
+ char *Mem = Seg.Addr.toPtr<char *>();
+ memcpy(Mem, Seg.Content.data(), Seg.Content.size());
+ memset(Mem + Seg.Content.size(), 0, Seg.Size - Seg.Content.size());
+ assert(Seg.Size <= std::numeric_limits<size_t>::max());
+ if (auto EC = sys::Memory::protectMappedMemory(
+ {Mem, static_cast<size_t>(Seg.Size)},
+ tpctypes::fromWireProtectionFlags(Seg.Prot)))
+ return BailOut(errorCodeToError(EC));
+ if (Seg.Prot & tpctypes::WPF_Exec)
+ sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
+ }
+
+ // Run finalization actions.
+ for (auto &ActPair : FR.Actions) {
+ if (auto Err = ActPair.Finalize.runWithSPSRet())
+ return BailOut(std::move(Err));
+ ++SuccessfulFinalizationActions;
+ }
+
+ return Error::success();
+}
+
+Error SimpleExecutorMemoryManager::deallocate(
+ const std::vector<ExecutorAddr> &Bases) {
+ std::vector<std::pair<void *, Allocation>> AllocPairs;
+ AllocPairs.reserve(Bases.size());
+
+  // Get the allocations to destroy.
+ Error Err = Error::success();
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ for (auto &Base : Bases) {
+ auto I = Allocations.find(Base.toPtr<void *>());
+
+      // Check for a missing allocation (effectively a double free).
+ if (I != Allocations.end()) {
+ AllocPairs.push_back(std::move(*I));
+ Allocations.erase(I);
+ } else
+ Err = joinErrors(
+ std::move(Err),
+ make_error<StringError>("No allocation entry found "
+ "for " +
+ formatv("{0:x}", Base.getValue()),
+ inconvertibleErrorCode()));
+ }
+ }
+
+ while (!AllocPairs.empty()) {
+ auto &P = AllocPairs.back();
+ Err = joinErrors(std::move(Err), deallocateImpl(P.first, P.second));
+ AllocPairs.pop_back();
+ }
+
+ return Err;
+}
+
+Error SimpleExecutorMemoryManager::shutdown() {
+
+ AllocationsMap AM;
+ {
+ std::lock_guard<std::mutex> Lock(M);
+ AM = std::move(Allocations);
+ }
+
+ Error Err = Error::success();
+ for (auto &KV : AM)
+ Err = joinErrors(std::move(Err), deallocateImpl(KV.first, KV.second));
+ return Err;
+}
+
+void SimpleExecutorMemoryManager::addBootstrapSymbols(
+ StringMap<ExecutorAddr> &M) {
+ M[rt::SimpleExecutorMemoryManagerInstanceName] = ExecutorAddr::fromPtr(this);
+ M[rt::SimpleExecutorMemoryManagerReserveWrapperName] =
+ ExecutorAddr::fromPtr(&reserveWrapper);
+ M[rt::SimpleExecutorMemoryManagerFinalizeWrapperName] =
+ ExecutorAddr::fromPtr(&finalizeWrapper);
+ M[rt::SimpleExecutorMemoryManagerDeallocateWrapperName] =
+ ExecutorAddr::fromPtr(&deallocateWrapper);
+}
+
+Error SimpleExecutorMemoryManager::deallocateImpl(void *Base, Allocation &A) {
+ Error Err = Error::success();
+
+ while (!A.DeallocationActions.empty()) {
+ Err = joinErrors(std::move(Err),
+ A.DeallocationActions.back().runWithSPSRet());
+ A.DeallocationActions.pop_back();
+ }
+
+ sys::MemoryBlock MB(Base, A.Size);
+ if (auto EC = sys::Memory::releaseMappedMemory(MB))
+ Err = joinErrors(std::move(Err), errorCodeToError(EC));
+
+ return Err;
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::reserveWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSSimpleExecutorMemoryManagerReserveSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::allocate))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::finalizeWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSSimpleExecutorMemoryManagerFinalizeSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::finalize))
+ .release();
+}
+
+llvm::orc::shared::CWrapperFunctionResult
+SimpleExecutorMemoryManager::deallocateWrapper(const char *ArgData,
+ size_t ArgSize) {
+ return shared::WrapperFunction<
+ rt::SPSSimpleExecutorMemoryManagerDeallocateSignature>::
+ handle(ArgData, ArgSize,
+ shared::makeMethodWrapperHandler(
+ &SimpleExecutorMemoryManager::deallocate))
+ .release();
+}
+
+} // namespace rt_bootstrap
+} // end namespace orc
+} // end namespace llvm
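
finalize above follows a rollback discipline: it counts the finalize actions that succeed and, if anything fails, runs the matching deallocation actions in reverse order before releasing the memory. The same idea in miniature, with generic stand-ins rather than the ORC finalize/deallocate action types:

#include <cstddef>
#include <functional>
#include <vector>

// Generic stand-in for the finalize/deallocate action pairs: on failure,
// undo the actions that already succeeded, most recent first.
struct ActionPair {
  std::function<bool()> Run;  // Finalize step; false means failure.
  std::function<void()> Undo; // Matching deallocation step.
};

bool runWithRollback(const std::vector<ActionPair> &Actions) {
  size_t Succeeded = 0;
  for (const auto &A : Actions) {
    if (!A.Run())
      break;
    ++Succeeded;
  }
  if (Succeeded == Actions.size())
    return true;
  // Roll back the completed steps in reverse order.
  while (Succeeded)
    Actions[--Succeeded].Undo();
  return false;
}
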
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
new file mode 100644
index 000000000000..b6b21bde1182
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
@@ -0,0 +1,293 @@
+//===---- SimpleRemoteEPCServer.cpp - EPC over simple abstract channel ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"
+
+#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Process.h"
+
+#include "OrcRTBootstrap.h"
+
+#define DEBUG_TYPE "orc"
+
+using namespace llvm::orc::shared;
+
+namespace llvm {
+namespace orc {
+
+ExecutorBootstrapService::~ExecutorBootstrapService() {}
+
+SimpleRemoteEPCServer::Dispatcher::~Dispatcher() {}
+
+#if LLVM_ENABLE_THREADS
+void SimpleRemoteEPCServer::ThreadDispatcher::dispatch(
+ unique_function<void()> Work) {
+ {
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ if (!Running)
+ return;
+ ++Outstanding;
+ }
+
+ std::thread([this, Work = std::move(Work)]() mutable {
+ Work();
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ --Outstanding;
+ OutstandingCV.notify_all();
+ }).detach();
+}
+
+void SimpleRemoteEPCServer::ThreadDispatcher::shutdown() {
+ std::unique_lock<std::mutex> Lock(DispatchMutex);
+ Running = false;
+ OutstandingCV.wait(Lock, [this]() { return Outstanding == 0; });
+}
+#endif
+
+StringMap<ExecutorAddr> SimpleRemoteEPCServer::defaultBootstrapSymbols() {
+ StringMap<ExecutorAddr> DBS;
+ rt_bootstrap::addTo(DBS);
+ return DBS;
+}
+
+Expected<SimpleRemoteEPCTransportClient::HandleMessageAction>
+SimpleRemoteEPCServer::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
+ ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPCServer::handleMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ dbgs() << "Setup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+
+ using UT = std::underlying_type_t<SimpleRemoteEPCOpcode>;
+ if (static_cast<UT>(OpC) > static_cast<UT>(SimpleRemoteEPCOpcode::LastOpC))
+ return make_error<StringError>("Unexpected opcode",
+ inconvertibleErrorCode());
+
+ // TODO: Clean detach message?
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ return make_error<StringError>("Unexpected Setup opcode",
+ inconvertibleErrorCode());
+ case SimpleRemoteEPCOpcode::Hangup:
+ return SimpleRemoteEPCTransportClient::EndSession;
+ case SimpleRemoteEPCOpcode::Result:
+ if (auto Err = handleResult(SeqNo, TagAddr, std::move(ArgBytes)))
+ return std::move(Err);
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ handleCallWrapper(SeqNo, TagAddr, std::move(ArgBytes));
+ break;
+ }
+ return ContinueSession;
+}
+
+Error SimpleRemoteEPCServer::waitForDisconnect() {
+ std::unique_lock<std::mutex> Lock(ServerStateMutex);
+ ShutdownCV.wait(Lock, [this]() { return RunState == ServerShutDown; });
+ return std::move(ShutdownErr);
+}
+
+void SimpleRemoteEPCServer::handleDisconnect(Error Err) {
+ PendingJITDispatchResultsMap TmpPending;
+
+ {
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ std::swap(TmpPending, PendingJITDispatchResults);
+ RunState = ServerShuttingDown;
+ }
+
+ // Send out-of-band errors to any waiting threads.
+ for (auto &KV : TmpPending)
+ KV.second->set_value(
+ shared::WrapperFunctionResult::createOutOfBandError("disconnecting"));
+
+ // Wait for dispatcher to clear.
+ D->shutdown();
+
+ // Shut down services.
+ while (!Services.empty()) {
+ ShutdownErr =
+ joinErrors(std::move(ShutdownErr), Services.back()->shutdown());
+ Services.pop_back();
+ }
+
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ ShutdownErr = joinErrors(std::move(ShutdownErr), std::move(Err));
+ RunState = ServerShutDown;
+ ShutdownCV.notify_all();
+}
+
+Error SimpleRemoteEPCServer::sendMessage(SimpleRemoteEPCOpcode OpC,
+ uint64_t SeqNo, ExecutorAddr TagAddr,
+ ArrayRef<char> ArgBytes) {
+
+ LLVM_DEBUG({
+ dbgs() << "SimpleRemoteEPCServer::sendMessage: opc = ";
+ switch (OpC) {
+ case SimpleRemoteEPCOpcode::Setup:
+ dbgs() << "Setup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ break;
+ case SimpleRemoteEPCOpcode::Hangup:
+ dbgs() << "Hangup";
+ assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ break;
+ case SimpleRemoteEPCOpcode::Result:
+ dbgs() << "Result";
+ assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ break;
+ case SimpleRemoteEPCOpcode::CallWrapper:
+ dbgs() << "CallWrapper";
+ break;
+ }
+ dbgs() << ", seqno = " << SeqNo
+ << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ << ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
+ << " bytes\n";
+ });
+ auto Err = T->sendMessage(OpC, SeqNo, TagAddr, ArgBytes);
+ LLVM_DEBUG({
+ if (Err)
+ dbgs() << " \\--> SimpleRemoteEPC::sendMessage failed\n";
+ });
+ return Err;
+}
+
+Error SimpleRemoteEPCServer::sendSetupMessage(
+ StringMap<ExecutorAddr> BootstrapSymbols) {
+
+ using namespace SimpleRemoteEPCDefaultBootstrapSymbolNames;
+
+ std::vector<char> SetupPacket;
+ SimpleRemoteEPCExecutorInfo EI;
+ EI.TargetTriple = sys::getProcessTriple();
+ if (auto PageSize = sys::Process::getPageSize())
+ EI.PageSize = *PageSize;
+ else
+ return PageSize.takeError();
+ EI.BootstrapSymbols = std::move(BootstrapSymbols);
+
+ assert(!EI.BootstrapSymbols.count(ExecutorSessionObjectName) &&
+ "Dispatch context name should not be set");
+ assert(!EI.BootstrapSymbols.count(DispatchFnName) &&
+ "Dispatch function name should not be set");
+ EI.BootstrapSymbols[ExecutorSessionObjectName] = ExecutorAddr::fromPtr(this);
+ EI.BootstrapSymbols[DispatchFnName] = ExecutorAddr::fromPtr(jitDispatchEntry);
+
+ using SPSSerialize =
+ shared::SPSArgList<shared::SPSSimpleRemoteEPCExecutorInfo>;
+ auto SetupPacketBytes =
+ shared::WrapperFunctionResult::allocate(SPSSerialize::size(EI));
+ shared::SPSOutputBuffer OB(SetupPacketBytes.data(), SetupPacketBytes.size());
+ if (!SPSSerialize::serialize(OB, EI))
+ return make_error<StringError>("Could not send setup packet",
+ inconvertibleErrorCode());
+
+ return sendMessage(SimpleRemoteEPCOpcode::Setup, 0, ExecutorAddr(),
+ {SetupPacketBytes.data(), SetupPacketBytes.size()});
+}
+
+Error SimpleRemoteEPCServer::handleResult(
+ uint64_t SeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ std::promise<shared::WrapperFunctionResult> *P = nullptr;
+ {
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ auto I = PendingJITDispatchResults.find(SeqNo);
+ if (I == PendingJITDispatchResults.end())
+ return make_error<StringError>("No call for sequence number " +
+ Twine(SeqNo),
+ inconvertibleErrorCode());
+ P = I->second;
+ PendingJITDispatchResults.erase(I);
+ releaseSeqNo(SeqNo);
+ }
+ auto R = shared::WrapperFunctionResult::allocate(ArgBytes.size());
+ memcpy(R.data(), ArgBytes.data(), ArgBytes.size());
+ P->set_value(std::move(R));
+ return Error::success();
+}
+
+void SimpleRemoteEPCServer::handleCallWrapper(
+ uint64_t RemoteSeqNo, ExecutorAddr TagAddr,
+ SimpleRemoteEPCArgBytesVector ArgBytes) {
+ D->dispatch([this, RemoteSeqNo, TagAddr, ArgBytes = std::move(ArgBytes)]() {
+ using WrapperFnTy =
+ shared::CWrapperFunctionResult (*)(const char *, size_t);
+ auto *Fn = TagAddr.toPtr<WrapperFnTy>();
+ shared::WrapperFunctionResult ResultBytes(
+ Fn(ArgBytes.data(), ArgBytes.size()));
+ if (auto Err = sendMessage(SimpleRemoteEPCOpcode::Result, RemoteSeqNo,
+ ExecutorAddr(),
+ {ResultBytes.data(), ResultBytes.size()}))
+ ReportError(std::move(Err));
+ });
+}
+
+shared::WrapperFunctionResult
+SimpleRemoteEPCServer::doJITDispatch(const void *FnTag, const char *ArgData,
+ size_t ArgSize) {
+ uint64_t SeqNo;
+ std::promise<shared::WrapperFunctionResult> ResultP;
+ auto ResultF = ResultP.get_future();
+ {
+ std::lock_guard<std::mutex> Lock(ServerStateMutex);
+ if (RunState != ServerRunning)
+ return shared::WrapperFunctionResult::createOutOfBandError(
+ "jit_dispatch not available (EPC server shut down)");
+
+ SeqNo = getNextSeqNo();
+ assert(!PendingJITDispatchResults.count(SeqNo) && "SeqNo already in use");
+ PendingJITDispatchResults[SeqNo] = &ResultP;
+ }
+
+ if (auto Err = sendMessage(SimpleRemoteEPCOpcode::CallWrapper, SeqNo,
+ ExecutorAddr::fromPtr(FnTag), {ArgData, ArgSize}))
+ ReportError(std::move(Err));
+
+ return ResultF.get();
+}
+
+shared::CWrapperFunctionResult
+SimpleRemoteEPCServer::jitDispatchEntry(void *DispatchCtx, const void *FnTag,
+ const char *ArgData, size_t ArgSize) {
+ return reinterpret_cast<SimpleRemoteEPCServer *>(DispatchCtx)
+ ->doJITDispatch(FnTag, ArgData, ArgSize)
+ .release();
+}
+
+} // end namespace orc
+} // end namespace llvm
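
doJITDispatch above illustrates the executor-side blocking pattern: the thread that initiated the jit_dispatch parks on a future keyed by its sequence number, and the transport's listener thread fulfils the matching promise when the Result message arrives. Reduced to its essentials with generic types (the real code additionally fails all pending promises with an out-of-band error on disconnect):

#include <cstdint>
#include <future>
#include <map>
#include <mutex>
#include <string>

// Hypothetical stand-in for PendingJITDispatchResults: the dispatching
// thread blocks on a future, the message-handling thread sets the value.
class BlockingResultMap {
public:
  std::string callAndWait(uint64_t SeqNo) {
    std::promise<std::string> P;
    auto F = P.get_future();
    {
      std::lock_guard<std::mutex> Lock(M);
      Pending[SeqNo] = &P;
    }
    // ... the CallWrapper message would be sent here ...
    return F.get(); // Parks until deliverResult runs for SeqNo.
  }

  void deliverResult(uint64_t SeqNo, std::string Bytes) {
    std::promise<std::string> *P = nullptr;
    {
      std::lock_guard<std::mutex> Lock(M);
      auto I = Pending.find(SeqNo);
      if (I == Pending.end())
        return;
      P = I->second;
      Pending.erase(I);
    }
    P->set_value(std::move(Bytes)); // Wakes the thread parked in callAndWait.
  }

private:
  std::mutex M;
  std::map<uint64_t, std::promise<std::string> *> Pending;
};
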
diff --git a/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
new file mode 100644
index 000000000000..111c84ec87ed
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp
@@ -0,0 +1,48 @@
+//===------------ TaskDispatch.cpp - ORC task dispatch utils --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
+
+namespace llvm {
+namespace orc {
+
+char Task::ID = 0;
+char GenericNamedTask::ID = 0;
+const char *GenericNamedTask::DefaultDescription = "Generic Task";
+
+void Task::anchor() {}
+TaskDispatcher::~TaskDispatcher() {}
+
+void InPlaceTaskDispatcher::dispatch(std::unique_ptr<Task> T) { T->run(); }
+
+void InPlaceTaskDispatcher::shutdown() {}
+
+#if LLVM_ENABLE_THREADS
+void DynamicThreadPoolTaskDispatcher::dispatch(std::unique_ptr<Task> T) {
+ {
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ ++Outstanding;
+ }
+
+ std::thread([this, T = std::move(T)]() mutable {
+ T->run();
+ std::lock_guard<std::mutex> Lock(DispatchMutex);
+ --Outstanding;
+ OutstandingCV.notify_all();
+ }).detach();
+}
+
+void DynamicThreadPoolTaskDispatcher::shutdown() {
+ std::unique_lock<std::mutex> Lock(DispatchMutex);
+ Running = false;
+ OutstandingCV.wait(Lock, [this]() { return Outstanding == 0; });
+}
+#endif
+
+} // namespace orc
+} // namespace llvm
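
A minimal usage sketch for the dispatchers above, assuming the declarations in TaskDispatch.h that this file implements (DynamicThreadPoolTaskDispatcher is only available when LLVM is built with LLVM_ENABLE_THREADS):

#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"

using namespace llvm::orc;

void dispatchExample() {
  // Hand work to a detached thread, then block until all outstanding
  // tasks have completed.
  DynamicThreadPoolTaskDispatcher D;
  D.dispatch(makeGenericNamedTask([]() { /* do some work */ },
                                  "example task"));
  D.shutdown(); // Waits for the Outstanding count to reach zero.
}
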
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
index 0f6f9efe1102..210fbf6e43e3 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -84,7 +84,7 @@ llvm::JITSymbolFlags::fromObjectSymbol(const object::SymbolRef &Symbol) {
if (!SymbolType)
return SymbolType.takeError();
- if (*SymbolType & object::SymbolRef::ST_Function)
+ if (*SymbolType == object::SymbolRef::ST_Function)
Flags |= JITSymbolFlags::Callable;
return Flags;
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index b6ccd02405c1..9c8d402364c6 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -67,7 +67,9 @@ static void __deregister_frame(void *p) {
}
#endif
-#ifdef __APPLE__
+/* libgcc and libunwind __register_frame behave differently. We use the presence
+ * of __unw_add_dynamic_fde to detect libunwind. */
+#if defined(HAVE_UNW_ADD_DYNAMIC_FDE) || defined(__APPLE__)
static const char *processFDE(const char *Entry, bool isDeregister) {
const char *P = Entry;
@@ -284,7 +286,7 @@ void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name,
uint64_t Addr = getSymbolAddress(Name);
if (!Addr && AbortOnFailure)
- report_fatal_error("Program used external function '" + Name +
+ report_fatal_error(Twine("Program used external function '") + Name +
"' which could not be resolved!");
return (void*)Addr;
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 687fd839805f..f16c6bdbfa4f 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -520,6 +520,13 @@ static bool isZeroInit(const SectionRef Section) {
SectionType == MachO::S_GB_ZEROFILL;
}
+static bool isTLS(const SectionRef Section) {
+ const ObjectFile *Obj = Section.getObject();
+ if (isa<object::ELFObjectFileBase>(Obj))
+ return ELFSectionRef(Section).getFlags() & ELF::SHF_TLS;
+ return false;
+}
+
// Compute an upper bound of the memory size that is required to load all
// sections
Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
@@ -549,6 +556,7 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
bool IsCode = Section.isText();
bool IsReadOnly = isReadOnlyData(Section);
+ bool IsTLS = isTLS(Section);
Expected<StringRef> NameOrErr = Section.getName();
if (!NameOrErr)
@@ -582,7 +590,7 @@ Error RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
} else if (IsReadOnly) {
RODataAlign = std::max(RODataAlign, Alignment);
ROSectionSizes.push_back(SectionSize);
- } else {
+ } else if (!IsTLS) {
RWDataAlign = std::max(RWDataAlign, Alignment);
RWSectionSizes.push_back(SectionSize);
}
@@ -672,7 +680,7 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj,
Expected<section_iterator> RelSecOrErr = SI->getRelocatedSection();
if (!RelSecOrErr)
- report_fatal_error(toString(RelSecOrErr.takeError()));
+ report_fatal_error(Twine(toString(RelSecOrErr.takeError())));
section_iterator RelSecI = *RelSecOrErr;
if (!(RelSecI == Section))
@@ -800,6 +808,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
bool IsVirtual = Section.isVirtual();
bool IsZeroInit = isZeroInit(Section);
bool IsReadOnly = isReadOnlyData(Section);
+ bool IsTLS = isTLS(Section);
uint64_t DataSize = Section.getSize();
// An alignment of 0 (at least with ELF) is identical to an alignment of 1,
@@ -823,6 +832,7 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
uintptr_t Allocate;
unsigned SectionID = Sections.size();
uint8_t *Addr;
+ uint64_t LoadAddress = 0;
const char *pData = nullptr;
// If this section contains any bits (i.e. isn't a virtual or bss section),
@@ -851,10 +861,17 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
Allocate = DataSize + PaddingSize + StubBufSize;
if (!Allocate)
Allocate = 1;
- Addr = IsCode ? MemMgr.allocateCodeSection(Allocate, Alignment, SectionID,
- Name)
- : MemMgr.allocateDataSection(Allocate, Alignment, SectionID,
- Name, IsReadOnly);
+ if (IsTLS) {
+ auto TLSSection =
+ MemMgr.allocateTLSSection(Allocate, Alignment, SectionID, Name);
+ Addr = TLSSection.InitializationImage;
+ LoadAddress = TLSSection.Offset;
+ } else if (IsCode) {
+ Addr = MemMgr.allocateCodeSection(Allocate, Alignment, SectionID, Name);
+ } else {
+ Addr = MemMgr.allocateDataSection(Allocate, Alignment, SectionID, Name,
+ IsReadOnly);
+ }
if (!Addr)
report_fatal_error("Unable to allocate section memory!");
@@ -897,6 +914,10 @@ RuntimeDyldImpl::emitSection(const ObjectFile &Obj,
Sections.push_back(
SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData));
+ // The load address of a TLS section is not equal to the address of its
+ // initialization image
+ if (IsTLS)
+ Sections.back().setLoadAddress(LoadAddress);
// Debug info sections are linked as if their load address was zero
if (!IsRequired)
Sections.back().setLoadAddress(0);
@@ -1118,7 +1139,7 @@ void RuntimeDyldImpl::applyExternalSymbolRelocations(
// FIXME: Implement error handling that doesn't kill the host program!
if (!Addr && !Resolver.allowsZeroSymbols())
- report_fatal_error("Program used external function '" + Name +
+ report_fatal_error(Twine("Program used external function '") + Name +
"' which could not be resolved!");
// If Resolver returned UINT64_MAX, the client wants to handle this symbol
@@ -1261,6 +1282,14 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress(
return 0;
}
+RuntimeDyld::MemoryManager::TLSSection
+RuntimeDyld::MemoryManager::allocateTLSSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) {
+ report_fatal_error("allocation of TLS not implemented");
+}
+
void RuntimeDyld::MemoryManager::anchor() {}
void JITSymbolResolver::anchor() {}
void LegacyJITSymbolResolver::anchor() {}
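A minimal sketch (not part of the patch) of how a JIT client could implement the new hook in its memory manager, assuming allocateTLSSection is a virtual member of RuntimeDyld::MemoryManager with the TLSSection fields (InitializationImage, Offset) used above; the class name and allocation strategy are illustrative, and a real client must return the thread-pointer-relative offset it actually reserved for the block:

#include <cstdint>
#include "llvm/ExecutionEngine/SectionMemoryManager.h"

// Hypothetical client-side memory manager; only the TLS hook is shown.
class TLSAwareMemoryManager : public llvm::SectionMemoryManager {
public:
  TLSSection allocateTLSSection(uintptr_t Size, unsigned Alignment,
                                unsigned SectionID,
                                llvm::StringRef SectionName) override {
    TLSSection Sec;
    // RuntimeDyld copies the section contents into InitializationImage and
    // applies relocations there, so ordinary RW data memory is sufficient.
    Sec.InitializationImage = allocateDataSection(
        Size, Alignment, SectionID, SectionName, /*IsReadOnly=*/false);
    // Placeholder: must be the offset of this block relative to the thread
    // pointer (%fs on x86-64) so that TPOFF relocations resolve correctly.
    Sec.Offset = 0;
    return Sec;
  }
};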
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index a3005f786cf9..2b88c481dab0 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -232,6 +232,26 @@ private:
EvalResult(("Cannot decode unknown symbol '" + Symbol + "'").str()),
"");
+ // If an offset expression follows the symbol, parse it.
+ int64_t Offset = 0;
+ BinOpToken BinOp;
+ std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr);
+ switch (BinOp) {
+ case BinOpToken::Add: {
+ EvalResult Number;
+ std::tie(Number, RemainingExpr) = evalNumberExpr(RemainingExpr);
+ Offset = Number.getValue();
+ break;
+ }
+ case BinOpToken::Invalid:
+ break;
+ default:
+ return std::make_pair(
+ unexpectedToken(RemainingExpr, RemainingExpr,
+ "expected '+' for offset or ',' if no offset"),
+ "");
+ }
+
if (!RemainingExpr.startswith(","))
return std::make_pair(
unexpectedToken(RemainingExpr, RemainingExpr, "expected ','"), "");
@@ -249,7 +269,7 @@ private:
MCInst Inst;
uint64_t Size;
- if (!decodeInst(Symbol, Inst, Size))
+ if (!decodeInst(Symbol, Inst, Size, Offset))
return std::make_pair(
EvalResult(("Couldn't decode instruction at '" + Symbol + "'").str()),
"");
@@ -307,7 +327,7 @@ private:
MCInst Inst;
uint64_t InstSize;
- if (!decodeInst(Symbol, Inst, InstSize))
+ if (!decodeInst(Symbol, Inst, InstSize, 0))
return std::make_pair(
EvalResult(("Couldn't decode instruction at '" + Symbol + "'").str()),
"");
@@ -664,10 +684,12 @@ private:
return evalComplexExpr(std::make_pair(ThisResult, RemainingExpr), PCtx);
}
- bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const {
+ bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size,
+ int64_t Offset) const {
MCDisassembler *Dis = Checker.Disassembler;
StringRef SymbolMem = Checker.getSymbolContent(Symbol);
- ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin(), SymbolMem.size());
+ ArrayRef<uint8_t> SymbolBytes(SymbolMem.bytes_begin() + Offset,
+ SymbolMem.size() - Offset);
MCDisassembler::DecodeStatus S =
Dis->getInstruction(Inst, Size, SymbolBytes, 0, nulls());
@@ -675,7 +697,7 @@ private:
return (S == MCDisassembler::Success);
}
};
-}
+} // namespace llvm
RuntimeDyldCheckerImpl::RuntimeDyldCheckerImpl(
IsSymbolValidFunction IsSymbolValid, GetSymbolInfoFunction GetSymbolInfo,
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index efe0b9cd61cd..1b7fdb588275 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -345,6 +345,32 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section,
support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = GOTOffset;
break;
}
+ case ELF::R_X86_64_DTPMOD64: {
+ // We only have one DSO, so the module id is always 1.
+ support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = 1;
+ break;
+ }
+ case ELF::R_X86_64_DTPOFF64:
+ case ELF::R_X86_64_TPOFF64: {
+ // DTPOFF64 should resolve to the offset in the TLS block, TPOFF64 to the
+ // offset in the *initial* TLS block. Since we are statically linking, all
+ // TLS blocks already exist in the initial block, so resolve both
+ // relocations equally.
+ support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) =
+ Value + Addend;
+ break;
+ }
+ case ELF::R_X86_64_DTPOFF32:
+ case ELF::R_X86_64_TPOFF32: {
+ // As for the (D)TPOFF64 relocations above, both DTPOFF32 and TPOFF32 can
+ // be resolved equally.
+ int64_t RealValue = Value + Addend;
+ assert(RealValue >= INT32_MIN && RealValue <= INT32_MAX);
+ int32_t TruncValue = RealValue;
+ support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) =
+ TruncValue;
+ break;
+ }
}
}
@@ -674,7 +700,7 @@ Error RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj,
Expected<section_iterator> RelSecOrErr = si->getRelocatedSection();
if (!RelSecOrErr)
- report_fatal_error(toString(RelSecOrErr.takeError()));
+ report_fatal_error(Twine(toString(RelSecOrErr.takeError())));
section_iterator RelSecI = *RelSecOrErr;
if (RelSecI == Obj.section_end())
@@ -1210,8 +1236,7 @@ RuntimeDyldELF::processRelocationRef(
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
SymType = *SymTypeOrErr;
}
@@ -1231,8 +1256,7 @@ RuntimeDyldELF::processRelocationRef(
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SectionOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
section_iterator si = *SectionOrErr;
if (si == Obj.section_end())
@@ -1813,11 +1837,14 @@ RuntimeDyldELF::processRelocationRef(
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- } else if (RelType == ELF::R_X86_64_GOTPC64) {
+ } else if (RelType == ELF::R_X86_64_GOTPC32) {
// Materialize the address of the base of the GOT relative to the PC.
// This doesn't create a GOT entry, but it does mean we need a GOT
// section.
(void)allocateGOTEntries(0);
+ resolveGOTOffsetRelocation(SectionID, Offset, Addend, ELF::R_X86_64_PC32);
+ } else if (RelType == ELF::R_X86_64_GOTPC64) {
+ (void)allocateGOTEntries(0);
resolveGOTOffsetRelocation(SectionID, Offset, Addend, ELF::R_X86_64_PC64);
} else if (RelType == ELF::R_X86_64_GOTOFF64) {
// GOTOFF relocations ultimately require a section difference relocation.
@@ -1829,6 +1856,15 @@ RuntimeDyldELF::processRelocationRef(
} else if (RelType == ELF::R_X86_64_PC64) {
Value.Addend += support::ulittle64_t::ref(computePlaceholderAddress(SectionID, Offset));
processSimpleRelocation(SectionID, Offset, RelType, Value);
+ } else if (RelType == ELF::R_X86_64_GOTTPOFF) {
+ processX86_64GOTTPOFFRelocation(SectionID, Offset, Value, Addend);
+ } else if (RelType == ELF::R_X86_64_TLSGD ||
+ RelType == ELF::R_X86_64_TLSLD) {
+ // The next relocation must be the relocation for __tls_get_addr.
+ ++RelI;
+ auto &GetAddrRelocation = *RelI;
+ processX86_64TLSRelocation(SectionID, Offset, RelType, Value, Addend,
+ GetAddrRelocation);
} else {
processSimpleRelocation(SectionID, Offset, RelType, Value);
}
@@ -1841,6 +1877,330 @@ RuntimeDyldELF::processRelocationRef(
return ++RelI;
}
+void RuntimeDyldELF::processX86_64GOTTPOFFRelocation(unsigned SectionID,
+ uint64_t Offset,
+ RelocationValueRef Value,
+ int64_t Addend) {
+ // Use the approach from "x86-64 Linker Optimizations" from the TLS spec
+ // to replace the GOTTPOFF relocation with a TPOFF relocation. The spec
+ // only mentions one optimization even though there are two different
+ // code sequences for the Initial Exec TLS Model. We match the code to
+ // find out which one was used.
+
+ // A possible TLS code sequence and its replacement
+ struct CodeSequence {
+ // The expected code sequence
+ ArrayRef<uint8_t> ExpectedCodeSequence;
+ // The offset of the GOTTPOFF relocation from the start of the sequence
+ // (subtract this from the relocation offset to get the sequence start)
+ uint64_t TLSSequenceOffset;
+ // The new code sequence
+ ArrayRef<uint8_t> NewCodeSequence;
+ // The offset of the new TPOFF relocation
+ uint64_t TpoffRelocationOffset;
+ };
+
+ std::array<CodeSequence, 2> CodeSequences;
+
+ // Initial Exec TLS Model Sequence
+ {
+ static const std::initializer_list<uint8_t> ExpectedCodeSequenceList = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00,
+ 0x00, // mov %fs:0, %rax
+ 0x48, 0x03, 0x05, 0x00, 0x00, 0x00, 0x00 // add x@gottpoff(%rip),
+ // %rax
+ };
+ CodeSequences[0].ExpectedCodeSequence =
+ ArrayRef<uint8_t>(ExpectedCodeSequenceList);
+ CodeSequences[0].TLSSequenceOffset = 12;
+
+ static const std::initializer_list<uint8_t> NewCodeSequenceList = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0, %rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff(%rax), %rax
+ };
+ CodeSequences[0].NewCodeSequence = ArrayRef<uint8_t>(NewCodeSequenceList);
+ CodeSequences[0].TpoffRelocationOffset = 12;
+ }
+
+ // Initial Exec TLS Model Sequence, II
+ {
+ static const std::initializer_list<uint8_t> ExpectedCodeSequenceList = {
+ 0x48, 0x8b, 0x05, 0x00, 0x00, 0x00, 0x00, // mov x@gottpoff(%rip), %rax
+ 0x64, 0x48, 0x8b, 0x00, 0x00, 0x00, 0x00 // mov %fs:(%rax), %rax
+ };
+ CodeSequences[1].ExpectedCodeSequence =
+ ArrayRef<uint8_t>(ExpectedCodeSequenceList);
+ CodeSequences[1].TLSSequenceOffset = 3;
+
+ static const std::initializer_list<uint8_t> NewCodeSequenceList = {
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, // 6 byte nop
+ 0x64, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:x@tpoff, %rax
+ };
+ CodeSequences[1].NewCodeSequence = ArrayRef<uint8_t>(NewCodeSequenceList);
+ CodeSequences[1].TpoffRelocationOffset = 10;
+ }
+
+ bool Resolved = false;
+ auto &Section = Sections[SectionID];
+ for (const auto &C : CodeSequences) {
+ assert(C.ExpectedCodeSequence.size() == C.NewCodeSequence.size() &&
+ "Old and new code sequences must have the same size");
+
+ if (Offset < C.TLSSequenceOffset ||
+ (Offset - C.TLSSequenceOffset + C.NewCodeSequence.size()) >
+ Section.getSize()) {
+ // This can't be a matching sequence as it doesn't fit in the current
+ // section
+ continue;
+ }
+
+ auto TLSSequenceStartOffset = Offset - C.TLSSequenceOffset;
+ auto *TLSSequence = Section.getAddressWithOffset(TLSSequenceStartOffset);
+ if (ArrayRef<uint8_t>(TLSSequence, C.ExpectedCodeSequence.size()) !=
+ C.ExpectedCodeSequence) {
+ continue;
+ }
+
+ memcpy(TLSSequence, C.NewCodeSequence.data(), C.NewCodeSequence.size());
+
+ // The original GOTTPOFF relocation has an addend as it is PC relative,
+ // so it needs to be corrected. The TPOFF32 relocation is used as an
+ // absolute value (which is an offset from %fs:0), so remove the addend
+ // again.
+ RelocationEntry RE(SectionID,
+ TLSSequenceStartOffset + C.TpoffRelocationOffset,
+ ELF::R_X86_64_TPOFF32, Value.Addend - Addend);
+
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+
+ Resolved = true;
+ break;
+ }
+
+ if (!Resolved) {
+ // The GOTTPOFF relocation was not used in one of the sequences
+ // described in the spec, so we can't optimize it to a TPOFF
+ // relocation.
+ uint64_t GOTOffset = allocateGOTEntries(1);
+ resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend,
+ ELF::R_X86_64_PC32);
+ RelocationEntry RE =
+ computeGOTOffsetRE(GOTOffset, Value.Offset, ELF::R_X86_64_TPOFF64);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ }
+}
+
+void RuntimeDyldELF::processX86_64TLSRelocation(
+ unsigned SectionID, uint64_t Offset, uint64_t RelType,
+ RelocationValueRef Value, int64_t Addend,
+ const RelocationRef &GetAddrRelocation) {
+ // Since we are statically linking and have no additional DSOs, we can resolve
+ // the relocation directly without using __tls_get_addr.
+ // Use the approach from "x86-64 Linker Optimizations" from the TLS spec
+ // to replace it with the Local Exec relocation variant.
+
+ // Find out whether the code was compiled with the large or small memory
+ // model by looking at the next relocation, which is the relocation for the
+ // call to __tls_get_addr. A 32-bit relocation means the small code model;
+ // a 64-bit relocation means the large code model.
+ bool IsSmallCodeModel;
+ // Is the relocation for the __tls_get_addr a PC-relative GOT relocation?
+ bool IsGOTPCRel = false;
+
+ switch (GetAddrRelocation.getType()) {
+ case ELF::R_X86_64_GOTPCREL:
+ case ELF::R_X86_64_REX_GOTPCRELX:
+ case ELF::R_X86_64_GOTPCRELX:
+ IsGOTPCRel = true;
+ LLVM_FALLTHROUGH;
+ case ELF::R_X86_64_PLT32:
+ IsSmallCodeModel = true;
+ break;
+ case ELF::R_X86_64_PLTOFF64:
+ IsSmallCodeModel = false;
+ break;
+ default:
+ report_fatal_error(
+ "invalid TLS relocations for General/Local Dynamic TLS Model: "
+ "expected PLT or GOT relocation for __tls_get_addr function");
+ }
+
+ // The offset of the TLSGD/TLSLD relocation from the start of the TLS code
+ // sequence (subtract this from the relocation offset to get the sequence start)
+ uint64_t TLSSequenceOffset;
+ // The expected start of the code sequence
+ ArrayRef<uint8_t> ExpectedCodeSequence;
+ // The new TLS code sequence that will replace the existing code
+ ArrayRef<uint8_t> NewCodeSequence;
+
+ if (RelType == ELF::R_X86_64_TLSGD) {
+ // The offset of the new TPOFF32 relocation (offset starting from the
+ // beginning of the whole TLS sequence)
+ uint64_t TpoffRelocOffset;
+
+ if (IsSmallCodeModel) {
+ if (!IsGOTPCRel) {
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x66, // data16 (no-op prefix)
+ 0x48, 0x8d, 0x3d, 0x00, 0x00,
+ 0x00, 0x00, // lea <disp32>(%rip), %rdi
+ 0x66, 0x66, // two data16 prefixes
+ 0x48, // rex64 (no-op prefix)
+ 0xe8, 0x00, 0x00, 0x00, 0x00 // call __tls_get_addr@plt
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 4;
+ } else {
+ // This code sequence is not described in the TLS spec but gcc
+ // generates it sometimes.
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x66, // data16 (no-op prefix)
+ 0x48, 0x8d, 0x3d, 0x00, 0x00,
+ 0x00, 0x00, // lea <disp32>(%rip), %rdi
+ 0x66, // data16 prefix (no-op prefix)
+ 0x48, // rex64 (no-op prefix)
+ 0xff, 0x15, 0x00, 0x00, 0x00,
+ 0x00 // call *__tls_get_addr@gotpcrel(%rip)
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 4;
+ }
+
+ // The replacement code for the small code model. It's the same for
+ // both sequences.
+ static const std::initializer_list<uint8_t> SmallSequence = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00,
+ 0x00, // mov %fs:0, %rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00 // lea x@tpoff(%rax),
+ // %rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(SmallSequence);
+ TpoffRelocOffset = 12;
+ } else {
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, // lea <disp32>(%rip),
+ // %rdi
+ 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, // movabs $__tls_get_addr@pltoff, %rax
+ 0x48, 0x01, 0xd8, // add %rbx, %rax
+ 0xff, 0xd0 // call *%rax
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code for the large code model
+ static const std::initializer_list<uint8_t> LargeSequence = {
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00,
+ 0x00, // mov %fs:0, %rax
+ 0x48, 0x8d, 0x80, 0x00, 0x00, 0x00, 0x00, // lea x@tpoff(%rax),
+ // %rax
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 // nopw 0x0(%rax,%rax,1)
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(LargeSequence);
+ TpoffRelocOffset = 12;
+ }
+
+ // The TLSGD/TLSLD relocations are PC-relative, so they have an addend.
+ // The new TPOFF32 relocation is used as an absolute offset from
+ // %fs:0, so remove the TLSGD/TLSLD addend again.
+ RelocationEntry RE(SectionID, Offset - TLSSequenceOffset + TpoffRelocOffset,
+ ELF::R_X86_64_TPOFF32, Value.Addend - Addend);
+ if (Value.SymbolName)
+ addRelocationForSymbol(RE, Value.SymbolName);
+ else
+ addRelocationForSection(RE, Value.SectionID);
+ } else if (RelType == ELF::R_X86_64_TLSLD) {
+ if (IsSmallCodeModel) {
+ if (!IsGOTPCRel) {
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, // leaq <disp32>(%rip), %rdi
+ 0xe8, 0x00, 0x00, 0x00, 0x00 // call __tls_get_addr@plt
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code for the small code model
+ static const std::initializer_list<uint8_t> SmallSequence = {
+ 0x66, 0x66, 0x66, // three data16 prefixes (no-op)
+ 0x64, 0x48, 0x8b, 0x04, 0x25,
+ 0x00, 0x00, 0x00, 0x00 // mov %fs:0, %rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(SmallSequence);
+ } else {
+ // This code sequence is not described in the TLS spec but gcc
+ // generates it sometimes.
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00,
+ 0x00, 0x00, 0x00, // leaq <disp32>(%rip), %rdi
+ 0xff, 0x15, 0x00, 0x00,
+ 0x00, 0x00 // call
+ // *__tls_get_addr@gotpcrel(%rip)
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code is just like the one above, but it needs to be
+ // one byte longer.
+ static const std::initializer_list<uint8_t> SmallSequence = {
+ 0x0f, 0x1f, 0x40, 0x00, // 4 byte nop
+ 0x64, 0x48, 0x8b, 0x04, 0x25,
+ 0x00, 0x00, 0x00, 0x00 // mov %fs:0, %rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(SmallSequence);
+ }
+ } else {
+ // This is the same sequence as the TLSGD sequence for the large memory
+ // model above.
+ static const std::initializer_list<uint8_t> CodeSequence = {
+ 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, // lea <disp32>(%rip),
+ // %rdi
+ 0x48, 0xb8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, // movabs $__tls_get_addr@pltoff, %rax
+ 0x48, 0x01, 0xd8, // add %rbx, %rax
+ 0xff, 0xd0 // call *%rax
+ };
+ ExpectedCodeSequence = ArrayRef<uint8_t>(CodeSequence);
+ TLSSequenceOffset = 3;
+
+ // The replacement code for the large code model
+ static const std::initializer_list<uint8_t> LargeSequence = {
+ 0x66, 0x66, 0x66, // three data16 prefixes (no-op)
+ 0x66, 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00,
+ 0x00, // 10 byte nop
+ 0x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00 // mov %fs:0,%rax
+ };
+ NewCodeSequence = ArrayRef<uint8_t>(LargeSequence);
+ }
+ } else {
+ llvm_unreachable("both TLS relocations handled above");
+ }
+
+ assert(ExpectedCodeSequence.size() == NewCodeSequence.size() &&
+ "Old and new code sequences must have the same size");
+
+ auto &Section = Sections[SectionID];
+ if (Offset < TLSSequenceOffset ||
+ (Offset - TLSSequenceOffset + NewCodeSequence.size()) >
+ Section.getSize()) {
+ report_fatal_error("unexpected end of section in TLS sequence");
+ }
+
+ auto *TLSSequence = Section.getAddressWithOffset(Offset - TLSSequenceOffset);
+ if (ArrayRef<uint8_t>(TLSSequence, ExpectedCodeSequence.size()) !=
+ ExpectedCodeSequence) {
+ report_fatal_error(
+ "invalid TLS sequence for Global/Local Dynamic TLS Model");
+ }
+
+ memcpy(TLSSequence, NewCodeSequence.data(), NewCodeSequence.size());
+}
+
size_t RuntimeDyldELF::getGOTEntrySize() {
// We don't use the GOT in all of these cases, but it's essentially free
// to put them all here.
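For illustration only (not code from the patch), a translation unit such as the following, compiled for x86-64 with -fPIC, typically produces the General Dynamic access sequence (lea x@tlsgd(%rip),%rdi; call __tls_get_addr@plt) that processX86_64TLSRelocation above rewrites into the Local Exec form (mov %fs:0,%rax; lea x@tpoff(%rax),%rax); the rewrite is valid here because the JIT links everything into a single static image. Names are arbitrary:

// Each access to Counter goes through a TLS access sequence that the
// code above patches while resolving relocations.
thread_local int Counter = 0;

int bump() { return ++Counter; }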
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 31892b7466e6..1251036f4caa 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -161,6 +161,18 @@ private:
bool relocationNeedsGot(const RelocationRef &R) const override;
bool relocationNeedsStub(const RelocationRef &R) const override;
+ // Process a GOTTPOFF TLS relocation for x86-64
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void processX86_64GOTTPOFFRelocation(unsigned SectionID, uint64_t Offset,
+ RelocationValueRef Value,
+ int64_t Addend);
+ // Process a TLSLD/TLSGD relocation for x86-64
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ void processX86_64TLSRelocation(unsigned SectionID, uint64_t Offset,
+ uint64_t RelType, RelocationValueRef Value,
+ int64_t Addend,
+ const RelocationRef &GetAddrRelocation);
+
public:
RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
JITSymbolResolver &Resolver);
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 721f2b14829a..dd66ff7ecf70 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -29,8 +29,7 @@ static bool isThumbFunc(object::symbol_iterator Symbol,
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SymTypeOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
if (*SymTypeOrErr != object::SymbolRef::ST_Function)
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
index 14fb36f070f8..f03acb41d670 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFMIPS_H
#include "../RuntimeDyldELF.h"
-#include <string>
#define DEBUG_TYPE "dyld"
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index a76958a9e2c2..fcf723aaea28 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOARM_H
#include "../RuntimeDyldMachO.h"
-#include <string>
#define DEBUG_TYPE "dyld"
@@ -141,7 +140,7 @@ public:
return ++RelI;
}
- // Sanity check relocation type.
+ // Validate the relocation type.
switch (RelType) {
UNIMPLEMENTED_RELOC(MachO::ARM_RELOC_PAIR);
UNIMPLEMENTED_RELOC(MachO::ARM_RELOC_SECTDIFF);
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 523deb29b723..d029d3266f79 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOI386_H
#include "../RuntimeDyldMachO.h"
-#include <string>
#define DEBUG_TYPE "dyld"
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index 28febbdb948c..a4d91cf338cb 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -10,7 +10,6 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDMACHOX86_64_H
#include "../RuntimeDyldMachO.h"
-#include <string>
#define DEBUG_TYPE "dyld"
diff --git a/llvm/lib/ExecutionEngine/TargetSelect.cpp b/llvm/lib/ExecutionEngine/TargetSelect.cpp
index 28ea04be1a5e..c67a1a7661d6 100644
--- a/llvm/lib/ExecutionEngine/TargetSelect.cpp
+++ b/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -17,8 +17,8 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index 04476d999336..c962231cbdc1 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -954,8 +954,8 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// Check to see if this is a fixed string, or if it has regex pieces.
if (!MatchFullLinesHere &&
- (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
- PatternStr.find("[[") == StringRef::npos))) {
+ (PatternStr.size() < 2 ||
+ (!PatternStr.contains("{{") && !PatternStr.contains("[[")))) {
FixedStr = PatternStr;
return false;
}
@@ -1034,7 +1034,8 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
bool IsLegacyLineExpr = false;
StringRef DefName;
StringRef SubstStr;
- std::string MatchRegexp;
+ StringRef MatchRegexp;
+ std::string WildcardRegexp;
size_t SubstInsertIdx = RegExStr.size();
// Parse string variable or legacy @LINE expression.
@@ -1078,7 +1079,7 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
return true;
}
DefName = Name;
- MatchRegexp = MatchStr.str();
+ MatchRegexp = MatchStr;
} else {
if (IsPseudo) {
MatchStr = OrigMatchStr;
@@ -1117,7 +1118,8 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
SubstStr = MatchStr;
else {
ExpressionFormat Format = ExpressionPointer->getFormat();
- MatchRegexp = cantFail(Format.getWildcardRegex());
+ WildcardRegexp = cantFail(Format.getWildcardRegex());
+ MatchRegexp = WildcardRegexp;
}
}
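The FileCheck change above makes MatchRegexp a StringRef and keeps the owning std::string in the named local WildcardRegexp so the reference cannot dangle. A stand-alone sketch of the same lifetime pattern, with illustrative names that are not from FileCheck:

#include <string>
#include "llvm/ADT/StringRef.h"

static std::string makeWildcardRegex() { return "[0-9]+"; }

static void example() {
  // Binding a StringRef directly to makeWildcardRegex() would leave it
  // pointing into a destroyed temporary; keep the string in a named owner
  // whose lifetime covers every use of the StringRef.
  std::string Owned = makeWildcardRegex();
  llvm::StringRef Ref = Owned; // valid while Owned is alive
  (void)Ref;
}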
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 76954f9a37e1..ce998df757ec 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -13,18 +13,29 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
-
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/Transforms/Utils/LoopPeel.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream>
@@ -39,16 +50,22 @@ static cl::opt<bool>
"'as-if' properties of runtime calls."),
cl::init(false));
+static cl::opt<double> UnrollThresholdFactor(
+ "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
+ cl::desc("Factor for the unroll threshold to account for code "
+ "simplifications still taking place"),
+ cl::init(1.5));
+
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
// Get the function's current attributes.
auto Attrs = Fn.getAttributes();
- auto FnAttrs = Attrs.getFnAttributes();
- auto RetAttrs = Attrs.getRetAttributes();
+ auto FnAttrs = Attrs.getFnAttrs();
+ auto RetAttrs = Attrs.getRetAttrs();
SmallVector<AttributeSet, 4> ArgAttrs;
for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
- ArgAttrs.emplace_back(Attrs.getParamAttributes(ArgNo));
+ ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
@@ -228,6 +245,16 @@ OpenMPIRBuilder::~OpenMPIRBuilder() {
assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
+GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
+ IntegerType *I32Ty = Type::getInt32Ty(M.getContext());
+ auto *GV =
+ new GlobalVariable(M, I32Ty,
+ /* isConstant = */ true, GlobalValue::WeakODRLinkage,
+ ConstantInt::get(I32Ty, Value), Name);
+
+ return GV;
+}
+
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
IdentFlag LocFlags,
unsigned Reserve2Flags) {
@@ -241,32 +268,29 @@ Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
Constant *IdentData[] = {
I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
- Constant *Initializer = ConstantStruct::get(
- cast<StructType>(IdentPtr->getPointerElementType()), IdentData);
+ Constant *Initializer =
+ ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
// Look for existing encoding of the location + flags, not needed but
// minimizes the difference to the existing solution while we transition.
for (GlobalVariable &GV : M.getGlobalList())
- if (GV.getType() == IdentPtr && GV.hasInitializer())
+ if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
if (GV.getInitializer() == Initializer)
- return Ident = &GV;
-
- auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
- /* isConstant = */ true,
- GlobalValue::PrivateLinkage, Initializer);
- GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- GV->setAlignment(Align(8));
- Ident = GV;
+ Ident = &GV;
+
+ if (!Ident) {
+ auto *GV = new GlobalVariable(
+ M, OpenMPIRBuilder::Ident,
+ /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
+ nullptr, GlobalValue::NotThreadLocal,
+ M.getDataLayout().getDefaultGlobalsAddressSpace());
+ GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+ GV->setAlignment(Align(8));
+ Ident = GV;
+ }
}
- return Builder.CreatePointerCast(Ident, IdentPtr);
-}
-Type *OpenMPIRBuilder::getLanemaskType() {
- LLVMContext &Ctx = M.getContext();
- Triple triple(M.getTargetTriple());
-
- // This test is adequate until deviceRTL has finer grained lane widths
- return triple.isAMDGCN() ? Type::getInt64Ty(Ctx) : Type::getInt32Ty(Ctx);
+ return Builder.CreatePointerCast(Ident, IdentPtr);
}
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
@@ -310,9 +334,8 @@ Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
}
-Constant *
-OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
- DILocation *DIL = Loc.DL.get();
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
+ DILocation *DIL = DL.get();
if (!DIL)
return getOrCreateDefaultSrcLocStr();
StringRef FileName = M.getName();
@@ -320,12 +343,17 @@ OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
if (Optional<StringRef> Source = DIF->getSource())
FileName = *Source;
StringRef Function = DIL->getScope()->getSubprogram()->getName();
- Function =
- !Function.empty() ? Function : Loc.IP.getBlock()->getParent()->getName();
+ if (Function.empty() && F)
+ Function = F->getName();
return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
DIL->getColumn());
}
+Constant *
+OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
+ return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
+}
+
Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
return Builder.CreateCall(
getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
@@ -581,8 +609,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
// Add some fake uses for OpenMP provided arguments.
ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
- Instruction *ZeroAddrUse = Builder.CreateLoad(Int32, ZeroAddr,
- "zero.addr.use");
+ Instruction *ZeroAddrUse =
+ Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
ToBeDeleted.push_back(ZeroAddrUse);
// ThenBB
@@ -965,8 +993,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
Value *ST = ConstantInt::get(I32Ty, 1);
llvm::CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
- LoopInfo = createStaticWorkshareLoop(Loc, LoopInfo, AllocaIP, true);
- BasicBlock *LoopAfterBB = LoopInfo->getAfter();
+ InsertPointTy AfterIP =
+ applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, true);
+ BasicBlock *LoopAfterBB = AfterIP.getBlock();
Instruction *SplitPos = LoopAfterBB->getTerminator();
if (!isa_and_nonnull<BranchInst>(SplitPos))
SplitPos = new UnreachableInst(Builder.getContext(), LoopAfterBB);
@@ -1022,6 +1051,179 @@ OpenMPIRBuilder::createSection(const LocationDescription &Loc,
/*IsCancellable*/ true);
}
+/// Create a function with a unique name and a "void (i8*, i8*)" signature in
+/// the given module and return it.
+Function *getFreshReductionFunc(Module &M) {
+ Type *VoidTy = Type::getVoidTy(M.getContext());
+ Type *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ auto *FuncTy =
+ FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
+ return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
+ M.getDataLayout().getDefaultGlobalsAddressSpace(),
+ ".omp.reduction.func", &M);
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait) {
+ for (const ReductionInfo &RI : ReductionInfos) {
+ (void)RI;
+ assert(RI.Variable && "expected non-null variable");
+ assert(RI.PrivateVariable && "expected non-null private variable");
+ assert(RI.ReductionGen && "expected non-null reduction generator callback");
+ assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
+ "expected variables and their private equivalents to have the same "
+ "type");
+ assert(RI.Variable->getType()->isPointerTy() &&
+ "expected variables to be pointers");
+ }
+
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ BasicBlock *InsertBlock = Loc.IP.getBlock();
+ BasicBlock *ContinuationBlock =
+ InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
+ InsertBlock->getTerminator()->eraseFromParent();
+
+ // Create and populate array of type-erased pointers to private reduction
+ // values.
+ unsigned NumReductions = ReductionInfos.size();
+ Type *RedArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumReductions);
+ Builder.restoreIP(AllocaIP);
+ Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
+
+ Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
+
+ for (auto En : enumerate(ReductionInfos)) {
+ unsigned Index = En.index();
+ const ReductionInfo &RI = En.value();
+ Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
+ RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
+ Value *Casted =
+ Builder.CreateBitCast(RI.PrivateVariable, Builder.getInt8PtrTy(),
+ "private.red.var." + Twine(Index) + ".casted");
+ Builder.CreateStore(Casted, RedArrayElemPtr);
+ }
+
+ // Emit a call to the runtime function that orchestrates the reduction.
+ // Declare the reduction function in the process.
+ Function *Func = Builder.GetInsertBlock()->getParent();
+ Module *Module = Func->getParent();
+ Value *RedArrayPtr =
+ Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ bool CanGenerateAtomic =
+ llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
+ return RI.AtomicReductionGen;
+ });
+ Value *Ident = getOrCreateIdent(
+ SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
+ : IdentFlag(0));
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Constant *NumVariables = Builder.getInt32(NumReductions);
+ const DataLayout &DL = Module->getDataLayout();
+ unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
+ Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
+ Function *ReductionFunc = getFreshReductionFunc(*Module);
+ Value *Lock = getOMPCriticalRegionLock(".reduction");
+ Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
+ IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
+ : RuntimeFunction::OMPRTL___kmpc_reduce);
+ CallInst *ReduceCall =
+ Builder.CreateCall(ReduceFunc,
+ {Ident, ThreadId, NumVariables, RedArraySize,
+ RedArrayPtr, ReductionFunc, Lock},
+ "reduce");
+
+ // Create final reduction entry blocks for the atomic and non-atomic case.
+ // Emit IR that dispatches control flow to one of the blocks based on the
+ // reduction supporting the atomic mode.
+ BasicBlock *NonAtomicRedBlock =
+ BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
+ BasicBlock *AtomicRedBlock =
+ BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
+ SwitchInst *Switch =
+ Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
+ Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
+ Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
+
+ // Populate the non-atomic reduction using the elementwise reduction function.
+ // This loads the elements from the global and private variables and reduces
+ // them before storing back the result to the global variable.
+ Builder.SetInsertPoint(NonAtomicRedBlock);
+ for (auto En : enumerate(ReductionInfos)) {
+ const ReductionInfo &RI = En.value();
+ Type *ValueType = RI.getElementType();
+ Value *RedValue = Builder.CreateLoad(ValueType, RI.Variable,
+ "red.value." + Twine(En.index()));
+ Value *PrivateRedValue =
+ Builder.CreateLoad(ValueType, RI.PrivateVariable,
+ "red.private.value." + Twine(En.index()));
+ Value *Reduced;
+ Builder.restoreIP(
+ RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced));
+ if (!Builder.GetInsertBlock())
+ return InsertPointTy();
+ Builder.CreateStore(Reduced, RI.Variable);
+ }
+ Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
+ IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
+ : RuntimeFunction::OMPRTL___kmpc_end_reduce);
+ Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
+ Builder.CreateBr(ContinuationBlock);
+
+ // Populate the atomic reduction using the atomic elementwise reduction
+ // function. There are no loads/stores here because they happen inside the
+ // atomic elementwise reduction.
+ Builder.SetInsertPoint(AtomicRedBlock);
+ if (CanGenerateAtomic) {
+ for (const ReductionInfo &RI : ReductionInfos) {
+ Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.Variable,
+ RI.PrivateVariable));
+ if (!Builder.GetInsertBlock())
+ return InsertPointTy();
+ }
+ Builder.CreateBr(ContinuationBlock);
+ } else {
+ Builder.CreateUnreachable();
+ }
+
+ // Populate the outlined reduction function using the elementwise reduction
+ // function. Partial values are extracted from the type-erased array of
+ // pointers to private variables.
+ BasicBlock *ReductionFuncBlock =
+ BasicBlock::Create(Module->getContext(), "", ReductionFunc);
+ Builder.SetInsertPoint(ReductionFuncBlock);
+ Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
+ RedArrayTy->getPointerTo());
+ Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
+ RedArrayTy->getPointerTo());
+ for (auto En : enumerate(ReductionInfos)) {
+ const ReductionInfo &RI = En.value();
+ Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
+ RedArrayTy, LHSArrayPtr, 0, En.index());
+ Value *LHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), LHSI8PtrPtr);
+ Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
+ Value *LHS = Builder.CreateLoad(RI.getElementType(), LHSPtr);
+ Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
+ RedArrayTy, RHSArrayPtr, 0, En.index());
+ Value *RHSI8Ptr = Builder.CreateLoad(Builder.getInt8PtrTy(), RHSI8PtrPtr);
+ Value *RHSPtr =
+ Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
+ Value *RHS = Builder.CreateLoad(RI.getElementType(), RHSPtr);
+ Value *Reduced;
+ Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
+ if (!Builder.GetInsertBlock())
+ return InsertPointTy();
+ Builder.CreateStore(Reduced, LHSPtr);
+ }
+ Builder.CreateRetVoid();
+
+ Builder.SetInsertPoint(ContinuationBlock);
+ return Builder.saveIP();
+}
+
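For context, createReductions emits the __kmpc_reduce / __kmpc_end_reduce handshake and the outlined elementwise reduction function that a frontend needs when lowering a reduction clause. A consumer-level view of the construct it supports (illustrative program, not code from the patch):

#include <cstdio>

int main() {
  int Sum = 0;
  // Each thread reduces into a private copy of Sum; the private copies are
  // combined through the runtime protocol generated by createReductions.
#pragma omp parallel for reduction(+ : Sum)
  for (int I = 0; I < 1000; ++I)
    Sum += I;
  std::printf("Sum = %d\n", Sum);
  return 0;
}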
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
BodyGenCallbackTy BodyGenCB,
@@ -1133,8 +1335,6 @@ CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
CL->Exit = Exit;
CL->After = After;
- CL->IsValid = true;
-
#ifndef NDEBUG
CL->assertOK();
#endif
@@ -1271,14 +1471,17 @@ void setCanonicalLoopTripCount(CanonicalLoopInfo *CLI, Value *TripCount) {
CLI->assertOK();
}
-CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
- const LocationDescription &Loc, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier, Value *Chunk) {
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP,
+ bool NeedsBarrier, Value *Chunk) {
+ assert(CLI->isValid() && "Requires a valid canonical loop");
+
// Set up the source location value for OpenMP runtime.
- if (!updateToLocation(Loc))
- return nullptr;
+ Builder.restoreIP(CLI->getPreheaderIP());
+ Builder.SetCurrentDebugLocation(DL);
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
Value *SrcLoc = getOrCreateIdent(SrcLocStr);
// Declare useful OpenMP runtime functions.
@@ -1308,6 +1511,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
Builder.CreateStore(UpperBound, PUpperBound);
Builder.CreateStore(One, PStride);
+ // FIXME: schedule(static) is NOT the same as schedule(static,1)
if (!Chunk)
Chunk = One;
@@ -1348,19 +1552,21 @@ CanonicalLoopInfo *OpenMPIRBuilder::createStaticWorkshareLoop(
// Add the barrier if requested.
if (NeedsBarrier)
- createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ createBarrier(LocationDescription(Builder.saveIP(), DL),
omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
/* CheckCancelFlag */ false);
- CLI->assertOK();
- return CLI;
+ InsertPointTy AfterIP = CLI->getAfterIP();
+ CLI->invalidate();
+
+ return AfterIP;
}
-CanonicalLoopInfo *OpenMPIRBuilder::createWorkshareLoop(
- const LocationDescription &Loc, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, bool NeedsBarrier) {
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
+ InsertPointTy AllocaIP, bool NeedsBarrier) {
// Currently only supports static schedules.
- return createStaticWorkshareLoop(Loc, CLI, AllocaIP, NeedsBarrier);
+ return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
}
/// Returns an LLVM function to call for initializing loop bounds using OpenMP
@@ -1395,14 +1601,15 @@ getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
-OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
- const LocationDescription &Loc, CanonicalLoopInfo *CLI,
- InsertPointTy AllocaIP, OMPScheduleType SchedType, bool NeedsBarrier,
- Value *Chunk) {
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
+ DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
+ OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
+ assert(CLI->isValid() && "Requires a valid canonical loop");
+
// Set up the source location value for OpenMP runtime.
- Builder.SetCurrentDebugLocation(Loc.DL);
+ Builder.SetCurrentDebugLocation(DL);
- Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
Value *SrcLoc = getOrCreateIdent(SrcLocStr);
// Declare useful OpenMP runtime functions.
@@ -1496,11 +1703,12 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createDynamicWorkshareLoop(
// Add the barrier if requested.
if (NeedsBarrier) {
Builder.SetInsertPoint(&Exit->back());
- createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
+ createBarrier(LocationDescription(Builder.saveIP(), DL),
omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
/* CheckCancelFlag */ false);
}
+ CLI->invalidate();
return AfterIP;
}
@@ -1592,6 +1800,8 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
// TODO: Find common/largest indvar type.
Value *CollapsedTripCount = nullptr;
for (CanonicalLoopInfo *L : Loops) {
+ assert(L->isValid() &&
+ "All loops to collapse must be valid canonical loops");
Value *OrigTripCount = L->getTripCount();
if (!CollapsedTripCount) {
CollapsedTripCount = OrigTripCount;
@@ -1680,6 +1890,9 @@ OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Loop->collectControlBlocks(OldControlBBs);
removeUnusedBlocksFromParent(OldControlBBs);
+ for (CanonicalLoopInfo *L : Loops)
+ L->invalidate();
+
#ifndef NDEBUG
Result->assertOK();
#endif
@@ -1706,6 +1919,7 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
// any original CanonicalLoopInfo.
SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
for (CanonicalLoopInfo *L : Loops) {
+ assert(L->isValid() && "All input loops must be valid canonical loops");
OrigTripCounts.push_back(L->getTripCount());
OrigIndVars.push_back(L->getIndVar());
}
@@ -1864,6 +2078,9 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
Loop->collectControlBlocks(OldControlBBs);
removeUnusedBlocksFromParent(OldControlBBs);
+ for (CanonicalLoopInfo *L : Loops)
+ L->invalidate();
+
#ifndef NDEBUG
for (CanonicalLoopInfo *GenL : Result)
GenL->assertOK();
@@ -1871,6 +2088,287 @@ OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
return Result;
}
+/// Attach loop metadata \p Properties to the loop described by \p Loop. If the
+/// loop already has metadata, the loop properties are appended.
+static void addLoopMetadata(CanonicalLoopInfo *Loop,
+ ArrayRef<Metadata *> Properties) {
+ assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
+
+ // Nothing to do if no property to attach.
+ if (Properties.empty())
+ return;
+
+ LLVMContext &Ctx = Loop->getFunction()->getContext();
+ SmallVector<Metadata *> NewLoopProperties;
+ NewLoopProperties.push_back(nullptr);
+
+ // If the loop already has metadata, prepend it to the new metadata.
+ BasicBlock *Latch = Loop->getLatch();
+ assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
+ MDNode *Existing = Latch->getTerminator()->getMetadata(LLVMContext::MD_loop);
+ if (Existing)
+ append_range(NewLoopProperties, drop_begin(Existing->operands(), 1));
+
+ append_range(NewLoopProperties, Properties);
+ MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
+ LoopID->replaceOperandWith(0, LoopID);
+
+ Latch->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopID);
+}
+
+void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
+ LLVMContext &Ctx = Builder.getContext();
+ addLoopMetadata(
+ Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
+ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
+}
+
+void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
+ LLVMContext &Ctx = Builder.getContext();
+ addLoopMetadata(
+ Loop, {
+ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
+ });
+}
+
+/// Create the TargetMachine object to query the backend for optimization
+/// preferences.
+///
+/// Ideally, this would be passed from the front-end to the OpenMPIRBuilder, but
+/// e.g. Clang does not pass it to its CodeGen layer and creates it only when
+/// needed for the LLVM pass pipeline. We use some default options to avoid
+/// having to pass too many settings from the frontend that probably do not
+/// matter.
+///
+/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
+/// method. If we are going to use TargetMachine for more purposes, especially
+/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
+/// might be worth requiring front-ends to pass on their TargetMachine,
+/// or at least cache it between methods. Note that while frontends such as Clang
+/// have just a single main TargetMachine per translation unit, "target-cpu" and
+/// "target-features" that determine the TargetMachine are per-function and can
+/// be overridden using __attribute__((target("OPTIONS"))).
+static std::unique_ptr<TargetMachine>
+createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) {
+ Module *M = F->getParent();
+
+ StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
+ StringRef Features = F->getFnAttribute("target-features").getValueAsString();
+ const std::string &Triple = M->getTargetTriple();
+
+ std::string Error;
+ const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ if (!TheTarget)
+ return {};
+
+ llvm::TargetOptions Options;
+ return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
+ Triple, CPU, Features, Options, /*RelocModel=*/None, /*CodeModel=*/None,
+ OptLevel));
+}
+
+/// Heuristically determine the best-performing unroll factor for \p CLI. This
+/// depends on the target processor. We are re-using the same heuristics as the
+/// LoopUnrollPass.
+static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
+ Function *F = CLI->getFunction();
+
+ // Assume the user requests the most aggressive unrolling, even if the rest of
+ // the code is optimized using a lower setting.
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive;
+ std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
+
+ FunctionAnalysisManager FAM;
+ FAM.registerPass([]() { return TargetLibraryAnalysis(); });
+ FAM.registerPass([]() { return AssumptionAnalysis(); });
+ FAM.registerPass([]() { return DominatorTreeAnalysis(); });
+ FAM.registerPass([]() { return LoopAnalysis(); });
+ FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
+ FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
+ TargetIRAnalysis TIRA;
+ if (TM)
+ TIRA = TargetIRAnalysis(
+ [&](const Function &F) { return TM->getTargetTransformInfo(F); });
+ FAM.registerPass([&]() { return TIRA; });
+
+ TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
+ ScalarEvolutionAnalysis SEA;
+ ScalarEvolution &&SE = SEA.run(*F, FAM);
+ DominatorTreeAnalysis DTA;
+ DominatorTree &&DT = DTA.run(*F, FAM);
+ LoopAnalysis LIA;
+ LoopInfo &&LI = LIA.run(*F, FAM);
+ AssumptionAnalysis ACT;
+ AssumptionCache &&AC = ACT.run(*F, FAM);
+ OptimizationRemarkEmitter ORE{F};
+
+ Loop *L = LI.getLoopFor(CLI->getHeader());
+ assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
+
+ TargetTransformInfo::UnrollingPreferences UP =
+ gatherUnrollingPreferences(L, SE, TTI,
+ /*BlockFrequencyInfo=*/nullptr,
+ /*ProfileSummaryInfo=*/nullptr, ORE, OptLevel,
+ /*UserThreshold=*/None,
+ /*UserCount=*/None,
+ /*UserAllowPartial=*/true,
+ /*UserAllowRuntime=*/true,
+ /*UserUpperBound=*/None,
+ /*UserFullUnrollMaxCount=*/None);
+
+ UP.Force = true;
+
+ // Account for additional optimizations taking place before the LoopUnrollPass
+ // would unroll the loop.
+ UP.Threshold *= UnrollThresholdFactor;
+ UP.PartialThreshold *= UnrollThresholdFactor;
+
+ // Use normal unroll factors even if the rest of the code is optimized for
+ // size.
+ UP.OptSizeThreshold = UP.Threshold;
+ UP.PartialOptSizeThreshold = UP.PartialThreshold;
+
+ LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
+ << " Threshold=" << UP.Threshold << "\n"
+ << " PartialThreshold=" << UP.PartialThreshold << "\n"
+ << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
+ << " PartialOptSizeThreshold="
+ << UP.PartialOptSizeThreshold << "\n");
+
+ // Disable peeling.
+ TargetTransformInfo::PeelingPreferences PP =
+ gatherPeelingPreferences(L, SE, TTI,
+ /*UserAllowPeeling=*/false,
+ /*UserAllowProfileBasedPeeling=*/false,
+ /*UserUnrollingSpecficValues=*/false);
+
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
+
+ // Assume that reads and writes to stack variables can be eliminated by
+ // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
+ // size.
+ for (BasicBlock *BB : L->blocks()) {
+ for (Instruction &I : *BB) {
+ Value *Ptr;
+ if (auto *Load = dyn_cast<LoadInst>(&I)) {
+ Ptr = Load->getPointerOperand();
+ } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
+ Ptr = Store->getPointerOperand();
+ } else
+ continue;
+
+ Ptr = Ptr->stripPointerCasts();
+
+ if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
+ if (Alloca->getParent() == &F->getEntryBlock())
+ EphValues.insert(&I);
+ }
+ }
+ }
+
+ unsigned NumInlineCandidates;
+ bool NotDuplicatable;
+ bool Convergent;
+ unsigned LoopSize =
+ ApproximateLoopSize(L, NumInlineCandidates, NotDuplicatable, Convergent,
+ TTI, EphValues, UP.BEInsns);
+ LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSize << "\n");
+
+ // Loop is not unrollable if the loop contains certain instructions.
+ if (NotDuplicatable || Convergent) {
+ LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
+ return 1;
+ }
+
+ // TODO: Determine the trip count of \p CLI if it is constant;
+ // computeUnrollCount might be able to use it.
+ int TripCount = 0;
+ int MaxTripCount = 0;
+ bool MaxOrZero = false;
+ unsigned TripMultiple = 0;
+
+ bool UseUpperBound = false;
+ computeUnrollCount(L, TTI, DT, &LI, SE, EphValues, &ORE, TripCount,
+ MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
+ UseUpperBound);
+ unsigned Factor = UP.Count;
+ LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
+
+ // A return value of 1 signals that the loop should not be unrolled.
+ if (Factor == 0)
+ return 1;
+ return Factor;
+}
+
+void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
+ int32_t Factor,
+ CanonicalLoopInfo **UnrolledCLI) {
+ assert(Factor >= 0 && "Unroll factor must not be negative");
+
+ Function *F = Loop->getFunction();
+ LLVMContext &Ctx = F->getContext();
+
+ // If the unrolled loop is not used for another loop-associated directive, it
+ // is sufficient to add metadata for the LoopUnrollPass.
+ if (!UnrolledCLI) {
+ SmallVector<Metadata *, 2> LoopMetadata;
+ LoopMetadata.push_back(
+ MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
+
+ if (Factor >= 1) {
+ ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
+ LoopMetadata.push_back(MDNode::get(
+ Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
+ }
+
+ addLoopMetadata(Loop, LoopMetadata);
+ return;
+ }
+
+ // Heuristically determine the unroll factor.
+ if (Factor == 0)
+ Factor = computeHeuristicUnrollFactor(Loop);
+
+ // No change required with unroll factor 1.
+ if (Factor == 1) {
+ *UnrolledCLI = Loop;
+ return;
+ }
+
+ assert(Factor >= 2 &&
+ "unrolling only makes sense with a factor of 2 or larger");
+
+ Type *IndVarTy = Loop->getIndVarType();
+
+ // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
+ // unroll the inner loop.
+ Value *FactorVal =
+ ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
+ /*isSigned=*/false));
+ std::vector<CanonicalLoopInfo *> LoopNest =
+ tileLoops(DL, {Loop}, {FactorVal});
+ assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
+ *UnrolledCLI = LoopNest[0];
+ CanonicalLoopInfo *InnerLoop = LoopNest[1];
+
+ // LoopUnrollPass can only fully unroll loops with constant trip count.
+ // Unroll by the unroll factor with a fallback epilog for the remainder
+ // iterations if necessary.
+ ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
+ ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
+ addLoopMetadata(
+ InnerLoop,
+ {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
+ MDNode::get(
+ Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
+
+#ifndef NDEBUG
+ (*UnrolledCLI)->assertOK();
+#endif
+}
+
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
llvm::Value *BufSize, llvm::Value *CpyBuf,
@@ -1960,6 +2458,74 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
/*Conditional*/ false, /*hasFinalize*/ true);
}
+OpenMPIRBuilder::InsertPointTy
+OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, unsigned NumLoops,
+ ArrayRef<llvm::Value *> StoreValues,
+ const Twine &Name, bool IsDependSource) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ // Allocate space for the depend vector and generate the alloca instruction.
+ auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
+ Builder.restoreIP(AllocaIP);
+ AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
+ ArgsBase->setAlignment(Align(8));
+ Builder.restoreIP(Loc.IP);
+
+ // Store each index value at its offset in the depend vector.
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
+ ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
+ Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
+ }
+
+ Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
+ ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
+
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
+
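+ // A 'source' dependence posts this iteration's vector (__kmpc_doacross_post);
+ // a 'sink' dependence waits for it (__kmpc_doacross_wait).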
+ Function *RTLFn = nullptr;
+ if (IsDependSource)
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
+ else
+ RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
+ Builder.CreateCall(RTLFn, Args);
+
+ return Builder.saveIP();
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
+ const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
+ FinalizeCallbackTy FiniCB, bool IsThreads) {
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Directive OMPD = Directive::OMPD_ordered;
+ Instruction *EntryCall = nullptr;
+ Instruction *ExitCall = nullptr;
+
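+ // For 'ordered simd' (!IsThreads) no runtime calls are needed; the region
+ // body is emitted without entry/exit calls.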
+ if (IsThreads) {
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+ Value *Ident = getOrCreateIdent(SrcLocStr);
+ Value *ThreadId = getOrCreateThreadID(Ident);
+ Value *Args[] = {Ident, ThreadId};
+
+ Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
+ EntryCall = Builder.CreateCall(EntryRTLFn, Args);
+
+ Function *ExitRTLFn =
+ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
+ ExitCall = Builder.CreateCall(ExitRTLFn, Args);
+ }
+
+ return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
+ /*Conditional*/ false, /*hasFinalize*/ true);
+}
+
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
@@ -2193,25 +2759,30 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
}
OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
+OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
+ bool RequiresFullRuntime) {
if (!updateToLocation(Loc))
return Loc.IP;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
Value *Ident = getOrCreateIdent(SrcLocStr);
- ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
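+ // The runtime expects an i8 execution-mode value (SPMD vs. generic) here
+ // instead of a boolean flag.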
+ ConstantInt *IsSPMDVal = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(Int8->getContext()),
+ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
ConstantInt *UseGenericStateMachine =
ConstantInt::getBool(Int32->getContext(), !IsSPMD);
- ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
+ ConstantInt *RequiresFullRuntimeVal =
+ ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_init);
- CallInst *ThreadKind =
- Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
+ CallInst *ThreadKind = Builder.CreateCall(
+ Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
Value *ExecUserCode = Builder.CreateICmpEQ(
- ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
+ ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
+ "exec_user_code");
// ThreadKind = __kmpc_target_init(...)
// if (ThreadKind == -1)
@@ -2241,14 +2812,18 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, b
}
void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
- bool IsSPMD, bool RequiresFullRuntime) {
+ bool IsSPMD,
+ bool RequiresFullRuntime) {
if (!updateToLocation(Loc))
return;
Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
Value *Ident = getOrCreateIdent(SrcLocStr);
- ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
- ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
+ ConstantInt *IsSPMDVal = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(Int8->getContext()),
+ IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
+ ConstantInt *RequiresFullRuntimeVal =
+ ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
Function *Fn = getOrCreateRuntimeFunctionPtr(
omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
@@ -2749,7 +3324,8 @@ void CanonicalLoopInfo::collectControlBlocks(
void CanonicalLoopInfo::assertOK() const {
#ifndef NDEBUG
- if (!IsValid)
+ // No constraints if this object currently does not describe a loop.
+ if (!isValid())
return;
// Verify standard control-flow we use for OpenMP loops.
@@ -2835,3 +3411,13 @@ void CanonicalLoopInfo::assertOK() const {
"Exit condition must compare with the trip count");
#endif
}
+
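+// Clear the stored blocks so this object no longer describes a loop; isValid()
+// then returns false and assertOK() becomes a no-op.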
+void CanonicalLoopInfo::invalidate() {
+ Preheader = nullptr;
+ Header = nullptr;
+ Cond = nullptr;
+ Body = nullptr;
+ Latch = nullptr;
+ Exit = nullptr;
+ After = nullptr;
+}
diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp
index 6504e566ba4b..2e41799e13e9 100644
--- a/llvm/lib/IR/AbstractCallSite.cpp
+++ b/llvm/lib/IR/AbstractCallSite.cpp
@@ -121,7 +121,7 @@ AbstractCallSite::AbstractCallSite(const Use *U)
assert(CallbackEncMD->getNumOperands() >= 2 && "Incomplete !callback metadata");
- unsigned NumCallOperands = CB->getNumArgOperands();
+ unsigned NumCallOperands = CB->arg_size();
// Skip the var-arg flag at the end when reading the metadata.
for (unsigned u = 0, e = CallbackEncMD->getNumOperands() - 1; u < e; u++) {
Metadata *OpAsM = CallbackEncMD->getOperand(u).get();
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 69e2d85e58fe..7734c0a8de58 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -44,7 +45,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalIFunc.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
@@ -72,6 +72,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -554,16 +555,13 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
FunctionType *FTy = cast<FunctionType>(Ty);
print(FTy->getReturnType(), OS);
OS << " (";
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- if (I != FTy->param_begin())
- OS << ", ";
- print(*I, OS);
- }
- if (FTy->isVarArg()) {
- if (FTy->getNumParams()) OS << ", ";
- OS << "...";
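+ // ListSeparator prints nothing before the first element and ", " afterwards.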
+ ListSeparator LS;
+ for (Type *Ty : FTy->params()) {
+ OS << LS;
+ print(Ty, OS);
}
+ if (FTy->isVarArg())
+ OS << LS << "...";
OS << ')';
return;
}
@@ -633,12 +631,11 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
if (STy->getNumElements() == 0) {
OS << "{}";
} else {
- StructType::element_iterator I = STy->element_begin();
OS << "{ ";
- print(*I++, OS);
- for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
- OS << ", ";
- print(*I, OS);
+ ListSeparator LS;
+ for (Type *Ty : STy->elements()) {
+ OS << LS;
+ print(Ty, OS);
}
OS << " }";
@@ -988,7 +985,7 @@ void SlotTracker::processModule() {
// Add all the function attributes to the table.
// FIXME: Add attributes of other objects?
- AttributeSet FnAttrs = F.getAttributes().getFnAttributes();
+ AttributeSet FnAttrs = F.getAttributes().getFnAttrs();
if (FnAttrs.hasAttributes())
CreateAttributeSetSlot(FnAttrs);
}
@@ -1029,7 +1026,7 @@ void SlotTracker::processFunction() {
// target may not be linked into the optimizer.
if (const auto *Call = dyn_cast<CallBase>(&I)) {
// Add all the call attributes to the table.
- AttributeSet Attrs = Call->getAttributes().getFnAttributes();
+ AttributeSet Attrs = Call->getAttributes().getFnAttrs();
if (Attrs.hasAttributes())
CreateAttributeSetSlot(Attrs);
}
@@ -1277,18 +1274,38 @@ void SlotTracker::CreateTypeIdSlot(StringRef Id) {
TypeIdMap[Id] = TypeIdNext++;
}
+namespace {
+/// Common state (type printer, slot tracker, module) shared by most of the
+/// printer functions.
+struct AsmWriterContext {
+ TypePrinting *TypePrinter = nullptr;
+ SlotTracker *Machine = nullptr;
+ const Module *Context = nullptr;
+
+ AsmWriterContext(TypePrinting *TP, SlotTracker *ST, const Module *M = nullptr)
+ : TypePrinter(TP), Machine(ST), Context(M) {}
+
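+ // A shared context that carries no TypePrinting or SlotTracker.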
+ static AsmWriterContext &getEmpty() {
+ static AsmWriterContext EmptyCtx(nullptr, nullptr);
+ return EmptyCtx;
+ }
+
+ /// A callback that will be triggered when the underlying printer
+ /// prints a Metadata as an operand.
+ virtual void onWriteMetadataAsOperand(const Metadata *) {}
+
+ virtual ~AsmWriterContext() {}
+};
+} // end anonymous namespace
+
//===----------------------------------------------------------------------===//
// AsmWriter Implementation
//===----------------------------------------------------------------------===//
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context);
+ AsmWriterContext &WriterCtx);
static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context,
+ AsmWriterContext &WriterCtx,
bool FromValue = false);
static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
@@ -1331,9 +1348,7 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
}
static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
- TypePrinting &TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
if (CI->getType()->isIntegerTy(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
@@ -1442,36 +1457,30 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
Out << "blockaddress(";
- WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, BA->getFunction(), WriterCtx);
Out << ", ";
- WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), WriterCtx);
Out << ")";
return;
}
if (const auto *Equiv = dyn_cast<DSOLocalEquivalent>(CV)) {
Out << "dso_local_equivalent ";
- WriteAsOperandInternal(Out, Equiv->getGlobalValue(), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, Equiv->getGlobalValue(), WriterCtx);
return;
}
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
Type *ETy = CA->getType()->getElementType();
Out << '[';
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CA->getOperand(0), WriterCtx);
for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
Out << ", ";
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CA->getOperand(i), WriterCtx);
}
Out << ']';
return;
@@ -1489,17 +1498,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Type *ETy = CA->getType()->getElementType();
Out << '[';
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
- &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(0), WriterCtx);
for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
Out << ", ";
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
- Machine, Context);
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(i), WriterCtx);
}
Out << ']';
return;
@@ -1512,19 +1518,17 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
unsigned N = CS->getNumOperands();
if (N) {
Out << ' ';
- TypePrinter.print(CS->getOperand(0)->getType(), Out);
+ WriterCtx.TypePrinter->print(CS->getOperand(0)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CS->getOperand(0), WriterCtx);
for (unsigned i = 1; i < N; i++) {
Out << ", ";
- TypePrinter.print(CS->getOperand(i)->getType(), Out);
+ WriterCtx.TypePrinter->print(CS->getOperand(i)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CS->getOperand(i), WriterCtx);
}
Out << ' ';
}
@@ -1539,16 +1543,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
auto *CVVTy = cast<FixedVectorType>(CV->getType());
Type *ETy = CVVTy->getElementType();
Out << '<';
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
- Machine, Context);
+ WriteAsOperandInternal(Out, CV->getAggregateElement(0U), WriterCtx);
for (unsigned i = 1, e = CVVTy->getNumElements(); i != e; ++i) {
Out << ", ";
- TypePrinter.print(ETy, Out);
+ WriterCtx.TypePrinter->print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
- Machine, Context);
+ WriteAsOperandInternal(Out, CV->getAggregateElement(i), WriterCtx);
}
Out << '>';
return;
@@ -1584,7 +1586,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Optional<unsigned> InRangeOp;
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
- TypePrinter.print(GEP->getSourceElementType(), Out);
+ WriterCtx.TypePrinter->print(GEP->getSourceElementType(), Out);
Out << ", ";
InRangeOp = GEP->getInRangeIndex();
if (InRangeOp)
@@ -1594,9 +1596,9 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
if (InRangeOp && unsigned(OI - CE->op_begin()) == *InRangeOp)
Out << "inrange ";
- TypePrinter.print((*OI)->getType(), Out);
+ WriterCtx.TypePrinter->print((*OI)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, *OI, WriterCtx);
if (OI+1 != CE->op_end())
Out << ", ";
}
@@ -1609,7 +1611,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
if (CE->isCast()) {
Out << " to ";
- TypePrinter.print(CE->getType(), Out);
+ WriterCtx.TypePrinter->print(CE->getType(), Out);
}
if (CE->getOpcode() == Instruction::ShuffleVector)
@@ -1623,8 +1625,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!{";
for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
const Metadata *MD = Node->getOperand(mi);
@@ -1632,11 +1633,12 @@ static void writeMDTuple(raw_ostream &Out, const MDTuple *Node,
Out << "null";
else if (auto *MDV = dyn_cast<ValueAsMetadata>(MD)) {
Value *V = MDV->getValue();
- TypePrinter->print(V->getType(), Out);
+ WriterCtx.TypePrinter->print(V->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, V, TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, V, WriterCtx);
} else {
- WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, MD, WriterCtx);
+ WriterCtx.onWriteMetadataAsOperand(MD);
}
if (mi + 1 != me)
Out << ", ";
@@ -1665,15 +1667,12 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
struct MDFieldPrinter {
raw_ostream &Out;
FieldSeparator FS;
- TypePrinting *TypePrinter = nullptr;
- SlotTracker *Machine = nullptr;
- const Module *Context = nullptr;
+ AsmWriterContext &WriterCtx;
- explicit MDFieldPrinter(raw_ostream &Out) : Out(Out) {}
- MDFieldPrinter(raw_ostream &Out, TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context)
- : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
- }
+ explicit MDFieldPrinter(raw_ostream &Out)
+ : Out(Out), WriterCtx(AsmWriterContext::getEmpty()) {}
+ MDFieldPrinter(raw_ostream &Out, AsmWriterContext &Ctx)
+ : Out(Out), WriterCtx(Ctx) {}
void printTag(const DINode *N);
void printMacinfoType(const DIMacroNode *N);
@@ -1734,14 +1733,13 @@ void MDFieldPrinter::printString(StringRef Name, StringRef Value,
}
static void writeMetadataAsOperand(raw_ostream &Out, const Metadata *MD,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
if (!MD) {
Out << "null";
return;
}
- WriteAsOperandInternal(Out, MD, TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, MD, WriterCtx);
+ WriterCtx.onWriteMetadataAsOperand(MD);
}
void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD,
@@ -1750,7 +1748,7 @@ void MDFieldPrinter::printMetadata(StringRef Name, const Metadata *MD,
return;
Out << FS << Name << ": ";
- writeMetadataAsOperand(Out, MD, TypePrinter, Machine, Context);
+ writeMetadataAsOperand(Out, MD, WriterCtx);
}
template <class IntTy>
@@ -1763,7 +1761,7 @@ void MDFieldPrinter::printInt(StringRef Name, IntTy Int, bool ShouldSkipZero) {
void MDFieldPrinter::printAPInt(StringRef Name, const APInt &Int,
bool IsUnsigned, bool ShouldSkipZero) {
- if (ShouldSkipZero && Int.isNullValue())
+ if (ShouldSkipZero && Int.isZero())
return;
Out << FS << Name << ": ";
@@ -1847,10 +1845,9 @@ void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
}
static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!GenericDINode(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("header", N->getHeader());
if (N->getNumDwarfOperands()) {
@@ -1858,7 +1855,7 @@ static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
FieldSeparator IFS;
for (auto &I : N->dwarf_operands()) {
Out << IFS;
- writeMetadataAsOperand(Out, I, TypePrinter, Machine, Context);
+ writeMetadataAsOperand(Out, I, WriterCtx);
}
Out << "}";
}
@@ -1866,10 +1863,9 @@ static void writeGenericDINode(raw_ostream &Out, const GenericDINode *N,
}
static void writeDILocation(raw_ostream &Out, const DILocation *DL,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILocation(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
// Always output the line, since 0 is a relevant and important value for it.
Printer.printInt("line", DL->getLine(), /* ShouldSkipZero */ false);
Printer.printInt("column", DL->getColumn());
@@ -1881,10 +1877,9 @@ static void writeDILocation(raw_ostream &Out, const DILocation *DL,
}
static void writeDISubrange(raw_ostream &Out, const DISubrange *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DISubrange(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
auto *Count = N->getRawCountNode();
if (auto *CE = dyn_cast_or_null<ConstantAsMetadata>(Count)) {
@@ -1923,18 +1918,15 @@ static void writeDISubrange(raw_ostream &Out, const DISubrange *N,
}
static void writeDIGenericSubrange(raw_ostream &Out, const DIGenericSubrange *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIGenericSubrange(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
auto IsConstant = [&](Metadata *Bound) -> bool {
if (auto *BE = dyn_cast_or_null<DIExpression>(Bound)) {
- return BE->isConstant()
- ? DIExpression::SignedOrUnsignedConstant::SignedConstant ==
- *BE->isConstant()
- : false;
+ return BE->isConstant() &&
+ DIExpression::SignedOrUnsignedConstant::SignedConstant ==
+ *BE->isConstant();
}
return false;
};
@@ -1977,7 +1969,7 @@ static void writeDIGenericSubrange(raw_ostream &Out, const DIGenericSubrange *N,
}
static void writeDIEnumerator(raw_ostream &Out, const DIEnumerator *N,
- TypePrinting *, SlotTracker *, const Module *) {
+ AsmWriterContext &) {
Out << "!DIEnumerator(";
MDFieldPrinter Printer(Out);
Printer.printString("name", N->getName(), /* ShouldSkipEmpty */ false);
@@ -1989,7 +1981,7 @@ static void writeDIEnumerator(raw_ostream &Out, const DIEnumerator *N,
}
static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
- TypePrinting *, SlotTracker *, const Module *) {
+ AsmWriterContext &) {
Out << "!DIBasicType(";
MDFieldPrinter Printer(Out);
if (N->getTag() != dwarf::DW_TAG_base_type)
@@ -2004,10 +1996,9 @@ static void writeDIBasicType(raw_ostream &Out, const DIBasicType *N,
}
static void writeDIStringType(raw_ostream &Out, const DIStringType *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIStringType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
if (N->getTag() != dwarf::DW_TAG_string_type)
Printer.printTag(N);
Printer.printString("name", N->getName());
@@ -2021,10 +2012,9 @@ static void writeDIStringType(raw_ostream &Out, const DIStringType *N,
}
static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIDerivedType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope());
@@ -2040,14 +2030,14 @@ static void writeDIDerivedType(raw_ostream &Out, const DIDerivedType *N,
if (const auto &DWARFAddressSpace = N->getDWARFAddressSpace())
Printer.printInt("dwarfAddressSpace", *DWARFAddressSpace,
/* ShouldSkipZero */ false);
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDICompositeType(raw_ostream &Out, const DICompositeType *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DICompositeType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope());
@@ -2073,14 +2063,14 @@ static void writeDICompositeType(raw_ostream &Out, const DICompositeType *N,
/* ShouldSkipZero */ false);
else
Printer.printMetadata("rank", N->getRawRank(), /*ShouldSkipNull */ true);
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDISubroutineType(raw_ostream &Out, const DISubroutineType *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DISubroutineType(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printDIFlags("flags", N->getFlags());
Printer.printDwarfEnum("cc", N->getCC(), dwarf::ConventionString);
Printer.printMetadata("types", N->getRawTypeArray(),
@@ -2088,8 +2078,7 @@ static void writeDISubroutineType(raw_ostream &Out, const DISubroutineType *N,
Out << ")";
}
-static void writeDIFile(raw_ostream &Out, const DIFile *N, TypePrinting *,
- SlotTracker *, const Module *) {
+static void writeDIFile(raw_ostream &Out, const DIFile *N, AsmWriterContext &) {
Out << "!DIFile(";
MDFieldPrinter Printer(Out);
Printer.printString("filename", N->getFilename(),
@@ -2105,10 +2094,9 @@ static void writeDIFile(raw_ostream &Out, const DIFile *N, TypePrinting *,
}
static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DICompileUnit(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printDwarfEnum("language", N->getSourceLanguage(),
dwarf::LanguageString, /* ShouldSkipZero */ false);
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
@@ -2136,10 +2124,9 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
}
static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DISubprogram(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printString("linkageName", N->getLinkageName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
@@ -2159,14 +2146,14 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
Printer.printMetadata("declaration", N->getRawDeclaration());
Printer.printMetadata("retainedNodes", N->getRawRetainedNodes());
Printer.printMetadata("thrownTypes", N->getRawThrownTypes());
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDILexicalBlock(raw_ostream &Out, const DILexicalBlock *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILexicalBlock(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
@@ -2176,11 +2163,9 @@ static void writeDILexicalBlock(raw_ostream &Out, const DILexicalBlock *N,
static void writeDILexicalBlockFile(raw_ostream &Out,
const DILexicalBlockFile *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILexicalBlockFile(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("discriminator", N->getDiscriminator(),
@@ -2189,10 +2174,9 @@ static void writeDILexicalBlockFile(raw_ostream &Out,
}
static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DINamespace(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printBool("exportSymbols", N->getExportSymbols(), false);
@@ -2200,10 +2184,9 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
}
static void writeDICommonBlock(raw_ostream &Out, const DICommonBlock *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DICommonBlock(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), false);
Printer.printMetadata("declaration", N->getRawDecl(), false);
Printer.printString("name", N->getName());
@@ -2213,10 +2196,9 @@ static void writeDICommonBlock(raw_ostream &Out, const DICommonBlock *N,
}
static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIMacro(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMacinfoType(N);
Printer.printInt("line", N->getLine());
Printer.printString("name", N->getName());
@@ -2225,10 +2207,9 @@ static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
}
static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIMacroFile(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printInt("line", N->getLine());
Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
Printer.printMetadata("nodes", N->getRawElements());
@@ -2236,10 +2217,9 @@ static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
}
static void writeDIModule(raw_ostream &Out, const DIModule *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIModule(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printString("name", N->getName());
Printer.printString("configMacros", N->getConfigurationMacros());
@@ -2251,14 +2231,11 @@ static void writeDIModule(raw_ostream &Out, const DIModule *N,
Out << ")";
}
-
static void writeDITemplateTypeParameter(raw_ostream &Out,
const DITemplateTypeParameter *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DITemplateTypeParameter(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printMetadata("type", N->getRawType(), /* ShouldSkipNull */ false);
Printer.printBool("defaulted", N->isDefault(), /* Default= */ false);
@@ -2267,11 +2244,9 @@ static void writeDITemplateTypeParameter(raw_ostream &Out,
static void writeDITemplateValueParameter(raw_ostream &Out,
const DITemplateValueParameter *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DITemplateValueParameter(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
if (N->getTag() != dwarf::DW_TAG_template_value_parameter)
Printer.printTag(N);
Printer.printString("name", N->getName());
@@ -2282,10 +2257,9 @@ static void writeDITemplateValueParameter(raw_ostream &Out,
}
static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIGlobalVariable(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printString("linkageName", N->getLinkageName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
@@ -2297,14 +2271,14 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
Printer.printMetadata("templateParams", N->getRawTemplateParams());
Printer.printInt("align", N->getAlignInBits());
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILocalVariable(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printInt("arg", N->getArg());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
@@ -2313,14 +2287,14 @@ static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
Printer.printMetadata("type", N->getRawType());
Printer.printDIFlags("flags", N->getFlags());
Printer.printInt("align", N->getAlignInBits());
+ Printer.printMetadata("annotations", N->getRawAnnotations());
Out << ")";
}
static void writeDILabel(raw_ostream &Out, const DILabel *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DILabel(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printString("name", N->getName());
Printer.printMetadata("file", N->getRawFile());
@@ -2329,8 +2303,7 @@ static void writeDILabel(raw_ostream &Out, const DILabel *N,
}
static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIExpression(";
FieldSeparator FS;
if (N->isValid()) {
@@ -2355,37 +2328,34 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
}
static void writeDIArgList(raw_ostream &Out, const DIArgList *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context, bool FromValue = false) {
+ AsmWriterContext &WriterCtx,
+ bool FromValue = false) {
assert(FromValue &&
"Unexpected DIArgList metadata outside of value argument");
Out << "!DIArgList(";
FieldSeparator FS;
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
for (Metadata *Arg : N->getArgs()) {
Out << FS;
- WriteAsOperandInternal(Out, Arg, TypePrinter, Machine, Context, true);
+ WriteAsOperandInternal(Out, Arg, WriterCtx, true);
}
Out << ")";
}
static void writeDIGlobalVariableExpression(raw_ostream &Out,
const DIGlobalVariableExpression *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIGlobalVariableExpression(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printMetadata("var", N->getVariable());
Printer.printMetadata("expr", N->getExpression());
Out << ")";
}
static void writeDIObjCProperty(raw_ostream &Out, const DIObjCProperty *N,
- TypePrinting *TypePrinter, SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIObjCProperty(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printString("name", N->getName());
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
@@ -2397,23 +2367,21 @@ static void writeDIObjCProperty(raw_ostream &Out, const DIObjCProperty *N,
}
static void writeDIImportedEntity(raw_ostream &Out, const DIImportedEntity *N,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context) {
+ AsmWriterContext &WriterCtx) {
Out << "!DIImportedEntity(";
- MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ MDFieldPrinter Printer(Out, WriterCtx);
Printer.printTag(N);
Printer.printString("name", N->getName());
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("entity", N->getRawEntity());
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
+ Printer.printMetadata("elements", N->getRawElements());
Out << ")";
}
static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &Ctx) {
if (Node->isDistinct())
Out << "distinct ";
else if (Node->isTemporary())
@@ -2424,7 +2392,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
llvm_unreachable("Expected uniquable MDNode");
#define HANDLE_MDNODE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
- write##CLASS(Out, cast<CLASS>(Node), TypePrinter, Machine, Context); \
+ write##CLASS(Out, cast<CLASS>(Node), Ctx); \
break;
#include "llvm/IR/Metadata.def"
}
@@ -2433,9 +2401,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
// Full implementation of printing a Value as an operand with support for
// TypePrinting, etc.
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
- TypePrinting *TypePrinter,
- SlotTracker *Machine,
- const Module *Context) {
+ AsmWriterContext &WriterCtx) {
if (V->hasName()) {
PrintLLVMName(Out, V);
return;
@@ -2443,8 +2409,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
const Constant *CV = dyn_cast<Constant>(V);
if (CV && !isa<GlobalValue>(CV)) {
- assert(TypePrinter && "Constants require TypePrinting!");
- WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+ assert(WriterCtx.TypePrinter && "Constants require TypePrinting!");
+ WriteConstantInternal(Out, CV, WriterCtx);
return;
}
@@ -2468,13 +2434,14 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
}
if (auto *MD = dyn_cast<MetadataAsValue>(V)) {
- WriteAsOperandInternal(Out, MD->getMetadata(), TypePrinter, Machine,
- Context, /* FromValue */ true);
+ WriteAsOperandInternal(Out, MD->getMetadata(), WriterCtx,
+ /* FromValue */ true);
return;
}
char Prefix = '%';
int Slot;
+ auto *Machine = WriterCtx.Machine;
// If we have a SlotTracker, use it.
if (Machine) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
@@ -2513,30 +2480,30 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
}
static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
- TypePrinting *TypePrinter,
- SlotTracker *Machine, const Module *Context,
+ AsmWriterContext &WriterCtx,
bool FromValue) {
// Write DIExpressions and DIArgLists inline when used as a value. Improves
// readability of debug info intrinsics.
if (const DIExpression *Expr = dyn_cast<DIExpression>(MD)) {
- writeDIExpression(Out, Expr, TypePrinter, Machine, Context);
+ writeDIExpression(Out, Expr, WriterCtx);
return;
}
if (const DIArgList *ArgList = dyn_cast<DIArgList>(MD)) {
- writeDIArgList(Out, ArgList, TypePrinter, Machine, Context, FromValue);
+ writeDIArgList(Out, ArgList, WriterCtx, FromValue);
return;
}
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
std::unique_ptr<SlotTracker> MachineStorage;
- if (!Machine) {
- MachineStorage = std::make_unique<SlotTracker>(Context);
- Machine = MachineStorage.get();
+ SaveAndRestore<SlotTracker *> SARMachine(WriterCtx.Machine);
+ if (!WriterCtx.Machine) {
+ MachineStorage = std::make_unique<SlotTracker>(WriterCtx.Context);
+ WriterCtx.Machine = MachineStorage.get();
}
- int Slot = Machine->getMetadataSlot(N);
+ int Slot = WriterCtx.Machine->getMetadataSlot(N);
if (Slot == -1) {
if (const DILocation *Loc = dyn_cast<DILocation>(N)) {
- writeDILocation(Out, Loc, TypePrinter, Machine, Context);
+ writeDILocation(Out, Loc, WriterCtx);
return;
}
// Give the pointer value instead of "badref", since this comes up all
@@ -2555,13 +2522,13 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
}
auto *V = cast<ValueAsMetadata>(MD);
- assert(TypePrinter && "TypePrinter required for metadata values");
+ assert(WriterCtx.TypePrinter && "TypePrinter required for metadata values");
assert((FromValue || !isa<LocalAsMetadata>(V)) &&
"Unexpected function-local metadata outside of value argument");
- TypePrinter->print(V->getValue()->getType(), Out);
+ WriterCtx.TypePrinter->print(V->getValue()->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, V->getValue(), TypePrinter, Machine, Context);
+ WriteAsOperandInternal(Out, V->getValue(), WriterCtx);
}
namespace {
@@ -2592,6 +2559,10 @@ public:
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug);
+ AsmWriterContext getContext() {
+ return AsmWriterContext(&TypePrinter, &Machine, TheModule);
+ }
+
void printMDNodeBody(const MDNode *MD);
void printNamedMDNode(const NamedMDNode *NMD);
@@ -2618,7 +2589,8 @@ public:
void printTypeIdentities();
void printGlobal(const GlobalVariable *GV);
- void printIndirectSymbol(const GlobalIndirectSymbol *GIS);
+ void printAlias(const GlobalAlias *GA);
+ void printIFunc(const GlobalIFunc *GI);
void printComdat(const Comdat *C);
void printFunction(const Function *F);
void printArgument(const Argument *FA, AttributeSet Attrs);
@@ -2693,7 +2665,8 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
TypePrinter.print(Operand->getType(), Out);
Out << ' ';
}
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+ auto WriterCtx = getContext();
+ WriteAsOperandInternal(Out, Operand, WriterCtx);
}
void AssemblyWriter::writeSyncScope(const LLVMContext &Context,
@@ -2752,7 +2725,8 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
}
Out << ' ';
// Print the operand
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+ auto WriterCtx = getContext();
+ WriteAsOperandInternal(Out, Operand, WriterCtx);
}
void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
@@ -2776,6 +2750,7 @@ void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
Out << '(';
bool FirstInput = true;
+ auto WriterCtx = getContext();
for (const auto &Input : BU.Inputs) {
if (!FirstInput)
Out << ", ";
@@ -2783,7 +2758,7 @@ void AssemblyWriter::writeOperandBundles(const CallBase *Call) {
TypePrinter.print(Input->getType(), Out);
Out << " ";
- WriteAsOperandInternal(Out, Input, &TypePrinter, &Machine, TheModule);
+ WriteAsOperandInternal(Out, Input, WriterCtx);
}
Out << ')';
@@ -2853,12 +2828,12 @@ void AssemblyWriter::printModule(const Module *M) {
// Output all aliases.
if (!M->alias_empty()) Out << "\n";
for (const GlobalAlias &GA : M->aliases())
- printIndirectSymbol(&GA);
+ printAlias(&GA);
// Output all ifuncs.
if (!M->ifunc_empty()) Out << "\n";
for (const GlobalIFunc &GI : M->ifuncs())
- printIndirectSymbol(&GI);
+ printIFunc(&GI);
// Output all of the functions.
for (const Function &F : *M) {
@@ -3198,19 +3173,9 @@ static const char *getVisibilityName(GlobalValue::VisibilityTypes Vis) {
void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
Out << ", insts: " << FS->instCount();
+ if (FS->fflags().anyFlagSet())
+ Out << ", " << FS->fflags();
- FunctionSummary::FFlags FFlags = FS->fflags();
- if (FFlags.ReadNone | FFlags.ReadOnly | FFlags.NoRecurse |
- FFlags.ReturnDoesNotAlias | FFlags.NoInline | FFlags.AlwaysInline) {
- Out << ", funcFlags: (";
- Out << "readNone: " << FFlags.ReadNone;
- Out << ", readOnly: " << FFlags.ReadOnly;
- Out << ", noRecurse: " << FFlags.NoRecurse;
- Out << ", returnDoesNotAlias: " << FFlags.ReturnDoesNotAlias;
- Out << ", noInline: " << FFlags.NoInline;
- Out << ", alwaysInline: " << FFlags.AlwaysInline;
- Out << ")";
- }
if (!FS->calls().empty()) {
Out << ", calls: (";
FieldSeparator IFS;
@@ -3453,7 +3418,7 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
assert(!isa<DIArgList>(Op) &&
"DIArgLists should not appear in NamedMDNodes");
if (auto *Expr = dyn_cast<DIExpression>(Op)) {
- writeDIExpression(Out, Expr, nullptr, nullptr, nullptr);
+ writeDIExpression(Out, Expr, AsmWriterContext::getEmpty());
continue;
}
@@ -3544,7 +3509,8 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
if (GV->isMaterializable())
Out << "; Materializable\n";
- WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, GV->getParent());
+ WriteAsOperandInternal(Out, GV, WriterCtx);
Out << " = ";
if (!GV->hasInitializer() && GV->hasExternalLinkage())
@@ -3596,49 +3562,76 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
printInfoComment(*GV);
}
-void AssemblyWriter::printIndirectSymbol(const GlobalIndirectSymbol *GIS) {
- if (GIS->isMaterializable())
+void AssemblyWriter::printAlias(const GlobalAlias *GA) {
+ if (GA->isMaterializable())
Out << "; Materializable\n";
- WriteAsOperandInternal(Out, GIS, &TypePrinter, &Machine, GIS->getParent());
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, GA->getParent());
+ WriteAsOperandInternal(Out, GA, WriterCtx);
Out << " = ";
- Out << getLinkageNameWithSpace(GIS->getLinkage());
- PrintDSOLocation(*GIS, Out);
- PrintVisibility(GIS->getVisibility(), Out);
- PrintDLLStorageClass(GIS->getDLLStorageClass(), Out);
- PrintThreadLocalModel(GIS->getThreadLocalMode(), Out);
- StringRef UA = getUnnamedAddrEncoding(GIS->getUnnamedAddr());
+ Out << getLinkageNameWithSpace(GA->getLinkage());
+ PrintDSOLocation(*GA, Out);
+ PrintVisibility(GA->getVisibility(), Out);
+ PrintDLLStorageClass(GA->getDLLStorageClass(), Out);
+ PrintThreadLocalModel(GA->getThreadLocalMode(), Out);
+ StringRef UA = getUnnamedAddrEncoding(GA->getUnnamedAddr());
if (!UA.empty())
Out << UA << ' ';
- if (isa<GlobalAlias>(GIS))
- Out << "alias ";
- else if (isa<GlobalIFunc>(GIS))
- Out << "ifunc ";
- else
- llvm_unreachable("Not an alias or ifunc!");
-
- TypePrinter.print(GIS->getValueType(), Out);
+ Out << "alias ";
+ TypePrinter.print(GA->getValueType(), Out);
Out << ", ";
- const Constant *IS = GIS->getIndirectSymbol();
-
- if (!IS) {
- TypePrinter.print(GIS->getType(), Out);
+ if (const Constant *Aliasee = GA->getAliasee()) {
+ writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
+ } else {
+ TypePrinter.print(GA->getType(), Out);
Out << " <<NULL ALIASEE>>";
+ }
+
+ if (GA->hasPartition()) {
+ Out << ", partition \"";
+ printEscapedString(GA->getPartition(), Out);
+ Out << '"';
+ }
+
+ printInfoComment(*GA);
+ Out << '\n';
+}
+
+void AssemblyWriter::printIFunc(const GlobalIFunc *GI) {
+ if (GI->isMaterializable())
+ Out << "; Materializable\n";
+
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, GI->getParent());
+ WriteAsOperandInternal(Out, GI, WriterCtx);
+ Out << " = ";
+
+ Out << getLinkageNameWithSpace(GI->getLinkage());
+ PrintDSOLocation(*GI, Out);
+ PrintVisibility(GI->getVisibility(), Out);
+
+ Out << "ifunc ";
+
+ TypePrinter.print(GI->getValueType(), Out);
+ Out << ", ";
+
+ if (const Constant *Resolver = GI->getResolver()) {
+ writeOperand(Resolver, !isa<ConstantExpr>(Resolver));
} else {
- writeOperand(IS, !isa<ConstantExpr>(IS));
+ TypePrinter.print(GI->getType(), Out);
+ Out << " <<NULL RESOLVER>>";
}
- if (GIS->hasPartition()) {
+ if (GI->hasPartition()) {
Out << ", partition \"";
- printEscapedString(GIS->getPartition(), Out);
+ printEscapedString(GI->getPartition(), Out);
Out << '"';
}
- printInfoComment(*GIS);
+ printInfoComment(*GI);
Out << '\n';
}
@@ -3683,8 +3676,8 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << "; Materializable\n";
const AttributeList &Attrs = F->getAttributes();
- if (Attrs.hasAttributes(AttributeList::FunctionIndex)) {
- AttributeSet AS = Attrs.getFnAttributes();
+ if (Attrs.hasFnAttrs()) {
+ AttributeSet AS = Attrs.getFnAttrs();
std::string AttrStr;
for (const Attribute &Attr : AS) {
@@ -3721,11 +3714,12 @@ void AssemblyWriter::printFunction(const Function *F) {
}
FunctionType *FT = F->getFunctionType();
- if (Attrs.hasAttributes(AttributeList::ReturnIndex))
+ if (Attrs.hasRetAttrs())
Out << Attrs.getAsString(AttributeList::ReturnIndex) << ' ';
TypePrinter.print(F->getReturnType(), Out);
+ AsmWriterContext WriterCtx(&TypePrinter, &Machine, F->getParent());
Out << ' ';
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
+ WriteAsOperandInternal(Out, F, WriterCtx);
Out << '(';
// Loop over the arguments, printing them...
@@ -3738,7 +3732,7 @@ void AssemblyWriter::printFunction(const Function *F) {
// Output type...
TypePrinter.print(FT->getParamType(I), Out);
- AttributeSet ArgAttrs = Attrs.getParamAttributes(I);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
if (ArgAttrs.hasAttributes()) {
Out << ' ';
writeAttributeSet(ArgAttrs);
@@ -3750,7 +3744,7 @@ void AssemblyWriter::printFunction(const Function *F) {
// Insert commas as we go... the first arg doesn't get a comma
if (Arg.getArgNo() != 0)
Out << ", ";
- printArgument(&Arg, Attrs.getParamAttributes(Arg.getArgNo()));
+ printArgument(&Arg, Attrs.getParamAttrs(Arg.getArgNo()));
}
}
@@ -3770,8 +3764,8 @@ void AssemblyWriter::printFunction(const Function *F) {
if (F->getAddressSpace() != 0 || !Mod ||
Mod->getDataLayout().getProgramAddressSpace() != 0)
Out << " addrspace(" << F->getAddressSpace() << ")";
- if (Attrs.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes());
+ if (Attrs.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttrs());
if (F->hasSection()) {
Out << " section \"";
printEscapedString(F->getSection(), Out);
@@ -4127,7 +4121,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Type *RetTy = FTy->getReturnType();
const AttributeList &PAL = CI->getAttributes();
- if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ if (PAL.hasRetAttrs())
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// Only print addrspace(N) if necessary:
@@ -4142,10 +4136,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
- for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) {
if (op > 0)
Out << ", ";
- writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op));
}
// Emit an ellipsis if this is a musttail call in a vararg function. This
@@ -4156,8 +4150,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ...";
Out << ')';
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ if (PAL.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttrs());
writeOperandBundles(CI);
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
@@ -4172,7 +4166,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
PrintCallingConv(II->getCallingConv(), Out);
}
- if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ if (PAL.hasRetAttrs())
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// Only print addrspace(N) if necessary:
@@ -4187,15 +4181,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
- for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
+ for (unsigned op = 0, Eop = II->arg_size(); op < Eop; ++op) {
if (op)
Out << ", ";
- writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(II->getArgOperand(op), PAL.getParamAttrs(op));
}
Out << ')';
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ if (PAL.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttrs());
writeOperandBundles(II);
@@ -4215,7 +4209,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
PrintCallingConv(CBI->getCallingConv(), Out);
}
- if (PAL.hasAttributes(AttributeList::ReturnIndex))
+ if (PAL.hasRetAttrs())
Out << ' ' << PAL.getAsString(AttributeList::ReturnIndex);
// If possible, print out the short form of the callbr instruction. We can
@@ -4227,15 +4221,15 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
- for (unsigned op = 0, Eop = CBI->getNumArgOperands(); op < Eop; ++op) {
+ for (unsigned op = 0, Eop = CBI->arg_size(); op < Eop; ++op) {
if (op)
Out << ", ";
- writeParamOperand(CBI->getArgOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(CBI->getArgOperand(op), PAL.getParamAttrs(op));
}
Out << ')';
- if (PAL.hasAttributes(AttributeList::FunctionIndex))
- Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+ if (PAL.hasFnAttrs())
+ Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttrs());
writeOperandBundles(CBI);
@@ -4375,6 +4369,7 @@ void AssemblyWriter::printMetadataAttachments(
if (MDNames.empty())
MDs[0].second->getContext().getMDKindNames(MDNames);
+ auto WriterCtx = getContext();
for (const auto &I : MDs) {
unsigned Kind = I.first;
Out << Separator;
@@ -4384,7 +4379,7 @@ void AssemblyWriter::printMetadataAttachments(
} else
Out << "!<unknown kind #" << Kind << ">";
Out << ' ';
- WriteAsOperandInternal(Out, I.second, &TypePrinter, &Machine, TheModule);
+ WriteAsOperandInternal(Out, I.second, WriterCtx);
}
}
@@ -4406,7 +4401,8 @@ void AssemblyWriter::writeAllMDNodes() {
}
void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
- WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
+ auto WriterCtx = getContext();
+ WriteMDNodeBodyInternal(Out, Node, WriterCtx);
}
void AssemblyWriter::writeAttribute(const Attribute &Attr, bool InAttrGroup) {
@@ -4626,15 +4622,20 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST,
W.printGlobal(V);
else if (const Function *F = dyn_cast<Function>(GV))
W.printFunction(F);
+ else if (const GlobalAlias *A = dyn_cast<GlobalAlias>(GV))
+ W.printAlias(A);
+ else if (const GlobalIFunc *I = dyn_cast<GlobalIFunc>(GV))
+ W.printIFunc(I);
else
- W.printIndirectSymbol(cast<GlobalIndirectSymbol>(GV));
+ llvm_unreachable("Unknown GlobalValue to print out!");
} else if (const MetadataAsValue *V = dyn_cast<MetadataAsValue>(this)) {
V->getMetadata()->print(ROS, MST, getModuleFromVal(V));
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
OS << ' ';
- WriteConstantInternal(OS, C, TypePrinter, MST.getMachine(), nullptr);
+ AsmWriterContext WriterCtx(&TypePrinter, MST.getMachine());
+ WriteConstantInternal(OS, C, WriterCtx);
} else if (isa<InlineAsm>(this) || isa<Argument>(this)) {
this->printAsOperand(OS, /* PrintType */ true, MST);
} else {
@@ -4649,7 +4650,8 @@ static bool printWithoutType(const Value &V, raw_ostream &O,
SlotTracker *Machine, const Module *M) {
if (V.hasName() || isa<GlobalValue>(V) ||
(!isa<Constant>(V) && !isa<MetadataAsValue>(V))) {
- WriteAsOperandInternal(O, &V, nullptr, Machine, M);
+ AsmWriterContext WriterCtx(nullptr, Machine, M);
+ WriteAsOperandInternal(O, &V, WriterCtx);
return true;
}
return false;
@@ -4663,8 +4665,8 @@ static void printAsOperandImpl(const Value &V, raw_ostream &O, bool PrintType,
O << ' ';
}
- WriteAsOperandInternal(O, &V, &TypePrinter, MST.getMachine(),
- MST.getModule());
+ AsmWriterContext WriterCtx(&TypePrinter, MST.getMachine(), MST.getModule());
+ WriteAsOperandInternal(O, &V, WriterCtx);
}
void Value::printAsOperand(raw_ostream &O, bool PrintType,
@@ -4691,22 +4693,87 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType,
printAsOperandImpl(*this, O, PrintType, MST);
}
+/// Recursive version of printMetadataImpl.
+static void printMetadataImplRec(raw_ostream &ROS, const Metadata &MD,
+ AsmWriterContext &WriterCtx) {
+ formatted_raw_ostream OS(ROS);
+ WriteAsOperandInternal(OS, &MD, WriterCtx, /* FromValue */ true);
+
+ auto *N = dyn_cast<MDNode>(&MD);
+ if (!N || isa<DIExpression>(MD) || isa<DIArgList>(MD))
+ return;
+
+ OS << " = ";
+ WriteMDNodeBodyInternal(OS, N, WriterCtx);
+}
+
+namespace {
+struct MDTreeAsmWriterContext : public AsmWriterContext {
+ unsigned Level;
+ // {Level, Printed string}
+ using EntryTy = std::pair<unsigned, std::string>;
+ SmallVector<EntryTy, 4> Buffer;
+
+  // Used to break cycles, if there are any.
+ SmallPtrSet<const Metadata *, 4> Visited;
+
+ raw_ostream &MainOS;
+
+ MDTreeAsmWriterContext(TypePrinting *TP, SlotTracker *ST, const Module *M,
+ raw_ostream &OS, const Metadata *InitMD)
+ : AsmWriterContext(TP, ST, M), Level(0U), Visited({InitMD}), MainOS(OS) {}
+
+ void onWriteMetadataAsOperand(const Metadata *MD) override {
+ if (Visited.count(MD))
+ return;
+ Visited.insert(MD);
+
+ std::string Str;
+ raw_string_ostream SS(Str);
+ ++Level;
+    // A placeholder entry to remember the correct
+    // position in the buffer.
+ Buffer.emplace_back(std::make_pair(Level, ""));
+ unsigned InsertIdx = Buffer.size() - 1;
+
+ printMetadataImplRec(SS, *MD, *this);
+ Buffer[InsertIdx].second = std::move(SS.str());
+ --Level;
+ }
+
+ ~MDTreeAsmWriterContext() {
+ for (const auto &Entry : Buffer) {
+ MainOS << "\n";
+ unsigned NumIndent = Entry.first * 2U;
+ MainOS.indent(NumIndent) << Entry.second;
+ }
+ }
+};
+} // end anonymous namespace
+
static void printMetadataImpl(raw_ostream &ROS, const Metadata &MD,
ModuleSlotTracker &MST, const Module *M,
- bool OnlyAsOperand) {
+ bool OnlyAsOperand, bool PrintAsTree = false) {
formatted_raw_ostream OS(ROS);
TypePrinting TypePrinter(M);
- WriteAsOperandInternal(OS, &MD, &TypePrinter, MST.getMachine(), M,
- /* FromValue */ true);
+ std::unique_ptr<AsmWriterContext> WriterCtx;
+ if (PrintAsTree && !OnlyAsOperand)
+ WriterCtx = std::make_unique<MDTreeAsmWriterContext>(
+ &TypePrinter, MST.getMachine(), M, OS, &MD);
+ else
+ WriterCtx =
+ std::make_unique<AsmWriterContext>(&TypePrinter, MST.getMachine(), M);
+
+ WriteAsOperandInternal(OS, &MD, *WriterCtx, /* FromValue */ true);
auto *N = dyn_cast<MDNode>(&MD);
if (OnlyAsOperand || !N || isa<DIExpression>(MD) || isa<DIArgList>(MD))
return;
OS << " = ";
- WriteMDNodeBodyInternal(OS, N, &TypePrinter, MST.getMachine(), M);
+ WriteMDNodeBodyInternal(OS, N, *WriterCtx);
}
void Metadata::printAsOperand(raw_ostream &OS, const Module *M) const {
@@ -4730,6 +4797,18 @@ void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
}
+void MDNode::printTree(raw_ostream &OS, const Module *M) const {
+ ModuleSlotTracker MST(M, true);
+ printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false,
+ /*PrintAsTree=*/true);
+}
+
+void MDNode::printTree(raw_ostream &OS, ModuleSlotTracker &MST,
+ const Module *M) const {
+ printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false,
+ /*PrintAsTree=*/true);
+}
+
void ModuleSummaryIndex::print(raw_ostream &ROS, bool IsForDebug) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
@@ -4781,6 +4860,15 @@ void Metadata::dump(const Module *M) const {
dbgs() << '\n';
}
+LLVM_DUMP_METHOD
+void MDNode::dumpTree() const { dumpTree(nullptr); }
+
+LLVM_DUMP_METHOD
+void MDNode::dumpTree(const Module *M) const {
+ printTree(dbgs(), M);
+ dbgs() << '\n';
+}
+
// Allow printing of ModuleSummaryIndex from the debugger.
LLVM_DUMP_METHOD
void ModuleSummaryIndex::dump() const { print(dbgs(), /*IsForDebug=*/true); }
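A minimal usage sketch of the MDNode::printTree()/dumpTree() interface added above; the wrapper function and the choice of errs() are illustrative, not part of the patch:

#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Print a metadata node followed by the bodies of the nodes it references,
// one per indented line, instead of only their !N slot numbers.
static void debugPrintMDTree(const llvm::MDNode *N, const llvm::Module *M) {
  N->printTree(llvm::errs(), M); // dumpTree(M) does the same to dbgs(), plus a newline
  llvm::errs() << '\n';
}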
diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index 6498114cd60d..3d24ae062841 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -6,17 +6,23 @@
//
//===----------------------------------------------------------------------===//
//
+// This file implements helper functions for accessing assumption information
+// inside of the "llvm.assume" metadata.
+//
//===----------------------------------------------------------------------===//
#include "llvm/IR/Assumptions.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
using namespace llvm;
-bool llvm::hasAssumption(Function &F,
- const KnownAssumptionString &AssumptionStr) {
- const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+namespace {
+bool hasAssumption(const Attribute &A,
+ const KnownAssumptionString &AssumptionStr) {
if (!A.isValid())
return false;
assert(A.isStringAttribute() && "Expected a string attribute!");
@@ -24,9 +30,76 @@ bool llvm::hasAssumption(Function &F,
SmallVector<StringRef, 8> Strings;
A.getValueAsString().split(Strings, ",");
- return llvm::any_of(Strings, [=](StringRef Assumption) {
- return Assumption == AssumptionStr;
- });
+ return llvm::is_contained(Strings, AssumptionStr);
+}
+
+DenseSet<StringRef> getAssumptions(const Attribute &A) {
+ if (!A.isValid())
+ return DenseSet<StringRef>();
+ assert(A.isStringAttribute() && "Expected a string attribute!");
+
+ DenseSet<StringRef> Assumptions;
+ SmallVector<StringRef, 8> Strings;
+ A.getValueAsString().split(Strings, ",");
+
+ for (StringRef Str : Strings)
+ Assumptions.insert(Str);
+ return Assumptions;
+}
+
+template <typename AttrSite>
+bool addAssumptionsImpl(AttrSite &Site,
+ const DenseSet<StringRef> &Assumptions) {
+ if (Assumptions.empty())
+ return false;
+
+ DenseSet<StringRef> CurAssumptions = getAssumptions(Site);
+
+ if (!set_union(CurAssumptions, Assumptions))
+ return false;
+
+ LLVMContext &Ctx = Site.getContext();
+ Site.addFnAttr(llvm::Attribute::get(
+ Ctx, llvm::AssumptionAttrKey,
+ llvm::join(CurAssumptions.begin(), CurAssumptions.end(), ",")));
+
+ return true;
+}
+} // namespace
+
+bool llvm::hasAssumption(const Function &F,
+ const KnownAssumptionString &AssumptionStr) {
+ const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+ return ::hasAssumption(A, AssumptionStr);
+}
+
+bool llvm::hasAssumption(const CallBase &CB,
+ const KnownAssumptionString &AssumptionStr) {
+ if (Function *F = CB.getCalledFunction())
+ if (hasAssumption(*F, AssumptionStr))
+ return true;
+
+ const Attribute &A = CB.getFnAttr(AssumptionAttrKey);
+ return ::hasAssumption(A, AssumptionStr);
+}
+
+DenseSet<StringRef> llvm::getAssumptions(const Function &F) {
+ const Attribute &A = F.getFnAttribute(AssumptionAttrKey);
+ return ::getAssumptions(A);
+}
+
+DenseSet<StringRef> llvm::getAssumptions(const CallBase &CB) {
+ const Attribute &A = CB.getFnAttr(AssumptionAttrKey);
+ return ::getAssumptions(A);
+}
+
+bool llvm::addAssumptions(Function &F, const DenseSet<StringRef> &Assumptions) {
+ return ::addAssumptionsImpl(F, Assumptions);
+}
+
+bool llvm::addAssumptions(CallBase &CB,
+ const DenseSet<StringRef> &Assumptions) {
+ return ::addAssumptionsImpl(CB, Assumptions);
}
StringSet<> llvm::KnownAssumptionStrings({
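A hedged sketch of how the extended assumption helpers might be used; the wrapper and the choice of "omp_no_openmp" are illustrative (any string registered in KnownAssumptionStrings works the same way):

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Function.h"

// Returns true if the "llvm.assume" attribute on F actually changed.
static bool addNoOpenMPAssumption(llvm::Function &F) {
  if (llvm::hasAssumption(F, llvm::KnownAssumptionString("omp_no_openmp")))
    return false;
  // getAssumptions()/hasAssumption() read the comma-separated attribute
  // value; addAssumptions() unions new strings into it.
  llvm::DenseSet<llvm::StringRef> ToAdd;
  ToAdd.insert("omp_no_openmp");
  return llvm::addAssumptions(F, ToAdd);
}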
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 5cd1bafccc47..f81a446d6e46 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -812,42 +812,13 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
if (!B.contains(Kind))
continue;
- if (Attribute::isTypeAttrKind(Kind)) {
- Attrs.push_back(Attribute::get(C, Kind, B.getTypeAttr(Kind)));
- continue;
- }
-
Attribute Attr;
- switch (Kind) {
- case Attribute::Alignment:
- assert(B.getAlignment() && "Alignment must be set");
- Attr = Attribute::getWithAlignment(C, *B.getAlignment());
- break;
- case Attribute::StackAlignment:
- assert(B.getStackAlignment() && "StackAlignment must be set");
- Attr = Attribute::getWithStackAlignment(C, *B.getStackAlignment());
- break;
- case Attribute::Dereferenceable:
- Attr = Attribute::getWithDereferenceableBytes(
- C, B.getDereferenceableBytes());
- break;
- case Attribute::DereferenceableOrNull:
- Attr = Attribute::getWithDereferenceableOrNullBytes(
- C, B.getDereferenceableOrNullBytes());
- break;
- case Attribute::AllocSize: {
- auto A = B.getAllocSizeArgs();
- Attr = Attribute::getWithAllocSizeArgs(C, A.first, A.second);
- break;
- }
- case Attribute::VScaleRange: {
- auto A = B.getVScaleRangeArgs();
- Attr = Attribute::getWithVScaleRangeArgs(C, A.first, A.second);
- break;
- }
- default:
+ if (Attribute::isTypeAttrKind(Kind))
+ Attr = Attribute::get(C, Kind, B.getTypeAttr(Kind));
+ else if (Attribute::isIntAttrKind(Kind))
+ Attr = Attribute::get(C, Kind, B.getRawIntAttr(Kind));
+ else
Attr = Attribute::get(C, Kind);
- }
Attrs.push_back(Attr);
}
@@ -1209,33 +1180,36 @@ AttributeList AttributeList::get(LLVMContext &C,
return getImpl(C, NewAttrSets);
}
-AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const {
- if (hasAttribute(Index, Kind)) return *this;
+AttributeList
+AttributeList::addAttributeAtIndex(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const {
+ if (hasAttributeAtIndex(Index, Kind))
+ return *this;
AttributeSet Attrs = getAttributes(Index);
// TODO: Insert at correct position and avoid sort.
SmallVector<Attribute, 8> NewAttrs(Attrs.begin(), Attrs.end());
NewAttrs.push_back(Attribute::get(C, Kind));
- return setAttributes(C, Index, AttributeSet::get(C, NewAttrs));
+ return setAttributesAtIndex(C, Index, AttributeSet::get(C, NewAttrs));
}
-AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind,
- StringRef Value) const {
+AttributeList AttributeList::addAttributeAtIndex(LLVMContext &C, unsigned Index,
+ StringRef Kind,
+ StringRef Value) const {
AttrBuilder B;
B.addAttribute(Kind, Value);
- return addAttributes(C, Index, B);
+ return addAttributesAtIndex(C, Index, B);
}
-AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index,
- Attribute A) const {
+AttributeList AttributeList::addAttributeAtIndex(LLVMContext &C, unsigned Index,
+ Attribute A) const {
AttrBuilder B;
B.addAttribute(A);
- return addAttributes(C, Index, B);
+ return addAttributesAtIndex(C, Index, B);
}
-AttributeList AttributeList::setAttributes(LLVMContext &C, unsigned Index,
- AttributeSet Attrs) const {
+AttributeList AttributeList::setAttributesAtIndex(LLVMContext &C,
+ unsigned Index,
+ AttributeSet Attrs) const {
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
if (Index >= AttrSets.size())
@@ -1244,8 +1218,9 @@ AttributeList AttributeList::setAttributes(LLVMContext &C, unsigned Index,
return AttributeList::getImpl(C, AttrSets);
}
-AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &B) const {
+AttributeList AttributeList::addAttributesAtIndex(LLVMContext &C,
+ unsigned Index,
+ const AttrBuilder &B) const {
if (!B.hasAttributes())
return *this;
@@ -1263,7 +1238,7 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index,
AttrBuilder Merged(getAttributes(Index));
Merged.merge(B);
- return setAttributes(C, Index, AttributeSet::get(C, Merged));
+ return setAttributesAtIndex(C, Index, AttributeSet::get(C, Merged));
}
AttributeList AttributeList::addParamAttribute(LLVMContext &C,
@@ -1286,9 +1261,11 @@ AttributeList AttributeList::addParamAttribute(LLVMContext &C,
return getImpl(C, AttrSets);
}
-AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
- Attribute::AttrKind Kind) const {
- if (!hasAttribute(Index, Kind)) return *this;
+AttributeList
+AttributeList::removeAttributeAtIndex(LLVMContext &C, unsigned Index,
+ Attribute::AttrKind Kind) const {
+ if (!hasAttributeAtIndex(Index, Kind))
+ return *this;
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
@@ -1299,9 +1276,11 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
return getImpl(C, AttrSets);
}
-AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
- StringRef Kind) const {
- if (!hasAttribute(Index, Kind)) return *this;
+AttributeList AttributeList::removeAttributeAtIndex(LLVMContext &C,
+ unsigned Index,
+ StringRef Kind) const {
+ if (!hasAttributeAtIndex(Index, Kind))
+ return *this;
Index = attrIdxToArrayIdx(Index);
SmallVector<AttributeSet, 4> AttrSets(this->begin(), this->end());
@@ -1313,18 +1292,19 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index,
}
AttributeList
-AttributeList::removeAttributes(LLVMContext &C, unsigned Index,
- const AttrBuilder &AttrsToRemove) const {
+AttributeList::removeAttributesAtIndex(LLVMContext &C, unsigned Index,
+ const AttrBuilder &AttrsToRemove) const {
AttributeSet Attrs = getAttributes(Index);
AttributeSet NewAttrs = Attrs.removeAttributes(C, AttrsToRemove);
// If nothing was removed, return the original list.
if (Attrs == NewAttrs)
return *this;
- return setAttributes(C, Index, NewAttrs);
+ return setAttributesAtIndex(C, Index, NewAttrs);
}
-AttributeList AttributeList::removeAttributes(LLVMContext &C,
- unsigned WithoutIndex) const {
+AttributeList
+AttributeList::removeAttributesAtIndex(LLVMContext &C,
+ unsigned WithoutIndex) const {
if (!pImpl)
return {};
WithoutIndex = attrIdxToArrayIdx(WithoutIndex);
@@ -1335,79 +1315,73 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C,
return getImpl(C, AttrSets);
}
-AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C,
- unsigned Index,
- uint64_t Bytes) const {
+AttributeList AttributeList::addDereferenceableRetAttr(LLVMContext &C,
+ uint64_t Bytes) const {
AttrBuilder B;
B.addDereferenceableAttr(Bytes);
- return addAttributes(C, Index, B);
+ return addRetAttributes(C, B);
}
-AttributeList
-AttributeList::addDereferenceableOrNullAttr(LLVMContext &C, unsigned Index,
- uint64_t Bytes) const {
+AttributeList AttributeList::addDereferenceableParamAttr(LLVMContext &C,
+ unsigned Index,
+ uint64_t Bytes) const {
AttrBuilder B;
- B.addDereferenceableOrNullAttr(Bytes);
- return addAttributes(C, Index, B);
+ B.addDereferenceableAttr(Bytes);
+ return addParamAttributes(C, Index, B);
}
AttributeList
-AttributeList::addAllocSizeAttr(LLVMContext &C, unsigned Index,
- unsigned ElemSizeArg,
- const Optional<unsigned> &NumElemsArg) {
+AttributeList::addDereferenceableOrNullParamAttr(LLVMContext &C, unsigned Index,
+ uint64_t Bytes) const {
AttrBuilder B;
- B.addAllocSizeAttr(ElemSizeArg, NumElemsArg);
- return addAttributes(C, Index, B);
+ B.addDereferenceableOrNullAttr(Bytes);
+ return addParamAttributes(C, Index, B);
}
-AttributeList AttributeList::addVScaleRangeAttr(LLVMContext &C, unsigned Index,
- unsigned MinValue,
- unsigned MaxValue) {
+AttributeList
+AttributeList::addAllocSizeParamAttr(LLVMContext &C, unsigned Index,
+ unsigned ElemSizeArg,
+ const Optional<unsigned> &NumElemsArg) {
AttrBuilder B;
- B.addVScaleRangeAttr(MinValue, MaxValue);
- return addAttributes(C, Index, B);
+ B.addAllocSizeAttr(ElemSizeArg, NumElemsArg);
+ return addParamAttributes(C, Index, B);
}
//===----------------------------------------------------------------------===//
// AttributeList Accessor Methods
//===----------------------------------------------------------------------===//
-AttributeSet AttributeList::getParamAttributes(unsigned ArgNo) const {
+AttributeSet AttributeList::getParamAttrs(unsigned ArgNo) const {
return getAttributes(ArgNo + FirstArgIndex);
}
-AttributeSet AttributeList::getRetAttributes() const {
+AttributeSet AttributeList::getRetAttrs() const {
return getAttributes(ReturnIndex);
}
-AttributeSet AttributeList::getFnAttributes() const {
+AttributeSet AttributeList::getFnAttrs() const {
return getAttributes(FunctionIndex);
}
-bool AttributeList::hasAttribute(unsigned Index,
- Attribute::AttrKind Kind) const {
+bool AttributeList::hasAttributeAtIndex(unsigned Index,
+ Attribute::AttrKind Kind) const {
return getAttributes(Index).hasAttribute(Kind);
}
-bool AttributeList::hasAttribute(unsigned Index, StringRef Kind) const {
+bool AttributeList::hasAttributeAtIndex(unsigned Index, StringRef Kind) const {
return getAttributes(Index).hasAttribute(Kind);
}
-bool AttributeList::hasAttributes(unsigned Index) const {
+bool AttributeList::hasAttributesAtIndex(unsigned Index) const {
return getAttributes(Index).hasAttributes();
}
-bool AttributeList::hasFnAttribute(Attribute::AttrKind Kind) const {
+bool AttributeList::hasFnAttr(Attribute::AttrKind Kind) const {
return pImpl && pImpl->hasFnAttribute(Kind);
}
-bool AttributeList::hasFnAttribute(StringRef Kind) const {
- return hasAttribute(AttributeList::FunctionIndex, Kind);
-}
-
-bool AttributeList::hasParamAttribute(unsigned ArgNo,
- Attribute::AttrKind Kind) const {
- return hasAttribute(ArgNo + FirstArgIndex, Kind);
+bool AttributeList::hasFnAttr(StringRef Kind) const {
+ return hasAttributeAtIndex(AttributeList::FunctionIndex, Kind);
}
bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr,
@@ -1415,12 +1389,13 @@ bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr,
return pImpl && pImpl->hasAttrSomewhere(Attr, Index);
}
-Attribute AttributeList::getAttribute(unsigned Index,
- Attribute::AttrKind Kind) const {
+Attribute AttributeList::getAttributeAtIndex(unsigned Index,
+ Attribute::AttrKind Kind) const {
return getAttributes(Index).getAttribute(Kind);
}
-Attribute AttributeList::getAttribute(unsigned Index, StringRef Kind) const {
+Attribute AttributeList::getAttributeAtIndex(unsigned Index,
+ StringRef Kind) const {
return getAttributes(Index).getAttribute(Kind);
}
@@ -1460,26 +1435,29 @@ Type *AttributeList::getParamElementType(unsigned Index) const {
return getAttributes(Index + FirstArgIndex).getElementType();
}
-MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
- return getAttributes(Index).getStackAlignment();
+MaybeAlign AttributeList::getFnStackAlignment() const {
+ return getFnAttrs().getStackAlignment();
}
-uint64_t AttributeList::getDereferenceableBytes(unsigned Index) const {
- return getAttributes(Index).getDereferenceableBytes();
+MaybeAlign AttributeList::getRetStackAlignment() const {
+ return getRetAttrs().getStackAlignment();
}
-uint64_t AttributeList::getDereferenceableOrNullBytes(unsigned Index) const {
- return getAttributes(Index).getDereferenceableOrNullBytes();
+uint64_t AttributeList::getRetDereferenceableBytes() const {
+ return getRetAttrs().getDereferenceableBytes();
}
-std::pair<unsigned, Optional<unsigned>>
-AttributeList::getAllocSizeArgs(unsigned Index) const {
- return getAttributes(Index).getAllocSizeArgs();
+uint64_t AttributeList::getParamDereferenceableBytes(unsigned Index) const {
+ return getParamAttrs(Index).getDereferenceableBytes();
}
-std::pair<unsigned, unsigned>
-AttributeList::getVScaleRangeArgs(unsigned Index) const {
- return getAttributes(Index).getVScaleRangeArgs();
+uint64_t AttributeList::getRetDereferenceableOrNullBytes() const {
+ return getRetAttrs().getDereferenceableOrNullBytes();
+}
+
+uint64_t
+AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
+ return getParamAttrs(Index).getDereferenceableOrNullBytes();
}
std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const {
@@ -1520,7 +1498,7 @@ unsigned AttributeList::getNumAttrSets() const {
void AttributeList::print(raw_ostream &O) const {
O << "AttributeList[\n";
- for (unsigned i = index_begin(), e = index_end(); i != e; ++i) {
+ for (unsigned i : indexes()) {
if (!getAttributes(i).hasAttributes())
continue;
O << " { ";
@@ -1563,15 +1541,18 @@ AttrBuilder::AttrBuilder(AttributeSet AS) {
void AttrBuilder::clear() {
Attrs.reset();
TargetDepAttrs.clear();
- Alignment.reset();
- StackAlignment.reset();
- DerefBytes = DerefOrNullBytes = 0;
- AllocSizeArgs = 0;
- VScaleRangeArgs = 0;
+ IntAttrs = {};
TypeAttrs = {};
}
Optional<unsigned>
+AttrBuilder::kindToIntIndex(Attribute::AttrKind Kind) const {
+ if (Attribute::isIntAttrKind(Kind))
+ return Kind - Attribute::FirstIntAttr;
+ return None;
+}
+
+Optional<unsigned>
AttrBuilder::kindToTypeIndex(Attribute::AttrKind Kind) const {
if (Attribute::isTypeAttrKind(Kind))
return Kind - Attribute::FirstTypeAttr;
@@ -1589,18 +1570,8 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
if (Optional<unsigned> TypeIndex = kindToTypeIndex(Kind))
TypeAttrs[*TypeIndex] = Attr.getValueAsType();
- else if (Kind == Attribute::Alignment)
- Alignment = Attr.getAlignment();
- else if (Kind == Attribute::StackAlignment)
- StackAlignment = Attr.getStackAlignment();
- else if (Kind == Attribute::Dereferenceable)
- DerefBytes = Attr.getDereferenceableBytes();
- else if (Kind == Attribute::DereferenceableOrNull)
- DerefOrNullBytes = Attr.getDereferenceableOrNullBytes();
- else if (Kind == Attribute::AllocSize)
- AllocSizeArgs = Attr.getValueAsInt();
- else if (Kind == Attribute::VScaleRange)
- VScaleRangeArgs = Attr.getValueAsInt();
+ else if (Optional<unsigned> IntIndex = kindToIntIndex(Kind))
+ IntAttrs[*IntIndex] = Attr.getValueAsInt();
return *this;
}
@@ -1616,18 +1587,8 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
if (Optional<unsigned> TypeIndex = kindToTypeIndex(Val))
TypeAttrs[*TypeIndex] = nullptr;
- else if (Val == Attribute::Alignment)
- Alignment.reset();
- else if (Val == Attribute::StackAlignment)
- StackAlignment.reset();
- else if (Val == Attribute::Dereferenceable)
- DerefBytes = 0;
- else if (Val == Attribute::DereferenceableOrNull)
- DerefOrNullBytes = 0;
- else if (Val == Attribute::AllocSize)
- AllocSizeArgs = 0;
- else if (Val == Attribute::VScaleRange)
- VScaleRangeArgs = 0;
+ else if (Optional<unsigned> IntIndex = kindToIntIndex(Val))
+ IntAttrs[*IntIndex] = 0;
return *this;
}
@@ -1638,18 +1599,32 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) {
}
AttrBuilder &AttrBuilder::removeAttribute(StringRef A) {
- auto I = TargetDepAttrs.find(A);
- if (I != TargetDepAttrs.end())
- TargetDepAttrs.erase(I);
+ TargetDepAttrs.erase(A);
+ return *this;
+}
+
+uint64_t AttrBuilder::getRawIntAttr(Attribute::AttrKind Kind) const {
+ Optional<unsigned> IntIndex = kindToIntIndex(Kind);
+ assert(IntIndex && "Not an int attribute");
+ return IntAttrs[*IntIndex];
+}
+
+AttrBuilder &AttrBuilder::addRawIntAttr(Attribute::AttrKind Kind,
+ uint64_t Value) {
+ Optional<unsigned> IntIndex = kindToIntIndex(Kind);
+ assert(IntIndex && "Not an int attribute");
+ assert(Value && "Value cannot be zero");
+ Attrs[Kind] = true;
+ IntAttrs[*IntIndex] = Value;
return *this;
}
std::pair<unsigned, Optional<unsigned>> AttrBuilder::getAllocSizeArgs() const {
- return unpackAllocSizeArgs(AllocSizeArgs);
+ return unpackAllocSizeArgs(getRawIntAttr(Attribute::AllocSize));
}
std::pair<unsigned, unsigned> AttrBuilder::getVScaleRangeArgs() const {
- return unpackVScaleRangeArgs(VScaleRangeArgs);
+ return unpackVScaleRangeArgs(getRawIntAttr(Attribute::VScaleRange));
}
AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) {
@@ -1657,10 +1632,7 @@ AttrBuilder &AttrBuilder::addAlignmentAttr(MaybeAlign Align) {
return *this;
assert(*Align <= llvm::Value::MaximumAlignment && "Alignment too large.");
-
- Attrs[Attribute::Alignment] = true;
- Alignment = Align;
- return *this;
+ return addRawIntAttr(Attribute::Alignment, Align->value());
}
AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) {
@@ -1669,27 +1641,20 @@ AttrBuilder &AttrBuilder::addStackAlignmentAttr(MaybeAlign Align) {
return *this;
assert(*Align <= 0x100 && "Alignment too large.");
-
- Attrs[Attribute::StackAlignment] = true;
- StackAlignment = Align;
- return *this;
+ return addRawIntAttr(Attribute::StackAlignment, Align->value());
}
AttrBuilder &AttrBuilder::addDereferenceableAttr(uint64_t Bytes) {
if (Bytes == 0) return *this;
- Attrs[Attribute::Dereferenceable] = true;
- DerefBytes = Bytes;
- return *this;
+ return addRawIntAttr(Attribute::Dereferenceable, Bytes);
}
AttrBuilder &AttrBuilder::addDereferenceableOrNullAttr(uint64_t Bytes) {
if (Bytes == 0)
return *this;
- Attrs[Attribute::DereferenceableOrNull] = true;
- DerefOrNullBytes = Bytes;
- return *this;
+ return addRawIntAttr(Attribute::DereferenceableOrNull, Bytes);
}
AttrBuilder &AttrBuilder::addAllocSizeAttr(unsigned ElemSize,
@@ -1700,12 +1665,7 @@ AttrBuilder &AttrBuilder::addAllocSizeAttr(unsigned ElemSize,
AttrBuilder &AttrBuilder::addAllocSizeAttrFromRawRepr(uint64_t RawArgs) {
// (0, 0) is our "not present" value, so we need to check for it here.
assert(RawArgs && "Invalid allocsize arguments -- given allocsize(0, 0)");
-
- Attrs[Attribute::AllocSize] = true;
- // Reuse existing machinery to store this as a single 64-bit integer so we can
- // save a few bytes over using a pair<unsigned, Optional<unsigned>>.
- AllocSizeArgs = RawArgs;
- return *this;
+ return addRawIntAttr(Attribute::AllocSize, RawArgs);
}
AttrBuilder &AttrBuilder::addVScaleRangeAttr(unsigned MinValue,
@@ -1718,11 +1678,7 @@ AttrBuilder &AttrBuilder::addVScaleRangeAttrFromRawRepr(uint64_t RawArgs) {
if (RawArgs == 0)
return *this;
- Attrs[Attribute::VScaleRange] = true;
- // Reuse existing machinery to store this as a single 64-bit integer so we can
- // save a few bytes over using a pair<unsigned, unsigned>.
- VScaleRangeArgs = RawArgs;
- return *this;
+ return addRawIntAttr(Attribute::VScaleRange, RawArgs);
}
Type *AttrBuilder::getTypeAttr(Attribute::AttrKind Kind) const {
@@ -1760,24 +1716,10 @@ AttrBuilder &AttrBuilder::addInAllocaAttr(Type *Ty) {
}
AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
- // FIXME: What if both have alignments, but they don't match?!
- if (!Alignment)
- Alignment = B.Alignment;
-
- if (!StackAlignment)
- StackAlignment = B.StackAlignment;
-
- if (!DerefBytes)
- DerefBytes = B.DerefBytes;
-
- if (!DerefOrNullBytes)
- DerefOrNullBytes = B.DerefOrNullBytes;
-
- if (!AllocSizeArgs)
- AllocSizeArgs = B.AllocSizeArgs;
-
- if (!VScaleRangeArgs)
- VScaleRangeArgs = B.VScaleRangeArgs;
+ // FIXME: What if both have an int/type attribute, but they don't match?!
+ for (unsigned Index = 0; Index < Attribute::NumIntAttrKinds; ++Index)
+ if (!IntAttrs[Index])
+ IntAttrs[Index] = B.IntAttrs[Index];
for (unsigned Index = 0; Index < Attribute::NumTypeAttrKinds; ++Index)
if (!TypeAttrs[Index])
@@ -1792,24 +1734,10 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
}
AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
- // FIXME: What if both have alignments, but they don't match?!
- if (B.Alignment)
- Alignment.reset();
-
- if (B.StackAlignment)
- StackAlignment.reset();
-
- if (B.DerefBytes)
- DerefBytes = 0;
-
- if (B.DerefOrNullBytes)
- DerefOrNullBytes = 0;
-
- if (B.AllocSizeArgs)
- AllocSizeArgs = 0;
-
- if (B.VScaleRangeArgs)
- VScaleRangeArgs = 0;
+ // FIXME: What if both have an int/type attribute, but they don't match?!
+ for (unsigned Index = 0; Index < Attribute::NumIntAttrKinds; ++Index)
+ if (B.IntAttrs[Index])
+ IntAttrs[Index] = 0;
for (unsigned Index = 0; Index < Attribute::NumTypeAttrKinds; ++Index)
if (B.TypeAttrs[Index])
@@ -1861,7 +1789,7 @@ bool AttrBuilder::hasAttributes(AttributeList AL, uint64_t Index) const {
}
bool AttrBuilder::hasAlignmentAttr() const {
- return Alignment != 0;
+ return getRawIntAttr(Attribute::Alignment) != 0;
}
bool AttrBuilder::operator==(const AttrBuilder &B) const {
@@ -1872,9 +1800,7 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const {
if (B.TargetDepAttrs.find(TDA.first) == B.TargetDepAttrs.end())
return false;
- return Alignment == B.Alignment && StackAlignment == B.StackAlignment &&
- DerefBytes == B.DerefBytes && TypeAttrs == B.TypeAttrs &&
- VScaleRangeArgs == B.VScaleRangeArgs;
+ return IntAttrs == B.IntAttrs && TypeAttrs == B.TypeAttrs;
}
//===----------------------------------------------------------------------===//
@@ -1966,11 +1892,11 @@ static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
.addAttribute(Attribute::StackProtectReq);
if (Callee.hasFnAttribute(Attribute::StackProtectReq)) {
- Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
+ Caller.removeFnAttrs(OldSSPAttr);
Caller.addFnAttr(Attribute::StackProtectReq);
} else if (Callee.hasFnAttribute(Attribute::StackProtectStrong) &&
!Caller.hasFnAttribute(Attribute::StackProtectReq)) {
- Caller.removeAttributes(AttributeList::FunctionIndex, OldSSPAttr);
+ Caller.removeFnAttrs(OldSSPAttr);
Caller.addFnAttr(Attribute::StackProtectStrong);
} else if (Callee.hasFnAttribute(Attribute::StackProtect) &&
!Caller.hasFnAttribute(Attribute::StackProtectReq) &&
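A rough before/after sketch of the AttributeList renames in this file; the query helper below is invented and assumes an arbitrary call site CB:

#include <cstdint>
#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static void queryAttrs(const CallBase &CB) {
  AttributeList AL = CB.getAttributes();
  AttributeSet FnAttrs = AL.getFnAttrs();   // was getFnAttributes()
  AttributeSet RetAttrs = AL.getRetAttrs(); // was getRetAttributes()
  if (CB.arg_size() > 0) {
    AttributeSet Arg0 = AL.getParamAttrs(0); // was getParamAttributes(0)
    (void)Arg0;
  }
  // Raw-index queries now carry an explicit AtIndex suffix...
  bool HasIdx =
      AL.hasAttributeAtIndex(AttributeList::FunctionIndex, Attribute::NoUnwind);
  // ...while the common positions get dedicated wrappers.
  bool HasFn = AL.hasFnAttr(Attribute::NoUnwind); // was hasFnAttribute()
  uint64_t RetDeref = AL.getRetDereferenceableBytes();
  (void)FnAttrs; (void)RetAttrs; (void)HasIdx; (void)HasFn; (void)RetDeref;
}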
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 6271385183eb..d73d1e9c20b3 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -583,8 +583,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Can't use Intrinsic::getDeclaration here as the return types might
// then only be structurally equal.
FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
+ StringRef Suffix =
+ F->getContext().supportsTypedPointers() ? "p0i8" : "p0";
NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
- "llvm." + Name + ".p0i8", F->getParent());
+ "llvm." + Name + "." + Suffix, F->getParent());
return true;
}
static const Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
@@ -601,7 +603,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
auto fArgs = F->getFunctionType()->params();
Type *Tys[] = {fArgs[0], fArgs[1]};
- if (Name.find("lane") == StringRef::npos)
+ if (!Name.contains("lane"))
NewFn = Intrinsic::getDeclaration(F->getParent(),
StoreInts[fArgs.size() - 3], Tys);
else
@@ -1273,7 +1275,7 @@ static Value *UpgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallInst &CI,
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
- if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
@@ -1300,7 +1302,7 @@ static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
- if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
+ if (CI.arg_size() == 4) { // For masked intrinsics.
Value *VecSrc = CI.getOperand(2);
Value *Mask = CI.getOperand(3);
Res = EmitX86Select(Builder, Mask, Res, VecSrc);
@@ -1370,7 +1372,7 @@ static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
if (NumArgs >= 4) { // For masked intrinsics.
Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
@@ -1431,7 +1433,7 @@ static Value *upgradeAbs(IRBuilder<> &Builder, CallInst &CI) {
Value *Op0 = CI.getArgOperand(0);
Function *F = Intrinsic::getDeclaration(CI.getModule(), Intrinsic::abs, Ty);
Value *Res = Builder.CreateCall(F, {Op0, Builder.getInt1(false)});
- if (CI.getNumArgOperands() == 3)
+ if (CI.arg_size() == 3)
Res = EmitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
return Res;
}
@@ -1459,7 +1461,7 @@ static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
Value *Res = Builder.CreateMul(LHS, RHS);
- if (CI.getNumArgOperands() == 4)
+ if (CI.arg_size() == 4)
Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
return Res;
@@ -1514,7 +1516,7 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
}
- Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
+ Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}
@@ -1779,13 +1781,12 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
} else
return false;
- SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
- CI.arg_operands().end());
+ SmallVector<Value *, 4> Args(CI.args());
Args.pop_back();
Args.pop_back();
Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
Args);
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
CI.getArgOperand(NumArgs - 2));
return true;
@@ -1964,7 +1965,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->getType()),
{CI->getArgOperand(0)});
} else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
- if (CI->getNumArgOperands() == 4 &&
+ if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
@@ -2124,8 +2125,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
{ CI->getOperand(0), CI->getArgOperand(1) });
Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.cmp.p")) {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
Type *OpTy = Args[0]->getType();
unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
unsigned EltWidth = OpTy->getScalarSizeInBits();
@@ -2257,7 +2257,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
if (IsPS2PD)
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
- else if (CI->getNumArgOperands() == 4 &&
+ else if (CI->arg_size() == 4 &&
(!isa<ConstantInt>(CI->getArgOperand(3)) ||
cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
@@ -2270,7 +2270,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
: Builder.CreateSIToFP(Rep, DstTy, "cvt");
}
- if (CI->getNumArgOperands() >= 3)
+ if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.vcvtph2ps.") ||
@@ -2286,7 +2286,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateBitCast(
Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
- if (CI->getNumArgOperands() >= 3)
+ if (CI->arg_size() >= 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
@@ -2353,7 +2353,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
llvm_unreachable("Unknown suffix");
unsigned Imm;
- if (CI->getNumArgOperands() == 3) {
+ if (CI->arg_size() == 3) {
Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
} else {
Name = Name.substr(9); // strip off "xop.vpcom"
@@ -2417,7 +2417,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
EltTy->getPointerTo());
Value *Load = Builder.CreateLoad(EltTy, Cast);
Type *I32Ty = Type::getInt32Ty(C);
- Rep = UndefValue::get(VecTy);
+ Rep = PoisonValue::get(VecTy);
for (unsigned I = 0; I < EltNum; ++I)
Rep = Builder.CreateInsertElement(Rep, Load,
ConstantInt::get(I32Ty, I));
@@ -2442,7 +2442,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
// If there are 3 arguments, it's a masked intrinsic so we need a select.
- if (CI->getNumArgOperands() == 3)
+ if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (Name == "avx512.mask.pmov.qd.256" ||
@@ -2518,7 +2518,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
Rep = Builder.CreateShuffleVector(Op, M);
- if (CI->getNumArgOperands() == 3)
+ if (CI->arg_size() == 3)
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
@@ -2636,7 +2636,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
// If the intrinsic has a mask operand, handle that.
- if (CI->getNumArgOperands() == 5)
+ if (CI->arg_size() == 5)
Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx.vextractf128.") ||
@@ -2661,7 +2661,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
// If the intrinsic has a mask operand, handle that.
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (!IsX86 && Name == "stackprotectorcheck") {
@@ -2679,7 +2679,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name.startswith("avx.vperm2f128.") ||
@@ -2739,7 +2739,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufl.w" ||
@@ -2758,7 +2758,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && (Name == "sse2.pshufh.w" ||
@@ -2777,7 +2777,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
- if (CI->getNumArgOperands() == 4)
+ if (CI->arg_size() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
} else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
@@ -3346,7 +3346,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (NegAcc)
C = Builder.CreateFNeg(C);
- if (CI->getNumArgOperands() == 5 &&
+ if (CI->arg_size() == 5 &&
(!isa<ConstantInt>(CI->getArgOperand(4)) ||
cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
Intrinsic::ID IID;
@@ -3399,7 +3399,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Drop the "avx512.mask." to make it easier.
Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
bool IsSubAdd = Name[3] == 's';
- if (CI->getNumArgOperands() == 5) {
+ if (CI->arg_size() == 5) {
Intrinsic::ID IID;
// Check the character before ".512" in string.
if (Name[Name.size()-5] == 's')
@@ -3686,8 +3686,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
@@ -3701,14 +3700,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::aarch64_neon_bfmlalb:
case Intrinsic::aarch64_neon_bfmlalt: {
SmallVector<Value *, 3> Args;
- assert(CI->getNumArgOperands() == 3 &&
+ assert(CI->arg_size() == 3 &&
"Mismatch between function args and call args");
size_t OperandWidth =
CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
assert((OperandWidth == 64 || OperandWidth == 128) &&
"Unexpected operand width");
Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
- auto Iter = CI->arg_operands().begin();
+ auto Iter = CI->args().begin();
Args.push_back(*Iter++);
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
@@ -3722,18 +3721,17 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::ctlz:
case Intrinsic::cttz:
- assert(CI->getNumArgOperands() == 1 &&
+ assert(CI->arg_size() == 1 &&
"Mismatch between function args and call args");
NewCall =
Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
break;
case Intrinsic::objectsize: {
- Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
- ? Builder.getFalse()
- : CI->getArgOperand(2);
+ Value *NullIsUnknownSize =
+ CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
Value *Dynamic =
- CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
+ CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
NewCall = Builder.CreateCall(
NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
break;
@@ -3749,7 +3747,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::dbg_value:
// Upgrade from the old version that had an extra offset argument.
- assert(CI->getNumArgOperands() == 4);
+ assert(CI->arg_size() == 4);
// Drop nonzero offsets instead of attempting to upgrade them.
if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)))
if (Offset->isZeroValue()) {
@@ -3763,7 +3761,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
- assert(CI->getNumArgOperands() == 4 &&
+ assert(CI->arg_size() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
@@ -3777,7 +3775,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::var_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
- assert(CI->getNumArgOperands() == 4 &&
+ assert(CI->arg_size() == 4 &&
"Before LLVM 12.0 this intrinsic took four arguments");
// Create a new call with an added null annotation attribute argument.
NewCall = Builder.CreateCall(
@@ -3796,8 +3794,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_xop_vpermil2ps:
case Intrinsic::x86_xop_vpermil2pd_256:
case Intrinsic::x86_xop_vpermil2ps_256: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
@@ -3858,8 +3855,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_avx2_mpsadbw: {
// Need to truncate the last argument from i32 to i8 -- this argument models
// an inherently 8-bit immediate operand to these x86 instructions.
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
// Replace the last argument with a trunc.
Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
@@ -3873,8 +3869,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_avx512_mask_cmp_ps_128:
case Intrinsic::x86_avx512_mask_cmp_ps_256:
case Intrinsic::x86_avx512_mask_cmp_ps_512: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
unsigned NumElts =
cast<FixedVectorType>(Args[0]->getType())->getNumElements();
Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
@@ -3895,8 +3890,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::invariant_start:
case Intrinsic::invariant_end: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
break;
}
@@ -3904,8 +3898,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
- SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
- CI->arg_operands().end());
+ SmallVector<Value *, 4> Args(CI->args());
NewCall = Builder.CreateCall(NewFn, Args);
NewCall->copyMetadata(*CI);
break;
@@ -3921,7 +3914,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// @llvm.memset...(i8*, i8, i[32|64], i32, i1)
// -> @llvm.memset...(i8*, i8, i[32|64], i1)
// Note: i8*'s in the above can be any pointer type
- if (CI->getNumArgOperands() != 5) {
+ if (CI->arg_size() != 5) {
DefaultCase();
return;
}
@@ -4111,7 +4104,7 @@ void llvm::UpgradeARCRuntime(Module &M) {
bool InvalidCast = false;
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
Value *Arg = CI->getArgOperand(I);
// Bitcast argument to the parameter type of the new function if it's
@@ -4361,8 +4354,8 @@ struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
return;
// If we get here, the caller doesn't have the strictfp attribute
// but this callsite does. Replace the strictfp attribute with nobuiltin.
- Call.removeAttribute(AttributeList::FunctionIndex, Attribute::StrictFP);
- Call.addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
+ Call.removeFnAttr(Attribute::StrictFP);
+ Call.addFnAttr(Attribute::NoBuiltin);
}
};
} // namespace
@@ -4383,8 +4376,7 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
}
// Remove all incompatible attributes from function.
- F.removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(F.getReturnType()));
+ F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType()));
for (auto &Arg : F.args())
Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType()));
}
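A small sketch of the CallBase argument accessors this file migrates to; the helper is illustrative:

#include <cassert>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/InstrTypes.h"

// Collect the call arguments. args() replaces the old arg_operands() range and
// arg_size() replaces getNumArgOperands(); both exclude the callee and any
// operand-bundle operands.
static void collectArgs(llvm::CallBase &CB,
                        llvm::SmallVectorImpl<llvm::Value *> &Out) {
  Out.assign(CB.args().begin(), CB.args().end());
  assert(Out.size() == CB.arg_size() && "ranges agree on the argument count");
}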
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index d14abafdef2e..ed1956e0f7e9 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/BasicBlock.h"
#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -23,6 +24,9 @@
using namespace llvm;
+#define DEBUG_TYPE "ir"
+STATISTIC(NumInstrRenumberings, "Number of renumberings across all blocks");
+
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
if (Function *F = getParent())
return F->getValueSymbolTable();
@@ -505,6 +509,8 @@ void BasicBlock::renumberInstructions() {
BasicBlockBits Bits = getBasicBlockBits();
Bits.InstrOrderValid = true;
setBasicBlockBits(Bits);
+
+ NumInstrRenumberings++;
}
#ifndef NDEBUG
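The new counter uses the generic Statistic machinery; a stand-alone sketch of the same pattern, with an invented DEBUG_TYPE and counter name:

#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "my-pass"
// Registers a named counter that is reported when the tool runs with -stats
// (in builds where statistics are enabled).
STATISTIC(NumThingsProcessed, "Number of things processed");

static void noteThingProcessed() { ++NumThingsProcessed; }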
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 5f05aa2e94e7..437fd0558447 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -349,200 +349,6 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
}
}
-/// Wrapper around getFoldedSizeOfImpl() that adds caching.
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache);
-
-/// Return a ConstantExpr with type DestTy for sizeof on Ty, with any known
-/// factors factored out. If Folded is false, return null if no factoring was
-/// possible, to avoid endlessly bouncing an unfoldable expression back into the
-/// top-level folder.
-static Constant *getFoldedSizeOfImpl(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache) {
- // This is the actual implementation of getFoldedSizeOf(). To get the caching
- // behavior, we need to call getFoldedSizeOf() when we recurse.
-
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantInt::get(DestTy, ATy->getNumElements());
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true, Cache);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has size zero.
- if (NumElems == 0)
- return ConstantExpr::getNullValue(DestTy);
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true, Cache);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true, Cache)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantInt::get(DestTy, NumElems);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // Pointer size doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return getFoldedSizeOf(
- PointerType::get(IntegerType::get(PTy->getContext(), 1),
- PTy->getAddressSpace()),
- DestTy, true, Cache);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular sizeof expression.
- Constant *C = ConstantExpr::getSizeOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded,
- DenseMap<Type *, Constant *> &Cache) {
- // Check for previously generated folded size constant.
- auto It = Cache.find(Ty);
- if (It != Cache.end())
- return It->second;
- return Cache[Ty] = getFoldedSizeOfImpl(Ty, DestTy, Folded, Cache);
-}
-
-static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy, bool Folded) {
- DenseMap<Type *, Constant *> Cache;
- return getFoldedSizeOf(Ty, DestTy, Folded, Cache);
-}
-
-/// Return a ConstantExpr with type DestTy for alignof on Ty, with any known
-/// factors factored out. If Folded is false, return null if no factoring was
-/// possible, to avoid endlessly bouncing an unfoldable expression back into the
-/// top-level folder.
-static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy, bool Folded) {
- // The alignment of an array is equal to the alignment of the
- // array element. Note that this is not always true for vectors.
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *C = ConstantExpr::getAlignOf(ATy->getElementType());
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy,
- false),
- C, DestTy);
- return C;
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- // Packed structs always have an alignment of 1.
- if (STy->isPacked())
- return ConstantInt::get(DestTy, 1);
-
- // Otherwise, struct alignment is the maximum alignment of any member.
- // Without target data, we can't compare much, but we can check to see
- // if all the members have the same alignment.
- unsigned NumElems = STy->getNumElements();
- // An empty struct has minimal alignment.
- if (NumElems == 0)
- return ConstantInt::get(DestTy, 1);
- // Check for a struct with all members having the same alignment.
- Constant *MemberAlign =
- getFoldedAlignOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberAlign != getFoldedAlignOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberAlign;
- }
-
- // Pointer alignment doesn't depend on the pointee type, so canonicalize them
- // to an arbitrary pointee.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
- if (!PTy->getElementType()->isIntegerTy(1))
- return
- getFoldedAlignOf(PointerType::get(IntegerType::get(PTy->getContext(),
- 1),
- PTy->getAddressSpace()),
- DestTy, true);
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular alignof expression.
- Constant *C = ConstantExpr::getAlignOf(Ty);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
-/// Return a ConstantExpr with type DestTy for offsetof on Ty and FieldNo, with
-/// any known factors factored out. If Folded is false, return null if no
-/// factoring was possible, to avoid endlessly bouncing an unfoldable expression
-/// back into the top-level folder.
-static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo, Type *DestTy,
- bool Folded) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
- DestTy, false),
- FieldNo, DestTy);
- Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
-
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- unsigned NumElems = STy->getNumElements();
- // An empty struct has no members.
- if (NumElems == 0)
- return nullptr;
- // Check for a struct with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(STy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(STy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo,
- false,
- DestTy,
- false),
- FieldNo, DestTy);
- return ConstantExpr::getNUWMul(MemberSize, N);
- }
- }
-
- // If there's no interesting folding happening, bail so that we don't create
- // a constant that looks like it needs folding but really doesn't.
- if (!Folded)
- return nullptr;
-
- // Base case: Get a regular offsetof expression.
- Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- DestTy, false),
- C, DestTy);
- return C;
-}
-
Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Type *DestTy) {
if (isa<PoisonValue>(V))
@@ -666,53 +472,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// Is it a null pointer value?
if (V->isNullValue())
return ConstantInt::get(DestTy, 0);
- // If this is a sizeof-like expression, pull out multiplications by
- // known factors to expose them to subsequent folding. If it's an
- // alignof-like expression, factor out known factors.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getOperand(0)->isNullValue()) {
- // FIXME: Looks like getFoldedSizeOf(), getFoldedOffsetOf() and
- // getFoldedAlignOf() don't handle the case when DestTy is a vector of
- // pointers yet. We end up in asserts in CastInst::getCastOpcode (see
- // test/Analysis/ConstantFolding/cast-vector.ll). I've only seen this
- // happen in one "real" C-code test case, so it does not seem to be an
- // important optimization to handle vectors here. For now, simply bail
- // out.
- if (DestTy->isVectorTy())
- return nullptr;
- GEPOperator *GEPO = cast<GEPOperator>(CE);
- Type *Ty = GEPO->getSourceElementType();
- if (CE->getNumOperands() == 2) {
- // Handle a sizeof-like expression.
- Constant *Idx = CE->getOperand(1);
- bool isOne = isa<ConstantInt>(Idx) && cast<ConstantInt>(Idx)->isOne();
- if (Constant *C = getFoldedSizeOf(Ty, DestTy, !isOne)) {
- Idx = ConstantExpr::getCast(CastInst::getCastOpcode(Idx, true,
- DestTy, false),
- Idx, DestTy);
- return ConstantExpr::getMul(C, Idx);
- }
- } else if (CE->getNumOperands() == 3 &&
- CE->getOperand(1)->isNullValue()) {
- // Handle an alignof-like expression.
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked()) {
- ConstantInt *CI = cast<ConstantInt>(CE->getOperand(2));
- if (CI->isOne() &&
- STy->getNumElements() == 2 &&
- STy->getElementType(0)->isIntegerTy(1)) {
- return getFoldedAlignOf(STy->getElementType(1), DestTy, false);
- }
- }
- // Handle an offsetof-like expression.
- if (Ty->isStructTy() || Ty->isArrayTy()) {
- if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
- DestTy, false))
- return C;
- }
- }
- }
// Other pointer types cannot be casted
return nullptr;
case Instruction::UIToFP:
@@ -720,7 +479,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
const APInt &api = CI->getValue();
APFloat apf(DestTy->getFltSemantics(),
- APInt::getNullValue(DestTy->getPrimitiveSizeInBits()));
+ APInt::getZero(DestTy->getPrimitiveSizeInBits()));
apf.convertFromAPInt(api, opc==Instruction::SIToFP,
APFloat::rmNearestTiesToEven);
return ConstantFP::get(V->getContext(), apf);
@@ -908,13 +667,16 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
}
}
+ if (Constant *C = Val->getAggregateElement(CIdx))
+ return C;
+
// Lane < Splat minimum vector width => extractelt Splat(x), Lane -> x
if (CIdx->getValue().ult(ValVTy->getElementCount().getKnownMinValue())) {
if (Constant *SplatVal = Val->getSplatValue())
return SplatVal;
}
- return Val->getAggregateElement(CIdx);
+ return nullptr;
}
Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
@@ -969,12 +731,16 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
// If the mask is all zeros this is a splat, no need to go through all
// elements.
- if (all_of(Mask, [](int Elt) { return Elt == 0; }) &&
- !MaskEltCount.isScalable()) {
+ if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
Type *Ty = IntegerType::get(V1->getContext(), 32);
Constant *Elt =
ConstantExpr::getExtractElement(V1, ConstantInt::get(Ty, 0));
- return ConstantVector::getSplat(MaskEltCount, Elt);
+
+ if (Elt->isNullValue()) {
+ auto *VTy = VectorType::get(EltTy, MaskEltCount);
+ return ConstantAggregateZero::get(VTy);
+ } else if (!MaskEltCount.isScalable())
+ return ConstantVector::getSplat(MaskEltCount, Elt);
}
// Do not iterate on scalable vector. The num of elements is unknown at
// compile-time.
@@ -1379,7 +1145,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return ConstantInt::get(CI1->getContext(), C1V.udiv(C2V));
case Instruction::SDiv:
assert(!CI2->isZero() && "Div by zero handled above");
- if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ if (C2V.isAllOnes() && C1V.isMinSignedValue())
return PoisonValue::get(CI1->getType()); // MIN_INT / -1 -> poison
return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
case Instruction::URem:
@@ -1387,7 +1153,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return ConstantInt::get(CI1->getContext(), C1V.urem(C2V));
case Instruction::SRem:
assert(!CI2->isZero() && "Div by zero handled above");
- if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
+ if (C2V.isAllOnes() && C1V.isMinSignedValue())
return PoisonValue::get(CI1->getType()); // MIN_INT % -1 -> poison
return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
case Instruction::And:
@@ -2030,19 +1796,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
if (isa<ConstantInt>(C1) && isa<ConstantInt>(C2)) {
const APInt &V1 = cast<ConstantInt>(C1)->getValue();
const APInt &V2 = cast<ConstantInt>(C2)->getValue();
- switch (pred) {
- default: llvm_unreachable("Invalid ICmp Predicate");
- case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
- case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
- case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
- case ICmpInst::ICMP_SGT: return ConstantInt::get(ResultTy, V1.sgt(V2));
- case ICmpInst::ICMP_SLE: return ConstantInt::get(ResultTy, V1.sle(V2));
- case ICmpInst::ICMP_SGE: return ConstantInt::get(ResultTy, V1.sge(V2));
- case ICmpInst::ICMP_ULT: return ConstantInt::get(ResultTy, V1.ult(V2));
- case ICmpInst::ICMP_UGT: return ConstantInt::get(ResultTy, V1.ugt(V2));
- case ICmpInst::ICMP_ULE: return ConstantInt::get(ResultTy, V1.ule(V2));
- case ICmpInst::ICMP_UGE: return ConstantInt::get(ResultTy, V1.uge(V2));
- }
+ return ConstantInt::get(
+ ResultTy, ICmpInst::compare(V1, V2, (ICmpInst::Predicate)pred));
} else if (isa<ConstantFP>(C1) && isa<ConstantFP>(C2)) {
const APFloat &C1V = cast<ConstantFP>(C1)->getValueAPF();
const APFloat &C2V = cast<ConstantFP>(C2)->getValueAPF();
@@ -2564,7 +2319,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
if (isIndexInRangeOfArrayType(STy->getNumElements(), CI))
// It's in range, skip to the next index.
continue;
- if (CI->getSExtValue() < 0) {
+ if (CI->isNegative()) {
// It's out of range and negative, don't try to factor it.
Unknown = true;
continue;
@@ -2575,7 +2330,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
auto *CI = cast<ConstantInt>(CV->getElementAsConstant(I));
InRange &= isIndexInRangeOfArrayType(STy->getNumElements(), CI);
- if (CI->getSExtValue() < 0) {
+ if (CI->isNegative()) {
Unknown = true;
break;
}
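A note on the index checks above: CI->isNegative() inspects only the sign bit, whereas the old CI->getSExtValue() < 0 requires the index to fit in 64 bits and asserts otherwise. A minimal standalone sketch of the distinction, using a toy wide integer rather than LLVM's APInt:

#include <cassert>
#include <cstdint>
#include <vector>

// Toy arbitrary-width integer; Words[0] holds the least significant 64 bits.
struct ToyInt {
  unsigned BitWidth;
  std::vector<uint64_t> Words;

  // Sign check: looks only at the top bit, valid at any width.
  bool isNegative() const {
    unsigned TopBit = (BitWidth - 1) % 64;
    return (Words.back() >> TopBit) & 1;
  }

  // Sign-extended conversion: only defined when the value fits in 64 bits.
  int64_t getSExtValue() const {
    assert(BitWidth <= 64 && "value does not fit in int64_t");
    unsigned Shift = 64 - BitWidth;
    return static_cast<int64_t>(Words[0] << Shift) >> Shift;
  }
};

int main() {
  ToyInt Small{8, {0x80}};                      // i8 -128
  assert(Small.isNegative() && Small.getSExtValue() < 0);

  ToyInt Wide{128, {0, 0x8000000000000000ULL}}; // i128 with the sign bit set
  assert(Wide.isNegative());                    // fine at any width
  // Wide.getSExtValue() would assert; this is why isNegative() is preferred.
  return 0;
}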
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index 0649776dbc22..a0f2179bddb4 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -110,7 +110,7 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
APInt UMin(CR.getUnsignedMin());
if (UMin.isMaxValue())
return getEmpty(W);
- return ConstantRange(std::move(UMin) + 1, APInt::getNullValue(W));
+ return ConstantRange(std::move(UMin) + 1, APInt::getZero(W));
}
case CmpInst::ICMP_SGT: {
APInt SMin(CR.getSignedMin());
@@ -119,7 +119,7 @@ ConstantRange ConstantRange::makeAllowedICmpRegion(CmpInst::Predicate Pred,
return ConstantRange(std::move(SMin) + 1, APInt::getSignedMinValue(W));
}
case CmpInst::ICMP_UGE:
- return getNonEmpty(CR.getUnsignedMin(), APInt::getNullValue(W));
+ return getNonEmpty(CR.getUnsignedMin(), APInt::getZero(W));
case CmpInst::ICMP_SGE:
return getNonEmpty(CR.getSignedMin(), APInt::getSignedMinValue(W));
}
@@ -147,38 +147,77 @@ ConstantRange ConstantRange::makeExactICmpRegion(CmpInst::Predicate Pred,
return makeAllowedICmpRegion(Pred, C);
}
-bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
- APInt &RHS) const {
- bool Success = false;
+bool ConstantRange::areInsensitiveToSignednessOfICmpPredicate(
+ const ConstantRange &CR1, const ConstantRange &CR2) {
+ if (CR1.isEmptySet() || CR2.isEmptySet())
+ return true;
+
+ return (CR1.isAllNonNegative() && CR2.isAllNonNegative()) ||
+ (CR1.isAllNegative() && CR2.isAllNegative());
+}
+
+bool ConstantRange::areInsensitiveToSignednessOfInvertedICmpPredicate(
+ const ConstantRange &CR1, const ConstantRange &CR2) {
+ if (CR1.isEmptySet() || CR2.isEmptySet())
+ return true;
+
+ return (CR1.isAllNonNegative() && CR2.isAllNegative()) ||
+ (CR1.isAllNegative() && CR2.isAllNonNegative());
+}
+
+CmpInst::Predicate ConstantRange::getEquivalentPredWithFlippedSignedness(
+ CmpInst::Predicate Pred, const ConstantRange &CR1,
+ const ConstantRange &CR2) {
+ assert(CmpInst::isIntPredicate(Pred) && CmpInst::isRelational(Pred) &&
+ "Only for relational integer predicates!");
+ CmpInst::Predicate FlippedSignednessPred =
+ CmpInst::getFlippedSignednessPredicate(Pred);
+
+ if (areInsensitiveToSignednessOfICmpPredicate(CR1, CR2))
+ return FlippedSignednessPred;
+
+ if (areInsensitiveToSignednessOfInvertedICmpPredicate(CR1, CR2))
+ return CmpInst::getInversePredicate(FlippedSignednessPred);
+
+ return CmpInst::Predicate::BAD_ICMP_PREDICATE;
+}
+
+void ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
+ APInt &RHS, APInt &Offset) const {
+ Offset = APInt(getBitWidth(), 0);
if (isFullSet() || isEmptySet()) {
Pred = isEmptySet() ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
RHS = APInt(getBitWidth(), 0);
- Success = true;
} else if (auto *OnlyElt = getSingleElement()) {
Pred = CmpInst::ICMP_EQ;
RHS = *OnlyElt;
- Success = true;
} else if (auto *OnlyMissingElt = getSingleMissingElement()) {
Pred = CmpInst::ICMP_NE;
RHS = *OnlyMissingElt;
- Success = true;
} else if (getLower().isMinSignedValue() || getLower().isMinValue()) {
Pred =
getLower().isMinSignedValue() ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
RHS = getUpper();
- Success = true;
} else if (getUpper().isMinSignedValue() || getUpper().isMinValue()) {
Pred =
getUpper().isMinSignedValue() ? CmpInst::ICMP_SGE : CmpInst::ICMP_UGE;
RHS = getLower();
- Success = true;
+ } else {
+ Pred = CmpInst::ICMP_ULT;
+ RHS = getUpper() - getLower();
+ Offset = -getLower();
}
- assert((!Success || ConstantRange::makeExactICmpRegion(Pred, RHS) == *this) &&
+ assert(ConstantRange::makeExactICmpRegion(Pred, RHS) == add(Offset) &&
"Bad result!");
+}
- return Success;
+bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
+ APInt &RHS) const {
+ APInt Offset;
+ getEquivalentICmp(Pred, RHS, Offset);
+ return Offset.isZero();
}
bool ConstantRange::icmp(CmpInst::Predicate Pred,
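The three-argument getEquivalentICmp above can express any non-degenerate range, including wrapped ones, as (X + Offset) u< RHS with Offset = -Lower and RHS = Upper - Lower. A standalone exhaustive check of that identity on 8-bit values, assuming the same modular arithmetic as APInt:

#include <cassert>
#include <cstdint>

// Membership in a (possibly wrapped) half-open range [Lo, Hi) over u8.
static bool inRange(uint8_t X, uint8_t Lo, uint8_t Hi) {
  return Lo <= Hi ? (X >= Lo && X < Hi) : (X >= Lo || X < Hi);
}

int main() {
  // Wrapped range [200, 20): neither bound is a signed or unsigned minimum,
  // so the old bool-returning overload had no equivalent icmp. With an
  // offset it becomes (X + Offset) u< RHS.
  const uint8_t Lo = 200, Hi = 20;
  const uint8_t Offset = static_cast<uint8_t>(-Lo); // 56
  const uint8_t RHS = static_cast<uint8_t>(Hi - Lo); // 76

  for (int V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    bool ViaIcmp = static_cast<uint8_t>(X + Offset) < RHS;
    assert(ViaIcmp == inRange(X, Lo, Hi));
  }
  return 0;
}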
@@ -204,13 +243,13 @@ static ConstantRange makeExactMulNSWRegion(const APInt &V) {
// Handle special case for 0, -1 and 1. See the last for reason why we
// specialize -1 and 1.
unsigned BitWidth = V.getBitWidth();
- if (V == 0 || V.isOneValue())
+ if (V == 0 || V.isOne())
return ConstantRange::getFull(BitWidth);
APInt MinValue = APInt::getSignedMinValue(BitWidth);
APInt MaxValue = APInt::getSignedMaxValue(BitWidth);
// e.g. Returning [-127, 127], represented as [-127, -128).
- if (V.isAllOnesValue())
+ if (V.isAllOnes())
return ConstantRange(-MaxValue, MinValue);
APInt Lower, Upper;
@@ -248,8 +287,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
case Instruction::Add: {
if (Unsigned)
- return getNonEmpty(APInt::getNullValue(BitWidth),
- -Other.getUnsignedMax());
+ return getNonEmpty(APInt::getZero(BitWidth), -Other.getUnsignedMax());
APInt SignedMinVal = APInt::getSignedMinValue(BitWidth);
APInt SMin = Other.getSignedMin(), SMax = Other.getSignedMax();
@@ -291,7 +329,7 @@ ConstantRange::makeGuaranteedNoWrapRegion(Instruction::BinaryOps BinOp,
// to be at most bitwidth-1, which results in most conservative range.
APInt ShAmtUMax = ShAmt.getUnsignedMax();
if (Unsigned)
- return getNonEmpty(APInt::getNullValue(BitWidth),
+ return getNonEmpty(APInt::getZero(BitWidth),
APInt::getMaxValue(BitWidth).lshr(ShAmtUMax) + 1);
return getNonEmpty(APInt::getSignedMinValue(BitWidth).ashr(ShAmtUMax),
APInt::getSignedMaxValue(BitWidth).ashr(ShAmtUMax) + 1);
@@ -316,7 +354,7 @@ bool ConstantRange::isEmptySet() const {
}
bool ConstantRange::isWrappedSet() const {
- return Lower.ugt(Upper) && !Upper.isNullValue();
+ return Lower.ugt(Upper) && !Upper.isZero();
}
bool ConstantRange::isUpperWrapped() const {
@@ -343,11 +381,10 @@ ConstantRange::isSizeStrictlySmallerThan(const ConstantRange &Other) const {
bool
ConstantRange::isSizeLargerThan(uint64_t MaxSize) const {
- assert(MaxSize && "MaxSize can't be 0.");
  // If this is a full set, we need special handling to avoid needing an extra bit
// to represent the size.
if (isFullSet())
- return APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1);
+ return MaxSize == 0 || APInt::getMaxValue(getBitWidth()).ugt(MaxSize - 1);
return (Upper - Lower).ugt(MaxSize);
}
@@ -595,7 +632,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR,
APInt L = CR.Lower.ult(Lower) ? CR.Lower : Lower;
APInt U = (CR.Upper - 1).ugt(Upper - 1) ? CR.Upper : Upper;
- if (L.isNullValue() && U.isNullValue())
+ if (L.isZero() && U.isZero())
return getFull();
return ConstantRange(std::move(L), std::move(U));
@@ -644,6 +681,24 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR,
return ConstantRange(std::move(L), std::move(U));
}
+Optional<ConstantRange>
+ConstantRange::exactIntersectWith(const ConstantRange &CR) const {
+ // TODO: This can be implemented more efficiently.
+ ConstantRange Result = intersectWith(CR);
+ if (Result == inverse().unionWith(CR.inverse()).inverse())
+ return Result;
+ return None;
+}
+
+Optional<ConstantRange>
+ConstantRange::exactUnionWith(const ConstantRange &CR) const {
+ // TODO: This can be implemented more efficiently.
+ ConstantRange Result = unionWith(CR);
+ if (Result == inverse().intersectWith(CR.inverse()).inverse())
+ return Result;
+ return None;
+}
+
ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
uint32_t ResultBitWidth) const {
switch (CastOp) {
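exactUnionWith and exactIntersectWith above hand back a result only when it adds no extra elements, which they verify by rebuilding the operation from the inverses. A standalone model of why [0,2) u [2,7) has an exact single-range representation while [0,2) u [5,7) does not, treating ranges as explicit element sets; this sketches the semantics only, not the library's implementation:

#include <cstdint>
#include <cstdio>
#include <set>

using Set = std::set<uint8_t>;

// Expand a half-open, possibly wrapped range [Lo, Hi) into its element set.
static Set expand(uint8_t Lo, uint8_t Hi) {
  Set S;
  for (uint8_t X = Lo; X != Hi; ++X)
    S.insert(X);
  return S;
}

// The union is "exact" only if some single [Lo, Hi) holds exactly its elements.
static bool hasExactUnion(const Set &A, const Set &B) {
  Set U = A;
  U.insert(B.begin(), B.end());
  for (int Lo = 0; Lo < 256; ++Lo)
    for (int Hi = 0; Hi < 256; ++Hi)
      if (expand(static_cast<uint8_t>(Lo), static_cast<uint8_t>(Hi)) == U)
        return true;
  return false;
}

int main() {
  std::printf("[0,2) u [2,7): %d\n", hasExactUnion(expand(0, 2), expand(2, 7))); // 1
  std::printf("[0,2) u [5,7): %d\n", hasExactUnion(expand(0, 2), expand(5, 7))); // 0
  return 0;
}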
@@ -1055,6 +1110,25 @@ ConstantRange::multiply(const ConstantRange &Other) const {
return UR.isSizeStrictlySmallerThan(SR) ? UR : SR;
}
+ConstantRange ConstantRange::smul_fast(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return getEmpty();
+
+ APInt Min = getSignedMin();
+ APInt Max = getSignedMax();
+ APInt OtherMin = Other.getSignedMin();
+ APInt OtherMax = Other.getSignedMax();
+
+ bool O1, O2, O3, O4;
+ auto Muls = {Min.smul_ov(OtherMin, O1), Min.smul_ov(OtherMax, O2),
+ Max.smul_ov(OtherMin, O3), Max.smul_ov(OtherMax, O4)};
+ if (O1 || O2 || O3 || O4)
+ return getFull();
+
+ auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
+ return getNonEmpty(std::min(Muls, Compare), std::max(Muls, Compare) + 1);
+}
+
ConstantRange
ConstantRange::smax(const ConstantRange &Other) const {
// X smax Y is: range(smax(X_smin, Y_smin),
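smul_fast above only looks at the four signed corner products: if none of them overflows, the result is [min, max + 1); any overflow makes it give up and return the full set. The same shape on 8-bit values, as a standalone sketch with illustrative helper names:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Signed i8 multiply, reporting whether the true product overflows i8.
static bool smulOverflows(int8_t A, int8_t B, int8_t &Res) {
  int Wide = static_cast<int>(A) * static_cast<int>(B);
  Res = static_cast<int8_t>(Wide);
  return Wide < INT8_MIN || Wide > INT8_MAX;
}

// [Min, Max] x [OtherMin, OtherMax]: corner products only, bail on overflow.
static bool smulFast(int8_t Min, int8_t Max, int8_t OtherMin, int8_t OtherMax,
                     int8_t &Lo, int8_t &Hi) {
  int8_t P1, P2, P3, P4;
  bool O1 = smulOverflows(Min, OtherMin, P1);
  bool O2 = smulOverflows(Min, OtherMax, P2);
  bool O3 = smulOverflows(Max, OtherMin, P3);
  bool O4 = smulOverflows(Max, OtherMax, P4);
  if (O1 || O2 || O3 || O4)
    return false; // caller treats this as the full range
  Lo = std::min({P1, P2, P3, P4});
  Hi = std::max({P1, P2, P3, P4});
  return true;
}

int main() {
  int8_t Lo, Hi;
  if (smulFast(-3, 4, -2, 5, Lo, Hi))
    std::printf("[%d, %d]\n", Lo, Hi); // corners 6, -15, -8, 20 -> [-15, 20]
  return 0;
}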
@@ -1113,13 +1187,13 @@ ConstantRange::umin(const ConstantRange &Other) const {
ConstantRange
ConstantRange::udiv(const ConstantRange &RHS) const {
- if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isZero())
return getEmpty();
APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax());
APInt RHS_umin = RHS.getUnsignedMin();
- if (RHS_umin.isNullValue()) {
+ if (RHS_umin.isZero()) {
// We want the lowest value in RHS excluding zero. Usually that would be 1
// except for a range in the form of [X, 1) in which case it would be X.
if (RHS.getUpper() == 1)
@@ -1136,7 +1210,7 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
// We split up the LHS and RHS into positive and negative components
// and then also compute the positive and negative components of the result
// separately by combining division results with the appropriate signs.
- APInt Zero = APInt::getNullValue(getBitWidth());
+ APInt Zero = APInt::getZero(getBitWidth());
APInt SignedMin = APInt::getSignedMinValue(getBitWidth());
ConstantRange PosFilter(APInt(getBitWidth(), 1), SignedMin);
ConstantRange NegFilter(SignedMin, Zero);
@@ -1159,12 +1233,12 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
// (For APInts the operation is well-defined and yields SignedMin.) We
// handle this by dropping either SignedMin from the LHS or -1 from the RHS.
APInt Lo = (NegL.Upper - 1).sdiv(NegR.Lower);
- if (NegL.Lower.isMinSignedValue() && NegR.Upper.isNullValue()) {
+ if (NegL.Lower.isMinSignedValue() && NegR.Upper.isZero()) {
// Remove -1 from the LHS. Skip if it's the only element, as this would
// leave us with an empty set.
- if (!NegR.Lower.isAllOnesValue()) {
+ if (!NegR.Lower.isAllOnes()) {
APInt AdjNegRUpper;
- if (RHS.Lower.isAllOnesValue())
+ if (RHS.Lower.isAllOnes())
// Negative part of [-1, X] without -1 is [SignedMin, X].
AdjNegRUpper = RHS.Upper;
else
@@ -1218,12 +1292,12 @@ ConstantRange ConstantRange::sdiv(const ConstantRange &RHS) const {
}
ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
- if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isNullValue())
+ if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax().isZero())
return getEmpty();
if (const APInt *RHSInt = RHS.getSingleElement()) {
// UREM by null is UB.
- if (RHSInt->isNullValue())
+ if (RHSInt->isZero())
return getEmpty();
// Use APInt's implementation of UREM for single element ranges.
if (const APInt *LHSInt = getSingleElement())
@@ -1236,7 +1310,7 @@ ConstantRange ConstantRange::urem(const ConstantRange &RHS) const {
// L % R is <= L and < R.
APInt Upper = APIntOps::umin(getUnsignedMax(), RHS.getUnsignedMax() - 1) + 1;
- return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(Upper));
+ return getNonEmpty(APInt::getZero(getBitWidth()), std::move(Upper));
}
ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
@@ -1245,7 +1319,7 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
if (const APInt *RHSInt = RHS.getSingleElement()) {
// SREM by null is UB.
- if (RHSInt->isNullValue())
+ if (RHSInt->isZero())
return getEmpty();
// Use APInt's implementation of SREM for single element ranges.
if (const APInt *LHSInt = getSingleElement())
@@ -1257,10 +1331,10 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
APInt MaxAbsRHS = AbsRHS.getUnsignedMax();
// Modulus by zero is UB.
- if (MaxAbsRHS.isNullValue())
+ if (MaxAbsRHS.isZero())
return getEmpty();
- if (MinAbsRHS.isNullValue())
+ if (MinAbsRHS.isZero())
++MinAbsRHS;
APInt MinLHS = getSignedMin(), MaxLHS = getSignedMax();
@@ -1272,7 +1346,7 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
// L % R is <= L and < R.
APInt Upper = APIntOps::umin(MaxLHS, MaxAbsRHS - 1) + 1;
- return ConstantRange(APInt::getNullValue(getBitWidth()), std::move(Upper));
+ return ConstantRange(APInt::getZero(getBitWidth()), std::move(Upper));
}
// Same basic logic as above, but the result is negative.
@@ -1291,7 +1365,7 @@ ConstantRange ConstantRange::srem(const ConstantRange &RHS) const {
}
ConstantRange ConstantRange::binaryNot() const {
- return ConstantRange(APInt::getAllOnesValue(getBitWidth())).sub(*this);
+ return ConstantRange(APInt::getAllOnes(getBitWidth())).sub(*this);
}
ConstantRange
@@ -1306,7 +1380,7 @@ ConstantRange::binaryAnd(const ConstantRange &Other) const {
// TODO: replace this with something less conservative
APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax());
- return getNonEmpty(APInt::getNullValue(getBitWidth()), std::move(umin) + 1);
+ return getNonEmpty(APInt::getZero(getBitWidth()), std::move(umin) + 1);
}
ConstantRange
@@ -1321,7 +1395,7 @@ ConstantRange::binaryOr(const ConstantRange &Other) const {
// TODO: replace this with something less conservative
APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin());
- return getNonEmpty(std::move(umax), APInt::getNullValue(getBitWidth()));
+ return getNonEmpty(std::move(umax), APInt::getZero(getBitWidth()));
}
ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
@@ -1333,9 +1407,9 @@ ConstantRange ConstantRange::binaryXor(const ConstantRange &Other) const {
return {*getSingleElement() ^ *Other.getSingleElement()};
// Special-case binary complement, since we can give a precise answer.
- if (Other.isSingleElement() && Other.getSingleElement()->isAllOnesValue())
+ if (Other.isSingleElement() && Other.getSingleElement()->isAllOnes())
return binaryNot();
- if (isSingleElement() && getSingleElement()->isAllOnesValue())
+ if (isSingleElement() && getSingleElement()->isAllOnes())
return Other.binaryNot();
// TODO: replace this with something less conservative
@@ -1347,24 +1421,33 @@ ConstantRange::shl(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return getEmpty();
- APInt max = getUnsignedMax();
- APInt Other_umax = Other.getUnsignedMax();
+ APInt Min = getUnsignedMin();
+ APInt Max = getUnsignedMax();
+ if (const APInt *RHS = Other.getSingleElement()) {
+ unsigned BW = getBitWidth();
+ if (RHS->uge(BW))
+ return getEmpty();
- // If we are shifting by maximum amount of
- // zero return return the original range.
- if (Other_umax.isNullValue())
- return *this;
- // there's overflow!
- if (Other_umax.ugt(max.countLeadingZeros()))
+ unsigned EqualLeadingBits = (Min ^ Max).countLeadingZeros();
+ if (RHS->ule(EqualLeadingBits))
+ return getNonEmpty(Min << *RHS, (Max << *RHS) + 1);
+
+ return getNonEmpty(APInt::getZero(BW),
+ APInt::getBitsSetFrom(BW, RHS->getZExtValue()) + 1);
+ }
+
+ APInt OtherMax = Other.getUnsignedMax();
+
+ // There's overflow!
+ if (OtherMax.ugt(Max.countLeadingZeros()))
return getFull();
// FIXME: implement the other tricky cases
- APInt min = getUnsignedMin();
- min <<= Other.getUnsignedMin();
- max <<= Other_umax;
+ Min <<= Other.getUnsignedMin();
+ Max <<= OtherMax;
- return ConstantRange(std::move(min), std::move(max) + 1);
+ return ConstantRange::getNonEmpty(std::move(Min), std::move(Max) + 1);
}
ConstantRange
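The new constant-shift path in shl above stays exact as long as the shift amount does not exceed the number of leading bits in which Min and Max agree, which is exactly (Min ^ Max).countLeadingZeros(); otherwise it falls back to [0, all bits set from the shift amount]. A standalone restatement on 8-bit values, assuming C++20 <bit> and the same unsigned wrap-around semantics:

#include <bit>
#include <cassert>
#include <cstdint>

// Shift a non-wrapped unsigned range [Min, Max] left by a constant amount,
// following the single-element case of ConstantRange::shl.
static void shlRange(uint8_t Min, uint8_t Max, unsigned Amt, uint8_t &Lo,
                     uint8_t &Hi) {
  assert(Amt < 8 && "shift by >= bitwidth is handled as an empty range");
  unsigned EqualLeadingBits =
      std::countl_zero(static_cast<uint8_t>(Min ^ Max));
  if (Amt <= EqualLeadingBits) {
    // No bit in which the bounds differ is shifted out: the result is exact.
    Lo = static_cast<uint8_t>(Min << Amt);
    Hi = static_cast<uint8_t>(Max << Amt);
  } else {
    // Differing bits wrap: conservatively report every value up to the largest
    // result that still has its low Amt bits clear.
    Lo = 0;
    Hi = static_cast<uint8_t>(0xFF << Amt);
  }
}

int main() {
  uint8_t Lo, Hi;
  shlRange(0x30, 0x33, 2, Lo, Hi); // Min ^ Max = 0x03: 6 equal leading bits
  assert(Lo == 0xC0 && Hi == 0xCC);
  shlRange(0x30, 0x70, 3, Lo, Hi); // only 1 equal leading bit: conservative
  assert(Lo == 0x00 && Hi == 0xF8);
  return 0;
}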
@@ -1483,20 +1566,15 @@ ConstantRange ConstantRange::smul_sat(const ConstantRange &Other) const {
// [-1,4) * [-2,3) = min(-1*-2, -1*2, 3*-2, 3*2) = -6.
// Similarly for the upper bound, swapping min for max.
- APInt this_min = getSignedMin().sext(getBitWidth() * 2);
- APInt this_max = getSignedMax().sext(getBitWidth() * 2);
- APInt Other_min = Other.getSignedMin().sext(getBitWidth() * 2);
- APInt Other_max = Other.getSignedMax().sext(getBitWidth() * 2);
+ APInt Min = getSignedMin();
+ APInt Max = getSignedMax();
+ APInt OtherMin = Other.getSignedMin();
+ APInt OtherMax = Other.getSignedMax();
- auto L = {this_min * Other_min, this_min * Other_max, this_max * Other_min,
- this_max * Other_max};
+ auto L = {Min.smul_sat(OtherMin), Min.smul_sat(OtherMax),
+ Max.smul_sat(OtherMin), Max.smul_sat(OtherMax)};
auto Compare = [](const APInt &A, const APInt &B) { return A.slt(B); };
-
- // Note that we wanted to perform signed saturating multiplication,
- // so since we performed plain multiplication in twice the bitwidth,
- // we need to perform signed saturating truncation.
- return getNonEmpty(std::min(L, Compare).truncSSat(getBitWidth()),
- std::max(L, Compare).truncSSat(getBitWidth()) + 1);
+ return getNonEmpty(std::min(L, Compare), std::max(L, Compare) + 1);
}
ConstantRange ConstantRange::ushl_sat(const ConstantRange &Other) const {
@@ -1535,7 +1613,7 @@ ConstantRange ConstantRange::abs(bool IntMinIsPoison) const {
APInt Lo;
// Check whether the range crosses zero.
if (Upper.isStrictlyPositive() || !Lower.isStrictlyPositive())
- Lo = APInt::getNullValue(getBitWidth());
+ Lo = APInt::getZero(getBitWidth());
else
Lo = APIntOps::umin(Lower, -Upper + 1);
@@ -1565,7 +1643,7 @@ ConstantRange ConstantRange::abs(bool IntMinIsPoison) const {
return ConstantRange(-SMax, -SMin + 1);
// Range crosses zero.
- return ConstantRange(APInt::getNullValue(getBitWidth()),
+ return ConstantRange(APInt::getZero(getBitWidth()),
APIntOps::umax(-SMin, SMax) + 1);
}
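For reference, binaryNot() earlier in this file relies on the identity ~X == all-ones - X, which is why it is written as the all-ones singleton minus the input range. A quick exhaustive check of the scalar identity on 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  // ~X == all-ones - X, the identity behind ConstantRange::binaryNot().
  for (int V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    assert(static_cast<uint8_t>(~X) == static_cast<uint8_t>(0xFF - X));
  }
  return 0;
}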
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 6c75085a6678..c66cfb6e9ac1 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -95,7 +95,7 @@ bool Constant::isAllOnesValue() const {
// Check for FP which are bitcasted from -1 integers
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
+ return CFP->getValueAPF().bitcastToAPInt().isAllOnes();
// Check for constant splat vectors of 1 values.
if (getType()->isVectorTy())
@@ -112,7 +112,7 @@ bool Constant::isOneValue() const {
// Check for FP which are bitcasted from 1 integers
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return CFP->getValueAPF().bitcastToAPInt().isOneValue();
+ return CFP->getValueAPF().bitcastToAPInt().isOne();
// Check for constant splat vectors of 1 values.
if (getType()->isVectorTy())
@@ -129,7 +129,7 @@ bool Constant::isNotOneValue() const {
// Check for FP which are bitcasted from 1 integers
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
- return !CFP->getValueAPF().bitcastToAPInt().isOneValue();
+ return !CFP->getValueAPF().bitcastToAPInt().isOne();
// Check that vectors don't contain 1
if (auto *VTy = dyn_cast<FixedVectorType>(getType())) {
@@ -315,9 +315,11 @@ containsUndefinedElement(const Constant *C,
return false;
for (unsigned i = 0, e = cast<FixedVectorType>(VTy)->getNumElements();
- i != e; ++i)
- if (HasFn(C->getAggregateElement(i)))
- return true;
+ i != e; ++i) {
+ if (Constant *Elem = C->getAggregateElement(i))
+ if (HasFn(Elem))
+ return true;
+ }
}
return false;
@@ -366,9 +368,8 @@ Constant *Constant::getNullValue(Type *Ty) {
return ConstantFP::get(Ty->getContext(),
APFloat::getZero(APFloat::IEEEquad()));
case Type::PPC_FP128TyID:
- return ConstantFP::get(Ty->getContext(),
- APFloat(APFloat::PPCDoubleDouble(),
- APInt::getNullValue(128)));
+ return ConstantFP::get(Ty->getContext(), APFloat(APFloat::PPCDoubleDouble(),
+ APInt::getZero(128)));
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
@@ -404,11 +405,10 @@ Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
Constant *Constant::getAllOnesValue(Type *Ty) {
if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
return ConstantInt::get(Ty->getContext(),
- APInt::getAllOnesValue(ITy->getBitWidth()));
+ APInt::getAllOnes(ITy->getBitWidth()));
if (Ty->isFloatingPointTy()) {
- APFloat FL = APFloat::getAllOnesValue(Ty->getFltSemantics(),
- Ty->getPrimitiveSizeInBits());
+ APFloat FL = APFloat::getAllOnesValue(Ty->getFltSemantics());
return ConstantFP::get(Ty->getContext(), FL);
}
@@ -714,29 +714,41 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
return Result;
}
-/// If the specified constantexpr is dead, remove it. This involves recursively
-/// eliminating any dead users of the constantexpr.
-static bool removeDeadUsersOfConstant(const Constant *C) {
+/// Return true if the specified constantexpr is dead. This involves
+/// recursively traversing users of the constantexpr.
+/// If RemoveDeadUsers is true, also remove dead users at the same time.
+static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) {
if (isa<GlobalValue>(C)) return false; // Cannot remove this
- while (!C->use_empty()) {
- const Constant *User = dyn_cast<Constant>(C->user_back());
+ Value::const_user_iterator I = C->user_begin(), E = C->user_end();
+ while (I != E) {
+ const Constant *User = dyn_cast<Constant>(*I);
if (!User) return false; // Non-constant usage;
- if (!removeDeadUsersOfConstant(User))
+ if (!constantIsDead(User, RemoveDeadUsers))
return false; // Constant wasn't dead
+
+ // Just removed User, so the iterator was invalidated.
+ // Since we return immediately upon finding a live user, we can always
+ // restart from user_begin().
+ if (RemoveDeadUsers)
+ I = C->user_begin();
+ else
+ ++I;
}
- // If C is only used by metadata, it should not be preserved but should have
- // its uses replaced.
- if (C->isUsedByMetadata()) {
- const_cast<Constant *>(C)->replaceAllUsesWith(
- UndefValue::get(C->getType()));
+ if (RemoveDeadUsers) {
+ // If C is only used by metadata, it should not be preserved but should
+ // have its uses replaced.
+ if (C->isUsedByMetadata()) {
+ const_cast<Constant *>(C)->replaceAllUsesWith(
+ UndefValue::get(C->getType()));
+ }
+ const_cast<Constant *>(C)->destroyConstant();
}
- const_cast<Constant*>(C)->destroyConstant();
+
return true;
}
-
void Constant::removeDeadConstantUsers() const {
Value::const_user_iterator I = user_begin(), E = user_end();
Value::const_user_iterator LastNonDeadUser = E;
@@ -748,7 +760,7 @@ void Constant::removeDeadConstantUsers() const {
continue;
}
- if (!removeDeadUsersOfConstant(User)) {
+ if (!constantIsDead(User, /* RemoveDeadUsers= */ true)) {
// If the constant wasn't dead, remember that this was the last live use
// and move on to the next constant.
LastNonDeadUser = I;
@@ -764,6 +776,20 @@ void Constant::removeDeadConstantUsers() const {
}
}
+bool Constant::hasOneLiveUse() const {
+ unsigned NumUses = 0;
+ for (const Use &use : uses()) {
+ const Constant *User = dyn_cast<Constant>(use.getUser());
+ if (!User || !constantIsDead(User, /* RemoveDeadUsers= */ false)) {
+ ++NumUses;
+
+ if (NumUses > 1)
+ return false;
+ }
+ }
+ return NumUses == 1;
+}
+
Constant *Constant::replaceUndefsWith(Constant *C, Constant *Replacement) {
assert(C && Replacement && "Expected non-nullptr constant arguments");
Type *Ty = C->getType();
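hasOneLiveUse above counts a use as live only when its user is not itself a transitively dead constant, reusing the traversal of constantIsDead without the mutation. A standalone model of that rule on a toy use graph; the Node type and its fields are purely illustrative:

#include <cassert>
#include <vector>

struct Node {
  bool IsGlobal = false;   // globals are never treated as dead
  bool IsConstant = true;  // a non-constant user keeps the value alive
  std::vector<Node *> Users;
};

// Mirrors constantIsDead(C, /*RemoveDeadUsers=*/false): dead iff every user
// is itself a dead constant.
static bool isDead(const Node *C) {
  if (C->IsGlobal)
    return false;
  for (const Node *U : C->Users)
    if (!U->IsConstant || !isDead(U))
      return false;
  return true;
}

// Mirrors Constant::hasOneLiveUse(): exactly one use survives the dead check.
static bool hasOneLiveUse(const Node *C) {
  unsigned Live = 0;
  for (const Node *U : C->Users)
    if (!U->IsConstant || !isDead(U))
      if (++Live > 1)
        return false;
  return Live == 1;
}

int main() {
  Node DeadExpr;                       // constant with no users: dead
  Node Inst;
  Inst.IsConstant = false;             // an instruction use is always live
  Node C;
  C.Users = {&DeadExpr, &Inst};
  assert(hasOneLiveUse(&C));           // only the instruction use is counted
  return 0;
}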
@@ -1430,12 +1456,12 @@ Constant *ConstantVector::getSplat(ElementCount EC, Constant *V) {
Type *I32Ty = Type::getInt32Ty(VTy->getContext());
// Move scalar into vector.
- Constant *UndefV = UndefValue::get(VTy);
- V = ConstantExpr::getInsertElement(UndefV, V, ConstantInt::get(I32Ty, 0));
+ Constant *PoisonV = PoisonValue::get(VTy);
+ V = ConstantExpr::getInsertElement(PoisonV, V, ConstantInt::get(I32Ty, 0));
// Build shuffle mask to perform the splat.
SmallVector<int, 8> Zeros(EC.getKnownMinValue(), 0);
// Splat.
- return ConstantExpr::getShuffleVector(V, UndefV, Zeros);
+ return ConstantExpr::getShuffleVector(V, PoisonV, Zeros);
}
ConstantTokenNone *ConstantTokenNone::get(LLVMContext &Context) {
@@ -1508,20 +1534,6 @@ Constant *ConstantExpr::getShuffleMaskForBitcode() const {
return cast<ShuffleVectorConstantExpr>(this)->ShuffleMaskForBitcode;
}
-Constant *
-ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
- assert(Op->getType() == getOperand(OpNo)->getType() &&
- "Replacing operand with value of different type!");
- if (getOperand(OpNo) == Op)
- return const_cast<ConstantExpr*>(this);
-
- SmallVector<Constant*, 8> NewOps;
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- NewOps.push_back(i == OpNo ? Op : getOperand(i));
-
- return getWithOperands(NewOps);
-}
-
Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
bool OnlyIfReduced, Type *SrcTy) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
@@ -3282,7 +3294,7 @@ bool ConstantDataSequential::isCString() const {
if (Str.back() != 0) return false;
// Other elements must be non-nul.
- return Str.drop_back().find(0) == StringRef::npos;
+ return !Str.drop_back().contains(0);
}
bool ConstantDataVector::isSplatData() const {
@@ -3480,7 +3492,7 @@ Value *ConstantExpr::handleOperandChangeImpl(Value *From, Value *ToV) {
NewOps, this, From, To, NumUpdated, OperandNo);
}
-Instruction *ConstantExpr::getAsInstruction() const {
+Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const {
SmallVector<Value *, 4> ValueOperands(operands());
ArrayRef<Value*> Ops(ValueOperands);
@@ -3498,40 +3510,43 @@ Instruction *ConstantExpr::getAsInstruction() const {
case Instruction::IntToPtr:
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
- return CastInst::Create((Instruction::CastOps)getOpcode(),
- Ops[0], getType());
+ return CastInst::Create((Instruction::CastOps)getOpcode(), Ops[0],
+ getType(), "", InsertBefore);
case Instruction::Select:
- return SelectInst::Create(Ops[0], Ops[1], Ops[2]);
+ return SelectInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::InsertElement:
- return InsertElementInst::Create(Ops[0], Ops[1], Ops[2]);
+ return InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::ExtractElement:
- return ExtractElementInst::Create(Ops[0], Ops[1]);
+ return ExtractElementInst::Create(Ops[0], Ops[1], "", InsertBefore);
case Instruction::InsertValue:
- return InsertValueInst::Create(Ops[0], Ops[1], getIndices());
+ return InsertValueInst::Create(Ops[0], Ops[1], getIndices(), "",
+ InsertBefore);
case Instruction::ExtractValue:
- return ExtractValueInst::Create(Ops[0], getIndices());
+ return ExtractValueInst::Create(Ops[0], getIndices(), "", InsertBefore);
case Instruction::ShuffleVector:
- return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask());
+ return new ShuffleVectorInst(Ops[0], Ops[1], getShuffleMask(), "",
+ InsertBefore);
case Instruction::GetElementPtr: {
const auto *GO = cast<GEPOperator>(this);
if (GO->isInBounds())
- return GetElementPtrInst::CreateInBounds(GO->getSourceElementType(),
- Ops[0], Ops.slice(1));
+ return GetElementPtrInst::CreateInBounds(
+ GO->getSourceElementType(), Ops[0], Ops.slice(1), "", InsertBefore);
return GetElementPtrInst::Create(GO->getSourceElementType(), Ops[0],
- Ops.slice(1));
+ Ops.slice(1), "", InsertBefore);
}
case Instruction::ICmp:
case Instruction::FCmp:
return CmpInst::Create((Instruction::OtherOps)getOpcode(),
- (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]);
+ (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1],
+ "", InsertBefore);
case Instruction::FNeg:
- return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
+ return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0], "",
+ InsertBefore);
default:
assert(getNumOperands() == 2 && "Must be binary operator?");
- BinaryOperator *BO =
- BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
- Ops[0], Ops[1]);
+ BinaryOperator *BO = BinaryOperator::Create(
+ (Instruction::BinaryOps)getOpcode(), Ops[0], Ops[1], "", InsertBefore);
if (isa<OverflowingBinaryOperator>(BO)) {
BO->setHasNoUnsignedWrap(SubclassOptionalData &
OverflowingBinaryOperator::NoUnsignedWrap);
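getAsInstruction gains an optional insertion point above, so callers can place the materialized instruction directly instead of inserting it afterwards. A hedged fragment of the intended call shape; the helper name is mine and only the signature shown in this hunk is assumed:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Materialize a constant expression as a real instruction right before
// InsertPt. Previously this took two steps:
//   Instruction *I = CE->getAsInstruction();
//   I->insertBefore(InsertPt);
Instruction *materializeBefore(ConstantExpr *CE, Instruction *InsertPt) {
  return CE->getAsInstruction(InsertPt);
}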
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 8a7060c148c9..905372982dc2 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2460,7 +2460,7 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
- unwrap<Function>(F)->addAttribute(Idx, unwrap(A));
+ unwrap<Function>(F)->addAttributeAtIndex(Idx, unwrap(A));
}
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
@@ -2478,31 +2478,32 @@ void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef LLVMGetEnumAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
unsigned KindID) {
- return wrap(unwrap<Function>(F)->getAttribute(Idx,
- (Attribute::AttrKind)KindID));
+ return wrap(unwrap<Function>(F)->getAttributeAtIndex(
+ Idx, (Attribute::AttrKind)KindID));
}
LLVMAttributeRef LLVMGetStringAttributeAtIndex(LLVMValueRef F,
LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- return wrap(unwrap<Function>(F)->getAttribute(Idx, StringRef(K, KLen)));
+ return wrap(
+ unwrap<Function>(F)->getAttributeAtIndex(Idx, StringRef(K, KLen)));
}
void LLVMRemoveEnumAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
unsigned KindID) {
- unwrap<Function>(F)->removeAttribute(Idx, (Attribute::AttrKind)KindID);
+ unwrap<Function>(F)->removeAttributeAtIndex(Idx, (Attribute::AttrKind)KindID);
}
void LLVMRemoveStringAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- unwrap<Function>(F)->removeAttribute(Idx, StringRef(K, KLen));
+ unwrap<Function>(F)->removeAttributeAtIndex(Idx, StringRef(K, KLen));
}
void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
const char *V) {
Function *Func = unwrap<Function>(Fn);
Attribute Attr = Attribute::get(Func->getContext(), A, V);
- Func->addAttribute(AttributeList::FunctionIndex, Attr);
+ Func->addFnAttr(Attr);
}
/*--.. Operations on parameters ............................................--*/
@@ -2843,7 +2844,7 @@ unsigned LLVMGetNumArgOperands(LLVMValueRef Instr) {
if (FuncletPadInst *FPI = dyn_cast<FuncletPadInst>(unwrap(Instr))) {
return FPI->getNumArgOperands();
}
- return unwrap<CallBase>(Instr)->getNumArgOperands();
+ return unwrap<CallBase>(Instr)->arg_size();
}
/*--.. Call and invoke instructions ........................................--*/
@@ -2857,17 +2858,17 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
static_cast<CallingConv::ID>(CC));
}
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, LLVMAttributeIndex Idx,
unsigned align) {
auto *Call = unwrap<CallBase>(Instr);
Attribute AlignAttr =
Attribute::getWithAlignment(Call->getContext(), Align(align));
- Call->addAttribute(index, AlignAttr);
+ Call->addAttributeAtIndex(Idx, AlignAttr);
}
void LLVMAddCallSiteAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
- unwrap<CallBase>(C)->addAttribute(Idx, unwrap(A));
+ unwrap<CallBase>(C)->addAttributeAtIndex(Idx, unwrap(A));
}
unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
@@ -2888,24 +2889,25 @@ void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef LLVMGetCallSiteEnumAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
unsigned KindID) {
- return wrap(
- unwrap<CallBase>(C)->getAttribute(Idx, (Attribute::AttrKind)KindID));
+ return wrap(unwrap<CallBase>(C)->getAttributeAtIndex(
+ Idx, (Attribute::AttrKind)KindID));
}
LLVMAttributeRef LLVMGetCallSiteStringAttribute(LLVMValueRef C,
LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- return wrap(unwrap<CallBase>(C)->getAttribute(Idx, StringRef(K, KLen)));
+ return wrap(
+ unwrap<CallBase>(C)->getAttributeAtIndex(Idx, StringRef(K, KLen)));
}
void LLVMRemoveCallSiteEnumAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
unsigned KindID) {
- unwrap<CallBase>(C)->removeAttribute(Idx, (Attribute::AttrKind)KindID);
+ unwrap<CallBase>(C)->removeAttributeAtIndex(Idx, (Attribute::AttrKind)KindID);
}
void LLVMRemoveCallSiteStringAttribute(LLVMValueRef C, LLVMAttributeIndex Idx,
const char *K, unsigned KLen) {
- unwrap<CallBase>(C)->removeAttribute(Idx, StringRef(K, KLen));
+ unwrap<CallBase>(C)->removeAttributeAtIndex(Idx, StringRef(K, KLen));
}
LLVMValueRef LLVMGetCalledValue(LLVMValueRef Instr) {
@@ -3131,6 +3133,10 @@ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
}
+void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->AddMetadataToInst(unwrap<Instruction>(Inst));
+}
+
void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder,
LLVMMetadataRef FPMathTag) {
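The C bindings above keep their existing names; only the C++ methods they forward to were renamed (addAttributeAtIndex, getAttributeAtIndex, removeAttributeAtIndex, addFnAttr, arg_size). A short hedged fragment of the new C++ spellings, assuming F and CB come from existing IR:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

void tagCall(Function &F, CallBase &CB) {
  // Function-level string attribute (previously addAttribute(FunctionIndex, ...)).
  F.addFnAttr(Attribute::get(F.getContext(), "frame-pointer", "all"));
  // Attribute at an explicit index on a call site (previously CB.addAttribute(...)).
  CB.addAttributeAtIndex(AttributeList::ReturnIndex,
                         Attribute::get(F.getContext(), Attribute::NonNull));
  // Argument count (previously CB.getNumArgOperands()).
  (void)CB.arg_size();
}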
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index 61d3b5e69e9e..ca7dafc814ce 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -32,8 +32,8 @@ static cl::opt<bool>
cl::init(false), cl::Hidden);
DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU)
- : M(m), VMContext(M.getContext()), CUNode(CU),
- DeclareFn(nullptr), ValueFn(nullptr), LabelFn(nullptr),
+ : M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr),
+ ValueFn(nullptr), LabelFn(nullptr),
AllowUnresolvedNodes(AllowUnresolvedNodes) {}
void DIBuilder::trackIfUnresolved(MDNode *N) {
@@ -73,7 +73,8 @@ void DIBuilder::finalize() {
return;
}
- CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
+ if (!AllEnumTypes.empty())
+ CUNode->replaceEnumTypes(MDTuple::get(VMContext, AllEnumTypes));
SmallVector<Metadata *, 16> RetainValues;
// Declarations and definitions of the same type may be retained. Some
@@ -164,12 +165,13 @@ DICompileUnit *DIBuilder::createCompileUnit(
static DIImportedEntity *
createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
Metadata *NS, DIFile *File, unsigned Line, StringRef Name,
+ DINodeArray Elements,
SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
if (Line)
assert(File && "Source location has line number but no file");
unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size();
auto *M = DIImportedEntity::get(C, Tag, Context, cast_or_null<DINode>(NS),
- File, Line, Name);
+ File, Line, Name, Elements);
if (EntitiesCount < C.pImpl->DIImportedEntitys.size())
// A new Imported Entity was just added to the context.
// Add it to the Imported Modules list.
@@ -179,36 +181,38 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DINamespace *NS, DIFile *File,
- unsigned Line) {
+ unsigned Line,
+ DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, NS, File, Line, StringRef(),
+ Context, NS, File, Line, StringRef(), Elements,
AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DIImportedEntity *NS,
- DIFile *File, unsigned Line) {
+ DIFile *File, unsigned Line,
+ DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, NS, File, Line, StringRef(),
+ Context, NS, File, Line, StringRef(), Elements,
AllImportedModules);
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M,
- DIFile *File, unsigned Line) {
+ DIFile *File, unsigned Line,
+ DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
- Context, M, File, Line, StringRef(),
+ Context, M, File, Line, StringRef(), Elements,
AllImportedModules);
}
-DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context,
- DINode *Decl,
- DIFile *File,
- unsigned Line,
- StringRef Name) {
+DIImportedEntity *
+DIBuilder::createImportedDeclaration(DIScope *Context, DINode *Decl,
+ DIFile *File, unsigned Line,
+ StringRef Name, DINodeArray Elements) {
// Make sure to use the unique identifier based metadata reference for
// types that have one.
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
- Context, Decl, File, Line, Name,
+ Context, Decl, File, Line, Name, Elements,
AllImportedModules);
}
@@ -250,7 +254,7 @@ DIEnumerator *DIBuilder::createEnumerator(StringRef Name, uint64_t Val,
Name);
}
-DIEnumerator *DIBuilder::createEnumerator(StringRef Name, APSInt Value) {
+DIEnumerator *DIBuilder::createEnumerator(StringRef Name, const APSInt &Value) {
assert(!Name.empty() && "Unable to create enumerator without name");
return DIEnumerator::get(VMContext, APInt(Value), Value.isUnsigned(), Name);
}
@@ -283,17 +287,16 @@ DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
0, 0, None, DINode::FlagZero);
}
-DIDerivedType *DIBuilder::createPointerType(
- DIType *PointeeTy,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- Optional<unsigned> DWARFAddressSpace,
- StringRef Name) {
+DIDerivedType *
+DIBuilder::createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ Optional<unsigned> DWARFAddressSpace,
+ StringRef Name, DINodeArray Annotations) {
// FIXME: Why is there a name here?
return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
nullptr, 0, nullptr, PointeeTy, SizeInBits,
- AlignInBits, 0, DWARFAddressSpace,
- DINode::FlagZero);
+ AlignInBits, 0, DWARFAddressSpace, DINode::FlagZero,
+ nullptr, Annotations);
}
DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
@@ -306,11 +309,10 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
AlignInBits, 0, None, Flags, Base);
}
-DIDerivedType *DIBuilder::createReferenceType(
- unsigned Tag, DIType *RTy,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- Optional<unsigned> DWARFAddressSpace) {
+DIDerivedType *
+DIBuilder::createReferenceType(unsigned Tag, DIType *RTy, uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ Optional<unsigned> DWARFAddressSpace) {
assert(RTy && "Unable to create reference type");
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, RTy,
SizeInBits, AlignInBits, 0, DWARFAddressSpace,
@@ -319,11 +321,12 @@ DIDerivedType *DIBuilder::createReferenceType(
DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
DIFile *File, unsigned LineNo,
- DIScope *Context,
- uint32_t AlignInBits) {
+ DIScope *Context, uint32_t AlignInBits,
+ DINodeArray Annotations) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
LineNo, getNonCompileUnitScope(Context), Ty, 0,
- AlignInBits, 0, None, DINode::FlagZero);
+ AlignInBits, 0, None, DINode::FlagZero, nullptr,
+ Annotations);
}
DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
@@ -341,19 +344,18 @@ DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
Metadata *ExtraData = ConstantAsMetadata::get(
ConstantInt::get(IntegerType::get(VMContext, 32), VBPtrOffset));
return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
- 0, Ty, BaseTy, 0, 0, BaseOffset, None,
- Flags, ExtraData);
+ 0, Ty, BaseTy, 0, 0, BaseOffset, None, Flags,
+ ExtraData);
}
-DIDerivedType *DIBuilder::createMemberType(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits,
- uint32_t AlignInBits,
- uint64_t OffsetInBits,
- DINode::DIFlags Flags, DIType *Ty) {
+DIDerivedType *DIBuilder::createMemberType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
- SizeInBits, AlignInBits, OffsetInBits, None, Flags);
+ SizeInBits, AlignInBits, OffsetInBits, None, Flags,
+ nullptr, Annotations);
}
static ConstantAsMetadata *getConstantOrNull(Constant *C) {
@@ -375,14 +377,15 @@ DIDerivedType *DIBuilder::createVariantMemberType(
DIDerivedType *DIBuilder::createBitFieldMemberType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t OffsetInBits, uint64_t StorageOffsetInBits,
- DINode::DIFlags Flags, DIType *Ty) {
+ DINode::DIFlags Flags, DIType *Ty, DINodeArray Annotations) {
Flags |= DINode::FlagBitField;
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
- getNonCompileUnitScope(Scope), Ty, SizeInBits, /* AlignInBits */ 0,
+ getNonCompileUnitScope(Scope), Ty, SizeInBits, /*AlignInBits=*/0,
OffsetInBits, None, Flags,
ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64),
- StorageOffsetInBits)));
+ StorageOffsetInBits)),
+ Annotations);
}
DIDerivedType *
@@ -498,10 +501,12 @@ DICompositeType *DIBuilder::createUnionType(
return R;
}
-DICompositeType *DIBuilder::createVariantPart(
- DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags,
- DIDerivedType *Discriminator, DINodeArray Elements, StringRef UniqueIdentifier) {
+DICompositeType *
+DIBuilder::createVariantPart(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNumber, uint64_t SizeInBits,
+ uint32_t AlignInBits, DINode::DIFlags Flags,
+ DIDerivedType *Discriminator, DINodeArray Elements,
+ StringRef UniqueIdentifier) {
auto *R = DICompositeType::get(
VMContext, dwarf::DW_TAG_variant_part, Name, File, LineNumber,
getNonCompileUnitScope(Scope), nullptr, SizeInBits, AlignInBits, 0, Flags,
@@ -542,16 +547,17 @@ DIDerivedType *DIBuilder::createSetType(DIScope *Scope, StringRef Name,
return R;
}
-DICompositeType *DIBuilder::createArrayType(
- uint64_t Size, uint32_t AlignInBits, DIType *Ty, DINodeArray Subscripts,
- PointerUnion<DIExpression *, DIVariable *> DL,
- PointerUnion<DIExpression *, DIVariable *> AS,
- PointerUnion<DIExpression *, DIVariable *> AL,
- PointerUnion<DIExpression *, DIVariable *> RK) {
+DICompositeType *
+DIBuilder::createArrayType(uint64_t Size, uint32_t AlignInBits, DIType *Ty,
+ DINodeArray Subscripts,
+ PointerUnion<DIExpression *, DIVariable *> DL,
+ PointerUnion<DIExpression *, DIVariable *> AS,
+ PointerUnion<DIExpression *, DIVariable *> AL,
+ PointerUnion<DIExpression *, DIVariable *> RK) {
auto *R = DICompositeType::get(
- VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0,
- nullptr, Ty, Size, AlignInBits, 0, DINode::FlagZero,
- Subscripts, 0, nullptr, nullptr, "", nullptr,
+ VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty, Size,
+ AlignInBits, 0, DINode::FlagZero, Subscripts, 0, nullptr, nullptr, "",
+ nullptr,
DL.is<DIExpression *>() ? (Metadata *)DL.get<DIExpression *>()
: (Metadata *)DL.get<DIVariable *>(),
AS.is<DIExpression *>() ? (Metadata *)AS.get<DIExpression *>()
@@ -628,12 +634,14 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIScope *Scope,
DICompositeType *DIBuilder::createReplaceableCompositeType(
unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang, uint64_t SizeInBits, uint32_t AlignInBits,
- DINode::DIFlags Flags, StringRef UniqueIdentifier) {
+ DINode::DIFlags Flags, StringRef UniqueIdentifier,
+ DINodeArray Annotations) {
auto *RetTy =
DICompositeType::getTemporary(
VMContext, Tag, Name, F, Line, getNonCompileUnitScope(Scope), nullptr,
SizeInBits, AlignInBits, 0, Flags, nullptr, RuntimeLang, nullptr,
- nullptr, UniqueIdentifier)
+ nullptr, UniqueIdentifier, nullptr, nullptr, nullptr, nullptr,
+ nullptr, Annotations)
.release();
trackIfUnresolved(RetTy);
return RetTy;
@@ -701,15 +709,16 @@ static void checkGlobalVariableScope(DIScope *Context) {
DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNumber, DIType *Ty, bool IsLocalToUnit,
- bool isDefined, DIExpression *Expr,
- MDNode *Decl, MDTuple *TemplateParams, uint32_t AlignInBits) {
+ unsigned LineNumber, DIType *Ty, bool IsLocalToUnit, bool isDefined,
+ DIExpression *Expr, MDNode *Decl, MDTuple *TemplateParams,
+ uint32_t AlignInBits, DINodeArray Annotations) {
checkGlobalVariableScope(Context);
auto *GV = DIGlobalVariable::getDistinct(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
- LineNumber, Ty, IsLocalToUnit, isDefined, cast_or_null<DIDerivedType>(Decl),
- TemplateParams, AlignInBits);
+ LineNumber, Ty, IsLocalToUnit, isDefined,
+ cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits,
+ Annotations);
if (!Expr)
Expr = createExpression();
auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr);
@@ -726,7 +735,8 @@ DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
return DIGlobalVariable::getTemporary(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
LineNumber, Ty, IsLocalToUnit, false,
- cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits)
+ cast_or_null<DIDerivedType>(Decl), TemplateParams, AlignInBits,
+ nullptr)
.release();
}
@@ -735,16 +745,16 @@ static DILocalVariable *createLocalVariable(
DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> &PreservedVariables,
DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
- uint32_t AlignInBits) {
+ uint32_t AlignInBits, DINodeArray Annotations = nullptr) {
// FIXME: Why getNonCompileUnitScope()?
// FIXME: Why is "!Context" okay here?
// FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
// the only valid scopes)?
DIScope *Context = getNonCompileUnitScope(Scope);
- auto *Node =
- DILocalVariable::get(VMContext, cast_or_null<DILocalScope>(Context), Name,
- File, LineNo, Ty, ArgNo, Flags, AlignInBits);
+ auto *Node = DILocalVariable::get(
+ VMContext, cast_or_null<DILocalScope>(Context), Name, File, LineNo, Ty,
+ ArgNo, Flags, AlignInBits, Annotations);
if (AlwaysPreserve) {
// The optimizer may remove local variables. If there is an interest
// to preserve variable info in such situation then stash it in a
@@ -768,21 +778,20 @@ DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name,
DILocalVariable *DIBuilder::createParameterVariable(
DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
- unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags) {
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
+ DINodeArray Annotations) {
assert(ArgNo && "Expected non-zero argument number for parameter");
return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo,
File, LineNo, Ty, AlwaysPreserve, Flags,
- /* AlignInBits */0);
+ /*AlignInBits=*/0, Annotations);
}
-DILabel *DIBuilder::createLabel(
- DIScope *Scope, StringRef Name, DIFile *File,
- unsigned LineNo, bool AlwaysPreserve) {
+DILabel *DIBuilder::createLabel(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNo, bool AlwaysPreserve) {
DIScope *Context = getNonCompileUnitScope(Scope);
- auto *Node =
- DILabel::get(VMContext, cast_or_null<DILocalScope>(Context), Name,
- File, LineNo);
+ auto *Node = DILabel::get(VMContext, cast_or_null<DILocalScope>(Context),
+ Name, File, LineNo);
if (AlwaysPreserve) {
/// The optimizer may remove labels. If there is an interest
@@ -806,7 +815,7 @@ DIExpression *DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
}
template <class... Ts>
-static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... Args) {
+static DISubprogram *getSubprogram(bool IsDistinct, Ts &&...Args) {
if (IsDistinct)
return DISubprogram::getDistinct(std::forward<Ts>(Args)...);
return DISubprogram::get(std::forward<Ts>(Args)...);
@@ -817,13 +826,14 @@ DISubprogram *DIBuilder::createFunction(
unsigned LineNo, DISubroutineType *Ty, unsigned ScopeLine,
DINode::DIFlags Flags, DISubprogram::DISPFlags SPFlags,
DITemplateParameterArray TParams, DISubprogram *Decl,
- DITypeArray ThrownTypes) {
+ DITypeArray ThrownTypes, DINodeArray Annotations) {
bool IsDefinition = SPFlags & DISubprogram::SPFlagDefinition;
auto *Node = getSubprogram(
/*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context),
Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags,
SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl,
- MDTuple::getTemporary(VMContext, None).release(), ThrownTypes);
+ MDTuple::getTemporary(VMContext, None).release(), ThrownTypes,
+ Annotations);
if (IsDefinition)
AllSubprograms.push_back(Node);
@@ -869,11 +879,11 @@ DISubprogram *DIBuilder::createMethod(
return SP;
}
-DICommonBlock *DIBuilder::createCommonBlock(
- DIScope *Scope, DIGlobalVariable *Decl, StringRef Name, DIFile *File,
- unsigned LineNo) {
- return DICommonBlock::get(
- VMContext, Scope, Decl, Name, File, LineNo);
+DICommonBlock *DIBuilder::createCommonBlock(DIScope *Scope,
+ DIGlobalVariable *Decl,
+ StringRef Name, DIFile *File,
+ unsigned LineNo) {
+ return DICommonBlock::get(VMContext, Scope, Decl, Name, File, LineNo);
}
DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
@@ -929,9 +939,9 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
Instruction *InsertBefore) {
- return insertLabel(
- LabelInfo, DL, InsertBefore ? InsertBefore->getParent() : nullptr,
- InsertBefore);
+ return insertLabel(LabelInfo, DL,
+ InsertBefore ? InsertBefore->getParent() : nullptr,
+ InsertBefore);
}
Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
@@ -980,7 +990,8 @@ static Function *getDeclareIntrin(Module &M) {
Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
DIExpression *Expr, const DILocation *DL,
- BasicBlock *InsertBB, Instruction *InsertBefore) {
+ BasicBlock *InsertBB,
+ Instruction *InsertBefore) {
assert(VarInfo && "empty or invalid DILocalVariable* passed to dbg.declare");
assert(DL && "Expected debug loc");
assert(DL->getScope()->getSubprogram() ==
@@ -1023,9 +1034,9 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(
return B.CreateCall(ValueFn, Args);
}
-Instruction *DIBuilder::insertLabel(
- DILabel *LabelInfo, const DILocation *DL,
- BasicBlock *InsertBB, Instruction *InsertBefore) {
+Instruction *DIBuilder::insertLabel(DILabel *LabelInfo, const DILocation *DL,
+ BasicBlock *InsertBB,
+ Instruction *InsertBefore) {
assert(LabelInfo && "empty or invalid DILabel* passed to dbg.label");
assert(DL && "Expected debug loc");
assert(DL->getScope()->getSubprogram() ==
@@ -1042,8 +1053,7 @@ Instruction *DIBuilder::insertLabel(
return B.CreateCall(LabelFn, Args);
}
-void DIBuilder::replaceVTableHolder(DICompositeType *&T,
- DIType *VTableHolder) {
+void DIBuilder::replaceVTableHolder(DICompositeType *&T, DIType *VTableHolder) {
{
TypedTrackingMDRef<DICompositeType> N(T);
N->replaceVTableHolder(VTableHolder);
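The DIBuilder changes above thread a DINodeArray of elements or annotations through the imported-entity and variable creation paths. A hedged fragment using only the signatures visible in this hunk; the surrounding DIB, Scope, NS, File and Ty are assumed to be set up elsewhere:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"

using namespace llvm;

void emitAnnotatedDebugInfo(DIBuilder &DIB, DIScope *Scope, DINamespace *NS,
                            DIFile *File, DIType *Ty) {
  DINodeArray Elements = DIB.getOrCreateArray({});
  DINodeArray Annotations = DIB.getOrCreateArray({});

  // Imported module with explicit elements (new trailing parameter).
  DIB.createImportedModule(Scope, NS, File, /*Line=*/1, Elements);

  // Parameter variable carrying annotations (new trailing parameter).
  DIB.createParameterVariable(Scope, "arg", /*ArgNo=*/1, File, /*LineNo=*/1,
                              Ty, /*AlwaysPreserve=*/true, DINode::FlagZero,
                              Annotations);
}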
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index ecd74449dc38..2ace18048262 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -151,6 +151,8 @@ PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
//===----------------------------------------------------------------------===//
const char *DataLayout::getManglingComponent(const Triple &T) {
+ if (T.isOSBinFormatGOFF())
+ return "-m:l";
if (T.isOSBinFormatMachO())
return "-m:o";
if (T.isOSWindows() && T.isOSBinFormatCOFF())
@@ -258,12 +260,12 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
while (!Desc.empty()) {
// Split at '-'.
std::pair<StringRef, StringRef> Split;
- if (Error Err = split(Desc, '-', Split))
+ if (Error Err = ::split(Desc, '-', Split))
return Err;
Desc = Split.second;
// Split at ':'.
- if (Error Err = split(Split.first, ':', Split))
+ if (Error Err = ::split(Split.first, ':', Split))
return Err;
// Aliases used below.
@@ -272,7 +274,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Tok == "ni") {
do {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
Rest = Split.second;
unsigned AS;
@@ -313,7 +315,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Rest.empty())
return reportError(
"Missing size specification for pointer in datalayout string");
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned PointerMemSize;
if (Error Err = getIntInBytes(Tok, PointerMemSize))
@@ -325,7 +327,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Rest.empty())
return reportError(
"Missing alignment specification for pointer in datalayout string");
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned PointerABIAlign;
if (Error Err = getIntInBytes(Tok, PointerABIAlign))
@@ -340,7 +342,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
// Preferred alignment.
unsigned PointerPrefAlign = PointerABIAlign;
if (!Rest.empty()) {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
if (Error Err = getIntInBytes(Tok, PointerPrefAlign))
return Err;
@@ -350,7 +352,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
// Now read the index. It is the second optional parameter here.
if (!Rest.empty()) {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
if (Error Err = getIntInBytes(Tok, IndexSize))
return Err;
@@ -391,7 +393,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Rest.empty())
return reportError(
"Missing alignment specification in datalayout string");
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
unsigned ABIAlign;
if (Error Err = getIntInBytes(Tok, ABIAlign))
@@ -408,7 +410,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
// Preferred alignment.
unsigned PrefAlign = ABIAlign;
if (!Rest.empty()) {
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
if (Error Err = getIntInBytes(Tok, PrefAlign))
return Err;
@@ -437,7 +439,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
LegalIntWidths.push_back(Width);
if (Rest.empty())
break;
- if (Error Err = split(Rest, ':', Split))
+ if (Error Err = ::split(Rest, ':', Split))
return Err;
}
break;
@@ -500,6 +502,9 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
case 'e':
ManglingMode = MM_ELF;
break;
+ case 'l':
+ ManglingMode = MM_GOFF;
+ break;
case 'o':
ManglingMode = MM_MachO;
break;
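The GOFF hunks above teach DataLayout about GOFF object files: getManglingComponent now returns "-m:l" for GOFF targets, and the specifier parser accepts the matching 'l' mangling mode. A minimal, hedged sketch of what this enables, assuming a triple whose default object format is GOFF (z/OS is the assumed example here):

  Triple T("s390x-ibm-zos");
  const char *Mangling = DataLayout::getManglingComponent(T); // "-m:l" when the triple is GOFF-based
  DataLayout DL("e-m:l-i64:64");  // "m:l" now parses into the new MM_GOFF mode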
@@ -702,12 +707,12 @@ unsigned DataLayout::getPointerSize(unsigned AS) const {
return getPointerAlignElem(AS).TypeByteWidth;
}
-unsigned DataLayout::getMaxPointerSize() const {
- unsigned MaxPointerSize = 0;
+unsigned DataLayout::getMaxIndexSize() const {
+ unsigned MaxIndexSize = 0;
for (auto &P : Pointers)
- MaxPointerSize = std::max(MaxPointerSize, P.TypeByteWidth);
+ MaxIndexSize = std::max(MaxIndexSize, P.IndexWidth);
- return MaxPointerSize;
+ return MaxIndexSize;
}
unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
@@ -800,15 +805,11 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
// By default, use natural alignment for vector types. This is consistent
// with what clang and llvm-gcc do.
- // TODO: This should probably not be using the alloc size.
- unsigned Alignment =
- getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
+ //
// We're only calculating a natural alignment, so it doesn't have to be
// based on the full size for scalable vectors. Using the minimum element
// count should be enough here.
- Alignment *= cast<VectorType>(Ty)->getElementCount().getKnownMinValue();
- Alignment = PowerOf2Ceil(Alignment);
- return Align(Alignment);
+ return Align(PowerOf2Ceil(getTypeStoreSize(Ty).getKnownMinSize()));
}
case Type::X86_AMXTyID:
return Align(64);
@@ -818,7 +819,7 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
}
/// TODO: Remove this function once the transition to Align is over.
-unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
+uint64_t DataLayout::getABITypeAlignment(Type *Ty) const {
return getABITypeAlign(Ty).value();
}
@@ -827,7 +828,7 @@ Align DataLayout::getABITypeAlign(Type *Ty) const {
}
/// TODO: Remove this function once the transition to Align is over.
-unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
+uint64_t DataLayout::getPrefTypeAlignment(Type *Ty) const {
return getPrefTypeAlign(Ty).value();
}
@@ -900,6 +901,72 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
return Result;
}
+static void addElementIndex(SmallVectorImpl<APInt> &Indices, TypeSize ElemSize,
+ APInt &Offset) {
+ // Skip over scalable or zero size elements. Also skip element sizes larger
+ // than the positive index space, because the arithmetic below may not be
+ // correct in that case.
+ unsigned BitWidth = Offset.getBitWidth();
+ if (ElemSize.isScalable() || ElemSize == 0 ||
+ !isUIntN(BitWidth - 1, ElemSize)) {
+ Indices.push_back(APInt::getZero(BitWidth));
+ return;
+ }
+
+ APInt Index = Offset.sdiv(ElemSize);
+ Offset -= Index * ElemSize;
+ if (Offset.isNegative()) {
+ // Prefer a positive remaining offset to allow struct indexing.
+ --Index;
+ Offset += ElemSize;
+ assert(Offset.isNonNegative() && "Remaining offset shouldn't be negative");
+ }
+ Indices.push_back(Index);
+}
+
+SmallVector<APInt> DataLayout::getGEPIndicesForOffset(Type *&ElemTy,
+ APInt &Offset) const {
+ assert(ElemTy->isSized() && "Element type must be sized");
+ SmallVector<APInt> Indices;
+ addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset);
+ while (Offset != 0) {
+ if (auto *ArrTy = dyn_cast<ArrayType>(ElemTy)) {
+ ElemTy = ArrTy->getElementType();
+ addElementIndex(Indices, getTypeAllocSize(ElemTy), Offset);
+ continue;
+ }
+
+ if (auto *VecTy = dyn_cast<VectorType>(ElemTy)) {
+ ElemTy = VecTy->getElementType();
+ unsigned ElemSizeInBits = getTypeSizeInBits(ElemTy).getFixedSize();
+ // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElemSizeInBits % 8 != 0)
+ break;
+
+ addElementIndex(Indices, TypeSize::Fixed(ElemSizeInBits / 8), Offset);
+ continue;
+ }
+
+ if (auto *STy = dyn_cast<StructType>(ElemTy)) {
+ const StructLayout *SL = getStructLayout(STy);
+ uint64_t IntOffset = Offset.getZExtValue();
+ if (IntOffset >= SL->getSizeInBytes())
+ break;
+
+ unsigned Index = SL->getElementContainingOffset(IntOffset);
+ Offset -= SL->getElementOffset(Index);
+ ElemTy = STy->getElementType(Index);
+ Indices.push_back(APInt(32, Index));
+ continue;
+ }
+
+ // Can't index into non-aggregate type.
+ break;
+ }
+
+ return Indices;
+}
+
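getGEPIndicesForOffset added above walks arrays, vectors and structs to turn a byte offset into a chain of GEP indices, updating the offset and element type in place. A minimal sketch of the intended call pattern, assuming an LLVMContext Ctx and a DataLayout DL with natural layout are in scope:

  StructType *STy =
      StructType::get(Type::getInt32Ty(Ctx), Type::getInt64Ty(Ctx)); // {i32, i64}
  Type *Ty = STy;
  APInt Offset(DL.getIndexSizeInBits(/*AS=*/0), 8);                  // byte offset of the i64 field
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
  // With the assumed layout this yields indices {0, 1}, leaves Offset == 0,
  // and Ty now points at the i64 element type.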
/// getPreferredAlign - Return the preferred alignment of the specified global.
/// This includes an explicitly requested alignment (if the global has one).
Align DataLayout::getPreferredAlign(const GlobalVariable *GV) const {
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 06c511f8530a..7c69fbf7085d 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -447,8 +447,7 @@ bool llvm::stripDebugInfo(Function &F) {
DenseMap<MDNode *, MDNode *> LoopIDsMap;
for (BasicBlock &BB : F) {
- for (auto II = BB.begin(), End = BB.end(); II != End;) {
- Instruction &I = *II++; // We may delete the instruction, increment now.
+ for (Instruction &I : llvm::make_early_inc_range(BB)) {
if (isa<DbgInfoIntrinsic>(&I)) {
I.eraseFromParent();
Changed = true;
@@ -909,6 +908,11 @@ void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder) {
unwrap(Builder)->finalize();
}
+void LLVMDIBuilderFinalizeSubprogram(LLVMDIBuilderRef Builder,
+ LLVMMetadataRef subprogram) {
+ unwrap(Builder)->finalizeSubprogram(unwrapDI<DISubprogram>(subprogram));
+}
+
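LLVMDIBuilderFinalizeSubprogram exposes DIBuilder::finalizeSubprogram through the C API, so clients that emit functions incrementally (e.g. JIT frontends) can finalize one subprogram's debug info early. Hedged sketch, with DIB and SP assumed to be handles created earlier via the C API:

  LLVMDIBuilderFinalizeSubprogram(DIB, SP); // per-function finalize
  /* ... later, once the module is complete ... */
  LLVMDIBuilderFinalize(DIB);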
LLVMMetadataRef LLVMDIBuilderCreateCompileUnit(
LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang,
LLVMMetadataRef FileRef, const char *Producer, size_t ProducerLen,
@@ -1003,41 +1007,43 @@ LLVMDIBuilderCreateImportedModuleFromNamespace(LLVMDIBuilderRef Builder,
Line));
}
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromAlias(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef ImportedEntity,
- LLVMMetadataRef File,
- unsigned Line) {
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromAlias(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope,
+ LLVMMetadataRef ImportedEntity, LLVMMetadataRef File, unsigned Line,
+ LLVMMetadataRef *Elements, unsigned NumElements) {
+ auto Elts =
+ (NumElements > 0)
+ ? unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements})
+ : nullptr;
return wrap(unwrap(Builder)->createImportedModule(
- unwrapDI<DIScope>(Scope),
- unwrapDI<DIImportedEntity>(ImportedEntity),
- unwrapDI<DIFile>(File), Line));
-}
-
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedModuleFromModule(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef M,
- LLVMMetadataRef File,
- unsigned Line) {
- return wrap(unwrap(Builder)->createImportedModule(unwrapDI<DIScope>(Scope),
- unwrapDI<DIModule>(M),
- unwrapDI<DIFile>(File),
- Line));
-}
-
-LLVMMetadataRef
-LLVMDIBuilderCreateImportedDeclaration(LLVMDIBuilderRef Builder,
- LLVMMetadataRef Scope,
- LLVMMetadataRef Decl,
- LLVMMetadataRef File,
- unsigned Line,
- const char *Name, size_t NameLen) {
+ unwrapDI<DIScope>(Scope), unwrapDI<DIImportedEntity>(ImportedEntity),
+ unwrapDI<DIFile>(File), Line, Elts));
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateImportedModuleFromModule(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef M,
+ LLVMMetadataRef File, unsigned Line, LLVMMetadataRef *Elements,
+ unsigned NumElements) {
+ auto Elts =
+ (NumElements > 0)
+ ? unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements})
+ : nullptr;
+ return wrap(unwrap(Builder)->createImportedModule(
+ unwrapDI<DIScope>(Scope), unwrapDI<DIModule>(M), unwrapDI<DIFile>(File),
+ Line, Elts));
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateImportedDeclaration(
+ LLVMDIBuilderRef Builder, LLVMMetadataRef Scope, LLVMMetadataRef Decl,
+ LLVMMetadataRef File, unsigned Line, const char *Name, size_t NameLen,
+ LLVMMetadataRef *Elements, unsigned NumElements) {
+ auto Elts =
+ (NumElements > 0)
+ ? unwrap(Builder)->getOrCreateArray({unwrap(Elements), NumElements})
+ : nullptr;
return wrap(unwrap(Builder)->createImportedDeclaration(
- unwrapDI<DIScope>(Scope),
- unwrapDI<DINode>(Decl),
- unwrapDI<DIFile>(File), Line, {Name, NameLen}));
+ unwrapDI<DIScope>(Scope), unwrapDI<DINode>(Decl), unwrapDI<DIFile>(File),
+ Line, {Name, NameLen}, Elts));
}
LLVMMetadataRef
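The three imported-entity wrappers above grow Elements/NumElements parameters so a frontend can attach an element list (e.g. renamed declarations) to the import through the C API. Hedged sketch with hypothetical handles:

  LLVMMetadataRef Elts[] = {AliasedDecl};   // AliasedDecl: a hypothetical DINode handle
  LLVMMetadataRef Imported = LLVMDIBuilderCreateImportedDeclaration(
      DIB, Scope, Decl, File, /*Line=*/42, "renamed", /*NameLen=*/7,
      Elts, /*NumElements=*/1);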
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 7b0dab799e1a..b20e581d283a 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -82,8 +82,8 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Storage, Context.pImpl->DILocations);
}
-const
-DILocation *DILocation::getMergedLocations(ArrayRef<const DILocation *> Locs) {
+const DILocation *
+DILocation::getMergedLocations(ArrayRef<const DILocation *> Locs) {
if (Locs.empty())
return nullptr;
if (Locs.size() == 1)
@@ -139,7 +139,8 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
return DILocation::get(Result->getContext(), 0, 0, S, L);
}
-Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, unsigned CI) {
+Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF,
+ unsigned CI) {
std::array<unsigned, 3> Components = {BD, DF, CI};
uint64_t RemainingWork = 0U;
// We use RemainingWork to figure out if we have no remaining components to
@@ -147,7 +148,8 @@ Optional<unsigned> DILocation::encodeDiscriminator(unsigned BD, unsigned DF, uns
// encode anything for the latter 2.
// Since any of the input components is at most 32 bits, their sum will be
// less than 34 bits, and thus RemainingWork won't overflow.
- RemainingWork = std::accumulate(Components.begin(), Components.end(), RemainingWork);
+ RemainingWork =
+ std::accumulate(Components.begin(), Components.end(), RemainingWork);
int I = 0;
unsigned Ret = 0;
@@ -179,7 +181,6 @@ void DILocation::decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
}
-
DINode::DIFlags DINode::getFlag(StringRef Flag) {
return StringSwitch<DIFlags>(Flag)
#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
@@ -546,8 +547,8 @@ DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
DEFINE_GETIMPL_LOOKUP(DIBasicType,
(Tag, Name, SizeInBits, AlignInBits, Encoding, Flags));
Metadata *Ops[] = {nullptr, nullptr, Name};
- DEFINE_GETIMPL_STORE(DIBasicType, (Tag, SizeInBits, AlignInBits, Encoding,
- Flags), Ops);
+ DEFINE_GETIMPL_STORE(DIBasicType,
+ (Tag, SizeInBits, AlignInBits, Encoding, Flags), Ops);
}
Optional<DIBasicType::Signedness> DIBasicType::getSignedness() const {
@@ -582,16 +583,17 @@ DIDerivedType *DIDerivedType::getImpl(
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, DIFlags Flags, Metadata *ExtraData,
- StorageType Storage, bool ShouldCreate) {
+ Metadata *Annotations, StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIDerivedType,
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, DWARFAddressSpace, Flags,
- ExtraData));
- Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData};
- DEFINE_GETIMPL_STORE(
- DIDerivedType, (Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
- DWARFAddressSpace, Flags), Ops);
+ ExtraData, Annotations));
+ Metadata *Ops[] = {File, Scope, Name, BaseType, ExtraData, Annotations};
+ DEFINE_GETIMPL_STORE(DIDerivedType,
+ (Tag, Line, SizeInBits, AlignInBits, OffsetInBits,
+ DWARFAddressSpace, Flags),
+ Ops);
}
DICompositeType *DICompositeType::getImpl(
@@ -601,22 +603,25 @@ DICompositeType *DICompositeType::getImpl(
Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, MDString *Identifier, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank, StorageType Storage, bool ShouldCreate) {
+ Metadata *Rank, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
// Keep this in sync with buildODRType.
- DEFINE_GETIMPL_LOOKUP(
- DICompositeType,
- (Tag, Name, File, Line, Scope, BaseType, SizeInBits, AlignInBits,
- OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder, TemplateParams,
- Identifier, Discriminator, DataLocation, Associated, Allocated, Rank));
+ DEFINE_GETIMPL_LOOKUP(DICompositeType,
+ (Tag, Name, File, Line, Scope, BaseType, SizeInBits,
+ AlignInBits, OffsetInBits, Flags, Elements,
+ RuntimeLang, VTableHolder, TemplateParams, Identifier,
+ Discriminator, DataLocation, Associated, Allocated,
+ Rank, Annotations));
Metadata *Ops[] = {File, Scope, Name, BaseType,
Elements, VTableHolder, TemplateParams, Identifier,
Discriminator, DataLocation, Associated, Allocated,
- Rank};
- DEFINE_GETIMPL_STORE(DICompositeType, (Tag, Line, RuntimeLang, SizeInBits,
- AlignInBits, OffsetInBits, Flags),
- Ops);
+ Rank, Annotations};
+ DEFINE_GETIMPL_STORE(
+ DICompositeType,
+ (Tag, Line, RuntimeLang, SizeInBits, AlignInBits, OffsetInBits, Flags),
+ Ops);
}
DICompositeType *DICompositeType::buildODRType(
@@ -626,7 +631,7 @@ DICompositeType *DICompositeType::buildODRType(
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank) {
+ Metadata *Rank, Metadata *Annotations) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
return nullptr;
@@ -636,7 +641,10 @@ DICompositeType *DICompositeType::buildODRType(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
VTableHolder, TemplateParams, &Identifier, Discriminator,
- DataLocation, Associated, Allocated, Rank);
+ DataLocation, Associated, Allocated, Rank, Annotations);
+
+ if (CT->getTag() != Tag)
+ return nullptr;
// Only mutate CT if it's a forward declaration and the new operands aren't.
assert(CT->getRawIdentifier() == &Identifier && "Wrong ODR identifier?");
@@ -649,7 +657,7 @@ DICompositeType *DICompositeType::buildODRType(
Metadata *Ops[] = {File, Scope, Name, BaseType,
Elements, VTableHolder, TemplateParams, &Identifier,
Discriminator, DataLocation, Associated, Allocated,
- Rank};
+ Rank, Annotations};
assert((std::end(Ops) - std::begin(Ops)) == (int)CT->getNumOperands() &&
"Mismatched number of operands");
for (unsigned I = 0, E = CT->getNumOperands(); I != E; ++I)
@@ -665,17 +673,21 @@ DICompositeType *DICompositeType::getODRType(
DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated, Metadata *Allocated,
- Metadata *Rank) {
+ Metadata *Rank, Metadata *Annotations) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
return nullptr;
auto *&CT = (*Context.pImpl->DITypeMap)[&Identifier];
- if (!CT)
+ if (!CT) {
CT = DICompositeType::getDistinct(
Context, Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang, VTableHolder,
TemplateParams, &Identifier, Discriminator, DataLocation, Associated,
- Allocated, Rank);
+ Allocated, Rank, Annotations);
+ } else {
+ if (CT->getTag() != Tag)
+ return nullptr;
+ }
return CT;
}
@@ -789,10 +801,14 @@ DICompileUnit::getNameTableKind(StringRef Str) {
const char *DICompileUnit::emissionKindString(DebugEmissionKind EK) {
switch (EK) {
- case NoDebug: return "NoDebug";
- case FullDebug: return "FullDebug";
- case LineTablesOnly: return "LineTablesOnly";
- case DebugDirectivesOnly: return "DebugDirectivesOnly";
+ case NoDebug:
+ return "NoDebug";
+ case FullDebug:
+ return "FullDebug";
+ case LineTablesOnly:
+ return "LineTablesOnly";
+ case DebugDirectivesOnly:
+ return "DebugDirectivesOnly";
}
return nullptr;
}
@@ -862,23 +878,28 @@ DISubprogram *DISubprogram::getImpl(
unsigned ScopeLine, Metadata *ContainingType, unsigned VirtualIndex,
int ThisAdjustment, DIFlags Flags, DISPFlags SPFlags, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes, StorageType Storage, bool ShouldCreate) {
+ Metadata *ThrownTypes, Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DISubprogram,
(Scope, Name, LinkageName, File, Line, Type, ScopeLine,
ContainingType, VirtualIndex, ThisAdjustment, Flags,
SPFlags, Unit, TemplateParams, Declaration,
- RetainedNodes, ThrownTypes));
- SmallVector<Metadata *, 11> Ops = {
- File, Scope, Name, LinkageName, Type, Unit,
- Declaration, RetainedNodes, ContainingType, TemplateParams, ThrownTypes};
- if (!ThrownTypes) {
+ RetainedNodes, ThrownTypes, Annotations));
+ SmallVector<Metadata *, 12> Ops = {
+ File, Scope, Name, LinkageName,
+ Type, Unit, Declaration, RetainedNodes,
+ ContainingType, TemplateParams, ThrownTypes, Annotations};
+ if (!Annotations) {
Ops.pop_back();
- if (!TemplateParams) {
+ if (!ThrownTypes) {
Ops.pop_back();
- if (!ContainingType)
+ if (!TemplateParams) {
Ops.pop_back();
+ if (!ContainingType)
+ Ops.pop_back();
+ }
}
}
DEFINE_GETIMPL_STORE_N(
@@ -977,13 +998,14 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration,
Metadata *TemplateParams, uint32_t AlignInBits,
- StorageType Storage, bool ShouldCreate) {
+ Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DIGlobalVariable, (Scope, Name, LinkageName, File, Line,
- Type, IsLocalToUnit, IsDefinition,
- StaticDataMemberDeclaration,
- TemplateParams, AlignInBits));
+ DEFINE_GETIMPL_LOOKUP(
+ DIGlobalVariable,
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, TemplateParams, AlignInBits, Annotations));
Metadata *Ops[] = {Scope,
Name,
File,
@@ -991,27 +1013,26 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
Name,
LinkageName,
StaticDataMemberDeclaration,
- TemplateParams};
+ TemplateParams,
+ Annotations};
DEFINE_GETIMPL_STORE(DIGlobalVariable,
(Line, IsLocalToUnit, IsDefinition, AlignInBits), Ops);
}
-DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File,
- unsigned Line, Metadata *Type,
- unsigned Arg, DIFlags Flags,
- uint32_t AlignInBits,
- StorageType Storage,
- bool ShouldCreate) {
+DILocalVariable *
+DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, Metadata *Type,
+ unsigned Arg, DIFlags Flags, uint32_t AlignInBits,
+ Metadata *Annotations, StorageType Storage,
+ bool ShouldCreate) {
// 64K ought to be enough for any frontend.
assert(Arg <= UINT16_MAX && "Expected argument number to fit in 16-bits");
assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DILocalVariable,
- (Scope, Name, File, Line, Type, Arg, Flags,
- AlignInBits));
- Metadata *Ops[] = {Scope, Name, File, Type};
+ DEFINE_GETIMPL_LOOKUP(DILocalVariable, (Scope, Name, File, Line, Type, Arg,
+ Flags, AlignInBits, Annotations));
+ Metadata *Ops[] = {Scope, Name, File, Type, Annotations};
DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags, AlignInBits), Ops);
}
@@ -1038,14 +1059,12 @@ Optional<uint64_t> DIVariable::getSizeInBits() const {
return None;
}
-DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File, unsigned Line,
- StorageType Storage,
+DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ Metadata *File, unsigned Line, StorageType Storage,
bool ShouldCreate) {
assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DILabel,
- (Scope, Name, File, Line));
+ DEFINE_GETIMPL_LOOKUP(DILabel, (Scope, Name, File, Line));
Metadata *Ops[] = {Scope, Name, File};
DEFINE_GETIMPL_STORE(DILabel, (Line), Ops);
}
@@ -1194,10 +1213,11 @@ bool DIExpression::isComplex() const {
// kind of complex computation occurs.
for (const auto &It : expr_ops()) {
switch (It.getOp()) {
- case dwarf::DW_OP_LLVM_tag_offset:
- case dwarf::DW_OP_LLVM_fragment:
- continue;
- default: return true;
+ case dwarf::DW_OP_LLVM_tag_offset:
+ case dwarf::DW_OP_LLVM_fragment:
+ continue;
+ default:
+ return true;
}
}
@@ -1346,8 +1366,7 @@ DIExpression *DIExpression::replaceArg(const DIExpression *Expr,
DIExpression *DIExpression::prependOpcodes(const DIExpression *Expr,
SmallVectorImpl<uint64_t> &Ops,
- bool StackValue,
- bool EntryValue) {
+ bool StackValue, bool EntryValue) {
assert(Expr && "Can't prepend ops to this expression");
if (EntryValue) {
@@ -1442,7 +1461,8 @@ Optional<DIExpression *> DIExpression::createFragmentExpression(
if (Expr) {
for (auto Op : Expr->expr_ops()) {
switch (Op.getOp()) {
- default: break;
+ default:
+ break;
case dwarf::DW_OP_shr:
case dwarf::DW_OP_shra:
case dwarf::DW_OP_shl:
@@ -1476,6 +1496,45 @@ Optional<DIExpression *> DIExpression::createFragmentExpression(
return DIExpression::get(Expr->getContext(), Ops);
}
+std::pair<DIExpression *, const ConstantInt *>
+DIExpression::constantFold(const ConstantInt *CI) {
+ // Copy the APInt so we can modify it.
+ APInt NewInt = CI->getValue();
+ SmallVector<uint64_t, 8> Ops;
+
+ // Fold operators only at the beginning of the expression.
+ bool First = true;
+ bool Changed = false;
+ for (auto Op : expr_ops()) {
+ switch (Op.getOp()) {
+ default:
+ // We fold only the leading part of the expression; if we get to a part
+ // that we're going to copy unchanged, and haven't done any folding,
+ // then the entire expression is unchanged and we can return early.
+ if (!Changed)
+ return {this, CI};
+ First = false;
+ break;
+ case dwarf::DW_OP_LLVM_convert:
+ if (!First)
+ break;
+ Changed = true;
+ if (Op.getArg(1) == dwarf::DW_ATE_signed)
+ NewInt = NewInt.sextOrTrunc(Op.getArg(0));
+ else {
+ assert(Op.getArg(1) == dwarf::DW_ATE_unsigned && "Unexpected operand");
+ NewInt = NewInt.zextOrTrunc(Op.getArg(0));
+ }
+ continue;
+ }
+ Op.appendToVector(Ops);
+ }
+ if (!Changed)
+ return {this, CI};
+ return {DIExpression::get(getContext(), Ops),
+ ConstantInt::get(getContext(), NewInt)};
+}
+
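constantFold above only rewrites a leading run of DW_OP_LLVM_convert operations, folding the sign/zero extension into the constant and dropping the convert from the expression; anything else leaves both operands untouched. A hedged sketch of the call pattern, with Expr and CI an existing DIExpression* and ConstantInt*:

  // If Expr begins with DW_OP_LLVM_convert, 64, DW_ATE_signed, an i32 constant
  // is sign-extended to 64 bits and the convert op disappears from the result.
  std::pair<DIExpression *, const ConstantInt *> Folded = Expr->constantFold(CI);
  DIExpression *NewExpr = Folded.first;
  const ConstantInt *NewCI = Folded.second;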
uint64_t DIExpression::getNumLocationOperands() const {
uint64_t Result = 0;
for (auto ExprOp : expr_ops())
@@ -1552,21 +1611,22 @@ DIObjCProperty *DIObjCProperty::getImpl(
DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag,
Metadata *Scope, Metadata *Entity,
Metadata *File, unsigned Line,
- MDString *Name, StorageType Storage,
+ MDString *Name, Metadata *Elements,
+ StorageType Storage,
bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIImportedEntity,
- (Tag, Scope, Entity, File, Line, Name));
- Metadata *Ops[] = {Scope, Entity, Name, File};
+ (Tag, Scope, Entity, File, Line, Name, Elements));
+ Metadata *Ops[] = {Scope, Entity, Name, File, Elements};
DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops);
}
-DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType,
- unsigned Line, MDString *Name, MDString *Value,
- StorageType Storage, bool ShouldCreate) {
+DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType, unsigned Line,
+ MDString *Name, MDString *Value, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIMacro, (MIType, Line, Name, Value));
- Metadata *Ops[] = { Name, Value };
+ Metadata *Ops[] = {Name, Value};
DEFINE_GETIMPL_STORE(DIMacro, (MIType, Line), Ops);
}
@@ -1574,9 +1634,8 @@ DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType,
unsigned Line, Metadata *File,
Metadata *Elements, StorageType Storage,
bool ShouldCreate) {
- DEFINE_GETIMPL_LOOKUP(DIMacroFile,
- (MIType, Line, File, Elements));
- Metadata *Ops[] = { File, Elements };
+ DEFINE_GETIMPL_LOOKUP(DIMacroFile, (MIType, Line, File, Elements));
+ Metadata *Ops[] = {File, Elements};
DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops);
}
@@ -1592,6 +1651,12 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
assert((!New || isa<ValueAsMetadata>(New)) &&
"DIArgList must be passed a ValueAsMetadata");
untrack();
+ bool Uniq = isUniqued();
+ if (Uniq) {
+ // We need to update the uniqueness once the Args are updated since they
+ // form the key to the DIArgLists store.
+ eraseFromStore();
+ }
ValueAsMetadata *NewVM = cast_or_null<ValueAsMetadata>(New);
for (ValueAsMetadata *&VM : Args) {
if (&VM == OldVMPtr) {
@@ -1601,6 +1666,10 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
VM = ValueAsMetadata::get(UndefValue::get(VM->getValue()->getType()));
}
}
+ if (Uniq) {
+ if (uniquify() != this)
+ storeDistinctInContext();
+ }
track();
}
void DIArgList::track() {
diff --git a/llvm/lib/IR/DiagnosticHandler.cpp b/llvm/lib/IR/DiagnosticHandler.cpp
index 2fe634803894..7b40728a34e8 100644
--- a/llvm/lib/IR/DiagnosticHandler.cpp
+++ b/llvm/lib/IR/DiagnosticHandler.cpp
@@ -30,7 +30,7 @@ struct PassRemarksOpt {
Pattern = std::make_shared<Regex>(Val);
std::string RegexError;
if (!Pattern->isValid(RegexError))
- report_fatal_error("Invalid regular expression '" + Val +
+ report_fatal_error(Twine("Invalid regular expression '") + Val +
"' in -pass-remarks: " + RegexError,
false);
}
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index f92138274801..0a872a81f911 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -1,4 +1,4 @@
-//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
+//===- llvm/IR/DiagnosticInfo.cpp - Diagnostic Definitions ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -70,10 +70,8 @@ void DiagnosticInfoInlineAsm::print(DiagnosticPrinter &DP) const {
}
void DiagnosticInfoResourceLimit::print(DiagnosticPrinter &DP) const {
- DP << getResourceName() << " (" << getResourceSize() << ") exceeds limit";
- if (getResourceLimit() != 0)
- DP << " (" << getResourceLimit() << ')';
- DP << " in function '" << getFunction() << '\'';
+ DP << getResourceName() << " (" << getResourceSize() << ") exceeds limit ("
+ << getResourceLimit() << ") in function '" << getFunction() << '\'';
}
void DiagnosticInfoDebugMetadataVersion::print(DiagnosticPrinter &DP) const {
@@ -401,3 +399,35 @@ std::string DiagnosticInfoOptimizationBase::getMsg() const {
void OptimizationRemarkAnalysisFPCommute::anchor() {}
void OptimizationRemarkAnalysisAliasing::anchor() {}
+
+void llvm::diagnoseDontCall(const CallInst &CI) {
+ auto *F = CI.getCalledFunction();
+ if (!F)
+ return;
+
+ for (int i = 0; i != 2; ++i) {
+ auto AttrName = i == 0 ? "dontcall-error" : "dontcall-warn";
+ auto Sev = i == 0 ? DS_Error : DS_Warning;
+
+ if (F->hasFnAttribute(AttrName)) {
+ unsigned LocCookie = 0;
+ auto A = F->getFnAttribute(AttrName);
+ if (MDNode *MD = CI.getMetadata("srcloc"))
+ LocCookie =
+ mdconst::extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
+ DiagnosticInfoDontCall D(F->getName(), A.getValueAsString(), Sev,
+ LocCookie);
+ F->getContext().diagnose(D);
+ }
+ }
+}
+
+void DiagnosticInfoDontCall::print(DiagnosticPrinter &DP) const {
+ DP << "call to " << getFunctionName() << " marked \"dontcall-";
+ if (getSeverity() == DiagnosticSeverity::DS_Error)
+ DP << "error\"";
+ else
+ DP << "warn\"";
+ if (!getNote().empty())
+ DP << ": " << getNote();
+}
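diagnoseDontCall turns the "dontcall-error"/"dontcall-warn" string attributes on a callee into error or warning diagnostics at the call site. A hedged sketch (F and CI are an existing Function* and CallInst*; the attribute value is illustrative):

  F->addFnAttr("dontcall-error", "use the checked wrapper instead");
  // When a call CI whose callee is F gets lowered:
  diagnoseDontCall(*CI);
  // Emits: call to <F's name> marked "dontcall-error": use the checked wrapper instead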
diff --git a/llvm/lib/IR/DiagnosticPrinter.cpp b/llvm/lib/IR/DiagnosticPrinter.cpp
index 496bd18e78e2..49b8bbae53be 100644
--- a/llvm/lib/IR/DiagnosticPrinter.cpp
+++ b/llvm/lib/IR/DiagnosticPrinter.cpp
@@ -1,4 +1,4 @@
-//===- llvm/Support/DiagnosticInfo.cpp - Diagnostic Definitions -*- C++ -*-===//
+//===- llvm/IR/DiagnosticPrinter.cpp - Diagnostic Printer -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/IR/FPEnv.cpp b/llvm/lib/IR/FPEnv.cpp
index 516c702acec7..c6e0938e71a6 100644
--- a/llvm/lib/IR/FPEnv.cpp
+++ b/llvm/lib/IR/FPEnv.cpp
@@ -17,7 +17,7 @@
namespace llvm {
-Optional<RoundingMode> StrToRoundingMode(StringRef RoundingArg) {
+Optional<RoundingMode> convertStrToRoundingMode(StringRef RoundingArg) {
// For dynamic rounding mode, we use round to nearest but we will set the
// 'exact' SDNodeFlag so that the value will not be rounded.
return StringSwitch<Optional<RoundingMode>>(RoundingArg)
@@ -30,7 +30,7 @@ Optional<RoundingMode> StrToRoundingMode(StringRef RoundingArg) {
.Default(None);
}
-Optional<StringRef> RoundingModeToStr(RoundingMode UseRounding) {
+Optional<StringRef> convertRoundingModeToStr(RoundingMode UseRounding) {
Optional<StringRef> RoundingStr = None;
switch (UseRounding) {
case RoundingMode::Dynamic:
@@ -57,7 +57,8 @@ Optional<StringRef> RoundingModeToStr(RoundingMode UseRounding) {
return RoundingStr;
}
-Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef ExceptionArg) {
+Optional<fp::ExceptionBehavior>
+convertStrToExceptionBehavior(StringRef ExceptionArg) {
return StringSwitch<Optional<fp::ExceptionBehavior>>(ExceptionArg)
.Case("fpexcept.ignore", fp::ebIgnore)
.Case("fpexcept.maytrap", fp::ebMayTrap)
@@ -65,7 +66,8 @@ Optional<fp::ExceptionBehavior> StrToExceptionBehavior(StringRef ExceptionArg) {
.Default(None);
}
-Optional<StringRef> ExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
+Optional<StringRef>
+convertExceptionBehaviorToStr(fp::ExceptionBehavior UseExcept) {
Optional<StringRef> ExceptStr = None;
switch (UseExcept) {
case fp::ebStrict:
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 4034b1505bd0..82b20a8af91b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -140,25 +140,25 @@ bool Argument::hasPreallocatedAttr() const {
bool Argument::hasPassPointeeByValueCopyAttr() const {
if (!getType()->isPointerTy()) return false;
AttributeList Attrs = getParent()->getAttributes();
- return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated);
+ return Attrs.hasParamAttr(getArgNo(), Attribute::ByVal) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::InAlloca) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::Preallocated);
}
bool Argument::hasPointeeInMemoryValueAttr() const {
if (!getType()->isPointerTy())
return false;
AttributeList Attrs = getParent()->getAttributes();
- return Attrs.hasParamAttribute(getArgNo(), Attribute::ByVal) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::StructRet) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::InAlloca) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::Preallocated) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::ByRef);
+ return Attrs.hasParamAttr(getArgNo(), Attribute::ByVal) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::StructRet) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::InAlloca) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::Preallocated) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::ByRef);
}
/// For a byval, sret, inalloca, or preallocated parameter, get the in-memory
/// parameter type.
-static Type *getMemoryParamAllocType(AttributeSet ParamAttrs, Type *ArgTy) {
+static Type *getMemoryParamAllocType(AttributeSet ParamAttrs) {
// FIXME: All the type carrying attributes are mutually exclusive, so there
// should be a single query to get the stored type that handles any of them.
if (Type *ByValTy = ParamAttrs.getByValType())
@@ -177,19 +177,19 @@ static Type *getMemoryParamAllocType(AttributeSet ParamAttrs, Type *ArgTy) {
uint64_t Argument::getPassPointeeByValueCopySize(const DataLayout &DL) const {
AttributeSet ParamAttrs =
- getParent()->getAttributes().getParamAttributes(getArgNo());
- if (Type *MemTy = getMemoryParamAllocType(ParamAttrs, getType()))
+ getParent()->getAttributes().getParamAttrs(getArgNo());
+ if (Type *MemTy = getMemoryParamAllocType(ParamAttrs))
return DL.getTypeAllocSize(MemTy);
return 0;
}
Type *Argument::getPointeeInMemoryValueType() const {
AttributeSet ParamAttrs =
- getParent()->getAttributes().getParamAttributes(getArgNo());
- return getMemoryParamAllocType(ParamAttrs, getType());
+ getParent()->getAttributes().getParamAttrs(getArgNo());
+ return getMemoryParamAllocType(ParamAttrs);
}
-unsigned Argument::getParamAlignment() const {
+uint64_t Argument::getParamAlignment() const {
assert(getType()->isPointerTy() && "Only pointers have alignments");
return getParent()->getParamAlignment(getArgNo());
}
@@ -278,8 +278,8 @@ bool Argument::hasSExtAttr() const {
bool Argument::onlyReadsMemory() const {
AttributeList Attrs = getParent()->getAttributes();
- return Attrs.hasParamAttribute(getArgNo(), Attribute::ReadOnly) ||
- Attrs.hasParamAttribute(getArgNo(), Attribute::ReadNone);
+ return Attrs.hasParamAttr(getArgNo(), Attribute::ReadOnly) ||
+ Attrs.hasParamAttr(getArgNo(), Attribute::ReadNone);
}
void Argument::addAttrs(AttrBuilder &B) {
@@ -354,7 +354,7 @@ Function *Function::createWithDefaultAttr(FunctionType *Ty,
B.addAttribute("frame-pointer", "all");
break;
}
- F->addAttributes(AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
return F;
}
@@ -529,101 +529,144 @@ void Function::dropAllReferences() {
clearMetadata();
}
-void Function::addAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+void Function::addAttributeAtIndex(unsigned i, Attribute Attr) {
+ AttributeSets = AttributeSets.addAttributeAtIndex(getContext(), i, Attr);
}
-void Function::addAttribute(unsigned i, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Attr);
- setAttributes(PAL);
+void Function::addFnAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.addFnAttribute(getContext(), Kind);
}
-void Function::addAttributes(unsigned i, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addAttributes(getContext(), i, Attrs);
- setAttributes(PAL);
+void Function::addFnAttr(StringRef Kind, StringRef Val) {
+ AttributeSets = AttributeSets.addFnAttribute(getContext(), Kind, Val);
+}
+
+void Function::addFnAttr(Attribute Attr) {
+ AttributeSets = AttributeSets.addFnAttribute(getContext(), Attr);
+}
+
+void Function::addFnAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.addFnAttributes(getContext(), Attrs);
+}
+
+void Function::addRetAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.addRetAttribute(getContext(), Kind);
+}
+
+void Function::addRetAttr(Attribute Attr) {
+ AttributeSets = AttributeSets.addRetAttribute(getContext(), Attr);
+}
+
+void Function::addRetAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.addRetAttributes(getContext(), Attrs);
}
void Function::addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addParamAttribute(getContext(), ArgNo, Kind);
}
void Function::addParamAttr(unsigned ArgNo, Attribute Attr) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttribute(getContext(), ArgNo, Attr);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addParamAttribute(getContext(), ArgNo, Attr);
}
void Function::addParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addParamAttributes(getContext(), ArgNo, Attrs);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addParamAttributes(getContext(), ArgNo, Attrs);
}
-void Function::removeAttribute(unsigned i, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+void Function::removeAttributeAtIndex(unsigned i, Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.removeAttributeAtIndex(getContext(), i, Kind);
}
-void Function::removeAttribute(unsigned i, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttribute(getContext(), i, Kind);
- setAttributes(PAL);
+void Function::removeAttributeAtIndex(unsigned i, StringRef Kind) {
+ AttributeSets = AttributeSets.removeAttributeAtIndex(getContext(), i, Kind);
}
-void Function::removeAttributes(unsigned i, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeAttributes(getContext(), i, Attrs);
- setAttributes(PAL);
+void Function::removeFnAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.removeFnAttribute(getContext(), Kind);
+}
+
+void Function::removeFnAttr(StringRef Kind) {
+ AttributeSets = AttributeSets.removeFnAttribute(getContext(), Kind);
+}
+
+void Function::removeFnAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.removeFnAttributes(getContext(), Attrs);
+}
+
+void Function::removeRetAttr(Attribute::AttrKind Kind) {
+ AttributeSets = AttributeSets.removeRetAttribute(getContext(), Kind);
+}
+
+void Function::removeRetAttr(StringRef Kind) {
+ AttributeSets = AttributeSets.removeRetAttribute(getContext(), Kind);
+}
+
+void Function::removeRetAttrs(const AttrBuilder &Attrs) {
+ AttributeSets = AttributeSets.removeRetAttributes(getContext(), Attrs);
}
void Function::removeParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.removeParamAttribute(getContext(), ArgNo, Kind);
}
void Function::removeParamAttr(unsigned ArgNo, StringRef Kind) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttribute(getContext(), ArgNo, Kind);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.removeParamAttribute(getContext(), ArgNo, Kind);
}
void Function::removeParamAttrs(unsigned ArgNo, const AttrBuilder &Attrs) {
- AttributeList PAL = getAttributes();
- PAL = PAL.removeParamAttributes(getContext(), ArgNo, Attrs);
- setAttributes(PAL);
+ AttributeSets =
+ AttributeSets.removeParamAttributes(getContext(), ArgNo, Attrs);
+}
+
+void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) {
+ AttributeSets =
+ AttributeSets.addDereferenceableParamAttr(getContext(), ArgNo, Bytes);
}
-void Function::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+bool Function::hasFnAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.hasFnAttr(Kind);
}
-void Function::addDereferenceableParamAttr(unsigned ArgNo, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableParamAttr(getContext(), ArgNo, Bytes);
- setAttributes(PAL);
+bool Function::hasFnAttribute(StringRef Kind) const {
+ return AttributeSets.hasFnAttr(Kind);
+}
+
+bool Function::hasRetAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.hasRetAttr(Kind);
+}
+
+bool Function::hasParamAttribute(unsigned ArgNo,
+ Attribute::AttrKind Kind) const {
+ return AttributeSets.hasParamAttr(ArgNo, Kind);
+}
+
+Attribute Function::getAttributeAtIndex(unsigned i,
+ Attribute::AttrKind Kind) const {
+ return AttributeSets.getAttributeAtIndex(i, Kind);
+}
+
+Attribute Function::getAttributeAtIndex(unsigned i, StringRef Kind) const {
+ return AttributeSets.getAttributeAtIndex(i, Kind);
}
-void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullAttr(getContext(), i, Bytes);
- setAttributes(PAL);
+Attribute Function::getFnAttribute(Attribute::AttrKind Kind) const {
+ return AttributeSets.getFnAttr(Kind);
+}
+
+Attribute Function::getFnAttribute(StringRef Kind) const {
+ return AttributeSets.getFnAttr(Kind);
+}
+
+/// gets the specified attribute from the list of attributes.
+Attribute Function::getParamAttribute(unsigned ArgNo,
+ Attribute::AttrKind Kind) const {
+ return AttributeSets.getParamAttr(ArgNo, Kind);
}
void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo,
uint64_t Bytes) {
- AttributeList PAL = getAttributes();
- PAL = PAL.addDereferenceableOrNullParamAttr(getContext(), ArgNo, Bytes);
- setAttributes(PAL);
+ AttributeSets = AttributeSets.addDereferenceableOrNullParamAttr(getContext(),
+ ArgNo, Bytes);
}
DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const {
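The block above replaces the index-based Function attribute mutators with purpose-named helpers that assign straight into AttributeSets. A hedged before/after sketch (F is an existing Function*):

  F->addFnAttr(Attribute::NoUnwind);   // was addAttribute(AttributeList::FunctionIndex, ...)
  F->addRetAttr(Attribute::NonNull);   // was addAttribute(AttributeList::ReturnIndex, ...)
  F->removeFnAttr("frame-pointer");    // was removeAttribute(FunctionIndex, "frame-pointer")
  bool RO = F->getAttributes().hasParamAttr(0, Attribute::ReadOnly); // was hasParamAttribute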
@@ -936,7 +979,8 @@ enum IIT_Info {
IIT_BF16 = 48,
IIT_STRUCT9 = 49,
IIT_V256 = 50,
- IIT_AMX = 51
+ IIT_AMX = 51,
+ IIT_PPCF128 = 52
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -983,6 +1027,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_F128:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Quad, 0));
return;
+ case IIT_PPCF128:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::PPCQuad, 0));
+ return;
case IIT_I1:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 1));
return;
@@ -1207,6 +1254,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::Float: return Type::getFloatTy(Context);
case IITDescriptor::Double: return Type::getDoubleTy(Context);
case IITDescriptor::Quad: return Type::getFP128Ty(Context);
+ case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context);
case IITDescriptor::Integer:
return IntegerType::get(Context, D.Integer_Width);
@@ -1389,6 +1437,7 @@ static bool matchIntrinsicType(
case IITDescriptor::Float: return !Ty->isFloatTy();
case IITDescriptor::Double: return !Ty->isDoubleTy();
case IITDescriptor::Quad: return !Ty->isFP128Ty();
+ case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty();
case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
case IITDescriptor::Vector: {
VectorType *VT = dyn_cast<VectorType>(Ty);
@@ -1403,11 +1452,6 @@ static bool matchIntrinsicType(
if (!PT->isOpaque())
return matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
DeferredChecks, IsDeferredCheck);
- // If typed pointers are supported, do not allow using opaque pointer in
- // place of fixed pointer type. This would make the intrinsic signature
- // non-unique.
- if (Ty->getContext().supportsTypedPointers())
- return true;
// Consume IIT descriptors relating to the pointer element type.
while (Infos.front().Kind == IITDescriptor::Pointer)
Infos = Infos.slice(1);
@@ -1525,11 +1569,8 @@ static bool matchIntrinsicType(
if (!ThisArgType || !ReferenceType)
return true;
- if (!ThisArgType->isOpaque())
- return ThisArgType->getElementType() != ReferenceType->getElementType();
- // If typed pointers are supported, do not allow opaque pointer to ensure
- // uniqueness.
- return Ty->getContext().supportsTypedPointers();
+ return !ThisArgType->isOpaqueOrPointeeTypeMatches(
+ ReferenceType->getElementType());
}
case IITDescriptor::VecOfAnyPtrsToElt: {
unsigned RefArgNumber = D.getRefArgNumber();
@@ -1702,8 +1743,8 @@ Optional<Function *> Intrinsic::remangleIntrinsicFunction(Function *F) {
/// and llvm.compiler.used variables.
bool Function::hasAddressTaken(const User **PutOffender,
bool IgnoreCallbackUses,
- bool IgnoreAssumeLikeCalls,
- bool IgnoreLLVMUsed) const {
+ bool IgnoreAssumeLikeCalls, bool IgnoreLLVMUsed,
+ bool IgnoreARCAttachedCall) const {
for (const Use &U : uses()) {
const User *FU = U.getUser();
if (isa<BlockAddress>(FU))
@@ -1747,6 +1788,11 @@ bool Function::hasAddressTaken(const User **PutOffender,
return true;
}
if (!Call->isCallee(&U)) {
+ if (IgnoreARCAttachedCall &&
+ Call->isOperandBundleOfType(LLVMContext::OB_clang_arc_attachedcall,
+ U.getOperandNo()))
+ continue;
+
if (PutOffender)
*PutOffender = FU;
return true;
@@ -1846,10 +1892,9 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) {
void Function::setEntryCount(ProfileCount Count,
const DenseSet<GlobalValue::GUID> *S) {
- assert(Count.hasValue());
#if !defined(NDEBUG)
auto PrevCount = getEntryCount();
- assert(!PrevCount.hasValue() || PrevCount.getType() == Count.getType());
+ assert(!PrevCount.hasValue() || PrevCount->getType() == Count.getType());
#endif
auto ImportGUIDs = getImportGUIDs();
@@ -1867,7 +1912,7 @@ void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type,
setEntryCount(ProfileCount(Count, Type), Imports);
}
-ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
+Optional<ProfileCount> Function::getEntryCount(bool AllowSynthetic) const {
MDNode *MD = getMetadata(LLVMContext::MD_prof);
if (MD && MD->getOperand(0))
if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0))) {
@@ -1877,7 +1922,7 @@ ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
// A value of -1 is used for SamplePGO when there were no samples.
// Treat this the same as unknown.
if (Count == (uint64_t)-1)
- return ProfileCount::getInvalid();
+ return None;
return ProfileCount(Count, PCT_Real);
} else if (AllowSynthetic &&
MDS->getString().equals("synthetic_function_entry_count")) {
@@ -1886,7 +1931,7 @@ ProfileCount Function::getEntryCount(bool AllowSynthetic) const {
return ProfileCount(Count, PCT_Synthetic);
}
}
- return ProfileCount::getInvalid();
+ return None;
}
DenseSet<GlobalValue::GUID> Function::getImportGUIDs() const {
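getEntryCount now reports a missing or invalid profile count as None instead of an "invalid" ProfileCount sentinel, so callers test the Optional directly. Hedged sketch:

  if (Optional<Function::ProfileCount> Count = F->getEntryCount())
    errs() << F->getName() << " entry count: " << Count->getCount() << "\n";
  // else: no profile data (or the SamplePGO -1 marker)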
diff --git a/llvm/lib/IR/GCStrategy.cpp b/llvm/lib/IR/GCStrategy.cpp
index 25dad5bec9ef..f3bc5b74f8fd 100644
--- a/llvm/lib/IR/GCStrategy.cpp
+++ b/llvm/lib/IR/GCStrategy.cpp
@@ -18,3 +18,21 @@ using namespace llvm;
LLVM_INSTANTIATE_REGISTRY(GCRegistry)
GCStrategy::GCStrategy() = default;
+
+std::unique_ptr<GCStrategy> llvm::getGCStrategy(const StringRef Name) {
+ for (auto &S : GCRegistry::entries())
+ if (S.getName() == Name)
+ return S.instantiate();
+
+ if (GCRegistry::begin() == GCRegistry::end()) {
+ // In normal operation, the registry should not be empty. There should
+ // be the builtin GCs if nothing else. The most likely scenario here is
+ // that we got here without running the initializers used by the Registry
+ // itself and its registration mechanism.
+ const std::string error =
+ std::string("unsupported GC: ") + Name.str() +
+ " (did you remember to link and initialize the library?)";
+ report_fatal_error(error);
+ } else
+ report_fatal_error(std::string("unsupported GC: ") + Name.str());
+}
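llvm::getGCStrategy resolves a GC name, typically the value of a function's "gc" attribute, against the GCRegistry and reports a fatal error for unknown names. Hedged sketch:

  // "statepoint-example" is assumed here to be one of the registered built-in
  // strategies; an unknown name takes the report_fatal_error paths above.
  std::unique_ptr<GCStrategy> GC = getGCStrategy("statepoint-example");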
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index b1c6dcc6672d..9f38288095e3 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -162,7 +162,7 @@ std::string GlobalValue::getGlobalIdentifier() const {
StringRef GlobalValue::getSection() const {
if (auto *GA = dyn_cast<GlobalAlias>(this)) {
// In general we cannot compute this at the IR level, but we try.
- if (const GlobalObject *GO = GA->getBaseObject())
+ if (const GlobalObject *GO = GA->getAliaseeObject())
return GO->getSection();
return "";
}
@@ -172,7 +172,7 @@ StringRef GlobalValue::getSection() const {
const Comdat *GlobalValue::getComdat() const {
if (auto *GA = dyn_cast<GlobalAlias>(this)) {
// In general we cannot compute this at the IR level, but we try.
- if (const GlobalObject *GO = GA->getBaseObject())
+ if (const GlobalObject *GO = GA->getAliaseeObject())
return const_cast<GlobalObject *>(GO)->getComdat();
return nullptr;
}
@@ -235,7 +235,7 @@ bool GlobalValue::isDeclaration() const {
return F->empty() && !F->isMaterializable();
// Aliases and ifuncs are always definitions.
- assert(isa<GlobalIndirectSymbol>(this));
+ assert(isa<GlobalAlias>(this) || isa<GlobalIFunc>(this));
return false;
}
@@ -280,14 +280,44 @@ bool GlobalObject::canIncreaseAlignment() const {
return true;
}
-const GlobalObject *GlobalValue::getBaseObject() const {
- if (auto *GO = dyn_cast<GlobalObject>(this))
+static const GlobalObject *
+findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
+ if (auto *GO = dyn_cast<GlobalObject>(C))
return GO;
- if (auto *GA = dyn_cast<GlobalIndirectSymbol>(this))
- return GA->getBaseObject();
+ if (auto *GA = dyn_cast<GlobalAlias>(C))
+ if (Aliases.insert(GA).second)
+ return findBaseObject(GA->getOperand(0), Aliases);
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ switch (CE->getOpcode()) {
+ case Instruction::Add: {
+ auto *LHS = findBaseObject(CE->getOperand(0), Aliases);
+ auto *RHS = findBaseObject(CE->getOperand(1), Aliases);
+ if (LHS && RHS)
+ return nullptr;
+ return LHS ? LHS : RHS;
+ }
+ case Instruction::Sub: {
+ if (findBaseObject(CE->getOperand(1), Aliases))
+ return nullptr;
+ return findBaseObject(CE->getOperand(0), Aliases);
+ }
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ return findBaseObject(CE->getOperand(0), Aliases);
+ default:
+ break;
+ }
+ }
return nullptr;
}
+const GlobalObject *GlobalValue::getAliaseeObject() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return findBaseObject(this, Aliases);
+}
+
bool GlobalValue::isAbsoluteSymbolRef() const {
auto *GO = dyn_cast<GlobalObject>(this);
if (!GO)
@@ -421,63 +451,15 @@ void GlobalVariable::dropAllReferences() {
}
//===----------------------------------------------------------------------===//
-// GlobalIndirectSymbol Implementation
-//===----------------------------------------------------------------------===//
-
-GlobalIndirectSymbol::GlobalIndirectSymbol(Type *Ty, ValueTy VTy,
- unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name,
- Constant *Symbol)
- : GlobalValue(Ty, VTy, &Op<0>(), 1, Linkage, Name, AddressSpace) {
- Op<0>() = Symbol;
-}
-
-static const GlobalObject *
-findBaseObject(const Constant *C, DenseSet<const GlobalAlias *> &Aliases) {
- if (auto *GO = dyn_cast<GlobalObject>(C))
- return GO;
- if (auto *GA = dyn_cast<GlobalAlias>(C))
- if (Aliases.insert(GA).second)
- return findBaseObject(GA->getOperand(0), Aliases);
- if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- switch (CE->getOpcode()) {
- case Instruction::Add: {
- auto *LHS = findBaseObject(CE->getOperand(0), Aliases);
- auto *RHS = findBaseObject(CE->getOperand(1), Aliases);
- if (LHS && RHS)
- return nullptr;
- return LHS ? LHS : RHS;
- }
- case Instruction::Sub: {
- if (findBaseObject(CE->getOperand(1), Aliases))
- return nullptr;
- return findBaseObject(CE->getOperand(0), Aliases);
- }
- case Instruction::IntToPtr:
- case Instruction::PtrToInt:
- case Instruction::BitCast:
- case Instruction::GetElementPtr:
- return findBaseObject(CE->getOperand(0), Aliases);
- default:
- break;
- }
- }
- return nullptr;
-}
-
-const GlobalObject *GlobalIndirectSymbol::getBaseObject() const {
- DenseSet<const GlobalAlias *> Aliases;
- return findBaseObject(getOperand(0), Aliases);
-}
-
-//===----------------------------------------------------------------------===//
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
const Twine &Name, Constant *Aliasee,
Module *ParentModule)
- : GlobalIndirectSymbol(Ty, Value::GlobalAliasVal, AddressSpace, Link, Name,
- Aliasee) {
+ : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name,
+ AddressSpace) {
+ setAliasee(Aliasee);
if (ParentModule)
ParentModule->getAliasList().push_back(this);
}
@@ -521,7 +503,12 @@ void GlobalAlias::eraseFromParent() {
void GlobalAlias::setAliasee(Constant *Aliasee) {
assert((!Aliasee || Aliasee->getType() == getType()) &&
"Alias and aliasee types should match!");
- setIndirectSymbol(Aliasee);
+ Op<0>().set(Aliasee);
+}
+
+const GlobalObject *GlobalAlias::getAliaseeObject() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return findBaseObject(getOperand(0), Aliases);
}
//===----------------------------------------------------------------------===//
@@ -531,8 +518,9 @@ void GlobalAlias::setAliasee(Constant *Aliasee) {
GlobalIFunc::GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
const Twine &Name, Constant *Resolver,
Module *ParentModule)
- : GlobalIndirectSymbol(Ty, Value::GlobalIFuncVal, AddressSpace, Link, Name,
- Resolver) {
+ : GlobalObject(Ty, Value::GlobalIFuncVal, &Op<0>(), 1, Link, Name,
+ AddressSpace) {
+ setResolver(Resolver);
if (ParentModule)
ParentModule->getIFuncList().push_back(this);
}
@@ -550,3 +538,8 @@ void GlobalIFunc::removeFromParent() {
void GlobalIFunc::eraseFromParent() {
getParent()->getIFuncList().erase(getIterator());
}
+
+const Function *GlobalIFunc::getResolverFunction() const {
+ DenseSet<const GlobalAlias *> Aliases;
+ return dyn_cast<Function>(findBaseObject(getResolver(), Aliases));
+}
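With GlobalIndirectSymbol removed, aliases and ifuncs are plain GlobalValue/GlobalObject subclasses and their targets are queried directly. Hedged sketch (GA and IF are an existing GlobalAlias* and GlobalIFunc*):

  if (const GlobalObject *GO = GA->getAliaseeObject())       // replaces getBaseObject()
    errs() << "alias target: " << GO->getName() << "\n";
  if (const Function *Resolver = IF->getResolverFunction())  // looks through casts and aliases
    errs() << "ifunc resolver: " << Resolver->getName() << "\n";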
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index 0f4945bad5ab..98f6ccf81973 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -94,11 +94,22 @@ Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {
}
Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
- if (isa<ScalableVectorType>(DstType))
- return CreateIntrinsic(Intrinsic::experimental_stepvector, {DstType}, {},
- nullptr, Name);
-
Type *STy = DstType->getScalarType();
+ if (isa<ScalableVectorType>(DstType)) {
+ Type *StepVecType = DstType;
+ // TODO: We expect this special case (element type < 8 bits) to be
+ // temporary - once the intrinsic properly supports < 8 bits this code
+ // can be removed.
+ if (STy->getScalarSizeInBits() < 8)
+ StepVecType =
+ VectorType::get(getInt8Ty(), cast<ScalableVectorType>(DstType));
+ Value *Res = CreateIntrinsic(Intrinsic::experimental_stepvector,
+ {StepVecType}, {}, nullptr, Name);
+ if (StepVecType != DstType)
+ Res = CreateTrunc(Res, DstType);
+ return Res;
+ }
+
unsigned NumEls = cast<FixedVectorType>(DstType)->getNumElements();
// Create a vector of consecutive numbers from zero to VF.
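The scalable-vector branch above works around the stepvector intrinsic's lack of sub-byte element support by building an i8 step vector and truncating. Hedged sketch (Builder is an existing IRBuilder<>):

  // For <vscale x 16 x i1> this now emits an i8 llvm.experimental.stepvector
  // followed by a trunc back to i1.
  Value *Steps = Builder.CreateStepVector(
      ScalableVectorType::get(Builder.getInt1Ty(), /*MinNumElts=*/16));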
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 937dc6957806..a4659da7e807 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -141,6 +141,10 @@ bool Instruction::hasNoSignedWrap() const {
return cast<OverflowingBinaryOperator>(this)->hasNoSignedWrap();
}
+bool Instruction::hasPoisonGeneratingFlags() const {
+ return cast<Operator>(this)->hasPoisonGeneratingFlags();
+}
+
void Instruction::dropPoisonGeneratingFlags() {
switch (getOpcode()) {
case Instruction::Add:
@@ -163,6 +167,8 @@ void Instruction::dropPoisonGeneratingFlags() {
break;
}
// TODO: FastMathFlags!
+
+ assert(!hasPoisonGeneratingFlags() && "must be kept in sync");
}
void Instruction::dropUndefImplyingAttrsAndUnknownMetadata(
@@ -178,9 +184,9 @@ void Instruction::dropUndefImplyingAttrsAndUnknownMetadata(
if (AL.isEmpty())
return;
AttrBuilder UBImplyingAttributes = AttributeFuncs::getUBImplyingAttributes();
- for (unsigned ArgNo = 0; ArgNo < CB->getNumArgOperands(); ArgNo++)
+ for (unsigned ArgNo = 0; ArgNo < CB->arg_size(); ArgNo++)
CB->removeParamAttrs(ArgNo, UBImplyingAttributes);
- CB->removeAttributes(AttributeList::ReturnIndex, UBImplyingAttributes);
+ CB->removeRetAttrs(UBImplyingAttributes);
}
bool Instruction::isExact() const {
@@ -307,20 +313,20 @@ void Instruction::copyIRFlags(const Value *V, bool IncludeWrapFlags) {
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(V))
if (auto *DestGEP = dyn_cast<GetElementPtrInst>(this))
- DestGEP->setIsInBounds(SrcGEP->isInBounds() | DestGEP->isInBounds());
+ DestGEP->setIsInBounds(SrcGEP->isInBounds() || DestGEP->isInBounds());
}
void Instruction::andIRFlags(const Value *V) {
if (auto *OB = dyn_cast<OverflowingBinaryOperator>(V)) {
if (isa<OverflowingBinaryOperator>(this)) {
- setHasNoSignedWrap(hasNoSignedWrap() & OB->hasNoSignedWrap());
- setHasNoUnsignedWrap(hasNoUnsignedWrap() & OB->hasNoUnsignedWrap());
+ setHasNoSignedWrap(hasNoSignedWrap() && OB->hasNoSignedWrap());
+ setHasNoUnsignedWrap(hasNoUnsignedWrap() && OB->hasNoUnsignedWrap());
}
}
if (auto *PE = dyn_cast<PossiblyExactOperator>(V))
if (isa<PossiblyExactOperator>(this))
- setIsExact(isExact() & PE->isExact());
+ setIsExact(isExact() && PE->isExact());
if (auto *FP = dyn_cast<FPMathOperator>(V)) {
if (isa<FPMathOperator>(this)) {
@@ -332,7 +338,7 @@ void Instruction::andIRFlags(const Value *V) {
if (auto *SrcGEP = dyn_cast<GetElementPtrInst>(V))
if (auto *DestGEP = dyn_cast<GetElementPtrInst>(this))
- DestGEP->setIsInBounds(SrcGEP->isInBounds() & DestGEP->isInBounds());
+ DestGEP->setIsInBounds(SrcGEP->isInBounds() && DestGEP->isInBounds());
}
const char *Instruction::getOpcodeName(unsigned OpCode) {
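The new Instruction::hasPoisonGeneratingFlags() mirrors dropPoisonGeneratingFlags(), and the assert keeps the two in sync; andIRFlags()/copyIRFlags() now combine the boolean flags with logical rather than bitwise operators. A small illustrative sketch of the intended usage pattern when speculating an instruction:

    #include <cassert>
    #include "llvm/IR/Instruction.h"

    using namespace llvm;

    // Before hoisting I to a point where its operands may take other values,
    // strip nuw/nsw/exact/inbounds so the move cannot introduce new poison.
    static void hoistWithoutPoisonFlags(Instruction *I, Instruction *InsertPt) {
      if (I->hasPoisonGeneratingFlags())
        I->dropPoisonGeneratingFlags();
      assert(!I->hasPoisonGeneratingFlags() && "flags should be gone");
      I->moveBefore(InsertPt);
    }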
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 5b01c70dec8d..c42df49d97ea 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -318,9 +318,8 @@ bool CallBase::isReturnNonNull() const {
if (hasRetAttr(Attribute::NonNull))
return true;
- if (getDereferenceableBytes(AttributeList::ReturnIndex) > 0 &&
- !NullPointerIsDefined(getCaller(),
- getType()->getPointerAddressSpace()))
+ if (getRetDereferenceableBytes() > 0 &&
+ !NullPointerIsDefined(getCaller(), getType()->getPointerAddressSpace()))
return true;
return false;
@@ -329,11 +328,10 @@ bool CallBase::isReturnNonNull() const {
Value *CallBase::getReturnedArgOperand() const {
unsigned Index;
- if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index) && Index)
+ if (Attrs.hasAttrSomewhere(Attribute::Returned, &Index))
return getArgOperand(Index - AttributeList::FirstArgIndex);
if (const Function *F = getCalledFunction())
- if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index) &&
- Index)
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Returned, &Index))
return getArgOperand(Index - AttributeList::FirstArgIndex);
return nullptr;
@@ -341,24 +339,36 @@ Value *CallBase::getReturnedArgOperand() const {
/// Determine whether the argument or parameter has the given attribute.
bool CallBase::paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
- assert(ArgNo < getNumArgOperands() && "Param index out of bounds!");
+ assert(ArgNo < arg_size() && "Param index out of bounds!");
- if (Attrs.hasParamAttribute(ArgNo, Kind))
+ if (Attrs.hasParamAttr(ArgNo, Kind))
return true;
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasParamAttribute(ArgNo, Kind);
+ return F->getAttributes().hasParamAttr(ArgNo, Kind);
return false;
}
bool CallBase::hasFnAttrOnCalledFunction(Attribute::AttrKind Kind) const {
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasFnAttribute(Kind);
+ Value *V = getCalledOperand();
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == BitCast)
+ V = CE->getOperand(0);
+
+ if (auto *F = dyn_cast<Function>(V))
+ return F->getAttributes().hasFnAttr(Kind);
+
return false;
}
bool CallBase::hasFnAttrOnCalledFunction(StringRef Kind) const {
- if (const Function *F = getCalledFunction())
- return F->getAttributes().hasFnAttribute(Kind);
+ Value *V = getCalledOperand();
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (CE->getOpcode() == BitCast)
+ V = CE->getOperand(0);
+
+ if (auto *F = dyn_cast<Function>(V))
+ return F->getAttributes().hasFnAttr(Kind);
+
return false;
}
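Both overloads of hasFnAttrOnCalledFunction() now look through a bitcast constant expression on the called operand, so attribute queries keep working for calls made through bitcast callees. The idiom in isolation, as a sketch rather than the patch itself:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"

    using namespace llvm;

    // Return the callee as a Function even when the call goes through a
    // "bitcast (FnTy* @f to OtherTy*)" constant expression.
    static const Function *getCalleeThroughBitcast(const CallBase &CB) {
      const Value *V = CB.getCalledOperand();
      if (auto *CE = dyn_cast<ConstantExpr>(V))
        if (CE->getOpcode() == Instruction::BitCast)
          V = CE->getOperand(0);
      return dyn_cast<Function>(V);
    }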
@@ -933,7 +943,7 @@ void CallBrInst::updateArgBlockAddresses(unsigned i, BasicBlock *B) {
if (BasicBlock *OldBB = getIndirectDest(i)) {
BlockAddress *Old = BlockAddress::get(OldBB);
BlockAddress *New = BlockAddress::get(B);
- for (unsigned ArgNo = 0, e = getNumArgOperands(); ArgNo != e; ++ArgNo)
+ for (unsigned ArgNo = 0, e = arg_size(); ArgNo != e; ++ArgNo)
if (dyn_cast<BlockAddress>(getArgOperand(ArgNo)) == Old)
setArgOperand(ArgNo, New);
}
@@ -1909,6 +1919,32 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
// ShuffleVectorInst Implementation
//===----------------------------------------------------------------------===//
+static Value *createPlaceholderForShuffleVector(Value *V) {
+ assert(V && "Cannot create placeholder of nullptr V");
+ return PoisonValue::get(V->getType());
+}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *Mask, const Twine &Name,
+ Instruction *InsertBefore)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertBefore) {}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *Mask, const Twine &Name,
+ BasicBlock *InsertAtEnd)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertAtEnd) {}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, ArrayRef<int> Mask,
+ const Twine &Name,
+ Instruction *InsertBefore)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertBefore) {}
+
+ShuffleVectorInst::ShuffleVectorInst(Value *V1, ArrayRef<int> Mask,
+ const Twine &Name, BasicBlock *InsertAtEnd)
+ : ShuffleVectorInst(V1, createPlaceholderForShuffleVector(V1), Mask, Name,
+ InsertAtEnd) {}
+
ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &Name,
Instruction *InsertBefore)
@@ -2259,6 +2295,80 @@ bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef<int> Mask,
return false;
}
+bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
+ int NumSrcElts, int &NumSubElts,
+ int &Index) {
+ int NumMaskElts = Mask.size();
+
+ // Don't try to match if we're shuffling to a smaller size.
+ if (NumMaskElts < NumSrcElts)
+ return false;
+
+ // TODO: We don't recognize self-insertion/widening.
+ if (isSingleSourceMaskImpl(Mask, NumSrcElts))
+ return false;
+
+ // Determine which mask elements are attributed to which source.
+ APInt UndefElts = APInt::getZero(NumMaskElts);
+ APInt Src0Elts = APInt::getZero(NumMaskElts);
+ APInt Src1Elts = APInt::getZero(NumMaskElts);
+ bool Src0Identity = true;
+ bool Src1Identity = true;
+
+ for (int i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ UndefElts.setBit(i);
+ continue;
+ }
+ if (M < NumSrcElts) {
+ Src0Elts.setBit(i);
+ Src0Identity &= (M == i);
+ continue;
+ }
+ Src1Elts.setBit(i);
+ Src1Identity &= (M == (i + NumSrcElts));
+ continue;
+ }
+ assert((Src0Elts | Src1Elts | UndefElts).isAllOnes() &&
+ "unknown shuffle elements");
+ assert(!Src0Elts.isZero() && !Src1Elts.isZero() &&
+ "2-source shuffle not found");
+
+ // Determine lo/hi span ranges.
+ // TODO: How should we handle undefs at the start of subvector insertions?
+ int Src0Lo = Src0Elts.countTrailingZeros();
+ int Src1Lo = Src1Elts.countTrailingZeros();
+ int Src0Hi = NumMaskElts - Src0Elts.countLeadingZeros();
+ int Src1Hi = NumMaskElts - Src1Elts.countLeadingZeros();
+
+  // If src0 is in place, see if the src1 elements are in place within their
+  // own span.
+ if (Src0Identity) {
+ int NumSub1Elts = Src1Hi - Src1Lo;
+ ArrayRef<int> Sub1Mask = Mask.slice(Src1Lo, NumSub1Elts);
+ if (isIdentityMaskImpl(Sub1Mask, NumSrcElts)) {
+ NumSubElts = NumSub1Elts;
+ Index = Src1Lo;
+ return true;
+ }
+ }
+
+  // If src1 is in place, see if the src0 elements are in place within their
+  // own span.
+ if (Src1Identity) {
+ int NumSub0Elts = Src0Hi - Src0Lo;
+ ArrayRef<int> Sub0Mask = Mask.slice(Src0Lo, NumSub0Elts);
+ if (isIdentityMaskImpl(Sub0Mask, NumSrcElts)) {
+ NumSubElts = NumSub0Elts;
+ Index = Src0Lo;
+ return true;
+ }
+ }
+
+ return false;
+}
+
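A quick usage sketch for the new matcher (assuming its declaration lands in Instructions.h with this change): over two 8-element sources, the mask <0,1,8,9,4,5,6,7> keeps source 0 in place and drops a 2-element slice of source 1 in at index 2.

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static bool demoInsertSubvectorMatch() {
      int Mask[] = {0, 1, 8, 9, 4, 5, 6, 7};
      int NumSubElts = 0, Index = 0;
      bool Matched = ShuffleVectorInst::isInsertSubvectorMask(
          Mask, /*NumSrcElts=*/8, NumSubElts, Index);
      // Expected: Matched == true, NumSubElts == 2, Index == 2.
      return Matched && NumSubElts == 2 && Index == 2;
    }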
bool ShuffleVectorInst::isIdentityWithPadding() const {
if (isa<UndefValue>(Op<2>()))
return false;
@@ -2326,6 +2436,87 @@ bool ShuffleVectorInst::isConcat() const {
return isIdentityMaskImpl(getShuffleMask(), NumMaskElts);
}
+static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
+ int ReplicationFactor, int VF) {
+ assert(Mask.size() == (unsigned)ReplicationFactor * VF &&
+ "Unexpected mask size.");
+
+ for (int CurrElt : seq(0, VF)) {
+ ArrayRef<int> CurrSubMask = Mask.take_front(ReplicationFactor);
+ assert(CurrSubMask.size() == (unsigned)ReplicationFactor &&
+ "Run out of mask?");
+ Mask = Mask.drop_front(ReplicationFactor);
+ if (!all_of(CurrSubMask, [CurrElt](int MaskElt) {
+ return MaskElt == UndefMaskElem || MaskElt == CurrElt;
+ }))
+ return false;
+ }
+ assert(Mask.empty() && "Did not consume the whole mask?");
+
+ return true;
+}
+
+bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask,
+ int &ReplicationFactor, int &VF) {
+ // undef-less case is trivial.
+ if (none_of(Mask, [](int MaskElt) { return MaskElt == UndefMaskElem; })) {
+ ReplicationFactor =
+ Mask.take_while([](int MaskElt) { return MaskElt == 0; }).size();
+ if (ReplicationFactor == 0 || Mask.size() % ReplicationFactor != 0)
+ return false;
+ VF = Mask.size() / ReplicationFactor;
+ return isReplicationMaskWithParams(Mask, ReplicationFactor, VF);
+ }
+
+  // However, if the mask contains undefs, we have to enumerate possible tuples
+  // and pick one. There are bounds on the replication factor: [1, mask size]
+  // (where RF=1 is an identity shuffle and RF=mask size is a broadcast shuffle).
+  // Additionally, the mask size is the replication factor multiplied by the
+  // vector size, which further significantly reduces the search space.
+
+  // Before doing that, perform a basic sanity check first.
+ int Largest = -1;
+ for (int MaskElt : Mask) {
+ if (MaskElt == UndefMaskElem)
+ continue;
+ // Elements must be in non-decreasing order.
+ if (MaskElt < Largest)
+ return false;
+ Largest = std::max(Largest, MaskElt);
+ }
+
+ // Prefer larger replication factor if all else equal.
+ for (int PossibleReplicationFactor :
+ reverse(seq_inclusive<unsigned>(1, Mask.size()))) {
+ if (Mask.size() % PossibleReplicationFactor != 0)
+ continue;
+ int PossibleVF = Mask.size() / PossibleReplicationFactor;
+ if (!isReplicationMaskWithParams(Mask, PossibleReplicationFactor,
+ PossibleVF))
+ continue;
+ ReplicationFactor = PossibleReplicationFactor;
+ VF = PossibleVF;
+ return true;
+ }
+
+ return false;
+}
+
+bool ShuffleVectorInst::isReplicationMask(int &ReplicationFactor,
+ int &VF) const {
+ // Not possible to express a shuffle mask for a scalable vector for this
+ // case.
+ if (isa<ScalableVectorType>(getType()))
+ return false;
+
+ VF = cast<FixedVectorType>(Op<0>()->getType())->getNumElements();
+ if (ShuffleMask.size() % VF != 0)
+ return false;
+ ReplicationFactor = ShuffleMask.size() / VF;
+
+ return isReplicationMaskWithParams(ShuffleMask, ReplicationFactor, VF);
+}
+
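Usage sketch for the replication-mask matcher added above (again assuming the matching declarations in Instructions.h): <0,0,1,1,2,2,3,3> replicates each of 4 elements twice, so it reports ReplicationFactor == 2 and VF == 4.

    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static bool demoReplicationMatch() {
      int Mask[] = {0, 0, 1, 1, 2, 2, 3, 3};
      int ReplicationFactor = 0, VF = 0;
      return ShuffleVectorInst::isReplicationMask(Mask, ReplicationFactor, VF) &&
             ReplicationFactor == 2 && VF == 4;
    }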
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -3945,6 +4136,35 @@ bool CmpInst::isSigned(Predicate predicate) {
}
}
+bool ICmpInst::compare(const APInt &LHS, const APInt &RHS,
+ ICmpInst::Predicate Pred) {
+ assert(ICmpInst::isIntPredicate(Pred) && "Only for integer predicates!");
+ switch (Pred) {
+ case ICmpInst::Predicate::ICMP_EQ:
+ return LHS.eq(RHS);
+ case ICmpInst::Predicate::ICMP_NE:
+ return LHS.ne(RHS);
+ case ICmpInst::Predicate::ICMP_UGT:
+ return LHS.ugt(RHS);
+ case ICmpInst::Predicate::ICMP_UGE:
+ return LHS.uge(RHS);
+ case ICmpInst::Predicate::ICMP_ULT:
+ return LHS.ult(RHS);
+ case ICmpInst::Predicate::ICMP_ULE:
+ return LHS.ule(RHS);
+ case ICmpInst::Predicate::ICMP_SGT:
+ return LHS.sgt(RHS);
+ case ICmpInst::Predicate::ICMP_SGE:
+ return LHS.sge(RHS);
+ case ICmpInst::Predicate::ICMP_SLT:
+ return LHS.slt(RHS);
+ case ICmpInst::Predicate::ICMP_SLE:
+ return LHS.sle(RHS);
+ default:
+ llvm_unreachable("Unexpected non-integer predicate.");
+ };
+}
+
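ICmpInst::compare() gives constant folders a single entry point for evaluating an integer predicate on two APInts instead of open-coding the switch. A minimal sketch:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    // Evaluate a signed less-than on two 64-bit values via the new helper;
    // it asserts if handed a floating-point predicate.
    static bool signedLess(uint64_t A, uint64_t B) {
      APInt LHS(/*numBits=*/64, A), RHS(/*numBits=*/64, B);
      return ICmpInst::compare(LHS, RHS, ICmpInst::ICMP_SLT);
    }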
CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) {
assert(CmpInst::isRelational(pred) &&
"Call only with non-equality predicates!");
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 19942fa187fd..7552906fd07a 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -188,26 +188,26 @@ Value *InstrProfIncrementInst::getStep() const {
}
Optional<RoundingMode> ConstrainedFPIntrinsic::getRoundingMode() const {
- unsigned NumOperands = getNumArgOperands();
+ unsigned NumOperands = arg_size();
Metadata *MD = nullptr;
auto *MAV = dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2));
if (MAV)
MD = MAV->getMetadata();
if (!MD || !isa<MDString>(MD))
return None;
- return StrToRoundingMode(cast<MDString>(MD)->getString());
+ return convertStrToRoundingMode(cast<MDString>(MD)->getString());
}
Optional<fp::ExceptionBehavior>
ConstrainedFPIntrinsic::getExceptionBehavior() const {
- unsigned NumOperands = getNumArgOperands();
+ unsigned NumOperands = arg_size();
Metadata *MD = nullptr;
auto *MAV = dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1));
if (MAV)
MD = MAV->getMetadata();
if (!MD || !isa<MDString>(MD))
return None;
- return StrToExceptionBehavior(cast<MDString>(MD)->getString());
+ return convertStrToExceptionBehavior(cast<MDString>(MD)->getString());
}
bool ConstrainedFPIntrinsic::isDefaultFPEnvironment() const {
@@ -473,8 +473,17 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
assert(isVPIntrinsic(VPID) && "not a VP intrinsic");
Function *VPFunc;
switch (VPID) {
- default:
- VPFunc = Intrinsic::getDeclaration(M, VPID, Params[0]->getType());
+ default: {
+ Type *OverloadTy = Params[0]->getType();
+ if (VPReductionIntrinsic::isVPReduction(VPID))
+ OverloadTy =
+ Params[*VPReductionIntrinsic::getVectorParamPos(VPID)]->getType();
+
+ VPFunc = Intrinsic::getDeclaration(M, VPID, OverloadTy);
+ break;
+ }
+ case Intrinsic::vp_select:
+ VPFunc = Intrinsic::getDeclaration(M, VPID, {Params[1]->getType()});
break;
case Intrinsic::vp_load:
VPFunc = Intrinsic::getDeclaration(
@@ -504,6 +513,48 @@ Function *VPIntrinsic::getDeclarationForParams(Module *M, Intrinsic::ID VPID,
return VPFunc;
}
+bool VPReductionIntrinsic::isVPReduction(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ return false;
+#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
+ case Intrinsic::VPID: \
+ break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return true;
+}
+
+unsigned VPReductionIntrinsic::getVectorParamPos() const {
+ return *VPReductionIntrinsic::getVectorParamPos(getIntrinsicID());
+}
+
+unsigned VPReductionIntrinsic::getStartParamPos() const {
+ return *VPReductionIntrinsic::getStartParamPos(getIntrinsicID());
+}
+
+Optional<unsigned> VPReductionIntrinsic::getVectorParamPos(Intrinsic::ID ID) {
+ switch (ID) {
+#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
+ case Intrinsic::VPID: \
+ return VECTORPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ default:
+ return None;
+ }
+}
+
+Optional<unsigned> VPReductionIntrinsic::getStartParamPos(Intrinsic::ID ID) {
+ switch (ID) {
+#define HANDLE_VP_REDUCTION(VPID, STARTPOS, VECTORPOS) \
+ case Intrinsic::VPID: \
+ return STARTPOS;
+#include "llvm/IR/VPIntrinsics.def"
+ default:
+ return None;
+ }
+}
+
Instruction::BinaryOps BinaryOpIntrinsic::getBinaryOp() const {
switch (getIntrinsicID()) {
case Intrinsic::uadd_with_overflow:
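With the HANDLE_VP_REDUCTION entries in VPIntrinsics.def, the start-value and vector operand positions of a VP reduction can be queried generically. A hedged sketch, assuming the VPReductionIntrinsic subclass declared alongside these definitions:

    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // For a call that is a VP reduction, fetch its vector operand by position;
    // return null for anything else.
    static Value *getVPReductionVectorOperand(CallInst &CI) {
      if (auto *VPR = dyn_cast<VPReductionIntrinsic>(&CI))
        return VPR->getArgOperand(VPR->getVectorParamPos());
      return nullptr;
    }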
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index c4a713db455b..90716d9c81a6 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -348,6 +348,12 @@ std::unique_ptr<DiagnosticHandler> LLVMContext::getDiagnosticHandler() {
return std::move(pImpl->DiagHandler);
}
+void LLVMContext::enableOpaquePointers() const {
+ assert(pImpl->PointerTypes.empty() && pImpl->ASPointerTypes.empty() &&
+ "Must be called before creating any pointer types");
+ pImpl->setOpaquePointers(true);
+}
+
bool LLVMContext::supportsTypedPointers() const {
- return !pImpl->ForceOpaquePointers;
+ return !pImpl->getOpaquePointers();
}
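enableOpaquePointers() switches a context into opaque-pointer mode; per the assertion it must run before the first pointer type is created, after which supportsTypedPointers() reports false. A short sketch:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    static void buildOpaquePointerContext() {
      LLVMContext Ctx;
      Ctx.enableOpaquePointers();          // before any pointer type exists
      PointerType *P = PointerType::get(Ctx, /*AddressSpace=*/0); // "ptr"
      (void)P;
      // Ctx.supportsTypedPointers() now returns false.
    }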
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 99819602c545..ebbf382aea38 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -23,9 +23,8 @@
using namespace llvm;
static cl::opt<bool>
- ForceOpaquePointersCL("force-opaque-pointers",
- cl::desc("Force all pointers to be opaque pointers"),
- cl::init(false));
+ OpaquePointersCL("opaque-pointers", cl::desc("Use opaque pointers"),
+ cl::init(false));
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: DiagHandler(std::make_unique<DiagnosticHandler>()),
@@ -36,8 +35,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID),
X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8),
- Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128),
- ForceOpaquePointers(ForceOpaquePointersCL) {}
+ Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {}
LLVMContextImpl::~LLVMContextImpl() {
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -55,8 +53,15 @@ LLVMContextImpl::~LLVMContextImpl() {
// Drop references for MDNodes. Do this before Values get deleted to avoid
// unnecessary RAUW when nodes are still unresolved.
- for (auto *I : DistinctMDNodes)
+ for (auto *I : DistinctMDNodes) {
+ // We may have DIArgList that were uniqued, and as it has a custom
+ // implementation of dropAllReferences, it needs to be explicitly invoked.
+ if (auto *AL = dyn_cast<DIArgList>(I)) {
+ AL->dropAllReferences();
+ continue;
+ }
I->dropAllReferences();
+ }
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
for (auto *I : CLASS##s) \
I->dropAllReferences();
@@ -227,3 +232,11 @@ OptPassGate &LLVMContextImpl::getOptPassGate() const {
void LLVMContextImpl::setOptPassGate(OptPassGate& OPG) {
this->OPG = &OPG;
}
+
+bool LLVMContextImpl::getOpaquePointers() {
+ if (LLVM_UNLIKELY(!(OpaquePointers.hasValue())))
+ OpaquePointers = OpaquePointersCL;
+ return *OpaquePointers;
+}
+
+void LLVMContextImpl::setOpaquePointers(bool OP) { OpaquePointers = OP; }
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 2ae23fdc95a8..b2909c425846 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -61,7 +61,9 @@ using DenseMapAPIntKeyInfo = DenseMapInfo<APInt>;
struct DenseMapAPFloatKeyInfo {
static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
- static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); }
+ static inline APFloat getTombstoneKey() {
+ return APFloat(APFloat::Bogus(), 2);
+ }
static unsigned getHashValue(const APFloat &Key) {
return static_cast<unsigned>(hash_value(Key));
@@ -74,46 +76,42 @@ struct DenseMapAPFloatKeyInfo {
struct AnonStructTypeKeyInfo {
struct KeyTy {
- ArrayRef<Type*> ETypes;
+ ArrayRef<Type *> ETypes;
bool isPacked;
- KeyTy(const ArrayRef<Type*>& E, bool P) :
- ETypes(E), isPacked(P) {}
+ KeyTy(const ArrayRef<Type *> &E, bool P) : ETypes(E), isPacked(P) {}
KeyTy(const StructType *ST)
: ETypes(ST->elements()), isPacked(ST->isPacked()) {}
- bool operator==(const KeyTy& that) const {
+ bool operator==(const KeyTy &that) const {
if (isPacked != that.isPacked)
return false;
if (ETypes != that.ETypes)
return false;
return true;
}
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
+ bool operator!=(const KeyTy &that) const { return !this->operator==(that); }
};
- static inline StructType* getEmptyKey() {
- return DenseMapInfo<StructType*>::getEmptyKey();
+ static inline StructType *getEmptyKey() {
+ return DenseMapInfo<StructType *>::getEmptyKey();
}
- static inline StructType* getTombstoneKey() {
- return DenseMapInfo<StructType*>::getTombstoneKey();
+ static inline StructType *getTombstoneKey() {
+ return DenseMapInfo<StructType *>::getTombstoneKey();
}
- static unsigned getHashValue(const KeyTy& Key) {
- return hash_combine(hash_combine_range(Key.ETypes.begin(),
- Key.ETypes.end()),
- Key.isPacked);
+ static unsigned getHashValue(const KeyTy &Key) {
+ return hash_combine(
+ hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), Key.isPacked);
}
static unsigned getHashValue(const StructType *ST) {
return getHashValue(KeyTy(ST));
}
- static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
+ static bool isEqual(const KeyTy &LHS, const StructType *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return LHS == KeyTy(RHS);
@@ -127,16 +125,16 @@ struct AnonStructTypeKeyInfo {
struct FunctionTypeKeyInfo {
struct KeyTy {
const Type *ReturnType;
- ArrayRef<Type*> Params;
+ ArrayRef<Type *> Params;
bool isVarArg;
- KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
- ReturnType(R), Params(P), isVarArg(V) {}
+ KeyTy(const Type *R, const ArrayRef<Type *> &P, bool V)
+ : ReturnType(R), Params(P), isVarArg(V) {}
KeyTy(const FunctionType *FT)
: ReturnType(FT->getReturnType()), Params(FT->params()),
isVarArg(FT->isVarArg()) {}
- bool operator==(const KeyTy& that) const {
+ bool operator==(const KeyTy &that) const {
if (ReturnType != that.ReturnType)
return false;
if (isVarArg != that.isVarArg)
@@ -145,31 +143,28 @@ struct FunctionTypeKeyInfo {
return false;
return true;
}
- bool operator!=(const KeyTy& that) const {
- return !this->operator==(that);
- }
+ bool operator!=(const KeyTy &that) const { return !this->operator==(that); }
};
- static inline FunctionType* getEmptyKey() {
- return DenseMapInfo<FunctionType*>::getEmptyKey();
+ static inline FunctionType *getEmptyKey() {
+ return DenseMapInfo<FunctionType *>::getEmptyKey();
}
- static inline FunctionType* getTombstoneKey() {
- return DenseMapInfo<FunctionType*>::getTombstoneKey();
+ static inline FunctionType *getTombstoneKey() {
+ return DenseMapInfo<FunctionType *>::getTombstoneKey();
}
- static unsigned getHashValue(const KeyTy& Key) {
- return hash_combine(Key.ReturnType,
- hash_combine_range(Key.Params.begin(),
- Key.Params.end()),
- Key.isVarArg);
+ static unsigned getHashValue(const KeyTy &Key) {
+ return hash_combine(
+ Key.ReturnType,
+ hash_combine_range(Key.Params.begin(), Key.Params.end()), Key.isVarArg);
}
static unsigned getHashValue(const FunctionType *FT) {
return getHashValue(KeyTy(FT));
}
- static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
+ static bool isEqual(const KeyTy &LHS, const FunctionType *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
return LHS == KeyTy(RHS);
@@ -412,14 +407,14 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
Encoding(Encoding), Flags(Flags) {}
MDNodeKeyImpl(const DIBasicType *N)
: Tag(N->getTag()), Name(N->getRawName()), SizeInBits(N->getSizeInBits()),
- AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()), Flags(N->getFlags()) {}
+ AlignInBits(N->getAlignInBits()), Encoding(N->getEncoding()),
+ Flags(N->getFlags()) {}
bool isKeyOf(const DIBasicType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
- Encoding == RHS->getEncoding() &&
- Flags == RHS->getFlags();
+ Encoding == RHS->getEncoding() && Flags == RHS->getFlags();
}
unsigned getHashValue() const {
@@ -471,23 +466,24 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
Optional<unsigned> DWARFAddressSpace;
unsigned Flags;
Metadata *ExtraData;
+ Metadata *Annotations;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
uint32_t AlignInBits, uint64_t OffsetInBits,
Optional<unsigned> DWARFAddressSpace, unsigned Flags,
- Metadata *ExtraData)
+ Metadata *ExtraData, Metadata *Annotations)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
AlignInBits(AlignInBits), DWARFAddressSpace(DWARFAddressSpace),
- Flags(Flags), ExtraData(ExtraData) {}
+ Flags(Flags), ExtraData(ExtraData), Annotations(Annotations) {}
MDNodeKeyImpl(const DIDerivedType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()),
DWARFAddressSpace(N->getDWARFAddressSpace()), Flags(N->getFlags()),
- ExtraData(N->getRawExtraData()) {}
+ ExtraData(N->getRawExtraData()), Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DIDerivedType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
@@ -497,8 +493,8 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
AlignInBits == RHS->getAlignInBits() &&
OffsetInBits == RHS->getOffsetInBits() &&
DWARFAddressSpace == RHS->getDWARFAddressSpace() &&
- Flags == RHS->getFlags() &&
- ExtraData == RHS->getRawExtraData();
+ Flags == RHS->getFlags() && ExtraData == RHS->getRawExtraData() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -525,7 +521,8 @@ template <> struct MDNodeSubsetEqualImpl<DIDerivedType> {
return isODRMember(LHS.Tag, LHS.Scope, LHS.Name, RHS);
}
- static bool isSubsetEqual(const DIDerivedType *LHS, const DIDerivedType *RHS) {
+ static bool isSubsetEqual(const DIDerivedType *LHS,
+ const DIDerivedType *RHS) {
return isODRMember(LHS->getTag(), LHS->getRawScope(), LHS->getRawName(),
RHS);
}
@@ -569,6 +566,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Metadata *Associated;
Metadata *Allocated;
Metadata *Rank;
+ Metadata *Annotations;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
@@ -577,14 +575,15 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, Metadata *Discriminator,
Metadata *DataLocation, Metadata *Associated,
- Metadata *Allocated, Metadata *Rank)
+ Metadata *Allocated, Metadata *Rank, Metadata *Annotations)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
AlignInBits(AlignInBits), Flags(Flags), Elements(Elements),
RuntimeLang(RuntimeLang), VTableHolder(VTableHolder),
TemplateParams(TemplateParams), Identifier(Identifier),
Discriminator(Discriminator), DataLocation(DataLocation),
- Associated(Associated), Allocated(Allocated), Rank(Rank) {}
+ Associated(Associated), Allocated(Allocated), Rank(Rank),
+ Annotations(Annotations) {}
MDNodeKeyImpl(const DICompositeType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
@@ -597,7 +596,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Discriminator(N->getRawDiscriminator()),
DataLocation(N->getRawDataLocation()),
Associated(N->getRawAssociated()), Allocated(N->getRawAllocated()),
- Rank(N->getRawRank()) {}
+ Rank(N->getRawRank()), Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DICompositeType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
@@ -614,7 +613,8 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Discriminator == RHS->getRawDiscriminator() &&
DataLocation == RHS->getRawDataLocation() &&
Associated == RHS->getRawAssociated() &&
- Allocated == RHS->getRawAllocated() && Rank == RHS->getRawRank();
+ Allocated == RHS->getRawAllocated() && Rank == RHS->getRawRank() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -623,7 +623,7 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
// collision "most of the time". There is no correctness issue in case of
// collision because of the full check above.
return hash_combine(Name, File, Line, BaseType, Scope, Elements,
- TemplateParams);
+ TemplateParams, Annotations);
}
};
@@ -663,14 +663,13 @@ template <> struct MDNodeKeyImpl<DIFile> {
bool isKeyOf(const DIFile *RHS) const {
return Filename == RHS->getRawFilename() &&
Directory == RHS->getRawDirectory() &&
- Checksum == RHS->getRawChecksum() &&
- Source == RHS->getRawSource();
+ Checksum == RHS->getRawChecksum() && Source == RHS->getRawSource();
}
unsigned getHashValue() const {
- return hash_combine(
- Filename, Directory, Checksum ? Checksum->Kind : 0,
- Checksum ? Checksum->Value : nullptr, Source.getValueOr(nullptr));
+ return hash_combine(Filename, Directory, Checksum ? Checksum->Kind : 0,
+ Checksum ? Checksum->Value : nullptr,
+ Source.getValueOr(nullptr));
}
};
@@ -692,6 +691,7 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
Metadata *Declaration;
Metadata *RetainedNodes;
Metadata *ThrownTypes;
+ Metadata *Annotations;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
@@ -699,13 +699,14 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
unsigned SPFlags, Metadata *Unit, Metadata *TemplateParams,
Metadata *Declaration, Metadata *RetainedNodes,
- Metadata *ThrownTypes)
+ Metadata *ThrownTypes, Metadata *Annotations)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), ScopeLine(ScopeLine),
ContainingType(ContainingType), VirtualIndex(VirtualIndex),
ThisAdjustment(ThisAdjustment), Flags(Flags), SPFlags(SPFlags),
Unit(Unit), TemplateParams(TemplateParams), Declaration(Declaration),
- RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes) {}
+ RetainedNodes(RetainedNodes), ThrownTypes(ThrownTypes),
+ Annotations(Annotations) {}
MDNodeKeyImpl(const DISubprogram *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
@@ -717,7 +718,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
TemplateParams(N->getRawTemplateParams()),
Declaration(N->getRawDeclaration()),
RetainedNodes(N->getRawRetainedNodes()),
- ThrownTypes(N->getRawThrownTypes()) {}
+ ThrownTypes(N->getRawThrownTypes()),
+ Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DISubprogram *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -732,7 +734,8 @@ template <> struct MDNodeKeyImpl<DISubprogram> {
TemplateParams == RHS->getRawTemplateParams() &&
Declaration == RHS->getRawDeclaration() &&
RetainedNodes == RHS->getRawRetainedNodes() &&
- ThrownTypes == RHS->getRawThrownTypes();
+ ThrownTypes == RHS->getRawThrownTypes() &&
+ Annotations == RHS->getRawAnnotations();
}
bool isDefinition() const { return SPFlags & DISubprogram::SPFlagDefinition; }
@@ -853,9 +856,7 @@ template <> struct MDNodeKeyImpl<DINamespace> {
ExportSymbols == RHS->getExportSymbols();
}
- unsigned getHashValue() const {
- return hash_combine(Scope, Name);
- }
+ unsigned getHashValue() const { return hash_combine(Scope, Name); }
};
template <> struct MDNodeKeyImpl<DICommonBlock> {
@@ -865,8 +866,8 @@ template <> struct MDNodeKeyImpl<DICommonBlock> {
Metadata *File;
unsigned LineNo;
- MDNodeKeyImpl(Metadata *Scope, Metadata *Decl, MDString *Name,
- Metadata *File, unsigned LineNo)
+ MDNodeKeyImpl(Metadata *Scope, Metadata *Decl, MDString *Name, Metadata *File,
+ unsigned LineNo)
: Scope(Scope), Decl(Decl), Name(Name), File(File), LineNo(LineNo) {}
MDNodeKeyImpl(const DICommonBlock *N)
: Scope(N->getRawScope()), Decl(N->getRawDecl()), Name(N->getRawName()),
@@ -874,8 +875,8 @@ template <> struct MDNodeKeyImpl<DICommonBlock> {
bool isKeyOf(const DICommonBlock *RHS) const {
return Scope == RHS->getRawScope() && Decl == RHS->getRawDecl() &&
- Name == RHS->getRawName() && File == RHS->getRawFile() &&
- LineNo == RHS->getLineNo();
+ Name == RHS->getRawName() && File == RHS->getRawFile() &&
+ LineNo == RHS->getLineNo();
}
unsigned getHashValue() const {
@@ -976,17 +977,19 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
Metadata *StaticDataMemberDeclaration;
Metadata *TemplateParams;
uint32_t AlignInBits;
+ Metadata *Annotations;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition,
Metadata *StaticDataMemberDeclaration, Metadata *TemplateParams,
- uint32_t AlignInBits)
+ uint32_t AlignInBits, Metadata *Annotations)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
IsDefinition(IsDefinition),
StaticDataMemberDeclaration(StaticDataMemberDeclaration),
- TemplateParams(TemplateParams), AlignInBits(AlignInBits) {}
+ TemplateParams(TemplateParams), AlignInBits(AlignInBits),
+ Annotations(Annotations) {}
MDNodeKeyImpl(const DIGlobalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
@@ -994,7 +997,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()),
TemplateParams(N->getRawTemplateParams()),
- AlignInBits(N->getAlignInBits()) {}
+ AlignInBits(N->getAlignInBits()), Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DIGlobalVariable *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -1005,7 +1008,8 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
StaticDataMemberDeclaration ==
RHS->getRawStaticDataMemberDeclaration() &&
TemplateParams == RHS->getRawTemplateParams() &&
- AlignInBits == RHS->getAlignInBits();
+ AlignInBits == RHS->getAlignInBits() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -1018,7 +1022,7 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
// TODO: make hashing work fine with such situations
return hash_combine(Scope, Name, LinkageName, File, Line, Type,
IsLocalToUnit, IsDefinition, /* AlignInBits, */
- StaticDataMemberDeclaration);
+ StaticDataMemberDeclaration, Annotations);
}
};
@@ -1031,22 +1035,25 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
unsigned Arg;
unsigned Flags;
uint32_t AlignInBits;
+ Metadata *Annotations;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, Metadata *File, unsigned Line,
Metadata *Type, unsigned Arg, unsigned Flags,
- uint32_t AlignInBits)
+ uint32_t AlignInBits, Metadata *Annotations)
: Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), Arg(Arg),
- Flags(Flags), AlignInBits(AlignInBits) {}
+ Flags(Flags), AlignInBits(AlignInBits), Annotations(Annotations) {}
MDNodeKeyImpl(const DILocalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Type(N->getRawType()), Arg(N->getArg()),
- Flags(N->getFlags()), AlignInBits(N->getAlignInBits()) {}
+ Flags(N->getFlags()), AlignInBits(N->getAlignInBits()),
+ Annotations(N->getRawAnnotations()) {}
bool isKeyOf(const DILocalVariable *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
File == RHS->getRawFile() && Line == RHS->getLine() &&
Type == RHS->getRawType() && Arg == RHS->getArg() &&
- Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits();
+ Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits() &&
+ Annotations == RHS->getRawAnnotations();
}
unsigned getHashValue() const {
@@ -1057,7 +1064,7 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
// clang/test/CodeGen/debug-info-257-args.c is an example of this problem,
// generated IR is random for each run and test fails with Align included.
// TODO: make hashing work fine with such situations
- return hash_combine(Scope, Name, File, Line, Type, Arg, Flags);
+ return hash_combine(Scope, Name, File, Line, Type, Arg, Flags, Annotations);
}
};
@@ -1079,9 +1086,7 @@ template <> struct MDNodeKeyImpl<DILabel> {
}
/// Using name and line to get hash value. It should already be mostly unique.
- unsigned getHashValue() const {
- return hash_combine(Scope, Name, Line);
- }
+ unsigned getHashValue() const { return hash_combine(Scope, Name, Line); }
};
template <> struct MDNodeKeyImpl<DIExpression> {
@@ -1155,23 +1160,26 @@ template <> struct MDNodeKeyImpl<DIImportedEntity> {
Metadata *File;
unsigned Line;
MDString *Name;
+ Metadata *Elements;
MDNodeKeyImpl(unsigned Tag, Metadata *Scope, Metadata *Entity, Metadata *File,
- unsigned Line, MDString *Name)
+ unsigned Line, MDString *Name, Metadata *Elements)
: Tag(Tag), Scope(Scope), Entity(Entity), File(File), Line(Line),
- Name(Name) {}
+ Name(Name), Elements(Elements) {}
MDNodeKeyImpl(const DIImportedEntity *N)
: Tag(N->getTag()), Scope(N->getRawScope()), Entity(N->getRawEntity()),
- File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()) {}
+ File(N->getRawFile()), Line(N->getLine()), Name(N->getRawName()),
+ Elements(N->getRawElements()) {}
bool isKeyOf(const DIImportedEntity *RHS) const {
return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
Entity == RHS->getRawEntity() && File == RHS->getFile() &&
- Line == RHS->getLine() && Name == RHS->getRawName();
+ Line == RHS->getLine() && Name == RHS->getRawName() &&
+ Elements == RHS->getRawElements();
}
unsigned getHashValue() const {
- return hash_combine(Tag, Scope, Entity, File, Line, Name);
+ return hash_combine(Tag, Scope, Entity, File, Line, Name, Elements);
}
};
@@ -1325,7 +1333,7 @@ class LLVMContextImpl {
public:
/// OwnedModules - The set of modules instantiated in this context, and which
/// will be automatically deleted if this context is deleted.
- SmallPtrSet<Module*, 4> OwnedModules;
+ SmallPtrSet<Module *, 4> OwnedModules;
/// The main remark streamer used by all the other streamers (e.g. IR, MIR,
/// frontends, etc.). This should only be used by the specific streamers, and
@@ -1377,7 +1385,7 @@ public:
DenseMap<Value *, ValueAsMetadata *> ValuesAsMetadata;
DenseMap<Metadata *, MetadataAsValue *> MetadataAsValues;
- DenseMap<const Value*, ValueName*> ValueNames;
+ DenseMap<const Value *, ValueName *> ValueNames;
#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \
DenseSet<CLASS *, CLASS##Info> CLASS##s;
@@ -1412,7 +1420,7 @@ public:
StringMap<std::unique_ptr<ConstantDataSequential>> CDSConstants;
DenseMap<std::pair<const Function *, const BasicBlock *>, BlockAddress *>
- BlockAddresses;
+ BlockAddresses;
DenseMap<const GlobalValue *, DSOLocalEquivalent *> DSOLocalEquivalents;
@@ -1434,22 +1442,19 @@ public:
BumpPtrAllocator Alloc;
UniqueStringSaver Saver{Alloc};
- DenseMap<unsigned, IntegerType*> IntegerTypes;
+ DenseMap<unsigned, IntegerType *> IntegerTypes;
using FunctionTypeSet = DenseSet<FunctionType *, FunctionTypeKeyInfo>;
FunctionTypeSet FunctionTypes;
using StructTypeSet = DenseSet<StructType *, AnonStructTypeKeyInfo>;
StructTypeSet AnonStructTypes;
- StringMap<StructType*> NamedStructTypes;
+ StringMap<StructType *> NamedStructTypes;
unsigned NamedStructTypesUniqueID = 0;
- DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
- DenseMap<std::pair<Type *, ElementCount>, VectorType*> VectorTypes;
- // TODO: clean up the following after we no longer support non-opaque pointer
- // types.
- bool ForceOpaquePointers;
- DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
- DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType *> ArrayTypes;
+ DenseMap<std::pair<Type *, ElementCount>, VectorType *> VectorTypes;
+ DenseMap<Type *, PointerType *> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type *, unsigned>, PointerType *> ASPointerTypes;
/// ValueHandles - This map keeps track of all of the value handles that are
/// watching a Value*. The Value::HasValueHandle bit is used to know
@@ -1503,7 +1508,7 @@ public:
/// This saves allocating an additional word in Function for programs which
/// do not use GC (i.e., most programs) at the cost of increased overhead for
/// clients which do use GC.
- DenseMap<const Function*, std::string> GCNames;
+ DenseMap<const Function *, std::string> GCNames;
/// Flag to indicate if Value (other than GlobalValue) retains their name or
/// not.
@@ -1526,7 +1531,15 @@ public:
///
/// The lifetime of the object must be guaranteed to extend as long as the
/// LLVMContext is used by compilation.
- void setOptPassGate(OptPassGate&);
+ void setOptPassGate(OptPassGate &);
+
+ // TODO: clean up the following after we no longer support non-opaque pointer
+ // types.
+ bool getOpaquePointers();
+ void setOpaquePointers(bool OP);
+
+private:
+ Optional<bool> OpaquePointers;
};
} // end namespace llvm
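Most of the substantive churn in this header follows one pattern: when a debug-info node gains a new operand (Annotations, Elements), its uniquing key must carry that operand in three places at once, namely the stored fields, isKeyOf(), and getHashValue(), or the uniquing maps would conflate nodes that differ only in the new operand. A toy illustration of the shape, not part of the patch:

    #include "llvm/ADT/Hashing.h"

    using namespace llvm;

    // Toy key: the new field participates in storage, equality, and hashing.
    struct ToyVariableKey {
      const void *Scope = nullptr;
      const void *Name = nullptr;
      const void *Annotations = nullptr; // newly tracked operand

      bool isKeyOf(const ToyVariableKey &RHS) const {
        return Scope == RHS.Scope && Name == RHS.Name &&
               Annotations == RHS.Annotations;
      }
      unsigned getHashValue() const {
        return hash_combine(Scope, Name, Annotations);
      }
    };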
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index 32840fdeddf7..7bccf09012ca 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -1351,7 +1351,7 @@ void FunctionPassManager::add(Pass *P) {
///
bool FunctionPassManager::run(Function &F) {
handleAllErrors(F.materialize(), [&](ErrorInfoBase &EIB) {
- report_fatal_error("Error reading bitcode file: " + EIB.message());
+ report_fatal_error(Twine("Error reading bitcode file: ") + EIB.message());
});
return FPM->run(F);
}
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index bbdde586e6e0..2399ea27ee9d 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -99,6 +99,11 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
const unsigned PtrSize = DL.getPointerSize();
for (const Argument &A : F->args()) {
+ // For the purposes of the byte count suffix, structs returned by pointer
+ // do not count as function arguments.
+ if (A.hasStructRetAttr())
+ continue;
+
// 'Dereference' type in case of byval or inalloca parameter attribute.
uint64_t AllocSize = A.hasPassPointeeByValueCopyAttr() ?
A.getPassPointeeByValueCopySize(DL) :
@@ -186,7 +191,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
// Check if the name needs quotes to be safe for the linker to interpret.
static bool canBeUnquotedInDirective(char C) {
- return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '@';
+ return isAlnum(C) || C == '_' || C == '@';
}
static bool canBeUnquotedInDirective(StringRef Name) {
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index 4f87ef537765..ebcc493407cc 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -345,7 +345,7 @@ ReplaceableMetadataImpl *ReplaceableMetadataImpl::getIfExists(Metadata &MD) {
bool ReplaceableMetadataImpl::isReplaceable(const Metadata &MD) {
if (auto *N = dyn_cast<MDNode>(&MD))
return !N->isResolved();
- return dyn_cast<ValueAsMetadata>(&MD);
+ return isa<ValueAsMetadata>(&MD);
}
static DISubprogram *getLocalFunctionMetadata(Value *V) {
@@ -1367,6 +1367,15 @@ void Instruction::addAnnotationMetadata(StringRef Name) {
setMetadata(LLVMContext::MD_annotation, MD);
}
+AAMDNodes Instruction::getAAMetadata() const {
+ AAMDNodes Result;
+ Result.TBAA = getMetadata(LLVMContext::MD_tbaa);
+ Result.TBAAStruct = getMetadata(LLVMContext::MD_tbaa_struct);
+ Result.Scope = getMetadata(LLVMContext::MD_alias_scope);
+ Result.NoAlias = getMetadata(LLVMContext::MD_noalias);
+ return Result;
+}
+
void Instruction::setAAMetadata(const AAMDNodes &N) {
setMetadata(LLVMContext::MD_tbaa, N.TBAA);
setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct);
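The new Instruction::getAAMetadata() collects the four alias-analysis nodes in one call, which makes copying them between memory operations trivial. Sketch:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"

    using namespace llvm;

    // Propagate tbaa, tbaa.struct, alias.scope and noalias from one memory
    // instruction to another.
    static void copyAliasInfo(const Instruction &From, Instruction &To) {
      To.setAAMetadata(From.getAAMetadata());
    }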
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 7c18dc0ed299..63ea41fba89a 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -114,6 +114,10 @@ GlobalValue *Module::getNamedValue(StringRef Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}
+unsigned Module::getNumNamedValues() const {
+ return getValueSymbolTable().size();
+}
+
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
/// This ID is uniqued across modules in the current LLVMContext.
unsigned Module::getMDKindID(StringRef Name) const {
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index f4ac6caf4f93..31c5cd938d03 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -251,12 +251,13 @@ void ModuleSummaryIndex::propagateAttributes(
bool IsDSOLocal = true;
for (auto &S : P.second.SummaryList) {
if (!isGlobalValueLive(S.get())) {
- // computeDeadSymbols should have marked all copies live. Note that
- // it is possible that there is a GUID collision between internal
- // symbols with the same name in different files of the same name but
- // not enough distinguishing path. Because computeDeadSymbols should
- // conservatively mark all copies live we can assert here that all are
- // dead if any copy is dead.
+ // computeDeadSymbolsAndUpdateIndirectCalls should have marked all
+      // copies live. Note that a GUID collision is possible between internal
+      // symbols that share a name and live in files with the same base name
+      // but not enough distinguishing path. Because
+ // computeDeadSymbolsAndUpdateIndirectCalls should conservatively mark
+ // all copies live we can assert here that all are dead if any copy is
+ // dead.
assert(llvm::none_of(
P.second.SummaryList,
[&](const std::unique_ptr<GlobalValueSummary> &Summary) {
@@ -446,9 +447,11 @@ static std::string linkageToString(GlobalValue::LinkageTypes LT) {
static std::string fflagsToString(FunctionSummary::FFlags F) {
auto FlagValue = [](unsigned V) { return V ? '1' : '0'; };
- char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
- FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
- FlagValue(F.NoInline), FlagValue(F.AlwaysInline), 0};
+ char FlagRep[] = {FlagValue(F.ReadNone), FlagValue(F.ReadOnly),
+ FlagValue(F.NoRecurse), FlagValue(F.ReturnDoesNotAlias),
+ FlagValue(F.NoInline), FlagValue(F.AlwaysInline),
+ FlagValue(F.NoUnwind), FlagValue(F.MayThrow),
+ FlagValue(F.HasUnknownCall), 0};
return FlagRep;
}
diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp
index 18a1c84933e0..cf309ffd6212 100644
--- a/llvm/lib/IR/Operator.cpp
+++ b/llvm/lib/IR/Operator.cpp
@@ -19,6 +19,31 @@
#include "ConstantsContext.h"
namespace llvm {
+bool Operator::hasPoisonGeneratingFlags() const {
+ switch (getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl: {
+ auto *OBO = cast<OverflowingBinaryOperator>(this);
+ return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ return cast<PossiblyExactOperator>(this)->isExact();
+ case Instruction::GetElementPtr: {
+ auto *GEP = cast<GEPOperator>(this);
+ // Note: inrange exists on constexpr only
+ return GEP->isInBounds() || GEP->getInRangeIndex() != None;
+ }
+ default:
+ return false;
+ }
+ // TODO: FastMathFlags! (On instructions, but not constexpr)
+}
+
Type *GEPOperator::getSourceElementType() const {
if (auto *I = dyn_cast<GetElementPtrInst>(this))
return I->getSourceElementType();
@@ -190,12 +215,14 @@ bool GEPOperator::collectOffset(
if (STy || ScalableType)
return false;
- // Insert an initial offset of 0 for V iff none exists already, then
- // increment the offset by IndexedSize.
- VariableOffsets.insert({V, APInt(BitWidth, 0)});
APInt IndexedSize =
APInt(BitWidth, DL.getTypeAllocSize(GTI.getIndexedType()));
- VariableOffsets[V] += IndexedSize;
+ // Insert an initial offset of 0 for V iff none exists already, then
+ // increment the offset by IndexedSize.
+ if (!IndexedSize.isZero()) {
+ VariableOffsets.insert({V, APInt(BitWidth, 0)});
+ VariableOffsets[V] += IndexedSize;
+ }
}
return true;
}
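Two behavioural notes fall out of the Operator.cpp changes: hasPoisonGeneratingFlags() now also answers for constant expressions (where fast-math flags cannot occur), and GEPOperator::collectOffset() no longer records variable indices whose scaled element size is zero. A usage sketch for the latter:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/MapVector.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Operator.h"

    using namespace llvm;

    // Split a GEP into a constant byte offset plus per-variable contributions;
    // variables scaled by a zero-sized type never appear in VarOffsets.
    static bool splitGEPOffset(GEPOperator &GEP, const DataLayout &DL,
                               MapVector<Value *, APInt> &VarOffsets,
                               APInt &ConstOffset) {
      unsigned BW = DL.getIndexSizeInBits(GEP.getPointerAddressSpace());
      ConstOffset = APInt(BW, 0);
      return GEP.collectOffset(DL, BW, VarOffsets, ConstOffset);
    }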
diff --git a/llvm/lib/IR/OptBisect.cpp b/llvm/lib/IR/OptBisect.cpp
index 2cf2298e0005..55c0dbad5aab 100644
--- a/llvm/lib/IR/OptBisect.cpp
+++ b/llvm/lib/IR/OptBisect.cpp
@@ -22,14 +22,12 @@
using namespace llvm;
static cl::opt<int> OptBisectLimit("opt-bisect-limit", cl::Hidden,
- cl::init(std::numeric_limits<int>::max()),
- cl::Optional,
+ cl::init(OptBisect::Disabled), cl::Optional,
+ cl::cb<void, int>([](int Limit) {
+ llvm::OptBisector->setLimit(Limit);
+ }),
cl::desc("Maximum optimization to perform"));
-OptBisect::OptBisect() : OptPassGate() {
- BisectEnabled = OptBisectLimit != std::numeric_limits<int>::max();
-}
-
static void printPassMessage(const StringRef &Name, int PassNum,
StringRef TargetDesc, bool Running) {
StringRef Status = Running ? "" : "NOT ";
@@ -38,19 +36,21 @@ static void printPassMessage(const StringRef &Name, int PassNum,
}
bool OptBisect::shouldRunPass(const Pass *P, StringRef IRDescription) {
- assert(BisectEnabled);
+ assert(isEnabled());
return checkPass(P->getPassName(), IRDescription);
}
bool OptBisect::checkPass(const StringRef PassName,
const StringRef TargetDesc) {
- assert(BisectEnabled);
+ assert(isEnabled());
int CurBisectNum = ++LastBisectNum;
- bool ShouldRun = (OptBisectLimit == -1 || CurBisectNum <= OptBisectLimit);
+ bool ShouldRun = (BisectLimit == -1 || CurBisectNum <= BisectLimit);
printPassMessage(PassName, CurBisectNum, TargetDesc, ShouldRun);
return ShouldRun;
}
+const int OptBisect::Disabled;
+
ManagedStatic<OptBisect> llvm::OptBisector;
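OptBisect now receives its limit through a cl::cb callback instead of comparing the raw option against INT_MAX, so setLimit() runs exactly when -opt-bisect-limit is parsed. A toy sketch of the same cl::cb pattern (option and variable names are made up):

    #include "llvm/Support/CommandLine.h"

    namespace {
    int DemoLimitValue = -1;
    } // namespace

    static llvm::cl::opt<int> DemoLimit(
        "demo-limit", llvm::cl::init(-1), llvm::cl::Optional,
        llvm::cl::cb<void, int>([](int L) { DemoLimitValue = L; }),
        llvm::cl::desc("Example of a callback-backed integer option"));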
diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp
index 4cf7ab2a602b..d933003ccdf7 100644
--- a/llvm/lib/IR/PassManager.cpp
+++ b/llvm/lib/IR/PassManager.cpp
@@ -10,12 +10,13 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManagerImpl.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+namespace llvm {
// Explicit template instantiations and specialization definitions for core
// template typedefs.
-namespace llvm {
template class AllAnalysesOn<Module>;
template class AllAnalysesOn<Function>;
template class PassManager<Module>;
@@ -91,6 +92,16 @@ bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
}
} // namespace llvm
+void ModuleToFunctionPassAdaptor::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << "function";
+ if (EagerlyInvalidate)
+ OS << "<eager-inv>";
+ OS << "(";
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+}
+
PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
@@ -122,7 +133,7 @@ PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- FAM.invalidate(F, PassPA);
+ FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
diff --git a/llvm/lib/IR/ProfileSummary.cpp b/llvm/lib/IR/ProfileSummary.cpp
index 453a278a7f3f..05d5ac2c5ddf 100644
--- a/llvm/lib/IR/ProfileSummary.cpp
+++ b/llvm/lib/IR/ProfileSummary.cpp
@@ -249,7 +249,7 @@ ProfileSummary *ProfileSummary::getFromMD(Metadata *MD) {
PartialProfileRatio);
}
-void ProfileSummary::printSummary(raw_ostream &OS) {
+void ProfileSummary::printSummary(raw_ostream &OS) const {
OS << "Total functions: " << NumFunctions << "\n";
OS << "Maximum function count: " << MaxFunctionCount << "\n";
OS << "Maximum block count: " << MaxCount << "\n";
@@ -257,7 +257,7 @@ void ProfileSummary::printSummary(raw_ostream &OS) {
OS << "Total count: " << TotalCount << "\n";
}
-void ProfileSummary::printDetailedSummary(raw_ostream &OS) {
+void ProfileSummary::printDetailedSummary(raw_ostream &OS) const {
OS << "Detailed summary:\n";
for (const auto &Entry : DetailedSummary) {
OS << Entry.NumCounts << " blocks with count >= " << Entry.MinCount
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index bd92c604da2c..101cada77ff9 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -98,12 +98,4 @@ void setProbeDistributionFactor(Instruction &Inst, float Factor) {
}
}
-void addPseudoProbeAttribute(PseudoProbeInst &Inst,
- PseudoProbeAttributes Attr) {
- IRBuilder<> Builder(&Inst);
- uint32_t OldAttr = Inst.getAttributes()->getZExtValue();
- uint32_t NewAttr = OldAttr | (uint32_t)Attr;
- if (OldAttr != NewAttr)
- Inst.replaceUsesOfWith(Inst.getAttributes(), Builder.getInt32(NewAttr));
-}
} // namespace llvm
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index fd73a1a8e5af..cfd8deba5a53 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -15,15 +15,9 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/ValueMap.h"
namespace llvm {
-// Replace a constant expression by instructions with equivalent operations at
-// a specified location.
-Instruction *createReplacementInstr(ConstantExpr *CE, Instruction *Instr) {
- auto *CEInstr = CE->getAsInstruction();
- CEInstr->insertBefore(Instr);
- return CEInstr;
-}
void convertConstantExprsToInstructions(Instruction *I, ConstantExpr *CE,
SmallPtrSetImpl<Instruction *> *Insts) {
@@ -40,7 +34,8 @@ void convertConstantExprsToInstructions(
Instruction *I,
std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths,
SmallPtrSetImpl<Instruction *> *Insts) {
- SmallPtrSet<ConstantExpr *, 8> Visited;
+ ValueMap<ConstantExpr *, Instruction *> Visited;
+
for (Use &U : I->operands()) {
// The operand U is either not a constant expression operand or the
// constant expression paths do not belong to U, ignore U.
@@ -55,24 +50,47 @@ void convertConstantExprsToInstructions(
BI = &(*(BB->getFirstInsertionPt()));
}
- // Go through the paths associated with operand U, and convert all the
- // constant expressions along all paths to corresponding instructions.
+ // Go through all the paths associated with operand U, and convert all the
+ // constant expressions along all the paths to corresponding instructions.
auto *II = I;
auto &Paths = CEPaths[&U];
for (auto &Path : Paths) {
for (auto *CE : Path) {
- if (!Visited.insert(CE).second)
- continue;
- auto *NI = CE->getAsInstruction();
- NI->insertBefore(BI);
+ // Instruction which is equivalent to CE.
+ Instruction *NI = nullptr;
+
+ if (!Visited.count(CE)) {
+        // CE is encountered for the first time: convert it into a
+        // corresponding instruction NI and insert NI before the parent
+        // instruction.
+ NI = CE->getAsInstruction(BI);
+
+ // Mark CE as visited by mapping CE to NI.
+ Visited[CE] = NI;
+
+ // If required collect NI.
+ if (Insts)
+ Insts->insert(NI);
+ } else {
+        // We have already encountered CE; the corresponding instruction
+        // already exists, so use it to replace CE.
+ NI = Visited[CE];
+ }
+
+ assert(NI && "Expected an instruction corresponding to constant "
+ "expression.");
+
+ // Replace all uses of constant expression CE by the corresponding
+ // instruction NI within the current parent instruction.
II->replaceUsesOfWith(CE, NI);
- CE->removeDeadConstantUsers();
BI = II = NI;
- if (Insts)
- Insts->insert(NI);
}
}
}
+
+ // Remove all converted constant expressions which are dead by now.
+ for (auto Item : Visited)
+ Item.first->removeDeadConstantUsers();
}
void collectConstantExprPaths(
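convertConstantExprsToInstructions() now materializes each distinct constant expression at most once per instruction (via the Visited map) and defers removeDeadConstantUsers() until the end. The calling convention is unchanged; a sketch:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/ReplaceConstant.h"

    using namespace llvm;

    // Expand every constant-expression path from I's operands rooted at CE
    // into real instructions, collecting whatever was created.
    static void expandConstantExpr(Instruction *I, ConstantExpr *CE) {
      SmallPtrSet<Instruction *, 8> NewInsts;
      convertConstantExprsToInstructions(I, CE, &NewInsts);
      // Repeated uses of the same ConstantExpr now share one instruction.
    }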
diff --git a/llvm/lib/IR/Statepoint.cpp b/llvm/lib/IR/Statepoint.cpp
index bbfbbe489bae..b5916e4937c6 100644
--- a/llvm/lib/IR/Statepoint.cpp
+++ b/llvm/lib/IR/Statepoint.cpp
@@ -26,16 +26,14 @@ StatepointDirectives
llvm::parseStatepointDirectivesFromAttrs(AttributeList AS) {
StatepointDirectives Result;
- Attribute AttrID =
- AS.getAttribute(AttributeList::FunctionIndex, "statepoint-id");
+ Attribute AttrID = AS.getFnAttr("statepoint-id");
uint64_t StatepointID;
if (AttrID.isStringAttribute())
if (!AttrID.getValueAsString().getAsInteger(10, StatepointID))
Result.StatepointID = StatepointID;
uint32_t NumPatchBytes;
- Attribute AttrNumPatchBytes = AS.getAttribute(AttributeList::FunctionIndex,
- "statepoint-num-patch-bytes");
+ Attribute AttrNumPatchBytes = AS.getFnAttr("statepoint-num-patch-bytes");
if (AttrNumPatchBytes.isStringAttribute())
if (!AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes))
Result.NumPatchBytes = NumPatchBytes;
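Statepoint directive parsing now goes through AttributeList::getFnAttr() rather than indexing with AttributeList::FunctionIndex. The string-to-integer idiom, isolated as a sketch:

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/IR/Attributes.h"

    using namespace llvm;

    // Read an integer-valued string function attribute such as
    // "statepoint-id"; getAsInteger() returns true on parse failure.
    static Optional<uint64_t> readIntFnAttr(AttributeList AL, StringRef Name) {
      Attribute A = AL.getFnAttr(Name);
      uint64_t V;
      if (A.isStringAttribute() && !A.getValueAsString().getAsInteger(10, V))
        return V;
      return None;
    }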
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index a21998976066..d59d87ad631b 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -66,6 +66,44 @@ bool Type::isOpaquePointerTy() const {
return false;
}
+const fltSemantics &Type::getFltSemantics() const {
+ switch (getTypeID()) {
+ case HalfTyID: return APFloat::IEEEhalf();
+ case BFloatTyID: return APFloat::BFloat();
+ case FloatTyID: return APFloat::IEEEsingle();
+ case DoubleTyID: return APFloat::IEEEdouble();
+ case X86_FP80TyID: return APFloat::x87DoubleExtended();
+ case FP128TyID: return APFloat::IEEEquad();
+ case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
+ default: llvm_unreachable("Invalid floating type");
+ }
+}
+
+bool Type::isIEEE() const {
+ return APFloat::getZero(getFltSemantics()).isIEEE();
+}
+
+Type *Type::getFloatingPointTy(LLVMContext &C, const fltSemantics &S) {
+ Type *Ty;
+ if (&S == &APFloat::IEEEhalf())
+ Ty = Type::getHalfTy(C);
+ else if (&S == &APFloat::BFloat())
+ Ty = Type::getBFloatTy(C);
+ else if (&S == &APFloat::IEEEsingle())
+ Ty = Type::getFloatTy(C);
+ else if (&S == &APFloat::IEEEdouble())
+ Ty = Type::getDoubleTy(C);
+ else if (&S == &APFloat::x87DoubleExtended())
+ Ty = Type::getX86_FP80Ty(C);
+ else if (&S == &APFloat::IEEEquad())
+ Ty = Type::getFP128Ty(C);
+ else {
+ assert(&S == &APFloat::PPCDoubleDouble() && "Unknown FP format");
+ Ty = Type::getPPC_FP128Ty(C);
+ }
+ return Ty;
+}
+
bool Type::canLosslesslyBitCastTo(Type *Ty) const {
// Identity cast means no change so return true
if (this == Ty)
@@ -296,9 +334,7 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
return Entry;
}
-APInt IntegerType::getMask() const {
- return APInt::getAllOnesValue(getBitWidth());
-}
+APInt IntegerType::getMask() const { return APInt::getAllOnes(getBitWidth()); }
//===----------------------------------------------------------------------===//
// FunctionType Implementation
@@ -696,8 +732,8 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
- // Create opaque pointer for pointer to opaque pointer.
- if (CImpl->ForceOpaquePointers || EltTy->isOpaquePointerTy())
+ // Automatically convert typed pointers to opaque pointers.
+ if (CImpl->getOpaquePointers())
return get(EltTy->getContext(), AddressSpace);
// Since AddressSpace #0 is the common case, we special case it.
@@ -711,6 +747,8 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
PointerType *PointerType::get(LLVMContext &C, unsigned AddressSpace) {
LLVMContextImpl *CImpl = C.pImpl;
+ assert(CImpl->getOpaquePointers() &&
+ "Can only create opaque pointers in opaque pointer mode");
// Since AddressSpace #0 is the common case, we special case it.
PointerType *&Entry =
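The two new Type helpers are inverses of each other for the seven supported floating-point formats. A quick sketch, assuming the llvm namespace and an LLVMContext Ctx:

Type *F32 = Type::getFloatTy(Ctx);
const fltSemantics &Sem = F32->getFltSemantics();   // APFloat::IEEEsingle()
Type *RoundTrip = Type::getFloatingPointTy(Ctx, Sem);
assert(RoundTrip == F32 && "same context, so the same uniqued type");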
diff --git a/llvm/lib/IR/TypeFinder.cpp b/llvm/lib/IR/TypeFinder.cpp
index 724b8f6b6ad2..1f757d7dbf4e 100644
--- a/llvm/lib/IR/TypeFinder.cpp
+++ b/llvm/lib/IR/TypeFinder.cpp
@@ -106,11 +106,9 @@ void TypeFinder::incorporateType(Type *Ty) {
StructTypes.push_back(STy);
// Add all unvisited subtypes to worklist for processing
- for (Type::subtype_reverse_iterator I = Ty->subtype_rbegin(),
- E = Ty->subtype_rend();
- I != E; ++I)
- if (VisitedTypes.insert(*I).second)
- TypeWorklist.push_back(*I);
+ for (Type *SubTy : llvm::reverse(Ty->subtypes()))
+ if (VisitedTypes.insert(SubTy).second)
+ TypeWorklist.push_back(SubTy);
} while (!TypeWorklist.empty());
}
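llvm::reverse from ADT/STLExtras.h adapts any range with rbegin/rend (or builds reverse iterators for plain ranges), which is what lets the explicit subtype_reverse_iterator loop collapse into a range-for. A trivial sketch:

#include "llvm/ADT/STLExtras.h"
#include <vector>

void visitReversed(const std::vector<int> &V) {
  for (int X : llvm::reverse(V)) {
    // Visits elements back to front, e.g. 3, 2, 1 for {1, 2, 3}.
    (void)X;
  }
}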
diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp
index 8837151f2e18..68489075cd88 100644
--- a/llvm/lib/IR/User.cpp
+++ b/llvm/lib/IR/User.cpp
@@ -107,7 +107,7 @@ MutableArrayRef<uint8_t> User::getDescriptor() {
}
bool User::isDroppable() const {
- return isa<AssumeInst>(this);
+ return isa<AssumeInst>(this) || isa<PseudoProbeInst>(this);
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 1c595651b3d7..b475c8327874 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -176,6 +176,18 @@ Use *Value::getSingleUndroppableUse() {
return Result;
}
+User *Value::getUniqueUndroppableUser() {
+ User *Result = nullptr;
+ for (auto *U : users()) {
+ if (!U->isDroppable()) {
+ if (Result && Result != U)
+ return nullptr;
+ Result = U;
+ }
+ }
+ return Result;
+}
+
bool Value::hasNUndroppableUses(unsigned int N) const {
return hasNItems(user_begin(), user_end(), N, isUnDroppableUser);
}
@@ -534,9 +546,7 @@ void Value::replaceUsesWithIf(Value *New,
SmallVector<TrackingVH<Constant>, 8> Consts;
SmallPtrSet<Constant *, 8> Visited;
- for (use_iterator UI = use_begin(), E = use_end(); UI != E;) {
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(uses())) {
if (!ShouldReplace(U))
continue;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -694,6 +704,7 @@ const Value *Value::stripPointerCastsForAliasAnalysis() const {
const Value *Value::stripAndAccumulateConstantOffsets(
const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
+ bool AllowInvariantGroup,
function_ref<bool(Value &, APInt &)> ExternalAnalysis) const {
if (!getType()->isPtrOrPtrVectorTy())
return this;
@@ -753,6 +764,8 @@ const Value *Value::stripAndAccumulateConstantOffsets(
} else if (const auto *Call = dyn_cast<CallBase>(V)) {
if (const Value *RV = Call->getReturnedArgOperand())
V = RV;
+ if (AllowInvariantGroup && Call->isLaunderOrStripInvariantGroup())
+ V = Call->getArgOperand(0);
}
assert(V->getType()->isPtrOrPtrVectorTy() && "Unexpected operand type!");
} while (Visited.insert(V).second);
@@ -852,10 +865,9 @@ uint64_t Value::getPointerDereferenceableBytes(const DataLayout &DL,
CanBeNull = true;
}
} else if (const auto *Call = dyn_cast<CallBase>(this)) {
- DerefBytes = Call->getDereferenceableBytes(AttributeList::ReturnIndex);
+ DerefBytes = Call->getRetDereferenceableBytes();
if (DerefBytes == 0) {
- DerefBytes =
- Call->getDereferenceableOrNullBytes(AttributeList::ReturnIndex);
+ DerefBytes = Call->getRetDereferenceableOrNullBytes();
CanBeNull = true;
}
} else if (const LoadInst *LI = dyn_cast<LoadInst>(this)) {
@@ -1014,8 +1026,7 @@ bool Value::isTransitiveUsedByMetadataOnly() const {
llvm::SmallPtrSet<const User *, 32> Visited;
WorkList.insert(WorkList.begin(), user_begin(), user_end());
while (!WorkList.empty()) {
- const User *U = WorkList.back();
- WorkList.pop_back();
+ const User *U = WorkList.pop_back_val();
Visited.insert(U);
// If it is transitively used by a global value or a non-constant value,
// it's obviously not only used by metadata.
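llvm::make_early_inc_range, used in replaceUsesWithIf above, advances the iterator before each loop body runs, so the current Use can be replaced (which unlinks it from the use list) without invalidating the traversal. A sketch, where V, NewV and ShouldReplace are placeholders:

for (Use &U : llvm::make_early_inc_range(V->uses()))
  if (ShouldReplace(U))
    U.set(NewV); // safe: the range already moved past U before the body ran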
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 758205a39eb3..dc4370d4b6ed 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -415,15 +415,18 @@ public:
for (const GlobalAlias &GA : M.aliases())
visitGlobalAlias(GA);
+ for (const GlobalIFunc &GI : M.ifuncs())
+ visitGlobalIFunc(GI);
+
for (const NamedMDNode &NMD : M.named_metadata())
visitNamedMDNode(NMD);
for (const StringMapEntry<Comdat> &SMEC : M.getComdatSymbolTable())
visitComdat(SMEC.getValue());
- visitModuleFlags(M);
- visitModuleIdents(M);
- visitModuleCommandLines(M);
+ visitModuleFlags();
+ visitModuleIdents();
+ visitModuleCommandLines();
verifyCompileUnits();
@@ -440,6 +443,7 @@ private:
void visitGlobalValue(const GlobalValue &GV);
void visitGlobalVariable(const GlobalVariable &GV);
void visitGlobalAlias(const GlobalAlias &GA);
+ void visitGlobalIFunc(const GlobalIFunc &GI);
void visitAliaseeSubExpr(const GlobalAlias &A, const Constant &C);
void visitAliaseeSubExpr(SmallPtrSetImpl<const GlobalAlias *> &Visited,
const GlobalAlias &A, const Constant &C);
@@ -448,9 +452,9 @@ private:
void visitMetadataAsValue(const MetadataAsValue &MD, Function *F);
void visitValueAsMetadata(const ValueAsMetadata &MD, Function *F);
void visitComdat(const Comdat &C);
- void visitModuleIdents(const Module &M);
- void visitModuleCommandLines(const Module &M);
- void visitModuleFlags(const Module &M);
+ void visitModuleIdents();
+ void visitModuleCommandLines();
+ void visitModuleFlags();
void visitModuleFlag(const MDNode *Op,
DenseMap<const MDString *, const MDNode *> &SeenIDs,
SmallVectorImpl<const MDNode *> &Requirements);
@@ -461,6 +465,8 @@ private:
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitProfMetadata(Instruction &I, MDNode *MD);
void visitAnnotationMetadata(MDNode *Annotation);
+ void visitAliasScopeMetadata(const MDNode *MD);
+ void visitAliasScopeListMetadata(const MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -547,6 +553,8 @@ private:
void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
const Value *V, bool IsIntrinsic);
void verifyFunctionMetadata(ArrayRef<std::pair<unsigned, MDNode *>> MDs);
+ template <typename T>
+ void verifyODRTypeAsScopeOperand(const MDNode &MD, T * = nullptr);
void visitConstantExprsRecursively(const Constant *EntryC);
void visitConstantExpr(const ConstantExpr *CE);
@@ -569,6 +577,9 @@ private:
/// declarations share the same calling convention.
void verifyDeoptimizeCallingConvs();
+ void verifyAttachedCallBundle(const CallBase &Call,
+ const OperandBundleUse &BU);
+
/// Verify all-or-nothing property of DIFile source attribute within a CU.
void verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F);
@@ -816,6 +827,21 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
visitGlobalValue(GA);
}
+void Verifier::visitGlobalIFunc(const GlobalIFunc &GI) {
+ // Pierce through ConstantExprs and GlobalAliases and check that the resolver
+ // has a Function
+ const Function *Resolver = GI.getResolverFunction();
+ Assert(Resolver, "IFunc must have a Function resolver", &GI);
+
+ // Check that the immediate resolver operand (prior to any bitcasts) has the
+ // correct type
+ const Type *ResolverTy = GI.getResolver()->getType();
+ const Type *ResolverFuncTy =
+ GlobalIFunc::getResolverFunctionType(GI.getValueType());
+ Assert(ResolverTy == ResolverFuncTy->getPointerTo(),
+ "IFunc resolver has incorrect type", &GI);
+}
+
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
// There used to be various other llvm.dbg.* nodes, but we don't support
// upgrading them and we want to reserve the namespace for future uses.
@@ -834,6 +860,19 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
}
}
+template <typename T>
+void Verifier::verifyODRTypeAsScopeOperand(const MDNode &MD, T *) {
+ if (isa<T>(MD)) {
+ if (auto *N = dyn_cast_or_null<DICompositeType>(cast<T>(MD).getScope()))
+ // Of all the tags supported for DICompositeType (see visitDICompositeType),
+ // we know that an enum type cannot be a scope.
+ AssertDI(N->getTag() != dwarf::DW_TAG_enumeration_type,
+ "enum type is not a scope; check enum type ODR "
+ "violation",
+ N, &MD);
+ }
+}
+
void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
// Only visit each node once. Metadata can be mutually recursive, so this
// avoids infinite recursion here, as well as being an optimization.
@@ -843,6 +882,12 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) {
Assert(&MD.getContext() == &Context,
"MDNode context does not match Module context!", &MD);
+ // Make sure that when a scope operand is an ODR type, ODR type uniquing does
+ // not create invalid debug metadata.
+ // TODO: check that the non-ODR-type scope operand is valid.
+ verifyODRTypeAsScopeOperand<DIType>(MD);
+ verifyODRTypeAsScopeOperand<DILocalScope>(MD);
+
switch (MD.getMetadataID()) {
default:
llvm_unreachable("Invalid MDNode subclass");
@@ -1091,7 +1136,8 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
N.getTag() == dwarf::DW_TAG_union_type ||
N.getTag() == dwarf::DW_TAG_enumeration_type ||
N.getTag() == dwarf::DW_TAG_class_type ||
- N.getTag() == dwarf::DW_TAG_variant_part,
+ N.getTag() == dwarf::DW_TAG_variant_part ||
+ N.getTag() == dwarf::DW_TAG_namelist,
"invalid tag", &N);
AssertDI(isScope(N.getRawScope()), "invalid scope", &N, N.getRawScope());
@@ -1470,7 +1516,7 @@ void Verifier::visitComdat(const Comdat &C) {
"comdat global value has private linkage", GV);
}
-void Verifier::visitModuleIdents(const Module &M) {
+void Verifier::visitModuleIdents() {
const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident");
if (!Idents)
return;
@@ -1487,7 +1533,7 @@ void Verifier::visitModuleIdents(const Module &M) {
}
}
-void Verifier::visitModuleCommandLines(const Module &M) {
+void Verifier::visitModuleCommandLines() {
const NamedMDNode *CommandLines = M.getNamedMetadata("llvm.commandline");
if (!CommandLines)
return;
@@ -1505,7 +1551,7 @@ void Verifier::visitModuleCommandLines(const Module &M) {
}
}
-void Verifier::visitModuleFlags(const Module &M) {
+void Verifier::visitModuleFlags() {
const NamedMDNode *Flags = M.getModuleFlagsMetadata();
if (!Flags) return;
@@ -1824,9 +1870,8 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
void Verifier::checkUnsignedBaseTenFuncAttr(AttributeList Attrs, StringRef Attr,
const Value *V) {
- if (Attrs.hasFnAttribute(Attr)) {
- StringRef S = Attrs.getAttribute(AttributeList::FunctionIndex, Attr)
- .getValueAsString();
+ if (Attrs.hasFnAttr(Attr)) {
+ StringRef S = Attrs.getFnAttr(Attr).getValueAsString();
unsigned N;
if (S.getAsInteger(10, N))
CheckFailed("\"" + Attr + "\" takes an unsigned integer: " + S, V);
@@ -1861,7 +1906,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
bool SawSwiftError = false;
// Verify return value attributes.
- AttributeSet RetAttrs = Attrs.getRetAttributes();
+ AttributeSet RetAttrs = Attrs.getRetAttrs();
for (Attribute RetAttr : RetAttrs)
Assert(RetAttr.isStringAttribute() ||
Attribute::canUseAsRetAttr(RetAttr.getKindAsEnum()),
@@ -1874,7 +1919,7 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
// Verify parameter attributes.
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
Type *Ty = FT->getParamType(i);
- AttributeSet ArgAttrs = Attrs.getParamAttributes(i);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(i);
if (!IsIntrinsic) {
Assert(!ArgAttrs.hasAttribute(Attribute::ImmArg),
@@ -1928,63 +1973,63 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
}
}
- if (!Attrs.hasAttributes(AttributeList::FunctionIndex))
+ if (!Attrs.hasFnAttrs())
return;
- verifyAttributeTypes(Attrs.getFnAttributes(), V);
- for (Attribute FnAttr : Attrs.getFnAttributes())
+ verifyAttributeTypes(Attrs.getFnAttrs(), V);
+ for (Attribute FnAttr : Attrs.getFnAttrs())
Assert(FnAttr.isStringAttribute() ||
Attribute::canUseAsFnAttr(FnAttr.getKindAsEnum()),
"Attribute '" + FnAttr.getAsString() +
"' does not apply to functions!",
V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::ReadOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::ReadOnly)),
"Attributes 'readnone and readonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::WriteOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::WriteOnly)),
"Attributes 'readnone and writeonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadOnly) &&
- Attrs.hasFnAttribute(Attribute::WriteOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadOnly) &&
+ Attrs.hasFnAttr(Attribute::WriteOnly)),
"Attributes 'readonly and writeonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::InaccessibleMemOrArgMemOnly)),
"Attributes 'readnone and inaccessiblemem_or_argmemonly' are "
"incompatible!",
V);
- Assert(!(Attrs.hasFnAttribute(Attribute::ReadNone) &&
- Attrs.hasFnAttribute(Attribute::InaccessibleMemOnly)),
+ Assert(!(Attrs.hasFnAttr(Attribute::ReadNone) &&
+ Attrs.hasFnAttr(Attribute::InaccessibleMemOnly)),
"Attributes 'readnone and inaccessiblememonly' are incompatible!", V);
- Assert(!(Attrs.hasFnAttribute(Attribute::NoInline) &&
- Attrs.hasFnAttribute(Attribute::AlwaysInline)),
+ Assert(!(Attrs.hasFnAttr(Attribute::NoInline) &&
+ Attrs.hasFnAttr(Attribute::AlwaysInline)),
"Attributes 'noinline and alwaysinline' are incompatible!", V);
- if (Attrs.hasFnAttribute(Attribute::OptimizeNone)) {
- Assert(Attrs.hasFnAttribute(Attribute::NoInline),
+ if (Attrs.hasFnAttr(Attribute::OptimizeNone)) {
+ Assert(Attrs.hasFnAttr(Attribute::NoInline),
"Attribute 'optnone' requires 'noinline'!", V);
- Assert(!Attrs.hasFnAttribute(Attribute::OptimizeForSize),
+ Assert(!Attrs.hasFnAttr(Attribute::OptimizeForSize),
"Attributes 'optsize and optnone' are incompatible!", V);
- Assert(!Attrs.hasFnAttribute(Attribute::MinSize),
+ Assert(!Attrs.hasFnAttr(Attribute::MinSize),
"Attributes 'minsize and optnone' are incompatible!", V);
}
- if (Attrs.hasFnAttribute(Attribute::JumpTable)) {
+ if (Attrs.hasFnAttr(Attribute::JumpTable)) {
const GlobalValue *GV = cast<GlobalValue>(V);
Assert(GV->hasGlobalUnnamedAddr(),
"Attribute 'jumptable' requires 'unnamed_addr'", V);
}
- if (Attrs.hasFnAttribute(Attribute::AllocSize)) {
+ if (Attrs.hasFnAttr(Attribute::AllocSize)) {
std::pair<unsigned, Optional<unsigned>> Args =
- Attrs.getAllocSizeArgs(AttributeList::FunctionIndex);
+ Attrs.getFnAttrs().getAllocSizeArgs();
auto CheckParam = [&](StringRef Name, unsigned ParamNo) {
if (ParamNo >= FT->getNumParams()) {
@@ -2009,17 +2054,16 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
return;
}
- if (Attrs.hasFnAttribute(Attribute::VScaleRange)) {
+ if (Attrs.hasFnAttr(Attribute::VScaleRange)) {
std::pair<unsigned, unsigned> Args =
- Attrs.getVScaleRangeArgs(AttributeList::FunctionIndex);
+ Attrs.getFnAttrs().getVScaleRangeArgs();
if (Args.first > Args.second && Args.second != 0)
CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
}
- if (Attrs.hasFnAttribute("frame-pointer")) {
- StringRef FP = Attrs.getAttribute(AttributeList::FunctionIndex,
- "frame-pointer").getValueAsString();
+ if (Attrs.hasFnAttr("frame-pointer")) {
+ StringRef FP = Attrs.getFnAttr("frame-pointer").getValueAsString();
if (FP != "all" && FP != "non-leaf" && FP != "none")
CheckFailed("invalid value for 'frame-pointer' attribute: " + FP, V);
}
@@ -2168,7 +2212,7 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
Call);
if (TargetFuncType->isVarArg()) {
- AttributeSet ArgAttrs = Attrs.getParamAttributes(5 + i);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(5 + i);
Assert(!ArgAttrs.hasAttribute(Attribute::StructRet),
"Attribute 'sret' cannot be used for vararg call arguments!",
Call);
@@ -2334,7 +2378,7 @@ void Verifier::visitFunction(const Function &F) {
// On function declarations/definitions, we do not support the builtin
// attribute. We do not check this in VerifyFunctionAttrs since that is
// checking for Attributes that can/can not ever be on functions.
- Assert(!Attrs.hasFnAttribute(Attribute::Builtin),
+ Assert(!Attrs.hasFnAttr(Attribute::Builtin),
"Attribute 'builtin' can only be applied to a callsite.", &F);
Assert(!Attrs.hasAttrSomewhere(Attribute::ElementType),
@@ -2348,7 +2392,7 @@ void Verifier::visitFunction(const Function &F) {
case CallingConv::C:
break;
case CallingConv::X86_INTR: {
- Assert(F.arg_empty() || Attrs.hasParamAttribute(0, Attribute::ByVal),
+ Assert(F.arg_empty() || Attrs.hasParamAttr(0, Attribute::ByVal),
"Calling convention parameter requires byval", &F);
break;
}
@@ -2368,14 +2412,14 @@ void Verifier::visitFunction(const Function &F) {
const unsigned StackAS = DL.getAllocaAddrSpace();
unsigned i = 0;
for (const Argument &Arg : F.args()) {
- Assert(!Attrs.hasParamAttribute(i, Attribute::ByVal),
+ Assert(!Attrs.hasParamAttr(i, Attribute::ByVal),
"Calling convention disallows byval", &F);
- Assert(!Attrs.hasParamAttribute(i, Attribute::Preallocated),
+ Assert(!Attrs.hasParamAttr(i, Attribute::Preallocated),
"Calling convention disallows preallocated", &F);
- Assert(!Attrs.hasParamAttribute(i, Attribute::InAlloca),
+ Assert(!Attrs.hasParamAttr(i, Attribute::InAlloca),
"Calling convention disallows inalloca", &F);
- if (Attrs.hasParamAttribute(i, Attribute::ByRef)) {
+ if (Attrs.hasParamAttr(i, Attribute::ByRef)) {
// FIXME: Should also disallow LDS and GDS, but we don't have the enum
// value here.
Assert(Arg.getType()->getPointerAddressSpace() != StackAS,
@@ -2416,7 +2460,7 @@ void Verifier::visitFunction(const Function &F) {
}
// Check that swifterror argument is only used by loads and stores.
- if (Attrs.hasParamAttribute(i, Attribute::SwiftError)) {
+ if (Attrs.hasParamAttr(i, Attribute::SwiftError)) {
verifySwiftErrorValue(&Arg);
}
++i;
@@ -2523,7 +2567,8 @@ void Verifier::visitFunction(const Function &F) {
// uses.
if (F.isIntrinsic() && F.getParent()->isMaterialized()) {
const User *U;
- if (F.hasAddressTaken(&U))
+ if (F.hasAddressTaken(&U, false, true, false,
+ /*IgnoreARCAttachedCall=*/true))
Assert(false, "Invalid user of intrinsic instruction!", U);
}
@@ -2693,6 +2738,7 @@ void Verifier::visitReturnInst(ReturnInst &RI) {
}
void Verifier::visitSwitchInst(SwitchInst &SI) {
+ Assert(SI.getType()->isVoidTy(), "Switch must have void result type!", &SI);
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
@@ -2726,7 +2772,7 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) {
Assert(CBI.getSuccessor(i)->getType()->isLabelTy(),
"Callbr successors must all have pointer type!", &CBI);
for (unsigned i = 0, e = CBI.getNumOperands(); i != e; ++i) {
- Assert(i >= CBI.getNumArgOperands() || !isa<BasicBlock>(CBI.getOperand(i)),
+ Assert(i >= CBI.arg_size() || !isa<BasicBlock>(CBI.getOperand(i)),
"Using an unescaped label as a callbr argument!", &CBI);
if (isa<BasicBlock>(CBI.getOperand(i)))
for (unsigned j = i + 1; j != e; ++j)
@@ -3071,14 +3117,14 @@ void Verifier::visitCallBase(CallBase &Call) {
Assert(Callee->getValueType() == FTy,
"Intrinsic called with incompatible signature", Call);
- if (Attrs.hasFnAttribute(Attribute::Speculatable)) {
+ if (Attrs.hasFnAttr(Attribute::Speculatable)) {
// Don't allow speculatable on call sites, unless the underlying function
// declaration is also speculatable.
Assert(Callee && Callee->isSpeculatable(),
"speculatable attribute may not apply to call sites", Call);
}
- if (Attrs.hasFnAttribute(Attribute::Preallocated)) {
+ if (Attrs.hasFnAttr(Attribute::Preallocated)) {
Assert(Call.getCalledFunction()->getIntrinsicID() ==
Intrinsic::call_preallocated_arg,
"preallocated as a call site attribute can only be on "
@@ -3118,7 +3164,7 @@ void Verifier::visitCallBase(CallBase &Call) {
Call);
}
- if (Attrs.hasParamAttribute(i, Attribute::ImmArg)) {
+ if (Attrs.hasParamAttr(i, Attribute::ImmArg)) {
// Don't allow immarg on call sites, unless the underlying declaration
// also has the matching immarg.
Assert(Callee && Callee->hasParamAttribute(i, Attribute::ImmArg),
@@ -3150,16 +3196,16 @@ void Verifier::visitCallBase(CallBase &Call) {
bool SawReturned = false;
for (unsigned Idx = 0; Idx < FTy->getNumParams(); ++Idx) {
- if (Attrs.hasParamAttribute(Idx, Attribute::Nest))
+ if (Attrs.hasParamAttr(Idx, Attribute::Nest))
SawNest = true;
- if (Attrs.hasParamAttribute(Idx, Attribute::Returned))
+ if (Attrs.hasParamAttr(Idx, Attribute::Returned))
SawReturned = true;
}
// Check attributes on the varargs part.
for (unsigned Idx = FTy->getNumParams(); Idx < Call.arg_size(); ++Idx) {
Type *Ty = Call.getArgOperand(Idx)->getType();
- AttributeSet ArgAttrs = Attrs.getParamAttributes(Idx);
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(Idx);
verifyParameterAttrs(ArgAttrs, Ty, &Call);
if (ArgAttrs.hasAttribute(Attribute::Nest)) {
@@ -3265,17 +3311,10 @@ void Verifier::visitCallBase(CallBase &Call) {
Assert(!FoundAttachedCallBundle,
"Multiple \"clang.arc.attachedcall\" operand bundles", Call);
FoundAttachedCallBundle = true;
+ verifyAttachedCallBundle(Call, BU);
}
}
- if (FoundAttachedCallBundle)
- Assert((FTy->getReturnType()->isPointerTy() ||
- (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())),
- "a call with operand bundle \"clang.arc.attachedcall\" must call a "
- "function returning a pointer or a non-returning function that has "
- "a void return type",
- Call);
-
// Verify that each inlinable callsite of a debug-info-bearing function in a
// debug-info-bearing function has a debug location attached to it. Failure to
// do so causes assertion failures when the inliner sets up inline scope info.
@@ -3315,7 +3354,7 @@ static bool isTypeCongruent(Type *L, Type *R) {
return PL->getAddressSpace() == PR->getAddressSpace();
}
-static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
+static AttrBuilder getParameterABIAttributes(unsigned I, AttributeList Attrs) {
static const Attribute::AttrKind ABIAttrs[] = {
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
Attribute::InReg, Attribute::StackAlignment, Attribute::SwiftSelf,
@@ -3323,15 +3362,15 @@ static AttrBuilder getParameterABIAttributes(int I, AttributeList Attrs) {
Attribute::ByRef};
AttrBuilder Copy;
for (auto AK : ABIAttrs) {
- Attribute Attr = Attrs.getParamAttributes(I).getAttribute(AK);
+ Attribute Attr = Attrs.getParamAttrs(I).getAttribute(AK);
if (Attr.isValid())
Copy.addAttribute(Attr);
}
// `align` is ABI-affecting only in combination with `byval` or `byref`.
- if (Attrs.hasParamAttribute(I, Attribute::Alignment) &&
- (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
- Attrs.hasParamAttribute(I, Attribute::ByRef)))
+ if (Attrs.hasParamAttr(I, Attribute::Alignment) &&
+ (Attrs.hasParamAttr(I, Attribute::ByVal) ||
+ Attrs.hasParamAttr(I, Attribute::ByRef)))
Copy.addAlignmentAttr(Attrs.getParamAlignment(I));
return Copy;
}
@@ -3383,12 +3422,12 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// - Only sret, byval, swiftself, and swiftasync ABI-impacting attributes
// are allowed in swifttailcc call
- for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder ABIAttrs = getParameterABIAttributes(I, CallerAttrs);
SmallString<32> Context{CCName, StringRef(" musttail caller")};
verifyTailCCMustTailAttrs(ABIAttrs, Context);
}
- for (int I = 0, E = CalleeTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CalleeTy->getNumParams(); I != E; ++I) {
AttrBuilder ABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
SmallString<32> Context{CCName, StringRef(" musttail callee")};
verifyTailCCMustTailAttrs(ABIAttrs, Context);
@@ -3406,7 +3445,7 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
Assert(CallerTy->getNumParams() == CalleeTy->getNumParams(),
"cannot guarantee tail call due to mismatched parameter counts",
&CI);
- for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
Assert(
isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
"cannot guarantee tail call due to mismatched parameter types", &CI);
@@ -3415,7 +3454,7 @@ void Verifier::verifyMustTailCall(CallInst &CI) {
// - All ABI-impacting function attributes, such as sret, byval, inreg,
// returned, preallocated, and inalloca, must match.
- for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ for (unsigned I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
Assert(CallerABIAttrs == CalleeABIAttrs,
@@ -4347,6 +4386,38 @@ void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
Assert(isa<MDString>(Op.get()), "operands must be strings");
}
+void Verifier::visitAliasScopeMetadata(const MDNode *MD) {
+ unsigned NumOps = MD->getNumOperands();
+ Assert(NumOps >= 2 && NumOps <= 3, "scope must have two or three operands",
+ MD);
+ Assert(MD->getOperand(0).get() == MD || isa<MDString>(MD->getOperand(0)),
+ "first scope operand must be self-referential or string", MD);
+ if (NumOps == 3)
+ Assert(isa<MDString>(MD->getOperand(2)),
+ "third scope operand must be string (if used)", MD);
+
+ MDNode *Domain = dyn_cast<MDNode>(MD->getOperand(1));
+ Assert(Domain != nullptr, "second scope operand must be MDNode", MD);
+
+ unsigned NumDomainOps = Domain->getNumOperands();
+ Assert(NumDomainOps >= 1 && NumDomainOps <= 2,
+ "domain must have one or two operands", Domain);
+ Assert(Domain->getOperand(0).get() == Domain ||
+ isa<MDString>(Domain->getOperand(0)),
+ "first domain operand must be self-referential or string", Domain);
+ if (NumDomainOps == 2)
+ Assert(isa<MDString>(Domain->getOperand(1)),
+ "second domain operand must be string (if used)", Domain);
+}
+
+void Verifier::visitAliasScopeListMetadata(const MDNode *MD) {
+ for (const MDOperand &Op : MD->operands()) {
+ const MDNode *OpMD = dyn_cast<MDNode>(Op);
+ Assert(OpMD != nullptr, "scope list must consist of MDNodes", MD);
+ visitAliasScopeMetadata(OpMD);
+ }
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
@@ -4403,10 +4474,21 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
+ // This code checks whether the function is used as the operand of a
+ // clang_arc_attachedcall operand bundle.
+ auto IsAttachedCallOperand = [](Function *F, const CallBase *CBI,
+ int Idx) {
+ return CBI && CBI->isOperandBundleOfType(
+ LLVMContext::OB_clang_arc_attachedcall, Idx);
+ };
+
// Check to make sure that the "address of" an intrinsic function is never
- // taken.
- Assert(!F->isIntrinsic() ||
- (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)),
+ // taken. Ignore cases where the address of the intrinsic function is used
+ // as the argument of operand bundle "clang.arc.attachedcall" as those
+ // cases are handled in verifyAttachedCallBundle.
+ Assert((!F->isIntrinsic() ||
+ (CBI && &CBI->getCalledOperandUse() == &I.getOperandUse(i)) ||
+ IsAttachedCallOperand(F, CBI, i)),
"Cannot take the address of an intrinsic!", &I);
Assert(
!F->isIntrinsic() || isa<CallInst>(I) ||
@@ -4420,9 +4502,10 @@ void Verifier::visitInstruction(Instruction &I) {
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint ||
- F->getIntrinsicID() == Intrinsic::wasm_rethrow,
+ F->getIntrinsicID() == Intrinsic::wasm_rethrow ||
+ IsAttachedCallOperand(F, CBI, i),
"Cannot invoke an intrinsic other than donothing, patchpoint, "
- "statepoint, coro_resume or coro_destroy",
+ "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall",
&I);
Assert(F->getParent() == &M, "Referencing function in another module!",
&I, &M, F, F->getParent());
@@ -4471,6 +4554,11 @@ void Verifier::visitInstruction(Instruction &I) {
visitRangeMetadata(I, Range, I.getType());
}
+ if (I.hasMetadata(LLVMContext::MD_invariant_group)) {
+ Assert(isa<LoadInst>(I) || isa<StoreInst>(I),
+ "invariant.group metadata is only for loads and stores", &I);
+ }
+
if (I.getMetadata(LLVMContext::MD_nonnull)) {
Assert(I.getType()->isPointerTy(), "nonnull applies only to pointer types",
&I);
@@ -4489,6 +4577,11 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_noalias))
+ visitAliasScopeListMetadata(MD);
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_alias_scope))
+ visitAliasScopeListMetadata(MD);
+
if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) {
Assert(I.getType()->isPointerTy(), "align applies only to pointer types",
&I);
@@ -4599,33 +4692,34 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
for (auto &Elem : Call.bundle_op_infos()) {
Assert(Elem.Tag->getKey() == "ignore" ||
Attribute::isExistingAttribute(Elem.Tag->getKey()),
- "tags must be valid attribute names");
+ "tags must be valid attribute names", Call);
Attribute::AttrKind Kind =
Attribute::getAttrKindFromName(Elem.Tag->getKey());
unsigned ArgCount = Elem.End - Elem.Begin;
if (Kind == Attribute::Alignment) {
Assert(ArgCount <= 3 && ArgCount >= 2,
- "alignment assumptions should have 2 or 3 arguments");
+ "alignment assumptions should have 2 or 3 arguments", Call);
Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(),
- "first argument should be a pointer");
+ "first argument should be a pointer", Call);
Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(),
- "second argument should be an integer");
+ "second argument should be an integer", Call);
if (ArgCount == 3)
Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(),
- "third argument should be an integer if present");
+ "third argument should be an integer if present", Call);
return;
}
- Assert(ArgCount <= 2, "to many arguments");
+ Assert(ArgCount <= 2, "too many arguments", Call);
if (Kind == Attribute::None)
break;
if (Attribute::isIntAttrKind(Kind)) {
- Assert(ArgCount == 2, "this attribute should have 2 arguments");
+ Assert(ArgCount == 2, "this attribute should have 2 arguments", Call);
Assert(isa<ConstantInt>(Call.getOperand(Elem.Begin + 1)),
- "the second argument should be a constant integral value");
+ "the second argument should be a constant integral value", Call);
} else if (Attribute::canUseAsParamAttr(Kind)) {
- Assert((ArgCount) == 1, "this attribute should have one argument");
+ Assert((ArgCount) == 1, "this attribute should have one argument",
+ Call);
} else if (Attribute::canUseAsFnAttr(Kind)) {
- Assert((ArgCount) == 0, "this attribute has no argument");
+ Assert((ArgCount) == 0, "this attribute has no argument", Call);
}
}
break;
@@ -4736,7 +4830,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"llvm.call.preallocated.setup");
FoundCall = true;
size_t NumPreallocatedArgs = 0;
- for (unsigned i = 0; i < UseCall->getNumArgOperands(); i++) {
+ for (unsigned i = 0; i < UseCall->arg_size(); i++) {
if (UseCall->paramHasAttr(i, Attribute::Preallocated)) {
++NumPreallocatedArgs;
}
@@ -4834,7 +4928,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Assert(AI && AI->isStaticAlloca(),
"llvm.localescape only accepts static allocas", Call);
}
- FrameEscapeInfo[BB->getParent()].first = Call.getNumArgOperands();
+ FrameEscapeInfo[BB->getParent()].first = Call.arg_size();
SawFrameEscape = true;
break;
}
@@ -4883,7 +4977,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
break;
}
case Intrinsic::experimental_gc_relocate: {
- Assert(Call.getNumArgOperands() == 3, "wrong number of arguments", Call);
+ Assert(Call.arg_size() == 3, "wrong number of arguments", Call);
Assert(isa<PointerType>(Call.getType()->getScalarType()),
"gc.relocate must return a pointer or a vector of pointers", Call);
@@ -5017,14 +5111,14 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::masked_gather: {
const APInt &Alignment =
cast<ConstantInt>(Call.getArgOperand(1))->getValue();
- Assert(Alignment.isNullValue() || Alignment.isPowerOf2(),
+ Assert(Alignment.isZero() || Alignment.isPowerOf2(),
"masked_gather: alignment must be 0 or a power of 2", Call);
break;
}
case Intrinsic::masked_scatter: {
const APInt &Alignment =
cast<ConstantInt>(Call.getArgOperand(2))->getValue();
- Assert(Alignment.isNullValue() || Alignment.isPowerOf2(),
+ Assert(Alignment.isZero() || Alignment.isPowerOf2(),
"masked_scatter: alignment must be 0 or a power of 2", Call);
break;
}
@@ -5340,7 +5434,7 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
// Compare intrinsics carry an extra predicate metadata operand.
if (isa<ConstrainedFPCmpIntrinsic>(FPI))
NumOperands += 1;
- Assert((FPI.getNumArgOperands() == NumOperands),
+ Assert((FPI.arg_size() == NumOperands),
"invalid arguments for constrained FP intrinsic", &FPI);
switch (FPI.getIntrinsicID()) {
@@ -5643,6 +5737,41 @@ void Verifier::verifyDeoptimizeCallingConvs() {
}
}
+void Verifier::verifyAttachedCallBundle(const CallBase &Call,
+ const OperandBundleUse &BU) {
+ FunctionType *FTy = Call.getFunctionType();
+
+ Assert((FTy->getReturnType()->isPointerTy() ||
+ (Call.doesNotReturn() && FTy->getReturnType()->isVoidTy())),
+ "a call with operand bundle \"clang.arc.attachedcall\" must call a "
+ "function returning a pointer or a non-returning function that has a "
+ "void return type",
+ Call);
+
+ Assert((BU.Inputs.empty() ||
+ (BU.Inputs.size() == 1 && isa<Function>(BU.Inputs.front()))),
+ "operand bundle \"clang.arc.attachedcall\" can take either no "
+ "arguments or one function as an argument",
+ Call);
+
+ if (BU.Inputs.empty())
+ return;
+
+ auto *Fn = cast<Function>(BU.Inputs.front());
+ Intrinsic::ID IID = Fn->getIntrinsicID();
+
+ if (IID) {
+ Assert((IID == Intrinsic::objc_retainAutoreleasedReturnValue ||
+ IID == Intrinsic::objc_unsafeClaimAutoreleasedReturnValue),
+ "invalid function argument", Call);
+ } else {
+ StringRef FnName = Fn->getName();
+ Assert((FnName == "objc_retainAutoreleasedReturnValue" ||
+ FnName == "objc_unsafeClaimAutoreleasedReturnValue"),
+ "invalid function argument", Call);
+ }
+}
+
void Verifier::verifySourceDebugInfo(const DICompileUnit &U, const DIFile &F) {
bool HasSource = F.getSource().hasValue();
if (!HasSourceDebugInfo.count(&U))
@@ -5671,6 +5800,7 @@ void Verifier::verifyNoAliasScopeDecl() {
II);
Assert(ScopeListMD->getNumOperands() == 1,
"!id.scope.list must point to a list with a single scope", II);
+ visitAliasScopeListMetadata(ScopeListMD);
}
// Only check the domination rule when requested. Once all passes have been
@@ -6036,11 +6166,7 @@ static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
// In the new format type nodes shall have a reference to the parent type as
// its first operand.
- MDNode *Parent = dyn_cast_or_null<MDNode>(Type->getOperand(0));
- if (!Parent)
- return false;
-
- return true;
+ return isa_and_nonnull<MDNode>(Type->getOperand(0));
}
bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
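The shape enforced by visitAliasScopeMetadata and visitAliasScopeListMetadata (a scope node pointing at a domain node, each headed by a self-reference or a string) matches what MDBuilder already produces. A sketch, assuming an LLVMContext Ctx and the usual llvm namespace:

#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

MDNode *buildScopeList(LLVMContext &Ctx) {
  MDBuilder MDB(Ctx);
  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("example.domain");
  MDNode *Scope = MDB.createAnonymousAliasScope(Domain, "example.scope");
  // A list of scopes is what !alias.scope / !noalias attachments point to.
  Metadata *Ops[] = {Scope};
  return MDNode::get(Ctx, Ops);
}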
diff --git a/llvm/lib/InterfaceStub/ELFObjHandler.cpp b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
index 112c1cea354a..d41c7d3217d7 100644
--- a/llvm/lib/InterfaceStub/ELFObjHandler.cpp
+++ b/llvm/lib/InterfaceStub/ELFObjHandler.cpp
@@ -367,7 +367,7 @@ Error appendToError(Error Err, StringRef After) {
Stream << Err;
Stream << " " << After;
consumeError(std::move(Err));
- return createError(Stream.str().c_str());
+ return createError(Stream.str());
}
/// This function populates a DynamicEntries struct using an ELFT::DynRange.
diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp
index d3d351fa2ed4..e6bf09232ce2 100644
--- a/llvm/lib/InterfaceStub/IFSHandler.cpp
+++ b/llvm/lib/InterfaceStub/IFSHandler.cpp
@@ -163,7 +163,7 @@ bool usesTriple(StringRef Buf) {
for (line_iterator I(MemoryBufferRef(Buf, "ELFStub")); !I.is_at_eof(); ++I) {
StringRef Line = (*I).trim();
if (Line.startswith("Target:")) {
- if (Line == "Target:" || (Line.find("{") != Line.npos)) {
+ if (Line == "Target:" || Line.contains("{")) {
return false;
}
}
@@ -327,3 +327,13 @@ void ifs::stripIFSTarget(IFSStub &Stub, bool StripTriple, bool StripArch,
Stub.Target.ObjectFormat.reset();
}
}
+
+void ifs::stripIFSUndefinedSymbols(IFSStub &Stub) {
+ for (auto Iter = Stub.Symbols.begin(); Iter != Stub.Symbols.end();) {
+ if (Iter->Undefined) {
+ Iter = Stub.Symbols.erase(Iter);
+ } else {
+ Iter++;
+ }
+ }
+}
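The new stripIFSUndefinedSymbols is a plain erase loop; since the container is a std::vector, the same effect could also be expressed with llvm::erase_if from STLExtras. This is only an equivalent sketch, not part of the patch, and assumes IFSSymbol is the element type of IFSStub::Symbols:

llvm::erase_if(Stub.Symbols,
               [](const IFSSymbol &Sym) { return Sym.Undefined; });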
diff --git a/llvm/lib/InterfaceStub/IFSStub.cpp b/llvm/lib/InterfaceStub/IFSStub.cpp
index bbc91ada1ded..008263f8db9f 100644
--- a/llvm/lib/InterfaceStub/IFSStub.cpp
+++ b/llvm/lib/InterfaceStub/IFSStub.cpp
@@ -29,7 +29,7 @@ IFSStub::IFSStub(IFSStub &&Stub) {
Symbols = std::move(Stub.Symbols);
}
-IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) {
+IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) : IFSStub() {
IfsVersion = Stub.IfsVersion;
Target = Stub.Target;
SoName = Stub.SoName;
@@ -37,7 +37,7 @@ IFSStubTriple::IFSStubTriple(IFSStubTriple const &Stub) {
Symbols = Stub.Symbols;
}
-IFSStubTriple::IFSStubTriple(IFSStub const &Stub) {
+IFSStubTriple::IFSStubTriple(IFSStub const &Stub) : IFSStub() {
IfsVersion = Stub.IfsVersion;
Target = Stub.Target;
SoName = Stub.SoName;
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 69d500ba9bce..6ce2ed265739 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/LTO.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -32,6 +33,7 @@
#include "llvm/LTO/LTOBackend.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/Linker/IRMover.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
@@ -41,7 +43,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/TimeProfiler.h"
@@ -536,12 +537,12 @@ void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
auto *ResI = Res.begin();
auto *ResE = Res.end();
(void)ResE;
+ const Triple TT(RegularLTO.CombinedModule->getTargetTriple());
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
StringRef Name = Sym.getName();
- Triple TT(RegularLTO.CombinedModule->getTargetTriple());
// Strip the __imp_ prefix from COFF dllimport symbols (similar to the
// way they are handled by lld), otherwise we can end up with two
// global resolutions (one with and one for a copy of the symbol without).
@@ -732,7 +733,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
DenseSet<GlobalObject *> AliasedGlobals;
for (auto &GA : M.aliases())
- if (GlobalObject *GO = GA.getBaseObject())
+ if (GlobalObject *GO = GA.getAliaseeObject())
AliasedGlobals.insert(GO);
// In this function we need IR GlobalValues matching the symbols in Syms
@@ -856,10 +857,14 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
for (GlobalValue *GV : Mod.Keep) {
if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
if (Function *F = dyn_cast<Function>(GV)) {
- OptimizationRemarkEmitter ORE(F, nullptr);
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
- << ore::NV("Function", F)
- << " not added to the combined module ");
+ if (DiagnosticOutputFile) {
+ if (Error Err = F->materialize())
+ return Err;
+ OptimizationRemarkEmitter ORE(F, nullptr);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "deadfunction", F)
+ << ore::NV("Function", F)
+ << " not added to the combined module ");
+ }
}
continue;
}
@@ -992,7 +997,7 @@ Error LTO::checkPartiallySplit() {
return Error::success();
}
-Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
+Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
// Compute "dead" symbols, we don't want to import/export these!
DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
DenseMap<GlobalValue::GUID, PrevailingType> GUIDPrevailingResolutions;
@@ -1048,6 +1053,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
Conf.RemarksHotnessThreshold);
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
+ DiagnosticOutputFile = std::move(*DiagFileOrErr);
// Finalize linking of regular LTO modules containing summaries now that
// we have computed liveness information.
@@ -1136,7 +1142,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
return Err;
}
- return finalizeOptimizationRemarks(std::move(*DiagFileOrErr));
+ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
}
static const char *libcallRoutineNames[] = {
@@ -1177,7 +1183,7 @@ namespace {
class InProcessThinBackend : public ThinBackendProc {
ThreadPool BackendThreadPool;
AddStreamFn AddStream;
- NativeObjectCache Cache;
+ FileCache Cache;
std::set<GlobalValue::GUID> CfiFunctionDefs;
std::set<GlobalValue::GUID> CfiFunctionDecls;
@@ -1189,7 +1195,7 @@ public:
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
ThreadPoolStrategy ThinLTOParallelism,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache)
+ AddStreamFn AddStream, FileCache Cache)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
BackendThreadPool(ThinLTOParallelism), AddStream(std::move(AddStream)),
Cache(std::move(Cache)) {
@@ -1202,8 +1208,8 @@ public:
}
Error runThinLTOBackendThread(
- AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task,
- BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
+ AddStreamFn AddStream, FileCache Cache, unsigned Task, BitcodeModule BM,
+ ModuleSummaryIndex &CombinedIndex,
const FunctionImporter::ImportMapTy &ImportList,
const FunctionImporter::ExportSetTy &ExportList,
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
@@ -1233,7 +1239,11 @@ public:
computeLTOCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList,
ExportList, ResolvedODR, DefinedGlobals, CfiFunctionDefs,
CfiFunctionDecls);
- if (AddStreamFn CacheAddStream = Cache(Task, Key))
+ Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, Key);
+ if (Error Err = CacheAddStreamOrErr.takeError())
+ return Err;
+ AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;
+ if (CacheAddStream)
return RunThinBackend(CacheAddStream);
return Error::success();
@@ -1295,7 +1305,7 @@ public:
ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism) {
return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache) {
+ AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<InProcessThinBackend>(
Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries, AddStream,
Cache);
@@ -1389,15 +1399,20 @@ ThinBackend lto::createWriteIndexesThinBackend(
raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- AddStreamFn AddStream, NativeObjectCache Cache) {
+ AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<WriteIndexesThinBackend>(
Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix,
ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite);
};
}
-Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ timeTraceProfilerBegin("ThinLink", StringRef(""));
+ auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+ });
if (ThinLTO.ModuleMap.empty())
return Error::success();
@@ -1510,8 +1525,15 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
thinLTOResolvePrevailingInIndex(Conf, ThinLTO.CombinedIndex, isPrevailing,
recordNewLinkage, GUIDPreservedSymbols);
+ thinLTOPropagateFunctionAttrs(ThinLTO.CombinedIndex, isPrevailing);
+
generateParamAccessSummary(ThinLTO.CombinedIndex);
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+
+ TimeTraceScopeExit.release();
+
std::unique_ptr<ThinBackendProc> BackendProc =
ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
AddStream, Cache);
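With NativeObjectCache/NativeObjectStream renamed to FileCache/CachedFileStream and made Error-aware, a client's AddStreamFn now returns Expected<std::unique_ptr<CachedFileStream>>. A hypothetical client callback under that assumption (the file-naming scheme is illustrative only, and the usual raw_ostream, StringExtras and Error includes plus the llvm namespace are assumed):

auto AddStream =
    [&](size_t Task) -> Expected<std::unique_ptr<CachedFileStream>> {
  std::error_code EC;
  auto OS =
      std::make_unique<raw_fd_ostream>("lto.out." + utostr(Task), EC);
  if (EC)
    return errorCodeToError(EC); // surface I/O failures as llvm::Error
  return std::make_unique<CachedFileStream>(std::move(OS));
};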
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 4e4ba4f3a58e..be06556b0c3b 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -27,6 +27,7 @@
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
@@ -37,7 +38,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -74,7 +74,11 @@ static cl::opt<bool> ThinLTOAssumeMerged(
cl::desc("Assume the input has already undergone ThinLTO function "
"importing and the other pre-optimization pipeline changes."));
-LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
+namespace llvm {
+extern cl::opt<bool> NoPGOWarnMismatch;
+}
+
+[[noreturn]] static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
exit(1);
@@ -221,10 +225,13 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
PGOOptions::IRUse, PGOOptions::CSIRUse,
Conf.AddFSDiscriminator);
+ NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
} else if (Conf.AddFSDiscriminator) {
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
PGOOptions::NoCSAction, true);
}
+ if (TM)
+ TM->setPGOOption(PGOOpt);
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
@@ -244,18 +251,16 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
TLII->disableAllFunctions();
FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
- AAManager AA;
// Parse a custom AA pipeline if asked to.
if (!Conf.AAPipeline.empty()) {
+ AAManager AA;
if (auto Err = PB.parseAAPipeline(AA, Conf.AAPipeline)) {
- report_fatal_error("unable to parse AA pipeline description '" +
+ report_fatal_error(Twine("unable to parse AA pipeline description '") +
Conf.AAPipeline + "': " + toString(std::move(Err)));
}
- } else {
- AA = PB.buildDefaultAAPipeline();
+ // Register the AA manager first so that our version is the one used.
+ FAM.registerPass([&] { return std::move(AA); });
}
- // Register the AA manager first so that our version is the one used.
- FAM.registerPass([&] { return std::move(AA); });
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
@@ -269,29 +274,29 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
if (!Conf.DisableVerify)
MPM.addPass(VerifierPass());
- PassBuilder::OptimizationLevel OL;
+ OptimizationLevel OL;
switch (OptLevel) {
default:
llvm_unreachable("Invalid optimization level");
case 0:
- OL = PassBuilder::OptimizationLevel::O0;
+ OL = OptimizationLevel::O0;
break;
case 1:
- OL = PassBuilder::OptimizationLevel::O1;
+ OL = OptimizationLevel::O1;
break;
case 2:
- OL = PassBuilder::OptimizationLevel::O2;
+ OL = OptimizationLevel::O2;
break;
case 3:
- OL = PassBuilder::OptimizationLevel::O3;
+ OL = OptimizationLevel::O3;
break;
}
// Parse a custom pipeline if asked to.
if (!Conf.OptPipeline.empty()) {
if (auto Err = PB.parsePassPipeline(MPM, Conf.OptPipeline)) {
- report_fatal_error("unable to parse pass pipeline description '" +
+ report_fatal_error(Twine("unable to parse pass pipeline description '") +
Conf.OptPipeline + "': " + toString(std::move(Err)));
}
} else if (IsThinLTO) {
@@ -387,8 +392,8 @@ static void codegen(const Config &Conf, TargetMachine *TM,
if (!Conf.DwoDir.empty()) {
std::error_code EC;
if (auto EC = llvm::sys::fs::create_directories(Conf.DwoDir))
- report_fatal_error("Failed to create directory " + Conf.DwoDir + ": " +
- EC.message());
+ report_fatal_error(Twine("Failed to create directory ") + Conf.DwoDir +
+ ": " + EC.message());
DwoFile = Conf.DwoDir;
sys::path::append(DwoFile, std::to_string(Task) + ".dwo");
@@ -400,10 +405,14 @@ static void codegen(const Config &Conf, TargetMachine *TM,
std::error_code EC;
DwoOut = std::make_unique<ToolOutputFile>(DwoFile, EC, sys::fs::OF_None);
if (EC)
- report_fatal_error("Failed to open " + DwoFile + ": " + EC.message());
+ report_fatal_error(Twine("Failed to open ") + DwoFile + ": " +
+ EC.message());
}
- auto Stream = AddStream(Task);
+ Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = AddStream(Task);
+ if (Error Err = StreamOrErr.takeError())
+ report_fatal_error(std::move(Err));
+ std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr;
legacy::PassManager CodeGenPasses;
CodeGenPasses.add(
createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex));
@@ -599,7 +608,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
dropDeadSymbols(Mod, DefinedGlobals, CombinedIndex);
- thinLTOResolvePrevailingInModule(Mod, DefinedGlobals);
+ thinLTOFinalizeInModule(Mod, DefinedGlobals, /*PropagateAttrs=*/true);
if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index 7bffcbf01b03..088e45c9e8dc 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -44,13 +44,13 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
@@ -245,8 +245,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
// make unique temp output file to put generated code
SmallString<128> Filename;
- auto AddStream =
- [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
StringRef Extension(Config.CGFileType == CGFT_AssemblyFile ? "s" : "o");
int FD;
@@ -255,7 +254,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
if (EC)
emitError(EC.message());
- return std::make_unique<lto::NativeObjectStream>(
+ return std::make_unique<CachedFileStream>(
std::make_unique<llvm::raw_fd_ostream>(FD, true));
};
@@ -557,7 +556,7 @@ bool LTOCodeGenerator::optimize() {
return true;
}
-bool LTOCodeGenerator::compileOptimized(lto::AddStreamFn AddStream,
+bool LTOCodeGenerator::compileOptimized(AddStreamFn AddStream,
unsigned ParallelismLevel) {
if (!this->determineTarget())
return false;
diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp
index 155790041a75..4cc1b307c553 100644
--- a/llvm/lib/LTO/LTOModule.cpp
+++ b/llvm/lib/LTO/LTOModule.cpp
@@ -27,6 +27,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -35,7 +36,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
@@ -688,3 +688,16 @@ Expected<uint32_t> LTOModule::getMachOCPUType() const {
Expected<uint32_t> LTOModule::getMachOCPUSubType() const {
return MachO::getCPUSubType(Triple(Mod->getTargetTriple()));
}
+
+bool LTOModule::hasCtorDtor() const {
+ for (auto Sym : SymTab.symbols()) {
+ if (auto *GV = Sym.dyn_cast<GlobalValue *>()) {
+ StringRef Name = GV->getName();
+ if (Name.consume_front("llvm.global_")) {
+ if (Name.equals("ctors") || Name.equals("dtors"))
+ return true;
+ }
+ }
+ }
+ return false;
+}
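hasCtorDtor relies on StringRef::consume_front, which strips a prefix in place and reports whether it was present. A tiny sketch of that idiom:

static bool isGlobalCtorOrDtorName(StringRef Name) {
  if (Name.consume_front("llvm.global_"))       // strips the prefix in place
    return Name == "ctors" || Name == "dtors";  // e.g. "llvm.global_ctors"
  return false;
}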
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 8f0fa933a6a1..9474d8c9dafb 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -14,6 +14,7 @@
#include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -37,6 +38,7 @@
#include "llvm/LTO/LTO.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/StandardInstrumentations.h"
@@ -48,12 +50,12 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -289,11 +291,6 @@ static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM,
TLII->disableAllFunctions();
FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
- AAManager AA = PB.buildDefaultAAPipeline();
-
- // Register the AA manager first so that our version is the one used.
- FAM.registerPass([&] { return std::move(AA); });
-
// Register all the basic analyses with the managers.
PB.registerModuleAnalyses(MAM);
PB.registerCGSCCAnalyses(CGAM);
@@ -303,22 +300,22 @@ static void optimizeModuleNewPM(Module &TheModule, TargetMachine &TM,
ModulePassManager MPM;
- PassBuilder::OptimizationLevel OL;
+ OptimizationLevel OL;
switch (OptLevel) {
default:
llvm_unreachable("Invalid optimization level");
case 0:
- OL = PassBuilder::OptimizationLevel::O0;
+ OL = OptimizationLevel::O0;
break;
case 1:
- OL = PassBuilder::OptimizationLevel::O1;
+ OL = OptimizationLevel::O1;
break;
case 2:
- OL = PassBuilder::OptimizationLevel::O2;
+ OL = OptimizationLevel::O2;
break;
case 3:
- OL = PassBuilder::OptimizationLevel::O3;
+ OL = OptimizationLevel::O3;
break;
}
@@ -503,7 +500,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
promoteModule(TheModule, Index, ClearDSOLocalOnDeclarations);
// Apply summary-based prevailing-symbol resolution decisions.
- thinLTOResolvePrevailingInModule(TheModule, DefinedGlobals);
+ thinLTOFinalizeInModule(TheModule, DefinedGlobals, /*PropagateAttrs=*/true);
// Save temps: after promotion.
saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
@@ -607,7 +604,7 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
auto InputOrError = lto::InputFile::create(Buffer);
if (!InputOrError)
- report_fatal_error("ThinLTO cannot create input file: " +
+ report_fatal_error(Twine("ThinLTO cannot create input file: ") +
toString(InputOrError.takeError()));
auto TripleStr = (*InputOrError)->getTargetTriple();
@@ -642,7 +639,7 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
const Target *TheTarget =
TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
if (!TheTarget) {
- report_fatal_error("Can't load target for this Triple: " + ErrMsg);
+ report_fatal_error(Twine("Can't load target for this Triple: ") + ErrMsg);
}
// Use MAttr as the default set of features.
@@ -762,8 +759,9 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
PrevailingCopy);
- thinLTOResolvePrevailingInModule(
- TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+ thinLTOFinalizeInModule(TheModule,
+ ModuleToDefinedGVSummaries[ModuleIdentifier],
+ /*PropagateAttrs=*/false);
// Promote the exported values in the index, so that they are promoted
// in the module.
@@ -937,8 +935,9 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
promoteModule(TheModule, Index, /*ClearDSOLocalOnDeclarations=*/false);
// Internalization
- thinLTOResolvePrevailingInModule(
- TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+ thinLTOFinalizeInModule(TheModule,
+ ModuleToDefinedGVSummaries[ModuleIdentifier],
+ /*PropagateAttrs=*/false);
thinLTOInternalizeModule(TheModule,
ModuleToDefinedGVSummaries[ModuleIdentifier]);
@@ -989,13 +988,18 @@ ThinLTOCodeGenerator::writeGeneratedObject(int count, StringRef CacheEntryPath,
std::error_code Err;
raw_fd_ostream OS(OutputPath, Err, sys::fs::OF_None);
if (Err)
- report_fatal_error("Can't open output '" + OutputPath + "'\n");
+ report_fatal_error(Twine("Can't open output '") + OutputPath + "'\n");
OS << OutputBuffer.getBuffer();
return std::string(OutputPath.str());
}
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
+ timeTraceProfilerBegin("ThinLink", StringRef(""));
+ auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+ });
// Prepare the resulting object vector
assert(ProducedBinaries.empty() && "The generator should not be reused");
if (SavedObjectsDirectoryPath.empty())
@@ -1005,7 +1009,7 @@ void ThinLTOCodeGenerator::run() {
bool IsDir;
sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
if (!IsDir)
- report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
+ report_fatal_error(Twine("Unexistent dir: '") + SavedObjectsDirectoryPath + "'");
ProducedBinaryFiles.resize(Modules.size());
}
@@ -1124,6 +1128,8 @@ void ThinLTOCodeGenerator::run() {
*Index, IsExported(ExportLists, GUIDPreservedSymbols),
IsPrevailing(PrevailingCopy));
+ thinLTOPropagateFunctionAttrs(*Index, IsPrevailing(PrevailingCopy));
+
// Make sure that every module has an entry in the ExportLists, ImportList,
// GVSummary and ResolvedODR maps to enable threaded access to these maps
// below.
@@ -1141,6 +1147,11 @@ void ThinLTOCodeGenerator::run() {
ModulesVec.push_back(&Mod->getSingleBitcodeModule());
std::vector<int> ModulesOrdering = lto::generateModulesOrdering(ModulesVec);
+ if (llvm::timeTraceProfilerEnabled())
+ llvm::timeTraceProfilerEnd();
+
+ TimeTraceScopeExit.release();
+
// Parallel optimizer + codegen
{
ThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount));
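
The run() changes above pair an unconditional timeTraceProfilerBegin("ThinLink", ...) with a make_scope_exit guard, then end the section explicitly and release the guard right before the parallel codegen phase. A small sketch of that pattern in isolation follows; runThinLinkPhase is a hypothetical stand-in for the thin-link portion of run().

  #include "llvm/ADT/ScopeExit.h"
  #include "llvm/Support/TimeProfiler.h"

  void runThinLinkPhase() {
    llvm::timeTraceProfilerBegin("ThinLink", llvm::StringRef(""));
    // The guard guarantees the section is closed on every early return.
    auto EndGuard = llvm::make_scope_exit([] {
      if (llvm::timeTraceProfilerEnabled())
        llvm::timeTraceProfilerEnd();
    });

    // ... thin-link work: index computation, promotion, internalization ...

    // Close the section before handing off to the thread pool, then disarm
    // the guard so the section is not ended twice.
    if (llvm::timeTraceProfilerEnabled())
      llvm::timeTraceProfilerEnd();
    EndGuard.release();
  }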
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 7bc6f0585921..bad483be197d 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/TypeFinder.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <utility>
using namespace llvm;
@@ -491,8 +492,8 @@ class IRLinker {
void linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src);
Error linkFunctionBody(Function &Dst, Function &Src);
- void linkIndirectSymbolBody(GlobalIndirectSymbol &Dst,
- GlobalIndirectSymbol &Src);
+ void linkAliasAliasee(GlobalAlias &Dst, GlobalAlias &Src);
+ void linkIFuncResolver(GlobalIFunc &Dst, GlobalIFunc &Src);
Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
/// Replace all types in the source AttributeList with the
@@ -503,7 +504,7 @@ class IRLinker {
/// into the destination module.
GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar);
Function *copyFunctionProto(const Function *SF);
- GlobalValue *copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS);
+ GlobalValue *copyIndirectSymbolProto(const GlobalValue *SGV);
/// Perform "replace all uses with" operations. These work items need to be
/// performed as part of materialization, but we postpone them to happen after
@@ -605,10 +606,14 @@ Value *IRLinker::materialize(Value *V, bool ForIndirectSymbol) {
} else if (auto *V = dyn_cast<GlobalVariable>(New)) {
if (V->hasInitializer() || V->hasAppendingLinkage())
return New;
- } else {
- auto *IS = cast<GlobalIndirectSymbol>(New);
- if (IS->getIndirectSymbol())
+ } else if (auto *GA = dyn_cast<GlobalAlias>(New)) {
+ if (GA->getAliasee())
+ return New;
+ } else if (auto *GI = dyn_cast<GlobalIFunc>(New)) {
+ if (GI->getResolver())
return New;
+ } else {
+ llvm_unreachable("Invalid GlobalValue type");
}
// If the global is being linked for an indirect symbol, it may have already
@@ -648,12 +653,14 @@ GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
AttributeList IRLinker::mapAttributeTypes(LLVMContext &C, AttributeList Attrs) {
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- for (Attribute::AttrKind TypedAttr :
- {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef,
- Attribute::InAlloca}) {
- if (Attrs.hasAttribute(i, TypedAttr)) {
- if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
- Attrs = Attrs.replaceAttributeType(C, i, TypedAttr, TypeMap.get(Ty));
+ for (int AttrIdx = Attribute::FirstTypeAttr;
+ AttrIdx <= Attribute::LastTypeAttr; AttrIdx++) {
+ Attribute::AttrKind TypedAttr = (Attribute::AttrKind)AttrIdx;
+ if (Attrs.hasAttributeAtIndex(i, TypedAttr)) {
+ if (Type *Ty =
+ Attrs.getAttributeAtIndex(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeTypeAtIndex(C, i, TypedAttr,
+ TypeMap.get(Ty));
break;
}
}
@@ -677,22 +684,28 @@ Function *IRLinker::copyFunctionProto(const Function *SF) {
/// Set up prototypes for any indirect symbols that come over from the source
/// module.
-GlobalValue *
-IRLinker::copyGlobalIndirectSymbolProto(const GlobalIndirectSymbol *SGIS) {
+GlobalValue *IRLinker::copyIndirectSymbolProto(const GlobalValue *SGV) {
// If there is no linkage to be performed or we're linking from the source,
// bring over SGA.
- auto *Ty = TypeMap.get(SGIS->getValueType());
- GlobalIndirectSymbol *GIS;
- if (isa<GlobalAlias>(SGIS))
- GIS = GlobalAlias::create(Ty, SGIS->getAddressSpace(),
- GlobalValue::ExternalLinkage, SGIS->getName(),
- &DstM);
- else
- GIS = GlobalIFunc::create(Ty, SGIS->getAddressSpace(),
- GlobalValue::ExternalLinkage, SGIS->getName(),
- nullptr, &DstM);
- GIS->copyAttributesFrom(SGIS);
- return GIS;
+ auto *Ty = TypeMap.get(SGV->getValueType());
+
+ if (auto *GA = dyn_cast<GlobalAlias>(SGV)) {
+ auto *DGA = GlobalAlias::create(Ty, SGV->getAddressSpace(),
+ GlobalValue::ExternalLinkage,
+ SGV->getName(), &DstM);
+ DGA->copyAttributesFrom(GA);
+ return DGA;
+ }
+
+ if (auto *GI = dyn_cast<GlobalIFunc>(SGV)) {
+ auto *DGI = GlobalIFunc::create(Ty, SGV->getAddressSpace(),
+ GlobalValue::ExternalLinkage,
+ SGV->getName(), nullptr, &DstM);
+ DGI->copyAttributesFrom(GI);
+ return DGI;
+ }
+
+ llvm_unreachable("Invalid source global value type");
}
GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
@@ -704,7 +717,7 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
NewGV = copyFunctionProto(SF);
} else {
if (ForDefinition)
- NewGV = copyGlobalIndirectSymbolProto(cast<GlobalIndirectSymbol>(SGV));
+ NewGV = copyIndirectSymbolProto(SGV);
else if (SGV->getValueType()->isFunctionTy())
NewGV =
Function::Create(cast<FunctionType>(TypeMap.get(SGV->getValueType())),
@@ -1108,10 +1121,12 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
return Error::success();
}
-void IRLinker::linkIndirectSymbolBody(GlobalIndirectSymbol &Dst,
- GlobalIndirectSymbol &Src) {
- Mapper.scheduleMapGlobalIndirectSymbol(Dst, *Src.getIndirectSymbol(),
- IndirectSymbolMCID);
+void IRLinker::linkAliasAliasee(GlobalAlias &Dst, GlobalAlias &Src) {
+ Mapper.scheduleMapGlobalAlias(Dst, *Src.getAliasee(), IndirectSymbolMCID);
+}
+
+void IRLinker::linkIFuncResolver(GlobalIFunc &Dst, GlobalIFunc &Src) {
+ Mapper.scheduleMapGlobalIFunc(Dst, *Src.getResolver(), IndirectSymbolMCID);
}
Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
@@ -1121,7 +1136,11 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
linkGlobalVariable(cast<GlobalVariable>(Dst), *GVar);
return Error::success();
}
- linkIndirectSymbolBody(cast<GlobalIndirectSymbol>(Dst), cast<GlobalIndirectSymbol>(Src));
+ if (auto *GA = dyn_cast<GlobalAlias>(&Src)) {
+ linkAliasAliasee(cast<GlobalAlias>(Dst), *GA);
+ return Error::success();
+ }
+ linkIFuncResolver(cast<GlobalIFunc>(Dst), cast<GlobalIFunc>(Src));
return Error::success();
}
@@ -1443,7 +1462,39 @@ Error IRLinker::run() {
if (DstM.getDataLayout().isDefault())
DstM.setDataLayout(SrcM->getDataLayout());
- if (SrcM->getDataLayout() != DstM.getDataLayout()) {
+ // Copy the target triple from the source to dest if the dest's is empty.
+ if (DstM.getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM.setTargetTriple(SrcM->getTargetTriple());
+
+ Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple());
+
+ // During CUDA compilation we have to link with the bitcode supplied with
+ // CUDA. libdevice bitcode either has no data layout set (pre-CUDA-11), or has
+ // the layout that is different from the one used by LLVM/clang (it does not
+ // include i128). Issuing a warning is not very helpful as there's not much
+ // the user can do about it.
+ bool EnableDLWarning = true;
+ bool EnableTripleWarning = true;
+ if (SrcTriple.isNVPTX() && DstTriple.isNVPTX()) {
+ std::string ModuleId = SrcM->getModuleIdentifier();
+ StringRef FileName = llvm::sys::path::filename(ModuleId);
+ bool SrcIsLibDevice =
+ FileName.startswith("libdevice") && FileName.endswith(".10.bc");
+ bool SrcHasLibDeviceDL =
+ (SrcM->getDataLayoutStr().empty() ||
+ SrcM->getDataLayoutStr() == "e-i64:64-v16:16-v32:32-n16:32:64");
+ // libdevice bitcode uses nvptx64-nvidia-gpulibs or just
+ // 'nvptx-unknown-unknown' triple (before CUDA-10.x) and is compatible with
+ // all NVPTX variants.
+ bool SrcHasLibDeviceTriple = (SrcTriple.getVendor() == Triple::NVIDIA &&
+ SrcTriple.getOSName() == "gpulibs") ||
+ (SrcTriple.getVendorName() == "unknown" &&
+ SrcTriple.getOSName() == "unknown");
+ EnableTripleWarning = !(SrcIsLibDevice && SrcHasLibDeviceTriple);
+ EnableDLWarning = !(SrcIsLibDevice && SrcHasLibDeviceDL);
+ }
+
+ if (EnableDLWarning && (SrcM->getDataLayout() != DstM.getDataLayout())) {
emitWarning("Linking two modules of different data layouts: '" +
SrcM->getModuleIdentifier() + "' is '" +
SrcM->getDataLayoutStr() + "' whereas '" +
@@ -1451,13 +1502,7 @@ Error IRLinker::run() {
DstM.getDataLayoutStr() + "'\n");
}
- // Copy the target triple from the source to dest if the dest's is empty.
- if (DstM.getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
- DstM.setTargetTriple(SrcM->getTargetTriple());
-
- Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM.getTargetTriple());
-
- if (!SrcM->getTargetTriple().empty()&&
+ if (EnableTripleWarning && !SrcM->getTargetTriple().empty() &&
!SrcTriple.isCompatibleWith(DstTriple))
emitWarning("Linking two modules of different target triples: '" +
SrcM->getModuleIdentifier() + "' is '" +
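
The NVPTX-specific logic above suppresses the data-layout and triple warnings only when the source module is CUDA's libdevice. A minimal sketch of the filename test on its own (the path in main() is a made-up example):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/Path.h"

  // A module is treated as libdevice when its identifier's filename looks like
  // "libdevice*.10.bc", exactly as in the hunk above.
  static bool looksLikeLibDevice(llvm::StringRef ModuleId) {
    llvm::StringRef FileName = llvm::sys::path::filename(ModuleId);
    return FileName.startswith("libdevice") && FileName.endswith(".10.bc");
  }

  int main() {
    return looksLikeLibDevice("/usr/local/cuda/nvvm/libdevice/libdevice.10.bc") ? 0 : 1;
  }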
diff --git a/llvm/lib/Linker/LinkModules.cpp b/llvm/lib/Linker/LinkModules.cpp
index 97d6f8cd8075..f9f51bf17d95 100644
--- a/llvm/lib/Linker/LinkModules.cpp
+++ b/llvm/lib/Linker/LinkModules.cpp
@@ -24,6 +24,8 @@ using namespace llvm;
namespace {
+enum class LinkFrom { Dst, Src, Both };
+
/// This is an implementation class for the LinkModules function, which is the
/// entrypoint for this file.
class ModuleLinker {
@@ -67,11 +69,11 @@ class ModuleLinker {
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
- bool &LinkFromSrc);
- std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>>
+ LinkFrom &From);
+ DenseMap<const Comdat *, std::pair<Comdat::SelectionKind, LinkFrom>>
ComdatsChosen;
bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
- bool &LinkFromSrc);
+ LinkFrom &From);
// Keep track of the lazy linked global members of each comdat in source.
DenseMap<const Comdat *, std::vector<GlobalValue *>> LazyComdatMembers;
@@ -103,7 +105,7 @@ class ModuleLinker {
void dropReplacedComdat(GlobalValue &GV,
const DenseSet<const Comdat *> &ReplacedDstComdats);
- bool linkIfNeeded(GlobalValue &GV);
+ bool linkIfNeeded(GlobalValue &GV, SmallVectorImpl<GlobalValue *> &GVToClone);
public:
ModuleLinker(IRMover &Mover, std::unique_ptr<Module> SrcM, unsigned Flags,
@@ -114,7 +116,7 @@ public:
bool run();
};
-}
+} // namespace
static GlobalValue::VisibilityTypes
getMinVisibility(GlobalValue::VisibilityTypes A,
@@ -131,7 +133,7 @@ bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName,
const GlobalVariable *&GVar) {
const GlobalValue *GVal = M.getNamedValue(ComdatName);
if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) {
- GVal = GA->getBaseObject();
+ GVal = GA->getAliaseeObject();
if (!GVal)
// We cannot resolve the size of the aliasee yet.
return emitError("Linking COMDATs named '" + ComdatName +
@@ -151,7 +153,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
Comdat::SelectionKind Src,
Comdat::SelectionKind Dst,
Comdat::SelectionKind &Result,
- bool &LinkFromSrc) {
+ LinkFrom &From) {
Module &DstM = Mover.getModule();
// The ability to mix Comdat::SelectionKind::Any with
// Comdat::SelectionKind::Largest is a behavior that comes from COFF.
@@ -175,11 +177,11 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
switch (Result) {
case Comdat::SelectionKind::Any:
// Go with Dst.
- LinkFromSrc = false;
+ From = LinkFrom::Dst;
break;
case Comdat::SelectionKind::NoDeduplicate:
- return emitError("Linking COMDATs named '" + ComdatName +
- "': nodeduplicate has been violated!");
+ From = LinkFrom::Both;
+ break;
case Comdat::SelectionKind::ExactMatch:
case Comdat::SelectionKind::Largest:
case Comdat::SelectionKind::SameSize: {
@@ -197,14 +199,14 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
if (SrcGV->getInitializer() != DstGV->getInitializer())
return emitError("Linking COMDATs named '" + ComdatName +
"': ExactMatch violated!");
- LinkFromSrc = false;
+ From = LinkFrom::Dst;
} else if (Result == Comdat::SelectionKind::Largest) {
- LinkFromSrc = SrcSize > DstSize;
+ From = SrcSize > DstSize ? LinkFrom::Src : LinkFrom::Dst;
} else if (Result == Comdat::SelectionKind::SameSize) {
if (SrcSize != DstSize)
return emitError("Linking COMDATs named '" + ComdatName +
"': SameSize violated!");
- LinkFromSrc = false;
+ From = LinkFrom::Dst;
} else {
llvm_unreachable("unknown selection kind");
}
@@ -217,7 +219,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName,
bool ModuleLinker::getComdatResult(const Comdat *SrcC,
Comdat::SelectionKind &Result,
- bool &LinkFromSrc) {
+ LinkFrom &From) {
Module &DstM = Mover.getModule();
Comdat::SelectionKind SSK = SrcC->getSelectionKind();
StringRef ComdatName = SrcC->getName();
@@ -226,15 +228,14 @@ bool ModuleLinker::getComdatResult(const Comdat *SrcC,
if (DstCI == ComdatSymTab.end()) {
// Use the comdat if it is only available in one of the modules.
- LinkFromSrc = true;
+ From = LinkFrom::Src;
Result = SSK;
return false;
}
const Comdat *DstC = &DstCI->second;
Comdat::SelectionKind DSK = DstC->getSelectionKind();
- return computeResultingSelectionKind(ComdatName, SSK, DSK, Result,
- LinkFromSrc);
+ return computeResultingSelectionKind(ComdatName, SSK, DSK, Result, From);
}
bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
@@ -325,7 +326,8 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc,
"': symbol multiply defined!");
}
-bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
+bool ModuleLinker::linkIfNeeded(GlobalValue &GV,
+ SmallVectorImpl<GlobalValue *> &GVToClone) {
GlobalValue *DGV = getLinkedToGlobal(&GV);
if (shouldLinkOnlyNeeded()) {
@@ -377,17 +379,18 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) {
if (GV.isDeclaration())
return false;
+ LinkFrom ComdatFrom = LinkFrom::Dst;
if (const Comdat *SC = GV.getComdat()) {
- bool LinkFromSrc;
- Comdat::SelectionKind SK;
- std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
- if (!LinkFromSrc)
+ std::tie(std::ignore, ComdatFrom) = ComdatsChosen[SC];
+ if (ComdatFrom == LinkFrom::Dst)
return false;
}
bool LinkFromSrc = true;
if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV))
return true;
+ if (DGV && ComdatFrom == LinkFrom::Both)
+ GVToClone.push_back(LinkFromSrc ? DGV : &GV);
if (LinkFromSrc)
ValuesToLink.insert(&GV);
return false;
@@ -462,12 +465,12 @@ bool ModuleLinker::run() {
if (ComdatsChosen.count(&C))
continue;
Comdat::SelectionKind SK;
- bool LinkFromSrc;
- if (getComdatResult(&C, SK, LinkFromSrc))
+ LinkFrom From;
+ if (getComdatResult(&C, SK, From))
return true;
- ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc);
+ ComdatsChosen[&C] = std::make_pair(SK, From);
- if (!LinkFromSrc)
+ if (From != LinkFrom::Src)
continue;
Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable();
@@ -482,20 +485,14 @@ bool ModuleLinker::run() {
// Alias have to go first, since we are not able to find their comdats
// otherwise.
- for (auto I = DstM.alias_begin(), E = DstM.alias_end(); I != E;) {
- GlobalAlias &GV = *I++;
+ for (GlobalAlias &GV : llvm::make_early_inc_range(DstM.aliases()))
dropReplacedComdat(GV, ReplacedDstComdats);
- }
- for (auto I = DstM.global_begin(), E = DstM.global_end(); I != E;) {
- GlobalVariable &GV = *I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(DstM.globals()))
dropReplacedComdat(GV, ReplacedDstComdats);
- }
- for (auto I = DstM.begin(), E = DstM.end(); I != E;) {
- Function &GV = *I++;
+ for (Function &GV : llvm::make_early_inc_range(DstM))
dropReplacedComdat(GV, ReplacedDstComdats);
- }
for (GlobalVariable &GV : SrcM->globals())
if (GV.hasLinkOnceLinkage())
@@ -514,18 +511,45 @@ bool ModuleLinker::run() {
// Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
+ SmallVector<GlobalValue *, 0> GVToClone;
for (GlobalVariable &GV : SrcM->globals())
- if (linkIfNeeded(GV))
+ if (linkIfNeeded(GV, GVToClone))
return true;
for (Function &SF : *SrcM)
- if (linkIfNeeded(SF))
+ if (linkIfNeeded(SF, GVToClone))
return true;
for (GlobalAlias &GA : SrcM->aliases())
- if (linkIfNeeded(GA))
+ if (linkIfNeeded(GA, GVToClone))
+ return true;
+
+ for (GlobalIFunc &GI : SrcM->ifuncs())
+ if (linkIfNeeded(GI, GVToClone))
return true;
+ // For a variable in a comdat nodeduplicate, its initializer should be
+ // preserved (its content may be implicitly used by other members) even if
+ // symbol resolution does not pick it. Clone it into an unnamed private
+ // variable.
+ for (GlobalValue *GV : GVToClone) {
+ if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
+ auto *NewVar = new GlobalVariable(*Var->getParent(), Var->getValueType(),
+ Var->isConstant(), Var->getLinkage(),
+ Var->getInitializer());
+ NewVar->copyAttributesFrom(Var);
+ NewVar->setVisibility(GlobalValue::DefaultVisibility);
+ NewVar->setLinkage(GlobalValue::PrivateLinkage);
+ NewVar->setDSOLocal(true);
+ NewVar->setComdat(Var->getComdat());
+ if (Var->getParent() != &Mover.getModule())
+ ValuesToLink.insert(NewVar);
+ } else {
+ emitError("linking '" + GV->getName() +
+ "': non-variables in comdat nodeduplicate are not handled");
+ }
+ }
+
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
const Comdat *SC = GV->getComdat();
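
The GVToClone loop above keeps the initializer of a nodeduplicate comdat member alive by cloning it into a private variable even when symbol resolution picks the other copy. Below is a sketch of just the cloning step, lifted out of ModuleLinker; clonePrivateCopy is a hypothetical helper name, and a real caller would still need to add the clone to ValuesToLink as the patch does.

  #include "llvm/IR/GlobalVariable.h"
  #include "llvm/IR/Module.h"

  static llvm::GlobalVariable *clonePrivateCopy(llvm::GlobalVariable *Var) {
    // Same type, constness, linkage and initializer as the original member.
    auto *NewVar = new llvm::GlobalVariable(
        *Var->getParent(), Var->getValueType(), Var->isConstant(),
        Var->getLinkage(), Var->getInitializer());
    NewVar->copyAttributesFrom(Var);
    // Demote the clone to an unnamed, private, dso_local copy that stays in
    // the same comdat, so its contents survive without competing in symbol
    // resolution.
    NewVar->setVisibility(llvm::GlobalValue::DefaultVisibility);
    NewVar->setLinkage(llvm::GlobalValue::PrivateLinkage);
    NewVar->setDSOLocal(true);
    NewVar->setComdat(Var->getComdat());
    return NewVar;
  }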
diff --git a/llvm/lib/MC/ConstantPools.cpp b/llvm/lib/MC/ConstantPools.cpp
index d4199025ad77..d8a08a4bd439 100644
--- a/llvm/lib/MC/ConstantPools.cpp
+++ b/llvm/lib/MC/ConstantPools.cpp
@@ -28,7 +28,7 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
return;
Streamer.emitDataRegion(MCDR_DataRegion);
for (const ConstantPoolEntry &Entry : Entries) {
- Streamer.emitCodeAlignment(Entry.Size); // align naturally
+ Streamer.emitValueToAlignment(Entry.Size); // align naturally
Streamer.emitLabel(Entry.Label);
Streamer.emitValue(Entry.Value, Entry.Size, Entry.Loc);
}
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index e0ea44626b7f..883735fcc293 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -796,7 +796,7 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
else
EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
- unsigned Flags = 0;
+ unsigned Flags = ELF::SHF_INFO_LINK;
if (Sec.getFlags() & ELF::SHF_GROUP)
Flags = ELF::SHF_GROUP;
@@ -1311,6 +1311,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
case MCSymbolRefExpr::VK_GOT:
case MCSymbolRefExpr::VK_PLT:
case MCSymbolRefExpr::VK_GOTPCREL:
+ case MCSymbolRefExpr::VK_GOTPCREL_NORELAX:
case MCSymbolRefExpr::VK_PPC_GOT_LO:
case MCSymbolRefExpr::VK_PPC_GOT_HI:
case MCSymbolRefExpr::VK_PPC_GOT_HA:
diff --git a/llvm/lib/MC/MCAsmInfoGOFF.cpp b/llvm/lib/MC/MCAsmInfoGOFF.cpp
new file mode 100644
index 000000000000..81704ffe4b24
--- /dev/null
+++ b/llvm/lib/MC/MCAsmInfoGOFF.cpp
@@ -0,0 +1,27 @@
+//===- MCAsmInfoGOFF.cpp - MCGOFFAsmInfo properties -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines certain target specific asm properties for GOFF (z/OS)
+/// based targets.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoGOFF.h"
+
+using namespace llvm;
+
+void MCAsmInfoGOFF::anchor() {}
+
+MCAsmInfoGOFF::MCAsmInfoGOFF() {
+ Data64bitsDirective = "\t.quad\t";
+ HasDotTypeDotSizeDirective = false;
+ PrivateGlobalPrefix = "@@";
+ PrivateLabelPrefix = "@";
+ ZeroDirective = "\t.space\t";
+}
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 72f4ee3f33be..154b2d051f34 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -30,13 +30,13 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolXCOFF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cctype>
using namespace llvm;
@@ -245,7 +245,7 @@ public:
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) override;
- void emitCodeAlignment(unsigned ByteAlignment,
+ void emitCodeAlignment(unsigned ByteAlignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0) override;
void emitValueToOffset(const MCExpr *Offset,
@@ -1429,6 +1429,7 @@ void MCAsmStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
}
void MCAsmStreamer::emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {
// Emit with a text fill value.
emitValueToAlignment(ByteAlignment, MAI->getTextAlignFillValue(),
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index 9ed8d1083a40..d5e9f4fc66bc 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -483,6 +483,7 @@ void MCAssembler::writeFragmentPadding(raw_ostream &OS,
"Writing bundle padding for a fragment without instructions");
unsigned TotalLength = BundlePadding + static_cast<unsigned>(FSize);
+ const MCSubtargetInfo *STI = EF.getSubtargetInfo();
if (EF.alignToBundleEnd() && TotalLength > getBundleAlignSize()) {
// If the padding itself crosses a bundle boundary, it must be emitted
// in 2 pieces, since even nop instructions must not cross boundaries.
@@ -493,12 +494,12 @@ void MCAssembler::writeFragmentPadding(raw_ostream &OS,
// ----------------------------
// ^-------------------^ <- TotalLength
unsigned DistanceToBoundary = TotalLength - getBundleAlignSize();
- if (!getBackend().writeNopData(OS, DistanceToBoundary))
+ if (!getBackend().writeNopData(OS, DistanceToBoundary, STI))
report_fatal_error("unable to write NOP sequence of " +
Twine(DistanceToBoundary) + " bytes");
BundlePadding -= DistanceToBoundary;
}
- if (!getBackend().writeNopData(OS, BundlePadding))
+ if (!getBackend().writeNopData(OS, BundlePadding, STI))
report_fatal_error("unable to write NOP sequence of " +
Twine(BundlePadding) + " bytes");
}
@@ -544,7 +545,7 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
// bytes left to fill use the Value and ValueSize to fill the rest.
// If we are aligning with nops, ask that target to emit the right data.
if (AF.hasEmitNops()) {
- if (!Asm.getBackend().writeNopData(OS, Count))
+ if (!Asm.getBackend().writeNopData(OS, Count, AF.getSubtargetInfo()))
report_fatal_error("unable to write nop sequence of " +
Twine(Count) + " bytes");
break;
@@ -621,9 +622,11 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
case MCFragment::FT_Nops: {
++stats::EmittedNopsFragments;
const MCNopsFragment &NF = cast<MCNopsFragment>(F);
+
int64_t NumBytes = NF.getNumBytes();
int64_t ControlledNopLength = NF.getControlledNopLength();
- int64_t MaximumNopLength = Asm.getBackend().getMaximumNopSize();
+ int64_t MaximumNopLength =
+ Asm.getBackend().getMaximumNopSize(*NF.getSubtargetInfo());
assert(NumBytes > 0 && "Expected positive NOPs fragment size");
assert(ControlledNopLength >= 0 && "Expected non-negative NOP size");
@@ -647,7 +650,8 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
uint64_t NumBytesToEmit =
(uint64_t)std::min(NumBytes, ControlledNopLength);
assert(NumBytesToEmit && "try to emit empty NOP instruction");
- if (!Asm.getBackend().writeNopData(OS, NumBytesToEmit)) {
+ if (!Asm.getBackend().writeNopData(OS, NumBytesToEmit,
+ NF.getSubtargetInfo())) {
report_fatal_error("unable to write nop sequence of the remaining " +
Twine(NumBytesToEmit) + " bytes");
break;
@@ -664,7 +668,8 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
}
case MCFragment::FT_BoundaryAlign: {
- if (!Asm.getBackend().writeNopData(OS, FragmentSize))
+ const MCBoundaryAlignFragment &BF = cast<MCBoundaryAlignFragment>(F);
+ if (!Asm.getBackend().writeNopData(OS, FragmentSize, BF.getSubtargetInfo()))
report_fatal_error("unable to write nop sequence of " +
Twine(FragmentSize) + " bytes");
break;
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 52ab0b41f539..aaa3b747682c 100644
--- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
diff --git a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
index 64e216e0051d..735be23206e4 100644
--- a/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
+++ b/llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp
@@ -8,7 +8,7 @@
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
#include "llvm-c/Disassembler.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 27bb7a103165..1c9cfb9042e2 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -27,7 +27,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
@@ -66,29 +65,6 @@ MCSymbol *mcdwarf::emitListsTableHeaderStart(MCStreamer &S) {
return End;
}
-/// Manage the .debug_line_str section contents, if we use it.
-class llvm::MCDwarfLineStr {
- MCSymbol *LineStrLabel = nullptr;
- StringTableBuilder LineStrings{StringTableBuilder::DWARF};
- bool UseRelocs = false;
-
-public:
- /// Construct an instance that can emit .debug_line_str (for use in a normal
- /// v5 line table).
- explicit MCDwarfLineStr(MCContext &Ctx) {
- UseRelocs = Ctx.getAsmInfo()->doesDwarfUseRelocationsAcrossSections();
- if (UseRelocs)
- LineStrLabel =
- Ctx.getObjectFileInfo()->getDwarfLineStrSection()->getBeginSymbol();
- }
-
- /// Emit a reference to the string.
- void emitRef(MCStreamer *MCOS, StringRef Path);
-
- /// Emit the .debug_line_str section if appropriate.
- void emitSection(MCStreamer *MCOS);
-};
-
static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment();
if (MinInsnLength == 1)
@@ -100,6 +76,13 @@ static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
return AddrDelta / MinInsnLength;
}
+MCDwarfLineStr::MCDwarfLineStr(MCContext &Ctx) {
+ UseRelocs = Ctx.getAsmInfo()->doesDwarfUseRelocationsAcrossSections();
+ if (UseRelocs)
+ LineStrLabel =
+ Ctx.getObjectFileInfo()->getDwarfLineStrSection()->getBeginSymbol();
+}
+
//
// This is called when an instruction is assembled into the specified section
// and if there is information from the last .loc directive that has yet to have
@@ -158,23 +141,58 @@ makeStartPlusIntExpr(MCContext &Ctx, const MCSymbol &Start, int IntVal) {
return Res;
}
+void MCLineSection::addEndEntry(MCSymbol *EndLabel) {
+ auto *Sec = &EndLabel->getSection();
+ // The line table may be empty, in which case we should skip adding an end entry.
+ // There are two cases:
+ // (1) MCAsmStreamer - emitDwarfLocDirective emits a location directive in
+ // place instead of adding a line entry if the target has
+ // usesDwarfFileAndLocDirectives.
+ // (2) MCObjectStreamer - if a function has incomplete debug info where
+ // instructions don't have DILocations, the line entries are missing.
+ auto I = MCLineDivisions.find(Sec);
+ if (I != MCLineDivisions.end()) {
+ auto &Entries = I->second;
+ auto EndEntry = Entries.back();
+ EndEntry.setEndLabel(EndLabel);
+ Entries.push_back(EndEntry);
+ }
+}
+
//
// This emits the Dwarf line table for the specified section from the entries
// in the LineSection.
//
-static inline void emitDwarfLineTable(
+void MCDwarfLineTable::emitOne(
MCStreamer *MCOS, MCSection *Section,
const MCLineSection::MCDwarfLineEntryCollection &LineEntries) {
- unsigned FileNum = 1;
- unsigned LastLine = 1;
- unsigned Column = 0;
- unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
- unsigned Isa = 0;
- unsigned Discriminator = 0;
- MCSymbol *LastLabel = nullptr;
+
+ unsigned FileNum, LastLine, Column, Flags, Isa, Discriminator;
+ MCSymbol *LastLabel;
+ auto init = [&]() {
+ FileNum = 1;
+ LastLine = 1;
+ Column = 0;
+ Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
+ Isa = 0;
+ Discriminator = 0;
+ LastLabel = nullptr;
+ };
+ init();
// Loop through each MCDwarfLineEntry and encode the dwarf line number table.
+ bool EndEntryEmitted = false;
for (const MCDwarfLineEntry &LineEntry : LineEntries) {
+ MCSymbol *Label = LineEntry.getLabel();
+ const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
+ if (LineEntry.IsEndEntry) {
+ MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, Label,
+ asmInfo->getCodePointerSize());
+ init();
+ EndEntryEmitted = true;
+ continue;
+ }
+
int64_t LineDelta = static_cast<int64_t>(LineEntry.getLine()) - LastLine;
if (FileNum != LineEntry.getFileNum()) {
@@ -212,12 +230,9 @@ static inline void emitDwarfLineTable(
if (LineEntry.getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
MCOS->emitInt8(dwarf::DW_LNS_set_epilogue_begin);
- MCSymbol *Label = LineEntry.getLabel();
-
// At this point we want to emit/create the sequence to encode the delta in
// line numbers and the increment of the address from the previous Label
// and the current Label.
- const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo();
MCOS->emitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
asmInfo->getCodePointerSize());
@@ -227,7 +242,12 @@ static inline void emitDwarfLineTable(
}
// Generate DWARF line end entry.
- MCOS->emitDwarfLineEndEntry(Section, LastLabel);
+ // We do not need this for DwarfDebug, which explicitly terminates the line
+ // table using ranges whenever the CU or section changes. However, the MC path
+ // does not track ranges nor terminate the line table. In that case,
+ // conservatively use the section end symbol to end the line table.
+ if (!EndEntryEmitted)
+ MCOS->emitDwarfLineEndEntry(Section, LastLabel);
}
//
@@ -522,7 +542,7 @@ void MCDwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
// Put out the line tables.
for (const auto &LineSec : MCLineSections.getMCLineEntries())
- emitDwarfLineTable(MCOS, LineSec.first, LineSec.second);
+ emitOne(MCOS, LineSec.first, LineSec.second);
// This is the end of the section, so set the value of the symbol at the end
// of this section (that was used in a previous expression).
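
MCDwarfLineTable::emitOne above resets the line-table state machine through a small init lambda whenever an end entry is seen, instead of duplicating the default values. A tiny standalone sketch of that reset pattern follows; the variable set is trimmed down from the real one.

  #include <cstdio>

  int main() {
    unsigned FileNum, LastLine, Column;
    auto init = [&] { FileNum = 1; LastLine = 1; Column = 0; };
    init();        // establish the DWARF row defaults once
    LastLine = 42; // ... rows are emitted and the state advances ...
    init();        // an end entry restarts the sequence from the defaults
    std::printf("%u %u %u\n", FileNum, LastLine, Column);
    return 0;
  }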
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 784d66805d63..1ba999a63113 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -29,10 +29,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -88,10 +88,10 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}
-void MCELFStreamer::InitSections(bool NoExecStack) {
+void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
MCContext &Ctx = getContext();
SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(Ctx.getObjectFileInfo()->getTextSectionAlignment(), &STI);
if (NoExecStack)
SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
@@ -224,6 +224,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_ELF_TypeGnuUniqueObject:
Symbol->setType(CombineSymbolTypes(Symbol->getType(), ELF::STT_OBJECT));
Symbol->setBinding(ELF::STB_GNU_UNIQUE);
+ getAssembler().getWriter().markGnuAbi();
break;
case MCSA_Global:
@@ -325,7 +326,7 @@ void MCELFStreamer::emitCommonSymbol(MCSymbol *S, uint64_t Size,
SwitchSection(P.first, P.second);
} else {
if(Symbol->declareCommon(Size, ByteAlignment))
- report_fatal_error("Symbol: " + Symbol->getName() +
+ report_fatal_error(Twine("Symbol: ") + Symbol->getName() +
" redeclared as different type");
}
@@ -500,7 +501,7 @@ void MCELFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE,
*MCOffset, "BFD_RELOC_NONE", SRE, SRE->getLoc(),
*getContext().getSubtargetInfo()))
report_fatal_error("Relocation for CG Profile could not be created: " +
- Err->second);
+ Twine(Err->second));
}
void MCELFStreamer::finalizeCGProfile() {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 84ec0f6bb57b..10d494b5ac61 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -230,6 +230,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_GOTREL: return "GOTREL";
case VK_PCREL: return "PCREL";
case VK_GOTPCREL: return "GOTPCREL";
+ case VK_GOTPCREL_NORELAX: return "GOTPCREL_NORELAX";
case VK_GOTTPOFF: return "GOTTPOFF";
case VK_INDNTPOFF: return "INDNTPOFF";
case VK_NTPOFF: return "NTPOFF";
@@ -358,6 +359,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_WASM_MBREL: return "MBREL";
case VK_WASM_TLSREL: return "TLSREL";
case VK_WASM_TBREL: return "TBREL";
+ case VK_WASM_GOT_TLS: return "GOT@TLS";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
@@ -393,6 +395,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("gotrel", VK_GOTREL)
.Case("pcrel", VK_PCREL)
.Case("gotpcrel", VK_GOTPCREL)
+ .Case("gotpcrel_norelax", VK_GOTPCREL_NORELAX)
.Case("gottpoff", VK_GOTTPOFF)
.Case("indntpoff", VK_INDNTPOFF)
.Case("ntpoff", VK_NTPOFF)
@@ -499,6 +502,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("tbrel", VK_WASM_TBREL)
.Case("mbrel", VK_WASM_MBREL)
.Case("tlsrel", VK_WASM_TLSREL)
+ .Case("got@tls", VK_WASM_GOT_TLS)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 0f8543f51096..4634de863b2f 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -128,7 +128,11 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
const MCSymbolRefExpr *A = Target.getSymA();
if (A) {
uint64_t ValA;
- if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
+ // FIXME: On most platforms, `Target`'s component symbols are labels from
+ // having been simplified during evaluation, but on Mach-O they can be
+ // variables due to PR19203. This, and the line below for `B`, can be
+ // restored to call `getLabelOffset` when PR19203 is fixed.
+ if (!getSymbolOffsetImpl(Layout, A->getSymbol(), ReportError, ValA))
return false;
Offset += ValA;
}
@@ -136,7 +140,7 @@ static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
const MCSymbolRefExpr *B = Target.getSymB();
if (B) {
uint64_t ValB;
- if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
+ if (!getSymbolOffsetImpl(Layout, B->getSymbol(), ReportError, ValB))
return false;
Offset -= ValB;
}
diff --git a/llvm/lib/MC/MCInstrAnalysis.cpp b/llvm/lib/MC/MCInstrAnalysis.cpp
index a7dc0626d0ab..52b59185c6fc 100644
--- a/llvm/lib/MC/MCInstrAnalysis.cpp
+++ b/llvm/lib/MC/MCInstrAnalysis.cpp
@@ -29,8 +29,14 @@ bool MCInstrAnalysis::evaluateBranch(const MCInst & /*Inst*/, uint64_t /*Addr*/,
return false;
}
-Optional<uint64_t>
-MCInstrAnalysis::evaluateMemoryOperandAddress(const MCInst &Inst, uint64_t Addr,
- uint64_t Size) const {
+Optional<uint64_t> MCInstrAnalysis::evaluateMemoryOperandAddress(
+ const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
+ uint64_t Size) const {
return None;
}
+
+Optional<uint64_t>
+MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
+ uint64_t Size) const {
+ return None;
+} \ No newline at end of file
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index f0948a184598..aa94b141d8be 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -30,9 +30,9 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolMachO.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <vector>
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 7ea1106068b7..d7f85f793c55 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -896,6 +896,19 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
".rodata", SectionKind::getReadOnly(),
XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD),
/* MultiSymbolsAllowed*/ true);
+ ReadOnlySection->setAlignment(Align(4));
+
+ ReadOnly8Section = Ctx->getXCOFFSection(
+ ".rodata.8", SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+ ReadOnly8Section->setAlignment(Align(8));
+
+ ReadOnly16Section = Ctx->getXCOFFSection(
+ ".rodata.16", SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_RO, XCOFF::XTY_SD),
+ /* MultiSymbolsAllowed*/ true);
+ ReadOnly16Section->setAlignment(Align(16));
TLSDataSection = Ctx->getXCOFFSection(
".tdata", SectionKind::getThreadData(),
@@ -968,6 +981,8 @@ void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
/* MultiSymbolsAllowed */ true, ".dwmac", XCOFF::SSUBTYP_DWMAC);
}
+MCObjectFileInfo::~MCObjectFileInfo() {}
+
void MCObjectFileInfo::initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
bool LargeCodeModel) {
PositionIndependent = PIC;
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 2865a2ad80a9..9c86fcc86bcb 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -368,7 +368,7 @@ void MCObjectStreamer::emitInstruction(const MCInst &Inst,
"' cannot have instructions");
return;
}
- getAssembler().getBackend().emitInstructionBegin(*this, Inst);
+ getAssembler().getBackend().emitInstructionBegin(*this, Inst, STI);
emitInstructionImpl(Inst, STI);
getAssembler().getBackend().emitInstructionEnd(*this, Inst);
}
@@ -609,9 +609,10 @@ void MCObjectStreamer::emitValueToAlignment(unsigned ByteAlignment,
}
void MCObjectStreamer::emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {
emitValueToAlignment(ByteAlignment, 0, 1, MaxBytesToEmit);
- cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true);
+ cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true, STI);
}
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
@@ -835,13 +836,14 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size,
}
void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength,
- SMLoc Loc) {
+ SMLoc Loc, const MCSubtargetInfo &STI) {
// Emit an NOP fragment.
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
assert(getCurrentSectionOnly() && "need a section");
- insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc));
+
+ insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc, STI));
}
void MCObjectStreamer::emitFileDirective(StringRef Filename) {
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index e328ba5315af..bf9b9e916d6f 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -228,6 +228,7 @@ AsmToken AsmLexer::LexLineComment() {
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
+ const char *NewlinePtr = CurPtr;
if (CurChar == '\r' && CurPtr != CurBuf.end() && *CurPtr == '\n')
++CurPtr;
@@ -235,7 +236,7 @@ AsmToken AsmLexer::LexLineComment() {
if (CommentConsumer) {
CommentConsumer->HandleComment(
SMLoc::getFromPointer(CommentTextStart),
- StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
+ StringRef(CommentTextStart, NewlinePtr - 1 - CommentTextStart));
}
IsAtStartOfLine = true;
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index d3cb5ca59bf3..ed9f2066dc20 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -749,6 +749,7 @@ namespace llvm {
extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
extern MCAsmParserExtension *createCOFFAsmParser();
+extern MCAsmParserExtension *createGOFFAsmParser();
extern MCAsmParserExtension *createXCOFFAsmParser();
extern MCAsmParserExtension *createWasmAsmParser();
@@ -783,7 +784,8 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
PlatformParser.reset(createELFAsmParser());
break;
case MCContext::IsGOFF:
- report_fatal_error("GOFFAsmParser support not implemented yet");
+ PlatformParser.reset(createGOFFAsmParser());
+ break;
case MCContext::IsWasm:
PlatformParser.reset(createWasmAsmParser());
break;
@@ -950,7 +952,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
if (!NoInitialTextSection)
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
// Prime the lexer.
Lex();
@@ -1052,18 +1054,21 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
}
}
}
-
// Finalize the output stream if there are no errors and if the client wants
// us to.
- if (!HadError && !NoFinalize)
+ if (!HadError && !NoFinalize) {
+ if (auto *TS = Out.getTargetStreamer())
+ TS->emitConstantPools();
+
Out.Finish(Lexer.getLoc());
+ }
return HadError || getContext().hadError();
}
bool AsmParser::checkForValidSection() {
if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
return Error(getTok().getLoc(),
"expected section directive before assembly directive");
}
@@ -3451,7 +3456,8 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool UseCodeAlign = Section->UseCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
- getStreamer().emitCodeAlignment(Alignment, MaxBytesToFill);
+ getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
+ MaxBytesToFill);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
getStreamer().emitValueToAlignment(Alignment, FillExpr, ValueSize,
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 70d69fc8dd32..ddc41d0a08ab 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -502,6 +502,23 @@ static bool hasPrefix(StringRef SectionName, StringRef Prefix) {
return SectionName.startswith(Prefix) || SectionName == Prefix.drop_back();
}
+static bool allowSectionTypeMismatch(const Triple &TT, StringRef SectionName,
+ unsigned Type) {
+ if (TT.getArch() == Triple::x86_64) {
+ // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
+ // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't
+ // error for SHT_PROGBITS .eh_frame
+ return SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS;
+ }
+ if (TT.isMIPS()) {
+ // MIPS .debug_* sections should have SHT_MIPS_DWARF section type to
+ // distinguish among sections containing DWARF and ECOFF debug formats,
+ // but in assembly files these sections have SHT_PROGBITS type.
+ return hasPrefix(SectionName, ".debug_") && Type == ELF::SHT_PROGBITS;
+ }
+ return false;
+}
+
bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
StringRef SectionName;
@@ -659,11 +676,9 @@ EndStmt:
getContext().getELFSection(SectionName, Type, Flags, Size, GroupName,
IsComdat, UniqueID, LinkedToSym);
getStreamer().SwitchSection(Section, Subsection);
- // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame,
- // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't error
- // for SHT_PROGBITS .eh_frame
if (Section->getType() != Type &&
- !(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS))
+ !allowSectionTypeMismatch(getContext().getTargetTriple(), SectionName,
+ Type))
Error(loc, "changed section type for " + SectionName + ", expected: 0x" +
utohexstr(Section->getType()));
// Check that flags are used consistently. However, the GNU assembler permits
@@ -815,7 +830,7 @@ bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
- if (Name.find('@') == StringRef::npos)
+ if (!Name.contains('@'))
return TokError("expected a '@' in the name");
bool KeepOriginalSym = !Name.contains("@@@");
if (parseOptionalToken(AsmToken::Comma)) {
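
allowSectionTypeMismatch() above tolerates a declared SHT_PROGBITS type in two cases despite the section's canonical type. Here is a slightly simplified, self-contained restatement as a sketch (it drops the exact-".debug" spelling that hasPrefix also accepts):

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/Triple.h"
  #include "llvm/BinaryFormat/ELF.h"

  static bool tolerateMismatch(const llvm::Triple &TT, llvm::StringRef Name,
                               unsigned Type) {
    if (Type != llvm::ELF::SHT_PROGBITS)
      return false;
    if (TT.getArch() == llvm::Triple::x86_64)
      return Name == ".eh_frame";        // GNU as compatibility for .cfi_*
    if (TT.isMIPS())
      return Name.startswith(".debug_"); // assembly files use SHT_PROGBITS here
    return false;
  }

  int main() {
    llvm::Triple X86("x86_64-unknown-linux-gnu");
    return tolerateMismatch(X86, ".eh_frame", llvm::ELF::SHT_PROGBITS) ? 0 : 1;
  }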
diff --git a/llvm/lib/MC/MCParser/GOFFAsmParser.cpp b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
new file mode 100644
index 000000000000..c2a7eaee8029
--- /dev/null
+++ b/llvm/lib/MC/MCParser/GOFFAsmParser.cpp
@@ -0,0 +1,48 @@
+//===- GOFFAsmParser.cpp - GOFF Assembly Parser ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSectionGOFF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbolGOFF.h"
+
+using namespace llvm;
+
+namespace {
+
+class GOFFAsmParser : public MCAsmParserExtension {
+ template <bool (GOFFAsmParser::*HandlerMethod)(StringRef, SMLoc)>
+ void addDirectiveHandler(StringRef Directive) {
+ MCAsmParser::ExtensionDirectiveHandler Handler =
+ std::make_pair(this, HandleDirective<GOFFAsmParser, HandlerMethod>);
+
+ getParser().addDirectiveHandler(Directive, Handler);
+ }
+
+public:
+ GOFFAsmParser() {}
+
+ void Initialize(MCAsmParser &Parser) override {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+ }
+};
+
+} // namespace
+
+namespace llvm {
+
+MCAsmParserExtension *createGOFFAsmParser() { return new GOFFAsmParser; }
+
+} // namespace llvm
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 7b4d6e529cc2..f1704cef46ac 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1319,7 +1319,7 @@ bool MasmParser::enabledGenDwarfForAssembly() {
bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Create the initial section, if requested.
if (!NoInitialTextSection)
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
// Prime the lexer.
Lex();
@@ -1437,7 +1437,7 @@ bool MasmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
bool MasmParser::checkForValidSection() {
if (!ParsingMSInlineAsm && !getStreamer().getCurrentSectionOnly()) {
- Out.InitSections(false);
+ Out.initSections(false, getTargetParser().getSTI());
return Error(getTok().getLoc(),
"expected section directive before assembly directive");
}
@@ -4772,7 +4772,8 @@ bool MasmParser::emitAlignTo(int64_t Alignment) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign()) {
- getStreamer().emitCodeAlignment(Alignment, /*MaxBytesToEmit=*/0);
+ getStreamer().emitCodeAlignment(Alignment, &getTargetParser().getSTI(),
+ /*MaxBytesToEmit=*/0);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
getStreamer().emitValueToAlignment(Alignment, /*Value=*/0,
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index 731831d3bce3..e35bcec8fe75 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -12,10 +12,17 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/raw_ostream.h"
+#include <limits>
+#include <memory>
+#include <sstream>
#define DEBUG_TYPE "mcpseudoprobe"
using namespace llvm;
+using namespace support;
#ifndef NDEBUG
int MCPseudoProbeTable::DdgPrintIndent = 0;
@@ -69,23 +76,6 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
});
}
-MCPseudoProbeInlineTree::~MCPseudoProbeInlineTree() {
- for (auto &Inlinee : Inlinees)
- delete Inlinee.second;
-}
-
-MCPseudoProbeInlineTree *
-MCPseudoProbeInlineTree::getOrAddNode(InlineSite Site) {
- auto Iter = Inlinees.find(Site);
- if (Iter == Inlinees.end()) {
- auto *Node = new MCPseudoProbeInlineTree(std::get<0>(Site));
- Inlinees[Site] = Node;
- return Node;
- } else {
- return Iter->second;
- }
-}
-
void MCPseudoProbeInlineTree::addPseudoProbe(
const MCPseudoProbe &Probe, const MCPseudoProbeInlineStack &InlineStack) {
// The function should not be called on the root.
@@ -147,7 +137,7 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
// Emit number of probes in this node
MCOS->emitULEB128IntValue(Probes.size());
// Emit number of direct inlinees
- MCOS->emitULEB128IntValue(Inlinees.size());
+ MCOS->emitULEB128IntValue(Children.size());
// Emit probes in this group
for (const auto &Probe : Probes) {
Probe.emit(MCOS, LastProbe);
@@ -157,7 +147,13 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS,
assert(Probes.empty() && "Root should not have probes");
}
- // Emit descendent
+ // Emit descendants in sorted order.
+ // InlineSite is unique for each child, so sorting by it gives a
+ // deterministic order instead of one based on the MCPseudoProbeInlineTree*.
+ std::map<InlineSite, MCPseudoProbeInlineTree *> Inlinees;
+ for (auto Child = Children.begin(); Child != Children.end(); ++Child)
+ Inlinees[Child->first] = Child->second.get();
+
for (const auto &Inlinee : Inlinees) {
if (Guid) {
// Emit probe index
@@ -211,3 +207,361 @@ void MCPseudoProbeTable::emit(MCObjectStreamer *MCOS) {
// Put out the probe.
ProbeSections.emit(MCOS);
}
+
+static StringRef getProbeFNameForGUID(const GUIDProbeFunctionMap &GUID2FuncMAP,
+ uint64_t GUID) {
+ auto It = GUID2FuncMAP.find(GUID);
+ assert(It != GUID2FuncMAP.end() &&
+ "Probe function must exist for a valid GUID");
+ return It->second.FuncName;
+}
+
+void MCPseudoProbeFuncDesc::print(raw_ostream &OS) {
+ OS << "GUID: " << FuncGUID << " Name: " << FuncName << "\n";
+ OS << "Hash: " << FuncHash << "\n";
+}
+
+void MCDecodedPseudoProbe::getInlineContext(
+ SmallVectorImpl<MCPseduoProbeFrameLocation> &ContextStack,
+ const GUIDProbeFunctionMap &GUID2FuncMAP) const {
+ uint32_t Begin = ContextStack.size();
+ MCDecodedPseudoProbeInlineTree *Cur = InlineTree;
+ // It will add the string of each node's inline site during iteration.
+ // Note that it won't include the function the probe belongs to (the leaf location).
+ while (Cur->hasInlineSite()) {
+ StringRef FuncName =
+ getProbeFNameForGUID(GUID2FuncMAP, std::get<0>(Cur->ISite));
+ ContextStack.emplace_back(
+ MCPseduoProbeFrameLocation(FuncName, std::get<1>(Cur->ISite)));
+ Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
+ }
+ // Make the ContextStack in caller-callee order
+ std::reverse(ContextStack.begin() + Begin, ContextStack.end());
+}
+
+std::string MCDecodedPseudoProbe::getInlineContextStr(
+ const GUIDProbeFunctionMap &GUID2FuncMAP) const {
+ std::ostringstream OContextStr;
+ SmallVector<MCPseduoProbeFrameLocation, 16> ContextStack;
+ getInlineContext(ContextStack, GUID2FuncMAP);
+ for (auto &Cxt : ContextStack) {
+ if (OContextStr.str().size())
+ OContextStr << " @ ";
+ OContextStr << Cxt.first.str() << ":" << Cxt.second;
+ }
+ return OContextStr.str();
+}
+
+static const char *PseudoProbeTypeStr[3] = {"Block", "IndirectCall",
+ "DirectCall"};
+
+void MCDecodedPseudoProbe::print(raw_ostream &OS,
+ const GUIDProbeFunctionMap &GUID2FuncMAP,
+ bool ShowName) const {
+ OS << "FUNC: ";
+ if (ShowName) {
+ StringRef FuncName = getProbeFNameForGUID(GUID2FuncMAP, Guid);
+ OS << FuncName.str() << " ";
+ } else {
+ OS << Guid << " ";
+ }
+ OS << "Index: " << Index << " ";
+ OS << "Type: " << PseudoProbeTypeStr[static_cast<uint8_t>(Type)] << " ";
+ std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP);
+ if (InlineContextStr.size()) {
+ OS << "Inlined: @ ";
+ OS << InlineContextStr;
+ }
+ OS << "\n";
+}
+
+template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readUnencodedNumber() {
+ if (Data + sizeof(T) > End) {
+ return std::error_code();
+ }
+ T Val = endian::readNext<T, little, unaligned>(Data);
+ return ErrorOr<T>(Val);
+}
+
+template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readUnsignedNumber() {
+ unsigned NumBytesRead = 0;
+ uint64_t Val = decodeULEB128(Data, &NumBytesRead);
+ if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
+ return std::error_code();
+ }
+ Data += NumBytesRead;
+ return ErrorOr<T>(static_cast<T>(Val));
+}
+
+template <typename T> ErrorOr<T> MCPseudoProbeDecoder::readSignedNumber() {
+ unsigned NumBytesRead = 0;
+ int64_t Val = decodeSLEB128(Data, &NumBytesRead);
+ if (Val > std::numeric_limits<T>::max() || (Data + NumBytesRead > End)) {
+ return std::error_code();
+ }
+ Data += NumBytesRead;
+ return ErrorOr<T>(static_cast<T>(Val));
+}
+
+ErrorOr<StringRef> MCPseudoProbeDecoder::readString(uint32_t Size) {
+ StringRef Str(reinterpret_cast<const char *>(Data), Size);
+ if (Data + Size > End) {
+ return std::error_code();
+ }
+ Data += Size;
+ return ErrorOr<StringRef>(Str);
+}
+
+bool MCPseudoProbeDecoder::buildGUID2FuncDescMap(const uint8_t *Start,
+ std::size_t Size) {
+ // The pseudo_probe_desc section has a format like:
+ // .section .pseudo_probe_desc,"",@progbits
+ // .quad -5182264717993193164 // GUID
+ // .quad 4294967295 // Hash
+ // .uleb 3 // Name size
+ // .ascii "foo" // Name
+ // .quad -2624081020897602054
+ // .quad 174696971957
+ // .uleb 34
+ // .ascii "main"
+
+ Data = Start;
+ End = Data + Size;
+
+ while (Data < End) {
+ auto ErrorOrGUID = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrGUID)
+ return false;
+
+ auto ErrorOrHash = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrHash)
+ return false;
+
+ auto ErrorOrNameSize = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrNameSize)
+ return false;
+ uint32_t NameSize = std::move(*ErrorOrNameSize);
+
+ auto ErrorOrName = readString(NameSize);
+ if (!ErrorOrName)
+ return false;
+
+ uint64_t GUID = std::move(*ErrorOrGUID);
+ uint64_t Hash = std::move(*ErrorOrHash);
+ StringRef Name = std::move(*ErrorOrName);
+
+    // Build an MCPseudoProbeFuncDesc and insert it into GUID2FuncDescMap.
+ GUID2FuncDescMap.emplace(GUID, MCPseudoProbeFuncDesc(GUID, Hash, Name));
+ }
+ assert(Data == End && "Have unprocessed data in pseudo_probe_desc section");
+ return true;
+}
+
+bool MCPseudoProbeDecoder::buildAddress2ProbeMap(const uint8_t *Start,
+ std::size_t Size) {
+ // The pseudo_probe section encodes an inline forest and each tree has a
+ // format like:
+ // FUNCTION BODY (one for each uninlined function present in the text
+ // section)
+ // GUID (uint64)
+ // GUID of the function
+ // NPROBES (ULEB128)
+ // Number of probes originating from this function.
+ // NUM_INLINED_FUNCTIONS (ULEB128)
+ // Number of callees inlined into this function, aka number of
+ // first-level inlinees
+ // PROBE RECORDS
+ // A list of NPROBES entries. Each entry contains:
+ // INDEX (ULEB128)
+ // TYPE (uint4)
+ // 0 - block probe, 1 - indirect call, 2 - direct call
+ // ATTRIBUTE (uint3)
+ // 1 - tail call, 2 - dangling
+ // ADDRESS_TYPE (uint1)
+ // 0 - code address, 1 - address delta
+ // CODE_ADDRESS (uint64 or ULEB128)
+ // code address or address delta, depending on Flag
+ // INLINED FUNCTION RECORDS
+ // A list of NUM_INLINED_FUNCTIONS entries describing each of the
+ // inlined callees. Each record contains:
+ // INLINE SITE
+ // Index of the callsite probe (ULEB128)
+ // FUNCTION BODY
+ // A FUNCTION BODY entry describing the inlined function.
+
+ Data = Start;
+ End = Data + Size;
+
+ MCDecodedPseudoProbeInlineTree *Root = &DummyInlineRoot;
+ MCDecodedPseudoProbeInlineTree *Cur = &DummyInlineRoot;
+ uint64_t LastAddr = 0;
+ uint32_t Index = 0;
+ // A DFS-based decoding
+ while (Data < End) {
+ if (Root == Cur) {
+      // Use a sequential id for the top-level inliner.
+ Index = Root->getChildren().size();
+ } else {
+ // Read inline site for inlinees
+ auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrIndex)
+ return false;
+ Index = std::move(*ErrorOrIndex);
+ }
+    // Switch to (or add) a new tree node (inlinee).
+ Cur = Cur->getOrAddNode(std::make_tuple(Cur->Guid, Index));
+ // Read guid
+ auto ErrorOrCurGuid = readUnencodedNumber<uint64_t>();
+ if (!ErrorOrCurGuid)
+ return false;
+ Cur->Guid = std::move(*ErrorOrCurGuid);
+ // Read number of probes in the current node.
+ auto ErrorOrNodeCount = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrNodeCount)
+ return false;
+ uint32_t NodeCount = std::move(*ErrorOrNodeCount);
+ // Read number of direct inlinees
+ auto ErrorOrCurChildrenToProcess = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrCurChildrenToProcess)
+ return false;
+ Cur->ChildrenToProcess = std::move(*ErrorOrCurChildrenToProcess);
+ // Read all probes in this node
+ for (std::size_t I = 0; I < NodeCount; I++) {
+ // Read index
+ auto ErrorOrIndex = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrIndex)
+ return false;
+ uint32_t Index = std::move(*ErrorOrIndex);
+ // Read type | flag.
+ auto ErrorOrValue = readUnencodedNumber<uint8_t>();
+ if (!ErrorOrValue)
+ return false;
+ uint8_t Value = std::move(*ErrorOrValue);
+ uint8_t Kind = Value & 0xf;
+ uint8_t Attr = (Value & 0x70) >> 4;
+ // Read address
+ uint64_t Addr = 0;
+ if (Value & 0x80) {
+ auto ErrorOrOffset = readSignedNumber<int64_t>();
+ if (!ErrorOrOffset)
+ return false;
+ int64_t Offset = std::move(*ErrorOrOffset);
+ Addr = LastAddr + Offset;
+ } else {
+ auto ErrorOrAddr = readUnencodedNumber<int64_t>();
+ if (!ErrorOrAddr)
+ return false;
+ Addr = std::move(*ErrorOrAddr);
+ }
+ // Populate Address2ProbesMap
+ auto &Probes = Address2ProbesMap[Addr];
+ Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
+ Cur);
+ Cur->addProbes(&Probes.back());
+ LastAddr = Addr;
+ }
+
+    // Find the parent for the next node: walk up from each finished node,
+    // decrementing its parent's count of unprocessed children. The first
+    // ancestor that still has unprocessed children (or the root) is where the
+    // next record attaches.
+ while (Cur != Root) {
+ if (Cur->ChildrenToProcess == 0) {
+ Cur = static_cast<MCDecodedPseudoProbeInlineTree *>(Cur->Parent);
+ if (Cur != Root) {
+ assert(Cur->ChildrenToProcess > 0 &&
+ "Should have some unprocessed nodes");
+ Cur->ChildrenToProcess -= 1;
+ }
+ } else {
+ break;
+ }
+ }
+ }
+
+ assert(Data == End && "Have unprocessed data in pseudo_probe section");
+ assert(Cur == Root &&
+ " Cur should point to root when the forest is fully built up");
+ return true;
+}
+
+void MCPseudoProbeDecoder::printGUID2FuncDescMap(raw_ostream &OS) {
+ OS << "Pseudo Probe Desc:\n";
+ // Make the output deterministic
+ std::map<uint64_t, MCPseudoProbeFuncDesc> OrderedMap(GUID2FuncDescMap.begin(),
+ GUID2FuncDescMap.end());
+ for (auto &I : OrderedMap) {
+ I.second.print(OS);
+ }
+}
+
+void MCPseudoProbeDecoder::printProbeForAddress(raw_ostream &OS,
+ uint64_t Address) {
+ auto It = Address2ProbesMap.find(Address);
+ if (It != Address2ProbesMap.end()) {
+ for (auto &Probe : It->second) {
+ OS << " [Probe]:\t";
+ Probe.print(OS, GUID2FuncDescMap, true);
+ }
+ }
+}
+
+void MCPseudoProbeDecoder::printProbesForAllAddresses(raw_ostream &OS) {
+ std::vector<uint64_t> Addresses;
+ for (auto Entry : Address2ProbesMap)
+ Addresses.push_back(Entry.first);
+ std::sort(Addresses.begin(), Addresses.end());
+ for (auto K : Addresses) {
+ OS << "Address:\t";
+ OS << K;
+ OS << "\n";
+ printProbeForAddress(OS, K);
+ }
+}
+
+const MCDecodedPseudoProbe *
+MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
+ auto It = Address2ProbesMap.find(Address);
+ if (It == Address2ProbesMap.end())
+ return nullptr;
+ const auto &Probes = It->second;
+
+ const MCDecodedPseudoProbe *CallProbe = nullptr;
+ for (const auto &Probe : Probes) {
+ if (Probe.isCall()) {
+ assert(!CallProbe &&
+ "There should be only one call probe corresponding to address "
+ "which is a callsite.");
+ CallProbe = &Probe;
+ }
+ }
+ return CallProbe;
+}
+
+const MCPseudoProbeFuncDesc *
+MCPseudoProbeDecoder::getFuncDescForGUID(uint64_t GUID) const {
+ auto It = GUID2FuncDescMap.find(GUID);
+ assert(It != GUID2FuncDescMap.end() && "Function descriptor doesn't exist");
+ return &It->second;
+}
+
+void MCPseudoProbeDecoder::getInlineContextForProbe(
+ const MCDecodedPseudoProbe *Probe,
+ SmallVectorImpl<MCPseduoProbeFrameLocation> &InlineContextStack,
+ bool IncludeLeaf) const {
+ Probe->getInlineContext(InlineContextStack, GUID2FuncDescMap);
+ if (!IncludeLeaf)
+ return;
+  // The context from the probe does not include the leaf frame, so retrieve it
+  // and append it here if requested.
+ const auto *FuncDesc = getFuncDescForGUID(Probe->getGuid());
+ InlineContextStack.emplace_back(
+ MCPseduoProbeFrameLocation(FuncDesc->FuncName, Probe->getIndex()));
+}
+
+const MCPseudoProbeFuncDesc *MCPseudoProbeDecoder::getInlinerDescForProbe(
+ const MCDecodedPseudoProbe *Probe) const {
+ MCDecodedPseudoProbeInlineTree *InlinerNode = Probe->getInlineTreeNode();
+ if (!InlinerNode->hasInlineSite())
+ return nullptr;
+ return getFuncDescForGUID(std::get<0>(InlinerNode->ISite));
+}
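
The record layout documented in buildAddress2ProbeMap above packs the probe
type, attribute and address-encoding flag into a single byte that follows the
ULEB128 probe index. A minimal, self-contained sketch of that unpacking
(illustrative only; readULEB128 is a local helper for this example, not the
decoder's own readUnsignedNumber):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Decode one unsigned LEB128 value and advance the cursor.
    static uint64_t readULEB128(const uint8_t *&P) {
      uint64_t Value = 0;
      unsigned Shift = 0;
      uint8_t Byte;
      do {
        Byte = *P++;
        Value |= uint64_t(Byte & 0x7f) << Shift;
        Shift += 7;
      } while (Byte & 0x80);
      return Value;
    }

    int main() {
      // INDEX = 3 (ULEB128), then one packed byte:
      //   bits 0-3 type, bits 4-6 attribute, bit 7 address-is-delta flag.
      const std::vector<uint8_t> Buf = {0x03, 0x92}; // 0x92 = 0b1001'0010
      const uint8_t *P = Buf.data();
      uint64_t Index = readULEB128(P);
      uint8_t Packed = *P++;
      uint8_t Kind = Packed & 0xf;         // 2 -> direct call
      uint8_t Attr = (Packed & 0x70) >> 4; // 1 -> tail call
      bool IsDelta = (Packed & 0x80) != 0; // address encoded as a delta
      std::printf("index=%llu kind=%u attr=%u delta=%d\n",
                  (unsigned long long)Index, unsigned(Kind), unsigned(Attr),
                  int(IsDelta));
      return 0;
    }
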
diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index 648efc14da06..7f7380bf810d 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -118,6 +118,10 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
bool MCSectionXCOFF::UseCodeAlign() const { return getKind().isText(); }
bool MCSectionXCOFF::isVirtualSection() const {
- assert(isCsect() && "Only csect section can be virtual!");
+  // DWARF sections are never virtual.
+ if (isDwarfSect())
+ return false;
+ assert(isCsect() &&
+ "Handling for isVirtualSection not implemented for this section!");
return XCOFF::XTY_CM == CsectProp->Type;
}
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index fc7fb555f0b9..f4e64b42c817 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -53,6 +53,8 @@ void MCTargetStreamer::emitLabel(MCSymbol *Symbol) {}
void MCTargetStreamer::finish() {}
+void MCTargetStreamer::emitConstantPools() {}
+
void MCTargetStreamer::changeSection(const MCSection *CurSection,
MCSection *Section,
const MCExpr *Subsection,
@@ -218,7 +220,7 @@ void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
}
void llvm::MCStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLen,
- llvm::SMLoc) {}
+ llvm::SMLoc, const MCSubtargetInfo& STI) {}
/// The implementation in this class just redirects to emitFill.
void MCStreamer::emitZeros(uint64_t NumBytes) { emitFill(NumBytes, 0); }
@@ -397,7 +399,7 @@ void MCStreamer::emitEHSymAttributes(const MCSymbol *Symbol,
MCSymbol *EHSymbol) {
}
-void MCStreamer::InitSections(bool NoExecStack) {
+void MCStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
@@ -1198,6 +1200,7 @@ void MCStreamer::emitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned ValueSize,
unsigned MaxBytesToEmit) {}
void MCStreamer::emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {}
void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) {}
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index e3d2439cef81..90249fb7380a 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -26,10 +26,10 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -49,6 +49,27 @@ void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) {
DF->getContents().append(EF->getContents().begin(), EF->getContents().end());
}
+void MCWasmStreamer::emitLabel(MCSymbol *S, SMLoc Loc) {
+ auto *Symbol = cast<MCSymbolWasm>(S);
+ MCObjectStreamer::emitLabel(Symbol, Loc);
+
+ const MCSectionWasm &Section =
+ static_cast<const MCSectionWasm &>(*getCurrentSectionOnly());
+ if (Section.getSegmentFlags() & wasm::WASM_SEG_FLAG_TLS)
+ Symbol->setTLS();
+}
+
+void MCWasmStreamer::emitLabelAtPos(MCSymbol *S, SMLoc Loc, MCFragment *F,
+ uint64_t Offset) {
+ auto *Symbol = cast<MCSymbolWasm>(S);
+ MCObjectStreamer::emitLabelAtPos(Symbol, Loc, F, Offset);
+
+ const MCSectionWasm &Section =
+ static_cast<const MCSectionWasm &>(*getCurrentSectionOnly());
+ if (Section.getSegmentFlags() & wasm::WASM_SEG_FLAG_TLS)
+ Symbol->setTLS();
+}
+
void MCWasmStreamer::emitAssemblerFlag(MCAssemblerFlag Flag) {
// Let the target do whatever target specific stuff it needs to do.
getAssembler().getBackend().handleAssemblerFlag(Flag);
@@ -117,6 +138,10 @@ bool MCWasmStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
Symbol->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
break;
+ case MCSA_ELF_TypeTLS:
+ Symbol->setTLS();
+ break;
+
case MCSA_ELF_TypeObject:
case MCSA_Cold:
break;
@@ -156,6 +181,10 @@ void MCWasmStreamer::emitIdent(StringRef IdentString) {
void MCWasmStreamer::emitInstToFragment(const MCInst &Inst,
const MCSubtargetInfo &STI) {
this->MCObjectStreamer::emitInstToFragment(Inst, STI);
+ MCRelaxableFragment &F = *cast<MCRelaxableFragment>(getCurrentFragment());
+
+ for (auto &Fixup : F.getFixups())
+ fixSymbolsInTLSFixups(Fixup.getValue());
}
void MCWasmStreamer::emitInstToData(const MCInst &Inst,
@@ -166,6 +195,9 @@ void MCWasmStreamer::emitInstToData(const MCInst &Inst,
raw_svector_ostream VecOS(Code);
Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ for (auto &Fixup : Fixups)
+ fixSymbolsInTLSFixups(Fixup.getValue());
+
// Append the encoded instruction to the current data fragment (or create a
// new such fragment if the current fragment is not a data fragment).
MCDataFragment *DF = getOrCreateDataFragment();
@@ -185,16 +217,37 @@ void MCWasmStreamer::finishImpl() {
this->MCObjectStreamer::finishImpl();
}
-MCStreamer *llvm::createWasmStreamer(MCContext &Context,
- std::unique_ptr<MCAsmBackend> &&MAB,
- std::unique_ptr<MCObjectWriter> &&OW,
- std::unique_ptr<MCCodeEmitter> &&CE,
- bool RelaxAll) {
- MCWasmStreamer *S =
- new MCWasmStreamer(Context, std::move(MAB), std::move(OW), std::move(CE));
- if (RelaxAll)
- S->getAssembler().setRelaxAll(true);
- return S;
+void MCWasmStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
+ switch (expr->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *be = cast<MCBinaryExpr>(expr);
+ fixSymbolsInTLSFixups(be->getLHS());
+ fixSymbolsInTLSFixups(be->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr &symRef = *cast<MCSymbolRefExpr>(expr);
+ switch (symRef.getKind()) {
+ case MCSymbolRefExpr::VK_WASM_TLSREL:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
+ getAssembler().registerSymbol(symRef.getSymbol());
+ cast<MCSymbolWasm>(symRef.getSymbol()).setTLS();
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+
+ case MCExpr::Unary:
+ fixSymbolsInTLSFixups(cast<MCUnaryExpr>(expr)->getSubExpr());
+ break;
+ }
}
void MCWasmStreamer::emitThumbFunc(MCSymbol *Func) {
@@ -215,3 +268,15 @@ void MCWasmStreamer::emitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
llvm_unreachable("Wasm doesn't support this directive");
}
+
+MCStreamer *llvm::createWasmStreamer(MCContext &Context,
+ std::unique_ptr<MCAsmBackend> &&MAB,
+ std::unique_ptr<MCObjectWriter> &&OW,
+ std::unique_ptr<MCCodeEmitter> &&CE,
+ bool RelaxAll) {
+ MCWasmStreamer *S =
+ new MCWasmStreamer(Context, std::move(MAB), std::move(OW), std::move(CE));
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index de1b0fd3c742..7773d8828931 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -144,8 +144,8 @@ static void EmitRuntimeFunction(MCStreamer &streamer,
MCContext &context = streamer.getContext();
streamer.emitValueToAlignment(4);
- EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
- EmitSymbolRefWithOfs(streamer, info->Function, info->End);
+ EmitSymbolRefWithOfs(streamer, info->Begin, info->Begin);
+ EmitSymbolRefWithOfs(streamer, info->Begin, info->End);
streamer.emitValue(MCSymbolRefExpr::create(info->Symbol,
MCSymbolRefExpr::VK_COFF_IMGREL32,
context), 4);
@@ -1073,7 +1073,7 @@ static void ARM64EmitRuntimeFunction(MCStreamer &streamer,
MCContext &context = streamer.getContext();
streamer.emitValueToAlignment(4);
- EmitSymbolRefWithOfs(streamer, info->Function, info->Begin);
+ EmitSymbolRefWithOfs(streamer, info->Begin, info->Begin);
if (info->PackedInfo)
streamer.emitInt32(info->PackedInfo);
else
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 69dc71b39fd1..0dfe5a5c2bdb 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -66,18 +66,19 @@ void MCWinCOFFStreamer::emitInstToData(const MCInst &Inst,
DF->getContents().append(Code.begin(), Code.end());
}
-void MCWinCOFFStreamer::InitSections(bool NoExecStack) {
+void MCWinCOFFStreamer::initSections(bool NoExecStack,
+ const MCSubtargetInfo &STI) {
// FIXME: this is identical to the ELF one.
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(4, &STI);
SwitchSection(getContext().getObjectFileInfo()->getDataSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(4, &STI);
SwitchSection(getContext().getObjectFileInfo()->getBSSSection());
- emitCodeAlignment(4);
+ emitCodeAlignment(4, &STI);
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index ec9e89fac416..90604782de13 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -18,7 +18,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 10ae27c2acc2..277d88cf1cd2 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -965,7 +965,7 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
// Write the section relocation entries, in reverse order to match 'as'
// (approximately, the exact algorithm is more complicated than this).
std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
- for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
+ for (const RelAndSymbol &Rel : llvm::reverse(Relocs)) {
W.write<uint32_t>(Rel.MRE.r_word0);
W.write<uint32_t>(Rel.MRE.r_word1);
}
diff --git a/llvm/lib/Support/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index 1f9c3bbf8229..0948a6b9f1a1 100644
--- a/llvm/lib/Support/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index 0dc5c9111db2..636c1d238932 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -292,6 +292,8 @@ private:
W->OS << Str;
}
+ void writeStringWithAlignment(const StringRef Str, unsigned Alignment);
+
void writeI32(int32_t val) {
char Buffer[4];
support::endian::write32le(Buffer, val);
@@ -317,7 +319,7 @@ private:
uint32_t writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
ArrayRef<WasmFunction> Functions);
uint32_t writeDataSection(const MCAsmLayout &Layout);
- void writeTagSection(ArrayRef<wasm::WasmTagType> Tags);
+ void writeTagSection(ArrayRef<uint32_t> TagTypes);
void writeGlobalSection(ArrayRef<wasm::WasmGlobal> Globals);
void writeTableSection(ArrayRef<wasm::WasmTable> Tables);
void writeRelocSection(uint32_t SectionIndex, StringRef Name,
@@ -362,6 +364,28 @@ void WasmObjectWriter::startSection(SectionBookkeeping &Section,
Section.Index = SectionCount++;
}
+// Write a string, padded so that the data following it starts at the
+// requested alignment.
+// TODO: support alignment at asm and llvm level?
+void WasmObjectWriter::writeStringWithAlignment(const StringRef Str,
+ unsigned Alignment) {
+
+  // Calculate the encoded size of the string length and the padding needed to
+  // reach the requested alignment.
+ raw_null_ostream NullOS;
+ uint64_t StrSizeLength = encodeULEB128(Str.size(), NullOS);
+ uint64_t Offset = W->OS.tell() + StrSizeLength + Str.size();
+ uint64_t Paddings = offsetToAlignment(Offset, Align(Alignment));
+ Offset += Paddings;
+
+  // A LEB128 encoding longer than 5 bytes is invalid for a 32-bit size.
+ assert((StrSizeLength + Paddings) <= 5 && "too long string to align");
+
+ encodeSLEB128(Str.size(), W->OS, StrSizeLength + Paddings);
+ W->OS << Str;
+
+ assert(W->OS.tell() == Offset && "invalid padding");
+}
+
void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
StringRef Name) {
LLVM_DEBUG(dbgs() << "startCustomSection " << Name << "\n");
@@ -371,7 +395,12 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
Section.PayloadOffset = W->OS.tell();
// Custom sections in wasm also have a string identifier.
- writeString(Name);
+ if (Name != "__clangast") {
+ writeString(Name);
+ } else {
+    // The on-disk hashtable in the clangast section needs 4-byte alignment.
+ writeStringWithAlignment(Name, 4);
+ }
// The position where the custom section starts.
Section.ContentsOffset = W->OS.tell();
@@ -565,8 +594,14 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
SymA->setUsedInReloc();
}
- if (RefA->getKind() == MCSymbolRefExpr::VK_GOT)
+ switch (RefA->getKind()) {
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
SymA->setUsedInGOT();
+ break;
+ default:
+ break;
+ }
WasmRelocationEntry Rec(FixupOffset, SymA, C, Type, &FixupSection);
LLVM_DEBUG(dbgs() << "WasmReloc: " << Rec << "\n");
@@ -825,8 +860,8 @@ void WasmObjectWriter::writeImportSection(ArrayRef<wasm::WasmImport> Imports,
encodeULEB128(NumElements, W->OS); // initial
break;
case wasm::WASM_EXTERNAL_TAG:
- W->OS << char(Import.Tag.Attribute);
- encodeULEB128(Import.Tag.SigIndex, W->OS);
+ W->OS << char(0); // Reserved 'attribute' field
+ encodeULEB128(Import.SigIndex, W->OS);
break;
default:
llvm_unreachable("unsupported import kind");
@@ -850,17 +885,17 @@ void WasmObjectWriter::writeFunctionSection(ArrayRef<WasmFunction> Functions) {
endSection(Section);
}
-void WasmObjectWriter::writeTagSection(ArrayRef<wasm::WasmTagType> Tags) {
- if (Tags.empty())
+void WasmObjectWriter::writeTagSection(ArrayRef<uint32_t> TagTypes) {
+ if (TagTypes.empty())
return;
SectionBookkeeping Section;
startSection(Section, wasm::WASM_SEC_TAG);
- encodeULEB128(Tags.size(), W->OS);
- for (const wasm::WasmTagType &Tag : Tags) {
- W->OS << char(Tag.Attribute);
- encodeULEB128(Tag.SigIndex, W->OS);
+ encodeULEB128(TagTypes.size(), W->OS);
+ for (uint32_t Index : TagTypes) {
+ W->OS << char(0); // Reserved 'attribute' field
+ encodeULEB128(Index, W->OS);
}
endSection(Section);
@@ -1052,7 +1087,7 @@ uint32_t WasmObjectWriter::writeDataSection(const MCAsmLayout &Layout) {
void WasmObjectWriter::writeRelocSection(
uint32_t SectionIndex, StringRef Name,
std::vector<WasmRelocationEntry> &Relocs) {
- // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // See: https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md
// for descriptions of the reloc sections.
if (Relocs.empty())
@@ -1340,8 +1375,7 @@ void WasmObjectWriter::prepareImports(
Import.Module = WS.getImportModule();
Import.Field = WS.getImportName();
Import.Kind = wasm::WASM_EXTERNAL_TAG;
- Import.Tag.Attribute = wasm::WASM_TAG_ATTRIBUTE_EXCEPTION;
- Import.Tag.SigIndex = getTagType(WS);
+ Import.SigIndex = getTagType(WS);
Imports.push_back(Import);
assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = NumTagImports++;
@@ -1409,7 +1443,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
SmallVector<uint32_t, 4> TableElems;
SmallVector<wasm::WasmImport, 4> Imports;
SmallVector<wasm::WasmExport, 4> Exports;
- SmallVector<wasm::WasmTagType, 1> Tags;
+ SmallVector<uint32_t, 2> TagTypes;
SmallVector<wasm::WasmGlobal, 1> Globals;
SmallVector<wasm::WasmTable, 1> Tables;
SmallVector<wasm::WasmSymbolInfo, 4> SymbolInfos;
@@ -1644,16 +1678,15 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
LLVM_DEBUG(dbgs() << " -> table index: "
<< WasmIndices.find(&WS)->second << "\n");
} else if (WS.isTag()) {
- // C++ exception symbol (__cpp_exception)
+ // C++ exception symbol (__cpp_exception) or longjmp symbol
+ // (__c_longjmp)
unsigned Index;
if (WS.isDefined()) {
- Index = NumTagImports + Tags.size();
- wasm::WasmTagType Tag;
- Tag.SigIndex = getTagType(WS);
- Tag.Attribute = wasm::WASM_TAG_ATTRIBUTE_EXCEPTION;
+ Index = NumTagImports + TagTypes.size();
+ uint32_t SigIndex = getTagType(WS);
assert(WasmIndices.count(&WS) == 0);
WasmIndices[&WS] = Index;
- Tags.push_back(Tag);
+ TagTypes.push_back(SigIndex);
} else {
// An import; the index was assigned above.
assert(WasmIndices.count(&WS) > 0);
@@ -1747,6 +1780,8 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
Flags |= wasm::WASM_SYMBOL_EXPLICIT_NAME;
if (WS.hasExportName())
Flags |= wasm::WASM_SYMBOL_EXPORTED;
+ if (WS.isTLS())
+ Flags |= wasm::WASM_SYMBOL_TLS;
wasm::WasmSymbolInfo Info;
Info.Name = WS.getName();
@@ -1869,7 +1904,7 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
writeFunctionSection(Functions);
writeTableSection(Tables);
// Skip the "memory" section; we import the memory instead.
- writeTagSection(Tags);
+ writeTagSection(TagTypes);
writeGlobalSection(Globals);
writeExportSection(Exports);
const MCSymbol *IndirectFunctionTable =
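
To see why writeStringWithAlignment above folds the padding into the
LEB128-encoded string length, here is a small worked sketch. The stream offsets
are made up, and offsetToAlign merely mirrors what llvm::offsetToAlignment
computes for this illustration:

    #include <cstdint>
    #include <cstdio>
    #include <string>

    // Bytes needed to move Offset up to the next multiple of Align.
    static uint64_t offsetToAlign(uint64_t Offset, uint64_t Align) {
      return (Align - (Offset % Align)) % Align;
    }

    int main() {
      const std::string Name = "__clangast"; // 10 bytes; length fits in 1 byte
      const uint64_t StrSizeLength = 1;      // encoded size of the length field
      const uint64_t Tells[] = {0x24, 0x26}; // hypothetical stream positions
      for (uint64_t Tell : Tells) {
        uint64_t End = Tell + StrSizeLength + Name.size();
        uint64_t Pad = offsetToAlign(End, 4);
        // The writer realizes Pad by emitting the length as a LEB128 padded to
        // StrSizeLength + Pad bytes, so the section payload starts aligned.
        std::printf("tell=0x%llx -> pad=%llu, length field uses %llu byte(s)\n",
                    (unsigned long long)Tell, (unsigned long long)Pad,
                    (unsigned long long)(StrSizeLength + Pad));
      }
      return 0;
    }
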
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index adf0d3eb443c..177253d7a9d7 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -168,6 +168,24 @@ struct CsectSectionEntry : public SectionEntry {
virtual ~CsectSectionEntry() {}
};
+struct DwarfSectionEntry : public SectionEntry {
+ // For DWARF section entry.
+ std::unique_ptr<XCOFFSection> DwarfSect;
+
+ DwarfSectionEntry(StringRef N, int32_t Flags,
+ std::unique_ptr<XCOFFSection> Sect)
+ : SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)) {
+ assert(DwarfSect->MCSec->isDwarfSect() &&
+ "This should be a DWARF section!");
+ assert(N.size() <= XCOFF::NameSize && "section name too long");
+ memcpy(Name, N.data(), N.size());
+ }
+
+ DwarfSectionEntry(DwarfSectionEntry &&s) = default;
+
+ virtual ~DwarfSectionEntry() {}
+};
+
class XCOFFObjectWriter : public MCObjectWriter {
uint32_t SymbolTableEntryCount = 0;
@@ -213,6 +231,8 @@ class XCOFFObjectWriter : public MCObjectWriter {
std::array<CsectSectionEntry *const, 5> Sections{
{&Text, &Data, &BSS, &TData, &TBSS}};
+ std::vector<DwarfSectionEntry> DwarfSections;
+
CsectGroup &getCsectGroup(const MCSectionXCOFF *MCSec);
virtual void reset() override;
@@ -231,12 +251,21 @@ class XCOFFObjectWriter : public MCObjectWriter {
uint64_t);
void writeSymbolTableEntryForControlSection(const XCOFFSection &, int16_t,
XCOFF::StorageClass);
+ void writeSymbolTableEntryForDwarfSection(const XCOFFSection &, int16_t);
void writeFileHeader();
void writeSectionHeaderTable();
void writeSections(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ void writeSectionForControlSectionEntry(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const CsectSectionEntry &CsectEntry,
+ uint32_t &CurrentAddressLocation);
+ void writeSectionForDwarfSectionEntry(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const DwarfSectionEntry &DwarfEntry,
+ uint32_t &CurrentAddressLocation);
void writeSymbolTable(const MCAsmLayout &Layout);
void writeRelocations();
- void writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &CSection);
+ void writeRelocation(XCOFFRelocation Reloc, const XCOFFSection &Section);
// Called after all the csects and symbols have been processed by
// `executePostLayoutBinding`, this function handles building up the majority
@@ -290,6 +319,8 @@ void XCOFFObjectWriter::reset() {
// Reset any sections we have written to, and empty the section header table.
for (auto *Sec : Sections)
Sec->reset();
+ for (auto &DwarfSec : DwarfSections)
+ DwarfSec.reset();
// Reset states in XCOFFObjectWriter.
SymbolTableEntryCount = 0;
@@ -372,17 +403,32 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const auto *MCSec = cast<const MCSectionXCOFF>(&S);
assert(SectionMap.find(MCSec) == SectionMap.end() &&
"Cannot add a section twice.");
- assert(XCOFF::XTY_ER != MCSec->getCSectType() &&
- "An undefined csect should not get registered.");
// If the name does not fit in the storage provided in the symbol table
// entry, add it to the string table.
if (nameShouldBeInStringTable(MCSec->getSymbolTableName()))
Strings.add(MCSec->getSymbolTableName());
-
- CsectGroup &Group = getCsectGroup(MCSec);
- Group.emplace_back(MCSec);
- SectionMap[MCSec] = &Group.back();
+ if (MCSec->isCsect()) {
+    // A new control section. Its CsectSectionEntry should already be statically
+ // generated as Text/Data/BSS/TDATA/TBSS. Add this section to the group of
+ // the CsectSectionEntry.
+ assert(XCOFF::XTY_ER != MCSec->getCSectType() &&
+ "An undefined csect should not get registered.");
+ CsectGroup &Group = getCsectGroup(MCSec);
+ Group.emplace_back(MCSec);
+ SectionMap[MCSec] = &Group.back();
+ } else if (MCSec->isDwarfSect()) {
+ // A new DwarfSectionEntry.
+ std::unique_ptr<XCOFFSection> DwarfSec =
+ std::make_unique<XCOFFSection>(MCSec);
+ SectionMap[MCSec] = DwarfSec.get();
+
+ DwarfSectionEntry SecEntry(MCSec->getName(),
+ MCSec->getDwarfSubtypeFlags().getValue(),
+ std::move(DwarfSec));
+ DwarfSections.push_back(std::move(SecEntry));
+ } else
+    llvm_unreachable("unsupported section type!");
}
for (const MCSymbol &S : Asm.symbols()) {
@@ -443,13 +489,20 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
: SymbolIndexMap[ContainingCsect->getQualNameSymbol()];
};
- auto getVirtualAddress = [this,
- &Layout](const MCSymbol *Sym,
- const MCSectionXCOFF *ContainingCsect) {
- // If Sym is a csect, return csect's address.
- // If Sym is a label, return csect's address + label's offset from the csect.
- return SectionMap[ContainingCsect]->Address +
- (Sym->isDefined() ? Layout.getSymbolOffset(*Sym) : 0);
+ auto getVirtualAddress =
+ [this, &Layout](const MCSymbol *Sym,
+ const MCSectionXCOFF *ContainingSect) -> uint64_t {
+ // A DWARF section.
+ if (ContainingSect->isDwarfSect())
+ return Layout.getSymbolOffset(*Sym);
+
+ // A csect.
+ if (!Sym->isDefined())
+ return SectionMap[ContainingSect]->Address;
+
+ // A label.
+    assert(Sym->isDefined() && "not a valid object that has an address!");
+ return SectionMap[ContainingSect]->Address + Layout.getSymbolOffset(*Sym);
};
const MCSymbol *const SymA = &Target.getSymA()->getSymbol();
@@ -538,41 +591,12 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
void XCOFFObjectWriter::writeSections(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
uint32_t CurrentAddressLocation = 0;
- for (const auto *Section : Sections) {
- // Nothing to write for this Section.
- if (Section->Index == SectionEntry::UninitializedIndex ||
- Section->IsVirtual)
- continue;
-
- // There could be a gap (without corresponding zero padding) between
- // sections.
- assert(((CurrentAddressLocation <= Section->Address) ||
- (Section->Flags == XCOFF::STYP_TDATA) ||
- (Section->Flags == XCOFF::STYP_TBSS)) &&
- "CurrentAddressLocation should be less than or equal to section "
- "address if the section is not TData or TBSS.");
-
- CurrentAddressLocation = Section->Address;
-
- for (const auto *Group : Section->Groups) {
- for (const auto &Csect : *Group) {
- if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
- W.OS.write_zeros(PaddingSize);
- if (Csect.Size)
- Asm.writeSectionData(W.OS, Csect.MCSec, Layout);
- CurrentAddressLocation = Csect.Address + Csect.Size;
- }
- }
-
- // The size of the tail padding in a section is the end virtual address of
- // the current section minus the the end virtual address of the last csect
- // in that section.
- if (uint32_t PaddingSize =
- Section->Address + Section->Size - CurrentAddressLocation) {
- W.OS.write_zeros(PaddingSize);
- CurrentAddressLocation += PaddingSize;
- }
- }
+ for (const auto *Section : Sections)
+ writeSectionForControlSectionEntry(Asm, Layout, *Section,
+ CurrentAddressLocation);
+ for (const auto &DwarfSection : DwarfSections)
+ writeSectionForDwarfSectionEntry(Asm, Layout, DwarfSection,
+ CurrentAddressLocation);
}
uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
@@ -654,6 +678,36 @@ void XCOFFObjectWriter::writeSymbolTableEntryForCsectMemberLabel(
W.write<uint16_t>(0);
}
+void XCOFFObjectWriter::writeSymbolTableEntryForDwarfSection(
+ const XCOFFSection &DwarfSectionRef, int16_t SectionIndex) {
+ assert(DwarfSectionRef.MCSec->isDwarfSect() && "Not a DWARF section!");
+
+ // n_name, n_zeros, n_offset
+ writeSymbolName(DwarfSectionRef.getSymbolTableName());
+ // n_value
+ W.write<uint32_t>(0);
+ // n_scnum
+ W.write<int16_t>(SectionIndex);
+ // n_type
+ W.write<uint16_t>(0);
+ // n_sclass
+ W.write<uint8_t>(XCOFF::C_DWARF);
+ // Always 1 aux entry for now.
+ W.write<uint8_t>(1);
+
+ // Now output the auxiliary entry.
+ // x_scnlen
+ W.write<uint32_t>(DwarfSectionRef.Size);
+ // Reserved
+ W.write<uint32_t>(0);
+ // x_nreloc. Set to 0 for now.
+ W.write<uint32_t>(0);
+ // Reserved
+ W.write<uint32_t>(0);
+ // Reserved
+ W.write<uint16_t>(0);
+}
+
void XCOFFObjectWriter::writeSymbolTableEntryForControlSection(
const XCOFFSection &CSectionRef, int16_t SectionIndex,
XCOFF::StorageClass StorageClass) {
@@ -711,10 +765,10 @@ void XCOFFObjectWriter::writeFileHeader() {
}
void XCOFFObjectWriter::writeSectionHeaderTable() {
- for (const auto *Sec : Sections) {
+ auto writeSectionHeader = [&](const SectionEntry *Sec, bool IsDwarf) {
// Nothing to write for this Section.
if (Sec->Index == SectionEntry::UninitializedIndex)
- continue;
+ return false;
// Write Name.
ArrayRef<char> NameRef(Sec->Name, XCOFF::NameSize);
@@ -722,8 +776,14 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
// Write the Physical Address and Virtual Address. In an object file these
// are the same.
- W.write<uint32_t>(Sec->Address);
- W.write<uint32_t>(Sec->Address);
+ // We use 0 for DWARF sections' Physical and Virtual Addresses.
+ if (!IsDwarf) {
+ W.write<uint32_t>(Sec->Address);
+ W.write<uint32_t>(Sec->Address);
+ } else {
+ W.write<uint32_t>(0);
+ W.write<uint32_t>(0);
+ }
W.write<uint32_t>(Sec->Size);
W.write<uint32_t>(Sec->FileOffsetToData);
@@ -738,12 +798,25 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
W.write<uint16_t>(0);
W.write<int32_t>(Sec->Flags);
- }
+
+ return true;
+ };
+
+ for (const auto *CsectSec : Sections)
+ writeSectionHeader(CsectSec, /* IsDwarf */ false);
+ for (const auto &DwarfSec : DwarfSections)
+ writeSectionHeader(&DwarfSec, /* IsDwarf */ true);
}
void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc,
- const XCOFFSection &CSection) {
- W.write<uint32_t>(CSection.Address + Reloc.FixupOffsetInCsect);
+ const XCOFFSection &Section) {
+ if (Section.MCSec->isCsect())
+ W.write<uint32_t>(Section.Address + Reloc.FixupOffsetInCsect);
+ else {
+ // DWARF sections' address is set to 0.
+    assert(Section.MCSec->isDwarfSect() && "unsupported section type!");
+ W.write<uint32_t>(Reloc.FixupOffsetInCsect);
+ }
W.write<uint32_t>(Reloc.SymbolTableIndex);
W.write<uint8_t>(Reloc.SignAndSize);
W.write<uint8_t>(Reloc.Type);
@@ -765,6 +838,10 @@ void XCOFFObjectWriter::writeRelocations() {
}
}
}
+
+ for (const auto &DwarfSection : DwarfSections)
+ for (const auto &Reloc : DwarfSection.DwarfSect->Relocations)
+ writeRelocation(Reloc, *DwarfSection.DwarfSect);
}
void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
@@ -819,6 +896,10 @@ void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
}
}
}
+
+ for (const auto &DwarfSection : DwarfSections)
+ writeSymbolTableEntryForDwarfSection(*DwarfSection.DwarfSect,
+ DwarfSection.Index);
}
void XCOFFObjectWriter::finalizeSectionInfo() {
@@ -844,11 +925,17 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
}
}
+ for (auto &DwarfSection : DwarfSections)
+ DwarfSection.RelocationCount = DwarfSection.DwarfSect->Relocations.size();
+
// Calculate the file offset to the relocation entries.
uint64_t RawPointer = RelocationEntryOffset;
- for (auto Sec : Sections) {
- if (Sec->Index == SectionEntry::UninitializedIndex || !Sec->RelocationCount)
- continue;
+ auto calcOffsetToRelocations = [&](SectionEntry *Sec, bool IsDwarf) {
+ if (!IsDwarf && Sec->Index == SectionEntry::UninitializedIndex)
+ return false;
+
+ if (!Sec->RelocationCount)
+ return false;
Sec->FileOffsetToRelocations = RawPointer;
const uint32_t RelocationSizeInSec =
@@ -856,7 +943,15 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
RawPointer += RelocationSizeInSec;
if (RawPointer > UINT32_MAX)
report_fatal_error("Relocation data overflowed this object file.");
- }
+
+ return true;
+ };
+
+ for (auto *Sec : Sections)
+ calcOffsetToRelocations(Sec, /* IsDwarf */ false);
+
+ for (auto &DwarfSec : DwarfSections)
+ calcOffsetToRelocations(&DwarfSec, /* IsDwarf */ true);
// TODO Error check that the number of symbol table entries fits in 32-bits
// signed ...
@@ -944,6 +1039,37 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
Section->Size = Address - Section->Address;
}
+ for (auto &DwarfSection : DwarfSections) {
+ assert((SectionIndex <= MaxSectionIndex) && "Section index overflow!");
+
+ XCOFFSection &DwarfSect = *DwarfSection.DwarfSect;
+ const MCSectionXCOFF *MCSec = DwarfSect.MCSec;
+
+ // Section index.
+ DwarfSection.Index = SectionIndex++;
+ SectionCount++;
+
+ // Symbol index.
+ DwarfSect.SymbolTableIndex = SymbolTableIndex;
+ SymbolIndexMap[MCSec->getQualNameSymbol()] = DwarfSect.SymbolTableIndex;
+    // 1 main and 1 auxiliary symbol table entry for the DWARF section.
+ SymbolTableIndex += 2;
+
+    // Section address. Align it to the section alignment.
+    // The section header uses 0 for DWARF sections' physical and virtual
+    // addresses; this internal address only records where the section data is
+    // placed in the final object.
+ // See writeSectionForDwarfSectionEntry().
+ DwarfSection.Address = DwarfSect.Address =
+ alignTo(Address, MCSec->getAlignment());
+
+ // Section size.
+    // For DWARF sections we must use the real size, which may not be aligned.
+ DwarfSection.Size = DwarfSect.Size = Layout.getSectionAddressSize(MCSec);
+
+    // Align Address to the default alignment for the following section.
+ Address = alignTo(DwarfSect.Address + DwarfSect.Size, DefaultSectionAlign);
+ }
+
SymbolTableEntryCount = SymbolTableIndex;
// Calculate the RawPointer value for each section.
@@ -959,9 +1085,102 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
report_fatal_error("Section raw data overflowed this object file.");
}
+ for (auto &DwarfSection : DwarfSections) {
+    // Addresses of csect sections are always aligned to DefaultSectionAlign,
+    // but a DWARF section's address is aligned to its own section alignment,
+    // which may be larger than DefaultSectionAlign, so account for the extra
+    // padding here.
+ RawPointer =
+ alignTo(RawPointer, DwarfSection.DwarfSect->MCSec->getAlignment());
+
+ DwarfSection.FileOffsetToData = RawPointer;
+    // Some sections, such as DWARF sections, have sizes that are not aligned,
+    // so RawPointer may not be aligned afterwards.
+ RawPointer += DwarfSection.Size;
+ // Make sure RawPointer is aligned.
+ RawPointer = alignTo(RawPointer, DefaultSectionAlign);
+
+ assert(RawPointer <= UINT32_MAX &&
+ "Section raw data overflowed this object file.");
+ }
+
RelocationEntryOffset = RawPointer;
}
+void XCOFFObjectWriter::writeSectionForControlSectionEntry(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const CsectSectionEntry &CsectEntry, uint32_t &CurrentAddressLocation) {
+ // Nothing to write for this Section.
+ if (CsectEntry.Index == SectionEntry::UninitializedIndex)
+ return;
+
+ // There could be a gap (without corresponding zero padding) between
+ // sections.
+ assert(((CurrentAddressLocation <= CsectEntry.Address) ||
+ (CsectEntry.Flags == XCOFF::STYP_TDATA) ||
+ (CsectEntry.Flags == XCOFF::STYP_TBSS)) &&
+ "CurrentAddressLocation should be less than or equal to section "
+ "address if the section is not TData or TBSS.");
+
+ CurrentAddressLocation = CsectEntry.Address;
+
+  // For virtual sections there is nothing to write, but CurrentAddressLocation
+  // still needs to advance so that later sections (e.g. DWARF sections) are
+  // written at the correct location.
+ if (CsectEntry.IsVirtual) {
+ CurrentAddressLocation += CsectEntry.Size;
+ return;
+ }
+
+ for (const auto &Group : CsectEntry.Groups) {
+ for (const auto &Csect : *Group) {
+ if (uint32_t PaddingSize = Csect.Address - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+ if (Csect.Size)
+ Asm.writeSectionData(W.OS, Csect.MCSec, Layout);
+ CurrentAddressLocation = Csect.Address + Csect.Size;
+ }
+ }
+
+ // The size of the tail padding in a section is the end virtual address of
+  // the current section minus the end virtual address of the last csect
+ // in that section.
+ if (uint32_t PaddingSize =
+ CsectEntry.Address + CsectEntry.Size - CurrentAddressLocation) {
+ W.OS.write_zeros(PaddingSize);
+ CurrentAddressLocation += PaddingSize;
+ }
+}
+
+void XCOFFObjectWriter::writeSectionForDwarfSectionEntry(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const DwarfSectionEntry &DwarfEntry, uint32_t &CurrentAddressLocation) {
+ // There could be a gap (without corresponding zero padding) between
+  // sections, for example when a DWARF section's alignment is bigger than
+  // DefaultSectionAlign.
+ assert(CurrentAddressLocation <= DwarfEntry.Address &&
+ "CurrentAddressLocation should be less than or equal to section "
+ "address.");
+
+ if (uint32_t PaddingSize = DwarfEntry.Address - CurrentAddressLocation)
+ W.OS.write_zeros(PaddingSize);
+
+ if (DwarfEntry.Size)
+ Asm.writeSectionData(W.OS, DwarfEntry.DwarfSect->MCSec, Layout);
+
+ CurrentAddressLocation = DwarfEntry.Address + DwarfEntry.Size;
+
+ // DWARF section size is not aligned to DefaultSectionAlign.
+ // Make sure CurrentAddressLocation is aligned to DefaultSectionAlign.
+ uint32_t Mod = CurrentAddressLocation % DefaultSectionAlign;
+ uint32_t TailPaddingSize = Mod ? DefaultSectionAlign - Mod : 0;
+ if (TailPaddingSize)
+ W.OS.write_zeros(TailPaddingSize);
+
+ CurrentAddressLocation += TailPaddingSize;
+}
+
// Takes the log base 2 of the alignment and shifts the result into the 5 most
// significant bits of a byte, then or's in the csect type into the least
// significant 3 bits.
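
The packing described in the comment above amounts to one shift and one OR. A
hedged, standalone sketch (the csect type value 1 is chosen only for
illustration; the real value comes from the XCOFF symbol type enum):

    #include <cstdint>
    #include <cstdio>

    // Pack log2(alignment) into the top 5 bits and the csect type into the
    // low 3 bits of a single byte.
    static uint8_t packAlignAndType(unsigned Log2Align, uint8_t CsectType) {
      return uint8_t((Log2Align << 3) | (CsectType & 0x7));
    }

    int main() {
      // A 16-byte aligned csect: log2(16) = 4.
      uint8_t B = packAlignAndType(4, 1);
      std::printf("packed byte = 0x%02x (align bits = %u, type bits = %u)\n",
                  unsigned(B), unsigned(B >> 3), unsigned(B & 0x7));
      return 0;
    }
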
diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp
index 99d2373588ac..c21ec9e62dff 100644
--- a/llvm/lib/MCA/Context.cpp
+++ b/llvm/lib/MCA/Context.cpp
@@ -74,14 +74,17 @@ Context::createInOrderPipeline(const PipelineOptions &Opts, SourceMgr &SrcMgr,
CustomBehaviour &CB) {
const MCSchedModel &SM = STI.getSchedModel();
auto PRF = std::make_unique<RegisterFile>(SM, MRI, Opts.RegisterFileSize);
+ auto LSU = std::make_unique<LSUnit>(SM, Opts.LoadQueueSize,
+ Opts.StoreQueueSize, Opts.AssumeNoAlias);
// Create the pipeline stages.
auto Entry = std::make_unique<EntryStage>(SrcMgr);
- auto InOrderIssue = std::make_unique<InOrderIssueStage>(STI, *PRF, CB);
+ auto InOrderIssue = std::make_unique<InOrderIssueStage>(STI, *PRF, CB, *LSU);
auto StagePipeline = std::make_unique<Pipeline>();
// Pass the ownership of all the hardware units to this Context.
addHardwareUnit(std::move(PRF));
+ addHardwareUnit(std::move(LSU));
// Build the pipeline.
StagePipeline->appendStage(std::move(Entry));
diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp
index 23211f402927..a9ea8edff059 100644
--- a/llvm/lib/MCA/CustomBehaviour.cpp
+++ b/llvm/lib/MCA/CustomBehaviour.cpp
@@ -24,5 +24,23 @@ unsigned CustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
return 0;
}
+std::vector<std::unique_ptr<View>>
+CustomBehaviour::getStartViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts) {
+ return std::vector<std::unique_ptr<View>>();
+}
+
+std::vector<std::unique_ptr<View>>
+CustomBehaviour::getPostInstrInfoViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts) {
+ return std::vector<std::unique_ptr<View>>();
+}
+
+std::vector<std::unique_ptr<View>>
+CustomBehaviour::getEndViews(llvm::MCInstPrinter &IP,
+ llvm::ArrayRef<llvm::MCInst> Insts) {
+ return std::vector<std::unique_ptr<View>>();
+}
+
} // namespace mca
} // namespace llvm
diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 81c4f682f63d..474bf84cf891 100644
--- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -288,6 +288,19 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// If this move has been eliminated, then method tryEliminateMoveOrSwap should
// have already updated all the register mappings.
if (!IsEliminated) {
+ // Check if this is one of multiple writes performed by this
+ // instruction to register RegID.
+ const WriteRef &OtherWrite = RegisterMappings[RegID].first;
+ const WriteState *OtherWS = OtherWrite.getWriteState();
+ if (OtherWS && OtherWrite.getSourceIndex() == Write.getSourceIndex()) {
+ if (OtherWS->getLatency() > WS.getLatency()) {
+ // Conservatively keep the slowest write on RegID.
+ if (ShouldAllocatePhysRegs)
+ allocatePhysRegs(RegisterMappings[RegID].second, UsedPhysRegs);
+ return;
+ }
+ }
+
// Update the mapping for register RegID including its sub-registers.
RegisterMappings[RegID].first = Write;
RegisterMappings[RegID].second.AliasRegID = 0U;
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 4067d86930d1..0ab845a4c28f 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -687,7 +687,7 @@ InstrBuilder::createInstruction(const MCInst &MCI) {
if (IsDepBreaking) {
// A mask of all zeroes means: explicit input operands are not
// independent.
- if (Mask.isNullValue()) {
+ if (Mask.isZero()) {
if (!RD.isImplicitRead())
RS.setIndependentFromDef();
} else {
diff --git a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
index ccf6f20a6737..fa5c0fc66b9e 100644
--- a/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
+++ b/llvm/lib/MCA/Stages/InOrderIssueStage.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MCA/Stages/InOrderIssueStage.h"
+#include "llvm/MCA/HardwareUnits/LSUnit.h"
#include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h"
#include "llvm/MCA/Instruction.h"
@@ -43,9 +44,10 @@ void StallInfo::cycleEnd() {
}
InOrderIssueStage::InOrderIssueStage(const MCSubtargetInfo &STI,
- RegisterFile &PRF, CustomBehaviour &CB)
- : STI(STI), PRF(PRF), RM(STI.getSchedModel()), CB(CB), NumIssued(), SI(),
- CarryOver(), Bandwidth(), LastWriteBackCycle() {}
+ RegisterFile &PRF, CustomBehaviour &CB,
+ LSUnit &LSU)
+ : STI(STI), PRF(PRF), RM(STI.getSchedModel()), CB(CB), LSU(LSU),
+ NumIssued(), SI(), CarryOver(), Bandwidth(), LastWriteBackCycle() {}
unsigned InOrderIssueStage::getIssueWidth() const {
return STI.getSchedModel().IssueWidth;
@@ -125,6 +127,13 @@ bool InOrderIssueStage::canExecute(const InstRef &IR) {
return false;
}
+ if (IR.getInstruction()->isMemOp() && !LSU.isReady(IR)) {
+ // This load (store) aliases with a preceding store (load). Delay
+ // it until the depenency is cleared.
+ SI.update(IR, /* delay */ 1, StallInfo::StallKind::LOAD_STORE);
+ return false;
+ }
+
if (unsigned CustomStallCycles = CB.checkCustomHazard(IssuedInst, IR)) {
SI.update(IR, CustomStallCycles, StallInfo::StallKind::CUSTOM_STALL);
return false;
@@ -188,6 +197,10 @@ void InOrderIssueStage::notifyInstructionRetired(const InstRef &IR,
}
llvm::Error InOrderIssueStage::execute(InstRef &IR) {
+ Instruction &IS = *IR.getInstruction();
+ if (IS.isMemOp())
+ IS.setLSUTokenID(LSU.dispatch(IR));
+
if (llvm::Error E = tryIssue(IR))
return E;
@@ -222,6 +235,9 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
RM.issueInstruction(Desc, UsedResources);
IS.execute(SourceIndex);
+ if (IS.isMemOp())
+ LSU.onInstructionIssued(IR);
+
// Replace resource masks with valid resource processor IDs.
for (ResourceUse &Use : UsedResources) {
uint64_t Mask = Use.first.first;
@@ -245,6 +261,7 @@ llvm::Error InOrderIssueStage::tryIssue(InstRef &IR) {
// the execution and retirement now.
if (IS.isExecuted()) {
PRF.onInstructionExecuted(&IS);
+ LSU.onInstructionExecuted(IR);
notifyEvent<HWInstructionEvent>(
HWInstructionEvent(HWInstructionEvent::Executed, IR));
LLVM_DEBUG(dbgs() << "[E] Instruction #" << IR << " is executed\n");
@@ -279,6 +296,7 @@ void InOrderIssueStage::updateIssuedInst() {
}
PRF.onInstructionExecuted(&IS);
+ LSU.onInstructionExecuted(IR);
notifyInstructionExecuted(IR);
++NumExecuted;
@@ -324,6 +342,9 @@ void InOrderIssueStage::retireInstruction(InstRef &IR) {
for (const WriteState &WS : IS.getDefs())
PRF.removeRegisterWrite(WS, FreedRegs);
+ if (IS.isMemOp())
+ LSU.onInstructionRetired(IR);
+
notifyInstructionRetired(IR, FreedRegs);
}
@@ -363,6 +384,7 @@ llvm::Error InOrderIssueStage::cycleStart() {
Bandwidth = getIssueWidth();
PRF.cycleStart();
+ LSU.cycleEvent();
// Release consumed resources.
SmallVector<ResourceRef, 4> Freed;
diff --git a/llvm/lib/MCA/Stages/InstructionTables.cpp b/llvm/lib/MCA/Stages/InstructionTables.cpp
index 93e368123066..a842b52dcd39 100644
--- a/llvm/lib/MCA/Stages/InstructionTables.cpp
+++ b/llvm/lib/MCA/Stages/InstructionTables.cpp
@@ -24,7 +24,7 @@ Error InstructionTables::execute(InstRef &IR) {
UsedResources.clear();
// Identify the resources consumed by this instruction.
- for (const std::pair<const uint64_t, ResourceUsage> Resource :
+ for (const std::pair<uint64_t, ResourceUsage> &Resource :
Desc.Resources) {
// Skip zero-cycle resources (i.e., unused resources).
if (!Resource.second.size())
diff --git a/llvm/tools/llvm-mca/Views/View.cpp b/llvm/lib/MCA/View.cpp
index 09d08d3ae007..a56d3a124934 100644
--- a/llvm/tools/llvm-mca/Views/View.cpp
+++ b/llvm/lib/MCA/View.cpp
@@ -11,7 +11,7 @@
///
//===----------------------------------------------------------------------===//
-#include "Views/View.h"
+#include "llvm/MCA/View.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 6ff896cf347e..5492692445e7 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -418,7 +418,7 @@ Expected<bool> Archive::Child::isThinMember() const {
if (!NameOrErr)
return NameOrErr.takeError();
StringRef Name = NameOrErr.get();
- return Parent->IsThin && Name != "/" && Name != "//";
+ return Parent->IsThin && Name != "/" && Name != "//" && Name != "/SYM64/";
}
Expected<std::string> Archive::Child::getFullName() const {
diff --git a/llvm/lib/Object/COFFModuleDefinition.cpp b/llvm/lib/Object/COFFModuleDefinition.cpp
index 8f29f7a658fd..55ddd3baca2b 100644
--- a/llvm/lib/Object/COFFModuleDefinition.cpp
+++ b/llvm/lib/Object/COFFModuleDefinition.cpp
@@ -80,11 +80,6 @@ static bool isDecorated(StringRef Sym, bool MingwDef) {
(!MingwDef && Sym.contains('@'));
}
-static Error createError(const Twine &Err) {
- return make_error<StringError>(StringRef(Err.str()),
- object_error::parse_failed);
-}
-
class Lexer {
public:
Lexer(StringRef S) : Buf(S) {}
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index ca2ed4449120..84181ae5e501 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -246,6 +246,9 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_MIPS_ABIFLAGS);
}
break;
+ case ELF::EM_MSP430:
+ switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_MSP430_ATTRIBUTES); }
+ break;
case ELF::EM_RISCV:
switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_RISCV_ATTRIBUTES); }
break;
@@ -333,40 +336,26 @@ ELFFile<ELFT>::decode_relrs(Elf_Relr_Range relrs) const {
std::vector<Elf_Rel> Relocs;
// Word type: uint32_t for Elf32, and uint64_t for Elf64.
- typedef typename ELFT::uint Word;
-
- // Word size in number of bytes.
- const size_t WordSize = sizeof(Word);
+ using Addr = typename ELFT::uint;
- // Number of bits used for the relocation offsets bitmap.
- // These many relative relocations can be encoded in a single entry.
- const size_t NBits = 8*WordSize - 1;
-
- Word Base = 0;
- for (const Elf_Relr &R : relrs) {
- Word Entry = R;
- if ((Entry&1) == 0) {
+ Addr Base = 0;
+ for (Elf_Relr R : relrs) {
+ typename ELFT::uint Entry = R;
+ if ((Entry & 1) == 0) {
// Even entry: encodes the offset for next relocation.
Rel.r_offset = Entry;
Relocs.push_back(Rel);
// Set base offset for subsequent bitmap entries.
- Base = Entry + WordSize;
- continue;
- }
-
- // Odd entry: encodes bitmap for relocations starting at base.
- Word Offset = Base;
- while (Entry != 0) {
- Entry >>= 1;
- if ((Entry&1) != 0) {
- Rel.r_offset = Offset;
- Relocs.push_back(Rel);
- }
- Offset += WordSize;
+ Base = Entry + sizeof(Addr);
+ } else {
+ // Odd entry: encodes bitmap for relocations starting at base.
+ for (Addr Offset = Base; (Entry >>= 1) != 0; Offset += sizeof(Addr))
+ if ((Entry & 1) != 0) {
+ Rel.r_offset = Offset;
+ Relocs.push_back(Rel);
+ }
+ Base += (CHAR_BIT * sizeof(Entry) - 1) * sizeof(Addr);
}
-
- // Advance base offset by NBits words.
- Base += NBits * WordSize;
}
return Relocs;
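
The rewritten decode_relrs keeps the usual SHT_RELR semantics: even entries are
explicit relocation offsets, odd entries are 63-bit bitmaps relative to a
running base. A small, self-contained trace of that logic with made-up input
(not the template code itself):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const uint64_t Word = 8; // 64-bit target
      const std::vector<uint64_t> Relr = {0x1000, 0x5}; // 0x5 = 0b101
      std::vector<uint64_t> Offsets;
      uint64_t Base = 0;
      for (uint64_t Entry : Relr) {
        if ((Entry & 1) == 0) {
          Offsets.push_back(Entry); // even entry: explicit offset
          Base = Entry + Word;      // bitmap entries start one word later
        } else {
          // Odd entry: bit k (k >= 1) set means a relocation at
          // Base + (k - 1) * Word.
          for (uint64_t Offset = Base; (Entry >>= 1) != 0; Offset += Word)
            if (Entry & 1)
              Offsets.push_back(Offset);
          Base += 63 * Word; // each bitmap entry covers 63 words
        }
      }
      for (uint64_t O : Offsets)
        std::printf("reloc at 0x%llx\n", (unsigned long long)O);
      // Prints 0x1000 (from the even entry) and 0x1010 (bit 2 of 0b101).
      return 0;
    }
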
@@ -474,6 +463,14 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
}
break;
+ case ELF::EM_PPC:
+ switch (Type) {
+#define PPC_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef PPC_DYNAMIC_TAG
+ }
+ break;
+
case ELF::EM_PPC64:
switch (Type) {
#define PPC64_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
@@ -481,6 +478,14 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#undef PPC64_DYNAMIC_TAG
}
break;
+
+ case ELF::EM_RISCV:
+ switch (Type) {
+#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_STRINGIFY_ENUM(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef RISCV_DYNAMIC_TAG
+ }
+ break;
}
#undef DYNAMIC_TAG
switch (Type) {
@@ -488,7 +493,9 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#define AARCH64_DYNAMIC_TAG(name, value)
#define MIPS_DYNAMIC_TAG(name, value)
#define HEXAGON_DYNAMIC_TAG(name, value)
+#define PPC_DYNAMIC_TAG(name, value)
#define PPC64_DYNAMIC_TAG(name, value)
+#define RISCV_DYNAMIC_TAG(name, value)
// Also ignore marker tags such as DT_HIOS (maps to DT_VERNEEDNUM), etc.
#define DYNAMIC_TAG_MARKER(name, value)
#define DYNAMIC_TAG(name, value) case value: return #name;
@@ -497,7 +504,9 @@ std::string ELFFile<ELFT>::getDynamicTagAsString(unsigned Arch,
#undef AARCH64_DYNAMIC_TAG
#undef MIPS_DYNAMIC_TAG
#undef HEXAGON_DYNAMIC_TAG
+#undef PPC_DYNAMIC_TAG
#undef PPC64_DYNAMIC_TAG
+#undef RISCV_DYNAMIC_TAG
#undef DYNAMIC_TAG_MARKER
#undef DYNAMIC_STRINGIFY_ENUM
default:
@@ -613,14 +622,14 @@ ELFFile<ELFT>::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const {
}
template <class ELFT>
-Expected<std::vector<typename ELFT::BBAddrMap>>
+Expected<std::vector<BBAddrMap>>
ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
if (!ContentsOrErr)
return ContentsOrErr.takeError();
ArrayRef<uint8_t> Content = *ContentsOrErr;
DataExtractor Data(Content, isLE(), ELFT::Is64Bits ? 8 : 4);
- std::vector<Elf_BBAddrMap> FunctionEntries;
+ std::vector<BBAddrMap> FunctionEntries;
DataExtractor::Cursor Cur(0);
Error ULEBSizeErr = Error::success();
@@ -647,7 +656,7 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) {
uintX_t Address = static_cast<uintX_t>(Data.getAddress(Cur));
uint32_t NumBlocks = ReadULEB128AsUInt32();
- std::vector<typename Elf_BBAddrMap::BBEntry> BBEntries;
+ std::vector<BBAddrMap::BBEntry> BBEntries;
for (uint32_t BlockID = 0; !ULEBSizeErr && Cur && (BlockID < NumBlocks);
++BlockID) {
uint32_t Offset = ReadULEB128AsUInt32();
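decodeBBAddrMap now builds the target-independent BBAddrMap type; the entries themselves are ULEB128 values read through a DataExtractor cursor, so a single error check covers a run of reads. The same cursor idiom in isolation (a sketch, assuming the buffer holds two ULEB128 values):

    #include "llvm/Support/DataExtractor.h"
    #include "llvm/Support/Error.h"
    using namespace llvm;

    // Read two ULEB128 values; the cursor is checked once at the end.
    static bool readTwoULEBs(ArrayRef<uint8_t> Bytes, uint64_t &A, uint64_t &B) {
      DataExtractor Data(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
      DataExtractor::Cursor Cur(0);
      A = Data.getULEB128(Cur);
      B = Data.getULEB128(Cur);
      if (!Cur) {
        consumeError(Cur.takeError());
        return false;
      }
      return true;
    }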
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 6613d79ab3d0..50035d6c7523 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -15,6 +15,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
#include "llvm/Object/Error.h"
@@ -25,7 +26,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/RISCVAttributes.h"
-#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -538,9 +538,16 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
case ARMBuildAttrs::v6K:
Triple += "v6k";
break;
- case ARMBuildAttrs::v7:
- Triple += "v7";
+ case ARMBuildAttrs::v7: {
+ Optional<unsigned> ArchProfileAttr =
+ Attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
+ if (ArchProfileAttr.hasValue() &&
+ ArchProfileAttr.getValue() == ARMBuildAttrs::MicroControllerProfile)
+ Triple += "v7m";
+ else
+ Triple += "v7";
break;
+ }
case ARMBuildAttrs::v6_M:
Triple += "v6m";
break;
@@ -647,3 +654,72 @@ ELFObjectFileBase::getPltAddresses() const {
}
return Result;
}
+
+template <class ELFT>
+static Expected<std::vector<VersionEntry>>
+readDynsymVersionsImpl(const ELFFile<ELFT> &EF,
+ ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
+ using Elf_Shdr = typename ELFT::Shdr;
+ const Elf_Shdr *VerSec = nullptr;
+ const Elf_Shdr *VerNeedSec = nullptr;
+ const Elf_Shdr *VerDefSec = nullptr;
+ // The user should ensure sections() can't fail here.
+ for (const Elf_Shdr &Sec : cantFail(EF.sections())) {
+ if (Sec.sh_type == ELF::SHT_GNU_versym)
+ VerSec = &Sec;
+ else if (Sec.sh_type == ELF::SHT_GNU_verdef)
+ VerDefSec = &Sec;
+ else if (Sec.sh_type == ELF::SHT_GNU_verneed)
+ VerNeedSec = &Sec;
+ }
+ if (!VerSec)
+ return std::vector<VersionEntry>();
+
+ Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
+ EF.loadVersionMap(VerNeedSec, VerDefSec);
+ if (!MapOrErr)
+ return MapOrErr.takeError();
+
+ std::vector<VersionEntry> Ret;
+ size_t I = 0;
+ for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
+ ++I;
+ Expected<const typename ELFT::Versym *> VerEntryOrErr =
+ EF.template getEntry<typename ELFT::Versym>(*VerSec, I);
+ if (!VerEntryOrErr)
+ return createError("unable to read an entry with index " + Twine(I) +
+ " from " + describe(EF, *VerSec) + ": " +
+ toString(VerEntryOrErr.takeError()));
+
+ Expected<uint32_t> FlagsOrErr = It->getFlags();
+ if (!FlagsOrErr)
+ return createError("unable to read flags for symbol with index " +
+ Twine(I) + ": " + toString(FlagsOrErr.takeError()));
+
+ bool IsDefault;
+ Expected<StringRef> VerOrErr = EF.getSymbolVersionByIndex(
+ (*VerEntryOrErr)->vs_index, IsDefault, *MapOrErr,
+ (*FlagsOrErr) & SymbolRef::SF_Undefined);
+ if (!VerOrErr)
+ return createError("unable to get a version for entry " + Twine(I) +
+ " of " + describe(EF, *VerSec) + ": " +
+ toString(VerOrErr.takeError()));
+
+ Ret.push_back({(*VerOrErr).str(), IsDefault});
+ }
+
+ return Ret;
+}
+
+Expected<std::vector<VersionEntry>>
+ELFObjectFileBase::readDynsymVersions() const {
+ elf_symbol_iterator_range Symbols = getDynamicSymbolIterators();
+ if (const auto *Obj = dyn_cast<ELF32LEObjectFile>(this))
+ return readDynsymVersionsImpl(Obj->getELFFile(), Symbols);
+ if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
+ return readDynsymVersionsImpl(Obj->getELFFile(), Symbols);
+ if (const auto *Obj = dyn_cast<ELF64LEObjectFile>(this))
+ return readDynsymVersionsImpl(Obj->getELFFile(), Symbols);
+ return readDynsymVersionsImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
+ Symbols);
+}
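readDynsymVersions dispatches on the concrete ELF specialization and yields one VersionEntry per dynamic symbol. A hedged usage sketch (error handling trimmed to the minimum; assumes the caller already has an ELFObjectFileBase):

    #include "llvm/Object/ELFObjectFile.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    // Print the version string attached to each dynamic symbol, if any.
    static Error dumpDynsymVersions(const ELFObjectFileBase &Obj) {
      Expected<std::vector<VersionEntry>> VersOrErr = Obj.readDynsymVersions();
      if (!VersOrErr)
        return VersOrErr.takeError();
      for (const VersionEntry &V : *VersOrErr)
        outs() << V.Name << "\n";
      return Error::success();
    }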
diff --git a/llvm/lib/Object/IRObjectFile.cpp b/llvm/lib/Object/IRObjectFile.cpp
index befba5d57127..c653262791cc 100644
--- a/llvm/lib/Object/IRObjectFile.cpp
+++ b/llvm/lib/Object/IRObjectFile.cpp
@@ -18,9 +18,9 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace object;
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index 746b00867157..093ae1bbc267 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -41,10 +41,15 @@
using namespace llvm;
using namespace irsymtab;
-static const char *LibcallRoutineNames[] = {
+static const char *PreservedSymbols[] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
+ // These are global variables, so they are listed here instead of in
+ // RuntimeLibcalls.def.
+ // TODO: Are there other such variables?
+ "__ssp_canary_word",
+ "__stack_chk_guard",
};
namespace {
@@ -261,9 +266,9 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
setStr(Sym.IRName, GV->getName());
- bool IsBuiltinFunc = llvm::is_contained(LibcallRoutineNames, GV->getName());
+ bool IsPreservedSymbol = llvm::is_contained(PreservedSymbols, GV->getName());
- if (Used.count(GV) || IsBuiltinFunc)
+ if (Used.count(GV) || IsPreservedSymbol)
Sym.Flags |= 1 << storage::Symbol::FB_used;
if (GV->isThreadLocal())
Sym.Flags |= 1 << storage::Symbol::FB_tls;
@@ -283,11 +288,15 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
Uncommon().CommonAlign = GVar->getAlignment();
}
- const GlobalObject *Base = GV->getBaseObject();
- if (!Base)
- return make_error<StringError>("Unable to determine comdat of alias!",
- inconvertibleErrorCode());
- if (const Comdat *C = Base->getComdat()) {
+ const GlobalObject *GO = GV->getAliaseeObject();
+ if (!GO) {
+ if (isa<GlobalIFunc>(GV))
+ GO = cast<GlobalIFunc>(GV)->getResolverFunction();
+ if (!GO)
+ return make_error<StringError>("Unable to determine comdat of alias!",
+ inconvertibleErrorCode());
+ }
+ if (const Comdat *C = GO->getComdat()) {
Expected<int> ComdatIndexOrErr = getComdatIndex(C, GV->getParent());
if (!ComdatIndexOrErr)
return ComdatIndexOrErr.takeError();
@@ -312,8 +321,8 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
}
}
- if (!Base->getSection().empty())
- setStr(Uncommon().SectionName, Saver.save(Base->getSection()));
+ if (!GO->getSection().empty())
+ setStr(Uncommon().SectionName, Saver.save(GO->getSection()));
return Error::success();
}
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 177314a9a790..7501661591f0 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -246,8 +246,8 @@ static Error checkOverlappingElement(std::list<MachOElement> &Elements,
if (Size == 0)
return Error::success();
- for (auto it=Elements.begin() ; it != Elements.end(); ++it) {
- auto E = *it;
+ for (auto it = Elements.begin(); it != Elements.end(); ++it) {
+ const auto &E = *it;
if ((Offset >= E.Offset && Offset < E.Offset + E.Size) ||
(Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) ||
(Offset <= E.Offset && Offset + Size >= E.Offset + E.Size))
@@ -258,7 +258,7 @@ static Error checkOverlappingElement(std::list<MachOElement> &Elements,
auto nt = it;
nt++;
if (nt != Elements.end()) {
- auto N = *nt;
+ const auto &N = *nt;
if (Offset + Size <= N.Offset) {
Elements.insert(nt, {Offset, Size, Name});
return Error::success();
@@ -2048,6 +2048,46 @@ bool MachOObjectFile::isDebugSection(DataRefImpl Sec) const {
SectionName == "__swift_ast";
}
+namespace {
+template <typename LoadCommandType>
+ArrayRef<uint8_t> getSegmentContents(const MachOObjectFile &Obj,
+ MachOObjectFile::LoadCommandInfo LoadCmd,
+ StringRef SegmentName) {
+ auto SegmentOrErr = getStructOrErr<LoadCommandType>(Obj, LoadCmd.Ptr);
+ if (!SegmentOrErr) {
+ consumeError(SegmentOrErr.takeError());
+ return {};
+ }
+ auto &Segment = SegmentOrErr.get();
+ if (StringRef(Segment.segname, 16).startswith(SegmentName))
+ return arrayRefFromStringRef(Obj.getData().slice(
+ Segment.fileoff, Segment.fileoff + Segment.filesize));
+ return {};
+}
+} // namespace
+
+ArrayRef<uint8_t>
+MachOObjectFile::getSegmentContents(StringRef SegmentName) const {
+ for (auto LoadCmd : load_commands()) {
+ ArrayRef<uint8_t> Contents;
+ switch (LoadCmd.C.cmd) {
+ case MachO::LC_SEGMENT:
+ Contents = ::getSegmentContents<MachO::segment_command>(*this, LoadCmd,
+ SegmentName);
+ break;
+ case MachO::LC_SEGMENT_64:
+ Contents = ::getSegmentContents<MachO::segment_command_64>(*this, LoadCmd,
+ SegmentName);
+ break;
+ default:
+ continue;
+ }
+ if (!Contents.empty())
+ return Contents;
+ }
+ return {};
+}
+
unsigned MachOObjectFile::getSectionID(SectionRef Sec) const {
return Sec.getRawDataRefImpl().d.a;
}
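The getSegmentContents helper added above returns the raw bytes of the first segment whose name matches, or an empty ArrayRef when none does. A minimal usage sketch (the segment name is just an example):

    #include "llvm/Object/MachO.h"
    using namespace llvm;
    using namespace llvm::object;

    // Size of the raw __LINKEDIT payload, or 0 if the segment is absent/empty.
    static size_t linkEditSize(const MachOObjectFile &Obj) {
      ArrayRef<uint8_t> Bytes = Obj.getSegmentContents("__LINKEDIT");
      return Bytes.size();
    }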
diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index 9a79de77af16..954d1f09f4e9 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -36,6 +36,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
@@ -43,7 +44,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -204,8 +204,9 @@ uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
if (GVar->isConstant())
Res |= BasicSymbolRef::SF_Const;
}
- if (dyn_cast_or_null<Function>(GV->getBaseObject()))
- Res |= BasicSymbolRef::SF_Executable;
+ if (const GlobalObject *GO = GV->getAliaseeObject())
+ if (isa<Function>(GO) || isa<GlobalIFunc>(GO))
+ Res |= BasicSymbolRef::SF_Executable;
if (isa<GlobalAlias>(GV))
Res |= BasicSymbolRef::SF_Indirect;
if (GV->hasPrivateLinkage())
diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp
index b486e9f5c9a8..0659cf6a2d41 100644
--- a/llvm/lib/Object/Object.cpp
+++ b/llvm/lib/Object/Object.cpp
@@ -222,8 +222,7 @@ void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(SecOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
*unwrap(Sect) = *SecOrErr;
}
@@ -304,8 +303,7 @@ const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(Ret.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
return Ret->data();
}
@@ -316,8 +314,7 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(Ret.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
return *Ret;
}
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 5c894439ff67..6fd02f3b9592 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -55,14 +55,15 @@ bool SectionRef::containsSymbol(SymbolRef S) const {
}
Expected<uint64_t> ObjectFile::getSymbolValue(DataRefImpl Ref) const {
- if (Expected<uint32_t> FlagsOrErr = getSymbolFlags(Ref)) {
- if (*FlagsOrErr & SymbolRef::SF_Undefined)
- return 0;
- if (*FlagsOrErr & SymbolRef::SF_Common)
- return getCommonSymbolSize(Ref);
- } else
+ uint32_t Flags;
+ if (Error E = getSymbolFlags(Ref).moveInto(Flags))
// TODO: Test this error.
- return FlagsOrErr.takeError();
+ return std::move(E);
+
+ if (Flags & SymbolRef::SF_Undefined)
+ return 0;
+ if (Flags & SymbolRef::SF_Common)
+ return getCommonSymbolSize(Ref);
return getSymbolValueImpl(Ref);
}
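The rewrite above uses Expected<T>::moveInto to unwrap the flags or forward the error in a single statement, keeping the success path unindented. The same idiom in isolation (computeValue is a hypothetical producer):

    #include "llvm/Support/Error.h"
    #include <cstdint>
    using namespace llvm;

    Expected<uint32_t> computeValue(); // hypothetical, may fail

    static Expected<uint32_t> doubled() {
      uint32_t V;
      if (Error E = computeValue().moveInto(V))
        return std::move(E); // Forward the failure unchanged.
      return 2 * V;          // Success path stays at the top level.
    }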
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index ab98a2dd2ac1..00a45e2c5d4e 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -18,7 +18,7 @@ namespace object {
static int64_t getELFAddend(RelocationRef R) {
Expected<int64_t> AddendOrErr = ELFRelocationRef(R).getAddend();
handleAllErrors(AddendOrErr.takeError(), [](const ErrorInfoBase &EI) {
- report_fatal_error(EI.message());
+ report_fatal_error(Twine(EI.message()));
});
return *AddendOrErr;
}
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index a08c648358c0..6a19b159f3d5 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -286,9 +286,9 @@ WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
return;
}
- WasmSection Sec;
WasmSectionOrderChecker Checker;
while (Ctx.Ptr < Ctx.End) {
+ WasmSection Sec;
if ((Err = readSection(Sec, Ctx, Checker)))
return;
if ((Err = parseSection(Sec)))
@@ -339,7 +339,8 @@ Error WasmObjectFile::parseSection(WasmSection &Sec) {
}
Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
- // See https://github.com/WebAssembly/tool-conventions/blob/master/DynamicLinking.md
+ // Legacy "dylink" section support.
+ // See parseDylink0Section for the current "dylink.0" section parsing.
HasDylinkSection = true;
DylinkInfo.MemorySize = readVaruint32(Ctx);
DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
@@ -349,17 +350,77 @@ Error WasmObjectFile::parseDylinkSection(ReadContext &Ctx) {
while (Count--) {
DylinkInfo.Needed.push_back(readString(Ctx));
}
+
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("dylink section ended prematurely",
object_error::parse_failed);
return Error::success();
}
+Error WasmObjectFile::parseDylink0Section(ReadContext &Ctx) {
+ // See
+ // https://github.com/WebAssembly/tool-conventions/blob/main/DynamicLinking.md
+ HasDylinkSection = true;
+
+ const uint8_t *OrigEnd = Ctx.End;
+ while (Ctx.Ptr < OrigEnd) {
+ Ctx.End = OrigEnd;
+ uint8_t Type = readUint8(Ctx);
+ uint32_t Size = readVaruint32(Ctx);
+ LLVM_DEBUG(dbgs() << "readSubsection type=" << int(Type) << " size=" << Size
+ << "\n");
+ Ctx.End = Ctx.Ptr + Size;
+ uint32_t Count;
+ switch (Type) {
+ case wasm::WASM_DYLINK_MEM_INFO:
+ DylinkInfo.MemorySize = readVaruint32(Ctx);
+ DylinkInfo.MemoryAlignment = readVaruint32(Ctx);
+ DylinkInfo.TableSize = readVaruint32(Ctx);
+ DylinkInfo.TableAlignment = readVaruint32(Ctx);
+ break;
+ case wasm::WASM_DYLINK_NEEDED:
+ Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.Needed.push_back(readString(Ctx));
+ }
+ break;
+ case wasm::WASM_DYLINK_EXPORT_INFO: {
+ uint32_t Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.ExportInfo.push_back({readString(Ctx), readVaruint32(Ctx)});
+ }
+ break;
+ }
+ case wasm::WASM_DYLINK_IMPORT_INFO: {
+ uint32_t Count = readVaruint32(Ctx);
+ while (Count--) {
+ DylinkInfo.ImportInfo.push_back(
+ {readString(Ctx), readString(Ctx), readVaruint32(Ctx)});
+ }
+ break;
+ }
+ default:
+ LLVM_DEBUG(dbgs() << "unknown dylink.0 sub-section: " << int(Type) << "\n");
+ Ctx.Ptr += Size;
+ break;
+ }
+ if (Ctx.Ptr != Ctx.End) {
+ return make_error<GenericBinaryError>(
+ "dylink.0 sub-section ended prematurely", object_error::parse_failed);
+ }
+ }
+
+ if (Ctx.Ptr != Ctx.End)
+ return make_error<GenericBinaryError>("dylink.0 section ended prematurely",
+ object_error::parse_failed);
+ return Error::success();
+}
+
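Each dylink.0 sub-section parsed above is framed as a type byte followed by a ULEB128 payload size, which is what lets unknown types be skipped with Ctx.Ptr += Size. A small writer-side sketch of that framing (illustrative, not LLVM's emitter):

    #include <cstdint>
    #include <vector>

    // Append an unsigned LEB128 value.
    static void writeULEB(std::vector<uint8_t> &Out, uint64_t V) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        Out.push_back(Byte | (V ? 0x80 : 0));
      } while (V);
    }

    // Emit one dylink.0 sub-section: type byte, payload size, payload bytes.
    static void writeSubsection(std::vector<uint8_t> &Out, uint8_t Type,
                                const std::vector<uint8_t> &Payload) {
      Out.push_back(Type);
      writeULEB(Out, Payload.size());
      Out.insert(Out.end(), Payload.begin(), Payload.end());
    }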
Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
llvm::DenseSet<uint64_t> SeenFunctions;
llvm::DenseSet<uint64_t> SeenGlobals;
llvm::DenseSet<uint64_t> SeenSegments;
- if (FunctionTypes.size() && !SeenCodeSection) {
+ if (Functions.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>("names must come after code section",
object_error::parse_failed);
}
@@ -427,7 +488,7 @@ Error WasmObjectFile::parseNameSection(ReadContext &Ctx) {
Error WasmObjectFile::parseLinkingSection(ReadContext &Ctx) {
HasLinkingSection = true;
- if (FunctionTypes.size() && !SeenCodeSection) {
+ if (Functions.size() && !SeenCodeSection) {
return make_error<GenericBinaryError>(
"linking data must come after code section",
object_error::parse_failed);
@@ -529,7 +590,6 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
const wasm::WasmSignature *Signature = nullptr;
const wasm::WasmGlobalType *GlobalType = nullptr;
const wasm::WasmTableType *TableType = nullptr;
- const wasm::WasmTagType *TagType = nullptr;
Info.Kind = readUint8(Ctx);
Info.Flags = readVaruint32(Ctx);
@@ -545,8 +605,8 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
if (IsDefined) {
Info.Name = readString(Ctx);
unsigned FuncIndex = Info.ElementIndex - NumImportedFunctions;
- Signature = &Signatures[FunctionTypes[FuncIndex]];
wasm::WasmFunction &Function = Functions[FuncIndex];
+ Signature = &Signatures[Function.SigIndex];
if (Function.SymbolName.empty())
Function.SymbolName = Info.Name;
} else {
@@ -674,8 +734,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
Info.Name = readString(Ctx);
unsigned TagIndex = Info.ElementIndex - NumImportedTags;
wasm::WasmTag &Tag = Tags[TagIndex];
- Signature = &Signatures[Tag.Type.SigIndex];
- TagType = &Tag.Type;
+ Signature = &Signatures[Tag.SigIndex];
if (Tag.SymbolName.empty())
Tag.SymbolName = Info.Name;
@@ -687,8 +746,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
} else {
Info.Name = Import.Field;
}
- TagType = &Import.Tag;
- Signature = &Signatures[TagType->SigIndex];
+ Signature = &Signatures[Import.SigIndex];
if (!Import.Module.empty()) {
Info.ImportModule = Import.Module;
}
@@ -710,7 +768,7 @@ Error WasmObjectFile::parseLinkingSectionSymtab(ReadContext &Ctx) {
object_error::parse_failed);
LinkingData.SymbolTable.emplace_back(Info);
Symbols.emplace_back(LinkingData.SymbolTable.back(), GlobalType, TableType,
- TagType, Signature);
+ Signature);
LLVM_DEBUG(dbgs() << "Adding symbol: " << Symbols.back() << "\n");
}
@@ -984,6 +1042,9 @@ Error WasmObjectFile::parseCustomSection(WasmSection &Sec, ReadContext &Ctx) {
if (Sec.Name == "dylink") {
if (Error Err = parseDylinkSection(Ctx))
return Err;
+ } else if (Sec.Name == "dylink.0") {
+ if (Error Err = parseDylink0Section(Ctx))
+ return Err;
} else if (Sec.Name == "name") {
if (Error Err = parseNameSection(Ctx))
return Err;
@@ -1034,6 +1095,7 @@ Error WasmObjectFile::parseTypeSection(ReadContext &Ctx) {
Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
+ uint32_t NumTypes = Signatures.size();
Imports.reserve(Count);
for (uint32_t I = 0; I < Count; I++) {
wasm::WasmImport Im;
@@ -1044,6 +1106,9 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
case wasm::WASM_EXTERNAL_FUNCTION:
NumImportedFunctions++;
Im.SigIndex = readVaruint32(Ctx);
+ if (Im.SigIndex >= NumTypes)
+ return make_error<GenericBinaryError>("invalid function type",
+ object_error::parse_failed);
break;
case wasm::WASM_EXTERNAL_GLOBAL:
NumImportedGlobals++;
@@ -1067,8 +1132,13 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
}
case wasm::WASM_EXTERNAL_TAG:
NumImportedTags++;
- Im.Tag.Attribute = readUint8(Ctx);
- Im.Tag.SigIndex = readVarint32(Ctx);
+ if (readUint8(Ctx) != 0) // Reserved 'attribute' field
+ return make_error<GenericBinaryError>("invalid attribute",
+ object_error::parse_failed);
+ Im.SigIndex = readVaruint32(Ctx);
+ if (Im.SigIndex >= NumTypes)
+ return make_error<GenericBinaryError>("invalid tag type",
+ object_error::parse_failed);
break;
default:
return make_error<GenericBinaryError>("unexpected import kind",
@@ -1084,15 +1154,16 @@ Error WasmObjectFile::parseImportSection(ReadContext &Ctx) {
Error WasmObjectFile::parseFunctionSection(ReadContext &Ctx) {
uint32_t Count = readVaruint32(Ctx);
- FunctionTypes.reserve(Count);
- Functions.resize(Count);
+ Functions.reserve(Count);
uint32_t NumTypes = Signatures.size();
while (Count--) {
uint32_t Type = readVaruint32(Ctx);
if (Type >= NumTypes)
return make_error<GenericBinaryError>("invalid function type",
object_error::parse_failed);
- FunctionTypes.push_back(Type);
+ wasm::WasmFunction F;
+ F.SigIndex = Type;
+ Functions.push_back(F);
}
if (Ctx.Ptr != Ctx.End)
return make_error<GenericBinaryError>("function section ended prematurely",
@@ -1141,11 +1212,18 @@ Error WasmObjectFile::parseTagSection(ReadContext &Ctx) {
TagSection = Sections.size();
uint32_t Count = readVaruint32(Ctx);
Tags.reserve(Count);
+ uint32_t NumTypes = Signatures.size();
while (Count--) {
+ if (readUint8(Ctx) != 0) // Reserved 'attribute' field
+ return make_error<GenericBinaryError>("invalid attribute",
+ object_error::parse_failed);
+ uint32_t Type = readVaruint32(Ctx);
+ if (Type >= NumTypes)
+ return make_error<GenericBinaryError>("invalid tag type",
+ object_error::parse_failed);
wasm::WasmTag Tag;
Tag.Index = NumImportedTags + Tags.size();
- Tag.Type.Attribute = readUint8(Ctx);
- Tag.Type.SigIndex = readVaruint32(Ctx);
+ Tag.SigIndex = Type;
Tags.push_back(Tag);
}
@@ -1216,7 +1294,7 @@ Error WasmObjectFile::parseExportSection(ReadContext &Ctx) {
}
bool WasmObjectFile::isValidFunctionIndex(uint32_t Index) const {
- return Index < NumImportedFunctions + FunctionTypes.size();
+ return Index < NumImportedFunctions + Functions.size();
}
bool WasmObjectFile::isDefinedFunctionIndex(uint32_t Index) const {
@@ -1304,7 +1382,7 @@ Error WasmObjectFile::parseCodeSection(ReadContext &Ctx) {
SeenCodeSection = true;
CodeSection = Sections.size();
uint32_t FunctionCount = readVaruint32(Ctx);
- if (FunctionCount != FunctionTypes.size()) {
+ if (FunctionCount != Functions.size()) {
return make_error<GenericBinaryError>("invalid function count",
object_error::parse_failed);
}
@@ -1793,6 +1871,7 @@ int WasmSectionOrderChecker::getSectionOrder(unsigned ID,
case wasm::WASM_SEC_CUSTOM:
return StringSwitch<unsigned>(CustomSectionName)
.Case("dylink", WASM_SEC_ORDER_DYLINK)
+ .Case("dylink.0", WASM_SEC_ORDER_DYLINK)
.Case("linking", WASM_SEC_ORDER_LINKING)
.StartsWith("reloc.", WASM_SEC_ORDER_RELOC)
.Case("name", WASM_SEC_ORDER_NAME)
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index 53447d0c97b2..9b0a5efacba7 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -69,15 +69,18 @@ bool XCOFFSectionHeader<T>::isReservedSectionType() const {
return getSectionType() & SectionFlagsReservedMask;
}
-bool XCOFFRelocation32::isRelocationSigned() const {
+template <typename AddressType>
+bool XCOFFRelocation<AddressType>::isRelocationSigned() const {
return Info & XR_SIGN_INDICATOR_MASK;
}
-bool XCOFFRelocation32::isFixupIndicated() const {
+template <typename AddressType>
+bool XCOFFRelocation<AddressType>::isFixupIndicated() const {
return Info & XR_FIXUP_INDICATOR_MASK;
}
-uint8_t XCOFFRelocation32::getRelocatedLength() const {
+template <typename AddressType>
+uint8_t XCOFFRelocation<AddressType>::getRelocatedLength() const {
// The relocation encodes the bit length being relocated minus 1. Add back
// the 1 to get the actual length being relocated.
return (Info & XR_BIASED_LENGTH_MASK) + 1;
@@ -146,6 +149,20 @@ const XCOFFFileHeader64 *XCOFFObjectFile::fileHeader64() const {
return static_cast<const XCOFFFileHeader64 *>(FileHeader);
}
+const XCOFFAuxiliaryHeader32 *XCOFFObjectFile::auxiliaryHeader32() const {
+ assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
+ return static_cast<const XCOFFAuxiliaryHeader32 *>(AuxiliaryHeader);
+}
+
+const XCOFFAuxiliaryHeader64 *XCOFFObjectFile::auxiliaryHeader64() const {
+ assert(is64Bit() && "64-bit interface called on a 32-bit object file.");
+ return static_cast<const XCOFFAuxiliaryHeader64 *>(AuxiliaryHeader);
+}
+
+template <typename T> const T *XCOFFObjectFile::sectionHeaderTable() const {
+ return static_cast<const T *>(SectionHeaderTable);
+}
+
const XCOFFSectionHeader32 *
XCOFFObjectFile::sectionHeaderTable32() const {
assert(!is64Bit() && "32-bit interface called on 64-bit object file.");
@@ -183,12 +200,16 @@ XCOFFObjectFile::getStringTableEntry(uint32_t Offset) const {
if (StringTable.Data != nullptr && StringTable.Size > Offset)
return (StringTable.Data + Offset);
- return make_error<GenericBinaryError>("Bad offset for string table entry",
- object_error::parse_failed);
+ return createError("entry with offset 0x" + Twine::utohexstr(Offset) +
+ " in a string table with size 0x" +
+ Twine::utohexstr(StringTable.Size) + " is invalid");
}
StringRef XCOFFObjectFile::getStringTable() const {
- return StringRef(StringTable.Data, StringTable.Size);
+ // If the size is less than or equal to 4, then the string table contains no
+ // string data.
+ return StringRef(StringTable.Data,
+ StringTable.Size <= 4 ? 0 : StringTable.Size);
}
Expected<StringRef>
@@ -210,15 +231,85 @@ uint64_t XCOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
return toSymbolRef(Symb).getValue();
}
+uint32_t XCOFFObjectFile::getSymbolAlignment(DataRefImpl Symb) const {
+ uint64_t Result = 0;
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxRefOrError =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (!CsectAuxRefOrError)
+ // TODO: report the error up the stack.
+ consumeError(CsectAuxRefOrError.takeError());
+ else
+ Result = 1ULL << CsectAuxRefOrError.get().getAlignmentLog2();
+ }
+ return Result;
+}
+
uint64_t XCOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
uint64_t Result = 0;
- llvm_unreachable("Not yet implemented!");
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxRefOrError =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (!CsectAuxRefOrError)
+ // TODO: report the error up the stack.
+ consumeError(CsectAuxRefOrError.takeError());
+ else {
+ XCOFFCsectAuxRef CsectAuxRef = CsectAuxRefOrError.get();
+ assert(CsectAuxRef.getSymbolType() == XCOFF::XTY_CM);
+ Result = CsectAuxRef.getSectionOrLength();
+ }
+ }
return Result;
}
Expected<SymbolRef::Type>
XCOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
- // TODO: Return the correct symbol type.
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+
+ if (XCOFFSym.isFunction())
+ return SymbolRef::ST_Function;
+
+ if (XCOFF::C_FILE == XCOFFSym.getStorageClass())
+ return SymbolRef::ST_File;
+
+ int16_t SecNum = XCOFFSym.getSectionNumber();
+ if (SecNum <= 0)
+ return SymbolRef::ST_Other;
+
+ Expected<DataRefImpl> SecDRIOrErr =
+ getSectionByNum(XCOFFSym.getSectionNumber());
+
+ if (!SecDRIOrErr)
+ return SecDRIOrErr.takeError();
+
+ DataRefImpl SecDRI = SecDRIOrErr.get();
+
+ Expected<StringRef> SymNameOrError = XCOFFSym.getName();
+ if (SymNameOrError) {
+ // The "TOC" symbol is treated as SymbolRef::ST_Other.
+ if (SymNameOrError.get() == "TOC")
+ return SymbolRef::ST_Other;
+
+ // The symbol for a section name is treated as SymbolRef::ST_Other.
+ StringRef SecName;
+ if (is64Bit())
+ SecName = XCOFFObjectFile::toSection64(SecDRIOrErr.get())->getName();
+ else
+ SecName = XCOFFObjectFile::toSection32(SecDRIOrErr.get())->getName();
+
+ if (SecName == SymNameOrError.get())
+ return SymbolRef::ST_Other;
+ } else
+ return SymNameOrError.takeError();
+
+ if (isSectionData(SecDRI) || isSectionBSS(SecDRI))
+ return SymbolRef::ST_Data;
+
+ if (isDebugSection(SecDRI))
+ return SymbolRef::ST_Debug;
+
return SymbolRef::ST_Other;
}
@@ -285,8 +376,12 @@ XCOFFObjectFile::getSectionContents(DataRefImpl Sec) const {
const uint8_t * ContentStart = base() + OffsetToRaw;
uint64_t SectionSize = getSectionSize(Sec);
- if (checkOffset(Data, reinterpret_cast<uintptr_t>(ContentStart), SectionSize))
- return make_error<BinaryError>();
+ if (Error E = Binary::checkOffset(
+ Data, reinterpret_cast<uintptr_t>(ContentStart), SectionSize))
+ return createError(
+ toString(std::move(E)) + ": section data with offset 0x" +
+ Twine::utohexstr(OffsetToRaw) + " and size 0x" +
+ Twine::utohexstr(SectionSize) + " goes past the end of the file");
return makeArrayRef(ContentStart,SectionSize);
}
@@ -297,6 +392,43 @@ uint64_t XCOFFObjectFile::getSectionAlignment(DataRefImpl Sec) const {
return Result;
}
+Expected<uintptr_t> XCOFFObjectFile::getLoaderSectionAddress() const {
+ uint64_t OffsetToLoaderSection = 0;
+ uint64_t SizeOfLoaderSection = 0;
+
+ if (is64Bit()) {
+ for (const auto &Sec64 : sections64())
+ if (Sec64.getSectionType() == XCOFF::STYP_LOADER) {
+ OffsetToLoaderSection = Sec64.FileOffsetToRawData;
+ SizeOfLoaderSection = Sec64.SectionSize;
+ break;
+ }
+ } else {
+ for (const auto &Sec32 : sections32())
+ if (Sec32.getSectionType() == XCOFF::STYP_LOADER) {
+ OffsetToLoaderSection = Sec32.FileOffsetToRawData;
+ SizeOfLoaderSection = Sec32.SectionSize;
+ break;
+ }
+ }
+
+ // The absence of a loader section is not an error.
+ if (!SizeOfLoaderSection)
+ return 0;
+
+ uintptr_t LoaderSectionStart =
+ reinterpret_cast<uintptr_t>(base() + OffsetToLoaderSection);
+ if (Error E =
+ Binary::checkOffset(Data, LoaderSectionStart, SizeOfLoaderSection))
+ return createError(toString(std::move(E)) +
+ ": loader section with offset 0x" +
+ Twine::utohexstr(OffsetToLoaderSection) +
+ " and size 0x" + Twine::utohexstr(SizeOfLoaderSection) +
+ " goes past the end of the file");
+
+ return LoaderSectionStart;
+}
+
bool XCOFFObjectFile::isSectionCompressed(DataRefImpl Sec) const {
return false;
}
@@ -326,61 +458,112 @@ bool XCOFFObjectFile::isSectionVirtual(DataRefImpl Sec) const {
}
relocation_iterator XCOFFObjectFile::section_rel_begin(DataRefImpl Sec) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
- auto RelocationsOrErr = relocations(*SectionEntPtr);
- if (Error E = RelocationsOrErr.takeError())
- return relocation_iterator(RelocationRef());
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin());
+ if (is64Bit()) {
+ const XCOFFSectionHeader64 *SectionEntPtr = toSection64(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader64, XCOFFRelocation64>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin());
+ } else {
+ const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader32, XCOFFRelocation32>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().begin());
+ }
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator XCOFFObjectFile::section_rel_end(DataRefImpl Sec) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
- auto RelocationsOrErr = relocations(*SectionEntPtr);
- if (Error E = RelocationsOrErr.takeError())
- return relocation_iterator(RelocationRef());
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end());
+ if (is64Bit()) {
+ const XCOFFSectionHeader64 *SectionEntPtr = toSection64(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader64, XCOFFRelocation64>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end());
+ } else {
+ const XCOFFSectionHeader32 *SectionEntPtr = toSection32(Sec);
+ auto RelocationsOrErr =
+ relocations<XCOFFSectionHeader32, XCOFFRelocation32>(*SectionEntPtr);
+ if (Error E = RelocationsOrErr.takeError()) {
+ // TODO: report the error up the stack.
+ consumeError(std::move(E));
+ return relocation_iterator(RelocationRef());
+ }
+ Ret.p = reinterpret_cast<uintptr_t>(&*RelocationsOrErr.get().end());
+ }
return relocation_iterator(RelocationRef(Ret, this));
}
void XCOFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
- Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation32>(Rel.p) + 1);
+ if (is64Bit())
+ Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation64>(Rel.p) + 1);
+ else
+ Rel.p = reinterpret_cast<uintptr_t>(viewAs<XCOFFRelocation32>(Rel.p) + 1);
}
uint64_t XCOFFObjectFile::getRelocationOffset(DataRefImpl Rel) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
- const XCOFFSectionHeader32 *Sec32 = sectionHeaderTable32();
- const uint32_t RelocAddress = Reloc->VirtualAddress;
- const uint16_t NumberOfSections = getNumberOfSections();
- for (uint16_t i = 0; i < NumberOfSections; ++i) {
- // Find which section this relocation is belonging to, and get the
- // relocation offset relative to the start of the section.
- if (Sec32->VirtualAddress <= RelocAddress &&
- RelocAddress < Sec32->VirtualAddress + Sec32->SectionSize) {
- return RelocAddress - Sec32->VirtualAddress;
+ if (is64Bit()) {
+ const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p);
+ const XCOFFSectionHeader64 *Sec64 = sectionHeaderTable64();
+ const uint64_t RelocAddress = Reloc->VirtualAddress;
+ const uint16_t NumberOfSections = getNumberOfSections();
+ for (uint16_t I = 0; I < NumberOfSections; ++I) {
+ // Find which section this relocation belongs to, and get the
+ // relocation offset relative to the start of the section.
+ if (Sec64->VirtualAddress <= RelocAddress &&
+ RelocAddress < Sec64->VirtualAddress + Sec64->SectionSize) {
+ return RelocAddress - Sec64->VirtualAddress;
+ }
+ ++Sec64;
+ }
+ } else {
+ const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
+ const XCOFFSectionHeader32 *Sec32 = sectionHeaderTable32();
+ const uint32_t RelocAddress = Reloc->VirtualAddress;
+ const uint16_t NumberOfSections = getNumberOfSections();
+ for (uint16_t I = 0; I < NumberOfSections; ++I) {
+ // Find which section this relocation belongs to, and get the
+ // relocation offset relative to the start of the section.
+ if (Sec32->VirtualAddress <= RelocAddress &&
+ RelocAddress < Sec32->VirtualAddress + Sec32->SectionSize) {
+ return RelocAddress - Sec32->VirtualAddress;
+ }
+ ++Sec32;
}
- ++Sec32;
}
return InvalidRelocOffset;
}
symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
- const uint32_t Index = Reloc->SymbolIndex;
-
- if (Index >= getLogicalNumberOfSymbolTableEntries32())
- return symbol_end();
-
+ uint32_t Index;
+ if (is64Bit()) {
+ const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p);
+ Index = Reloc->SymbolIndex;
+
+ if (Index >= getNumberOfSymbolTableEntries64())
+ return symbol_end();
+ } else {
+ const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
+ Index = Reloc->SymbolIndex;
+
+ if (Index >= getLogicalNumberOfSymbolTableEntries32())
+ return symbol_end();
+ }
DataRefImpl SymDRI;
SymDRI.p = getSymbolEntryAddressByIndex(Index);
return symbol_iterator(SymbolRef(SymDRI, this));
@@ -388,22 +571,50 @@ symbol_iterator XCOFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
uint64_t XCOFFObjectFile::getRelocationType(DataRefImpl Rel) const {
if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
+ return viewAs<XCOFFRelocation64>(Rel.p)->Type;
return viewAs<XCOFFRelocation32>(Rel.p)->Type;
}
void XCOFFObjectFile::getRelocationTypeName(
DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
- if (is64Bit())
- report_fatal_error("64-bit support not implemented yet");
- const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
- StringRef Res = XCOFF::getRelocationTypeString(Reloc->Type);
+ StringRef Res;
+ if (is64Bit()) {
+ const XCOFFRelocation64 *Reloc = viewAs<XCOFFRelocation64>(Rel.p);
+ Res = XCOFF::getRelocationTypeString(Reloc->Type);
+ } else {
+ const XCOFFRelocation32 *Reloc = viewAs<XCOFFRelocation32>(Rel.p);
+ Res = XCOFF::getRelocationTypeString(Reloc->Type);
+ }
Result.append(Res.begin(), Res.end());
}
Expected<uint32_t> XCOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
- uint32_t Result = 0;
- // TODO: Return correct symbol flags.
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ uint32_t Result = SymbolRef::SF_None;
+
+ if (XCOFFSym.getSectionNumber() == XCOFF::N_ABS)
+ Result |= SymbolRef::SF_Absolute;
+
+ XCOFF::StorageClass SC = XCOFFSym.getStorageClass();
+ if (XCOFF::C_EXT == SC || XCOFF::C_WEAKEXT == SC)
+ Result |= SymbolRef::SF_Global;
+
+ if (XCOFF::C_WEAKEXT == SC)
+ Result |= SymbolRef::SF_Weak;
+
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxEntOrErr =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (CsectAuxEntOrErr) {
+ if (CsectAuxEntOrErr.get().getSymbolType() == XCOFF::XTY_CM)
+ Result |= SymbolRef::SF_Common;
+ } else
+ return CsectAuxEntOrErr.takeError();
+ }
+
+ if (XCOFFSym.getSectionNumber() == XCOFF::N_UNDEF)
+ Result |= SymbolRef::SF_Undefined;
+
return Result;
}
@@ -494,7 +705,9 @@ uint16_t XCOFFObjectFile::getMagic() const {
Expected<DataRefImpl> XCOFFObjectFile::getSectionByNum(int16_t Num) const {
if (Num <= 0 || Num > getNumberOfSections())
- return errorCodeToError(object_error::invalid_section_index);
+ return createStringError(object_error::invalid_section_index,
+ "the section index (" + Twine(Num) +
+ ") is invalid");
DataRefImpl DRI;
DRI.p = getWithOffset(getSectionHeaderTableAddress(),
@@ -602,6 +815,25 @@ uint32_t XCOFFObjectFile::getSymbolIndex(uintptr_t SymbolEntPtr) const {
XCOFF::SymbolTableEntrySize;
}
+uint64_t XCOFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+ uint64_t Result = 0;
+ XCOFFSymbolRef XCOFFSym = toSymbolRef(Symb);
+ if (XCOFFSym.isCsectSymbol()) {
+ Expected<XCOFFCsectAuxRef> CsectAuxRefOrError =
+ XCOFFSym.getXCOFFCsectAuxRef();
+ if (!CsectAuxRefOrError)
+ // TODO: report the error up the stack.
+ consumeError(CsectAuxRefOrError.takeError());
+ else {
+ XCOFFCsectAuxRef CsectAuxRef = CsectAuxRefOrError.get();
+ uint8_t SymType = CsectAuxRef.getSymbolType();
+ if (SymType == XCOFF::XTY_SD || SymType == XCOFF::XTY_CM)
+ Result = CsectAuxRef.getSectionOrLength();
+ }
+ }
+ return Result;
+}
+
uintptr_t XCOFFObjectFile::getSymbolEntryAddressByIndex(uint32_t Index) const {
return getAdvancedSymbolEntryAddress(
reinterpret_cast<uintptr_t>(getPointerToSymbolTable()), Index);
@@ -612,7 +844,9 @@ XCOFFObjectFile::getSymbolNameByIndex(uint32_t Index) const {
const uint32_t NumberOfSymTableEntries = getNumberOfSymbolTableEntries();
if (Index >= NumberOfSymTableEntries)
- return errorCodeToError(object_error::invalid_symbol_index);
+ return createError("symbol index " + Twine(Index) +
+ " exceeds symbol count " +
+ Twine(NumberOfSymTableEntries));
DataRefImpl SymDRI;
SymDRI.p = getSymbolEntryAddressByIndex(Index);
@@ -658,13 +892,16 @@ ArrayRef<XCOFFSectionHeader32> XCOFFObjectFile::sections32() const {
// section header contains the actual count of relocation entries in the s_paddr
// field. STYP_OVRFLO headers contain the section index of their corresponding
// sections as their raw "NumberOfRelocations" field value.
-Expected<uint32_t> XCOFFObjectFile::getLogicalNumberOfRelocationEntries(
- const XCOFFSectionHeader32 &Sec) const {
-
- uint16_t SectionIndex = &Sec - sectionHeaderTable32() + 1;
+template <typename T>
+Expected<uint32_t> XCOFFObjectFile::getNumberOfRelocationEntries(
+ const XCOFFSectionHeader<T> &Sec) const {
+ const T &Section = static_cast<const T &>(Sec);
+ if (is64Bit())
+ return Section.NumberOfRelocations;
- if (Sec.NumberOfRelocations < XCOFF::RelocOverflow)
- return Sec.NumberOfRelocations;
+ uint16_t SectionIndex = &Section - sectionHeaderTable<T>() + 1;
+ if (Section.NumberOfRelocations < XCOFF::RelocOverflow)
+ return Section.NumberOfRelocations;
for (const auto &Sec : sections32()) {
if (Sec.Flags == XCOFF::STYP_OVRFLO &&
Sec.NumberOfRelocations == SectionIndex)
@@ -673,27 +910,31 @@ Expected<uint32_t> XCOFFObjectFile::getLogicalNumberOfRelocationEntries(
return errorCodeToError(object_error::parse_failed);
}
-Expected<ArrayRef<XCOFFRelocation32>>
-XCOFFObjectFile::relocations(const XCOFFSectionHeader32 &Sec) const {
+template <typename Shdr, typename Reloc>
+Expected<ArrayRef<Reloc>> XCOFFObjectFile::relocations(const Shdr &Sec) const {
uintptr_t RelocAddr = getWithOffset(reinterpret_cast<uintptr_t>(FileHeader),
Sec.FileOffsetToRelocationInfo);
- auto NumRelocEntriesOrErr = getLogicalNumberOfRelocationEntries(Sec);
+ auto NumRelocEntriesOrErr = getNumberOfRelocationEntries(Sec);
if (Error E = NumRelocEntriesOrErr.takeError())
return std::move(E);
uint32_t NumRelocEntries = NumRelocEntriesOrErr.get();
-
- static_assert(
- sizeof(XCOFFRelocation32) == XCOFF::RelocationSerializationSize32, "");
+ static_assert((sizeof(Reloc) == XCOFF::RelocationSerializationSize64 ||
+ sizeof(Reloc) == XCOFF::RelocationSerializationSize32),
+ "Relocation structure is incorrect");
auto RelocationOrErr =
- getObject<XCOFFRelocation32>(Data, reinterpret_cast<void *>(RelocAddr),
- NumRelocEntries * sizeof(XCOFFRelocation32));
- if (Error E = RelocationOrErr.takeError())
- return std::move(E);
+ getObject<Reloc>(Data, reinterpret_cast<void *>(RelocAddr),
+ NumRelocEntries * sizeof(Reloc));
+ if (!RelocationOrErr)
+ return createError(
+ toString(RelocationOrErr.takeError()) + ": relocations with offset 0x" +
+ Twine::utohexstr(Sec.FileOffsetToRelocationInfo) + " and size 0x" +
+ Twine::utohexstr(NumRelocEntries * sizeof(Reloc)) +
+ " go past the end of the file");
- const XCOFFRelocation32 *StartReloc = RelocationOrErr.get();
+ const Reloc *StartReloc = RelocationOrErr.get();
- return ArrayRef<XCOFFRelocation32>(StartReloc, StartReloc + NumRelocEntries);
+ return ArrayRef<Reloc>(StartReloc, StartReloc + NumRelocEntries);
}
Expected<XCOFFStringTable>
@@ -716,8 +957,12 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
auto StringTableOrErr =
getObject<char>(Obj->Data, Obj->base() + Offset, Size);
- if (Error E = StringTableOrErr.takeError())
- return std::move(E);
+ if (!StringTableOrErr)
+ return createError(toString(StringTableOrErr.takeError()) +
+ ": string table with offset 0x" +
+ Twine::utohexstr(Offset) + " and size 0x" +
+ Twine::utohexstr(Size) +
+ " goes past the end of the file");
const char *StringTablePtr = StringTableOrErr.get();
if (StringTablePtr[Size - 1] != '\0')
@@ -726,6 +971,54 @@ XCOFFObjectFile::parseStringTable(const XCOFFObjectFile *Obj, uint64_t Offset) {
return XCOFFStringTable{Size, StringTablePtr};
}
+// This function returns the import file table. Each entry in the import file
+// table consists of: "path_name\0base_name\0archive_member_name\0".
+Expected<StringRef> XCOFFObjectFile::getImportFileTable() const {
+ Expected<uintptr_t> LoaderSectionAddrOrError = getLoaderSectionAddress();
+ if (!LoaderSectionAddrOrError)
+ return LoaderSectionAddrOrError.takeError();
+
+ uintptr_t LoaderSectionAddr = LoaderSectionAddrOrError.get();
+ if (!LoaderSectionAddr)
+ return StringRef();
+
+ uint64_t OffsetToImportFileTable = 0;
+ uint64_t LengthOfImportFileTable = 0;
+ if (is64Bit()) {
+ const LoaderSectionHeader64 *LoaderSec64 =
+ viewAs<LoaderSectionHeader64>(LoaderSectionAddr);
+ OffsetToImportFileTable = LoaderSec64->OffsetToImpid;
+ LengthOfImportFileTable = LoaderSec64->LengthOfImpidStrTbl;
+ } else {
+ const LoaderSectionHeader32 *LoaderSec32 =
+ viewAs<LoaderSectionHeader32>(LoaderSectionAddr);
+ OffsetToImportFileTable = LoaderSec32->OffsetToImpid;
+ LengthOfImportFileTable = LoaderSec32->LengthOfImpidStrTbl;
+ }
+
+ auto ImportTableOrErr = getObject<char>(
+ Data,
+ reinterpret_cast<void *>(LoaderSectionAddr + OffsetToImportFileTable),
+ LengthOfImportFileTable);
+ if (!ImportTableOrErr)
+ return createError(
+ toString(ImportTableOrErr.takeError()) +
+ ": import file table with offset 0x" +
+ Twine::utohexstr(LoaderSectionAddr + OffsetToImportFileTable) +
+ " and size 0x" + Twine::utohexstr(LengthOfImportFileTable) +
+ " goes past the end of the file");
+
+ const char *ImportTablePtr = ImportTableOrErr.get();
+ if (ImportTablePtr[LengthOfImportFileTable - 1] != '\0')
+ return createError(
+ "import file name table with offset 0x" +
+ Twine::utohexstr(LoaderSectionAddr + OffsetToImportFileTable) +
+ " and size 0x" + Twine::utohexstr(LengthOfImportFileTable) +
+ " must end with a null terminator");
+
+ return StringRef(ImportTablePtr, LengthOfImportFileTable);
+}
+
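The import file table returned above is a flat run of NUL-terminated strings (path, base name, archive member, repeated per entry). A minimal sketch of splitting such a blob into its components (consumer-side illustration only):

    #include <string>
    #include <vector>

    // Split "a\0b\0c\0..." into its NUL-terminated components.
    static std::vector<std::string> splitNulSeparated(const std::string &Table) {
      std::vector<std::string> Parts;
      for (size_t Pos = 0; Pos < Table.size();) {
        size_t End = Table.find('\0', Pos);
        if (End == std::string::npos)
          break;
        Parts.push_back(Table.substr(Pos, End - Pos));
        Pos = End + 1;
      }
      return Parts;
    }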
Expected<std::unique_ptr<XCOFFObjectFile>>
XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
// Can't use std::make_unique because of the private constructor.
@@ -744,17 +1037,30 @@ XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
Obj->FileHeader = FileHeaderOrErr.get();
CurOffset += Obj->getFileHeaderSize();
- // TODO FIXME we don't have support for an optional header yet, so just skip
- // past it.
+
+ if (Obj->getOptionalHeaderSize()) {
+ auto AuxiliaryHeaderOrErr =
+ getObject<void>(Data, Base + CurOffset, Obj->getOptionalHeaderSize());
+ if (Error E = AuxiliaryHeaderOrErr.takeError())
+ return std::move(E);
+ Obj->AuxiliaryHeader = AuxiliaryHeaderOrErr.get();
+ }
+
CurOffset += Obj->getOptionalHeaderSize();
// Parse the section header table if it is present.
if (Obj->getNumberOfSections()) {
- auto SecHeadersOrErr = getObject<void>(Data, Base + CurOffset,
- Obj->getNumberOfSections() *
- Obj->getSectionHeaderSize());
- if (Error E = SecHeadersOrErr.takeError())
- return std::move(E);
+ uint64_t SectionHeadersSize =
+ Obj->getNumberOfSections() * Obj->getSectionHeaderSize();
+ auto SecHeadersOrErr =
+ getObject<void>(Data, Base + CurOffset, SectionHeadersSize);
+ if (!SecHeadersOrErr)
+ return createError(toString(SecHeadersOrErr.takeError()) +
+ ": section headers with offset 0x" +
+ Twine::utohexstr(CurOffset) + " and size 0x" +
+ Twine::utohexstr(SectionHeadersSize) +
+ " go past the end of the file");
+
Obj->SectionHeaderTable = SecHeadersOrErr.get();
}
@@ -773,8 +1079,12 @@ XCOFFObjectFile::create(unsigned Type, MemoryBufferRef MBR) {
NumberOfSymbolTableEntries;
auto SymTableOrErr =
getObject<void *>(Data, Base + CurOffset, SymbolTableSize);
- if (Error E = SymTableOrErr.takeError())
- return std::move(E);
+ if (!SymTableOrErr)
+ return createError(
+ toString(SymTableOrErr.takeError()) + ": symbol table with offset 0x" +
+ Twine::utohexstr(CurOffset) + " and size 0x" +
+ Twine::utohexstr(SymbolTableSize) + " goes past the end of the file");
+
Obj->SymbolTblPtr = SymTableOrErr.get();
CurOffset += SymbolTableSize;
@@ -844,10 +1154,10 @@ Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const {
if (auto Err = NameOrErr.takeError())
return std::move(Err);
+ uint32_t SymbolIdx = OwningObjectPtr->getSymbolIndex(getEntryAddress());
if (!NumberOfAuxEntries) {
- return createStringError(object_error::parse_failed,
- "csect symbol \"" + *NameOrErr +
- "\" contains no auxiliary entry");
+ return createError("csect symbol \"" + *NameOrErr + "\" with index " +
+ Twine(SymbolIdx) + " contains no auxiliary entry");
}
if (!OwningObjectPtr->is64Bit()) {
@@ -872,9 +1182,9 @@ Expected<XCOFFCsectAuxRef> XCOFFSymbolRef::getXCOFFCsectAuxRef() const {
}
}
- return createStringError(
- object_error::parse_failed,
- "a csect auxiliary entry is not found for symbol \"" + *NameOrErr + "\"");
+ return createError(
+ "a csect auxiliary entry has not been found for symbol \"" + *NameOrErr +
+ "\" with index " + Twine(SymbolIdx));
}
Expected<StringRef> XCOFFSymbolRef::getName() const {
@@ -897,6 +1207,18 @@ Expected<StringRef> XCOFFSymbolRef::getName() const {
template struct XCOFFSectionHeader<XCOFFSectionHeader32>;
template struct XCOFFSectionHeader<XCOFFSectionHeader64>;
+template struct XCOFFRelocation<llvm::support::ubig32_t>;
+template struct XCOFFRelocation<llvm::support::ubig64_t>;
+
+template llvm::Expected<llvm::ArrayRef<llvm::object::XCOFFRelocation64>>
+llvm::object::XCOFFObjectFile::relocations<llvm::object::XCOFFSectionHeader64,
+ llvm::object::XCOFFRelocation64>(
+ llvm::object::XCOFFSectionHeader64 const &) const;
+template llvm::Expected<llvm::ArrayRef<llvm::object::XCOFFRelocation32>>
+llvm::object::XCOFFObjectFile::relocations<llvm::object::XCOFFSectionHeader32,
+ llvm::object::XCOFFRelocation32>(
+ llvm::object::XCOFFSectionHeader32 const &) const;
+
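The explicit instantiations above let the templated relocations<>() body stay in the .cpp file while remaining usable for the 32-bit and 64-bit header/relocation pairs. The pattern in miniature:

    // Definition lives in one translation unit...
    template <typename T> T twice(T V) { return V + V; }

    // ...and is explicitly instantiated for the types callers are allowed to use.
    template int twice<int>(int);
    template long twice<long>(long);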
bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes) {
if (Bytes.size() < 4)
return false;
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index 06ce93affd38..5f38ca13cfc2 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -170,8 +170,8 @@ static bool layoutOptionalHeader(COFFParser &CP) {
unsigned PEHeaderSize = CP.is64Bit() ? sizeof(object::pe32plus_header)
: sizeof(object::pe32_header);
CP.Obj.Header.SizeOfOptionalHeader =
- PEHeaderSize +
- sizeof(object::data_directory) * (COFF::NUM_DATA_DIRECTORIES + 1);
+ PEHeaderSize + sizeof(object::data_directory) *
+ CP.Obj.OptionalHeader->Header.NumberOfRvaAndSize;
return true;
}
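With this change the optional-header size scales with NumberOfRvaAndSize instead of always assuming the full set of data directories. The arithmetic, as a sketch (112 bytes is the fixed PE32+ portion before the directories; each directory entry is 8 bytes):

    #include <cstdint>

    // PE32+ optional header size for a given data-directory count.
    constexpr uint64_t pe32PlusOptionalHeaderSize(uint32_t NumberOfRvaAndSize) {
      return 112 + 8ull * NumberOfRvaAndSize;
    }
    static_assert(pe32PlusOptionalHeaderSize(16) == 240,
                  "16 directories give the classic 240-byte header");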
@@ -397,7 +397,7 @@ static uint32_t initializeOptionalHeader(COFFParser &CP, uint16_t Magic,
Header->SizeOfStackCommit = CP.Obj.OptionalHeader->Header.SizeOfStackCommit;
Header->SizeOfHeapReserve = CP.Obj.OptionalHeader->Header.SizeOfHeapReserve;
Header->SizeOfHeapCommit = CP.Obj.OptionalHeader->Header.SizeOfHeapCommit;
- Header->NumberOfRvaAndSize = COFF::NUM_DATA_DIRECTORIES + 1;
+ Header->NumberOfRvaAndSize = CP.Obj.OptionalHeader->Header.NumberOfRvaAndSize;
return BaseOfData;
}
@@ -458,18 +458,20 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
PEH.BaseOfData = BaseOfData;
OS.write(reinterpret_cast<char *>(&PEH), sizeof(PEH));
}
- for (const Optional<COFF::DataDirectory> &DD :
- CP.Obj.OptionalHeader->DataDirectories) {
- if (!DD.hasValue()) {
+ for (uint32_t I = 0; I < CP.Obj.OptionalHeader->Header.NumberOfRvaAndSize;
+ ++I) {
+ const Optional<COFF::DataDirectory> *DataDirectories =
+ CP.Obj.OptionalHeader->DataDirectories;
+ uint32_t NumDataDir = sizeof(CP.Obj.OptionalHeader->DataDirectories) /
+ sizeof(Optional<COFF::DataDirectory>);
+ if (I >= NumDataDir || !DataDirectories[I].hasValue()) {
OS << zeros(uint32_t(0));
OS << zeros(uint32_t(0));
} else {
- OS << binary_le(DD->RelativeVirtualAddress);
- OS << binary_le(DD->Size);
+ OS << binary_le(DataDirectories[I]->RelativeVirtualAddress);
+ OS << binary_le(DataDirectories[I]->Size);
}
}
- OS << zeros(uint32_t(0));
- OS << zeros(uint32_t(0));
}
assert(OS.tell() == CP.SectionTableStart);
diff --git a/llvm/lib/ObjectYAML/COFFYAML.cpp b/llvm/lib/ObjectYAML/COFFYAML.cpp
index 96069c0c590f..6e5cdce89060 100644
--- a/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -448,25 +448,27 @@ void MappingTraits<COFFYAML::PEHeader>::mapping(IO &IO,
MappingNormalization<NDLLCharacteristics, uint16_t> NDC(
IO, PH.Header.DLLCharacteristics);
- IO.mapRequired("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint);
- IO.mapRequired("ImageBase", PH.Header.ImageBase);
- IO.mapRequired("SectionAlignment", PH.Header.SectionAlignment);
- IO.mapRequired("FileAlignment", PH.Header.FileAlignment);
- IO.mapRequired("MajorOperatingSystemVersion",
+ IO.mapOptional("AddressOfEntryPoint", PH.Header.AddressOfEntryPoint);
+ IO.mapOptional("ImageBase", PH.Header.ImageBase);
+ IO.mapOptional("SectionAlignment", PH.Header.SectionAlignment, 1);
+ IO.mapOptional("FileAlignment", PH.Header.FileAlignment, 1);
+ IO.mapOptional("MajorOperatingSystemVersion",
PH.Header.MajorOperatingSystemVersion);
- IO.mapRequired("MinorOperatingSystemVersion",
+ IO.mapOptional("MinorOperatingSystemVersion",
PH.Header.MinorOperatingSystemVersion);
- IO.mapRequired("MajorImageVersion", PH.Header.MajorImageVersion);
- IO.mapRequired("MinorImageVersion", PH.Header.MinorImageVersion);
- IO.mapRequired("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion);
- IO.mapRequired("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion);
- IO.mapRequired("Subsystem", NWS->Subsystem);
- IO.mapRequired("DLLCharacteristics", NDC->Characteristics);
- IO.mapRequired("SizeOfStackReserve", PH.Header.SizeOfStackReserve);
- IO.mapRequired("SizeOfStackCommit", PH.Header.SizeOfStackCommit);
- IO.mapRequired("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve);
- IO.mapRequired("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit);
-
+ IO.mapOptional("MajorImageVersion", PH.Header.MajorImageVersion);
+ IO.mapOptional("MinorImageVersion", PH.Header.MinorImageVersion);
+ IO.mapOptional("MajorSubsystemVersion", PH.Header.MajorSubsystemVersion);
+ IO.mapOptional("MinorSubsystemVersion", PH.Header.MinorSubsystemVersion);
+ IO.mapOptional("Subsystem", NWS->Subsystem);
+ IO.mapOptional("DLLCharacteristics", NDC->Characteristics);
+ IO.mapOptional("SizeOfStackReserve", PH.Header.SizeOfStackReserve);
+ IO.mapOptional("SizeOfStackCommit", PH.Header.SizeOfStackCommit);
+ IO.mapOptional("SizeOfHeapReserve", PH.Header.SizeOfHeapReserve);
+ IO.mapOptional("SizeOfHeapCommit", PH.Header.SizeOfHeapCommit);
+
+ IO.mapOptional("NumberOfRvaAndSize", PH.Header.NumberOfRvaAndSize,
+ COFF::NUM_DATA_DIRECTORIES + 1);
IO.mapOptional("ExportTable", PH.DataDirectories[COFF::EXPORT_TABLE]);
IO.mapOptional("ImportTable", PH.DataDirectories[COFF::IMPORT_TABLE]);
IO.mapOptional("ResourceTable", PH.DataDirectories[COFF::RESOURCE_TABLE]);
diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp
index f8f2f0c12020..e378be3892fe 100644
--- a/llvm/lib/ObjectYAML/ELFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp
@@ -1380,9 +1380,6 @@ void ELFState<ELFT>::writeSectionContent(
if (!Section.Entries)
return;
- if (!Section.Entries)
- return;
-
for (const ELFYAML::StackSizeEntry &E : *Section.Entries) {
CBA.write<uintX_t>(E.Address, ELFT::TargetEndianness);
SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(E.Size);
@@ -1488,9 +1485,6 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
if (!Section.Bucket)
return;
- if (!Section.Bucket)
- return;
-
CBA.write<uint32_t>(
Section.NBucket.getValueOr(llvm::yaml::Hex64(Section.Bucket->size())),
ELFT::TargetEndianness);
@@ -1663,9 +1657,6 @@ void ELFState<ELFT>::writeSectionContent(Elf_Shdr &SHeader,
if (!Section.Symbols)
return;
- if (!Section.Symbols)
- return;
-
for (StringRef Sym : *Section.Symbols)
SHeader.sh_size +=
CBA.writeULEB128(toSymbolIndex(Sym, Section.Name, /*IsDynamic=*/false));
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 50821544a687..fdf9aeae1622 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -155,6 +155,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_FREEBSD_PROCSTAT_OSREL);
ECase(NT_FREEBSD_PROCSTAT_PSSTRINGS);
ECase(NT_FREEBSD_PROCSTAT_AUXV);
+ // OpenBSD core note types.
+ ECase(NT_OPENBSD_PROCINFO);
+ ECase(NT_OPENBSD_AUXV);
+ ECase(NT_OPENBSD_REGS);
+ ECase(NT_OPENBSD_FPREGS);
+ ECase(NT_OPENBSD_XFPREGS);
+ ECase(NT_OPENBSD_WCOOKIE);
// AMD specific notes. (Code Object V2)
ECase(NT_AMD_HSA_CODE_OBJECT_VERSION);
ECase(NT_AMD_HSA_HSAIL);
@@ -655,6 +662,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
case ELF::EM_RISCV:
ECase(SHT_RISCV_ATTRIBUTES);
break;
+ case ELF::EM_MSP430:
+ ECase(SHT_MSP430_ATTRIBUTES);
+ break;
default:
// Nothing to do.
break;
@@ -887,6 +897,13 @@ void ScalarEnumerationTraits<ELFYAML::ELF_DYNTAG>::enumeration(
#undef PPC64_DYNAMIC_TAG
#define PPC64_DYNAMIC_TAG(name, value)
break;
+ case ELF::EM_RISCV:
+#undef RISCV_DYNAMIC_TAG
+#define RISCV_DYNAMIC_TAG(name, value) DYNAMIC_TAG(name, value)
+#include "llvm/BinaryFormat/DynamicTags.def"
+#undef RISCV_DYNAMIC_TAG
+#define RISCV_DYNAMIC_TAG(name, value)
+ break;
default:
#include "llvm/BinaryFormat/DynamicTags.def"
break;
@@ -1165,6 +1182,8 @@ struct NormalizedOther {
if (EMachine == ELF::EM_AARCH64)
Map["STO_AARCH64_VARIANT_PCS"] = ELF::STO_AARCH64_VARIANT_PCS;
+ if (EMachine == ELF::EM_RISCV)
+ Map["STO_RISCV_VARIANT_CC"] = ELF::STO_RISCV_VARIANT_CC;
return Map;
}
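The RISC-V dynamic-tag hunk above uses the usual .def trick: redefine RISCV_DYNAMIC_TAG, re-include DynamicTags.def, then restore the empty definition. The snippet below is a self-contained sketch of that X-macro pattern; the DEMO_* names and values are invented.

#include <cstdio>

// Hypothetical stand-in for llvm/BinaryFormat/DynamicTags.def: every entry
// expands through whatever DEMO_TAG means at the point of expansion.
#define DEMO_TAG_LIST                                                          \
  DEMO_TAG(DT_DEMO_NEEDED, 1)                                                  \
  DEMO_TAG(DT_DEMO_RISCV_VARIANT_CC, 0x70000001)

// First expansion: build the enum.
enum DemoTag : unsigned {
#define DEMO_TAG(name, value) name = value,
  DEMO_TAG_LIST
#undef DEMO_TAG
};

// Second expansion of the same list: build a value-to-name lookup.
static const char *demoTagName(unsigned Value) {
  switch (Value) {
#define DEMO_TAG(name, value)                                                  \
  case value:                                                                  \
    return #name;
    DEMO_TAG_LIST
#undef DEMO_TAG
  }
  return "unknown";
}

int main() {
  std::printf("%s\n", demoTagName(0x70000001)); // DT_DEMO_RISCV_VARIANT_CC
  return 0;
}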
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 46e4dd05a737..c653c29ec9a7 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -184,6 +184,30 @@ size_t writeLoadCommandData<MachO::rpath_command>(MachOYAML::LoadCommand &LC,
}
template <>
+size_t writeLoadCommandData<MachO::sub_framework_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::sub_umbrella_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::sub_client_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
+size_t writeLoadCommandData<MachO::sub_library_command>(
+ MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
+ return writePayloadString(LC, OS);
+}
+
+template <>
size_t writeLoadCommandData<MachO::build_version_command>(
MachOYAML::LoadCommand &LC, raw_ostream &OS, bool IsLittleEndian) {
size_t BytesWritten = 0;
@@ -264,6 +288,7 @@ void MachOWriter::writeLoadCommands(raw_ostream &OS) {
}
Error MachOWriter::writeSectionData(raw_ostream &OS) {
+ uint64_t LinkEditOff = 0;
for (auto &LC : Obj.LoadCommands) {
switch (LC.Data.load_command_data.cmd) {
case MachO::LC_SEGMENT:
@@ -273,6 +298,9 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
if (0 ==
strncmp(&LC.Data.segment_command_data.segname[0], "__LINKEDIT", 16)) {
FoundLinkEditSeg = true;
+ LinkEditOff = segOff;
+ if (Obj.RawLinkEditSegment)
+ continue;
writeLinkEditData(OS);
}
for (auto &Sec : LC.Sections) {
@@ -320,6 +348,13 @@ Error MachOWriter::writeSectionData(raw_ostream &OS) {
}
}
+ if (Obj.RawLinkEditSegment) {
+ ZeroToOffset(OS, LinkEditOff);
+ if (OS.tell() - fileStart > LinkEditOff || !LinkEditOff)
+ return createStringError(errc::invalid_argument,
+ "section offsets don't line up");
+ Obj.RawLinkEditSegment->writeAsBinary(OS);
+ }
return Error::success();
}
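The writeSectionData change records the __LINKEDIT file offset, zero-fills up to it, and rejects the raw payload if earlier output already ran past it. Below is a reduced sketch of that pad-then-verify idea using an in-memory stream (the check is done before padding here, and the offsets are made up).

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

// Zero-pad OS up to Target unless earlier writes already passed it, in which
// case the caller reports an error.
static bool padToOffset(llvm::raw_ostream &OS, uint64_t Target) {
  if (OS.tell() > Target)
    return false;
  OS.write_zeros(Target - OS.tell());
  return true;
}

int main() {
  llvm::SmallString<64> Buf;
  llvm::raw_svector_ostream OS(Buf);
  OS << "header";                // 6 bytes of earlier output.
  if (!padToOffset(OS, 16))      // Zero-fill bytes 6..15.
    return 1;
  OS << "raw linkedit payload";  // The verbatim segment goes here.
  llvm::outs() << "total bytes: " << Buf.size() << "\n"; // 16 + 20 = 36
  return 0;
}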
diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index dce82ab1cada..c9562bd72258 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -110,6 +110,9 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
Object.DWARF.Is64BitAddrSize = Object.Header.magic == MachO::MH_MAGIC_64 ||
Object.Header.magic == MachO::MH_CIGAM_64;
IO.mapOptional("LoadCommands", Object.LoadCommands);
+
+ if (Object.RawLinkEditSegment || !IO.outputting())
+ IO.mapOptional("__LINKEDIT", Object.RawLinkEditSegment);
if(!Object.LinkEdit.isEmpty() || !IO.outputting())
IO.mapOptional("LinkEditData", Object.LinkEdit);
@@ -234,6 +237,30 @@ void mapLoadCommandData<MachO::dylinker_command>(
}
template <>
+void mapLoadCommandData<MachO::sub_framework_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
+void mapLoadCommandData<MachO::sub_umbrella_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
+void mapLoadCommandData<MachO::sub_client_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
+void mapLoadCommandData<MachO::sub_library_command>(
+ IO &IO, MachOYAML::LoadCommand &LoadCommand) {
+ IO.mapOptional("Content", LoadCommand.Content);
+}
+
+template <>
void mapLoadCommandData<MachO::build_version_command>(
IO &IO, MachOYAML::LoadCommand &LoadCommand) {
IO.mapOptional("Tools", LoadCommand.Tools);
diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp
index 888ba115e2d9..80a8c56f6912 100644
--- a/llvm/lib/ObjectYAML/WasmEmitter.cpp
+++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp
@@ -157,13 +157,24 @@ void WasmWriter::writeInitExpr(raw_ostream &OS,
void WasmWriter::writeSectionContent(raw_ostream &OS,
WasmYAML::DylinkSection &Section) {
writeStringRef(Section.Name, OS);
- encodeULEB128(Section.MemorySize, OS);
- encodeULEB128(Section.MemoryAlignment, OS);
- encodeULEB128(Section.TableSize, OS);
- encodeULEB128(Section.TableAlignment, OS);
- encodeULEB128(Section.Needed.size(), OS);
- for (StringRef Needed : Section.Needed)
- writeStringRef(Needed, OS);
+
+ writeUint8(OS, wasm::WASM_DYLINK_MEM_INFO);
+ SubSectionWriter SubSection(OS);
+ raw_ostream &SubOS = SubSection.getStream();
+ encodeULEB128(Section.MemorySize, SubOS);
+ encodeULEB128(Section.MemoryAlignment, SubOS);
+ encodeULEB128(Section.TableSize, SubOS);
+ encodeULEB128(Section.TableAlignment, SubOS);
+ SubSection.done();
+
+ if (Section.Needed.size()) {
+ writeUint8(OS, wasm::WASM_DYLINK_NEEDED);
+ raw_ostream &SubOS = SubSection.getStream();
+ encodeULEB128(Section.Needed.size(), SubOS);
+ for (StringRef Needed : Section.Needed)
+ writeStringRef(Needed, SubOS);
+ SubSection.done();
+ }
}
void WasmWriter::writeSectionContent(raw_ostream &OS,
@@ -386,8 +397,8 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
NumImportedGlobals++;
break;
case wasm::WASM_EXTERNAL_TAG:
- writeUint32(OS, Import.TagImport.Attribute);
- writeUint32(OS, Import.TagImport.SigIndex);
+ writeUint8(OS, 0); // Reserved 'attribute' field
+ encodeULEB128(Import.SigIndex, OS);
NumImportedTags++;
break;
case wasm::WASM_EXTERNAL_MEMORY:
@@ -451,16 +462,10 @@ void WasmWriter::writeSectionContent(raw_ostream &OS,
void WasmWriter::writeSectionContent(raw_ostream &OS,
WasmYAML::TagSection &Section) {
- encodeULEB128(Section.Tags.size(), OS);
- uint32_t ExpectedIndex = NumImportedTags;
- for (auto &Tag : Section.Tags) {
- if (Tag.Index != ExpectedIndex) {
- reportError("unexpected tag index: " + Twine(Tag.Index));
- return;
- }
- ++ExpectedIndex;
- encodeULEB128(Tag.Attribute, OS);
- encodeULEB128(Tag.SigIndex, OS);
+ encodeULEB128(Section.TagTypes.size(), OS);
+ for (uint32_t TagType : Section.TagTypes) {
+ writeUint8(OS, 0); // Reserved 'attribute' field
+ encodeULEB128(TagType, OS);
}
}
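The dylink.0 writer above adopts the subsection layout: a one-byte subsection type, a ULEB128 byte count, then the payload. The sketch below assembles one length-prefixed blob with LLVM's LEB128 helper; the type value and field values are arbitrary.

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Build the payload first so its size is known.
  llvm::SmallString<32> Payload;
  llvm::raw_svector_ostream PayloadOS(Payload);
  llvm::encodeULEB128(/*MemorySize=*/65536, PayloadOS);
  llvm::encodeULEB128(/*MemoryAlignment=*/16, PayloadOS);

  // Then emit: type byte, ULEB128 size, payload bytes.
  llvm::SmallString<64> Section;
  llvm::raw_svector_ostream OS(Section);
  OS << static_cast<char>(1); // Hypothetical subsection type.
  llvm::encodeULEB128(Payload.size(), OS);
  OS << Payload.str();

  llvm::outs() << "subsection bytes: " << Section.size() << "\n"; // 6
  return 0;
}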
diff --git a/llvm/lib/ObjectYAML/WasmYAML.cpp b/llvm/lib/ObjectYAML/WasmYAML.cpp
index 752654ddbbaf..3f0172ebf361 100644
--- a/llvm/lib/ObjectYAML/WasmYAML.cpp
+++ b/llvm/lib/ObjectYAML/WasmYAML.cpp
@@ -55,6 +55,8 @@ static void sectionMapping(IO &IO, WasmYAML::DylinkSection &Section) {
IO.mapRequired("TableSize", Section.TableSize);
IO.mapRequired("TableAlignment", Section.TableAlignment);
IO.mapRequired("Needed", Section.Needed);
+ IO.mapOptional("ImportInfo", Section.ImportInfo);
+ IO.mapOptional("ExportInfo", Section.ExportInfo);
}
static void sectionMapping(IO &IO, WasmYAML::NameSection &Section) {
@@ -122,7 +124,7 @@ static void sectionMapping(IO &IO, WasmYAML::MemorySection &Section) {
static void sectionMapping(IO &IO, WasmYAML::TagSection &Section) {
commonSectionMapping(IO, Section);
- IO.mapOptional("Tags", Section.Tags);
+ IO.mapOptional("TagTypes", Section.TagTypes);
}
static void sectionMapping(IO &IO, WasmYAML::GlobalSection &Section) {
@@ -177,7 +179,7 @@ void MappingTraits<std::unique_ptr<WasmYAML::Section>>::mapping(
} else {
IO.mapRequired("Name", SectionName);
}
- if (SectionName == "dylink") {
+ if (SectionName == "dylink" || SectionName == "dylink.0") {
if (!IO.outputting())
Section.reset(new WasmYAML::DylinkSection());
sectionMapping(IO, *cast<WasmYAML::DylinkSection>(Section.get()));
@@ -391,14 +393,12 @@ void MappingTraits<WasmYAML::Import>::mapping(IO &IO,
IO.mapRequired("Module", Import.Module);
IO.mapRequired("Field", Import.Field);
IO.mapRequired("Kind", Import.Kind);
- if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
+ if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION ||
+ Import.Kind == wasm::WASM_EXTERNAL_TAG) {
IO.mapRequired("SigIndex", Import.SigIndex);
} else if (Import.Kind == wasm::WASM_EXTERNAL_GLOBAL) {
IO.mapRequired("GlobalType", Import.GlobalImport.Type);
IO.mapRequired("GlobalMutable", Import.GlobalImport.Mutable);
- } else if (Import.Kind == wasm::WASM_EXTERNAL_TAG) {
- IO.mapRequired("TagAttribute", Import.TagImport.Attribute);
- IO.mapRequired("TagSigIndex", Import.TagImport.SigIndex);
} else if (Import.Kind == wasm::WASM_EXTERNAL_TABLE) {
IO.mapRequired("Table", Import.TableImport);
} else if (Import.Kind == wasm::WASM_EXTERNAL_MEMORY) {
@@ -525,10 +525,17 @@ void MappingTraits<WasmYAML::SymbolInfo>::mapping(IO &IO,
}
}
-void MappingTraits<WasmYAML::Tag>::mapping(IO &IO, WasmYAML::Tag &Tag) {
- IO.mapRequired("Index", Tag.Index);
- IO.mapRequired("Attribute", Tag.Attribute);
- IO.mapRequired("SigIndex", Tag.SigIndex);
+void MappingTraits<WasmYAML::DylinkImportInfo>::mapping(
+ IO &IO, WasmYAML::DylinkImportInfo &Info) {
+ IO.mapRequired("Module", Info.Module);
+ IO.mapRequired("Field", Info.Field);
+ IO.mapRequired("Flags", Info.Flags);
+}
+
+void MappingTraits<WasmYAML::DylinkExportInfo>::mapping(
+ IO &IO, WasmYAML::DylinkExportInfo &Info) {
+ IO.mapRequired("Name", Info.Name);
+ IO.mapRequired("Flags", Info.Flags);
}
void ScalarBitSetTraits<WasmYAML::LimitFlags>::bitset(
@@ -561,6 +568,7 @@ void ScalarBitSetTraits<WasmYAML::SymbolFlags>::bitset(
BCaseMask(EXPORTED, EXPORTED);
BCaseMask(EXPLICIT_NAME, EXPLICIT_NAME);
BCaseMask(NO_STRIP, NO_STRIP);
+ BCaseMask(TLS, TLS);
#undef BCaseMask
}
diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index 14fea5437a32..85d1f82bfafc 100644
--- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -18,8 +18,9 @@
#include "llvm/ObjectYAML/ObjectYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -33,7 +34,7 @@ class XCOFFWriter {
public:
XCOFFWriter(XCOFFYAML::Object &Obj, raw_ostream &OS, yaml::ErrorHandler EH)
: Obj(Obj), W(OS, support::big), ErrHandler(EH),
- Strings(StringTableBuilder::XCOFF) {
+ StrTblBuilder(StringTableBuilder::XCOFF) {
Is64Bit = Obj.Header.Magic == (llvm::yaml::Hex16)XCOFF::XCOFF64;
}
bool writeXCOFF();
@@ -41,20 +42,24 @@ public:
private:
bool nameShouldBeInStringTable(StringRef SymbolName);
bool initFileHeader(uint64_t CurrentOffset);
+ void initAuxFileHeader();
bool initSectionHeader(uint64_t &CurrentOffset);
bool initRelocations(uint64_t &CurrentOffset);
+ bool initStringTable();
bool assignAddressesAndIndices();
void writeFileHeader();
+ void writeAuxFileHeader();
void writeSectionHeader();
bool writeSectionData();
bool writeRelocations();
bool writeSymbols();
+ void writeStringTable();
XCOFFYAML::Object &Obj;
bool Is64Bit = false;
support::endian::Writer W;
yaml::ErrorHandler ErrHandler;
- StringTableBuilder Strings;
+ StringTableBuilder StrTblBuilder;
uint64_t StartOffset;
// Map the section name to its corresponding section index.
DenseMap<StringRef, int16_t> SectionIndexMap = {
@@ -62,6 +67,7 @@ private:
{StringRef("N_ABS"), XCOFF::N_ABS},
{StringRef("N_UNDEF"), XCOFF::N_UNDEF}};
XCOFFYAML::FileHeader InitFileHdr = Obj.Header;
+ XCOFFYAML::AuxiliaryHeader InitAuxFileHdr;
std::vector<XCOFFYAML::Section> InitSections = Obj.Sections;
};
@@ -75,7 +81,8 @@ static void writeName(StringRef StrName, support::endian::Writer W) {
}
bool XCOFFWriter::nameShouldBeInStringTable(StringRef SymbolName) {
- return SymbolName.size() > XCOFF::NameSize;
+ // For XCOFF64: The symbol name is always in the string table.
+ return (SymbolName.size() > XCOFF::NameSize) || Is64Bit;
}
bool XCOFFWriter::initRelocations(uint64_t &CurrentOffset) {
@@ -83,8 +90,9 @@ bool XCOFFWriter::initRelocations(uint64_t &CurrentOffset) {
if (!InitSections[I].Relocations.empty()) {
InitSections[I].NumberOfRelocations = InitSections[I].Relocations.size();
InitSections[I].FileOffsetToRelocations = CurrentOffset;
- CurrentOffset += InitSections[I].NumberOfRelocations *
- XCOFF::RelocationSerializationSize32;
+ uint64_t RelSize = Is64Bit ? XCOFF::RelocationSerializationSize64
+ : XCOFF::RelocationSerializationSize32;
+ CurrentOffset += InitSections[I].NumberOfRelocations * RelSize;
if (CurrentOffset > MaxRawDataSize) {
ErrHandler("maximum object size of" + Twine(MaxRawDataSize) +
"exceeded when writing relocation data");
@@ -138,20 +146,79 @@ bool XCOFFWriter::initSectionHeader(uint64_t &CurrentOffset) {
return initRelocations(CurrentOffset);
}
+bool XCOFFWriter::initStringTable() {
+ if (Obj.StrTbl.RawContent) {
+ size_t RawSize = Obj.StrTbl.RawContent->binary_size();
+ if (Obj.StrTbl.Strings || Obj.StrTbl.Length) {
+ ErrHandler(
+ "can't specify Strings or Length when RawContent is specified");
+ return false;
+ }
+ if (Obj.StrTbl.ContentSize && *Obj.StrTbl.ContentSize < RawSize) {
+ ErrHandler("specified ContentSize (" + Twine(*Obj.StrTbl.ContentSize) +
+ ") is less than the RawContent data size (" + Twine(RawSize) +
+ ")");
+ return false;
+ }
+ return true;
+ }
+ if (Obj.StrTbl.ContentSize && *Obj.StrTbl.ContentSize <= 3) {
+ ErrHandler("ContentSize shouldn't be less than 4 without RawContent");
+ return false;
+ }
+
+ // Build the string table.
+ StrTblBuilder.clear();
+
+ if (Obj.StrTbl.Strings) {
+ // All specified strings should be added to the string table.
+ for (StringRef StringEnt : *Obj.StrTbl.Strings)
+ StrTblBuilder.add(StringEnt);
+
+ size_t StrTblIdx = 0;
+ size_t NumOfStrings = Obj.StrTbl.Strings->size();
+ for (XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
+ if (nameShouldBeInStringTable(YamlSym.SymbolName)) {
+ if (StrTblIdx < NumOfStrings) {
+ // Overwrite the symbol name with the specified string.
+ YamlSym.SymbolName = (*Obj.StrTbl.Strings)[StrTblIdx];
+ ++StrTblIdx;
+ } else
+ // Names that are not overwritten are still stored in the string
+ // table.
+ StrTblBuilder.add(YamlSym.SymbolName);
+ }
+ }
+ } else {
+ for (XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
+ if (nameShouldBeInStringTable(YamlSym.SymbolName))
+ StrTblBuilder.add(YamlSym.SymbolName);
+ }
+ }
+
+ StrTblBuilder.finalize();
+
+ size_t StrTblSize = StrTblBuilder.getSize();
+ if (Obj.StrTbl.ContentSize && *Obj.StrTbl.ContentSize < StrTblSize) {
+ ErrHandler("specified ContentSize (" + Twine(*Obj.StrTbl.ContentSize) +
+ ") is less than the size of the data that would otherwise be "
+ "written (" +
+ Twine(StrTblSize) + ")");
+ return false;
+ }
+
+ return true;
+}
+
bool XCOFFWriter::initFileHeader(uint64_t CurrentOffset) {
// The default format of the object file is XCOFF32.
InitFileHdr.Magic = XCOFF::XCOFF32;
InitFileHdr.NumberOfSections = Obj.Sections.size();
InitFileHdr.NumberOfSymTableEntries = Obj.Symbols.size();
- for (const XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
+ for (const XCOFFYAML::Symbol &YamlSym : Obj.Symbols)
// Add the number of auxiliary symbols to the total number.
InitFileHdr.NumberOfSymTableEntries += YamlSym.NumberOfAuxEntries;
- if (nameShouldBeInStringTable(YamlSym.SymbolName))
- Strings.add(YamlSym.SymbolName);
- }
- // Finalize the string table.
- Strings.finalize();
// Calculate SymbolTableOffset for the file header.
if (InitFileHdr.NumberOfSymTableEntries) {
@@ -168,17 +235,87 @@ bool XCOFFWriter::initFileHeader(uint64_t CurrentOffset) {
return true;
}
+void XCOFFWriter::initAuxFileHeader() {
+ InitAuxFileHdr = *Obj.AuxHeader;
+ // In general, an object file might contain multiple sections of a given type,
+ // but in a loadable module, there must be exactly one .text, .data, .bss, and
+ // .loader section. A loadable object might also have one .tdata section and
+ // one .tbss section.
+ // Set these section-related values if not set explicitly. We assume that the
+ // input YAML matches the format of the loadable object, but if multiple input
+ // sections still have the same type, the first section with that type
+ // prevails.
+ for (uint16_t I = 0, E = InitSections.size(); I < E; ++I) {
+ switch (InitSections[I].Flags) {
+ case XCOFF::STYP_TEXT:
+ if (!InitAuxFileHdr.TextSize)
+ InitAuxFileHdr.TextSize = InitSections[I].Size;
+ if (!InitAuxFileHdr.TextStartAddr)
+ InitAuxFileHdr.TextStartAddr = InitSections[I].Address;
+ if (!InitAuxFileHdr.SecNumOfText)
+ InitAuxFileHdr.SecNumOfText = I + 1;
+ break;
+ case XCOFF::STYP_DATA:
+ if (!InitAuxFileHdr.InitDataSize)
+ InitAuxFileHdr.InitDataSize = InitSections[I].Size;
+ if (!InitAuxFileHdr.DataStartAddr)
+ InitAuxFileHdr.DataStartAddr = InitSections[I].Address;
+ if (!InitAuxFileHdr.SecNumOfData)
+ InitAuxFileHdr.SecNumOfData = I + 1;
+ break;
+ case XCOFF::STYP_BSS:
+ if (!InitAuxFileHdr.BssDataSize)
+ InitAuxFileHdr.BssDataSize = InitSections[I].Size;
+ if (!InitAuxFileHdr.SecNumOfBSS)
+ InitAuxFileHdr.SecNumOfBSS = I + 1;
+ break;
+ case XCOFF::STYP_TDATA:
+ if (!InitAuxFileHdr.SecNumOfTData)
+ InitAuxFileHdr.SecNumOfTData = I + 1;
+ break;
+ case XCOFF::STYP_TBSS:
+ if (!InitAuxFileHdr.SecNumOfTBSS)
+ InitAuxFileHdr.SecNumOfTBSS = I + 1;
+ break;
+ case XCOFF::STYP_LOADER:
+ if (!InitAuxFileHdr.SecNumOfLoader)
+ InitAuxFileHdr.SecNumOfLoader = I + 1;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
bool XCOFFWriter::assignAddressesAndIndices() {
- Strings.clear();
+ uint64_t FileHdrSize =
+ Is64Bit ? XCOFF::FileHeaderSize64 : XCOFF::FileHeaderSize32;
+ uint64_t AuxFileHdrSize = 0;
+ if (Obj.AuxHeader)
+ AuxFileHdrSize = Obj.Header.AuxHeaderSize
+ ? Obj.Header.AuxHeaderSize
+ : (Is64Bit ? XCOFF::AuxFileHeaderSize64
+ : XCOFF::AuxFileHeaderSize32);
+ uint64_t SecHdrSize =
+ Is64Bit ? XCOFF::SectionHeaderSize64 : XCOFF::SectionHeaderSize32;
uint64_t CurrentOffset =
- XCOFF::FileHeaderSize32 /* TODO: + auxiliaryHeaderSize() */ +
- InitSections.size() * XCOFF::SectionHeaderSize32;
+ FileHdrSize + AuxFileHdrSize + InitSections.size() * SecHdrSize;
// Calculate section header info.
if (!initSectionHeader(CurrentOffset))
return false;
+ InitFileHdr.AuxHeaderSize = AuxFileHdrSize;
+
// Calculate file header info.
- return initFileHeader(CurrentOffset);
+ if (!initFileHeader(CurrentOffset))
+ return false;
+
+ // Initialize the auxiliary file header.
+ if (Obj.AuxHeader)
+ initAuxFileHeader();
+
+ // Initialize the string table.
+ return initStringTable();
}
void XCOFFWriter::writeFileHeader() {
@@ -186,14 +323,86 @@ void XCOFFWriter::writeFileHeader() {
W.write<uint16_t>(Obj.Header.NumberOfSections ? Obj.Header.NumberOfSections
: InitFileHdr.NumberOfSections);
W.write<int32_t>(Obj.Header.TimeStamp);
- W.write<uint32_t>(Obj.Header.SymbolTableOffset
- ? Obj.Header.SymbolTableOffset
- : InitFileHdr.SymbolTableOffset);
- W.write<int32_t>(Obj.Header.NumberOfSymTableEntries
- ? Obj.Header.NumberOfSymTableEntries
- : InitFileHdr.NumberOfSymTableEntries);
- W.write<uint16_t>(Obj.Header.AuxHeaderSize);
- W.write<uint16_t>(Obj.Header.Flags);
+ if (Is64Bit) {
+ W.write<uint64_t>(Obj.Header.SymbolTableOffset
+ ? Obj.Header.SymbolTableOffset
+ : InitFileHdr.SymbolTableOffset);
+ W.write<uint16_t>(InitFileHdr.AuxHeaderSize);
+ W.write<uint16_t>(Obj.Header.Flags);
+ W.write<int32_t>(Obj.Header.NumberOfSymTableEntries
+ ? Obj.Header.NumberOfSymTableEntries
+ : InitFileHdr.NumberOfSymTableEntries);
+ } else {
+ W.write<uint32_t>(Obj.Header.SymbolTableOffset
+ ? Obj.Header.SymbolTableOffset
+ : InitFileHdr.SymbolTableOffset);
+ W.write<int32_t>(Obj.Header.NumberOfSymTableEntries
+ ? Obj.Header.NumberOfSymTableEntries
+ : InitFileHdr.NumberOfSymTableEntries);
+ W.write<uint16_t>(InitFileHdr.AuxHeaderSize);
+ W.write<uint16_t>(Obj.Header.Flags);
+ }
+}
+
+void XCOFFWriter::writeAuxFileHeader() {
+ W.write<uint16_t>(InitAuxFileHdr.Magic.getValueOr(yaml::Hex16(1)));
+ W.write<uint16_t>(InitAuxFileHdr.Version.getValueOr(yaml::Hex16(1)));
+ if (Is64Bit) {
+ W.OS.write_zeros(4); // Reserved for debugger.
+ W.write<uint64_t>(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0)));
+ } else {
+ W.write<uint32_t>(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TextStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.DataStartAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.TOCAnchorAddr.getValueOr(yaml::Hex64(0)));
+ }
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfEntryPoint.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfText.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfData.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTOC.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfLoader.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfBSS.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfText.getValueOr(yaml::Hex16(0)));
+ W.write<uint16_t>(InitAuxFileHdr.MaxAlignOfData.getValueOr(yaml::Hex16(0)));
+ W.write<uint16_t>(InitAuxFileHdr.ModuleType.getValueOr(yaml::Hex16(0)));
+ W.write<uint8_t>(InitAuxFileHdr.CpuFlag.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(0); // Reserved for CPU type.
+ if (Is64Bit) {
+ W.write<uint8_t>(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(
+ InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0x80)));
+ W.write<uint64_t>(InitAuxFileHdr.TextSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.InitDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.BssDataSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.EntryPointAddr.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint64_t>(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0)));
+ } else {
+ W.write<uint32_t>(InitAuxFileHdr.MaxStackSize.getValueOr(yaml::Hex64(0)));
+ W.write<uint32_t>(InitAuxFileHdr.MaxDataSize.getValueOr(yaml::Hex64(0)));
+ W.OS.write_zeros(4); // Reserved for debugger.
+ W.write<uint8_t>(InitAuxFileHdr.TextPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.DataPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(InitAuxFileHdr.StackPageSize.getValueOr(yaml::Hex8(0)));
+ W.write<uint8_t>(
+ InitAuxFileHdr.FlagAndTDataAlignment.getValueOr(yaml::Hex8(0)));
+ }
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTData.getValueOr(0));
+ W.write<uint16_t>(InitAuxFileHdr.SecNumOfTBSS.getValueOr(0));
+ if (Is64Bit) {
+ W.write<uint16_t>(InitAuxFileHdr.Flag.getValueOr(yaml::Hex16(XCOFF::SHR_SYMTAB)));
+ if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize64)
+ W.OS.write_zeros(InitFileHdr.AuxHeaderSize - XCOFF::AuxFileHeaderSize64);
+ } else if (InitFileHdr.AuxHeaderSize > XCOFF::AuxFileHeaderSize32) {
+ W.OS.write_zeros(InitFileHdr.AuxHeaderSize - XCOFF::AuxFileHeaderSize32);
+ }
}
void XCOFFWriter::writeSectionHeader() {
@@ -202,22 +411,40 @@ void XCOFFWriter::writeSectionHeader() {
XCOFFYAML::Section DerivedSec = InitSections[I];
writeName(YamlSec.SectionName, W);
// Virtual address is the same as physical address.
- uint32_t SectionAddress =
+ uint64_t SectionAddress =
YamlSec.Address ? YamlSec.Address : DerivedSec.Address;
- W.write<uint32_t>(SectionAddress); // Physical address
- W.write<uint32_t>(SectionAddress); // Virtual address
- W.write<uint32_t>(YamlSec.Size ? YamlSec.Size : DerivedSec.Size);
- W.write<uint32_t>(YamlSec.FileOffsetToData ? YamlSec.FileOffsetToData
- : DerivedSec.FileOffsetToData);
- W.write<uint32_t>(YamlSec.FileOffsetToRelocations
- ? YamlSec.FileOffsetToRelocations
- : DerivedSec.FileOffsetToRelocations);
- W.write<uint32_t>(YamlSec.FileOffsetToLineNumbers);
- W.write<uint16_t>(YamlSec.NumberOfRelocations
- ? YamlSec.NumberOfRelocations
- : DerivedSec.NumberOfRelocations);
- W.write<uint16_t>(YamlSec.NumberOfLineNumbers);
- W.write<int32_t>(YamlSec.Flags);
+ if (Is64Bit) {
+ W.write<uint64_t>(SectionAddress); // Physical address
+ W.write<uint64_t>(SectionAddress); // Virtual address
+ W.write<uint64_t>(YamlSec.Size ? YamlSec.Size : DerivedSec.Size);
+ W.write<uint64_t>(YamlSec.FileOffsetToData ? YamlSec.FileOffsetToData
+ : DerivedSec.FileOffsetToData);
+ W.write<uint64_t>(YamlSec.FileOffsetToRelocations
+ ? YamlSec.FileOffsetToRelocations
+ : DerivedSec.FileOffsetToRelocations);
+ W.write<uint64_t>(YamlSec.FileOffsetToLineNumbers);
+ W.write<uint32_t>(YamlSec.NumberOfRelocations
+ ? YamlSec.NumberOfRelocations
+ : DerivedSec.NumberOfRelocations);
+ W.write<uint32_t>(YamlSec.NumberOfLineNumbers);
+ W.write<int32_t>(YamlSec.Flags);
+ W.OS.write_zeros(4);
+ } else {
+ W.write<uint32_t>(SectionAddress); // Physical address
+ W.write<uint32_t>(SectionAddress); // Virtual address
+ W.write<uint32_t>(YamlSec.Size ? YamlSec.Size : DerivedSec.Size);
+ W.write<uint32_t>(YamlSec.FileOffsetToData ? YamlSec.FileOffsetToData
+ : DerivedSec.FileOffsetToData);
+ W.write<uint32_t>(YamlSec.FileOffsetToRelocations
+ ? YamlSec.FileOffsetToRelocations
+ : DerivedSec.FileOffsetToRelocations);
+ W.write<uint32_t>(YamlSec.FileOffsetToLineNumbers);
+ W.write<uint16_t>(YamlSec.NumberOfRelocations
+ ? YamlSec.NumberOfRelocations
+ : DerivedSec.NumberOfRelocations);
+ W.write<uint16_t>(YamlSec.NumberOfLineNumbers);
+ W.write<int32_t>(YamlSec.Flags);
+ }
}
}
@@ -232,8 +459,7 @@ bool XCOFFWriter::writeSectionData() {
ErrHandler("redundant data was written before section data");
return false;
}
- if (PaddingSize > 0)
- W.OS.write_zeros(PaddingSize);
+ W.OS.write_zeros(PaddingSize);
YamlSec.SectionData.writeAsBinary(W.OS);
}
}
@@ -250,10 +476,12 @@ bool XCOFFWriter::writeRelocations() {
ErrHandler("redundant data was written before relocations");
return false;
}
- if (PaddingSize > 0)
- W.OS.write_zeros(PaddingSize);
+ W.OS.write_zeros(PaddingSize);
for (const XCOFFYAML::Relocation &YamlRel : YamlSec.Relocations) {
- W.write<uint32_t>(YamlRel.VirtualAddress);
+ if (Is64Bit)
+ W.write<uint64_t>(YamlRel.VirtualAddress);
+ else
+ W.write<uint32_t>(YamlRel.VirtualAddress);
W.write<uint32_t>(YamlRel.SymbolIndex);
W.write<uint8_t>(YamlRel.Info);
W.write<uint8_t>(YamlRel.Type);
@@ -270,20 +498,39 @@ bool XCOFFWriter::writeSymbols() {
ErrHandler("redundant data was written before symbols");
return false;
}
- if (PaddingSize > 0)
- W.OS.write_zeros(PaddingSize);
+ W.OS.write_zeros(PaddingSize);
for (const XCOFFYAML::Symbol &YamlSym : Obj.Symbols) {
- if (nameShouldBeInStringTable(YamlSym.SymbolName)) {
- // For XCOFF32: A value of 0 indicates that the symbol name is in the
- // string table.
- W.write<int32_t>(0);
- W.write<uint32_t>(Strings.getOffset(YamlSym.SymbolName));
+ if (Is64Bit) {
+ W.write<uint64_t>(YamlSym.Value);
+ W.write<uint32_t>(StrTblBuilder.getOffset(YamlSym.SymbolName));
+ } else {
+ if (nameShouldBeInStringTable(YamlSym.SymbolName)) {
+ // For XCOFF32: A value of 0 indicates that the symbol name is in the
+ // string table.
+ W.write<int32_t>(0);
+ W.write<uint32_t>(StrTblBuilder.getOffset(YamlSym.SymbolName));
+ } else {
+ writeName(YamlSym.SymbolName, W);
+ }
+ W.write<uint32_t>(YamlSym.Value);
+ }
+ if (YamlSym.SectionName) {
+ if (!SectionIndexMap.count(*YamlSym.SectionName)) {
+ ErrHandler("the SectionName " + *YamlSym.SectionName +
+ " specified in the symbol does not exist");
+ return false;
+ }
+ if (YamlSym.SectionIndex &&
+ SectionIndexMap[*YamlSym.SectionName] != *YamlSym.SectionIndex) {
+ ErrHandler("the SectionName " + *YamlSym.SectionName +
+ " and the SectionIndex (" + Twine(*YamlSym.SectionIndex) +
+ ") refer to different sections");
+ return false;
+ }
+ W.write<int16_t>(SectionIndexMap[*YamlSym.SectionName]);
} else {
- writeName(YamlSym.SymbolName, W);
+ W.write<int16_t>(YamlSym.SectionIndex ? *YamlSym.SectionIndex : 0);
}
- W.write<uint32_t>(YamlSym.Value);
- W.write<int16_t>(
- YamlSym.SectionName.size() ? SectionIndexMap[YamlSym.SectionName] : 0);
W.write<uint16_t>(YamlSym.Type);
W.write<uint8_t>(YamlSym.StorageClass);
W.write<uint8_t>(YamlSym.NumberOfAuxEntries);
@@ -295,21 +542,61 @@ bool XCOFFWriter::writeSymbols() {
// length of each auxiliary entry is the same as a symbol table entry (18
// bytes). The format and quantity of auxiliary entries depend on the
// storage class (n_sclass) and type (n_type) of the symbol table entry.
- W.OS.write_zeros(18);
+ W.OS.write_zeros(XCOFF::SymbolTableEntrySize);
}
}
return true;
}
-bool XCOFFWriter::writeXCOFF() {
- if (Is64Bit) {
- ErrHandler("only XCOFF32 is currently supported");
- return false;
+void XCOFFWriter::writeStringTable() {
+ if (Obj.StrTbl.RawContent) {
+ Obj.StrTbl.RawContent->writeAsBinary(W.OS);
+ if (Obj.StrTbl.ContentSize) {
+ assert(*Obj.StrTbl.ContentSize >= Obj.StrTbl.RawContent->binary_size() &&
+ "Specified ContentSize is less than the RawContent size.");
+ W.OS.write_zeros(*Obj.StrTbl.ContentSize -
+ Obj.StrTbl.RawContent->binary_size());
+ }
+ return;
+ }
+
+ size_t StrTblBuilderSize = StrTblBuilder.getSize();
+ // If neither Length nor ContentSize is specified, write the StrTblBuilder
+ // directly, which contains the auto-generated Length value.
+ if (!Obj.StrTbl.Length && !Obj.StrTbl.ContentSize) {
+ if (StrTblBuilderSize <= 4)
+ return;
+ StrTblBuilder.write(W.OS);
+ return;
+ }
+
+ // Serialize the string table's content to a temporary buffer.
+ std::unique_ptr<WritableMemoryBuffer> Buf =
+ WritableMemoryBuffer::getNewMemBuffer(StrTblBuilderSize);
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+ StrTblBuilder.write(Ptr);
+ // Replace the first 4 bytes, which contain the auto-generated Length value,
+ // with the specified value.
+ memset(Ptr, 0, 4);
+ support::endian::write32be(Ptr, Obj.StrTbl.Length ? *Obj.StrTbl.Length
+ : *Obj.StrTbl.ContentSize);
+ // Copy the buffer content to the actual output stream.
+ W.OS.write(Buf->getBufferStart(), Buf->getBufferSize());
+ // Add zeros as padding after strings.
+ if (Obj.StrTbl.ContentSize) {
+ assert(*Obj.StrTbl.ContentSize >= StrTblBuilderSize &&
+ "Specified ContentSize is less than the StringTableBuilder size.");
+ W.OS.write_zeros(*Obj.StrTbl.ContentSize - StrTblBuilderSize);
}
+}
+
+bool XCOFFWriter::writeXCOFF() {
if (!assignAddressesAndIndices())
return false;
StartOffset = W.OS.tell();
writeFileHeader();
+ if (Obj.AuxHeader)
+ writeAuxFileHeader();
if (!Obj.Sections.empty()) {
writeSectionHeader();
if (!writeSectionData())
@@ -319,9 +606,7 @@ bool XCOFFWriter::writeXCOFF() {
}
if (!Obj.Symbols.empty() && !writeSymbols())
return false;
- // Write the string table.
- if (Strings.getSize() > 4)
- Strings.write(W.OS);
+ writeStringTable();
return true;
}
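Much of the XCOFF emitter rework funnels symbol names through a StringTableBuilder and defers the decision of how the table is written. A small stand-alone sketch of the builder's add/finalize/getOffset/write flow follows; the symbol names are arbitrary.

#include "llvm/ADT/SmallString.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::StringTableBuilder Builder(llvm::StringTableBuilder::XCOFF);

  // Add every name that must live in the table (long names, or all symbol
  // names for XCOFF64), then finalize before querying any offsets.
  Builder.add("a_rather_long_symbol_name");
  Builder.add(".text_helper");
  Builder.finalize();

  llvm::outs() << "table size: " << Builder.getSize() << "\n";
  llvm::outs() << "offset of .text_helper: "
               << Builder.getOffset(".text_helper") << "\n";

  // Serialize to a buffer; per the writeStringTable comments above, the XCOFF
  // flavour keeps the table length in the first four bytes.
  llvm::SmallString<128> Blob;
  llvm::raw_svector_ostream OS(Blob);
  Builder.write(OS);
  llvm::outs() << "serialized bytes: " << Blob.size() << "\n";
  return 0;
}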
diff --git a/llvm/lib/ObjectYAML/XCOFFYAML.cpp b/llvm/lib/ObjectYAML/XCOFFYAML.cpp
index 73d188e274b1..221cf3b064c0 100644
--- a/llvm/lib/ObjectYAML/XCOFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFYAML.cpp
@@ -118,6 +118,37 @@ void MappingTraits<XCOFFYAML::FileHeader>::mapping(
IO.mapOptional("Flags", FileHdr.Flags);
}
+void MappingTraits<XCOFFYAML::AuxiliaryHeader>::mapping(
+ IO &IO, XCOFFYAML::AuxiliaryHeader &AuxHdr) {
+ IO.mapOptional("Magic", AuxHdr.Magic);
+ IO.mapOptional("Version", AuxHdr.Version);
+ IO.mapOptional("TextStartAddr", AuxHdr.TextStartAddr);
+ IO.mapOptional("DataStartAddr", AuxHdr.DataStartAddr);
+ IO.mapOptional("TOCAnchorAddr", AuxHdr.TOCAnchorAddr);
+ IO.mapOptional("TextSectionSize", AuxHdr.TextSize);
+ IO.mapOptional("DataSectionSize", AuxHdr.InitDataSize);
+ IO.mapOptional("BssSectionSize", AuxHdr.BssDataSize);
+ IO.mapOptional("SecNumOfEntryPoint", AuxHdr.SecNumOfEntryPoint);
+ IO.mapOptional("SecNumOfText", AuxHdr.SecNumOfText);
+ IO.mapOptional("SecNumOfData", AuxHdr.SecNumOfData);
+ IO.mapOptional("SecNumOfTOC", AuxHdr.SecNumOfTOC);
+ IO.mapOptional("SecNumOfLoader", AuxHdr.SecNumOfLoader);
+ IO.mapOptional("SecNumOfBSS", AuxHdr.SecNumOfBSS);
+ IO.mapOptional("MaxAlignOfText", AuxHdr.MaxAlignOfText);
+ IO.mapOptional("MaxAlignOfData", AuxHdr.MaxAlignOfData);
+ IO.mapOptional("ModuleType", AuxHdr.CpuFlag);
+ IO.mapOptional("TextPageSize", AuxHdr.TextPageSize);
+ IO.mapOptional("DataPageSize", AuxHdr.DataPageSize);
+ IO.mapOptional("StackPageSize", AuxHdr.StackPageSize);
+ IO.mapOptional("FlagAndTDataAlignment", AuxHdr.FlagAndTDataAlignment);
+ IO.mapOptional("EntryPointAddr", AuxHdr.EntryPointAddr);
+ IO.mapOptional("MaxStackSize", AuxHdr.MaxStackSize);
+ IO.mapOptional("MaxDataSize", AuxHdr.MaxDataSize);
+ IO.mapOptional("SecNumOfTData", AuxHdr.SecNumOfTData);
+ IO.mapOptional("SecNumOfTBSS", AuxHdr.SecNumOfTBSS);
+ IO.mapOptional("Flag", AuxHdr.Flag);
+}
+
void MappingTraits<XCOFFYAML::Relocation>::mapping(IO &IO,
XCOFFYAML::Relocation &R) {
IO.mapOptional("Address", R.VirtualAddress);
@@ -143,19 +174,29 @@ void MappingTraits<XCOFFYAML::Section>::mapping(IO &IO,
}
void MappingTraits<XCOFFYAML::Symbol>::mapping(IO &IO, XCOFFYAML::Symbol &S) {
- IO.mapRequired("Name", S.SymbolName);
+ IO.mapOptional("Name", S.SymbolName);
IO.mapOptional("Value", S.Value);
IO.mapOptional("Section", S.SectionName);
+ IO.mapOptional("SectionIndex", S.SectionIndex);
IO.mapOptional("Type", S.Type);
IO.mapOptional("StorageClass", S.StorageClass);
IO.mapOptional("NumberOfAuxEntries", S.NumberOfAuxEntries);
}
+void MappingTraits<XCOFFYAML::StringTable>::mapping(IO &IO, XCOFFYAML::StringTable &Str) {
+ IO.mapOptional("ContentSize", Str.ContentSize);
+ IO.mapOptional("Length", Str.Length);
+ IO.mapOptional("Strings", Str.Strings);
+ IO.mapOptional("RawContent", Str.RawContent);
+}
+
void MappingTraits<XCOFFYAML::Object>::mapping(IO &IO, XCOFFYAML::Object &Obj) {
IO.mapTag("!XCOFF", true);
IO.mapRequired("FileHeader", Obj.Header);
+ IO.mapOptional("AuxiliaryHeader", Obj.AuxHeader);
IO.mapOptional("Sections", Obj.Sections);
IO.mapOptional("Symbols", Obj.Symbols);
+ IO.mapOptional("StringTable", Obj.StrTbl);
}
} // namespace yaml
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index f5bf166e9e65..37c2fcbab181 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -104,11 +104,11 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
unsigned Kind = getInfo(i + 1).Kind;
if (Kind == Option::InputClass) {
- assert(!TheInputOptionID && "Cannot have multiple input options!");
- TheInputOptionID = getInfo(i + 1).ID;
+ assert(!InputOptionID && "Cannot have multiple input options!");
+ InputOptionID = getInfo(i + 1).ID;
} else if (Kind == Option::UnknownClass) {
- assert(!TheUnknownOptionID && "Cannot have multiple unknown options!");
- TheUnknownOptionID = getInfo(i + 1).ID;
+ assert(!UnknownOptionID && "Cannot have multiple unknown options!");
+ UnknownOptionID = getInfo(i + 1).ID;
} else if (Kind != Option::GroupClass) {
FirstSearchableIndex = i;
break;
@@ -337,13 +337,14 @@ bool OptTable::addValues(const char *Option, const char *Values) {
// GroupedShortOptions is true, -a matches "-abc" and the argument in Args will
// be updated to "-bc". This overload does not support
// FlagsToInclude/FlagsToExclude or case insensitive options.
-Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
+std::unique_ptr<Arg> OptTable::parseOneArgGrouped(InputArgList &Args,
+ unsigned &Index) const {
// Anything that doesn't start with PrefixesUnion is an input, as is '-'
// itself.
const char *CStr = Args.getArgString(Index);
StringRef Str(CStr);
if (isInput(PrefixesUnion, Str))
- return new Arg(getOption(TheInputOptionID), Str, Index++, CStr);
+ return std::make_unique<Arg>(getOption(InputOptionID), Str, Index++, CStr);
const Info *End = OptionInfos.data() + OptionInfos.size();
StringRef Name = Str.ltrim(PrefixChars);
@@ -359,8 +360,9 @@ Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
continue;
Option Opt(Start, this);
- if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
- false, Index))
+ if (std::unique_ptr<Arg> A =
+ Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+ /*GroupedShortOption=*/false, Index))
return A;
// If Opt is a Flag of length 2 (e.g. "-a"), we know it is a prefix of
@@ -375,28 +377,39 @@ Arg *OptTable::parseOneArgGrouped(InputArgList &Args, unsigned &Index) const {
}
if (Fallback) {
Option Opt(Fallback, this);
- if (Arg *A = Opt.accept(Args, Str.substr(0, 2), true, Index)) {
- if (Str.size() == 2)
- ++Index;
- else
- Args.replaceArgString(Index, Twine('-') + Str.substr(2));
+ // Check that the last option isn't a flag wrongly given an argument.
+ if (Str[2] == '=')
+ return std::make_unique<Arg>(getOption(UnknownOptionID), Str, Index++,
+ CStr);
+
+ if (std::unique_ptr<Arg> A = Opt.accept(
+ Args, Str.substr(0, 2), /*GroupedShortOption=*/true, Index)) {
+ Args.replaceArgString(Index, Twine('-') + Str.substr(2));
return A;
}
}
- return new Arg(getOption(TheUnknownOptionID), Str, Index++, CStr);
+ // In the case of an incorrect short option, extract the character and move to
+ // the next one.
+ if (Str[1] != '-') {
+ CStr = Args.MakeArgString(Str.substr(0, 2));
+ Args.replaceArgString(Index, Twine('-') + Str.substr(2));
+ return std::make_unique<Arg>(getOption(UnknownOptionID), CStr, Index, CStr);
+ }
+
+ return std::make_unique<Arg>(getOption(UnknownOptionID), Str, Index++, CStr);
}
-Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
- unsigned FlagsToInclude,
- unsigned FlagsToExclude) const {
+std::unique_ptr<Arg> OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
+ unsigned FlagsToInclude,
+ unsigned FlagsToExclude) const {
unsigned Prev = Index;
const char *Str = Args.getArgString(Index);
// Anything that doesn't start with PrefixesUnion is an input, as is '-'
// itself.
if (isInput(PrefixesUnion, Str))
- return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+ return std::make_unique<Arg>(getOption(InputOptionID), Str, Index++, Str);
const Info *Start = OptionInfos.data() + FirstSearchableIndex;
const Info *End = OptionInfos.data() + OptionInfos.size();
@@ -430,8 +443,9 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
continue;
// See if this option matches.
- if (Arg *A = Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
- false, Index))
+ if (std::unique_ptr<Arg> A =
+ Opt.accept(Args, StringRef(Args.getArgString(Index), ArgSize),
+ /*GroupedShortOption=*/false, Index))
return A;
// Otherwise, see if this argument was missing values.
@@ -442,9 +456,9 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
// If we failed to find an option and this arg started with /, then it's
// probably an input path.
if (Str[0] == '/')
- return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+ return std::make_unique<Arg>(getOption(InputOptionID), Str, Index++, Str);
- return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
+ return std::make_unique<Arg>(getOption(UnknownOptionID), Str, Index++, Str);
}
InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
@@ -472,7 +486,7 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
}
unsigned Prev = Index;
- Arg *A = GroupedShortOptions
+ std::unique_ptr<Arg> A = GroupedShortOptions
? parseOneArgGrouped(Args, Index)
: ParseOneArg(Args, Index, FlagsToInclude, FlagsToExclude);
assert((Index > Prev || GroupedShortOptions) &&
@@ -487,7 +501,7 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
break;
}
- Args.append(A);
+ Args.append(A.release());
}
return Args;
@@ -654,7 +668,7 @@ void OptTable::printHelp(raw_ostream &OS, const char *Usage, const char *Title,
HelpText = getOptionHelpText(Alias.getID());
}
- if (HelpText) {
+ if (HelpText && (strlen(HelpText) != 0)) {
const char *HelpGroup = getOptionHelpGroup(*this, Id);
const std::string &OptName = getOptionHelpName(*this, Id);
GroupedOptionHelp[HelpGroup].push_back({OptName, HelpText});
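The OptTable changes make the single-argument parsers hand back std::unique_ptr<Arg> and call release() only at the point where InputArgList, which still stores raw pointers, takes ownership. The sketch below reduces that handoff to a self-contained form; Token and TokenList are invented stand-ins.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Token {
  std::string Spelling;
  explicit Token(std::string S) : Spelling(std::move(S)) {}
};

// Stand-in for a container that, like InputArgList, owns raw pointers and
// deletes them itself.
class TokenList {
  std::vector<Token *> Tokens;

public:
  void append(Token *T) { Tokens.push_back(T); }
  size_t size() const { return Tokens.size(); }
  ~TokenList() {
    for (Token *T : Tokens)
      delete T;
  }
};

// The parser owns its result until the caller decides what to do with it, so
// early returns and error paths cannot leak.
static std::unique_ptr<Token> parseOne(const std::string &Raw) {
  if (Raw.empty())
    return nullptr; // Nothing leaks on the failure path.
  return std::make_unique<Token>(Raw);
}

int main() {
  TokenList List;
  const char *Raws[] = {"-a", "-b"};
  for (const char *Raw : Raws)
    if (std::unique_ptr<Token> T = parseOne(Raw))
      List.append(T.release()); // Ownership transfers to the list.
  std::cout << List.size() << " tokens\n"; // 2 tokens
  return 0;
}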
diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 68d074b2702e..ebdba8949223 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -106,23 +106,24 @@ bool Option::matches(OptSpecifier Opt) const {
return false;
}
-Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
- unsigned &Index) const {
+std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
+ StringRef Spelling,
+ unsigned &Index) const {
size_t ArgSize = Spelling.size();
switch (getKind()) {
case FlagClass: {
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
- return new Arg(*this, Spelling, Index++);
+ return std::make_unique<Arg>(*this, Spelling, Index++);
}
case JoinedClass: {
const char *Value = Args.getArgString(Index) + ArgSize;
- return new Arg(*this, Spelling, Index++, Value);
+ return std::make_unique<Arg>(*this, Spelling, Index++, Value);
}
case CommaJoinedClass: {
// Always matches.
const char *Str = Args.getArgString(Index) + ArgSize;
- Arg *A = new Arg(*this, Spelling, Index++);
+ auto A = std::make_unique<Arg>(*this, Spelling, Index++);
// Parse out the comma separated values.
const char *Prev = Str;
@@ -158,7 +159,8 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
+ return std::make_unique<Arg>(*this, Spelling, Index - 2,
+ Args.getArgString(Index - 1));
case MultiArgClass: {
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
@@ -169,8 +171,8 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
if (Index > Args.getNumInputArgStrings())
return nullptr;
- Arg *A = new Arg(*this, Spelling, Index - 1 - getNumArgs(),
- Args.getArgString(Index - getNumArgs()));
+ auto A = std::make_unique<Arg>(*this, Spelling, Index - 1 - getNumArgs(),
+ Args.getArgString(Index - getNumArgs()));
for (unsigned i = 1; i != getNumArgs(); ++i)
A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
return A;
@@ -180,7 +182,7 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index))) {
const char *Value = Args.getArgString(Index) + ArgSize;
- return new Arg(*this, Spelling, Index++, Value);
+ return std::make_unique<Arg>(*this, Spelling, Index++, Value);
}
// Otherwise it must be separate.
@@ -189,7 +191,8 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(*this, Spelling, Index - 2, Args.getArgString(Index - 1));
+ return std::make_unique<Arg>(*this, Spelling, Index - 2,
+ Args.getArgString(Index - 1));
}
case JoinedAndSeparateClass:
// Always matches.
@@ -198,22 +201,22 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
Args.getArgString(Index - 1) == nullptr)
return nullptr;
- return new Arg(*this, Spelling, Index - 2,
- Args.getArgString(Index - 2) + ArgSize,
- Args.getArgString(Index - 1));
+ return std::make_unique<Arg>(*this, Spelling, Index - 2,
+ Args.getArgString(Index - 2) + ArgSize,
+ Args.getArgString(Index - 1));
case RemainingArgsClass: {
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index)))
return nullptr;
- Arg *A = new Arg(*this, Spelling, Index++);
+ auto A = std::make_unique<Arg>(*this, Spelling, Index++);
while (Index < Args.getNumInputArgStrings() &&
Args.getArgString(Index) != nullptr)
A->getValues().push_back(Args.getArgString(Index++));
return A;
}
case RemainingArgsJoinedClass: {
- Arg *A = new Arg(*this, Spelling, Index);
+ auto A = std::make_unique<Arg>(*this, Spelling, Index);
if (ArgSize != strlen(Args.getArgString(Index))) {
// An inexact match means there is a joined arg.
A->getValues().push_back(Args.getArgString(Index) + ArgSize);
@@ -230,17 +233,18 @@ Arg *Option::acceptInternal(const ArgList &Args, StringRef Spelling,
}
}
-Arg *Option::accept(const ArgList &Args, StringRef CurArg,
- bool GroupedShortOption, unsigned &Index) const {
- std::unique_ptr<Arg> A(GroupedShortOption && getKind() == FlagClass
- ? new Arg(*this, CurArg, Index)
+std::unique_ptr<Arg> Option::accept(const ArgList &Args, StringRef CurArg,
+ bool GroupedShortOption,
+ unsigned &Index) const {
+ auto A(GroupedShortOption && getKind() == FlagClass
+ ? std::make_unique<Arg>(*this, CurArg, Index)
: acceptInternal(Args, CurArg, Index));
if (!A)
return nullptr;
const Option &UnaliasedOption = getUnaliasedOption();
if (getID() == UnaliasedOption.getID())
- return A.release();
+ return A;
// "A" is an alias for a different flag. For most clients it's more convenient
// if this function returns unaliased Args, so create an unaliased arg for
@@ -259,7 +263,8 @@ Arg *Option::accept(const ArgList &Args, StringRef CurArg,
// Due to this, ArgList::getArgString(A->getIndex()) will return the spelling
// of the aliased arg always, while A->getSpelling() returns either the
// unaliased or the aliased arg, depending on which Arg object it's called on.
- Arg *UnaliasedA = new Arg(UnaliasedOption, UnaliasedSpelling, A->getIndex());
+ auto UnaliasedA =
+ std::make_unique<Arg>(UnaliasedOption, UnaliasedSpelling, A->getIndex());
Arg *RawA = A.get();
UnaliasedA->setAlias(std::move(A));
diff --git a/llvm/lib/Passes/OptimizationLevel.cpp b/llvm/lib/Passes/OptimizationLevel.cpp
new file mode 100644
index 000000000000..a1f8c1e14b1f
--- /dev/null
+++ b/llvm/lib/Passes/OptimizationLevel.cpp
@@ -0,0 +1,30 @@
+//===- OptimizationLevel.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/OptimizationLevel.h"
+
+using namespace llvm;
+
+const OptimizationLevel OptimizationLevel::O0 = {
+ /*SpeedLevel*/ 0,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O1 = {
+ /*SpeedLevel*/ 1,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O2 = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::O3 = {
+ /*SpeedLevel*/ 3,
+ /*SizeLevel*/ 0};
+const OptimizationLevel OptimizationLevel::Os = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 1};
+const OptimizationLevel OptimizationLevel::Oz = {
+ /*SpeedLevel*/ 2,
+ /*SizeLevel*/ 2};
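With OptimizationLevel promoted to its own header and source file, and with registerFunctionAnalyses now installing the default alias-analysis pipeline itself (see the PassBuilder hunk below), a typical new-pass-manager driver looks roughly like this generic sketch, which is not code from the patch.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx); // Empty module, just to have something to run on.

  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  // Register all analyses; after this patch the function-level registration
  // also sets up the default AA pipeline, so no separate alias-analysis
  // registration is needed here.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // OptimizationLevel::O2 is now the free-standing class defined above,
  // not PassBuilder::OptimizationLevel.
  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
  MPM.run(M, MAM);
  return 0;
}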
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 79fcc8569b6d..561a881bab0c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1,4 +1,4 @@
-//===- Parsing, selection, and construction of pass pipelines -------------===//
+//===- Parsing and selection of pass pipelines ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,6 +27,7 @@
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CostModel.h"
#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/DDGPrinter.h"
#include "llvm/Analysis/Delinearization.h"
@@ -109,6 +110,7 @@
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
@@ -241,103 +243,16 @@
using namespace llvm;
-static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
- "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
- cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
- cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
- "Heuristics-based inliner version."),
- clEnumValN(InliningAdvisorMode::Development, "development",
- "Use development mode (runtime-loadable model)."),
- clEnumValN(InliningAdvisorMode::Release, "release",
- "Use release mode (AOT-compiled model).")));
-
-static cl::opt<bool> EnableSyntheticCounts(
- "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Run synthetic function entry count generation "
- "pass"));
-
static const Regex DefaultAliasRegex(
"^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
-/// Flag to enable inline deferral during PGO.
-static cl::opt<bool>
- EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
- cl::Hidden,
- cl::desc("Enable inline deferral during PGO"));
-
-static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
- cl::Hidden, cl::ZeroOrMore,
- cl::desc("Enable memory profiler"));
-
-static cl::opt<bool> PerformMandatoryInliningsFirst(
- "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
- cl::desc("Perform mandatory inlinings module-wide, before performing "
- "inlining."));
-
-static cl::opt<bool> EnableO3NonTrivialUnswitching(
- "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
- cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
-
-PipelineTuningOptions::PipelineTuningOptions() {
- LoopInterleaving = true;
- LoopVectorization = true;
- SLPVectorization = false;
- LoopUnrolling = true;
- ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
- LicmMssaOptCap = SetLicmMssaOptCap;
- LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
- CallGraphProfile = true;
- MergeFunctions = false;
-}
-
namespace llvm {
-extern cl::opt<unsigned> MaxDevirtIterations;
-extern cl::opt<bool> EnableConstraintElimination;
-extern cl::opt<bool> EnableFunctionSpecialization;
-extern cl::opt<bool> EnableGVNHoist;
-extern cl::opt<bool> EnableGVNSink;
-extern cl::opt<bool> EnableHotColdSplit;
-extern cl::opt<bool> EnableIROutliner;
-extern cl::opt<bool> EnableOrderFileInstrumentation;
-extern cl::opt<bool> EnableCHR;
-extern cl::opt<bool> EnableLoopInterchange;
-extern cl::opt<bool> EnableUnrollAndJam;
-extern cl::opt<bool> EnableLoopFlatten;
-extern cl::opt<bool> EnableDFAJumpThreading;
-extern cl::opt<bool> RunNewGVN;
-extern cl::opt<bool> RunPartialInlining;
-extern cl::opt<bool> ExtraVectorizerPasses;
-
-extern cl::opt<bool> FlattenedProfileUsed;
-
-extern cl::opt<AttributorRunOption> AttributorRun;
-extern cl::opt<bool> EnableKnowledgeRetention;
-
-extern cl::opt<bool> EnableMatrix;
-
-extern cl::opt<bool> DisablePreInliner;
-extern cl::opt<int> PreInlineThreshold;
+cl::opt<bool> PrintPipelinePasses(
+ "print-pipeline-passes",
+ cl::desc("Print a '-passes' compatible string describing the pipeline "
+ "(best-effort only)."));
} // namespace llvm
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O0 = {
- /*SpeedLevel*/ 0,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O1 = {
- /*SpeedLevel*/ 1,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O2 = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::O3 = {
- /*SpeedLevel*/ 3,
- /*SizeLevel*/ 0};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Os = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 1};
-const PassBuilder::OptimizationLevel PassBuilder::OptimizationLevel::Oz = {
- /*SpeedLevel*/ 2,
- /*SizeLevel*/ 2};
-
namespace {
// The following passes/analyses have custom names, otherwise their name will
@@ -405,6 +320,15 @@ public:
static StringRef name() { return "NoOpFunctionAnalysis"; }
};
+/// No-op loop nest pass which does nothing.
+struct NoOpLoopNestPass : PassInfoMixin<NoOpLoopNestPass> {
+ PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &,
+ LoopStandardAnalysisResults &, LPMUpdater &) {
+ return PreservedAnalyses::all();
+ }
+ static StringRef name() { return "NoOpLoopNestPass"; }
+};
+
/// No-op loop pass which does nothing.
struct NoOpLoopPass : PassInfoMixin<NoOpLoopPass> {
PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
@@ -439,7 +363,8 @@ AnalysisKey NoOpLoopAnalysis::Key;
/// it. This should be updated if new pass instrumentation wants to use the map.
/// We currently only use this for --print-before/after.
bool shouldPopulateClassToPassNames() {
- return !printBeforePasses().empty() || !printAfterPasses().empty();
+ return PrintPipelinePasses || !printBeforePasses().empty() ||
+ !printAfterPasses().empty();
}
} // namespace
@@ -453,6 +378,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
if (PIC && shouldPopulateClassToPassNames()) {
#define MODULE_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ PIC->addClassToPassName(CLASS, NAME);
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define FUNCTION_PASS(NAME, CREATE_PASS) \
@@ -461,6 +388,8 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
PIC->addClassToPassName(CLASS, NAME);
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define LOOP_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
@@ -469,18 +398,14 @@ PassBuilder::PassBuilder(TargetMachine *TM, PipelineTuningOptions PTO,
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#define CGSCC_PASS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ PIC->addClassToPassName(CLASS, NAME);
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);
#include "PassRegistry.def"
}
}
-void PassBuilder::invokePeepholeEPCallbacks(
- FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
- for (auto &C : PeepholeEPCallbacks)
- C(FPM, Level);
-}
-
void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
MAM.registerPass([&] { return CREATE_PASS; });
@@ -500,6 +425,11 @@ void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
}
void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
+ // We almost always want the default alias analysis pipeline.
+ // If a user wants a different one, they can register their own before calling
+ // registerFunctionAnalyses().
+ FAM.registerPass([&] { return buildDefaultAAPipeline(); });
+
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
FAM.registerPass([&] { return CREATE_PASS; });
#include "PassRegistry.def"
@@ -517,1518 +447,6 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
C(LAM);
}
-// Helper to add AnnotationRemarksPass.
-static void addAnnotationRemarksPass(ModulePassManager &MPM) {
- FunctionPassManager FPM;
- FPM.addPass(AnnotationRemarksPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-}
-
-// Helper to check if the current compilation phase is preparing for LTO
-static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
- return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
- Phase == ThinOrFullLTOPhase::FullLTOPreLink;
-}
-
-// TODO: Investigate the cost/benefit of tail call elimination on debugging.
-FunctionPassManager
-PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
-
- FunctionPassManager FPM;
-
- // Form SSA out of local memory accesses after breaking apart aggregates into
- // scalars.
- FPM.addPass(SROA());
-
- // Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
-
- // Hoisting of scalars and load expressions.
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
-
- FPM.addPass(LibCallsShrinkWrapPass());
-
- invokePeepholeEPCallbacks(FPM, Level);
-
- FPM.addPass(SimplifyCFGPass());
-
- // Form canonically associated expression trees, and simplify the trees using
- // basic mathematical properties. For example, this will form (nearly)
- // minimal multiplication trees.
- FPM.addPass(ReassociatePass());
-
- // Add the primary loop simplification pipeline.
- // FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be removed
- // and/or replaced by scheduling the loop pass equivalents in the correct
- // positions. But those equivalent passes aren't powerful enough yet.
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
- // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
- // `LoopInstSimplify`.
- LoopPassManager LPM1, LPM2;
-
- // Simplify the loop body. We do this initially to clean up after other loop
- // passes run, either when iterating on a loop or on inner loops with
- // implications on the outer loop.
- LPM1.addPass(LoopInstSimplifyPass());
- LPM1.addPass(LoopSimplifyCFGPass());
-
- // Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-
- LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
- isLTOPreLink(Phase)));
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM1.addPass(SimpleLoopUnswitchPass());
-
- LPM2.addPass(LoopIdiomRecognizePass());
- LPM2.addPass(IndVarSimplifyPass());
-
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
-
- LPM2.addPass(LoopDeletionPass());
-
- if (EnableLoopInterchange)
- LPM2.addPass(LoopInterchangePass());
-
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
- // because it changes the IR, making profile annotation in the backend
- // compile inaccurate. The normal unroller doesn't pay attention to forced
- // full unroll attributes, so we need to make sure to allow the full unroll
- // pass to pay attention to it.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse)
- LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
-
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
-
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- if (EnableLoopFlatten)
- FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
- // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
-
- // Delete small array after loop unroll.
- FPM.addPass(SROA());
-
- // Specially optimize memory movement as it doesn't look like dataflow in SSA.
- FPM.addPass(MemCpyOptPass());
-
- // Sparse conditional constant propagation.
- // FIXME: It isn't clear why we do this *after* loop passes rather than
- // before...
- FPM.addPass(SCCPPass());
-
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- FPM.addPass(BDCEPass());
-
- // Run instcombine after redundancy and dead bit elimination to exploit
- // opportunities opened up by them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- FPM.addPass(CoroElidePass());
-
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
-
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- return FPM;
-}
-
-FunctionPassManager
-PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
-
- // The O1 pipeline has a separate pipeline creation function to simplify
- // construction readability.
- if (Level.getSpeedupLevel() == 1)
- return buildO1FunctionSimplificationPipeline(Level, Phase);
-
- FunctionPassManager FPM;
-
- // Form SSA out of local memory accesses after breaking apart aggregates into
- // scalars.
- FPM.addPass(SROA());
-
- // Catch trivial redundancies
- FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
- if (EnableKnowledgeRetention)
- FPM.addPass(AssumeSimplifyPass());
-
- // Hoisting of scalars and load expressions.
- if (EnableGVNHoist)
- FPM.addPass(GVNHoistPass());
-
- // Global value numbering based sinking.
- if (EnableGVNSink) {
- FPM.addPass(GVNSinkPass());
- FPM.addPass(SimplifyCFGPass());
- }
-
- if (EnableConstraintElimination)
- FPM.addPass(ConstraintEliminationPass());
-
- // Speculative execution if the target has divergent branches; otherwise nop.
- FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
-
- // Optimize based on known information about branches, and cleanup afterward.
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
-
- FPM.addPass(SimplifyCFGPass());
- if (Level == OptimizationLevel::O3)
- FPM.addPass(AggressiveInstCombinePass());
- FPM.addPass(InstCombinePass());
-
- if (!Level.isOptimizingForSize())
- FPM.addPass(LibCallsShrinkWrapPass());
-
- invokePeepholeEPCallbacks(FPM, Level);
-
- // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
- // using the size value profile. Don't perform this when optimizing for size.
- if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
- !Level.isOptimizingForSize())
- FPM.addPass(PGOMemOPSizeOpt());
-
- FPM.addPass(TailCallElimPass());
- FPM.addPass(SimplifyCFGPass());
-
- // Form canonically associated expression trees, and simplify the trees using
- // basic mathematical properties. For example, this will form (nearly)
- // minimal multiplication trees.
- FPM.addPass(ReassociatePass());
-
- // Add the primary loop simplification pipeline.
- // FIXME: Currently this is split into two loop pass pipelines because we run
- // some function passes in between them. These can and should be removed
- // and/or replaced by scheduling the loop pass equivalents in the correct
- // positions. But those equivalent passes aren't powerful enough yet.
- // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
- // used. We have `LoopSimplifyCFGPass` which isn't powerful enough yet to
- // fully replace `SimplifyCFGPass`, and the closest to the other we have is
- // `LoopInstSimplify`.
- LoopPassManager LPM1, LPM2;
-
- // Simplify the loop body. We do this initially to clean up after other loop
- // passes run, either when iterating on a loop or on inner loops with
- // implications on the outer loop.
- LPM1.addPass(LoopInstSimplifyPass());
- LPM1.addPass(LoopSimplifyCFGPass());
-
- // Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated.
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
-
- // Disable header duplication in loop rotation at -Oz.
- LPM1.addPass(
- LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
- // TODO: Investigate promotion cap for O1.
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM1.addPass(
- SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
- EnableO3NonTrivialUnswitching));
- LPM2.addPass(LoopIdiomRecognizePass());
- LPM2.addPass(IndVarSimplifyPass());
-
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
-
- LPM2.addPass(LoopDeletionPass());
-
- if (EnableLoopInterchange)
- LPM2.addPass(LoopInterchangePass());
-
- // Do not enable unrolling in PreLinkThinLTO phase during sample PGO
- // because it changes the IR, making profile annotation in the backend
- // compile inaccurate. The normal unroller doesn't pay attention to forced
- // full unroll attributes, so we need to make sure to allow the full unroll
- // pass to pay attention to it.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
- PGOOpt->Action != PGOOptions::SampleUse)
- LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
-
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
-
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
- EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- if (EnableLoopFlatten)
- FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
- // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
- // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
- /*UseMemorySSA=*/false,
- /*UseBlockFrequencyInfo=*/false));
-
- // Delete small array after loop unroll.
- FPM.addPass(SROA());
-
- // Eliminate redundancies.
- FPM.addPass(MergedLoadStoreMotionPass());
- if (RunNewGVN)
- FPM.addPass(NewGVNPass());
- else
- FPM.addPass(GVN());
-
- // Sparse conditional constant propagation.
- // FIXME: It isn't clear why we do this *after* loop passes rather than
- // before...
- FPM.addPass(SCCPPass());
-
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- FPM.addPass(BDCEPass());
-
- // Run instcombine after redundancy and dead bit elimination to exploit
- // opportunities opened up by them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- // Re-consider control flow based optimizations after redundancy elimination,
- // redo DCE, etc.
- if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
- FPM.addPass(DFAJumpThreadingPass());
-
- FPM.addPass(JumpThreadingPass());
- FPM.addPass(CorrelatedValuePropagationPass());
-
- // Finally, do an expensive DCE pass to catch all the dead code exposed by
- // the simplifications and basic cleanup after all the simplifications.
- // TODO: Investigate if this is too expensive.
- FPM.addPass(ADCEPass());
-
- // Specially optimize memory movement as it doesn't look like dataflow in SSA.
- FPM.addPass(MemCpyOptPass());
-
- FPM.addPass(DSEPass());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
-
- FPM.addPass(CoroElidePass());
-
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
-
- FPM.addPass(SimplifyCFGPass(
- SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
- (PGOOpt->Action == PGOOptions::IRUse ||
- PGOOpt->Action == PGOOptions::SampleUse))
- FPM.addPass(ControlHeightReductionPass());
-
- return FPM;
-}
-
-void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
- MPM.addPass(CanonicalizeAliasesPass());
- MPM.addPass(NameAnonGlobalPass());
-}
-
-void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
- PassBuilder::OptimizationLevel Level,
- bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
- std::string ProfileRemappingFile) {
- assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
- if (!IsCS && !DisablePreInliner) {
- InlineParams IP;
-
- IP.DefaultThreshold = PreInlineThreshold;
-
- // FIXME: The hint threshold has the same value used by the regular inliner
- // when not optimizing for size. This should probably be lowered after
- // performance testing.
- // FIXME: this comment is cargo-culted from the old pass manager; revisit.
- IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
- ModuleInlinerWrapperPass MIWP(IP);
- CGSCCPassManager &CGPipeline = MIWP.getPM();
-
- FunctionPassManager FPM;
- FPM.addPass(SROA());
- FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
- FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
- FPM.addPass(InstCombinePass()); // Combine silly sequences.
- invokePeepholeEPCallbacks(FPM, Level);
-
- CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
-
- MPM.addPass(std::move(MIWP));
-
- // Delete anything that is now dead to make sure that we don't instrument
- // dead code. Instrumentation can end up keeping dead code around and
- // dramatically increase code size.
- MPM.addPass(GlobalDCEPass());
- }
-
- if (!RunProfileGen) {
- assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- return;
- }
-
- // Perform PGO instrumentation.
- MPM.addPass(PGOInstrumentationGen(IsCS));
-
- FunctionPassManager FPM;
- // Disable header duplication in loop rotation at -Oz.
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz), EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/false));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!ProfileFile.empty())
- Options.InstrProfileOutput = ProfileFile;
- // Do counter promotion at Level greater than O0.
- Options.DoCounterPromotion = true;
- Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
-}
-
-void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
- bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
- std::string ProfileRemappingFile) {
- if (!RunProfileGen) {
- assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- return;
- }
-
- // Perform PGO instrumentation.
- MPM.addPass(PGOInstrumentationGen(IsCS));
- // Add the profile lowering pass.
- InstrProfOptions Options;
- if (!ProfileFile.empty())
- Options.InstrProfileOutput = ProfileFile;
- // Do not do counter promotion at O0.
- Options.DoCounterPromotion = false;
- Options.UseBFIInPromotion = IsCS;
- MPM.addPass(InstrProfiling(Options, IsCS));
-}
-
-static InlineParams
-getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
- return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
-}
-
-ModuleInlinerWrapperPass
-PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- InlineParams IP = getInlineParamsFromOptLevel(Level);
- if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
- PGOOpt->Action == PGOOptions::SampleUse)
- IP.HotCallSiteThreshold = 0;
-
- if (PGOOpt)
- IP.EnableDeferral = EnablePGOInlineDeferral;
-
- ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
- UseInlineAdvisor, MaxDevirtIterations);
-
- // Require the GlobalsAA analysis for the module so we can query it within
- // the CGSCC pipeline.
- MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MIWP.addModulePass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
-
- // Require the ProfileSummaryAnalysis for the module so we can query it within
- // the inliner pass.
- MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
-
- // Now begin the main postorder CGSCC pipeline.
- // FIXME: The current CGSCC pipeline has its origins in the legacy pass
- // manager and trying to emulate its precise behavior. Much of this doesn't
- // make a lot of sense and we should revisit the core CGSCC structure.
- CGSCCPassManager &MainCGPipeline = MIWP.getPM();
-
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
-
- if (AttributorRun & AttributorRunOption::CGSCC)
- MainCGPipeline.addPass(AttributorCGSCCPass());
-
- // Now deduce any function attributes based on the current code.
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
-
- // When at O3 add argument promotion to the pass pipeline.
- // FIXME: It isn't at all clear why this should be limited to O3.
- if (Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(ArgumentPromotionPass());
-
- // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
- // there are no OpenMP runtime calls present in the module.
- if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
- MainCGPipeline.addPass(OpenMPOptCGSCCPass());
-
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
-
- // Lastly, add the core function simplification pipeline nested inside the
- // CGSCC walk.
- MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
- buildFunctionSimplificationPipeline(Level, Phase)));
-
- MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
-
- return MIWP;
-}
-
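For the CGSCCOptimizerLateEPCallbacks loop used in the inliner pipeline above, clients hook in through the matching registration method on PassBuilder. A sketch under the assumption that a PassBuilder PB is in scope; MyCGSCCCleanupPass is a hypothetical client pass, not an in-tree one:

PB.registerCGSCCOptimizerLateEPCallback(
    [](CGSCCPassManager &CGPM, PassBuilder::OptimizationLevel Level) {
      CGPM.addPass(MyCGSCCCleanupPass()); // hypothetical client pass
    });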
-ModulePassManager
-PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
- ThinOrFullLTOPhase Phase) {
- ModulePassManager MPM;
-
- // Place pseudo probe instrumentation as the first pass of the pipeline to
- // minimize the impact of optimization changes.
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
- Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
- MPM.addPass(SampleProfileProbePass(TM));
-
- bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
-
- // In ThinLTO mode, when a flattened profile is used, all the available
- // profile information will be annotated in PreLink phase so there is
- // no need to load the profile again in PostLink.
- bool LoadSampleProfile =
- HasSampleProfile &&
- !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
-
- // During the ThinLTO backend phase we perform early indirect call promotion
- // here, before globalopt. Otherwise imported available_externally functions
- // look unreferenced and are removed. If we are going to load the sample
- // profile then defer until later.
- // TODO: See if we can move later and consolidate with the location where
- // we perform ICP when we are loading a sample profile.
- // TODO: We pass HasSampleProfile (whether there was a sample profile file
- // passed to the compile) to the SamplePGO flag of ICP. This is used to
- // determine whether the new direct calls are annotated with prof metadata.
- // Ideally this should be determined from whether the IR is annotated with
- // sample profile, and not whether a sample profile was provided on the
- // command line. E.g. for flattened profiles where we will not be reloading
- // the sample profile in the ThinLTO backend, we ideally shouldn't have to
- // provide the sample profile file.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
- MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
-
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
-
- // Create an early function pass manager to cleanup the output of the
- // frontend.
- FunctionPassManager EarlyFPM;
- // Lower llvm.expect to metadata before attempting transforms.
- // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
- EarlyFPM.addPass(LowerExpectIntrinsicPass());
- EarlyFPM.addPass(SimplifyCFGPass());
- EarlyFPM.addPass(SROA());
- EarlyFPM.addPass(EarlyCSEPass());
- EarlyFPM.addPass(CoroEarlyPass());
- if (Level == OptimizationLevel::O3)
- EarlyFPM.addPass(CallSiteSplittingPass());
-
- // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
- // to convert bitcast to direct calls so that they can be inlined during the
- // profile annotation preparation step.
- // More details about SamplePGO design can be found in:
- // https://research.google.com/pubs/pub45290.html
- // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
- if (LoadSampleProfile)
- EarlyFPM.addPass(InstCombinePass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
-
- if (LoadSampleProfile) {
- // Annotate sample profile right after early FPM to ensure freshness of
- // the debug info.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, Phase));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- // Do not invoke ICP in the LTOPreLink phase as it makes it hard
- // for the profile annotation to be accurate in the LTO backend.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
- Phase != ThinOrFullLTOPhase::FullLTOPreLink)
- // We perform early indirect call promotion here, before globalopt.
- // This is important for the ThinLTO backend phase because otherwise
- // imported available_externally functions look unreferenced and are
- // removed.
- MPM.addPass(
- PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
- }
-
- // Try to perform OpenMP specific optimizations on the module. This is a
- // (quick!) no-op if there are no OpenMP runtime calls present in the module.
- if (Level != OptimizationLevel::O0)
- MPM.addPass(OpenMPOptPass());
-
- if (AttributorRun & AttributorRunOption::MODULE)
- MPM.addPass(AttributorPass());
-
- // Lower type metadata and the type.test intrinsic in the ThinLTO
- // post link pipeline after ICP. This is to enable usage of the type
- // tests in ICP sequences.
- if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
-
- // Specialize functions with IPSCCP.
- if (EnableFunctionSpecialization)
- MPM.addPass(FunctionSpecializationPass());
-
- // Interprocedural constant propagation now that basic cleanup has occurred
- // and prior to optimizing globals.
- // FIXME: This position in the pipeline hasn't been carefully considered in
- // years, it should be re-analyzed.
- MPM.addPass(IPSCCPPass());
-
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
-
- // Optimize globals to try and fold them into constants.
- MPM.addPass(GlobalOptPass());
-
- // Promote any localized globals to SSA registers.
- // FIXME: Should this instead be a run of SROA?
- // FIXME: We should probably run instcombine and simplifycfg afterward to
- // delete control flows that are dead once globals have been folded to
- // constants.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
- // Remove any dead arguments exposed by cleanups and constant folding
- // globals.
- MPM.addPass(DeadArgumentEliminationPass());
-
- // Create a small function pass pipeline to cleanup after all the global
- // optimizations.
- FunctionPassManager GlobalCleanupPM;
- GlobalCleanupPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
-
- GlobalCleanupPM.addPass(SimplifyCFGPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
-
- // Add all the requested passes for instrumentation PGO, if requested.
- if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
- (PGOOpt->Action == PGOOptions::IRInstr ||
- PGOOpt->Action == PGOOptions::IRUse)) {
- addPGOInstrPasses(MPM, Level,
- /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
- /* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- MPM.addPass(PGOIndirectCallPromotion(false, false));
- }
- if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
- PGOOpt->CSAction == PGOOptions::CSIRInstr)
- MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
-
- // Synthesize function entry counts for non-PGO compilation.
- if (EnableSyntheticCounts && !PGOOpt)
- MPM.addPass(SyntheticCountsPropagation());
-
- MPM.addPass(buildInlinerPipeline(Level, Phase));
-
- if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
- MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
- MPM.addPass(ModuleMemProfilerPass());
- }
-
- return MPM;
-}
-
-/// TODO: Should LTO cause any differences to this set of passes?
-void PassBuilder::addVectorPasses(OptimizationLevel Level,
- FunctionPassManager &FPM, bool IsFullLTO) {
- FPM.addPass(LoopVectorizePass(
- LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
-
- if (IsFullLTO) {
- // The vectorizer may have significantly shortened a loop body; unroll
- // again. Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
- if (EnableUnrollAndJam && PTO.LoopUnrolling)
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopUnrollAndJamPass(Level.getSpeedupLevel())));
- FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- FPM.addPass(WarnMissedTransformationsPass());
- }
-
- if (!IsFullLTO) {
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- FPM.addPass(LoopLoadEliminationPass());
- }
- // Cleanup after the loop optimization passes.
- FPM.addPass(InstCombinePass());
-
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- // At higher optimization levels, try to clean up any runtime overlap and
- // alignment checks inserted by the vectorizer. We want to track correlated
- // runtime checks for two inner loops in the same outer loop, fold any
- // common computations, hoist loop-invariant aspects out of any outer loop,
- // and unswitch the runtime checks if possible. Once hoisted, we may have
- // dead (or speculatable) control flows or more combining opportunities.
- FPM.addPass(EarlyCSEPass());
- FPM.addPass(CorrelatedValuePropagationPass());
- FPM.addPass(InstCombinePass());
- LoopPassManager LPM;
- LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
- LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
- OptimizationLevel::O3));
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/true));
- FPM.addPass(SimplifyCFGPass());
- FPM.addPass(InstCombinePass());
- }
-
- // Now that we've formed fast to execute loop structures, we do further
- // optimizations. These are run afterward as they might block doing complex
- // analyses and transforms such as what are needed for loop vectorization.
-
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
-
- if (IsFullLTO) {
- FPM.addPass(SCCPPass());
- FPM.addPass(InstCombinePass());
- FPM.addPass(BDCEPass());
- }
-
- // Optimize parallel scalar instruction chains into SIMD instructions.
- if (PTO.SLPVectorization) {
- FPM.addPass(SLPVectorizerPass());
- if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
- FPM.addPass(EarlyCSEPass());
- }
- }
- // Enhance/cleanup vector code.
- FPM.addPass(VectorCombinePass());
-
- if (!IsFullLTO) {
- FPM.addPass(InstCombinePass());
- // Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
- if (EnableUnrollAndJam && PTO.LoopUnrolling) {
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LoopUnrollAndJamPass(Level.getSpeedupLevel())));
- }
- FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
- Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll)));
- FPM.addPass(WarnMissedTransformationsPass());
- FPM.addPass(InstCombinePass());
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
- }
-
- // Now that we've vectorized and unrolled loops, we may have more refined
- // alignment information, try to re-derive it here.
- FPM.addPass(AlignmentFromAssumptionsPass());
-
- if (IsFullLTO)
- FPM.addPass(InstCombinePass());
-}
-
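The PTO.* knobs consulted in addVectorPasses above (LoopVectorization, SLPVectorization, LoopUnrolling, LoopInterleaving) are plain members of PipelineTuningOptions set before the PassBuilder is constructed. A minimal sketch, assuming a TargetMachine *TM is available; the particular settings are only an example:

PipelineTuningOptions PTO;
PTO.LoopVectorization = true;
PTO.SLPVectorization = true;
PTO.LoopUnrolling = false; // leaves only forced (OnlyWhenForced) unrolling above
PassBuilder PB(TM, PTO);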
-ModulePassManager
-PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- ModulePassManager MPM;
-
- // Optimize globals now that the module is fully simplified.
- MPM.addPass(GlobalOptPass());
- MPM.addPass(GlobalDCEPass());
-
- // Run partial inlining pass to partially inline functions that have
- // large bodies.
- if (RunPartialInlining)
- MPM.addPass(PartialInlinerPass());
-
- // Remove avail extern fns and globals definitions since we aren't compiling
- // an object file for later LTO. For LTO we want to preserve these so they
- // are eligible for inlining at link-time. Note if they are unreferenced they
- // will be removed by GlobalDCE later, so this only impacts referenced
- // available externally globals. Eventually they will be suppressed during
- // codegen, but eliminating here enables more opportunity for GlobalDCE as it
- // may make globals referenced by available external functions dead and saves
- // running remaining passes on the eliminated functions. These should be
- // preserved during prelinking for link-time inlining decisions.
- if (!LTOPreLink)
- MPM.addPass(EliminateAvailableExternallyPass());
-
- if (EnableOrderFileInstrumentation)
- MPM.addPass(InstrOrderFilePass());
-
- // Do RPO function attribute inference across the module to forward-propagate
- // attributes where applicable.
- // FIXME: Is this really an optimization rather than a canonicalization?
- MPM.addPass(ReversePostOrderFunctionAttrsPass());
-
- // Do a post inline PGO instrumentation and use pass. This is a context
- // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
- // cross-module inline has not been done yet. The context sensitive
- // instrumentation is after all the inlines are done.
- if (!LTOPreLink && PGOOpt) {
- if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
- else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
-
- // Re-require GlobalsAA here prior to function passes. This is particularly
- // useful as the above will have inlined, DCE'ed, and function-attr
- // propagated everything. We should at this point have a reasonably minimal
- // and richly annotated call graph. By computing aliasing and mod/ref
- // information for all local globals here, the late loop passes and notably
- // the vectorizer will be able to use them to help recognize vectorizable
- // memory operations.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
-
- FunctionPassManager OptimizePM;
- OptimizePM.addPass(Float2IntPass());
- OptimizePM.addPass(LowerConstantIntrinsicsPass());
-
- if (EnableMatrix) {
- OptimizePM.addPass(LowerMatrixIntrinsicsPass());
- OptimizePM.addPass(EarlyCSEPass());
- }
-
- // FIXME: We need to run some loop optimizations to re-rotate loops after
- // simplifycfg and others undo their rotation.
-
- // Optimize the loop execution. These passes operate on entire loop nests
- // rather than on each loop in an inside-out manner, and so they are actually
- // function passes.
-
- for (auto &C : VectorizerStartEPCallbacks)
- C(OptimizePM, Level);
-
- // First rotate loops that may have been un-rotated by prior passes.
- // Disable header duplication at -Oz.
- OptimizePM.addPass(createFunctionToLoopPassAdaptor(
- LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink),
- EnableMSSALoopDependency,
- /*UseBlockFrequencyInfo=*/false));
-
- // Distribute loops to allow partial vectorization. I.e. isolate dependences
- // into a separate loop that would otherwise inhibit vectorization. This is
- // currently only performed for loops marked with the metadata
- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
- OptimizePM.addPass(LoopDistributePass());
-
- // Populates the VFABI attribute with the scalar-to-vector mappings
- // from the TargetLibraryInfo.
- OptimizePM.addPass(InjectTLIMappings());
-
- addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
-
- // Split out cold code. Splitting is done late to avoid hiding context from
- // other optimizations and inadvertently regressing performance. The tradeoff
- // is that this has a higher code size cost than splitting early.
- if (EnableHotColdSplit && !LTOPreLink)
- MPM.addPass(HotColdSplittingPass());
-
- // Search the code for similar regions of code. If enough similar regions can
- // be found where extracting the regions into their own function will decrease
- // the size of the program, we extract the regions and deduplicate the
- // structurally similar regions.
- if (EnableIROutliner)
- MPM.addPass(IROutlinerPass());
-
- // Merge functions if requested.
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
- // LoopSink pass sinks instructions hoisted by LICM, which serves as a
- // canonicalization pass that enables other optimizations. As a result,
- // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
- // result too early.
- OptimizePM.addPass(LoopSinkPass());
-
- // And finally clean up LCSSA form before generating code.
- OptimizePM.addPass(InstSimplifyPass());
-
- // This hoists/decomposes div/rem ops. It should run after other sink/hoist
- // passes to avoid re-sinking, but before SimplifyCFG because it can allow
- // flattening of blocks.
- OptimizePM.addPass(DivRemPairsPass());
-
- // LoopSink (and other loop passes since the last simplifyCFG) might have
- // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- OptimizePM.addPass(SimplifyCFGPass());
-
- OptimizePM.addPass(CoroCleanupPass());
-
- // Add the core optimizing pipeline.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
-
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
-
- if (PTO.CallGraphProfile)
- MPM.addPass(CGProfilePass());
-
- // Now we need to do some global optimization transforms.
- // FIXME: It would seem like these should come first in the optimization
- // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
- // ordering here.
- MPM.addPass(GlobalDCEPass());
- MPM.addPass(ConstantMergePass());
-
- // TODO: Relative lookup table converter pass caused an issue when full LTO
- // is enabled. See https://reviews.llvm.org/D94355 for more details.
- // Until the issue is fixed, disable this pass during the pre-linking phase.
- if (!LTOPreLink)
- MPM.addPass(RelLookupTableConverterPass());
-
- return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
-
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- // Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
- // Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
- : ThinOrFullLTOPhase::None));
-
- // Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
-
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
- MPM.addPass(PseudoProbeUpdatePass());
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- if (LTOPreLink)
- addRequiredLTOPreLinkPasses(MPM);
-
- return MPM;
-}
-
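For context on how buildPerModuleDefaultPipeline is typically consumed, here is a hedged end-to-end sketch of an embedder driving it at -O2. optimizeModule is an invented name, error handling is elided, and the caller is assumed to own the Module and TargetMachine:

#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Hypothetical helper: run the default -O2 pipeline over M.
void optimizeModule(Module &M, TargetMachine *TM) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB(TM);
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(PassBuilder::OptimizationLevel::O2);
  MPM.run(M, MAM);
}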
-ModulePassManager
-PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
-
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
- // Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
- // If we are planning to perform ThinLTO later, we don't bloat the code with
- // unrolling/vectorization/... now. Just simplify the module as much as we
- // can.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, ThinOrFullLTOPhase::ThinLTOPreLink));
-
- // Run partial inlining pass to partially inline functions that have
- // large bodies.
- // FIXME: It isn't clear whether this is really the right place to run this
- // in ThinLTO. Because there is another canonicalization and simplification
- // phase that will run after the thin link, running this here ends up with
- // less information than will be available later and it may grow functions in
- // ways that aren't beneficial.
- if (RunPartialInlining)
- MPM.addPass(PartialInlinerPass());
-
- // Reduce the size of the IR as much as possible.
- MPM.addPass(GlobalOptPass());
-
- // Module simplification splits coroutines, but does not fully clean up
- // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
- // on these, we schedule the cleanup here.
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
- if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
- MPM.addPass(PseudoProbeUpdatePass());
-
- // Handle OptimizerLastEPCallbacks added by clang on PreLink. Actual
- // optimization is going to be done in PostLink stage, but clang can't
- // add callbacks there in case of in-process ThinLTO called by linker.
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- addRequiredLTOPreLinkPasses(MPM);
-
- return MPM;
-}
-
-ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
- OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- if (ImportSummary) {
- // These passes import type identifier resolutions for whole-program
- // devirtualization and CFI. They must run early because other passes may
- // disturb the specific instruction patterns that these passes look for,
- // creating dependencies on resolutions that may not appear in the summary.
- //
- // For example, GVN may transform the pattern assume(type.test) appearing in
- // two basic blocks into assume(phi(type.test, type.test)), which would
- // transform a dependency on a WPD resolution into a dependency on a type
- // identifier resolution for CFI.
- //
- // Also, WPD has access to more precise information than ICP and can
- // devirtualize more effectively, so it should operate on the IR first.
- //
- // The WPD and LowerTypeTest passes need to run at -O0 to lower type
- // metadata and intrinsics.
- MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
- MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
- }
-
- if (Level == OptimizationLevel::O0) {
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP.
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- // Drop available_externally and unreferenced globals. This is necessary
- // with ThinLTO in order to avoid leaving undefined references to dead
- // globals in the object file.
- MPM.addPass(EliminateAvailableExternallyPass());
- MPM.addPass(GlobalDCEPass());
- return MPM;
- }
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- // Add the core simplification pipeline.
- MPM.addPass(buildModuleSimplificationPipeline(
- Level, ThinOrFullLTOPhase::ThinLTOPostLink));
-
- // Now add the optimization pipeline.
- MPM.addPass(buildModuleOptimizationPipeline(Level));
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
-}
-
-ModulePassManager
-PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
- // FIXME: We should use a customized pre-link pipeline!
- return buildPerModuleDefaultPipeline(Level,
- /* LTOPreLink */ true);
-}
-
-ModulePassManager
-PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
- ModuleSummaryIndex *ExportSummary) {
- ModulePassManager MPM;
-
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- // Create a function that performs CFI checks for cross-DSO calls with targets
- // in the current module.
- MPM.addPass(CrossDSOCFIPass());
-
- if (Level == OptimizationLevel::O0) {
- // The WPD and LowerTypeTest passes need to run at -O0 to lower type
- // metadata and intrinsics.
- MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP.
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
- }
-
- if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
- // Load sample profile before running the LTO optimization pipeline.
- MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink));
- // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
- // RequireAnalysisPass for PSI before subsequent non-module passes.
- MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
- }
-
- // Remove unused virtual tables to improve the quality of code generated by
- // whole-program devirtualization and bitset lowering.
- MPM.addPass(GlobalDCEPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
-
- if (Level.getSpeedupLevel() > 1) {
- FunctionPassManager EarlyFPM;
- EarlyFPM.addPass(CallSiteSplittingPass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
-
- // Indirect call promotion. This should promote all the targets that are
- // left by the earlier promotion pass that promotes intra-module targets.
- // This two-step promotion is to save compile time. For LTO, it should
- // produce the same result as if we only do promotion here.
- MPM.addPass(PGOIndirectCallPromotion(
- true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
-
- if (EnableFunctionSpecialization)
- MPM.addPass(FunctionSpecializationPass());
- // Propagate constants at call sites into the functions they call. This
- // opens opportunities for globalopt (and inlining) by substituting function
- // pointers passed as arguments to direct uses of functions.
- MPM.addPass(IPSCCPPass());
-
- // Attach metadata to indirect call sites indicating the set of functions
- // they may target at run-time. This should follow IPSCCP.
- MPM.addPass(CalledValuePropagationPass());
- }
-
- // Now deduce any function attributes based on the current code.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
- PostOrderFunctionAttrsPass()));
-
- // Do RPO function attribute inference across the module to forward-propagate
- // attributes where applicable.
- // FIXME: Is this really an optimization rather than a canonicalization?
- MPM.addPass(ReversePostOrderFunctionAttrsPass());
-
- // Use in-range annotations on GEP indices to split globals where beneficial.
- MPM.addPass(GlobalSplitPass());
-
- // Run whole program optimization of virtual call when the list of callees
- // is fixed.
- MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
-
- // Stop here at -O1.
- if (Level == OptimizationLevel::O1) {
- // The LowerTypeTestsPass needs to run to lower type metadata and the
- // type.test intrinsics. The pass does nothing if CFI is disabled.
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO
- // pipeline).
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
- }
-
- // Optimize globals to try and fold them into constants.
- MPM.addPass(GlobalOptPass());
-
- // Promote any localized globals to SSA registers.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
- // Linking modules together can lead to duplicate global constants; only
- // keep one copy of each constant.
- MPM.addPass(ConstantMergePass());
-
- // Remove unused arguments from functions.
- MPM.addPass(DeadArgumentEliminationPass());
-
- // Reduce the code after globalopt and ipsccp. Both can open up significant
- // simplification opportunities, and both can propagate functions through
- // function pointers. When this happens, we often have to resolve varargs
- // calls, etc, so let instcombine do this.
- FunctionPassManager PeepholeFPM;
- if (Level == OptimizationLevel::O3)
- PeepholeFPM.addPass(AggressiveInstCombinePass());
- PeepholeFPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(PeepholeFPM, Level);
-
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
-
- // Note: historically, the PruneEH pass was run first to deduce nounwind and
- // generally clean up exception handling overhead. It isn't clear this is
- // valuable as the inliner doesn't currently care whether it is inlining an
- // invoke or a call.
- // Run the inliner now.
- MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
-
- // Optimize globals again after we ran the inliner.
- MPM.addPass(GlobalOptPass());
-
- // Garbage collect dead functions.
- // FIXME: Add ArgumentPromotion pass after once it's ported.
- MPM.addPass(GlobalDCEPass());
-
- FunctionPassManager FPM;
- // The IPO Passes may leave cruft around. Clean up after them.
- FPM.addPass(InstCombinePass());
- invokePeepholeEPCallbacks(FPM, Level);
-
- FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
-
- // Do a post inline PGO instrumentation and use pass. This is a context
- // sensitive PGO pass.
- if (PGOOpt) {
- if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
- /* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile);
- else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
- addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
- /* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile);
- }
-
- // Break up allocas
- FPM.addPass(SROA());
-
- // LTO provides additional opportunities for tailcall elimination due to
- // link-time inlining, and visibility of nocapture attribute.
- FPM.addPass(TailCallElimPass());
-
- // Run a few AA driver optimizations here and now to cleanup the code.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
-
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
-
- // Require the GlobalsAA analysis for the module so we can query it within
- // MainFPM.
- MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
- // Invalidate AAManager so it can be recreated and pick up the newly available
- // GlobalsAA.
- MPM.addPass(
- createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
-
- FunctionPassManager MainFPM;
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
- EnableMSSALoopDependency, /*UseBlockFrequencyInfo=*/true));
-
- if (RunNewGVN)
- MainFPM.addPass(NewGVNPass());
- else
- MainFPM.addPass(GVN());
-
- // Remove dead memcpy()'s.
- MainFPM.addPass(MemCpyOptPass());
-
- // Nuke dead stores.
- MainFPM.addPass(DSEPass());
- MainFPM.addPass(MergedLoadStoreMotionPass());
-
- // More loops are countable; try to optimize them.
- if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
- MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
-
- if (EnableConstraintElimination)
- MainFPM.addPass(ConstraintEliminationPass());
-
- LoopPassManager LPM;
- LPM.addPass(IndVarSimplifyPass());
- LPM.addPass(LoopDeletionPass());
- // FIXME: Add loop interchange.
-
- // Unroll small loops and perform peeling.
- LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
- /* OnlyWhenForced= */ !PTO.LoopUnrolling,
- PTO.ForgetAllSCEVInLoopUnroll));
- // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
- // *All* loop passes must preserve it, in order to be able to use it.
- MainFPM.addPass(createFunctionToLoopPassAdaptor(
- std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
-
- MainFPM.addPass(LoopDistributePass());
-
- addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
-
- invokePeepholeEPCallbacks(MainFPM, Level);
- MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
-
- // Lower type metadata and the type.test intrinsic. This pass supports
- // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
- // to be run at link time if CFI is enabled. This pass does nothing if
- // CFI is disabled.
- MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
- // Run a second time to clean up any type tests left behind by WPD for use
- // in ICP (which is performed earlier than this in the regular LTO pipeline).
- MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
-
- // Enable splitting late in the FullLTO post-link pipeline. This is done in
- // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
- if (EnableHotColdSplit)
- MPM.addPass(HotColdSplittingPass());
-
- // Add late LTO optimization passes.
- // Delete basic blocks, which optimization passes may have killed.
- MPM.addPass(createModuleToFunctionPassAdaptor(
- SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
-
- // Drop bodies of available_externally objects to improve GlobalDCE.
- MPM.addPass(EliminateAvailableExternallyPass());
-
- // Now that we have optimized the program, discard unreachable functions.
- MPM.addPass(GlobalDCEPass());
-
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
- // Emit annotation remarks.
- addAnnotationRemarksPass(MPM);
-
- return MPM;
-}
-
-ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
- bool LTOPreLink) {
- assert(Level == OptimizationLevel::O0 &&
- "buildO0DefaultPipeline should only be used with O0");
-
- ModulePassManager MPM;
-
- if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
- PGOOpt->Action == PGOOptions::IRUse))
- addPGOInstrPassesForO0(
- MPM,
- /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
- /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
-
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
- if (PGOOpt && PGOOpt->DebugInfoForProfiling)
- MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
-
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
-
- // Build a minimal pipeline based on the semantics required by LLVM,
- // which is just that always inlining occurs. Further, disable generating
- // lifetime intrinsics to avoid enabling further optimizations during
- // code generation.
- MPM.addPass(AlwaysInlinerPass(
- /*InsertLifetimeIntrinsics=*/false));
-
- if (PTO.MergeFunctions)
- MPM.addPass(MergeFunctionsPass());
-
- if (EnableMatrix)
- MPM.addPass(
- createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
-
- if (!CGSCCOptimizerLateEPCallbacks.empty()) {
- CGSCCPassManager CGPM;
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(CGPM, Level);
- if (!CGPM.isEmpty())
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- }
- if (!LateLoopOptimizationsEPCallbacks.empty()) {
- LoopPassManager LPM;
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM, Level);
- if (!LPM.isEmpty()) {
- MPM.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(std::move(LPM))));
- }
- }
- if (!LoopOptimizerEndEPCallbacks.empty()) {
- LoopPassManager LPM;
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM, Level);
- if (!LPM.isEmpty()) {
- MPM.addPass(createModuleToFunctionPassAdaptor(
- createFunctionToLoopPassAdaptor(std::move(LPM))));
- }
- }
- if (!ScalarOptimizerLateEPCallbacks.empty()) {
- FunctionPassManager FPM;
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
- if (!FPM.isEmpty())
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- }
- if (!VectorizerStartEPCallbacks.empty()) {
- FunctionPassManager FPM;
- for (auto &C : VectorizerStartEPCallbacks)
- C(FPM, Level);
- if (!FPM.isEmpty())
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- }
-
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
- CGSCCPassManager CGPM;
- CGPM.addPass(CoroSplitPass());
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
- MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
-
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
-
- if (LTOPreLink)
- addRequiredLTOPreLinkPasses(MPM);
-
- return MPM;
-}
-
-AAManager PassBuilder::buildDefaultAAPipeline() {
- AAManager AA;
-
- // The order in which these are registered determines their priority when
- // being queried.
-
- // First we register the basic alias analysis that provides the majority of
- // per-function local AA logic. This is a stateless, on-demand local set of
- // AA techniques.
- AA.registerFunctionAnalysis<BasicAA>();
-
- // Next we query fast, specialized alias analyses that wrap IR-embedded
- // information about aliasing.
- AA.registerFunctionAnalysis<ScopedNoAliasAA>();
- AA.registerFunctionAnalysis<TypeBasedAA>();
-
- // Add support for querying global aliasing information when available.
- // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
- // analysis, all that the `AAManager` can do is query for any *cached*
- // results from `GlobalsAA` through a readonly proxy.
- AA.registerModuleAnalysis<GlobalsAA>();
-
- // Add target-specific alias analyses.
- if (TM)
- TM->registerDefaultAliasAnalyses(AA);
-
- return AA;
-}
-
static Optional<int> parseRepeatPassName(StringRef Name) {
if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
return None;
@@ -2140,6 +558,83 @@ Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
return UnrollOpts;
}
+Expected<bool> parseSinglePassOption(StringRef Params, StringRef OptionName,
+ StringRef PassName) {
+ bool Result = false;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == OptionName) {
+ Result = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid {1} pass parameter '{0}' ", ParamName, PassName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
+Expected<bool> parseInlinerPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "only-mandatory", "InlinerPass");
+}
+
+Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "memssa", "EarlyCSE");
+}
+
+Expected<bool> parseEntryExitInstrumenterPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "post-inline", "EntryExitInstrumenter");
+}
+
+Expected<bool> parseLoopExtractorPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "single", "LoopExtractor");
+}
+
+Expected<bool> parseLowerMatrixIntrinsicsPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "minimal", "LowerMatrixIntrinsics");
+}
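+
+// The parsers above back the textual 'pass<option>' pipeline syntax. A sketch
+// of how these options are spelled on an opt command line (pass names assumed
+// to match their PassRegistry.def registrations):
+//
+//   opt -passes='early-cse<memssa>' ...
+//   opt -passes='inline<only-mandatory>' ...
+//   opt -passes='lower-matrix-intrinsics<minimal>' ...
+//
+// An empty parameter list (plain 'early-cse') yields false; any other
+// parameter name is rejected with the "invalid ... pass parameter" error.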
+
+Expected<AddressSanitizerOptions> parseASanPassOptions(StringRef Params) {
+ AddressSanitizerOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == "kernel") {
+ Result.CompileKernel = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid AddressSanitizer pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
+Expected<HWAddressSanitizerOptions> parseHWASanPassOptions(StringRef Params) {
+ HWAddressSanitizerOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == "recover") {
+ Result.Recover = true;
+ } else if (ParamName == "kernel") {
+ Result.CompileKernel = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid HWAddressSanitizer pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
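+
+// Sanitizer parameters are ';'-separated, so several options can be combined
+// on one pass instance, e.g. (a sketch, assuming the usual pass
+// registrations):
+//
+//   opt -passes='asan<kernel>' ...
+//   opt -passes='hwasan<kernel;recover>' ...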
+
Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
MemorySanitizerOptions Result;
while (!Params.empty()) {
@@ -2349,7 +844,7 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
return true;
if (Name == "cgscc")
return true;
- if (Name == "function")
+ if (Name == "function" || Name == "function<eager-inv>")
return true;
// Explicitly handle custom-parsed pass names.
@@ -2359,6 +854,9 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -2372,7 +870,7 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
if (Name == "cgscc")
return true;
- if (Name == "function")
+ if (Name == "function" || Name == "function<eager-inv>")
return true;
// Explicitly handle custom-parsed pass names.
@@ -2384,6 +882,9 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) \
+ return true;
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">") \
return true;
@@ -2395,7 +896,7 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
template <typename CallbacksT>
static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
- if (Name == "function")
+ if (Name == "function" || Name == "function<eager-inv>")
return true;
if (Name == "loop" || Name == "loop-mssa")
return true;
@@ -2419,15 +920,41 @@ static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
}
template <typename CallbacksT>
-static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
- // Explicitly handle pass manager names.
- if (Name == "loop" || Name == "loop-mssa")
+static bool isLoopNestPassName(StringRef Name, CallbacksT &Callbacks,
+ bool &UseMemorySSA) {
+ UseMemorySSA = false;
+
+ // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name))
return true;
+ if (Name == "lnicm") {
+ UseMemorySSA = true;
+ return true;
+ }
+
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) \
+ return true;
+#include "PassRegistry.def"
+
+ return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
+}
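+
+// Both this predicate and isLoopPassName below report, via UseMemorySSA,
+// whether the named pass needs a MemorySSA-enabled loop adaptor. When a bare
+// 'licm' (or 'lnicm') is the first pass of a top-level pipeline string, the
+// parser therefore wraps it as (sketch):
+//
+//   opt -passes=licm ...      // parsed as function(loop-mssa(licm))
+//
+// rather than the plain 'loop(...)' nesting used for other loop passes.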
+
+template <typename CallbacksT>
+static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks,
+ bool &UseMemorySSA) {
+ UseMemorySSA = false;
+
// Explicitly handle custom-parsed pass names.
if (parseRepeatPassName(Name))
return true;
+ if (Name == "licm") {
+ UseMemorySSA = true;
+ return true;
+ }
+
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
@@ -2520,11 +1047,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
return Error::success();
}
- if (Name == "function") {
+ if (Name == "function" || Name == "function<eager-inv>") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
+ Name != "function"));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -2599,6 +1127,14 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(CREATE_PASS); \
return Error::success(); \
}
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ MPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
+ }
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
MPM.addPass( \
@@ -2616,6 +1152,15 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \
return Error::success(); \
}
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ MPM.addPass( \
+ createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS(Params.get()))); \
+ return Error::success(); \
+ }
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \
@@ -2629,6 +1174,12 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ MPM.addPass(createModuleToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(createModuleToFunctionPassAdaptor( \
@@ -2670,12 +1221,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(std::move(NestedCGPM));
return Error::success();
}
- if (Name == "function") {
+ if (Name == "function" || Name == "function<eager-inv>") {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
- CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+ CGPM.addPass(
+ createCGSCCToFunctionPassAdaptor(std::move(FPM), Name != "function"));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -2710,6 +1262,14 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(CREATE_PASS); \
return Error::success(); \
}
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ if (checkParametrizedPassName(Name, NAME)) { \
+ auto Params = parsePassParameters(PARSER, Name, NAME); \
+ if (!Params) \
+ return Params.takeError(); \
+ CGPM.addPass(CREATE_PASS(Params.get())); \
+ return Error::success(); \
+ }
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
CGPM.addPass(RequireAnalysisPass< \
@@ -2736,6 +1296,12 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \
return Error::success(); \
}
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
+ createFunctionToLoopPassAdaptor(CREATE_PASS, false, false))); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(createCGSCCToFunctionPassAdaptor( \
@@ -2785,8 +1351,11 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
bool UseMemorySSA = (Name == "loop-mssa");
bool UseBFI = llvm::any_of(
InnerPipeline, [](auto Pipeline) { return Pipeline.Name == "licm"; });
+ bool UseBPI = llvm::any_of(InnerPipeline, [](auto Pipeline) {
+ return Pipeline.Name == "loop-predication";
+ });
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM), UseMemorySSA,
- UseBFI));
+ UseBFI, UseBPI));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -2837,6 +1406,11 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
// bool UseMemorySSA = !("canon-freeze" || "loop-predication" ||
// "guard-widening");
// The risk is that it may become obsolete if we're not careful.
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS, false, false)); \
@@ -2895,6 +1469,11 @@ Error PassBuilder::parseLoopPass(LoopPassManager &LPM,
}
// Now expand the basic registered passes from the .inc file.
+#define LOOPNEST_PASS(NAME, CREATE_PASS) \
+ if (Name == NAME) { \
+ LPM.addPass(CREATE_PASS); \
+ return Error::success(); \
+ }
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
LPM.addPass(CREATE_PASS); \
@@ -3016,13 +1595,20 @@ Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
StringRef FirstName = Pipeline->front().Name;
if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) {
+ bool UseMemorySSA;
if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) {
Pipeline = {{"cgscc", std::move(*Pipeline)}};
} else if (isFunctionPassName(FirstName,
FunctionPipelineParsingCallbacks)) {
Pipeline = {{"function", std::move(*Pipeline)}};
- } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) {
- Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
+ } else if (isLoopNestPassName(FirstName, LoopPipelineParsingCallbacks,
+ UseMemorySSA)) {
+ Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop",
+ std::move(*Pipeline)}}}};
+ } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks,
+ UseMemorySSA)) {
+ Pipeline = {{"function", {{UseMemorySSA ? "loop-mssa" : "loop",
+ std::move(*Pipeline)}}}};
} else {
for (auto &C : TopLevelPipelineParsingCallbacks)
if (C(MPM, *Pipeline))
@@ -3172,6 +1758,11 @@ void PassBuilder::printPassNames(raw_ostream &OS) {
#define MODULE_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
+ OS << "Module passes with params:\n";
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ printPassName(NAME, PARAMS, OS);
+#include "PassRegistry.def"
+
OS << "Module analyses:\n";
#define MODULE_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
@@ -3184,6 +1775,11 @@ void PassBuilder::printPassNames(raw_ostream &OS) {
#define CGSCC_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
+ OS << "CGSCC passes with params:\n";
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS) \
+ printPassName(NAME, PARAMS, OS);
+#include "PassRegistry.def"
+
OS << "CGSCC analyses:\n";
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
@@ -3205,6 +1801,10 @@ void PassBuilder::printPassNames(raw_ostream &OS) {
#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
+ OS << "LoopNest passes:\n";
+#define LOOPNEST_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
+#include "PassRegistry.def"
+
OS << "Loop passes:\n";
#define LOOP_PASS(NAME, CREATE_PASS) printPassName(NAME, OS);
#include "PassRegistry.def"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
new file mode 100644
index 000000000000..ac5dfdbdd540
--- /dev/null
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -0,0 +1,1798 @@
+//===- Construction of pass pipelines -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/OptimizationLevel.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/PGOOptions.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/Coroutines/CoroCleanup.h"
+#include "llvm/Transforms/Coroutines/CoroEarly.h"
+#include "llvm/Transforms/Coroutines/CoroElide.h"
+#include "llvm/Transforms/Coroutines/CoroSplit.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/Annotation2Metadata.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/IPO/CalledValuePropagation.h"
+#include "llvm/Transforms/IPO/ConstantMerge.h"
+#include "llvm/Transforms/IPO/CrossDSOCFI.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
+#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/GlobalOpt.h"
+#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MergeFunctions.h"
+#include "llvm/Transforms/IPO/ModuleInliner.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+#include "llvm/Transforms/IPO/PartialInlining.h"
+#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
+#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
+#include "llvm/Transforms/Scalar/BDCE.h"
+#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
+#include "llvm/Transforms/Scalar/ConstraintElimination.h"
+#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
+#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
+#include "llvm/Transforms/Scalar/DivRemPairs.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/Float2Int.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/IndVarSimplify.h"
+#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopDeletion.h"
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFlatten.h"
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
+#include "llvm/Transforms/Scalar/LoopInterchange.h"
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopRotation.h"
+#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/Transforms/Scalar/LoopSink.h"
+#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/NewGVN.h"
+#include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/Transforms/Utils/InjectTLIMappings.h"
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
+#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
+#include "llvm/Transforms/Vectorize/VectorCombine.h"
+
+using namespace llvm;
+
+static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
+ "enable-ml-inliner", cl::init(InliningAdvisorMode::Default), cl::Hidden,
+ cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
+ cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
+ "Heuristics-based inliner version."),
+ clEnumValN(InliningAdvisorMode::Development, "development",
+ "Use development mode (runtime-loadable model)."),
+ clEnumValN(InliningAdvisorMode::Release, "release",
+ "Use release mode (AOT-compiled model).")));
+
+static cl::opt<bool> EnableSyntheticCounts(
+ "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Run synthetic function entry count generation "
+ "pass"));
+
+/// Flag to enable inline deferral during PGO.
+static cl::opt<bool>
+ EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
+ cl::Hidden,
+ cl::desc("Enable inline deferral during PGO"));
+
+static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable memory profiler"));
+
+static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
+ cl::init(false), cl::Hidden,
+ cl::desc("Enable module inliner"));
+
+static cl::opt<bool> PerformMandatoryInliningsFirst(
+ "mandatory-inlining-first", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Perform mandatory inlinings module-wide, before performing "
+ "inlining."));
+
+static cl::opt<bool> EnableO3NonTrivialUnswitching(
+ "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable non-trivial loop unswitching for -O3"));
+
+static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
+ "eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
+ cl::desc("Eagerly invalidate more analyses in default pipelines"));
+
+static cl::opt<bool> EnableNoRerunSimplificationPipeline(
+ "enable-no-rerun-simplification-pipeline", cl::init(false), cl::Hidden,
+ cl::desc(
+ "Prevent running the simplification pipeline on a function more "
+ "than once in the case that SCC mutations cause a function to be "
+ "visited multiple times as long as the function has not been changed"));
+
+PipelineTuningOptions::PipelineTuningOptions() {
+ LoopInterleaving = true;
+ LoopVectorization = true;
+ SLPVectorization = false;
+ LoopUnrolling = true;
+ ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+ LicmMssaOptCap = SetLicmMssaOptCap;
+ LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+ CallGraphProfile = true;
+ MergeFunctions = false;
+ EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
+}
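+
+// A minimal sketch of how a front end might override these defaults before
+// building a pipeline (assuming the usual PassBuilder constructor that takes
+// a TargetMachine and PipelineTuningOptions):
+//
+//   PipelineTuningOptions PTO;
+//   PTO.SLPVectorization = true; // off by default above
+//   PTO.MergeFunctions = true;   // off by default above
+//   PassBuilder PB(TM, PTO);
+//   ModulePassManager MPM =
+//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);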
+
+namespace llvm {
+
+extern cl::opt<unsigned> MaxDevirtIterations;
+extern cl::opt<bool> EnableConstraintElimination;
+extern cl::opt<bool> EnableFunctionSpecialization;
+extern cl::opt<bool> EnableGVNHoist;
+extern cl::opt<bool> EnableGVNSink;
+extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableIROutliner;
+extern cl::opt<bool> EnableOrderFileInstrumentation;
+extern cl::opt<bool> EnableCHR;
+extern cl::opt<bool> EnableLoopInterchange;
+extern cl::opt<bool> EnableUnrollAndJam;
+extern cl::opt<bool> EnableLoopFlatten;
+extern cl::opt<bool> EnableDFAJumpThreading;
+extern cl::opt<bool> RunNewGVN;
+extern cl::opt<bool> RunPartialInlining;
+extern cl::opt<bool> ExtraVectorizerPasses;
+
+extern cl::opt<bool> FlattenedProfileUsed;
+
+extern cl::opt<AttributorRunOption> AttributorRun;
+extern cl::opt<bool> EnableKnowledgeRetention;
+
+extern cl::opt<bool> EnableMatrix;
+
+extern cl::opt<bool> DisablePreInliner;
+extern cl::opt<int> PreInlineThreshold;
+} // namespace llvm
+
+void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
+ OptimizationLevel Level) {
+ for (auto &C : PeepholeEPCallbacks)
+ C(FPM, Level);
+}
+
+// Helper to add AnnotationRemarksPass.
+static void addAnnotationRemarksPass(ModulePassManager &MPM) {
+ FunctionPassManager FPM;
+ FPM.addPass(AnnotationRemarksPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+}
+
+// Helper to check if the current compilation phase is preparing for LTO
+static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
+ return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
+ Phase == ThinOrFullLTOPhase::FullLTOPreLink;
+}
+
+// TODO: Investigate the cost/benefit of tail call elimination on debugging.
+FunctionPassManager
+PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+
+ FunctionPassManager FPM;
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROAPass());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+
+ // Hoisting of scalars and load expressions.
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be removed
+ // and/or replaced by scheduling the loop pass equivalents in the correct
+ // positions. But those equivalent passes aren't powerful enough yet.
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+ // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+ // `LoopInstSimplify`.
+ LoopPassManager LPM1, LPM2;
+
+ // Simplify the loop body. We do this initially to clean up after other loop
+ // passes run, either when iterating on a loop or on inner loops with
+ // implications on the outer loop.
+ LPM1.addPass(LoopInstSimplifyPass());
+ LPM1.addPass(LoopSimplifyCFGPass());
+
+ // Try to remove as much code from the loop header as possible,
+  // to reduce the amount of IR that will have to be duplicated.
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+
+ LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
+ isLTOPreLink(Phase)));
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(SimpleLoopUnswitchPass());
+
+ LPM2.addPass(LoopIdiomRecognizePass());
+ LPM2.addPass(IndVarSimplifyPass());
+
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM2, Level);
+
+ LPM2.addPass(LoopDeletionPass());
+
+ if (EnableLoopInterchange)
+ LPM2.addPass(LoopInterchangePass());
+
+  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
+  // because it changes the IR and makes profile annotation in the backend
+  // compile inaccurate. The normal unroller doesn't pay attention to forced
+  // full unroll attributes, so we need to make sure to let the full unroll
+  // pass pay attention to them.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
+ LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM2, Level);
+
+ // We provide the opt remark emitter pass for LICM to use. We only need to do
+ // this once as it is immutable.
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
+ /*UseMemorySSA=*/true,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ if (EnableLoopFlatten)
+ FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+ // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
+ /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false));
+
+ // Delete small array after loop unroll.
+ FPM.addPass(SROAPass());
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(CoroElidePass());
+
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
+ FPM.addPass(ADCEPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ return FPM;
+}
+
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
+
+ // The O1 pipeline has a separate pipeline creation function to simplify
+ // construction readability.
+ if (Level.getSpeedupLevel() == 1)
+ return buildO1FunctionSimplificationPipeline(Level, Phase);
+
+ FunctionPassManager FPM;
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROAPass());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+ if (EnableKnowledgeRetention)
+ FPM.addPass(AssumeSimplifyPass());
+
+ // Hoisting of scalars and load expressions.
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
+ // Global value numbering based sinking.
+ if (EnableGVNSink) {
+ FPM.addPass(GVNSinkPass());
+ FPM.addPass(SimplifyCFGPass());
+ }
+
+ if (EnableConstraintElimination)
+ FPM.addPass(ConstraintEliminationPass());
+
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ FPM.addPass(SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
+
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+
+ FPM.addPass(SimplifyCFGPass());
+ if (Level == OptimizationLevel::O3)
+ FPM.addPass(AggressiveInstCombinePass());
+ FPM.addPass(InstCombinePass());
+
+ if (!Level.isOptimizingForSize())
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
+ // using the size value profile. Don't perform this when optimizing for size.
+ if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
+ !Level.isOptimizingForSize())
+ FPM.addPass(PGOMemOPSizeOpt());
+
+ FPM.addPass(TailCallElimPass());
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be removed
+ // and/or replaced by scheduling the loop pass equivalents in the correct
+ // positions. But those equivalent passes aren't powerful enough yet.
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+ // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+ // `LoopInstSimplify`.
+ LoopPassManager LPM1, LPM2;
+
+ // Simplify the loop body. We do this initially to clean up after other loop
+ // passes run, either when iterating on a loop or on inner loops with
+ // implications on the outer loop.
+ LPM1.addPass(LoopInstSimplifyPass());
+ LPM1.addPass(LoopSimplifyCFGPass());
+
+ // Try to remove as much code from the loop header as possible,
+  // to reduce the amount of IR that will have to be duplicated.
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+
+ // Disable header duplication in loop rotation at -Oz.
+ LPM1.addPass(
+ LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
+ // TODO: Investigate promotion cap for O1.
+ LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM1.addPass(
+ SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
+ EnableO3NonTrivialUnswitching));
+ LPM2.addPass(LoopIdiomRecognizePass());
+ LPM2.addPass(IndVarSimplifyPass());
+
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM2, Level);
+
+ LPM2.addPass(LoopDeletionPass());
+
+ if (EnableLoopInterchange)
+ LPM2.addPass(LoopInterchangePass());
+
+  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
+  // because it changes the IR and makes profile annotation in the backend
+  // compile inaccurate. The normal unroller doesn't pay attention to forced
+  // full unroll attributes, so we need to make sure to let the full unroll
+  // pass pay attention to them.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
+ PGOOpt->Action != PGOOptions::SampleUse)
+ LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM2, Level);
+
+ // We provide the opt remark emitter pass for LICM to use. We only need to do
+ // this once as it is immutable.
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
+ /*UseMemorySSA=*/true,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ if (EnableLoopFlatten)
+ FPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+ // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
+ // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it, in order to be able to use it.
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2),
+ /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false));
+
+ // Delete small array after loop unroll.
+ FPM.addPass(SROAPass());
+
+ // The matrix extension can introduce large vector operations early, which can
+ // benefit from running vector-combine early on.
+ if (EnableMatrix)
+ FPM.addPass(VectorCombinePass(/*ScalarizationOnly=*/true));
+
+ // Eliminate redundancies.
+ FPM.addPass(MergedLoadStoreMotionPass());
+ if (RunNewGVN)
+ FPM.addPass(NewGVNPass());
+ else
+ FPM.addPass(GVNPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ // Re-consider control flow based optimizations after redundancy elimination,
+ // redo DCE, etc.
+ if (EnableDFAJumpThreading && Level.getSizeLevel() == 0)
+ FPM.addPass(DFAJumpThreadingPass());
+
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ // TODO: Investigate if this is too expensive.
+ FPM.addPass(ADCEPass());
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ FPM.addPass(DSEPass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+
+ FPM.addPass(CoroElidePass());
+
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+
+ FPM.addPass(SimplifyCFGPass(
+ SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
+ (PGOOpt->Action == PGOOptions::IRUse ||
+ PGOOpt->Action == PGOOptions::SampleUse))
+ FPM.addPass(ControlHeightReductionPass());
+
+ return FPM;
+}
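+
+// The exact sequence scheduled above can be observed without reading the
+// code, e.g. via the new pass manager's debug output (a sketch):
+//
+//   opt -passes='default<O2>' -debug-pass-manager -disable-output input.ll
+//
+// which prints each pass as it runs, including this function simplification
+// pipeline nested inside the inliner's CGSCC walk.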
+
+void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
+ MPM.addPass(CanonicalizeAliasesPass());
+ MPM.addPass(NameAnonGlobalPass());
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
+ OptimizationLevel Level, bool RunProfileGen,
+ bool IsCS, std::string ProfileFile,
+ std::string ProfileRemappingFile) {
+ assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
+ if (!IsCS && !DisablePreInliner) {
+ InlineParams IP;
+
+ IP.DefaultThreshold = PreInlineThreshold;
+
+    // FIXME: The hint threshold has the same value used by the regular inliner
+    // when not optimizing for size. This should probably be lowered after
+    // performance testing.
+    // FIXME: this comment is cargo-culted from the old pass manager; revisit.
+ IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
+ ModuleInlinerWrapperPass MIWP(IP);
+ CGSCCPassManager &CGPipeline = MIWP.getPM();
+
+ FunctionPassManager FPM;
+ FPM.addPass(SROAPass());
+ FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies.
+ FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+ FPM.addPass(InstCombinePass()); // Combine silly sequences.
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ std::move(FPM), PTO.EagerlyInvalidateAnalyses));
+
+ MPM.addPass(std::move(MIWP));
+
+ // Delete anything that is now dead to make sure that we don't instrument
+ // dead code. Instrumentation can end up keeping dead code around and
+ // dramatically increase code size.
+ MPM.addPass(GlobalDCEPass());
+ }
+
+ if (!RunProfileGen) {
+ assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+ MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ return;
+ }
+
+ // Perform PGO instrumentation.
+ MPM.addPass(PGOInstrumentationGen(IsCS));
+
+ FunctionPassManager FPM;
+ // Disable header duplication in loop rotation at -Oz.
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopRotatePass(Level != OptimizationLevel::Oz), /*UseMemorySSA=*/false,
+ /*UseBlockFrequencyInfo=*/false));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
+ // Do counter promotion at Level greater than O0.
+ Options.DoCounterPromotion = true;
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
+}
+
+void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
+ bool RunProfileGen, bool IsCS,
+ std::string ProfileFile,
+ std::string ProfileRemappingFile) {
+ if (!RunProfileGen) {
+ assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
+ MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ return;
+ }
+
+ // Perform PGO instrumentation.
+ MPM.addPass(PGOInstrumentationGen(IsCS));
+ // Add the profile lowering pass.
+ InstrProfOptions Options;
+ if (!ProfileFile.empty())
+ Options.InstrProfileOutput = ProfileFile;
+ // Do not do counter promotion at O0.
+ Options.DoCounterPromotion = false;
+ Options.UseBFIInPromotion = IsCS;
+ MPM.addPass(InstrProfiling(Options, IsCS));
+}
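+
+// RunProfileGen / IsCS / ProfileFile are derived from the PGOOptions handed to
+// the PassBuilder; a typical (assumed) front-end mapping is
+// -fprofile-generate  -> PGOOptions::IRInstr (RunProfileGen == true) and
+// -fprofile-use=FILE  -> PGOOptions::IRUse   (RunProfileGen == false), which
+// is how buildO0DefaultPipeline ends up calling this helper.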
+
+static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
+ return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
+}
+
+ModuleInlinerWrapperPass
+PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ IP.HotCallSiteThreshold = 0;
+
+ if (PGOOpt)
+ IP.EnableDeferral = EnablePGOInlineDeferral;
+
+ ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
+ UseInlineAdvisor, MaxDevirtIterations);
+
+ // Require the GlobalsAA analysis for the module so we can query it within
+ // the CGSCC pipeline.
+ MIWP.addModulePass(RequireAnalysisPass<GlobalsAA, Module>());
+ // Invalidate AAManager so it can be recreated and pick up the newly available
+ // GlobalsAA.
+ MIWP.addModulePass(
+ createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+
+ // Require the ProfileSummaryAnalysis for the module so we can query it within
+ // the inliner pass.
+ MIWP.addModulePass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+
+ // Now begin the main postorder CGSCC pipeline.
+ // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+ // manager and trying to emulate its precise behavior. Much of this doesn't
+ // make a lot of sense and we should revisit the core CGSCC structure.
+ CGSCCPassManager &MainCGPipeline = MIWP.getPM();
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+
+ if (AttributorRun & AttributorRunOption::CGSCC)
+ MainCGPipeline.addPass(AttributorCGSCCPass());
+
+  // Now deduce any function attributes based on the current code.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // When at O3 add argument promotion to the pass pipeline.
+ // FIXME: It isn't at all clear why this should be limited to O3.
+ if (Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(ArgumentPromotionPass());
+
+ // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
+ // there are no OpenMP runtime calls present in the module.
+ if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
+ MainCGPipeline.addPass(OpenMPOptCGSCCPass());
+
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(MainCGPipeline, Level);
+
+ // Lastly, add the core function simplification pipeline nested inside the
+ // CGSCC walk.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, Phase),
+ PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline));
+
+ MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
+
+ if (EnableNoRerunSimplificationPipeline)
+ MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
+ InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
+
+ return MIWP;
+}
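+
+// The advisor driving the inliner above is selected with -enable-ml-inliner;
+// for example (a sketch, assuming LLVM was built with the release-mode model):
+//
+//   opt -passes='default<Oz>' -enable-ml-inliner=release input.ll -o out.bc
+//
+// Leaving the flag at 'default' keeps the heuristics-based advisor.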
+
+ModuleInlinerPass
+PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ InlineParams IP = getInlineParamsFromOptLevel(Level);
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ IP.HotCallSiteThreshold = 0;
+
+ if (PGOOpt)
+ IP.EnableDeferral = EnablePGOInlineDeferral;
+
+  // The inline deferral logic is used to avoid losing inlining chances in the
+  // future. It is helpful in the SCC inliner, where inlining is processed in
+  // bottom-up order. In the module inliner the inlining order is
+  // priority-based by default, so inline deferral is unnecessary there and we
+  // disable it.
+ IP.EnableDeferral = false;
+
+ return ModuleInlinerPass(IP, UseInlineAdvisor);
+}
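+
+// The module inliner is opt-in: with the flag defined above, e.g.
+//
+//   opt -passes='default<O2>' -enable-module-inliner ...
+//
+// it replaces the CGSCC-based ModuleInlinerWrapperPass that
+// buildModuleSimplificationPipeline below would otherwise schedule.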
+
+ModulePassManager
+PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ ModulePassManager MPM;
+
+ // Place pseudo probe instrumentation as the first pass of the pipeline to
+ // minimize the impact of optimization changes.
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
+ MPM.addPass(SampleProfileProbePass(TM));
+
+ bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+  // In ThinLTO mode, when a flattened profile is used, all the available
+  // profile information will be annotated in the PreLink phase, so there is
+  // no need to load the profile again in the PostLink phase.
+ bool LoadSampleProfile =
+ HasSampleProfile &&
+ !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
+
+ // During the ThinLTO backend phase we perform early indirect call promotion
+ // here, before globalopt. Otherwise imported available_externally functions
+ // look unreferenced and are removed. If we are going to load the sample
+ // profile then defer until later.
+ // TODO: See if we can move later and consolidate with the location where
+ // we perform ICP when we are loading a sample profile.
+ // TODO: We pass HasSampleProfile (whether there was a sample profile file
+ // passed to the compile) to the SamplePGO flag of ICP. This is used to
+ // determine whether the new direct calls are annotated with prof metadata.
+ // Ideally this should be determined from whether the IR is annotated with
+  // sample profile, and not whether a sample profile was provided on the
+ // command line. E.g. for flattened profiles where we will not be reloading
+ // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+ // provide the sample profile file.
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
+ MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ // Create an early function pass manager to cleanup the output of the
+ // frontend.
+ FunctionPassManager EarlyFPM;
+ // Lower llvm.expect to metadata before attempting transforms.
+ // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
+ EarlyFPM.addPass(LowerExpectIntrinsicPass());
+ EarlyFPM.addPass(SimplifyCFGPass());
+ EarlyFPM.addPass(SROAPass());
+ EarlyFPM.addPass(EarlyCSEPass());
+ EarlyFPM.addPass(CoroEarlyPass());
+ if (Level == OptimizationLevel::O3)
+ EarlyFPM.addPass(CallSiteSplittingPass());
+
+  // In the SamplePGO ThinLTO backend, we need instcombine before profile
+  // annotation to convert bitcasts to direct calls so that they can be inlined
+  // during the profile annotation preparation step.
+ // More details about SamplePGO design can be found in:
+ // https://research.google.com/pubs/pub45290.html
+ // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
+ if (LoadSampleProfile)
+ EarlyFPM.addPass(InstCombinePass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ if (LoadSampleProfile) {
+ // Annotate sample profile right after early FPM to ensure freshness of
+ // the debug info.
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile, Phase));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ // Do not invoke ICP in the LTOPrelink phase as it makes it hard
+ // for the profile annotation to be accurate in the LTO backend.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
+ Phase != ThinOrFullLTOPhase::FullLTOPreLink)
+ // We perform early indirect call promotion here, before globalopt.
+ // This is important for the ThinLTO backend phase because otherwise
+ // imported available_externally functions look unreferenced and are
+ // removed.
+ MPM.addPass(
+ PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
+ }
+
+ // Try to perform OpenMP specific optimizations on the module. This is a
+ // (quick!) no-op if there are no OpenMP runtime calls present in the module.
+ if (Level != OptimizationLevel::O0)
+ MPM.addPass(OpenMPOptPass());
+
+ if (AttributorRun & AttributorRunOption::MODULE)
+ MPM.addPass(AttributorPass());
+
+ // Lower type metadata and the type.test intrinsic in the ThinLTO
+ // post link pipeline after ICP. This is to enable usage of the type
+ // tests in ICP sequences.
+ if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+
+ // Specialize functions with IPSCCP.
+ if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
+ MPM.addPass(FunctionSpecializationPass());
+
+ // Interprocedural constant propagation now that basic cleanup has occurred
+ // and prior to optimizing globals.
+ // FIXME: This position in the pipeline hasn't been carefully considered in
+ // years, it should be re-analyzed.
+ MPM.addPass(IPSCCPPass());
+
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ MPM.addPass(CalledValuePropagationPass());
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+  // FIXME: Should this instead be a run of SROA?
+ // FIXME: We should probably run instcombine and simplifycfg afterward to
+ // delete control flows that are dead once globals have been folded to
+ // constants.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Remove any dead arguments exposed by cleanups and constant folding
+ // globals.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Create a small function pass pipeline to cleanup after all the global
+ // optimizations.
+ FunctionPassManager GlobalCleanupPM;
+ GlobalCleanupPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
+
+ GlobalCleanupPM.addPass(SimplifyCFGPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Add all the requested passes for instrumentation PGO, if requested.
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse)) {
+ addPGOInstrPasses(MPM, Level,
+ /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+ /* IsCS */ false, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ MPM.addPass(PGOIndirectCallPromotion(false, false));
+ }
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+
+ // Synthesize function entry counts for non-PGO compilation.
+ if (EnableSyntheticCounts && !PGOOpt)
+ MPM.addPass(SyntheticCountsPropagation());
+
+ if (EnableModuleInliner)
+ MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
+ else
+ MPM.addPass(buildInlinerPipeline(Level, Phase));
+
+ if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+ MPM.addPass(ModuleMemProfilerPass());
+ }
+
+ return MPM;
+}
+
+/// TODO: Should LTO cause any differences to this set of passes?
+void PassBuilder::addVectorPasses(OptimizationLevel Level,
+ FunctionPassManager &FPM, bool IsFullLTO) {
+ FPM.addPass(LoopVectorizePass(
+ LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+ if (IsFullLTO) {
+ // The vectorizer may have significantly shortened a loop body; unroll
+ // again. Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
+ if (EnableUnrollAndJam && PTO.LoopUnrolling)
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopUnrollAndJamPass(Level.getSpeedupLevel())));
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
+ FPM.addPass(WarnMissedTransformationsPass());
+ }
+
+ if (!IsFullLTO) {
+ // Eliminate loads by forwarding stores from the previous iteration to loads
+ // of the current iteration.
+ FPM.addPass(LoopLoadEliminationPass());
+ }
+ // Cleanup after the loop optimization passes.
+ FPM.addPass(InstCombinePass());
+
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ // At higher optimization levels, try to clean up any runtime overlap and
+ // alignment checks inserted by the vectorizer. We want to track correlated
+ // runtime checks for two inner loops in the same outer loop, fold any
+ // common computations, hoist loop-invariant aspects out of any outer loop,
+ // and unswitch the runtime checks if possible. Once hoisted, we may have
+ // dead (or speculatable) control flows or more combining opportunities.
+ FPM.addPass(EarlyCSEPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(InstCombinePass());
+ LoopPassManager LPM;
+ LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+ LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
+ OptimizationLevel::O3));
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(
+ createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
+ /*UseBlockFrequencyInfo=*/true));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ }
+
+  // Now that we've formed fast-to-execute loop structures, we do further
+  // optimizations. These are run afterward as they might block doing complex
+  // analyses and transforms such as those needed for loop vectorization.
+
+ // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+ // GVN, loop transforms, and others have already run, so it's now better to
+ // convert to more optimized IR using more aggressive simplify CFG options.
+ // The extra sinking transform can create larger basic blocks, so do this
+ // before SLP vectorization.
+ FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .forwardSwitchCondToPhi(true)
+ .convertSwitchToLookupTable(true)
+ .needCanonicalLoops(false)
+ .hoistCommonInsts(true)
+ .sinkCommonInsts(true)));
+
+ if (IsFullLTO) {
+ FPM.addPass(SCCPPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(BDCEPass());
+ }
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ if (PTO.SLPVectorization) {
+ FPM.addPass(SLPVectorizerPass());
+ if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
+ FPM.addPass(EarlyCSEPass());
+ }
+ }
+ // Enhance/cleanup vector code.
+ FPM.addPass(VectorCombinePass());
+
+ if (!IsFullLTO) {
+ FPM.addPass(InstCombinePass());
+ // Unroll small loops to hide loop backedge latency and saturate any
+ // parallel execution resources of an out-of-order processor. We also then
+ // need to clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction
+ // combiner for cleanup here so that the unrolling and LICM can be pipelined
+ // across the loop nests.
+ // We do UnrollAndJam in a separate LPM to ensure it happens before unrolling.
+ if (EnableUnrollAndJam && PTO.LoopUnrolling) {
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LoopUnrollAndJamPass(Level.getSpeedupLevel())));
+ }
+ FPM.addPass(LoopUnrollPass(LoopUnrollOptions(
+ Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll)));
+ FPM.addPass(WarnMissedTransformationsPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(
+ RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+ }
+
+ // Now that we've vectorized and unrolled loops, we may have more refined
+ // alignment information, try to re-derive it here.
+ FPM.addPass(AlignmentFromAssumptionsPass());
+
+ if (IsFullLTO)
+ FPM.addPass(InstCombinePass());
+}
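For context, a rough sketch of how the non-LTO flavor of this stage could be reproduced by an out-of-tree client from textual pass names (illustrative only; the concrete SimplifyCFG/unroll options used above are dropped, and the helper name is made up):

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"
using namespace llvm;

// Builds an approximation of addVectorPasses(Level, FPM, /*IsFullLTO=*/false)
// from registered pass names; loop passes such as "licm" are implicitly
// wrapped in a function-to-loop adaptor by the pipeline parser.
static FunctionPassManager makeApproxVectorStage(PassBuilder &PB) {
  FunctionPassManager FPM;
  cantFail(PB.parsePassPipeline(
      FPM, "loop-vectorize,loop-load-elim,instcombine,simplifycfg,"
           "slp-vectorizer,vector-combine,instcombine,loop-unroll,"
           "transform-warning,instcombine,licm,alignment-from-assumptions"));
  return FPM;
}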
+
+ModulePassManager
+PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ ModulePassManager MPM;
+
+ // Optimize globals now that the module is fully simplified.
+ MPM.addPass(GlobalOptPass());
+ MPM.addPass(GlobalDCEPass());
+
+ // Run partial inlining pass to partially inline functions that have
+ // large bodies.
+ if (RunPartialInlining)
+ MPM.addPass(PartialInlinerPass());
+
+ // Remove available externally function and global definitions since we
+ // aren't compiling an object file for later LTO. For LTO we want to preserve
+ // these so they are eligible for inlining at link time. Note that if they are
+ // unreferenced they will be removed by GlobalDCE later, so this only impacts
+ // referenced available externally globals. Eventually they will be suppressed
+ // during codegen, but eliminating them here enables more opportunity for
+ // GlobalDCE, as it may make globals referenced by available external
+ // functions dead, and it saves running the remaining passes on the eliminated
+ // functions. These should be preserved during prelinking for link-time
+ // inlining decisions.
+ if (!LTOPreLink)
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ if (EnableOrderFileInstrumentation)
+ MPM.addPass(InstrOrderFilePass());
+
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Do a post-inline PGO instrumentation and use pass. This is a context
+ // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
+ // cross-module inline has not been done yet. The context sensitive
+ // instrumentation is after all the inlines are done.
+ if (!LTOPreLink && PGOOpt) {
+ if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+ /* IsCS */ true, PGOOpt->CSProfileGenFile,
+ PGOOpt->ProfileRemappingFile);
+ else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
+ /* IsCS */ true, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ }
+
+ // Re-require GlobalsAA here prior to the function passes. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+ FunctionPassManager OptimizePM;
+ OptimizePM.addPass(Float2IntPass());
+ OptimizePM.addPass(LowerConstantIntrinsicsPass());
+
+ if (EnableMatrix) {
+ OptimizePM.addPass(LowerMatrixIntrinsicsPass());
+ OptimizePM.addPass(EarlyCSEPass());
+ }
+
+ // FIXME: We need to run some loop optimizations to re-rotate loops after
+ // simplifycfg and others undo their rotation.
+
+ // Optimize the loop execution. These passes operate on entire loop nests
+ // rather than on each loop in an inside-out manner, and so they are actually
+ // function passes.
+
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(OptimizePM, Level);
+
+ LoopPassManager LPM;
+ // First rotate loops that may have been un-rotated by prior passes.
+ // Disable header duplication at -Oz.
+ LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
+ // Some loops may have become dead by now. Try to delete them.
+ // FIXME: see discussion in https://reviews.llvm.org/D112851;
+ // this may need to be revisited once GVN is more powerful.
+ LPM.addPass(LoopDeletionPass());
+ OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
+
+ // Distribute loops to allow partial vectorization, i.e. isolate dependences
+ // that would otherwise inhibit vectorization into a separate loop. This is
+ // currently only performed for loops marked with the metadata
+ // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
+ OptimizePM.addPass(LoopDistributePass());
+
+ // Populates the VFABI attribute with the scalar-to-vector mappings
+ // from the TargetLibraryInfo.
+ OptimizePM.addPass(InjectTLIMappings());
+
+ addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false);
+
+ // Split out cold code. Splitting is done late to avoid hiding context from
+ // other optimizations and inadvertently regressing performance. The tradeoff
+ // is that this has a higher code size cost than splitting early.
+ if (EnableHotColdSplit && !LTOPreLink)
+ MPM.addPass(HotColdSplittingPass());
+
+ // Search the code for structurally similar regions of code. If enough
+ // similar regions can be found such that extracting them into their own
+ // functions will decrease the size of the program, we extract the regions
+ // and deduplicate the structurally similar regions.
+ if (EnableIROutliner)
+ MPM.addPass(IROutlinerPass());
+
+ // Merge functions if requested.
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
+ // canonicalization pass that enables other optimizations. As a result,
+ // LoopSink needs to be a very late IR pass to avoid undoing LICM results
+ // too early.
+ OptimizePM.addPass(LoopSinkPass());
+
+ // And finally clean up LCSSA form before generating code.
+ OptimizePM.addPass(InstSimplifyPass());
+
+ // This hoists/decomposes div/rem ops. It should run after other sink/hoist
+ // passes to avoid re-sinking, but before SimplifyCFG because it can allow
+ // flattening of blocks.
+ OptimizePM.addPass(DivRemPairsPass());
+
+ // LoopSink (and other loop passes since the last simplifyCFG) might have
+ // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+ OptimizePM.addPass(SimplifyCFGPass());
+
+ OptimizePM.addPass(CoroCleanupPass());
+
+ // Add the core optimizing pipeline.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ if (PTO.CallGraphProfile)
+ MPM.addPass(CGProfilePass());
+
+ // Now we need to do some global optimization transforms.
+ // FIXME: It would seem like these should come first in the optimization
+ // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+ // ordering here.
+ MPM.addPass(GlobalDCEPass());
+ MPM.addPass(ConstantMergePass());
+
+ // TODO: The relative lookup table converter pass caused an issue when full
+ // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
+ // Until the issue is fixed, disable this pass during the pre-linking phase.
+ if (!LTOPreLink)
+ MPM.addPass(RelLookupTableConverterPass());
+
+ return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
+
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Apply module pipeline start EP callback.
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+ // Add the core simplification pipeline.
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink
+ : ThinOrFullLTOPhase::None));
+
+ // Now add the optimization pipeline.
+ MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ MPM.addPass(PseudoProbeUpdatePass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ if (LTOPreLink)
+ addRequiredLTOPreLinkPasses(MPM);
+
+ return MPM;
+}
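buildPerModuleDefaultPipeline is the entry point most frontends call. The following is a minimal, illustrative driver using the standard new-pass-manager setup; the function name and the O2 choice are assumptions, not taken from this change:

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

void optimizeModule(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  // Register all analyses and wire the four managers together.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Equivalent to `opt -passes='default<O2>'`.
  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
  MPM.run(M, MAM);
}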
+
+ModulePassManager
+PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
+
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+ // Apply module pipeline start EP callback.
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+
+ // If we are planning to perform ThinLTO later, we don't bloat the code with
+ // unrolling/vectorization/... now. Just simplify the module as much as we
+ // can.
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPreLink));
+
+ // Run partial inlining pass to partially inline functions that have
+ // large bodies.
+ // FIXME: It isn't clear whether this is really the right place to run this
+ // in ThinLTO. Because there is another canonicalization and simplification
+ // phase that will run after the thin link, running this here ends up with
+ // less information than will be available later and it may grow functions in
+ // ways that aren't beneficial.
+ if (RunPartialInlining)
+ MPM.addPass(PartialInlinerPass());
+
+ // Reduce the size of the IR as much as possible.
+ MPM.addPass(GlobalOptPass());
+
+ // Module simplification splits coroutines, but does not fully clean up
+ // coroutine intrinsics. To ensure ThinLTO optimization passes don't trip up
+ // on these, we schedule the cleanup here.
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
+ PGOOpt->Action == PGOOptions::SampleUse)
+ MPM.addPass(PseudoProbeUpdatePass());
+
+ // Handle OptimizerLastEPCallbacks added by clang on PreLink. The actual
+ // optimization is going to be done in the PostLink stage, but clang can't
+ // add callbacks there in the case of in-process ThinLTO called by the linker.
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ addRequiredLTOPreLinkPasses(MPM);
+
+ return MPM;
+}
+
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+ OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ if (ImportSummary) {
+ // These passes import type identifier resolutions for whole-program
+ // devirtualization and CFI. They must run early because other passes may
+ // disturb the specific instruction patterns that these passes look for,
+ // creating dependencies on resolutions that may not appear in the summary.
+ //
+ // For example, GVN may transform the pattern assume(type.test) appearing in
+ // two basic blocks into assume(phi(type.test, type.test)), which would
+ // transform a dependency on a WPD resolution into a dependency on a type
+ // identifier resolution for CFI.
+ //
+ // Also, WPD has access to more precise information than ICP and can
+ // devirtualize more effectively, so it should operate on the IR first.
+ //
+ // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+ // metadata and intrinsics.
+ MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+ MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+ }
+
+ if (Level == OptimizationLevel::O0) {
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP.
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+ // Drop available_externally and unreferenced globals. This is necessary
+ // with ThinLTO in order to avoid leaving undefined references to dead
+ // globals in the object file.
+ MPM.addPass(EliminateAvailableExternallyPass());
+ MPM.addPass(GlobalDCEPass());
+ return MPM;
+ }
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Add the core simplification pipeline.
+ MPM.addPass(buildModuleSimplificationPipeline(
+ Level, ThinOrFullLTOPhase::ThinLTOPostLink));
+
+ // Now add the optimization pipeline.
+ MPM.addPass(buildModuleOptimizationPipeline(Level));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Must request optimizations for the default pipeline!");
+ // FIXME: We should use a customized pre-link pipeline!
+ return buildPerModuleDefaultPipeline(Level,
+ /* LTOPreLink */ true);
+}
+
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
+ ModuleSummaryIndex *ExportSummary) {
+ ModulePassManager MPM;
+
+ // Convert @llvm.global.annotations to !annotation metadata.
+ MPM.addPass(Annotation2MetadataPass());
+
+ // Create a function that performs CFI checks for cross-DSO calls with targets
+ // in the current module.
+ MPM.addPass(CrossDSOCFIPass());
+
+ if (Level == OptimizationLevel::O0) {
+ // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+ // metadata and intrinsics.
+ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP.
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+ }
+
+ if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
+ // Load sample profile before running the LTO optimization pipeline.
+ MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile,
+ ThinOrFullLTOPhase::FullLTOPostLink));
+ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+ // RequireAnalysisPass for PSI before subsequent non-module passes.
+ MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+ }
+
+ // Remove unused virtual tables to improve the quality of code generated by
+ // whole-program devirtualization and bitset lowering.
+ MPM.addPass(GlobalDCEPass());
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ if (Level.getSpeedupLevel() > 1) {
+ FunctionPassManager EarlyFPM;
+ EarlyFPM.addPass(CallSiteSplittingPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
+
+ // Indirect call promotion. This should promote all the targets that are
+ // left by the earlier promotion pass that promotes intra-module targets.
+ // This two-step promotion saves compile time. For LTO, it should produce
+ // the same result as if we only did promotion here.
+ MPM.addPass(PGOIndirectCallPromotion(
+ true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
+
+ if (EnableFunctionSpecialization && Level == OptimizationLevel::O3)
+ MPM.addPass(FunctionSpecializationPass());
+ // Propagate constants at call sites into the functions they call. This
+ // opens opportunities for globalopt (and inlining) by substituting function
+ // pointers passed as arguments to direct uses of functions.
+ MPM.addPass(IPSCCPPass());
+
+ // Attach metadata to indirect call sites indicating the set of functions
+ // they may target at run-time. This should follow IPSCCP.
+ MPM.addPass(CalledValuePropagationPass());
+ }
+
+ // Now deduce any function attributes based on the current code.
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Use in-range annotations on GEP indices to split globals where beneficial.
+ MPM.addPass(GlobalSplitPass());
+
+ // Run whole-program optimization of virtual calls when the list of callees
+ // is fixed.
+ MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+
+ // Stop here at -O1.
+ if (Level == OptimizationLevel::O1) {
+ // The LowerTypeTestsPass needs to run to lower type metadata and the
+ // type.test intrinsics. The pass does nothing if CFI is disabled.
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO
+ // pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+ }
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Linking modules together can lead to duplicate global constants; only
+ // keep one copy of each constant.
+ MPM.addPass(ConstantMergePass());
+
+ // Remove unused arguments from functions.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Reduce the code after globalopt and ipsccp. Both can open up significant
+ // simplification opportunities, and both can propagate functions through
+ // function pointers. When this happens, we often have to resolve varargs
+ // calls, etc, so let instcombine do this.
+ FunctionPassManager PeepholeFPM;
+ if (Level == OptimizationLevel::O3)
+ PeepholeFPM.addPass(AggressiveInstCombinePass());
+ PeepholeFPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(PeepholeFPM, Level);
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+ // Run the inliner now.
+ MPM.addPass(ModuleInlinerWrapperPass(getInlineParamsFromOptLevel(Level)));
+
+ // Optimize globals again after we ran the inliner.
+ MPM.addPass(GlobalOptPass());
+
+ // Garbage collect dead functions.
+ MPM.addPass(GlobalDCEPass());
+
+ // If we didn't decide to inline a function, check to see if we can
+ // transform it to pass arguments by value instead of by reference.
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
+
+ FunctionPassManager FPM;
+ // The IPO Passes may leave cruft around. Clean up after them.
+ FPM.addPass(InstCombinePass());
+ invokePeepholeEPCallbacks(FPM, Level);
+
+ FPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+
+ // Do a post-inline PGO instrumentation and use pass. This is a context
+ // sensitive PGO pass.
+ if (PGOOpt) {
+ if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
+ /* IsCS */ true, PGOOpt->CSProfileGenFile,
+ PGOOpt->ProfileRemappingFile);
+ else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+ addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
+ /* IsCS */ true, PGOOpt->ProfileFile,
+ PGOOpt->ProfileRemappingFile);
+ }
+
+ // Break up allocas
+ FPM.addPass(SROAPass());
+
+ // LTO provides additional opportunities for tail call elimination due to
+ // link-time inlining and visibility of the nocapture attribute.
+ FPM.addPass(TailCallElimPass());
+
+ // Run a few AA-driven optimizations here and now to clean up the code.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
+
+ // Require the GlobalsAA analysis for the module so we can query it within
+ // MainFPM.
+ MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+ // Invalidate AAManager so it can be recreated and pick up the newly available
+ // GlobalsAA.
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(InvalidateAnalysisPass<AAManager>()));
+
+ FunctionPassManager MainFPM;
+ MainFPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+
+ if (RunNewGVN)
+ MainFPM.addPass(NewGVNPass());
+ else
+ MainFPM.addPass(GVNPass());
+
+ // Remove dead memcpy calls.
+ MainFPM.addPass(MemCpyOptPass());
+
+ // Nuke dead stores.
+ MainFPM.addPass(DSEPass());
+ MainFPM.addPass(MergedLoadStoreMotionPass());
+
+ // More loops are countable; try to optimize them.
+ if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
+ MainFPM.addPass(createFunctionToLoopPassAdaptor(LoopFlattenPass()));
+
+ if (EnableConstraintElimination)
+ MainFPM.addPass(ConstraintEliminationPass());
+
+ LoopPassManager LPM;
+ LPM.addPass(IndVarSimplifyPass());
+ LPM.addPass(LoopDeletionPass());
+ // FIXME: Add loop interchange.
+
+ // Unroll small loops and perform peeling.
+ LPM.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(),
+ /* OnlyWhenForced= */ !PTO.LoopUnrolling,
+ PTO.ForgetAllSCEVInLoopUnroll));
+ // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
+ // *All* loop passes must preserve it for the adaptor to be able to use it.
+ MainFPM.addPass(createFunctionToLoopPassAdaptor(
+ std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
+
+ MainFPM.addPass(LoopDistributePass());
+
+ addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
+
+ invokePeepholeEPCallbacks(MainFPM, Level);
+ MainFPM.addPass(JumpThreadingPass(/*InsertFreezeWhenUnfoldingSelect*/ true));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM),
+ PTO.EagerlyInvalidateAnalyses));
+
+ // Lower type metadata and the type.test intrinsic. This pass supports
+ // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
+ // to be run at link time if CFI is enabled. This pass does nothing if
+ // CFI is disabled.
+ MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+ // Run a second time to clean up any type tests left behind by WPD for use
+ // in ICP (which is performed earlier than this in the regular LTO pipeline).
+ MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
+
+ // Enable splitting late in the FullLTO post-link pipeline. This is done in
+ // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+ if (EnableHotColdSplit)
+ MPM.addPass(HotColdSplittingPass());
+
+ // Add late LTO optimization passes.
+ // Delete basic blocks, which optimization passes may have killed.
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true))));
+
+ // Drop bodies of available externally objects to improve GlobalDCE.
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ // Now that we have optimized the program, discard unreachable functions.
+ MPM.addPass(GlobalDCEPass());
+
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ // Emit annotation remarks.
+ addAnnotationRemarksPass(MPM);
+
+ return MPM;
+}
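A short, illustrative sketch of how a regular (full) LTO backend would drive this pipeline, reusing an analysis-manager setup like the one sketched after buildPerModuleDefaultPipeline; the function and parameter names here are assumptions:

#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

void runFullLTOPipeline(Module &MergedModule,
                        ModuleSummaryIndex &CombinedIndex, PassBuilder &PB,
                        ModuleAnalysisManager &MAM) {
  // Build and run the post-link full-LTO pipeline over the merged module.
  ModulePassManager MPM =
      PB.buildLTODefaultPipeline(OptimizationLevel::O2, &CombinedIndex);
  MPM.run(MergedModule, MAM);
}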
+
+ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
+ bool LTOPreLink) {
+ assert(Level == OptimizationLevel::O0 &&
+ "buildO0DefaultPipeline should only be used with O0");
+
+ ModulePassManager MPM;
+
+ // Perform pseudo probe instrumentation in O0 mode. This is for
+ // consistency between different build modes. For example, an LTO build can be
+ // mixed with an O0 prelink and an O2 postlink. Loading a sample profile in
+ // the postlink will require pseudo probe instrumentation in the prelink.
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(SampleProfileProbePass(TM));
+
+ if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
+ PGOOpt->Action == PGOOptions::IRUse))
+ addPGOInstrPassesForO0(
+ MPM,
+ /* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
+ /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
+
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+
+ if (PGOOpt && PGOOpt->DebugInfoForProfiling)
+ MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+
+ // Build a minimal pipeline based on the semantics required by LLVM,
+ // which is just that always inlining occurs. Further, disable generating
+ // lifetime intrinsics to avoid enabling further optimizations during
+ // code generation.
+ MPM.addPass(AlwaysInlinerPass(
+ /*InsertLifetimeIntrinsics=*/false));
+
+ if (PTO.MergeFunctions)
+ MPM.addPass(MergeFunctionsPass());
+
+ if (EnableMatrix)
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(LowerMatrixIntrinsicsPass(true)));
+
+ if (!CGSCCOptimizerLateEPCallbacks.empty()) {
+ CGSCCPassManager CGPM;
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(CGPM, Level);
+ if (!CGPM.isEmpty())
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ }
+ if (!LateLoopOptimizationsEPCallbacks.empty()) {
+ LoopPassManager LPM;
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM, Level);
+ if (!LPM.isEmpty()) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(std::move(LPM))));
+ }
+ }
+ if (!LoopOptimizerEndEPCallbacks.empty()) {
+ LoopPassManager LPM;
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM, Level);
+ if (!LPM.isEmpty()) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ createFunctionToLoopPassAdaptor(std::move(LPM))));
+ }
+ }
+ if (!ScalarOptimizerLateEPCallbacks.empty()) {
+ FunctionPassManager FPM;
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+ if (!FPM.isEmpty())
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+ if (!VectorizerStartEPCallbacks.empty()) {
+ FunctionPassManager FPM;
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(FPM, Level);
+ if (!FPM.isEmpty())
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ }
+
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroEarlyPass()));
+ CGSCCPassManager CGPM;
+ CGPM.addPass(CoroSplitPass());
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+
+ if (LTOPreLink)
+ addRequiredLTOPreLinkPasses(MPM);
+
+ return MPM;
+}
+
+AAManager PassBuilder::buildDefaultAAPipeline() {
+ AAManager AA;
+
+ // The order in which these are registered determines their priority when
+ // being queried.
+
+ // First we register the basic alias analysis that provides the majority of
+ // per-function local AA logic. This is a stateless, on-demand local set of
+ // AA techniques.
+ AA.registerFunctionAnalysis<BasicAA>();
+
+ // Next we query fast, specialized alias analyses that wrap IR-embedded
+ // information about aliasing.
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+
+ // Add support for querying global aliasing information when available.
+ // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
+ // analysis, all that the `AAManager` can do is query for any *cached*
+ // results from `GlobalsAA` through a readonly proxy.
+ AA.registerModuleAnalysis<GlobalsAA>();
+
+ // Add target-specific alias analyses.
+ if (TM)
+ TM->registerDefaultAliasAnalyses(AA);
+
+ return AA;
+}
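buildDefaultAAPipeline only constructs the AAManager; it takes effect through the FunctionAnalysisManager. A client that wants a different AA stack registers its own AAManager before PassBuilder::registerFunctionAnalyses(), which otherwise installs the default pipeline. An illustrative sketch follows; the minimal BasicAA+TBAA stack is an assumption, not something this change prescribes:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

void registerCustomAA(FunctionAnalysisManager &FAM, PassBuilder &PB) {
  // Registering an AAManager first means registerFunctionAnalyses() keeps it
  // instead of installing buildDefaultAAPipeline().
  FAM.registerPass([&] {
    AAManager AA;
    AA.registerFunctionAnalysis<BasicAA>();     // stateless local base layer
    AA.registerFunctionAnalysis<TypeBasedAA>(); // !tbaa metadata
    return AA;
  });
  PB.registerFunctionAnalyses(FAM);
}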
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 7525d59f94a5..c2032b5b8276 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -60,8 +60,6 @@ MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
-MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
-MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("inliner-wrapper", ModuleInlinerWrapperPass())
MODULE_PASS("inliner-wrapper-no-mandatory-first", ModuleInlinerWrapperPass(
@@ -75,7 +73,6 @@ MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
MODULE_PASS("iroutliner", IROutlinerPass())
MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
-MODULE_PASS("loop-extract", LoopExtractorPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
@@ -101,7 +98,6 @@ MODULE_PASS("rpo-function-attrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
MODULE_PASS("scc-oz-module-inliner",
buildInlinerPipeline(OptimizationLevel::Oz, ThinOrFullLTOPhase::None))
-MODULE_PASS("loop-extract-single", LoopExtractorPass(1))
MODULE_PASS("strip", StripSymbolsPass())
MODULE_PASS("strip-dead-debug-info", StripDeadDebugInfoPass())
MODULE_PASS("pseudo-probe", SampleProfileProbePass(TM))
@@ -113,16 +109,43 @@ MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
MODULE_PASS("verify", VerifierPass())
MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
MODULE_PASS("dfsan", DataFlowSanitizerPass())
-MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
-MODULE_PASS("msan-module", MemorySanitizerPass({}))
-MODULE_PASS("tsan-module", ThreadSanitizerPass())
-MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("msan-module", ModuleMemorySanitizerPass({}))
+MODULE_PASS("module-inline", ModuleInlinerPass())
+MODULE_PASS("tsan-module", ModuleThreadSanitizerPass())
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
+#ifndef MODULE_PASS_WITH_PARAMS
+#define MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
+#endif
+MODULE_PASS_WITH_PARAMS("loop-extract",
+ "LoopExtractorPass",
+ [](bool Single) {
+ if (Single)
+ return LoopExtractorPass(1);
+ return LoopExtractorPass();
+ },
+ parseLoopExtractorPassOptions,
+ "single")
+MODULE_PASS_WITH_PARAMS("hwasan",
+ "HWAddressSanitizerPass",
+ [](HWAddressSanitizerOptions Opts) {
+ return HWAddressSanitizerPass(Opts);
+ },
+ parseHWASanPassOptions,
+ "kernel;recover")
+MODULE_PASS_WITH_PARAMS("asan-module",
+ "ModuleAddressSanitizerPass",
+ [](AddressSanitizerOptions Opts) {
+ return ModuleAddressSanitizerPass(Opts);
+ },
+ parseASanPassOptions,
+ "kernel")
+#undef MODULE_PASS_WITH_PARAMS
+
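Each *_PASS_WITH_PARAMS entry pairs a factory lambda with a parser that turns the text inside the angle brackets of, e.g., -passes='inline<only-mandatory>' into the lambda's argument. Below is a sketch of that parser shape; it is an illustration only, not the actual parseInlinerPassOptions from PassBuilder.cpp:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FormatVariadic.h"
#include <tuple>
using namespace llvm;

// Splits the semicolon-separated parameter list and maps each recognized
// option onto the value consumed by the factory lambda above.
static Expected<bool> parseInlinerPassOptionsSketch(StringRef Params) {
  bool OnlyMandatory = false;
  while (!Params.empty()) {
    StringRef Name;
    std::tie(Name, Params) = Params.split(';');
    if (Name == "only-mandatory")
      OnlyMandatory = true;
    else
      return make_error<StringError>(
          formatv("invalid InlinerPass parameter '{0}'", Name).str(),
          inconvertibleErrorCode());
  }
  return OnlyMandatory;
}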
#ifndef CGSCC_ANALYSIS
#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
#endif
@@ -138,12 +161,23 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
-CGSCC_PASS("inline", InlinerPass())
CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
CGSCC_PASS("coro-split", CoroSplitPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
+#ifndef CGSCC_PASS_WITH_PARAMS
+#define CGSCC_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
+#endif
+CGSCC_PASS_WITH_PARAMS("inline",
+ "InlinerPass",
+ [](bool OnlyMandatory) {
+ return InlinerPass(OnlyMandatory);
+ },
+ parseInlinerPassOptions,
+ "only-mandatory")
+#undef CGSCC_PASS_WITH_PARAMS
+
#ifndef FUNCTION_ANALYSIS
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#endif
@@ -167,6 +201,7 @@ FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
+FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis())
FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
FUNCTION_ANALYSIS("targetir",
@@ -217,12 +252,8 @@ FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("dot-cfg", CFGPrinterPass())
FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
-FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
-FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
-FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false))
FUNCTION_PASS("fix-irreducible", FixIrreduciblePass())
FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
-FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("gvn-sink", GVNSinkPass())
FUNCTION_PASS("helloworld", HelloWorldPass())
@@ -242,8 +273,6 @@ FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass())
-FUNCTION_PASS("lower-matrix-intrinsics", LowerMatrixIntrinsicsPass())
-FUNCTION_PASS("lower-matrix-intrinsics-minimal", LowerMatrixIntrinsicsPass(true))
FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
@@ -273,6 +302,7 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
+FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
@@ -286,6 +316,7 @@ FUNCTION_PASS("print<inliner-size-estimator>",
InlineSizeEstimatorAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
+FUNCTION_PASS("print<memoryssa-walker>", MemorySSAWalkerPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
@@ -306,7 +337,7 @@ FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
-FUNCTION_PASS("sroa", SROA())
+FUNCTION_PASS("sroa", SROAPass())
FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
@@ -322,10 +353,6 @@ FUNCTION_PASS("verify<scalar-evolution>", ScalarEvolutionVerifierPass())
FUNCTION_PASS("view-cfg", CFGViewerPass())
FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false))
-FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
-FUNCTION_PASS("msan", MemorySanitizerPass({}))
-FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("memprof", MemProfilerPass())
#undef FUNCTION_PASS
@@ -333,6 +360,27 @@ FUNCTION_PASS("memprof", MemProfilerPass())
#ifndef FUNCTION_PASS_WITH_PARAMS
#define FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#endif
+FUNCTION_PASS_WITH_PARAMS("early-cse",
+ "EarlyCSEPass",
+ [](bool UseMemorySSA) {
+ return EarlyCSEPass(UseMemorySSA);
+ },
+ parseEarlyCSEPassOptions,
+ "memssa")
+FUNCTION_PASS_WITH_PARAMS("ee-instrument",
+ "EntryExitInstrumenterPass",
+ [](bool PostInlining) {
+ return EntryExitInstrumenterPass(PostInlining);
+ },
+ parseEntryExitInstrumenterPassOptions,
+ "post-inline")
+FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
+ "LowerMatrixIntrinsicsPass",
+ [](bool Minimal) {
+ return LowerMatrixIntrinsicsPass(Minimal);
+ },
+ parseLowerMatrixIntrinsicsPassOptions,
+ "minimal")
FUNCTION_PASS_WITH_PARAMS("loop-unroll",
"LoopUnrollPass",
[](LoopUnrollOptions Opts) {
@@ -345,6 +393,13 @@ FUNCTION_PASS_WITH_PARAMS("loop-unroll",
"no-profile-peeling;profile-peeling;"
"no-runtime;runtime;"
"no-upperbound;upperbound")
+FUNCTION_PASS_WITH_PARAMS("asan",
+ "AddressSanitizerPass",
+ [](AddressSanitizerOptions Opts) {
+ return AddressSanitizerPass(Opts);
+ },
+ parseASanPassOptions,
+ "kernel")
FUNCTION_PASS_WITH_PARAMS("msan",
"MemorySanitizerPass",
[](MemorySanitizerOptions Opts) {
@@ -381,9 +436,9 @@ FUNCTION_PASS_WITH_PARAMS("mldst-motion",
parseMergedLoadStoreMotionOptions,
"no-split-footer-bb;split-footer-bb")
FUNCTION_PASS_WITH_PARAMS("gvn",
- "GVN",
+ "GVNPass",
[](GVNOptions Opts) {
- return GVN(Opts);
+ return GVNPass(Opts);
},
parseGVNOptions,
"no-pre;pre;"
@@ -399,6 +454,16 @@ FUNCTION_PASS_WITH_PARAMS("print<stack-lifetime>",
"may;must")
#undef FUNCTION_PASS_WITH_PARAMS
+#ifndef LOOPNEST_PASS
+#define LOOPNEST_PASS(NAME, CREATE_PASS)
+#endif
+LOOPNEST_PASS("lnicm", LNICMPass())
+LOOPNEST_PASS("loop-flatten", LoopFlattenPass())
+LOOPNEST_PASS("loop-interchange", LoopInterchangePass())
+LOOPNEST_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
+LOOPNEST_PASS("no-op-loopnest", NoOpLoopNestPass())
+#undef LOOPNEST_PASS
+
#ifndef LOOP_ANALYSIS
#define LOOP_ANALYSIS(NAME, CREATE_PASS)
#endif
@@ -416,11 +481,8 @@ LOOP_PASS("canon-freeze", CanonicalizeFreezeInLoopsPass())
LOOP_PASS("dot-ddg", DDGDotPrinterPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
LOOP_PASS("licm", LICMPass())
-LOOP_PASS("lnicm", LNICMPass())
-LOOP_PASS("loop-flatten", LoopFlattenPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
-LOOP_PASS("loop-interchange", LoopInterchangePass())
LOOP_PASS("loop-rotate", LoopRotatePass())
LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
@@ -428,7 +490,6 @@ LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
LOOP_PASS("loop-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
-LOOP_PASS("loop-unroll-and-jam", LoopUnrollAndJamPass())
LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ddg>", DDGAnalysisPrinterPass(dbgs()))
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index 5a48923bce8a..8e6be6730ea4 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -29,10 +29,14 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
+#include <unordered_map>
#include <unordered_set>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -40,10 +44,11 @@ using namespace llvm;
cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
"verify-cfg-preserved", cl::Hidden,
#ifdef NDEBUG
- cl::init(false));
+ cl::init(false)
#else
- cl::init(true));
+ cl::init(true)
#endif
+ );
// An option that prints out the IR after passes, similar to
// -print-after-all except that it only prints the IR after passes that
@@ -79,7 +84,9 @@ enum class ChangePrinter {
PrintChangedDiffVerbose,
PrintChangedDiffQuiet,
PrintChangedColourDiffVerbose,
- PrintChangedColourDiffQuiet
+ PrintChangedColourDiffQuiet,
+ PrintChangedDotCfgVerbose,
+ PrintChangedDotCfgQuiet
};
static cl::opt<ChangePrinter> PrintChanged(
"print-changed", cl::desc("Print changed IRs"), cl::Hidden,
@@ -95,6 +102,10 @@ static cl::opt<ChangePrinter> PrintChanged(
"Display patch-like changes with color"),
clEnumValN(ChangePrinter::PrintChangedColourDiffQuiet, "cdiff-quiet",
"Display patch-like changes in quiet mode with color"),
+ clEnumValN(ChangePrinter::PrintChangedDotCfgVerbose, "dot-cfg",
+ "Create a website with graphical changes"),
+ clEnumValN(ChangePrinter::PrintChangedDotCfgQuiet, "dot-cfg-quiet",
+ "Create a website with graphical changes in quiet mode"),
// Sentinel value for unspecified option.
clEnumValN(ChangePrinter::PrintChangedVerbose, "", "")));
@@ -119,6 +130,40 @@ static cl::opt<std::string>
DiffBinary("print-changed-diff-path", cl::Hidden, cl::init("diff"),
cl::desc("system diff used by change reporters"));
+// An option for specifying the dot used by
+// print-changed=[dot-cfg | dot-cfg-quiet]
+static cl::opt<std::string>
+ DotBinary("print-changed-dot-path", cl::Hidden, cl::init("dot"),
+ cl::desc("system dot used by change reporters"));
+
+// An option that determines the colour used for elements that are only
+// in the before part. Must be a colour named in appendix J of
+// https://graphviz.org/pdf/dotguide.pdf
+cl::opt<std::string>
+ BeforeColour("dot-cfg-before-color",
+ cl::desc("Color for dot-cfg before elements."), cl::Hidden,
+ cl::init("red"));
+// An option that determines the colour used for elements that are only
+// in the after part. Must be a colour named in appendix J of
+// https://graphviz.org/pdf/dotguide.pdf
+cl::opt<std::string> AfterColour("dot-cfg-after-color",
+ cl::desc("Color for dot-cfg after elements."),
+ cl::Hidden, cl::init("forestgreen"));
+// An option that determines the colour used for elements that are in both
+// the before and after parts. Must be a colour named in appendix J of
+// https://graphviz.org/pdf/dotguide.pdf
+cl::opt<std::string>
+ CommonColour("dot-cfg-common-color",
+ cl::desc("Color for dot-cfg common elements."), cl::Hidden,
+ cl::init("black"));
+
+// An option that determines where the generated website file (named
+// passes.html) and the associated pdf files (named diff_*.pdf) are saved.
+static cl::opt<std::string> DotCfgDir(
+ "dot-cfg-dir",
+ cl::desc("Generate dot files into specified directory for changed IRs"),
+ cl::Hidden, cl::init("./"));
+
namespace {
// Perform a system based diff between \p Before and \p After, using
@@ -166,7 +211,8 @@ std::string doSystemDiff(StringRef Before, StringRef After,
SmallString<128> ULF =
formatv("--unchanged-line-format={0}", UnchangedLineFormat);
- StringRef Args[] = {"-w", "-d", OLF, NLF, ULF, FileName[0], FileName[1]};
+ StringRef Args[] = {DiffBinary, "-w", "-d", OLF,
+ NLF, ULF, FileName[0], FileName[1]};
Optional<StringRef> Redirects[] = {None, StringRef(FileName[2]), None};
int Result = sys::ExecuteAndWait(*DiffExe, Args, None, Redirects);
if (Result < 0)
@@ -230,10 +276,9 @@ void printIR(raw_ostream &OS, const Function *F) {
OS << *F;
}
-void printIR(raw_ostream &OS, const Module *M,
- bool ShouldPreserveUseListOrder = false) {
+void printIR(raw_ostream &OS, const Module *M) {
if (isFunctionInPrintList("*") || forcePrintModuleIR()) {
- M->print(OS, nullptr, ShouldPreserveUseListOrder);
+ M->print(OS, nullptr);
} else {
for (const auto &F : M->functions()) {
printIR(OS, &F);
@@ -323,21 +368,20 @@ bool shouldPrintIR(Any IR) {
/// Generic IR-printing helper that unpacks a pointer to IRUnit wrapped into
/// llvm::Any and does actual print job.
-void unwrapAndPrint(raw_ostream &OS, Any IR,
- bool ShouldPreserveUseListOrder = false) {
+void unwrapAndPrint(raw_ostream &OS, Any IR) {
if (!shouldPrintIR(IR))
return;
if (forcePrintModuleIR()) {
auto *M = unwrapModule(IR);
assert(M && "should have unwrapped module");
- printIR(OS, M, ShouldPreserveUseListOrder);
+ printIR(OS, M);
return;
}
if (any_isa<const Module *>(IR)) {
const Module *M = any_cast<const Module *>(IR);
- printIR(OS, M, ShouldPreserveUseListOrder);
+ printIR(OS, M);
return;
}
@@ -368,20 +412,46 @@ bool isIgnored(StringRef PassID) {
"DevirtSCCRepeatedPass", "ModuleInlinerWrapperPass"});
}
+std::string makeHTMLReady(StringRef SR) {
+ std::string S;
+ while (true) {
+ StringRef Clean =
+ SR.take_until([](char C) { return C == '<' || C == '>'; });
+ S.append(Clean.str());
+ SR = SR.drop_front(Clean.size());
+ if (SR.size() == 0)
+ return S;
+ S.append(SR[0] == '<' ? "&lt;" : "&gt;");
+ SR = SR.drop_front();
+ }
+ llvm_unreachable("problems converting string to HTML");
+}
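A quick usage sketch of the helper above, escaping a symbol name before it is embedded in the generated passes.html page (illustrative, and it relies on makeHTMLReady() as defined above):

#include <string>

std::string Cell = makeHTMLReady("llvm::SmallVector<int, 4>");
// Angle brackets become HTML entities: "llvm::SmallVector&lt;int, 4&gt;".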
+
+// Return the module when that is the appropriate level of comparison for \p IR.
+const Module *getModuleForComparison(Any IR) {
+ if (any_isa<const Module *>(IR))
+ return any_cast<const Module *>(IR);
+ if (any_isa<const LazyCallGraph::SCC *>(IR))
+ return any_cast<const LazyCallGraph::SCC *>(IR)
+ ->begin()
+ ->getFunction()
+ .getParent();
+ return nullptr;
+}
+
} // namespace
-template <typename IRUnitT>
-ChangeReporter<IRUnitT>::~ChangeReporter<IRUnitT>() {
+template <typename T> ChangeReporter<T>::~ChangeReporter<T>() {
assert(BeforeStack.empty() && "Problem with Change Printer stack.");
}
-template <typename IRUnitT>
-bool ChangeReporter<IRUnitT>::isInterestingFunction(const Function &F) {
+template <typename T>
+bool ChangeReporter<T>::isInterestingFunction(const Function &F) {
return isFunctionInPrintList(F.getName());
}
-template <typename IRUnitT>
-bool ChangeReporter<IRUnitT>::isInterestingPass(StringRef PassID) {
+template <typename T>
+bool ChangeReporter<T>::isInterestingPass(StringRef PassID) {
if (isIgnored(PassID))
return false;
@@ -392,8 +462,8 @@ bool ChangeReporter<IRUnitT>::isInterestingPass(StringRef PassID) {
// Return true when this is a pass on IR for which printing
// of changes is desired.
-template <typename IRUnitT>
-bool ChangeReporter<IRUnitT>::isInteresting(Any IR, StringRef PassID) {
+template <typename T>
+bool ChangeReporter<T>::isInteresting(Any IR, StringRef PassID) {
if (!isInterestingPass(PassID))
return false;
if (any_isa<const Function *>(IR))
@@ -401,8 +471,8 @@ bool ChangeReporter<IRUnitT>::isInteresting(Any IR, StringRef PassID) {
return true;
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::saveIRBeforePass(Any IR, StringRef PassID) {
+template <typename T>
+void ChangeReporter<T>::saveIRBeforePass(Any IR, StringRef PassID) {
// Always need to place something on the stack because invalidated passes
// are not given the IR so it cannot be determined whether the pass was for
// something that was filtered out.
@@ -418,12 +488,12 @@ void ChangeReporter<IRUnitT>::saveIRBeforePass(Any IR, StringRef PassID) {
}
// Save the IR representation on the stack.
- IRUnitT &Data = BeforeStack.back();
+ T &Data = BeforeStack.back();
generateIRRepresentation(IR, PassID, Data);
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
+template <typename T>
+void ChangeReporter<T>::handleIRAfterPass(Any IR, StringRef PassID) {
assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
std::string Name = getIRName(IR);
@@ -436,13 +506,13 @@ void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
handleFiltered(PassID, Name);
} else {
// Get the before rep from the stack
- IRUnitT &Before = BeforeStack.back();
+ T &Before = BeforeStack.back();
// Create the after rep
- IRUnitT After;
+ T After;
generateIRRepresentation(IR, PassID, After);
// Was there a change in IR?
- if (same(Before, After)) {
+ if (Before == After) {
if (VerboseMode)
omitAfter(PassID, Name);
} else
@@ -451,8 +521,8 @@ void ChangeReporter<IRUnitT>::handleIRAfterPass(Any IR, StringRef PassID) {
BeforeStack.pop_back();
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::handleInvalidatedPass(StringRef PassID) {
+template <typename T>
+void ChangeReporter<T>::handleInvalidatedPass(StringRef PassID) {
assert(!BeforeStack.empty() && "Unexpected empty stack encountered.");
// Always flag it as invalidated as we cannot determine when
@@ -464,8 +534,8 @@ void ChangeReporter<IRUnitT>::handleInvalidatedPass(StringRef PassID) {
BeforeStack.pop_back();
}
-template <typename IRUnitT>
-void ChangeReporter<IRUnitT>::registerRequiredCallbacks(
+template <typename T>
+void ChangeReporter<T>::registerRequiredCallbacks(
PassInstrumentationCallbacks &PIC) {
PIC.registerBeforeNonSkippedPassCallback(
[this](StringRef P, Any IR) { saveIRBeforePass(IR, P); });
@@ -480,50 +550,40 @@ void ChangeReporter<IRUnitT>::registerRequiredCallbacks(
});
}
-ChangedBlockData::ChangedBlockData(const BasicBlock &B)
- : Label(B.getName().str()) {
- raw_string_ostream SS(Body);
- B.print(SS, nullptr, true, true);
-}
-
-template <typename IRUnitT>
-TextChangeReporter<IRUnitT>::TextChangeReporter(bool Verbose)
- : ChangeReporter<IRUnitT>(Verbose), Out(dbgs()) {}
+template <typename T>
+TextChangeReporter<T>::TextChangeReporter(bool Verbose)
+ : ChangeReporter<T>(Verbose), Out(dbgs()) {}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleInitialIR(Any IR) {
+template <typename T> void TextChangeReporter<T>::handleInitialIR(Any IR) {
// Always print the module.
// Unwrap and print directly to avoid filtering problems in general routines.
auto *M = unwrapModule(IR, /*Force=*/true);
assert(M && "Expected module to be unwrapped when forced.");
Out << "*** IR Dump At Start ***\n";
- M->print(Out, nullptr,
- /*ShouldPreserveUseListOrder=*/true);
+ M->print(Out, nullptr);
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::omitAfter(StringRef PassID,
- std::string &Name) {
+template <typename T>
+void TextChangeReporter<T>::omitAfter(StringRef PassID, std::string &Name) {
Out << formatv("*** IR Dump After {0} on {1} omitted because no change ***\n",
PassID, Name);
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleInvalidated(StringRef PassID) {
+template <typename T>
+void TextChangeReporter<T>::handleInvalidated(StringRef PassID) {
Out << formatv("*** IR Pass {0} invalidated ***\n", PassID);
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleFiltered(StringRef PassID,
- std::string &Name) {
+template <typename T>
+void TextChangeReporter<T>::handleFiltered(StringRef PassID,
+ std::string &Name) {
SmallString<20> Banner =
formatv("*** IR Dump After {0} on {1} filtered out ***\n", PassID, Name);
Out << Banner;
}
-template <typename IRUnitT>
-void TextChangeReporter<IRUnitT>::handleIgnored(StringRef PassID,
- std::string &Name) {
+template <typename T>
+void TextChangeReporter<T>::handleIgnored(StringRef PassID, std::string &Name) {
Out << formatv("*** IR Pass {0} on {1} ignored ***\n", PassID, Name);
}
@@ -538,8 +598,7 @@ void IRChangedPrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
void IRChangedPrinter::generateIRRepresentation(Any IR, StringRef PassID,
std::string &Output) {
raw_string_ostream OS(Output);
- unwrapAndPrint(OS, IR,
- /*ShouldPreserveUseListOrder=*/true);
+ unwrapAndPrint(OS, IR);
OS.str();
}
@@ -561,14 +620,10 @@ void IRChangedPrinter::handleAfter(StringRef PassID, std::string &Name,
Out << "*** IR Dump After " << PassID << " on " << Name << " ***\n" << After;
}
-bool IRChangedPrinter::same(const std::string &S1, const std::string &S2) {
- return S1 == S2;
-}
-
-template <typename IRData>
-void OrderedChangedData<IRData>::report(
+template <typename T>
+void OrderedChangedData<T>::report(
const OrderedChangedData &Before, const OrderedChangedData &After,
- function_ref<void(const IRData *, const IRData *)> HandlePair) {
+ function_ref<void(const T *, const T *)> HandlePair) {
const auto &BFD = Before.getData();
const auto &AFD = After.getData();
std::vector<std::string>::const_iterator BI = Before.getOrder().begin();
@@ -576,21 +631,21 @@ void OrderedChangedData<IRData>::report(
std::vector<std::string>::const_iterator AI = After.getOrder().begin();
std::vector<std::string>::const_iterator AE = After.getOrder().end();
- auto handlePotentiallyRemovedIRData = [&](std::string S) {
+ auto HandlePotentiallyRemovedData = [&](std::string S) {
// The order in LLVM may have changed so check if still exists.
if (!AFD.count(S)) {
// This has been removed.
HandlePair(&BFD.find(*BI)->getValue(), nullptr);
}
};
- auto handleNewIRData = [&](std::vector<const IRData *> &Q) {
+ auto HandleNewData = [&](std::vector<const T *> &Q) {
// Print out any queued up new sections
- for (const IRData *NBI : Q)
+ for (const T *NBI : Q)
HandlePair(nullptr, NBI);
Q.clear();
};
- // Print out the IRData in the after order, with before ones interspersed
+ // Print out the data in the after order, with before ones interspersed
// appropriately (ie, somewhere near where they were in the before list).
// Start at the beginning of both lists. Loop through the
// after list. If an element is common, then advance in the before list
@@ -599,26 +654,26 @@ void OrderedChangedData<IRData>::report(
// common, then enqueue it for reporting. When the after list is exhausted,
// loop through the before list, reporting any removed ones. Finally,
// report the rest of the enqueued new ones.
- std::vector<const IRData *> NewIRDataQueue;
+ std::vector<const T *> NewDataQueue;
while (AI != AE) {
if (!BFD.count(*AI)) {
// This section is new so place it in the queue. This will cause it
// to be reported after deleted sections.
- NewIRDataQueue.emplace_back(&AFD.find(*AI)->getValue());
+ NewDataQueue.emplace_back(&AFD.find(*AI)->getValue());
++AI;
continue;
}
// This section is in both; advance and print out any before-only
// until we get to it.
while (*BI != *AI) {
- handlePotentiallyRemovedIRData(*BI);
+ HandlePotentiallyRemovedData(*BI);
++BI;
}
// Report any new sections that were queued up and waiting.
- handleNewIRData(NewIRDataQueue);
+ HandleNewData(NewDataQueue);
- const IRData &AData = AFD.find(*AI)->getValue();
- const IRData &BData = BFD.find(*AI)->getValue();
+ const T &AData = AFD.find(*AI)->getValue();
+ const T &BData = BFD.find(*AI)->getValue();
HandlePair(&BData, &AData);
++BI;
++AI;
@@ -626,39 +681,42 @@ void OrderedChangedData<IRData>::report(
// Check any remaining before sections to see if they have been removed
while (BI != BE) {
- handlePotentiallyRemovedIRData(*BI);
+ HandlePotentiallyRemovedData(*BI);
++BI;
}
- handleNewIRData(NewIRDataQueue);
+ HandleNewData(NewDataQueue);
}
-void ChangedIRComparer::compare(Any IR, StringRef Prefix, StringRef PassID,
- StringRef Name) {
- if (!getModuleForComparison(IR)) {
- // Not a module so just handle the single function.
- assert(Before.getData().size() == 1 && "Expected only one function.");
- assert(After.getData().size() == 1 && "Expected only one function.");
- handleFunctionCompare(Name, Prefix, PassID, false,
- Before.getData().begin()->getValue(),
- After.getData().begin()->getValue());
+template <typename T>
+void IRComparer<T>::compare(
+ bool CompareModule,
+ std::function<void(bool InModule, unsigned Minor,
+ const FuncDataT<T> &Before, const FuncDataT<T> &After)>
+ CompareFunc) {
+ if (!CompareModule) {
+ // Just handle the single function.
+ assert(Before.getData().size() == 1 && After.getData().size() == 1 &&
+ "Expected only one function.");
+ CompareFunc(false, 0, Before.getData().begin()->getValue(),
+ After.getData().begin()->getValue());
return;
}
- ChangedIRData::report(
- Before, After, [&](const ChangedFuncData *B, const ChangedFuncData *A) {
- ChangedFuncData Missing;
- if (!B)
- B = &Missing;
- else if (!A)
- A = &Missing;
- assert(B != &Missing && A != &Missing &&
- "Both functions cannot be missing.");
- handleFunctionCompare(Name, Prefix, PassID, true, *B, *A);
- });
+ unsigned Minor = 0;
+ FuncDataT<T> Missing("");
+ IRDataT<T>::report(Before, After,
+ [&](const FuncDataT<T> *B, const FuncDataT<T> *A) {
+ assert((B || A) && "Both functions cannot be missing.");
+ if (!B)
+ B = &Missing;
+ else if (!A)
+ A = &Missing;
+ CompareFunc(true, Minor++, *B, *A);
+ });
}
-void ChangedIRComparer::analyzeIR(Any IR, ChangedIRData &Data) {
+template <typename T> void IRComparer<T>::analyzeIR(Any IR, IRDataT<T> &Data) {
if (const Module *M = getModuleForComparison(IR)) {
// Create data for each existing/interesting function in the module.
for (const Function &F : *M)
@@ -678,27 +736,16 @@ void ChangedIRComparer::analyzeIR(Any IR, ChangedIRData &Data) {
generateFunctionData(Data, *F);
}
-const Module *ChangedIRComparer::getModuleForComparison(Any IR) {
- if (any_isa<const Module *>(IR))
- return any_cast<const Module *>(IR);
- if (any_isa<const LazyCallGraph::SCC *>(IR))
- return any_cast<const LazyCallGraph::SCC *>(IR)
- ->begin()
- ->getFunction()
- .getParent();
- return nullptr;
-}
-
-bool ChangedIRComparer::generateFunctionData(ChangedIRData &Data,
- const Function &F) {
+template <typename T>
+bool IRComparer<T>::generateFunctionData(IRDataT<T> &Data, const Function &F) {
if (!F.isDeclaration() && isFunctionInPrintList(F.getName())) {
- ChangedFuncData CFD;
+ FuncDataT<T> FD(F.getEntryBlock().getName().str());
for (const auto &B : F) {
- CFD.getOrder().emplace_back(B.getName());
- CFD.getData().insert({B.getName(), B});
+ FD.getOrder().emplace_back(B.getName());
+ FD.getData().insert({B.getName(), B});
}
Data.getOrder().emplace_back(F.getName());
- Data.getData().insert({F.getName(), CFD});
+ Data.getData().insert({F.getName(), FD});
return true;
}
return false;
@@ -792,7 +839,7 @@ bool PrintIRInstrumentation::shouldPrintBeforePass(StringRef PassID) {
return true;
StringRef PassName = PIC->getPassNameForClassName(PassID);
- return llvm::is_contained(printBeforePasses(), PassName);
+ return is_contained(printBeforePasses(), PassName);
}
bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) {
@@ -800,7 +847,7 @@ bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) {
return true;
StringRef PassName = PIC->getPassNameForClassName(PassID);
- return llvm::is_contained(printAfterPasses(), PassName);
+ return is_contained(printAfterPasses(), PassName);
}
void PrintIRInstrumentation::registerCallbacks(
@@ -874,14 +921,13 @@ void PrintPassInstrumentation::registerCallbacks(
SpecialPasses.emplace_back("PassAdaptor");
}
- PIC.registerBeforeSkippedPassCallback(
- [this, SpecialPasses](StringRef PassID, Any IR) {
- assert(!isSpecialPass(PassID, SpecialPasses) &&
- "Unexpectedly skipping special pass");
+ PIC.registerBeforeSkippedPassCallback([this, SpecialPasses](StringRef PassID,
+ Any IR) {
+ assert(!isSpecialPass(PassID, SpecialPasses) &&
+ "Unexpectedly skipping special pass");
- print() << "Skipping pass: " << PassID << " on " << getIRName(IR)
- << "\n";
- });
+ print() << "Skipping pass: " << PassID << " on " << getIRName(IR) << "\n";
+ });
PIC.registerBeforeNonSkippedPassCallback([this, SpecialPasses](
StringRef PassID, Any IR) {
if (isSpecialPass(PassID, SpecialPasses))
@@ -1079,19 +1125,18 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks(
report_fatal_error(Twine("CFG unexpectedly changed by ", Pass));
};
- PIC.registerBeforeNonSkippedPassCallback(
- [this, &FAM](StringRef P, Any IR) {
+ PIC.registerBeforeNonSkippedPassCallback([this, &FAM](StringRef P, Any IR) {
#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
- assert(&PassStack.emplace_back(P));
+ assert(&PassStack.emplace_back(P));
#endif
- (void)this;
- if (!any_isa<const Function *>(IR))
- return;
+ (void)this;
+ if (!any_isa<const Function *>(IR))
+ return;
- const auto *F = any_cast<const Function *>(IR);
- // Make sure a fresh CFG snapshot is available before the pass.
- FAM.getResult<PreservedCFGCheckerAnalysis>(*const_cast<Function *>(F));
- });
+ const auto *F = any_cast<const Function *>(IR);
+ // Make sure a fresh CFG snapshot is available before the pass.
+ FAM.getResult<PreservedCFGCheckerAnalysis>(*const_cast<Function *>(F));
+ });
PIC.registerAfterPassInvalidatedCallback(
[this](StringRef P, const PreservedAnalyses &PassPA) {
@@ -1165,36 +1210,38 @@ void VerifyInstrumentation::registerCallbacks(
InLineChangePrinter::~InLineChangePrinter() {}
void InLineChangePrinter::generateIRRepresentation(Any IR, StringRef PassID,
- ChangedIRData &D) {
- ChangedIRComparer::analyzeIR(IR, D);
+ IRDataT<EmptyData> &D) {
+ IRComparer<EmptyData>::analyzeIR(IR, D);
}
void InLineChangePrinter::handleAfter(StringRef PassID, std::string &Name,
- const ChangedIRData &Before,
- const ChangedIRData &After, Any IR) {
+ const IRDataT<EmptyData> &Before,
+ const IRDataT<EmptyData> &After, Any IR) {
SmallString<20> Banner =
formatv("*** IR Dump After {0} on {1} ***\n", PassID, Name);
Out << Banner;
- ChangedIRComparer(Out, Before, After, UseColour)
- .compare(IR, "", PassID, Name);
+ IRComparer<EmptyData>(Before, After)
+ .compare(getModuleForComparison(IR),
+ [&](bool InModule, unsigned Minor,
+ const FuncDataT<EmptyData> &Before,
+ const FuncDataT<EmptyData> &After) -> void {
+ handleFunctionCompare(Name, "", PassID, " on ", InModule,
+ Minor, Before, After);
+ });
Out << "\n";
}
-bool InLineChangePrinter::same(const ChangedIRData &D1,
- const ChangedIRData &D2) {
- return D1 == D2;
-}
-
-void ChangedIRComparer::handleFunctionCompare(StringRef Name, StringRef Prefix,
- StringRef PassID, bool InModule,
- const ChangedFuncData &Before,
- const ChangedFuncData &After) {
+void InLineChangePrinter::handleFunctionCompare(
+ StringRef Name, StringRef Prefix, StringRef PassID, StringRef Divider,
+ bool InModule, unsigned Minor, const FuncDataT<EmptyData> &Before,
+ const FuncDataT<EmptyData> &After) {
// Print a banner when this is being shown in the context of a module
if (InModule)
Out << "\n*** IR for function " << Name << " ***\n";
- ChangedFuncData::report(
- Before, After, [&](const ChangedBlockData *B, const ChangedBlockData *A) {
+ FuncDataT<EmptyData>::report(
+ Before, After,
+ [&](const BlockDataT<EmptyData> *B, const BlockDataT<EmptyData> *A) {
StringRef BStr = B ? B->getBody() : "\n";
StringRef AStr = A ? A->getBody() : "\n";
const std::string Removed =
@@ -1210,7 +1257,863 @@ void InLineChangePrinter::registerCallbacks(PassInstrumentationCallbacks &PIC) {
PrintChanged == ChangePrinter::PrintChangedDiffQuiet ||
PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose ||
PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet)
- TextChangeReporter<ChangedIRData>::registerRequiredCallbacks(PIC);
+ TextChangeReporter<IRDataT<EmptyData>>::registerRequiredCallbacks(PIC);
+}
+
+namespace {
+
+enum IRChangeDiffType { InBefore, InAfter, IsCommon, NumIRChangeDiffTypes };
+
+// Describe where a given element exists.
+std::string Colours[NumIRChangeDiffTypes];
+
+class DisplayNode;
+class DotCfgDiffDisplayGraph;
+
+// Base class for a node or edge in the dot-cfg-changes graph.
+class DisplayElement {
+public:
+ // Is this in before, after, or both?
+ IRChangeDiffType getType() const { return Type; }
+
+protected:
+ DisplayElement(IRChangeDiffType T) : Type(T) {}
+ const IRChangeDiffType Type;
+};
+
+// An edge representing a transition between basic blocks in the
+// dot-cfg-changes graph.
+class DisplayEdge : public DisplayElement {
+public:
+ DisplayEdge(std::string V, DisplayNode &Node, IRChangeDiffType T)
+ : DisplayElement(T), Value(V), Node(Node) {}
+ // The value on which the transition is made.
+ std::string getValue() const { return Value; }
+ // The node (representing a basic block) reached by this transition.
+ const DisplayNode &getDestinationNode() const { return Node; }
+
+protected:
+ std::string Value;
+ const DisplayNode &Node;
+};
+
+// A node in the dot-cfg-changes graph which represents a basic block.
+class DisplayNode : public DisplayElement {
+public:
+ // \p C is the content for the node; \p T indicates the colour for the
+ // outline of the node.
+ DisplayNode(std::string C, IRChangeDiffType T)
+ : DisplayElement(T), Content(C) {}
+
+ // Iterator to the child nodes. Required by GraphWriter.
+ using ChildIterator = std::unordered_set<DisplayNode *>::const_iterator;
+ ChildIterator children_begin() const { return Children.cbegin(); }
+ ChildIterator children_end() const { return Children.cend(); }
+
+ // Iterator for the edges. Required by GraphWriter.
+ using EdgeIterator = std::vector<DisplayEdge *>::const_iterator;
+ EdgeIterator edges_begin() const { return EdgePtrs.cbegin(); }
+ EdgeIterator edges_end() const { return EdgePtrs.cend(); }
+
+ // Create an edge to \p Node on value \p V, with type \p T.
+ void createEdge(StringRef V, DisplayNode &Node, IRChangeDiffType T);
+
+ // Return the content of this node.
+ std::string getContent() const { return Content; }
+
+ // Return the edge to node \p To.
+ const DisplayEdge &getEdge(const DisplayNode &To) const {
+ assert(EdgeMap.find(&To) != EdgeMap.end() && "Expected to find edge.");
+ return *EdgeMap.find(&To)->second;
+ }
+
+ // Return the value for the transition to basic block \p Sink.
+ // Required by GraphWriter.
+ std::string getEdgeSourceLabel(const DisplayNode &Sink) const {
+ return getEdge(Sink).getValue();
+ }
+
+ void createEdgeMap();
+
+protected:
+ const std::string Content;
+
+ // Place to collect all of the edges. Once they are all in the vector,
+ // the vector will not reallocate, so we can then use pointers to them,
+ // which are required by the graph writing routines.
+ std::vector<DisplayEdge> Edges;
+
+ std::vector<DisplayEdge *> EdgePtrs;
+ std::unordered_set<DisplayNode *> Children;
+ std::unordered_map<const DisplayNode *, const DisplayEdge *> EdgeMap;
+
+ // Safeguard adding of edges.
+ bool AllEdgesCreated = false;
+};
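The comment above relies on the standard vector-invalidation rule: pointers into a std::vector stay valid only while no further growth happens. A tiny standalone illustration of the "fill first, then take pointers" pattern used here:

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> Edges;
      Edges.push_back("a");
      Edges.push_back("b");
      Edges.push_back("c");
      // Only after all push_backs is it safe to keep pointers into Edges;
      // any further growth could reallocate the storage and invalidate them.
      std::vector<const std::string *> EdgePtrs;
      for (const std::string &E : Edges)
        EdgePtrs.push_back(&E);
      assert(*EdgePtrs[1] == "b");
    }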
+
+// Class representing a difference display (corresponds to a pdf file).
+class DotCfgDiffDisplayGraph {
+public:
+ DotCfgDiffDisplayGraph(std::string Name) : GraphName(Name) {}
+
+ // Generate the file into \p DotFile.
+ void generateDotFile(StringRef DotFile);
+
+ // Iterator to the nodes. Required by GraphWriter.
+ using NodeIterator = std::vector<DisplayNode *>::const_iterator;
+ NodeIterator nodes_begin() const {
+ assert(NodeGenerationComplete && "Unexpected children iterator creation");
+ return NodePtrs.cbegin();
+ }
+ NodeIterator nodes_end() const {
+ assert(NodeGenerationComplete && "Unexpected children iterator creation");
+ return NodePtrs.cend();
+ }
+
+ // Record the index of the entry node. At this point, we can build up
+ // vectors of pointers that are required by the graph routines.
+ void setEntryNode(unsigned N) {
+ // At this point, there will be no new nodes.
+ assert(!NodeGenerationComplete && "Unexpected node creation");
+ NodeGenerationComplete = true;
+ for (auto &N : Nodes)
+ NodePtrs.emplace_back(&N);
+
+ EntryNode = NodePtrs[N];
+ }
+
+ // Create a node.
+ void createNode(std::string C, IRChangeDiffType T) {
+ assert(!NodeGenerationComplete && "Unexpected node creation");
+ Nodes.emplace_back(C, T);
+ }
+ // Return the node at index \p N; indices are used instead of pointers to
+ // avoid problems when the vector reallocates.
+ DisplayNode &getNode(unsigned N) {
+ assert(N < Nodes.size() && "Node is out of bounds");
+ return Nodes[N];
+ }
+ unsigned size() const {
+ assert(NodeGenerationComplete && "Unexpected size query");
+ return Nodes.size();
+ }
+
+ // Return the name of the graph. Required by GraphWriter.
+ std::string getGraphName() const { return GraphName; }
+
+ // Return the string representing the differences for basic block \p Node.
+ // Required by GraphWriter.
+ std::string getNodeLabel(const DisplayNode &Node) const {
+ return Node.getContent();
+ }
+
+ // Return a string with colour information for Dot. Required by GraphWriter.
+ std::string getNodeAttributes(const DisplayNode &Node) const {
+ return attribute(Node.getType());
+ }
+
+ // Return a string with colour information for Dot. Required by GraphWriter.
+ std::string getEdgeColorAttr(const DisplayNode &From,
+ const DisplayNode &To) const {
+ return attribute(From.getEdge(To).getType());
+ }
+
+ // Get the starting basic block. Required by GraphWriter.
+ DisplayNode *getEntryNode() const {
+ assert(NodeGenerationComplete && "Unexpected entry node access");
+ return EntryNode;
+ }
+
+protected:
+ // Return the string containing the colour to use as a Dot attribute.
+ std::string attribute(IRChangeDiffType T) const;
+
+ bool NodeGenerationComplete = false;
+ const std::string GraphName;
+ std::vector<DisplayNode> Nodes;
+ std::vector<DisplayNode *> NodePtrs;
+ DisplayNode *EntryNode = nullptr;
+};
+
+void DisplayNode::createEdge(StringRef V, DisplayNode &Node,
+ IRChangeDiffType T) {
+ assert(!AllEdgesCreated && "Expected to be able to still create edges.");
+ Edges.emplace_back(V.str(), Node, T);
+ Children.insert(&Node);
+}
+
+void DisplayNode::createEdgeMap() {
+ // No more edges will be added so we can now use pointers to the edges
+ // as the vector will not grow and reallocate.
+ AllEdgesCreated = true;
+ for (auto &E : Edges)
+ EdgeMap.insert({&E.getDestinationNode(), &E});
+}
+
+class DotCfgDiffNode;
+class DotCfgDiff;
+
+// A class representing a basic block in the Dot difference graph.
+class DotCfgDiffNode {
+public:
+ DotCfgDiffNode() = delete;
+
+ // Create a node in Dot difference graph \p G representing the basic block
+ // represented by \p BD with type \p T (where it exists).
+ DotCfgDiffNode(DotCfgDiff &G, unsigned N, const BlockDataT<DCData> &BD,
+ IRChangeDiffType T)
+ : Graph(G), N(N), Data{&BD, nullptr}, Type(T) {}
+ DotCfgDiffNode(const DotCfgDiffNode &DN)
+ : Graph(DN.Graph), N(DN.N), Data{DN.Data[0], DN.Data[1]}, Type(DN.Type),
+ EdgesMap(DN.EdgesMap), Children(DN.Children), Edges(DN.Edges) {}
+
+ unsigned getIndex() const { return N; }
+
+ // The label of the basic block
+ StringRef getLabel() const {
+ assert(Data[0] && "Expected Data[0] to be set.");
+ return Data[0]->getLabel();
+ }
+ // Return where this block exists.
+ IRChangeDiffType getType() const { return Type; }
+ // Change this basic block from being only in before to being common.
+ // Save the pointer to \p Other.
+ void setCommon(const BlockDataT<DCData> &Other) {
+ assert(!Data[1] && "Expected only one block datum");
+ Data[1] = &Other;
+ Type = IsCommon;
+ }
+ // Add an edge to \p E of type {\p Value, \p T}.
+ void addEdge(unsigned E, StringRef Value, IRChangeDiffType T) {
+ // This is a new edge or it is an edge being made common.
+ assert((EdgesMap.count(E) == 0 || T == IsCommon) &&
+ "Unexpected edge count and type.");
+ EdgesMap[E] = {Value.str(), T};
+ }
+ // Record the children and create edges.
+ void finalize(DotCfgDiff &G);
+
+ // Return the value and type of the edge to node \p S.
+ std::pair<std::string, IRChangeDiffType> getEdge(const unsigned S) const {
+ assert(EdgesMap.count(S) == 1 && "Expected to find edge.");
+ return EdgesMap.at(S);
+ }
+
+ // Return the string representing the basic block.
+ std::string getBodyContent() const;
+
+ void createDisplayEdges(DotCfgDiffDisplayGraph &Graph, unsigned DisplayNode,
+ std::map<const unsigned, unsigned> &NodeMap) const;
+
+protected:
+ DotCfgDiff &Graph;
+ const unsigned N;
+ const BlockDataT<DCData> *Data[2];
+ IRChangeDiffType Type;
+ std::map<const unsigned, std::pair<std::string, IRChangeDiffType>> EdgesMap;
+ std::vector<unsigned> Children;
+ std::vector<unsigned> Edges;
+};
+
+// Class representing the difference graph between two functions.
+class DotCfgDiff {
+public:
+ // \p Title is the title given to the graph. \p Before and \p After are
+ // the before and after versions of the function, respectively.
+ DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After);
+
+ DotCfgDiff(const DotCfgDiff &) = delete;
+ DotCfgDiff &operator=(const DotCfgDiff &) = delete;
+
+ DotCfgDiffDisplayGraph createDisplayGraph(StringRef Title,
+ StringRef EntryNodeName);
+
+ // Return a string consisting of the labels for \p Source and \p Sink.
+ // The combination allows distinguishing changing transitions on the
+ // same value (i.e., a transition went to X before and goes to Y after).
+ // Required by GraphWriter.
+ StringRef getEdgeSourceLabel(const unsigned &Source,
+ const unsigned &Sink) const {
+ std::string S =
+ getNode(Source).getLabel().str() + " " + getNode(Sink).getLabel().str();
+ assert(EdgeLabels.count(S) == 1 && "Expected to find edge label.");
+ return EdgeLabels.find(S)->getValue();
+ }
+
+ // Return the number of basic blocks (nodes). Required by GraphWriter.
+ unsigned size() const { return Nodes.size(); }
+
+ const DotCfgDiffNode &getNode(unsigned N) const {
+ assert(N < Nodes.size() && "Unexpected index for node reference");
+ return Nodes[N];
+ }
+
+protected:
+ // Return the string surrounded by HTML to make it the appropriate colour.
+ std::string colourize(std::string S, IRChangeDiffType T) const;
+
+ void createNode(StringRef Label, const BlockDataT<DCData> &BD,
+ IRChangeDiffType T) {
+ unsigned Pos = Nodes.size();
+ Nodes.emplace_back(*this, Pos, BD, T);
+ NodePosition.insert({Label, Pos});
+ }
+
+ // TODO Nodes should probably be a StringMap<DotCfgDiffNode> after the
+ // display graph is separated out, which would remove the need for
+ // NodePosition.
+ std::vector<DotCfgDiffNode> Nodes;
+ StringMap<unsigned> NodePosition;
+ const std::string GraphName;
+
+ StringMap<std::string> EdgeLabels;
+};
+
+std::string DotCfgDiffNode::getBodyContent() const {
+ if (Type == IsCommon) {
+ assert(Data[1] && "Expected Data[1] to be set.");
+
+ StringRef SR[2];
+ for (unsigned I = 0; I < 2; ++I) {
+ SR[I] = Data[I]->getBody();
+ // drop initial '\n' if present
+ if (SR[I][0] == '\n')
+ SR[I] = SR[I].drop_front();
+ // drop predecessors as they can be big and are redundant
+ SR[I] = SR[I].drop_until([](char C) { return C == '\n'; }).drop_front();
+ }
+
+ SmallString<80> OldLineFormat = formatv(
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[InBefore]);
+ SmallString<80> NewLineFormat = formatv(
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[InAfter]);
+ SmallString<80> UnchangedLineFormat = formatv(
+ "<FONT COLOR=\"{0}\">%l</FONT><BR align=\"left\"/>", Colours[IsCommon]);
+ std::string Diff = Data[0]->getLabel().str();
+ Diff += ":\n<BR align=\"left\"/>" +
+ doSystemDiff(makeHTMLReady(SR[0]), makeHTMLReady(SR[1]),
+ OldLineFormat, NewLineFormat, UnchangedLineFormat);
+
+ // Diff adds in some empty colour changes which are not valid HTML
+ // so remove them. Colours are all lowercase alpha characters (as
+ // listed in https://graphviz.org/pdf/dotguide.pdf).
+ Regex R("<FONT COLOR=\"\\w+\"></FONT>");
+ while (true) {
+ std::string Error;
+ std::string S = R.sub("", Diff, &Error);
+ if (Error != "")
+ return Error;
+ if (S == Diff)
+ return Diff;
+ Diff = S;
+ }
+ llvm_unreachable("Should not get here");
+ }
+
+ // Put node out in the appropriate colour.
+ assert(!Data[1] && "Data[1] is set unexpectedly.");
+ std::string Body = makeHTMLReady(Data[0]->getBody());
+ const StringRef BS = Body;
+ StringRef BS1 = BS;
+ // Drop leading newline, if present.
+ if (BS.front() == '\n')
+ BS1 = BS1.drop_front(1);
+ // Get label.
+ StringRef Label = BS1.take_until([](char C) { return C == ':'; });
+ // drop predecessors as they can be big and are redundant
+ BS1 = BS1.drop_until([](char C) { return C == '\n'; }).drop_front();
+
+ std::string S = "<FONT COLOR=\"" + Colours[Type] + "\">" + Label.str() + ":";
+
+ // align each line to the left.
+ while (BS1.size()) {
+ S.append("<BR align=\"left\"/>");
+ StringRef Line = BS1.take_until([](char C) { return C == '\n'; });
+ S.append(Line.str());
+ BS1 = BS1.drop_front(Line.size() + 1);
+ }
+ S.append("<BR align=\"left\"/></FONT>");
+ return S;
+}
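For reference, the empty-FONT cleanup above can be expressed as a single substitution; here is a standalone equivalent using std::regex (the pass itself uses llvm::Regex in a loop so that substitution errors can be reported):

    #include <cassert>
    #include <regex>
    #include <string>

    int main() {
      std::string Diff = "<FONT COLOR=\"red\"></FONT>foo"
                         "<FONT COLOR=\"black\">bar</FONT>";
      // Strip the empty colour spans that the line-based diff leaves behind.
      const std::regex EmptyFont("<FONT COLOR=\"\\w+\"></FONT>");
      Diff = std::regex_replace(Diff, EmptyFont, "");
      assert(Diff == "foo<FONT COLOR=\"black\">bar</FONT>");
    }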
+
+std::string DotCfgDiff::colourize(std::string S, IRChangeDiffType T) const {
+ if (S.length() == 0)
+ return S;
+ return "<FONT COLOR=\"" + Colours[T] + "\">" + S + "</FONT>";
+}
+
+std::string DotCfgDiffDisplayGraph::attribute(IRChangeDiffType T) const {
+ return "color=" + Colours[T];
+}
+
+DotCfgDiff::DotCfgDiff(StringRef Title, const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After)
+ : GraphName(Title.str()) {
+ StringMap<IRChangeDiffType> EdgesMap;
+
+ // Handle each basic block in the before IR.
+ for (auto &B : Before.getData()) {
+ StringRef Label = B.getKey();
+ const BlockDataT<DCData> &BD = B.getValue();
+ createNode(Label, BD, InBefore);
+
+ // Create transitions with names made up of the from-block label, the value
+ // on which the transition is made, and the to-block label.
+ for (StringMap<std::string>::const_iterator Sink = BD.getData().begin(),
+ E = BD.getData().end();
+ Sink != E; ++Sink) {
+ std::string Key = (Label + " " + Sink->getKey().str()).str() + " " +
+ BD.getData().getSuccessorLabel(Sink->getKey()).str();
+ EdgesMap.insert({Key, InBefore});
+ }
+ }
+
+ // Handle each basic block in the after IR
+ for (auto &A : After.getData()) {
+ StringRef Label = A.getKey();
+ const BlockDataT<DCData> &BD = A.getValue();
+ unsigned C = NodePosition.count(Label);
+ if (C == 0)
+ // This only exists in the after IR. Create the node.
+ createNode(Label, BD, InAfter);
+ else {
+ assert(C == 1 && "Unexpected multiple nodes.");
+ Nodes[NodePosition[Label]].setCommon(BD);
+ }
+ // Add in the edges between the nodes (as common or only in after).
+ for (StringMap<std::string>::const_iterator Sink = BD.getData().begin(),
+ E = BD.getData().end();
+ Sink != E; ++Sink) {
+ std::string Key = (Label + " " + Sink->getKey().str()).str() + " " +
+ BD.getData().getSuccessorLabel(Sink->getKey()).str();
+ unsigned C = EdgesMap.count(Key);
+ if (C == 0)
+ EdgesMap.insert({Key, InAfter});
+ else {
+ EdgesMap[Key] = IsCommon;
+ }
+ }
+ }
+
+ // Now go through the map of edges and add them to the node.
+ for (auto &E : EdgesMap) {
+ // Extract the source, sink and value from the edge key.
+ StringRef S = E.getKey();
+ auto SP1 = S.rsplit(' ');
+ auto &SourceSink = SP1.first;
+ auto SP2 = SourceSink.split(' ');
+ StringRef Source = SP2.first;
+ StringRef Sink = SP2.second;
+ StringRef Value = SP1.second;
+
+ assert(NodePosition.count(Source) == 1 && "Expected to find node.");
+ DotCfgDiffNode &SourceNode = Nodes[NodePosition[Source]];
+ assert(NodePosition.count(Sink) == 1 && "Expected to find node.");
+ unsigned SinkNode = NodePosition[Sink];
+ IRChangeDiffType T = E.second;
+
+ // Look for an edge from Source to Sink
+ if (EdgeLabels.count(SourceSink) == 0)
+ EdgeLabels.insert({SourceSink, colourize(Value.str(), T)});
+ else {
+ StringRef V = EdgeLabels.find(SourceSink)->getValue();
+ std::string NV = colourize(V.str() + " " + Value.str(), T);
+ T = IsCommon;
+ EdgeLabels[SourceSink] = NV;
+ }
+ SourceNode.addEdge(SinkNode, Value, T);
+ }
+ for (auto &I : Nodes)
+ I.finalize(*this);
+}
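The edge bookkeeping above depends on a flat key of the form "<source> <sink> <value>" that is later taken apart again with rsplit/split. A standalone sketch with hypothetical labels (this works because LLVM block labels and branch values contain no spaces):

    #include <cassert>
    #include <string>

    int main() {
      // Edge-map key format used above: "<source label> <sink label> <value>".
      const std::string Source = "entry", Sink = "if.then", Value = "true";
      const std::string Key = Source + " " + Sink + " " + Value;

      // rsplit(' ') equivalent: the value is everything after the last space,
      // "<source> <sink>" is everything before it.
      const size_t LastSpace = Key.rfind(' ');
      const std::string SourceSink = Key.substr(0, LastSpace);
      const std::string ParsedValue = Key.substr(LastSpace + 1);

      // split(' ') equivalent: source and sink around the first space.
      const size_t FirstSpace = SourceSink.find(' ');
      assert(SourceSink.substr(0, FirstSpace) == Source);
      assert(SourceSink.substr(FirstSpace + 1) == Sink);
      assert(ParsedValue == Value);
    }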
+
+DotCfgDiffDisplayGraph DotCfgDiff::createDisplayGraph(StringRef Title,
+ StringRef EntryNodeName) {
+ assert(NodePosition.count(EntryNodeName) == 1 &&
+ "Expected to find entry block in map.");
+ unsigned Entry = NodePosition[EntryNodeName];
+ assert(Entry < Nodes.size() && "Expected to find entry node");
+ DotCfgDiffDisplayGraph G(Title.str());
+
+ std::map<const unsigned, unsigned> NodeMap;
+
+ int EntryIndex = -1;
+ unsigned Index = 0;
+ for (auto &I : Nodes) {
+ if (I.getIndex() == Entry)
+ EntryIndex = Index;
+ G.createNode(I.getBodyContent(), I.getType());
+ NodeMap.insert({I.getIndex(), Index++});
+ }
+ assert(EntryIndex >= 0 && "Expected entry node index to be set.");
+ G.setEntryNode(EntryIndex);
+
+ for (auto &I : NodeMap) {
+ unsigned SourceNode = I.first;
+ unsigned DisplayNode = I.second;
+ getNode(SourceNode).createDisplayEdges(G, DisplayNode, NodeMap);
+ }
+ return G;
+}
+
+void DotCfgDiffNode::createDisplayEdges(
+ DotCfgDiffDisplayGraph &DisplayGraph, unsigned DisplayNodeIndex,
+ std::map<const unsigned, unsigned> &NodeMap) const {
+
+ DisplayNode &SourceDisplayNode = DisplayGraph.getNode(DisplayNodeIndex);
+
+ for (auto I : Edges) {
+ unsigned SinkNodeIndex = I;
+ IRChangeDiffType Type = getEdge(SinkNodeIndex).second;
+ const DotCfgDiffNode *SinkNode = &Graph.getNode(SinkNodeIndex);
+
+ StringRef Label = Graph.getEdgeSourceLabel(getIndex(), SinkNodeIndex);
+ DisplayNode &SinkDisplayNode = DisplayGraph.getNode(SinkNode->getIndex());
+ SourceDisplayNode.createEdge(Label, SinkDisplayNode, Type);
+ }
+ SourceDisplayNode.createEdgeMap();
+}
+
+void DotCfgDiffNode::finalize(DotCfgDiff &G) {
+ for (auto E : EdgesMap) {
+ Children.emplace_back(E.first);
+ Edges.emplace_back(E.first);
+ }
+}
+
+} // namespace
+
+namespace llvm {
+
+template <> struct GraphTraits<DotCfgDiffDisplayGraph *> {
+ using NodeRef = const DisplayNode *;
+ using ChildIteratorType = DisplayNode::ChildIterator;
+ using nodes_iterator = DotCfgDiffDisplayGraph::NodeIterator;
+ using EdgeRef = const DisplayEdge *;
+ using ChildEdgeIterator = DisplayNode::EdgeIterator;
+
+ static NodeRef getEntryNode(const DotCfgDiffDisplayGraph *G) {
+ return G->getEntryNode();
+ }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return N->children_begin();
+ }
+ static ChildIteratorType child_end(NodeRef N) { return N->children_end(); }
+ static nodes_iterator nodes_begin(const DotCfgDiffDisplayGraph *G) {
+ return G->nodes_begin();
+ }
+ static nodes_iterator nodes_end(const DotCfgDiffDisplayGraph *G) {
+ return G->nodes_end();
+ }
+ static ChildEdgeIterator child_edge_begin(NodeRef N) {
+ return N->edges_begin();
+ }
+ static ChildEdgeIterator child_edge_end(NodeRef N) { return N->edges_end(); }
+ static NodeRef edge_dest(EdgeRef E) { return &E->getDestinationNode(); }
+ static unsigned size(const DotCfgDiffDisplayGraph *G) { return G->size(); }
+};
+
+template <>
+struct DOTGraphTraits<DotCfgDiffDisplayGraph *> : public DefaultDOTGraphTraits {
+ explicit DOTGraphTraits(bool Simple = false)
+ : DefaultDOTGraphTraits(Simple) {}
+
+ static bool renderNodesUsingHTML() { return true; }
+ static std::string getGraphName(const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getGraphName();
+ }
+ static std::string
+ getGraphProperties(const DotCfgDiffDisplayGraph *DiffData) {
+ return "\tsize=\"190, 190\";\n";
+ }
+ static std::string getNodeLabel(const DisplayNode *Node,
+ const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getNodeLabel(*Node);
+ }
+ static std::string getNodeAttributes(const DisplayNode *Node,
+ const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getNodeAttributes(*Node);
+ }
+ static std::string getEdgeSourceLabel(const DisplayNode *From,
+ DisplayNode::ChildIterator &To) {
+ return From->getEdgeSourceLabel(**To);
+ }
+ static std::string getEdgeAttributes(const DisplayNode *From,
+ DisplayNode::ChildIterator &To,
+ const DotCfgDiffDisplayGraph *DiffData) {
+ return DiffData->getEdgeColorAttr(*From, **To);
+ }
+};
+
+} // namespace llvm
+
+namespace {
+
+void DotCfgDiffDisplayGraph::generateDotFile(StringRef DotFile) {
+ std::error_code EC;
+ raw_fd_ostream OutStream(DotFile, EC);
+ if (EC) {
+ errs() << "Error: " << EC.message() << "\n";
+ return;
+ }
+ WriteGraph(OutStream, this, false);
+ OutStream.flush();
+ OutStream.close();
+}
+
+} // namespace
+
+namespace llvm {
+
+DCData::DCData(const BasicBlock &B) {
+ // Build up transition labels.
+ const Instruction *Term = B.getTerminator();
+ if (const BranchInst *Br = dyn_cast<const BranchInst>(Term))
+ if (Br->isUnconditional())
+ addSuccessorLabel(Br->getSuccessor(0)->getName().str(), "");
+ else {
+ addSuccessorLabel(Br->getSuccessor(0)->getName().str(), "true");
+ addSuccessorLabel(Br->getSuccessor(1)->getName().str(), "false");
+ }
+ else if (const SwitchInst *Sw = dyn_cast<const SwitchInst>(Term)) {
+ addSuccessorLabel(Sw->case_default()->getCaseSuccessor()->getName().str(),
+ "default");
+ for (auto &C : Sw->cases()) {
+ assert(C.getCaseValue() && "Expected to find case value.");
+ SmallString<20> Value = formatv("{0}", C.getCaseValue()->getSExtValue());
+ addSuccessorLabel(C.getCaseSuccessor()->getName().str(), Value);
+ }
+ } else
+ for (const_succ_iterator I = succ_begin(&B), E = succ_end(&B); I != E; ++I)
+ addSuccessorLabel((*I)->getName().str(), "");
+}
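As a concrete illustration of the labels this constructor produces (block names here are hypothetical), the resulting successor maps for the three terminator kinds look roughly like this:

    #include <cassert>
    #include <map>
    #include <string>

    int main() {
      // Conditional branch, e.g. "br i1 %c, label %if.then, label %if.else":
      // each successor is labelled with the condition value it is taken on.
      std::map<std::string, std::string> CondBr = {{"if.then", "true"},
                                                   {"if.else", "false"}};
      // Switch: the default successor is labelled "default" and each case
      // successor is labelled with its case value.
      std::map<std::string, std::string> Switch = {
          {"sw.default", "default"}, {"sw.bb1", "0"}, {"sw.bb2", "1"}};
      // Unconditional branches and other terminators get an empty label.
      std::map<std::string, std::string> Other = {{"next", ""}};
      assert(CondBr.at("if.then") == "true");
      (void)Switch;
      (void)Other;
    }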
+
+DotCfgChangeReporter::DotCfgChangeReporter(bool Verbose)
+ : ChangeReporter<IRDataT<DCData>>(Verbose) {
+ // Set up the colours based on the hidden options.
+ Colours[InBefore] = BeforeColour;
+ Colours[InAfter] = AfterColour;
+ Colours[IsCommon] = CommonColour;
+}
+
+void DotCfgChangeReporter::handleFunctionCompare(
+ StringRef Name, StringRef Prefix, StringRef PassID, StringRef Divider,
+ bool InModule, unsigned Minor, const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<8> Extender;
+ SmallString<8> Number;
+ // Handle numbering and file names.
+ if (InModule) {
+ Extender = formatv("{0}_{1}", N, Minor);
+ Number = formatv("{0}.{1}", N, Minor);
+ } else {
+ Extender = formatv("{0}", N);
+ Number = formatv("{0}", N);
+ }
+ // Create a temporary file name for the dot file.
+ SmallVector<char, 128> SV;
+ sys::fs::createUniquePath("cfgdot-%%%%%%.dot", SV, true);
+ std::string DotFile = Twine(SV).str();
+
+ SmallString<20> PDFFileName = formatv("diff_{0}.pdf", Extender);
+ SmallString<200> Text;
+
+ Text = formatv("{0}.{1}{2}{3}{4}", Number, Prefix, makeHTMLReady(PassID),
+ Divider, Name);
+
+ DotCfgDiff Diff(Text, Before, After);
+ std::string EntryBlockName = After.getEntryBlockName();
+ // Use the before entry block if the after entry block was removed.
+ if (EntryBlockName == "")
+ EntryBlockName = Before.getEntryBlockName();
+ assert(EntryBlockName != "" && "Expected to find entry block");
+
+ DotCfgDiffDisplayGraph DG = Diff.createDisplayGraph(Text, EntryBlockName);
+ DG.generateDotFile(DotFile);
+
+ *HTML << genHTML(Text, DotFile, PDFFileName);
+ std::error_code EC = sys::fs::remove(DotFile);
+ if (EC)
+ errs() << "Error: " << EC.message() << "\n";
+}
+
+std::string DotCfgChangeReporter::genHTML(StringRef Text, StringRef DotFile,
+ StringRef PDFFileName) {
+ SmallString<20> PDFFile = formatv("{0}/{1}", DotCfgDir, PDFFileName);
+ // Create the PDF file.
+ static ErrorOr<std::string> DotExe = sys::findProgramByName(DotBinary);
+ if (!DotExe)
+ return "Unable to find dot executable.";
+
+ StringRef Args[] = {DotBinary, "-Tpdf", "-o", PDFFile, DotFile};
+ int Result = sys::ExecuteAndWait(*DotExe, Args, None);
+ if (Result < 0)
+ return "Error executing system dot.";
+
+ // Create the HTML tag referring to the PDF file.
+ SmallString<200> S = formatv(
+ " <a href=\"{0}\" target=\"_blank\">{1}</a><br/>\n", PDFFileName, Text);
+ return S.c_str();
+}
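genHTML shells out to the Graphviz dot tool to render the graph; a standalone sketch of the equivalent command line, using std::system instead of llvm::sys::ExecuteAndWait and hypothetical file names (it assumes dot is installed and on PATH):

    #include <cstdlib>
    #include <string>

    int main() {
      // Equivalent of the Args built above: dot -Tpdf -o <PDFFile> <DotFile>
      const std::string Cmd = "dot -Tpdf -o diff_1.pdf cfgdot-abc123.dot";
      return std::system(Cmd.c_str()) == 0 ? 0 : 1;
    }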
+
+void DotCfgChangeReporter::handleInitialIR(Any IR) {
+ assert(HTML && "Expected outstream to be set");
+ *HTML << "<button type=\"button\" class=\"collapsible\">0. "
+ << "Initial IR (by function)</button>\n"
+ << "<div class=\"content\">\n"
+ << " <p>\n";
+ // Create representation of IR
+ IRDataT<DCData> Data;
+ IRComparer<DCData>::analyzeIR(IR, Data);
+ // Now compare it against itself, which will have everything the
+ // same and will generate the files.
+ IRComparer<DCData>(Data, Data)
+ .compare(getModuleForComparison(IR),
+ [&](bool InModule, unsigned Minor,
+ const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After) -> void {
+ handleFunctionCompare("", " ", "Initial IR", "", InModule,
+ Minor, Before, After);
+ });
+ *HTML << " </p>\n"
+ << "</div><br/>\n";
+ ++N;
+}
+
+void DotCfgChangeReporter::generateIRRepresentation(Any IR, StringRef PassID,
+ IRDataT<DCData> &Data) {
+ IRComparer<DCData>::analyzeIR(IR, Data);
+}
+
+void DotCfgChangeReporter::omitAfter(StringRef PassID, std::string &Name) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner =
+ formatv(" <a>{0}. Pass {1} on {2} omitted because no change</a><br/>\n",
+ N, makeHTMLReady(PassID), Name);
+ *HTML << Banner;
+ ++N;
+}
+
+void DotCfgChangeReporter::handleAfter(StringRef PassID, std::string &Name,
+ const IRDataT<DCData> &Before,
+ const IRDataT<DCData> &After, Any IR) {
+ assert(HTML && "Expected outstream to be set");
+ IRComparer<DCData>(Before, After)
+ .compare(getModuleForComparison(IR),
+ [&](bool InModule, unsigned Minor,
+ const FuncDataT<DCData> &Before,
+ const FuncDataT<DCData> &After) -> void {
+ handleFunctionCompare(Name, " Pass ", PassID, " on ", InModule,
+ Minor, Before, After);
+ });
+ *HTML << " </p></div>\n";
+ ++N;
+}
+
+void DotCfgChangeReporter::handleInvalidated(StringRef PassID) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner =
+ formatv(" <a>{0}. {1} invalidated</a><br/>\n", N, makeHTMLReady(PassID));
+ *HTML << Banner;
+ ++N;
+}
+
+void DotCfgChangeReporter::handleFiltered(StringRef PassID, std::string &Name) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner =
+ formatv(" <a>{0}. Pass {1} on {2} filtered out</a><br/>\n", N,
+ makeHTMLReady(PassID), Name);
+ *HTML << Banner;
+ ++N;
+}
+
+void DotCfgChangeReporter::handleIgnored(StringRef PassID, std::string &Name) {
+ assert(HTML && "Expected outstream to be set");
+ SmallString<20> Banner = formatv(" <a>{0}. {1} on {2} ignored</a><br/>\n", N,
+ makeHTMLReady(PassID), Name);
+ *HTML << Banner;
+ ++N;
+}
+
+bool DotCfgChangeReporter::initializeHTML() {
+ std::error_code EC;
+ HTML = std::make_unique<raw_fd_ostream>(DotCfgDir + "/passes.html", EC);
+ if (EC) {
+ HTML = nullptr;
+ return false;
+ }
+
+ *HTML << "<!doctype html>"
+ << "<html>"
+ << "<head>"
+ << "<style>.collapsible { "
+ << "background-color: #777;"
+ << " color: white;"
+ << " cursor: pointer;"
+ << " padding: 18px;"
+ << " width: 100%;"
+ << " border: none;"
+ << " text-align: left;"
+ << " outline: none;"
+ << " font-size: 15px;"
+ << "} .active, .collapsible:hover {"
+ << " background-color: #555;"
+ << "} .content {"
+ << " padding: 0 18px;"
+ << " display: none;"
+ << " overflow: hidden;"
+ << " background-color: #f1f1f1;"
+ << "}"
+ << "</style>"
+ << "<title>passes.html</title>"
+ << "</head>\n"
+ << "<body>";
+ return true;
+}
+
+DotCfgChangeReporter::~DotCfgChangeReporter() {
+ if (!HTML)
+ return;
+ *HTML
+ << "<script>var coll = document.getElementsByClassName(\"collapsible\");"
+ << "var i;"
+ << "for (i = 0; i < coll.length; i++) {"
+ << "coll[i].addEventListener(\"click\", function() {"
+ << " this.classList.toggle(\"active\");"
+ << " var content = this.nextElementSibling;"
+ << " if (content.style.display === \"block\"){"
+ << " content.style.display = \"none\";"
+ << " }"
+ << " else {"
+ << " content.style.display= \"block\";"
+ << " }"
+ << " });"
+ << " }"
+ << "</script>"
+ << "</body>"
+ << "</html>\n";
+ HTML->flush();
+ HTML->close();
+}
+
+void DotCfgChangeReporter::registerCallbacks(
+ PassInstrumentationCallbacks &PIC) {
+ if ((PrintChanged == ChangePrinter::PrintChangedDotCfgVerbose ||
+ PrintChanged == ChangePrinter::PrintChangedDotCfgQuiet)) {
+ SmallString<128> OutputDir;
+ sys::fs::expand_tilde(DotCfgDir, OutputDir);
+ sys::fs::make_absolute(OutputDir);
+ assert(!OutputDir.empty() && "expected output dir to be non-empty");
+ DotCfgDir = OutputDir.c_str();
+ if (initializeHTML()) {
+ ChangeReporter<IRDataT<DCData>>::registerRequiredCallbacks(PIC);
+ return;
+ }
+ dbgs() << "Unable to open output stream for -cfg-dot-changed\n";
+ }
}
StandardInstrumentations::StandardInstrumentations(
@@ -1222,6 +2125,8 @@ StandardInstrumentations::StandardInstrumentations(
PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose,
PrintChanged == ChangePrinter::PrintChangedColourDiffVerbose ||
PrintChanged == ChangePrinter::PrintChangedColourDiffQuiet),
+ WebsiteChangeReporter(PrintChanged ==
+ ChangePrinter::PrintChangedDotCfgVerbose),
Verify(DebugLogging), VerifyEach(VerifyEach) {}
void StandardInstrumentations::registerCallbacks(
@@ -1238,14 +2143,17 @@ void StandardInstrumentations::registerCallbacks(
if (VerifyEach)
Verify.registerCallbacks(PIC);
PrintChangedDiff.registerCallbacks(PIC);
+ WebsiteChangeReporter.registerCallbacks(PIC);
}
-namespace llvm {
-
template class ChangeReporter<std::string>;
template class TextChangeReporter<std::string>;
-template class ChangeReporter<ChangedIRData>;
-template class TextChangeReporter<ChangedIRData>;
+template class BlockDataT<EmptyData>;
+template class FuncDataT<EmptyData>;
+template class IRDataT<EmptyData>;
+template class ChangeReporter<IRDataT<EmptyData>>;
+template class TextChangeReporter<IRDataT<EmptyData>>;
+template class IRComparer<EmptyData>;
} // namespace llvm
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 9fffb249e72d..94bd4807041d 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -567,7 +567,8 @@ class VersionedCovMapFuncRecordReader : public CovMapFuncRecordReader {
if (Error Err = CFR->template getFuncName<Endian>(ProfileNames, FuncName))
return Err;
if (FuncName.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "function name is empty");
++CovMapNumUsedRecords;
Records.emplace_back(Version, FuncName, FuncHash, Mapping,
FileRange.StartingIndex, FileRange.Length);
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index a83b56ed67f1..1168ad27fe52 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -74,53 +74,82 @@ static cl::opt<unsigned> StaticFuncStripDirNamePrefix(
cl::desc("Strip specified level of directory name from source path in "
"the profile counter name for static functions."));
-static std::string getInstrProfErrString(instrprof_error Err) {
+static std::string getInstrProfErrString(instrprof_error Err,
+ const std::string &ErrMsg = "") {
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+
switch (Err) {
case instrprof_error::success:
- return "success";
+ OS << "success";
+ break;
case instrprof_error::eof:
- return "end of File";
+ OS << "end of File";
+ break;
case instrprof_error::unrecognized_format:
- return "unrecognized instrumentation profile encoding format";
+ OS << "unrecognized instrumentation profile encoding format";
+ break;
case instrprof_error::bad_magic:
- return "invalid instrumentation profile data (bad magic)";
+ OS << "invalid instrumentation profile data (bad magic)";
+ break;
case instrprof_error::bad_header:
- return "invalid instrumentation profile data (file header is corrupt)";
+ OS << "invalid instrumentation profile data (file header is corrupt)";
+ break;
case instrprof_error::unsupported_version:
- return "unsupported instrumentation profile format version";
+ OS << "unsupported instrumentation profile format version";
+ break;
case instrprof_error::unsupported_hash_type:
- return "unsupported instrumentation profile hash type";
+ OS << "unsupported instrumentation profile hash type";
+ break;
case instrprof_error::too_large:
- return "too much profile data";
+ OS << "too much profile data";
+ break;
case instrprof_error::truncated:
- return "truncated profile data";
+ OS << "truncated profile data";
+ break;
case instrprof_error::malformed:
- return "malformed instrumentation profile data";
+ OS << "malformed instrumentation profile data";
+ break;
case instrprof_error::invalid_prof:
- return "invalid profile created. Please file a bug "
- "at: " BUG_REPORT_URL
- " and include the profraw files that caused this error.";
+ OS << "invalid profile created. Please file a bug "
+ "at: " BUG_REPORT_URL
+ " and include the profraw files that caused this error.";
+ break;
case instrprof_error::unknown_function:
- return "no profile data available for function";
+ OS << "no profile data available for function";
+ break;
case instrprof_error::hash_mismatch:
- return "function control flow change detected (hash mismatch)";
+ OS << "function control flow change detected (hash mismatch)";
+ break;
case instrprof_error::count_mismatch:
- return "function basic block count change detected (counter mismatch)";
+ OS << "function basic block count change detected (counter mismatch)";
+ break;
case instrprof_error::counter_overflow:
- return "counter overflow";
+ OS << "counter overflow";
+ break;
case instrprof_error::value_site_count_mismatch:
- return "function value site count change detected (counter mismatch)";
+ OS << "function value site count change detected (counter mismatch)";
+ break;
case instrprof_error::compress_failed:
- return "failed to compress data (zlib)";
+ OS << "failed to compress data (zlib)";
+ break;
case instrprof_error::uncompress_failed:
- return "failed to uncompress data (zlib)";
+ OS << "failed to uncompress data (zlib)";
+ break;
case instrprof_error::empty_raw_profile:
- return "empty raw profile file";
+ OS << "empty raw profile file";
+ break;
case instrprof_error::zlib_unavailable:
- return "profile uses zlib compression but the profile reader was built "
- "without zlib support";
+ OS << "profile uses zlib compression but the profile reader was built "
+ "without zlib support";
+ break;
}
- llvm_unreachable("A value of instrprof_error has no message.");
+
+ // If optional error message is not empty, append it to the message.
+ if (!ErrMsg.empty())
+ OS << ": " << ErrMsg;
+
+ return OS.str();
}
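The effect of the new optional argument is simply that the detail text is appended after the generic enum message; a minimal standalone sketch of the same pattern (std::ostringstream standing in for raw_string_ostream, with the message text taken from the malformed case):

    #include <cassert>
    #include <sstream>
    #include <string>

    static std::string describeMalformed(const std::string &ErrMsg = "") {
      std::ostringstream OS;
      OS << "malformed instrumentation profile data";
      if (!ErrMsg.empty())
        OS << ": " << ErrMsg; // optional detail appended to the generic text
      return OS.str();
    }

    int main() {
      assert(describeMalformed() == "malformed instrumentation profile data");
      assert(describeMalformed("number of counters is zero") ==
             "malformed instrumentation profile data: "
             "number of counters is zero");
    }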
namespace {
@@ -217,7 +246,7 @@ void SoftInstrProfErrors::addError(instrprof_error IE) {
}
std::string InstrProfError::message() const {
- return getInstrProfErrString(Err);
+ return getInstrProfErrString(Err, Msg);
}
char InstrProfError::ID = 0;
@@ -878,18 +907,23 @@ static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
Error ValueProfData::checkIntegrity() {
if (NumValueKinds > IPVK_Last + 1)
- return make_error<InstrProfError>(instrprof_error::malformed);
- // Total size needs to be mulltiple of quadword size.
+ return make_error<InstrProfError>(
+ instrprof_error::malformed, "number of value profile kinds is invalid");
+ // Total size needs to be a multiple of quadword size.
if (TotalSize % sizeof(uint64_t))
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(
+ instrprof_error::malformed, "total size is not multiples of quardword");
ValueProfRecord *VR = getFirstValueProfRecord(this);
for (uint32_t K = 0; K < this->NumValueKinds; K++) {
if (VR->Kind > IPVK_Last)
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "value kind is invalid");
VR = getValueProfRecordNext(VR);
if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize)
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "value profile address is greater than total size");
}
return Error::success();
}
@@ -1098,10 +1132,14 @@ bool needsComdatForCounter(const Function &F, const Module &M) {
bool isIRPGOFlagSet(const Module *M) {
auto IRInstrVar =
M->getNamedGlobal(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
- if (!IRInstrVar || IRInstrVar->isDeclaration() ||
- IRInstrVar->hasLocalLinkage())
+ if (!IRInstrVar || IRInstrVar->hasLocalLinkage())
return false;
+ // For CSPGO+LTO, this variable might be marked as non-prevailing and we only
+ // have the decl.
+ if (IRInstrVar->isDeclaration())
+ return true;
+
// Check if the flag is set.
if (!IRInstrVar->hasInitializer())
return false;
@@ -1137,8 +1175,8 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) {
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
-void createIRLevelProfileFlagVar(Module &M, bool IsCS,
- bool InstrEntryBBEnabled) {
+GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS,
+ bool InstrEntryBBEnabled) {
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
Type *IntTy64 = Type::getInt64Ty(M.getContext());
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
@@ -1155,6 +1193,7 @@ void createIRLevelProfileFlagVar(Module &M, bool IsCS,
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
}
+ return IRLevelVersionVariable;
}
// Create the variable for the profile file name.
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 8a4470ae207d..b4e8025dbef9 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -204,13 +204,15 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
return success();
}
if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "number of value kinds is invalid");
Line++;
for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
VP_READ_ADVANCE(ValueKind);
if (ValueKind > IPVK_Last)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "value kind is invalid");
VP_READ_ADVANCE(NumValueSites);
if (!NumValueSites)
continue;
@@ -268,16 +270,18 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
if (Line.is_at_end())
return error(instrprof_error::truncated);
if ((Line++)->getAsInteger(0, Record.Hash))
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "function hash is not a valid integer");
// Read the number of counters.
uint64_t NumCounters;
if (Line.is_at_end())
return error(instrprof_error::truncated);
if ((Line++)->getAsInteger(10, NumCounters))
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "number of counters is not a valid integer");
if (NumCounters == 0)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "number of counters is zero");
// Read each counter and fill our internal storage with the values.
Record.Clear();
@@ -287,7 +291,7 @@ Error TextInstrProfReader::readNextRecord(NamedInstrProfRecord &Record) {
return error(instrprof_error::truncated);
uint64_t Count;
if ((Line++)->getAsInteger(10, Count))
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "count is invalid");
Record.Counts.push_back(Count);
}
@@ -332,10 +336,12 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
// If there isn't enough space for another header, this is probably just
// garbage at the end of the file.
if (CurrentPos + sizeof(RawInstrProf::Header) > End)
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "not enough space for another header");
// The writer ensures each profile is padded to start at an aligned address.
if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "insufficient padding");
// The magic should have the same byte order as in the previous header.
uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
@@ -366,6 +372,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (GET_VERSION(Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
+ BinaryIdsSize = swap(Header.BinaryIdsSize);
+ if (BinaryIdsSize % sizeof(uint64_t))
+ return error(instrprof_error::bad_header);
+
CountersDelta = swap(Header.CountersDelta);
NamesDelta = swap(Header.NamesDelta);
auto DataSize = swap(Header.DataSize);
@@ -374,7 +384,6 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
auto PaddingBytesAfterCounters = swap(Header.PaddingBytesAfterCounters);
NamesSize = swap(Header.NamesSize);
ValueKindLast = swap(Header.ValueKindLast);
- BinaryIdsSize = swap(Header.BinaryIdsSize);
auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
auto PaddingSize = getNumPaddingBytes(NamesSize);
@@ -402,6 +411,10 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
NamesStart = Start + NamesOffset;
ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
+ const uint8_t *BufferEnd = (const uint8_t *)DataBuffer->getBufferEnd();
+ if (BinaryIdsStart + BinaryIdsSize > BufferEnd)
+ return error(instrprof_error::bad_header);
+
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = createSymtab(*NewSymtab.get()))
return E;
@@ -426,21 +439,46 @@ template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::readRawCounts(
InstrProfRecord &Record) {
uint32_t NumCounters = swap(Data->NumCounters);
- IntPtrT CounterPtr = Data->CounterPtr;
if (NumCounters == 0)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed, "number of counters is zero");
+ IntPtrT CounterPtr = Data->CounterPtr;
auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
ptrdiff_t MaxNumCounters = NamesStartAsCounter - CountersStart;
// Check bounds. Note that the counter pointer embedded in the data record
// may itself be corrupt.
if (MaxNumCounters < 0 || NumCounters > (uint32_t)MaxNumCounters)
- return error(instrprof_error::malformed);
+ return error(instrprof_error::malformed,
+ "counter pointer is out of bounds");
+
+ // We need to compute the in-buffer counter offset from the in-memory address
+ // distance. The initial CountersDelta is the in-memory address difference
+ // start(__llvm_prf_cnts)-start(__llvm_prf_data), so SrcData->CounterPtr -
+ // CountersDelta computes the offset into the in-buffer counter section.
+ //
+ // CountersDelta decreases as we advance to the next data record.
ptrdiff_t CounterOffset = getCounterOffset(CounterPtr);
- if (CounterOffset < 0 || CounterOffset > MaxNumCounters ||
- ((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
- return error(instrprof_error::malformed);
+ CountersDelta -= sizeof(*Data);
+ if (CounterOffset < 0)
+ return error(
+ instrprof_error::malformed,
+ ("counter offset " + Twine(CounterOffset) + " is negative").str());
+
+ if (CounterOffset > MaxNumCounters)
+ return error(instrprof_error::malformed,
+ ("counter offset " + Twine(CounterOffset) +
+ " is greater than the maximum number of counters " +
+ Twine((uint32_t)MaxNumCounters))
+ .str());
+
+ if (((uint32_t)CounterOffset + NumCounters) > (uint32_t)MaxNumCounters)
+ return error(instrprof_error::malformed,
+ ("number of counters " +
+ Twine(((uint32_t)CounterOffset + NumCounters)) +
+ " is greater than the maximum number of counters " +
+ Twine((uint32_t)MaxNumCounters))
+ .str());
auto RawCounts = makeArrayRef(getCounter(CounterOffset), NumCounters);
@@ -512,6 +550,10 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
return success();
}
+static size_t RoundUp(size_t size, size_t align) {
+ return (size + align - 1) & ~(align - 1);
+}
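RoundUp is the standard power-of-two alignment idiom, used below to skip the zero padding that follows each build ID; a quick check of its behaviour with illustrative sizes:

    #include <cassert>
    #include <cstddef>

    static size_t RoundUp(size_t size, size_t align) {
      // Only valid when align is a power of two, which holds for
      // sizeof(uint64_t).
      return (size + align - 1) & ~(align - 1);
    }

    int main() {
      assert(RoundUp(0, 8) == 0);
      assert(RoundUp(1, 8) == 8);
      assert(RoundUp(8, 8) == 8);
      assert(RoundUp(20, 8) == 24); // a 20-byte build ID is padded to 24 bytes
    }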
+
template <class IntPtrT>
Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
if (BinaryIdsSize == 0)
@@ -519,19 +561,38 @@ Error RawInstrProfReader<IntPtrT>::printBinaryIds(raw_ostream &OS) {
OS << "Binary IDs: \n";
const uint8_t *BI = BinaryIdsStart;
- while (BI < BinaryIdsStart + BinaryIdsSize) {
+ const uint8_t *BIEnd = BinaryIdsStart + BinaryIdsSize;
+ while (BI < BIEnd) {
+ size_t Remaining = BIEnd - BI;
+
+ // There should be enough left to read the binary ID size field.
+ if (Remaining < sizeof(uint64_t))
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "not enough data to read binary id length");
+
uint64_t BinaryIdLen = swap(*reinterpret_cast<const uint64_t *>(BI));
+
+ // There should be enough left to read the binary ID size field, and the
+ // binary ID.
+ if (Remaining < sizeof(BinaryIdLen) + BinaryIdLen)
+ return make_error<InstrProfError>(
+ instrprof_error::malformed, "not enough data to read binary id data");
+
// Increment by binary id length data type size.
BI += sizeof(BinaryIdLen);
if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(
+ instrprof_error::malformed,
+ "binary id that is read is bigger than buffer size");
for (uint64_t I = 0; I < BinaryIdLen; I++)
OS << format("%02x", BI[I]);
OS << "\n";
- // Increment by binary id data length.
- BI += BinaryIdLen;
+ // Increment by binary id data length, rounded to the next 8 bytes. This
+ // accounts for the zero-padding after each build ID.
+ BI += RoundUp(BinaryIdLen, sizeof(uint64_t));
if (BI > (const uint8_t *)DataBuffer->getBufferEnd())
return make_error<InstrProfError>(instrprof_error::malformed);
}
@@ -624,7 +685,8 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords(
Data = (*Iter);
if (Data.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "profile data is empty");
return Error::success();
}
@@ -638,7 +700,8 @@ Error InstrProfReaderIndex<HashTableImpl>::getRecords(
Data = *RecordIterator;
if (Data.empty())
- return make_error<InstrProfError>(instrprof_error::malformed);
+ return make_error<InstrProfError>(instrprof_error::malformed,
+ "profile data is empty");
return Error::success();
}
@@ -669,7 +732,7 @@ public:
return Underlying.getRecords(FuncName, Data);
}
};
-}
+} // namespace
/// A remapper that applies remappings based on a symbol remapping file.
template <typename HashTableImpl>
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 987c0b175d3c..492e3541cb5a 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -215,8 +215,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
InstrProfRecord &Dest = Where->second;
uint64_t ValueCutoff = FuncFilter.ValueCutoff;
- if (!FuncFilter.NameFilter.empty() &&
- Name.find(FuncFilter.NameFilter) != Name.npos)
+ if (!FuncFilter.NameFilter.empty() && Name.contains(FuncFilter.NameFilter))
ValueCutoff = 0;
Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff);
@@ -272,7 +271,7 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary,
ProfileSummary &PS) {
using namespace IndexedInstrProf;
- std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
+ const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary();
TheSummary->NumSummaryFields = Summary::NumKinds;
TheSummary->NumCutoffEntries = Res.size();
TheSummary->set(Summary::MaxFunctionCount, PS.getMaxFunctionCount());
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index 2ab0f0cbc17a..f54df7b295e3 100644
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -80,7 +80,7 @@ const ArrayRef<uint32_t> ProfileSummaryBuilder::DefaultCutoffs =
DefaultCutoffsData;
const ProfileSummaryEntry &
-ProfileSummaryBuilder::getEntryForPercentile(SummaryEntryVector &DS,
+ProfileSummaryBuilder::getEntryForPercentile(const SummaryEntryVector &DS,
uint64_t Percentile) {
auto It = partition_point(DS, [=](const ProfileSummaryEntry &Entry) {
return Entry.Cutoff < Percentile;
@@ -154,7 +154,8 @@ void ProfileSummaryBuilder::computeDetailedSummary() {
}
}
-uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
+uint64_t
+ProfileSummaryBuilder::getHotCountThreshold(const SummaryEntryVector &DS) {
auto &HotEntry =
ProfileSummaryBuilder::getEntryForPercentile(DS, ProfileSummaryCutoffHot);
uint64_t HotCountThreshold = HotEntry.MinCount;
@@ -163,7 +164,8 @@ uint64_t ProfileSummaryBuilder::getHotCountThreshold(SummaryEntryVector &DS) {
return HotCountThreshold;
}
-uint64_t ProfileSummaryBuilder::getColdCountThreshold(SummaryEntryVector &DS) {
+uint64_t
+ProfileSummaryBuilder::getColdCountThreshold(const SummaryEntryVector &DS) {
auto &ColdEntry = ProfileSummaryBuilder::getEntryForPercentile(
DS, ProfileSummaryCutoffCold);
uint64_t ColdCountThreshold = ColdEntry.MinCount;
@@ -181,17 +183,17 @@ std::unique_ptr<ProfileSummary> SampleProfileSummaryBuilder::getSummary() {
std::unique_ptr<ProfileSummary>
SampleProfileSummaryBuilder::computeSummaryForProfiles(
- const StringMap<sampleprof::FunctionSamples> &Profiles) {
+ const SampleProfileMap &Profiles) {
assert(NumFunctions == 0 &&
"This can only be called on an empty summary builder");
- StringMap<sampleprof::FunctionSamples> ContextLessProfiles;
- const StringMap<sampleprof::FunctionSamples> *ProfilesToUse = &Profiles;
+ sampleprof::SampleProfileMap ContextLessProfiles;
+ const sampleprof::SampleProfileMap *ProfilesToUse = &Profiles;
// For CSSPGO, context-sensitive profile effectively split a function profile
// into many copies each representing the CFG profile of a particular calling
// context. That makes the count distribution looks more flat as we now have
// more function profiles each with lower counts, which in turn leads to lower
- // hot thresholds. To compensate for that, by defauly we merge context
- // profiles before coumputing profile summary.
+ // hot thresholds. To compensate for that, by default we merge context
+ // profiles before computing profile summary.
if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
!UseContextLessSummary.getNumOccurrences())) {
for (const auto &I : Profiles) {
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 60e707b146d5..fd8fd3b675b7 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -198,6 +198,21 @@ raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
return OS;
}
+void sampleprof::sortFuncProfiles(
+ const SampleProfileMap &ProfileMap,
+ std::vector<NameFunctionSamples> &SortedProfiles) {
+ for (const auto &I : ProfileMap) {
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
+ }
+ llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
+ const NameFunctionSamples &B) {
+ if (A.second->getTotalSamples() == B.second->getTotalSamples())
+ return A.first < B.first;
+ return A.second->getTotalSamples() > B.second->getTotalSamples();
+ });
+}
+
unsigned FunctionSamples::getOffset(const DILocation *DIL) {
return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
0xffff;
@@ -230,9 +245,13 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
else
Discriminator = DIL->getBaseDiscriminator();
+ // Use C++ linkage name if possible.
+ StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = PrevDIL->getScope()->getSubprogram()->getName();
+
S.push_back(
- std::make_pair(LineLocation(getOffset(DIL), Discriminator),
- PrevDIL->getScope()->getSubprogram()->getLinkageName()));
+ std::make_pair(LineLocation(getOffset(DIL), Discriminator), Name));
PrevDIL = DIL;
}
if (S.size() == 0)
@@ -245,7 +264,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamples(
}
void FunctionSamples::findAllNames(DenseSet<StringRef> &NameSet) const {
- NameSet.insert(Name);
+ NameSet.insert(getName());
for (const auto &BS : BodySamples)
for (const auto &TS : BS.second.getCallTargets())
NameSet.insert(TS.getKey());
@@ -316,7 +335,7 @@ std::error_code ProfileSymbolList::read(const uint8_t *Data,
void SampleContextTrimmer::trimAndMergeColdContextProfiles(
uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,
- uint32_t ColdContextFrameLength) {
+ uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly) {
if (!TrimColdContext && !MergeColdContext)
return;
@@ -324,25 +343,32 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
if (ColdCountThreshold == 0)
return;
+  // Trimming base profiles only is mainly to honor the preinliner decision.
+  // When MergeColdContext is true, the preinliner decision is not honored
+  // anyway, so turn off TrimBaseProfileOnly.
+ if (MergeColdContext)
+ TrimBaseProfileOnly = false;
+
// Filter the cold profiles from ProfileMap and move them into a tmp
// container
- std::vector<std::pair<StringRef, const FunctionSamples *>> ColdProfiles;
+ std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
for (const auto &I : ProfileMap) {
+ const SampleContext &Context = I.first;
const FunctionSamples &FunctionProfile = I.second;
- if (FunctionProfile.getTotalSamples() >= ColdCountThreshold)
- continue;
- ColdProfiles.emplace_back(I.getKey(), &I.second);
+ if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
+ (!TrimBaseProfileOnly || Context.isBaseContext()))
+ ColdProfiles.emplace_back(Context, &I.second);
}
// Remove the cold profile from ProfileMap and merge them into
// MergedProfileMap by the last K frames of context
- StringMap<FunctionSamples> MergedProfileMap;
+ SampleProfileMap MergedProfileMap;
for (const auto &I : ColdProfiles) {
if (MergeColdContext) {
- auto Ret = MergedProfileMap.try_emplace(
- I.second->getContext().getContextWithLastKFrames(
- ColdContextFrameLength),
- FunctionSamples());
+ auto MergedContext = I.second->getContext().getContextFrames();
+ if (ColdContextFrameLength < MergedContext.size())
+ MergedContext = MergedContext.take_back(ColdContextFrameLength);
+ auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
FunctionSamples &MergedProfile = Ret.first->second;
MergedProfile.merge(*I.second);
}
@@ -353,16 +379,15 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
for (const auto &I : MergedProfileMap) {
// Filter the cold merged profile
if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
- ProfileMap.find(I.getKey()) == ProfileMap.end())
+ ProfileMap.find(I.first) == ProfileMap.end())
continue;
// Merge the profile if the original profile exists, otherwise just insert
// as a new profile
- auto Ret = ProfileMap.try_emplace(I.getKey(), FunctionSamples());
+ auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
if (Ret.second) {
- SampleContext FContext(Ret.first->first(), RawContext);
+ SampleContext FContext(Ret.first->first, RawContext);
FunctionSamples &FProfile = Ret.first->second;
FProfile.setContext(FContext);
- FProfile.setName(FContext.getNameWithoutContext());
}
FunctionSamples &OrigProfile = Ret.first->second;
OrigProfile.merge(I.second);
@@ -370,12 +395,12 @@ void SampleContextTrimmer::trimAndMergeColdContextProfiles(
}
void SampleContextTrimmer::canonicalizeContextProfiles() {
- std::vector<StringRef> ProfilesToBeRemoved;
- StringMap<FunctionSamples> ProfilesToBeAdded;
+ std::vector<SampleContext> ProfilesToBeRemoved;
+ SampleProfileMap ProfilesToBeAdded;
for (auto &I : ProfileMap) {
FunctionSamples &FProfile = I.second;
- StringRef ContextStr = FProfile.getNameWithContext();
- if (I.first() == ContextStr)
+ SampleContext &Context = FProfile.getContext();
+ if (I.first == Context)
continue;
// Use the context string from FunctionSamples to update the keys of
@@ -390,10 +415,10 @@ void SampleContextTrimmer::canonicalizeContextProfiles() {
// with different profiles) from the map can cause a conflict if they are
// not handled in a right order. This can be solved by just caching the
// profiles to be added.
- auto Ret = ProfilesToBeAdded.try_emplace(ContextStr, FProfile);
+ auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
(void)Ret;
assert(Ret.second && "Context conflict during canonicalization");
- ProfilesToBeRemoved.push_back(I.first());
+ ProfilesToBeRemoved.push_back(I.first);
}
for (auto &I : ProfilesToBeRemoved) {
@@ -401,7 +426,7 @@ void SampleContextTrimmer::canonicalizeContextProfiles() {
}
for (auto &I : ProfilesToBeAdded) {
- ProfileMap.try_emplace(I.first(), I.second);
+ ProfileMap.emplace(I.first, I.second);
}
}
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6058eddb13dc..c99a19020511 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -53,21 +53,23 @@ using namespace sampleprof;
// For ext-binary format profiles, the flag is set in the summary.
static cl::opt<bool> ProfileIsFSDisciminator(
"profile-isfs", cl::Hidden, cl::init(false),
- cl::desc("Profile uses flow senstive discriminators"));
+ cl::desc("Profile uses flow sensitive discriminators"));
/// Dump the function profile for \p FName.
///
-/// \param FName Name of the function to print.
+/// \param FContext Name + context of the function to print.
/// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(StringRef FName,
+void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
raw_ostream &OS) {
- OS << "Function: " << FName << ": " << Profiles[FName];
+ OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
}
/// Dump all the function profiles found on stream \p OS.
void SampleProfileReader::dump(raw_ostream &OS) {
- for (const auto &I : Profiles)
- dumpFunctionProfile(I.getKey(), OS);
+ std::vector<NameFunctionSamples> V;
+ sortFuncProfiles(Profiles, V);
+ for (const auto &I : V)
+ dumpFunctionProfile(I.first, OS);
}
/// Parse \p Input as function head.
@@ -249,6 +251,7 @@ std::error_code SampleProfileReaderText::readImpl() {
bool SeenMetadata = false;
ProfileIsFS = ProfileIsFSDisciminator;
+ FunctionSamples::ProfileIsFS = ProfileIsFS;
for (; !LineIt.is_at_eof(); ++LineIt) {
if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
continue;
@@ -273,12 +276,11 @@ std::error_code SampleProfileReaderText::readImpl() {
return sampleprof_error::malformed;
}
SeenMetadata = false;
- SampleContext FContext(FName);
+ SampleContext FContext(FName, CSNameTable);
if (FContext.hasContext())
++CSProfileCount;
Profiles[FContext] = FunctionSamples();
FunctionSamples &FProfile = Profiles[FContext];
- FProfile.setName(FContext.getNameWithoutContext());
FProfile.setContext(FContext);
MergeResult(Result, FProfile.addTotalSamples(NumSamples));
MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
@@ -450,6 +452,13 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
return NameTable[*Idx];
}
+ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ return SampleContext(*FName);
+}
+
ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
if (!FixedLengthMD5)
return SampleProfileReaderBinary::readStringFromTable();
@@ -576,18 +585,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
if (std::error_code EC = NumHeadSamples.getError())
return EC;
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
+ ErrorOr<SampleContext> FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- SampleContext FContext(*FName);
- Profiles[FContext] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[FContext];
- FProfile.setName(FContext.getNameWithoutContext());
- FProfile.setContext(FContext);
+ Profiles[*FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[*FContext];
+ FProfile.setContext(*FContext);
FProfile.addHeadSamples(*NumHeadSamples);
- if (FContext.hasContext())
+ if (FContext->hasContext())
CSProfileCount++;
if (std::error_code EC = readProfile(FProfile))
@@ -597,6 +604,7 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
std::error_code SampleProfileReaderBinary::readImpl() {
ProfileIsFS = ProfileIsFSDisciminator;
+ FunctionSamples::ProfileIsFS = ProfileIsFS;
while (!at_eof()) {
if (std::error_code EC = readFuncProfile(Data))
return EC;
@@ -605,6 +613,31 @@ std::error_code SampleProfileReaderBinary::readImpl() {
return sampleprof_error::success;
}
+ErrorOr<SampleContextFrames>
+SampleProfileReaderExtBinaryBase::readContextFromTable() {
+ auto ContextIdx = readNumber<uint32_t>();
+ if (std::error_code EC = ContextIdx.getError())
+ return EC;
+ if (*ContextIdx >= CSNameTable->size())
+ return sampleprof_error::truncated_name_table;
+ return (*CSNameTable)[*ContextIdx];
+}
+
+ErrorOr<SampleContext>
+SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
+ if (ProfileIsCS) {
+ auto FContext(readContextFromTable());
+ if (std::error_code EC = FContext.getError())
+ return EC;
+ return SampleContext(*FContext);
+ } else {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ return SampleContext(*FName);
+ }
+}
+
std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
Data = Start;
@@ -632,11 +665,17 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
return EC;
break;
}
+ case SecCSNameTable: {
+ if (std::error_code EC = readCSNameTableSec())
+ return EC;
+ break;
+ }
case SecLBRProfile:
if (std::error_code EC = readFuncProfiles())
return EC;
break;
case SecFuncOffsetTable:
+ FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
if (std::error_code EC = readFuncOffsetTable())
return EC;
break;
@@ -682,17 +721,27 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
return EC;
FuncOffsetTable.reserve(*Size);
+
+ if (FuncOffsetsOrdered) {
+ OrderedFuncOffsets =
+ std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
+ OrderedFuncOffsets->reserve(*Size);
+ }
+
for (uint32_t I = 0; I < *Size; ++I) {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
auto Offset = readNumber<uint64_t>();
if (std::error_code EC = Offset.getError())
return EC;
- FuncOffsetTable[*FName] = *Offset;
+ FuncOffsetTable[*FContext] = *Offset;
+ if (FuncOffsetsOrdered)
+ OrderedFuncOffsets->emplace_back(*FContext, *Offset);
}
+
return sampleprof_error::success;
}
@@ -721,75 +770,77 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
}
- if (useMD5()) {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (ProfileIsCS) {
+ DenseSet<uint64_t> FuncGuidsToUse;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse)
+ FuncGuidsToUse.insert(Function::getGUID(Name));
}
- } else if (FunctionSamples::ProfileIsCS) {
- // Compute the ordered set of names, so we can
- // get all context profiles under a subtree by
- // iterating through the ordered names.
- struct Comparer {
- // Ignore the closing ']' when ordering context
- bool operator()(const StringRef &L, const StringRef &R) const {
- return L.substr(0, L.size() - 1) < R.substr(0, R.size() - 1);
+
+      // For each function in the current module, load all context profiles
+      // for the function as well as their callee contexts, which can help
+      // profile-guided importing for ThinLTO. This can be achieved by
+      // walking through an ordered context container, where contexts are
+      // laid out as if they were walked in preorder of a context trie.
+      // While traversing the trie, a link to the highest common ancestor
+      // node is kept so that all of its descendants will be loaded.
+ assert(OrderedFuncOffsets.get() &&
+ "func offset table should always be sorted in CS profile");
+ const SampleContext *CommonContext = nullptr;
+ for (const auto &NameOffset : *OrderedFuncOffsets) {
+ const auto &FContext = NameOffset.first;
+ auto FName = FContext.getName();
+      // For a function in the current module, keep its farthest ancestor
+      // context. This can be used to load the function itself and its child
+      // and sibling contexts.
+ if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) ||
+ (!useMD5() && (FuncsToUse.count(FName) ||
+ (Remapper && Remapper->exist(FName))))) {
+ if (!CommonContext || !CommonContext->IsPrefixOf(FContext))
+ CommonContext = &FContext;
}
- };
- std::set<StringRef, Comparer> OrderedNames;
- for (auto Name : FuncOffsetTable) {
- OrderedNames.insert(Name.first);
- }
- // For each function in current module, load all
- // context profiles for the function.
- for (auto NameOffset : FuncOffsetTable) {
- StringRef ContextName = NameOffset.first;
- SampleContext FContext(ContextName);
- auto FuncName = FContext.getNameWithoutContext();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
-
- // For each context profile we need, try to load
- // all context profile in the subtree. This can
- // help profile guided importing for ThinLTO.
- auto It = OrderedNames.find(ContextName);
- while (It != OrderedNames.end() &&
- It->startswith(ContextName.substr(0, ContextName.size() - 1))) {
- const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It];
+ if (CommonContext == &FContext ||
+ (CommonContext && CommonContext->IsPrefixOf(FContext))) {
+ // Load profile for the current context which originated from
+ // the common ancestor.
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
assert(FuncProfileAddr < End && "out of LBRProfile section");
if (std::error_code EC = readFuncProfile(FuncProfileAddr))
return EC;
- // Remove loaded context profile so we won't
- // load it repeatedly.
- It = OrderedNames.erase(It);
}
}
} else {
- for (auto NameOffset : FuncOffsetTable) {
- SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getNameWithoutContext();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ } else {
+ for (auto NameOffset : FuncOffsetTable) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getName();
+ if (!FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName)))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
}
}
Data = End;
}
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
- assert(ProfileIsCS == (CSProfileCount > 0) &&
+ assert((!CSProfileCount || ProfileIsCS) &&
"Section flag should be consistent with actual profile");
return sampleprof_error::success;
}
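
To see why the preorder-ordered offset table walked above lets a single ancestor link pull in a whole subtree of contexts, here is a minimal standalone sketch of the prefix relation; it models frames as plain strings and is not the SampleContext API:

#include <cassert>
#include <string>
#include <vector>

using Frames = std::vector<std::string>;

// Hypothetical, simplified version of the "is ancestor" test: context A is a
// prefix of context B if A's frames lead B's frames. With contexts sorted by
// their frames, every descendant of A sits contiguously right after A, so one
// pointer to the farthest matching ancestor is enough to load A and all
// contexts underneath it in a single forward pass.
static bool isPrefixOfSketch(const Frames &A, const Frames &B) {
  if (A.size() > B.size())
    return false;
  for (size_t I = 0; I < A.size(); ++I)
    if (A[I] != B[I])
      return false;
  return true;
}

int main() {
  Frames MainFoo = {"main", "foo"};
  Frames MainFooBar = {"main", "foo", "bar"};
  Frames MainGoo = {"main", "goo"};
  assert(isPrefixOfSketch(MainFoo, MainFooBar)); // callee context gets loaded
  assert(!isPrefixOfSketch(MainFoo, MainGoo));   // sibling subtree does not
  return 0;
}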
@@ -885,6 +936,7 @@ std::error_code SampleProfileReaderCompactBinary::readImpl() {
// given a module.
bool LoadFuncsToBeUsed = collectFuncsFromModule();
ProfileIsFS = ProfileIsFSDisciminator;
+ FunctionSamples::ProfileIsFS = ProfileIsFS;
std::vector<uint64_t> OffsetsToUse;
if (!LoadFuncsToBeUsed) {
// load all the function profiles.
@@ -983,22 +1035,62 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
return SampleProfileReaderBinary::readNameTable();
}
+// Read in the CS name table section, which contains a list of context
+// vectors. Each element of a context vector, aka a frame, refers to an
+// underlying raw function name stored in the name table, as well as a
+// callsite location that is only meaningful for non-leaf frames.
+std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
+ auto Size = readNumber<uint32_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+
+ std::vector<SampleContextFrameVector> *PNameVec =
+ new std::vector<SampleContextFrameVector>();
+ PNameVec->reserve(*Size);
+ for (uint32_t I = 0; I < *Size; ++I) {
+ PNameVec->emplace_back(SampleContextFrameVector());
+ auto ContextSize = readNumber<uint32_t>();
+ if (std::error_code EC = ContextSize.getError())
+ return EC;
+ for (uint32_t J = 0; J < *ContextSize; ++J) {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ auto LineOffset = readNumber<uint64_t>();
+ if (std::error_code EC = LineOffset.getError())
+ return EC;
+
+ if (!isOffsetLegal(*LineOffset))
+ return std::error_code();
+
+ auto Discriminator = readNumber<uint64_t>();
+ if (std::error_code EC = Discriminator.getError())
+ return EC;
+
+ PNameVec->back().emplace_back(
+ FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
+ }
+ }
+
+ // From this point the underlying object of CSNameTable should be immutable.
+ CSNameTable.reset(PNameVec);
+ return sampleprof_error::success;
+}
+
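For orientation, the nested structure decoded by readCSNameTableSec can be mirrored with the illustrative types below; the names are hypothetical and every integer in the real section is ULEB128-encoded rather than fixed-width:

#include <cstdint>
#include <vector>

// Hypothetical in-memory mirror of one decoded CS name table section.
struct FrameSketch {
  uint32_t NameTableIndex; // index of the raw function name in the name table
  uint64_t LineOffset;     // callsite line offset (non-leaf frames)
  uint64_t Discriminator;  // callsite discriminator
};
// One calling context is a vector of frames; the section is a vector of
// contexts, indexed by the ContextIdx read in readContextFromTable().
using ContextSketch = std::vector<FrameSketch>;
using CSNameTableSketch = std::vector<ContextSketch>;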
std::error_code
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
while (Data < End) {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
+ auto FContext(readSampleContextFromTable());
+ if (std::error_code EC = FContext.getError())
return EC;
- SampleContext FContext(*FName);
- bool ProfileInMap = Profiles.count(FContext);
-
+ bool ProfileInMap = Profiles.count(*FContext);
if (ProfileIsProbeBased) {
auto Checksum = readNumber<uint64_t>();
if (std::error_code EC = Checksum.getError())
return EC;
if (ProfileInMap)
- Profiles[FContext].setFunctionHash(*Checksum);
+ Profiles[*FContext].setFunctionHash(*Checksum);
}
if (ProfileHasAttribute) {
@@ -1006,7 +1098,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
if (std::error_code EC = Attributes.getError())
return EC;
if (ProfileInMap)
- Profiles[FContext].getContext().setAllAttributes(*Attributes);
+ Profiles[*FContext].getContext().setAllAttributes(*Attributes);
}
}
@@ -1132,6 +1224,16 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
Flags.append("fs-discriminator,");
break;
+ case SecFuncOffsetTable:
+ if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
+ Flags.append("ordered,");
+ break;
+ case SecFuncMetadata:
+ if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased))
+ Flags.append("probe,");
+ if (hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute))
+ Flags.append("attr,");
+ break;
default:
break;
}
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 535f87968104..78006aab1541 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -41,23 +41,10 @@
using namespace llvm;
using namespace sampleprof;
-std::error_code SampleProfileWriter::writeFuncProfiles(
- const StringMap<FunctionSamples> &ProfileMap) {
- // Sort the ProfileMap by total samples.
- typedef std::pair<StringRef, const FunctionSamples *> NameFunctionSamples;
+std::error_code
+SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
std::vector<NameFunctionSamples> V;
- for (const auto &I : ProfileMap) {
- assert(I.getKey() == I.second.getNameWithContext() &&
- "Inconsistent profile map");
- V.push_back(std::make_pair(I.second.getNameWithContext(), &I.second));
- }
- llvm::stable_sort(
- V, [](const NameFunctionSamples &A, const NameFunctionSamples &B) {
- if (A.second->getTotalSamples() == B.second->getTotalSamples())
- return A.first > B.first;
- return A.second->getTotalSamples() > B.second->getTotalSamples();
- });
-
+ sortFuncProfiles(ProfileMap, V);
for (const auto &I : V) {
if (std::error_code EC = writeSample(*I.second))
return EC;
@@ -65,8 +52,7 @@ std::error_code SampleProfileWriter::writeFuncProfiles(
return sampleprof_error::success;
}
-std::error_code
-SampleProfileWriter::write(const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code SampleProfileWriter::write(const SampleProfileMap &ProfileMap) {
if (std::error_code EC = writeHeader(ProfileMap))
return EC;
@@ -130,8 +116,8 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterExtBinaryBase::write(
- const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code
+SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
if (std::error_code EC = writeHeader(ProfileMap))
return EC;
@@ -146,11 +132,28 @@ std::error_code SampleProfileWriterExtBinaryBase::write(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterExtBinaryBase::writeContextIdx(
+ const SampleContext &Context) {
+ if (Context.hasContext())
+ return writeCSNameIdx(Context);
+ else
+ return SampleProfileWriterBinary::writeNameIdx(Context.getName());
+}
+
+std::error_code
+SampleProfileWriterExtBinaryBase::writeCSNameIdx(const SampleContext &Context) {
+ const auto &Ret = CSNameTable.find(Context);
+ if (Ret == CSNameTable.end())
+ return sampleprof_error::truncated_name_table;
+ encodeULEB128(Ret->second, *OutputStream);
+ return sampleprof_error::success;
+}
+
std::error_code
SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
uint64_t Offset = OutputStream->tell();
- StringRef Name = S.getNameWithContext();
- FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
+ auto &Context = S.getContext();
+ FuncOffsetTable[Context] = Offset - SecLBRProfileStart;
encodeULEB128(S.getHeadSamples(), *OutputStream);
return writeBody(S);
}
@@ -162,24 +165,42 @@ std::error_code SampleProfileWriterExtBinaryBase::writeFuncOffsetTable() {
encodeULEB128(FuncOffsetTable.size(), OS);
// Write out FuncOffsetTable.
- for (auto Entry : FuncOffsetTable) {
- if (std::error_code EC =
- writeNameIdx(Entry.first, FunctionSamples::ProfileIsCS))
+ auto WriteItem = [&](const SampleContext &Context, uint64_t Offset) {
+ if (std::error_code EC = writeContextIdx(Context))
return EC;
- encodeULEB128(Entry.second, OS);
+ encodeULEB128(Offset, OS);
+ return (std::error_code)sampleprof_error::success;
+ };
+
+ if (FunctionSamples::ProfileIsCS) {
+    // Sort the contexts before writing them out. This lets the reader quickly
+    // load all context profiles for a function as well as their callee
+    // contexts, which helps profile-guided importing for ThinLTO.
+ std::map<SampleContext, uint64_t> OrderedFuncOffsetTable(
+ FuncOffsetTable.begin(), FuncOffsetTable.end());
+ for (const auto &Entry : OrderedFuncOffsetTable) {
+ if (std::error_code EC = WriteItem(Entry.first, Entry.second))
+ return EC;
+ }
+ addSectionFlag(SecFuncOffsetTable, SecFuncOffsetFlags::SecFlagOrdered);
+ } else {
+ for (const auto &Entry : FuncOffsetTable) {
+ if (std::error_code EC = WriteItem(Entry.first, Entry.second))
+ return EC;
+ }
}
+
FuncOffsetTable.clear();
return sampleprof_error::success;
}
std::error_code SampleProfileWriterExtBinaryBase::writeFuncMetadata(
- const StringMap<FunctionSamples> &Profiles) {
+ const SampleProfileMap &Profiles) {
if (!FunctionSamples::ProfileIsProbeBased && !FunctionSamples::ProfileIsCS)
return sampleprof_error::success;
auto &OS = *OutputStream;
for (const auto &Entry : Profiles) {
- if (std::error_code EC = writeNameIdx(Entry.second.getNameWithContext(),
- FunctionSamples::ProfileIsCS))
+ if (std::error_code EC = writeContextIdx(Entry.second.getContext()))
return EC;
if (FunctionSamples::ProfileIsProbeBased)
encodeULEB128(Entry.second.getFunctionHash(), OS);
@@ -195,7 +216,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
- stablizeNameTable(V);
+ stablizeNameTable(NameTable, V);
// Write out the MD5 name table. We wrote unencoded MD5 so reader can
// retrieve the name using the name index without having to read the
@@ -208,11 +229,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTable() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
for (const auto &I : ProfileMap) {
- assert(I.first() == I.second.getNameWithContext() &&
- "Inconsistent profile map");
- addName(I.second.getNameWithContext(), FunctionSamples::ProfileIsCS);
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ addContext(I.second.getContext());
addNames(I.second);
}
@@ -220,7 +240,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
// so compiler won't strip the suffix during profile matching after
// seeing the flag in the profile.
for (const auto &I : NameTable) {
- if (I.first.find(FunctionSamples::UniqSuffix) != StringRef::npos) {
+ if (I.first.contains(FunctionSamples::UniqSuffix)) {
addSectionFlag(SecNameTable, SecNameTableFlags::SecFlagUniqSuffix);
break;
}
@@ -231,6 +251,34 @@ std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
return sampleprof_error::success;
}
+std::error_code SampleProfileWriterExtBinaryBase::writeCSNameTableSection() {
+  // Sort the contexts to make CSNameTable deterministic.
+ std::set<SampleContext> OrderedContexts;
+ for (const auto &I : CSNameTable)
+ OrderedContexts.insert(I.first);
+ assert(OrderedContexts.size() == CSNameTable.size() &&
+ "Unmatched ordered and unordered contexts");
+ uint64_t I = 0;
+ for (auto &Context : OrderedContexts)
+ CSNameTable[Context] = I++;
+
+ auto &OS = *OutputStream;
+ encodeULEB128(OrderedContexts.size(), OS);
+ support::endian::Writer Writer(OS, support::little);
+ for (auto Context : OrderedContexts) {
+ auto Frames = Context.getContextFrames();
+ encodeULEB128(Frames.size(), OS);
+ for (auto &Callsite : Frames) {
+ if (std::error_code EC = writeNameIdx(Callsite.FuncName))
+ return EC;
+ encodeULEB128(Callsite.Location.LineOffset, OS);
+ encodeULEB128(Callsite.Location.Discriminator, OS);
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
std::error_code
SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
if (ProfSymList && ProfSymList->size() > 0)
@@ -241,8 +289,7 @@ SampleProfileWriterExtBinaryBase::writeProfileSymbolListSection() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
- SecType Type, uint32_t LayoutIdx,
- const StringMap<FunctionSamples> &ProfileMap) {
+ SecType Type, uint32_t LayoutIdx, const SampleProfileMap &ProfileMap) {
// The setting of SecFlagCompress should happen before markSectionStart.
if (Type == SecProfileSymbolList && ProfSymList && ProfSymList->toCompress())
setToCompressSection(SecProfileSymbolList);
@@ -266,6 +313,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
if (auto EC = writeNameTableSection(ProfileMap))
return EC;
break;
+ case SecCSNameTable:
+ if (auto EC = writeCSNameTableSection())
+ return EC;
+ break;
case SecLBRProfile:
SecLBRProfileStart = OutputStream->tell();
if (std::error_code EC = writeFuncProfiles(ProfileMap))
@@ -294,7 +345,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
}
std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
// The const indices passed to writeOneSection below are specifying the
// positions of the sections in SectionHdrLayout. Look at
// initSectionHdrLayout to find out where each section is located in
@@ -303,32 +354,33 @@ std::error_code SampleProfileWriterExtBinary::writeDefaultLayout(
return EC;
if (auto EC = writeOneSection(SecNameTable, 1, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecLBRProfile, 3, ProfileMap))
+ if (auto EC = writeOneSection(SecCSNameTable, 2, ProfileMap))
+ return EC;
+ if (auto EC = writeOneSection(SecLBRProfile, 4, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecProfileSymbolList, 4, ProfileMap))
+ if (auto EC = writeOneSection(SecProfileSymbolList, 5, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecFuncOffsetTable, 2, ProfileMap))
+ if (auto EC = writeOneSection(SecFuncOffsetTable, 3, ProfileMap))
return EC;
- if (auto EC = writeOneSection(SecFuncMetadata, 5, ProfileMap))
+ if (auto EC = writeOneSection(SecFuncMetadata, 6, ProfileMap))
return EC;
return sampleprof_error::success;
}
-static void
-splitProfileMapToTwo(const StringMap<FunctionSamples> &ProfileMap,
- StringMap<FunctionSamples> &ContextProfileMap,
- StringMap<FunctionSamples> &NoContextProfileMap) {
+static void splitProfileMapToTwo(const SampleProfileMap &ProfileMap,
+ SampleProfileMap &ContextProfileMap,
+ SampleProfileMap &NoContextProfileMap) {
for (const auto &I : ProfileMap) {
if (I.second.getCallsiteSamples().size())
- ContextProfileMap.insert({I.first(), I.second});
+ ContextProfileMap.insert({I.first, I.second});
else
- NoContextProfileMap.insert({I.first(), I.second});
+ NoContextProfileMap.insert({I.first, I.second});
}
}
std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
- const StringMap<FunctionSamples> &ProfileMap) {
- StringMap<FunctionSamples> ContextProfileMap, NoContextProfileMap;
+ const SampleProfileMap &ProfileMap) {
+ SampleProfileMap ContextProfileMap, NoContextProfileMap;
splitProfileMapToTwo(ProfileMap, ContextProfileMap, NoContextProfileMap);
if (auto EC = writeOneSection(SecProfSummary, 0, ProfileMap))
@@ -358,7 +410,7 @@ std::error_code SampleProfileWriterExtBinary::writeCtxSplitLayout(
}
std::error_code SampleProfileWriterExtBinary::writeSections(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
std::error_code EC;
if (SecLayout == DefaultLayout)
EC = writeDefaultLayout(ProfileMap);
@@ -369,8 +421,8 @@ std::error_code SampleProfileWriterExtBinary::writeSections(
return EC;
}
-std::error_code SampleProfileWriterCompactBinary::write(
- const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code
+SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
return EC;
if (std::error_code EC = writeFuncOffsetTable())
@@ -389,7 +441,7 @@ std::error_code SampleProfileWriterCompactBinary::write(
std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
auto &OS = *OutputStream;
if (FunctionSamples::ProfileIsCS)
- OS << "[" << S.getNameWithContext() << "]:" << S.getTotalSamples();
+ OS << "[" << S.getContext().toString() << "]:" << S.getTotalSamples();
else
OS << S.getName() << ":" << S.getTotalSamples();
@@ -445,27 +497,28 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName,
- bool IsContextName) {
- std::string BracketedName;
- if (IsContextName) {
- BracketedName = "[" + FName.str() + "]";
- FName = StringRef(BracketedName);
- }
+std::error_code
+SampleProfileWriterBinary::writeContextIdx(const SampleContext &Context) {
+ assert(!Context.hasContext() && "cs profile is not supported");
+ return writeNameIdx(Context.getName());
+}
- const auto &Ret = NameTable.find(FName);
- if (Ret == NameTable.end())
+std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
+ auto &NTable = getNameTable();
+ const auto &Ret = NTable.find(FName);
+ if (Ret == NTable.end())
return sampleprof_error::truncated_name_table;
encodeULEB128(Ret->second, *OutputStream);
return sampleprof_error::success;
}
-void SampleProfileWriterBinary::addName(StringRef FName, bool IsContextName) {
- if (IsContextName) {
- auto It = BracketedContextStr.insert("[" + FName.str() + "]");
- FName = StringRef(*It.first);
- }
- NameTable.insert(std::make_pair(FName, 0));
+void SampleProfileWriterBinary::addName(StringRef FName) {
+ auto &NTable = getNameTable();
+ NTable.insert(std::make_pair(FName, 0));
+}
+
+void SampleProfileWriterBinary::addContext(const SampleContext &Context) {
+ addName(Context.getName());
}
void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
@@ -485,7 +538,19 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) {
}
}
-void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
+void SampleProfileWriterExtBinaryBase::addContext(
+ const SampleContext &Context) {
+ if (Context.hasContext()) {
+ for (auto &Callsite : Context.getContextFrames())
+ SampleProfileWriterBinary::addName(Callsite.FuncName);
+ CSNameTable.insert(std::make_pair(Context, 0));
+ } else {
+ SampleProfileWriterBinary::addName(Context.getName());
+ }
+}
+
+void SampleProfileWriterBinary::stablizeNameTable(
+ MapVector<StringRef, uint32_t> &NameTable, std::set<StringRef> &V) {
// Sort the names to make NameTable deterministic.
for (const auto &I : NameTable)
V.insert(I.first);
@@ -497,7 +562,7 @@ void SampleProfileWriterBinary::stablizeNameTable(std::set<StringRef> &V) {
std::error_code SampleProfileWriterBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
- stablizeNameTable(V);
+ stablizeNameTable(NameTable, V);
// Write out the name table.
encodeULEB128(NameTable.size(), OS);
@@ -526,8 +591,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
// Write out FuncOffsetTable.
for (auto Entry : FuncOffsetTable) {
- if (std::error_code EC =
- writeNameIdx(Entry.first, FunctionSamples::ProfileIsCS))
+ if (std::error_code EC = writeNameIdx(Entry.first))
return EC;
encodeULEB128(Entry.second, OS);
}
@@ -537,7 +601,7 @@ std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
auto &OS = *OutputStream;
std::set<StringRef> V;
- stablizeNameTable(V);
+ stablizeNameTable(NameTable, V);
// Write out the name table.
encodeULEB128(NameTable.size(), OS);
@@ -556,8 +620,8 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterBinary::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
+std::error_code
+SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
writeMagicIdent(Format);
computeSummary(ProfileMap);
@@ -566,9 +630,8 @@ std::error_code SampleProfileWriterBinary::writeHeader(
// Generate the name table for all the functions referenced in the profile.
for (const auto &I : ProfileMap) {
- assert(I.first() == I.second.getNameWithContext() &&
- "Inconsistent profile map");
- addName(I.first(), FunctionSamples::ProfileIsCS);
+ assert(I.first == I.second.getContext() && "Inconsistent profile map");
+ addContext(I.first);
addNames(I.second);
}
@@ -642,7 +705,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
auto &OS = *OutputStream;
FileStart = OS.tell();
writeMagicIdent(Format);
@@ -652,7 +715,7 @@ std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
}
std::error_code SampleProfileWriterCompactBinary::writeHeader(
- const StringMap<FunctionSamples> &ProfileMap) {
+ const SampleProfileMap &ProfileMap) {
support::endian::Writer Writer(*OutputStream, support::little);
if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
return EC;
@@ -671,7 +734,8 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
encodeULEB128(Summary->getMaxFunctionCount(), OS);
encodeULEB128(Summary->getNumCounts(), OS);
encodeULEB128(Summary->getNumFunctions(), OS);
- std::vector<ProfileSummaryEntry> &Entries = Summary->getDetailedSummary();
+ const std::vector<ProfileSummaryEntry> &Entries =
+ Summary->getDetailedSummary();
encodeULEB128(Entries.size(), OS);
for (auto Entry : Entries) {
encodeULEB128(Entry.Cutoff, OS);
@@ -682,9 +746,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
}
std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
auto &OS = *OutputStream;
-
- if (std::error_code EC =
- writeNameIdx(S.getNameWithContext(), FunctionSamples::ProfileIsCS))
+ if (std::error_code EC = writeContextIdx(S.getContext()))
return EC;
encodeULEB128(S.getTotalSamples(), OS);
@@ -803,8 +865,7 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
return std::move(Writer);
}
-void SampleProfileWriter::computeSummary(
- const StringMap<FunctionSamples> &ProfileMap) {
+void SampleProfileWriter::computeSummary(const SampleProfileMap &ProfileMap) {
SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
Summary = Builder.computeSummaryForProfiles(ProfileMap);
}
diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp
index 2993892097e7..b3136a91e7f5 100644
--- a/llvm/lib/Support/AArch64TargetParser.cpp
+++ b/llvm/lib/Support/AArch64TargetParser.cpp
@@ -98,6 +98,8 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions,
Features.push_back("+sve2-sha3");
if (Extensions & AEK_SVE2BITPERM)
Features.push_back("+sve2-bitperm");
+ if (Extensions & AArch64::AEK_TME)
+ Features.push_back("+tme");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");
if (Extensions & AEK_BRBE)
@@ -118,6 +120,8 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions,
bool AArch64::getArchFeatures(AArch64::ArchKind AK,
std::vector<StringRef> &Features) {
+ if (AK == ArchKind::ARMV8A)
+ Features.push_back("+v8a");
if (AK == ArchKind::ARMV8_1A)
Features.push_back("+v8.1a");
if (AK == ArchKind::ARMV8_2A)
@@ -132,6 +136,12 @@ bool AArch64::getArchFeatures(AArch64::ArchKind AK,
Features.push_back("+v8.6a");
if (AK == AArch64::ArchKind::ARMV8_7A)
Features.push_back("+v8.7a");
+ if (AK == AArch64::ArchKind::ARMV9A)
+ Features.push_back("+v9a");
+ if (AK == AArch64::ArchKind::ARMV9_1A)
+ Features.push_back("+v9.1a");
+ if (AK == AArch64::ArchKind::ARMV9_2A)
+ Features.push_back("+v9.2a");
if(AK == AArch64::ArchKind::ARMV8R)
Features.push_back("+v8r");
diff --git a/llvm/lib/Support/APFixedPoint.cpp b/llvm/lib/Support/APFixedPoint.cpp
index 9764dd51f572..61b30b5c5c60 100644
--- a/llvm/lib/Support/APFixedPoint.cpp
+++ b/llvm/lib/Support/APFixedPoint.cpp
@@ -306,7 +306,7 @@ APFixedPoint APFixedPoint::div(const APFixedPoint &Other,
APInt::sdivrem(ThisVal, OtherVal, Result, Rem);
// If the quotient is negative and the remainder is nonzero, round
// towards negative infinity by subtracting epsilon from the result.
- if (ThisVal.isNegative() != OtherVal.isNegative() && !Rem.isNullValue())
+ if (ThisVal.isNegative() != OtherVal.isNegative() && !Rem.isZero())
Result = Result - 1;
} else
Result = ThisVal.udiv(OtherVal);
@@ -381,7 +381,7 @@ void APFixedPoint::toString(SmallVectorImpl<char> &Str) const {
// Add 4 digits to hold the value after multiplying 10 (the radix)
unsigned Width = Val.getBitWidth() + 4;
APInt FractPart = Val.zextOrTrunc(Scale).zext(Width);
- APInt FractPartMask = APInt::getAllOnesValue(Scale).zext(Width);
+ APInt FractPartMask = APInt::getAllOnes(Scale).zext(Width);
APInt RadixInt = APInt(Width, 10);
IntPart.toString(Str, /*Radix=*/10);
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index 7abca8391f70..4b75c9db8526 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -92,7 +92,7 @@ namespace llvm {
Note: we need to make the value different from semBogus as otherwise
an unsafe optimization may collapse both values to a single address,
and we heavily rely on them having distinct addresses. */
- static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 0};
+ static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
/* These are legacy semantics for the fallback, inaccrurate implementation of
IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
@@ -1288,6 +1288,23 @@ IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
return cmpEqual;
}
+/* Set the least significant BITS bits of a bignum, clear the
+ rest. */
+static void tcSetLeastSignificantBits(APInt::WordType *dst, unsigned parts,
+ unsigned bits) {
+ unsigned i = 0;
+ while (bits > APInt::APINT_BITS_PER_WORD) {
+ dst[i++] = ~(APInt::WordType)0;
+ bits -= APInt::APINT_BITS_PER_WORD;
+ }
+
+ if (bits)
+ dst[i++] = ~(APInt::WordType)0 >> (APInt::APINT_BITS_PER_WORD - bits);
+
+ while (i < parts)
+ dst[i++] = 0;
+}
+
/* Handle overflow. Sign is preserved. We either become infinity or
the largest finite number. */
IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
@@ -1303,8 +1320,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
/* Otherwise we become the largest finite number. */
category = fcNormal;
exponent = semantics->maxExponent;
- APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
- semantics->precision);
+ tcSetLeastSignificantBits(significandParts(), partCount(),
+ semantics->precision);
return opInexact;
}
@@ -2412,7 +2429,7 @@ IEEEFloat::convertToInteger(MutableArrayRef<integerPart> parts,
else
bits = width - isSigned;
- APInt::tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
+ tcSetLeastSignificantBits(parts.data(), dstPartsCount, bits);
if (sign && isSigned)
APInt::tcShiftLeft(parts.data(), dstPartsCount, width - 1);
}
@@ -3379,7 +3396,6 @@ double IEEEFloat::convertToDouble() const {
/// exponent = 0, integer bit 1 ("pseudodenormal")
/// At the moment, the first three are treated as NaNs, the last one as Normal.
void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
- assert(api.getBitWidth()==80);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
uint64_t myexponent = (i2 & 0x7fff);
@@ -3411,7 +3427,6 @@ void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
}
void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
- assert(api.getBitWidth()==128);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
opStatus fs;
@@ -3435,7 +3450,6 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
}
void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- assert(api.getBitWidth()==128);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
uint64_t myexponent = (i2 >> 48) & 0x7fff;
@@ -3471,7 +3485,6 @@ void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
}
void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- assert(api.getBitWidth()==64);
uint64_t i = *api.getRawData();
uint64_t myexponent = (i >> 52) & 0x7ff;
uint64_t mysignificand = i & 0xfffffffffffffLL;
@@ -3500,7 +3513,6 @@ void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
}
void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- assert(api.getBitWidth()==32);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 23) & 0xff;
uint32_t mysignificand = i & 0x7fffff;
@@ -3529,7 +3541,6 @@ void IEEEFloat::initFromFloatAPInt(const APInt &api) {
}
void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- assert(api.getBitWidth() == 16);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 7) & 0xff;
uint32_t mysignificand = i & 0x7f;
@@ -3558,7 +3569,6 @@ void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
}
void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- assert(api.getBitWidth()==16);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 10) & 0x1f;
uint32_t mysignificand = i & 0x3ff;
@@ -3591,6 +3601,7 @@ void IEEEFloat::initFromHalfAPInt(const APInt &api) {
/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
/// when the size is anything else).
void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
+ assert(api.getBitWidth() == Sem->sizeInBits);
if (Sem == &semIEEEhalf)
return initFromHalfAPInt(api);
if (Sem == &semBFloat)
@@ -4847,9 +4858,8 @@ APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
llvm_unreachable("Unexpected semantics");
}
-APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics,
- unsigned BitWidth) {
- return APFloat(Semantics, APInt::getAllOnesValue(BitWidth));
+APFloat APFloat::getAllOnesValue(const fltSemantics &Semantics) {
+ return APFloat(Semantics, APInt::getAllOnes(Semantics.sizeInBits));
}
void APFloat::print(raw_ostream &OS) const {
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index a8a950f09747..4940b61602d1 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -89,7 +89,6 @@ void APInt::initSlowCase(const APInt& that) {
}
void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
- assert(BitWidth && "Bitwidth too small");
assert(bigVal.data() && "Null pointer detected!");
if (isSingleWord())
U.VAL = bigVal[0];
@@ -105,19 +104,17 @@ void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
clearUnusedBits();
}
-APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
- : BitWidth(numBits) {
+APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) : BitWidth(numBits) {
initFromArray(bigVal);
}
APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
- : BitWidth(numBits) {
+ : BitWidth(numBits) {
initFromArray(makeArrayRef(bigVal, numWords));
}
APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
- : BitWidth(numbits) {
- assert(BitWidth && "Bitwidth too small");
+ : BitWidth(numbits) {
fromString(numbits, Str, radix);
}
@@ -140,7 +137,7 @@ void APInt::reallocate(unsigned NewBitWidth) {
U.pVal = getMemory(getNumWords());
}
-void APInt::AssignSlowCase(const APInt& RHS) {
+void APInt::assignSlowCase(const APInt &RHS) {
// Don't do anything for X = X
if (this == &RHS)
return;
@@ -233,27 +230,30 @@ APInt APInt::operator*(const APInt& RHS) const {
return APInt(BitWidth, U.VAL * RHS.U.VAL);
APInt Result(getMemory(getNumWords()), getBitWidth());
-
tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
-
Result.clearUnusedBits();
return Result;
}
-void APInt::AndAssignSlowCase(const APInt& RHS) {
- tcAnd(U.pVal, RHS.U.pVal, getNumWords());
+void APInt::andAssignSlowCase(const APInt &RHS) {
+ WordType *dst = U.pVal, *rhs = RHS.U.pVal;
+ for (size_t i = 0, e = getNumWords(); i != e; ++i)
+ dst[i] &= rhs[i];
}
-void APInt::OrAssignSlowCase(const APInt& RHS) {
- tcOr(U.pVal, RHS.U.pVal, getNumWords());
+void APInt::orAssignSlowCase(const APInt &RHS) {
+ WordType *dst = U.pVal, *rhs = RHS.U.pVal;
+ for (size_t i = 0, e = getNumWords(); i != e; ++i)
+ dst[i] |= rhs[i];
}
-void APInt::XorAssignSlowCase(const APInt& RHS) {
- tcXor(U.pVal, RHS.U.pVal, getNumWords());
+void APInt::xorAssignSlowCase(const APInt &RHS) {
+ WordType *dst = U.pVal, *rhs = RHS.U.pVal;
+ for (size_t i = 0, e = getNumWords(); i != e; ++i)
+ dst[i] ^= rhs[i];
}
-APInt& APInt::operator*=(const APInt& RHS) {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+APInt &APInt::operator*=(const APInt &RHS) {
*this = *this * RHS;
return *this;
}
@@ -268,7 +268,7 @@ APInt& APInt::operator*=(uint64_t RHS) {
return clearUnusedBits();
}
-bool APInt::EqualSlowCase(const APInt& RHS) const {
+bool APInt::equalSlowCase(const APInt &RHS) const {
return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
}
@@ -327,12 +327,29 @@ void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
U.pVal[word] = WORDTYPE_MAX;
}
+// Complement a bignum in-place.
+static void tcComplement(APInt::WordType *dst, unsigned parts) {
+ for (unsigned i = 0; i < parts; i++)
+ dst[i] = ~dst[i];
+}
+
/// Toggle every bit to its opposite value.
void APInt::flipAllBitsSlowCase() {
tcComplement(U.pVal, getNumWords());
clearUnusedBits();
}
+/// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
+/// equivalent to:
+/// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
+/// In the slow case, we know the result is large.
+APInt APInt::concatSlowCase(const APInt &NewLSB) const {
+ unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
+ APInt Result = NewLSB.zextOrSelf(NewWidth);
+ Result.insertBits(*this, NewLSB.getBitWidth());
+ return Result;
+}
+
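A brief usage sketch for the concatenation semantics documented above, assuming the APInt::concat entry point in the header dispatches to this slow path for multi-word results:

#include "llvm/ADT/APInt.h"
#include <cassert>

// Concatenating 0b101 (width 3) on top of 0b01 (width 2) yields 0b10101
// (width 5), matching (Hi.zext(5) << 2) | Lo.zext(5).
void concatExampleSketch() {
  llvm::APInt Hi(3, 0b101), Lo(2, 0b01);
  llvm::APInt Cat = Hi.concat(Lo);
  assert(Cat.getBitWidth() == 5);
  assert(Cat == llvm::APInt(5, 0b10101));
}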
/// Toggle a given bit to its opposite value whose position is given
/// as "bitPosition".
/// Toggles a given bit to its opposite value.
@@ -343,8 +360,11 @@ void APInt::flipBit(unsigned bitPosition) {
void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
unsigned subBitWidth = subBits.getBitWidth();
- assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
- "Illegal bit insertion");
+ assert((subBitWidth + bitPosition) <= BitWidth && "Illegal bit insertion");
+
+  // Inserting no bits is a no-op.
+ if (subBitWidth == 0)
+ return;
// Insertion is a direct copy.
if (subBitWidth == BitWidth) {
@@ -424,7 +444,6 @@ void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits)
}
APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
- assert(numBits > 0 && "Can't extract zero bits");
assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
"Illegal bit extraction");
@@ -550,7 +569,7 @@ hash_code llvm::hash_value(const APInt &Arg) {
hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
}
-unsigned DenseMapInfo<APInt>::getHashValue(const APInt &Key) {
+unsigned DenseMapInfo<APInt, void>::getHashValue(const APInt &Key) {
return static_cast<unsigned>(hash_value(Key));
}
@@ -702,6 +721,8 @@ APInt APInt::reverseBits() const {
return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
case 8:
return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
+ case 0:
+ return *this;
default:
break;
}
@@ -861,7 +882,6 @@ double APInt::roundToDouble(bool isSigned) const {
// Truncate to new width.
APInt APInt::trunc(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
- assert(width && "Can't truncate to 0 bits");
if (width <= APINT_BITS_PER_WORD)
return APInt(width, getRawData()[0]);
@@ -884,7 +904,6 @@ APInt APInt::trunc(unsigned width) const {
// Truncate to new width with unsigned saturation.
APInt APInt::truncUSat(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
- assert(width && "Can't truncate to 0 bits");
// Can we just losslessly truncate it?
if (isIntN(width))
@@ -896,7 +915,6 @@ APInt APInt::truncUSat(unsigned width) const {
// Truncate to new width with signed saturation.
APInt APInt::truncSSat(unsigned width) const {
assert(width < BitWidth && "Invalid APInt Truncate request");
- assert(width && "Can't truncate to 0 bits");
// Can we just losslessly truncate it?
if (isSignedIntN(width))
@@ -1059,6 +1077,8 @@ void APInt::shlSlowCase(unsigned ShiftAmt) {
// Calculate the rotate amount modulo the bit width.
static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ return 0;
unsigned rotBitWidth = rotateAmt.getBitWidth();
APInt rot = rotateAmt;
if (rotBitWidth < BitWidth) {
@@ -1075,6 +1095,8 @@ APInt APInt::rotl(const APInt &rotateAmt) const {
}
APInt APInt::rotl(unsigned rotateAmt) const {
+ if (LLVM_UNLIKELY(BitWidth == 0))
+ return *this;
rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
@@ -1086,12 +1108,43 @@ APInt APInt::rotr(const APInt &rotateAmt) const {
}
APInt APInt::rotr(unsigned rotateAmt) const {
+ if (BitWidth == 0)
+ return *this;
rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
}
+/// \returns the nearest log base 2 of this APInt. Ties round up.
+///
+/// NOTE: When we have a BitWidth of 1, we define:
+///
+/// log2(0) = UINT32_MAX
+/// log2(1) = 0
+///
+/// to get around any mathematical concerns resulting from
+/// referencing 2 in a space where 2 does not exist.
+unsigned APInt::nearestLogBase2() const {
+ // Special case when we have a bitwidth of 1. If VAL is 1, then we
+ // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
+ // UINT32_MAX.
+ if (BitWidth == 1)
+ return U.VAL - 1;
+
+ // Handle the zero case.
+ if (isZero())
+ return UINT32_MAX;
+
+ // The non-zero case is handled by computing:
+ //
+ // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
+ //
+ // where x[i] is referring to the value of the ith bit of x.
+ unsigned lg = logBase2();
+ return lg + unsigned((*this)[lg - 1]);
+}
+
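A short usage sketch of the rounding behavior described above, assuming this patch is applied:

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>

// The result is logBase2(x) plus the bit just below the leading one, so
// 5 (0b101) stays at 2 while 6 (0b110) rounds up to 3; zero is the
// documented special case.
void nearestLogBase2ExampleSketch() {
  assert(llvm::APInt(8, 5).nearestLogBase2() == 2);
  assert(llvm::APInt(8, 6).nearestLogBase2() == 3);
  assert(llvm::APInt(8, 0).nearestLogBase2() == UINT32_MAX);
}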
// Square Root - this method computes and returns the square root of "this".
// Three mechanisms are used for computation. For small values (<= 5 bits),
// a table lookup is done. This gets some performance for common cases. For
@@ -1222,98 +1275,6 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const {
return std::move(t[i]);
}
-/// Calculate the magic numbers required to implement a signed integer division
-/// by a constant as a sequence of multiplies, adds and shifts. Requires that
-/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
-/// Warren, Jr., chapter 10.
-APInt::ms APInt::magic() const {
- const APInt& d = *this;
- unsigned p;
- APInt ad, anc, delta, q1, r1, q2, r2, t;
- APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
- struct ms mag;
-
- ad = d.abs();
- t = signedMin + (d.lshr(d.getBitWidth() - 1));
- anc = t - 1 - t.urem(ad); // absolute value of nc
- p = d.getBitWidth() - 1; // initialize p
- q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc)
- r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc))
- q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d)
- r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d))
- do {
- p = p + 1;
- q1 = q1<<1; // update q1 = 2p/abs(nc)
- r1 = r1<<1; // update r1 = rem(2p/abs(nc))
- if (r1.uge(anc)) { // must be unsigned comparison
- q1 = q1 + 1;
- r1 = r1 - anc;
- }
- q2 = q2<<1; // update q2 = 2p/abs(d)
- r2 = r2<<1; // update r2 = rem(2p/abs(d))
- if (r2.uge(ad)) { // must be unsigned comparison
- q2 = q2 + 1;
- r2 = r2 - ad;
- }
- delta = ad - r2;
- } while (q1.ult(delta) || (q1 == delta && r1 == 0));
-
- mag.m = q2 + 1;
- if (d.isNegative()) mag.m = -mag.m; // resulting magic number
- mag.s = p - d.getBitWidth(); // resulting shift
- return mag;
-}
-
-/// Calculate the magic numbers required to implement an unsigned integer
-/// division by a constant as a sequence of multiplies, adds and shifts.
-/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
-/// S. Warren, Jr., chapter 10.
-/// LeadingZeros can be used to simplify the calculation if the upper bits
-/// of the divided value are known zero.
-APInt::mu APInt::magicu(unsigned LeadingZeros) const {
- const APInt& d = *this;
- unsigned p;
- APInt nc, delta, q1, r1, q2, r2;
- struct mu magu;
- magu.a = 0; // initialize "add" indicator
- APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
- APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
- APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
-
- nc = allOnes - (allOnes - d).urem(d);
- p = d.getBitWidth() - 1; // initialize p
- q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc
- r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc)
- q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d
- r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d)
- do {
- p = p + 1;
- if (r1.uge(nc - r1)) {
- q1 = q1 + q1 + 1; // update q1
- r1 = r1 + r1 - nc; // update r1
- }
- else {
- q1 = q1+q1; // update q1
- r1 = r1+r1; // update r1
- }
- if ((r2 + 1).uge(d - r2)) {
- if (q2.uge(signedMax)) magu.a = 1;
- q2 = q2+q2 + 1; // update q2
- r2 = r2+r2 + 1 - d; // update r2
- }
- else {
- if (q2.uge(signedMin)) magu.a = 1;
- q2 = q2+q2; // update q2
- r2 = r2+r2 + 1; // update r2
- }
- delta = d - 1 - r2;
- } while (p < d.getBitWidth()*2 &&
- (q1.ult(delta) || (q1 == delta && r1 == 0)));
- magu.m = q2 + 1; // resulting magic number
- magu.s = p - d.getBitWidth(); // resulting shift
- return magu;
-}
-
/// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
/// variables here have the same names as in the algorithm. Comments explain
@@ -1984,15 +1945,16 @@ APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
// MININT/-1 --> overflow.
- Overflow = isMinSignedValue() && RHS.isAllOnesValue();
+ Overflow = isMinSignedValue() && RHS.isAllOnes();
return sdiv(RHS);
}
APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
APInt Res = *this * RHS;
- if (*this != 0 && RHS != 0)
- Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
+ if (RHS != 0)
+ Overflow = Res.sdiv(RHS) != *this ||
+ (isMinSignedValue() && RHS.isAllOnes());
else
Overflow = false;
return Res;
@@ -2196,7 +2158,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
}
// First, check for a zero value and just short circuit the logic below.
- if (*this == 0) {
+ if (isZero()) {
while (*Prefix) {
Str.push_back(*Prefix);
++Prefix;
@@ -2305,55 +2267,51 @@ void APInt::print(raw_ostream &OS, bool isSigned) const {
static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
"Part width must be divisible by 2!");
-/* Some handy functions local to this file. */
-
-/* Returns the integer part with the least significant BITS set.
- BITS cannot be zero. */
+// Returns the integer part with the least significant BITS set.
+// BITS cannot be zero.
static inline APInt::WordType lowBitMask(unsigned bits) {
assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
-
return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
}
-/* Returns the value of the lower half of PART. */
+/// Returns the value of the lower half of PART.
static inline APInt::WordType lowHalf(APInt::WordType part) {
return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
}
-/* Returns the value of the upper half of PART. */
+/// Returns the value of the upper half of PART.
static inline APInt::WordType highHalf(APInt::WordType part) {
return part >> (APInt::APINT_BITS_PER_WORD / 2);
}
-/* Returns the bit number of the most significant set bit of a part.
- If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the most significant set bit of a part.
+/// If the input number has no bits set -1U is returned.
static unsigned partMSB(APInt::WordType value) {
return findLastSet(value, ZB_Max);
}
-/* Returns the bit number of the least significant set bit of a
- part. If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the least significant set bit of a part. If the
+/// input number has no bits set -1U is returned.
static unsigned partLSB(APInt::WordType value) {
return findFirstSet(value, ZB_Max);
}
-/* Sets the least significant part of a bignum to the input value, and
- zeroes out higher parts. */
+/// Sets the least significant part of a bignum to the input value, and zeroes
+/// out higher parts.
void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
assert(parts > 0);
-
dst[0] = part;
for (unsigned i = 1; i < parts; i++)
dst[i] = 0;
}
-/* Assign one bignum to another. */
+/// Assign one bignum to another.
void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
for (unsigned i = 0; i < parts; i++)
dst[i] = src[i];
}
-/* Returns true if a bignum is zero, false otherwise. */
+/// Returns true if a bignum is zero, false otherwise.
bool APInt::tcIsZero(const WordType *src, unsigned parts) {
for (unsigned i = 0; i < parts; i++)
if (src[i])
@@ -2362,28 +2320,27 @@ bool APInt::tcIsZero(const WordType *src, unsigned parts) {
return true;
}
-/* Extract the given bit of a bignum; returns 0 or 1. */
+/// Extract the given bit of a bignum; returns 0 or 1.
int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
return (parts[whichWord(bit)] & maskBit(bit)) != 0;
}
-/* Set the given bit of a bignum. */
+/// Set the given bit of a bignum.
void APInt::tcSetBit(WordType *parts, unsigned bit) {
parts[whichWord(bit)] |= maskBit(bit);
}
-/* Clears the given bit of a bignum. */
+/// Clears the given bit of a bignum.
void APInt::tcClearBit(WordType *parts, unsigned bit) {
parts[whichWord(bit)] &= ~maskBit(bit);
}
-/* Returns the bit number of the least significant set bit of a
- number. If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the least significant set bit of a number. If the
+/// input number has no bits set -1U is returned.
unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
for (unsigned i = 0; i < n; i++) {
if (parts[i] != 0) {
unsigned lsb = partLSB(parts[i]);
-
return lsb + i * APINT_BITS_PER_WORD;
}
}
@@ -2391,8 +2348,8 @@ unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
return -1U;
}
-/* Returns the bit number of the most significant set bit of a number.
- If the input number has no bits set -1U is returned. */
+/// Returns the bit number of the most significant set bit of a number.
+/// If the input number has no bits set -1U is returned.
unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
do {
--n;
@@ -2407,10 +2364,10 @@ unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
return -1U;
}
-/* Copy the bit vector of width srcBITS from SRC, starting at bit
- srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
- the least significant bit of DST. All high bits above srcBITS in
- DST are zero-filled. */
+/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
+/// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
+/// significant bit of DST. All high bits above srcBITS in DST are zero-filled.
void
APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
unsigned srcBits, unsigned srcLSB) {
@@ -2418,14 +2375,14 @@ APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
assert(dstParts <= dstCount);
unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
- tcAssign (dst, src + firstSrcPart, dstParts);
+ tcAssign(dst, src + firstSrcPart, dstParts);
unsigned shift = srcLSB % APINT_BITS_PER_WORD;
- tcShiftRight (dst, dstParts, shift);
+ tcShiftRight(dst, dstParts, shift);
- /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
- in DST. If this is less that srcBits, append the rest, else
- clear the high bits. */
+ // We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
+  // in DST. If this is less than srcBits, append the rest, else
+ // clear the high bits.
unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
if (n < srcBits) {
WordType mask = lowBitMask (srcBits - n);
@@ -2436,12 +2393,12 @@ APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
}
- /* Clear high parts. */
+ // Clear high parts.
while (dstParts < dstCount)
dst[dstParts++] = 0;
}
-/* DST += RHS + C where C is zero or one. Returns the carry flag. */
+/// DST += RHS + C where C is zero or one. Returns the carry flag.
APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
WordType c, unsigned parts) {
assert(c <= 1);
@@ -2476,7 +2433,7 @@ APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
return 1;
}
-/* DST -= RHS + C where C is zero or one. Returns the carry flag. */
+/// DST -= RHS + C where C is zero or one. Returns the carry flag.
APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
WordType c, unsigned parts) {
assert(c <= 1);
@@ -2515,47 +2472,39 @@ APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
return 1;
}
-/* Negate a bignum in-place. */
+/// Negate a bignum in-place.
void APInt::tcNegate(WordType *dst, unsigned parts) {
tcComplement(dst, parts);
tcIncrement(dst, parts);
}
-/* DST += SRC * MULTIPLIER + CARRY if add is true
- DST = SRC * MULTIPLIER + CARRY if add is false
-
- Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
- they must start at the same point, i.e. DST == SRC.
-
- If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
- returned. Otherwise DST is filled with the least significant
- DSTPARTS parts of the result, and if all of the omitted higher
- parts were zero return zero, otherwise overflow occurred and
- return one. */
+/// DST += SRC * MULTIPLIER + CARRY if add is true
+/// DST = SRC * MULTIPLIER + CARRY if add is false
+/// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+/// they must start at the same point, i.e. DST == SRC.
+/// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+/// returned. Otherwise DST is filled with the least significant
+/// DSTPARTS parts of the result, and if all of the omitted higher
+/// parts were zero return zero, otherwise overflow occurred and
+/// return one.
int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
WordType multiplier, WordType carry,
unsigned srcParts, unsigned dstParts,
bool add) {
- /* Otherwise our writes of DST kill our later reads of SRC. */
+ // Otherwise our writes of DST kill our later reads of SRC.
assert(dst <= src || dst >= src + srcParts);
assert(dstParts <= srcParts + 1);
- /* N loops; minimum of dstParts and srcParts. */
+ // N loops; minimum of dstParts and srcParts.
unsigned n = std::min(dstParts, srcParts);
for (unsigned i = 0; i < n; i++) {
- WordType low, mid, high, srcPart;
-
- /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
-
- This cannot overflow, because
-
- (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
-
- which is less than n^2. */
-
- srcPart = src[i];
-
+ // [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
+ // This cannot overflow, because:
+ // (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
+ // which is less than n^2.
+ WordType srcPart = src[i];
+ WordType low, mid, high;
if (multiplier == 0 || srcPart == 0) {
low = carry;
high = 0;
@@ -2577,14 +2526,14 @@ int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
high++;
low += mid;
- /* Now add carry. */
+ // Now add carry.
if (low + carry < low)
high++;
low += carry;
}
if (add) {
- /* And now DST[i], and store the new low part there. */
+ // And now DST[i], and store the new low part there.
if (low + dst[i] < low)
high++;
dst[i] += low;
@@ -2595,32 +2544,32 @@ int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
}
if (srcParts < dstParts) {
- /* Full multiplication, there is no overflow. */
+ // Full multiplication, there is no overflow.
assert(srcParts + 1 == dstParts);
dst[srcParts] = carry;
return 0;
}
- /* We overflowed if there is carry. */
+ // We overflowed if there is carry.
if (carry)
return 1;
- /* We would overflow if any significant unwritten parts would be
- non-zero. This is true if any remaining src parts are non-zero
- and the multiplier is non-zero. */
+ // We would overflow if any significant unwritten parts would be
+ // non-zero. This is true if any remaining src parts are non-zero
+ // and the multiplier is non-zero.
if (multiplier)
for (unsigned i = dstParts; i < srcParts; i++)
if (src[i])
return 1;
- /* We fitted in the narrow destination. */
+ // We fitted in the narrow destination.
return 0;
}
-/* DST = LHS * RHS, where DST has the same width as the operands and
- is filled with the least significant parts of the result. Returns
- one if overflow occurred, otherwise zero. DST must be disjoint
- from both operands. */
+/// DST = LHS * RHS, where DST has the same width as the operands and
+/// is filled with the least significant parts of the result. Returns
+/// one if overflow occurred, otherwise zero. DST must be disjoint
+/// from both operands.
int APInt::tcMultiply(WordType *dst, const WordType *lhs,
const WordType *rhs, unsigned parts) {
assert(dst != lhs && dst != rhs);
@@ -2640,7 +2589,7 @@ int APInt::tcMultiply(WordType *dst, const WordType *lhs,
void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
const WordType *rhs, unsigned lhsParts,
unsigned rhsParts) {
- /* Put the narrower number on the LHS for less loops below. */
+  // Put the narrower number on the LHS for fewer loops below.
if (lhsParts > rhsParts)
return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
@@ -2652,16 +2601,15 @@ void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
}
-/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
- Otherwise set LHS to LHS / RHS with the fractional part discarded,
- set REMAINDER to the remainder, return zero. i.e.
-
- OLD_LHS = RHS * LHS + REMAINDER
-
- SCRATCH is a bignum of the same size as the operands and result for
- use by the routine; its contents need not be initialized and are
- destroyed. LHS, REMAINDER and SCRATCH must be distinct.
-*/
+// If RHS is zero LHS and REMAINDER are left unchanged, return one.
+// Otherwise set LHS to LHS / RHS with the fractional part discarded,
+// set REMAINDER to the remainder, return zero. i.e.
+//
+// OLD_LHS = RHS * LHS + REMAINDER
+//
+// SCRATCH is a bignum of the same size as the operands and result for
+// use by the routine; its contents need not be initialized and are
+// destroyed. LHS, REMAINDER and SCRATCH must be distinct.
int APInt::tcDivide(WordType *lhs, const WordType *rhs,
WordType *remainder, WordType *srhs,
unsigned parts) {
@@ -2680,8 +2628,8 @@ int APInt::tcDivide(WordType *lhs, const WordType *rhs,
tcAssign(remainder, lhs, parts);
tcSet(lhs, 0, parts);
- /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
- the total. */
+ // Loop, subtracting SRHS if REMAINDER is greater and adding that to the
+ // total.
for (;;) {
int compare = tcCompare(remainder, srhs, parts);
if (compare >= 0) {
@@ -2756,31 +2704,7 @@ void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
}
-/* Bitwise and of two bignums. */
-void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] &= rhs[i];
-}
-
-/* Bitwise inclusive or of two bignums. */
-void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] |= rhs[i];
-}
-
-/* Bitwise exclusive or of two bignums. */
-void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] ^= rhs[i];
-}
-
-/* Complement a bignum in-place. */
-void APInt::tcComplement(WordType *dst, unsigned parts) {
- for (unsigned i = 0; i < parts; i++)
- dst[i] = ~dst[i];
-}
-
-/* Comparison (unsigned) of two bignums. */
+// Comparison (unsigned) of two bignums.
int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
unsigned parts) {
while (parts) {
@@ -2792,23 +2716,6 @@ int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
return 0;
}
-/* Set the least significant BITS bits of a bignum, clear the
- rest. */
-void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
- unsigned bits) {
- unsigned i = 0;
- while (bits > APINT_BITS_PER_WORD) {
- dst[i++] = ~(WordType) 0;
- bits -= APINT_BITS_PER_WORD;
- }
-
- if (bits)
- dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
-
- while (i < parts)
- dst[i++] = 0;
-}
-
APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
APInt::Rounding RM) {
// Currently udivrem always rounds down.
@@ -2819,7 +2726,7 @@ APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
case APInt::Rounding::UP: {
APInt Quo, Rem;
APInt::udivrem(A, B, Quo, Rem);
- if (Rem == 0)
+ if (Rem.isZero())
return Quo;
return Quo + 1;
}
@@ -2834,7 +2741,7 @@ APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
case APInt::Rounding::UP: {
APInt Quo, Rem;
APInt::sdivrem(A, B, Quo, Rem);
- if (Rem == 0)
+ if (Rem.isZero())
return Quo;
// This algorithm deals with arbitrary rounding mode used by sdivrem.
// We want to check whether the non-integer part of the mathematical value
@@ -2870,7 +2777,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
<< "x + " << C << ", rw:" << RangeWidth << '\n');
// Identify 0 as a (non)solution immediately.
- if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
+ if (C.sextOrTrunc(RangeWidth).isZero()) {
LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
return APInt(CoeffWidth, 0);
}
@@ -2932,7 +2839,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
assert(A.isStrictlyPositive());
APInt T = V.abs().urem(A);
- if (T.isNullValue())
+ if (T.isZero())
return V;
return V.isNegative() ? V+T : V+(A-T);
};
@@ -3016,7 +2923,7 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
// can be 0, but cannot be negative.
assert(X.isNonNegative() && "Solution should be non-negative");
- if (!InexactSQ && Rem.isNullValue()) {
+ if (!InexactSQ && Rem.isZero()) {
LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
return X;
}
@@ -3032,8 +2939,8 @@ llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
APInt VX = (A*X + B)*X + C;
APInt VY = VX + TwoA*X + A + B;
- bool SignChange = VX.isNegative() != VY.isNegative() ||
- VX.isNullValue() != VY.isNullValue();
+ bool SignChange =
+ VX.isNegative() != VY.isNegative() || VX.isZero() != VY.isZero();
// If the sign did not change between X and X+1, X is not a valid solution.
// This could happen when the actual (exact) roots don't have an integer
// between them, so they would both be contained between X and X+1.
@@ -3055,6 +2962,40 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
}
+APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth) {
+ unsigned OldBitWidth = A.getBitWidth();
+ assert((((OldBitWidth % NewBitWidth) == 0) ||
+ ((NewBitWidth % OldBitWidth) == 0)) &&
+ "One size should be a multiple of the other one. "
+ "Can't do fractional scaling.");
+
+ // Check for matching bitwidths.
+ if (OldBitWidth == NewBitWidth)
+ return A;
+
+ APInt NewA = APInt::getZero(NewBitWidth);
+
+ // Check for null input.
+ if (A.isZero())
+ return NewA;
+
+ if (NewBitWidth > OldBitWidth) {
+ // Repeat bits.
+ unsigned Scale = NewBitWidth / OldBitWidth;
+ for (unsigned i = 0; i != OldBitWidth; ++i)
+ if (A[i])
+ NewA.setBits(i * Scale, (i + 1) * Scale);
+ } else {
+    // Merge bits - if any old bit is set, then set the scale-equivalent new bit.
+ unsigned Scale = OldBitWidth / NewBitWidth;
+ for (unsigned i = 0; i != NewBitWidth; ++i)
+ if (!A.extractBits(Scale, i * Scale).isZero())
+ NewA.setBit(i);
+ }
+
+ return NewA;
+}
+
/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
/// with the integer held in IntVal.
void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
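
A minimal usage sketch of the APInt helpers added above (assuming a using-namespace-llvm context; the commented values follow from the ScaleBitMask and nearestLogBase2 code in this hunk):

  APInt Mask(4, 0b1010);
  APInt Wide = APIntOps::ScaleBitMask(Mask, 8);   // 0b11001100: each source bit repeated twice
  APInt Narrow = APIntOps::ScaleBitMask(Mask, 2); // 0b11: each half of Mask contains a set bit
  unsigned Lg = APInt(8, 6).nearestLogBase2();    // 3: logBase2(6) == 2 and bit 1 of 6 is set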
diff --git a/llvm/lib/Support/ARMTargetParser.cpp b/llvm/lib/Support/ARMTargetParser.cpp
index 94b48df27993..4405ed176fe2 100644
--- a/llvm/lib/Support/ARMTargetParser.cpp
+++ b/llvm/lib/Support/ARMTargetParser.cpp
@@ -82,6 +82,10 @@ unsigned ARM::parseArchVersion(StringRef Arch) {
case ArchKind::ARMV8MMainline:
case ArchKind::ARMV8_1MMainline:
return 8;
+ case ArchKind::ARMV9A:
+ case ArchKind::ARMV9_1A:
+ case ArchKind::ARMV9_2A:
+ return 9;
case ArchKind::INVALID:
return 0;
}
@@ -113,6 +117,9 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
case ArchKind::ARMV8_5A:
case ArchKind::ARMV8_6A:
case ArchKind::ARMV8_7A:
+ case ArchKind::ARMV9A:
+ case ArchKind::ARMV9_1A:
+ case ArchKind::ARMV9_2A:
return ProfileKind::A;
case ArchKind::ARMV2:
case ArchKind::ARMV2A:
@@ -158,6 +165,9 @@ StringRef ARM::getArchSynonym(StringRef Arch) {
.Case("v8.6a", "v8.6-a")
.Case("v8.7a", "v8.7-a")
.Case("v8r", "v8-r")
+ .Cases("v9", "v9a", "v9-a")
+ .Case("v9.1a", "v9.1-a")
+ .Case("v9.2a", "v9.2-a")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
.Case("v8.1m.main", "v8.1-m.main")
@@ -297,7 +307,7 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
else if (A.startswith("aarch64")) {
offset = 7;
// AArch64 uses "_be", not "eb" suffix.
- if (A.find("eb") != StringRef::npos)
+ if (A.contains("eb"))
return Error;
if (A.substr(offset, 3) == "_be")
offset += 3;
@@ -323,7 +333,7 @@ StringRef ARM::getCanonicalArchName(StringRef Arch) {
if (A.size() >= 2 && (A[0] != 'v' || !std::isdigit(A[1])))
return Error;
// Can't have an extra 'eb'.
- if (A.find("eb") != StringRef::npos)
+ if (A.contains("eb"))
return Error;
}
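
A quick sketch of the new Armv9 spellings recognized above (assuming llvm/Support/ARMTargetParser.h and a using-namespace-llvm context; only getArchSynonym is exercised here):

  assert(ARM::getArchSynonym("v9a") == "v9-a");
  assert(ARM::getArchSynonym("v9.1a") == "v9.1-a");
  assert(ARM::getArchSynonym("v9.2a") == "v9.2-a");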
diff --git a/llvm/lib/Support/BinaryStreamReader.cpp b/llvm/lib/Support/BinaryStreamReader.cpp
index a0434bdc6115..2fe450db11dd 100644
--- a/llvm/lib/Support/BinaryStreamReader.cpp
+++ b/llvm/lib/Support/BinaryStreamReader.cpp
@@ -72,10 +72,10 @@ Error BinaryStreamReader::readSLEB128(int64_t &Dest) {
}
Error BinaryStreamReader::readCString(StringRef &Dest) {
- uint32_t OriginalOffset = getOffset();
- uint32_t FoundOffset = 0;
+ uint64_t OriginalOffset = getOffset();
+ uint64_t FoundOffset = 0;
while (true) {
- uint32_t ThisOffset = getOffset();
+ uint64_t ThisOffset = getOffset();
ArrayRef<uint8_t> Buffer;
if (auto EC = readLongestContiguousChunk(Buffer))
return EC;
@@ -100,8 +100,8 @@ Error BinaryStreamReader::readCString(StringRef &Dest) {
}
Error BinaryStreamReader::readWideString(ArrayRef<UTF16> &Dest) {
- uint32_t Length = 0;
- uint32_t OriginalOffset = getOffset();
+ uint64_t Length = 0;
+ uint64_t OriginalOffset = getOffset();
const UTF16 *C;
while (true) {
if (auto EC = readObject(C))
@@ -110,7 +110,7 @@ Error BinaryStreamReader::readWideString(ArrayRef<UTF16> &Dest) {
break;
++Length;
}
- uint32_t NewOffset = getOffset();
+ uint64_t NewOffset = getOffset();
setOffset(OriginalOffset);
if (auto EC = readArray(Dest, Length))
@@ -145,7 +145,7 @@ Error BinaryStreamReader::readSubstream(BinarySubstreamRef &Ref,
return readStreamRef(Ref.StreamData, Length);
}
-Error BinaryStreamReader::skip(uint32_t Amount) {
+Error BinaryStreamReader::skip(uint64_t Amount) {
if (Amount > bytesRemaining())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
Offset += Amount;
@@ -166,7 +166,7 @@ uint8_t BinaryStreamReader::peek() const {
}
std::pair<BinaryStreamReader, BinaryStreamReader>
-BinaryStreamReader::split(uint32_t Off) const {
+BinaryStreamReader::split(uint64_t Off) const {
assert(getLength() >= Off);
BinaryStreamRef First = Stream.drop_front(Offset);
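
The widened offsets are source-compatible with existing readers; a small sketch, assuming the usual BinaryByteStream setup from llvm/Support/BinaryByteStream.h (not shown in this hunk):

  const uint8_t Raw[] = {'h', 'i', 0, 42};
  BinaryByteStream Stream(ArrayRef<uint8_t>(Raw), llvm::support::little);
  BinaryStreamReader Reader(Stream);
  StringRef Name;
  if (Error E = Reader.readCString(Name)) // Name == "hi" on success
    consumeError(std::move(E));
  uint64_t Left = Reader.bytesRemaining(); // offsets and lengths are now 64-bit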
diff --git a/llvm/lib/Support/BinaryStreamRef.cpp b/llvm/lib/Support/BinaryStreamRef.cpp
index 53e71baad57a..6d79d95e1bf0 100644
--- a/llvm/lib/Support/BinaryStreamRef.cpp
+++ b/llvm/lib/Support/BinaryStreamRef.cpp
@@ -21,15 +21,15 @@ public:
llvm::support::endianness getEndian() const override {
return BBS.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return BBS.getLength(); }
+ uint64_t getLength() override { return BBS.getLength(); }
private:
BinaryByteStream BBS;
@@ -44,17 +44,17 @@ public:
llvm::support::endianness getEndian() const override {
return BBS.getEndian();
}
- Error readBytes(uint32_t Offset, uint32_t Size,
+ Error readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readBytes(Offset, Size, Buffer);
}
- Error readLongestContiguousChunk(uint32_t Offset,
+ Error readLongestContiguousChunk(uint64_t Offset,
ArrayRef<uint8_t> &Buffer) override {
return BBS.readLongestContiguousChunk(Offset, Buffer);
}
- uint32_t getLength() override { return BBS.getLength(); }
+ uint64_t getLength() override { return BBS.getLength(); }
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) override {
+ Error writeBytes(uint64_t Offset, ArrayRef<uint8_t> Data) override {
return BBS.writeBytes(Offset, Data);
}
Error commit() override { return BBS.commit(); }
@@ -66,8 +66,8 @@ private:
BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream)
: BinaryStreamRefBase(Stream) {}
-BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint32_t Offset,
- Optional<uint32_t> Length)
+BinaryStreamRef::BinaryStreamRef(BinaryStream &Stream, uint64_t Offset,
+ Optional<uint64_t> Length)
: BinaryStreamRefBase(Stream, Offset, Length) {}
BinaryStreamRef::BinaryStreamRef(ArrayRef<uint8_t> Data, endianness Endian)
: BinaryStreamRefBase(std::make_shared<ArrayRefImpl>(Data, Endian), 0,
@@ -76,7 +76,7 @@ BinaryStreamRef::BinaryStreamRef(StringRef Data, endianness Endian)
: BinaryStreamRef(makeArrayRef(Data.bytes_begin(), Data.bytes_end()),
Endian) {}
-Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size,
+Error BinaryStreamRef::readBytes(uint64_t Offset, uint64_t Size,
ArrayRef<uint8_t> &Buffer) const {
if (auto EC = checkOffsetForRead(Offset, Size))
return EC;
@@ -84,7 +84,7 @@ Error BinaryStreamRef::readBytes(uint32_t Offset, uint32_t Size,
}
Error BinaryStreamRef::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+ uint64_t Offset, ArrayRef<uint8_t> &Buffer) const {
if (auto EC = checkOffsetForRead(Offset, 1))
return EC;
@@ -94,7 +94,7 @@ Error BinaryStreamRef::readLongestContiguousChunk(
// This StreamRef might refer to a smaller window over a larger stream. In
// that case we will have read out more bytes than we should return, because
// we should not read past the end of the current view.
- uint32_t MaxLength = getLength() - Offset;
+ uint64_t MaxLength = getLength() - Offset;
if (Buffer.size() > MaxLength)
Buffer = Buffer.slice(0, MaxLength);
return Error::success();
@@ -104,8 +104,8 @@ WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream)
: BinaryStreamRefBase(Stream) {}
WritableBinaryStreamRef::WritableBinaryStreamRef(WritableBinaryStream &Stream,
- uint32_t Offset,
- Optional<uint32_t> Length)
+ uint64_t Offset,
+ Optional<uint64_t> Length)
: BinaryStreamRefBase(Stream, Offset, Length) {}
WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
@@ -113,8 +113,7 @@ WritableBinaryStreamRef::WritableBinaryStreamRef(MutableArrayRef<uint8_t> Data,
: BinaryStreamRefBase(std::make_shared<MutableArrayRefImpl>(Data, Endian),
0, Data.size()) {}
-
-Error WritableBinaryStreamRef::writeBytes(uint32_t Offset,
+Error WritableBinaryStreamRef::writeBytes(uint64_t Offset,
ArrayRef<uint8_t> Data) const {
if (auto EC = checkOffsetForWrite(Offset, Data.size()))
return EC;
diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp
index 986e18da281d..8c9efa0ed9a9 100644
--- a/llvm/lib/Support/BinaryStreamWriter.cpp
+++ b/llvm/lib/Support/BinaryStreamWriter.cpp
@@ -62,7 +62,7 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref) {
return writeStreamRef(Ref, Ref.getLength());
}
-Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) {
+Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint64_t Length) {
BinaryStreamReader SrcReader(Ref.slice(0, Length));
// This is a bit tricky. If we just call readBytes, we are requiring that it
// return us the entire stream as a contiguous buffer. There is no guarantee
@@ -80,7 +80,7 @@ Error BinaryStreamWriter::writeStreamRef(BinaryStreamRef Ref, uint32_t Length) {
}
std::pair<BinaryStreamWriter, BinaryStreamWriter>
-BinaryStreamWriter::split(uint32_t Off) const {
+BinaryStreamWriter::split(uint64_t Off) const {
assert(getLength() >= Off);
WritableBinaryStreamRef First = Stream.drop_front(Offset);
@@ -93,7 +93,7 @@ BinaryStreamWriter::split(uint32_t Off) const {
}
Error BinaryStreamWriter::padToAlignment(uint32_t Align) {
- uint32_t NewOffset = alignTo(Offset, Align);
+ uint64_t NewOffset = alignTo(Offset, Align);
if (NewOffset > getLength())
return make_error<BinaryStreamError>(stream_error_code::stream_too_short);
while (Offset < NewOffset)
diff --git a/llvm/lib/LTO/Caching.cpp b/llvm/lib/Support/Caching.cpp
index 75a89e729f43..a2fe37a26617 100644
--- a/llvm/lib/LTO/Caching.cpp
+++ b/llvm/lib/Support/Caching.cpp
@@ -1,4 +1,4 @@
-//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===//
+//===-Caching.cpp - LLVM Local File Cache ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,18 +6,17 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Caching for ThinLTO.
+// This file implements the localCache function, which simplifies creating,
+// adding to, and querying a local file system cache. localCache takes care of
+// periodically pruning older files from the cache using a CachePruningPolicy.
//
//===----------------------------------------------------------------------===//
-#include "llvm/LTO/Caching.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/raw_ostream.h"
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
@@ -26,14 +25,21 @@
#endif
using namespace llvm;
-using namespace llvm::lto;
-Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
- AddBufferFn AddBuffer) {
- if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPath))
+Expected<FileCache> llvm::localCache(Twine CacheNameRef,
+ Twine TempFilePrefixRef,
+ Twine CacheDirectoryPathRef,
+ AddBufferFn AddBuffer) {
+ if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPathRef))
return errorCodeToError(EC);
- return [=](unsigned Task, StringRef Key) -> AddStreamFn {
+  // Create local copies that are safely captured by copy in the lambdas below.
+ SmallString<64> CacheName, TempFilePrefix, CacheDirectoryPath;
+ CacheNameRef.toVector(CacheName);
+ TempFilePrefixRef.toVector(TempFilePrefix);
+ CacheDirectoryPathRef.toVector(CacheDirectoryPath);
+
+ return [=](unsigned Task, StringRef Key) -> Expected<AddStreamFn> {
// This choice of file name allows the cache to be pruned (see pruneCache()
// in include/llvm/Support/CachePruning.h).
SmallString<64> EntryPath;
@@ -65,12 +71,12 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
// Since the file is probably being deleted we handle it in the same way as
// if the file did not exist at all.
if (EC != errc::no_such_file_or_directory && EC != errc::permission_denied)
- report_fatal_error(Twine("Failed to open cache file ") + EntryPath +
- ": " + EC.message() + "\n");
+ return createStringError(EC, Twine("Failed to open cache file ") +
+ EntryPath + ": " + EC.message() + "\n");
- // This native object stream is responsible for commiting the resulting
- // file to the cache and calling AddBuffer to add it to the link.
- struct CacheStream : NativeObjectStream {
+    // This file stream is responsible for committing the resulting file to the
+ // cache and calling AddBuffer to add it to the link.
+ struct CacheStream : CachedFileStream {
AddBufferFn AddBuffer;
sys::fs::TempFile TempFile;
std::string EntryPath;
@@ -79,11 +85,14 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddBufferFn AddBuffer,
sys::fs::TempFile TempFile, std::string EntryPath,
unsigned Task)
- : NativeObjectStream(std::move(OS)), AddBuffer(std::move(AddBuffer)),
+ : CachedFileStream(std::move(OS)), AddBuffer(std::move(AddBuffer)),
TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)),
Task(Task) {}
~CacheStream() {
+      // TODO: Manually commit rather than using a non-trivial destructor,
+      // allowing the report_fatal_error calls to be replaced by returning an Error.
+
// Make sure the stream is closed before committing it.
OS.reset();
@@ -131,16 +140,17 @@ Expected<NativeObjectCache> lto::localCache(StringRef CacheDirectoryPath,
}
};
- return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> {
+ return [=](size_t Task) -> Expected<std::unique_ptr<CachedFileStream>> {
// Write to a temporary to avoid race condition
SmallString<64> TempFilenameModel;
- sys::path::append(TempFilenameModel, CacheDirectoryPath, "Thin-%%%%%%.tmp.o");
+ sys::path::append(TempFilenameModel, CacheDirectoryPath,
+ TempFilePrefix + "-%%%%%%.tmp.o");
Expected<sys::fs::TempFile> Temp = sys::fs::TempFile::create(
TempFilenameModel, sys::fs::owner_read | sys::fs::owner_write);
- if (!Temp) {
- errs() << "Error: " << toString(Temp.takeError()) << "\n";
- report_fatal_error("ThinLTO: Can't get a temporary file");
- }
+ if (!Temp)
+ return createStringError(errc::io_error,
+ toString(Temp.takeError()) + ": " + CacheName +
+ ": Can't get a temporary file");
// This CacheStream will move the temporary file into the cache when done.
return std::make_unique<CacheStream>(
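
A sketch of driving the relocated cache API; the localCache signature and the FileCache/AddStreamFn callable shapes are taken from this hunk, while the exact AddBufferFn parameter list is an assumption carried over from the LTO-era callback:

  Expected<FileCache> CacheOrErr =
      localCache("MyToolCache", "Tmp", "/tmp/mytool.cache",
                 [](unsigned Task, std::unique_ptr<MemoryBuffer> MB) {
                   // Consume the finished, cached artifact for this task.
                 });
  if (!CacheOrErr)
    report_fatal_error(Twine(toString(CacheOrErr.takeError())));
  FileCache Cache = std::move(*CacheOrErr);
  Expected<AddStreamFn> AddStream = Cache(/*Task=*/0, /*Key=*/"0123abcd");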
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 4ae3ad4c2453..e64934aa90cc 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1321,12 +1321,20 @@ bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
Errs, LongOptionsUseDoubleDash);
}
+/// Reset all options at least once, so that we can parse different options.
void CommandLineParser::ResetAllOptionOccurrences() {
- // So that we can parse different command lines multiple times in succession
- // we reset all option values to look like they have never been seen before.
+ // Reset all option values to look like they have never been seen before.
+  // Options might be reset twice (they can be referenced in both OptionsMap
+  // and one of the other members), but that does no harm.
for (auto *SC : RegisteredSubCommands) {
for (auto &O : SC->OptionsMap)
O.second->reset();
+ for (Option *O : SC->PositionalOpts)
+ O->reset();
+ for (Option *O : SC->SinkOpts)
+ O->reset();
+ if (SC->ConsumeAfterOpt)
+ SC->ConsumeAfterOpt->reset();
}
}
@@ -2633,6 +2641,7 @@ void cl::AddExtraVersionPrinter(VersionPrinterTy func) {
}
StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {
+ initCommonOptions();
auto &Subs = GlobalParser->RegisteredSubCommands;
(void)Subs;
assert(is_contained(Subs, &Sub));
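
With positional, sink, and consume-after options now reset as well, re-parsing in the same process becomes straightforward; a sketch (the argc/argv pairs are placeholders):

  cl::ParseCommandLineOptions(Argc1, Argv1, "first pass");
  cl::ResetAllOptionOccurrences(); // now also clears positional/sink/consume-after options
  cl::ParseCommandLineOptions(Argc2, Argv2, "second pass");
  auto &Opts = cl::getRegisteredOptions(*cl::TopLevelSubCommand); // common options are registered first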
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index 433d99df5932..b6aaf373a522 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -428,8 +428,7 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
#endif // !_MSC_VER
-LLVM_ATTRIBUTE_NORETURN
-void CrashRecoveryContext::HandleExit(int RetCode) {
+[[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) {
#if defined(_WIN32)
// SEH and VEH
::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
diff --git a/llvm/lib/Support/DebugOptions.h b/llvm/lib/Support/DebugOptions.h
index 4d5250649f6a..75e557d7d8d7 100644
--- a/llvm/lib/Support/DebugOptions.h
+++ b/llvm/lib/Support/DebugOptions.h
@@ -26,4 +26,4 @@ void initWithColorOptions();
void initDebugOptions();
void initRandomSeedOptions();
-} // namespace llvm \ No newline at end of file
+} // namespace llvm
diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp
new file mode 100644
index 000000000000..077629670e40
--- /dev/null
+++ b/llvm/lib/Support/DivisionByConstantInfo.cpp
@@ -0,0 +1,107 @@
+//===----- DivisionByConstantInfo.cpp - division by constant -*- C++ -*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file implements support for optimizing divisions by a constant.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DivisionByConstantInfo.h"
+
+using namespace llvm;
+
+/// Calculate the magic numbers required to implement a signed integer division
+/// by a constant as a sequence of multiplies, adds and shifts. Requires that
+/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S.
+/// Warren, Jr., Chapter 10.
+SignedDivisionByConstantInfo SignedDivisionByConstantInfo::get(const APInt &D) {
+ unsigned P;
+ APInt AD, ANC, Delta, Q1, R1, Q2, R2, T;
+ APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth());
+ struct SignedDivisionByConstantInfo Retval;
+
+ AD = D.abs();
+ T = SignedMin + (D.lshr(D.getBitWidth() - 1));
+ ANC = T - 1 - T.urem(AD); // absolute value of NC
+ P = D.getBitWidth() - 1; // initialize P
+ Q1 = SignedMin.udiv(ANC); // initialize Q1 = 2P/abs(NC)
+ R1 = SignedMin - Q1 * ANC; // initialize R1 = rem(2P,abs(NC))
+ Q2 = SignedMin.udiv(AD); // initialize Q2 = 2P/abs(D)
+ R2 = SignedMin - Q2 * AD; // initialize R2 = rem(2P,abs(D))
+ do {
+ P = P + 1;
+ Q1 = Q1 << 1; // update Q1 = 2P/abs(NC)
+ R1 = R1 << 1; // update R1 = rem(2P/abs(NC))
+ if (R1.uge(ANC)) { // must be unsigned comparison
+ Q1 = Q1 + 1;
+ R1 = R1 - ANC;
+ }
+ Q2 = Q2 << 1; // update Q2 = 2P/abs(D)
+ R2 = R2 << 1; // update R2 = rem(2P/abs(D))
+ if (R2.uge(AD)) { // must be unsigned comparison
+ Q2 = Q2 + 1;
+ R2 = R2 - AD;
+ }
+ Delta = AD - R2;
+ } while (Q1.ult(Delta) || (Q1 == Delta && R1 == 0));
+
+ Retval.Magic = Q2 + 1;
+ if (D.isNegative())
+ Retval.Magic = -Retval.Magic; // resulting magic number
+ Retval.ShiftAmount = P - D.getBitWidth(); // resulting shift
+ return Retval;
+}
+
+/// Calculate the magic numbers required to implement an unsigned integer
+/// division by a constant as a sequence of multiplies, adds and shifts.
+/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry
+/// S. Warren, Jr., chapter 10.
+/// LeadingZeros can be used to simplify the calculation if the upper bits
+/// of the divided value are known zero.
+UnsignedDivisonByConstantInfo
+UnsignedDivisonByConstantInfo::get(const APInt &D, unsigned LeadingZeros) {
+ unsigned P;
+ APInt NC, Delta, Q1, R1, Q2, R2;
+ struct UnsignedDivisonByConstantInfo Retval;
+ Retval.IsAdd = 0; // initialize "add" indicator
+ APInt AllOnes = APInt::getAllOnes(D.getBitWidth()).lshr(LeadingZeros);
+ APInt SignedMin = APInt::getSignedMinValue(D.getBitWidth());
+ APInt SignedMax = APInt::getSignedMaxValue(D.getBitWidth());
+
+ NC = AllOnes - (AllOnes - D).urem(D);
+ P = D.getBitWidth() - 1; // initialize P
+ Q1 = SignedMin.udiv(NC); // initialize Q1 = 2P/NC
+ R1 = SignedMin - Q1 * NC; // initialize R1 = rem(2P,NC)
+ Q2 = SignedMax.udiv(D); // initialize Q2 = (2P-1)/D
+ R2 = SignedMax - Q2 * D; // initialize R2 = rem((2P-1),D)
+ do {
+ P = P + 1;
+ if (R1.uge(NC - R1)) {
+ Q1 = Q1 + Q1 + 1; // update Q1
+ R1 = R1 + R1 - NC; // update R1
+ } else {
+ Q1 = Q1 + Q1; // update Q1
+ R1 = R1 + R1; // update R1
+ }
+ if ((R2 + 1).uge(D - R2)) {
+ if (Q2.uge(SignedMax))
+ Retval.IsAdd = 1;
+ Q2 = Q2 + Q2 + 1; // update Q2
+ R2 = R2 + R2 + 1 - D; // update R2
+ } else {
+ if (Q2.uge(SignedMin))
+ Retval.IsAdd = 1;
+ Q2 = Q2 + Q2; // update Q2
+ R2 = R2 + R2 + 1; // update R2
+ }
+ Delta = D - 1 - R2;
+ } while (P < D.getBitWidth() * 2 &&
+ (Q1.ult(Delta) || (Q1 == Delta && R1 == 0)));
+ Retval.Magic = Q2 + 1; // resulting magic number
+ Retval.ShiftAmount = P - D.getBitWidth(); // resulting shift
+ return Retval;
+}
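
These two entry points take over from the removed APInt::magic()/magicu(); a usage sketch with an arbitrary divisor (field names as declared in llvm/Support/DivisionByConstantInfo.h):

  APInt D(32, 7);
  SignedDivisionByConstantInfo Signed = SignedDivisionByConstantInfo::get(D);
  // Signed.Magic and Signed.ShiftAmount drive the multiply+shift lowering of x / 7.
  UnsignedDivisonByConstantInfo Unsigned =
      UnsignedDivisonByConstantInfo::get(D, /*LeadingZeros=*/0);
  // Unsigned.Magic, Unsigned.ShiftAmount and Unsigned.IsAdd cover the unsigned case.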
diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp
index e7ab4387dfd1..8bfc8ee7a8cc 100644
--- a/llvm/lib/Support/Error.cpp
+++ b/llvm/lib/Support/Error.cpp
@@ -80,8 +80,11 @@ std::error_code inconvertibleErrorCode() {
}
std::error_code FileError::convertToErrorCode() const {
- return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
- *ErrorErrorCat);
+ std::error_code NestedEC = Err->convertToErrorCode();
+ if (NestedEC == inconvertibleErrorCode())
+ return std::error_code(static_cast<int>(ErrorErrorCode::FileError),
+ *ErrorErrorCat);
+ return NestedEC;
}
Error errorCodeToError(std::error_code EC) {
@@ -96,7 +99,7 @@ std::error_code errorToErrorCode(Error Err) {
EC = EI.convertToErrorCode();
});
if (EC == inconvertibleErrorCode())
- report_fatal_error(EC.message());
+ report_fatal_error(Twine(EC.message()));
return EC;
}
@@ -144,7 +147,7 @@ void report_fatal_error(Error Err, bool GenCrashDiag) {
raw_string_ostream ErrStream(ErrMsg);
logAllUnhandledErrors(std::move(Err), ErrStream);
}
- report_fatal_error(ErrMsg);
+ report_fatal_error(Twine(ErrMsg));
}
} // end namespace llvm
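
The FileError change lets a wrapped error code survive the round trip through errorToErrorCode; a sketch (file name and error value chosen for illustration):

  Error E = createFileError("config.json",
                            errorCodeToError(make_error_code(errc::no_such_file_or_directory)));
  std::error_code EC = errorToErrorCode(std::move(E));
  // EC now reflects no_such_file_or_directory instead of the opaque FileError code.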
diff --git a/llvm/lib/Support/ErrorHandling.cpp b/llvm/lib/Support/ErrorHandling.cpp
index ce6344284f06..80c0e00439a5 100644
--- a/llvm/lib/Support/ErrorHandling.cpp
+++ b/llvm/lib/Support/ErrorHandling.cpp
@@ -83,10 +83,6 @@ void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
report_fatal_error(Twine(Reason), GenCrashDiag);
}
-void llvm::report_fatal_error(const std::string &Reason, bool GenCrashDiag) {
- report_fatal_error(Twine(Reason), GenCrashDiag);
-}
-
void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) {
report_fatal_error(Twine(Reason), GenCrashDiag);
}
@@ -105,7 +101,7 @@ void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) {
}
if (handler) {
- handler(handlerData, Reason.str(), GenCrashDiag);
+ handler(handlerData, Reason.str().c_str(), GenCrashDiag);
} else {
// Blast the result out to stderr. We don't try hard to make sure this
// succeeds (e.g. handling EINTR) and we can't use errs() here because
@@ -218,11 +214,11 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file,
#endif
}
-static void bindingsErrorHandler(void *user_data, const std::string& reason,
+static void bindingsErrorHandler(void *user_data, const char *reason,
bool gen_crash_diag) {
LLVMFatalErrorHandler handler =
LLVM_EXTENSION reinterpret_cast<LLVMFatalErrorHandler>(user_data);
- handler(reason.c_str());
+ handler(reason);
}
void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) {
@@ -247,7 +243,10 @@ std::error_code llvm::mapWindowsError(unsigned EV) {
switch (EV) {
MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied);
MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists);
+ MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
+ MAP_ERR_TO_COND(ERROR_BAD_PATHNAME, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device);
+ MAP_ERR_TO_COND(ERROR_BROKEN_PIPE, broken_pipe);
MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long);
MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy);
@@ -269,18 +268,20 @@ std::error_code llvm::mapWindowsError(unsigned EV) {
MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported);
MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument);
MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument);
+ MAP_ERR_TO_COND(ERROR_INVALID_PARAMETER, invalid_argument);
MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available);
MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available);
MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument);
MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied);
MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again);
+ MAP_ERR_TO_COND(ERROR_NOT_SUPPORTED, not_supported);
MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error);
MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy);
MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory);
MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory);
- MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory);
MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error);
+ MAP_ERR_TO_COND(ERROR_REPARSE_TAG_INVALID, invalid_argument);
MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again);
MAP_ERR_TO_COND(ERROR_SEEK, io_error);
MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied);
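
Handlers written against the old std::string-based signature need a small update; a sketch assuming the updated fatal_error_handler_t typedef that this change implies:

  static void MyFatalHandler(void *UserData, const char *Reason, bool GenCrashDiag) {
    fprintf(stderr, "llvm fatal error: %s\n", Reason);
  }
  // ... during tool initialization:
  install_fatal_error_handler(MyFatalHandler);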
diff --git a/llvm/lib/Support/ExtensibleRTTI.cpp b/llvm/lib/Support/ExtensibleRTTI.cpp
index 1c98d1bb8feb..a6a5c196fb35 100644
--- a/llvm/lib/Support/ExtensibleRTTI.cpp
+++ b/llvm/lib/Support/ExtensibleRTTI.cpp
@@ -1,9 +1,8 @@
//===----- lib/Support/ExtensibleRTTI.cpp - ExtensibleRTTI utilities ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp
index e4a86bb69de4..dbe28e56b2c3 100644
--- a/llvm/lib/Support/FileUtilities.cpp
+++ b/llvm/lib/Support/FileUtilities.cpp
@@ -300,8 +300,7 @@ llvm::Error llvm::writeFileAtomically(
std::function<llvm::Error(llvm::raw_ostream &)> Writer) {
SmallString<128> GeneratedUniqPath;
int TempFD;
- if (sys::fs::createUniqueFile(TempPathModel.str(), TempFD,
- GeneratedUniqPath)) {
+ if (sys::fs::createUniqueFile(TempPathModel, TempFD, GeneratedUniqPath)) {
return llvm::make_error<AtomicFileWriteError>(
atomic_write_error::failed_to_create_uniq_file);
}
@@ -319,8 +318,7 @@ llvm::Error llvm::writeFileAtomically(
atomic_write_error::output_stream_error);
}
- if (sys::fs::rename(/*from=*/GeneratedUniqPath.c_str(),
- /*to=*/FinalPath.str().c_str())) {
+ if (sys::fs::rename(/*from=*/GeneratedUniqPath, /*to=*/FinalPath)) {
return llvm::make_error<AtomicFileWriteError>(
atomic_write_error::failed_to_rename_temp_file);
}
diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp
index b41869aba95f..696e6b7a99d8 100644
--- a/llvm/lib/Support/GraphWriter.cpp
+++ b/llvm/lib/Support/GraphWriter.cpp
@@ -23,11 +23,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
-#include <system_error>
#include <string>
+#include <system_error>
#include <vector>
using namespace llvm;
@@ -94,11 +95,8 @@ StringRef llvm::DOT::getColorString(unsigned ColorNumber) {
static std::string replaceIllegalFilenameChars(std::string Filename,
const char ReplacementChar) {
-#ifdef _WIN32
- std::string IllegalChars = "\\/:?\"<>|";
-#else
- std::string IllegalChars = "/";
-#endif
+ std::string IllegalChars =
+ is_style_windows(sys::path::Style::native) ? "\\/:?\"<>|" : "/";
for (char IllegalChar : IllegalChars) {
std::replace(Filename.begin(), Filename.end(), IllegalChar,
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index f873ff06f1f7..7b14616f6fea 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -772,6 +772,22 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT;
break;
+ // Tigerlake:
+ case 0x8c:
+ case 0x8d:
+ CPU = "tigerlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_TIGERLAKE;
+ break;
+
+ // Alderlake:
+ case 0x97:
+ case 0x9a:
+ CPU = "alderlake";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_ALDERLAKE;
+ break;
+
// Icelake Xeon:
case 0x6a:
case 0x6c:
@@ -1055,8 +1071,10 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_FMA);
if ((ECX >> 19) & 1)
setFeature(X86::FEATURE_SSE4_1);
- if ((ECX >> 20) & 1)
+ if ((ECX >> 20) & 1) {
setFeature(X86::FEATURE_SSE4_2);
+ setFeature(X86::FEATURE_CRC32);
+ }
if ((ECX >> 23) & 1)
setFeature(X86::FEATURE_POPCNT);
if ((ECX >> 25) & 1)
@@ -1338,6 +1356,16 @@ StringRef sys::getHostCPUName() {
return "generic";
}
}
+#elif defined(__riscv)
+StringRef sys::getHostCPUName() {
+#if __riscv_xlen == 64
+ return "generic-rv64";
+#elif __riscv_xlen == 32
+ return "generic-rv32";
+#else
+#error "Unhandled value of __riscv_xlen"
+#endif
+}
#else
StringRef sys::getHostCPUName() { return "generic"; }
namespace llvm {
@@ -1502,6 +1530,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["cx16"] = (ECX >> 13) & 1;
Features["sse4.1"] = (ECX >> 19) & 1;
Features["sse4.2"] = (ECX >> 20) & 1;
+ Features["crc32"] = Features["sse4.2"];
Features["movbe"] = (ECX >> 22) & 1;
Features["popcnt"] = (ECX >> 23) & 1;
Features["aes"] = (ECX >> 25) & 1;
@@ -1617,6 +1646,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
// For more info, see X86 ISA docs.
Features["pconfig"] = HasLeaf7 && ((EDX >> 18) & 1);
Features["amx-bf16"] = HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave;
+ Features["avx512fp16"] = HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save;
Features["amx-tile"] = HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave;
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
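
The new feature keys are visible through the usual host-detection entry points; a sketch:

  StringMap<bool> Features;
  if (sys::getHostCPUFeatures(Features)) {
    bool HasCRC32 = Features.lookup("crc32");    // mirrors sse4.2 on x86
    bool HasFP16 = Features.lookup("avx512fp16");
    (void)HasCRC32;
    (void)HasFP16;
  }
  outs() << "host cpu: " << sys::getHostCPUName() << "\n"; // "generic-rv64" on 64-bit RISC-V hosts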
diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index dbfd673553f4..17b36ed51850 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -109,6 +109,7 @@ void Value::copyFrom(const Value &M) {
case T_Boolean:
case T_Double:
case T_Integer:
+ case T_UINT64:
memcpy(&Union, &M.Union, sizeof(Union));
break;
case T_StringRef:
@@ -133,6 +134,7 @@ void Value::moveFrom(const Value &&M) {
case T_Boolean:
case T_Double:
case T_Integer:
+ case T_UINT64:
memcpy(&Union, &M.Union, sizeof(Union));
break;
case T_StringRef:
@@ -159,6 +161,7 @@ void Value::destroy() {
case T_Boolean:
case T_Double:
case T_Integer:
+ case T_UINT64:
break;
case T_StringRef:
as<StringRef>().~StringRef();
@@ -750,6 +753,8 @@ void llvm::json::OStream::value(const Value &V) {
valueBegin();
if (V.Type == Value::T_Integer)
OS << *V.getAsInteger();
+ else if (V.Type == Value::T_UINT64)
+ OS << *V.getAsUINT64();
else
OS << format("%.*g", std::numeric_limits<double>::max_digits10,
*V.getAsNumber());
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index d997bd85f1e0..90483817c302 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -404,7 +404,7 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
// We only know that the absolute value's MSB will be zero if INT_MIN is
// poison, or there is a set bit that isn't the sign bit (otherwise it could
// be INT_MIN).
- if (IntMinIsPoison || (!One.isNullValue() && !One.isMinSignedValue()))
+ if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue()))
KnownAbs.Zero.setSignBit();
// FIXME: Handle known negative input?
@@ -412,10 +412,13 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
return KnownAbs;
}
-KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
+ bool SelfMultiply) {
unsigned BitWidth = LHS.getBitWidth();
assert(BitWidth == RHS.getBitWidth() && !LHS.hasConflict() &&
!RHS.hasConflict() && "Operand mismatch");
+ assert((!SelfMultiply || (LHS.One == RHS.One && LHS.Zero == RHS.Zero)) &&
+ "Self multiplication knownbits mismatch");
// Compute a conservative estimate for high known-0 bits.
unsigned LeadZ =
@@ -489,6 +492,14 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS) {
Res.Zero.setHighBits(LeadZ);
Res.Zero |= (~BottomKnown).getLoBits(ResultBitsKnown);
Res.One = BottomKnown.getLoBits(ResultBitsKnown);
+
+ // If we're self-multiplying then bit[1] is guaranteed to be zero.
+ if (SelfMultiply && BitWidth > 1) {
+ assert(Res.One[1] == 0 &&
+ "Self-multiplication failed Quadratic Reciprocity!");
+ Res.Zero.setBit(1);
+ }
+
return Res;
}
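
The SelfMultiply flag encodes that squares are congruent to 0 or 1 mod 4, so bit 1 of x*x is always clear; a sketch:

  KnownBits X(8); // nothing known about X
  KnownBits Sq = KnownBits::mul(X, X, /*SelfMultiply=*/true);
  // Sq.Zero[1] is set even though X is fully unknown.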
diff --git a/llvm/lib/Support/LockFileManager.cpp b/llvm/lib/Support/LockFileManager.cpp
index a2b56ab295c4..5fd52999adb5 100644
--- a/llvm/lib/Support/LockFileManager.cpp
+++ b/llvm/lib/Support/LockFileManager.cpp
@@ -35,7 +35,7 @@
#include <unistd.h>
#endif
-#if defined(__APPLE__) && defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && (__MAC_OS_X_VERSION_MIN_REQUIRED > 1050)
+#if defined(__APPLE__) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1050)
#define USE_OSX_GETHOSTUUID 1
#else
#define USE_OSX_GETHOSTUUID 0
diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index 5e0b076f176e..9dceb4d418cd 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -67,11 +67,11 @@
// SET reads 4 input bytes in little-endian byte order and stores them
// in a properly aligned word in host byte order.
#define SET(n) \
- (block[(n)] = \
- (MD5_u32plus) ptr[(n) * 4] | ((MD5_u32plus) ptr[(n) * 4 + 1] << 8) | \
- ((MD5_u32plus) ptr[(n) * 4 + 2] << 16) | \
- ((MD5_u32plus) ptr[(n) * 4 + 3] << 24))
-#define GET(n) (block[(n)])
+ (InternalState.block[(n)] = (MD5_u32plus)ptr[(n)*4] | \
+ ((MD5_u32plus)ptr[(n)*4 + 1] << 8) | \
+ ((MD5_u32plus)ptr[(n)*4 + 2] << 16) | \
+ ((MD5_u32plus)ptr[(n)*4 + 3] << 24))
+#define GET(n) (InternalState.block[(n)])
using namespace llvm;
@@ -85,10 +85,10 @@ const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
ptr = Data.data();
- a = this->a;
- b = this->b;
- c = this->c;
- d = this->d;
+ a = InternalState.a;
+ b = InternalState.b;
+ c = InternalState.c;
+ d = InternalState.d;
do {
saved_a = a;
@@ -176,10 +176,10 @@ const uint8_t *MD5::body(ArrayRef<uint8_t> Data) {
ptr += 64;
} while (Size -= 64);
- this->a = a;
- this->b = b;
- this->c = c;
- this->d = d;
+ InternalState.a = a;
+ InternalState.b = b;
+ InternalState.c = c;
+ InternalState.d = d;
return ptr;
}
@@ -193,10 +193,10 @@ void MD5::update(ArrayRef<uint8_t> Data) {
const uint8_t *Ptr = Data.data();
unsigned long Size = Data.size();
- saved_lo = lo;
- if ((lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
- hi++;
- hi += Size >> 29;
+ saved_lo = InternalState.lo;
+ if ((InternalState.lo = (saved_lo + Size) & 0x1fffffff) < saved_lo)
+ InternalState.hi++;
+ InternalState.hi += Size >> 29;
used = saved_lo & 0x3f;
@@ -204,14 +204,14 @@ void MD5::update(ArrayRef<uint8_t> Data) {
free = 64 - used;
if (Size < free) {
- memcpy(&buffer[used], Ptr, Size);
+ memcpy(&InternalState.buffer[used], Ptr, Size);
return;
}
- memcpy(&buffer[used], Ptr, free);
+ memcpy(&InternalState.buffer[used], Ptr, free);
Ptr = Ptr + free;
Size -= free;
- body(makeArrayRef(buffer, 64));
+ body(makeArrayRef(InternalState.buffer, 64));
}
if (Size >= 64) {
@@ -219,7 +219,7 @@ void MD5::update(ArrayRef<uint8_t> Data) {
Size &= 0x3f;
}
- memcpy(buffer, Ptr, Size);
+ memcpy(InternalState.buffer, Ptr, Size);
}
/// Add the bytes in the StringRef \p Str to the hash.
@@ -235,31 +235,48 @@ void MD5::update(StringRef Str) {
void MD5::final(MD5Result &Result) {
unsigned long used, free;
- used = lo & 0x3f;
+ used = InternalState.lo & 0x3f;
- buffer[used++] = 0x80;
+ InternalState.buffer[used++] = 0x80;
free = 64 - used;
if (free < 8) {
- memset(&buffer[used], 0, free);
- body(makeArrayRef(buffer, 64));
+ memset(&InternalState.buffer[used], 0, free);
+ body(makeArrayRef(InternalState.buffer, 64));
used = 0;
free = 64;
}
- memset(&buffer[used], 0, free - 8);
+ memset(&InternalState.buffer[used], 0, free - 8);
- lo <<= 3;
- support::endian::write32le(&buffer[56], lo);
- support::endian::write32le(&buffer[60], hi);
+ InternalState.lo <<= 3;
+ support::endian::write32le(&InternalState.buffer[56], InternalState.lo);
+ support::endian::write32le(&InternalState.buffer[60], InternalState.hi);
- body(makeArrayRef(buffer, 64));
+ body(makeArrayRef(InternalState.buffer, 64));
- support::endian::write32le(&Result[0], a);
- support::endian::write32le(&Result[4], b);
- support::endian::write32le(&Result[8], c);
- support::endian::write32le(&Result[12], d);
+ support::endian::write32le(&Result[0], InternalState.a);
+ support::endian::write32le(&Result[4], InternalState.b);
+ support::endian::write32le(&Result[8], InternalState.c);
+ support::endian::write32le(&Result[12], InternalState.d);
+}
+
+StringRef MD5::final() {
+ final(Result);
+ return StringRef(reinterpret_cast<char *>(Result.Bytes.data()),
+ Result.Bytes.size());
+}
+
+StringRef MD5::result() {
+ auto StateToRestore = InternalState;
+
+ auto Hash = final();
+
+ // Restore the state
+ InternalState = StateToRestore;
+
+ return Hash;
}
SmallString<32> MD5::MD5Result::digest() const {
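The InternalState split is what makes the new MD5::result() possible: it finalizes a copy of the running state, so a caller can peek at the digest of the bytes seen so far and keep hashing, while final() remains terminal. A hedged usage sketch against the interface visible in this patch (the one-argument final() and MD5Result::digest() are pre-existing API; output text here is illustrative):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  MD5 Hash;
  Hash.update("hello ");

  // result() finalizes a copy of the internal state, so hashing can continue.
  StringRef Intermediate = Hash.result();
  outs() << "intermediate digest is " << Intermediate.size() << " raw bytes\n";

  Hash.update("world");

  // final() is terminal: it pads and flushes the real internal state.
  MD5::MD5Result Digest;
  Hash.final(Digest);
  outs() << "md5(\"hello world\") = " << Digest.digest().str() << "\n";
  return 0;
}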
diff --git a/llvm/lib/Support/MSP430AttributeParser.cpp b/llvm/lib/Support/MSP430AttributeParser.cpp
new file mode 100644
index 000000000000..a9948a158fc0
--- /dev/null
+++ b/llvm/lib/Support/MSP430AttributeParser.cpp
@@ -0,0 +1,53 @@
+//===-- MSP430AttributeParser.cpp - MSP430 Attribute Parser ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MSP430AttributeParser.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+using namespace llvm::MSP430Attrs;
+
+constexpr std::array<MSP430AttributeParser::DisplayHandler, 4>
+ MSP430AttributeParser::DisplayRoutines{
+ {{MSP430Attrs::TagISA, &MSP430AttributeParser::parseISA},
+ {MSP430Attrs::TagCodeModel, &MSP430AttributeParser::parseCodeModel},
+ {MSP430Attrs::TagDataModel, &MSP430AttributeParser::parseDataModel},
+ {MSP430Attrs::TagEnumSize, &MSP430AttributeParser::parseEnumSize}}};
+
+Error MSP430AttributeParser::parseISA(AttrType Tag) {
+ static const char *StringVals[] = {"None", "MSP430", "MSP430X"};
+ return parseStringAttribute("ISA", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::parseCodeModel(AttrType Tag) {
+ static const char *StringVals[] = {"None", "Small", "Large"};
+ return parseStringAttribute("Code Model", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::parseDataModel(AttrType Tag) {
+ static const char *StringVals[] = {"None", "Small", "Large", "Restricted"};
+ return parseStringAttribute("Data Model", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::parseEnumSize(AttrType Tag) {
+ static const char *StringVals[] = {"None", "Small", "Integer", "Don't Care"};
+ return parseStringAttribute("Enum Size", Tag, makeArrayRef(StringVals));
+}
+
+Error MSP430AttributeParser::handler(uint64_t Tag, bool &Handled) {
+ Handled = false;
+ for (const DisplayHandler &Disp : DisplayRoutines) {
+ if (uint64_t(Disp.Attribute) != Tag)
+ continue;
+ if (Error E = (this->*Disp.Routine)(static_cast<AttrType>(Tag)))
+ return E;
+ Handled = true;
+ break;
+ }
+ return Error::success();
+}
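handler() above is a small table-driven dispatcher: a constexpr array maps each known tag to a member-function parser, and unknown tags are reported as unhandled rather than as errors. A standalone sketch of the same pattern in plain C++ (the tag values and names below are illustrative, not the real MSP430 ones):

#include <array>
#include <cstdint>
#include <cstdio>

// Minimal version of the tag -> member-function dispatch table used by
// MSP430AttributeParser::handler. Illustrative only.
struct Parser {
  using Routine = void (Parser::*)(uint64_t);
  struct DisplayHandler {
    uint64_t Attribute;
    Routine Handler;
  };

  void parseISA(uint64_t Tag) { std::printf("ISA tag %llu\n", (unsigned long long)Tag); }
  void parseCodeModel(uint64_t Tag) { std::printf("Code model tag %llu\n", (unsigned long long)Tag); }

  static constexpr std::array<DisplayHandler, 2> Routines{
      {{4, &Parser::parseISA}, {6, &Parser::parseCodeModel}}};

  bool handle(uint64_t Tag) {
    for (const DisplayHandler &D : Routines) {
      if (D.Attribute != Tag)
        continue;
      (this->*D.Handler)(Tag);
      return true;
    }
    return false; // unknown tag; the caller decides what to do
  }
};

int main() {
  Parser P;
  P.handle(4);  // dispatches to parseISA
  P.handle(99); // not handled
  return 0;
}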
diff --git a/llvm/lib/Support/MSP430Attributes.cpp b/llvm/lib/Support/MSP430Attributes.cpp
new file mode 100644
index 000000000000..4483a6872559
--- /dev/null
+++ b/llvm/lib/Support/MSP430Attributes.cpp
@@ -0,0 +1,22 @@
+//===-- MSP430Attributes.cpp - MSP430 Attributes --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/MSP430Attributes.h"
+
+using namespace llvm;
+using namespace llvm::MSP430Attrs;
+
+static constexpr TagNameItem TagData[] = {{TagISA, "Tag_ISA"},
+ {TagCodeModel, "Tag_Code_Model"},
+ {TagDataModel, "Tag_Data_Model"},
+ {TagEnumSize, "Tag_Enum_Size"}};
+
+constexpr TagNameMap MSP430AttributeTags{TagData};
+const TagNameMap &llvm::MSP430Attrs::getMSP430AttributeTags() {
+ return MSP430AttributeTags;
+}
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 9a2e1003da5a..71e3a1362f7e 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -151,7 +151,12 @@ static std::atomic<int> TaskGroupInstances;
// lock, only allow the first TaskGroup to run tasks in parallel. In the scenario
// of nested parallel_for_each(), only the outermost one runs in parallel.
TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
-TaskGroup::~TaskGroup() { --TaskGroupInstances; }
+TaskGroup::~TaskGroup() {
+ // We must ensure that all the workloads have finished before decrementing the
+ // instances count.
+ L.sync();
+ --TaskGroupInstances;
+}
void TaskGroup::spawn(std::function<void()> F) {
if (Parallel) {
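The destructor change matters because TaskGroupInstances gates whether the next TaskGroup runs its work in parallel: if the count drops to zero while spawned tasks are still in flight, a TaskGroup constructed immediately afterwards could believe it is the outermost one. A standalone sketch of the ordering the patch enforces (illustrative only, not LLVM's executor):

#include <atomic>
#include <functional>
#include <thread>
#include <vector>

// Join all spawned work *before* decrementing the shared instance counter,
// so a TaskGroup constructed right afterwards cannot observe a zero count
// while tasks from the old group are still running.
static std::atomic<int> TaskGroupInstances{0};

class TaskGroup {
  std::vector<std::thread> Workers;
  bool Parallel;

public:
  TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}

  ~TaskGroup() {
    sync();               // must happen first
    --TaskGroupInstances; // only now may a new group go parallel
  }

  void spawn(std::function<void()> F) {
    if (Parallel)
      Workers.emplace_back(std::move(F));
    else
      F(); // nested groups run their work inline
  }

  void sync() {
    for (std::thread &T : Workers)
      if (T.joinable())
        T.join();
    Workers.clear();
  }
};

int main() {
  TaskGroup TG;
  for (int I = 0; I < 4; ++I)
    TG.spawn([I] { (void)I; /* per-item work */ });
  // ~TaskGroup joins the workers before decrementing the counter.
  return 0;
}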
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index a724ba2faf93..3957547dfaaa 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -12,6 +12,7 @@
#include "llvm/Support/Path.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Config/config.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
@@ -37,15 +38,16 @@ namespace {
using llvm::sys::path::Style;
inline Style real_style(Style style) {
-#ifdef _WIN32
- return (style == Style::posix) ? Style::posix : Style::windows;
-#else
- return (style == Style::windows) ? Style::windows : Style::posix;
-#endif
+ if (style != Style::native)
+ return style;
+ if (is_style_posix(style))
+ return Style::posix;
+ return LLVM_WINDOWS_PREFER_FORWARD_SLASH ? Style::windows_slash
+ : Style::windows_backslash;
}
inline const char *separators(Style style) {
- if (real_style(style) == Style::windows)
+ if (is_style_windows(style))
return "\\/";
return "/";
}
@@ -66,7 +68,7 @@ namespace {
if (path.empty())
return path;
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
// C:
if (path.size() >= 2 &&
std::isalpha(static_cast<unsigned char>(path[0])) && path[1] == ':')
@@ -98,7 +100,7 @@ namespace {
size_t pos = str.find_last_of(separators(style), str.size() - 1);
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
if (pos == StringRef::npos)
pos = str.find_last_of(':', str.size() - 2);
}
@@ -113,7 +115,7 @@ namespace {
// directory in str, it returns StringRef::npos.
size_t root_dir_start(StringRef str, Style style) {
// case "c:/"
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
if (str.size() > 2 && str[1] == ':' && is_separator(str[2], style))
return 2;
}
@@ -259,7 +261,7 @@ const_iterator &const_iterator::operator++() {
// Root dir.
if (was_net ||
// c:/
- (real_style(S) == Style::windows && Component.endswith(":"))) {
+ (is_style_windows(S) && Component.endswith(":"))) {
Component = Path.substr(Position, 1);
return *this;
}
@@ -348,7 +350,7 @@ StringRef root_path(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->endswith(":");
if (has_net || has_drive) {
if ((++pos != e) && is_separator((*pos)[0], style)) {
@@ -373,7 +375,7 @@ StringRef root_name(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->endswith(":");
if (has_net || has_drive) {
// just {C:,//net}, return the first component.
@@ -390,7 +392,7 @@ StringRef root_directory(StringRef path, Style style) {
if (b != e) {
bool has_net =
b->size() > 2 && is_separator((*b)[0], style) && (*b)[1] == (*b)[0];
- bool has_drive = (real_style(style) == Style::windows) && b->endswith(":");
+ bool has_drive = is_style_windows(style) && b->endswith(":");
if ((has_net || has_drive) &&
// {C:,//net}, skip to the next component.
@@ -497,7 +499,7 @@ void replace_extension(SmallVectorImpl<char> &path, const Twine &extension,
static bool starts_with(StringRef Path, StringRef Prefix,
Style style = Style::native) {
// Windows prefix matching : case and separator insensitive
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
if (Path.size() < Prefix.size())
return false;
for (size_t I = 0, E = Prefix.size(); I != E; ++I) {
@@ -548,8 +550,10 @@ void native(const Twine &path, SmallVectorImpl<char> &result, Style style) {
void native(SmallVectorImpl<char> &Path, Style style) {
if (Path.empty())
return;
- if (real_style(style) == Style::windows) {
- std::replace(Path.begin(), Path.end(), '/', '\\');
+ if (is_style_windows(style)) {
+ for (char &Ch : Path)
+ if (is_separator(Ch, style))
+ Ch = preferred_separator(style);
if (Path[0] == '~' && (Path.size() == 1 || is_separator(Path[1], style))) {
SmallString<128> PathHome;
home_directory(PathHome);
@@ -557,14 +561,12 @@ void native(SmallVectorImpl<char> &Path, Style style) {
Path = PathHome;
}
} else {
- for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI)
- if (*PI == '\\')
- *PI = '/';
+ std::replace(Path.begin(), Path.end(), '\\', '/');
}
}
std::string convert_to_slash(StringRef path, Style style) {
- if (real_style(style) != Style::windows)
+ if (is_style_posix(style))
return std::string(path);
std::string s = path.str();
@@ -599,7 +601,7 @@ StringRef extension(StringRef path, Style style) {
bool is_separator(char value, Style style) {
if (value == '/')
return true;
- if (real_style(style) == Style::windows)
+ if (is_style_windows(style))
return value == '\\';
return false;
}
@@ -671,8 +673,7 @@ bool is_absolute(const Twine &path, Style style) {
StringRef p = path.toStringRef(path_storage);
bool rootDir = has_root_directory(p, style);
- bool rootName =
- (real_style(style) != Style::windows) || has_root_name(p, style);
+ bool rootName = is_style_posix(style) || has_root_name(p, style);
return rootDir && rootName;
}
@@ -686,7 +687,7 @@ bool is_absolute_gnu(const Twine &path, Style style) {
if (!p.empty() && is_separator(p.front(), style))
return true;
- if (real_style(style) == Style::windows) {
+ if (is_style_windows(style)) {
// Handle drive letter pattern (a character followed by ':') on Windows.
if (p.size() >= 2 && (p[0] && p[1] == ':'))
return true;
@@ -906,8 +907,7 @@ void make_absolute(const Twine &current_directory,
bool rootName = path::has_root_name(p);
// Already absolute.
- if ((rootName || real_style(Style::native) != Style::windows) &&
- rootDirectory)
+ if ((rootName || is_style_posix(Style::native)) && rootDirectory)
return;
// All of the following conditions will need the current directory.
@@ -1190,6 +1190,10 @@ TempFile &TempFile::operator=(TempFile &&Other) {
FD = Other.FD;
Other.Done = true;
Other.FD = -1;
+#ifdef _WIN32
+ RemoveOnClose = Other.RemoveOnClose;
+ Other.RemoveOnClose = false;
+#endif
return *this;
}
@@ -1204,20 +1208,23 @@ Error TempFile::discard() {
FD = -1;
#ifdef _WIN32
- // On windows closing will remove the file.
- TmpName = "";
- return Error::success();
+ // On Windows, closing will remove the file, if we set the delete
+ // disposition. If not, remove it manually.
+ bool Remove = RemoveOnClose;
#else
- // Always try to close and remove.
+ // Always try to remove the file.
+ bool Remove = true;
+#endif
std::error_code RemoveEC;
- if (!TmpName.empty()) {
+ if (Remove && !TmpName.empty()) {
RemoveEC = fs::remove(TmpName);
sys::DontRemoveFileOnSignal(TmpName);
if (!RemoveEC)
TmpName = "";
+ } else {
+ TmpName = "";
}
return errorCodeToError(RemoveEC);
-#endif
}
Error TempFile::keep(const Twine &Name) {
@@ -1228,19 +1235,26 @@ Error TempFile::keep(const Twine &Name) {
// If we can't cancel the delete don't rename.
auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
std::error_code RenameEC = setDeleteDisposition(H, false);
+ bool ShouldDelete = false;
if (!RenameEC) {
RenameEC = rename_handle(H, Name);
// If rename failed because it's cross-device, copy instead
if (RenameEC ==
std::error_code(ERROR_NOT_SAME_DEVICE, std::system_category())) {
RenameEC = copy_file(TmpName, Name);
- setDeleteDisposition(H, true);
+ ShouldDelete = true;
}
}
- // If we can't rename, discard the temporary file.
+ // If we can't rename or copy, discard the temporary file.
if (RenameEC)
- setDeleteDisposition(H, true);
+ ShouldDelete = true;
+ if (ShouldDelete) {
+ if (!RemoveOnClose)
+ setDeleteDisposition(H, true);
+ else
+ remove(TmpName);
+ }
#else
std::error_code RenameEC = fs::rename(TmpName, Name);
if (RenameEC) {
@@ -1250,8 +1264,8 @@ Error TempFile::keep(const Twine &Name) {
if (RenameEC)
remove(TmpName);
}
- sys::DontRemoveFileOnSignal(TmpName);
#endif
+ sys::DontRemoveFileOnSignal(TmpName);
if (!RenameEC)
TmpName = "";
@@ -1273,9 +1287,8 @@ Error TempFile::keep() {
auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
if (std::error_code EC = setDeleteDisposition(H, false))
return errorCodeToError(EC);
-#else
- sys::DontRemoveFileOnSignal(TmpName);
#endif
+ sys::DontRemoveFileOnSignal(TmpName);
TmpName = "";
@@ -1297,14 +1310,22 @@ Expected<TempFile> TempFile::create(const Twine &Model, unsigned Mode,
return errorCodeToError(EC);
TempFile Ret(ResultPath, FD);
-#ifndef _WIN32
- if (sys::RemoveFileOnSignal(ResultPath)) {
+#ifdef _WIN32
+ auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
+ bool SetSignalHandler = false;
+ if (std::error_code EC = setDeleteDisposition(H, true)) {
+ Ret.RemoveOnClose = true;
+ SetSignalHandler = true;
+ }
+#else
+ bool SetSignalHandler = true;
+#endif
+ if (SetSignalHandler && sys::RemoveFileOnSignal(ResultPath)) {
// Make sure we delete the file when RemoveFileOnSignal fails.
consumeError(Ret.discard());
std::error_code EC(errc::operation_not_permitted);
return errorCodeToError(EC);
}
-#endif
return std::move(Ret);
}
} // namespace fs
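Most of the TempFile churn above (RemoveOnClose, the reordered DontRemoveFileOnSignal calls) exists to keep one invariant: a temporary is always deleted unless keep() succeeds. A hedged usage sketch of that lifecycle, using only the create()/keep()/discard() interface visible in this diff; the file names are illustrative:

#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  Expected<sys::fs::TempFile> Tmp =
      sys::fs::TempFile::create("output-%%%%%%.tmp");
  if (!Tmp) {
    errs() << toString(Tmp.takeError()) << "\n";
    return 1;
  }

  // Write through the still-open descriptor; the temporary is armed for
  // deletion (delete-on-close on Windows, a signal handler elsewhere).
  raw_fd_ostream OS(Tmp->FD, /*shouldClose=*/false);
  OS << "some intermediate data\n";
  OS.flush();

  bool Commit = true; // flip to false to exercise the discard() path
  Error E = Commit ? Tmp->keep("output.txt") // publish under the real name
                   : Tmp->discard();         // drop the temporary entirely
  if (E) {
    errs() << toString(std::move(E)) << "\n";
    return 1;
  }
  return 0;
}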
diff --git a/llvm/lib/Support/Process.cpp b/llvm/lib/Support/Process.cpp
index e7e9a8b56f74..547b3b73eec2 100644
--- a/llvm/lib/Support/Process.cpp
+++ b/llvm/lib/Support/Process.cpp
@@ -92,8 +92,7 @@ static bool coreFilesPrevented = !LLVM_ENABLE_CRASH_DUMPS;
bool Process::AreCoreFilesPrevented() { return coreFilesPrevented; }
-LLVM_ATTRIBUTE_NORETURN
-void Process::Exit(int RetCode, bool NoCleanup) {
+[[noreturn]] void Process::Exit(int RetCode, bool NoCleanup) {
if (CrashRecoveryContext *CRC = CrashRecoveryContext::GetCurrent())
CRC->HandleExit(RetCode);
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
new file mode 100644
index 000000000000..8e984002f90d
--- /dev/null
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -0,0 +1,718 @@
+//===-- RISCVISAInfo.cpp - RISCV Arch String Parser --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <array>
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+/// Represents the major and minor version number components of a RISC-V extension
+struct RISCVExtensionVersion {
+ unsigned Major;
+ unsigned Minor;
+};
+
+struct RISCVSupportedExtension {
+ const char *Name;
+ /// Supported version.
+ RISCVExtensionVersion Version;
+};
+
+} // end anonymous namespace
+
+static constexpr StringLiteral AllStdExts = "mafdqlcbjtpvn";
+
+static const RISCVSupportedExtension SupportedExtensions[] = {
+ {"i", RISCVExtensionVersion{2, 0}},
+ {"e", RISCVExtensionVersion{1, 9}},
+ {"m", RISCVExtensionVersion{2, 0}},
+ {"a", RISCVExtensionVersion{2, 0}},
+ {"f", RISCVExtensionVersion{2, 0}},
+ {"d", RISCVExtensionVersion{2, 0}},
+ {"c", RISCVExtensionVersion{2, 0}},
+};
+
+static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
+ {"v", RISCVExtensionVersion{0, 10}},
+ {"zba", RISCVExtensionVersion{1, 0}},
+ {"zbb", RISCVExtensionVersion{1, 0}},
+ {"zbc", RISCVExtensionVersion{1, 0}},
+ {"zbe", RISCVExtensionVersion{0, 93}},
+ {"zbf", RISCVExtensionVersion{0, 93}},
+ {"zbm", RISCVExtensionVersion{0, 93}},
+ {"zbp", RISCVExtensionVersion{0, 93}},
+ {"zbr", RISCVExtensionVersion{0, 93}},
+ {"zbs", RISCVExtensionVersion{1, 0}},
+ {"zbt", RISCVExtensionVersion{0, 93}},
+
+ {"zvamo", RISCVExtensionVersion{0, 10}},
+ {"zvlsseg", RISCVExtensionVersion{0, 10}},
+
+ {"zfhmin", RISCVExtensionVersion{0, 1}},
+ {"zfh", RISCVExtensionVersion{0, 1}},
+};
+
+static bool stripExperimentalPrefix(StringRef &Ext) {
+ return Ext.consume_front("experimental-");
+}
+
+struct FindByName {
+ FindByName(StringRef Ext) : Ext(Ext){};
+ StringRef Ext;
+ bool operator()(const RISCVSupportedExtension &ExtInfo) {
+ return ExtInfo.Name == Ext;
+ }
+};
+
+static Optional<RISCVExtensionVersion> findDefaultVersion(StringRef ExtName) {
+ // Find default version of an extension.
+ // TODO: We might set default version based on profile or ISA spec.
+ for (auto &ExtInfo : {makeArrayRef(SupportedExtensions),
+ makeArrayRef(SupportedExperimentalExtensions)}) {
+ auto ExtensionInfoIterator = llvm::find_if(ExtInfo, FindByName(ExtName));
+
+ if (ExtensionInfoIterator == ExtInfo.end()) {
+ continue;
+ }
+ return ExtensionInfoIterator->Version;
+ }
+ return None;
+}
+
+void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion,
+ unsigned MinorVersion) {
+ RISCVExtensionInfo Ext;
+ Ext.ExtName = ExtName.str();
+ Ext.MajorVersion = MajorVersion;
+ Ext.MinorVersion = MinorVersion;
+ Exts[ExtName.str()] = Ext;
+}
+
+static StringRef getExtensionTypeDesc(StringRef Ext) {
+ if (Ext.startswith("sx"))
+ return "non-standard supervisor-level extension";
+ if (Ext.startswith("s"))
+ return "standard supervisor-level extension";
+ if (Ext.startswith("x"))
+ return "non-standard user-level extension";
+ if (Ext.startswith("z"))
+ return "standard user-level extension";
+ return StringRef();
+}
+
+static StringRef getExtensionType(StringRef Ext) {
+ if (Ext.startswith("sx"))
+ return "sx";
+ if (Ext.startswith("s"))
+ return "s";
+ if (Ext.startswith("x"))
+ return "x";
+ if (Ext.startswith("z"))
+ return "z";
+ return StringRef();
+}
+
+static Optional<RISCVExtensionVersion> isExperimentalExtension(StringRef Ext) {
+ auto ExtIterator =
+ llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext));
+ if (ExtIterator == std::end(SupportedExperimentalExtensions))
+ return None;
+
+ return ExtIterator->Version;
+}
+
+bool RISCVISAInfo::isSupportedExtensionFeature(StringRef Ext) {
+ bool IsExperimental = stripExperimentalPrefix(Ext);
+
+ if (IsExperimental)
+ return llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext));
+ else
+ return llvm::any_of(SupportedExtensions, FindByName(Ext));
+}
+
+bool RISCVISAInfo::isSupportedExtension(StringRef Ext) {
+ return llvm::any_of(SupportedExtensions, FindByName(Ext)) ||
+ llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext));
+}
+
+bool RISCVISAInfo::isSupportedExtension(StringRef Ext, unsigned MajorVersion,
+ unsigned MinorVersion) {
+ auto FindByNameAndVersion = [=](const RISCVSupportedExtension &ExtInfo) {
+ return ExtInfo.Name == Ext && (MajorVersion == ExtInfo.Version.Major) &&
+ (MinorVersion == ExtInfo.Version.Minor);
+ };
+ return llvm::any_of(SupportedExtensions, FindByNameAndVersion) ||
+ llvm::any_of(SupportedExperimentalExtensions, FindByNameAndVersion);
+}
+
+bool RISCVISAInfo::hasExtension(StringRef Ext) const {
+ stripExperimentalPrefix(Ext);
+
+ if (!isSupportedExtension(Ext))
+ return false;
+
+ return Exts.count(Ext.str()) != 0;
+}
+
+// Get the rank for a single-letter extension; a lower value means a higher
+// priority.
+static int singleLetterExtensionRank(char Ext) {
+ switch (Ext) {
+ case 'i':
+ return -2;
+ case 'e':
+ return -1;
+ default:
+ break;
+ }
+
+ size_t Pos = AllStdExts.find(Ext);
+ int Rank;
+ if (Pos == StringRef::npos)
+ // If we got an unknown extension letter, then give it an alphabetical
+ // order, but after all known standard extensions.
+ Rank = AllStdExts.size() + (Ext - 'a');
+ else
+ Rank = Pos;
+
+ return Rank;
+}
+
+// Get the rank for a multi-letter extension; a lower value means a higher
+// priority/earlier position in the canonical order.
+static int multiLetterExtensionRank(const std::string &ExtName) {
+ assert(ExtName.length() >= 2);
+ int HighOrder;
+ int LowOrder = 0;
+ // The order between multi-char extensions: s -> h -> z -> x.
+ char ExtClass = ExtName[0];
+ switch (ExtClass) {
+ case 's':
+ HighOrder = 0;
+ break;
+ case 'h':
+ HighOrder = 1;
+ break;
+ case 'z':
+ HighOrder = 2;
+ // `z` extension must be sorted by canonical order of second letter.
+ // e.g. zmx has higher rank than zax.
+ LowOrder = singleLetterExtensionRank(ExtName[1]);
+ break;
+ case 'x':
+ HighOrder = 3;
+ break;
+ default:
+ llvm_unreachable("Unknown prefix for multi-char extension");
+ return -1;
+ }
+
+ return (HighOrder << 8) + LowOrder;
+}
+
+// Compare function for extension.
+// Only compare the extension name, ignore version comparison.
+bool RISCVISAInfo::compareExtension(const std::string &LHS,
+ const std::string &RHS) {
+ size_t LHSLen = LHS.length();
+ size_t RHSLen = RHS.length();
+ if (LHSLen == 1 && RHSLen != 1)
+ return true;
+
+ if (LHSLen != 1 && RHSLen == 1)
+ return false;
+
+ if (LHSLen == 1 && RHSLen == 1)
+ return singleLetterExtensionRank(LHS[0]) <
+ singleLetterExtensionRank(RHS[0]);
+
+ // Both are multi-char ext here.
+ int LHSRank = multiLetterExtensionRank(LHS);
+ int RHSRank = multiLetterExtensionRank(RHS);
+ if (LHSRank != RHSRank)
+ return LHSRank < RHSRank;
+
+ // If the rank is same, it must be sorted by lexicographic order.
+ return LHS < RHS;
+}
+
+void RISCVISAInfo::toFeatures(
+ std::vector<StringRef> &Features,
+ std::function<StringRef(const Twine &)> StrAlloc) const {
+ for (auto &Ext : Exts) {
+ StringRef ExtName = Ext.first;
+
+ if (ExtName == "i")
+ continue;
+
+ if (ExtName == "zvlsseg") {
+ Features.push_back("+experimental-v");
+ Features.push_back("+experimental-zvlsseg");
+ } else if (ExtName == "zvamo") {
+ Features.push_back("+experimental-v");
+ Features.push_back("+experimental-zvlsseg");
+ Features.push_back("+experimental-zvamo");
+ } else if (isExperimentalExtension(ExtName)) {
+ Features.push_back(StrAlloc("+experimental-" + ExtName));
+ } else {
+ Features.push_back(StrAlloc("+" + ExtName));
+ }
+ }
+}
+
+// Extensions may have a version number, and may be separated by
+// an underscore '_' e.g.: rv32i2_m2.
+// Version number is divided into major and minor version numbers,
+// separated by a 'p'. If the minor version is 0 then 'p0' can be
+// omitted from the version string. E.g., rv32i2p0, rv32i2, rv32i2p1.
+static Error getExtensionVersion(StringRef Ext, StringRef In, unsigned &Major,
+ unsigned &Minor, unsigned &ConsumeLength,
+ bool EnableExperimentalExtension,
+ bool ExperimentalExtensionVersionCheck) {
+ StringRef MajorStr, MinorStr;
+ Major = 0;
+ Minor = 0;
+ ConsumeLength = 0;
+ MajorStr = In.take_while(isDigit);
+ In = In.substr(MajorStr.size());
+
+ if (!MajorStr.empty() && In.consume_front("p")) {
+ MinorStr = In.take_while(isDigit);
+ In = In.substr(MajorStr.size() + 1);
+
+ // Expected 'p' to be followed by minor version number.
+ if (MinorStr.empty()) {
+ return createStringError(
+ errc::invalid_argument,
+ "minor version number missing after 'p' for extension '" + Ext + "'");
+ }
+ }
+
+ if (!MajorStr.empty() && MajorStr.getAsInteger(10, Major))
+ return createStringError(
+ errc::invalid_argument,
+ "Failed to parse major version number for extension '" + Ext + "'");
+
+ if (!MinorStr.empty() && MinorStr.getAsInteger(10, Minor))
+ return createStringError(
+ errc::invalid_argument,
+ "Failed to parse minor version number for extension '" + Ext + "'");
+
+ ConsumeLength = MajorStr.size();
+
+ if (!MinorStr.empty())
+ ConsumeLength += MinorStr.size() + 1 /*'p'*/;
+
+ // Expected multi-character extension with version number to have no
+ // subsequent characters (i.e. must either end string or be followed by
+ // an underscore).
+ if (Ext.size() > 1 && In.size()) {
+ std::string Error =
+ "multi-character extensions must be separated by underscores";
+ return createStringError(errc::invalid_argument, Error);
+ }
+
+ // If experimental extension, require use of the current version number
+ if (auto ExperimentalExtension = isExperimentalExtension(Ext)) {
+ if (!EnableExperimentalExtension) {
+ std::string Error = "requires '-menable-experimental-extensions' for "
+ "experimental extension '" +
+ Ext.str() + "'";
+ return createStringError(errc::invalid_argument, Error);
+ }
+
+ if (ExperimentalExtensionVersionCheck &&
+ (MajorStr.empty() && MinorStr.empty())) {
+ std::string Error =
+ "experimental extension requires explicit version number `" +
+ Ext.str() + "`";
+ return createStringError(errc::invalid_argument, Error);
+ }
+
+ auto SupportedVers = *ExperimentalExtension;
+ if (ExperimentalExtensionVersionCheck &&
+ (Major != SupportedVers.Major || Minor != SupportedVers.Minor)) {
+ std::string Error = "unsupported version number " + MajorStr.str();
+ if (!MinorStr.empty())
+ Error += "." + MinorStr.str();
+ Error += " for experimental extension '" + Ext.str() +
+ "'(this compiler supports " + utostr(SupportedVers.Major) + "." +
+ utostr(SupportedVers.Minor) + ")";
+ return createStringError(errc::invalid_argument, Error);
+ }
+ return Error::success();
+ }
+
+ // Exception rule for `g`: we don't have a clear version scheme for it in
+ // the ISA spec.
+ if (Ext == "g")
+ return Error::success();
+
+ if (MajorStr.empty() && MinorStr.empty()) {
+ if (auto DefaultVersion = findDefaultVersion(Ext)) {
+ Major = DefaultVersion->Major;
+ Minor = DefaultVersion->Minor;
+ }
+ // Whether or not a default version is found, return success; other places
+ // will verify the extension.
+ return Error::success();
+ }
+
+ if (RISCVISAInfo::isSupportedExtension(Ext, Major, Minor))
+ return Error::success();
+
+ std::string Error = "unsupported version number " + std::string(MajorStr);
+ if (!MinorStr.empty())
+ Error += "." + MinorStr.str();
+ Error += " for extension '" + Ext.str() + "'";
+ return createStringError(errc::invalid_argument, Error);
+}
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::parseFeatures(unsigned XLen,
+ const std::vector<std::string> &Features) {
+ assert(XLen == 32 || XLen == 64);
+ std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+
+ bool HasE = false;
+ for (auto &Feature : Features) {
+ StringRef ExtName = Feature;
+ bool Experimental = false;
+ assert(ExtName.size() > 1 && (ExtName[0] == '+' || ExtName[0] == '-'));
+ bool Add = ExtName[0] == '+';
+ ExtName = ExtName.drop_front(1); // Drop '+' or '-'
+ Experimental = stripExperimentalPrefix(ExtName);
+ auto ExtensionInfos = Experimental
+ ? makeArrayRef(SupportedExperimentalExtensions)
+ : makeArrayRef(SupportedExtensions);
+ auto ExtensionInfoIterator =
+ llvm::find_if(ExtensionInfos, FindByName(ExtName));
+
+ // Not all features are related to ISA extensions, like `relax` or
+ // `save-restore`; skip those features.
+ if (ExtensionInfoIterator == ExtensionInfos.end())
+ continue;
+
+ if (Add) {
+ if (ExtName == "e") {
+ if (XLen != 32)
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension 'e' requires 'rv32'");
+ HasE = true;
+ }
+
+ ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version.Major,
+ ExtensionInfoIterator->Version.Minor);
+ } else
+ ISAInfo->Exts.erase(ExtName.str());
+ }
+ if (!HasE) {
+ if (auto Version = findDefaultVersion("i"))
+ ISAInfo->addExtension("i", Version->Major, Version->Minor);
+ else
+ llvm_unreachable("Default extension version for 'i' not found?");
+ }
+
+ ISAInfo->updateFLen();
+
+ return std::move(ISAInfo);
+}
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
+ bool ExperimentalExtensionVersionCheck) {
+ // RISC-V ISA strings must be lowercase.
+ if (llvm::any_of(Arch, isupper)) {
+ return createStringError(errc::invalid_argument,
+ "string must be lowercase");
+ }
+
+ bool HasRV64 = Arch.startswith("rv64");
+ // ISA string must begin with rv32 or rv64.
+ if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) {
+ return createStringError(errc::invalid_argument,
+ "string must begin with rv32{i,e,g} or rv64{i,g}");
+ }
+
+ unsigned XLen = HasRV64 ? 64 : 32;
+ std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+
+ // The canonical order specified in ISA manual.
+ // Ref: Table 22.1 in RISC-V User-Level ISA V2.2
+ StringRef StdExts = AllStdExts;
+ bool HasF = false, HasD = false;
+ char Baseline = Arch[4];
+
+ // First letter should be 'e', 'i' or 'g'.
+ switch (Baseline) {
+ default:
+ return createStringError(errc::invalid_argument,
+ "first letter should be 'e', 'i' or 'g'");
+ case 'e': {
+ // Extension 'e' is not allowed in rv64.
+ if (HasRV64)
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension 'e' requires 'rv32'");
+ break;
+ }
+ case 'i':
+ break;
+ case 'g':
+ // g = imafd
+ StdExts = StdExts.drop_front(4);
+ HasF = true;
+ HasD = true;
+ break;
+ }
+
+ // Skip rvxxx
+ StringRef Exts = Arch.substr(5);
+
+ // Remove multi-letter standard extensions, non-standard extensions and
+ // supervisor-level extensions. They have 'z', 'x', 's', 'sx' prefixes.
+ // Parse them at the end.
+ // Find the very first occurrence of 's', 'x' or 'z'.
+ StringRef OtherExts;
+ size_t Pos = Exts.find_first_of("zsx");
+ if (Pos != StringRef::npos) {
+ OtherExts = Exts.substr(Pos);
+ Exts = Exts.substr(0, Pos);
+ }
+
+ unsigned Major, Minor, ConsumeLength;
+ if (auto E = getExtensionVersion(std::string(1, Baseline), Exts, Major, Minor,
+ ConsumeLength, EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
+
+ if (Baseline == 'g') {
+ // No matter which version is given to `g`, we always set imafd to the
+ // default version, since we don't have a clear version scheme for that in
+ // the ISA spec.
+ for (auto Ext : {"i", "m", "a", "f", "d"})
+ if (auto Version = findDefaultVersion(Ext))
+ ISAInfo->addExtension(Ext, Version->Major, Version->Minor);
+ else
+ llvm_unreachable("Default extension version not found?");
+ } else
+ // Baseline is `i` or `e`
+ ISAInfo->addExtension(std::string(1, Baseline), Major, Minor);
+
+ // Consume the base ISA version number and any '_' between rvxxx and the
+ // first extension
+ Exts = Exts.drop_front(ConsumeLength);
+ Exts.consume_front("_");
+
+ // TODO: Use version number when setting target features
+
+ auto StdExtsItr = StdExts.begin();
+ auto StdExtsEnd = StdExts.end();
+ for (auto I = Exts.begin(), E = Exts.end(); I != E;) {
+ char C = *I;
+
+ // Check ISA extensions are specified in the canonical order.
+ while (StdExtsItr != StdExtsEnd && *StdExtsItr != C)
+ ++StdExtsItr;
+
+ if (StdExtsItr == StdExtsEnd) {
+ // Either C contains a valid extension that was not given in canonical
+ // order, or it is an invalid extension.
+ if (StdExts.contains(C)) {
+ return createStringError(
+ errc::invalid_argument,
+ "standard user-level extension not given in canonical order '%c'",
+ C);
+ }
+
+ return createStringError(errc::invalid_argument,
+ "invalid standard user-level extension '%c'", C);
+ }
+
+ // Move to next char to prevent repeated letter.
+ ++StdExtsItr;
+
+ std::string Next;
+ unsigned Major, Minor, ConsumeLength;
+ if (std::next(I) != E)
+ Next = std::string(std::next(I), E);
+ if (auto E = getExtensionVersion(std::string(1, C), Next, Major, Minor,
+ ConsumeLength, EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
+
+ // The order is OK, then push it into features.
+ // TODO: Use version number when setting target features
+ switch (C) {
+ default:
+ // Currently LLVM supports only "mafdcbv".
+ return createStringError(errc::invalid_argument,
+ "unsupported standard user-level extension '%c'",
+ C);
+ case 'm':
+ ISAInfo->addExtension("m", Major, Minor);
+ break;
+ case 'a':
+ ISAInfo->addExtension("a", Major, Minor);
+ break;
+ case 'f':
+ ISAInfo->addExtension("f", Major, Minor);
+ HasF = true;
+ break;
+ case 'd':
+ ISAInfo->addExtension("d", Major, Minor);
+ HasD = true;
+ break;
+ case 'c':
+ ISAInfo->addExtension("c", Major, Minor);
+ break;
+ case 'v':
+ ISAInfo->addExtension("v", Major, Minor);
+ ISAInfo->addExtension("zvlsseg", Major, Minor);
+ break;
+ }
+ // Consume full extension name and version, including any optional '_'
+ // between this extension and the next
+ ++I;
+ I += ConsumeLength;
+ if (*I == '_')
+ ++I;
+ }
+ // Dependency check.
+ // It's illegal to specify the 'd' (double-precision floating point)
+ // extension without also specifying the 'f' (single precision
+ // floating-point) extension.
+ // TODO: This has been removed in later specs, which specify that D implies F
+ if (HasD && !HasF)
+ return createStringError(errc::invalid_argument,
+ "d requires f extension to also be specified");
+
+ // Additional dependency checks.
+ // TODO: The 'q' extension requires rv64.
+ // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
+
+ if (OtherExts.empty())
+ return std::move(ISAInfo);
+
+ // Handle extensions other than the standard general-purpose and
+ // standard user-level extensions.
+ // Parse the ISA string containing non-standard user-level
+ // extensions, standard supervisor-level extensions and
+ // non-standard supervisor-level extensions.
+ // These extensions start with 'z', 'x', 's', 'sx' prefixes, follow a
+ // canonical order, might have a version number (major, minor)
+ // and are separated by a single underscore '_'.
+ // Set the hardware features for the extensions that are supported.
+
+ // Multi-letter extensions are separated by a single underscore,
+ // as described in RISC-V User-Level ISA V2.2.
+ SmallVector<StringRef, 8> Split;
+ OtherExts.split(Split, '_');
+
+ SmallVector<StringRef, 8> AllExts;
+ std::array<StringRef, 4> Prefix{"z", "x", "s", "sx"};
+ auto I = Prefix.begin();
+ auto E = Prefix.end();
+
+ for (StringRef Ext : Split) {
+ if (Ext.empty())
+ return createStringError(errc::invalid_argument,
+ "extension name missing after separator '_'");
+
+ StringRef Type = getExtensionType(Ext);
+ StringRef Desc = getExtensionTypeDesc(Ext);
+ auto Pos = Ext.find_if(isDigit);
+ StringRef Name(Ext.substr(0, Pos));
+ StringRef Vers(Ext.substr(Pos));
+
+ if (Type.empty())
+ return createStringError(errc::invalid_argument,
+ "invalid extension prefix '" + Ext + "'");
+
+ // Check ISA extensions are specified in the canonical order.
+ while (I != E && *I != Type)
+ ++I;
+
+ if (I == E)
+ return createStringError(errc::invalid_argument,
+ "%s not given in canonical order '%s'",
+ Desc.str().c_str(), Ext.str().c_str());
+
+ if (Name.size() == Type.size()) {
+ return createStringError(errc::invalid_argument,
+ "%s name missing after '%s'", Desc.str().c_str(),
+ Type.str().c_str());
+ }
+
+ unsigned Major, Minor, ConsumeLength;
+ if (auto E = getExtensionVersion(Name, Vers, Major, Minor, ConsumeLength,
+ EnableExperimentalExtension,
+ ExperimentalExtensionVersionCheck))
+ return std::move(E);
+
+ // Check if duplicated extension.
+ if (llvm::is_contained(AllExts, Name))
+ return createStringError(errc::invalid_argument, "duplicated %s '%s'",
+ Desc.str().c_str(), Name.str().c_str());
+
+ ISAInfo->addExtension(Name, Major, Minor);
+ // Extension format is correct, keep parsing the extensions.
+ // TODO: Save Type, Name, Major, Minor to avoid parsing them later.
+ AllExts.push_back(Name);
+ }
+
+ for (auto Ext : AllExts) {
+ if (!isSupportedExtension(Ext)) {
+ StringRef Desc = getExtensionTypeDesc(getExtensionType(Ext));
+ return createStringError(errc::invalid_argument, "unsupported %s '%s'",
+ Desc.str().c_str(), Ext.str().c_str());
+ }
+ }
+
+ ISAInfo->updateFLen();
+
+ return std::move(ISAInfo);
+}
+
+void RISCVISAInfo::updateFLen() {
+ FLen = 0;
+ // TODO: Handle q extension.
+ if (Exts.count("d"))
+ FLen = 64;
+ else if (Exts.count("f"))
+ FLen = 32;
+}
+
+std::string RISCVISAInfo::toString() const {
+ std::string Buffer;
+ raw_string_ostream Arch(Buffer);
+
+ Arch << "rv" << XLen;
+
+ ListSeparator LS("_");
+ for (auto &Ext : Exts) {
+ StringRef ExtName = Ext.first;
+ auto ExtInfo = Ext.second;
+ Arch << LS << ExtName;
+ Arch << ExtInfo.MajorVersion << "p" << ExtInfo.MinorVersion;
+ }
+
+ return Arch.str();
+}
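For readers who only care about the entry points of this new parser: parseArchString() validates ordering, versions, and dependencies and returns an RISCVISAInfo on success, which can then be queried or re-serialized. A hedged usage sketch against the functions defined in this file (the sample arch string and printed output are illustrative):

#include "llvm/Support/Error.h"
#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

using namespace llvm;

int main() {
  auto ParseResult = RISCVISAInfo::parseArchString(
      "rv64imafdc", /*EnableExperimentalExtension=*/false,
      /*ExperimentalExtensionVersionCheck=*/true);
  if (!ParseResult) {
    errs() << "invalid arch string: " << toString(ParseResult.takeError())
           << "\n";
    return 1;
  }

  const std::unique_ptr<RISCVISAInfo> &ISAInfo = *ParseResult;
  // Default versions are filled in, e.g. rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0.
  outs() << "canonical form: " << ISAInfo->toString() << "\n";
  outs() << "has 'd': " << (ISAInfo->hasExtension("d") ? "yes" : "no") << "\n";
  return 0;
}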
diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp
index 49a0b16baa02..58fafb26cdf3 100644
--- a/llvm/lib/Support/Signposts.cpp
+++ b/llvm/lib/Support/Signposts.cpp
@@ -1,23 +1,27 @@
//===-- Signposts.cpp - Interval debug annotations ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/Signposts.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Config/config.h"
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Mutex.h"
-#endif
+#include <Availability.h>
+#include <os/signpost.h>
+#endif // if LLVM_SUPPORT_XCODE_SIGNPOSTS
using namespace llvm;
#if LLVM_SUPPORT_XCODE_SIGNPOSTS
+#define SIGNPOSTS_AVAILABLE() \
+ __builtin_available(macos 10.14, iOS 12, tvOS 12, watchOS 5, *)
namespace {
os_log_t *LogCreator() {
os_log_t *X = new os_log_t;
@@ -35,13 +39,13 @@ struct LogDeleter {
namespace llvm {
class SignpostEmitterImpl {
using LogPtrTy = std::unique_ptr<os_log_t, LogDeleter>;
+ using LogTy = LogPtrTy::element_type;
LogPtrTy SignpostLog;
DenseMap<const void *, os_signpost_id_t> Signposts;
sys::SmartMutex<true> Mutex;
-public:
- os_log_t &getLogger() const { return *SignpostLog; }
+ LogTy &getLogger() const { return *SignpostLog; }
os_signpost_id_t getSignpostForObject(const void *O) {
sys::SmartScopedLock<true> Lock(Mutex);
const auto &I = Signposts.find(O);
@@ -55,6 +59,7 @@ public:
return Inserted.first->second;
}
+public:
SignpostEmitterImpl() : SignpostLog(LogCreator()) {}
bool isEnabled() const {
@@ -73,7 +78,7 @@ public:
}
}
- void endInterval(const void *O) {
+ void endInterval(const void *O, llvm::StringRef Name) {
if (isEnabled()) {
if (SIGNPOSTS_AVAILABLE()) {
// Both strings used here are required to be constant literal strings.
@@ -119,17 +124,10 @@ void SignpostEmitter::startInterval(const void *O, StringRef Name) {
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
-#if HAVE_ANY_SIGNPOST_IMPL
-os_log_t &SignpostEmitter::getLogger() const { return Impl->getLogger(); }
-os_signpost_id_t SignpostEmitter::getSignpostForObject(const void *O) {
- return Impl->getSignpostForObject(O);
-}
-#endif
-
-void SignpostEmitter::endInterval(const void *O) {
+void SignpostEmitter::endInterval(const void *O, StringRef Name) {
#if HAVE_ANY_SIGNPOST_IMPL
if (Impl == nullptr)
return;
- Impl->endInterval(O);
+ Impl->endInterval(O, Name);
#endif // if !HAVE_ANY_SIGNPOST_IMPL
}
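The interface change here threads the interval name through to endInterval(), so callers now pass the same name to both ends of an interval (Timer.cpp below is adapted accordingly). A hedged sketch of how a caller pairs the two; on non-Darwin hosts these calls compile to no-ops:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Signposts.h"

using namespace llvm;

static SignpostEmitter Signposts;

void doTracedWork() {
  static int Anchor; // any stable object address identifies the interval
  StringRef Name = "doTracedWork";
  Signposts.startInterval(&Anchor, Name);
  // ... the work being measured ...
  Signposts.endInterval(&Anchor, Name);
}

int main() {
  doTracedWork();
  return 0;
}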
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index 0005f7840912..2d7721e4e1fb 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include <cstdint>
#ifdef LLVM_ENABLE_EXCEPTIONS
#include <stdexcept>
@@ -19,12 +20,21 @@ using namespace llvm;
// Check that no bytes are wasted and everything is well-aligned.
namespace {
+// These structures may cause binary compat warnings on AIX. Suppress the
+// warning since we are only using these types for the static assertions below.
+#if defined(_AIX)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Waix-compat"
+#endif
struct Struct16B {
alignas(16) void *X;
};
struct Struct32B {
alignas(32) void *X;
};
+#if defined(_AIX)
+#pragma GCC diagnostic pop
+#endif
}
static_assert(sizeof(SmallVector<void *, 0>) ==
sizeof(unsigned) * 2 + sizeof(void *),
@@ -47,8 +57,7 @@ static_assert(sizeof(SmallVector<char, 0>) ==
/// Report that MinSize doesn't fit into this vector's size type. Throws
/// std::length_error or calls report_fatal_error.
-LLVM_ATTRIBUTE_NORETURN
-static void report_size_overflow(size_t MinSize, size_t MaxSize);
+[[noreturn]] static void report_size_overflow(size_t MinSize, size_t MaxSize);
static void report_size_overflow(size_t MinSize, size_t MaxSize) {
std::string Reason = "SmallVector unable to grow. Requested capacity (" +
std::to_string(MinSize) +
@@ -57,13 +66,13 @@ static void report_size_overflow(size_t MinSize, size_t MaxSize) {
#ifdef LLVM_ENABLE_EXCEPTIONS
throw std::length_error(Reason);
#else
- report_fatal_error(Reason);
+ report_fatal_error(Twine(Reason));
#endif
}
/// Report that this vector is already at maximum capacity. Throws
/// std::length_error or calls report_fatal_error.
-LLVM_ATTRIBUTE_NORETURN static void report_at_maximum_capacity(size_t MaxSize);
+[[noreturn]] static void report_at_maximum_capacity(size_t MaxSize);
static void report_at_maximum_capacity(size_t MaxSize) {
std::string Reason =
"SmallVector capacity unable to grow. Already at maximum size " +
@@ -71,7 +80,7 @@ static void report_at_maximum_capacity(size_t MaxSize) {
#ifdef LLVM_ENABLE_EXCEPTIONS
throw std::length_error(Reason);
#else
- report_fatal_error(Reason);
+ report_fatal_error(Twine(Reason));
#endif
}
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 73f852624a69..1939ed9e9547 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -64,7 +64,7 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
return It->second;
if (Trigrams.isDefinitelyOut(Query))
return false;
- for (auto& RegExKV : RegExes)
+ for (const auto &RegExKV : RegExes)
if (RegExKV.first->match(Query))
return RegExKV.second;
return 0;
@@ -93,7 +93,7 @@ SpecialCaseList::createOrDie(const std::vector<std::string> &Paths,
std::string Error;
if (auto SCL = create(Paths, FS, Error))
return SCL;
- report_fatal_error(Error);
+ report_fatal_error(Twine(Error));
}
bool SpecialCaseList::createInternal(const std::vector<std::string> &Paths,
@@ -209,7 +209,7 @@ bool SpecialCaseList::inSection(StringRef Section, StringRef Prefix,
unsigned SpecialCaseList::inSectionBlame(StringRef Section, StringRef Prefix,
StringRef Query,
StringRef Category) const {
- for (auto &SectionIter : Sections)
+ for (const auto &SectionIter : Sections)
if (SectionIter.SectionMatcher->match(Section)) {
unsigned Blame =
inSectionBlame(SectionIter.Entries, Prefix, Query, Category);
diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index 8f2544e9e26d..2b094a4983a0 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -110,9 +110,8 @@ struct llvm::TimeTraceProfiler {
// templates from within, we only want to add the topmost one. "topmost"
// happens to be the ones that don't have any currently open entries above
// itself.
- if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
- return Val.Name == E.Name;
- }) == Stack.rend()) {
+ if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
+ [&](const Entry &Val) { return Val.Name == E.Name; })) {
auto &CountAndTotal = CountAndTotalPerName[E.Name];
CountAndTotal.first++;
CountAndTotal.second += Duration;
@@ -272,8 +271,9 @@ void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
// Called from main thread.
void llvm::timeTraceProfilerCleanup() {
delete TimeTraceProfilerInstance;
+ TimeTraceProfilerInstance = nullptr;
std::lock_guard<std::mutex> Lock(Mu);
- for (auto TTP : *ThreadTimeTraceProfilerInstances)
+ for (auto *TTP : *ThreadTimeTraceProfilerInstances)
delete TTP;
ThreadTimeTraceProfilerInstances->clear();
}
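The TimeProfiler rewrite replaces a manual std::find_if over a reverse iterator range with llvm::none_of over drop_begin(reverse(Stack)). A standalone check, on a plain vector, that the two forms of the condition ("no entry below the top of the stack has the same name as the top entry") agree:

#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Stack = {"outer", "instantiate<T>", "instantiate<T>"};
  const std::string &Top = Stack.back();

  bool Old = std::find_if(++Stack.rbegin(), Stack.rend(),
                          [&](const std::string &S) { return S == Top; }) ==
             Stack.rend();
  bool New = llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
                           [&](const std::string &S) { return S == Top; });

  // Both are false here: an earlier frame has the same name as the top one.
  assert(Old == New && !New);
  return 0;
}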
diff --git a/llvm/lib/Support/Timer.cpp b/llvm/lib/Support/Timer.cpp
index f025ecd3d45c..08e1a8a0e0aa 100644
--- a/llvm/lib/Support/Timer.cpp
+++ b/llvm/lib/Support/Timer.cpp
@@ -199,7 +199,7 @@ void Timer::stopTimer() {
Running = false;
Time += TimeRecord::getCurrentTime(false);
Time -= StartTime;
- Signposts->endInterval(this);
+ Signposts->endInterval(this, getName());
}
void Timer::clear() {
@@ -393,8 +393,7 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
OS << " --- Name ---\n";
// Loop through all of the timing data, printing it out.
- for (const PrintRecord &Record : make_range(TimersToPrint.rbegin(),
- TimersToPrint.rend())) {
+ for (const PrintRecord &Record : llvm::reverse(TimersToPrint)) {
Record.Time.print(Total, OS);
OS << Record.Description << '\n';
}
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 88311546354b..b9a92e280576 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -67,6 +67,8 @@ StringRef Triple::getArchTypeName(ArchType Kind) {
case sparcv9: return "sparcv9";
case spir64: return "spir64";
case spir: return "spir";
+ case spirv32: return "spirv32";
+ case spirv64: return "spirv64";
case systemz: return "s390x";
case tce: return "tce";
case tcele: return "tcele";
@@ -147,6 +149,10 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) {
case spir:
case spir64: return "spir";
+
+ case spirv32:
+ case spirv64: return "spirv";
+
case kalimba: return "kalimba";
case lanai: return "lanai";
case shave: return "shave";
@@ -323,6 +329,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("hsail64", hsail64)
.Case("spir", spir)
.Case("spir64", spir64)
+ .Case("spirv32", spirv32)
+ .Case("spirv64", spirv64)
.Case("kalimba", kalimba)
.Case("lanai", lanai)
.Case("shave", shave)
@@ -456,6 +464,8 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("hsail64", Triple::hsail64)
.Case("spir", Triple::spir)
.Case("spir64", Triple::spir64)
+ .Case("spirv32", Triple::spirv32)
+ .Case("spirv64", Triple::spirv64)
.StartsWith("kalimba", Triple::kalimba)
.Case("lanai", Triple::lanai)
.Case("renderscript32", Triple::renderscript32)
@@ -653,6 +663,12 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_6a;
case ARM::ArchKind::ARMV8_7A:
return Triple::ARMSubArch_v8_7a;
+ case ARM::ArchKind::ARMV9A:
+ return Triple::ARMSubArch_v9;
+ case ARM::ArchKind::ARMV9_1A:
+ return Triple::ARMSubArch_v9_1a;
+ case ARM::ArchKind::ARMV9_2A:
+ return Triple::ARMSubArch_v9_2a;
case ARM::ArchKind::ARMV8R:
return Triple::ARMSubArch_v8r;
case ARM::ArchKind::ARMV8MBaseline:
@@ -753,6 +769,11 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::wasm32:
case Triple::wasm64:
return Triple::Wasm;
+
+ case Triple::spirv32:
+ case Triple::spirv64:
+ // TODO: In future this will be Triple::SPIRV.
+ return Triple::UnknownObjectFormat;
}
llvm_unreachable("unknown architecture");
}
@@ -1024,6 +1045,30 @@ StringRef Triple::getArchName() const {
return StringRef(Data).split('-').first; // Isolate first component
}
+StringRef Triple::getArchName(ArchType Kind, SubArchType SubArch) const {
+ switch (Kind) {
+ case Triple::mips:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa32r6";
+ break;
+ case Triple::mipsel:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa32r6el";
+ break;
+ case Triple::mips64:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa64r6";
+ break;
+ case Triple::mips64el:
+ if (SubArch == MipsSubArch_r6)
+ return "mipsisa64r6el";
+ break;
+ default:
+ break;
+ }
+ return getArchTypeName(Kind);
+}
+
StringRef Triple::getVendorName() const {
StringRef Tmp = StringRef(Data).split('-').second; // Strip first component
return Tmp.split('-').first; // Isolate second component
@@ -1205,8 +1250,8 @@ void Triple::setTriple(const Twine &Str) {
*this = Triple(Str);
}
-void Triple::setArch(ArchType Kind) {
- setArchName(getArchTypeName(Kind));
+void Triple::setArch(ArchType Kind, SubArchType SubArch) {
+ setArchName(getArchName(Kind, SubArch));
}
void Triple::setVendor(VendorType Kind) {
@@ -1298,6 +1343,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::spir:
+ case llvm::Triple::spirv32:
case llvm::Triple::tce:
case llvm::Triple::tcele:
case llvm::Triple::thumb:
@@ -1324,6 +1370,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::riscv64:
case llvm::Triple::sparcv9:
case llvm::Triple::spir64:
+ case llvm::Triple::spirv64:
case llvm::Triple::systemz:
case llvm::Triple::ve:
case llvm::Triple::wasm64:
@@ -1383,6 +1430,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::sparc:
case Triple::sparcel:
case Triple::spir:
+ case Triple::spirv32:
case Triple::tce:
case Triple::tcele:
case Triple::thumb:
@@ -1398,8 +1446,12 @@ Triple Triple::get32BitArchVariant() const {
case Triple::amdil64: T.setArch(Triple::amdil); break;
case Triple::hsail64: T.setArch(Triple::hsail); break;
case Triple::le64: T.setArch(Triple::le32); break;
- case Triple::mips64: T.setArch(Triple::mips); break;
- case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::mips64:
+ T.setArch(Triple::mips, getSubArch());
+ break;
+ case Triple::mips64el:
+ T.setArch(Triple::mipsel, getSubArch());
+ break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::ppc64le: T.setArch(Triple::ppcle); break;
@@ -1407,6 +1459,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::riscv64: T.setArch(Triple::riscv32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::spir64: T.setArch(Triple::spir); break;
+ case Triple::spirv64: T.setArch(Triple::spirv32); break;
case Triple::wasm64: T.setArch(Triple::wasm32); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
}
@@ -1451,6 +1504,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::riscv64:
case Triple::sparcv9:
case Triple::spir64:
+ case Triple::spirv64:
case Triple::systemz:
case Triple::ve:
case Triple::wasm64:
@@ -1464,8 +1518,12 @@ Triple Triple::get64BitArchVariant() const {
case Triple::armeb: T.setArch(Triple::aarch64_be); break;
case Triple::hsail: T.setArch(Triple::hsail64); break;
case Triple::le32: T.setArch(Triple::le64); break;
- case Triple::mips: T.setArch(Triple::mips64); break;
- case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::mips:
+ T.setArch(Triple::mips64, getSubArch());
+ break;
+ case Triple::mipsel:
+ T.setArch(Triple::mips64el, getSubArch());
+ break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::ppcle: T.setArch(Triple::ppc64le); break;
@@ -1473,6 +1531,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::riscv32: T.setArch(Triple::riscv64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::spir: T.setArch(Triple::spir64); break;
+ case Triple::spirv32: T.setArch(Triple::spirv64); break;
case Triple::thumb: T.setArch(Triple::aarch64); break;
case Triple::thumbeb: T.setArch(Triple::aarch64_be); break;
case Triple::wasm32: T.setArch(Triple::wasm64); break;
@@ -1509,6 +1568,8 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::shave:
case Triple::spir64:
case Triple::spir:
+ case Triple::spirv32:
+ case Triple::spirv64:
case Triple::wasm32:
case Triple::wasm64:
case Triple::x86:
@@ -1526,8 +1587,12 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::aarch64: T.setArch(Triple::aarch64_be); break;
case Triple::bpfel: T.setArch(Triple::bpfeb); break;
- case Triple::mips64el:T.setArch(Triple::mips64); break;
- case Triple::mipsel: T.setArch(Triple::mips); break;
+ case Triple::mips64el:
+ T.setArch(Triple::mips64, getSubArch());
+ break;
+ case Triple::mipsel:
+ T.setArch(Triple::mips, getSubArch());
+ break;
case Triple::ppcle: T.setArch(Triple::ppc); break;
case Triple::ppc64le: T.setArch(Triple::ppc64); break;
case Triple::sparcel: T.setArch(Triple::sparc); break;
@@ -1559,8 +1624,12 @@ Triple Triple::getLittleEndianArchVariant() const {
case Triple::aarch64_be: T.setArch(Triple::aarch64); break;
case Triple::bpfeb: T.setArch(Triple::bpfel); break;
- case Triple::mips64: T.setArch(Triple::mips64el); break;
- case Triple::mips: T.setArch(Triple::mipsel); break;
+ case Triple::mips64:
+ T.setArch(Triple::mips64el, getSubArch());
+ break;
+ case Triple::mips:
+ T.setArch(Triple::mipsel, getSubArch());
+ break;
case Triple::ppc: T.setArch(Triple::ppcle); break;
case Triple::ppc64: T.setArch(Triple::ppc64le); break;
case Triple::sparc: T.setArch(Triple::sparcel); break;
@@ -1604,6 +1673,8 @@ bool Triple::isLittleEndian() const {
case Triple::sparcel:
case Triple::spir64:
case Triple::spir:
+ case Triple::spirv32:
+ case Triple::spirv64:
case Triple::tcele:
case Triple::thumb:
case Triple::ve:
@@ -1709,6 +1780,7 @@ StringRef Triple::getARMCPUForArch(StringRef MArch) const {
switch (getOS()) {
case llvm::Triple::FreeBSD:
case llvm::Triple::NetBSD:
+ case llvm::Triple::OpenBSD:
if (!MArch.empty() && MArch == "v6")
return "arm1176jzf-s";
if (!MArch.empty() && MArch == "v7")
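The MIPS changes in this file all serve one purpose: preserve the r6 sub-architecture when a triple is converted between bit widths or endiannesses, instead of collapsing it to plain mips/mips64. A hedged sketch of the observable effect (the expected output assumes the mipsisa32r6 spelling is parsed into MipsSubArch_r6, as the existing parser does):

#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  Triple T("mipsisa32r6-unknown-linux-gnu");
  Triple T64 = T.get64BitArchVariant();
  // Should print something like: mipsisa64r6-unknown-linux-gnu
  outs() << T64.str() << "\n";
  return 0;
}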
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc
index be88e7db1400..b83477e0e4cc 100644
--- a/llvm/lib/Support/Unix/Memory.inc
+++ b/llvm/lib/Support/Unix/Memory.inc
@@ -29,14 +29,6 @@
#include <zircon/syscalls.h>
#endif
-#if defined(__mips__)
-# if defined(__OpenBSD__)
-# include <mips64/sysarch.h>
-# elif !defined(__FreeBSD__)
-# include <sys/cachectl.h>
-# endif
-#endif
-
#if defined(__APPLE__)
extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
#else
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index c37b3a54644a..19d89db55627 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -39,6 +39,9 @@
#include <mach-o/dyld.h>
#include <sys/attr.h>
#include <copyfile.h>
+#if __has_include(<sys/clonefile.h>)
+#include <sys/clonefile.h>
+#endif
#elif defined(__FreeBSD__)
#include <osreldate.h>
#if __FreeBSD_version >= 1300057
@@ -125,7 +128,8 @@ const file_t kInvalidFile = -1;
#if defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
defined(__minix) || defined(__FreeBSD_kernel__) || defined(__linux__) || \
- defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__)
+ defined(__CYGWIN__) || defined(__DragonFly__) || defined(_AIX) || defined(__GNU__) || \
+ (defined(__sun__) && defined(__svr4__))
static int
test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
{
@@ -283,6 +287,20 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
// Fall back to the classical detection.
if (getprogpath(exe_path, argv0))
return exe_path;
+#elif defined(__sun__) && defined(__svr4__)
+ char exe_path[PATH_MAX];
+ const char *aPath = "/proc/self/execname";
+ if (sys::fs::exists(aPath)) {
+ int fd = open(aPath, O_RDONLY);
+ if (fd == -1)
+ return "";
+ if (read(fd, exe_path, sizeof(exe_path)) < 0)
+ return "";
+ return exe_path;
+ }
+ // Fall back to the classical detection.
+ if (getprogpath(exe_path, argv0) != NULL)
+ return exe_path;
#elif defined(__MVS__)
int token = 0;
W_PSPROC buf;
@@ -1442,22 +1460,37 @@ namespace fs {
/// file descriptor variant of this function still uses the default
/// implementation.
std::error_code copy_file(const Twine &From, const Twine &To) {
- uint32_t Flag = COPYFILE_DATA;
-#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
+ std::string FromS = From.str();
+ std::string ToS = To.str();
+#if __has_builtin(__builtin_available)
if (__builtin_available(macos 10.12, *)) {
- bool IsSymlink;
- if (std::error_code Error = is_symlink_file(From, IsSymlink))
- return Error;
- // COPYFILE_CLONE clones the symlink instead of following it
- // and returns EEXISTS if the target file already exists.
- if (!IsSymlink && !exists(To))
- Flag = COPYFILE_CLONE;
+ // Optimistically try to use clonefile() and handle errors, rather than
+ // calling stat() to see if it'll work.
+ //
+ // Note: It's okay if From is a symlink. In contrast to the behaviour of
+ // copyfile() with COPYFILE_CLONE, clonefile() clones targets (not the
+ // symlink itself) unless the flag CLONE_NOFOLLOW is passed.
+ if (!clonefile(FromS.c_str(), ToS.c_str(), 0))
+ return std::error_code();
+
+ auto Errno = errno;
+ switch (Errno) {
+ case EEXIST: // To already exists.
+ case ENOTSUP: // Device does not support cloning.
+ case EXDEV: // From and To are on different devices.
+ break;
+ default:
+ // Anything else will also break copyfile().
+ return std::error_code(Errno, std::generic_category());
+ }
+
+ // TODO: For EEXIST, profile calling fs::generateUniqueName() and
+ // clonefile() in a retry loop (then rename() on success) before falling
+ // back to copyfile(). Depending on the size of the file this could be
+ // cheaper.
}
#endif
- int Status =
- copyfile(From.str().c_str(), To.str().c_str(), /* State */ NULL, Flag);
-
- if (Status == 0)
+ if (!copyfile(FromS.c_str(), ToS.c_str(), /*State=*/NULL, COPYFILE_DATA))
return std::error_code();
return std::error_code(errno, std::generic_category());
}
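The copy_file() rewrite above boils down to: attempt clonefile() first and only fall back to a plain copyfile() when cloning cannot work. A standalone macOS-only sketch of that strategy (illustrative helper name cloneOrCopy, not LLVM code; the __builtin_available(macos 10.12) guard is omitted here):

#include <cerrno>
#include <copyfile.h>
#include <sys/clonefile.h>
#include <system_error>

static std::error_code cloneOrCopy(const char *From, const char *To) {
  if (::clonefile(From, To, /*flags=*/0) == 0)   // cheap APFS clone
    return std::error_code();
  switch (errno) {
  case EEXIST:   // destination already exists
  case ENOTSUP:  // filesystem cannot clone
  case EXDEV:    // source and destination on different devices
    break;       // fall back to a byte-for-byte copy
  default:       // anything else would break copyfile() too
    return std::error_code(errno, std::generic_category());
  }
  if (::copyfile(From, To, /*State=*/nullptr, COPYFILE_DATA) == 0)
    return std::error_code();
  return std::error_code(errno, std::generic_category());
}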
diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc
index 30b957e6a1c4..d3d9fb7d7187 100644
--- a/llvm/lib/Support/Unix/Process.inc
+++ b/llvm/lib/Support/Unix/Process.inc
@@ -461,5 +461,4 @@ unsigned llvm::sys::Process::GetRandomNumber() {
#endif
}
-LLVM_ATTRIBUTE_NORETURN
-void Process::ExitNoCleanup(int RetCode) { _Exit(RetCode); }
+[[noreturn]] void Process::ExitNoCleanup(int RetCode) { _Exit(RetCode); }
diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index be59bb0232de..089342030b97 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -71,7 +71,8 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
assert(!Name.empty() && "Must have a name!");
// Use the given path verbatim if it contains any slashes; this matches
// the behavior of sh(1) and friends.
- if (Name.find('/') != StringRef::npos) return std::string(Name);
+ if (Name.contains('/'))
+ return std::string(Name);
SmallVector<StringRef, 16> EnvironmentPaths;
if (Paths.empty())
diff --git a/llvm/lib/Support/Unix/Unix.h b/llvm/lib/Support/Unix/Unix.h
index 60929139598b..1599241a344a 100644
--- a/llvm/lib/Support/Unix/Unix.h
+++ b/llvm/lib/Support/Unix/Unix.h
@@ -67,11 +67,10 @@ static inline bool MakeErrMsg(
}
// Include StrError(errnum) in a fatal error message.
-LLVM_ATTRIBUTE_NORETURN static inline void ReportErrnumFatal(const char *Msg,
- int errnum) {
+[[noreturn]] static inline void ReportErrnumFatal(const char *Msg, int errnum) {
std::string ErrMsg;
MakeErrMsg(&ErrMsg, Msg, errnum);
- llvm::report_fatal_error(ErrMsg);
+ llvm::report_fatal_error(llvm::Twine(ErrMsg));
}
namespace llvm {
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 15bb54e61817..9bf0384b5f1b 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileSystem/UniqueID.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
@@ -193,6 +194,7 @@ public:
bool RequiresNullTerminator,
bool IsVolatile) override;
std::error_code close() override;
+ void setPath(const Twine &Path) override;
};
} // namespace
@@ -228,6 +230,12 @@ std::error_code RealFile::close() {
return EC;
}
+void RealFile::setPath(const Twine &Path) {
+ RealName = Path.str();
+ if (auto Status = status())
+ S = Status.get().copyWithNewName(Status.get(), Path);
+}
+
namespace {
/// A file system according to your operating system.
@@ -442,7 +450,7 @@ std::error_code OverlayFileSystem::isLocal(const Twine &Path, bool &Result) {
std::error_code
OverlayFileSystem::getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) const {
- for (auto &FS : FSList)
+ for (const auto &FS : FSList)
if (FS->exists(Path))
return FS->getRealPath(Path, Output);
return errc::no_such_file_or_directory;
@@ -638,6 +646,8 @@ public:
}
std::error_code close() override { return {}; }
+
+ void setPath(const Twine &Path) override { RequestedName = Path.str(); }
};
} // namespace
@@ -655,6 +665,9 @@ public:
Status getStatus(const Twine &RequestedName) const {
return Status::copyWithNewName(Stat, RequestedName);
}
+
+ UniqueID getUniqueID() const { return Stat.getUniqueID(); }
+
InMemoryNode *getChild(StringRef Name) {
auto I = Entries.find(Name);
if (I != Entries.end())
@@ -698,10 +711,28 @@ Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) {
} // namespace
} // namespace detail
+// The UniqueID of in-memory files is derived from path and content.
+// This avoids difficulties in creating exactly equivalent in-memory FSes,
+// as often needed in multithreaded programs.
+static sys::fs::UniqueID getUniqueID(hash_code Hash) {
+ return sys::fs::UniqueID(std::numeric_limits<uint64_t>::max(),
+ uint64_t(size_t(Hash)));
+}
+static sys::fs::UniqueID getFileID(sys::fs::UniqueID Parent,
+ llvm::StringRef Name,
+ llvm::StringRef Contents) {
+ return getUniqueID(llvm::hash_combine(Parent.getFile(), Name, Contents));
+}
+static sys::fs::UniqueID getDirectoryID(sys::fs::UniqueID Parent,
+ llvm::StringRef Name) {
+ return getUniqueID(llvm::hash_combine(Parent.getFile(), Name));
+}
+
InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
: Root(new detail::InMemoryDirectory(
- Status("", getNextVirtualUniqueID(), llvm::sys::TimePoint<>(), 0, 0,
- 0, llvm::sys::fs::file_type::directory_file,
+ Status("", getDirectoryID(llvm::sys::fs::UniqueID(), ""),
+ llvm::sys::TimePoint<>(), 0, 0, 0,
+ llvm::sys::fs::file_type::directory_file,
llvm::sys::fs::perms::all_all))),
UseNormalizedPaths(UseNormalizedPaths) {}
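With the getFileID()/getDirectoryID() helpers above, independently built in-memory file systems report identical UniqueIDs for identical paths and contents. A small sketch of that property (assumes the usual vfs::InMemoryFileSystem and MemoryBuffer APIs):

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <cassert>

static void checkStableInMemoryUniqueIDs() {
  llvm::vfs::InMemoryFileSystem A, B;
  A.addFile("/dir/f.txt", /*ModificationTime=*/0,
            llvm::MemoryBuffer::getMemBuffer("hello"));
  B.addFile("/dir/f.txt", /*ModificationTime=*/0,
            llvm::MemoryBuffer::getMemBuffer("hello"));
  auto SA = A.status("/dir/f.txt");
  auto SB = B.status("/dir/f.txt");
  // Same path and contents => same UniqueID, even across separate instances.
  assert(SA && SB && SA->getUniqueID() == SB->getUniqueID());
}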
@@ -754,10 +785,14 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
else {
// Create a new file or directory.
- Status Stat(P.str(), getNextVirtualUniqueID(),
- llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
- ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
- ResolvedPerms);
+ Status Stat(
+ P.str(),
+ (ResolvedType == sys::fs::file_type::directory_file)
+ ? getDirectoryID(Dir->getUniqueID(), Name)
+ : getFileID(Dir->getUniqueID(), Name, Buffer->getBuffer()),
+ llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+ ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+ ResolvedPerms);
if (ResolvedType == sys::fs::file_type::directory_file) {
Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
} else {
@@ -772,9 +807,9 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
// Create a new directory. Use the path up to here.
Status Stat(
StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
- getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
- ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
- NewDirectoryPerms);
+ getDirectoryID(Dir->getUniqueID(), Name),
+ llvm::sys::toTimePoint(ModificationTime), ResolvedUser, ResolvedGroup,
+ 0, sys::fs::file_type::directory_file, NewDirectoryPerms);
Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
Name, std::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
continue;
@@ -1015,9 +1050,10 @@ static llvm::sys::path::Style getExistingStyle(llvm::StringRef Path) {
// Detect the path style in use by checking the first separator.
llvm::sys::path::Style style = llvm::sys::path::Style::native;
const size_t n = Path.find_first_of("/\\");
+ // Can't distinguish between posix and windows_slash here.
if (n != static_cast<size_t>(-1))
style = (Path[n] == '/') ? llvm::sys::path::Style::posix
- : llvm::sys::path::Style::windows;
+ : llvm::sys::path::Style::windows_backslash;
return style;
}
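getExistingStyle() above now answers windows_backslash instead of the old windows alias, and a lone forward slash is still reported as posix because it cannot be told apart from windows_slash. An illustrative standalone helper mirroring that detection (not LLVM API):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"

static llvm::sys::path::Style classifyBySeparator(llvm::StringRef Path) {
  const size_t N = Path.find_first_of("/\\");
  if (N == llvm::StringRef::npos)
    return llvm::sys::path::Style::native;   // no separator: assume native
  // A '/' could equally mean windows_slash; posix is the best guess that the
  // separator alone allows.
  return Path[N] == '/' ? llvm::sys::path::Style::posix
                        : llvm::sys::path::Style::windows_backslash;
}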
@@ -1091,6 +1127,7 @@ public:
}
};
+namespace {
/// Directory iterator implementation for \c RedirectingFileSystem's
/// directory remap entries that maps the paths reported by the external
/// file system's directory iterator back to the virtual directory's path.
@@ -1129,6 +1166,7 @@ public:
return EC;
}
};
+} // namespace
llvm::ErrorOr<std::string>
RedirectingFileSystem::getCurrentWorkingDirectory() const {
@@ -1161,8 +1199,10 @@ std::error_code RedirectingFileSystem::isLocal(const Twine &Path_,
}
std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path) const {
+ // is_absolute(..., Style::windows_*) accepts paths with both slash types.
if (llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::posix) ||
- llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::windows))
+ llvm::sys::path::is_absolute(Path,
+ llvm::sys::path::Style::windows_backslash))
return {};
auto WorkingDir = getCurrentWorkingDirectory();
@@ -1173,9 +1213,15 @@ std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl<char> &Path)
// is native and there is no way to override that. Since we know WorkingDir
// is absolute, we can use it to determine which style we actually have and
// append Path ourselves.
- sys::path::Style style = sys::path::Style::windows;
+ sys::path::Style style = sys::path::Style::windows_backslash;
if (sys::path::is_absolute(WorkingDir.get(), sys::path::Style::posix)) {
style = sys::path::Style::posix;
+ } else {
+ // Distinguish between windows_backslash and windows_slash; getExistingStyle
+ // returns posix for a path with windows_slash.
+ if (getExistingStyle(WorkingDir.get()) !=
+ sys::path::Style::windows_backslash)
+ style = sys::path::Style::windows_slash;
}
std::string Result = WorkingDir.get();
@@ -1207,7 +1253,7 @@ directory_iterator RedirectingFileSystem::dir_begin(const Twine &Dir,
}
// Use status to make sure the path exists and refers to a directory.
- ErrorOr<Status> S = status(Path, *Result);
+ ErrorOr<Status> S = status(Path, Dir, *Result);
if (!S) {
if (shouldFallBackToExternalFS(S.getError(), Result->E))
return ExternalFS->dir_begin(Dir, EC);
@@ -1593,8 +1639,9 @@ private:
// which style we have, and use it consistently.
if (sys::path::is_absolute(Name, sys::path::Style::posix)) {
path_style = sys::path::Style::posix;
- } else if (sys::path::is_absolute(Name, sys::path::Style::windows)) {
- path_style = sys::path::Style::windows;
+ } else if (sys::path::is_absolute(Name,
+ sys::path::Style::windows_backslash)) {
+ path_style = sys::path::Style::windows_backslash;
} else {
assert(NameValueNode && "Name presence should be checked earlier");
error(NameValueNode,
@@ -1933,47 +1980,68 @@ RedirectingFileSystem::lookupPathImpl(
return make_error_code(llvm::errc::no_such_file_or_directory);
}
-static Status getRedirectedFileStatus(const Twine &Path, bool UseExternalNames,
+static Status getRedirectedFileStatus(const Twine &OriginalPath,
+ bool UseExternalNames,
Status ExternalStatus) {
Status S = ExternalStatus;
if (!UseExternalNames)
- S = Status::copyWithNewName(S, Path);
+ S = Status::copyWithNewName(S, OriginalPath);
S.IsVFSMapped = true;
return S;
}
ErrorOr<Status> RedirectingFileSystem::status(
- const Twine &Path, const RedirectingFileSystem::LookupResult &Result) {
+ const Twine &CanonicalPath, const Twine &OriginalPath,
+ const RedirectingFileSystem::LookupResult &Result) {
if (Optional<StringRef> ExtRedirect = Result.getExternalRedirect()) {
- ErrorOr<Status> S = ExternalFS->status(*ExtRedirect);
+ SmallString<256> CanonicalRemappedPath((*ExtRedirect).str());
+ if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+ return EC;
+
+ ErrorOr<Status> S = ExternalFS->status(CanonicalRemappedPath);
if (!S)
return S;
+ S = Status::copyWithNewName(*S, *ExtRedirect);
auto *RE = cast<RedirectingFileSystem::RemapEntry>(Result.E);
- return getRedirectedFileStatus(Path, RE->useExternalName(UseExternalNames),
- *S);
+ return getRedirectedFileStatus(OriginalPath,
+ RE->useExternalName(UseExternalNames), *S);
}
auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(Result.E);
- return Status::copyWithNewName(DE->getStatus(), Path);
+ return Status::copyWithNewName(DE->getStatus(), CanonicalPath);
}
-ErrorOr<Status> RedirectingFileSystem::status(const Twine &Path_) {
- SmallString<256> Path;
- Path_.toVector(Path);
+ErrorOr<Status>
+RedirectingFileSystem::getExternalStatus(const Twine &CanonicalPath,
+ const Twine &OriginalPath) const {
+ if (auto Result = ExternalFS->status(CanonicalPath)) {
+ return Result.get().copyWithNewName(Result.get(), OriginalPath);
+ } else {
+ return Result.getError();
+ }
+}
- if (std::error_code EC = makeCanonical(Path))
+ErrorOr<Status> RedirectingFileSystem::status(const Twine &OriginalPath) {
+ SmallString<256> CanonicalPath;
+ OriginalPath.toVector(CanonicalPath);
+
+ if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
- ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
+ ErrorOr<RedirectingFileSystem::LookupResult> Result =
+ lookupPath(CanonicalPath);
if (!Result) {
- if (shouldFallBackToExternalFS(Result.getError()))
- return ExternalFS->status(Path);
+ if (shouldFallBackToExternalFS(Result.getError())) {
+ return getExternalStatus(CanonicalPath, OriginalPath);
+ }
return Result.getError();
}
- ErrorOr<Status> S = status(Path, *Result);
- if (!S && shouldFallBackToExternalFS(S.getError(), Result->E))
- S = ExternalFS->status(Path);
+ ErrorOr<Status> S = status(CanonicalPath, OriginalPath, *Result);
+ if (!S && shouldFallBackToExternalFS(S.getError(), Result->E)) {
+ return getExternalStatus(CanonicalPath, OriginalPath);
+ }
+
return S;
}
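The status() split above keeps two spellings of the same path around: the canonical one used for lookup and the spelling the caller originally used, which is what the returned Status reports unless external names are in effect. The re-labelling mechanism is simply Status::copyWithNewName(); a minimal sketch (not internal LLVM code):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/VirtualFileSystem.h"

static llvm::vfs::Status relabel(const llvm::vfs::Status &External,
                                 const llvm::Twine &OriginalPath) {
  // Keeps size, permissions, UniqueID, etc., and only swaps the reported
  // name, so callers see the path exactly as they spelled it.
  return llvm::vfs::Status::copyWithNewName(External, OriginalPath);
}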
@@ -1998,22 +2066,39 @@ public:
}
std::error_code close() override { return InnerFile->close(); }
+
+ void setPath(const Twine &Path) override { S = S.copyWithNewName(S, Path); }
};
} // namespace
ErrorOr<std::unique_ptr<File>>
-RedirectingFileSystem::openFileForRead(const Twine &Path_) {
- SmallString<256> Path;
- Path_.toVector(Path);
+File::getWithPath(ErrorOr<std::unique_ptr<File>> Result, const Twine &P) {
+ if (!Result)
+ return Result;
- if (std::error_code EC = makeCanonical(Path))
+ ErrorOr<std::unique_ptr<File>> F = std::move(*Result);
+ auto Name = F->get()->getName();
+ if (Name && Name.get() != P.str())
+ F->get()->setPath(P);
+ return F;
+}
+
+ErrorOr<std::unique_ptr<File>>
+RedirectingFileSystem::openFileForRead(const Twine &OriginalPath) {
+ SmallString<256> CanonicalPath;
+ OriginalPath.toVector(CanonicalPath);
+
+ if (std::error_code EC = makeCanonical(CanonicalPath))
return EC;
- ErrorOr<RedirectingFileSystem::LookupResult> Result = lookupPath(Path);
+ ErrorOr<RedirectingFileSystem::LookupResult> Result =
+ lookupPath(CanonicalPath);
if (!Result) {
if (shouldFallBackToExternalFS(Result.getError()))
- return ExternalFS->openFileForRead(Path);
+ return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
+ OriginalPath);
+
return Result.getError();
}
@@ -2021,12 +2106,18 @@ RedirectingFileSystem::openFileForRead(const Twine &Path_) {
return make_error_code(llvm::errc::invalid_argument);
StringRef ExtRedirect = *Result->getExternalRedirect();
+ SmallString<256> CanonicalRemappedPath(ExtRedirect.str());
+ if (std::error_code EC = makeCanonical(CanonicalRemappedPath))
+ return EC;
+
auto *RE = cast<RedirectingFileSystem::RemapEntry>(Result->E);
- auto ExternalFile = ExternalFS->openFileForRead(ExtRedirect);
+ auto ExternalFile = File::getWithPath(
+ ExternalFS->openFileForRead(CanonicalRemappedPath), ExtRedirect);
if (!ExternalFile) {
if (shouldFallBackToExternalFS(ExternalFile.getError(), Result->E))
- return ExternalFS->openFileForRead(Path);
+ return File::getWithPath(ExternalFS->openFileForRead(CanonicalPath),
+ OriginalPath);
return ExternalFile;
}
@@ -2036,7 +2127,7 @@ RedirectingFileSystem::openFileForRead(const Twine &Path_) {
// FIXME: Update the status with the name and VFSMapped.
Status S = getRedirectedFileStatus(
- Path, RE->useExternalName(UseExternalNames), *ExternalStatus);
+ OriginalPath, RE->useExternalName(UseExternalNames), *ExternalStatus);
return std::unique_ptr<File>(
std::make_unique<FileWithFixedStatus>(std::move(*ExternalFile), S));
}
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index c1d291731a88..b15e71a9ce2a 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -74,6 +74,11 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
SmallString<MAX_PATH> Path8Str;
Path8.toVector(Path8Str);
+ // If the path is a long path, mangled into forward slashes, normalize
+ // back to backslashes here.
+ if (Path8Str.startswith("//?/"))
+ llvm::sys::path::native(Path8Str, path::Style::windows_backslash);
+
if (std::error_code EC = UTF8ToUTF16(Path8Str, Path16))
return EC;
@@ -100,8 +105,10 @@ std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
}
// Remove '.' and '..' because long paths treat these as real path components.
+ // Explicitly use the backslash form here, as we're prepending the \\?\
+ // prefix.
llvm::sys::path::native(Path8Str, path::Style::windows);
- llvm::sys::path::remove_dots(Path8Str, true);
+ llvm::sys::path::remove_dots(Path8Str, true, path::Style::windows);
const StringRef RootName = llvm::sys::path::root_name(Path8Str);
assert(!RootName.empty() &&
@@ -145,6 +152,7 @@ std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8))
return "";
+ llvm::sys::path::make_preferred(PathNameUTF8);
return std::string(PathNameUTF8.data());
}
@@ -207,7 +215,13 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
// On success, GetCurrentDirectoryW returns the number of characters not
// including the null-terminator.
cur_path.set_size(len);
- return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result);
+
+ if (std::error_code EC =
+ UTF16ToUTF8(cur_path.begin(), cur_path.size(), result))
+ return EC;
+
+ llvm::sys::path::make_preferred(result);
+ return std::error_code();
}
std::error_code set_current_path(const Twine &path) {
@@ -388,7 +402,11 @@ static std::error_code realPathFromHandle(HANDLE H,
}
// Convert the result from UTF-16 to UTF-8.
- return UTF16ToUTF8(Data, CountChars, RealPath);
+ if (std::error_code EC = UTF16ToUTF8(Data, CountChars, RealPath))
+ return EC;
+
+ llvm::sys::path::make_preferred(RealPath);
+ return std::error_code();
}
std::error_code is_local(int FD, bool &Result) {
@@ -416,8 +434,7 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
// Check if the file is on a network (non-local) drive. If so, don't
// continue when DeleteFile is true, since it prevents opening the file for
- // writes. Note -- this will leak temporary files on disk, but only when the
- // target file is on a network drive.
+ // writes.
SmallVector<wchar_t, 128> FinalPath;
if (std::error_code EC = realPathFromHandle(Handle, FinalPath))
return EC;
@@ -427,7 +444,7 @@ static std::error_code setDeleteDisposition(HANDLE Handle, bool Delete) {
return EC;
if (!IsLocal)
- return std::error_code();
+ return errc::not_supported;
// The file is on a local drive, we can safely set FILE_DISPOSITION_INFO's
// flag.
@@ -1183,12 +1200,6 @@ Expected<file_t> openNativeFile(const Twine &Name, CreationDisposition Disp,
}
}
- if (Flags & OF_Delete) {
- if ((EC = setDeleteDisposition(Result, true))) {
- ::CloseHandle(Result);
- return errorCodeToError(EC);
- }
- }
return Result;
}
@@ -1414,6 +1425,8 @@ static bool getKnownFolderPath(KNOWNFOLDERID folderId,
bool ok = !UTF16ToUTF8(path, ::wcslen(path), result);
::CoTaskMemFree(path);
+ if (ok)
+ llvm::sys::path::make_preferred(result);
return ok;
}
@@ -1474,6 +1487,7 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
// Fall back to a system default.
const char *DefaultResult = "C:\\Temp";
Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
+ llvm::sys::path::make_preferred(Result);
}
} // end namespace path
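The recurring make_preferred() calls above normalize whatever the Windows APIs return to the preferred backslash separators before a path is handed back to callers. A tiny usage sketch (assumes the one-argument overload used throughout this patch):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

static void normalizeSeparators() {
  llvm::SmallString<64> P("C:/foo/bar");
  llvm::sys::path::make_preferred(P);
  // On Windows, P now reads "C:\foo\bar".
}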
diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc
index 6f58c52e0746..6732063b562e 100644
--- a/llvm/lib/Support/Windows/Process.inc
+++ b/llvm/lib/Support/Windows/Process.inc
@@ -261,6 +261,7 @@ windows::GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
EC = GetExecutableName(Filename);
if (EC)
return EC;
+ sys::path::make_preferred(Arg0);
sys::path::append(Arg0, Filename);
Args[0] = Saver.save(Arg0).data();
return std::error_code();
@@ -504,8 +505,7 @@ bool llvm::RunningWindows8OrGreater() {
return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0);
}
-LLVM_ATTRIBUTE_NORETURN
-void Process::ExitNoCleanup(int RetCode) {
+[[noreturn]] void Process::ExitNoCleanup(int RetCode) {
TerminateProcess(GetCurrentProcess(), RetCode);
llvm_unreachable("TerminateProcess doesn't return");
}
diff --git a/llvm/lib/Support/Windows/Program.inc b/llvm/lib/Support/Windows/Program.inc
index 824834c1cbbe..a9cf2db7ec72 100644
--- a/llvm/lib/Support/Windows/Program.inc
+++ b/llvm/lib/Support/Windows/Program.inc
@@ -103,6 +103,7 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name,
if (U8Result.empty())
return mapWindowsError(::GetLastError());
+ llvm::sys::path::make_preferred(U8Result);
return std::string(U8Result.begin(), U8Result.end());
}
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index c9530659caad..ab49ac548f89 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/X86TargetParser.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
+#include <numeric>
using namespace llvm;
using namespace llvm::X86;
@@ -137,8 +139,8 @@ constexpr FeatureBitset FeaturesNocona =
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
- FeaturePOPCNT | FeatureSSE4_2 |
- FeatureCMPXCHG16B;
+ FeaturePOPCNT | FeatureCRC32 |
+ FeatureSSE4_2 | FeatureCMPXCHG16B;
constexpr FeatureBitset FeaturesX86_64_V3 =
FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
@@ -151,7 +153,7 @@ constexpr FeatureBitset FeaturesCore2 =
FeaturesNocona | FeatureSAHF | FeatureSSSE3;
constexpr FeatureBitset FeaturesPenryn = FeaturesCore2 | FeatureSSE4_1;
constexpr FeatureBitset FeaturesNehalem =
- FeaturesPenryn | FeaturePOPCNT | FeatureSSE4_2;
+ FeaturesPenryn | FeaturePOPCNT | FeatureCRC32 | FeatureSSE4_2;
constexpr FeatureBitset FeaturesWestmere = FeaturesNehalem | FeaturePCLMUL;
constexpr FeatureBitset FeaturesSandyBridge =
FeaturesWestmere | FeatureAVX | FeatureXSAVE | FeatureXSAVEOPT;
@@ -201,11 +203,11 @@ constexpr FeatureBitset FeaturesTigerlake =
FeaturesICLClient | FeatureAVX512VP2INTERSECT | FeatureMOVDIR64B |
FeatureCLWB | FeatureMOVDIRI | FeatureSHSTK | FeatureKL | FeatureWIDEKL;
constexpr FeatureBitset FeaturesSapphireRapids =
- FeaturesICLServer | FeatureAMX_TILE | FeatureAMX_INT8 | FeatureAMX_BF16 |
- FeatureAVX512BF16 | FeatureAVX512VP2INTERSECT | FeatureCLDEMOTE |
- FeatureENQCMD | FeatureMOVDIR64B | FeatureMOVDIRI | FeaturePTWRITE |
- FeatureSERIALIZE | FeatureSHSTK | FeatureTSXLDTRK | FeatureUINTR |
- FeatureWAITPKG | FeatureAVXVNNI;
+ FeaturesICLServer | FeatureAMX_BF16 | FeatureAMX_INT8 | FeatureAMX_TILE |
+ FeatureAVX512BF16 | FeatureAVX512FP16 | FeatureAVX512VP2INTERSECT |
+ FeatureAVXVNNI | FeatureCLDEMOTE | FeatureENQCMD | FeatureMOVDIR64B |
+ FeatureMOVDIRI | FeaturePTWRITE | FeatureSERIALIZE | FeatureSHSTK |
+ FeatureTSXLDTRK | FeatureUINTR | FeatureWAITPKG;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
@@ -254,16 +256,17 @@ constexpr FeatureBitset FeaturesBTVER1 =
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_A |
FeatureSAHF;
constexpr FeatureBitset FeaturesBTVER2 =
- FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureF16C |
- FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
+ FeaturesBTVER1 | FeatureAES | FeatureAVX | FeatureBMI | FeatureCRC32 |
+ FeatureF16C | FeatureMOVBE | FeaturePCLMUL | FeatureXSAVE | FeatureXSAVEOPT;
// AMD Bulldozer architecture processors.
constexpr FeatureBitset FeaturesBDVER1 =
FeatureX87 | FeatureAES | FeatureAVX | FeatureCMPXCHG8B |
- FeatureCMPXCHG16B | Feature64BIT | FeatureFMA4 | FeatureFXSR | FeatureLWP |
- FeatureLZCNT | FeatureMMX | FeaturePCLMUL | FeaturePOPCNT | FeaturePRFCHW |
- FeatureSAHF | FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 |
- FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A | FeatureXOP | FeatureXSAVE;
+ FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT | FeatureFMA4 |
+ FeatureFXSR | FeatureLWP | FeatureLZCNT | FeatureMMX | FeaturePCLMUL |
+ FeaturePOPCNT | FeaturePRFCHW | FeatureSAHF | FeatureSSE | FeatureSSE2 |
+ FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 | FeatureSSE4_2 | FeatureSSE4_A |
+ FeatureXOP | FeatureXSAVE;
constexpr FeatureBitset FeaturesBDVER2 =
FeaturesBDVER1 | FeatureBMI | FeatureFMA | FeatureF16C | FeatureTBM;
constexpr FeatureBitset FeaturesBDVER3 =
@@ -276,9 +279,9 @@ constexpr FeatureBitset FeaturesBDVER4 = FeaturesBDVER3 | FeatureAVX2 |
constexpr FeatureBitset FeaturesZNVER1 =
FeatureX87 | FeatureADX | FeatureAES | FeatureAVX | FeatureAVX2 |
FeatureBMI | FeatureBMI2 | FeatureCLFLUSHOPT | FeatureCLZERO |
- FeatureCMPXCHG8B | FeatureCMPXCHG16B | Feature64BIT | FeatureF16C |
- FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT | FeatureMMX |
- FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
+ FeatureCMPXCHG8B | FeatureCMPXCHG16B | FeatureCRC32 | Feature64BIT |
+ FeatureF16C | FeatureFMA | FeatureFSGSBASE | FeatureFXSR | FeatureLZCNT |
+ FeatureMMX | FeatureMOVBE | FeatureMWAITX | FeaturePCLMUL | FeaturePOPCNT |
FeaturePRFCHW | FeatureRDRND | FeatureRDSEED | FeatureSAHF | FeatureSHA |
FeatureSSE | FeatureSSE2 | FeatureSSE3 | FeatureSSSE3 | FeatureSSE4_1 |
FeatureSSE4_2 | FeatureSSE4_A | FeatureXSAVE | FeatureXSAVEC |
@@ -470,6 +473,7 @@ constexpr FeatureBitset ImpliedFeaturesCLZERO = {};
constexpr FeatureBitset ImpliedFeaturesCMOV = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG16B = {};
constexpr FeatureBitset ImpliedFeaturesCMPXCHG8B = {};
+constexpr FeatureBitset ImpliedFeaturesCRC32 = {};
constexpr FeatureBitset ImpliedFeaturesENQCMD = {};
constexpr FeatureBitset ImpliedFeaturesFSGSBASE = {};
constexpr FeatureBitset ImpliedFeaturesFXSR = {};
@@ -576,6 +580,8 @@ constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
+static constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
+ FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
constexpr FeatureBitset ImpliedFeaturesKL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesWIDEKL = FeatureKL;
@@ -660,3 +666,45 @@ void llvm::X86::updateImpliedFeatures(
if (ImpliedBits[i] && !FeatureInfos[i].Name.empty())
Features[FeatureInfos[i].Name] = Enabled;
}
+
+uint64_t llvm::X86::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
+ // Processor features and mapping to processor feature value.
+ uint64_t FeaturesMask = 0;
+ for (const StringRef &FeatureStr : FeatureStrs) {
+ unsigned Feature = StringSwitch<unsigned>(FeatureStr)
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
+ .Case(STR, llvm::X86::FEATURE_##ENUM)
+#include "llvm/Support/X86TargetParser.def"
+ ;
+ FeaturesMask |= (1ULL << Feature);
+ }
+ return FeaturesMask;
+}
+
+unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
+#ifndef NDEBUG
+ // Check that priorities are set properly in the .def file. We expect that
+ // "compat" features are assigned non-duplicate consecutive priorities
+ // starting from zero (0, 1, ..., num_features - 1).
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) PRIORITY,
+ unsigned Priorities[] = {
+#include "llvm/Support/X86TargetParser.def"
+ std::numeric_limits<unsigned>::max() // Need to consume last comma.
+ };
+ std::array<unsigned, array_lengthof(Priorities) - 1> HelperList;
+ std::iota(HelperList.begin(), HelperList.end(), 0);
+ assert(std::is_permutation(HelperList.begin(), HelperList.end(),
+ std::begin(Priorities),
+ std::prev(std::end(Priorities))) &&
+ "Priorities don't form consecutive range!");
+#endif
+
+ switch (Feat) {
+#define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) \
+ case X86::FEATURE_##ENUM: \
+ return PRIORITY;
+#include "llvm/Support/X86TargetParser.def"
+ default:
+ llvm_unreachable("No Feature Priority for non-CPUSupports Features");
+ }
+}
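getCpuSupportsMask() above folds a list of "compat" feature names into a bitmask over the X86::FEATURE_* values used for runtime CPU dispatch. A usage sketch (the spellings "sse4.2" and "avx2" are assumed to be compat names from X86TargetParser.def):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/X86TargetParser.h"
#include <cstdint>

static uint64_t maskForSse42AndAvx2() {
  const llvm::StringRef Features[] = {"sse4.2", "avx2"};
  return llvm::X86::getCpuSupportsMask(Features);
}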
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index d4e1c884d125..4590a3d19b0d 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -185,7 +185,7 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str,
// Write out the escaped representation.
if (UseHexEscapes) {
*this << '\\' << 'x';
- *this << hexdigit((c >> 4 & 0xF));
+ *this << hexdigit((c >> 4) & 0xF);
*this << hexdigit((c >> 0) & 0xF);
} else {
// Always use a full 3-character octal escape.
@@ -679,7 +679,8 @@ raw_fd_ostream::~raw_fd_ostream() {
// has_error() and clear the error flag with clear_error() before
// destructing raw_ostream objects which may have errors.
if (has_error())
- report_fatal_error("IO failure on output stream: " + error().message(),
+ report_fatal_error(Twine("IO failure on output stream: ") +
+ error().message(),
/*gen_crash_diag=*/false);
}
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 0b1024648b66..762255b43136 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -55,6 +55,10 @@ WriteIfChanged("write-if-changed", cl::desc("Only write output if it changed"));
static cl::opt<bool>
TimePhases("time-phases", cl::desc("Time phases of parser and backend"));
+static cl::opt<bool> NoWarnOnUnusedTemplateArgs(
+ "no-warn-on-unused-template-args",
+ cl::desc("Disable unused template argument warnings."));
+
static int reportError(const char *ProgName, Twine Msg) {
errs() << ProgName << ": " << Msg;
errs().flush();
@@ -107,7 +111,7 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
// it later.
SrcMgr.setIncludeDirs(IncludeDirs);
- TGParser Parser(SrcMgr, MacroNames, Records);
+ TGParser Parser(SrcMgr, MacroNames, Records, NoWarnOnUnusedTemplateArgs);
if (Parser.ParseFile())
return 1;
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 8663863d968f..eb7d4838a9f6 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
@@ -41,24 +42,70 @@ using namespace llvm;
#define DEBUG_TYPE "tblgen-records"
-static BumpPtrAllocator Allocator;
+//===----------------------------------------------------------------------===//
+// Context
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace detail {
+/// This class contains all of the contextual static state of the Record
+/// classes. This allows for better lifetime management and control of the used
+/// static data.
+struct RecordContext {
+ RecordContext()
+ : AnyRecord(0), TrueBitInit(true, &SharedBitRecTy),
+ FalseBitInit(false, &SharedBitRecTy), StringInitStringPool(Allocator),
+ StringInitCodePool(Allocator), LastRecordID(0) {}
+
+ BumpPtrAllocator Allocator;
+ std::vector<BitsRecTy *> SharedBitsRecTys;
+ BitRecTy SharedBitRecTy;
+ IntRecTy SharedIntRecTy;
+ StringRecTy SharedStringRecTy;
+ DagRecTy SharedDagRecTy;
+
+ RecordRecTy AnyRecord;
+ UnsetInit TheUnsetInit;
+ BitInit TrueBitInit;
+ BitInit FalseBitInit;
+
+ FoldingSet<BitsInit> TheBitsInitPool;
+ std::map<int64_t, IntInit *> TheIntInitPool;
+ StringMap<StringInit *, BumpPtrAllocator &> StringInitStringPool;
+ StringMap<StringInit *, BumpPtrAllocator &> StringInitCodePool;
+ FoldingSet<ListInit> TheListInitPool;
+ FoldingSet<UnOpInit> TheUnOpInitPool;
+ FoldingSet<BinOpInit> TheBinOpInitPool;
+ FoldingSet<TernOpInit> TheTernOpInitPool;
+ FoldingSet<FoldOpInit> TheFoldOpInitPool;
+ FoldingSet<IsAOpInit> TheIsAOpInitPool;
+ DenseMap<std::pair<RecTy *, Init *>, VarInit *> TheVarInitPool;
+ DenseMap<std::pair<TypedInit *, unsigned>, VarBitInit *> TheVarBitInitPool;
+ DenseMap<std::pair<TypedInit *, unsigned>, VarListElementInit *>
+ TheVarListElementInitPool;
+ FoldingSet<VarDefInit> TheVarDefInitPool;
+ DenseMap<std::pair<Init *, StringInit *>, FieldInit *> TheFieldInitPool;
+ FoldingSet<CondOpInit> TheCondOpInitPool;
+ FoldingSet<DagInit> TheDagInitPool;
+
+ unsigned LastRecordID;
+};
+} // namespace detail
+} // namespace llvm
+
+ManagedStatic<detail::RecordContext> Context;
//===----------------------------------------------------------------------===//
// Type implementations
//===----------------------------------------------------------------------===//
-BitRecTy BitRecTy::Shared;
-IntRecTy IntRecTy::Shared;
-StringRecTy StringRecTy::Shared;
-DagRecTy DagRecTy::Shared;
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecTy::dump() const { print(errs()); }
#endif
ListRecTy *RecTy::getListTy() {
if (!ListTy)
- ListTy = new(Allocator) ListRecTy(this);
+ ListTy = new(Context->Allocator) ListRecTy(this);
return ListTy;
}
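The RecordContext above replaces a scattering of function-local statics with one lazily constructed, ManagedStatic-owned object. A generic sketch of that pattern (illustrative names, not TableGen code):

#include "llvm/Support/Allocator.h"
#include "llvm/Support/ManagedStatic.h"

namespace {
struct MyContext {
  llvm::BumpPtrAllocator Allocator;
  unsigned LastID = 0;
};
} // namespace

static llvm::ManagedStatic<MyContext> Ctx;

static unsigned nextID() {
  // The context is constructed on first dereference and destroyed by
  // llvm_shutdown(), giving the shared state a well-defined lifetime.
  return Ctx->LastID++;
}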
@@ -69,6 +116,8 @@ bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const {
bool RecTy::typeIsA(const RecTy *RHS) const { return this == RHS; }
+BitRecTy *BitRecTy::get() { return &Context->SharedBitRecTy; }
+
bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
if (RecTy::typeIsConvertibleTo(RHS) || RHS->getRecTyKind() == IntRecTyKind)
return true;
@@ -78,12 +127,11 @@ bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
}
BitsRecTy *BitsRecTy::get(unsigned Sz) {
- static std::vector<BitsRecTy*> Shared;
- if (Sz >= Shared.size())
- Shared.resize(Sz + 1);
- BitsRecTy *&Ty = Shared[Sz];
+ if (Sz >= Context->SharedBitsRecTys.size())
+ Context->SharedBitsRecTys.resize(Sz + 1);
+ BitsRecTy *&Ty = Context->SharedBitsRecTys[Sz];
if (!Ty)
- Ty = new(Allocator) BitsRecTy(Sz);
+ Ty = new (Context->Allocator) BitsRecTy(Sz);
return Ty;
}
@@ -104,11 +152,15 @@ bool BitsRecTy::typeIsA(const RecTy *RHS) const {
return false;
}
+IntRecTy *IntRecTy::get() { return &Context->SharedIntRecTy; }
+
bool IntRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
RecTyKind kind = RHS->getRecTyKind();
return kind==BitRecTyKind || kind==BitsRecTyKind || kind==IntRecTyKind;
}
+StringRecTy *StringRecTy::get() { return &Context->SharedStringRecTy; }
+
std::string StringRecTy::getAsString() const {
return "string";
}
@@ -134,6 +186,8 @@ bool ListRecTy::typeIsA(const RecTy *RHS) const {
return false;
}
+DagRecTy *DagRecTy::get() { return &Context->SharedDagRecTy; }
+
std::string DagRecTy::getAsString() const {
return "dag";
}
@@ -146,10 +200,8 @@ static void ProfileRecordRecTy(FoldingSetNodeID &ID,
}
RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
- if (UnsortedClasses.empty()) {
- static RecordRecTy AnyRecord(0);
- return &AnyRecord;
- }
+ if (UnsortedClasses.empty())
+ return &Context->AnyRecord;
FoldingSet<RecordRecTy> &ThePool =
UnsortedClasses[0]->getRecords().RecordTypePool;
@@ -177,8 +229,8 @@ RecordRecTy *RecordRecTy::get(ArrayRef<Record *> UnsortedClasses) {
}
#endif
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Record *>(Classes.size()),
- alignof(RecordRecTy));
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Record *>(Classes.size()), alignof(RecordRecTy));
RecordRecTy *Ty = new(Mem) RecordRecTy(Classes.size());
std::uninitialized_copy(Classes.begin(), Classes.end(),
Ty->getTrailingObjects<Record *>());
@@ -283,10 +335,7 @@ void Init::anchor() {}
LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
#endif
-UnsetInit *UnsetInit::get() {
- static UnsetInit TheInit;
- return &TheInit;
-}
+UnsetInit *UnsetInit::get() { return &Context->TheUnsetInit; }
Init *UnsetInit::getCastTo(RecTy *Ty) const {
return const_cast<UnsetInit *>(this);
@@ -297,10 +346,7 @@ Init *UnsetInit::convertInitializerTo(RecTy *Ty) const {
}
BitInit *BitInit::get(bool V) {
- static BitInit True(true);
- static BitInit False(false);
-
- return V ? &True : &False;
+ return V ? &Context->TrueBitInit : &Context->FalseBitInit;
}
Init *BitInit::convertInitializerTo(RecTy *Ty) const {
@@ -328,21 +374,19 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
}
BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
- static FoldingSet<BitsInit> ThePool;
-
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
void *IP = nullptr;
- if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (BitsInit *I = Context->TheBitsInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
- alignof(BitsInit));
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *>(Range.size()), alignof(BitsInit));
BitsInit *I = new(Mem) BitsInit(Range.size());
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
- ThePool.InsertNode(I, IP);
+ Context->TheBitsInitPool.InsertNode(I, IP);
return I;
}
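BitsInit::get() above, like the other get() factories in this file, now interns nodes in a context-owned FoldingSet instead of a function-local static; the uniquing idiom itself is unchanged. A generic sketch with an illustrative node type:

#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/Allocator.h"

namespace {
struct PairNode : llvm::FoldingSetNode {
  int A, B;
  PairNode(int A, int B) : A(A), B(B) {}
  void Profile(llvm::FoldingSetNodeID &ID) const {
    ID.AddInteger(A);
    ID.AddInteger(B);
  }
};
} // namespace

static llvm::FoldingSet<PairNode> Pool;
static llvm::BumpPtrAllocator Alloc;

static PairNode *getPair(int A, int B) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(A);
  ID.AddInteger(B);
  void *IP = nullptr;
  if (PairNode *N = Pool.FindNodeOrInsertPos(ID, IP))
    return N;                      // already interned
  PairNode *N = new (Alloc) PairNode(A, B);
  Pool.InsertNode(N, IP);          // record it at the remembered position
  return N;
}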
@@ -446,10 +490,9 @@ Init *BitsInit::resolveReferences(Resolver &R) const {
}
IntInit *IntInit::get(int64_t V) {
- static std::map<int64_t, IntInit*> ThePool;
-
- IntInit *&I = ThePool[V];
- if (!I) I = new(Allocator) IntInit(V);
+ IntInit *&I = Context->TheIntInitPool[V];
+ if (!I)
+ I = new (Context->Allocator) IntInit(V);
return I;
}
@@ -503,7 +546,7 @@ IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
}
AnonymousNameInit *AnonymousNameInit::get(unsigned V) {
- return new (Allocator) AnonymousNameInit(V);
+ return new (Context->Allocator) AnonymousNameInit(V);
}
StringInit *AnonymousNameInit::getNameInit() const {
@@ -525,20 +568,12 @@ Init *AnonymousNameInit::resolveReferences(Resolver &R) const {
}
StringInit *StringInit::get(StringRef V, StringFormat Fmt) {
- static StringMap<StringInit*, BumpPtrAllocator &> StringPool(Allocator);
- static StringMap<StringInit*, BumpPtrAllocator &> CodePool(Allocator);
-
- if (Fmt == SF_String) {
- auto &Entry = *StringPool.insert(std::make_pair(V, nullptr)).first;
- if (!Entry.second)
- Entry.second = new (Allocator) StringInit(Entry.getKey(), Fmt);
- return Entry.second;
- } else {
- auto &Entry = *CodePool.insert(std::make_pair(V, nullptr)).first;
- if (!Entry.second)
- Entry.second = new (Allocator) StringInit(Entry.getKey(), Fmt);
- return Entry.second;
- }
+ auto &InitMap = Fmt == SF_String ? Context->StringInitStringPool
+ : Context->StringInitCodePool;
+ auto &Entry = *InitMap.insert(std::make_pair(V, nullptr)).first;
+ if (!Entry.second)
+ Entry.second = new (Context->Allocator) StringInit(Entry.getKey(), Fmt);
+ return Entry.second;
}
Init *StringInit::convertInitializerTo(RecTy *Ty) const {
@@ -559,24 +594,22 @@ static void ProfileListInit(FoldingSetNodeID &ID,
}
ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
- static FoldingSet<ListInit> ThePool;
-
FoldingSetNodeID ID;
ProfileListInit(ID, Range, EltTy);
void *IP = nullptr;
- if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (ListInit *I = Context->TheListInitPool.FindNodeOrInsertPos(ID, IP))
return I;
assert(Range.empty() || !isa<TypedInit>(Range[0]) ||
cast<TypedInit>(Range[0])->getType()->typeIsConvertibleTo(EltTy));
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
- alignof(ListInit));
- ListInit *I = new(Mem) ListInit(Range.size(), EltTy);
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *>(Range.size()), alignof(ListInit));
+ ListInit *I = new (Mem) ListInit(Range.size(), EltTy);
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
- ThePool.InsertNode(I, IP);
+ Context->TheListInitPool.InsertNode(I, IP);
return I;
}
@@ -696,17 +729,15 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) {
}
UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
- static FoldingSet<UnOpInit> ThePool;
-
FoldingSetNodeID ID;
ProfileUnOpInit(ID, Opc, LHS, Type);
void *IP = nullptr;
- if (UnOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (UnOpInit *I = Context->TheUnOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type);
- ThePool.InsertNode(I, IP);
+ UnOpInit *I = new (Context->Allocator) UnOpInit(Opc, LHS, Type);
+ Context->TheUnOpInitPool.InsertNode(I, IP);
return I;
}
@@ -860,19 +891,16 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS,
ID.AddPointer(Type);
}
-BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS,
- Init *RHS, RecTy *Type) {
- static FoldingSet<BinOpInit> ThePool;
-
+BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) {
FoldingSetNodeID ID;
ProfileBinOpInit(ID, Opc, LHS, RHS, Type);
void *IP = nullptr;
- if (BinOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (BinOpInit *I = Context->TheBinOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type);
- ThePool.InsertNode(I, IP);
+ BinOpInit *I = new (Context->Allocator) BinOpInit(Opc, LHS, RHS, Type);
+ Context->TheBinOpInitPool.InsertNode(I, IP);
return I;
}
@@ -884,7 +912,7 @@ static StringInit *ConcatStringInits(const StringInit *I0,
const StringInit *I1) {
SmallString<80> Concat(I0->getValue());
Concat.append(I1->getValue());
- return StringInit::get(Concat,
+ return StringInit::get(Concat,
StringInit::determineFormat(I0->getFormat(),
I1->getFormat()));
}
@@ -1189,17 +1217,15 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS,
TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
RecTy *Type) {
- static FoldingSet<TernOpInit> ThePool;
-
FoldingSetNodeID ID;
ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type);
void *IP = nullptr;
- if (TernOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (TernOpInit *I = Context->TheTernOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
- ThePool.InsertNode(I, IP);
+ TernOpInit *I = new (Context->Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
+ Context->TheTernOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1273,8 +1299,8 @@ static Init *FilterHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
if (!Include)
return nullptr;
if (IntInit *IncludeInt = dyn_cast_or_null<IntInit>(
- Include->convertInitializerTo(IntRecTy::get()))) {
- if (IncludeInt->getValue())
+ Include->convertInitializerTo(IntRecTy::get()))) {
+ if (IncludeInt->getValue())
NewList.push_back(Item);
} else {
return nullptr;
@@ -1482,17 +1508,17 @@ static void ProfileFoldOpInit(FoldingSetNodeID &ID, Init *Start, Init *List,
FoldOpInit *FoldOpInit::get(Init *Start, Init *List, Init *A, Init *B,
Init *Expr, RecTy *Type) {
- static FoldingSet<FoldOpInit> ThePool;
FoldingSetNodeID ID;
ProfileFoldOpInit(ID, Start, List, A, B, Expr, Type);
void *IP = nullptr;
- if (FoldOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (FoldOpInit *I = Context->TheFoldOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- FoldOpInit *I = new (Allocator) FoldOpInit(Start, List, A, B, Expr, Type);
- ThePool.InsertNode(I, IP);
+ FoldOpInit *I =
+ new (Context->Allocator) FoldOpInit(Start, List, A, B, Expr, Type);
+ Context->TheFoldOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1547,17 +1573,16 @@ static void ProfileIsAOpInit(FoldingSetNodeID &ID, RecTy *CheckType,
}
IsAOpInit *IsAOpInit::get(RecTy *CheckType, Init *Expr) {
- static FoldingSet<IsAOpInit> ThePool;
FoldingSetNodeID ID;
ProfileIsAOpInit(ID, CheckType, Expr);
void *IP = nullptr;
- if (IsAOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (IsAOpInit *I = Context->TheIsAOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- IsAOpInit *I = new (Allocator) IsAOpInit(CheckType, Expr);
- ThePool.InsertNode(I, IP);
+ IsAOpInit *I = new (Context->Allocator) IsAOpInit(CheckType, Expr);
+ Context->TheIsAOpInitPool.InsertNode(I, IP);
return I;
}
@@ -1680,14 +1705,9 @@ VarInit *VarInit::get(StringRef VN, RecTy *T) {
}
VarInit *VarInit::get(Init *VN, RecTy *T) {
- using Key = std::pair<RecTy *, Init *>;
- static DenseMap<Key, VarInit*> ThePool;
-
- Key TheKey(std::make_pair(T, VN));
-
- VarInit *&I = ThePool[TheKey];
+ VarInit *&I = Context->TheVarInitPool[std::make_pair(T, VN)];
if (!I)
- I = new(Allocator) VarInit(VN, T);
+ I = new (Context->Allocator) VarInit(VN, T);
return I;
}
@@ -1709,14 +1729,9 @@ Init *VarInit::resolveReferences(Resolver &R) const {
}
VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
- using Key = std::pair<TypedInit *, unsigned>;
- static DenseMap<Key, VarBitInit*> ThePool;
-
- Key TheKey(std::make_pair(T, B));
-
- VarBitInit *&I = ThePool[TheKey];
+ VarBitInit *&I = Context->TheVarBitInitPool[std::make_pair(T, B)];
if (!I)
- I = new(Allocator) VarBitInit(T, B);
+ I = new(Context->Allocator) VarBitInit(T, B);
return I;
}
@@ -1732,15 +1747,11 @@ Init *VarBitInit::resolveReferences(Resolver &R) const {
return const_cast<VarBitInit*>(this);
}
-VarListElementInit *VarListElementInit::get(TypedInit *T,
- unsigned E) {
- using Key = std::pair<TypedInit *, unsigned>;
- static DenseMap<Key, VarListElementInit*> ThePool;
-
- Key TheKey(std::make_pair(T, E));
-
- VarListElementInit *&I = ThePool[TheKey];
- if (!I) I = new(Allocator) VarListElementInit(T, E);
+VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) {
+ VarListElementInit *&I =
+ Context->TheVarListElementInitPool[std::make_pair(T, E)];
+ if (!I)
+ I = new (Context->Allocator) VarListElementInit(T, E);
return I;
}
@@ -1800,21 +1811,19 @@ static void ProfileVarDefInit(FoldingSetNodeID &ID,
}
VarDefInit *VarDefInit::get(Record *Class, ArrayRef<Init *> Args) {
- static FoldingSet<VarDefInit> ThePool;
-
FoldingSetNodeID ID;
ProfileVarDefInit(ID, Class, Args);
void *IP = nullptr;
- if (VarDefInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (VarDefInit *I = Context->TheVarDefInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
- alignof(VarDefInit));
- VarDefInit *I = new(Mem) VarDefInit(Class, Args.size());
+ void *Mem = Context->Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
+ alignof(VarDefInit));
+ VarDefInit *I = new (Mem) VarDefInit(Class, Args.size());
std::uninitialized_copy(Args.begin(), Args.end(),
I->getTrailingObjects<Init *>());
- ThePool.InsertNode(I, IP);
+ Context->TheVarDefInitPool.InsertNode(I, IP);
return I;
}
@@ -1920,13 +1929,9 @@ std::string VarDefInit::getAsString() const {
}
FieldInit *FieldInit::get(Init *R, StringInit *FN) {
- using Key = std::pair<Init *, StringInit *>;
- static DenseMap<Key, FieldInit*> ThePool;
-
- Key TheKey(std::make_pair(R, FN));
-
- FieldInit *&I = ThePool[TheKey];
- if (!I) I = new(Allocator) FieldInit(R, FN);
+ FieldInit *&I = Context->TheFieldInitPool[std::make_pair(R, FN)];
+ if (!I)
+ I = new (Context->Allocator) FieldInit(R, FN);
return I;
}
@@ -1995,23 +2000,22 @@ CondOpInit::get(ArrayRef<Init *> CondRange,
assert(CondRange.size() == ValRange.size() &&
"Number of conditions and values must match!");
- static FoldingSet<CondOpInit> ThePool;
FoldingSetNodeID ID;
ProfileCondOpInit(ID, CondRange, ValRange, Ty);
void *IP = nullptr;
- if (CondOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (CondOpInit *I = Context->TheCondOpInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(2*CondRange.size()),
- alignof(BitsInit));
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *>(2 * CondRange.size()), alignof(BitsInit));
CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty);
std::uninitialized_copy(CondRange.begin(), CondRange.end(),
I->getTrailingObjects<Init *>());
std::uninitialized_copy(ValRange.begin(), ValRange.end(),
I->getTrailingObjects<Init *>()+CondRange.size());
- ThePool.InsertNode(I, IP);
+ Context->TheCondOpInitPool.InsertNode(I, IP);
return I;
}
@@ -2113,25 +2117,24 @@ static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
assert(Name == NameRange.end() && "Arg name overflow!");
}
-DagInit *
-DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
- ArrayRef<StringInit *> NameRange) {
- static FoldingSet<DagInit> ThePool;
-
+DagInit *DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
+ ArrayRef<StringInit *> NameRange) {
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
void *IP = nullptr;
- if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
+ if (DagInit *I = Context->TheDagInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *, StringInit *>(ArgRange.size(), NameRange.size()), alignof(BitsInit));
- DagInit *I = new(Mem) DagInit(V, VN, ArgRange.size(), NameRange.size());
+ void *Mem = Context->Allocator.Allocate(
+ totalSizeToAlloc<Init *, StringInit *>(ArgRange.size(), NameRange.size()),
+ alignof(BitsInit));
+ DagInit *I = new (Mem) DagInit(V, VN, ArgRange.size(), NameRange.size());
std::uninitialized_copy(ArgRange.begin(), ArgRange.end(),
I->getTrailingObjects<Init *>());
std::uninitialized_copy(NameRange.begin(), NameRange.end(),
I->getTrailingObjects<StringInit *>());
- ThePool.InsertNode(I, IP);
+ Context->TheDagInitPool.InsertNode(I, IP);
return I;
}
@@ -2301,8 +2304,6 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (PrintSem) OS << ";\n";
}
-unsigned Record::LastID = 0;
-
void Record::checkName() {
// Ensure the record name has string type.
const TypedInit *TypedName = cast<const TypedInit>(Name);
@@ -2319,10 +2320,12 @@ RecordRecTy *Record::getType() {
DefInit *Record::getDefInit() {
if (!CorrespondingDefInit)
- CorrespondingDefInit = new (Allocator) DefInit(this);
+ CorrespondingDefInit = new (Context->Allocator) DefInit(this);
return CorrespondingDefInit;
}
+unsigned Record::getNewUID() { return Context->LastRecordID++; }
+
void Record::setName(Init *NewName) {
Name = NewName;
checkName();
@@ -2501,7 +2504,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
if (BitsInit *BI = dyn_cast<BitsInit>(R->getValue()))
return BI;
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
"' exists but does not have a bits value");
}
@@ -2513,7 +2516,7 @@ ListInit *Record::getValueAsListInit(StringRef FieldName) const {
if (ListInit *LI = dyn_cast<ListInit>(R->getValue()))
return LI;
- PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
+ PrintFatalError(getLoc(), "Record `" + getName() + "', field `" + FieldName +
"' exists but does not have a list value");
}
@@ -2653,13 +2656,23 @@ void Record::checkRecordAssertions() {
RecordResolver R(*this);
R.setFinal(true);
- for (auto Assertion : getAssertions()) {
+ for (const auto &Assertion : getAssertions()) {
Init *Condition = Assertion.Condition->resolveReferences(R);
Init *Message = Assertion.Message->resolveReferences(R);
CheckAssert(Assertion.Loc, Condition, Message);
}
}
+// Report a warning if the record has unused template arguments.
+void Record::checkUnusedTemplateArgs() {
+ for (const Init *TA : getTemplateArgs()) {
+ const RecordVal *Arg = getValue(TA);
+ if (!Arg->isUsed())
+ PrintWarning(Arg->getLoc(),
+ "unused template argument: " + Twine(Arg->getName()));
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; }
#endif
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index ed7963031b24..6ccca4d69f40 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -874,8 +874,9 @@ Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
Record *TemplateRec = CurMultiClass ? &CurMultiClass->Rec : CurRec;
if (TemplateRec->isTemplateArg(TemplateArgName)) {
- const RecordVal *RV = TemplateRec->getValue(TemplateArgName);
+ RecordVal *RV = TemplateRec->getValue(TemplateArgName);
assert(RV && "Template arg doesn't exist??");
+ RV->setUsed(true);
return VarInit::get(TemplateArgName, RV->getType());
} else if (Name->getValue() == "NAME") {
return VarInit::get(TemplateArgName, StringRecTy::get());
@@ -3346,7 +3347,12 @@ bool TGParser::ParseClass() {
if (ParseTemplateArgList(CurRec))
return true;
- return ParseObjectBody(CurRec);
+ if (ParseObjectBody(CurRec))
+ return true;
+
+ if (!NoWarnOnUnusedTemplateArgs)
+ CurRec->checkUnusedTemplateArgs();
+ return false;
}
/// ParseLetList - Parse a non-empty list of assignment expressions into a list
@@ -3541,6 +3547,9 @@ bool TGParser::ParseMultiClass() {
PopLocalScope(MulticlassScope);
}
+ if (!NoWarnOnUnusedTemplateArgs)
+ CurMultiClass->Rec.checkUnusedTemplateArgs();
+
CurMultiClass = nullptr;
return false;
}
diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index 6e3c5186e4f6..00883c858d58 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -160,10 +160,13 @@ class TGParser {
// exist.
};
+ bool NoWarnOnUnusedTemplateArgs = false;
+
public:
- TGParser(SourceMgr &SM, ArrayRef<std::string> Macros,
- RecordKeeper &records)
- : Lex(SM, Macros), CurMultiClass(nullptr), Records(records) {}
+ TGParser(SourceMgr &SM, ArrayRef<std::string> Macros, RecordKeeper &records,
+ const bool NoWarnOnUnusedTemplateArgs = false)
+ : Lex(SM, Macros), CurMultiClass(nullptr), Records(records),
+ NoWarnOnUnusedTemplateArgs(NoWarnOnUnusedTemplateArgs) {}
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
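The new constructor parameter above is what llvm/lib/TableGen/Main.cpp wires the -no-warn-on-unused-template-args option into. A small sketch of driving the parser directly with the warning suppressed (TGParser.h is a private header under llvm/lib/TableGen, shown purely for illustration):

#include "TGParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Record.h"

static bool parseWithoutUnusedArgWarnings(llvm::SourceMgr &SrcMgr,
                                          llvm::RecordKeeper &Records) {
  llvm::TGParser Parser(SrcMgr, /*Macros=*/{}, Records,
                        /*NoWarnOnUnusedTemplateArgs=*/true);
  return Parser.ParseFile();   // true on error, false on success
}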
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 658d44771e8d..b0dd30c13137 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -51,6 +51,7 @@ FunctionPass *createAArch64A53Fix835769();
FunctionPass *createFalkorHWPFFixPass();
FunctionPass *createFalkorMarkStridedAccessesPass();
FunctionPass *createAArch64BranchTargetsPass();
+FunctionPass *createAArch64MIPeepholeOptPass();
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
@@ -82,6 +83,7 @@ void initializeAArch64SLSHardeningPass(PassRegistry&);
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
+void initializeAArch64MIPeepholeOptPass(PassRegistry &);
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &);
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index d8dd9d1b2f91..548e4e0c9389 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -61,6 +61,9 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
"Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
+ "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules">;
+
def FeatureOutlineAtomics : SubtargetFeature<"outline-atomics", "OutlineAtomics", "true",
"Enable out of line atomics to support LSE instructions">;
@@ -126,8 +129,12 @@ def FeatureExperimentalZeroingPseudos
"merged with destructive operations",
[]>;
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+ "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+
def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
- "Enable Scalable Vector Extension 2 (SVE2) instructions", [FeatureSVE]>;
+ "Enable Scalable Vector Extension 2 (SVE2) instructions",
+ [FeatureSVE, FeatureUseScalarIncVL]>;
def FeatureSVE2AES : SubtargetFeature<"sve2-aes", "HasSVE2AES", "true",
"Enable AES SVE2 instructions", [FeatureSVE2, FeatureAES]>;
@@ -309,10 +316,6 @@ def FeatureSEL2 : SubtargetFeature<
"sel2", "HasSEL2", "true",
"Enable v8.4-A Secure Exception Level 2 extension">;
-def FeaturePMU : SubtargetFeature<
- "pmu", "HasPMU", "true",
- "Enable v8.4-A PMU extension">;
-
def FeatureTLB_RMI : SubtargetFeature<
"tlb-rmi", "HasTLB_RMI", "true",
"Enable v8.4-A TLB Range and Maintenance Instructions">;
@@ -429,10 +432,13 @@ def FeatureEnhancedCounterVirtualization :
def FeatureRME : SubtargetFeature<"rme", "HasRME",
"true", "Enable Realm Management Extension">;
-// FIXME: SME should only imply the subset of SVE(2) instructions that are
-// legal in streaming mode.
+// A subset of SVE(2) instructions are legal in Streaming SVE execution mode
+// defined by SME.
+def FeatureStreamingSVE : SubtargetFeature<"streaming-sve",
+ "HasStreamingSVE", "true",
+ "Enable subset of SVE(2) instructions for Streaming SVE execution mode">;
def FeatureSME : SubtargetFeature<"sme", "HasSME", "true",
- "Enable Scalable Matrix Extension (SME)", [FeatureSVE2, FeatureBF16]>;
+ "Enable Scalable Matrix Extension (SME)", [FeatureStreamingSVE, FeatureBF16]>;
def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true",
"Enable Scalable Matrix Extension (SME) F64F64 instructions", [FeatureSME]>;
@@ -440,13 +446,24 @@ def FeatureSMEF64 : SubtargetFeature<"sme-f64", "HasSMEF64", "true",
def FeatureSMEI64 : SubtargetFeature<"sme-i64", "HasSMEI64", "true",
"Enable Scalable Matrix Extension (SME) I16I64 instructions", [FeatureSME]>;
+def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
+ "Apple A7 (the CPU formerly known as Cyclone)">;
+
+def FeatureEL2VMSA : SubtargetFeature<"el2vmsa", "HasEL2VMSA", "true",
+ "Enable Exception Level 2 Virtual Memory System Architecture">;
+
+def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true",
+ "Enable Exception Level 3">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
+def HasV8_0aOps : SubtargetFeature<"v8a", "HasV8_0aOps", "true",
+ "Support ARM v8.0a instructions", [FeatureEL2VMSA, FeatureEL3]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE, FeatureRDM,
- FeaturePAN, FeatureLOR, FeatureVH]>;
+ "Support ARM v8.1a instructions", [HasV8_0aOps, FeatureCRC, FeatureLSE,
+ FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeaturePsUAO,
@@ -459,8 +476,8 @@ def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true",
def HasV8_4aOps : SubtargetFeature<"v8.4a", "HasV8_4aOps", "true",
"Support ARM v8.4a instructions", [HasV8_3aOps, FeatureDotProd,
FeatureNV, FeatureMPAM, FeatureDIT,
- FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeaturePMU, FeatureTLB_RMI,
- FeatureFlagM, FeatureRCPC_IMMO]>;
+ FeatureTRACEV8_4, FeatureAM, FeatureSEL2, FeatureTLB_RMI,
+ FeatureFlagM, FeatureRCPC_IMMO, FeatureLSE2]>;
def HasV8_5aOps : SubtargetFeature<
"v8.5a", "HasV8_5aOps", "true", "Support ARM v8.5a instructions",
@@ -477,6 +494,18 @@ def HasV8_7aOps : SubtargetFeature<
"v8.7a", "HasV8_7aOps", "true", "Support ARM v8.7a instructions",
[HasV8_6aOps, FeatureXS, FeatureWFxT, FeatureHCX]>;
+def HasV9_0aOps : SubtargetFeature<
+ "v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions",
+ [HasV8_5aOps, FeatureSVE2]>;
+
+def HasV9_1aOps : SubtargetFeature<
+ "v9.1a", "HasV9_1aOps", "true", "Support ARM v9.1a instructions",
+ [HasV8_6aOps, HasV9_0aOps]>;
+
+def HasV9_2aOps : SubtargetFeature<
+ "v9.2a", "HasV9_2aOps", "true", "Support ARM v9.2a instructions",
+ [HasV8_7aOps, HasV9_1aOps]>;
+
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
[//v8.1
@@ -553,7 +582,7 @@ class AArch64Unsupported { list<Predicate> F; }
def SVEUnsupported : AArch64Unsupported {
let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
- HasSVE2BitPerm];
+ HasSVE2BitPerm, HasSVEorStreamingSVE, HasSVE2orStreamingSVE];
}
def PAUnsupported : AArch64Unsupported {
@@ -579,660 +608,553 @@ include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
include "AArch64SchedTSV110.td"
-def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
- "Cortex-A35 ARM processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon
- ]>;
+def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors">;
-def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors", [
+ FeatureFuseAES,
FeatureBalanceFPOps,
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- ]>;
+ FeaturePostRAScheduler]>;
-def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
+def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
"Cortex-A55 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeatureFullFP16,
- FeatureDotProd,
- FeatureRCPC,
- FeaturePerfMon,
FeaturePostRAScheduler,
+ FeatureFuseAddress]>;
+
+def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
+ "Cortex-A510 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler
]>;
-def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
+ FeatureFuseAES,
FeatureBalanceFPOps,
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureFuseAES,
FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive
- ]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
+def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
"Cortex-A65 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureFuseAddress,
FeatureFuseAES,
- FeatureFuseLiterals,
- FeatureNEON,
- FeatureRAS,
- FeatureRCPC,
- FeatureSSBS,
- ]>;
+ FeatureFuseAddress,
+ FeatureFuseLiterals]>;
-def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
"Cortex-A72 ARM processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon
- ]>;
+ FeatureFuseLiterals]>;
-def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
+def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon
- ]>;
+ FeatureFuseAES]>;
-def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
+def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
"Cortex-A75 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeatureFullFP16,
- FeatureDotProd,
- FeatureRCPC,
- FeaturePerfMon
- ]>;
+ FeatureFuseAES]>;
-def ProcA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
+def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
- HasV8_2aOps,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeatureRCPC,
- FeatureCrypto,
- FeatureFullFP16,
- FeatureDotProd,
- FeatureSSBS
- ]>;
-
-def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
+ FeatureFuseAES]>;
+
+def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", [
- HasV8_2aOps,
- FeatureCmpBccFusion,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON, FeatureRCPC,
- FeatureCrypto,
- FeatureFullFP16,
- FeatureDotProd
- ]>;
-
-def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily",
- "CortexA78",
+ FeatureCmpBccFusion,
+ FeatureFuseAES]>;
+
+def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", [
- HasV8_2aOps,
FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeatureRCPC,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureSPE,
- FeatureFullFP16,
- FeatureSSBS,
- FeatureDotProd]>;
-
-def ProcA78C : SubtargetFeature<"cortex-a78c", "ARMProcFamily",
+ FeaturePostRAScheduler]>;
+
+def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
"CortexA78C",
"Cortex-A78C ARM processors", [
- HasV8_2aOps,
FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFlagM,
- FeatureFP16FML,
- FeatureFPARMv8,
- FeatureFullFP16,
FeatureFuseAES,
- FeatureNEON,
- FeaturePAuth,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureRCPC,
- FeatureSPE,
- FeatureSSBS]>;
-
-def ProcR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
+ FeaturePostRAScheduler]>;
+
+def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
+ "Cortex-A710 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureCmpBccFusion]>;
+
+def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
"CortexR82",
- "Cortex-R82 ARM Processors", [
- FeaturePostRAScheduler,
- // All other features are implied by v8_0r ops:
- HasV8_0rOps,
- ]>;
+ "Cortex-R82 ARM processors", [
+ FeaturePostRAScheduler]>;
-def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
+def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", [
- HasV8_2aOps,
FeatureCmpBccFusion,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeatureRCPC,
- FeaturePerfMon,
+ FeaturePostRAScheduler]>;
+
+def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
+ "Cortex-X2 ARM processors", [
+ FeatureFuseAES,
FeaturePostRAScheduler,
- FeatureSPE,
- FeatureFullFP16,
- FeatureDotProd]>;
+ FeatureCmpBccFusion]>;
-def ProcA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
+def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
"Fujitsu A64FX processors", [
- HasV8_2aOps,
- FeatureFPARMv8,
- FeatureNEON,
- FeatureSHA2,
- FeaturePerfMon,
- FeatureFullFP16,
- FeatureSVE,
- FeaturePostRAScheduler,
- FeatureComplxNum,
- FeatureAggressiveFMA,
- FeatureArithmeticBccFusion,
- FeaturePredictableSelectIsExpensive
- ]>;
-
-def ProcCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
- "Nvidia Carmel processors", [
- HasV8_2aOps,
- FeatureNEON,
- FeatureCrypto,
- FeatureFullFP16
- ]>;
+ FeaturePostRAScheduler,
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeaturePredictableSelectIsExpensive
+ ]>;
+
+def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
+ "Nvidia Carmel processors">;
// Note that cyclone does not fuse AES instructions, but newer apple chips do
// perform the fusion and cyclone is used by default when targeting apple OSes.
-def ProcAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
- "Apple A7 (the CPU formerly known as Cyclone)", [
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureZCZeroingFPWorkaround
- ]>;
+def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
+ "Apple A7 (the CPU formerly known as Cyclone)", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAES, FeatureFuseCryptoEOR,
+ FeatureZCRegMove,
+ FeatureZCZeroing,
+ FeatureZCZeroingFPWorkaround]
+ >;
-def ProcAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
+def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
"Apple A10", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureCRC,
- FeatureRDM,
- FeaturePAN,
- FeatureLOR,
- FeatureVH,
- ]>;
-
-def ProcAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
+ FeatureZCZeroing]
+ >;
+
+def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
"Apple A11", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- HasV8_2aOps
- ]>;
+ FeatureZCZeroing]
+ >;
-def ProcAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
+def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
"Apple A12", [
FeatureAlternateSExtLoadCVTF32Pattern,
FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
FeatureFuseAES,
FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- HasV8_3aOps
- ]>;
-
-def ProcAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
- "Apple A13", [
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureFuseCryptoEOR,
- FeatureNEON,
- FeaturePerfMon,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureSHA3,
- HasV8_4aOps
- ]>;
-
-def ProcAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
- "Apple A14", [
- FeatureAggressiveFMA,
- FeatureAlternateSExtLoadCVTF32Pattern,
- FeatureAltFPCmp,
- FeatureArithmeticBccFusion,
- FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDisableLatencySchedHeuristic,
- FeatureFPARMv8,
- FeatureFRInt3264,
- FeatureFuseAddress,
- FeatureFuseAES,
- FeatureFuseArithmeticLogic,
- FeatureFuseCCSelect,
- FeatureFuseCryptoEOR,
- FeatureFuseLiterals,
- FeatureNEON,
- FeaturePerfMon,
- FeatureSpecRestrict,
- FeatureSSBS,
- FeatureSB,
- FeaturePredRes,
- FeatureCacheDeepPersist,
- FeatureZCRegMove,
- FeatureZCZeroing,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureSHA3,
- HasV8_4aOps
- ]>;
-
-def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
+ FeatureZCZeroing]
+ >;
+
+def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
+ "Apple A13", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAES,
+ FeatureFuseCryptoEOR,
+ FeatureZCRegMove,
+ FeatureZCZeroing]
+ >;
+
+def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
+ "Apple A14", [
+ FeatureAggressiveFMA,
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureZCRegMove,
+ FeatureZCZeroing]>;
+
+def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
- [FeatureCRC,
- FeatureCrypto,
- FeatureExynosCheapAsMoveHandling,
+ [FeatureExynosCheapAsMoveHandling,
FeatureForce32BitJumpTables,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
-def ProcExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
- "Samsung Exynos-M4 processors",
- [HasV8_2aOps,
- FeatureArithmeticBccFusion,
+def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
+                                     "Samsung Exynos-M4 processors",
+ [FeatureArithmeticBccFusion,
FeatureArithmeticCbzFusion,
- FeatureCrypto,
- FeatureDotProd,
FeatureExynosCheapAsMoveHandling,
FeatureForce32BitJumpTables,
- FeatureFullFP16,
FeatureFuseAddress,
FeatureFuseAES,
FeatureFuseArithmeticLogic,
FeatureFuseCCSelect,
FeatureFuseLiterals,
FeatureLSLFast,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeatureZCZeroing]>;
-def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
+def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors", [
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast
- ]>;
+ FeatureLSLFast]
+ >;
-def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
+def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
- FeatureCRC,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureRDM,
FeatureZCZeroing,
FeatureLSLFast,
FeatureSlowSTRQro
]>;
-def ProcNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily",
- "NeoverseE1",
+def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",
"Neoverse E1 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureNEON,
- FeatureRCPC,
- FeatureSSBS,
FeaturePostRAScheduler,
- FeatureFuseAES,
+ FeatureFuseAES
]>;
-def ProcNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily",
- "NeoverseN1",
+def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1",
"Neoverse N1 ARM processors", [
- HasV8_2aOps,
- FeatureCrypto,
- FeatureDotProd,
- FeatureFPARMv8,
- FeatureFullFP16,
- FeatureNEON,
- FeatureRCPC,
- FeatureSPE,
- FeatureSSBS,
FeaturePostRAScheduler,
- FeatureFuseAES,
+ FeatureFuseAES
]>;
-def ProcNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily",
- "NeoverseN2",
+def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2",
"Neoverse N2 ARM processors", [
- HasV8_5aOps,
- FeatureBF16,
- FeatureETE,
- FeatureMatMulInt8,
- FeatureMTE,
- FeatureSVE2,
- FeatureSVE2BitPerm,
- FeatureTRBE,
FeaturePostRAScheduler,
- FeatureCrypto,
- FeatureFuseAES,
+ FeatureFuseAES
+ ]>;
+def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB",
+ "Neoverse 512-TVB ARM processors", [
+ FeaturePostRAScheduler,
+ FeatureFuseAES
]>;
-def ProcNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily",
- "NeoverseV1",
+def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1",
"Neoverse V1 ARM processors", [
- HasV8_4aOps,
- FeatureBF16,
- FeatureCacheDeepPersist,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureFP16FML,
- FeatureFullFP16,
FeatureFuseAES,
- FeatureMatMulInt8,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureRandGen,
- FeatureSPE,
- FeatureSSBS,
- FeatureSVE]>;
+ FeaturePostRAScheduler]>;
-def ProcSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
+def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeatureSPE,
- FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast,
- HasV8_4aOps]>;
+ FeatureLSLFast]>;
-def ProcThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily",
- "ThunderX2T99",
+def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",
"Cavium ThunderX2 processors", [
FeatureAggressiveFMA,
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeatureArithmeticBccFusion,
- FeatureNEON,
- FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureLSE,
- HasV8_1aOps]>;
-
-def ProcThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
- "ThunderX3T110",
- "Marvell ThunderX3 processors", [
- FeatureAggressiveFMA,
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
FeatureArithmeticBccFusion,
- FeatureNEON,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureLSE,
- FeaturePAuth,
- FeatureBalanceFPOps,
- FeaturePerfMon,
- FeatureStrictAlign,
- HasV8_3aOps]>;
-
-def ProcThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
+ FeaturePredictableSelectIsExpensive]>;
+
+def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
+ "ThunderX3T110",
+ "Marvell ThunderX3 processors", [
+ FeatureAggressiveFMA,
+ FeatureArithmeticBccFusion,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureBalanceFPOps,
+ FeatureStrictAlign]>;
+
+def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
+def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
"ThunderXT88",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
+def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
"ThunderXT81",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
+def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
"ThunderXT83",
"Cavium ThunderX processors", [
- FeatureCRC,
- FeatureCrypto,
- FeatureFPARMv8,
- FeaturePerfMon,
FeaturePostRAScheduler,
- FeaturePredictableSelectIsExpensive,
- FeatureNEON]>;
+ FeaturePredictableSelectIsExpensive]>;
-def ProcTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
+def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
"HiSilicon TS-V110 processors", [
- HasV8_2aOps,
- FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureSPE,
- FeatureFullFP16,
- FeatureFP16FML,
- FeatureDotProd]>;
-
-def : ProcessorModel<"generic", NoSchedModel, [
- FeatureFPARMv8,
- FeatureFuseAES,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
-// ETE and TRBE are future architecture extensions. We temporarily enable them
-// by default for users targeting generic AArch64, until it is decided in which
-// armv8.x-a architecture revision they will end up. The extensions do not
-// affect code generated by the compiler and can be used only by explicitly
-// mentioning the new system register names in assembly.
- FeatureETE
- ]>;
-
-def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
-def : ProcessorModel<"cortex-a34", CortexA53Model, [ProcA35]>;
-def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : ProcessorModel<"cortex-a55", CortexA55Model, [ProcA55]>;
-def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
-def : ProcessorModel<"cortex-a65", CortexA53Model, [ProcA65]>;
-def : ProcessorModel<"cortex-a65ae", CortexA53Model, [ProcA65]>;
-def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
-def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
-def : ProcessorModel<"cortex-a75", CortexA57Model, [ProcA75]>;
-def : ProcessorModel<"cortex-a76", CortexA57Model, [ProcA76]>;
-def : ProcessorModel<"cortex-a76ae", CortexA57Model, [ProcA76]>;
-def : ProcessorModel<"cortex-a77", CortexA57Model, [ProcA77]>;
-def : ProcessorModel<"cortex-a78", CortexA57Model, [ProcA78]>;
-def : ProcessorModel<"cortex-a78c", CortexA57Model, [ProcA78C]>;
-def : ProcessorModel<"cortex-r82", CortexA55Model, [ProcR82]>;
-def : ProcessorModel<"cortex-x1", CortexA57Model, [ProcX1]>;
-def : ProcessorModel<"neoverse-e1", CortexA53Model, [ProcNeoverseE1]>;
-def : ProcessorModel<"neoverse-n1", CortexA57Model, [ProcNeoverseN1]>;
-def : ProcessorModel<"neoverse-n2", CortexA57Model, [ProcNeoverseN2]>;
-def : ProcessorModel<"neoverse-v1", CortexA57Model, [ProcNeoverseV1]>;
-def : ProcessorModel<"exynos-m3", ExynosM3Model, [ProcExynosM3]>;
-def : ProcessorModel<"exynos-m4", ExynosM4Model, [ProcExynosM4]>;
-def : ProcessorModel<"exynos-m5", ExynosM5Model, [ProcExynosM4]>;
-def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
-def : ProcessorModel<"saphira", FalkorModel, [ProcSaphira]>;
-def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
+ FeaturePostRAScheduler]>;
+
+
+def ProcessorFeatures {
+ list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeaturePerfMon];
+ list<SubtargetFeature> A55 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeaturePerfMon];
+ list<SubtargetFeature> A510 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureMatMulInt8, FeatureBF16, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
+ list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeatureSSBS, FeatureRAS];
+ list<SubtargetFeature> A76 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeatureSSBS];
+ list<SubtargetFeature> A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC];
+ list<SubtargetFeature> A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureRCPC, FeaturePerfMon, FeatureSPE,
+ FeatureSSBS];
+ list<SubtargetFeature> A78C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureFullFP16, FeatureDotProd,
+ FeatureFlagM, FeatureFP16FML, FeaturePAuth,
+ FeaturePerfMon, FeatureRCPC, FeatureSPE,
+ FeatureSSBS];
+ list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureETE, FeatureMTE, FeatureFP16FML,
+ FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8];
+ list<SubtargetFeature> R82 = [HasV8_0rOps];
+ list<SubtargetFeature> X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureRCPC, FeaturePerfMon,
+ FeatureSPE, FeatureFullFP16, FeatureDotProd];
+ list<SubtargetFeature> X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
+ FeatureMatMulInt8, FeatureBF16, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
+ list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
+ FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
+ FeatureSVE, FeatureComplxNum];
+ list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureCrypto,
+ FeatureFullFP16];
+ list<SubtargetFeature> AppleA7 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8,
+                                    FeatureNEON, FeaturePerfMon, FeatureAppleA7SysReg];
+ list<SubtargetFeature> AppleA10 = [HasV8_0aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureCRC,
+ FeatureRDM, FeaturePAN, FeatureLOR, FeatureVH];
+ list<SubtargetFeature> AppleA11 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFullFP16];
+ list<SubtargetFeature> AppleA12 = [HasV8_3aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFullFP16];
+ list<SubtargetFeature> AppleA13 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFullFP16,
+ FeatureFP16FML, FeatureSHA3];
+ list<SubtargetFeature> AppleA14 = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureFRInt3264,
+ FeatureSpecRestrict, FeatureSSBS, FeatureSB,
+ FeaturePredRes, FeatureCacheDeepPersist,
+ FeatureFullFP16, FeatureFP16FML, FeatureSHA3,
+ FeatureAltFPCmp];
+ list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeaturePerfMon];
+ list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
+ FeatureFullFP16, FeaturePerfMon];
+ list<SubtargetFeature> Falkor = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeaturePerfMon,
+ FeatureRDM];
+ list<SubtargetFeature> NeoverseE1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
+ FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
+ FeatureRCPC, FeatureSSBS];
+ list<SubtargetFeature> NeoverseN1 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
+ FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
+ FeatureRCPC, FeatureSPE, FeatureSSBS];
+ list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,
+ FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
+ FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto];
+ list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
+ FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
+ FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
+ FeaturePerfMon, FeatureRandGen, FeatureSPE,
+ FeatureSSBS, FeatureSVE];
+ list<SubtargetFeature> NeoverseV1 = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
+ FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
+ FeatureFullFP16, FeatureMatMulInt8, FeatureNEON,
+ FeaturePerfMon, FeatureRandGen, FeatureSPE,
+ FeatureSSBS, FeatureSVE];
+ list<SubtargetFeature> Saphira = [HasV8_4aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeatureSPE, FeaturePerfMon];
+ list<SubtargetFeature> ThunderX = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeaturePerfMon, FeatureNEON];
+ list<SubtargetFeature> ThunderX2T99 = [HasV8_1aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeatureLSE];
+ list<SubtargetFeature> ThunderX3T110 = [HasV8_3aOps, FeatureCRC, FeatureCrypto,
+ FeatureFPARMv8, FeatureNEON, FeatureLSE,
+ FeaturePAuth, FeaturePerfMon];
+ list<SubtargetFeature> TSV110 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureSPE,
+ FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
+
+ // ETE and TRBE are future architecture extensions. We temporarily enable them
+ // by default for users targeting generic AArch64. The extensions do not
+ // affect code generated by the compiler and can be used only by explicitly
+ // mentioning the new system register names in assembly.
+ list<SubtargetFeature> Generic = [FeatureFPARMv8, FeatureNEON, FeaturePerfMon, FeatureETE];
+}
+
+
+def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
+ [FeatureFuseAES, FeaturePostRAScheduler]>;
+def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
+ [TuneA35]>;
+def : ProcessorModel<"cortex-a34", CortexA53Model, ProcessorFeatures.A53,
+ [TuneA35]>;
+def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53,
+ [TuneA53]>;
+def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
+ [TuneA55]>;
+def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510,
+ [TuneA510]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
+ [TuneA57]>;
+def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
+ [TuneA65]>;
+def : ProcessorModel<"cortex-a65ae", CortexA53Model, ProcessorFeatures.A65,
+ [TuneA65]>;
+def : ProcessorModel<"cortex-a72", CortexA57Model, ProcessorFeatures.A53,
+ [TuneA72]>;
+def : ProcessorModel<"cortex-a73", CortexA57Model, ProcessorFeatures.A53,
+ [TuneA73]>;
+def : ProcessorModel<"cortex-a75", CortexA57Model, ProcessorFeatures.A55,
+ [TuneA75]>;
+def : ProcessorModel<"cortex-a76", CortexA57Model, ProcessorFeatures.A76,
+ [TuneA76]>;
+def : ProcessorModel<"cortex-a76ae", CortexA57Model, ProcessorFeatures.A76,
+ [TuneA76]>;
+def : ProcessorModel<"cortex-a77", CortexA57Model, ProcessorFeatures.A77,
+ [TuneA77]>;
+def : ProcessorModel<"cortex-a78", CortexA57Model, ProcessorFeatures.A78,
+ [TuneA78]>;
+def : ProcessorModel<"cortex-a78c", CortexA57Model, ProcessorFeatures.A78C,
+ [TuneA78C]>;
+def : ProcessorModel<"cortex-a710", CortexA57Model, ProcessorFeatures.A710,
+ [TuneA710]>;
+def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
+ [TuneR82]>;
+def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
+ [TuneX1]>;
+def : ProcessorModel<"cortex-x2", CortexA57Model, ProcessorFeatures.X2,
+ [TuneX2]>;
+def : ProcessorModel<"neoverse-e1", CortexA53Model,
+ ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
+def : ProcessorModel<"neoverse-n1", CortexA57Model,
+ ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>;
+def : ProcessorModel<"neoverse-n2", CortexA57Model,
+ ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>;
+def : ProcessorModel<"neoverse-512tvb", CortexA57Model,
+ ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>;
+def : ProcessorModel<"neoverse-v1", CortexA57Model,
+ ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>;
+def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3,
+ [TuneExynosM3]>;
+def : ProcessorModel<"exynos-m4", ExynosM4Model, ProcessorFeatures.ExynosM4,
+ [TuneExynosM4]>;
+def : ProcessorModel<"exynos-m5", ExynosM5Model, ProcessorFeatures.ExynosM4,
+ [TuneExynosM4]>;
+def : ProcessorModel<"falkor", FalkorModel, ProcessorFeatures.Falkor,
+ [TuneFalkor]>;
+def : ProcessorModel<"saphira", FalkorModel, ProcessorFeatures.Saphira,
+ [TuneSaphira]>;
+def : ProcessorModel<"kryo", KryoModel, ProcessorFeatures.A53, [TuneKryo]>;
+
// Cavium ThunderX/ThunderX T8X Processors
-def : ProcessorModel<"thunderx", ThunderXT8XModel, [ProcThunderX]>;
-def : ProcessorModel<"thunderxt88", ThunderXT8XModel, [ProcThunderXT88]>;
-def : ProcessorModel<"thunderxt81", ThunderXT8XModel, [ProcThunderXT81]>;
-def : ProcessorModel<"thunderxt83", ThunderXT8XModel, [ProcThunderXT83]>;
+def : ProcessorModel<"thunderx", ThunderXT8XModel, ProcessorFeatures.ThunderX,
+ [TuneThunderX]>;
+def : ProcessorModel<"thunderxt88", ThunderXT8XModel,
+ ProcessorFeatures.ThunderX, [TuneThunderXT88]>;
+def : ProcessorModel<"thunderxt81", ThunderXT8XModel,
+ ProcessorFeatures.ThunderX, [TuneThunderXT81]>;
+def : ProcessorModel<"thunderxt83", ThunderXT8XModel,
+ ProcessorFeatures.ThunderX, [TuneThunderXT83]>;
// Cavium ThunderX2T9X Processors. Formerly Broadcom Vulcan.
-def : ProcessorModel<"thunderx2t99", ThunderX2T99Model, [ProcThunderX2T99]>;
+def : ProcessorModel<"thunderx2t99", ThunderX2T99Model,
+ ProcessorFeatures.ThunderX2T99, [TuneThunderX2T99]>;
// Marvell ThunderX3T110 Processors.
-def : ProcessorModel<"thunderx3t110", ThunderX3T110Model, [ProcThunderX3T110]>;
-def : ProcessorModel<"tsv110", TSV110Model, [ProcTSV110]>;
+def : ProcessorModel<"thunderx3t110", ThunderX3T110Model,
+ ProcessorFeatures.ThunderX3T110, [TuneThunderX3T110]>;
+def : ProcessorModel<"tsv110", TSV110Model, ProcessorFeatures.TSV110,
+ [TuneTSV110]>;
// Support cyclone as an alias for apple-a7 so we can still LTO old bitcode.
-def : ProcessorModel<"cyclone", CycloneModel, [ProcAppleA7]>;
+def : ProcessorModel<"cyclone", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
// iPhone and iPad CPUs
-def : ProcessorModel<"apple-a7", CycloneModel, [ProcAppleA7]>;
-def : ProcessorModel<"apple-a8", CycloneModel, [ProcAppleA7]>;
-def : ProcessorModel<"apple-a9", CycloneModel, [ProcAppleA7]>;
-def : ProcessorModel<"apple-a10", CycloneModel, [ProcAppleA10]>;
-def : ProcessorModel<"apple-a11", CycloneModel, [ProcAppleA11]>;
-def : ProcessorModel<"apple-a12", CycloneModel, [ProcAppleA12]>;
-def : ProcessorModel<"apple-a13", CycloneModel, [ProcAppleA13]>;
-def : ProcessorModel<"apple-a14", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-a7", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
+def : ProcessorModel<"apple-a8", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
+def : ProcessorModel<"apple-a9", CycloneModel, ProcessorFeatures.AppleA7,
+ [TuneAppleA7]>;
+def : ProcessorModel<"apple-a10", CycloneModel, ProcessorFeatures.AppleA10,
+ [TuneAppleA10]>;
+def : ProcessorModel<"apple-a11", CycloneModel, ProcessorFeatures.AppleA11,
+ [TuneAppleA11]>;
+def : ProcessorModel<"apple-a12", CycloneModel, ProcessorFeatures.AppleA12,
+ [TuneAppleA12]>;
+def : ProcessorModel<"apple-a13", CycloneModel, ProcessorFeatures.AppleA13,
+ [TuneAppleA13]>;
+def : ProcessorModel<"apple-a14", CycloneModel, ProcessorFeatures.AppleA14,
+ [TuneAppleA14]>;
// Mac CPUs
-def : ProcessorModel<"apple-m1", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
+ [TuneAppleA14]>;
// watch CPUs.
-def : ProcessorModel<"apple-s4", CycloneModel, [ProcAppleA12]>;
-def : ProcessorModel<"apple-s5", CycloneModel, [ProcAppleA12]>;
+def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,
+ [TuneAppleA12]>;
+def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
+ [TuneAppleA12]>;
// Alias for the latest Apple processor model supported by LLVM.
-def : ProcessorModel<"apple-latest", CycloneModel, [ProcAppleA14]>;
+def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA14,
+ [TuneAppleA14]>;
// Fujitsu A64FX
-def : ProcessorModel<"a64fx", A64FXModel, [ProcA64FX]>;
+def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
+ [TuneA64FX]>;
// Nvidia Carmel
-def : ProcessorModel<"carmel", NoSchedModel, [ProcCarmel]>;
+def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
+ [TuneCarmel]>;
//===----------------------------------------------------------------------===//
// Assembly parser
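The restructuring above separates what a CPU implements from how code should be tuned for it: architecture and ISA features now come from the ProcessorFeatures lists, while the Tune* records carry only scheduling and fusion heuristics. A minimal sketch of how a new CPU would be registered under this split (the "future-cpu" name and the TuneFutureCPU record are hypothetical, for illustration only):
// Tuning-only record; it reuses an existing ARMProcFamily value so no
// C++ enum change is needed for this illustration.
def TuneFutureCPU : SubtargetFeature<"future-cpu", "ARMProcFamily", "CortexA55",
                                     "Hypothetical future CPU", [
                                     FeatureFuseAES,
                                     FeaturePostRAScheduler]>;
// Architecture features come from an existing ProcessorFeatures list;
// tuning features are passed separately as the fourth argument.
def : ProcessorModel<"future-cpu", CortexA55Model, ProcessorFeatures.A55,
                     [TuneFutureCPU]>;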
diff --git a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index e80fe2cada09..7fd51a98ad94 100644
--- a/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -159,7 +159,7 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB,
// If there is no non-pseudo in the current block, loop back around and try
// the previous block (if there is one).
while ((FMBB = getBBFallenThrough(FMBB, TII))) {
- for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend()))
+ for (MachineInstr &I : llvm::reverse(*FMBB))
if (!I.isPseudo())
return &I;
}
diff --git a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index c996d2df8c38..cd67e058a9c1 100644
--- a/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -377,8 +377,7 @@ void AArch64AdvSIMDScalar::transformInstruction(MachineInstr &MI) {
// processMachineBasicBlock - Main optimization loop.
bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
if (isProfitableToTransform(MI)) {
transformInstruction(MI);
Changed = true;
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index adefe3b37ee0..9f527a17d390 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -50,9 +50,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
@@ -293,7 +293,7 @@ void AArch64AsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) {
// ;DATA: higher 32 bits of the address of the trampoline
// LDP X0, X30, [SP], #16 ; pop X0 and the link register from the stack
//
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
@@ -653,6 +653,9 @@ bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
case 'x':
Reg = getXRegFromWReg(Reg);
break;
+ case 't':
+ Reg = getXRegFromXRegTuple(Reg);
+ break;
}
O << AArch64InstPrinter::getRegisterName(Reg);
@@ -749,6 +752,10 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
AArch64::GPR64allRegClass.contains(Reg))
return printAsmMRegister(MO, 'x', O);
+ // If this is an x register tuple, print an x register.
+ if (AArch64::GPR64x8ClassRegClass.contains(Reg))
+ return printAsmMRegister(MO, 't', O);
+
unsigned AltName = AArch64::NoRegAltName;
const TargetRegisterClass *RegClass;
if (AArch64::ZPRRegClass.contains(Reg)) {
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 4b7ce565eb1e..c90601443934 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -50,9 +50,9 @@ def CC_AArch64_AAPCS : CallingConv<[
// "sret" on argument 1 means instance methods.
CCIfInReg<CCIfType<[i64],
- CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1], [W0, W1]>>>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
- CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
// slot is 64-bit.
@@ -64,14 +64,14 @@ def CC_AArch64_AAPCS : CallingConv<[
CCIfNest<CCAssignToReg<[X18]>>,
// Pass SwiftSelf in a callee saved register.
- CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
// A SwiftError is passed in X21.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
// Pass SwiftAsync in an otherwise callee saved register so that it will be
// preserved for normal function calls.
- CCIfSwiftAsync<CCIfType<[i64], CCAssignToRegWithShadow<[X22], [W22]>>>,
+ CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
@@ -90,8 +90,7 @@ def CC_AArch64_AAPCS : CallingConv<[
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
CCIfType<[i64], CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
[X0, X1, X3, X5]>>>,
@@ -99,19 +98,13 @@ def CC_AArch64_AAPCS : CallingConv<[
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
- CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -131,7 +124,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
// their lanes are in a consistent order.
@@ -141,21 +134,14 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCBitConvertToType<f128>>>,
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
- CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -195,49 +181,41 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
// An SRet is passed in X8, not X0 like a normal pointer parameter.
- CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
// Put ByVal arguments directly on the stack. Minimum size and alignment of a
// slot is 64-bit.
CCIfByVal<CCPassByVal<8, 8>>,
// Pass SwiftSelf in a callee saved register.
- CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,
+ CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[X20]>>>,
// A SwiftError is passed in X21.
- CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[X21]>>>,
// Pass SwiftAsync in an otherwise callee saved register so that it will be
// preserved for normal function calls.
- CCIfSwiftAsync<CCIfType<[i64], CCAssignToRegWithShadow<[X22], [W22]>>>,
+ CCIfSwiftAsync<CCIfType<[i64], CCAssignToReg<[X22]>>>,
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
// i128 is split to two i64s, we can't fit half to register X7.
CCIfType<[i64],
- CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
- [W0, W1, W2, W3, W4, W5, W6]>>>,
+ CCIfSplit<CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6]>>>,
// i128 is split to two i64s, and its stack alignment is 16 bytes.
CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[bf16], CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[bf16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16, v4bf16],
- CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+ CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16, v8bf16],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
@@ -310,8 +288,8 @@ let Entry = 1 in
def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
+ CCIfType<[i32], CCAssignToReg<[W0]>>,
+ CCIfType<[i64], CCAssignToReg<[X0]>>,
// Pass the remaining arguments on the stack instead.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
@@ -320,14 +298,10 @@ def CC_AArch64_WebKit_JS : CallingConv<[
let Entry = 1 in
def RetCC_AArch64_WebKit_JS : CallingConv<[
- CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
- [X0, X1, X2, X3, X4, X5, X6, X7]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
- [W0, W1, W2, W3, W4, W5, W6, W7]>>,
- CCIfType<[f32], CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[f64], CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3, W4, W5, W6, W7]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>
]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index d938008a1e07..d2097f7e6ee3 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -189,6 +189,13 @@ def fold_merge_to_zext : GICombineRule<
(apply [{ applyFoldMergeToZext(*${d}, MRI, B, Observer); }])
>;
+def mutate_anyext_to_zext : GICombineRule<
+ (defs root:$d),
+ (match (wip_match_opcode G_ANYEXT):$d,
+ [{ return matchMutateAnyExtToZExt(*${d}, MRI); }]),
+ (apply [{ applyMutateAnyExtToZExt(*${d}, MRI, B, Observer); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -204,7 +211,7 @@ def AArch64PostLegalizerLoweringHelper
def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
[copy_prop, erase_undef_store, combines_for_extload,
- sext_trunc_sextload,
+ sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
redundant_and, xor_of_and_with_same_reg,
extractvecelt_pairwise_add, redundant_or,
@@ -212,6 +219,7 @@ def AArch64PostLegalizerCombinerHelper
form_bitfield_extract, rotate_out_of_range,
icmp_to_true_false_known_bits, merge_unmerge,
select_combines, fold_merge_to_zext,
- constant_fold, identity_combines]> {
+ constant_fold, identity_combines,
+ ptr_add_immed_chain, overlapping_and]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
index e90e8e3da057..533ab3b05de9 100644
--- a/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CondBrTuning.cpp
@@ -295,10 +295,7 @@ bool AArch64CondBrTuning::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
bool LocalChange = false;
- for (MachineBasicBlock::iterator I = MBB.getFirstTerminator(),
- E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : MBB.terminators()) {
switch (MI.getOpcode()) {
default:
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index b2eee2845ba9..4c04e04a7d3c 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -937,12 +937,16 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
}
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ MachineFunction &MF = *MBB.getParent();
+ // Try to create new inst without implicit operands added.
+ MachineInstr *NewMI = MF.CreateMachineInstr(
+ TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
+ MBB.insert(MBBI, NewMI);
+ MachineInstrBuilder MIB1(MF, NewMI);
+ MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
@@ -1049,6 +1053,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::MOVaddrEXT: {
// Expand into ADRP + ADD.
Register DstReg = MI.getOperand(0).getReg();
+ assert(DstReg != AArch64::XZR);
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
.add(MI.getOperand(1));
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 9acda17b816f..3dc694df509d 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3483,7 +3483,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
- return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
+ return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -3499,7 +3499,7 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// address spaces.
return false;
- return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memset", II->arg_size() - 1);
}
case Intrinsic::sin:
case Intrinsic::cos:
@@ -3533,10 +3533,10 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
}
ArgListTy Args;
- Args.reserve(II->getNumArgOperands());
+ Args.reserve(II->arg_size());
// Populate the argument list.
- for (auto &Arg : II->arg_operands()) {
+ for (auto &Arg : II->args()) {
ArgListEntry Entry;
Entry.Val = Arg;
Entry.Ty = Arg->getType();
@@ -4806,7 +4806,7 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
- !(C.isPowerOf2() || (-C).isPowerOf2()))
+ !(C.isPowerOf2() || C.isNegatedPowerOf2()))
return selectBinaryOp(I, ISD::SDIV);
unsigned Lg2 = C.countTrailingZeros();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f6a528c0e6fd..b630f4f0df5f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1158,11 +1158,33 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// ORR is sufficient; it is assumed a Swift kernel would initialize the TBI
// bits so that is still true.
if (HasFP && AFI->hasSwiftAsyncContext()) {
- // ORR x29, x29, #0x1000_0000_0000_0000
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
- .addUse(AArch64::FP)
- .addImm(0x1100)
- .setMIFlag(MachineInstr::FrameSetup);
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (Subtarget.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* that can be
+ // combined with the frame pointer to signal an extended frame.
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::LOADgot), AArch64::X16)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ AArch64II::MO_GOT);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addUse(AArch64::X16)
+ .addImm(Subtarget.isTargetILP32() ? 32 : 0);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+
+ case SwiftAsyncFramePointerMode::Always:
+ // ORR x29, x29, #0x1000_0000_0000_0000
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXri), AArch64::FP)
+ .addUse(AArch64::FP)
+ .addImm(0x1100)
+ .setMIFlag(MachineInstr::FrameSetup);
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
}
// All calls are tail calls in GHC calling conv, and functions have no
@@ -1205,7 +1227,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
- if (!NeedsWinCFI && needsFrameMoves) {
+ if (needsFrameMoves) {
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
@@ -1631,7 +1653,8 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
// The AUTIASP instruction assembles to a hint instruction before v8.3a so
// this instruction can safely be used for any v8a architecture.
// From v8.3a onwards there are optimised authenticate LR and return
- // instructions, namely RETA{A,B}, that can be used instead.
+ // instructions, namely RETA{A,B}, that can be used instead. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBI != MBB.end() &&
MBBI->getOpcode() == AArch64::RET_ReallyLR) {
BuildMI(MBB, MBBI, DL,
@@ -1643,6 +1666,12 @@ static void InsertReturnAddressAuth(MachineFunction &MF,
MBB, MBBI, DL,
TII->get(MFI.shouldSignWithBKey() ? AArch64::AUTIBSP : AArch64::AUTIASP))
.setMIFlag(MachineInstr::FrameDestroy);
+
+ unsigned CFIIndex =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
@@ -2472,22 +2501,20 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
.setMIFlag(MachineInstr::FrameSetup);
- if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
- // Emit a CFI instruction that causes 8 to be subtracted from the value of
- // x18 when unwinding past this frame.
- static const char CFIInst[] = {
- dwarf::DW_CFA_val_expression,
- 18, // register
- 2, // length
- static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
- static_cast<char>(-8) & 0x7f, // addend (sleb128)
- };
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
- nullptr, StringRef(CFIInst, sizeof(CFIInst))));
- BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlag(MachineInstr::FrameSetup);
- }
+ // Emit a CFI instruction that causes 8 to be subtracted from the value of
+ // x18 when unwinding past this frame.
+ static const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ 18, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
+ static_cast<char>(-8) & 0x7f, // addend (sleb128)
+ };
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+ nullptr, StringRef(CFIInst, sizeof(CFIInst))));
+ BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
// This instruction also makes x18 live-in to the entry block.
MBB.addLiveIn(AArch64::X18);
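As an aside on the CFI escape emitted above: the -8 addend is stored as a single SLEB128 byte, which is why the code masks the cast char with 0x7f. A minimal, self-contained sketch of that single-byte encoding (the helper name is ours, not LLVM's):

#include <cassert>
#include <cstdint>

// Encode a value in [-64, 63] as a single SLEB128 byte: the low 7 bits of the
// two's-complement value, with the continuation bit (0x80) clear.
static uint8_t encodeSleb128SingleByte(int8_t Value) {
  assert(Value >= -64 && Value <= 63 && "needs more than one byte");
  return static_cast<uint8_t>(Value) & 0x7f;
}

int main() {
  // Matches the addend byte built above: static_cast<char>(-8) & 0x7f == 0x78.
  assert(encodeSleb128SingleByte(-8) == 0x78);
  return 0;
}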
@@ -2509,9 +2536,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
}
return true;
}
- for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
- ++RPII) {
- RegPairInfo RPI = *RPII;
+ for (const RegPairInfo &RPI : llvm::reverse(RegPairs)) {
unsigned Reg1 = RPI.Reg1;
unsigned Reg2 = RPI.Reg2;
unsigned StrOpc;
@@ -3512,7 +3537,14 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
return StackOffset::getFixed(MFI.getObjectOffset(FI));
}
- return getFrameIndexReference(MF, FI, FrameReg);
+ // Go to common code if we cannot provide sp + offset.
+ if (MFI.hasVarSizedObjects() ||
+ MF.getInfo<AArch64FunctionInfo>()->getStackSizeSVE() ||
+ MF.getSubtarget().getRegisterInfo()->hasStackRealignment(MF))
+ return getFrameIndexReference(MF, FI, FrameReg);
+
+ FrameReg = AArch64::SP;
+ return getStackOffset(MF, MFI.getObjectOffset(FI));
}
/// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index f8adaf36db84..e6d997f91b47 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -67,8 +67,6 @@ public:
bool hasFP(const MachineFunction &MF) const override;
bool hasReservedCallFrame(const MachineFunction &MF) const override;
- bool hasSwiftExtendedFrame(const MachineFunction &MF) const;
-
bool assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 17e530a4641d..fe9b2f8883b9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -167,7 +167,7 @@ public:
case ISD::SPLAT_VECTOR: {
auto Opnd0 = N->getOperand(0);
if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
- if (CN->isNullValue())
+ if (CN->isZero())
return true;
if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
if (CN->isZero())
@@ -187,7 +187,7 @@ public:
case ISD::SPLAT_VECTOR: {
auto Opnd0 = N->getOperand(0);
if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
- if (CN->isNullValue())
+ if (CN->isZero())
return true;
if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
if (CN->isZero())
@@ -286,7 +286,8 @@ public:
void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
- unsigned Opc_rr, unsigned Opc_ri);
+ unsigned Opc_rr, unsigned Opc_ri,
+ bool IsIntr = false);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
@@ -1487,7 +1488,7 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
unsigned Scale, unsigned Opc_ri,
- unsigned Opc_rr) {
+ unsigned Opc_rr, bool IsIntr) {
assert(Scale < 4 && "Invalid scaling value.");
SDLoc DL(N);
EVT VT = N->getValueType(0);
@@ -1497,11 +1498,11 @@ void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
SDValue Base, Offset;
unsigned Opc;
std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
- N, Opc_rr, Opc_ri, N->getOperand(2),
+ N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
- SDValue Ops[] = {N->getOperand(1), // Predicate
- Base, // Memory operand
+ SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
+ Base, // Memory operand
Offset, Chain};
const EVT ResTys[] = {MVT::Untyped, MVT::Other};
@@ -2167,7 +2168,7 @@ static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
- (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
+ (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
}
// Look for bits that will be useful for later uses.
@@ -2965,8 +2966,8 @@ static int getIntOperandFromRegisterString(StringRef RegString) {
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
SDLoc DL(N);
int Reg = getIntOperandFromRegisterString(RegString->getString());
@@ -3011,8 +3012,8 @@ bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
SDLoc DL(N);
int Reg = getIntOperandFromRegisterString(RegString->getString());
@@ -3152,7 +3153,6 @@ bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SD
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
return true;
} else if ((ImmVal & 0xFF) == 0) {
- assert((ImmVal >= -32768) && (ImmVal <= 32512));
Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32);
return true;
@@ -3521,7 +3521,7 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.
ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
- if (ConstNode->isNullValue()) {
+ if (ConstNode->isZero()) {
if (VT == MVT::i32) {
SDValue New = CurDAG->getCopyFromReg(
CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
@@ -3895,6 +3895,69 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_ld64b:
SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
return;
+ case Intrinsic::aarch64_sve_ld2_sret: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
+ true);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
+ true);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
+ true);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
+ true);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ld3_sret: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
+ true);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
+ true);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
+ true);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
+ true);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ld4_sret: {
+ if (VT == MVT::nxv16i8) {
+ SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
+ true);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
+ SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
+ true);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
+ true);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
+ true);
+ return;
+ }
+ break;
+ }
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
@@ -4987,6 +5050,14 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
SDValue &Base,
SDValue &OffImm) {
const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
+ const DataLayout &DL = CurDAG->getDataLayout();
+
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+ return true;
+ }
if (MemVT == EVT())
return false;
@@ -5010,6 +5081,11 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
return false;
Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
+ }
+
OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e7282aad05e2..6e9e61c8e7ac 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -246,6 +246,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
+ if (Subtarget->hasLS64()) {
+ addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
+ setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
+ setOperationAction(ISD::STORE, MVT::i64x8, Custom);
+ }
+
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
@@ -779,6 +785,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
+ // Aligned 128-bit loads and stores are single-copy atomic according to the
+ // v8.4a spec.
+ if (Subtarget->hasLSE2()) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+ }
+
// 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
// custom lowering, as there are no un-paired non-temporal stores and
// legalization will break up 256 bit inputs.
@@ -882,9 +895,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
- // TODO: Do the same for FP_TO_*INT_SAT.
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+ setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
setTargetDAGCombine(ISD::FDIV);
// Try and combine setcc with csel
@@ -899,6 +913,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::CONCAT_VECTORS);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::STORE);
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
@@ -991,16 +1006,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
+
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
// AArch64 doesn't have direct vector ->f32 conversion instructions for
// elements smaller than i32, so promote the input to i32 first.
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
- setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
- setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
// Similarly, there is no direct i32 -> f64 vector conversion instruction.
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
@@ -1013,6 +1027,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
@@ -1020,6 +1038,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
} else {
// when AArch64 doesn't have fullfp16 support, promote the input
// to i32 first.
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
+ setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
+ setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
@@ -1034,6 +1056,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
+ for (auto VT : {MVT::v1i64, MVT::v2i64}) {
+ setOperationAction(ISD::UMAX, VT, Custom);
+ setOperationAction(ISD::SMAX, VT, Custom);
+ setOperationAction(ISD::UMIN, VT, Custom);
+ setOperationAction(ISD::SMIN, VT, Custom);
+ }
// AArch64 doesn't have MUL.2d:
setOperationAction(ISD::MUL, MVT::v2i64, Expand);
@@ -1260,6 +1288,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
setOperationAction(ISD::FMA, VT, Custom);
setOperationAction(ISD::FMAXIMUM, VT, Custom);
@@ -1447,6 +1476,8 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
if (!VT.isFloatingPoint())
setOperationAction(ISD::ABS, VT, Legal);
@@ -1502,6 +1533,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
@@ -1771,6 +1804,11 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known.Zero = APInt::getHighBitsSet(64, 32);
break;
}
+ case AArch64ISD::ASSERT_ZEXT_BOOL: {
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known.Zero |= APInt(Known.getBitWidth(), 0xFE);
+ break;
+ }
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
@@ -2023,6 +2061,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
+ MAKE_CASE(AArch64ISD::LS64_BUILD)
+ MAKE_CASE(AArch64ISD::LS64_EXTRACT)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
MAKE_CASE(AArch64ISD::FADDA_PRED)
@@ -2160,6 +2200,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INDEX_VECTOR)
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
+ MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
}
#undef MAKE_CASE
return nullptr;
@@ -2245,9 +2286,15 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case AArch64::F128CSEL:
return EmitF128CSEL(MI, BB);
+ case TargetOpcode::STATEPOINT:
+ // STATEPOINT is a pseudo instruction with no implicit defs/uses, while the
+ // BL call instruction it is eventually lowered to has an implicit def of LR.
+ // Add that def here as a dead implicit operand as a workaround.
+ MI.addOperand(*MI.getMF(), MachineOperand::CreateReg(AArch64::LR, true,
+ true, false, true));
+ LLVM_FALLTHROUGH;
case TargetOpcode::STACKMAP:
case TargetOpcode::PATCHPOINT:
- case TargetOpcode::STATEPOINT:
return emitPatchPoint(MI, BB);
case AArch64::CATCHRET:
@@ -2285,7 +2332,7 @@ static bool isZerosVector(const SDNode *N) {
auto Opnd0 = N->getOperand(0);
auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
- return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
+ return (CINT && CINT->isZero()) || (CFP && CFP->isZero());
}
/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
@@ -2967,9 +3014,9 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
}
}
- if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
+ if (!Cmp && (RHSC->isZero() || RHSC->isOne())) {
if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
- if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
+ if ((CC == ISD::SETNE) ^ RHSC->isZero())
AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
}
}
@@ -3134,14 +3181,14 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
// We can commute the SELECT_CC by inverting the condition. This
// might be needed to make this fit into a CSINV pattern.
- if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+ if (CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
}
// If the constants line up, perform the transform!
- if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
+ if (CTVal->isZero() && CFVal->isAllOnes()) {
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
@@ -3364,42 +3411,132 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
return SDValue();
}
+SDValue
+AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // AArch64 FP-to-int conversions saturate to the destination element size, so
+ // we can lower common saturating conversions to simple instructions.
+ SDValue SrcVal = Op.getOperand(0);
+ EVT SrcVT = SrcVal.getValueType();
+ EVT DstVT = Op.getValueType();
+ EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
+ uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
+ uint64_t SatWidth = SatVT.getScalarSizeInBits();
+ assert(SatWidth <= DstElementWidth &&
+ "Saturation width cannot exceed result width");
+
+ // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
+ // Currently, the `llvm.fpto[su]i.sat.*` intrinsics don't accept scalable
+ // types, so this is hard to reach.
+ if (DstVT.isScalableVector())
+ return SDValue();
+
+ EVT SrcElementVT = SrcVT.getVectorElementType();
+
+ // In the absence of FP16 support, promote f16 to f32 and saturate the result.
+ if (SrcElementVT == MVT::f16 &&
+ (!Subtarget->hasFullFP16() || DstElementWidth > 16)) {
+ MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal);
+ SrcVT = F32VT;
+ SrcElementVT = MVT::f32;
+ SrcElementWidth = 32;
+ } else if (SrcElementVT != MVT::f64 && SrcElementVT != MVT::f32 &&
+ SrcElementVT != MVT::f16)
+ return SDValue();
+
+ SDLoc DL(Op);
+ // Cases that we can emit directly.
+ if (SrcElementWidth == DstElementWidth && SrcElementWidth == SatWidth)
+ return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+ DAG.getValueType(DstVT.getScalarType()));
+
+ // Otherwise emit a conversion that saturates to a wider bitwidth and clamp
+ // the result afterwards. This is only valid if the legal conversion is wider
+ // than the requested saturation width. For f64, since there are no 64-bit
+ // vector MIN/MAX instructions, it can be simpler to scalarize (until SQXTN is selected).
+ if (SrcElementWidth < SatWidth || SrcElementVT == MVT::f64)
+ return SDValue();
+
+ EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
+ SDValue NativeCvt = DAG.getNode(Op.getOpcode(), DL, IntVT, SrcVal,
+ DAG.getValueType(IntVT.getScalarType()));
+ SDValue Sat;
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
+ SDValue MinC = DAG.getConstant(
+ APInt::getSignedMaxValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ SDValue Min = DAG.getNode(ISD::SMIN, DL, IntVT, NativeCvt, MinC);
+ SDValue MaxC = DAG.getConstant(
+ APInt::getSignedMinValue(SatWidth).sextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
+ } else {
+ SDValue MinC = DAG.getConstant(
+ APInt::getAllOnesValue(SatWidth).zextOrSelf(SrcElementWidth), DL,
+ IntVT);
+ Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
+}
+
SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
SelectionDAG &DAG) const {
// AArch64 FP-to-int conversions saturate to the destination register size, so
// we can lower common saturating conversions to simple instructions.
SDValue SrcVal = Op.getOperand(0);
-
EVT SrcVT = SrcVal.getValueType();
- EVT DstVT = Op.getValueType();
+ if (SrcVT.isVector())
+ return LowerVectorFP_TO_INT_SAT(Op, DAG);
+
+ EVT DstVT = Op.getValueType();
EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
uint64_t SatWidth = SatVT.getScalarSizeInBits();
uint64_t DstWidth = DstVT.getScalarSizeInBits();
assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
- // TODO: Support lowering of NEON and SVE conversions.
- if (SrcVT.isVector())
- return SDValue();
-
- // TODO: Saturate to SatWidth explicitly.
- if (SatWidth != DstWidth)
+ // In the absence of FP16 support, promote f16 to f32 and saturate the result.
+ if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16()) {
+ SrcVal = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal);
+ SrcVT = MVT::f32;
+ } else if (SrcVT != MVT::f64 && SrcVT != MVT::f32 && SrcVT != MVT::f16)
return SDValue();
- // In the absence of FP16 support, promote f32 to f16, like LowerFP_TO_INT().
- if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
- Op.getOperand(1));
-
+ SDLoc DL(Op);
// Cases that we can emit directly.
if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
(SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
- (DstVT == MVT::i64 || DstVT == MVT::i32))
- return Op;
+ DstVT == SatVT && (DstVT == MVT::i64 || DstVT == MVT::i32))
+ return DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal,
+ DAG.getValueType(DstVT));
+
+ // Otherwise emit a conversion that saturates to a wider bitwidth and clamp
+ // the result afterwards. This is only valid if the legal conversion is wider
+ // than the requested saturation width.
+ if (DstWidth < SatWidth)
+ return SDValue();
- // For all other cases, fall back on the expanded form.
- return SDValue();
+ SDValue NativeCvt =
+ DAG.getNode(Op.getOpcode(), DL, DstVT, SrcVal, DAG.getValueType(DstVT));
+ SDValue Sat;
+ if (Op.getOpcode() == ISD::FP_TO_SINT_SAT) {
+ SDValue MinC = DAG.getConstant(
+ APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
+ SDValue Min = DAG.getNode(ISD::SMIN, DL, DstVT, NativeCvt, MinC);
+ SDValue MaxC = DAG.getConstant(
+ APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth), DL, DstVT);
+ Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
+ } else {
+ SDValue MinC = DAG.getConstant(
+ APInt::getAllOnesValue(SatWidth).zextOrSelf(DstWidth), DL, DstVT);
+ Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Sat);
}
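Both saturating lowerings above share the same shape: convert at a legal width, clamp to the saturation bounds with min/max, then truncate. A scalar stand-in for that idea in plain C++ (illustrative only; the helper name and the f32-to-i8 choice are ours, and the explicit pre-clamp replaces the hardware FCVT's built-in saturation):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// Saturating f32 -> i8 built from a wider (i32) conversion plus a clamp,
// mirroring the "convert, SMIN/SMAX, truncate" sequence above.
static int8_t fptosiSat8(float X) {
  if (std::isnan(X))
    return 0;                      // fptosi.sat returns 0 for NaN.
  // Keep the wider conversion itself in range (the hardware FCVT saturates;
  // a plain C++ cast does not), then convert at the legal 32-bit width.
  float Clamped = std::min(std::max(X, -2147483648.0f), 2147483520.0f);
  int32_t Wide = static_cast<int32_t>(Clamped);
  // Clamp to the requested saturation width, then truncate.
  Wide = std::max(std::min(Wide, int32_t(INT8_MAX)), int32_t(INT8_MIN));
  return static_cast<int8_t>(Wide);
}

int main() {
  assert(fptosiSat8(1000.0f) == 127);
  assert(fptosiSat8(-1000.0f) == -128);
  assert(fptosiSat8(42.5f) == 42);
  return 0;
}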
SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
@@ -3938,8 +4075,8 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::aarch64_sve_ptrue:
- return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
- Op.getOperand(1));
+ return getPTrue(DAG, dl, Op.getValueType(),
+ cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
case Intrinsic::aarch64_sve_clz:
return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -4004,6 +4141,18 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_frecpx:
return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frecpe_x:
+ return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frecps_x:
+ return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::aarch64_sve_frsqrte_x:
+ return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
+ Op.getOperand(1));
+ case Intrinsic::aarch64_sve_frsqrts_x:
+ return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_fabs:
return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
@@ -4153,14 +4302,17 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
- VT.getVectorElementCount().getKnownMinValue() >= 4)
+ VT.getVectorElementCount().getKnownMinValue() >= 4 &&
+ !VT.isFixedLengthVector())
return true;
return false;
}
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
- return ExtVal.getValueType().isScalableVector();
+ return ExtVal.getValueType().isScalableVector() ||
+ useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
+ /*OverrideNEON=*/true);
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -4345,9 +4497,17 @@ SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
+ IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
+ MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ } else {
+ MemVT = getContainerForFixedLengthVector(DAG, MemVT);
+ IndexVT = MemVT.changeTypeToInteger();
+ }
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
+ Mask = DAG.getNode(
+ ISD::ZERO_EXTEND, DL,
+ VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
}
if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
@@ -4442,8 +4602,13 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
if (IsFixedLength) {
assert(Subtarget->useSVEForFixedLengthVectors() &&
"Cannot lower when not using SVE for fixed vectors");
- IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
- MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
+ IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
+ MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
+ } else {
+ MemVT = getContainerForFixedLengthVector(DAG, MemVT);
+ IndexVT = MemVT.changeTypeToInteger();
+ }
InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
StoreVal =
@@ -4452,6 +4617,9 @@ SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
ISD::ANY_EXTEND, DL,
VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
+ Mask = DAG.getNode(
+ ISD::ZERO_EXTEND, DL,
+ VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
} else if (VT.isFloatingPoint()) {
// Handle FP data by casting the data so an integer scatter can be used.
EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
@@ -4593,29 +4761,77 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
return Result;
}
} else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
- assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
- SDValue Lo =
- DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
- DAG.getConstant(0, Dl, MVT::i64));
- SDValue Hi =
- DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
- DAG.getConstant(1, Dl, MVT::i64));
- SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
- {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
- StoreNode->getMemoryVT(), StoreNode->getMemOperand());
- return Result;
+ return LowerStore128(Op, DAG);
+ } else if (MemVT == MVT::i64x8) {
+ SDValue Value = StoreNode->getValue();
+ assert(Value->getValueType(0) == MVT::i64x8);
+ SDValue Chain = StoreNode->getChain();
+ SDValue Base = StoreNode->getBasePtr();
+ EVT PtrVT = Base.getValueType();
+ for (unsigned i = 0; i < 8; i++) {
+ SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
+ Value, DAG.getConstant(i, Dl, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
+ DAG.getConstant(i * 8, Dl, PtrVT));
+ Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
+ StoreNode->getOriginalAlign());
+ }
+ return Chain;
}
return SDValue();
}
-// Custom lowering for extending v4i8 vector loads.
+/// Lower atomic or volatile 128-bit stores to a single STP instruction.
+SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
+ SelectionDAG &DAG) const {
+ MemSDNode *StoreNode = cast<MemSDNode>(Op);
+ assert(StoreNode->getMemoryVT() == MVT::i128);
+ assert(StoreNode->isVolatile() || StoreNode->isAtomic());
+ assert(!StoreNode->isAtomic() ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
+
+ SDValue Value = StoreNode->getOpcode() == ISD::STORE
+ ? StoreNode->getOperand(1)
+ : StoreNode->getOperand(2);
+ SDLoc DL(Op);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
+ DAG.getConstant(1, DL, MVT::i64));
+ SDValue Result = DAG.getMemIntrinsicNode(
+ AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
+ {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ StoreNode->getMemoryVT(), StoreNode->getMemOperand());
+ return Result;
+}
+
SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
assert(LoadNode && "Expected custom lowering of a load node");
+
+ if (LoadNode->getMemoryVT() == MVT::i64x8) {
+ SmallVector<SDValue, 8> Ops;
+ SDValue Base = LoadNode->getBasePtr();
+ SDValue Chain = LoadNode->getChain();
+ EVT PtrVT = Base.getValueType();
+ for (unsigned i = 0; i < 8; i++) {
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
+ DAG.getConstant(i * 8, DL, PtrVT));
+ SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
+ LoadNode->getPointerInfo(),
+ LoadNode->getOriginalAlign());
+ Ops.push_back(Part);
+ Chain = SDValue(Part.getNode(), 1);
+ }
+ SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
+ return DAG.getMergeValues({Loaded, Chain}, DL);
+ }
+
+ // Custom lowering for extending v4i8 vector loads.
EVT VT = Op->getValueType(0);
assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
@@ -4777,17 +4993,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::UDIV:
return LowerDIV(Op, DAG);
case ISD::SMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
- /*OverrideNEON=*/true);
case ISD::UMIN:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
- /*OverrideNEON=*/true);
case ISD::SMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
- /*OverrideNEON=*/true);
case ISD::UMAX:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
- /*OverrideNEON=*/true);
+ return LowerMinMax(Op, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL:
@@ -4835,6 +5044,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
/*OverrideNEON=*/true);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::ATOMIC_STORE:
+ if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
+ assert(Subtarget->hasLSE2());
+ return LowerStore128(Op, DAG);
+ }
+ return SDValue();
case ISD::STORE:
return LowerSTORE(Op, DAG);
case ISD::MSTORE:
@@ -5025,8 +5240,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
DenseMap<unsigned, SDValue> CopiedRegs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
// At this point, Ins[].VT may already be promoted to i32. To correctly
// handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
@@ -5186,10 +5400,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue = DAG.getExtLoad(
- ExtType, DL, VA.getLocVT(), Chain, FIN,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
- MemVT);
+ ArgValue =
+ DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI), MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
@@ -5229,6 +5442,19 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
ArgValue, DAG.getValueType(MVT::i32));
+
+ // i1 arguments are zero-extended to i8 by the caller. Emit a
+ // hint to reflect this.
+ if (Ins[i].isOrigArg()) {
+ Argument *OrigArg = MF.getFunction().getArg(Ins[i].getOrigArgIndex());
+ if (OrigArg->getType()->isIntegerTy(1)) {
+ if (!Ins[i].Flags.isZExt()) {
+ ArgValue = DAG.getNode(AArch64ISD::ASSERT_ZEXT_BOOL, DL,
+ ArgValue.getValueType(), ArgValue);
+ }
+ }
+ }
+
InVals.push_back(ArgValue);
}
}
@@ -5350,13 +5576,11 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- SDValue Store = DAG.getStore(
- Val.getValue(1), DL, Val, FIN,
- IsWin64
- ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
- GPRIdx,
- (i - FirstVariadicGPR) * 8)
- : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ IsWin64 ? MachinePointerInfo::getFixedStack(
+ MF, GPRIdx, (i - FirstVariadicGPR) * 8)
+ : MachinePointerInfo::getStack(MF, i * 8));
MemOps.push_back(Store);
FIN =
DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
@@ -5383,9 +5607,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store = DAG.getStore(
- Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
+ SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(MF, i * 16));
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
DAG.getConstant(16, DL, PtrVT));
@@ -5645,10 +5868,8 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
ArgChains.push_back(Chain);
// Add a chain value for each stack argument corresponding
- for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
- UE = DAG.getEntryNode().getNode()->use_end();
- U != UE; ++U)
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
+ for (SDNode *U : DAG.getEntryNode().getNode()->uses())
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
@@ -5670,6 +5891,19 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
}
+// Check if the value is zero-extended from i1 to i8
+static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
+ unsigned SizeInBits = Arg.getValueType().getSizeInBits();
+ if (SizeInBits < 8)
+ return false;
+
+ APInt LowBits(SizeInBits, 0xFF);
+ APInt RequiredZero(SizeInBits, 0xFE);
+ KnownBits Bits = DAG.computeKnownBits(Arg, LowBits, 4);
+ bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
+ return ZExtBool;
+}
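checkZExtBool above boils down to asking whether bits [7:1] of the argument are already known to be zero, in which case the explicit trunc/zext pair in the caller is redundant. A self-contained sketch of that test, with a plain known-zero bitmask standing in for KnownBits (the helper name is illustrative, not LLVM's):

#include <cassert>
#include <cstdint>

// A value is a properly zero-extended i1-in-i8 if bits 1..7 are known zero,
// i.e. the known-zero mask covers 0xFE.
static bool isZExtBool(uint64_t KnownZeroMask) {
  const uint64_t RequiredZero = 0xFE;
  return (KnownZeroMask & RequiredZero) == RequiredZero;
}

int main() {
  // e.g. the result of a setcc has every bit except bit 0 known zero.
  assert(isZExtBool(~UINT64_C(1)));
  // A value with an unknown low byte does not qualify.
  assert(!isZExtBool(0));
  return 0;
}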
+
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
@@ -5730,8 +5964,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (IsVarArg) {
// Handle fixed and variable vector arguments differently.
@@ -5868,8 +6101,22 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
case CCValAssign::AExt:
if (Outs[i].ArgVT == MVT::i1) {
// AAPCS requires i1 to be zero-extended to 8 bits by the caller.
- Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
- Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
+ //
+ // Check if we actually have to do this, because the value may
+ // already be zero-extended.
+ //
+ // We cannot just emit a (zext i8 (trunc (assert-zext i8)))
+ // and rely on DAGCombiner to fold this, because the following
+ // (anyext i32) is combined with (zext i8) in DAG.getNode:
+ //
+ // (ext (zext x)) -> (zext x)
+ //
+ // This will give us (zext i32), which we cannot remove, so
+ // try to check this beforehand.
+ if (!checkZExtBool(Arg, DAG)) {
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
+ }
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
@@ -5902,14 +6149,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
StoreSize *= NumParts;
}
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
MFI.setStackID(FI, TargetStackID::ScalableVector);
- MachinePointerInfo MPI =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
SDValue SpillSlot = Ptr;
@@ -6004,8 +6250,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
- DstInfo =
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
+ DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
// Make sure any stack arguments overlapping with where we're storing
// are loaded before this eventual operation. Otherwise they'll be
@@ -6015,8 +6260,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
- DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
- LocMemOffset);
+ DstInfo = MachinePointerInfo::getStack(MF, LocMemOffset);
}
if (Outs[i].Flags.isByVal()) {
@@ -6196,8 +6440,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
- *DAG.getContext());
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
@@ -6274,8 +6517,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
- const MCPhysReg *I =
- TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ const MCPhysReg *I = TRI->getCalleeSavedRegsViaCopy(&MF);
if (I) {
for (; *I; ++I) {
if (AArch64::GPR64RegClass.contains(*I))
@@ -6938,6 +7180,30 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
SDValue In2 = Op.getOperand(1);
EVT SrcVT = In2.getValueType();
+ if (VT.isScalableVector()) {
+ if (VT != SrcVT)
+ return SDValue();
+
+ // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
+ //
+ // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
+ // maybe useful for copysign operations with mismatched VTs.
+ //
+ // IntVT here is chosen so it's a legal type with the same element width
+ // as the input.
+ EVT IntVT =
+ getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
+ unsigned NumBits = VT.getScalarSizeInBits();
+ SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
+ SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
+ SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
+ getSVESafeBitCast(IntVT, In2, DAG));
+ SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
+ getSVESafeBitCast(IntVT, In1, DAG));
+ SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
+ return getSVESafeBitCast(VT, IntResult, DAG);
+ }
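The scalable-vector path above implements copysign with pure integer masking: the sign bit comes from the second operand and every other bit from the first. The same identity on a scalar double, as a standalone sketch (the helper name is ours):

#include <cassert>
#include <cstdint>
#include <cstring>

// copysign(x, y) == (y & SIGN_MASK) | (x & ~SIGN_MASK), operating on the raw
// bit pattern of an IEEE-754 double.
static double copySignBits(double X, double Y) {
  uint64_t XBits, YBits;
  std::memcpy(&XBits, &X, 8);
  std::memcpy(&YBits, &Y, 8);
  const uint64_t SignMask = UINT64_C(1) << 63;
  uint64_t ResultBits = (YBits & SignMask) | (XBits & ~SignMask);
  double Result;
  std::memcpy(&Result, &ResultBits, 8);
  return Result;
}

int main() {
  assert(copySignBits(3.5, -1.0) == -3.5);
  assert(copySignBits(-2.0, 1.0) == 2.0);
  return 0;
}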
+
if (SrcVT.bitsLT(VT))
In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
else if (SrcVT.bitsGT(VT))
@@ -7083,6 +7349,56 @@ SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
}
+SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned Opcode = Op.getOpcode();
+ ISD::CondCode CC;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Wrong instruction");
+ case ISD::SMAX:
+ CC = ISD::SETGT;
+ break;
+ case ISD::SMIN:
+ CC = ISD::SETLT;
+ break;
+ case ISD::UMAX:
+ CC = ISD::SETUGT;
+ break;
+ case ISD::UMIN:
+ CC = ISD::SETULT;
+ break;
+ }
+
+ if (VT.isScalableVector() ||
+ useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("Wrong instruction");
+ case ISD::SMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::SMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::UMAX:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
+ /*OverrideNEON=*/true);
+ case ISD::UMIN:
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
+ /*OverrideNEON=*/true);
+ }
+ }
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+}
+
SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -7255,8 +7571,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
// into (OR (ASR lhs, N-1), 1), which requires less instructions for the
// supported types.
- if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
- CTVal->isOne() && CFVal->isAllOnesValue() &&
+ if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal &&
+ CTVal->isOne() && CFVal->isAllOnes() &&
LHS.getValueType() == TVal.getValueType()) {
EVT VT = LHS.getValueType();
SDValue Shift =
@@ -7269,11 +7585,11 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// If both the TVal and the FVal are constants, see if we can swap them in
// order to for a CSINV or CSINC out of them.
- if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+ if (CTVal && CFVal && CTVal->isAllOnes() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
- } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
+ } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isZero()) {
std::swap(TVal, FVal);
std::swap(CTVal, CFVal);
CC = ISD::getSetCCInverse(CC, LHS.getValueType());
@@ -7352,7 +7668,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// FVal, respectively.
ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
- !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
+ !RHSVal->isZero() && !RHSVal->isAllOnes()) {
AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
// Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
// "a != C ? x : a" to avoid materializing C.
@@ -7425,11 +7741,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
SelectionDAG &DAG) const {
-
EVT Ty = Op.getValueType();
auto Idx = Op.getConstantOperandAPInt(2);
- if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
+
+ // This will select to an EXT instruction, whose immediate is a byte offset of
+ // at most 255, so only splice indices within 2048 bits can be lowered this way.
+ if (Idx.sge(-1) && Idx.slt(2048 / Ty.getVectorElementType().getSizeInBits()))
return Op;
+
return SDValue();
}
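To make that bound concrete: an EXT byte offset of at most 255 means any element starting within the first 2048 bits (256 bytes) is reachable, and dividing by the element width gives the exclusive limit on the splice index. A quick check of that arithmetic (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// Exclusive upper bound on the splice index for a given element width: the
// EXT immediate is a byte offset in [0, 255], so it can address any element
// that starts within the first 2048 bits.
static uint64_t spliceIndexLimit(uint64_t ElementBits) {
  return 2048 / ElementBits;
}

int main() {
  assert(spliceIndexLimit(8) == 256);  // i8 elements: indices 0..255.
  assert(spliceIndexLimit(32) == 64);  // i32/f32 elements: indices 0..63.
  assert(spliceIndexLimit(64) == 32);  // i64/f64 elements: indices 0..31.
  return 0;
}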
@@ -7937,10 +8256,12 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
SDValue Operand, SelectionDAG &DAG,
int &ExtraSteps) {
EVT VT = Operand.getValueType();
- if (ST->hasNEON() &&
- (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
- VT == MVT::f32 || VT == MVT::v1f32 ||
- VT == MVT::v2f32 || VT == MVT::v4f32)) {
+ if ((ST->hasNEON() &&
+ (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
+ VT == MVT::f32 || VT == MVT::v1f32 || VT == MVT::v2f32 ||
+ VT == MVT::v4f32)) ||
+ (ST->hasSVE() &&
+ (VT == MVT::nxv8f16 || VT == MVT::nxv4f32 || VT == MVT::nxv2f64))) {
if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
// For the reciprocal estimates, convergence is quadratic, so the number
// of digits is doubled after each iteration. In ARMv8, the accuracy of
@@ -8173,6 +8494,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
case 'r':
if (VT.isScalableVector())
return std::make_pair(0U, nullptr);
+ if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
+ return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
if (VT.getFixedSizeInBits() == 64)
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
@@ -8260,6 +8583,15 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return Res;
}
+EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
+ llvm::Type *Ty,
+ bool AllowUnknown) const {
+ if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
+ return EVT(MVT::i64x8);
+
+ return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
+}
+
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void AArch64TargetLowering::LowerAsmOperandForConstraint(
@@ -8618,7 +8950,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
Src.WindowBase *= Src.WindowScale;
}
- // Final sanity check before we try to actually produce a shuffle.
+ // Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
@@ -9250,8 +9582,11 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
} else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
// The lane is incremented by the index of the extract.
// Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
- Lane += V.getConstantOperandVal(1);
- V = V.getOperand(0);
+ auto VecVT = V.getOperand(0).getValueType();
+ if (VecVT.isFixedLengthVector() && VecVT.getFixedSizeInBits() <= 128) {
+ Lane += V.getConstantOperandVal(1);
+ V = V.getOperand(0);
+ }
} else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
// The lane is decremented if we are splatting from the 2nd operand.
// Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
@@ -9265,6 +9600,86 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
}
+// Return true if the mask can be widened: every pair of adjacent mask values
+// must be contiguous and start at an even index (undef entries are allowed),
+// in which case NewMask receives the halved indices.
+static bool isWideTypeMask(ArrayRef<int> M, EVT VT,
+ SmallVectorImpl<int> &NewMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts % 2 != 0)
+ return false;
+
+ NewMask.clear();
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ int M0 = M[i];
+ int M1 = M[i + 1];
+
+ // If both elements are undef, new mask is undef too.
+ if (M0 == -1 && M1 == -1) {
+ NewMask.push_back(-1);
+ continue;
+ }
+
+ if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
+ NewMask.push_back(M1 / 2);
+ continue;
+ }
+
+ if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
+ NewMask.push_back(M0 / 2);
+ continue;
+ }
+
+ NewMask.clear();
+ return false;
+ }
+
+ assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
+ return true;
+}
+
+// Try to widen element type to get a new mask value for a better permutation
+// sequence, so that we can use NEON shuffle instructions, such as ZIP1/2,
+// UZP1/2, TRN1/2, REV, INS, etc.
+// For example:
+// shufflevector <4 x i32> %a, <4 x i32> %b,
+// <4 x i32> <i32 6, i32 7, i32 2, i32 3>
+// is equivalent to:
+// shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
+// Finally, we can get:
+// mov v0.d[0], v1.d[1]
+static SDValue tryWidenMaskForShuffle(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT ScalarVT = VT.getVectorElementType();
+ unsigned ElementSize = ScalarVT.getFixedSizeInBits();
+ SDValue V0 = Op.getOperand(0);
+ SDValue V1 = Op.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // If combining adjacent elements, like two i16's -> i32, two i32's -> i64 ...
+ // We need to make sure the wider element type is legal. Thus, ElementSize
+ // should not be larger than 32 bits, and the i1 type is also excluded.
+ if (ElementSize > 32 || ElementSize == 1)
+ return SDValue();
+
+ SmallVector<int, 8> NewMask;
+ if (isWideTypeMask(Mask, VT, NewMask)) {
+ MVT NewEltVT = VT.isFloatingPoint()
+ ? MVT::getFloatingPointVT(ElementSize * 2)
+ : MVT::getIntegerVT(ElementSize * 2);
+ MVT NewVT = MVT::getVectorVT(NewEltVT, VT.getVectorNumElements() / 2);
+ if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) {
+ V0 = DAG.getBitcast(NewVT, V0);
+ V1 = DAG.getBitcast(NewVT, V1);
+ return DAG.getBitcast(VT,
+ DAG.getVectorShuffle(NewVT, DL, V0, V1, NewMask));
+ }
+ }
+
+ return SDValue();
+}
+
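For illustration, here is a minimal standalone sketch of the pair-merging rule used by isWideTypeMask/tryWidenMaskForShuffle above, written against plain std::vector<int> rather than LLVM's types; widenMask is a hypothetical helper name, not part of the patch.

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Merge each adjacent pair of lane indices (i, i+1) into one wide-lane index
    // when the pair is contiguous and starts at an even lane; -1 means undef.
    static bool widenMask(const std::vector<int> &M, std::vector<int> &NewMask) {
      if (M.size() % 2 != 0)
        return false;
      NewMask.clear();
      for (size_t I = 0; I < M.size(); I += 2) {
        int M0 = M[I], M1 = M[I + 1];
        if (M0 == -1 && M1 == -1) {        // both lanes undef -> undef wide lane
          NewMask.push_back(-1);
        } else if (M0 == -1 && (M1 % 2) == 1) {
          NewMask.push_back(M1 / 2);       // odd half of a wide lane, low half undef
        } else if (M0 != -1 && (M0 % 2) == 0 && (M1 == M0 + 1 || M1 == -1)) {
          NewMask.push_back(M0 / 2);       // even-aligned contiguous pair
        } else {
          NewMask.clear();
          return false;
        }
      }
      return true;
    }

    int main() {
      // <6, 7, 2, 3> on v4i32 widens to <3, 1> on v2i64, as in the comment above.
      std::vector<int> Mask = {6, 7, 2, 3};
      std::vector<int> Expected = {3, 1};
      std::vector<int> NewMask;
      assert(widenMask(Mask, NewMask) && NewMask == Expected);

      // <0, 2, 1, 3> cannot be widened: the pair (0, 2) is not contiguous.
      std::vector<int> BadMask = {0, 2, 1, 3};
      assert(!widenMask(BadMask, NewMask));
      return 0;
    }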
SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -9412,6 +9827,9 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
DstLaneV);
}
+ if (SDValue NewSD = tryWidenMaskForShuffle(Op, DAG))
+ return NewSD;
+
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
unsigned NumElts = VT.getVectorNumElements();
@@ -9454,9 +9872,10 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ if (ConstVal->isZero())
+ return SDValue(DAG.getMachineNode(AArch64::PFALSE, dl, VT), 0);
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
- // TODO: Add special case for constant false
}
// The general case of i1. There isn't any natural way to do this,
// so we use some trickery with whilelo.
@@ -10007,7 +10426,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned BitSize = VT.getVectorElementType().getSizeInBits();
APInt Val(BitSize,
Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
- if (Val.isNullValue() || Val.isAllOnesValue())
+ if (Val.isZero() || Val.isAllOnes())
return Op;
}
}
@@ -10311,8 +10730,29 @@ SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
isTypeLegal(Op.getValueType()) &&
"Expected legal scalable vector type!");
- if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
- return Op;
+ if (isTypeLegal(Op.getOperand(0).getValueType())) {
+ unsigned NumOperands = Op->getNumOperands();
+ assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
+ "Unexpected number of operands in CONCAT_VECTORS");
+
+ if (NumOperands == 2)
+ return Op;
+
+ // Concat each pair of subvectors and pack into the lower half of the array.
+ SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
+ while (ConcatOps.size() > 1) {
+ for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
+ SDValue V1 = ConcatOps[I];
+ SDValue V2 = ConcatOps[I + 1];
+ EVT SubVT = V1.getValueType();
+ EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ ConcatOps[I / 2] =
+ DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
+ }
+ ConcatOps.resize(ConcatOps.size() / 2);
+ }
+ return ConcatOps[0];
+ }
return SDValue();
}
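As a rough illustration of the pairwise reduction loop above (not part of the patch), the same shape can be modelled with strings standing in for SDValues, where "(A+B)" plays the role of CONCAT_VECTORS(A, B):

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>

    // Repeatedly concatenate adjacent pairs in place, halving the operand list
    // until a single value remains (the operand count is assumed a power of two).
    static std::string concatPairwise(std::vector<std::string> Ops) {
      assert(!Ops.empty() && (Ops.size() & (Ops.size() - 1)) == 0);
      while (Ops.size() > 1) {
        for (size_t I = 0, E = Ops.size(); I != E; I += 2)
          Ops[I / 2] = "(" + Ops[I] + "+" + Ops[I + 1] + ")";
        Ops.resize(Ops.size() / 2);
      }
      return Ops[0];
    }

    int main() {
      // Four subvectors are combined in two rounds, each round concatenating
      // pairs into a type with twice the element count.
      std::vector<std::string> Ops = {"A", "B", "C", "D"};
      assert(concatPairwise(Ops) == "((A+B)+(C+D))");
      return 0;
    }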
@@ -10432,6 +10872,10 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
+ // If we don't have legal types yet, do nothing
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+ return SDValue();
+
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
@@ -10450,6 +10894,18 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
InVT.getSizeInBits() == 128)
return Op;
+ if (useSVEForFixedLengthVectorVT(InVT)) {
+ SDLoc DL(Op);
+
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
+ SDValue NewInVec =
+ convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
+
+ SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
+ NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
+ return convertFromScalableVector(DAG, Op.getValueType(), Splice);
+ }
+
return SDValue();
}
@@ -10465,7 +10921,7 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SDLoc DL(Op);
EVT VT = Op.getValueType();
- if (!isTypeLegal(VT) || !VT.isInteger())
+ if (!isTypeLegal(VT))
return SDValue();
SDValue Vec0 = Op.getOperand(0);
@@ -10475,9 +10931,19 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
return SDValue();
- // Extend elements of smaller vector...
- EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
- SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+ EVT WideVT;
+ SDValue ExtVec;
+
+ if (VT.isFloatingPoint()) {
+ // The InVT type should be legal. We can safely cast the unpacked
+ // subvector from InVT -> VT.
+ WideVT = VT;
+ ExtVec = getSVESafeBitCast(VT, Vec1, DAG);
+ } else {
+ // Extend elements of smaller vector...
+ WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
+ ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
+ }
if (Idx == 0) {
SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
@@ -11085,7 +11551,7 @@ setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
// memVT is `NumVecs * VT`.
Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
EC * NumVecs);
- Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
+ Info.ptrVal = CI.getArgOperand(CI.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
Info.flags = MachineMemOperand::MOStore;
@@ -11123,7 +11589,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors loaded.
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
@@ -11142,14 +11608,14 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_VOID;
// Conservatively set memVT to the entire set of vectors stored.
unsigned NumElts = 0;
- for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
- Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ for (const Value *Arg : I.args()) {
+ Type *ArgTy = Arg->getType();
if (!ArgTy->isVectorTy())
break;
NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
}
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile stores with NEON intrinsics not supported
@@ -11203,9 +11669,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(1);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
- Info.flags = MachineMemOperand::MOLoad;
- if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
- Info.flags |= MachineMemOperand::MONonTemporal;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
return true;
}
case Intrinsic::aarch64_sve_stnt1: {
@@ -11215,9 +11679,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = DL.getABITypeAlign(PtrTy->getElementType());
- Info.flags = MachineMemOperand::MOStore;
- if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
- Info.flags |= MachineMemOperand::MONonTemporal;
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
default:
@@ -11502,7 +11964,7 @@ bool AArch64TargetLowering::shouldSinkOperands(
// can sink them too.
auto Ext1 = cast<Instruction>(I->getOperand(0));
auto Ext2 = cast<Instruction>(I->getOperand(1));
- if (areExtractShuffleVectors(Ext1, Ext2)) {
+ if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
Ops.push_back(&Ext1->getOperandUse(0));
Ops.push_back(&Ext2->getOperandUse(0));
}
@@ -11568,10 +12030,10 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
/// A helper function for determining the number of interleaved accesses we
/// will generate when lowering accesses of the given type.
-unsigned
-AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
- const DataLayout &DL) const {
- return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
+unsigned AArch64TargetLowering::getNumInterleavedAccesses(
+ VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
+ unsigned VecSize = UseScalable ? Subtarget->getMinSVEVectorSizeInBits() : 128;
+ return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
}
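For reference, a small standalone model of the computation above; the 128 constant is the NEON access width, and the scalable path substitutes the subtarget's minimum SVE vector width. numInterleavedAccesses is a hypothetical name used only here.

    #include <cassert>
    #include <cstdint>

    // Mirrors the expression above: (VecBits + 127) / AccessBits, clamped to at
    // least one access.
    static unsigned numInterleavedAccesses(uint64_t VecBits, unsigned AccessBits) {
      unsigned N = (VecBits + 127) / AccessBits;
      return N > 1 ? N : 1;
    }

    int main() {
      assert(numInterleavedAccesses(256, 128) == 2); // e.g. v8i32 -> two NEON accesses
      assert(numInterleavedAccesses(64, 128) == 1);  // e.g. v2i32 -> one access
      assert(numInterleavedAccesses(512, 256) == 2); // 256-bit SVE -> two accesses
      return 0;
    }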
MachineMemOperand::Flags
@@ -11583,24 +12045,63 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
}
bool AArch64TargetLowering::isLegalInterleavedAccessType(
- VectorType *VecTy, const DataLayout &DL) const {
+ VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
+
+ UseScalable = false;
// Ensure the number of vector elements is greater than 1.
- if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
+ if (NumElements < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
+ if (Subtarget->useSVEForFixedLengthVectors() &&
+ (VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
+ (VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
+ isPowerOf2_32(NumElements) && VecSize > 128))) {
+ UseScalable = true;
+ return true;
+ }
+
// Ensure the total vector size is 64 or a multiple of 128. Types larger than
// 128 will be split into multiple interleaved accesses.
return VecSize == 64 || VecSize % 128 == 0;
}
+static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
+ if (VTy->getElementType() == Type::getDoubleTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 2);
+
+ if (VTy->getElementType() == Type::getFloatTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 4);
+
+ if (VTy->getElementType() == Type::getBFloatTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 8);
+
+ if (VTy->getElementType() == Type::getHalfTy(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 8);
+
+ if (VTy->getElementType() == Type::getInt64Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 2);
+
+ if (VTy->getElementType() == Type::getInt32Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 4);
+
+ if (VTy->getElementType() == Type::getInt16Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 8);
+
+ if (VTy->getElementType() == Type::getInt8Ty(VTy->getContext()))
+ return ScalableVectorType::get(VTy->getElementType(), 16);
+
+ llvm_unreachable("Cannot handle input vector type");
+}
+
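The mapping above always picks the narrowest scalable container, i.e. a vector whose known-minimum size is 128 bits; a tiny standalone check of that relationship (illustrative helper name only):

    #include <cassert>

    // Lane count of the minimum SVE container for a fixed element type:
    // 128 / element-bits (f64/i64 -> 2, f32/i32 -> 4, f16/bf16/i16 -> 8, i8 -> 16).
    static unsigned containerLaneCount(unsigned ElementBits) {
      assert(ElementBits != 0 && 128 % ElementBits == 0);
      return 128 / ElementBits;
    }

    int main() {
      assert(containerLaneCount(64) == 2);
      assert(containerLaneCount(32) == 4);
      assert(containerLaneCount(16) == 8);
      assert(containerLaneCount(8) == 16);
      return 0;
    }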
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -11628,10 +12129,12 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
+ bool UseScalable;
+ if (!Subtarget->hasNEON() ||
+ !isLegalInterleavedAccessType(VTy, DL, UseScalable))
return false;
- unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
+ unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
auto *FVTy = cast<FixedVectorType>(VTy);
@@ -11642,48 +12145,84 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
FVTy =
FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
+ // If we're going to generate more than one load, reset the sub-vector type
+ // to something legal.
+ FVTy = FixedVectorType::get(FVTy->getElementType(),
+ FVTy->getNumElements() / NumLoads);
+
+ auto *LDVTy =
+ UseScalable ? cast<VectorType>(getSVEContainerIRType(FVTy)) : FVTy;
+
IRBuilder<> Builder(LI);
// The base address of the load.
Value *BaseAddr = LI->getPointerOperand();
if (NumLoads > 1) {
- // If we're going to generate more than one load, reset the sub-vector type
- // to something legal.
- FVTy = FixedVectorType::get(FVTy->getElementType(),
- FVTy->getNumElements() / NumLoads);
-
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
- FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
- }
-
- Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
- Type *Tys[2] = {FVTy, PtrTy};
- static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
- Intrinsic::aarch64_neon_ld3,
- Intrinsic::aarch64_neon_ld4};
- Function *LdNFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
+ LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
+ }
+
+ Type *PtrTy =
+ UseScalable
+ ? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
+ : LDVTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
+ LDVTy->getElementCount());
+
+ static const Intrinsic::ID SVELoadIntrs[3] = {
+ Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
+ Intrinsic::aarch64_sve_ld4_sret};
+ static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
+ Intrinsic::aarch64_neon_ld3,
+ Intrinsic::aarch64_neon_ld4};
+ Function *LdNFunc;
+ if (UseScalable)
+ LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
+ SVELoadIntrs[Factor - 2], {LDVTy});
+ else
+ LdNFunc = Intrinsic::getDeclaration(
+ LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
// replace.
DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
+ Value *PTrue = nullptr;
+ if (UseScalable) {
+ unsigned PgPattern =
+ getSVEPredPatternFromNumElements(FVTy->getNumElements());
+ if (Subtarget->getMinSVEVectorSizeInBits() ==
+ Subtarget->getMaxSVEVectorSizeInBits() &&
+ Subtarget->getMinSVEVectorSizeInBits() == DL.getTypeSizeInBits(FVTy))
+ PgPattern = AArch64SVEPredPattern::all;
+
+ auto *PTruePat =
+ ConstantInt::get(Type::getInt32Ty(LDVTy->getContext()), PgPattern);
+ PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
+ {PTruePat});
+ }
+
for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
- BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+ BaseAddr = Builder.CreateConstGEP1_32(LDVTy->getElementType(), BaseAddr,
FVTy->getNumElements() * Factor);
- CallInst *LdN = Builder.CreateCall(
- LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
+ CallInst *LdN;
+ if (UseScalable)
+ LdN = Builder.CreateCall(
+ LdNFunc, {PTrue, Builder.CreateBitCast(BaseAddr, PtrTy)}, "ldN");
+ else
+ LdN = Builder.CreateCall(LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy),
+ "ldN");
// Extract and store the sub-vectors returned by the load intrinsic.
for (unsigned i = 0; i < Shuffles.size(); i++) {
@@ -11692,11 +12231,17 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
Value *SubVec = Builder.CreateExtractValue(LdN, Index);
+ if (UseScalable)
+ SubVec = Builder.CreateExtractVector(
+ FVTy, SubVec,
+ ConstantInt::get(Type::getInt64Ty(VTy->getContext()), 0));
+
// Convert the integer vector to pointer vector if the element is pointer.
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
FVTy->getNumElements()));
+
SubVecs[SVI].push_back(SubVec);
}
}
@@ -11755,14 +12300,16 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
+ bool UseScalable;
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
- if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
+ if (!Subtarget->hasNEON() ||
+ !isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return false;
- unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
+ unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
Value *Op0 = SVI->getOperand(0);
Value *Op1 = SVI->getOperand(1);
@@ -11783,15 +12330,18 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
SubVecTy = FixedVectorType::get(IntTy, LaneLen);
}
+ // If we're going to generate more than one store, reset the lane length
+ // and sub-vector type to something legal.
+ LaneLen /= NumStores;
+ SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
+
+ auto *STVTy = UseScalable ? cast<VectorType>(getSVEContainerIRType(SubVecTy))
+ : SubVecTy;
+
// The base address of the store.
Value *BaseAddr = SI->getPointerOperand();
if (NumStores > 1) {
- // If we're going to generate more than one store, reset the lane length
- // and sub-vector type to something legal.
- LaneLen /= NumStores;
- SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
-
// We will compute the pointer operand of each store from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
@@ -11802,13 +12352,42 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
auto Mask = SVI->getShuffleMask();
- Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
- Type *Tys[2] = {SubVecTy, PtrTy};
- static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
- Intrinsic::aarch64_neon_st3,
- Intrinsic::aarch64_neon_st4};
- Function *StNFunc =
- Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
+ Type *PtrTy =
+ UseScalable
+ ? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
+ : STVTy->getPointerTo(SI->getPointerAddressSpace());
+ Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
+ STVTy->getElementCount());
+
+ static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
+ Intrinsic::aarch64_sve_st3,
+ Intrinsic::aarch64_sve_st4};
+ static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
+ Intrinsic::aarch64_neon_st3,
+ Intrinsic::aarch64_neon_st4};
+ Function *StNFunc;
+ if (UseScalable)
+ StNFunc = Intrinsic::getDeclaration(SI->getModule(),
+ SVEStoreIntrs[Factor - 2], {STVTy});
+ else
+ StNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
+
+ Value *PTrue = nullptr;
+ if (UseScalable) {
+ unsigned PgPattern =
+ getSVEPredPatternFromNumElements(SubVecTy->getNumElements());
+ if (Subtarget->getMinSVEVectorSizeInBits() ==
+ Subtarget->getMaxSVEVectorSizeInBits() &&
+ Subtarget->getMinSVEVectorSizeInBits() ==
+ DL.getTypeSizeInBits(SubVecTy))
+ PgPattern = AArch64SVEPredPattern::all;
+
+ auto *PTruePat =
+ ConstantInt::get(Type::getInt32Ty(STVTy->getContext()), PgPattern);
+ PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue, {PredTy},
+ {PTruePat});
+ }
for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
@@ -11816,10 +12395,11 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// Split the shufflevector operands into sub vectors for the new stN call.
for (unsigned i = 0; i < Factor; i++) {
+ Value *Shuffle;
unsigned IdxI = StoreCount * LaneLen * Factor + i;
if (Mask[IdxI] >= 0) {
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
+ Shuffle = Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0));
} else {
unsigned StartMask = 0;
for (unsigned j = 1; j < LaneLen; j++) {
@@ -11834,11 +12414,21 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// In the case of all undefs we're defaulting to using elems from 0
// Note: StartMask cannot be negative, it's checked in
// isReInterleaveMask
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
+ Shuffle = Builder.CreateShuffleVector(
+ Op0, Op1, createSequentialMask(StartMask, LaneLen, 0));
}
+
+ if (UseScalable)
+ Shuffle = Builder.CreateInsertVector(
+ STVTy, UndefValue::get(STVTy), Shuffle,
+ ConstantInt::get(Type::getInt64Ty(STVTy->getContext()), 0));
+
+ Ops.push_back(Shuffle);
}
+ if (UseScalable)
+ Ops.push_back(PTrue);
+
// If we're generating more than one store, we compute the base address of
// subsequent stores as an offset from the previous.
if (StoreCount > 0)
@@ -11905,8 +12495,7 @@ SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- bool CanImplicitFloat =
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
@@ -11923,8 +12512,8 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
};
if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
- AlignmentIsAcceptable(MVT::v2i64, Align(16)))
- return MVT::v2i64;
+ AlignmentIsAcceptable(MVT::v16i8, Align(16)))
+ return MVT::v16i8;
if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
return MVT::f128;
if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
@@ -11936,8 +12525,7 @@ EVT AArch64TargetLowering::getOptimalMemOpType(
LLT AArch64TargetLowering::getOptimalMemOpLLT(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- bool CanImplicitFloat =
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
+ bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
// Only use AdvSIMD to implement memset of 32-byte and above. It would have
@@ -11981,6 +12569,33 @@ bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
return IsLegal;
}
+// Return false to prevent folding
+// (mul (add x, c1), c2) -> (add (mul x, c2), c2*c1) in DAGCombine,
+// if the folding leads to worse code.
+bool AArch64TargetLowering::isMulAddWithConstProfitable(
+ const SDValue &AddNode, const SDValue &ConstNode) const {
+ // Let the DAGCombiner decide for vector types and large types.
+ const EVT VT = AddNode.getValueType();
+ if (VT.isVector() || VT.getScalarSizeInBits() > 64)
+ return true;
+
+ // The fold is worse if c1 is a legal add immediate while c1*c2 is not and
+ // has to be materialized with at least two instructions.
+ const ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+ const ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
+ const int64_t C1 = C1Node->getSExtValue();
+ const APInt C1C2 = C1Node->getAPIntValue() * C2Node->getAPIntValue();
+ if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
+ return true;
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
+ if (Insn.size() > 1)
+ return false;
+
+ // Default to true and let the DAGCombiner decide.
+ return true;
+}
+
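A rough standalone sketch of the profitability check above, under the simplifying assumptions that ADD/SUB immediates are 12 bits (optionally shifted left by 12) and that constants are materialized with one MOVZ plus one MOVK per extra non-zero 16-bit chunk (ORR bitmask immediates are ignored); both helper names are hypothetical:

    #include <cassert>
    #include <cstdint>

    // Count MOVZ/MOVK instructions needed for Imm: one per non-zero 16-bit chunk
    // (at least one instruction for zero).
    static unsigned movImmInsnCount(uint64_t Imm) {
      unsigned Count = 0;
      for (unsigned Shift = 0; Shift < 64; Shift += 16)
        Count += ((Imm >> Shift) & 0xffff) != 0;
      return Count ? Count : 1;
    }

    // ADD/SUB accept a 12-bit unsigned immediate, optionally shifted left by 12.
    static bool isLegalAddImm(int64_t Imm) {
      uint64_t U = Imm < 0 ? -static_cast<uint64_t>(Imm) : Imm;
      return (U >> 12) == 0 || ((U & 0xfff) == 0 && (U >> 24) == 0);
    }

    int main() {
      // (mul (add x, 1), 0x12345678): 1 is a legal ADD immediate, but the product
      // 0x12345678 is not and needs MOVZ+MOVK, so the fold would be rejected.
      assert(isLegalAddImm(1));
      assert(!isLegalAddImm(0x12345678) && movImmInsnCount(0x12345678) == 2);
      // (mul (add x, 3), 5): the product 15 is itself a legal ADD immediate, so
      // the decision is left to the generic DAGCombiner.
      assert(isLegalAddImm(15));
      return 0;
    }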
// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
// immediates is the same as for an add or a sub.
bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
@@ -12100,7 +12715,8 @@ bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
bool AArch64TargetLowering::generateFMAsInMachineCombiner(
EVT VT, CodeGenOpt::Level OptLevel) const {
- return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
+ return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector() &&
+ !useSVEForFixedLengthVectorVT(VT);
}
const MCPhysReg *
@@ -12348,7 +12964,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
// fold (sdiv X, pow2)
EVT VT = N->getValueType(0);
if ((VT != MVT::i32 && VT != MVT::i64) ||
- !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
SDLoc DL(N);
@@ -12505,7 +13121,7 @@ static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
DAG.getConstant(0, DL, MVT::i64));
- std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
+ std::vector<int> ShuffleMask(TargetType.getVectorNumElements());
SDValue VectorShuffleNode =
DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
@@ -12547,12 +13163,44 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ // Canonicalize X*(Y+1) -> X*Y+X and (X+1)*Y -> X*Y+Y so that the add+mul
+ // can later be combined into madd by the MachineCombiner pass.
+ // Similarly, X*(1-Y) -> X - X*Y and (1-Y)*X -> X - Y*X.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue MulOper;
+ unsigned AddSubOpc;
+
+ auto IsAddSubWith1 = [&](SDValue V) -> bool {
+ AddSubOpc = V->getOpcode();
+ if ((AddSubOpc == ISD::ADD || AddSubOpc == ISD::SUB) && V->hasOneUse()) {
+ SDValue Opnd = V->getOperand(1);
+ MulOper = V->getOperand(0);
+ if (AddSubOpc == ISD::SUB)
+ std::swap(Opnd, MulOper);
+ if (auto C = dyn_cast<ConstantSDNode>(Opnd))
+ return C->isOne();
+ }
+ return false;
+ };
+
+ if (IsAddSubWith1(N0)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N1, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N1, MulVal);
+ }
+
+ if (IsAddSubWith1(N1)) {
+ SDValue MulVal = DAG.getNode(ISD::MUL, DL, VT, N0, MulOper);
+ return DAG.getNode(AddSubOpc, DL, VT, N0, MulVal);
+ }
+
// The below optimizations require a constant RHS.
- if (!isa<ConstantSDNode>(N->getOperand(1)))
+ if (!isa<ConstantSDNode>(N1))
return SDValue();
- SDValue N0 = N->getOperand(0);
- ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *C = cast<ConstantSDNode>(N1);
const APInt &ConstValue = C->getAPIntValue();
// Allow the scaling to be folded into the `cnt` instruction by preventing
@@ -12593,7 +13241,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// and shift+add+shift.
APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
- unsigned ShiftAmt, AddSubOpc;
+ unsigned ShiftAmt;
// Is the shifted value the LHS operand of the add/sub?
bool ShiftValUseIsN0 = true;
// Do we need to negate the result?
@@ -12630,8 +13278,6 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
DAG.getConstant(ShiftAmt, DL, MVT::i64));
@@ -12757,7 +13403,8 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
uint32_t FloatBits = FloatTy.getSizeInBits();
- if (FloatBits != 32 && FloatBits != 64)
+ if (FloatBits != 32 && FloatBits != 64 &&
+ (FloatBits != 16 || !Subtarget->hasFullFP16()))
return SDValue();
MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
@@ -12776,27 +13423,20 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
if (C == -1 || C == 0 || C > Bits)
return SDValue();
- MVT ResTy;
- unsigned NumLanes = Op.getValueType().getVectorNumElements();
- switch (NumLanes) {
- default:
- return SDValue();
- case 2:
- ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
- break;
- case 4:
- ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
- break;
- }
-
- if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
+ EVT ResTy = Op.getValueType().changeVectorElementTypeToInteger();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(ResTy))
return SDValue();
- assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
- "Illegal vector type after legalization");
+ if (N->getOpcode() == ISD::FP_TO_SINT_SAT ||
+ N->getOpcode() == ISD::FP_TO_UINT_SAT) {
+ EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ if (SatVT.getScalarSizeInBits() != IntBits)
+ return SDValue();
+ }
SDLoc DL(N);
- bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ bool IsSigned = (N->getOpcode() == ISD::FP_TO_SINT ||
+ N->getOpcode() == ISD::FP_TO_SINT_SAT);
unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
: Intrinsic::aarch64_neon_vcvtfp2fxu;
SDValue FixConv =
@@ -13097,6 +13737,9 @@ static SDValue performSVEAndCombine(SDNode *N,
SDLoc DL(N);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
+ if (!C)
+ return SDValue();
+
uint64_t ExtVal = C->getZExtValue();
// If the mask is fully covered by the unpack, we don't need to push
@@ -13289,7 +13932,7 @@ performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
unsigned ElemSizeInBits = VT.getScalarSizeInBits();
APInt CAsAPInt(ElemSizeInBits, C);
- if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
+ if (CAsAPInt != APInt::getAllOnes(ElemSizeInBits))
return SDValue();
ExtendOpA = Xor.getOperand(0);
@@ -13475,7 +14118,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
- if (N0 == N1 && VT.getVectorNumElements() == 2) {
+ if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
@@ -13490,7 +14133,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// becomes
// (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
- if (N1Opc != ISD::BITCAST)
+ if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
return SDValue();
SDValue RHS = N1->getOperand(0);
MVT RHSTy = RHS.getValueType().getSimpleVT();
@@ -13509,6 +14152,48 @@ static SDValue performConcatVectorsCombine(SDNode *N,
RHS));
}
+static SDValue
+performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ uint64_t IdxVal = N->getConstantOperandVal(2);
+ EVT VecVT = Vec.getValueType();
+ EVT SubVT = SubVec.getValueType();
+
+ // Only do this for legal fixed vector types.
+ if (!VecVT.isFixedLengthVector() ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+
+ // Ignore widening patterns.
+ if (IdxVal == 0 && Vec.isUndef())
+ return SDValue();
+
+ // Subvector must be half the width and an "aligned" insertion.
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
+ (IdxVal != 0 && IdxVal != NumSubElts))
+ return SDValue();
+
+ // Fold insert_subvector -> concat_vectors
+ // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
+ // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ if (IdxVal == 0) {
+ Lo = SubVec;
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DAG.getVectorIdxConstant(NumSubElts, DL));
+ } else {
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ Hi = SubVec;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
+}
+
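A small standalone model of the fold above on plain arrays (illustration only; insertSubvector is a hypothetical helper): inserting a half-width subvector at index 0 or at the midpoint yields the same result as concatenating it with the untouched half of the original vector.

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Insert an N-element subvector into a 2N-element vector at element Idx.
    template <size_t N>
    static std::array<int, 2 * N> insertSubvector(std::array<int, 2 * N> Vec,
                                                  const std::array<int, N> &Sub,
                                                  size_t Idx) {
      for (size_t I = 0; I != N; ++I)
        Vec[Idx + I] = Sub[I];
      return Vec;
    }

    int main() {
      std::array<int, 4> Vec = {0, 1, 2, 3};
      std::array<int, 2> Sub = {8, 9};

      // Insert at the low half == concat(Sub, hi half of Vec).
      std::array<int, 4> Lo = {8, 9, 2, 3};
      assert(insertSubvector<2>(Vec, Sub, 0) == Lo);

      // Insert at the high half == concat(lo half of Vec, Sub).
      std::array<int, 4> Hi = {0, 1, 8, 9};
      assert(insertSubvector<2>(Vec, Sub, 2) == Hi);
      return 0;
    }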
static SDValue tryCombineFixedPointConvert(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -13611,6 +14296,8 @@ static bool isEssentiallyExtractHighSubvector(SDValue N) {
N = N.getOperand(0);
if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
+ if (N.getOperand(0).getValueType().isScalableVector())
+ return false;
return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
N.getOperand(0).getValueType().getVectorNumElements() / 2;
}
@@ -13687,7 +14374,7 @@ static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
SetCCInfo.Info.AArch64.CC =
AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
}
- return TValue->isOne() && FValue->isNullValue();
+ return TValue->isOne() && FValue->isZero();
}
// Returns true if Op is setcc or zext of setcc.
@@ -13765,7 +14452,7 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
- if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
+ if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isZero())
return SDValue();
SDValue Op1 = LHS->getOperand(0);
@@ -14237,20 +14924,20 @@ static bool isAllActivePredicate(SDValue N) {
// or unpredicated operation, which potentially allows better isel (perhaps
// using immediate forms) or relaxing register reuse requirements.
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
- SelectionDAG &DAG,
- bool UnpredOp = false) {
+ SelectionDAG &DAG, bool UnpredOp = false,
+ bool SwapOperands = false) {
assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
SDValue Pg = N->getOperand(1);
+ SDValue Op1 = N->getOperand(SwapOperands ? 3 : 2);
+ SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
// ISD way to specify an all active predicate.
if (isAllActivePredicate(Pg)) {
if (UnpredOp)
- return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
- N->getOperand(3));
- else
- return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
- N->getOperand(2), N->getOperand(3));
+ return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
+
+ return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg, Op1, Op2);
}
// FUTURE: SplatVector(true)
@@ -14372,6 +15059,8 @@ static SDValue performIntrinsicCombine(SDNode *N,
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
case Intrinsic::aarch64_sve_sub:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
+ case Intrinsic::aarch64_sve_subr:
+ return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
case Intrinsic::aarch64_sve_and:
return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
case Intrinsic::aarch64_sve_bic:
@@ -14927,6 +15616,18 @@ static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
+ assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
+ N->getOpcode() == AArch64ISD::UUNPKLO) &&
+ "Unexpected Opcode!");
+
+ // uunpklo/hi undef -> undef
+ if (N->getOperand(0).isUndef())
+ return DAG.getUNDEF(N->getValueType(0));
+
+ return SDValue();
+}
+
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
@@ -15169,11 +15870,10 @@ static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
ExtOpCode != ISD::ANY_EXTEND)
return SDValue();
SDValue Orig = Ext->getOperand(0);
- if (Store->getMemoryVT() != Orig->getValueType(0))
+ if (Store->getMemoryVT() != Orig.getValueType())
return SDValue();
return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
- Store->getBasePtr(), Store->getPointerInfo(),
- Store->getAlign());
+ Store->getBasePtr(), Store->getMemOperand());
}
return SDValue();
@@ -15844,7 +16544,7 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
MVT::v2i32, MVT::v4i32, MVT::v2i64}),
VT.getSimpleVT().SimpleTy) &&
ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
- SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
+ SplatLHSVal.isOne() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
unsigned NumElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(
@@ -16544,6 +17244,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performIntToFpCombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
return performFDivCombine(N, DAG, DCI, Subtarget);
@@ -16565,6 +17267,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performVectorTruncateCombine(N, DCI, DAG);
case ISD::CONCAT_VECTORS:
return performConcatVectorsCombine(N, DCI, DAG);
+ case ISD::INSERT_SUBVECTOR:
+ return performInsertSubvectorCombine(N, DCI, DAG);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::VSELECT:
@@ -16592,6 +17296,9 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performNVCASTCombine(N);
case AArch64ISD::SPLICE:
return performSpliceCombine(N, DAG);
+ case AArch64ISD::UUNPKLO:
+ case AArch64ISD::UUNPKHI:
+ return performUnpackCombine(N, DAG);
case AArch64ISD::UZP1:
return performUzpCombine(N, DAG);
case AArch64ISD::SETCC_MERGE_ZERO:
@@ -17212,18 +17919,22 @@ void AArch64TargetLowering::ReplaceNodeResults(
return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
// Let normal code take care of it by not adding anything to Results.
return;
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
assert(SDValue(N, 0).getValueType() == MVT::i128 &&
"unexpected load's value type");
- LoadSDNode *LoadNode = cast<LoadSDNode>(N);
- if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
- // Non-volatile loads are optimized later in AArch64's load/store
+ MemSDNode *LoadNode = cast<MemSDNode>(N);
+ if ((!LoadNode->isVolatile() && !LoadNode->isAtomic()) ||
+ LoadNode->getMemoryVT() != MVT::i128) {
+ // Non-volatile, non-atomic loads are optimized later in AArch64's load/store
// optimizer.
return;
}
@@ -17314,12 +18025,37 @@ AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
return TargetLoweringBase::getPreferredVectorAction(VT);
}
+// In v8.4a, ldp and stp instructions are guaranteed to be single-copy atomic
+// provided the address is 16-byte aligned.
+bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
+ if (!Subtarget->hasLSE2())
+ return false;
+
+ if (auto LI = dyn_cast<LoadInst>(I))
+ return LI->getType()->getPrimitiveSizeInBits() == 128 &&
+ LI->getAlignment() >= 16;
+
+ if (auto SI = dyn_cast<StoreInst>(I))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+ SI->getAlignment() >= 16;
+
+ return false;
+}
+
+bool AArch64TargetLowering::shouldInsertFencesForAtomic(
+ const Instruction *I) const {
+ return isOpSuitableForLDPSTP(I);
+}
+
// Loads and stores less than 128-bits are already atomic; ones above that
// are doomed anyway, so defer to the default libcall and blame the OS when
// things go wrong.
bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- return Size == 128;
+ if (Size != 128)
+ return false;
+
+ return !isOpSuitableForLDPSTP(SI);
}
// Loads and stores less than 128-bits are already atomic; ones above that
@@ -17328,7 +18064,19 @@ bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
+
+ if (Size != 128 || isOpSuitableForLDPSTP(LI))
+ return AtomicExpansionKind::None;
+
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on the
+ // stack and close enough to the spill slot, this can lead to a situation
+ // where the monitor always gets cleared and the atomic operation can never
+ // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+
+ return AtomicExpansionKind::LLSC;
}
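To summarize the 128-bit atomic-load policy above as a standalone sketch (names are illustrative, not LLVM's): the load stays as-is and becomes LDP when LSE2 guarantees single-copy atomicity for a 16-byte-aligned address, a CAS loop is used at -O0 to keep register pressure manageable, and LL/SC is used otherwise.

    #include <cassert>

    enum class ExpansionKind { None, LLSC, CmpXChg };

    // Decision for a 128-bit atomic load, mirroring the code above.
    static ExpansionKind expandAtomicLoad128(bool HasLSE2, unsigned AlignBytes,
                                             bool OptNone) {
      if (HasLSE2 && AlignBytes >= 16)
        return ExpansionKind::None;     // keep the load, select to LDP
      if (OptNone)
        return ExpansionKind::CmpXChg;  // CAS loop at -O0
      return ExpansionKind::LLSC;       // LDXP/STXP loop otherwise
    }

    int main() {
      assert(expandAtomicLoad128(true, 16, false) == ExpansionKind::None);
      assert(expandAtomicLoad128(false, 16, true) == ExpansionKind::CmpXChg);
      assert(expandAtomicLoad128(false, 16, false) == ExpansionKind::LLSC);
      return 0;
    }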
// For the real atomic operations, we have ldxr/stxr up to 128 bits,
@@ -17531,7 +18279,7 @@ void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::Win64);
- F->addAttribute(1, Attribute::AttrKind::InReg);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
@@ -17657,7 +18405,7 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
+ bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
@@ -17759,42 +18507,20 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Expected legal fixed length vector!");
- int PgPattern;
- switch (VT.getVectorNumElements()) {
- default:
- llvm_unreachable("unexpected element count for SVE predicate");
- case 1:
- PgPattern = AArch64SVEPredPattern::vl1;
- break;
- case 2:
- PgPattern = AArch64SVEPredPattern::vl2;
- break;
- case 4:
- PgPattern = AArch64SVEPredPattern::vl4;
- break;
- case 8:
- PgPattern = AArch64SVEPredPattern::vl8;
- break;
- case 16:
- PgPattern = AArch64SVEPredPattern::vl16;
- break;
- case 32:
- PgPattern = AArch64SVEPredPattern::vl32;
- break;
- case 64:
- PgPattern = AArch64SVEPredPattern::vl64;
- break;
- case 128:
- PgPattern = AArch64SVEPredPattern::vl128;
- break;
- case 256:
- PgPattern = AArch64SVEPredPattern::vl256;
- break;
- }
+ unsigned PgPattern =
+ getSVEPredPatternFromNumElements(VT.getVectorNumElements());
+ assert(PgPattern && "Unexpected element count for SVE predicate");
- // TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
- // use AArch64SVEPredPattern::all, which can enable the use of unpredicated
+ // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
+ // AArch64SVEPredPattern::all, which can enable the use of unpredicated
// variants of instructions when available.
+ const auto &Subtarget =
+ static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+ unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+ unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+ if (MaxSVESize && MinSVESize == MaxSVESize &&
+ MaxSVESize == VT.getSizeInBits())
+ PgPattern = AArch64SVEPredPattern::all;
MVT MaskVT;
switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
@@ -17817,8 +18543,7 @@ static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
break;
}
- return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
- DAG.getTargetConstant(PgPattern, DL, MVT::i64));
+ return getPTrue(DAG, DL, MaskVT, PgPattern);
}
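The new predicate-selection logic can be summarized by a small standalone check (illustrative names only): a vlN pattern is chosen from the element count, but when the SVE register size is known exactly and the fixed-length vector fills it, PTRUE all is used so unpredicated instruction forms become available.

    #include <cassert>

    // True when the register size is known exactly (min == max) and the
    // fixed-length vector occupies the whole register.
    static bool canUseAllPattern(unsigned MinSVEBits, unsigned MaxSVEBits,
                                 unsigned FixedVTBits) {
      return MaxSVEBits != 0 && MinSVEBits == MaxSVEBits &&
             MaxSVEBits == FixedVTBits;
    }

    int main() {
      assert(canUseAllPattern(256, 256, 256));   // e.g. -msve-vector-bits=256 with a 256-bit VT
      assert(!canUseAllPattern(128, 2048, 256)); // register size not known exactly
      assert(!canUseAllPattern(256, 256, 128));  // vector fills only half the register
      return 0;
    }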
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
@@ -17898,9 +18623,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
SDValue Op, SelectionDAG &DAG) const {
auto Load = cast<MaskedLoadSDNode>(Op);
- if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
- return SDValue();
-
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 386e1c2d8400..392e22b68366 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -330,6 +330,10 @@ enum NodeType : unsigned {
// Cast between vectors of the same element type but differ in length.
REINTERPRET_CAST,
+ // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
+ LS64_BUILD,
+ LS64_EXTRACT,
+
LD1_MERGE_ZERO,
LD1S_MERGE_ZERO,
LDNF1_MERGE_ZERO,
@@ -401,6 +405,10 @@ enum NodeType : unsigned {
SSTNT1_PRED,
SSTNT1_INDEX_PRED,
+ // Asserts that a function argument (i32) is zero-extended to i8 by
+ // the caller
+ ASSERT_ZEXT_BOOL,
+
// Strict (exception-raising) floating point comparison
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPE,
@@ -591,6 +599,9 @@ public:
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
+ bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const override;
+
bool shouldConsiderGEPOffsetSplit() const override;
EVT getOptimalMemOpType(const MemOp &Op,
@@ -653,6 +664,9 @@ public:
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
+ bool isOpSuitableForLDPSTP(const Instruction *I) const;
+ bool shouldInsertFencesForAtomic(const Instruction *I) const override;
+
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
@@ -697,12 +711,11 @@ public:
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const override {
+ const MachineFunction &MF) const override {
// Do not merge to float value size (128 bytes) if no implicit
// float attribute is set.
- bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
+ bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat)
return (MemVT.getSizeInBits() <= 64);
@@ -730,7 +743,9 @@ public:
if (!VT.isVector())
return hasAndNotCompare(Y);
- return VT.getSizeInBits() >= 64; // vector 'bic'
+ TypeSize TS = VT.getSizeInBits();
+ // TODO: We should be able to use bic/bif too for SVE.
+ return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
}
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
@@ -786,13 +801,13 @@ public:
/// Returns true if \p VecTy is a legal interleaved access type. This
/// function checks the vector element type and the overall width of the
/// vector.
- bool isLegalInterleavedAccessType(VectorType *VecTy,
- const DataLayout &DL) const;
+ bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
+ bool &UseScalable) const;
/// Returns the number of interleaved accesses that will be generated when
/// lowering accesses of the given type.
- unsigned getNumInterleavedAccesses(VectorType *VecTy,
- const DataLayout &DL) const;
+ unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
+ bool UseScalable) const;
MachineMemOperand::Flags getTargetMMOFlags(
const Instruction &I) const override;
@@ -824,6 +839,9 @@ public:
bool isAllActivePredicate(SDValue N) const;
EVT getPromotedVTForPredicate(EVT VT) const;
+ EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
+ bool AllowUnknown = false) const override;
+
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -854,6 +872,7 @@ private:
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
@@ -959,10 +978,12 @@ private:
SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 9bc2539e95f0..cd4bc8a61a8a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -721,6 +721,7 @@ def tvecshiftR64 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm1_64Operand;
}
+def Imm0_0Operand : AsmImmRange<0, 0>;
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_3Operand : AsmImmRange<0, 3>;
def Imm0_7Operand : AsmImmRange<0, 7>;
@@ -845,13 +846,13 @@ def logical_imm64_not : Operand<i64> {
let ParserMatchClass = LogicalImm64NotOperand;
}
-// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
+// immXX_0_65535 predicates - True if the immediate is in the range [0,65535].
let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
-def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{
+def timm32_0_65535 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
}]>;
-def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{
+def timm64_0_65535 : Operand<i64>, TImmLeaf<i64, [{
return ((uint64_t)Imm) < 65536;
}]>;
}
@@ -955,8 +956,8 @@ def imm0_3 : Operand<i64>, ImmLeaf<i64, [{
let ParserMatchClass = Imm0_3Operand;
}
-// imm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
-def imm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
+// timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7]
+def timm32_0_7 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 8;
}]> {
let ParserMatchClass = Imm0_7Operand;
@@ -1215,6 +1216,18 @@ def fpimm0 : FPImmLeaf<fAny, [{
return Imm.isExactlyValue(+0.0);
}]>;
+def fpimm_half : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+0.5);
+}]>;
+
+def fpimm_one : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+1.0);
+}]>;
+
+def fpimm_two : FPImmLeaf<fAny, [{
+ return Imm.isExactlyValue(+2.0);
+}]>;
+
def gi_fpimm16 : GICustomOperandRenderer<"renderFPImm16">,
GISDNodeXFormEquiv<fpimm16XForm>;
def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">,
@@ -1241,12 +1254,15 @@ multiclass VectorIndex<ValueType ty, AsmOperandClass mc, code pred> {
def _timm : AsmVectorIndexOpnd<ty, mc>, TImmLeaf<ty, pred>;
}
+def VectorIndex0Operand : AsmVectorIndex<0, 0>;
def VectorIndex1Operand : AsmVectorIndex<1, 1>;
def VectorIndexBOperand : AsmVectorIndex<0, 15>;
def VectorIndexHOperand : AsmVectorIndex<0, 7>;
def VectorIndexSOperand : AsmVectorIndex<0, 3>;
def VectorIndexDOperand : AsmVectorIndex<0, 1>;
+defm VectorIndex0 : VectorIndex<i64, VectorIndex0Operand,
+ [{ return ((uint64_t)Imm) == 0; }]>;
defm VectorIndex1 : VectorIndex<i64, VectorIndex1Operand,
[{ return ((uint64_t)Imm) == 1; }]>;
defm VectorIndexB : VectorIndex<i64, VectorIndexBOperand,
@@ -1291,6 +1307,37 @@ defm sve_elm_idx_extdup_q
: VectorIndex<i64, SVEVectorIndexExtDupQOperand,
[{ return ((uint64_t)Imm) < 4; }]>;
+def sme_elm_idx0_0 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) == 0;
+}]> {
+ let ParserMatchClass = Imm0_0Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_1 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 1;
+}]> {
+ let ParserMatchClass = Imm0_1Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_3 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 3;
+}]> {
+ let ParserMatchClass = Imm0_3Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_7 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 7;
+}]> {
+ let ParserMatchClass = Imm0_7Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+def sme_elm_idx0_15 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) <= 15;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+ let PrintMethod = "printMatrixIndex";
+}
+
// 8-bit immediate for AdvSIMD where 64-bit values of the form:
// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
// are encoded as the eight bit value 'abcdefgh'.
@@ -1379,7 +1426,7 @@ class TMSystemINoOperand<bits<4> CRm, string asm, list<dag> pattern>
// System instructions for exit from transactions
class TMSystemException<bits<3> op1, string asm, list<dag> pattern>
- : I<(outs), (ins i64_imm0_65535:$imm), asm, "\t$imm", "", pattern>,
+ : I<(outs), (ins timm64_0_65535:$imm), asm, "\t$imm", "", pattern>,
Sched<[WriteSys]> {
bits<16> imm;
let Inst{31-24} = 0b11010100;
@@ -1703,7 +1750,7 @@ class AuthReturn<bits<3> op, bits<1> M, string asm>
let mayLoad = 1 in
class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
- string operands, string cstr, Operand opr>
+ string operands, string cstr>
: I<oops, iops, asm, operands, cstr, []>, Sched<[]> {
bits<10> offset;
bits<5> Rn;
@@ -1725,11 +1772,11 @@ class BaseAuthLoad<bit M, bit W, dag oops, dag iops, string asm,
multiclass AuthLoad<bit M, string asm, Operand opr> {
def indexed : BaseAuthLoad<M, 0, (outs GPR64:$Rt),
(ins GPR64sp:$Rn, opr:$offset),
- asm, "\t$Rt, [$Rn, $offset]", "", opr>;
+ asm, "\t$Rt, [$Rn, $offset]", "">;
def writeback : BaseAuthLoad<M, 1, (outs GPR64sp:$wback, GPR64:$Rt),
(ins GPR64sp:$Rn, opr:$offset),
asm, "\t$Rt, [$Rn, $offset]!",
- "$Rn = $wback,@earlyclobber $wback", opr>;
+ "$Rn = $wback,@earlyclobber $wback">;
def : InstAlias<asm # "\t$Rt, [$Rn]",
(!cast<Instruction>(NAME # "indexed") GPR64:$Rt, GPR64sp:$Rn, 0)>;
@@ -1965,10 +2012,10 @@ class OneXRegData<bits<3> opc, string asm, SDPatternOperator node>
let Inst{31} = 1;
}
-class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm>
- : I<(outs GPR64:$Rd), (ins GPR64:$src, GPR64sp:$Rn), asm, "\t$Rd, $Rn",
- "$Rd = $src",
- []>,
+class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm,
+ SDPatternOperator op>
+ : I<(outs GPR64:$dst), (ins GPR64:$Rd, GPR64sp:$Rn), asm, "\t$Rd, $Rn",
+ "$dst = $Rd", [(set GPR64:$dst, (op GPR64:$Rd, opcode, GPR64sp:$Rn))]>,
Sched<[WriteI, ReadI]> {
bits<5> Rd;
bits<5> Rn;
@@ -1979,9 +2026,11 @@ class SignAuthOneData<bits<3> opcode_prefix, bits<2> opcode, string asm>
let Inst{4-0} = Rd;
}
-class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm>
- : I<(outs GPR64:$Rd), (ins GPR64:$src), asm, "\t$Rd", "$Rd = $src",
- []>, Sched<[]> {
+class SignAuthZero<bits<3> opcode_prefix, bits<2> opcode, string asm,
+ SDPatternOperator op>
+ : I<(outs GPR64:$dst), (ins GPR64:$Rd), asm, "\t$Rd", "$dst = $Rd",
+ [(set GPR64:$dst, (op GPR64:$Rd, opcode, (i64 0)))]>,
+ Sched<[]> {
bits<5> Rd;
let Inst{31-15} = 0b11011010110000010;
let Inst{14-12} = opcode_prefix;
@@ -2193,16 +2242,14 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
let Inst{4-0} = Rd;
}
-multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+multiclass MulAccum<bit isSub, string asm> {
// MADD/MSUB generation is decided by MachineCombiner.cpp
- def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
- [/*(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))*/]>,
+ def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, []>,
Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 0;
}
- def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
- [/*(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))*/]>,
+ def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, []>,
Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> {
let Inst{31} = 1;
}
@@ -3421,8 +3468,8 @@ def ro64 : ROAddrMode<ro_Windexed64, ro_Xindexed64, ro_Wextend64, ro_Xextend64>;
def ro128 : ROAddrMode<ro_Windexed128, ro_Xindexed128, ro_Wextend128,
ro_Xextend128>;
-class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3450,7 +3497,7 @@ class ROInstAlias<string asm, DAGOperand regtype, Instruction INST>
multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10 in
- def roW : LoadStore8RO<sz, V, opc, regtype, asm,
+ def roW : LoadStore8RO<sz, V, opc, asm,
(outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
[(set (Ty regtype:$Rt),
@@ -3461,7 +3508,7 @@ multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10 in
- def roX : LoadStore8RO<sz, V, opc, regtype, asm,
+ def roX : LoadStore8RO<sz, V, opc, asm,
(outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
[(set (Ty regtype:$Rt),
@@ -3477,30 +3524,30 @@ multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store8RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10 in
- def roW : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore8RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend8:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10 in
- def roX : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore8RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend8:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3524,7 +3571,7 @@ class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10 in
- def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore16RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
@@ -3534,7 +3581,7 @@ multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10 in
- def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore16RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
@@ -3549,30 +3596,30 @@ multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store16RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10 in
- def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore16RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend16:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10 in
- def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore16RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend16:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3596,7 +3643,7 @@ class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10 in
- def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore32RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
@@ -3606,7 +3653,7 @@ multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10 in
- def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore32RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
@@ -3621,30 +3668,30 @@ multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store32RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10 in
- def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore32RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend32:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10 in
- def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore32RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend32:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3668,7 +3715,7 @@ class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore64RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
@@ -3678,7 +3725,7 @@ multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore64RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
@@ -3693,30 +3740,30 @@ multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Store64RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator storeop> {
let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore64RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
ro_Wextend64:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore64RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
[(storeop (Ty regtype:$Rt),
(ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
ro_Xextend64:$extend))]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, dag ins, dag outs, list<dag> pat>
+class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, string asm, dag ins,
+ dag outs, list<dag> pat>
: I<ins, outs, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
bits<5> Rt;
bits<5> Rn;
@@ -3740,7 +3787,7 @@ class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
string asm, ValueType Ty, SDPatternOperator loadop> {
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roW : LoadStore128RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
@@ -3750,7 +3797,7 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
- def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ def roX : LoadStore128RO<sz, V, opc, asm, (outs regtype:$Rt),
(ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
[(set (Ty regtype:$Rt),
(loadop (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
@@ -3763,20 +3810,20 @@ multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
}
multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, DAGOperand regtype,
- string asm, ValueType Ty, SDPatternOperator storeop> {
+ string asm> {
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ def roW : LoadStore128RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
[]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b0;
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
- def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ def roX : LoadStore128RO<sz, V, opc, asm, (outs),
(ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
[]>,
- Sched<[WriteSTIdx, ReadAdrBase]> {
+ Sched<[WriteSTIdx, ReadST, ReadAdrBase]> {
let Inst{13} = 0b1;
}
@@ -4466,7 +4513,7 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
- : I<(outs), (ins i32_imm0_65535:$imm), asm, "\t$imm", "", []>,
+ : I<(outs), (ins timm32_0_65535:$imm), asm, "\t$imm", "", []>,
Sched<[WriteSys]> {
bits<16> imm;
let Inst{31-24} = 0b11010100;
@@ -5309,7 +5356,7 @@ class BaseSIMDThreeSameVector<bit Q, bit U, bits<3> size, bits<5> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -5332,7 +5379,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -5351,7 +5398,7 @@ class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<3> size, bits<5> opcode,
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDThreeSameVectorPseudo<RegisterOperand regtype, list<dag> pattern>
: Pseudo<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), pattern>,
- Sched<[WriteV]>;
+ Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]>;
multiclass SIMDLogicalThreeVectorPseudo<SDPatternOperator OpNode> {
def v8i8 : BaseSIMDThreeSameVectorPseudo<V64,
@@ -5704,7 +5751,7 @@ class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -5729,7 +5776,7 @@ class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind #
"|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -5775,7 +5822,7 @@ class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
: I<(outs V128:$Rd), (ins regtype:$Rn), asm,
"{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
"|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6032,7 +6079,7 @@ multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
[(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
}
-
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand inreg, RegisterOperand outreg,
string asm, string outkind, string inkind,
@@ -6040,7 +6087,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind #
"|" # outkind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6055,6 +6102,7 @@ class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand inreg, RegisterOperand outreg,
string asm, string outkind, string inkind,
@@ -6062,7 +6110,7 @@ class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
: I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind #
"|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6114,7 +6162,7 @@ class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,
"{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
"|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
[(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6212,7 +6260,7 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
list<dag> pattern>
: I<(outs outtype:$Rd), (ins intype:$Rn), asm,
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6227,13 +6275,14 @@ class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
let Inst{4-0} = Rd;
}
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
RegisterOperand outtype, RegisterOperand intype,
string asm, string VdTy, string VnTy,
list<dag> pattern>
: I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
!strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -6296,7 +6345,7 @@ class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
: I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
"|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6322,7 +6371,7 @@ class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
: I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
"{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
"|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6662,7 +6711,7 @@ class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
"|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
[(set (vty regtype:$Rd),
(AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
- Sched<[WriteV]> {
+ Sched<[!if(size, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6696,7 +6745,7 @@ class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
"|" # kind # "\t$Rd, $Rn, $Rm}", "",
[(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
- Sched<[WriteV]> {
+ Sched<[!if(!eq(regtype, V128), WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6752,7 +6801,7 @@ class BaseSIMDThreeScalar<bit U, bits<3> size, bits<5> opcode,
list<dag> pattern>
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
"\t$Rd, $Rn, $Rm", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6772,7 +6821,7 @@ class BaseSIMDThreeScalarTied<bit U, bits<2> size, bit R, bits<5> opcode,
dag oops, dag iops, string asm,
list<dag> pattern>
: I<oops, iops, asm, "\t$Rd, $Rn, $Rm", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6815,8 +6864,7 @@ multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
def v1i16 : BaseSIMDThreeScalar<U, 0b011, opc, FPR16, asm, []>;
}
-multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
+multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm> {
def v1i32: BaseSIMDThreeScalarTied<U, 0b10, R, opc, (outs FPR32:$dst),
(ins FPR32:$Rd, FPR32:$Rn, FPR32:$Rm),
asm, []>;
@@ -6826,16 +6874,19 @@ multiclass SIMDThreeScalarHSTied<bit U, bit R, bits<5> opc, string asm,
}
multiclass SIMDFPThreeScalar<bit U, bit S, bits<3> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
+ SDPatternOperator OpNode = null_frag,
+ Predicate pred = HasNEON> {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ let Predicates = [pred] in {
def NAME#64 : BaseSIMDThreeScalar<U, {S,0b11}, {0b11,opc}, FPR64, asm,
[(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
def NAME#32 : BaseSIMDThreeScalar<U, {S,0b01}, {0b11,opc}, FPR32, asm,
[(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- let Predicates = [HasNEON, HasFullFP16] in {
+ }
+ let Predicates = [pred, HasFullFP16] in {
def NAME#16 : BaseSIMDThreeScalar<U, {S,0b10}, {0b00,opc}, FPR16, asm,
[(set (f16 FPR16:$Rd), (OpNode (f16 FPR16:$Rn), (f16 FPR16:$Rm)))]>;
- } // Predicates = [HasNEON, HasFullFP16]
+ }
}
def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
@@ -6863,7 +6914,7 @@ class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
dag oops, dag iops, string asm, string cstr, list<dag> pat>
: I<oops, iops, asm,
"\t$Rd, $Rn, $Rm", cstr, pat>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -6916,7 +6967,7 @@ class BaseSIMDTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
string asm, list<dag> pat>
: I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
"\t$Rd, $Rn", "", pat>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -6938,7 +6989,7 @@ class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
string asm, list<dag> pat>
: I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
"\t$Rd, $Rn", "$Rd = $dst", pat>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -6958,7 +7009,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
RegisterClass regtype, string asm, string zero>
: I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
"\t$Rd, $Rn, #" # zero, "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -6977,7 +7028,7 @@ class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<2> size2, bits<5> opcode,
class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
: I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
[(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-17} = 0b011111100110000;
@@ -7025,10 +7076,13 @@ multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
(!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
}
-multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm> {
+multiclass SIMDFPTwoScalar<bit U, bit S, bits<5> opc, string asm,
+ Predicate pred = HasNEON> {
+ let Predicates = [pred] in {
def v1i64 : BaseSIMDTwoScalar<U, {S,1}, 0b00, opc, FPR64, FPR64, asm,[]>;
def v1i32 : BaseSIMDTwoScalar<U, {S,0}, 0b00, opc, FPR32, FPR32, asm,[]>;
- let Predicates = [HasNEON, HasFullFP16] in {
+ }
+ let Predicates = [pred, HasFullFP16] in {
def v1f16 : BaseSIMDTwoScalar<U, {S,1}, 0b11, opc, FPR16, FPR16, asm,[]>;
}
}
@@ -7096,7 +7150,7 @@ class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
string asm, string kind>
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31-30} = 0b01;
@@ -7136,7 +7190,7 @@ class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
string asm, string kind, list<dag> pattern>
: I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
"{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -7202,7 +7256,7 @@ multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
string operands, string constraints, list<dag> pattern>
: I<outs, ins, asm, operands, constraints, pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -7228,7 +7282,7 @@ class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype,
class SIMDDupFromElement<bit Q, string dstkind, string srckind,
ValueType vectype, ValueType insreg,
RegisterOperand vecreg, Operand idxtype,
- ValueType elttype, SDNode OpNode>
+ SDNode OpNode>
: BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup",
"{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" #
"|" # dstkind # "\t$Rd, $Rn$idx}", "",
@@ -7239,7 +7293,7 @@ class SIMDDupFromElement<bit Q, string dstkind, string srckind,
class SIMDDup64FromElement
: SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128,
- VectorIndexD, i64, AArch64duplane64> {
+ VectorIndexD, AArch64duplane64> {
bits<1> idx;
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
@@ -7248,7 +7302,7 @@ class SIMDDup64FromElement
class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
RegisterOperand vecreg>
: SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg,
- VectorIndexS, i64, AArch64duplane32> {
+ VectorIndexS, AArch64duplane32> {
bits<2> idx;
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
@@ -7257,7 +7311,7 @@ class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
RegisterOperand vecreg>
: SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg,
- VectorIndexH, i64, AArch64duplane16> {
+ VectorIndexH, AArch64duplane16> {
bits<3> idx;
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
@@ -7266,7 +7320,7 @@ class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
class SIMDDup8FromElement<bit Q, string size, ValueType vectype,
RegisterOperand vecreg>
: SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg,
- VectorIndexB, i64, AArch64duplane8> {
+ VectorIndexB, AArch64duplane8> {
bits<4> idx;
let Inst{20-17} = idx;
let Inst{16} = 1;
@@ -7295,6 +7349,25 @@ class SIMDMovAlias<string asm, string size, Instruction inst,
(inst regtype:$dst, V128:$src, idxtype:$idx)>;
multiclass SMov {
+ // SMOV with vector index of 0 is legal in Scalable Matrix Extension (SME)
+ // streaming mode.
+ let Predicates = [HasNEONorStreamingSVE] in {
+ def vi8to32_idx0 : SIMDSMov<0, ".b", GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00001;
+ }
+ def vi8to64_idx0 : SIMDSMov<1, ".b", GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b00001;
+ }
+ def vi16to32_idx0 : SIMDSMov<0, ".h", GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00010;
+ }
+ def vi16to64_idx0 : SIMDSMov<1, ".h", GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b00010;
+ }
+ def vi32to64_idx0 : SIMDSMov<1, ".s", GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b00100;
+ }
+ }
def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
@@ -7323,6 +7396,28 @@ multiclass SMov {
}
multiclass UMov {
+ // UMOV with vector index of 0 is legal in Scalable Matrix Extension (SME)
+ // streaming mode.
+ let Predicates = [HasNEONorStreamingSVE] in {
+ def vi8_idx0 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00001;
+ }
+ def vi16_idx0 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00010;
+ }
+ def vi32_idx0 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndex0> {
+ let Inst{20-16} = 0b00100;
+ }
+ def vi64_idx0 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndex0> {
+ let Inst{20-16} = 0b01000;
+ }
+ def : SIMDMovAlias<"mov", ".s",
+ !cast<Instruction>(NAME # vi32_idx0),
+ GPR32, VectorIndex0>;
+ def : SIMDMovAlias<"mov", ".d",
+ !cast<Instruction>(NAME # vi64_idx0),
+ GPR64, VectorIndex0>;
+ }
def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
@@ -7473,7 +7568,7 @@ class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
RegisterOperand listtype, string asm, string kind>
: I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Vd;
bits<5> Vn;
bits<5> Vm;
@@ -7494,7 +7589,7 @@ class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectyp
RegisterOperand listtype, string asm, string kind>
: I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
"\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Vd;
bits<5> Vn;
bits<5> Vm;
@@ -7609,11 +7704,11 @@ multiclass SIMDTableLookupTied<bit op, string asm> {
//----------------------------------------------------------------------------
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
- string kind, Operand idxtype>
- : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov",
+ string asm, string kind, Operand idxtype>
+ : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), asm,
"{\t$dst, $src" # kind # "$idx" #
"|\t$dst, $src$idx}", "", []>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> dst;
bits<5> src;
let Inst{31-21} = 0b01011110000;
@@ -7630,22 +7725,22 @@ class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
multiclass SIMDScalarCPY<string asm> {
- def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> {
+ def i8 : BaseSIMDScalarCPY<FPR8, V128, asm, ".b", VectorIndexB> {
bits<4> idx;
let Inst{20-17} = idx;
let Inst{16} = 1;
}
- def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", VectorIndexH> {
+ def i16 : BaseSIMDScalarCPY<FPR16, V128, asm, ".h", VectorIndexH> {
bits<3> idx;
let Inst{20-18} = idx;
let Inst{17-16} = 0b10;
}
- def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> {
+ def i32 : BaseSIMDScalarCPY<FPR32, V128, asm, ".s", VectorIndexS> {
bits<2> idx;
let Inst{20-19} = idx;
let Inst{18-16} = 0b100;
}
- def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> {
+ def i64 : BaseSIMDScalarCPY<FPR64, V128, asm, ".d", VectorIndexD> {
bits<1> idx;
let Inst{20} = idx;
let Inst{19-16} = 0b1000;
@@ -7678,7 +7773,7 @@ class BaseSIMDModifiedImm<bit Q, bit op, bit op2, dag oops, dag iops,
string asm, string op_string,
string cstr, list<dag> pattern>
: I<oops, iops, asm, op_string, cstr, pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<8> imm8;
let Inst{31} = 0;
@@ -7848,7 +7943,7 @@ class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -7878,7 +7973,7 @@ class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
(ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
"|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -7971,7 +8066,7 @@ class SIMDBF16MLALIndex<bit Q, string asm, SDPatternOperator OpNode>
(v8bf16
(AArch64duplane16 (v8bf16 V128_lo:$Rm),
VectorIndexH:$idx)))))]>,
- Sched<[WriteV]> {
+ Sched<[WriteVq]> {
bits<5> Rd;
bits<5> Rn;
bits<4> Rm;
@@ -8892,7 +8987,7 @@ class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
Operand immtype, string asm, list<dag> pattern>
: I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
asm, "\t$Rd, $Rn, $imm", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<7> imm;
@@ -8912,7 +9007,7 @@ class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
Operand immtype, string asm, list<dag> pattern>
: I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[WriteVd]> {
bits<5> Rd;
bits<5> Rn;
bits<7> imm;
@@ -9076,7 +9171,7 @@ class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
: I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -9099,7 +9194,7 @@ class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
: I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
"|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
let Inst{31} = 0;
@@ -10646,7 +10741,7 @@ class BaseSIMDThreeSameVectorComplex<bit Q, bit U, bits<2> size, bits<3> opcode,
: I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10720,7 +10815,7 @@ class BaseSIMDThreeSameVectorTiedComplex<bit Q, bit U, bits<2> size,
(ins regtype:$Rd, regtype:$Rn, regtype:$Rm, rottype:$rot), asm,
"{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $rot"
"|" # kind # "\t$Rd, $Rn, $Rm, $rot}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10796,7 +10891,7 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
"{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind #
"$idx, $rot" # "|" # apple_kind #
"\t$Rd, $Rn, $Rm$idx, $rot}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
+ Sched<[!if(Q, WriteVq, WriteVd)]> {
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10822,8 +10917,8 @@ class BaseSIMDIndexedTiedComplex<bit Q, bit U, bit Scalar, bits<2> size,
// The complex instructions index by pairs of elements, so the VectorIndexes
// don't match the lane types, and the index bits are different to the other
// classes.
-multiclass SIMDIndexedTiedComplexHSD<bit U, bit opc1, bit opc2, Operand rottype,
- string asm, SDPatternOperator OpNode> {
+multiclass SIMDIndexedTiedComplexHSD<bit opc1, bit opc2, Operand rottype,
+ string asm> {
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def v4f16_indexed : BaseSIMDIndexedTiedComplex<0, 1, 0, 0b01, opc1, opc2, V64,
V64, V128, VectorIndexD, rottype, asm, ".4h", ".4h",
@@ -10861,7 +10956,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
list<dag> pat>
: I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
- Sched<[WriteV]>{
+ Sched<[WriteVq]>{
bits<5> Rd;
bits<5> Rn;
let Inst{31-16} = 0b0100111000101000;
@@ -10887,7 +10982,7 @@ class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
: I<oops, iops, asm,
"{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
"|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
- Sched<[WriteV]>{
+ Sched<[WriteVq]>{
bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
@@ -10927,7 +11022,7 @@ class SHA2OpInst<bits<4> opc, string asm, string kind,
list<dag> pat>
: I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
"|" # kind # "\t$Rd, $Rn}", cstr, pat>,
- Sched<[WriteV]>{
+ Sched<[WriteVq]>{
bits<5> Rd;
bits<5> Rn;
let Inst{31-16} = 0b0101111000101000;
@@ -10950,7 +11045,7 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
// Armv8.2-A Crypto extensions
class BaseCryptoV82<dag oops, dag iops, string asm, string asmops, string cst,
list<dag> pattern>
- : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteV]> {
+ : I <oops, iops, asm, asmops, cst, pattern>, Sched<[WriteVq]> {
bits<5> Vd;
bits<5> Vn;
let Inst{31-25} = 0b1100111;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b03d421d3e6d..f8f8ee3f1e6c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1112,8 +1112,8 @@ bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
// The first operand can be a frame index where we'd normally expect a
// register.
assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
@@ -1155,8 +1155,7 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME: In order to convert CmpValue to 0 or 1
- CmpValue = MI.getOperand(2).getImm() != 0;
+ CmpValue = MI.getOperand(2).getImm();
return true;
case AArch64::ANDSWri:
case AArch64::ANDSXri:
@@ -1165,14 +1164,9 @@ bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
- // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
- // while the type of CmpValue is int. When converting uint64_t to int,
- // the high 32 bits of uint64_t will be lost.
- // In fact it causes a bug in spec2006-483.xalancbmk
- // CmpValue is only used to compare with zero in OptimizeCompareInstr
CmpValue = AArch64_AM::decodeLogicalImmediate(
MI.getOperand(2).getImm(),
- MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
+ MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
return true;
}
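The FIXME removed above is the motivation for widening CmpMask and CmpValue to int64_t: decodeLogicalImmediate returns a uint64_t, and funnelling that through a 32-bit int drops the high bits, so a nonzero 64-bit ANDS mask could be mistaken for zero (the deleted comment cites spec2006-483.xalancbmk). A standalone sketch of that failure mode, not part of the patch, with an illustrative mask value rather than one taken from the original bug report:

// Not part of the patch: shows why CmpValue must be 64 bits wide.
#include <cstdint>
#include <iostream>

int main() {
  uint64_t Decoded = 0x0000FFFF00000000ULL; // a 64-bit logical-immediate style mask
  int OldCmpValue = static_cast<int>(Decoded);          // truncates to the low 32 bits (0 here)
  int64_t NewCmpValue = static_cast<int64_t>(Decoded);  // the full value survives
  std::cout << "old: " << OldCmpValue << "  new: " << NewCmpValue << '\n';
  return 0;
}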
@@ -1433,8 +1427,8 @@ bool AArch64InstrInfo::optimizePTestInstr(
/// instruction.
/// Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue, const MachineRegisterInfo *MRI) const {
assert(CmpInstr.getParent());
assert(MRI);
@@ -1462,10 +1456,6 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
- // Continue only if we have a "ri" where immediate is zero.
- // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
- // function.
- assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
if (SrcReg2 != 0)
return false;
@@ -1473,9 +1463,10 @@ bool AArch64InstrInfo::optimizeCompareInstr(
if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
return false;
- if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
+ if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
return true;
- return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
+ return (CmpValue == 0 || CmpValue == 1) &&
+ removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
}
/// Get opcode of S version of Instr.
@@ -2099,10 +2090,8 @@ bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
default:
break;
case TargetOpcode::COPY: {
- // FPR64 copies will by lowered to ORR.16b
Register DstReg = MI.getOperand(0).getReg();
- return (AArch64::FPR64RegClass.contains(DstReg) ||
- AArch64::FPR128RegClass.contains(DstReg));
+ return AArch64::FPR128RegClass.contains(DstReg);
}
case AArch64::ORRv16i8:
if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
@@ -2274,32 +2263,35 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::STNPSi:
case AArch64::LDG:
case AArch64::STGPi:
+
case AArch64::LD1B_IMM:
- case AArch64::LD1H_IMM:
- case AArch64::LD1W_IMM:
- case AArch64::LD1D_IMM:
- case AArch64::ST1B_IMM:
- case AArch64::ST1H_IMM:
- case AArch64::ST1W_IMM:
- case AArch64::ST1D_IMM:
case AArch64::LD1B_H_IMM:
+ case AArch64::LD1B_S_IMM:
+ case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_H_IMM:
+ case AArch64::LD1SB_S_IMM:
+ case AArch64::LD1SB_D_IMM:
+ case AArch64::LD1H_IMM:
case AArch64::LD1H_S_IMM:
+ case AArch64::LD1H_D_IMM:
case AArch64::LD1SH_S_IMM:
+ case AArch64::LD1SH_D_IMM:
+ case AArch64::LD1W_IMM:
case AArch64::LD1W_D_IMM:
case AArch64::LD1SW_D_IMM:
+ case AArch64::LD1D_IMM:
+
+ case AArch64::ST1B_IMM:
case AArch64::ST1B_H_IMM:
- case AArch64::ST1H_S_IMM:
- case AArch64::ST1W_D_IMM:
- case AArch64::LD1B_S_IMM:
- case AArch64::LD1SB_S_IMM:
- case AArch64::LD1H_D_IMM:
- case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
- case AArch64::ST1H_D_IMM:
- case AArch64::LD1B_D_IMM:
- case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
+ case AArch64::ST1H_IMM:
+ case AArch64::ST1H_S_IMM:
+ case AArch64::ST1H_D_IMM:
+ case AArch64::ST1W_IMM:
+ case AArch64::ST1W_D_IMM:
+ case AArch64::ST1D_IMM:
+
case AArch64::LD1RB_IMM:
case AArch64::LD1RB_H_IMM:
case AArch64::LD1RB_S_IMM:
@@ -2316,6 +2308,32 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LD1RW_D_IMM:
case AArch64::LD1RSW_IMM:
case AArch64::LD1RD_IMM:
+
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1SB_D_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1SH_D_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
+ case AArch64::LDNF1D_IMM:
return 3;
case AArch64::ADDG:
case AArch64::STGOffset:
@@ -2866,10 +2884,22 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1H_IMM:
case AArch64::LD1W_IMM:
case AArch64::LD1D_IMM:
+ case AArch64::LDNT1B_ZRI:
+ case AArch64::LDNT1H_ZRI:
+ case AArch64::LDNT1W_ZRI:
+ case AArch64::LDNT1D_ZRI:
case AArch64::ST1B_IMM:
case AArch64::ST1H_IMM:
case AArch64::ST1W_IMM:
case AArch64::ST1D_IMM:
+ case AArch64::STNT1B_ZRI:
+ case AArch64::STNT1H_ZRI:
+ case AArch64::STNT1W_ZRI:
+ case AArch64::STNT1D_ZRI:
+ case AArch64::LDNF1B_IMM:
+ case AArch64::LDNF1H_IMM:
+ case AArch64::LDNF1W_IMM:
+ case AArch64::LDNF1D_IMM:
// A full vectors worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(16);
@@ -2886,6 +2916,12 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::ST1B_H_IMM:
case AArch64::ST1H_S_IMM:
case AArch64::ST1W_D_IMM:
+ case AArch64::LDNF1B_H_IMM:
+ case AArch64::LDNF1SB_H_IMM:
+ case AArch64::LDNF1H_S_IMM:
+ case AArch64::LDNF1SH_S_IMM:
+ case AArch64::LDNF1W_D_IMM:
+ case AArch64::LDNF1SW_D_IMM:
// A half vector worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(8);
@@ -2899,6 +2935,10 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1SH_D_IMM:
case AArch64::ST1B_S_IMM:
case AArch64::ST1H_D_IMM:
+ case AArch64::LDNF1B_S_IMM:
+ case AArch64::LDNF1SB_S_IMM:
+ case AArch64::LDNF1H_D_IMM:
+ case AArch64::LDNF1SH_D_IMM:
// A quarter vector worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(4);
@@ -2909,6 +2949,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LD1B_D_IMM:
case AArch64::LD1SB_D_IMM:
case AArch64::ST1B_D_IMM:
+ case AArch64::LDNF1B_D_IMM:
+ case AArch64::LDNF1SB_D_IMM:
// A eighth vector worth of data
// Width = mbytes * elements
Scale = TypeSize::Scalable(2);
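The Scale values assigned in this function are scalable quantities: the byte width of these SVE fills is a known minimum (16, 8, 4 or 2 bytes for full, half, quarter and eighth fills) multiplied by the implementation's vscale at run time. A plain-C++ model of that relationship, independent of LLVM's TypeSize class and using illustrative vscale values:

// Scalar model of the scalable access widths used above.
#include <cstdio>

static unsigned widthInBytes(unsigned KnownMinBytes, unsigned VScale) {
  return KnownMinBytes * VScale; // corresponds to TypeSize::Scalable(KnownMinBytes)
}

int main() {
  const unsigned VScales[] = {1, 2, 4}; // 128-, 256- and 512-bit SVE implementations
  for (unsigned VS : VScales)
    std::printf("vscale=%u  full=%u  half=%u  quarter=%u  eighth=%u bytes\n",
                VS, widthInBytes(16, VS), widthInBytes(8, VS),
                widthInBytes(4, VS), widthInBytes(2, VS));
  return 0;
}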
@@ -3503,77 +3545,37 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR64RegClass.contains(DestReg) &&
AArch64::FPR64RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR32RegClass.contains(DestReg) &&
AArch64::FPR32RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR16RegClass.contains(DestReg) &&
AArch64::FPR16RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
- &AArch64::FPR32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ DestReg =
+ RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
+ SrcReg =
+ RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
if (AArch64::FPR8RegClass.contains(DestReg) &&
AArch64::FPR8RegClass.contains(SrcReg)) {
- if (Subtarget.hasNEON()) {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
- &AArch64::FPR32RegClass);
- SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
+ DestReg =
+ RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
+ SrcReg =
+ RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
return;
}
@@ -4339,6 +4341,10 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
case AArch64::ST1Twov1d:
case AArch64::ST1Threev1d:
case AArch64::ST1Fourv1d:
+ case AArch64::ST1i8:
+ case AArch64::ST1i16:
+ case AArch64::ST1i32:
+ case AArch64::ST1i64:
case AArch64::IRG:
case AArch64::IRGstack:
case AArch64::STGloop:
@@ -4911,6 +4917,55 @@ static bool getFMAPatterns(MachineInstr &Root,
return Found;
}
+static bool getFMULPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ MachineBasicBlock &MBB = *Root.getParent();
+ bool Found = false;
+
+ auto Match = [&](unsigned Opcode, int Operand,
+ MachineCombinerPattern Pattern) -> bool {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ MachineOperand &MO = Root.getOperand(Operand);
+ MachineInstr *MI = nullptr;
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
+ MI = MRI.getUniqueVRegDef(MO.getReg());
+ if (MI && MI->getOpcode() == Opcode) {
+ Patterns.push_back(Pattern);
+ return true;
+ }
+ return false;
+ };
+
+ typedef MachineCombinerPattern MCP;
+
+ switch (Root.getOpcode()) {
+ default:
+ return false;
+ case AArch64::FMULv2f32:
+ Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
+ Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
+ break;
+ case AArch64::FMULv2f64:
+ Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
+ Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
+ break;
+ case AArch64::FMULv4f16:
+ Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
+ Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
+ break;
+ case AArch64::FMULv4f32:
+ Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
+ Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
+ break;
+ case AArch64::FMULv8f16:
+ Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
+ Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
+ break;
+ }
+
+ return Found;
+}
+
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
@@ -4974,6 +5029,16 @@ bool AArch64InstrInfo::isThroughputPattern(
case MachineCombinerPattern::FMLSv2f64_OP2:
case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
case MachineCombinerPattern::FMLSv4f32_OP2:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2:
case MachineCombinerPattern::MULADDv8i8_OP1:
case MachineCombinerPattern::MULADDv8i8_OP2:
case MachineCombinerPattern::MULADDv16i8_OP1:
@@ -5030,6 +5095,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
if (getMaddPatterns(Root, Patterns))
return true;
// Floating point patterns
+ if (getFMULPatterns(Root, Patterns))
+ return true;
if (getFMAPatterns(Root, Patterns))
return true;
@@ -5118,6 +5185,42 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
+static MachineInstr *
+genIndexedMultiply(MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ unsigned IdxDupOp, unsigned MulOpc,
+ const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
+ assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
+ "Invalid index of FMUL operand");
+
+ MachineFunction &MF = *Root.getMF();
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+
+ MachineInstr *Dup =
+ MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
+
+ Register DupSrcReg = Dup->getOperand(1).getReg();
+ MRI.clearKillFlags(DupSrcReg);
+ MRI.constrainRegClass(DupSrcReg, RC);
+
+ unsigned DupSrcLane = Dup->getOperand(2).getImm();
+
+ unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
+ MachineOperand &MulOp = Root.getOperand(IdxMulOp);
+
+ Register ResultReg = Root.getOperand(0).getReg();
+
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
+ .add(MulOp)
+ .addReg(DupSrcReg)
+ .addImm(DupSrcLane);
+
+ InsInstrs.push_back(MIB);
+ return &Root;
+}
+
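genIndexedMultiply relies on a simple arithmetic identity: multiplying by a register produced by DUP of one lane of y gives the same result as a lane-indexed FMUL that reads that lane of y directly, so the DUP can be dropped. A scalar model of the equivalence in plain C++ (the 4-lane shape and the lane number are illustrative, not taken from the patch):

// fmul(x, dup(y, lane)) == fmul_indexed(x, y, lane), modelled on plain floats.
#include <array>
#include <cassert>

int main() {
  std::array<float, 4> X{1.f, 2.f, 3.f, 4.f}, Y{5.f, 6.f, 7.f, 8.f};
  const unsigned Lane = 2;

  std::array<float, 4> Dup, ViaDup, Indexed;
  Dup.fill(Y[Lane]); // DUPv4i32lane: broadcast one lane of Y
  for (unsigned I = 0; I < 4; ++I) {
    ViaDup[I] = X[I] * Dup[I];   // FMULv4f32 against the broadcast register
    Indexed[I] = X[I] * Y[Lane]; // FMULv4i32_indexed reads the lane directly
  }
  assert(ViaDup == Indexed); // identical results, one fewer instruction
  return 0;
}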
/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
/// instructions.
///
@@ -5329,15 +5432,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
uint64_t UImm = SignExtend64(Imm, BitSize);
uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- }
+ if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ return;
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
case MachineCombinerPattern::MULSUBW_OP1:
@@ -5420,15 +5523,15 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
uint64_t UImm = SignExtend64(-Imm, BitSize);
uint64_t Encoding;
- if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- MachineInstrBuilder MIB1 =
- BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
- .addReg(ZeroReg)
- .addImm(Encoding);
- InsInstrs.push_back(MIB1);
- InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
- MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
- }
+ if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
+ return;
+ MachineInstrBuilder MIB1 =
+ BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
+ .addReg(ZeroReg)
+ .addImm(Encoding);
+ InsInstrs.push_back(MIB1);
+ InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
+ MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
break;
}
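The two hunks above make the same structural change: when processLogicalImmediate cannot encode the immediate, the code now returns early instead of wrapping the BuildMI sequence in a conditional. A minimal guard-clause sketch with stand-in names (tryEncode is illustrative, not an LLVM API):

// Guard-clause refactor: fail fast, then build unconditionally.
#include <cstdint>
#include <cstdio>

// Stand-in for AArch64_AM::processLogicalImmediate (illustrative only).
static bool tryEncode(uint64_t Imm, uint64_t &Encoding) {
  if (Imm == 0 || Imm == ~0ULL)
    return false; // all-zero and all-one masks have no logical-immediate encoding
  Encoding = Imm; // a real encoder would compute the N:immr:imms fields here
  return true;
}

static void emitOrr(uint64_t Imm) {
  uint64_t Encoding;
  if (!tryEncode(Imm, Encoding)) // early return replaces the nested if
    return;
  std::printf("ORR xN, xzr, #0x%llx\n", (unsigned long long)Encoding);
}

int main() {
  emitOrr(0xFF);
  emitOrr(0); // silently skipped, as in the refactored code path
  return 0;
}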
@@ -6076,12 +6179,50 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
}
break;
}
+ case MachineCombinerPattern::FMULv2i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv2i64_indexed_OP1:
+ case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv4i32_indexed_OP1:
+ case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
+ &AArch64::FPR128RegClass, MRI);
+ break;
+ }
+ case MachineCombinerPattern::FMULv8i16_indexed_OP1:
+ case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
+ unsigned IdxDupOp =
+ (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
+ genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
+ &AArch64::FPR128_loRegClass, MRI);
+ break;
+ }
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
- // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
- // CodeGen/AArch64/urem-seteq-nonzero.ll.
- // assert(MUL && "MUL was never set");
- DelInstrs.push_back(MUL);
+ if (MUL)
+ DelInstrs.push_back(MUL);
DelInstrs.push_back(&Root);
}
@@ -6624,13 +6765,8 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- const std::vector<MCCFIInstruction> &CFIInstructions =
- RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
- if (MBBI->isCFIInstruction()) {
- unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
- MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ if (MBBI->isCFIInstruction())
CFICount++;
- }
MBBI++;
}
@@ -7212,7 +7348,8 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
.setMIFlags(MachineInstr::FrameSetup);
// If v8.3a features are available we can replace a RET instruction by
- // RETAA or RETAB and omit the AUT instructions
+ // RETAA or RETAB and omit the AUT instructions. In this case the
+ // DW_CFA_AARCH64_negate_ra_state can't be emitted.
if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
MBBAUT->getOpcode() == AArch64::RET) {
BuildMI(MBB, MBBAUT, DL,
@@ -7225,6 +7362,11 @@ static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
: AArch64::AUTIBSP))
.setMIFlag(MachineInstr::FrameDestroy);
+ unsigned CFIIndexAuth =
+ MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
+ BuildMI(MBB, MBBAUT, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndexAuth)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
}
}
@@ -7401,7 +7543,11 @@ MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
unsigned Reg = findRegisterToSaveLRTo(C);
assert(Reg != 0 && "No callee-saved register available?");
- // Save and restore LR from that register.
+ // LR has to be a live-in so that we can save it.
+ if (!MBB.isLiveIn(AArch64::LR))
+ MBB.addLiveIn(AArch64::LR);
+
+ // Save and restore LR from Reg.
Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
.addReg(AArch64::XZR)
.addReg(AArch64::LR)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index e25189e409a3..b2f9e82a7e8b 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -227,12 +227,12 @@ public:
/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
/// Return true if the comparison instruction can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
bool optimizeCondBranch(MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 682cec361728..db8e0c5dac4a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -27,6 +27,21 @@ def HasV8_6a : Predicate<"Subtarget->hasV8_6aOps()">,
AssemblerPredicate<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a : Predicate<"Subtarget->hasV8_7aOps()">,
AssemblerPredicate<(all_of HasV8_7aOps), "armv8.7a">;
+def HasV9_0a : Predicate<"Subtarget->hasV9_0aOps()">,
+ AssemblerPredicate<(all_of HasV9_0aOps), "armv9-a">;
+def HasV9_1a : Predicate<"Subtarget->hasV9_1aOps()">,
+ AssemblerPredicate<(all_of HasV9_1aOps), "armv9.1a">;
+def HasV9_2a : Predicate<"Subtarget->hasV9_2aOps()">,
+ AssemblerPredicate<(all_of HasV9_2aOps), "armv9.2a">;
+def HasV8_0r : Predicate<"Subtarget->hasV8_0rOps()">,
+ AssemblerPredicate<(all_of HasV8_0rOps), "armv8-r">;
+
+def HasEL2VMSA : Predicate<"Subtarget->hasEL2VMSA()">,
+ AssemblerPredicate<(all_of FeatureEL2VMSA), "el2vmsa">;
+
+def HasEL3 : Predicate<"Subtarget->hasEL3()">,
+ AssemblerPredicate<(all_of FeatureEL3), "el3">;
+
def HasVH : Predicate<"Subtarget->hasVH()">,
AssemblerPredicate<(all_of FeatureVH), "vh">;
@@ -63,9 +78,6 @@ def HasAM : Predicate<"Subtarget->hasAM()">,
def HasSEL2 : Predicate<"Subtarget->hasSEL2()">,
AssemblerPredicate<(all_of FeatureSEL2), "sel2">;
-def HasPMU : Predicate<"Subtarget->hasPMU()">,
- AssemblerPredicate<(all_of FeaturePMU), "pmu">;
-
def HasTLB_RMI : Predicate<"Subtarget->hasTLB_RMI()">,
AssemblerPredicate<(all_of FeatureTLB_RMI), "tlb-rmi">;
@@ -128,6 +140,24 @@ def HasSMEF64 : Predicate<"Subtarget->hasSMEF64()">,
AssemblerPredicate<(all_of FeatureSMEF64), "sme-f64">;
def HasSMEI64 : Predicate<"Subtarget->hasSMEI64()">,
AssemblerPredicate<(all_of FeatureSMEI64), "sme-i64">;
+def HasStreamingSVE : Predicate<"Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(all_of FeatureStreamingSVE), "streaming-sve">;
+// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
+// so they should be enabled if either feature has been specified.
+def HasSVEorStreamingSVE
+ : Predicate<"Subtarget->hasSVE() || Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(any_of FeatureSVE, FeatureStreamingSVE),
+ "streaming-sve or sve">;
+def HasSVE2orStreamingSVE
+ : Predicate<"Subtarget->hasSVE2() || Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(any_of FeatureSVE2, FeatureStreamingSVE),
+ "streaming-sve or sve2">;
+// A subset of NEON instructions are legal in Streaming SVE execution mode,
+// so they should be enabled if either feature has been specified.
+def HasNEONorStreamingSVE
+ : Predicate<"Subtarget->hasNEON() || Subtarget->hasStreamingSVE()">,
+ AssemblerPredicate<(any_of FeatureNEON, FeatureStreamingSVE),
+ "streaming-sve or neon">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
@@ -180,6 +210,8 @@ def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
"NegativeImmediates">;
+def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
+
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
@@ -673,40 +705,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in {
// removed, along with the AArch64Wrapper node.
let AddedComplexity = 10 in
-def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
- [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
+def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
+ [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
Sched<[WriteLDAdr]>;
// The MOVaddr instruction should match only when the add is not folded
// into a load or store address.
def MOVaddr
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
tglobaladdr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrJT
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
tjumptable:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrCP
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
tconstpool:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrBA
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
tblockaddress:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrTLS
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
tglobaltlsaddr:$low))]>,
Sched<[WriteAdrAdr]>;
def MOVaddrEXT
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
+ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
texternalsym:$low))]>,
Sched<[WriteAdrAdr]>;
// Normally AArch64addlow either gets folded into a following ldr/str,
@@ -714,8 +746,8 @@ def MOVaddrEXT
// might appear without either of them, so allow lowering it into a plain
// add.
def ADDlowTLS
- : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low),
- [(set GPR64:$dst, (AArch64addlow GPR64:$src,
+ : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
+ [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
tglobaltlsaddr:$low))]>,
Sched<[WriteAdr]>;
@@ -855,7 +887,7 @@ defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
}
// ARMv8.6-A BFloat
-let Predicates = [HasBF16] in {
+let Predicates = [HasNEON, HasBF16] in {
defm BFDOT : SIMDThreeSameVectorBFDot<1, "bfdot">;
defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
def BFMMLA : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
@@ -865,7 +897,6 @@ def BFMLALBIdx : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
def BFMLALTIdx : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
def BFCVTN : SIMD_BFCVTN;
def BFCVTN2 : SIMD_BFCVTN2;
-def BFCVT : BF16ToSinglePrecision<"bfcvt">;
// Vector-scalar BFDOT:
// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
@@ -885,6 +916,10 @@ def : Pat<(v2f32 (int_aarch64_neon_bfdot
VectorIndexS:$idx)>;
}
+let Predicates = [HasNEONorStreamingSVE, HasBF16] in {
+def BFCVT : BF16ToSinglePrecision<"bfcvt">;
+}
+
// ARMv8.6A AArch64 matrix multiplication
let Predicates = [HasMatMulInt8] in {
def SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
@@ -958,6 +993,15 @@ def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
+class EOR3_pattern<ValueType VecTy>
+ : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
+ (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
+
+def : EOR3_pattern<v16i8>;
+def : EOR3_pattern<v8i16>;
+def : EOR3_pattern<v4i32>;
+def : EOR3_pattern<v2i64>;
+
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
@@ -1034,8 +1078,7 @@ defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
"fcmla", null_frag>;
defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
"fcadd", null_frag>;
-defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
- null_frag>;
+defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
@@ -1172,23 +1215,25 @@ let Predicates = [HasPAuth] in {
def : InstAlias<"autib1716", (AUTIB1716), 1>;
def : InstAlias<"xpaclri", (XPACLRI), 1>;
- multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm> {
- def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia")>;
- def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib")>;
- def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da")>;
- def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db")>;
- def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza")>;
- def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza")>;
- def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb")>;
- def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb")>;
+ multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
+ SDPatternOperator op> {
+ def IA : SignAuthOneData<prefix, 0b00, !strconcat(asm, "ia"), op>;
+ def IB : SignAuthOneData<prefix, 0b01, !strconcat(asm, "ib"), op>;
+ def DA : SignAuthOneData<prefix, 0b10, !strconcat(asm, "da"), op>;
+ def DB : SignAuthOneData<prefix, 0b11, !strconcat(asm, "db"), op>;
+ def IZA : SignAuthZero<prefix_z, 0b00, !strconcat(asm, "iza"), op>;
+ def DZA : SignAuthZero<prefix_z, 0b10, !strconcat(asm, "dza"), op>;
+ def IZB : SignAuthZero<prefix_z, 0b01, !strconcat(asm, "izb"), op>;
+ def DZB : SignAuthZero<prefix_z, 0b11, !strconcat(asm, "dzb"), op>;
}
- defm PAC : SignAuth<0b000, 0b010, "pac">;
- defm AUT : SignAuth<0b001, 0b011, "aut">;
+ defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
+ defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
def XPACI : ClearAuth<0, "xpaci">;
def XPACD : ClearAuth<1, "xpacd">;
- def PACGA : SignAuthTwoOperand<0b1100, "pacga", null_frag>;
+
+ def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
// Combined Instructions
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -1272,6 +1317,7 @@ def : InstAlias<"clrex", (CLREX 0xf)>;
def : InstAlias<"isb", (ISB 0xf)>;
def : InstAlias<"ssbb", (DSB 0)>;
def : InstAlias<"pssbb", (DSB 4)>;
+def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
def MRS : MRSI;
def MSR : MSRI;
@@ -1325,7 +1371,7 @@ def TSTART : TMSystemI<0b0000, "tstart",
def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
def TCANCEL : TMSystemException<0b011, "tcancel",
- [(int_aarch64_tcancel i64_imm0_65535:$imm)]>;
+ [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
let mayLoad = 0;
@@ -1344,12 +1390,12 @@ let PostEncoderMethod = "fixMOVZ" in
defm MOVZ : MoveImmediate<0b10, "movz">;
// First group of aliases covers an implicit "lsl #0".
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, i32_imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, i32_imm0_65535:$imm, 0), 0>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, i32_imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, i32_imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
@@ -1620,8 +1666,8 @@ def : ShiftAlias<"rorv", RORVXr, GPR64>;
// Multiply-add
let AddedComplexity = 5 in {
-defm MADD : MulAccum<0, "madd", add>;
-defm MSUB : MulAccum<1, "msub", sub>;
+defm MADD : MulAccum<0, "madd">;
+defm MSUB : MulAccum<1, "msub">;
def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
(MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
@@ -2334,16 +2380,16 @@ def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
}
def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
-def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
+def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
-def SMC : ExceptionGeneration<0b000, 0b11, "smc">;
+def SMC : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
// DCPSn defaults to an immediate operand of zero if unspecified.
def : InstAlias<"dcps1", (DCPS1 0)>;
def : InstAlias<"dcps2", (DCPS2 0)>;
-def : InstAlias<"dcps3", (DCPS3 0)>;
+def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
def UDF : UDFType<0, "udf">;
@@ -3114,7 +3160,7 @@ defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
-defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str", f128, store>;
+defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
let Predicates = [UseSTRQro], AddedComplexity = 10 in {
def : Pat<(store (f128 FPR128:$Rt),
@@ -3710,35 +3756,56 @@ defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
// AArch64's FCVT instructions saturate when out of range.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
- def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
- (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
- def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
- (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
+ def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
+ (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+ def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
+ (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+
+ let Predicates = [HasFullFP16] in {
+ def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
+ def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+ def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
+ (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+ def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
+ (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}
defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
+ }
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+ }
def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
@@ -3763,10 +3830,12 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, strin
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;
// These instructions saturate like fp_to_[su]int_sat.
+ let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
+ }
def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
@@ -4127,6 +4196,22 @@ defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
+// AArch64's FCVT instructions saturate when out of range.
+multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
+ def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
+ (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
+ def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
+ (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
+ def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
+ (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
+ def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
+ (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
+ def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
+ (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
+}
+defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
+defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
+
def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
@@ -4606,9 +4691,9 @@ defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
-defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>;
-defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>;
-defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONorStreamingSVE>;
+defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONorStreamingSVE>;
+defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONorStreamingSVE>;
defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
@@ -4707,9 +4792,9 @@ defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">;
def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">;
+defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe", HasNEONorStreamingSVE>;
+defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx", HasNEONorStreamingSVE>;
+defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte", HasNEONorStreamingSVE>;
defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>;
@@ -5211,7 +5296,7 @@ def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
// AdvSIMD scalar CPY instruction
//----------------------------------------------------------------------------
-defm CPY : SIMDScalarCPY<"cpy">;
+defm CPY : SIMDScalarCPY<"mov">;
//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
@@ -5693,7 +5778,7 @@ def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
(v4i16 (AArch64uaddv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
(i64 0))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (UADDLVv4i16v V64:$op), ssub), ssub)>;
+ (UADDLVv8i8v V64:$op), hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (AArch64uaddlp
(v16i8 V128:$op))))), (i64 0))),
(EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
@@ -6964,9 +7049,9 @@ def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
// for AES fusion on some CPUs.
let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
- Sched<[WriteV]>;
+ Sched<[WriteVq]>;
def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
- Sched<[WriteV]>;
+ Sched<[WriteVq]>;
}
// Only use constrained versions of AES(I)MC instructions if they are paired with
@@ -8092,6 +8177,20 @@ let AddedComplexity = 10 in {
// FIXME: add SVE dot-product patterns.
}
+// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
+// so that it can be used as input to inline asm, and vice versa.
+def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
+def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
+def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
+ GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
+ (REG_SEQUENCE GPR64x8Class,
+ $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
+ $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
+foreach i = 0-7 in {
+ def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
+ (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
+}
+
let Predicates = [HasLS64] in {
def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
(outs GPR64x8:$Rt)>;
@@ -8114,6 +8213,10 @@ def StoreSwiftAsyncContext
: Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
[]>, Sched<[]>;
+def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
+def : Pat<(AArch64AssertZExtBool GPR32:$op),
+ (i32 GPR32:$op)>;
+
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index bf042c83294a..3a836ac33064 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1613,8 +1613,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the stored value and the address of the second instruction is
// the same, it needs to be using the updated register and therefore
// it must not be folded.
- bool IsMIRegTheSame =
- getLdStRegOp(MI).getReg() == getLdStBaseOp(MI).getReg();
+ bool IsMIRegTheSame = TRI->regsOverlap(getLdStRegOp(MI).getReg(),
+ getLdStBaseOp(MI).getReg());
if (IsOutOfBounds || IsBaseRegUsed || IsBaseRegModified ||
IsMIRegTheSame) {
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
index be19d4953857..487e1f6162b9 100644
--- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp
@@ -363,7 +363,7 @@ static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
int InstCount = RegCount / 2;
// Do not use a helper call when not saving LR.
- if (std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end())
+ if (!llvm::is_contained(Regs, AArch64::LR))
return false;
switch (Type) {
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
new file mode 100644
index 000000000000..42db18332f1c
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -0,0 +1,293 @@
+//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs the following peephole optimizations at the MIR level.
+//
+// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
+// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+//
+// The mov pseudo instruction could be expanded to multiple mov instructions
+// later. In this case, we could try to split the constant operand of the mov
+// instruction into two bitmask immediates. This yields two AND instructions
+// instead of a multi-instruction `mov` + `and` sequence.
+//
+// 2. Remove redundant ORRWrs which is generated by zero-extend.
+//
+// %3:gpr32 = ORRWrs $wzr, %2, 0
+// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
+//
+// If the source operand of the ORRWrs is defined by a 32-bit form of an
+// AArch64 instruction, the ORRWrs can be removed because the upper 32 bits of
+// the source operand are already set to zero.
+//
+//===----------------------------------------------------------------------===//
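
A minimal standalone sketch of the bitmask-split arithmetic behind optimization 1, shown here for illustration only and not part of this patch. It uses the example constant from splitBitmaskImm below, with GCC/Clang bit-scan builtins standing in for LLVM's countTrailingZeros/Log2_64:

  #include <cassert>
  #include <cstdint>

  int main() {
    // 0x00200400 has only bits 21 and 10 set; it is not a valid logical immediate.
    uint32_t Imm = 0x00200400;
    unsigned LowestBitSet = __builtin_ctz(Imm);        // 10
    unsigned HighestBitSet = 31 - __builtin_clz(Imm);  // 21
    // A contiguous run of ones covering bits [10, 21]: 0x003FFC00.
    uint32_t Mask1 = (2u << HighestBitSet) - (1u << LowestBitSet);
    // Ones everywhere outside that run, plus the original bits: 0xFFE007FF.
    uint32_t Mask2 = Imm | ~Mask1;
    // Both masks are (rotated) runs of ones, so each is encodable as a single
    // ANDWri immediate, and ANDing them together reproduces the original constant.
    assert((Mask1 & Mask2) == Imm);
    return 0;
  }
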
+
+#include "AArch64ExpandImm.h"
+#include "AArch64InstrInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-mi-peephole-opt"
+
+namespace {
+
+struct AArch64MIPeepholeOpt : public MachineFunctionPass {
+ static char ID;
+
+ AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
+ initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const AArch64InstrInfo *TII;
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+
+ template <typename T>
+ bool visitAND(MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ bool visitORR(MachineInstr &MI,
+ SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "AArch64 MI Peephole Optimization pass";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+char AArch64MIPeepholeOpt::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
+ "AArch64 MI Peephole Optimization", false, false)
+
+template <typename T>
+static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
+ T UImm = static_cast<T>(Imm);
+ if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
+ return false;
+
+ // If this immediate can be handled by one instruction, do not split it.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+ AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
+ if (Insn.size() == 1)
+ return false;
+
+ // A bitmask immediate consists of consecutive ones. For example, the
+ // constant 0b00000000001000000000010000000000 does not consist of
+ // consecutive ones, but it can be split into two bitmask immediates such as
+ // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
+ // ANDing these two bitmask immediates together reproduces the original one.
+ unsigned LowestBitSet = countTrailingZeros(UImm);
+ unsigned HighestBitSet = Log2_64(UImm);
+
+ // Create a mask filled with ones from the position of the lowest set bit
+ // to the position of the highest set bit.
+ T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
+ (static_cast<T>(1) << LowestBitSet);
+ // Create a mask filled with ones everywhere outside the range from the
+ // lowest set bit to the highest set bit.
+ T NewImm2 = UImm | ~NewImm1;
+
+ // If the split value is not a valid bitmask immediate, do not split this
+ // constant.
+ if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
+ return false;
+
+ Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
+ Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
+ return true;
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitAND(
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ // Try the transformation below.
+ //
+ // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
+ // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
+ //
+ // The mov pseudo instruction could be expanded to multiple mov instructions
+ // later. Try to split the constant operand of the mov instruction into two
+ // bitmask immediates, which results in only two AND instructions instead of
+ // a multi-instruction mov + and sequence.
+
+ unsigned RegSize = sizeof(T) * 8;
+ assert((RegSize == 32 || RegSize == 64) &&
+ "Invalid RegSize for AND bitmask peephole optimization");
+
+ // If the AND's block is inside a loop, only do the transform when the AND is
+ // loop invariant.
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineLoop *L = MLI->getLoopFor(MBB);
+ if (L && !L->isLoopInvariant(MI))
+ return false;
+
+ // Check whether the AND's source operand is a MOV with an immediate.
+ MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+ if (!MovMI)
+ return false;
+
+ MachineInstr *SubregToRegMI = nullptr;
+ // If it is SUBREG_TO_REG, check its operand.
+ if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
+ SubregToRegMI = MovMI;
+ MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
+ if (!MovMI)
+ return false;
+ }
+
+ if (MovMI->getOpcode() != AArch64::MOVi32imm &&
+ MovMI->getOpcode() != AArch64::MOVi64imm)
+ return false;
+
+ // If the MOV has multiple uses, do not split the immediate because doing so
+ // would create more instructions.
+ if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
+ return false;
+
+ if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+ return false;
+
+ // Split the constant operand of the MOV into two bitmask immediates.
+ T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
+ // For the 32-bit form of an instruction, the upper 32 bits of the destination
+ // register are set to zero. If there is a SUBREG_TO_REG, clear the upper 32
+ // bits of UImm accordingly.
+ if (SubregToRegMI)
+ UImm &= 0xFFFFFFFF;
+ T Imm1Enc;
+ T Imm2Enc;
+ if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
+ return false;
+
+ // Create new AND MIs.
+ DebugLoc DL = MI.getDebugLoc();
+ const TargetRegisterClass *ANDImmRC =
+ (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
+ Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
+ unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
+
+ MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+ .addReg(SrcReg)
+ .addImm(Imm1Enc);
+
+ MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+ BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+ .addReg(NewTmpReg)
+ .addImm(Imm2Enc);
+
+ MRI->replaceRegWith(DstReg, NewDstReg);
+ // replaceRegWith also changed MI's definition register; restore it so that
+ // SSA form is preserved until MI is deleted.
+ MI.getOperand(0).setReg(DstReg);
+
+ ToBeRemoved.insert(&MI);
+ if (SubregToRegMI)
+ ToBeRemoved.insert(SubregToRegMI);
+ ToBeRemoved.insert(MovMI);
+
+ return true;
+}
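
For illustration, a hypothetical and simplified MIR fragment for the rewrite visitAND performs on the constant 0x00200400. The ANDWri immediate operands are the logical-immediate encodings of the two masks and are written symbolically here; the pass actually creates fresh virtual registers and redirects the uses of the old destination:

    %1:gpr32 = MOVi32imm 2098176
    %2:gpr32 = ANDWrr %0:gpr32, %1:gpr32

becomes

    %3:gpr32 = ANDWri %0:gpr32, <enc(0x003FFC00)>
    %2:gpr32 = ANDWri %3:gpr32, <enc(0xFFE007FF)>

with the MOVi32imm and the original ANDWrr removed (2098176 is 0x00200400, which would otherwise be expanded to a MOVZ + MOVK pair).
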
+
+bool AArch64MIPeepholeOpt::visitORR(
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+ // Check whether this ORR comes from the zero-extend pattern below.
+ //
+ // def : Pat<(i64 (zext GPR32:$src)),
+ // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
+ if (MI.getOperand(3).getImm() != 0)
+ return false;
+
+ if (MI.getOperand(1).getReg() != AArch64::WZR)
+ return false;
+
+ MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+ if (!SrcMI)
+ return false;
+
+ // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
+ //
+ // When you use the 32-bit form of an instruction, the upper 32 bits of the
+ // source registers are ignored and the upper 32 bits of the destination
+ // register are set to zero.
+ //
+ // If the source operand of the zero-extend is defined by a 32-bit form of an
+ // AArch64 instruction, the zero-extend is not needed. Check that SrcMI's
+ // opcode is a real AArch64 instruction; if it is not, conservatively do not
+ // process it.
+ if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
+ return false;
+
+ Register DefReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(2).getReg();
+ MRI->replaceRegWith(DefReg, SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ // replaceRegWith also changed MI's definition register; restore it so that
+ // SSA form is preserved until MI is deleted.
+ MI.getOperand(0).setReg(DefReg);
+ ToBeRemoved.insert(&MI);
+
+ LLVM_DEBUG({ dbgs() << "Removed: " << MI << "\n"; });
+
+ return true;
+}
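
Likewise, a hypothetical MIR fragment for the ORR case; ADDWrr stands in here for any real 32-bit AArch64 instruction, whose 32-bit form already zeroes the upper 32 bits of its destination:

    %2:gpr32 = ADDWrr %0:gpr32, %1:gpr32
    %3:gpr32 = ORRWrs $wzr, %2:gpr32, 0
    %4:gpr64 = SUBREG_TO_REG 0, %3:gpr32, %subreg.sub_32

After visitORR, the uses of %3 are redirected to %2 and the ORRWrs is deleted, so the SUBREG_TO_REG consumes %2 directly.
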
+
+bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MRI = &MF.getRegInfo();
+
+ if (!MRI->isSSA())
+ return false;
+
+ bool Changed = false;
+ SmallSetVector<MachineInstr *, 8> ToBeRemoved;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AArch64::ANDWrr:
+ Changed = visitAND<uint32_t>(MI, ToBeRemoved);
+ break;
+ case AArch64::ANDXrr:
+ Changed = visitAND<uint64_t>(MI, ToBeRemoved);
+ break;
+ case AArch64::ORRWrs:
+ Changed = visitORR(MI, ToBeRemoved);
+ }
+ }
+ }
+
+ for (MachineInstr *MI : ToBeRemoved)
+ MI->eraseFromParent();
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
+ return new AArch64MIPeepholeOpt();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 07dee3ce1fbc..70daf5abf81d 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -732,7 +732,9 @@ def Tuples8X : RegisterTuples<
!foreach(i, [0,1,2,3,4,5,6,7], !cast<SubRegIndex>("x8sub_"#i)),
!foreach(i, [0,1,2,3,4,5,6,7], (trunc (decimate (rotl GPR64, i), 2), 12))>;
-def GPR64x8Class : RegisterClass<"AArch64", [i64], 64, (trunc Tuples8X, 12)>;
+def GPR64x8Class : RegisterClass<"AArch64", [i64x8], 512, (trunc Tuples8X, 12)> {
+ let Size = 512;
+}
def GPR64x8AsmOp : AsmOperandClass {
let Name = "GPR64x8";
let ParserMethod = "tryParseGPR64x8";
@@ -899,16 +901,8 @@ def PPR32 : PPRRegOp<"s", PPRAsmOp32, ElementSizeS, PPR>;
def PPR64 : PPRRegOp<"d", PPRAsmOp64, ElementSizeD, PPR>;
def PPRAsmOp3bAny : PPRAsmOperand<"Predicate3bAny", "PPR_3b", 0>;
-def PPRAsmOp3b8 : PPRAsmOperand<"Predicate3bB", "PPR_3b", 8>;
-def PPRAsmOp3b16 : PPRAsmOperand<"Predicate3bH", "PPR_3b", 16>;
-def PPRAsmOp3b32 : PPRAsmOperand<"Predicate3bS", "PPR_3b", 32>;
-def PPRAsmOp3b64 : PPRAsmOperand<"Predicate3bD", "PPR_3b", 64>;
def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>;
-def PPR3b8 : PPRRegOp<"b", PPRAsmOp3b8, ElementSizeB, PPR_3b>;
-def PPR3b16 : PPRRegOp<"h", PPRAsmOp3b16, ElementSizeH, PPR_3b>;
-def PPR3b32 : PPRRegOp<"s", PPRAsmOp3b32, ElementSizeS, PPR_3b>;
-def PPR3b64 : PPRRegOp<"d", PPRAsmOp3b64, ElementSizeD, PPR_3b>;
//******************************************************************************
diff --git a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
index 03b32967a212..80d98d17e1d6 100644
--- a/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SIMDInstrOpt.cpp
@@ -641,7 +641,7 @@ bool AArch64SIMDInstrOpt::processSeqRegInst(MachineInstr *DefiningMI,
StReg[i] = DefiningMI->getOperand(2*i+1).getReg();
StRegKill[i] = getKillRegState(DefiningMI->getOperand(2*i+1).isKill());
- // Sanity check for the other arguments.
+ // Validation check for the other arguments.
if (DefiningMI->getOperand(2*i+2).isImm()) {
switch (DefiningMI->getOperand(2*i+2).getImm()) {
default:
@@ -711,9 +711,7 @@ bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) {
if (!shouldExitEarly(&MF, OptimizationKind)) {
SmallVector<MachineInstr *, 8> RemoveMIs;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
- MII != MIE;) {
- MachineInstr &MI = *MII;
+ for (MachineInstr &MI : MBB) {
bool InstRewrite;
if (OptimizationKind == VectorElem)
InstRewrite = optimizeVectElement(MI) ;
@@ -725,7 +723,6 @@ bool AArch64SIMDInstrOpt::runOnMachineFunction(MachineFunction &MF) {
RemoveMIs.push_back(&MI);
Changed = true;
}
- ++MII;
}
}
for (MachineInstr *MI : RemoveMIs)
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index 6a0fa2fc4f4e..aacace64e998 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -138,6 +138,6 @@ def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
-defm DUP_PPzPRI : sve2_int_perm_dup_p<"dup">;
+defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
} // End let Predicates = [HasSME]
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 91c3aec30a15..67d8fbb45cf5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -261,11 +261,6 @@ def AArch64dup_mt : SDNode<"AArch64ISD::DUP_MERGE_PASSTHRU", SDT_AArch64DUP_PRED
def AArch64splice : SDNode<"AArch64ISD::SPLICE", SDT_AArch64Arith>;
-def step_vector_oneuse : PatFrag<(ops node:$idx),
- (step_vector node:$idx), [{
- return N->hasOneUse();
-}]>;
-
def reinterpret_cast : SDNode<"AArch64ISD::REINTERPRET_CAST", SDTUnaryOp>;
def AArch64mul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
@@ -286,7 +281,9 @@ let Predicates = [HasSVE] in {
defm RDFFR_P : sve_int_rdffr_unpred<"rdffr", int_aarch64_sve_rdffr>;
def SETFFR : sve_int_setffr<"setffr", int_aarch64_sve_setffr>;
def WRFFR : sve_int_wrffr<"wrffr", int_aarch64_sve_wrffr>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm ADD_ZZZ : sve_int_bin_cons_arit_0<0b000, "add", add>;
defm SUB_ZZZ : sve_int_bin_cons_arit_0<0b001, "sub", sub>;
defm SQADD_ZZZ : sve_int_bin_cons_arit_0<0b100, "sqadd", saddsat>;
@@ -305,13 +302,15 @@ let Predicates = [HasSVE] in {
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
+} // End HasSVEorStreamingSVE
- let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
- defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
- defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
- }
+let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm ADD_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_add>;
+ defm SUB_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_sub>;
+ defm SUBR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_subr>;
+} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+let Predicates = [HasSVEorStreamingSVE] in {
defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", int_aarch64_sve_orr>;
defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", int_aarch64_sve_eor>;
defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", int_aarch64_sve_and>;
@@ -403,17 +402,37 @@ let Predicates = [HasSVE] in {
defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
- defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>;
- defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
+ defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", AArch64frecpe>;
+ defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>;
+
+ defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>;
+ defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>;
+ defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>;
+ defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
+ defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
+ defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
+
+ defm FADD_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fadd_p>;
+ defm FSUB_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, AArch64fsub_p>;
+ defm FMUL_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, AArch64fmul_p>;
+ defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one>;
+ defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmaxnm_p>;
+ defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fminnm_p>;
+ defm FMAX_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmax_p>;
+ defm FMIN_ZPZI : sve_fp_2op_i_p_zds_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, AArch64fmin_p>;
- defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>;
- defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>;
- defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>;
- defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>;
- defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>;
- defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>;
- defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
- defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
+ let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
+ defm FADD_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fadd>;
+ defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsub>;
+ defm FMUL_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_two, fpimm_half, fpimm_two, int_aarch64_sve_fmul>;
+ defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_half_one, fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fminnm>;
+ defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmax>;
+ defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zeroing_hfd<sve_fpimm_zero_one, fpimm0, fpimm_one, int_aarch64_sve_fmin>;
+ }
defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
@@ -437,31 +456,43 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>;
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
-
- let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
- defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
- defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
- defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
- defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
- defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
- defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
- defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
- defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
- defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
- defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
- defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
- }
-
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
+ defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
+ defm FMUL_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmul>;
+ defm FSUBR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fminnm>;
+ defm FMAX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmax>;
+ defm FMIN_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmin>;
+ defm FABD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fabd>;
+ defm FMULX_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fmulx>;
+ defm FDIVR_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdivr>;
+ defm FDIV_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fdiv>;
+} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd, AArch64fadd_p>;
defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub, AArch64fsub_p>;
defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul, AArch64fmul_p>;
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVE] in {
defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
- defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
- defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
+ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", AArch64frecps>;
+ defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", AArch64frsqrts>;
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVE] in {
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel", int_aarch64_sve_ftssel_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
@@ -475,10 +506,10 @@ let Predicates = [HasSVE] in {
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>;
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>;
- defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fmla, int_aarch64_sve_fmad>;
- defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fmls, int_aarch64_sve_fmsb>;
- defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fnmla, int_aarch64_sve_fnmad>;
- defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx<int_aarch64_sve_fnmls, int_aarch64_sve_fnmsb>;
+ defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
+ defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
+ defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
+ defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
multiclass fma<ValueType Ty, ValueType PredTy, string Suffix> {
// Zd = Za + Zn * Zm
@@ -516,17 +547,26 @@ let Predicates = [HasSVE] in {
defm : fma<nxv4f32, nxv4i1, "S">;
defm : fma<nxv2f32, nxv2i1, "S">;
defm : fma<nxv2f64, nxv2i1, "D">;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
defm FTMAD_ZZI : sve_fp_ftmad<"ftmad", int_aarch64_sve_ftmad_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm FMLA_ZZZI : sve_fp_fma_by_indexed_elem<0b0, "fmla", int_aarch64_sve_fmla_lane>;
defm FMLS_ZZZI : sve_fp_fma_by_indexed_elem<0b1, "fmls", int_aarch64_sve_fmls_lane>;
defm FCMLA_ZZZI : sve_fp_fcmla_by_indexed_elem<"fcmla", int_aarch64_sve_fcmla_lane>;
defm FMUL_ZZZI : sve_fp_fmul_by_indexed_elem<"fmul", int_aarch64_sve_fmul_lane>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// SVE floating point reductions.
defm FADDA_VPZ : sve_fp_2op_p_vd<0b000, "fadda", AArch64fadda_p>;
+} // End HasSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm FADDV_VPZ : sve_fp_fast_red<0b000, "faddv", AArch64faddv_p>;
defm FMAXNMV_VPZ : sve_fp_fast_red<0b100, "fmaxnmv", AArch64fmaxnmv_p>;
defm FMINNMV_VPZ : sve_fp_fast_red<0b101, "fminnmv", AArch64fminnmv_p>;
@@ -614,8 +654,13 @@ let Predicates = [HasSVE] in {
defm SEL_ZPZZ : sve_int_sel_vvv<"sel", vselect>;
defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>;
+} // End HasSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
@@ -639,8 +684,13 @@ let Predicates = [HasSVE] in {
defm MOVPRFX_ZPzZ : sve_int_movprfx_pred_zero<0b000, "movprfx">;
defm MOVPRFX_ZPmZ : sve_int_movprfx_pred_merge<0b001, "movprfx">;
def MOVPRFX_ZZ : sve_int_bin_cons_misc_0_c<0b00000001, "movprfx", ZPRAny>;
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasSVE] in {
defm FEXPA_ZZ : sve_int_bin_cons_misc_0_c_fexpa<"fexpa", int_aarch64_sve_fexpa_x>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
defm BRKPA_PPzPP : sve_int_brkp<0b00, "brkpa", int_aarch64_sve_brkpa_z>;
defm BRKPAS_PPzPP : sve_int_brkp<0b10, "brkpas", null_frag>;
defm BRKPB_PPzPP : sve_int_brkp<0b01, "brkpb", int_aarch64_sve_brkpb_z>;
@@ -752,7 +802,9 @@ let Predicates = [HasSVE] in {
defm LD1SB_S : sve_mem_cld_ss<0b1101, "ld1sb", Z_s, ZPR32, GPR64NoXZRshifted8>;
defm LD1SB_H : sve_mem_cld_ss<0b1110, "ld1sb", Z_h, ZPR16, GPR64NoXZRshifted8>;
defm LD1D : sve_mem_cld_ss<0b1111, "ld1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// non-faulting continuous load with reg+immediate
defm LDNF1B_IMM : sve_mem_cldnf_si<0b0000, "ldnf1b", Z_b, ZPR8>;
defm LDNF1B_H_IMM : sve_mem_cldnf_si<0b0001, "ldnf1b", Z_h, ZPR16>;
@@ -788,7 +840,9 @@ let Predicates = [HasSVE] in {
defm LDFF1SB_S : sve_mem_cldff_ss<0b1101, "ldff1sb", Z_s, ZPR32, GPR64shifted8>;
defm LDFF1SB_H : sve_mem_cldff_ss<0b1110, "ldff1sb", Z_h, ZPR16, GPR64shifted8>;
defm LDFF1D : sve_mem_cldff_ss<0b1111, "ldff1d", Z_d, ZPR64, GPR64shifted64>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
// LD(2|3|4) structured loads with reg+immediate
defm LD2B_IMM : sve_mem_eld_si<0b00, 0b01, ZZ_b, "ld2b", simm4s2>;
defm LD3B_IMM : sve_mem_eld_si<0b00, 0b10, ZZZ_b, "ld3b", simm4s3>;
@@ -816,7 +870,9 @@ let Predicates = [HasSVE] in {
def LD2D : sve_mem_eld_ss<0b11, 0b01, ZZ_d, "ld2d", GPR64NoXZRshifted64>;
def LD3D : sve_mem_eld_ss<0b11, 0b10, ZZZ_d, "ld3d", GPR64NoXZRshifted64>;
def LD4D : sve_mem_eld_ss<0b11, 0b11, ZZZZ_d, "ld4d", GPR64NoXZRshifted64>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// Gathers using unscaled 32-bit offsets, e.g.
// ld1h z0.s, p0/z, [x0, z0.s, uxtw]
defm GLD1SB_S : sve_mem_32b_gld_vs_32_unscaled<0b0000, "ld1sb", AArch64ld1s_gather_sxtw_z, AArch64ld1s_gather_uxtw_z, ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, nxv4i8>;
@@ -928,7 +984,9 @@ let Predicates = [HasSVE] in {
defm GLDFF1W_D : sve_mem_64b_gld_sv_32_scaled<0b1011, "ldff1w", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW32, ZPR64ExtUXTW32, nxv2i32>;
defm GLD1D : sve_mem_64b_gld_sv_32_scaled<0b1110, "ld1d", AArch64ld1_gather_sxtw_scaled_z, AArch64ld1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
defm GLDFF1D : sve_mem_64b_gld_sv_32_scaled<0b1111, "ldff1d", AArch64ldff1_gather_sxtw_scaled_z, AArch64ldff1_gather_uxtw_scaled_z, ZPR64ExtSXTW64, ZPR64ExtUXTW64, nxv2i64>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
// Non-temporal contiguous loads (register + immediate)
defm LDNT1B_ZRI : sve_mem_cldnt_si<0b00, "ldnt1b", Z_b, ZPR8>;
defm LDNT1H_ZRI : sve_mem_cldnt_si<0b01, "ldnt1h", Z_h, ZPR16>;
@@ -964,7 +1022,9 @@ let Predicates = [HasSVE] in {
defm ST1W : sve_mem_cst_ss<0b1010, "st1w", Z_s, ZPR32, GPR64NoXZRshifted32>;
defm ST1W_D : sve_mem_cst_ss<0b1011, "st1w", Z_d, ZPR64, GPR64NoXZRshifted32>;
defm ST1D : sve_mem_cst_ss<0b1111, "st1d", Z_d, ZPR64, GPR64NoXZRshifted64>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// Scatters using unpacked, unscaled 32-bit offsets, e.g.
// st1h z0.d, p0, [x0, z0.d, uxtw]
defm SST1B_D : sve_mem_64b_sst_sv_32_unscaled<0b000, "st1b", AArch64st1_scatter_sxtw, AArch64st1_scatter_uxtw, ZPR64ExtSXTW8Only, ZPR64ExtUXTW8Only, nxv2i8>;
@@ -1014,7 +1074,9 @@ let Predicates = [HasSVE] in {
defm SST1H_D_SCALED : sve_mem_sst_sv_64_scaled<0b01, "st1h", AArch64st1_scatter_scaled, ZPR64ExtLSL16, nxv2i16>;
defm SST1W_D_SCALED : sve_mem_sst_sv_64_scaled<0b10, "st1w", AArch64st1_scatter_scaled, ZPR64ExtLSL32, nxv2i32>;
defm SST1D_SCALED : sve_mem_sst_sv_64_scaled<0b11, "st1d", AArch64st1_scatter_scaled, ZPR64ExtLSL64, nxv2i64>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
// ST(2|3|4) structured stores (register + immediate)
defm ST2B_IMM : sve_mem_est_si<0b00, 0b01, ZZ_b, "st2b", simm4s2>;
defm ST3B_IMM : sve_mem_est_si<0b00, 0b10, ZZZ_b, "st3b", simm4s3>;
@@ -1073,7 +1135,7 @@ let Predicates = [HasSVE] in {
def PRFS_PRR : sve_mem_prfm_ss<0b101, "prfw", GPR64NoXZRshifted32>;
def PRFD_PRR : sve_mem_prfm_ss<0b111, "prfd", GPR64NoXZRshifted64>;
- multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, int scale, ComplexPattern AddrCP> {
+ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instruction RegImmInst, Instruction RegRegInst, ComplexPattern AddrCP> {
// reg + imm
let AddedComplexity = 2 in {
def _reg_imm : Pat<(prefetch (PredTy PPR_3b:$gp), (am_sve_indexed_s6 GPR64sp:$base, simm6s1:$offset), (i32 sve_prfop:$prfop)),
@@ -1091,11 +1153,13 @@ let Predicates = [HasSVE] in {
(RegImmInst sve_prfop:$prfop, PPR_3b:$gp, GPR64:$base, (i64 0))>;
}
- defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, 0, am_sve_regreg_lsl0>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, 1, am_sve_regreg_lsl1>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, 2, am_sve_regreg_lsl2>;
- defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, 3, am_sve_regreg_lsl3>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv16i1, PRFB_PRI, PRFB_PRR, am_sve_regreg_lsl0>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv8i1, PRFH_PRI, PRFH_PRR, am_sve_regreg_lsl1>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv4i1, PRFW_PRI, PRFS_PRR, am_sve_regreg_lsl2>;
+ defm : sve_prefetch<int_aarch64_sve_prf, nxv2i1, PRFD_PRI, PRFD_PRR, am_sve_regreg_lsl3>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
// Gather prefetch using scaled 32-bit offsets, e.g.
// prfh pldl1keep, p0, [x0, z0.s, uxtw #1]
defm PRFB_S : sve_mem_32b_prfm_sv_scaled<0b00, "prfb", ZPR32ExtSXTW8Only, ZPR32ExtUXTW8Only, int_aarch64_sve_prfb_gather_sxtw_index, int_aarch64_sve_prfb_gather_uxtw_index>;
@@ -1153,6 +1217,53 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv2i64 (int_aarch64_sve_adrd nxv2i64:$Op1, nxv2i64:$Op2)),
(ADR_LSL_ZZZ_D_3 $Op1, $Op2)>;
+ // Patterns to generate adr instruction.
+ // adr z0.d, [z0.d, z0.d, uxtw]
+ def : Pat<(add nxv2i64:$Op1,
+ (nxv2i64 (and nxv2i64:$Op2, (nxv2i64 (AArch64dup (i64 0xFFFFFFFF)))))),
+ (ADR_UXTW_ZZZ_D_0 $Op1, $Op2)>;
+ // adr z0.d, [z0.d, z0.d, sxtw]
+ def : Pat<(add nxv2i64:$Op1,
+ (nxv2i64 (sext_inreg nxv2i64:$Op2, nxv2i32))),
+ (ADR_SXTW_ZZZ_D_0 $Op1, $Op2)>;
+
+ // adr z0.s, [z0.s, z0.s, lsl #<shift>]
+ // adr z0.d, [z0.d, z0.d, lsl #<shift>]
+ multiclass adrShiftPat<ValueType Ty, ValueType PredTy, ValueType ShiftTy, Instruction DestAdrIns, int ShiftAmt> {
+ def : Pat<(add Ty:$Op1,
+ (Ty (AArch64lsl_p (PredTy (SVEAllActive)),
+ Ty:$Op2,
+ (Ty (AArch64dup (ShiftTy ShiftAmt)))))),
+ (DestAdrIns $Op1, $Op2)>;
+ }
+ defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_1, 1>;
+ defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_2, 2>;
+ defm : adrShiftPat<nxv2i64, nxv2i1, i64, ADR_LSL_ZZZ_D_3, 3>;
+ defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_1, 1>;
+ defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_2, 2>;
+ defm : adrShiftPat<nxv4i32, nxv4i1, i32, ADR_LSL_ZZZ_S_3, 3>;
+
+ // adr z0.d, [z0.d, z0.d, uxtw #<shift>]
+ // adr z0.d, [z0.d, z0.d, sxtw #<shift>]
+ multiclass adrXtwShiftPat<ValueType Ty, ValueType PredTy, int ShiftAmt> {
+ def : Pat<(add Ty:$Op1,
+ (Ty (AArch64lsl_p (PredTy (SVEAllActive)),
+ (Ty (and Ty:$Op2, (Ty (AArch64dup (i64 0xFFFFFFFF))))),
+ (Ty (AArch64dup (i64 ShiftAmt)))))),
+ (!cast<Instruction>("ADR_UXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>;
+
+ def : Pat<(add Ty:$Op1,
+ (Ty (AArch64lsl_p (PredTy (SVEAllActive)),
+ (Ty (sext_inreg Ty:$Op2, nxv2i32)),
+ (Ty (AArch64dup (i64 ShiftAmt)))))),
+ (!cast<Instruction>("ADR_SXTW_ZZZ_D_"#ShiftAmt) $Op1, $Op2)>;
+ }
+ defm : adrXtwShiftPat<nxv2i64, nxv2i1, 1>;
+ defm : adrXtwShiftPat<nxv2i64, nxv2i1, 2>;
+ defm : adrXtwShiftPat<nxv2i64, nxv2i1, 3>;
+} // End HasSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
defm TBL_ZZZ : sve_int_perm_tbl<"tbl", AArch64tbl>;
defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
@@ -1171,17 +1282,52 @@ let Predicates = [HasSVE] in {
// Extract lo/hi halves of legal predicate types.
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 0))),
- (ZIP1_PPP_S PPR:$Ps, (PFALSE))>;
+ (PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv2i1 (extract_subvector (nxv4i1 PPR:$Ps), (i64 2))),
- (ZIP2_PPP_S PPR:$Ps, (PFALSE))>;
+ (PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
- (ZIP1_PPP_H PPR:$Ps, (PFALSE))>;
+ (PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv4i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
- (ZIP2_PPP_H PPR:$Ps, (PFALSE))>;
+ (PUNPKHI_PP PPR:$Ps)>;
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
- (ZIP1_PPP_B PPR:$Ps, (PFALSE))>;
+ (PUNPKLO_PP PPR:$Ps)>;
def : Pat<(nxv8i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
- (ZIP2_PPP_B PPR:$Ps, (PFALSE))>;
+ (PUNPKHI_PP PPR:$Ps)>;
+
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 2))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv8i1 PPR:$Ps), (i64 6))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps))>;
+ def : Pat<(nxv4i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps))>;
+
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 0))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 2))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 4))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 6))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKLO_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 8))),
+ (PUNPKLO_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 10))),
+ (PUNPKHI_PP (PUNPKLO_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 12))),
+ (PUNPKLO_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
+ def : Pat<(nxv2i1 (extract_subvector (nxv16i1 PPR:$Ps), (i64 14))),
+ (PUNPKHI_PP (PUNPKHI_PP (PUNPKHI_PP PPR:$Ps)))>;
// Extract subvectors from FP SVE vectors
def : Pat<(nxv2f16 (extract_subvector (nxv4f16 ZPR:$Zs), (i64 0))),
@@ -1206,6 +1352,24 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv4bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
(UUNPKHI_ZZ_S ZPR:$Zs)>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 4))),
+ (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2f16 (extract_subvector (nxv8f16 ZPR:$Zs), (i64 6))),
+ (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 0))),
+ (UUNPKLO_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 2))),
+ (UUNPKHI_ZZ_D (UUNPKLO_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 4))),
+ (UUNPKLO_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+ def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
+ (UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+
// Concatenate two predicates.
def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)),
(UZP1_PPP_S $p1, $p2)>;
@@ -1308,16 +1472,18 @@ let Predicates = [HasSVE] in {
defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>;
defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>;
defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>;
+}
- defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb">;
- defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb">;
- defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch">;
- defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech">;
- defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw">;
- defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw">;
- defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd">;
- defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd">;
+ defm INCB_XPiI : sve_int_pred_pattern_a<0b000, "incb", add, int_aarch64_sve_cntb>;
+ defm DECB_XPiI : sve_int_pred_pattern_a<0b001, "decb", sub, int_aarch64_sve_cntb>;
+ defm INCH_XPiI : sve_int_pred_pattern_a<0b010, "inch", add, int_aarch64_sve_cnth>;
+ defm DECH_XPiI : sve_int_pred_pattern_a<0b011, "dech", sub, int_aarch64_sve_cnth>;
+ defm INCW_XPiI : sve_int_pred_pattern_a<0b100, "incw", add, int_aarch64_sve_cntw>;
+ defm DECW_XPiI : sve_int_pred_pattern_a<0b101, "decw", sub, int_aarch64_sve_cntw>;
+ defm INCD_XPiI : sve_int_pred_pattern_a<0b110, "incd", add, int_aarch64_sve_cntd>;
+ defm DECD_XPiI : sve_int_pred_pattern_a<0b111, "decd", sub, int_aarch64_sve_cntd>;
+let Predicates = [HasSVEorStreamingSVE] in {
defm SQINCB_XPiWdI : sve_int_pred_pattern_b_s32<0b00000, "sqincb", int_aarch64_sve_sqincb_n32>;
defm UQINCB_WPiI : sve_int_pred_pattern_b_u32<0b00001, "uqincb", int_aarch64_sve_uqincb_n32>;
defm SQDECB_XPiWdI : sve_int_pred_pattern_b_s32<0b00010, "sqdecb", int_aarch64_sve_sqdecb_n32>;
@@ -1391,10 +1557,10 @@ let Predicates = [HasSVE] in {
defm INCP_ZP : sve_int_count_v<0b10000, "incp">;
defm DECP_ZP : sve_int_count_v<0b10100, "decp">;
- defm INDEX_RR : sve_int_index_rr<"index", step_vector, step_vector_oneuse, AArch64mul_p_oneuse>;
- defm INDEX_IR : sve_int_index_ir<"index", step_vector, step_vector_oneuse, AArch64mul_p, AArch64mul_p_oneuse>;
- defm INDEX_RI : sve_int_index_ri<"index", step_vector, step_vector_oneuse>;
- defm INDEX_II : sve_int_index_ii<"index", step_vector, step_vector_oneuse>;
+ defm INDEX_RR : sve_int_index_rr<"index", AArch64mul_p_oneuse>;
+ defm INDEX_IR : sve_int_index_ir<"index", AArch64mul_p, AArch64mul_p_oneuse>;
+ defm INDEX_RI : sve_int_index_ri<"index">;
+ defm INDEX_II : sve_int_index_ii<"index">;
// Unpredicated shifts
defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
@@ -1414,14 +1580,16 @@ let Predicates = [HasSVE] in {
defm ASR_ZPZI : sve_int_shift_pred_bhsd<AArch64asr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSR_ZPZI : sve_int_shift_pred_bhsd<AArch64lsr_p, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
defm LSL_ZPZI : sve_int_shift_pred_bhsd<AArch64lsl_p, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
+} // End HasSVEorStreamingSVE
- let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
- defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
- defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
- defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
- defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
- }
+let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+ defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+ defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
+ defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
+} // End HasSVEorStreamingSVE, UseExperimentalZeroingPseudos
+let Predicates = [HasSVEorStreamingSVE] in {
defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
@@ -1536,19 +1704,27 @@ let Predicates = [HasSVE] in {
defm FRINTI_ZPmZ : sve_fp_2op_p_zd_HSD<0b00111, "frinti", AArch64frinti_mt>;
defm FRECPX_ZPmZ : sve_fp_2op_p_zd_HSD<0b01100, "frecpx", AArch64frecpx_mt>;
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
-
- let Predicates = [HasBF16, HasSVE] in {
- defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
- defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
- defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
- defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
- defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
- defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
- defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
- defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
- defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
- }
-
+} // End HasSVEorStreamingSVE
+
+let Predicates = [HasBF16, HasSVEorStreamingSVE] in {
+ defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
+ defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+} // End HasBF16, HasSVEorStreamingSVE
+
+let Predicates = [HasBF16, HasSVE] in {
+ defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
+} // End HasBF16, HasSVE
+
+let Predicates = [HasBF16, HasSVEorStreamingSVE] in {
+ defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
+ defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
+ defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
+ defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
+ defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
+ defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
+} // End HasBF16, HasSVEorStreamingSVE
+
+let Predicates = [HasSVEorStreamingSVE] in {
// InstAliases
def : InstAlias<"mov $Zd, $Zn",
(ORR_ZZZ ZPR64:$Zd, ZPR64:$Zn, ZPR64:$Zn), 1>;
@@ -1739,6 +1915,72 @@ let Predicates = [HasSVE] in {
def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
}
+ let AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
+ (ADDVL_XXI GPR64:$op, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_rdvl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (ADDVL_XXI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), $imm),
+ sub_32))>;
+
+ def : Pat<(nxv8i16 (add ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ (INCH_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv4i32 (add ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ (INCW_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv2i64 (add ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ (INCD_ZPiI ZPR:$op, 31, $imm)>;
+
+ def : Pat<(nxv8i16 (sub ZPR:$op, (nxv8i16 (AArch64dup (i32 (trunc (vscale (sve_cnth_imm i32:$imm)))))))),
+ (DECH_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv4i32 (sub ZPR:$op, (nxv4i32 (AArch64dup (i32 (trunc (vscale (sve_cntw_imm i32:$imm)))))))),
+ (DECW_ZPiI ZPR:$op, 31, $imm)>;
+ def : Pat<(nxv2i64 (sub ZPR:$op, (nxv2i64 (AArch64dup (i64 (vscale (sve_cntd_imm i32:$imm))))))),
+ (DECD_ZPiI ZPR:$op, 31, $imm)>;
+ }
+
+ let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL], AddedComplexity = 5 in {
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm i32:$imm))),
+ (INCH_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm i32:$imm))),
+ (INCW_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm i32:$imm))),
+ (INCD_XPiI GPR64:$op, 31, $imm)>;
+
+ def : Pat<(add GPR64:$op, (vscale (sve_cnth_imm_neg i32:$imm))),
+ (DECH_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntw_imm_neg i32:$imm))),
+ (DECW_XPiI GPR64:$op, 31, $imm)>;
+ def : Pat<(add GPR64:$op, (vscale (sve_cntd_imm_neg i32:$imm))),
+ (DECD_XPiI GPR64:$op, 31, $imm)>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (INCD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cnth_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECH_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntw_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECW_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ def : Pat<(add GPR32:$op, (i32 (trunc (vscale (sve_cntd_imm_neg i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (DECD_XPiI (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$op, sub_32), 31, $imm),
+ sub_32))>;
+ }
+
def : Pat<(add GPR64:$op, (vscale (sve_rdvl_imm i32:$imm))),
(ADDVL_XXI GPR64:$op, $imm)>;
@@ -1864,25 +2106,27 @@ let Predicates = [HasSVE] in {
}
// 2-element contiguous loads
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
- defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i8, LD1B_D, LD1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i8, LD1SB_D, LD1SB_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i16, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i16, LD1SH_D, LD1SH_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2i64, nxv2i1, zext_masked_load_i32, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, asext_masked_load_i32, LD1SW_D, LD1SW_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2i64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_load<nxv2f16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2bf16, nxv2i1, nonext_masked_load, LD1H_D, LD1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv2f32, nxv2i1, nonext_masked_load, LD1W_D, LD1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv2f64, nxv2i1, nonext_masked_load, LD1D, LD1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous loads
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
- defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i8, LD1B_S, LD1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i8, LD1SB_S, LD1SB_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_load<nxv4i32, nxv4i1, zext_masked_load_i16, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, asext_masked_load_i16, LD1SH_S, LD1SH_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4i32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_load<nxv4f16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4bf16, nxv4i1, nonext_masked_load, LD1H_S, LD1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_load<nxv4f32, nxv4i1, nonext_masked_load, LD1W, LD1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous loads
defm : pred_load<nxv8i16, nxv8i1, zext_masked_load_i8, LD1B_H, LD1B_H_IMM, am_sve_regreg_lsl0>;
@@ -1909,20 +2153,22 @@ let Predicates = [HasSVE] in {
}
// 2-element contiguous stores
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D, ST1B_D_IMM, am_sve_regreg_lsl0>;
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
- defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
- defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i8, ST1B_D, ST1B_D_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i16, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2i64, nxv2i1, trunc_masked_store_i32, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2i64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
+ defm : pred_store<nxv2f16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2bf16, nxv2i1, nontrunc_masked_store, ST1H_D, ST1H_D_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv2f32, nxv2i1, nontrunc_masked_store, ST1W_D, ST1W_D_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv2f64, nxv2i1, nontrunc_masked_store, ST1D, ST1D_IMM, am_sve_regreg_lsl3>;
// 4-element contiguous stores
- defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S, ST1B_S_IMM, am_sve_regreg_lsl0>;
- defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
- defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
- defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i8, ST1B_S, ST1B_S_IMM, am_sve_regreg_lsl0>;
+ defm : pred_store<nxv4i32, nxv4i1, trunc_masked_store_i16, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4i32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
+ defm : pred_store<nxv4f16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4bf16, nxv4i1, nontrunc_masked_store, ST1H_S, ST1H_S_IMM, am_sve_regreg_lsl1>;
+ defm : pred_store<nxv4f32, nxv4i1, nontrunc_masked_store, ST1W, ST1W_IMM, am_sve_regreg_lsl2>;
// 8-element contiguous stores
defm : pred_store<nxv8i16, nxv8i1, trunc_masked_store_i8, ST1B_H, ST1B_H_IMM, am_sve_regreg_lsl0>;
@@ -1954,32 +2200,30 @@ let Predicates = [HasSVE] in {
def _imm : Pat<(Store (Ty ZPR:$val), (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset)),
(RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
}
- let AddedComplexity = 3 in {
- def _fi : Pat<(Store (Ty ZPR:$val), (am_sve_fi GPR64sp:$base, simm4s1:$offset)),
- (RegImmInst ZPR:$val, (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
- }
def : Pat<(Store (Ty ZPR:$val), GPR64:$base),
(RegImmInst ZPR:$val, (PTrue 31), GPR64:$base, (i64 0))>;
}
- defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>;
- defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>;
- defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>;
- defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>;
- defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
- defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
- defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
- defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
- defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
- defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
- defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
- defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
- defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
+ defm : unpred_store< store, nxv16i8, ST1B, ST1B_IMM, PTRUE_B, am_sve_regreg_lsl0>;
+ defm : unpred_store< truncstorevi8, nxv8i16, ST1B_H, ST1B_H_IMM, PTRUE_H, am_sve_regreg_lsl0>;
+ defm : unpred_store< truncstorevi8, nxv4i32, ST1B_S, ST1B_S_IMM, PTRUE_S, am_sve_regreg_lsl0>;
+ defm : unpred_store< truncstorevi8, nxv2i64, ST1B_D, ST1B_D_IMM, PTRUE_D, am_sve_regreg_lsl0>;
+ defm : unpred_store< store, nxv8i16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
+ defm : unpred_store<truncstorevi16, nxv4i32, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_store<truncstorevi16, nxv2i64, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4i32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
+ defm : unpred_store<truncstorevi32, nxv2i64, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
+ defm : unpred_store< store, nxv2i64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
+ defm : unpred_store< store, nxv8f16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv8bf16, ST1H, ST1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4f16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4bf16, ST1H_S, ST1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv2f16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv2bf16, ST1H_D, ST1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_store< store, nxv4f32, ST1W, ST1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
+ defm : unpred_store< store, nxv2f32, ST1W_D, ST1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
+ defm : unpred_store< store, nxv2f64, ST1D, ST1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
multiclass unpred_load<PatFrag Load, ValueType Ty, Instruction RegRegInst,
Instruction RegImmInst, Instruction PTrue,
@@ -1992,10 +2236,6 @@ let Predicates = [HasSVE] in {
def _imm: Pat<(Ty (Load (am_sve_indexed_s4 GPR64sp:$base, simm4s1:$offset))),
(RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
}
- let AddedComplexity = 3 in {
- def _fi : Pat<(Ty (Load (am_sve_fi GPR64sp:$base, simm4s1:$offset))),
- (RegImmInst (PTrue 31), GPR64sp:$base, simm4s1:$offset)>;
- }
def : Pat<(Ty (Load GPR64:$base)),
(RegImmInst (PTrue 31), GPR64:$base, (i64 0))>;
@@ -2026,7 +2266,9 @@ let Predicates = [HasSVE] in {
defm : unpred_load< load, nxv8f16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv8bf16, LD1H, LD1H_IMM, PTRUE_H, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv4f16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
+ defm : unpred_load< load, nxv4bf16, LD1H_S, LD1H_S_IMM, PTRUE_S, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv2f16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
+ defm : unpred_load< load, nxv2bf16, LD1H_D, LD1H_D_IMM, PTRUE_D, am_sve_regreg_lsl1>;
defm : unpred_load< load, nxv4f32, LD1W, LD1W_IMM, PTRUE_S, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f32, LD1W_D, LD1W_D_IMM, PTRUE_D, am_sve_regreg_lsl2>;
defm : unpred_load< load, nxv2f64, LD1D, LD1D_IMM, PTRUE_D, am_sve_regreg_lsl3>;
@@ -2059,9 +2301,6 @@ let Predicates = [HasSVE] in {
}
defm Pat_Store_P16 : unpred_store_predicate<nxv16i1, STR_PXI>;
- defm Pat_Store_P8 : unpred_store_predicate<nxv8i1, STR_PXI>;
- defm Pat_Store_P4 : unpred_store_predicate<nxv4i1, STR_PXI>;
- defm Pat_Store_P2 : unpred_store_predicate<nxv2i1, STR_PXI>;
multiclass unpred_load_predicate<ValueType Ty, Instruction Load> {
def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
@@ -2072,9 +2311,6 @@ let Predicates = [HasSVE] in {
}
defm Pat_Load_P16 : unpred_load_predicate<nxv16i1, LDR_PXI>;
- defm Pat_Load_P8 : unpred_load_predicate<nxv8i1, LDR_PXI>;
- defm Pat_Load_P4 : unpred_load_predicate<nxv4i1, LDR_PXI>;
- defm Pat_Load_P2 : unpred_load_predicate<nxv2i1, LDR_PXI>;
multiclass ld1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
@@ -2122,7 +2358,9 @@ let Predicates = [HasSVE] in {
// 16-element contiguous loads
defm : ld1<LD1B, LD1B_IMM, nxv16i8, AArch64ld1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+} // End HasSVEorStreamingSVE
+let Predicates = [HasSVE] in {
multiclass ldnf1<Instruction I, ValueType Ty, SDPatternOperator Load, ValueType PredTy, ValueType MemVT> {
// scalar + immediate (mul vl)
let AddedComplexity = 1 in {
@@ -2203,7 +2441,9 @@ let Predicates = [HasSVE] in {
// 16-element contiguous first faulting loads
defm : ldff1<LDFF1B, nxv16i8, AArch64ldff1_z, nxv16i1, nxv16i8, am_sve_regreg_lsl0>;
+} // End HasSVE
+let Predicates = [HasSVEorStreamingSVE] in {
multiclass st1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
SDPatternOperator Store, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
// reg + reg
@@ -2400,6 +2640,19 @@ let Predicates = [HasSVE] in {
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
}
+ def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
+ (i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+ def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
+ (i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
+
+ def : Pat<(sext_inreg (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index), i16),
+ (i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+ def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16),
+ (i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
+
+ def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ (i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+
// Extract first element from vector.
let AddedComplexity = 2 in {
def : Pat<(vector_extract (nxv16i8 ZPR:$Zs), (i64 0)),
@@ -2425,28 +2678,32 @@ let Predicates = [HasSVE] in {
}
// Splice with lane bigger or equal to 0
- def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_15 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_15:$index)>;
- def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_7 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_7:$index)>;
- def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_3 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_3:$index)>;
- def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_1 i32:$index)))),
- (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_1:$index)>;
-}
+ def : Pat<(nxv16i8 (vector_splice (nxv16i8 ZPR:$Z1), (nxv16i8 ZPR:$Z2), (i64 (sve_ext_imm_0_255 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_255:$index)>;
+ def : Pat<(nxv8i16 (vector_splice (nxv8i16 ZPR:$Z1), (nxv8i16 ZPR:$Z2), (i64 (sve_ext_imm_0_127 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_127:$index)>;
+ def : Pat<(nxv4i32 (vector_splice (nxv4i32 ZPR:$Z1), (nxv4i32 ZPR:$Z2), (i64 (sve_ext_imm_0_63 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_63:$index)>;
+ def : Pat<(nxv2i64 (vector_splice (nxv2i64 ZPR:$Z1), (nxv2i64 ZPR:$Z2), (i64 (sve_ext_imm_0_31 i32:$index)))),
+ (EXT_ZZI ZPR:$Z1, ZPR:$Z2, sve_ext_imm_0_31:$index)>;
+
+} // End HasSVEorStreamingSVE
let Predicates = [HasSVE, HasMatMulInt8] in {
defm SMMLA_ZZZ : sve_int_matmul<0b00, "smmla", int_aarch64_sve_smmla>;
defm UMMLA_ZZZ : sve_int_matmul<0b11, "ummla", int_aarch64_sve_ummla>;
defm USMMLA_ZZZ : sve_int_matmul<0b10, "usmmla", int_aarch64_sve_usmmla>;
+} // End HasSVE, HasMatMulInt8
+
+let Predicates = [HasSVEorStreamingSVE, HasMatMulInt8] in {
defm USDOT_ZZZ : sve_int_dot_mixed<"usdot", int_aarch64_sve_usdot>;
defm USDOT_ZZZI : sve_int_dot_mixed_indexed<0, "usdot", int_aarch64_sve_usdot_lane>;
defm SUDOT_ZZZI : sve_int_dot_mixed_indexed<1, "sudot", int_aarch64_sve_sudot_lane>;
-}
+} // End HasSVEorStreamingSVE, HasMatMulInt8
let Predicates = [HasSVE, HasMatMulFP32] in {
defm FMMLA_ZZZ_S : sve_fp_matrix_mla<0, "fmmla", ZPR32, int_aarch64_sve_fmmla, nxv4f32>;
-}
+} // End HasSVE, HasMatMulFP32
let Predicates = [HasSVE, HasMatMulFP64] in {
defm FMMLA_ZZZ_D : sve_fp_matrix_mla<1, "fmmla", ZPR64, int_aarch64_sve_fmmla, nxv2f64>;
@@ -2458,15 +2715,18 @@ let Predicates = [HasSVE, HasMatMulFP64] in {
defm LD1RO_H : sve_mem_ldor_ss<0b01, "ld1roh", Z_h, ZPR16, GPR64NoXZRshifted16, nxv8i16, nxv8i1, AArch64ld1ro_z, am_sve_regreg_lsl1>;
defm LD1RO_W : sve_mem_ldor_ss<0b10, "ld1row", Z_s, ZPR32, GPR64NoXZRshifted32, nxv4i32, nxv4i1, AArch64ld1ro_z, am_sve_regreg_lsl2>;
defm LD1RO_D : sve_mem_ldor_ss<0b11, "ld1rod", Z_d, ZPR64, GPR64NoXZRshifted64, nxv2i64, nxv2i1, AArch64ld1ro_z, am_sve_regreg_lsl3>;
+} // End HasSVE, HasMatMulFP64
+
+let Predicates = [HasSVEorStreamingSVE, HasMatMulFP64] in {
defm ZIP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 0, "zip1", int_aarch64_sve_zip1q>;
defm ZIP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b00, 1, "zip2", int_aarch64_sve_zip2q>;
defm UZP1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 0, "uzp1", int_aarch64_sve_uzp1q>;
defm UZP2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b01, 1, "uzp2", int_aarch64_sve_uzp2q>;
defm TRN1_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 0, "trn1", int_aarch64_sve_trn1q>;
defm TRN2_ZZZ_Q : sve_int_perm_bin_perm_128_zz<0b11, 1, "trn2", int_aarch64_sve_trn2q>;
-}
+} // End HasSVEorStreamingSVE, HasMatMulFP64
-let Predicates = [HasSVE2] in {
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 integer multiply-add (indexed)
defm MLA_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b0, "mla", int_aarch64_sve_mla_lane>;
defm MLS_ZZZI : sve2_int_mla_by_indexed_elem<0b01, 0b1, "mls", int_aarch64_sve_mls_lane>;
@@ -2614,15 +2874,17 @@ let Predicates = [HasSVE2] in {
defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;
+} // End HasSVE2orStreamingSVE
- let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
- defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
- defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
- defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
- defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
- defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
- }
+let Predicates = [HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos] in {
+ defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
+ defm UQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
+ defm SRSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_srshr>;
+ defm URSHR_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<int_aarch64_sve_sqshlu>;
+} // End HasSVE2orStreamingSVE, UseExperimentalZeroingPseudos
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0110, "sqshl", "SQSHL_ZPZI", int_aarch64_sve_sqshl>;
defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left_dup<0b0111, "uqshl", "UQSHL_ZPZI", int_aarch64_sve_uqshl>;
@@ -2735,11 +2997,15 @@ let Predicates = [HasSVE2] in {
defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt", int_aarch64_sve_sqxtnt>;
defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt", int_aarch64_sve_uqxtnt>;
defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt", int_aarch64_sve_sqxtunt>;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 character match
defm MATCH_PPzZZ : sve2_char_match<0b0, "match", int_aarch64_sve_match>;
defm NMATCH_PPzZZ : sve2_char_match<0b1, "nmatch", int_aarch64_sve_nmatch>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 bitwise exclusive-or interleaved
defm EORBT_ZZZ : sve2_bitwise_xor_interleaved<0b0, "eorbt", int_aarch64_sve_eorbt>;
defm EORTB_ZZZ : sve2_bitwise_xor_interleaved<0b1, "eortb", int_aarch64_sve_eortb>;
@@ -2754,13 +3020,17 @@ let Predicates = [HasSVE2] in {
defm SADDLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b00, "saddlbt", int_aarch64_sve_saddlbt>;
defm SSUBLBT_ZZZ : sve2_misc_int_addsub_long_interleaved<0b10, "ssublbt", int_aarch64_sve_ssublbt>;
defm SSUBLTB_ZZZ : sve2_misc_int_addsub_long_interleaved<0b11, "ssubltb", int_aarch64_sve_ssubltb>;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 histogram generation (segment)
def HISTSEG_ZZZ : sve2_hist_gen_segment<"histseg", int_aarch64_sve_histseg>;
// SVE2 histogram generation (vector)
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt", int_aarch64_sve_histcnt>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 floating-point base 2 logarithm as integer
defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
@@ -2802,7 +3072,9 @@ let Predicates = [HasSVE2] in {
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 non-temporal gather loads
defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00000, "ldnt1sb", AArch64ldnt1s_gather_z, nxv4i8>;
defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs_32_ptrs<0b00001, "ldnt1b", AArch64ldnt1_gather_z, nxv4i8>;
@@ -2817,10 +3089,14 @@ let Predicates = [HasSVE2] in {
defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11000, "ldnt1sw", AArch64ldnt1s_gather_z, nxv2i32>;
defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11010, "ldnt1w", AArch64ldnt1_gather_z, nxv2i32>;
defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs_64_ptrs<0b11110, "ldnt1d", AArch64ldnt1_gather_z, nxv2i64>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
+} // End HasSVE2orStreamingSVE
+let Predicates = [HasSVE2] in {
// SVE2 non-temporal scatter stores
defm STNT1B_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b001, "stnt1b", AArch64stnt1_scatter, nxv4i8>;
defm STNT1H_ZZR_S : sve2_mem_sstnt_vs_32_ptrs<0b011, "stnt1h", AArch64stnt1_scatter, nxv4i16>;
@@ -2830,7 +3106,9 @@ let Predicates = [HasSVE2] in {
defm STNT1H_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b010, "stnt1h", AArch64stnt1_scatter, nxv2i16>;
defm STNT1W_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b100, "stnt1w", AArch64stnt1_scatter, nxv2i32>;
defm STNT1D_ZZR_D : sve2_mem_sstnt_vs_64_ptrs<0b110, "stnt1d", AArch64stnt1_scatter, nxv2i64>;
+} // End HasSVE2
+let Predicates = [HasSVE2orStreamingSVE] in {
// SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;
@@ -2849,7 +3127,7 @@ let Predicates = [HasSVE2] in {
// SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr", "int_aarch64_sve_whilewr">;
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw", "int_aarch64_sve_whilerw">;
-}
+} // End HasSVE2orStreamingSVE
let Predicates = [HasSVE2AES] in {
// SVE2 crypto destructive binary operations
@@ -2865,23 +3143,23 @@ let Predicates = [HasSVE2AES] in {
// to NEON PMULL2 instruction.
defm PMULLB_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11010, "pmullb", int_aarch64_sve_pmullb_pair>;
defm PMULLT_ZZZ_Q : sve2_wide_int_arith_pmul<0b00, 0b11011, "pmullt", int_aarch64_sve_pmullt_pair>;
-}
+} // End HasSVE2AES
let Predicates = [HasSVE2SM4] in {
// SVE2 crypto constructive binary operations
defm SM4EKEY_ZZZ_S : sve2_crypto_cons_bin_op<0b0, "sm4ekey", ZPR32, int_aarch64_sve_sm4ekey, nxv4i32>;
// SVE2 crypto destructive binary operations
defm SM4E_ZZZ_S : sve2_crypto_des_bin_op<0b10, "sm4e", ZPR32, int_aarch64_sve_sm4e, nxv4i32>;
-}
+} // End HasSVE2SM4
let Predicates = [HasSVE2SHA3] in {
// SVE2 crypto constructive binary operations
defm RAX1_ZZZ_D : sve2_crypto_cons_bin_op<0b1, "rax1", ZPR64, int_aarch64_sve_rax1, nxv2i64>;
-}
+} // End HasSVE2SHA3
let Predicates = [HasSVE2BitPerm] in {
// SVE2 bitwise permute
defm BEXT_ZZZ : sve2_misc_bitwise<0b1100, "bext", int_aarch64_sve_bext_x>;
defm BDEP_ZZZ : sve2_misc_bitwise<0b1101, "bdep", int_aarch64_sve_bdep_x>;
defm BGRP_ZZZ : sve2_misc_bitwise<0b1110, "bgrp", int_aarch64_sve_bgrp_x>;
-}
+} // End HasSVE2BitPerm
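
The SVE hunks above are mostly mechanical re-grouping: existing defm/def lines move from a let Predicates = [HasSVE] block into a HasSVEorStreamingSVE block (or from HasSVE2 into HasSVE2orStreamingSVE) when the instruction is also legal in streaming mode, while gathers, scatters, first-faulting and non-temporal forms stay gated on plain SVE. The standalone sketch below is illustrative only and is not part of the patch; Predicate and ExampleInst are simplified stand-ins for the real target infrastructure, the EXAMPLE_* records and condition strings are hypothetical, and the point is just how the outer let pushes the predicate list into every record defined inside its braces (running llvm-tblgen on the file prints the expanded records).

// Illustrative sketch, not part of the patch. Simplified stand-ins for the
// real Predicate and instruction classes; llvm-tblgen sketch.td prints the
// expanded records.
class Predicate<string cond> { string CondString = cond; }
def HasSVE               : Predicate<"Subtarget->hasSVE()">;
def HasSVEorStreamingSVE : Predicate<"Subtarget->hasSVEorStreamingSVE()">;

class ExampleInst<string asm> {
  string AsmString = asm;
  list<Predicate> Predicates = [];   // filled in by the enclosing let
}

let Predicates = [HasSVEorStreamingSVE] in {
  def EXAMPLE_SHARED : ExampleInst<"shared_op">;     // legal with SVE or streaming SVE
} // End HasSVEorStreamingSVE

let Predicates = [HasSVE] in {
  def EXAMPLE_SVE_ONLY : ExampleInst<"sve_only_op">; // e.g. gathers, first-faulting loads
} // End HasSVE
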
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA53.td b/llvm/lib/Target/AArch64/AArch64SchedA53.td
index 65c84b1f39c0..d18a05fda191 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA53.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA53.td
@@ -127,7 +127,8 @@ def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVq, [A53UnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
@@ -149,6 +150,7 @@ def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
// No forwarding for these reads.
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadVLD, 0>;
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
index 0e680078c348..877c4d2ced41 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -149,9 +149,11 @@ def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
-def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteVd, [CortexA55UnitFPALU]> { let Latency = 4; }
+def : WriteRes<WriteVq, [CortexA55UnitFPALU,CortexA55UnitFPALU]> { let Latency = 4; let BeginGroup = 1; }
// FP ALU specific new schedwrite definitions
+def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
@@ -182,6 +184,7 @@ def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency =
def : ReadAdvance<ReadVLD, 0>;
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 1>;
+def : ReadAdvance<ReadST, 1>;
// ALU - ALU input operands are generally needed in EX1. An operand produced
// in, say, EX2 can be forwarded for consumption to ALU in EX1, thereby
@@ -330,6 +333,8 @@ def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16
//---
// Floating Point Conversions, MAC, DIV, SQRT
//---
+def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
+def : InstRW<[CortexA55WriteFPALU_F2], (instregex "^XTN")>;
def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA57.td b/llvm/lib/Target/AArch64/AArch64SchedA57.td
index c1eacca8cc1f..168a762241ca 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -96,7 +96,8 @@ def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : WriteRes<WriteFMul, [A57UnitV]> { let Latency = 5;}
def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
-def : SchedAlias<WriteV, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVd, A57Write_3cyc_1V>;
+def : SchedAlias<WriteVq, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
@@ -116,6 +117,7 @@ def : ReadAdvance<ReadIM, 0>;
def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
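
The scheduling-model edits above and in the remaining AArch64Sched*.td files below follow one recipe per CPU model: each definition or alias of the old WriteV write class is duplicated into WriteVd and WriteVq (going by the names and by the Cortex-A55 entry that charges WriteVq two FP ALU micro-ops, these appear to be the 64-bit and 128-bit vector forms), and a ReadAdvance is added for the new ReadST read class so the stored-data operand has an explicit forwarding latency. A minimal sketch of that recipe follows; it is illustrative only, ExampleUnitFP is a hypothetical processor resource standing in for each model's own FP/ASIMD unit, and the latency is a placeholder.

// Illustrative fragment, not part of the patch; it mirrors the per-model edits.
// ExampleUnitFP is a hypothetical ProcResource; the latency is a placeholder.
def : WriteRes<WriteVd, [ExampleUnitFP]> { let Latency = 4; } // 64-bit vector ops
def : WriteRes<WriteVq, [ExampleUnitFP]> { let Latency = 4; } // 128-bit vector ops
def : ReadAdvance<ReadST, 0>; // stored data gets no early-forwarding credit here
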
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index b6741d418ef0..1d25a6c00f95 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -21,7 +21,8 @@ def A64FXModel : SchedMachineModel {
let CompleteModel = 1;
list<Predicate> UnsupportedFeatures =
- [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth];
+ [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
+ HasSVE2orStreamingSVE];
let FullInstRWOverlapCheck = 0;
}
@@ -760,6 +761,7 @@ def : ReadAdvance<ReadIMA, 0>;
def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
def : ReadAdvance<ReadVLD, 0>;
//===----------------------------------------------------------------------===//
@@ -1625,7 +1627,11 @@ def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [A64FXGI03]> {
+def : WriteRes<WriteVd, [A64FXGI03]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteVq, [A64FXGI03]> {
let Latency = 4;
let ResourceCycles = [1];
}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
index 11df304a974c..9fbb46919427 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -258,6 +258,7 @@ def CyReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
+def : ReadAdvance<ReadST, 0>;
//---
// 7.8.9,7.8.11. Load/Store, paired
@@ -303,7 +304,8 @@ def : WriteRes<WriteSys, []> {let Latency = -1;}
// 7.9 Vector Unit Instructions
// Simple vector operations take 2 cycles.
-def : WriteRes<WriteV, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVd, [CyUnitV]> {let Latency = 2;}
+def : WriteRes<WriteVq, [CyUnitV]> {let Latency = 2;}
// Define some longer latency vector op types for Cyclone.
def CyWriteV3 : SchedWriteRes<[CyUnitV]> {let Latency = 3;}
@@ -334,7 +336,7 @@ def : WriteRes<WriteFImm, [CyUnitV]> {let Latency = 2;}
// COPY is handled above in the WriteMov Variant.
def WriteVMov : SchedWriteVariant<[
SchedVar<WriteVMovPred, [WriteX]>,
- SchedVar<NoSchedPred, [WriteV]>]>;
+ SchedVar<NoSchedPred, [WriteVq]>]>;
def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
// FMOVSr,FMOVDr are WriteF.
@@ -354,7 +356,7 @@ def : WriteRes<WriteFCopy, [CyUnitLS]> {
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
// INS V[x],R
-def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteV]>;
+def CyWriteCopyToFPR : WriteSequence<[WriteVLD, WriteVq]>;
def : InstRW<[CyWriteCopyToFPR], (instregex "INSv")>;
// SMOV,UMOV R,V[x]
@@ -570,7 +572,7 @@ def : InstRW<[WriteFRSQRTS], (instregex "FRSQRTSv")>;
//---
// FCVT lengthen f16/s32
-def : InstRW<[WriteV], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
+def : InstRW<[WriteVq], (instrs FCVTSHr,FCVTDHr,FCVTDSr)>;
// FCVT,FCVTN,FCVTXN
// SCVTF,UCVTF V,V
@@ -680,61 +682,61 @@ def : InstRW<[WriteVLDShuffle],
def : InstRW<[WriteVLDShuffle, WriteAdr],
(instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
(instregex "LD2Twov(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
(instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle],
(instregex "LD2Twov(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
(instregex "LD2i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
(instregex "LD2i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq],
(instregex "LD2i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq],
(instregex "LD2i64_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq],
(instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
(instregex "LD3Threev(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
(instregex "LD3Threev(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVLDShuffle],
(instregex "LD3Threev(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq],
(instregex "LD3i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq],
(instregex "LD3i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq],
(instregex "LD3i64$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
(instregex "LD3i64_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq],
(instregex "LD3Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq],
(instrs LD3Rv1d,LD3Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq],
(instrs LD3Rv1d_POST,LD3Rv2d_POST)>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD4Fourv(8b|4h|2s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
(instregex "LD4Fourv(8b|4h|2s)_POST")>;
def : InstRW<[WriteVLDPairShuffle, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle],
@@ -743,25 +745,25 @@ def : InstRW<[WriteVLDPairShuffle, WriteAdr, WriteVLDPairShuffle,
WriteVLDPairShuffle, WriteVLDPairShuffle],
(instregex "LD4Fourv(16b|8h|4s|2d)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVq, WriteVq, WriteVq],
(instregex "LD4i(8|16|32)$")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVq, WriteVq, WriteVq],
(instregex "LD4i(8|16|32)_POST")>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4i64)>;
-def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteV],
+def : InstRW<[WriteVLDShuffle, ReadVLD, WriteAdr, WriteVLDShuffle, WriteVq],
(instrs LD4i64_POST)>;
-def : InstRW<[WriteVLDShuffle, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVq, WriteVq, WriteVq],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteV, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVq, WriteVq, WriteVq],
(instregex "LD4Rv(8b|4h|2s|16b|8h|4s)_POST")>;
-def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4Rv1d,LD4Rv2d)>;
-def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteV, WriteV],
+def : InstRW<[WriteVLDShuffle, WriteAdr, WriteVLDShuffle, WriteVq, WriteVq],
(instrs LD4Rv1d_POST,LD4Rv2d_POST)>;
//---
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index 6a33258be02c..14df8236504b 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -254,7 +254,8 @@ def : WriteRes<WriteVST, [M3UnitS,
let NumMicroOps = 1; }
// ASIMD FP instructions.
-def : WriteRes<WriteV, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVd, [M3UnitNALU]> { let Latency = 3; }
+def : WriteRes<WriteVq, [M3UnitNALU]> { let Latency = 3; }
// Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -277,6 +278,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index db066a19b0b6..8f740a9a0d35 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -558,7 +558,8 @@ def : SchedAlias<WriteVLD, M4WriteL5>;
def : SchedAlias<WriteVST, M4WriteVST1>;
// ASIMD FP instructions.
-def : SchedAlias<WriteV, M4WriteNALU1>;
+def : SchedAlias<WriteVd, M4WriteNALU1>;
+def : SchedAlias<WriteVq, M4WriteNALU1>;
// Other miscellaneous instructions.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
@@ -581,6 +582,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index 0429b6ab2ee2..93e1b66bea03 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -594,7 +594,8 @@ def : SchedAlias<WriteVLD, M5WriteL6>;
def : SchedAlias<WriteVST, M5WriteVST1>;
// ASIMD FP instructions.
-def : SchedAlias<WriteV, M5WriteNALU1>;
+def : SchedAlias<WriteVd, M5WriteNALU1>;
+def : SchedAlias<WriteVq, M5WriteNALU1>;
// Other miscellaneous instructions.
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
@@ -616,6 +617,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// Finer scheduling model.
diff --git a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
index 8bb95e442249..7c9b0afdd169 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -92,7 +92,8 @@ def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
-def : WriteRes<WriteV, []> { let Unsupported = 1; }
+def : WriteRes<WriteVd, []> { let Unsupported = 1; }
+def : WriteRes<WriteVq, []> { let Unsupported = 1; }
def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
def : WriteRes<WriteVST, []> { let Unsupported = 1; }
def : WriteRes<WriteSys, []> { let Unsupported = 1; }
@@ -111,6 +112,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
// Detailed Refinements
// -----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
index 45964e1ed6de..cc568a2f2f17 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedKryo.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedKryo.td
@@ -95,7 +95,8 @@ def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]>
{ let Latency = 6; let NumMicroOps = 2; }
def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]>
{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
-def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVd, [KryoUnitXY]> { let Latency = 6; }
+def : WriteRes<WriteVq, [KryoUnitXY]> { let Latency = 6; }
def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; }
def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; }
@@ -117,6 +118,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
index 438371c1b6a8..77fca22a5f55 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -90,7 +90,8 @@ def : WriteRes<WriteFMul, [TSV110UnitF]> { let Latency = 5; }
// FP Div, Sqrt
def : WriteRes<WriteFDiv, [TSV110UnitFSU1]> { let Latency = 18; }
-def : WriteRes<WriteV, [TSV110UnitF]> { let Latency = 4; }
+def : WriteRes<WriteVd, [TSV110UnitF]> { let Latency = 4; }
+def : WriteRes<WriteVq, [TSV110UnitF]> { let Latency = 4; }
def : WriteRes<WriteVLD, [TSV110UnitFLdSt]> { let Latency = 5; }
def : WriteRes<WriteVST, [TSV110UnitF]> { let Latency = 1; }
@@ -113,6 +114,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
def : InstRW<[WriteI], (instrs COPY)>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
index 125eb284cfd1..ff34c0ce9a0c 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX.td
@@ -154,7 +154,8 @@ def : WriteRes<WriteFCmp, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm, [THXT8XUnitFPALU]> { let Latency = 6; }
-def : WriteRes<WriteV, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVd, [THXT8XUnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteVq, [THXT8XUnitFPALU]> { let Latency = 6; }
// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
@@ -192,6 +193,7 @@ def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD, 2>;
+def : ReadAdvance<ReadST, 2>;
// FIXME: This needs more targeted benchmarking.
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
index 8d8675b7ac6f..e4cae97b5524 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -362,6 +362,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -1249,7 +1250,12 @@ def : InstRW<[THX2T99Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX2T99F01]> {
+def : WriteRes<WriteVd, [THX2T99F01]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+def : WriteRes<WriteVq, [THX2T99F01]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [4];
diff --git a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
index 00838cc4b9bd..08be2b3a55b3 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td
@@ -621,6 +621,7 @@ def : ReadAdvance<ReadID, 0>;
def : ReadAdvance<ReadExtrHi, 0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadST, 0>;
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
@@ -1356,7 +1357,12 @@ def : InstRW<[THX3T110Write_5Cyc_F01], (instrs FMOVXDHighr, FMOVDXHighr)>;
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
-def : WriteRes<WriteV, [THX3T110FP0123]> {
+def : WriteRes<WriteVd, [THX3T110FP0123]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+def : WriteRes<WriteVq, [THX3T110FP0123]> {
let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [4];
diff --git a/llvm/lib/Target/AArch64/AArch64Schedule.td b/llvm/lib/Target/AArch64/AArch64Schedule.td
index 49c0c1782236..b8572c9b4572 100644
--- a/llvm/lib/Target/AArch64/AArch64Schedule.td
+++ b/llvm/lib/Target/AArch64/AArch64Schedule.td
@@ -47,6 +47,7 @@ def WriteAdr : SchedWrite; // Address pre/post increment.
def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadST : SchedRead; // Read the stored value.
def ReadAdrBase : SchedRead; // Read the base resister of a reg-offset LD/ST.
// Serialized two-level address load.
@@ -76,7 +77,8 @@ def WriteFImm : SchedWrite; // Floating-point immediate.
def WriteFMul : SchedWrite; // Floating-point multiply.
def WriteFDiv : SchedWrite; // Floating-point division.
-def WriteV : SchedWrite; // Vector ops.
+def WriteVd : SchedWrite; // 64bit Vector D ops.
+def WriteVq : SchedWrite; // 128bit Vector Q ops.
def WriteVLD : SchedWrite; // Vector loads.
def WriteVST : SchedWrite; // Vector stores.
@@ -86,9 +88,9 @@ def WriteAtomic : SchedWrite; // Atomic memory operations (CAS, Swap, LDOP)
def ReadVLD : SchedRead;
// Sequential vector load and shuffle.
-def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
-def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteVq]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteVq, WriteVq]>;
// Store a shuffled vector.
-def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
-def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
+def WriteVSTShuffle : WriteSequence<[WriteVq, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteVq, WriteVq, WriteVST]>;
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 3eb4c04570de..d2d84b2a3f6d 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -24,8 +24,10 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
const AArch64Subtarget &STI =
DAG.getMachineFunction().getSubtarget<AArch64Subtarget>();
- const char *bzeroName = (V && V->isNullValue())
- ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) : nullptr;
+ const char *bzeroName =
+ (V && V->isZero())
+ ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
+ : nullptr;
// For small size (< 256), it is not beneficial to use bzero
// instead of memset.
if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) {
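The rewritten condition keeps the existing heuristic: memset becomes a bzero call only when the stored value is zero, the platform actually exposes a bzero libcall, and the size is unknown or above 256 bytes. A minimal sketch of that decision (illustration only, not part of this patch), with llvm::Optional standing in for the possibly-unknown constant size:

  #include "llvm/ADT/Optional.h"
  #include <cstdint>

  // Illustration of the heuristic above: prefer bzero over memset only when
  // the value being stored is zero and the size is unknown or > 256 bytes.
  // (The real code additionally requires the target to provide bzero.)
  static bool shouldPreferBzero(bool ValueIsZero, llvm::Optional<uint64_t> Size) {
    if (!ValueIsZero)
      return false;
    return !Size.hasValue() || Size.getValue() > 256;
  }
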
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index f37fedd50378..5cec4cb66339 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -1,9 +1,8 @@
//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -652,7 +651,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
if (!DT || !PDT ||
- !forAllReachableExits(*DT, *PDT, Start, End, RetVec, TagEnd))
+ !forAllReachableExits(*DT, *PDT, Start, Info.LifetimeEnd, RetVec,
+ TagEnd))
End->eraseFromParent();
} else {
uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 076ed9b13c99..d2488f61eb4b 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -1,9 +1,8 @@
//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -177,20 +176,19 @@ bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
}
void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
- for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
- UI != E;) {
- MachineInstr *UseI = &*(UI++);
- if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
+ for (MachineInstr &UseI :
+ llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
+ if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
// FI operand is always the one before the immediate offset.
- unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
- if (UseI->getOperand(OpIdx).isReg() &&
- UseI->getOperand(OpIdx).getReg() == TaggedReg) {
- UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
- UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
+ unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
+ if (UseI.getOperand(OpIdx).isReg() &&
+ UseI.getOperand(OpIdx).getReg() == TaggedReg) {
+ UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
+ UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
}
- } else if (UseI->isCopy() &&
- Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
- uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
+ } else if (UseI.isCopy() &&
+ Register::isVirtualRegister(UseI.getOperand(0).getReg())) {
+ uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
}
}
}
@@ -277,8 +275,7 @@ Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
WorkList.push_back(RetagReg);
while (!WorkList.empty()) {
- Register UseReg = WorkList.back();
- WorkList.pop_back();
+ Register UseReg = WorkList.pop_back_val();
for (auto &UseI : MRI->use_instructions(UseReg)) {
unsigned Opcode = UseI.getOpcode();
if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
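The loop rewrites above lean on two LLVM ADT idioms: llvm::make_early_inc_range, which advances the underlying iterator before yielding the current element so the loop body may delete or re-link that element, and SmallVector::pop_back_val, which reads and pops the worklist entry in one call. A minimal standalone sketch of the first idiom (illustration only, not code from this patch), with an ordinary std::map standing in for the machine-IR use list:

  #include "llvm/ADT/STLExtras.h"
  #include <cstdio>
  #include <map>

  int main() {
    std::map<int, const char *> Uses = {{1, "a"}, {2, "b"}, {3, "c"}, {4, "d"}};
    // Erasing the current element mid-loop is safe because the wrapped
    // iterator has already been incremented past it.
    for (auto &KV : llvm::make_early_inc_range(Uses))
      if (KV.first % 2 == 0)
        Uses.erase(KV.first);
    for (auto &KV : Uses)
      std::printf("%d:%s ", KV.first, KV.second); // prints: 1:a 3:c
    return 0;
  }
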
diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index a94856ef4fba..64f13eab0413 100644
--- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -119,7 +119,7 @@ bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
}
bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
+ if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize())
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index b22eb3b154f5..d782d6352cbe 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -50,15 +50,17 @@ static cl::opt<bool>
static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
cl::desc("Enable the use of AA during codegen."));
-AArch64Subtarget &
-AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
- StringRef CPUString) {
+AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
+ StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
// Determine default and user-specified characteristics
if (CPUString.empty())
CPUString = "generic";
- ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS);
+ if (TuneCPUString.empty())
+ TuneCPUString = CPUString;
+
+ ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
initializeProperties();
return *this;
@@ -98,6 +100,12 @@ void AArch64Subtarget::initializeProperties() {
case CortexX1:
PrefFunctionLogAlignment = 4;
break;
+ case CortexA510:
+ case CortexA710:
+ case CortexX2:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ break;
case A64FX:
CacheLineSize = 256;
PrefFunctionLogAlignment = 3;
@@ -106,6 +114,7 @@ void AArch64Subtarget::initializeProperties() {
PrefetchDistance = 128;
MinPrefetchStride = 1024;
MaxPrefetchIterationsAhead = 4;
+ VScaleForTuning = 4;
break;
case AppleA7:
case AppleA10:
@@ -147,9 +156,20 @@ void AArch64Subtarget::initializeProperties() {
PrefFunctionLogAlignment = 3;
break;
case NeoverseN1:
+ PrefFunctionLogAlignment = 4;
+ break;
case NeoverseN2:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ break;
case NeoverseV1:
PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 2;
+ break;
+ case Neoverse512TVB:
+ PrefFunctionLogAlignment = 4;
+ VScaleForTuning = 1;
+ MaxInterleaveFactor = 4;
break;
case Saphira:
MaxInterleaveFactor = 4;
@@ -197,18 +217,20 @@ void AArch64Subtarget::initializeProperties() {
}
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
+ const std::string &TuneCPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride)
- : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
- FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)),
- TSInfo(), TLInfo(TM, *this) {
+ FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(),
+ TLInfo(TM, *this) {
if (AArch64::isX18ReservedByDefault(TT))
ReserveXRegister.set(18);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e0ef8df6fca9..19db774ccd7b 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -50,6 +50,7 @@ public:
CortexA35,
CortexA53,
CortexA55,
+ CortexA510,
CortexA57,
CortexA65,
CortexA72,
@@ -59,14 +60,17 @@ public:
CortexA77,
CortexA78,
CortexA78C,
+ CortexA710,
CortexR82,
CortexX1,
+ CortexX2,
ExynosM3,
Falkor,
Kryo,
NeoverseE1,
NeoverseN1,
NeoverseN2,
+ Neoverse512TVB,
NeoverseV1,
Saphira,
ThunderX2T99,
@@ -82,6 +86,7 @@ protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
+ bool HasV8_0aOps = false;
bool HasV8_1aOps = false;
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
@@ -89,16 +94,21 @@ protected:
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8_7aOps = false;
-
+ bool HasV9_0aOps = false;
+ bool HasV9_1aOps = false;
+ bool HasV9_2aOps = false;
bool HasV8_0rOps = false;
- bool HasCONTEXTIDREL2 = false;
+ bool HasCONTEXTIDREL2 = false;
+ bool HasEL2VMSA = false;
+ bool HasEL3 = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
bool HasDotProd = false;
bool HasCRC = false;
bool HasLSE = false;
+ bool HasLSE2 = false;
bool HasRAS = false;
bool HasRDM = false;
bool HasPerfMon = false;
@@ -119,6 +129,7 @@ protected:
// SVE extensions
bool HasSVE = false;
bool UseExperimentalZeroingPseudos = false;
+ bool UseScalarIncVL = false;
// Armv8.2 Crypto extensions
bool HasSM4 = false;
@@ -139,7 +150,6 @@ protected:
bool HasTRACEV8_4 = false;
bool HasAM = false;
bool HasSEL2 = false;
- bool HasPMU = false;
bool HasTLB_RMI = false;
bool HasFlagM = false;
bool HasRCPC_IMMO = false;
@@ -190,6 +200,10 @@ protected:
bool HasSME = false;
bool HasSMEF64 = false;
bool HasSMEI64 = false;
+ bool HasStreamingSVE = false;
+
+ // AppleA7 system register.
+ bool HasAppleA7SysReg = false;
// Future architecture extensions.
bool HasETE = false;
@@ -271,6 +285,7 @@ protected:
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
+ unsigned VScaleForTuning = 2;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -292,7 +307,8 @@ private:
/// passed in feature string so that we can use initializer lists for
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
- StringRef CPUString);
+ StringRef CPUString,
+ StringRef TuneCPUString);
/// Initialize properties based on the selected processor family.
void initializeProperties();
@@ -301,8 +317,8 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, const TargetMachine &TM,
- bool LittleEndian,
+ const std::string &TuneCPU, const std::string &FS,
+ const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0);
@@ -338,11 +354,15 @@ public:
return ARMProcFamily;
}
+ bool hasV8_0aOps() const { return HasV8_0aOps; }
bool hasV8_1aOps() const { return HasV8_1aOps; }
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
+ bool hasV9_0aOps() const { return HasV9_0aOps; }
+ bool hasV9_1aOps() const { return HasV9_1aOps; }
+ bool hasV9_2aOps() const { return HasV9_2aOps; }
bool hasV8_0rOps() const { return HasV8_0rOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
@@ -375,6 +395,7 @@ public:
bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
+ bool hasLSE2() const { return HasLSE2; }
bool hasRAS() const { return HasRAS; }
bool hasRDM() const { return HasRDM; }
bool hasSM4() const { return HasSM4; }
@@ -449,6 +470,8 @@ public:
return UseExperimentalZeroingPseudos;
}
+ bool useScalarIncVL() const { return UseScalarIncVL; }
+
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
@@ -494,6 +517,7 @@ public:
bool hasSME() const { return HasSME; }
bool hasSMEF64() const { return HasSMEF64; }
bool hasSMEI64() const { return HasSMEI64; }
+ bool hasStreamingSVE() const { return HasStreamingSVE; }
bool isLittleEndian() const { return IsLittle; }
@@ -541,10 +565,11 @@ public:
bool hasHCX() const { return HasHCX; }
bool hasLS64() const { return HasLS64; }
bool hasSEL2() const { return HasSEL2; }
- bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
bool hasFlagM() const { return HasFlagM; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+ bool hasEL2VMSA() const { return HasEL2VMSA; }
+ bool hasEL3() const { return HasEL3; }
bool addrSinkUsingGEPs() const override {
// Keeping GEPs inbounds is important for exploiting AArch64
@@ -598,6 +623,31 @@ public:
}
}
+ /// Return whether FrameLowering should always set the "extended frame
+ /// present" bit in FP, or set it based on a symbol in the runtime.
+ bool swiftAsyncContextIsDynamicallySet() const {
+ // Older OS versions (particularly system unwinders) are confused by the
+ // Swift extended frame, so when building code that might be run on them we
+ // must dynamically query the concurrency library to determine whether
+ // extended frames should be flagged as present.
+ const Triple &TT = getTargetTriple();
+
+ unsigned Major, Minor, Micro;
+ TT.getOSVersion(Major, Minor, Micro);
+ switch(TT.getOS()) {
+ default:
+ return false;
+ case Triple::IOS:
+ case Triple::TvOS:
+ return Major < 15;
+ case Triple::WatchOS:
+ return Major < 8;
+ case Triple::MacOSX:
+ case Triple::Darwin:
+ return Major < 12;
+ }
+ }
+
void mirFileLoaded(MachineFunction &MF) const override;
// Return the known range for the bit length of SVE data registers. A value
@@ -614,6 +664,8 @@ public:
}
bool useSVEForFixedLengthVectors() const;
+
+ unsigned getVScaleForTuning() const { return VScaleForTuning; }
};
} // End llvm namespace
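The new VScaleForTuning field gives the cost model a per-core estimate of the runtime vscale: 2 for Neoverse V1, 4 for A64FX, 1 for Neoverse N2, Neoverse 512TVB and the newly added Cortex-A510/A710/X2, with a default of 2. A rough sketch of how such a tuning value turns a scalable VF into an expected lane count (the helper name here is illustrative; the in-tree use is getMaxNumElements further down in the TTI changes):

  #include <cstdio>

  // Illustration only: translate the known-minimum element count of a
  // scalable VF (<vscale x KnownMin x elt>) into the lane count the cost
  // model expects on a core with the given vscale-for-tuning value.
  static unsigned expectedLanes(unsigned KnownMinVF, unsigned VScaleForTuning) {
    return KnownMinVF * VScaleForTuning;
  }

  int main() {
    std::printf("%u\n", expectedLanes(4, 2)); // Neoverse V1: 8 lanes expected
    std::printf("%u\n", expectedLanes(4, 4)); // A64FX: 16 lanes expected
    return 0;
  }
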
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index f400916c97c9..f9fe804865a5 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -586,6 +586,7 @@ class SysReg<string name, bits<2> op0, bits<3> op1, bits<4> crn, bits<4> crm,
let EnumValueField = "Encoding";
string Name = name;
+ string AltName = name;
bits<16> Encoding;
let Encoding{15-14} = op0;
let Encoding{13-11} = op1;
@@ -912,13 +913,19 @@ def : RWSysReg<"HSTR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b011>;
def : RWSysReg<"HACR_EL2", 0b11, 0b100, 0b0001, 0b0001, 0b111>;
def : RWSysReg<"MDCR_EL3", 0b11, 0b110, 0b0001, 0b0011, 0b001>;
def : RWSysReg<"TTBR0_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b000>;
-def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000>;
def : RWSysReg<"TTBR0_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b000>;
+
+let Requires = [{ {AArch64::FeatureEL2VMSA} }] in {
+def : RWSysReg<"TTBR0_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
+ let AltName = "VSCTLR_EL2";
+}
+def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
+}
+
def : RWSysReg<"TTBR1_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b001>;
def : RWSysReg<"TCR_EL1", 0b11, 0b000, 0b0010, 0b0000, 0b010>;
def : RWSysReg<"TCR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b010>;
def : RWSysReg<"TCR_EL3", 0b11, 0b110, 0b0010, 0b0000, 0b010>;
-def : RWSysReg<"VTTBR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b000>;
def : RWSysReg<"VTCR_EL2", 0b11, 0b100, 0b0010, 0b0001, 0b010>;
def : RWSysReg<"DACR32_EL2", 0b11, 0b100, 0b0011, 0b0000, 0b000>;
def : RWSysReg<"SPSR_EL1", 0b11, 0b000, 0b0100, 0b0000, 0b000>;
@@ -970,6 +977,7 @@ def : RWSysReg<"PMUSERENR_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b000>;
def : RWSysReg<"PMINTENSET_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b001>;
def : RWSysReg<"PMINTENCLR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b010>;
def : RWSysReg<"PMOVSSET_EL0", 0b11, 0b011, 0b1001, 0b1110, 0b011>;
+def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
def : RWSysReg<"MAIR_EL1", 0b11, 0b000, 0b1010, 0b0010, 0b000>;
def : RWSysReg<"MAIR_EL2", 0b11, 0b100, 0b1010, 0b0010, 0b000>;
def : RWSysReg<"MAIR_EL3", 0b11, 0b110, 0b1010, 0b0010, 0b000>;
@@ -1292,6 +1300,57 @@ def : RWSysReg<"ICH_LR13_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b101>;
def : RWSysReg<"ICH_LR14_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b110>;
def : RWSysReg<"ICH_LR15_EL2", 0b11, 0b100, 0b1100, 0b1101, 0b111>;
+// v8r system registers
+let Requires = [{ {AArch64::HasV8_0rOps} }] in {
+//Virtualization System Control Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"VSCTLR_EL2", 0b11, 0b100, 0b0010, 0b0000, 0b000> {
+ let AltName = "TTBR0_EL2";
+}
+
+//MPU Type Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"MPUIR_EL1", 0b11, 0b000, 0b0000, 0b0000, 0b100>;
+def : RWSysReg<"MPUIR_EL2", 0b11, 0b100, 0b0000, 0b0000, 0b100>;
+
+//Protection Region Enable Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRENR_EL1", 0b11, 0b000, 0b0110, 0b0001, 0b001>;
+def : RWSysReg<"PRENR_EL2", 0b11, 0b100, 0b0110, 0b0001, 0b001>;
+
+//Protection Region Selection Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRSELR_EL1", 0b11, 0b000, 0b0110, 0b0010, 0b001>;
+def : RWSysReg<"PRSELR_EL2", 0b11, 0b100, 0b0110, 0b0010, 0b001>;
+
+//Protection Region Base Address Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRBAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b000>;
+def : RWSysReg<"PRBAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b000>;
+
+//Protection Region Limit Address Register
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"PRLAR_EL1", 0b11, 0b000, 0b0110, 0b1000, 0b001>;
+def : RWSysReg<"PRLAR_EL2", 0b11, 0b100, 0b0110, 0b1000, 0b001>;
+
+foreach n = 0-15 in {
+foreach x = 1-2 in {
+//Direct access to Protection Region Base Address Register for the nth MPU region

+ def : RWSysReg<!strconcat("PRBAR"#n, "_EL"#x),
+ 0b11, 0b000, 0b0110, 0b1000, 0b000>{
+ let Encoding{5-2} = n;
+ let Encoding{13} = !add(x,-1);
+ }
+
+ def : RWSysReg<!strconcat("PRLAR"#n, "_EL"#x),
+ 0b11, 0b000, 0b0110, 0b1000, 0b001>{
+ let Encoding{5-2} = n;
+ let Encoding{13} = !add(x,-1);
+ }
+} //foreach x = 1-2 in
+} //foreach n = 0-15 in
+} //let Requires = [{ {AArch64::HasV8_0rOps} }] in
+
// v8.1a "Privileged Access Never" extension-specific system registers
let Requires = [{ {AArch64::FeaturePAN} }] in
def : RWSysReg<"PAN", 0b11, 0b000, 0b0100, 0b0010, 0b011>;
@@ -1395,7 +1454,9 @@ let Requires = [{ {AArch64::FeatureSEL2} }] in {
// v8.4a "Virtualization secure second stage translation" registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"VSTCR_EL2" , 0b11, 0b100, 0b0010, 0b0110, 0b010>;
-def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000>;
+def : RWSysReg<"VSTTBR_EL2", 0b11, 0b100, 0b0010, 0b0110, 0b000> {
+ let Requires = [{ {AArch64::HasV8_0aOps} }];
+}
// v8.4a "Virtualization timer" registers
// Op0 Op1 CRn CRm Op2
@@ -1411,12 +1472,6 @@ def : RWSysReg<"CNTHPS_CTL_EL2", 0b11, 0b100, 0b1110, 0b0101, 0b001>;
def : RWSysReg<"SDER32_EL2", 0b11, 0b100, 0b0001, 0b0011, 0b001>;
} // FeatureSEL2
-// v8.4a PMU registers
-// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::FeaturePMU} }] in {
-def : RWSysReg<"PMMIR_EL1", 0b11, 0b000, 0b1001, 0b1110, 0b110>;
-} // FeaturePMU
-
// v8.4a RAS registers
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"ERXPFGCTL_EL1", 0b11, 0b000, 0b0101, 0b0100, 0b101>;
@@ -1640,7 +1695,7 @@ def : RWSysReg<"PMSNEVFR_EL1", 0b11, 0b000, 0b1001, 0b1001, 0b001>;
// Cyclone specific system registers
// Op0 Op1 CRn CRm Op2
-let Requires = [{ {AArch64::ProcAppleA7} }] in
+let Requires = [{ {AArch64::FeatureAppleA7SysReg} }] in
def : RWSysReg<"CPM_IOACC_CTL_EL3", 0b11, 0b111, 0b1111, 0b0010, 0b000>;
// Scalable Matrix Extension (SME)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 99bcb2f4649a..ce26c62af61a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
@@ -36,10 +37,10 @@
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
@@ -175,6 +176,16 @@ static cl::opt<unsigned> SVEVectorBitsMinOpt(
extern cl::opt<bool> EnableHomogeneousPrologEpilog;
+static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
+ "aarch64-enable-gisel-ldst-prelegal",
+ cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
+ "aarch64-enable-gisel-ldst-postlegal",
+ cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
+ cl::init(false), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
// Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
@@ -195,6 +206,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
+ initializeAArch64MIPeepholeOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
initializeAArch64O0PreLegalizerCombinerPass(*PR);
initializeAArch64PreLegalizerCombinerPass(*PR);
@@ -354,10 +366,13 @@ AArch64TargetMachine::~AArch64TargetMachine() = default;
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
std::string CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
std::string FS =
FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
@@ -398,6 +413,7 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
Key += "SVEMax";
Key += std::to_string(MaxSVEVectorSize);
Key += CPU;
+ Key += TuneCPU;
Key += FS;
auto &I = SubtargetMap[Key];
@@ -406,8 +422,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// creation will depend on the TM and the code generation flags on the
// function that reside in TargetOptions.
resetTargetOptions(F);
- I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
- isLittle, MinSVEVectorSize,
+ I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, TuneCPU, FS,
+ *this, isLittle, MinSVEVectorSize,
MaxSVEVectorSize);
}
return I.get();
@@ -471,6 +487,7 @@ public:
void addIRPasses() override;
bool addPreISel() override;
+ void addCodeGenPrepare() override;
bool addInstSelector() override;
bool addIRTranslator() override;
void addPreLegalizeMachineIR() override;
@@ -479,6 +496,7 @@ public:
bool addRegBankSelect() override;
void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
+ void addMachineSSAOptimization() override;
bool addILPOpts() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -597,6 +615,12 @@ bool AArch64PassConfig::addPreISel() {
return false;
}
+void AArch64PassConfig::addCodeGenPrepare() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createTypePromotionPass());
+ TargetPassConfig::addCodeGenPrepare();
+}
+
bool AArch64PassConfig::addInstSelector() {
addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
@@ -617,8 +641,11 @@ bool AArch64PassConfig::addIRTranslator() {
void AArch64PassConfig::addPreLegalizeMachineIR() {
if (getOptLevel() == CodeGenOpt::None)
addPass(createAArch64O0PreLegalizerCombiner());
- else
+ else {
addPass(createAArch64PreLegalizerCombiner());
+ if (EnableGISelLoadStoreOptPreLegal)
+ addPass(new LoadStoreOpt());
+ }
}
bool AArch64PassConfig::addLegalizeMachineIR() {
@@ -628,8 +655,11 @@ bool AArch64PassConfig::addLegalizeMachineIR() {
void AArch64PassConfig::addPreRegBankSelect() {
bool IsOptNone = getOptLevel() == CodeGenOpt::None;
- if (!IsOptNone)
+ if (!IsOptNone) {
addPass(createAArch64PostLegalizerCombiner(IsOptNone));
+ if (EnableGISelLoadStoreOptPostLegal)
+ addPass(new LoadStoreOpt());
+ }
addPass(createAArch64PostLegalizerLowering());
}
@@ -649,6 +679,14 @@ bool AArch64PassConfig::addGlobalInstructionSelect() {
return false;
}
+void AArch64PassConfig::addMachineSSAOptimization() {
+ // Run default MachineSSAOptimization first.
+ TargetPassConfig::addMachineSSAOptimization();
+
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64MIPeepholeOptPass());
+}
+
bool AArch64PassConfig::addILPOpts() {
if (EnableCondOpt)
addPass(createAArch64ConditionOptimizerPass());
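getSubtargetImpl now honours a per-function "tune-cpu" attribute alongside "target-cpu", falling back to the target CPU when the attribute is absent, and keys the subtarget cache on both strings. A short sketch of how a front end or pass might tag a function (illustration only; the CPU names are just examples):

  #include "llvm/IR/Function.h"

  // Illustration only (not part of this patch): select features for one CPU
  // while tuning heuristics for another. getSubtargetImpl() above reads
  // exactly these two string attributes; an absent "tune-cpu" falls back to
  // the target CPU.
  static void tagForTuning(llvm::Function &F) {
    F.addFnAttr("target-cpu", "generic");
    F.addFnAttr("tune-cpu", "neoverse-v1"); // example CPU names
  }
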
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 01236aa6b527..63d6fa5bbb26 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -9,11 +9,13 @@
#include "AArch64TargetTransformInfo.h"
#include "AArch64ExpandImm.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
@@ -220,19 +222,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
auto *RetTy = ICA.getReturnType();
switch (ICA.getID()) {
case Intrinsic::umin:
- case Intrinsic::umax: {
- auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
- // umin(x,y) -> sub(x,usubsat(x,y))
- // umax(x,y) -> add(x,usubsat(y,x))
- if (LT.second == MVT::v2i64)
- return LT.first * 2;
- LLVM_FALLTHROUGH;
- }
+ case Intrinsic::umax:
case Intrinsic::smin:
case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
MVT::v8i16, MVT::v2i32, MVT::v4i32};
auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+ // v2i64 types get converted to cmp+bif hence the cost of 2
+ if (LT.second == MVT::v2i64)
+ return LT.first * 2;
if (any_of(ValidMinMaxTys, [&LT](MVT M) { return M == LT.second; }))
return LT.first;
break;
@@ -291,13 +289,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
const auto LegalisationCost = TLI->getTypeLegalizationCost(DL, RetTy);
const auto *Entry =
CostTableLookup(BitreverseTbl, ICA.getID(), LegalisationCost.second);
- // Cost Model is using the legal type(i32) that i8 and i16 will be converted
- // to +1 so that we match the actual lowering cost
- if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
- TLI->getValueType(DL, RetTy, true) == MVT::i16)
- return LegalisationCost.first * Entry->Cost + 1;
- if (Entry)
+ if (Entry) {
+ // Cost Model is using the legal type(i32) that i8 and i16 will be
+ // converted to +1 so that we match the actual lowering cost
+ if (TLI->getValueType(DL, RetTy, true) == MVT::i8 ||
+ TLI->getValueType(DL, RetTy, true) == MVT::i16)
+ return LegalisationCost.first * Entry->Cost + 1;
+
return LegalisationCost.first * Entry->Cost;
+ }
break;
}
case Intrinsic::ctpop: {
@@ -440,6 +440,18 @@ static Optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
return IC.replaceInstUsesWith(II, Insert);
}
+static Optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // Replace DupX with a regular IR splat.
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ auto *RetTy = cast<ScalableVectorType>(II.getType());
+ Value *Splat =
+ Builder.CreateVectorSplat(RetTy->getElementCount(), II.getArgOperand(0));
+ Splat->takeName(&II);
+ return IC.replaceInstUsesWith(II, Splat);
+}
+
static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
@@ -457,12 +469,9 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
return None;
// Check that we have a compare of zero..
- auto *DupX = dyn_cast<IntrinsicInst>(II.getArgOperand(2));
- if (!DupX || DupX->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
- return None;
-
- auto *DupXArg = dyn_cast<ConstantInt>(DupX->getArgOperand(0));
- if (!DupXArg || !DupXArg->isZero())
+ auto *SplatValue =
+ dyn_cast_or_null<ConstantInt>(getSplatValue(II.getArgOperand(2)));
+ if (!SplatValue || !SplatValue->isZero())
return None;
// ..against a dupq
@@ -547,14 +556,34 @@ static Optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
IntrinsicInst &II) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
Value *Pg = II.getArgOperand(0);
Value *Vec = II.getArgOperand(1);
- bool IsAfter = II.getIntrinsicID() == Intrinsic::aarch64_sve_lasta;
+ auto IntrinsicID = II.getIntrinsicID();
+ bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
// lastX(splat(X)) --> X
if (auto *SplatVal = getSplatValue(Vec))
return IC.replaceInstUsesWith(II, SplatVal);
+ // If x and/or y is a splat value then:
+ // lastX (binop (x, y)) --> binop(lastX(x), lastX(y))
+ Value *LHS, *RHS;
+ if (match(Vec, m_OneUse(m_BinOp(m_Value(LHS), m_Value(RHS))))) {
+ if (isSplatValue(LHS) || isSplatValue(RHS)) {
+ auto *OldBinOp = cast<BinaryOperator>(Vec);
+ auto OpC = OldBinOp->getOpcode();
+ auto *NewLHS =
+ Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS});
+ auto *NewRHS =
+ Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS});
+ auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags(
+ OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);
+ return IC.replaceInstUsesWith(II, NewBinOp);
+ }
+ }
+
auto *C = dyn_cast<Constant>(Pg);
if (IsAfter && C && C->isNullValue()) {
// The intrinsic is extracting lane 0 so use an extract instead.
@@ -576,39 +605,11 @@ static Optional<Instruction *> instCombineSVELast(InstCombiner &IC,
cast<ConstantInt>(IntrPG->getOperand(0))->getZExtValue();
// Can the intrinsic's predicate be converted to a known constant index?
- unsigned Idx;
- switch (PTruePattern) {
- default:
+ unsigned MinNumElts = getNumElementsFromSVEPredPattern(PTruePattern);
+ if (!MinNumElts)
return None;
- case AArch64SVEPredPattern::vl1:
- Idx = 0;
- break;
- case AArch64SVEPredPattern::vl2:
- Idx = 1;
- break;
- case AArch64SVEPredPattern::vl3:
- Idx = 2;
- break;
- case AArch64SVEPredPattern::vl4:
- Idx = 3;
- break;
- case AArch64SVEPredPattern::vl5:
- Idx = 4;
- break;
- case AArch64SVEPredPattern::vl6:
- Idx = 5;
- break;
- case AArch64SVEPredPattern::vl7:
- Idx = 6;
- break;
- case AArch64SVEPredPattern::vl8:
- Idx = 7;
- break;
- case AArch64SVEPredPattern::vl16:
- Idx = 15;
- break;
- }
+ unsigned Idx = MinNumElts - 1;
// Increment the index if extracting the element after the last active
// predicate element.
if (IsAfter)
@@ -661,26 +662,9 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
return IC.replaceInstUsesWith(II, VScale);
}
- unsigned MinNumElts = 0;
- switch (Pattern) {
- default:
- return None;
- case AArch64SVEPredPattern::vl1:
- case AArch64SVEPredPattern::vl2:
- case AArch64SVEPredPattern::vl3:
- case AArch64SVEPredPattern::vl4:
- case AArch64SVEPredPattern::vl5:
- case AArch64SVEPredPattern::vl6:
- case AArch64SVEPredPattern::vl7:
- case AArch64SVEPredPattern::vl8:
- MinNumElts = Pattern;
- break;
- case AArch64SVEPredPattern::vl16:
- MinNumElts = 16;
- break;
- }
+ unsigned MinNumElts = getNumElementsFromSVEPredPattern(Pattern);
- return NumElts >= MinNumElts
+ return MinNumElts && NumElts >= MinNumElts
? Optional<Instruction *>(IC.replaceInstUsesWith(
II, ConstantInt::get(II.getType(), MinNumElts)))
: None;
@@ -711,6 +695,116 @@ static Optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
return None;
}
+static Optional<Instruction *> instCombineSVEVectorFMLA(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // fold (fadd p a (fmul p b c)) -> (fma p a b c)
+ Value *P = II.getOperand(0);
+ Value *A = II.getOperand(1);
+ auto FMul = II.getOperand(2);
+ Value *B, *C;
+ if (!match(FMul, m_Intrinsic<Intrinsic::aarch64_sve_fmul>(
+ m_Specific(P), m_Value(B), m_Value(C))))
+ return None;
+
+ if (!FMul->hasOneUse())
+ return None;
+
+ llvm::FastMathFlags FAddFlags = II.getFastMathFlags();
+ // Stop the combine when the flags on the inputs differ in case dropping flags
+ // would lead to us missing out on more beneficial optimizations.
+ if (FAddFlags != cast<CallInst>(FMul)->getFastMathFlags())
+ return None;
+ if (!FAddFlags.allowContract())
+ return None;
+
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ auto FMLA = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fmla,
+ {II.getType()}, {P, A, B, C}, &II);
+ FMLA->setFastMathFlags(FAddFlags);
+ return IC.replaceInstUsesWith(II, FMLA);
+}
+
+static Optional<Instruction *>
+instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Value *Pred = II.getOperand(0);
+ Value *PtrOp = II.getOperand(1);
+ Type *VecTy = II.getType();
+ Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());
+
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()))) {
+ LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
+ return IC.replaceInstUsesWith(II, Load);
+ }
+
+ CallInst *MaskedLoad =
+ Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
+ Pred, ConstantAggregateZero::get(VecTy));
+ return IC.replaceInstUsesWith(II, MaskedLoad);
+}
+
+static Optional<Instruction *>
+instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Value *VecOp = II.getOperand(0);
+ Value *Pred = II.getOperand(1);
+ Value *PtrOp = II.getOperand(2);
+ Value *VecPtr =
+ Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
+
+ if (match(Pred, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>()))) {
+ Builder.CreateStore(VecOp, VecPtr);
+ return IC.eraseInstFromFunction(II);
+ }
+
+ Builder.CreateMaskedStore(VecOp, VecPtr, PtrOp->getPointerAlignment(DL),
+ Pred);
+ return IC.eraseInstFromFunction(II);
+}
+
+static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::aarch64_sve_fmul:
+ return Instruction::BinaryOps::FMul;
+ case Intrinsic::aarch64_sve_fadd:
+ return Instruction::BinaryOps::FAdd;
+ case Intrinsic::aarch64_sve_fsub:
+ return Instruction::BinaryOps::FSub;
+ default:
+ return Instruction::BinaryOpsEnd;
+ }
+}
+
+static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
+ IntrinsicInst &II) {
+ auto *OpPredicate = II.getOperand(0);
+ auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
+ if (BinOpCode == Instruction::BinaryOpsEnd ||
+ !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>())))
+ return None;
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Builder.setFastMathFlags(II.getFastMathFlags());
+ auto BinOp =
+ Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+ return IC.replaceInstUsesWith(II, BinOp);
+}
+
+static Optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC,
+ IntrinsicInst &II) {
+ if (auto FMLA = instCombineSVEVectorFMLA(IC, II))
+ return FMLA;
+ return instCombineSVEVectorBinOp(IC, II);
+}
+
static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpPredicate = II.getOperand(0);
@@ -720,14 +814,11 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
IRBuilder<> Builder(II.getContext());
Builder.SetInsertPoint(&II);
- // Return true if a given instruction is an aarch64_sve_dup_x intrinsic call
- // with a unit splat value, false otherwise.
- auto IsUnitDupX = [](auto *I) {
- auto *IntrI = dyn_cast<IntrinsicInst>(I);
- if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
+ // Return true if a given instruction is a unit splat value, false otherwise.
+ auto IsUnitSplat = [](auto *I) {
+ auto *SplatValue = getSplatValue(I);
+ if (!SplatValue)
return false;
-
- auto *SplatValue = IntrI->getOperand(0);
return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
};
@@ -744,10 +835,10 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
// The OpMultiplier variable should always point to the dup (if any), so
// swap if necessary.
- if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
+ if (IsUnitDup(OpMultiplicand) || IsUnitSplat(OpMultiplicand))
std::swap(OpMultiplier, OpMultiplicand);
- if (IsUnitDupX(OpMultiplier)) {
+ if (IsUnitSplat(OpMultiplier)) {
// [f]mul pg (dupx 1) %n => %n
OpMultiplicand->takeName(&II);
return IC.replaceInstUsesWith(II, OpMultiplicand);
@@ -763,22 +854,40 @@ static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
}
}
- return None;
+ return instCombineSVEVectorBinOp(IC, II);
}
+static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
+ IntrinsicInst &II) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+ Value *UnpackArg = II.getArgOperand(0);
+ auto *RetTy = cast<ScalableVectorType>(II.getType());
+ bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
+ II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
+
+ // Hi = uunpkhi(splat(X)) --> Hi = splat(extend(X))
+ // Lo = uunpklo(splat(X)) --> Lo = splat(extend(X))
+ if (auto *ScalarArg = getSplatValue(UnpackArg)) {
+ ScalarArg =
+ Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
+ Value *NewVal =
+ Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
+ NewVal->takeName(&II);
+ return IC.replaceInstUsesWith(II, NewVal);
+ }
+
+ return None;
+}
static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
IntrinsicInst &II) {
auto *OpVal = II.getOperand(0);
auto *OpIndices = II.getOperand(1);
VectorType *VTy = cast<VectorType>(II.getType());
- // Check whether OpIndices is an aarch64_sve_dup_x intrinsic call with
- // constant splat value < minimal element count of result.
- auto *DupXIntrI = dyn_cast<IntrinsicInst>(OpIndices);
- if (!DupXIntrI || DupXIntrI->getIntrinsicID() != Intrinsic::aarch64_sve_dup_x)
- return None;
-
- auto *SplatValue = dyn_cast<ConstantInt>(DupXIntrI->getOperand(0));
+ // Check whether OpIndices is a constant splat value < minimal element count
+ // of result.
+ auto *SplatValue = dyn_cast_or_null<ConstantInt>(getSplatValue(OpIndices));
if (!SplatValue ||
SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
return None;
@@ -795,6 +904,115 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
return IC.replaceInstUsesWith(II, VectorSplat);
}
+static Optional<Instruction *> instCombineSVETupleGet(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // Try to remove sequences of tuple get/set.
+ Value *SetTuple, *SetIndex, *SetValue;
+ auto *GetTuple = II.getArgOperand(0);
+ auto *GetIndex = II.getArgOperand(1);
+ // Check that we have tuple_get(GetTuple, GetIndex) where GetTuple is a
+ // call to tuple_set i.e. tuple_set(SetTuple, SetIndex, SetValue).
+ // Make sure that the types of the current intrinsic and SetValue match
+ // in order to safely remove the sequence.
+ if (!match(GetTuple,
+ m_Intrinsic<Intrinsic::aarch64_sve_tuple_set>(
+ m_Value(SetTuple), m_Value(SetIndex), m_Value(SetValue))) ||
+ SetValue->getType() != II.getType())
+ return None;
+ // Case where we get the same index right after setting it.
+ // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) --> SetValue
+ if (GetIndex == SetIndex)
+ return IC.replaceInstUsesWith(II, SetValue);
+ // If we are getting a different index than what was set in the tuple_set
+ // intrinsic. We can just set the input tuple to the one up in the chain.
+ // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex)
+ // --> tuple_get(SetTuple, GetIndex)
+ return IC.replaceOperand(II, 0, SetTuple);
+}
+
+static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
+ IntrinsicInst &II) {
+ // zip1(uzp1(A, B), uzp2(A, B)) --> A
+ // zip2(uzp1(A, B), uzp2(A, B)) --> B
+ Value *A, *B;
+ if (match(II.getArgOperand(0),
+ m_Intrinsic<Intrinsic::aarch64_sve_uzp1>(m_Value(A), m_Value(B))) &&
+ match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
+ m_Specific(A), m_Specific(B))))
+ return IC.replaceInstUsesWith(
+ II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));
+
+ return None;
+}
+
+static Optional<Instruction *> instCombineLD1GatherIndex(InstCombiner &IC,
+ IntrinsicInst &II) {
+ Value *Mask = II.getOperand(0);
+ Value *BasePtr = II.getOperand(1);
+ Value *Index = II.getOperand(2);
+ Type *Ty = II.getType();
+ Type *BasePtrTy = BasePtr->getType();
+ Value *PassThru = ConstantAggregateZero::get(Ty);
+
+ // Contiguous gather => masked load.
+ // (sve.ld1.gather.index Mask BasePtr (sve.index IndexBase 1))
+ // => (masked.load (gep BasePtr IndexBase) Align Mask zeroinitializer)
+ Value *IndexBase;
+ if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
+ m_Value(IndexBase), m_SpecificInt(1)))) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Align Alignment =
+ BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
+
+ Type *VecPtrTy = PointerType::getUnqual(Ty);
+ Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
+ IndexBase);
+ Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
+ CallInst *MaskedLoad =
+ Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
+ MaskedLoad->takeName(&II);
+ return IC.replaceInstUsesWith(II, MaskedLoad);
+ }
+
+ return None;
+}
+
+static Optional<Instruction *> instCombineST1ScatterIndex(InstCombiner &IC,
+ IntrinsicInst &II) {
+ Value *Val = II.getOperand(0);
+ Value *Mask = II.getOperand(1);
+ Value *BasePtr = II.getOperand(2);
+ Value *Index = II.getOperand(3);
+ Type *Ty = Val->getType();
+ Type *BasePtrTy = BasePtr->getType();
+
+ // Contiguous scatter => masked store.
+ // (sve.ld1.scatter.index Value Mask BasePtr (sve.index IndexBase 1))
+ // => (masked.store Value (gep BasePtr IndexBase) Align Mask)
+ Value *IndexBase;
+ if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
+ m_Value(IndexBase), m_SpecificInt(1)))) {
+ IRBuilder<> Builder(II.getContext());
+ Builder.SetInsertPoint(&II);
+
+ Align Alignment =
+ BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
+
+ Value *Ptr = Builder.CreateGEP(BasePtrTy->getPointerElementType(), BasePtr,
+ IndexBase);
+ Type *VecPtrTy = PointerType::getUnqual(Ty);
+ Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
+
+ (void)Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask);
+
+ return IC.eraseInstFromFunction(II);
+ }
+
+ return None;
+}
+
Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const {
@@ -806,6 +1024,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
return instCombineConvertFromSVBool(IC, II);
case Intrinsic::aarch64_sve_dup:
return instCombineSVEDup(IC, II);
+ case Intrinsic::aarch64_sve_dup_x:
+ return instCombineSVEDupX(IC, II);
case Intrinsic::aarch64_sve_cmpne:
case Intrinsic::aarch64_sve_cmpne_wide:
return instCombineSVECmpNE(IC, II);
@@ -829,8 +1049,30 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_mul:
case Intrinsic::aarch64_sve_fmul:
return instCombineSVEVectorMul(IC, II);
+ case Intrinsic::aarch64_sve_fadd:
+ return instCombineSVEVectorFAdd(IC, II);
+ case Intrinsic::aarch64_sve_fsub:
+ return instCombineSVEVectorBinOp(IC, II);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
+ case Intrinsic::aarch64_sve_uunpkhi:
+ case Intrinsic::aarch64_sve_uunpklo:
+ case Intrinsic::aarch64_sve_sunpkhi:
+ case Intrinsic::aarch64_sve_sunpklo:
+ return instCombineSVEUnpack(IC, II);
+ case Intrinsic::aarch64_sve_tuple_get:
+ return instCombineSVETupleGet(IC, II);
+ case Intrinsic::aarch64_sve_zip1:
+ case Intrinsic::aarch64_sve_zip2:
+ return instCombineSVEZip(IC, II);
+ case Intrinsic::aarch64_sve_ld1_gather_index:
+ return instCombineLD1GatherIndex(IC, II);
+ case Intrinsic::aarch64_sve_st1_scatter_index:
+ return instCombineST1ScatterIndex(IC, II);
+ case Intrinsic::aarch64_sve_ld1:
+ return instCombineSVELD1(IC, II, DL);
+ case Intrinsic::aarch64_sve_st1:
+ return instCombineSVEST1(IC, II, DL);
}
return None;
@@ -1393,9 +1635,13 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
return (Cost + 1) * LT.first;
case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FNEG:
// These nodes are marked as 'custom' just to lower them to SVE.
// We know said lowering will incur no additional cost.
- if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
+ if (!Ty->getScalarType()->isFP128Ty())
return (Cost + 2) * LT.first;
return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
@@ -1525,8 +1771,7 @@ AArch64TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
-
- if (!isa<ScalableVectorType>(DataTy))
+ if (useNeonVector(DataTy))
return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
Alignment, CostKind, I);
auto *VT = cast<VectorType>(DataTy);
@@ -1623,9 +1868,10 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
// ldN/stN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
// matched to more than one ldN/stN instruction.
+ bool UseScalable;
if (NumElts % Factor == 0 &&
- TLI->isLegalInterleavedAccessType(SubVecTy, DL))
- return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
+ TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
+ return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
}
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
@@ -1705,9 +1951,12 @@ getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
}
void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Enable partial unrolling and runtime unrolling.
- BaseT::getUnrollingPreferences(L, SE, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
+
+ UP.UpperBound = true;
// For inner loop, it is more likely to be a hot one, and the runtime check
// can be promoted out from LICM pass, so the overhead is less, let's try
@@ -1749,7 +1998,6 @@ void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
!ST->getSchedModel().isOutOfOrder()) {
UP.Runtime = true;
UP.Partial = true;
- UP.UpperBound = true;
UP.UnrollRemainder = true;
UP.DefaultUnrollRuntimeCount = 4;
@@ -1775,7 +2023,7 @@ Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
StructType *ST = dyn_cast<StructType>(ExpectedType);
if (!ST)
return nullptr;
- unsigned NumElts = Inst->getNumArgOperands() - 1;
+ unsigned NumElts = Inst->arg_size() - 1;
if (ST->getNumElements() != NumElts)
return nullptr;
for (unsigned i = 0, e = NumElts; i != e; ++i) {
@@ -1816,7 +2064,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
case Intrinsic::aarch64_neon_st4:
Info.ReadMem = false;
Info.WriteMem = true;
- Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
+ Info.PtrVal = Inst->getArgOperand(Inst->arg_size() - 1);
break;
}
@@ -1892,6 +2140,8 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::UMax:
case RecurKind::FMin:
case RecurKind::FMax:
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
return true;
default:
return false;
@@ -1902,23 +2152,23 @@ InstructionCost
AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
bool IsUnsigned,
TTI::TargetCostKind CostKind) {
- if (!isa<ScalableVectorType>(Ty))
+ std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+
+ if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
- assert((isa<ScalableVectorType>(Ty) && isa<ScalableVectorType>(CondTy)) &&
- "Both vector needs to be scalable");
- std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
+  assert((isa<ScalableVectorType>(Ty) == isa<ScalableVectorType>(CondTy)) &&
+         "Both vectors need to be equally scalable");
+
InstructionCost LegalizationCost = 0;
if (LT.first > 1) {
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
- unsigned CmpOpcode =
- Ty->isFPOrFPVectorTy() ? Instruction::FCmp : Instruction::ICmp;
- LegalizationCost =
- getCmpSelInstrCost(CmpOpcode, LegalVTy, LegalVTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- getCmpSelInstrCost(Instruction::Select, LegalVTy, LegalVTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- LegalizationCost *= LT.first - 1;
+ unsigned MinMaxOpcode =
+ Ty->isFPOrFPVectorTy()
+ ? Intrinsic::maxnum
+ : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin);
+ IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy});
+ LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1);
}
return LegalizationCost + /*Cost of horizontal reduction*/ 2;
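
The legalization overhead is now priced as one min/max intrinsic per extra legal chunk instead of a compare plus a select, with the trailing 2 covering the final horizontal reduction. The arithmetic as a standalone sketch, with plain integers standing in for the cost values:

    #include <cstdint>

    // Hypothetical inputs: NumLegalParts is LT.first, MinMaxCost the cost of
    // one umin/smin/maxnum intrinsic on the legalized type.
    uint64_t minMaxReductionCost(uint64_t NumLegalParts, uint64_t MinMaxCost) {
      uint64_t LegalizationCost = 0;
      if (NumLegalParts > 1)
        LegalizationCost = MinMaxCost * (NumLegalParts - 1);
      return LegalizationCost + 2; // cost of the final horizontal reduction
    }
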
@@ -1954,8 +2204,13 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF)) {
- if (!isa<ScalableVectorType>(ValTy))
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+ if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
+ InstructionCost BaseCost =
+ BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+ // Add on extra cost to reflect the extra overhead on some CPUs. We still
+ // end up vectorizing for more computationally intensive loops.
+ return BaseCost + FixedVTy->getNumElements();
+ }
if (Opcode != Instruction::FAdd)
return InstructionCost::getInvalid();
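
For strict (in-order) FP reductions, fixed-width vectors now take the generic estimate plus one unit per element, biasing the vectorizer towards the more computationally intensive loops, while non-FAdd scalable reductions remain invalid. A hedged sketch of that branch; the trailing scalable-FAdd case is a placeholder since the rest of the function is not shown in this hunk:

    #include <cstdint>
    #include <optional>

    std::optional<uint64_t> orderedReductionCost(bool IsFixedVector,
                                                 uint64_t NumElements,
                                                 uint64_t GenericCost,
                                                 bool IsFAdd) {
      if (IsFixedVector)
        return GenericCost + NumElements; // extra per-element overhead on some CPUs
      if (!IsFAdd)
        return std::nullopt;              // mirrors InstructionCost::getInvalid()
      return GenericCost;                 // placeholder: SVE strict-FAdd costing continues below
    }
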
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index d55fd5b4f815..d1e8cd204b3a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,10 +125,8 @@ public:
return ST->getMinVectorRegisterBitWidth();
}
- Optional<unsigned> getMaxVScale() const {
- if (ST->hasSVE())
- return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
- return BaseT::getMaxVScale();
+ Optional<unsigned> getVScaleForTuning() const {
+ return ST->getVScaleForTuning();
}
/// Try to return an estimate cost factor that can be used as a multiplier
@@ -138,9 +136,8 @@ public:
unsigned getMaxNumElements(ElementCount VF) const {
if (!VF.isScalable())
return VF.getFixedValue();
- Optional<unsigned> MaxNumVScale = getMaxVScale();
- assert(MaxNumVScale && "Expected valid max vscale value");
- return *MaxNumVScale * VF.getKnownMinValue();
+
+ return VF.getKnownMinValue() * ST->getVScaleForTuning();
}
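
getMaxNumElements now scales a scalable VF by the subtarget's vscale-for-tuning value rather than the architectural maximum vscale. A standalone sketch of the calculation, with a small struct standing in for ElementCount:

    #include <cstdint>

    // Hypothetical ElementCount stand-in.
    struct ElemCount {
      uint64_t KnownMin;
      bool Scalable;
    };

    uint64_t maxNumElements(ElemCount VF, uint64_t VScaleForTuning) {
      if (!VF.Scalable)
        return VF.KnownMin;                 // fixed VF: the count is exact
      return VF.KnownMin * VScaleForTuning; // scalable VF: estimate via tuning vscale
    }
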
unsigned getMaxInterleaveFactor(unsigned VF);
@@ -180,8 +177,7 @@ public:
InstructionCost getSpliceCost(VectorType *Tp, int Index);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -209,7 +205,8 @@ public:
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -229,7 +226,7 @@ public:
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
- if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
+ if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
@@ -244,8 +241,7 @@ public:
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
return false; // Fall back to scalarization of masked operations.
- return !DataType->getScalarType()->isIntegerTy(1) &&
- isElementTypeLegalForScalableVector(DataType->getScalarType());
+ return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -266,8 +262,7 @@ public:
DataTypeFVTy->getNumElements() < 2))
return false;
- return !DataType->getScalarType()->isIntegerTy(1) &&
- isElementTypeLegalForScalableVector(DataType->getScalarType());
+ return isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
@@ -295,10 +290,11 @@ public:
return BaseT::isLegalNTStore(DataType, Alignment);
}
+ bool enableOrderedReductions() const { return true; }
+
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
bool
@@ -316,9 +312,9 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
- InstructionCost getArithmeticReductionCost(
- unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+ InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask, int Index,
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f27e9b2ef0f0..6d3aea2721de 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6,13 +6,13 @@
//
//===----------------------------------------------------------------------===//
+#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64InstPrinter.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "MCTargetDesc/AArch64TargetStreamer.h"
#include "TargetInfo/AArch64TargetInfo.h"
-#include "AArch64InstrInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -40,15 +40,15 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cctype>
@@ -1511,7 +1511,7 @@ public:
}
bool isAdrpLabel() const {
- // Validation was handled during parsing, so we just sanity check that
+ // Validation was handled during parsing, so we just verify that
// something didn't go haywire.
if (!isImm())
return false;
@@ -1527,7 +1527,7 @@ public:
}
bool isAdrLabel() const {
- // Validation was handled during parsing, so we just sanity check that
+ // Validation was handled during parsing, so we just verify that
// something didn't go haywire.
if (!isImm())
return false;
@@ -2672,8 +2672,7 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
/// the register is added to the operand list.
OperandMatchResultTy
AArch64AsmParser::tryParseScalarRegister(unsigned &RegNum) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -2683,22 +2682,21 @@ AArch64AsmParser::tryParseScalarRegister(unsigned &RegNum) {
return MatchOperand_NoMatch;
RegNum = Reg;
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
OperandMatchResultTy
AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ if (getTok().isNot(AsmToken::Identifier)) {
Error(S, "Expected cN operand where 0 <= N <= 15");
return MatchOperand_ParseFail;
}
- StringRef Tok = Parser.getTok().getIdentifier();
+ StringRef Tok = getTok().getIdentifier();
if (Tok[0] != 'c' && Tok[0] != 'C') {
Error(S, "Expected cN operand where 0 <= N <= 15");
return MatchOperand_ParseFail;
@@ -2711,7 +2709,7 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
Operands.push_back(
AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext()));
return MatchOperand_Success;
@@ -2721,9 +2719,8 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
template <bool IsSVEPrefetch>
OperandMatchResultTy
AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
auto LookupByName = [](StringRef N) {
if (IsSVEPrefetch) {
@@ -2783,16 +2780,15 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreatePrefetch(
*PRFM, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command
OperandMatchResultTy
AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier)) {
TokError("invalid operand for instruction");
return MatchOperand_ParseFail;
@@ -2806,16 +2802,15 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreatePSBHint(
PSB->Encoding, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
/// tryParseBTIHint - Try to parse a BTI operand, mapped to Hint command
OperandMatchResultTy
AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier)) {
TokError("invalid operand for instruction");
return MatchOperand_ParseFail;
@@ -2829,7 +2824,7 @@ AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateBTIHint(
BTI->Encoding, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
@@ -2837,12 +2832,11 @@ AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
/// instruction.
OperandMatchResultTy
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr = nullptr;
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat hash token.
+ if (getTok().is(AsmToken::Hash)) {
+ Lex(); // Eat hash token.
}
if (parseSymbolicImmVal(Expr))
@@ -2894,11 +2888,11 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
const MCExpr *Expr = nullptr;
// Leave anything with a bracket to the default for SVE
- if (getParser().getTok().is(AsmToken::LBrac))
+ if (getTok().is(AsmToken::LBrac))
return MatchOperand_NoMatch;
- if (getParser().getTok().is(AsmToken::Hash))
- getParser().Lex(); // Eat hash token.
+ if (getTok().is(AsmToken::Hash))
+ Lex(); // Eat hash token.
if (parseSymbolicImmVal(Expr))
return MatchOperand_ParseFail;
@@ -2927,7 +2921,6 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
template<bool AddFPZeroAsLiteral>
OperandMatchResultTy
AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
bool Hash = parseOptionalToken(AsmToken::Hash);
@@ -2935,7 +2928,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
// Handle negation, as that still comes through as a separate token.
bool isNegative = parseOptionalToken(AsmToken::Minus);
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (!Tok.is(AsmToken::Real) && !Tok.is(AsmToken::Integer)) {
if (!Hash)
return MatchOperand_NoMatch;
@@ -2974,7 +2967,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
RealVal, *StatusOrErr == APFloat::opOK, S, getContext()));
}
- Parser.Lex(); // Eat the token.
+ Lex(); // Eat the token.
return MatchOperand_Success;
}
@@ -2983,51 +2976,50 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
/// a shift suffix, for example '#1, lsl #12'.
OperandMatchResultTy
AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- if (Parser.getTok().is(AsmToken::Hash))
- Parser.Lex(); // Eat '#'
- else if (Parser.getTok().isNot(AsmToken::Integer))
+ if (getTok().is(AsmToken::Hash))
+ Lex(); // Eat '#'
+ else if (getTok().isNot(AsmToken::Integer))
// Operand should start from # or should be integer, emit error otherwise.
return MatchOperand_NoMatch;
const MCExpr *Imm = nullptr;
if (parseSymbolicImmVal(Imm))
return MatchOperand_ParseFail;
- else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ else if (getTok().isNot(AsmToken::Comma)) {
Operands.push_back(
AArch64Operand::CreateImm(Imm, S, getLoc(), getContext()));
return MatchOperand_Success;
}
// Eat ','
- Parser.Lex();
+ Lex();
// The optional operand must be "lsl #N" where N is non-negative.
- if (!Parser.getTok().is(AsmToken::Identifier) ||
- !Parser.getTok().getIdentifier().equals_insensitive("lsl")) {
+ if (!getTok().is(AsmToken::Identifier) ||
+ !getTok().getIdentifier().equals_insensitive("lsl")) {
Error(getLoc(), "only 'lsl #+N' valid after immediate");
return MatchOperand_ParseFail;
}
// Eat 'lsl'
- Parser.Lex();
+ Lex();
parseOptionalToken(AsmToken::Hash);
- if (Parser.getTok().isNot(AsmToken::Integer)) {
+ if (getTok().isNot(AsmToken::Integer)) {
Error(getLoc(), "only 'lsl #+N' valid after immediate");
return MatchOperand_ParseFail;
}
- int64_t ShiftAmount = Parser.getTok().getIntVal();
+ int64_t ShiftAmount = getTok().getIntVal();
if (ShiftAmount < 0) {
Error(getLoc(), "positive shift amount required");
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat the number
+ Lex(); // Eat the number
// Just in case the optional lsl #0 is used for immediates other than zero.
if (ShiftAmount == 0 && Imm != nullptr) {
@@ -3085,16 +3077,15 @@ AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
/// parseCondCode - Parse a Condition Code operand.
bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
bool invertCondCode) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
StringRef Cond = Tok.getString();
AArch64CC::CondCode CC = parseCondCodeString(Cond);
if (CC == AArch64CC::Invalid)
return TokError("invalid condition code");
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
if (invertCondCode) {
if (CC == AArch64CC::AL || CC == AArch64CC::NV)
@@ -3109,8 +3100,7 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
OperandMatchResultTy
AArch64AsmParser::tryParseSVCR(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
SMLoc S = getLoc();
if (Tok.isNot(AsmToken::Identifier)) {
@@ -3125,20 +3115,19 @@ AArch64AsmParser::tryParseSVCR(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateSVCR(PStateImm, Tok.getString(), S, getContext()));
- Parser.Lex(); // Eat identifier token.
+ Lex(); // Eat identifier token.
return MatchOperand_Success;
}
OperandMatchResultTy
AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
SMLoc S = getLoc();
StringRef Name = Tok.getString();
if (Name.equals_insensitive("za")) {
- Parser.Lex(); // eat "za"
+ Lex(); // eat "za"
Operands.push_back(AArch64Operand::CreateMatrixRegister(
AArch64::ZA, /*ElementWidth=*/0, MatrixKind::Array, S, getLoc(),
getContext()));
@@ -3176,7 +3165,7 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
}
unsigned ElementWidth = KindRes->second;
- Parser.Lex();
+ Lex();
Operands.push_back(AArch64Operand::CreateMatrixRegister(
Reg, ElementWidth, Kind, S, getLoc(), getContext()));
@@ -3194,8 +3183,7 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
/// them if present.
OperandMatchResultTy
AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
std::string LowerID = Tok.getString().lower();
AArch64_AM::ShiftExtendType ShOp =
StringSwitch<AArch64_AM::ShiftExtendType>(LowerID)
@@ -3218,7 +3206,7 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_NoMatch;
SMLoc S = Tok.getLoc();
- Parser.Lex();
+ Lex();
bool Hash = parseOptionalToken(AsmToken::Hash);
@@ -3241,9 +3229,8 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
// Make sure we do actually have a number, identifier or a parenthesized
// expression.
SMLoc E = getLoc();
- if (!Parser.getTok().is(AsmToken::Integer) &&
- !Parser.getTok().is(AsmToken::LParen) &&
- !Parser.getTok().is(AsmToken::Identifier)) {
+ if (!getTok().is(AsmToken::Integer) && !getTok().is(AsmToken::LParen) &&
+ !getTok().is(AsmToken::Identifier)) {
Error(E, "expected integer shift amount");
return MatchOperand_ParseFail;
}
@@ -3309,6 +3296,8 @@ static const struct Extension {
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
+ if (FBS[AArch64::HasV8_0aOps])
+ Str += "ARMv8a";
if (FBS[AArch64::HasV8_1aOps])
Str += "ARMv8.1a";
else if (FBS[AArch64::HasV8_2aOps])
@@ -3323,6 +3312,14 @@ static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
Str += "ARMv8.6a";
else if (FBS[AArch64::HasV8_7aOps])
Str += "ARMv8.7a";
+ else if (FBS[AArch64::HasV9_0aOps])
+ Str += "ARMv9-a";
+ else if (FBS[AArch64::HasV9_1aOps])
+ Str += "ARMv9.1a";
+ else if (FBS[AArch64::HasV9_2aOps])
+ Str += "ARMv9.2a";
+ else if (FBS[AArch64::HasV8_0rOps])
+ Str += "ARMv8r";
else {
SmallVector<std::string, 2> ExtMatches;
for (const auto& Ext : ExtensionMap) {
@@ -3358,14 +3355,13 @@ void AArch64AsmParser::createSysAlias(uint16_t Encoding, OperandVector &Operands
/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- if (Name.find('.') != StringRef::npos)
+ if (Name.contains('.'))
return TokError("invalid operand");
Mnemonic = Name;
Operands.push_back(AArch64Operand::CreateToken("sys", NameLoc, getContext()));
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
StringRef Op = Tok.getString();
SMLoc S = Tok.getLoc();
@@ -3376,7 +3372,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!IC->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("IC " + std::string(IC->Name) + " requires: ");
setRequiredFeatureString(IC->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(IC->Encoding, Operands, S);
} else if (Mnemonic == "dc") {
@@ -3386,7 +3382,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!DC->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("DC " + std::string(DC->Name) + " requires: ");
setRequiredFeatureString(DC->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(DC->Encoding, Operands, S);
} else if (Mnemonic == "at") {
@@ -3396,7 +3392,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!AT->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("AT " + std::string(AT->Name) + " requires: ");
setRequiredFeatureString(AT->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(AT->Encoding, Operands, S);
} else if (Mnemonic == "tlbi") {
@@ -3406,7 +3402,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
else if (!TLBI->haveFeatures(getSTI().getFeatureBits())) {
std::string Str("TLBI " + std::string(TLBI->Name) + " requires: ");
setRequiredFeatureString(TLBI->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
createSysAlias(TLBI->Encoding, Operands, S);
} else if (Mnemonic == "cfp" || Mnemonic == "dvp" || Mnemonic == "cpp") {
@@ -3417,7 +3413,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
std::string Str(
Mnemonic.upper() + std::string(PRCTX->Name) + " requires: ");
setRequiredFeatureString(PRCTX->getRequiredFeatures(), Str);
- return TokError(Str.c_str());
+ return TokError(Str);
}
uint16_t PRCTX_Op2 =
Mnemonic == "cfp" ? 4 :
@@ -3428,7 +3424,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
createSysAlias(PRCTX->Encoding << 3 | PRCTX_Op2 , Operands, S);
}
- Parser.Lex(); // Eat operand.
+ Lex(); // Eat operand.
bool ExpectRegister = (Op.lower().find("all") == StringRef::npos);
bool HasRegister = false;
@@ -3454,7 +3450,7 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
OperandMatchResultTy
AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Mnemonic == "tsb" && Tok.isNot(AsmToken::Identifier)) {
TokError("'csync' operand expected");
@@ -3519,15 +3515,14 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateBarrier(
DB ? DB->Encoding : TSB->Encoding, Tok.getString(), getLoc(),
getContext(), false /*hasnXSModifier*/));
- Parser.Lex(); // Consume the option
+ Lex(); // Consume the option
return MatchOperand_Success;
}
OperandMatchResultTy
AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands");
if (Mnemonic != "dsb")
@@ -3574,15 +3569,14 @@ AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(),
getContext(), true /*hasnXSModifier*/));
- Parser.Lex(); // Consume the option
+ Lex(); // Consume the option
return MatchOperand_Success;
}
OperandMatchResultTy
AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -3606,15 +3600,14 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateSysReg(Tok.getString(), getLoc(), MRSReg, MSRReg,
PStateImm, getContext()));
- Parser.Lex(); // Eat identifier
+ Lex(); // Eat identifier
return MatchOperand_Success;
}
/// tryParseNeonVectorRegister - Parse a vector register operand.
bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier))
+ if (getTok().isNot(AsmToken::Identifier))
return true;
SMLoc S = getLoc();
@@ -3675,8 +3668,7 @@ AArch64AsmParser::tryParseVectorIndex(OperandVector &Operands) {
OperandMatchResultTy
AArch64AsmParser::tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
RegKind MatchKind) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return MatchOperand_NoMatch;
@@ -3696,7 +3688,7 @@ AArch64AsmParser::tryParseVectorRegister(unsigned &Reg, StringRef &Kind,
return MatchOperand_ParseFail;
}
}
- Parser.Lex(); // Eat the register token.
+ Lex(); // Eat the register token.
Reg = RegNum;
return MatchOperand_Success;
@@ -3733,8 +3725,7 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
}
// Not all predicates are followed by a '/m' or '/z'.
- MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Slash))
+ if (getTok().isNot(AsmToken::Slash))
return MatchOperand_Success;
// But when they do they shouldn't have an element type suffix.
@@ -3746,10 +3737,10 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
// Add a literal slash as operand
Operands.push_back(AArch64Operand::CreateToken("/", getLoc(), getContext()));
- Parser.Lex(); // Eat the slash.
+ Lex(); // Eat the slash.
// Zeroing or merging?
- auto Pred = Parser.getTok().getString().lower();
+ auto Pred = getTok().getString().lower();
if (Pred != "z" && Pred != "m") {
Error(getLoc(), "expecting 'm' or 'z' predication");
return MatchOperand_ParseFail;
@@ -3759,7 +3750,7 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
const char *ZM = Pred == "z" ? "z" : "m";
Operands.push_back(AArch64Operand::CreateToken(ZM, getLoc(), getContext()));
- Parser.Lex(); // Eat zero/merge token.
+ Lex(); // Eat zero/merge token.
return MatchOperand_Success;
}
@@ -3777,17 +3768,16 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
}
bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
- MCAsmParser &Parser = getParser();
bool HasELFModifier = false;
AArch64MCExpr::VariantKind RefKind;
if (parseOptionalToken(AsmToken::Colon)) {
HasELFModifier = true;
- if (Parser.getTok().isNot(AsmToken::Identifier))
+ if (getTok().isNot(AsmToken::Identifier))
return TokError("expect relocation specifier in operand after ':'");
- std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ std::string LowerCase = getTok().getIdentifier().lower();
RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
.Case("lo12", AArch64MCExpr::VK_LO12)
.Case("abs_g3", AArch64MCExpr::VK_ABS_G3)
@@ -3840,7 +3830,7 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
if (RefKind == AArch64MCExpr::VK_INVALID)
return TokError("expect relocation specifier in operand after ':'");
- Parser.Lex(); // Eat identifier
+ Lex(); // Eat identifier
if (parseToken(AsmToken::Colon, "expect ':' after relocation specifier"))
return true;
@@ -3857,14 +3847,11 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
OperandMatchResultTy
AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
-
- if (Parser.getTok().isNot(AsmToken::LCurly))
+ if (getTok().isNot(AsmToken::LCurly))
return MatchOperand_NoMatch;
- auto ParseMatrixTile = [this, &Parser](unsigned &Reg,
- unsigned &ElementWidth) {
- StringRef Name = Parser.getTok().getString();
+ auto ParseMatrixTile = [this](unsigned &Reg, unsigned &ElementWidth) {
+ StringRef Name = getTok().getString();
size_t DotPosition = Name.find('.');
if (DotPosition == StringRef::npos)
return MatchOperand_NoMatch;
@@ -3882,13 +3869,13 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
}
ElementWidth = KindRes->second;
Reg = RegNum;
- Parser.Lex(); // Eat the register.
+ Lex(); // Eat the register.
return MatchOperand_Success;
};
SMLoc S = getLoc();
- auto LCurly = Parser.getTok();
- Parser.Lex(); // Eat left bracket token.
+ auto LCurly = getTok();
+ Lex(); // Eat left bracket token.
// Empty matrix list
if (parseOptionalToken(AsmToken::RCurly)) {
@@ -3898,8 +3885,8 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
}
// Try parse {za} alias early
- if (Parser.getTok().getString().equals_insensitive("za")) {
- Parser.Lex(); // Eat 'za'
+ if (getTok().getString().equals_insensitive("za")) {
+ Lex(); // Eat 'za'
if (parseToken(AsmToken::RCurly, "'}' expected"))
return MatchOperand_ParseFail;
@@ -3914,7 +3901,7 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
unsigned FirstReg, ElementWidth;
auto ParseRes = ParseMatrixTile(FirstReg, ElementWidth);
if (ParseRes != MatchOperand_Success) {
- Parser.getLexer().UnLex(LCurly);
+ getLexer().UnLex(LCurly);
return ParseRes;
}
@@ -3974,13 +3961,13 @@ OperandMatchResultTy
AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
bool ExpectMatch) {
MCAsmParser &Parser = getParser();
- if (!Parser.getTok().is(AsmToken::LCurly))
+ if (!getTok().is(AsmToken::LCurly))
return MatchOperand_NoMatch;
// Wrapper around parse function
- auto ParseVector = [this, &Parser](unsigned &Reg, StringRef &Kind, SMLoc Loc,
- bool NoMatchIsError) {
- auto RegTok = Parser.getTok();
+ auto ParseVector = [this](unsigned &Reg, StringRef &Kind, SMLoc Loc,
+ bool NoMatchIsError) {
+ auto RegTok = getTok();
auto ParseRes = tryParseVectorRegister(Reg, Kind, VectorKind);
if (ParseRes == MatchOperand_Success) {
if (parseVectorKind(Kind, VectorKind))
@@ -4000,8 +3987,8 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
};
SMLoc S = getLoc();
- auto LCurly = Parser.getTok();
- Parser.Lex(); // Eat left bracket token.
+ auto LCurly = getTok();
+ Lex(); // Eat left bracket token.
StringRef Kind;
unsigned FirstReg;
@@ -4117,7 +4104,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
parseOptionalToken(AsmToken::Hash);
- if (getParser().getTok().isNot(AsmToken::Integer)) {
+ if (getTok().isNot(AsmToken::Integer)) {
Error(getLoc(), "index must be absent or #0");
return MatchOperand_ParseFail;
}
@@ -4145,14 +4132,14 @@ AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) {
return Res;
// No shift/extend is the default.
- if (!ParseShiftExtend || getParser().getTok().isNot(AsmToken::Comma)) {
+ if (!ParseShiftExtend || getTok().isNot(AsmToken::Comma)) {
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::Scalar, StartLoc, getLoc(), getContext(), EqTy));
return MatchOperand_Success;
}
// Eat the comma
- getParser().Lex();
+ Lex();
// Match the shift
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> ExtOpnd;
@@ -4178,23 +4165,23 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
bool NextIsVL =
Parser.getLexer().peekTok().getString().equals_insensitive("vl");
bool NextIsHash = Parser.getLexer().peekTok().is(AsmToken::Hash);
- if (!Parser.getTok().getString().equals_insensitive("mul") ||
+ if (!getTok().getString().equals_insensitive("mul") ||
!(NextIsVL || NextIsHash))
return true;
Operands.push_back(
AArch64Operand::CreateToken("mul", getLoc(), getContext()));
- Parser.Lex(); // Eat the "mul"
+ Lex(); // Eat the "mul"
if (NextIsVL) {
Operands.push_back(
AArch64Operand::CreateToken("vl", getLoc(), getContext()));
- Parser.Lex(); // Eat the "vl"
+ Lex(); // Eat the "vl"
return false;
}
if (NextIsHash) {
- Parser.Lex(); // Eat the #
+ Lex(); // Eat the #
SMLoc S = getLoc();
// Parse immediate operand.
@@ -4212,8 +4199,7 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
}
bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- auto Tok = Parser.getTok();
+ auto Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
@@ -4225,7 +4211,7 @@ bool AArch64AsmParser::parseKeywordOperand(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateToken(Keyword, Tok.getLoc(), getContext()));
- Parser.Lex();
+ Lex();
return false;
}
@@ -4264,7 +4250,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
case AsmToken::LBrac: {
Operands.push_back(
AArch64Operand::CreateToken("[", getLoc(), getContext()));
- Parser.Lex(); // Eat '['
+ Lex(); // Eat '['
// There's no comma after a '[', so we can parse the next operand
// immediately.
@@ -4276,7 +4262,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
Operands.push_back(
AArch64Operand::CreateToken("{", getLoc(), getContext()));
- Parser.Lex(); // Eat '{'
+ Lex(); // Eat '{'
// There's no comma after a '{', so we can parse the next operand
// immediately.
@@ -4332,18 +4318,18 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
// Parse a negative sign
bool isNegative = false;
- if (Parser.getTok().is(AsmToken::Minus)) {
+ if (getTok().is(AsmToken::Minus)) {
isNegative = true;
// We need to consume this token only when we have a Real, otherwise
// we let parseSymbolicImmVal take care of it
if (Parser.getLexer().peekTok().is(AsmToken::Real))
- Parser.Lex();
+ Lex();
}
// The only Real that should come through here is a literal #0.0 for
// the fcmp[e] r, #0.0 instructions. They expect raw token operands,
// so convert the value.
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.is(AsmToken::Real)) {
APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
@@ -4353,7 +4339,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
return TokError("unexpected floating point literal");
else if (IntVal != 0 || isNegative)
return TokError("expected floating-point constant #0.0");
- Parser.Lex(); // Eat the token.
+ Lex(); // Eat the token.
Operands.push_back(AArch64Operand::CreateToken("#0", S, getContext()));
Operands.push_back(AArch64Operand::CreateToken(".0", S, getContext()));
@@ -4372,7 +4358,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
SMLoc Loc = getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
return TokError("unexpected token in operand");
- Parser.Lex(); // Eat '='
+ Lex(); // Eat '='
const MCExpr *SubExprVal;
if (getParser().parseExpression(SubExprVal))
return true;
@@ -4431,11 +4417,10 @@ bool AArch64AsmParser::parseImmExpr(int64_t &Out) {
}
bool AArch64AsmParser::parseComma() {
- if (check(getParser().getTok().isNot(AsmToken::Comma), getLoc(),
- "expected comma"))
+ if (check(getTok().isNot(AsmToken::Comma), getLoc(), "expected comma"))
return true;
// Eat the comma
- getParser().Lex();
+ Lex();
return false;
}
@@ -4507,7 +4492,6 @@ bool AArch64AsmParser::regsEqual(const MCParsedAsmOperand &Op1,
bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
Name = StringSwitch<StringRef>(Name.lower())
.Case("beq", "b.eq")
.Case("bne", "b.ne")
@@ -4530,8 +4514,8 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
.Default(Name);
// First check for the AArch64-specific .req directive.
- if (Parser.getTok().is(AsmToken::Identifier) &&
- Parser.getTok().getIdentifier().lower() == ".req") {
+ if (getTok().is(AsmToken::Identifier) &&
+ getTok().getIdentifier().lower() == ".req") {
parseDirectiveReq(Name, NameLoc);
// We always return 'error' for this, as we're done with this
// statement and don't need to match the 'instruction."
@@ -5084,6 +5068,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "index must be a multiple of 8 in range [0, 32760].");
case Match_InvalidMemoryIndexed16:
return Error(Loc, "index must be a multiple of 16 in range [0, 65520].");
+ case Match_InvalidImm0_0:
+ return Error(Loc, "immediate must be 0.");
case Match_InvalidImm0_1:
return Error(Loc, "immediate must be an integer in range [0, 1].");
case Match_InvalidImm0_3:
@@ -5128,6 +5114,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
case Match_InvalidSVECpyImm64:
return Error(Loc, "immediate must be an integer in range [-128, 127] or a "
"multiple of 256 in range [-32768, 32512]");
+ case Match_InvalidIndexRange0_0:
+ return Error(Loc, "expected lane specifier '[0]'");
case Match_InvalidIndexRange1_1:
return Error(Loc, "expected lane specifier '[1]'");
case Match_InvalidIndexRange0_15:
@@ -5256,14 +5244,6 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
return Error(Loc, "invalid predicate register.");
case Match_InvalidSVEPredicate3bAnyReg:
return Error(Loc, "invalid restricted predicate register, expected p0..p7 (without element suffix)");
- case Match_InvalidSVEPredicate3bBReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.b..p7.b");
- case Match_InvalidSVEPredicate3bHReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.h..p7.h");
- case Match_InvalidSVEPredicate3bSReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.s..p7.s");
- case Match_InvalidSVEPredicate3bDReg:
- return Error(Loc, "invalid restricted predicate register, expected p0.d..p7.d");
case Match_InvalidSVEExactFPImmOperandHalfOne:
return Error(Loc, "Invalid floating point constant, expected 0.5 or 1.0.");
case Match_InvalidSVEExactFPImmOperandHalfTwo:
@@ -5724,6 +5704,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidMemoryIndexedSImm9:
case Match_InvalidMemoryIndexed16SImm9:
case Match_InvalidMemoryIndexed8SImm10:
+ case Match_InvalidImm0_0:
case Match_InvalidImm0_1:
case Match_InvalidImm0_3:
case Match_InvalidImm0_7:
@@ -5745,6 +5726,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVECpyImm16:
case Match_InvalidSVECpyImm32:
case Match_InvalidSVECpyImm64:
+ case Match_InvalidIndexRange0_0:
case Match_InvalidIndexRange1_1:
case Match_InvalidIndexRange0_15:
case Match_InvalidIndexRange0_7:
@@ -5811,10 +5793,6 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidSVEPredicateSReg:
case Match_InvalidSVEPredicateDReg:
case Match_InvalidSVEPredicate3bAnyReg:
- case Match_InvalidSVEPredicate3bBReg:
- case Match_InvalidSVEPredicate3bHReg:
- case Match_InvalidSVEPredicate3bSReg:
- case Match_InvalidSVEPredicate3bDReg:
case Match_InvalidSVEExactFPImmOperandHalfOne:
case Match_InvalidSVEExactFPImmOperandHalfTwo:
case Match_InvalidSVEExactFPImmOperandZeroOne:
@@ -5958,6 +5936,9 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV9A:
+ case AArch64::ArchKind::ARMV9_1A:
+ case AArch64::ArchKind::ARMV9_2A:
case AArch64::ArchKind::ARMV8R:
RequestedExtensions.push_back("sm4");
RequestedExtensions.push_back("sha3");
@@ -5980,6 +5961,9 @@ static void ExpandCryptoAEK(AArch64::ArchKind ArchKind,
case AArch64::ArchKind::ARMV8_5A:
case AArch64::ArchKind::ARMV8_6A:
case AArch64::ArchKind::ARMV8_7A:
+ case AArch64::ArchKind::ARMV9A:
+ case AArch64::ArchKind::ARMV9_1A:
+ case AArch64::ArchKind::ARMV9_2A:
RequestedExtensions.push_back("nosm4");
RequestedExtensions.push_back("nosha3");
RequestedExtensions.push_back("nosha2");
@@ -6206,12 +6190,12 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
/// The number of arguments depends on the loh identifier.
bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
MCLOHType Kind;
- if (getParser().getTok().isNot(AsmToken::Identifier)) {
- if (getParser().getTok().isNot(AsmToken::Integer))
+ if (getTok().isNot(AsmToken::Identifier)) {
+ if (getTok().isNot(AsmToken::Integer))
return TokError("expected an identifier or a number in directive");
// We successfully get a numeric value for the identifier.
// Check if it is valid.
- int64_t Id = getParser().getTok().getIntVal();
+ int64_t Id = getTok().getIntVal();
if (Id <= -1U && !isValidMCLOHType(Id))
return TokError("invalid numeric identifier in directive");
Kind = (MCLOHType)Id;
@@ -6265,8 +6249,7 @@ bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
/// parseDirectiveReq
/// ::= name .req registername
bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
- MCAsmParser &Parser = getParser();
- Parser.Lex(); // Eat the '.req' token.
+ Lex(); // Eat the '.req' token.
SMLoc SRegLoc = getLoc();
RegKind RegisterKind = RegKind::Scalar;
unsigned RegNum;
@@ -6329,11 +6312,10 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// parseDirectiveUneq
/// ::= .unreq registername
bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
- MCAsmParser &Parser = getParser();
if (getTok().isNot(AsmToken::Identifier))
return TokError("unexpected input in .unreq directive.");
- RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
- Parser.Lex(); // Eat the identifier.
+ RegisterReqs.erase(getTok().getIdentifier().lower());
+ Lex(); // Eat the identifier.
return parseToken(AsmToken::EndOfStatement);
}
@@ -6357,9 +6339,7 @@ bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() {
/// parseDirectiveVariantPCS
/// ::= .variant_pcs symbolname
bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
- MCAsmParser &Parser = getParser();
-
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
return TokError("expected symbol name");
@@ -6369,7 +6349,7 @@ bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) {
if (!Sym)
return TokError("unknown symbol");
- Parser.Lex(); // Eat the symbol
+ Lex(); // Eat the symbol
if (parseEOL())
return true;
@@ -6741,7 +6721,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
SMLoc S = getLoc();
- if (getParser().getTok().isNot(AsmToken::Identifier)) {
+ if (getTok().isNot(AsmToken::Identifier)) {
Error(S, "expected register");
return MatchOperand_ParseFail;
}
@@ -6773,12 +6753,12 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- if (getParser().getTok().isNot(AsmToken::Comma)) {
+ if (getTok().isNot(AsmToken::Comma)) {
Error(getLoc(), "expected comma");
return MatchOperand_ParseFail;
}
// Eat the comma
- getParser().Lex();
+ Lex();
SMLoc E = getLoc();
unsigned SecondReg;
@@ -6833,7 +6813,7 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
unsigned ElementWidth = KindRes->second;
// No shift/extend is the default.
- if (!ParseShiftExtend || getParser().getTok().isNot(AsmToken::Comma)) {
+ if (!ParseShiftExtend || getTok().isNot(AsmToken::Comma)) {
Operands.push_back(AArch64Operand::CreateVectorReg(
RegNum, RegKind::SVEDataVector, ElementWidth, S, S, getContext()));
@@ -6844,7 +6824,7 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
}
// Eat the comma
- getParser().Lex();
+ Lex();
// Match the shift
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> ExtOpnd;
@@ -6866,7 +6846,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc SS = getLoc();
- const AsmToken &TokE = Parser.getTok();
+ const AsmToken &TokE = getTok();
bool IsHash = TokE.is(AsmToken::Hash);
if (!IsHash && TokE.isNot(AsmToken::Identifier))
@@ -6874,7 +6854,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
int64_t Pattern;
if (IsHash) {
- Parser.Lex(); // Eat hash
+ Lex(); // Eat hash
// Parse the immediate operand.
const MCExpr *ImmVal;
@@ -6893,7 +6873,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
if (!Pat)
return MatchOperand_NoMatch;
- Parser.Lex();
+ Lex();
Pattern = Pat->Encoding;
assert(Pattern >= 0 && Pattern < 32);
}
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 1ed8a80a4600..96d410e42be2 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -21,10 +21,10 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <memory>
@@ -225,13 +225,12 @@ static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Addr,
const void *Decoder);
-static DecodeStatus DecodeSVELogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-template<int Bits>
-static DecodeStatus DecodeSImm(llvm::MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder);
+template <int Bits>
+static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+ const void *Decoder);
template <int ElementWidth>
static DecodeStatus DecodeImm8OptLsl(MCInst &Inst, unsigned Imm,
uint64_t Addr, const void *Decoder);
@@ -324,6 +323,33 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
// ^ insert implicit 8-bit element tile
MI.insert(MI.begin()+2, MCOperand::createReg(AArch64::ZAB0));
break;
+ case AArch64::LD1_MXIPXX_H_Q:
+ case AArch64::LD1_MXIPXX_V_Q:
+ case AArch64::ST1_MXIPXX_H_Q:
+ case AArch64::ST1_MXIPXX_V_Q:
+    // 128-bit loads/stores have an implicit zero vector index.
+ MI.insert(MI.begin()+2, MCOperand::createImm(0));
+ break;
+  // 128-bit mova instructions have an implicit zero vector index.
+ case AArch64::INSERT_MXIPZ_H_Q:
+ case AArch64::INSERT_MXIPZ_V_Q:
+ MI.insert(MI.begin()+2, MCOperand::createImm(0));
+ break;
+ case AArch64::EXTRACT_ZPMXI_H_Q:
+ case AArch64::EXTRACT_ZPMXI_V_Q:
+ MI.addOperand(MCOperand::createImm(0));
+ break;
+ case AArch64::SMOVvi8to32_idx0:
+ case AArch64::SMOVvi8to64_idx0:
+ case AArch64::SMOVvi16to32_idx0:
+ case AArch64::SMOVvi16to64_idx0:
+ case AArch64::SMOVvi32to64_idx0:
+ case AArch64::UMOVvi8_idx0:
+ case AArch64::UMOVvi16_idx0:
+ case AArch64::UMOVvi32_idx0:
+ case AArch64::UMOVvi64_idx0:
+ MI.addOperand(MCOperand::createImm(0));
+ break;
}
if (Result != MCDisassembler::Fail)
@@ -366,23 +392,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Disassembler() {
createAArch64ExternalSymbolizer);
}
-static const unsigned FPR128DecoderTable[] = {
- AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
- AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
- AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14,
- AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19,
- AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24,
- AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29,
- AArch64::Q30, AArch64::Q31
-};
-
static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR128DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR128RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
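
The disassembler hunks in this file drop the hand-written per-class register tables and instead look the register up through AArch64MCRegisterClasses[...].getRegister(RegNo); each decoder keeps its bounds check and only the table source changes. A generic sketch of that pattern, with a hypothetical RegClass type standing in for MCRegisterClass:

    #include <optional>

    // Hypothetical stand-in for an MC register class: an ordered list of
    // physical-register numbers addressable by encoding index.
    struct RegClass {
      const unsigned *Regs;
      unsigned NumRegs;
      unsigned getRegister(unsigned Idx) const { return Regs[Idx]; }
    };

    // Mirrors the DecodeFPR128RegisterClass shape above: reject out-of-range
    // encodings, otherwise map the encoding through the class.
    std::optional<unsigned> decodeRegOperand(const RegClass &RC, unsigned RegNo,
                                             unsigned MaxEncoding) {
      if (RegNo > MaxEncoding || RegNo >= RC.NumRegs)
        return std::nullopt; // corresponds to returning Fail
      return RC.getRegister(RegNo);
    }
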
@@ -395,107 +412,63 @@ static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
}
-static const unsigned FPR64DecoderTable[] = {
- AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
- AArch64::D5, AArch64::D6, AArch64::D7, AArch64::D8, AArch64::D9,
- AArch64::D10, AArch64::D11, AArch64::D12, AArch64::D13, AArch64::D14,
- AArch64::D15, AArch64::D16, AArch64::D17, AArch64::D18, AArch64::D19,
- AArch64::D20, AArch64::D21, AArch64::D22, AArch64::D23, AArch64::D24,
- AArch64::D25, AArch64::D26, AArch64::D27, AArch64::D28, AArch64::D29,
- AArch64::D30, AArch64::D31
-};
-
static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR64DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR64RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned FPR32DecoderTable[] = {
- AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
- AArch64::S5, AArch64::S6, AArch64::S7, AArch64::S8, AArch64::S9,
- AArch64::S10, AArch64::S11, AArch64::S12, AArch64::S13, AArch64::S14,
- AArch64::S15, AArch64::S16, AArch64::S17, AArch64::S18, AArch64::S19,
- AArch64::S20, AArch64::S21, AArch64::S22, AArch64::S23, AArch64::S24,
- AArch64::S25, AArch64::S26, AArch64::S27, AArch64::S28, AArch64::S29,
- AArch64::S30, AArch64::S31
-};
-
static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR32DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR32RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned FPR16DecoderTable[] = {
- AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
- AArch64::H5, AArch64::H6, AArch64::H7, AArch64::H8, AArch64::H9,
- AArch64::H10, AArch64::H11, AArch64::H12, AArch64::H13, AArch64::H14,
- AArch64::H15, AArch64::H16, AArch64::H17, AArch64::H18, AArch64::H19,
- AArch64::H20, AArch64::H21, AArch64::H22, AArch64::H23, AArch64::H24,
- AArch64::H25, AArch64::H26, AArch64::H27, AArch64::H28, AArch64::H29,
- AArch64::H30, AArch64::H31
-};
-
static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR16DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR16RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned FPR8DecoderTable[] = {
- AArch64::B0, AArch64::B1, AArch64::B2, AArch64::B3, AArch64::B4,
- AArch64::B5, AArch64::B6, AArch64::B7, AArch64::B8, AArch64::B9,
- AArch64::B10, AArch64::B11, AArch64::B12, AArch64::B13, AArch64::B14,
- AArch64::B15, AArch64::B16, AArch64::B17, AArch64::B18, AArch64::B19,
- AArch64::B20, AArch64::B21, AArch64::B22, AArch64::B23, AArch64::B24,
- AArch64::B25, AArch64::B26, AArch64::B27, AArch64::B28, AArch64::B29,
- AArch64::B30, AArch64::B31
-};
-
static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = FPR8DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::FPR8RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned GPR64DecoderTable[] = {
- AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
- AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9,
- AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14,
- AArch64::X15, AArch64::X16, AArch64::X17, AArch64::X18, AArch64::X19,
- AArch64::X20, AArch64::X21, AArch64::X22, AArch64::X23, AArch64::X24,
- AArch64::X25, AArch64::X26, AArch64::X27, AArch64::X28, AArch64::FP,
- AArch64::LR, AArch64::XZR
-};
-
static DecodeStatus DecodeGPR64commonRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 30)
return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64commonRegClassID].getRegister(
+ RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -506,26 +479,12 @@ static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned GPR64x8DecoderTable[] = {
- AArch64::X0_X1_X2_X3_X4_X5_X6_X7,
- AArch64::X2_X3_X4_X5_X6_X7_X8_X9,
- AArch64::X4_X5_X6_X7_X8_X9_X10_X11,
- AArch64::X6_X7_X8_X9_X10_X11_X12_X13,
- AArch64::X8_X9_X10_X11_X12_X13_X14_X15,
- AArch64::X10_X11_X12_X13_X14_X15_X16_X17,
- AArch64::X12_X13_X14_X15_X16_X17_X18_X19,
- AArch64::X14_X15_X16_X17_X18_X19_X20_X21,
- AArch64::X16_X17_X18_X19_X20_X21_X22_X23,
- AArch64::X18_X19_X20_X21_X22_X23_X24_X25,
- AArch64::X20_X21_X22_X23_X24_X25_X26_X27,
- AArch64::X22_X23_X24_X25_X26_X27_X28_FP,
-};
-
static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -535,7 +494,9 @@ static DecodeStatus DecodeGPR64x8ClassRegisterClass(MCInst &Inst,
if (RegNo & 1)
return Fail;
- unsigned Register = GPR64x8DecoderTable[RegNo >> 1];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID].getRegister(
+ RegNo >> 1);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -545,17 +506,12 @@ static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
- if (Register == AArch64::XZR)
- Register = AArch64::SP;
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned MatrixIndexGPR32_12_15DecoderTable[] = {
- AArch64::W12, AArch64::W13, AArch64::W14, AArch64::W15
-};
-
static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Addr,
@@ -563,28 +519,21 @@ static DecodeStatus DecodeMatrixIndexGPR32_12_15RegisterClass(MCInst &Inst,
if (RegNo > 3)
return Fail;
- unsigned Register = MatrixIndexGPR32_12_15DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::MatrixIndexGPR32_12_15RegClassID]
+ .getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned GPR32DecoderTable[] = {
- AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
- AArch64::W5, AArch64::W6, AArch64::W7, AArch64::W8, AArch64::W9,
- AArch64::W10, AArch64::W11, AArch64::W12, AArch64::W13, AArch64::W14,
- AArch64::W15, AArch64::W16, AArch64::W17, AArch64::W18, AArch64::W19,
- AArch64::W20, AArch64::W21, AArch64::W22, AArch64::W23, AArch64::W24,
- AArch64::W25, AArch64::W26, AArch64::W27, AArch64::W28, AArch64::W29,
- AArch64::W30, AArch64::WZR
-};
-
static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = GPR32DecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR32RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -595,22 +544,11 @@ static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
- unsigned Register = GPR32DecoderTable[RegNo];
- if (Register == AArch64::WZR)
- Register = AArch64::WSP;
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::GPR32spRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned ZPRDecoderTable[] = {
- AArch64::Z0, AArch64::Z1, AArch64::Z2, AArch64::Z3,
- AArch64::Z4, AArch64::Z5, AArch64::Z6, AArch64::Z7,
- AArch64::Z8, AArch64::Z9, AArch64::Z10, AArch64::Z11,
- AArch64::Z12, AArch64::Z13, AArch64::Z14, AArch64::Z15,
- AArch64::Z16, AArch64::Z17, AArch64::Z18, AArch64::Z19,
- AArch64::Z20, AArch64::Z21, AArch64::Z22, AArch64::Z23,
- AArch64::Z24, AArch64::Z25, AArch64::Z26, AArch64::Z27,
- AArch64::Z28, AArch64::Z29, AArch64::Z30, AArch64::Z31
-};
static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
@@ -618,7 +556,8 @@ static DecodeStatus DecodeZPRRegisterClass(MCInst &Inst, unsigned RegNo,
if (RegNo > 31)
return Fail;
- unsigned Register = ZPRDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPRRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -639,71 +578,35 @@ static DecodeStatus DecodeZPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodeZPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned ZZDecoderTable[] = {
- AArch64::Z0_Z1, AArch64::Z1_Z2, AArch64::Z2_Z3, AArch64::Z3_Z4,
- AArch64::Z4_Z5, AArch64::Z5_Z6, AArch64::Z6_Z7, AArch64::Z7_Z8,
- AArch64::Z8_Z9, AArch64::Z9_Z10, AArch64::Z10_Z11, AArch64::Z11_Z12,
- AArch64::Z12_Z13, AArch64::Z13_Z14, AArch64::Z14_Z15, AArch64::Z15_Z16,
- AArch64::Z16_Z17, AArch64::Z17_Z18, AArch64::Z18_Z19, AArch64::Z19_Z20,
- AArch64::Z20_Z21, AArch64::Z21_Z22, AArch64::Z22_Z23, AArch64::Z23_Z24,
- AArch64::Z24_Z25, AArch64::Z25_Z26, AArch64::Z26_Z27, AArch64::Z27_Z28,
- AArch64::Z28_Z29, AArch64::Z29_Z30, AArch64::Z30_Z31, AArch64::Z31_Z0
-};
-
static DecodeStatus DecodeZPR2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void* Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = ZZDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPR2RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned ZZZDecoderTable[] = {
- AArch64::Z0_Z1_Z2, AArch64::Z1_Z2_Z3, AArch64::Z2_Z3_Z4,
- AArch64::Z3_Z4_Z5, AArch64::Z4_Z5_Z6, AArch64::Z5_Z6_Z7,
- AArch64::Z6_Z7_Z8, AArch64::Z7_Z8_Z9, AArch64::Z8_Z9_Z10,
- AArch64::Z9_Z10_Z11, AArch64::Z10_Z11_Z12, AArch64::Z11_Z12_Z13,
- AArch64::Z12_Z13_Z14, AArch64::Z13_Z14_Z15, AArch64::Z14_Z15_Z16,
- AArch64::Z15_Z16_Z17, AArch64::Z16_Z17_Z18, AArch64::Z17_Z18_Z19,
- AArch64::Z18_Z19_Z20, AArch64::Z19_Z20_Z21, AArch64::Z20_Z21_Z22,
- AArch64::Z21_Z22_Z23, AArch64::Z22_Z23_Z24, AArch64::Z23_Z24_Z25,
- AArch64::Z24_Z25_Z26, AArch64::Z25_Z26_Z27, AArch64::Z26_Z27_Z28,
- AArch64::Z27_Z28_Z29, AArch64::Z28_Z29_Z30, AArch64::Z29_Z30_Z31,
- AArch64::Z30_Z31_Z0, AArch64::Z31_Z0_Z1
-};
-
static DecodeStatus DecodeZPR3RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void* Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = ZZZDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPR3RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned ZZZZDecoderTable[] = {
- AArch64::Z0_Z1_Z2_Z3, AArch64::Z1_Z2_Z3_Z4, AArch64::Z2_Z3_Z4_Z5,
- AArch64::Z3_Z4_Z5_Z6, AArch64::Z4_Z5_Z6_Z7, AArch64::Z5_Z6_Z7_Z8,
- AArch64::Z6_Z7_Z8_Z9, AArch64::Z7_Z8_Z9_Z10, AArch64::Z8_Z9_Z10_Z11,
- AArch64::Z9_Z10_Z11_Z12, AArch64::Z10_Z11_Z12_Z13, AArch64::Z11_Z12_Z13_Z14,
- AArch64::Z12_Z13_Z14_Z15, AArch64::Z13_Z14_Z15_Z16, AArch64::Z14_Z15_Z16_Z17,
- AArch64::Z15_Z16_Z17_Z18, AArch64::Z16_Z17_Z18_Z19, AArch64::Z17_Z18_Z19_Z20,
- AArch64::Z18_Z19_Z20_Z21, AArch64::Z19_Z20_Z21_Z22, AArch64::Z20_Z21_Z22_Z23,
- AArch64::Z21_Z22_Z23_Z24, AArch64::Z22_Z23_Z24_Z25, AArch64::Z23_Z24_Z25_Z26,
- AArch64::Z24_Z25_Z26_Z27, AArch64::Z25_Z26_Z27_Z28, AArch64::Z26_Z27_Z28_Z29,
- AArch64::Z27_Z28_Z29_Z30, AArch64::Z28_Z29_Z30_Z31, AArch64::Z29_Z30_Z31_Z0,
- AArch64::Z30_Z31_Z0_Z1, AArch64::Z31_Z0_Z1_Z2
-};
-
static DecodeStatus DecodeZPR4RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void* Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = ZZZZDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::ZPR4RegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -741,19 +644,13 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
return Success;
}
-static const unsigned PPRDecoderTable[] = {
- AArch64::P0, AArch64::P1, AArch64::P2, AArch64::P3,
- AArch64::P4, AArch64::P5, AArch64::P6, AArch64::P7,
- AArch64::P8, AArch64::P9, AArch64::P10, AArch64::P11,
- AArch64::P12, AArch64::P13, AArch64::P14, AArch64::P15
-};
-
static DecodeStatus DecodePPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 15)
return Fail;
- unsigned Register = PPRDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::PPRRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
@@ -768,157 +665,64 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo,
return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder);
}
-static const unsigned VectorDecoderTable[] = {
- AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
- AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
- AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14,
- AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19,
- AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24,
- AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29,
- AArch64::Q30, AArch64::Q31
-};
-
-static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = VectorDecoderTable[RegNo];
- Inst.addOperand(MCOperand::createReg(Register));
- return Success;
-}
-
-static const unsigned QQDecoderTable[] = {
- AArch64::Q0_Q1, AArch64::Q1_Q2, AArch64::Q2_Q3, AArch64::Q3_Q4,
- AArch64::Q4_Q5, AArch64::Q5_Q6, AArch64::Q6_Q7, AArch64::Q7_Q8,
- AArch64::Q8_Q9, AArch64::Q9_Q10, AArch64::Q10_Q11, AArch64::Q11_Q12,
- AArch64::Q12_Q13, AArch64::Q13_Q14, AArch64::Q14_Q15, AArch64::Q15_Q16,
- AArch64::Q16_Q17, AArch64::Q17_Q18, AArch64::Q18_Q19, AArch64::Q19_Q20,
- AArch64::Q20_Q21, AArch64::Q21_Q22, AArch64::Q22_Q23, AArch64::Q23_Q24,
- AArch64::Q24_Q25, AArch64::Q25_Q26, AArch64::Q26_Q27, AArch64::Q27_Q28,
- AArch64::Q28_Q29, AArch64::Q29_Q30, AArch64::Q30_Q31, AArch64::Q31_Q0
-};
-
static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = QQDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::QQRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned QQQDecoderTable[] = {
- AArch64::Q0_Q1_Q2, AArch64::Q1_Q2_Q3, AArch64::Q2_Q3_Q4,
- AArch64::Q3_Q4_Q5, AArch64::Q4_Q5_Q6, AArch64::Q5_Q6_Q7,
- AArch64::Q6_Q7_Q8, AArch64::Q7_Q8_Q9, AArch64::Q8_Q9_Q10,
- AArch64::Q9_Q10_Q11, AArch64::Q10_Q11_Q12, AArch64::Q11_Q12_Q13,
- AArch64::Q12_Q13_Q14, AArch64::Q13_Q14_Q15, AArch64::Q14_Q15_Q16,
- AArch64::Q15_Q16_Q17, AArch64::Q16_Q17_Q18, AArch64::Q17_Q18_Q19,
- AArch64::Q18_Q19_Q20, AArch64::Q19_Q20_Q21, AArch64::Q20_Q21_Q22,
- AArch64::Q21_Q22_Q23, AArch64::Q22_Q23_Q24, AArch64::Q23_Q24_Q25,
- AArch64::Q24_Q25_Q26, AArch64::Q25_Q26_Q27, AArch64::Q26_Q27_Q28,
- AArch64::Q27_Q28_Q29, AArch64::Q28_Q29_Q30, AArch64::Q29_Q30_Q31,
- AArch64::Q30_Q31_Q0, AArch64::Q31_Q0_Q1
-};
-
static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = QQQDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::QQQRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned QQQQDecoderTable[] = {
- AArch64::Q0_Q1_Q2_Q3, AArch64::Q1_Q2_Q3_Q4, AArch64::Q2_Q3_Q4_Q5,
- AArch64::Q3_Q4_Q5_Q6, AArch64::Q4_Q5_Q6_Q7, AArch64::Q5_Q6_Q7_Q8,
- AArch64::Q6_Q7_Q8_Q9, AArch64::Q7_Q8_Q9_Q10, AArch64::Q8_Q9_Q10_Q11,
- AArch64::Q9_Q10_Q11_Q12, AArch64::Q10_Q11_Q12_Q13, AArch64::Q11_Q12_Q13_Q14,
- AArch64::Q12_Q13_Q14_Q15, AArch64::Q13_Q14_Q15_Q16, AArch64::Q14_Q15_Q16_Q17,
- AArch64::Q15_Q16_Q17_Q18, AArch64::Q16_Q17_Q18_Q19, AArch64::Q17_Q18_Q19_Q20,
- AArch64::Q18_Q19_Q20_Q21, AArch64::Q19_Q20_Q21_Q22, AArch64::Q20_Q21_Q22_Q23,
- AArch64::Q21_Q22_Q23_Q24, AArch64::Q22_Q23_Q24_Q25, AArch64::Q23_Q24_Q25_Q26,
- AArch64::Q24_Q25_Q26_Q27, AArch64::Q25_Q26_Q27_Q28, AArch64::Q26_Q27_Q28_Q29,
- AArch64::Q27_Q28_Q29_Q30, AArch64::Q28_Q29_Q30_Q31, AArch64::Q29_Q30_Q31_Q0,
- AArch64::Q30_Q31_Q0_Q1, AArch64::Q31_Q0_Q1_Q2
-};
-
static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = QQQQDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::QQQQRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned DDDecoderTable[] = {
- AArch64::D0_D1, AArch64::D1_D2, AArch64::D2_D3, AArch64::D3_D4,
- AArch64::D4_D5, AArch64::D5_D6, AArch64::D6_D7, AArch64::D7_D8,
- AArch64::D8_D9, AArch64::D9_D10, AArch64::D10_D11, AArch64::D11_D12,
- AArch64::D12_D13, AArch64::D13_D14, AArch64::D14_D15, AArch64::D15_D16,
- AArch64::D16_D17, AArch64::D17_D18, AArch64::D18_D19, AArch64::D19_D20,
- AArch64::D20_D21, AArch64::D21_D22, AArch64::D22_D23, AArch64::D23_D24,
- AArch64::D24_D25, AArch64::D25_D26, AArch64::D26_D27, AArch64::D27_D28,
- AArch64::D28_D29, AArch64::D29_D30, AArch64::D30_D31, AArch64::D31_D0
-};
-
static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = DDDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::DDRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned DDDDecoderTable[] = {
- AArch64::D0_D1_D2, AArch64::D1_D2_D3, AArch64::D2_D3_D4,
- AArch64::D3_D4_D5, AArch64::D4_D5_D6, AArch64::D5_D6_D7,
- AArch64::D6_D7_D8, AArch64::D7_D8_D9, AArch64::D8_D9_D10,
- AArch64::D9_D10_D11, AArch64::D10_D11_D12, AArch64::D11_D12_D13,
- AArch64::D12_D13_D14, AArch64::D13_D14_D15, AArch64::D14_D15_D16,
- AArch64::D15_D16_D17, AArch64::D16_D17_D18, AArch64::D17_D18_D19,
- AArch64::D18_D19_D20, AArch64::D19_D20_D21, AArch64::D20_D21_D22,
- AArch64::D21_D22_D23, AArch64::D22_D23_D24, AArch64::D23_D24_D25,
- AArch64::D24_D25_D26, AArch64::D25_D26_D27, AArch64::D26_D27_D28,
- AArch64::D27_D28_D29, AArch64::D28_D29_D30, AArch64::D29_D30_D31,
- AArch64::D30_D31_D0, AArch64::D31_D0_D1
-};
-
static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr, const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = DDDDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::DDDRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
-static const unsigned DDDDDecoderTable[] = {
- AArch64::D0_D1_D2_D3, AArch64::D1_D2_D3_D4, AArch64::D2_D3_D4_D5,
- AArch64::D3_D4_D5_D6, AArch64::D4_D5_D6_D7, AArch64::D5_D6_D7_D8,
- AArch64::D6_D7_D8_D9, AArch64::D7_D8_D9_D10, AArch64::D8_D9_D10_D11,
- AArch64::D9_D10_D11_D12, AArch64::D10_D11_D12_D13, AArch64::D11_D12_D13_D14,
- AArch64::D12_D13_D14_D15, AArch64::D13_D14_D15_D16, AArch64::D14_D15_D16_D17,
- AArch64::D15_D16_D17_D18, AArch64::D16_D17_D18_D19, AArch64::D17_D18_D19_D20,
- AArch64::D18_D19_D20_D21, AArch64::D19_D20_D21_D22, AArch64::D20_D21_D22_D23,
- AArch64::D21_D22_D23_D24, AArch64::D22_D23_D24_D25, AArch64::D23_D24_D25_D26,
- AArch64::D24_D25_D26_D27, AArch64::D25_D26_D27_D28, AArch64::D26_D27_D28_D29,
- AArch64::D27_D28_D29_D30, AArch64::D28_D29_D30_D31, AArch64::D29_D30_D31_D0,
- AArch64::D30_D31_D0_D1, AArch64::D31_D0_D1_D2
-};
-
static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Addr,
const void *Decoder) {
if (RegNo > 31)
return Fail;
- unsigned Register = DDDDDecoderTable[RegNo];
+ unsigned Register =
+ AArch64MCRegisterClasses[AArch64::DDDDRegClassID].getRegister(RegNo);
Inst.addOperand(MCOperand::createReg(Register));
return Success;
}
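
The hunks above all make essentially the same mechanical change: each hand-maintained decoder table is deleted, and the decoder instead asks the generated register-class description (AArch64MCRegisterClasses) for the register at a given encoding index. A minimal standalone sketch of the idea, using hypothetical types rather than the real LLVM MC API:

    #include <cassert>
    #include <vector>

    // Hypothetical stand-in for an MC register class description; in LLVM the
    // equivalent data is generated by TableGen and reached through
    // AArch64MCRegisterClasses[<class ID>].
    struct MockRegClass {
      std::vector<unsigned> Regs; // registers in encoding order
      unsigned getRegister(unsigned Idx) const {
        assert(Idx < Regs.size() && "encoding index out of range");
        return Regs[Idx];
      }
    };

    // Sketch of a decoder that no longer needs its own table: bounds-check the
    // encoded register number, then defer the mapping to the class description.
    bool decodeGPR32(const MockRegClass &GPR32, unsigned RegNo, unsigned &Reg) {
      if (RegNo > 31)
        return false; // Fail
      Reg = GPR32.getRegister(RegNo);
      return true;    // Success
    }
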
@@ -1776,7 +1580,7 @@ static DecodeStatus DecodeModImmInstruction(MCInst &Inst, uint32_t insn,
if (Inst.getOpcode() == AArch64::MOVID)
DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
else
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
Inst.addOperand(MCOperand::createImm(imm));
@@ -1813,8 +1617,8 @@ static DecodeStatus DecodeModImmTiedInstruction(MCInst &Inst, uint32_t insn,
imm |= fieldFromInstruction(insn, 5, 5);
// Tied operands added twice.
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
Inst.addOperand(MCOperand::createImm(imm));
Inst.addOperand(MCOperand::createImm((cmode & 6) << 2));
@@ -1980,8 +1784,7 @@ static DecodeStatus DecodeXSeqPairsClassRegisterClass(MCInst &Inst,
RegNo, Addr, Decoder);
}
-static DecodeStatus DecodeSVELogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn,
+static DecodeStatus DecodeSVELogicalImmInstruction(MCInst &Inst, uint32_t insn,
uint64_t Addr,
const void *Decoder) {
unsigned Zdn = fieldFromInstruction(insn, 0, 5);
@@ -1997,9 +1800,9 @@ static DecodeStatus DecodeSVELogicalImmInstruction(llvm::MCInst &Inst,
return Success;
}
-template<int Bits>
-static DecodeStatus DecodeSImm(llvm::MCInst &Inst, uint64_t Imm,
- uint64_t Address, const void *Decoder) {
+template <int Bits>
+static DecodeStatus DecodeSImm(MCInst &Inst, uint64_t Imm, uint64_t Address,
+ const void *Decoder) {
if (Imm & ~((1LL << Bits) - 1))
return Fail;
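
The mask test visible above rejects any encoding with bits set outside the Bits-wide field; the rest of DecodeSImm, which this hunk truncates, presumably sign-extends the field before adding it as an operand. A hedged, standalone sketch of that common decode pattern:

    #include <cstdint>

    // Hypothetical helper showing the usual decode of a Bits-wide signed field:
    // reject out-of-range encodings (the Fail path above), then sign-extend.
    template <int Bits>
    bool decodeSImmField(uint64_t Imm, int64_t &Out) {
      if (Imm & ~((1LL << Bits) - 1))
        return false;
      if (Imm & (1ULL << (Bits - 1)))  // top bit of the field set?
        Imm |= ~((1ULL << Bits) - 1);  // fill the upper bits with ones
      Out = static_cast<int64_t>(Imm);
      return true;
    }
    // e.g. decodeSImmField<4>(0xF, Out) leaves Out == -1.
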
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
index 3f815ac8c3d0..5b6f06f8dbb4 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -34,7 +34,9 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) {
case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
return MCSymbolRefExpr::VK_GOTPAGEOFF;
case LLVMDisassembler_VariantKind_ARM64_TLVP:
+ return MCSymbolRefExpr::VK_TLVPPAGE;
case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+ return MCSymbolRefExpr::VK_TLVPPAGEOFF;
default:
llvm_unreachable("bad LLVMDisassembler_VariantKind");
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 28b234b180fc..ac08ee8ae8dd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -156,7 +156,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -181,7 +181,18 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
auto MMO = MF.getMachineMemOperand(
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, LocTy,
inferAlignFromPtrInfo(MF, MPO));
- MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+
+ switch (VA.getLocInfo()) {
+ case CCValAssign::LocInfo::ZExt:
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, ValVReg, Addr, *MMO);
+ return;
+ case CCValAssign::LocInfo::SExt:
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, ValVReg, Addr, *MMO);
+ return;
+ default:
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ return;
+ }
}
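
The switch added above makes the kind of load match how the value was promoted: arguments recorded as zero-extended are reloaded with G_ZEXTLOAD and sign-extended ones with G_SEXTLOAD, with a plain G_LOAD otherwise. A small standalone illustration of why the flavor of extension matters (hypothetical helper, not part of the patch):

    #include <cstdint>

    // The same narrow byte pattern re-extends to different wide values.
    int64_t reloadByte(uint8_t StoredByte, bool WasSignExtended) {
      if (WasSignExtended)                      // G_SEXTLOAD case
        return static_cast<int64_t>(static_cast<int8_t>(StoredByte));
      return static_cast<int64_t>(StoredByte);  // G_ZEXTLOAD case
    }
    // reloadByte(0xFF, true) == -1, reloadByte(0xFF, false) == 255.
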
/// How the physical register gets marked varies between formal
@@ -270,7 +281,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -376,11 +387,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CC, SplitEVTs[i]);
if (EVT(NewVT) != SplitEVTs[i]) {
unsigned ExtendOp = TargetOpcode::G_ANYEXT;
- if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::SExt))
+ if (F.getAttributes().hasRetAttr(Attribute::SExt))
ExtendOp = TargetOpcode::G_SEXT;
- else if (F.getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::ZExt))
+ else if (F.getAttributes().hasRetAttr(Attribute::ZExt))
ExtendOp = TargetOpcode::G_ZEXT;
LLT NewLLT(NewVT);
@@ -522,6 +531,7 @@ bool AArch64CallLowering::lowerFormalArguments(
auto &DL = F.getParent()->getDataLayout();
SmallVector<ArgInfo, 8> SplitArgs;
+ SmallVector<std::pair<Register, Register>> BoolArgs;
unsigned i = 0;
for (auto &Arg : F.args()) {
if (DL.getTypeStoreSize(Arg.getType()).isZero())
@@ -530,6 +540,22 @@ bool AArch64CallLowering::lowerFormalArguments(
ArgInfo OrigArg{VRegs[i], Arg, i};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
+ // i1 arguments are zero-extended to i8 by the caller. Emit a
+ // hint to reflect this.
+ if (OrigArg.Ty->isIntegerTy(1)) {
+ assert(OrigArg.Regs.size() == 1 &&
+ MRI.getType(OrigArg.Regs[0]).getSizeInBits() == 1 &&
+ "Unexpected registers used for i1 arg");
+
+ if (!OrigArg.Flags[0].isZExt()) {
+ // Lower i1 argument as i8, and insert AssertZExt + Trunc later.
+ Register OrigReg = OrigArg.Regs[0];
+ Register WideReg = MRI.createGenericVirtualRegister(LLT::scalar(8));
+ OrigArg.Regs[0] = WideReg;
+ BoolArgs.push_back({OrigReg, WideReg});
+ }
+ }
+
if (Arg.hasAttribute(Attribute::SwiftAsync))
MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
@@ -550,6 +576,18 @@ bool AArch64CallLowering::lowerFormalArguments(
F.getCallingConv(), F.isVarArg()))
return false;
+ if (!BoolArgs.empty()) {
+ for (auto &KV : BoolArgs) {
+ Register OrigReg = KV.first;
+ Register WideReg = KV.second;
+ LLT WideTy = MRI.getType(WideReg);
+ assert(MRI.getType(OrigReg).getScalarSizeInBits() == 1 &&
+ "Unexpected bit size of a bool arg");
+ MIRBuilder.buildTrunc(
+ OrigReg, MIRBuilder.buildAssertZExt(WideTy, WideReg, 1).getReg(0));
+ }
+ }
+
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
uint64_t StackOffset = Assigner.StackOffset;
if (F.isVarArg()) {
@@ -1042,8 +1080,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
for (auto &OrigArg : Info.OrigArgs) {
splitToValueTypes(OrigArg, OutArgs, DL, Info.CallConv);
// AAPCS requires that we zero-extend i1 to 8 bits by the caller.
- if (OrigArg.Ty->isIntegerTy(1))
- OutArgs.back().Flags[0].setZExt();
+ if (OrigArg.Ty->isIntegerTy(1)) {
+ ArgInfo &OutArg = OutArgs.back();
+ assert(OutArg.Regs.size() == 1 &&
+ MRI.getType(OutArg.Regs[0]).getSizeInBits() == 1 &&
+ "Unexpected registers used for i1 arg");
+
+ // We cannot use a ZExt ArgInfo flag here, because it will
+ // zero-extend the argument to i32 instead of just i8.
+ OutArg.Regs[0] =
+ MIRBuilder.buildZExt(LLT::scalar(8), OutArg.Regs[0]).getReg(0);
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ OutArg.Ty = Type::getInt8Ty(Ctx);
+ }
}
SmallVector<ArgInfo, 8> InArgs;
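
Both i1 hunks in this file implement the rule stated in the comments above: AAPCS makes the caller responsible for zero-extending an i1 argument to 8 bits, so the caller-side code widens the value to s8 before assignment, and the callee-side code receives s8, asserts the zero-extension, and truncates back to s1. A trivial standalone view of that contract (hypothetical helpers, illustrative only):

    #include <cstdint>

    // Caller side: an i1 (bool) argument travels in the low byte, zero-extended,
    // so bits [7:1] are guaranteed to be zero.
    uint8_t packBoolArg(bool B) { return B ? 1u : 0u; }

    // Callee side: after asserting the zero-extension it is safe to truncate the
    // byte back down to a single bit.
    bool unpackBoolArg(uint8_t Arg) { return (Arg & 0x1) != 0; }
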
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 08d1c987dc3b..38afc5deb42f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -26,7 +26,7 @@ AArch64GISelUtils::getAArch64VectorSplat(const MachineInstr &MI,
return None;
Register Src = MI.getOperand(1).getReg();
if (auto ValAndVReg =
- getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
+ getAnyConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI))
return RegOrConstant(ValAndVReg->Value.getSExtValue());
return RegOrConstant(Src);
}
@@ -56,7 +56,7 @@ bool AArch64GISelUtils::isCMN(const MachineInstr *MaybeSub,
!CmpInst::isEquality(Pred))
return false;
auto MaybeZero =
- getConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MaybeSub->getOperand(1).getReg(), MRI);
return MaybeZero && MaybeZero->Value.getZExtValue() == 0;
}
@@ -68,7 +68,8 @@ bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
if (!TLI.getLibcallName(RTLIB::BZERO))
return false;
- auto Zero = getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
+ auto Zero =
+ getIConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI);
if (!Zero || Zero->Value.getSExtValue() != 0)
return false;
@@ -78,8 +79,8 @@ bool AArch64GISelUtils::tryEmitBZero(MachineInstr &MI,
if (!MinSize) {
// If the size is known, check it. If it is not known, assume using bzero is
// better.
- if (auto Size =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
+ if (auto Size = getIConstantVRegValWithLookThrough(
+ MI.getOperand(2).getReg(), MRI)) {
if (Size->Value.getSExtValue() <= 256)
return false;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a98248438e40..e090d87d59a2 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -22,6 +22,7 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -163,6 +164,9 @@ private:
bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
MachineRegisterInfo &MRI);
+ /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
+ /// SUBREG_TO_REG.
+ bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -171,6 +175,14 @@ private:
bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
+
+ /// Helper function to select vector load intrinsics like
+ /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
+ /// \p Opc is the opcode that the selected instruction should use.
+ /// \p NumVecs is the number of vector destinations for the instruction.
+ /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
+ bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
+ MachineInstr &I);
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI);
bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
@@ -181,6 +193,7 @@ private:
bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
+ bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
unsigned emitConstantPoolEntry(const Constant *CPVal,
MachineFunction &MF) const;
@@ -263,13 +276,9 @@ private:
const RegisterBank &DstRB, LLT ScalarTy,
Register VecReg, unsigned LaneIdx,
MachineIRBuilder &MIRBuilder) const;
-
- /// Emit a CSet for an integer compare.
- ///
- /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
- MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg = AArch64::WZR) const;
+ MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
+ AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const;
/// Emit a CSet for a FP compare.
///
/// \p Dst is expected to be a 32-bit scalar register.
@@ -367,18 +376,15 @@ private:
return selectAddrModeWRO(Root, Width / 8);
}
- ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
+ ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
+ bool AllowROR = false) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
return selectShiftedRegister(Root);
}
ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
- // TODO: selectShiftedRegister should allow for rotates on logical shifts.
- // For now, make them the same. The only difference between the two is that
- // logical shifts are allowed to fold in rotates. Otherwise, these are
- // functionally the same.
- return selectShiftedRegister(Root);
+ return selectShiftedRegister(Root, true);
}
/// Given an extend instruction, determine the correct shift-extend type for
@@ -496,14 +502,18 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
}
if (RB.getID() == AArch64::FPRRegBankID) {
- if (Ty.getSizeInBits() <= 16)
+ switch (Ty.getSizeInBits()) {
+ case 8:
+ return &AArch64::FPR8RegClass;
+ case 16:
return &AArch64::FPR16RegClass;
- if (Ty.getSizeInBits() == 32)
+ case 32:
return &AArch64::FPR32RegClass;
- if (Ty.getSizeInBits() == 64)
+ case 64:
return &AArch64::FPR64RegClass;
- if (Ty.getSizeInBits() == 128)
+ case 128:
return &AArch64::FPR128RegClass;
+ }
return nullptr;
}
@@ -652,7 +662,7 @@ static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
Immed = Root.getCImm()->getZExtValue();
else if (Root.isReg()) {
auto ValAndVReg =
- getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
+ getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
if (!ValAndVReg)
return None;
Immed = ValAndVReg->Value.getSExtValue();
@@ -810,6 +820,8 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
return isStore ? AArch64::STRSui : AArch64::LDRSui;
case 64:
return isStore ? AArch64::STRDui : AArch64::LDRDui;
+ case 128:
+ return isStore ? AArch64::STRQui : AArch64::LDRQui;
}
break;
}
@@ -1195,8 +1207,8 @@ AArch64InstructionSelector::emitSelect(Register Dst, Register True,
&Optimized]() {
if (Optimized)
return false;
- auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
- auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
+ auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
+ auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
if (!TrueCst && !FalseCst)
return false;
@@ -1301,6 +1313,7 @@ static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineRegisterInfo &MRI) {
assert(Reg.isValid() && "Expected valid register!");
+ bool HasZext = false;
while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
unsigned Opc = MI->getOpcode();
@@ -1314,6 +1327,9 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
// on the truncated x is the same as the bit number on x.
if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
Opc == TargetOpcode::G_TRUNC) {
+ if (Opc == TargetOpcode::G_ZEXT)
+ HasZext = true;
+
Register NextReg = MI->getOperand(1).getReg();
// Did we find something worth folding?
if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
@@ -1334,16 +1350,20 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
case TargetOpcode::G_XOR: {
TestReg = MI->getOperand(1).getReg();
Register ConstantReg = MI->getOperand(2).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!VRegAndVal) {
// AND commutes, check the other side for a constant.
// FIXME: Can we canonicalize the constant so that it's always on the
// same side at some point earlier?
std::swap(ConstantReg, TestReg);
- VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
+ }
+ if (VRegAndVal) {
+ if (HasZext)
+ C = VRegAndVal->Value.getZExtValue();
+ else
+ C = VRegAndVal->Value.getSExtValue();
}
- if (VRegAndVal)
- C = VRegAndVal->Value.getSExtValue();
break;
}
case TargetOpcode::G_ASHR:
@@ -1351,7 +1371,7 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
case TargetOpcode::G_SHL: {
TestReg = MI->getOperand(1).getReg();
auto VRegAndVal =
- getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
if (VRegAndVal)
C = VRegAndVal->Value.getSExtValue();
break;
@@ -1479,7 +1499,7 @@ bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
// Check if the AND has a constant on its RHS which we can use as a mask.
// If it's a power of 2, then it's the same as checking a specific bit.
// (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
- auto MaybeBit = getConstantVRegValWithLookThrough(
+ auto MaybeBit = getIConstantVRegValWithLookThrough(
AndInst.getOperand(2).getReg(), *MIB.getMRI());
if (!MaybeBit)
return false;
@@ -1555,7 +1575,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
Register RHS = ICmp.getOperand(3).getReg();
// We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
- auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
// When we can emit a TB(N)Z, prefer that.
@@ -1590,7 +1610,7 @@ bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
if (ICmpInst::isEquality(Pred)) {
if (!VRegAndVal) {
std::swap(RHS, LHS);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
}
@@ -2049,7 +2069,7 @@ bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
// selector which will match the register variant.
assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
const auto &MO = I.getOperand(2);
- auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
+ auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
if (!VRegAndVal)
return false;
@@ -2131,7 +2151,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// Before selecting a DUP instruction, check if it is better selected as a
// MOV or load from a constant pool.
Register Src = I.getOperand(1).getReg();
- auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
if (!ValAndVReg)
return false;
LLVMContext &Ctx = MF.getFunction().getContext();
@@ -2145,17 +2165,14 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
I.eraseFromParent();
return true;
}
- case TargetOpcode::G_BR: {
- // If the branch jumps to the fallthrough block, don't bother emitting it.
- // Only do this for -O0 for a good code size improvement, because when
- // optimizations are enabled we want to leave this choice to
- // MachineBlockPlacement.
- bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None;
- if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB()))
- return false;
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_SEXT:
+ // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
+ // over a normal extend.
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+ return false;
+ case TargetOpcode::G_BR:
+ return false;
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
case TargetOpcode::G_CONSTANT: {
@@ -2192,27 +2209,55 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
// fold the add into the cset for the cmp by using cinc.
//
// FIXME: This would probably be a lot nicer in PostLegalizerLowering.
- Register X = I.getOperand(1).getReg();
-
- // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out
- // early if we see it.
- LLT Ty = MRI.getType(X);
- if (Ty.isVector() || Ty.getSizeInBits() != 32)
+ Register AddDst = I.getOperand(0).getReg();
+ Register AddLHS = I.getOperand(1).getReg();
+ Register AddRHS = I.getOperand(2).getReg();
+ // Only handle scalars.
+ LLT Ty = MRI.getType(AddLHS);
+ if (Ty.isVector())
return false;
-
- Register CmpReg = I.getOperand(2).getReg();
- MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
+ // bits.
+ unsigned Size = Ty.getSizeInBits();
+ if (Size != 32 && Size != 64)
+ return false;
+ auto MatchCmp = [&](Register Reg) -> MachineInstr * {
+ if (!MRI.hasOneNonDBGUse(Reg))
+ return nullptr;
+ // If the LHS of the add is 32 bits, then we want to fold a 32-bit
+ // compare.
+ if (Size == 32)
+ return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
+ // We model scalar compares using 32-bit destinations right now.
+ // If it's a 64-bit compare, it'll have 64-bit sources.
+ Register ZExt;
+ if (!mi_match(Reg, MRI,
+ m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
+ return nullptr;
+ auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
+ if (!Cmp ||
+ MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
+ return nullptr;
+ return Cmp;
+ };
+ // Try to match
+ // z + (cmp pred, x, y)
+ MachineInstr *Cmp = MatchCmp(AddRHS);
if (!Cmp) {
- std::swap(X, CmpReg);
- Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI);
+ // (cmp pred, x, y) + z
+ std::swap(AddLHS, AddRHS);
+ Cmp = MatchCmp(AddRHS);
if (!Cmp)
return false;
}
- auto Pred =
- static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate());
- emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3),
- Cmp->getOperand(1), MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X);
+ auto &PredOp = Cmp->getOperand(1);
+ auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ MIB.setInstrAndDebugLoc(I);
+ emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
+ /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
+ emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
I.eraseFromParent();
return true;
}
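
For reference, the source-level shape this G_ADD early-selection targets, and why an inverted condition plus a repeated source register amounts to a conditional increment (a hand-written sketch, not compiler output):

    // Pattern being folded: the boolean result of a compare added to a value.
    unsigned addCompare(unsigned Z, unsigned X, unsigned Y) {
      return Z + (X == Y); // ideally: cmp x, y ; cinc w0, z, eq
    }
    // CINC Wd, Wn, cc is the alias CSINC Wd, Wn, Wn, invert(cc): CSINC yields
    // Wn when its condition holds and Wm + 1 otherwise, so passing AddLHS for
    // both sources together with the inverted predicate adds exactly one when
    // the original compare is true.
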
@@ -2352,10 +2397,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
unsigned Size = Ty.getSizeInBits();
unsigned Opc = OpcTable[IsSigned][Size == 64];
auto Cst1 =
- getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
assert(Cst1 && "Should have gotten a constant for src 1?");
auto Cst2 =
- getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
assert(Cst2 && "Should have gotten a constant for src 2?");
auto LSB = Cst1->Value.getZExtValue();
auto Width = Cst2->Value.getZExtValue();
@@ -2456,10 +2501,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// FIXME: Redundant check, but even less readable when factored out.
if (isFP) {
- if (Ty != s32 && Ty != s64 && Ty != s128) {
+ if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
- << " constant, expected: " << s32 << " or " << s64
- << " or " << s128 << '\n');
+ << " constant, expected: " << s16 << " or " << s32
+ << " or " << s64 << " or " << s128 << '\n');
return false;
}
@@ -2493,23 +2538,20 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
}
- // We allow G_CONSTANT of types < 32b.
- const unsigned MovOpc =
- DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
-
if (isFP) {
- // Either emit a FMOV, or emit a copy to emit a normal mov.
- const TargetRegisterClass &GPRRC =
- DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
- const TargetRegisterClass &FPRRC =
- DefSize == 32 ? AArch64::FPR32RegClass
- : (DefSize == 64 ? AArch64::FPR64RegClass
- : AArch64::FPR128RegClass);
-
- // For 64b values, emit a constant pool load instead.
- // For s32, use a cp load if we have optsize/minsize.
- if (DefSize == 64 || DefSize == 128 ||
- (DefSize == 32 && shouldOptForSize(&MF))) {
+ const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
+ // For 16, 64, and 128b values, emit a constant pool load.
+ switch (DefSize) {
+ default:
+ llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
+ case 32:
+ // For s32, use a cp load if we have optsize/minsize.
+ if (!shouldOptForSize(&MF))
+ break;
+ LLVM_FALLTHROUGH;
+ case 16:
+ case 64:
+ case 128: {
auto *FPImm = I.getOperand(1).getFPImm();
auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
if (!LoadMI) {
@@ -2520,9 +2562,13 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
}
+ }
- // Nope. Emit a copy and use a normal mov instead.
- const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
+ // Either emit a FMOV, or emit a copy to emit a normal mov.
+ assert(DefSize == 32 &&
+ "Expected constant pool loads for all sizes other than 32!");
+ const Register DefGPRReg =
+ MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineOperand &RegOp = I.getOperand(0);
RegOp.setReg(DefGPRReg);
MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
@@ -2545,6 +2591,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.getOperand(1).ChangeToImmediate(Val);
}
+ const unsigned MovOpc =
+ DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
I.setDesc(TII.get(MovOpc));
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
return true;
@@ -2693,8 +2741,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXTLOAD:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE: {
+ GLoadStore &LdSt = cast<GLoadStore>(I);
bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
- LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+ LLT PtrTy = MRI.getType(LdSt.getPointerReg());
if (PtrTy != LLT::pointer(0, 64)) {
LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
@@ -2702,26 +2751,33 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return false;
}
- auto &MemOp = **I.memoperands_begin();
- uint64_t MemSizeInBytes = MemOp.getSize();
- unsigned MemSizeInBits = MemSizeInBytes * 8;
- AtomicOrdering Order = MemOp.getSuccessOrdering();
+ uint64_t MemSizeInBytes = LdSt.getMemSize();
+ unsigned MemSizeInBits = LdSt.getMemSizeInBits();
+ AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
// Need special instructions for atomics that affect ordering.
if (Order != AtomicOrdering::NotAtomic &&
Order != AtomicOrdering::Unordered &&
Order != AtomicOrdering::Monotonic) {
- assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD);
+ assert(!isa<GZExtLoad>(LdSt));
if (MemSizeInBytes > 64)
return false;
- if (I.getOpcode() == TargetOpcode::G_LOAD) {
+ if (isa<GLoad>(LdSt)) {
static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
AArch64::LDARW, AArch64::LDARX};
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
+ Register ValReg = LdSt.getReg(0);
+ if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
+ // Emit a subreg copy of 32 bits.
+ Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
+ .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
+ I.getOperand(0).setReg(NewVal);
+ }
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -2729,22 +2785,64 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
#ifndef NDEBUG
- const Register PtrReg = I.getOperand(1).getReg();
+ const Register PtrReg = LdSt.getPointerReg();
const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
- // Sanity-check the pointer register.
+ // Check that the pointer register is valid.
assert(PtrRB.getID() == AArch64::GPRRegBankID &&
"Load/Store pointer operand isn't a GPR");
assert(MRI.getType(PtrReg).isPointer() &&
"Load/Store pointer operand isn't a pointer");
#endif
- const Register ValReg = I.getOperand(0).getReg();
+ const Register ValReg = LdSt.getReg(0);
+ const LLT ValTy = MRI.getType(ValReg);
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
+ // The code below doesn't support truncating stores, so we need to split it
+ // again.
+ if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+
+ // Generate a subreg copy.
+ auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
+ .addReg(ValReg, 0, SubReg)
+ .getReg(0);
+ RBI.constrainGenericRegister(Copy, *RC, MRI);
+ LdSt.getOperand(0).setReg(Copy);
+ } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
+ // If this is an any-extending load from the FPR bank, split it into a regular
+ // load + extend.
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ unsigned SubReg;
+ LLT MemTy = LdSt.getMMO().getMemoryType();
+ auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
+ if (!getSubRegForClass(RC, TRI, SubReg))
+ return false;
+ Register OldDst = LdSt.getReg(0);
+ Register NewDst =
+ MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
+ LdSt.getOperand(0).setReg(NewDst);
+ MRI.setRegBank(NewDst, RB);
+ // Generate a SUBREG_TO_REG to extend it.
+ MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
+ .addImm(0)
+ .addUse(NewDst)
+ .addImm(SubReg);
+ auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
+ RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
+ MIB.setInstr(LdSt);
+ }
+ }
+
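
The two branches above exist because the selected load/store opcodes never change the value's width themselves: a truncating G_STORE first narrows the value with a sub-register copy, and an any-extending FPR G_LOAD is rebuilt as a narrow load whose result is widened with SUBREG_TO_REG. The store half of that, in ordinary C++ terms (illustrative only):

    #include <cstdint>
    #include <cstring>

    // Conceptual effect of the truncating-store split: narrow the value to the
    // memory width first, then perform a plain store of that width.
    void storeLow32(uint64_t Val, uint32_t *Mem) {
      uint32_t Narrow = static_cast<uint32_t>(Val); // the "sub-register copy"
      std::memcpy(Mem, &Narrow, sizeof(Narrow));    // plain 32-bit store
    }
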
// Helper lambda for partially selecting I. Either returns the original
// instruction with an updated opcode, or a new instruction.
auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
- bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ bool IsStore = isa<GStore>(I);
const unsigned NewOpc =
selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
if (NewOpc == I.getOpcode())
@@ -2761,7 +2859,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// Folded something. Create a new instruction and return it.
auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
- IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ Register CurValReg = I.getOperand(0).getReg();
+ IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
NewInst.cloneMemRefs(I);
for (auto &Fn : *AddrModeFns)
Fn(NewInst);
@@ -2775,9 +2874,8 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// If we're storing a 0, use WZR/XZR.
if (Opcode == TargetOpcode::G_STORE) {
- auto CVal = getConstantVRegValWithLookThrough(
- LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
- /*HandleFConstants = */ false);
+ auto CVal = getIConstantVRegValWithLookThrough(
+ LoadStore->getOperand(0).getReg(), MRI);
if (CVal && CVal->Value == 0) {
switch (LoadStore->getOpcode()) {
case AArch64::STRWui:
@@ -2897,17 +2995,15 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
// false, so to get the increment when it's true, we need to use the
// inverse. In this case, we want to increment when carry is set.
Register ZReg = AArch64::WZR;
- auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
- {ZReg, ZReg})
- .addImm(getInvertedCondCode(OpAndCC.second));
- constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
+ emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
+ getInvertedCondCode(OpAndCC.second), MIB);
I.eraseFromParent();
return true;
}
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
- Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI);
+ Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
// TODO: Implement arbitrary cases
if (!MaskVal || !isShiftedMask_64(*MaskVal))
return false;
@@ -2991,7 +3087,7 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
if (Opcode == TargetOpcode::G_PTRTOINT) {
assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
I.setDesc(TII.get(TargetOpcode::COPY));
- return true;
+ return selectCopy(I, TII, MRI, TRI, RBI);
}
}
@@ -2999,6 +3095,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
case TargetOpcode::G_ANYEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
const Register DstReg = I.getOperand(0).getReg();
const Register SrcReg = I.getOperand(1).getReg();
@@ -3045,6 +3144,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_SEXT_INREG:
case TargetOpcode::G_SEXT: {
+ if (selectUSMovFromExtend(I, MRI))
+ return true;
+
unsigned Opcode = I.getOpcode();
const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
const Register DefReg = I.getOperand(0).getReg();
@@ -3231,9 +3333,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
- emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1),
- MIB);
- emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB);
+ const AArch64CC::CondCode InvCC =
+ changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
+ emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
+ emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
+ /*Src2=*/AArch64::WZR, InvCC, MIB);
I.eraseFromParent();
return true;
}
@@ -3839,6 +3943,10 @@ static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
// Choose a lane copy opcode and subregister based off of the size of the
// vector's elements.
switch (EltSize) {
+ case 8:
+ CopyOpc = AArch64::CPYi8;
+ ExtractSubReg = AArch64::bsub;
+ break;
case 16:
CopyOpc = AArch64::CPYi16;
ExtractSubReg = AArch64::hsub;
@@ -3942,7 +4050,7 @@ bool AArch64InstructionSelector::selectExtractElt(
}
// Find the index to extract from.
- auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
@@ -4164,6 +4272,13 @@ MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
.addConstantPoolIndex(CPIdx, 0,
AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
break;
+ case 2:
+ LoadMI =
+ &*MIRBuilder
+ .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
+ .addConstantPoolIndex(CPIdx, 0,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType());
@@ -4326,7 +4441,7 @@ AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
{AArch64::ANDSXrr, AArch64::ANDSWrr}};
// ANDS needs a logical immediate for its immediate form. Check if we can
// fold one in.
- if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
+ if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
int64_t Imm = ValAndVReg->Value.getSExtValue();
if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
@@ -4368,25 +4483,19 @@ MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
"Expected a 32-bit scalar register?");
#endif
- const Register ZeroReg = AArch64::WZR;
- auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) {
- auto CSet =
- MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg})
- .addImm(getInvertedCondCode(CC));
- constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI);
- return &*CSet;
- };
-
+ const Register ZReg = AArch64::WZR;
AArch64CC::CondCode CC1, CC2;
changeFCMPPredToAArch64CC(Pred, CC1, CC2);
+ auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
if (CC2 == AArch64CC::AL)
- return EmitCSet(Dst, CC1);
-
+ return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
+ MIRBuilder);
const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
Register Def1Reg = MRI.createVirtualRegister(RC);
Register Def2Reg = MRI.createVirtualRegister(RC);
- EmitCSet(Def1Reg, CC1);
- EmitCSet(Def2Reg, CC2);
+ auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
+ emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
+ emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
return &*OrMI;
@@ -4495,16 +4604,25 @@ MachineInstr *AArch64InstructionSelector::emitVectorConcat(
}
MachineInstr *
-AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
- MachineIRBuilder &MIRBuilder,
- Register SrcReg) const {
- // CSINC increments the result when the predicate is false. Invert it.
- const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
- CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
- auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg})
- .addImm(InvCC);
- constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
- return &*I;
+AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
+ Register Src2, AArch64CC::CondCode Pred,
+ MachineIRBuilder &MIRBuilder) const {
+ auto &MRI = *MIRBuilder.getMRI();
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
+ // If we used a register class, then this won't necessarily have an LLT.
+ // Compute the size based off whether or not we have a class or bank.
+ unsigned Size;
+ if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ Size = TRI.getRegSizeInBits(*RC);
+ else
+ Size = MRI.getType(Dst).getSizeInBits();
+ // Some opcodes use s1.
+ assert(Size <= 64 && "Expected 64 bits or less only!");
+ static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
+ unsigned Opc = OpcTable[Size == 64];
+ auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
+ constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
+ return &*CSINC;
}
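
emitCSINC above can replace the old emitCSetForICMP because CSET is itself a CSINC alias, so one helper serves integer compares, FP compares, and the add-of-compare fold: CSET Wd, cc is CSINC Wd, WZR, WZR, invert(cc), and CINC Wd, Wn, cc is CSINC Wd, Wn, Wn, invert(cc). A small sketch of the size-based opcode choice the helper makes (hypothetical function; the real helper builds a MachineInstr):

    #include <cassert>
    #include <string>

    // Mirrors OpcTable[Size == 64] above: anything narrower than 64 bits
    // (including the s1 case the comment mentions) uses the 32-bit form.
    std::string pickCsincOpcode(unsigned SizeInBits) {
      assert(SizeInBits <= 64 && "Expected 64 bits or less only!");
      return SizeInBits == 64 ? "CSINCXr" : "CSINCWr";
    }
    // Call sites that want a plain CSet pass WZR for both sources along with
    // the inverted condition code.
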
std::pair<MachineInstr *, AArch64CC::CondCode>
@@ -4671,7 +4789,7 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
if (!CmpInst::isUnsigned(P) && LHSDef &&
LHSDef->getOpcode() == TargetOpcode::G_AND) {
// Make sure that the RHS is 0.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!ValAndVReg || ValAndVReg->Value != 0)
return nullptr;
@@ -4792,6 +4910,71 @@ MachineInstr *AArch64InstructionSelector::emitLaneInsert(
return InsElt;
}
+bool AArch64InstructionSelector::selectUSMovFromExtend(
+ MachineInstr &MI, MachineRegisterInfo &MRI) {
+ if (MI.getOpcode() != TargetOpcode::G_SEXT &&
+ MI.getOpcode() != TargetOpcode::G_ZEXT &&
+ MI.getOpcode() != TargetOpcode::G_ANYEXT)
+ return false;
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
+ const Register DefReg = MI.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DefReg);
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ if (DstSize != 32 && DstSize != 64)
+ return false;
+
+ MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
+ MI.getOperand(1).getReg(), MRI);
+ int64_t Lane;
+ if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
+ return false;
+ Register Src0 = Extract->getOperand(1).getReg();
+
+ const LLT &VecTy = MRI.getType(Src0);
+
+ if (VecTy.getSizeInBits() != 128) {
+ const MachineInstr *ScalarToVector = emitScalarToVector(
+ VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
+ assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
+ Src0 = ScalarToVector->getOperand(0).getReg();
+ }
+
+ unsigned Opcode;
+ if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
+ Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
+ else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
+ Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
+ else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
+ Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
+ else
+ llvm_unreachable("Unexpected type combo for S/UMov!");
+
+ // We may need to generate one of these, depending on the type and sign of the
+ // input:
+ // DstReg = SMOV Src0, Lane;
+ // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
+ MachineInstr *ExtI = nullptr;
+ if (DstSize == 64 && !IsSigned) {
+ Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
+ ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
+ .addImm(0)
+ .addUse(NewReg)
+ .addImm(AArch64::sub_32);
+ RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
+ } else
+ ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
+
+ constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+ MI.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
@@ -4811,7 +4994,7 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
// Find the definition of the index. Bail out if it's not defined by a
// G_CONSTANT.
Register IdxReg = I.getOperand(3).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
if (!VRegAndVal)
return false;
unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
@@ -4936,6 +5119,47 @@ bool AArch64InstructionSelector::tryOptConstantBuildVec(
return true;
}
+bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
+ MachineInstr &I, MachineRegisterInfo &MRI) {
+ // Given:
+ // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
+ //
+ // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
+ Register Dst = I.getOperand(0).getReg();
+ Register EltReg = I.getOperand(1).getReg();
+ LLT EltTy = MRI.getType(EltReg);
+ // If the index isn't on the same bank as its elements, then this can't be a
+ // SUBREG_TO_REG.
+ const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
+ const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
+ if (EltRB != DstRB)
+ return false;
+ if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
+ [&MRI](const MachineOperand &Op) {
+ return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
+ MRI);
+ }))
+ return false;
+ unsigned SubReg;
+ const TargetRegisterClass *EltRC =
+ getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
+ if (!EltRC)
+ return false;
+ const TargetRegisterClass *DstRC =
+ getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
+ if (!DstRC)
+ return false;
+ if (!getSubRegForClass(EltRC, TRI, SubReg))
+ return false;
+ auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
+ .addImm(0)
+ .addUse(EltReg)
+ .addImm(SubReg);
+ I.eraseFromParent();
+ constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
+ return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
+}
+
bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
@@ -4947,6 +5171,9 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (tryOptConstantBuildVec(I, DstTy, MRI))
return true;
+ if (tryOptBuildVecToSubregToReg(I, MRI))
+ return true;
+
if (EltSize < 16 || EltSize > 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
@@ -5013,24 +5240,45 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
return true;
}
-/// Helper function to find an intrinsic ID on an a MachineInstr. Returns the
-/// ID if it exists, and 0 otherwise.
-static unsigned findIntrinsicID(MachineInstr &I) {
- auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
- return Op.isIntrinsicID();
- });
- if (IntrinOp == I.operands_end())
- return 0;
- return IntrinOp->getIntrinsicID();
+bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
+ unsigned NumVecs,
+ MachineInstr &I) {
+ assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ assert(Opc && "Expected an opcode?");
+ assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
+ auto &MRI = *MIB.getMRI();
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ assert((Size == 64 || Size == 128) &&
+ "Destination must be 64 bits or 128 bits?");
+ unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
+ auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
+ assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
+ auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
+ Load.cloneMemRefs(I);
+ constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
+ Register SelectedLoadDst = Load->getOperand(0).getReg();
+ for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
+ auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
+ .addReg(SelectedLoadDst, 0, SubReg + Idx);
+ // Emit the subreg copies and immediately select them.
+ // FIXME: We should refactor our copy code into an emitCopy helper and
+ // clean up uses of this pattern elsewhere in the selector.
+ selectCopy(*Vec, TII, MRI, TRI, RBI);
+ }
+ return true;
}
bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MachineInstr &I, MachineRegisterInfo &MRI) {
// Find the intrinsic ID.
- unsigned IntrinID = findIntrinsicID(I);
- if (!IntrinID)
- return false;
+ unsigned IntrinID = I.getIntrinsicID();
+ const LLT S8 = LLT::scalar(8);
+ const LLT S16 = LLT::scalar(16);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S64 = LLT::scalar(64);
+ const LLT P0 = LLT::pointer(0, 64);
// Select the instruction.
switch (IntrinID) {
default:
@@ -5055,16 +5303,59 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
MIB.buildInstr(AArch64::BRK, {}, {})
.addImm(I.getOperand(1).getImm() | ('U' << 8));
break;
+ case Intrinsic::aarch64_neon_ld2: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD2Twov8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD2Twov16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD2Twov4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD2Twov8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD2Twov2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD2Twov4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD2Twov2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Twov1d;
+ else
+ llvm_unreachable("Unexpected type for ld2!");
+ selectVectorLoadIntrinsic(Opc, 2, I);
+ break;
+ }
+ case Intrinsic::aarch64_neon_ld4: {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ unsigned Opc = 0;
+ if (Ty == LLT::fixed_vector(8, S8))
+ Opc = AArch64::LD4Fourv8b;
+ else if (Ty == LLT::fixed_vector(16, S8))
+ Opc = AArch64::LD4Fourv16b;
+ else if (Ty == LLT::fixed_vector(4, S16))
+ Opc = AArch64::LD4Fourv4h;
+ else if (Ty == LLT::fixed_vector(8, S16))
+ Opc = AArch64::LD4Fourv8h;
+ else if (Ty == LLT::fixed_vector(2, S32))
+ Opc = AArch64::LD4Fourv2s;
+ else if (Ty == LLT::fixed_vector(4, S32))
+ Opc = AArch64::LD4Fourv4s;
+ else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
+ Opc = AArch64::LD4Fourv2d;
+ else if (Ty == S64 || Ty == P0)
+ Opc = AArch64::LD1Fourv1d;
+ else
+ llvm_unreachable("Unexpected type for ld4!");
+ selectVectorLoadIntrinsic(Opc, 4, I);
+ break;
+ }
case Intrinsic::aarch64_neon_st2: {
Register Src1 = I.getOperand(1).getReg();
Register Src2 = I.getOperand(2).getReg();
Register Ptr = I.getOperand(3).getReg();
LLT Ty = MRI.getType(Src1);
- const LLT S8 = LLT::scalar(8);
- const LLT S16 = LLT::scalar(16);
- const LLT S32 = LLT::scalar(32);
- const LLT S64 = LLT::scalar(64);
- const LLT P0 = LLT::pointer(0, 64);
unsigned Opc;
if (Ty == LLT::fixed_vector(8, S8))
Opc = AArch64::ST2Twov8b;
@@ -5100,9 +5391,7 @@ bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
MachineRegisterInfo &MRI) {
- unsigned IntrinID = findIntrinsicID(I);
- if (!IntrinID)
- return false;
+ unsigned IntrinID = I.getIntrinsicID();
switch (IntrinID) {
default:
@@ -5146,6 +5435,33 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case Intrinsic::ptrauth_sign: {
+ Register DstReg = I.getOperand(0).getReg();
+ Register ValReg = I.getOperand(2).getReg();
+ uint64_t Key = I.getOperand(3).getImm();
+ Register DiscReg = I.getOperand(4).getReg();
+ auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
+ bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
+
+ if (Key > 3)
+ return false;
+
+ unsigned Opcodes[][4] = {
+ {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
+ {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
+ unsigned Opcode = Opcodes[IsDiscZero][Key];
+
+ auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
+
+ if (!IsDiscZero) {
+ PAC.addUse(DiscReg);
+ RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
+ }
+
+ RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
+ I.eraseFromParent();
+ return true;
+ }
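The opcode table in the ptrauth_sign case encodes two independent choices: the key (IA/IB/DA/DB) and whether the discriminator is the constant zero, which selects the "Z" forms that take no discriminator register. A minimal sketch of the same two-dimensional lookup, using placeholder strings rather than real AArch64 opcode enums:

    #include <cstdint>
    #include <cstdio>

    // Rows: discriminator is a register (0) vs. constant zero (1).
    // Columns: key 0..3 = IA, IB, DA, DB (mirrors the Opcodes table in the patch).
    static const char *const SignOpcodes[2][4] = {
        {"PACIA", "PACIB", "PACDA", "PACDB"},
        {"PACIZA", "PACIZB", "PACDZA", "PACDZB"}};

    static const char *pickSignOpcode(uint64_t Key, bool DiscIsZero) {
      if (Key > 3)
        return nullptr; // unsupported key, mirrors the early return in the patch
      return SignOpcodes[DiscIsZero][Key];
    }

    int main() {
      std::printf("%s\n", pickSignOpcode(0, false)); // PACIA (needs disc register)
      std::printf("%s\n", pickSignOpcode(2, true));  // PACDZA (zero discriminator)
      return 0;
    }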
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
@@ -5403,7 +5719,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// constant is the RHS.
Register OffsetReg = OffsetInst->getOperand(1).getReg();
Register ConstantReg = OffsetInst->getOperand(2).getReg();
- auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg) {
// We didn't get a constant on the RHS. If the opcode is a shift, then
// we're done.
@@ -5412,7 +5728,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// If we have a G_MUL, we can use either register. Try looking at the RHS.
std::swap(OffsetReg, ConstantReg);
- ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
+ ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
if (!ValAndVReg)
return None;
}
@@ -5580,7 +5896,7 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
// mov x0, wide
// ldr x2, [base, x0]
auto ValAndVReg =
- getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
if (ValAndVReg) {
unsigned Scale = Log2_32(SizeInBytes);
int64_t ImmOff = ValAndVReg->Value.getSExtValue();
@@ -5839,7 +6155,6 @@ AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
- // TODO: Handle AArch64_AM::ROR
switch (MI.getOpcode()) {
default:
return AArch64_AM::InvalidShiftExtend;
@@ -5849,15 +6164,16 @@ static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
return AArch64_AM::LSR;
case TargetOpcode::G_ASHR:
return AArch64_AM::ASR;
+ case TargetOpcode::G_ROTR:
+ return AArch64_AM::ROR;
}
}
/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
-///
-/// TODO: Allow shifted register to be rotated in logical instructions.
InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
+AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
+ bool AllowROR) const {
if (!Root.isReg())
return None;
MachineRegisterInfo &MRI =
@@ -5865,14 +6181,14 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
// Check if the operand is defined by an instruction which corresponds to
// a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
- //
- // TODO: Handle AArch64_AM::ROR for logical instructions.
MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
if (!ShiftInst)
return None;
AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
if (ShType == AArch64_AM::InvalidShiftExtend)
return None;
+ if (ShType == AArch64_AM::ROR && !AllowROR)
+ return None;
if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
return None;
@@ -6045,7 +6361,7 @@ void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
Optional<int64_t> CstVal =
- getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
+ getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
assert(CstVal && "Expected constant value");
MIB.addImm(CstVal.getValue());
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 08e4a119127c..1524aa5eb0ec 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -16,6 +16,7 @@
#include "AArch64Subtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -23,6 +24,7 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/MathExtras.h"
@@ -34,6 +36,7 @@ using namespace llvm;
using namespace LegalizeActions;
using namespace LegalizeMutations;
using namespace LegalityPredicates;
+using namespace MIPatternMatch;
AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
: ST(&ST) {
@@ -45,7 +48,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
const LLT s128 = LLT::scalar(128);
- const LLT s256 = LLT::scalar(256);
const LLT v16s8 = LLT::fixed_vector(16, 8);
const LLT v8s8 = LLT::fixed_vector(8, 8);
const LLT v4s8 = LLT::fixed_vector(4, 8);
@@ -80,8 +82,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
.legalFor({p0, s1, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
+ .widenScalarToNextPow2(0)
.clampScalar(0, s8, s64)
- .widenScalarToNextPow2(0, 8)
.fewerElementsIf(
[=](const LegalityQuery &Query) {
return Query.Types[0].isVector() &&
@@ -95,16 +97,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
return std::make_pair(0, EltTy);
});
- getActionDefinitionsBuilder(G_PHI).legalFor({p0, s16, s32, s64})
+ getActionDefinitionsBuilder(G_PHI)
+ .legalFor({p0, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
+ .widenScalarToNextPow2(0)
.clampScalar(0, s16, s64)
- .widenScalarToNextPow2(0);
+ // Maximum: sN * k = 128
+ .clampMaxNumElements(0, s8, 16)
+ .clampMaxNumElements(0, s16, 8)
+ .clampMaxNumElements(0, s32, 4)
+ .clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2);
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({s32, s64, v4s32, v2s32, v2s64})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
- .customIf(typeIs(0, v2s16)); // custom lower as G_REV32 + G_LSHR
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
@@ -114,8 +122,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
},
0)
.legalFor({v2s64})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);
@@ -161,11 +169,22 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.scalarize(0);
getActionDefinitionsBuilder({G_SREM, G_UREM, G_SDIVREM, G_UDIVREM})
- .lowerFor({s1, s8, s16, s32, s64});
+ .lowerFor({s1, s8, s16, s32, s64, v2s64, v4s32, v2s32})
+ .widenScalarOrEltToNextPow2(0)
+ .clampScalarOrElt(0, s32, s64)
+ .clampNumElements(0, v2s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsToNextPow2(0);
+
- getActionDefinitionsBuilder({G_SMULO, G_UMULO}).lowerFor({{s64, s1}});
+ getActionDefinitionsBuilder({G_SMULO, G_UMULO})
+ .widenScalarToNextPow2(0, /*Min = */ 32)
+ .clampScalar(0, s32, s64)
+ .lowerIf(typeIs(1, s1));
- getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+ .legalFor({s64, v8s16, v16s8, v4s32})
+ .lower();
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
.legalFor({v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
@@ -184,7 +203,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FNEG})
- .legalFor({s32, s64, v2s64, v4s32, v2s32})
+ .legalFor({MinFPScalar, s32, s64, v2s64, v4s32, v2s32})
+ .clampScalar(0, MinFPScalar, s64)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64);
@@ -220,42 +240,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({s32, s64, v2s32, v4s32, v2s64});
getActionDefinitionsBuilder(G_INSERT)
- .unsupportedIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() <= Query.Types[1].getSizeInBits();
- })
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &Ty0 = Query.Types[0];
- const LLT &Ty1 = Query.Types[1];
- if (Ty0 != s32 && Ty0 != s64 && Ty0 != p0)
- return false;
- return isPowerOf2_32(Ty1.getSizeInBits()) &&
- (Ty1.getSizeInBits() == 1 || Ty1.getSizeInBits() >= 8);
- })
- .clampScalar(0, s32, s64)
+ .legalIf(all(typeInSet(0, {s32, s64, p0}),
+ typeInSet(1, {s1, s8, s16, s32}), smallerThan(1, 0)))
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1)
+ .minScalar(1, s8)
.maxScalarIf(typeInSet(0, {s32}), 1, s16)
- .maxScalarIf(typeInSet(0, {s64}), 1, s32)
- .widenScalarToNextPow2(1);
+ .maxScalarIf(typeInSet(0, {s64, p0}), 1, s32);
getActionDefinitionsBuilder(G_EXTRACT)
- .unsupportedIf([=](const LegalityQuery &Query) {
- return Query.Types[0].getSizeInBits() >= Query.Types[1].getSizeInBits();
- })
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &Ty0 = Query.Types[0];
- const LLT &Ty1 = Query.Types[1];
- if (Ty1 != s32 && Ty1 != s64 && Ty1 != s128)
- return false;
- if (Ty1 == p0)
- return true;
- return isPowerOf2_32(Ty0.getSizeInBits()) &&
- (Ty0.getSizeInBits() == 1 || Ty0.getSizeInBits() >= 8);
- })
- .clampScalar(1, s32, s128)
+ .legalIf(all(typeInSet(0, {s16, s32, s64, p0}),
+ typeInSet(1, {s32, s64, s128, p0}), smallerThan(0, 1)))
.widenScalarToNextPow2(1)
+ .clampScalar(1, s32, s128)
+ .widenScalarToNextPow2(0)
+ .minScalar(0, s16)
.maxScalarIf(typeInSet(1, {s32}), 0, s16)
- .maxScalarIf(typeInSet(1, {s64}), 0, s32)
- .widenScalarToNextPow2(0);
+ .maxScalarIf(typeInSet(1, {s64, p0}), 0, s32)
+ .maxScalarIf(typeInSet(1, {s128}), 0, s64);
getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
.lowerIf(atomicOrderingAtLeastOrStrongerThan(0, AtomicOrdering::Unordered))
@@ -268,8 +271,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{s64, p0, s64, 8},
{p0, p0, s64, 8},
{v2s32, p0, s64, 8}})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
// TODO: We could support sum-of-pow2's but the lowering code doesn't know
// how to do that yet.
.unsupportedIfMemSizeNotPow2()
@@ -285,6 +288,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
};
getActionDefinitionsBuilder(G_LOAD)
+ .customIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
+ })
.legalForTypesWithMemDesc({{s8, p0, s8, 8},
{s16, p0, s16, 8},
{s32, p0, s32, 8},
@@ -300,9 +307,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s64, p0, s128, 8}})
// These extends are also legal
.legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
- .clampScalar(0, s8, s64)
+ .widenScalarToNextPow2(0, /* MinSize = */8)
.lowerIfMemSizeNotPow2()
- .widenScalarToNextPow2(0)
+ .clampScalar(0, s8, s64)
.narrowScalarIf([=](const LegalityQuery &Query) {
// Clamp extending load results to 32-bits.
return Query.Types[0].isScalar() &&
@@ -318,10 +325,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
.clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2)
.customIf(IsPtrVecPred)
.scalarizeIf(typeIs(0, v2s16), 0);
getActionDefinitionsBuilder(G_STORE)
+ .customIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
+ })
.legalForTypesWithMemDesc({{s8, p0, s8, 8},
{s16, p0, s8, 8}, // truncstorei8 from s16
{s32, p0, s8, 8}, // truncstorei8 from s32
@@ -353,6 +365,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
.clampMaxNumElements(0, s64, 2)
+ .clampMaxNumElements(0, p0, 2)
.lowerIfMemSizeNotPow2()
.customIf(IsPtrVecPred)
.scalarizeIf(typeIs(0, v2s16), 0);
@@ -360,8 +373,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Constants
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({p0, s8, s16, s32, s64})
- .clampScalar(0, s8, s64)
- .widenScalarToNextPow2(0);
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s8, s64);
getActionDefinitionsBuilder(G_FCONSTANT)
.legalIf([=](const LegalityQuery &Query) {
const auto &Ty = Query.Types[0];
@@ -383,6 +396,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v8s16, v8s16},
{v8s8, v8s8},
{v16s8, v16s8}})
+ .widenScalarOrEltToNextPow2(1)
.clampScalar(1, s32, s64)
.clampScalar(0, s32, s32)
.minScalarEltSameAsIf(
@@ -399,7 +413,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) { return Query.Types[1] == v2p0; }, 0,
s64)
- .widenScalarOrEltToNextPow2(1)
.clampNumElements(0, v2s32, v4s32);
// Extensions
@@ -459,10 +472,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
- .clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1);
+ .clampScalar(0, s32, s64)
+ .widenScalarToNextPow2(1)
+ .clampScalar(1, s32, s64);
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalForCartesianProduct({s32, s64, v2s64, v4s32, v2s32})
@@ -477,8 +490,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_SELECT)
.legalFor({{s32, s1}, {s64, s1}, {p0, s1}})
- .clampScalar(0, s32, s64)
.widenScalarToNextPow2(0)
+ .clampScalar(0, s32, s64)
.minScalarEltSameAsIf(all(isVector(0), isVector(1)), 1, 0)
.lowerIf(isVector(0));
@@ -492,6 +505,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_PTRTOINT)
.legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
+ .legalFor({{v2s64, v2p0}})
.maxScalar(0, s64)
.widenScalarToNextPow2(0, /*Min*/ 8);
@@ -544,76 +558,30 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
-
- auto notValidElt = [](const LegalityQuery &Query, unsigned TypeIdx) {
- const LLT &Ty = Query.Types[TypeIdx];
- if (Ty.isVector()) {
- const LLT &EltTy = Ty.getElementType();
- if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
- return true;
- if (!isPowerOf2_32(EltTy.getSizeInBits()))
- return true;
- }
- return false;
- };
-
- // FIXME: This rule is horrible, but specifies the same as what we had
- // before with the particularly strange definitions removed (e.g.
- // s8 = G_MERGE_VALUES s32, s32).
- // Part of the complexity comes from these ops being extremely flexible. For
- // example, you can build/decompose vectors with it, concatenate vectors,
- // etc. and in addition to this you can also bitcast with it at the same
- // time. We've been considering breaking it up into multiple ops to make it
- // more manageable throughout the backend.
getActionDefinitionsBuilder(Op)
- // Break up vectors with weird elements into scalars
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
- scalarize(0))
- .fewerElementsIf(
- [=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
- scalarize(1))
- // Clamp the big scalar to s8-s128 and make it a power of 2.
- .clampScalar(BigTyIdx, s8, s128)
- .widenScalarIf(
- [=](const LegalityQuery &Query) {
- const LLT &Ty = Query.Types[BigTyIdx];
- return !isPowerOf2_32(Ty.getSizeInBits()) &&
- Ty.getSizeInBits() % 64 != 0;
- },
- [=](const LegalityQuery &Query) {
- // Pick the next power of 2, or a multiple of 64 over 128.
- // Whichever is smaller.
- const LLT &Ty = Query.Types[BigTyIdx];
- unsigned NewSizeInBits = 1
- << Log2_32_Ceil(Ty.getSizeInBits() + 1);
- if (NewSizeInBits >= 256) {
- unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
- if (RoundedTo < NewSizeInBits)
- NewSizeInBits = RoundedTo;
- }
- return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
- })
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
- // valid.
- .clampScalar(LitTyIdx, s8, s256)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 8)
- // So at this point, we have s8, s16, s32, s64, s128, s192, s256, s384,
- // s512, <X x s8>, <X x s16>, <X x s32>, or <X x s64>.
- // At this point it's simple enough to accept the legal types.
- .legalIf([=](const LegalityQuery &Query) {
- const LLT &BigTy = Query.Types[BigTyIdx];
- const LLT &LitTy = Query.Types[LitTyIdx];
- if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
+ .widenScalarToNextPow2(LitTyIdx, 8)
+ .widenScalarToNextPow2(BigTyIdx, 32)
+ .clampScalar(LitTyIdx, s8, s64)
+ .clampScalar(BigTyIdx, s32, s128)
+ .legalIf([=](const LegalityQuery &Q) {
+ switch (Q.Types[BigTyIdx].getSizeInBits()) {
+ case 32:
+ case 64:
+ case 128:
+ break;
+ default:
return false;
- if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
+ }
+ switch (Q.Types[LitTyIdx].getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default:
return false;
- return BigTy.getSizeInBits() % LitTy.getSizeInBits() == 0;
- })
- // Any vectors left are the wrong size. Scalarize them.
- .scalarize(0)
- .scalarize(1);
+ }
+ });
}
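The rewritten G_MERGE_VALUES/G_UNMERGE_VALUES rule reduces the old ad-hoc predicate to a size check: after the preceding widen/clamp steps, the big type must be 32, 64, or 128 bits and the small type 8, 16, 32, or 64 bits. A standalone sketch of that final legality predicate, assuming clamping has already been applied; isLegalMergeUnmerge is an illustrative name:

    #include <cassert>

    // Mirrors the legalIf() predicate: accept only the size combinations the
    // earlier widenScalarToNextPow2/clampScalar steps can produce.
    static bool isLegalMergeUnmerge(unsigned BigBits, unsigned LitBits) {
      switch (BigBits) {
      case 32:
      case 64:
      case 128:
        break;
      default:
        return false;
      }
      switch (LitBits) {
      case 8:
      case 16:
      case 32:
      case 64:
        return true;
      default:
        return false;
      }
    }

    int main() {
      assert(isLegalMergeUnmerge(64, 32));   // s64 = G_MERGE_VALUES s32, s32
      assert(isLegalMergeUnmerge(128, 64));  // s128 = G_MERGE_VALUES s64, s64
      assert(!isLegalMergeUnmerge(256, 64)); // too wide, must be broken up first
      return 0;
    }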
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
@@ -626,7 +594,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &VecTy = Query.Types[1];
return VecTy == v2s16 || VecTy == v4s16 || VecTy == v8s16 ||
VecTy == v4s32 || VecTy == v2s64 || VecTy == v2s32 ||
- VecTy == v16s8 || VecTy == v2s32 || VecTy == v2p0;
+ VecTy == v8s8 || VecTy == v16s8 || VecTy == v2s32 ||
+ VecTy == v2p0;
})
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
@@ -671,6 +640,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s64, s64}})
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
+ .minScalarOrElt(0, s8)
.minScalarSameAs(1, 0);
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
@@ -682,7 +652,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
// TODO: Custom lowering for v2s32, v4s32, v2s64.
- getActionDefinitionsBuilder(G_BITREVERSE).legalFor({s32, s64, v8s8, v16s8});
+ getActionDefinitionsBuilder(G_BITREVERSE)
+ .legalFor({s32, s64, v8s8, v16s8})
+ .widenScalarToNextPow2(0, /*Min = */ 32)
+ .clampScalar(0, s32, s64);
getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
@@ -716,7 +689,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64);
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
- .legalFor({{v4s32, v2s32}, {v8s16, v4s16}});
+ .legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
getActionDefinitionsBuilder(G_JUMP_TABLE).legalFor({{p0}, {s64}});
@@ -748,6 +721,28 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, s32, 4)
.lower();
+ getActionDefinitionsBuilder(
+ {G_VECREDUCE_OR, G_VECREDUCE_AND, G_VECREDUCE_XOR})
+ // Try to break down into smaller vectors as long as they're at least 64
+ // bits. This lets us use vector operations for some parts of the
+ // reduction.
+ .fewerElementsIf(
+ [=](const LegalityQuery &Q) {
+ LLT SrcTy = Q.Types[1];
+ if (SrcTy.isScalar())
+ return false;
+ if (!isPowerOf2_32(SrcTy.getNumElements()))
+ return false;
+ // We can usually perform 64b vector operations.
+ return SrcTy.getSizeInBits() > 64;
+ },
+ [=](const LegalityQuery &Q) {
+ LLT SrcTy = Q.Types[1];
+ return std::make_pair(1, SrcTy.divide(2));
+ })
+ .scalarize(1)
+ .lower();
+
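The fewerElementsIf rule above keeps halving the reduction source while it is wider than 64 bits and has a power-of-two element count, so e.g. a v16s8 OR-reduction becomes a v8s8 one fed by a vector OR. A small sketch of the type sequence this mutation produces; printReductionSplits is a hypothetical helper, not LLVM API:

    #include <cstdio>

    // Repeatedly apply the "halve the element count while wider than 64 bits"
    // mutation from the patch and print each intermediate vector type.
    static void printReductionSplits(unsigned NumElts, unsigned EltBits) {
      // A power-of-two element count is a precondition of the rule.
      if (NumElts == 0 || (NumElts & (NumElts - 1)) != 0)
        return;
      while (NumElts * EltBits > 64 && NumElts > 1) {
        NumElts /= 2;
        std::printf("split to v%us%u\n", NumElts, EltBits);
      }
    }

    int main() {
      printReductionSplits(16, 8); // v16s8 -> v8s8 (64 bits, stop)
      printReductionSplits(4, 32); // v4s32 -> v2s32
      return 0;
    }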
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT})
.lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); });
@@ -764,7 +759,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder({G_SBFX, G_UBFX})
.customFor({{s32, s32}, {s64, s64}});
- // TODO: Custom legalization for s128
// TODO: Use generic lowering when custom lowering is not possible.
auto always = [=](const LegalityQuery &Q) { return true; };
getActionDefinitionsBuilder(G_CTPOP)
@@ -775,12 +769,27 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.maxScalarEltSameAsIf(always, 1, 0)
.customFor({{s32, s32},
{s64, s64},
+ {s128, s128},
{v2s64, v2s64},
{v2s32, v2s32},
{v4s32, v4s32},
{v4s16, v4s16},
{v8s16, v8s16}});
+ // TODO: Vector types.
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0));
+
+ // TODO: Vector types.
+ getActionDefinitionsBuilder({G_FMAXNUM, G_FMINNUM})
+ .legalFor({MinFPScalar, s32, s64})
+ .libcallFor({s128})
+ .minScalar(0, MinFPScalar);
+
+ // TODO: Libcall support for s128.
+ // TODO: s16 should be legal with full FP16 support.
+ getActionDefinitionsBuilder({G_LROUND, G_LLROUND})
+ .legalFor({{s64, s32}, {s64, s64}});
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -799,8 +808,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_BSWAP:
- return legalizeBSwap(MI, MRI, MIRBuilder);
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
@@ -948,6 +955,37 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
+ switch (MI.getIntrinsicID()) {
+ case Intrinsic::vacopy: {
+ unsigned PtrSize = ST->isTargetILP32() ? 4 : 8;
+ unsigned VaListSize =
+ (ST->isTargetDarwin() || ST->isTargetWindows())
+ ? PtrSize
+ : ST->isTargetILP32() ? 20 : 32;
+
+ MachineFunction &MF = *MI.getMF();
+ auto Val = MF.getRegInfo().createGenericVirtualRegister(
+ LLT::scalar(VaListSize * 8));
+ MachineIRBuilder MIB(MI);
+ MIB.buildLoad(Val, MI.getOperand(2),
+ *MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOLoad,
+ VaListSize, Align(PtrSize)));
+ MIB.buildStore(Val, MI.getOperand(1),
+ *MF.getMachineMemOperand(MachinePointerInfo(),
+ MachineMemOperand::MOStore,
+ VaListSize, Align(PtrSize)));
+ MI.eraseFromParent();
+ return true;
+ }
+ case Intrinsic::get_dynamic_area_offset: {
+ MachineIRBuilder &MIB = Helper.MIRBuilder;
+ MIB.buildConstant(MI.getOperand(0).getReg(), 0);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
return true;
}
@@ -960,7 +998,7 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
// If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
// imported patterns can select it later. Either way, it will be legal.
Register AmtReg = MI.getOperand(2).getReg();
- auto VRegAndVal = getConstantVRegValWithLookThrough(AmtReg, MRI);
+ auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
if (!VRegAndVal)
return true;
// Check the shift amount is in range for an immediate form.
@@ -974,6 +1012,20 @@ bool AArch64LegalizerInfo::legalizeShlAshrLshr(
return true;
}
+static void matchLDPSTPAddrMode(Register Root, Register &Base, int &Offset,
+ MachineRegisterInfo &MRI) {
+ Base = Root;
+ Offset = 0;
+
+ Register NewBase;
+ int64_t NewOffset;
+ if (mi_match(Root, MRI, m_GPtrAdd(m_Reg(NewBase), m_ICst(NewOffset))) &&
+ isShiftedInt<7, 3>(NewOffset)) {
+ Base = NewBase;
+ Offset = NewOffset;
+ }
+}
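isShiftedInt<7, 3>(NewOffset) accepts byte offsets that are a multiple of 8 and whose scaled value fits a signed 7-bit field, i.e. offsets in [-512, 504]; the selected LDP/STP later encodes NewOffset / 8 as its immediate. A self-contained sketch of that range check under those assumptions (not the LLVM implementation):

    #include <cassert>
    #include <cstdint>

    // Roughly what isShiftedInt<7, 3> checks: the offset is 8-byte aligned and
    // the scaled immediate Offset/8 fits in a signed 7-bit field, i.e. [-64, 63].
    static bool isValidLdpStpOffset(int64_t Offset) {
      return (Offset % 8) == 0 && (Offset / 8) >= -64 && (Offset / 8) <= 63;
    }

    int main() {
      assert(isValidLdpStpOffset(0));
      assert(isValidLdpStpOffset(504));  // encoded as imm 63
      assert(isValidLdpStpOffset(-512)); // encoded as imm -64
      assert(!isValidLdpStpOffset(512)); // out of range
      assert(!isValidLdpStpOffset(4));   // not 8-byte aligned
      return 0;
    }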
+
// FIXME: This should be removed and replaced with the generic bitcast legalize
// action.
bool AArch64LegalizerInfo::legalizeLoadStore(
@@ -993,6 +1045,36 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
Register ValReg = MI.getOperand(0).getReg();
const LLT ValTy = MRI.getType(ValReg);
+ if (ValTy == LLT::scalar(128)) {
+ assert((*MI.memoperands_begin())->getSuccessOrdering() ==
+ AtomicOrdering::Monotonic ||
+ (*MI.memoperands_begin())->getSuccessOrdering() ==
+ AtomicOrdering::Unordered);
+ assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+ LLT s64 = LLT::scalar(64);
+ MachineInstrBuilder NewI;
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});
+ MIRBuilder.buildMerge(ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
+ } else {
+ auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
+ NewI = MIRBuilder.buildInstr(
+ AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});
+ }
+ Register Base;
+ int Offset;
+ matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
+ NewI.addUse(Base);
+ NewI.addImm(Offset / 8);
+
+ NewI.cloneMemRefs(MI);
+ constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
+ *MRI.getTargetRegisterInfo(),
+ *ST->getRegBankInfo());
+ MI.eraseFromParent();
+ return true;
+ }
+
if (!ValTy.isVector() || !ValTy.getElementType().isPointer() ||
ValTy.getElementType().getAddressSpace() != 0) {
LLVM_DEBUG(dbgs() << "Tried to do custom legalization on wrong load/store");
@@ -1015,46 +1097,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
return true;
}
-bool AArch64LegalizerInfo::legalizeBSwap(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- assert(MI.getOpcode() == TargetOpcode::G_BSWAP);
-
- // The <2 x half> case needs special lowering because there isn't an
- // instruction that does that directly. Instead, we widen to <8 x i8>
- // and emit a G_REV32 followed by a G_LSHR knowing that instruction selection
- // will later match them as:
- //
- // rev32.8b v0, v0
- // ushr.2s v0, v0, #16
- //
- // We could emit those here directly, but it seems better to keep things as
- // generic as possible through legalization, and avoid committing layering
- // violations by legalizing & selecting here at the same time.
-
- Register ValReg = MI.getOperand(1).getReg();
- assert(LLT::fixed_vector(2, 16) == MRI.getType(ValReg));
- const LLT v2s32 = LLT::fixed_vector(2, 32);
- const LLT v8s8 = LLT::fixed_vector(8, 8);
- const LLT s32 = LLT::scalar(32);
-
- auto Undef = MIRBuilder.buildUndef(v8s8);
- auto Insert =
- MIRBuilder
- .buildInstr(TargetOpcode::INSERT_SUBREG, {v8s8}, {Undef, ValReg})
- .addImm(AArch64::ssub);
- auto Rev32 = MIRBuilder.buildInstr(AArch64::G_REV32, {v8s8}, {Insert});
- auto Bitcast = MIRBuilder.buildBitcast(v2s32, Rev32);
- auto Amt = MIRBuilder.buildConstant(v2s32, 16);
- auto UShr =
- MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {v2s32}, {Bitcast, Amt});
- auto Zero = MIRBuilder.buildConstant(s32, 0);
- auto Extract = MIRBuilder.buildExtractVectorElement(s32, UShr, Zero);
- MIRBuilder.buildBitcast({MI.getOperand(0).getReg()}, Extract);
- MI.eraseFromParent();
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
@@ -1107,8 +1149,8 @@ bool AArch64LegalizerInfo::legalizeBitfieldExtract(
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
// Only legal if we can select immediate forms.
// TODO: Lower this otherwise.
- return getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
- getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+ return getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI) &&
+ getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
}
bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
@@ -1151,8 +1193,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
// v8s16,v4s32,v2s64 -> v16i8
LLT VTy = Size == 128 ? LLT::fixed_vector(16, 8) : LLT::fixed_vector(8, 8);
if (Ty.isScalar()) {
- // TODO: Handle s128.
- assert((Size == 32 || Size == 64) && "Expected only 32 or 64 bit scalars!");
+  assert((Size == 32 || Size == 64 || Size == 128) &&
+         "Expected only 32, 64, or 128 bit scalars!");
if (Size == 32) {
Val = MIRBuilder.buildZExt(LLT::scalar(64), Val).getReg(0);
}
@@ -1198,7 +1239,7 @@ bool AArch64LegalizerInfo::legalizeCTPOP(MachineInstr &MI,
}
// Post-conditioning.
- if (Ty.isScalar() && Size == 64)
+ if (Ty.isScalar() && (Size == 64 || Size == 128))
MIRBuilder.buildZExt(Dst, UADD);
else
UADD->getOperand(0).setReg(Dst);
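With the s128 case allowed, the scalar CTPOP path also zero-extends the small UADDLV result back up to the 128-bit destination. Conceptually, a 128-bit population count is just the sum of the counts of its two 64-bit halves and always fits in 8 bits; a quick sketch confirms this using C++20 std::popcount (unrelated to the LLVM helpers above):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Population count of a 128-bit value represented as two 64-bit halves; the
    // result always fits in 8 bits, which is why the legalizer can zero-extend
    // the narrow count back to the full 128-bit destination.
    static unsigned popcount128(uint64_t Hi, uint64_t Lo) {
      return std::popcount(Hi) + std::popcount(Lo);
    }

    int main() {
      assert(popcount128(0, 0) == 0);
      assert(popcount128(~0ULL, ~0ULL) == 128);
      assert(popcount128(0x1ULL, 0xF0ULL) == 5);
      return 0;
    }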
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 78fc24559d71..35456d95dc2b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -35,8 +35,6 @@ public:
MachineInstr &MI) const override;
private:
- bool legalizeBSwap(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index b700c3760a58..a9b3792e0118 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -55,7 +55,7 @@ bool matchExtractVecEltPairwiseAdd(
Register Src2 = MI.getOperand(2).getReg();
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- auto Cst = getConstantVRegValWithLookThrough(Src2, MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(Src2, MRI);
if (!Cst || Cst->Value != 0)
return false;
// SDAG also checks for FullFP16, but this looks to be beneficial anyway.
@@ -129,7 +129,7 @@ bool matchAArch64MulConstCombine(
const LLT Ty = MRI.getType(LHS);
// The below optimizations require a constant RHS.
- auto Const = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto Const = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!Const)
return false;
@@ -262,6 +262,33 @@ void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
Observer.changedInstr(MI);
}
+/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
+/// instruction.
+static bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
+ // If this is coming from a scalar compare then we can use a G_ZEXT instead of
+ // a G_ANYEXT:
+ //
+ // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
+ // %ext:_(s64) = G_ANYEXT %cmp(s32)
+ //
+ // By doing this, we can leverage more KnownBits combines.
+ assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ return MRI.getType(Dst).isScalar() &&
+ mi_match(Src, MRI,
+ m_any_of(m_GICmp(m_Pred(), m_Reg(), m_Reg()),
+ m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
+}
+
+static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer) {
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
+ Observer.changedInstr(MI);
+}
+
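The combine above relies on integer and FP compares materializing only 0 or 1, so any-extending and zero-extending the compare result are interchangeable, and the zero-extend exposes known-zero high bits to later combines. A trivial standalone check of that property:

    #include <cassert>
    #include <cstdint>

    int main() {
      // A scalar compare produces exactly 0 or 1, so widening it with a
      // zero-extend never changes its value: the high bits are already zero.
      for (int a = -2; a <= 2; ++a)
        for (int b = -2; b <= 2; ++b) {
          uint32_t Cmp = (a < b) ? 1u : 0u; // what G_ICMP/G_FCMP produce (0/1)
          uint64_t ZExt = static_cast<uint64_t>(Cmp);
          assert((ZExt >> 1) == 0 && "high bits of a compare result are zero");
          assert(ZExt == Cmp);
        }
      return 0;
    }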
#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 84ecb4ba6964..3ff67d188822 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -527,7 +527,7 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
// If the RHS is not a constant, or the RHS is already a valid arithmetic
// immediate, then there is nothing to change.
- auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
if (!ValAndVReg)
return None;
uint64_t C = ValAndVReg->Value.getZExtValue();
@@ -757,7 +757,7 @@ static unsigned getCmpOperandFoldingProfit(Register CmpOp,
if (MI.getOpcode() != TargetOpcode::G_AND)
return false;
auto ValAndVReg =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!ValAndVReg)
return false;
uint64_t Mask = ValAndVReg->Value.getZExtValue();
@@ -774,7 +774,7 @@ static unsigned getCmpOperandFoldingProfit(Register CmpOp,
return 0;
auto MaybeShiftAmt =
- getConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
if (!MaybeShiftAmt)
return 0;
uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
@@ -814,7 +814,7 @@ static bool trySwapICmpOperands(MachineInstr &MI,
// Don't swap if there's a constant on the RHS, because we know we can fold
// that.
Register RHS = MI.getOperand(3).getReg();
- auto RHSCst = getConstantVRegValWithLookThrough(RHS, MRI);
+ auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 9efbcbb0065b..d3f4130d2ba1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -146,8 +146,8 @@ static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
return false;
- auto Cst =
- getConstantVRegValWithLookThrough(UseInstr.getOperand(2).getReg(), MRI);
+ auto Cst = getIConstantVRegValWithLookThrough(
+ UseInstr.getOperand(2).getReg(), MRI);
if (!Cst)
return false;
MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
@@ -220,6 +220,121 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
+static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
+ CombinerHelper &Helper,
+ GISelChangeObserver &Observer) {
+ // Try to simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ
+ // if the result is only used in the no-overflow case. It is restricted to
+ // cases where we know that the high bits of the operands are 0. If there's
+ // an overflow, then the 9th or 17th bit must be set, which can be checked
+ // using TBNZ.
+ //
+ // Change (for UADDOs on 8 and 16 bits):
+ //
+ // %z0 = G_ASSERT_ZEXT _
+ // %op0 = G_TRUNC %z0
+ // %z1 = G_ASSERT_ZEXT _
+ // %op1 = G_TRUNC %z1
+ // %val, %cond = G_UADDO %op0, %op1
+ // G_BRCOND %cond, %error.bb
+ //
+ // error.bb:
+ // (no successors and no uses of %val)
+ //
+ // To:
+ //
+ // %z0 = G_ASSERT_ZEXT _
+ // %z1 = G_ASSERT_ZEXT _
+ // %add = G_ADD %z0, %z1
+ // %val = G_TRUNC %add
+ // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
+ // %cond = G_ICMP NE, %bit, 0
+ // G_BRCOND %cond, %error.bb
+
+ auto &MRI = *B.getMRI();
+
+ MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
+ MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
+ Register Op0Wide;
+ Register Op1Wide;
+ if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
+ !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
+ return false;
+ LLT WideTy0 = MRI.getType(Op0Wide);
+ LLT WideTy1 = MRI.getType(Op1Wide);
+ Register ResVal = MI.getOperand(0).getReg();
+ LLT OpTy = MRI.getType(ResVal);
+ MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
+ MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
+
+ unsigned OpTySize = OpTy.getScalarSizeInBits();
+ // First check that the G_TRUNCs feeding the G_UADDO are no-ops, because the
+ // inputs have been zero-extended.
+ if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
+ Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
+ OpTySize != Op0WideDef->getOperand(2).getImm() ||
+ OpTySize != Op1WideDef->getOperand(2).getImm())
+ return false;
+
+ // Only scalar UADDO with either 8 or 16 bit operands is handled.
+ if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
+ OpTySize >= WideTy0.getScalarSizeInBits() ||
+ (OpTySize != 8 && OpTySize != 16))
+ return false;
+
+ // The overflow-status result must be used by a branch only.
+ Register ResStatus = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(ResStatus))
+ return false;
+ MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
+ if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
+ return false;
+
+ // Make sure the computed result is only used in the no-overflow blocks.
+ MachineBasicBlock *CurrentMBB = MI.getParent();
+ MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
+ if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
+ return false;
+ if (any_of(MRI.use_nodbg_instructions(ResVal),
+ [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
+ return &MI != &I &&
+ (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
+ }))
+ return false;
+
+ // Remove G_ADDO.
+ B.setInstrAndDebugLoc(*MI.getNextNode());
+ MI.eraseFromParent();
+
+ // Emit wide add.
+ Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
+ B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
+
+ // Emit check of the 9th or 17th bit and update users (the branch). This will
+ // later be folded to TBNZ.
+ Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
+ B.buildAnd(
+ CondBit, AddDst,
+ B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
+ B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
+ B.buildConstant(LLT::scalar(32), 0));
+
+ // Update ZEXts users of the result value. Because all uses are in the
+ // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
+ B.buildZExtOrTrunc(ResVal, AddDst);
+ for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
+ Register WideReg;
+ if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
+ auto OldR = U.getParent()->getOperand(0).getReg();
+ Observer.erasingInstr(*U.getParent());
+ U.getParent()->eraseFromParent();
+ Helper.replaceRegWith(MRI, OldR, AddDst);
+ }
+ }
+
+ return true;
+}
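The transformation hinges on a simple arithmetic fact: if two N-bit values are zero-extended into a wider register and added there, the narrow addition overflows exactly when bit N of the wide sum is set. A standalone sketch for the 8-bit case in ordinary C++, with no LLVM types; uaddo8 is an illustrative name:

    #include <cassert>
    #include <cstdint>

    // For zero-extended 8-bit inputs, the wide sum fits in 9 bits, so the
    // overflow flag of the narrow G_UADDO is exactly bit 8 of the wide G_ADD
    // result -- the bit the TBNZ will test.
    static bool uaddo8(uint8_t A, uint8_t B, uint8_t &Result) {
      uint32_t Wide = static_cast<uint32_t>(A) + static_cast<uint32_t>(B);
      Result = static_cast<uint8_t>(Wide); // truncate back, as the combine does
      return (Wide & (1u << 8)) != 0;
    }

    int main() {
      uint8_t R;
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B) {
          bool Overflow =
              uaddo8(static_cast<uint8_t>(A), static_cast<uint8_t>(B), R);
          assert(Overflow == (A + B > 0xFF));
          assert(R == static_cast<uint8_t>(A + B));
        }
      return 0;
    }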
+
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;
@@ -272,6 +387,8 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
return Helper.tryCombineShuffleVector(MI);
+ case TargetOpcode::G_UADDO:
+ return tryToSimplifyUADDO(MI, B, Helper, Observer);
case TargetOpcode::G_MEMCPY_INLINE:
return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_MEMCPY:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 8c34027f7bb3..40ddf6a94f73 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -13,8 +13,11 @@
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h"
+#include "AArch64RegisterInfo.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -271,6 +274,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AArch64::WSeqPairsClassRegClassID:
case AArch64::XSeqPairsClassRegClassID:
case AArch64::MatrixIndexGPR32_12_15RegClassID:
+ case AArch64::GPR64_with_sub_32_in_MatrixIndexGPR32_12_15RegClassID:
return getRegBank(AArch64::GPRRegBankID);
case AArch64::CCRRegClassID:
return getRegBank(AArch64::CCRegBankID);
@@ -424,6 +428,8 @@ static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FRINT:
case TargetOpcode::G_INTRINSIC_TRUNC:
case TargetOpcode::G_INTRINSIC_ROUND:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM:
return true;
}
return false;
@@ -529,6 +535,8 @@ bool AArch64RegisterBankInfo::onlyUsesFP(const MachineInstr &MI,
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND:
return true;
default:
break;
@@ -747,24 +755,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] != PMI_FirstGPR)
+ if (OpRegBankIdx[0] != PMI_FirstGPR) {
Cost = 2;
- else
- // Check if that load feeds fp instructions.
- // In that case, we want the default mapping to be on FPR
- // instead of blind map every scalar to GPR.
- for (const MachineInstr &UseMI :
- MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
- // If we have at least one direct use in a FP instruction,
- // assume this was a floating point load in the IR.
- // If it was not, we would have had a bitcast before
- // reaching that instruction.
- // Int->FP conversion operations are also captured in onlyDefinesFP().
- if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
- }
- }
+ break;
+ }
+
+ if (cast<GLoad>(MI).isAtomic()) {
+ // Atomics always use GPR destinations. Don't refine any further.
+ OpRegBankIdx[0] = PMI_FirstGPR;
+ break;
+ }
+
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blindly mapping every scalar to GPR.
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ //
+ // Int->FP conversion operations are also captured in
+ // onlyDefinesFP().
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ onlyDefinesFP(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx[0] = PMI_FirstFPR;
break;
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
@@ -957,6 +974,12 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}
break;
}
+ case TargetOpcode::G_LROUND:
+ case TargetOpcode::G_LLROUND: {
+ // Source is always floating point and destination is always integer.
+ OpRegBankIdx = {PMI_FirstGPR, PMI_FirstFPR};
+ break;
+ }
}
// Finally construct the computed mapping.
diff --git a/llvm/lib/Target/AArch64/GISel/select-saddo.mir b/llvm/lib/Target/AArch64/GISel/select-saddo.mir
deleted file mode 100644
index 6f05bd7ac838..000000000000
--- a/llvm/lib/Target/AArch64/GISel/select-saddo.mir
+++ /dev/null
@@ -1,158 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
-
-...
----
-name: saddo_s32
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
-
- ; CHECK-LABEL: name: saddo_s32
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %saddo:gpr32 = ADDSWrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %saddo
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %saddo:gpr(s32), %4:gpr(s1) = G_SADDO %reg0, %reg1
- $w0 = COPY %saddo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_s64
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $x0, $x1, $x2
-
- ; CHECK-LABEL: name: saddo_s64
- ; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: %reg0:gpr64 = COPY $x0
- ; CHECK: %reg1:gpr64 = COPY $x1
- ; CHECK: %saddo:gpr64 = ADDSXrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %saddo
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s64) = COPY $x1
- %saddo:gpr(s64), %4:gpr(s1) = G_SADDO %reg0, %reg1
- $x0 = COPY %saddo(s64)
- RET_ReallyLR implicit $x0
-
-...
----
-name: saddo_s32_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get ADDSWri when we can fold in a constant.
- ;
- ; CHECK-LABEL: name: saddo_s32_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %saddo:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %saddo
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 16
- %saddo:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
- $w0 = COPY %saddo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_s32_shifted
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get ADDSWrs when we can fold in a shift.
- ;
- ; CHECK-LABEL: name: saddo_s32_shifted
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %add:gpr32 = ADDSWrs %reg0, %reg1, 16, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %constant:gpr(s32) = G_CONSTANT i32 16
- %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
- %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %reg0, %shift
- $w0 = COPY %add(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_s32_neg_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get SUBSWri when we can fold in a negative constant.
- ;
- ; CHECK-LABEL: name: saddo_s32_neg_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 -16
- %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
- $w0 = COPY %add(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: saddo_arith_extended
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $x0
- ; Check that we get ADDSXrx.
- ; CHECK-LABEL: name: saddo_arith_extended
- ; CHECK: liveins: $w0, $x0
- ; CHECK: %reg0:gpr64sp = COPY $x0
- ; CHECK: %reg1:gpr32 = COPY $w0
- ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %add
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s32) = COPY $w0
- %ext:gpr(s64) = G_ZEXT %reg1(s32)
- %cst:gpr(s64) = G_CONSTANT i64 2
- %shift:gpr(s64) = G_SHL %ext, %cst(s64)
- %add:gpr(s64), %flags:gpr(s1) = G_SADDO %reg0, %shift
- $x0 = COPY %add(s64)
- RET_ReallyLR implicit $x0
diff --git a/llvm/lib/Target/AArch64/GISel/select-ssubo.mir b/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
deleted file mode 100644
index f6b1794645f7..000000000000
--- a/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
+++ /dev/null
@@ -1,158 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
-
-...
----
-name: ssubo_s32
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
-
- ; CHECK-LABEL: name: ssubo_s32
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %ssubo:gpr32 = SUBSWrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %ssubo
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %ssubo:gpr(s32), %4:gpr(s1) = G_SSUBO %reg0, %reg1
- $w0 = COPY %ssubo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_s64
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $x0, $x1, $x2
-
- ; CHECK-LABEL: name: ssubo_s64
- ; CHECK: liveins: $x0, $x1, $x2
- ; CHECK: %reg0:gpr64 = COPY $x0
- ; CHECK: %reg1:gpr64 = COPY $x1
- ; CHECK: %ssubo:gpr64 = SUBSXrr %reg0, %reg1, implicit-def $nzcv
- ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %ssubo
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s64) = COPY $x1
- %ssubo:gpr(s64), %4:gpr(s1) = G_SSUBO %reg0, %reg1
- $x0 = COPY %ssubo(s64)
- RET_ReallyLR implicit $x0
-
-...
----
-name: ssubo_s32_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get SUBSWri when we can fold in a constant.
- ;
- ; CHECK-LABEL: name: ssubo_s32_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %ssubo:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %ssubo
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 16
- %ssubo:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
- $w0 = COPY %ssubo(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_s32_shifted
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get SUBSWrs when we can fold in a shift.
- ;
- ; CHECK-LABEL: name: ssubo_s32_shifted
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %reg0:gpr32 = COPY $w0
- ; CHECK: %reg1:gpr32 = COPY $w1
- ; CHECK: %sub:gpr32 = SUBSWrs %reg0, %reg1, 16, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %sub
- ; CHECK: RET_ReallyLR implicit $w0
- %reg0:gpr(s32) = COPY $w0
- %reg1:gpr(s32) = COPY $w1
- %constant:gpr(s32) = G_CONSTANT i32 16
- %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
- %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %reg0, %shift
- $w0 = COPY %sub(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_s32_neg_imm
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $w1, $x2
- ; Check that we get ADDSWri when we can fold in a negative constant.
- ;
- ; CHECK-LABEL: name: ssubo_s32_neg_imm
- ; CHECK: liveins: $w0, $w1, $x2
- ; CHECK: %copy:gpr32sp = COPY $w0
- ; CHECK: %sub:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $w0 = COPY %sub
- ; CHECK: RET_ReallyLR implicit $w0
- %copy:gpr(s32) = COPY $w0
- %constant:gpr(s32) = G_CONSTANT i32 -16
- %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
- $w0 = COPY %sub(s32)
- RET_ReallyLR implicit $w0
-
-...
----
-name: ssubo_arith_extended
-alignment: 4
-legalized: true
-regBankSelected: true
-tracksRegLiveness: true
-body: |
- bb.1.entry:
- liveins: $w0, $x0
- ; Check that we get SUBSXrx.
- ; CHECK-LABEL: name: ssubo_arith_extended
- ; CHECK: liveins: $w0, $x0
- ; CHECK: %reg0:gpr64sp = COPY $x0
- ; CHECK: %reg1:gpr32 = COPY $w0
- ; CHECK: %sub:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
- ; CHECK: $x0 = COPY %sub
- ; CHECK: RET_ReallyLR implicit $x0
- %reg0:gpr(s64) = COPY $x0
- %reg1:gpr(s32) = COPY $w0
- %ext:gpr(s64) = G_ZEXT %reg1(s32)
- %cst:gpr(s64) = G_CONSTANT i64 2
- %shift:gpr(s64) = G_SHL %ext, %cst(s64)
- %sub:gpr(s64), %flags:gpr(s1) = G_SSUBO %reg0, %shift
- $x0 = COPY %sub(s64)
- RET_ReallyLR implicit $x0
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index c3e74757675b..876526093591 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
#define LLVM_LIB_TARGET_AARCH64_MCTARGETDESC_AARCH64ADDRESSINGMODES_H
+#include "AArch64ExpandImm.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/bit.h"
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 290fe88a8cec..dbb8e85713cb 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -24,9 +24,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
namespace {
@@ -92,7 +92,8 @@ public:
const MCAsmLayout &Layout) const override;
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
unsigned getFixupKindContainereSizeInBytes(unsigned Kind) const;
@@ -159,8 +160,11 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
assert(!IsResolved);
- if (TheTriple.isOSBinFormatCOFF())
+ if (TheTriple.isOSBinFormatCOFF()) {
+ if (!isInt<21>(SignedValue))
+ Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
+ }
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
case AArch64::fixup_aarch64_pcrel_branch19:
@@ -456,7 +460,8 @@ void AArch64AsmBackend::relaxInstruction(MCInst &Inst,
llvm_unreachable("AArch64AsmBackend::relaxInstruction() unimplemented");
}
-bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool AArch64AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
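The COFF branch of the adrp fixup above now rejects offsets that do not fit in the signed 21-bit immediate before truncating them with AdrImmBits. A minimal standalone sketch of the same range check (plain C++, no LLVM headers; fitsInSignedBits is an illustrative stand-in for llvm::isInt<21>):

#include <cstdint>

// True iff Value is representable as a signed immediate of the given width,
// mirroring the isInt<21>(SignedValue) guard added in the patch.
constexpr bool fitsInSignedBits(int64_t Value, unsigned Bits) {
  const int64_t Min = -(int64_t(1) << (Bits - 1));
  const int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
  return Value >= Min && Value <= Max;
}

static_assert(fitsInSignedBits((1 << 20) - 1, 21), "2^20 - 1 fits in 21 bits");
static_assert(!fitsInSignedBits(1 << 20, 21), "2^20 overflows a signed 21-bit field");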
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index cd1bfed9d40d..ee0870d9ef7a 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -1026,11 +1026,11 @@ void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
unsigned Shift =
AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
O << '#' << formatImm(Val);
- if (Shift != 0)
+ if (Shift != 0) {
printShifter(MI, OpNum + 1, STI, O);
-
- if (CommentStream)
- *CommentStream << '=' << formatImm(Val << Shift) << '\n';
+ if (CommentStream)
+ *CommentStream << '=' << formatImm(Val << Shift) << '\n';
+ }
} else {
assert(MO.isExpr() && "Unexpected operand type!");
MO.getExpr()->print(O, &MAI);
@@ -1450,6 +1450,12 @@ void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
+void AArch64InstPrinter::printMatrixIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << MI->getOperand(OpNum).getImm();
+}
+
void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
unsigned OpNum,
const MCSubtargetInfo &STI,
@@ -1539,6 +1545,28 @@ void AArch64InstPrinter::printBarriernXSOption(const MCInst *MI, unsigned OpNo,
O << "#" << Val;
}
+static bool isValidSysReg(const AArch64SysReg::SysReg *Reg, bool Read,
+ const MCSubtargetInfo &STI) {
+ return (Reg && (Read ? Reg->Readable : Reg->Writeable) &&
+ Reg->haveFeatures(STI.getFeatureBits()));
+}
+
+// Looks up a system register either by encoding or by name. Some system
+// registers share the same encoding between different architectures;
+// therefore, a tablegen lookup by encoding will return an entry regardless
+// of the register's predication on a specific subtarget feature. To work
+// around this problem, we keep an alternative name for such registers and
+// look them up by that name if the first lookup was unsuccessful.
+static const AArch64SysReg::SysReg *lookupSysReg(unsigned Val, bool Read,
+ const MCSubtargetInfo &STI) {
+ const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
+
+ if (Reg && !isValidSysReg(Reg, Read, STI))
+ Reg = AArch64SysReg::lookupSysRegByName(Reg->AltName);
+
+ return Reg;
+}
+
void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1558,8 +1586,9 @@ void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
return;
}
- const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
- if (Reg && Reg->Readable && Reg->haveFeatures(STI.getFeatureBits()))
+ const AArch64SysReg::SysReg *Reg = lookupSysReg(Val, true /*Read*/, STI);
+
+ if (isValidSysReg(Reg, true /*Read*/, STI))
O << Reg->Name;
else
O << AArch64SysReg::genericRegisterString(Val);
@@ -1584,8 +1613,9 @@ void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
return;
}
- const AArch64SysReg::SysReg *Reg = AArch64SysReg::lookupSysRegByEncoding(Val);
- if (Reg && Reg->Writeable && Reg->haveFeatures(STI.getFeatureBits()))
+ const AArch64SysReg::SysReg *Reg = lookupSysReg(Val, false /*Read*/, STI);
+
+ if (isValidSysReg(Reg, false /*Read*/, STI))
O << Reg->Name;
else
O << AArch64SysReg::genericRegisterString(Val);
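The printMRS/printMSR changes above both funnel through the new lookupSysReg helper: look the register up by encoding first, and if the entry found is not readable/writable for this operation or not enabled on this subtarget, retry the lookup via the entry's alternative name. A hedged, self-contained sketch of that fallback shape (Entry and the two lookup callbacks are illustrative stand-ins for the TableGen-generated AArch64SysReg tables):

#include <functional>

struct Entry {
  const char *Name;
  const char *AltName;
  bool ValidForSubtarget; // stands in for the Readable/Writeable + feature check
};

const Entry *lookupWithFallback(
    unsigned Encoding,
    const std::function<const Entry *(unsigned)> &lookupByEncoding,
    const std::function<const Entry *(const char *)> &lookupByName) {
  const Entry *E = lookupByEncoding(Encoding);
  // The encoding may be shared with a register from another architecture
  // variant; if the first hit is not usable here, retry via its alternative
  // name, as lookupSysReg does above.
  if (E && !E->ValidForSubtarget)
    E = lookupByName(E->AltName);
  return E;
}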
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 9ec74a1bc7b6..d36fb30a0ce6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -161,6 +161,8 @@ protected:
void printVectorIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMatrixIndex(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBarrierOption(const MCInst *MI, unsigned OpNum,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 3c2df1621e11..90688f1a3e83 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -26,9 +26,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -57,7 +57,16 @@ createAArch64MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
CPU = "apple-a12";
}
- return createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ // Most of the NEON instruction set isn't supported in streaming mode on SME
+  // targets, so disable NEON unless it was explicitly requested.
+ bool RequestedNEON = FS.contains("neon");
+ bool RequestedStreamingSVE = FS.contains("streaming-sve");
+ MCSubtargetInfo *STI =
+ createAArch64MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
+ if (RequestedStreamingSVE && !RequestedNEON &&
+ STI->hasFeature(AArch64::FeatureNEON))
+ STI->ToggleFeature(AArch64::FeatureNEON);
+ return STI;
}
void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
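createAArch64MCSubtargetInfo above only toggles NEON off when streaming SVE was asked for and NEON was not named in the feature string. A small sketch of just that decision, with std::string substituted for StringRef and the subtarget object elided (both substitutions are assumptions made for the sake of a standalone example):

#include <string>

// Returns true when the default-on NEON feature should be switched off:
// the user asked for streaming SVE but never mentioned NEON explicitly.
bool shouldDisableNEON(const std::string &FS, bool NEONEnabledByDefault) {
  const bool RequestedNEON = FS.find("neon") != std::string::npos;
  const bool RequestedStreamingSVE = FS.find("streaming-sve") != std::string::npos;
  return RequestedStreamingSVE && !RequestedNEON && NEONEnabledByDefault;
}

// shouldDisableNEON("+streaming-sve", true)       -> true  (NEON toggled off)
// shouldDisableNEON("+streaming-sve,+neon", true) -> false (kept, as requested)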
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index c84c313c1db0..941226b83e44 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -62,9 +62,6 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
MCInstPrinter *InstPrint,
bool isVerboseAsm);
-MCTargetStreamer *createAArch64ObjectTargetStreamer(MCStreamer &S,
- const MCSubtargetInfo &STI);
-
namespace AArch64_MC {
void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
index 557603c24ba5..cf1a60643efd 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp
@@ -48,11 +48,13 @@ void AArch64TargetStreamer::emitCurrentConstantPool() {
ConstantPools->emitForCurrentSection(Streamer);
}
+void AArch64TargetStreamer::emitConstantPools() {
+ ConstantPools->emitAll(Streamer);
+}
+
// finish() - write out any non-empty assembler constant pools and
// write out note.gnu.properties if needed.
void AArch64TargetStreamer::finish() {
- ConstantPools->emitAll(Streamer);
-
if (MarkBTIProperty)
emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
index 9b030775094c..86c7baf8f429 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h
@@ -23,6 +23,7 @@ public:
~AArch64TargetStreamer() override;
void finish() override;
+ void emitConstantPools() override;
/// Callback used to implement the ldr= pseudo.
/// Add a new entry to the constant pool for the current section and return an
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 62089166f4b7..41f2cead4cf8 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -180,26 +180,18 @@ class sme_mem_ld_ss_base<bit Q, bit V, bits<2> msz, dag outs, dag ins,
let mayLoad = 1;
}
-class sme_mem_ld_ss_inst_BHSD<bits<2> msz, string mnemonic,
- MatrixTileVectorOperand tile_ty, bit is_col,
- Operand imm_ty, RegisterOperand gpr_ty>
+class sme_mem_ld_ss_inst<bit Q, bits<2> msz, string mnemonic,
+ MatrixTileVectorOperand tile_ty, bit is_col,
+ Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_ld_ss_base<
- 0b0, is_col, msz, (outs tile_ty:$ZAt),
+ Q, is_col, msz, (outs tile_ty:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn,
gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg/z, [$Rn, $Rm]">;
-class sme_mem_ld_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
- bit is_col>
- : sme_mem_ld_ss_base<
- 0b1, is_col, 0b11, (outs tile_ty:$ZAt),
- (ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn,
- GPR64shifted128:$Rm),
- mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg/z, [$Rn, $Rm]">;
-
-multiclass sme_mem_ss_aliases_BHSD<string mnemonic, Instruction inst,
- MatrixTileVectorOperand tile_ty, Operand imm_ty,
- RegisterOperand gpr_ty,
+multiclass sme_mem_ss_aliases_base<string mnemonic, Instruction inst,
+ MatrixTileVectorOperand tile_ty,
+ Operand imm_ty, RegisterOperand gpr_ty,
string pg_suffix=""> {
def : InstAlias<mnemonic # "\t$ZAt[$Rv, $imm], $Pg" # pg_suffix # ", [$Rn, $Rm]",
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, gpr_ty:$Rm), 0>;
@@ -210,35 +202,23 @@ multiclass sme_mem_ss_aliases_BHSD<string mnemonic, Instruction inst,
(inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
}
-multiclass sme_mem_ss_aliases_Q<string mnemonic, Instruction inst,
- MatrixTileVectorOperand tile_ty,
- string pg_suffix=""> {
- def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn, $Rm]",
- (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, GPR64shifted128:$Rm), 0>;
- // Default XZR offset aliases
- def : InstAlias<mnemonic # "\t\\{$ZAt[$Rv]\\}, $Pg" # pg_suffix # ", [$Rn]",
- (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 2>;
- def : InstAlias<mnemonic # "\t$ZAt[$Rv], $Pg" # pg_suffix # ", [$Rn]",
- (inst tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, GPR64sp:$Rn, XZR), 0>;
-}
-
multiclass sme_mem_ss_aliases<string mnemonic, string inst, bit is_col,
string pg_suffix=""> {
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "b", !cast<Instruction>(inst # _B),
+ defm : sme_mem_ss_aliases_base<mnemonic # "b", !cast<Instruction>(inst # _B),
!if(is_col, TileVectorOpV8, TileVectorOpH8),
- imm0_15, GPR64shifted8, pg_suffix>;
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "h", !cast<Instruction>(inst # _H),
+ sme_elm_idx0_15, GPR64shifted8, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "h", !cast<Instruction>(inst # _H),
!if(is_col, TileVectorOpV16, TileVectorOpH16),
- imm0_7, GPR64shifted16, pg_suffix>;
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "w", !cast<Instruction>(inst # _S),
+ sme_elm_idx0_7, GPR64shifted16, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "w", !cast<Instruction>(inst # _S),
!if(is_col, TileVectorOpV32, TileVectorOpH32),
- imm0_3, GPR64shifted32, pg_suffix>;
- defm : sme_mem_ss_aliases_BHSD<mnemonic # "d", !cast<Instruction>(inst # _D),
+ sme_elm_idx0_3, GPR64shifted32, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "d", !cast<Instruction>(inst # _D),
!if(is_col, TileVectorOpV64, TileVectorOpH64),
- imm0_1, GPR64shifted64, pg_suffix>;
- defm : sme_mem_ss_aliases_Q <mnemonic # "q", !cast<Instruction>(inst # _Q),
+ sme_elm_idx0_1, GPR64shifted64, pg_suffix>;
+ defm : sme_mem_ss_aliases_base<mnemonic # "q", !cast<Instruction>(inst # _Q),
!if(is_col, TileVectorOpV128, TileVectorOpH128),
- pg_suffix>;
+ sme_elm_idx0_0, GPR64shifted128, pg_suffix>;
}
multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
@@ -246,44 +226,39 @@ multiclass sme_mem_ld_ss_aliases<string inst, bit is_col> {
}
multiclass sme_mem_ld_v_ss<string mnemonic, bit is_col> {
- def _B : sme_mem_ld_ss_inst_BHSD<0b00, mnemonic # "b",
- !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, GPR64shifted8> {
+ def _B : sme_mem_ld_ss_inst<0b0, 0b00, mnemonic # "b",
+ !if(is_col, TileVectorOpV8, TileVectorOpH8),
+ is_col, sme_elm_idx0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
- def _H : sme_mem_ld_ss_inst_BHSD<0b01, mnemonic # "h",
- !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, GPR64shifted16> {
+ def _H : sme_mem_ld_ss_inst<0b0, 0b01, mnemonic # "h",
+ !if(is_col, TileVectorOpV16, TileVectorOpH16),
+ is_col, sme_elm_idx0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
- def _S : sme_mem_ld_ss_inst_BHSD<0b10, mnemonic # "w",
- !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, GPR64shifted32> {
+ def _S : sme_mem_ld_ss_inst<0b0, 0b10, mnemonic # "w",
+ !if(is_col, TileVectorOpV32, TileVectorOpH32),
+ is_col, sme_elm_idx0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
- def _D : sme_mem_ld_ss_inst_BHSD<0b11, mnemonic # "d",
- !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, GPR64shifted64> {
+ def _D : sme_mem_ld_ss_inst<0b0, 0b11, mnemonic # "d",
+ !if(is_col, TileVectorOpV64, TileVectorOpH64),
+ is_col, sme_elm_idx0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
- def _Q : sme_mem_ld_ss_inst_Q<mnemonic # "q",
- !if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col> {
+ def _Q : sme_mem_ld_ss_inst<0b1, 0b11, mnemonic # "q",
+ !if(is_col, TileVectorOpV128, TileVectorOpH128),
+ is_col, sme_elm_idx0_0, GPR64shifted128> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
@@ -322,66 +297,53 @@ class sme_mem_st_ss_base<bit Q, bit V, bits<2> msz, dag ins,
let hasSideEffects = 1;
}
-class sme_mem_st_ss_inst_BHSD<bits<2> msz, string mnemonic,
- MatrixTileVectorOperand tile_ty, bit is_col,
- Operand imm_ty, RegisterOperand gpr_ty>
+class sme_mem_st_ss_inst<bit Q, bits<2> msz, string mnemonic,
+ MatrixTileVectorOperand tile_ty, bit is_col,
+ Operand imm_ty, RegisterOperand gpr_ty>
: sme_mem_st_ss_base<
- 0b0, is_col, msz,
+ Q, is_col, msz,
(ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg,
GPR64sp:$Rn, gpr_ty:$Rm),
mnemonic, "\t\\{$ZAt[$Rv, $imm]\\}, $Pg, [$Rn, $Rm]">;
-class sme_mem_st_ss_inst_Q<string mnemonic, MatrixTileVectorOperand tile_ty,
- bit is_col>
- : sme_mem_st_ss_base<
- 0b1, is_col, 0b11,
- (ins tile_ty:$ZAt, MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg,
- GPR64sp:$Rn, GPR64shifted128:$Rm),
- mnemonic, "\t\\{$ZAt[$Rv]\\}, $Pg, [$Rn, $Rm]">;
-
multiclass sme_mem_st_ss_aliases<string inst, bit is_col> {
defm NAME : sme_mem_ss_aliases<"st1", inst, is_col>;
}
multiclass sme_mem_st_v_ss<string mnemonic, bit is_col> {
- def _B : sme_mem_st_ss_inst_BHSD<0b00, mnemonic # "b",
- !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, GPR64shifted8> {
+ def _B : sme_mem_st_ss_inst<0b0, 0b00, mnemonic # "b",
+ !if(is_col, TileVectorOpV8, TileVectorOpH8),
+ is_col, sme_elm_idx0_15, GPR64shifted8> {
bits<4> imm;
let Inst{3-0} = imm;
}
- def _H : sme_mem_st_ss_inst_BHSD<0b01, mnemonic # "h",
- !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, GPR64shifted16> {
+ def _H : sme_mem_st_ss_inst<0b0, 0b01, mnemonic # "h",
+ !if(is_col, TileVectorOpV16, TileVectorOpH16),
+ is_col, sme_elm_idx0_7, GPR64shifted16> {
bits<1> ZAt;
bits<3> imm;
let Inst{3} = ZAt;
let Inst{2-0} = imm;
}
- def _S : sme_mem_st_ss_inst_BHSD<0b10, mnemonic # "w",
- !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, GPR64shifted32> {
+ def _S : sme_mem_st_ss_inst<0b0, 0b10, mnemonic # "w",
+ !if(is_col, TileVectorOpV32, TileVectorOpH32),
+ is_col, sme_elm_idx0_3, GPR64shifted32> {
bits<2> ZAt;
bits<2> imm;
let Inst{3-2} = ZAt;
let Inst{1-0} = imm;
}
- def _D : sme_mem_st_ss_inst_BHSD<0b11, mnemonic # "d",
- !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, GPR64shifted64> {
+ def _D : sme_mem_st_ss_inst<0b0, 0b11, mnemonic # "d",
+ !if(is_col, TileVectorOpV64, TileVectorOpH64),
+ is_col, sme_elm_idx0_1, GPR64shifted64> {
bits<3> ZAt;
bits<1> imm;
let Inst{3-1} = ZAt;
let Inst{0} = imm;
}
- def _Q : sme_mem_st_ss_inst_Q<mnemonic # "q",
- !if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col> {
+ def _Q : sme_mem_st_ss_inst<0b1, 0b11, mnemonic # "q",
+ !if(is_col, TileVectorOpV128, TileVectorOpH128),
+ is_col, sme_elm_idx0_0, GPR64shifted128> {
bits<4> ZAt;
let Inst{3-0} = ZAt;
}
@@ -423,13 +385,13 @@ multiclass sme_spill_fill<bit isStore, dag outs, dag ins, string opcodestr> {
def : InstAlias<opcodestr # "\t$ZAt[$Rv, $imm4], [$Rn]",
(!cast<Instruction>(NAME) MatrixOp:$ZAt,
- MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm4, GPR64sp:$Rn, 0), 1>;
+ MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_15:$imm4, GPR64sp:$Rn, 0), 1>;
}
multiclass sme_spill<string opcodestr> {
defm NAME : sme_spill_fill<0b1, (outs),
(ins MatrixOp:$ZAt, MatrixIndexGPR32Op12_15:$Rv,
- imm0_15:$imm4, GPR64sp:$Rn,
+ sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
@@ -437,7 +399,7 @@ multiclass sme_spill<string opcodestr> {
multiclass sme_fill<string opcodestr> {
defm NAME : sme_spill_fill<0b0, (outs MatrixOp:$ZAt),
(ins MatrixIndexGPR32Op12_15:$Rv,
- imm0_15:$imm4, GPR64sp:$Rn,
+ sme_elm_idx0_15:$imm4, GPR64sp:$Rn,
imm0_15:$offset),
opcodestr>;
}
@@ -463,60 +425,54 @@ class sme_vector_to_tile_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
let Inst{4} = 0b0;
}
-class sme_vector_to_tile_inst<bits<2> sz, MatrixTileVectorOperand tile_ty,
+class sme_vector_to_tile_inst<bit Q, bits<2> sz, MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, ZPRRegOp zpr_ty,
string mnemonic>
- : sme_vector_to_tile_base<0b0, is_col, sz, (outs tile_ty:$ZAd),
+ : sme_vector_to_tile_base<Q, is_col, sz, (outs tile_ty:$ZAd),
(ins MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn),
mnemonic, "\t$ZAd[$Rv, $imm], $Pg/m, $Zn">;
-class sme_vector_to_tile_inst_Q<MatrixTileVectorOperand tile_ty,
- bit is_col, string mnemonic>
- : sme_vector_to_tile_base<0b1, is_col, 0b11, (outs tile_ty:$ZAd),
- (ins MatrixIndexGPR32Op12_15:$Rv, PPR3bAny:$Pg, ZPR128:$Zn),
- mnemonic, "\t$ZAd[$Rv], $Pg/m, $Zn">;
-
multiclass sme_vector_to_tile_aliases<Instruction inst,
MatrixTileVectorOperand tile_ty,
ZPRRegOp zpr_ty, Operand imm_ty> {
def : InstAlias<"mov\t$ZAd[$Rv, $imm], $Pg/m, $Zn",
- (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm0_15:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
+ (inst tile_ty:$ZAd, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm, PPR3bAny:$Pg, zpr_ty:$Zn), 1>;
}
multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
- def _B : sme_vector_to_tile_inst<0b00, !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, ZPR8, mnemonic> {
+ def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8,
+ TileVectorOpH8),
+ is_col, sme_elm_idx0_15, ZPR8, mnemonic> {
bits<4> imm;
let Inst{3-0} = imm;
}
- def _H : sme_vector_to_tile_inst<0b01, !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, ZPR16, mnemonic> {
+ def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16,
+ TileVectorOpH16),
+ is_col, sme_elm_idx0_7, ZPR16, mnemonic> {
bits<1> ZAd;
bits<3> imm;
let Inst{3} = ZAd;
let Inst{2-0} = imm;
}
- def _S : sme_vector_to_tile_inst<0b10, !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, ZPR32, mnemonic> {
+ def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32,
+ TileVectorOpH32),
+ is_col, sme_elm_idx0_3, ZPR32, mnemonic> {
bits<2> ZAd;
bits<2> imm;
let Inst{3-2} = ZAd;
let Inst{1-0} = imm;
}
- def _D : sme_vector_to_tile_inst<0b11, !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, ZPR64, mnemonic> {
+ def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64,
+ TileVectorOpH64),
+ is_col, sme_elm_idx0_1, ZPR64, mnemonic> {
bits<3> ZAd;
bits<1> imm;
let Inst{3-1} = ZAd;
let Inst{0} = imm;
}
- def _Q : sme_vector_to_tile_inst_Q<!if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col, mnemonic> {
+ def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128,
+ TileVectorOpH128),
+ is_col, sme_elm_idx0_0, ZPR128, mnemonic> {
bits<4> ZAd;
bits<1> imm;
let Inst{3-0} = ZAd;
@@ -525,26 +481,23 @@ multiclass sme_vector_v_to_tile<string mnemonic, bit is_col> {
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _B),
!if(is_col, TileVectorOpV8,
TileVectorOpH8),
- ZPR8, imm0_15>;
+ ZPR8, sme_elm_idx0_15>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _H),
!if(is_col, TileVectorOpV16,
TileVectorOpH16),
- ZPR16, imm0_7>;
+ ZPR16, sme_elm_idx0_7>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _S),
!if(is_col, TileVectorOpV32,
TileVectorOpH32),
- ZPR32, imm0_3>;
+ ZPR32, sme_elm_idx0_3>;
defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _D),
!if(is_col, TileVectorOpV64,
TileVectorOpH64),
- ZPR64, imm0_1>;
-
- def : InstAlias<"mov\t$ZAd[$Rv], $Pg/m, $Zn",
- (!cast<Instruction>(NAME # _Q) !if(is_col,
- TileVectorOpV128,
- TileVectorOpH128):$ZAd,
- MatrixIndexGPR32Op12_15:$Rv,
- PPR3bAny:$Pg, ZPR128:$Zn), 1>;
+ ZPR64, sme_elm_idx0_1>;
+ defm : sme_vector_to_tile_aliases<!cast<Instruction>(NAME # _Q),
+ !if(is_col, TileVectorOpV128,
+ TileVectorOpH128),
+ ZPR128, sme_elm_idx0_0>;
}
multiclass sme_vector_to_tile<string mnemonic> {
@@ -569,19 +522,13 @@ class sme_tile_to_vector_base<bit Q, bit V, bits<2> sz, dag outs, dag ins,
let Inst{4-0} = Zd;
}
-class sme_tile_to_vector_inst<bits<2> sz, ZPRRegOp zpr_ty,
+class sme_tile_to_vector_inst<bit Q, bits<2> sz, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
bit is_col, Operand imm_ty, string mnemonic>
- : sme_tile_to_vector_base<0b0, is_col, sz, (outs zpr_ty:$Zd),
+ : sme_tile_to_vector_base<Q, is_col, sz, (outs zpr_ty:$Zd),
(ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv, $imm]">;
-class sme_tile_to_vector_inst_Q<MatrixTileVectorOperand tile_ty,
- bit is_col, string mnemonic>
- : sme_tile_to_vector_base<0b1, is_col, 0b11, (outs ZPR128:$Zd),
- (ins PPR3bAny:$Pg, tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rv),
- mnemonic, "\t$Zd, $Pg/m, $ZAn[$Rv]">;
-
multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
MatrixTileVectorOperand tile_ty,
Operand imm_ty > {
@@ -590,62 +537,58 @@ multiclass sme_tile_to_vector_aliases<Instruction inst, ZPRRegOp zpr_ty,
}
multiclass sme_tile_to_vector_v<string mnemonic, bit is_col> {
- def _B : sme_tile_to_vector_inst<0b00, ZPR8, !if(is_col, TileVectorOpV8,
- TileVectorOpH8),
- is_col, imm0_15, mnemonic> {
+ def _B : sme_tile_to_vector_inst<0b0, 0b00, ZPR8, !if(is_col, TileVectorOpV8,
+ TileVectorOpH8),
+ is_col, sme_elm_idx0_15, mnemonic> {
bits<4> imm;
let Inst{8-5} = imm;
}
- def _H : sme_tile_to_vector_inst<0b01, ZPR16, !if(is_col, TileVectorOpV16,
- TileVectorOpH16),
- is_col, imm0_7, mnemonic> {
+ def _H : sme_tile_to_vector_inst<0b0, 0b01, ZPR16, !if(is_col, TileVectorOpV16,
+ TileVectorOpH16),
+ is_col, sme_elm_idx0_7, mnemonic> {
bits<1> ZAn;
bits<3> imm;
let Inst{8} = ZAn;
let Inst{7-5} = imm;
}
- def _S : sme_tile_to_vector_inst<0b10, ZPR32, !if(is_col, TileVectorOpV32,
- TileVectorOpH32),
- is_col, imm0_3, mnemonic> {
+ def _S : sme_tile_to_vector_inst<0b0, 0b10, ZPR32, !if(is_col, TileVectorOpV32,
+ TileVectorOpH32),
+ is_col, sme_elm_idx0_3, mnemonic> {
bits<2> ZAn;
bits<2> imm;
let Inst{8-7} = ZAn;
let Inst{6-5} = imm;
}
- def _D : sme_tile_to_vector_inst<0b11, ZPR64, !if(is_col, TileVectorOpV64,
- TileVectorOpH64),
- is_col, imm0_1, mnemonic> {
+ def _D : sme_tile_to_vector_inst<0b0, 0b11, ZPR64, !if(is_col, TileVectorOpV64,
+ TileVectorOpH64),
+ is_col, sme_elm_idx0_1, mnemonic> {
bits<3> ZAn;
bits<1> imm;
let Inst{8-6} = ZAn;
let Inst{5} = imm;
}
- def _Q : sme_tile_to_vector_inst_Q<!if(is_col, TileVectorOpV128,
- TileVectorOpH128),
- is_col, mnemonic> {
+ def _Q : sme_tile_to_vector_inst<0b1, 0b11, ZPR128, !if(is_col, TileVectorOpV128,
+ TileVectorOpH128),
+ is_col, sme_elm_idx0_0, mnemonic> {
bits<4> ZAn;
let Inst{8-5} = ZAn;
}
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _B), ZPR8,
!if(is_col, TileVectorOpV8,
- TileVectorOpH8), imm0_15>;
+ TileVectorOpH8), sme_elm_idx0_15>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _H), ZPR16,
!if(is_col, TileVectorOpV16,
- TileVectorOpH16), imm0_7>;
+ TileVectorOpH16), sme_elm_idx0_7>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _S), ZPR32,
!if(is_col, TileVectorOpV32,
- TileVectorOpH32), imm0_3>;
+ TileVectorOpH32), sme_elm_idx0_3>;
defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _D), ZPR64,
!if(is_col, TileVectorOpV64,
- TileVectorOpH64), imm0_1>;
-
- def : InstAlias<"mov\t$Zd, $Pg/m, $ZAn[$Rv]",
- (!cast<Instruction>(NAME # _Q) ZPR128:$Zd, PPR3bAny:$Pg,
- !if(is_col,
- TileVectorOpV128,
- TileVectorOpH128):$ZAn,
- MatrixIndexGPR32Op12_15:$Rv), 1>;
+ TileVectorOpH64), sme_elm_idx0_1>;
+ defm : sme_tile_to_vector_aliases<!cast<Instruction>(NAME # _Q), ZPR128,
+ !if(is_col, TileVectorOpV128,
+ TileVectorOpH128), sme_elm_idx0_0>;
}
multiclass sme_tile_to_vector<string mnemonic> {
@@ -736,57 +679,48 @@ multiclass sve2_clamp<string asm, bit U> {
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;
}
-class sve2_int_perm_dup_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
- : I<(outs ppr_ty:$Pd), (ins PPRAny:$Pg, ppr_ty:$Pn,
- MatrixIndexGPR32Op12_15:$Rm, imm_ty:$imm),
- asm, "\t$Pd, $Pg/z, $Pn[$Rm, $imm]", "", []>,
+class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
+ : I<(outs PPRAny:$Pd), (ins PPRAny:$Pn, ppr_ty:$Pm,
+ MatrixIndexGPR32Op12_15:$Rv, imm_ty:$imm),
+ asm, "\t$Pd, $Pn, $Pm[$Rv, $imm]", "", []>,
Sched<[]> {
- bits<2> Rm;
- bits<4> Pg;
+ bits<2> Rv;
bits<4> Pn;
+ bits<4> Pm;
bits<4> Pd;
let Inst{31-24} = 0b00100101;
let Inst{21} = 0b1;
- let Inst{17-16} = Rm;
+ let Inst{17-16} = Rv;
let Inst{15-14} = 0b01;
- let Inst{13-10} = Pg;
+ let Inst{13-10} = Pn;
let Inst{9} = 0b0;
- let Inst{8-5} = Pn;
+ let Inst{8-5} = Pm;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
}
-multiclass sve2_int_perm_dup_p<string asm> {
- def _B : sve2_int_perm_dup_p<asm, PPR8, imm0_15> {
+multiclass sve2_int_perm_sel_p<string asm> {
+ def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
let Inst{20-19} = imm{1-0};
let Inst{18} = 0b1;
}
- def _H : sve2_int_perm_dup_p<asm, PPR16, imm0_7> {
+ def _H : sve2_int_perm_sel_p<asm, PPR16, sme_elm_idx0_7> {
bits<3> imm;
let Inst{23-22} = imm{2-1};
let Inst{20} = imm{0};
let Inst{19-18} = 0b10;
}
- def _S : sve2_int_perm_dup_p<asm, PPR32, imm0_3> {
+ def _S : sve2_int_perm_sel_p<asm, PPR32, sme_elm_idx0_3> {
bits<2> imm;
let Inst{23-22} = imm{1-0};
let Inst{20-18} = 0b100;
}
- def _D : sve2_int_perm_dup_p<asm, PPR64, imm0_1> {
+ def _D : sve2_int_perm_sel_p<asm, PPR64, sme_elm_idx0_1> {
bits<1> imm;
let Inst{23} = imm;
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}
-
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _B) PPR8:$Pd, PPRAny:$Pg, PPR8:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _H) PPR16:$Pd, PPRAny:$Pg, PPR16:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _S) PPR32:$Pd, PPRAny:$Pg, PPR32:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
- def : InstAlias<"dup\t$Pd, $Pg/z, $Pn[$Rm]",
- (!cast<Instruction>(NAME # _D) PPR64:$Pd, PPRAny:$Pg, PPR64:$Pn, MatrixIndexGPR32Op12_15:$Rm, 0), 1>;
}
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 02d3a765a802..010ffa1502de 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -264,16 +264,22 @@ def sve_cnt_mul_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, false>">;
def sve_cnt_shl_imm : ComplexPattern<i32, 1, "SelectCntImm<1, 16, 1, true>">;
-def sve_ext_imm_0_1 : ComplexPattern<i32, 1, "SelectEXTImm<1, 8>">;
-def sve_ext_imm_0_3 : ComplexPattern<i32, 1, "SelectEXTImm<3, 4>">;
-def sve_ext_imm_0_7 : ComplexPattern<i32, 1, "SelectEXTImm<7, 2>">;
-def sve_ext_imm_0_15 : ComplexPattern<i32, 1, "SelectEXTImm<15, 1>">;
+def sve_ext_imm_0_31 : ComplexPattern<i32, 1, "SelectEXTImm<31, 8>">;
+def sve_ext_imm_0_63 : ComplexPattern<i32, 1, "SelectEXTImm<63, 4>">;
+def sve_ext_imm_0_127 : ComplexPattern<i32, 1, "SelectEXTImm<127, 2>">;
+def sve_ext_imm_0_255 : ComplexPattern<i32, 1, "SelectEXTImm<255, 1>">;
def int_aarch64_sve_cntp_oneuse : PatFrag<(ops node:$pred, node:$src2),
(int_aarch64_sve_cntp node:$pred, node:$src2), [{
return N->hasOneUse();
}]>;
+def step_vector_oneuse : PatFrag<(ops node:$idx),
+ (step_vector node:$idx), [{
+ return N->hasOneUse();
+}]>;
+
+
//===----------------------------------------------------------------------===//
// SVE PTrue - These are used extensively throughout the pattern matching so
// it's important we define them first.
@@ -321,7 +327,7 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
-let Predicates = [HasSVE] in {
+let Predicates = [HasSVEorStreamingSVE] in {
defm PTRUE : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
defm PTRUES : sve_int_ptrue<0b001, "ptrues", null_frag>;
}
@@ -484,6 +490,21 @@ class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))),
(inst $Rn, i32:$imm)>;
+class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ FPImmLeaf immL, int imm,
+ Instruction inst>
+: Pat<(vt (op (pt PPR_3b:$Pg), (vt ZPR:$Zs1), (vt (AArch64dup (it immL))))),
+ (inst $Pg, $Zs1, imm)>;
+
+class SVE_2_Op_Fp_Imm_Pat_Zero<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ FPImmLeaf immL, int imm,
+ Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Zs1, (SVEDup0)),
+ (vt (AArch64dup (it immL))))),
+ (inst $Pg, $Zs1, imm)>;
+
//
// Pseudo -> Instruction mappings
//
@@ -621,6 +642,8 @@ class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Pdn = $_Pdn";
let Defs = [NZCV];
+ let isPTestLike = 1;
+ let ElementSize = pprty.ElementSize;
}
multiclass sve_int_pfirst<bits<5> opc, string asm, SDPatternOperator op> {
@@ -912,13 +935,43 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
let Constraints = "$Rdn = $_Rdn";
}
-multiclass sve_int_pred_pattern_a<bits<3> opc, string asm> {
- def NAME : sve_int_pred_pattern_a<opc, asm>;
+multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
+ SDPatternOperator op,
+ SDPatternOperator opcnt> {
+ let Predicates = [HasSVEorStreamingSVE] in {
+ def NAME : sve_int_pred_pattern_a<opc, asm>;
+
+ def : InstAlias<asm # "\t$Rdn, $pattern",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
+ def : InstAlias<asm # "\t$Rdn",
+ (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+ }
- def : InstAlias<asm # "\t$Rdn, $pattern",
- (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1), 1>;
- def : InstAlias<asm # "\t$Rdn",
- (!cast<Instruction>(NAME) GPR64:$Rdn, 0b11111, 1), 2>;
+ let Predicates = [HasSVEorStreamingSVE, UseScalarIncVL] in {
+ def : Pat<(i64 (op GPR64:$Rdn, (opcnt sve_pred_enum:$pattern))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, 1)>;
+
+ def : Pat<(i64 (op GPR64:$Rdn, (mul (opcnt sve_pred_enum:$pattern), (sve_cnt_mul_imm i32:$imm)))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+ def : Pat<(i64 (op GPR64:$Rdn, (shl (opcnt sve_pred_enum:$pattern), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ (!cast<Instruction>(NAME) GPR64:$Rdn, sve_pred_enum:$pattern, $imm)>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (i32 (trunc (opcnt (sve_pred_enum:$pattern)))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, 1),
+ sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (mul (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (sve_cnt_mul_imm i32:$imm)))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+ sub_32))>;
+
+ def : Pat<(i32 (op GPR32:$Rdn, (shl (i32 (trunc (opcnt (sve_pred_enum:$pattern)))), (i64 (sve_cnt_shl_imm i32:$imm))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME) (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ GPR32:$Rdn, sub_32), sve_pred_enum:$pattern, $imm),
+ sub_32))>;
+ }
}
class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
@@ -1270,10 +1323,15 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv4i32, op, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv2i64, op, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv2f16, op, nxv2f16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv4f16, op, nxv4f16, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv8f16, op, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv2f32, op, nxv2f32, !cast<Instruction>(NAME # _D)>;
def : SVE_1_Op_Pat<nxv4f32, op, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv2f64, op, nxv2f64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv2bf16, op, nxv2bf16, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv4bf16, op, nxv4bf16, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
@@ -1707,10 +1765,19 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
- def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
- def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
- def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
+ let DestructiveInstType = DestructiveBinaryImm in {
+ def _H : SVEPseudo2Instr<Ps # _H, 1>, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
+ def _S : SVEPseudo2Instr<Ps # _S, 1>, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
+ def _D : SVEPseudo2Instr<Ps # _D, 1>, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+ }
+
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_D")>;
}
class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
@@ -1775,7 +1842,7 @@ multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, imm32_0_7:$imm3),
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, zprty:$Zm, timm32_0_7:$imm3),
asm, "\t$Zdn, $_Zdn, $Zm, $imm3",
"",
[]>, Sched<[]> {
@@ -1800,12 +1867,46 @@ multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
def _S : sve_fp_ftmad<0b10, asm, ZPR32>;
def _D : sve_fp_ftmad<0b11, asm, ZPR64>;
- def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 imm32_0_7:$imm))),
- (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, imm32_0_7:$imm)>;
- def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 imm32_0_7:$imm))),
- (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, imm32_0_7:$imm)>;
- def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 imm32_0_7:$imm))),
- (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, imm32_0_7:$imm)>;
+ def : Pat<(nxv8f16 (op (nxv8f16 ZPR16:$Zn), (nxv8f16 ZPR16:$Zm), (i32 timm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _H) ZPR16:$Zn, ZPR16:$Zm, timm32_0_7:$imm)>;
+ def : Pat<(nxv4f32 (op (nxv4f32 ZPR32:$Zn), (nxv4f32 ZPR32:$Zm), (i32 timm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _S) ZPR32:$Zn, ZPR32:$Zm, timm32_0_7:$imm)>;
+ def : Pat<(nxv2f64 (op (nxv2f64 ZPR64:$Zn), (nxv2f64 ZPR64:$Zm), (i32 timm32_0_7:$imm))),
+ (!cast<Instruction>(NAME # _D) ZPR64:$Zn, ZPR64:$Zm, timm32_0_7:$imm)>;
+}
+
+multiclass sve_fp_2op_i_p_zds_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator ir_op = null_frag> {
+ def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesUndef>;
+
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_UNDEF_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_UNDEF_D")>;
+}
+
+multiclass sve_fp_2op_i_p_zds_zeroing_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
+ def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesZero>;
+ def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesZero>;
+ def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesZero>;
+
+ let AddedComplexity = 2 in {
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_ZERO_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_ZERO_H")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_ZERO_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_ZERO_S")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_ZERO_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_ZERO_D")>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -1938,7 +2039,7 @@ multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op,
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_fp_3op_p_zds_zx<SDPatternOperator op, SDPatternOperator rev_op> {
+multiclass sve_fp_3op_p_zds_zx {
def _UNDEF_H : PredThreeOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
def _UNDEF_S : PredThreeOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
def _UNDEF_D : PredThreeOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
@@ -2433,7 +2534,7 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
+ let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = Sz;
}
@@ -2482,9 +2583,12 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
}
multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
- def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
- def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
- def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
+ def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>,
+ SVEPseudo2Instr<NAME # _H, 1>;
+ def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>,
+ SVEPseudo2Instr<NAME # _S, 1>;
+ def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>,
+ SVEPseudo2Instr<NAME # _D, 1>;
def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
@@ -2492,6 +2596,17 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+ def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _UNDEF_H)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _UNDEF_S)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _UNDEF_D)>;
}
multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
@@ -4986,7 +5101,7 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ii<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse> {
+multiclass sve_int_index_ii<string asm> {
def _B : sve_int_index_ii<0b00, asm, ZPR8, simm5_8b>;
def _H : sve_int_index_ii<0b01, asm, ZPR16, simm5_16b>;
def _S : sve_int_index_ii<0b10, asm, ZPR32, simm5_32b>;
@@ -5029,7 +5144,7 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ir<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
+multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
def _B : sve_int_index_ir<0b00, asm, ZPR8, GPR32, simm5_8b>;
def _H : sve_int_index_ir<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ir<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -5096,7 +5211,7 @@ class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_ri<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse> {
+multiclass sve_int_index_ri<string asm> {
def _B : sve_int_index_ri<0b00, asm, ZPR8, GPR32, simm5_8b>;
def _H : sve_int_index_ri<0b01, asm, ZPR16, GPR32, simm5_16b>;
def _S : sve_int_index_ri<0b10, asm, ZPR32, GPR32, simm5_32b>;
@@ -5130,7 +5245,7 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{4-0} = Zd;
}
-multiclass sve_int_index_rr<string asm, SDPatternOperator step_vector, SDPatternOperator step_vector_oneuse, SDPatternOperator mulop> {
+multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
def _B : sve_int_index_rr<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_index_rr<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_index_rr<0b10, asm, ZPR32, GPR32>;
@@ -8333,3 +8448,4 @@ multiclass sve_int_bin_pred_all_active_bhsd<SDPatternOperator op> {
def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
}
+
diff --git a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
index 79dcca8f8458..e72dccdc4b78 100644
--- a/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
+++ b/llvm/lib/Target/AArch64/SVEIntrinsicOpts.cpp
@@ -1,9 +1,8 @@
//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -59,6 +58,10 @@ private:
bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
SmallSetVector<IntrinsicInst *, 4> &PTrues);
bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
+ bool optimizePredicateStore(Instruction *I);
+ bool optimizePredicateLoad(Instruction *I);
+
+ bool optimizeInstructions(SmallSetVector<Function *, 4> &Functions);
/// Operates at the function-scope. I.e., optimizations are applied local to
/// the functions themselves.
@@ -276,11 +279,166 @@ bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
return Changed;
}
+// This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce
+// scalable stores as late as possible
+bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
+ auto *F = I->getFunction();
+ auto Attr = F->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+
+ unsigned MinVScale, MaxVScale;
+ std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ // The transform needs to know the exact runtime length of scalable vectors
+ if (MinVScale != MaxVScale || MinVScale == 0)
+ return false;
+
+ auto *PredType =
+ ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
+ auto *FixedPredType =
+ FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);
+
+ // If we have a store..
+ auto *Store = dyn_cast<StoreInst>(I);
+ if (!Store || !Store->isSimple())
+ return false;
+
+  // ..that is storing a predicate vector's worth of bits..
+ if (Store->getOperand(0)->getType() != FixedPredType)
+ return false;
+
+ // ..where the value stored comes from a vector extract..
+ auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0));
+ if (!IntrI ||
+ IntrI->getIntrinsicID() != Intrinsic::experimental_vector_extract)
+ return false;
+
+ // ..that is extracting from index 0..
+ if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero())
+ return false;
+
+  // ..where the value being extracted from comes from a bitcast
+ auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0));
+ if (!BitCast)
+ return false;
+
+ // ..and the bitcast is casting from predicate type
+ if (BitCast->getOperand(0)->getType() != PredType)
+ return false;
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(I);
+
+ auto *PtrBitCast = Builder.CreateBitCast(
+ Store->getPointerOperand(),
+ PredType->getPointerTo(Store->getPointerAddressSpace()));
+ Builder.CreateStore(BitCast->getOperand(0), PtrBitCast);
+
+ Store->eraseFromParent();
+ if (IntrI->getNumUses() == 0)
+ IntrI->eraseFromParent();
+ if (BitCast->getNumUses() == 0)
+ BitCast->eraseFromParent();
+
+ return true;
+}
+
+// This is done in SVEIntrinsicOpts rather than InstCombine so that we introduce
+// scalable loads as late as possible
+bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
+ auto *F = I->getFunction();
+ auto Attr = F->getFnAttribute(Attribute::VScaleRange);
+ if (!Attr.isValid())
+ return false;
+
+ unsigned MinVScale, MaxVScale;
+ std::tie(MinVScale, MaxVScale) = Attr.getVScaleRangeArgs();
+ // The transform needs to know the exact runtime length of scalable vectors
+ if (MinVScale != MaxVScale || MinVScale == 0)
+ return false;
+
+ auto *PredType =
+ ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
+ auto *FixedPredType =
+ FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);
+
+ // If we have a bitcast..
+ auto *BitCast = dyn_cast<BitCastInst>(I);
+ if (!BitCast || BitCast->getType() != PredType)
+ return false;
+
+ // ..whose operand is a vector_insert..
+ auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
+ if (!IntrI ||
+ IntrI->getIntrinsicID() != Intrinsic::experimental_vector_insert)
+ return false;
+
+ // ..that is inserting into index zero of an undef vector..
+ if (!isa<UndefValue>(IntrI->getOperand(0)) ||
+ !cast<ConstantInt>(IntrI->getOperand(2))->isZero())
+ return false;
+
+ // ..where the value inserted comes from a load..
+ auto *Load = dyn_cast<LoadInst>(IntrI->getOperand(1));
+ if (!Load || !Load->isSimple())
+ return false;
+
+  // ..that is loading a predicate vector's worth of bits..
+ if (Load->getType() != FixedPredType)
+ return false;
+
+ IRBuilder<> Builder(I->getContext());
+ Builder.SetInsertPoint(Load);
+
+ auto *PtrBitCast = Builder.CreateBitCast(
+ Load->getPointerOperand(),
+ PredType->getPointerTo(Load->getPointerAddressSpace()));
+ auto *LoadPred = Builder.CreateLoad(PredType, PtrBitCast);
+
+ BitCast->replaceAllUsesWith(LoadPred);
+ BitCast->eraseFromParent();
+ if (IntrI->getNumUses() == 0)
+ IntrI->eraseFromParent();
+ if (Load->getNumUses() == 0)
+ Load->eraseFromParent();
+
+ return true;
+}
+
+bool SVEIntrinsicOpts::optimizeInstructions(
+ SmallSetVector<Function *, 4> &Functions) {
+ bool Changed = false;
+
+ for (auto *F : Functions) {
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();
+
+ // Traverse the DT with an rpo walk so we see defs before uses, allowing
+ // simplification to be done incrementally.
+ BasicBlock *Root = DT->getRoot();
+ ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
+ for (auto *BB : RPOT) {
+ for (Instruction &I : make_early_inc_range(*BB)) {
+ switch (I.getOpcode()) {
+ case Instruction::Store:
+ Changed |= optimizePredicateStore(&I);
+ break;
+ case Instruction::BitCast:
+ Changed |= optimizePredicateLoad(&I);
+ break;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
bool SVEIntrinsicOpts::optimizeFunctions(
SmallSetVector<Function *, 4> &Functions) {
bool Changed = false;
Changed |= optimizePTrueIntrinsicCalls(Functions);
+ Changed |= optimizeInstructions(Functions);
return Changed;
}
@@ -297,6 +455,8 @@ bool SVEIntrinsicOpts::runOnModule(Module &M) {
continue;
switch (F.getIntrinsicID()) {
+ case Intrinsic::experimental_vector_extract:
+ case Intrinsic::experimental_vector_insert:
case Intrinsic::aarch64_sve_ptrue:
for (User *U : F.users())
Functions.insert(cast<Instruction>(U)->getFunction());
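Both new rewrites above bail out unless the function carries a vscale_range attribute with equal, non-zero bounds, because only then is the byte size of a <vscale x 16 x i1> predicate a compile-time constant. A numeric sketch of that guard (standalone C++; the two-bytes-per-vscale factor comes from the MinVScale * 2 element count used for FixedPredType above):

// Returns the fixed size in bytes of an SVE predicate register when the
// vscale range pins it down, or 0 when the rewrite must be skipped.
unsigned fixedPredicateBytes(unsigned MinVScale, unsigned MaxVScale) {
  if (MinVScale != MaxVScale || MinVScale == 0)
    return 0;            // runtime vector length unknown -> no rewrite
  return MinVScale * 2;  // 16 x i1 bits per vscale unit = 2 bytes per unit
}

// fixedPredicateBytes(2, 2)  -> 4  (vscale_range(2,2), i.e. 256-bit vectors)
// fixedPredicateBytes(1, 16) -> 0  (range is not exact)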
diff --git a/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index a6796742117b..52c88fd0218d 100644
--- a/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/AArch64TargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheAArch64leTarget() {
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index ce6866154242..caee2acd2606 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -106,6 +106,25 @@ inline static unsigned getXRegFromWReg(unsigned Reg) {
return Reg;
}
+inline static unsigned getXRegFromXRegTuple(unsigned RegTuple) {
+ switch (RegTuple) {
+ case AArch64::X0_X1_X2_X3_X4_X5_X6_X7: return AArch64::X0;
+ case AArch64::X2_X3_X4_X5_X6_X7_X8_X9: return AArch64::X2;
+ case AArch64::X4_X5_X6_X7_X8_X9_X10_X11: return AArch64::X4;
+ case AArch64::X6_X7_X8_X9_X10_X11_X12_X13: return AArch64::X6;
+ case AArch64::X8_X9_X10_X11_X12_X13_X14_X15: return AArch64::X8;
+ case AArch64::X10_X11_X12_X13_X14_X15_X16_X17: return AArch64::X10;
+ case AArch64::X12_X13_X14_X15_X16_X17_X18_X19: return AArch64::X12;
+ case AArch64::X14_X15_X16_X17_X18_X19_X20_X21: return AArch64::X14;
+ case AArch64::X16_X17_X18_X19_X20_X21_X22_X23: return AArch64::X16;
+ case AArch64::X18_X19_X20_X21_X22_X23_X24_X25: return AArch64::X18;
+ case AArch64::X20_X21_X22_X23_X24_X25_X26_X27: return AArch64::X20;
+ case AArch64::X22_X23_X24_X25_X26_X27_X28_FP: return AArch64::X22;
+ }
+ // For anything else, return it unchanged.
+ return RegTuple;
+}
+
static inline unsigned getBRegFromDReg(unsigned Reg) {
switch (Reg) {
case AArch64::D0: return AArch64::B0;
@@ -435,6 +454,60 @@ namespace AArch64SVEPredPattern {
#include "AArch64GenSystemOperands.inc"
}
+/// Return the number of active elements for the VL1 to VL256 predicate
+/// patterns, and zero for all other patterns.
+inline unsigned getNumElementsFromSVEPredPattern(unsigned Pattern) {
+ switch (Pattern) {
+ default:
+ return 0;
+ case AArch64SVEPredPattern::vl1:
+ case AArch64SVEPredPattern::vl2:
+ case AArch64SVEPredPattern::vl3:
+ case AArch64SVEPredPattern::vl4:
+ case AArch64SVEPredPattern::vl5:
+ case AArch64SVEPredPattern::vl6:
+ case AArch64SVEPredPattern::vl7:
+ case AArch64SVEPredPattern::vl8:
+ return Pattern;
+ case AArch64SVEPredPattern::vl16:
+ return 16;
+ case AArch64SVEPredPattern::vl32:
+ return 32;
+ case AArch64SVEPredPattern::vl64:
+ return 64;
+ case AArch64SVEPredPattern::vl128:
+ return 128;
+ case AArch64SVEPredPattern::vl256:
+ return 256;
+ }
+}
+
+/// Return the specific VL predicate pattern for the given number of elements.
+inline unsigned getSVEPredPatternFromNumElements(unsigned MinNumElts) {
+ switch (MinNumElts) {
+ default:
+ llvm_unreachable("unexpected element count for SVE predicate");
+ case 1:
+ return AArch64SVEPredPattern::vl1;
+ case 2:
+ return AArch64SVEPredPattern::vl2;
+ case 4:
+ return AArch64SVEPredPattern::vl4;
+ case 8:
+ return AArch64SVEPredPattern::vl8;
+ case 16:
+ return AArch64SVEPredPattern::vl16;
+ case 32:
+ return AArch64SVEPredPattern::vl32;
+ case 64:
+ return AArch64SVEPredPattern::vl64;
+ case 128:
+ return AArch64SVEPredPattern::vl128;
+ case 256:
+ return AArch64SVEPredPattern::vl256;
+ }
+}
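
A purely illustrative round-trip check for the two helpers added above; the harness is not part of the patch, assumes this header is included, and relies on the vl1 to vl8 pattern values equaling their element counts, as the first switch above assumes:

#include <cassert>

// For every element count that has a dedicated VLn pattern, mapping to the
// pattern and back should return the original count.
static void checkSVEPredPatternRoundTrip() {
  for (unsigned N : {1u, 2u, 4u, 8u, 16u, 32u, 64u, 128u, 256u}) {
    unsigned Pattern = getSVEPredPatternFromNumElements(N);
    assert(getNumElementsFromSVEPredPattern(Pattern) == N &&
           "each VLn pattern should encode exactly N active elements");
  }
}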
+
namespace AArch64ExactFPImm {
struct ExactFPImm {
const char *Name;
@@ -552,6 +625,7 @@ AArch64StringToVectorLayout(StringRef LayoutStr) {
namespace AArch64SysReg {
struct SysReg {
const char *Name;
+ const char *AltName;
unsigned Encoding;
bool Readable;
bool Writeable;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index ca088e63e03c..958e8c9e5bc5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -15,17 +15,7 @@
namespace llvm {
-class FunctionPass;
-class GCNTargetMachine;
-class ImmutablePass;
-class MachineFunctionPass;
-class ModulePass;
-class Pass;
-class Target;
class TargetMachine;
-class TargetOptions;
-class PassRegistry;
-class Module;
// GlobalISel passes
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
@@ -35,16 +25,6 @@ FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);
-// R600 Passes
-FunctionPass *createR600VectorRegMerger();
-FunctionPass *createR600ExpandSpecialInstrsPass();
-FunctionPass *createR600EmitClauseMarkers();
-FunctionPass *createR600ClauseMergePass();
-FunctionPass *createR600Packetizer();
-FunctionPass *createR600ControlFlowFinalizer();
-FunctionPass *createAMDGPUCFGStructurizerPass();
-FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
-
// SI Passes
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
@@ -114,10 +94,23 @@ ModulePass *createAMDGPUFixFunctionBitcastsPass();
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
extern char &AMDGPUFixFunctionBitcastsID;
+ModulePass *createAMDGPUCtorDtorLoweringPass();
+void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &);
+extern char &AMDGPUCtorDtorLoweringID;
+
FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;
+FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
+void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
+extern char &AMDGPUPromoteKernelArgumentsID;
+
+struct AMDGPUPromoteKernelArgumentsPass
+ : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;
@@ -172,21 +165,6 @@ extern char &AMDGPURewriteOutArgumentsID;
void initializeGCNDPPCombinePass(PassRegistry &);
extern char &GCNDPPCombineID;
-void initializeR600ClauseMergePassPass(PassRegistry &);
-extern char &R600ClauseMergePassID;
-
-void initializeR600ControlFlowFinalizerPass(PassRegistry &);
-extern char &R600ControlFlowFinalizerID;
-
-void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
-extern char &R600ExpandSpecialInstrsPassID;
-
-void initializeR600VectorRegMergerPass(PassRegistry &);
-extern char &R600VectorRegMergerID;
-
-void initializeR600PacketizerPass(PassRegistry &);
-extern char &R600PacketizerID;
-
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
@@ -278,7 +256,6 @@ private:
bool GlobalOpt;
};
-ModulePass *createR600OpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -390,9 +367,9 @@ namespace AMDGPUAS {
BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
- /// Address space for direct addressible parameter memory (CONST0).
+ /// Address space for direct addressable parameter memory (CONST0).
PARAM_D_ADDRESS = 6,
- /// Address space for indirect addressible parameter memory (VTX1).
+ /// Address space for indirect addressable parameter memory (VTX1).
PARAM_I_ADDRESS = 7,
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7991f3d2a6b2..e606f0e8fc3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -18,7 +18,6 @@ def p4 : PtrValueType<i64, 4>;
def p5 : PtrValueType<i32, 5>;
def p6 : PtrValueType<i32, 6>;
-
class BoolToList<bit Value> {
list<int> ret = !if(Value, [1]<int>, []<int>);
}
@@ -416,7 +415,7 @@ def FeatureDPP : SubtargetFeature<"dpp",
"Support DPP (Data Parallel Primitives) extension"
>;
-// DPP8 allows arbitrary cross-lane swizzling withing groups of 8 lanes.
+// DPP8 allows arbitrary cross-lane swizzling within groups of 8 lanes.
def FeatureDPP8 : SubtargetFeature<"dpp8",
"HasDPP8",
"true",
@@ -1351,7 +1350,7 @@ def NotLDSRequiresM0Init : Predicate<"!Subtarget->ldsRequiresM0Init()">;
def HasDSAddTid : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
-def HasLDSFPAtomics : Predicate<"Subtarget->hasLDSFPAtomics()">,
+def HasLDSFPAtomicAdd : Predicate<"Subtarget->hasLDSFPAtomicAdd()">,
AssemblerPredicate<(all_of FeatureGFX8Insts)>;
def HasAddNoCarryInsts : Predicate<"Subtarget->hasAddNoCarry()">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 88b88a04a7d1..dd3eb3849eac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUAliasAnalysis.h"
+#include "AMDGPU.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
@@ -37,6 +38,10 @@ ImmutablePass *llvm::createAMDGPUExternalAAWrapperPass() {
return new AMDGPUExternalAAWrapper();
}
+AMDGPUAAWrapperPass::AMDGPUAAWrapperPass() : ImmutablePass(ID) {
+ initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
@@ -94,7 +99,7 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
getUnderlyingObject(A.Ptr->stripPointerCastsForAliasAnalysis());
if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
// If a generic pointer is loaded from the constant address space, it
- // could only be a GLOBAL or CONSTANT one as that address space is soley
+ // could only be a GLOBAL or CONSTANT one as that address space is solely
// prepared on the host side, where only GLOBAL or CONSTANT variables are
// visible. Note that this even holds for regular functions.
if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
index 44de40d4aa7f..22be014813b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h
@@ -12,13 +12,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUALIASANALYSIS_H
-#include "AMDGPU.h"
#include "llvm/Analysis/AliasAnalysis.h"
namespace llvm {
class DataLayout;
-class MDNode;
class MemoryLocation;
/// A simple AA result that uses TBAA metadata to answer queries.
@@ -67,9 +65,7 @@ class AMDGPUAAWrapperPass : public ImmutablePass {
public:
static char ID;
- AMDGPUAAWrapperPass() : ImmutablePass(ID) {
- initializeAMDGPUAAWrapperPassPass(*PassRegistry::getPassRegistry());
- }
+ AMDGPUAAWrapperPass();
AMDGPUAAResult &getResult() { return *Result; }
const AMDGPUAAResult &getResult() const { return *Result; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 2af9fc955875..2e24e9f929d2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -15,6 +15,7 @@
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -70,7 +71,7 @@ recursivelyVisitUsers(GlobalValue &GV,
// and just let us hit the error when we can't handle this.
//
// Unfortunately, clang adds noinline to all functions at -O0. We have
- // to override this here. until that's fixed.
+ // to override this here until that's fixed.
F->removeFnAttr(Attribute::NoInline);
FuncsToAlwaysInline.insert(F);
@@ -90,9 +91,13 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
SmallPtrSet<Function *, 8> FuncsToNoInline;
+ Triple TT(M.getTargetTriple());
for (GlobalAlias &A : M.aliases()) {
if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+ if (TT.getArch() == Triple::amdgcn &&
+ A.getLinkage() != GlobalValue::InternalLinkage)
+ continue;
A.replaceAllUsesWith(F);
AliasesToRemove.push_back(&A);
}
@@ -122,7 +127,7 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
unsigned AS = GV.getAddressSpace();
if ((AS == AMDGPUAS::REGION_ADDRESS) ||
(AS == AMDGPUAS::LOCAL_ADDRESS &&
- !AMDGPUTargetMachine::EnableLowerModuleLDS))
+ (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index af6dfc07eb50..52791dfd9d93 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -6,8 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This pass adds target attributes to functions which use intrinsics
-/// which will impact calling convention lowering.
+/// \file This pass propagates the uniform-work-group-size attribute from
+/// kernels to leaf functions when possible. It also adds additional attributes
+/// as hints for later ABI lowering optimizations.
//
//===----------------------------------------------------------------------===//
@@ -25,22 +26,11 @@
using namespace llvm;
namespace {
-static constexpr StringLiteral ImplicitAttrNames[] = {
- // X ids unnecessarily propagated to kernels.
- "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
- "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
- "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
- "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
- "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
-
class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
private:
const TargetMachine *TM = nullptr;
- SmallVector<CallGraphNode*, 8> NodeList;
bool addFeatureAttributes(Function &F);
- bool processUniformWorkGroupAttribute();
- bool propagateUniformWorkGroupAttribute(Function &Caller, Function &Callee);
public:
static char ID;
@@ -58,12 +48,6 @@ public:
AU.setPreservesAll();
CallGraphSCCPass::getAnalysisUsage(AU);
}
-
- static bool visitConstantExpr(const ConstantExpr *CE);
- static bool visitConstantExprsRecursively(
- const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited, bool IsFunc,
- bool HasApertureRegs);
};
} // end anonymous namespace
@@ -75,212 +59,11 @@ char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
"Add AMDGPU function attributes", false, false)
-
-// The queue ptr is only needed when casting to flat, not from it.
-static bool castRequiresQueuePtr(unsigned SrcAS) {
- return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
-}
-
-static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) {
- return castRequiresQueuePtr(ASC->getSrcAddressSpace());
-}
-
-static bool isDSAddress(const Constant *C) {
- const GlobalValue *GV = dyn_cast<GlobalValue>(C);
- if (!GV)
- return false;
- unsigned AS = GV->getAddressSpace();
- return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
-}
-
-bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) {
- if (CE->getOpcode() == Instruction::AddrSpaceCast) {
- unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
- return castRequiresQueuePtr(SrcAS);
- }
-
- return false;
-}
-
-bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively(
- const Constant *EntryC,
- SmallPtrSet<const Constant *, 8> &ConstantExprVisited,
- bool IsFunc, bool HasApertureRegs) {
-
- if (!ConstantExprVisited.insert(EntryC).second)
- return false;
-
- SmallVector<const Constant *, 16> Stack;
- Stack.push_back(EntryC);
-
- while (!Stack.empty()) {
- const Constant *C = Stack.pop_back_val();
-
- // We need to trap on DS globals in non-entry functions.
- if (IsFunc && isDSAddress(C))
- return true;
-
- // Check this constant expression.
- if (const auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (!HasApertureRegs && visitConstantExpr(CE))
- return true;
- }
-
- // Visit all sub-expressions.
- for (const Use &U : C->operands()) {
- const auto *OpC = dyn_cast<Constant>(U);
- if (!OpC)
- continue;
-
- if (!ConstantExprVisited.insert(OpC).second)
- continue;
-
- Stack.push_back(OpC);
- }
- }
-
- return false;
-}
-
-// We do not need to note the x workitem or workgroup id because they are always
-// initialized.
-//
-// TODO: We should not add the attributes if the known compile time workgroup
-// size is 1 for y/z.
-static StringRef intrinsicToAttrName(Intrinsic::ID ID,
- bool &NonKernelOnly,
- bool &IsQueuePtr) {
- switch (ID) {
- case Intrinsic::amdgcn_workitem_id_x:
- NonKernelOnly = true;
- return "amdgpu-work-item-id-x";
- case Intrinsic::amdgcn_workgroup_id_x:
- NonKernelOnly = true;
- return "amdgpu-work-group-id-x";
- case Intrinsic::amdgcn_workitem_id_y:
- case Intrinsic::r600_read_tidig_y:
- return "amdgpu-work-item-id-y";
- case Intrinsic::amdgcn_workitem_id_z:
- case Intrinsic::r600_read_tidig_z:
- return "amdgpu-work-item-id-z";
- case Intrinsic::amdgcn_workgroup_id_y:
- case Intrinsic::r600_read_tgid_y:
- return "amdgpu-work-group-id-y";
- case Intrinsic::amdgcn_workgroup_id_z:
- case Intrinsic::r600_read_tgid_z:
- return "amdgpu-work-group-id-z";
- case Intrinsic::amdgcn_dispatch_ptr:
- return "amdgpu-dispatch-ptr";
- case Intrinsic::amdgcn_dispatch_id:
- return "amdgpu-dispatch-id";
- case Intrinsic::amdgcn_kernarg_segment_ptr:
- return "amdgpu-kernarg-segment-ptr";
- case Intrinsic::amdgcn_implicitarg_ptr:
- return "amdgpu-implicitarg-ptr";
- case Intrinsic::amdgcn_queue_ptr:
- case Intrinsic::amdgcn_is_shared:
- case Intrinsic::amdgcn_is_private:
- // TODO: Does not require queue ptr on gfx9+
- case Intrinsic::trap:
- case Intrinsic::debugtrap:
- IsQueuePtr = true;
- return "amdgpu-queue-ptr";
- default:
- return "";
- }
-}
-
-static bool handleAttr(Function &Parent, const Function &Callee,
- StringRef Name) {
- if (Callee.hasFnAttribute(Name)) {
- Parent.addFnAttr(Name);
- return true;
- }
- return false;
-}
-
-static void copyFeaturesToFunction(Function &Parent, const Function &Callee,
- bool &NeedQueuePtr) {
- if (handleAttr(Parent, Callee, "amdgpu-queue-ptr"))
- NeedQueuePtr = true;
-
- for (StringRef AttrName : ImplicitAttrNames)
- handleAttr(Parent, Callee, AttrName);
-}
-
-bool AMDGPUAnnotateKernelFeatures::processUniformWorkGroupAttribute() {
- bool Changed = false;
-
- for (auto *Node : reverse(NodeList)) {
- Function *Caller = Node->getFunction();
-
- for (auto I : *Node) {
- Function *Callee = std::get<1>(I)->getFunction();
- if (Callee)
- Changed = propagateUniformWorkGroupAttribute(*Caller, *Callee);
- }
- }
-
- return Changed;
-}
-
-bool AMDGPUAnnotateKernelFeatures::propagateUniformWorkGroupAttribute(
- Function &Caller, Function &Callee) {
-
- // Check for externally defined function
- if (!Callee.hasExactDefinition()) {
- Callee.addFnAttr("uniform-work-group-size", "false");
- if (!Caller.hasFnAttribute("uniform-work-group-size"))
- Caller.addFnAttr("uniform-work-group-size", "false");
-
- return true;
- }
- // Check if the Caller has the attribute
- if (Caller.hasFnAttribute("uniform-work-group-size")) {
- // Check if the value of the attribute is true
- if (Caller.getFnAttribute("uniform-work-group-size")
- .getValueAsString().equals("true")) {
- // Propagate the attribute to the Callee, if it does not have it
- if (!Callee.hasFnAttribute("uniform-work-group-size")) {
- Callee.addFnAttr("uniform-work-group-size", "true");
- return true;
- }
- } else {
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- } else {
- // If the attribute is absent, set it as false
- Caller.addFnAttr("uniform-work-group-size", "false");
- Callee.addFnAttr("uniform-work-group-size", "false");
- return true;
- }
- return false;
-}
-
bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
- const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(F);
- bool HasApertureRegs = ST.hasApertureRegs();
- SmallPtrSet<const Constant *, 8> ConstantExprVisited;
-
bool HaveStackObjects = false;
bool Changed = false;
- bool NeedQueuePtr = false;
bool HaveCall = false;
- bool HasIndirectCall = false;
bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
- CallingConv::ID CC = F.getCallingConv();
- bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
-
- // If this function hasAddressTaken() = true
- // then add all attributes corresponding to the implicit args.
- if (CallingConvSupportsAllImplicits &&
- F.hasAddressTaken(nullptr, true, true, true)) {
- for (StringRef AttrName : ImplicitAttrNames) {
- F.addFnAttr(AttrName);
- }
- Changed = true;
- }
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
@@ -293,65 +76,23 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
const Function *Callee =
dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
- // Note the occurence of indirect call.
+ // Note the occurrence of an indirect call.
if (!Callee) {
- if (!CB->isInlineAsm()) {
- HasIndirectCall = true;
+ if (!CB->isInlineAsm())
HaveCall = true;
- }
+
continue;
}
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {
HaveCall = true;
- copyFeaturesToFunction(F, *Callee, NeedQueuePtr);
Changed = true;
- } else {
- bool NonKernelOnly = false;
-
- if (!IsFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
- F.addFnAttr("amdgpu-kernarg-segment-ptr");
- } else {
- StringRef AttrName = intrinsicToAttrName(IID, NonKernelOnly,
- NeedQueuePtr);
- if (!AttrName.empty() && (IsFunc || !NonKernelOnly)) {
- F.addFnAttr(AttrName);
- Changed = true;
- }
- }
- }
- }
-
- if (NeedQueuePtr || (!IsFunc && HasApertureRegs))
- continue;
-
- if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (!HasApertureRegs && castRequiresQueuePtr(ASC)) {
- NeedQueuePtr = true;
- continue;
- }
- }
-
- for (const Use &U : I.operands()) {
- const auto *OpC = dyn_cast<Constant>(U);
- if (!OpC)
- continue;
-
- if (visitConstantExprsRecursively(OpC, ConstantExprVisited, IsFunc,
- HasApertureRegs)) {
- NeedQueuePtr = true;
- break;
}
}
}
}
- if (NeedQueuePtr) {
- F.addFnAttr("amdgpu-queue-ptr");
- Changed = true;
- }
-
// TODO: We could refine this to captured pointers that could possibly be
// accessed by flat instructions. For now this is mostly a poor way of
// estimating whether there are calls before argument lowering.
@@ -365,28 +106,6 @@ bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
Changed = true;
}
- // This pass cannot copy attributes from callees to callers
- // if there is an indirect call and in thus such cases,
- // hasAddressTaken() would be false for kernels and functions
- // making an indirect call (if they are themselves not indirectly called).
- // We must tag all such kernels/functions with all implicits attributes
- // for correctness.
- // e.g.
- // 1. Kernel K1 makes an indirect call to function F1.
- // Without detecting an indirect call in K1, this pass will not
- // add all implicit args to K1 (which is incorrect).
- // 2. Kernel K1 makes direct call to F1 which makes indirect call to function
- // F2.
- // Without detecting an indirect call in F1 (whose hasAddressTaken() is
- // false), the pass will not add all implicit args to F1 (which is
- // essential for correctness).
- if (CallingConvSupportsAllImplicits && HasIndirectCall) {
- for (StringRef AttrName : ImplicitAttrNames) {
- F.addFnAttr(AttrName);
- }
- Changed = true;
- }
-
return Changed;
}
@@ -394,14 +113,6 @@ bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false;
for (CallGraphNode *I : SCC) {
- // Build a list of CallGraphNodes from most number of uses to least
- if (I->getNumReferences())
- NodeList.push_back(I);
- else {
- processUniformWorkGroupAttribute();
- NodeList.clear();
- }
-
Function *F = I->getFunction();
// Ignore functions with graphics calling conventions, these are currently
// not allowed to have kernel arguments.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index cbc4ab212566..bb2e723f4ab0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -32,8 +32,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -678,7 +678,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
GCNSubtarget::MaxWaveScratchSize / STM.getWavefrontSize();
if (ProgInfo.ScratchSize > MaxScratchPerWorkitem) {
DiagnosticInfoStackSize DiagStackSize(MF.getFunction(),
- ProgInfo.ScratchSize, DS_Error);
+ ProgInfo.ScratchSize,
+ MaxScratchPerWorkitem, DS_Error);
MF.getFunction().getContext().diagnose(DiagStackSize);
}
@@ -697,11 +698,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
// This can happen due to a compiler bug or when using inline asm.
LLVMContext &Ctx = MF.getFunction().getContext();
- DiagnosticInfoResourceLimit Diag(MF.getFunction(),
- "addressable scalar registers",
- ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit,
- MaxAddressableNumSGPRs);
+ DiagnosticInfoResourceLimit Diag(
+ MF.getFunction(), "addressable scalar registers", ProgInfo.NumSGPR,
+ MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit);
Ctx.diagnose(Diag);
ProgInfo.NumSGPR = MaxAddressableNumSGPRs - 1;
}
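
The DiagnosticInfoResourceLimit calls in this patch all move the resource limit ahead of the severity and kind. A hedged sketch of the updated call shape, lifted from the replacement lines in these hunks; the helper name is made up:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Emit a resource-limit diagnostic with the argument order used by this
// patch: value first, then the limit, then severity and kind.
static void diagnoseSGPROveruse(const MachineFunction &MF, unsigned NumSGPR,
                                unsigned MaxSGPRs) {
  LLVMContext &Ctx = MF.getFunction().getContext();
  DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
                                   NumSGPR, MaxSGPRs, DS_Error,
                                   DK_ResourceLimit);
  Ctx.diagnose(Diag);
}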
@@ -717,18 +716,72 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
if (isShader(F.getCallingConv())) {
+ bool IsPixelShader =
+ F.getCallingConv() == CallingConv::AMDGPU_PS && !STM.isAmdHsaOS();
+
+ // Calculate the number of VGPR registers based on the SPI input registers
+ uint32_t InputEna = 0;
+ uint32_t InputAddr = 0;
+ unsigned LastEna = 0;
+
+ if (IsPixelShader) {
+ // Note for IsPixelShader:
+ // By this stage, all enabled inputs are tagged in InputAddr as well.
+ // We use InputAddr to determine whether an input counts against the VGPR
+ // total, and use InputEna only to determine the last relevant input; if
+ // extra arguments are used, we still have to honour InputAddr for any
+ // intermediate non-enabled inputs.
+ InputEna = MFI->getPSInputEnable();
+ InputAddr = MFI->getPSInputAddr();
+
+ // We only need to consider input args up to the last used arg.
+ assert((InputEna || InputAddr) &&
+ "PSInputAddr and PSInputEnable should "
+ "never both be 0 for AMDGPU_PS shaders");
+ // There are some rare circumstances where InputAddr is non-zero and
+ // InputEna can be set to 0. In this case we default to setting LastEna
+ // to 1.
+ LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
+ }
+
// FIXME: We should be using the number of registers determined during
// calling convention lowering to legalize the types.
const DataLayout &DL = F.getParent()->getDataLayout();
+ unsigned PSArgCount = 0;
+ unsigned IntermediateVGPR = 0;
for (auto &Arg : F.args()) {
unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
- if (Arg.hasAttribute(Attribute::InReg))
+ if (Arg.hasAttribute(Attribute::InReg)) {
WaveDispatchNumSGPR += NumRegs;
- else
- WaveDispatchNumVGPR += NumRegs;
+ } else {
+ // If this is a PS shader and we're processing the PS input args (the first
+ // 16 VGPRs), use the InputEna and InputAddr bits to define how many
+ // VGPRs are actually used.
+ // Any extra VGPR arguments are handled as normal arguments (and
+ // contribute to the VGPR count whether they're used or not).
+ if (IsPixelShader && PSArgCount < 16) {
+ if ((1 << PSArgCount) & InputAddr) {
+ if (PSArgCount < LastEna)
+ WaveDispatchNumVGPR += NumRegs;
+ else
+ IntermediateVGPR += NumRegs;
+ }
+ PSArgCount++;
+ } else {
+ // If there are extra arguments, we have to include the allocation for
+ // the unused (but enabled via InputAddr) input arguments.
+ if (IntermediateVGPR) {
+ WaveDispatchNumVGPR += IntermediateVGPR;
+ IntermediateVGPR = 0;
+ }
+ WaveDispatchNumVGPR += NumRegs;
+ }
+ }
}
ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
- ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+ ProgInfo.NumArchVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
+ ProgInfo.NumVGPR =
+ Info.getTotalNumVGPRs(STM, Info.NumAGPR, ProgInfo.NumArchVGPR);
}
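
A worked restatement of the pixel-shader VGPR counting above, written as a standalone helper. The function and its ArgVGPRs parameter are illustrative, not part of the patch, and findLastSet is replaced with a compiler builtin to keep the sketch self-contained:

#include <cstdint>
#include <vector>

// Count the VGPRs a PS wave dispatch needs: the first 16 VGPR args are PS
// inputs and count only when allocated in InputAddr; inputs past the last
// enabled one are held back and added only if further VGPR arguments follow.
// ArgVGPRs[i] is the VGPR size of the i-th non-inreg argument.
static unsigned countPSWaveDispatchVGPRs(uint32_t InputEna, uint32_t InputAddr,
                                         const std::vector<unsigned> &ArgVGPRs) {
  unsigned LastEna = InputEna ? (32 - __builtin_clz(InputEna)) : 1;
  unsigned Total = 0, Intermediate = 0, PSArgCount = 0;
  for (unsigned NumRegs : ArgVGPRs) {
    if (PSArgCount < 16) {
      if ((1u << PSArgCount) & InputAddr) {
        if (PSArgCount < LastEna)
          Total += NumRegs;        // at or before the last enabled input
        else
          Intermediate += NumRegs; // allocated but past the last enabled input
      }
      ++PSArgCount;
    } else {
      Total += Intermediate;       // extra args force the held allocations in
      Intermediate = 0;
      Total += NumRegs;
    }
  }
  return Total;
}
// Example: InputAddr = 0b1011, InputEna = 0b0001, four 1-VGPR inputs and no
// extra arguments -> LastEna = 1, so only input 0 counts and the result is 1;
// inputs 1 and 3 would only be added back if arguments beyond the 16 PS
// inputs existed.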
// Adjust number of registers used to meet default/requested minimum/maximum
@@ -745,11 +798,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// This can happen due to a compiler bug or when using inline asm to use
// the registers which are usually reserved for vcc etc.
LLVMContext &Ctx = MF.getFunction().getContext();
- DiagnosticInfoResourceLimit Diag(MF.getFunction(),
- "scalar registers",
- ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit,
- MaxAddressableNumSGPRs);
+ DiagnosticInfoResourceLimit Diag(MF.getFunction(), "scalar registers",
+ ProgInfo.NumSGPR, MaxAddressableNumSGPRs,
+ DS_Error, DK_ResourceLimit);
Ctx.diagnose(Diag);
ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
@@ -766,14 +817,16 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
LLVMContext &Ctx = MF.getFunction().getContext();
DiagnosticInfoResourceLimit Diag(MF.getFunction(), "user SGPRs",
- MFI->getNumUserSGPRs(), DS_Error);
+ MFI->getNumUserSGPRs(),
+ STM.getMaxNumUserSGPRs(), DS_Error);
Ctx.diagnose(Diag);
}
if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
LLVMContext &Ctx = MF.getFunction().getContext();
DiagnosticInfoResourceLimit Diag(MF.getFunction(), "local memory",
- MFI->getLDSSize(), DS_Error);
+ MFI->getLDSSize(),
+ STM.getLocalMemorySize(), DS_Error);
Ctx.diagnose(Diag);
}
@@ -1039,6 +1092,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
// kernarg_segment_alignment is specified as log of the alignment.
// The minimum alignment is 16.
+ // FIXME: The metadata treats the minimum as 4?
Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index d3a555bc228f..d5c60aa3be7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -26,7 +26,6 @@ struct AMDGPUResourceUsageAnalysis;
class AMDGPUTargetStreamer;
class MCCodeEmitter;
class MCOperand;
-class GCNSubtarget;
namespace AMDGPU {
namespace HSAMD {
@@ -55,9 +54,6 @@ private:
void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
void getAmdKernelCode(amd_kernel_code_t &Out, const SIProgramInfo &KernelInfo,
const MachineFunction &MF) const;
- void findNumUsedRegistersSI(const MachineFunction &MF,
- unsigned &NumSGPR,
- unsigned &NumVGPR) const;
/// Emit register usage information so that the GPU driver
/// can correctly setup the GPU state.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 3e9fdcb1618e..1e2cf3890d0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -541,7 +541,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
if (NeedResult)
ExclScan = buildShiftRight(B, NewV, Identity);
- // Read the value from the last lane, which has accumlated the values of
+ // Read the value from the last lane, which has accumulated the values of
// each active lane in the wavefront. This will be our new value which we
// will provide to the atomic operation.
Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 61b1d22edc33..f0aadab3302f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -22,48 +22,71 @@
using namespace llvm;
-static constexpr StringLiteral ImplicitAttrNames[] = {
- // X ids unnecessarily propagated to kernels.
- "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
- "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
- "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
- "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
- "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
+enum ImplicitArgumentMask {
+ NOT_IMPLICIT_INPUT = 0,
+
+ // SGPRs
+ DISPATCH_PTR = 1 << 0,
+ QUEUE_PTR = 1 << 1,
+ DISPATCH_ID = 1 << 2,
+ IMPLICIT_ARG_PTR = 1 << 3,
+ WORKGROUP_ID_X = 1 << 4,
+ WORKGROUP_ID_Y = 1 << 5,
+ WORKGROUP_ID_Z = 1 << 6,
+
+ // VGPRs:
+ WORKITEM_ID_X = 1 << 7,
+ WORKITEM_ID_Y = 1 << 8,
+ WORKITEM_ID_Z = 1 << 9,
+ ALL_ARGUMENT_MASK = (1 << 10) - 1
+};
+
+static constexpr std::pair<ImplicitArgumentMask,
+ StringLiteral> ImplicitAttrs[] = {
+ {DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+ {QUEUE_PTR, "amdgpu-no-queue-ptr"},
+ {DISPATCH_ID, "amdgpu-no-dispatch-id"},
+ {IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+ {WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+ {WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+ {WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"},
+ {WORKITEM_ID_X, "amdgpu-no-workitem-id-x"},
+ {WORKITEM_ID_Y, "amdgpu-no-workitem-id-y"},
+ {WORKITEM_ID_Z, "amdgpu-no-workitem-id-z"}
+};
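
The enum and table above use a negative encoding: each amdgpu-no-* attribute asserts that the corresponding implicit input is not needed, and the attributor below starts from ALL_ARGUMENT_MASK and removes bits as uses are found. A minimal sketch of mapping a surviving mask back to attribute names, mirroring what manifest() does later; the helper name is invented and the surrounding file's headers are assumed:

// Collect the amdgpu-no-* attribute names for every implicit input whose bit
// is still set in Mask, i.e. inputs known to be unused.
static SmallVector<StringRef, 10> attrsForKnownUnusedInputs(uint16_t Mask) {
  SmallVector<StringRef, 10> Names;
  for (const auto &KV : ImplicitAttrs)
    if (Mask & KV.first)
      Names.push_back(KV.second);
  return Names;
}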
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
-static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
- bool &IsQueuePtr) {
+static ImplicitArgumentMask
+intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &IsQueuePtr) {
switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:
NonKernelOnly = true;
- return "amdgpu-work-item-id-x";
+ return WORKITEM_ID_X;
case Intrinsic::amdgcn_workgroup_id_x:
NonKernelOnly = true;
- return "amdgpu-work-group-id-x";
+ return WORKGROUP_ID_X;
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::r600_read_tidig_y:
- return "amdgpu-work-item-id-y";
+ return WORKITEM_ID_Y;
case Intrinsic::amdgcn_workitem_id_z:
case Intrinsic::r600_read_tidig_z:
- return "amdgpu-work-item-id-z";
+ return WORKITEM_ID_Z;
case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
- return "amdgpu-work-group-id-y";
+ return WORKGROUP_ID_Y;
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
- return "amdgpu-work-group-id-z";
+ return WORKGROUP_ID_Z;
case Intrinsic::amdgcn_dispatch_ptr:
- return "amdgpu-dispatch-ptr";
+ return DISPATCH_PTR;
case Intrinsic::amdgcn_dispatch_id:
- return "amdgpu-dispatch-id";
- case Intrinsic::amdgcn_kernarg_segment_ptr:
- return "amdgpu-kernarg-segment-ptr";
+ return DISPATCH_ID;
case Intrinsic::amdgcn_implicitarg_ptr:
- return "amdgpu-implicitarg-ptr";
+ return IMPLICIT_ARG_PTR;
case Intrinsic::amdgcn_queue_ptr:
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
@@ -71,9 +94,9 @@ static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
case Intrinsic::trap:
case Intrinsic::debugtrap:
IsQueuePtr = true;
- return "amdgpu-queue-ptr";
+ return QUEUE_PTR;
default:
- return "";
+ return NOT_IMPLICIT_INPUT;
}
}
@@ -89,6 +112,7 @@ static bool isDSAddress(const Constant *C) {
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}
+namespace {
class AMDGPUInformationCache : public InformationCache {
public:
AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
@@ -105,6 +129,17 @@ public:
return ST.hasApertureRegs();
}
+ std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getFlatWorkGroupSizes(F);
+ }
+
+ std::pair<unsigned, unsigned>
+ getMaximumFlatWorkGroupRange(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
+ }
+
private:
/// Check if the ConstantExpr \p CE requires queue ptr attribute.
static bool visitConstExpr(const ConstantExpr *CE) {
@@ -163,8 +198,11 @@ private:
DenseMap<const Constant *, uint8_t> ConstantStatus;
};
-struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
- using Base = StateWrapper<BooleanState, AbstractAttribute>;
+struct AAAMDAttributes : public StateWrapper<
+ BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>, AbstractAttribute> {
+ using Base = StateWrapper<BitIntegerState<uint16_t, ALL_ARGUMENT_MASK, 0>,
+ AbstractAttribute>;
+
AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
@@ -183,24 +221,24 @@ struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
return (AA->getIdAddr() == &ID);
}
- virtual const DenseSet<StringRef> &getAttributes() const = 0;
-
/// Unique ID (due to the unique address)
static const char ID;
};
const char AAAMDAttributes::ID = 0;
-struct AAAMDWorkGroupSize
+struct AAUniformWorkGroupSize
: public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
- AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
/// Create an abstract attribute view for the position \p IRP.
- static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
- Attributor &A);
+ static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,
+ Attributor &A);
/// See AbstractAttribute::getName().
- const std::string getName() const override { return "AAAMDWorkGroupSize"; }
+ const std::string getName() const override {
+ return "AAUniformWorkGroupSize";
+ }
/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }
@@ -214,11 +252,11 @@ struct AAAMDWorkGroupSize
/// Unique ID (due to the unique address)
static const char ID;
};
-const char AAAMDWorkGroupSize::ID = 0;
+const char AAUniformWorkGroupSize::ID = 0;
-struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
- AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
- : AAAMDWorkGroupSize(IRP, A) {}
+struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
+ AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
+ : AAUniformWorkGroupSize(IRP, A) {}
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
@@ -244,10 +282,10 @@ struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
auto CheckCallSite = [&](AbstractCallSite CS) {
Function *Caller = CS.getInstruction()->getFunction();
- LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
+ LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
<< "->" << getAssociatedFunction()->getName() << "\n");
- const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
+ const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
Change = Change | clampStateAndIndicateChange(this->getState(),
@@ -286,11 +324,13 @@ struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
void trackStatistics() const override {}
};
-AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
- Attributor &A) {
+AAUniformWorkGroupSize &
+AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,
+ Attributor &A) {
if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
- return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
- llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
+ return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);
+ llvm_unreachable(
+ "AAUniformWorkGroupSize is only valid for function position");
}
struct AAAMDAttributesFunction : public AAAMDAttributes {
@@ -299,14 +339,13 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
void initialize(Attributor &A) override {
Function *F = getAssociatedFunction();
- CallingConv::ID CC = F->getCallingConv();
- bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
+ for (auto Attr : ImplicitAttrs) {
+ if (F->hasFnAttribute(Attr.second))
+ addKnownBits(Attr.first);
+ }
- // Don't add attributes to instrinsics
- if (F->isIntrinsic()) {
- indicatePessimisticFixpoint();
+ if (F->isDeclaration())
return;
- }
// Ignore functions with graphics calling conventions, these are currently
// not allowed to have kernel arguments.
@@ -314,94 +353,47 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
indicatePessimisticFixpoint();
return;
}
-
- for (StringRef Attr : ImplicitAttrNames) {
- if (F->hasFnAttribute(Attr))
- Attributes.insert(Attr);
- }
-
- // TODO: We shouldn't need this in the future.
- if (CallingConvSupportsAllImplicits &&
- F->hasAddressTaken(nullptr, true, true, true)) {
- for (StringRef AttrName : ImplicitAttrNames) {
- Attributes.insert(AttrName);
- }
- }
}
ChangeStatus updateImpl(Attributor &A) override {
Function *F = getAssociatedFunction();
- ChangeStatus Change = ChangeStatus::UNCHANGED;
- bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
- CallingConv::ID CC = F->getCallingConv();
- bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
- auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
-
- auto AddAttribute = [&](StringRef AttrName) {
- if (Attributes.insert(AttrName).second)
- Change = ChangeStatus::CHANGED;
- };
+ // The current assumed state used to determine a change.
+ auto OrigAssumed = getAssumed();
// Check for Intrinsics and propagate attributes.
const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
*this, this->getIRPosition(), DepClassTy::REQUIRED);
+ if (AAEdges.hasNonAsmUnknownCallee())
+ return indicatePessimisticFixpoint();
- // We have to assume that we can reach a function with these attributes.
- // We do not consider inline assembly as a unknown callee.
- if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
- for (StringRef AttrName : ImplicitAttrNames) {
- AddAttribute(AttrName);
- }
- }
+ bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
bool NeedsQueuePtr = false;
- bool HasCall = false;
+
for (Function *Callee : AAEdges.getOptimisticEdges()) {
Intrinsic::ID IID = Callee->getIntrinsicID();
- if (IID != Intrinsic::not_intrinsic) {
- if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
- AddAttribute("amdgpu-kernarg-segment-ptr");
- continue;
- }
-
- bool NonKernelOnly = false;
- StringRef AttrName =
- intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);
-
- if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
- AddAttribute(AttrName);
-
+ if (IID == Intrinsic::not_intrinsic) {
+ const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
+ *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+ *this &= AAAMD;
continue;
}
- HasCall = true;
- const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
- *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
- const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
- // Propagate implicit attributes from called function.
- for (StringRef AttrName : ImplicitAttrNames)
- if (CalleeAttributes.count(AttrName))
- AddAttribute(AttrName);
+ bool NonKernelOnly = false;
+ ImplicitArgumentMask AttrMask =
+ intrinsicToAttrMask(IID, NonKernelOnly, NeedsQueuePtr);
+ if (AttrMask != NOT_IMPLICIT_INPUT) {
+ if ((IsNonEntryFunc || !NonKernelOnly))
+ removeAssumedBits(AttrMask);
+ }
}
- HasCall |= AAEdges.hasUnknownCallee();
- if (!IsNonEntryFunc && HasCall)
- AddAttribute("amdgpu-calls");
-
- // Check the function body.
- auto CheckAlloca = [&](Instruction &I) {
- AddAttribute("amdgpu-stack-objects");
- return false;
- };
-
- bool UsedAssumedInformation = false;
- A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
- UsedAssumedInformation);
-
// If we found that we need amdgpu-queue-ptr, nothing else to do.
- if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
- AddAttribute("amdgpu-queue-ptr");
- return Change;
+ if (NeedsQueuePtr) {
+ removeAssumedBits(QUEUE_PTR);
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
auto CheckAddrSpaceCasts = [&](Instruction &I) {
@@ -419,60 +411,68 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// instructions, try it first.
// amdgpu-queue-ptr is not needed if aperture regs is present.
- if (!HasApertureRegs)
+ if (!HasApertureRegs) {
+ bool UsedAssumedInformation = false;
A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
{Instruction::AddrSpaceCast},
UsedAssumedInformation);
+ }
// If we found that we need amdgpu-queue-ptr, nothing else to do.
if (NeedsQueuePtr) {
- AddAttribute("amdgpu-queue-ptr");
- return Change;
+ removeAssumedBits(QUEUE_PTR);
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
- if (!IsNonEntryFunc && HasApertureRegs)
- return Change;
+ if (!IsNonEntryFunc && HasApertureRegs) {
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
+ }
for (BasicBlock &BB : *F) {
for (Instruction &I : BB) {
for (const Use &U : I.operands()) {
if (const auto *C = dyn_cast<Constant>(U)) {
if (InfoCache.needsQueuePtr(C, *F)) {
- AddAttribute("amdgpu-queue-ptr");
- return Change;
+ removeAssumedBits(QUEUE_PTR);
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
}
}
}
}
- return Change;
+ return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED :
+ ChangeStatus::UNCHANGED;
}
ChangeStatus manifest(Attributor &A) override {
SmallVector<Attribute, 8> AttrList;
LLVMContext &Ctx = getAssociatedFunction()->getContext();
- for (StringRef AttrName : Attributes)
- AttrList.push_back(Attribute::get(Ctx, AttrName));
+ for (auto Attr : ImplicitAttrs) {
+ if (isKnown(Attr.first))
+ AttrList.push_back(Attribute::get(Ctx, Attr.second));
+ }
return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
/* ForceReplace */ true);
}
const std::string getAsStr() const override {
- return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
- }
-
- const DenseSet<StringRef> &getAttributes() const override {
- return Attributes;
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "AMDInfo[";
+ for (auto Attr : ImplicitAttrs)
+ OS << ' ' << Attr.second;
+ OS << " ]";
+ return OS.str();
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
-
-private:
- DenseSet<StringRef> Attributes;
};
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
@@ -482,6 +482,118 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDAttributes is only valid for function position");
}
+/// Propagate amdgpu-flat-work-group-size attribute.
+struct AAAMDFlatWorkGroupSize
+ : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
+ using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
+ AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
+ : Base(IRP, 32) {}
+
+ /// See AbstractAttribute::getState(...).
+ IntegerRangeState &getState() override { return *this; }
+ const IntegerRangeState &getState() const override { return *this; }
+
+ void initialize(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned MinGroupSize, MaxGroupSize;
+ std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
+ intersectKnown(
+ ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto CheckCallSite = [&](AbstractCallSite CS) {
+ Function *Caller = CS.getInstruction()->getFunction();
+ LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
+ << "->" << getAssociatedFunction()->getName() << '\n');
+
+ const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
+ *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+
+ Change |=
+ clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
+
+ return true;
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Change;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ SmallVector<Attribute, 8> AttrList;
+ Function *F = getAssociatedFunction();
+ LLVMContext &Ctx = F->getContext();
+
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned Min, Max;
+ std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
+
+ // Don't add the attribute if it's the implied default.
+ if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
+ return ChangeStatus::UNCHANGED;
+
+ SmallString<10> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
+
+ AttrList.push_back(
+ Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
+ return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
+ /* ForceReplace */ true);
+ }
+
+ const std::string getAsStr() const override {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ OS << "AMDFlatWorkGroupSize[";
+ OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
+ OS << ']';
+ return OS.str();
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override {
+ return "AAAMDFlatWorkGroupSize";
+ }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDFlatWorkGroupSize
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+const char AAAMDFlatWorkGroupSize::ID = 0;
+
+AAAMDFlatWorkGroupSize &
+AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
+ Attributor &A) {
+ if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+ return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);
+ llvm_unreachable(
+ "AAAMDFlatWorkGroupSize is only valid for function position");
+}
+
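
The state above stores the inclusive flat-work-group-size bounds as a half-open ConstantRange, which is why initialize() passes Max + 1 and manifest() prints getUpper() - 1. A small standalone sketch of that encoding, assuming only ConstantRange and APInt; the helper names are invented:

#include <utility>
#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"

using namespace llvm;

// Encode the inclusive bounds [Min, Max] as the half-open range [Min, Max + 1)
// used by IntegerRangeState, and decode them back the way manifest() does.
static ConstantRange encodeFlatWorkGroupSize(unsigned Min, unsigned Max) {
  return ConstantRange(APInt(32, Min), APInt(32, Max + 1));
}

static std::pair<unsigned, unsigned>
decodeFlatWorkGroupSize(const ConstantRange &CR) {
  return {static_cast<unsigned>(CR.getLower().getZExtValue()),
          static_cast<unsigned>(CR.getUpper().getZExtValue() - 1)};
}
// e.g. a default [1, 1024] range round-trips through [1, 1025).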
class AMDGPUAttributor : public ModulePass {
public:
AMDGPUAttributor() : ModulePass(ID) {}
@@ -500,17 +612,28 @@ public:
bool runOnModule(Module &M) override {
SetVector<Function *> Functions;
AnalysisGetter AG;
- for (Function &F : M)
- Functions.insert(&F);
+ for (Function &F : M) {
+ if (!F.isIntrinsic())
+ Functions.insert(&F);
+ }
CallGraphUpdater CGUpdater;
BumpPtrAllocator Allocator;
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
- Attributor A(Functions, InfoCache, CGUpdater);
+ DenseSet<const char *> Allowed(
+ {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
+ &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID});
+
+ Attributor A(Functions, InfoCache, CGUpdater, &Allowed);
for (Function &F : M) {
- A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
- A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
+ if (!F.isIntrinsic()) {
+ A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
+ A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
+ }
+ }
}
ChangeStatus Change = A.run();
@@ -521,6 +644,7 @@ public:
TargetMachine *TM;
static char ID;
};
+} // namespace
char AMDGPUAttributor::ID = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index b9faad453aba..43928d7c2a09 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -60,7 +60,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
// If this is a scalar return, insert a readfirstlane just in case the value
@@ -103,7 +103,7 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
if (VA.getLocVT().getSizeInBits() < 32) {
@@ -203,7 +203,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -236,7 +236,7 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
-// FIXME: Compatability shim
+// FIXME: Compatibility shim
static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
switch (MIOpc) {
case TargetOpcode::G_SEXT:
@@ -355,14 +355,23 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
auto const &ST = MF.getSubtarget<GCNSubtarget>();
- unsigned ReturnOpc =
- IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
+ unsigned ReturnOpc = 0;
+ if (IsShader)
+ ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
+ else if (CC == CallingConv::AMDGPU_Gfx)
+ ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
+ else
+ ReturnOpc = AMDGPU::S_SETPC_B64_return;
auto Ret = B.buildInstrNoInsert(ReturnOpc);
Register ReturnAddrVReg;
if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
Ret.addUse(ReturnAddrVReg);
+ } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
+ ReturnAddrVReg =
+ MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
+ Ret.addUse(ReturnAddrVReg);
}
if (!FLI.CanLowerReturn)
@@ -370,7 +379,8 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
else if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;
- if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
+ if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
+ ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
&AMDGPU::SGPR_64RegClass);
@@ -753,6 +763,11 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
MachineFunction &MF = MIRBuilder.getMF();
+ // If there's no call site, this doesn't correspond to a call from the IR and
+ // doesn't need implicit inputs.
+ if (!Info.CB)
+ return true;
+
const AMDGPUFunctionArgInfo *CalleeArgInfo
= &AMDGPUArgumentUsageInfo::FixedABIFunctionInfo;
@@ -773,17 +788,32 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
};
+ static constexpr StringLiteral ImplicitAttrNames[] = {
+ "amdgpu-no-dispatch-ptr",
+ "amdgpu-no-queue-ptr",
+ "amdgpu-no-implicitarg-ptr",
+ "amdgpu-no-dispatch-id",
+ "amdgpu-no-workgroup-id-x",
+ "amdgpu-no-workgroup-id-y",
+ "amdgpu-no-workgroup-id-z"
+ };
+
MachineRegisterInfo &MRI = MF.getRegInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const AMDGPULegalizerInfo *LI
= static_cast<const AMDGPULegalizerInfo*>(ST.getLegalizerInfo());
+ unsigned I = 0;
for (auto InputID : InputRegs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
+ // If the callee is known not to use this implicit input, skip copying it.
+ if (Info.CB->hasFnAttr(ImplicitAttrNames[I++]))
+ continue;
+
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
@@ -843,16 +873,22 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
const ArgDescriptor *IncomingArgZ = std::get<0>(WorkitemIDZ);
const LLT S32 = LLT::scalar(32);
+ const bool NeedWorkItemIDX = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+ const bool NeedWorkItemIDY = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+ const bool NeedWorkItemIDZ = !Info.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
// If incoming ids are not packed we need to pack them.
// FIXME: Should consider known workgroup size to eliminate known 0 cases.
Register InputReg;
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX) {
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+ NeedWorkItemIDX) {
InputReg = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(InputReg, MIRBuilder, IncomingArgX,
std::get<1>(WorkitemIDX), std::get<2>(WorkitemIDX));
}
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+ NeedWorkItemIDY) {
Register Y = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Y, MIRBuilder, IncomingArgY, std::get<1>(WorkitemIDY),
std::get<2>(WorkitemIDY));
@@ -861,7 +897,8 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Y).getReg(0) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+ NeedWorkItemIDZ) {
Register Z = MRI.createGenericVirtualRegister(S32);
LI->loadInputValue(Z, MIRBuilder, IncomingArgZ, std::get<1>(WorkitemIDZ),
std::get<2>(WorkitemIDZ));
@@ -870,7 +907,7 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
InputReg = InputReg ? MIRBuilder.buildOr(S32, InputReg, Z).getReg(0) : Z;
}
- if (!InputReg) {
+ if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
InputReg = MRI.createGenericVirtualRegister(S32);
// Workitem ids are already packed, any of present incoming arguments will
@@ -883,7 +920,9 @@ bool AMDGPUCallLowering::passSpecialInputs(MachineIRBuilder &MIRBuilder,
}
if (OutgoingArg->isRegister()) {
- ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (InputReg)
+ ArgRegs.emplace_back(OutgoingArg->getRegister(), InputReg);
+
if (!CCInfo.AllocateReg(OutgoingArg->getRegister()))
report_fatal_error("failed to allocate implicit input argument");
} else {
@@ -903,7 +942,9 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
bool IsTailCall) {
- return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::SI_CALL;
+ assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
+ "because the address can be divergent");
+ return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
}
// Add operands to call instruction to track the callee.
@@ -1033,6 +1074,11 @@ bool AMDGPUCallLowering::isEligibleForTailCallOptimization(
if (!Info.IsTailCall)
return false;
+ // Indirect calls can't be tail calls, because the address can be divergent.
+ // TODO Check divergence info if the call really is divergent.
+ if (Info.Callee.isReg())
+ return false;
+
MachineFunction &MF = B.getMF();
const Function &CallerF = MF.getFunction();
CallingConv::ID CalleeCC = Info.CallConv;
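
For reference, the packed workitem-ID register that passSpecialInputs assembles corresponds to the standalone sketch below. It is only an illustration: packWorkItemIDs is a hypothetical helper, and the three 10-bit fields (X in bits [9:0], Y in [19:10], Z in [29:20]) are the assumed layout; in the patch the actual offsets come from the incoming ArgDescriptors.

#include <cassert>
#include <cstdint>

// Hypothetical helper mirroring the packed workitem-ID register that
// passSpecialInputs assembles: X in bits [9:0], Y in [19:10], Z in [29:20]
// (assumed layout; the real offsets come from the incoming ArgDescriptors).
static uint32_t packWorkItemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  assert(X < 1024 && Y < 1024 && Z < 1024 && "each ID is a 10-bit field");
  return X | (Y << 10) | (Z << 20);
}

int main() {
  // A callee marked "amdgpu-no-workitem-id-y" and "-z" only needs X; the
  // unused fields stay zero and no extra loads are emitted for them.
  uint32_t Packed = packWorkItemIDs(/*X=*/5, /*Y=*/0, /*Z=*/0);
  assert((Packed & 0x3ff) == 5 && (Packed >> 10) == 0);
  return 0;
}
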
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 90b52395b76c..1682d43ae671 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -20,11 +20,13 @@ def CC_SI_Gfx : CallingConv<[
// 0-3 are reserved for the stack buffer descriptor
// 30-31 are reserved for the return address
// 32 is reserved for the stack pointer
+ // 33 is reserved for the frame pointer
+ // 34 is reserved for the base pointer
CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
- SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
]>>>,
CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
@@ -41,17 +43,6 @@ def RetCC_SI_Gfx : CallingConv<[
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
- // 0-3 are reserved for the stack buffer descriptor
- // 32 is reserved for the stack pointer
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
- SGPR4, SGPR5, SGPR6, SGPR7,
- SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
- SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
- SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
- SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39,
- SGPR40, SGPR41, SGPR42, SGPR43
- ]>>>,
-
CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
@@ -139,14 +130,6 @@ def RetCC_SI_Shader : CallingConv<[
]>>
]>;
-def CSR_AMDGPU_VGPRs_24_255 : CalleeSavedRegs<
- (sequence "VGPR%u", 24, 255)
->;
-
-def CSR_AMDGPU_VGPRs_32_255 : CalleeSavedRegs<
- (sequence "VGPR%u", 32, 255)
->;
-
def CSR_AMDGPU_VGPRs : CalleeSavedRegs<
// The CSRs & scratch-registers are interleaved at a split boundary of 8.
(add (sequence "VGPR%u", 40, 47),
@@ -173,6 +156,14 @@ def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
(sequence "SGPR%u", 32, 105)
>;
+def CSR_AMDGPU_SI_Gfx_SGPRs_4_29 : CalleeSavedRegs<
+ (sequence "SGPR%u", 4, 29)
+>;
+
+def CSR_AMDGPU_SI_Gfx_SGPRs_64_105 : CalleeSavedRegs<
+ (sequence "SGPR%u", 64, 105)
+>;
+
// Just to get the regmask, not for calling convention purposes.
def CSR_AMDGPU_AllVGPRs : CalleeSavedRegs<
(sequence "VGPR%u", 0, 255)
@@ -198,6 +189,14 @@ def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
(add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255)
>;
+def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
+ (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
+>;
+
+def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
+ (add CSR_AMDGPU_SI_Gfx, CSR_AMDGPU_AGPRs_32_255)
+>;
+
def CSR_AMDGPU_NoRegs : CalleeSavedRegs<(add)>;
// Calling convention for leaf functions
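
The new CSR_AMDGPU_SI_Gfx set marks SGPR4-SGPR29 and SGPR64-SGPR105 (plus the interleaved VGPR set) as callee-saved for the Gfx calling convention. The predicate below is a hypothetical illustration of those SGPR ranges only:

#include <cassert>

// Restates the SGPR ranges of CSR_AMDGPU_SI_Gfx (SGPR4-29 and SGPR64-105);
// the VGPR/AGPR components are defined separately above.
static bool isGfxCalleeSavedSGPR(unsigned N) {
  return (N >= 4 && N <= 29) || (N >= 64 && N <= 105);
}

int main() {
  assert(isGfxCalleeSavedSGPR(4) && isGfxCalleeSavedSGPR(105));
  // SGPR30/31 (return address) and SGPR32 (stack pointer) are not in the set.
  assert(!isGfxCalleeSavedSGPR(30) && !isGfxCalleeSavedSGPR(32));
  return 0;
}
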
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 60e79c2c6c2f..a55729586b8d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -148,11 +148,15 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// \returns True.
bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
-
+ /// \returns The minimum number of bits needed to store the value of \p Op as an
+ /// unsigned integer. Truncating to this size and then zero-extending to
+ /// ScalarSize will not change the value.
unsigned numBitsUnsigned(Value *Op, unsigned ScalarSize) const;
+
+ /// \returns The minimum number of bits needed to store the value of \p Op as a
+ /// signed integer. Truncating to this size and then sign-extending to
+ /// ScalarSize will not change the value.
unsigned numBitsSigned(Value *Op, unsigned ScalarSize) const;
- bool isI24(Value *V, unsigned ScalarSize) const;
- bool isU24(Value *V, unsigned ScalarSize) const;
/// Replace mul instructions with llvm.amdgcn.mul.u24 or llvm.amdgcn.mul.s24.
/// SelectionDAG has an issue where an and asserting the bits are known
@@ -451,17 +455,7 @@ unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op,
unsigned ScalarSize) const {
// In order for this to be a signed 24-bit value, bit 23, must
// be a sign bit.
- return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC);
-}
-
-bool AMDGPUCodeGenPrepare::isI24(Value *V, unsigned ScalarSize) const {
- return ScalarSize >= 24 && // Types less than 24-bit should be treated
- // as unsigned 24-bit values.
- numBitsSigned(V, ScalarSize) < 24;
-}
-
-bool AMDGPUCodeGenPrepare::isU24(Value *V, unsigned ScalarSize) const {
- return numBitsUnsigned(V, ScalarSize) <= 24;
+ return ScalarSize - ComputeNumSignBits(Op, *DL, 0, AC) + 1;
}
static void extractValues(IRBuilder<> &Builder,
@@ -489,6 +483,34 @@ static Value *insertValues(IRBuilder<> &Builder,
return NewVal;
}
+// Returns 24-bit or 48-bit (as per `NumBits` and `Size`) mul of `LHS` and
+// `RHS`. `NumBits` is the maximum number of bits the result may need (from
+// known-bits analysis) and `Size` is the bit width of the original destination.
+static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
+ unsigned Size, unsigned NumBits, bool IsSigned) {
+ if (Size <= 32 || NumBits <= 32) {
+ Intrinsic::ID ID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ return Builder.CreateIntrinsic(ID, {}, {LHS, RHS});
+ }
+
+ assert(NumBits <= 48);
+
+ Intrinsic::ID LoID =
+ IsSigned ? Intrinsic::amdgcn_mul_i24 : Intrinsic::amdgcn_mul_u24;
+ Intrinsic::ID HiID =
+ IsSigned ? Intrinsic::amdgcn_mulhi_i24 : Intrinsic::amdgcn_mulhi_u24;
+
+ Value *Lo = Builder.CreateIntrinsic(LoID, {}, {LHS, RHS});
+ Value *Hi = Builder.CreateIntrinsic(HiID, {}, {LHS, RHS});
+
+ IntegerType *I64Ty = Builder.getInt64Ty();
+ Lo = Builder.CreateZExtOrTrunc(Lo, I64Ty);
+ Hi = Builder.CreateZExtOrTrunc(Hi, I64Ty);
+
+ return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
+}
+
bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
@@ -507,13 +529,17 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
IRBuilder<> Builder(&I);
Builder.SetCurrentDebugLocation(I.getDebugLoc());
- Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+ unsigned LHSBits = 0, RHSBits = 0;
+ bool IsSigned = false;
+
+ if (ST->hasMulU24() && (LHSBits = numBitsUnsigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsUnsigned(RHS, Size)) <= 24) {
+ IsSigned = false;
+
+ } else if (ST->hasMulI24() && (LHSBits = numBitsSigned(LHS, Size)) <= 24 &&
+ (RHSBits = numBitsSigned(RHS, Size)) <= 24) {
+ IsSigned = true;
- // TODO: Should this try to match mulhi24?
- if (ST->hasMulU24() && isU24(LHS, Size) && isU24(RHS, Size)) {
- IntrID = Intrinsic::amdgcn_mul_u24;
- } else if (ST->hasMulI24() && isI24(LHS, Size) && isI24(RHS, Size)) {
- IntrID = Intrinsic::amdgcn_mul_i24;
} else
return false;
@@ -523,27 +549,26 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
extractValues(Builder, LHSVals, LHS);
extractValues(Builder, RHSVals, RHS);
-
IntegerType *I32Ty = Builder.getInt32Ty();
- FunctionCallee Intrin = Intrinsic::getDeclaration(Mod, IntrID);
for (int I = 0, E = LHSVals.size(); I != E; ++I) {
Value *LHS, *RHS;
- if (IntrID == Intrinsic::amdgcn_mul_u24) {
- LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
- RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
- } else {
+ if (IsSigned) {
LHS = Builder.CreateSExtOrTrunc(LHSVals[I], I32Ty);
RHS = Builder.CreateSExtOrTrunc(RHSVals[I], I32Ty);
+ } else {
+ LHS = Builder.CreateZExtOrTrunc(LHSVals[I], I32Ty);
+ RHS = Builder.CreateZExtOrTrunc(RHSVals[I], I32Ty);
}
- Value *Result = Builder.CreateCall(Intrin, {LHS, RHS});
+ Value *Result =
+ getMul24(Builder, LHS, RHS, Size, LHSBits + RHSBits, IsSigned);
- if (IntrID == Intrinsic::amdgcn_mul_u24) {
- ResultVals.push_back(Builder.CreateZExtOrTrunc(Result,
- LHSVals[I]->getType()));
+ if (IsSigned) {
+ ResultVals.push_back(
+ Builder.CreateSExtOrTrunc(Result, LHSVals[I]->getType()));
} else {
- ResultVals.push_back(Builder.CreateSExtOrTrunc(Result,
- LHSVals[I]->getType()));
+ ResultVals.push_back(
+ Builder.CreateZExtOrTrunc(Result, LHSVals[I]->getType()));
}
}
@@ -816,7 +841,7 @@ bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
return visitBinaryOperator(I);
- // Check if the Call is an intrinsic intruction to amdgcn_class intrinsic
+ // Check if the Call is an intrinsic call to the amdgcn_class intrinsic which
// has only one use
if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
!IntrinsicCall->hasOneUse())
@@ -1314,7 +1339,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
ConstantInt *Lower =
mdconst::extract<ConstantInt>(Range->getOperand(0));
- if (Lower->getValue().isNullValue()) {
+ if (Lower->isNullValue()) {
WidenLoad->setMetadata(LLVMContext::MD_range, nullptr);
} else {
Metadata *LowAndHigh[] = {
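
A minimal sketch of the arithmetic behind replaceMulWithMul24 and getMul24: numBitsUnsigned/numBitsSigned decide whether both operands fit in 24 bits, and the 48-bit path rebuilds the full product from a lo/hi pair of 24-bit multiplies. The helpers below are plain C++ stand-ins for ValueTracking and the amdgcn mul_u24/mulhi_u24 intrinsics, written only to illustrate the invariants the pass relies on:

#include <cassert>
#include <cstdint>

// Minimum number of bits needed to hold V as an unsigned integer
// (cf. numBitsUnsigned: bit width minus leading zero bits).
static unsigned minUnsignedBits(uint32_t V) {
  unsigned Bits = 0;
  while (V) {
    V >>= 1;
    ++Bits;
  }
  return Bits;
}

// Minimum number of bits needed to hold V as a signed integer
// (cf. the fixed numBitsSigned: bit width minus sign bits, plus one).
static unsigned minSignedBits(int32_t V) {
  uint32_t U = uint32_t(V), Sign = U >> 31;
  unsigned SignBits = 0;
  for (int Bit = 31; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++SignBits;
  return 32 - SignBits + 1;
}

// Stand-ins for llvm.amdgcn.mul.u24 / llvm.amdgcn.mulhi.u24: multiply the low
// 24 bits of each operand and return the low / high 32 bits of the product.
static uint32_t mul_u24(uint32_t A, uint32_t B) {
  return uint32_t(uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff));
}
static uint32_t mulhi_u24(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff)) >> 32);
}

int main() {
  assert(minSignedBits(-5) == 4);        // -5 is 0b1011 in 4 signed bits
  assert(minUnsignedBits(0xABCDEF) == 24);

  // The 48-bit path of getMul24: zero-extend the lo/hi halves and recombine.
  uint32_t A = 0xABCDEF, B = 0x123456;   // both operands fit in 24 bits
  uint64_t Lo = mul_u24(A, B), Hi = mulhi_u24(A, B);
  assert((Lo | (Hi << 32)) == uint64_t(A) * uint64_t(B));
  return 0;
}
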
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c6273adca50f..df2f9a0fa3a9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -64,26 +64,36 @@ def remove_fcanonicalize : GICombineRule<
[{ return PostLegalizerHelper.matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]),
(apply [{ Helper.replaceSingleDefInstWithReg(*${fcanonicalize}, ${matchinfo}); }])>;
+def foldable_fneg_matchdata : GIDefMatchData<"MachineInstr *">;
+
+def foldable_fneg : GICombineRule<
+ (defs root:$ffn, foldable_fneg_matchdata:$matchinfo),
+ (match (wip_match_opcode G_FNEG):$ffn,
+ [{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]),
+ (apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>;
+
// Combines which should only apply on SI/VI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPreLegalizerCombinerHelper", [all_combines, clamp_i64_to_i16]> {
+ "AMDGPUGenPreLegalizerCombinerHelper",
+ [all_combines, clamp_i64_to_i16, foldable_fneg]> {
let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
+ let AdditionalArguments = [];
}
def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
"AMDGPUGenPostLegalizerCombinerHelper",
[all_combines, gfx6gfx7_combines,
- uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize]> {
+ uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg]> {
let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
let AdditionalArguments = [];
}
def AMDGPURegBankCombinerHelper : GICombinerHelper<
- "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3]> {
+ "AMDGPUGenRegBankCombinerHelper", [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain]> {
let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
let StateClass = "AMDGPURegBankCombinerHelperState";
let AdditionalArguments = [];
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
new file mode 100644
index 000000000000..301e6f6d6f42
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -0,0 +1,382 @@
+//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUCombinerHelper.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+using namespace MIPatternMatch;
+
+LLVM_READNONE
+static bool fnegFoldsIntoMI(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ case AMDGPU::G_FMUL:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ return true;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MI.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fmed3:
+ case Intrinsic::amdgcn_fma_legacy:
+ return true;
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+/// \returns true if the operation will definitely need to use a 64-bit
+/// encoding, and thus will use a VOP3 encoding regardless of the source
+/// modifiers.
+LLVM_READONLY
+static bool opMustUseVOP3Encoding(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+ return MI.getNumOperands() >
+ (MI.getOpcode() == AMDGPU::G_INTRINSIC ? 4u : 3u) ||
+ MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits() == 64;
+}
+
+// Most FP instructions support source modifiers.
+LLVM_READONLY
+static bool hasSourceMods(const MachineInstr &MI) {
+ if (!MI.memoperands().empty())
+ return false;
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::G_SELECT:
+ case AMDGPU::G_FDIV:
+ case AMDGPU::G_FREM:
+ case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
+ case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS:
+ case AMDGPU::G_BITCAST:
+ case AMDGPU::G_ANYEXT:
+ case AMDGPU::G_BUILD_VECTOR:
+ case AMDGPU::G_BUILD_VECTOR_TRUNC:
+ case AMDGPU::G_PHI:
+ return false;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MI.getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_interp_p1:
+ case Intrinsic::amdgcn_interp_p2:
+ case Intrinsic::amdgcn_interp_mov:
+ case Intrinsic::amdgcn_interp_p1_f16:
+ case Intrinsic::amdgcn_interp_p2_f16:
+ case Intrinsic::amdgcn_div_scale:
+ return false;
+ default:
+ return true;
+ }
+ }
+ default:
+ return true;
+ }
+}
+
+static bool allUsesHaveSourceMods(MachineInstr &MI, MachineRegisterInfo &MRI,
+ unsigned CostThreshold = 4) {
+ // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
+ // it is truly free to use a source modifier in all cases. If there are
+ // multiple users and each one will necessitate using VOP3, there will be
+ // a code size increase. Try to avoid increasing code size unless we know it
+ // will save on the instruction count.
+ unsigned NumMayIncreaseSize = 0;
+ Register Dst = MI.getOperand(0).getReg();
+ for (const MachineInstr &Use : MRI.use_nodbg_instructions(Dst)) {
+ if (!hasSourceMods(Use))
+ return false;
+
+ if (!opMustUseVOP3Encoding(Use, MRI)) {
+ if (++NumMayIncreaseSize > CostThreshold)
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool mayIgnoreSignedZero(MachineInstr &MI) {
+ const TargetOptions &Options = MI.getMF()->getTarget().Options;
+ return Options.NoSignedZerosFPMath || MI.getFlag(MachineInstr::MIFlag::FmNsz);
+}
+
+static bool isInv2Pi(const APFloat &APF) {
+ static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
+ static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
+ static const APFloat KF64(APFloat::IEEEdouble(),
+ APInt(64, 0x3fc45f306dc9c882));
+
+ return APF.bitwiseIsEqual(KF16) || APF.bitwiseIsEqual(KF32) ||
+ APF.bitwiseIsEqual(KF64);
+}
+
+// 0 and 1.0 / (0.5 * pi) do not have inline immediates, so there is an
+// additional cost to negate them.
+static bool isConstantCostlierToNegate(MachineInstr &MI, Register Reg,
+ MachineRegisterInfo &MRI) {
+ Optional<FPValueAndVReg> FPValReg;
+ if (mi_match(Reg, MRI, m_GFCstOrSplat(FPValReg))) {
+ if (FPValReg->Value.isZero() && !FPValReg->Value.isNegative())
+ return true;
+
+ const GCNSubtarget &ST = MI.getMF()->getSubtarget<GCNSubtarget>();
+ if (ST.hasInv2PiInlineImm() && isInv2Pi(FPValReg->Value))
+ return true;
+ }
+ return false;
+}
+
+static unsigned inverseMinMax(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::G_FMAXNUM:
+ return AMDGPU::G_FMINNUM;
+ case AMDGPU::G_FMINNUM:
+ return AMDGPU::G_FMAXNUM;
+ case AMDGPU::G_FMAXNUM_IEEE:
+ return AMDGPU::G_FMINNUM_IEEE;
+ case AMDGPU::G_FMINNUM_IEEE:
+ return AMDGPU::G_FMAXNUM_IEEE;
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ return AMDGPU::G_AMDGPU_FMIN_LEGACY;
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ return AMDGPU::G_AMDGPU_FMAX_LEGACY;
+ default:
+ llvm_unreachable("invalid min/max opcode");
+ }
+}
+
+bool AMDGPUCombinerHelper::matchFoldableFneg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) {
+ Register Src = MI.getOperand(1).getReg();
+ MatchInfo = MRI.getVRegDef(Src);
+
+ // If the input has multiple uses and we can either fold the negate down, or
+ // the other uses cannot, give up. This both prevents unprofitable
+ // transformations and infinite loops: we won't repeatedly try to fold around
+ // a negate that has no 'good' form.
+ if (MRI.hasOneNonDBGUse(Src)) {
+ if (allUsesHaveSourceMods(MI, MRI, 0))
+ return false;
+ } else {
+ if (fnegFoldsIntoMI(*MatchInfo) &&
+ (allUsesHaveSourceMods(MI, MRI) ||
+ !allUsesHaveSourceMods(*MatchInfo, MRI)))
+ return false;
+ }
+
+ switch (MatchInfo->getOpcode()) {
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY:
+ // 0 doesn't have a negated inline immediate.
+ return !isConstantCostlierToNegate(*MatchInfo,
+ MatchInfo->getOperand(2).getReg(), MRI);
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ return mayIgnoreSignedZero(*MatchInfo);
+ case AMDGPU::G_FMUL:
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FPTRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ return true;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ case Intrinsic::amdgcn_fmul_legacy:
+ case Intrinsic::amdgcn_fmed3:
+ return true;
+ case Intrinsic::amdgcn_fma_legacy:
+ return mayIgnoreSignedZero(*MatchInfo);
+ default:
+ return false;
+ }
+ }
+ default:
+ return false;
+ }
+}
+
+void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) {
+ // Transform:
+ // %A = inst %Op1, ...
+ // %B = fneg %A
+ //
+ // into:
+ //
+ // (if %A has one use, specifically fneg above)
+ // %B = inst (maybe fneg %Op1), ...
+ //
+ // (if %A has multiple uses)
+ // %B = inst (maybe fneg %Op1), ...
+ // %A = fneg %B
+
+ // Replace register in operand with a register holding negated value.
+ auto NegateOperand = [&](MachineOperand &Op) {
+ Register Reg = Op.getReg();
+ if (!mi_match(Reg, MRI, m_GFNeg(m_Reg(Reg))))
+ Reg = Builder.buildFNeg(MRI.getType(Reg), Reg).getReg(0);
+ replaceRegOpWith(MRI, Op, Reg);
+ };
+
+ // Replace either register in operands with a register holding negated value.
+ auto NegateEitherOperand = [&](MachineOperand &X, MachineOperand &Y) {
+ Register XReg = X.getReg();
+ Register YReg = Y.getReg();
+ if (mi_match(XReg, MRI, m_GFNeg(m_Reg(XReg))))
+ replaceRegOpWith(MRI, X, XReg);
+ else if (mi_match(YReg, MRI, m_GFNeg(m_Reg(YReg))))
+ replaceRegOpWith(MRI, Y, YReg);
+ else {
+ YReg = Builder.buildFNeg(MRI.getType(YReg), YReg).getReg(0);
+ replaceRegOpWith(MRI, Y, YReg);
+ }
+ };
+
+ Builder.setInstrAndDebugLoc(*MatchInfo);
+
+ // Negate appropriate operands so that resulting value of MatchInfo is
+ // negated.
+ switch (MatchInfo->getOpcode()) {
+ case AMDGPU::G_FADD:
+ case AMDGPU::G_FSUB:
+ NegateOperand(MatchInfo->getOperand(1));
+ NegateOperand(MatchInfo->getOperand(2));
+ break;
+ case AMDGPU::G_FMUL:
+ NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+ break;
+ case AMDGPU::G_FMINNUM:
+ case AMDGPU::G_FMAXNUM:
+ case AMDGPU::G_FMINNUM_IEEE:
+ case AMDGPU::G_FMAXNUM_IEEE:
+ case AMDGPU::G_AMDGPU_FMIN_LEGACY:
+ case AMDGPU::G_AMDGPU_FMAX_LEGACY: {
+ NegateOperand(MatchInfo->getOperand(1));
+ NegateOperand(MatchInfo->getOperand(2));
+ unsigned Opposite = inverseMinMax(MatchInfo->getOpcode());
+ replaceOpcodeWith(*MatchInfo, Opposite);
+ break;
+ }
+ case AMDGPU::G_FMA:
+ case AMDGPU::G_FMAD:
+ NegateEitherOperand(MatchInfo->getOperand(1), MatchInfo->getOperand(2));
+ NegateOperand(MatchInfo->getOperand(3));
+ break;
+ case AMDGPU::G_FPEXT:
+ case AMDGPU::G_INTRINSIC_TRUNC:
+ case AMDGPU::G_FRINT:
+ case AMDGPU::G_FNEARBYINT:
+ case AMDGPU::G_INTRINSIC_ROUND:
+ case AMDGPU::G_INTRINSIC_ROUNDEVEN:
+ case AMDGPU::G_FSIN:
+ case AMDGPU::G_FCANONICALIZE:
+ case AMDGPU::G_AMDGPU_RCP_IFLAG:
+ case AMDGPU::G_FPTRUNC:
+ NegateOperand(MatchInfo->getOperand(1));
+ break;
+ case AMDGPU::G_INTRINSIC: {
+ unsigned IntrinsicID = MatchInfo->getIntrinsicID();
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_rcp:
+ case Intrinsic::amdgcn_rcp_legacy:
+ case Intrinsic::amdgcn_sin:
+ NegateOperand(MatchInfo->getOperand(2));
+ break;
+ case Intrinsic::amdgcn_fmul_legacy:
+ NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+ break;
+ case Intrinsic::amdgcn_fmed3:
+ NegateOperand(MatchInfo->getOperand(2));
+ NegateOperand(MatchInfo->getOperand(3));
+ NegateOperand(MatchInfo->getOperand(4));
+ break;
+ case Intrinsic::amdgcn_fma_legacy:
+ NegateEitherOperand(MatchInfo->getOperand(2), MatchInfo->getOperand(3));
+ NegateOperand(MatchInfo->getOperand(4));
+ break;
+ default:
+ llvm_unreachable("folding fneg not supported for this intrinsic");
+ }
+ break;
+ }
+ default:
+ llvm_unreachable("folding fneg not supported for this instruction");
+ }
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register MatchInfoDst = MatchInfo->getOperand(0).getReg();
+
+ if (MRI.hasOneNonDBGUse(MatchInfoDst)) {
+ // MatchInfo now has negated value so use that instead of old Dst.
+ replaceRegWith(MRI, Dst, MatchInfoDst);
+ } else {
+ // We want to swap all uses of Dst with uses of MatchInfoDst and vice versa
+ // but replaceRegWith will replace defs as well. It is easier to replace one
+ // def with a new register.
+ LLT Type = MRI.getType(Dst);
+ Register NegatedMatchInfo = MRI.createGenericVirtualRegister(Type);
+ replaceRegOpWith(MRI, MatchInfo->getOperand(0), NegatedMatchInfo);
+
+ // MatchInfo now has negated value so use that instead of old Dst.
+ replaceRegWith(MRI, Dst, NegatedMatchInfo);
+
+ // Recreate the non-negated value for other uses of the old MatchInfoDst.
+ Builder.setInstrAndDebugLoc(MI);
+ Builder.buildFNeg(MatchInfoDst, NegatedMatchInfo, MI.getFlags());
+ }
+
+ MI.eraseFromParent();
+ return;
+}
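
For the min/max cases above, the combine relies on the identity -min(a, b) == max(-a, -b) (and its mirror), which is why inverseMinMax swaps the opcode while both inputs are negated. A tiny sanity check in plain C++, ignoring the NaN and signed-zero subtleties that mayIgnoreSignedZero and the IEEE opcode variants exist to handle:

#include <algorithm>
#include <cassert>

int main() {
  float A = 2.5f, B = -7.0f;
  // fneg(fmin(A, B)) == fmax(fneg(A), fneg(B)), and symmetrically for fmax;
  // the combine rewrites the opcode via inverseMinMax and negates both inputs.
  assert(-std::min(A, B) == std::max(-A, -B));
  assert(-std::max(A, B) == std::min(-A, -B));
  return 0;
}
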
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
new file mode 100644
index 000000000000..1d4747136bf7
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -0,0 +1,26 @@
+//=== lib/Target/AMDGPU/AMDGPUCombinerHelper.h ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This contains common combine transformations that may be used in a combine
+/// pass.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Combiner.h"
+#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+
+using namespace llvm;
+
+class AMDGPUCombinerHelper : public CombinerHelper {
+public:
+ using CombinerHelper::CombinerHelper;
+
+ bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+ void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
new file mode 100644
index 000000000000..04bf623bfa46
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -0,0 +1,95 @@
+//===-- AMDGPUCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass creates a unified init and fini kernel with the required metadata.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-lower-ctor-dtor"
+
+namespace {
+class AMDGPUCtorDtorLowering final : public ModulePass {
+ bool runOnModule(Module &M) override;
+
+public:
+ Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
+ StringRef InitOrFiniKernelName = "amdgcn.device.init";
+ if (!IsCtor)
+ InitOrFiniKernelName = "amdgcn.device.fini";
+
+ Function *InitOrFiniKernel = Function::createWithDefaultAttr(
+ FunctionType::get(Type::getVoidTy(M.getContext()), false),
+ GlobalValue::ExternalLinkage, 0, InitOrFiniKernelName, &M);
+ BasicBlock *InitOrFiniKernelBB =
+ BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
+ ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
+
+ InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
+ if (IsCtor)
+ InitOrFiniKernel->addFnAttr("device-init");
+ else
+ InitOrFiniKernel->addFnAttr("device-fini");
+ return InitOrFiniKernel;
+ }
+
+ bool createInitOrFiniKernel(Module &M, GlobalVariable *GV, bool IsCtor) {
+ if (!GV)
+ return false;
+ ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!GA || GA->getNumOperands() == 0)
+ return false;
+ Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
+ IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
+ for (Value *V : GA->operands()) {
+ auto *CS = cast<ConstantStruct>(V);
+ if (Function *F = dyn_cast<Function>(CS->getOperand(1))) {
+ FunctionCallee Ctor =
+ M.getOrInsertFunction(F->getName(), IRB.getVoidTy());
+ IRB.CreateCall(Ctor);
+ }
+ }
+ appendToUsed(M, {InitOrFiniKernel});
+ return true;
+ }
+
+ static char ID;
+ AMDGPUCtorDtorLowering() : ModulePass(ID) {}
+};
+} // End anonymous namespace
+
+char AMDGPUCtorDtorLowering::ID = 0;
+char &llvm::AMDGPUCtorDtorLoweringID = AMDGPUCtorDtorLowering::ID;
+INITIALIZE_PASS(AMDGPUCtorDtorLowering, DEBUG_TYPE,
+ "Lower ctors and dtors for AMDGPU", false, false)
+
+ModulePass *llvm::createAMDGPUCtorDtorLoweringPass() {
+ return new AMDGPUCtorDtorLowering();
+}
+
+bool AMDGPUCtorDtorLowering::runOnModule(Module &M) {
+ bool Modified = false;
+ Modified |=
+ createInitOrFiniKernel(M, M.getGlobalVariable("llvm.global_ctors"),
+ /*IsCtor =*/true);
+ Modified |=
+ createInitOrFiniKernel(M, M.getGlobalVariable("llvm.global_dtors"),
+ /*IsCtor =*/false);
+ return Modified;
+}
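
Conceptually, the pass collapses the module's llvm.global_ctors / llvm.global_dtors arrays into single amdgcn.device.init / amdgcn.device.fini kernels that call each entry in order, ignoring the priority and associated-data fields. A rough host-side analogue with plain function pointers (ctorA/ctorB/deviceInit are illustrative names only):

#include <cstdio>

// Illustrative stand-ins for two constructors listed in llvm.global_ctors.
static void ctorA() { std::puts("ctorA"); }
static void ctorB() { std::puts("ctorB"); }

// The generated amdgcn.device.init kernel calls each constructor in turn;
// the priority and associated-data fields of each entry are ignored, as in
// createInitOrFiniKernel above.
static void (*const GlobalCtors[])() = {ctorA, ctorB};

static void deviceInit() {
  for (void (*Ctor)() : GlobalCtors)
    Ctor();
}

int main() {
  deviceInit();
  return 0;
}
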
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index db00f8f711a3..3533087bbfd1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -25,7 +25,6 @@ class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"The size of local memory in bytes"
>;
-def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 521c8f261a00..12cef2774aaf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -159,6 +159,7 @@ def : GINodeEquiv<G_ATOMICRMW_UMAX, atomic_load_umax_glue>;
def : GINodeEquiv<G_ATOMICRMW_FADD, atomic_load_fadd_glue>;
def : GINodeEquiv<G_AMDGPU_FFBH_U32, AMDGPUffbh_u32_impl>;
+def : GINodeEquiv<G_AMDGPU_FFBL_B32, AMDGPUffbl_b32_impl>;
def : GINodeEquiv<G_AMDGPU_FMIN_LEGACY, AMDGPUfmin_legacy>;
def : GINodeEquiv<G_AMDGPU_FMAX_LEGACY, AMDGPUfmax_legacy>;
def : GINodeEquiv<G_AMDGPU_RCP_IFLAG, AMDGPUrcp_iflag>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 8eeda7b67b73..b9c59f4c615a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -113,7 +113,7 @@ MetadataStreamerV2::getAddressSpaceQualifier(
ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const {
- if (TypeQual.find("pipe") != StringRef::npos)
+ if (TypeQual.contains("pipe"))
return ValueKind::Pipe;
return StringSwitch<ValueKind>(BaseTypeName)
@@ -201,10 +201,11 @@ MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
Align MaxKernArgAlign;
HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
MaxKernArgAlign);
+ HSACodeProps.mKernargSegmentAlign =
+ std::max(MaxKernArgAlign, Align(4)).value();
+
HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
- HSACodeProps.mKernargSegmentAlign =
- std::max(MaxKernArgAlign, Align(4)).value();
HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
@@ -533,7 +534,7 @@ MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const {
- if (TypeQual.find("pipe") != StringRef::npos)
+ if (TypeQual.contains("pipe"))
return "pipe";
return StringSwitch<StringRef>(BaseTypeName)
@@ -665,6 +666,10 @@ void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
Func.getFnAttribute("runtime-handle").getValueAsString().str(),
/*Copy=*/true);
}
+ if (Func.hasFnAttribute("device-init"))
+ Kern[".kind"] = Kern.getDocument()->getNode("init");
+ else if (Func.hasFnAttribute("device-fini"))
+ Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
void MetadataStreamerV3::emitKernelArgs(const Function &Func,
@@ -794,7 +799,8 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
if (!HiddenArgNumBytes)
return;
- auto &DL = Func.getParent()->getDataLayout();
+ const Module *M = Func.getParent();
+ auto &DL = M->getDataLayout();
auto Int64Ty = Type::getInt64Ty(Func.getContext());
if (HiddenArgNumBytes >= 8)
@@ -810,16 +816,16 @@ void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
auto Int8PtrTy =
Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
- // Emit "printf buffer" argument if printf is used, otherwise emit dummy
- // "none" argument.
+ // Emit "printf buffer" argument if printf is used, emit "hostcall buffer"
+ // if "hostcall" module flag is set, otherwise emit dummy "none" argument.
if (HiddenArgNumBytes >= 32) {
- if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+ if (M->getNamedMetadata("llvm.printf.fmts"))
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
Args);
- else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
+ else if (M->getModuleFlag("amdgpu_hostcall")) {
// The printf runtime binding pass should have ensured that hostcall and
// printf are not used in the same module.
- assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
+ assert(!M->getNamedMetadata("llvm.printf.fmts"));
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
Args);
} else
@@ -862,6 +868,8 @@ MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
+
+ // FIXME: The metadata treats the minimum as 16?
Kern[".kernarg_segment_align"] =
Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
Kern[".wavefront_size"] =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 4824b4cf37c7..af5dae1cd8c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -15,7 +15,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUHSAMETADATASTREAMER_H
-#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/Alignment.h"
@@ -33,6 +32,11 @@ struct SIProgramInfo;
class Type;
namespace AMDGPU {
+
+namespace IsaInfo {
+class AMDGPUTargetID;
+}
+
namespace HSAMD {
class MetadataStreamer {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index a3106ded1e38..cee56ee97294 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -11,8 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -32,287 +35,12 @@
using namespace llvm;
-namespace llvm {
-
-class R600InstrInfo;
-
-} // end namespace llvm
-
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
namespace {
-static bool isNullConstantOrUndef(SDValue V) {
- if (V.isUndef())
- return true;
-
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isNullValue();
-}
-
-static bool getConstantValue(SDValue N, uint32_t &Out) {
- // This is only used for packed vectors, where ussing 0 for undef should
- // always be good.
- if (N.isUndef()) {
- Out = 0;
- return true;
- }
-
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
- Out = C->getAPIntValue().getSExtValue();
- return true;
- }
-
- if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
- Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
- return true;
- }
-
- return false;
-}
-
-// TODO: Handle undef as zero
-static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
- bool Negate = false) {
- assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
- uint32_t LHSVal, RHSVal;
- if (getConstantValue(N->getOperand(0), LHSVal) &&
- getConstantValue(N->getOperand(1), RHSVal)) {
- SDLoc SL(N);
- uint32_t K = Negate ?
- (-LHSVal & 0xffff) | (-RHSVal << 16) :
- (LHSVal & 0xffff) | (RHSVal << 16);
- return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
- DAG.getTargetConstant(K, SL, MVT::i32));
- }
-
- return nullptr;
-}
-
-static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
- return packConstantV2I16(N, DAG, true);
-}
-
-/// AMDGPU specific code to select AMDGPU machine instructions for
-/// SelectionDAG operations.
-class AMDGPUDAGToDAGISel : public SelectionDAGISel {
- // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
- // make the right decision when generating code for different targets.
- const GCNSubtarget *Subtarget;
-
- // Default FP mode for the current function.
- AMDGPU::SIModeRegisterDefaults Mode;
-
- bool EnableLateStructurizeCFG;
-
- // Instructions that will be lowered with a final instruction that zeros the
- // high result bits.
- bool fp16SrcZerosHighBits(unsigned Opc) const;
-
-public:
- explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
- CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
- : SelectionDAGISel(*TM, OptLevel) {
- EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
- }
- ~AMDGPUDAGToDAGISel() override = default;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<LegacyDivergenceAnalysis>();
-#ifdef EXPENSIVE_CHECKS
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
-#endif
- SelectionDAGISel::getAnalysisUsage(AU);
- }
-
- bool matchLoadD16FromBuildVector(SDNode *N) const;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
- void PreprocessISelDAG() override;
- void Select(SDNode *N) override;
- StringRef getPassName() const override;
- void PostprocessISelDAG() override;
-
-protected:
- void SelectBuildVector(SDNode *N, unsigned RegClassID);
-
-private:
- std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
- bool isNoNanSrc(SDValue N) const;
- bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
- bool isNegInlineImmediate(const SDNode *N) const {
- return isInlineImmediate(N, true);
- }
-
- bool isInlineImmediate16(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate32(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate64(int64_t Imm) const {
- return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
- }
-
- bool isInlineImmediate(const APFloat &Imm) const {
- return Subtarget->getInstrInfo()->isInlineConstant(Imm);
- }
-
- bool isVGPRImm(const SDNode *N) const;
- bool isUniformLoad(const SDNode *N) const;
- bool isUniformBr(const SDNode *N) const;
-
- bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
- SDValue &RHS) const;
-
- MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
-
- SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
- SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
- SDNode *glueCopyToM0LDSInit(SDNode *N) const;
-
- const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
- virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
- virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
- bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
- unsigned Size) const;
- bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
- bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1) const;
- bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
- SDValue &Offset1, unsigned Size) const;
- bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset, SDValue &Offen,
- SDValue &Idxen, SDValue &Addr64) const;
- bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &Offset) const;
- bool SelectMUBUFScratchOffen(SDNode *Parent,
- SDValue Addr, SDValue &RSrc, SDValue &VAddr,
- SDValue &SOffset, SDValue &ImmOffset) const;
- bool SelectMUBUFScratchOffset(SDNode *Parent,
- SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
- SDValue &Offset) const;
-
- bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset, uint64_t FlatVariant) const;
- bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
- SDValue &Offset) const;
- bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &VOffset, SDValue &Offset) const;
- bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
- SDValue &Offset) const;
-
- bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
- bool &Imm) const;
- SDValue Expand32BitAddress(SDValue Addr) const;
- bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
- bool &Imm) const;
- bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
- bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
- bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
- bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
-
- bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
- bool AllowAbs = true) const;
- bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
- bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
- bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3OMods(SDValue In, SDValue &Src,
- SDValue &Clamp, SDValue &Omod) const;
-
- bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
- bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
-
- SDValue getHi16Elt(SDValue In) const;
-
- SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
-
- void SelectADD_SUB_I64(SDNode *N);
- void SelectAddcSubb(SDNode *N);
- void SelectUADDO_USUBO(SDNode *N);
- void SelectDIV_SCALE(SDNode *N);
- void SelectMAD_64_32(SDNode *N);
- void SelectFMA_W_CHAIN(SDNode *N);
- void SelectFMUL_W_CHAIN(SDNode *N);
-
- SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
- uint32_t Offset, uint32_t Width);
- void SelectS_BFEFromShifts(SDNode *N);
- void SelectS_BFE(SDNode *N);
- bool isCBranchSCC(const SDNode *N) const;
- void SelectBRCOND(SDNode *N);
- void SelectFMAD_FMA(SDNode *N);
- void SelectATOMIC_CMP_SWAP(SDNode *N);
- void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
- void SelectDS_GWS(SDNode *N, unsigned IntrID);
- void SelectInterpP1F16(SDNode *N);
- void SelectINTRINSIC_W_CHAIN(SDNode *N);
- void SelectINTRINSIC_WO_CHAIN(SDNode *N);
- void SelectINTRINSIC_VOID(SDNode *N);
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "AMDGPUGenDAGISel.inc"
-};
-
-class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
- const R600Subtarget *Subtarget;
-
- bool isConstantLoad(const MemSDNode *N, int cbID) const;
- bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
- SDValue& Offset);
-public:
- explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {}
-
- void Select(SDNode *N) override;
-
- bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
- bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) override;
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void PreprocessISelDAG() override {}
-
-protected:
- // Include the pieces autogenerated from the target description.
-#include "R600GenDAGISel.inc"
-};
-
static SDValue stripBitcast(SDValue Val) {
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
@@ -351,7 +79,7 @@ static bool isExtractHiElt(SDValue In, SDValue &Out) {
static SDValue stripExtractLoElt(SDValue In) {
if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) {
- if (Idx->isNullValue() && In.getValueSizeInBits() <= 32)
+ if (Idx->isZero() && In.getValueSizeInBits() <= 32)
return In.getOperand(0);
}
}
@@ -386,11 +114,11 @@ FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
-/// This pass converts a legalized DAG into a R600-specific
-// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
- CodeGenOpt::Level OptLevel) {
- return new R600DAGToDAGISel(TM, OptLevel);
+AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
+ TargetMachine *TM /*= nullptr*/,
+ CodeGenOpt::Level OptLevel /*= CodeGenOpt::Default*/)
+ : SelectionDAGISel(*TM, OptLevel) {
+ EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
@@ -468,6 +196,16 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
}
}
+void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AMDGPUArgumentUsageInfo>();
+ AU.addRequired<LegacyDivergenceAnalysis>();
+#ifdef EXPENSIVE_CHECKS
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+#endif
+ SelectionDAGISel::getAnalysisUsage(AU);
+}
+
bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
assert(Subtarget->d16PreservesUnusedBits());
MVT VT = N->getValueType(0).getSimpleVT();
@@ -903,8 +641,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
uint32_t OffsetVal = Offset->getZExtValue();
uint32_t WidthVal = Width->getZExtValue();
- ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
- SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
+ ReplaceNode(N, getBFE32(Signed, SDLoc(N), N->getOperand(0), OffsetVal,
+ WidthVal));
return;
}
case AMDGPUISD::DIV_SCALE: {
@@ -1207,7 +945,14 @@ void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
Ops[8] = N->getOperand(0);
Ops[9] = N->getOperand(4);
- CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
+ // If there are no source modifiers, prefer fmac over fma because it can use
+ // the smaller VOP2 encoding.
+ bool UseFMAC = Subtarget->hasDLInsts() &&
+ cast<ConstantSDNode>(Ops[0])->isZero() &&
+ cast<ConstantSDNode>(Ops[2])->isZero() &&
+ cast<ConstantSDNode>(Ops[4])->isZero();
+ unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), Ops);
}
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
@@ -1707,7 +1452,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
!cast<ConstantSDNode>(Idxen)->getSExtValue() &&
!cast<ConstantSDNode>(Addr64)->getSExtValue()) {
uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
- APInt::getAllOnesValue(32).getZExtValue(); // Size
+ APInt::getAllOnes(32).getZExtValue(); // Size
SDLoc DL(Addr);
const SITargetLowering& Lowering =
@@ -2202,9 +1947,17 @@ bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
return true;
}
-SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
+SDNode *AMDGPUDAGToDAGISel::getBFE32(bool IsSigned, const SDLoc &DL,
SDValue Val, uint32_t Offset,
uint32_t Width) {
+ if (Val->isDivergent()) {
+ unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
+ SDValue Off = CurDAG->getTargetConstant(Offset, DL, MVT::i32);
+ SDValue W = CurDAG->getTargetConstant(Width, DL, MVT::i32);
+
+ return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, Off, W);
+ }
+ unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
// Transformation function, pack the offset and width of a BFE into
// the format expected by the S_BFE_I32 / S_BFE_U32. In the second
// source, bits [5:0] contain the offset and bits [22:16] the width.
@@ -2229,10 +1982,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
if (0 < BVal && BVal <= CVal && CVal < 32) {
bool Signed = N->getOpcode() == ISD::SRA;
- unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
-
- ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
- 32 - CVal));
+ ReplaceNode(N, getBFE32(Signed, SDLoc(N), Shl.getOperand(0), CVal - BVal,
+ 32 - CVal));
return;
}
}
@@ -2255,9 +2006,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
-
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
- Srl.getOperand(0), ShiftVal, WidthVal));
+ ReplaceNode(N, getBFE32(false, SDLoc(N), Srl.getOperand(0), ShiftVal,
+ WidthVal));
return;
}
}
@@ -2277,9 +2027,8 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
if (isMask_32(MaskVal)) {
uint32_t WidthVal = countPopulation(MaskVal);
-
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
- And.getOperand(0), ShiftVal, WidthVal));
+ ReplaceNode(N, getBFE32(false, SDLoc(N), And.getOperand(0), ShiftVal,
+ WidthVal));
return;
}
}
@@ -2306,7 +2055,7 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
break;
unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
- ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
+ ReplaceNode(N, getBFE32(true, SDLoc(N), Src.getOperand(0),
Amt->getZExtValue(), Width));
return;
}
@@ -3111,128 +2860,3 @@ void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
} while (IsModified);
}
-
-bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<R600Subtarget>();
- return SelectionDAGISel::runOnMachineFunction(MF);
-}
-
-bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
- if (!N->readMem())
- return false;
- if (CbId == -1)
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
-
- return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
- true);
- return true;
- }
- return false;
-}
-
-bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue& BaseReg, SDValue &Offset) {
- if (!isa<ConstantSDNode>(Addr)) {
- BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
- return true;
- }
- return false;
-}
-
-void R600DAGToDAGISel::Select(SDNode *N) {
- unsigned int Opc = N->getOpcode();
- if (N->isMachineOpcode()) {
- N->setNodeId(-1);
- return; // Already selected.
- }
-
- switch (Opc) {
- default: break;
- case AMDGPUISD::BUILD_VERTICAL_VECTOR:
- case ISD::SCALAR_TO_VECTOR:
- case ISD::BUILD_VECTOR: {
- EVT VT = N->getValueType(0);
- unsigned NumVectorElts = VT.getVectorNumElements();
- unsigned RegClassID;
- // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
- // that adds a 128 bits reg copy when going through TwoAddressInstructions
- // pass. We want to avoid 128 bits copies as much as possible because they
- // can't be bundled by our scheduler.
- switch(NumVectorElts) {
- case 2: RegClassID = R600::R600_Reg64RegClassID; break;
- case 4:
- if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = R600::R600_Reg128VerticalRegClassID;
- else
- RegClassID = R600::R600_Reg128RegClassID;
- break;
- default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
- }
- SelectBuildVector(N, RegClassID);
- return;
- }
- }
-
- SelectCode(N);
-}
-
-bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *C;
- SDLoc DL(Addr);
-
- if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
- Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
- (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
- } else {
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- }
-
- return true;
-}
-
-bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
- ConstantSDNode *IMMOffset;
-
- if (Addr.getOpcode() == ISD::ADD
- && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && isInt<16>(IMMOffset->getZExtValue())) {
-
- Base = Addr.getOperand(0);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- // If the pointer address is constant, we can move it to the offset field.
- } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
- && isInt<16>(IMMOffset->getZExtValue())) {
- Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(CurDAG->getEntryNode()),
- R600::ZERO, MVT::i32);
- Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
- MVT::i32);
- return true;
- }
-
- // Default case, no offset
- Base = Addr;
- Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
- return true;
-}
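
getBFE32 keeps the scalar path's operand packing: S_BFE_U32/S_BFE_I32 take a single second source with the offset in bits [5:0] and the width in bits [22:16], while the new divergent path uses the three-operand V_BFE form. The sketch below (hypothetical helpers, unsigned case only) shows that packing and the reference semantics of the extract:

#include <cassert>
#include <cstdint>

// packBFE builds the second S_BFE_U32 source operand: offset in bits [5:0],
// width in bits [22:16].
static uint32_t packBFE(uint32_t Offset, uint32_t Width) {
  return (Width << 16) | (Offset & 0x3f);
}

// Reference semantics of an unsigned bitfield extract of Width bits at Offset.
static uint32_t bfe_u32(uint32_t Val, uint32_t Offset, uint32_t Width) {
  uint32_t Mask = Width < 32 ? (1u << Width) - 1u : ~0u;
  return (Val >> Offset) & Mask;
}

int main() {
  assert(packBFE(/*Offset=*/8, /*Width=*/4) == ((4u << 16) | 8u));
  assert(bfe_u32(0xABCD1234, /*Offset=*/8, /*Width=*/4) == 0x2);
  return 0;
}
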
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
new file mode 100644
index 000000000000..c1d9673f067e
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -0,0 +1,256 @@
+//===-- AMDGPUISelDAGToDAG.h - A dag to dag inst selector for AMDGPU ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the AMDGPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
+
+#include "GCNSubtarget.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+static inline bool isNullConstantOrUndef(SDValue V) {
+ if (V.isUndef())
+ return true;
+
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
+ return Const != nullptr && Const->isZero();
+}
+
+static inline bool getConstantValue(SDValue N, uint32_t &Out) {
+ // This is only used for packed vectors, where using 0 for undef should
+ // always be good.
+ if (N.isUndef()) {
+ Out = 0;
+ return true;
+ }
+
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
+ Out = C->getAPIntValue().getSExtValue();
+ return true;
+ }
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
+ Out = C->getValueAPF().bitcastToAPInt().getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
+// TODO: Handle undef as zero
+static inline SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG,
+ bool Negate = false) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2);
+ uint32_t LHSVal, RHSVal;
+ if (getConstantValue(N->getOperand(0), LHSVal) &&
+ getConstantValue(N->getOperand(1), RHSVal)) {
+ SDLoc SL(N);
+ uint32_t K = Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
+ : (LHSVal & 0xffff) | (RHSVal << 16);
+ return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0),
+ DAG.getTargetConstant(K, SL, MVT::i32));
+ }
+
+ return nullptr;
+}
+
+static inline SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) {
+ return packConstantV2I16(N, DAG, true);
+}
+} // namespace
+
+/// AMDGPU specific code to select AMDGPU machine instructions for
+/// SelectionDAG operations.
+class AMDGPUDAGToDAGISel : public SelectionDAGISel {
+ // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
+ // make the right decision when generating code for different targets.
+ const GCNSubtarget *Subtarget;
+
+ // Default FP mode for the current function.
+ AMDGPU::SIModeRegisterDefaults Mode;
+
+ bool EnableLateStructurizeCFG;
+
+ // Instructions that will be lowered with a final instruction that zeros the
+ // high result bits.
+ bool fp16SrcZerosHighBits(unsigned Opc) const;
+
+public:
+ explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
+ CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
+ ~AMDGPUDAGToDAGISel() override = default;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ bool matchLoadD16FromBuildVector(SDNode *N) const;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void PreprocessISelDAG() override;
+ void Select(SDNode *N) override;
+ StringRef getPassName() const override;
+ void PostprocessISelDAG() override;
+
+protected:
+ void SelectBuildVector(SDNode *N, unsigned RegClassID);
+
+private:
+ std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
+ bool isNoNanSrc(SDValue N) const;
+ bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
+ bool isNegInlineImmediate(const SDNode *N) const {
+ return isInlineImmediate(N, true);
+ }
+
+ bool isInlineImmediate16(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate32(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate64(int64_t Imm) const {
+ return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
+ }
+
+ bool isInlineImmediate(const APFloat &Imm) const {
+ return Subtarget->getInstrInfo()->isInlineConstant(Imm);
+ }
+
+ bool isVGPRImm(const SDNode *N) const;
+ bool isUniformLoad(const SDNode *N) const;
+ bool isUniformBr(const SDNode *N) const;
+
+ bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
+ SDValue &RHS) const;
+
+ MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
+
+ SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
+ SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+
+ const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
+ virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
+ virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
+ bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
+ unsigned Size) const;
+ bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
+ bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1) const;
+ bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
+ SDValue &Offset1, unsigned Size) const;
+ bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset, SDValue &Offen,
+ SDValue &Idxen, SDValue &Addr64) const;
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
+ SDValue &SOffset, SDValue &Offset) const;
+ bool SelectMUBUFScratchOffen(SDNode *Parent, SDValue Addr, SDValue &RSrc,
+ SDValue &VAddr, SDValue &SOffset,
+ SDValue &ImmOffset) const;
+ bool SelectMUBUFScratchOffset(SDNode *Parent, SDValue Addr, SDValue &SRsrc,
+ SDValue &Soffset, SDValue &Offset) const;
+
+ bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+ SDValue &Offset) const;
+
+ bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset, uint64_t FlatVariant) const;
+ bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
+ SDValue &Offset) const;
+ bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &VOffset, SDValue &Offset) const;
+ bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
+ SDValue &Offset) const;
+
+ bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
+ bool &Imm) const;
+ SDValue Expand32BitAddress(SDValue Addr) const;
+ bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
+ bool &Imm) const;
+ bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
+ bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
+ bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
+ bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
+
+ bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool AllowAbs = true) const;
+ bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
+ bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+ bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
+ SDValue &Clamp, SDValue &Omod) const;
+
+ bool SelectVOP3OMods(SDValue In, SDValue &Src, SDValue &Clamp,
+ SDValue &Omod) const;
+
+ bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
+ unsigned &Mods) const;
+ bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+
+ SDValue getHi16Elt(SDValue In) const;
+
+ SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;
+
+ void SelectADD_SUB_I64(SDNode *N);
+ void SelectAddcSubb(SDNode *N);
+ void SelectUADDO_USUBO(SDNode *N);
+ void SelectDIV_SCALE(SDNode *N);
+ void SelectMAD_64_32(SDNode *N);
+ void SelectFMA_W_CHAIN(SDNode *N);
+ void SelectFMUL_W_CHAIN(SDNode *N);
+ SDNode *getBFE32(bool IsSigned, const SDLoc &DL, SDValue Val, uint32_t Offset,
+ uint32_t Width);
+ void SelectS_BFEFromShifts(SDNode *N);
+ void SelectS_BFE(SDNode *N);
+ bool isCBranchSCC(const SDNode *N) const;
+ void SelectBRCOND(SDNode *N);
+ void SelectFMAD_FMA(SDNode *N);
+ void SelectATOMIC_CMP_SWAP(SDNode *N);
+ void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
+ void SelectDS_GWS(SDNode *N, unsigned IntrID);
+ void SelectInterpP1F16(SDNode *N);
+ void SelectINTRINSIC_W_CHAIN(SDNode *N);
+ void SelectINTRINSIC_WO_CHAIN(SDNode *N);
+ void SelectINTRINSIC_VOID(SDNode *N);
+
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "AMDGPUGenDAGISel.inc"
+};
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUISELDAGTODAG_H
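
For illustration, the constant packing done by packConstantV2I16 above amounts to folding two 16-bit lane values into one 32-bit immediate, optionally negating each lane first. A minimal standalone sketch, plain C++ with no SelectionDAG; the helper name packV2I16 is made up for this example:

#include <cassert>
#include <cstdint>

// Standalone model of the packing done by packConstantV2I16: the two 16-bit
// lanes of a v2i16 build_vector become one 32-bit immediate.
static uint32_t packV2I16(uint32_t LHSVal, uint32_t RHSVal, bool Negate) {
  return Negate ? (-LHSVal & 0xffff) | (-RHSVal << 16)
                : (LHSVal & 0xffff) | (RHSVal << 16);
}

int main() {
  // <1, 2> packs to 0x00020001; the negated form packs <-1, -2>.
  assert(packV2I16(1, 2, /*Negate=*/false) == 0x00020001u);
  assert(packV2I16(1, 2, /*Negate=*/true) == 0xfffeffffu);
  return 0;
}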
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d68488ccb342..523fa2d3724b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -45,17 +45,13 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
}
unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
- KnownBits Known = DAG.computeKnownBits(Op);
- return VT.getSizeInBits() - Known.countMinLeadingZeros();
+ return DAG.computeKnownBits(Op).countMaxActiveBits();
}
unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
-
  // In order for this to be a signed 24-bit value, bit 23 must
// be a sign bit.
- return VT.getSizeInBits() - DAG.ComputeNumSignBits(Op);
+ return DAG.ComputeMinSignedBits(Op);
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
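
The two helpers above now defer to KnownBits::countMaxActiveBits and SelectionDAG::ComputeMinSignedBits: the minimum unsigned and signed widths that preserve the value. A minimal sketch of those semantics on plain 32-bit integers, assuming GCC/Clang __builtin_clz; the helper names are illustrative only:

#include <cassert>
#include <cstdint>

// Minimum bits so that zero-extending from that width reproduces V.
static unsigned numBitsUnsigned(uint32_t V) {
  return V == 0 ? 0 : 32 - __builtin_clz(V);
}

// Minimum bits so that sign-extending from that width reproduces V; one bit
// beyond the highest magnitude bit is needed for the sign.
static unsigned numBitsSigned(int32_t V) {
  uint32_t U = V < 0 ? ~uint32_t(V) : uint32_t(V);
  return (U == 0 ? 0 : 32 - __builtin_clz(U)) + 1;
}

int main() {
  assert(numBitsUnsigned(255) == 8 && numBitsUnsigned(256) == 9);
  assert(numBitsSigned(127) == 8 && numBitsSigned(-128) == 8);
  // The signed-24-bit check later in this file (numBitsSigned(Op) <= 24)
  // accepts exactly the values a signed 24-bit multiply can represent.
  assert(numBitsSigned(-(1 << 23)) == 24 && numBitsSigned(1 << 23) == 25);
  return 0;
}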
@@ -1042,7 +1038,7 @@ CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
/// In order to correctly lower the arguments we need to know the size of each
/// argument. Since Ins[x].VT gives us the size of the register that will
/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
-/// for the orignal function argument so that we can deduce the correct memory
+/// for the original function argument so that we can deduce the correct memory
/// type to use for Ins[x]. In most cases the correct memory type will be
/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
/// we have a kernel argument of type v8i8, this argument will be split into
@@ -1210,10 +1206,8 @@ SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
ArgChains.push_back(Chain);
// Add a chain value for each stack argument corresponding
- for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
- UE = DAG.getEntryNode().getNode()->use_end();
- U != UE; ++U) {
- if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) {
+ for (SDNode *U : DAG.getEntryNode().getNode()->uses()) {
+ if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
if (FI->getIndex() < 0) {
int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
@@ -1334,14 +1328,6 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-bool AMDGPUTargetLowering::hasDefinedInitializer(const GlobalValue *GV) {
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (!GVar || !GVar->hasInitializer())
- return false;
-
- return !isa<UndefValue>(GVar->getInitializer());
-}
-
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
SDValue Op,
SelectionDAG &DAG) const {
@@ -1378,16 +1364,11 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
"Do not know what to do with an non-zero offset");
// TODO: We could emit code to handle the initialization somewhere.
- if (!hasDefinedInitializer(GV)) {
- unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
- return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
- }
+ // We ignore the initializer for now and legalize it to allow selection.
+  // The initializer will be diagnosed during assembly emission anyway.
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
-
- const Function &Fn = DAG.getMachineFunction().getFunction();
- DiagnosticInfoUnsupported BadInit(
- Fn, "unsupported initializer for address space", SDLoc(Op).getDebugLoc());
- DAG.getContext()->diagnose(BadInit);
return SDValue();
}
@@ -1856,6 +1837,9 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
}
if (isTypeLegal(MVT::i64)) {
+ // The algorithm here is based on ideas from "Software Integer Division",
+ // Tom Rodeheffer, August 2008.
+
MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -1890,37 +1874,35 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
+ // First round of UNR (Unsigned integer Newton-Raphson).
SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
Zero);
- SDValue Mulhi1_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
- One);
-
+ SDValue Mulhi1_Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, One);
SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo,
Mulhi1_Lo, Zero1);
SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi,
Mulhi1_Hi, Add1_Lo.getValue(1));
- SDValue Add1_HiNc = DAG.getNode(ISD::ADD, DL, HalfVT, Rcp_Hi, Mulhi1_Hi);
SDValue Add1 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
+ // Second round of UNR.
SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
Zero);
- SDValue Mulhi2_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
- One);
-
+ SDValue Mulhi2_Hi =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, One);
SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo,
Mulhi2_Lo, Zero1);
- SDValue Add2_HiC = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_HiNc,
- Mulhi2_Hi, Add1_Lo.getValue(1));
- SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add2_HiC,
- Zero, Add2_Lo.getValue(1));
+ SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Hi,
+ Mulhi2_Hi, Add2_Lo.getValue(1));
SDValue Add2 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
+
SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
@@ -2211,13 +2193,10 @@ SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64);
const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
// Extract the upper half, since this is where we will find the sign and
// exponent.
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+ SDValue Hi = getHiHalf64(Src, DAG);
SDValue Exp = extractF64Exponent(Hi, SL, DAG);
@@ -2380,72 +2359,50 @@ static bool isCttzOpc(unsigned Opc) {
SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- bool ZeroUndef = Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF ||
- Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
-
- unsigned ISDOpc, NewOpc;
- if (isCtlzOpc(Op.getOpcode())) {
- ISDOpc = ISD::CTLZ_ZERO_UNDEF;
- NewOpc = AMDGPUISD::FFBH_U32;
- } else if (isCttzOpc(Op.getOpcode())) {
- ISDOpc = ISD::CTTZ_ZERO_UNDEF;
- NewOpc = AMDGPUISD::FFBL_B32;
- } else
- llvm_unreachable("Unexpected OPCode!!!");
-
-
- if (ZeroUndef && Src.getValueType() == MVT::i32)
- return DAG.getNode(NewOpc, SL, MVT::i32, Src);
- SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
-
- const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- const SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
-
- EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), MVT::i32);
-
- SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo;
- SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ);
-
- SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo);
- SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi);
-
- const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
- SDValue Add, NewOpr;
- if (isCtlzOpc(Op.getOpcode())) {
- Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprLo, Bits32);
- // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
- NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprHi);
- } else {
- Add = DAG.getNode(ISD::ADD, SL, MVT::i32, OprHi, Bits32);
- // cttz(x) = lo_32(x) == 0 ? cttz(hi_32(x)) + 32 : cttz(lo_32(x))
- NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0orLo0, Add, OprLo);
+ assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode()));
+ bool Ctlz = isCtlzOpc(Op.getOpcode());
+ unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;
+
+ bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
+ Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
+
+ if (Src.getValueType() == MVT::i32) {
+    // (ctlz src) -> (umin (ffbh src), 32)
+    // (cttz src) -> (umin (ffbl src), 32)
+ // (ctlz_zero_undef src) -> (ffbh src)
+ // (cttz_zero_undef src) -> (ffbl src)
+ SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
+ if (!ZeroUndef) {
+ const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const32);
+ }
+ return NewOpr;
}
- if (!ZeroUndef) {
- // Test if the full 64-bit input is zero.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(Src, DAG);
- // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
- // which we probably don't want.
- SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi;
- SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ);
- SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0);
+ SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo);
+ SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi);
- // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
- // with the same cycles, otherwise it is slower.
- // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src,
- // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ);
+ // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
+ // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
+ // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
+ // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
- const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32);
+ unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT;
+ const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
+ if (Ctlz)
+ OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
+ else
+ OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
- // The instruction returns -1 for 0 input, but the defined intrinsic
- // behavior is to return the number of bits.
- NewOpr = DAG.getNode(ISD::SELECT, SL, MVT::i32,
- SrcIsZero, Bits32, NewOpr);
+ SDValue NewOpr;
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
+ if (!ZeroUndef) {
+ const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32);
+ NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
}
return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
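
The rewritten 64-bit expansion above leans on two facts: ffbh/ffbl return -1 for a zero input, and the saturating add keeps a zero half from winning the umin. A minimal host-side sketch of the two umin3 formulas, assuming GCC/Clang builtins; the helper names are illustrative only:

#include <algorithm>
#include <cassert>
#include <cstdint>

// ffbh/ffbl as described above: count leading/trailing zeros, returning
// all-ones for a zero input.
static uint32_t ffbh(uint32_t V) { return V ? __builtin_clz(V) : ~0u; }
static uint32_t ffbl(uint32_t V) { return V ? __builtin_ctz(V) : ~0u; }

// Unsigned saturating add.
static uint32_t uaddsat(uint32_t A, uint32_t B) {
  uint32_t S = A + B;
  return S < A ? ~0u : S;
}

// (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
static uint32_t ctlz64(uint64_t X) {
  uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
  return std::min({ffbh(Hi), uaddsat(ffbh(Lo), 32), uint32_t(64)});
}

// (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
static uint32_t cttz64(uint64_t X) {
  uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
  return std::min({uaddsat(ffbl(Hi), 32), ffbl(Lo), uint32_t(64)});
}

int main() {
  assert(ctlz64(0) == 64 && cttz64(0) == 64);
  assert(ctlz64(1) == 63 && cttz64(1) == 0);
  assert(ctlz64(uint64_t(1) << 40) == 23 && cttz64(uint64_t(1) << 40) == 40);
  return 0;
}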
@@ -2453,87 +2410,128 @@ SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) cons
SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
- // Unsigned
- // cul2f(ulong u)
- //{
- // uint lz = clz(u);
- // uint e = (u != 0) ? 127U + 63U - lz : 0;
- // u = (u << lz) & 0x7fffffffffffffffUL;
- // ulong t = u & 0xffffffffffUL;
- // uint v = (e << 23) | (uint)(u >> 40);
- // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
- // return as_float(v + r);
- //}
- // Signed
- // cl2f(long l)
- //{
- // long s = l >> 63;
- // float r = cul2f((l + s) ^ s);
- // return s ? -r : r;
- //}
+ // The regular method converting a 64-bit integer to float roughly consists of
+ // 2 steps: normalization and rounding. In fact, after normalization, the
+ // conversion from a 64-bit integer to a float is essentially the same as the
+ // one from a 32-bit integer. The only difference is that it has more
+ // trailing bits to be rounded. To leverage the native 32-bit conversion, a
+ // 64-bit integer could be preprocessed and fit into a 32-bit integer then
+ // converted into the correct float number. The basic steps for the unsigned
+ // conversion are illustrated in the following pseudo code:
+ //
+ // f32 uitofp(i64 u) {
+ // i32 hi, lo = split(u);
+ // // Only count the leading zeros in hi as we have native support of the
+ // // conversion from i32 to f32. If hi is all 0s, the conversion is
+ // // reduced to a 32-bit one automatically.
+ // i32 shamt = clz(hi); // Return 32 if hi is all 0s.
+ // u <<= shamt;
+ // hi, lo = split(u);
+ // hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo.
+ // // convert it as a 32-bit integer and scale the result back.
+ // return uitofp(hi) * 2^(32 - shamt);
+ // }
+ //
+ // The signed one follows the same principle but uses 'ffbh_i32' to count its
+ // sign bits instead. If 'ffbh_i32' is not available, its absolute value is
+  // converted instead, followed by negation based on its sign bit.
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- SDValue L = Src;
- SDValue S;
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(Src, DAG);
+ SDValue Sign;
+ SDValue ShAmt;
+ if (Signed && Subtarget->isGCN()) {
+ // We also need to consider the sign bit in Lo if Hi has just sign bits,
+ // i.e. Hi is 0 or -1. However, that only needs to take the MSB into
+ // account. That is, the maximal shift is
+ // - 32 if Lo and Hi have opposite signs;
+ // - 33 if Lo and Hi have the same sign.
+ //
+ // Or, MaxShAmt = 33 + OppositeSign, where
+ //
+ // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
+ // - -1 if Lo and Hi have opposite signs; and
+ // - 0 otherwise.
+ //
+ // All in all, ShAmt is calculated as
+ //
+ // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
+ //
+ // or
+ //
+ // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
+ //
+ // to reduce the critical path.
+ SDValue OppositeSign = DAG.getNode(
+ ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
+ DAG.getConstant(31, SL, MVT::i32));
+ SDValue MaxShAmt =
+ DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
+ OppositeSign);
+ // Count the leading sign bits.
+ ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
+ // Different from unsigned conversion, the shift should be one bit less to
+ // preserve the sign bit.
+ ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
+ DAG.getConstant(1, SL, MVT::i32));
+ ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
+ } else {
+ if (Signed) {
+ // Without 'ffbh_i32', only leading zeros could be counted. Take the
+ // absolute value first.
+ Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src,
+ DAG.getConstant(63, SL, MVT::i64));
+ SDValue Abs =
+ DAG.getNode(ISD::XOR, SL, MVT::i64,
+ DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign);
+ std::tie(Lo, Hi) = split64BitValue(Abs, DAG);
+ }
+ // Count the leading zeros.
+ ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi);
+ // The shift amount for signed integers is [0, 32].
+ }
+ // Normalize the given 64-bit integer.
+ SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt);
+ // Split it again.
+ std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
+ // Calculate the adjust bit for rounding.
+ // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
+ SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
+ DAG.getConstant(1, SL, MVT::i32), Lo);
+ // Get the 32-bit normalized integer.
+ Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
+ // Convert the normalized 32-bit integer into f32.
+ unsigned Opc =
+ (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
+ SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm);
+
+  // Finally, we need to scale back the converted floating-point number, as
+  // the original 64-bit integer was converted as a 32-bit one.
+ ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
+ ShAmt);
+ // On GCN, use LDEXP directly.
+ if (Subtarget->isGCN())
+ return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt);
+
+ // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
+ // part directly to emulate the multiplication of 2^ShAmt. That 8-bit
+ // exponent is enough to avoid overflowing into the sign bit.
+ SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt,
+ DAG.getConstant(23, SL, MVT::i32));
+ SDValue IVal =
+ DAG.getNode(ISD::ADD, SL, MVT::i32,
+ DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);
if (Signed) {
- const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64);
- S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit);
-
- SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S);
- L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S);
+ // Set the sign bit.
+ Sign = DAG.getNode(ISD::SHL, SL, MVT::i32,
+ DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign),
+ DAG.getConstant(31, SL, MVT::i32));
+ IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
}
-
- EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), MVT::f32);
-
-
- SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
- SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64);
- SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L);
- LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ);
-
- SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32);
- SDValue E = DAG.getSelect(SL, MVT::i32,
- DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE),
- DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ),
- ZeroI32);
-
- SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64,
- DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ),
- DAG.getConstant((-1ULL) >> 1, SL, MVT::i64));
-
- SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U,
- DAG.getConstant(0xffffffffffULL, SL, MVT::i64));
-
- SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64,
- U, DAG.getConstant(40, SL, MVT::i64));
-
- SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32,
- DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)),
- DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl));
-
- SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64);
- SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT);
- SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ);
-
- SDValue One = DAG.getConstant(1, SL, MVT::i32);
-
- SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One);
-
- SDValue R = DAG.getSelect(SL, MVT::i32,
- RCmp,
- One,
- DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32));
- R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R);
- R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R);
-
- if (!Signed)
- return R;
-
- SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R);
- return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R);
+ return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
}
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
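
The unsigned pseudo code in the comment above can be exercised on the host: normalize, fold the discarded low bits into a sticky bit, convert as a 32-bit integer, then scale back. A minimal sketch assuming round-to-nearest-even and GCC/Clang __builtin_clz, with std::ldexp standing in for LDEXP:

#include <cassert>
#include <cmath>
#include <cstdint>

// Host-side model of the uitofp(i64) scheme sketched in the comment above.
static float uitofp64(uint64_t U) {
  uint32_t Hi = uint32_t(U >> 32);
  uint32_t ShAmt = Hi ? __builtin_clz(Hi) : 32; // clz(hi); 32 if hi is all 0s.
  uint64_t Norm = U << ShAmt;                   // Normalize.
  Hi = uint32_t(Norm >> 32);
  uint32_t Lo = uint32_t(Norm);
  Hi |= (Lo != 0) ? 1 : 0;                       // Sticky bit for rounding.
  return std::ldexp(float(Hi), 32 - int(ShAmt)); // Scale the result back.
}

int main() {
  const uint64_t Tests[] = {0, 1, 0x12345678, 0x8000000000000001ull,
                            0xffffffffffffffffull};
  for (uint64_t U : Tests)
    assert(uitofp64(U) == float(U)); // Matches the native conversion.
  return 0;
}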
@@ -2541,12 +2539,8 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDLoc SL(Op);
SDValue Src = Op.getOperand(0);
- SDValue BC = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(0, SL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, BC,
- DAG.getConstant(1, SL, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(Src, DAG);
SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
SL, MVT::f64, Hi);
@@ -2878,7 +2872,7 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
// as unsigned 24-bit values.
- AMDGPUTargetLowering::numBitsSigned(Op, DAG) < 24;
+ AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24;
}
static SDValue simplifyMul24(SDNode *Node24,
@@ -2892,8 +2886,22 @@ static SDValue simplifyMul24(SDNode *Node24,
unsigned NewOpcode = Node24->getOpcode();
if (IsIntrin) {
unsigned IID = cast<ConstantSDNode>(Node24->getOperand(0))->getZExtValue();
- NewOpcode = IID == Intrinsic::amdgcn_mul_i24 ?
- AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ switch (IID) {
+ case Intrinsic::amdgcn_mul_i24:
+ NewOpcode = AMDGPUISD::MUL_I24;
+ break;
+ case Intrinsic::amdgcn_mul_u24:
+ NewOpcode = AMDGPUISD::MUL_U24;
+ break;
+ case Intrinsic::amdgcn_mulhi_i24:
+ NewOpcode = AMDGPUISD::MULHI_I24;
+ break;
+ case Intrinsic::amdgcn_mulhi_u24:
+ NewOpcode = AMDGPUISD::MULHI_U24;
+ break;
+ default:
+ llvm_unreachable("Expected 24-bit mul intrinsic");
+ }
}
APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
@@ -3102,6 +3110,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
switch (IID) {
case Intrinsic::amdgcn_mul_i24:
case Intrinsic::amdgcn_mul_u24:
+ case Intrinsic::amdgcn_mulhi_i24:
+ case Intrinsic::amdgcn_mulhi_u24:
return simplifyMul24(N, DCI);
case Intrinsic::amdgcn_fract:
case Intrinsic::amdgcn_rsq:
@@ -3281,11 +3291,9 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
// srl i64:x, C for C >= 32
// =>
// build_pair (srl hi_32(x), C - 32), 0
- SDValue One = DAG.getConstant(1, SL, MVT::i32);
SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
- SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One);
+ SDValue Hi = getHiHalf64(LHS, DAG);
SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
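
The combine above rewrites a 64-bit logical shift right by a constant C >= 32 as a 32-bit shift of the high half paired with a zero high word. A quick standalone check of that identity; the helper name is illustrative only:

#include <cassert>
#include <cstdint>

// srl i64:x, C for C >= 32  ==>  build_pair (srl hi_32(x), C - 32), 0
static uint64_t srl64ViaHiHalf(uint64_t X, unsigned C) {
  assert(C >= 32 && C < 64);
  uint32_t Hi = uint32_t(X >> 32);
  return uint64_t(Hi >> (C - 32)); // The high result word is zero.
}

int main() {
  const uint64_t X = 0x123456789abcdef0ull;
  for (unsigned C = 32; C < 64; ++C)
    assert(srl64ViaHiHalf(X, C) == (X >> C));
  return 0;
}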
@@ -3355,7 +3363,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
KnownBits Known = DAG.computeKnownBits(Amt);
unsigned Size = VT.getScalarSizeInBits();
if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
- (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size))) {
+ (Known.countMaxActiveBits() <= Log2_32(Size))) {
EVT MidVT = VT.isVector() ?
EVT::getVectorVT(*DAG.getContext(), MVT::i32,
VT.getVectorNumElements()) : MVT::i32;
@@ -3522,7 +3530,7 @@ SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
static bool isNegativeOne(SDValue Val) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
- return C->isAllOnesValue();
+ return C->isAllOnes();
return false;
}
@@ -3557,7 +3565,7 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C
SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const {
ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
- if (!CmpRhs || !CmpRhs->isNullValue())
+ if (!CmpRhs || !CmpRhs->isZero())
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -4341,6 +4349,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(RET_GFX_FLAG)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(DWORDADDR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index e61021d451f8..03632ac18598 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -35,9 +35,15 @@ private:
SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const;
public:
+  /// \returns The minimum number of bits needed to store the value of \p Op as an
+ /// unsigned integer. Truncating to this size and then zero-extending to the
+ /// original size will not change the value.
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
+
+  /// \returns The minimum number of bits needed to store the value of \p Op as a
+ /// signed integer. Truncating to this size and then sign-extending to the
+ /// original size will not change the value.
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
- static bool hasDefinedInitializer(const GlobalValue *GV);
protected:
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -337,7 +343,7 @@ namespace AMDGPUISD {
enum NodeType : unsigned {
// AMDIL ISD Opcodes
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- UMUL, // 32bit unsigned multiplication
+ UMUL, // 32bit unsigned multiplication
BRANCH_COND,
// End AMDIL ISD Opcodes
@@ -360,6 +366,9 @@ enum NodeType : unsigned {
// Return with values from a non-entry function.
RET_FLAG,
+ // Return with values from a non-entry function (AMDGPU_Gfx CC).
+ RET_GFX_FLAG,
+
DWORDADDR,
FRACT,
@@ -416,10 +425,10 @@ enum NodeType : unsigned {
DOT4,
CARRY,
BORROW,
- BFE_U32, // Extract range of bits with zero extension to 32-bits.
- BFE_I32, // Extract range of bits with sign extension to 32-bits.
- BFI, // (src0 & src1) | (~src0 & src2)
- BFM, // Insert a range of bits into a 32-bit word.
+ BFE_U32, // Extract range of bits with zero extension to 32-bits.
+ BFE_I32, // Extract range of bits with sign extension to 32-bits.
+ BFI, // (src0 & src1) | (~src0 & src2)
+ BFM, // Insert a range of bits into a 32-bit word.
FFBH_U32, // ctlz with -1 if input is zero.
FFBH_I32,
FFBL_B32, // cttz with -1 if input is zero.
@@ -528,7 +537,6 @@ enum NodeType : unsigned {
LAST_AMDGPU_ISD_NUMBER
};
-
} // End namespace AMDGPUISD
} // End namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 06aa0055e4bb..88b4ec53a2a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -17,7 +17,6 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSubtarget.h"
-#include "R600Subtarget.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -149,7 +148,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
Function *I =
Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
- SmallVector<Value *, 8> Args(II.arg_operands());
+ SmallVector<Value *, 8> Args(II.args());
unsigned EndIndex =
OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
@@ -440,7 +439,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (!CWidth || !COffset)
break;
- // The case of Width == 0 is handled above, which makes this tranformation
+ // The case of Width == 0 is handled above, which makes this transformation
// safe. If Width == 0, then the ashr and lshr instructions become poison
// value since the shift amount would be equal to the bit size.
assert(Width != 0);
@@ -586,8 +585,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
MDNode *MD = MDNode::get(II.getContext(), MDArgs);
Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
- NewCall->addAttribute(AttributeList::FunctionIndex,
- Attribute::Convergent);
+ NewCall->addFnAttr(Attribute::Convergent);
NewCall->takeName(&II);
return IC.replaceInstUsesWith(II, NewCall);
}
@@ -712,8 +710,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
MDNode *MD = MDNode::get(II.getContext(), MDArgs);
Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
- NewCall->addAttribute(AttributeList::FunctionIndex,
- Attribute::Convergent);
+ NewCall->addFnAttr(Attribute::Convergent);
NewCall->takeName(&II);
return IC.replaceInstUsesWith(II, NewCall);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 8e7a6a7029c6..b1263618c5db 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -76,8 +76,8 @@ struct ImageDimIntrinsicInfo {
};
const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr);
-const ImageDimIntrinsicInfo *getImageDimInstrinsicByBaseOpcode(unsigned BaseOpcode,
- unsigned Dim);
+const ImageDimIntrinsicInfo *
+getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim);
} // end AMDGPU namespace
} // End llvm namespace
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 0f9cb712f820..391dc8428539 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -103,9 +103,6 @@ def AMDGPUconstdata_ptr : SDNode<
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-// Force dependencies for vector trunc stores
-def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
-
def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
// out = a - floor(a)
@@ -282,11 +279,18 @@ def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]
>;
-def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
+// mulhi24 yields the high-order 16 bits of the 48-bit result. Here's an example
+// that shows mulhi24 is not associative:
+//
+// Given a = 0x10002, b = c = 0xffffff:
+// mulhi24(mulhi24(a, b), c) = mulhi24(0x100, 0xffffff) = 0
+// Which is not equal to:
+// mulhi24(a, mulhi24(b, c)) = mulhi24(0x10002, 0xffff) = 1
+def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
+ [SDNPCommutative]
>;
-def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
- [SDNPCommutative, SDNPAssociative]
+def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
+ [SDNPCommutative]
>;
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
@@ -329,11 +333,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
]>;
-def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
-
-def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
- [SDNPHasChain, SDNPSideEffect]>;
-
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
@@ -360,6 +359,10 @@ def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPt
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
+def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
+
//===----------------------------------------------------------------------===//
// Intrinsic/Custom node compatibility PatFrags
@@ -443,6 +446,14 @@ def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_mul_i24 node:$src0, node:$src1),
(AMDGPUmul_i24_impl node:$src0, node:$src1)]>;
+def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_mulhi_u24 node:$src0, node:$src1),
+ (AMDGPUmulhi_u24_impl node:$src0, node:$src1)]>;
+
+def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1),
+ [(int_amdgcn_mulhi_i24 node:$src0, node:$src1),
+ (AMDGPUmulhi_i24_impl node:$src0, node:$src1)]>;
+
def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_sbfe node:$src0, node:$src1, node:$src2),
(AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)]>;
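
The associativity counterexample in the MULHI_U24 comment above can be reproduced directly, modelling the unsigned form as bits [47:32] of the 48-bit product of the low 24 bits of each operand; the helper name is illustrative only:

#include <cassert>
#include <cstdint>

// Unsigned mulhi24: the high-order 16 bits of the 48-bit product of the low
// 24 bits of each operand.
static uint32_t mulhi_u24(uint32_t A, uint32_t B) {
  uint64_t P = uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff);
  return uint32_t(P >> 32);
}

int main() {
  const uint32_t A = 0x10002, B = 0xffffff, C = 0xffffff;
  assert(mulhi_u24(A, B) == 0x100 && mulhi_u24(B, C) == 0xffff);
  // The two association orders from the comment disagree, which is why the
  // nodes above keep only SDNPCommutative.
  assert(mulhi_u24(mulhi_u24(A, B), C) == 0); // mulhi24(0x100, 0xffffff)
  assert(mulhi_u24(A, mulhi_u24(B, C)) == 1); // mulhi24(0x10002, 0xffff)
  return 0;
}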
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 323aaaf70cd4..28cb2fc57ac7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#define DEBUG_TYPE "amdgpu-isel"
@@ -140,7 +141,7 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
= TRI.getConstrainedRegClassForOperand(Src, *MRI);
Optional<ValueAndVReg> ConstVal =
- getConstantVRegValWithLookThrough(SrcReg, *MRI, true, true);
+ getIConstantVRegValWithLookThrough(SrcReg, *MRI, true);
if (ConstVal) {
unsigned MovOpc =
STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
@@ -608,11 +609,10 @@ bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR_TRUNC(
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *BB = MI.getParent();
- auto ConstSrc1 =
- getConstantVRegValWithLookThrough(Src1, *MRI, true, true, true);
+ auto ConstSrc1 = getAnyConstantVRegValWithLookThrough(Src1, *MRI, true, true);
if (ConstSrc1) {
auto ConstSrc0 =
- getConstantVRegValWithLookThrough(Src0, *MRI, true, true, true);
+ getAnyConstantVRegValWithLookThrough(Src0, *MRI, true, true);
if (ConstSrc0) {
const int64_t K0 = ConstSrc0->Value.getSExtValue();
const int64_t K1 = ConstSrc1->Value.getSExtValue();
@@ -844,7 +844,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
Optional<ValueAndVReg> ConstSelect =
- getConstantVRegValWithLookThrough(LaneSelect, *MRI, true, true);
+ getIConstantVRegValWithLookThrough(LaneSelect, *MRI);
if (ConstSelect) {
// The selector has to be an inline immediate, so we can use whatever for
// the other operands.
@@ -853,7 +853,7 @@ bool AMDGPUInstructionSelector::selectWritelane(MachineInstr &MI) const {
maskTrailingOnes<uint64_t>(STI.getWavefrontSizeLog2()));
} else {
Optional<ValueAndVReg> ConstVal =
- getConstantVRegValWithLookThrough(Val, *MRI, true, true);
+ getIConstantVRegValWithLookThrough(Val, *MRI);
// If the value written is an inline immediate, we can get away without a
// copy to m0.
@@ -928,7 +928,7 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
case Intrinsic::amdgcn_if_break: {
MachineBasicBlock *BB = I.getParent();
- // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
.add(I.getOperand(0))
@@ -1130,7 +1130,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return false;
Optional<ValueAndVReg> Arg =
- getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true);
+ getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
if (Arg.hasValue()) {
const int64_t Value = Arg.getValue().Value.getSExtValue();
@@ -1242,7 +1242,7 @@ bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
}
bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
- // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
+ // FIXME: Manually selecting to avoid dealing with the SReg_1 trick
// SelectionDAG uses for wave32 vs wave64.
MachineBasicBlock *BB = MI.getParent();
BuildMI(*BB, &MI, MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
@@ -1826,8 +1826,9 @@ bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const {
.add(I.getOperand(2))
.add(I.getOperand(3));
- bool Ret = constrainSelectedInstRegOperands(*Select, TII, TRI, RBI) |
- constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
+ bool Ret = false;
+ Ret |= constrainSelectedInstRegOperands(*Select, TII, TRI, RBI);
+ Ret |= constrainSelectedInstRegOperands(*CopySCC, TII, TRI, RBI);
I.eraseFromParent();
return Ret;
}
@@ -2387,7 +2388,7 @@ void AMDGPUInstructionSelector::initM0(MachineInstr &I) const {
STI.ldsRequiresM0Init()) {
MachineBasicBlock *BB = I.getParent();
- // If DS instructions require M0 initializtion, insert it before selecting.
+ // If DS instructions require M0 initialization, insert it before selecting.
BuildMI(*BB, &I, I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addImm(-1);
}
@@ -2465,6 +2466,27 @@ bool AMDGPUInstructionSelector::selectG_AMDGPU_ATOMIC_CMPXCHG(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI) {
+ if (Reg.isPhysical())
+ return false;
+
+ MachineInstr &MI = *MRI.getUniqueVRegDef(Reg);
+ const unsigned Opcode = MI.getOpcode();
+
+ if (Opcode == AMDGPU::COPY)
+ return isVCmpResult(MI.getOperand(1).getReg(), MRI);
+
+ if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
+ Opcode == AMDGPU::G_XOR)
+ return isVCmpResult(MI.getOperand(1).getReg(), MRI) &&
+ isVCmpResult(MI.getOperand(2).getReg(), MRI);
+
+ if (Opcode == TargetOpcode::G_INTRINSIC)
+ return MI.getIntrinsicID() == Intrinsic::amdgcn_class;
+
+ return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
+}
+
bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
MachineOperand &CondOp = I.getOperand(0);
@@ -2488,11 +2510,22 @@ bool AMDGPUInstructionSelector::selectG_BRCOND(MachineInstr &I) const {
BrOpcode = AMDGPU::S_CBRANCH_SCC1;
ConstrainRC = &AMDGPU::SReg_32RegClass;
} else {
- // FIXME: Do we have to insert an and with exec here, like in SelectionDAG?
- // We sort of know that a VCC producer based on the register bank, that ands
- // inactive lanes with 0. What if there was a logical operation with vcc
- // producers in different blocks/with different exec masks?
// FIXME: Should scc->vcc copies and with exec?
+
+    // Unless the value of CondReg is the result of a V_CMP* instruction, we
+    // need to insert an and with exec.
+ if (!isVCmpResult(CondReg, *MRI)) {
+ const bool Is64 = STI.isWave64();
+ const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+ const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+
+ Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
+ BuildMI(*BB, &I, DL, TII.get(Opcode), TmpReg)
+ .addReg(CondReg)
+ .addReg(Exec);
+ CondReg = TmpReg;
+ }
+
CondPhysReg = TRI.getVCC();
BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
ConstrainRC = TRI.getBoolRC();
@@ -3216,6 +3249,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case AMDGPU::G_SBFX:
case AMDGPU::G_UBFX:
return selectG_SBFX_UBFX(I);
+ case AMDGPU::G_SI_CALL:
+ I.setDesc(TII.get(AMDGPU::SI_CALL));
+ return true;
default:
return selectImpl(I, *CoverageInfo);
}
@@ -3977,8 +4013,8 @@ AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
return {Root, 0};
MachineOperand &RHS = RootI->getOperand(2);
- Optional<ValueAndVReg> MaybeOffset
- = getConstantVRegValWithLookThrough(RHS.getReg(), MRI, true);
+ Optional<ValueAndVReg> MaybeOffset =
+ getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
if (!MaybeOffset)
return {Root, 0};
return {RootI->getOperand(1).getReg(), MaybeOffset->Value.getSExtValue()};
@@ -4306,8 +4342,8 @@ AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
/// Get an immediate that must be 32-bits, and treated as zero extended.
static Optional<uint64_t> getConstantZext32Val(Register Reg,
const MachineRegisterInfo &MRI) {
- // getConstantVRegVal sexts any values, so see if that matters.
- Optional<int64_t> OffsetVal = getConstantVRegSExtVal(Reg, MRI);
+ // getIConstantVRegVal sexts any values, so see if that matters.
+ Optional<int64_t> OffsetVal = getIConstantVRegSExtVal(Reg, MRI);
if (!OffsetVal || !isInt<32>(*OffsetVal))
return None;
return Lo_32(*OffsetVal);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index cb05a1cb6369..b70e6883bae2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -14,10 +14,7 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
namespace {
#define GET_GLOBALISEL_PREDICATE_BITSET
@@ -135,7 +132,6 @@ private:
bool hasVgprParts(ArrayRef<GEPInfo> AddrInfo) const;
void getAddrModeInfo(const MachineInstr &Load, const MachineRegisterInfo &MRI,
SmallVectorImpl<GEPInfo> &AddrInfo) const;
- bool selectSMRD(MachineInstr &I, ArrayRef<GEPInfo> AddrInfo) const;
void initM0(MachineInstr &I) const;
bool selectG_LOAD_STORE_ATOMICRMW(MachineInstr &I) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 119c4089d6c2..bad9f6265b36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -237,6 +237,36 @@ def select_oneuse : HasOneUseTernaryOp<select>;
def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
+//===----------------------------------------------------------------------===//
+// PatFrags for shifts
+//===----------------------------------------------------------------------===//
+
+// Constrained shift PatFrags.
+foreach width = [16, 32, 64] in {
+defvar mask = !sub(width, 1);
+
+def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(shl node:$src0, node:$src1), (shl node:$src0, (and node:$src1, mask))]>;
+defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
+def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
+def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
+ (cshl $src1, $src0)>;
+
+def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(srl node:$src0, node:$src1), (srl node:$src0, (and node:$src1, mask))]>;
+defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
+def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
+def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
+ (csrl $src1, $src0)>;
+
+def csra_#width : PatFrags<(ops node:$src0, node:$src1),
+ [(sra node:$src0, node:$src1), (sra node:$src0, (and node:$src1, mask))]>;
+defvar csra = !cast<SDPatternOperator>("csra_"#width);
+def csra_#width#_oneuse : HasOneUseBinOp<csra>;
+def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
+ (csra $src1, $src0)>;
+} // end foreach width
+
def srl_16 : PatFrag<
(ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;
@@ -422,6 +452,16 @@ def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
let MemoryVT = i16;
}
+def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+
+def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
+
def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
let IsAtomic = 1;
let MemoryVT = i32;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 4971b010870d..9e86bd0c2b97 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -95,10 +95,8 @@ bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
bool Changed = false;
for (auto &BB : F)
- for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) {
- Instruction *I = &*BI++;
- Changed |= visit(*I);
- }
+ for (Instruction &I : llvm::make_early_inc_range(BB))
+ Changed |= visit(I);
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c1a9b30a509e..1f898f2ba8b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -59,7 +59,7 @@ static LLT getPow2ScalarType(LLT Ty) {
return LLT::scalar(Pow2Bits);
}
-/// \returs true if this is an odd sized vector which should widen by adding an
+/// \returns true if this is an odd sized vector which should widen by adding an
/// additional element. This is mostly to handle <3 x s16> -> <4 x s16>. This
/// excludes s1 vectors, which should always be scalarized.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
@@ -532,10 +532,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Full set of gfx9 features.
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
- .clampScalar(0, S16, S32)
+ .minScalar(0, S16)
.clampMaxNumElements(0, S16, 2)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32);
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32)
+ .scalarize(0);
getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
.legalFor({S32, S16, V2S16}) // Clamp modifier
@@ -547,9 +548,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
} else if (ST.has16BitInsts()) {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16})
- .clampScalar(0, S16, S32)
- .scalarize(0)
- .widenScalarToNextPow2(0, 32); // FIXME: min should be 16
+ .minScalar(0, S16)
+ .widenScalarToNextMultipleOf(0, 32)
+ .maxScalar(0, S32)
+ .scalarize(0);
// Technically the saturating operations require clamp bit support, but this
// was introduced at the same time as 16-bit operations.
@@ -569,6 +571,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
} else {
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32})
+ .widenScalarToNextMultipleOf(0, 32)
.clampScalar(0, S32, S32)
.scalarize(0);
@@ -603,7 +606,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
auto &Mulh = getActionDefinitionsBuilder({G_UMULH, G_SMULH})
.legalFor({S32})
- .maxScalarOrElt(0, S32);
+ .maxScalar(0, S32);
if (ST.hasVOP3PInsts()) {
Mulh
@@ -812,10 +815,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Split s1->s64 during regbankselect for VALU.
auto &IToFP = getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
- .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
- .lowerFor({{S32, S64}})
- .lowerIf(typeIs(1, S1))
- .customFor({{S64, S64}});
+ .legalFor({{S32, S32}, {S64, S32}, {S16, S32}})
+ .lowerIf(typeIs(1, S1))
+ .customFor({{S32, S64}, {S64, S64}});
if (ST.has16BitInsts())
IToFP.legalFor({{S16, S16}});
IToFP.clampScalar(1, S32, S64)
@@ -941,7 +943,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(1, S32, S64)
.widenScalarToNextPow2(0, 32)
.widenScalarToNextPow2(1, 32)
- .lower();
+ .custom();
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF})
@@ -1266,7 +1268,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// May need relegalization for the scalars.
return std::make_pair(0, EltTy);
})
- .lowerIfMemSizeNotPow2()
.minScalar(0, S32)
.narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
.widenScalarToNextPow2(0)
@@ -1318,7 +1319,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
auto &Atomic = getActionDefinitionsBuilder(G_ATOMICRMW_FADD);
- if (ST.hasLDSFPAtomics()) {
+ if (ST.hasLDSFPAtomicAdd()) {
Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
if (ST.hasGFX90AInsts())
Atomic.legalFor({{S64, LocalPtr}});
@@ -1628,6 +1629,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S32, S64)
.lower();
+ getActionDefinitionsBuilder({G_ROTR, G_ROTL})
+ .scalarize(0)
+ .lower();
+
// TODO: Only Try to form v2s16 with legal packed instructions.
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
@@ -1681,6 +1686,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: Implement
G_FMINIMUM, G_FMAXIMUM}).lower();
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMCPY_INLINE, G_MEMMOVE, G_MEMSET})
+ .lower();
+
getActionDefinitionsBuilder({G_VASTART, G_VAARG, G_BRJT, G_JUMP_TABLE,
G_INDEXED_LOAD, G_INDEXED_SEXTLOAD,
G_INDEXED_ZEXTLOAD, G_INDEXED_STORE})
@@ -1760,6 +1768,9 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeFFloor(MI, MRI, B);
case TargetOpcode::G_BUILD_VECTOR:
return legalizeBuildVector(MI, MRI, B);
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTTZ:
+ return legalizeCTLZ_CTTZ(MI, MRI, B);
default:
return false;
}
@@ -2065,23 +2076,53 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
- assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
+ assert(MRI.getType(Src) == S64);
auto Unmerge = B.buildUnmerge({S32, S32}, Src);
+ auto ThirtyTwo = B.buildConstant(S32, 32);
- auto CvtHi = Signed ?
- B.buildSITOFP(S64, Unmerge.getReg(1)) :
- B.buildUITOFP(S64, Unmerge.getReg(1));
+ if (MRI.getType(Dst) == S64) {
+ auto CvtHi = Signed ? B.buildSITOFP(S64, Unmerge.getReg(1))
+ : B.buildUITOFP(S64, Unmerge.getReg(1));
- auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
+ auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
+ auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
+ .addUse(CvtHi.getReg(0))
+ .addUse(ThirtyTwo.getReg(0));
- auto ThirtyTwo = B.buildConstant(S32, 32);
- auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
- .addUse(CvtHi.getReg(0))
- .addUse(ThirtyTwo.getReg(0));
+ // TODO: Should this propagate fast-math-flags?
+ B.buildFAdd(Dst, LdExp, CvtLo);
+ MI.eraseFromParent();
+ return true;
+ }
- // TODO: Should this propagate fast-math-flags?
- B.buildFAdd(Dst, LdExp, CvtLo);
+ assert(MRI.getType(Dst) == S32);
+
+ auto One = B.buildConstant(S32, 1);
+
+ MachineInstrBuilder ShAmt;
+ if (Signed) {
+ auto ThirtyOne = B.buildConstant(S32, 31);
+ auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1));
+ auto OppositeSign = B.buildAShr(S32, X, ThirtyOne);
+ auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign);
+ auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32},
+ /*HasSideEffects=*/false)
+ .addUse(Unmerge.getReg(1));
+ auto LS2 = B.buildSub(S32, LS, One);
+ ShAmt = B.buildUMin(S32, LS2, MaxShAmt);
+ } else
+ ShAmt = B.buildCTLZ(S32, Unmerge.getReg(1));
+ auto Norm = B.buildShl(S64, Src, ShAmt);
+ auto Unmerge2 = B.buildUnmerge({S32, S32}, Norm);
+ auto Adjust = B.buildUMin(S32, One, Unmerge2.getReg(0));
+ auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
+ auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
+ auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
+ B.buildIntrinsic(Intrinsic::amdgcn_ldexp, ArrayRef<Register>{Dst},
+ /*HasSideEffects=*/false)
+ .addUse(FVal.getReg(0))
+ .addUse(Scale.getReg(0));
MI.eraseFromParent();
return true;
}
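The S32 destination path added above normalizes the 64-bit source before converting: shift left by the leading-zero count of the high word, fold any nonzero low bits into a sticky bit, convert the high 32 bits, then rescale with ldexp. A minimal scalar sketch of the unsigned case, in plain C++ rather than MIR, with __builtin_clz and std::ldexp standing in for G_CTLZ and llvm.amdgcn.ldexp (illustration only):

#include <cmath>
#include <cstdint>

// Scalar model of the unsigned s64 -> s32 lowering sketched above.
float u64ToF32(uint64_t X) {
  uint32_t Hi = uint32_t(X >> 32);
  unsigned ShAmt = Hi ? unsigned(__builtin_clz(Hi)) : 32; // G_CTLZ of the high half
  uint64_t Norm = X << ShAmt;                   // normalize into the top bits
  uint32_t NormHi = uint32_t(Norm >> 32);
  uint32_t Sticky = uint32_t(Norm) ? 1u : 0u;   // umin(1, lo): sticky round bit
  float FVal = float(NormHi | Sticky);          // 32-bit uitofp
  return std::ldexp(FVal, 32 - int(ShAmt));     // undo the normalization
}

The sticky bit only perturbs bit 0 of a word whose top bit is already set, so it influences rounding without double-rounding the value.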
@@ -2183,9 +2224,9 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
- // getConstantVRegValWithLookThrough.
+ // getIConstantVRegValWithLookThrough.
Optional<ValueAndVReg> MaybeIdxVal =
- getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
if (!MaybeIdxVal) // Dynamic case will be selected to register indexing.
return true;
const int64_t IdxVal = MaybeIdxVal->Value.getSExtValue();
@@ -2215,9 +2256,9 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
- // getConstantVRegValWithLookThrough.
+ // getIConstantVRegValWithLookThrough.
Optional<ValueAndVReg> MaybeIdxVal =
- getConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
+ getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
if (!MaybeIdxVal) // Dynamic case will be selected to register indexing.
return true;
@@ -2379,43 +2420,36 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
}
// TODO: We could emit code to handle the initialization somewhere.
- if (!AMDGPUTargetLowering::hasDefinedInitializer(GV)) {
- const SITargetLowering *TLI = ST.getTargetLowering();
- if (!TLI->shouldUseLDSConstAddress(GV)) {
- MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
- return true; // Leave in place;
- }
+ // We ignore the initializer for now and legalize it to allow selection.
+ // The initializer will be diagnosed during assembly emission anyway.
+ const SITargetLowering *TLI = ST.getTargetLowering();
+ if (!TLI->shouldUseLDSConstAddress(GV)) {
+ MI.getOperand(1).setTargetFlags(SIInstrInfo::MO_ABS32_LO);
+ return true; // Leave in place;
+ }
- if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
- Type *Ty = GV->getValueType();
- // HIP uses an unsized array `extern __shared__ T s[]` or similar
- // zero-sized type in other languages to declare the dynamic shared
- // memory which size is not known at the compile time. They will be
- // allocated by the runtime and placed directly after the static
- // allocated ones. They all share the same offset.
- if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
- // Adjust alignment for that dynamic shared memory array.
- MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
- LLT S32 = LLT::scalar(32);
- auto Sz =
- B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
- B.buildIntToPtr(DstReg, Sz);
- MI.eraseFromParent();
- return true;
- }
+ if (AS == AMDGPUAS::LOCAL_ADDRESS && GV->hasExternalLinkage()) {
+ Type *Ty = GV->getValueType();
+ // HIP uses an unsized array `extern __shared__ T s[]` or similar
+ // zero-sized type in other languages to declare the dynamic shared
+ // memory whose size is not known at compile time. They will be
+ // allocated by the runtime and placed directly after the static
+ // allocated ones. They all share the same offset.
+ if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
+ // Adjust alignment for that dynamic shared memory array.
+ MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
+ LLT S32 = LLT::scalar(32);
+ auto Sz =
+ B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
+ B.buildIntToPtr(DstReg, Sz);
+ MI.eraseFromParent();
+ return true;
}
-
- B.buildConstant(
- DstReg,
- MFI->allocateLDSGlobal(B.getDataLayout(), *cast<GlobalVariable>(GV)));
- MI.eraseFromParent();
- return true;
}
- const Function &Fn = MF.getFunction();
- DiagnosticInfoUnsupported BadInit(
- Fn, "unsupported initializer for address space", MI.getDebugLoc());
- Fn.getContext().diagnose(BadInit);
+ B.buildConstant(DstReg, MFI->allocateLDSGlobal(B.getDataLayout(),
+ *cast<GlobalVariable>(GV)));
+ MI.eraseFromParent();
return true;
}
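The dynamic-LDS branch kept above corresponds to source like the following hypothetical HIP kernel (all names invented for illustration): the extern __shared__ array has zero static size, so the pass only records its alignment and lets llvm.amdgcn.groupstaticsize supply the runtime offset past the statically allocated LDS.

// Hypothetical HIP example of the zero-sized dynamic shared array described
// in the comment above; the runtime sizes it from the launch's dynamic-LDS
// byte count and places it after the statically allocated LDS.
extern __shared__ float dynSmem[];

__global__ void scaleViaLDS(float *Out, float S, int N) {
  int I = threadIdx.x;
  if (I < N) {
    dynSmem[I] = Out[I] * S;
    __syncthreads();
    Out[I] = dynSmem[I];
  }
}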
@@ -2446,7 +2480,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
if (Ty.getSizeInBits() == 32) {
- // Truncate if this is a 32-bit constant adrdess.
+ // Truncate if this is a 32-bit constant address.
auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
B.buildExtract(DstReg, Load, 0);
} else
@@ -2745,11 +2779,32 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
return true;
}
+// Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to
+// ctlz/cttz_zero_undef. This allows us to fix up the result for the zero input
+// case with a single min instruction instead of a compare+select.
+bool AMDGPULegalizerInfo::legalizeCTLZ_CTTZ(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ unsigned NewOpc = MI.getOpcode() == AMDGPU::G_CTLZ
+ ? AMDGPU::G_AMDGPU_FFBH_U32
+ : AMDGPU::G_AMDGPU_FFBL_B32;
+ auto Tmp = B.buildInstr(NewOpc, {DstTy}, {Src});
+ B.buildUMin(Dst, Tmp, B.buildConstant(DstTy, SrcTy.getSizeInBits()));
+
+ MI.eraseFromParent();
+ return true;
+}
+
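As the comment above says, lowering straight to the target's ffbh/ffbl nodes lets one unsigned min repair the zero-input case. A scalar sketch of the resulting semantics, assuming the hardware behaviour of returning all-ones for a zero source (plain C++, illustration only):

#include <cstdint>

// Model of ffbh_u32: leading-zero count, all-ones for a zero input.
static uint32_t ffbh_u32(uint32_t X) {
  return X ? uint32_t(__builtin_clz(X)) : 0xffffffffu;
}

// G_CTLZ lowered as above: umin folds the zero case without compare+select.
uint32_t ctlz32(uint32_t X) {
  uint32_t Raw = ffbh_u32(X);
  return Raw < 32u ? Raw : 32u;   // B.buildUMin(Dst, Tmp, bit width)
}

For a nonzero input the min is a no-op; for zero it clamps the all-ones result to the bit width, which is exactly the value G_CTLZ/G_CTTZ require.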
// Check that this is a G_XOR x, -1
static bool isNot(const MachineRegisterInfo &MRI, const MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::G_XOR)
return false;
- auto ConstVal = getConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI);
+ auto ConstVal = getIConstantVRegSExtVal(MI.getOperand(2).getReg(), MRI);
return ConstVal && *ConstVal == -1;
}
@@ -2770,7 +2825,7 @@ verifyCFIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineInstr *&Br,
return nullptr;
// We're deleting the def of this value, so we need to remove it.
- UseMI->eraseFromParent();
+ eraseInstr(*UseMI, MRI);
UseMI = &*MRI.use_instr_nodbg_begin(NegatedCond);
Negated = true;
@@ -2836,6 +2891,20 @@ bool AMDGPULegalizerInfo::loadInputValue(
LLT ArgTy;
std::tie(Arg, ArgRC, ArgTy) = MFI->getPreloadedValue(ArgType);
+ if (!Arg) {
+ if (ArgType == AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR) {
+ // The intrinsic may appear when we have a 0 sized kernarg segment, in which
+ // case the pointer argument may be missing and we use null.
+ B.buildConstant(DstReg, 0);
+ return true;
+ }
+
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ B.buildUndef(DstReg);
+ return true;
+ }
+
if (!Arg->isRegister() || !Arg->getRegister().isValid())
return false; // TODO: Handle these
return loadInputValue(DstReg, B, Arg, ArgRC, ArgTy);
@@ -2913,7 +2982,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
B.buildSelect(DstRemReg, Cond, B.buildSub(S32, R, Y), R);
}
-// Build integer reciprocal sequence arounud V_RCP_IFLAG_F32
+// Build integer reciprocal sequence around V_RCP_IFLAG_F32
//
// Return lo, hi of result
//
@@ -2982,7 +3051,6 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B,
auto Add1_Lo = B.buildUAddo(S32, S1, RcpLo, MulHi1_Lo);
auto Add1_Hi = B.buildUAdde(S32, S1, RcpHi, MulHi1_Hi, Add1_Lo.getReg(1));
- auto Add1_HiNc = B.buildAdd(S32, RcpHi, MulHi1_Hi);
auto Add1 = B.buildMerge(S64, {Add1_Lo, Add1_Hi});
auto MulLo2 = B.buildMul(S64, NegDenom, Add1);
@@ -2993,9 +3061,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B,
auto Zero32 = B.buildConstant(S32, 0);
auto Add2_Lo = B.buildUAddo(S32, S1, Add1_Lo, MulHi2_Lo);
- auto Add2_HiC =
- B.buildUAdde(S32, S1, Add1_HiNc, MulHi2_Hi, Add1_Lo.getReg(1));
- auto Add2_Hi = B.buildUAdde(S32, S1, Add2_HiC, Zero32, Add2_Lo.getReg(1));
+ auto Add2_Hi = B.buildUAdde(S32, S1, Add1_Hi, MulHi2_Hi, Add2_Lo.getReg(1));
auto Add2 = B.buildMerge(S64, {Add2_Lo, Add2_Hi});
auto UnmergeNumer = B.buildUnmerge(S32, Numer);
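The simplified carry chain above is ordinary 32-bit add-with-carry: once Add1_Hi already absorbs the carry out of Add1_Lo, a separate no-carry copy of the high half is redundant. A scalar restatement of that pattern (illustrative helper, not an LLVM API):

#include <cstdint>

// 64-bit add assembled from 32-bit halves: the high half consumes the
// low half's carry-out directly (UAddo feeding UAdde).
uint64_t add64ViaHalves(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
  uint32_t Lo = ALo + BLo;
  uint32_t Carry = Lo < ALo ? 1u : 0u;   // carry-out of the low add
  uint32_t Hi = AHi + BHi + Carry;       // wraps mod 2^32, as in the MIR
  return (uint64_t(Hi) << 32) | Lo;
}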
@@ -3701,11 +3767,11 @@ void AMDGPULegalizerInfo::updateBufferMMO(MachineMemOperand *MMO,
unsigned ImmOffset, Register VIndex,
MachineRegisterInfo &MRI) const {
Optional<ValueAndVReg> MaybeVOffsetVal =
- getConstantVRegValWithLookThrough(VOffset, MRI);
+ getIConstantVRegValWithLookThrough(VOffset, MRI);
Optional<ValueAndVReg> MaybeSOffsetVal =
- getConstantVRegValWithLookThrough(SOffset, MRI);
+ getIConstantVRegValWithLookThrough(SOffset, MRI);
Optional<ValueAndVReg> MaybeVIndexVal =
- getConstantVRegValWithLookThrough(VIndex, MRI);
+ getIConstantVRegValWithLookThrough(VIndex, MRI);
// If the combined VOffset + SOffset + ImmOffset + strided VIndex is constant,
// update the MMO with that offset. The stride is unknown so we can only do
// this if VIndex is constant 0.
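A small model of the rule stated in that comment, with std::optional standing in for "this operand is a known constant" (names invented; illustration only):

#include <cstdint>
#include <optional>

// Only fold when every component is known and VIndex is a constant zero,
// since the per-element stride is unknown at this point.
std::optional<uint64_t> foldBufferOffset(std::optional<uint64_t> VOff,
                                         std::optional<uint64_t> SOff,
                                         uint64_t ImmOff,
                                         std::optional<uint64_t> VIndex) {
  if (VOff && SOff && VIndex && *VIndex == 0)
    return *VOff + *SOff + ImmOff;
  return std::nullopt;   // leave the MMO offset conservatively unknown
}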
@@ -4246,8 +4312,8 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI,
/// to expose all register repacking to the legalizer/combiners. We also don't
/// want a selected instruction entering RegBankSelect. In order to avoid
/// defining a multitude of intermediate image instructions, directly hack on
-/// the intrinsic's arguments. In cases like a16 addreses, this requires padding
-/// now unnecessary arguments with $noreg.
+/// the intrinsic's arguments. In cases like a16 addresses, this requires
+/// padding now unnecessary arguments with $noreg.
bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
@@ -4339,8 +4405,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
if (ConstantLod->isZero() || ConstantLod->isNegative()) {
// Set new opcode to _lz variant of _l, and change the intrinsic ID.
const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
- AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ,
- Intr->Dim);
+ AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+ Intr->Dim);
// The starting indexes should remain in the same place.
--CorrectedNumVAddrs;
@@ -4518,7 +4584,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MI.getOperand(0).setReg(NewResultReg);
// In the IR, TFE is supposed to be used with a 2 element struct return
- // type. The intruction really returns these two values in one contiguous
+ // type. The instruction really returns these two values in one contiguous
// register, with one additional dword beyond the loaded data. Rewrite the
// return type to use a single register result.
@@ -4730,7 +4796,7 @@ bool AMDGPULegalizerInfo::legalizeTrapHsa(
bool AMDGPULegalizerInfo::legalizeDebugTrapIntrinsic(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
- // Is non-HSA path or trap-handler disabled? then, report a warning
+ // Is non-HSA path or trap-handler disabled? Then, report a warning
// accordingly
if (!ST.isTrapHandlerEnabled() ||
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA) {
@@ -4771,12 +4837,27 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
return false;
}
- bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
- bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
- unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
- : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+ const bool IsA16 = MRI.getType(RayDir).getElementType().getSizeInBits() == 16;
+ const bool Is64 = MRI.getType(NodePtr).getSizeInBits() == 64;
+ const unsigned NumVDataDwords = 4;
+ const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
+ const bool UseNSA =
+ ST.hasNSAEncoding() && NumVAddrDwords <= ST.getNSAMaxSize();
+ const unsigned BaseOpcodes[2][2] = {
+ {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ int Opcode;
+ if (UseNSA) {
+ Opcode =
+ AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10NSA,
+ NumVDataDwords, NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
+ PowerOf2Ceil(NumVAddrDwords));
+ }
+ assert(Opcode != -1);
SmallVector<Register, 12> Ops;
if (Is64) {
@@ -4813,6 +4894,14 @@ bool AMDGPULegalizerInfo::legalizeBVHIntrinsic(MachineInstr &MI,
packLanes(RayInvDir);
}
+ if (!UseNSA) {
+ // Build a single vector containing all the operands so far prepared.
+ LLT OpTy = LLT::fixed_vector(Ops.size(), 32);
+ Register MergedOps = B.buildMerge(OpTy, Ops).getReg(0);
+ Ops.clear();
+ Ops.push_back(MergedOps);
+ }
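The encoding choice above reduces to a dword-count check; a sketch of that decision with the counts copied from the patch and the NSA limit treated as an opaque subtarget value (illustrative helpers, not LLVM API):

// Restatement of the NSA decision made above, for illustration.
unsigned numVAddrDwords(bool Is64, bool IsA16) {
  return IsA16 ? (Is64 ? 9u : 8u) : (Is64 ? 12u : 11u);
}

bool useNSAEncoding(bool HasNSA, unsigned NSAMaxSize, bool Is64, bool IsA16) {
  // Otherwise fall back to the default encoding: a single merged vector
  // operand, padded to a power-of-two dword count.
  return HasNSA && numVAddrDwords(Is64, IsA16) <= NSAMaxSize;
}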
+
auto MIB = B.buildInstr(AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY)
.addDef(DstReg)
.addImm(Opcode);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index d4fefd89b487..7faf0436f995 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -89,6 +89,8 @@ public:
bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeCTLZ_CTTZ(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
const ArgDescriptor *Arg,
@@ -107,8 +109,8 @@ public:
Register Den) const;
void legalizeUnsignedDIV_REM64Impl(MachineIRBuilder &B, Register DstDivReg,
- Register DstRemReg, Register Numer,
- Register Denom) const;
+ Register DstRemReg, Register Num,
+ Register Den) const;
bool legalizeSignedDIV_REM(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
index 1ee6933bd7ff..49cf6db5197f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp
@@ -54,15 +54,14 @@ private:
bool useNativeFunc(const StringRef F) const;
- // Return a pointer (pointer expr) to the function if function defintion with
+ // Return a pointer (pointer expr) to the function if function definition with
// "FuncName" exists. It may create a new function prototype in pre-link mode.
FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
// Replace a normal function with its native version.
bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
- bool parseFunctionName(const StringRef& FMangledName,
- FuncInfo *FInfo=nullptr /*out*/);
+ bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
@@ -87,9 +86,9 @@ private:
bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
// evaluate calls if calls' arguments are constants.
- bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
+ bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
- bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
+ bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
// exp
bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
@@ -116,7 +115,8 @@ private:
bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
// __read_pipe/__write_pipe
- bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
+ bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
+ const FuncInfo &FInfo);
// llvm.amdgcn.wavefrontsize
bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
@@ -125,7 +125,7 @@ private:
BasicBlock::iterator getEntryIns(CallInst * UI);
// Insert an Alloc instruction.
AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
- // Get a scalar native builtin signle argument FP function
+ // Get a scalar native builtin single argument FP function
FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
protected:
@@ -466,9 +466,9 @@ FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
: AMDGPULibFunc::getFunction(M, fInfo);
}
-bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
- FuncInfo *FInfo) {
- return AMDGPULibFunc::parse(FMangledName, *FInfo);
+bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
+ FuncInfo &FInfo) {
+ return AMDGPULibFunc::parse(FMangledName, FInfo);
}
bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
@@ -529,7 +529,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
Function *Callee = aCI->getCalledFunction();
FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
+ if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
!(AllNative || useNativeFunc(FInfo.getName()))) {
@@ -558,7 +558,7 @@ bool AMDGPULibCalls::useNative(CallInst *aCI) {
// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
- FuncInfo &FInfo) {
+ const FuncInfo &FInfo) {
auto *Callee = CI->getCalledFunction();
if (!Callee->isDeclaration())
return false;
@@ -567,7 +567,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
auto *M = Callee->getParent();
auto &Ctx = M->getContext();
std::string Name = std::string(Callee->getName());
- auto NumArg = CI->getNumArgOperands();
+ auto NumArg = CI->arg_size();
if (NumArg != 4 && NumArg != 6)
return false;
auto *PacketSize = CI->getArgOperand(NumArg - 2);
@@ -584,7 +584,7 @@ bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
else
PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
- unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
+ unsigned PtrArgLoc = CI->arg_size() - 3;
auto PtrArg = CI->getArgOperand(PtrArgLoc);
unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
@@ -644,11 +644,11 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
}
FuncInfo FInfo;
- if (!parseFunctionName(Callee->getName(), &FInfo))
+ if (!parseFunctionName(Callee->getName(), FInfo))
return false;
// Further check the number of arguments to see if they match.
- if (CI->getNumArgOperands() != FInfo.getNumArgs())
+ if (CI->arg_size() != FInfo.getNumArgs())
return false;
if (TDOFold(CI, FInfo))
@@ -660,7 +660,7 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
return true;
- // Specilized optimizations for each function call
+ // Specialized optimizations for each function call
switch (FInfo.getId()) {
case AMDGPULibFunc::EI_RECIP:
// skip vector function
@@ -1231,7 +1231,7 @@ bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
return false;
}
-// Get a scalar native builtin signle argument FP function
+// Get a scalar native builtin single argument FP function
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
const FuncInfo &FInfo) {
if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
@@ -1371,8 +1371,7 @@ bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
StringRef CPU = TM->getTargetCPU();
StringRef Features = TM->getTargetFeatureString();
if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
- (Features.empty() ||
- Features.find_insensitive("wavefrontsize") == StringRef::npos))
+ (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
return false;
Function *F = CI->getParent()->getParent();
@@ -1410,7 +1409,7 @@ AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
return Alloc;
}
-bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
+bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
double& Res0, double& Res1,
Constant *copr0, Constant *copr1,
Constant *copr2) {
@@ -1605,8 +1604,8 @@ bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
return false;
}
-bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
- int numArgs = (int)aCI->getNumArgOperands();
+bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
+ int numArgs = (int)aCI->arg_size();
if (numArgs > 3)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 32262ea75fd3..aa7c7ff2e388 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -352,7 +352,7 @@ const unsigned UnmangledFuncInfo::TableSize =
static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
const AMDGPULibFunc::Param (&Leads)[2]) {
AMDGPULibFunc::Param Res = Leads[0];
- // TBD - This switch may require to be extended for other intriniscs
+ // TBD - This switch may require to be extended for other intrinsics
switch (id) {
case AMDGPULibFunc::EI_SINCOS:
Res.PtrKind = AMDGPULibFunc::BYVALUE;
@@ -455,7 +455,8 @@ AMDGPULibFunc::Param ParamIterator::getNextParam() {
break;
}
- default: llvm_unreachable("Unhandeled param rule");
+ default:
+ llvm_unreachable("Unhandled param rule");
}
}
++Index;
@@ -747,7 +748,8 @@ static const char *getItaniumTypeName(AMDGPULibFunc::EType T) {
case AMDGPULibFunc::IMG3D: return "11ocl_image3d";
case AMDGPULibFunc::SAMPLER: return "11ocl_sampler";
case AMDGPULibFunc::EVENT: return "9ocl_event";
- default: llvm_unreachable("Unhandeled param type");
+ default:
+ llvm_unreachable("Unhandled param type");
}
return nullptr;
}
@@ -761,7 +763,7 @@ namespace {
// substitution candidates from the grammar, but are explicitly excluded:
// 1. <builtin-type> other than vendor extended types ..."
-// For the purpose of functions the following productions make sence for the
+// For the purpose of functions the following productions make sense for the
// substitution:
// <type> ::= <builtin-type>
// ::= <class-enum-type>
@@ -774,11 +776,11 @@ namespace {
// using <class-enum-type> production rule they're not used for substitution
// because clang considers them builtin types.
//
-// DvNN_ type is GCC extension for vectors and is a subject for the substitution.
-
+// DvNN_ type is a GCC extension for vectors and is subject to the
+// substitution.
class ItaniumMangler {
- SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substituions
+ SmallVector<AMDGPULibFunc::Param, 10> Str; // list of accumulated substitutions
bool UseAddrSpace;
int findSubst(const AMDGPULibFunc::Param& P) const {
@@ -902,7 +904,7 @@ static Type* getIntrinsicParamType(
case AMDGPULibFunc::EVENT:
T = StructType::create(C,"ocl_event")->getPointerTo(); break;
default:
- llvm_unreachable("Unhandeled param type");
+ llvm_unreachable("Unhandled param type");
return nullptr;
}
if (P.VectorSize > 1)
@@ -990,10 +992,8 @@ FunctionCallee AMDGPULibFunc::getOrInsertFunction(Module *M,
} else {
AttributeList Attr;
LLVMContext &Ctx = M->getContext();
- Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
- Attribute::ReadOnly);
- Attr = Attr.addAttribute(Ctx, AttributeList::FunctionIndex,
- Attribute::NoUnwind);
+ Attr = Attr.addFnAttribute(Ctx, Attribute::ReadOnly);
+ Attr = Attr.addFnAttribute(Ctx, Attribute::NoUnwind);
C = M->getOrInsertFunction(FuncName, FuncTy, Attr);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
index 714e74faaf13..b700dd5aa301 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
@@ -76,9 +76,8 @@ bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
Intrinsic::ID ID = F.getIntrinsicID();
bool Changed = false;
- for (auto I = F.user_begin(), E = F.user_end(); I != E;) {
- Instruction *Inst = cast<Instruction>(*I);
- ++I;
+ for (User *U : llvm::make_early_inc_range(F.users())) {
+ Instruction *Inst = cast<Instruction>(U);
switch (ID) {
case Intrinsic::memcpy: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 0f157e53c3db..c34c12ab9fec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -82,9 +82,9 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
nullptr, F.getName() + ".kernarg.segment");
- KernArgSegment->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- KernArgSegment->addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
+ KernArgSegment->addRetAttr(Attribute::NonNull);
+ KernArgSegment->addRetAttr(
+ Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
uint64_t ExplicitArgOffset = 0;
@@ -232,8 +232,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
}
}
- KernArgSegment->addAttribute(
- AttributeList::ReturnIndex,
+ KernArgSegment->addRetAttr(
Attribute::getWithAlignment(Ctx, std::max(KernArgBaseAlign, MaxAlign)));
return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 70ecea8dbc3e..12d6d35a6917 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -42,6 +42,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -118,7 +119,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
// The llvm.amdgcn.module.lds instance is implicitly used by all kernels
// that might call a function which accesses a field within it. This is
// presently approximated to 'all kernels' if there are any such functions
- // in the module. This implicit use is reified as an explicit use here so
+ // in the module. This implicit use is redefined as an explicit use here so
// that later passes, specifically PromoteAlloca, account for the required
// memory without any knowledge of this transform.
@@ -162,6 +163,9 @@ public:
bool Changed = processUsedLDS(M);
for (Function &F : M.functions()) {
+ if (F.isDeclaration())
+ continue;
+
// Only lower compute kernels' LDS.
if (!AMDGPU::isKernel(F.getCallingConv()))
continue;
@@ -282,6 +286,21 @@ private:
// so remove the variables from these lists before replaceAllUsesWith
removeFromUsedLists(M, LocalVars);
+ // Create alias.scope and their lists. Each field in the new structure
+ // does not alias with all other fields.
+ SmallVector<MDNode *> AliasScopes;
+ SmallVector<Metadata *> NoAliasList;
+ if (LocalVars.size() > 1) {
+ MDBuilder MDB(Ctx);
+ AliasScopes.reserve(LocalVars.size());
+ MDNode *Domain = MDB.createAnonymousAliasScopeDomain();
+ for (size_t I = 0; I < LocalVars.size(); I++) {
+ MDNode *Scope = MDB.createAnonymousAliasScope(Domain);
+ AliasScopes.push_back(Scope);
+ }
+ NoAliasList.append(&AliasScopes[1], AliasScopes.end());
+ }
+
// Replace uses of ith variable with a constantexpr to the ith field of the
// instance that will be allocated by AMDGPUMachineFunction
Type *I32 = Type::getInt32Ty(Ctx);
@@ -313,7 +332,15 @@ private:
uint64_t Off = DL.getStructLayout(LDSTy)->getElementOffset(I);
Align A = commonAlignment(StructAlign, Off);
- refineUsesAlignment(GEP, A, DL);
+
+ if (I)
+ NoAliasList[I - 1] = AliasScopes[I - 1];
+ MDNode *NoAlias =
+ NoAliasList.empty() ? nullptr : MDNode::get(Ctx, NoAliasList);
+ MDNode *AliasScope =
+ AliasScopes.empty() ? nullptr : MDNode::get(Ctx, {AliasScopes[I]});
+
+ refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias);
}
// Mark kernels with asm that reads the address of the allocated structure
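The scope bookkeeping above maintains the invariant that NoAliasList is "every scope except the current field's": it starts as scopes 1..n-1 and, just before field I is processed, slot I-1 is overwritten with scope I-1. A tiny model of the per-field lists, with integers standing in for the MDNode scopes (illustration only):

#include <vector>

// For field I: alias.scope = {Scopes[I]}, noalias = all other scopes.
std::vector<int> noAliasFor(const std::vector<int> &Scopes, size_t I) {
  std::vector<int> NoAlias;
  for (size_t J = 0; J < Scopes.size(); ++J)
    if (J != I)
      NoAlias.push_back(Scopes[J]);
  return NoAlias;
}

So with three fields, accesses through field 0 carry noalias {S1, S2}, field 1 carries {S0, S2}, and so on, which is what lets later passes treat accesses to different members of the packed LDS struct as non-aliasing.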
@@ -323,23 +350,39 @@ private:
if (!F) {
IRBuilder<> Builder(Ctx);
SmallPtrSet<Function *, 32> Kernels;
- for (auto &I : M.functions()) {
- Function *Func = &I;
- if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
- markUsedByKernel(Builder, Func, SGV);
- Kernels.insert(Func);
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration())
+ continue;
+
+ if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) {
+ markUsedByKernel(Builder, &Func, SGV);
+ Kernels.insert(&Func);
}
}
}
return true;
}
- void refineUsesAlignment(Value *Ptr, Align A, const DataLayout &DL,
- unsigned MaxDepth = 5) {
- if (!MaxDepth || A == 1)
+ void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
+ MDNode *AliasScope, MDNode *NoAlias,
+ unsigned MaxDepth = 5) {
+ if (!MaxDepth || (A == 1 && !AliasScope))
return;
for (User *U : Ptr->users()) {
+ if (auto *I = dyn_cast<Instruction>(U)) {
+ if (AliasScope && I->mayReadOrWriteMemory()) {
+ MDNode *AS = I->getMetadata(LLVMContext::MD_alias_scope);
+ AS = (AS ? MDNode::getMostGenericAliasScope(AS, AliasScope)
+ : AliasScope);
+ I->setMetadata(LLVMContext::MD_alias_scope, AS);
+
+ MDNode *NA = I->getMetadata(LLVMContext::MD_noalias);
+ NA = (NA ? MDNode::intersect(NA, NoAlias) : NoAlias);
+ I->setMetadata(LLVMContext::MD_noalias, NA);
+ }
+ }
+
if (auto *LI = dyn_cast<LoadInst>(U)) {
LI->setAlignment(std::max(A, LI->getAlign()));
continue;
@@ -364,17 +407,19 @@ private:
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
APInt Off(BitWidth, 0);
- if (GEP->getPointerOperand() == Ptr &&
- GEP->accumulateConstantOffset(DL, Off)) {
- Align GA = commonAlignment(A, Off.getLimitedValue());
- refineUsesAlignment(GEP, GA, DL, MaxDepth - 1);
+ if (GEP->getPointerOperand() == Ptr) {
+ Align GA;
+ if (GEP->accumulateConstantOffset(DL, Off))
+ GA = commonAlignment(A, Off.getLimitedValue());
+ refineUsesAlignmentAndAA(GEP, GA, DL, AliasScope, NoAlias,
+ MaxDepth - 1);
}
continue;
}
if (auto *I = dyn_cast<Instruction>(U)) {
if (I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::AddrSpaceCast)
- refineUsesAlignment(I, A, DL, MaxDepth - 1);
+ refineUsesAlignmentAndAA(I, A, DL, AliasScope, NoAlias, MaxDepth - 1);
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3dd27f1996d6..3fad7e192195 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -12,11 +12,11 @@
//===----------------------------------------------------------------------===//
//
+#include "AMDGPUMCInstLower.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "R600AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -34,36 +34,6 @@
using namespace llvm;
-namespace {
-
-class AMDGPUMCInstLower {
- MCContext &Ctx;
- const TargetSubtargetInfo &ST;
- const AsmPrinter &AP;
-
-public:
- AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
- const AsmPrinter &AP);
-
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
- /// Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-
-};
-
-class R600MCInstLower : public AMDGPUMCInstLower {
-public:
- R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
- const AsmPrinter &AP);
-
- /// Lower a MachineInstr to an MCInst
- void lower(const MachineInstr *MI, MCInst &OutMI) const;
-};
-
-
-} // End anonymous namespace
-
#include "AMDGPUGenMCPseudoLowering.inc"
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx,
@@ -150,7 +120,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
// FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
// need to select it to the subtarget specific version, and there's no way to
// do that with a single pseudo source operation.
- if (Opcode == AMDGPU::S_SETPC_B64_return)
+ if (Opcode == AMDGPU::S_SETPC_B64_return ||
+ Opcode == AMDGPU::S_SETPC_B64_return_gfx)
Opcode = AMDGPU::S_SETPC_B64;
else if (Opcode == AMDGPU::SI_CALL) {
// SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
@@ -194,30 +165,6 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
return MCInstLowering.lowerOperand(MO, MCOp);
}
-static const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
- const Constant *CV,
- MCContext &OutContext) {
- // TargetMachine does not support llvm-style cast. Use C++-style cast.
- // This is safe since TM is always of type AMDGPUTargetMachine or its
- // derived class.
- auto &AT = static_cast<const AMDGPUTargetMachine&>(TM);
- auto *CE = dyn_cast<ConstantExpr>(CV);
-
- // Lower null pointers in private and local address space.
- // Clang generates addrspacecast for null pointers in private and local
- // address space, which needs to be lowered.
- if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
- auto Op = CE->getOperand(0);
- auto SrcAddr = Op->getType()->getPointerAddressSpace();
- if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
- auto DstAddr = CE->getType()->getPointerAddressSpace();
- return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
- OutContext);
- }
- }
- return nullptr;
-}
-
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
return E;
@@ -267,12 +214,18 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->isMetaInstruction()) {
+ if (isVerbose())
+ OutStreamer->emitRawComment(" meta instruction");
+ return;
+ }
+
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
#ifdef EXPENSIVE_CHECKS
- // Sanity-check getInstSizeInBytes on explicitly specified CPUs (it cannot
+ // Check getInstSizeInBytes on explicitly specified CPUs (it cannot
// work correctly for the generic CPU).
//
// The isPseudo check really shouldn't be here, but unfortunately there are
@@ -325,47 +278,3 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
}
}
-
-R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
- const AsmPrinter &AP) :
- AMDGPUMCInstLower(Ctx, ST, AP) { }
-
-void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- OutMI.setOpcode(MI->getOpcode());
- for (const MachineOperand &MO : MI->explicit_operands()) {
- MCOperand MCOp;
- lowerOperand(MO, MCOp);
- OutMI.addOperand(MCOp);
- }
-}
-
-void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
- const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
- R600MCInstLower MCInstLowering(OutContext, STI, *this);
-
- StringRef Err;
- if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
- LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
- C.emitError("Illegal instruction detected: " + Err);
- MI->print(errs());
- }
-
- if (MI->isBundle()) {
- const MachineBasicBlock *MBB = MI->getParent();
- MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
- while (I != MBB->instr_end() && I->isInsideBundle()) {
- emitInstruction(&*I);
- ++I;
- }
- } else {
- MCInst TmpInst;
- MCInstLowering.lower(MI, TmpInst);
- EmitToStreamer(*OutStreamer, TmpInst);
- }
-}
-
-const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
- if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
- return E;
- return AsmPrinter::lowerConstant(CV);
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
new file mode 100644
index 000000000000..0e43b4fe9461
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -0,0 +1,69 @@
+//===- AMDGPUMCInstLower.h - Lower AMDGPU MachineInstr to an MCInst -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Header for lowering AMDGPU MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
+
+#include "AMDGPUTargetMachine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+} // namespace llvm
+
+using namespace llvm;
+
+class AMDGPUMCInstLower {
+ MCContext &Ctx;
+ const TargetSubtargetInfo &ST;
+ const AsmPrinter &AP;
+
+public:
+ AMDGPUMCInstLower(MCContext &ctx, const TargetSubtargetInfo &ST,
+ const AsmPrinter &AP);
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ /// Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+namespace {
+static inline const MCExpr *lowerAddrSpaceCast(const TargetMachine &TM,
+ const Constant *CV,
+ MCContext &OutContext) {
+ // TargetMachine does not support llvm-style cast. Use C++-style cast.
+ // This is safe since TM is always of type AMDGPUTargetMachine or its
+ // derived class.
+ auto &AT = static_cast<const AMDGPUTargetMachine &>(TM);
+ auto *CE = dyn_cast<ConstantExpr>(CV);
+
+ // Lower null pointers in private and local address space.
+ // Clang generates addrspacecast for null pointers in private and local
+ // address space, which needs to be lowered.
+ if (CE && CE->getOpcode() == Instruction::AddrSpaceCast) {
+ auto Op = CE->getOperand(0);
+ auto SrcAddr = Op->getType()->getPointerAddressSpace();
+ if (Op->isNullValue() && AT.getNullPointerValue(SrcAddr) == 0) {
+ auto DstAddr = CE->getType()->getPointerAddressSpace();
+ return MCConstantExpr::create(AT.getNullPointerValue(DstAddr),
+ OutContext);
+ }
+ }
+ return nullptr;
+}
+} // namespace
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
index a61f1f7b8182..47faa6c72481 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h
@@ -16,18 +16,12 @@
#ifndef LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H
#define LLVM_LIB_TARGET_AMDGPUMIRFORMATTER_H
-#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MIRFormatter.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdint>
namespace llvm {
class MachineFunction;
-class MachineInstr;
struct PerFunctionMIParsingState;
-struct SlotMapping;
class AMDGPUMIRFormatter final : public MIRFormatter {
public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index 697513b5db7a..5d4b007f11e6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -194,7 +194,7 @@ bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
}
void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
- assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
+ assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
PHISourcesT EmptySet;
PHIInfoElementT *NewElement = new PHIInfoElementT();
NewElement->DestReg = DestReg;
@@ -606,7 +606,7 @@ MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
DenseMap<MachineRegion *, RegionMRT *> &RegionMap) {
for (auto &MFI : MF) {
MachineBasicBlock *ExitMBB = &MFI;
- if (ExitMBB->succ_size() == 0) {
+ if (ExitMBB->succ_empty()) {
return ExitMBB;
}
}
@@ -748,10 +748,8 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
// If we have a successor with a PHI source coming from this MBB, we have to
// add the register as live out.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end();
- SI != E; ++SI) {
- for (auto &II : *(*SI)) {
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ for (auto &II : *Succ) {
if (II.isPHI()) {
MachineInstr &PHI = II;
int numPreds = getPHINumInputs(PHI);
@@ -760,7 +758,7 @@ void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
unsigned PHIReg = getPHISourceReg(PHI, i);
LLVM_DEBUG(dbgs()
<< "Add LiveOut (PhiSource " << printMBBReference(*MBB)
- << " -> " << printMBBReference(*(*SI))
+ << " -> " << printMBBReference(*Succ)
<< "): " << printReg(PHIReg, TRI) << "\n");
addLiveOut(PHIReg);
}
@@ -813,7 +811,7 @@ void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
} else {
LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
// We should be limited to only store registers that are live out from the
- // lineaized region
+ // linearized region
for (auto MBBI : SubRegion->MBBs) {
storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
}
@@ -896,7 +894,7 @@ void LinearizedRegion::replaceRegister(unsigned Register,
assert(Register != NewRegister && "Cannot replace a reg with itself");
LLVM_DEBUG(
- dbgs() << "Pepareing to replace register (region): "
+ dbgs() << "Preparing to replace register (region): "
<< printReg(Register, MRI->getTargetRegisterInfo()) << " with "
<< printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
@@ -1073,7 +1071,6 @@ private:
const SIInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineRegisterInfo *MRI;
- unsigned BBSelectRegister;
PHILinearize PHIInfo;
DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
RegionMRT *RMRT;
@@ -1125,8 +1122,6 @@ private:
void transformSimpleIfRegion(RegionMRT *Region);
- void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
-
void insertUnconditionalBranch(MachineBasicBlock *MBB,
MachineBasicBlock *Dest,
const DebugLoc &DL = DebugLoc());
@@ -1238,11 +1233,7 @@ bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
return false;
}
- for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
- E = Entry->succ_end();
- SI != E; ++SI) {
- MachineBasicBlock *Current = *SI;
-
+ for (MachineBasicBlock *Current : Entry->successors()) {
if (Current == Succ) {
FoundBypass = true;
} else if ((Current->succ_size() == 1) &&
@@ -1280,10 +1271,7 @@ static void fixRegionTerminator(RegionMRT *Region) {
auto Exit = LRegion->getExit();
SmallPtrSet<MachineBasicBlock *, 2> Successors;
- for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
- SE = Exit->succ_end();
- SI != SE; ++SI) {
- MachineBasicBlock *Succ = *SI;
+ for (MachineBasicBlock *Succ : Exit->successors()) {
if (LRegion->contains(Succ)) {
// Do not allow re-assign
assert(InternalSucc == nullptr);
@@ -1404,7 +1392,7 @@ void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
MachineInstr &Instr = *I;
if (Instr.isPHI()) {
unsigned PHIDestReg = getPHIDestReg(Instr);
- LLVM_DEBUG(dbgs() << "Extractking killed phi:\n");
+ LLVM_DEBUG(dbgs() << "Extracting killed phi:\n");
LLVM_DEBUG(Instr.dump());
PHIs.insert(&Instr);
PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
@@ -1589,11 +1577,9 @@ void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
// Check if register is live out of the basic block
MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
- for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
- if ((*UI).getParent()->getParent() != DefMBB) {
+ for (const MachineOperand &MO : MRI->use_operands(Reg))
+ if (MO.getParent()->getParent() != DefMBB)
IsDead = false;
- }
- }
LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
<< (IsDead ? "dead" : "alive")
@@ -1686,7 +1672,7 @@ void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *
static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) {
MachineBasicBlock *result = nullptr;
for (auto &MFI : MF) {
- if (MFI.succ_size() == 0) {
+ if (MFI.succ_empty()) {
if (result == nullptr) {
result = &MFI;
} else {
@@ -1770,34 +1756,27 @@ static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
MachineBasicBlock *EndMBB) {
- // We have to check against the StartMBB successor becasuse a
+ // We have to check against the StartMBB successor because a
// structurized region with a loop will have the entry block split,
// and the backedge will go to the entry successor.
DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
unsigned SuccSize = StartMBB->succ_size();
if (SuccSize > 0) {
MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
- for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
- E = EndMBB->succ_end();
- PI != E; ++PI) {
+ for (MachineBasicBlock *Succ : EndMBB->successors()) {
// Either we have a back-edge to the entry block, or a back-edge to the
// successor of the entry block since the block may be split.
- if ((*PI) != StartMBB &&
- !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
+ if (Succ != StartMBB &&
+ !(Succ == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
Succs.insert(
- std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
+ std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, Succ));
}
}
}
- for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
- E = StartMBB->pred_end();
- PI != E; ++PI) {
- if ((*PI) != EndMBB) {
- Succs.insert(
- std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
- }
- }
+ for (MachineBasicBlock *Pred : StartMBB->predecessors())
+ if (Pred != EndMBB)
+ Succs.insert(std::make_pair(Pred, StartMBB));
for (auto SI : Succs) {
std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
@@ -1815,14 +1794,9 @@ MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
if (InheritPreds) {
- for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
- E = CodeBBStart->pred_end();
- PI != E; ++PI) {
- if ((*PI) != CodeBBEnd) {
- MachineBasicBlock *Pred = (*PI);
+ for (MachineBasicBlock *Pred : CodeBBStart->predecessors())
+ if (Pred != CodeBBEnd)
Pred->addSuccessor(IfBB);
- }
- }
}
removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
@@ -1872,9 +1846,8 @@ void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
return;
Register CondReg = Cond[0].getReg();
- for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
- (*UI).setIsKill(false);
- }
+ for (MachineOperand &MO : MRI->use_operands(CondReg))
+ MO.setIsKill(false);
}
void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
@@ -2018,7 +1991,7 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
if (!containsDef(CodeBB, InnerRegion, LI) ||
(!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
- // If the register simly lives through the CodeBB, we don't have
+ // If the register simply lives through the CodeBB, we don't have
// to rewrite anything since the register is not defined in this
// part of the code.
LLVM_DEBUG(dbgs() << "- through");
@@ -2028,14 +2001,14 @@ void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
unsigned Reg = LI;
if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
// If the register is live out, we do want to create a phi,
- // unless it is from the Exit block, becasuse in that case there
+ // unless it is from the Exit block, because in that case there
// is already a PHI, and no need to create a new one.
// If the register is just a live out def and not part of a phi
// chain, we need to create a PHI node to handle the if region,
// and replace all uses outside of the region with the new dest
// register, unless it is the outgoing BB select register. We have
- // already creaed phi nodes for these.
+ // already created phi nodes for these.
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
Register PHIDestReg = MRI->createVirtualRegister(RegClass);
Register IfSourceReg = MRI->createVirtualRegister(RegClass);
@@ -2569,11 +2542,9 @@ static void removeOldExitPreds(RegionMRT *Region) {
static bool mbbHasBackEdge(MachineBasicBlock *MBB,
SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
- for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
- if (MBBs.contains(*SI)) {
+ for (MachineBasicBlock *Succ : MBB->successors())
+ if (MBBs.contains(Succ))
return true;
- }
- }
return false;
}
@@ -2591,11 +2562,9 @@ static bool containsNewBackedge(MRT *Tree,
}
} else {
RegionMRT *Region = Tree->getRegionMRT();
- SetVector<MRT *> *Children = Region->getChildren();
- for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
- if (containsNewBackedge(*CI, MBBs))
+ for (MRT *C : llvm::reverse(*Region->getChildren()))
+ if (containsNewBackedge(C, MBBs))
return true;
- }
}
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index d27eb68ca74b..5a5a5d213a1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -13,7 +13,7 @@
//
// In LLVM CodeGen the runtime-handle metadata will be translated to
// RuntimeHandle metadata in code object. Runtime allocates a global buffer
-// for each kernel with RuntimeHandel metadata and saves the kernel address
+// for each kernel with RuntimeHandle metadata and saves the kernel address
// required for the AQL packet into the buffer. __enqueue_kernel function
// in device library knows that the invoke function pointer in the block
// literal is actually runtime handle and loads the kernel address from it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 728be811afae..fc984d2dda64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,10 +35,11 @@ protected:
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
public:
- AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+ AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
+ AMDGPUCombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
struct FMinFMaxLegacyInfo {
@@ -257,12 +259,12 @@ bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
class AMDGPUPostLegalizerCombinerHelperState {
protected:
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
public:
AMDGPUPostLegalizerCombinerHelperState(
- CombinerHelper &Helper,
+ AMDGPUCombinerHelper &Helper,
AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
: Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};
@@ -300,7 +302,7 @@ public:
bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
+ AMDGPUCombinerHelper Helper(Observer, B, KB, MDT, LInfo);
AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
PostLegalizerHelper);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index 13f09ab8f164..c029046ab65f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
@@ -34,10 +35,11 @@ protected:
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
public:
- AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
+ AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B,
+ AMDGPUCombinerHelper &Helper)
: B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
struct ClampI64ToI16MatchInfo {
@@ -154,12 +156,12 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
class AMDGPUPreLegalizerCombinerHelperState {
protected:
- CombinerHelper &Helper;
+ AMDGPUCombinerHelper &Helper;
AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
public:
AMDGPUPreLegalizerCombinerHelperState(
- CombinerHelper &Helper,
+ AMDGPUCombinerHelper &Helper,
AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
: Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
};
@@ -196,17 +198,15 @@ public:
bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB, MDT);
+ AMDGPUCombinerHelper Helper(Observer, B, KB, MDT);
AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
PreLegalizerHelper);
- if (Generated.tryCombineAll(Observer, MI, B, Helper))
+ if (Generated.tryCombineAll(Observer, MI, B))
return true;
switch (MI.getOpcode()) {
- case TargetOpcode::G_MEMCPY_INLINE:
- return Helper.tryEmitMemcpyInline(MI);
case TargetOpcode::G_CONCAT_VECTORS:
return Helper.tryCombineConcatVectors(MI);
case TargetOpcode::G_SHUFFLE_VECTOR:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index 7b6959b56145..d560d2043f42 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -149,11 +149,11 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
IRBuilder<> Builder(Ctx);
Type *I32Ty = Type::getInt32Ty(Ctx);
unsigned UniqID = 0;
- // NB: This is important for this string size to be divizable by 4
+ // NB: It is important for this string size to be divisible by 4
const char NonLiteralStr[4] = "???";
for (auto CI : Printfs) {
- unsigned NumOps = CI->getNumArgOperands();
+ unsigned NumOps = CI->arg_size();
SmallString<16> OpConvSpecifiers;
Value *Op = CI->getArgOperand(0);
@@ -201,10 +201,10 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
std::string AStreamHolder;
raw_string_ostream Sizes(AStreamHolder);
int Sum = DWORD_ALIGN;
- Sizes << CI->getNumArgOperands() - 1;
+ Sizes << CI->arg_size() - 1;
Sizes << ':';
- for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
- ArgCount <= OpConvSpecifiers.size();
+ for (unsigned ArgCount = 1;
+ ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
ArgCount++) {
Value *Arg = CI->getArgOperand(ArgCount);
Type *ArgType = Arg->getType();
@@ -330,7 +330,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
- std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
+ std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str();
MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
// Instead of creating global variables, the
@@ -389,8 +389,8 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
Type *Int32Ty = Type::getInt32Ty(Ctx);
Type *Int64Ty = Type::getInt64Ty(Ctx);
- for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
- ArgCount <= OpConvSpecifiers.size();
+ for (unsigned ArgCount = 1;
+ ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
ArgCount++) {
Value *Arg = CI->getArgOperand(ArgCount);
Type *ArgType = Arg->getType();
@@ -524,7 +524,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
<< *StBuff << '\n');
(void)StBuff;
- if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
+ if (I + 1 == E && ArgCount + 1 == CI->arg_size())
break;
BufferIdx = GetElementPtrInst::Create(I8Ty, BufferIdx, BuffOffset,
"PrintBuffNextPtr", Brnch);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3f1f21a33f7e..3ec5dd7e0eff 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "Utils/AMDGPUBaseInfo.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
@@ -176,6 +177,10 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
if (IsAMDGCN) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ // A non-entry function has only 32 caller-preserved registers.
+ // Do not promote an alloca if doing so would force spilling.
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ MaxVGPRs = std::min(MaxVGPRs, 32u);
} else {
MaxVGPRs = 128;
}
@@ -200,7 +205,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F) {
std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
- const Function &F = *Builder.GetInsertBlock()->getParent();
+ Function &F = *Builder.GetInsertBlock()->getParent();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!IsAMDHSA) {
@@ -256,11 +261,12 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
= Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
- DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- DispatchPtr->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ DispatchPtr->addRetAttr(Attribute::NoAlias);
+ DispatchPtr->addRetAttr(Attribute::NonNull);
+ F.removeFnAttr("amdgpu-no-dispatch-ptr");
// Size of the dispatch packet struct.
- DispatchPtr->addDereferenceableAttr(AttributeList::ReturnIndex, 64);
+ DispatchPtr->addDereferenceableRetAttr(64);
Type *I32Ty = Type::getInt32Ty(Mod->getContext());
Value *CastDispatchPtr = Builder.CreateBitCast(
@@ -268,7 +274,7 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
// We could do a single 64-bit load here, but it's likely that the basic
// 32-bit and extract sequence is already present, and it is probably easier
- // to CSE this. The loads should be mergable later anyway.
+ // to CSE this. The loads should be mergeable later anyway.
Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
@@ -288,23 +294,27 @@ AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
unsigned N) {
- const AMDGPUSubtarget &ST =
- AMDGPUSubtarget::get(TM, *Builder.GetInsertBlock()->getParent());
+ Function *F = Builder.GetInsertBlock()->getParent();
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+ StringRef AttrName;
switch (N) {
case 0:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
: (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
+ AttrName = "amdgpu-no-workitem-id-x";
break;
case 1:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
: (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
+ AttrName = "amdgpu-no-workitem-id-y";
break;
case 2:
IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
: (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
+ AttrName = "amdgpu-no-workitem-id-z";
break;
default:
llvm_unreachable("invalid dimension");
@@ -313,6 +323,7 @@ Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
CallInst *CI = Builder.CreateCall(WorkitemIdFn);
ST.makeLIDRangeMetadata(CI);
+ F->removeFnAttr(AttrName);
return CI;
}
@@ -1065,9 +1076,9 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
MI->getRawSource(), MI->getSourceAlign(),
MI->getLength(), MI->isVolatile());
- for (unsigned I = 1; I != 3; ++I) {
- if (uint64_t Bytes = Intr->getDereferenceableBytes(I)) {
- B->addDereferenceableAttr(I, Bytes);
+ for (unsigned I = 0; I != 2; ++I) {
+ if (uint64_t Bytes = Intr->getParamDereferenceableBytes(I)) {
+ B->addDereferenceableParamAttr(I, Bytes);
}
}
@@ -1101,6 +1112,10 @@ bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+ // A non-entry function has only 32 caller-preserved registers.
+ // Do not promote an alloca if doing so would force spilling.
+ if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ MaxVGPRs = std::min(MaxVGPRs, 32u);
} else {
MaxVGPRs = 128;
}
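Both hunks in this file add the same budget rule, once for the scalar promotion path and once for promoteAllocasToVector. A standalone sketch of that rule (clampVGPRBudget and its parameters are illustrative names, not LLVM API; the 32-register cap comes from the comment in the patch):

#include <algorithm>

// Non-entry functions only have 32 caller-preserved VGPRs, so cap the
// promotion budget there to avoid forcing spills.
static unsigned clampVGPRBudget(unsigned SubtargetMaxVGPRs, bool IsEntryFunction) {
  unsigned MaxVGPRs = SubtargetMaxVGPRs;
  if (!IsEntryFunction)
    MaxVGPRs = std::min(MaxVGPRs, 32u);
  return MaxVGPRs;
}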
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
new file mode 100644
index 000000000000..01d03d17ec47
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -0,0 +1,195 @@
+//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This pass recursively promotes generic pointer arguments of a kernel
+/// into the global address space.
+///
+/// The pass walks the kernel's pointer arguments and the loads from them. If a
+/// loaded value is itself a pointer and is not modified in the kernel before
+/// the load, the loaded pointer is promoted to the global address space, and
+/// the process continues recursively from it.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPUPromoteKernelArguments : public FunctionPass {
+ MemorySSA *MSSA;
+
+ Instruction *ArgCastInsertPt;
+
+ SmallVector<Value *> Ptrs;
+
+ void enqueueUsers(Value *Ptr);
+
+ bool promotePointer(Value *Ptr);
+
+public:
+ static char ID;
+
+ AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
+
+ bool run(Function &F, MemorySSA &MSSA);
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.setPreservesAll();
+ }
+};
+
+} // end anonymous namespace
+
+void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
+ SmallVector<User *> PtrUsers(Ptr->users());
+
+ while (!PtrUsers.empty()) {
+ Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
+ if (!U)
+ continue;
+
+ switch (U->getOpcode()) {
+ default:
+ break;
+ case Instruction::Load: {
+ LoadInst *LD = cast<LoadInst>(U);
+ PointerType *PT = dyn_cast<PointerType>(LD->getType());
+ if (!PT ||
+ (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) ||
+ LD->getPointerOperand()->stripInBoundsOffsets() != Ptr)
+ break;
+ const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(LD);
+ // TODO: This load can probably be promoted to the constant address space.
+ if (MSSA->isLiveOnEntryDef(MA))
+ Ptrs.push_back(LD);
+ break;
+ }
+ case Instruction::GetElementPtr:
+ case Instruction::AddrSpaceCast:
+ case Instruction::BitCast:
+ if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
+ PtrUsers.append(U->user_begin(), U->user_end());
+ break;
+ }
+ }
+}
+
+bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
+ enqueueUsers(Ptr);
+
+ PointerType *PT = cast<PointerType>(Ptr->getType());
+ if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
+ return false;
+
+ bool IsArg = isa<Argument>(Ptr);
+ IRBuilder<> B(IsArg ? ArgCastInsertPt
+ : &*std::next(cast<Instruction>(Ptr)->getIterator()));
+
+ // Cast the pointer to the global address space and back to flat, and let the
+ // Infer Address Spaces pass do all the necessary rewriting.
+ PointerType *NewPT =
+ PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
+ Value *Cast =
+ B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
+ Value *CastBack =
+ B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
+ Ptr->replaceUsesWithIf(CastBack,
+ [Cast](Use &U) { return U.getUser() != Cast; });
+
+ return true;
+}
+
+// Skip over any leading static allocas when choosing the insertion point.
+static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
+ BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
+ for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
+
+ // If this is a dynamic alloca, the value may depend on the loaded kernargs,
+ // so loads will need to be inserted before it.
+ if (!AI || !AI->isStaticAlloca())
+ break;
+ }
+
+ return InsPt;
+}
+
+bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA) {
+ if (skipFunction(F))
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
+ return false;
+
+ ArgCastInsertPt = &*getInsertPt(*F.begin());
+ this->MSSA = &MSSA;
+
+ for (Argument &Arg : F.args()) {
+ if (Arg.use_empty())
+ continue;
+
+ PointerType *PT = dyn_cast<PointerType>(Arg.getType());
+ if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
+ PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
+ continue;
+
+ Ptrs.push_back(&Arg);
+ }
+
+ bool Changed = false;
+ while (!Ptrs.empty()) {
+ Value *Ptr = Ptrs.pop_back_val();
+ Changed |= promotePointer(Ptr);
+ }
+
+ return Changed;
+}
+
+bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
+ MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ return run(F, MSSA);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
+ "AMDGPU Promote Kernel Arguments", false, false)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
+ "AMDGPU Promote Kernel Arguments", false, false)
+
+char AMDGPUPromoteKernelArguments::ID = 0;
+
+FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
+ return new AMDGPUPromoteKernelArguments();
+}
+
+PreservedAnalyses
+AMDGPUPromoteKernelArgumentsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ if (AMDGPUPromoteKernelArguments().run(F, MSSA)) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
+ }
+ return PreservedAnalyses::all();
+}
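The core rewrite in promotePointer above is a flat-to-global-to-flat round trip; the condensed sketch below isolates just that step to show why the new cast is excluded from the use rewrite (B and Ptr are assumed to be the IRBuilder and the flat pointer from the pass; everything else matches the code above):

// Cast to global and immediately back to flat. InferAddressSpaces can later
// fold the round trip and rewrite users to the global address space.
PointerType *FlatPT = cast<PointerType>(Ptr->getType());
PointerType *GlobalPT =
    PointerType::getWithSamePointeeType(FlatPT, AMDGPUAS::GLOBAL_ADDRESS);
Value *Cast = B.CreateAddrSpaceCast(Ptr, GlobalPT, Twine(Ptr->getName(), ".global"));
Value *CastBack = B.CreateAddrSpaceCast(Cast, FlatPT, Twine(Ptr->getName(), ".flat"));
// Every existing use of Ptr is redirected to CastBack except the fresh cast
// itself, which must keep reading the original pointer to avoid a cycle.
Ptr->replaceUsesWithIf(CastBack, [Cast](Use &U) { return U.getUser() != Cast; });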
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 0e4c26170a8f..dafbeaeaec52 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -9,7 +9,7 @@
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
-/// yet will be correctly compiled if proper attrbutes are propagated from the
+/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
@@ -17,7 +17,7 @@
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
-/// callers with different ABI. If a function is clonned all call sites will
+/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
@@ -55,10 +55,7 @@ static constexpr const FeatureBitset TargetFeatures = {
// Attributes to propagate.
// TODO: Support conservative min/max merging instead of cloning.
-static constexpr const char* AttributeNames[] = {
- "amdgpu-waves-per-eu",
- "amdgpu-flat-work-group-size"
-};
+static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
static constexpr unsigned NumAttr =
sizeof(AttributeNames) / sizeof(AttributeNames[0]);
@@ -149,7 +146,7 @@ public:
bool process(Module &M);
};
-// Allows to propagate attributes early, but no clonning is allowed as it must
+// Allows to propagate attributes early, but no cloning is allowed as it must
// be a function pass to run before any optimizations.
// TODO: We shall only need a one instance of module pass, but that needs to be
// in the linker pipeline which is currently not possible.
@@ -168,7 +165,7 @@ public:
bool runOnFunction(Function &F) override;
};
-// Allows to propagate attributes with clonning but does that late in the
+// Allows to propagate attributes with cloning but does that late in the
// pipeline.
class AMDGPUPropagateAttributesLate : public ModulePass {
const TargetMachine *TM;
@@ -212,10 +209,10 @@ AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
bool AMDGPUPropagateAttributes::process(Module &M) {
for (auto &F : M.functions())
- if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ if (AMDGPU::isKernel(F.getCallingConv()))
Roots.insert(&F);
- return process();
+ return Roots.empty() ? false : process();
}
bool AMDGPUPropagateAttributes::process(Function &F) {
@@ -228,8 +225,7 @@ bool AMDGPUPropagateAttributes::process() {
SmallSet<Function *, 32> NewRoots;
SmallSet<Function *, 32> Replaced;
- if (Roots.empty())
- return false;
+ assert(!Roots.empty());
Module &M = *(*Roots.begin())->getParent();
do {
@@ -273,7 +269,7 @@ bool AMDGPUPropagateAttributes::process() {
if (!NewF) {
const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
- // This may set different features on different iteartions if
+ // This may set different features on different iterations if
// there is a contradiction in callers' attributes. In this case
// we rely on a second pass running on Module, which is allowed
// to clone.
@@ -383,7 +379,7 @@ bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
TM = &TPC->getTM<TargetMachine>();
}
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ if (!AMDGPU::isKernel(F.getCallingConv()))
return false;
return AMDGPUPropagateAttributes(TM, false).process(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index 4e12e5cd8f65..d7dc9ee4117b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -57,9 +57,9 @@ public:
MinMaxMedOpc getMinMaxPair(unsigned Opc);
- template <class m_Cst>
+ template <class m_Cst, typename CstTy>
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
- Register &Val, Register &K0, Register &K1);
+ Register &Val, CstTy &K0, CstTy &K1);
bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
@@ -83,11 +83,11 @@ AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
}
}
-template <class m_Cst>
+template <class m_Cst, typename CstTy>
bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
MachineRegisterInfo &MRI,
MinMaxMedOpc MMMOpc, Register &Val,
- Register &K0, Register &K1) {
+ CstTy &K0, CstTy &K1) {
// 4 operand commutes of: min(max(Val, K0), K1).
// Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
// Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
@@ -115,19 +115,18 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
return false;
MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
- Register Val, K0, K1;
+ Register Val;
+ Optional<ValueAndVReg> K0, K1;
// Match min(max(Val, K0), K1) or max(min(Val, K1), K0). Then see if K0 <= K1.
- if (!matchMed<ICstRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
+ if (!matchMed<GCstAndRegMatch>(MI, MRI, OpcodeTriple, Val, K0, K1))
return false;
- const APInt &K0_Imm = getConstantIntVRegVal(K0, MRI)->getValue();
- const APInt &K1_Imm = getConstantIntVRegVal(K1, MRI)->getValue();
- if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0_Imm.sgt(K1_Imm))
+ if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_SMED3 && K0->Value.sgt(K1->Value))
return false;
- if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0_Imm.ugt(K1_Imm))
+ if (OpcodeTriple.Med == AMDGPU::G_AMDGPU_UMED3 && K0->Value.ugt(K1->Value))
return false;
- MatchInfo = {OpcodeTriple.Med, Val, K0, K1};
+ MatchInfo = {OpcodeTriple.Med, Val, K0->VReg, K1->VReg};
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 0e4005627e02..ab3ce980c3f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -58,7 +58,7 @@
///
/// We avoid trying to solve this problem in RegBankSelect. Any VALU G_*
/// operation should have its source operands all mapped to VGPRs (except for
-/// VCC), inserting copies from any SGPR operands. This the most trival legal
+/// VCC), inserting copies from any SGPR operands. This is the most trivial legal
/// mapping. Anything beyond the simplest 1:1 instruction selection would be too
/// complicated to solve here. Every optimization pattern or instruction
/// selected to multiple outputs would have to enforce this rule, and there
@@ -118,7 +118,7 @@ public:
Opc == AMDGPU::G_SEXT) {
// LegalizerHelper wants to use the basic legalization artifacts when
// widening etc. We don't handle selection with vcc in artifact sources,
- // so we need to use a sslect instead to handle these properly.
+ // so we need to use a select instead to handle these properly.
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, MRI, *RBI.TRI);
@@ -282,7 +282,7 @@ AMDGPURegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
// VCC-like use.
if (TRI->isSGPRClass(&RC)) {
// FIXME: This probably came from a copy from a physical register, which
- // should be inferrrable from the copied to-type. We don't have many boolean
+ // should be inferable from the copied to-type. We don't have many boolean
// physical register constraints so just assume a normal SGPR for now.
if (!Ty.isValid())
return AMDGPU::SGPRRegBank;
@@ -734,23 +734,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
const int OrigRangeSize = std::distance(Range.begin(), Range.end());
#endif
- for (MachineInstr &MI : Range) {
- for (MachineOperand &Def : MI.defs()) {
- if (MRI.use_nodbg_empty(Def.getReg()))
- continue;
-
- LLT ResTy = MRI.getType(Def.getReg());
- const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
- ResultRegs.push_back(Def.getReg());
- Register InitReg = B.buildUndef(ResTy).getReg(0);
- Register PhiReg = MRI.createGenericVirtualRegister(ResTy);
- InitResultRegs.push_back(InitReg);
- PhiRegs.push_back(PhiReg);
- MRI.setRegBank(PhiReg, *DefBank);
- MRI.setRegBank(InitReg, *DefBank);
- }
- }
-
Register SaveExecReg = MRI.createVirtualRegister(WaveRC);
Register InitSaveExecReg = MRI.createVirtualRegister(WaveRC);
@@ -894,23 +877,26 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
bool Is64 = OpSize % 64 == 0;
- LLT UnmergeTy = OpSize % 64 == 0 ? LLT::scalar(64) : LLT::scalar(32);
- unsigned CmpOp = OpSize % 64 == 0 ? AMDGPU::V_CMP_EQ_U64_e64
- : AMDGPU::V_CMP_EQ_U32_e64;
-
- // The compares can be done as 64-bit, but the extract needs to be done
- // in 32-bit pieces.
+ unsigned UnmergeTySize = Is64 ? 64 : 32;
+ unsigned CmpOp =
+ Is64 ? AMDGPU::V_CMP_EQ_U64_e64 : AMDGPU::V_CMP_EQ_U32_e64;
// Insert the unmerge before the loop.
B.setMBB(MBB);
- auto Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
+ unsigned NumPieces = OpSize / UnmergeTySize;
+ SmallVector<Register, 8> UnmergePieces;
+ if (NumPieces == 1) {
+ UnmergePieces.push_back(OpReg);
+ } else {
+ LLT UnmergeTy = LLT::scalar(UnmergeTySize);
+ MachineInstrBuilder Unmerge = B.buildUnmerge(UnmergeTy, OpReg);
+ for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx)
+ UnmergePieces.push_back(Unmerge.getReg(PieceIdx));
+ }
B.setInstr(*I);
- unsigned NumPieces = Unmerge->getNumOperands() - 1;
- for (unsigned PieceIdx = 0; PieceIdx != NumPieces; ++PieceIdx) {
- Register UnmergePiece = Unmerge.getReg(PieceIdx);
-
+ for (Register UnmergePiece : UnmergePieces) {
Register CurrentLaneOpReg;
if (Is64) {
Register CurrentLaneOpRegLo = MRI.createGenericVirtualRegister(S32);
@@ -985,12 +971,14 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
if (OpTy.isVector()) {
auto Merge = B.buildBuildVector(OpTy, ReadlanePieces);
Op.setReg(Merge.getReg(0));
- } else {
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ } else if (ReadlanePieces.size() > 1) {
auto Merge = B.buildMerge(OpTy, ReadlanePieces);
Op.setReg(Merge.getReg(0));
+ MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
+ } else {
+ Op.setReg(ReadlanePieces[0]);
}
-
- MRI.setRegBank(Op.getReg(), AMDGPU::SGPRRegBank);
}
// Make sure we don't re-process this register again.
@@ -998,8 +986,6 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
}
}
- B.setInsertPt(*LoopBB, LoopBB->end());
-
// Update EXEC, save the original EXEC value to VCC.
B.buildInstr(AndSaveExecOpc)
.addDef(NewExec)
@@ -1007,6 +993,8 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MRI.setSimpleHint(NewExec, CondReg);
+ B.setInsertPt(*LoopBB, LoopBB->end());
+
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
B.buildInstr(XorTermOpc)
.addDef(ExecReg)
@@ -1017,8 +1005,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
// s_cbranch_scc0?
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
- B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
- .addMBB(LoopBB);
+ B.buildInstr(AMDGPU::SI_WATERFALL_LOOP).addMBB(LoopBB);
// Save the EXEC mask before the loop.
BuildMI(MBB, MBB.end(), DL, TII->get(MovTermOpc), SaveExecReg)
@@ -1336,7 +1323,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo *MRI = B.getMRI();
- if (Optional<int64_t> Imm = getConstantVRegSExtVal(CombinedOffset, *MRI)) {
+ if (Optional<int64_t> Imm = getIConstantVRegSExtVal(CombinedOffset, *MRI)) {
uint32_t SOffset, ImmOffset;
if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
Alignment)) {
@@ -1430,7 +1417,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
OffsetBank == &AMDGPU::SGPRRegBank)
return true; // Legal mapping
- // FIXME: 96-bit case was widened during legalize. We neeed to narrow it back
+ // FIXME: 96-bit case was widened during legalize. We need to narrow it back
// here but don't have an MMO.
unsigned LoadSize = Ty.getSizeInBits();
@@ -1455,7 +1442,7 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
VOffset, SOffset, ImmOffset, Alignment);
// TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
- // can, but we neeed to track an MMO for that.
+ // can, but we need to track an MMO for that.
const unsigned MemSize = (Ty.getSizeInBits() + 7) / 8;
const Align MemAlign(4); // FIXME: ABI type alignment?
MachineMemOperand *BaseMMO = MF.getMachineMemOperand(
@@ -1569,7 +1556,7 @@ bool AMDGPURegisterBankInfo::applyMappingBFE(const OperandsMapper &OpdMapper,
// A 64-bit bitfield extract uses the 32-bit bitfield extract instructions
// if the width is a constant.
- if (auto ConstWidth = getConstantVRegValWithLookThrough(WidthReg, MRI)) {
+ if (auto ConstWidth = getIConstantVRegValWithLookThrough(WidthReg, MRI)) {
// Use the 32-bit bitfield extract instruction if the width is a constant.
// Depending on the width size, use either the low or high 32-bits.
auto Zero = B.buildConstant(S32, 0);
@@ -1775,97 +1762,6 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
return {BaseReg, C1};
}
-static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
- int64_t C;
- return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
-}
-
-static unsigned extractCPol(unsigned CachePolicy) {
- return CachePolicy & AMDGPU::CPol::ALL;
-}
-
-static unsigned extractSWZ(unsigned CachePolicy) {
- return (CachePolicy >> 3) & 1;
-}
-
-
-MachineInstr *
-AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
- MachineInstr &MI) const {
- MachineRegisterInfo &MRI = *B.getMRI();
- executeInWaterfallLoop(B, MI, MRI, {2, 4});
-
- // FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
-
- Register VData = MI.getOperand(1).getReg();
- LLT Ty = MRI.getType(VData);
-
- int EltSize = Ty.getScalarSizeInBits();
- int Size = Ty.getSizeInBits();
-
- // FIXME: Broken integer truncstore.
- if (EltSize != 32)
- report_fatal_error("unhandled intrinsic store");
-
- // FIXME: Verifier should enforce 1 MMO for these intrinsics.
- const int MemSize = (*MI.memoperands_begin())->getSize();
-
-
- Register RSrc = MI.getOperand(2).getReg();
- Register VOffset = MI.getOperand(3).getReg();
- Register SOffset = MI.getOperand(4).getReg();
- unsigned CachePolicy = MI.getOperand(5).getImm();
-
- unsigned ImmOffset;
- std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
-
- const bool Offen = !isZero(VOffset, MRI);
-
- unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
- switch (8 * MemSize) {
- case 8:
- Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
- AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
- break;
- case 16:
- Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
- AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
- break;
- default:
- Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
- AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
- if (Size > 32)
- Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
- break;
- }
-
-
- // Set the insertion point back to the instruction in case it was moved into a
- // loop.
- B.setInstr(MI);
-
- MachineInstrBuilder MIB = B.buildInstr(Opc)
- .addUse(VData);
-
- if (Offen)
- MIB.addUse(VOffset);
-
- MIB.addUse(RSrc)
- .addUse(SOffset)
- .addImm(ImmOffset)
- .addImm(extractCPol(CachePolicy))
- .addImm(0) // tfe: FIXME: Remove from inst
- .addImm(extractSWZ(CachePolicy))
- .cloneMemRefs(MI);
-
- // FIXME: We need a way to report failure from applyMappingImpl.
- // Insert constrain copies before inserting the loop.
- if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
- report_fatal_error("failed to constrain selected store intrinsic");
-
- return MIB;
-}
-
bool AMDGPURegisterBankInfo::buildVCopy(MachineIRBuilder &B, Register DstReg,
Register SrcReg) const {
MachineRegisterInfo &MRI = *B.getMRI();
@@ -2153,7 +2049,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
// The standard handling only considers the result register bank for
// phis. For VCC, blindly inserting a copy when the phi is lowered will
// produce an invalid copy. We can only copy with some kind of compare to
- // get a vector boolean result. Insert a regitser bank copy that will be
+ // get a vector boolean result. Insert a register bank copy that will be
// correctly lowered to a compare.
MachineIRBuilder B(*MI.getParent()->getParent());
@@ -2491,9 +2387,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
return;
}
case AMDGPU::G_CTPOP:
- case AMDGPU::G_BITREVERSE:
- case AMDGPU::G_CTLZ_ZERO_UNDEF:
- case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ case AMDGPU::G_BITREVERSE: {
const RegisterBank *DstBank =
OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
if (DstBank == &AMDGPU::SGPRRegBank)
@@ -2515,6 +2409,48 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
llvm_unreachable("narrowScalar should have succeeded");
return;
}
+ case AMDGPU::G_AMDGPU_FFBH_U32:
+ case AMDGPU::G_AMDGPU_FFBL_B32:
+ case AMDGPU::G_CTLZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ const RegisterBank *DstBank =
+ OpdMapper.getInstrMapping().getOperandMapping(0).BreakDown[0].RegBank;
+ if (DstBank == &AMDGPU::SGPRRegBank)
+ break;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ const LLT S32 = LLT::scalar(32);
+ LLT Ty = MRI.getType(SrcReg);
+ if (Ty == S32)
+ break;
+
+ // We can narrow this more efficiently than Helper can by using ffbh/ffbl
+ // which return -1 when the input is zero:
+ // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
+ // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
+ // (ffbh hi:lo) -> (umin (ffbh hi), (uaddsat (ffbh lo), 32))
+ // (ffbl hi:lo) -> (umin (uaddsat (ffbl hi), 32), (ffbl lo))
+ ApplyRegBankMapping ApplyVALU(*this, MRI, &AMDGPU::VGPRRegBank);
+ MachineIRBuilder B(MI, ApplyVALU);
+ SmallVector<Register, 2> SrcRegs(OpdMapper.getVRegs(1));
+ unsigned NewOpc = Opc == AMDGPU::G_CTLZ_ZERO_UNDEF
+ ? (unsigned)AMDGPU::G_AMDGPU_FFBH_U32
+ : Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
+ ? (unsigned)AMDGPU::G_AMDGPU_FFBL_B32
+ : Opc;
+ unsigned Idx = NewOpc == AMDGPU::G_AMDGPU_FFBH_U32;
+ auto X = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx]});
+ auto Y = B.buildInstr(NewOpc, {S32}, {SrcRegs[Idx ^ 1]});
+ unsigned AddOpc =
+ Opc == AMDGPU::G_CTLZ_ZERO_UNDEF || Opc == AMDGPU::G_CTTZ_ZERO_UNDEF
+ ? AMDGPU::G_ADD
+ : AMDGPU::G_UADDSAT;
+ Y = B.buildInstr(AddOpc, {S32}, {Y, B.buildConstant(S32, 32)});
+ Register DstReg = MI.getOperand(0).getReg();
+ B.buildUMin(DstReg, X, Y);
+ MI.eraseFromParent();
+ return;
+ }
case AMDGPU::G_SEXT:
case AMDGPU::G_ZEXT:
case AMDGPU::G_ANYEXT: {
@@ -3034,6 +2970,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
case AMDGPU::G_AMDGPU_INTRIN_BVH_INTERSECT_RAY: {
unsigned N = MI.getNumExplicitOperands() - 2;
+ applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, { N });
return;
}
@@ -3095,6 +3032,101 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
}
break;
}
+ case AMDGPU::G_SI_CALL: {
+ // Use a set to avoid extra readfirstlanes in the case where multiple
+ // operands are the same register.
+ SmallSet<Register, 4> SGPROperandRegs;
+
+ if (!collectWaterfallOperands(SGPROperandRegs, MI, MRI, {1}))
+ break;
+
+ // Move all copies to physical SGPRs that are used by the call instruction
+ // into the loop block. Search backwards from the call for these copies,
+ // stopping at the ADJCALLSTACKUP.
+ unsigned FrameSetupOpcode = AMDGPU::ADJCALLSTACKUP;
+ unsigned FrameDestroyOpcode = AMDGPU::ADJCALLSTACKDOWN;
+
+ // Move all non-copies before the copies, so that a complete range can be
+ // moved into the waterfall loop.
+ SmallVector<MachineInstr *, 4> NonCopyInstrs;
+ // Count of NonCopyInstrs found until the current LastCopy.
+ unsigned NonCopyInstrsLen = 0;
+ MachineBasicBlock::iterator Start(&MI);
+ MachineBasicBlock::iterator LastCopy = Start;
+ MachineBasicBlock *MBB = MI.getParent();
+ const SIMachineFunctionInfo *Info =
+ MBB->getParent()->getInfo<SIMachineFunctionInfo>();
+ while (Start->getOpcode() != FrameSetupOpcode) {
+ --Start;
+ bool IsCopy = false;
+ if (Start->getOpcode() == AMDGPU::COPY) {
+ auto &Dst = Start->getOperand(0);
+ if (Dst.isReg()) {
+ Register Reg = Dst.getReg();
+ if (Reg.isPhysical() && MI.readsRegister(Reg, TRI)) {
+ IsCopy = true;
+ } else {
+ // Also move the copy from the scratch rsrc descriptor into the loop
+ // to allow it to be optimized away.
+ auto &Src = Start->getOperand(1);
+ if (Src.isReg()) {
+ Reg = Src.getReg();
+ IsCopy = Info->getScratchRSrcReg() == Reg;
+ }
+ }
+ }
+ }
+
+ if (IsCopy) {
+ LastCopy = Start;
+ NonCopyInstrsLen = NonCopyInstrs.size();
+ } else {
+ NonCopyInstrs.push_back(&*Start);
+ }
+ }
+ NonCopyInstrs.resize(NonCopyInstrsLen);
+
+ for (auto *NonCopy : reverse(NonCopyInstrs)) {
+ MBB->splice(LastCopy, MBB, NonCopy->getIterator());
+ }
+ Start = LastCopy;
+
+ // Do the same for copies after the loop
+ NonCopyInstrs.clear();
+ NonCopyInstrsLen = 0;
+ MachineBasicBlock::iterator End(&MI);
+ LastCopy = End;
+ while (End->getOpcode() != FrameDestroyOpcode) {
+ ++End;
+ bool IsCopy = false;
+ if (End->getOpcode() == AMDGPU::COPY) {
+ auto &Src = End->getOperand(1);
+ if (Src.isReg()) {
+ Register Reg = Src.getReg();
+ IsCopy = Reg.isPhysical() && MI.modifiesRegister(Reg, TRI);
+ }
+ }
+
+ if (IsCopy) {
+ LastCopy = End;
+ NonCopyInstrsLen = NonCopyInstrs.size();
+ } else {
+ NonCopyInstrs.push_back(&*End);
+ }
+ }
+ NonCopyInstrs.resize(NonCopyInstrsLen);
+
+ End = LastCopy;
+ ++LastCopy;
+ for (auto *NonCopy : reverse(NonCopyInstrs)) {
+ MBB->splice(LastCopy, MBB, NonCopy->getIterator());
+ }
+
+ ++End;
+ MachineIRBuilder B(*Start);
+ executeInWaterfallLoop(B, make_range(Start, End), SGPROperandRegs, MRI);
+ break;
+ }
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD: {
@@ -3290,7 +3322,7 @@ AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
-/// Return the mapping for a pointer arugment.
+/// Return the mapping for a pointer argument.
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getValueMappingForPtr(const MachineRegisterInfo &MRI,
Register PtrReg) const {
@@ -3620,7 +3652,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar
- case AMDGPU::G_AMDGPU_FFBH_U32:
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
case AMDGPU::G_AMDGPU_FMAX_LEGACY:
case AMDGPU::G_AMDGPU_RCP_IFLAG:
@@ -3726,8 +3757,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size);
break;
}
+ case AMDGPU::G_AMDGPU_FFBH_U32:
+ case AMDGPU::G_AMDGPU_FFBL_B32:
case AMDGPU::G_CTLZ_ZERO_UNDEF:
- case AMDGPU::G_CTTZ_ZERO_UNDEF:
+ case AMDGPU::G_CTTZ_ZERO_UNDEF: {
+ unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
+ OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size);
+ break;
+ }
case AMDGPU::G_CTPOP: {
unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI);
@@ -4033,6 +4072,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_mbcnt_hi:
case Intrinsic::amdgcn_mul_u24:
case Intrinsic::amdgcn_mul_i24:
+ case Intrinsic::amdgcn_mulhi_u24:
+ case Intrinsic::amdgcn_mulhi_i24:
case Intrinsic::amdgcn_lerp:
case Intrinsic::amdgcn_sad_u8:
case Intrinsic::amdgcn_msad_u8:
@@ -4254,8 +4295,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
unsigned N = MI.getNumExplicitOperands() - 2;
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 128);
OpdsMapping[N] = getSGPROpMapping(MI.getOperand(N).getReg(), MRI, *TRI);
- for (unsigned I = 2; I < N; ++I)
- OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ if (N == 3) {
+ // Sequential form: all operands combined into VGPR256/VGPR512
+ unsigned Size = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ if (Size > 256)
+ Size = 512;
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
+ } else {
+ // NSA form
+ for (unsigned I = 2; I < N; ++I)
+ OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
+ }
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
@@ -4447,6 +4497,23 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ case AMDGPU::G_SI_CALL: {
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
+ // Lie and claim everything is legal, even though some need to be
+ // SGPRs. applyMapping will have to deal with it as a waterfall loop.
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+
+ // Allow anything for implicit arguments
+ for (unsigned I = 4; I < MI.getNumOperands(); ++I) {
+ if (MI.getOperand(I).isReg()) {
+ Register Reg = MI.getOperand(I).getReg();
+ auto OpBank = getRegBankID(Reg, MRI);
+ unsigned Size = getSizeInBits(Reg, MRI, *TRI);
+ OpdsMapping[I] = AMDGPU::getValueMapping(OpBank, Size);
+ }
+ }
+ break;
+ }
case AMDGPU::G_LOAD:
case AMDGPU::G_ZEXTLOAD:
case AMDGPU::G_SEXTLOAD:
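The 64-bit handling added above leans on the identities in the in-line comment. The scalar C++ check below walks through the ctlz_zero_undef case (ffbh32 mimics the ffbh convention of returning -1, here ~0u, for a zero input; the whole 64-bit value is assumed non-zero, as the *_ZERO_UNDEF opcodes require). It also shows why a plain add is enough for the zero-undef forms: when the low half is zero its term wraps from ~0u + 32 to 31 and can never beat the high half's count, whereas the standalone ffbh/ffbl lowerings need uaddsat so an all-zero input still yields -1.

#include <algorithm>
#include <cstdint>

static unsigned ffbh32(uint32_t V) {   // count leading zeros; ~0u ("-1") if V == 0
  return V ? static_cast<unsigned>(__builtin_clz(V)) : ~0u;
}

static unsigned ctlz64(uint64_t X) {   // X != 0, as for G_CTLZ_ZERO_UNDEF
  unsigned Hi = static_cast<unsigned>(X >> 32);
  unsigned Lo = static_cast<unsigned>(X);
  // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
  return std::min(ffbh32(Hi), ffbh32(Lo) + 32u);
}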
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index 7e051e4a5424..2b9d0923ab49 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -89,9 +89,6 @@ public:
std::pair<Register, unsigned>
splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
- MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
- MachineInstr &MI) const;
-
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
index dabb4d006d99..d55bf3917e9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
@@ -130,11 +130,9 @@ class ReplaceLDSUseImpl {
std::vector<GlobalVariable *> LDSGlobals = AMDGPU::findVariablesToLower(M);
// Remove LDS which don't qualify for replacement.
- LDSGlobals.erase(std::remove_if(LDSGlobals.begin(), LDSGlobals.end(),
- [&](GlobalVariable *GV) {
- return shouldIgnorePointerReplacement(GV);
- }),
- LDSGlobals.end());
+ llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
+ return shouldIgnorePointerReplacement(GV);
+ });
return LDSGlobals;
}
@@ -142,7 +140,7 @@ class ReplaceLDSUseImpl {
// Returns true if uses of given LDS global within non-kernel functions should
// be keep as it is without pointer replacement.
bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
- // LDS whose size is very small and doesn`t exceed pointer size is not worth
+ // LDS whose size is very small and doesn't exceed pointer size is not worth
// replacing.
if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
return true;
@@ -158,7 +156,7 @@ class ReplaceLDSUseImpl {
// Insert new global LDS pointer which points to LDS.
GlobalVariable *createLDSPointer(GlobalVariable *GV) {
- // LDS pointer which points to LDS is already created? return it.
+ // If an LDS pointer that points to this LDS global was already created, return it.
auto PointerEntry = LDSToPointer.insert(std::make_pair(GV, nullptr));
if (!PointerEntry.second)
return PointerEntry.first->second;
@@ -185,7 +183,7 @@ class ReplaceLDSUseImpl {
// Split entry basic block in such a way that only lane 0 of each wave does
// the LDS pointer initialization, and return newly created basic block.
BasicBlock *activateLaneZero(Function *K) {
- // If the entry basic block of kernel K is already splitted, then return
+ // If the entry basic block of kernel K is already split, then return
// newly created basic block.
auto BasicBlockEntry = KernelToInitBB.insert(std::make_pair(K, nullptr));
if (!BasicBlockEntry.second)
@@ -204,7 +202,7 @@ class ReplaceLDSUseImpl {
BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
- // Mark that the entry basic block of kernel K is splitted.
+ // Mark that the entry basic block of kernel K is split.
KernelToInitBB[K] = NBB;
return NBB;
@@ -235,7 +233,7 @@ class ReplaceLDSUseImpl {
}
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
- // within all relevent kernels. Now replace all the uses of LDS within
+ // within all relevant kernels. Now replace all the uses of LDS within
// non-kernel functions by LDS pointer.
void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
SmallVector<User *, 8> LDSUsers(GV->users());
@@ -268,8 +266,8 @@ class ReplaceLDSUseImpl {
convertConstantExprsToInstructions(I, CE, &UserInsts);
}
- // Go through all the user instrutions, if LDS exist within them as an
- // operand, then replace it by replace instruction.
+ // Go through all the user instructions; if the LDS global appears as an
+ // operand, replace it with the replacement instruction.
for (auto *II : UserInsts) {
auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
II->replaceUsesOfWith(GV, ReplaceInst);
@@ -373,7 +371,7 @@ bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
return false;
// We have created an LDS pointer for LDS, and initialized it to point-to LDS
- // within all relevent kernels. Now replace all the uses of LDS within
+ // within all relevant kernels. Now replace all the uses of LDS within
// non-kernel functions by LDS pointer.
replaceLDSUseByPointer(GV, LDSPointer);
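The filtering change above is mechanical: llvm::erase_if, declared in llvm/ADT/STLExtras.h, wraps the erase/remove_if idiom the old code spelled out by hand. A small usage sketch on an ordinary std::vector:

#include "llvm/ADT/STLExtras.h"
#include <vector>

static void dropSmallEntries(std::vector<int> &Sizes) {
  // Equivalent to Sizes.erase(std::remove_if(Sizes.begin(), Sizes.end(), Pred),
  //                           Sizes.end());
  llvm::erase_if(Sizes, [](int S) { return S <= 2; });
}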
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index ef46e53b7460..cb511e5e3483 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -29,6 +29,8 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -61,7 +63,8 @@ static const Function *getCalleeFunction(const MachineOperand &Op) {
assert(Op.getImm() == 0);
return nullptr;
}
-
+ if (auto *GA = dyn_cast<GlobalAlias>(Op.getGlobal()))
+ return cast<Function>(GA->getOperand(0));
return cast<Function>(Op.getGlobal());
}
@@ -83,10 +86,15 @@ int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumSGPRs(
}
int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
+ const GCNSubtarget &ST, int32_t ArgNumAGPR, int32_t ArgNumVGPR) const {
+ if (ST.hasGFX90AInsts() && ArgNumAGPR)
+ return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
+ return std::max(ArgNumVGPR, ArgNumAGPR);
+}
+
+int32_t AMDGPUResourceUsageAnalysis::SIFunctionResourceInfo::getTotalNumVGPRs(
const GCNSubtarget &ST) const {
- if (ST.hasGFX90AInsts() && NumAGPR)
- return alignTo(NumVGPR, 4) + NumAGPR;
- return std::max(NumVGPR, NumAGPR);
+ return getTotalNumVGPRs(ST, NumAGPR, NumVGPR);
}
bool AMDGPUResourceUsageAnalysis::runOnSCC(CallGraphSCC &SCC) {
@@ -444,6 +452,25 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
if (!IsIndirect)
I = CallGraphResourceInfo.find(Callee);
+ // FIXME: Call site could have norecurse on it
+ if (!Callee || !Callee->doesNotRecurse()) {
+ Info.HasRecursion = true;
+
+ // TODO: If we happen to know there is no stack usage in the
+ // callgraph, we don't need to assume an infinitely growing stack.
+ if (!MI.isReturn()) {
+ // We don't need to assume an unknown stack size for tail calls.
+
+ // FIXME: This only benefits in the case where the kernel does not
+ // directly call the tail called function. If a kernel directly
+ // calls a tail recursive function, we'll assume maximum stack size
+ // based on the regular call instruction.
+ CalleeFrameSize =
+ std::max(CalleeFrameSize,
+ static_cast<uint64_t>(AssumedStackSizeForExternalCall));
+ }
+ }
+
if (IsIndirect || I == CallGraphResourceInfo.end()) {
CalleeFrameSize =
std::max(CalleeFrameSize,
@@ -468,10 +495,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
Info.HasRecursion |= I->second.HasRecursion;
Info.HasIndirectCall |= I->second.HasIndirectCall;
}
-
- // FIXME: Call site could have norecurse on it
- if (!Callee || !Callee->doesNotRecurse())
- Info.HasRecursion = true;
}
}
}
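A worked instance of the accounting that the new getTotalNumVGPRs overload factors out (totalNumVGPRs below is a standalone stand-in, not the LLVM API; HasCombinedVGPRs corresponds to the ST.hasGFX90AInsts() check in the patch): when AGPRs are used on such subtargets, the VGPR count is rounded up to a multiple of 4 before the AGPRs are added; otherwise the larger of the two counts is reported, as before.

#include <algorithm>
#include <cstdint>

static int32_t totalNumVGPRs(bool HasCombinedVGPRs, int32_t NumAGPR, int32_t NumVGPR) {
  if (HasCombinedVGPRs && NumAGPR) {
    int32_t AlignedVGPRs = (NumVGPR + 3) / 4 * 4; // alignTo(NumVGPR, 4)
    return AlignedVGPRs + NumAGPR;                // e.g. 5 VGPRs + 2 AGPRs -> 8 + 2 = 10
  }
  return std::max(NumVGPR, NumAGPR);
}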
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
index 832e8119e444..b0a2d3bffc62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h
@@ -17,7 +17,6 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/IR/ValueMap.h"
namespace llvm {
@@ -44,6 +43,10 @@ public:
bool HasIndirectCall = false;
int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const;
+ // The total number of VGPRs is really a combination of AGPRs and VGPRs,
+ // depending on the architecture, plus some alignment constraints.
+ int32_t getTotalNumVGPRs(const GCNSubtarget &ST, int32_t NumAGPR,
+ int32_t NumVGPR) const;
int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const;
};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index e2aafa25142e..45f7c2f369bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -142,8 +142,8 @@ bool AMDGPURewriteOutArguments::checkArgumentUses(Value &Arg) const {
// Special case handle structs with single members. It is useful to handle
// some casts between structs and non-structs, but we can't bitcast
- // directly between them. directly bitcast between them. Blender uses
- // some casts that look like { <3 x float> }* to <4 x float>*
+ // directly between them. Blender uses some casts that look like
+ // { <3 x float> }* to <4 x float>*
if ((SrcEltTy->isStructTy() && (SrcEltTy->getStructNumElements() != 1)))
return false;
@@ -259,7 +259,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
// Keep retrying if we are able to successfully eliminate an argument. This
// helps with cases with multiple arguments which may alias, such as in a
- // sincos implemntation. If we have 2 stores to arguments, on the first
+ // sincos implementation. If we have 2 stores to arguments, on the first
// attempt the MDA query will succeed for the second store but not the
// first. On the second iteration we've removed that out clobbering argument
// (by effectively moving it into another function) and will find the second
@@ -357,7 +357,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
RetAttrs.addAttribute(Attribute::SExt);
RetAttrs.addAttribute(Attribute::ZExt);
RetAttrs.addAttribute(Attribute::NoAlias);
- NewFunc->removeAttributes(AttributeList::ReturnIndex, RetAttrs);
+ NewFunc->removeRetAttrs(RetAttrs);
// TODO: How to preserve metadata?
// Move the body of the function into the new rewritten function, and replace
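The removeRetAttrs call above, like the addRetAttr / addDereferenceableRetAttr / addDereferenceableParamAttr calls in the AMDGPUPromoteAlloca hunks earlier in this patch, is part of the move away from position-based AttributeList indices. A short sketch of the new-style calls as used in this patch (CI is assumed to be an existing CallInst; the byte counts are illustrative):

// Return-attribute helpers replace addAttribute(AttributeList::ReturnIndex, ...).
CI->addRetAttr(Attribute::NoAlias);
CI->addDereferenceableRetAttr(64);
// The parameter helpers are 0-based, unlike the old attribute-list indices
// where parameter 0 lived at index 1 (hence the 1..3 -> 0..2 loop change above).
CI->addDereferenceableParamAttr(0, 16);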
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 0c5020dccecd..0655b4342ba1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
-#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUInstructionSelector.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -38,10 +38,7 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenSubtargetInfo.inc"
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
#undef AMDGPUSubtarget
-#include "R600GenSubtargetInfo.inc"
static cl::opt<bool> DisablePowerSched(
"amdgpu-disable-power-sched",
@@ -64,19 +61,6 @@ static cl::opt<bool> UseAA("amdgpu-use-aa-in-codegen",
GCNSubtarget::~GCNSubtarget() = default;
-R600Subtarget &
-R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
- StringRef GPU, StringRef FS) {
- SmallString<256> FullFS("+promote-alloca,");
- FullFS += FS;
- ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
-
- HasMulU24 = getGeneration() >= EVERGREEN;
- HasMulI24 = hasCaymanISA();
-
- return *this;
-}
-
GCNSubtarget &
GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
@@ -98,12 +82,12 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
FullFS += "+enable-prt-strict-null,"; // This is overridden by a disable in FS
// Disable mutually exclusive bits.
- if (FS.find_insensitive("+wavefrontsize") != StringRef::npos) {
- if (FS.find_insensitive("wavefrontsize16") == StringRef::npos)
+ if (FS.contains_insensitive("+wavefrontsize")) {
+ if (!FS.contains_insensitive("wavefrontsize16"))
FullFS += "-wavefrontsize16,";
- if (FS.find_insensitive("wavefrontsize32") == StringRef::npos)
+ if (!FS.contains_insensitive("wavefrontsize32"))
FullFS += "-wavefrontsize32,";
- if (FS.find_insensitive("wavefrontsize64") == StringRef::npos)
+ if (!FS.contains_insensitive("wavefrontsize64"))
FullFS += "-wavefrontsize64,";
}
@@ -549,13 +533,10 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
}
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
- const Function &F) const {
+ const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
// Default minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
- // Default/requested minimum/maximum flat work group sizes.
- std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
-
// If minimum/maximum flat work group sizes were explicitly requested using
// "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
// number of waves per execution unit to values implied by requested
@@ -563,8 +544,6 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
unsigned MinImpliedByFlatWorkGroupSize =
getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
Default.first = MinImpliedByFlatWorkGroupSize;
- bool RequestedFlatWorkGroupSize =
- F.hasFnAttribute("amdgpu-flat-work-group-size");
// Requested minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
@@ -581,8 +560,7 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
// Make sure requested values are compatible with values implied by requested
// minimum/maximum flat work group sizes.
- if (RequestedFlatWorkGroupSize &&
- Requested.first < MinImpliedByFlatWorkGroupSize)
+ if (Requested.first < MinImpliedByFlatWorkGroupSize)
return Default;
return Requested;
@@ -710,6 +688,7 @@ unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
if (ImplicitBytes != 0) {
const Align Alignment = getAlignmentForImplicitArgPtr();
TotalSize = alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+ MaxAlign = std::max(MaxAlign, Alignment);
}
// Being able to dereference past the end is useful for emitting scalar loads.
@@ -721,23 +700,6 @@ AMDGPUDwarfFlavour AMDGPUSubtarget::getAMDGPUDwarfFlavour() const {
: AMDGPUDwarfFlavour::Wave64;
}
-R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM) :
- R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/GPU, FS),
- AMDGPUSubtarget(TT),
- InstrInfo(*this),
- FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- FMA(false),
- CaymanISA(false),
- CFALUBug(false),
- HasVertexCache(false),
- R600ALUInst(false),
- FP64(false),
- TexVTXClauseSize(0),
- Gen(R600),
- TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
- InstrItins(getInstrItineraryForCPU(GPU)) { }
-
void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
// Track register pressure so the scheduler can try to decrease
@@ -805,7 +767,7 @@ GCNSubtarget::getBaseReservedNumSGPRs(const bool HasFlatScratchInit) const {
if (getGeneration() >= AMDGPUSubtarget::GFX10)
return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
- if (HasFlatScratchInit) {
+ if (HasFlatScratchInit || HasArchitectedFlatScratch) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
if (getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
@@ -1003,6 +965,13 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use,
--Lat;
}
Dep.setLatency(Lat);
+ } else if (Dep.getLatency() == 0 && Dep.getReg() == AMDGPU::VCC_LO) {
+ // Work around the fact that SIInstrInfo::fixImplicitOperands modifies
+ // implicit operands which come from the MCInstrDesc, which can fool
+ // ScheduleDAGInstrs::addPhysRegDataDeps into treating them as implicit
+ // pseudo operands.
+ Dep.setLatency(InstrInfo.getSchedModel().computeOperandLatency(
+ DefI, DefOpIdx, UseI, UseOpIdx));
}
}
@@ -1052,7 +1021,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
return true;
}
- // Link as much SALU intructions in chain as possible. Return the size
+ // Link as many SALU instructions in a chain as possible. Return the size
// of the chain. Links up to MaxChain instructions.
unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
SmallPtrSetImpl<SUnit *> &Visited) const {
@@ -1136,6 +1105,11 @@ void GCNSubtarget::getPostRAMutations(
Mutations.push_back(std::make_unique<FillMFMAShadowMutation>(&InstrInfo));
}
+std::unique_ptr<ScheduleDAGMutation>
+GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
+ return std::make_unique<FillMFMAShadowMutation>(&InstrInfo);
+}
+
const AMDGPUSubtarget &AMDGPUSubtarget::get(const MachineFunction &MF) {
if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
return static_cast<const AMDGPUSubtarget&>(MF.getSubtarget<GCNSubtarget>());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index b160cdf3a97a..88ed4b2b7a24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -91,7 +91,18 @@ public:
/// be converted to integer, violate subtarget's specifications, or are not
/// compatible with minimum/maximum number of waves limited by flat work group
/// size, register usage, and/or lds usage.
- std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+ std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const {
+ // Default/requested minimum/maximum flat work group sizes.
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
+ return getWavesPerEU(F, FlatWorkGroupSizes);
+ }
+
+ /// Overload which uses the specified values for the flat work group sizes,
+ /// rather than querying the function itself. \p FlatWorkGroupSizes should
+ /// correspond to the function's value of getFlatWorkGroupSizes().
+ std::pair<unsigned, unsigned>
+ getWavesPerEU(const Function &F,
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
@@ -240,7 +251,7 @@ public:
uint64_t getExplicitKernArgSize(const Function &F, Align &MaxAlign) const;
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const;
- /// \returns Corresponsing DWARF register number mapping flavour for the
+ /// \returns Corresponding DWARF register number mapping flavour for the
/// \p WavefrontSize.
AMDGPUDwarfFlavour getAMDGPUDwarfFlavour() const;
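A small usage sketch of the new overload declared above (ST and F stand for an AMDGPUSubtarget reference and a Function known to the caller, which are assumptions of the example rather than part of the patch): callers that have already computed the flat work group sizes can pass them in instead of having getWavesPerEU re-read the function attributes.

static std::pair<unsigned, unsigned>
wavesPerEUFor(const AMDGPUSubtarget &ST, const Function &F) {
  // Reuse the already-computed flat work group sizes.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
  return ST.getWavesPerEU(F, FlatWorkGroupSizes);
}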
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index e4485f87fb79..de11676279f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -8,7 +8,7 @@
//
/// \file
/// The AMDGPU target machine contains all of the hardware specific
-/// information needed to emit code for R600 and SI GPUs.
+/// information needed to emit code for SI+ GPUs.
//
//===----------------------------------------------------------------------===//
@@ -21,7 +21,8 @@
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
-#include "R600MachineScheduler.h"
+#include "R600.h"
+#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
@@ -35,11 +36,13 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -162,12 +165,6 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR(
"fast", "fast register allocator", createFastVGPRRegisterAllocator);
}
-
-static cl::opt<bool> EnableR600StructurizeCFG(
- "r600-ir-structurize",
- cl::desc("Use StructurizeCFG IR pass"),
- cl::init(true));
-
static cl::opt<bool> EnableSROA(
"amdgpu-sroa",
cl::desc("Run SROA after promote alloca pass"),
@@ -184,12 +181,6 @@ OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
cl::desc("Run pre-RA exec mask optimizations"),
cl::init(true));
-static cl::opt<bool> EnableR600IfConvert(
- "r600-if-convert",
- cl::desc("Use if conversion pass"),
- cl::ReallyHidden,
- cl::init(true));
-
// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
"amdgpu-load-store-vectorizer",
@@ -240,13 +231,6 @@ static cl::opt<bool, true> LateCFGStructurize(
cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
cl::Hidden);
-static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
- "amdgpu-function-calls",
- cl::desc("Enable AMDGPU function call support"),
- cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
- cl::init(true),
- cl::Hidden);
-
static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
"amdgpu-fixed-function-abi",
cl::desc("Enable all implicit function arguments"),
@@ -324,6 +308,11 @@ static cl::opt<bool> EnablePreRAOptimizations(
cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
cl::Hidden);
+static cl::opt<bool> EnablePromoteKernelArguments(
+ "amdgpu-enable-promote-kernel-arguments",
+ cl::desc("Enable promotion of flat kernel pointer arguments to global"),
+ cl::Hidden, cl::init(true));
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
@@ -349,6 +338,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIOptimizeVGPRLiveRangePass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
initializeAMDGPUFixFunctionBitcastsPass(*PR);
+ initializeAMDGPUCtorDtorLoweringPass(*PR);
initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAttributorPass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
@@ -356,6 +346,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUArgumentUsageInfoPass(*PR);
initializeAMDGPUAtomicOptimizerPass(*PR);
initializeAMDGPULowerKernelArgumentsPass(*PR);
+ initializeAMDGPUPromoteKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
@@ -400,10 +391,6 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return std::make_unique<AMDGPUTargetObjectFile>();
}
-static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
-}
-
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
return new SIScheduleDAGMI(C);
}
@@ -441,10 +428,6 @@ createIterativeILPMachineScheduler(MachineSchedContext *C) {
}
static MachineSchedRegistry
-R600SchedRegistry("r600", "Run R600's custom scheduler",
- createR600MachineScheduler);
-
-static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
createSIMachineScheduler);
@@ -542,7 +525,9 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
/// Predicate for Internalize pass.
static bool mustPreserveGV(const GlobalValue &GV) {
if (const Function *F = dyn_cast<Function>(&GV))
- return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
+ return F->isDeclaration() || F->getName().startswith("__asan_") ||
+ F->getName().startswith("__sanitizer_") ||
+ AMDGPU::isEntryFunctionCC(F->getCallingConv());
GV.removeDeadConstantUsers();
return !GV.use_empty();
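A hedged sketch of how a predicate of this shape is typically consumed: the new-pass-manager InternalizePass accepts a callable that returns true for symbols that must keep external linkage. The pass-manager variable below is assumed, not shown in the patch.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Internalize.h"
using namespace llvm;

// Sketch only: wire the predicate defined above into the internalizer.
void addInternalization(ModulePassManager &MPM) {
  // Declarations, __asan_*/__sanitizer_* runtime hooks, and kernel entry
  // points stay external; everything else becomes internal and is then a
  // candidate for GlobalDCE.
  MPM.addPass(InternalizePass(mustPreserveGV));
}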
@@ -556,6 +541,8 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
+ bool PromoteKernelArguments =
+ EnablePromoteKernelArguments && getOptLevel() > CodeGenOpt::Less;
if (EnableFunctionCalls) {
delete Builder.Inliner;
@@ -597,7 +584,14 @@ void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
Builder.addExtension(
PassManagerBuilder::EP_CGSCCOptimizerLate,
- [EnableOpt](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+ [EnableOpt, PromoteKernelArguments](const PassManagerBuilder &,
+ legacy::PassManagerBase &PM) {
+ // Add the promote kernel arguments pass to the opt pipeline right before
+ // the infer address spaces pass, which performs the actual address space
+ // rewriting.
+ if (PromoteKernelArguments)
+ PM.add(createAMDGPUPromoteKernelArgumentsPass());
+
// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
PM.add(createInferAddressSpacesPass());
@@ -674,6 +668,10 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
return true;
}
+ if (PassName == "amdgpu-promote-kernel-arguments") {
+ PM.addPass(AMDGPUPromoteKernelArgumentsPass());
+ return true;
+ }
return false;
});
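The check added here extends an existing pass-name parsing callback (its registration lies outside this excerpt). Under that assumption, a rough sketch of the registration shape that lets `opt -passes=amdgpu-promote-kernel-arguments` resolve to the new pass:

// Sketch; PB is the PassBuilder handed to registerPassBuilderCallbacks, and
// the lambda signature matches PassBuilder::registerPipelineParsingCallback
// for function pipelines.
PB.registerPipelineParsingCallback(
    [](StringRef PassName, FunctionPassManager &FPM,
       ArrayRef<PassBuilder::PipelineElement>) {
      if (PassName == "amdgpu-promote-kernel-arguments") {
        FPM.addPass(AMDGPUPromoteKernelArgumentsPass());
        return true;
      }
      return false;
    });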
@@ -690,19 +688,18 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineStartEPCallback(
- [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
FPM.addPass(AMDGPUUseNativeCallsPass());
- if (EnableLibCallSimplify &&
- Level != PassBuilder::OptimizationLevel::O0)
+ if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
- if (Level == PassBuilder::OptimizationLevel::O0)
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0)
return;
PM.addPass(AMDGPUUnifyMetadataPass());
@@ -720,12 +717,19 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerCGSCCOptimizerLateEPCallback(
- [this](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
- if (Level == PassBuilder::OptimizationLevel::O0)
+ [this](CGSCCPassManager &PM, OptimizationLevel Level) {
+ if (Level == OptimizationLevel::O0)
return;
FunctionPassManager FPM;
+ // Add the promote kernel arguments pass to the opt pipeline right before
+ // the infer address spaces pass, which performs the actual address space
+ // rewriting.
+ if (Level.getSpeedupLevel() > OptimizationLevel::O1.getSpeedupLevel() &&
+ EnablePromoteKernelArguments)
+ FPM.addPass(AMDGPUPromoteKernelArgumentsPass());
+
// Add infer address spaces pass to the opt pipeline after inlining
// but before SROA to increase SROA opportunities.
FPM.addPass(InferAddressSpacesPass());
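The guard above intentionally compares speedup components rather than whole levels. A standalone sketch of that check, assuming OptimizationLevel now lives in llvm/Passes/OptimizationLevel.h (the rename from PassBuilder::OptimizationLevel in this diff reflects that move); the helper name is made up.

#include "llvm/Passes/OptimizationLevel.h"

// True when the speedup component exceeds O1's, i.e. the pipeline is not
// O0 or O1; size-optimizing levels such as Os also satisfy this.
static bool runsAboveO1(llvm::OptimizationLevel Level) {
  return Level.getSpeedupLevel() >
         llvm::OptimizationLevel::O1.getSpeedupLevel();
}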
@@ -734,7 +738,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
// anything, and before other cleanup optimizations.
FPM.addPass(AMDGPULowerKernelAttributesPass());
- if (Level != PassBuilder::OptimizationLevel::O0) {
+ if (Level != OptimizationLevel::O0) {
// Promote alloca to vector before SROA and loop unroll. If we
// manage to eliminate allocas before unroll we may choose to unroll
// less.
@@ -745,45 +749,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
}
-//===----------------------------------------------------------------------===//
-// R600 Target Machine (R600 -> Cayman)
-//===----------------------------------------------------------------------===//
-
-R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
- StringRef CPU, StringRef FS,
- TargetOptions Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
- setRequiresStructuredCFG(true);
-
- // Override the default since calls aren't supported for r600.
- if (EnableFunctionCalls &&
- EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
- EnableFunctionCalls = false;
-}
-
-const R600Subtarget *R600TargetMachine::getSubtargetImpl(
- const Function &F) const {
- StringRef GPU = getGPUName(F);
- StringRef FS = getFeatureString(F);
-
- SmallString<128> SubtargetKey(GPU);
- SubtargetKey.append(FS);
-
- auto &I = SubtargetMap[SubtargetKey];
- if (!I) {
- // This needs to be done before we create a new subtarget since any
- // creation will depend on the TM and the code generation flags on the
- // function that reside in TargetOptions.
- resetTargetOptions(F);
- I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
- }
-
- return I.get();
-}
-
int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
@@ -817,9 +782,31 @@ unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
return AMDGPUAS::GLOBAL_ADDRESS;
}
-TargetTransformInfo
-R600TargetMachine::getTargetTransformInfo(const Function &F) {
- return TargetTransformInfo(R600TTIImpl(this, F));
+std::pair<const Value *, unsigned>
+AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_is_shared:
+ return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
+ case Intrinsic::amdgcn_is_private:
+ return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
+ default:
+ break;
+ }
+ return std::make_pair(nullptr, -1);
+ }
+ // Check the global pointer predication based on
+ // (!is_shared(p) && !is_private(p)). Note that logical 'and' is commutative and
+ // the order of 'is_shared' and 'is_private' is not significant.
+ Value *Ptr;
+ if (match(
+ const_cast<Value *>(V),
+ m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+ m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
+ m_Deferred(Ptr))))))
+ return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
+
+ return std::make_pair(nullptr, -1);
}
//===----------------------------------------------------------------------===//
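For readers unfamiliar with the PatternMatch combinators used in the hunk above, here is a standalone sketch of the same idiom: recognize (!llvm.amdgcn.is.shared(p) && !llvm.amdgcn.is.private(p)) and extract p. It assumes the usual LLVM headers and is not the exact function from the patch.

#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns the pointer being tested, or nullptr if the value is not the
// (!is_shared(p) && !is_private(p)) predicate.
static Value *matchGlobalPredicate(Value *V) {
  Value *Ptr = nullptr;
  // m_c_And tries both operand orders; m_Deferred requires the second
  // intrinsic to see the same pointer already bound by m_Value(Ptr).
  if (match(V, m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(
                           m_Value(Ptr))),
                       m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
                           m_Deferred(Ptr))))))
    return Ptr;
  return nullptr;
}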
@@ -834,7 +821,8 @@ GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL, bool JIT)
: AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
-const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
+const TargetSubtargetInfo *
+GCNTargetMachine::getSubtargetImpl(const Function &F) const {
StringRef GPU = getGPUName(F);
StringRef FS = getFeatureString(F);
@@ -864,76 +852,11 @@ GCNTargetMachine::getTargetTransformInfo(const Function &F) {
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//
-namespace {
-
-class AMDGPUPassConfig : public TargetPassConfig {
-public:
- AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {
- // Exceptions and StackMaps are not supported, so these passes will never do
- // anything.
- disablePass(&StackMapLivenessID);
- disablePass(&FuncletLayoutID);
- // Garbage collection is not supported.
- disablePass(&GCLoweringID);
- disablePass(&ShadowStackGCLoweringID);
- }
-
- AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
- return getTM<AMDGPUTargetMachine>();
- }
-
- ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const override {
- ScheduleDAGMILive *DAG = createGenericSchedLive(C);
- DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
- return DAG;
- }
-
- void addEarlyCSEOrGVNPass();
- void addStraightLineScalarOptimizationPasses();
- void addIRPasses() override;
- void addCodeGenPrepare() override;
- bool addPreISel() override;
- bool addInstSelector() override;
- bool addGCPasses() override;
-
- std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
-
- /// Check if a pass is enabled given \p Opt option. The option always
- /// overrides defaults if explicitely used. Otherwise its default will
- /// be used given that a pass shall work at an optimization \p Level
- /// minimum.
- bool isPassEnabled(const cl::opt<bool> &Opt,
- CodeGenOpt::Level Level = CodeGenOpt::Default) const {
- if (Opt.getNumOccurrences())
- return Opt;
- if (TM->getOptLevel() < Level)
- return false;
- return Opt;
- }
-};
-
-std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
+std::unique_ptr<CSEConfigBase> llvm::AMDGPUPassConfig::getCSEConfig() const {
return getStandardCSEConfigForOpt(TM->getOptLevel());
}
-class R600PassConfig final : public AMDGPUPassConfig {
-public:
- R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) {}
-
- ScheduleDAGInstrs *createMachineScheduler(
- MachineSchedContext *C) const override {
- return createR600MachineScheduler(C);
- }
-
- bool addPreISel() override;
- bool addInstSelector() override;
- void addPreRegAlloc() override;
- void addPreSched2() override;
- void addPreEmitPass() override;
-};
+namespace {
class GCNPassConfig final : public AMDGPUPassConfig {
public:
@@ -943,6 +866,7 @@ public:
// allow calls without EnableAMDGPUFunctionCalls if they are marked
// noinline, so this is always required.
setRequiresCodeGenSCCOrder(true);
+ substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
}
GCNTargetMachine &getGCNTargetMachine() const {
@@ -952,6 +876,15 @@ public:
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override;
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
+ const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(ST.createFillMFMAShadowMutation(DAG->TII));
+ return DAG;
+ }
+
bool addPreISel() override;
void addMachineSSAOptimization() override;
bool addILPOpts() override;
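Two of the hunks above work together: the GCNPassConfig constructor swaps the legacy post-RA list scheduler for the post-RA MachineScheduler, and createPostMachineScheduler then attaches the target's DAG mutations. A condensed, hypothetical pass-config sketch of that pattern (names are illustrative, not the literal GCNPassConfig):

#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
using namespace llvm;

class ExamplePassConfig : public TargetPassConfig {   // hypothetical name
public:
  ExamplePassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // Run the post-RA MachineScheduler instead of the legacy post-RA list
    // scheduler, so the override below actually gets used.
    substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
    // Mutations adjust the DAG before scheduling; the patch adds load
    // clustering plus the subtarget's MFMA shadow-filling mutation here.
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }
};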
@@ -982,6 +915,17 @@ public:
} // end anonymous namespace
+AMDGPUPassConfig::AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {
+ // Exceptions and StackMaps are not supported, so these passes will never do
+ // anything.
+ disablePass(&StackMapLivenessID);
+ disablePass(&FuncletLayoutID);
+ // Garbage collection is not supported.
+ disablePass(&GCLoweringID);
+ disablePass(&ShadowStackGCLoweringID);
+}
+
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
@@ -993,7 +937,7 @@ void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createLICMPass());
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
- // ReassociateGEPs exposes more opportunites for SLSR. See
+ // ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
addPass(createStraightLineStrengthReducePass());
// SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
@@ -1015,6 +959,7 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&PatchableFunctionID);
addPass(createAMDGPUPrintfRuntimeBinding());
+ addPass(createAMDGPUCtorDtorLoweringPass());
// This must occur before inlining, as the inliner will not look through
// bitcast calls.
@@ -1100,8 +1045,13 @@ void AMDGPUPassConfig::addIRPasses() {
}
void AMDGPUPassConfig::addCodeGenPrepare() {
- if (TM->getTargetTriple().getArch() == Triple::amdgcn)
+ if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
+ addPass(createAMDGPUAttributorPass());
+
+ // FIXME: This pass adds 2 hacky attributes that can be replaced with an
+ // analysis, and should be removed.
addPass(createAMDGPUAnnotateKernelFeaturesPass());
+ }
if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
EnableLowerKernelArguments)
@@ -1126,8 +1076,7 @@ bool AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
- // Defer the verifier until FinalizeISel.
- addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
+ addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
return false;
}
@@ -1136,44 +1085,11 @@ bool AMDGPUPassConfig::addGCPasses() {
return false;
}
-//===----------------------------------------------------------------------===//
-// R600 Pass Setup
-//===----------------------------------------------------------------------===//
-
-bool R600PassConfig::addPreISel() {
- AMDGPUPassConfig::addPreISel();
-
- if (EnableR600StructurizeCFG)
- addPass(createStructurizeCFGPass());
- return false;
-}
-
-bool R600PassConfig::addInstSelector() {
- addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
- return false;
-}
-
-void R600PassConfig::addPreRegAlloc() {
- addPass(createR600VectorRegMerger());
-}
-
-void R600PassConfig::addPreSched2() {
- addPass(createR600EmitClauseMarkers(), false);
- if (EnableR600IfConvert)
- addPass(&IfConverterID, false);
- addPass(createR600ClauseMergePass(), false);
-}
-
-void R600PassConfig::addPreEmitPass() {
- addPass(createAMDGPUCFGStructurizerPass(), false);
- addPass(createR600ExpandSpecialInstrsPass(), false);
- addPass(&FinalizeMachineBundlesID, false);
- addPass(createR600Packetizer(), false);
- addPass(createR600ControlFlowFinalizer(), false);
-}
-
-TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new R600PassConfig(*this, PM);
+llvm::ScheduleDAGInstrs *
+AMDGPUPassConfig::createMachineScheduler(MachineSchedContext *C) const {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
}
//===----------------------------------------------------------------------===//
@@ -1311,7 +1227,7 @@ void GCNPassConfig::addFastRegAlloc() {
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
+ insertPass(&PHIEliminationID, &SILowerControlFlowID);
insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
@@ -1341,11 +1257,11 @@ void GCNPassConfig::addOptimizedRegAlloc() {
// the register in LiveVariables, this would trigger a failure in verifier,
// we should fix it and enable the verifier.
if (OptVGPRLiveRange)
- insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID, false);
+ insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID);
// This must be run immediately after phi elimination and before
// TwoAddressInstructions, otherwise the processing of the tied operand of
// SI_ELSE will introduce a copy of the tied operand source after the else.
- insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
+ insertPass(&PHIEliminationID, &SILowerControlFlowID);
if (EnableDCEInRA)
insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
@@ -1418,7 +1334,7 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
// Commit allocated register changes. This is mostly necessary because too
// many things rely on the use lists of the physical registers, such as the
// verifier. This is only necessary with allocators which use LiveIntervals,
- // since FastRegAlloc does the replacments itself.
+ // since FastRegAlloc does the replacements itself.
addPass(createVirtRegRewriter(false));
// Equivalent of PEI for SGPRs.
@@ -1440,6 +1356,8 @@ void GCNPassConfig::addPostRegAlloc() {
}
void GCNPassConfig::addPreSched2() {
+ if (TM->getOptLevel() > CodeGenOpt::None)
+ addPass(createSIShrinkInstructionsPass());
addPass(&SIPostRABundlerID);
}
@@ -1447,9 +1365,6 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIMemoryLegalizerPass());
addPass(createSIInsertWaitcntsPass());
- if (TM->getOptLevel() > CodeGenOpt::None)
- addPass(createSIShrinkInstructionsPass());
-
addPass(createSIModeRegisterPass());
if (getOptLevel() > CodeGenOpt::None)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 1bfe026d080c..0ff2db2a52d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -15,11 +15,14 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
#include "GCNSubtarget.h"
-#include "R600Subtarget.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
+#include <utility>
namespace llvm {
+class ScheduleDAGMILive;
+
//===----------------------------------------------------------------------===//
// AMDGPU Target Machine (R600+)
//===----------------------------------------------------------------------===//
@@ -61,31 +64,9 @@ public:
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
unsigned getAssumedAddrSpace(const Value *V) const override;
-};
-
-//===----------------------------------------------------------------------===//
-// R600 Target Machine (R600 -> Cayman)
-//===----------------------------------------------------------------------===//
-
-class R600TargetMachine final : public AMDGPUTargetMachine {
-private:
- mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
-public:
- R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
- StringRef FS, TargetOptions Options,
- Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
- CodeGenOpt::Level OL, bool JIT);
-
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-
- const R600Subtarget *getSubtargetImpl(const Function &) const override;
-
- TargetTransformInfo getTargetTransformInfo(const Function &F) override;
-
- bool isMachineVerifierClean() const override {
- return false;
- }
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override;
};
//===----------------------------------------------------------------------===//
@@ -104,7 +85,7 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- const GCNSubtarget *getSubtargetImpl(const Function &) const override;
+ const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override;
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
@@ -121,6 +102,45 @@ public:
SMRange &SourceRange) const override;
};
+//===----------------------------------------------------------------------===//
+// AMDGPU Pass Setup
+//===----------------------------------------------------------------------===//
+
+class AMDGPUPassConfig : public TargetPassConfig {
+public:
+ AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM);
+
+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
+ return getTM<AMDGPUTargetMachine>();
+ }
+
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override;
+
+ void addEarlyCSEOrGVNPass();
+ void addStraightLineScalarOptimizationPasses();
+ void addIRPasses() override;
+ void addCodeGenPrepare() override;
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addGCPasses() override;
+
+ std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
+
+ /// Check if a pass is enabled given the \p Opt option. An explicitly set
+ /// option always overrides the default. Otherwise the default is honored
+ /// only when the compile runs at optimization level \p Level or higher,
+ /// the minimum at which the pass is expected to do useful work.
+ bool isPassEnabled(const cl::opt<bool> &Opt,
+ CodeGenOpt::Level Level = CodeGenOpt::Default) const {
+ if (Opt.getNumOccurrences())
+ return Opt;
+ if (TM->getOptLevel() < Level)
+ return false;
+ return Opt;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
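A usage sketch of isPassEnabled above: a flag the user set explicitly always wins, while an untouched flag's default only takes effect when compiling at or above the pass's minimum level. The flag and the pass factory named below are hypothetical.

#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<bool> EnableExamplePass(
    "amdgpu-example-pass",                        // made-up option name
    llvm::cl::desc("Enable the example pass"), llvm::cl::init(true),
    llvm::cl::Hidden);

// Inside an AMDGPUPassConfig member one would write something like:
//   if (isPassEnabled(EnableExamplePass, CodeGenOpt::Aggressive))
//     addPass(createExamplePass());              // made-up factory
// With no -amdgpu-example-pass on the command line the pass only runs at -O3;
// passing -amdgpu-example-pass=0/1 forces it off or on regardless of level.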
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 63f449f7a726..ecdbdf613a53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -16,10 +16,11 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
@@ -101,7 +102,8 @@ AMDGPUTTIImpl::AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
TLI(ST->getTargetLowering()) {}
void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
const Function &F = *L->getHeader()->getParent();
UP.Threshold = AMDGPU::getIntegerAttribute(F, "amdgpu-unroll-threshold", 300);
UP.MaxCount = std::numeric_limits<unsigned>::max();
@@ -503,7 +505,7 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
Info.Ordering = static_cast<AtomicOrdering>(OrderingVal);
Info.ReadMem = true;
Info.WriteMem = true;
- Info.IsVolatile = !Volatile->isNullValue();
+ Info.IsVolatile = !Volatile->isZero();
return true;
}
default:
@@ -1224,8 +1226,9 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
- CommonTTI.getUnrollingPreferences(L, SE, UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
}
void GCNTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
@@ -1239,122 +1242,3 @@ int GCNTTIImpl::get64BitInstrCost(TTI::TargetCostKind CostKind) const {
: ST->hasHalfRate64Ops() ? getHalfRateInstrCost(CostKind)
: getQuarterRateInstrCost(CostKind);
}
-
-R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
- : BaseT(TM, F.getParent()->getDataLayout()),
- ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
- TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
-
-unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
- return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
-}
-
-unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
- return getHardwareNumberOfRegisters(Vec);
-}
-
-TypeSize
-R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
- return TypeSize::getFixed(32);
-}
-
-unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const {
- return 32;
-}
-
-unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
- if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
- AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
- return 128;
- if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
- AddrSpace == AMDGPUAS::REGION_ADDRESS)
- return 64;
- if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
- return 32;
-
- if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
- AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
- (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
- AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
- return 128;
- llvm_unreachable("unhandled address space");
-}
-
-bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
- Align Alignment,
- unsigned AddrSpace) const {
- // We allow vectorization of flat stores, even though we may need to decompose
- // them later if they may access private memory. We don't have enough context
- // here, and legalization can handle it.
- return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
-}
-
-bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- Align Alignment,
- unsigned AddrSpace) const {
- return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
-}
-
-bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- Align Alignment,
- unsigned AddrSpace) const {
- return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
-}
-
-unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
- // Disable unrolling if the loop is not vectorized.
- // TODO: Enable this again.
- if (VF == 1)
- return 1;
-
- return 8;
-}
-
-InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
- TTI::TargetCostKind CostKind,
- const Instruction *I) {
- if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
- return Opcode == Instruction::PHI ? 0 : 1;
-
- // XXX - For some reason this isn't called for switch.
- switch (Opcode) {
- case Instruction::Br:
- case Instruction::Ret:
- return 10;
- default:
- return BaseT::getCFInstrCost(Opcode, CostKind, I);
- }
-}
-
-InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index) {
- switch (Opcode) {
- case Instruction::ExtractElement:
- case Instruction::InsertElement: {
- unsigned EltSize
- = DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
- if (EltSize < 32) {
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
- }
-
- // Extracts are just reads of a subregister, so are free. Inserts are
- // considered free because we don't want to have any cost for scalarizing
- // operations, and we don't have to copy into a different register class.
-
- // Dynamic indexing isn't free and is best avoided.
- return Index == ~0u ? 2 : 0;
- }
- default:
- return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
- }
-}
-
-void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
- CommonTTI.getUnrollingPreferences(L, SE, UP);
-}
-
-void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::PeelingPreferences &PP) {
- CommonTTI.getPeelingPreferences(L, SE, PP);
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 37c0756eb7a8..e901b5c5747d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -18,18 +18,14 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETTRANSFORMINFO_H
#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
namespace llvm {
-class AMDGPUTargetLowering;
class AMDGPUTargetMachine;
class GCNSubtarget;
class InstCombiner;
class Loop;
-class R600Subtarget;
class ScalarEvolution;
class SITargetLowering;
class Type;
@@ -53,7 +49,8 @@ public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -82,24 +79,21 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
return TargetTransformInfo::TCC_Basic;
}
- static inline int getHalfRateInstrCost(
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ static inline int getHalfRateInstrCost(TTI::TargetCostKind CostKind) {
return CostKind == TTI::TCK_CodeSize ? 2
: 2 * TargetTransformInfo::TCC_Basic;
}
// TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe
// should be 2 or 4.
- static inline int getQuarterRateInstrCost(
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) {
+ static inline int getQuarterRateInstrCost(TTI::TargetCostKind CostKind) {
return CostKind == TTI::TCK_CodeSize ? 2
: 4 * TargetTransformInfo::TCC_Basic;
}
// On some parts, normal fp64 operations are half rate, and others
// quarter. This also applies to some integer operations.
- int get64BitInstrCost(
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+ int get64BitInstrCost(TTI::TargetCostKind CostKind) const;
public:
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
@@ -108,7 +102,8 @@ public:
bool useGPUDivergenceAnalysis() const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -154,8 +149,7 @@ public:
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -184,6 +178,12 @@ public:
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const;
+
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+ return AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
+ AS != AMDGPUAS::PRIVATE_ADDRESS;
+ }
+
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
@@ -213,51 +213,13 @@ public:
InstructionCost getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+ TTI::TargetCostKind CostKind);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
InstructionCost getMinMaxReductionCost(
VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
-};
-
-class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
- using BaseT = BasicTTIImplBase<R600TTIImpl>;
- using TTI = TargetTransformInfo;
-
- friend BaseT;
-
- const R600Subtarget *ST;
- const AMDGPUTargetLowering *TLI;
- AMDGPUTTIImpl CommonTTI;
-
-public:
- explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
-
- const R600Subtarget *getST() const { return ST; }
- const AMDGPUTargetLowering *getTLI() const { return TLI; }
-
- void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
- void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::PeelingPreferences &PP);
- unsigned getHardwareNumberOfRegisters(bool Vec) const;
- unsigned getNumberOfRegisters(bool Vec) const;
- TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
- unsigned getMinVectorRegisterBitWidth() const;
- unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
- bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
- unsigned AddrSpace) const;
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
- unsigned AddrSpace) const;
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
- unsigned AddrSpace) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
- InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
- const Instruction *I = nullptr);
- InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
- unsigned Index);
+ TTI::TargetCostKind CostKind);
};
} // end namespace llvm
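A brief caller-side sketch of the widened hook: both TTI implementations now take an OptimizationRemarkEmitter so unrolling decisions can be reported as remarks. The object setup is assumed; in-tree callers also pre-fill the preferences with target-independent defaults before invoking the hook.

#include "AMDGPUTargetTransformInfo.h"            // assumed include path
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"

void queryUnrollPrefs(llvm::GCNTTIImpl &TTIImpl, llvm::Loop *L,
                      llvm::ScalarEvolution &SE,
                      llvm::OptimizationRemarkEmitter &ORE) {
  llvm::TargetTransformInfo::UnrollingPreferences UP;
  // The emitter rides along so the target can emit applied/missed remarks.
  TTIImpl.getUnrollingPreferences(L, SE, UP, &ORE);
}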
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 4e3d5fdc012d..c6751f98fe6a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -232,7 +232,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
Updates.push_back({DominatorTree::Insert, BB, DummyReturnBB});
} else { // Conditional branch.
- SmallVector<BasicBlock *, 2> Successors(succ_begin(BB), succ_end(BB));
+ SmallVector<BasicBlock *, 2> Successors(successors(BB));
// Create a new transition block to hold the conditional branch.
BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
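For reference, a standalone sketch of the range idiom adopted here: llvm::successors(BB) yields an iterator range, so a SmallVector can be built from it directly instead of passing a begin/end pair.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"

llvm::SmallVector<llvm::BasicBlock *, 2>
copySuccessors(llvm::BasicBlock *BB) {
  // Equivalent to SmallVector<...>(succ_begin(BB), succ_end(BB)), just shorter.
  return llvm::SmallVector<llvm::BasicBlock *, 2>(llvm::successors(BB));
}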
diff --git a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 56befe4ed0d0..1a9255f3240f 100644
--- a/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -6,8 +6,8 @@
//
//==-----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600RegisterInfo.h"
#include "R600Subtarget.h"
#include "llvm/ADT/SCCIterator.h"
@@ -127,6 +127,10 @@ public:
bool prepare();
bool runOnMachineFunction(MachineFunction &MF) override {
+ // FIXME: This pass causes verification failures.
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
+
TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
TRI = &TII->getRegisterInfo();
LLVM_DEBUG(MF.dump(););
@@ -245,7 +249,7 @@ protected:
int loopendPatternMatch();
int mergeLoop(MachineLoop *LoopRep);
- /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
+ /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in
/// the same loop with LoopLandInfo without explicitly keeping track of
/// loopContBlks and loopBreakBlks, this is a method to get the information.
bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
@@ -571,12 +575,9 @@ bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
//get DebugLoc from the first MachineBasicBlock instruction with debug info
DebugLoc DL;
- for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
- ++It) {
- MachineInstr *instr = &(*It);
- if (instr->getDebugLoc())
- DL = instr->getDebugLoc();
- }
+ for (MachineInstr &MI : *MBB)
+ if (MI.getDebugLoc())
+ DL = MI.getDebugLoc();
return DL;
}
@@ -617,7 +618,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
MachineInstr *MI = getReturnInstr(MBB);
- bool IsReturn = (MBB->succ_size() == 0);
+ bool IsReturn = MBB->succ_empty();
if (MI)
assert(IsReturn);
else if (IsReturn)
@@ -628,9 +629,8 @@ bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
MachineBasicBlock *SrcMBB) {
- for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
- iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
- DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
+ for (MachineBasicBlock *Succ : SrcMBB->successors())
+ DstMBB->addSuccessor(Succ); // Succ's predecessor list is updated as well
}
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
@@ -808,7 +808,7 @@ bool AMDGPUCFGStructurizer::run() {
MachineBasicBlock *EntryMBB =
*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
- if (EntryMBB->succ_size() == 0) {
+ if (EntryMBB->succ_empty()) {
Finish = true;
LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
} else {
@@ -1054,7 +1054,7 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
- if (Src1MBB->succ_size() == 0) {
+ if (Src1MBB->succ_empty()) {
MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
if (LoopRep && LoopRep == MLI->getLoopFor(Src2MBB)) {
MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
@@ -1319,12 +1319,9 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
insertInstrBefore(I, R600::ENDIF);
// put initReg = 2 to other predecessors of landBlk
- for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
- PE = LandBlk->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *MBB = *PI;
+ for (MachineBasicBlock *MBB : LandBlk->predecessors())
if (MBB != TrueMBB && MBB != FalseMBB)
report_fatal_error("Extra register needed to handle CFG");
- }
}
LLVM_DEBUG(
dbgs() << "result from improveSimpleJumpintoIf: ";
@@ -1393,7 +1390,7 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
MBB->splice(I, FalseMBB, FalseMBB->begin(),
FalseMBB->end());
MBB->removeSuccessor(FalseMBB, true);
- if (LandMBB && FalseMBB->succ_size() != 0)
+ if (LandMBB && !FalseMBB->succ_empty())
FalseMBB->removeSuccessor(LandMBB, true);
retireBlock(FalseMBB);
MLI->removeBlock(FalseMBB);
@@ -1639,8 +1636,7 @@ void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
SrcBlkInfo = new BlockInformation();
SrcBlkInfo->IsRetired = true;
- assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
- && "can't retire block yet");
+ assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet");
}
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 00032c7d4ea5..4acd77a9d5d2 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -28,12 +28,12 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -1542,7 +1542,7 @@ private:
bool validateOpSel(const MCInst &Inst);
bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
bool validateVccOperand(unsigned Reg) const;
- bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
+ bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
bool validateAGPRLdSt(const MCInst &Inst) const;
bool validateVGPRAlign(const MCInst &Inst) const;
@@ -1715,6 +1715,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
switch (OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -1723,6 +1724,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
return &APFloat::IEEEsingle();
case AMDGPU::OPERAND_REG_IMM_INT64:
case AMDGPU::OPERAND_REG_IMM_FP64:
@@ -1732,6 +1734,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
return &APFloat::IEEEdouble();
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -1742,6 +1745,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_KIMM16:
return &APFloat::IEEEhalf();
default:
llvm_unreachable("unsupported fp type");
@@ -2017,12 +2021,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
@@ -2036,7 +2042,9 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
- case AMDGPU::OPERAND_REG_IMM_V2INT32: {
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16: {
bool lost;
APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
// Convert literal to single precision
@@ -2062,6 +2070,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -2101,6 +2110,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
@@ -2128,6 +2138,14 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
Inst.addOperand(MCOperand::createImm(Val));
return;
}
+ case AMDGPU::OPERAND_KIMM32:
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+ setImmKindNone();
+ return;
+ case AMDGPU::OPERAND_KIMM16:
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
+ setImmKindNone();
+ return;
default:
llvm_unreachable("invalid operand size");
}
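A small sketch of the APInt trimming used for the new KIMM operands: only the low 32 or 16 bits of the parsed literal are emitted as a plain immediate. The helper name is made up.

#include "llvm/ADT/APInt.h"
#include <cstdint>

// Keep only the low Width bits of a parsed literal and return them as a host
// integer; getLoBits() zeroes the high bits without changing the bit width.
uint64_t literalLowBits(const llvm::APInt &Literal, unsigned Width) {
  return Literal.getLoBits(Width).getZExtValue();
}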
@@ -3250,7 +3268,8 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
SIInstrFlags::SDWA)) {
// Check special imm operands (used by madmk, etc)
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
- ++ConstantBusUseCount;
+ ++NumLiterals;
+ LiteralSize = 4;
}
SmallDenseSet<unsigned> SGPRsUsed;
@@ -3290,7 +3309,7 @@ AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
// An instruction may use only one literal.
// This has been validated on the previous step.
- // See validateVOP3Literal.
+ // See validateVOPLiteral.
// This literal may be used as more than one operand.
// If all these operands are of the same size,
// this literal counts as one scalar value.
@@ -3981,26 +4000,29 @@ bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
(FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
}
-// VOP3 literal is only allowed in GFX10+ and only one can be used
-bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
- const OperandVector &Operands) {
+// Only one unique literal may be used. VOP3 literals are only allowed in GFX10+
+bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
+ const OperandVector &Operands) {
unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
- if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
+ const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
+ if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
+ ImmIdx == -1)
return true;
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
- const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
+ const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
unsigned NumExprs = 0;
unsigned NumLiterals = 0;
uint32_t LiteralValue;
for (int OpIdx : OpIndices) {
- if (OpIdx == -1) break;
+ if (OpIdx == -1)
+ continue;
const MCOperand &MO = Inst.getOperand(OpIdx);
if (!MO.isImm() && !MO.isExpr())
@@ -4030,7 +4052,7 @@ bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
if (!NumLiterals)
return true;
- if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+ if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
Error(getLitLoc(Operands), "literal operands are not supported");
return false;
}
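The operand scan above switched from break to continue because the imm operand index was appended to OpIndices and may follow absent (-1) source operands; stopping at the first -1 would skip it. A minimal, abstracted sketch of that control-flow difference (the literal check is a stand-in for the real MCInst inspection):

#include <array>
#include <functional>

unsigned countLiterals(const std::array<int, 4> &OpIndices,
                       const std::function<bool(int)> &IsLiteralAt) {
  unsigned NumLiterals = 0;
  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;               // skip missing operands but keep scanning
    if (IsLiteralAt(OpIdx))
      ++NumLiterals;
  }
  return NumLiterals;
}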
@@ -4202,7 +4224,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
"only one literal operand is allowed");
return false;
}
- if (!validateVOP3Literal(Inst, Operands)) {
+ if (!validateVOPLiteral(Inst, Operands)) {
return false;
}
if (!validateConstantBusLimitations(Inst, Operands)) {
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 5f43aa8388ee..d3644db7cf8b 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -55,10 +55,6 @@ class MTBUFGetBaseOpcode<string Op> {
!subst("FORMAT_XYZW", "FORMAT_X", Op)));
}
-class getMTBUFElements<string Op> {
- int ret = 1;
-}
-
class MTBUF_Pseudo <string opName, dag outs, dag ins,
string asmOps, list<dag> pattern=[]> :
@@ -223,8 +219,7 @@ class MTBUF_Load_Pseudo <string opName,
}
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
- int elems, ValueType load_vt = i32,
- SDPatternOperator ld = null_frag> {
+ int elems> {
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
MTBUFAddr64Table<0, NAME>;
@@ -265,8 +260,7 @@ class MTBUF_Store_Pseudo <string opName,
}
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
- int elems, ValueType store_vt = i32,
- SDPatternOperator st = null_frag> {
+ int elems> {
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>,
MTBUFAddr64Table<0, NAME>;
@@ -541,7 +535,6 @@ multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPa
// opcode because it needs an N+1 register class dest register.
multiclass MUBUF_Pseudo_Loads<string opName,
ValueType load_vt = i32,
- SDPatternOperator ld = null_frag,
bit TiedDest = 0,
bit isLds = 0> {
@@ -565,11 +558,9 @@ multiclass MUBUF_Pseudo_Loads<string opName,
}
}
-multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32,
- SDPatternOperator ld_nolds = null_frag,
- SDPatternOperator ld_lds = null_frag> {
- defm NAME : MUBUF_Pseudo_Loads<opName, load_vt, ld_nolds>;
- defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, ld_lds, 0, 1>;
+multiclass MUBUF_Pseudo_Loads_Lds<string opName, ValueType load_vt = i32> {
+ defm NAME : MUBUF_Pseudo_Loads<opName, load_vt>;
+ defm _LDS : MUBUF_Pseudo_Loads<opName, load_vt, 0, 1>;
}
class MUBUF_Store_Pseudo <string opName,
@@ -742,7 +733,6 @@ class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
- SDPatternOperator atomic,
bit isFP = isFloatType<vdataType>.ret> {
let FPAtomic = isFP in
def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
@@ -796,7 +786,7 @@ multiclass MUBUF_Pseudo_Atomics <string opName,
RegisterClass vdataClass,
ValueType vdataType,
SDPatternOperator atomic> :
- MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType, atomic>,
+ MUBUF_Pseudo_Atomics_NO_RTN<opName, vdataClass, vdataType>,
MUBUF_Pseudo_Atomics_RTN<opName, vdataClass, vdataType, atomic>;
@@ -924,13 +914,13 @@ defm : MUBUF_Pseudo_Load_Pats<"BUFFER_LOAD_DWORDX4", v4i32, load_global>;
// in at least GFX8+ chips. See Bug 37653.
let SubtargetPredicate = isGFX8GFX9 in {
defm BUFFER_LOAD_DWORDX2_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx2", v2i32, null_frag, 0, 1
+ "buffer_load_dwordx2", v2i32, 0, 1
>;
defm BUFFER_LOAD_DWORDX3_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx3", v3i32, null_frag, 0, 1
+ "buffer_load_dwordx3", v3i32, 0, 1
>;
defm BUFFER_LOAD_DWORDX4_LDS : MUBUF_Pseudo_Loads <
- "buffer_load_dwordx4", v4i32, null_frag, 0, 1
+ "buffer_load_dwordx4", v4i32, 0, 1
>;
}
@@ -1076,27 +1066,27 @@ defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <
let SubtargetPredicate = HasD16LoadStore in {
defm BUFFER_LOAD_UBYTE_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_ubyte_d16", i32, null_frag, 1
+ "buffer_load_ubyte_d16", i32, 1
>;
defm BUFFER_LOAD_UBYTE_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_ubyte_d16_hi", i32, null_frag, 1
+ "buffer_load_ubyte_d16_hi", i32, 1
>;
defm BUFFER_LOAD_SBYTE_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_sbyte_d16", i32, null_frag, 1
+ "buffer_load_sbyte_d16", i32, 1
>;
defm BUFFER_LOAD_SBYTE_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_sbyte_d16_hi", i32, null_frag, 1
+ "buffer_load_sbyte_d16_hi", i32, 1
>;
defm BUFFER_LOAD_SHORT_D16 : MUBUF_Pseudo_Loads <
- "buffer_load_short_d16", i32, null_frag, 1
+ "buffer_load_short_d16", i32, 1
>;
defm BUFFER_LOAD_SHORT_D16_HI : MUBUF_Pseudo_Loads <
- "buffer_load_short_d16_hi", i32, null_frag, 1
+ "buffer_load_short_d16_hi", i32, 1
>;
defm BUFFER_STORE_BYTE_D16_HI : MUBUF_Pseudo_Stores <
@@ -1121,10 +1111,10 @@ def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
let SubtargetPredicate = HasAtomicFaddInsts in {
defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_add_f32", VGPR_32, f32, atomic_load_fadd_global_noret_32
+ "buffer_atomic_add_f32", VGPR_32, f32
>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
- "buffer_atomic_pk_add_f16", VGPR_32, v2f16, atomic_load_fadd_v2f16_global_noret_32
+ "buffer_atomic_pk_add_f16", VGPR_32, v2f16
>;
let OtherPredicates = [isGFX90APlus] in {
@@ -1438,6 +1428,13 @@ defm : BufferAtomicPatterns<SIbuffer_atomic_xor, i64, "BUFFER_ATOMIC_XOR_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_inc, i64, "BUFFER_ATOMIC_INC_X2">;
defm : BufferAtomicPatterns<SIbuffer_atomic_dec, i64, "BUFFER_ATOMIC_DEC_X2">;
+let SubtargetPredicate = isGFX6GFX7GFX10 in {
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f32, "BUFFER_ATOMIC_FMIN">;
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f32, "BUFFER_ATOMIC_FMAX">;
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmin, f64, "BUFFER_ATOMIC_FMIN_X2">;
+ defm : BufferAtomicPatterns<SIbuffer_atomic_fmax, f64, "BUFFER_ATOMIC_FMAX_X2">;
+}
+
class NoUseBufferAtomic<SDPatternOperator Op, ValueType vt> : PatFrag <
(ops node:$src0, node:$src1, node:$src2, node:$src3, node:$src4, node:$src5, node:$src6, node:$src7),
(vt (Op $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7)),
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index ad9528ece7d0..104b5160b985 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -428,11 +428,10 @@ defm DS_AND_B32 : DS_1A1D_NORET_mc<"ds_and_b32">;
defm DS_OR_B32 : DS_1A1D_NORET_mc<"ds_or_b32">;
defm DS_XOR_B32 : DS_1A1D_NORET_mc<"ds_xor_b32">;
-let SubtargetPredicate = HasLDSFPAtomics in {
+let SubtargetPredicate = HasLDSFPAtomicAdd in {
defm DS_ADD_F32 : DS_1A1D_NORET_mc<"ds_add_f32">;
}
-// FIXME: Are these really present pre-gfx8?
defm DS_MIN_F32 : DS_1A1D_NORET_mc<"ds_min_f32">;
defm DS_MAX_F32 : DS_1A1D_NORET_mc<"ds_max_f32">;
@@ -493,7 +492,7 @@ defm DS_MAX_F64 : DS_1A1D_NORET_mc<"ds_max_f64", VReg_64>;
defm DS_ADD_RTN_U32 : DS_1A1D_RET_mc<"ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
-let SubtargetPredicate = HasLDSFPAtomics in {
+let SubtargetPredicate = HasLDSFPAtomicAdd in {
defm DS_ADD_RTN_F32 : DS_1A1D_RET_mc<"ds_add_rtn_f32", VGPR_32, "ds_add_f32">;
}
defm DS_SUB_RTN_U32 : DS_1A1D_RET_mc<"ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
@@ -665,7 +664,7 @@ def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
} // let SubtargetPredicate = isGFX8Plus
-let SubtargetPredicate = HasLDSFPAtomics, OtherPredicates = [HasDsSrc2Insts] in {
+let SubtargetPredicate = HasLDSFPAtomicAdd, OtherPredicates = [HasDsSrc2Insts] in {
def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
}
@@ -715,6 +714,10 @@ foreach vt = Reg32Types.types in {
defm : DSReadPat_mc <DS_READ_B32, vt, "load_local">;
}
+defm : DSReadPat_mc <DS_READ_U8, i16, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U8, i32, "atomic_load_8_local">;
+defm : DSReadPat_mc <DS_READ_U16, i16, "atomic_load_16_local">;
+defm : DSReadPat_mc <DS_READ_U16, i32, "atomic_load_16_local">;
defm : DSReadPat_mc <DS_READ_B32, i32, "atomic_load_32_local">;
defm : DSReadPat_mc <DS_READ_B64, i64, "atomic_load_64_local">;
@@ -775,6 +778,10 @@ foreach vt = Reg32Types.types in {
defm : DSWritePat_mc <DS_WRITE_B32, vt, "store_local">;
}
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i16, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B8, i32, "atomic_store_local_8">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i16, "atomic_store_local_16">;
+defm : DSAtomicWritePat_mc <DS_WRITE_B16, i32, "atomic_store_local_16">;
defm : DSAtomicWritePat_mc <DS_WRITE_B32, i32, "atomic_store_local_32">;
defm : DSAtomicWritePat_mc <DS_WRITE_B64, i64, "atomic_store_local_64">;
@@ -933,11 +940,11 @@ defm : DSAtomicRetPat_mc<DS_MIN_RTN_I32, i32, "atomic_load_min">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_I32, i32, "atomic_load_max">;
defm : DSAtomicRetPat_mc<DS_MIN_RTN_U32, i32, "atomic_load_umin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_U32, i32, "atomic_load_umax">;
-defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
-
-let SubtargetPredicate = HasLDSFPAtomics in {
defm : DSAtomicRetPat_mc<DS_MIN_RTN_F32, f32, "atomic_load_fmin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_F32, f32, "atomic_load_fmax">;
+defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B32, i32, "atomic_cmp_swap">;
+
+let SubtargetPredicate = HasLDSFPAtomicAdd in {
defm : DSAtomicRetPat_mc<DS_ADD_RTN_F32, f32, "atomic_load_fadd">;
}
@@ -954,6 +961,8 @@ defm : DSAtomicRetPat_mc<DS_MIN_RTN_I64, i64, "atomic_load_min">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_I64, i64, "atomic_load_max">;
defm : DSAtomicRetPat_mc<DS_MIN_RTN_U64, i64, "atomic_load_umin">;
defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_load_umax">;
+defm : DSAtomicRetPat_mc<DS_MIN_RTN_F64, f64, "atomic_load_fmin">;
+defm : DSAtomicRetPat_mc<DS_MAX_RTN_F64, f64, "atomic_load_fmax">;
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap">;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fe62b8590fa0..e2186d4d533e 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -25,8 +25,9 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -264,6 +265,34 @@ static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst,
return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
}
+static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
+}
+
+static DecodeStatus decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(
+ Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
+}
+
+static DecodeStatus decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ return addOperand(
+ Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
+}
+
static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
const MCRegisterInfo *MRI) {
if (OpIdx < 0)
@@ -626,6 +655,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
+ int ImmLitIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+ if (Res && ImmLitIdx != -1)
+ Res = convertFMAanyK(MI, ImmLitIdx);
+
// if the opcode was not recognized we'll assume a Size of 4 bytes
// (unless there are fewer bytes left)
Size = Res ? (MaxInstBytesNum - Bytes.size())
@@ -693,22 +727,21 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::d16);
+ const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
+ const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
+ AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
+
assert(VDataIdx != -1);
- if (DMaskIdx == -1 || TFEIdx == -1) {// intersect_ray
+ if (BaseOpcode->BVH) {
+ // Add A16 operand for intersect_ray instructions
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16) > -1) {
- assert(MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa ||
- MI.getOpcode() == AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa);
addOperand(MI, MCOperand::createImm(1));
}
return MCDisassembler::Success;
}
- const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
-
bool IsNSA = false;
unsigned AddrSize = Info->VAddrDwords;
@@ -717,8 +750,6 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
int A16Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
- const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
const AMDGPU::MIMGDimInfo *Dim =
AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
@@ -813,6 +844,24 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
return MCDisassembler::Success;
}
+DecodeStatus AMDGPUDisassembler::convertFMAanyK(MCInst &MI,
+ int ImmLitIdx) const {
+ assert(HasLiteral && "Should have decoded a literal");
+ const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
+ unsigned DescNumOps = Desc.getNumOperands();
+ assert(DescNumOps == MI.getNumOperands());
+ for (unsigned I = 0; I < DescNumOps; ++I) {
+ auto &Op = MI.getOperand(I);
+ auto OpType = Desc.OpInfo[I].OperandType;
+ bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
+ OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
+ if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
+ IsDeferredOp)
+ Op.setImm(Literal);
+ }
+ return MCDisassembler::Success;
+}
+
const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
return getContext().getRegisterInfo()->
getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
@@ -1022,6 +1071,18 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
return decodeDstOp(OPW512, Val);
}
+// Decode Literals for insts which always have a literal in the encoding
+MCOperand
+AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
+ if (HasLiteral) {
+ if (Literal != Val)
+ return errOperand(Val, "More than one unique literal is illegal");
+ }
+ HasLiteral = true;
+ Literal = Val;
+ return MCOperand::createImm(Literal);
+}
+
MCOperand AMDGPUDisassembler::decodeLiteralConstant() const {
// For now all literal constants are supposed to be unsigned integer
// ToDo: deal with signed/unsigned 64-bit integer constants
@@ -1235,7 +1296,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
}
-MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) const {
+MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
+ bool MandatoryLiteral) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1264,8 +1326,13 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
return decodeFPImmed(Width, Val);
- if (Val == LITERAL_CONST)
- return decodeLiteralConstant();
+ if (Val == LITERAL_CONST) {
+ if (MandatoryLiteral)
+ // Keep a sentinel value for deferred setting
+ return MCOperand::createImm(LITERAL_CONST);
+ else
+ return decodeLiteralConstant();
+ }
switch (Width) {
case OPW32:
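Taken together, the decoder changes above form a two-phase scheme for instructions that carry a mandatory literal: decodeSrcOp() leaves the LITERAL_CONST value in place as a sentinel for deferred operands, decodeMandatoryLiteralConstant() records the single literal the encoding may carry, and convertFMAanyK() patches the sentinels afterwards. A minimal stand-alone sketch of that flow, in plain C++ with invented names (not the LLVM API):

  #include <cassert>
  #include <cstdint>
  #include <vector>

  constexpr uint64_t kLiteralConstSentinel = 255; // assumed marker value

  struct DecodedInst {
    std::vector<int64_t> Operands;
    bool HasLiteral = false;
    uint64_t Literal = 0;

    // Phase 1a: a deferred source whose encoded field is the literal marker
    // keeps the sentinel instead of consuming the literal itself.
    void addDeferredSrc() {
      Operands.push_back(static_cast<int64_t>(kLiteralConstSentinel));
    }

    // Phase 1b: the kimm operand records the one literal; the real decoder
    // reports an error instead of asserting when a second value shows up.
    void addMandatoryLiteral(uint64_t Val) {
      assert(!HasLiteral || Literal == Val);
      HasLiteral = true;
      Literal = Val;
      Operands.push_back(static_cast<int64_t>(Val));
    }

    // Phase 2: the convertFMAanyK-style fix-up replaces remaining sentinels
    // with the recorded literal once the whole instruction is decoded.
    void resolveDeferred() {
      assert(HasLiteral && "should have decoded a literal");
      for (int64_t &Op : Operands)
        if (Op == static_cast<int64_t>(kLiteralConstSentinel))
          Op = static_cast<int64_t>(Literal);
    }
  };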
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index dc879ec5ad88..eea6074d5281 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -87,6 +87,7 @@ public:
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;
+ DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
DecodeStatus convertSDWAInst(MCInst &MI) const;
DecodeStatus convertDPP8Inst(MCInst &MI) const;
DecodeStatus convertMIMGInst(MCInst &MI) const;
@@ -150,9 +151,11 @@ public:
static MCOperand decodeIntImmed(unsigned Imm);
static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+ MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeLiteralConstant() const;
- MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
+ bool MandatoryLiteral = false) const;
MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
diff --git a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 596c3d7baea0..12224cb3f797 100644
--- a/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -303,16 +303,16 @@ def : EGPat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
let SubtargetPredicate = isEGorCayman in {
-multiclass AtomicPat<Instruction inst_ret, Instruction inst_noret,
- SDPatternOperator node_ret, SDPatternOperator node_noret> {
+multiclass AtomicPat<Instruction inst_noret,
+ SDPatternOperator node_noret> {
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
// EXTRACT_SUBREG here is dummy, we know the node has no uses
def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, i32:$data)),
(EXTRACT_SUBREG (inst_noret
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), $data, sub0), $ptr), sub1)>;
}
-multiclass AtomicIncDecPat<Instruction inst_ret, Instruction inst_noret,
- SDPatternOperator node_ret, SDPatternOperator node_noret, int C> {
+multiclass AtomicIncDecPat<Instruction inst_noret,
+ SDPatternOperator node_noret, int C> {
// FIXME: Add _RTN version. We need per WI scratch location to store the old value
// EXTRACT_SUBREG here is dummy, we know the node has no uses
def : EGOrCaymanPat<(i32 (node_noret i32:$ptr, C)),
@@ -330,47 +330,33 @@ def : EGOrCaymanPat<(i32 (atomic_cmp_swap_global_noret i32:$ptr, i32:$cmp, i32:$
$data, sub0),
$ptr), sub1)>;
-defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_RTN,
- RAT_ATOMIC_XCHG_INT_NORET,
- atomic_swap_global_ret_32,
+defm AtomicSwapPat : AtomicPat <RAT_ATOMIC_XCHG_INT_NORET,
atomic_swap_global_noret_32>;
-defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_RTN, RAT_ATOMIC_ADD_NORET,
- atomic_load_add_global_ret_32, atomic_load_add_global_noret_32>;
-defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_RTN, RAT_ATOMIC_SUB_NORET,
- atomic_load_sub_global_ret_32, atomic_load_sub_global_noret_32>;
-defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_RTN,
- RAT_ATOMIC_MIN_INT_NORET,
- atomic_load_min_global_ret_32, atomic_load_min_global_noret_32>;
-defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_RTN,
- RAT_ATOMIC_MIN_UINT_NORET,
- atomic_load_umin_global_ret_32, atomic_load_umin_global_noret_32>;
-defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_RTN,
- RAT_ATOMIC_MAX_INT_NORET,
- atomic_load_max_global_ret_32, atomic_load_max_global_noret_32>;
-defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_RTN,
- RAT_ATOMIC_MAX_UINT_NORET,
- atomic_load_umax_global_ret_32, atomic_load_umax_global_noret_32>;
-defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_RTN, RAT_ATOMIC_AND_NORET,
- atomic_load_and_global_ret_32, atomic_load_and_global_noret_32>;
-defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_RTN, RAT_ATOMIC_OR_NORET,
- atomic_load_or_global_ret_32, atomic_load_or_global_noret_32>;
-defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_RTN, RAT_ATOMIC_XOR_NORET,
- atomic_load_xor_global_ret_32, atomic_load_xor_global_noret_32>;
-defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
- RAT_ATOMIC_INC_UINT_NORET,
- atomic_load_add_global_ret_32,
+defm AtomicAddPat : AtomicPat <RAT_ATOMIC_ADD_NORET,
+ atomic_load_add_global_noret_32>;
+defm AtomicSubPat : AtomicPat <RAT_ATOMIC_SUB_NORET,
+ atomic_load_sub_global_noret_32>;
+defm AtomicMinPat : AtomicPat <RAT_ATOMIC_MIN_INT_NORET,
+ atomic_load_min_global_noret_32>;
+defm AtomicUMinPat : AtomicPat <RAT_ATOMIC_MIN_UINT_NORET,
+ atomic_load_umin_global_noret_32>;
+defm AtomicMaxPat : AtomicPat <RAT_ATOMIC_MAX_INT_NORET,
+ atomic_load_max_global_noret_32>;
+defm AtomicUMaxPat : AtomicPat <RAT_ATOMIC_MAX_UINT_NORET,
+ atomic_load_umax_global_noret_32>;
+defm AtomicAndPat : AtomicPat <RAT_ATOMIC_AND_NORET,
+ atomic_load_and_global_noret_32>;
+defm AtomicOrPat : AtomicPat <RAT_ATOMIC_OR_NORET,
+ atomic_load_or_global_noret_32>;
+defm AtomicXorPat : AtomicPat <RAT_ATOMIC_XOR_NORET,
+ atomic_load_xor_global_noret_32>;
+defm AtomicIncAddPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_NORET,
atomic_load_add_global_noret_32, 1>;
-defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_RTN,
- RAT_ATOMIC_INC_UINT_NORET,
- atomic_load_sub_global_ret_32,
+defm AtomicIncSubPat : AtomicIncDecPat <RAT_ATOMIC_INC_UINT_NORET,
atomic_load_sub_global_noret_32, -1>;
-defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
- RAT_ATOMIC_DEC_UINT_NORET,
- atomic_load_add_global_ret_32,
+defm AtomicDecAddPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_NORET,
atomic_load_add_global_noret_32, -1>;
-defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_RTN,
- RAT_ATOMIC_DEC_UINT_NORET,
- atomic_load_sub_global_ret_32,
+defm AtomicDecSubPat : AtomicIncDecPat <RAT_ATOMIC_DEC_UINT_NORET,
atomic_load_sub_global_noret_32, 1>;
// Should be predicated on FeatureFP64
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 90f26e514f54..bb0aa648ff90 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -203,7 +203,7 @@ multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit Ha
}
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
- bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+ bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs regClass:$vdst),
!con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
@@ -224,10 +224,10 @@ class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
}
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
- bit HasTiedOutput = 0, bit HasSignedOffset = 0> {
- def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
+ bit HasTiedOutput = 0> {
+ def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
+ def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
GlobalSaddrTable<1, opName>;
}
@@ -241,7 +241,7 @@ multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
}
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
- bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
+ bit EnableSaddr = 0> : FLAT_Pseudo<
opName,
(outs),
!con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
@@ -258,11 +258,10 @@ class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}
-multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
- bit HasSignedOffset = 0> {
- def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
+multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
+ def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
GlobalSaddrTable<0, opName>;
- def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
+ def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
GlobalSaddrTable<1, opName>;
}
@@ -353,8 +352,6 @@ class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
let mayStore = 1;
let has_glc = 0;
let glcValue = 0;
- let has_dlc = 0;
- let dlcValue = 0;
let has_vdst = 0;
let has_sccb = 1;
let sccbValue = 0;
@@ -368,7 +365,6 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
let hasPostISelHook = 1;
let has_vdst = 1;
let glcValue = 1;
- let dlcValue = 0;
let sccbValue = 0;
let IsAtomicNoRet = 0;
let IsAtomicRet = 1;
@@ -412,7 +408,6 @@ multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
- SDPatternOperator atomic = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc,
bit isFP = isFloatType<data_vt>.ret,
@@ -483,11 +478,10 @@ multiclass FLAT_Global_Atomic_Pseudo<
RegisterClass vdst_rc,
ValueType vt,
SDPatternOperator atomic_rtn = null_frag,
- SDPatternOperator atomic_no_rtn = null_frag,
ValueType data_vt = vt,
RegisterClass data_rc = vdst_rc> {
let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
- defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
+ defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
}
}
@@ -668,12 +662,11 @@ defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
- VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag,
+ VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
- null_frag,
v2i64, VReg_128>;
defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
@@ -786,17 +779,17 @@ defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_shor
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
defm GLOBAL_ATOMIC_FCMPSWAP :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
defm GLOBAL_ATOMIC_FMIN :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>;
defm GLOBAL_ATOMIC_FMAX :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
defm GLOBAL_ATOMIC_FMIN_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
defm GLOBAL_ATOMIC_FMAX_X2 :
- FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
+ FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1
let is_flat_global = 1 in {
@@ -1237,6 +1230,13 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>;
+let OtherPredicates = [isGFX10Plus] in {
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", atomic_load_fmin_global_32, f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", atomic_load_fmax_global_32, f32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", atomic_load_fmin_global_64, f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", atomic_load_fmax_global_64, f64>;
+}
+
let OtherPredicates = [HasAtomicFaddInsts] in {
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index 2bf365168048..a8c85ec4e5ea 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -612,8 +612,7 @@ bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &MBB : MF) {
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
- auto &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
Changed = true;
++NumDPPMovsCombined;
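The new loop header relies on the early-increment idiom: the iterator is advanced before the body runs, so erasing the instruction that was just combined cannot invalidate the walk. llvm::make_early_inc_range packages exactly the copy-then-advance step of the old *I++ loop, and llvm::reverse only flips the direction. A stand-alone illustration in plain C++ (std::list stands in for the machine-instruction list):

  #include <iostream>
  #include <list>

  int main() {
    std::list<int> Insts = {1, 2, 3, 4, 5};
    for (auto It = Insts.begin(), End = Insts.end(); It != End;) {
      auto Cur = It++;         // step first; erasing *Cur cannot touch It
      if (*Cur % 2 == 0)       // "combine" (here: delete) some elements
        Insts.erase(Cur);
    }
    for (int V : Insts)
      std::cout << V << ' ';   // prints: 1 3 5
  }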
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index bc2fb1e9770c..ff5d0b0af6a4 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -349,20 +349,16 @@ void GCNHazardRecognizer::AdvanceCycle() {
return;
}
- // Do not track non-instructions which do not affect the wait states.
- // If included, these instructions can lead to buffer overflow such that
- // detectable hazards are missed.
- if (CurrCycleInstr->isMetaInstruction()) {
- CurrCycleInstr = nullptr;
- return;
- }
-
if (CurrCycleInstr->isBundle()) {
processBundle();
return;
}
unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr);
+ if (!NumWaitStates) {
+ CurrCycleInstr = nullptr;
+ return;
+ }
// Keep track of emitted instructions
EmittedInstrs.push_front(CurrCycleInstr);
@@ -409,7 +405,7 @@ static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard,
if (IsHazard(*I))
return WaitStates;
- if (I->isInlineAsm() || I->isMetaInstruction())
+ if (I->isInlineAsm())
continue;
WaitStates += SIInstrInfo::getNumWaitStates(*I);
@@ -1549,7 +1545,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
}
int GCNHazardRecognizer::checkMAILdStHazards(MachineInstr *MI) {
- // On gfx90a+ releveant hazards are checked in checkMAIVALUHazards()
+ // On gfx90a+ relevant hazards are checked in checkMAIVALUHazards()
if (!ST.hasMAIInsts() || ST.hasGFX90AInsts())
return 0;
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index f3f9eb53355f..86924667084d 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -188,7 +188,7 @@ public:
printRegion(dbgs(), Rgn.Begin, Rgn.End, Sch.LIS, 2));
Sch.BaseClass::schedule();
- // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
+ // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
Sch.RegionEnd = Rgn.End;
//assert(Rgn.End == Sch.RegionEnd);
Rgn.Begin = Sch.RegionBegin;
@@ -280,7 +280,7 @@ GCNIterativeScheduler::getSchedulePressure(const Region &R,
return RPTracker.moveMaxPressure();
}
-void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
+void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overridden
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned NumRegionInstrs) {
@@ -293,7 +293,7 @@ void GCNIterativeScheduler::enterRegion(MachineBasicBlock *BB, // overriden
}
}
-void GCNIterativeScheduler::schedule() { // overriden
+void GCNIterativeScheduler::schedule() { // overridden
// do nothing
LLVM_DEBUG(printLivenessInfo(dbgs(), RegionBegin, RegionEnd, LIS);
if (!Regions.empty() && Regions.back()->Begin == RegionBegin) {
@@ -304,7 +304,7 @@ void GCNIterativeScheduler::schedule() { // overriden
<< '\n';);
}
-void GCNIterativeScheduler::finalizeSchedule() { // overriden
+void GCNIterativeScheduler::finalizeSchedule() { // overridden
if (Regions.empty())
return;
switch (Strategy) {
@@ -391,8 +391,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
// and already interleaved with debug values
if (!std::is_same<decltype(*Schedule.begin()), MachineInstr*>::value) {
placeDebugValues();
- // Unfortunatelly placeDebugValues incorrectly modifies RegionEnd, restore
- //assert(R.End == RegionEnd);
+ // Unfortunately placeDebugValues incorrectly modifies RegionEnd, restore
+ // assert(R.End == RegionEnd);
RegionEnd = R.End;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
index 443472a3b99a..e82d7362a342 100644
--- a/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file defines and imlements the class GCNMinRegScheduler, which
+/// This file defines and implements the class GCNMinRegScheduler, which
/// implements an experimental, simple scheduler whose main goal is to learn
/// ways about consuming less possible registers for a region.
///
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
index a51399d7da5f..a906a4207758 100644
--- a/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass combines split register tuple initialization into a single psuedo:
+/// This pass combines split register tuple initialization into a single pseudo:
///
/// undef %0.sub1:sreg_64 = S_MOV_B32 1
/// %0.sub0:sreg_64 = S_MOV_B32 2
@@ -40,6 +40,7 @@ namespace {
class GCNPreRAOptimizations : public MachineFunctionPass {
private:
const SIInstrInfo *TII;
+ const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
@@ -85,32 +86,107 @@ bool GCNPreRAOptimizations::processReg(Register Reg) {
MachineInstr *Def0 = nullptr;
MachineInstr *Def1 = nullptr;
uint64_t Init = 0;
+ bool Changed = false;
+ SmallSet<Register, 32> ModifiedRegs;
+ bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
for (MachineInstr &I : MRI->def_instructions(Reg)) {
- if (I.getOpcode() != AMDGPU::S_MOV_B32 || I.getOperand(0).getReg() != Reg ||
- !I.getOperand(1).isImm() || I.getNumOperands() != 2)
- return false;
-
- switch (I.getOperand(0).getSubReg()) {
+ switch (I.getOpcode()) {
default:
return false;
- case AMDGPU::sub0:
- if (Def0)
+ case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
+ break;
+ case AMDGPU::COPY: {
+ // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
+ // intermediate temporary VGPR register. Try to find the defining
+ // accvgpr_write to avoid temporary registers.
+
+ if (!IsAGPRDst)
return false;
- Def0 = &I;
- Init |= I.getOperand(1).getImm() & 0xffffffff;
+
+ Register SrcReg = I.getOperand(1).getReg();
+
+ if (!SrcReg.isVirtual())
+ break;
+
+ // Check if source of copy is from another AGPR.
+ bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
+ if (!IsAGPRSrc)
+ break;
+
+ // def_instructions() does not look at subregs so it may give us a
+ // different instruction that defines the same vreg but different subreg
+ // so we have to manually check subreg.
+ Register SrcSubReg = I.getOperand(1).getSubReg();
+ for (auto &Def : MRI->def_instructions(SrcReg)) {
+ if (SrcSubReg != Def.getOperand(0).getSubReg())
+ continue;
+
+ if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
+ MachineOperand DefSrcMO = Def.getOperand(1);
+
+ // Immediates are not an issue and can be propagated in
+ // postrapseudos pass. Only handle cases where defining
+ // accvgpr_write source is a vreg.
+ if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
+ // Propagate source reg of accvgpr write to this copy instruction
+ I.getOperand(1).setReg(DefSrcMO.getReg());
+ I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
+
+ // Reg uses were changed, collect unique set of registers to update
+ // live intervals at the end.
+ ModifiedRegs.insert(DefSrcMO.getReg());
+ ModifiedRegs.insert(SrcReg);
+
+ Changed = true;
+ }
+
+ // Found the defining accvgpr_write, stop looking any further.
+ break;
+ }
+ }
break;
- case AMDGPU::sub1:
- if (Def1)
+ }
+ case AMDGPU::S_MOV_B32:
+ if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
+ I.getNumOperands() != 2)
return false;
- Def1 = &I;
- Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
+
+ switch (I.getOperand(0).getSubReg()) {
+ default:
+ return false;
+ case AMDGPU::sub0:
+ if (Def0)
+ return false;
+ Def0 = &I;
+ Init |= I.getOperand(1).getImm() & 0xffffffff;
+ break;
+ case AMDGPU::sub1:
+ if (Def1)
+ return false;
+ Def1 = &I;
+ Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
+ break;
+ }
break;
}
}
+ // For AGPR reg, check if live intervals need to be updated.
+ if (IsAGPRDst) {
+ if (Changed) {
+ for (Register RegToUpdate : ModifiedRegs) {
+ LIS->removeInterval(RegToUpdate);
+ LIS->createAndComputeVirtRegInterval(RegToUpdate);
+ }
+ }
+
+ return Changed;
+ }
+
+ // For SGPR reg, check if we can combine instructions.
if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
- return false;
+ return Changed;
LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
<< " =>\n");
@@ -144,7 +220,7 @@ bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
MRI = &MF.getRegInfo();
LIS = &getAnalysis<LiveIntervals>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ TRI = ST.getRegisterInfo();
bool Changed = false;
@@ -153,8 +229,10 @@ bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
if (!LIS->hasInterval(Reg))
continue;
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- if (RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC))
+ if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
+ (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
continue;
+
Changed |= processReg(Reg);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 0212b8e17641..75855a7a4f9c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -26,32 +26,36 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) {
GenericScheduler::initialize(DAG);
- const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
-
MF = &DAG->MF;
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
// FIXME: This is also necessary, because some passes that run after
// scheduling and before regalloc increase register pressure.
- const int ErrorMargin = 3;
-
- SGPRExcessLimit = Context->RegClassInfo
- ->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass) - ErrorMargin;
- VGPRExcessLimit = Context->RegClassInfo
- ->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass) - ErrorMargin;
- if (TargetOccupancy) {
- SGPRCriticalLimit = ST.getMaxNumSGPRs(TargetOccupancy, true);
- VGPRCriticalLimit = ST.getMaxNumVGPRs(TargetOccupancy);
- } else {
- SGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
- AMDGPU::RegisterPressureSets::SReg_32);
- VGPRCriticalLimit = SRI->getRegPressureSetLimit(DAG->MF,
- AMDGPU::RegisterPressureSets::VGPR_32);
- }
-
- SGPRCriticalLimit -= ErrorMargin;
- VGPRCriticalLimit -= ErrorMargin;
+ const unsigned ErrorMargin = 3;
+
+ SGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
+ VGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+
+ SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
+ // Set the initial TargetOccupancy to the maximum occupancy that we can
+ // achieve for this function. This effectively sets a lower bound on the
+ // 'Critical' register limits in the scheduler.
+ TargetOccupancy = MFI.getOccupancy();
+ SGPRCriticalLimit =
+ std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
+ VGPRCriticalLimit =
+ std::min(ST.getMaxNumVGPRs(TargetOccupancy), VGPRExcessLimit);
+
+ // Subtract error margin from register limits and avoid overflow.
+ SGPRCriticalLimit =
+ std::min(SGPRCriticalLimit - ErrorMargin, SGPRCriticalLimit);
+ VGPRCriticalLimit =
+ std::min(VGPRCriticalLimit - ErrorMargin, VGPRCriticalLimit);
+ SGPRExcessLimit = std::min(SGPRExcessLimit - ErrorMargin, SGPRExcessLimit);
+ VGPRExcessLimit = std::min(VGPRExcessLimit - ErrorMargin, VGPRExcessLimit);
}
void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
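The std::min(X - ErrorMargin, X) pattern above guards against unsigned underflow: when X is smaller than the margin, the subtraction wraps to a huge value and std::min falls back to X itself. A tiny stand-alone demonstration with made-up values:

  #include <algorithm>
  #include <cstdio>

  int main() {
    const unsigned ErrorMargin = 3;
    unsigned Limit = 2;                             // smaller than the margin
    unsigned Guarded = std::min(Limit - ErrorMargin, Limit);
    // 2u - 3u wraps to 4294967295u, so std::min keeps the original limit.
    std::printf("%u\n", Guarded);                   // prints 2
  }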
@@ -117,7 +121,7 @@ void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU
// Register pressure is considered 'CRITICAL' if it is approaching a value
// that would reduce the wave occupancy for the execution unit. When
- // register pressure is 'CRITICAL', increading SGPR and VGPR pressure both
+ // register pressure is 'CRITICAL', increasing SGPR and VGPR pressure both
// has the same cost, so we don't need to prefer one over the other.
int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
@@ -361,14 +365,18 @@ void GCNScheduleDAGMILive::schedule() {
LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n");
return;
}
- unsigned Occ = MFI.getOccupancy();
- unsigned WavesAfter = std::min(Occ, PressureAfter.getOccupancy(ST));
- unsigned WavesBefore = std::min(Occ, PressureBefore.getOccupancy(ST));
+
+ unsigned WavesAfter =
+ std::min(S.TargetOccupancy, PressureAfter.getOccupancy(ST));
+ unsigned WavesBefore =
+ std::min(S.TargetOccupancy, PressureBefore.getOccupancy(ST));
LLVM_DEBUG(dbgs() << "Occupancy before scheduling: " << WavesBefore
<< ", after " << WavesAfter << ".\n");
- // We could not keep current target occupancy because of the just scheduled
- // region. Record new occupancy for next scheduling cycle.
+ // We may not be able to keep the current target occupancy because of the just
+ // scheduled region. We might still be able to revert scheduling if the
+ // occupancy before was higher, or if the current schedule has register
+ // pressure higher than the excess limits which could lead to more spilling.
unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
// Allow memory bound functions to drop to 4 waves if not limited by an
// attribute.
@@ -378,6 +386,7 @@ void GCNScheduleDAGMILive::schedule() {
<< MFI.getMinAllowedOccupancy() << " waves\n");
NewOccupancy = WavesAfter;
}
+
if (NewOccupancy < MinOccupancy) {
MinOccupancy = NewOccupancy;
MFI.limitOccupancy(MinOccupancy);
@@ -394,6 +403,11 @@ void GCNScheduleDAGMILive::schedule() {
RegionsWithHighRP[RegionIdx] = true;
}
+ // If this condition is true, then either the occupancy before and after
+ // scheduling is the same, or we are allowing the occupancy to drop because
+ // the function is memory bound. Even if we are OK with the current occupancy,
+ // we still need to verify that we will not introduce any extra chance of
+ // spilling.
if (WavesAfter >= MinOccupancy) {
if (Stage == UnclusteredReschedule &&
!PressureAfter.less(ST, PressureBefore)) {
@@ -540,7 +554,6 @@ GCNScheduleDAGMILive::getBBLiveInMap() const {
}
void GCNScheduleDAGMILive::finalizeSchedule() {
- GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl;
LLVM_DEBUG(dbgs() << "All regions recorded, starting actual scheduling.\n");
LiveIns.resize(Regions.size());
@@ -586,8 +599,6 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
dbgs()
<< "Retrying function scheduling with lowest recorded occupancy "
<< MinOccupancy << ".\n");
-
- S.setTargetOccupancy(MinOccupancy);
}
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 15eba3f5eac0..53d6ff0aa731 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -54,7 +54,7 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler {
// before a region scheduling to know if the region had such clusters.
bool HasClusteredNodes;
- // schedule() have seen a an excess register pressure and had to track
+ // schedule() has seen excess register pressure and had to track
// register pressure for actual scheduling heuristics.
bool HasExcessPressure;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index bd0c40081c01..d8bc0b2df2bd 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -21,13 +21,6 @@
#include "SIInstrInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-namespace llvm {
-
-class MCInst;
-class MCInstrInfo;
-
-} // namespace llvm
-
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
@@ -104,7 +97,6 @@ protected:
bool FP64;
bool FMA;
bool MIMG_R128;
- bool IsGCN;
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
@@ -172,13 +164,8 @@ protected:
bool HasArchitectedFlatScratch;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
- bool R600ALUInst;
- bool CaymanISA;
- bool CFALUBug;
bool LDSMisalignedBug;
bool HasMFMAInlineLiteralBug;
- bool HasVertexCache;
- short TexVTXClauseSize;
bool UnalignedBufferAccess;
bool UnalignedDSAccess;
bool HasPackedTID;
@@ -272,7 +259,7 @@ public:
return (Generation)Gen;
}
- /// Return the number of high bits known to be zero fror a frame index.
+ /// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
return countLeadingZeros(MaxWaveScratchSize) + getWavefrontSizeLog2();
}
@@ -612,7 +599,7 @@ public:
}
/// Return if most LDS instructions have an m0 use that require m0 to be
- /// iniitalized.
+ /// initialized.
bool ldsRequiresM0Init() const {
return getGeneration() < GFX9;
}
@@ -753,7 +740,7 @@ public:
}
// Scratch is allocated in 256 dword per wave blocks for the entire
- // wavefront. When viewed from the perspecive of an arbitrary workitem, this
+ // wavefront. When viewed from the perspective of an arbitrary workitem, this
// is 4-byte aligned.
//
// Only 4-byte alignment is really needed to access anything. Transformations
@@ -818,9 +805,7 @@ public:
return HasScalarAtomics;
}
- bool hasLDSFPAtomics() const {
- return GFX8Insts;
- }
+ bool hasLDSFPAtomicAdd() const { return GFX8Insts; }
/// \returns true if the subtarget has the v_permlanex16_b32 instruction.
bool hasPermLaneX16() const { return getGeneration() >= GFX10; }
@@ -1139,6 +1124,9 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+ std::unique_ptr<ScheduleDAGMutation>
+ createFillMFMAShadowMutation(const TargetInstrInfo *TII) const;
+
bool isWave32() const {
return getWavefrontSize() == 32;
}
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
new file mode 100644
index 000000000000..f3f664f7972a
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -0,0 +1,361 @@
+//===------------------ AMDGPUCustomBehaviour.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file implements methods from the AMDGPUCustomBehaviour class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUCustomBehaviour.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIInstrInfo.h"
+#include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/WithColor.h"
+
+namespace llvm {
+namespace mca {
+
+void AMDGPUInstrPostProcess::postProcessInstruction(
+ std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
+ switch (MCI.getOpcode()) {
+ case AMDGPU::S_WAITCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_VSCNT:
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx6_gfx7:
+ case AMDGPU::S_WAITCNT_vi:
+ return processWaitCnt(Inst, MCI);
+ }
+}
+
+// s_waitcnt instructions encode important information as immediate operands
+// which are lost during the MCInst -> mca::Instruction lowering.
+void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
+ const MCInst &MCI) {
+ for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
+ MCAOperand Op;
+ const MCOperand &MCOp = MCI.getOperand(Idx);
+ if (MCOp.isReg()) {
+ Op = MCAOperand::createReg(MCOp.getReg());
+ } else if (MCOp.isImm()) {
+ Op = MCAOperand::createImm(MCOp.getImm());
+ }
+ Op.setIndex(Idx);
+ Inst->addOperand(Op);
+ }
+}
+
+AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII)
+ : CustomBehaviour(STI, SrcMgr, MCII) {
+ generateWaitCntInfo();
+}
+
+unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
+ const InstRef &IR) {
+ const Instruction &Inst = *IR.getInstruction();
+ unsigned Opcode = Inst.getOpcode();
+
+ // llvm-mca is generally run on fully compiled assembly so we wouldn't see any
+ // pseudo instructions here. However, there are plans for the future to make
+ // it possible to use mca within backend passes. As such, I have left the
+ // pseudo version of s_waitcnt within this switch statement.
+ switch (Opcode) {
+ default:
+ return 0;
+ case AMDGPU::S_WAITCNT: // This instruction
+ case AMDGPU::S_WAITCNT_EXPCNT:
+ case AMDGPU::S_WAITCNT_LGKMCNT:
+ case AMDGPU::S_WAITCNT_VMCNT:
+ case AMDGPU::S_WAITCNT_VSCNT: // to this instruction are all pseudo.
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx6_gfx7:
+ case AMDGPU::S_WAITCNT_vi:
+ // s_endpgm also behaves as if there is an implicit
+ // s_waitcnt 0, but I'm not sure if it would be appropriate
+ // to model this in llvm-mca based on how the iterations work
+ // while simulating the pipeline over and over.
+ return handleWaitCnt(IssuedInst, IR);
+ }
+
+ return 0;
+}
+
+unsigned AMDGPUCustomBehaviour::handleWaitCnt(ArrayRef<InstRef> IssuedInst,
+ const InstRef &IR) {
+ // Currently, all s_waitcnt instructions are handled except s_waitcnt_depctr.
+ // I do not know how that instruction works so I did not attempt to model it.
+ // set the max values to begin
+ unsigned Vmcnt = 63;
+ unsigned Expcnt = 7;
+ unsigned Lgkmcnt = 31;
+ unsigned Vscnt = 63;
+ unsigned CurrVmcnt = 0;
+ unsigned CurrExpcnt = 0;
+ unsigned CurrLgkmcnt = 0;
+ unsigned CurrVscnt = 0;
+ unsigned CyclesToWaitVm = ~0U;
+ unsigned CyclesToWaitExp = ~0U;
+ unsigned CyclesToWaitLgkm = ~0U;
+ unsigned CyclesToWaitVs = ~0U;
+
+ computeWaitCnt(IR, Vmcnt, Expcnt, Lgkmcnt, Vscnt);
+
+ // We will now look at each of the currently executing instructions
+ // to find out if this wait instruction still needs to wait.
+ for (auto I = IssuedInst.begin(), E = IssuedInst.end(); I != E; I++) {
+ const InstRef &PrevIR = *I;
+ const Instruction &PrevInst = *PrevIR.getInstruction();
+ const unsigned PrevInstIndex = PrevIR.getSourceIndex() % SrcMgr.size();
+ const WaitCntInfo &PrevInstWaitInfo = InstrWaitCntInfo[PrevInstIndex];
+ const int CyclesLeft = PrevInst.getCyclesLeft();
+ assert(CyclesLeft != UNKNOWN_CYCLES &&
+ "We should know how many cycles are left for this instruction");
+ if (PrevInstWaitInfo.VmCnt) {
+ CurrVmcnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitVm)
+ CyclesToWaitVm = CyclesLeft;
+ }
+ if (PrevInstWaitInfo.ExpCnt) {
+ CurrExpcnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitExp)
+ CyclesToWaitExp = CyclesLeft;
+ }
+ if (PrevInstWaitInfo.LgkmCnt) {
+ CurrLgkmcnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitLgkm)
+ CyclesToWaitLgkm = CyclesLeft;
+ }
+ if (PrevInstWaitInfo.VsCnt) {
+ CurrVscnt++;
+ if ((unsigned)CyclesLeft < CyclesToWaitVs)
+ CyclesToWaitVs = CyclesLeft;
+ }
+ }
+
+ unsigned CyclesToWait = ~0U;
+ if (CurrVmcnt > Vmcnt && CyclesToWaitVm < CyclesToWait)
+ CyclesToWait = CyclesToWaitVm;
+ if (CurrExpcnt > Expcnt && CyclesToWaitExp < CyclesToWait)
+ CyclesToWait = CyclesToWaitExp;
+ if (CurrLgkmcnt > Lgkmcnt && CyclesToWaitLgkm < CyclesToWait)
+ CyclesToWait = CyclesToWaitLgkm;
+ if (CurrVscnt > Vscnt && CyclesToWaitVs < CyclesToWait)
+ CyclesToWait = CyclesToWaitVs;
+
+ // We may underestimate how many cycles we need to wait, but this
+ // isn't a big deal. Our return value is just how many cycles until
+ // this function gets run again. So as long as we don't overestimate
+ // the wait time, we'll still end up stalling at this instruction
+ // for the correct number of cycles.
+
+ if (CyclesToWait == ~0U)
+ return 0;
+ return CyclesToWait;
+}
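For concreteness, a worked example of the stall computation above, using invented cycle counts:

  // Suppose the dispatched instruction decodes to vmcnt(0) while two VMEM
  // loads are still in flight with 4 and 9 cycles left:
  //   Vmcnt (allowed)  = 0
  //   CurrVmcnt        = 2             // both loads count against vmcnt
  //   CyclesToWaitVm   = min(4, 9) = 4
  // CurrVmcnt > Vmcnt, so handleWaitCnt() returns 4; mca re-checks the hazard
  // four cycles later, when the first load has retired and the remaining load
  // drives the next stall.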
+
+void AMDGPUCustomBehaviour::computeWaitCnt(const InstRef &IR, unsigned &Vmcnt,
+ unsigned &Expcnt, unsigned &Lgkmcnt,
+ unsigned &Vscnt) {
+ AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
+ const Instruction &Inst = *IR.getInstruction();
+ unsigned Opcode = Inst.getOpcode();
+
+ switch (Opcode) {
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10: {
+ // Should probably be checking for nullptr
+ // here, but I'm not sure how I should handle the case
+ // where we see a nullptr.
+ const MCAOperand *OpReg = Inst.getOperand(0);
+ const MCAOperand *OpImm = Inst.getOperand(1);
+ assert(OpReg && OpReg->isReg() && "First operand should be a register.");
+ assert(OpImm && OpImm->isImm() && "Second operand should be an immediate.");
+ if (OpReg->getReg() != AMDGPU::SGPR_NULL) {
+ // Instruction is using a real register.
+ // Since we can't know what value this register will have,
+ // we can't compute what the value of this wait should be.
+ WithColor::warning() << "The register component of "
+ << MCII.getName(Opcode) << " will be completely "
+ << "ignored. So the wait may not be accurate.\n";
+ }
+ switch (Opcode) {
+ // Redundant switch so I don't have to repeat the code above
+ // for each case. There are more clever ways to avoid this
+ // extra switch and anyone can feel free to implement one of them.
+ case AMDGPU::S_WAITCNT_EXPCNT_gfx10:
+ Expcnt = OpImm->getImm();
+ break;
+ case AMDGPU::S_WAITCNT_LGKMCNT_gfx10:
+ Lgkmcnt = OpImm->getImm();
+ break;
+ case AMDGPU::S_WAITCNT_VMCNT_gfx10:
+ Vmcnt = OpImm->getImm();
+ break;
+ case AMDGPU::S_WAITCNT_VSCNT_gfx10:
+ Vscnt = OpImm->getImm();
+ break;
+ }
+ return;
+ }
+ case AMDGPU::S_WAITCNT_gfx10:
+ case AMDGPU::S_WAITCNT_gfx6_gfx7:
+ case AMDGPU::S_WAITCNT_vi:
+ unsigned WaitCnt = Inst.getOperand(0)->getImm();
+ AMDGPU::decodeWaitcnt(IV, WaitCnt, Vmcnt, Expcnt, Lgkmcnt);
+ return;
+ }
+}
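A short note on the split-counter forms handled in the first switch case above (assembly spelling quoted from memory, so treat it as an assumption):

  // s_waitcnt_vscnt null, 0x0  -> the register operand is SGPR_NULL, so only
  //                               the immediate matters and Vscnt becomes 0.
  // s_waitcnt_vscnt s0, 0x0    -> the value in s0 is unknown to the simulator;
  //                               the code above warns and uses only the
  //                               immediate.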
+
+void AMDGPUCustomBehaviour::generateWaitCntInfo() {
+ // The core logic from this function is taken from
+ // SIInsertWaitcnts::updateEventWaitcntAfter(). In that pass, the instructions
+ // that are being looked at are in the MachineInstr format, whereas we have
+ // access to the MCInst format. The side effects of this are that we can't use
+ // the mayAccessVMEMThroughFlat(Inst) or mayAccessLDSThroughFlat(Inst)
+ // functions. Therefore, we conservatively assume that these functions will
+ // return true. This may cause a few instructions to be incorrectly tagged
+ // with an extra CNT. However, these are instructions that do interact with at
+ // least one CNT so giving them an extra CNT shouldn't cause issues in most
+ // scenarios.
+ AMDGPU::IsaVersion IV = AMDGPU::getIsaVersion(STI.getCPU());
+ InstrWaitCntInfo.resize(SrcMgr.size());
+
+ int Index = 0;
+ for (auto I = SrcMgr.begin(), E = SrcMgr.end(); I != E; ++I, ++Index) {
+ const std::unique_ptr<Instruction> &Inst = *I;
+ unsigned Opcode = Inst->getOpcode();
+ const MCInstrDesc &MCID = MCII.get(Opcode);
+ if ((MCID.TSFlags & SIInstrFlags::DS) &&
+ (MCID.TSFlags & SIInstrFlags::LGKM_CNT)) {
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ if (isAlwaysGDS(Opcode) || hasModifiersSet(Inst, AMDGPU::OpName::gds))
+ InstrWaitCntInfo[Index].ExpCnt = true;
+ } else if (MCID.TSFlags & SIInstrFlags::FLAT) {
+ // We conservatively assume that mayAccessVMEMThroughFlat(Inst)
+ // and mayAccessLDSThroughFlat(Inst) would both return true for this
+ // instruction. We have to do this because those functions use
+ // information about the memory operands that we don't have access to.
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ if (!STI.hasFeature(AMDGPU::FeatureVscnt))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else if (MCID.mayLoad() && !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else
+ InstrWaitCntInfo[Index].VsCnt = true;
+ } else if (isVMEM(MCID) && !AMDGPU::getMUBUFIsBufferInv(Opcode)) {
+ if (!STI.hasFeature(AMDGPU::FeatureVscnt))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else if ((MCID.mayLoad() &&
+ !(MCID.TSFlags & SIInstrFlags::IsAtomicNoRet)) ||
+ ((MCID.TSFlags & SIInstrFlags::MIMG) && !MCID.mayLoad() &&
+ !MCID.mayStore()))
+ InstrWaitCntInfo[Index].VmCnt = true;
+ else if (MCID.mayStore())
+ InstrWaitCntInfo[Index].VsCnt = true;
+
+ // (IV.Major < 7) is meant to represent
+ // GCNTarget.vmemWriteNeedsExpWaitcnt()
+ // which is defined as
+ // { return getGeneration() < SEA_ISLANDS; }
+ if (IV.Major < 7 &&
+ (MCID.mayStore() || (MCID.TSFlags & SIInstrFlags::IsAtomicRet)))
+ InstrWaitCntInfo[Index].ExpCnt = true;
+ } else if (MCID.TSFlags & SIInstrFlags::SMRD) {
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ } else if (MCID.TSFlags & SIInstrFlags::EXP) {
+ InstrWaitCntInfo[Index].ExpCnt = true;
+ } else {
+ switch (Opcode) {
+ case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSGHALT:
+ case AMDGPU::S_MEMTIME:
+ case AMDGPU::S_MEMREALTIME:
+ InstrWaitCntInfo[Index].LgkmCnt = true;
+ break;
+ }
+ }
+ }
+}
+
+// taken from SIInstrInfo::isVMEM()
+bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
+ return MCID.TSFlags & SIInstrFlags::MUBUF ||
+ MCID.TSFlags & SIInstrFlags::MTBUF ||
+ MCID.TSFlags & SIInstrFlags::MIMG;
+}
+
+// taken from SIInstrInfo::hasModifiersSet()
+bool AMDGPUCustomBehaviour::hasModifiersSet(
+ const std::unique_ptr<Instruction> &Inst, unsigned OpName) const {
+ int Idx = AMDGPU::getNamedOperandIdx(Inst->getOpcode(), OpName);
+ if (Idx == -1)
+ return false;
+
+ const MCAOperand *Op = Inst->getOperand(Idx);
+ if (Op == nullptr || !Op->isImm() || !Op->getImm())
+ return false;
+
+ return true;
+}
+
+// taken from SIInstrInfo::isAlwaysGDS()
+bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
+ return Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_SEMA_V || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
+ Opcode == AMDGPU::DS_GWS_SEMA_P ||
+ Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
+}
+
+} // namespace mca
+} // namespace llvm
+
+using namespace llvm;
+using namespace mca;
+
+static CustomBehaviour *
+createAMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr,
+ const MCInstrInfo &MCII) {
+ return new AMDGPUCustomBehaviour(STI, SrcMgr, MCII);
+}
+
+static InstrPostProcess *
+createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
+ const MCInstrInfo &MCII) {
+ return new AMDGPUInstrPostProcess(STI, MCII);
+}
+
+/// Extern function to initialize the targets for the AMDGPU backend
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
+ TargetRegistry::RegisterCustomBehaviour(getTheAMDGPUTarget(),
+ createAMDGPUCustomBehaviour);
+ TargetRegistry::RegisterInstrPostProcess(getTheAMDGPUTarget(),
+ createAMDGPUInstrPostProcess);
+
+ TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
+ createAMDGPUCustomBehaviour);
+ TargetRegistry::RegisterInstrPostProcess(getTheGCNTarget(),
+ createAMDGPUInstrPostProcess);
+}
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
new file mode 100644
index 000000000000..56650515bd0a
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -0,0 +1,103 @@
+//===------------------- AMDGPUCustomBehaviour.h ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file defines the AMDGPUCustomBehaviour class which inherits from
+/// CustomBehaviour. This class is used by the tool llvm-mca to enforce
+/// target specific behaviour that is not expressed well enough in the
+/// scheduling model for mca to enforce it automatically.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCA_AMDGPUCUSTOMBEHAVIOUR_H
+#define LLVM_LIB_TARGET_AMDGPU_MCA_AMDGPUCUSTOMBEHAVIOUR_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MCA/CustomBehaviour.h"
+#include "llvm/Support/TargetParser.h"
+
+namespace llvm {
+namespace mca {
+
+class AMDGPUInstrPostProcess : public InstrPostProcess {
+ void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+
+public:
+ AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
+ : InstrPostProcess(STI, MCII) {}
+
+ ~AMDGPUInstrPostProcess() {}
+
+ void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
+ const MCInst &MCI) override;
+};
+
+struct WaitCntInfo {
+ bool VmCnt = false;
+ bool ExpCnt = false;
+ bool LgkmCnt = false;
+ bool VsCnt = false;
+};
+
+class AMDGPUCustomBehaviour : public CustomBehaviour {
+ /// Whenever MCA would like to dispatch an s_waitcnt instruction,
+ /// we must check all the instruction that are still executing to see if
+ /// they modify the same CNT as we need to wait for. This vector
+ /// gets built in the constructor and contains 1 WaitCntInfo struct
+ /// for each instruction within the SrcManager. Each element
+ /// tells us which CNTs that instruction may interact with.
+ /// We conservatively assume some instructions interact with more
+ /// CNTs than they do in reality, so we will occasionally wait
+ /// longer than necessary, but we shouldn't ever wait for shorter.
+ std::vector<WaitCntInfo> InstrWaitCntInfo;
+
+ /// This method gets called from the constructor and is
+ /// where we set up the InstrWaitCntInfo vector.
+ /// The core logic for determining which CNTs an instruction
+ /// interacts with is taken from SIInsertWaitcnts::updateEventWaitcntAfter().
+ /// Unfortunately, some of the logic from that function is not available to us
+ /// in this scope so we conservatively end up assuming that some
+ /// instructions interact with more CNTs than they do in reality.
+ void generateWaitCntInfo();
+ /// Helper function used in generateWaitCntInfo()
+ bool hasModifiersSet(const std::unique_ptr<Instruction> &Inst,
+ unsigned OpName) const;
+ /// Helper function used in generateWaitCntInfo()
+ bool isAlwaysGDS(uint16_t Opcode) const;
+ /// Helper function used in generateWaitCntInfo()
+ bool isVMEM(const MCInstrDesc &MCID);
+ /// This method gets called from checkCustomHazard when mca is attempting to
+ /// dispatch an s_waitcnt instruction (or one of its variants). The method
+ /// looks at each of the instructions that are still executing in the pipeline
+ /// to determine if the waitcnt should force a wait.
+ unsigned handleWaitCnt(ArrayRef<InstRef> IssuedInst, const InstRef &IR);
+ /// Based on the type of s_waitcnt instruction we are looking at, and what its
+ /// operands are, this method will set the values for each of the cnt
+ /// references provided as arguments.
+ void computeWaitCnt(const InstRef &IR, unsigned &Vmcnt, unsigned &Expcnt,
+ unsigned &Lgkmcnt, unsigned &Vscnt);
+
+public:
+ AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
+ const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII);
+
+ ~AMDGPUCustomBehaviour() {}
+ /// This method determines whether an instruction
+ /// should be allowed to be dispatched. The return value is
+ /// the number of cycles until the instruction can be dispatched.
+ /// It is called after MCA has already checked for
+ /// register and hardware dependencies, so it should only
+ /// implement custom behaviour and dependencies that are not picked up
+ /// by MCA naturally.
+ unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst,
+ const InstRef &IR) override;
+};
+} // namespace mca
+} // namespace llvm
+
+#endif
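A minimal sketch of the CustomBehaviour contract described by the comments above, assuming the llvm::mca interface this patch builds on; the NopCustomBehaviour class and its trivial body are illustrative only and not part of the change:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MCA/CustomBehaviour.h"

namespace {
// checkCustomHazard returns the number of stall cycles to impose before
// dispatching IR; returning 0 means the instruction may dispatch immediately.
class NopCustomBehaviour : public llvm::mca::CustomBehaviour {
public:
  NopCustomBehaviour(const llvm::MCSubtargetInfo &STI,
                     const llvm::mca::SourceMgr &SrcMgr,
                     const llvm::MCInstrInfo &MCII)
      : CustomBehaviour(STI, SrcMgr, MCII) {}

  unsigned checkCustomHazard(llvm::ArrayRef<llvm::mca::InstRef> IssuedInst,
                             const llvm::mca::InstRef &IR) override {
    return 0; // No target-specific hazards to model.
  }
};
} // end anonymous namespace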
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index dd0db6c7b655..50318a59225d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -15,8 +15,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -44,7 +44,8 @@ public:
const MCSubtargetInfo &STI) const override;
unsigned getMinimumNopSize() const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
};
@@ -169,7 +170,8 @@ unsigned AMDGPUAsmBackend::getMinimumNopSize() const {
return 4;
}
-bool AMDGPUAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool AMDGPUAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// If the count is not 4-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
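A minimal sketch of the zero-pad-then-NOP scheme the comment above describes, not the actual AMDGPU implementation; the free-standing helper and the Encoded_S_NOP_0 value are assumptions for illustration, and the new MCSubtargetInfo parameter is omitted:

#include <cstdint>
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"

// Pad the unaligned tail with zeros, then emit one 4-byte NOP encoding for
// each remaining 4-byte chunk.
static bool writeNopDataSketch(llvm::raw_ostream &OS, uint64_t Count) {
  const uint32_t Encoded_S_NOP_0 = 0xbf800000; // assumed s_nop 0 encoding
  OS.write_zeros(static_cast<unsigned>(Count % 4));
  for (uint64_t I = 0; I != Count / 4; ++I)
    llvm::support::endian::write<uint32_t>(OS, Encoded_S_NOP_0,
                                           llvm::support::endianness::little);
  return true;
}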
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
index b56f75132135..e09e2dca1b47 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h
@@ -19,10 +19,9 @@ namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
-class MCSubtargetInfo;
class MCELFStreamer;
-class Triple;
class MCObjectWriter;
+class Triple;
MCELFStreamer *createAMDGPUELFStreamer(const Triple &T, MCContext &Context,
std::unique_ptr<MCAsmBackend> MAB,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 9ba0ffbced3d..b68b4b12e750 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -605,6 +605,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
switch (OpTy) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -631,6 +632,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
printImmediate16(Op.getImm(), STI, O);
break;
case AMDGPU::OPERAND_REG_IMM_V2INT16:
@@ -1451,208 +1453,3 @@ void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
}
#include "AMDGPUGenAsmWriter.inc"
-
-void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address,
- StringRef Annot, const MCSubtargetInfo &STI,
- raw_ostream &O) {
- O.flush();
- printInstruction(MI, Address, O);
- printAnnotation(O, Annot);
-}
-
-void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
-}
-
-void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int BankSwizzle = MI->getOperand(OpNo).getImm();
- switch (BankSwizzle) {
- case 1:
- O << "BS:VEC_021/SCL_122";
- break;
- case 2:
- O << "BS:VEC_120/SCL_212";
- break;
- case 3:
- O << "BS:VEC_102/SCL_221";
- break;
- case 4:
- O << "BS:VEC_201";
- break;
- case 5:
- O << "BS:VEC_210";
- break;
- default:
- break;
- }
-}
-
-void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT");
-}
-
-void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned CT = MI->getOperand(OpNo).getImm();
- switch (CT) {
- case 0:
- O << 'U';
- break;
- case 1:
- O << 'N';
- break;
- default:
- break;
- }
-}
-
-void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- int KCacheMode = MI->getOperand(OpNo).getImm();
- if (KCacheMode > 0) {
- int KCacheBank = MI->getOperand(OpNo - 2).getImm();
- O << "CB" << KCacheBank << ':';
- int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
- int LineSize = (KCacheMode == 1) ? 16 : 32;
- O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
- }
-}
-
-void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " ");
-}
-
-void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- assert(Op.isImm() || Op.isExpr());
- if (Op.isImm()) {
- int64_t Imm = Op.getImm();
- O << Imm << '(' << BitsToFloat(Imm) << ')';
- }
- if (Op.isExpr()) {
- Op.getExpr()->print(O << '@', &MAI);
- }
-}
-
-void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-');
-}
-
-void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- switch (MI->getOperand(OpNo).getImm()) {
- default: break;
- case 1:
- O << " * 2.0";
- break;
- case 2:
- O << " * 4.0";
- break;
- case 3:
- O << " / 2.0";
- break;
- }
-}
-
-void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printOperand(MI, OpNo, O);
- O << ", ";
- printOperand(MI, OpNo + 1, O);
-}
-
-void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (OpNo >= MI->getNumOperands()) {
- O << "/*Missing OP" << OpNo << "*/";
- return;
- }
-
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- switch (Op.getReg()) {
- // This is the default predicate state, so we don't need to print it.
- case R600::PRED_SEL_OFF:
- break;
-
- default:
- O << getRegisterName(Op.getReg());
- break;
- }
- } else if (Op.isImm()) {
- O << Op.getImm();
- } else if (Op.isDFPImm()) {
- // We special case 0.0 because otherwise it will be printed as an integer.
- if (Op.getDFPImm() == 0.0)
- O << "0.0";
- else {
- O << bit_cast<double>(Op.getDFPImm());
- }
- } else if (Op.isExpr()) {
- const MCExpr *Exp = Op.getExpr();
- Exp->print(O, &MAI);
- } else {
- O << "/*INV_OP*/";
- }
-}
-
-void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+');
-}
-
-void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned Sel = MI->getOperand(OpNo).getImm();
- switch (Sel) {
- case 0:
- O << 'X';
- break;
- case 1:
- O << 'Y';
- break;
- case 2:
- O << 'Z';
- break;
- case 3:
- O << 'W';
- break;
- case 4:
- O << '0';
- break;
- case 5:
- O << '1';
- break;
- case 7:
- O << '_';
- break;
- default:
- break;
- }
-}
-
-void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,");
-}
-
-void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,");
-}
-
-void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.getImm() == 0) {
- O << " (MASKED)";
- }
-}
-
-#include "R600GenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 3cb4fcb28cb0..71db0beba0b6 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -240,36 +240,6 @@ protected:
raw_ostream &O);
};
-class R600InstPrinter : public MCInstPrinter {
-public:
- R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
-
- void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
- const MCSubtargetInfo &STI, raw_ostream &O) override;
- std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
- void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-};
-
} // End namespace llvm
#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index f3d945cc0764..93bec8aaadfd 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// CodeEmitter interface for R600 and SI codegen.
+/// CodeEmitter interface for SI codegen.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index 1a7ca7e1a330..53c724f2211a 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// CodeEmitter interface for R600 and SI codegen.
+/// CodeEmitter interface for SI codegen.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 34b2cd1fc1e4..1f917cd91b47 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -16,7 +16,8 @@
#include "AMDGPUInstPrinter.h"
#include "AMDGPUMCAsmInfo.h"
#include "AMDGPUTargetStreamer.h"
-#include "SIDefines.h"
+#include "R600InstPrinter.h"
+#include "R600MCTargetDesc.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -26,10 +27,9 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 71b44a509108..e5cce6045c8c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -18,6 +18,7 @@
#include <memory>
namespace llvm {
+class Target;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
@@ -26,20 +27,11 @@ class MCObjectTargetWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCTargetOptions;
-class StringRef;
-class Target;
-class Triple;
-class raw_pwrite_stream;
enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 };
MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
-MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- MCContext &Ctx);
-MCInstrInfo *createR600MCInstrInfo();
-
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
@@ -57,23 +49,12 @@ createAMDGPUELFObjectWriter(bool Is64Bit, uint8_t OSABI,
#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
-#define GET_REGINFO_ENUM
-#include "R600GenRegisterInfo.inc"
-
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
#include "AMDGPUGenInstrInfo.inc"
-#define GET_INSTRINFO_ENUM
-#define GET_INSTRINFO_OPERAND_ENUM
-#define GET_INSTRINFO_SCHED_ENUM
-#include "R600GenInstrInfo.inc"
-
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
-#define GET_SUBTARGETINFO_ENUM
-#include "R600GenSubtargetInfo.inc"
-
#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index cef34a5e5a59..a857fd00a855 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -17,13 +17,8 @@ struct amd_kernel_code_t;
namespace llvm {
-class DataLayout;
-class Function;
class MCELFStreamer;
class MCSymbol;
-class MDNode;
-class Module;
-class Type;
class formatted_raw_ostream;
namespace AMDGPU {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
new file mode 100644
index 000000000000..f77ed1faf029
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
@@ -0,0 +1,224 @@
+//===-- R600InstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// \file
+//===----------------------------------------------------------------------===//
+
+#include "R600InstPrinter.h"
+#include "AMDGPUInstPrinter.h"
+#include "R600MCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+void R600InstPrinter::printInst(const MCInst *MI, uint64_t Address,
+ StringRef Annot, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O.flush();
+ printInstruction(MI, Address, O);
+ printAnnotation(O, Annot);
+}
+
+void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
+}
+
+void R600InstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int BankSwizzle = MI->getOperand(OpNo).getImm();
+ switch (BankSwizzle) {
+ case 1:
+ O << "BS:VEC_021/SCL_122";
+ break;
+ case 2:
+ O << "BS:VEC_120/SCL_212";
+ break;
+ case 3:
+ O << "BS:VEC_102/SCL_221";
+ break;
+ case 4:
+ O << "BS:VEC_201";
+ break;
+ case 5:
+ O << "BS:VEC_210";
+ break;
+ default:
+ break;
+ }
+}
+
+void R600InstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "_SAT");
+}
+
+void R600InstPrinter::printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ unsigned CT = MI->getOperand(OpNo).getImm();
+ switch (CT) {
+ case 0:
+ O << 'U';
+ break;
+ case 1:
+ O << 'N';
+ break;
+ default:
+ break;
+ }
+}
+
+void R600InstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int KCacheMode = MI->getOperand(OpNo).getImm();
+ if (KCacheMode > 0) {
+ int KCacheBank = MI->getOperand(OpNo - 2).getImm();
+ O << "CB" << KCacheBank << ':';
+ int KCacheAddr = MI->getOperand(OpNo + 2).getImm();
+ int LineSize = (KCacheMode == 1) ? 16 : 32;
+ O << KCacheAddr * 16 << '-' << KCacheAddr * 16 + LineSize;
+ }
+}
+
+void R600InstPrinter::printLast(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "*", " ");
+}
+
+void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm() || Op.isExpr());
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm();
+ O << Imm << '(' << BitsToFloat(Imm) << ')';
+ }
+ if (Op.isExpr()) {
+ Op.getExpr()->print(O << '@', &MAI);
+ }
+}
+
+void R600InstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '-');
+}
+
+void R600InstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ switch (MI->getOperand(OpNo).getImm()) {
+ default:
+ break;
+ case 1:
+ O << " * 2.0";
+ break;
+ case 2:
+ O << " * 4.0";
+ break;
+ case 3:
+ O << " / 2.0";
+ break;
+ }
+}
+
+void R600InstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo + 1, O);
+}
+
+void R600InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (OpNo >= MI->getNumOperands()) {
+ O << "/*Missing OP" << OpNo << "*/";
+ return;
+ }
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ switch (Op.getReg()) {
+ // This is the default predicate state, so we don't need to print it.
+ case R600::PRED_SEL_OFF:
+ break;
+
+ default:
+ O << getRegisterName(Op.getReg());
+ break;
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else if (Op.isDFPImm()) {
+ // We special case 0.0 because otherwise it will be printed as an integer.
+ if (Op.getDFPImm() == 0.0)
+ O << "0.0";
+ else {
+ O << bit_cast<double>(Op.getDFPImm());
+ }
+ } else if (Op.isExpr()) {
+ const MCExpr *Exp = Op.getExpr();
+ Exp->print(O, &MAI);
+ } else {
+ O << "/*INV_OP*/";
+ }
+}
+
+void R600InstPrinter::printRel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '+');
+}
+
+void R600InstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Sel = MI->getOperand(OpNo).getImm();
+ switch (Sel) {
+ case 0:
+ O << 'X';
+ break;
+ case 1:
+ O << 'Y';
+ break;
+ case 2:
+ O << 'Z';
+ break;
+ case 3:
+ O << 'W';
+ break;
+ case 4:
+ O << '0';
+ break;
+ case 5:
+ O << '1';
+ break;
+ case 7:
+ O << '_';
+ break;
+ default:
+ break;
+ }
+}
+
+void R600InstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "ExecMask,");
+}
+
+void R600InstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ AMDGPUInstPrinter::printIfSet(MI, OpNo, O, "Pred,");
+}
+
+void R600InstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.getImm() == 0) {
+ O << " (MASKED)";
+ }
+}
+
+#include "R600GenAsmWriter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h
new file mode 100644
index 000000000000..6c88ffd1514b
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h
@@ -0,0 +1,48 @@
+//===-- R600InstPrinter.h - AMDGPU MC Inst -> ASM interface -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600INSTPRINTER_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600INSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class R600InstPrinter : public MCInstPrinter {
+public:
+ R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
+ const MCSubtargetInfo &STI, raw_ostream &O) override;
+ std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
+ void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+
+} // End namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index bbca8cbb742c..6fe192e95e72 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
index a4809af29daa..269209a12175 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUMCTargetDesc.h"
+#include "R600MCTargetDesc.h"
#include "llvm/MC/MCInstrInfo.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
new file mode 100644
index 000000000000..fc52cb33824f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h
@@ -0,0 +1,44 @@
+//===-- R600MCTargetDesc.h - R600 Target Descriptions -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Provides R600 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600MCTARGETDESC_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_R600MCTARGETDESC_H
+
+#include <cstdint>
+
+namespace llvm {
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCRegisterInfo;
+
+MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+MCInstrInfo *createR600MCInstrInfo();
+
+} // namespace llvm
+
+#define GET_REGINFO_ENUM
+#include "R600GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
+#define GET_INSTRINFO_SCHED_ENUM
+#include "R600GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "R600GenSubtargetInfo.inc"
+
+#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index dbce4b2e872c..77f219aaa3ab 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -233,6 +233,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -255,6 +256,7 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
// FIXME Is this correct? What do inline immediates do on SI for f16 src
@@ -277,6 +279,9 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
uint32_t Encoding = getLit16Encoding(Lo16, STI);
return Encoding;
}
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16:
+ return MO.getImm();
default:
llvm_unreachable("invalid operand size");
}
@@ -341,7 +346,13 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
(bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
return;
- // Check for additional literals in SRC0/1/2 (Op 1/2/3)
+ // Do not emit literals from SISrc operands for instructions with mandatory literals.
+ int ImmLitIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
+ if (ImmLitIdx != -1)
+ return;
+
+ // Check for additional literals
for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
// Check if this operand should be encoded as [SV]Src
@@ -536,8 +547,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
- if (Enc != ~0U &&
- (Enc != 255 || Desc.getSize() == 4 || Desc.getSize() == 8))
+ if (Enc != ~0U)
return Enc;
} else if (MO.isImm())
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index bacb790aac62..6dd886367302 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -43,6 +43,7 @@ class MIMGBaseOpcode : PredicateControl {
bit HasD16 = 0;
bit IsAtomicRet = 0;
bit MSAA = 0;
+ bit BVH = 0;
}
def MIMGBaseOpcode : GenericEnum {
@@ -54,7 +55,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
- "LodOrClampOrMip", "HasD16", "MSAA"];
+ "LodOrClampOrMip", "HasD16", "MSAA", "BVH"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
let PrimaryKey = ["BaseOpcode"];
@@ -872,6 +873,14 @@ multiclass MIMG_Gather <mimgopc op, AMDGPUSampleVariant sample, bit wqm = 0,
multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
: MIMG_Gather<op, sample, 1>;
+class MIMG_IntersectRay_Helper<bit Is64, bit A16> {
+ int num_addrs = !if(Is64, !if(A16, 9, 12), !if(A16, 8, 11));
+ // TODO: MIMGAddrSize will choose VReg_512, which is a 16-register tuple,
+ // when we only need 9, 11 or 12 depending on the A16 field and ptr size.
+ RegisterClass RegClass = MIMGAddrSize<num_addrs, 0>.RegClass;
+ int VAddrDwords = !srl(RegClass.Size, 5);
+}
+
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit A16>
: MIMG_gfx10<op.BASE, (outs VReg_128:$vdata), "AMDGPU"> {
@@ -890,8 +899,11 @@ class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(A16, "$a16", "");
}
-multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16> {
- def "" : MIMGBaseOpcode;
+multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit A16> {
+ defvar info = MIMG_IntersectRay_Helper<Is64, A16>;
+ def "" : MIMGBaseOpcode {
+ let BVH = 1;
+ }
let SubtargetPredicate = HasGFX10_AEncoding,
AssemblerPredicate = HasGFX10_AEncoding,
AsmMatchConverter = !if(A16, "cvtIntersectRay", ""),
@@ -908,13 +920,11 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, int num_addrs, bit A16>
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
- // TODO: MIMGAddrSize will choose VReg_512 which is a 16 register tuple,
- // when we only need 9, 11 or 12 depending on A16 field and ptr size.
- def "_sa" : MIMG_IntersectRay_gfx10<op, opcode, MIMGAddrSize<num_addrs, 0>.RegClass, A16> {
- let VAddrDwords = !srl(MIMGAddrSize<num_addrs, 0>.RegClass.Size, 5);
+ def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, A16> {
+ let VAddrDwords = info.VAddrDwords;
}
- def _nsa : MIMG_IntersectRay_nsa_gfx10<op, opcode, num_addrs, A16> {
- let VAddrDwords = num_addrs;
+ def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, A16> {
+ let VAddrDwords = info.num_addrs;
}
}
}
@@ -949,7 +959,7 @@ defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimgopc<0x19>, "image_atomic_or">
defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimgopc<0x1a>, "image_atomic_xor">;
defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimgopc<0x1b>, "image_atomic_inc">;
defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimgopc<0x1c>, "image_atomic_dec">;
-defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 0, 1>;
+defm IMAGE_ATOMIC_FCMPSWAP : MIMG_Atomic <mimgopc<0x1d, MIMG.NOP>, "image_atomic_fcmpswap", 1, 1>;
defm IMAGE_ATOMIC_FMIN : MIMG_Atomic <mimgopc<0x1e, MIMG.NOP>, "image_atomic_fmin", 0, 1>;
defm IMAGE_ATOMIC_FMAX : MIMG_Atomic <mimgopc<0x1f, MIMG.NOP>, "image_atomic_fmax", 0, 1>;
@@ -1045,10 +1055,10 @@ defm IMAGE_SAMPLE_C_CD_CL_O_G16 : MIMG_Sampler <mimgopc<0xef>, AMDGPUSample_c_cd
let SubtargetPredicate = HasGFX10_AEncoding in
defm IMAGE_MSAA_LOAD_X : MIMG_NoSampler <mimgopc<0x80>, "image_msaa_load", 1, 0, 0, 1>;
-defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 11, 0>;
-defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 8, 1>;
-defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 12, 0>;
-defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 9, 1>;
+defm IMAGE_BVH_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 0>;
+defm IMAGE_BVH_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe6>, "image_bvh_intersect_ray", 0, 1>;
+defm IMAGE_BVH64_INTERSECT_RAY : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 0>;
+defm IMAGE_BVH64_INTERSECT_RAY_a16 : MIMG_IntersectRay<mimgopc<0xe7>, "image_bvh64_intersect_ray", 1, 1>;
/********** ========================================= **********/
/********** Table of dimension-aware image intrinsics **********/
@@ -1098,7 +1108,7 @@ def ImageDimIntrinsicTable : GenericTable {
let PrimaryKeyEarlyOut = 1;
}
-def getImageDimInstrinsicByBaseOpcode : SearchIndex {
+def getImageDimIntrinsicByBaseOpcode : SearchIndex {
let Table = ImageDimIntrinsicTable;
let Key = ["BaseOpcode", "Dim"];
}
diff --git a/llvm/lib/Target/AMDGPU/R600.h b/llvm/lib/Target/AMDGPU/R600.h
new file mode 100644
index 000000000000..2b483ae63da9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600.h
@@ -0,0 +1,50 @@
+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+/// \file
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600_H
+#define LLVM_LIB_TARGET_AMDGPU_R600_H
+
+#include "llvm/Support/CodeGen.h"
+
+namespace llvm {
+
+class FunctionPass;
+class TargetMachine;
+class ModulePass;
+class PassRegistry;
+
+// R600 Passes
+FunctionPass *createR600VectorRegMerger();
+FunctionPass *createR600ExpandSpecialInstrsPass();
+FunctionPass *createR600EmitClauseMarkers();
+FunctionPass *createR600ClauseMergePass();
+FunctionPass *createR600Packetizer();
+FunctionPass *createR600ControlFlowFinalizer();
+FunctionPass *createAMDGPUCFGStructurizerPass();
+FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
+ModulePass *createR600OpenCLImageTypeLoweringPass();
+
+void initializeR600ClauseMergePassPass(PassRegistry &);
+extern char &R600ClauseMergePassID;
+
+void initializeR600ControlFlowFinalizerPass(PassRegistry &);
+extern char &R600ControlFlowFinalizerID;
+
+void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
+extern char &R600ExpandSpecialInstrsPassID;
+
+void initializeR600VectorRegMergerPass(PassRegistry &);
+extern char &R600VectorRegMergerID;
+
+void initializeR600PacketizerPass(PassRegistry &);
+extern char &R600PacketizerID;
+
+} // End namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/AMDGPU/R600.td b/llvm/lib/Target/AMDGPU/R600.td
index 1d11da969474..45bc955d4f4c 100644
--- a/llvm/lib/Target/AMDGPU/R600.td
+++ b/llvm/lib/Target/AMDGPU/R600.td
@@ -34,6 +34,7 @@ def ALU_NULL : FuncUnit;
include "AMDGPUFeatures.td"
include "R600Schedule.td"
include "R600Processors.td"
+include "R600InstrInfo.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUInstructions.td"
include "R600Instructions.td"
diff --git a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
index a96fc7ef234e..c19e3c41485e 100644
--- a/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/R600AsmPrinter.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "R600AsmPrinter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
@@ -129,4 +129,3 @@ bool R600AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-
diff --git a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index a19d00b62502..1d93165f9eec 100644
--- a/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -12,8 +12,8 @@
/// It needs to be called after IfCvt for best results.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Subtarget.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index ca1e61393e9a..29c37c706138 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
#include <set>
diff --git a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 664e134889e9..d5eaa33ef964 100644
--- a/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -13,8 +13,8 @@
/// initiated by CF_ALU instructions.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 81dc91ab922f..838a497b4df1 100644
--- a/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9f842e91c0f3
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -0,0 +1,184 @@
+//===-- R600ISelDAGToDAG.cpp - A dag to dag inst selector for R600 --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//==-----------------------------------------------------------------------===//
+//
+/// \file
+/// Defines an instruction selector for the R600 subtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUISelDAGToDAG.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
+#include "R600Subtarget.h"
+#include "llvm/Analysis/ValueTracking.h"
+
+class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+ const R600Subtarget *Subtarget;
+
+ bool isConstantLoad(const MemSDNode *N, int cbID) const;
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+ SDValue &Offset);
+
+public:
+ explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel)
+ : AMDGPUDAGToDAGISel(TM, OptLevel) {}
+
+ void Select(SDNode *N) override;
+
+ bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+ bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void PreprocessISelDAG() override {}
+
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
+};
+
+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<R600Subtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+ if (!N->readMem())
+ return false;
+ if (CbId == -1)
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
+
+ return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue &IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr =
+ CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue &BaseReg,
+ SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
+void R600DAGToDAGISel::Select(SDNode *N) {
+ unsigned int Opc = N->getOpcode();
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return; // Already selected.
+ }
+
+ switch (Opc) {
+ default:
+ break;
+ case AMDGPUISD::BUILD_VERTICAL_VECTOR:
+ case ISD::SCALAR_TO_VECTOR:
+ case ISD::BUILD_VECTOR: {
+ EVT VT = N->getValueType(0);
+ unsigned NumVectorElts = VT.getVectorNumElements();
+ unsigned RegClassID;
+ // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
+ // that adds a 128 bits reg copy when going through TwoAddressInstructions
+ // pass. We want to avoid 128 bits copies as much as possible because they
+ // can't be bundled by our scheduler.
+ switch (NumVectorElts) {
+ case 2:
+ RegClassID = R600::R600_Reg64RegClassID;
+ break;
+ case 4:
+ if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
+ RegClassID = R600::R600_Reg128VerticalRegClassID;
+ else
+ RegClassID = R600::R600_Reg128RegClassID;
+ break;
+ default:
+ llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
+ }
+ SelectBuildVector(N, RegClassID);
+ return;
+ }
+ }
+
+ SelectCode(N);
+}
+
+bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *C;
+ SDLoc DL(Addr);
+
+ if ((C = dyn_cast<ConstantSDNode>(Addr))) {
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
+ (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
+ } else {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
+ }
+
+ return true;
+}
+
+bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ ConstantSDNode *IMMOffset;
+
+ if (Addr.getOpcode() == ISD::ADD &&
+ (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
+ isInt<16>(IMMOffset->getZExtValue())) {
+
+ Base = Addr.getOperand(0);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ // If the pointer address is constant, we can move it to the offset field.
+ } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) &&
+ isInt<16>(IMMOffset->getZExtValue())) {
+ Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(CurDAG->getEntryNode()), R600::ZERO,
+ MVT::i32);
+ Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
+ MVT::i32);
+ return true;
+ }
+
+ // Default case, no offset
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
+ return true;
+}
+
+/// This pass converts a legalized DAG into an R600-specific
+/// DAG, ready for instruction scheduling.
+FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
+ CodeGenOpt::Level OptLevel) {
+ return new R600DAGToDAGISel(TM, OptLevel);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 002ef1801448..0215eb9f9fea 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -13,11 +13,12 @@
#include "R600ISelLowering.h"
#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -335,7 +336,9 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
*BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
//TODO: Ugh this is rather ugly
- MIB->getOperand(Idx) = MI.getOperand(1);
+ const MachineOperand &MO = MI.getOperand(1);
+ MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
+ MO.getTargetFlags());
break;
}
@@ -827,7 +830,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
bool R600TargetLowering::isZero(SDValue Op) const {
if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
- return Cst->isNullValue();
+ return Cst->isZero();
} else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
return CstFP->isZero();
} else {
@@ -923,7 +926,7 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
std::swap(LHS, RHS);
CC = DAG.getCondCode(CCSwapped);
} else {
- // Try inverting the conditon and then swapping the operands
+ // Try inverting the condition and then swapping the operands
ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
@@ -1564,7 +1567,7 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
}
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
// Local and Private addresses do not handle vectors. Limit to i32
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
return (MemVT.getSizeInBits() <= 32);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 920cf3cd97ef..f9a9a6127322 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_AMDGPU_R600ISELLOWERING_H
#include "AMDGPUISelLowering.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -47,7 +48,7 @@ public:
EVT VT) const override;
bool canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const override;
+ const MachineFunction &MF) const override;
bool allowsMisalignedMemoryAccesses(
EVT VT, unsigned AS, Align Alignment,
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 7a623f3e304e..a7ebf72315cb 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -13,7 +13,8 @@
#include "R600InstrInfo.h"
#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
#include "llvm/ADT/SmallSet.h"
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 1e249c6348f1..fc567f1a1fca 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -175,7 +175,7 @@ public:
int *BytesAdded = nullptr) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
- int *BytesRemvoed = nullptr) const override;
+ int *BytesRemoved = nullptr) const override;
bool isPredicated(const MachineInstr &MI) const override;
@@ -211,7 +211,7 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
- /// Reserve the registers that may be accesed using indirect addressing.
+ /// Reserve the registers that may be accessed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF,
const R600RegisterInfo &TRI) const;
@@ -220,7 +220,7 @@ public:
/// \p Channel
///
/// We model indirect addressing using a virtual address space that can be
- /// accesed with loads and stores. The "Indirect Address" is the memory
+ /// accessed with loads and stores. The "Indirect Address" is the memory
/// address in this virtual address space that maps to the given \p RegIndex
/// and \p Channel.
unsigned calculateIndirectAddress(unsigned RegIndex, unsigned Channel) const;
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.td b/llvm/lib/Target/AMDGPU/R600InstrInfo.td
new file mode 100644
index 000000000000..92320748c497
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.td
@@ -0,0 +1,23 @@
+//===-- R600InstrInfo.td - R600 DAG nodes ------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains DAG node definitions for the R600 target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// R600 DAG Nodes
+//
+
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
+def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
+ [SDNPHasChain, SDNPSideEffect]>;
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index 055e2de59ea1..4487864888b6 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -74,8 +74,6 @@ def FRAMEri : Operand<iPTR> {
let MIOperandInfo = (ops R600_Reg32:$ptr, i32imm:$index);
}
-def ADDRParam : ComplexPattern<i32, 2, "SelectADDRParam", [], []>;
-def ADDRDWord : ComplexPattern<i32, 1, "SelectADDRDWord", [], []>;
def ADDRVTX_READ : ComplexPattern<i32, 2, "SelectADDRVTX_READ", [], []>;
def ADDRGA_CONST_OFFSET : ComplexPattern<i32, 1, "SelectGlobalValueConstantOffset", [], []>;
def ADDRGA_VAR_OFFSET : ComplexPattern<i32, 2, "SelectGlobalValueVariableOffset", [], []>;
@@ -212,16 +210,6 @@ class R600_3OP <bits<5> inst, string opName, list<dag> pattern,
let Inst{63-32} = Word1;
}
-class R600_REDUCTION <bits<11> inst, dag ins, string asm, list<dag> pattern,
- InstrItinClass itin = VecALU> :
- InstR600 <(outs R600_Reg32:$dst),
- ins,
- asm,
- pattern,
- itin>;
-
-
-
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
@@ -815,7 +803,7 @@ def DUMMY_CHAIN : R600WrapperInst <
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
-class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <
+class MOV_IMM <Operand immType> : R600WrapperInst <
(outs R600_Reg32:$dst),
(ins immType:$imm),
"",
@@ -826,20 +814,20 @@ class MOV_IMM <ValueType vt, Operand immType> : R600WrapperInst <
} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
-def MOV_IMM_I32 : MOV_IMM<i32, i32imm>;
+def MOV_IMM_I32 : MOV_IMM<i32imm>;
def : R600Pat <
(imm:$val),
(MOV_IMM_I32 imm:$val)
>;
-def MOV_IMM_GLOBAL_ADDR : MOV_IMM<iPTR, i32imm>;
+def MOV_IMM_GLOBAL_ADDR : MOV_IMM<i32imm>;
def : R600Pat <
(AMDGPUconstdata_ptr tglobaladdr:$addr),
(MOV_IMM_GLOBAL_ADDR tglobaladdr:$addr)
>;
-def MOV_IMM_F32 : MOV_IMM<f32, f32imm>;
+def MOV_IMM_F32 : MOV_IMM<f32imm>;
def : R600Pat <
(fpimm:$val),
(MOV_IMM_F32 fpimm:$val)
@@ -1358,7 +1346,7 @@ let Predicates = [isR600] in {
//===----------------------------------------------------------------------===//
-// Regist loads and stores - for indirect addressing
+// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//
let Namespace = "R600" in {
diff --git a/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
new file mode 100644
index 000000000000..8f7807a2b472
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600MCInstLower.cpp
@@ -0,0 +1,73 @@
+//===- R600MCInstLower.cpp - Lower R600 MachineInstr to an MCInst ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Code to lower R600 MachineInstrs to their corresponding MCInst.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPUMCInstLower.h"
+#include "R600AsmPrinter.h"
+#include "R600Subtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+
+class R600MCInstLower : public AMDGPUMCInstLower {
+public:
+ R600MCInstLower(MCContext &ctx, const R600Subtarget &ST,
+ const AsmPrinter &AP);
+
+ /// Lower a MachineInstr to an MCInst
+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
+};
+
+R600MCInstLower::R600MCInstLower(MCContext &Ctx, const R600Subtarget &ST,
+ const AsmPrinter &AP)
+ : AMDGPUMCInstLower(Ctx, ST, AP) {}
+
+void R600MCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+ for (const MachineOperand &MO : MI->explicit_operands()) {
+ MCOperand MCOp;
+ lowerOperand(MO, MCOp);
+ OutMI.addOperand(MCOp);
+ }
+}
+
+void R600AsmPrinter::emitInstruction(const MachineInstr *MI) {
+ const R600Subtarget &STI = MF->getSubtarget<R600Subtarget>();
+ R600MCInstLower MCInstLowering(OutContext, STI, *this);
+
+ StringRef Err;
+ if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
+ LLVMContext &C = MI->getParent()->getParent()->getFunction().getContext();
+ C.emitError("Illegal instruction detected: " + Err);
+ MI->print(errs());
+ }
+
+ if (MI->isBundle()) {
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator I = ++MI->getIterator();
+ while (I != MBB->instr_end() && I->isInsideBundle()) {
+ emitInstruction(&*I);
+ ++I;
+ }
+ } else {
+ MCInst TmpInst;
+ MCInstLowering.lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+ }
+}
+
+const MCExpr *R600AsmPrinter::lowerConstant(const Constant *CV) {
+ if (const MCExpr *E = lowerAddrSpaceCast(TM, CV, OutContext))
+ return E;
+ return AsmPrinter::lowerConstant(CV);
+}
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
index f85a68706287..36acfafa72aa 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "R600MachineScheduler.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Subtarget.h"
using namespace llvm;
@@ -29,7 +29,7 @@ void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
MRI = &DAG->MRI;
CurInstKind = IDOther;
CurEmitted = 0;
- OccupedSlotsMask = 31;
+ OccupiedSlotsMask = 31;
InstKindLimit[IDAlu] = TII->getMaxAlusPerClause();
InstKindLimit[IDOther] = 32;
InstKindLimit[IDFetch] = ST.getTexVTXClauseSize();
@@ -138,7 +138,7 @@ void R600SchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (NextInstKind != CurInstKind) {
LLVM_DEBUG(dbgs() << "Instruction Type Switch\n");
if (NextInstKind != IDAlu)
- OccupedSlotsMask |= 31;
+ OccupiedSlotsMask |= 31;
CurEmitted = 0;
CurInstKind = NextInstKind;
}
@@ -339,10 +339,10 @@ void R600SchedStrategy::LoadAlu() {
void R600SchedStrategy::PrepareNextSlot() {
LLVM_DEBUG(dbgs() << "New Slot\n");
- assert (OccupedSlotsMask && "Slot wasn't filled");
- OccupedSlotsMask = 0;
-// if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
-// OccupedSlotsMask |= 16;
+ assert(OccupiedSlotsMask && "Slot wasn't filled");
+ OccupiedSlotsMask = 0;
+ // if (HwGen == AMDGPUSubtarget::NORTHERN_ISLANDS)
+ // OccupiedSlotsMask |= 16;
InstructionsGroupCandidate.clear();
LoadAlu();
}
@@ -400,41 +400,41 @@ unsigned R600SchedStrategy::AvailablesAluCount() const {
SUnit* R600SchedStrategy::pickAlu() {
while (AvailablesAluCount() || !Pending[IDAlu].empty()) {
- if (!OccupedSlotsMask) {
+ if (!OccupiedSlotsMask) {
// Bottom up scheduling : predX must comes first
if (!AvailableAlus[AluPredX].empty()) {
- OccupedSlotsMask |= 31;
+ OccupiedSlotsMask |= 31;
return PopInst(AvailableAlus[AluPredX], false);
}
// Flush physical reg copies (RA will discard them)
if (!AvailableAlus[AluDiscarded].empty()) {
- OccupedSlotsMask |= 31;
+ OccupiedSlotsMask |= 31;
return PopInst(AvailableAlus[AluDiscarded], false);
}
// If there is a T_XYZW alu available, use it
if (!AvailableAlus[AluT_XYZW].empty()) {
- OccupedSlotsMask |= 15;
+ OccupiedSlotsMask |= 15;
return PopInst(AvailableAlus[AluT_XYZW], false);
}
}
- bool TransSlotOccuped = OccupedSlotsMask & 16;
- if (!TransSlotOccuped && VLIW5) {
+ bool TransSlotOccupied = OccupiedSlotsMask & 16;
+ if (!TransSlotOccupied && VLIW5) {
if (!AvailableAlus[AluTrans].empty()) {
- OccupedSlotsMask |= 16;
+ OccupiedSlotsMask |= 16;
return PopInst(AvailableAlus[AluTrans], false);
}
SUnit *SU = AttemptFillSlot(3, true);
if (SU) {
- OccupedSlotsMask |= 16;
+ OccupiedSlotsMask |= 16;
return SU;
}
}
for (int Chan = 3; Chan > -1; --Chan) {
- bool isOccupied = OccupedSlotsMask & (1 << Chan);
+ bool isOccupied = OccupiedSlotsMask & (1 << Chan);
if (!isOccupied) {
SUnit *SU = AttemptFillSlot(Chan, false);
if (SU) {
- OccupedSlotsMask |= (1 << Chan);
+ OccupiedSlotsMask |= (1 << Chan);
InstructionsGroupCandidate.push_back(SU->getInstr());
return SU;
}
diff --git a/llvm/lib/Target/AMDGPU/R600MachineScheduler.h b/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
index abcc37f8400d..f3fd71d470ba 100644
--- a/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -63,7 +63,7 @@ class R600SchedStrategy final : public MachineSchedStrategy {
int InstKindLimit[IDLast];
- int OccupedSlotsMask;
+ int OccupiedSlotsMask;
public:
R600SchedStrategy() = default;
diff --git a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
index 8f1a069c232d..ac6a3581e255 100644
--- a/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OpenCLImageTypeLoweringPass.cpp
@@ -24,7 +24,7 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
+#include "R600.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
@@ -86,7 +86,7 @@ GetFunctionFromMDNode(MDNode *Node) {
if (!F)
return nullptr;
- // Sanity checks.
+ // Validation checks.
size_t ExpectNumArgNodeOps = F->arg_size() + 1;
for (size_t i = 0; i < NumKernelArgMDNodes; ++i) {
MDNode *ArgNode = dyn_cast_or_null<MDNode>(Node->getOperand(i + 1));
diff --git a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index 8f19a3e478e8..1a723279dc9f 100644
--- a/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -26,8 +26,8 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index eaac938b098a..e858bba2983c 100644
--- a/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+#include "R600.h"
#include "R600Subtarget.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineDominators.h"
diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td
index fff884e4848e..8cf8edd1254f 100644
--- a/llvm/lib/Target/AMDGPU/R600Processors.td
+++ b/llvm/lib/Target/AMDGPU/R600Processors.td
@@ -45,11 +45,11 @@ class R600SubtargetFeatureGeneration <string Value, string FeatureName,
SubtargetFeatureGeneration <Value, FeatureName, "R600Subtarget", Implies>;
def FeatureR600 : R600SubtargetFeatureGeneration<"R600", "r600",
- [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
+ [FeatureR600ALUInst, FeatureFetchLimit8]
>;
def FeatureR700 : R600SubtargetFeatureGeneration<"R700", "r700",
- [FeatureFetchLimit16, FeatureLocalMemorySize0]
+ [FeatureFetchLimit16]
>;
def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN", "evergreen",
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index e4f7d89bf4c9..99a1a8e9871a 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "R600RegisterInfo.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600Subtarget.h"
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.cpp b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
new file mode 100644
index 000000000000..20c1ce7266dd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.cpp
@@ -0,0 +1,46 @@
+//===-- R600Subtarget.cpp - R600 Subtarget Information --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Implements the R600 specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600Subtarget.h"
+#include "MCTargetDesc/R600MCTargetDesc.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "r600-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "R600GenSubtargetInfo.inc"
+
+R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
+ const TargetMachine &TM)
+ : R600GenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT),
+ InstrInfo(*this),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
+ FMA(false), CaymanISA(false), CFALUBug(false), HasVertexCache(false),
+ R600ALUInst(false), FP64(false), TexVTXClauseSize(0), Gen(R600),
+ TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+ InstrItins(getInstrItineraryForCPU(GPU)) {}
+
+R600Subtarget &R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU,
+ StringRef FS) {
+ SmallString<256> FullFS("+promote-alloca,");
+ FullFS += FS;
+ ParseSubtargetFeatures(GPU, /*TuneCPU*/ GPU, FullFS);
+
+ HasMulU24 = getGeneration() >= EVERGREEN;
+ HasMulI24 = hasCaymanISA();
+
+ return *this;
+}
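For illustration only, not part of the patch: initializeSubtargetDependencies above prepends a target-default feature to the user feature string before parsing, so later user-supplied entries can still take precedence. A minimal sketch of that composition step (composeR600Features is a hypothetical name):

#include <string>

// Compose the full feature string the way the constructor path above does:
// the target default goes first, user features are appended after it.
static std::string composeR600Features(const std::string &UserFS) {
  std::string FullFS = "+promote-alloca,";
  FullFS += UserFS;
  return FullFS;
}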
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index 07238da18c67..94403b88f21a 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -23,7 +23,6 @@
namespace llvm {
-class MCInst;
class MCInstrInfo;
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
new file mode 100644
index 000000000000..39dad45425fc
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.cpp
@@ -0,0 +1,143 @@
+//===-- R600TargetMachine.cpp - TargetMachine for hw codegen targets-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The AMDGPU-R600 target machine contains all of the hardware specific
+/// information needed to emit code for R600 GPUs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetMachine.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600.h"
+#include "R600MachineScheduler.h"
+#include "R600TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ EnableR600StructurizeCFG("r600-ir-structurize",
+ cl::desc("Use StructurizeCFG IR pass"),
+ cl::init(true));
+
+static cl::opt<bool> EnableR600IfConvert("r600-if-convert",
+ cl::desc("Use if conversion pass"),
+ cl::ReallyHidden, cl::init(true));
+
+static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
+ "amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"),
+ cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true),
+ cl::Hidden);
+
+static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
+ return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
+}
+
+static MachineSchedRegistry R600SchedRegistry("r600",
+ "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ TargetOptions Options,
+ Optional<Reloc::Model> RM,
+ Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT)
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+ setRequiresStructuredCFG(true);
+
+ // Override the default since calls aren't supported for r600.
+ if (EnableFunctionCalls &&
+ EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
+ EnableFunctionCalls = false;
+}
+
+const TargetSubtargetInfo *
+R600TargetMachine::getSubtargetImpl(const Function &F) const {
+ StringRef GPU = getGPUName(F);
+ StringRef FS = getFeatureString(F);
+
+ SmallString<128> SubtargetKey(GPU);
+ SubtargetKey.append(FS);
+
+ auto &I = SubtargetMap[SubtargetKey];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
+ }
+
+ return I.get();
+}
+
+TargetTransformInfo
+R600TargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(R600TTIImpl(this, F));
+}
+
+class R600PassConfig final : public AMDGPUPassConfig {
+public:
+ R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
+ : AMDGPUPassConfig(TM, PM) {}
+
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ return createR600MachineScheduler(C);
+ }
+
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ void addPreRegAlloc() override;
+ void addPreSched2() override;
+ void addPreEmitPass() override;
+};
+
+//===----------------------------------------------------------------------===//
+// R600 Pass Setup
+//===----------------------------------------------------------------------===//
+
+bool R600PassConfig::addPreISel() {
+ AMDGPUPassConfig::addPreISel();
+
+ if (EnableR600StructurizeCFG)
+ addPass(createStructurizeCFGPass());
+ return false;
+}
+
+bool R600PassConfig::addInstSelector() {
+ addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
+ return false;
+}
+
+void R600PassConfig::addPreRegAlloc() { addPass(createR600VectorRegMerger()); }
+
+void R600PassConfig::addPreSched2() {
+ addPass(createR600EmitClauseMarkers());
+ if (EnableR600IfConvert)
+ addPass(&IfConverterID);
+ addPass(createR600ClauseMergePass());
+}
+
+void R600PassConfig::addPreEmitPass() {
+ addPass(createAMDGPUCFGStructurizerPass());
+ addPass(createR600ExpandSpecialInstrsPass());
+ addPass(&FinalizeMachineBundlesID);
+ addPass(createR600Packetizer());
+ addPass(createR600ControlFlowFinalizer());
+}
+
+TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new R600PassConfig(*this, PM);
+}
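For illustration only, not part of the patch: getSubtargetImpl above caches one R600Subtarget per (GPU, feature string) key so functions with identical target attributes share a subtarget. A self-contained sketch of that caching pattern (ToySubtarget and ToySubtargetCache are hypothetical names; a plain std::map stands in for the StringMap used above):

#include <map>
#include <memory>
#include <string>

struct ToySubtarget {
  std::string GPU, Features;
};

class ToySubtargetCache {
  // Keyed by GPU + feature string, mirroring the SubtargetKey built above.
  std::map<std::string, std::unique_ptr<ToySubtarget>> Map;

public:
  const ToySubtarget *get(const std::string &GPU, const std::string &FS) {
    auto &Slot = Map[GPU + FS];
    if (!Slot) // created lazily on the first request for this key
      Slot = std::make_unique<ToySubtarget>(ToySubtarget{GPU, FS});
    return Slot.get();
  }
};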
diff --git a/llvm/lib/Target/AMDGPU/R600TargetMachine.h b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
new file mode 100644
index 000000000000..0ccbca3c68b1
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetMachine.h
@@ -0,0 +1,48 @@
+//===-- R600TargetMachine.h - AMDGPU TargetMachine Interface ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The AMDGPU TargetMachine interface definition for hw codegen targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
+#define LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
+
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// R600 Target Machine (R600 -> Cayman)
+//===----------------------------------------------------------------------===//
+
+class R600TargetMachine final : public AMDGPUTargetMachine {
+private:
+ mutable StringMap<std::unique_ptr<R600Subtarget>> SubtargetMap;
+
+public:
+ R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, TargetOptions Options,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
+ CodeGenOpt::Level OL, bool JIT);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override;
+
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
+ bool isMachineVerifierClean() const override { return false; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETMACHINE_H
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
new file mode 100644
index 000000000000..365c005b2503
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -0,0 +1,142 @@
+//===- R600TargetTransformInfo.cpp - AMDGPU specific TTI pass -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements a TargetTransformInfo analysis pass specific to the
+// R600 target machine. It uses the target's detailed information to provide
+// more precise answers to certain TTI queries, while letting the target
+// independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#include "R600TargetTransformInfo.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
+#include "R600Subtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "R600tti"
+
+R600TTIImpl::R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()),
+ ST(static_cast<const R600Subtarget *>(TM->getSubtargetImpl(F))),
+ TLI(ST->getTargetLowering()), CommonTTI(TM, F) {}
+
+unsigned R600TTIImpl::getHardwareNumberOfRegisters(bool Vec) const {
+ return 4 * 128; // XXX - 4 channels. Should these count as vector instead?
+}
+
+unsigned R600TTIImpl::getNumberOfRegisters(bool Vec) const {
+ return getHardwareNumberOfRegisters(Vec);
+}
+
+TypeSize
+R600TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
+ return TypeSize::getFixed(32);
+}
+
+unsigned R600TTIImpl::getMinVectorRegisterBitWidth() const { return 32; }
+
+unsigned R600TTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
+ if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::CONSTANT_ADDRESS)
+ return 128;
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
+ AddrSpace == AMDGPUAS::REGION_ADDRESS)
+ return 64;
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS)
+ return 32;
+
+ if ((AddrSpace == AMDGPUAS::PARAM_D_ADDRESS ||
+ AddrSpace == AMDGPUAS::PARAM_I_ADDRESS ||
+ (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 &&
+ AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15)))
+ return 128;
+ llvm_unreachable("unhandled address space");
+}
+
+bool R600TTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ // We allow vectorization of flat stores, even though we may need to decompose
+ // them later if they may access private memory. We don't have enough context
+ // here, and legalization can handle it.
+ return (AddrSpace != AMDGPUAS::PRIVATE_ADDRESS);
+}
+
+bool R600TTIImpl::isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ Align Alignment,
+ unsigned AddrSpace) const {
+ return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
+}
+
+unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+ // Disable unrolling if the loop is not vectorized.
+ // TODO: Enable this again.
+ if (VF == 1)
+ return 1;
+
+ return 8;
+}
+
+InstructionCost R600TTIImpl::getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) {
+ if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency)
+ return Opcode == Instruction::PHI ? 0 : 1;
+
+ // XXX - For some reason this isn't called for switch.
+ switch (Opcode) {
+ case Instruction::Br:
+ case Instruction::Ret:
+ return 10;
+ default:
+ return BaseT::getCFInstrCost(Opcode, CostKind, I);
+ }
+}
+
+InstructionCost R600TTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index) {
+ switch (Opcode) {
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement: {
+ unsigned EltSize =
+ DL.getTypeSizeInBits(cast<VectorType>(ValTy)->getElementType());
+ if (EltSize < 32) {
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+
+ // Extracts are just reads of a subregister, so are free. Inserts are
+ // considered free because we don't want to have any cost for scalarizing
+ // operations, and we don't have to copy into a different register class.
+
+ // Dynamic indexing isn't free and is best avoided.
+ return Index == ~0u ? 2 : 0;
+ }
+ default:
+ return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
+ }
+}
+
+void R600TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ CommonTTI.getUnrollingPreferences(L, SE, UP, ORE);
+}
+
+void R600TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP) {
+ CommonTTI.getPeelingPreferences(L, SE, PP);
+}
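For illustration only, not part of the patch: the getVectorInstrCost logic above treats insert/extract of elements at least 32 bits wide as free sub-register accesses, unless the index is dynamic. A standalone restatement of that rule (the helper name is hypothetical):

// Cost assigned above for insert/extract of a >= 32-bit element; smaller
// elements fall back to the generic scalarization cost and are not modeled.
static unsigned r600WideElementInsertExtractCost(unsigned Index) {
  const unsigned DynamicIndex = ~0u; // "unknown index" sentinel, as above
  return Index == DynamicIndex ? 2 : 0;
}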
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
new file mode 100644
index 000000000000..544292bc4fd9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -0,0 +1,69 @@
+//===- R600TargetTransformInfo.h - R600 specific TTI --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides a TargetTransformInfo::Concept conforming object for the
+/// R600 target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
+
+#include "AMDGPUTargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class R600Subtarget;
+class AMDGPUTargetLowering;
+
+class R600TTIImpl final : public BasicTTIImplBase<R600TTIImpl> {
+ using BaseT = BasicTTIImplBase<R600TTIImpl>;
+ using TTI = TargetTransformInfo;
+
+ friend BaseT;
+
+ const R600Subtarget *ST;
+ const AMDGPUTargetLowering *TLI;
+ AMDGPUTTIImpl CommonTTI;
+
+public:
+ explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
+
+ const R600Subtarget *getST() const { return ST; }
+ const AMDGPUTargetLowering *getTLI() const { return TLI; }
+
+ void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ TTI::PeelingPreferences &PP);
+ unsigned getHardwareNumberOfRegisters(bool Vec) const;
+ unsigned getNumberOfRegisters(bool Vec) const;
+ TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind Vector) const;
+ unsigned getMinVectorRegisterBitWidth() const;
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+ bool isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, Align Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
+ unsigned AddrSpace) const;
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
+ unsigned AddrSpace) const;
+ unsigned getMaxInterleaveFactor(unsigned VF);
+ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr);
+ InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy,
+ unsigned Index);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_R600TARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index d3c0d792804d..777744f08cde 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -7,13 +7,20 @@
/// \file
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCInstrDesc.h"
-
#ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
#define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
+#include "llvm/MC/MCInstrDesc.h"
+
namespace llvm {
+// This needs to be kept in sync with the field bits in SIRegisterClass.
+enum SIRCFlags : uint8_t {
+ // For vector registers.
+ HasVGPR = 1 << 0,
+ HasAGPR = 1 << 1
+}; // enum SIRCFlags
+
namespace SIInstrFlags {
// This needs to be kept in sync with the field bits in InstSI.
enum : uint64_t {
@@ -132,64 +139,67 @@ enum ClassFlags : unsigned {
}
namespace AMDGPU {
- enum OperandType : unsigned {
- /// Operands with register or 32-bit immediate
- OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_REG_IMM_INT64,
- OPERAND_REG_IMM_INT16,
- OPERAND_REG_IMM_FP32,
- OPERAND_REG_IMM_FP64,
- OPERAND_REG_IMM_FP16,
- OPERAND_REG_IMM_V2FP16,
- OPERAND_REG_IMM_V2INT16,
- OPERAND_REG_IMM_V2INT32,
- OPERAND_REG_IMM_V2FP32,
-
- /// Operands with register or inline constant
- OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_INT32,
- OPERAND_REG_INLINE_C_INT64,
- OPERAND_REG_INLINE_C_FP16,
- OPERAND_REG_INLINE_C_FP32,
- OPERAND_REG_INLINE_C_FP64,
- OPERAND_REG_INLINE_C_V2INT16,
- OPERAND_REG_INLINE_C_V2FP16,
- OPERAND_REG_INLINE_C_V2INT32,
- OPERAND_REG_INLINE_C_V2FP32,
-
- /// Operands with an AccVGPR register or inline constant
- OPERAND_REG_INLINE_AC_INT16,
- OPERAND_REG_INLINE_AC_INT32,
- OPERAND_REG_INLINE_AC_FP16,
- OPERAND_REG_INLINE_AC_FP32,
- OPERAND_REG_INLINE_AC_FP64,
- OPERAND_REG_INLINE_AC_V2INT16,
- OPERAND_REG_INLINE_AC_V2FP16,
- OPERAND_REG_INLINE_AC_V2INT32,
- OPERAND_REG_INLINE_AC_V2FP32,
-
- OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
- OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
-
- OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
- OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
- OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
- OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
-
- OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
- OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
-
- // Operand for source modifiers for VOP instructions
- OPERAND_INPUT_MODS,
-
- // Operand for SDWA instructions
- OPERAND_SDWA_VOPC_DST,
-
- /// Operand with 32-bit immediate that uses the constant bus.
- OPERAND_KIMM32,
- OPERAND_KIMM16
- };
+enum OperandType : unsigned {
+ /// Operands with register or 32-bit immediate
+ OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_REG_IMM_INT64,
+ OPERAND_REG_IMM_INT16,
+ OPERAND_REG_IMM_FP32,
+ OPERAND_REG_IMM_FP64,
+ OPERAND_REG_IMM_FP16,
+ OPERAND_REG_IMM_FP16_DEFERRED,
+ OPERAND_REG_IMM_FP32_DEFERRED,
+ OPERAND_REG_IMM_V2FP16,
+ OPERAND_REG_IMM_V2INT16,
+ OPERAND_REG_IMM_V2INT32,
+ OPERAND_REG_IMM_V2FP32,
+
+ /// Operands with register or inline constant
+ OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_INT32,
+ OPERAND_REG_INLINE_C_INT64,
+ OPERAND_REG_INLINE_C_FP16,
+ OPERAND_REG_INLINE_C_FP32,
+ OPERAND_REG_INLINE_C_FP64,
+ OPERAND_REG_INLINE_C_V2INT16,
+ OPERAND_REG_INLINE_C_V2FP16,
+ OPERAND_REG_INLINE_C_V2INT32,
+ OPERAND_REG_INLINE_C_V2FP32,
+
+ /// Operand with 32-bit immediate that uses the constant bus.
+ OPERAND_KIMM32,
+ OPERAND_KIMM16,
+
+ /// Operands with an AccVGPR register or inline constant
+ OPERAND_REG_INLINE_AC_INT16,
+ OPERAND_REG_INLINE_AC_INT32,
+ OPERAND_REG_INLINE_AC_FP16,
+ OPERAND_REG_INLINE_AC_FP32,
+ OPERAND_REG_INLINE_AC_FP64,
+ OPERAND_REG_INLINE_AC_V2INT16,
+ OPERAND_REG_INLINE_AC_V2FP16,
+ OPERAND_REG_INLINE_AC_V2INT32,
+ OPERAND_REG_INLINE_AC_V2FP32,
+
+ OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
+
+ OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+ OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
+ OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
+
+ OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
+
+ // Operand for source modifiers for VOP instructions
+ OPERAND_INPUT_MODS,
+
+ // Operand for SDWA instructions
+ OPERAND_SDWA_VOPC_DST
+
+};
}
// Input operand modifiers bit-masks
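For illustration only, not part of the patch: the new SIRCFlags bits and the *_FIRST/*_LAST sentinels kept inside OperandType are both meant to be consumed as cheap bit and range tests. A self-contained sketch of the two idioms (the helper names are hypothetical, and the flag values are mirrored locally so the sketch compiles on its own):

#include <cstdint>

// Mirrors SIRCFlags from the patch; kept local so this sketch is standalone.
enum ToySIRCFlags : uint8_t { ToyHasVGPR = 1 << 0, ToyHasAGPR = 1 << 1 };

static bool classHasVGPRs(uint8_t RCFlags) { return RCFlags & ToyHasVGPR; }
static bool classHasAGPRs(uint8_t RCFlags) { return RCFlags & ToyHasAGPR; }

// Operand kinds are classified by checking against the *_FIRST/*_LAST
// sentinels, which is why related operand types stay contiguous in the enum.
static bool isInRange(unsigned OpType, unsigned First, unsigned Last) {
  return OpType >= First && OpType <= Last;
}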
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index d5c56bf2a321..cf93a63f26a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -585,10 +585,43 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
case AMDGPU::SOFT_WQM:
case AMDGPU::STRICT_WWM: {
Register DstReg = MI.getOperand(0).getReg();
-
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, *MRI);
+ if (MI.isCopy()) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (SrcReg == AMDGPU::SCC) {
+ Register SCCCopy = MRI->createVirtualRegister(
+ TRI->getRegClass(AMDGPU::SReg_1_XEXECRegClassID));
+ I = BuildMI(*MI.getParent(),
+ std::next(MachineBasicBlock::iterator(MI)),
+ MI.getDebugLoc(),
+ TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64),
+ SCCCopy)
+ .addImm(-1)
+ .addImm(0);
+ I = BuildMI(*MI.getParent(), std::next(I), I->getDebugLoc(),
+ TII->get(AMDGPU::COPY), DstReg)
+ .addReg(SCCCopy);
+ MI.eraseFromParent();
+ continue;
+ } else if (DstReg == AMDGPU::SCC) {
+ unsigned Opcode =
+ ST.isWave64() ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
+ Register Exec = ST.isWave64() ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+ Register Tmp = MRI->createVirtualRegister(TRI->getBoolRC());
+ I = BuildMI(*MI.getParent(),
+ std::next(MachineBasicBlock::iterator(MI)),
+ MI.getDebugLoc(), TII->get(Opcode))
+ .addReg(Tmp, getDefRegState(true))
+ .addReg(SrcReg)
+ .addReg(Exec);
+ MI.eraseFromParent();
+ continue;
+ }
+ }
+
if (!DstReg.isVirtual()) {
// If the destination register is a physical register there isn't
// really much we can do to fix this.
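For illustration only, not part of the patch: the new SIFixSGPRCopies code handles copies that touch SCC in two directions and picks the opcode by wavefront size. A standalone decision-table sketch of that rewrite, using string opcode names rather than the real AMDGPU opcode enums:

#include <string>

struct SCCCopyRewrite {
  std::string NewOpcode;
  bool SCCDefinedAsSideEffect;
};

// Copy *from* SCC: materialize the bit as an all-ones/zero lane mask with
// S_CSELECT. Copy *to* SCC: AND the source with EXEC, which updates SCC as a
// side effect of the scalar AND.
static SCCCopyRewrite rewriteSCCCopy(bool CopyFromSCC, bool IsWave32) {
  if (CopyFromSCC)
    return {IsWave32 ? "S_CSELECT_B32" : "S_CSELECT_B64", false};
  return {IsWave32 ? "S_AND_B32" : "S_AND_B64", true};
}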
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index ad910522ba90..a3a0e9c9b9ac 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -228,7 +228,7 @@ static bool updateOperand(FoldCandidate &Fold,
MachineOperand &Mod = MI->getOperand(ModIdx);
unsigned Val = Mod.getImm();
if (!(Val & SISrcMods::OP_SEL_0) && (Val & SISrcMods::OP_SEL_1)) {
- // Only apply the following transformation if that operand requries
+ // Only apply the following transformation if that operand requires
// a packed immediate.
switch (TII.get(Opcode).OpInfo[OpNo].OperandType) {
case AMDGPU::OPERAND_REG_IMM_V2FP16:
@@ -452,7 +452,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
const SIRegisterInfo &SRI = TII->getRegisterInfo();
// Fine if the operand can be encoded as an inline constant
- if (OpToFold->isImm()) {
+ if (TII->isLiteralConstantLike(*OpToFold, OpInfo)) {
if (!SRI.opCanUseInlineConstant(OpInfo.OperandType) ||
!TII->isInlineConstant(*OpToFold, OpInfo)) {
// Otherwise check for another constant
@@ -646,7 +646,7 @@ void SIFoldOperands::foldOperand(
return;
if (frameIndexMayFold(TII, *UseMI, UseOpIdx, OpToFold)) {
- // Sanity check that this is a stack access.
+ // Verify that this is a stack access.
// FIXME: Should probably use stack pseudos before frame lowering.
if (TII->isMUBUF(*UseMI)) {
@@ -688,7 +688,7 @@ void SIFoldOperands::foldOperand(
// Don't fold into a copy to a physical register with the same class. Doing
// so would interfere with the register coalescer's logic which would avoid
- // redundant initalizations.
+ // redundant initializations.
if (DestReg.isPhysical() && SrcRC->contains(DestReg))
return;
@@ -902,7 +902,7 @@ void SIFoldOperands::foldOperand(
tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
// FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunites. The shrink operands pass
+ // to enable more folding opportunities. The shrink operands pass
// already does this.
return;
}
@@ -1388,6 +1388,13 @@ bool SIFoldOperands::tryFoldClamp(MachineInstr &MI) {
DefClamp->setImm(1);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
MI.eraseFromParent();
+
+ // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
+ // instruction, so we might as well convert it to the more flexible VOP3-only
+ // mad/fma form.
+ if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
+ Def->eraseFromParent();
+
return true;
}
@@ -1526,6 +1533,13 @@ bool SIFoldOperands::tryFoldOMod(MachineInstr &MI) {
DefOMod->setImm(OMod);
MRI->replaceRegWith(MI.getOperand(0).getReg(), Def->getOperand(0).getReg());
MI.eraseFromParent();
+
+ // Use of output modifiers forces VOP3 encoding for a VOP2 mac/fmac
+ // instruction, so we might as well convert it to the more flexible VOP3-only
+ // mad/fma form.
+ if (TII->convertToThreeAddress(*Def, nullptr, nullptr))
+ Def->eraseFromParent();
+
return true;
}
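For illustration only, not part of the patch: the two hunks above share one rationale, namely that once a folded clamp or omod forces output modifiers on a VOP2 mac/fmac, the VOP3 encoding is required anyway, so the def might as well become the three-address mad/fma form. A minimal sketch of that decision (the helper name and parameters are hypothetical):

// Upgrade a two-address mac/fmac to three-address mad/fma when output
// modifiers are in use: the VOP3 encoding is forced either way, and the
// three-address form is the more flexible one.
static bool shouldUpgradeToThreeAddress(bool UsesOutputModifiers,
                                        bool HasThreeAddressForm) {
  return UsesOutputModifiers && HasThreeAddressForm;
}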
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index c9883d38e08c..882b9a203755 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -125,8 +125,8 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, Register SpillReg,
- int FI) {
+ MachineBasicBlock::iterator I, const DebugLoc &DL,
+ Register SpillReg, int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
@@ -136,7 +136,7 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
LiveRegs.addReg(SpillReg);
- TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, true,
+ TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, true,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
LiveRegs.removeReg(SpillReg);
@@ -147,8 +147,8 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, Register SpillReg,
- int FI) {
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, Register SpillReg, int FI) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
@@ -157,7 +157,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
- TRI.buildSpillLoadStore(MBB, I, Opc, FI, SpillReg, false,
+ TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false,
FuncInfo.getStackPtrOffsetReg(), 0, MMO, nullptr,
&LiveRegs);
}
@@ -258,9 +258,10 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
// Mask the offset in [47:0] of the descriptor
const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
- BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
+ auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0xffff);
+ And->getOperand(3).setIsDead(); // Mark SCC as dead.
} else {
Register FlatScratchInitReg =
MFI->getPreloadedReg(AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT);
@@ -280,9 +281,12 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), FlatScrInitHi)
+ auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
+ FlatScrInitHi)
.addReg(FlatScrInitHi)
.addImm(0);
+ Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
+
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32)).
addReg(FlatScrInitLo).
addImm(int16_t(AMDGPU::Hwreg::ID_FLAT_SCR_LO |
@@ -298,9 +302,11 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
.addReg(FlatScrInitLo)
.addReg(ScratchWaveOffsetReg);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+ auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
+ AMDGPU::FLAT_SCR_HI)
.addReg(FlatScrInitHi)
.addImm(0);
+ Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
return;
}
@@ -318,9 +324,11 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
.addReg(ScratchWaveOffsetReg);
// Convert offset to 256-byte units.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
+ auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
+ AMDGPU::FLAT_SCR_HI)
.addReg(FlatScrInitLo, RegState::Kill)
.addImm(8);
+ LShr->getOperand(3).setIsDead(true); // Mark SCC as dead.
}
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
@@ -419,9 +427,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- // FIXME: Hack to not crash in situations which emitted an error.
- if (!PreloadedScratchWaveOffsetReg)
- return;
// We need to do the replacement of the private segment buffer register even
// if there are no stack objects. There could be stores to undef or a
@@ -467,7 +472,8 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
// chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
// wave offset to a free SGPR.
Register ScratchWaveOffsetReg;
- if (TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
+ if (PreloadedScratchWaveOffsetReg &&
+ TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
AllSGPRs = AllSGPRs.slice(
@@ -485,7 +491,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
} else {
ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
}
- assert(ScratchWaveOffsetReg);
+ assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
if (requiresStackPointerReference(MF)) {
Register SPReg = MFI->getStackPtrOffsetReg();
@@ -506,7 +512,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
- !ST.flatScratchIsArchitected()) {
+ PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
}
@@ -660,10 +666,11 @@ void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
.addReg(ScratchRsrcSub0)
.addReg(ScratchWaveOffsetReg)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
+ auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
.addReg(ScratchRsrcSub1)
.addImm(0)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
}
bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
@@ -720,7 +727,9 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const unsigned OrSaveExec =
ST.isWave32() ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
- BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy).addImm(-1);
+ auto SaveExec = BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), ScratchExecCopy)
+ .addImm(-1);
+ SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
return ScratchExecCopy;
}
@@ -776,7 +785,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI,
/*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, Reg.VGPR,
*Reg.FI);
}
@@ -791,7 +800,8 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ true);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
+ *FI);
}
if (ScratchExecCopy) {
@@ -817,7 +827,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(FramePtrReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
FramePtrFI);
}
@@ -835,7 +845,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(BasePtrReg);
- buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
+ buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, TmpVGPR,
BasePtrFI);
}
@@ -927,10 +937,11 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(StackPtrReg)
.addImm((Alignment - 1) * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
+ auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
.addReg(FramePtrReg, RegState::Kill)
.addImm(-Alignment * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
+ And->getOperand(3).setIsDead(); // Mark SCC as dead.
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
@@ -949,18 +960,22 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
}
if (HasFP && RoundedSize != 0) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
+ auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(RoundedSize * getScratchScaleFactor(ST))
.setMIFlag(MachineInstr::FrameSetup);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
assert((!HasFP || (FuncInfo->SGPRForFPSaveRestoreCopy ||
FuncInfo->FramePointerSaveIndex)) &&
"Needed to save FP but didn't save it anywhere");
+ // If we allow spilling to AGPRs we may have saved FP but then spilled
+ // everything into AGPRs instead of the stack.
assert((HasFP || (!FuncInfo->SGPRForFPSaveRestoreCopy &&
- !FuncInfo->FramePointerSaveIndex)) &&
+ !FuncInfo->FramePointerSaveIndex) ||
+ EnableSpillVGPRToAGPR) &&
"Saved FP but didn't need it");
assert((!HasBP || (FuncInfo->SGPRForBPSaveRestoreCopy ||
@@ -1000,10 +1015,11 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
Optional<int> BPSaveIndex = FuncInfo->BasePointerSaveIndex;
if (RoundedSize != 0 && hasFP(MF)) {
- BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
+ auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
.addReg(StackPtrReg)
.addImm(-static_cast<int64_t>(RoundedSize * getScratchScaleFactor(ST)))
.setMIFlag(MachineInstr::FrameDestroy);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
if (FuncInfo->SGPRForFPSaveRestoreCopy) {
@@ -1028,8 +1044,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
- FramePtrFI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ TmpVGPR, FramePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), FramePtrReg)
.addReg(TmpVGPR, RegState::Kill);
} else {
@@ -1054,8 +1070,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, TmpVGPR,
- BasePtrFI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ TmpVGPR, BasePtrFI);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), BasePtrReg)
.addReg(TmpVGPR, RegState::Kill);
} else {
@@ -1080,8 +1096,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, Reg.VGPR,
- *Reg.FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
+ Reg.VGPR, *Reg.FI);
}
for (const auto &Reg : FuncInfo->WWMReservedRegs) {
@@ -1094,7 +1110,8 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, /*IsProlog*/ false);
- buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, VGPR, *FI);
+ buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL, VGPR,
+ *FI);
}
if (ScratchExecCopy) {
@@ -1154,11 +1171,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
bool SeenDbgInstr = false;
for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (MI.isDebugInstr())
SeenDbgInstr = true;
@@ -1199,7 +1212,6 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
SpillFIs[MI.getOperand(0).getIndex()]) {
MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
- MI.getOperand(0).setIsDebug();
}
}
}
@@ -1301,10 +1313,13 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
// If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
- // We have to anticipate introducing CSR VGPR spills if we don't have any
- // stack objects already, since we require an FP if there is a call and stack.
+ // We have to anticipate introducing CSR VGPR spills, or a spill of the
+ // caller-saved VGPR reserved for SGPR spills (we now always create a stack
+ // entry for it), if we don't have any stack objects already, since we
+ // require an FP if there is a call and stack.
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- const bool WillHaveFP = FrameInfo.hasCalls() && HaveAnyCSRVGPR;
+ const bool WillHaveFP =
+ FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);
// FP will be specially managed like SP.
if (WillHaveFP || hasFP(MF))
@@ -1373,9 +1388,10 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
Amount *= getScratchScaleFactor(ST);
if (IsDestroy)
Amount = -Amount;
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
+ auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
.addReg(SPReg)
.addImm(Amount);
+ Add->getOperand(3).setIsDead(); // Mark SCC as dead.
} else if (CalleePopAmount != 0) {
llvm_unreachable("is this used?");
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d98acfc6c532..519c5b936536 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -19,10 +19,12 @@
#include "SIRegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -465,11 +467,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
if (!Subtarget->hasBCNT(64))
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- if (Subtarget->hasFFBH())
+ if (Subtarget->hasFFBH()) {
+ setOperationAction(ISD::CTLZ, MVT::i32, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+ }
- if (Subtarget->hasFFBL())
+ if (Subtarget->hasFFBL()) {
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+ }
// We only really have 32-bit BFE instructions (and 16-bit on VI).
//
@@ -1061,7 +1067,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
AttributeList Attr = Intrinsic::getAttributes(CI.getContext(),
(Intrinsic::ID)IntrID);
- if (Attr.hasFnAttribute(Attribute::ReadNone))
+ if (Attr.hasFnAttr(Attribute::ReadNone))
return false;
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -1076,7 +1082,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
Info.flags = MachineMemOperand::MODereferenceable;
- if (Attr.hasFnAttribute(Attribute::ReadOnly)) {
+ if (Attr.hasFnAttr(Attribute::ReadOnly)) {
unsigned DMaskLanes = 4;
if (RsrcIntr->IsImage) {
@@ -1100,7 +1106,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// FIXME: What does alignment mean for an image?
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.flags |= MachineMemOperand::MOLoad;
- } else if (Attr.hasFnAttribute(Attribute::WriteOnly)) {
+ } else if (Attr.hasFnAttr(Attribute::WriteOnly)) {
Info.opc = ISD::INTRINSIC_VOID;
Type *DataTy = CI.getArgOperand(0)->getType();
@@ -1423,7 +1429,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
}
bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
if (AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS) {
return (MemVT.getSizeInBits() <= 4 * 32);
} else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
@@ -1657,12 +1663,17 @@ SDValue SITargetLowering::lowerKernArgParameterPtr(SelectionDAG &DAG,
const ArgDescriptor *InputPtrReg;
const TargetRegisterClass *RC;
LLT ArgTy;
+ MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
std::tie(InputPtrReg, RC, ArgTy) =
Info->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ // We may not have the kernarg segment argument if we have no kernel
+ // arguments.
+ if (!InputPtrReg)
+ return DAG.getConstant(0, SL, PtrVT);
+
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
- MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
SDValue BasePtr = DAG.getCopyFromReg(Chain, SL,
MRI.getLiveInVirtReg(InputPtrReg->getRegister()), PtrVT);
@@ -1808,6 +1819,19 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
LLT Ty;
std::tie(Reg, RC, Ty) = MFI.getPreloadedValue(PVID);
+ if (!Reg) {
+ if (PVID == AMDGPUFunctionArgInfo::PreloadedValue::KERNARG_SEGMENT_PTR) {
+ // It's possible for a kernarg intrinsic call to appear in a kernel with
+ // no allocated segment, in which case we do not add the user sgpr
+ // argument, so just return null.
+ return DAG.getConstant(0, SDLoc(), VT);
+ }
+
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ return DAG.getUNDEF(VT);
+ }
+
return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
}
@@ -2023,31 +2047,33 @@ void SITargetLowering::allocateSpecialInputSGPRs(
SIMachineFunctionInfo &Info) const {
auto &ArgInfo = Info.getArgInfo();
- // TODO: Unify handling with private memory pointers.
+ // We need to allocate these in place regardless of their use.
+ const bool IsFixed = AMDGPUTargetMachine::EnableFixedFunctionABI;
- if (Info.hasDispatchPtr())
+ // TODO: Unify handling with private memory pointers.
+ if (IsFixed || Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (Info.hasQueuePtr())
+ if (IsFixed || Info.hasQueuePtr())
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
// constant offset from the kernarg segment.
- if (Info.hasImplicitArgPtr())
+ if (IsFixed || Info.hasImplicitArgPtr())
allocateSGPR64Input(CCInfo, ArgInfo.ImplicitArgPtr);
- if (Info.hasDispatchID())
+ if (IsFixed || Info.hasDispatchID())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchID);
// flat_scratch_init is not applicable for non-kernel functions.
- if (Info.hasWorkGroupIDX())
+ if (IsFixed || Info.hasWorkGroupIDX())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDX);
- if (Info.hasWorkGroupIDY())
+ if (IsFixed || Info.hasWorkGroupIDY())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDY);
- if (Info.hasWorkGroupIDZ())
+ if (IsFixed || Info.hasWorkGroupIDZ())
allocateSGPR32Input(CCInfo, ArgInfo.WorkGroupIDZ);
}
@@ -2590,9 +2616,12 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue ReturnAddrReg = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
- SDValue ReturnAddrVirtualReg = DAG.getRegister(
- MF.getRegInfo().createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass),
- MVT::i64);
+ SDValue ReturnAddrVirtualReg =
+ DAG.getRegister(MF.getRegInfo().createVirtualRegister(
+ CallConv != CallingConv::AMDGPU_Gfx
+ ? &AMDGPU::CCR_SGPR_64RegClass
+ : &AMDGPU::Gfx_CCR_SGPR_64RegClass),
+ MVT::i64);
Chain =
DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
Flag = Chain.getValue(1);
@@ -2655,8 +2684,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(Flag);
unsigned Opc = AMDGPUISD::ENDPGM;
- if (!IsWaveEnd)
- Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
+ if (!IsWaveEnd) {
+ if (IsShader)
+ Opc = AMDGPUISD::RETURN_TO_EPILOG;
+ else if (CallConv == CallingConv::AMDGPU_Gfx)
+ Opc = AMDGPUISD::RET_GFX_FLAG;
+ else
+ Opc = AMDGPUISD::RET_FLAG;
+ }
+
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
@@ -2747,21 +2783,28 @@ void SITargetLowering::passSpecialInputs(
// TODO: Unify with private memory register handling. This is complicated by
// the fact that at least in kernels, the input argument is not necessarily
// in the same location as the input.
- AMDGPUFunctionArgInfo::PreloadedValue InputRegs[] = {
- AMDGPUFunctionArgInfo::DISPATCH_PTR,
- AMDGPUFunctionArgInfo::QUEUE_PTR,
- AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR,
- AMDGPUFunctionArgInfo::DISPATCH_ID,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_X,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Y,
- AMDGPUFunctionArgInfo::WORKGROUP_ID_Z
+ static constexpr std::pair<AMDGPUFunctionArgInfo::PreloadedValue,
+ StringLiteral> ImplicitAttrs[] = {
+ {AMDGPUFunctionArgInfo::DISPATCH_PTR, "amdgpu-no-dispatch-ptr"},
+ {AMDGPUFunctionArgInfo::QUEUE_PTR, "amdgpu-no-queue-ptr"},
+ {AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR, "amdgpu-no-implicitarg-ptr"},
+ {AMDGPUFunctionArgInfo::DISPATCH_ID, "amdgpu-no-dispatch-id"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_X, "amdgpu-no-workgroup-id-x"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Y, "amdgpu-no-workgroup-id-y"},
+ {AMDGPUFunctionArgInfo::WORKGROUP_ID_Z, "amdgpu-no-workgroup-id-z"}
};
- for (auto InputID : InputRegs) {
+ for (auto Attr : ImplicitAttrs) {
const ArgDescriptor *OutgoingArg;
const TargetRegisterClass *ArgRC;
LLT ArgTy;
+ AMDGPUFunctionArgInfo::PreloadedValue InputID = Attr.first;
+
+ // If the callee does not use the attribute value, skip copying the value.
+ if (CLI.CB->hasFnAttr(Attr.second))
+ continue;
+
std::tie(OutgoingArg, ArgRC, ArgTy) =
CalleeArgInfo->getPreloadedValue(InputID);
if (!OutgoingArg)
@@ -2780,11 +2823,14 @@ void SITargetLowering::passSpecialInputs(
if (IncomingArg) {
InputReg = loadInputValue(DAG, ArgRC, ArgVT, DL, *IncomingArg);
- } else {
+ } else if (InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR) {
// The implicit arg ptr is special because it doesn't have a corresponding
// input for kernels, and is computed from the kernarg segment pointer.
- assert(InputID == AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
InputReg = getImplicitArgPtr(DAG, DL);
+ } else {
+ // We may have proven the input wasn't needed, even though the ABI still
+ // requires it. We just need to allocate the register appropriately.
+ InputReg = DAG.getUNDEF(ArgVT);
}
if (OutgoingArg->isRegister()) {
@@ -2827,11 +2873,17 @@ void SITargetLowering::passSpecialInputs(
SDValue InputReg;
SDLoc SL;
+ const bool NeedWorkItemIDX = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-x");
+ const bool NeedWorkItemIDY = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-y");
+ const bool NeedWorkItemIDZ = !CLI.CB->hasFnAttr("amdgpu-no-workitem-id-z");
+
// If incoming ids are not packed we need to pack them.
- if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX)
+ if (IncomingArgX && !IncomingArgX->isMasked() && CalleeArgInfo->WorkItemIDX &&
+ NeedWorkItemIDX)
InputReg = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgX);
- if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY) {
+ if (IncomingArgY && !IncomingArgY->isMasked() && CalleeArgInfo->WorkItemIDY &&
+ NeedWorkItemIDY) {
SDValue Y = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgY);
Y = DAG.getNode(ISD::SHL, SL, MVT::i32, Y,
DAG.getShiftAmountConstant(10, MVT::i32, SL));
@@ -2839,7 +2891,8 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Y) : Y;
}
- if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ) {
+ if (IncomingArgZ && !IncomingArgZ->isMasked() && CalleeArgInfo->WorkItemIDZ &&
+ NeedWorkItemIDZ) {
SDValue Z = loadInputValue(DAG, ArgRC, MVT::i32, DL, *IncomingArgZ);
Z = DAG.getNode(ISD::SHL, SL, MVT::i32, Z,
DAG.getShiftAmountConstant(20, MVT::i32, SL));
@@ -2847,7 +2900,7 @@ void SITargetLowering::passSpecialInputs(
DAG.getNode(ISD::OR, SL, MVT::i32, InputReg, Z) : Z;
}
- if (!InputReg.getNode()) {
+ if (!InputReg && (NeedWorkItemIDX || NeedWorkItemIDY || NeedWorkItemIDZ)) {
// Workitem ids are already packed, any of present incoming arguments
// will carry all required fields.
ArgDescriptor IncomingArg = ArgDescriptor::createArg(
@@ -2858,13 +2911,17 @@ void SITargetLowering::passSpecialInputs(
}
if (OutgoingArg->isRegister()) {
- RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+ if (InputReg)
+ RegsToPass.emplace_back(OutgoingArg->getRegister(), InputReg);
+
CCInfo.AllocateReg(OutgoingArg->getRegister());
} else {
unsigned SpecialArgOffset = CCInfo.AllocateStack(4, Align(4));
- SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
- SpecialArgOffset);
- MemOpChains.push_back(ArgStore);
+ if (InputReg) {
+ SDValue ArgStore = storeStackInputValue(DAG, DL, Chain, InputReg,
+ SpecialArgOffset);
+ MemOpChains.push_back(ArgStore);
+ }
}
}
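For illustration only, not part of the patch: the workitem-id handling in the hunks above packs X, Y and Z into a single 32-bit value, with Y shifted up by 10 bits and Z by 20. A standalone sketch of that layout (the helper name is hypothetical, and the masks are added here only to keep the sketch self-contained):

#include <cstdint>

// X lives in bits [9:0], Y in [19:10], Z in [29:20], matching the shift
// amounts used by passSpecialInputs above.
static uint32_t packWorkItemIDs(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & 0x3ffu) | ((Y & 0x3ffu) << 10) | ((Z & 0x3ffu) << 20);
}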
@@ -4091,7 +4148,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
}
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
- if (TRI->getRegSizeInBits(*Src2RC) == 64) {
+ unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
+ assert(WaveSize == 64 || WaveSize == 32);
+
+ if (WaveSize == 64) {
if (ST.hasScalarCompareEq64()) {
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
.addReg(Src2.getReg())
@@ -4121,8 +4181,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
- BuildMI(*BB, MII, DL, TII->get(AMDGPU::COPY), CarryDest.getReg())
- .addReg(AMDGPU::SCC);
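+ // Materialize the carry-out as a full wave mask: S_CSELECT_B(32|64) reads
+ // SCC implicitly and selects between -1 and 0.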
+ unsigned SelOpc =
+ (WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
+
+ BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
+ .addImm(-1)
+ .addImm(0);
+
MI.eraseFromParent();
return BB;
}
@@ -4261,6 +4326,13 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::V_ADDC_U32_e32:
+ case AMDGPU::V_SUBB_U32_e32:
+ case AMDGPU::V_SUBBREV_U32_e32:
+ // These instructions have an implicit use of vcc which counts towards the
+ // constant bus limit.
+ TII->legalizeOperands(MI);
+ return BB;
case AMDGPU::DS_GWS_INIT:
case AMDGPU::DS_GWS_SEMA_BR:
case AMDGPU::DS_GWS_BARRIER:
@@ -4818,7 +4890,7 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N,
}
if (const ConstantSDNode *Arg = dyn_cast<ConstantSDNode>(Src)) {
// (ballot 0) -> 0
- if (Arg->isNullValue())
+ if (Arg->isZero())
return DAG.getConstant(0, SL, VT);
// (ballot 1) -> EXEC/EXEC_LO
@@ -5266,9 +5338,18 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr(
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
- SDValue QueuePtr = CreateLiveInRegister(
- DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+
+ SDValue QueuePtr;
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined. We don't want to delete the trap,
+ // so just use a null pointer.
+ QueuePtr = DAG.getConstant(0, SL, MVT::i64);
+ } else {
+ QueuePtr = CreateLiveInRegister(
+ DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
+ }
+
SDValue SGPR01 = DAG.getRegister(AMDGPU::SGPR0_SGPR1, MVT::i64);
SDValue ToReg = DAG.getCopyToReg(Chain, SL, SGPR01,
QueuePtr, SDValue());
@@ -5345,7 +5426,11 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Register UserSGPR = Info->getQueuePtrUserSGPR();
- assert(UserSGPR != AMDGPU::NoRegister);
+ if (UserSGPR == AMDGPU::NoRegister) {
+ // We probably are in a function incorrectly marked with
+ // amdgpu-no-queue-ptr. This is undefined.
+ return DAG.getUNDEF(MVT::i32);
+ }
SDValue QueuePtr = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, UserSGPR, MVT::i64);
@@ -5936,6 +6021,9 @@ static SDValue constructRetValue(SelectionDAG &DAG,
EVT LegalReqRetVT = ReqRetVT;
if (!ReqRetVT.isVector()) {
+ if (!Data.getValueType().isInteger())
+ Data = DAG.getNode(ISD::BITCAST, DL,
+ Data.getValueType().changeTypeToInteger(), Data);
Data = DAG.getNode(ISD::TRUNCATE, DL, ReqRetVT.changeTypeToInteger(), Data);
} else {
// We need to widen the return vector to a legal type
@@ -6124,7 +6212,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (MIPMappingInfo) {
if (auto *ConstantLod = dyn_cast<ConstantSDNode>(
Op.getOperand(ArgOffset + Intr->MipIndex))) {
- if (ConstantLod->isNullValue()) {
+ if (ConstantLod->isZero()) {
IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip
VAddrEnd--; // remove 'mip'
}
@@ -6659,7 +6747,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// intrinsic has the numerator as the first operand to match a normal
// division operation.
- SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator;
+ SDValue Src0 = Param->isAllOnes() ? Numerator : Denominator;
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
Denominator, Numerator);
@@ -6793,7 +6881,7 @@ static void updateBufferMMO(MachineMemOperand *MMO, SDValue VOffset,
}
if (VIndex && (!isa<ConstantSDNode>(VIndex) ||
- !cast<ConstantSDNode>(VIndex)->isNullValue())) {
+ !cast<ConstantSDNode>(VIndex)->isZero())) {
// The strided index component of the address is not known to be zero, so we
// cannot represent it in the MMO. Give up.
MMO->setValue((Value *)nullptr);
@@ -7341,7 +7429,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
case Intrinsic::amdgcn_image_bvh_intersect_ray: {
- SDLoc DL(Op);
MemSDNode *M = cast<MemSDNode>(Op);
SDValue NodePtr = M->getOperand(2);
SDValue RayExtent = M->getOperand(3);
@@ -7360,12 +7447,27 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return SDValue();
}
- bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
- bool Is64 = NodePtr.getValueType() == MVT::i64;
- unsigned Opcode = IsA16 ? Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa
- : Is64 ? AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa
- : AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa;
+ const bool IsA16 = RayDir.getValueType().getVectorElementType() == MVT::f16;
+ const bool Is64 = NodePtr.getValueType() == MVT::i64;
+ const unsigned NumVDataDwords = 4;
+ const unsigned NumVAddrDwords = IsA16 ? (Is64 ? 9 : 8) : (Is64 ? 12 : 11);
+ const bool UseNSA = Subtarget->hasNSAEncoding() &&
+ NumVAddrDwords <= Subtarget->getNSAMaxSize();
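+ // Without NSA (or when too many address dwords are required) the address
+ // operands are packed below into a single 8- or 16-dword vector operand.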
+ const unsigned BaseOpcodes[2][2] = {
+ {AMDGPU::IMAGE_BVH_INTERSECT_RAY, AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16},
+ {AMDGPU::IMAGE_BVH64_INTERSECT_RAY,
+ AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16}};
+ int Opcode;
+ if (UseNSA) {
+ Opcode = AMDGPU::getMIMGOpcode(BaseOpcodes[Is64][IsA16],
+ AMDGPU::MIMGEncGfx10NSA, NumVDataDwords,
+ NumVAddrDwords);
+ } else {
+ Opcode = AMDGPU::getMIMGOpcode(
+ BaseOpcodes[Is64][IsA16], AMDGPU::MIMGEncGfx10Default, NumVDataDwords,
+ PowerOf2Ceil(NumVAddrDwords));
+ }
+ assert(Opcode != -1);
SmallVector<SDValue, 16> Ops;
@@ -7405,6 +7507,20 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
packLanes(RayOrigin, true);
packLanes(RayDir, true);
packLanes(RayInvDir, false);
+
+ if (!UseNSA) {
+ // Build a single vector containing all the operands prepared so far.
+ if (NumVAddrDwords > 8) {
+ SDValue Undef = DAG.getUNDEF(MVT::i32);
+ Ops.append(16 - Ops.size(), Undef);
+ }
+ assert(Ops.size() == 8 || Ops.size() == 16);
+ SDValue MergedOps = DAG.getBuildVector(
+ Ops.size() == 16 ? MVT::v16i32 : MVT::v8i32, DL, Ops);
+ Ops.clear();
+ Ops.push_back(MergedOps);
+ }
+
Ops.push_back(TDescr);
if (IsA16)
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
@@ -7610,7 +7726,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(0) // Chain
};
- unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
+ unsigned Opc = Done->isZero() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
@@ -8241,6 +8357,16 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Cond = Op.getOperand(0);
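+ // For a uniform select whose condition has a single use, keep the 64-bit
+ // select whole rather than splitting it into the two 32-bit selects built
+ // below.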
+ if (Subtarget->hasScalarCompareEq64() && Op->getOperand(0)->hasOneUse() &&
+ !Op->isDivergent()) {
+ if (VT == MVT::i64)
+ return Op;
+ SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(1));
+ SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(2));
+ return DAG.getNode(ISD::BITCAST, DL, VT,
+ DAG.getSelect(DL, MVT::i64, Cond, LHS, RHS));
+ }
+
SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
SDValue One = DAG.getConstant(1, DL, MVT::i32);
@@ -9358,7 +9484,8 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (CRHS) {
if (SDValue Split
- = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR, LHS, CRHS))
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR,
+ N->getOperand(0), CRHS))
return Split;
}
@@ -9445,7 +9572,7 @@ SDValue SITargetLowering::performClassCombine(SDNode *N,
// fp_class x, 0 -> false
if (const ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Mask)) {
- if (CMask->isNullValue())
+ if (CMask->isZero())
return DAG.getConstant(0, SDLoc(N), MVT::i1);
}
@@ -10348,7 +10475,7 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
return getMad64_32(DAG, SL, VT, MulLHS, MulRHS, AddRHS, false);
}
- if (numBitsSigned(MulLHS, DAG) < 32 && numBitsSigned(MulRHS, DAG) < 32) {
+ if (numBitsSigned(MulLHS, DAG) <= 32 && numBitsSigned(MulRHS, DAG) <= 32) {
MulLHS = DAG.getSExtOrTrunc(MulLHS, SL, MVT::i32);
MulRHS = DAG.getSExtOrTrunc(MulRHS, SL, MVT::i32);
AddRHS = DAG.getSExtOrTrunc(AddRHS, SL, MVT::i64);
@@ -10434,7 +10561,7 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
if (LHS.getOpcode() == ISD::SUBCARRY) {
// sub (subcarry x, 0, cc), y => subcarry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- if (!C || !C->isNullValue())
+ if (!C || !C->isZero())
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
@@ -10657,20 +10784,20 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
// setcc (sext from i1 cc), -1, eq|sle|uge) => cc
// setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
// setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
- if ((CRHS->isAllOnesValue() &&
+ if ((CRHS->isAllOnes() &&
(CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
- (CRHS->isNullValue() &&
+ (CRHS->isZero() &&
(CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
DAG.getConstant(-1, SL, MVT::i1));
- if ((CRHS->isAllOnesValue() &&
+ if ((CRHS->isAllOnes() &&
(CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
- (CRHS->isNullValue() &&
+ (CRHS->isZero() &&
(CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
return LHS.getOperand(0);
}
- uint64_t CRHSVal = CRHS->getZExtValue();
+ const APInt &CRHSVal = CRHS->getAPIntValue();
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
LHS.getOpcode() == ISD::SELECT &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
@@ -10682,8 +10809,8 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
// setcc (select cc, CT, CF), CF, ne => cc
// setcc (select cc, CT, CF), CT, ne => xor cc, -1
// setcc (select cc, CT, CF), CT, eq => cc
- uint64_t CT = LHS.getConstantOperandVal(1);
- uint64_t CF = LHS.getConstantOperandVal(2);
+ const APInt &CT = LHS.getConstantOperandAPInt(1);
+ const APInt &CF = LHS.getConstantOperandAPInt(2);
if ((CF == CRHSVal && CC == ISD::SETEQ) ||
(CT == CRHSVal && CC == ISD::SETNE))
@@ -10747,7 +10874,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
// cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
// cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
if (auto *C = dyn_cast<ConstantSDNode>(Shift.getOperand(1))) {
- Shift = DAG.getZExtOrTrunc(Shift.getOperand(0),
+ SDValue Shifted = DAG.getZExtOrTrunc(Shift.getOperand(0),
SDLoc(Shift.getOperand(0)), MVT::i32);
unsigned ShiftOffset = 8 * Offset;
@@ -10758,7 +10885,7 @@ SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
if (ShiftOffset < 32 && (ShiftOffset % 8) == 0) {
return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + ShiftOffset / 8, SL,
- MVT::f32, Shift);
+ MVT::f32, Shifted);
}
}
}
@@ -12086,6 +12213,25 @@ static bool fpModeMatchesGlobalFPAtomicMode(const AtomicRMWInst *RMW) {
TargetLowering::AtomicExpansionKind
SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+
+ auto ReportUnsafeHWInst = [&](TargetLowering::AtomicExpansionKind Kind) {
+ OptimizationRemarkEmitter ORE(RMW->getFunction());
+ LLVMContext &Ctx = RMW->getFunction()->getContext();
+ SmallVector<StringRef> SSNs;
+ Ctx.getSyncScopeNames(SSNs);
+ auto MemScope = SSNs[RMW->getSyncScopeID()].empty()
+ ? "system"
+ : SSNs[RMW->getSyncScopeID()];
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "Passed", RMW)
+ << "Hardware instruction generated for atomic "
+ << RMW->getOperationName(RMW->getOperation())
+ << " operation at memory scope " << MemScope
+ << " due to an unsafe request.";
+ });
+ return Kind;
+ };
+
switch (RMW->getOperation()) {
case AtomicRMWInst::FAdd: {
Type *Ty = RMW->getType();
@@ -12120,28 +12266,30 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
SSID == RMW->getContext().getOrInsertSyncScopeID("one-as"))
return AtomicExpansionKind::CmpXChg;
- return AtomicExpansionKind::None;
+ return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
if (AS == AMDGPUAS::FLAT_ADDRESS)
return AtomicExpansionKind::CmpXChg;
- return RMW->use_empty() ? AtomicExpansionKind::None
+ return RMW->use_empty() ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
// DS FP atomics do respect the denormal mode, but the rounding mode is fixed
// to round-to-nearest-even.
// The only exception is DS_ADD_F64 which never flushes regardless of mode.
- if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomics()) {
+ if (AS == AMDGPUAS::LOCAL_ADDRESS && Subtarget->hasLDSFPAtomicAdd()) {
if (!Ty->isDoubleTy())
return AtomicExpansionKind::None;
- return (fpModeMatchesGlobalFPAtomicMode(RMW) ||
- RMW->getFunction()
- ->getFnAttribute("amdgpu-unsafe-fp-atomics")
- .getValueAsString() == "true")
- ? AtomicExpansionKind::None
+ if (fpModeMatchesGlobalFPAtomicMode(RMW))
+ return AtomicExpansionKind::None;
+
+ return RMW->getFunction()
+ ->getFnAttribute("amdgpu-unsafe-fp-atomics")
+ .getValueAsString() == "true"
+ ? ReportUnsafeHWInst(AtomicExpansionKind::None)
: AtomicExpansionKind::CmpXChg;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index f3d34267a81d..1e48c96ad3c8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -16,6 +16,7 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -267,7 +268,7 @@ public:
Instruction *I = nullptr) const override;
bool canMergeStoresTo(unsigned AS, EVT MemVT,
- const SelectionDAG &DAG) const override;
+ const MachineFunction &MF) const override;
bool allowsMisalignedMemoryAccessesImpl(
unsigned Size, unsigned AddrSpace, Align Alignment,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 7ba20eb6027b..125f006a1d1d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -58,6 +58,8 @@ enum HardClauseType {
// Internal instructions, which are allowed in the middle of a hard clause,
// except for s_waitcnt.
HARDCLAUSE_INTERNAL,
+ // Meta instructions that do not result in any real ISA instructions, like KILL.
+ HARDCLAUSE_IGNORE,
// Instructions that are not allowed in a hard clause: SALU, export, branch,
// message, GDS, s_waitcnt and anything else not mentioned above.
HARDCLAUSE_ILLEGAL,
@@ -100,6 +102,8 @@ public:
// It's safe to treat the rest as illegal.
if (MI.getOpcode() == AMDGPU::S_NOP)
return HARDCLAUSE_INTERNAL;
+ if (MI.isMetaInstruction())
+ return HARDCLAUSE_IGNORE;
return HARDCLAUSE_ILLEGAL;
}
@@ -112,25 +116,25 @@ public:
// The last non-internal instruction in the clause.
MachineInstr *Last = nullptr;
// The length of the clause including any internal instructions in the
- // middle or after the end of the clause.
+ // middle (but not at the end) of the clause.
unsigned Length = 0;
+ // Internal instructions at the end of a clause should not be included in
+ // the clause. Count them in TrailingInternalLength until a new memory
+ // instruction is added.
+ unsigned TrailingInternalLength = 0;
// The base operands of *Last.
SmallVector<const MachineOperand *, 4> BaseOps;
};
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
- // Get the size of the clause excluding any internal instructions at the
- // end.
- unsigned Size =
- std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
- if (Size < 2)
+ if (CI.First == CI.Last)
return false;
- assert(Size <= 64 && "Hard clause is too long!");
+ assert(CI.Length <= 64 && "Hard clause is too long!");
auto &MBB = *CI.First->getParent();
auto ClauseMI =
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
- .addImm(Size - 1);
+ .addImm(CI.Length - 1);
finalizeBundle(MBB, ClauseMI->getIterator(),
std::next(CI.Last->getIterator()));
return true;
@@ -168,6 +172,7 @@ public:
if (CI.Length == 64 ||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
+ Type != HARDCLAUSE_IGNORE &&
(Type != CI.Type ||
// Note that we lie to shouldClusterMemOps about the size of the
// cluster. When shouldClusterMemOps is called from the machine
@@ -182,14 +187,20 @@ public:
if (CI.Length) {
// Extend the current clause.
- ++CI.Length;
- if (Type != HARDCLAUSE_INTERNAL) {
- CI.Last = &MI;
- CI.BaseOps = std::move(BaseOps);
+ if (Type != HARDCLAUSE_IGNORE) {
+ if (Type == HARDCLAUSE_INTERNAL) {
+ ++CI.TrailingInternalLength;
+ } else {
+ ++CI.Length;
+ CI.Length += CI.TrailingInternalLength;
+ CI.TrailingInternalLength = 0;
+ CI.Last = &MI;
+ CI.BaseOps = std::move(BaseOps);
+ }
}
} else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
// Start a new clause.
- CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
+ CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 7d6f79922d2e..f4e5771d2a2a 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -73,7 +73,7 @@ public:
// Class of object that encapsulates latest instruction counter score
// associated with the operand. Used for determining whether
-// s_waitcnt instruction needs to be emited.
+// s_waitcnt instruction needs to be emitted.
#define CNT_MASK(t) (1u << (t))
@@ -963,6 +963,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
+ MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
@@ -1686,17 +1687,13 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
bool HaveScalarStores = false;
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
- ++BI) {
- MachineBasicBlock &MBB = *BI;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;
- ++I) {
- if (!HaveScalarStores && TII->isScalarStore(*I))
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!HaveScalarStores && TII->isScalarStore(MI))
HaveScalarStores = true;
- if (I->getOpcode() == AMDGPU::S_ENDPGM ||
- I->getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
+ if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG)
EndPgmBlocks.push_back(&MBB);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7ab0f7a100c5..4a928123b68f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -19,8 +19,10 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -108,7 +110,7 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
AAResults *AA) const {
- if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI)) {
+ if (isVOP1(MI) || isVOP2(MI) || isVOP3(MI) || isSDWA(MI) || isSALU(MI)) {
// Normally VALU use of exec would block the rematerialization, but that
// is OK in this case to have an implicit exec read as all VALU do.
// We really want all of the generic logic for this except for this.
@@ -116,6 +118,10 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
// Another potential implicit use is mode register. The core logic of
// the RA will not attempt rematerialization if mode is set anywhere
// in the function, otherwise it is safe since mode is not changed.
+
+ // This differs from the generic method, which does not allow
+ // rematerialization if there are virtual register uses. We do allow it,
+ // which is why this method handles SOP instructions as well.
return !MI.hasImplicitDef() &&
MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses() &&
!MI.mayRaiseFPException();
@@ -1637,10 +1643,20 @@ void SIInstrInfo::insertReturn(MachineBasicBlock &MBB) const {
unsigned SIInstrInfo::getNumWaitStates(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- default: return 1; // FIXME: Do wait states equal cycles?
+ default:
+ if (MI.isMetaInstruction())
+ return 0;
+ return 1; // FIXME: Do wait states equal cycles?
case AMDGPU::S_NOP:
return MI.getOperand(0).getImm() + 1;
+
+ // FIXME: Any other pseudo instruction?
+ // SI_RETURN_TO_EPILOG is a fallthrough to code outside of the function. The
+ // hazard, even if one exists, won't really be visible. Should we handle it?
+ case AMDGPU::SI_MASKED_UNREACHABLE:
+ case AMDGPU::WAVE_BARRIER:
+ return 0;
}
}
@@ -1889,7 +1905,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(AMDGPU::VGPRIndexMode::DST_ENABLE);
SetOn->getOperand(3).setIsUndef();
- const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect);
+ const MCInstrDesc &OpDesc = get(AMDGPU::V_MOV_B32_indirect_write);
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, OpDesc)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
@@ -1929,11 +1945,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addImm(AMDGPU::VGPRIndexMode::SRC0_ENABLE);
SetOn->getOperand(3).setIsUndef();
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32))
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_indirect_read))
.addDef(Dst)
.addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
- .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0))
- .addReg(AMDGPU::M0, RegState::Implicit);
+ .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
MachineInstr *SetOff = BuildMI(MBB, MI, DL, get(AMDGPU::S_SET_GPR_IDX_OFF));
@@ -2208,15 +2223,17 @@ MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
return MI.getOperand(0).getMBB();
}
-unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &DestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const {
+void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &DestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset,
+ RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
assert(MBB.pred_size() == 1);
+ assert(RestoreBB.empty() &&
+ "restore block should be inserted for restoring clobbered registers");
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -2253,14 +2270,6 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
.addReg(PCReg);
- auto ComputeBlockSize = [](const TargetInstrInfo *TII,
- const MachineBasicBlock &MBB) {
- unsigned Size = 0;
- for (const MachineInstr &MI : MBB)
- Size += TII->getInstSizeInBytes(MI);
- return Size;
- };
-
// FIXME: If spilling is necessary, this will fail because this scavenger has
// no emergency stack slots. It is non-trivial to spill in this situation,
// because the restore code needs to be specially placed after the
@@ -2299,22 +2308,34 @@ unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->enterBasicBlockEnd(MBB);
Register Scav = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_64RegClass,
- MachineBasicBlock::iterator(GetPC), false, 0);
- MRI.replaceRegWith(PCReg, Scav);
- MRI.clearVirtRegs();
- RS->setRegUsed(Scav);
+ AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+ /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+ if (Scav) {
+ RS->setRegUsed(Scav);
+ MRI.replaceRegWith(PCReg, Scav);
+ MRI.clearVirtRegs();
+ } else {
+ // Spilling an SGPR requires a VGPR, so we reuse the temporary VGPR's spill
+ // slot for the SGPR spill.
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ TRI->spillEmergencySGPR(GetPC, RestoreBB, AMDGPU::SGPR0_SGPR1, RS);
+ MRI.replaceRegWith(PCReg, AMDGPU::SGPR0_SGPR1);
+ MRI.clearVirtRegs();
+ }
+ MCSymbol *DestLabel = Scav ? DestBB.getSymbol() : RestoreBB.getSymbol();
// Now, the distance could be defined.
auto *Offset = MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(DestBB.getSymbol(), MCCtx),
+ MCSymbolRefExpr::create(DestLabel, MCCtx),
MCSymbolRefExpr::create(PostGetPCLabel, MCCtx), MCCtx);
// Add offset assignments.
auto *Mask = MCConstantExpr::create(0xFFFFFFFFULL, MCCtx);
OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx));
auto *ShAmt = MCConstantExpr::create(32, MCCtx);
OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx));
- return ComputeBlockSize(this, MBB);
+
+ return;
}
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
@@ -2443,16 +2464,15 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- MachineBasicBlock::iterator I = MBB.getFirstTerminator();
-
unsigned Count = 0;
unsigned RemovedSize = 0;
- while (I != MBB.end()) {
- MachineBasicBlock::iterator Next = std::next(I);
- RemovedSize += getInstSizeInBytes(*I);
- I->eraseFromParent();
- ++Count;
- I = Next;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB.terminators())) {
+ // Skip over artificial terminators when removing instructions.
+ if (MI.isBranch() || MI.isReturn()) {
+ RemovedSize += getInstSizeInBytes(MI);
+ MI.eraseFromParent();
+ ++Count;
+ }
}
if (BytesRemoved)
@@ -2691,18 +2711,11 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
}
}
-bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) const {
+bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO: {
- // If there are additional implicit register operands, this may be used for
- // register indexing so the source register operand isn't simply copied.
- unsigned NumOps = MI.getDesc().getNumOperands() +
- MI.getDesc().getNumImplicitUses();
-
- return MI.getNumOperands() == NumOps;
- }
+ case AMDGPU::V_MOV_B64_PSEUDO:
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
@@ -3069,16 +3082,24 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
return false;
}
-static int64_t getFoldableImm(const MachineOperand* MO) {
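+// If Reg has a unique virtual register def that is a foldable move of an
+// immediate, return true and set Imm to that immediate.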
+static bool getFoldableImm(Register Reg, const MachineRegisterInfo &MRI,
+ int64_t &Imm) {
+ if (Reg.isPhysical())
+ return false;
+ auto *Def = MRI.getUniqueVRegDef(Reg);
+ if (Def && SIInstrInfo::isFoldableCopy(*Def) && Def->getOperand(1).isImm()) {
+ Imm = Def->getOperand(1).getImm();
+ return true;
+ }
+ return false;
+}
+
+static bool getFoldableImm(const MachineOperand *MO, int64_t &Imm) {
if (!MO->isReg())
return false;
const MachineFunction *MF = MO->getParent()->getParent()->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
- auto Def = MRI.getUniqueVRegDef(MO->getReg());
- if (Def && Def->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
- Def->getOperand(1).isImm())
- return Def->getOperand(1).getImm();
- return AMDGPU::NoRegister;
+ return getFoldableImm(MO->getReg(), MRI, Imm);
}
static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
@@ -3093,9 +3114,9 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
}
}
-MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
- MachineInstr &MI,
- LiveVariables *LV) const {
+MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
unsigned Opc = MI.getOpcode();
bool IsF16 = false;
bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
@@ -3145,50 +3166,58 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
MachineInstrBuilder MIB;
+ MachineBasicBlock &MBB = *MI.getParent();
if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 &&
// If we have an SGPR input, we will violate the constant bus restriction.
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
- !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
- if (auto Imm = getFoldableImm(Src2)) {
+ !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
+ int64_t Imm;
+ if (getFoldableImm(Src2, Imm)) {
unsigned NewOpc =
IsFMA ? (IsF16 ? AMDGPU::V_FMAAK_F16 : AMDGPU::V_FMAAK_F32)
: (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
if (pseudoToMCOpcode(NewOpc) != -1) {
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src0)
.add(*Src1)
.addImm(Imm);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
}
unsigned NewOpc = IsFMA
? (IsF16 ? AMDGPU::V_FMAMK_F16 : AMDGPU::V_FMAMK_F32)
: (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
- if (auto Imm = getFoldableImm(Src1)) {
+ if (getFoldableImm(Src1, Imm)) {
if (pseudoToMCOpcode(NewOpc) != -1) {
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src0)
.addImm(Imm)
.add(*Src2);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
}
- if (auto Imm = getFoldableImm(Src0)) {
+ if (getFoldableImm(Src0, Imm)) {
if (pseudoToMCOpcode(NewOpc) != -1 &&
isOperandLegal(
MI, AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::src0),
Src1)) {
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.add(*Src1)
.addImm(Imm)
.add(*Src2);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
}
@@ -3201,7 +3230,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
- MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(*Dst)
.addImm(Src0Mods ? Src0Mods->getImm() : 0)
.add(*Src0)
@@ -3212,6 +3241,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
.addImm(Clamp ? Clamp->getImm() : 0)
.addImm(Omod ? Omod->getImm() : 0);
updateLiveVariables(LV, MI, *MIB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
return MIB;
}
@@ -3382,6 +3413,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
switch (OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_IMM_V2FP32:
@@ -3420,6 +3452,7 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
// This suffers the same problem as the scalar 16-bit cases.
return AMDGPU::isInlinableIntLiteralV216(Imm);
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_AC_FP16: {
if (isInt<16>(Imm) || isUInt<16>(Imm)) {
@@ -3440,6 +3473,9 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
uint32_t Trunc = static_cast<uint32_t>(Imm);
return AMDGPU::isInlinableLiteralV216(Trunc, ST.hasInv2PiInlineImm());
}
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16:
+ return false;
default:
llvm_unreachable("invalid bitwidth");
}
@@ -3566,11 +3602,13 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
// Additional verification is needed for sdst/src2.
return true;
}
- case AMDGPU::V_MAC_F32_e64:
case AMDGPU::V_MAC_F16_e64:
- case AMDGPU::V_FMAC_F32_e64:
+ case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_LEGACY_F32_e64:
case AMDGPU::V_FMAC_F16_e64:
+ case AMDGPU::V_FMAC_F32_e64:
case AMDGPU::V_FMAC_F64_e64:
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
if (!Src2->isReg() || !RI.isVGPR(MRI, Src2->getReg()) ||
hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
return false;
@@ -3813,6 +3851,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
break;
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
break;
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
@@ -4472,20 +4511,20 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32;
case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32;
- case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
- case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
- case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
- case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
- case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
- case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
- case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e32;
- case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e32;
- case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e32;
- case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
- case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
- case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
- case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
- case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
+ case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e64;
+ case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e64;
+ case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e64;
+ case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e64;
+ case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e64;
+ case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e64;
+ case AMDGPU::S_CMP_EQ_U32: return AMDGPU::V_CMP_EQ_U32_e64;
+ case AMDGPU::S_CMP_LG_U32: return AMDGPU::V_CMP_NE_U32_e64;
+ case AMDGPU::S_CMP_GT_U32: return AMDGPU::V_CMP_GT_U32_e64;
+ case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e64;
+ case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e64;
+ case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e64;
+ case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e64;
+ case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e64;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
@@ -4963,13 +5002,13 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI,
continue;
}
- if (RI.hasAGPRs(MRI.getRegClass(MO.getReg())) &&
+ if (RI.hasAGPRs(RI.getRegClassForReg(MRI, MO.getReg())) &&
!isOperandLegal(MI, Idx, &MO)) {
legalizeOpWithMove(MI, Idx);
continue;
}
- if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
+ if (!RI.isSGPRClass(RI.getRegClassForReg(MRI, MO.getReg())))
continue; // VGPRs are legal
// We can use one SGPR in each VOP3 instruction prior to GFX10
@@ -5165,8 +5204,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
return;
Register DstReg = MRI.createVirtualRegister(DstRC);
- MachineInstr *Copy =
- BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
+ auto Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg).add(Op);
Op.setReg(DstReg);
Op.setSubReg(0);
@@ -5188,7 +5226,7 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
}
if (!RI.isSGPRClass(DstRC) && !Copy->readsRegister(AMDGPU::EXEC, &RI) &&
!ImpDef)
- Copy->addOperand(MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
+ Copy.addReg(AMDGPU::EXEC, RegState::Implicit);
}
// Emit the actual waterfall loop, executing the wrapped instruction for each
@@ -5897,18 +5935,18 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
case AMDGPU::S_CBRANCH_SCC0:
- case AMDGPU::S_CBRANCH_SCC1:
- // Clear unused bits of vcc
- if (ST.isWave32())
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B32),
- AMDGPU::VCC_LO)
- .addReg(AMDGPU::EXEC_LO)
- .addReg(AMDGPU::VCC_LO);
- else
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::S_AND_B64),
- AMDGPU::VCC)
- .addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ case AMDGPU::S_CBRANCH_SCC1: {
+ // Clear unused bits of vcc
+ Register CondReg = Inst.getOperand(1).getReg();
+ bool IsSCC = CondReg == AMDGPU::SCC;
+ Register VCC = RI.getVCC();
+ Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC)
+ .addReg(EXEC)
+ .addReg(IsSCC ? VCC : CondReg);
+ Inst.RemoveOperand(1);
+ }
break;
case AMDGPU::S_BFE_U64:
@@ -6016,12 +6054,43 @@ MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
continue;
case AMDGPU::S_CSELECT_B32:
+ lowerSelect32(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ continue;
case AMDGPU::S_CSELECT_B64:
- lowerSelect(Worklist, Inst, MDT);
+ splitSelect64(Worklist, Inst, MDT);
Inst.eraseFromParent();
continue;
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMP_LT_I32:
+ case AMDGPU::S_CMP_LE_I32:
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_LT_U32:
+ case AMDGPU::S_CMP_LE_U32:
+ case AMDGPU::S_CMP_EQ_U64:
+ case AMDGPU::S_CMP_LG_U64: {
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
+ MachineInstr *NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg)
+ .add(Inst.getOperand(0))
+ .add(Inst.getOperand(1));
+ legalizeOperands(*NewInstr, MDT);
+ int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC);
+ MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
+ Inst.eraseFromParent();
+ }
+ continue;
}
+
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
@@ -6167,8 +6236,8 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
return std::make_pair(false, nullptr);
}
-void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT) const {
+void SIInstrInfo::lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -6181,47 +6250,51 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
MachineOperand &Cond = Inst.getOperand(3);
Register SCCSource = Cond.getReg();
- // Find SCC def, and if that is a copy (SCC = COPY reg) then use reg instead.
- if (!Cond.isUndef()) {
- for (MachineInstr &CandI :
- make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)),
- Inst.getParent()->rend())) {
- if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) !=
- -1) {
- if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
- SCCSource = CandI.getOperand(1).getReg();
- }
- break;
- }
- }
- }
+ bool IsSCC = (SCCSource == AMDGPU::SCC);
// If this is a trivial select where the condition is effectively not SCC
// (SCCSource is a source of copy to SCC), then the select is semantically
// equivalent to copying SCCSource. Hence, there is no need to create
// V_CNDMASK, we can just use that and bail out.
- if ((SCCSource != AMDGPU::SCC) && Src0.isImm() && (Src0.getImm() == -1) &&
- Src1.isImm() && (Src1.getImm() == 0)) {
+ if (!IsSCC && Src0.isImm() && (Src0.getImm() == -1) && Src1.isImm() &&
+ (Src1.getImm() == 0)) {
MRI.replaceRegWith(Dest.getReg(), SCCSource);
return;
}
- const TargetRegisterClass *TC = ST.getWavefrontSize() == 64
- ? &AMDGPU::SReg_64_XEXECRegClass
- : &AMDGPU::SReg_32_XM0_XEXECRegClass;
+ const TargetRegisterClass *TC =
+ RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
Register CopySCC = MRI.createVirtualRegister(TC);
- if (SCCSource == AMDGPU::SCC) {
- // Insert a trivial select instead of creating a copy, because a copy from
- // SCC would semantically mean just copying a single bit, but we may need
- // the result to be a vector condition mask that needs preserving.
- unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
- : AMDGPU::S_CSELECT_B32;
- auto NewSelect =
- BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0);
- NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
- } else {
- BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC).addReg(SCCSource);
+ if (IsSCC) {
+ // Now look for the closest SCC def; if it is a copy (SCC = COPY reg), take
+ // the condition from that copy's source register instead of SCCSource.
+ bool CopyFound = false;
+ for (MachineInstr &CandI :
+ make_range(std::next(MachineBasicBlock::reverse_iterator(Inst)),
+ Inst.getParent()->rend())) {
+ if (CandI.findRegisterDefOperandIdx(AMDGPU::SCC, false, false, &RI) !=
+ -1) {
+ if (CandI.isCopy() && CandI.getOperand(0).getReg() == AMDGPU::SCC) {
+ BuildMI(MBB, MII, DL, get(AMDGPU::COPY), CopySCC)
+ .addReg(CandI.getOperand(1).getReg());
+ CopyFound = true;
+ }
+ break;
+ }
+ }
+ if (!CopyFound) {
+ // SCC def is not a copy
+ // Insert a trivial select instead of creating a copy, because a copy from
+ // SCC would semantically mean just copying a single bit, but we may need
+ // the result to be a vector condition mask that needs preserving.
+ unsigned Opcode = (ST.getWavefrontSize() == 64) ? AMDGPU::S_CSELECT_B64
+ : AMDGPU::S_CSELECT_B32;
+ auto NewSelect =
+ BuildMI(MBB, MII, DL, get(Opcode), CopySCC).addImm(-1).addImm(0);
+ NewSelect->getOperand(3).setIsUndef(Cond.isUndef());
+ }
}
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -6232,13 +6305,102 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
.add(Src1) // False
.addImm(0)
.add(Src0) // True
- .addReg(CopySCC);
+ .addReg(IsSCC ? CopySCC : SCCSource);
MRI.replaceRegWith(Dest.getReg(), ResultReg);
legalizeOperands(*UpdatedInst, MDT);
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
+void SIInstrInfo::splitSelect64(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT) const {
+ // Split S_CSELECT_B64 into a pair of S_CSELECT_B32 and lower them
+ // further.
+ const DebugLoc &DL = Inst.getDebugLoc();
+ MachineBasicBlock::iterator MII = Inst;
+ MachineBasicBlock &MBB = *Inst.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ // Get the original operands.
+ MachineOperand &Dest = Inst.getOperand(0);
+ MachineOperand &Src0 = Inst.getOperand(1);
+ MachineOperand &Src1 = Inst.getOperand(2);
+ MachineOperand &Cond = Inst.getOperand(3);
+
+ Register SCCSource = Cond.getReg();
+ bool IsSCC = (SCCSource == AMDGPU::SCC);
+
+ // If this is a trivial select where the condition is effectively not SCC
+ // (SCCSource is a source of copy to SCC), then the select is semantically
+ // equivalent to copying SCCSource. Hence, there is no need to create
+ // V_CNDMASK, we can just use that and bail out.
+ if (!IsSCC && (Src0.isImm() && Src0.getImm() == -1) &&
+ (Src1.isImm() && Src1.getImm() == 0)) {
+ MRI.replaceRegWith(Dest.getReg(), SCCSource);
+ return;
+ }
+
+ // Prepare the split destination.
+ Register FullDestReg = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
+ Register DestSub0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ Register DestSub1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // Split the source operands.
+ const TargetRegisterClass *Src0RC = nullptr;
+ const TargetRegisterClass *Src0SubRC = nullptr;
+ if (Src0.isReg()) {
+ Src0RC = MRI.getRegClass(Src0.getReg());
+ Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+ }
+ const TargetRegisterClass *Src1RC = nullptr;
+ const TargetRegisterClass *Src1SubRC = nullptr;
+ if (Src1.isReg()) {
+ Src1RC = MRI.getRegClass(Src1.getReg());
+ Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+ }
+ // Split lo.
+ MachineOperand SrcReg0Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
+ MachineOperand SrcReg1Sub0 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub0, Src1SubRC);
+ // Split hi.
+ MachineOperand SrcReg0Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC);
+ MachineOperand SrcReg1Sub1 =
+ buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC);
+ // Select the lo part.
+ MachineInstr *LoHalf =
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub0)
+ .add(SrcReg0Sub0)
+ .add(SrcReg1Sub0);
+ // Replace the condition operand with the original one.
+ LoHalf->getOperand(3).setReg(SCCSource);
+ Worklist.insert(LoHalf);
+ // Select the hi part.
+ MachineInstr *HiHalf =
+ BuildMI(MBB, MII, DL, get(AMDGPU::S_CSELECT_B32), DestSub1)
+ .add(SrcReg0Sub1)
+ .add(SrcReg1Sub1);
+ // Replace the condition operand with the original one.
+ HiHalf->getOperand(3).setReg(SCCSource);
+ Worklist.insert(HiHalf);
+ // Merge them back to the original 64-bit one.
+ BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg)
+ .addReg(DestSub0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DestSub1)
+ .addImm(AMDGPU::sub1);
+ MRI.replaceRegWith(Dest.getReg(), FullDestReg);
+
+ // Try to legalize the operands in case we need to swap the order to keep
+ // it valid.
+ legalizeOperands(*LoHalf, MDT);
+ legalizeOperands(*HiHalf, MDT);
+
+ // Move all users of this moved value.
+ addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
+}
+
void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -6823,8 +6985,8 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
- SetVectorType &Worklist) const {
- bool SCCUsedImplicitly = false;
+ SetVectorType &Worklist,
+ Register NewCond) const {
// Ensure that def inst defines SCC, which is still live.
assert(Op.isReg() && Op.getReg() == AMDGPU::SCC && Op.isDef() &&
@@ -6836,33 +6998,18 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
make_range(std::next(MachineBasicBlock::iterator(SCCDefInst)),
SCCDefInst.getParent()->end())) {
// Check if SCC is used first.
- if (MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI) != -1) {
+ int SCCIdx = MI.findRegisterUseOperandIdx(AMDGPU::SCC, false, &RI);
+ if (SCCIdx != -1) {
if (MI.isCopy()) {
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
Register DestReg = MI.getOperand(0).getReg();
- for (auto &User : MRI.use_nodbg_instructions(DestReg)) {
- if ((User.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO) ||
- (User.getOpcode() == AMDGPU::S_SUB_CO_PSEUDO)) {
- User.getOperand(4).setReg(RI.getVCC());
- Worklist.insert(&User);
- } else if (User.getOpcode() == AMDGPU::V_CNDMASK_B32_e64) {
- User.getOperand(5).setReg(RI.getVCC());
- // No need to add to Worklist.
- }
- }
+ MRI.replaceRegWith(DestReg, NewCond);
CopyToDelete.push_back(&MI);
} else {
- if (MI.getOpcode() == AMDGPU::S_CSELECT_B32 ||
- MI.getOpcode() == AMDGPU::S_CSELECT_B64) {
- // This is an implicit use of SCC and it is really expected by
- // the SCC users to handle.
- // We cannot preserve the edge to the user so add the explicit
- // copy: SCC = COPY VCC.
- // The copy will be cleaned up during the processing of the user
- // in lowerSelect.
- SCCUsedImplicitly = true;
- }
+
+ if (NewCond.isValid())
+ MI.getOperand(SCCIdx).setReg(NewCond);
Worklist.insert(&MI);
}
@@ -6873,12 +7020,6 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
}
for (auto &Copy : CopyToDelete)
Copy->eraseFromParent();
-
- if (SCCUsedImplicitly) {
- BuildMI(*SCCDefInst.getParent(), std::next(SCCDefInst.getIterator()),
- SCCDefInst.getDebugLoc(), get(AMDGPU::COPY), AMDGPU::SCC)
- .addReg(RI.getVCC());
- }
}
// Instructions that use SCC may be converted to VALU instructions. When that
@@ -7171,31 +7312,19 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return Size;
}
- // 4-byte instructions may have a 32-bit literal encoded after them. Check
- // operands that coud ever be literals.
+ // Instructions may have a 32-bit literal encoded after them. Check
+ // operands that could ever be literals.
if (isVALU(MI) || isSALU(MI)) {
- int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
- if (Src0Idx == -1)
- return DescSize; // No operands.
-
- if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
- return isVOP3(MI) ? 12 : (DescSize + 4);
-
- int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
- if (Src1Idx == -1)
+ if (isDPP(MI))
return DescSize;
-
- if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
- return isVOP3(MI) ? 12 : (DescSize + 4);
-
- int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
- if (Src2Idx == -1)
- return DescSize;
-
- if (isLiteralConstantLike(MI.getOperand(Src2Idx), Desc.OpInfo[Src2Idx]))
- return isVOP3(MI) ? 12 : (DescSize + 4);
-
- return DescSize;
+ bool HasLiteral = false;
+ for (int I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
+ if (isLiteralConstant(MI, I)) {
+ HasLiteral = true;
+ break;
+ }
+ }
+ return HasLiteral ? DescSize + 4 : DescSize;
}
// Check whether we have extra NSA words.
@@ -7283,19 +7412,16 @@ void SIInstrInfo::convertNonUniformLoopRegion(
Register BackEdgeReg = MRI.createVirtualRegister(RI.getBoolRC());
MachineInstrBuilder HeaderPHIBuilder =
BuildMI(*(MF), Branch->getDebugLoc(), get(TargetOpcode::PHI), DstReg);
- for (MachineBasicBlock::pred_iterator PI = LoopEntry->pred_begin(),
- E = LoopEntry->pred_end();
- PI != E; ++PI) {
- if (*PI == LoopEnd) {
+ for (MachineBasicBlock *PMBB : LoopEntry->predecessors()) {
+ if (PMBB == LoopEnd) {
HeaderPHIBuilder.addReg(BackEdgeReg);
} else {
- MachineBasicBlock *PMBB = *PI;
Register ZeroReg = MRI.createVirtualRegister(RI.getBoolRC());
materializeImmediate(*PMBB, PMBB->getFirstTerminator(), DebugLoc(),
ZeroReg, 0);
HeaderPHIBuilder.addReg(ZeroReg);
}
- HeaderPHIBuilder.addMBB(*PI);
+ HeaderPHIBuilder.addMBB(PMBB);
}
MachineInstr *HeaderPhi = HeaderPHIBuilder;
MachineInstr *SIIFBREAK = BuildMI(*(MF), Branch->getDebugLoc(),
@@ -7340,6 +7466,20 @@ SIInstrInfo::CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const
return new GCNHazardRecognizer(MF);
}
+// Called during:
+// - pre-RA scheduling and post-RA scheduling
+ScheduleHazardRecognizer *
+SIInstrInfo::CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAGMI *DAG) const {
+ // Borrowed from Arm Target
+ // We would like to restrict this hazard recognizer to only
+ // post-RA scheduling; we can tell that we're post-RA because we don't
+ // track VRegLiveness.
+ if (!DAG->hasVRegLiveness())
+ return new GCNHazardRecognizer(DAG->MF);
+ return TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
+}
+
std::pair<unsigned, unsigned>
SIInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
return std::make_pair(TF & MO_MASK, TF & ~MO_MASK);
@@ -7919,3 +8059,209 @@ unsigned SIInstrInfo::getDSShaderTypeValue(const MachineFunction &MF) {
return 0;
}
}
+
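+// Decompose an S_CMP/S_CMPK into its source register(s) and compared
+// immediate so that optimizeCompareInstr can try to fold the compare away.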
+bool SIInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
+ if (!MI.getOperand(0).isReg() || MI.getOperand(0).getSubReg())
+ return false;
+
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMP_LT_U32:
+ case AMDGPU::S_CMP_LT_I32:
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMP_LE_U32:
+ case AMDGPU::S_CMP_LE_I32:
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMP_EQ_U64:
+ case AMDGPU::S_CMP_LG_U64:
+ SrcReg = MI.getOperand(0).getReg();
+ if (MI.getOperand(1).isReg()) {
+ if (MI.getOperand(1).getSubReg())
+ return false;
+ SrcReg2 = MI.getOperand(1).getReg();
+ CmpValue = 0;
+ } else if (MI.getOperand(1).isImm()) {
+ SrcReg2 = Register();
+ CmpValue = MI.getOperand(1).getImm();
+ } else {
+ return false;
+ }
+ CmpMask = ~0;
+ return true;
+ case AMDGPU::S_CMPK_EQ_U32:
+ case AMDGPU::S_CMPK_EQ_I32:
+ case AMDGPU::S_CMPK_LG_U32:
+ case AMDGPU::S_CMPK_LG_I32:
+ case AMDGPU::S_CMPK_LT_U32:
+ case AMDGPU::S_CMPK_LT_I32:
+ case AMDGPU::S_CMPK_GT_U32:
+ case AMDGPU::S_CMPK_GT_I32:
+ case AMDGPU::S_CMPK_LE_U32:
+ case AMDGPU::S_CMPK_LE_I32:
+ case AMDGPU::S_CMPK_GE_U32:
+ case AMDGPU::S_CMPK_GE_I32:
+ SrcReg = MI.getOperand(0).getReg();
+ SrcReg2 = Register();
+ CmpValue = MI.getOperand(1).getImm();
+ CmpMask = ~0;
+ return true;
+ }
+
+ return false;
+}
+
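+// Fold an S_CMP of an S_AND with a power-of-two mask into the S_AND itself,
+// or into an S_BITCMP0/S_BITCMP1 when the AND result has no other uses.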
+bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue,
+ const MachineRegisterInfo *MRI) const {
+ if (!SrcReg || SrcReg.isPhysical())
+ return false;
+
+ if (SrcReg2 && !getFoldableImm(SrcReg2, *MRI, CmpValue))
+ return false;
+
+ const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
+ this](int64_t ExpectedValue, unsigned SrcSize,
+ bool IsReversable, bool IsSigned) -> bool {
+ // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_ge_u32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_ge_i32 (s_and_b32 $src, 1 << n), 1 << n => s_and_b32 $src, 1 << n
+ // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 1 << n => s_and_b64 $src, 1 << n
+ // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_gt_u32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_gt_i32 (s_and_b32 $src, 1 << n), 0 => s_and_b32 $src, 1 << n
+ // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 0 => s_and_b64 $src, 1 << n
+ //
+ // Signed ge/gt are not used for the sign bit.
+ //
+ // If result of the AND is unused except in the compare:
+ // s_and_b(32|64) $src, 1 << n => s_bitcmp1_b(32|64) $src, n
+ //
+ // s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n
+ // s_cmp_eq_i32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n
+ // s_cmp_eq_u64 (s_and_b64 $src, 1 << n), 0 => s_bitcmp0_b64 $src, n
+ // s_cmp_lg_u32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
+ // s_cmp_lg_i32 (s_and_b32 $src, 1 << n), 1 << n => s_bitcmp0_b32 $src, n
+ // s_cmp_lg_u64 (s_and_b64 $src, 1 << n), 1 << n => s_bitcmp0_b64 $src, n
+
+ MachineInstr *Def = MRI->getUniqueVRegDef(SrcReg);
+ if (!Def || Def->getParent() != CmpInstr.getParent())
+ return false;
+
+ if (Def->getOpcode() != AMDGPU::S_AND_B32 &&
+ Def->getOpcode() != AMDGPU::S_AND_B64)
+ return false;
+
+ int64_t Mask;
+ const auto isMask = [&Mask, SrcSize](const MachineOperand *MO) -> bool {
+ if (MO->isImm())
+ Mask = MO->getImm();
+ else if (!getFoldableImm(MO, Mask))
+ return false;
+ Mask &= maxUIntN(SrcSize);
+ return isPowerOf2_64(Mask);
+ };
+
+ MachineOperand *SrcOp = &Def->getOperand(1);
+ if (isMask(SrcOp))
+ SrcOp = &Def->getOperand(2);
+ else if (isMask(&Def->getOperand(2)))
+ SrcOp = &Def->getOperand(1);
+ else
+ return false;
+
+ unsigned BitNo = countTrailingZeros((uint64_t)Mask);
+ if (IsSigned && BitNo == SrcSize - 1)
+ return false;
+
+ ExpectedValue <<= BitNo;
+
+ bool IsReversedCC = false;
+ if (CmpValue != ExpectedValue) {
+ if (!IsReversable)
+ return false;
+ IsReversedCC = CmpValue == (ExpectedValue ^ Mask);
+ if (!IsReversedCC)
+ return false;
+ }
+
+ Register DefReg = Def->getOperand(0).getReg();
+ if (IsReversedCC && !MRI->hasOneNonDBGUse(DefReg))
+ return false;
+
+ for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
+ I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::SCC, &RI) ||
+ I->killsRegister(AMDGPU::SCC, &RI))
+ return false;
+ }
+
+ MachineOperand *SccDef = Def->findRegisterDefOperand(AMDGPU::SCC);
+ SccDef->setIsDead(false);
+ CmpInstr.eraseFromParent();
+
+ if (!MRI->use_nodbg_empty(DefReg)) {
+ assert(!IsReversedCC);
+ return true;
+ }
+
+ // Replace AND with unused result with a S_BITCMP.
+ MachineBasicBlock *MBB = Def->getParent();
+
+ unsigned NewOpc = (SrcSize == 32) ? IsReversedCC ? AMDGPU::S_BITCMP0_B32
+ : AMDGPU::S_BITCMP1_B32
+ : IsReversedCC ? AMDGPU::S_BITCMP0_B64
+ : AMDGPU::S_BITCMP1_B64;
+
+ BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
+ .add(*SrcOp)
+ .addImm(BitNo);
+ Def->eraseFromParent();
+
+ return true;
+ };
+
+ switch (CmpInstr.getOpcode()) {
+ default:
+ break;
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMPK_EQ_U32:
+ case AMDGPU::S_CMPK_EQ_I32:
+ return optimizeCmpAnd(1, 32, true, false);
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMPK_GE_U32:
+ return optimizeCmpAnd(1, 32, false, false);
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMPK_GE_I32:
+ return optimizeCmpAnd(1, 32, false, true);
+ case AMDGPU::S_CMP_EQ_U64:
+ return optimizeCmpAnd(1, 64, true, false);
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMPK_LG_U32:
+ case AMDGPU::S_CMPK_LG_I32:
+ return optimizeCmpAnd(0, 32, true, false);
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMPK_GT_U32:
+ return optimizeCmpAnd(0, 32, false, false);
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMPK_GT_I32:
+ return optimizeCmpAnd(0, 32, false, true);
+ case AMDGPU::S_CMP_LG_U64:
+ return optimizeCmpAnd(0, 64, true, false);
+ }
+
+ return false;
+}
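
To make the folding rules in the comment block above concrete, here is a small standalone C++20 sketch (not LLVM code; the function name and the std::optional result are illustrative) of the arithmetic optimizeCmpAnd performs: the AND mask must be a single bit within the compared width, signed ge/gt may not test the sign bit, and a compare against the "wrong" constant is only accepted when it equals the mask-flipped expected value, in which case the condition is treated as reversed.

#include <bit>
#include <cstdint>
#include <optional>

struct FoldInfo {
  unsigned BitNo;  // bit tested by the resulting s_bitcmp
  bool ReversedCC; // compare sense must be inverted (s_bitcmp0 vs s_bitcmp1)
};

// ExpectedValue is 1 for the EQ/GE-style compares and 0 for the LG/GT-style
// ones, mirroring the switch at the end of optimizeCompareInstr.
std::optional<FoldInfo> canFoldCmpAnd(uint64_t Mask, uint64_t CmpValue,
                                      uint64_t ExpectedValue, unsigned SrcSize,
                                      bool IsReversible, bool IsSigned) {
  Mask &= SrcSize == 64 ? ~0ull : (1ull << SrcSize) - 1;
  if (!std::has_single_bit(Mask))       // mask must be exactly 1 << n
    return std::nullopt;
  unsigned BitNo = std::countr_zero(Mask);
  if (IsSigned && BitNo == SrcSize - 1) // signed ge/gt never test the sign bit
    return std::nullopt;
  ExpectedValue <<= BitNo;
  bool ReversedCC = false;
  if (CmpValue != ExpectedValue) {
    if (!IsReversible || CmpValue != (ExpectedValue ^ Mask))
      return std::nullopt;
    ReversedCC = true;
  }
  return FoldInfo{BitNo, ReversedCC};
}

For instance, canFoldCmpAnd(0x10, 0, /*ExpectedValue=*/1, 32, true, false) reports bit 4 with a reversed condition, matching the "s_cmp_eq_u32 (s_and_b32 $src, 1 << n), 0 => s_bitcmp0_b32 $src, n" row in the table above.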
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index fc5e5be03541..dd9ea2b53ca2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -78,8 +78,11 @@ private:
moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
- void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
+ void lowerSelect32(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
+
+ void splitSelect64(SetVectorType &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
void lowerScalarAbs(SetVectorType &Worklist,
MachineInstr &Inst) const;
@@ -122,7 +125,8 @@ private:
void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
- SetVectorType &Worklist) const;
+ SetVectorType &Worklist,
+ Register NewCond = Register()) const;
void addSCCDefsToVALUWorklist(MachineOperand &Op,
SetVectorType &Worklist) const;
@@ -271,11 +275,10 @@ public:
MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
- unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS = nullptr) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
bool analyzeBranchImpl(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
@@ -315,6 +318,14 @@ public:
Register DstReg, ArrayRef<MachineOperand> Cond,
Register TrueReg, Register FalseReg) const;
+ bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
+
+ bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
+ const MachineRegisterInfo *MRI) const override;
+
unsigned getAddressSpaceForPseudoSourceKind(
unsigned Kind) const override;
@@ -322,16 +333,15 @@ public:
areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
const MachineInstr &MIb) const override;
- bool isFoldableCopy(const MachineInstr &MI) const;
+ static bool isFoldableCopy(const MachineInstr &MI);
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
MachineRegisterInfo *MRI) const final;
unsigned getMachineCSELookAheadLimit() const override { return 500; }
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
bool isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
@@ -1036,6 +1046,10 @@ public:
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
+ ScheduleHazardRecognizer *
+ CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAGMI *DAG) const override;
+
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
@@ -1119,6 +1133,8 @@ public:
}
static unsigned getDSShaderTypeValue(const MachineFunction &MF);
+
+ const TargetSchedModel &getSchedModel() const { return SchedModel; }
};
/// \brief Returns true if a reg:subreg pair P has a TRC class
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 25b647d34ec1..8c24268e379e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -333,6 +333,18 @@ def load_glue : PatFrag <(ops node:$ptr), (unindexedload_glue node:$ptr)> {
let IsNonExtLoad = 1;
}
+def atomic_load_8_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+
+def atomic_load_16_glue : PatFrag<(ops node:$ptr),
+ (AMDGPUatomic_ld_glue node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
+
def atomic_load_32_glue : PatFrag<(ops node:$ptr),
(AMDGPUatomic_ld_glue node:$ptr)> {
let IsAtomic = 1;
@@ -423,6 +435,14 @@ def load_align16_local_m0 : PatFrag<(ops node:$ptr),
} // End IsLoad = 1
let IsAtomic = 1, AddressSpaces = LoadAddress_local.AddrSpaces in {
+def atomic_load_8_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_8_glue node:$ptr)> {
+ let MemoryVT = i8;
+}
+def atomic_load_16_local_m0 : PatFrag<(ops node:$ptr),
+ (atomic_load_16_glue node:$ptr)> {
+ let MemoryVT = i16;
+}
def atomic_load_32_local_m0 : PatFrag<(ops node:$ptr),
(atomic_load_32_glue node:$ptr)> {
let MemoryVT = i32;
@@ -509,6 +529,18 @@ def store_align16_local_m0 : PatFrag <(ops node:$value, node:$ptr),
let AddressSpaces = StoreAddress_local.AddrSpaces in {
+def atomic_store_local_8_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i8;
+}
+def atomic_store_local_16_m0 : PatFrag <
+ (ops node:$value, node:$ptr),
+ (AMDGPUatomic_st_glue node:$value, node:$ptr)> {
+ let IsAtomic = 1;
+ let MemoryVT = i16;
+}
def atomic_store_local_32_m0 : PatFrag <
(ops node:$value, node:$ptr),
(AMDGPUatomic_st_glue node:$value, node:$ptr)> {
@@ -527,15 +559,7 @@ def atomic_store_local_64_m0 : PatFrag <
def si_setcc_uniform : PatFrag <
(ops node:$lhs, node:$rhs, node:$cond),
(setcc node:$lhs, node:$rhs, node:$cond), [{
- for (SDNode *Use : N->uses()) {
- if (Use->isMachineOpcode() || Use->getOpcode() != ISD::CopyToReg)
- return false;
-
- unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
- if (Reg != AMDGPU::SCC)
- return false;
- }
- return true;
+ return !N->isDivergent();
}]>;
//===----------------------------------------------------------------------===//
@@ -1181,6 +1205,7 @@ class kimmOperand<ValueType vt> : Operand<vt> {
let OperandType = "OPERAND_KIMM"#vt.Size;
let PrintMethod = "printU"#vt.Size#"ImmOperand";
let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
+ let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
}
// 32-bit VALU immediate operand that uses the constant bus.
@@ -1864,8 +1889,8 @@ class getAsm64 <bit HasDst, int NumSrcArgs, bit HasIntClamp, bit HasModifiers,
// Returns the assembly string for the inputs and outputs of a VOP3P
// instruction.
-class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
- bit HasClamp, ValueType DstVT = i32> {
+class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
+ bit HasClamp> {
string dst = "$vdst";
string src0 = !if(!eq(NumSrcArgs, 1), "$src0", "$src0,");
string src1 = !if(!eq(NumSrcArgs, 1), "",
@@ -1883,7 +1908,6 @@ class getAsmVOP3P <bit HasDst, int NumSrcArgs, bit HasModifiers,
class getAsmVOP3OpSel <int NumSrcArgs,
bit HasClamp,
- bit HasOMod,
bit Src0HasMods,
bit Src1HasMods,
bit Src2HasMods> {
@@ -2026,8 +2050,7 @@ class getHasSDWA <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
);
}
-class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
- ValueType Src1VT = i32> {
+class getHasDPP <int NumSrcArgs> {
bit ret = !if(!eq(NumSrcArgs, 3),
0, // NumSrcArgs == 3 - No DPP for VOP3
1);
@@ -2035,14 +2058,14 @@ class getHasDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
class getHasExt64BitDPP <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
- bit ret = !and(getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
+ bit ret = !and(getHasDPP<NumSrcArgs>.ret,
getHas64BitOps<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
// Function that checks if instruction supports DPP and SDWA
class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32> {
- bit ret = !or(getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret,
+ bit ret = !or(getHasDPP<NumSrcArgs>.ret,
getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret);
}
@@ -2146,7 +2169,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field bit HasSrc2Mods = !if(HasModifiers, !or(HasSrc2FloatMods, HasSrc2IntMods), 0);
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
- field bit HasExtDPP = getHasDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
+ field bit HasExtDPP = getHasDPP<NumSrcArgs>.ret;
field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA9 = HasExtSDWA;
@@ -2197,9 +2220,9 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableF32SrcMods = 0,
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasIntClamp, HasModifiers, HasOMod, DstVT>.ret;
- field string AsmVOP3P = getAsmVOP3P<HasDst, NumSrcArgs, HasModifiers, HasClamp, DstVT>.ret;
+ field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp>.ret;
field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
- HasClamp, HasOMod,
+ HasClamp,
HasSrc0FloatMods,
HasSrc1FloatMods,
HasSrc2FloatMods>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index fbf4634bfc94..d5f9cb8ba493 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1017,22 +1017,33 @@ def : GCNPat <
/********** Extraction, Insertion, Building and Casting **********/
/********** ============================================ **********/
-foreach Index = 0-2 in {
- def Extract_Element_v2i32_#Index : Extract_Element <
- i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
+// Special case for 2 element vectors. REG_SEQUENCE produces better code
+// than an INSERT_SUBREG.
+multiclass Insert_Element_V2<RegisterClass RC, ValueType elem_type, ValueType vec_type> {
+ def : GCNPat <
+ (insertelt vec_type:$vec, elem_type:$elem, 0),
+ (REG_SEQUENCE RC, $elem, sub0, (elem_type (EXTRACT_SUBREG $vec, sub1)), sub1)
>;
- def Insert_Element_v2i32_#Index : Insert_Element <
+
+ def : GCNPat <
+ (insertelt vec_type:$vec, elem_type:$elem, 1),
+ (REG_SEQUENCE RC, (elem_type (EXTRACT_SUBREG $vec, sub0)), sub0, $elem, sub1)
+ >;
+}
+
+foreach Index = 0-1 in {
+ def Extract_Element_v2i32_#Index : Extract_Element <
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
>;
def Extract_Element_v2f32_#Index : Extract_Element <
f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
>;
- def Insert_Element_v2f32_#Index : Insert_Element <
- f32, v2f32, Index, !cast<SubRegIndex>(sub#Index)
- >;
}
+defm : Insert_Element_V2 <SReg_64, i32, v2i32>;
+defm : Insert_Element_V2 <SReg_64, f32, v2f32>;
+
foreach Index = 0-2 in {
def Extract_Element_v3i32_#Index : Extract_Element <
i32, v3i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -1860,40 +1871,92 @@ def : GCNPat <
// Conversion Patterns
//===----------------------------------------------------------------------===//
-def : GCNPat<(i32 (sext_inreg i32:$src, i1)),
+class UniformSextInreg<ValueType VT> : PatFrag<
+ (ops node:$src),
+ (sext_inreg $src, VT),
+ [{ return !N->isDivergent(); }]>;
+
+def : GCNPat<(i32 (UniformSextInreg<i1> i32:$src)),
(S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16
// Handle sext_inreg in i64
def : GCNPat <
- (i64 (sext_inreg i64:$src, i1)),
+ (i64 (UniformSextInreg<i1> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
>;
def : GCNPat <
- (i16 (sext_inreg i16:$src, i1)),
+ (i16 (UniformSextInreg<i1> i16:$src)),
(S_BFE_I32 $src, (i32 0x00010000)) // 0 | 1 << 16
>;
def : GCNPat <
- (i16 (sext_inreg i16:$src, i8)),
+ (i16 (UniformSextInreg<i8> i16:$src)),
(S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
>;
def : GCNPat <
- (i64 (sext_inreg i64:$src, i8)),
+ (i64 (UniformSextInreg<i8> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16
>;
def : GCNPat <
- (i64 (sext_inreg i64:$src, i16)),
+ (i64 (UniformSextInreg<i16> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16
>;
def : GCNPat <
- (i64 (sext_inreg i64:$src, i32)),
+ (i64 (UniformSextInreg<i32> i64:$src)),
(S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16
>;
+
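As a side note on the magic constants in the S_BFE patterns above (0x10000, 0x80000, ...): the scalar bitfield-extract immediate packs the bit offset in the low bits and the field width starting at bit 16, which is exactly what the "0 | N << 16" comments spell out. A tiny hypothetical helper (not part of LLVM; the hardware field masks are omitted for brevity) makes the encoding checkable:

#include <cstdint>

constexpr uint32_t sBfeImm(uint32_t Offset, uint32_t Width) {
  return Offset | (Width << 16); // offset | width << 16
}

static_assert(sBfeImm(0, 1) == 0x10000, "sext_inreg ..., i1");
static_assert(sBfeImm(0, 8) == 0x80000, "sext_inreg ..., i8");
static_assert(sBfeImm(0, 16) == 0x100000, "sext_inreg ..., i16");
static_assert(sBfeImm(0, 32) == 0x200000, "sext_inreg ..., i32");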
+class DivergentSextInreg<ValueType VT> : PatFrag<
+ (ops node:$src),
+ (sext_inreg $src, VT),
+ [{ return N->isDivergent(); }]>;
+
+def : GCNPat<(i32 (DivergentSextInreg<i1> i32:$src)),
+ (V_BFE_I32_e64 i32:$src, (i32 0), (i32 1))>;
+
+def : GCNPat <
+ (i16 (DivergentSextInreg<i1> i16:$src)),
+ (V_BFE_I32_e64 $src, (i32 0), (i32 1)) // 0 | 1 << 16
+>;
+
+def : GCNPat <
+ (i16 (DivergentSextInreg<i8> i16:$src)),
+ (V_BFE_I32_e64 $src, (i32 0), (i32 8)) // 0 | 8 << 16
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i1> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 1)), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 1))), sub1)
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i8> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8)/* 0 | 8 << 16 */), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 8))), sub1)
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i16> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16)/* 0 | 16 << 16 */), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (V_BFE_I32_e64 (i32 (EXTRACT_SUBREG i64:$src, sub0)), (i32 0), (i32 16))), sub1)
+>;
+
+def : GCNPat <
+ (i64 (DivergentSextInreg<i32> i64:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (EXTRACT_SUBREG i64:$src, sub0)), sub0,
+ (V_ASHRREV_I32_e32 (i32 31), (i32 (EXTRACT_SUBREG i64:$src, sub0))), sub1)
+>;
+
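The 64-bit DivergentSextInreg expansions above are easier to read with a scalar model in mind: sign-extend within the low 32-bit half (V_BFE_I32), then produce the high half by arithmetically shifting that result right by 31. A minimal standalone C++20 sketch of that decomposition (not LLVM code; Bits is assumed to be 1, 8, 16, or 32):

#include <cstdint>

int64_t sextInreg64(uint64_t Src, unsigned Bits) {
  uint32_t Lo = static_cast<uint32_t>(Src);      // EXTRACT_SUBREG $src, sub0
  int32_t LoSext = static_cast<int32_t>(Lo << (32 - Bits)) >>
                   (32 - Bits);                  // V_BFE_I32 lo, 0, Bits
  int32_t Hi = LoSext >> 31;                     // V_ASHRREV_I32 31, <bfe result>
  return (static_cast<uint64_t>(static_cast<uint32_t>(Hi)) << 32) |
         static_cast<uint32_t>(LoSext);          // REG_SEQUENCE ..., sub0, ..., sub1
}

For the i32 case the shift amount is zero, which is why the last pattern skips the BFE entirely and feeds the low subregister straight into the REG_SEQUENCE.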
def : GCNPat <
(i64 (zext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)
@@ -2097,6 +2160,22 @@ def : GCNPat <
>;
def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> i32:$a)),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> i16:$a)),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 1), $a), (i32 1))
+>;
+
+def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> i64:$a)),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 1),
+ (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
+>;
+
+def : GCNPat <
(i1 (trunc i32:$a)),
(V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
>;
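
The uniform trunc-to-i1 patterns added above encode the usual "keep only bit 0" rule: the scalar pipeline ANDs with 1 and compares the result against 1, leaving the boolean in SCC. A one-line scalar model (plain C++, not LLVM):

#include <cstdint>

bool truncToI1(uint64_t A) { return (A & 1u) == 1u; } // S_AND_B32 1, a; S_CMP_EQ_U32 ..., 1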
@@ -2278,31 +2357,37 @@ let SubtargetPredicate = NotHasMinMaxDenormModes in {
let OtherPredicates = [HasDLInsts] in {
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
def : GCNPat <
- (fma (f32 (VOP3Mods f32:$src0, i32:$src0_modifiers)),
- (f32 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+ (fma (f32 (VOP3NoMods f32:$src0)),
+ (f32 (VOP3NoMods f32:$src1)),
(f32 (VOP3NoMods f32:$src2))),
- (V_FMAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ (V_FMAC_F32_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2)
>;
} // End OtherPredicates = [HasDLInsts]
let SubtargetPredicate = isGFX10Plus in
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
def : GCNPat <
- (fma (f16 (VOP3Mods f32:$src0, i32:$src0_modifiers)),
- (f16 (VOP3Mods f32:$src1, i32:$src1_modifiers)),
+ (fma (f16 (VOP3NoMods f32:$src0)),
+ (f16 (VOP3NoMods f32:$src1)),
(f16 (VOP3NoMods f32:$src2))),
- (V_FMAC_F16_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
+ (V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
SRCMODS.NONE, $src2)
>;
let SubtargetPredicate = isGFX90APlus in
+// Don't allow source modifiers. If there are any source modifiers then it's
+// better to select fma instead of fmac.
def : GCNPat <
- (fma (f64 (VOP3Mods0 f64:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
- (f64 (VOP3Mods f64:$src1, i32:$src1_modifiers)),
+ (fma (f64 (VOP3NoMods f64:$src0)),
+ (f64 (VOP3NoMods f64:$src1)),
(f64 (VOP3NoMods f64:$src2))),
- (V_FMAC_F64_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
- SRCMODS.NONE, $src2, $clamp, $omod)
+ (V_FMAC_F64_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1,
+ SRCMODS.NONE, $src2)
>;
// COPY is workaround tablegen bug from multiple outputs
@@ -2656,12 +2741,20 @@ class AMDGPUGenericInstruction : GenericInstruction {
let Namespace = "AMDGPU";
}
+// Returns -1 if the input is zero.
def G_AMDGPU_FFBH_U32 : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
let hasSideEffects = 0;
}
+// Returns -1 if the input is zero.
+def G_AMDGPU_FFBL_B32 : AMDGPUGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
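For readers unfamiliar with the FFBH/FFBL naming, the intent of the two generic opcodes above appears to be "find first bit from the high end" and "find first bit from the low end", both returning -1 when the input is zero. Treat the exact bit-counting convention as an assumption here; the sketch below (plain C++20, not LLVM) only illustrates that reading:

#include <bit>
#include <cstdint>

int ffbh_u32(uint32_t V) { return V ? std::countl_zero(V) : -1; } // first set bit from the MSB side
int ffbl_b32(uint32_t V) { return V ? std::countr_zero(V) : -1; } // first set bit from the LSB side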
def G_AMDGPU_RCP_IFLAG : AMDGPUGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
@@ -2854,3 +2947,16 @@ def G_AMDGPU_INTRIN_BVH_INTERSECT_RAY : AMDGPUGenericInstruction {
let mayLoad = 1;
let mayStore = 0;
}
+
+// Generic instruction for SI_CALL, so we can select the register bank and insert a waterfall loop
+// if necessary.
+def G_SI_CALL : AMDGPUGenericInstruction {
+ let OutOperandList = (outs SReg_64:$dst);
+ let InOperandList = (ins type0:$src0, unknown:$callee);
+ let Size = 4;
+ let isCall = 1;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteBranch];
+ // TODO: Should really base this on the call target
+ let isConvergent = 1;
+}
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index d560b477b8ba..4fa8ec711134 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -140,11 +140,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
- Next = std::next(I);
- MachineInstr &MI = *I;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
switch (MI.getOpcode()) {
case AMDGPU::S_BRANCH:
// Optimize out branches to the next block.
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 493c1ad87f93..34cbb49dcd16 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -146,7 +146,7 @@ class SILoadStoreOptimizer : public MachineFunctionPass {
if (!AddrOp->isReg())
return false;
- // TODO: We should be able to merge physical reg addreses.
+ // TODO: We should be able to merge physical reg addresses.
if (AddrOp->getReg().isPhysical())
return false;
@@ -303,6 +303,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
return 2;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
return 4;
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
+ return 8;
case AMDGPU::DS_READ_B32: LLVM_FALLTHROUGH;
case AMDGPU::DS_READ_B32_gfx9: LLVM_FALLTHROUGH;
case AMDGPU::DS_WRITE_B32: LLVM_FALLTHROUGH;
@@ -343,6 +345,9 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr) == -1 &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0) == -1)
return UNKNOWN;
+ // Ignore BVH instructions
+ if (AMDGPU::getMIMGBaseOpcode(Opc)->BVH)
+ return UNKNOWN;
// TODO: Support IMAGE_GET_RESINFO and IMAGE_GET_LOD.
if (TII.get(Opc).mayStore() || !TII.get(Opc).mayLoad() ||
TII.isGather4(Opc))
@@ -369,6 +374,7 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return S_BUFFER_LOAD_IMM;
case AMDGPU::DS_READ_B32:
case AMDGPU::DS_READ_B32_gfx9:
@@ -380,15 +386,6 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::DS_WRITE_B64:
case AMDGPU::DS_WRITE_B64_gfx9:
return DS_WRITE;
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_sa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_sa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_sa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_sa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_nsa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_nsa:
- case AMDGPU::IMAGE_BVH_INTERSECT_RAY_a16_nsa:
- case AMDGPU::IMAGE_BVH64_INTERSECT_RAY_a16_nsa:
- return UNKNOWN;
}
}
@@ -419,6 +416,7 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
}
}
@@ -469,6 +467,7 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
Result.SBase = true;
return Result;
case AMDGPU::DS_READ_B32:
@@ -653,7 +652,7 @@ static bool canMoveInstsAcrossMemOp(MachineInstr &MemOp,
}
// This function assumes that \p A and \p B have are identical except for
-// size and offset, and they referecne adjacent memory.
+// size and offset, and they reference adjacent memory.
static MachineMemOperand *combineKnownAdjacentMMOs(MachineFunction &MF,
const MachineMemOperand *A,
const MachineMemOperand *B) {
@@ -863,6 +862,7 @@ bool SILoadStoreOptimizer::widthsFit(const GCNSubtarget &STM,
return false;
case 2:
case 4:
+ case 8:
return true;
}
}
@@ -1529,45 +1529,62 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
case 4:
return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
+ case 8:
+ return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
}
case MIMG:
- assert("No overlaps" && (countPopulation(CI.DMask | Paired.DMask) == Width));
+ assert((countPopulation(CI.DMask | Paired.DMask) == Width) &&
+ "No overlaps");
return AMDGPU::getMaskedMIMGOp(CI.I->getOpcode(), Width);
}
}
std::pair<unsigned, unsigned>
-SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI, const CombineInfo &Paired) {
+SILoadStoreOptimizer::getSubRegIdxs(const CombineInfo &CI,
+ const CombineInfo &Paired) {
- if (CI.Width == 0 || Paired.Width == 0 || CI.Width + Paired.Width > 4)
- return std::make_pair(0, 0);
+ assert(CI.Width != 0 && Paired.Width != 0 && "Width cannot be zero");
bool ReverseOrder;
if (CI.InstClass == MIMG) {
- assert((countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
- "No overlaps");
+ assert(
+ (countPopulation(CI.DMask | Paired.DMask) == CI.Width + Paired.Width) &&
+ "No overlaps");
ReverseOrder = CI.DMask > Paired.DMask;
} else
ReverseOrder = CI.Offset > Paired.Offset;
- static const unsigned Idxs[4][4] = {
- {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
- {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
- {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
- {AMDGPU::sub3, 0, 0, 0},
- };
unsigned Idx0;
unsigned Idx1;
- assert(CI.Width >= 1 && CI.Width <= 3);
- assert(Paired.Width >= 1 && Paired.Width <= 3);
+ if (CI.Width + Paired.Width > 4) {
+ assert(CI.Width == 4 && Paired.Width == 4);
- if (ReverseOrder) {
- Idx1 = Idxs[0][Paired.Width - 1];
- Idx0 = Idxs[Paired.Width][CI.Width - 1];
+ if (ReverseOrder) {
+ Idx1 = AMDGPU::sub0_sub1_sub2_sub3;
+ Idx0 = AMDGPU::sub4_sub5_sub6_sub7;
+ } else {
+ Idx0 = AMDGPU::sub0_sub1_sub2_sub3;
+ Idx1 = AMDGPU::sub4_sub5_sub6_sub7;
+ }
} else {
- Idx0 = Idxs[0][CI.Width - 1];
- Idx1 = Idxs[CI.Width][Paired.Width - 1];
+ static const unsigned Idxs[4][4] = {
+ {AMDGPU::sub0, AMDGPU::sub0_sub1, AMDGPU::sub0_sub1_sub2, AMDGPU::sub0_sub1_sub2_sub3},
+ {AMDGPU::sub1, AMDGPU::sub1_sub2, AMDGPU::sub1_sub2_sub3, 0},
+ {AMDGPU::sub2, AMDGPU::sub2_sub3, 0, 0},
+ {AMDGPU::sub3, 0, 0, 0},
+ };
+
+ assert(CI.Width >= 1 && CI.Width <= 3);
+ assert(Paired.Width >= 1 && Paired.Width <= 3);
+
+ if (ReverseOrder) {
+ Idx1 = Idxs[0][Paired.Width - 1];
+ Idx0 = Idxs[Paired.Width][CI.Width - 1];
+ } else {
+ Idx0 = Idxs[0][CI.Width - 1];
+ Idx1 = Idxs[CI.Width][Paired.Width - 1];
+ }
}
return std::make_pair(Idx0, Idx1);
@@ -2048,7 +2065,7 @@ SILoadStoreOptimizer::collectMergeableInsts(
// adjacent to each other in the list, which will make it easier to find
// matches.
MergeList.sort(
- [] (const CombineInfo &A, CombineInfo &B) {
+ [] (const CombineInfo &A, const CombineInfo &B) {
return A.Offset < B.Offset;
});
++I;
@@ -2140,7 +2157,7 @@ SILoadStoreOptimizer::optimizeInstsWithSameBaseAddr(
MachineBasicBlock::iterator NewMI =
mergeSBufferLoadImmPair(CI, Paired, InstsToMove);
CI.setMI(NewMI, *TII, *STM);
- OptimizeListAgain |= (CI.Width + Paired.Width) < 16;
+ OptimizeListAgain |= (CI.Width + Paired.Width) < 8;
break;
}
case BUFFER_LOAD: {
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 0f2836e1e7fb..3168bcd53eda 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -13,7 +13,7 @@
/// All control flow is handled using predicated instructions and
/// a predicate stack. Each Scalar ALU controls the operations of 64 Vector
/// ALUs. The Scalar ALU can update the predicate for any of the Vector ALUs
-/// by writting to the 64-bit EXEC register (each bit corresponds to a
+/// by writing to the 64-bit EXEC register (each bit corresponds to a
/// single vector ALU). Typically, for predicates, a vector ALU will write
/// to its bit of the VCC register (like EXEC VCC is 64-bits, one for each
/// Vector ALU) and then the ScalarALU will AND the VCC register with the
@@ -38,7 +38,8 @@
/// %vgpr0 = V_ADD_F32 %vgpr0, %vgpr0 // Do the IF block of the branch
///
/// label0:
-/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then block
+/// %sgpr0 = S_OR_SAVEEXEC_B64 %sgpr0 // Restore the exec mask for the Then
+/// // block
/// %exec = S_XOR_B64 %sgpr0, %exec // Update the exec mask
/// S_BRANCH_EXECZ label1 // Use our branch optimization
/// // instruction again.
@@ -52,6 +53,8 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
@@ -69,6 +72,8 @@ private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
LiveIntervals *LIS = nullptr;
+ LiveVariables *LV = nullptr;
+ MachineDominatorTree *MDT = nullptr;
MachineRegisterInfo *MRI = nullptr;
SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
@@ -141,6 +146,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
// Should preserve the same set that TwoAddressInstructions does.
+ AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<SlotIndexes>();
AU.addPreserved<LiveIntervals>();
AU.addPreservedID(LiveVariablesID);
@@ -234,6 +240,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
BuildMI(MBB, I, DL, TII->get(AndOpc), Tmp)
.addReg(CopyReg)
.add(Cond);
+ if (LV)
+ LV->replaceKillInstruction(Cond.getReg(), MI, *And);
setImpSCCDefDead(*And, true);
@@ -251,6 +259,8 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {
MachineInstr *SetExec =
BuildMI(MBB, I, DL, TII->get(MovTermOpc), Exec)
.addReg(Tmp, RegState::Kill);
+ if (LV)
+ LV->getVarInfo(Tmp).Kills.push_back(SetExec);
// Skip ahead to the unconditional branch in case there are other terminators
// present.
@@ -304,6 +314,8 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineInstr *OrSaveExec =
BuildMI(MBB, Start, DL, TII->get(OrSaveExecOpc), SaveReg)
.add(MI.getOperand(1)); // Saved EXEC
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *OrSaveExec);
MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
@@ -377,15 +389,22 @@ void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
And = BuildMI(MBB, &MI, DL, TII->get(AndOpc), AndReg)
.addReg(Exec)
.add(MI.getOperand(1));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *And);
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.addReg(AndReg)
.add(MI.getOperand(2));
if (LIS)
LIS->createAndComputeVirtRegInterval(AndReg);
- } else
+ } else {
Or = BuildMI(MBB, &MI, DL, TII->get(OrOpc), Dst)
.add(MI.getOperand(1))
.add(MI.getOperand(2));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(1).getReg(), MI, *Or);
+ }
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(2).getReg(), MI, *Or);
if (LIS) {
if (And)
@@ -471,6 +490,14 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock *SplitBB = &MBB;
if (NeedBlockSplit) {
SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS);
+ if (MDT && SplitBB != &MBB) {
+ MachineDomTreeNode *MBBNode = (*MDT)[&MBB];
+ SmallVector<MachineDomTreeNode *> Children(MBBNode->begin(),
+ MBBNode->end());
+ MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB);
+ for (MachineDomTreeNode *Child : Children)
+ MDT->changeImmediateDominator(Child, SplitBBNode);
+ }
Opcode = OrTermrOpc;
InsPt = MI;
}
@@ -479,6 +506,8 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
.addReg(Exec)
.add(MI.getOperand(0));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *NewMI);
LoweredEndCf.insert(NewMI);
@@ -570,7 +599,12 @@ void SILowerControlFlow::optimizeEndCf() {
LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
if (LIS)
LIS->RemoveMachineInstrFromMaps(*MI);
+ Register Reg;
+ if (LV)
+ Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::src1)->getReg();
MI->eraseFromParent();
+ if (LV)
+ LV->recomputeForSingleDefVirtReg(Reg);
removeMBBifRedundant(MBB);
}
}
@@ -686,6 +720,8 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
auto BfeMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_BFE_U32), CountReg)
.addReg(InputReg)
.addImm((MI.getOperand(1).getImm() & Mask) | 0x70000);
+ if (LV)
+ LV->recomputeForSingleDefVirtReg(InputReg);
auto BfmMI =
BuildMI(*MBB, FirstMI, DL,
TII->get(IsWave32 ? AMDGPU::S_BFM_B32 : AMDGPU::S_BFM_B64), Exec)
@@ -694,6 +730,8 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
auto CmpMI = BuildMI(*MBB, FirstMI, DL, TII->get(AMDGPU::S_CMP_EQ_U32))
.addReg(CountReg, RegState::Kill)
.addImm(WavefrontSize);
+ if (LV)
+ LV->getVarInfo(CountReg).Kills.push_back(CmpMI);
auto CmovMI =
BuildMI(*MBB, FirstMI, DL,
TII->get(IsWave32 ? AMDGPU::S_CMOV_B32 : AMDGPU::S_CMOV_B64),
@@ -719,23 +757,6 @@ void SILowerControlFlow::lowerInitExec(MachineBasicBlock *MBB,
}
bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
- auto GetFallThroughSucc = [=](MachineBasicBlock *B) -> MachineBasicBlock * {
- auto *S = B->getNextNode();
- if (!S)
- return nullptr;
- if (B->isSuccessor(S)) {
- // The only fallthrough candidate
- MachineBasicBlock::iterator I(B->getFirstInstrTerminator());
- MachineBasicBlock::iterator E = B->end();
- for (; I != E; I++) {
- if (I->isBranch() && TII->getBranchDestBlock(*I) == S)
- // We have unoptimized branch to layout successor
- return nullptr;
- }
- }
- return S;
- };
-
for (auto &I : MBB.instrs()) {
if (!I.isDebugInstr() && !I.isUnconditionalBranch())
return false;
@@ -748,7 +769,7 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
while (!MBB.predecessors().empty()) {
MachineBasicBlock *P = *MBB.pred_begin();
- if (GetFallThroughSucc(P) == &MBB)
+ if (P->getFallThrough() == &MBB)
FallThrough = P;
P->ReplaceUsesOfBlockWith(&MBB, Succ);
}
@@ -757,10 +778,19 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
for (auto &I : MBB.instrs())
LIS->RemoveMachineInstrFromMaps(I);
}
+ if (MDT) {
+ // If Succ, the single successor of MBB, is dominated by MBB, update MDT by
+ // changing Succ's immediate dominator to MBB's immediate dominator;
+ // otherwise, MBB must be a leaf node in MDT and can be erased directly.
+ if (MDT->dominates(&MBB, Succ))
+ MDT->changeImmediateDominator(MDT->getNode(Succ),
+ MDT->getNode(&MBB)->getIDom());
+ MDT->eraseNode(&MBB);
+ }
MBB.clear();
MBB.eraseFromParent();
if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
- if (!GetFallThroughSucc(Succ)) {
+ if (!Succ->canFallThrough()) {
MachineFunction *MF = FallThrough->getParent();
MachineFunction::iterator FallThroughPos(FallThrough);
MF->splice(std::next(FallThroughPos), Succ);
@@ -780,6 +810,9 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
// This doesn't actually need LiveIntervals, but we can preserve them.
LIS = getAnalysisIfAvailable<LiveIntervals>();
+ // This doesn't actually need LiveVariables, but we can preserve them.
+ LV = getAnalysisIfAvailable<LiveVariables>();
+ MDT = getAnalysisIfAvailable<MachineDominatorTree>();
MRI = &MF.getRegInfo();
BoolRC = TRI->getBoolRC();
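
The two MachineDominatorTree updates added in this file (re-parenting the children of the split block in emitEndCf, and re-homing the successor's idom before erasing a block in removeMBBifRedundant) follow a simple rule that a toy model may make clearer. This is a deliberately simplified sketch with a string-keyed map, not LLVM's MachineDominatorTree API:

#include <map>
#include <string>

struct ToyDomTree {
  std::map<std::string, std::string> IDom; // block -> immediate dominator

  // emitEndCf-style update: MBB was split and the tail becomes SplitBB.
  void splitBlock(const std::string &MBB, const std::string &SplitBB) {
    for (auto &Entry : IDom)
      if (Entry.second == MBB)
        Entry.second = SplitBB; // former children of MBB now hang off SplitBB
    IDom[SplitBB] = MBB;        // SplitBB itself is immediately dominated by MBB
  }

  // removeMBBifRedundant-style update: MBB is erased, Succ is its only successor.
  void eraseBlock(const std::string &MBB, const std::string &Succ) {
    auto It = IDom.find(Succ);
    if (It != IDom.end() && It->second == MBB)
      It->second = IDom[MBB];   // Succ's idom becomes MBB's idom
    IDom.erase(MBB);            // otherwise MBB was a dom-tree leaf and just goes away
  }
};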
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 38b9d85b653b..55196fe334e6 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -35,7 +35,6 @@ class SILowerSGPRSpills : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
const SIInstrInfo *TII = nullptr;
- VirtRegMap *VRM = nullptr;
LiveIntervals *LIS = nullptr;
// Save and Restore blocks of the current function. Typically there is a
@@ -289,7 +288,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
- VRM = getAnalysisIfAvailable<VirtRegMap>();
LIS = getAnalysisIfAvailable<LiveIntervals>();
assert(SaveBlocks.empty() && RestoreBlocks.empty());
@@ -334,11 +332,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
for (MachineBasicBlock &MBB : MF) {
- MachineBasicBlock::iterator Next;
- for (auto I = MBB.begin(), E = MBB.end(); I != E; I = Next) {
- MachineInstr &MI = *I;
- Next = std::next(I);
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!TII->isSGPRSpill(MI))
continue;
@@ -369,11 +363,17 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
if (MI.isDebugValue() && MI.getOperand(0).isFI() &&
SpillFIs[MI.getOperand(0).getIndex()]) {
MI.getOperand(0).ChangeToRegister(Register(), false /*isDef*/);
- MI.getOperand(0).setIsDebug();
}
}
}
+ // Frame indices that are dead by now should be removed from the function
+ // frame. Otherwise, later pass(es) such as "stack slot coloring" may re-map
+ // freed frame index ids, which in turn would break the bookkeeping of
+ // "frame index to VGPR lane".
+ FuncInfo->removeDeadFrameIndices(MFI);
+
MadeChange = true;
} else if (FuncInfo->VGPRReservedForSGPRSpill) {
FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
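
Both this file and SILateBranchLowering above replace the manual "remember std::next(I) before touching I" loops with llvm::make_early_inc_range. A minimal standalone analogue of that idiom in plain C++ (no LLVM dependency; eraseIf and the std::list are only for illustration) shows why erasing the current element stays safe:

#include <iostream>
#include <list>

// Early-increment loop: advance the iterator before visiting the element, so
// the body may erase the element it is currently looking at.
template <typename Container, typename Pred>
void eraseIf(Container &C, Pred P) {
  for (auto It = C.begin(), End = C.end(); It != End;) {
    auto Cur = It++; // step past Cur first, like make_early_inc_range does
    if (P(*Cur))
      C.erase(Cur);  // safe: It already points at the next element
  }
}

int main() {
  std::list<int> L{1, 2, 3, 4, 5, 6};
  eraseIf(L, [](int V) { return V % 2 == 0; });
  for (int V : L)
    std::cout << V << ' '; // prints: 1 3 5
}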
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 85cfe36df16a..c4007f56f350 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -67,9 +67,11 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const bool UseFixedABI = AMDGPUTargetMachine::EnableFixedFunctionABI &&
CC != CallingConv::AMDGPU_Gfx &&
(!isEntryFunction() || HasCalls);
+ const bool IsKernel = CC == CallingConv::AMDGPU_KERNEL ||
+ CC == CallingConv::SPIR_KERNEL;
- if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL) {
- if (!F.arg_empty())
+ if (IsKernel) {
+ if (!F.arg_empty() || ST.getImplicitArgNumBytes(F) != 0)
KernargSegmentPtr = true;
WorkGroupIDX = true;
WorkItemIDX = true;
@@ -94,45 +96,76 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ArgDescriptor::createRegister(ScratchRSrcReg);
}
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr"))
+ if (!F.hasFnAttribute("amdgpu-no-implicitarg-ptr"))
ImplicitArgPtr = true;
} else {
- if (F.hasFnAttribute("amdgpu-implicitarg-ptr")) {
- KernargSegmentPtr = true;
- MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
- MaxKernArgAlign);
- }
+ ImplicitArgPtr = false;
+ MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
+ MaxKernArgAlign);
}
+ bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
+ if (isAmdHsaOrMesa && !ST.enableFlatScratch())
+ PrivateSegmentBuffer = true;
+ else if (ST.isMesaGfxShader(F))
+ ImplicitBufferPtr = true;
+
if (UseFixedABI) {
+ DispatchPtr = true;
+ QueuePtr = true;
+ ImplicitArgPtr = true;
WorkGroupIDX = true;
WorkGroupIDY = true;
WorkGroupIDZ = true;
WorkItemIDX = true;
WorkItemIDY = true;
WorkItemIDZ = true;
- ImplicitArgPtr = true;
- } else {
- if (F.hasFnAttribute("amdgpu-work-group-id-x"))
+
+ // FIXME: We don't need this?
+ DispatchID = true;
+ } else if (!AMDGPU::isGraphics(CC)) {
+ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workgroup-id-y"))
WorkGroupIDY = true;
- if (F.hasFnAttribute("amdgpu-work-group-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
WorkGroupIDZ = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-x"))
+ if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
WorkItemIDX = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-y"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-y"))
WorkItemIDY = true;
- if (F.hasFnAttribute("amdgpu-work-item-id-z"))
+ if (!F.hasFnAttribute("amdgpu-no-workitem-id-z"))
WorkItemIDZ = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-ptr"))
+ DispatchPtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-queue-ptr"))
+ QueuePtr = true;
+
+ if (!F.hasFnAttribute("amdgpu-no-dispatch-id"))
+ DispatchID = true;
}
+ // FIXME: This attribute is a hack; we just need an analysis on the function
+ // to look for allocas.
bool HasStackObjects = F.hasFnAttribute("amdgpu-stack-objects");
+
+ // TODO: This could be refined a lot. The attribute is a poor way of
+ // detecting calls or stack objects that may require it before argument
+ // lowering.
+ if (ST.hasFlatAddressSpace() && isEntryFunction() &&
+ (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
+ (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
+ !ST.flatScratchIsArchitected()) {
+ FlatScratchInit = true;
+ }
+
if (isEntryFunction()) {
// X, XY, and XYZ are the only supported combinations, so make sure Y is
// enabled if Z is.
@@ -150,44 +183,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
}
}
- bool isAmdHsaOrMesa = ST.isAmdHsaOrMesa(F);
- if (isAmdHsaOrMesa) {
- if (!ST.enableFlatScratch())
- PrivateSegmentBuffer = true;
-
- if (UseFixedABI) {
- DispatchPtr = true;
- QueuePtr = true;
-
- // FIXME: We don't need this?
- DispatchID = true;
- } else {
- if (F.hasFnAttribute("amdgpu-dispatch-ptr"))
- DispatchPtr = true;
-
- if (F.hasFnAttribute("amdgpu-queue-ptr"))
- QueuePtr = true;
-
- if (F.hasFnAttribute("amdgpu-dispatch-id"))
- DispatchID = true;
- }
- } else if (ST.isMesaGfxShader(F)) {
- ImplicitBufferPtr = true;
- }
-
- if (UseFixedABI || F.hasFnAttribute("amdgpu-kernarg-segment-ptr"))
- KernargSegmentPtr = true;
-
- // TODO: This could be refined a lot. The attribute is a poor way of
- // detecting calls or stack objects that may require it before argument
- // lowering.
- if (ST.hasFlatAddressSpace() && isEntryFunction() &&
- (isAmdHsaOrMesa || ST.enableFlatScratch()) &&
- (HasCalls || HasStackObjects || ST.enableFlatScratch()) &&
- !ST.flatScratchIsArchitected()) {
- FlatScratchInit = true;
- }
-
Attribute A = F.getFnAttribute("amdgpu-git-ptr-high");
StringRef S = A.getValueAsString();
if (!S.empty())
@@ -426,7 +421,7 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
OtherUsedRegs.set(Reg);
SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
- for (unsigned I = 0; I < NumLanes; ++I) {
+ for (int I = NumLanes - 1; I >= 0; --I) {
NextSpillReg = std::find_if(
NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
@@ -447,10 +442,16 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
}
void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
- // The FP & BP spills haven't been inserted yet, so keep them around.
- for (auto &R : SGPRToVGPRSpills) {
- if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex)
+ // Remove dead frame indices from the function frame, but keep FP & BP since
+ // their spills haven't been inserted yet. Also make sure to remove the frame
+ // indices from the `SGPRToVGPRSpills` data structure; otherwise, any
+ // re-mapping of freed frame indices by later pass(es) like "stack slot
+ // coloring" could lead to unexpected side effects and bugs.
+ for (auto &R : make_early_inc_range(SGPRToVGPRSpills)) {
+ if (R.first != FramePointerSaveIndex && R.first != BasePointerSaveIndex) {
MFI.RemoveStackObject(R.first);
+ SGPRToVGPRSpills.erase(R.first);
+ }
}
// All other SGPRs must be allocated on the default stack, so reset the stack
@@ -650,3 +651,38 @@ bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
}
return false;
}
+
+bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
+ if (UsesAGPRs)
+ return *UsesAGPRs;
+
+ if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
+ MF.getFrameInfo().hasCalls()) {
+ UsesAGPRs = true;
+ return true;
+ }
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ const Register Reg = Register::index2VirtReg(I);
+ const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
+ if (RC && SIRegisterInfo::isAGPRClass(RC)) {
+ UsesAGPRs = true;
+ return true;
+ } else if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
+ // Defer caching UsesAGPRs; the function might not have been regbank
+ // selected yet.
+ return true;
+ }
+ }
+
+ for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
+ if (MRI.isPhysRegUsed(Reg)) {
+ UsesAGPRs = true;
+ return true;
+ }
+ }
+
+ UsesAGPRs = false;
+ return false;
+}
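
usesAGPRs() above combines lazy evaluation with a mutable Optional cache, and deliberately skips caching in the one case where the answer may still change (a generic virtual register before register-bank selection). A hedged, standalone sketch of the same memoization shape in plain C++ (std::optional instead of llvm::Optional; the two bool parameters stand in for the real queries):

#include <optional>

class FunctionInfoLike {
  mutable std::optional<bool> UsesAGPRs; // unset until the answer is final

public:
  bool usesAGPRs(bool DefinitelyNeedsAGPRs, bool MightStillChange) const {
    if (UsesAGPRs)
      return *UsesAGPRs;   // cached answer
    if (DefinitelyNeedsAGPRs) {
      UsesAGPRs = true;    // safe to cache: this cannot change later
      return true;
    }
    if (MightStillChange)
      return true;         // defer caching, mirroring the comment above
    UsesAGPRs = false;
    return false;
  }
};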
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index fb6d4f8841ab..c305bc20e40d 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -26,9 +26,9 @@ namespace llvm {
class MachineFrameInfo;
class MachineFunction;
-class TargetRegisterClass;
class SIMachineFunctionInfo;
class SIRegisterInfo;
+class TargetRegisterClass;
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
public:
@@ -433,6 +433,8 @@ private:
// Current recorded maximum possible occupancy.
unsigned Occupancy;
+ mutable Optional<bool> UsesAGPRs;
+
MCPhysReg getNextUserSGPR() const;
MCPhysReg getNextSystemSGPR() const;
@@ -946,6 +948,9 @@ public:
Occupancy = Limit;
limitOccupancy(MF);
}
+
+ // \returns true if a function needs or may need AGPRs.
+ bool usesAGPRs(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 278dd05b049c..5590d84cc3ab 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -403,7 +403,7 @@ void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
}
// TODO: compute InternalAdditionnalPressure.
- InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+ InternalAdditionalPressure.resize(TopPressure.MaxSetPressure.size());
// Check everything is right.
#ifndef NDEBUG
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index a2f5a1453d6a..ac34a748edbc 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -25,6 +25,8 @@ namespace llvm {
class SIInstrInfo;
class SIRegisterInfo;
+class SIScheduleDAGMI;
+class SIScheduleBlockCreator;
enum SIScheduleCandReason {
NoCand,
@@ -48,9 +50,6 @@ struct SISchedulerCandidate {
void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
};
-class SIScheduleDAGMI;
-class SIScheduleBlockCreator;
-
enum SIScheduleBlockLinkKind {
NoData,
Data
@@ -73,7 +72,7 @@ class SIScheduleBlock {
// store the live virtual and real registers.
// We do care only of SGPR32 and VGPR32 and do track only virtual registers.
// Pressure of additional registers required inside the block.
- std::vector<unsigned> InternalAdditionnalPressure;
+ std::vector<unsigned> InternalAdditionalPressure;
// Pressure of input and output registers
std::vector<unsigned> LiveInPressure;
std::vector<unsigned> LiveOutPressure;
@@ -154,8 +153,8 @@ public:
// Needs the block to be scheduled inside
// TODO: find a way to compute it.
- std::vector<unsigned> &getInternalAdditionnalRegUsage() {
- return InternalAdditionnalPressure;
+ std::vector<unsigned> &getInternalAdditionalRegUsage() {
+ return InternalAdditionalPressure;
}
std::set<unsigned> &getInRegs() { return LiveInRegs; }
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 71be73c2f0e4..29f072ca1e6c 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -126,8 +126,7 @@ private:
(OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
SIAtomicAddrSpace::NONE &&
(InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
- SIAtomicAddrSpace::NONE &&
- !isStrongerThan(FailureOrdering, Ordering));
+ SIAtomicAddrSpace::NONE);
// There is also no cross address space ordering if the ordering
// address space is the same as the instruction address space and
@@ -369,7 +368,7 @@ protected:
public:
- SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {};
+ SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -410,7 +409,7 @@ public:
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:
- SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {};
+ SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}
bool insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -422,7 +421,7 @@ public:
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:
- SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {};
+ SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -471,7 +470,7 @@ protected:
public:
- SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {};
+ SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -651,14 +650,11 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
}
SSID = IsSyncScopeInclusion.getValue() ? SSID : MMO->getSyncScopeID();
- Ordering = isStrongerThan(Ordering, OpOrdering)
- ? Ordering
- : MMO->getSuccessOrdering();
+ Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
FailureOrdering =
- isStrongerThan(FailureOrdering, MMO->getFailureOrdering()) ?
- FailureOrdering : MMO->getFailureOrdering();
+ getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
}
}
@@ -859,7 +855,7 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
// instructions. The latter are always marked as volatile so cannot sensibly
// handle it as do not want to pessimize all atomics. Also they do not support
// the nontemporal attribute.
- assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
@@ -1035,8 +1031,8 @@ bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
Position Pos) const {
- return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
- IsCrossAddrSpaceOrdering, Pos);
+ return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
+ IsCrossAddrSpaceOrdering, Pos);
}
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
@@ -1108,7 +1104,8 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
// different CUs. Therefore need to bypass the L1 which is per CU.
// Otherwise in non-threadgroup split mode all waves of a work-group are
// on the same CU, and so the L1 does not need to be bypassed.
- if (ST.isTgSplitEnabled()) Changed |= enableGLCBit(MI);
+ if (ST.isTgSplitEnabled())
+ Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -1204,14 +1201,13 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
// instructions. The latter are always marked as volatile so cannot sensibly
// handle it as do not want to pessimize all atomics. Also they do not support
// the nontemporal attribute.
- assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
if (IsVolatile) {
- if (Op == SIMemOp::LOAD) {
+ if (Op == SIMemOp::LOAD)
Changed |= enableGLCBit(MI);
- }
// Ensure operation has completed at system scope to cause all volatile
// operations to be visible outside the program in a global order. Do not
@@ -1398,7 +1394,8 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
// the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
// CU mode all waves of a work-group are on the same CU, and so the L0
// does not need to be bypassed.
- if (!ST.isCuModeEnabled()) Changed |= enableGLCBit(MI);
+ if (!ST.isCuModeEnabled())
+ Changed |= enableGLCBit(MI);
break;
case SIAtomicScope::WAVEFRONT:
case SIAtomicScope::SINGLETHREAD:
@@ -1432,12 +1429,11 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
// instructions. The latter are always marked as volatile so cannot sensibly
// handle it as do not want to pessimize all atomics. Also they do not support
// the nontemporal attribute.
- assert( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
+ assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
bool Changed = false;
if (IsVolatile) {
-
if (Op == SIMemOp::LOAD) {
Changed |= enableGLCBit(MI);
Changed |= enableDLCBit(MI);
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 3d659eca47db..69eab762f05c 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -225,7 +225,7 @@ void SIModeRegister::processBlockPhase1(MachineBasicBlock &MBB,
// RequirePending is used to indicate whether we are collecting the initial
// requirements for the block, and need to defer the first InsertionPoint to
// Phase 3. It is set to false once we have set FirstInsertionPoint, or when
- // we discover an explict setreg that means this block doesn't have any
+ // we discover an explicit setreg that means this block doesn't have any
// initial requirements.
bool RequirePending = true;
Status IPChange;
@@ -373,12 +373,8 @@ void SIModeRegister::processBlockPhase2(MachineBasicBlock &MBB,
BlockInfo[ThisBlock]->Exit = TmpStatus;
// Add the successors to the work list so we can propagate the changed exit
// status.
- for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
- E = MBB.succ_end();
- S != E; S = std::next(S)) {
- MachineBasicBlock &B = *(*S);
- Phase2List.push(&B);
- }
+ for (MachineBasicBlock *Succ : MBB.successors())
+ Phase2List.push(Succ);
}
BlockInfo[ThisBlock]->ExitSet = ExitSet;
if (RevisitRequired)
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index 307c9eba9d3b..6bf6c45d8cf6 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -11,7 +11,7 @@
/// structures and waterfall loops.
///
/// When we do structurization, we usually transform an if-else into two
-/// sucessive if-then (with a flow block to do predicate inversion). Consider a
+/// successive if-then (with a flow block to do predicate inversion). Consider a
/// simple case after structurization: A divergent value %a was defined before
/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
/// bb.if:
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index 7d7a753bb333..6a698348d389 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -365,7 +365,7 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) {
if (Dst &&
DstUnused->getImm() == AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) {
- // This will work if the tied src is acessing WORD_0, and the dst is
+ // This will work if the tied src is accessing WORD_0, and the dst is
// writing WORD_1. Modifiers don't matter because all the bits that
// would be impacted are being overwritten by the dst.
// Any other case will not work.
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index e05aafe5e291..13a6a718f4f2 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -8,7 +8,7 @@
//
/// \file
/// This pass creates bundles of memory instructions to protect adjacent loads
-/// and stores from beeing rescheduled apart from each other post-RA.
+/// and stores from being rescheduled apart from each other post-RA.
///
//===----------------------------------------------------------------------===//
@@ -90,6 +90,9 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
BitVector &UsedRegUnits) const {
+ if (MI.isDebugInstr())
+ return;
+
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.readsReg())
continue;
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index dce0f4b0df5f..d1b8e217471e 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -174,7 +174,7 @@ bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
MI.setDesc(TII->get(AMDGPU::S_BRANCH));
} else if (IsVCCZ && MaskValue == 0) {
// Will always branch
- // Remove all succesors shadowed by new unconditional branch
+ // Remove all successors shadowed by new unconditional branch
MachineBasicBlock *Parent = MI.getParent();
SmallVector<MachineInstr *, 4> ToRemove;
bool Found = false;
@@ -257,10 +257,8 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
})) {
// The only exception allowed here is another indirect vector move
// with the same mode.
- if (!IdxOn ||
- !((I->getOpcode() == AMDGPU::V_MOV_B32_e32 &&
- I->hasRegisterImplicitUseOperand(AMDGPU::M0)) ||
- I->getOpcode() == AMDGPU::V_MOV_B32_indirect))
+ if (!IdxOn || !(I->getOpcode() == AMDGPU::V_MOV_B32_indirect_write ||
+ I->getOpcode() == AMDGPU::V_MOV_B32_indirect_read))
return false;
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index bba5bf7fdbc3..bfbe84f696f8 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -97,7 +97,7 @@ struct SGPRSpillBuilder {
unsigned EltSize = 4;
RegScavenger *RS;
- MachineBasicBlock &MBB;
+ MachineBasicBlock *MBB;
MachineFunction &MF;
SIMachineFunctionInfo &MFI;
const SIInstrInfo &TII;
@@ -110,9 +110,14 @@ struct SGPRSpillBuilder {
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
bool IsWave32, MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS)
- : SuperReg(MI->getOperand(0).getReg()), MI(MI),
- IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index),
- RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()),
+ : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
+ MI->getOperand(0).isKill(), Index, RS) {}
+
+ SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
+ bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
+ bool IsKill, int Index, RegScavenger *RS)
+ : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
+ Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
IsWave32(IsWave32) {
const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
@@ -189,8 +194,9 @@ struct SGPRSpillBuilder {
if (SavedExecReg) {
RS->setRegUsed(SavedExecReg);
// Set exec to needed lanes
- BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
- auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
+ BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
+ auto I =
+ BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
if (!TmpVGPRLive)
I.addReg(TmpVGPR, RegState::ImplicitDefine);
// Spill needed lanes
@@ -201,7 +207,7 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
/*IsKill*/ false);
// Spill inactive lanes
- auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
if (!TmpVGPRLive)
I.addReg(TmpVGPR, RegState::ImplicitDefine);
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
@@ -224,7 +230,7 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
/*IsKill*/ false);
// Restore exec
- auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg)
+ auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
.addReg(SavedExecReg, RegState::Kill);
// Add an implicit use of the load so it is not dead.
// FIXME This inserts an unnecessary waitcnt
@@ -235,7 +241,7 @@ struct SGPRSpillBuilder {
// Restore inactive lanes
TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
/*IsKill*/ false);
- auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
if (!TmpVGPRLive) {
I.addReg(TmpVGPR, RegState::ImplicitKill);
}
@@ -261,11 +267,17 @@ struct SGPRSpillBuilder {
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
/*IsKill*/ false);
// Spill inactive lanes
- BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
- BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
+ BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
}
}
+
+ void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
+ assert(MBB->getParent() == &MF);
+ MI = NewMI;
+ MBB = NewMBB;
+ }
};
} // namespace llvm
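MBB becomes a pointer rather than a reference because the builder can now be retargeted after construction: the new setMI() (used by spillEmergencySGPR() further down) moves the insertion point into a different block, and a reference member cannot be reseated. A minimal standalone sketch of that distinction, using made-up Block/Builder types rather than the MachineBasicBlock API:

#include <cassert>
#include <vector>

struct Block { std::vector<int> Insts; };

// Holding Block* instead of Block& is what lets setBlock() retarget later
// insertions, mirroring the SGPRSpillBuilder::setMI() change in spirit.
struct Builder {
  Block *MBB;                      // a Block & here could never be reseated
  explicit Builder(Block &B) : MBB(&B) {}
  void setBlock(Block &NewB) { MBB = &NewB; }
  void emit(int Inst) { MBB->Insts.push_back(Inst); }
};

int main() {
  Block Entry, Restore;
  Builder B(Entry);
  B.emit(1);           // lands in Entry
  B.setBlock(Restore); // analogous to SB.setMI(&RestoreMBB, RestoreMBB.end())
  B.emit(2);           // lands in Restore
  assert(Entry.Insts.size() == 1 && Restore.Insts.size() == 1);
}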
@@ -348,10 +360,13 @@ const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
- case CallingConv::AMDGPU_Gfx:
return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
? CSR_AMDGPU_HighRegs_With_AGPRs_SaveList
: CSR_AMDGPU_HighRegs_SaveList;
+ case CallingConv::AMDGPU_Gfx:
+ return MF->getSubtarget<GCNSubtarget>().hasGFX90AInsts()
+ ? CSR_AMDGPU_SI_Gfx_With_AGPRs_SaveList
+ : CSR_AMDGPU_SI_Gfx_SaveList;
default: {
// Dummy to not crash RegisterClassInfo.
static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
@@ -371,10 +386,13 @@ const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Cold:
- case CallingConv::AMDGPU_Gfx:
return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
? CSR_AMDGPU_HighRegs_With_AGPRs_RegMask
: CSR_AMDGPU_HighRegs_RegMask;
+ case CallingConv::AMDGPU_Gfx:
+ return MF.getSubtarget<GCNSubtarget>().hasGFX90AInsts()
+ ? CSR_AMDGPU_SI_Gfx_With_AGPRs_RegMask
+ : CSR_AMDGPU_SI_Gfx_RegMask;
default:
return nullptr;
}
@@ -501,18 +519,36 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
- // TODO: In an entry function without calls and AGPRs used it is possible
- // to use the whole register budget for VGPRs. Even more it shall
- // be possible to estimate maximum AGPR/VGPR pressure and split
- // register file accordingly.
- if (ST.hasGFX90AInsts())
- MaxNumVGPRs /= 2;
+ unsigned MaxNumAGPRs = MaxNumVGPRs;
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+
+ if (ST.hasGFX90AInsts()) {
+    // In an entry function that makes no calls and uses no AGPRs, it is
+    // possible to use the whole register budget for VGPRs.
+
+ // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and
+ // split register file accordingly.
+ if (MFI->usesAGPRs(MF)) {
+ MaxNumVGPRs /= 2;
+ MaxNumAGPRs = MaxNumVGPRs;
+ } else {
+ if (MaxNumVGPRs > TotalNumVGPRs) {
+ MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
+ MaxNumVGPRs = TotalNumVGPRs;
+ } else
+ MaxNumAGPRs = 0;
+ }
+ }
+
for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
- Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ }
+
+ for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
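The rewritten reservation code splits the gfx90a register budget between VGPRs and AGPRs instead of always halving it. A standalone model of just that arithmetic, with made-up numbers (the real limits come from the subtarget, and the hasGFX90AInsts guard is omitted):

#include <cstdio>

// Mirrors the branch structure added above. Values are illustrative only.
static void splitBudget(unsigned MaxNumVGPRs, unsigned TotalNumVGPRs,
                        bool UsesAGPRs) {
  unsigned MaxNumAGPRs = MaxNumVGPRs;
  if (UsesAGPRs) {
    MaxNumVGPRs /= 2;                          // split the budget evenly
    MaxNumAGPRs = MaxNumVGPRs;
  } else if (MaxNumVGPRs > TotalNumVGPRs) {
    MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs; // leftover budget goes to AGPRs
    MaxNumVGPRs = TotalNumVGPRs;
  } else {
    MaxNumAGPRs = 0;                           // whole budget stays in VGPRs
  }
  std::printf("VGPRs=%u AGPRs=%u\n", MaxNumVGPRs, MaxNumAGPRs);
}

int main() {
  splitBudget(384, 256, /*UsesAGPRs=*/true);  // VGPRs=192 AGPRs=192
  splitBudget(384, 256, /*UsesAGPRs=*/false); // VGPRs=256 AGPRs=128
  splitBudget(128, 256, /*UsesAGPRs=*/false); // VGPRs=128 AGPRs=0
}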
@@ -536,8 +572,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
- const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
@@ -801,6 +835,14 @@ const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
return &AMDGPU::VGPR_32RegClass;
}
+const TargetRegisterClass *
+SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
+ return getEquivalentVGPRClass(RC);
+
+ return RC;
+}
+
static unsigned getNumSubRegsForSpillOp(unsigned Op) {
switch (Op) {
@@ -1037,7 +1079,7 @@ static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
}
void SIRegisterInfo::buildSpillLoadStore(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
RegScavenger *RS, LivePhysRegs *LiveRegs) const {
@@ -1049,7 +1091,6 @@ void SIRegisterInfo::buildSpillLoadStore(
const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
- const DebugLoc &DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
bool IsStore = Desc->mayStore();
bool IsFlat = TII->isFLATScratch(LoadStoreOp);
@@ -1177,9 +1218,19 @@ void SIRegisterInfo::buildSpillLoadStore(
bool NeedSuperRegDef = e > 1 && IsStore && i == 0;
bool NeedSuperRegImpOperand = e > 1;
- unsigned Lane = RegOffset / 4;
- unsigned LaneE = (RegOffset + EltSize) / 4;
- for ( ; Lane != LaneE; ++Lane) {
+    // Remaining element size to spill into memory after some parts of it
+    // have been spilled into either AGPRs or VGPRs.
+ unsigned RemEltSize = EltSize;
+
+    // AGPRs to spill VGPRs and vice versa are allocated in reverse order,
+    // starting from the last lane. If a register cannot be completely
+    // spilled into another register, this ensures its alignment does not
+    // change. For targets with a VGPR alignment requirement this matters
+    // when flat scratch is used, as we might otherwise emit a scratch_load
+    // or scratch_store of an unaligned register.
+ for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
+ LaneE = RegOffset / 4;
+ Lane >= LaneE; --Lane) {
bool IsSubReg = e > 1 || EltSize > 4;
Register Sub = IsSubReg
? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
@@ -1187,33 +1238,29 @@ void SIRegisterInfo::buildSpillLoadStore(
auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
if (!MIB.getInstr())
break;
- if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == 0)) {
+ if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && !i)) {
MIB.addReg(ValueReg, RegState::ImplicitDefine);
NeedSuperRegDef = false;
}
if (IsSubReg || NeedSuperRegImpOperand) {
NeedSuperRegImpOperand = true;
unsigned State = SrcDstRegState;
- if (Lane + 1 != LaneE)
+ if (Lane != LaneE)
State &= ~RegState::Kill;
MIB.addReg(ValueReg, RegState::Implicit | State);
}
+ RemEltSize -= 4;
}
- if (Lane == LaneE) // Fully spilled into AGPRs.
+ if (!RemEltSize) // Fully spilled into AGPRs.
continue;
- // Offset in bytes from the beginning of the ValueReg to its portion we
- // still need to spill. It may differ from RegOffset if a portion of
- // current SubReg has been already spilled into AGPRs by the loop above.
- unsigned RemRegOffset = Lane * 4;
- unsigned RemEltSize = EltSize - (RemRegOffset - RegOffset);
if (RemEltSize != EltSize) { // Partially spilled to AGPRs
assert(IsFlat && EltSize > 4);
unsigned NumRegs = RemEltSize / 4;
SubReg = Register(getSubReg(ValueReg,
- getSubRegFromChannel(RemRegOffset / 4, NumRegs)));
+ getSubRegFromChannel(RegOffset / 4, NumRegs)));
unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
Desc = &TII->get(Opc);
}
@@ -1240,10 +1287,10 @@ void SIRegisterInfo::buildSpillLoadStore(
SubReg = TmpReg;
}
- MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RemRegOffset);
+ MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
MachineMemOperand *NewMMO =
MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
- commonAlignment(Alignment, RemRegOffset));
+ commonAlignment(Alignment, RegOffset));
auto MIB =
BuildMI(MBB, MI, DL, *Desc)
@@ -1257,7 +1304,7 @@ void SIRegisterInfo::buildSpillLoadStore(
} else {
MIB.addReg(SOffset, SOffsetRegState);
}
- MIB.addImm(Offset + RemRegOffset)
+ MIB.addImm(Offset + RegOffset)
.addImm(0); // cpol
if (!IsFlat)
MIB.addImm(0) // tfe
@@ -1307,13 +1354,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
if (IsLoad) {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
- buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg,
- Offset * SB.EltSize, MMO, SB.RS);
+ buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
+ FrameReg, Offset * SB.EltSize, MMO, SB.RS);
} else {
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
- buildSpillLoadStore(SB.MBB, SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg,
- Offset * SB.EltSize, MMO, SB.RS);
+ buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
+ FrameReg, Offset * SB.EltSize, MMO, SB.RS);
// This only ever adds one VGPR spill
SB.MFI.addToSpilledVGPRs(1);
}
@@ -1336,6 +1383,10 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
SB.SuperReg != SB.MFI.getFrameOffsetReg()));
if (SpillToVGPR) {
+
+ assert(SB.NumSubRegs == VGPRSpills.size() &&
+ "Num of VGPR lanes should be equal to num of SGPRs spilled");
+
for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
Register SubReg =
SB.NumSubRegs == 1
@@ -1347,8 +1398,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
- auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
- Spill.VGPR)
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
+ SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR);
@@ -1394,7 +1445,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
MachineInstrBuilder WriteLane =
- BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
SB.TmpVGPR)
.addReg(SubReg, SubKillState)
.addImm(i % PVD.PerVGPR)
@@ -1456,10 +1507,10 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
- auto MIB =
- BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane);
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
if (SB.NumSubRegs > 1 && i == 0)
MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
if (LIS) {
@@ -1490,7 +1541,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
bool LastSubReg = (i + 1 == e);
- auto MIB = BuildMI(SB.MBB, MI, SB.DL,
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
.addImm(i);
@@ -1516,6 +1567,75 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
return true;
}
+bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
+ MachineBasicBlock &RestoreMBB,
+ Register SGPR, RegScavenger *RS) const {
+ SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
+ RS);
+ SB.prepare();
+ // Generate the spill of SGPR to SB.TmpVGPR.
+ unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
+ auto PVD = SB.getPerVGPRData();
+ for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+ unsigned TmpVGPRFlags = RegState::Undef;
+ // Write sub registers into the VGPR
+ for (unsigned i = Offset * PVD.PerVGPR,
+ e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+ i < e; ++i) {
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+
+ MachineInstrBuilder WriteLane =
+ BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
+ SB.TmpVGPR)
+ .addReg(SubReg, SubKillState)
+ .addImm(i % PVD.PerVGPR)
+ .addReg(SB.TmpVGPR, TmpVGPRFlags);
+ TmpVGPRFlags = 0;
+ // There could be undef components of a spilled super register.
+ // TODO: Can we detect this and skip the spill?
+ if (SB.NumSubRegs > 1) {
+ // The last implicit use of the SB.SuperReg carries the "Kill" flag.
+ unsigned SuperKillState = 0;
+ if (i + 1 == SB.NumSubRegs)
+ SuperKillState |= getKillRegState(SB.IsKill);
+ WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
+ }
+ }
+ // Don't need to write VGPR out.
+ }
+
+ // Restore clobbered registers in the specified restore block.
+ MI = RestoreMBB.end();
+ SB.setMI(&RestoreMBB, MI);
+ // Generate the restore of SGPR from SB.TmpVGPR.
+ for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
+ // Don't need to load VGPR in.
+ // Unpack lanes
+ for (unsigned i = Offset * PVD.PerVGPR,
+ e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
+ i < e; ++i) {
+ Register SubReg =
+ SB.NumSubRegs == 1
+ ? SB.SuperReg
+ : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
+ bool LastSubReg = (i + 1 == e);
+ auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
+ .addImm(i);
+ if (SB.NumSubRegs > 1 && i == 0)
+ MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
+ }
+ }
+ SB.restore();
+
+ SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
+ return false;
+}
+
/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
/// a VGPR and the stack slot can be safely eliminated when all other users are
/// handled.
@@ -1632,7 +1752,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
auto *MBB = MI->getParent();
buildSpillLoadStore(
- *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
@@ -1668,7 +1788,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
auto *MBB = MI->getParent();
buildSpillLoadStore(
- *MBB, MI, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
+ *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MI->eraseFromParent();
@@ -2152,34 +2272,6 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
return isSGPRClass(RC);
}
-// TODO: It might be helpful to have some target specific flags in
-// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
-bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
- unsigned Size = getRegSizeInBits(*RC);
- if (Size == 16) {
- return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr ||
- getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr;
- }
- const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
- if (!VRC) {
- assert(Size < 32 && "Invalid register class size");
- return false;
- }
- return getCommonSubClass(VRC, RC) != nullptr;
-}
-
-bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const {
- unsigned Size = getRegSizeInBits(*RC);
- if (Size < 16)
- return false;
- const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
- if (!ARC) {
- assert(getVGPRClassForBitWidth(Size) && "Invalid register class size");
- return false;
- }
- return getCommonSubClass(ARC, RC) != nullptr;
-}
-
const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
unsigned Size = getRegSizeInBits(*SRC);
@@ -2321,7 +2413,7 @@ bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
// Registers without classes are unaddressable, SGPR-like registers.
- return RC && hasVGPRs(RC);
+ return RC && isVGPRClass(RC);
}
bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
@@ -2329,7 +2421,7 @@ bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
// Registers without classes are unaddressable, SGPR-like registers.
- return RC && hasAGPRs(RC);
+ return RC && isAGPRClass(RC);
}
bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
@@ -2427,8 +2519,10 @@ SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB, MRI);
- const TargetRegisterClass *RC = RCOrRB.get<const TargetRegisterClass*>();
- return getAllocatableClass(RC);
+ if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ return getAllocatableClass(RC);
+
+ return nullptr;
}
MCRegister SIRegisterInfo::getVCC() const {
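The getConstrainedRegClassForOperand hunk above swaps PointerUnion::get (which asserts if the union holds the other alternative) for dyn_cast plus a nullptr fallback. A self-contained sketch of that PointerUnion idiom, using placeholder Bank/RegClass types rather than RegisterBank/TargetRegisterClass:

#include "llvm/ADT/PointerUnion.h"
#include <cassert>

// Placeholder types; alignas keeps low pointer bits free for the union's tag.
struct alignas(8) Bank {};
struct alignas(8) RegClass {};

// dyn_cast<T>() returns nullptr when the union holds the other type, whereas
// get<T>() would assert; that is the behavioural difference introduced above.
static const RegClass *
asRegClass(llvm::PointerUnion<const Bank *, const RegClass *> U) {
  if (const auto *RC = U.dyn_cast<const RegClass *>())
    return RC;
  return nullptr;
}

int main() {
  Bank B;
  RegClass RC;
  llvm::PointerUnion<const Bank *, const RegClass *> FromRC(&RC), FromBank(&B);
  assert(asRegClass(FromRC) == &RC);
  assert(asRegClass(FromBank) == nullptr); // no assertion, just a null result
}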
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 2a92051e5fb2..8d90ddb1cf4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -17,6 +17,8 @@
#define GET_REGINFO_HEADER
#include "AMDGPUGenRegisterInfo.inc"
+#include "SIDefines.h"
+
namespace llvm {
class GCNSubtarget;
@@ -24,7 +26,6 @@ class LiveIntervals;
class LivePhysRegs;
class RegisterBank;
struct SGPRSpillBuilder;
-class SIMachineFunctionInfo;
class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
private:
@@ -108,6 +109,13 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
+ /// Returns a legal register class to copy a register in the specified class
+ /// to or from. If it is possible to copy the register directly without using
+ /// a cross register class copy, return the specified RC. Returns NULL if it
+ /// is not possible to copy between two registers of the specified class.
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
+
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
bool IsLoad, bool IsKill = true) const;
@@ -122,6 +130,10 @@ public:
LiveIntervals *LIS = nullptr,
bool OnlyToVGPR = false) const;
+ bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
+ MachineBasicBlock &RestoreMBB, Register SGPR,
+ RegScavenger *RS) const;
+
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
@@ -151,7 +163,7 @@ public:
const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
/// \returns true if this class contains only SGPR registers
- bool isSGPRClass(const TargetRegisterClass *RC) const {
+ static bool isSGPRClass(const TargetRegisterClass *RC) {
return !hasVGPRs(RC) && !hasAGPRs(RC);
}
@@ -162,19 +174,28 @@ public:
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const;
+ /// \returns true if this class contains only VGPR registers
+ static bool isVGPRClass(const TargetRegisterClass *RC) {
+ return hasVGPRs(RC) && !hasAGPRs(RC);
+ }
+
/// \returns true if this class contains only AGPR registers
- bool isAGPRClass(const TargetRegisterClass *RC) const {
+ static bool isAGPRClass(const TargetRegisterClass *RC) {
return hasAGPRs(RC) && !hasVGPRs(RC);
}
/// \returns true if this class contains VGPR registers.
- bool hasVGPRs(const TargetRegisterClass *RC) const;
+ static bool hasVGPRs(const TargetRegisterClass *RC) {
+ return RC->TSFlags & SIRCFlags::HasVGPR;
+ }
/// \returns true if this class contains AGPR registers.
- bool hasAGPRs(const TargetRegisterClass *RC) const;
+ static bool hasAGPRs(const TargetRegisterClass *RC) {
+ return RC->TSFlags & SIRCFlags::HasAGPR;
+ }
/// \returns true if this class contains any vector registers.
- bool hasVectorRegisters(const TargetRegisterClass *RC) const {
+ static bool hasVectorRegisters(const TargetRegisterClass *RC) {
return hasVGPRs(RC) || hasAGPRs(RC);
}
@@ -350,10 +371,11 @@ public:
// For creating spill instructions during frame lowering, where no scavenger
// is available, LiveRegs can be used.
void buildSpillLoadStore(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned LoadStoreOp,
- int Index, Register ValueReg, bool ValueIsKill,
- MCRegister ScratchOffsetReg, int64_t InstrOffset,
- MachineMemOperand *MMO, RegScavenger *RS,
+ MachineBasicBlock::iterator MI, const DebugLoc &DL,
+ unsigned LoadStoreOp, int Index, Register ValueReg,
+ bool ValueIsKill, MCRegister ScratchOffsetReg,
+ int64_t InstrOffset, MachineMemOperand *MMO,
+ RegScavenger *RS,
LivePhysRegs *LiveRegs = nullptr) const;
};
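The static hasVGPRs/hasAGPRs helpers above now read per-class flag bits out of TargetRegisterClass::TSFlags; the bits are assigned in the SIRegisterInfo.td change that follows (TSFlags{0} = HasVGPR, TSFlags{1} = HasAGPR). A small standalone sketch of the layout those helpers assume; the real SIRCFlags enum lives in SIDefines.h, which is not part of this diff, so the values here are an assumption:

#include <cassert>
#include <cstdint>

// Assumed bit positions, matching "let TSFlags{0} = HasVGPR" and
// "let TSFlags{1} = HasAGPR" below; SIDefines.h is the authoritative source.
namespace SIRCFlags {
enum : uint64_t { HasVGPR = 1u << 0, HasAGPR = 1u << 1 };
} // namespace SIRCFlags

// Stand-in for TargetRegisterClass, which carries a TSFlags field.
struct FakeRegClass { uint64_t TSFlags; };

static bool hasVGPRs(const FakeRegClass &RC) { return RC.TSFlags & SIRCFlags::HasVGPR; }
static bool hasAGPRs(const FakeRegClass &RC) { return RC.TSFlags & SIRCFlags::HasAGPR; }
static bool isVGPRClass(const FakeRegClass &RC) { return hasVGPRs(RC) && !hasAGPRs(RC); }

int main() {
  FakeRegClass VGPR{SIRCFlags::HasVGPR};                    // e.g. VGPR_32
  FakeRegClass AV{SIRCFlags::HasVGPR | SIRCFlags::HasAGPR}; // e.g. AV_32
  assert(isVGPRClass(VGPR) && !isVGPRClass(AV) && hasAGPRs(AV));
}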
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 6e3c4e8775f3..cf1d90484228 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -126,8 +126,16 @@ class SIReg <string n, bits<16> regIdx = 0> :
let HWEncoding = regIdx;
}
-class SIRegWithSubRegs <string n, list<Register> subregs, bits<16> regIdx> :
- RegisterWithSubRegs<n, subregs> {
+// For register classes that use TSFlags.
+class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
+ : RegisterClass <n, rTypes, Align, rList> {
+ // For vector register classes.
+ field bit HasVGPR = 0;
+ field bit HasAGPR = 0;
+
+ // These need to be kept in sync with the enum SIRCFlags.
+ let TSFlags{0} = HasVGPR;
+ let TSFlags{1} = HasAGPR;
}
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
@@ -490,14 +498,15 @@ class RegisterTypes<list<ValueType> reg_types> {
def Reg16Types : RegisterTypes<[i16, f16]>;
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, p2, p3, p5, p6]>;
-def VGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+let HasVGPR = 1 in {
+def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_LO16", 0, 255))> {
let AllocationPriority = 1;
let Size = 16;
let GeneratePressureSet = 0;
}
-def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "VGPR%u_HI16", 0, 255))> {
let AllocationPriority = 1;
let Size = 16;
@@ -506,12 +515,13 @@ def VGPR_HI16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
// VGPR 32-bit registers
// i16/f16 only on VI+
-def VGPR_32 : RegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
+def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}
+} // End HasVGPR = 1
// VGPR 64-bit registers
def VGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, VGPR_32, 255, 1, 2, "v">;
@@ -540,7 +550,8 @@ def VGPR_512 : SIRegisterTuples<getSubRegs<16>.ret, VGPR_32, 255, 1, 16, "v">;
// VGPR 1024-bit registers
def VGPR_1024 : SIRegisterTuples<getSubRegs<32>.ret, VGPR_32, 255, 1, 32, "v">;
-def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
+let HasAGPR = 1 in {
+def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
(add (sequence "AGPR%u_LO16", 0, 255))> {
let isAllocatable = 0;
let Size = 16;
@@ -548,12 +559,13 @@ def AGPR_LO16 : RegisterClass<"AMDGPU", Reg16Types.types, 16,
}
// AccVGPR 32-bit registers
-def AGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add (sequence "AGPR%u", 0, 255))> {
let AllocationPriority = 1;
let Size = 32;
let Weight = 1;
}
+} // End HasAGPR = 1
// AGPR 64-bit registers
def AGPR_64 : SIRegisterTuples<getSubRegs<2>.ret, AGPR_32, 255, 1, 2, "a">;
@@ -679,6 +691,14 @@ def CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
let AllocationPriority = SGPR_64.AllocationPriority;
}
+// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
+def Gfx_CCR_SGPR_64 : RegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+ (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
+ (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
+ let CopyCost = SGPR_64.CopyCost;
+ let AllocationPriority = SGPR_64.AllocationPriority;
+}
+
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
@@ -748,14 +768,15 @@ defm "" : SRegClass<8, 19, [v8i32, v8f32, v4i64, v4f64], SGPR_256Regs, TTMP_256R
defm "" : SRegClass<16, 20, [v16i32, v16f32, v8i64, v8f64], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, 21, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
-def VRegOrLds_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
- RegisterClass<"AMDGPU", regTypes, 32, regList> {
+ SIRegisterClass<"AMDGPU", regTypes, 32, regList> {
let Size = !mul(numRegs, 32);
// Requires n v_mov_b32 to copy
@@ -767,11 +788,13 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
// Define a register tuple class, along with one requiring an even
// aligned base register.
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- // Define the regular class.
- def "" : VRegClassBase<numRegs, regTypes, regList>;
+ let HasVGPR = 1 in {
+ // Define the regular class.
+ def "" : VRegClassBase<numRegs, regTypes, regList>;
- // Define 2-aligned variant
- def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ // Define 2-aligned variant
+ def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ }
}
defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
@@ -787,7 +810,7 @@ defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
- let CopyCost = !add(numRegs, numRegs, 1) in {
+ let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, regList>;
@@ -811,7 +834,7 @@ defm AReg_1024 : ARegClass<32, [v32i32, v32f32, v16i64, v16f64], (add AGPR_1024)
// This is not a real register. This is just to have a register to add
// to VReg_1 that does not alias any real register that would
-// introduce inferred register classess.
+// introduce inferred register classes.
def ARTIFICIAL_VGPR : SIReg <"invalid vgpr", 0> {
let isArtificial = 1;
}
@@ -823,44 +846,53 @@ let GeneratePressureSet = 0 in {
// on an empty register set, but also sorts register classes based on
// the number of registers in them. Add only one register so this is
// sorted to the end and not preferred over VGPR_32.
-def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
+def VReg_1 : SIRegisterClass<"AMDGPU", [i1], 32, (add ARTIFICIAL_VGPR)> {
let Size = 1;
+ let HasVGPR = 1;
}
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
+def VS_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
(add VGPR_32, SReg_32, LDS_DIRECT_CLASS)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
+def VS_64 : SIRegisterClass<"AMDGPU", [i64, f64, v2f32], 32, (add VReg_64, SReg_64)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
}
-def AV_32 : RegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
+def AV_32 : SIRegisterClass<"AMDGPU", VGPR_32.RegTypes, 32,
(add AGPR_32, VGPR_32)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasAGPR = 1;
}
-def AV_64 : RegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
+def AV_64 : SIRegisterClass<"AMDGPU", VReg_64.RegTypes, 32,
(add AReg_64, VReg_64)> {
let isAllocatable = 0;
+ let HasVGPR = 1;
+ let HasAGPR = 1;
}
} // End GeneratePressureSet = 0
-def AV_96 : RegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
+let HasVGPR = 1, HasAGPR = 1 in {
+def AV_96 : SIRegisterClass<"AMDGPU", VReg_96.RegTypes, 32,
(add AReg_96, VReg_96)> {
let isAllocatable = 0;
}
-def AV_128 : RegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
+def AV_128 : SIRegisterClass<"AMDGPU", VReg_128.RegTypes, 32,
(add AReg_128, VReg_128)> {
let isAllocatable = 0;
}
-def AV_160 : RegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
+def AV_160 : SIRegisterClass<"AMDGPU", VReg_160.RegTypes, 32,
(add AReg_160, VReg_160)> {
let isAllocatable = 0;
}
+} // End HasVGPR = 1, HasAGPR = 1
//===----------------------------------------------------------------------===//
// Register operands
@@ -996,6 +1028,30 @@ def VSrc_128 : RegisterOperand<VReg_128> {
}
//===----------------------------------------------------------------------===//
+// VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
+// with FMAMK/FMAAK
+//===----------------------------------------------------------------------===//
+
+multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
+ string rc_suffix = "_32"> {
+ let OperandNamespace = "AMDGPU" in {
+ def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+ let OperandType = opType#"_FP16_DEFERRED";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+ let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
+ }
+
+ def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
+ let OperandType = opType#"_FP32_DEFERRED";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+ let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
+ }
+ }
+}
+
+defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
+
+//===----------------------------------------------------------------------===//
// VRegSrc_* Operands with a VGPR
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index b24c061af7ab..0792b303b830 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -137,6 +137,7 @@ def MIReadVGPR : SchedReadVariant<[
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
+ let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<WriteBranch, [HWBranch], 8>;
def : HWWriteRes<WriteExport, [HWExport], 4>;
def : HWWriteRes<WriteLDS, [HWLGKM], 5>; // Can be between 2 and 64
@@ -159,6 +160,7 @@ multiclass SICommonWriteRes {
def : HWWriteRes<Write8PassMAI, [HWXDL], 8>;
let ResourceCycles = [16] in
def : HWWriteRes<Write16PassMAI, [HWXDL], 16>;
+ } // End RetireOOO = 1
def : ReadAdvance<MIVGPRRead, -2>;
@@ -182,6 +184,7 @@ let SchedModel = SIFullSpeedModel in {
defm : SICommonWriteRes;
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteIntMul, 4>;
def : HWVALUWriteRes<WriteFloatFMA, 1>;
@@ -189,6 +192,7 @@ def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 4>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -198,6 +202,7 @@ let SchedModel = SIQuarterSpeedModel in {
defm : SICommonWriteRes;
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteIntMul, 4>;
def : HWVALUWriteRes<WriteFloatFMA, 16>;
@@ -205,6 +210,7 @@ def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 16>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit, MIReadVGPR], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
@@ -218,6 +224,7 @@ let SchedModel = SIDPFullSpeedModel in {
defm : SICommonWriteRes;
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 1>;
def : HWVALUWriteRes<WriteDoubleAdd, 1>;
@@ -225,6 +232,7 @@ def : HWVALUWriteRes<WriteDoubleCvt, 1>;
def : HWVALUWriteRes<WriteTrans64, 4>;
def : HWVALUWriteRes<WriteIntMul, 1>;
def : HWVALUWriteRes<Write64Bit, 1>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
def : InstRW<[Write64Bit], (instregex "^V_ACCVGPR_WRITE_B32_e64$")>;
@@ -240,6 +248,7 @@ let SchedModel = GFX10SpeedModel in {
// The latency values are 1 / (operations / cycle).
// Add 1 stall cycle for VGPR read.
+let RetireOOO = 1 in { // llvm-mca specific flag
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 6>;
@@ -259,6 +268,7 @@ def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 2>;
def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+} // End RetireOOO = 1
def : InstRW<[WriteCopy], (instrs COPY)>;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 45dd57ea1be4..3a372d4519fb 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -188,7 +188,7 @@ static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
return;
// eq/ne is special because the imm16 can be treated as signed or unsigned,
- // and initially selectd to the unsigned versions.
+ // and initially selected to the unsigned versions.
if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
bool HasUImm;
if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
@@ -810,6 +810,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// Copy extra operands not present in the instruction definition.
copyExtraImplicitOps(*Inst32, MF, MI);
+ // Copy deadness from the old explicit vcc def to the new implicit def.
+ if (SDst && SDst->isDead())
+ Inst32->findRegisterDefOperand(VCCReg)->setIsDead();
+
MI.eraseFromParent();
foldImmediates(*Inst32, TII, MRI);
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 38548eaf9478..6f63f686635a 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1029,11 +1029,8 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {
SmallVector<MachineInstr *, 4> SplitPoints;
char State = BI.InitialState;
- auto II = MBB.getFirstNonPHI(), IE = MBB.end();
- while (II != IE) {
- auto Next = std::next(II);
- MachineInstr &MI = *II;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(
+ llvm::make_range(MBB.getFirstNonPHI(), MBB.end()))) {
if (StateTransition.count(&MI))
State = StateTransition[&MI];
@@ -1051,8 +1048,6 @@ void SIWholeQuadMode::lowerBlock(MachineBasicBlock &MBB) {
}
if (SplitPoint)
SplitPoints.push_back(SplitPoint);
-
- II = Next;
}
// Perform splitting after instruction scan to simplify iteration.
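The loop rewrite above uses llvm::make_early_inc_range, which advances the iterator before handing the current element to the loop body so the body may erase that element. The same early-increment idiom shown standalone on a std::list, with no LLVM types involved:

#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> Worklist = {1, 2, 3, 4, 5};
  for (auto It = Worklist.begin(), End = Worklist.end(); It != End;) {
    auto Next = std::next(It); // advance first, as make_early_inc_range does
    if (*It % 2 == 0)
      Worklist.erase(It);      // safe: erasing It does not invalidate Next
    It = Next;
  }
  for (int V : Worklist)
    std::cout << V << ' ';     // prints: 1 3 5
  std::cout << '\n';
}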
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e9697017aac0..61ecc13620a1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -265,6 +265,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
let isReturn = 1 in {
// Define variant marked as return rather than branch.
def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
+def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
}
} // End isTerminator = 1, isBarrier = 1
@@ -517,9 +518,10 @@ let Uses = [SCC] in {
def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32",
[(set i32:$sdst, (SelectPat<select> i32:$src0, i32:$src1))]
>;
+ def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64",
+ [(set i64:$sdst, (SelectPat<select> i64:$src0, i64:$src1))]
+ >;
}
-
- def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
} // End Uses = [SCC]
let Defs = [SCC] in {
@@ -557,19 +559,19 @@ def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
>;
def S_NAND_B32 : SOP2_32 <"s_nand_b32",
- [(set i32:$sdst, (not (and_oneuse i32:$src0, i32:$src1)))]
+ [(set i32:$sdst, (UniformUnaryFrag<not> (and_oneuse i32:$src0, i32:$src1)))]
>;
def S_NAND_B64 : SOP2_64 <"s_nand_b64",
- [(set i64:$sdst, (not (and_oneuse i64:$src0, i64:$src1)))]
+ [(set i64:$sdst, (UniformUnaryFrag<not> (and_oneuse i64:$src0, i64:$src1)))]
>;
def S_NOR_B32 : SOP2_32 <"s_nor_b32",
- [(set i32:$sdst, (not (or_oneuse i32:$src0, i32:$src1)))]
+ [(set i32:$sdst, (UniformUnaryFrag<not> (or_oneuse i32:$src0, i32:$src1)))]
>;
def S_NOR_B64 : SOP2_64 <"s_nor_b64",
- [(set i64:$sdst, (not (or_oneuse i64:$src0, i64:$src1)))]
+ [(set i64:$sdst, (UniformUnaryFrag<not> (or_oneuse i64:$src0, i64:$src1)))]
>;
} // End isCommutable = 1
@@ -597,22 +599,22 @@ let AddedComplexity = 1 in {
let Defs = [SCC] in {
// TODO: b64 versions require VOP3 change since v_lshlrev_b64 is VOP3
def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
- [(set SReg_32:$sdst, (UniformBinFrag<shl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<cshl_32> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
- [(set SReg_64:$sdst, (UniformBinFrag<shl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<cshl_64> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
- [(set SReg_32:$sdst, (UniformBinFrag<srl> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<csrl_32> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
- [(set SReg_64:$sdst, (UniformBinFrag<srl> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<csrl_64> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
- [(set SReg_32:$sdst, (UniformBinFrag<sra> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_32:$sdst, (UniformBinFrag<csra_32> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]
>;
def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
- [(set SReg_64:$sdst, (UniformBinFrag<sra> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
+ [(set SReg_64:$sdst, (UniformBinFrag<csra_64> (i64 SSrc_b64:$src0), (i32 SSrc_b32:$src1)))]
>;
} // End Defs = [SCC]
@@ -621,9 +623,8 @@ def S_BFM_B32 : SOP2_32 <"s_bfm_b32",
[(set i32:$sdst, (UniformBinFrag<AMDGPUbfm> i32:$src0, i32:$src1))]>;
def S_BFM_B64 : SOP2_64_32_32 <"s_bfm_b64">;
-// TODO: S_MUL_I32 require V_MUL_LO_I32 from VOP3 change
def S_MUL_I32 : SOP2_32 <"s_mul_i32",
- [(set i32:$sdst, (mul i32:$src0, i32:$src1))]> {
+ [(set i32:$sdst, (UniformBinFrag<mul> i32:$src0, i32:$src1))]> {
let isCommutable = 1;
}
} // End isReMaterializable = 1
@@ -713,7 +714,7 @@ class SOPK_Pseudo <string opName, dag outs, dag ins,
bits<1> has_sdst = 1;
}
-class SOPK_Real<bits<5> op, SOPK_Pseudo ps> :
+class SOPK_Real<SOPK_Pseudo ps> :
InstSI <ps.OutOperandList, ps.InOperandList,
ps.Mnemonic # " " # ps.AsmOperands, []> {
let SALU = 1;
@@ -739,7 +740,7 @@ class SOPK_Real<bits<5> op, SOPK_Pseudo ps> :
}
class SOPK_Real32<bits<5> op, SOPK_Pseudo ps> :
- SOPK_Real <op, ps>,
+ SOPK_Real <ps>,
Enc32 {
let Inst{15-0} = simm16;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -748,7 +749,7 @@ class SOPK_Real32<bits<5> op, SOPK_Pseudo ps> :
}
class SOPK_Real64<bits<5> op, SOPK_Pseudo ps> :
- SOPK_Real<op, ps>,
+ SOPK_Real<ps>,
Enc64 {
let Inst{15-0} = simm16;
let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
@@ -1107,7 +1108,7 @@ class SOPPRelaxTable <bit isRelaxed, string keyName, string gfxip> {
}
//spaces inserted in realname on instantiation of this record to allow s_endpgm to omit whitespace
-class SOPP_Real<bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
+class SOPP_Real<SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
InstSI <ps.OutOperandList, ps.InOperandList,
real_name # ps.AsmOperands, []> {
let SALU = 1;
@@ -1127,14 +1128,14 @@ class SOPP_Real<bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> :
bits <16> simm16;
}
-class SOPP_Real_32 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<op, ps, real_name>,
+class SOPP_Real_32 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<ps, real_name>,
Enc32 {
let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16);
let Inst{22-16} = op;
let Inst{31-23} = 0x17f;
}
-class SOPP_Real_64 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<op, ps, real_name>,
+class SOPP_Real_64 <bits<7> op, SOPP_Pseudo ps, string real_name = ps.Mnemonic> : SOPP_Real<ps, real_name>,
Enc64 {
// encoding
let Inst{15-0} = !if(ps.fixed_imm, ps.simm16, simm16);
diff --git a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index 9ec437760c0a..7573af597056 100644
--- a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/AMDGPUTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 29bbf50cbfdc..9da7b9f5145d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -450,16 +450,16 @@ std::string AMDGPUTargetID::toString() const {
} else if (Processor == "gfx801") {
if (!isXnackOnOrAny())
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor +
- " without XNACK");
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor) + " without XNACK");
} else if (Processor == "gfx802") {
} else if (Processor == "gfx803") {
} else if (Processor == "gfx805") {
} else if (Processor == "gfx810") {
if (!isXnackOnOrAny())
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor +
- " without XNACK");
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor) + " without XNACK");
} else if (Processor == "gfx900") {
if (isXnackOnOrAny())
Processor = "gfx901";
@@ -475,11 +475,12 @@ std::string AMDGPUTargetID::toString() const {
} else if (Processor == "gfx90c") {
if (isXnackOnOrAny())
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor +
- " with XNACK being ON or ANY");
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor) + " with XNACK being ON or ANY");
} else {
report_fatal_error(
- "AMD GPU code object V2 does not support processor " + Processor);
+ "AMD GPU code object V2 does not support processor " +
+ Twine(Processor));
}
break;
case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
@@ -671,7 +672,8 @@ unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
if (XNACKUsed)
ExtraSGPRs = 4;
- if (FlatScrUsed)
+ if (FlatScrUsed ||
+ STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
ExtraSGPRs = 6;
}
@@ -1572,8 +1574,10 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
unsigned OpType = Desc.OpInfo[OpNo].OperandType;
switch (OpType) {
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
@@ -1825,8 +1829,8 @@ bool isArgPassedInSGPR(const Argument *A) {
case CallingConv::AMDGPU_Gfx:
// For non-compute shaders, SGPR inputs are marked with either inreg or byval.
// Everything else is in VGPRs.
- return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
- F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
+ return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
+ F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
default:
// TODO: Should calls support inreg for SGPR inputs?
return false;
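The report_fatal_error changes in AMDGPUBaseInfo.cpp above wrap Processor in a Twine so the whole message folds into one lazily concatenated Twine expression, matching the report_fatal_error(const Twine &) overload. A minimal sketch of that pattern, assuming a std::string value in place of the real Processor field:

#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <string>

// Sketch only: Proc stands in for the Processor value used above.
static void rejectProcessor(const std::string &Proc) {
  // "literal" + Twine(Proc) + "literal" builds a single Twine, which is the
  // argument type report_fatal_error(const Twine &) expects.
  llvm::report_fatal_error("AMD GPU code object V2 does not support processor " +
                           llvm::Twine(Proc) + " without XNACK");
}

int main() {
  rejectProcessor("gfx801"); // aborts with the formatted message
}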
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 72c872dec5ba..061c74c0ace6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -292,9 +292,13 @@ struct MIMGBaseOpcodeInfo {
bool LodOrClampOrMip;
bool HasD16;
bool MSAA;
+ bool BVH;
};
LLVM_READONLY
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
+
+LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
struct MIMGDimInfo {
@@ -767,7 +771,7 @@ bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
/// Is this floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
-/// Does this opearnd support only inlinable literals?
+/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
/// Get the size in bits of a register from the register class \p RC.
@@ -785,6 +789,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
switch (OpInfo.OperandType) {
case AMDGPU::OPERAND_REG_IMM_INT32:
case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT32:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
@@ -793,6 +798,8 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_V2FP32:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
return 4;
case AMDGPU::OPERAND_REG_IMM_INT64:
@@ -804,6 +811,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
case AMDGPU::OPERAND_REG_IMM_INT16:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
index da8fcf3900bb..2e4d83fbbc39 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPULDSUtils.h"
+#include "AMDGPU.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -68,6 +69,11 @@ class CollectReachableCallees {
if (!VisitedCGNodes.insert(CGN).second)
continue;
+      // Ignore call graph nodes that have no associated function, or whose
+      // associated function is not a definition.
+ if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
+ continue;
+
for (auto GI = CGN->begin(), GE = CGN->end(); GI != GE; ++GI) {
auto *RCB = cast<CallBase>(GI->first.getValue());
auto *RCGN = GI->second;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
index ffcafb9b76ce..d1c9229bc336 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -13,7 +13,6 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
-#include "AMDGPU.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +48,7 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
/// as a use within some instruction (either from kernel or from non-kernel).
bool hasUserInstruction(const GlobalValue *GV);
-/// \returns true if an LDS global requres lowering to a module LDS structure
+/// \returns true if an LDS global requires lowering to a module LDS structure
/// if \p F is not given. If \p F is given it must be a kernel and function
/// \returns true if an LDS global is directly used from that kernel and it
/// is safe to replace its uses with a kernel LDS structure member.
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 35d5fe13ad30..48548d8b6722 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -860,16 +860,25 @@ defm V_CVT_NORM_U16_F16 : VOP1_Real_vi<0x4e>;
defm V_ACCVGPR_MOV_B32 : VOP1Only_Real_vi<0x52>;
+let VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [EXEC, M0] in {
+
// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
// indexing mode. vdst can't be treated as a def for codegen purposes,
// and an implicit use and def of the super register should be added.
-def V_MOV_B32_indirect : VPseudoInstSI<(outs),
+def V_MOV_B32_indirect_write : VPseudoInstSI<(outs),
(ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
- getVOPSrc0ForVT<i32>.ret:$src0)> {
- let VOP1 = 1;
- let SubtargetPredicate = isGFX8GFX9;
-}
+ getVOPSrc0ForVT<i32>.ret:$src0)>;
+
+// Copy of v_mov_b32 for use with VGPR indexing mode. An implicit use of the
+// super register should be added.
+def V_MOV_B32_indirect_read : VPseudoInstSI<
+ (outs getVALUDstForVT<i32>.ret:$vdst),
+ (ins getVOPSrc0ForVT<i32>.ret:$src0)>,
+ PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
+ getVOPSrc0ForVT<i32>.ret:$src0)>;
+
+} // End VOP1 = 1, SubtargetPredicate = isGFX8GFX9, Uses = [M0]
let OtherPredicates = [isGFX8Plus] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 7860b7e7f8a6..8d232ffe4114 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -154,8 +154,6 @@ multiclass VOP2Inst_e64<string opName,
multiclass VOP2Inst_sdwa<string opName,
VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName,
bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
foreach _ = BoolToList<P.HasExtSDWA>.ret in
@@ -170,7 +168,7 @@ multiclass VOP2Inst<string opName,
bit GFX9Renamed = 0> :
VOP2Inst_e32<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
- VOP2Inst_sdwa<opName, P, node, revOp, GFX9Renamed> {
+ VOP2Inst_sdwa<opName, P, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
foreach _ = BoolToList<P.HasExtDPP>.ret in
def _dpp : VOP2_DPP_Pseudo <opName, P>;
@@ -188,7 +186,7 @@ multiclass VOP2bInst <string opName,
let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
def _e32 : VOP2_Pseudo <opName, P, VOPPatOrNull<node,P>.ret>,
Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
- let usesCustomInserter = !eq(P.NumSrcArgs, 2);
+ let usesCustomInserter = true;
}
foreach _ = BoolToList<P.HasExtSDWA>.ret in
@@ -272,12 +270,11 @@ multiclass VOP2eInstAliases<VOP2_Pseudo ps, VOP2_Real inst> {
class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
field dag Ins32 = !if(!eq(vt.Size, 32),
- (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm),
- (ins VCSrc_f16:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ (ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
+ (ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
+ field string Asm32 = "$vdst, $src0, $src1, $imm";
field bit HasExt = 0;
let IsSingle = 1;
-
- field string Asm32 = "$vdst, $src0, $src1, $imm";
}
def VOP_MADAK_F16 : VOP_MADAK <f16>;
@@ -285,11 +282,10 @@ def VOP_MADAK_F32 : VOP_MADAK <f32>;
class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
- field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field dag Ins32 = (ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field string Asm32 = "$vdst, $src0, $imm, $src1";
field bit HasExt = 0;
let IsSingle = 1;
-
- field string Asm32 = "$vdst, $src0, $imm, $src1";
}
def VOP_MADMK_F16 : VOP_MADMK <f16>;
@@ -496,18 +492,18 @@ defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub
defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>;
defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>;
-defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>;
-defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN<VOP_I32_I32_I32, 2>, AMDGPUmulhi_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>;
defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>;
defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smin>;
defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, smax>;
defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umin>;
defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_PAT_GEN<VOP_I32_I32_I32>, umax>;
-defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, lshr_rev, "v_lshr_b32">;
-defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, ashr_rev, "v_ashr_i32">;
-defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, lshl_rev, "v_lshl_b32">;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, clshr_rev_32, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, cashr_rev_32, "v_ashr_i32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, clshl_rev_32, "v_lshl_b32">;
defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, and>;
defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, or>;
defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, xor>;
@@ -582,9 +578,9 @@ defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfma
let isCommutable = 1 in {
let SubtargetPredicate = isGFX6GFX7 in {
-defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, srl>;
-defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, sra>;
-defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, shl>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, csrl_32>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_PAT_GEN<VOP_I32_I32_I32>, csra_32>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_PAT_GEN<VOP_I32_I32_I32>, cshl_32>;
} // End SubtargetPredicate = isGFX6GFX7
} // End isCommutable = 1
} // End isReMaterializable = 1
@@ -609,9 +605,9 @@ class DivergentClampingBinOp<SDPatternOperator Op, VOP_Pseudo Inst> :
)
>;
-def : DivergentBinOp<srl, V_LSHRREV_B32_e64>;
-def : DivergentBinOp<sra, V_ASHRREV_I32_e64>;
-def : DivergentBinOp<shl, V_LSHLREV_B32_e64>;
+def : DivergentBinOp<csrl_32, V_LSHRREV_B32_e64>;
+def : DivergentBinOp<csra_32, V_ASHRREV_I32_e64>;
+def : DivergentBinOp<cshl_32, V_LSHLREV_B32_e64>;
let SubtargetPredicate = HasAddNoCarryInsts in {
def : DivergentClampingBinOp<add, V_ADD_U32_e64>;
@@ -652,9 +648,9 @@ def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">;
defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
} // End FPDPRounding = 1
-defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, lshl_rev>;
-defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, lshr_rev>;
-defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, ashr_rev>;
+defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
+defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16, clshr_rev_16>;
+defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16, cashr_rev_16>;
let isCommutable = 1 in {
let FPDPRounding = 1 in {
@@ -856,9 +852,9 @@ defm : Arithmetic_i16_0Hi_Pats<smin, V_MIN_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<smax, V_MAX_I16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umin, V_MIN_U16_e64>;
defm : Arithmetic_i16_0Hi_Pats<umax, V_MAX_U16_e64>;
-defm : Arithmetic_i16_0Hi_Pats<lshl_rev, V_LSHLREV_B16_e64>;
-defm : Arithmetic_i16_0Hi_Pats<lshr_rev, V_LSHRREV_B16_e64>;
-defm : Arithmetic_i16_0Hi_Pats<ashr_rev, V_ASHRREV_I16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<clshl_rev_16, V_LSHLREV_B16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<clshr_rev_16, V_LSHRREV_B16_e64>;
+defm : Arithmetic_i16_0Hi_Pats<cashr_rev_16, V_ASHRREV_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX7GFX8GFX9]
def : ZExt_i16_i1_Pat<zext>;
@@ -927,7 +923,7 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps,
SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX10>;
class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
- string opName = ps.OpName, VOPProfile p = ps.Pfl> :
+ VOPProfile p = ps.Pfl> :
VOP_DPP8<ps.OpName, p> {
let hasSideEffects = ps.hasSideEffects;
let Defs = ps.Defs;
@@ -1123,14 +1119,14 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
let DecoderNamespace = "DPP8";
}
foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_w32_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst("vcc", "vcc_lo", AsmDPP8);
let isAsmParserOnly = 1;
@@ -1138,7 +1134,7 @@ let AssemblerPredicate = isGFX10Plus, DecoderNamespace = "GFX10" in {
}
foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
def _dpp8_w64_gfx10 :
- VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32"), asmName> {
+ VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # AsmDPP8;
let isAsmParserOnly = 1;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index ee3b87f487d0..494e3aeb6d55 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -120,11 +120,11 @@ class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
}
// Consistently gives instructions a _e64 suffix.
-multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> {
- def _e64 : VOP3_Pseudo<opName, P, pattern, VOP3Only>;
+multiclass VOP3Inst_Pseudo_Wrapper<string opName, VOPProfile P, list<dag> pattern = []> {
+ def _e64 : VOP3_Pseudo<opName, P, pattern>;
}
-class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
+class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_frag> :
VOP3_Pseudo<OpName, P,
!if(P.HasOpSel,
!if(P.HasModifiers,
@@ -137,7 +137,7 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
!if (P.IsMAI,
getVOP3MAIPat<P, node>.ret,
getVOP3Pat<P, node>.ret)))),
- VOP3Only, 0, P.HasOpSel> {
+ 0, P.HasOpSel> {
let IntClamp = P.HasIntClamp;
let AsmMatchConverter =
@@ -148,8 +148,8 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
""));
}
-multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> {
- def _e64 : VOP3InstBase<OpName, P, node, VOP3Only>;
+multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
+ def _e64 : VOP3InstBase<OpName, P, node>;
}
// Special case for v_div_fmas_{f32|f64}, since it seems to be the
@@ -296,15 +296,15 @@ defm V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_a
let SchedRW = [WriteDoubleAdd] in {
let FPDPRounding = 1 in {
defm V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, any_fma>;
-defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd, 1>;
-defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
+defm V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, any_fadd>;
+defm V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul>;
} // End FPDPRounding = 1
-defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like, 1>;
-defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like, 1>;
+defm V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum_like>;
+defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum_like>;
} // End SchedRW = [WriteDoubleAdd]
let SchedRW = [WriteIntMul] in {
-defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, mul>;
+defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>, DivergentBinFrag<mul>>;
defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
@@ -371,18 +371,18 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
- defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
+ defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1
let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
let SchedRW = [WriteFloatFMA, WriteSALU] in
- defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> ;
+ defm V_DIV_SCALE_F32 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32> ;
// Double precision division pre-scale.
let SchedRW = [WriteDouble, WriteSALU], FPDPRounding = 1 in
- defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1>;
+ defm V_DIV_SCALE_F64 : VOP3Inst_Pseudo_Wrapper <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64>;
} // End mayRaiseFPException = 0
let isReMaterializable = 1 in
@@ -400,15 +400,15 @@ defm V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I
let SchedRW = [Write64Bit] in {
let SubtargetPredicate = isGFX6GFX7 in {
- defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, shl>;
- defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, srl>;
- defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, sra>;
+ defm V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>, cshl_64>;
+ defm V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>, csrl_64>;
+ defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>, csra_64>;
} // End SubtargetPredicate = isGFX6GFX7
let SubtargetPredicate = isGFX8Plus in {
- defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshl_rev>;
- defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, lshr_rev>;
- defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, ashr_rev>;
+ defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshl_rev_64>;
+ defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>, clshr_rev_64>;
+ defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>, cashr_rev_64>;
} // End SubtargetPredicate = isGFX8Plus
} // End SchedRW = [Write64Bit]
} // End isReMaterializable = 1
@@ -528,7 +528,7 @@ def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst, SDPatternOperator op3> {
+ Instruction inst> {
def : GCNPat <
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
@@ -536,15 +536,15 @@ def : GCNPat <
}
-defm: Ternary_i16_Pats<mul, add, V_MAD_U16_e64, zext>;
-defm: Ternary_i16_Pats<mul, add, V_MAD_I16_e64, sext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_U16_e64>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_I16_e64>;
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
let Predicates = [Has16BitInsts, isGFX10Plus] in {
multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst, SDPatternOperator op3> {
+ Instruction inst> {
def : GCNPat <
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
@@ -552,8 +552,8 @@ def : GCNPat <
}
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64, zext>;
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64, sext>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
+defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]
@@ -656,10 +656,10 @@ class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instructio
(inst VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2)
>;
-def : ThreeOp_i32_Pats<shl, add, V_LSHL_ADD_U32_e64>;
-def : ThreeOp_i32_Pats<add, shl, V_ADD_LSHL_U32_e64>;
+def : ThreeOp_i32_Pats<cshl_32, add, V_LSHL_ADD_U32_e64>;
+def : ThreeOp_i32_Pats<add, cshl_32, V_ADD_LSHL_U32_e64>;
def : ThreeOp_i32_Pats<add, add, V_ADD3_U32_e64>;
-def : ThreeOp_i32_Pats<shl, or, V_LSHL_OR_B32_e64>;
+def : ThreeOp_i32_Pats<cshl_32, or, V_LSHL_OR_B32_e64>;
def : ThreeOp_i32_Pats<and, or, V_AND_OR_B32_e64>;
def : ThreeOp_i32_Pats<or, or, V_OR3_B32_e64>;
def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
@@ -667,6 +667,14 @@ def : ThreeOp_i32_Pats<xor, add, V_XAD_U32_e64>;
def : VOPBinOpClampPat<saddsat, V_ADD_I32_e64, i32>;
def : VOPBinOpClampPat<ssubsat, V_SUB_I32_e64, i32>;
+def : GCNPat<(getDivergentFrag<or>.ret (or_oneuse i64:$src0, i64:$src1), i64:$src2),
+ (REG_SEQUENCE VReg_64,
+ (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub0)),
+ (i32 (EXTRACT_SUBREG $src1, sub0)),
+ (i32 (EXTRACT_SUBREG $src2, sub0))), sub0,
+ (V_OR3_B32_e64 (i32 (EXTRACT_SUBREG $src0, sub1)),
+ (i32 (EXTRACT_SUBREG $src1, sub1)),
+ (i32 (EXTRACT_SUBREG $src2, sub1))), sub1)>;
// FIXME: Probably should hardcode clamp bit in pseudo and avoid this.
class OpSelBinOpClampPat<SDPatternOperator node,
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 48f5eb1dc272..32222b3eb93c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -50,8 +50,7 @@ multiclass VOP3PInst<string OpName, VOPProfile P,
// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
-multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P,
- SDPatternOperator node = null_frag> {
+multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
def NAME : VOP3P_Pseudo<OpName, P> {
let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
@@ -83,9 +82,9 @@ defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, sub>;
-defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshl_rev>;
-defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, ashr_rev>;
-defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, lshr_rev>;
+defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
+defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
+defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
let SubtargetPredicate = HasVOP3PInsts in {
@@ -113,7 +112,6 @@ def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts
multiclass MadFmaMixPats<SDPatternOperator fma_like,
- Instruction mix_inst,
Instruction mixlo_inst,
Instruction mixhi_inst> {
def : GCNPat <
@@ -192,7 +190,7 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+defm : MadFmaMixPats<fmad, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // End SubtargetPredicate = HasMadMixInsts
@@ -211,7 +209,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
+defm : MadFmaMixPats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
// Defines patterns that extract signed 4bit from each Idx[0].
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 5f6f664ea3e7..a3eccf13cd71 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -57,8 +57,7 @@ class VOP_Pseudo <string opName, string suffix, VOPProfile P, dag outs, dag ins,
}
class VOP3Common <dag outs, dag ins, string asm = "",
- list<dag> pattern = [], bit HasMods = 0,
- bit VOP3Only = 0> :
+ list<dag> pattern = [], bit HasMods = 0> :
VOPAnyCommon <outs, ins, asm, pattern> {
// Using complex patterns gives VOP3 patterns a very high complexity rating,
@@ -83,7 +82,7 @@ class VOP3Common <dag outs, dag ins, string asm = "",
}
class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
- bit VOP3Only = 0, bit isVOP3P = 0, bit isVop3OpSel = 0> :
+ bit isVOP3P = 0, bit isVop3OpSel = 0> :
VOP_Pseudo <opName, "_e64", P, P.Outs64,
!if(isVop3OpSel,
P.InsVOP3OpSel,
@@ -136,7 +135,7 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
}
class VOP3P_Pseudo <string opName, VOPProfile P, list<dag> pattern = []> :
- VOP3_Pseudo<opName, P, pattern, 1, 1> {
+ VOP3_Pseudo<opName, P, pattern, 1> {
let VOP3P = 1;
}
@@ -760,10 +759,11 @@ class getNumNodeArgs<SDPatternOperator Op> {
int ret = TP.NumOperands;
}
-
class getDivergentFrag<SDPatternOperator Op> {
+ assert !or(!isa<SDNode>(Op), !isa<PatFrags>(Op)), "Expected SDNode or PatFrags";
- int NumSrcArgs = getNumNodeArgs<Op>.ret;
+ int NumSrcArgs = !if(!isa<SDNode>(Op), getNumNodeArgs<Op>.ret,
+ !size(!cast<PatFrags>(Op).Operands));
PatFrag ret = PatFrag <
!if(!eq(NumSrcArgs, 1),
(ops node:$src0),
diff --git a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
index 025b920ff7b4..0390c01eecb1 100644
--- a/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
+++ b/llvm/lib/Target/ARC/ARCAsmPrinter.cpp
@@ -22,7 +22,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARC/ARCExpandPseudos.cpp b/llvm/lib/Target/ARC/ARCExpandPseudos.cpp
index a1646d17605f..84bb6cac2876 100644
--- a/llvm/lib/Target/ARC/ARCExpandPseudos.cpp
+++ b/llvm/lib/Target/ARC/ARCExpandPseudos.cpp
@@ -13,6 +13,7 @@
#include "ARCInstrInfo.h"
#include "ARCRegisterInfo.h"
#include "ARCSubtarget.h"
+#include "MCTargetDesc/ARCInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -34,7 +35,9 @@ public:
StringRef getPassName() const override { return "ARC Expand Pseudos"; }
private:
- void ExpandStore(MachineFunction &, MachineBasicBlock::iterator);
+ void expandStore(MachineFunction &, MachineBasicBlock::iterator);
+ void expandCTLZ(MachineFunction &, MachineBasicBlock::iterator);
+ void expandCTTZ(MachineFunction &, MachineBasicBlock::iterator);
const ARCInstrInfo *TII;
};
@@ -56,11 +59,11 @@ static unsigned getMappedOp(unsigned PseudoOp) {
}
}
-void ARCExpandPseudos::ExpandStore(MachineFunction &MF,
+void ARCExpandPseudos::expandStore(MachineFunction &MF,
MachineBasicBlock::iterator SII) {
MachineInstr &SI = *SII;
- unsigned AddrReg = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
- unsigned AddOpc =
+ Register AddrReg = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+ Register AddOpc =
isUInt<6>(SI.getOperand(2).getImm()) ? ARC::ADD_rru6 : ARC::ADD_rrlimm;
BuildMI(*SI.getParent(), SI, SI.getDebugLoc(), TII->get(AddOpc), AddrReg)
.addReg(SI.getOperand(1).getReg())
@@ -73,10 +76,62 @@ void ARCExpandPseudos::ExpandStore(MachineFunction &MF,
SI.eraseFromParent();
}
+void ARCExpandPseudos::expandCTLZ(MachineFunction &MF,
+ MachineBasicBlock::iterator MII) {
+ // Expand:
+ // %R2<def> = CTLZ %R0, %STATUS<imp-def>
+ // To:
+ // %R2<def> = FLS_f_rr %R0, %STATUS<imp-def>
+ // %R2<def,tied1> = MOV_cc_ru6 %R2<tied0>, 32, pred:1, %STATUS<imp-use>
+ // %R2<def,tied1> = RSUB_cc_rru6 %R2<tied0>, 31, pred:2, %STATUS<imp-use>
+ MachineInstr &MI = *MII;
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+ Register Ra = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+ Register Rb = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::FLS_f_rr), Ra)
+ .add(Src);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::MOV_cc_ru6), Rb)
+ .addImm(32)
+ .addImm(ARCCC::EQ)
+ .addReg(Ra);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::RSUB_cc_rru6))
+ .add(Dest)
+ .addImm(31)
+ .addImm(ARCCC::NE)
+ .addReg(Rb);
+
+ MI.eraseFromParent();
+}
+
+void ARCExpandPseudos::expandCTTZ(MachineFunction &MF,
+ MachineBasicBlock::iterator MII) {
+ // Expand:
+ // %R0<def> = CTTZ %R0<kill>, %STATUS<imp-def>
+ // To:
+ // %R0<def> = FFS_f_rr %R0<kill>, %STATUS<imp-def>
+ // %R0<def,tied1> = MOV_cc_ru6 %R0<tied0>, 32, pred:1, %STATUS<imp-use>
+ MachineInstr &MI = *MII;
+ const MachineOperand &Dest = MI.getOperand(0);
+ const MachineOperand &Src = MI.getOperand(1);
+ Register R = MF.getRegInfo().createVirtualRegister(&ARC::GPR32RegClass);
+
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::FFS_f_rr), R)
+ .add(Src);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(ARC::MOV_cc_ru6))
+ .add(Dest)
+ .addImm(32)
+ .addImm(ARCCC::EQ)
+ .addReg(R);
+
+ MI.eraseFromParent();
+}
+
bool ARCExpandPseudos::runOnMachineFunction(MachineFunction &MF) {
const ARCSubtarget *STI = &MF.getSubtarget<ARCSubtarget>();
TII = STI->getInstrInfo();
- bool ExpandedStore = false;
+ bool Expanded = false;
for (auto &MBB : MF) {
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
@@ -85,8 +140,16 @@ bool ARCExpandPseudos::runOnMachineFunction(MachineFunction &MF) {
case ARC::ST_FAR:
case ARC::STH_FAR:
case ARC::STB_FAR:
- ExpandStore(MF, MBBI);
- ExpandedStore = true;
+ expandStore(MF, MBBI);
+ Expanded = true;
+ break;
+ case ARC::CTLZ:
+ expandCTLZ(MF, MBBI);
+ Expanded = true;
+ break;
+ case ARC::CTTZ:
+ expandCTTZ(MF, MBBI);
+ Expanded = true;
break;
default:
break;
@@ -94,7 +157,7 @@ bool ARCExpandPseudos::runOnMachineFunction(MachineFunction &MF) {
MBBI = NMBBI;
}
}
- return ExpandedStore;
+ return Expanded;
}
FunctionPass *llvm::createARCExpandPseudosPass() {
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
index ca33f5297471..7fd08f70ea3b 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -68,6 +68,31 @@ static ARCCC::CondCode ISDCCtoARCCC(ISD::CondCode isdCC) {
}
}
+void ARCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ LLVM_DEBUG(dbgs() << "[ARC-ISEL] ReplaceNodeResults ");
+ LLVM_DEBUG(N->dump(&DAG));
+ LLVM_DEBUG(dbgs() << "; use_count=" << N->use_size() << "\n");
+
+ switch (N->getOpcode()) {
+ case ISD::READCYCLECOUNTER:
+ if (N->getValueType(0) == MVT::i64) {
+ // We read TIMER0 and zero-extend it to 64 bits, as the intrinsic
+ // requires.
+ SDValue V =
+ DAG.getNode(ISD::READCYCLECOUNTER, SDLoc(N),
+ DAG.getVTList(MVT::i32, MVT::Other), N->getOperand(0));
+ SDValue Op = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i64, V);
+ Results.push_back(Op);
+ Results.push_back(V.getValue(1));
+ }
+ break;
+ default:
+ break;
+ }
+}
+
ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
const ARCSubtarget &Subtarget)
: TargetLowering(TM), Subtarget(Subtarget) {
@@ -96,6 +121,11 @@ ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMAX, MVT::i32, Legal);
setOperationAction(ISD::SMIN, MVT::i32, Legal);
+ setOperationAction(ISD::ADDC, MVT::i32, Legal);
+ setOperationAction(ISD::ADDE, MVT::i32, Legal);
+ setOperationAction(ISD::SUBC, MVT::i32, Legal);
+ setOperationAction(ISD::SUBE, MVT::i32, Legal);
+
// Need barrel shifter.
setOperationAction(ISD::SHL, MVT::i32, Legal);
setOperationAction(ISD::SRA, MVT::i32, Legal);
@@ -135,6 +165,15 @@ ARCTargetLowering::ARCTargetLowering(const TargetMachine &TM,
// Sign extend inreg
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+
+ // TODO: Predicate these with `options.hasBitScan() ? Legal : Expand`
+ // when the HasBitScan predicate is available.
+ setOperationAction(ISD::CTLZ, MVT::i32, Legal);
+ setOperationAction(ISD::CTTZ, MVT::i32, Legal);
+
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i32, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
+ isTypeLegal(MVT::i64) ? Legal : Custom);
}
const char *ARCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -495,7 +534,7 @@ SDValue ARCTargetLowering::LowerCallArguments(
CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1));
}
} else {
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getStoreSize();
@@ -761,6 +800,13 @@ SDValue ARCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerJumpTable(Op, DAG);
case ISD::VASTART:
return LowerVASTART(Op, DAG);
+ case ISD::READCYCLECOUNTER:
+ // As of LLVM 3.8, the lowering code insists that we customize it even
+ // though we've declared the i32 version as legal. This is because it only
+ // thinks i64 is the truly supported version. We've already converted the
+ // i64 version to a widened i32.
+ assert(Op.getSimpleValueType() == MVT::i32);
+ return Op;
default:
llvm_unreachable("unimplemented operand");
}
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.h b/llvm/lib/Target/ARC/ARCISelLowering.h
index 4b72bfdaee9c..e070ed8752cc 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.h
+++ b/llvm/lib/Target/ARC/ARCISelLowering.h
@@ -77,6 +77,9 @@ public:
private:
const ARCSubtarget &Subtarget;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+
// Lower Operand helpers
SDValue LowerCallArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
diff --git a/llvm/lib/Target/ARC/ARCInstrFormats.td b/llvm/lib/Target/ARC/ARCInstrFormats.td
index 5f539c92c745..2a109cc0f764 100644
--- a/llvm/lib/Target/ARC/ARCInstrFormats.td
+++ b/llvm/lib/Target/ARC/ARCInstrFormats.td
@@ -261,32 +261,6 @@ class F32_SOP_RR<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
let Inst{5-0} = subop;
}
-// Single Operand Immediate Instructions.
-// 1-register, unsigned 6-bit immediate Single Operand instruction with
-// condition code.
-// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
-// |B[2-0] | 1| 1| subop| F|B[5-3] |U6 |1|cc |
-class F32_SOP_CC_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
- string asmstr, list<dag> pattern> :
- InstARC<4, outs, ins, asmstr, pattern> {
-
- bits<5> cc;
- bits<6> U6;
- bits<6> B;
-
- let Inst{31-27} = major;
- let Inst{26-24} = B{2-0};
- let Inst{23-22} = 0b11;
- let Inst{21-16} = subop;
- let Inst{15} = F;
- let Inst{14-12} = B{5-3};
- let Inst{11-6} = U6;
- let Inst{5} = 1;
- let Inst{4-0} = cc;
-
- let DecoderMethod = "DecodeCCRU6Instruction";
-}
-
// Dual Operand Instructions. Inst[21-16] specifies the specific operation
// for this format.
@@ -353,6 +327,31 @@ class F32_DOP_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
let Inst{5-0} = A;
}
+// 1-register, unsigned 6-bit immediate Dual Operand instruction with
+// condition code.
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0] | 1| 1| subop| F|B[5-3] |U6 |1|cc |
+class F32_DOP_CC_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ InstARC<4, outs, ins, asmstr, pattern> {
+
+ bits<5> cc;
+ bits<6> U6;
+ bits<6> B;
+
+ let Inst{31-27} = major;
+ let Inst{26-24} = B{2-0};
+ let Inst{23-22} = 0b11;
+ let Inst{21-16} = subop;
+ let Inst{15} = F;
+ let Inst{14-12} = B{5-3};
+ let Inst{11-6} = U6;
+ let Inst{5} = 1;
+ let Inst{4-0} = cc;
+
+ let DecoderMethod = "DecodeCCRU6Instruction";
+}
+
// 2-register, unsigned 6-bit immediate Dual Operand instruction with
// condition code. This instruction uses B as the first 2 operands
// (i.e, add.cc B, B, u6).
@@ -364,7 +363,6 @@ class F32_DOP_CC_RRU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
bits<5> cc;
bits<6> U6;
bits<6> B;
- bits<6> A;
let Inst{31-27} = major;
let Inst{26-24} = B{2-0};
@@ -397,6 +395,50 @@ class F32_DOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
let Inst{5-0} = S12{11-6};
}
+// 1-register, signed 12-bit immediate Dual Operand instruction.
+// This instruction uses B as the first operand (e.g., lr B, [%count0]).
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0] | 1| 0| subop| F|B[5-3] |S12[5-0] |S12[11-6] |
+class F32_SOP_RS12<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ InstARC<4, outs, ins, asmstr, pattern> {
+ bits<6> B;
+ bits<12> S12;
+
+ let Inst{31-27} = major;
+ let Inst{26-24} = B{2-0};
+ let Inst{23-22} = 0b10;
+ let Inst{21-16} = subop;
+ let Inst{15} = F;
+ let Inst{14-12} = B{5-3};
+ let Inst{11-6} = S12{5-0};
+ let Inst{5-0} = S12{11-6};
+
+ let DecoderMethod = "DecodeSOPwithRS12";
+}
+
+// 1-register, unsigned 6-bit immediate Dual Operand instruction.
+// This instruction uses B as the first operand.
+// |26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|11|10|9|8|7|6|5|4|3|2|1|0|
+// |B[2-0] | 0| 1| subop| F|B[5-3] |U6 |0|0|0|0|0|0|
+class F32_SOP_RU6<bits<5> major, bits<6> subop, bit F, dag outs, dag ins,
+ string asmstr, list<dag> pattern> :
+ InstARC<4, outs, ins, asmstr, pattern> {
+ bits<6> B;
+ bits<6> U6;
+
+ let Inst{31-27} = major;
+ let Inst{26-24} = B{2-0};
+ let Inst{23-22} = 0b01;
+ let Inst{21-16} = subop;
+ let Inst{15} = F;
+ let Inst{14-12} = B{5-3};
+ let Inst{11-6} = U6;
+ let Inst{5-0} = 0;
+
+ let DecoderMethod = "DecodeSOPwithRU6";
+}
+
// 2-register, 32-bit immediate (LImm) Dual Operand instruction.
// This instruction has the 32-bit immediate in bits 32-63, and
// 62 in the C register operand slot, but is otherwise F32_DOP_RR.
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.cpp b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
index 527f239c2643..6e8190ee7209 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.cpp
@@ -18,8 +18,8 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -43,8 +43,8 @@ enum TSFlagsConstants {
// Pin the vtable to this file.
void ARCInstrInfo::anchor() {}
-ARCInstrInfo::ARCInstrInfo()
- : ARCGenInstrInfo(ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI() {}
+ARCInstrInfo::ARCInstrInfo(const ARCSubtarget &ST)
+ : ARCGenInstrInfo(ARC::ADJCALLSTACKDOWN, ARC::ADJCALLSTACKUP), RI(ST) {}
static bool isZeroImm(const MachineOperand &Op) {
return Op.isImm() && Op.getImm() == 0;
@@ -99,7 +99,7 @@ unsigned ARCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
}
/// Return the inverse of passed condition, i.e. turning COND_E to COND_NE.
-static ARCCC::CondCode GetOppositeBranchCondition(ARCCC::CondCode CC) {
+static ARCCC::CondCode getOppositeBranchCondition(ARCCC::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Illegal condition code!");
@@ -280,23 +280,23 @@ unsigned ARCInstrInfo::removeBranch(MachineBasicBlock &MBB,
void ARCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- const DebugLoc &dl, MCRegister DestReg,
+ const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
assert(ARC::GPR32RegClass.contains(SrcReg) &&
"Only GPR32 src copy supported.");
assert(ARC::GPR32RegClass.contains(DestReg) &&
"Only GPR32 dest copy supported.");
- BuildMI(MBB, I, dl, get(ARC::MOV_rr), DestReg)
+ BuildMI(MBB, I, DL, get(ARC::MOV_rr), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
- Register SrcReg, bool isKill,
+ Register SrcReg, bool IsKill,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc dl = MBB.findDebugLoc(I);
+ DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -312,8 +312,8 @@ void ARCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
"Only support GPR32 stores to stack now.");
LLVM_DEBUG(dbgs() << "Created store reg=" << printReg(SrcReg, TRI)
<< " to FrameIndex=" << FrameIndex << "\n");
- BuildMI(MBB, I, dl, get(ARC::ST_rs9))
- .addReg(SrcReg, getKillRegState(isKill))
+ BuildMI(MBB, I, DL, get(ARC::ST_rs9))
+ .addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FrameIndex)
.addImm(0)
.addMemOperand(MMO);
@@ -324,7 +324,7 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Register DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- DebugLoc dl = MBB.findDebugLoc(I);
+ DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -339,7 +339,7 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
"Only support GPR32 stores to stack now.");
LLVM_DEBUG(dbgs() << "Created load reg=" << printReg(DestReg, TRI)
<< " from FrameIndex=" << FrameIndex << "\n");
- BuildMI(MBB, I, dl, get(ARC::LD_rs9))
+ BuildMI(MBB, I, DL, get(ARC::LD_rs9))
.addReg(DestReg, RegState::Define)
.addFrameIndex(FrameIndex)
.addImm(0)
@@ -350,7 +350,7 @@ void ARCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
bool ARCInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid ARC branch condition!");
- Cond[2].setImm(GetOppositeBranchCondition((ARCCC::CondCode)Cond[2].getImm()));
+ Cond[2].setImm(getOppositeBranchCondition((ARCCC::CondCode)Cond[2].getImm()));
return false;
}
@@ -358,9 +358,9 @@ MachineBasicBlock::iterator
ARCInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, unsigned Reg,
uint64_t Value) const {
- DebugLoc dl = MBB.findDebugLoc(MI);
+ DebugLoc DL = MBB.findDebugLoc(MI);
if (isInt<12>(Value)) {
- return BuildMI(MBB, MI, dl, get(ARC::MOV_rs12), Reg)
+ return BuildMI(MBB, MI, DL, get(ARC::MOV_rs12), Reg)
.addImm(Value)
.getInstr();
}
@@ -371,7 +371,7 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &dl, int *BytesAdded) const {
+ const DebugLoc &DL, int *BytesAdded) const {
assert(!BytesAdded && "Code size not handled.");
// Shouldn't be a fall through.
@@ -380,11 +380,11 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
"ARC branch conditions have two components!");
if (Cond.empty()) {
- BuildMI(&MBB, dl, get(ARC::BR)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(ARC::BR)).addMBB(TBB);
return 1;
}
int BccOpc = Cond[1].isImm() ? ARC::BRcc_ru6_p : ARC::BRcc_rr_p;
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(BccOpc));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(BccOpc));
MIB.addMBB(TBB);
for (unsigned i = 0; i < 3; i++) {
MIB.add(Cond[i]);
@@ -396,7 +396,7 @@ unsigned ARCInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
// Two-way conditional branch.
- BuildMI(&MBB, dl, get(ARC::BR)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(ARC::BR)).addMBB(FBB);
return 2;
}
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.h b/llvm/lib/Target/ARC/ARCInstrInfo.h
index 4f6122daf91f..ebc02a93b124 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.h
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.h
@@ -28,7 +28,7 @@ class ARCInstrInfo : public ARCGenInstrInfo {
virtual void anchor();
public:
- ARCInstrInfo();
+ ARCInstrInfo(const ARCSubtarget &);
const ARCRegisterInfo &getRegisterInfo() const { return RI; }
@@ -57,19 +57,19 @@ public:
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &dl,
+ const DebugLoc &,
int *BytesAdded = nullptr) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- const DebugLoc &dl, MCRegister DestReg, MCRegister SrcReg,
+ const DebugLoc &, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, Register SrcReg,
- bool isKill, int FrameIndex,
+ bool IsKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
diff --git a/llvm/lib/Target/ARC/ARCInstrInfo.td b/llvm/lib/Target/ARC/ARCInstrInfo.td
index ea3e41621323..4a0bc5cf7421 100644
--- a/llvm/lib/Target/ARC/ARCInstrInfo.td
+++ b/llvm/lib/Target/ARC/ARCInstrInfo.td
@@ -45,7 +45,6 @@ def SDT_ARCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
def SDT_ARCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
-
// Global Address.
def ARCGAWrapper : SDNode<"ARCISD::GAWRAPPER", SDT_ARCmov, []>;
@@ -80,6 +79,12 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARCCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
//===----------------------------------------------------------------------===//
+// Instruction predicates
+//===----------------------------------------------------------------------===//
+
+def HasNorm : Predicate<"Subtarget->hasNorm()">;
+
+//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
@@ -128,6 +133,19 @@ def STB_FAR : PseudoInstARC<(outs), (ins GPR32:$dst, MEMrlimm:$addr),
"STB_FAR $dst, $addr",
[(truncstorei8 GPR32:$dst, AddrModeFar:$addr)]>;
+// TODO: Add `Requires<[HasBitScan]>` predicate to these when available.
+let Defs = [STATUS32] in {
+ def CTLZ : PseudoInstARC<(outs GPR32:$A),
+ (ins GPR32:$B),
+ "error.fls $A, $B",
+ [(set GPR32:$A, (ctlz i32:$B))]>;
+
+ def CTTZ : PseudoInstARC<(outs GPR32:$A),
+ (ins GPR32:$B),
+ "error.ffs $A, $B",
+ [(set GPR32:$A, (cttz i32:$B))]>;
+}
+
//===----------------------------------------------------------------------===//
// Instruction Generation multiclasses.
// Generate many variants of a single instruction with a single defining
@@ -252,6 +270,19 @@ multiclass MultiPat<SDPatternOperator InFrag,
def _rrlimm : Pat<(InFrag i32:$B, imm32:$LImm), (RRLImm i32:$B, imm32:$LImm)>;
}
+// NOTE: This could be specialized later with a custom `PrintMethod` for
+// displaying the aux register name. E.g. `[%count0]` instead of [33].
+def AuxReg : Operand<i32>;
+
+def LR_rs12 : F32_SOP_RS12<0b00100, 0b101010, 0,
+ (outs GPR32:$B), (ins AuxReg:$C),
+ "lr\t$B, [$C]", []>;
+def LR_ru6 : F32_SOP_RU6<0b00100, 0b101010, 0,
+ (outs GPR32:$B), (ins AuxReg:$C),
+ "lr\t$B, [$C]", []>;
+
+def: Pat<(i32 readcyclecounter), (LR_rs12 0x21) >; // read timer
+
// ---------------------------------------------------------------------------
// Instruction definitions and patterns for 3 operand binary instructions.
// ---------------------------------------------------------------------------
@@ -276,6 +307,10 @@ defm MPY : ArcBinaryGEN4Inst<0b011010, "mpy",1>;
defm MPYM : ArcBinaryGEN4Inst<0b011011, "mpym",1>;
defm MPYMU : ArcBinaryGEN4Inst<0b011100, "mpymu",1>;
defm SETEQ : ArcBinaryGEN4Inst<0b111000, "seteq",1>;
+let Uses=[STATUS32], isAsCheapAsAMove=0, isReMaterializable=0 in {
+ defm ADC : ArcBinaryGEN4Inst<0b000001, "adc",1>;
+ defm SBC : ArcBinaryGEN4Inst<0b000011, "sbc">;
+}
// Patterns for 3 operand binary instructions.
defm : MultiPat<add, ADD_rrr, ADD_rru6, ADD_rrlimm>;
@@ -293,6 +328,11 @@ defm : MultiPat<mul, MPY_rrr, MPY_rru6, MPY_rrlimm>;
defm : MultiPat<mulhs, MPYM_rrr, MPYM_rru6, MPYM_rrlimm>;
defm : MultiPat<mulhu, MPYMU_rrr, MPYMU_rru6, MPYMU_rrlimm>;
+defm : MultiPat<addc, ADD_f_rrr, ADD_f_rru6, ADD_f_rrlimm>;
+defm : MultiPat<adde, ADC_f_rrr, ADC_f_rru6, ADC_f_rrlimm>;
+defm : MultiPat<subc, SUB_f_rrr, SUB_f_rru6, SUB_f_rrlimm>;
+defm : MultiPat<sube, SBC_f_rrr, SBC_f_rru6, SBC_f_rrlimm>;
+
// ---------------------------------------------------------------------------
// Unary Instruction definitions.
// ---------------------------------------------------------------------------
@@ -301,8 +341,14 @@ defm SEXB : ArcUnaryGEN4Inst<0b000101, "sexb">;
defm SEXH : ArcUnaryGEN4Inst<0b000110, "sexh">;
// Extension unary instruction definitions.
+defm FFS : ArcUnaryEXT5Inst<0b010010, "ffs">;
defm FLS : ArcUnaryEXT5Inst<0b010011, "fls">;
+let Predicates=[HasNorm] in {
+ defm NORM : ArcUnaryEXT5Inst<0b000001,"norm">;
+ defm NORMH : ArcUnaryEXT5Inst<0b001000,"normh">;
+}
+
// General Unary Instruction fragments.
def : Pat<(sext_inreg i32:$a, i8), (SEXB_rr i32:$a)>;
def : Pat<(sext_inreg i32:$a, i16), (SEXH_rr i32:$a)>;
@@ -337,24 +383,30 @@ def MOV_ru6 : F32_DOP_RU6<0b00100, 0b001010, 0,
(outs GPR32:$B), (ins immU6:$U6),
"mov\t$B, $U6", []>;
+def MOV_f_ru6 : F32_DOP_RU6<0b00100, 0b001010, 1,
+ (outs GPR32:$B), (ins u6:$U6),
+ "mov.f\t$B, $U6", []> {
+ let isAsCheapAsAMove=1;
+ let Defs = [STATUS32];
+}
+
def cmov : PatFrag<(ops node:$op1, node:$op2, node:$cc),
(ARCcmov $op1, $op2, $cc)>;
-let Uses = [STATUS32] in {
- def MOVcc : F32_DOP_CC_RR<0b00100, 0b001010, 0,
- (outs GPR32:$B),
- (ins GPR32:$C, GPR32:$fval, cmovpred:$cc),
- !strconcat("mov.", "$cc\t$B, $C"),
- [(set GPR32:$B, (cmov i32:$C, i32:$fval, cmovpred:$cc))]> {
- let Constraints = "$B = $fval";
- }
-
- def MOVcc_ru6 : F32_SOP_CC_RU6<0b00100, 0b001010, 0,
- (outs GPR32:$b), (ins u6:$c, CCOp:$cc, GPR32:$b2),
- "mov.$cc\t$b, $c", []> {
- let isAsCheapAsAMove=0;
- let isPredicable=1;
- let isReMaterializable=0;
- let Constraints="$b2 = $b";
+let Uses = [STATUS32], isAsCheapAsAMove = 1, isPredicable=1,
+ isReMaterializable = 0, Constraints = "$B = $B2" in {
+ def MOV_cc : F32_DOP_CC_RR<0b00100, 0b001010, 0,
+ (outs GPR32:$B), (ins GPR32:$C, GPR32:$B2, cmovpred:$cc),
+ "mov.$cc\t$B, $C",
+ [(set GPR32:$B, (cmov i32:$C, i32:$B2, cmovpred:$cc))]>;
+
+ def MOV_cc_ru6 : F32_DOP_CC_RU6<0b00100, 0b001010, 0,
+ (outs GPR32:$B), (ins u6:$C, CCOp:$cc, GPR32:$B2),
+ "mov.$cc\t$B, $C", []>;
+
+ def MOV_cc_f_ru6 : F32_DOP_CC_RU6<0b00100, 0b001010, 1,
+ (outs GPR32:$B), (ins u6:$C, CCOp:$cc, GPR32:$B2),
+ "mov.$cc.f\t$B, $C", []> {
+ let Defs = [STATUS32];
}
}
diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
index 232a7be2a9f5..c956f00b628d 100644
--- a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -33,6 +34,16 @@ using namespace llvm;
#define DEBUG_TYPE "arc-addr-mode"
namespace llvm {
+
+static cl::opt<unsigned> ArcKillAddrMode("arc-kill-addr-mode", cl::init(0),
+ cl::ReallyHidden, cl::ZeroOrMore);
+
+#define DUMP_BEFORE() ((ArcKillAddrMode & 0x0001) != 0)
+#define DUMP_AFTER() ((ArcKillAddrMode & 0x0002) != 0)
+#define VIEW_BEFORE() ((ArcKillAddrMode & 0x0004) != 0)
+#define VIEW_AFTER() ((ArcKillAddrMode & 0x0008) != 0)
+#define KILL_PASS() ((ArcKillAddrMode & 0x0010) != 0)
+
FunctionPass *createARCOptAddrMode();
void initializeARCOptAddrModePass(PassRegistry &);
} // end namespace llvm
@@ -73,9 +84,9 @@ private:
// instruction \p To
bool canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
- // Returns true if load/store instruction \p Ldst can be sunk down
- // to instruction \p To
- bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
+ // // Returns true if load/store instruction \p Ldst can be sunk down
+ // // to instruction \p To
+ // bool canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To);
// Check if instructions \p Ldst and \p Add can be moved to become adjacent
// If they can return instruction which need not to move.
@@ -413,30 +424,30 @@ bool ARCOptAddrMode::canHoistLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
return true;
}
-bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
- // Can only sink load/store within same BB
- if (Ldst->getParent() != To->getParent())
- return false;
- MachineBasicBlock::const_iterator MI(Ldst), ME(To),
- End(Ldst->getParent()->end());
-
- bool IsStore = Ldst->mayStore();
- bool IsLoad = Ldst->mayLoad();
-
- Register ValReg = IsLoad ? Ldst->getOperand(0).getReg() : Register();
- for (; MI != ME && MI != End; ++MI) {
- if (MI->isDebugValue())
- continue;
- if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
- MI->hasUnmodeledSideEffects())
- return false;
- if (IsStore && MI->mayLoad())
- return false;
- if (ValReg && MI->readsVirtualRegister(ValReg))
- return false;
- }
- return true;
-}
+// bool ARCOptAddrMode::canSinkLoadStoreTo(MachineInstr *Ldst, MachineInstr *To) {
+// // Can only sink load/store within same BB
+// if (Ldst->getParent() != To->getParent())
+// return false;
+// MachineBasicBlock::const_iterator MI(Ldst), ME(To),
+// End(Ldst->getParent()->end());
+
+// bool IsStore = Ldst->mayStore();
+// bool IsLoad = Ldst->mayLoad();
+
+// Register ValReg = IsLoad ? Ldst->getOperand(0).getReg() : Register();
+// for (; MI != ME && MI != End; ++MI) {
+// if (MI->isDebugValue())
+// continue;
+// if (MI->mayStore() || MI->isCall() || MI->isInlineAsm() ||
+// MI->hasUnmodeledSideEffects())
+// return false;
+// if (IsStore && MI->mayLoad())
+// return false;
+// if (ValReg && MI->readsVirtualRegister(ValReg))
+// return false;
+// }
+// return true;
+// }
void ARCOptAddrMode::changeToAddrMode(MachineInstr &Ldst, unsigned NewOpcode,
unsigned NewBase,
@@ -485,9 +496,16 @@ bool ARCOptAddrMode::processBasicBlock(MachineBasicBlock &MBB) {
}
bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
+ if (skipFunction(MF.getFunction()) || KILL_PASS())
return false;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (DUMP_BEFORE())
+ MF.dump();
+#endif
+ if (VIEW_BEFORE())
+ MF.viewCFG();
+
AST = &MF.getSubtarget<ARCSubtarget>();
AII = AST->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -496,6 +514,13 @@ bool ARCOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (auto &MBB : MF)
Changed |= processBasicBlock(MBB);
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ if (DUMP_AFTER())
+ MF.dump();
+#endif
+ if (VIEW_AFTER())
+ MF.viewCFG();
return Changed;
}
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.cpp b/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
index fb84dd9b266a..91ddd7fe36e1 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -35,19 +35,19 @@ using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "ARCGenRegisterInfo.inc"
-static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
+static void replaceFrameIndex(MachineBasicBlock::iterator II,
const ARCInstrInfo &TII, unsigned Reg,
unsigned FrameReg, int Offset, int StackSize,
int ObjSize, RegScavenger *RS, int SPAdj) {
assert(RS && "Need register scavenger.");
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc dl = MI.getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
unsigned BaseReg = FrameReg;
unsigned KillState = 0;
if (MI.getOpcode() == ARC::LD_rs9 && (Offset >= 256 || Offset < -256)) {
// Loads can always be reached with LD_rlimm.
- BuildMI(MBB, II, dl, TII.get(ARC::LD_rlimm), Reg)
+ BuildMI(MBB, II, DL, TII.get(ARC::LD_rlimm), Reg)
.addReg(BaseReg)
.addImm(Offset)
.addMemOperand(*MI.memoperands_begin());
@@ -72,7 +72,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
RS->setRegUsed(BaseReg);
}
unsigned AddOpc = isUInt<6>(Offset) ? ARC::ADD_rru6 : ARC::ADD_rrlimm;
- BuildMI(MBB, II, dl, TII.get(AddOpc))
+ BuildMI(MBB, II, DL, TII.get(AddOpc))
.addReg(BaseReg, RegState::Define)
.addReg(FrameReg)
.addImm(Offset);
@@ -90,7 +90,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
case ARC::LDB_rs9:
case ARC::LDB_X_rs9:
LLVM_DEBUG(dbgs() << "Building LDFI\n");
- BuildMI(MBB, II, dl, TII.get(MI.getOpcode()), Reg)
+ BuildMI(MBB, II, DL, TII.get(MI.getOpcode()), Reg)
.addReg(BaseReg, KillState)
.addImm(Offset)
.addMemOperand(*MI.memoperands_begin());
@@ -103,7 +103,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
LLVM_FALLTHROUGH;
case ARC::STB_rs9:
LLVM_DEBUG(dbgs() << "Building STFI\n");
- BuildMI(MBB, II, dl, TII.get(MI.getOpcode()))
+ BuildMI(MBB, II, DL, TII.get(MI.getOpcode()))
.addReg(Reg, getKillRegState(MI.getOperand(0).isKill()))
.addReg(BaseReg, KillState)
.addImm(Offset)
@@ -111,7 +111,7 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
break;
case ARC::GETFI:
LLVM_DEBUG(dbgs() << "Building GETFI\n");
- BuildMI(MBB, II, dl,
+ BuildMI(MBB, II, DL,
TII.get(isUInt<6>(Offset) ? ARC::ADD_rru6 : ARC::ADD_rrlimm))
.addReg(Reg, RegState::Define)
.addReg(FrameReg)
@@ -125,7 +125,8 @@ static void ReplaceFrameIndex(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-ARCRegisterInfo::ARCRegisterInfo() : ARCGenRegisterInfo(ARC::BLINK) {}
+ARCRegisterInfo::ARCRegisterInfo(const ARCSubtarget &ST)
+ : ARCGenRegisterInfo(ARC::BLINK), ST(ST) {}
bool ARCRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
return MF.needsFrameMoves();
@@ -145,6 +146,7 @@ BitVector ARCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(ARC::R25);
Reserved.set(ARC::BLINK);
Reserved.set(ARC::FP);
+
return Reserved;
}
@@ -214,7 +216,7 @@ void ARCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
"FP Offset not in bounds.");
}
}
- ReplaceFrameIndex(II, TII, Reg, getFrameRegister(MF), Offset, StackSize,
+ replaceFrameIndex(II, TII, Reg, getFrameRegister(MF), Offset, StackSize,
ObjSize, RS, SPAdj);
}
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.h b/llvm/lib/Target/ARC/ARCRegisterInfo.h
index f8bca11fdbc8..b1ae6b69398f 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.h
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.h
@@ -21,10 +21,13 @@
namespace llvm {
class TargetInstrInfo;
+class ARCSubtarget;
struct ARCRegisterInfo : public ARCGenRegisterInfo {
+ const ARCSubtarget &ST;
+
public:
- ARCRegisterInfo();
+ ARCRegisterInfo(const ARCSubtarget &);
/// Code Generation virtual methods...
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.td b/llvm/lib/Target/ARC/ARCRegisterInfo.td
index 5f2bc7974dde..4b686e4bda64 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.td
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.td
@@ -21,56 +21,56 @@ class Core<int num, string n, list<string>altNames=[]> : ARCReg<n, altNames> {
let HWEncoding = num;
}
-class Status<string n> : ARCReg<n, []> {
+// Auxiliary register
+class Aux<int num, string n, list<string> altNames=[]> : ARCReg<n, altNames> {
+ let HWEncoding = num;
}
// Integer registers
-def R0 : Core< 0, "%r0">, DwarfRegNum<[0]>;
-def R1 : Core< 1, "%r1">, DwarfRegNum<[1]>;
-def R2 : Core< 2, "%r2">, DwarfRegNum<[2]>;
-def R3 : Core< 3, "%r3">, DwarfRegNum<[3]>;
+foreach i = 0 - 3 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
+
let CostPerUse=[1] in {
-def R4 : Core< 4, "%r4">, DwarfRegNum<[4]>;
-def R5 : Core< 5, "%r5">, DwarfRegNum<[5]>;
-def R6 : Core< 6, "%r6">, DwarfRegNum<[6]>;
-def R7 : Core< 7, "%r7">, DwarfRegNum<[7]>;
-def R8 : Core< 8, "%r8">, DwarfRegNum<[8]>;
-def R9 : Core< 9, "%r9">, DwarfRegNum<[9]>;
-def R10 : Core<10, "%r10">, DwarfRegNum<[10]>;
-def R11 : Core<11, "%r11">, DwarfRegNum<[11]>;
+ foreach i = 4 - 11 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
}
-def R12 : Core<12, "%r12">, DwarfRegNum<[12]>;
-def R13 : Core<13, "%r13">, DwarfRegNum<[13]>;
-def R14 : Core<14, "%r14">, DwarfRegNum<[14]>;
-def R15 : Core<15, "%r15">, DwarfRegNum<[15]>;
+
+foreach i = 12 - 15 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
let CostPerUse=[1] in {
-def R16 : Core<16, "%r16">, DwarfRegNum<[16]>;
-def R17 : Core<17, "%r17">, DwarfRegNum<[17]>;
-def R18 : Core<18, "%r18">, DwarfRegNum<[18]>;
-def R19 : Core<19, "%r19">, DwarfRegNum<[19]>;
-def R20 : Core<20, "%r20">, DwarfRegNum<[20]>;
-def R21 : Core<21, "%r21">, DwarfRegNum<[21]>;
-def R22 : Core<22, "%r22">, DwarfRegNum<[22]>;
-def R23 : Core<23, "%r23">, DwarfRegNum<[23]>;
-def R24 : Core<24, "%r24">, DwarfRegNum<[24]>;
-def R25 : Core<25, "%r25">, DwarfRegNum<[25]>;
-def GP : Core<26, "%gp",["%r26"]>, DwarfRegNum<[26]>;
-def FP : Core<27, "%fp", ["%r27"]>, DwarfRegNum<[27]>;
-def SP : Core<28, "%sp", ["%r28"]>, DwarfRegNum<[28]>;
-def ILINK : Core<29, "%ilink">, DwarfRegNum<[29]>;
-def R30 : Core<30, "%r30">, DwarfRegNum<[30]>;
-def BLINK: Core<31, "%blink">, DwarfRegNum<[31]>;
-
-def STATUS32 : Status<"status32">, DwarfRegNum<[32]>;
+
+ foreach i = 16 - 25 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
+
+ def GP : Core<26, "%gp",["%r26"]>, DwarfRegNum<[26]>;
+ def FP : Core<27, "%fp", ["%r27"]>, DwarfRegNum<[27]>;
+ def SP : Core<28, "%sp", ["%r28"]>, DwarfRegNum<[28]>;
+ def ILINK : Core<29, "%ilink">, DwarfRegNum<[29]>;
+ def R30 : Core<30, "%r30">, DwarfRegNum<[30]>;
+ def BLINK : Core<31, "%blink">, DwarfRegNum<[31]>;
+
+ // Define extended core registers R32..R63
+ foreach i = 32 - 63 in
+ def R#i : Core<i, "%r"#i>, DwarfRegNum<[i]>;
+}
+
+// Auxiliary registers
+let CostPerUse=[1] in {
+ def STATUS32 : Aux<10, "status32">; // No DwarfRegNum defined in the ARC ABI
}
-// Register classes.
-//
def GPR32: RegisterClass<"ARC", [i32], 32,
- (add R0, R1, R2, R3,
- R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19,
- R20, R21, R22, R23, R24, R25, GP, FP, SP, ILINK, R30, BLINK)>;
+ (add (sequence "R%u", 0, 25), GP, FP, SP, ILINK, R30, BLINK, (sequence "R%u", 32, 63))> {
+ let AltOrders=[(add (sequence "R%u", 0, 25), GP, FP, SP, ILINK, R30, BLINK)];
+ let AltOrderSelect = [{
+ // The value returned from this C++ code block selects the allocation order:
+ // 0 is the default order (all GPR32 regs),
+ // 1 is AltOrders[0],
+ // 2 is AltOrders[1], and so on.
+ return 1;
+ }];
+}
def SREG : RegisterClass<"ARC", [i32], 1, (add STATUS32)>;
diff --git a/llvm/lib/Target/ARC/ARCSubtarget.cpp b/llvm/lib/Target/ARC/ARCSubtarget.cpp
index 409dd2a98ab4..641c56b06870 100644
--- a/llvm/lib/Target/ARC/ARCSubtarget.cpp
+++ b/llvm/lib/Target/ARC/ARCSubtarget.cpp
@@ -12,7 +12,7 @@
#include "ARCSubtarget.h"
#include "ARC.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -26,5 +26,5 @@ void ARCSubtarget::anchor() {}
ARCSubtarget::ARCSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
- : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), FrameLowering(*this),
- TLInfo(TM, *this) {}
+ : ARCGenSubtargetInfo(TT, CPU, /*TuneCPU=*/CPU, FS), InstrInfo(*this),
+ FrameLowering(*this), TLInfo(TM, *this) {}
diff --git a/llvm/lib/Target/ARC/ARCSubtarget.h b/llvm/lib/Target/ARC/ARCSubtarget.h
index 6a4856221b8f..f3429677deeb 100644
--- a/llvm/lib/Target/ARC/ARCSubtarget.h
+++ b/llvm/lib/Target/ARC/ARCSubtarget.h
@@ -29,14 +29,15 @@ class StringRef;
class TargetMachine;
class ARCSubtarget : public ARCGenSubtargetInfo {
- bool Xnorm = false;
-
virtual void anchor();
ARCInstrInfo InstrInfo;
ARCFrameLowering FrameLowering;
ARCTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ // ARC processor extensions
+ bool Xnorm = false;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
diff --git a/llvm/lib/Target/ARC/ARCTargetMachine.cpp b/llvm/lib/Target/ARC/ARCTargetMachine.cpp
index b8c8949e18dd..52f74b729ff7 100644
--- a/llvm/lib/Target/ARC/ARCTargetMachine.cpp
+++ b/llvm/lib/Target/ARC/ARCTargetMachine.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
index b7033d0972b9..bb5336931932 100644
--- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
+++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -107,6 +107,12 @@ static DecodeStatus DecodeStLImmInstruction(MCInst &, uint64_t, uint64_t,
static DecodeStatus DecodeLdRLImmInstruction(MCInst &, uint64_t, uint64_t,
const void *);
+static DecodeStatus DecodeSOPwithRS12(MCInst &, uint64_t, uint64_t,
+ const void *);
+
+static DecodeStatus DecodeSOPwithRU6(MCInst &, uint64_t, uint64_t,
+ const void *);
+
static DecodeStatus DecodeCCRU6Instruction(MCInst &, uint64_t, uint64_t,
const void *);
@@ -304,13 +310,36 @@ static DecodeStatus DecodeCCRU6Instruction(MCInst &Inst, uint64_t Insn,
DstB = decodeBField(Insn);
DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
using Field = decltype(Insn);
- Field U6Field = fieldFromInstruction(Insn, 6, 11);
+ Field U6Field = fieldFromInstruction(Insn, 6, 6);
Inst.addOperand(MCOperand::createImm(U6Field));
Field CCField = fieldFromInstruction(Insn, 0, 4);
Inst.addOperand(MCOperand::createImm(CCField));
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSOPwithRU6(MCInst &Inst, uint64_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned DstB = decodeBField(Insn);
+ DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
+ using Field = decltype(Insn);
+ Field U6 = fieldFromInstruction(Insn, 6, 6);
+ Inst.addOperand(MCOperand::createImm(U6));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSOPwithRS12(MCInst &Inst, uint64_t Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned DstB = decodeBField(Insn);
+ DecodeGPR32RegisterClass(Inst, DstB, Address, Decoder);
+ using Field = decltype(Insn);
+ Field Lower = fieldFromInstruction(Insn, 6, 6);
+ Field Upper = fieldFromInstruction(Insn, 0, 5);
+ Field Sign = fieldFromInstruction(Insn, 5, 1) ? -1 : 1;
+ Field Result = Sign * ((Upper << 6) + Lower);
+ Inst.addOperand(MCOperand::createImm(Result));
+ return MCDisassembler::Success;
+}
+
DecodeStatus ARCDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
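For reference, the s12 immediate assembled by DecodeSOPwithRS12 above is split across two fields: the low six bits sit at bit 6, the remaining magnitude bits at bit 0, and bit 5 selects the sign. A minimal standalone C++ sketch of that arithmetic, using a local stand-in for fieldFromInstruction and a made-up encoding (not a real ARC opcode):

    #include <cstdint>
    #include <cstdio>

    // Stand-in for the decoder's field extraction: Size bits starting at Start.
    static uint64_t field(uint64_t Insn, unsigned Start, unsigned Size) {
      return (Insn >> Start) & ((1ULL << Size) - 1);
    }

    int main() {
      uint64_t Insn = (0x2Bull << 6) | 0x21;       // hypothetical encoding
      int64_t Lower = field(Insn, 6, 6);           // low 6 bits of the immediate
      int64_t Upper = field(Insn, 0, 5);           // upper magnitude bits
      int64_t Sign = field(Insn, 5, 1) ? -1 : 1;   // bit 5 selects the sign
      int64_t Result = Sign * ((Upper << 6) + Lower);
      std::printf("s12 immediate = %lld\n", (long long)Result); // prints -107
      return 0;
    }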
diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
index 358ee6002f80..d4f74fa77fc4 100644
--- a/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp
@@ -19,9 +19,9 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp b/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
index d4a74e1c4174..91d56bb6b86d 100644
--- a/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
+++ b/llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/ARCTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index bb81233cf803..f4d0f4a6d6b0 100644
--- a/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -182,8 +182,7 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) {
Front.push_back(MI);
while (Front.size() != 0) {
- MI = Front.back();
- Front.pop_back();
+ MI = Front.pop_back_val();
// MI is already known to be dead. We need to see
// if other instructions can also be removed.
@@ -621,9 +620,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// Collect all the uses of this MI's DPR def for updating later.
SmallVector<MachineOperand*, 8> Uses;
Register DPRDefReg = MI->getOperand(0).getReg();
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DPRDefReg),
- E = MRI->use_end(); I != E; ++I)
- Uses.push_back(&*I);
+ for (MachineOperand &MO : MRI->use_operands(DPRDefReg))
+ Uses.push_back(&MO);
// We can optimize this.
unsigned NewReg = optimizeSDPattern(MI);
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 5c1bed14c941..8cbd80f1bf65 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -437,6 +437,11 @@ def FeatureLOB : SubtargetFeature<"lob", "HasLOB", "true",
"Enable Low Overhead Branch "
"extensions">;
+def FeatureFixCMSE_CVE_2021_35465 : SubtargetFeature<"fix-cmse-cve-2021-35465",
+ "FixCMSE_CVE_2021_35465", "true",
+ "Mitigate against the cve-2021-35465 "
+ "security vulnurability">;
+
//===----------------------------------------------------------------------===//
// ARM architecture class
//
@@ -539,6 +544,18 @@ def HasV8_7aOps : SubtargetFeature<"v8.7a", "HasV8_7aOps", "true",
"Support ARM v8.7a instructions",
[HasV8_6aOps]>;
+def HasV9_0aOps : SubtargetFeature<"v9a", "HasV9_0aOps", "true",
+ "Support ARM v9a instructions",
+ [HasV8_5aOps]>;
+
+def HasV9_1aOps : SubtargetFeature<"v9.1a", "HasV9_1aOps", "true",
+ "Support ARM v9.1a instructions",
+ [HasV8_6aOps, HasV9_0aOps]>;
+
+def HasV9_2aOps : SubtargetFeature<"v9.2a", "HasV9_2aOps", "true",
+ "Support ARM v9.2a instructions",
+ [HasV8_7aOps, HasV9_1aOps]>;
+
def HasV8_1MMainlineOps : SubtargetFeature<
"v8.1m.main", "HasV8_1MMainlineOps", "true",
"Support ARM v8-1M Mainline instructions",
@@ -619,6 +636,8 @@ def ProcA78 : SubtargetFeature<"cortex-a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", []>;
def ProcA78C : SubtargetFeature<"a78c", "ARMProcFamily", "CortexA78C",
"Cortex-A78C ARM processors", []>;
+def ProcA710 : SubtargetFeature<"cortex-a710", "ARMProcFamily",
+ "CortexA710", "Cortex-A710 ARM processors", []>;
def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
"Cortex-X1 ARM processors", []>;
@@ -867,6 +886,43 @@ def ARMv87a : Architecture<"armv8.7-a", "ARMv87a", [HasV8_7aOps,
FeatureRAS,
FeatureDotProd]>;
+def ARMv9a : Architecture<"armv9-a", "ARMv9a", [HasV9_0aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+def ARMv91a : Architecture<"armv9.1-a", "ARMv91a", [HasV9_1aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+def ARMv92a : Architecture<"armv9.2-a", "ARMv92a", [HasV9_2aOps,
+ FeatureAClass,
+ FeatureDB,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeatureDSP,
+ FeatureTrustZone,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureCRC,
+ FeatureRAS,
+ FeatureDotProd]>;
+
def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
FeatureRClass,
FeatureDB,
@@ -1213,7 +1269,8 @@ def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline,
FeatureHasSlowFPVMLx,
FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureHasNoBranchPredictor]>;
+ FeatureHasNoBranchPredictor,
+ FeatureFixCMSE_CVE_2021_35465]>;
def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureDSP,
@@ -1222,7 +1279,8 @@ def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline,
FeatureHasSlowFPVMLx,
FeatureHasSlowFPVFMx,
FeatureUseMISched,
- FeatureHasNoBranchPredictor]>;
+ FeatureHasNoBranchPredictor,
+ FeatureFixCMSE_CVE_2021_35465]>;
def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline,
FeatureDSP,
@@ -1231,7 +1289,8 @@ def : ProcessorModel<"cortex-m55", CortexM4Model, [ARMv81mMainline,
FeatureHasNoBranchPredictor,
FeaturePrefLoopAlign32,
FeatureHasSlowFPVMLx,
- HasMVEFloatOps]>;
+ HasMVEFloatOps,
+ FeatureFixCMSE_CVE_2021_35465]>;
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDivThumb,
@@ -1323,6 +1382,14 @@ def : ProcNoItin<"cortex-a78c", [ARMv82a, ProcA78C,
FeatureDotProd,
FeatureFullFP16]>;
+def : ProcNoItin<"cortex-a710", [ARMv9a, ProcA710,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureFP16FML,
+ FeatureBF16,
+ FeatureMatMulInt8,
+ FeatureSB]>;
+
def : ProcNoItin<"cortex-x1", [ARMv82a, ProcX1,
FeatureHWDivThumb,
FeatureHWDivARM,
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index ba594b7f0935..9901b86b0e87 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -41,11 +41,11 @@
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -1291,9 +1291,6 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
- const MachineFunction &MF = *MI->getParent()->getParent();
- const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
-
// If we just ended a constant pool, mark it as such.
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) {
OutStreamer->emitDataRegion(MCDR_DataRegionEnd);
@@ -1742,7 +1739,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
// FIXME: Ideally we could vary the LDRB index based on the padding
// between the sequence and jump table, however that relies on MCExprs
// for load indexes which are currently not supported.
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
.addReg(Idx)
.addReg(Idx)
@@ -2035,6 +2032,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
.addImm(ARMCC::AL)
.addReg(0));
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+
if (STI.isTargetDarwin() || STI.isTargetWindows()) {
// These platforms always use the same frame register
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12)
@@ -2080,6 +2080,9 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) {
Register SrcReg = MI->getOperand(0).getReg();
Register ScratchReg = MI->getOperand(1).getReg();
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi)
.addReg(ScratchReg)
.addReg(SrcReg)
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 9b058ff7dbcb..2d981be4cfc1 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -173,8 +173,9 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return MHR;
}
-MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
- MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const {
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
@@ -336,9 +337,9 @@ MachineInstr *ARMBaseInstrInfo::convertToThreeAddress(
}
}
- MachineBasicBlock::iterator MBBI = MI.getIterator();
- MFI->insert(MBBI, NewMIs[1]);
- MFI->insert(MBBI, NewMIs[0]);
+ MachineBasicBlock &MBB = *MI.getParent();
+ MBB.insert(MI, NewMIs[1]);
+ MBB.insert(MI, NewMIs[0]);
return NewMIs[0];
}
@@ -867,6 +868,7 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
MIB.addImm(ARMVCC::None);
MIB.addReg(0);
+ MIB.addReg(0); // tp_reg
}
void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
@@ -878,6 +880,7 @@ void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
MIB.addImm(Cond);
MIB.addReg(ARM::VPR, RegState::Implicit);
+ MIB.addReg(0); // tp_reg
}
void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
@@ -914,7 +917,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
- Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
+ Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;
if (Opc) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
@@ -923,7 +926,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc == ARM::MVE_VORR)
addUnpredicatedMveVpredROp(MIB, DestReg);
- else
+ else if (Opc != ARM::MQPRCopy)
MIB.add(predOps(ARMCC::AL));
return;
}
@@ -1241,7 +1244,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
+ ARM::MQQPRRegClass.hasSubClassEq(RC) ||
+ ARM::DQuadRegClass.hasSubClassEq(RC)) {
if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
Subtarget.hasNEON()) {
// FIXME: It's possible to only store part of the QQ register if the
@@ -1252,6 +1257,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO)
.add(predOps(ARMCC::AL));
+ } else if (Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
} else {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
get(ARM::VSTMDIA))
@@ -1267,7 +1277,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 64:
- if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
@@ -1328,6 +1344,13 @@ unsigned ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::MQQPRStore:
+ case ARM::MQQQQPRStore:
+ if (MI.getOperand(1).isFI()) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ break;
}
return 0;
@@ -1473,31 +1496,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 32:
- if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
- if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
- Subtarget.hasNEON()) {
- BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
- .addFrameIndex(FI)
- .addImm(16)
- .addMemOperand(MMO)
- .add(predOps(ARMCC::AL));
- } else {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
- .addFrameIndex(FI)
- .add(predOps(ARMCC::AL))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
- if (Register::isPhysicalRegister(DestReg))
- MIB.addReg(DestReg, RegState::ImplicitDefine);
- }
- } else
- llvm_unreachable("Unknown reg class!");
- break;
+ if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
+ ARM::MQQPRRegClass.hasSubClassEq(RC) ||
+ ARM::DQuadRegClass.hasSubClassEq(RC)) {
+ if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
+ Subtarget.hasNEON()) {
+ BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
+ .addFrameIndex(FI)
+ .addImm(16)
+ .addMemOperand(MMO)
+ .add(predOps(ARMCC::AL));
+ } else if (Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
+ .addFrameIndex(FI)
+ .add(predOps(ARMCC::AL))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (Register::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
+ }
+ } else
+ llvm_unreachable("Unknown reg class!");
+ break;
case 64:
- if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
+ if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
+ Subtarget.hasMVEIntegerOps()) {
+ BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI)
.add(predOps(ARMCC::AL))
@@ -1566,6 +1600,13 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
break;
+ case ARM::MQQPRLoad:
+ case ARM::MQQQQPRLoad:
+ if (MI.getOperand(1).isFI()) {
+ FrameIndex = MI.getOperand(1).getIndex();
+ return MI.getOperand(0).getReg();
+ }
+ break;
}
return 0;
@@ -1642,8 +1683,6 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
- assert(getSubtarget().getTargetTriple().isOSBinFormatMachO() &&
- "LOAD_STACK_GUARD currently supported only for MachO.");
expandLoadStackGuard(MI);
MI.getParent()->erase(MI);
return true;
@@ -2331,9 +2370,13 @@ ARMBaseInstrInfo::optimizeSelect(MachineInstr &MI,
// Find new register class to use.
MachineOperand FalseReg = MI.getOperand(Invert ? 2 : 1);
+ MachineOperand TrueReg = MI.getOperand(Invert ? 1 : 2);
Register DestReg = MI.getOperand(0).getReg();
- const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
- if (!MRI.constrainRegClass(DestReg, PreviousClass))
+ const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());
+ const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());
+ if (!MRI.constrainRegClass(DestReg, FalseClass))
+ return nullptr;
+ if (!MRI.constrainRegClass(DestReg, TrueClass))
return nullptr;
// Create a new predicated version of DefMI.
@@ -2760,8 +2803,8 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
case ARM::CMPri:
@@ -2832,7 +2875,8 @@ inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
Register SrcReg, Register SrcReg2,
- int ImmValue, const MachineInstr *OI,
+ int64_t ImmValue,
+ const MachineInstr *OI,
bool &IsThumb1) {
if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
(OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
@@ -2967,8 +3011,8 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue, const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI) return false;
@@ -3220,9 +3264,8 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
// live-out. If it is live-out, do not optimize.
if (!isSafe) {
MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM::CPSR))
+ for (MachineBasicBlock *Succ : MBB->successors())
+ if (Succ->isLiveIn(ARM::CPSR))
return false;
}
@@ -3255,7 +3298,7 @@ bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
MachineBasicBlock::const_iterator Next = &MI;
++Next;
Register SrcReg, SrcReg2;
- int CmpMask, CmpValue;
+ int64_t CmpMask, CmpValue;
bool IsThumb1;
if (Next != MI.getParent()->end() &&
analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
@@ -4839,8 +4882,6 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
return true;
}
-// LoadStackGuard has so far only been implemented for MachO. Different code
-// sequence is needed for other targets.
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc,
unsigned LoadOpc) const {
@@ -4850,27 +4891,70 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
Register Reg = MI->getOperand(0).getReg();
- const GlobalValue *GV =
- cast<GlobalValue>((*MI->memoperands_begin())->getValue());
MachineInstrBuilder MIB;
+ unsigned int Offset = 0;
+
+ if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
+ assert(Subtarget.isReadTPHard() &&
+ "TLS stack protector requires hardware TLS register");
+
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addImm(15)
+ .addImm(0)
+ .addImm(13)
+ .addImm(0)
+ .addImm(3)
+ .add(predOps(ARMCC::AL));
- BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
- .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
+ Module &M = *MBB.getParent()->getFunction().getParent();
+ Offset = M.getStackProtectorGuardOffset();
+ if (Offset & ~0xfffU) {
+ // The offset won't fit in the LDR's 12-bit immediate field, so emit an
+ // extra ADD to cover the delta. This gives us a guaranteed 8 additional
+ // bits, resulting in a range of 0 to +1 MiB for the guard offset.
+ unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? ARM::ADDri : ARM::t2ADDri;
+ BuildMI(MBB, MI, DL, get(AddOpc), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(Offset & ~0xfffU)
+ .add(predOps(ARMCC::AL))
+ .addReg(0);
+ Offset &= 0xfffU;
+ }
+ } else {
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+ bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);
+
+ unsigned TargetFlags = ARMII::MO_NO_FLAG;
+ if (Subtarget.isTargetMachO()) {
+ TargetFlags |= ARMII::MO_NONLAZY;
+ } else if (Subtarget.isTargetCOFF()) {
+ if (GV->hasDLLImportStorageClass())
+ TargetFlags |= ARMII::MO_DLLIMPORT;
+ else if (IsIndirect)
+ TargetFlags |= ARMII::MO_COFFSTUB;
+ } else if (Subtarget.isGVInGOT(GV)) {
+ TargetFlags |= ARMII::MO_GOT;
+ }
- if (Subtarget.isGVIndirectSymbol(GV)) {
- MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
- MIB.addReg(Reg, RegState::Kill).addImm(0);
- auto Flags = MachineMemOperand::MOLoad |
- MachineMemOperand::MODereferenceable |
- MachineMemOperand::MOInvariant;
- MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
- MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
- MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
+ BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
+ .addGlobalAddress(GV, 0, TargetFlags);
+
+ if (IsIndirect) {
+ MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
+ MIB.addReg(Reg, RegState::Kill).addImm(0);
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
+ MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));
+ MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));
+ }
}
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill)
- .addImm(0)
+ .addImm(Offset)
.cloneMemRefs(*MI)
.add(predOps(ARMCC::AL));
}
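The comment above about the 12-bit LDR field is easiest to see with concrete numbers: a guard offset that does not fit 12 bits is split into a high part folded into an extra ADD and a low 12-bit part carried by the load itself, covering offsets up to about 1 MiB. A minimal standalone sketch of that split, with a hypothetical offset value:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Offset = 0x10504;     // hypothetical stack-protector guard offset
      uint32_t AddImm = 0, LdrImm = Offset;
      if (Offset & ~0xfffU) {        // too large for the LDR's 12-bit immediate
        AddImm = Offset & ~0xfffU;   // handled by the extra ADDri/t2ADDri
        LdrImm = Offset & 0xfffU;    // remainder fits the LDR immediate
      }
      assert(AddImm + LdrImm == Offset);
      std::printf("ADD #0x%x, LDR #0x%x\n", (unsigned)AddImm, (unsigned)LdrImm);
      return 0;
    }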
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 0ebba0d9fdd5..db9320962e81 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -120,9 +120,8 @@ public:
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0;
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
@@ -289,15 +288,15 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
/// optimizeCompareInstr - Convert the instruction to set the zero flag so
/// that we can remove a "comparison with zero"; Remove a redundant CMP
/// instruction if the flags can be updated in the same way by an earlier
/// instruction such as SUB.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
bool analyzeSelect(const MachineInstr &MI,
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 4883e5693f87..b53efe58e8de 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -263,6 +263,13 @@ ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case ARM::QQQQPRRegClassID:
if (MF.getSubtarget<ARMSubtarget>().hasNEON())
return Super;
+ break;
+ case ARM::MQPRRegClassID:
+ case ARM::MQQPRRegClassID:
+ case ARM::MQQQQPRRegClassID:
+ if (MF.getSubtarget<ARMSubtarget>().hasMVEIntegerOps())
+ return Super;
+ break;
}
Super = *I++;
} while (Super);
@@ -928,4 +935,4 @@ bool ARMBaseRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
return TargetRegisterInfo::shouldRewriteCopySrc(DefRC, DefSubReg,
SrcRC, SrcSubReg);
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
index 5ea47f529b23..ddbd6702e528 100644
--- a/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
+++ b/llvm/lib/Target/ARM/ARMBlockPlacement.cpp
@@ -31,6 +31,8 @@ private:
const ARMBaseInstrInfo *TII;
std::unique_ptr<ARMBasicBlockUtils> BBUtils = nullptr;
MachineLoopInfo *MLI = nullptr;
+ // A list of WLS instructions that need to be reverted to DLS.
+ SmallVector<MachineInstr *> RevertedWhileLoops;
public:
static char ID;
@@ -41,9 +43,9 @@ public:
bool blockIsBefore(MachineBasicBlock *BB, MachineBasicBlock *Other);
bool fixBackwardsWLS(MachineLoop *ML);
bool processPostOrderLoops(MachineLoop *ML);
+ bool revertWhileToDoLoop(MachineInstr *WLS);
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -82,6 +84,66 @@ static MachineInstr *findWLS(MachineLoop *ML) {
return nullptr;
}
+// Revert a WhileLoopStart to an equivalent DoLoopStart and branch. Note that
+// because of the branches this requires an extra block to be created.
+bool ARMBlockPlacement::revertWhileToDoLoop(MachineInstr *WLS) {
+ // lr = t2WhileLoopStartTP r0, r1, TgtBB
+ // t2Br Ph
+ // ->
+ // cmp r0, 0
+ // brcc TgtBB
+ // block2:
+ // LR = t2DoLoopStartTP r0, r1
+ // t2Br Ph
+ MachineBasicBlock *Preheader = WLS->getParent();
+ assert(WLS != &Preheader->back());
+ assert(WLS->getNextNode() == &Preheader->back());
+ MachineInstr *Br = &Preheader->back();
+ assert(Br->getOpcode() == ARM::t2B);
+ assert(Br->getOperand(1).getImm() == 14);
+
+ // Clear the kill flags, as the cmp/bcc will no longer kill any operands.
+ WLS->getOperand(1).setIsKill(false);
+ if (WLS->getOpcode() == ARM::t2WhileLoopStartTP)
+ WLS->getOperand(2).setIsKill(false);
+
+ // Create the new block
+ MachineBasicBlock *NewBlock = Preheader->getParent()->CreateMachineBasicBlock(
+ Preheader->getBasicBlock());
+ Preheader->getParent()->insert(++Preheader->getIterator(), NewBlock);
+ // Move the Br to it
+ Br->removeFromParent();
+ NewBlock->insert(NewBlock->end(), Br);
+ // And setup the successors correctly.
+ Preheader->replaceSuccessor(Br->getOperand(0).getMBB(), NewBlock);
+ NewBlock->addSuccessor(Br->getOperand(0).getMBB());
+
+ // Create a new DLS to replace the WLS
+ MachineInstrBuilder MIB =
+ BuildMI(*NewBlock, Br, WLS->getDebugLoc(),
+ TII->get(WLS->getOpcode() == ARM::t2WhileLoopStartTP
+ ? ARM::t2DoLoopStartTP
+ : ARM::t2DoLoopStart));
+ MIB.add(WLS->getOperand(0));
+ MIB.add(WLS->getOperand(1));
+ if (WLS->getOpcode() == ARM::t2WhileLoopStartTP)
+ MIB.add(WLS->getOperand(2));
+
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX
+ << "Reverting While Loop to Do Loop: " << *WLS << "\n");
+
+ RevertWhileLoopStartLR(WLS, TII, ARM::t2Bcc, true);
+
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *NewBlock);
+
+ Preheader->getParent()->RenumberBlocks();
+ BBUtils->computeAllBlockSizes();
+ BBUtils->adjustBBOffsetsAfter(Preheader);
+
+ return true;
+}
+
/// Checks if loop has a backwards branching WLS, and if possible, fixes it.
/// This requires checking the predecessor (i.e. the preheader or its predecessor)
/// for a WLS and if its loopExit/target is before it.
@@ -125,11 +187,10 @@ bool ARMBlockPlacement::fixBackwardsWLS(MachineLoop *ML) {
// TODO: Analyse the blocks to make a decision if it would be worth
// moving Preheader even if we'd introduce a backwards WLS
if (WLSTarget == Predecessor) {
- LLVM_DEBUG(
- dbgs() << DEBUG_PREFIX
- << "Can't move Predecessor"
- "block as it would convert a WLS from forward to a "
- "backwards branching WLS\n");
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Can't move Predecessor block as "
+ << "it would convert a WLS from forward to a "
+ << "backwards branching WLS\n");
+ RevertedWhileLoops.push_back(WlsInstr);
return false;
}
}
@@ -162,11 +223,16 @@ bool ARMBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BBUtils->computeAllBlockSizes();
BBUtils->adjustBBOffsetsAfter(&MF.front());
bool Changed = false;
+ RevertedWhileLoops.clear();
// Find loops with a backwards branching WLS and fix if possible.
for (auto *ML : *MLI)
Changed |= processPostOrderLoops(ML);
+ // Revert any While loops still out of range to DLS loops.
+ for (auto *WlsInstr : RevertedWhileLoops)
+ Changed |= revertWhileToDoLoop(WlsInstr);
+
return Changed;
}
@@ -199,18 +265,22 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
assert(From->isSuccessor(To) &&
"'To' is expected to be a successor of 'From'");
MachineInstr &Terminator = *(--From->terminators().end());
- if (!Terminator.isUnconditionalBranch()) {
- // The BB doesn't have an unconditional branch so it relied on
- // fall-through. Fix by adding an unconditional branch to the moved BB.
- MachineInstrBuilder MIB =
- BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
- MIB.addMBB(To);
- MIB.addImm(ARMCC::CondCodes::AL);
- MIB.addReg(ARM::NoRegister);
- LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
- << From->getName() << " to " << To->getName() << ": "
- << *MIB.getInstr());
- }
+ if (!TII->isPredicated(Terminator) &&
+ (isUncondBranchOpcode(Terminator.getOpcode()) ||
+ isIndirectBranchOpcode(Terminator.getOpcode()) ||
+ isJumpTableBranchOpcode(Terminator.getOpcode()) ||
+ Terminator.isReturn()))
+ return;
+ // The BB doesn't have an unconditional branch so it relied on
+ // fall-through. Fix by adding an unconditional branch to the moved BB.
+ MachineInstrBuilder MIB =
+ BuildMI(From, Terminator.getDebugLoc(), TII->get(ARM::t2B));
+ MIB.addMBB(To);
+ MIB.addImm(ARMCC::CondCodes::AL);
+ MIB.addReg(ARM::NoRegister);
+ LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Adding unconditional branch from "
+ << From->getName() << " to " << To->getName() << ": "
+ << *MIB.getInstr());
};
// Fix fall-through to the moved BB from the one that used to be before it.
@@ -225,5 +295,5 @@ void ARMBlockPlacement::moveBasicBlock(MachineBasicBlock *BB,
F->RenumberBlocks();
BBUtils->computeAllBlockSizes();
- BBUtils->adjustBBOffsetsAfter(&F->front());
+ BBUtils->adjustBBOffsetsAfter(BB);
}
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index aff7ec8d2ed6..81ec4d09a408 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -45,6 +45,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <functional>
#include <utility>
using namespace llvm;
@@ -109,7 +110,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -130,7 +131,8 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
CCValAssign VA = VAs[0];
@@ -158,9 +160,15 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
if (!IsLittle)
std::swap(NewRegs[0], NewRegs[1]);
+ if (Thunk) {
+ *Thunk = [=]() {
+ assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
+ assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
+ };
+ return 1;
+ }
assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
-
return 1;
}
@@ -273,7 +281,7 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
@@ -298,7 +306,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}
unsigned assignCustomValue(ARMCallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multiple regs yet");
CCValAssign VA = VAs[0];
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index e15826fa6159..121558276c3e 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,6 +18,7 @@
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MVETailPredUtils.h"
#include "Thumb2InstrInfo.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/DenseMap.h"
@@ -340,12 +341,12 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() {
// Align blocks where the previous block does not fall through. This may add
// extra NOP's but they will not be executed. It uses the PrefLoopAlignment as a
// measure of how much to align, and only runs at CodeGenOpt::Aggressive.
-static bool AlignBlocks(MachineFunction *MF) {
+static bool AlignBlocks(MachineFunction *MF, const ARMSubtarget *STI) {
if (MF->getTarget().getOptLevel() != CodeGenOpt::Aggressive ||
MF->getFunction().hasOptSize())
return false;
- auto *TLI = MF->getSubtarget().getTargetLowering();
+ auto *TLI = STI->getTargetLowering();
const Align Alignment = TLI->getPrefLoopAlignment();
if (Alignment < 4)
return false;
@@ -357,7 +358,25 @@ static bool AlignBlocks(MachineFunction *MF) {
Changed = true;
MBB.setAlignment(Alignment);
}
+
PrevCanFallthough = MBB.canFallThrough();
+
+ // For LOB's, the ARMLowOverheadLoops pass may remove the unconditional
+ // branch later in the pipeline.
+ if (STI->hasLOB()) {
+ for (const auto &MI : reverse(MBB.terminators())) {
+ if (MI.getOpcode() == ARM::t2B &&
+ MI.getOperand(0).getMBB() == MBB.getNextNode())
+ continue;
+ if (isLoopStart(MI) || MI.getOpcode() == ARM::t2LoopEnd ||
+ MI.getOpcode() == ARM::t2LoopEndDec) {
+ PrevCanFallthough = true;
+ break;
+ }
+ // Any other terminator - nothing to do
+ break;
+ }
+ }
}
return Changed;
@@ -406,7 +425,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
}
// Align any non-fallthrough blocks
- MadeChange |= AlignBlocks(MF);
+ MadeChange |= AlignBlocks(MF, STI);
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2167ad5d7467..a8f09969e948 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -69,6 +69,7 @@ namespace {
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt);
+ void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
void CMSEClearGPRegs(MachineBasicBlock &MBB,
@@ -887,6 +888,43 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
+void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+ unsigned NewOpc =
+ MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
+ ? ARM::VSTMDIA
+ : ARM::VLDMDIA;
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
+
+ unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
+ getDefRegState(MI.getOperand(0).isDef());
+ Register SrcReg = MI.getOperand(0).getReg();
+
+ // Copy the destination register.
+ MIB.add(MI.getOperand(1));
+ MIB.add(predOps(ARMCC::AL));
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
+ if (MI.getOpcode() == ARM::MQQQQPRStore ||
+ MI.getOpcode() == ARM::MQQQQPRLoad) {
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
+ MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
+ }
+
+ if (NewOpc == ARM::VSTMDIA)
+ MIB.addReg(SrcReg, RegState::Implicit);
+
+ TransferImpOps(MI, MIB, MIB);
+ MIB.cloneMemRefs(MI);
+ MI.eraseFromParent();
+}
+
static bool IsAnAddressOperand(const MachineOperand &MO) {
// This check is overly conservative. Unless we are certain that the machine
// operand is not a symbol reference, we return that it is a symbol reference.
@@ -1295,7 +1333,7 @@ void ARMExpandPseudo::CMSESaveClearFPRegs(
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
if (STI->hasV8_1MMainlineOps())
CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
- else
+ else if (STI->hasV8MMainlineOps())
CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}
@@ -1303,8 +1341,6 @@ void ARMExpandPseudo::CMSESaveClearFPRegs(
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
- if (!STI->hasFPRegs())
- return;
// Store an available register for FPSCR clearing
assert(!ScratchRegs.empty());
@@ -1358,7 +1394,11 @@ void ARMExpandPseudo::CMSESaveClearFPRegsV8(
bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
- // Lazy store all fp registers to the stack
+ if (passesFPReg)
+ assert(STI->hasFPRegs() && "Subtarget needs fpregs");
+
+ // Lazy store all fp registers to the stack.
+ // This executes as NOP in the absence of floating-point support.
MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
.addReg(ARM::SP)
.add(predOps(ARMCC::AL));
@@ -1486,15 +1526,18 @@ void ARMExpandPseudo::CMSERestoreFPRegs(
SmallVectorImpl<unsigned> &AvailableRegs) {
if (STI->hasV8_1MMainlineOps())
CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
- else
+ else if (STI->hasV8MMainlineOps())
CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
}
void ARMExpandPseudo::CMSERestoreFPRegsV8(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &AvailableRegs) {
- if (!STI->hasFPRegs())
- return;
+
+ // Keep a scratch register for the mitigation sequence.
+ unsigned ScratchReg = ARM::NoRegister;
+ if (STI->fixCMSE_CVE_2021_35465())
+ ScratchReg = AvailableRegs.pop_back_val();
// Use AvailableRegs to store the fp regs
std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
@@ -1536,24 +1579,64 @@ void ARMExpandPseudo::CMSERestoreFPRegsV8(
}
}
+ bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
+
+ if (returnsFPReg)
+ assert(STI->hasFPRegs() && "Subtarget needs fpregs");
+
// Push FP regs that cannot be restored via normal registers on the stack
for (unsigned Reg : NonclearedFPRegs) {
if (ARM::DPR_VFP2RegClass.contains(Reg))
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD))
+ .addReg(Reg)
.addReg(ARM::SP)
.addImm((Reg - ARM::D0) * 2)
.add(predOps(ARMCC::AL));
else if (ARM::SPRRegClass.contains(Reg))
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS))
+ .addReg(Reg)
.addReg(ARM::SP)
.addImm(Reg - ARM::S0)
.add(predOps(ARMCC::AL));
}
- // Lazy load fp regs from stack
- BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
- .addReg(ARM::SP)
- .add(predOps(ARMCC::AL));
+ // Lazy load fp regs from stack.
+ // This executes as NOP in the absence of floating-point support.
+ MachineInstrBuilder VLLDM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ if (STI->fixCMSE_CVE_2021_35465()) {
+ auto Bundler = MIBundleBuilder(MBB, VLLDM);
+ // Read the CONTROL register.
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M))
+ .addReg(ScratchReg, RegState::Define)
+ .addImm(20)
+ .add(predOps(ARMCC::AL)));
+ // Check bit 3 (SFPA).
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri))
+ .addReg(ScratchReg)
+ .addImm(8)
+ .add(predOps(ARMCC::AL)));
+ // Emit the IT block.
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT))
+ .addImm(ARMCC::NE)
+ .addImm(8));
+ // If SFPA is clear jump over to VLLDM, otherwise execute an instruction
+ // which has no functional effect apart from causing context creation:
+ // vmovne s0, s0. In the absence of FPU we emit .inst.w 0xeeb00a40,
+ // which is defined as NOP if not executed.
+ if (STI->hasFPRegs())
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS))
+ .addReg(ARM::S0, RegState::Define)
+ .addReg(ARM::S0, RegState::Undef)
+ .add(predOps(ARMCC::NE)));
+ else
+ Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM))
+ .addExternalSymbol(".inst.w 0xeeb00a40")
+ .addImm(InlineAsm::Extra_HasSideEffects));
+ finalizeBundle(MBB, Bundler.begin(), Bundler.end());
+ }
// Restore all FP registers via normal registers
for (const auto &Regs : ClearedFPRegs) {
@@ -1594,6 +1677,12 @@ void ARMExpandPseudo::CMSERestoreFPRegsV81(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
SmallVectorImpl<unsigned> &AvailableRegs) {
if (!definesOrUsesFPReg(*MBBI)) {
+ if (STI->fixCMSE_CVE_2021_35465()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL))
+ .addReg(ARM::VPR, RegState::Define);
+ }
+
// Load FP registers from stack.
BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
.addReg(ARM::SP)
@@ -1647,7 +1736,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
"CMP_SWAP not expected to be custom expanded for Thumb1");
assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
"ARMv8-M.baseline does not have t2UXTB/t2UXTH");
- assert(ARM::tGPRRegClass.contains(DesiredReg) &&
+ assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
"DesiredReg used for UXT op must be tGPR");
}
@@ -2916,6 +3005,13 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+ case ARM::MQQPRLoad:
+ case ARM::MQQPRStore:
+ case ARM::MQQQQPRLoad:
+ case ARM::MQQQQPRStore:
+ ExpandMQQPRLoadStore(MBBI);
+ return true;
+
case ARM::tCMP_SWAP_8:
assert(STI->isThumb());
return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9c7055deaaf8..2b83a292db76 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -79,6 +79,10 @@ public:
void Select(SDNode *N) override;
+ /// Return true as some complex patterns, like those that call
+ /// canExtractShiftFromMul can modify the DAG inplace.
+ bool ComplexPatternFuncMutatesDAG() const override { return true; }
+
bool hasNoVMLxHazardUse(SDNode *N) const;
bool isShifterOpProfitable(const SDValue &Shift,
ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
@@ -406,11 +410,9 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
return;
bool isThumb2 = Subtarget->isThumb();
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
-
- if (N->getOpcode() != ISD::ADD)
+ // We use make_early_inc_range to avoid invalidation issues.
+ for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
+ if (N.getOpcode() != ISD::ADD)
continue;
// Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
@@ -422,8 +424,8 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
// operand of 'add' and the 'and' and 'srl' would become a bits extraction
// node (UBFX).
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
unsigned And_imm = 0;
if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
@@ -480,7 +482,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
- CurDAG->UpdateNodeOperands(N, N0, N1);
+ CurDAG->UpdateNodeOperands(&N, N0, N1);
}
}
@@ -1121,7 +1123,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
SDValue &Offset) {
if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || !NC->isNullValue())
+ if (!NC || !NC->isZero())
return false;
Base = Offset = N;
@@ -1818,8 +1820,11 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
else
return false;
- SDValue Ops[] = {Base, NewOffset,
- CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
+ SDValue Ops[] = {Base,
+ NewOffset,
+ CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
+ PredReg,
+ CurDAG->getRegister(0, MVT::i32), // tp_reg
Chain};
SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
N->getValueType(0), MVT::Other, Ops);
@@ -2525,6 +2530,7 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
SDValue PredicateMask) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
Ops.push_back(PredicateMask);
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}
template <typename SDValueVector>
@@ -2533,6 +2539,7 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
SDValue Inactive) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
Ops.push_back(PredicateMask);
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
Ops.push_back(Inactive);
}
@@ -2540,6 +2547,7 @@ template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}
template <typename SDValueVector>
@@ -2547,6 +2555,7 @@ void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
EVT InactiveTy) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
Ops.push_back(SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}
@@ -3545,7 +3554,7 @@ void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
return;
SDValue Zero = N->getOperand(1);
- if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
+ if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isZero() ||
And->getOpcode() != ISD::AND)
return;
SDValue X = And.getOperand(0);
@@ -5495,8 +5504,8 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
bool IsThumb2 = Subtarget->isThumb2();
SDLoc DL(N);
@@ -5610,8 +5619,8 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
- const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
- const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
+ const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
bool IsThumb2 = Subtarget->isThumb2();
SDLoc DL(N);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 900113244e41..e7e10ce07a44 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,6 +55,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -300,6 +301,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::UINT_TO_FP, VT, Expand);
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
+ } else {
+ setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
}
// Pre and Post inc are supported on loads and stores
@@ -544,6 +548,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
+ setLibcallName(RTLIB::MULO_I128, nullptr);
// RTLIB
if (Subtarget->isAAPCS_ABI() &&
@@ -741,6 +747,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
Subtarget->hasFPRegs()) {
addRegisterClass(MVT::f32, &ARM::SPRRegClass);
addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
+
if (!Subtarget->hasVFP2Base())
setAllExpand(MVT::f32);
if (!Subtarget->hasFP64())
@@ -981,6 +993,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+ setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
@@ -1851,12 +1864,18 @@ ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
// MVE Q registers.
- if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
+ if (Subtarget->hasNEON()) {
if (VT == MVT::v4i64)
return &ARM::QQPRRegClass;
if (VT == MVT::v8i64)
return &ARM::QQQQPRRegClass;
}
+ if (Subtarget->hasMVEIntegerOps()) {
+ if (VT == MVT::v4i64)
+ return &ARM::MQQPRRegClass;
+ if (VT == MVT::v8i64)
+ return &ARM::MQQQQPRRegClass;
+ }
return TargetLowering::getRegClassFor(VT);
}
@@ -2287,7 +2306,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool PreferIndirect = false;
// Determine whether this is a non-secure function call.
- if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
isCmseNSCall = true;
// Disable tail calls if they're not supported.
@@ -3259,26 +3278,24 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
SmallPtrSet<SDNode*, 2> Copies;
- for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != ISD::CopyToReg)
+ for (SDNode *U : VMov->uses()) {
+ if (U->getOpcode() != ISD::CopyToReg)
return false;
- Copies.insert(*UI);
+ Copies.insert(U);
}
if (Copies.size() > 2)
return false;
- for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
- UI != UE; ++UI) {
- SDValue UseChain = UI->getOperand(0);
+ for (SDNode *U : VMov->uses()) {
+ SDValue UseChain = U->getOperand(0);
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
- Copy = *UI;
+ Copy = U;
else {
// We are at the top of this chain.
// If the copy has a glue operand, we conservatively assume it
// isn't safe to perform a tail call.
- if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
+ if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
return false;
// First CopyToReg
TCChain = UseChain;
@@ -3301,10 +3318,9 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
}
bool HasRet = false;
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != ARMISD::RET_FLAG &&
- UI->getOpcode() != ARMISD::INTRET_FLAG)
+ for (const SDNode *U : Copy->uses()) {
+ if (U->getOpcode() != ARMISD::RET_FLAG &&
+ U->getOpcode() != ARMISD::INTRET_FLAG)
return false;
HasRet = true;
}
@@ -3782,7 +3798,7 @@ static SDValue promoteToConstantPool(const ARMTargetLowering *TLI,
bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- if (!(GV = GA->getBaseObject()))
+ if (!(GV = GA->getAliaseeObject()))
return false;
if (const auto *V = dyn_cast<GlobalVariable>(GV))
return V->isConstant();
@@ -4517,7 +4533,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else { // VA.isRegLoc()
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
@@ -5811,6 +5827,43 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
+static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = Op.getValueType();
+ EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT FromVT = Op.getOperand(0).getValueType();
+
+ if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
+ return Op;
+ if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
+ Subtarget->hasFP64())
+ return Op;
+ if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
+ Subtarget->hasFullFP16())
+ return Op;
+ if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
+ Subtarget->hasMVEFloatOps())
+ return Op;
+ if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
+ Subtarget->hasMVEFloatOps())
+ return Op;
+
+ if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
+ return SDValue();
+
+ SDLoc DL(Op);
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
+ unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
+ SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
+ DAG.getValueType(VT.getScalarType()));
+ SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
+ DAG.getConstant((1 << BW) - 1, DL, VT));
+ if (IsSigned)
+ Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
+ DAG.getConstant(-(1 << BW), DL, VT));
+ return Max;
+}
+
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -7660,7 +7713,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- if (SplatUndef.isAllOnesValue())
+ if (SplatUndef.isAllOnes())
return DAG.getUNDEF(VT);
if ((ST->hasNEON() && SplatBitSize <= 64) ||
@@ -8052,7 +8105,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
Src.WindowBase *= Src.WindowScale;
}
- // Final sanity check before we try to actually produce a shuffle.
+ // Final check before we try to actually produce a shuffle.
LLVM_DEBUG(for (auto Src
: Sources)
assert(Src.ShuffleVec.getValueType() == ShuffleVT););
@@ -8175,7 +8228,7 @@ bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
isVTBLMask(M, VT) ||
isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
return true;
- else if (Subtarget->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
isReverseMask(M, VT))
return true;
else if (Subtarget->hasMVEIntegerOps() &&
@@ -8268,21 +8321,23 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
}
-static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
- SelectionDAG &DAG) {
+static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
- SDValue OpLHS = Op.getOperand(0);
- EVT VT = OpLHS.getValueType();
+ EVT VT = Op.getValueType();
- assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
+ assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
"Expect an v8i16/v16i8 type");
- OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
- // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
+ SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
+ // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
// extract the first 8 bytes into the top double word and the last 8 bytes
- // into the bottom double word. The v8i16 case is similar.
- unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
- return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
- DAG.getConstant(ExtractNum, DL, MVT::i32));
+ // into the bottom double word, through a new vector shuffle that will be
+ // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
+ std::vector<int> NewMask;
+ for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
+ NewMask.push_back(VT.getVectorNumElements() / 2 + i);
+ for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
+ NewMask.push_back(i);
+ return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
}
static EVT getVectorTyFromPredicateVector(EVT VT) {
@@ -8704,8 +8759,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
- if (ST->hasNEON() && (VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
- return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
+ if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
+ isReverseMask(ShuffleMask, VT))
+ return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
if (ST->hasNEON() && VT == MVT::v8i8)
if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
@@ -8822,54 +8878,68 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) {
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
- EVT VT = Op.getValueType();
- EVT Op1VT = V1.getValueType();
- EVT Op2VT = V2.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
-
- assert(Op1VT == Op2VT && "Operand types don't match!");
- assert(VT.getScalarSizeInBits() == 1 &&
+ assert(Op.getValueType().getScalarSizeInBits() == 1 &&
+ "Unexpected custom CONCAT_VECTORS lowering");
+ assert(isPowerOf2_32(Op.getNumOperands()) &&
"Unexpected custom CONCAT_VECTORS lowering");
assert(ST->hasMVEIntegerOps() &&
"CONCAT_VECTORS lowering only supported for MVE");
- SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
- SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
-
- // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
- // promoted to v8i16, etc.
-
- MVT ElType = getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
-
- // Extract the vector elements from Op1 and Op2 one by one and truncate them
- // to be the right size for the destination. For example, if Op1 is v4i1 then
- // the promoted vector is v4i32. The result of concatentation gives a v8i1,
- // which when promoted is v8i16. That means each i32 element from Op1 needs
- // truncating to i16 and inserting in the result.
- EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
- SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
- auto ExractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
- EVT NewVT = NewV.getValueType();
- EVT ConcatVT = ConVec.getValueType();
- for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
- DAG.getIntPtrConstant(i, dl));
- ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
- DAG.getConstant(j, dl, MVT::i32));
- }
- return ConVec;
+ auto ConcatPair = [&](SDValue V1, SDValue V2) {
+ EVT Op1VT = V1.getValueType();
+ EVT Op2VT = V2.getValueType();
+ assert(Op1VT == Op2VT && "Operand types don't match!");
+ EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
+
+ SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
+ SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
+
+ // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
+ // promoted to v8i16, etc.
+ MVT ElType =
+ getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
+ unsigned NumElts = 2 * Op1VT.getVectorNumElements();
+
+ // Extract the vector elements from Op1 and Op2 one by one and truncate them
+ // to be the right size for the destination. For example, if Op1 is v4i1
+    // then the promoted vector is v4i32. The result of concatenation gives a
+ // v8i1, which when promoted is v8i16. That means each i32 element from Op1
+ // needs truncating to i16 and inserting in the result.
+ EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
+ SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
+ auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
+ EVT NewVT = NewV.getValueType();
+ EVT ConcatVT = ConVec.getValueType();
+ for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
+ DAG.getIntPtrConstant(i, dl));
+ ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
+ DAG.getConstant(j, dl, MVT::i32));
+ }
+ return ConVec;
+ };
+ unsigned j = 0;
+ ConVec = ExtractInto(NewV1, ConVec, j);
+ ConVec = ExtractInto(NewV2, ConVec, j);
+
+ // Now return the result of comparing the subvector with zero,
+ // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
+ return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
+ DAG.getConstant(ARMCC::NE, dl, MVT::i32));
};
- unsigned j = 0;
- ConVec = ExractInto(NewV1, ConVec, j);
- ConVec = ExractInto(NewV2, ConVec, j);
- // Now return the result of comparing the subvector with zero,
- // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
- return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
- DAG.getConstant(ARMCC::NE, dl, MVT::i32));
+ // Concat each pair of subvectors and pack into the lower half of the array.
+ SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
+ while (ConcatOps.size() > 1) {
+ for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
+ SDValue V1 = ConcatOps[I];
+ SDValue V2 = ConcatOps[I + 1];
+ ConcatOps[I / 2] = ConcatPair(V1, V2);
+ }
+ ConcatOps.resize(ConcatOps.size() / 2);
+ }
+ return ConcatOps[0];
}
static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
@@ -9069,7 +9139,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
return true;
} else {
- if (Hi0->isNullValue() && Hi1->isNullValue())
+ if (Hi0->isZero() && Hi1->isZero())
return true;
}
return false;
@@ -10140,6 +10210,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -10326,6 +10398,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ZERO_EXTEND:
Res = LowerVectorExtend(N, DAG, Subtarget);
break;
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
+ break;
}
if (Res.getNode())
Results.push_back(Res);
@@ -10877,10 +10953,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
static
MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I != Succ)
- return *I;
+ for (MachineBasicBlock *S : MBB->successors())
+ if (S != Succ)
+ return S;
llvm_unreachable("Expecting a BB with two successors!");
}
@@ -11378,13 +11453,9 @@ static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
// If we hit the end of the block, check whether CPSR is live into a
// successor.
if (miI == BB->end()) {
- for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
- sEnd = BB->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock* succ = *sItr;
- if (succ->isLiveIn(ARM::CPSR))
+ for (MachineBasicBlock *Succ : BB->successors())
+ if (Succ->isLiveIn(ARM::CPSR))
return false;
- }
}
// We found a def, or hit the end of the basic block and CPSR wasn't live
@@ -11487,6 +11558,7 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
.addUse(PredCounterPhiReg)
.addImm(ARMVCC::None)
+ .addReg(0)
.addReg(0);
BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
@@ -11505,7 +11577,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
.addReg(SrcPhiReg)
.addImm(16)
.addImm(ARMVCC::Then)
- .addUse(VccrReg);
+ .addUse(VccrReg)
+ .addReg(0);
} else
SrcValueReg = OpSrcReg;
@@ -11515,7 +11588,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
.addReg(DestPhiReg)
.addImm(16)
.addImm(ARMVCC::Then)
- .addUse(VccrReg);
+ .addUse(VccrReg)
+ .addReg(0);
// Add the pseudoInstrs for decrementing the loop counter and marking the
// end:t2DoLoopDec and t2DoLoopEnd
@@ -12103,8 +12177,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
// When looking for a 0 constant, N can be zext or sext.
OtherOp = DAG.getConstant(1, dl, VT);
else
- OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl,
- VT);
+ OtherOp = DAG.getAllOnesConstant(dl, VT);
return true;
}
}
@@ -12696,7 +12769,7 @@ static SDValue PerformAddcSubcCombine(SDNode *N,
const ARMSubtarget *Subtarget) {
SelectionDAG &DAG(DCI.DAG);
- if (N->getOpcode() == ARMISD::SUBC) {
+ if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
// (SUBC (ADDE 0, 0, C), 1) -> C
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
@@ -12868,6 +12941,9 @@ static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Shft;
ConstantSDNode *Clamp;
+ if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
+ return SDValue();
+
if (N->getOpcode() == ISD::SMIN) {
Shft = N->getOperand(0);
Clamp = isConstOrConstSplat(N->getOperand(1));
@@ -13008,19 +13084,15 @@ static SDValue PerformVSELECTCombine(SDNode *N,
}
static SDValue PerformABSCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARMSubtarget *Subtarget) {
- SDValue res;
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SelectionDAG &DAG = DCI.DAG;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
return SDValue();
- if (!TLI.expandABS(N, res, DAG))
- return SDValue();
-
- return res;
+ return TLI.expandABS(N, DAG);
}
/// PerformADDECombine - Target-specific dag combine transform from
@@ -13064,13 +13136,166 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
return SDValue();
}
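+// Try to reassociate ADDs involving vector reductions (vecreduce_add, VADDV,
+// VMLAV) so that each reduction is added to a scalar operand, which maps well
+// onto the accumulating VADDVA/VMLAVA forms, and so that reductions of
+// adjacent loads are combined in ascending load-offset order.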
+static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ auto IsVecReduce = [](SDValue Op) {
+ switch (Op.getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ case ARMISD::VADDVs:
+ case ARMISD::VADDVu:
+ case ARMISD::VMLAVs:
+ case ARMISD::VMLAVu:
+ return true;
+ }
+ return false;
+ };
+
+ auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
+ // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
+ // add(add(X, vecreduce(Y)), vecreduce(Z))
+ // to make better use of vaddva style instructions.
+ if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
+ IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
+ !isa<ConstantSDNode>(N0)) {
+ SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
+ }
+ // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
+ // add(add(add(A, C), reduce(B)), reduce(D))
+ if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
+ N1.getOpcode() == ISD::ADD) {
+ unsigned N0RedOp = 0;
+ if (!IsVecReduce(N0.getOperand(N0RedOp))) {
+ N0RedOp = 1;
+ if (!IsVecReduce(N0.getOperand(N0RedOp)))
+ return SDValue();
+ }
+
+ unsigned N1RedOp = 0;
+ if (!IsVecReduce(N1.getOperand(N1RedOp)))
+ N1RedOp = 1;
+ if (!IsVecReduce(N1.getOperand(N1RedOp)))
+ return SDValue();
+
+ SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
+ N1.getOperand(1 - N1RedOp));
+ SDValue Add1 =
+ DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
+ return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
+ }
+ return SDValue();
+ };
+ if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
+ return R;
+ if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
+ return R;
+
+ // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
+ // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
+ // by ascending load offsets. This can help cores prefetch if the order of
+ // loads is more predictable.
+ auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
+ // Check if two reductions are known to load data where one is before/after
+ // another. Return negative if N0 loads data before N1, positive if N1 is
+    // before N0, and 0 if nothing is known.
+ auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
+ // Look through to the first operand of a MUL, for the VMLA case.
+ // Currently only looks at the first operand, in the hope they are equal.
+ if (N0.getOpcode() == ISD::MUL)
+ N0 = N0.getOperand(0);
+ if (N1.getOpcode() == ISD::MUL)
+ N1 = N1.getOperand(0);
+
+      // Check whether the two operands are loads to the same object and, if
+      // so, how their known offsets compare.
+ LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
+ LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
+ if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
+ !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
+ Load1->isIndexed())
+ return 0;
+
+ auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
+ auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
+
+ if (!BaseLocDecomp0.getBase() ||
+ BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
+ !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
+ return 0;
+ if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
+ return -1;
+ if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
+ return 1;
+ return 0;
+ };
+
+ SDValue X;
+ if (N0.getOpcode() == ISD::ADD) {
+ if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
+ int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
+ N0.getOperand(1).getOperand(0));
+ if (IsBefore < 0) {
+ X = N0.getOperand(0);
+ N0 = N0.getOperand(1);
+ } else if (IsBefore > 0) {
+ X = N0.getOperand(1);
+ N0 = N0.getOperand(0);
+ } else
+ return SDValue();
+ } else if (IsVecReduce(N0.getOperand(0))) {
+ X = N0.getOperand(1);
+ N0 = N0.getOperand(0);
+ } else if (IsVecReduce(N0.getOperand(1))) {
+ X = N0.getOperand(0);
+ N0 = N0.getOperand(1);
+ } else
+ return SDValue();
+ } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
+ IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
+      // Note this is backwards from what you would expect. We create
+      // add(reduce(load + 16), reduce(load + 0)) so that the
+      // add(reduce(load+16), X) is combined into VADDVA(X, load+16), leaving
+      // the X as VADDV(load + 0).
+ return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
+ } else
+ return SDValue();
+
+ if (!IsVecReduce(N0) || !IsVecReduce(N1))
+ return SDValue();
+
+ if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
+ return SDValue();
+
+ // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
+ SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
+ return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
+ };
+ if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
+ return R;
+ if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
+ return R;
+ return SDValue();
+}
+
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
- if (!Subtarget->hasMVEIntegerOps() || N->getValueType(0) != MVT::i64)
+ if (!Subtarget->hasMVEIntegerOps())
return SDValue();
+ if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
+ return R;
+
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ if (VT != MVT::i64)
+ return SDValue();
// We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
// will look like:
@@ -13090,7 +13315,6 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
return SDValue();
- SDLoc dl(N);
if (VecRed->getOpcode() == OpcodeA) {
// add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
@@ -14732,6 +14956,7 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDValue Op0 = Ext.getOperand(0);
EVT VecVT = Op0.getValueType();
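+  // Remember which result of Op0 this extract reads, so that it is only
+  // paired with an extract of lane+1 from the same result value below.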
+ unsigned ResNo = Op0.getResNo();
unsigned Lane = Ext.getConstantOperandVal(1);
if (VecVT.getVectorNumElements() != 4)
return SDValue();
@@ -14740,7 +14965,8 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(V->getOperand(1)) &&
- V->getConstantOperandVal(1) == Lane + 1;
+ V->getConstantOperandVal(1) == Lane + 1 &&
+ V->getOperand(0).getResNo() == ResNo;
});
if (OtherIt == Op0->uses().end())
return SDValue();
@@ -14884,6 +15110,47 @@ static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
Op0->getOperand(0), Op1->getOperand(0));
}
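+// Fold a legal fixed-width INSERT_SUBVECTOR of a half-width subvector at an
+// aligned (low or high) position into a CONCAT_VECTORS of the subvector and
+// the untouched half of the original vector.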
+static SDValue
+PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ uint64_t IdxVal = N->getConstantOperandVal(2);
+ EVT VecVT = Vec.getValueType();
+ EVT SubVT = SubVec.getValueType();
+
+ // Only do this for legal fixed vector types.
+ if (!VecVT.isFixedLengthVector() ||
+ !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
+ !DCI.DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
+ return SDValue();
+
+ // Ignore widening patterns.
+ if (IdxVal == 0 && Vec.isUndef())
+ return SDValue();
+
+ // Subvector must be half the width and an "aligned" insertion.
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+ if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
+ (IdxVal != 0 && IdxVal != NumSubElts))
+ return SDValue();
+
+ // Fold insert_subvector -> concat_vectors
+ // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
+ // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ if (IdxVal == 0) {
+ Lo = SubVec;
+ Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
+ } else {
+ Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
+ DCI.DAG.getVectorIdxConstant(0, DL));
+ Hi = SubVec;
+ }
+ return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
+}
+
// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
SelectionDAG &DAG) {
@@ -14965,6 +15232,390 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
DAG.getUNDEF(VT), NewMask);
}
+/// Load/store instruction that can be merged with a base address
+/// update
+struct BaseUpdateTarget {
+ SDNode *N;
+ bool isIntrinsic;
+ bool isStore;
+ unsigned AddrOpIdx;
+};
+
+struct BaseUpdateUser {
+ /// Instruction that updates a pointer
+ SDNode *N;
+ /// Pointer increment operand
+ SDValue Inc;
+ /// Pointer increment value if it is a constant, or 0 otherwise
+ unsigned ConstInc;
+};
+
+static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
+ struct BaseUpdateUser &User,
+ bool SimpleConstIncOnly,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDNode *N = Target.N;
+ MemSDNode *MemN = cast<MemSDNode>(N);
+ SDLoc dl(N);
+
+ // Find the new opcode for the updating load/store.
+ bool isLoadOp = true;
+ bool isLaneOp = false;
+ // Workaround for vst1x and vld1x intrinsics which do not have alignment
+ // as an operand.
+ bool hasAlignment = true;
+ unsigned NewOpc = 0;
+ unsigned NumVecs = 0;
+ if (Target.isIntrinsic) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default:
+ llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::arm_neon_vld1:
+ NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1;
+ break;
+ case Intrinsic::arm_neon_vld2:
+ NewOpc = ARMISD::VLD2_UPD;
+ NumVecs = 2;
+ break;
+ case Intrinsic::arm_neon_vld3:
+ NewOpc = ARMISD::VLD3_UPD;
+ NumVecs = 3;
+ break;
+ case Intrinsic::arm_neon_vld4:
+ NewOpc = ARMISD::VLD4_UPD;
+ NumVecs = 4;
+ break;
+ case Intrinsic::arm_neon_vld1x2:
+ NewOpc = ARMISD::VLD1x2_UPD;
+ NumVecs = 2;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vld1x3:
+ NewOpc = ARMISD::VLD1x3_UPD;
+ NumVecs = 3;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vld1x4:
+ NewOpc = ARMISD::VLD1x4_UPD;
+ NumVecs = 4;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vld2dup:
+ NewOpc = ARMISD::VLD2DUP_UPD;
+ NumVecs = 2;
+ break;
+ case Intrinsic::arm_neon_vld3dup:
+ NewOpc = ARMISD::VLD3DUP_UPD;
+ NumVecs = 3;
+ break;
+ case Intrinsic::arm_neon_vld4dup:
+ NewOpc = ARMISD::VLD4DUP_UPD;
+ NumVecs = 4;
+ break;
+ case Intrinsic::arm_neon_vld2lane:
+ NewOpc = ARMISD::VLD2LN_UPD;
+ NumVecs = 2;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vld3lane:
+ NewOpc = ARMISD::VLD3LN_UPD;
+ NumVecs = 3;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vld4lane:
+ NewOpc = ARMISD::VLD4LN_UPD;
+ NumVecs = 4;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst1:
+ NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst2:
+ NewOpc = ARMISD::VST2_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst3:
+ NewOpc = ARMISD::VST3_UPD;
+ NumVecs = 3;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst4:
+ NewOpc = ARMISD::VST4_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ break;
+ case Intrinsic::arm_neon_vst2lane:
+ NewOpc = ARMISD::VST2LN_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst3lane:
+ NewOpc = ARMISD::VST3LN_UPD;
+ NumVecs = 3;
+ isLoadOp = false;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst4lane:
+ NewOpc = ARMISD::VST4LN_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ isLaneOp = true;
+ break;
+ case Intrinsic::arm_neon_vst1x2:
+ NewOpc = ARMISD::VST1x2_UPD;
+ NumVecs = 2;
+ isLoadOp = false;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vst1x3:
+ NewOpc = ARMISD::VST1x3_UPD;
+ NumVecs = 3;
+ isLoadOp = false;
+ hasAlignment = false;
+ break;
+ case Intrinsic::arm_neon_vst1x4:
+ NewOpc = ARMISD::VST1x4_UPD;
+ NumVecs = 4;
+ isLoadOp = false;
+ hasAlignment = false;
+ break;
+ }
+ } else {
+ isLaneOp = true;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode for Neon base update");
+ case ARMISD::VLD1DUP:
+ NewOpc = ARMISD::VLD1DUP_UPD;
+ NumVecs = 1;
+ break;
+ case ARMISD::VLD2DUP:
+ NewOpc = ARMISD::VLD2DUP_UPD;
+ NumVecs = 2;
+ break;
+ case ARMISD::VLD3DUP:
+ NewOpc = ARMISD::VLD3DUP_UPD;
+ NumVecs = 3;
+ break;
+ case ARMISD::VLD4DUP:
+ NewOpc = ARMISD::VLD4DUP_UPD;
+ NumVecs = 4;
+ break;
+ case ISD::LOAD:
+ NewOpc = ARMISD::VLD1_UPD;
+ NumVecs = 1;
+ isLaneOp = false;
+ break;
+ case ISD::STORE:
+ NewOpc = ARMISD::VST1_UPD;
+ NumVecs = 1;
+ isLaneOp = false;
+ isLoadOp = false;
+ break;
+ }
+ }
+
+ // Find the size of memory referenced by the load/store.
+ EVT VecTy;
+ if (isLoadOp) {
+ VecTy = N->getValueType(0);
+ } else if (Target.isIntrinsic) {
+ VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
+ } else {
+ assert(Target.isStore &&
+ "Node has to be a load, a store, or an intrinsic!");
+ VecTy = N->getOperand(1).getValueType();
+ }
+
+ bool isVLDDUPOp =
+ NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
+ NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
+
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (isLaneOp || isVLDDUPOp)
+ NumBytes /= VecTy.getVectorNumElements();
+
+ if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
+ // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+ // separate instructions that make it harder to use a non-constant update.
+ return false;
+ }
+
+ if (SimpleConstIncOnly && User.ConstInc != NumBytes)
+ return false;
+
+ // OK, we found an ADD we can fold into the base update.
+ // Now, create a _UPD node, taking care of not breaking alignment.
+
+ EVT AlignedVecTy = VecTy;
+ unsigned Alignment = MemN->getAlignment();
+
+ // If this is a less-than-standard-aligned load/store, change the type to
+ // match the standard alignment.
+ // The alignment is overlooked when selecting _UPD variants; and it's
+ // easier to introduce bitcasts here than fix that.
+ // There are 3 ways to get to this base-update combine:
+ // - intrinsics: they are assumed to be properly aligned (to the standard
+ // alignment of the memory type), so we don't need to do anything.
+ // - ARMISD::VLDx nodes: they are only generated from the aforementioned
+ // intrinsics, so, likewise, there's nothing to do.
+ // - generic load/store instructions: the alignment is specified as an
+ // explicit operand, rather than implicitly as the standard alignment
+  //     of the memory type (like the intrinsics). We need to change the
+ // memory type to match the explicit alignment. That way, we don't
+ // generate non-standard-aligned ARMISD::VLDx nodes.
+ if (isa<LSBaseSDNode>(N)) {
+ if (Alignment == 0)
+ Alignment = 1;
+ if (Alignment < VecTy.getScalarSizeInBits() / 8) {
+ MVT EltTy = MVT::getIntegerVT(Alignment * 8);
+ assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
+ assert(!isLaneOp && "Unexpected generic load/store lane.");
+ unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
+ AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
+ }
+ // Don't set an explicit alignment on regular load/stores that we want
+ // to transform to VLD/VST 1_UPD nodes.
+ // This matches the behavior of regular load/stores, which only get an
+ // explicit alignment if the MMO alignment is larger than the standard
+ // alignment of the memory type.
+ // Intrinsics, however, always get an explicit alignment, set to the
+ // alignment of the MMO.
+ Alignment = 1;
+ }
+
+ // Create the new updating load/store node.
+ // First, create an SDVTList for the new updating node's results.
+ EVT Tys[6];
+ unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
+ unsigned n;
+ for (n = 0; n < NumResultVecs; ++n)
+ Tys[n] = AlignedVecTy;
+ Tys[n++] = MVT::i32;
+ Tys[n] = MVT::Other;
+ SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
+
+ // Then, gather the new node's operands.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // incoming chain
+ Ops.push_back(N->getOperand(Target.AddrOpIdx));
+ Ops.push_back(User.Inc);
+
+ if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
+ // Try to match the intrinsic's signature
+ Ops.push_back(StN->getValue());
+ } else {
+ // Loads (and of course intrinsics) match the intrinsics' signature,
+ // so just add all but the alignment operand.
+ unsigned LastOperand =
+ hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
+ for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
+ Ops.push_back(N->getOperand(i));
+ }
+
+ // For all node types, the alignment operand is always the last one.
+ Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+
+ // If this is a non-standard-aligned STORE, the penultimate operand is the
+ // stored value. Bitcast it to the aligned type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
+ SDValue &StVal = Ops[Ops.size() - 2];
+ StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
+ }
+
+ EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
+ MemN->getMemOperand());
+
+ // Update the uses.
+ SmallVector<SDValue, 5> NewResults;
+ for (unsigned i = 0; i < NumResultVecs; ++i)
+ NewResults.push_back(SDValue(UpdN.getNode(), i));
+
+  // If this is a non-standard-aligned LOAD, the first result is the loaded
+ // value. Bitcast it to the expected result type.
+ if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
+ SDValue &LdVal = NewResults[0];
+ LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
+ }
+
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+ DCI.CombineTo(N, NewResults);
+ DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
+
+ return true;
+}
+
+// If (opcode ptr inc) is an ADD-like instruction, return the
+// increment value. Otherwise return 0.
+static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
+ SDValue Inc, const SelectionDAG &DAG) {
+ ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
+ if (!CInc)
+ return 0;
+
+ switch (Opcode) {
+ case ARMISD::VLD1_UPD:
+ case ISD::ADD:
+ return CInc->getZExtValue();
+ case ISD::OR: {
+ if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
+ // (OR ptr inc) is the same as (ADD ptr inc)
+ return CInc->getZExtValue();
+ }
+ return 0;
+ }
+ default:
+ return 0;
+ }
+}
+
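+// If N itself computes a pointer as a constant increment of some base value
+// (an ADD or OR with a constant operand, or a VLD1_UPD post-increment),
+// return true and set *Ptr to the base operand and *CInc to the increment.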
+static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
+ switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::OR: {
+ if (isa<ConstantSDNode>(N->getOperand(1))) {
+ *Ptr = N->getOperand(0);
+ *CInc = N->getOperand(1);
+ return true;
+ }
+ return false;
+ }
+ case ARMISD::VLD1_UPD: {
+ if (isa<ConstantSDNode>(N->getOperand(2))) {
+ *Ptr = N->getOperand(1);
+ *CInc = N->getOperand(2);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
+ // Check that the add is independent of the load/store.
+ // Otherwise, folding it would create a cycle. Search through Addr
+ // as well, since the User may not be a direct user of Addr and
+ // only share a base pointer.
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ Worklist.push_back(N);
+ Worklist.push_back(User);
+ if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(User, Visited, Worklist))
+ return false;
+ return true;
+}
+
/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
/// NEON load/store intrinsics, and generic vector load/stores, to merge
/// base address updates.
@@ -14972,237 +15623,89 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
/// The caller is assumed to have checked legality.
static SDValue CombineBaseUpdate(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
const bool isStore = N->getOpcode() == ISD::STORE;
const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
+ BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
+
SDValue Addr = N->getOperand(AddrOpIdx);
- MemSDNode *MemN = cast<MemSDNode>(N);
- SDLoc dl(N);
+
+ SmallVector<BaseUpdateUser, 8> BaseUpdates;
// Search for a use of the address operand that is an increment.
for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
- if (User->getOpcode() != ISD::ADD ||
- UI.getUse().getResNo() != Addr.getResNo())
- continue;
-
- // Check that the add is independent of the load/store. Otherwise, folding
- // it would create a cycle. We can avoid searching through Addr as it's a
- // predecessor to both.
- SmallPtrSet<const SDNode *, 32> Visited;
- SmallVector<const SDNode *, 16> Worklist;
- Visited.insert(Addr.getNode());
- Worklist.push_back(N);
- Worklist.push_back(User);
- if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
- SDNode::hasPredecessorHelper(User, Visited, Worklist))
+ if (UI.getUse().getResNo() != Addr.getResNo() ||
+ User->getNumOperands() != 2)
continue;
- // Find the new opcode for the updating load/store.
- bool isLoadOp = true;
- bool isLaneOp = false;
- // Workaround for vst1x and vld1x intrinsics which do not have alignment
- // as an operand.
- bool hasAlignment = true;
- unsigned NewOpc = 0;
- unsigned NumVecs = 0;
- if (isIntrinsic) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default: llvm_unreachable("unexpected intrinsic for Neon base update");
- case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
- NumVecs = 1; break;
- case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
- NumVecs = 2; break;
- case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD;
- NumVecs = 3; break;
- case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD;
- NumVecs = 4; break;
- case Intrinsic::arm_neon_vld1x2: NewOpc = ARMISD::VLD1x2_UPD;
- NumVecs = 2; hasAlignment = false; break;
- case Intrinsic::arm_neon_vld1x3: NewOpc = ARMISD::VLD1x3_UPD;
- NumVecs = 3; hasAlignment = false; break;
- case Intrinsic::arm_neon_vld1x4: NewOpc = ARMISD::VLD1x4_UPD;
- NumVecs = 4; hasAlignment = false; break;
- case Intrinsic::arm_neon_vld2dup: NewOpc = ARMISD::VLD2DUP_UPD;
- NumVecs = 2; break;
- case Intrinsic::arm_neon_vld3dup: NewOpc = ARMISD::VLD3DUP_UPD;
- NumVecs = 3; break;
- case Intrinsic::arm_neon_vld4dup: NewOpc = ARMISD::VLD4DUP_UPD;
- NumVecs = 4; break;
- case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
- NumVecs = 2; isLaneOp = true; break;
- case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
- NumVecs = 3; isLaneOp = true; break;
- case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
- NumVecs = 4; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD;
- NumVecs = 2; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD;
- NumVecs = 3; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD;
- NumVecs = 4; isLoadOp = false; break;
- case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
- NumVecs = 2; isLoadOp = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
- NumVecs = 3; isLoadOp = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
- NumVecs = 4; isLoadOp = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst1x2: NewOpc = ARMISD::VST1x2_UPD;
- NumVecs = 2; isLoadOp = false; hasAlignment = false; break;
- case Intrinsic::arm_neon_vst1x3: NewOpc = ARMISD::VST1x3_UPD;
- NumVecs = 3; isLoadOp = false; hasAlignment = false; break;
- case Intrinsic::arm_neon_vst1x4: NewOpc = ARMISD::VST1x4_UPD;
- NumVecs = 4; isLoadOp = false; hasAlignment = false; break;
- }
- } else {
- isLaneOp = true;
- switch (N->getOpcode()) {
- default: llvm_unreachable("unexpected opcode for Neon base update");
- case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
- case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
- case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
- case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
- case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD;
- NumVecs = 1; isLaneOp = false; break;
- case ISD::STORE: NewOpc = ARMISD::VST1_UPD;
- NumVecs = 1; isLaneOp = false; isLoadOp = false; break;
- }
- }
-
- // Find the size of memory referenced by the load/store.
- EVT VecTy;
- if (isLoadOp) {
- VecTy = N->getValueType(0);
- } else if (isIntrinsic) {
- VecTy = N->getOperand(AddrOpIdx+1).getValueType();
- } else {
- assert(isStore && "Node has to be a load, a store, or an intrinsic!");
- VecTy = N->getOperand(1).getValueType();
- }
-
- bool isVLDDUPOp =
- NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
- NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
-
- unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
- if (isLaneOp || isVLDDUPOp)
- NumBytes /= VecTy.getVectorNumElements();
-
- // If the increment is a constant, it must match the memory ref size.
- SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
- ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
- if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) {
- // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
- // separate instructions that make it harder to use a non-constant update.
- continue;
- }
+ SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
+ unsigned ConstInc =
+ getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
- // OK, we found an ADD we can fold into the base update.
- // Now, create a _UPD node, taking care of not breaking alignment.
-
- EVT AlignedVecTy = VecTy;
- unsigned Alignment = MemN->getAlignment();
-
- // If this is a less-than-standard-aligned load/store, change the type to
- // match the standard alignment.
- // The alignment is overlooked when selecting _UPD variants; and it's
- // easier to introduce bitcasts here than fix that.
- // There are 3 ways to get to this base-update combine:
- // - intrinsics: they are assumed to be properly aligned (to the standard
- // alignment of the memory type), so we don't need to do anything.
- // - ARMISD::VLDx nodes: they are only generated from the aforementioned
- // intrinsics, so, likewise, there's nothing to do.
- // - generic load/store instructions: the alignment is specified as an
- // explicit operand, rather than implicitly as the standard alignment
- // of the memory type (like the intrisics). We need to change the
- // memory type to match the explicit alignment. That way, we don't
- // generate non-standard-aligned ARMISD::VLDx nodes.
- if (isa<LSBaseSDNode>(N)) {
- if (Alignment == 0)
- Alignment = 1;
- if (Alignment < VecTy.getScalarSizeInBits() / 8) {
- MVT EltTy = MVT::getIntegerVT(Alignment * 8);
- assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
- assert(!isLaneOp && "Unexpected generic load/store lane.");
- unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
- AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
- }
- // Don't set an explicit alignment on regular load/stores that we want
- // to transform to VLD/VST 1_UPD nodes.
- // This matches the behavior of regular load/stores, which only get an
- // explicit alignment if the MMO alignment is larger than the standard
- // alignment of the memory type.
- // Intrinsics, however, always get an explicit alignment, set to the
- // alignment of the MMO.
- Alignment = 1;
- }
+ if (ConstInc || User->getOpcode() == ISD::ADD)
+ BaseUpdates.push_back({User, Inc, ConstInc});
+ }
- // Create the new updating load/store node.
- // First, create an SDVTList for the new updating node's results.
- EVT Tys[6];
- unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
- unsigned n;
- for (n = 0; n < NumResultVecs; ++n)
- Tys[n] = AlignedVecTy;
- Tys[n++] = MVT::i32;
- Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2));
+ // If the address is a constant pointer increment itself, find
+ // another constant increment that has the same base operand
+ SDValue Base;
+ SDValue CInc;
+ if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
+ unsigned Offset =
+ getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
+ for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
+ UI != UE; ++UI) {
- // Then, gather the new node's operands.
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(N->getOperand(0)); // incoming chain
- Ops.push_back(N->getOperand(AddrOpIdx));
- Ops.push_back(Inc);
+ SDNode *User = *UI;
+ if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
+ User->getNumOperands() != 2)
+ continue;
- if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
- // Try to match the intrinsic's signature
- Ops.push_back(StN->getValue());
- } else {
- // Loads (and of course intrinsics) match the intrinsics' signature,
- // so just add all but the alignment operand.
- unsigned LastOperand =
- hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
- for (unsigned i = AddrOpIdx + 1; i < LastOperand; ++i)
- Ops.push_back(N->getOperand(i));
- }
+ SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
+ unsigned UserOffset =
+ getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
- // For all node types, the alignment operand is always the last one.
- Ops.push_back(DAG.getConstant(Alignment, dl, MVT::i32));
+ if (!UserOffset || UserOffset <= Offset)
+ continue;
- // If this is a non-standard-aligned STORE, the penultimate operand is the
- // stored value. Bitcast it to the aligned type.
- if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
- SDValue &StVal = Ops[Ops.size()-2];
- StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
+ unsigned NewConstInc = UserOffset - Offset;
+ SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
+ BaseUpdates.push_back({User, NewInc, NewConstInc});
}
+ }
- EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
- MemN->getMemOperand());
-
- // Update the uses.
- SmallVector<SDValue, 5> NewResults;
- for (unsigned i = 0; i < NumResultVecs; ++i)
- NewResults.push_back(SDValue(UpdN.getNode(), i));
-
- // If this is an non-standard-aligned LOAD, the first result is the loaded
- // value. Bitcast it to the expected result type.
- if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
- SDValue &LdVal = NewResults[0];
- LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
+ // Try to fold the load/store with an update that matches memory
+ // access size. This should work well for sequential loads.
+ //
+ // Filter out invalid updates as well.
+ unsigned NumValidUpd = BaseUpdates.size();
+ for (unsigned I = 0; I < NumValidUpd;) {
+ BaseUpdateUser &User = BaseUpdates[I];
+ if (!isValidBaseUpdate(N, User.N)) {
+ --NumValidUpd;
+ std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
+ continue;
}
- NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
- DCI.CombineTo(N, NewResults);
- DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
-
- break;
+ if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
+ return SDValue();
+ ++I;
+ }
+ BaseUpdates.resize(NumValidUpd);
+
+ // Try to fold with other users. Non-constant updates are considered
+ // first, and constant updates are sorted to not break a sequence of
+ // strided accesses (if there is any).
+ std::sort(BaseUpdates.begin(), BaseUpdates.end(),
+ [](BaseUpdateUser &LHS, BaseUpdateUser &RHS) {
+ return LHS.ConstInc < RHS.ConstInc;
+ });
+ for (BaseUpdateUser &User : BaseUpdates) {
+ if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
+ return SDValue();
}
return SDValue();
}
@@ -15502,11 +16005,12 @@ static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
}
static SDValue PerformLOADCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
EVT VT = N->getValueType(0);
// If this is a legal vector load, try to combine it into a VLD1_UPD.
- if (ISD::isNormalLoad(N) && VT.isVector() &&
+ if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
return CombineBaseUpdate(N, DCI);
@@ -15976,6 +16480,15 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
SDLoc dl(N);
+ // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
+ if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
+ (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
+ N0.getValueType() == MVT::v16i8)) {
+ SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
+ SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
+ }
+
// We are looking for something that will have illegal types if left alone,
// but that we can convert to a single instruction under MVE. For example
// vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
@@ -16124,38 +16637,8 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue(Node.getNode(), 1));
};
- if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
- return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
- if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
- return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
- if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
- return Create64bitNode(ARMISD::VADDLVs, {A});
- if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
- return Create64bitNode(ARMISD::VADDLVu, {A});
- if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
- if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
-
- SDValue Mask;
- if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
- return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
- if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
- return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
- if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
- return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
- if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
- return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
- if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
- if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
- return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
- DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
-
SDValue A, B;
+ SDValue Mask;
if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
@@ -16192,6 +16675,36 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
+ if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
+ return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
+ if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
+ return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
+ if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
+ return Create64bitNode(ARMISD::VADDLVs, {A});
+ if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
+ return Create64bitNode(ARMISD::VADDLVu, {A});
+ if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
+ if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
+
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
+ return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
+ return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
+ if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
+ return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
+ DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
+
// Some complications. We can get a case where the two inputs of the mul are
// the same, then the output sext will have been helpfully converted to a
// zext. Turn it back.
@@ -16978,7 +17491,7 @@ static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
if (!Const)
return SDValue();
- if (Const->isNullValue())
+ if (Const->isZero())
Imm = 0;
else if (Const->isOne())
Imm = 1;
@@ -17030,7 +17543,7 @@ static SDValue PerformHWLoopCombine(SDNode *N,
Cond = N->getOperand(2);
Dest = N->getOperand(4);
if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
- if (!Const->isOne() && !Const->isNullValue())
+ if (!Const->isOne() && !Const->isZero())
return SDValue();
Imm = Const->getZExtValue();
} else
@@ -17685,6 +18198,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::EXTRACT_VECTOR_ELT:
return PerformExtractEltCombine(N, DCI, Subtarget);
case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG);
+ case ISD::INSERT_SUBVECTOR: return PerformInsertSubvectorCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
@@ -17710,9 +18224,12 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SMAX:
case ISD::UMAX:
return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
- case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
- case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
- case ISD::LOAD: return PerformLOADCombine(N, DCI);
+ case ARMISD::CMOV:
+ return PerformCMOVCombine(N, DCI.DAG);
+ case ARMISD::BRCOND:
+ return PerformBRCONDCombine(N, DCI.DAG);
+ case ISD::LOAD:
+ return PerformLOADCombine(N, DCI, Subtarget);
case ARMISD::VLD1DUP:
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
@@ -17929,7 +18446,7 @@ EVT ARMTargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
// See if we can use NEON instructions for this...
if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
- !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
bool Fast;
if (Op.size() >= 16 &&
(Op.isAligned(Align(16)) ||
@@ -18086,18 +18603,27 @@ bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
return !IsFMS(I);
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
case Intrinsic::arm_mve_add_predicated:
case Intrinsic::arm_mve_mul_predicated:
case Intrinsic::arm_mve_qadd_predicated:
+ case Intrinsic::arm_mve_vhadd:
case Intrinsic::arm_mve_hadd_predicated:
+ case Intrinsic::arm_mve_vqdmull:
case Intrinsic::arm_mve_vqdmull_predicated:
+ case Intrinsic::arm_mve_vqdmulh:
case Intrinsic::arm_mve_qdmulh_predicated:
+ case Intrinsic::arm_mve_vqrdmulh:
case Intrinsic::arm_mve_qrdmulh_predicated:
case Intrinsic::arm_mve_fma_predicated:
return true;
+ case Intrinsic::ssub_sat:
+ case Intrinsic::usub_sat:
case Intrinsic::arm_mve_sub_predicated:
case Intrinsic::arm_mve_qsub_predicated:
case Intrinsic::arm_mve_hsub_predicated:
+ case Intrinsic::arm_mve_vhsub:
return Operand == 1;
default:
return false;
@@ -18508,6 +19034,31 @@ bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
return AbsImm >= 0 && AbsImm <= 255;
}
+// Return false to prevent folding
+// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
+// if the folding leads to worse code.
+bool ARMTargetLowering::isMulAddWithConstProfitable(
+ const SDValue &AddNode, const SDValue &ConstNode) const {
+ // Let the DAGCombiner decide for vector types and large types.
+ const EVT VT = AddNode.getValueType();
+ if (VT.isVector() || VT.getScalarSizeInBits() > 32)
+ return true;
+
+  // It is worse if c0 is a legal add immediate while c1*c0 is not,
+  // and c1*c0 has to be materialized with at least two instructions.
+ const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+ const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
+ const int64_t C0 = C0Node->getSExtValue();
+ APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
+ if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
+ return true;
+ if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
+ return false;
+
+ // Default to true and let the DAGCombiner decide.
+ return true;
+}
+
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
@@ -19015,8 +19566,8 @@ bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
isa<ConstantSDNode>(Op->getOperand(2))) {
unsigned ShAmt = Op->getConstantOperandVal(2);
- if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(
- APInt::getAllOnesValue(32) << (32 - ShAmt)))
+ if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
+ << (32 - ShAmt)))
return TLO.CombineTo(
Op, TLO.DAG.getNode(
ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
@@ -19760,7 +20311,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
@@ -19774,7 +20325,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
Info.align.reset();
// volatile loads with NEON intrinsics not supported
@@ -19792,7 +20343,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors stored.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -19801,7 +20352,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+ Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
@@ -19814,7 +20365,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
// Conservatively set memVT to the entire set of vectors stored.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
Type *ArgTy = I.getArgOperand(ArgI)->getType();
if (!ArgTy->isVectorTy())
break;
@@ -20128,10 +20679,7 @@ bool ARMTargetLowering::shouldInsertFencesForAtomic(
return InsertFencesForAtomic;
}
-// This has so far only been implemented for MachO.
-bool ARMTargetLowering::useLoadStackGuardNode() const {
- return Subtarget->isTargetMachO();
-}
+bool ARMTargetLowering::useLoadStackGuardNode() const { return true; }
void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
@@ -20146,7 +20694,7 @@ void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
"__security_check_cookie", Type::getVoidTy(M.getContext()),
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
- F->addAttribute(1, Attribute::AttrKind::InReg);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
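The index change in the last hunk reflects the attribute API: the legacy addAttribute(1, ...) addressed the first parameter as index 1, while addParamAttr takes a zero-based parameter index. A small standalone sketch of the new call, with an assumed setup that is not taken from the patch:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Module M("m", Ctx);
      auto *FT = llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx),
                                         {llvm::Type::getInt8PtrTy(Ctx)},
                                         /*isVarArg=*/false);
      auto *F = llvm::Function::Create(FT, llvm::Function::ExternalLinkage,
                                       "__security_check_cookie", M);
      // Zero-based parameter index: tag the first (and only) parameter inreg.
      F->addParamAttr(0, llvm::Attribute::InReg);
      return F->hasParamAttribute(0, llvm::Attribute::InReg) ? 0 : 1;
    }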
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 844b7d4f1707..0fddd58e178e 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -680,7 +680,7 @@ class VectorType;
unsigned &Cost) const override;
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const override {
+ const MachineFunction &MF) const override {
// Do not merge to larger than i32.
return (MemVT.getSizeInBits() <= 32);
}
@@ -712,6 +712,9 @@ class VectorType;
Align Alignment,
const DataLayout &DL) const;
+ bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const override;
+
bool alignLoopsWithOptSize() const override;
/// Returns the number of interleaved accesses that will be generated when
diff --git a/llvm/lib/Target/ARM/ARMInstrCDE.td b/llvm/lib/Target/ARM/ARMInstrCDE.td
index 0e97668e2e01..54e27a6be558 100644
--- a/llvm/lib/Target/ARM/ARMInstrCDE.td
+++ b/llvm/lib/Target/ARM/ARMInstrCDE.td
@@ -612,14 +612,14 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec MQPR:$inactive), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred),
+ (VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx1qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX1A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
imm_12b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx2q_predicated timm:$coproc,
(VTI.Vec MQPR:$inactive),
@@ -627,7 +627,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX2_vec p_imm:$coproc, (v16i8 MQPR:$n),
imm_7b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred),
+ (VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx2qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc),
@@ -635,7 +635,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX2A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), timm:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx3q_predicated timm:$coproc,
(VTI.Vec MQPR:$inactive),
@@ -645,7 +645,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec (CDE_VCX3_vec p_imm:$coproc, (v16i8 MQPR:$n),
(v16i8 MQPR:$m),
imm_4b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred),
+ (VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx3qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc),
@@ -654,7 +654,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec (CDE_VCX3A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), (v16i8 MQPR:$m),
imm_4b:$imm, ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
}
let Predicates = [HasCDE, HasMVEInt] in
diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
index 85da7c5a535e..de351372abf2 100644
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -249,10 +249,10 @@ def VPTPredROperand : AsmOperandClass {
// Base class for both kinds of vpred.
class vpred_ops<dag extra_op, dag extra_mi> : OperandWithDefaultOps<OtherVT,
- !con((ops (i32 0), (i32 zero_reg)), extra_op)> {
+ !con((ops (i32 0), (i32 zero_reg), (i32 zero_reg)), extra_op)> {
let PrintMethod = "printVPTPredicateOperand";
let OperandNamespace = "ARM";
- let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi);
+ let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg, GPRlr:$tp_reg), extra_mi);
// For convenience, we provide a string value that can be appended
// to the constraints string. It's empty for vpred_n, and for
@@ -408,6 +408,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
// in an IT block).
bit thumbArithFlagSetting = 0;
+ bits<2> VecSize = 0;
bit validForTailPredication = 0;
bit retainsPreviousHalfElement = 0;
bit horizontalReduction = 0;
@@ -428,6 +429,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
let TSFlags{21} = retainsPreviousHalfElement;
let TSFlags{22} = horizontalReduction;
let TSFlags{23} = doubleWidthResult;
+ let TSFlags{25-24} = VecSize;
let Constraints = cstr;
let Itinerary = itin;
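The new two-bit VecSize field is packed into TSFlags{25-24}, alongside the existing flag bits. A C++-side sketch of reading such a field back out of TSFlags; the bit positions come from the patch, but the accessor itself is a hypothetical illustration:

    #include <cassert>
    #include <cstdint>

    // Hypothetical accessor for a 2-bit VecSize field stored at TSFlags{25-24}.
    static unsigned getVecSize(uint64_t TSFlags) { return (TSFlags >> 24) & 0x3; }

    int main() {
      uint64_t Flags = 0;
      Flags |= uint64_t(0b10) << 24; // e.g. a 32-bit element-size encoding
      assert(getVecSize(Flags) == 0b10);
      return 0;
    }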
@@ -1385,8 +1387,8 @@ class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
}
class T2I<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
+ string opc, string asm, list<dag> pattern, AddrMode am = AddrModeNone>
+ : Thumb2I<oops, iops, am, 4, itin, opc, asm, "", pattern>;
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 3c6c6960b80f..5dee5e04af81 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -95,8 +95,17 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
const TargetMachine &TM = MF.getTarget();
+ Module &M = *MF.getFunction().getParent();
- if (!Subtarget.useMovt()) {
+ if (M.getStackProtectorGuard() == "tls") {
+ expandLoadStackGuardBase(MI, ARM::MRC, ARM::LDRi12);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (!Subtarget.useMovt() || Subtarget.isGVInGOT(GV)) {
if (TM.isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12);
else
@@ -109,9 +118,6 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
return;
}
- const GlobalValue *GV =
- cast<GlobalValue>((*MI->memoperands_begin())->getValue());
-
if (!Subtarget.isGVIndirectSymbol(GV)) {
expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12);
return;
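The new early branch keys off the module-level stack-protector-guard setting. A minimal sketch of how a front end or tool might request the TLS-based guard this branch handles; the setup below is an assumed illustration, not part of this patch:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Module M("m", Ctx);
      // Ask for the thread-pointer-relative guard; expandLoadStackGuard then
      // emits an MRC read of the thread register followed by an LDR.
      M.setStackProtectorGuard("tls");
      M.setStackProtectorGuardOffset(0);
      return M.getStackProtectorGuard() == "tls" ? 0 : 1;
    }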
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 7466cecb9b33..7d0bc756e882 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -1252,7 +1252,7 @@ def addrmode5_pre : AddrMode5 {
// addrmode5fp16 := reg +/- imm8*2
//
def AddrMode5FP16AsmOperand : AsmOperandClass { let Name = "AddrMode5FP16"; }
-class AddrMode5FP16 : Operand<i32>,
+class AddrMode5FP16 : MemOperand,
ComplexPattern<i32, 2, "SelectAddrMode5FP16", []> {
let EncoderMethod = "getAddrMode5FP16OpValue";
let DecoderMethod = "DecodeAddrMode5FP16Operand";
@@ -1589,7 +1589,7 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let TwoOperandAliasConstraint = "$Rn = $Rd" in
multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- SDNode opnode, bit Commutable = 0> {
+ SDNode opnode> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -1693,9 +1693,8 @@ multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
/// operands are reversed.
let hasPostISelHook = 1, Defs = [CPSR] in {
-multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
- InstrItinClass iis, SDNode opnode,
- bit Commutable = 0> {
+multiclass AsI1_rbin_s_is<InstrItinClass iii,
+ InstrItinClass iis, SDNode opnode> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p),
4, iii,
[(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>,
@@ -3853,7 +3852,7 @@ defm RSB : AsI1_rbin_irs<0b0011, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUsr, ARMsubc>;
defm RSC : AI1_rsc_irs<0b0111, "rsc", ARMsube>;
@@ -5391,14 +5390,16 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
}
class ACI<dag oops, dag iops, string opc, string asm,
- list<dag> pattern, IndexMode im = IndexModeNone>
- : I<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ list<dag> pattern, IndexMode im = IndexModeNone,
+ AddrMode am = AddrModeNone>
+ : I<oops, iops, am, 4, im, BrFrm, NoItinerary,
opc, asm, "", pattern> {
let Inst{27-25} = 0b110;
}
class ACInoP<dag oops, dag iops, string opc, string asm,
- list<dag> pattern, IndexMode im = IndexModeNone>
- : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
+ list<dag> pattern, IndexMode im = IndexModeNone,
+ AddrMode am = AddrModeNone>
+ : InoP<oops, iops, am, 4, im, BrFrm, NoItinerary,
opc, asm, "", pattern> {
let Inst{31-28} = 0b1111;
let Inst{27-25} = 0b110;
@@ -5407,7 +5408,8 @@ class ACInoP<dag oops, dag iops, string opc, string asm,
let DecoderNamespace = "CoProc" in {
multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr", pattern> {
+ asm, "\t$cop, $CRd, $addr", pattern, IndexModeNone,
+ AddrMode5> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -5478,7 +5480,8 @@ multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> {
}
multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : ACInoP<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr", pattern> {
+ asm, "\t$cop, $CRd, $addr", pattern, IndexModeNone,
+ AddrMode5> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 372893814092..697730037277 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -97,7 +97,7 @@ def VecList2QAsmOperand : AsmOperandClass {
"q-registers in range [q0,q7]";
}
-def VecList2Q : RegisterOperand<QQPR, "printMVEVectorListTwoQ"> {
+def VecList2Q : RegisterOperand<MQQPR, "printMVEVectorListTwoQ"> {
let ParserMatchClass = VecList2QAsmOperand;
let PrintMethod = "printMVEVectorList<2>";
}
@@ -110,7 +110,7 @@ def VecList4QAsmOperand : AsmOperandClass {
"q-registers in range [q0,q7]";
}
-def VecList4Q : RegisterOperand<QQQQPR, "printMVEVectorListFourQ"> {
+def VecList4Q : RegisterOperand<MQQQQPR, "printMVEVectorListFourQ"> {
let ParserMatchClass = VecList4QAsmOperand;
let PrintMethod = "printMVEVectorList<4>";
}
@@ -332,7 +332,7 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic
(VTI.Vec MQPR:$Qn))),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
// Optionally with the select folded through the op
@@ -341,7 +341,7 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic
(VTI.Vec MQPR:$Qn),
(VTI.Vec IdentityVec))))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$Qm)))>;
}
@@ -350,7 +350,7 @@ multiclass MVE_TwoOpPattern<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrinsic
PredOperands,
(? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -368,7 +368,7 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
(VTI.Vec (ARMvdup rGPR:$Rn)))),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
// Optionally with the select folded through the op
@@ -377,7 +377,7 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
(ARMvdup rGPR:$Rn),
(VTI.Vec IdentityVec))))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$Qm)))>;
}
@@ -386,19 +386,20 @@ multiclass MVE_TwoOpPatternDup<MVEVectorVTInfo VTI, SDPatternOperator Op, Intrin
PredOperands,
(? (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), rGPR:$Rn,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
// --------- Start of base classes for the instructions themselves
class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
- string ops, string cstr, list<dag> pattern>
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern>
: Thumb2XI<oops, iops, AddrModeNone, 4, itin, !strconcat(asm, "\t", ops), cstr,
pattern>,
Requires<[HasMVEInt]> {
let D = MVEDomain;
let DecoderNamespace = "MVE";
+ let VecSize = vecsize;
}
// MVE_p is used for most predicated instructions, to add the cluster
@@ -406,22 +407,22 @@ class MVE_MI<dag oops, dag iops, InstrItinClass itin, string asm,
// the input predicate register.
class MVE_p<dag oops, dag iops, InstrItinClass itin, string iname,
string suffix, string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
+ bits<2> vecsize, list<dag> pattern=[]>
: MVE_MI<oops, !con(iops, (ins vpred:$vp)), itin,
// If the instruction has a suffix, like vadd.f32, then the
// VPT predication suffix goes before the dot, so the full
// name has to be "vadd${vp}.f32".
!strconcat(iname, "${vp}",
!if(!eq(suffix, ""), "", !strconcat(".", suffix))),
- ops, !strconcat(cstr, vpred.vpred_constraint), pattern> {
+ ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> {
let Inst{31-29} = 0b111;
let Inst{27-26} = 0b11;
}
class MVE_f<dag oops, dag iops, InstrItinClass itin, string iname,
string suffix, string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, itin, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
let Predicates = [HasMVEFloat];
}
@@ -599,11 +600,11 @@ def MVE_URSHRL : MVE_ScalarShiftDRegImm<"urshrl", 0b01, 0b1>;
class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
string iname, string suffix,
- string ops, string cstr, list<dag> pattern=[]>
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern=[]>
// Always use vpred_n and not vpred_r: with the output register being
// a GPR and not a vector register, there can't be any question of
// what to put in its inactive lanes.
- : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, pattern> {
+ : MVE_p<oops, iops, itin, iname, suffix, ops, vpred_n, cstr, vecsize, pattern> {
let Inst{25-23} = 0b101;
let Inst{11-9} = 0b111;
@@ -613,7 +614,7 @@ class MVE_rDest<dag oops, dag iops, InstrItinClass itin,
class MVE_VABAV<string suffix, bit U, bits<2> size>
: MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm),
NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src",
- []> {
+ size, []> {
bits<4> Qm;
bits<4> Qn;
bits<4> Rda;
@@ -652,7 +653,7 @@ multiclass MVE_VABAV_m<MVEVectorVTInfo VTI> {
(VTI.Pred VCCR:$mask))),
(i32 (Inst (i32 rGPR:$Rda_src),
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -666,7 +667,7 @@ defm MVE_VABAVu32 : MVE_VABAV_m<MVE_v4u32>;
class MVE_VADDV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, bits<2> size, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary,
- iname, suffix, "$Rda, $Qm", cstr, pattern> {
+ iname, suffix, "$Rda, $Qm", cstr, size, pattern> {
bits<3> Qm;
bits<4> Rda;
@@ -710,11 +711,11 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
def : Pat<(i32 (vecreduce_add (VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec MQPR:$vec),
(VTI.Vec ARMimmAllZerosV))))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
def : Pat<(i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
@@ -722,13 +723,13 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
(VTI.Vec MQPR:$vec),
(VTI.Vec ARMimmAllZerosV))))),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (ARMVADDVu (VTI.Vec MQPR:$vec))),
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
def : Pat<(i32 (add (i32 (ARMVADDVpu (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
} else {
def : Pat<(i32 (ARMVADDVs (VTI.Vec MQPR:$vec))),
(i32 (InstN $vec))>;
@@ -736,21 +737,21 @@ multiclass MVE_VADDV_A<MVEVectorVTInfo VTI> {
(i32 tGPREven:$acc))),
(i32 (InstA $acc, $vec))>;
def : Pat<(i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (ARMVADDVps (VTI.Vec MQPR:$vec), (VTI.Pred VCCR:$pred))),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
}
def : Pat<(i32 (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
(i32 VTI.Unsigned),
(VTI.Pred VCCR:$pred))),
- (i32 (InstN $vec, ARMVCCThen, $pred))>;
+ (i32 (InstN $vec, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (int_arm_mve_addv_predicated (VTI.Vec MQPR:$vec),
(i32 VTI.Unsigned),
(VTI.Pred VCCR:$pred)),
(i32 tGPREven:$acc))),
- (i32 (InstA $acc, $vec, ARMVCCThen, $pred))>;
+ (i32 (InstA $acc, $vec, ARMVCCThen, $pred, zero_reg))>;
}
}
@@ -764,7 +765,7 @@ defm MVE_VADDVu32 : MVE_VADDV_A<MVE_v4u32>;
class MVE_VADDLV<string iname, string suffix, dag iops, string cstr,
bit A, bit U, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname,
- suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> {
+ suffix, "$RdaLo, $RdaHi, $Qm", cstr, 0b10, pattern> {
bits<3> Qm;
bits<4> RdaLo;
bits<4> RdaHi;
@@ -821,11 +822,11 @@ multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> {
def : Pat<(ARMVADDLVA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec)),
(InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec))>;
def : Pat<(ARMVADDLVp (v4i32 MQPR:$vec), (VTI.Pred VCCR:$pred)),
- (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ (InstN (v4i32 MQPR:$vec), ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg)>;
def : Pat<(ARMVADDLVAp tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
(VTI.Pred VCCR:$pred)),
(InstA tGPREven:$acclo, tGPROdd:$acchi, (v4i32 MQPR:$vec),
- ARMVCCThen, (VTI.Pred VCCR:$pred))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg)>;
}
}
@@ -836,7 +837,7 @@ class MVE_VMINMAXNMV<string iname, string suffix, bit sz,
bit bit_17, bit bit_7, list<dag> pattern=[]>
: MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm),
NoItinerary, iname, suffix, "$RdaSrc, $Qm",
- "$RdaDest = $RdaSrc", pattern> {
+ "$RdaDest = $RdaSrc", !if(sz, 0b01, 0b10), pattern> {
bits<3> Qm;
bits<4> RdaDest;
@@ -876,7 +877,7 @@ multiclass MVE_VMINMAXNMV_p<string iname, bit notAbs, bit isMin,
(VTI.Pred VCCR:$pred))),
(COPY_TO_REGCLASS (Inst (COPY_TO_REGCLASS ScalarReg:$prev, rGPR),
(VTI.Vec MQPR:$vec),
- ARMVCCThen, (VTI.Pred VCCR:$pred)),
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg),
ScalarReg)>;
}
}
@@ -897,7 +898,7 @@ defm MVE_VMAXNMAV: MVE_VMINMAXNMV_fty<"vmaxnmav", 0, 0, "int_arm_mve_maxnmav">;
class MVE_VMINMAXV<string iname, string suffix, bit U, bits<2> size,
bit bit_17, bit bit_7, list<dag> pattern=[]>
: MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary,
- iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> {
+ iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", size, pattern> {
bits<3> Qm;
bits<4> RdaDest;
@@ -931,7 +932,7 @@ multiclass MVE_VMINMAXV_p<string iname, bit notAbs, bit isMin,
(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec)))>;
def : Pat<(i32 !con(args, (pred_intr (VTI.Pred VCCR:$pred)))),
(i32 (Inst (i32 rGPR:$prev), (VTI.Vec MQPR:$vec),
- ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg))>;
}
}
@@ -1020,9 +1021,10 @@ defm MVE_VMINAV : MVE_VMINMAXAV_ty<"vminav", 1, "int_arm_mve_minav">;
defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">;
class MVE_VMLAMLSDAV<string iname, string suffix, dag iops, string cstr,
- bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0>
+ bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
+ bits<2> vecsize>
: MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix,
- "$RdaDest, $Qn, $Qm", cstr, []> {
+ "$RdaDest, $Qn, $Qm", cstr, vecsize, []> {
bits<4> RdaDest;
bits<3> Qm;
bits<3> Qn;
@@ -1050,11 +1052,11 @@ multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> {
def ""#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # x, VTI.Suffix,
(ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0>;
+ sz, bit_28, 0b0, X, bit_8, bit_0, VTI.Size>;
def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV<iname # "a" # x, VTI.Suffix,
(ins tGPREven:$RdaSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaDest = $RdaSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0>;
+ sz, bit_28, 0b1, X, bit_8, bit_0, VTI.Size>;
let Predicates = [HasMVEInt] in {
def : Pat<(i32 (int_arm_mve_vmldava
(i32 VTI.Unsigned),
@@ -1074,7 +1076,7 @@ multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
(VTI.Pred VCCR:$mask))),
(i32 (!cast<Instruction>(NAME # x # VTI.Suffix)
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
def : Pat<(i32 (int_arm_mve_vmldava
(i32 VTI.Unsigned),
@@ -1096,7 +1098,7 @@ multiclass MVE_VMLAMLSDAV_A<string iname, string x, MVEVectorVTInfo VTI,
(i32 (!cast<Instruction>(NAME # "a" # x # VTI.Suffix)
(i32 tGPREven:$RdaSrc),
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -1200,47 +1202,47 @@ let Predicates = [HasMVEInt] in {
def : Pat<(i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
(mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
(v4i32 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu32 $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
(mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
(v8i16 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu16 $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
(mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
(v16i8 ARMimmAllZerosV)))),
- (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu8 $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVs8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred))),
- (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVu8 (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v4i1 VCCR:$pred),
(mul (v4i32 MQPR:$src1), (v4i32 MQPR:$src2)),
(v4i32 ARMimmAllZerosV)))),
(i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau32 $src3, $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v8i1 VCCR:$pred),
(mul (v8i16 MQPR:$src1), (v8i16 MQPR:$src2)),
(v8i16 ARMimmAllZerosV)))),
(i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau16 $src3, $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVas16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau16 tGPREven:$Rd, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (i32 (vecreduce_add (vselect (v16i1 VCCR:$pred),
(mul (v16i8 MQPR:$src1), (v16i8 MQPR:$src2)),
(v16i8 ARMimmAllZerosV)))),
(i32 tGPREven:$src3))),
- (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau8 $src3, $src1, $src2, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVps (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVas8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
def : Pat<(i32 (add (ARMVMLAVpu (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), (v16i1 VCCR:$pred)), tGPREven:$Rd)),
- (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred))>;
+ (i32 (MVE_VMLADAVau8 tGPREven:$Rd, (v16i8 MQPR:$val1), (v16i8 MQPR:$val2), ARMVCCThen, $pred, zero_reg))>;
}
// vmlav aliases vmladav
@@ -1255,9 +1257,9 @@ foreach acc = ["", "a"] in {
// Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH
class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]>
+ bits<2> vecsize, list<dag> pattern=[]>
: MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary,
- iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> {
+ iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, vecsize, pattern> {
bits<4> RdaLoDest;
bits<4> RdaHiDest;
bits<3> Qm;
@@ -1285,35 +1287,35 @@ class MVE_VMLALDAVBase<string iname, string suffix, dag iops, string cstr,
}
multiclass MVE_VMLALDAVBase_A<string iname, string x, string suffix,
- bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
- list<dag> pattern=[]> {
+ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0,
+ bits<2> vecsize, list<dag> pattern=[]> {
def ""#x#suffix : MVE_VMLALDAVBase<
iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "",
- sz, bit_28, 0b0, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b0, X, bit_8, bit_0, vecsize, pattern>;
def "a"#x#suffix : MVE_VMLALDAVBase<
iname # "a" # x, suffix,
(ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm),
"$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc",
- sz, bit_28, 0b1, X, bit_8, bit_0, pattern>;
+ sz, bit_28, 0b1, X, bit_8, bit_0, vecsize, pattern>;
}
multiclass MVE_VMLALDAVBase_AX<string iname, string suffix, bit sz, bit bit_28,
- bit bit_8, bit bit_0, list<dag> pattern=[]> {
+ bit bit_8, bit bit_0, bits<2> vecsize, list<dag> pattern=[]> {
defm "" : MVE_VMLALDAVBase_A<iname, "", suffix, sz,
- bit_28, 0b0, bit_8, bit_0, pattern>;
+ bit_28, 0b0, bit_8, bit_0, vecsize, pattern>;
defm "" : MVE_VMLALDAVBase_A<iname, "x", suffix, sz,
- bit_28, 0b1, bit_8, bit_0, pattern>;
+ bit_28, 0b1, bit_8, bit_0, vecsize, pattern>;
}
-multiclass MVE_VRMLALDAVH_multi<string suffix, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix,
- 0b0, 0b0, 0b1, 0b0, pattern>;
- defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix,
- 0b0, 0b1, 0b0, 0b1, 0b0, pattern>;
+multiclass MVE_VRMLALDAVH_multi<MVEVectorVTInfo VTI, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#VTI.BitsSuffix,
+ 0b0, 0b0, 0b1, 0b0, VTI.Size, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#VTI.BitsSuffix,
+ 0b0, 0b1, 0b0, 0b1, 0b0, VTI.Size, pattern>;
}
-defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">;
+defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<MVE_v4i32>;
// vrmlalvh aliases for vrmlaldavh
def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm",
@@ -1333,14 +1335,15 @@ def : MVEInstAlias<"vrmlalvha${vp}.u32\t$RdaLo, $RdaHi, $Qn, $Qm",
tGPREven:$RdaLo, tGPROdd:$RdaHi,
MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>;
-multiclass MVE_VMLALDAV_multi<string suffix, bit sz, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>;
- defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix,
- sz, 0b1, 0b0, 0b0, 0b0, pattern>;
+multiclass MVE_VMLALDAV_multi<MVEVectorVTInfo VTI, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#VTI.BitsSuffix,
+ VTI.Size{1}, 0b0, 0b0, 0b0, VTI.Size, pattern>;
+ defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#VTI.BitsSuffix,
+ VTI.Size{1}, 0b1, 0b0, 0b0, 0b0, VTI.Size, pattern>;
}
-defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>;
-defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>;
+defm MVE_VMLALDAV : MVE_VMLALDAV_multi<MVE_v8i16>;
+defm MVE_VMLALDAV : MVE_VMLALDAV_multi<MVE_v4i32>;
let Predicates = [HasMVEInt] in {
def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)),
@@ -1363,22 +1366,22 @@ let Predicates = [HasMVEInt] in {
// Predicated
def : Pat<(ARMVMLALVps (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVs32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVpu (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVu32 (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVps (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVs16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVpu (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVu16 (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVas32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), (v4i1 VCCR:$pred)),
- (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVau32 tGPREven:$Rda, tGPROdd:$Rdb, (v4i32 MQPR:$val1), (v4i32 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVAps tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVas16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
def : Pat<(ARMVMLALVApu tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), (v8i1 VCCR:$pred)),
- (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred)>;
+ (MVE_VMLALDAVau16 tGPREven:$Rda, tGPROdd:$Rdb, (v8i16 MQPR:$val1), (v8i16 MQPR:$val2), ARMVCCThen, $pred, zero_reg)>;
}
// vmlalv aliases vmlaldav
@@ -1393,22 +1396,22 @@ foreach acc = ["", "a"] in {
}
multiclass MVE_VMLSLDAV_multi<string iname, string suffix, bit sz,
- bit bit_28, list<dag> pattern=[]> {
- defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, pattern>;
+ bit bit_28, bits<2> vecsize, list<dag> pattern=[]> {
+ defm "" : MVE_VMLALDAVBase_AX<iname, suffix, sz, bit_28, 0b0, 0b1, vecsize, pattern>;
}
-defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>;
-defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>;
-defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>;
+defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0, 0b01>;
+defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0, 0b10>;
+defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1, 0b10>;
// end of mve_rDest instructions
// start of mve_comp instructions
class MVE_comp<InstrItinClass itin, string iname, string suffix,
- string cstr, list<dag> pattern=[]>
+ string cstr, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix,
- "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> {
+ "$Qd, $Qn, $Qm", vpred_r, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -1425,25 +1428,26 @@ class MVE_comp<InstrItinClass itin, string iname, string suffix,
let Inst{0} = 0b0;
}
-class MVE_VMINMAXNM<string iname, string suffix, bit sz, bit bit_21,
+class MVE_VMINMAXNM<string iname, string suffix, bits<2> sz, bit bit_21,
list<dag> pattern=[]>
- : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+ : MVE_comp<NoItinerary, iname, suffix, "", sz, pattern> {
let Inst{28} = 0b1;
let Inst{25-24} = 0b11;
let Inst{23} = 0b0;
let Inst{21} = bit_21;
- let Inst{20} = sz;
+ let Inst{20} = sz{0};
let Inst{11} = 0b1;
let Inst{8} = 0b1;
let Inst{6} = 0b1;
let Inst{4} = 0b1;
let Predicates = [HasMVEFloat];
+ let validForTailPredication = 1;
}
multiclass MVE_VMINMAXNM_m<string iname, bit bit_4, MVEVectorVTInfo VTI, SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size{0}, bit_4>;
+ def "" : MVE_VMINMAXNM<iname, VTI.Suffix, VTI.Size, bit_4>;
let Predicates = [HasMVEFloat] in {
defm : MVE_TwoOpPattern<VTI, Op, PredInt, (? (i32 0)), !cast<Instruction>(NAME)>;
@@ -1458,7 +1462,7 @@ defm MVE_VMINNMf16 : MVE_VMINMAXNM_m<"vminnm", 0b1, MVE_v8f16, fminnum, int_arm_
class MVE_VMINMAX<string iname, string suffix, bit U, bits<2> size,
bit bit_4, list<dag> pattern=[]>
- : MVE_comp<NoItinerary, iname, suffix, "", pattern> {
+ : MVE_comp<NoItinerary, iname, suffix, "", size, pattern> {
let Inst{28} = U;
let Inst{25-24} = 0b11;
@@ -1504,8 +1508,8 @@ defm MVE_VMAXu32 : MVE_VMAX<MVE_v4u32>;
// start of mve_bit instructions
class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
- string ops, string cstr, list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, pattern> {
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred_r, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -1516,7 +1520,7 @@ class MVE_bit_arith<dag oops, dag iops, string iname, string suffix,
}
def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
- "vbic", "", "$Qd, $Qn, $Qm", ""> {
+ "vbic", "", "$Qd, $Qn, $Qm", "", 0b00> {
bits<4> Qn;
let Inst{28} = 0b0;
@@ -1532,9 +1536,10 @@ def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
let validForTailPredication = 1;
}
-class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, string cstr="">
+class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7,
+ bits<2> vecsize, string cstr="">
: MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname,
- suffix, "$Qd, $Qm", cstr> {
+ suffix, "$Qd, $Qm", cstr, vecsize> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
@@ -1548,14 +1553,14 @@ class MVE_VREV<string iname, string suffix, bits<2> size, bits<2> bit_8_7, strin
let Inst{0} = 0b0;
}
-def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">;
-def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">;
-def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">;
+def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, 0b11, "@earlyclobber $Qd">;
+def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, 0b11, "@earlyclobber $Qd">;
+def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, 0b11, "@earlyclobber $Qd">;
-def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>;
-def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>;
+def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01, 0b10>;
+def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01, 0b10>;
-def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>;
+def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10, 0b01>;
let Predicates = [HasMVEInt] in {
def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))),
@@ -1574,7 +1579,7 @@ multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
@@ -1590,7 +1595,7 @@ let Predicates = [HasMVEInt] in {
}
def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
- "vmvn", "", "$Qd, $Qm", ""> {
+ "vmvn", "", "$Qd, $Qm", "", 0b00> {
let Inst{28} = 0b1;
let Inst{25-23} = 0b111;
let Inst{21-16} = 0b110000;
@@ -1607,13 +1612,13 @@ let Predicates = [HasMVEInt] in {
def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
(VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
: MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
- iname, "", "$Qd, $Qn, $Qm", ""> {
+ iname, "", "$Qd, $Qn, $Qm", "", 0b00> {
bits<4> Qn;
let Inst{28} = bit_28;
@@ -1684,9 +1689,9 @@ let Predicates = [HasMVEInt] in {
int_arm_mve_orn_predicated, (? ), MVE_VORN>;
}
-class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
+class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps, bits<2> vecsize>
: MVE_p<(outs MQPR:$Qd), inOps, NoItinerary,
- iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> {
+ iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src", vecsize> {
bits<12> imm;
bits<4> Qd;
@@ -1709,7 +1714,7 @@ class MVE_bit_cmode<string iname, string suffix, bit halfword, dag inOps>
multiclass MVE_bit_cmode_p<string iname, bit opcode,
MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
- (ins MQPR:$Qd_src, imm_type:$imm)> {
+ (ins MQPR:$Qd_src, imm_type:$imm), VTI.Size> {
let Inst{5} = opcode;
let validForTailPredication = 1;
}
@@ -1723,7 +1728,7 @@ multiclass MVE_bit_cmode_p<string iname, bit opcode,
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
UnpredPat, (VTI.Vec MQPR:$src))),
(VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
- ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg))>;
}
}
@@ -1801,6 +1806,7 @@ class MVE_VMOV_lane_32<MVE_VMOV_lane_direction dir>
let Inst{16} = Idx{1};
let Inst{21} = Idx{0};
+ let VecSize = 0b10;
let Predicates = [HasFPRegsV8_1M];
}
@@ -1812,6 +1818,8 @@ class MVE_VMOV_lane_16<string suffix, bit U, MVE_VMOV_lane_direction dir>
let Inst{16} = Idx{2};
let Inst{21} = Idx{1};
let Inst{6} = Idx{0};
+
+ let VecSize = 0b01;
}
class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
@@ -1822,6 +1830,8 @@ class MVE_VMOV_lane_8<string suffix, bit U, MVE_VMOV_lane_direction dir>
let Inst{21} = Idx{2};
let Inst{6} = Idx{1};
let Inst{5} = Idx{0};
+
+ let VecSize = 0b00;
}
def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>;
@@ -1932,7 +1942,7 @@ let Predicates = [HasMVEInt] in {
class MVE_int<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
- iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+ iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -2205,7 +2215,7 @@ multiclass MVE_VRHADD_m<MVEVectorVTInfo VTI,
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2293,7 +2303,7 @@ multiclass MVE_VHADD_m<MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2334,7 +2344,7 @@ multiclass MVE_VHSUB_m<MVEVectorVTInfo VTI,
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2350,9 +2360,9 @@ defm MVE_VHSUBu8 : MVE_VHSUB<MVE_v16u8, subnuw, ARMvshruImm>;
defm MVE_VHSUBu16 : MVE_VHSUB<MVE_v8u16, subnuw, ARMvshruImm>;
defm MVE_VHSUBu32 : MVE_VHSUB<MVE_v4u32, subnuw, ARMvshruImm>;
-class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
+class MVE_VDUP<string suffix, bit B, bit E, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary,
- "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> {
+ "vdup", suffix, "$Qd, $Rt", vpred_r, "", vecsize, pattern> {
bits<4> Qd;
bits<4> Rt;
@@ -2371,9 +2381,9 @@ class MVE_VDUP<string suffix, bit B, bit E, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
-def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>;
-def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0>;
+def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0, 0b10>;
+def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1, 0b01>;
+def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0, 0b00>;
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))),
@@ -2392,27 +2402,27 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred),
(v16i8 (ARMvdup (i32 rGPR:$elem))),
(v16i8 MQPR:$inactive))),
- (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred),
+ (MVE_VDUP8 rGPR:$elem, ARMVCCThen, (v16i1 VCCR:$pred), zero_reg,
(v16i8 MQPR:$inactive))>;
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred),
(v8i16 (ARMvdup (i32 rGPR:$elem))),
(v8i16 MQPR:$inactive))),
- (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred), zero_reg,
(v8i16 MQPR:$inactive))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred),
(v4i32 (ARMvdup (i32 rGPR:$elem))),
(v4i32 MQPR:$inactive))),
- (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred), zero_reg,
(v4i32 MQPR:$inactive))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred),
(v4f32 (ARMvdup (i32 rGPR:$elem))),
(v4f32 MQPR:$inactive))),
- (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred),
+ (MVE_VDUP32 rGPR:$elem, ARMVCCThen, (v4i1 VCCR:$pred), zero_reg,
(v4f32 MQPR:$inactive))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred),
(v8f16 (ARMvdup (i32 rGPR:$elem))),
(v8f16 MQPR:$inactive))),
- (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred),
+ (MVE_VDUP16 rGPR:$elem, ARMVCCThen, (v8i1 VCCR:$pred), zero_reg,
(v8f16 MQPR:$inactive))>;
}
@@ -2420,7 +2430,7 @@ let Predicates = [HasMVEInt] in {
class MVEIntSingleSrc<string iname, string suffix, bits<2> size,
list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary,
- iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> {
+ iname, suffix, "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -2460,7 +2470,7 @@ multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
@@ -2506,7 +2516,7 @@ multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, zero_reg, $inactive))>;
}
}
@@ -2565,9 +2575,9 @@ defm MVE_VQABSNEG_Ps32 : vqabsneg_pattern<MVE_v4i32,
MVE_VQABSs32, MVE_VQNEGs32>;
class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
- dag iops, list<dag> pattern=[]>
+ dag iops, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm",
- vpred_r, "", pattern> {
+ vpred_r, "", vecsize, pattern> {
bits<13> imm;
bits<4> Qd;
@@ -2590,21 +2600,21 @@ class MVE_mod_imm<string iname, string suffix, bits<4> cmode, bit op,
let isReMaterializable = 1 in {
let isAsCheapAsAMove = 1 in {
-def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>;
-def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> {
+def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm), 0b00>;
+def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm), 0b01> {
let Inst{9} = imm{9};
}
-def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> {
+def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm), 0b10> {
let Inst{11-8} = imm{11-8};
}
-def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>;
-def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>;
+def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm), 0b11>;
+def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm), 0b10>;
} // let isAsCheapAsAMove = 1
-def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> {
+def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm), 0b01> {
let Inst{9} = imm{9};
}
-def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> {
+def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm), 0b10> {
let Inst{11-8} = imm{11-8};
}
} // let isReMaterializable = 1
@@ -2630,18 +2640,18 @@ let Predicates = [HasMVEInt] in {
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
MQPR:$inactive)),
(v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
- ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+ ARMVCCThen, VCCR:$pred, zero_reg, MQPR:$inactive))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
MQPR:$inactive)),
(v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
- ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+ ARMVCCThen, VCCR:$pred, zero_reg, MQPR:$inactive))>;
}
class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
bit bit_12, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
- pattern> {
+ size, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -2675,7 +2685,7 @@ multiclass MVE_VMINMAXA_m<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -2700,7 +2710,7 @@ defm MVE_VMAXAs32 : MVE_VMAXA<MVE_v4s32>;
def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
(ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm),
NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm",
- vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> {
+ vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc", 0b10> {
bits<5> imm;
bits<4> Qd;
bits<4> RdmDest;
@@ -2717,8 +2727,8 @@ def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd),
class MVE_shift_imm<dag oops, dag iops, string iname, string suffix,
string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -2732,7 +2742,7 @@ class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
iname, suffix, "$Qd, $Qm", vpred_r, "",
- pattern> {
+ sz, pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
let Inst{21} = 0b1;
@@ -2756,7 +2766,7 @@ multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
(OutVTI.Pred VCCR:$pred),
(OutVTI.Vec MQPR:$inactive))),
(OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
- (OutVTI.Pred VCCR:$pred),
+ (OutVTI.Pred VCCR:$pred), zero_reg,
(OutVTI.Vec MQPR:$inactive)))>;
}
@@ -2798,9 +2808,9 @@ let Predicates = [HasMVEInt] in {
class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
- Operand immtype, list<dag> pattern=[]>
+ Operand immtype, bits<2> vecsize, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
- iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
+ iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", vecsize, pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b101;
let Inst{21} = 0b1;
@@ -2821,7 +2831,7 @@ class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
class MVE_VSHLL_imm8<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, 0b01, pattern> {
bits<3> imm;
let Inst{20-19} = 0b01;
let Inst{18-16} = imm;
@@ -2829,7 +2839,7 @@ class MVE_VSHLL_imm8<string iname, string suffix,
class MVE_VSHLL_imm16<string iname, string suffix,
bit U, bit th, list<dag> pattern=[]>
- : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
+ : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, 0b10, pattern> {
bits<4> imm;
let Inst{20} = 0b1;
let Inst{19-16} = imm;
@@ -2847,7 +2857,7 @@ def MVE_VSHLL_immu16th : MVE_VSHLL_imm16<"vshllt", "u16", 0b1, 0b1>;
class MVE_VSHLL_by_lane_width<string iname, string suffix, bits<2> size,
bit U, string ops, list<dag> pattern=[]>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
- iname, suffix, ops, vpred_r, "", pattern> {
+ iname, suffix, ops, vpred_r, "", !if(size, 0b10, 0b01), pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b100;
let Inst{21-20} = 0b11;
@@ -2894,14 +2904,14 @@ multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
(VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
(i32 VTI.Unsigned), (i32 top),
(VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
- (VTI.DblPred VCCR:$mask),
+ (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
@@ -2909,15 +2919,15 @@ foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
foreach top = [0, 1] in
defm : MVE_VSHLL_patterns<VTI, top>;
-class MVE_shift_imm_partial<Operand imm, string iname, string suffix>
+class MVE_shift_imm_partial<Operand imm, string iname, string suffix, bits<2> vecsize>
: MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm),
- iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> {
+ iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", vecsize> {
Operand immediateType = imm;
}
class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
- Operand imm, list<dag> pattern=[]>
- : MVE_shift_imm_partial<imm, iname, suffix> {
+ Operand imm, bits<2> vecsize>
+ : MVE_shift_imm_partial<imm, iname, suffix, vecsize> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2932,35 +2942,35 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
let retainsPreviousHalfElement = 1;
}
-def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
+def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> {
+def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> {
+def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
-def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> {
+def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> {
+def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> {
+def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
-def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> {
+def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
-def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> {
+def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
- Operand imm, list<dag> pattern=[]>
- : MVE_shift_imm_partial<imm, iname, suffix> {
+ Operand imm, bits<2> vecsize>
+ : MVE_shift_imm_partial<imm, iname, suffix, vecsize> {
bits<5> imm;
let Inst{28} = bit_28;
@@ -2976,42 +2986,42 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
}
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> {
+ "vqrshrunb", "s16", 0b1, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN<
- "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> {
+ "vqrshrunt", "s16", 0b1, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN<
- "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> {
+ "vqrshrunb", "s32", 0b1, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN<
- "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> {
+ "vqrshrunt", "s32", 0b1, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN<
- "vqshrunb", "s16", 0b0, 0b0, shr_imm8> {
+ "vqshrunb", "s16", 0b0, 0b0, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs16th : MVE_VxQRSHRUN<
- "vqshrunt", "s16", 0b0, 0b1, shr_imm8> {
+ "vqshrunt", "s16", 0b0, 0b1, shr_imm8, 0b01> {
let Inst{20-19} = 0b01;
}
def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN<
- "vqshrunb", "s32", 0b0, 0b0, shr_imm16> {
+ "vqshrunb", "s32", 0b0, 0b0, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
def MVE_VQSHRUNs32th : MVE_VxQRSHRUN<
- "vqshrunt", "s32", 0b0, 0b1, shr_imm16> {
+ "vqshrunt", "s32", 0b0, 0b1, shr_imm16, 0b10> {
let Inst{20} = 0b1;
}
class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
- Operand imm, list<dag> pattern=[]>
- : MVE_shift_imm_partial<imm, iname, suffix> {
+ Operand imm, bits<2> vecsize>
+ : MVE_shift_imm_partial<imm, iname, suffix, vecsize> {
bits<5> imm;
let Inst{25-23} = 0b101;
@@ -3026,19 +3036,19 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
}
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
- def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8> {
+ def s16 : MVE_VxQRSHRN<iname, "s16", bit_0, bit_12, shr_imm8, 0b01> {
let Inst{28} = 0b0;
let Inst{20-19} = 0b01;
}
- def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8> {
+ def u16 : MVE_VxQRSHRN<iname, "u16", bit_0, bit_12, shr_imm8, 0b01> {
let Inst{28} = 0b1;
let Inst{20-19} = 0b01;
}
- def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16> {
+ def s32 : MVE_VxQRSHRN<iname, "s32", bit_0, bit_12, shr_imm16, 0b10> {
let Inst{28} = 0b0;
let Inst{20} = 0b1;
}
- def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16> {
+ def u32 : MVE_VxQRSHRN<iname, "u32", bit_0, bit_12, shr_imm16, 0b10> {
let Inst{28} = 0b1;
let Inst{20} = 0b1;
}
@@ -3062,7 +3072,7 @@ multiclass MVE_VSHRN_patterns<MVE_shift_imm_partial inst,
(OutVTI.Vec outparams)>;
def : Pat<(OutVTI.Vec !con(inparams, (int_arm_mve_vshrn_predicated
(InVTI.Pred VCCR:$pred)))),
- (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+ (OutVTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred, zero_reg)))>;
}
defm : MVE_VSHRN_patterns<MVE_VSHRNi16bh, MVE_v16s8, MVE_v8s16, 0,0,0>;
@@ -3113,7 +3123,7 @@ defm : MVE_VSHRN_patterns<MVE_VQRSHRUNs32th, MVE_v8u16, MVE_v4s32, 1,1,1>;
class MVE_shift_by_vec<string iname, string suffix, bit U,
bits<2> size, bit bit_4, bit bit_8>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary,
- iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> {
+ iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", size, []> {
// Shift instructions which take a vector of shift counts
bits<4> Qd;
bits<4> Qm;
@@ -3152,7 +3162,7 @@ multiclass MVE_shift_by_vec_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
(i32 q), (i32 r), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$in), (VTI.Vec MQPR:$sh),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -3188,8 +3198,8 @@ let Predicates = [HasMVEInt] in {
class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -3212,10 +3222,10 @@ class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
dag unsignedFlag = (?);
}
-class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
+class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType, bits<2> vecsize>
: MVE_shift_with_imm<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qm, immType:$imm),
- "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src"> {
+ "$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src", vecsize> {
bits<6> imm;
let Inst{28} = 0b1;
let Inst{25-24} = 0b11;
@@ -3227,27 +3237,27 @@ class MVE_VSxI_imm<string iname, string suffix, bit bit_8, Operand immType>
Operand immediateType = immType;
}
-def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> {
+def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8, 0b00> {
let Inst{21-19} = 0b001;
}
-def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> {
+def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16, 0b01> {
let Inst{21-20} = 0b01;
}
-def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> {
+def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32, 0b10> {
let Inst{21} = 0b1;
}
-def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> {
+def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7, 0b00> {
let Inst{21-19} = 0b001;
}
-def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> {
+def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15, 0b01> {
let Inst{21-20} = 0b01;
}
-def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> {
+def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31, 0b10> {
let Inst{21} = 0b1;
}
@@ -3263,7 +3273,7 @@ multiclass MVE_VSxI_patterns<MVE_VSxI_imm inst, string name,
def : Pat<(VTI.Vec !setdagop(inparams, unpred_int)),
(VTI.Vec outparams)>;
def : Pat<(VTI.Vec !con(inparams, (pred_int (VTI.Pred VCCR:$pred)))),
- (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred)))>;
+ (VTI.Vec !con(outparams, (? ARMVCCThen, VCCR:$pred, zero_reg)))>;
}
defm : MVE_VSxI_patterns<MVE_VSLIimm8, "vsli", MVE_v16i8>;
@@ -3276,7 +3286,7 @@ defm : MVE_VSxI_patterns<MVE_VSRIimm32, "vsri", MVE_v4i32>;
class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", VTI_.Size> {
bits<6> imm;
let Inst{28} = VTI_.Unsigned;
@@ -3316,7 +3326,7 @@ let unpred_int = int_arm_mve_vqshl_imm,
class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", VTI_.Size> {
bits<6> imm;
let Inst{28} = 0b1;
@@ -3346,7 +3356,7 @@ let unpred_int = int_arm_mve_vqshlu_imm,
class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
: MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
(ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", VTI_.Size> {
bits<6> imm;
let Inst{28} = VTI_.Unsigned;
@@ -3400,7 +3410,7 @@ multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
(inst.VTI.Vec MQPR:$inactive)))),
(inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
inst.immediateType:$imm,
- ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+ ARMVCCThen, (inst.VTI.Pred VCCR:$mask), zero_reg,
(inst.VTI.Vec MQPR:$inactive)))>;
}
@@ -3420,10 +3430,10 @@ defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
-class MVE_VSHR_imm<string suffix, dag imm>
+class MVE_VSHR_imm<string suffix, dag imm, bits<2> vecsize>
: MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", vecsize> {
bits<6> imm;
let Inst{25-24} = 0b11;
@@ -3431,40 +3441,40 @@ class MVE_VSHR_imm<string suffix, dag imm>
let Inst{10-8} = 0b000;
}
-def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> {
+def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm), 0b00> {
let Inst{28} = 0b0;
let Inst{21-19} = 0b001;
}
-def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> {
+def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm), 0b00> {
let Inst{28} = 0b1;
let Inst{21-19} = 0b001;
}
-def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> {
+def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm), 0b01> {
let Inst{28} = 0b0;
let Inst{21-20} = 0b01;
}
-def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> {
+def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm), 0b01> {
let Inst{28} = 0b1;
let Inst{21-20} = 0b01;
}
-def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> {
+def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm), 0b10> {
let Inst{28} = 0b0;
let Inst{21} = 0b1;
}
-def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> {
+def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm), 0b10> {
let Inst{28} = 0b1;
let Inst{21} = 0b1;
}
-class MVE_VSHL_imm<string suffix, dag imm>
+class MVE_VSHL_imm<string suffix, dag imm, bits<2> vecsize>
: MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd),
!con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
- vpred_r, ""> {
+ vpred_r, "", vecsize> {
bits<6> imm;
let Inst{28} = 0b0;
@@ -3473,15 +3483,15 @@ class MVE_VSHL_imm<string suffix, dag imm>
let Inst{10-8} = 0b101;
}
-def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> {
+def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm), 0b00> {
let Inst{21-19} = 0b001;
}
-def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> {
+def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm), 0b01> {
let Inst{21-20} = 0b01;
}
-def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
+def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm), 0b10> {
let Inst{21} = 0b1;
}
@@ -3497,7 +3507,7 @@ multiclass MVE_immediate_shift_patterns_inner<
(pred_int (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive)))),
(VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm,
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -3525,8 +3535,8 @@ let Predicates = [HasMVEInt] in {
// start of MVE Floating Point instructions
class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
- vpred_ops vpred, string cstr, list<dag> pattern=[]>
- : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+ vpred_ops vpred, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_f<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qm;
let Inst{12} = 0b0;
@@ -3539,7 +3549,7 @@ class MVE_float<string iname, string suffix, dag oops, dag iops, string ops,
class MVE_VRINT<string rmode, bits<3> op, string suffix, bits<2> size,
list<dag> pattern=[]>
: MVE_float<!strconcat("vrint", rmode), suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -3568,7 +3578,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
- (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+ (VTI.Pred VCCR:$pred), zero_reg, (VTI.Vec MQPR:$inactive)))>;
}
}
@@ -3586,16 +3596,16 @@ defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
class MVEFloatArithNeon<string iname, string suffix, bit size,
dag oops, dag iops, string ops,
- vpred_ops vpred, string cstr, list<dag> pattern=[]>
- : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, pattern> {
+ vpred_ops vpred, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_float<iname, suffix, oops, iops, ops, vpred, cstr, vecsize, pattern> {
let Inst{20} = size;
let Inst{16} = 0b0;
}
-class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
- : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
+class MVE_VMUL_fp<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
+ : MVEFloatArithNeon<iname, suffix, size{0}, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm", vpred_r, "",
- pattern> {
+ size, pattern> {
bits<4> Qd;
bits<4> Qn;
@@ -3611,9 +3621,9 @@ class MVE_VMUL_fp<string iname, string suffix, bit size, list<dag> pattern=[]>
let validForTailPredication = 1;
}
-multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
+multiclass MVE_VMULT_fp_m<string iname, MVEVectorVTInfo VTI,
SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size{0}>;
+ def "" : MVE_VMUL_fp<iname, VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3622,15 +3632,15 @@ multiclass MVE_VMULT_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
}
multiclass MVE_VMUL_fp_m<MVEVectorVTInfo VTI>
- : MVE_VMULT_fp_m<"vmul", 0, VTI, fmul, int_arm_mve_mul_predicated>;
+ : MVE_VMULT_fp_m<"vmul", VTI, fmul, int_arm_mve_mul_predicated>;
defm MVE_VMULf32 : MVE_VMUL_fp_m<MVE_v4f32>;
defm MVE_VMULf16 : MVE_VMUL_fp_m<MVE_v8f16>;
-class MVE_VCMLA<string suffix, bit size>
- : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd),
+class MVE_VCMLA<string suffix, bits<2> size>
+ : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> {
bits<4> Qd;
bits<4> Qn;
bits<2> rot;
@@ -3647,8 +3657,8 @@ class MVE_VCMLA<string suffix, bit size>
let Inst{4} = 0b0;
}
-multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
- def "" : MVE_VCMLA<VTI.Suffix, size>;
+multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
+ def "" : MVE_VCMLA<VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3665,21 +3675,21 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI, bit size> {
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
(VTI.Vec MQPR:$Qm), imm:$rot,
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
-defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16, 0b0>;
-defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32, 0b1>;
+defm MVE_VCMLAf16 : MVE_VCMLA_m<MVE_v8f16>;
+defm MVE_VCMLAf32 : MVE_VCMLA_m<MVE_v4f32>;
-class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
+class MVE_VADDSUBFMA_fp<string iname, string suffix, bits<2> size, bit bit_4,
bit bit_8, bit bit_21, dag iops=(ins),
vpred_ops vpred=vpred_r, string cstr="",
list<dag> pattern=[]>
- : MVEFloatArithNeon<iname, suffix, size, (outs MQPR:$Qd),
+ : MVEFloatArithNeon<iname, suffix, size{0}, (outs MQPR:$Qd),
!con(iops, (ins MQPR:$Qn, MQPR:$Qm)), "$Qd, $Qn, $Qm",
- vpred, cstr, pattern> {
+ vpred, cstr, size, pattern> {
bits<4> Qd;
bits<4> Qn;
@@ -3697,7 +3707,7 @@ class MVE_VADDSUBFMA_fp<string iname, string suffix, bit size, bit bit_4,
}
multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
- def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0b1, 0b0, fms,
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0b1, 0b0, fms,
(ins MQPR:$Qd_src), vpred_n, "$Qd = $Qd_src">;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = int_arm_mve_fma_predicated;
@@ -3713,20 +3723,20 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma (fneg m1), m2, add)),
add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
} else {
def : Pat<(VTI.Vec (fma m1, m2, add)),
(Inst $add, $m1, $m2)>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma m1, m2, add)),
add)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
- (Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
+ (Inst $add, $m1, $m2, ARMVCCThen, $pred, zero_reg)>;
}
}
}
@@ -3738,7 +3748,7 @@ defm MVE_VFMSf16 : MVE_VFMA_fp_multi<"vfms", 1, MVE_v8f16>;
multiclass MVE_VADDSUB_fp_m<string iname, bit bit_21, MVEVectorVTInfo VTI,
SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size{0}, 0, 1, bit_21> {
+ def "" : MVE_VADDSUBFMA_fp<iname, VTI.Suffix, VTI.Size, 0, 1, bit_21> {
let validForTailPredication = 1;
}
defvar Inst = !cast<Instruction>(NAME);
@@ -3759,10 +3769,10 @@ defm MVE_VADDf16 : MVE_VADD_fp_m<MVE_v8f16>;
defm MVE_VSUBf32 : MVE_VSUB_fp_m<MVE_v4f32>;
defm MVE_VSUBf16 : MVE_VSUB_fp_m<MVE_v8f16>;
-class MVE_VCADD<string suffix, bit size, string cstr="">
- : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
+class MVE_VCADD<string suffix, bits<2> size, string cstr="">
+ : MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> {
bits<4> Qd;
bits<4> Qn;
bit rot;
@@ -3780,8 +3790,8 @@ class MVE_VCADD<string suffix, bit size, string cstr="">
let Inst{4} = 0b0;
}
-multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
- def "" : MVE_VCADD<VTI.Suffix, size, cstr>;
+multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, string cstr=""> {
+ def "" : MVE_VCADD<VTI.Suffix, VTI.Size, cstr>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3795,18 +3805,18 @@ multiclass MVE_VCADD_m<MVEVectorVTInfo VTI, bit size, string cstr=""> {
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
-defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16, 0b0>;
-defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, 0b1, "@earlyclobber $Qd">;
+defm MVE_VCADDf16 : MVE_VCADD_m<MVE_v8f16>;
+defm MVE_VCADDf32 : MVE_VCADD_m<MVE_v4f32, "@earlyclobber $Qd">;
-class MVE_VABD_fp<string suffix, bit size>
+class MVE_VABD_fp<string suffix, bits<2> size>
: MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm),
- "$Qd, $Qn, $Qm", vpred_r, ""> {
+ "$Qd, $Qn, $Qm", vpred_r, "", size> {
bits<4> Qd;
bits<4> Qn;
@@ -3814,7 +3824,7 @@ class MVE_VABD_fp<string suffix, bit size>
let Inst{25-23} = 0b110;
let Inst{22} = Qd{3};
let Inst{21} = 0b1;
- let Inst{20} = size;
+ let Inst{20} = size{0};
let Inst{19-17} = Qn{2-0};
let Inst{16} = 0b0;
let Inst{15-13} = Qd{2-0};
@@ -3826,7 +3836,7 @@ class MVE_VABD_fp<string suffix, bit size>
multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
Intrinsic unpred_int, Intrinsic pred_int> {
- def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size{0}>;
+ def "" : MVE_VABD_fp<VTI.Suffix, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -3837,7 +3847,7 @@ multiclass MVE_VABDT_fp_m<MVEVectorVTInfo VTI,
(i32 0), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
@@ -3846,7 +3856,7 @@ multiclass MVE_VABD_fp_m<MVEVectorVTInfo VTI>
: MVE_VABDT_fp_m<VTI, int_arm_mve_vabd, int_arm_mve_abd_predicated>;
defm MVE_VABDf32 : MVE_VABD_fp_m<MVE_v4f32>;
-defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
+defm MVE_VABDf16 : MVE_VABD_fp_m<MVE_v8f16>;
let Predicates = [HasMVEFloat] in {
def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))),
@@ -3859,7 +3869,7 @@ class MVE_VCVT_fix<string suffix, bit fsi, bit U, bit op,
Operand imm_operand_type>
: MVE_float<"vcvt", suffix,
(outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6),
- "$Qd, $Qm, $imm6", vpred_r, "", []> {
+ "$Qd, $Qm, $imm6", vpred_r, "", !if(fsi, 0b10, 0b01), []> {
bits<4> Qd;
bits<6> imm6;
@@ -3913,7 +3923,7 @@ multiclass MVE_VCVT_fix_patterns<Instruction Inst, bit U, MVEVectorVTInfo DestVT
imm:$scale,
(DestVTI.Pred VCCR:$mask))),
(DestVTI.Vec (Inst (SrcVTI.Vec MQPR:$Qm), imm:$scale,
- ARMVCCThen, (DestVTI.Pred VCCR:$mask),
+ ARMVCCThen, (DestVTI.Pred VCCR:$mask), zero_reg,
(DestVTI.Vec MQPR:$inactive)))>;
}
}
@@ -3942,7 +3952,7 @@ defm MVE_VCVTu32f32_fix : MVE_VCVT_fix_f32_m<0b1, 0b1, MVE_v4u32, MVE_v4f32>;
class MVE_VCVT_fp_int_anpm<string suffix, bits<2> size, bit op, string anpm,
bits<2> rm, list<dag> pattern=[]>
: MVE_float<!strconcat("vcvt", anpm), suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -3976,7 +3986,7 @@ multiclass MVE_VCVT_fp_int_anpm_inner<MVEVectorVTInfo Int, MVEVectorVTInfo Flt,
def : Pat<(Int.Vec (PredIntr (i32 Int.Unsigned), (Int.Vec MQPR:$inactive),
(Flt.Vec MQPR:$in), (Flt.Pred VCCR:$pred))),
(Int.Vec (Inst (Flt.Vec MQPR:$in), ARMVCCThen,
- (Flt.Pred VCCR:$pred), (Int.Vec MQPR:$inactive)))>;
+ (Flt.Pred VCCR:$pred), zero_reg, (Int.Vec MQPR:$inactive)))>;
}
}
@@ -3999,7 +4009,7 @@ defm MVE_VCVTu32f32 : MVE_VCVT_fp_int_anpm_outer<MVE_v4u32, MVE_v4f32>;
class MVE_VCVT_fp_int<string suffix, bits<2> size, bit toint, bit unsigned,
list<dag> pattern=[]>
: MVE_float<"vcvt", suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -4032,7 +4042,7 @@ multiclass MVE_VCVT_fp_int_m<MVEVectorVTInfo Dest, MVEVectorVTInfo Src,
(Src.Vec MQPR:$src), (i32 Unsigned),
(Src.Pred VCCR:$mask), (Dest.Vec MQPR:$inactive))),
(Dest.Vec (Inst (Src.Vec MQPR:$src), ARMVCCThen,
- (Src.Pred VCCR:$mask),
+ (Src.Pred VCCR:$mask), zero_reg,
(Dest.Vec MQPR:$inactive)))>;
}
}
@@ -4048,10 +4058,21 @@ defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m<MVE_v8f16, MVE_v8u16, uint_to_fp>;
defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4s32, sint_to_fp>;
defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m<MVE_v4f32, MVE_v4u32, uint_to_fp>;
+let Predicates = [HasMVEFloat] in {
+ def : Pat<(v4i32 (fp_to_sint_sat v4f32:$src, i32)),
+ (MVE_VCVTs32f32z v4f32:$src)>;
+ def : Pat<(v4i32 (fp_to_uint_sat v4f32:$src, i32)),
+ (MVE_VCVTu32f32z v4f32:$src)>;
+ def : Pat<(v8i16 (fp_to_sint_sat v8f16:$src, i16)),
+ (MVE_VCVTs16f16z v8f16:$src)>;
+ def : Pat<(v8i16 (fp_to_uint_sat v8f16:$src, i16)),
+ (MVE_VCVTu16f16z v8f16:$src)>;
+}
+
class MVE_VABSNEG_fp<string iname, string suffix, bits<2> size, bit negate,
list<dag> pattern=[]>
: MVE_float<iname, suffix, (outs MQPR:$Qd),
- (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> {
+ (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> {
bits<4> Qd;
let Inst{28} = 0b1;
@@ -4077,7 +4098,7 @@ multiclass MVE_VABSNEG_fp_m<string iname, SDNode unpred_op, Intrinsic pred_int,
(VTI.Vec (Inst $v))>;
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
- (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+ (VTI.Vec (Inst $v, ARMVCCThen, $mask, zero_reg, $inactive))>;
}
}
@@ -4090,15 +4111,15 @@ defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated,
MVE_v4f32, 1>;
-class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
+class MVE_VMAXMINNMA<string iname, string suffix, bits<2> size, bit bit_12,
list<dag> pattern=[]>
: MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm),
NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src",
- pattern> {
+ size, pattern> {
bits<4> Qd;
bits<4> Qm;
- let Inst{28} = size;
+ let Inst{28} = size{0};
let Inst{25-23} = 0b100;
let Inst{22} = Qd{3};
let Inst{21-16} = 0b111111;
@@ -4111,12 +4132,13 @@ class MVE_VMAXMINNMA<string iname, string suffix, bit size, bit bit_12,
let Inst{0} = 0b1;
let isCommutable = 1;
+ let validForTailPredication = 1;
}
multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
SDNode unpred_op, Intrinsic pred_int,
bit bit_12> {
- def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size{0}, bit_12>;
+ def "" : MVE_VMAXMINNMA<iname, VTI.Suffix, VTI.Size, bit_12>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
@@ -4129,7 +4151,7 @@ multiclass MVE_VMAXMINNMA_m<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
}
@@ -4150,9 +4172,9 @@ defm MVE_VMINNMAf16 : MVE_VMINNMA<MVE_v8f16, 0b1>;
// start of MVE compares
class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
- VCMPPredicateOperand predtype, list<dag> pattern=[]>
+ VCMPPredicateOperand predtype, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc),
- NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> {
+ NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", vecsize, pattern> {
// Base class for comparing two vector registers
bits<3> fc;
bits<4> Qn;
@@ -4187,24 +4209,24 @@ class MVE_VCMPqq<string suffix, bit bit_28, bits<2> bits_21_20,
}
class MVE_VCMPqqf<string suffix, bit size>
- : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp> {
+ : MVE_VCMPqq<suffix, size, 0b11, pred_basic_fp, !if(size, 0b01, 0b10)> {
let Predicates = [HasMVEFloat];
}
class MVE_VCMPqqi<string suffix, bits<2> size>
- : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i> {
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_i, size> {
let Inst{12} = 0b0;
let Inst{0} = 0b0;
}
class MVE_VCMPqqu<string suffix, bits<2> size>
- : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u> {
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_u, size> {
let Inst{12} = 0b0;
let Inst{0} = 0b1;
}
class MVE_VCMPqqs<string suffix, bits<2> size>
- : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s> {
+ : MVE_VCMPqq<suffix, 0b1, size, pred_basic_s, size> {
let Inst{12} = 0b1;
}
@@ -4224,9 +4246,9 @@ def MVE_VCMPs16 : MVE_VCMPqqs<"s16", 0b01>;
def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>;
class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
- VCMPPredicateOperand predtype, list<dag> pattern=[]>
+ VCMPPredicateOperand predtype, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc),
- NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> {
+ NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", vecsize, pattern> {
// Base class for comparing a vector register with a scalar
bits<3> fc;
bits<4> Qn;
@@ -4252,24 +4274,24 @@ class MVE_VCMPqr<string suffix, bit bit_28, bits<2> bits_21_20,
}
class MVE_VCMPqrf<string suffix, bit size>
- : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp> {
+ : MVE_VCMPqr<suffix, size, 0b11, pred_basic_fp, !if(size, 0b01, 0b10)> {
let Predicates = [HasMVEFloat];
}
class MVE_VCMPqri<string suffix, bits<2> size>
- : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i> {
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_i, size> {
let Inst{12} = 0b0;
let Inst{5} = 0b0;
}
class MVE_VCMPqru<string suffix, bits<2> size>
- : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u> {
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_u, size> {
let Inst{12} = 0b0;
let Inst{5} = 0b1;
}
class MVE_VCMPqrs<string suffix, bits<2> size>
- : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s> {
+ : MVE_VCMPqr<suffix, 0b1, size, pred_basic_s, size> {
let Inst{12} = 0b1;
}
@@ -4297,11 +4319,11 @@ multiclass unpred_vcmp_z<string suffix, PatLeaf fc> {
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
@@ -4320,18 +4342,18 @@ multiclass unpred_vcmp_r<string suffix, PatLeaf fc> {
(v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup rGPR:$v2)), fc)))),
- (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup rGPR:$v2)), fc)))),
- (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup rGPR:$v2)), fc)))),
- (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
multiclass unpred_vcmpf_z<PatLeaf fc> {
@@ -4341,9 +4363,9 @@ multiclass unpred_vcmpf_z<PatLeaf fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
multiclass unpred_vcmpf_r<PatLeaf fc> {
@@ -4358,14 +4380,14 @@ multiclass unpred_vcmpf_r<PatLeaf fc> {
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))),
- (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))),
- (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup rGPR:$v2)), fc)))),
- (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup rGPR:$v2)), fc)))),
- (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1))>;
+ (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 rGPR:$v2), fc, ARMVCCThen, VCCR:$p1, zero_reg))>;
}
let Predicates = [HasMVEInt] in {
@@ -4477,9 +4499,9 @@ let Predicates = [HasMVEInt] in {
class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
+ bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<oops, iops, NoItinerary, iname, suffix,
- ops, vpred, cstr, pattern> {
+ ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qm;
@@ -4494,10 +4516,11 @@ class MVE_qDest_qSrc<string iname, string suffix, dag oops, dag iops,
}
class MVE_VQxDMLxDH<string iname, bit exch, bit round, bit subtract,
- string suffix, bits<2> size, string cstr="", list<dag> pattern=[]>
+ string suffix, bits<2> size, string cstr="",
+ list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_n, "$Qd = $Qd_src"#cstr, pattern> {
+ vpred_n, "$Qd = $Qd_src"#cstr, size, pattern> {
bits<4> Qn;
let Inst{28} = subtract;
@@ -4528,7 +4551,7 @@ multiclass MVE_VQxDMLxDH_p<string iname, bit exch, bit round, bit subtract,
(? (VTI.Pred VCCR:$pred)))),
(VTI.Vec (Inst (VTI.Vec MQPR:$a), (VTI.Vec MQPR:$b),
(VTI.Vec MQPR:$c),
- ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$pred), zero_reg))>;
}
multiclass MVE_VQxDMLxDH_multi<string iname, bit exch,
@@ -4547,14 +4570,15 @@ defm MVE_VQDMLSDHX : MVE_VQxDMLxDH_multi<"vqdmlsdhx", 0b1, 0b0, 0b1>;
defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>;
defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>;
-class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
+class MVE_VCMUL<string iname, string suffix, bits<2> size, string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size,
+ []> {
bits<4> Qn;
bits<2> rot;
- let Inst{28} = size;
+ let Inst{28} = size{1};
let Inst{21-20} = 0b11;
let Inst{19-17} = Qn{2-0};
let Inst{16} = 0b0;
@@ -4567,8 +4591,8 @@ class MVE_VCMUL<string iname, string suffix, bit size, string cstr="">
}
multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
- bit size, string cstr=""> {
- def "" : MVE_VCMUL<iname, VTI.Suffix, size, cstr>;
+ string cstr=""> {
+ def "" : MVE_VCMUL<iname, VTI.Suffix, VTI.Size, cstr>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEFloat] in {
@@ -4582,20 +4606,20 @@ multiclass MVE_VCMUL_m<string iname, MVEVectorVTInfo VTI,
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
-defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>;
-defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">;
+defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16>;
+defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, "@earlyclobber $Qd">;
class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
- bit T, string cstr, list<dag> pattern=[]>
+ bit T, string cstr, bits<2> vecsize, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, cstr, pattern> {
+ vpred_r, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Qm;
@@ -4614,9 +4638,9 @@ class MVE_VMULL<string iname, string suffix, bit bit_28, bits<2> bits_21_20,
multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
SDPatternOperator unpred_op, Intrinsic pred_int,
- bit Top, string cstr=""> {
+ bit Top, bits<2> vecsize, string cstr=""> {
def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned,
- VTI.Size, Top, cstr>;
+ VTI.Size, Top, cstr, vecsize>;
defvar Inst = !cast<Instruction>(NAME);
let Predicates = [HasMVEInt] in {
@@ -4634,7 +4658,7 @@ multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
uflag, (? (i32 Top), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive)))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -4643,43 +4667,43 @@ multiclass MVE_VMULL_m<MVEVectorVTInfo VTI,
// the unsigned bit switches to encoding the size.
defm MVE_VMULLBs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b01>;
defm MVE_VMULLTs8 : MVE_VMULL_m<MVE_v16s8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b01>;
defm MVE_VMULLBs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b10>;
defm MVE_VMULLTs16 : MVE_VMULL_m<MVE_v8s16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b10>;
defm MVE_VMULLBs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0,
+ int_arm_mve_mull_int_predicated, 0b0, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLTs32 : MVE_VMULL_m<MVE_v4s32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1,
+ int_arm_mve_mull_int_predicated, 0b1, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLBu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b01>;
defm MVE_VMULLTu8 : MVE_VMULL_m<MVE_v16u8, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b01>;
defm MVE_VMULLBu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0>;
+ int_arm_mve_mull_int_predicated, 0b0, 0b10>;
defm MVE_VMULLTu16 : MVE_VMULL_m<MVE_v8u16, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1>;
+ int_arm_mve_mull_int_predicated, 0b1, 0b10>;
defm MVE_VMULLBu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b0,
+ int_arm_mve_mull_int_predicated, 0b0, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLTu32 : MVE_VMULL_m<MVE_v4u32, int_arm_mve_vmull,
- int_arm_mve_mull_int_predicated, 0b1,
+ int_arm_mve_mull_int_predicated, 0b1, 0b11,
"@earlyclobber $Qd">;
defm MVE_VMULLBp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b0>;
+ int_arm_mve_mull_poly_predicated, 0b0, 0b01>;
defm MVE_VMULLTp8 : MVE_VMULL_m<MVE_v16p8, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b1>;
+ int_arm_mve_mull_poly_predicated, 0b1, 0b01>;
defm MVE_VMULLBp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b0>;
+ int_arm_mve_mull_poly_predicated, 0b0, 0b10>;
defm MVE_VMULLTp16 : MVE_VMULL_m<MVE_v8p16, int_arm_mve_vmull_poly,
- int_arm_mve_mull_poly_predicated, 0b1>;
+ int_arm_mve_mull_poly_predicated, 0b1, 0b10>;
let Predicates = [HasMVEInt] in {
def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))),
@@ -4729,7 +4753,7 @@ class MVE_VxMULH<string iname, string suffix, bit U, bits<2> size, bit round,
list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, "", pattern> {
+ vpred_r, "", size, pattern> {
bits<4> Qn;
let Inst{28} = U;
@@ -4759,7 +4783,7 @@ multiclass MVE_VxMULH_m<string iname, MVEVectorVTInfo VTI, SDNode unpred_op,
(i32 VTI.Unsigned), (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -4794,7 +4818,7 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
bits<2> size, bit T, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qd_src, MQPR:$Qm), "$Qd, $Qm",
- vpred_n, "$Qd = $Qd_src", pattern> {
+ vpred_n, "$Qd = $Qd_src", !if(size, 0b10, 0b01), pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = 0b11;
@@ -4854,7 +4878,7 @@ multiclass MVE_VMOVN_p<Instruction Inst, bit top,
(InVTI.Pred VCCR:$pred))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
(InVTI.Vec MQPR:$Qm),
- ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (InVTI.Pred VCCR:$pred), zero_reg))>;
}
defm : MVE_VMOVN_p<MVE_VMOVNi32bh, 0, MVE_v8i16, MVE_v4i32>;
@@ -4876,7 +4900,7 @@ multiclass MVE_VQMOVN_p<Instruction Inst, bit outU, bit inU, bit top,
(InVTI.Pred VCCR:$pred))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
(InVTI.Vec MQPR:$Qm),
- ARMVCCThen, (InVTI.Pred VCCR:$pred)))>;
+ ARMVCCThen, (InVTI.Pred VCCR:$pred), zero_reg))>;
}
defm : MVE_VQMOVN_p<MVE_VQMOVNs32bh, 0, 0, 0, MVE_v8i16, MVE_v4i32>;
@@ -4939,7 +4963,7 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
dag iops_extra, vpred_ops vpred, string cstr>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
!con(iops_extra, (ins MQPR:$Qm)), "$Qd, $Qm",
- vpred, cstr, []> {
+ vpred, cstr, 0b10, []> {
let Inst{28} = op;
let Inst{21-16} = 0b111111;
let Inst{12} = T;
@@ -4968,7 +4992,7 @@ multiclass MVE_VCVT_f2h_m<string iname, int half> {
(v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half),
(v4i1 VCCR:$mask))),
(v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm),
- ARMVCCThen, (v4i1 VCCR:$mask)))>;
+ ARMVCCThen, (v4i1 VCCR:$mask), zero_reg))>;
def : Pat<(v8f16 (MVEvcvtn (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm), (i32 half))),
(v8f16 (Inst (v8f16 MQPR:$Qd_src), (v4f32 MQPR:$Qm)))>;
@@ -4986,7 +5010,7 @@ multiclass MVE_VCVT_h2f_m<string iname, int half> {
(v4f32 MQPR:$inactive), (v8f16 MQPR:$Qm), (i32 half),
(v4i1 VCCR:$mask))),
(v4f32 (Inst (v8f16 MQPR:$Qm), ARMVCCThen,
- (v4i1 VCCR:$mask), (v4f32 MQPR:$inactive)))>;
+ (v4i1 VCCR:$mask), zero_reg, (v4f32 MQPR:$inactive)))>;
def : Pat<(v4f32 (MVEvcvtl (v8f16 MQPR:$Qm), (i32 half))),
(v4f32 (Inst (v8f16 MQPR:$Qm)))>;
@@ -5002,7 +5026,7 @@ class MVE_VxCADD<string iname, string suffix, bits<2> size, bit halve,
string cstr="">
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
- "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> {
+ "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> {
bits<4> Qn;
bit rot;
@@ -5032,7 +5056,7 @@ multiclass MVE_VxCADD_m<string iname, MVEVectorVTInfo VTI,
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
- imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask),
+ imm:$rot, ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -5050,7 +5074,7 @@ class MVE_VADCSBC<string iname, bit I, bit subtract,
dag carryin, list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, "i32", (outs MQPR:$Qd, cl_FPSCR_NZCV:$carryout),
!con((ins MQPR:$Qn, MQPR:$Qm), carryin),
- "$Qd, $Qn, $Qm", vpred_r, "", pattern> {
+ "$Qd, $Qn, $Qm", vpred_r, "", 0b10, pattern> {
bits<4> Qn;
let Inst{28} = subtract;
@@ -5077,7 +5101,7 @@ class MVE_VQDMULL<string iname, string suffix, bit size, bit T,
string cstr="", list<dag> pattern=[]>
: MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
(ins MQPR:$Qn, MQPR:$Qm), "$Qd, $Qn, $Qm",
- vpred_r, cstr, pattern> {
+ vpred_r, cstr, !if(size, 0b10, 0b01), pattern> {
bits<4> Qn;
let Inst{28} = size;
@@ -5108,7 +5132,7 @@ multiclass MVE_VQDMULL_m<string iname, MVEVectorVTInfo VTI, bit size, bit T,
(i32 T), (VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn),
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -5125,10 +5149,9 @@ defm MVE_VQDMULLs32 : MVE_VQDMULL_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
// start of mve_qDest_rSrc
-class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
- string suffix, string ops, vpred_ops vpred, string cstr,
- list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, pattern> {
+class MVE_qr_base<dag oops, dag iops, string iname, string suffix, string ops,
+ vpred_ops vpred, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, iname, suffix, ops, vpred, cstr, vecsize, pattern> {
bits<4> Qd;
bits<4> Qn;
bits<4> Rm;
@@ -5144,19 +5167,19 @@ class MVE_qr_base<dag oops, dag iops, InstrItinClass itin, string iname,
let Inst{3-0} = Rm{3-0};
}
-class MVE_qDest_rSrc<string iname, string suffix, string cstr="", list<dag> pattern=[]>
+class MVE_qDest_rSrc<string iname, string suffix, string cstr="", bits<2> vecsize, list<dag> pattern=[]>
: MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm),
- NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
- pattern>;
+ iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr,
+ vecsize, pattern>;
-class MVE_qDestSrc_rSrc<string iname, string suffix, list<dag> pattern=[]>
+class MVE_qDestSrc_rSrc<string iname, string suffix, bits<2> vecsize, list<dag> pattern=[]>
: MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm),
- NoItinerary, iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
- pattern>;
+ iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src",
+ vecsize, pattern>;
-class MVE_qDest_single_rSrc<string iname, string suffix, list<dag> pattern=[]>
+class MVE_qDest_single_rSrc<string iname, string suffix, bits<2> vecsize, list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname,
- suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> {
+ suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", vecsize, pattern> {
bits<4> Qd;
bits<4> Rm;
@@ -5187,14 +5210,14 @@ multiclass MVE_vec_scalar_int_pat_m<Instruction inst, MVEVectorVTInfo VTI,
(pred_op (VTI.Pred VCCR:$mask),
(VTI.Vec MQPR:$inactive)))),
(VTI.Vec (inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
}
class MVE_VADDSUB_qr<string iname, string suffix, bits<2> size,
bit bit_5, bit bit_12, bit bit_16, bit bit_28>
- : MVE_qDest_rSrc<iname, suffix, ""> {
+ : MVE_qDest_rSrc<iname, suffix, "", size> {
let Inst{28} = bit_28;
let Inst{21-20} = size;
@@ -5262,7 +5285,7 @@ defm MVE_VQSUB_qr_u32 : MVE_VQSUB_qr_m<MVE_v4u32, usubsat>;
class MVE_VQDMULL_qr<string iname, string suffix, bit size,
bit T, string cstr="", list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, cstr, pattern> {
+ : MVE_qDest_rSrc<iname, suffix, cstr, !if(size, 0b10, 0b01), pattern> {
let Inst{28} = size;
let Inst{21-20} = 0b11;
@@ -5293,7 +5316,7 @@ multiclass MVE_VQDMULL_qr_m<string iname, MVEVectorVTInfo VTI, bit size,
(VTI.DblPred VCCR:$mask),
(VTI.DblVec MQPR:$inactive))),
(VTI.DblVec (Inst (VTI.Vec MQPR:$Qm), (i32 rGPR:$val),
- ARMVCCThen, (VTI.DblPred VCCR:$mask),
+ ARMVCCThen, (VTI.DblPred VCCR:$mask), zero_reg,
(VTI.DblVec MQPR:$inactive)))>;
}
}
@@ -5307,12 +5330,12 @@ defm MVE_VQDMULL_qr_s16 : MVE_VQDMULL_qr_halves<MVE_v8s16, 0b0>;
defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves<MVE_v4s32, 0b1, "@earlyclobber $Qd">;
class MVE_VxADDSUB_qr<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, bit subtract,
- list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ bit bit_28, bits<2> size, bit subtract,
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, "", vecsize, pattern> {
let Inst{28} = bit_28;
- let Inst{21-20} = bits_21_20;
+ let Inst{21-20} = size;
let Inst{16} = 0b0;
let Inst{12} = subtract;
let Inst{8} = 0b1;
@@ -5322,7 +5345,7 @@ class MVE_VxADDSUB_qr<string iname, string suffix,
multiclass MVE_VHADDSUB_qr_m<string iname, MVEVectorVTInfo VTI, bit subtract,
Intrinsic unpred_int, Intrinsic pred_int> {
- def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract>;
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size, subtract, VTI.Size>;
defm : MVE_vec_scalar_int_pat_m<!cast<Instruction>(NAME),
VTI, unpred_int, pred_int, 1, 1>;
}
@@ -5351,7 +5374,7 @@ defm MVE_VHSUB_qr_u32 : MVE_VHSUB_qr_m<MVE_v4u32>;
multiclass MVE_VADDSUB_qr_f<string iname, MVEVectorVTInfo VTI, bit subtract,
SDNode Op, Intrinsic PredInt> {
- def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract>;
+ def "" : MVE_VxADDSUB_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, subtract, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, Op, PredInt, (? ),
!cast<Instruction>(NAME)>;
}
@@ -5370,7 +5393,7 @@ let Predicates = [HasMVEFloat] in {
class MVE_VxSHL_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_7, bit bit_17, list<dag> pattern=[]>
- : MVE_qDest_single_rSrc<iname, suffix, pattern> {
+ : MVE_qDest_single_rSrc<iname, suffix, size, pattern> {
let Inst{28} = U;
let Inst{25-23} = 0b100;
@@ -5398,7 +5421,7 @@ multiclass MVE_VxSHL_qr_p<string iname, MVEVectorVTInfo VTI, bit q, bit r> {
(i32 q), (i32 r), (i32 VTI.Unsigned),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$in), (i32 rGPR:$sh),
- ARMVCCThen, (VTI.Pred VCCR:$mask)))>;
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
}
multiclass MVE_VxSHL_qr_types<string iname, bit bit_7, bit bit_17> {
@@ -5432,7 +5455,7 @@ let Predicates = [HasMVEInt] in {
}
class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ : MVE_qDest_rSrc<iname, suffix, "", size, pattern> {
let Inst{28} = 0b1;
let Inst{21-20} = size;
@@ -5457,7 +5480,7 @@ multiclass MVE_VBRSR_pat_m<MVEVectorVTInfo VTI, Instruction Inst> {
(VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
(VTI.Pred VCCR:$mask))),
(VTI.Vec (Inst (VTI.Vec MQPR:$Qn), (i32 rGPR:$Rm),
- ARMVCCThen, (VTI.Pred VCCR:$mask),
+ ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
}
@@ -5482,7 +5505,7 @@ let Predicates = [HasMVEFloat] in {
}
class MVE_VMUL_qr_int<string iname, string suffix, bits<2> size>
- : MVE_qDest_rSrc<iname, suffix, ""> {
+ : MVE_qDest_rSrc<iname, suffix, "", size> {
let Inst{28} = 0b0;
let Inst{21-20} = size;
@@ -5506,11 +5529,11 @@ defm MVE_VMUL_qr_i16 : MVE_VMUL_qr_int_m<MVE_v8i16>;
defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m<MVE_v4i32>;
class MVE_VxxMUL_qr<string iname, string suffix,
- bit bit_28, bits<2> bits_21_20, list<dag> pattern=[]>
- : MVE_qDest_rSrc<iname, suffix, "", pattern> {
+ bit bit_28, bits<2> size, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_qDest_rSrc<iname, suffix, "", vecsize, pattern> {
let Inst{28} = bit_28;
- let Inst{21-20} = bits_21_20;
+ let Inst{21-20} = size;
let Inst{16} = 0b1;
let Inst{12} = 0b0;
let Inst{8} = 0b0;
@@ -5520,7 +5543,7 @@ class MVE_VxxMUL_qr<string iname, string suffix,
multiclass MVE_VxxMUL_qr_m<string iname, MVEVectorVTInfo VTI, bit bit_28,
PatFrag Op, Intrinsic int_unpred, Intrinsic int_pred> {
- def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size>;
+ def "" : MVE_VxxMUL_qr<iname, VTI.Suffix, bit_28, VTI.Size, VTI.Size>;
let Predicates = [HasMVEInt] in {
defm : MVE_TwoOpPatternDup<VTI, Op, int_pred, (? ), !cast<Instruction>(NAME)>;
@@ -5546,7 +5569,7 @@ defm MVE_VQRDMULH_qr_s32 : MVE_VQRDMULH_qr_m<MVE_v4s32>;
multiclass MVE_VxxMUL_qr_f_m<MVEVectorVTInfo VTI> {
let validForTailPredication = 1 in
- def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>;
+ def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>;
defm : MVE_TwoOpPatternDup<VTI, fmul, int_arm_mve_mul_predicated, (? ),
!cast<Instruction>(NAME)>;
}
@@ -5558,8 +5581,8 @@ let Predicates = [HasMVEFloat] in {
class MVE_VFMAMLA_qr<string iname, string suffix,
bit bit_28, bits<2> bits_21_20, bit S,
- list<dag> pattern=[]>
- : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+ bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_qDestSrc_rSrc<iname, suffix, vecsize, pattern> {
let Inst{28} = bit_28;
let Inst{21-20} = bits_21_20;
@@ -5574,7 +5597,7 @@ class MVE_VFMAMLA_qr<string iname, string suffix,
multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
bit scalar_addend> {
def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Unsigned, VTI.Size,
- scalar_addend>;
+ scalar_addend, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = !cast<Intrinsic>("int_arm_mve_" # iname # "_n_predicated");
defvar v1 = (VTI.Vec MQPR:$v1);
@@ -5596,7 +5619,7 @@ multiclass MVE_VMLA_qr_multi<string iname, MVEVectorVTInfo VTI,
}
def : Pat<(VTI.Vec (pred_int v1, v2, s, pred)),
- (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v1, v2, s, ARMVCCThen, pred, zero_reg))>;
}
}
@@ -5616,7 +5639,7 @@ defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>;
multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
bit scalar_addend> {
- def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend>;
+ def "": MVE_VFMAMLA_qr<iname, VTI.Suffix, VTI.Size{0}, 0b11, scalar_addend, VTI.Size>;
defvar Inst = !cast<Instruction>(NAME);
defvar pred_int = int_arm_mve_fma_predicated;
defvar v1 = (VTI.Vec MQPR:$v1);
@@ -5632,9 +5655,9 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma v1, v2, vs)),
v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred, zero_reg))>;
} else {
def : Pat<(VTI.Vec (fma v1, vs, v2)),
(VTI.Vec (Inst v2, v1, is))>;
@@ -5643,15 +5666,15 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma vs, v2, v1)),
v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
(VTI.Vec (fma v2, vs, v1)),
v1)),
- (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
+ (VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred, zero_reg))>;
def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
- (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred, zero_reg))>;
def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),
- (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
+ (VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred, zero_reg))>;
}
}
}
@@ -5665,7 +5688,7 @@ let Predicates = [HasMVEFloat] in {
class MVE_VQDMLAH_qr<string iname, string suffix, bit U, bits<2> size,
bit bit_5, bit bit_12, list<dag> pattern=[]>
- : MVE_qDestSrc_rSrc<iname, suffix, pattern> {
+ : MVE_qDestSrc_rSrc<iname, suffix, size, pattern> {
let Inst{28} = U;
let Inst{21-20} = size;
@@ -5691,7 +5714,7 @@ multiclass MVE_VQDMLAH_qr_multi<string iname, MVEVectorVTInfo VTI,
(i32 rGPR:$s), (VTI.Pred VCCR:$pred))),
(VTI.Vec (Inst (VTI.Vec MQPR:$v1), (VTI.Vec MQPR:$v2),
(i32 rGPR:$s), ARMVCCThen,
- (VTI.Pred VCCR:$pred)))>;
+ (VTI.Pred VCCR:$pred), zero_reg))>;
}
}
@@ -5710,7 +5733,7 @@ class MVE_VxDUP<string iname, string suffix, bits<2> size, bit bit_12,
ValueType VT, SDPatternOperator vxdup>
: MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
(ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary,
- iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src",
+ iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src", size,
[(set (VT MQPR:$Qd), (i32 tGPREven:$Rn),
(vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> {
bits<4> Qd;
@@ -5745,7 +5768,7 @@ class MVE_VxWDUP<string iname, string suffix, bits<2> size, bit bit_12,
list<dag> pattern=[]>
: MVE_p<(outs MQPR:$Qd, tGPREven:$Rn),
(ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary,
- iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src",
+ iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src", size,
pattern> {
bits<4> Qd;
bits<4> Rm;
@@ -5780,7 +5803,7 @@ def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>;
let isReMaterializable = 1 in
class MVE_VCTPInst<string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix,
- "$Rn", vpred_n, "", pattern> {
+ "$Rn", vpred_n, "", size, pattern> {
bits<4> Rn;
let Inst{28-27} = 0b10;
@@ -5804,7 +5827,7 @@ multiclass MVE_VCTP<MVEVectorVTInfo VTI, Intrinsic intr> {
def : Pat<(intr rGPR:$Rn),
(VTI.Pred (Inst rGPR:$Rn))>;
def : Pat<(and (intr rGPR:$Rn), (VTI.Pred VCCR:$mask)),
- (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask))>;
+ (VTI.Pred (Inst rGPR:$Rn, ARMVCCThen, VCCR:$mask, zero_reg))>;
}
}
@@ -5837,6 +5860,7 @@ class MVE_VMOV_64bit<dag oops, dag iops, bit to_qreg, string ops, string cstr>
let Inst{4} = idx2;
let Inst{3-0} = Rt{3-0};
+ let VecSize = 0b10;
let hasSideEffects = 0;
}
@@ -5925,7 +5949,7 @@ class MVE_vldst24_base<bit writeback, bit fourregs, bits<2> stage, bits<2> size,
bit load, dag Oops, dag loadIops, dag wbIops,
string iname, string ops,
string cstr, list<dag> pattern=[]>
- : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, pattern> {
+ : MVE_MI<Oops, !con(loadIops, wbIops), NoItinerary, iname, ops, cstr, size, pattern> {
bits<4> VQd;
bits<4> Rn;
@@ -6037,13 +6061,13 @@ multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
def : Pat<(int_arm_mve_vst2q i32:$addr,
(VT MQPR:$v0), (VT MQPR:$v1), (i32 stage)),
(!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize)
- (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ (REG_SEQUENCE MQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
t2_addr_offset_none:$addr)>;
foreach stage = [0,1] in
def : Pat<(i32 (MVEVST2UPD i32:$addr, (i32 32),
(VT MQPR:$v0), (VT MQPR:$v1), (i32 stage))),
(i32 (!cast<Instruction>("MVE_VST2"#stage#"_"#lanesize#_wb)
- (REG_SEQUENCE QQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
+ (REG_SEQUENCE MQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1),
t2_addr_offset_none:$addr))>;
foreach stage = [0,1,2,3] in
@@ -6051,16 +6075,16 @@ multiclass MVE_vst24_patterns<int lanesize, ValueType VT> {
(VT MQPR:$v0), (VT MQPR:$v1),
(VT MQPR:$v2), (VT MQPR:$v3), (i32 stage)),
(!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize)
- (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
- VT:$v2, qsub_2, VT:$v3, qsub_3),
+ (REG_SEQUENCE MQQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
t2_addr_offset_none:$addr)>;
foreach stage = [0,1,2,3] in
def : Pat<(i32 (MVEVST4UPD i32:$addr, (i32 64),
(VT MQPR:$v0), (VT MQPR:$v1),
(VT MQPR:$v2), (VT MQPR:$v3), (i32 stage))),
(i32 (!cast<Instruction>("MVE_VST4"#stage#"_"#lanesize#_wb)
- (REG_SEQUENCE QQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
- VT:$v2, qsub_2, VT:$v3, qsub_3),
+ (REG_SEQUENCE MQQQQPR, VT:$v0, qsub_0, VT:$v1, qsub_1,
+ VT:$v2, qsub_2, VT:$v3, qsub_3),
t2_addr_offset_none:$addr))>;
}
defm : MVE_vst24_patterns<8, v16i8>;
@@ -6123,8 +6147,8 @@ def MVE_memD: MVE_memsz<0b11, 3, ?, "d", ["", "u", "s", "f"]>;
// input values.
class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
dag oops, dag iops, string asm, string suffix,
- string ops, string cstr, list<dag> pattern=[]>
- : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, pattern> {
+ string ops, string cstr, bits<2> vecsize, list<dag> pattern=[]>
+ : MVE_p<oops, iops, NoItinerary, asm, suffix, ops, vpred_n, cstr, vecsize, pattern> {
bits<3> Qd;
let Inst{28} = U;
@@ -6160,12 +6184,14 @@ class MVE_VLDRSTR_base<MVE_ldst_direction dir, bit U, bit P, bit W, bit opc,
class MVE_VLDRSTR_cs<MVE_ldst_direction dir, MVE_memsz memsz, bit P, bit W,
dag oops, dag iops, string asm, string suffix,
IndexMode im, string ops, string cstr>
- : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr> {
+ : MVE_VLDRSTR_base<dir, 0, P, W, 1, oops, iops, asm, suffix, ops, cstr, memsz.encoding> {
bits<12> addr;
let Inst{23} = addr{7};
let Inst{19-16} = addr{11-8};
let Inst{8-7} = memsz.encoding;
let Inst{6-0} = addr{6-0};
+
+ let IM = im;
}
// Contiguous, widening/narrowing
@@ -6173,7 +6199,7 @@ class MVE_VLDRSTR_cw<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
bit P, bit W, bits<2> size, dag oops, dag iops,
string asm, string suffix, IndexMode im,
string ops, string cstr>
- : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr> {
+ : MVE_VLDRSTR_base<dir, U, P, W, 0, oops, iops, asm, suffix, ops, cstr, size> {
bits<11> addr;
let Inst{23} = addr{7};
let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit
@@ -6290,7 +6316,7 @@ class MVE_VLDRSTR_rq<MVE_ldst_direction dir, MVE_memsz memsz, bit U,
bits<2> size, bit os, string asm, string suffix, int shift>
: MVE_VLDRSTR_base<dir, U, 0b0, 0b0, 0, dir.Oops,
!con(dir.Iops, (ins mve_addr_rq_shift<shift>:$addr)),
- asm, suffix, "$Qd, $addr", dir.cstr> {
+ asm, suffix, "$Qd, $addr", dir.cstr, size> {
bits<7> addr;
let Inst{23} = 0b1;
let Inst{19-16} = addr{6-3};
@@ -6336,9 +6362,9 @@ multiclass MVE_VLDR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag)),
(VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, 0, UnsignedFlag, (VTI.Pred VCCR:$pred))),
- (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ (VTI.Vec (InstU GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg))>;
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), memsz.TypeBits, memsz.shift, UnsignedFlag, (VTI.Pred VCCR:$pred))),
- (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
@@ -6350,7 +6376,7 @@ multiclass MVE_VLDR_rq_b<list<MVEVectorVTInfo> VTIs> {
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned)),
(VTI.Vec (Inst GPR:$base, MQPR:$offsets))>;
def : Pat<(VTI.Vec (int_arm_mve_vldr_gather_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), 8, 0, VTI.Unsigned, (VTI.Pred VCCR:$pred))),
- (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred))>;
+ (VTI.Vec (Inst GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
@@ -6365,9 +6391,9 @@ multiclass MVE_VSTR_rq_w<MVE_memsz memsz, list<MVEVectorVTInfo> VTIs> {
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift),
(Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, 0, (VTI.Pred VCCR:$pred)),
- (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ (InstU MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), memsz.TypeBits, memsz.shift, (VTI.Pred VCCR:$pred)),
- (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg)>;
}
}
multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
@@ -6379,7 +6405,7 @@ multiclass MVE_VSTR_rq_b<list<MVEVectorVTInfo> VTIs> {
def : Pat<(int_arm_mve_vstr_scatter_offset GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0),
(Inst MQPR:$data, GPR:$base, MQPR:$offsets)>;
def : Pat<(int_arm_mve_vstr_scatter_offset_predicated GPR:$base, (VTIs[0].Vec MQPR:$offsets), (VTI.Vec MQPR:$data), 8, 0, (VTI.Pred VCCR:$pred)),
- (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred)>;
+ (Inst MQPR:$data, GPR:$base, MQPR:$offsets, ARMVCCThen, VCCR:$pred, zero_reg)>;
}
}
@@ -6423,7 +6449,7 @@ class MVE_VLDRSTR_qi<MVE_ldst_direction dir, MVE_memsz memsz, bit W, dag wbops,
string asm, string wbAsm, string suffix, string cstr = "">
: MVE_VLDRSTR_base<dir, 1, 1, W, 1, !con(wbops, dir.Oops),
!con(dir.Iops, (ins mve_addr_q_shift<memsz.shift>:$addr)),
- asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> {
+ asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr, memsz.encoding> {
bits<11> addr;
let Inst{23} = addr{7};
let Inst{19-17} = addr{10-8};
@@ -6460,7 +6486,7 @@ multiclass MVE_VLDR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
def : Pat<(DVTI.Vec (int_arm_mve_vldr_gather_base_predicated
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (AVTI.Pred VCCR:$pred))),
(DVTI.Vec (Inst (AVTI.Vec MQPR:$addr), (i32 imm:$offset),
- ARMVCCThen, VCCR:$pred))>;
+ ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
@@ -6478,7 +6504,7 @@ multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
def : Pat<(int_arm_mve_vstr_scatter_base_predicated
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred)),
(Inst (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
- (i32 imm:$offset), ARMVCCThen, VCCR:$pred)>;
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data))),
(AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
@@ -6486,7 +6512,7 @@ multiclass MVE_VSTR_qi<MVE_memsz memsz, MVEVectorVTInfo AVTI,
def : Pat<(AVTI.Vec (int_arm_mve_vstr_scatter_base_wb_predicated
(AVTI.Vec MQPR:$addr), (i32 imm:$offset), (DVTI.Vec MQPR:$data), (AVTI.Pred VCCR:$pred))),
(AVTI.Vec (InstPre (DVTI.Vec MQPR:$data), (AVTI.Vec MQPR:$addr),
- (i32 imm:$offset), ARMVCCThen, VCCR:$pred))>;
+ (i32 imm:$offset), ARMVCCThen, VCCR:$pred, zero_reg))>;
}
}
@@ -6532,7 +6558,7 @@ foreach suffix = memsz.suffixes in {
// end of MVE predicable load/store
class MVE_VPT<string suffix, bits<2> size, dag iops, string asm, list<dag> pattern=[]>
- : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> {
+ : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", size, pattern> {
bits<3> fc;
bits<4> Mk;
bits<3> Qn;
@@ -6642,7 +6668,7 @@ def MVE_VPTv16s8r : MVE_VPTt2s<"s8", 0b00>;
class MVE_VPTf<string suffix, bit size, dag iops, string asm, list<dag> pattern=[]>
: MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm,
- "", pattern> {
+ "", !if(size, 0b01, 0b10), pattern> {
bits<3> fc;
bits<4> Mk;
bits<3> Qn;
@@ -6695,7 +6721,7 @@ def MVE_VPTv4f32r : MVE_VPTft2<"f32", 0b0>;
def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>;
def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
- !strconcat("vpst", "${Mk}"), "", "", []> {
+ !strconcat("vpst", "${Mk}"), "", "", 0b00, []> {
bits<4> Mk;
let Inst{31-23} = 0b111111100;
@@ -6712,7 +6738,7 @@ def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary,
}
def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
- "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> {
+ "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", 0b00, []> {
bits<4> Qn;
bits<4> Qd;
bits<4> Qm;
@@ -6741,71 +6767,71 @@ def : MVEInstAlias<"vpsel${vp}." # suffix # "\t$Qd, $Qn, $Qm",
let Predicates = [HasMVEInt] in {
def : Pat<(v16i8 (vselect (v16i1 VCCR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
- (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
- (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
- (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8f16 (vselect (v8i1 VCCR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
- (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4f32 (vselect (v4i1 VCCR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
- (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))),
(v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>;
+ (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne), zero_reg))>;
def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))),
(v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne), zero_reg))>;
def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))),
(v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne), zero_reg))>;
def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))),
(v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>;
+ (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne), zero_reg))>;
def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))),
(v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, ARMVCCNone,
- (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>;
+ (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne), zero_reg))>;
// Pred <-> Int
def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (zext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (zext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (sext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (sext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (sext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi8 255), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v16i8 (anyext (v16i1 VCCR:$pred))),
- (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred))>;
+ (v16i8 (MVE_VPSEL (MVE_VMOVimmi8 1), (MVE_VMOVimmi8 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v8i16 (anyext (v8i1 VCCR:$pred))),
- (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred))>;
+ (v8i16 (MVE_VPSEL (MVE_VMOVimmi16 1), (MVE_VMOVimmi16 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i32 (anyext (v4i1 VCCR:$pred))),
- (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred))>;
+ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), ARMVCCNone, VCCR:$pred, zero_reg))>;
}
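A note on the constants in the Pred <-> Int patterns above: the sext patterns all reuse (MVE_VMOVimmi8 255) for the "true" operand because a vector whose every byte is 0xff reads as all-ones (-1) at any lane width, while zext/anyext splat a 1 at the matching width. A purely illustrative scalar model (not part of the patch) of the per-lane select these VPSEL patterns implement:

    #include <cstdint>

    // Each destination lane is a select on its predicate bit.
    int32_t zextLane(bool P) { return P ? 1 : 0; }   // VPSEL(vmov #1,       vmov #0)
    int32_t sextLane(bool P) { return P ? -1 : 0; }  // VPSEL(vmov.i8 #0xff, vmov #0)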
let Predicates = [HasMVEFloat] in {
// Pred <-> Float
// 112 is 1.0 in float
def : Pat<(v4f32 (uint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 112)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
// 2620 is 1.0 in half
def : Pat<(v8f16 (uint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2620)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
// 240 is -1.0 in float
def : Pat<(v4f32 (sint_to_fp (v4i1 VCCR:$pred))),
- (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred))>;
+ (v4f32 (MVE_VPSEL (v4f32 (MVE_VMOVimmf32 240)), (v4f32 (MVE_VMOVimmi32 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
// 2748 is -1.0 in half
def : Pat<(v8f16 (sint_to_fp (v8i1 VCCR:$pred))),
- (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred))>;
+ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), ARMVCCNone, VCCR:$pred, zero_reg))>;
def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))),
(v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>;
@@ -6818,7 +6844,7 @@ let Predicates = [HasMVEFloat] in {
}
def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary,
- "vpnot", "", "", vpred_n, "", []> {
+ "vpnot", "", "", vpred_n, "", 0b00, []> {
let Inst{31-0} = 0b11111110001100010000111101001101;
let Unpredictable{19-17} = 0b111;
let Unpredictable{12} = 0b1;
@@ -6930,6 +6956,37 @@ def MVE_LCTP : MVE_loltp_end<(outs), (ins pred:$p), "lctp${p}", ""> {
}
+// Pseudo instructions for lowering MQQPR and MQQQQPR stack spills and reloads.
+// They are equivalent to VLDMDIA/VSTMDIA with a single reg, as opposed to multiple
+// dreg subregs.
+
+let Predicates = [HasMVEInt], AM = AddrMode4 in {
+let mayStore = 1, hasSideEffects = 0 in {
+ def MQQPRStore : t2PseudoInst<(outs), (ins MQQPR:$val, GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+ def MQQQQPRStore : t2PseudoInst<(outs), (ins MQQQQPR:$val, GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+}
+let mayLoad = 1, hasSideEffects = 0 in {
+ def MQQPRLoad : t2PseudoInst<(outs MQQPR:$val), (ins GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+ def MQQQQPRLoad : t2PseudoInst<(outs MQQQQPR:$val), (ins GPRnopc:$ptr),
+ 4, NoItinerary, []>;
+}
+}
+
+// Pseudo for lowering MVE Q register COPYs. These will usually get converted
+// to an "MVE_VORR dst, src, src", but may behave differently in tail-predicated
+// loops to ensure the whole register is copied, not a subset from a
+// tail-predicated MVE_VORR. In the event we cannot prove an MVE_VORR is valid,
+// it will become a pair of VMOVD instructions, one for each half of the Q register.
+let Predicates = [HasMVEInt], hasSideEffects = 0, isMoveReg = 1,
+ D = MVEDomain in {
+ def MQPRCopy : t2PseudoInst<(outs MQPR:$dst), (ins MQPR:$src),
+ 8, NoItinerary, []>;
+}
+
+
//===----------------------------------------------------------------------===//
// Patterns
//===----------------------------------------------------------------------===//
@@ -7142,7 +7199,7 @@ class MVE_vector_store_typed<ValueType Ty, Instruction RegImmInst,
class MVE_vector_maskedstore_typed<ValueType Ty, Instruction RegImmInst,
PatFrag StoreKind, int shift>
: Pat<(StoreKind (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, VCCR:$pred),
- (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (RegImmInst (Ty MQPR:$val), t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
multiclass MVE_vector_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
@@ -7163,7 +7220,7 @@ class MVE_vector_load_typed<ValueType Ty, Instruction RegImmInst,
class MVE_vector_maskedload_typed<ValueType Ty, Instruction RegImmInst,
PatFrag LoadKind, int shift>
: Pat<(Ty (LoadKind t2addrmode_imm7<shift>:$addr, VCCR:$pred, (Ty (ARMvmovImm (i32 0))))),
- (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (Ty (RegImmInst t2addrmode_imm7<shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
multiclass MVE_vector_load<Instruction RegImmInst, PatFrag LoadKind,
int shift> {
@@ -7184,7 +7241,7 @@ class MVE_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
class MVE_vector_offset_maskedstore_typed<ValueType Ty, Instruction Opcode,
PatFrag StoreKind, int shift>
: Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr, VCCR:$pred),
- (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
multiclass MVE_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
@@ -7314,11 +7371,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
// Masked trunc stores
def : Pat<(!cast<PatFrag>("aligned_truncmaskedst"#Amble) (VT MQPR:$val), taddrmode_imm7<Shift>:$addr, VCCR:$pred),
- (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (!cast<Instruction>(StoreInst) MQPR:$val, taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(!cast<PatFrag>("aligned_post_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
- (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (!cast<Instruction>(StoreInst#"_post") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
def : Pat<(!cast<PatFrag>("aligned_pre_truncmaskedst"#Amble) (VT MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, VCCR:$pred),
- (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred)>;
+ (!cast<Instruction>(StoreInst#"_pre") MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg)>;
// Ext loads
def : Pat<(VT (!cast<PatFrag>("aligned_extload"#Amble) taddrmode_imm7<Shift>:$addr)),
@@ -7330,11 +7387,11 @@ multiclass MVEExtLoadStore<Instruction LoadSInst, Instruction LoadUInst, string
// Masked ext loads
def : Pat<(VT (!cast<PatFrag>("aligned_extmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
- (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
def : Pat<(VT (!cast<PatFrag>("aligned_sextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
- (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (VT (LoadSInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
def : Pat<(VT (!cast<PatFrag>("aligned_zextmaskedload"#Amble) taddrmode_imm7<Shift>:$addr, VCCR:$pred, (VT (ARMvmovImm (i32 0))))),
- (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred))>;
+ (VT (LoadUInst taddrmode_imm7<Shift>:$addr, ARMVCCThen, VCCR:$pred, zero_reg))>;
}
let Predicates = [HasMVEInt] in {
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 3ca6704c17b9..aaf3280ea150 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -2735,8 +2735,11 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt,
+ [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp>
@@ -2789,19 +2792,22 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
- [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
// Same as N3VQIntnp but with Vd as a src register.
class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, Format f, InstrItinClass itin, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy,
- SDPatternOperator IntOp, bit Commutable>
+ SDPatternOperator IntOp>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
f, itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
(OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
+ let isCommutable = 0;
}
class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -3118,7 +3124,10 @@ class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
+ [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]> {
+ let isCommutable = Commutable;
+}
+
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
@@ -4041,7 +4050,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- string baseOpc, SDNode OpNode> {
+ SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
@@ -4987,7 +4996,7 @@ class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin,
}
multiclass N3VCP8ComplexTied<bit op21, bit op4,
- string OpcodeStr, SDPatternOperator Op> {
+ string OpcodeStr> {
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot),
@@ -5007,7 +5016,7 @@ multiclass N3VCP8ComplexTied<bit op21, bit op4,
}
multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
- string OpcodeStr, SDPatternOperator Op> {
+ string OpcodeStr> {
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD,
(outs DPR:$Vd),
@@ -5032,8 +5041,7 @@ multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4,
// These instructions index by pairs of lanes, so the VectorIndexes are twice
// as wide as the data types.
-multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
- SDPatternOperator Op> {
+multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr> {
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD,
(outs DPR:$Vd),
@@ -5060,9 +5068,9 @@ multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr,
}
}
-defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>;
-defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>;
-defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>;
+defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla">;
+defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd">;
+defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla">;
let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in {
def : Pat<(v4f16 (int_arm_neon_vcadd_rot90 (v4f16 DPR:$Rn), (v4f16 DPR:$Rm))),
@@ -5991,9 +5999,9 @@ def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",
ARMvshrsImm>;
-defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",
ARMvshruImm>;
// VSHLL : Vector Shift Left Long
@@ -6061,9 +6069,9 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",
NEONvrshrsImm>;
-defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",
NEONvrshruImm>;
// VRSHRN : Vector Rounding Shift Right and Narrow
@@ -6438,6 +6446,18 @@ def : Pat<(ARMvgetlaneu (v8i16 QPR:$src), imm:$lane),
(VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src,
(DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v8f16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4f16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v4f16 DPR:$src), imm:$lane),
+ (VGETLNu16 (v4f16 DPR:$src), imm:$lane)>;
+def : Pat<(ARMvgetlaneu (v8bf16 QPR:$src), imm:$lane),
+ (VGETLNu16 (v4bf16 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_i16_reg imm:$lane))),
+ (SubReg_i16_lane imm:$lane))>;
+def : Pat<(ARMvgetlaneu (v4bf16 DPR:$src), imm:$lane),
+ (VGETLNu16 (v4bf16 DPR:$src), imm:$lane)>;
}
def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src,
@@ -7074,7 +7094,7 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
(Ty QPR:$Vm), imm:$index)))]> {
@@ -7337,7 +7357,7 @@ let PostEncoderMethod = "NEONThumb2DataIPostEncoder",
!strconcat("sha", op), "32", v4i32, v4i32, Int>;
class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int>
: N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary,
- !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>;
+ !strconcat("sha", op), "32", v4i32, v4i32, Int>;
}
let Predicates = [HasV8, HasAES] in {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index ef07b2839bc9..bf717a4056e9 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -168,6 +168,7 @@ def thumb_cb_target : Operand<OtherVT> {
let EncoderMethod = "getThumbCBTargetOpValue";
let DecoderMethod = "DecodeThumbCmpBROperand";
}
+} // OperandType = "OPERAND_PCREL"
// t_addrmode_pc := <label> => pc + imm8 * 4
//
@@ -177,7 +178,6 @@ def t_addrmode_pc : MemOperand {
let PrintMethod = "printThumbLdrLabelOperand";
let ParserMatchClass = ThumbMemPC;
}
-}
// t_addrmode_rr := reg + reg
//
@@ -1520,6 +1520,7 @@ def tTBH_JT : tPseudoInst<(outs),
let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
[(set R0, ARMthread_pointer)]>,
+ Requires<[IsThumb, IsReadTPSoft]>,
Sched<[WriteBr]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index e7eed2a0bbb1..783db9dde17f 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -200,7 +200,7 @@ def t2addrmode_imm12 : MemOperand,
}
// t2ldrlabel := imm12
-def t2ldrlabel : Operand<i32> {
+def t2ldrlabel : MemOperand {
let EncoderMethod = "getAddrModeImm12OpValue";
let PrintMethod = "printThumbLdrLabelOperand";
}
@@ -1927,7 +1927,7 @@ def : InstAlias<"pli${p}.w\t$addr",
// pci variant is very similar to i12, but supports negative offsets
// from the PC. Only PLD and PLI have pci variants (not PLDW)
-class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
+class T2Iplpci<bits<1> inst, string opc> : T2Ipc<(outs), (ins t2ldrlabel:$addr),
IIC_Preload, opc, "\t$addr",
[(ARMPreload (ARMWrapper tconstpool:$addr),
(i32 0), (i32 inst))]>, Sched<[WritePreLd]> {
@@ -4274,8 +4274,9 @@ def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
//===----------------------------------------------------------------------===//
// Coprocessor load/store -- for disassembly only
//
-class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, NoItinerary, opc, asm, pattern> {
+class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm,
+ list<dag> pattern, AddrMode am = AddrModeNone>
+ : T2I<oops, iops, NoItinerary, opc, asm, pattern, am> {
let Inst{31-28} = op31_28;
let Inst{27-25} = 0b110;
}
@@ -4283,7 +4284,7 @@ class T2CI<bits<4> op31_28, dag oops, dag iops, string opc, string asm, list<dag
multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag> pattern> {
def _OFFSET : T2CI<op31_28,
(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr),
- asm, "\t$cop, $CRd, $addr", pattern> {
+ asm, "\t$cop, $CRd, $addr", pattern, AddrMode5> {
bits<13> addr;
bits<4> cop;
bits<4> CRd;
@@ -4670,6 +4671,9 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
}
+// Reading thread pointer from coprocessor register
+def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 3)>,
+ Requires<[IsThumb2, IsReadTPHard]>;
//===----------------------------------------------------------------------===//
// ARMv8.1 Privilege Access Never extension
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index bcd6433a579b..9d1bfa414dff 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1600,9 +1600,13 @@ def VTOSIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1101, 0b1011,
let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
+ def : VFPPat<(i32 (fp_to_sint_sat (f64 DPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
+ def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
+ (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
}
def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
@@ -1619,10 +1623,15 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
(COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
+def : VFPPat<(i32 (fp_to_sint_sat SPR:$a, i32)),
+ (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
+def : VFPPat<(alignedstore32 (i32 (fp_to_sint_sat (f32 SPR:$a), i32)),
+ addrmode5:$ptr),
+ (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
(outs SPR:$Sd), (ins HPR:$Sm),
@@ -1635,6 +1644,8 @@ def VTOSIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1101, 0b1001,
def : VFPNoNEONPat<(i32 (fp_to_sint (f16 HPR:$a))),
(COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
+def : VFPPat<(i32 (fp_to_sint_sat (f16 HPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOSIZH (f16 HPR:$a)), GPR)>;
def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$Sd), (ins DPR:$Dm),
@@ -1647,9 +1658,13 @@ def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
let Predicates=[HasVFP2, HasDPVFP] in {
def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
(COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
+ def : VFPPat<(i32 (fp_to_uint_sat (f64 DPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
(VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
+ def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f64 DPR:$a), i32)), addrmode5:$ptr),
+ (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
}
def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
@@ -1666,10 +1681,15 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
(COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
+def : VFPPat<(i32 (fp_to_uint_sat SPR:$a, i32)),
+ (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
addrmode5:$ptr),
(VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
+def : VFPPat<(alignedstore32 (i32 (fp_to_uint_sat (f32 SPR:$a), i32)),
+ addrmode5:$ptr),
+ (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
(outs SPR:$Sd), (ins HPR:$Sm),
@@ -1682,6 +1702,8 @@ def VTOUIZH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001,
def : VFPNoNEONPat<(i32 (fp_to_uint (f16 HPR:$a))),
(COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
+def : VFPPat<(i32 (fp_to_uint_sat (f16 HPR:$a), i32)),
+ (COPY_TO_REGCLASS (VTOUIZH (f16 HPR:$a)), GPR)>;
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
let Uses = [FPSCR] in {
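The new fp_to_sint_sat / fp_to_uint_sat patterns in this file map straight onto the existing VTOSIZ*/VTOUIZ* instructions, presumably because VCVT with round-toward-zero already saturates out-of-range values and converts NaN to 0, matching the semantics of the saturating nodes. A rough scalar model of the signed i32 case, only as a sketch (the helper name is illustrative, not from the patch):

    #include <cmath>
    #include <cstdint>
    #include <limits>

    // Scalar model of i32 fp_to_sint_sat: truncate toward zero, clamp to the
    // i32 range, and turn NaN into 0.
    int32_t fpToSIntSat32(double X) {
      if (std::isnan(X))
        return 0;
      if (X <= static_cast<double>(std::numeric_limits<int32_t>::min()))
        return std::numeric_limits<int32_t>::min();
      if (X >= static_cast<double>(std::numeric_limits<int32_t>::max()))
        return std::numeric_limits<int32_t>::max();
      return static_cast<int32_t>(std::trunc(X));
    }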
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index fd06bfdf352c..6e259b1baf97 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -564,7 +564,7 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
}
// End of block was reached.
- if (MBB.succ_size() > 0) {
+ if (!MBB.succ_empty()) {
// FIXME: Because of a bug, live registers are sometimes missing from
// the successor blocks' live-in sets. This means we can't trust that
// information and *always* have to reset at the end of a block.
@@ -587,7 +587,7 @@ unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
}
for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
- if (!LiveRegs.contains(Reg))
+ if (LiveRegs.available(MF->getRegInfo(), Reg))
return Reg;
return 0;
}
@@ -2476,8 +2476,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
} else {
for (unsigned i = 0; i != NumMove; ++i) {
- MachineInstr *Op = Ops.back();
- Ops.pop_back();
+ MachineInstr *Op = Ops.pop_back_val();
MBB->splice(InsertPos, MBB, Op);
}
}
@@ -2811,6 +2810,7 @@ static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
.addImm(Offset)
.add(MI->getOperand(3))
.add(MI->getOperand(4))
+ .add(MI->getOperand(5))
.cloneMemRefs(*MI);
case ARMII::AddrModeT2_i8:
if (MI->mayLoad()) {
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index ea41442857f3..3874db5792d6 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -97,7 +97,15 @@ static bool isDomainMVE(MachineInstr *MI) {
return Domain == ARMII::DomainMVE;
}
+static int getVecSize(const MachineInstr &MI) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ uint64_t Flags = MCID.TSFlags;
+ return (Flags & ARMII::VecSize) >> ARMII::VecSizeShift;
+}
+
static bool shouldInspect(MachineInstr &MI) {
+ if (MI.isDebugInstr())
+ return false;
return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
}
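getVecSize() above reads the new two-bit VecSize field out of TSFlags, which is used further down to compare a double-width result instruction's size against the VCTP's. Judging by the TableGen changes earlier in the patch (the !if(size, 0b01, 0b10) in the f16/f32 VPT classes and the memsz encodings), 0b00/0b01/0b10 correspond to 8-, 16- and 32-bit lanes. A small standalone sketch of that encoding, with an illustrative helper name (not part of the patch):

    #include <cstdint>

    enum class MVEVecSize : uint8_t { B8 = 0b00, H16 = 0b01, W32 = 0b10 };

    // Lane width in bits for a given VecSize field value.
    constexpr unsigned laneBits(MVEVecSize VS) {
      return 8u << static_cast<unsigned>(VS); // 0b00 -> 8, 0b01 -> 16, 0b10 -> 32
    }

    static_assert(laneBits(MVEVecSize::H16) == 16, "e.g. a .u16/.f16 instruction");
    static_assert(laneBits(MVEVecSize::W32) == 32, "e.g. a .u32/.f32 instruction");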
@@ -368,9 +376,11 @@ namespace {
MachineInstr *Dec = nullptr;
MachineInstr *End = nullptr;
MachineOperand TPNumElements;
- SmallVector<MachineInstr*, 4> VCTPs;
- SmallPtrSet<MachineInstr*, 4> ToRemove;
- SmallPtrSet<MachineInstr*, 4> BlockMasksToRecompute;
+ SmallVector<MachineInstr *, 4> VCTPs;
+ SmallPtrSet<MachineInstr *, 4> ToRemove;
+ SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;
+ SmallPtrSet<MachineInstr *, 4> DoubleWidthResultInstrs;
+ SmallPtrSet<MachineInstr *, 4> VMOVCopies;
bool Revert = false;
bool CannotTailPredicate = false;
@@ -730,6 +740,20 @@ bool LowOverheadLoop::ValidateTailPredicate() {
return false;
}
+ // Any DoubleWidthResultInstrs found whilst scanning the instructions need to
+ // compute an output size that is no larger than the size the VCTP mask
+ // operates on. The VecSize of a DoubleWidthResult is the larger vector size -
+ // the size it extends into - so any instruction whose VecSize is <= the
+ // VCTP's VecSize is valid.
+ unsigned VCTPVecSize = getVecSize(*VCTP);
+ for (MachineInstr *MI : DoubleWidthResultInstrs) {
+ unsigned InstrVecSize = getVecSize(*MI);
+ if (InstrVecSize > VCTPVecSize) {
+ LLVM_DEBUG(dbgs() << "ARM Loops: Double width result larger than VCTP "
+ << "VecSize:\n" << *MI);
+ return false;
+ }
+ }
+
// Check that the value change of the element count is what we expect and
// that the predication will be equivalent. For this we need:
// NumElements = NumElements - VectorWidth. The sub will be a sub immediate
@@ -880,6 +904,10 @@ static bool producesFalseLanesZero(MachineInstr &MI,
continue;
if (!isRegInClass(MO, QPRs) && AllowScalars)
continue;
+ // Skip the lr predicate reg
+ int PIdx = llvm::findFirstVPTPredOperandIdx(MI);
+ if (PIdx != -1 && (int)MI.getOperandNo(&MO) == PIdx + 2)
+ continue;
// Check that this instruction will produce zeros in its false lanes:
// - If it only consumes false lanes zero or constant 0 (vmov #0)
@@ -927,6 +955,8 @@ bool LowOverheadLoop::ValidateLiveOuts() {
SmallPtrSet<MachineInstr *, 4> Predicated;
MachineBasicBlock *Header = ML.getHeader();
+ LLVM_DEBUG(dbgs() << "ARM Loops: Validating Live outs\n");
+
for (auto &MI : *Header) {
if (!shouldInspect(MI))
continue;
@@ -944,12 +974,25 @@ bool LowOverheadLoop::ValidateLiveOuts() {
FalseLanesZero.insert(&MI);
else if (MI.getNumDefs() == 0)
continue;
- else if (!isPredicated && retainsOrReduces)
+ else if (!isPredicated && retainsOrReduces) {
+ LLVM_DEBUG(dbgs() << " Unpredicated instruction that retainsOrReduces: " << MI);
return false;
- else if (!isPredicated)
+ } else if (!isPredicated && MI.getOpcode() != ARM::MQPRCopy)
FalseLanesUnknown.insert(&MI);
}
+ LLVM_DEBUG({
+ dbgs() << " Predicated:\n";
+ for (auto *I : Predicated)
+ dbgs() << " " << *I;
+ dbgs() << " FalseLanesZero:\n";
+ for (auto *I : FalseLanesZero)
+ dbgs() << " " << *I;
+ dbgs() << " FalseLanesUnknown:\n";
+ for (auto *I : FalseLanesUnknown)
+ dbgs() << " " << *I;
+ });
+
auto HasPredicatedUsers = [this](MachineInstr *MI, const MachineOperand &MO,
SmallPtrSetImpl<MachineInstr *> &Predicated) {
SmallPtrSet<MachineInstr *, 2> Uses;
@@ -973,7 +1016,7 @@ bool LowOverheadLoop::ValidateLiveOuts() {
if (!isRegInClass(MO, QPRs) || !MO.isDef())
continue;
if (!HasPredicatedUsers(MI, MO, Predicated)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Found an unknown def of : "
+ LLVM_DEBUG(dbgs() << " Found an unknown def of : "
<< TRI.getRegAsmName(MO.getReg()) << " at " << *MI);
NonPredicated.insert(MI);
break;
@@ -993,8 +1036,10 @@ bool LowOverheadLoop::ValidateLiveOuts() {
for (const MachineBasicBlock::RegisterMaskPair &RegMask : ExitBB->liveins()) {
// TODO: Instead of blocking predication, we could move the vctp to the exit
// block and calculate its operand there or in the preheader.
- if (RegMask.PhysReg == ARM::VPR)
+ if (RegMask.PhysReg == ARM::VPR) {
+ LLVM_DEBUG(dbgs() << " VPR is live in to the exit block.");
return false;
+ }
// Check Q-regs that are live in the exit blocks. We don't collect scalars
// because they won't be affected by lane predication.
if (QPRs->contains(RegMask.PhysReg))
@@ -1007,10 +1052,20 @@ bool LowOverheadLoop::ValidateLiveOuts() {
// any VPT predicated instruction is predicated upon VCTP. Any live-out
// instruction needs to be predicated, so check this here. The instructions
// in NonPredicated have been found to be a reduction that we can ensure its
- // legality.
- for (auto *MI : LiveOutMIs) {
- if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
- LLVM_DEBUG(dbgs() << "ARM Loops: Unable to handle live out: " << *MI);
+ // legality. Any MQPRCopy found will need to validate its input as if it was
+ // live out.
+ SmallVector<MachineInstr *> Worklist(LiveOutMIs.begin(), LiveOutMIs.end());
+ while (!Worklist.empty()) {
+ MachineInstr *MI = Worklist.pop_back_val();
+ if (MI->getOpcode() == ARM::MQPRCopy) {
+ VMOVCopies.insert(MI);
+ MachineInstr *CopySrc =
+ RDA.getUniqueReachingMIDef(MI, MI->getOperand(1).getReg());
+ if (CopySrc)
+ Worklist.push_back(CopySrc);
+ } else if (NonPredicated.count(MI) && FalseLanesUnknown.contains(MI)) {
+ LLVM_DEBUG(dbgs() << " Unable to handle live out: " << *MI);
+ VMOVCopies.clear();
return false;
}
}
@@ -1121,7 +1176,7 @@ static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML) {
return false;
int FI = GetFrameIndex(MI->memoperands().front());
- MachineFrameInfo FrameInfo = MI->getParent()->getParent()->getFrameInfo();
+ auto &FrameInfo = MI->getParent()->getParent()->getFrameInfo();
if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
return false;
@@ -1211,8 +1266,15 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
bool RequiresExplicitPredication =
(MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
if (isDomainMVE(MI) && RequiresExplicitPredication) {
- LLVM_DEBUG(if (!IsUse)
- dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
+ if (MI->getOpcode() == ARM::MQPRCopy)
+ return true;
+ if (!IsUse && producesDoubleWidthResult(*MI)) {
+ DoubleWidthResultInstrs.insert(MI);
+ return true;
+ }
+
+ LLVM_DEBUG(if (!IsUse) dbgs()
+ << "ARM Loops: Can't tail predicate: " << *MI);
return IsUse;
}
@@ -1689,6 +1751,31 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
}
};
+ // And VMOVCopies need to become 2xVMOVD for tail predication to be valid.
+ // Any other MQPRCopy can be converted to MVE_VORR later on.
+ auto ExpandVMOVCopies = [this](SmallPtrSet<MachineInstr *, 4> &VMOVCopies) {
+ for (auto *MI : VMOVCopies) {
+ LLVM_DEBUG(dbgs() << "Converting copy to VMOVD: " << *MI);
+ assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");
+ MachineBasicBlock *MBB = MI->getParent();
+ Register Dst = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
+ auto MIB1 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),
+ ARM::D0 + (Dst - ARM::Q0) * 2)
+ .addReg(ARM::D0 + (Src - ARM::Q0) * 2)
+ .add(predOps(ARMCC::AL));
+ (void)MIB1;
+ LLVM_DEBUG(dbgs() << " into " << *MIB1);
+ auto MIB2 = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::VMOVD),
+ ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
+ .addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
+ .add(predOps(ARMCC::AL));
+ LLVM_DEBUG(dbgs() << " and " << *MIB2);
+ (void)MIB2;
+ MI->eraseFromParent();
+ }
+ };
+
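The ARM::D0 + (Reg - ARM::Q0) * 2 arithmetic in ExpandVMOVCopies leans on the fact that each MVE Q register aliases a consecutive D-register pair (Qn = D2n:D2n+1), which is why a whole-Q copy expands to exactly two VMOVDs. A throwaway illustration of that mapping (not from the patch):

    struct DPair { unsigned Lo, Hi; };

    // Qn overlaps D(2n) and D(2n+1).
    constexpr DPair dRegsForQ(unsigned QIndex) {
      return {2 * QIndex, 2 * QIndex + 1};
    }

    static_assert(dRegsForQ(3).Lo == 6 && dRegsForQ(3).Hi == 7, "Q3 -> D6/D7");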
if (LoLoop.Revert) {
if (isWhileLoopStart(*LoLoop.Start))
RevertWhile(LoLoop.Start);
@@ -1699,6 +1786,7 @@ void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
else
RevertLoopEnd(LoLoop.End, RevertLoopDec(LoLoop.Dec));
} else {
+ ExpandVMOVCopies(LoLoop.VMOVCopies);
LoLoop.Start = ExpandLoopStart(LoLoop);
if (LoLoop.Start)
RemoveDeadBranch(LoLoop.Start);
@@ -1743,6 +1831,7 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
SmallVector<MachineInstr*, 4> Decs;
SmallVector<MachineInstr*, 4> Ends;
SmallVector<MachineInstr *, 4> EndDecs;
+ SmallVector<MachineInstr *, 4> MQPRCopies;
for (auto &I : MBB) {
if (isLoopStart(I))
@@ -1753,9 +1842,12 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
Ends.push_back(&I);
else if (I.getOpcode() == ARM::t2LoopEndDec)
EndDecs.push_back(&I);
+ else if (I.getOpcode() == ARM::MQPRCopy)
+ MQPRCopies.push_back(&I);
}
- if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty())
+ if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty() &&
+ MQPRCopies.empty())
continue;
Changed = true;
@@ -1773,6 +1865,17 @@ bool ARMLowOverheadLoops::RevertNonLoops() {
RevertLoopEnd(End);
for (auto *End : EndDecs)
RevertLoopEndDec(End);
+ for (auto *MI : MQPRCopies) {
+ LLVM_DEBUG(dbgs() << "Converting copy to VORR: " << *MI);
+ assert(MI->getOpcode() == ARM::MQPRCopy && "Only expected MQPRCOPY!");
+ MachineBasicBlock *MBB = MI->getParent();
+ auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::MVE_VORR),
+ MI->getOperand(0).getReg())
+ .add(MI->getOperand(1))
+ .add(MI->getOperand(1));
+ addUnpredicatedMveVpredROp(MIB, MI->getOperand(0).getReg());
+ MI->eraseFromParent();
+ }
}
return Changed;
}
diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index e4b022968431..2030fab6217d 100644
--- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -194,7 +194,7 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
// BLX ip
// POP{ r0, lr }
//
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index b37988232127..9752b3166b45 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -66,6 +66,8 @@ def ssub_10 : ComposedSubRegIndex<dsub_5, ssub_0>;
def ssub_11 : ComposedSubRegIndex<dsub_5, ssub_1>;
def ssub_12 : ComposedSubRegIndex<dsub_6, ssub_0>;
def ssub_13 : ComposedSubRegIndex<dsub_6, ssub_1>;
+def ssub_14 : ComposedSubRegIndex<dsub_7, ssub_0>;
+def ssub_15 : ComposedSubRegIndex<dsub_7, ssub_1>;
def gsub_0 : SubRegIndex<32>;
def gsub_1 : SubRegIndex<32, 32>;
@@ -555,6 +557,9 @@ def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
let AltOrderSelect = [{ return 1; }];
}
+// Same as QQPR but for MVE, containing the 7 register pairs made up from Q0-Q7.
+def MQQPR : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 7)>;
+
// Tuples of 4 D regs that isn't also a pair of Q regs.
def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
[(decimate (shl DPR, 1), 2),
@@ -578,6 +583,9 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
let AltOrderSelect = [{ return 1; }];
}
+// Same as QQQQPR but for MVE, containing the 5 register quads made up from Q0-Q7.
+def MQQQQPR : RegisterClass<"ARM", [v8i64], 256, (trunc QQQQPR, 5)>;
+
// Pseudo-registers representing 2-spaced consecutive D registers.
def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 90f1b693fec6..36c4bbaafcbf 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -295,6 +295,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexA77:
case CortexA78:
case CortexA78C:
+ case CortexA710:
case CortexR4:
case CortexR4F:
case CortexR5:
@@ -389,7 +390,13 @@ bool ARMSubtarget::enableMachineScheduler() const {
return useMachineScheduler();
}
-bool ARMSubtarget::enableSubRegLiveness() const { return EnableSubRegLiveness; }
+bool ARMSubtarget::enableSubRegLiveness() const {
+ if (EnableSubRegLiveness.getNumOccurrences())
+ return EnableSubRegLiveness;
+ // Enable SubRegLiveness for MVE to better optimize s subregs for mqpr regs
+ // and q subregs for qqqqpr regs.
+ return hasMVEIntegerOps();
+}
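The rewritten enableSubRegLiveness() follows a common LLVM idiom: if the EnableSubRegLiveness cl::opt was passed explicitly on the command line it wins, and only otherwise does the subtarget-based default (here, hasMVEIntegerOps()) apply. A generic sketch of that idiom with made-up option and function names (the real flag's spelling and default are declared elsewhere in ARMSubtarget.cpp):

    #include "llvm/Support/CommandLine.h"

    // Illustrative option, not the real EnableSubRegLiveness declaration.
    static llvm::cl::opt<bool> EnableFeature("enable-some-feature",
                                             llvm::cl::init(false));

    bool shouldEnable(bool TargetDefault) {
      if (EnableFeature.getNumOccurrences() > 0) // user set the flag explicitly
        return EnableFeature;
      return TargetDefault;                      // otherwise use the target default
    }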
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index a8a9ae66b4ab..5e1217b6a468 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -65,6 +65,7 @@ protected:
CortexA77,
CortexA78,
CortexA78C,
+ CortexA710,
CortexA8,
CortexA9,
CortexM3,
@@ -124,6 +125,9 @@ protected:
ARMv8mMainline,
ARMv8r,
ARMv81mMainline,
+ ARMv9a,
+ ARMv91a,
+ ARMv92a,
};
public:
@@ -170,6 +174,9 @@ protected:
bool HasV8_5aOps = false;
bool HasV8_6aOps = false;
bool HasV8_7aOps = false;
+ bool HasV9_0aOps = false;
+ bool HasV9_1aOps = false;
+ bool HasV9_2aOps = false;
bool HasV8MBaselineOps = false;
bool HasV8MMainlineOps = false;
bool HasV8_1MMainlineOps = false;
@@ -468,6 +475,9 @@ protected:
/// cannot be encoded. For example, ADD r0, r1, #FFFFFFFF -> SUB r0, r1, #1.
bool NegativeImmediates = true;
+ /// Mitigate against the CVE-2021-35465 security vulnerability.
+ bool FixCMSE_CVE_2021_35465 = false;
+
/// Harden against Straight Line Speculation for Returns and Indirect
/// Branches.
bool HardenSlsRetBr = false;
@@ -618,6 +628,9 @@ public:
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasV8_6aOps() const { return HasV8_6aOps; }
bool hasV8_7aOps() const { return HasV8_7aOps; }
+ bool hasV9_0aOps() const { return HasV9_0aOps; }
+ bool hasV9_1aOps() const { return HasV9_1aOps; }
+ bool hasV9_2aOps() const { return HasV9_2aOps; }
bool hasV8MBaselineOps() const { return HasV8MBaselineOps; }
bool hasV8MMainlineOps() const { return HasV8MMainlineOps; }
bool hasV8_1MMainlineOps() const { return HasV8_1MMainlineOps; }
@@ -780,14 +793,7 @@ public:
// ARM Targets that support EHABI exception handling standard
// Darwin uses SjLj. Other targets might need more checks.
bool isTargetEHABICompatible() const {
- return (TargetTriple.getEnvironment() == Triple::EABI ||
- TargetTriple.getEnvironment() == Triple::GNUEABI ||
- TargetTriple.getEnvironment() == Triple::MuslEABI ||
- TargetTriple.getEnvironment() == Triple::EABIHF ||
- TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
- TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
- isTargetAndroid()) &&
- !isTargetDarwin() && !isTargetWindows();
+ return TargetTriple.isTargetEHABICompatible();
}
bool isTargetHardFloat() const;
@@ -934,6 +940,8 @@ public:
unsigned PhysReg) const override;
unsigned getGPRAllocationOrder(const MachineFunction &MF) const;
+ bool fixCMSE_CVE_2021_35465() const { return FixCMSE_CVE_2021_35465; }
+
bool hardenSlsRetBr() const { return HardenSlsRetBr; }
bool hardenSlsBlr() const { return HardenSlsBlr; }
bool hardenSlsNoComdat() const { return HardenSlsNoComdat; }
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index ae7ea7c2f415..833c7effd31c 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -38,12 +38,12 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
@@ -541,7 +541,6 @@ void ARMPassConfig::addPreSched2() {
return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}));
}
- addPass(createMVEVPTBlockPass());
addPass(createThumb2ITBlockPass());
// Add both scheduling passes to give the subtarget an opportunity to pick
@@ -551,6 +550,7 @@ void ARMPassConfig::addPreSched2() {
addPass(&PostRASchedulerID);
}
+ addPass(createMVEVPTBlockPass());
addPass(createARMIndirectThunks());
addPass(createARMSLSHardeningPass());
}
diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index b03bff92f373..8c5438f7093b 100644
--- a/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -54,6 +54,16 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
}
}
+const MCRegister ARMElfTargetObjectFile::getStaticBase() const {
+ return ARM::R9;
+}
+
+const MCExpr *ARMElfTargetObjectFile::
+getIndirectSymViaRWPI(const MCSymbol *Sym) const {
+ return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_SBREL,
+ getContext());
+}
+
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
diff --git a/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index 7b15dcc61f56..8b13198fe144 100644
--- a/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCRegister.h"
namespace llvm {
@@ -23,6 +24,10 @@ public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ const MCRegister getStaticBase() const override;
+
+ const MCExpr *getIndirectSymViaRWPI(const MCSymbol *Sym) const override;
+
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
unsigned Encoding,
const TargetMachine &TM,
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index cf7456e9e4f5..88de84a4fd78 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -149,7 +149,7 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Align MemAlign =
getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II,
&IC.getAssumptionCache(), &IC.getDominatorTree());
- unsigned AlignArg = II.getNumArgOperands() - 1;
+ unsigned AlignArg = II.arg_size() - 1;
Value *AlignArgOp = II.getArgOperand(AlignArg);
MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue();
if (Align && *Align < MemAlign) {
@@ -175,7 +175,7 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
PatternMatch::m_Constant(XorMask))) &&
II.getType() == ArgArg->getType()) {
if (auto *CI = dyn_cast<ConstantInt>(XorMask)) {
- if (CI->getValue().trunc(16).isAllOnesValue()) {
+ if (CI->getValue().trunc(16).isAllOnes()) {
auto TrueVector = IC.Builder.CreateVectorSplat(
cast<FixedVectorType>(II.getType())->getNumElements(),
IC.Builder.getTrue());
@@ -248,6 +248,48 @@ ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return None;
}
+Optional<Value *> ARMTTIImpl::simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt OrigDemandedElts,
+ APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const {
+
+ // Compute the demanded elements for a narrowing MVE intrinsic. TopOpc is
+ // the index of the operand that selects a Top/Bottom instruction, which can
+ // change between intrinsics.
+ auto SimplifyNarrowInstrTopBottom = [&](unsigned TopOpc) {
+ unsigned NumElts = cast<FixedVectorType>(II.getType())->getNumElements();
+ unsigned IsTop = cast<ConstantInt>(II.getOperand(TopOpc))->getZExtValue();
+
+ // Only the odd or even lanes of operand 0 will be demanded, depending
+ // on whether this is a top or bottom instruction.
+ APInt DemandedElts =
+ APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
+ SimplifyAndSetOp(&II, 0, OrigDemandedElts & DemandedElts, UndefElts);
+ // The other lanes will be defined from the inserted elements.
+ UndefElts &= APInt::getSplat(NumElts, !IsTop ? APInt::getLowBitsSet(2, 1)
+ : APInt::getHighBitsSet(2, 1));
+ return None;
+ };
+
+ switch (II.getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::arm_mve_vcvt_narrow:
+ SimplifyNarrowInstrTopBottom(2);
+ break;
+ case Intrinsic::arm_mve_vqmovn:
+ SimplifyNarrowInstrTopBottom(4);
+ break;
+ case Intrinsic::arm_mve_vshrn:
+ SimplifyNarrowInstrTopBottom(7);
+ break;
+ }
+
+ return None;
+}
+
InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
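As a standalone illustration of the lane mask built by SimplifyNarrowInstrTopBottom above (an 8-element vector is assumed purely for the example; this is not part of the patch):

#include "llvm/ADT/APInt.h"
#include <cstdio>

int main() {
  unsigned NumElts = 8; // hypothetical element count
  for (unsigned IsTop : {1u, 0u}) {
    // Mirrors the expression in the patch: a 2-bit pattern splatted across
    // the lanes, selecting either the even or the odd elements.
    llvm::APInt Demanded = llvm::APInt::getSplat(
        NumElts, IsTop ? llvm::APInt::getLowBitsSet(2, 1)
                       : llvm::APInt::getHighBitsSet(2, 1));
    std::printf("IsTop=%u -> demanded lanes 0x%02llx\n", IsTop,
                (unsigned long long)Demanded.getZExtValue());
  }
  return 0;
}

Running this prints 0x55 for top instructions and 0xaa for bottom ones, i.e. every other lane of operand 0 is demanded.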
@@ -300,7 +342,7 @@ static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) {
if (InstSPF == SPF_SMAX &&
PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) &&
- C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) {
+ C->getValue() == Imm && Imm.isNegative() && Imm.isNegatedPowerOf2()) {
auto isSSatMin = [&](Value *MinInst) {
if (isa<SelectInst>(MinInst)) {
@@ -368,7 +410,7 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
}
// xor a, -1 can always be folded to MVN
- if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
+ if (Opcode == Instruction::Xor && Imm.isAllOnes())
return 0;
// Ensures negative constant of min(max()) or max(min()) patterns that
@@ -381,6 +423,14 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return 0;
}
+ // We can convert <= -1 to < 0, which is generally quite cheap.
+ if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
+ ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE)
+ return std::min(getIntImmCost(Imm, Ty, CostKind),
+ getIntImmCost(Imm + 1, Ty, CostKind));
+ }
+
return getIntImmCost(Imm, Ty, CostKind);
}
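The new ICmp immediate-cost case relies on the signed equivalence between a compare against -1 and a compare against 0, so the cheaper of the two immediates can be costed. A throwaway check of that equivalence (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X = -4; X <= 4; ++X) {
    assert((X <= -1) == (X < 0)); // SLE -1  ==  SLT 0
    assert((X > -1) == (X >= 0)); // SGT -1  ==  SGE 0
  }
  return 0;
}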
@@ -1623,13 +1673,24 @@ ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
TTI::TargetCostKind CostKind) {
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
+
if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) {
std::pair<InstructionCost, MVT> LT =
TLI->getTypeLegalizationCost(DL, ValTy);
- if ((LT.second == MVT::v16i8 && ResVT.getSizeInBits() <= 32) ||
- (LT.second == MVT::v8i16 &&
- ResVT.getSizeInBits() <= (IsMLA ? 64 : 32)) ||
- (LT.second == MVT::v4i32 && ResVT.getSizeInBits() <= 64))
+
+ // The legal cases are:
+ // VADDV u/s 8/16/32
+ // VMLAV u/s 8/16/32
+ // VADDLV u/s 32
+ // VMLALV u/s 16/32
+ // Codegen currently cannot always handle larger-than-legal vectors very
+ // well, especially for predicated reductions where the mask needs to be
+ // split, so restrict to 128-bit or smaller input types.
+ unsigned RevVTSize = ResVT.getSizeInBits();
+ if (ValVT.getSizeInBits() <= 128 &&
+ ((LT.second == MVT::v16i8 && RevVTSize <= 32) ||
+ (LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) ||
+ (LT.second == MVT::v4i32 && RevVTSize <= 64)))
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
}
@@ -1949,6 +2010,20 @@ static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) {
// we simply count the icmps, i.e. there should only be 1 for the backedge.
if (isa<ICmpInst>(&I) && ++ICmpCount > 1)
return false;
+ // FIXME: This is a workaround for poor cost modelling. Min/Max intrinsics are
+ // not currently canonical, but soon will be. Code without them uses icmp, and
+ // so is not tail predicated as per the condition above. In order to get the
+ // same performance we treat min and max the same as an icmp for tailpred
+ // purposes for the moment (we often rely on non-tailpred and higher VFs to
+ // pick more optimal instructions like VQDMULH. They need to be recognized
+ // directly by the vectorizer).
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if ((II->getIntrinsicID() == Intrinsic::smin ||
+ II->getIntrinsicID() == Intrinsic::smax ||
+ II->getIntrinsicID() == Intrinsic::umin ||
+ II->getIntrinsicID() == Intrinsic::umax) &&
+ ++ICmpCount > 1)
+ return false;
if (isa<FCmpInst>(&I))
return false;
@@ -2035,8 +2110,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return false;
}
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
- Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
- int64_t NextStride = getPtrStride(PSE, Ptr, L);
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ Type *AccessTy = getLoadStoreType(&I);
+ int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
if (NextStride == 1) {
// TODO: for now only allow consecutive strides of 1. We could support
// other strides as long as it is uniform, but let's keep it simple
@@ -2055,8 +2131,7 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
// least if they are loop invariant.
// TODO: Loop variant strides should in theory work, too, but
// this requires further testing.
- const SCEV *PtrScev =
- replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr);
+ const SCEV *PtrScev = PSE.getSE()->getSCEV(Ptr);
if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
if (PSE.getSE()->isLoopInvariant(Step, L))
@@ -2135,14 +2210,15 @@ bool ARMTTIImpl::emitGetActiveLaneMask() const {
return true;
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Enable Upper bound unrolling universally, not dependent upon the conditions
// below.
UP.UpperBound = true;
// Only currently enable these preferences for M-Class cores.
if (!ST->isMClass())
- return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
+ return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
// Disable loop unrolling for Oz and Os.
UP.OptSizeThreshold = 0;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 889940534ce5..a56886d4fc11 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -120,6 +120,11 @@ public:
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
/// \name Scalar TTI Implementations
/// @{
@@ -226,8 +231,7 @@ public:
const SCEV *Ptr);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Op2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -246,8 +250,7 @@ public:
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
@@ -279,7 +282,8 @@ public:
DominatorTree *DT,
const LoopAccessInfo *LAI);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
bool emitGetActiveLaneMask() const;
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index e410fe0aeff2..64d2e1bfa9b2 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -6,15 +6,15 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMFeatures.h"
#include "ARMBaseInstrInfo.h"
-#include "Utils/ARMBaseInfo.h"
+#include "ARMFeatures.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMInstPrinter.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "TargetInfo/ARMTargetInfo.h"
+#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/None.h"
@@ -22,8 +22,8 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
@@ -44,6 +44,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/Casting.h"
@@ -53,7 +54,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -2478,14 +2478,15 @@ public:
}
void addVPTPredNOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
+ assert(N == 3 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(unsigned(getVPTPred())));
unsigned RegNum = getVPTPred() == ARMVCC::None ? 0: ARM::P0;
Inst.addOperand(MCOperand::createReg(RegNum));
+ Inst.addOperand(MCOperand::createReg(0));
}
void addVPTPredROperands(MCInst &Inst, unsigned N) const {
- assert(N == 3 && "Invalid number of operands!");
+ assert(N == 4 && "Invalid number of operands!");
addVPTPredNOperands(Inst, N-1);
unsigned RegNum;
if (getVPTPred() == ARMVCC::None) {
@@ -3343,16 +3344,16 @@ public:
// regs) or q0-q4 (for 4)
//
// The MVE instructions taking a register range of this kind will
- // need an operand in the QQPR or QQQQPR class, representing the
+ // need an operand in the MQQPR or MQQQQPR class, representing the
// entire range as a unit. So we must translate into that class,
// by finding the index of the base register in the MQPR reg
// class, and returning the super-register at the corresponding
// index in the target class.
const MCRegisterClass *RC_in = &ARMMCRegisterClasses[ARM::MQPRRegClassID];
- const MCRegisterClass *RC_out = (VectorList.Count == 2) ?
- &ARMMCRegisterClasses[ARM::QQPRRegClassID] :
- &ARMMCRegisterClasses[ARM::QQQQPRRegClassID];
+ const MCRegisterClass *RC_out =
+ (VectorList.Count == 2) ? &ARMMCRegisterClasses[ARM::MQQPRRegClassID]
+ : &ARMMCRegisterClasses[ARM::MQQQQPRRegClassID];
unsigned I, E = RC_out->getNumRegs();
for (I = 0; I < E; I++)
@@ -10960,7 +10961,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Only after the instruction is fully processed, we can validate it
if (wasInITBlock && hasV8Ops() && isThumb() &&
- !isV8EligibleForIT(&Inst)) {
+ !isV8EligibleForIT(&Inst) && !getTargetOptions().MCNoDeprecatedWarn) {
Warning(IDLoc, "deprecated instruction in IT block");
}
}
@@ -11777,13 +11778,13 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
return true;
if (!Section) {
- getStreamer().InitSections(false);
+ getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign())
- getStreamer().emitCodeAlignment(2);
+ getStreamer().emitCodeAlignment(2, &getSTI());
else
getStreamer().emitValueToAlignment(2);
@@ -11985,7 +11986,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
if (Section->UseCodeAlign())
- getStreamer().emitCodeAlignment(4, 0);
+ getStreamer().emitCodeAlignment(4, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(4, 0, 1, 0);
return false;
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 51fd45034534..9caef9f09ea9 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -19,10 +19,10 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -227,10 +227,12 @@ static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeMQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
@@ -852,12 +854,15 @@ ARMDisassembler::AddThumbPredicate(MCInst &MI) const {
VCCI = MI.insert(VCCI, MCOperand::createImm(VCC));
++VCCI;
if (VCC == ARMVCC::None)
- MI.insert(VCCI, MCOperand::createReg(0));
+ VCCI = MI.insert(VCCI, MCOperand::createReg(0));
else
- MI.insert(VCCI, MCOperand::createReg(ARM::P0));
+ VCCI = MI.insert(VCCI, MCOperand::createReg(ARM::P0));
+ ++VCCI;
+ VCCI = MI.insert(VCCI, MCOperand::createReg(0));
+ ++VCCI;
if (OpInfo[VCCPos].OperandType == ARM::OPERAND_VPRED_R) {
int TiedOp = ARMInsts[MI.getOpcode()].getOperandConstraint(
- VCCPos + 2, MCOI::TIED_TO);
+ VCCPos + 3, MCOI::TIED_TO);
assert(TiedOp >= 0 &&
"Inactive register in vpred_r is not tied to an output!");
// Copy the operand to ensure it's not invalidated when MI grows.
@@ -6154,9 +6159,9 @@ static const uint16_t QQPRDecoderTable[] = {
ARM::Q4_Q5, ARM::Q5_Q6, ARM::Q6_Q7
};
-static DecodeStatus DecodeQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 6)
return MCDisassembler::Fail;
@@ -6170,9 +6175,9 @@ static const uint16_t QQQQPRDecoderTable[] = {
ARM::Q3_Q4_Q5_Q6, ARM::Q4_Q5_Q6_Q7
};
-static DecodeStatus DecodeQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeMQQQQPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 4)
return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 9f7327f792c7..851acea94022 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -48,9 +48,10 @@ public:
} // end anonymous namespace
Optional<MCFixupKind> ARMAsmBackend::getFixupKind(StringRef Name) const {
- if (!STI.getTargetTriple().isOSBinFormatELF())
- return None;
+ return None;
+}
+Optional<MCFixupKind> ARMAsmBackendELF::getFixupKind(StringRef Name) const {
unsigned Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/ARM.def"
@@ -330,7 +331,7 @@ void ARMAsmBackend::relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const {
unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode(), STI);
- // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ // Return a diagnostic if we get here w/ a bogus instruction.
if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
@@ -357,14 +358,15 @@ void ARMAsmBackend::relaxInstruction(MCInst &Inst,
Inst.setOpcode(RelaxedOp);
}
-bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding =
- hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
+ hasNOP(STI) ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding;
uint64_t NumNops = Count / 2;
for (uint64_t i = 0; i != NumNops; ++i)
support::endian::write(OS, nopEncoding, Endian);
@@ -374,7 +376,7 @@ bool ARMAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
}
// ARM mode
const uint32_t nopEncoding =
- hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
+ hasNOP(STI) ? ARMv6T2_NopEncoding : ARMv4_NopEncoding;
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
support::endian::write(OS, nopEncoding, Endian);
@@ -1300,11 +1302,12 @@ static MCAsmBackend *createARMAsmBackend(const Target &T,
return new ARMAsmBackendDarwin(T, STI, MRI);
case Triple::COFF:
assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
- return new ARMAsmBackendWinCOFF(T, STI);
+ return new ARMAsmBackendWinCOFF(T, STI.getTargetTriple().isThumb());
case Triple::ELF:
assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ARMAsmBackendELF(T, STI, OSABI, Endian);
+ return new ARMAsmBackendELF(T, STI.getTargetTriple().isThumb(), OSABI,
+ Endian);
}
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index 38c7b30769b3..9b0c8c084161 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -13,29 +13,23 @@
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
namespace llvm {
class ARMAsmBackend : public MCAsmBackend {
- // The STI from the target triple the MCAsmBackend was instantiated with
- // note that MCFragments may have a different local STI that should be
- // used in preference.
- const MCSubtargetInfo &STI;
bool isThumbMode; // Currently emitting Thumb code.
public:
- ARMAsmBackend(const Target &T, const MCSubtargetInfo &STI,
- support::endianness Endian)
- : MCAsmBackend(Endian), STI(STI),
- isThumbMode(STI.getTargetTriple().isThumb()) {}
+ ARMAsmBackend(const Target &T, bool isThumb, support::endianness Endian)
+ : MCAsmBackend(Endian), isThumbMode(isThumb) {}
unsigned getNumFixupKinds() const override {
return ARM::NumTargetFixupKinds;
}
- // FIXME: this should be calculated per fragment as the STI may be
- // different.
- bool hasNOP() const { return STI.getFeatureBits()[ARM::HasV6T2Ops]; }
+ bool hasNOP(const MCSubtargetInfo *STI) const {
+ return STI->getFeatureBits()[ARM::HasV6T2Ops];
+ }
Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
@@ -69,7 +63,8 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
void handleAssemblerFlag(MCAssemblerFlag Flag) override;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index e27bb134670f..85013b5f099a 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -21,8 +21,8 @@ public:
const MachO::CPUSubTypeARM Subtype;
ARMAsmBackendDarwin(const Target &T, const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI)
- : ARMAsmBackend(T, STI, support::little), MRI(MRI),
- TT(STI.getTargetTriple()),
+ : ARMAsmBackend(T, STI.getTargetTriple().isThumb(), support::little),
+ MRI(MRI), TT(STI.getTargetTriple()),
Subtype((MachO::CPUSubTypeARM)cantFail(
MachO::getCPUSubType(STI.getTargetTriple()))) {}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
index 5d735114d441..2431c4865b64 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h
@@ -19,14 +19,16 @@ namespace {
class ARMAsmBackendELF : public ARMAsmBackend {
public:
uint8_t OSABI;
- ARMAsmBackendELF(const Target &T, const MCSubtargetInfo &STI, uint8_t OSABI,
+ ARMAsmBackendELF(const Target &T, bool isThumb, uint8_t OSABI,
support::endianness Endian)
- : ARMAsmBackend(T, STI, Endian), OSABI(OSABI) {}
+ : ARMAsmBackend(T, isThumb, Endian), OSABI(OSABI) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createARMELFObjectWriter(OSABI);
}
+
+ Optional<MCFixupKind> getFixupKind(StringRef Name) const override;
};
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
index 62eb1d73a2ce..6e447df9e4cb 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h
@@ -16,8 +16,8 @@ using namespace llvm;
namespace {
class ARMAsmBackendWinCOFF : public ARMAsmBackend {
public:
- ARMAsmBackendWinCOFF(const Target &T, const MCSubtargetInfo &STI)
- : ARMAsmBackend(T, STI, support::little) {}
+ ARMAsmBackendWinCOFF(const Target &T, bool isThumb)
+ : ARMAsmBackend(T, isThumb, support::little) {}
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createARMWinCOFFObjectWriter();
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ecd96114e8a4..43f7575df6db 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -408,6 +408,14 @@ namespace ARMII {
// its input, typically reading from the top/bottom halves of the input(s).
DoubleWidthResult = 1 << 23,
+ // The vector element size for MVE instructions. 00 = i8, 01 = i16, 10 = i32
+ // and 11 = i64. This is the largest type if multiple are present, so an
+ // MVE_VMOVLs8bh is size 01=i16, as it extends from an i8 to an i16. There are
+ // some caveats, so it cannot be used blindly; exchanging VMLADAVAs and
+ // complex instructions, for example, may use different input lanes.
+ VecSizeShift = 24,
+ VecSize = 3 << VecSizeShift,
+
//===------------------------------------------------------------------===//
// Code domain.
DomainShift = 15,
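A hypothetical accessor, not part of the patch, showing how the new VecSize field would typically be decoded from an instruction's TSFlags (assuming the ARMII definitions added above):

// Assumes the ARMII::VecSize / VecSizeShift definitions from this hunk.
static unsigned getMVEVecElementBits(uint64_t TSFlags) {
  unsigned Code = (TSFlags & ARMII::VecSize) >> ARMII::VecSizeShift;
  return 8u << Code; // 00 -> 8 (i8), 01 -> 16 (i16), 10 -> 32 (i32), 11 -> 64 (i64)
}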
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 12076b8c49c1..896b104e8d97 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -785,6 +785,9 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
case ARM::ArchKind::ARMV8_4A:
case ARM::ArchKind::ARMV8_5A:
case ARM::ArchKind::ARMV8_6A:
+ case ARM::ArchKind::ARMV9A:
+ case ARM::ArchKind::ARMV9_1A:
+ case ARM::ArchKind::ARMV9_2A:
S.setAttributeItem(CPU_arch_profile, ApplicationProfile, false);
S.setAttributeItem(ARM_ISA_use, Allowed, false);
S.setAttributeItem(THUMB_ISA_use, AllowThumb32, false);
@@ -1056,7 +1059,7 @@ inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
// Switch to .ARM.extab or .ARM.exidx section
SwitchSection(EHSection);
- emitCodeAlignment(4);
+ emitValueToAlignment(4, 0, 1, 0);
}
inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 40e8e244e312..77c0e3522911 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -91,7 +91,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
ExceptionsType = ExceptionHandling::WinEH;
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
- CommentString = ";";
+ CommentString = "@";
// Conditional Thumb 4-byte instructions can have an implicit IT.
MaxInstLength = 6;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index ced48ccc9883..5ecacdab390f 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1138,6 +1138,7 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
// representation for the complex operand in the .td file. This isn't just
// style, unfortunately. As-is, we can't represent the distinct encoding
// for #-0.
+ assert(((Imm8 & 0x3) == 0) && "Not a valid immediate!");
uint32_t Binary = (Imm8 >> 2) & 0xff;
// Immediate is always encoded as positive. The 'U' bit controls add vs sub.
if (isAdd)
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 87cce08b1ce4..05e5a473a3c6 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -27,9 +27,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetParser.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -441,8 +441,201 @@ public:
}
return false;
}
+
+ Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
+ const MCSubtargetInfo *STI,
+ uint64_t Addr,
+ uint64_t Size) const override;
};
+} // namespace
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrMode_i12(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ return Addr + OffImm;
+}
+
+static Optional<uint64_t> evaluateMemOpAddrForAddrMode3(const MCInst &Inst,
+ const MCInstrDesc &Desc,
+ unsigned MemOpIndex,
+ uint64_t Addr) {
+ if (MemOpIndex + 2 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ const MCOperand &MO3 = Inst.getOperand(MemOpIndex + 2);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || MO2.getReg() || !MO3.isImm())
+ return None;
+
+ unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM3Op(MO3.getImm());
+
+ if (Op == ARM_AM::sub)
+ return Addr - ImmOffs;
+ return Addr + ImmOffs;
+}
+
+static Optional<uint64_t> evaluateMemOpAddrForAddrMode5(const MCInst &Inst,
+ const MCInstrDesc &Desc,
+ unsigned MemOpIndex,
+ uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5Op(MO2.getImm());
+
+ if (Op == ARM_AM::sub)
+ return Addr - ImmOffs * 4;
+ return Addr + ImmOffs * 4;
+}
+
+static Optional<uint64_t>
+evaluateMemOpAddrForAddrMode5FP16(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ unsigned ImmOffs = ARM_AM::getAM5FP16Offset(MO2.getImm());
+ ARM_AM::AddrOpc Op = ARM_AM::getAM5FP16Op(MO2.getImm());
+
+ if (Op == ARM_AM::sub)
+ return Addr - ImmOffs * 2;
+ return Addr + ImmOffs * 2;
+}
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrModeT2_i8s4(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ if (MemOpIndex + 1 >= Desc.getNumOperands())
+ return None;
+
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ const MCOperand &MO2 = Inst.getOperand(MemOpIndex + 1);
+ if (!MO1.isReg() || MO1.getReg() != ARM::PC || !MO2.isImm())
+ return None;
+
+ int32_t OffImm = (int32_t)MO2.getImm();
+ assert(((OffImm & 0x3) == 0) && "Not a valid immediate!");
+
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ return Addr + OffImm;
+}
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrModeT2_pc(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ const MCOperand &MO1 = Inst.getOperand(MemOpIndex);
+ if (!MO1.isImm())
+ return None;
+
+ int32_t OffImm = (int32_t)MO1.getImm();
+
+ // Special value for #-0. All others are normal.
+ if (OffImm == INT32_MIN)
+ OffImm = 0;
+ return Addr + OffImm;
+}
+
+static Optional<uint64_t>
+// NOLINTNEXTLINE(readability-identifier-naming)
+evaluateMemOpAddrForAddrModeT1_s(const MCInst &Inst, const MCInstrDesc &Desc,
+ unsigned MemOpIndex, uint64_t Addr) {
+ return evaluateMemOpAddrForAddrModeT2_pc(Inst, Desc, MemOpIndex, Addr);
+}
+
+Optional<uint64_t> ARMMCInstrAnalysis::evaluateMemoryOperandAddress(
+ const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
+ uint64_t Size) const {
+ const MCInstrDesc &Desc = Info->get(Inst.getOpcode());
+
+ // Only load instructions can have PC-relative memory addressing.
+ if (!Desc.mayLoad())
+ return None;
+
+ // PC-relative addressing does not update the base register.
+ uint64_t TSFlags = Desc.TSFlags;
+ unsigned IndexMode =
+ (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ if (IndexMode != ARMII::IndexModeNone)
+ return None;
+
+ // Find the memory addressing operand in the instruction.
+ unsigned OpIndex = Desc.NumDefs;
+ while (OpIndex < Desc.getNumOperands() &&
+ Desc.OpInfo[OpIndex].OperandType != MCOI::OPERAND_MEMORY)
+ ++OpIndex;
+ if (OpIndex == Desc.getNumOperands())
+ return None;
+
+ // Base address for PC-relative addressing is always 32-bit aligned.
+ Addr &= ~0x3;
+
+ // For ARM instructions the PC offset is 8 bytes, for Thumb instructions it
+ // is 4 bytes.
+ switch (Desc.TSFlags & ARMII::FormMask) {
+ default:
+ Addr += 8;
+ break;
+ case ARMII::ThumbFrm:
+ Addr += 4;
+ break;
+ // VLDR* instructions share the same opcode (and thus the same form) for Arm
+ // and Thumb. Use a slightly longer route through STI in that case.
+ case ARMII::VFPLdStFrm:
+ Addr += STI->getFeatureBits()[ARM::ModeThumb] ? 4 : 8;
+ break;
+ }
+
+ // Evaluate the address depending on the addressing mode.
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ switch (AddrMode) {
+ default:
+ return None;
+ case ARMII::AddrMode_i12:
+ return evaluateMemOpAddrForAddrMode_i12(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrMode3:
+ return evaluateMemOpAddrForAddrMode3(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrMode5:
+ return evaluateMemOpAddrForAddrMode5(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrMode5FP16:
+ return evaluateMemOpAddrForAddrMode5FP16(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrModeT2_i8s4:
+ return evaluateMemOpAddrForAddrModeT2_i8s4(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrModeT2_pc:
+ return evaluateMemOpAddrForAddrModeT2_pc(Inst, Desc, OpIndex, Addr);
+ case ARMII::AddrModeT1_s:
+ return evaluateMemOpAddrForAddrModeT1_s(Inst, Desc, OpIndex, Addr);
+ }
}
static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
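A self-contained sketch of the AddrMode_i12 path above, with a made-up instruction address and offset, showing the 32-bit alignment of the base and the 8-byte ARM-mode PC bias:

#include <cstdint>
#include <cstdio>

static uint64_t armPCRelAddress(uint64_t InstAddr, int32_t OffImm) {
  uint64_t Base = (InstAddr & ~UINT64_C(3)) + 8; // ARM mode: PC reads as addr + 8
  if (OffImm == INT32_MIN)                       // special encoding for #-0
    OffImm = 0;
  return Base + OffImm;
}

int main() {
  // ldr r0, [pc, #16] at 0x8000 loads from (0x8000 & ~3) + 8 + 16 = 0x8018.
  std::printf("0x%llx\n", (unsigned long long)armPCRelAddress(0x8000, 16));
  return 0;
}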
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 1fee354cad93..3e4c97630af6 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -43,7 +43,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() {
}
// finish() - write out any non-empty assembler constant pools.
-void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+void ARMTargetStreamer::emitConstantPools() {
+ ConstantPools->emitAll(Streamer);
+}
// reset() - Reset any state
void ARMTargetStreamer::reset() {}
diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
index 4981b8051657..cfd275bc0621 100644
--- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
+++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp
@@ -76,6 +76,7 @@ public:
private:
LoopInfo *LI = nullptr;
+ const DataLayout *DL;
// Check this is a valid gather with correct alignment
bool isLegalTypeAndAlignment(unsigned NumElements, unsigned ElemSize,
@@ -149,10 +150,10 @@ private:
bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
// Pushes the given add out of the loop
void pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex);
- // Pushes the given mul out of the loop
- void pushOutMul(PHINode *&Phi, Value *IncrementPerRound,
- Value *OffsSecondOperand, unsigned LoopIncrement,
- IRBuilder<> &Builder);
+ // Pushes the given mul or shl out of the loop
+ void pushOutMulShl(unsigned Opc, PHINode *&Phi, Value *IncrementPerRound,
+ Value *OffsSecondOperand, unsigned LoopIncrement,
+ IRBuilder<> &Builder);
};
} // end anonymous namespace
@@ -335,14 +336,15 @@ int MVEGatherScatterLowering::computeScale(unsigned GEPElemSize,
Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
const Constant *C = dyn_cast<Constant>(V);
- if (C != nullptr)
+ if (C && C->getSplatValue())
return Optional<int64_t>{C->getUniqueInteger().getSExtValue()};
if (!isa<Instruction>(V))
return Optional<int64_t>{};
const Instruction *I = cast<Instruction>(V);
- if (I->getOpcode() == Instruction::Add ||
- I->getOpcode() == Instruction::Mul) {
+ if (I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::Shl) {
Optional<int64_t> Op0 = getIfConst(I->getOperand(0));
Optional<int64_t> Op1 = getIfConst(I->getOperand(1));
if (!Op0 || !Op1)
@@ -351,18 +353,30 @@ Optional<int64_t> MVEGatherScatterLowering::getIfConst(const Value *V) {
return Optional<int64_t>{Op0.getValue() + Op1.getValue()};
if (I->getOpcode() == Instruction::Mul)
return Optional<int64_t>{Op0.getValue() * Op1.getValue()};
+ if (I->getOpcode() == Instruction::Shl)
+ return Optional<int64_t>{Op0.getValue() << Op1.getValue()};
+ if (I->getOpcode() == Instruction::Or)
+ return Optional<int64_t>{Op0.getValue() | Op1.getValue()};
}
return Optional<int64_t>{};
}
+// Return true if I is an Or instruction that is equivalent to an add, due to
+// the operands having no common bits set.
+static bool isAddLikeOr(Instruction *I, const DataLayout &DL) {
+ return I->getOpcode() == Instruction::Or &&
+ haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL);
+}
+
std::pair<Value *, int64_t>
MVEGatherScatterLowering::getVarAndConst(Value *Inst, int TypeScale) {
std::pair<Value *, int64_t> ReturnFalse =
std::pair<Value *, int64_t>(nullptr, 0);
- // At this point, the instruction we're looking at must be an add or we
- // bail out
+ // At this point, the instruction we're looking at must be an add or an
+ // add-like-or.
Instruction *Add = dyn_cast<Instruction>(Inst);
- if (Add == nullptr || Add->getOpcode() != Instruction::Add)
+ if (Add == nullptr ||
+ (Add->getOpcode() != Instruction::Add && !isAddLikeOr(Add, *DL)))
return ReturnFalse;
Value *Summand;
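The "add-like or" condition introduced above can be demonstrated with a standalone check (illustrative only; the pass itself uses haveNoCommonBitsSet on IR values rather than concrete integers):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 256; ++X) {
    uint32_t Hi = X << 4; // low 4 bits known to be zero
    uint32_t Lo = 3;      // only the low 2 bits set
    assert((Hi & Lo) == 0);         // operands share no set bits...
    assert((Hi | Lo) == (Hi + Lo)); // ...so OR behaves exactly like ADD
  }
  return 0;
}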
@@ -737,10 +751,9 @@ Instruction *MVEGatherScatterLowering::tryCreateIncrementingGatScat(
// The gep was in charge of making sure the offsets are scaled correctly
// - calculate that factor so it can be applied by hand
- DataLayout DT = I->getParent()->getParent()->getParent()->getDataLayout();
int TypeScale =
- computeScale(DT.getTypeSizeInBits(GEP->getOperand(0)->getType()),
- DT.getTypeSizeInBits(GEP->getType()) /
+ computeScale(DL->getTypeSizeInBits(GEP->getOperand(0)->getType()),
+ DL->getTypeSizeInBits(GEP->getType()) /
cast<FixedVectorType>(GEP->getType())->getNumElements());
if (TypeScale == -1)
return nullptr;
@@ -888,11 +901,11 @@ void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi,
Phi->removeIncomingValue(StartIndex);
}
-void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
- Value *IncrementPerRound,
- Value *OffsSecondOperand,
- unsigned LoopIncrement,
- IRBuilder<> &Builder) {
+void MVEGatherScatterLowering::pushOutMulShl(unsigned Opcode, PHINode *&Phi,
+ Value *IncrementPerRound,
+ Value *OffsSecondOperand,
+ unsigned LoopIncrement,
+ IRBuilder<> &Builder) {
LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising mul instruction\n");
// Create a new scalar add outside of the loop and transform it to a splat
@@ -901,12 +914,13 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back());
// Create a new index
- Value *StartIndex = BinaryOperator::Create(
- Instruction::Mul, Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
- OffsSecondOperand, "PushedOutMul", InsertionPoint);
+ Value *StartIndex =
+ BinaryOperator::Create((Instruction::BinaryOps)Opcode,
+ Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
+ OffsSecondOperand, "PushedOutMul", InsertionPoint);
Instruction *Product =
- BinaryOperator::Create(Instruction::Mul, IncrementPerRound,
+ BinaryOperator::Create((Instruction::BinaryOps)Opcode, IncrementPerRound,
OffsSecondOperand, "Product", InsertionPoint);
// Increment NewIndex by Product instead of the multiplication
Instruction *NewIncrement = BinaryOperator::Create(
@@ -923,7 +937,7 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi,
// Check whether all usages of this instruction are as offsets of
// gathers/scatters or simple arithmetics only used by gathers/scatters
-static bool hasAllGatScatUsers(Instruction *I) {
+static bool hasAllGatScatUsers(Instruction *I, const DataLayout &DL) {
if (I->hasNUses(0)) {
return false;
}
@@ -936,8 +950,10 @@ static bool hasAllGatScatUsers(Instruction *I) {
return Gatscat;
} else {
unsigned OpCode = cast<Instruction>(U)->getOpcode();
- if ((OpCode == Instruction::Add || OpCode == Instruction::Mul) &&
- hasAllGatScatUsers(cast<Instruction>(U))) {
+ if ((OpCode == Instruction::Add || OpCode == Instruction::Mul ||
+ OpCode == Instruction::Shl ||
+ isAddLikeOr(cast<Instruction>(U), DL)) &&
+ hasAllGatScatUsers(cast<Instruction>(U), DL)) {
continue;
}
return false;
@@ -955,14 +971,15 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
if (!isa<Instruction>(Offsets))
return false;
Instruction *Offs = cast<Instruction>(Offsets);
- if (Offs->getOpcode() != Instruction::Add &&
- Offs->getOpcode() != Instruction::Mul)
+ if (Offs->getOpcode() != Instruction::Add && !isAddLikeOr(Offs, *DL) &&
+ Offs->getOpcode() != Instruction::Mul &&
+ Offs->getOpcode() != Instruction::Shl)
return false;
Loop *L = LI->getLoopFor(BB);
if (L == nullptr)
return false;
if (!Offs->hasOneUse()) {
- if (!hasAllGatScatUsers(Offs))
+ if (!hasAllGatScatUsers(Offs, *DL))
return false;
}
@@ -1060,11 +1077,13 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
switch (Offs->getOpcode()) {
case Instruction::Add:
+ case Instruction::Or:
pushOutAdd(NewPhi, OffsSecondOperand, IncrementingBlock == 1 ? 0 : 1);
break;
case Instruction::Mul:
- pushOutMul(NewPhi, IncrementPerRound, OffsSecondOperand, IncrementingBlock,
- Builder);
+ case Instruction::Shl:
+ pushOutMulShl(Offs->getOpcode(), NewPhi, IncrementPerRound,
+ OffsSecondOperand, IncrementingBlock, Builder);
break;
default:
return false;
@@ -1182,8 +1201,7 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
if (!GEP)
return false;
bool Changed = false;
- if (GEP->hasOneUse() &&
- dyn_cast<GetElementPtrInst>(GEP->getPointerOperand())) {
+ if (GEP->hasOneUse() && isa<GetElementPtrInst>(GEP->getPointerOperand())) {
IRBuilder<> Builder(GEP->getContext());
Builder.SetInsertPoint(GEP);
Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
@@ -1214,6 +1232,7 @@ bool MVEGatherScatterLowering::runOnFunction(Function &F) {
if (!ST->hasMVEIntegerOps())
return false;
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ DL = &F.getParent()->getDataLayout();
SmallVector<IntrinsicInst *, 4> Gathers;
SmallVector<IntrinsicInst *, 4> Scatters;
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 6fa5402096a6..dc58b5427425 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -40,6 +40,11 @@ MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
cl::desc("Enable merging Loop End and Dec instructions."),
cl::init(true));
+static cl::opt<bool>
+SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
+ cl::desc("Enable setting lr as a predicate in tail predication regions."),
+ cl::init(true));
+
namespace {
class MVETPAndVPTOptimisations : public MachineFunctionPass {
public:
@@ -434,10 +439,14 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
return false;
SmallVector<MachineInstr *, 4> VCTPs;
- for (MachineBasicBlock *BB : ML->blocks())
+ SmallVector<MachineInstr *, 4> MVEInstrs;
+ for (MachineBasicBlock *BB : ML->blocks()) {
for (MachineInstr &MI : *BB)
if (isVCTP(&MI))
VCTPs.push_back(&MI);
+ else if (findFirstVPTPredOperandIdx(MI) != -1)
+ MVEInstrs.push_back(&MI);
+ }
if (VCTPs.empty()) {
LLVM_DEBUG(dbgs() << " no VCTPs\n");
@@ -510,6 +519,16 @@ bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
LoopStart->eraseFromParent();
+ if (SetLRPredicate) {
+ // Each instruction in the loop needs to use the LR register taken from the
+ // Phi as its predicate.
+ Register LR = LoopPhi->getOperand(0).getReg();
+ for (MachineInstr *MI : MVEInstrs) {
+ int Idx = findFirstVPTPredOperandIdx(*MI);
+ MI->getOperand(Idx + 2).setReg(LR);
+ }
+ }
+
return true;
}
@@ -991,6 +1010,7 @@ bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
.add(MI.getOperand(1))
.addImm(ARMVCC::Then)
.add(MI.getOperand(4))
+ .add(MI.getOperand(5))
.add(MI.getOperand(2));
// Silence unused variable warning in release builds.
(void)MIBuilder;
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index cf9e2484bab5..6a5bc9284266 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -293,14 +293,18 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
// Check for equality of TC and Ceil by calculating SCEV expression
// TC - Ceil and test it for zero.
//
- bool Zero = SE->getMinusSCEV(
- SE->getBackedgeTakenCount(L),
- SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
- SE->getNegativeSCEV(VW)),
- VW))
- ->isZero();
-
- if (!Zero) {
+ const SCEV *Sub =
+ SE->getMinusSCEV(SE->getBackedgeTakenCount(L),
+ SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
+ SE->getNegativeSCEV(VW)),
+ VW));
+
+ // Use context-sensitive facts about the path to the loop to refine. This
+ // matters because the backedge-taken count can incorporate context-sensitive
+ // reasoning, while our RHS just above does not.
+ Sub = SE->applyLoopGuards(Sub, L);
+
+ if (!Sub->isZero()) {
LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
return false;
}
diff --git a/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index a7f7d75e356e..4d514f3ca444 100644
--- a/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/ARMTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheARMLETarget() {
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index ccd272a8617d..e4e95f63f0a6 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -582,10 +582,10 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
static void findTemporariesForLR(const BitVector &GPRsNoLRSP,
const BitVector &PopFriendly,
const LivePhysRegs &UsedRegs, unsigned &PopReg,
- unsigned &TmpReg) {
+ unsigned &TmpReg, MachineRegisterInfo &MRI) {
PopReg = TmpReg = 0;
for (auto Reg : GPRsNoLRSP.set_bits()) {
- if (!UsedRegs.contains(Reg)) {
+ if (UsedRegs.available(MRI, Reg)) {
// Remember the first pop-friendly register and exit.
if (PopFriendly.test(Reg)) {
PopReg = Reg;
@@ -693,7 +693,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
GPRsNoLRSP.reset(ARM::LR);
GPRsNoLRSP.reset(ARM::SP);
GPRsNoLRSP.reset(ARM::PC);
- findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+ findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg,
+ MF.getRegInfo());
// If we couldn't find a pop-friendly register, try restoring LR before
// popping the other callee-saved registers, so we could use one of them as a
@@ -704,7 +705,8 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
PrevMBBI--;
if (PrevMBBI->getOpcode() == ARM::tPOP) {
UsedRegs.stepBackward(*PrevMBBI);
- findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg);
+ findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg,
+ TemporaryReg, MF.getRegInfo());
if (PopReg) {
MBBI = PrevMBBI;
UseLDRSP = true;
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index cf5eb4b4c0f1..4b18f5e20d40 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -135,6 +135,10 @@ void Thumb1InstrInfo::expandLoadStackGuard(
MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
const TargetMachine &TM = MF.getTarget();
+
+ assert(MF.getFunction().getParent()->getStackProtectorGuard() != "tls" &&
+ "TLS stack protector not supported for Thumb1 targets");
+
if (TM.isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi);
else
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 5204e3b03e9e..bdb167a08e61 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -250,7 +250,19 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void Thumb2InstrInfo::expandLoadStackGuard(
MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
- if (MF.getTarget().isPositionIndependent())
+ Module &M = *MF.getFunction().getParent();
+
+ if (M.getStackProtectorGuard() == "tls") {
+ expandLoadStackGuardBase(MI, ARM::t2MRC, ARM::t2LDRi12);
+ return;
+ }
+
+ const GlobalValue *GV =
+ cast<GlobalValue>((*MI->memoperands_begin())->getValue());
+
+ if (MF.getSubtarget<ARMSubtarget>().isGVInGOT(GV))
+ expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::t2LDRi12);
+ else if (MF.getTarget().isPositionIndependent())
expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12);
else
expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12);
@@ -792,8 +804,12 @@ void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
MachineBasicBlock::iterator Iter = ++Instr.getIterator(),
End = Instr.getParent()->end();
+ while (Iter != End && Iter->isDebugInstr())
+ ++Iter;
+
// Verify that the instruction after the VPT/VPST is predicated (it should
// be), and skip it.
+ assert(Iter != End && "Expected some instructions in any VPT block");
assert(
getVPTInstrPredicate(*Iter) == ARMVCC::Then &&
"VPT/VPST should be followed by an instruction with a 'then' predicate!");
@@ -802,6 +818,10 @@ void llvm::recomputeVPTBlockMask(MachineInstr &Instr) {
// Iterate over the predicated instructions, updating the BlockMask as we go.
ARM::PredBlockMask BlockMask = ARM::PredBlockMask::T;
while (Iter != End) {
+ if (Iter->isDebugInstr()) {
+ ++Iter;
+ continue;
+ }
ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*Iter);
if (Pred == ARMVCC::None)
break;
diff --git a/llvm/lib/Target/AVR/AVR.h b/llvm/lib/Target/AVR/AVR.h
index 7332307c07a3..143c339c0664 100644
--- a/llvm/lib/Target/AVR/AVR.h
+++ b/llvm/lib/Target/AVR/AVR.h
@@ -32,8 +32,8 @@ FunctionPass *createAVRDynAllocaSRPass();
FunctionPass *createAVRBranchSelectionPass();
void initializeAVRShiftExpandPass(PassRegistry &);
-void initializeAVRExpandPseudoPass(PassRegistry&);
-void initializeAVRRelaxMemPass(PassRegistry&);
+void initializeAVRExpandPseudoPass(PassRegistry &);
+void initializeAVRRelaxMemPass(PassRegistry &);
/// Contains the AVR backend.
namespace AVR {
diff --git a/llvm/lib/Target/AVR/AVR.td b/llvm/lib/Target/AVR/AVR.td
index 53768f99df3b..22ffc4a368ad 100644
--- a/llvm/lib/Target/AVR/AVR.td
+++ b/llvm/lib/Target/AVR/AVR.td
@@ -45,8 +45,8 @@ include "AVRCallingConv.td"
//===---------------------------------------------------------------------===//
def AVRAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- bit isMCAsmWriter = 1;
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
}
//===---------------------------------------------------------------------===//
@@ -71,10 +71,9 @@ def AVRAsmParserVariant : AsmParserVariant {
//===---------------------------------------------------------------------===//
def AVR : Target {
- let InstructionSet = AVRInstrInfo;
- let AssemblyWriters = [AVRAsmWriter];
+ let InstructionSet = AVRInstrInfo;
+ let AssemblyWriters = [AVRAsmWriter];
- let AssemblyParsers = [AVRAsmParser];
+ let AssemblyParsers = [AVRAsmParser];
let AssemblyParserVariants = [AVRAsmParserVariant];
}
-
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index e8a13c712210..259ab1bc7aec 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -24,11 +24,12 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "avr-asm-printer"
@@ -38,9 +39,8 @@ namespace llvm {
/// An AVR assembly code printer.
class AVRAsmPrinter : public AsmPrinter {
public:
- AVRAsmPrinter(TargetMachine &TM,
- std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)), MRI(*TM.getMCRegisterInfo()) { }
+ AVRAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MRI(*TM.getMCRegisterInfo()) {}
StringRef getPassName() const override { return "AVR Assembly Printer"; }
@@ -56,8 +56,13 @@ public:
const MCExpr *lowerConstant(const Constant *CV) override;
+ void emitXXStructor(const DataLayout &DL, const Constant *CV) override;
+
+ bool doFinalization(Module &M) override;
+
private:
const MCRegisterInfo &MRI;
+ bool EmittedStructorSymbolAttrs = false;
};
void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
@@ -139,9 +144,8 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNum, const char *ExtraCode,
raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- llvm_unreachable("This branch is not implemented yet");
- }
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier
const MachineOperand &MO = MI->getOperand(OpNum);
(void)MO;
@@ -193,9 +197,47 @@ const MCExpr *AVRAsmPrinter::lowerConstant(const Constant *CV) {
return AsmPrinter::lowerConstant(CV);
}
+void AVRAsmPrinter::emitXXStructor(const DataLayout &DL, const Constant *CV) {
+ if (!EmittedStructorSymbolAttrs) {
+ OutStreamer->emitRawComment(
+ " Emitting these undefined symbol references causes us to link the"
+ " libgcc code that runs our constructors/destructors");
+ OutStreamer->emitRawComment(" This matches GCC's behavior");
+
+ MCSymbol *CtorsSym = OutContext.getOrCreateSymbol("__do_global_ctors");
+ OutStreamer->emitSymbolAttribute(CtorsSym, MCSA_Global);
+
+ MCSymbol *DtorsSym = OutContext.getOrCreateSymbol("__do_global_dtors");
+ OutStreamer->emitSymbolAttribute(DtorsSym, MCSA_Global);
+
+ EmittedStructorSymbolAttrs = true;
+ }
+
+ AsmPrinter::emitXXStructor(DL, CV);
+}
+
+bool AVRAsmPrinter::doFinalization(Module &M) {
+ MCSymbol *DoCopyData = OutContext.getOrCreateSymbol("__do_copy_data");
+ MCSymbol *DoClearBss = OutContext.getOrCreateSymbol("__do_clear_bss");
+
+ // FIXME: We can disable __do_copy_data if there are no static RAM variables.
+
+ OutStreamer->emitRawComment(
+ " Declaring this symbol tells the CRT that it should");
+ OutStreamer->emitRawComment(
+ "copy all variables from program memory to RAM on startup");
+ OutStreamer->emitSymbolAttribute(DoCopyData, MCSA_Global);
+
+ OutStreamer->emitRawComment(
+ " Declaring this symbol tells the CRT that it should");
+ OutStreamer->emitRawComment("clear the zeroed data section on startup");
+ OutStreamer->emitSymbolAttribute(DoClearBss, MCSA_Global);
+
+ return AsmPrinter::doFinalization(M);
+}
+
} // end of namespace llvm
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRAsmPrinter() {
llvm::RegisterAsmPrinter<llvm::AVRAsmPrinter> X(llvm::getTheAVRTarget());
}
-
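
Editor's note: the two overrides added above (emitXXStructor and doFinalization) reduce to the one idea spelled out in their comments: emit a few `.global` references to undefined CRT symbols exactly once, so the linker pulls in the libgcc/CRT routines that run the structor tables, copy `.data` to RAM, and clear `.bss`. Below is a minimal sketch of that one-shot pattern in plain C++ — not the LLVM AsmPrinter/MCStreamer API; the symbol names come from the patch, while the `.word` entry and the driver in main are illustrative assumptions.

#include <iostream>
#include <string>
#include <vector>

// One-shot emission guard mirroring EmittedStructorSymbolAttrs in the patch.
class StructorPrinter {
  std::ostream &OS;
  bool EmittedStructorSymbolAttrs = false;

public:
  explicit StructorPrinter(std::ostream &OS) : OS(OS) {}

  // Called once per static constructor/destructor entry.
  void emitXXStructor(const std::string &Fn) {
    if (!EmittedStructorSymbolAttrs) {
      // Referencing these undefined symbols drags the libgcc code that walks
      // the ctor/dtor tables into the link, matching GCC's behavior.
      OS << ".global __do_global_ctors\n";
      OS << ".global __do_global_dtors\n";
      EmittedStructorSymbolAttrs = true;
    }
    OS << ".word " << Fn << "\n"; // the structor table entry itself (illustrative)
  }

  // Called once at the end of the module.
  void doFinalization() {
    OS << ".global __do_copy_data\n"; // CRT: copy .data from flash to RAM
    OS << ".global __do_clear_bss\n"; // CRT: zero the .bss section
  }
};

int main() {
  StructorPrinter P(std::cout);
  std::vector<std::string> Ctors = {"ctor_a", "ctor_b"};
  for (const std::string &C : Ctors)
    P.emitXXStructor(C);
  P.doFinalization();
}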
diff --git a/llvm/lib/Target/AVR/AVRCallingConv.td b/llvm/lib/Target/AVR/AVRCallingConv.td
index 65545e531a88..87874c5c50b2 100644
--- a/llvm/lib/Target/AVR/AVRCallingConv.td
+++ b/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -14,9 +14,8 @@
//===----------------------------------------------------------------------===//
// Special return value calling convention for runtime functions.
-def RetCC_AVR_BUILTIN : CallingConv
-<[
- CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
+def RetCC_AVR_BUILTIN : CallingConv<[
+ CCIfType<[i8], CCAssignToReg<[R24, R25]>>,
CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
]>;
@@ -27,8 +26,7 @@ def RetCC_AVR_BUILTIN : CallingConv
// The calling conventions are implemented in custom C++ code
// Calling convention for variadic functions.
-def ArgCC_AVR_Vararg : CallingConv
-<[
+def ArgCC_AVR_Vararg : CallingConv<[
// i16 are always passed through the stack with an alignment of 1.
CCAssignToStack<2, 1>
]>;
@@ -38,4 +36,4 @@ def ArgCC_AVR_Vararg : CallingConv
//===----------------------------------------------------------------------===//
def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
-def CSR_Interrupts : CalleeSavedRegs<(add (sequence "R%u", 31, 0))>;
+def CSR_Interrupts : CalleeSavedRegs<(add(sequence "R%u", 31, 0))>;
diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td
index 9507aa40c3d8..7ad0fe904a81 100644
--- a/llvm/lib/Target/AVR/AVRDevices.td
+++ b/llvm/lib/Target/AVR/AVRDevices.td
@@ -7,19 +7,18 @@
// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
// avr2 (with SRAM) adds the rest of the variants.
-
// A feature set aggregates features, grouping them. We don't want to create a
// new member in AVRSubtarget (to store a value) for each set because we do not
// care if the set is supported, only the subfeatures inside the set. We fix
// this by simply setting the same dummy member for all feature sets, which is
// then ignored.
class FeatureSet<string name, string desc, list<SubtargetFeature> i>
- : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
+ : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
// A family of microcontrollers, defining a set of supported features.
class Family<string name, list<SubtargetFeature> i>
- : FeatureSet<name, !strconcat("The device is a part of the ",
- name, " family"), i>;
+ : FeatureSet<
+ name, !strconcat("The device is a part of the ", name, " family"), i>;
// The device has SRAM, and supports the bare minimum of
// SRAM-relevant instructions.
@@ -32,122 +31,122 @@ class Family<string name, list<SubtargetFeature> i>
// `LDS Rd, K`
// `STS k, Rr`
// `PUSH`/`POP`
-def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
- "The device has random access memory">;
+def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
+ "The device has random access memory">;
// The device supports the `JMP k` and `CALL k` instructions.
-def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
- "The device supports the `JMP` and "
- "`CALL` instructions">;
-
+def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
+ "The device supports the `JMP` and "
+ "`CALL` instructions">;
// The device supports the indirect branches `IJMP` and `ICALL`.
-def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL",
- "true",
- "The device supports `IJMP`/`ICALL`"
- "instructions">;
+def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL", "true",
+ "The device supports `IJMP`/`ICALL`"
+ "instructions">;
// The device supports the extended indirect branches `EIJMP` and `EICALL`.
-def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL",
- "true", "The device supports the "
- "`EIJMP`/`EICALL` instructions">;
+def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL", "true",
+ "The device supports the "
+ "`EIJMP`/`EICALL` instructions">;
// The device supports `ADDI Rd, K`, `SUBI Rd, K`.
-def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW",
- "true", "Enable 16-bit register-immediate "
- "addition and subtraction instructions">;
+def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW", "true",
+ "Enable 16-bit register-immediate "
+ "addition and subtraction instructions">;
// The device has an 8-bit stack pointer (SP) register.
-def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack",
- "true", "The device has an 8-bit "
- "stack pointer">;
+def FeatureSmallStack
+ : SubtargetFeature<"smallstack", "m_hasSmallStack", "true",
+ "The device has an 8-bit "
+ "stack pointer">;
// The device supports the 16-bit GPR pair MOVW instruction.
-def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
- "The device supports the 16-bit MOVW "
- "instruction">;
+def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
+ "The device supports the 16-bit MOVW "
+ "instruction">;
// The device supports the `LPM` instruction, with implied destination being r0.
-def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
+def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
"The device supports the `LPM` instruction">;
// The device supports the `LPM Rd, Z[+]` instruction.
-def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
- "The device supports the `LPM Rd, Z[+]` "
- "instruction">;
+def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
+ "The device supports the `LPM Rd, Z[+]` "
+ "instruction">;
// The device supports the `ELPM` instruction.
-def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
- "The device supports the ELPM instruction">;
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
+ "The device supports the ELPM instruction">;
// The device supports the `ELPM Rd, Z[+]` instructions.
-def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
- "The device supports the `ELPM Rd, Z[+]` "
- "instructions">;
+def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
+ "The device supports the `ELPM Rd, Z[+]` "
+ "instructions">;
// The device supports the `SPM` instruction.
-def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
+def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
"The device supports the `SPM` instruction">;
// The device supports the `SPM Z+` instruction.
-def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
- "The device supports the `SPM Z+` "
- "instruction">;
+def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
+ "The device supports the `SPM Z+` "
+ "instruction">;
// The device supports the `DES k` instruction.
-def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
+def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
"The device supports the `DES k` encryption "
"instruction">;
// The device supports the Read-Write-Modify instructions
// XCH, LAS, LAC, and LAT.
-def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
+def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
"The device supports the read-write-modify "
"instructions: XCH, LAS, LAC, LAT">;
// The device supports the `[F]MUL[S][U]` family of instructions.
-def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication",
- "true", "The device supports the "
- "multiplication instructions">;
+def FeatureMultiplication
+ : SubtargetFeature<"mul", "m_supportsMultiplication", "true",
+ "The device supports the "
+ "multiplication instructions">;
// The device supports the `BREAK` instruction.
-def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
- "The device supports the `BREAK` debugging "
- "instruction">;
+def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
+ "The device supports the `BREAK` debugging "
+ "instruction">;
// The device has instruction encodings specific to the Tiny core.
-def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
- "m_hasTinyEncoding", "true",
- "The device has Tiny core specific "
- "instruction encodings">;
+def FeatureTinyEncoding
+ : SubtargetFeature<"tinyencoding", "m_hasTinyEncoding", "true",
+ "The device has Tiny core specific "
+ "instruction encodings">;
// The device has CPU registers mapped in data address space
-def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR",
- "true", "The device has CPU registers "
+def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", "true",
+ "The device has CPU registers "
"mapped in data address space">;
-class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
- !strconcat("ELF::",name), "">;
+class ELFArch<string name>
+ : SubtargetFeature<"", "ELFArch", !strconcat("ELF::", name), "">;
// ELF e_flags architecture values
-def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
-def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
-def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
-def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
-def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
-def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
-def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
-def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
-def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
-def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
-def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
-def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
-def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
-def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
-def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
-def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
-def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
-def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
+def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
+def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
+def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
+def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
+def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
+def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
+def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
+def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
+def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
+def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
+def ELFArchTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
+def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
+def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
+def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
+def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
+def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
+def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
+def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
//===---------------------------------------------------------------------===//
// AVR Families
@@ -155,68 +154,64 @@ def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
// The device has at least the bare minimum that **every** single AVR
// device should have.
-def FamilyAVR0 : Family<"avr0", []>;
+def FamilyAVR0 : Family<"avr0", []>;
-def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>;
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>;
-def FamilyAVR2 : Family<"avr2",
- [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
- FeatureSRAM]>;
+def FamilyAVR2
+ : Family<"avr2",
+ [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW, FeatureSRAM]>;
-def FamilyAVR25 : Family<"avr25",
- [FamilyAVR2, FeatureMOVW, FeatureLPMX,
- FeatureSPM, FeatureBREAK]>;
+def FamilyAVR25
+ : Family<"avr25",
+ [FamilyAVR2, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureBREAK]>;
-def FamilyAVR3 : Family<"avr3",
- [FamilyAVR2, FeatureJMPCALL]>;
+def FamilyAVR3 : Family<"avr3", [FamilyAVR2, FeatureJMPCALL]>;
-def FamilyAVR31 : Family<"avr31",
- [FamilyAVR3, FeatureELPM]>;
+def FamilyAVR31 : Family<"avr31", [FamilyAVR3, FeatureELPM]>;
-def FamilyAVR35 : Family<"avr35",
- [FamilyAVR3, FeatureMOVW, FeatureLPMX,
- FeatureSPM, FeatureBREAK]>;
+def FamilyAVR35
+ : Family<"avr35",
+ [FamilyAVR3, FeatureMOVW, FeatureLPMX, FeatureSPM, FeatureBREAK]>;
-def FamilyAVR4 : Family<"avr4",
- [FamilyAVR2, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK]>;
+def FamilyAVR4 : Family<"avr4", [
+ FamilyAVR2, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK
+]>;
-def FamilyAVR5 : Family<"avr5",
- [FamilyAVR3, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK]>;
+def FamilyAVR5 : Family<"avr5", [
+ FamilyAVR3, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK
+]>;
-def FamilyAVR51 : Family<"avr51",
- [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
+def FamilyAVR51 : Family<"avr51", [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
-def FamilyAVR6 : Family<"avr6",
- [FamilyAVR51]>;
+def FamilyAVR6 : Family<"avr6", [FamilyAVR51]>;
-def FamilyTiny : Family<"avrtiny",
- [FamilyAVR0, FeatureBREAK, FeatureSRAM,
- FeatureTinyEncoding]>;
+def FamilyTiny
+ : Family<"avrtiny",
+ [FamilyAVR0, FeatureBREAK, FeatureSRAM, FeatureTinyEncoding]>;
-def FamilyXMEGA : Family<"xmega",
- [FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW,
- FeatureSRAM, FeatureJMPCALL, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX,
- FeatureDES, FeatureELPM, FeatureELPMX]>;
+def FamilyXMEGA : Family<"xmega", [
+ FamilyAVR0, FeatureLPM, FeatureIJMPCALL, FeatureADDSUBIW, FeatureSRAM,
+ FeatureJMPCALL, FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK, FeatureEIJMPCALL, FeatureSPMX, FeatureDES, FeatureELPM,
+ FeatureELPMX
+]>;
-def FamilyXMEGAU : Family<"xmegau",
- [FamilyXMEGA, FeatureRMW]>;
+def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>;
-def FeatureSetSpecial : FeatureSet<"special",
- "Enable use of the entire instruction "
- "set - used for debugging",
- [FeatureSRAM, FeatureJMPCALL,
- FeatureIJMPCALL, FeatureEIJMPCALL,
- FeatureADDSUBIW, FeatureMOVW,
- FeatureLPM, FeatureLPMX, FeatureELPM,
- FeatureELPMX, FeatureSPM, FeatureSPMX,
- FeatureDES, FeatureRMW,
- FeatureMultiplication, FeatureBREAK, FeatureMMR]>;
+def FeatureSetSpecial
+ : FeatureSet<"special",
+ "Enable use of the entire instruction "
+ "set - used for debugging",
+ [
+ FeatureSRAM, FeatureJMPCALL, FeatureIJMPCALL,
+ FeatureEIJMPCALL, FeatureADDSUBIW, FeatureMOVW, FeatureLPM,
+ FeatureLPMX, FeatureELPM, FeatureELPMX, FeatureSPM,
+ FeatureSPMX, FeatureDES, FeatureRMW, FeatureMultiplication,
+ FeatureBREAK, FeatureMMR
+ ]>;
//===---------------------------------------------------------------------===//
// AVR microcontrollers supported.
@@ -224,284 +219,307 @@ def FeatureSetSpecial : FeatureSet<"special",
class Device<string Name, Family Fam, ELFArch Arch,
list<SubtargetFeature> ExtraFeatures = []>
- : Processor<Name, NoItineraries, !listconcat([Fam,Arch],ExtraFeatures)>;
+ : Processor<Name, NoItineraries, !listconcat([Fam, Arch], ExtraFeatures)>;
// Generic MCUs
// Note that several versions of GCC have strange ELF architecture
// settings for backwards compatibility - see `gas/config/tc-avr.c`
// in AVR binutils. We do not replicate this.
-def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
-def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
-def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
-def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
-def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
-def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
-def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
-def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
-def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
-def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
-def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
-def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
-def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"avrtiny", FamilyTiny, ELFArchTiny>;
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
+def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
+def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
+def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
+def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
+def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
+def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
+def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
+def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
+def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
+def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"avrtiny", FamilyTiny, ELFArchTiny>;
// Specific MCUs
-def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
-def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
-def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
-def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25,
- [FeatureMOVW, FeatureLPMX]>;
-def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
-def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny841", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
-def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
-def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
-def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
-def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
-def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
-def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
-def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
-def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8", FamilyAVR2, ELFArchAVR4,
+def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
+def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
+def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
+def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, [FeatureMOVW, FeatureLPMX]>;
+def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
+def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny441", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny841", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
+def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
+def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
+def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
+def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
+def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
+def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8a", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48pb", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88pb", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
+def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pb", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pb", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
+def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
-def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
+def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
+def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168pb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324pb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328pb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
+def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328pb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
[FeatureMultiplication, FeatureMOVW, FeatureLPMX]>;
-def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
-def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
-def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"attiny4", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny5", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny9", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny10", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny20", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny40", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny102", FamilyTiny, ELFArchTiny>;
-def : Device<"attiny104", FamilyTiny, ELFArchTiny>;
-
+def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega8e5", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
+def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"attiny4", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny5", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny9", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny10", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny20", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny40", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny102", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny104", FamilyTiny, ELFArchTiny>;
+def : Device<"attiny202", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny402", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny204", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny404", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny804", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1604", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny406", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny806", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1606", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny807", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1607", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny212", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny412", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny214", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny414", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny814", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1614", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny416", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny816", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1616", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny3216", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny417", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny817", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny1617", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"attiny3217", FamilyXMEGA, ELFArchXMEGA3>;
diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index f9f91f50c9d5..cb85d73772c5 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -70,25 +70,24 @@ private:
return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode), DstReg);
}
- MachineRegisterInfo &getRegInfo(Block &MBB) { return MBB.getParent()->getRegInfo(); }
+ MachineRegisterInfo &getRegInfo(Block &MBB) {
+ return MBB.getParent()->getRegInfo();
+ }
bool expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI);
bool expandLogic(unsigned Op, Block &MBB, BlockIt MBBI);
bool expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI);
bool isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const;
- template<typename Func>
- bool expandAtomic(Block &MBB, BlockIt MBBI, Func f);
+ template <typename Func> bool expandAtomic(Block &MBB, BlockIt MBBI, Func f);
- template<typename Func>
+ template <typename Func>
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI, Func f);
bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
- bool expandAtomicArithmeticOp(unsigned MemOpcode,
- unsigned ArithOpcode,
- Block &MBB,
- BlockIt MBBI);
+ bool expandAtomicArithmeticOp(unsigned MemOpcode, unsigned ArithOpcode,
+ Block &MBB, BlockIt MBBI);
/// Specific shift implementation.
bool expandLSLB7Rd(Block &MBB, BlockIt MBBI);
@@ -150,8 +149,8 @@ bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
return Modified;
}
-bool AVRExpandPseudo::
-expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandArith(unsigned OpLo, unsigned OpHi, Block &MBB,
+ BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
@@ -164,14 +163,15 @@ expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
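
Editor's note: as a reminder of what the reformatted expandArith above actually emits, each 16-bit arithmetic pseudo becomes an 8-bit OpLo on the low registers followed by an OpHi on the high registers that consumes the carry left in SREG (ADD/ADC, SUB/SBC, and so on). Below is a small stand-alone C++ sketch of those semantics; the register values and printf driver are illustrative, not taken from the backend.

#include <cstdint>
#include <cstdio>

struct RegPair { uint8_t Lo, Hi; }; // e.g. R25:R24, with R24 the low byte

static void addw(RegPair &Dst, RegPair Src) {
  unsigned LoSum = unsigned(Dst.Lo) + Src.Lo;            // OpLo: ADD DstLo, SrcLo
  bool Carry = LoSum > 0xff;                             // carry lands in SREG
  Dst.Lo = uint8_t(LoSum);
  Dst.Hi = uint8_t(Dst.Hi + Src.Hi + (Carry ? 1 : 0));   // OpHi: ADC DstHi, SrcHi
}

int main() {
  RegPair A{0xff, 0x12}, B{0x01, 0x00}; // 0x12ff + 0x0001
  addw(A, B);
  std::printf("%02x%02x\n", A.Hi, A.Lo); // prints 1300
}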
@@ -183,8 +183,7 @@ expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
return true;
}
-bool AVRExpandPseudo::
-expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
@@ -196,18 +195,20 @@ expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, Op)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
// SREG is always implicitly dead
MIBLO->getOperand(3).setIsDead();
- auto MIBHI = buildMI(MBB, MBBI, Op)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -216,8 +217,8 @@ expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
return true;
}
-bool AVRExpandPseudo::
- isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const {
+bool AVRExpandPseudo::isLogicImmOpRedundant(unsigned Op,
+ unsigned ImmVal) const {
// ANDI Rd, 0xff is redundant.
if (Op == AVR::ANDIRdK && ImmVal == 0xff)
@@ -230,8 +231,7 @@ bool AVRExpandPseudo::
return false;
}
-bool AVRExpandPseudo::
-expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
@@ -244,20 +244,22 @@ expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
if (!isLogicImmOpRedundant(Op, Lo8)) {
- auto MIBLO = buildMI(MBB, MBBI, Op)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(SrcIsKill))
- .addImm(Lo8);
+ auto MIBLO =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill))
+ .addImm(Lo8);
// SREG is always implicitly dead
MIBLO->getOperand(3).setIsDead();
}
if (!isLogicImmOpRedundant(Op, Hi8)) {
- auto MIBHI = buildMI(MBB, MBBI, Op)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(SrcIsKill))
- .addImm(Hi8);
+ auto MIBHI =
+ buildMI(MBB, MBBI, Op)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill))
+ .addImm(Hi8);
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -292,13 +294,15 @@ bool AVRExpandPseudo::expand<AVR::SUBIWRdK>(Block &MBB, BlockIt MBBI) {
bool ImpIsDead = MI.getOperand(3).isDead();
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, AVR::SUBIRdK)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(SrcIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, AVR::SUBIRdK)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill));
- auto MIBHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(SrcIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, AVR::SBCIRdK)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill));
switch (MI.getOperand(2).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -349,18 +353,20 @@ bool AVRExpandPseudo::expand<AVR::SBCIWRdK>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::SBCIRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(SrcIsKill))
- .addImm(Lo8);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill))
+ .addImm(Lo8);
// SREG is always implicitly killed
MIBLO->getOperand(4).setIsKill();
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(SrcIsKill))
- .addImm(Hi8);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill))
+ .addImm(Hi8);
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -409,16 +415,18 @@ bool AVRExpandPseudo::expand<AVR::COMWRd>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::COMRd;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
// SREG is always implicitly dead
MIBLO->getOperand(2).setIsDead();
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(2).setIsDead();
@@ -481,12 +489,12 @@ bool AVRExpandPseudo::expand<AVR::CPWRdRr>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(2).setIsDead();
@@ -513,15 +521,15 @@ bool AVRExpandPseudo::expand<AVR::CPCWRdRr>(Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, getKillRegState(DstIsKill))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
// SREG is always implicitly killed
MIBLO->getOperand(3).setIsKill();
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, getKillRegState(DstIsKill))
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
if (ImpIsDead)
MIBHI->getOperand(2).setIsDead();
@@ -543,11 +551,13 @@ bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::LDIRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
switch (MI.getOperand(1).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -592,11 +602,13 @@ bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::LDSRdK;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
switch (MI.getOperand(1).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -656,9 +668,9 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addImm(1);
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(1);
if (TmpReg) {
// Move the high byte into the final destination.
@@ -689,15 +701,17 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define)
- .addReg(SrcReg, RegState::Kill);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, RegState::Kill);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
- .addReg(SrcReg, RegState::Kill);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
+ .addReg(SrcReg, RegState::Kill);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -720,15 +734,17 @@ bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define)
- .addReg(SrcReg, RegState::Kill);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, RegState::Kill);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
- .addReg(SrcReg, RegState::Kill);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
+ .addReg(SrcReg, RegState::Kill);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -750,8 +766,8 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::LDDRdPtrQ;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Offset is out of range");
// Use a temporary register if src and dst registers are the same.
@@ -763,9 +779,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(CurDstLoReg, RegState::Define)
- .addReg(SrcReg)
- .addImm(Imm);
+ .addReg(CurDstLoReg, RegState::Define)
+ .addReg(SrcReg)
+ .addImm(Imm);
// Push low byte onto stack if necessary.
if (TmpReg)
@@ -773,9 +789,9 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill))
- .addImm(Imm + 1);
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(Imm + 1);
if (TmpReg) {
// Move the high byte into the final destination.
@@ -813,8 +829,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
// Load low byte.
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(CurDstLoReg, RegState::Define)
- .addReg(SrcReg);
+ .addReg(CurDstLoReg, RegState::Define)
+ .addReg(SrcReg);
// Push low byte onto stack if necessary.
if (TmpReg)
@@ -822,8 +838,8 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
// Load high byte.
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(CurDstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(CurDstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
if (TmpReg) {
// Move the high byte into the final destination.
@@ -845,15 +861,15 @@ bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("wide LPMPi is unimplemented");
}
-template<typename Func>
+template <typename Func>
bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
// Remove the pseudo instruction.
MachineInstr &MI = *MBBI;
// Store the SREG.
buildMI(MBB, MBBI, AVR::INRdA)
- .addReg(SCRATCH_REGISTER, RegState::Define)
- .addImm(SREG_ADDR);
+ .addReg(SCRATCH_REGISTER, RegState::Define)
+ .addImm(SREG_ADDR);
// Disable exceptions.
buildMI(MBB, MBBI, AVR::BCLRs).addImm(7); // CLI
@@ -861,58 +877,52 @@ bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
f(MI);
// Restore the status reg.
- buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(SREG_ADDR)
- .addReg(SCRATCH_REGISTER);
+ buildMI(MBB, MBBI, AVR::OUTARr).addImm(SREG_ADDR).addReg(SCRATCH_REGISTER);
MI.eraseFromParent();
return true;
}
-template<typename Func>
-bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
- Block &MBB,
- BlockIt MBBI,
- Func f) {
+template <typename Func>
+bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB,
+ BlockIt MBBI, Func f) {
return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
- auto Op1 = MI.getOperand(0);
- auto Op2 = MI.getOperand(1);
+ auto Op1 = MI.getOperand(0);
+ auto Op2 = MI.getOperand(1);
- MachineInstr &NewInst =
- *buildMI(MBB, MBBI, Opcode).add(Op1).add(Op2).getInstr();
- f(NewInst);
+ MachineInstr &NewInst =
+ *buildMI(MBB, MBBI, Opcode).add(Op1).add(Op2).getInstr();
+ f(NewInst);
});
}
-bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
- Block &MBB,
+bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode, Block &MBB,
BlockIt MBBI) {
return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {});
}
bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
- unsigned ArithOpcode,
- Block &MBB,
+ unsigned ArithOpcode, Block &MBB,
BlockIt MBBI) {
return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
- auto DstReg = MI.getOperand(0).getReg();
- auto PtrOp = MI.getOperand(1);
- auto SrcReg = MI.getOperand(2).getReg();
+ auto DstReg = MI.getOperand(0).getReg();
+ auto PtrOp = MI.getOperand(1);
+ auto SrcReg = MI.getOperand(2).getReg();
- unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
- unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
+ unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
+ unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
- // FIXME: this returns the new value (after the operation), not the old
- // value as the atomicrmw instruction is supposed to do!
+ // FIXME: this returns the new value (after the operation), not the old
+ // value as the atomicrmw instruction is supposed to do!
- // Create the load
- buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg());
+ // Create the load
+ buildMI(MBB, MBBI, LoadOpcode, DstReg).addReg(PtrOp.getReg());
- // Create the arithmetic op
- buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg);
+ // Create the arithmetic op
+ buildMI(MBB, MBBI, ArithOpcode, DstReg).addReg(DstReg).addReg(SrcReg);
- // Create the store
- buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg);
+ // Create the store
+ buildMI(MBB, MBBI, StoreOpcode).add(PtrOp).addReg(DstReg);
});
}
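
The FIXME in expandAtomicArithmeticOp above notes that this expansion hands back the value *after* the arithmetic, whereas LLVM's atomicrmw is defined to produce the value the memory location held *before* the operation. A minimal standalone C++ sketch of that difference (single-threaded model; the function names are illustrative only and not part of the patch):

#include <cstdint>
#include <cstdio>

// Single-threaded model of the semantics gap flagged by the FIXME: atomicrmw
// should yield the old memory value, but the load/modify/store sequence above
// ends up handing back the new one.
static uint8_t Memory = 5;

static uint8_t rmwAddReturningOld(uint8_t V) {
  uint8_t Old = Memory; // value atomicrmw is supposed to produce
  Memory = Old + V;
  return Old;
}

static uint8_t rmwAddAsExpanded(uint8_t V) {
  uint8_t Dst = Memory; // load into the destination register
  Dst = Dst + V;        // arithmetic op overwrites the loaded value
  Memory = Dst;         // store back
  return Dst;           // new value, not the old one
}

int main() {
  std::printf("old-value semantics: %u\n", (unsigned)rmwAddReturningOld(3)); // 5
  Memory = 5;
  std::printf("as expanded:         %u\n", (unsigned)rmwAddAsExpanded(3));   // 8
  return 0;
}
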
@@ -924,8 +934,7 @@ Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
RS.forward(MI);
BitVector Candidates =
- TRI->getAllocatableSet
- (*MBB.getParent(), &AVR::GPR8RegClass);
+ TRI->getAllocatableSet(*MBB.getParent(), &AVR::GPR8RegClass);
// Exclude all the registers being used by the instruction.
for (MachineOperand &MO : MI.operands()) {
@@ -942,77 +951,77 @@ Register AVRExpandPseudo::scavengeGPR8(MachineInstr &MI) {
return Reg;
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoad8>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoad16>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::LDWRdPtr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicStore8>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::STPtrRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicStore16>(Block &MBB, BlockIt MBBI) {
return expandAtomicBinaryOp(AVR::STWPtrRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadSub8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadSub16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadOr8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadOr16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadXor8>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicLoadXor16>(Block &MBB, BlockIt MBBI) {
return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI);
}
-template<>
+template <>
bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
// On AVR, there is only one core and so atomic fences do nothing.
MBBI->eraseFromParent();
@@ -1077,15 +1086,15 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::STDPtrQRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- //:TODO: need to reverse this order like inw and stsw?
+ //: TODO: need to reverse this order like inw and stsw?
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg, getUndefRegState(DstIsUndef))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getUndefRegState(DstIsUndef))
- .addImm(1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addImm(1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1110,16 +1119,17 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg, RegState::Define)
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1144,16 +1154,17 @@ bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, RegState::Define)
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, RegState::Kill)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .addImm(Imm);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1175,19 +1186,19 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::STDPtrQRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Offset is out of range");
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg)
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1207,17 +1218,19 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::INRdA;
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Address is out of range");
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(Imm);
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(Imm);
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(Imm + 1);
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(Imm + 1);
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1237,18 +1250,18 @@ bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
unsigned OpHi = AVR::OUTARr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- // Since we add 1 to the Imm value for the high byte below, and 63 is the highest Imm value
- // allowed for the instruction, 62 is the limit here.
+ // Since we add 1 to the Imm value for the high byte below, and 63 is the
+ // highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Address is out of range");
// 16 bit I/O writes need the high byte first
auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1270,13 +1283,13 @@ bool AVRExpandPseudo::expand<AVR::PUSHWRr>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -1319,15 +1332,15 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
// Shift part
buildMI(MBB, MBBI, OpShift)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addReg(DstReg);
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(DstReg);
// Add the carry bit
auto MIB = buildMI(MBB, MBBI, OpCarry)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .addReg(ZERO_REGISTER);
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addReg(ZERO_REGISTER);
// SREG is always implicitly killed
MIB->getOperand(2).setIsKill();
@@ -1378,14 +1391,15 @@ bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg)
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg)
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg)
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ auto MIBHI =
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg)
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
@@ -1554,12 +1568,13 @@ bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBLO->getOperand(2).setIsDead();
@@ -1740,12 +1755,13 @@ bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, getKillRegState(DstIsKill));
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstLoReg, getKillRegState(DstIsKill));
+ auto MIBLO =
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIBLO->getOperand(2).setIsDead();
@@ -1817,7 +1833,8 @@ bool AVRExpandPseudo::expandLSLB7Rd(Block &MBB, BlockIt MBBI) {
buildMI(MBB, MBBI, AVR::RORRd)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg, getKillRegState(DstIsKill))
- ->getOperand(3).setIsUndef(true);
+ ->getOperand(3)
+ .setIsUndef(true);
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1867,7 +1884,8 @@ bool AVRExpandPseudo::expandLSRB7Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg, getKillRegState(DstIsKill))
.addReg(DstReg, getKillRegState(DstIsKill))
- ->getOperand(4).setIsUndef(true);
+ ->getOperand(4)
+ .setIsUndef(true);
buildMI(MBB, MBBI, AVR::EORRdRr)
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
@@ -1918,10 +1936,11 @@ bool AVRExpandPseudo::expandASRB7Rd(Block &MBB, BlockIt MBBI) {
.addReg(DstReg, getKillRegState(DstIsKill))
.addReg(DstReg, getKillRegState(DstIsKill));
- auto MIRRC = buildMI(MBB, MBBI, AVR::SBCRdRr)
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addReg(DstReg, getKillRegState(DstIsKill));
+ auto MIRRC =
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addReg(DstReg, getKillRegState(DstIsKill));
if (ImpIsDead)
MIRRC->getOperand(3).setIsDead();
@@ -1970,9 +1989,10 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
if (SrcReg != DstLoReg) {
- auto MOV = buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg);
+ auto MOV =
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg);
if (SrcReg == DstHiReg) {
MOV->getOperand(1).setIsKill();
@@ -1981,19 +2001,20 @@ template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
if (SrcReg != DstHiReg) {
buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
buildMI(MBB, MBBI, AVR::ADDRdRr) // LSL Rd <==> ADD Rd, Rr
- .addReg(DstHiReg, RegState::Define)
- .addReg(DstHiReg)
- .addReg(DstHiReg, RegState::Kill);
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg)
+ .addReg(DstHiReg, RegState::Kill);
- auto SBC = buildMI(MBB, MBBI, AVR::SBCRdRr)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, RegState::Kill)
- .addReg(DstHiReg, RegState::Kill);
+ auto SBC =
+ buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
if (ImpIsDead)
SBC->getOperand(3).setIsDead();
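
As I read the SEXT expansion above, the ADD Rd,Rd (an LSL in disguise, per the comment) pushes the sign bit of the copied source byte into the carry flag, and the following SBC Rd,Rd,Rd turns that carry into 0xFF or 0x00 for the high byte. A standalone sketch under that reading (plain C++; the function name is hypothetical and not taken from the patch):

#include <cstdint>
#include <cstdio>

// Model of the SEXT trick: LSL (ADD Rd,Rd) moves the source's sign bit into
// carry; SBC Rd,Rd,Rd then computes 0 - carry, giving 0xFF for negative
// inputs and 0x00 otherwise, which becomes the high byte.
static uint16_t sext8to16(uint8_t Src) {
  uint8_t Carry = (Src & 0x80) ? 1 : 0;          // carry after the LSL
  uint8_t Hi = static_cast<uint8_t>(0u - Carry); // SBC Hi,Hi,Hi
  return static_cast<uint16_t>((Hi << 8) | Src);
}

int main() {
  std::printf("0x%04x 0x%04x\n", (unsigned)sext8to16(0x7f),
              (unsigned)sext8to16(0x80)); // prints 0x007f 0xff80
  return 0;
}
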
@@ -2025,14 +2046,15 @@ template <> bool AVRExpandPseudo::expand<AVR::ZEXT>(Block &MBB, BlockIt MBBI) {
if (SrcReg != DstLoReg) {
buildMI(MBB, MBBI, AVR::MOVRdRr)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
}
- auto EOR = buildMI(MBB, MBBI, AVR::EORRdRr)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstHiReg, RegState::Kill | RegState::Undef)
- .addReg(DstHiReg, RegState::Kill | RegState::Undef);
+ auto EOR =
+ buildMI(MBB, MBBI, AVR::EORRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill | RegState::Undef)
+ .addReg(DstHiReg, RegState::Kill | RegState::Undef);
if (ImpIsDead)
EOR->getOperand(3).setIsDead();
@@ -2054,15 +2076,15 @@ bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
// Low part
buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(0x3d)
- .setMIFlags(Flags);
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(0x3d)
+ .setMIFlags(Flags);
// High part
buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
- .addImm(0x3e)
- .setMIFlags(Flags);
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(0x3e)
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -2078,26 +2100,26 @@ bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
buildMI(MBB, MBBI, AVR::INRdA)
- .addReg(AVR::R0, RegState::Define)
- .addImm(SREG_ADDR)
- .setMIFlags(Flags);
+ .addReg(AVR::R0, RegState::Define)
+ .addImm(SREG_ADDR)
+ .setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::BCLRs).addImm(0x07).setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(0x3e)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addImm(0x3e)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(SREG_ADDR)
- .addReg(AVR::R0, RegState::Kill)
- .setMIFlags(Flags);
+ .addImm(SREG_ADDR)
+ .addReg(AVR::R0, RegState::Kill)
+ .setMIFlags(Flags);
buildMI(MBB, MBBI, AVR::OUTARr)
- .addImm(0x3d)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .setMIFlags(Flags);
+ .addImm(0x3d)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
MI.eraseFromParent();
return true;
@@ -2107,8 +2129,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
int Opcode = MBBI->getOpcode();
-#define EXPAND(Op) \
- case Op: \
+#define EXPAND(Op) \
+ case Op: \
return expand<Op>(MBB, MI)
switch (Opcode) {
@@ -2132,7 +2154,7 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::LDWRdPtr);
EXPAND(AVR::LDWRdPtrPi);
EXPAND(AVR::LDWRdPtrPd);
- case AVR::LDDWRdYQ: //:FIXME: remove this once PR13375 gets fixed
+ case AVR::LDDWRdYQ: //: FIXME: remove this once PR13375 gets fixed
EXPAND(AVR::LDDWRdPtrQ);
EXPAND(AVR::LPMWRdZ);
EXPAND(AVR::LPMWRdZPi);
@@ -2184,8 +2206,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
} // end of anonymous namespace
-INITIALIZE_PASS(AVRExpandPseudo, "avr-expand-pseudo",
- AVR_EXPAND_PSEUDO_NAME, false, false)
+INITIALIZE_PASS(AVRExpandPseudo, "avr-expand-pseudo", AVR_EXPAND_PSEUDO_NAME,
+ false, false)
namespace llvm {
FunctionPass *createAVRExpandPseudoPass() { return new AVRExpandPseudo(); }
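
A pattern worth calling out across the expansions in this file: each 16-bit pseudo is lowered to a low-byte and a high-byte 8-bit instruction, immediates are split into Lo8/Hi8, and the displacement forms cap the base offset at 62 because the high byte is addressed at Imm + 1 and the encoding only reaches 63. A minimal standalone sketch of that arithmetic (plain C++, not LLVM code; the values are chosen arbitrarily):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Lo/Hi split used by the 16-bit pseudo expansions: one 8-bit op handles the
// low byte, a second handles the high byte, and a displacement D is only
// legal when D + 1 still fits the 6-bit field (<= 63).
int main() {
  uint16_t Value = 0x1234;
  uint8_t Lo8 = Value & 0xff;        // operand of the OpLo instruction
  uint8_t Hi8 = (Value >> 8) & 0xff; // operand of the OpHi instruction

  unsigned Imm = 62; // base displacement used for the low byte
  assert(Imm <= 62 && "high byte is addressed at Imm + 1, which must be <= 63");

  std::printf("lo=0x%02x hi=0x%02x hi-offset=%u\n", (unsigned)Lo8,
              (unsigned)Hi8, Imm + 1);
  return 0;
}
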
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index 89ed30e8bcdb..672611ea2234 100644
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -111,9 +111,8 @@ void AVRFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I) {
- I->addLiveIn(AVR::R29R28);
+ for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF)) {
+ MBBJ.addLiveIn(AVR::R29R28);
}
if (!FrameSize) {
@@ -304,16 +303,16 @@ static void fixStackStores(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const TargetInstrInfo &TII, Register FP) {
// Iterate through the BB until we hit a call instruction or we reach the end.
- for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) {
- MachineBasicBlock::iterator NextMI = std::next(I);
- MachineInstr &MI = *I;
- unsigned Opcode = I->getOpcode();
+ for (MachineInstr &MI :
+ llvm::make_early_inc_range(llvm::make_range(MI, MBB.end()))) {
+ if (MI.isCall())
+ break;
+
+ unsigned Opcode = MI.getOpcode();
// Only care of pseudo store instructions where SP is the base pointer.
- if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr) {
- I = NextMI;
+ if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr)
continue;
- }
assert(MI.getOperand(0).getReg() == AVR::SP &&
"Invalid register, should be SP!");
@@ -325,8 +324,6 @@ static void fixStackStores(MachineBasicBlock &MBB,
MI.setDesc(TII.get(STOpc));
MI.getOperand(0).setReg(FP);
-
- I = NextMI;
}
}
@@ -361,13 +358,13 @@ MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
// values, etc) is tricky and thus left to be optimized in the future.
BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
- MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
- .addReg(AVR::R31R30, RegState::Kill)
- .addImm(Amount);
+ MachineInstr *New =
+ BuildMI(MBB, MI, DL, TII.get(AVR::SUBIWRdK), AVR::R31R30)
+ .addReg(AVR::R31R30, RegState::Kill)
+ .addImm(Amount);
New->getOperand(3).setIsDead();
- BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
- .addReg(AVR::R31R30);
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP).addReg(AVR::R31R30);
// Make sure the remaining stack stores are converted to real store
// instructions.
@@ -536,4 +533,3 @@ char AVRDynAllocaSR::ID = 0;
FunctionPass *createAVRDynAllocaSRPass() { return new AVRDynAllocaSR(); }
} // end of namespace llvm
-
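
The fixStackStores rewrite above swaps manual NextMI bookkeeping for llvm::make_early_inc_range, which captures the successor iterator before yielding each element so the loop body may rewrite or drop the current one. A standalone analogue of that idiom with std::list (it assumes nothing about the LLVM types; container and values are illustrative only):

#include <cstdio>
#include <iterator>
#include <list>

// Early-increment iteration: capture the successor before the body runs, so
// the current element can be erased or rewritten without breaking traversal.
int main() {
  std::list<int> Ops = {1, 2, 3, 4, 5};

  for (auto I = Ops.begin(), E = Ops.end(); I != E;) {
    auto Next = std::next(I); // what make_early_inc_range does up front
    if (*I % 2 == 0)
      Ops.erase(I); // safe: Next was taken before the erase
    I = Next;
  }

  for (int V : Ops)
    std::printf("%d ", V); // prints: 1 3 5
  std::printf("\n");
  return 0;
}
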
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index df382d553753..7ec2629ab45d 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -156,9 +156,9 @@ bool AVRDAGToDAGISel::selectIndexedLoad(SDNode *N) {
return false;
}
- SDNode *ResNode = CurDAG->getMachineNode(Opcode, SDLoc(N), VT,
- PtrVT, MVT::Other,
- LD->getBasePtr(), LD->getChain());
+ SDNode *ResNode =
+ CurDAG->getMachineNode(Opcode, SDLoc(N), VT, PtrVT, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
ReplaceUses(N, ResNode);
CurDAG->RemoveDeadNode(N);
@@ -199,12 +199,11 @@ unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
return Opcode;
}
-bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- unsigned ConstraintCode,
- std::vector<SDValue> &OutOps) {
+bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, unsigned ConstraintCode, std::vector<SDValue> &OutOps) {
assert((ConstraintCode == InlineAsm::Constraint_m ||
- ConstraintCode == InlineAsm::Constraint_Q) &&
- "Unexpected asm memory constraint");
+ ConstraintCode == InlineAsm::Constraint_Q) &&
+ "Unexpected asm memory constraint");
MachineRegisterInfo &RI = MF->getRegInfo();
const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
@@ -276,7 +275,8 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
}
if (ImmNode->getValueType(0) != MVT::i8) {
- Disp = CurDAG->getTargetConstant(ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
+ Disp = CurDAG->getTargetConstant(
+ ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
} else {
Disp = ImmOp;
}
@@ -309,11 +309,10 @@ template <> bool AVRDAGToDAGISel::select<ISD::FrameIndex>(SDNode *N) {
// effective address of the final stack slot.
int FI = cast<FrameIndexSDNode>(N)->getIndex();
SDValue TFI =
- CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy(DL));
+ CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy(DL));
- CurDAG->SelectNodeTo(N, AVR::FRMIDX,
- getTargetLowering()->getPointerTy(DL), TFI,
- CurDAG->getTargetConstant(0, SDLoc(N), MVT::i16));
+ CurDAG->SelectNodeTo(N, AVR::FRMIDX, getTargetLowering()->getPointerTy(DL),
+ TFI, CurDAG->getTargetConstant(0, SDLoc(N), MVT::i16));
return true;
}
@@ -380,8 +379,8 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
// Check if the opcode can be converted into an indexed load.
if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
// It is legal to fold the load into an indexed load.
- ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr,
- RegZ);
+ ResNode =
+ CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr, RegZ);
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
} else {
// Selecting an indexed load is not legal, fallback to a normal load.
@@ -391,8 +390,8 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
Ptr, RegZ);
break;
case MVT::i16:
- ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16,
- MVT::Other, Ptr, RegZ);
+ ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16, MVT::Other,
+ Ptr, RegZ);
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
break;
default:
@@ -441,7 +440,7 @@ template <> bool AVRDAGToDAGISel::select<AVRISD::CALL>(SDNode *N) {
Ops.push_back(Chain.getValue(1));
SDNode *ResNode =
- CurDAG->getMachineNode(AVR::ICALL, DL, MVT::Other, MVT::Glue, Ops);
+ CurDAG->getMachineNode(AVR::ICALL, DL, MVT::Other, MVT::Glue, Ops);
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
@@ -532,16 +531,23 @@ bool AVRDAGToDAGISel::trySelect(SDNode *N) {
switch (Opcode) {
// Nodes we fully handle.
- case ISD::FrameIndex: return select<ISD::FrameIndex>(N);
- case ISD::BRIND: return select<ISD::BRIND>(N);
+ case ISD::FrameIndex:
+ return select<ISD::FrameIndex>(N);
+ case ISD::BRIND:
+ return select<ISD::BRIND>(N);
case ISD::UMUL_LOHI:
- case ISD::SMUL_LOHI: return selectMultiplication(N);
+ case ISD::SMUL_LOHI:
+ return selectMultiplication(N);
// Nodes we handle partially. Other cases are autogenerated
- case ISD::STORE: return select<ISD::STORE>(N);
- case ISD::LOAD: return select<ISD::LOAD>(N);
- case AVRISD::CALL: return select<AVRISD::CALL>(N);
- default: return false;
+ case ISD::STORE:
+ return select<ISD::STORE>(N);
+ case ISD::LOAD:
+ return select<ISD::LOAD>(N);
+ case AVRISD::CALL:
+ return select<AVRISD::CALL>(N);
+ default:
+ return false;
}
}
@@ -551,4 +557,3 @@ FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 58a7aed91cdf..a6f2afb87102 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -13,8 +13,8 @@
#include "AVRISelLowering.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -232,8 +232,8 @@ AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
}
const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define NODE(name) \
- case AVRISD::name: \
+#define NODE(name) \
+ case AVRISD::name: \
return #name
switch (Opcode) {
@@ -269,7 +269,7 @@ EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
}
SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
- //:TODO: this function has to be completely rewritten to produce optimal
+ //: TODO: this function has to be completely rewritten to produce optimal
// code, for now it's producing very long but correct code.
unsigned Opc8;
const SDNode *N = Op.getNode();
@@ -527,7 +527,8 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
"LHS and RHS have different types");
assert(((LHS.getSimpleValueType() == MVT::i16) ||
- (LHS.getSimpleValueType() == MVT::i8)) && "invalid comparison type");
+ (LHS.getSimpleValueType() == MVT::i8)) &&
+ "invalid comparison type");
SDValue Cmp;
@@ -856,7 +857,8 @@ void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
/// by AM is legal for this target, for a load/store of the specified type.
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS, Instruction *I) const {
+ unsigned AS,
+ Instruction *I) const {
int64_t Offs = AM.BaseOffs;
// Allow absolute addresses.
@@ -1003,14 +1005,13 @@ static const MCPhysReg RegList8[] = {
AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9, AVR::R8};
static const MCPhysReg RegList16[] = {
- AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22,
- AVR::R22R21, AVR::R21R20, AVR::R20R19, AVR::R19R18,
- AVR::R18R17, AVR::R17R16, AVR::R16R15, AVR::R15R14,
- AVR::R14R13, AVR::R13R12, AVR::R12R11, AVR::R11R10,
- AVR::R10R9, AVR::R9R8};
+ AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
+ AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
+ AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
+ AVR::R11R10, AVR::R10R9, AVR::R9R8};
static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
- "8-bit and 16-bit register arrays must be of equal length");
+ "8-bit and 16-bit register arrays must be of equal length");
/// Analyze incoming and outgoing function arguments. We need custom C++ code
/// to handle special constraints in the ABI.
@@ -1084,10 +1085,11 @@ analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
/// Count the total number of bytes needed to pass or return these arguments.
template <typename ArgT>
-static unsigned getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
+static unsigned
+getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
unsigned TotalBytes = 0;
- for (const ArgT& Arg : Args) {
+ for (const ArgT &Arg : Args) {
TotalBytes += Arg.VT.getStoreSize();
}
return TotalBytes;
@@ -1102,7 +1104,8 @@ static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
unsigned NumArgs = Args.size();
unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
// CanLowerReturn() guarantees this assertion.
- assert(TotalBytes <= 8 && "return values greater than 8 bytes cannot be lowered");
+ assert(TotalBytes <= 8 &&
+ "return values greater than 8 bytes cannot be lowered");
// GCC-ABI says that the size is rounded up to the next even number,
// but actually once it is more than 4 it will always round up to 8.
@@ -1197,7 +1200,7 @@ SDValue AVRTargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
} else {
- // Sanity check.
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
EVT LocVT = VA.getLocVT();
@@ -1406,8 +1409,8 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
///
SDValue AVRTargetLowering::LowerCallResult(
SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1488,17 +1491,14 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Don't emit the ret/reti instruction when the naked attribute is present in
// the function being compiled.
- if (MF.getFunction().getAttributes().hasAttribute(
- AttributeList::FunctionIndex, Attribute::Naked)) {
+ if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
return Chain;
}
const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
unsigned RetOpc =
- AFI->isInterruptOrSignalHandler()
- ? AVRISD::RETI_FLAG
- : AVRISD::RET_FLAG;
+ AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_FLAG : AVRISD::RET_FLAG;
RetOps[0] = Chain; // Update chain.
@@ -1572,8 +1572,10 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator I;
- for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I);
- if (I != F->end()) ++I;
+ for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
+ ;
+ if (I != F->end())
+ ++I;
// Create loop block.
MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
@@ -1636,8 +1638,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
.addReg(ShiftReg2)
.addMBB(LoopBB);
- BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2)
- .addReg(ShiftAmtReg);
+ BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
@@ -1725,8 +1726,10 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator I;
- for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I);
- if (I != MF->end()) ++I;
+ for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
+ ;
+ if (I != MF->end())
+ ++I;
MF->insert(I, trueMBB);
MF->insert(I, falseMBB);
@@ -1748,11 +1751,12 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
falseMBB->addSuccessor(trueMBB);
// Set up the Phi node to determine where we came from
- BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI), MI.getOperand(0).getReg())
- .addReg(MI.getOperand(1).getReg())
- .addMBB(MBB)
- .addReg(MI.getOperand(2).getReg())
- .addMBB(falseMBB) ;
+ BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
+ MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addMBB(MBB)
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(falseMBB);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return trueMBB;
@@ -1779,9 +1783,12 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
case 'w': // Special upper register pairs
return C_RegisterClass;
case 't': // Temporary register
- case 'x': case 'X': // Pointer register pair X
- case 'y': case 'Y': // Pointer register pair Y
- case 'z': case 'Z': // Pointer register pair Z
+ case 'x':
+ case 'X': // Pointer register pair X
+ case 'y':
+ case 'Y': // Pointer register pair Y
+ case 'z':
+ case 'Z': // Pointer register pair Z
return C_Register;
case 'Q': // A memory address based on Y or Z pointer with displacement.
return C_Memory;
@@ -1842,9 +1849,12 @@ AVRTargetLowering::getSingleConstraintMatchWeight(
case 'q':
case 't':
case 'w':
- case 'x': case 'X':
- case 'y': case 'Y':
- case 'z': case 'Z':
+ case 'x':
+ case 'X':
+ case 'y':
+ case 'Y':
+ case 'z':
+ case 'Z':
weight = CW_SpecificReg;
break;
case 'G':
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 8130cf045fa8..3ae036b66bcb 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -133,11 +133,11 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- Register getRegisterByName(const char* RegName, LLT VT,
+ Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
- bool shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL)
- const override {
+ bool shouldSplitFunctionArgumentsAsLittleEndian(
+ const DataLayout &DL) const override {
return false;
}
@@ -179,7 +179,6 @@ private:
SmallVectorImpl<SDValue> &InVals) const;
protected:
-
const AVRSubtarget &Subtarget;
private:
diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td b/llvm/lib/Target/AVR/AVRInstrFormats.td
index 6eb49076efb0..2bcbcdfbf925 100644
--- a/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//
// A generic AVR instruction.
-class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction
-{
+class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
let Namespace = "AVR";
dag OutOperandList = outs;
@@ -25,8 +25,7 @@ class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction
/// A 16-bit AVR instruction.
class AVRInst16<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst<outs, ins, asmstr, pattern>
-{
+ : AVRInst<outs, ins, asmstr, pattern> {
field bits<16> Inst;
let Size = 2;
@@ -34,8 +33,7 @@ class AVRInst16<dag outs, dag ins, string asmstr, list<dag> pattern>
/// a 32-bit AVR instruction.
class AVRInst32<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst<outs, ins, asmstr, pattern>
-{
+ : AVRInst<outs, ins, asmstr, pattern> {
field bits<32> Inst;
let Size = 4;
@@ -50,8 +48,7 @@ class AVRInst32<dag outs, dag ins, string asmstr, list<dag> pattern>
// is defined as a pseudo instruction. In AVRExpandPseudoInsts.cpp,
// the instruction is then replaced by two add instructions - one for each byte.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
let Pattern = pattern;
let isPseudo = 1;
@@ -67,28 +64,26 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FRdRr<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern>
-{
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
bits<5> rr;
- let Inst{15-12} = opcode;
- let Inst{11-10} = f;
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 10} = f;
let Inst{9} = rr{4};
- let Inst{8-4} = rd;
- let Inst{3-0} = rr{3-0};
+ let Inst{8 - 4} = rd;
+ let Inst{3 - 0} = rr{3 - 0};
}
class FTST<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern>
-{
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
- let Inst{15-12} = opcode;
- let Inst{11-10} = f;
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 10} = f;
let Inst{9} = rd{4};
- let Inst{8-4} = rd;
- let Inst{3-0} = rd{3-0};
+ let Inst{8 - 4} = rd;
+ let Inst{3 - 0} = rd{3 - 0};
}
//===----------------------------------------------------------------------===//
@@ -96,19 +91,18 @@ class FTST<bits<4> opcode, bits<2> f, dag outs, dag ins, string asmstr,
// <|1001|001r|rrrr|0ttt>
//===----------------------------------------------------------------------===//
class FZRd<bits<3> t, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-9} = 0b001;
+ let Inst{11 - 9} = 0b001;
let Inst{8} = rd{4};
- let Inst{7-4} = rd{3-0};
+ let Inst{7 - 4} = rd{3 - 0};
let Inst{3} = 0;
- let Inst{2-0} = t;
+ let Inst{2 - 0} = t;
}
//===----------------------------------------------------------------------===//
@@ -119,15 +113,14 @@ class FZRd<bits<3> t, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts r16-r31)
//===----------------------------------------------------------------------===//
class FRdK<bits<4> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<4> rd;
bits<8> k;
- let Inst{15-12} = opcode;
- let Inst{11-8} = k{7-4};
- let Inst{7-4} = rd{3-0};
- let Inst{3-0} = k{3-0};
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 8} = k{7 - 4};
+ let Inst{7 - 4} = rd{3 - 0};
+ let Inst{3 - 0} = k{3 - 0};
let isAsCheapAsAMove = 1;
}
@@ -140,14 +133,13 @@ class FRdK<bits<4> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FRd<bits<4> opcode, bits<7> f, dag outs, dag ins, string asmstr,
- list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern>
-{
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> d;
- let Inst{15-12} = opcode;
- let Inst{11-9} = f{6-4};
- let Inst{8-4} = d;
- let Inst{3-0} = f{3-0};
+ let Inst{15 - 12} = opcode;
+ let Inst{11 - 9} = f{6 - 4};
+ let Inst{8 - 4} = d;
+ let Inst{3 - 0} = f{3 - 0};
let DecoderMethod = "decodeFRd";
}
@@ -160,23 +152,22 @@ class FRd<bits<4> opcode, bits<7> f, dag outs, dag ins, string asmstr,
// p = pointer register (1 bit) [1 for Y, 0 for Z]
//===----------------------------------------------------------------------===//
class FSTDLDD<bit type, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<7> memri;
bits<5> reg; // the GP register
- let Inst{15-14} = 0b10;
+ let Inst{15 - 14} = 0b10;
let Inst{13} = memri{5};
let Inst{12} = 0;
- let Inst{11-10} = memri{4-3};
+ let Inst{11 - 10} = memri{4 - 3};
let Inst{9} = type;
let Inst{8} = reg{4};
- let Inst{7-4} = reg{3-0};
+ let Inst{7 - 4} = reg{3 - 0};
let Inst{3} = memri{6};
- let Inst{2-0} = memri{2-0};
+ let Inst{2 - 0} = memri{2 - 0};
}
//===---------------------------------------------------------------------===//
@@ -190,26 +181,24 @@ class FSTDLDD<bit type, dag outs, dag ins, string asmstr, list<dag> pattern>
// Note that the bit labelled 'i' above does not follow a simple pattern,
// so there exists a post encoder method to set it manually.
//===---------------------------------------------------------------------===//
-class FSTLD<bit type, bits<2> mode, dag outs, dag ins,
- string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+class FSTLD<bit type, bits<2> mode, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<2> ptrreg;
bits<5> reg;
- let Inst{15-13} = 0b100;
+ let Inst{15 - 13} = 0b100;
// This bit varies depending on the arguments and the mode.
// We have a post encoder method to set this bit manually.
let Inst{12} = 0;
- let Inst{11-10} = 0b00;
+ let Inst{11 - 10} = 0b00;
let Inst{9} = type;
let Inst{8} = reg{4};
- let Inst{7-4} = reg{3-0};
+ let Inst{7 - 4} = reg{3 - 0};
- let Inst{3-2} = ptrreg{1-0};
- let Inst{1-0} = mode{1-0};
+ let Inst{3 - 2} = ptrreg{1 - 0};
+ let Inst{1 - 0} = mode{1 - 0};
let PostEncoderMethod = "loadStorePostEncoder";
}
@@ -223,22 +212,21 @@ class FSTLD<bit type, bits<2> mode, dag outs, dag ins,
// p = is postincrement
//===---------------------------------------------------------------------===//
class FLPMX<bit e, bit p, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
- bits<5> reg;
+ : AVRInst16<outs, ins, asmstr, pattern> {
+ bits<5> reg;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-9} = 0b000;
- let Inst{8} = reg{4};
+ let Inst{11 - 9} = 0b000;
+ let Inst{8} = reg{4};
- let Inst{7-4} = reg{3-0};
+ let Inst{7 - 4} = reg{3 - 0};
- let Inst{3-2} = 0b01;
- let Inst{1} = e;
- let Inst{0} = p;
+ let Inst{3 - 2} = 0b01;
+ let Inst{1} = e;
+ let Inst{0} = p;
- let DecoderMethod = "decodeFLPMX";
+ let DecoderMethod = "decodeFLPMX";
}
//===----------------------------------------------------------------------===//
@@ -248,14 +236,13 @@ class FLPMX<bit e, bit p, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts even registers)
//===----------------------------------------------------------------------===//
class FMOVWRdRr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> d;
bits<5> r;
- let Inst{15-8} = 0b00000001;
- let Inst{7-4} = d{4-1};
- let Inst{3-0} = r{4-1};
+ let Inst{15 - 8} = 0b00000001;
+ let Inst{7 - 4} = d{4 - 1};
+ let Inst{3 - 0} = r{4 - 1};
let DecoderMethod = "decodeFMOVWRdRr";
}
@@ -267,15 +254,14 @@ class FMOVWRdRr<dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts r16-r31)
//===----------------------------------------------------------------------===//
class FMUL2RdRr<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
- bits<5> rd; // accept 5 bits but only encode the lower 4
- bits<5> rr; // accept 5 bits but only encode the lower 4
+ : AVRInst16<outs, ins, asmstr, pattern> {
+ bits<5> rd; // accept 5 bits but only encode the lower 4
+ bits<5> rr; // accept 5 bits but only encode the lower 4
- let Inst{15-9} = 0b0000001;
+ let Inst{15 - 9} = 0b0000001;
let Inst{8} = f;
- let Inst{7-4} = rd{3-0};
- let Inst{3-0} = rr{3-0};
+ let Inst{7 - 4} = rd{3 - 0};
+ let Inst{3 - 0} = rr{3 - 0};
let DecoderMethod = "decodeFMUL2RdRr";
}
@@ -291,21 +277,19 @@ class FMUL2RdRr<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
// ddd = destination register
// rrr = source register
class FFMULRdRr<bits<2> f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<3> rd;
bits<3> rr;
- let Inst{15-8} = 0b00000011;
+ let Inst{15 - 8} = 0b00000011;
let Inst{7} = f{1};
- let Inst{6-4} = rd;
+ let Inst{6 - 4} = rd;
let Inst{3} = f{0};
- let Inst{2-0} = rr;
+ let Inst{2 - 0} = rr;
let DecoderMethod = "decodeFFMULRdRr";
}
-
//===----------------------------------------------------------------------===//
// Arithmetic word instructions (ADIW / SBIW): <|1001|011f|kkdd|kkkk|>
// f = secondary opcode = 1 bit
@@ -314,16 +298,15 @@ class FFMULRdRr<bits<2> f, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Only accepts r25:24 r27:26 r29:28 r31:30)
//===----------------------------------------------------------------------===//
class FWRdK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
- bits<5> dst; // accept 5 bits but only encode bits 1 and 2
+ : AVRInst16<outs, ins, asmstr, pattern> {
+ bits<5> dst; // accept 5 bits but only encode bits 1 and 2
bits<6> k;
- let Inst{15-9} = 0b1001011;
+ let Inst{15 - 9} = 0b1001011;
let Inst{8} = f;
- let Inst{7-6} = k{5-4};
- let Inst{5-4} = dst{2-1};
- let Inst{3-0} = k{3-0};
+ let Inst{7 - 6} = k{5 - 4};
+ let Inst{5 - 4} = dst{2 - 1};
+ let Inst{3 - 0} = k{3 - 0};
let DecoderMethod = "decodeFWRdK";
}
@@ -335,15 +318,14 @@ class FWRdK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FIORdA<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> d;
bits<6> A;
- let Inst{15-11} = 0b10110;
- let Inst{10-9} = A{5-4};
- let Inst{8-4} = d;
- let Inst{3-0} = A{3-0};
+ let Inst{15 - 11} = 0b10110;
+ let Inst{10 - 9} = A{5 - 4};
+ let Inst{8 - 4} = d;
+ let Inst{3 - 0} = A{3 - 0};
let DecoderMethod = "decodeFIORdA";
}
@@ -355,15 +337,14 @@ class FIORdA<dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class FIOARr<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<6> A;
bits<5> r;
- let Inst{15-11} = 0b10111;
- let Inst{10-9} = A{5-4};
- let Inst{8-4} = r;
- let Inst{3-0} = A{3-0};
+ let Inst{15 - 11} = 0b10111;
+ let Inst{10 - 9} = A{5 - 4};
+ let Inst{8 - 4} = r;
+ let Inst{3 - 0} = A{3 - 0};
let DecoderMethod = "decodeFIOARr";
}
@@ -376,20 +357,19 @@ class FIOARr<dag outs, dag ins, string asmstr, list<dag> pattern>
// b = bit number
//===----------------------------------------------------------------------===//
class FIOBIT<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> A;
bits<3> b;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-10} = 0b10;
- let Inst{9-8} = t;
+ let Inst{11 - 10} = 0b10;
+ let Inst{9 - 8} = t;
- let Inst{7-4} = A{4-1};
+ let Inst{7 - 4} = A{4 - 1};
let Inst{3} = A{0};
- let Inst{2-0} = b{2-0};
+ let Inst{2 - 0} = b{2 - 0};
let DecoderMethod = "decodeFIOBIT";
}
@@ -402,21 +382,20 @@ class FIOBIT<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
// b = bit
//===----------------------------------------------------------------------===//
class FRdB<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<5> rd;
bits<3> b;
- let Inst{15-12} = 0b1111;
+ let Inst{15 - 12} = 0b1111;
let Inst{11} = 0b1;
- let Inst{10-9} = t;
+ let Inst{10 - 9} = t;
let Inst{8} = rd{4};
- let Inst{7-4} = rd{3-0};
+ let Inst{7 - 4} = rd{3 - 0};
let Inst{3} = 0;
- let Inst{2-0} = b;
+ let Inst{2 - 0} = b;
}
// Special encoding for the `DES K` instruction.
@@ -425,17 +404,16 @@ class FRdB<bits<2> t, dag outs, dag ins, string asmstr, list<dag> pattern>
//
// KKKK = 4 bit immediate
class FDES<dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<4> k;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-8} = 0b0100;
+ let Inst{11 - 8} = 0b0100;
- let Inst{7-4} = k;
+ let Inst{7 - 4} = k;
- let Inst{3-0} = 0b1011;
+ let Inst{3 - 0} = 0b1011;
}
//===----------------------------------------------------------------------===//
@@ -444,15 +422,14 @@ class FDES<dag outs, dag ins, string asmstr, list<dag> pattern>
// k = constant address = 7 bits
// s = bit in status register = 3 bits
//===----------------------------------------------------------------------===//
-class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr,
+ list<dag> pattern> : AVRInst16<outs, ins, asmstr, pattern> {
bits<7> k;
- let Inst{15-11} = 0b11110;
+ let Inst{15 - 11} = 0b11110;
let Inst{10} = f;
- let Inst{9-3} = k;
- let Inst{2-0} = s;
+ let Inst{9 - 3} = k;
+ let Inst{2 - 0} = s;
}
//===----------------------------------------------------------------------===//
@@ -460,14 +437,12 @@ class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr, list<dag> patter
//===----------------------------------------------------------------------===//
class F16<bits<16> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
let Inst = opcode;
}
class F32<bits<32> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern>
-{
+ : AVRInst32<outs, ins, asmstr, pattern> {
let Inst = opcode;
}
@@ -477,13 +452,12 @@ class F32<bits<32> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
// k = constant address = 12 bits
//===----------------------------------------------------------------------===//
class FBRk<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<12> k;
- let Inst{15-13} = 0b110;
+ let Inst{15 - 13} = 0b110;
let Inst{12} = f;
- let Inst{11-0} = k;
+ let Inst{11 - 0} = k;
}
//===----------------------------------------------------------------------===//
@@ -492,14 +466,13 @@ class FBRk<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
// k = constant address = 22 bits
//===----------------------------------------------------------------------===//
class F32BRk<bits<3> f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern>
-{
+ : AVRInst32<outs, ins, asmstr, pattern> {
bits<22> k;
- let Inst{31-25} = 0b1001010;
- let Inst{24-20} = k{21-17};
- let Inst{19-17} = f;
- let Inst{16-0} = k{16-0};
+ let Inst{31 - 25} = 0b1001010;
+ let Inst{24 - 20} = k{21 - 17};
+ let Inst{19 - 17} = f;
+ let Inst{16 - 0} = k{16 - 0};
}
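
The F32BRk class above splits its 22-bit target around the 3-bit secondary opcode, which is easy to miss in the field assignments. A minimal sketch of that split (helper name assumed, not backend code):

  #include <cassert>
  #include <cstdint>

  // Sketch only: shows how F32BRk scatters the 22-bit JMP/CALL target
  // k around the 3-bit secondary opcode f.
  uint32_t encodeF32BRk(unsigned f, uint32_t k) {
    assert(f < 8 && k < (1u << 22));
    uint32_t inst = 0;
    inst |= 0b1001010u << 25;              // Inst{31-25}
    inst |= ((k >> 17) & 0b11111u) << 20;  // Inst{24-20} = k{21-17}
    inst |= (f & 0b111u) << 17;            // Inst{19-17} = f
    inst |= k & 0x1FFFFu;                  // Inst{16-0}  = k{16-0}
    return inst;                           // e.g. f=0b110, k=0 -> 0x940C0000 (jmp 0)
  }
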
//===----------------------------------------------------------------------===//
@@ -510,38 +483,36 @@ class F32BRk<bits<3> f, dag outs, dag ins, string asmstr, list<dag> pattern>
// (Accepts all registers)
//===----------------------------------------------------------------------===//
class F32DM<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst32<outs, ins, asmstr, pattern>
-{
+ : AVRInst32<outs, ins, asmstr, pattern> {
bits<5> rd;
bits<16> k;
- let Inst{31-28} = 0b1001;
+ let Inst{31 - 28} = 0b1001;
- let Inst{27-26} = 0b00;
+ let Inst{27 - 26} = 0b00;
let Inst{25} = f;
let Inst{24} = rd{4};
- let Inst{23-20} = rd{3-0};
+ let Inst{23 - 20} = rd{3 - 0};
- let Inst{19-16} = 0b0000;
+ let Inst{19 - 16} = 0b0000;
- let Inst{15-0} = k;
+ let Inst{15 - 0} = k;
}
// <|1001|0100|bfff|1000>
class FS<bit b, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<3> s;
- let Inst{15-12} = 0b1001;
+ let Inst{15 - 12} = 0b1001;
- let Inst{11-8} = 0b0100;
+ let Inst{11 - 8} = 0b0100;
let Inst{7} = b;
- let Inst{6-4} = s;
+ let Inst{6 - 4} = s;
- let Inst{3-0} = 0b1000;
+ let Inst{3 - 0} = 0b1000;
}
// Set/clr bit in status flag instructions/
@@ -549,48 +520,42 @@ class FS<bit b, dag outs, dag ins, string asmstr, list<dag> pattern>
// ---------------------
// <|1111|0fkk|kkkk|ksss>
class FSK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
- : AVRInst16<outs, ins, asmstr, pattern>
-{
+ : AVRInst16<outs, ins, asmstr, pattern> {
bits<7> k;
bits<3> s;
- let Inst{15-12} = 0b1111;
+ let Inst{15 - 12} = 0b1111;
let Inst{11} = 0;
let Inst{10} = f;
- let Inst{9-8} = k{6-5};
+ let Inst{9 - 8} = k{6 - 5};
- let Inst{7-4} = k{4-1};
+ let Inst{7 - 4} = k{4 - 1};
let Inst{3} = k{0};
- let Inst{2-0} = s;
+ let Inst{2 - 0} = s;
}
class ExtensionPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let Defs = [SREG];
}
class StorePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let Defs = [SP];
}
class SelectPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let usesCustomInserter = 1;
let Uses = [SREG];
}
class ShiftPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Pseudo<outs, ins, asmstr, pattern>
-{
+ : Pseudo<outs, ins, asmstr, pattern> {
let usesCustomInserter = 1;
let Defs = [SREG];
}
-
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 06f07696bde3..798d08393eae 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -20,9 +20,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
@@ -55,13 +55,13 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Register DestLo, DestHi, SrcLo, SrcHi;
TRI.splitReg(DestReg, DestLo, DestHi);
- TRI.splitReg(SrcReg, SrcLo, SrcHi);
+ TRI.splitReg(SrcReg, SrcLo, SrcHi);
// Copy each individual register with the `MOV` instruction.
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
- .addReg(SrcLo, getKillRegState(KillSrc));
+ .addReg(SrcLo, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
- .addReg(SrcHi, getKillRegState(KillSrc));
+ .addReg(SrcHi, getKillRegState(KillSrc));
}
} else {
if (AVR::GPR8RegClass.contains(DestReg, SrcReg)) {
@@ -83,7 +83,7 @@ unsigned AVRInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
switch (MI.getOpcode()) {
case AVR::LDDRdPtrQ:
- case AVR::LDDWRdYQ: { //:FIXME: remove this once PR13375 gets fixed
+ case AVR::LDDWRdYQ: { //: FIXME: remove this once PR13375 gets fixed
if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
MI.getOperand(2).getImm() == 0) {
FrameIndex = MI.getOperand(1).getIndex();
@@ -179,7 +179,7 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = AVR::LDDRdPtrQ;
} else if (TRI->isTypeLegalForClass(*RC, MVT::i16)) {
// Opcode = AVR::LDDWRdPtrQ;
- //:FIXME: remove this once PR13375 gets fixed
+ //: FIXME: remove this once PR13375 gets fixed
Opcode = AVR::LDDWRdYQ;
} else {
llvm_unreachable("Cannot load this register from a stack slot!");
@@ -289,7 +289,7 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// Handle unconditional branches.
- //:TODO: add here jmp
+ //: TODO: add here jmp
if (I->getOpcode() == AVR::RJMPk) {
UnCondBrIter = I;
@@ -399,9 +399,9 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL,
- int *BytesAdded) const {
- if (BytesAdded) *BytesAdded = 0;
+ const DebugLoc &DL, int *BytesAdded) const {
+ if (BytesAdded)
+ *BytesAdded = 0;
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
@@ -421,13 +421,15 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
AVRCC::CondCodes CC = (AVRCC::CondCodes)Cond[0].getImm();
auto &CondMI = *BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
- if (BytesAdded) *BytesAdded += getInstSizeInBytes(CondMI);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(CondMI);
++Count;
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
auto &MI = *BuildMI(&MBB, DL, get(AVR::RJMPk)).addMBB(FBB);
- if (BytesAdded) *BytesAdded += getInstSizeInBytes(MI);
+ if (BytesAdded)
+ *BytesAdded += getInstSizeInBytes(MI);
++Count;
}
@@ -436,7 +438,8 @@ unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- if (BytesRemoved) *BytesRemoved = 0;
+ if (BytesRemoved)
+ *BytesRemoved = 0;
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -446,7 +449,7 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
if (I->isDebugInstr()) {
continue;
}
- //:TODO: add here the missing jmp instructions once they are implemented
+ //: TODO: add here the missing jmp instructions once they are implemented
// like jmp, {e}ijmp, and other cond branches, ...
if (I->getOpcode() != AVR::RJMPk &&
getCondFromBranchOpc(I->getOpcode()) == AVRCC::COND_INVALID) {
@@ -454,7 +457,8 @@ unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
}
// Remove the branch.
- if (BytesRemoved) *BytesRemoved += getInstSizeInBytes(*I);
+ if (BytesRemoved)
+ *BytesRemoved += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
++Count;
@@ -490,7 +494,8 @@ unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR: {
const MachineFunction &MF = *MI.getParent()->getParent();
- const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
+ const AVRTargetMachine &TM =
+ static_cast<const AVRTargetMachine &>(MF.getTarget());
const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
@@ -555,20 +560,19 @@ bool AVRInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
}
}
-unsigned AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const {
- // This method inserts a *direct* branch (JMP), despite its name.
- // LLVM calls this method to fixup unconditional branches; it never calls
- // insertBranch or some hypothetical "insertDirectBranch".
- // See lib/CodeGen/RegisterRelaxation.cpp for details.
- // We end up here when a jump is too long for a RJMP instruction.
- auto &MI = *BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
-
- return getInstSizeInBytes(MI);
+void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset,
+ RegScavenger *RS) const {
+ // This method inserts a *direct* branch (JMP), despite its name.
+ // LLVM calls this method to fixup unconditional branches; it never calls
+ // insertBranch or some hypothetical "insertDirectBranch".
+ // See lib/CodeGen/RegisterRelaxation.cpp for details.
+ // We end up here when a jump is too long for a RJMP instruction.
+ BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+
+ return;
}
} // end of namespace llvm
-
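
The comment in insertIndirectBranch above covers the case where an unconditional branch no longer fits the RJMP encoding. As a rough, self-contained illustration (the helper name and byte-based interface are assumptions, not the backend's API): RJMP carries a signed 12-bit offset counted in 16-bit words, so the JMP fallback is needed once the displacement leaves roughly a +/-4 KiB window.

  #include <cstdint>

  // Sketch only: RJMP encodes a signed 12-bit word offset, so a
  // displacement outside [-2048, +2047] words forces the direct JMP
  // emitted by insertIndirectBranch.
  bool fitsInRJMP(int64_t BrOffsetBytes) {
    int64_t Words = BrOffsetBytes / 2;  // AVR program memory is word-addressed
    return Words >= -2048 && Words <= 2047;
  }
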
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
index 11f45865de54..6d0596642fa1 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -107,11 +107,11 @@ public:
bool isBranchOffsetInRange(unsigned BranchOpc,
int64_t BrOffset) const override;
- unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
+
private:
const AVRRegisterInfo RI;
};
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c7c9656d3bfb..c7f423292da0 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -20,12 +20,13 @@ def SDT_AVRCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_AVRCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_AVRCall : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_AVRWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-def SDT_AVRBrcond : SDTypeProfile<0, 2,
- [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
+def SDT_AVRBrcond
+ : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
def SDT_AVRCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_AVRTst : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_AVRSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
+def SDT_AVRSelectCC
+ : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
//===----------------------------------------------------------------------===//
// AVR Specific Node Definitions
@@ -46,12 +47,12 @@ def AVRcall : SDNode<"AVRISD::CALL", SDT_AVRCall,
def AVRWrapper : SDNode<"AVRISD::WRAPPER", SDT_AVRWrapper>;
-def AVRbrcond : SDNode<"AVRISD::BRCOND", SDT_AVRBrcond,
- [SDNPHasChain, SDNPInGlue]>;
+def AVRbrcond
+ : SDNode<"AVRISD::BRCOND", SDT_AVRBrcond, [SDNPHasChain, SDNPInGlue]>;
def AVRcmp : SDNode<"AVRISD::CMP", SDT_AVRCmp, [SDNPOutGlue]>;
def AVRcmpc : SDNode<"AVRISD::CMPC", SDT_AVRCmp, [SDNPInGlue, SDNPOutGlue]>;
def AVRtst : SDNode<"AVRISD::TST", SDT_AVRTst, [SDNPOutGlue]>;
-def AVRselectcc: SDNode<"AVRISD::SELECT_CC", SDT_AVRSelectCC, [SDNPInGlue]>;
+def AVRselectcc : SDNode<"AVRISD::SELECT_CC", SDT_AVRSelectCC, [SDNPInGlue]>;
// Shift nodes.
def AVRlsl : SDNode<"AVRISD::LSL", SDTIntUnaryOp>;
@@ -80,29 +81,31 @@ def AVRSwap : SDNode<"AVRISD::SWAP", SDTIntUnaryOp>;
// AVR Operands, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
-def imm8_neg_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(-N->getAPIntValue(), SDLoc(N), MVT::i8);
-}]>;
+def imm8_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ -N->getAPIntValue(), SDLoc(N), MVT::i8);
+ }]>;
-def imm16_neg_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(-N->getAPIntValue(), SDLoc(N), MVT::i16);
-}]>;
+def imm16_neg_XFORM
+ : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getAPIntValue(),
+ SDLoc(N), MVT::i16);
+ }]>;
-def imm0_63_neg : PatLeaf<(imm),
-[{
- int64_t val = -N->getSExtValue();
- return val >= 0 && val < 64;
-}], imm16_neg_XFORM>;
+def imm0_63_neg : PatLeaf<(imm), [{
+ int64_t val = -N->getSExtValue();
+ return val >= 0 && val < 64;
+ }],
+ imm16_neg_XFORM>;
def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>;
// imm_com8_XFORM - Return the complement of a imm_com8 value
-def imm_com8_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(~((uint8_t)N->getZExtValue()), SDLoc(N),
- MVT::i8);
-}]>;
+def imm_com8_XFORM
+ : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ ~((uint8_t) N->getZExtValue()), SDLoc(N), MVT::i8);
+ }]>;
// imm_com8 - Match an immediate that is a complement
// of a 8-bit immediate.
@@ -110,59 +113,55 @@ def imm_com8_XFORM : SDNodeXForm<imm, [{
// only used on aliases (Pat<> and InstAlias<>). The actual encoding
// is handled by the destination instructions, which use imm_com8.
def imm_com8_asmoperand : AsmOperandClass { let Name = "ImmCom8"; }
-def imm_com8 : Operand<i8> {
- let ParserMatchClass = imm_com8_asmoperand;
-}
-
-def ioaddr_XFORM : SDNodeXForm<imm,
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()) - offset,
- SDLoc(N), MVT::i8);
-}]>;
-
-def iobitpos8_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(Log2_32(uint8_t(N->getZExtValue())),
- SDLoc(N), MVT::i8);
-}]>;
-
-def iobitposn8_XFORM : SDNodeXForm<imm,
-[{
- return CurDAG->getTargetConstant(Log2_32(uint8_t(~N->getZExtValue())),
- SDLoc(N), MVT::i8);
-}]>;
-
-def ioaddr8 : PatLeaf<(imm),
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- uint64_t val = N->getZExtValue() - offset;
- return val < 0x40;
-}], ioaddr_XFORM>;
-
-def lowioaddr8 : PatLeaf<(imm),
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- uint64_t val = N->getZExtValue() - offset;
- return val < 0x20;
-}], ioaddr_XFORM>;
-
-def ioaddr16 : PatLeaf<(imm),
-[{
- uint8_t offset = Subtarget->getIORegisterOffset();
- uint64_t val = N->getZExtValue() - offset;
- return val < 0x3f;
-}], ioaddr_XFORM>;
-
-def iobitpos8 : PatLeaf<(imm),
-[{
- return isPowerOf2_32(uint8_t(N->getZExtValue()));
-}], iobitpos8_XFORM>;
-
-def iobitposn8 : PatLeaf<(imm),
-[{
- return isPowerOf2_32(uint8_t(~N->getZExtValue()));
-}], iobitposn8_XFORM>;
+def imm_com8 : Operand<i8> { let ParserMatchClass = imm_com8_asmoperand; }
+
+def ioaddr_XFORM
+ : SDNodeXForm<imm, [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ return CurDAG->getTargetConstant(
+ uint8_t(N->getZExtValue()) - offset, SDLoc(N), MVT::i8);
+ }]>;
+
+def iobitpos8_XFORM
+ : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ Log2_32(uint8_t(N->getZExtValue())), SDLoc(N), MVT::i8);
+ }]>;
+
+def iobitposn8_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(
+ Log2_32(uint8_t(~N->getZExtValue())),
+ SDLoc(N), MVT::i8);
+ }]>;
+
+def ioaddr8 : PatLeaf<(imm), [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x40;
+ }],
+ ioaddr_XFORM>;
+
+def lowioaddr8 : PatLeaf<(imm), [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x20;
+ }],
+ ioaddr_XFORM>;
+
+def ioaddr16 : PatLeaf<(imm), [{
+ uint8_t offset = Subtarget->getIORegisterOffset();
+ uint64_t val = N->getZExtValue() - offset;
+ return val < 0x3f;
+ }],
+ ioaddr_XFORM>;
+
+def iobitpos8
+ : PatLeaf<(imm), [{ return isPowerOf2_32(uint8_t(N->getZExtValue())); }],
+ iobitpos8_XFORM>;
+
+def iobitposn8
+ : PatLeaf<(imm), [{ return isPowerOf2_32(uint8_t(~N->getZExtValue())); }],
+ iobitposn8_XFORM>;
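
The ioaddr and iobitpos transforms above all perform small arithmetic on the matched immediate. A compact standalone illustration follows; the 0x20 offset is an assumption for the example (the usual classic-AVR I/O-to-data-space mapping), whereas the real patterns ask Subtarget->getIORegisterOffset().

  #include <cstdint>

  // Sketch only: mirrors what ioaddr_XFORM and iobitpos8_XFORM compute.
  constexpr uint8_t kIOOffset = 0x20;           // assumption for the example

  constexpr uint8_t ioPort(uint8_t dataSpaceAddr) {
    return uint8_t(dataSpaceAddr - kIOOffset);  // e.g. 0x25 -> port 0x05
  }

  constexpr uint8_t bitIndex(uint8_t mask) {    // Log2_32 on a power-of-two mask
    uint8_t b = 0;
    while ((mask >>= 1) != 0)
      ++b;
    return b;                                   // e.g. 0x80 -> bit 7
  }
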
def MemriAsmOperand : AsmOperandClass {
let Name = "Memri";
@@ -170,8 +169,7 @@ def MemriAsmOperand : AsmOperandClass {
}
/// Address operand for `reg+imm` used by STD and LDD.
-def memri : Operand<iPTR>
-{
+def memri : Operand<iPTR> {
let MIOperandInfo = (ops PTRDISPREGS, i16imm);
let PrintMethod = "printMemri";
@@ -181,60 +179,47 @@ def memri : Operand<iPTR>
}
// Address operand for `SP+imm` used by STD{W}SPQRr
-def memspi : Operand<iPTR>
-{
- let MIOperandInfo = (ops GPRSP, i16imm);
-}
+def memspi : Operand<iPTR> { let MIOperandInfo = (ops GPRSP, i16imm); }
-def relbrtarget_7 : Operand<OtherVT>
-{
- let PrintMethod = "printPCRelImm";
- let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_7_pcrel>";
+def relbrtarget_7 : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImm";
+ let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_7_pcrel>";
}
-def brtarget_13 : Operand<OtherVT>
-{
- let PrintMethod = "printPCRelImm";
- let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_13_pcrel>";
+def brtarget_13 : Operand<OtherVT> {
+ let PrintMethod = "printPCRelImm";
+ let EncoderMethod = "encodeRelCondBrTarget<AVR::fixup_13_pcrel>";
}
// The target of a 22 or 16-bit call/jmp instruction.
-def call_target : Operand<iPTR>
-{
- let EncoderMethod = "encodeCallTarget";
- let DecoderMethod = "decodeCallTarget";
+def call_target : Operand<iPTR> {
+ let EncoderMethod = "encodeCallTarget";
+ let DecoderMethod = "decodeCallTarget";
}
// A 16-bit address (which can lead to an R_AVR_16 relocation).
-def imm16 : Operand<i16>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_16, 2>";
-}
+def imm16 : Operand<i16> { let EncoderMethod = "encodeImm<AVR::fixup_16, 2>"; }
/// A 6-bit immediate used in the ADIW/SBIW instructions.
-def imm_arith6 : Operand<i16>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_6_adiw, 0>";
+def imm_arith6 : Operand<i16> {
+ let EncoderMethod = "encodeImm<AVR::fixup_6_adiw, 0>";
}
/// An 8-bit immediate inside an instruction with the same format
/// as the `LDI` instruction (the `FRdK` format).
-def imm_ldi8 : Operand<i8>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_ldi, 0>";
+def imm_ldi8 : Operand<i8> {
+ let EncoderMethod = "encodeImm<AVR::fixup_ldi, 0>";
}
/// A 5-bit port number used in SBIC and friends (the `FIOBIT` format).
-def imm_port5 : Operand<i8>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_port5, 0>";
+def imm_port5 : Operand<i8> {
+ let EncoderMethod = "encodeImm<AVR::fixup_port5, 0>";
}
/// A 6-bit port number used in the `IN` instruction and friends (the
/// `FIORdA` format.
-def imm_port6 : Operand<i8>
-{
- let EncoderMethod = "encodeImm<AVR::fixup_port6, 0>";
+def imm_port6 : Operand<i8> {
+ let EncoderMethod = "encodeImm<AVR::fixup_port6, 0>";
}
// Addressing mode pattern reg+imm6
@@ -243,91 +228,85 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], [SDNPWantRoot]>;
// AsmOperand class for a pointer register.
// Used with the LD/ST family of instructions.
// See FSTLD in AVRInstrFormats.td
-def PtrRegAsmOperand : AsmOperandClass
-{
- let Name = "Reg";
-}
+def PtrRegAsmOperand : AsmOperandClass { let Name = "Reg"; }
// A special operand type for the LD/ST instructions.
// It converts the pointer register number into a two-bit field used in the
// instruction.
-def LDSTPtrReg : Operand<i16>
-{
- let MIOperandInfo = (ops PTRREGS);
- let EncoderMethod = "encodeLDSTPtrReg";
+def LDSTPtrReg : Operand<i16> {
+ let MIOperandInfo = (ops PTRREGS);
+ let EncoderMethod = "encodeLDSTPtrReg";
- let ParserMatchClass = PtrRegAsmOperand;
+ let ParserMatchClass = PtrRegAsmOperand;
}
// A special operand type for the LDD/STD instructions.
// It behaves identically to the LD/ST version, except restricts
// the pointer registers to Y and Z.
-def LDDSTDPtrReg : Operand<i16>
-{
- let MIOperandInfo = (ops PTRDISPREGS);
- let EncoderMethod = "encodeLDSTPtrReg";
+def LDDSTDPtrReg : Operand<i16> {
+ let MIOperandInfo = (ops PTRDISPREGS);
+ let EncoderMethod = "encodeLDSTPtrReg";
- let ParserMatchClass = PtrRegAsmOperand;
+ let ParserMatchClass = PtrRegAsmOperand;
}
//===----------------------------------------------------------------------===//
// AVR predicates for subtarget features
//===----------------------------------------------------------------------===//
-def HasSRAM : Predicate<"Subtarget->hasSRAM()">,
- AssemblerPredicate<(all_of FeatureSRAM)>;
+def HasSRAM : Predicate<"Subtarget->hasSRAM()">,
+ AssemblerPredicate<(all_of FeatureSRAM)>;
-def HasJMPCALL : Predicate<"Subtarget->hasJMPCALL()">,
- AssemblerPredicate<(all_of FeatureJMPCALL)>;
+def HasJMPCALL : Predicate<"Subtarget->hasJMPCALL()">,
+ AssemblerPredicate<(all_of FeatureJMPCALL)>;
-def HasIJMPCALL : Predicate<"Subtarget->hasIJMPCALL()">,
- AssemblerPredicate<(all_of FeatureIJMPCALL)>;
+def HasIJMPCALL : Predicate<"Subtarget->hasIJMPCALL()">,
+ AssemblerPredicate<(all_of FeatureIJMPCALL)>;
-def HasEIJMPCALL : Predicate<"Subtarget->hasEIJMPCALL()">,
- AssemblerPredicate<(all_of FeatureEIJMPCALL)>;
+def HasEIJMPCALL : Predicate<"Subtarget->hasEIJMPCALL()">,
+ AssemblerPredicate<(all_of FeatureEIJMPCALL)>;
-def HasADDSUBIW : Predicate<"Subtarget->hasADDSUBIW()">,
- AssemblerPredicate<(all_of FeatureADDSUBIW)>;
+def HasADDSUBIW : Predicate<"Subtarget->hasADDSUBIW()">,
+ AssemblerPredicate<(all_of FeatureADDSUBIW)>;
-def HasSmallStack : Predicate<"Subtarget->HasSmallStack()">,
- AssemblerPredicate<(all_of FeatureSmallStack)>;
+def HasSmallStack : Predicate<"Subtarget->HasSmallStack()">,
+ AssemblerPredicate<(all_of FeatureSmallStack)>;
-def HasMOVW : Predicate<"Subtarget->hasMOVW()">,
- AssemblerPredicate<(all_of FeatureMOVW)>;
+def HasMOVW : Predicate<"Subtarget->hasMOVW()">,
+ AssemblerPredicate<(all_of FeatureMOVW)>;
-def HasLPM : Predicate<"Subtarget->hasLPM()">,
- AssemblerPredicate<(all_of FeatureLPM)>;
+def HasLPM : Predicate<"Subtarget->hasLPM()">,
+ AssemblerPredicate<(all_of FeatureLPM)>;
-def HasLPMX : Predicate<"Subtarget->hasLPMX()">,
- AssemblerPredicate<(all_of FeatureLPMX)>;
+def HasLPMX : Predicate<"Subtarget->hasLPMX()">,
+ AssemblerPredicate<(all_of FeatureLPMX)>;
-def HasELPM : Predicate<"Subtarget->hasELPM()">,
- AssemblerPredicate<(all_of FeatureELPM)>;
+def HasELPM : Predicate<"Subtarget->hasELPM()">,
+ AssemblerPredicate<(all_of FeatureELPM)>;
-def HasELPMX : Predicate<"Subtarget->hasELPMX()">,
- AssemblerPredicate<(all_of FeatureELPMX)>;
+def HasELPMX : Predicate<"Subtarget->hasELPMX()">,
+ AssemblerPredicate<(all_of FeatureELPMX)>;
-def HasSPM : Predicate<"Subtarget->hasSPM()">,
- AssemblerPredicate<(all_of FeatureSPM)>;
+def HasSPM : Predicate<"Subtarget->hasSPM()">,
+ AssemblerPredicate<(all_of FeatureSPM)>;
-def HasSPMX : Predicate<"Subtarget->hasSPMX()">,
- AssemblerPredicate<(all_of FeatureSPMX)>;
+def HasSPMX : Predicate<"Subtarget->hasSPMX()">,
+ AssemblerPredicate<(all_of FeatureSPMX)>;
-def HasDES : Predicate<"Subtarget->hasDES()">,
- AssemblerPredicate<(all_of FeatureDES)>;
+def HasDES : Predicate<"Subtarget->hasDES()">,
+ AssemblerPredicate<(all_of FeatureDES)>;
-def SupportsRMW : Predicate<"Subtarget->supportsRMW()">,
- AssemblerPredicate<(all_of FeatureRMW)>;
+def SupportsRMW : Predicate<"Subtarget->supportsRMW()">,
+ AssemblerPredicate<(all_of FeatureRMW)>;
def SupportsMultiplication : Predicate<"Subtarget->supportsMultiplication()">,
- AssemblerPredicate<(all_of FeatureMultiplication)>;
+ AssemblerPredicate<(all_of FeatureMultiplication)>;
-def HasBREAK : Predicate<"Subtarget->hasBREAK()">,
- AssemblerPredicate<(all_of FeatureBREAK)>;
+def HasBREAK : Predicate<"Subtarget->hasBREAK()">,
+ AssemblerPredicate<(all_of FeatureBREAK)>;
def HasTinyEncoding : Predicate<"Subtarget->hasTinyEncoding()">,
- AssemblerPredicate<(all_of FeatureTinyEncoding)>;
-
+ AssemblerPredicate<(all_of FeatureTinyEncoding)>;
// AVR specific condition code. These correspond to AVR_*_COND in
// AVRInstrInfo.td. They must be kept in synch.
@@ -340,7 +319,6 @@ def AVR_COND_LO : PatLeaf<(i8 5)>;
def AVR_COND_MI : PatLeaf<(i8 6)>;
def AVR_COND_PL : PatLeaf<(i8 7)>;
-
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// AVR Instruction list
@@ -352,43 +330,49 @@ def AVR_COND_PL : PatLeaf<(i8 7)>;
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber SREG.
-let Defs = [SP, SREG],
-Uses = [SP] in
-{
+let Defs = [SP, SREG], Uses = [SP] in {
def ADJCALLSTACKDOWN : Pseudo<(outs),
- (ins i16imm:$amt, i16imm:$amt2),
- "#ADJCALLSTACKDOWN",
- [(AVRcallseq_start timm:$amt, timm:$amt2)]>;
+ (ins i16imm
+ : $amt, i16imm
+ : $amt2),
+ "#ADJCALLSTACKDOWN", [(AVRcallseq_start timm
+ : $amt, timm
+ : $amt2)]>;
// R31R30 is used to update SP. It is normally free because it is a
// call-clobbered register but it is necessary to set it as a def as the
// register allocator might use it in rare cases (for rematerialization, it
// seems). hasSideEffects needs to be set to true so this instruction isn't
// considered dead.
- let Defs = [R31R30],
- hasSideEffects=1 in
- def ADJCALLSTACKUP : Pseudo<(outs),
- (ins i16imm:$amt1, i16imm:$amt2),
- "#ADJCALLSTACKUP",
- [(AVRcallseq_end timm:$amt1, timm:$amt2)]>;
+ let Defs = [R31R30], hasSideEffects = 1 in def ADJCALLSTACKUP
+ : Pseudo<(outs),
+ (ins i16imm
+ : $amt1, i16imm
+ : $amt2),
+ "#ADJCALLSTACKUP", [(AVRcallseq_end timm
+ : $amt1, timm
+ : $amt2)]>;
}
//===----------------------------------------------------------------------===//
// Addition
//===----------------------------------------------------------------------===//
-let isCommutable = 1,
-Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let isCommutable = 1, Constraints = "$src = $rd", Defs = [SREG] in {
// ADD Rd, Rr
// Adds two 8-bit registers.
- def ADDRdRr : FRdRr<0b0000,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "add\t$rd, $rr",
- [(set i8:$rd, (add i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def ADDRdRr
+ : FRdRr<0b0000, 0b11,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "add\t$rd, $rr",
+ [(set i8
+ : $rd, (add i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ADDW Rd+1:Rd, Rr+1:Rr
// Pseudo instruction to add four 8-bit registers as two 16-bit values.
@@ -396,22 +380,34 @@ Defs = [SREG] in
// Expands to:
// add Rd, Rr
// adc Rd+1, Rr+1
- def ADDWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "addw\t$rd, $rr",
- [(set i16:$rd, (add i16:$src, i16:$rr)),
- (implicit SREG)]>;
+ def ADDWRdRr
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "addw\t$rd, $rr",
+ [(set i16
+ : $rd, (add i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)]>;
// ADC Rd, Rr
// Adds two 8-bit registers with carry.
- let Uses = [SREG] in
- def ADCRdRr : FRdRr<0b0001,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "adc\t$rd, $rr",
- [(set i8:$rd, (adde i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ let Uses = [SREG] in def ADCRdRr
+ : FRdRr<0b0001, 0b11,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "adc\t$rd, $rr",
+ [(set i8
+ : $rd, (adde i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ADCW Rd+1:Rd, Rr+1:Rr
// Pseudo instruction to add four 8-bit registers as two 16-bit values with
@@ -420,39 +416,56 @@ Defs = [SREG] in
// Expands to:
// adc Rd, Rr
// adc Rd+1, Rr+1
- let Uses = [SREG] in
- def ADCWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "adcw\t$rd, $rr",
- [(set i16:$rd, (adde i16:$src, i16:$rr)),
- (implicit SREG)]>;
+ let Uses = [SREG] in def ADCWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "adcw\t$rd, $rr", [
+ (set i16
+ : $rd, (adde i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
// AIDW Rd, k
// Adds an immediate 6-bit value K to Rd, placing the result in Rd.
- def ADIWRdK : FWRdK<0b0,
- (outs IWREGS:$rd),
- (ins IWREGS:$src, imm_arith6:$k),
- "adiw\t$rd, $k",
- [(set i16:$rd, (add i16:$src, uimm6:$k)),
- (implicit SREG)]>,
- Requires<[HasADDSUBIW]>;
+ def ADIWRdK
+ : FWRdK<0b0,
+ (outs IWREGS
+ : $rd),
+ (ins IWREGS
+ : $src, imm_arith6
+ : $k),
+ "adiw\t$rd, $k",
+ [(set i16
+ : $rd, (add i16
+ : $src, uimm6
+ : $k)),
+ (implicit SREG)]>,
+ Requires<[HasADDSUBIW]>;
}
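
The ADDW/ADCW pseudos above lean on the carry chain of the 8-bit adder. A minimal illustration of why the "add Rd, Rr ; adc Rd+1, Rr+1" expansion computes a 16-bit sum (plain C++, nothing AVR-specific; the function name is made up for the example):

  #include <cstdint>

  // Sketch only: models the two-instruction expansion of the ADDW pseudo,
  // operating directly on the two 8-bit halves.
  uint16_t addw(uint8_t dLo, uint8_t dHi, uint8_t rLo, uint8_t rHi) {
    unsigned lo = unsigned(dLo) + rLo;          // add  Rd,   Rr
    unsigned carry = lo >> 8;                   // carry flag after the add
    unsigned hi = unsigned(dHi) + rHi + carry;  // adc  Rd+1, Rr+1
    return uint16_t(((hi & 0xFF) << 8) | (lo & 0xFF));
  }
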
//===----------------------------------------------------------------------===//
// Subtraction
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let Constraints = "$src = $rd", Defs = [SREG] in {
// SUB Rd, Rr
// Subtracts the 8-bit value of Rr from Rd and places the value in Rd.
- def SUBRdRr : FRdRr<0b0001,
- 0b10,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "sub\t$rd, $rr",
- [(set i8:$rd, (sub i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def SUBRdRr
+ : FRdRr<0b0001, 0b10,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "sub\t$rd, $rr",
+ [(set i8
+ : $rd, (sub i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// SUBW Rd+1:Rd, Rr+1:Rr
// Subtracts two 16-bit values and places the result into Rd.
@@ -460,295 +473,429 @@ Defs = [SREG] in
// Expands to:
// sub Rd, Rr
// sbc Rd+1, Rr+1
- def SUBWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "subw\t$rd, $rr",
- [(set i16:$rd, (sub i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def SUBIRdK : FRdK<0b0101,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "subi\t$rd, $k",
- [(set i8:$rd, (sub i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def SUBWRdRr
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "subw\t$rd, $rr",
+ [(set i16
+ : $rd, (sub i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)]>;
+
+ def SUBIRdK
+ : FRdK<0b0101,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "subi\t$rd, $k",
+ [(set i8
+ : $rd, (sub i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// SUBIW Rd+1:Rd, K+1:K
//
// Expands to:
// subi Rd, K
// sbci Rd+1, K+1
- def SUBIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$rr),
- "subiw\t$rd, $rr",
- [(set i16:$rd, (sub i16:$src, imm:$rr)),
- (implicit SREG)]>;
-
- def SBIWRdK : FWRdK<0b1,
- (outs IWREGS:$rd),
- (ins IWREGS:$src, imm_arith6:$k),
- "sbiw\t$rd, $k",
- [(set i16:$rd, (sub i16:$src, uimm6:$k)),
- (implicit SREG)]>,
- Requires<[HasADDSUBIW]>;
+ def SUBIWRdK
+ : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $rr),
+ "subiw\t$rd, $rr",
+ [(set i16
+ : $rd, (sub i16
+ : $src, imm
+ : $rr)),
+ (implicit SREG)]>;
+
+ def SBIWRdK
+ : FWRdK<0b1,
+ (outs IWREGS
+ : $rd),
+ (ins IWREGS
+ : $src, imm_arith6
+ : $k),
+ "sbiw\t$rd, $k",
+ [(set i16
+ : $rd, (sub i16
+ : $src, uimm6
+ : $k)),
+ (implicit SREG)]>,
+ Requires<[HasADDSUBIW]>;
// Subtract with carry operations which must read the carry flag in SREG.
- let Uses = [SREG] in
- {
- def SBCRdRr : FRdRr<0b0000,
- 0b10,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "sbc\t$rd, $rr",
- [(set i8:$rd, (sube i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ let Uses = [SREG] in {
+ def SBCRdRr
+ : FRdRr<0b0000, 0b10,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "sbc\t$rd, $rr",
+ [(set i8
+ : $rd, (sube i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// SBCW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// sbc Rd, Rr
// sbc Rd+1, Rr+1
- def SBCWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "sbcw\t$rd, $rr",
- [(set i16:$rd, (sube i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def SBCIRdK : FRdK<0b0100,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "sbci\t$rd, $k",
- [(set i8:$rd, (sube i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def SBCWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "sbcw\t$rd, $rr", [
+ (set i16
+ : $rd, (sube i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
+
+ def SBCIRdK
+ : FRdK<0b0100,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "sbci\t$rd, $k",
+ [(set i8
+ : $rd, (sube i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// SBCIW Rd+1:Rd, K+1:K
// sbci Rd, K
// sbci Rd+1, K+1
- def SBCIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$rr),
- "sbciw\t$rd, $rr",
- [(set i16:$rd, (sube i16:$src, imm:$rr)),
- (implicit SREG)]>;
+ def SBCIWRdK : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $rr),
+ "sbciw\t$rd, $rr", [
+ (set i16
+ : $rd, (sube i16
+ : $src, imm
+ : $rr)),
+ (implicit SREG)
+ ]>;
}
}
//===----------------------------------------------------------------------===//
// Increment and Decrement
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
- def INCRd : FRd<0b1001,
- 0b0100011,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "inc\t$rd",
- [(set i8:$rd, (add i8:$src, 1)), (implicit SREG)]>;
-
- def DECRd : FRd<0b1001,
- 0b0101010,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "dec\t$rd",
- [(set i8:$rd, (add i8:$src, -1)), (implicit SREG)]>;
+let Constraints = "$src = $rd", Defs = [SREG] in {
+ def INCRd
+ : FRd<0b1001, 0b0100011,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "inc\t$rd", [(set i8
+ : $rd, (add i8
+ : $src, 1)),
+ (implicit SREG)]>;
+
+ def DECRd
+ : FRd<0b1001, 0b0101010,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "dec\t$rd", [(set i8
+ : $rd, (add i8
+ : $src, -1)),
+ (implicit SREG)]>;
}
//===----------------------------------------------------------------------===//
// Multiplication
//===----------------------------------------------------------------------===//
-let isCommutable = 1,
-Defs = [R1, R0, SREG] in
-{
+let isCommutable = 1, Defs = [R1, R0, SREG] in {
// MUL Rd, Rr
// Multiplies Rd by Rr and places the result into R1:R0.
let usesCustomInserter = 1 in {
- def MULRdRr : FRdRr<0b1001, 0b11,
- (outs),
- (ins GPR8:$lhs, GPR8:$rhs),
+ def MULRdRr : FRdRr<0b1001, 0b11, (outs),
+ (ins GPR8
+ : $lhs, GPR8
+ : $rhs),
"mul\t$lhs, $rhs",
[/*(set R1, R0, (smullohi i8:$lhs, i8:$rhs))*/]>,
- Requires<[SupportsMultiplication]>;
+ Requires<[SupportsMultiplication]>;
- def MULSRdRr : FMUL2RdRr<0,
- (outs),
- (ins LD8:$lhs, LD8:$rhs),
- "muls\t$lhs, $rhs",
- []>,
+ def MULSRdRr : FMUL2RdRr<0, (outs),
+ (ins LD8
+ : $lhs, LD8
+ : $rhs),
+ "muls\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
}
- def MULSURdRr : FMUL2RdRr<1,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "mulsu\t$lhs, $rhs",
- []>,
+ def MULSURdRr : FMUL2RdRr<1, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "mulsu\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
- def FMUL : FFMULRdRr<0b01,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "fmul\t$lhs, $rhs",
- []>,
+ def FMUL : FFMULRdRr<0b01, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "fmul\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
- def FMULS : FFMULRdRr<0b10,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "fmuls\t$lhs, $rhs",
- []>,
+ def FMULS : FFMULRdRr<0b10, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "fmuls\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
- def FMULSU : FFMULRdRr<0b11,
- (outs),
- (ins LD8lo:$lhs, LD8lo:$rhs),
- "fmulsu\t$lhs, $rhs",
- []>,
+ def FMULSU : FFMULRdRr<0b11, (outs),
+ (ins LD8lo
+ : $lhs, LD8lo
+ : $rhs),
+ "fmulsu\t$lhs, $rhs", []>,
Requires<[SupportsMultiplication]>;
}
-let Defs = [R15, R14, R13, R12, R11, R10, R9,
- R8, R7, R6, R5, R4, R3, R2, R1, R0] in
-def DESK : FDES<(outs),
- (ins i8imm:$k),
- "des\t$k",
- []>,
- Requires<[HasDES]>;
+let Defs =
+ [R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R1,
+ R0] in def DESK : FDES<(outs),
+ (ins i8imm
+ : $k),
+ "des\t$k", []>,
+ Requires<[HasDES]>;
//===----------------------------------------------------------------------===//
// Logic
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let Constraints = "$src = $rd", Defs = [SREG] in {
// Register-Register logic instructions (which have the
// property of commutativity).
- let isCommutable = 1 in
- {
- def ANDRdRr : FRdRr<0b0010,
- 0b00,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "and\t$rd, $rr",
- [(set i8:$rd, (and i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ let isCommutable = 1 in {
+ def ANDRdRr
+ : FRdRr<0b0010, 0b00,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "and\t$rd, $rr",
+ [(set i8
+ : $rd, (and i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ANDW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// and Rd, Rr
// and Rd+1, Rr+1
- def ANDWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "andw\t$rd, $rr",
- [(set i16:$rd, (and i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def ORRdRr : FRdRr<0b0010,
- 0b10,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "or\t$rd, $rr",
- [(set i8:$rd, (or i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def ANDWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "andw\t$rd, $rr", [
+ (set i16
+ : $rd, (and i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
+
+ def ORRdRr
+ : FRdRr<0b0010, 0b10,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "or\t$rd, $rr",
+ [(set i8
+ : $rd, (or i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// ORW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// or Rd, Rr
// or Rd+1, Rr+1
- def ORWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "orw\t$rd, $rr",
- [(set i16:$rd, (or i16:$src, i16:$rr)),
- (implicit SREG)]>;
-
- def EORRdRr : FRdRr<0b0010,
- 0b01,
- (outs GPR8:$rd),
- (ins GPR8:$src, GPR8:$rr),
- "eor\t$rd, $rr",
- [(set i8:$rd, (xor i8:$src, i8:$rr)),
- (implicit SREG)]>;
+ def ORWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "orw\t$rd, $rr", [
+ (set i16
+ : $rd, (or i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
+
+ def EORRdRr
+ : FRdRr<0b0010, 0b01,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, GPR8
+ : $rr),
+ "eor\t$rd, $rr",
+ [(set i8
+ : $rd, (xor i8
+ : $src, i8
+ : $rr)),
+ (implicit SREG)]>;
// EORW Rd+1:Rd, Rr+1:Rr
//
// Expands to:
// eor Rd, Rr
// eor Rd+1, Rr+1
- def EORWRdRr : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src, DREGS:$rr),
- "eorw\t$rd, $rr",
- [(set i16:$rd, (xor i16:$src, i16:$rr)),
- (implicit SREG)]>;
+ def EORWRdRr : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src, DREGS
+ : $rr),
+ "eorw\t$rd, $rr", [
+ (set i16
+ : $rd, (xor i16
+ : $src, i16
+ : $rr)),
+ (implicit SREG)
+ ]>;
}
- def ANDIRdK : FRdK<0b0111,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "andi\t$rd, $k",
- [(set i8:$rd, (and i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def ANDIRdK
+ : FRdK<0b0111,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "andi\t$rd, $k",
+ [(set i8
+ : $rd, (and i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// ANDI Rd+1:Rd, K+1:K
//
// Expands to:
// andi Rd, K
// andi Rd+1, K+1
- def ANDIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$k),
- "andiw\t$rd, $k",
- [(set i16:$rd, (and i16:$src, imm:$k)),
- (implicit SREG)]>;
-
- def ORIRdK : FRdK<0b0110,
- (outs LD8:$rd),
- (ins LD8:$src, imm_ldi8:$k),
- "ori\t$rd, $k",
- [(set i8:$rd, (or i8:$src, imm:$k)),
- (implicit SREG)]>;
+ def ANDIWRdK
+ : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $k),
+ "andiw\t$rd, $k",
+ [(set i16
+ : $rd, (and i16
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
+
+ def ORIRdK
+ : FRdK<0b0110,
+ (outs LD8
+ : $rd),
+ (ins LD8
+ : $src, imm_ldi8
+ : $k),
+ "ori\t$rd, $k",
+ [(set i8
+ : $rd, (or i8
+ : $src, imm
+ : $k)),
+ (implicit SREG)]>;
// ORIW Rd+1:Rd, K+1,K
//
// Expands to:
// ori Rd, K
// ori Rd+1, K+1
- def ORIWRdK : Pseudo<(outs DLDREGS:$rd),
- (ins DLDREGS:$src, i16imm:$rr),
- "oriw\t$rd, $rr",
- [(set i16:$rd, (or i16:$src, imm:$rr)),
- (implicit SREG)]>;
+ def ORIWRdK
+ : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DLDREGS
+ : $src, i16imm
+ : $rr),
+ "oriw\t$rd, $rr",
+ [(set i16
+ : $rd, (or i16
+ : $src, imm
+ : $rr)),
+ (implicit SREG)]>;
}
//===----------------------------------------------------------------------===//
// One's/Two's Complement
//===----------------------------------------------------------------------===//
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
- def COMRd : FRd<0b1001,
- 0b0100000,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "com\t$rd",
- [(set i8:$rd, (not i8:$src)), (implicit SREG)]>;
+let Constraints = "$src = $rd", Defs = [SREG] in {
+ def COMRd
+ : FRd<0b1001, 0b0100000,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "com\t$rd", [(set i8
+ : $rd, (not i8
+ : $src)),
+ (implicit SREG)]>;
// COMW Rd+1:Rd
//
// Expands to:
// com Rd
// com Rd+1
- def COMWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def COMWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"comw\t$rd",
- [(set i16:$rd, (not i16:$src)), (implicit SREG)]>;
+ [(set i16
+ : $rd, (not i16
+ : $src)),
+ (implicit SREG)]>;
- def NEGRd : FRd<0b1001,
- 0b0100001,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "neg\t$rd",
- [(set i8:$rd, (ineg i8:$src)), (implicit SREG)]>;
+ def NEGRd
+ : FRd<0b1001, 0b0100001,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "neg\t$rd", [(set i8
+ : $rd, (ineg i8
+ : $src)),
+ (implicit SREG)]>;
// NEGW Rd+1:Rd
//
@@ -756,155 +903,126 @@ Defs = [SREG] in
// neg Rd+1
// neg Rd
// sbc Rd+1, r1
- def NEGWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def NEGWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"negw\t$rd",
- [(set i16:$rd, (ineg i16:$src)), (implicit SREG)]>;
+ [(set i16
+ : $rd, (ineg i16
+ : $src)),
+ (implicit SREG)]>;
}
// TST Rd
// Test for zero of minus.
// This operation is identical to a `Rd AND Rd`.
-def : InstAlias<"tst\t$rd", (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+def : InstAlias<"tst\t$rd", (ANDRdRr GPR8 : $rd, GPR8 : $rd)>;
// SBR Rd, K
//
// Mnemonic alias to 'ORI Rd, K'. Same bit pattern, same operands,
// same everything.
def : InstAlias<"sbr\t$rd, $k",
- (ORIRdK LD8:$rd, imm_ldi8:$k),
+ (ORIRdK LD8
+ : $rd, imm_ldi8
+ : $k),
/* Disable display, so we don't override ORI */ 0>;
//===----------------------------------------------------------------------===//
// Jump instructions
//===----------------------------------------------------------------------===//
-let isBarrier = 1,
-isBranch = 1,
-isTerminator = 1 in
-{
- def RJMPk : FBRk<0,
- (outs),
- (ins brtarget_13:$target),
- "rjmp\t$target",
- [(br bb:$target)]>;
+let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
+ def RJMPk : FBRk<0, (outs),
+ (ins brtarget_13
+ : $target),
+ "rjmp\t$target", [(br bb
+ : $target)]>;
let isIndirectBranch = 1,
- Uses = [R31R30] in
- def IJMP : F16<0b1001010000001001,
- (outs),
- (ins),
- "ijmp",
- []>,
- Requires<[HasIJMPCALL]>;
+ Uses = [R31R30] in def IJMP
+ : F16<0b1001010000001001, (outs), (ins), "ijmp", []>,
+ Requires<[HasIJMPCALL]>;
let isIndirectBranch = 1,
- Uses = [R31R30] in
- def EIJMP : F16<0b1001010000011001,
- (outs),
- (ins),
- "eijmp",
- []>,
- Requires<[HasEIJMPCALL]>;
-
- def JMPk : F32BRk<0b110,
- (outs),
- (ins call_target:$k),
- "jmp\t$k",
- []>,
+ Uses = [R31R30] in def EIJMP
+ : F16<0b1001010000011001, (outs), (ins), "eijmp", []>,
+ Requires<[HasEIJMPCALL]>;
+
+ def JMPk : F32BRk<0b110, (outs),
+ (ins call_target
+ : $k),
+ "jmp\t$k", []>,
Requires<[HasJMPCALL]>;
}
//===----------------------------------------------------------------------===//
// Call instructions
//===----------------------------------------------------------------------===//
-let isCall = 1 in
-{
+let isCall = 1 in {
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP] in
- def RCALLk : FBRk<1,
- (outs),
- (ins brtarget_13:$target),
- "rcall\t$target",
- []>;
+ let Uses = [SP] in def RCALLk : FBRk<1, (outs),
+ (ins brtarget_13
+ : $target),
+ "rcall\t$target", []>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP, R31R30] in
- def ICALL : F16<0b1001010100001001,
- (outs),
- (ins variable_ops),
- "icall",
- []>,
- Requires<[HasIJMPCALL]>;
+ let Uses = [SP, R31R30] in def ICALL
+ : F16<0b1001010100001001, (outs), (ins variable_ops), "icall", []>,
+ Requires<[HasIJMPCALL]>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
- let Uses = [SP, R31R30] in
- def EICALL : F16<0b1001010100011001,
- (outs),
- (ins variable_ops),
- "eicall",
- []>,
- Requires<[HasEIJMPCALL]>;
+ let Uses = [SP, R31R30] in def EICALL
+ : F16<0b1001010100011001, (outs), (ins variable_ops), "eicall", []>,
+ Requires<[HasEIJMPCALL]>;
// SP is marked as a use to prevent stack-pointer assignments that appear
// immediately before calls from potentially appearing dead.
//
- //:TODO: the imm field can be either 16 or 22 bits in devices with more
+ //: TODO: the imm field can be either 16 or 22 bits in devices with more
// than 64k of ROM, fix it once we support the largest devices.
- let Uses = [SP] in
- def CALLk : F32BRk<0b111,
- (outs),
- (ins call_target:$k),
- "call\t$k",
- [(AVRcall imm:$k)]>,
- Requires<[HasJMPCALL]>;
+ let Uses = [SP] in def CALLk : F32BRk<0b111, (outs),
+ (ins call_target
+ : $k),
+ "call\t$k", [(AVRcall imm
+ : $k)]>,
+ Requires<[HasJMPCALL]>;
}
//===----------------------------------------------------------------------===//
// Return instructions.
//===----------------------------------------------------------------------===//
-let isTerminator = 1,
-isReturn = 1,
-isBarrier = 1 in
-{
- def RET : F16<0b1001010100001000,
- (outs),
- (ins),
- "ret",
- [(AVRretflag)]>;
-
- def RETI : F16<0b1001010100011000,
- (outs),
- (ins),
- "reti",
- [(AVRretiflag)]>;
+let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ def RET : F16<0b1001010100001000, (outs), (ins), "ret", [(AVRretflag)]>;
+
+ def RETI : F16<0b1001010100011000, (outs), (ins), "reti", [(AVRretiflag)]>;
}
//===----------------------------------------------------------------------===//
// Compare operations.
//===----------------------------------------------------------------------===//
-let Defs = [SREG] in
-{
+let Defs = [SREG] in {
// CPSE Rd, Rr
// Compare Rd and Rr, skipping the next instruction if they are equal.
- let isBarrier = 1,
- isBranch = 1,
- isTerminator = 1 in
- def CPSE : FRdRr<0b0001,
- 0b00,
- (outs),
- (ins GPR8:$rd, GPR8:$rr),
- "cpse\t$rd, $rr",
- []>;
-
- def CPRdRr : FRdRr<0b0001,
- 0b01,
- (outs),
- (ins GPR8:$rd, GPR8:$rr),
- "cp\t$rd, $rr",
- [(AVRcmp i8:$rd, i8:$rr), (implicit SREG)]>;
+ let isBarrier = 1, isBranch = 1,
+ isTerminator = 1 in def CPSE : FRdRr<0b0001, 0b00, (outs),
+ (ins GPR8
+ : $rd, GPR8
+ : $rr),
+ "cpse\t$rd, $rr", []>;
+
+ def CPRdRr
+ : FRdRr<0b0001, 0b01, (outs),
+ (ins GPR8
+ : $rd, GPR8
+ : $rr),
+ "cp\t$rd, $rr", [(AVRcmp i8
+ : $rd, i8
+ : $rr),
+ (implicit SREG)]>;
// CPW Rd+1:Rd, Rr+1:Rr
//
@@ -912,251 +1030,256 @@ let Defs = [SREG] in
// cp Rd, Rr
// cpc Rd+1, Rr+1
def CPWRdRr : Pseudo<(outs),
- (ins DREGS:$src, DREGS:$src2),
+ (ins DREGS
+ : $src, DREGS
+ : $src2),
"cpw\t$src, $src2",
- [(AVRcmp i16:$src, i16:$src2), (implicit SREG)]>;
+ [(AVRcmp i16
+ : $src, i16
+ : $src2),
+ (implicit SREG)]>;
- let Uses = [SREG] in
- def CPCRdRr : FRdRr<0b0000,
- 0b01,
- (outs),
- (ins GPR8:$rd, GPR8:$rr),
- "cpc\t$rd, $rr",
- [(AVRcmpc i8:$rd, i8:$rr), (implicit SREG)]>;
+ let Uses = [SREG] in def CPCRdRr
+ : FRdRr<0b0000, 0b01, (outs),
+ (ins GPR8
+ : $rd, GPR8
+ : $rr),
+ "cpc\t$rd, $rr", [(AVRcmpc i8
+ : $rd, i8
+ : $rr),
+ (implicit SREG)]>;
// CPCW Rd+1:Rd. Rr+1:Rr
//
// Expands to:
// cpc Rd, Rr
// cpc Rd+1, Rr+1
- let Uses = [SREG] in
- def CPCWRdRr : Pseudo<(outs),
- (ins DREGS:$src, DREGS:$src2),
- "cpcw\t$src, $src2",
- [(AVRcmpc i16:$src, i16:$src2), (implicit SREG)]>;
+ let Uses = [SREG] in def CPCWRdRr
+ : Pseudo<(outs),
+ (ins DREGS
+ : $src, DREGS
+ : $src2),
+ "cpcw\t$src, $src2",
+ [(AVRcmpc i16
+ : $src, i16
+ : $src2),
+ (implicit SREG)]>;
// CPI Rd, K
// Compares a register with an 8 bit immediate.
- def CPIRdK : FRdK<0b0011,
- (outs),
- (ins LD8:$rd, imm_ldi8:$k),
- "cpi\t$rd, $k",
- [(AVRcmp i8:$rd, imm:$k), (implicit SREG)]>;
+ def CPIRdK
+ : FRdK<0b0011, (outs),
+ (ins LD8
+ : $rd, imm_ldi8
+ : $k),
+ "cpi\t$rd, $k", [(AVRcmp i8
+ : $rd, imm
+ : $k),
+ (implicit SREG)]>;
}
//===----------------------------------------------------------------------===//
// Register conditional skipping/branching operations.
//===----------------------------------------------------------------------===//
-let isBranch = 1,
-isTerminator = 1 in
-{
+let isBranch = 1, isTerminator = 1 in {
// Conditional skipping on GPR register bits, and
// conditional skipping on IO register bits.
- let isBarrier = 1 in
- {
- def SBRCRrB : FRdB<0b10,
- (outs),
- (ins GPR8:$rr, i8imm:$b),
- "sbrc\t$rr, $b",
- []>;
-
- def SBRSRrB : FRdB<0b11,
- (outs),
- (ins GPR8:$rr, i8imm:$b),
- "sbrs\t$rr, $b",
- []>;
-
- def SBICAb : FIOBIT<0b01,
- (outs),
- (ins imm_port5:$a, i8imm:$b),
- "sbic\t$a, $b",
- []>;
-
- def SBISAb : FIOBIT<0b11,
- (outs),
- (ins imm_port5:$a, i8imm:$b),
- "sbis\t$a, $b",
- []>;
+ let isBarrier = 1 in {
+ def SBRCRrB : FRdB<0b10, (outs),
+ (ins GPR8
+ : $rr, i8imm
+ : $b),
+ "sbrc\t$rr, $b", []>;
+
+ def SBRSRrB : FRdB<0b11, (outs),
+ (ins GPR8
+ : $rr, i8imm
+ : $b),
+ "sbrs\t$rr, $b", []>;
+
+ def SBICAb : FIOBIT<0b01, (outs),
+ (ins imm_port5
+ : $a, i8imm
+ : $b),
+ "sbic\t$a, $b", []>;
+
+ def SBISAb : FIOBIT<0b11, (outs),
+ (ins imm_port5
+ : $a, i8imm
+ : $b),
+ "sbis\t$a, $b", []>;
}
// Relative branches on status flag bits.
- let Uses = [SREG] in
- {
+ let Uses = [SREG] in {
// BRBS s, k
// Branch if `s` flag in status register is set.
- def BRBSsk : FSK<0,
- (outs),
- (ins i8imm:$s, relbrtarget_7:$k),
- "brbs\t$s, $k",
- []>;
+ def BRBSsk : FSK<0, (outs),
+ (ins i8imm
+ : $s, relbrtarget_7
+ : $k),
+ "brbs\t$s, $k", []>;
// BRBC s, k
// Branch if `s` flag in status register is clear.
- def BRBCsk : FSK<1,
- (outs),
- (ins i8imm:$s, relbrtarget_7:$k),
- "brbc\t$s, $k",
- []>;
+ def BRBCsk : FSK<1, (outs),
+ (ins i8imm
+ : $s, relbrtarget_7
+ : $k),
+ "brbc\t$s, $k", []>;
}
}
-
// BRCS k
// Branch if carry flag is set
-def : InstAlias<"brcs\t$k", (BRBSsk 0, relbrtarget_7:$k)>;
+def : InstAlias<"brcs\t$k", (BRBSsk 0, relbrtarget_7 : $k)>;
// BRCC k
// Branch if carry flag is clear
-def : InstAlias<"brcc\t$k", (BRBCsk 0, relbrtarget_7:$k)>;
+def : InstAlias<"brcc\t$k", (BRBCsk 0, relbrtarget_7 : $k)>;
// BRHS k
// Branch if half carry flag is set
-def : InstAlias<"brhs\t$k", (BRBSsk 5, relbrtarget_7:$k)>;
+def : InstAlias<"brhs\t$k", (BRBSsk 5, relbrtarget_7 : $k)>;
// BRHC k
// Branch if half carry flag is clear
-def : InstAlias<"brhc\t$k", (BRBCsk 5, relbrtarget_7:$k)>;
+def : InstAlias<"brhc\t$k", (BRBCsk 5, relbrtarget_7 : $k)>;
// BRTS k
// Branch if the T flag is set
-def : InstAlias<"brts\t$k", (BRBSsk 6, relbrtarget_7:$k)>;
+def : InstAlias<"brts\t$k", (BRBSsk 6, relbrtarget_7 : $k)>;
// BRTC k
// Branch if the T flag is clear
-def : InstAlias<"brtc\t$k", (BRBCsk 6, relbrtarget_7:$k)>;
+def : InstAlias<"brtc\t$k", (BRBCsk 6, relbrtarget_7 : $k)>;
// BRVS k
// Branch if the overflow flag is set
-def : InstAlias<"brvs\t$k", (BRBSsk 3, relbrtarget_7:$k)>;
+def : InstAlias<"brvs\t$k", (BRBSsk 3, relbrtarget_7 : $k)>;
// BRVC k
// Branch if the overflow flag is clear
-def : InstAlias<"brvc\t$k", (BRBCsk 3, relbrtarget_7:$k)>;
+def : InstAlias<"brvc\t$k", (BRBCsk 3, relbrtarget_7 : $k)>;
// BRIE k
// Branch if the global interrupt flag is enabled
-def : InstAlias<"brie\t$k", (BRBSsk 7, relbrtarget_7:$k)>;
+def : InstAlias<"brie\t$k", (BRBSsk 7, relbrtarget_7 : $k)>;
// BRID k
// Branch if the global interrupt flag is disabled
-def : InstAlias<"brid\t$k", (BRBCsk 7, relbrtarget_7:$k)>;
+def : InstAlias<"brid\t$k", (BRBCsk 7, relbrtarget_7 : $k)>;
//===----------------------------------------------------------------------===//
// PC-relative conditional branches
//===----------------------------------------------------------------------===//
// Based on status register. We cannot simplify these into instruction aliases
// because we also need to be able to specify a pattern to match for ISel.
-let isBranch = 1,
-isTerminator = 1,
-Uses = [SREG] in
-{
- def BREQk : FBRsk<0,
- 0b001,
- (outs),
- (ins relbrtarget_7:$target),
- "breq\t$target",
- [(AVRbrcond bb:$target, AVR_COND_EQ)]>;
-
- def BRNEk : FBRsk<1,
- 0b001,
- (outs),
- (ins relbrtarget_7:$target),
- "brne\t$target",
- [(AVRbrcond bb:$target, AVR_COND_NE)]>;
-
-
- def BRSHk : FBRsk<1,
- 0b000,
- (outs),
- (ins relbrtarget_7:$target),
- "brsh\t$target",
- [(AVRbrcond bb:$target, AVR_COND_SH)]>;
-
- def BRLOk : FBRsk<0,
- 0b000,
- (outs),
- (ins relbrtarget_7:$target),
- "brlo\t$target",
- [(AVRbrcond bb:$target, AVR_COND_LO)]>;
-
- def BRMIk : FBRsk<0,
- 0b010,
- (outs),
- (ins relbrtarget_7:$target),
- "brmi\t$target",
- [(AVRbrcond bb:$target, AVR_COND_MI)]>;
-
- def BRPLk : FBRsk<1,
- 0b010,
- (outs),
- (ins relbrtarget_7:$target),
- "brpl\t$target",
- [(AVRbrcond bb:$target, AVR_COND_PL)]>;
-
- def BRGEk : FBRsk<1,
- 0b100,
- (outs),
- (ins relbrtarget_7:$target),
- "brge\t$target",
- [(AVRbrcond bb:$target, AVR_COND_GE)]>;
-
- def BRLTk : FBRsk<0,
- 0b100,
- (outs),
- (ins relbrtarget_7:$target),
- "brlt\t$target",
- [(AVRbrcond bb:$target, AVR_COND_LT)]>;
+let isBranch = 1, isTerminator = 1, Uses = [SREG] in {
+ def BREQk : FBRsk<0, 0b001, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "breq\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_EQ)]>;
+
+ def BRNEk : FBRsk<1, 0b001, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brne\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_NE)]>;
+
+ def BRSHk : FBRsk<1, 0b000, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brsh\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_SH)]>;
+
+ def BRLOk : FBRsk<0, 0b000, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brlo\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_LO)]>;
+
+ def BRMIk : FBRsk<0, 0b010, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brmi\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_MI)]>;
+
+ def BRPLk : FBRsk<1, 0b010, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brpl\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_PL)]>;
+
+ def BRGEk : FBRsk<1, 0b100, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brge\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_GE)]>;
+
+ def BRLTk : FBRsk<0, 0b100, (outs),
+ (ins relbrtarget_7
+ : $target),
+ "brlt\t$target", [(AVRbrcond bb
+ : $target, AVR_COND_LT)]>;
}
//===----------------------------------------------------------------------===//
// Data transfer instructions
//===----------------------------------------------------------------------===//
// 8 and 16-bit register move instructions.
-let hasSideEffects = 0 in
-{
- def MOVRdRr : FRdRr<0b0010,
- 0b11,
- (outs GPR8:$rd),
- (ins GPR8:$rr),
- "mov\t$rd, $rr",
- []>;
-
- def MOVWRdRr : FMOVWRdRr<(outs DREGS:$dst),
- (ins DREGS:$src),
- "movw\t$dst, $src",
- []>,
+let hasSideEffects = 0 in {
+ def MOVRdRr : FRdRr<0b0010, 0b11,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $rr),
+ "mov\t$rd, $rr", []>;
+
+ def MOVWRdRr : FMOVWRdRr<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src),
+ "movw\t$dst, $src", []>,
Requires<[HasMOVW]>;
}
// Load immediate values into registers.
-let isReMaterializable = 1 in
-{
+let isReMaterializable = 1 in {
def LDIRdK : FRdK<0b1110,
- (outs LD8:$rd),
- (ins imm_ldi8:$k),
- "ldi\t$rd, $k",
- [(set i8:$rd, imm:$k)]>;
+ (outs LD8
+ : $rd),
+ (ins imm_ldi8
+ : $k),
+ "ldi\t$rd, $k", [(set i8
+ : $rd, imm
+ : $k)]>;
// LDIW Rd+1:Rd, K+1:K
//
// Expands to:
// ldi Rd, K
// ldi Rd+1, K+1
- def LDIWRdK : Pseudo<(outs DLDREGS:$dst),
- (ins i16imm:$src),
- "ldiw\t$dst, $src",
- [(set i16:$dst, imm:$src)]>;
+ def LDIWRdK : Pseudo<(outs DLDREGS
+ : $dst),
+ (ins i16imm
+ : $src),
+ "ldiw\t$dst, $src", [(set i16
+ : $dst, imm
+ : $src)]>;
}
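
The "Expands to:" comments above describe the pseudo lowering only informally; in the
backend the 16-bit pseudo is split on the sub_lo/sub_hi halves of the register pair.
A minimal sketch of such an expansion, loosely modeled on the AVR pseudo-expansion
pass (not part of this diff); the helper name is made up and only the plain-immediate
form of LDIWRdK is handled:

#include "AVRInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Expand "ldiw Rd, K" into "ldi RdLo, K & 0xff" and "ldi RdHi, K >> 8".
static void expandLDIWRdK(const AVRInstrInfo &TII, const TargetRegisterInfo &TRI,
                          MachineBasicBlock &MBB, MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  int64_t Imm = MI.getOperand(1).getImm(); // global-address operands not handled here
  Register DstLo = TRI.getSubReg(Dst, AVR::sub_lo);
  Register DstHi = TRI.getSubReg(Dst, AVR::sub_hi);
  DebugLoc DL = MI.getDebugLoc();

  BuildMI(MBB, MI, DL, TII.get(AVR::LDIRdK), DstLo).addImm(Imm & 0xff);
  BuildMI(MBB, MI, DL, TII.get(AVR::LDIRdK), DstHi).addImm((Imm >> 8) & 0xff);
  MI.eraseFromParent();
}
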
// Load from data space into register.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def LDSRdK : F32DM<0b0,
- (outs GPR8:$rd),
- (ins imm16:$k),
- "lds\t$rd, $k",
- [(set i8:$rd, (load imm:$k))]>,
+ (outs GPR8
+ : $rd),
+ (ins imm16
+ : $k),
+ "lds\t$rd, $k", [(set i8
+ : $rd, (load imm
+ : $k))]>,
Requires<[HasSRAM]>;
// LDSW Rd+1:Rd, K+1:K
@@ -1164,23 +1287,26 @@ isReMaterializable = 1 in
// Expands to:
// lds Rd, (K+1:K)
// lds Rd+1 (K+1:K) + 1
- def LDSWRdK : Pseudo<(outs DREGS:$dst),
- (ins i16imm:$src),
- "ldsw\t$dst, $src",
- [(set i16:$dst, (load imm:$src))]>,
+ def LDSWRdK : Pseudo<(outs DREGS
+ : $dst),
+ (ins i16imm
+ : $src),
+ "ldsw\t$dst, $src", [(set i16
+ : $dst, (load imm
+ : $src))]>,
Requires<[HasSRAM]>;
}
// Indirect loads.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
- def LDRdPtr : FSTLD<0,
- 0b00,
- (outs GPR8:$reg),
- (ins LDSTPtrReg:$ptrreg),
- "ld\t$reg, $ptrreg",
- [(set GPR8:$reg, (load i16:$ptrreg))]>,
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def LDRdPtr : FSTLD<0, 0b00,
+ (outs GPR8
+ : $reg),
+ (ins LDSTPtrReg
+ : $ptrreg),
+ "ld\t$reg, $ptrreg", [(set GPR8
+ : $reg, (load i16
+ : $ptrreg))]>,
Requires<[HasSRAM]>;
// LDW Rd+1:Rd, P
@@ -1188,43 +1314,48 @@ isReMaterializable = 1 in
// Expands to:
// ld Rd, P
// ldd Rd+1, P+1
- let Constraints = "@earlyclobber $reg" in
- def LDWRdPtr : Pseudo<(outs DREGS:$reg),
- (ins PTRDISPREGS:$ptrreg),
- "ldw\t$reg, $ptrreg",
- [(set i16:$reg, (load i16:$ptrreg))]>,
- Requires<[HasSRAM]>;
+ let Constraints = "@earlyclobber $reg" in def LDWRdPtr
+ : Pseudo<(outs DREGS
+ : $reg),
+ (ins PTRDISPREGS
+ : $ptrreg),
+ "ldw\t$reg, $ptrreg", [(set i16
+ : $reg, (load i16
+ : $ptrreg))]>,
+ Requires<[HasSRAM]>;
}
// Indirect loads (with postincrement or predecrement).
-let mayLoad = 1,
-hasSideEffects = 0,
-Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in
-{
- def LDRdPtrPi : FSTLD<0,
- 0b01,
- (outs GPR8:$reg, PTRREGS:$base_wb),
- (ins LDSTPtrReg:$ptrreg),
- "ld\t$reg, $ptrreg+",
- []>,
+let mayLoad = 1, hasSideEffects = 0,
+ Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in {
+ def LDRdPtrPi : FSTLD<0, 0b01,
+ (outs GPR8
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg),
+ "ld\t$reg, $ptrreg+", []>,
Requires<[HasSRAM]>;
// LDW Rd+1:Rd, P+
// Expands to:
// ld Rd, P+
// ld Rd+1, P+
- def LDWRdPtrPi : Pseudo<(outs DREGS:$reg, PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg),
- "ldw\t$reg, $ptrreg+",
- []>,
+ def LDWRdPtrPi : Pseudo<(outs DREGS
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg),
+ "ldw\t$reg, $ptrreg+", []>,
Requires<[HasSRAM]>;
- def LDRdPtrPd : FSTLD<0,
- 0b10,
- (outs GPR8:$reg, PTRREGS:$base_wb),
- (ins LDSTPtrReg:$ptrreg),
- "ld\t$reg, -$ptrreg",
- []>,
+ def LDRdPtrPd : FSTLD<0, 0b10,
+ (outs GPR8
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg),
+ "ld\t$reg, -$ptrreg", []>,
Requires<[HasSRAM]>;
// LDW Rd+1:Rd, -P
@@ -1232,36 +1363,42 @@ Constraints = "$ptrreg = $base_wb,@earlyclobber $reg" in
// Expands to:
// ld Rd+1, -P
// ld Rd, -P
- def LDWRdPtrPd : Pseudo<(outs DREGS:$reg, PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg),
- "ldw\t$reg, -$ptrreg",
- []>,
+ def LDWRdPtrPd : Pseudo<(outs DREGS
+ : $reg, PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg),
+ "ldw\t$reg, -$ptrreg", []>,
Requires<[HasSRAM]>;
}
// Load indirect with displacement operations.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
- let Constraints = "@earlyclobber $reg" in
- def LDDRdPtrQ : FSTDLDD<0,
- (outs GPR8:$reg),
- (ins memri:$memri),
- "ldd\t$reg, $memri",
- [(set i8:$reg, (load addr:$memri))]>,
- Requires<[HasSRAM]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ let Constraints = "@earlyclobber $reg" in def LDDRdPtrQ
+ : FSTDLDD<0,
+ (outs GPR8
+ : $reg),
+ (ins memri
+ : $memri),
+ "ldd\t$reg, $memri", [(set i8
+ : $reg, (load addr
+ : $memri))]>,
+ Requires<[HasSRAM]>;
// LDDW Rd+1:Rd, P+q
//
// Expands to:
// ldd Rd, P+q
// ldd Rd+1, P+q+1
- let Constraints = "@earlyclobber $dst" in
- def LDDWRdPtrQ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND:$dst),
- (ins memri:$memri),
- "lddw\t$dst, $memri",
- [(set i16:$dst, (load addr:$memri))]>,
- Requires<[HasSRAM]>;
+ let Constraints = "@earlyclobber $dst" in def LDDWRdPtrQ
+ : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND
+ : $dst),
+ (ins memri
+ : $memri),
+ "lddw\t$dst, $memri", [(set i16
+ : $dst, (load addr
+ : $memri))]>,
+ Requires<[HasSRAM]>;
// An identical pseudo instruction to LDDWRdPtrQ, except restricted to the Y
// register and without the @earlyclobber flag.
@@ -1270,7 +1407,8 @@ isReMaterializable = 1 in
// being able to handle the expansion of a COPY into a machine instruction
// that has an earlyclobber flag. This is because the register allocator will
// try to expand a copy from a register slot into an earlyclobber instruction.
- // Instructions that are earlyclobber need to be in a dedicated earlyclobber slot.
+ // Instructions that are earlyclobber need to be in a dedicated earlyclobber
+ // slot.
//
// This pseudo instruction can be used pre-AVR pseudo expansion in order to
// get a frame index load without directly using earlyclobber instructions.
@@ -1279,30 +1417,44 @@ isReMaterializable = 1 in
//
// This instruction may be removed once PR13375 is fixed.
let mayLoad = 1,
- hasSideEffects = 0 in
- def LDDWRdYQ : Pseudo<(outs DREGS:$dst),
- (ins memri:$memri),
- "lddw\t$dst, $memri",
- []>,
- Requires<[HasSRAM]>;
+ hasSideEffects = 0 in def LDDWRdYQ : Pseudo<(outs DREGS
+ : $dst),
+ (ins memri
+ : $memri),
+ "lddw\t$dst, $memri", []>,
+ Requires<[HasSRAM]>;
}
-class AtomicLoad<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC> :
- Pseudo<(outs DRC:$rd), (ins PTRRC:$rr), "atomic_op",
- [(set DRC:$rd, (Op i16:$rr))]>;
-
-class AtomicStore<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC> :
- Pseudo<(outs), (ins PTRRC:$rd, DRC:$rr), "atomic_op",
- [(Op i16:$rd, DRC:$rr)]>;
-
-let Constraints = "@earlyclobber $rd" in
-class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
- RegisterClass PTRRC> :
- Pseudo<(outs DRC:$rd), (ins PTRRC:$rr, DRC:$operand),
- "atomic_op",
- [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>;
+class AtomicLoad<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
+ : Pseudo<(outs DRC
+ : $rd),
+ (ins PTRRC
+ : $rr),
+ "atomic_op", [(set DRC
+ : $rd, (Op i16
+ : $rr))]>;
+
+class AtomicStore<PatFrag Op, RegisterClass DRC, RegisterClass PTRRC>
+ : Pseudo<(outs),
+ (ins PTRRC
+ : $rd, DRC
+ : $rr),
+ "atomic_op", [(Op i16
+ : $rd, DRC
+ : $rr)]>;
+
+let Constraints =
+ "@earlyclobber $rd" in class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
+ RegisterClass PTRRC>
+ : Pseudo<(outs DRC
+ : $rd),
+ (ins PTRRC
+ : $rr, DRC
+ : $operand),
+ "atomic_op", [(set DRC
+ : $rd, (Op i16
+ : $rr, DRC
+ : $operand))]>;
// FIXME: I think 16-bit atomic binary ops need to mark
// r0 as clobbered.
@@ -1318,34 +1470,36 @@ class AtomicLoadOp<PatFrag Op, RegisterClass DRC,
// 16-bit operations use 16-bit load/store postincrement instructions,
// which require PTRDISPREGS.
-def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8, PTRREGS>;
-def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS, PTRDISPREGS>;
+def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8, PTRREGS>;
+def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS, PTRDISPREGS>;
-def AtomicStore8 : AtomicStore<atomic_store_8, GPR8, PTRREGS>;
+def AtomicStore8 : AtomicStore<atomic_store_8, GPR8, PTRREGS>;
def AtomicStore16 : AtomicStore<atomic_store_16, DREGS, PTRDISPREGS>;
class AtomicLoadOp8<PatFrag Op> : AtomicLoadOp<Op, GPR8, PTRREGS>;
class AtomicLoadOp16<PatFrag Op> : AtomicLoadOp<Op, DREGS, PTRDISPREGS>;
-def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
+def AtomicLoadAdd8 : AtomicLoadOp8<atomic_load_add_8>;
def AtomicLoadAdd16 : AtomicLoadOp16<atomic_load_add_16>;
-def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
+def AtomicLoadSub8 : AtomicLoadOp8<atomic_load_sub_8>;
def AtomicLoadSub16 : AtomicLoadOp16<atomic_load_sub_16>;
-def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
+def AtomicLoadAnd8 : AtomicLoadOp8<atomic_load_and_8>;
def AtomicLoadAnd16 : AtomicLoadOp16<atomic_load_and_16>;
-def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
-def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
-def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
+def AtomicLoadOr8 : AtomicLoadOp8<atomic_load_or_8>;
+def AtomicLoadOr16 : AtomicLoadOp16<atomic_load_or_16>;
+def AtomicLoadXor8 : AtomicLoadOp8<atomic_load_xor_8>;
def AtomicLoadXor16 : AtomicLoadOp16<atomic_load_xor_16>;
-def AtomicFence : Pseudo<(outs), (ins), "atomic_fence",
- [(atomic_fence timm, timm)]>;
+def AtomicFence
+ : Pseudo<(outs), (ins), "atomic_fence", [(atomic_fence timm, timm)]>;
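
AVR has no native atomic instructions, so pseudos like AtomicLoadAdd8 end up as an
ordinary load/op/store bracketed by an interrupt disable that saves and restores SREG.
The sketch below shows only that bracketing shape; the SREG I/O address (0x3f on
classic AVR parts), the use of r0 as scratch and the helper name are assumptions, and
the fact that atomicrmw must yield the value from before the operation is glossed over.

#include "AVRInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Rough shape of lowering "AtomicLoadAdd8 $rd, $rr, $operand":
//   in   r0, 0x3f      ; save SREG
//   cli                ; disable interrupts
//   ld   $rd, $rr      ; plain load
//   add  $rd, $operand
//   st   $rr, $rd      ; plain store
//   out  0x3f, r0      ; restore SREG (and the previous interrupt state)
static void expandAtomicLoadAdd8(const AVRInstrInfo &TII, MachineBasicBlock &MBB,
                                 MachineInstr &MI) {
  const unsigned SREGAddr = 0x3f; // assumed classic-AVR SREG I/O address
  DebugLoc DL = MI.getDebugLoc();
  Register Dst = MI.getOperand(0).getReg();
  Register Ptr = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();

  BuildMI(MBB, MI, DL, TII.get(AVR::INRdA), AVR::R0).addImm(SREGAddr);
  BuildMI(MBB, MI, DL, TII.get(AVR::BCLRs)).addImm(7); // cli
  BuildMI(MBB, MI, DL, TII.get(AVR::LDRdPtr), Dst).addReg(Ptr);
  BuildMI(MBB, MI, DL, TII.get(AVR::ADDRdRr), Dst).addReg(Dst).addReg(Val);
  BuildMI(MBB, MI, DL, TII.get(AVR::STPtrRr)).addReg(Ptr).addReg(Dst);
  BuildMI(MBB, MI, DL, TII.get(AVR::OUTARr)).addImm(SREGAddr).addReg(AVR::R0);
  MI.eraseFromParent();
}
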
// Indirect store from register to data space.
-def STSKRr : F32DM<0b1,
- (outs),
- (ins imm16:$k, GPR8:$rd),
- "sts\t$k, $rd",
- [(store i8:$rd, imm:$k)]>,
+def STSKRr : F32DM<0b1, (outs),
+ (ins imm16
+ : $k, GPR8
+ : $rd),
+ "sts\t$k, $rd", [(store i8
+ : $rd, imm
+ : $k)]>,
Requires<[HasSRAM]>;
// STSW K+1:K, Rr+1:Rr
@@ -1354,20 +1508,24 @@ def STSKRr : F32DM<0b1,
// sts Rr+1, (K+1:K) + 1
// sts Rr, (K+1:K)
def STSWKRr : Pseudo<(outs),
- (ins i16imm:$dst, DREGS:$src),
- "stsw\t$dst, $src",
- [(store i16:$src, imm:$dst)]>,
+ (ins i16imm
+ : $dst, DREGS
+ : $src),
+ "stsw\t$dst, $src", [(store i16
+ : $src, imm
+ : $dst)]>,
Requires<[HasSRAM]>;
// Indirect stores.
// ST P, Rr
// Stores the value of Rr into the location addressed by pointer P.
-def STPtrRr : FSTLD<1,
- 0b00,
- (outs),
- (ins LDSTPtrReg:$ptrreg, GPR8:$reg),
- "st\t$ptrreg, $reg",
- [(store GPR8:$reg, i16:$ptrreg)]>,
+def STPtrRr : FSTLD<1, 0b00, (outs),
+ (ins LDSTPtrReg
+ : $ptrreg, GPR8
+ : $reg),
+ "st\t$ptrreg, $reg", [(store GPR8
+ : $reg, i16
+ : $ptrreg)]>,
Requires<[HasSRAM]>;
// STW P, Rr+1:Rr
@@ -1377,25 +1535,32 @@ def STPtrRr : FSTLD<1,
// st P, Rr
// std P+1, Rr+1
def STWPtrRr : Pseudo<(outs),
- (ins PTRDISPREGS:$ptrreg, DREGS:$reg),
- "stw\t$ptrreg, $reg",
- [(store i16:$reg, i16:$ptrreg)]>,
+ (ins PTRDISPREGS
+ : $ptrreg, DREGS
+ : $reg),
+ "stw\t$ptrreg, $reg", [(store i16
+ : $reg, i16
+ : $ptrreg)]>,
Requires<[HasSRAM]>;
// Indirect stores (with postincrement or predecrement).
-let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
-{
+let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in {
// ST P+, Rr
// Stores the value of Rr into the location addressed by pointer P.
// Post increments P.
- def STPtrPiRr : FSTLD<1,
- 0b01,
- (outs LDSTPtrReg:$base_wb),
- (ins LDSTPtrReg:$ptrreg, GPR8:$reg, i8imm:$offs),
- "st\t$ptrreg+, $reg",
- [(set i16:$base_wb,
- (post_store GPR8:$reg, i16:$ptrreg, imm:$offs))]>,
+ def STPtrPiRr : FSTLD<1, 0b01,
+ (outs LDSTPtrReg
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg, GPR8
+ : $reg, i8imm
+ : $offs),
+ "st\t$ptrreg+, $reg", [(set i16
+ : $base_wb, (post_store GPR8
+ : $reg, i16
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
// STW P+, Rr+1:Rr
@@ -1405,23 +1570,34 @@ let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
// Expands to:
// st P+, Rr
// st P+, Rr+1
- def STWPtrPiRr : Pseudo<(outs PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg, DREGS:$trh, i8imm:$offs),
- "stw\t$ptrreg+, $trh",
- [(set PTRREGS:$base_wb,
- (post_store DREGS:$trh, PTRREGS:$ptrreg, imm:$offs))]>,
+ def STWPtrPiRr : Pseudo<(outs PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg, DREGS
+ : $trh, i8imm
+ : $offs),
+ "stw\t$ptrreg+, $trh", [(set PTRREGS
+ : $base_wb, (post_store DREGS
+ : $trh, PTRREGS
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
// ST -P, Rr
// Stores the value of Rr into the location addressed by pointer P.
// Pre decrements P.
- def STPtrPdRr : FSTLD<1,
- 0b10,
- (outs LDSTPtrReg:$base_wb),
- (ins LDSTPtrReg:$ptrreg, GPR8:$reg, i8imm:$offs),
- "st\t-$ptrreg, $reg",
- [(set i16:$base_wb,
- (pre_store GPR8:$reg, i16:$ptrreg, imm:$offs))]>,
+ def STPtrPdRr : FSTLD<1, 0b10,
+ (outs LDSTPtrReg
+ : $base_wb),
+ (ins LDSTPtrReg
+ : $ptrreg, GPR8
+ : $reg, i8imm
+ : $offs),
+ "st\t-$ptrreg, $reg", [(set i16
+ : $base_wb, (pre_store GPR8
+ : $reg, i16
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
// STW -P, Rr+1:Rr
@@ -1431,11 +1607,17 @@ let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
// Expands to:
// st -P, Rr+1
// st -P, Rr
- def STWPtrPdRr : Pseudo<(outs PTRREGS:$base_wb),
- (ins PTRREGS:$ptrreg, DREGS:$reg, i8imm:$offs),
- "stw\t-$ptrreg, $reg",
- [(set PTRREGS:$base_wb,
- (pre_store i16:$reg, i16:$ptrreg, imm:$offs))]>,
+ def STWPtrPdRr : Pseudo<(outs PTRREGS
+ : $base_wb),
+ (ins PTRREGS
+ : $ptrreg, DREGS
+ : $reg, i8imm
+ : $offs),
+ "stw\t-$ptrreg, $reg", [(set PTRREGS
+ : $base_wb, (pre_store i16
+ : $reg, i16
+ : $ptrreg, imm
+ : $offs))]>,
Requires<[HasSRAM]>;
}
@@ -1443,11 +1625,13 @@ let Constraints = "$ptrreg = $base_wb,@earlyclobber $base_wb" in
// STD P+q, Rr
// Stores the value of Rr into the location addressed by pointer P with a
// displacement of q. Does not modify P.
-def STDPtrQRr : FSTDLDD<1,
- (outs),
- (ins memri:$memri, GPR8:$reg),
- "std\t$memri, $reg",
- [(store i8:$reg, addr:$memri)]>,
+def STDPtrQRr : FSTDLDD<1, (outs),
+ (ins memri
+ : $memri, GPR8
+ : $reg),
+ "std\t$memri, $reg", [(store i8
+ : $reg, addr
+ : $memri)]>,
Requires<[HasSRAM]>;
// STDW P+q, Rr+1:Rr
@@ -1458,206 +1642,192 @@ def STDPtrQRr : FSTDLDD<1,
// std P+q, Rr
// std P+q+1, Rr+1
def STDWPtrQRr : Pseudo<(outs),
- (ins memri:$memri, DREGS:$src),
- "stdw\t$memri, $src",
- [(store i16:$src, addr:$memri)]>,
+ (ins memri
+ : $memri, DREGS
+ : $src),
+ "stdw\t$memri, $src", [(store i16
+ : $src, addr
+ : $memri)]>,
Requires<[HasSRAM]>;
-
// Load program memory operations.
-let canFoldAsLoad = 1,
-isReMaterializable = 1,
-mayLoad = 1,
-hasSideEffects = 0 in
-{
+let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
+ hasSideEffects = 0 in {
let Defs = [R0],
- Uses = [R31R30] in
- def LPM : F16<0b1001010111001000,
- (outs),
- (ins),
- "lpm",
- []>,
- Requires<[HasLPM]>;
-
- def LPMRdZ : FLPMX<0,
- 0,
- (outs GPR8:$dst),
- (ins ZREG:$z),
- "lpm\t$dst, $z",
- []>,
+ Uses = [R31R30] in def LPM
+ : F16<0b1001010111001000, (outs), (ins), "lpm", []>,
+ Requires<[HasLPM]>;
+
+ def LPMRdZ : FLPMX<0, 0,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpm\t$dst, $z", []>,
Requires<[HasLPMX]>;
// Load program memory, while postincrementing the Z register.
- let Defs = [R31R30] in
- {
- def LPMRdZPi : FLPMX<0,
- 1,
- (outs GPR8:$dst),
- (ins ZREG:$z),
- "lpm\t$dst, $z+",
- []>,
+ let Defs = [R31R30] in {
+ def LPMRdZPi : FLPMX<0, 1,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpm\t$dst, $z+", []>,
Requires<[HasLPMX]>;
- def LPMWRdZ : Pseudo<(outs DREGS:$dst),
- (ins ZREG:$z),
- "lpmw\t$dst, $z",
- []>,
+ def LPMWRdZ : Pseudo<(outs DREGS
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpmw\t$dst, $z", []>,
Requires<[HasLPMX]>;
- def LPMWRdZPi : Pseudo<(outs DREGS:$dst),
- (ins ZREG:$z),
- "lpmw\t$dst, $z+",
- []>,
+ def LPMWRdZPi : Pseudo<(outs DREGS
+ : $dst),
+ (ins ZREG
+ : $z),
+ "lpmw\t$dst, $z+", []>,
Requires<[HasLPMX]>;
}
}
// Extended load program memory operations.
-let mayLoad = 1,
-hasSideEffects = 0 in
-{
+let mayLoad = 1, hasSideEffects = 0 in {
let Defs = [R0],
- Uses = [R31R30] in
- def ELPM : F16<0b1001010111011000,
- (outs),
- (ins),
- "elpm",
- []>,
- Requires<[HasELPM]>;
-
- def ELPMRdZ : FLPMX<1,
- 0,
- (outs GPR8:$dst),
- (ins ZREG:$z),
- "elpm\t$dst, $z",
- []>,
+ Uses = [R31R30] in def ELPM
+ : F16<0b1001010111011000, (outs), (ins), "elpm", []>,
+ Requires<[HasELPM]>;
+
+ def ELPMRdZ : FLPMX<1, 0,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "elpm\t$dst, $z", []>,
Requires<[HasELPMX]>;
- let Defs = [R31R30] in
- def ELPMRdZPi : FLPMX<1,
- 1,
- (outs GPR8:$dst),
- (ins ZREG: $z),
- "elpm\t$dst, $z+",
- []>,
- Requires<[HasELPMX]>;
+ let Defs = [R31R30] in def ELPMRdZPi : FLPMX<1, 1,
+ (outs GPR8
+ : $dst),
+ (ins ZREG
+ : $z),
+ "elpm\t$dst, $z+", []>,
+ Requires<[HasELPMX]>;
}
// Store program memory operations.
-let Uses = [R1, R0] in
-{
- let Uses = [R31R30, R1, R0] in
- def SPM : F16<0b1001010111101000,
- (outs),
- (ins),
- "spm",
- []>,
- Requires<[HasSPM]>;
-
- let Defs = [R31R30] in
- def SPMZPi : F16<0b1001010111111000,
- (outs),
- (ins ZREG:$z),
- "spm $z+",
- []>,
- Requires<[HasSPMX]>;
+let Uses = [R1, R0] in {
+ let Uses = [R31R30, R1, R0] in def SPM
+ : F16<0b1001010111101000, (outs), (ins), "spm", []>,
+ Requires<[HasSPM]>;
+
+ let Defs = [R31R30] in def SPMZPi : F16<0b1001010111111000, (outs),
+ (ins ZREG
+ : $z),
+ "spm $z+", []>,
+ Requires<[HasSPMX]>;
}
// Read data from IO location operations.
-let canFoldAsLoad = 1,
-isReMaterializable = 1 in
-{
- def INRdA : FIORdA<(outs GPR8:$dst),
- (ins imm_port6:$src),
- "in\t$dst, $src",
- [(set i8:$dst, (load ioaddr8:$src))]>;
-
- def INWRdA : Pseudo<(outs DREGS:$dst),
- (ins imm_port6:$src),
- "inw\t$dst, $src",
- [(set i16:$dst, (load ioaddr16:$src))]>;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def INRdA : FIORdA<(outs GPR8
+ : $dst),
+ (ins imm_port6
+ : $src),
+ "in\t$dst, $src", [(set i8
+ : $dst, (load ioaddr8
+ : $src))]>;
+
+ def INWRdA : Pseudo<(outs DREGS
+ : $dst),
+ (ins imm_port6
+ : $src),
+ "inw\t$dst, $src", [(set i16
+ : $dst, (load ioaddr16
+ : $src))]>;
}
// Write data to IO location operations.
def OUTARr : FIOARr<(outs),
- (ins imm_port6:$dst, GPR8:$src),
- "out\t$dst, $src",
- [(store i8:$src, ioaddr8:$dst)]>;
+ (ins imm_port6
+ : $dst, GPR8
+ : $src),
+ "out\t$dst, $src", [(store i8
+ : $src, ioaddr8
+ : $dst)]>;
def OUTWARr : Pseudo<(outs),
- (ins imm_port6:$dst, DREGS:$src),
- "outw\t$dst, $src",
- [(store i16:$src, ioaddr16:$dst)]>;
+ (ins imm_port6
+ : $dst, DREGS
+ : $src),
+ "outw\t$dst, $src", [(store i16
+ : $src, ioaddr16
+ : $dst)]>;
// Stack push/pop operations.
-let Defs = [SP],
-Uses = [SP],
-hasSideEffects = 0 in
-{
+let Defs = [SP], Uses = [SP], hasSideEffects = 0 in {
// Stack push operations.
- let mayStore = 1 in
- {
- def PUSHRr : FRd<0b1001,
- 0b0011111,
- (outs),
- (ins GPR8:$reg),
- "push\t$reg",
- []>,
+ let mayStore = 1 in {
+ def PUSHRr : FRd<0b1001, 0b0011111, (outs),
+ (ins GPR8
+ : $reg),
+ "push\t$reg", []>,
Requires<[HasSRAM]>;
def PUSHWRr : Pseudo<(outs),
- (ins DREGS:$reg),
- "pushw\t$reg",
- []>,
+ (ins DREGS
+ : $reg),
+ "pushw\t$reg", []>,
Requires<[HasSRAM]>;
}
// Stack pop operations.
- let mayLoad = 1 in
- {
- def POPRd : FRd<0b1001,
- 0b0001111,
- (outs GPR8:$reg),
- (ins),
- "pop\t$reg",
- []>,
+ let mayLoad = 1 in {
+ def POPRd : FRd<0b1001, 0b0001111,
+ (outs GPR8
+ : $reg),
+ (ins), "pop\t$reg", []>,
Requires<[HasSRAM]>;
- def POPWRd : Pseudo<(outs DREGS:$reg),
- (ins),
- "popw\t$reg",
- []>,
+ def POPWRd : Pseudo<(outs DREGS
+ : $reg),
+ (ins), "popw\t$reg", []>,
Requires<[HasSRAM]>;
}
}
// Read-Write-Modify (RMW) instructions.
def XCHZRd : FZRd<0b100,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "xch\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "xch\t$z, $rd", []>,
Requires<[SupportsRMW]>;
def LASZRd : FZRd<0b101,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "las\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "las\t$z, $rd", []>,
Requires<[SupportsRMW]>;
def LACZRd : FZRd<0b110,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "lac\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "lac\t$z, $rd", []>,
Requires<[SupportsRMW]>;
def LATZRd : FZRd<0b111,
- (outs GPR8:$rd),
- (ins ZREG:$z),
- "lat\t$z, $rd",
- []>,
+ (outs GPR8
+ : $rd),
+ (ins ZREG
+ : $z),
+ "lat\t$z, $rd", []>,
Requires<[SupportsRMW]>;
//===----------------------------------------------------------------------===//
@@ -1665,187 +1835,277 @@ def LATZRd : FZRd<0b111,
//===----------------------------------------------------------------------===//
// Bit shift/rotate operations.
-let Constraints = "$src = $rd",
-Defs = [SREG] in
-{
+let Constraints = "$src = $rd", Defs = [SREG] in {
// 8-bit LSL is an alias of ADD Rd, Rd
- def LSLWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def LSLWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"lslw\t$rd",
- [(set i16:$rd, (AVRlsl i16:$src)), (implicit SREG)]>;
-
- def LSLWNRd : Pseudo<(outs DLDREGS:$rd),
- (ins DREGS:$src, imm16:$bits),
- "lslwn\t$rd, $bits",
- [(set i16:$rd, (AVRlslwn i16:$src, imm:$bits)),
- (implicit SREG)]>;
-
- def LSLBNRd : Pseudo<(outs LD8:$rd),
- (ins GPR8:$src, imm_ldi8:$bits),
- "lslbn\t$rd, $bits",
- [(set i8:$rd, (AVRlslbn i8:$src, imm:$bits)),
- (implicit SREG)]>;
+ [(set i16
+ : $rd, (AVRlsl i16
+ : $src)),
+ (implicit SREG)]>;
- def LSRRd : FRd<0b1001,
- 0b0100110,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "lsr\t$rd",
- [(set i8:$rd, (AVRlsr i8:$src)), (implicit SREG)]>;
+ def LSLWNRd : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DREGS
+ : $src, imm16
+ : $bits),
+ "lslwn\t$rd, $bits", [
+ (set i16
+ : $rd, (AVRlslwn i16
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def LSLBNRd : Pseudo<(outs LD8
+ : $rd),
+ (ins GPR8
+ : $src, imm_ldi8
+ : $bits),
+ "lslbn\t$rd, $bits", [
+ (set i8
+ : $rd, (AVRlslbn i8
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def LSRRd
+ : FRd<0b1001, 0b0100110,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "lsr\t$rd", [(set i8
+ : $rd, (AVRlsr i8
+ : $src)),
+ (implicit SREG)]>;
- def LSRWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def LSRWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"lsrw\t$rd",
- [(set i16:$rd, (AVRlsr i16:$src)), (implicit SREG)]>;
-
- def LSRWNRd : Pseudo<(outs DLDREGS:$rd),
- (ins DREGS:$src, imm16:$bits),
- "lsrwn\t$rd, $bits",
- [(set i16:$rd, (AVRlsrwn i16:$src, imm:$bits)),
- (implicit SREG)]>;
-
- def LSRBNRd : Pseudo<(outs LD8:$rd),
- (ins GPR8:$src, imm_ldi8:$bits),
- "lsrbn\t$rd, $bits",
- [(set i8:$rd, (AVRlsrbn i8:$src, imm:$bits)),
- (implicit SREG)]>;
-
- def ASRRd : FRd<0b1001,
- 0b0100101,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "asr\t$rd",
- [(set i8:$rd, (AVRasr i8:$src)), (implicit SREG)]>;
-
- def ASRWNRd : Pseudo<(outs DLDREGS:$rd),
- (ins DREGS:$src, imm16:$bits),
- "asrwn\t$rd, $bits",
- [(set i16:$rd, (AVRasrwn i16:$src, imm:$bits)),
- (implicit SREG)]>;
+ [(set i16
+ : $rd, (AVRlsr i16
+ : $src)),
+ (implicit SREG)]>;
- def ASRBNRd : Pseudo<(outs LD8:$rd),
- (ins GPR8:$src, imm_ldi8:$bits),
- "asrbn\t$rd, $bits",
- [(set i8:$rd, (AVRasrbn i8:$src, imm:$bits)),
- (implicit SREG)]>;
+ def LSRWNRd : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DREGS
+ : $src, imm16
+ : $bits),
+ "lsrwn\t$rd, $bits", [
+ (set i16
+ : $rd, (AVRlsrwn i16
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def LSRBNRd : Pseudo<(outs LD8
+ : $rd),
+ (ins GPR8
+ : $src, imm_ldi8
+ : $bits),
+ "lsrbn\t$rd, $bits", [
+ (set i8
+ : $rd, (AVRlsrbn i8
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def ASRRd
+ : FRd<0b1001, 0b0100101,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "asr\t$rd", [(set i8
+ : $rd, (AVRasr i8
+ : $src)),
+ (implicit SREG)]>;
- def ASRWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
+ def ASRWNRd : Pseudo<(outs DLDREGS
+ : $rd),
+ (ins DREGS
+ : $src, imm16
+ : $bits),
+ "asrwn\t$rd, $bits", [
+ (set i16
+ : $rd, (AVRasrwn i16
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def ASRBNRd : Pseudo<(outs LD8
+ : $rd),
+ (ins GPR8
+ : $src, imm_ldi8
+ : $bits),
+ "asrbn\t$rd, $bits", [
+ (set i8
+ : $rd, (AVRasrbn i8
+ : $src, imm
+ : $bits)),
+ (implicit SREG)
+ ]>;
+
+ def ASRWRd : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
"asrw\t$rd",
- [(set i16:$rd, (AVRasr i16:$src)), (implicit SREG)]>;
+ [(set i16
+ : $rd, (AVRasr i16
+ : $src)),
+ (implicit SREG)]>;
- def ROLBRd : Pseudo<(outs GPR8:$rd),
- (ins GPR8:$src),
+ def ROLBRd : Pseudo<(outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
"rolb\t$rd",
- [(set i8:$rd, (AVRrol i8:$src)), (implicit SREG)]>;
+ [(set i8
+ : $rd, (AVRrol i8
+ : $src)),
+ (implicit SREG)]>;
- def RORBRd : Pseudo<(outs GPR8:$rd),
- (ins GPR8:$src),
+ def RORBRd : Pseudo<(outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
"rorb\t$rd",
- [(set i8:$rd, (AVRror i8:$src)), (implicit SREG)]>;
+ [(set i8
+ : $rd, (AVRror i8
+ : $src)),
+ (implicit SREG)]>;
// Bit rotate operations.
- let Uses = [SREG] in
- {
-
- def ROLWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
- "rolw\t$rd",
- [(set i16:$rd, (AVRrol i16:$src)), (implicit SREG)]>;
-
- def RORRd : FRd<0b1001,
- 0b0100111,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "ror\t$rd",
- []>;
-
- def RORWRd : Pseudo<(outs DREGS:$rd),
- (ins DREGS:$src),
- "rorw\t$rd",
- [(set i16:$rd, (AVRror i16:$src)), (implicit SREG)]>;
+ let Uses = [SREG] in {
+
+ def ROLWRd
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
+ "rolw\t$rd",
+ [(set i16
+ : $rd, (AVRrol i16
+ : $src)),
+ (implicit SREG)]>;
+
+ def RORRd : FRd<0b1001, 0b0100111,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "ror\t$rd", []>;
+
+ def RORWRd
+ : Pseudo<(outs DREGS
+ : $rd),
+ (ins DREGS
+ : $src),
+ "rorw\t$rd",
+ [(set i16
+ : $rd, (AVRror i16
+ : $src)),
+ (implicit SREG)]>;
}
}
// SWAP Rd
// Swaps the high and low nibbles in a register.
-let Constraints = "$src = $rd" in
-def SWAPRd : FRd<0b1001,
- 0b0100010,
- (outs GPR8:$rd),
- (ins GPR8:$src),
- "swap\t$rd",
- [(set i8:$rd, (AVRSwap i8:$src))]>;
+let Constraints =
+ "$src = $rd" in def SWAPRd : FRd<0b1001, 0b0100010,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src),
+ "swap\t$rd", [(set i8
+ : $rd, (AVRSwap i8
+ : $src))]>;
// IO register bit set/clear operations.
-//:TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi
+//: TODO: add patterns when popcount(imm)==2 to be expanded with 2 sbi/cbi
// instead of in+ori+out which requires one more instr.
-def SBIAb : FIOBIT<0b10,
- (outs),
- (ins imm_port5:$addr, i8imm:$bit),
- "sbi\t$addr, $bit",
- [(store (or (i8 (load lowioaddr8:$addr)), iobitpos8:$bit),
- lowioaddr8:$addr)]>;
-
-def CBIAb : FIOBIT<0b00,
- (outs),
- (ins imm_port5:$addr, i8imm:$bit),
- "cbi\t$addr, $bit",
- [(store (and (i8 (load lowioaddr8:$addr)), iobitposn8:$bit),
- lowioaddr8:$addr)]>;
+def SBIAb : FIOBIT<0b10, (outs),
+ (ins imm_port5
+ : $addr, i8imm
+ : $bit),
+ "sbi\t$addr, $bit", [(store(or(i8(load lowioaddr8
+ : $addr)),
+ iobitpos8
+ : $bit),
+ lowioaddr8
+ : $addr)]>;
+
+def CBIAb : FIOBIT<0b00, (outs),
+ (ins imm_port5
+ : $addr, i8imm
+ : $bit),
+ "cbi\t$addr, $bit", [(store(and(i8(load lowioaddr8
+ : $addr)),
+ iobitposn8
+ : $bit),
+ lowioaddr8
+ : $addr)]>;
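
The TODO above is purely about instruction count: one sbi/cbi per bit beats the
generic three-instruction in/ori/out (or in/andi/out) sequence whenever at most two
bits of a low I/O register change. A minimal sketch of that selection criterion, with
a hypothetical helper name:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// True when OR-ing `Mask` into a low I/O register is cheaper done as one
// "sbi addr, bit" per set bit than as the three-instruction "in; ori; out".
static bool preferSbiSequence(uint8_t Mask) {
  return llvm::countPopulation(Mask) <= 2;
}

// e.g. Mask = 0b00000101: two sbi's (bits 0 and 2), one instruction fewer
// than in+ori+out.
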
// Status register bit load/store operations.
-let Defs = [SREG] in
-def BST : FRdB<0b01,
- (outs),
- (ins GPR8:$rd, i8imm:$b),
- "bst\t$rd, $b",
- []>;
+let Defs = [SREG] in def BST : FRdB<0b01, (outs),
+ (ins GPR8
+ : $rd, i8imm
+ : $b),
+ "bst\t$rd, $b", []>;
let Constraints = "$src = $rd",
-Uses = [SREG] in
-def BLD : FRdB<0b00,
- (outs GPR8:$rd),
- (ins GPR8:$src, i8imm:$b),
- "bld\t$rd, $b",
- []>;
+ Uses = [SREG] in def BLD : FRdB<0b00,
+ (outs GPR8
+ : $rd),
+ (ins GPR8
+ : $src, i8imm
+ : $b),
+ "bld\t$rd, $b", []>;
-def CBR : InstAlias<"cbr\t$rd, $k", (ANDIRdK LD8:$rd, imm_com8:$k), 0>;
+def CBR : InstAlias<"cbr\t$rd, $k", (ANDIRdK LD8 : $rd, imm_com8 : $k), 0>;
// CLR Rd
// Alias for EOR Rd, Rd
// -------------
// Clears all bits in a register.
-def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8:$rd, GPR8:$rd)>;
+def CLR : InstAlias<"clr\t$rd", (EORRdRr GPR8 : $rd, GPR8 : $rd)>;
// LSL Rd
// Alias for ADD Rd, Rd
// --------------
// Logical shift left one bit.
-def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8:$rd, GPR8:$rd)>;
+def LSL : InstAlias<"lsl\t$rd", (ADDRdRr GPR8 : $rd, GPR8 : $rd)>;
-def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8:$rd, GPR8:$rd)>;
+def ROL : InstAlias<"rol\t$rd", (ADCRdRr GPR8 : $rd, GPR8 : $rd)>;
// SER Rd
// Alias for LDI Rd, 0xff
// ---------
// Sets all bits in a register.
-def : InstAlias<"ser\t$rd", (LDIRdK LD8:$rd, 0xff), 0>;
-
-let Defs = [SREG] in
-def BSETs : FS<0,
- (outs),
- (ins i8imm:$s),
- "bset\t$s",
- []>;
-
-let Defs = [SREG] in
-def BCLRs : FS<1,
- (outs),
- (ins i8imm:$s),
- "bclr\t$s",
- []>;
+def : InstAlias<"ser\t$rd", (LDIRdK LD8 : $rd, 0xff), 0>;
+
+let Defs = [SREG] in def BSETs : FS<0, (outs),
+ (ins i8imm
+ : $s),
+ "bset\t$s", []>;
+
+let Defs = [SREG] in def BCLRs : FS<1, (outs),
+ (ins i8imm
+ : $s),
+ "bclr\t$s", []>;
// Set/clear aliases for the carry (C) status flag (bit 0).
def : InstAlias<"sec", (BSETs 0)>;
@@ -1887,284 +2147,353 @@ def : InstAlias<"cli", (BCLRs 7)>;
// Breakpoint instruction
// ---------
// <|1001|0101|1001|1000>
-def BREAK : F16<0b1001010110011000,
- (outs),
- (ins),
- "break",
- []>,
+def BREAK : F16<0b1001010110011000, (outs), (ins), "break", []>,
Requires<[HasBREAK]>;
// NOP
// No-operation instruction
// ---------
// <|0000|0000|0000|0000>
-def NOP : F16<0b0000000000000000,
- (outs),
- (ins),
- "nop",
- []>;
+def NOP : F16<0b0000000000000000, (outs), (ins), "nop", []>;
// SLEEP
// Sleep instruction
// ---------
// <|1001|0101|1000|1000>
-def SLEEP : F16<0b1001010110001000,
- (outs),
- (ins),
- "sleep",
- []>;
+def SLEEP : F16<0b1001010110001000, (outs), (ins), "sleep", []>;
// WDR
// Watchdog reset
// ---------
// <|1001|0101|1010|1000>
-def WDR : F16<0b1001010110101000,
- (outs),
- (ins),
- "wdr",
- []>;
+def WDR : F16<0b1001010110101000, (outs), (ins), "wdr", []>;
//===----------------------------------------------------------------------===//
// Pseudo instructions for later expansion
//===----------------------------------------------------------------------===//
-//:TODO: Optimize this for wider types AND optimize the following code
+//: TODO: Optimize this for wider types AND optimize the following code
// compile int foo(char a, char b, char c, char d) {return d+b;}
// looks like a missed sext_inreg opportunity.
-def SEXT : ExtensionPseudo<
- (outs DREGS:$dst),
- (ins GPR8:$src),
- "sext\t$dst, $src",
- [(set i16:$dst, (sext i8:$src)), (implicit SREG)]
->;
-
-def ZEXT : ExtensionPseudo<
- (outs DREGS:$dst),
- (ins GPR8:$src),
- "zext\t$dst, $src",
- [(set i16:$dst, (zext i8:$src)), (implicit SREG)]
->;
+def SEXT
+ : ExtensionPseudo<(outs DREGS
+ : $dst),
+ (ins GPR8
+ : $src),
+ "sext\t$dst, $src",
+ [(set i16
+ : $dst, (sext i8
+ : $src)),
+ (implicit SREG)]>;
+
+def ZEXT
+ : ExtensionPseudo<(outs DREGS
+ : $dst),
+ (ins GPR8
+ : $src),
+ "zext\t$dst, $src",
+ [(set i16
+ : $dst, (zext i8
+ : $src)),
+ (implicit SREG)]>;
// This pseudo gets expanded into a movw+adiw thus it clobbers SREG.
let Defs = [SREG],
- hasSideEffects = 0 in
-def FRMIDX : Pseudo<(outs DLDREGS:$dst),
- (ins DLDREGS:$src, i16imm:$src2),
- "frmidx\t$dst, $src, $src2",
- []>;
+ hasSideEffects = 0 in def FRMIDX : Pseudo<(outs DLDREGS
+ : $dst),
+ (ins DLDREGS
+ : $src, i16imm
+ : $src2),
+ "frmidx\t$dst, $src, $src2", []>;
// This pseudo is either converted to a regular store or a push which clobbers
// SP.
-def STDSPQRr : StorePseudo<
- (outs),
- (ins memspi:$dst, GPR8:$src),
- "stdstk\t$dst, $src",
- [(store i8:$src, addr:$dst)]
->;
+def STDSPQRr : StorePseudo<(outs),
+ (ins memspi
+ : $dst, GPR8
+ : $src),
+ "stdstk\t$dst, $src", [(store i8
+ : $src, addr
+ : $dst)]>;
// This pseudo is either converted to a regular store or a push which clobbers
// SP.
-def STDWSPQRr : StorePseudo<
- (outs),
- (ins memspi:$dst, DREGS:$src),
- "stdwstk\t$dst, $src",
- [(store i16:$src, addr:$dst)]
->;
+def STDWSPQRr : StorePseudo<(outs),
+ (ins memspi
+ : $dst, DREGS
+ : $src),
+ "stdwstk\t$dst, $src", [(store i16
+ : $src, addr
+ : $dst)]>;
// SP read/write pseudos.
-let hasSideEffects = 0 in
-{
- let Uses = [SP] in
- def SPREAD : Pseudo<
- (outs DREGS:$dst),
- (ins GPRSP:$src),
- "spread\t$dst, $src",
- []
- >;
-
- let Defs = [SP] in
- def SPWRITE : Pseudo<
- (outs GPRSP:$dst),
- (ins DREGS:$src),
- "spwrite\t$dst, $src",
- []>;
+let hasSideEffects = 0 in {
+ let Uses = [SP] in def SPREAD : Pseudo<(outs DREGS
+ : $dst),
+ (ins GPRSP
+ : $src),
+ "spread\t$dst, $src", []>;
+
+ let Defs = [SP] in def SPWRITE : Pseudo<(outs GPRSP
+ : $dst),
+ (ins DREGS
+ : $src),
+ "spwrite\t$dst, $src", []>;
}
-def Select8 : SelectPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$src2, i8imm:$cc),
- "# Select8 PSEUDO",
- [(set i8:$dst, (AVRselectcc i8:$src, i8:$src2, imm:$cc))]
->;
-
-def Select16 : SelectPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, DREGS:$src2, i8imm:$cc),
- "# Select16 PSEUDO",
- [(set i16:$dst, (AVRselectcc i16:$src, i16:$src2, imm:$cc))]
->;
-
-def Lsl8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Lsl8 PSEUDO",
- [(set i8:$dst, (AVRlslLoop i8:$src, i8:$cnt))]
->;
-
-def Lsl16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Lsl16 PSEUDO",
- [(set i16:$dst, (AVRlslLoop i16:$src, i8:$cnt))]
->;
-
-def Lsr8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Lsr8 PSEUDO",
- [(set i8:$dst, (AVRlsrLoop i8:$src, i8:$cnt))]
->;
-
-def Lsr16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Lsr16 PSEUDO",
- [(set i16:$dst, (AVRlsrLoop i16:$src, i8:$cnt))]
->;
-
-def Rol8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Rol8 PSEUDO",
- [(set i8:$dst, (AVRrolLoop i8:$src, i8:$cnt))]
->;
-
-def Rol16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Rol16 PSEUDO",
- [(set i16:$dst, (AVRrolLoop i16:$src, i8:$cnt))]
->;
-
-def Ror8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Ror8 PSEUDO",
- [(set i8:$dst, (AVRrorLoop i8:$src, i8:$cnt))]
->;
-
-def Ror16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Ror16 PSEUDO",
- [(set i16:$dst, (AVRrorLoop i16:$src, i8:$cnt))]
->;
-
-def Asr8 : ShiftPseudo<
- (outs GPR8:$dst),
- (ins GPR8:$src, GPR8:$cnt),
- "# Asr8 PSEUDO",
- [(set i8:$dst, (AVRasrLoop i8:$src, i8:$cnt))]
->;
-
-def Asr16 : ShiftPseudo<
- (outs DREGS:$dst),
- (ins DREGS:$src, GPR8:$cnt),
- "# Asr16 PSEUDO",
- [(set i16:$dst, (AVRasrLoop i16:$src, i8:$cnt))]
->;
-
+def Select8 : SelectPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $src2, i8imm
+ : $cc),
+ "# Select8 PSEUDO", [(set i8
+ : $dst, (AVRselectcc i8
+ : $src, i8
+ : $src2, imm
+ : $cc))]>;
+
+def Select16 : SelectPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, DREGS
+ : $src2, i8imm
+ : $cc),
+ "# Select16 PSEUDO", [(set i16
+ : $dst, (AVRselectcc i16
+ : $src, i16
+ : $src2, imm
+ : $cc))]>;
+
+def Lsl8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Lsl8 PSEUDO", [(set i8
+ : $dst, (AVRlslLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Lsl16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Lsl16 PSEUDO", [(set i16
+ : $dst, (AVRlslLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Lsr8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Lsr8 PSEUDO", [(set i8
+ : $dst, (AVRlsrLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Lsr16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Lsr16 PSEUDO", [(set i16
+ : $dst, (AVRlsrLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Rol8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Rol8 PSEUDO", [(set i8
+ : $dst, (AVRrolLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Rol16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Rol16 PSEUDO", [(set i16
+ : $dst, (AVRrolLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Ror8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Ror8 PSEUDO", [(set i8
+ : $dst, (AVRrorLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Ror16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Ror16 PSEUDO", [(set i16
+ : $dst, (AVRrorLoop i16
+ : $src, i8
+ : $cnt))]>;
+
+def Asr8 : ShiftPseudo<(outs GPR8
+ : $dst),
+ (ins GPR8
+ : $src, GPR8
+ : $cnt),
+ "# Asr8 PSEUDO", [(set i8
+ : $dst, (AVRasrLoop i8
+ : $src, i8
+ : $cnt))]>;
+
+def Asr16 : ShiftPseudo<(outs DREGS
+ : $dst),
+ (ins DREGS
+ : $src, GPR8
+ : $cnt),
+ "# Asr16 PSEUDO", [(set i16
+ : $dst, (AVRasrLoop i16
+ : $src, i8
+ : $cnt))]>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-//:TODO: look in x86InstrCompiler.td for odd encoding trick related to
+//: TODO: look in x86InstrCompiler.td for odd encoding trick related to
// add x, 128 -> sub x, -128. Clang is emitting an eor for this (ldi+eor)
// the add instruction always writes the carry flag
-def : Pat<(addc i8:$src, i8:$src2),
- (ADDRdRr i8:$src, i8:$src2)>;
-def : Pat<(addc DREGS:$src, DREGS:$src2),
- (ADDWRdRr DREGS:$src, DREGS:$src2)>;
+def : Pat<(addc i8 : $src, i8 : $src2), (ADDRdRr i8 : $src, i8 : $src2)>;
+def : Pat<(addc DREGS
+ : $src, DREGS
+ : $src2),
+ (ADDWRdRr DREGS
+ : $src, DREGS
+ : $src2)>;
// all sub instruction variants always write the carry flag
-def : Pat<(subc i8:$src, i8:$src2),
- (SUBRdRr i8:$src, i8:$src2)>;
-def : Pat<(subc i16:$src, i16:$src2),
- (SUBWRdRr i16:$src, i16:$src2)>;
-def : Pat<(subc i8:$src, imm:$src2),
- (SUBIRdK i8:$src, imm:$src2)>;
-def : Pat<(subc i16:$src, imm:$src2),
- (SUBIWRdK i16:$src, imm:$src2)>;
+def : Pat<(subc i8 : $src, i8 : $src2), (SUBRdRr i8 : $src, i8 : $src2)>;
+def : Pat<(subc i16 : $src, i16 : $src2), (SUBWRdRr i16 : $src, i16 : $src2)>;
+def : Pat<(subc i8 : $src, imm : $src2), (SUBIRdK i8 : $src, imm : $src2)>;
+def : Pat<(subc i16 : $src, imm : $src2), (SUBIWRdK i16 : $src, imm : $src2)>;
// These patterns convert add (x, -imm) to sub (x, imm) since we don't have
// any add with imm instructions. Also take care of the adiw/sbiw instructions.
-def : Pat<(add i16:$src1, imm0_63_neg:$src2),
- (SBIWRdK i16:$src1, (imm0_63_neg:$src2))>;
-def : Pat<(add i16:$src1, imm:$src2),
- (SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
-def : Pat<(addc i16:$src1, imm:$src2),
- (SUBIWRdK i16:$src1, (imm16_neg_XFORM imm:$src2))>;
-
-def : Pat<(add i8:$src1, imm:$src2),
- (SUBIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
-def : Pat<(addc i8:$src1, imm:$src2),
- (SUBIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
-def : Pat<(adde i8:$src1, imm:$src2),
- (SBCIRdK i8:$src1, (imm8_neg_XFORM imm:$src2))>;
+def : Pat<(add i16
+ : $src1, imm0_63_neg
+ : $src2),
+ (SBIWRdK i16
+ : $src1, (imm0_63_neg
+ : $src2))>;
+def : Pat<(add i16
+ : $src1, imm
+ : $src2),
+ (SUBIWRdK i16
+ : $src1, (imm16_neg_XFORM imm
+ : $src2))>;
+def : Pat<(addc i16
+ : $src1, imm
+ : $src2),
+ (SUBIWRdK i16
+ : $src1, (imm16_neg_XFORM imm
+ : $src2))>;
+
+def : Pat<(add i8
+ : $src1, imm
+ : $src2),
+ (SUBIRdK i8
+ : $src1, (imm8_neg_XFORM imm
+ : $src2))>;
+def : Pat<(addc i8
+ : $src1, imm
+ : $src2),
+ (SUBIRdK i8
+ : $src1, (imm8_neg_XFORM imm
+ : $src2))>;
+def : Pat<(adde i8
+ : $src1, imm
+ : $src2),
+ (SBCIRdK i8
+ : $src1, (imm8_neg_XFORM imm
+ : $src2))>;
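
Since plain add-with-immediate does not exist on AVR (adiw only covers a small
range), the patterns above negate the immediate and emit SUBIRdK / SBCIRdK /
SUBIWRdK instead. A small worked example of the modular negation that the
imm8_neg_XFORM / imm16_neg_XFORM transforms perform (their definitions live
elsewhere in the backend and are not shown in this hunk):

#include <cstdint>
#include <cstdio>

// Negate an immediate modulo 2^8 / 2^16, as the XFORMs referenced above do.
static uint8_t negImm8(uint8_t K) { return static_cast<uint8_t>(0u - K); }
static uint16_t negImm16(uint16_t K) { return static_cast<uint16_t>(0u - K); }

int main() {
  // add r, 3  ==> SUBIRdK r, 0xfd
  std::printf("%#x\n", static_cast<unsigned>(negImm8(3)));      // 0xfd
  // add rp, 0x120  ==> SUBIWRdK rp, 0xfee0
  std::printf("%#x\n", static_cast<unsigned>(negImm16(0x120))); // 0xfee0
  return 0;
}
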
// Calls.
-def : Pat<(AVRcall (i16 tglobaladdr:$dst)),
- (CALLk tglobaladdr:$dst)>;
-def : Pat<(AVRcall (i16 texternalsym:$dst)),
- (CALLk texternalsym:$dst)>;
+def : Pat<(AVRcall(i16 tglobaladdr : $dst)), (CALLk tglobaladdr : $dst)>;
+def : Pat<(AVRcall(i16 texternalsym : $dst)), (CALLk texternalsym : $dst)>;
// `anyext`
-def : Pat<(i16 (anyext i8:$src)),
- (INSERT_SUBREG (i16 (IMPLICIT_DEF)), i8:$src, sub_lo)>;
+def : Pat<(i16(anyext i8
+ : $src)),
+ (INSERT_SUBREG(i16(IMPLICIT_DEF)), i8
+ : $src, sub_lo)>;
// `trunc`
-def : Pat<(i8 (trunc i16:$src)),
- (EXTRACT_SUBREG i16:$src, sub_lo)>;
+def : Pat<(i8(trunc i16 : $src)), (EXTRACT_SUBREG i16 : $src, sub_lo)>;
// sext_inreg
-def : Pat<(sext_inreg i16:$src, i8),
- (SEXT (i8 (EXTRACT_SUBREG i16:$src, sub_lo)))>;
+def : Pat<(sext_inreg i16
+ : $src, i8),
+ (SEXT(i8(EXTRACT_SUBREG i16
+ : $src, sub_lo)))>;
// GlobalAddress
-def : Pat<(i16 (AVRWrapper tglobaladdr:$dst)),
- (LDIWRdK tglobaladdr:$dst)>;
-def : Pat<(add i16:$src, (AVRWrapper tglobaladdr:$src2)),
- (SUBIWRdK i16:$src, tglobaladdr:$src2)>;
-def : Pat<(i8 (load (AVRWrapper tglobaladdr:$dst))),
- (LDSRdK tglobaladdr:$dst)>;
-def : Pat<(i16 (load (AVRWrapper tglobaladdr:$dst))),
- (LDSWRdK tglobaladdr:$dst)>;
-def : Pat<(store i8:$src, (i16 (AVRWrapper tglobaladdr:$dst))),
- (STSKRr tglobaladdr:$dst, i8:$src)>;
-def : Pat<(store i16:$src, (i16 (AVRWrapper tglobaladdr:$dst))),
- (STSWKRr tglobaladdr:$dst, i16:$src)>;
+def : Pat<(i16(AVRWrapper tglobaladdr : $dst)), (LDIWRdK tglobaladdr : $dst)>;
+def : Pat<(add i16
+ : $src, (AVRWrapper tglobaladdr
+ : $src2)),
+ (SUBIWRdK i16
+ : $src, tglobaladdr
+ : $src2)>;
+def : Pat<(i8(load(AVRWrapper tglobaladdr
+ : $dst))),
+ (LDSRdK tglobaladdr
+ : $dst)>;
+def : Pat<(i16(load(AVRWrapper tglobaladdr
+ : $dst))),
+ (LDSWRdK tglobaladdr
+ : $dst)>;
+def : Pat<(store i8
+ : $src, (i16(AVRWrapper tglobaladdr
+ : $dst))),
+ (STSKRr tglobaladdr
+ : $dst, i8
+ : $src)>;
+def : Pat<(store i16
+ : $src, (i16(AVRWrapper tglobaladdr
+ : $dst))),
+ (STSWKRr tglobaladdr
+ : $dst, i16
+ : $src)>;
// BlockAddress
-def : Pat<(i16 (AVRWrapper tblockaddress:$dst)),
- (LDIWRdK tblockaddress:$dst)>;
+def : Pat<(i16(AVRWrapper tblockaddress
+ : $dst)),
+ (LDIWRdK tblockaddress
+ : $dst)>;
-def : Pat<(i8 (trunc (AVRlsrwn DLDREGS:$src, (i16 8)))),
- (EXTRACT_SUBREG DREGS:$src, sub_hi)>;
+def : Pat<(i8(trunc(AVRlsrwn DLDREGS
+ : $src, (i16 8)))),
+ (EXTRACT_SUBREG DREGS
+ : $src, sub_hi)>;
// :FIXME: DAGCombiner produces an shl node after legalization from this sequence:
// BR_JT -> (mul x, 2) -> (shl x, 1)
-def : Pat<(shl i16:$src1, (i8 1)),
- (LSLWRd i16:$src1)>;
+def : Pat<(shl i16 : $src1, (i8 1)), (LSLWRd i16 : $src1)>;
// Lowering of 'tst' node to 'TST' instruction.
// TST is an alias of AND Rd, Rd.
-def : Pat<(AVRtst i8:$rd),
- (ANDRdRr GPR8:$rd, GPR8:$rd)>;
+def : Pat<(AVRtst i8 : $rd), (ANDRdRr GPR8 : $rd, GPR8 : $rd)>;
// Lowering of 'lsl' node to 'LSL' instruction.
// LSL is an alias of 'ADD Rd, Rd'
-def : Pat<(AVRlsl i8:$rd),
- (ADDRdRr GPR8:$rd, GPR8:$rd)>;
-
+def : Pat<(AVRlsl i8 : $rd), (ADDRdRr GPR8 : $rd, GPR8 : $rd)>;
diff --git a/llvm/lib/Target/AVR/AVRMCInstLower.cpp b/llvm/lib/Target/AVR/AVRMCInstLower.cpp
index 49a318762b63..2b8711656139 100644
--- a/llvm/lib/Target/AVR/AVRMCInstLower.cpp
+++ b/llvm/lib/Target/AVR/AVRMCInstLower.cpp
@@ -29,7 +29,9 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
bool IsNegated = false;
- if (TF & AVRII::MO_NEG) { IsNegated = true; }
+ if (TF & AVRII::MO_NEG) {
+ IsNegated = true;
+ }
if (!MO.isJTI() && MO.getOffset()) {
Expr = MCBinaryExpr::createAdd(
@@ -59,7 +61,8 @@ MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
-void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) const {
+void AVRMCInstLower::lowerInstruction(const MachineInstr &MI,
+ MCInst &OutMI) const {
OutMI.setOpcode(MI.getOpcode());
for (MachineOperand const &MO : MI.operands()) {
@@ -108,4 +111,3 @@ void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) con
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/AVRMCInstLower.h b/llvm/lib/Target/AVR/AVRMCInstLower.h
index 5e0f42ac16a7..7ad6d472ad87 100644
--- a/llvm/lib/Target/AVR/AVRMCInstLower.h
+++ b/llvm/lib/Target/AVR/AVRMCInstLower.h
@@ -39,4 +39,3 @@ private:
} // end namespace llvm
#endif // LLVM_AVR_MCINST_LOWER_H
-
diff --git a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
index 5432fac122ef..8b1c247eb6a7 100644
--- a/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
+++ b/llvm/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -55,8 +55,10 @@ public:
CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {
unsigned CallConv = MF.getFunction().getCallingConv();
- this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR || MF.getFunction().hasFnAttribute("interrupt");
- this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL || MF.getFunction().hasFnAttribute("signal");
+ this->IsInterruptHandler = CallConv == CallingConv::AVR_INTR ||
+ MF.getFunction().hasFnAttribute("interrupt");
+ this->IsSignalHandler = CallConv == CallingConv::AVR_SIGNAL ||
+ MF.getFunction().hasFnAttribute("signal");
}
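
The two flags are derived either from the AVR-specific calling conventions or from the
"interrupt"/"signal" function attributes. A minimal source-level sketch of what
typically produces those attributes, assuming the usual avr-gcc-style attributes that
clang lowers to the IR attributes checked here (the handler names are made up):

// Interrupt service routines as they would appear in user code.
__attribute__((interrupt)) void timer0_ovf_isr(void) {
  // "interrupt": the prologue re-enables interrupts (sei) on entry.
}

__attribute__((signal)) void adc_ready_isr(void) {
  // "signal": interrupts stay disabled while the handler runs.
}
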
bool getHasSpills() const { return HasSpills; }
@@ -69,7 +71,9 @@ public:
void setHasStackArgs(bool B) { HasStackArgs = B; }
/// Checks if the function is some form of interrupt service routine.
- bool isInterruptOrSignalHandler() const { return isInterruptHandler() || isSignalHandler(); }
+ bool isInterruptOrSignalHandler() const {
+ return isInterruptHandler() || isSignalHandler();
+ }
bool isInterruptHandler() const { return IsInterruptHandler; }
bool isSignalHandler() const { return IsSignalHandler; }
@@ -81,6 +85,6 @@ public:
void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
};
-} // end llvm namespace
+} // namespace llvm
#endif // LLVM_AVR_MACHINE_FUNCTION_INFO_H
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 2a4905ce2461..1886debaf492 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -17,8 +17,8 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/IR/Function.h"
#include "AVR.h"
#include "AVRInstrInfo.h"
@@ -37,9 +37,8 @@ const uint16_t *
AVRRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const AVRMachineFunctionInfo *AFI = MF->getInfo<AVRMachineFunctionInfo>();
- return AFI->isInterruptOrSignalHandler()
- ? CSR_Interrupts_SaveList
- : CSR_Normal_SaveList;
+ return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_SaveList
+ : CSR_Normal_SaveList;
}
const uint32_t *
@@ -47,9 +46,8 @@ AVRRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
- return AFI->isInterruptOrSignalHandler()
- ? CSR_Interrupts_RegMask
- : CSR_Normal_RegMask;
+ return AFI->isInterruptOrSignalHandler() ? CSR_Interrupts_RegMask
+ : CSR_Normal_RegMask;
}
BitVector AVRRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -207,7 +205,8 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If the offset is too big we have to adjust and restore the frame pointer
// to materialize a valid load/store with displacement.
- //:TODO: consider using only one adiw/sbiw chain for more than one frame index
+ //: TODO: consider using only one adiw/sbiw chain for more than one frame
+ //: index
if (Offset > 62) {
unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK;
int AddOffset = Offset - 63 + 1;
@@ -276,18 +275,16 @@ void AVRRegisterInfo::splitReg(Register Reg, Register &LoReg,
HiReg = getSubReg(Reg, AVR::sub_hi);
}
-bool AVRRegisterInfo::shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
- LiveIntervals &LIS) const {
- if(this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
+bool AVRRegisterInfo::shouldCoalesce(
+ MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg,
+ const TargetRegisterClass *DstRC, unsigned DstSubReg,
+ const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
+ if (this->getRegClass(AVR::PTRDISPREGSRegClassID)->hasSubClassEq(NewRC)) {
return false;
}
- return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg, NewRC, LIS);
+ return TargetRegisterInfo::shouldCoalesce(MI, SrcRC, SubReg, DstRC, DstSubReg,
+ NewRC, LIS);
}
} // end of namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.h b/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 23439f2fe195..fa27d9283209 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -51,12 +51,9 @@ public:
/// \param Reg A 16-bit register to split.
void splitReg(Register Reg, Register &LoReg, Register &HiReg) const;
- bool shouldCoalesce(MachineInstr *MI,
- const TargetRegisterClass *SrcRC,
- unsigned SubReg,
- const TargetRegisterClass *DstRC,
- unsigned DstSubReg,
- const TargetRegisterClass *NewRC,
+ bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
+ unsigned SubReg, const TargetRegisterClass *DstRC,
+ unsigned DstSubReg, const TargetRegisterClass *NewRC,
LiveIntervals &LIS) const override;
};
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td
index 1948fcbaf75a..bb4e86ca0536 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -11,12 +11,8 @@
//===----------------------------------------------------------------------===//
// 8-bit General purpose register definition.
-class AVRReg<bits<16> num,
- string name,
- list<Register> subregs = [],
- list<string> altNames = []>
- : RegisterWithSubRegs<name, subregs>
-{
+class AVRReg<bits<16> num, string name, list<Register> subregs = [],
+ list<string> altNames = []> : RegisterWithSubRegs<name, subregs> {
field bits<16> Num = num;
let HWEncoding = num;
@@ -26,31 +22,27 @@ class AVRReg<bits<16> num,
}
// Subregister indices.
-let Namespace = "AVR" in
-{
+let Namespace = "AVR" in {
def sub_lo : SubRegIndex<8>;
def sub_hi : SubRegIndex<8, 8>;
}
-let Namespace = "AVR" in {
- def ptr : RegAltNameIndex;
-}
-
+let Namespace = "AVR" in { def ptr : RegAltNameIndex; }
//===----------------------------------------------------------------------===//
// 8-bit general purpose registers
//===----------------------------------------------------------------------===//
-def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>;
-def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>;
-def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>;
-def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>;
-def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>;
-def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>;
-def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>;
-def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>;
-def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>;
-def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>;
+def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>;
+def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>;
+def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>;
+def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>;
+def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>;
+def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>;
+def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>;
+def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>;
+def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>;
+def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>;
def R10 : AVRReg<10, "r10">, DwarfRegNum<[10]>;
def R11 : AVRReg<11, "r11">, DwarfRegNum<[11]>;
def R12 : AVRReg<12, "r12">, DwarfRegNum<[12]>;
@@ -76,19 +68,17 @@ def R31 : AVRReg<31, "r31", [], ["zh"]>, DwarfRegNum<[31]>;
def SPL : AVRReg<32, "SPL">, DwarfRegNum<[32]>;
def SPH : AVRReg<33, "SPH">, DwarfRegNum<[33]>;
-let SubRegIndices = [sub_lo, sub_hi],
-CoveredBySubRegs = 1 in
-{
+let SubRegIndices = [sub_lo, sub_hi], CoveredBySubRegs = 1 in {
// 16 bit GPR pairs.
- def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>;
+ def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>;
// The pointer registers (X,Y,Z) are a special case because they
// are printed as a `high:low` pair when a DREG is expected,
// but printed using `X`, `Y`, `Z` when a pointer register is expected.
let RegAltNameIndices = [ptr] in {
- def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>;
- def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>;
- def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>;
+ def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>;
+ def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>;
+ def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>;
}
def R25R24 : AVRReg<24, "r25:r24", [R24, R25]>, DwarfRegNum<[24]>;
def R23R22 : AVRReg<22, "r23:r22", [R22, R23]>, DwarfRegNum<[22]>;
@@ -98,11 +88,11 @@ CoveredBySubRegs = 1 in
def R15R14 : AVRReg<14, "r15:r14", [R14, R15]>, DwarfRegNum<[14]>;
def R13R12 : AVRReg<12, "r13:r12", [R12, R13]>, DwarfRegNum<[12]>;
def R11R10 : AVRReg<10, "r11:r10", [R10, R11]>, DwarfRegNum<[10]>;
- def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>;
- def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>;
- def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
- def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
- def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
+ def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>;
+ def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>;
+ def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>;
+ def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>;
+ def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>;
// Pseudo registers for unaligned i16
def R26R25 : AVRReg<25, "r26:r25", [R25, R26]>, DwarfRegNum<[25]>;
@@ -113,7 +103,7 @@ CoveredBySubRegs = 1 in
def R16R15 : AVRReg<15, "r16:r15", [R15, R16]>, DwarfRegNum<[15]>;
def R14R13 : AVRReg<13, "r14:r13", [R13, R14]>, DwarfRegNum<[13]>;
def R12R11 : AVRReg<11, "r12:r11", [R11, R12]>, DwarfRegNum<[11]>;
- def R10R9 : AVRReg<9, "r10:r9", [R9, R10]>, DwarfRegNum<[9]>;
+ def R10R9 : AVRReg<9, "r10:r9", [R9, R10]>, DwarfRegNum<[9]>;
}
//===----------------------------------------------------------------------===//
@@ -122,81 +112,71 @@ CoveredBySubRegs = 1 in
// Main 8-bit register class.
def GPR8 : RegisterClass<"AVR", [i8], 8,
- (
- // Return value and argument registers.
- add R24, R25, R18, R19, R20, R21, R22, R23,
- // Scratch registers.
- R30, R31, R26, R27,
- // Callee saved registers.
- R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
- R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
- )>;
+ (
+ // Return value and argument registers.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
+ R9, R8, R7, R6, R5, R4, R3, R2, R0, R1)>;
// Simple lower registers r0..r15
def GPR8lo : RegisterClass<"AVR", [i8], 8,
- (
- add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R0, R1
- )>;
+ (add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6,
+ R5, R4, R3, R2, R0, R1)>;
// 8-bit register class for instructions which take immediates.
def LD8 : RegisterClass<"AVR", [i8], 8,
- (
- // Return value and arguments.
- add R24, R25, R18, R19, R20, R21, R22, R23,
- // Scratch registers.
- R30, R31, R26, R27,
- // Callee saved registers.
- R28, R29, R17, R16
- )>;
+ (
+ // Return value and arguments.
+ add R24, R25, R18, R19, R20, R21, R22, R23,
+ // Scratch registers.
+ R30, R31, R26, R27,
+ // Callee saved registers.
+ R28, R29, R17, R16)>;
// Simple lower registers r16..r23
def LD8lo : RegisterClass<"AVR", [i8], 8,
- (
- add R23, R22, R21, R20, R19, R18, R17, R16
- )>;
+ (add R23, R22, R21, R20, R19, R18, R17, R16)>;
// Main 16-bit pair register class.
def DREGS : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28, R17R16, R15R14, R13R12, R11R10,
- R9R8, R7R6, R5R4, R3R2, R1R0,
- // Pseudo regs for unaligned 16-bits
- R26R25, R24R23, R22R21,
- R20R19, R18R17, R16R15,
- R14R13, R12R11, R10R9
- )>;
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
+ R7R6, R5R4, R3R2, R1R0,
+ // Pseudo regs for unaligned 16-bits
+ R26R25, R24R23, R22R21, R20R19, R18R17, R16R15,
+ R14R13, R12R11, R10R9)>;
// Lower 16-bit pair registers in R0..R15, only used in inline assembly.
-def DREGSlo : RegisterClass<"AVR", [i16], 8,
- (
- add R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2, R1R0
- )>;
+def DREGSlo
+ : RegisterClass<"AVR", [i16], 8,
+ (add R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2, R1R0)>;
// Lower 16-bit pair registers in r16..r23, only used in inline assembly.
def DREGSLD8lo : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R19R18, R21R20, R23R22,
- // Callee saved registers.
- R17R16
- )>;
+ (
+ // Return value and arguments.
+ add R19R18, R21R20, R23R22,
+ // Callee saved registers.
+ R17R16)>;
// 16-bit pair register class for movw
def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28, R17R16, R15R14, R13R12, R11R10,
- R9R8, R7R6, R5R4, R3R2, R1R0
- )>;
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
+ R7R6, R5R4, R3R2, R1R0)>;
// The 16-bit DREGS register class, excluding the Z pointer register.
//
@@ -207,66 +187,59 @@ def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
// cannot use Z; it's simply a workaround for a regalloc bug.
//
// More information can be found in PR39553.
-def DREGS_WITHOUT_YZ_WORKAROUND : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R27R26,
- // Callee saved registers.
- R17R16, R15R14, R13R12, R11R10,
- R9R8, R7R6, R5R4, R3R2, R1R0
- )>;
+def DREGS_WITHOUT_YZ_WORKAROUND
+ : RegisterClass<"AVR", [i16], 8,
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R27R26,
+ // Callee saved registers.
+ R17R16, R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2,
+ R1R0)>;
// 16-bit register class for immediate instructions.
def DLDREGS : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24, R19R18, R21R20, R23R22,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28, R17R16
- )>;
+ (
+ // Return value and arguments.
+ add R25R24, R19R18, R21R20, R23R22,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28, R17R16)>;
// 16-bit register class for the adiw/sbiw instructions.
def IWREGS : RegisterClass<"AVR", [i16], 8,
- (
- // Return value and arguments.
- add R25R24,
- // Scratch registers.
- R31R30, R27R26,
- // Callee saved registers.
- R29R28
- )>;
+ (
+ // Return value and arguments.
+ add R25R24,
+ // Scratch registers.
+ R31R30, R27R26,
+ // Callee saved registers.
+ R29R28)>;
// 16-bit register class for the ld and st instructions.
// AKA X, Y, and Z
def PTRREGS : RegisterClass<"AVR", [i16], 8,
- (
- add R27R26, // X
- R29R28, // Y
- R31R30 // Z
- ), ptr>;
+ (add R27R26, // X
+ R29R28, // Y
+ R31R30 // Z
+ ),
+ ptr>;
// 16-bit register class for the ldd and std instructions.
// AKA Y and Z.
-def PTRDISPREGS : RegisterClass<"AVR", [i16], 8,
- (
- add R31R30, R29R28
- ), ptr>;
+def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, (add R31R30, R29R28), ptr>;
// We have a bunch of instructions with an explicit Z register argument. We
// model this using a register class containing only the Z register.
def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
// Register class used for the stack read pseudo instruction.
-def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>;
+def GPRSP : RegisterClass<"AVR", [i16], 8, (add SP)>;
// Status register.
def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>;
-def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)>
-{
- let CopyCost = -1; // Don't allow copying of status registers
+def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> {
+ let CopyCost = -1; // Don't allow copying of status registers
}
-
diff --git a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp b/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
index 7d2d19de7578..76f29eb9f369 100644
--- a/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
+++ b/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
@@ -84,8 +84,7 @@ bool AVRRelaxMem::runOnBasicBlock(Block &MBB) {
return Modified;
}
-template <>
-bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
+template <> bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
MachineOperand &Ptr = MI.getOperand(0);
@@ -96,24 +95,23 @@ bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
if (Imm > 63) {
// Push the previous state of the pointer register.
// This instruction must preserve the value.
- buildMI(MBB, MBBI, AVR::PUSHWRr)
- .addReg(Ptr.getReg());
+ buildMI(MBB, MBBI, AVR::PUSHWRr).addReg(Ptr.getReg());
// Add the immediate to the pointer register.
buildMI(MBB, MBBI, AVR::SBCIWRdK)
- .addReg(Ptr.getReg(), RegState::Define)
- .addReg(Ptr.getReg())
- .addImm(-Imm);
+ .addReg(Ptr.getReg(), RegState::Define)
+ .addReg(Ptr.getReg())
+ .addImm(-Imm);
// Store the value in the source register to the address
// pointed to by the pointer register.
buildMI(MBB, MBBI, AVR::STWPtrRr)
- .addReg(Ptr.getReg())
- .addReg(Src.getReg(), getKillRegState(Src.isKill()));
+ .addReg(Ptr.getReg())
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()));
// Pop the original state of the pointer register.
buildMI(MBB, MBBI, AVR::POPWRd)
- .addDef(Ptr.getReg(), getKillRegState(Ptr.isKill()));
+ .addDef(Ptr.getReg(), getKillRegState(Ptr.isKill()));
MI.removeFromParent();
}
@@ -125,21 +123,19 @@ bool AVRRelaxMem::runOnInstruction(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
int Opcode = MBBI->getOpcode();
-#define RELAX(Op) \
- case Op: \
+#define RELAX(Op) \
+ case Op: \
return relax<Op>(MBB, MI)
- switch (Opcode) {
- RELAX(AVR::STDWPtrQRr);
- }
+ switch (Opcode) { RELAX(AVR::STDWPtrQRr); }
#undef RELAX
return false;
}
} // end of anonymous namespace
-INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem",
- AVR_RELAX_MEM_OPS_NAME, false, false)
+INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem", AVR_RELAX_MEM_OPS_NAME, false,
+ false)
namespace llvm {
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp
index 601865120491..990e1c57e63f 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -13,7 +13,7 @@
#include "AVRSubtarget.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "AVR.h"
#include "AVRTargetMachine.h"
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
index 7d49e43a83f5..90b9cd4da7c1 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -39,10 +39,18 @@ public:
const AVRTargetMachine &TM);
const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
- const AVRTargetLowering *getTargetLowering() const override { return &TLInfo; }
- const AVRSelectionDAGInfo *getSelectionDAGInfo() const override { return &TSInfo; }
- const AVRRegisterInfo *getRegisterInfo() const override { return &InstrInfo.getRegisterInfo(); }
+ const TargetFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const AVRTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const AVRSelectionDAGInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+ const AVRRegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
/// Parses a subtarget feature string, setting appropriate options.
/// \note Definition of function is auto generated by `tblgen`.
@@ -84,7 +92,6 @@ public:
}
private:
-
/// The ELF e_flags architecture.
unsigned ELFArch;
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 5be4260ce035..65740f7c2306 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "AVR.h"
#include "AVRTargetObjectFile.h"
@@ -25,7 +25,8 @@
namespace llvm {
-static const char *AVRDataLayout = "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8";
+static const char *AVRDataLayout =
+ "e-P1-p:16:8-i8:8-i16:8-i32:8-i64:8-f32:8-f64:8-n8-a:8";
/// Processes a CPU name.
static StringRef getCPU(StringRef CPU) {
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.h b/llvm/lib/Target/AVR/AVRTargetMachine.h
index f9015c8741ea..54669eda060c 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.h
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.h
@@ -29,8 +29,7 @@ class AVRTargetMachine : public LLVMTargetMachine {
public:
AVRTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
- Optional<Reloc::Model> RM,
- Optional<CodeModel::Model> CM,
+ Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM,
CodeGenOpt::Level OL, bool JIT);
const AVRSubtarget *getSubtargetImpl() const;
@@ -42,10 +41,6 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- bool isMachineVerifierClean() const override {
- return false;
- }
-
private:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
AVRSubtarget SubTarget;
diff --git a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
index 14206cdb8276..c7715ca1f51b 100644
--- a/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -24,10 +24,8 @@ void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
}
-MCSection *
-AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
- SectionKind Kind,
- const TargetMachine &TM) const {
+MCSection *AVRTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Global values in flash memory are placed in the progmem.data section
// unless they already have a user assigned section.
if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection() && Kind.isReadOnly())
@@ -37,4 +35,3 @@ AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
return Base::SelectSectionForGlobal(GO, Kind, TM);
}
} // end of namespace llvm
-
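A hypothetical illustration (not part of the patch) of the kind of global the SelectSectionForGlobal hook above routes into .progmem.data. It assumes Clang's generic address_space attribute, with AVR program memory mapped to address space 1; the names are illustrative only.

    // Hypothetical example: a read-only lookup table living in AVR program
    // memory (address space 1). Because it is read-only, sits in program
    // memory, and has no user-assigned section, the hook above places it
    // in .progmem.data.
    __attribute__((address_space(1))) const int lut[4] = {1, 2, 4, 8};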
diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 19f769270569..95ecd28200ba 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -25,9 +25,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include <sstream>
@@ -170,9 +170,11 @@ public:
}
bool isImmCom8() const {
- if (!isImm()) return false;
+ if (!isImm())
+ return false;
const auto *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ if (!CE)
+ return false;
int64_t Value = CE->getValue();
return isUInt<8>(Value);
}
@@ -322,11 +324,16 @@ bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
switch (MatchResult) {
- case Match_Success: return emit(Inst, Loc, Out);
- case Match_MissingFeature: return missingFeature(Loc, ErrorInfo);
- case Match_InvalidOperand: return invalidOperand(Loc, Operands, ErrorInfo);
- case Match_MnemonicFail: return Error(Loc, "invalid instruction");
- default: return true;
+ case Match_Success:
+ return emit(Inst, Loc, Out);
+ case Match_MissingFeature:
+ return missingFeature(Loc, ErrorInfo);
+ case Match_InvalidOperand:
+ return invalidOperand(Loc, Operands, ErrorInfo);
+ case Match_MnemonicFail:
+ return Error(Loc, "invalid instruction");
+ default:
+ return true;
}
}
@@ -440,8 +447,7 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
tokens[1].getKind() == AsmToken::Minus)) {
AsmToken::TokenKind CurTok = Parser.getLexer().getKind();
- if (CurTok == AsmToken::Minus ||
- tokens[1].getKind() == AsmToken::Minus) {
+ if (CurTok == AsmToken::Minus || tokens[1].getKind() == AsmToken::Minus) {
isNegated = true;
} else {
assert(CurTok == AsmToken::Plus);
@@ -461,7 +467,7 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
return true;
}
StringRef ModifierName = Parser.getTok().getString();
- ModifierKind = AVRMCExpr::getKindByName(ModifierName.str().c_str());
+ ModifierKind = AVRMCExpr::getKindByName(ModifierName);
if (ModifierKind != AVRMCExpr::VK_AVR_None) {
Parser.Lex();
@@ -469,7 +475,7 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
if (Parser.getTok().getString() == GENERATE_STUBS &&
Parser.getTok().getKind() == AsmToken::Identifier) {
std::string GSModName = ModifierName.str() + "_" + GENERATE_STUBS;
- ModifierKind = AVRMCExpr::getKindByName(GSModName.c_str());
+ ModifierKind = AVRMCExpr::getKindByName(GSModName);
if (ModifierKind != AVRMCExpr::VK_AVR_None)
Parser.Lex(); // Eat gs modifier name
}
@@ -498,8 +504,8 @@ bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
assert(Parser.getTok().getKind() == AsmToken::RParen);
Parser.Lex(); // Eat closing parenthesis
- MCExpr const *Expression = AVRMCExpr::create(ModifierKind, InnerExpression,
- isNegated, getContext());
+ MCExpr const *Expression =
+ AVRMCExpr::create(ModifierKind, InnerExpression, isNegated, getContext());
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(AVROperand::CreateImm(Expression, S, E));
@@ -552,8 +558,7 @@ bool AVRAsmParser::parseOperand(OperandVector &Operands) {
return true;
}
-OperandMatchResultTy
-AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
+OperandMatchResultTy AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << "parseMemriOperand()\n");
SMLoc E, S;
@@ -620,7 +625,8 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
bool first = true;
while (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (!first) eatComma();
+ if (!first)
+ eatComma();
first = false;
@@ -670,7 +676,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
Tokens[1].getKind() == AsmToken::Identifier) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(".text");
AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L,
- AVRMCExpr::VK_AVR_None);
+ AVRMCExpr::VK_AVR_None);
return false;
}
@@ -678,7 +684,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
Parser.getLexer().peekTok().getKind() == AsmToken::LParen) {
StringRef ModifierName = Parser.getTok().getString();
AVRMCExpr::VariantKind ModifierKind =
- AVRMCExpr::getKindByName(ModifierName.str().c_str());
+ AVRMCExpr::getKindByName(ModifierName);
if (ModifierKind != AVRMCExpr::VK_AVR_None) {
Parser.Lex();
Parser.Lex(); // Eat the modifier and parenthesis
@@ -722,7 +728,7 @@ unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
int64_t RegNum = Const->getValue();
std::ostringstream RegName;
RegName << "r" << RegNum;
- RegNum = MatchRegisterName(RegName.str().c_str());
+ RegNum = MatchRegisterName(RegName.str());
if (RegNum != AVR::NoRegister) {
Op.makeReg(RegNum);
if (validateOperandClass(Op, Expected) == Match_Success) {
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 8e7251a74dfd..9dcd370b9f1e 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -42,7 +42,7 @@ public:
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
};
-}
+} // namespace
static MCDisassembler *createAVRDisassembler(const Target &T,
const MCSubtargetInfo &STI,
@@ -50,7 +50,6 @@ static MCDisassembler *createAVRDisassembler(const Target &T,
return new AVRDisassembler(STI, Ctx);
}
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRDisassembler() {
// Register the disassembler.
TargetRegistry::RegisterMCDisassembler(getTheAVRTarget(),
@@ -58,18 +57,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRDisassembler() {
}
static const uint16_t GPRDecoderTable[] = {
- AVR::R0, AVR::R1, AVR::R2, AVR::R3,
- AVR::R4, AVR::R5, AVR::R6, AVR::R7,
- AVR::R8, AVR::R9, AVR::R10, AVR::R11,
- AVR::R12, AVR::R13, AVR::R14, AVR::R15,
- AVR::R16, AVR::R17, AVR::R18, AVR::R19,
- AVR::R20, AVR::R21, AVR::R22, AVR::R23,
- AVR::R24, AVR::R25, AVR::R26, AVR::R27,
- AVR::R28, AVR::R29, AVR::R30, AVR::R31,
+ AVR::R0, AVR::R1, AVR::R2, AVR::R3, AVR::R4, AVR::R5, AVR::R6,
+ AVR::R7, AVR::R8, AVR::R9, AVR::R10, AVR::R11, AVR::R12, AVR::R13,
+ AVR::R14, AVR::R15, AVR::R16, AVR::R17, AVR::R18, AVR::R19, AVR::R20,
+ AVR::R21, AVR::R22, AVR::R23, AVR::R24, AVR::R25, AVR::R26, AVR::R27,
+ AVR::R28, AVR::R29, AVR::R30, AVR::R31,
};
static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -79,39 +76,41 @@ static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
}
static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
- unsigned Register = GPRDecoderTable[RegNo+16];
+ unsigned Register = GPRDecoderTable[RegNo + 16];
Inst.addOperand(MCOperand::createReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus DecodePTRREGSRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+ uint64_t Address,
+ const void *Decoder) {
// Note: this function must be defined but does not seem to be called.
assert(false && "unimplemented: PTRREGS register class");
return MCDisassembler::Success;
}
-static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
@@ -119,40 +118,42 @@ static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder);
+static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
#include "AVRGenDisassemblerTables.inc"
-static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFIOARr(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned addr = 0;
addr |= fieldFromInstruction(Insn, 0, 4);
addr |= fieldFromInstruction(Insn, 9, 2) << 4;
unsigned reg = fieldFromInstruction(Insn, 4, 5);
Inst.addOperand(MCOperand::createImm(addr));
- if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFIORdA(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned addr = 0;
addr |= fieldFromInstruction(Insn, 0, 4);
addr |= fieldFromInstruction(Insn, 9, 2) << 4;
unsigned reg = fieldFromInstruction(Insn, 4, 5);
- if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, reg, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(addr));
return MCDisassembler::Success;
}
-static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFIOBIT(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned addr = fieldFromInstruction(Insn, 3, 5);
unsigned b = fieldFromInstruction(Insn, 0, 3);
Inst.addOperand(MCOperand::createImm(addr));
@@ -168,16 +169,17 @@ static DecodeStatus decodeCallTarget(MCInst &Inst, unsigned Field,
return MCDisassembler::Success;
}
-static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFRd(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 5);
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFLPMX(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
if (decodeFRd(Inst, Insn, Address, Decoder) == MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(AVR::R31R30));
@@ -188,9 +190,11 @@ static DecodeStatus decodeFFMULRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 3) + 16;
unsigned r = fieldFromInstruction(Insn, 0, 3) + 16;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
@@ -199,22 +203,26 @@ static DecodeStatus decodeFMOVWRdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned r = fieldFromInstruction(Insn, 4, 4) * 2;
unsigned d = fieldFromInstruction(Insn, 0, 4) * 2;
- if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, r, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
-static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus decodeFWRdK(MCInst &Inst, unsigned Insn, uint64_t Address,
+ const void *Decoder) {
unsigned d = fieldFromInstruction(Insn, 4, 2) * 2 + 24; // starts at r24:r25
unsigned k = 0;
k |= fieldFromInstruction(Insn, 0, 4);
k |= fieldFromInstruction(Insn, 6, 2) << 4;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, d, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(k));
return MCDisassembler::Success;
@@ -224,9 +232,11 @@ static DecodeStatus decodeFMUL2RdRr(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned rd = fieldFromInstruction(Insn, 4, 4) + 16;
unsigned rr = fieldFromInstruction(Insn, 0, 4) + 16;
- if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, rd, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
- if (DecodeGPR8RegisterClass(Inst, rr, Address, Decoder) == MCDisassembler::Fail)
+ if (DecodeGPR8RegisterClass(Inst, rr, Address, Decoder) ==
+ MCDisassembler::Fail)
return MCDisassembler::Fail;
return MCDisassembler::Success;
}
@@ -253,7 +263,8 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
}
Size = 4;
- Insn = (Bytes[0] << 16) | (Bytes[1] << 24) | (Bytes[2] << 0) | (Bytes[3] << 8);
+ Insn =
+ (Bytes[0] << 16) | (Bytes[1] << 24) | (Bytes[2] << 0) | (Bytes[3] << 8);
return MCDisassembler::Success;
}
@@ -261,9 +272,12 @@ static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
static const uint8_t *getDecoderTable(uint64_t Size) {
switch (Size) {
- case 2: return DecoderTable16;
- case 4: return DecoderTable32;
- default: llvm_unreachable("instructions must be 16 or 32-bits");
+ case 2:
+ return DecoderTable16;
+ case 4:
+ return DecoderTable32;
+ default:
+ llvm_unreachable("instructions must be 16 or 32-bits");
}
}
@@ -279,11 +293,12 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
{
Result = readInstruction16(Bytes, Address, Size, Insn);
- if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
// Try to auto-decode a 16-bit instruction.
- Result = decodeInstruction(getDecoderTable(Size), Instr,
- Insn, Address, this, STI);
+ Result = decodeInstruction(getDecoderTable(Size), Instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail)
return Result;
@@ -293,10 +308,11 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
{
Result = readInstruction32(Bytes, Address, Size, Insn);
- if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+ if (Result == MCDisassembler::Fail)
+ return MCDisassembler::Fail;
- Result = decodeInstruction(getDecoderTable(Size), Instr, Insn,
- Address, this, STI);
+ Result = decodeInstruction(getDecoderTable(Size), Instr, Insn, Address,
+ this, STI);
if (Result != MCDisassembler::Fail) {
return Result;
@@ -308,4 +324,3 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
const void *Decoder);
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index 49840672bf9a..a3a4d63932c0 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -44,7 +44,7 @@ static void signed_width(unsigned Width, uint64_t Value,
int64_t Max = maxIntN(Width);
Diagnostic += " (expected an integer in the range " + std::to_string(Min) +
- " to " + std::to_string(Max) + ")";
+ " to " + std::to_string(Max) + ")";
if (Ctx) {
Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
@@ -62,8 +62,8 @@ static void unsigned_width(unsigned Width, uint64_t Value,
int64_t Max = maxUIntN(Width);
- Diagnostic += " (expected an integer in the range 0 to " +
- std::to_string(Max) + ")";
+ Diagnostic +=
+ " (expected an integer in the range 0 to " + std::to_string(Max) + ")";
if (Ctx) {
Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
@@ -233,15 +233,14 @@ static void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
ldi::fixup(Size, Fixup, Value, Ctx);
}
-} // end of ldi namespace
-} // end of adjust namespace
+} // namespace ldi
+} // namespace adjust
namespace llvm {
// Prepare value for the target space for it
void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
- const MCValue &Target,
- uint64_t &Value,
+ const MCValue &Target, uint64_t &Value,
MCContext *Ctx) const {
// The size of the fixup in bits.
uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize;
@@ -280,7 +279,8 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
break;
case AVR::fixup_hh8_ldi:
case AVR::fixup_hh8_ldi_pm:
- if (Kind == AVR::fixup_hh8_ldi_pm) adjust::pm(Value);
+ if (Kind == AVR::fixup_hh8_ldi_pm)
+ adjust::pm(Value);
adjust::ldi::hh8(Size, Fixup, Value, Ctx);
break;
@@ -290,21 +290,24 @@ void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup,
case AVR::fixup_lo8_ldi_neg:
case AVR::fixup_lo8_ldi_pm_neg:
- if (Kind == AVR::fixup_lo8_ldi_pm_neg) adjust::pm(Value);
+ if (Kind == AVR::fixup_lo8_ldi_pm_neg)
+ adjust::pm(Value);
adjust::ldi::neg(Value);
adjust::ldi::lo8(Size, Fixup, Value, Ctx);
break;
case AVR::fixup_hi8_ldi_neg:
case AVR::fixup_hi8_ldi_pm_neg:
- if (Kind == AVR::fixup_hi8_ldi_pm_neg) adjust::pm(Value);
+ if (Kind == AVR::fixup_hi8_ldi_pm_neg)
+ adjust::pm(Value);
adjust::ldi::neg(Value);
adjust::ldi::hi8(Size, Fixup, Value, Ctx);
break;
case AVR::fixup_hh8_ldi_neg:
case AVR::fixup_hh8_ldi_pm_neg:
- if (Kind == AVR::fixup_hh8_ldi_pm_neg) adjust::pm(Value);
+ if (Kind == AVR::fixup_hh8_ldi_pm_neg)
+ adjust::pm(Value);
adjust::ldi::neg(Value);
adjust::ldi::hh8(Size, Fixup, Value, Ctx);
@@ -455,7 +458,8 @@ MCFixupKindInfo const &AVRAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
return Infos[Kind - FirstTargetFixupKind];
}
-bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// If the count is not 2-byte aligned, we must be writing data into the text
// section (otherwise we have unaligned instructions, and thus have far
// bigger problems), so just write zeros instead.
@@ -468,8 +472,9 @@ bool AVRAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
const MCValue &Target) {
- switch ((unsigned) Fixup.getKind()) {
- default: return false;
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ return false;
// Fixups which should always be recorded as relocations.
case AVR::fixup_7_pcrel:
case AVR::fixup_13_pcrel:
@@ -485,4 +490,3 @@ MCAsmBackend *createAVRAsmBackend(const Target &T, const MCSubtargetInfo &STI,
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index 46dc914adf78..ea7fc30ab9d0 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -55,7 +55,8 @@ public:
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
@@ -67,4 +68,3 @@ private:
} // end namespace llvm
#endif // LLVM_AVR_ASM_BACKEND_H
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
index bedf68db08ca..b90e103794da 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -27,21 +27,18 @@ public:
virtual ~AVRELFObjectWriter() {}
- unsigned getRelocType(MCContext &Ctx,
- const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const override;
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
};
AVRELFObjectWriter::AVRELFObjectWriter(uint8_t OSABI)
: MCELFObjectTargetWriter(false, OSABI, ELF::EM_AVR, true) {}
-unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx,
- const MCValue &Target,
+unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
- switch ((unsigned) Fixup.getKind()) {
+ switch ((unsigned)Fixup.getKind()) {
case FK_Data_1:
switch (Modifier) {
default:
@@ -158,4 +155,3 @@ std::unique_ptr<MCObjectTargetWriter> createAVRELFObjectWriter(uint8_t OSABI) {
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index 6d126ed622aa..85933d6b9bb9 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -53,8 +53,7 @@ static unsigned getEFlagsForFeatureSet(const FeatureBitset &Features) {
return EFlags;
}
-AVRELFStreamer::AVRELFStreamer(MCStreamer &S,
- const MCSubtargetInfo &STI)
+AVRELFStreamer::AVRELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
: AVRTargetStreamer(S) {
MCAssembler &MCA = getStreamer().getAssembler();
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
index a0dd1dc8ac3e..1f7a926edb5c 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -141,7 +141,7 @@ namespace fixups {
template <typename T> inline void adjustBranchTarget(T &val) { val >>= 1; }
} // end of namespace fixups
-}
-} // end of namespace llvm::AVR
+} // namespace AVR
+} // namespace llvm
#endif // LLVM_AVR_FIXUP_KINDS_H
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
index 42fac5e2e000..d68e73ce0bb1 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp
@@ -172,7 +172,8 @@ void AVRInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- assert(MI->getOperand(OpNo).isReg() && "Expected a register for the first operand");
+ assert(MI->getOperand(OpNo).isReg() &&
+ "Expected a register for the first operand");
const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
@@ -195,4 +196,3 @@ void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
index 8976ef28f3dc..11f55f6d253b 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h
@@ -56,4 +56,3 @@ private:
} // end namespace llvm
#endif // LLVM_AVR_INST_PRINTER_H
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index 50872d6d7a92..9754ff7f1146 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -75,7 +75,7 @@ AVRMCCodeEmitter::loadStorePostEncoder(const MCInst &MI, unsigned EncodedValue,
// check whether either of the registers are the X pointer register.
bool IsRegX = MI.getOperand(0).getReg() == AVR::R27R26 ||
- MI.getOperand(1).getReg() == AVR::R27R26;
+ MI.getOperand(1).getReg() == AVR::R27R26;
bool IsPredec = Opcode == AVR::LDRdPtrPd || Opcode == AVR::STPtrPdRr;
bool IsPostinc = Opcode == AVR::LDRdPtrPi || Opcode == AVR::STPtrPiRr;
@@ -96,8 +96,8 @@ AVRMCCodeEmitter::encodeRelCondBrTarget(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isExpr()) {
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- MCFixupKind(Fixup), MI.getLoc()));
+ Fixups.push_back(
+ MCFixup::create(0, MO.getExpr(), MCFixupKind(Fixup), MI.getLoc()));
return 0;
}
@@ -119,9 +119,12 @@ unsigned AVRMCCodeEmitter::encodeLDSTPtrReg(const MCInst &MI, unsigned OpNo,
assert(MO.isReg());
switch (MO.getReg()) {
- case AVR::R27R26: return 0x03; // X: 0b11
- case AVR::R29R28: return 0x02; // Y: 0b10
- case AVR::R31R30: return 0x00; // Z: 0b00
+ case AVR::R27R26:
+ return 0x03; // X: 0b11
+ case AVR::R29R28:
+ return 0x02; // Y: 0b10
+ case AVR::R31R30:
+ return 0x00; // Z: 0b00
default:
llvm_unreachable("invalid pointer register");
}
@@ -159,7 +162,7 @@ unsigned AVRMCCodeEmitter::encodeMemri(const MCInst &MI, unsigned OpNo,
} else if (OffsetOp.isExpr()) {
OffsetBits = 0;
Fixups.push_back(MCFixup::create(0, OffsetOp.getExpr(),
- MCFixupKind(AVR::fixup_6), MI.getLoc()));
+ MCFixupKind(AVR::fixup_6), MI.getLoc()));
} else {
llvm_unreachable("invalid value for offset");
}
@@ -193,7 +196,8 @@ unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo,
}
MCFixupKind FixupKind = static_cast<MCFixupKind>(Fixup);
- Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc()));
+ Fixups.push_back(
+ MCFixup::create(Offset, MO.getExpr(), FixupKind, MI.getLoc()));
return 0;
}
@@ -251,8 +255,10 @@ unsigned AVRMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- if (MO.isImm()) return static_cast<unsigned>(MO.getImm());
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
if (MO.isDFPImm())
return static_cast<unsigned>(bit_cast<double>(MO.getDFPImm()));
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 2e24d885c155..1bfa79f26b27 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -109,7 +109,6 @@ private:
MCContext &Ctx;
};
-} // end namespace of llvm.
+} // namespace llvm
#endif // LLVM_AVR_CODE_EMITTER_H
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
index 0743344bc1ed..5f2a5a82e41d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp
@@ -13,8 +13,8 @@
#include "MCTargetDesc/AVRMCELFStreamer.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
#define DEBUG_TYPE "avrmcelfstreamer"
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
index a4f8787e5667..7e735ffa6cec 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -19,16 +19,15 @@ namespace llvm {
namespace {
const struct ModifierEntry {
- const char * const Spelling;
+ const char *const Spelling;
AVRMCExpr::VariantKind VariantKind;
} ModifierNames[] = {
{"lo8", AVRMCExpr::VK_AVR_LO8}, {"hi8", AVRMCExpr::VK_AVR_HI8},
{"hh8", AVRMCExpr::VK_AVR_HH8}, // synonym with hlo8
{"hlo8", AVRMCExpr::VK_AVR_HH8}, {"hhi8", AVRMCExpr::VK_AVR_HHI8},
- {"pm", AVRMCExpr::VK_AVR_PM},
- {"pm_lo8", AVRMCExpr::VK_AVR_PM_LO8}, {"pm_hi8", AVRMCExpr::VK_AVR_PM_HI8},
- {"pm_hh8", AVRMCExpr::VK_AVR_PM_HH8},
+ {"pm", AVRMCExpr::VK_AVR_PM}, {"pm_lo8", AVRMCExpr::VK_AVR_PM_LO8},
+ {"pm_hi8", AVRMCExpr::VK_AVR_PM_HI8}, {"pm_hh8", AVRMCExpr::VK_AVR_PM_HH8},
{"lo8_gs", AVRMCExpr::VK_AVR_LO8_GS}, {"hi8_gs", AVRMCExpr::VK_AVR_HI8_GS},
{"gs", AVRMCExpr::VK_AVR_GS},
@@ -81,7 +80,8 @@ bool AVRMCExpr::evaluateAsRelocatableImpl(MCValue &Result,
if (Value.isAbsolute()) {
Result = MCValue::get(evaluateAsInt64(Value.getConstant()));
} else {
- if (!Layout) return false;
+ if (!Layout)
+ return false;
MCContext &Context = Layout->getAssembler().getContext();
const MCSymbolRefExpr *Sym = Value.getSymA();
@@ -219,4 +219,3 @@ AVRMCExpr::VariantKind AVRMCExpr::getKindByName(StringRef Name) {
}
} // end of namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
index e35385ebd90a..68589763f29a 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
@@ -34,7 +34,7 @@ public:
VK_AVR_LO8_GS, ///< Corresponds to `lo8(gs())`.
VK_AVR_HI8_GS, ///< Corresponds to `hi8(gs())`.
- VK_AVR_GS, ///< Corresponds to `gs()`.
+ VK_AVR_GS, ///< Corresponds to `gs()`.
};
public:
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
index 95f4465924cc..cdfe4a21105d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -10,21 +10,21 @@
//
//===----------------------------------------------------------------------===//
+#include "AVRMCTargetDesc.h"
#include "AVRELFStreamer.h"
#include "AVRInstPrinter.h"
#include "AVRMCAsmInfo.h"
#include "AVRMCELFStreamer.h"
-#include "AVRMCTargetDesc.h"
#include "AVRTargetStreamer.h"
#include "TargetInfo/AVRTargetInfo.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "AVRGenInstrInfo.inc"
@@ -108,7 +108,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetMC() {
createAVRMCInstPrinter);
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(getTheAVRTarget(), createAVRMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(getTheAVRTarget(),
+ createAVRMCCodeEmitter);
// Register the obj streamer
TargetRegistry::RegisterELFStreamer(getTheAVRTarget(), createMCStreamer);
@@ -124,4 +125,3 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetMC() {
// Register the asm backend (as little endian).
TargetRegistry::RegisterMCAsmBackend(getTheAVRTarget(), createAVRAsmBackend);
}
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
index eccd343d79ab..56e0e7810466 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp
@@ -21,23 +21,4 @@ AVRTargetStreamer::AVRTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
AVRTargetAsmStreamer::AVRTargetAsmStreamer(MCStreamer &S)
: AVRTargetStreamer(S) {}
-void AVRTargetStreamer::finish() {
- MCStreamer &OS = getStreamer();
- MCContext &Context = OS.getContext();
-
- MCSymbol *DoCopyData = Context.getOrCreateSymbol("__do_copy_data");
- MCSymbol *DoClearBss = Context.getOrCreateSymbol("__do_clear_bss");
-
- // FIXME: We can disable __do_copy_data if there are no static RAM variables.
-
- OS.emitRawComment(" Declaring this symbol tells the CRT that it should");
- OS.emitRawComment("copy all variables from program memory to RAM on startup");
- OS.emitSymbolAttribute(DoCopyData, MCSA_Global);
-
- OS.emitRawComment(" Declaring this symbol tells the CRT that it should");
- OS.emitRawComment("clear the zeroed data section on startup");
- OS.emitSymbolAttribute(DoClearBss, MCSA_Global);
-}
-
} // end namespace llvm
-
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
index 5c4d1a22f6c6..b8b1454a2b8d 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h
@@ -18,8 +18,6 @@ class MCStreamer;
class AVRTargetStreamer : public MCTargetStreamer {
public:
explicit AVRTargetStreamer(MCStreamer &S);
-
- void finish() override;
};
/// A target streamer for textual AVR assembly code.
diff --git a/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
index 69b509b33e88..dd61add1526c 100644
--- a/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
+++ b/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -7,16 +7,15 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/AVRTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
namespace llvm {
Target &getTheAVRTarget() {
static Target TheAVRTarget;
return TheAVRTarget;
}
-}
+} // namespace llvm
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAVRTargetInfo() {
llvm::RegisterTarget<llvm::Triple::avr> X(llvm::getTheAVRTarget(), "avr",
"Atmel AVR Microcontroller", "AVR");
}
-
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 57488bc28f98..50298bf5e943 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -19,8 +19,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index a98a3e08d5de..89990f7e15c2 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -21,6 +21,7 @@ ModulePass *createBPFCheckAndAdjustIR();
FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
FunctionPass *createBPFPreserveDIType();
+FunctionPass *createBPFIRPeephole();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
@@ -33,6 +34,7 @@ void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
void initializeBPFPreserveDITypePass(PassRegistry&);
+void initializeBPFIRPeepholePass(PassRegistry&);
void initializeBPFMISimplifyPatchablePass(PassRegistry&);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPeepholeTruncElimPass(PassRegistry&);
@@ -57,6 +59,13 @@ public:
static bool isRequired() { return true; }
};
+class BPFIRPeepholePass : public PassInfoMixin<BPFIRPeepholePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ static bool isRequired() { return true; }
+};
+
class BPFAdjustOptPass : public PassInfoMixin<BPFAdjustOptPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
index 7088d55e1a71..69d0bca0bd77 100644
--- a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
+++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
@@ -15,6 +15,7 @@
#include "BPFTargetMachine.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsBPF.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
@@ -66,6 +67,7 @@ private:
Module *M;
SmallVector<PassThroughInfo, 16> PassThroughs;
+ bool adjustICmpToBuiltin();
void adjustBasicBlock(BasicBlock &BB);
bool serializeICMPCrossBB(BasicBlock &BB);
void adjustInst(Instruction &I);
@@ -85,14 +87,72 @@ ModulePass *llvm::createBPFAdjustOpt() { return new BPFAdjustOpt(); }
bool BPFAdjustOpt::runOnModule(Module &M) { return BPFAdjustOptImpl(&M).run(); }
bool BPFAdjustOptImpl::run() {
+ bool Changed = adjustICmpToBuiltin();
+
for (Function &F : *M)
for (auto &BB : F) {
adjustBasicBlock(BB);
for (auto &I : BB)
adjustInst(I);
}
+ return insertPassThrough() || Changed;
+}
+
+// Commit acabad9ff6bf ("[InstCombine] try to canonicalize icmp with
+// trunc op into mask and cmp") added a transformation that converts
+// "(conv)a < power_2_const" to "a & <const>" in certain cases. The BPF
+// kernel verifier has to handle the resulting code conservatively, which
+// may cause it to reject otherwise legitimate programs.
+// Here, we change the affected icmp code to a builtin, which will be
+// restored to the original icmp code later, to prevent that InstCombine
+// transformation.
+bool BPFAdjustOptImpl::adjustICmpToBuiltin() {
+ bool Changed = false;
+ ICmpInst *ToBeDeleted = nullptr;
+ for (Function &F : *M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ if (ToBeDeleted) {
+ ToBeDeleted->eraseFromParent();
+ ToBeDeleted = nullptr;
+ }
+
+ auto *Icmp = dyn_cast<ICmpInst>(&I);
+ if (!Icmp)
+ continue;
+
+ Value *Op0 = Icmp->getOperand(0);
+ if (!isa<TruncInst>(Op0))
+ continue;
+
+ auto ConstOp1 = dyn_cast<ConstantInt>(Icmp->getOperand(1));
+ if (!ConstOp1)
+ continue;
+
+ auto ConstOp1Val = ConstOp1->getValue().getZExtValue();
+ auto Op = Icmp->getPredicate();
+ if (Op == ICmpInst::ICMP_ULT || Op == ICmpInst::ICMP_UGE) {
+ if ((ConstOp1Val - 1) & ConstOp1Val)
+ continue;
+ } else if (Op == ICmpInst::ICMP_ULE || Op == ICmpInst::ICMP_UGT) {
+ if (ConstOp1Val & (ConstOp1Val + 1))
+ continue;
+ } else {
+ continue;
+ }
+
+ Constant *Opcode =
+ ConstantInt::get(Type::getInt32Ty(BB.getContext()), Op);
+ Function *Fn = Intrinsic::getDeclaration(
+ M, Intrinsic::bpf_compare, {Op0->getType(), ConstOp1->getType()});
+ auto *NewInst = CallInst::Create(Fn, {Opcode, Op0, ConstOp1});
+ BB.getInstList().insert(I.getIterator(), NewInst);
+ Icmp->replaceAllUsesWith(NewInst);
+ Changed = true;
+ ToBeDeleted = Icmp;
+ }
- return insertPassThrough();
+ return Changed;
}
bool BPFAdjustOptImpl::insertPassThrough() {
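For context, a minimal C++ sketch (hypothetical, not part of the patch; the names lookup/table are illustrative) of the source pattern the adjustICmpToBuiltin() comment refers to: a compare of a truncated value against a power-of-two constant, which InstCombine may canonicalize into a mask-and-compare form the BPF verifier tracks less precisely.

    // Hypothetical illustration only. The frontend emits a truncating
    // conversion followed by a compare against 16; after InstCombine this
    // typically ends up as roughly "(x & 0xF0) == 0", which the BPF
    // verifier handles conservatively. The pass above hides the icmp
    // behind llvm.bpf.compare until BPFCheckAndAdjustIR restores it once
    // the mid-level IR optimizations have run.
    extern int table[16];

    int lookup(unsigned int x) {
      unsigned char idx = static_cast<unsigned char>(x); // "(conv)a"
      if (idx < 16)                                       // "< power_2_const"
        return table[idx];
      return -1;
    }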
diff --git a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 37950e105bdc..d6145f53c170 100644
--- a/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -27,7 +27,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 5239218ad003..cf1bc3f7c5bc 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -46,6 +46,7 @@ private:
void checkIR(Module &M);
bool adjustIR(Module &M);
bool removePassThroughBuiltin(Module &M);
+ bool removeCompareBuiltin(Module &M);
};
} // End anonymous namespace
@@ -120,8 +121,50 @@ bool BPFCheckAndAdjustIR::removePassThroughBuiltin(Module &M) {
return Changed;
}
+bool BPFCheckAndAdjustIR::removeCompareBuiltin(Module &M) {
+  // Remove __builtin_bpf_compare() calls, which are used to prevent
+  // certain IR optimizations. Now that the major IR optimizations are
+  // done, remove them.
+ bool Changed = false;
+ CallInst *ToBeDeleted = nullptr;
+ for (Function &F : M)
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ if (ToBeDeleted) {
+ ToBeDeleted->eraseFromParent();
+ ToBeDeleted = nullptr;
+ }
+
+ auto *Call = dyn_cast<CallInst>(&I);
+ if (!Call)
+ continue;
+ auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+ if (!GV->getName().startswith("llvm.bpf.compare"))
+ continue;
+
+ Changed = true;
+ Value *Arg0 = Call->getArgOperand(0);
+ Value *Arg1 = Call->getArgOperand(1);
+ Value *Arg2 = Call->getArgOperand(2);
+
+ auto OpVal = cast<ConstantInt>(Arg0)->getValue().getZExtValue();
+ CmpInst::Predicate Opcode = (CmpInst::Predicate)OpVal;
+
+ auto *ICmp = new ICmpInst(Opcode, Arg1, Arg2);
+ BB.getInstList().insert(Call->getIterator(), ICmp);
+
+ Call->replaceAllUsesWith(ICmp);
+ ToBeDeleted = Call;
+ }
+ return Changed;
+}
+
bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
- return removePassThroughBuiltin(M);
+ bool Changed = removePassThroughBuiltin(M);
+ Changed = removeCompareBuiltin(M) || Changed;
+ return Changed;
}
bool BPFCheckAndAdjustIR::runOnModule(Module &M) {
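A side note on the iteration style: both adjustICmpToBuiltin() and removeCompareBuiltin() defer erasure through a ToBeDeleted pointer so the instruction iterator is never invalidated. Below is a minimal sketch (not from the patch; eraseMarkerCalls and Prefix are made-up names) of the equivalent idiom using llvm::make_early_inc_range, the same helper the BPFMISimplifyPatchable change later in this diff switches to. It assumes the marked calls have already had their uses replaced.

    // Sketch only: erase marker calls while iterating. make_early_inc_range
    // advances the iterator before the loop body runs, so erasing the
    // current instruction is safe without a deferred ToBeDeleted pointer.
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"

    static bool eraseMarkerCalls(llvm::Function &F, llvm::StringRef Prefix) {
      bool Changed = false;
      for (llvm::BasicBlock &BB : F)
        for (llvm::Instruction &I : llvm::make_early_inc_range(BB)) {
          auto *Call = llvm::dyn_cast<llvm::CallInst>(&I);
          if (!Call)
            continue;
          auto *GV = llvm::dyn_cast<llvm::GlobalValue>(Call->getCalledOperand());
          if (!GV || !GV->getName().startswith(Prefix))
            continue;
          // Assumes all uses were replaced beforehand (e.g. with an ICmpInst).
          Call->eraseFromParent();
          Changed = true;
        }
      return Changed;
    }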
diff --git a/llvm/lib/Target/BPF/BPFIRPeephole.cpp b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
new file mode 100644
index 000000000000..d6a70012dca0
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
@@ -0,0 +1,118 @@
+//===------------ BPFIRPeephole.cpp - IR Peephole Transformation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// IR level peephole optimization, specifically removing @llvm.stacksave() and
+// @llvm.stackrestore().
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+
+#define DEBUG_TYPE "bpf-ir-peephole"
+
+using namespace llvm;
+
+namespace {
+
+static bool BPFIRPeepholeImpl(Function &F) {
+ LLVM_DEBUG(dbgs() << "******** BPF IR Peephole ********\n");
+
+ bool Changed = false;
+ Instruction *ToErase = nullptr;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ // The following code pattern is handled:
+ // %3 = call i8* @llvm.stacksave()
+ // store i8* %3, i8** %saved_stack, align 8
+ // ...
+ // %4 = load i8*, i8** %saved_stack, align 8
+ // call void @llvm.stackrestore(i8* %4)
+ // ...
+      // The goal is to remove the above four instructions, so we won't
+      // have instructions referencing r11 (the stack pointer) if there
+      // is ultimately no variable-length stack allocation.
+      // InstCombine also tries to remove the above instructions when it
+      // can prove this is safe (constant alloca etc.), but depending on
+      // the code pattern, it may still miss some.
+      //
+      // By unconditionally removing these instructions, we are fine when
+      // the alloca is constant. Otherwise, SelectionDAG will complain,
+      // since BPF does not support dynamic stack allocation yet.
+ if (ToErase) {
+ ToErase->eraseFromParent();
+ ToErase = nullptr;
+ }
+
+ if (auto *Call = dyn_cast<CallInst>(&I)) {
+ if (auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand())) {
+ if (!GV->getName().equals("llvm.stacksave"))
+ continue;
+ if (!Call->hasOneUser())
+ continue;
+ auto *Inst = cast<Instruction>(*Call->user_begin());
+ LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+ LLVM_DEBUG(dbgs() << "Remove:"; Inst->dump(); dbgs() << '\n');
+ Changed = true;
+ Inst->eraseFromParent();
+ ToErase = &I;
+ }
+ continue;
+ }
+
+ if (auto *LD = dyn_cast<LoadInst>(&I)) {
+ if (!LD->hasOneUser())
+ continue;
+ auto *Call = dyn_cast<CallInst>(*LD->user_begin());
+ if (!Call)
+ continue;
+ auto *GV = dyn_cast<GlobalValue>(Call->getCalledOperand());
+ if (!GV)
+ continue;
+ if (!GV->getName().equals("llvm.stackrestore"))
+ continue;
+ LLVM_DEBUG(dbgs() << "Remove:"; I.dump());
+ LLVM_DEBUG(dbgs() << "Remove:"; Call->dump(); dbgs() << '\n');
+ Changed = true;
+ Call->eraseFromParent();
+ ToErase = &I;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+class BPFIRPeephole final : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+public:
+ static char ID;
+ BPFIRPeephole() : FunctionPass(ID) {}
+};
+} // End anonymous namespace
+
+char BPFIRPeephole::ID = 0;
+INITIALIZE_PASS(BPFIRPeephole, DEBUG_TYPE, "BPF IR Peephole", false, false)
+
+FunctionPass *llvm::createBPFIRPeephole() { return new BPFIRPeephole(); }
+
+bool BPFIRPeephole::runOnFunction(Function &F) { return BPFIRPeepholeImpl(F); }
+
+PreservedAnalyses BPFIRPeepholePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ return BPFIRPeepholeImpl(F) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
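A hypothetical source-level example (not from the patch; scoped_buffer and consume are made-up names) of the construct that typically produces the @llvm.stacksave()/@llvm.stackrestore() pair the new pass strips: a variable-length array in a nested scope, which Clang brackets with a stack save/restore.

    // Hypothetical example only. The inner scope containing the VLA is
    // bracketed by llvm.stacksave/llvm.stackrestore in the emitted IR;
    // BPFIRPeephole removes the leftover save/restore so no r11-based
    // instructions remain when the allocation ends up being static.
    extern void consume(int *p, int n);

    void scoped_buffer(int n) {
      {
        int buf[n]; // variable-length array (Clang/GCC extension in C++)
        consume(buf, n);
      }
    }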
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index c543dfcfca95..90723ac04f64 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -822,7 +822,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
} else {
int64_t imm32 = MI.getOperand(2).getImm();
- // sanity check before we build J*_ri instruction.
+ // Check that the immediate fits in 32 bits before building the J*_ri instruction.
assert (isInt<32>(imm32));
BuildMI(BB, DL, TII.get(NewCC))
.addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
@@ -859,3 +859,25 @@ MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT VT) const {
return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}
+
+bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS,
+ Instruction *I) const {
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (!AM.HasBaseReg) // allow "r+i".
+ break;
+ return false; // disallow "r+r" or "r+r+i".
+ default:
+ return false;
+ }
+
+ return true;
+}
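
The new hook restricts BPF to a single base register plus an immediate. A hypothetical illustration (not part of the patch; TLI, DL, Ty and GV stand for objects taken from the surrounding compilation, TLI being the BPFTargetLowering instance):

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"

using namespace llvm;

static void showBPFAddressingModes(const TargetLowering &TLI,
                                   const DataLayout &DL, Type *Ty,
                                   GlobalValue *GV) {
  TargetLowering::AddrMode AM;

  AM.HasBaseReg = true;
  AM.BaseOffs = 8;
  // "reg + imm", e.g. *(u64 *)(r1 + 8): accepted (Scale == 0 case).
  bool RegImm = TLI.isLegalAddressingMode(DL, AM, Ty, /*AddrSpace=*/0);

  AM.Scale = 1;
  // "reg + reg" (a second, scaled register on top of the base): rejected,
  // BPF loads/stores take exactly one base register plus an immediate.
  bool RegReg = TLI.isLegalAddressingMode(DL, AM, Ty, 0);

  AM.Scale = 0;
  AM.BaseGV = GV;
  // Any mode that uses a global as the base is rejected outright.
  bool GlobalBase = TLI.isLegalAddressingMode(DL, AM, Ty, 0);

  (void)RegImm; (void)RegReg; (void)GlobalBase; // true, false, false
}
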
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index d5007425a7f8..dcc53019db75 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -130,6 +130,10 @@ private:
return false;
}
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
+
// isTruncateFree - Return true if it's free to truncate a value of
// type Ty1 to type Ty2. e.g. On BPF at alu32 mode, it's free to truncate
// a i64 value in register R1 to i32 by referencing its sub-register W1.
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index 4e24e3d911b8..eb8c48ac49de 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -164,7 +164,7 @@ bool BPFMIPreEmitChecking::processAtomicInsts(void) {
DebugLoc Empty;
const DebugLoc &DL = MI.getDebugLoc();
if (DL != Empty)
- report_fatal_error("line " + std::to_string(DL.getLine()) +
+ report_fatal_error(Twine("line ") + std::to_string(DL.getLine()) +
": Invalid usage of the XADD return value", false);
else
report_fatal_error("Invalid usage of the XADD return value", false);
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index ae1f5ea21c12..7e829ea43e89 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -97,15 +97,13 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
// Go through all uses of %1 as in %1 = ADD_rr %2, %3
const MachineOperand Op0 = Inst->getOperand(0);
- auto Begin = MRI->use_begin(Op0.getReg()), End = MRI->use_end();
- decltype(End) NextI;
- for (auto I = Begin; I != End; I = NextI) {
- NextI = std::next(I);
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_operands(Op0.getReg()))) {
// The candidate needs to have a unique definition.
- if (!MRI->getUniqueVRegDef(I->getReg()))
+ if (!MRI->getUniqueVRegDef(MO.getReg()))
continue;
- MachineInstr *DefInst = I->getParent();
+ MachineInstr *DefInst = MO.getParent();
unsigned Opcode = DefInst->getOpcode();
unsigned COREOp;
if (Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
@@ -131,7 +129,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
Opcode == BPF::STD || Opcode == BPF::STB32 || Opcode == BPF::STH32 ||
Opcode == BPF::STW32) {
const MachineOperand &Opnd = DefInst->getOperand(0);
- if (Opnd.isReg() && Opnd.getReg() == I->getReg())
+ if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
continue;
}
diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.td b/llvm/lib/Target/BPF/BPFRegisterInfo.td
index 88dec063be70..abeef5dc8aad 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.td
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.td
@@ -36,7 +36,7 @@ foreach I = 0-11 in {
}
// Register classes.
-def GPR32 : RegisterClass<"BPF", [i32], 32, (add
+def GPR32 : RegisterClass<"BPF", [i32], 64, (add
(sequence "W%u", 1, 9),
W0, // Return value
W11, // Stack Ptr
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index fac02e6476b7..77e3cd393f87 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -12,8 +12,8 @@
#include "BPFSubtarget.h"
#include "BPF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 5b0431293dc2..2fb76ab5c440 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -20,9 +20,9 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
@@ -43,6 +43,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeBPFAbstractMemberAccessLegacyPassPass(PR);
initializeBPFPreserveDITypePass(PR);
+ initializeBPFIRPeepholePass(PR);
initializeBPFAdjustOptPass(PR);
initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
@@ -107,6 +108,7 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
PM.add(createBPFAbstractMemberAccess(this));
PM.add(createBPFPreserveDIType());
+ PM.add(createBPFIRPeephole());
});
Builder.addExtension(
@@ -124,18 +126,19 @@ void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
- [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel) {
+ [=](ModulePassManager &MPM, OptimizationLevel) {
FunctionPassManager FPM;
FPM.addPass(BPFAbstractMemberAccessPass(this));
FPM.addPass(BPFPreserveDITypePass());
+ FPM.addPass(BPFIRPeepholePass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
});
PB.registerPeepholeEPCallback([=](FunctionPassManager &FPM,
- PassBuilder::OptimizationLevel Level) {
+ OptimizationLevel Level) {
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().hoistCommonInsts(true)));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [=](ModulePassManager &MPM, PassBuilder::OptimizationLevel) {
+ [=](ModulePassManager &MPM, OptimizationLevel) {
MPM.addPass(BPFAdjustOptPass());
});
}
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
index 3bc5556a62f4..6b86bf6e6cc1 100644
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -54,6 +54,23 @@ public:
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
I);
}
+
+ InstructionCost getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+ TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+ TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+ TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ if (ISD == ISD::ADD && CostKind == TTI::TCK_RecipThroughput)
+ return SCEVCheapExpansionBudget.getValue() + 1;
+
+ return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
+ Opd2Info, Opd1PropInfo,
+ Opd2PropInfo);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def
index 66cf2c90ead4..0ae4194bc512 100644
--- a/llvm/lib/Target/BPF/BTF.def
+++ b/llvm/lib/Target/BPF/BTF.def
@@ -31,5 +31,7 @@ HANDLE_BTF_KIND(13, FUNC_PROTO)
HANDLE_BTF_KIND(14, VAR)
HANDLE_BTF_KIND(15, DATASEC)
HANDLE_BTF_KIND(16, FLOAT)
+HANDLE_BTF_KIND(17, DECL_TAG)
+HANDLE_BTF_KIND(18, TYPE_TAG)
#undef HANDLE_BTF_KIND
diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/lib/Target/BPF/BTF.h
index ad3dcc14c38a..e54b97cd49a9 100644
--- a/llvm/lib/Target/BPF/BTF.h
+++ b/llvm/lib/Target/BPF/BTF.h
@@ -113,7 +113,7 @@ struct CommonType {
/// "Size" tells the size of the type it is describing.
///
/// "Type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
- /// FUNC, FUNC_PROTO and VAR.
+ /// FUNC, FUNC_PROTO, VAR, DECL_TAG and TYPE_TAG.
/// "Type" is a type_id referring to another type.
union {
uint32_t Size;
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index c1f8ea99b959..0c510686a13b 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -43,7 +43,7 @@ void BTFTypeBase::emitType(MCStreamer &OS) {
BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
bool NeedsFixup)
- : DTy(DTy), NeedsFixup(NeedsFixup) {
+ : DTy(DTy), NeedsFixup(NeedsFixup), Name(DTy->getName()) {
switch (Tag) {
case dwarf::DW_TAG_pointer_type:
Kind = BTF::BTF_KIND_PTR;
@@ -66,14 +66,23 @@ BTFTypeDerived::BTFTypeDerived(const DIDerivedType *DTy, unsigned Tag,
BTFType.Info = Kind << 24;
}
+/// Used by DW_TAG_pointer_type only.
+BTFTypeDerived::BTFTypeDerived(unsigned NextTypeId, unsigned Tag,
+ StringRef Name)
+ : DTy(nullptr), NeedsFixup(false), Name(Name) {
+ Kind = BTF::BTF_KIND_PTR;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = NextTypeId;
+}
+
void BTFTypeDerived::completeType(BTFDebug &BDebug) {
if (IsCompleted)
return;
IsCompleted = true;
- BTFType.NameOff = BDebug.addString(DTy->getName());
+ BTFType.NameOff = BDebug.addString(Name);
- if (NeedsFixup)
+ if (NeedsFixup || !DTy)
return;
// The base type for PTR/CONST/VOLATILE could be void.
@@ -386,6 +395,55 @@ void BTFTypeFloat::completeType(BTFDebug &BDebug) {
BTFType.NameOff = BDebug.addString(Name);
}
+BTFTypeDeclTag::BTFTypeDeclTag(uint32_t BaseTypeId, int ComponentIdx,
+ StringRef Tag)
+ : Tag(Tag) {
+ Kind = BTF::BTF_KIND_DECL_TAG;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = BaseTypeId;
+ Info = ComponentIdx;
+}
+
+void BTFTypeDeclTag::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
+ BTFType.NameOff = BDebug.addString(Tag);
+}
+
+void BTFTypeDeclTag::emitType(MCStreamer &OS) {
+ BTFTypeBase::emitType(OS);
+ OS.emitInt32(Info);
+}
+
+BTFTypeTypeTag::BTFTypeTypeTag(uint32_t NextTypeId, StringRef Tag)
+ : DTy(nullptr), Tag(Tag) {
+ Kind = BTF::BTF_KIND_TYPE_TAG;
+ BTFType.Info = Kind << 24;
+ BTFType.Type = NextTypeId;
+}
+
+BTFTypeTypeTag::BTFTypeTypeTag(const DIDerivedType *DTy, StringRef Tag)
+ : DTy(DTy), Tag(Tag) {
+ Kind = BTF::BTF_KIND_TYPE_TAG;
+ BTFType.Info = Kind << 24;
+}
+
+void BTFTypeTypeTag::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+ BTFType.NameOff = BDebug.addString(Tag);
+ if (DTy) {
+ const DIType *ResolvedType = DTy->getBaseType();
+ if (!ResolvedType)
+ BTFType.Type = 0;
+ else
+ BTFType.Type = BDebug.getTypeId(ResolvedType);
+ }
+}
+
uint32_t BTFStringTable::addString(StringRef S) {
// Check whether the string already exists.
for (auto &OffsetM : OffsetToIdMap) {
@@ -475,6 +533,25 @@ void BTFDebug::visitSubroutineType(
}
}
+void BTFDebug::processDeclAnnotations(DINodeArray Annotations,
+ uint32_t BaseTypeId,
+ int ComponentIdx) {
+ if (!Annotations)
+ return;
+
+ for (const Metadata *Annotation : Annotations->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotation);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+ if (!Name->getString().equals("btf_decl_tag"))
+ continue;
+
+ const MDString *Value = cast<MDString>(MD->getOperand(1));
+ auto TypeEntry = std::make_unique<BTFTypeDeclTag>(BaseTypeId, ComponentIdx,
+ Value->getString());
+ addType(std::move(TypeEntry));
+ }
+}
+
/// Handle structure/union types.
void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
uint32_t &TypeId) {
@@ -498,9 +575,17 @@ void BTFDebug::visitStructType(const DICompositeType *CTy, bool IsStruct,
StructTypes.push_back(TypeEntry.get());
TypeId = addType(std::move(TypeEntry), CTy);
+ // Check struct/union annotations
+ processDeclAnnotations(CTy->getAnnotations(), TypeId, -1);
+
// Visit all struct members.
- for (const auto *Element : Elements)
- visitTypeEntry(cast<DIDerivedType>(Element));
+ int FieldNo = 0;
+ for (const auto *Element : Elements) {
+ const auto Elem = cast<DIDerivedType>(Element);
+ visitTypeEntry(Elem);
+ processDeclAnnotations(Elem->getAnnotations(), TypeId, FieldNo);
+ FieldNo++;
+ }
}
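
processDeclAnnotations() above walks annotation nodes of a fixed shape. A minimal sketch of that shape, assuming the front end attaches the annotation (e.g. clang's __attribute__((btf_decl_tag("tag1")))) as an MDNode whose operand 0 is the string "btf_decl_tag" and whose operand 1 is the tag value; makeDeclTag is illustrative only.

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

static MDNode *makeDeclTag(LLVMContext &Ctx, StringRef Value) {
  Metadata *Ops[] = {MDString::get(Ctx, "btf_decl_tag"),
                     MDString::get(Ctx, Value)};
  return MDNode::get(Ctx, Ops); // e.g. !{!"btf_decl_tag", !"tag1"}
}
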
void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
@@ -609,11 +694,49 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
}
}
- if (Tag == dwarf::DW_TAG_pointer_type || Tag == dwarf::DW_TAG_typedef ||
- Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
- Tag == dwarf::DW_TAG_restrict_type) {
+ if (Tag == dwarf::DW_TAG_pointer_type) {
+ SmallVector<const MDString *, 4> MDStrs;
+ DINodeArray Annots = DTy->getAnnotations();
+ if (Annots) {
+ // For a type such as "int __tag1 __tag2 *p", MDStrs will contain
+ // [__tag1, __tag2].
+ for (const Metadata *Annotations : Annots->operands()) {
+ const MDNode *MD = cast<MDNode>(Annotations);
+ const MDString *Name = cast<MDString>(MD->getOperand(0));
+ if (!Name->getString().equals("btf_type_tag"))
+ continue;
+ MDStrs.push_back(cast<MDString>(MD->getOperand(1)));
+ }
+ }
+
+ if (MDStrs.size() > 0) {
+ // With MDStrs [__tag1, __tag2], the output type chain looks like
+ // PTR -> __tag2 -> __tag1 -> BaseType
+ // Below we construct the BTF types in the order __tag1, __tag2 and
+ // finally PTR.
+ auto TypeEntry =
+ std::make_unique<BTFTypeTypeTag>(DTy, MDStrs[0]->getString());
+ unsigned TmpTypeId = addType(std::move(TypeEntry));
+ for (unsigned I = 1; I < MDStrs.size(); I++) {
+ const MDString *Value = MDStrs[I];
+ TypeEntry =
+ std::make_unique<BTFTypeTypeTag>(TmpTypeId, Value->getString());
+ TmpTypeId = addType(std::move(TypeEntry));
+ }
+ auto TypeDEntry =
+ std::make_unique<BTFTypeDerived>(TmpTypeId, Tag, DTy->getName());
+ TypeId = addType(std::move(TypeDEntry), DTy);
+ } else {
+ auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, false);
+ TypeId = addType(std::move(TypeEntry), DTy);
+ }
+ } else if (Tag == dwarf::DW_TAG_typedef || Tag == dwarf::DW_TAG_const_type ||
+ Tag == dwarf::DW_TAG_volatile_type ||
+ Tag == dwarf::DW_TAG_restrict_type) {
auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, false);
TypeId = addType(std::move(TypeEntry), DTy);
+ if (Tag == dwarf::DW_TAG_typedef)
+ processDeclAnnotations(DTy->getAnnotations(), TypeId, -1);
} else if (Tag != dwarf::DW_TAG_member) {
return;
}
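
To make the ordering concrete, a small standalone model of the chain construction (editorial sketch; FakeEntry and buildChain are made-up names, and it assumes the pointee type already has id BaseId, whereas the real code resolves that id later in completeType):

#include <cstdio>
#include <string>
#include <vector>

struct FakeEntry {
  unsigned Id;        // BTF type id, assigned in emission order
  std::string Kind;   // "TYPE_TAG" or "PTR"
  std::string Name;   // tag string; empty for PTR
  unsigned PointsTo;  // the "Type" field: the id this entry refers to
};

// For "int __tag1 __tag2 *p" the walk emits __tag1, then __tag2, then PTR;
// each new entry points at the previous one, so the final chain reads
// PTR -> __tag2 -> __tag1 -> int.
static std::vector<FakeEntry> buildChain(const std::vector<std::string> &Tags,
                                         unsigned BaseId, unsigned NextId) {
  std::vector<FakeEntry> Out;
  unsigned Prev = BaseId;
  for (const std::string &Tag : Tags) {
    Out.push_back({NextId, "TYPE_TAG", Tag, Prev});
    Prev = NextId++;
  }
  Out.push_back({NextId, "PTR", "", Prev});
  return Out;
}

int main() {
  for (const FakeEntry &E : buildChain({"__tag1", "__tag2"}, /*BaseId=*/1, 2))
    std::printf("[%u] %s %s -> %u\n", E.Id, E.Kind.c_str(), E.Name.c_str(),
                E.PointsTo);
  // Prints:
  // [2] TYPE_TAG __tag1 -> 1
  // [3] TYPE_TAG __tag2 -> 2
  // [4] PTR  -> 3
  return 0;
}
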
@@ -783,7 +906,9 @@ void BTFDebug::emitBTFSection() {
return;
MCContext &Ctx = OS.getContext();
- OS.SwitchSection(Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0));
+ MCSectionELF *Sec = Ctx.getELFSection(".BTF", ELF::SHT_PROGBITS, 0);
+ Sec->setAlignment(Align(4));
+ OS.SwitchSection(Sec);
// Emit header.
emitCommonHeader();
@@ -821,7 +946,9 @@ void BTFDebug::emitBTFExtSection() {
return;
MCContext &Ctx = OS.getContext();
- OS.SwitchSection(Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0));
+ MCSectionELF *Sec = Ctx.getELFSection(".BTF.ext", ELF::SHT_PROGBITS, 0);
+ Sec->setAlignment(Align(4));
+ OS.SwitchSection(Sec);
// Emit header.
emitCommonHeader();
@@ -964,6 +1091,17 @@ void BTFDebug::beginFunctionImpl(const MachineFunction *MF) {
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
uint32_t FuncTypeId = addType(std::move(FuncTypeEntry));
+ // Process argument annotations.
+ for (const DINode *DN : SP->getRetainedNodes()) {
+ if (const auto *DV = dyn_cast<DILocalVariable>(DN)) {
+ uint32_t Arg = DV->getArg();
+ if (Arg)
+ processDeclAnnotations(DV->getAnnotations(), FuncTypeId, Arg - 1);
+ }
+ }
+
+ processDeclAnnotations(SP->getAnnotations(), FuncTypeId, -1);
+
for (const auto &TypeEntry : TypeEntries)
TypeEntry->completeType(*this);
@@ -1176,11 +1314,13 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
continue;
uint32_t GVTypeId = 0;
+ DIGlobalVariable *DIGlobal = nullptr;
for (auto *GVE : GVs) {
+ DIGlobal = GVE->getVariable();
if (SecName.startswith(".maps"))
- visitMapDefType(GVE->getVariable()->getType(), GVTypeId);
+ visitMapDefType(DIGlobal->getType(), GVTypeId);
else
- visitTypeEntry(GVE->getVariable()->getType(), GVTypeId, false, false);
+ visitTypeEntry(DIGlobal->getType(), GVTypeId, false, false);
break;
}
@@ -1212,6 +1352,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
std::make_unique<BTFKindVar>(Global.getName(), GVTypeId, GVarInfo);
uint32_t VarId = addType(std::move(VarEntry));
+ processDeclAnnotations(DIGlobal->getAnnotations(), VarId, -1);
+
// An empty SecName means an extern variable without section attribute.
if (SecName.empty())
continue;
@@ -1306,6 +1448,9 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
auto FuncTypeEntry =
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
uint32_t FuncId = addType(std::move(FuncTypeEntry));
+
+ processDeclAnnotations(SP->getAnnotations(), FuncId, -1);
+
if (F->hasSection()) {
StringRef SecName = F->getSection();
diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h
index 2fdcf8500b7f..7c30675c553c 100644
--- a/llvm/lib/Target/BPF/BTFDebug.h
+++ b/llvm/lib/Target/BPF/BTFDebug.h
@@ -64,9 +64,11 @@ public:
class BTFTypeDerived : public BTFTypeBase {
const DIDerivedType *DTy;
bool NeedsFixup;
+ StringRef Name;
public:
BTFTypeDerived(const DIDerivedType *Ty, unsigned Tag, bool NeedsFixup);
+ BTFTypeDerived(unsigned NextTypeId, unsigned Tag, StringRef Name);
void completeType(BTFDebug &BDebug) override;
void emitType(MCStreamer &OS) override;
void setPointeeType(uint32_t PointeeType);
@@ -204,6 +206,28 @@ public:
void completeType(BTFDebug &BDebug) override;
};
+/// Handle decl tags.
+class BTFTypeDeclTag : public BTFTypeBase {
+ uint32_t Info;
+ StringRef Tag;
+
+public:
+ BTFTypeDeclTag(uint32_t BaseTypeId, int ComponentId, StringRef Tag);
+ uint32_t getSize() override { return BTFTypeBase::getSize() + 4; }
+ void completeType(BTFDebug &BDebug) override;
+ void emitType(MCStreamer &OS) override;
+};
+
+class BTFTypeTypeTag : public BTFTypeBase {
+ const DIDerivedType *DTy;
+ StringRef Tag;
+
+public:
+ BTFTypeTypeTag(uint32_t NextTypeId, StringRef Tag);
+ BTFTypeTypeTag(const DIDerivedType *DTy, StringRef Tag);
+ void completeType(BTFDebug &BDebug) override;
+};
+
/// String table.
class BTFStringTable {
/// String table size in bytes.
@@ -313,6 +337,10 @@ class BTFDebug : public DebugHandlerBase {
/// Generate types for function prototypes.
void processFuncPrototypes(const Function *);
+ /// Generate types for decl annotations.
+ void processDeclAnnotations(DINodeArray Annotations, uint32_t BaseTypeId,
+ int ComponentId);
+
/// Generate one field relocation record.
void generatePatchImmReloc(const MCSymbol *ORSym, uint32_t RootId,
const GlobalVariable *, bool IsAma);
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index 3a1492743bf4..3f643d47f934 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cstdint>
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 6687dbe25364..bacd00360f82 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -43,12 +43,14 @@ public:
unsigned getNumFixupKinds() const override { return 1; }
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
-bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool BPFAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if ((Count % 8) != 0)
return false;
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 8fb7d7e89f09..5a1e251cd29c 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "BPFGenInstrInfo.inc"
diff --git a/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index 49eb9ad62c56..d7cdcae916aa 100644
--- a/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/BPFTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index f2a381190fe7..ebc04b40d428 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -1,12 +1,12 @@
-//===-- CSKYAsmParser.cpp - Parse CSKY assembly to MCInst instructions --===//
+//===---- CSKYAsmParser.cpp - Parse CSKY assembly to MCInst instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/CSKYInstPrinter.h"
#include "MCTargetDesc/CSKYMCExpr.h"
#include "MCTargetDesc/CSKYMCTargetDesc.h"
#include "TargetInfo/CSKYTargetInfo.h"
@@ -20,10 +20,14 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "csky-asm-parser"
using namespace llvm;
@@ -32,6 +36,8 @@ struct CSKYOperand;
class CSKYAsmParser : public MCTargetAsmParser {
+ const MCRegisterInfo *MRI;
+
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
int64_t Lower, int64_t Upper, Twine Msg);
@@ -52,6 +58,9 @@ class CSKYAsmParser : public MCTargetAsmParser {
OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
+ bool processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands,
+ MCStreamer &Out);
+
// Auto-generated instruction matching functions
#define GET_ASSEMBLER_HEADER
#include "CSKYGenAsmMatcher.inc"
@@ -61,12 +70,18 @@ class CSKYAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parseBaseRegImm(OperandVector &Operands);
OperandMatchResultTy parseCSKYSymbol(OperandVector &Operands);
OperandMatchResultTy parseConstpoolSymbol(OperandVector &Operands);
+ OperandMatchResultTy parseDataSymbol(OperandVector &Operands);
+ OperandMatchResultTy parsePSRFlag(OperandVector &Operands);
+ OperandMatchResultTy parseRegSeq(OperandVector &Operands);
+ OperandMatchResultTy parseRegList(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
public:
enum CSKYMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresSameSrcAndDst,
+ Match_InvalidRegOutOfRange,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "CSKYGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
@@ -81,10 +96,14 @@ public:
/// Instances of this class represent a parsed machine instruction.
struct CSKYOperand : public MCParsedAsmOperand {
+
enum KindTy {
Token,
Register,
Immediate,
+ RegisterSeq,
+ CPOP,
+ RegisterList
} Kind;
struct RegOp {
@@ -95,11 +114,34 @@ struct CSKYOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
+ struct ConstpoolOp {
+ const MCExpr *Val;
+ };
+
+ struct RegSeqOp {
+ unsigned RegNumFrom;
+ unsigned RegNumTo;
+ };
+
+ struct RegListOp {
+ unsigned List1From = 0;
+ unsigned List1To = 0;
+ unsigned List2From = 0;
+ unsigned List2To = 0;
+ unsigned List3From = 0;
+ unsigned List3To = 0;
+ unsigned List4From = 0;
+ unsigned List4To = 0;
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
+ ConstpoolOp CPool;
+ RegSeqOp RegSeq;
+ RegListOp RegList;
};
CSKYOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -113,18 +155,31 @@ public:
case Register:
Reg = o.Reg;
break;
+ case RegisterSeq:
+ RegSeq = o.RegSeq;
+ break;
+ case CPOP:
+ CPool = o.CPool;
+ break;
case Immediate:
Imm = o.Imm;
break;
case Token:
Tok = o.Tok;
break;
+ case RegisterList:
+ RegList = o.RegList;
+ break;
}
}
bool isToken() const override { return Kind == Token; }
bool isReg() const override { return Kind == Register; }
bool isImm() const override { return Kind == Immediate; }
+ bool isRegisterSeq() const { return Kind == RegisterSeq; }
+ bool isRegisterList() const { return Kind == RegisterList; }
+ bool isConstPoolOp() const { return Kind == CPOP; }
+
bool isMem() const override { return false; }
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm) {
@@ -163,29 +218,132 @@ public:
return IsConstantImm && isShiftedInt<num, shift>(Imm);
}
+ bool isUImm1() const { return isUImm<1>(); }
bool isUImm2() const { return isUImm<2>(); }
+ bool isUImm3() const { return isUImm<3>(); }
+ bool isUImm4() const { return isUImm<4>(); }
bool isUImm5() const { return isUImm<5>(); }
+ bool isUImm6() const { return isUImm<6>(); }
+ bool isUImm7() const { return isUImm<7>(); }
+ bool isUImm8() const { return isUImm<8>(); }
bool isUImm12() const { return isUImm<12>(); }
bool isUImm16() const { return isUImm<16>(); }
-
+ bool isUImm20() const { return isUImm<20>(); }
+ bool isUImm24() const { return isUImm<24>(); }
+
+ bool isOImm3() const { return isOImm<3>(); }
+ bool isOImm4() const { return isOImm<4>(); }
+ bool isOImm5() const { return isOImm<5>(); }
+ bool isOImm6() const { return isOImm<6>(); }
+ bool isOImm8() const { return isOImm<8>(); }
bool isOImm12() const { return isOImm<12>(); }
bool isOImm16() const { return isOImm<16>(); }
+ bool isSImm8() const { return isSImm<8>(); }
+
+ bool isUImm5Shift1() { return isUImm<5, 1>(); }
+ bool isUImm5Shift2() { return isUImm<5, 2>(); }
+ bool isUImm7Shift1() { return isUImm<7, 1>(); }
+ bool isUImm7Shift2() { return isUImm<7, 2>(); }
+ bool isUImm7Shift3() { return isUImm<7, 3>(); }
+ bool isUImm8Shift2() { return isUImm<8, 2>(); }
+ bool isUImm8Shift3() { return isUImm<8, 3>(); }
+ bool isUImm8Shift8() { return isUImm<8, 8>(); }
+ bool isUImm8Shift16() { return isUImm<8, 16>(); }
+ bool isUImm8Shift24() { return isUImm<8, 24>(); }
bool isUImm12Shift1() { return isUImm<12, 1>(); }
bool isUImm12Shift2() { return isUImm<12, 2>(); }
+ bool isUImm16Shift8() { return isUImm<16, 8>(); }
+ bool isUImm16Shift16() { return isUImm<16, 16>(); }
+ bool isUImm24Shift8() { return isUImm<24, 8>(); }
bool isSImm16Shift1() { return isSImm<16, 1>(); }
- bool isCSKYSymbol() const {
+ bool isCSKYSymbol() const { return isImm(); }
+
+ bool isConstpool() const { return isConstPoolOp(); }
+ bool isDataSymbol() const { return isConstPoolOp(); }
+
+ bool isSPOperand() const {
+ if (!isReg())
+ return false;
+ return getReg() == CSKY::R14;
+ }
+
+ bool isPSRFlag() const {
int64_t Imm;
- // Must be of 'immediate' type but not a constant.
- return isImm() && !evaluateConstantImm(getImm(), Imm);
+ // Must be of 'immediate' type and a constant.
+ if (!isImm() || !evaluateConstantImm(getImm(), Imm))
+ return false;
+
+ return isUInt<5>(Imm);
+ }
+
+ template <unsigned MIN, unsigned MAX> bool isRegSeqTemplate() const {
+ if (!isRegisterSeq())
+ return false;
+
+ std::pair<unsigned, unsigned> regSeq = getRegSeq();
+
+ return MIN <= regSeq.first && regSeq.first <= regSeq.second &&
+ regSeq.second <= MAX;
+ }
+
+ bool isRegSeq() const { return isRegSeqTemplate<CSKY::R0, CSKY::R31>(); }
+
+ static bool isLegalRegList(unsigned from, unsigned to) {
+ if (from == 0 && to == 0)
+ return true;
+
+ if (from == to) {
+ if (from != CSKY::R4 && from != CSKY::R15 && from != CSKY::R16 &&
+ from != CSKY::R28)
+ return false;
+
+ return true;
+ } else {
+ if (from != CSKY::R4 && from != CSKY::R16)
+ return false;
+
+ if (from == CSKY::R4 && to > CSKY::R4 && to < CSKY::R12)
+ return true;
+ else if (from == CSKY::R16 && to > CSKY::R16 && to < CSKY::R18)
+ return true;
+ else
+ return false;
+ }
+ }
+
+ bool isRegList() const {
+ if (!isRegisterList())
+ return false;
+
+ auto regList = getRegList();
+
+ if (!isLegalRegList(regList.List1From, regList.List1To))
+ return false;
+ if (!isLegalRegList(regList.List2From, regList.List2To))
+ return false;
+ if (!isLegalRegList(regList.List3From, regList.List3To))
+ return false;
+ if (!isLegalRegList(regList.List4From, regList.List4To))
+ return false;
+
+ return true;
}
- bool isConstpoolSymbol() const {
+ bool isExtImm6() {
+ if (!isImm())
+ return false;
+
int64_t Imm;
- // Must be of 'immediate' type but not a constant.
- return isImm() && !evaluateConstantImm(getImm(), Imm);
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm);
+ if (!IsConstantImm)
+ return false;
+
+ int uimm4 = Imm & 0xf;
+
+ return isShiftedUInt<6, 0>(Imm) && uimm4 >= 0 && uimm4 <= 14;
}
/// Gets location of the first token of this operand.
@@ -198,23 +356,64 @@ public:
return Reg.RegNum;
}
+ std::pair<unsigned, unsigned> getRegSeq() const {
+ assert(Kind == RegisterSeq && "Invalid type access!");
+ return std::pair<unsigned, unsigned>(RegSeq.RegNumFrom, RegSeq.RegNumTo);
+ }
+
+ RegListOp getRegList() const {
+ assert(Kind == RegisterList && "Invalid type access!");
+ return RegList;
+ }
+
const MCExpr *getImm() const {
assert(Kind == Immediate && "Invalid type access!");
return Imm.Val;
}
+ const MCExpr *getConstpoolOp() const {
+ assert(Kind == CPOP && "Invalid type access!");
+ return CPool.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid type access!");
return Tok;
}
void print(raw_ostream &OS) const override {
+ auto RegName = [](unsigned Reg) {
+ if (Reg)
+ return CSKYInstPrinter::getRegisterName(Reg);
+ else
+ return "noreg";
+ };
+
switch (Kind) {
+ case CPOP:
+ OS << *getConstpoolOp();
+ break;
case Immediate:
OS << *getImm();
break;
- case Register:
- OS << "<register x" << getReg() << ">";
+ case KindTy::Register:
+ OS << "<register " << RegName(getReg()) << ">";
+ break;
+ case RegisterSeq:
+ OS << "<register-seq ";
+ OS << RegName(getRegSeq().first) << "-" << RegName(getRegSeq().second)
+ << ">";
+ break;
+ case RegisterList:
+ OS << "<register-list ";
+ OS << RegName(getRegList().List1From) << "-"
+ << RegName(getRegList().List1To) << ",";
+ OS << RegName(getRegList().List2From) << "-"
+ << RegName(getRegList().List2To) << ",";
+ OS << RegName(getRegList().List3From) << "-"
+ << RegName(getRegList().List3To) << ",";
+ OS << RegName(getRegList().List4From) << "-"
+ << RegName(getRegList().List4To);
break;
case Token:
OS << "'" << getToken() << "'";
@@ -239,6 +438,51 @@ public:
return Op;
}
+ static std::unique_ptr<CSKYOperand> createRegSeq(unsigned RegNoFrom,
+ unsigned RegNoTo, SMLoc S) {
+ auto Op = std::make_unique<CSKYOperand>(RegisterSeq);
+ Op->RegSeq.RegNumFrom = RegNoFrom;
+ Op->RegSeq.RegNumTo = RegNoTo;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<CSKYOperand>
+ createRegList(SmallVector<unsigned, 4> reglist, SMLoc S) {
+ auto Op = std::make_unique<CSKYOperand>(RegisterList);
+ Op->RegList.List1From = 0;
+ Op->RegList.List1To = 0;
+ Op->RegList.List2From = 0;
+ Op->RegList.List2To = 0;
+ Op->RegList.List3From = 0;
+ Op->RegList.List3To = 0;
+ Op->RegList.List4From = 0;
+ Op->RegList.List4To = 0;
+
+ for (unsigned i = 0; i < reglist.size(); i += 2) {
+ if (Op->RegList.List1From == 0) {
+ Op->RegList.List1From = reglist[i];
+ Op->RegList.List1To = reglist[i + 1];
+ } else if (Op->RegList.List2From == 0) {
+ Op->RegList.List2From = reglist[i];
+ Op->RegList.List2To = reglist[i + 1];
+ } else if (Op->RegList.List3From == 0) {
+ Op->RegList.List3From = reglist[i];
+ Op->RegList.List3To = reglist[i + 1];
+ } else if (Op->RegList.List4From == 0) {
+ Op->RegList.List4From = reglist[i];
+ Op->RegList.List4To = reglist[i + 1];
+ } else {
+ assert(0);
+ }
+ }
+
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
static std::unique_ptr<CSKYOperand> createImm(const MCExpr *Val, SMLoc S,
SMLoc E) {
auto Op = std::make_unique<CSKYOperand>(Immediate);
@@ -248,6 +492,15 @@ public:
return Op;
}
+ static std::unique_ptr<CSKYOperand> createConstpoolOp(const MCExpr *Val,
+ SMLoc S, SMLoc E) {
+ auto Op = std::make_unique<CSKYOperand>(CPOP);
+ Op->CPool.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
assert(Expr && "Expr shouldn't be null!");
if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
@@ -266,6 +519,70 @@ public:
assert(N == 1 && "Invalid number of operands!");
addExpr(Inst, getImm());
}
+
+ void addConstpoolOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createExpr(getConstpoolOp()));
+ }
+
+ void addRegSeqOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ auto regSeq = getRegSeq();
+
+ Inst.addOperand(MCOperand::createReg(regSeq.first));
+ Inst.addOperand(MCOperand::createReg(regSeq.second));
+ }
+
+ static unsigned getListValue(unsigned ListFrom, unsigned ListTo) {
+ if (ListFrom == ListTo && ListFrom == CSKY::R15)
+ return (1 << 4);
+ else if (ListFrom == ListTo && ListFrom == CSKY::R28)
+ return (1 << 8);
+ else if (ListFrom == CSKY::R4)
+ return ListTo - ListFrom + 1;
+ else if (ListFrom == CSKY::R16)
+ return ((ListTo - ListFrom + 1) << 5);
+ else
+ return 0;
+ }
+
+ void addRegListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ auto regList = getRegList();
+
+ unsigned V = 0;
+
+ unsigned T = getListValue(regList.List1From, regList.List1To);
+ if (T != 0)
+ V = V | T;
+
+ T = getListValue(regList.List2From, regList.List2To);
+ if (T != 0)
+ V = V | T;
+
+ T = getListValue(regList.List3From, regList.List3To);
+ if (T != 0)
+ V = V | T;
+
+ T = getListValue(regList.List4From, regList.List4To);
+ if (T != 0)
+ V = V | T;
+
+ Inst.addOperand(MCOperand::createImm(V));
+ }
+
+ bool isValidForTie(const CSKYOperand &Other) const {
+ if (Kind != Other.Kind)
+ return false;
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unexpected kind");
+ return false;
+ case Register:
+ return Reg.RegNum == Other.Reg.RegNum;
+ }
+ }
};
} // end anonymous namespace.
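
For reference, a standalone sketch of the immediate that addRegListOperands builds for a register-list operand such as "r4-r11, r15, r16-r17, r28" (register numbers are plain integers here instead of the CSKY::R* enum values; listValue mirrors getListValue above):

#include <cassert>
#include <cstdio>

static unsigned listValue(unsigned From, unsigned To) {
  if (From == To && From == 15) return 1u << 4;   // r15 alone: bit 4
  if (From == To && From == 28) return 1u << 8;   // r28 alone: bit 8
  if (From == 4)  return To - From + 1;           // count of r4..To (1..8)
  if (From == 16) return (To - From + 1) << 5;    // count of r16..To, << 5
  return 0;
}

int main() {
  // "r4-r11, r15, r16-r17, r28"
  unsigned Imm = listValue(4, 11) | listValue(15, 15) |
                 listValue(16, 17) | listValue(28, 28);
  assert(Imm == 0x158);                           // 8 | 0x10 | 0x40 | 0x100
  std::printf("list immediate = 0x%x\n", Imm);
  return 0;
}
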
@@ -299,9 +616,7 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
default:
break;
case Match_Success:
- Inst.setLoc(IDLoc);
- Out.emitInstruction(Inst, getSTI());
- return false;
+ return processInstruction(Inst, IDLoc, Operands, Out);
case Match_MissingFeature: {
assert(MissingFeatures.any() && "Unknown missing features!");
ListSeparator LS;
@@ -347,26 +662,79 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default:
break;
+ case Match_InvalidSImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 7),
+ (1 << 7) - 1);
+ case Match_InvalidOImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 3));
+ case Match_InvalidOImm4:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 4));
+ case Match_InvalidOImm5:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 5));
+ case Match_InvalidOImm6:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 6));
+ case Match_InvalidOImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 8));
case Match_InvalidOImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 12));
case Match_InvalidOImm16:
return generateImmOutOfRangeError(Operands, ErrorInfo, 1, (1 << 16));
+ case Match_InvalidUImm1:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 1) - 1);
case Match_InvalidUImm2:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1);
+ case Match_InvalidUImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1);
+ case Match_InvalidUImm4:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ case Match_InvalidUImm6:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
+ case Match_InvalidUImm7:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 7) - 1);
+ case Match_InvalidUImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 8) - 1);
case Match_InvalidUImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 12) - 1);
+ case Match_InvalidUImm16:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 16) - 1);
+ case Match_InvalidUImm5Shift1:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 5) - 2,
+ "immediate must be a multiple of 2 bytes in the range");
case Match_InvalidUImm12Shift1:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 12) - 2,
"immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidUImm5Shift2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 5) - 4,
+ "immediate must be a multiple of 4 bytes in the range");
+ case Match_InvalidUImm7Shift1:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 7) - 2,
+ "immediate must be a multiple of 2 bytes in the range");
+ case Match_InvalidUImm7Shift2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 7) - 4,
+ "immediate must be a multiple of 4 bytes in the range");
+ case Match_InvalidUImm8Shift2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 8) - 4,
+ "immediate must be a multiple of 4 bytes in the range");
+ case Match_InvalidUImm8Shift3:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 8) - 8,
+ "immediate must be a multiple of 8 bytes in the range");
+ case Match_InvalidUImm8Shift8:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, 0, (1 << 8) - 256,
+ "immediate must be a multiple of 256 bytes in the range");
case Match_InvalidUImm12Shift2:
return generateImmOutOfRangeError(
Operands, ErrorInfo, 0, (1 << 12) - 4,
"immediate must be a multiple of 4 bytes in the range");
- case Match_InvalidUImm16:
- return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 16) - 1);
case Match_InvalidCSKYSymbol: {
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a symbol name");
@@ -375,15 +743,68 @@ bool CSKYAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a constpool symbol name");
}
+ case Match_InvalidPSRFlag: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "psrset operand is not valid");
+ }
+ case Match_InvalidRegSeq: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "Register sequence is not valid");
}
-
+ case Match_InvalidRegOutOfRange: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "register is out of range");
+ }
+ case Match_InvalidSPOperand: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be sp register");
+ }
+ case Match_RequiresSameSrcAndDst: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "src and dst operand must be same");
+ }
+ case Match_InvalidRegList: {
+ SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "invalid register list");
+ }
+ }
+ LLVM_DEBUG(dbgs() << "Result = " << Result);
llvm_unreachable("Unknown match type detected!");
}
+bool CSKYAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
+ OperandVector &Operands,
+ MCStreamer &Out) {
+
+ if (Inst.getOpcode() == CSKY::LDQ32 || Inst.getOpcode() == CSKY::STQ32) {
+ if (Inst.getOperand(1).getReg() != CSKY::R4 ||
+ Inst.getOperand(2).getReg() != CSKY::R7) {
+ return Error(IDLoc, "Register sequence is not valid. 'r4-r7' expected");
+ }
+ Inst.setOpcode(Inst.getOpcode() == CSKY::LDQ32 ? CSKY::LDM32 : CSKY::STM32);
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+ } else if (Inst.getOpcode() == CSKY::SEXT32 ||
+ Inst.getOpcode() == CSKY::ZEXT32) {
+ if (Inst.getOperand(2).getImm() < Inst.getOperand(3).getImm())
+ return Error(IDLoc, "msb must be greater or equal to lsb");
+ } else if (Inst.getOpcode() == CSKY::INS32) {
+ if (Inst.getOperand(3).getImm() < Inst.getOperand(4).getImm())
+ return Error(IDLoc, "msb must be greater or equal to lsb");
+ } else if (Inst.getOpcode() == CSKY::IDLY32) {
+ if (Inst.getOperand(0).getImm() > 32 || Inst.getOperand(0).getImm() < 0)
+ return Error(IDLoc, "n must be in range [0,32]");
+ }
+
+ Out.emitInstruction(Inst, getSTI());
+ return false;
+}
+
// Attempts to match Name as a register (either using the default name or
// alternative ABI names), setting RegNo to the matching register. Upon
// failure, returns true and sets RegNo to 0.
-static bool matchRegisterNameHelper(MCRegister &RegNo, StringRef Name) {
+static bool matchRegisterNameHelper(const MCSubtargetInfo &STI,
+ MCRegister &RegNo, StringRef Name) {
RegNo = MatchRegisterName(Name);
if (RegNo == CSKY::NoRegister)
@@ -399,12 +820,12 @@ bool CSKYAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
EndLoc = Tok.getEndLoc();
StringRef Name = getLexer().getTok().getIdentifier();
- if (!matchRegisterNameHelper((MCRegister &)RegNo, Name)) {
+ if (!matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name)) {
getParser().Lex(); // Eat identifier token.
return false;
}
- return Error(StartLoc, "invalid register name");
+ return MatchOperand_NoMatch;
}
OperandMatchResultTy CSKYAsmParser::parseRegister(OperandVector &Operands) {
@@ -418,7 +839,7 @@ OperandMatchResultTy CSKYAsmParser::parseRegister(OperandVector &Operands) {
StringRef Name = getLexer().getTok().getIdentifier();
MCRegister RegNo;
- if (matchRegisterNameHelper((MCRegister &)RegNo, Name))
+ if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
return MatchOperand_NoMatch;
getLexer().Lex();
@@ -439,7 +860,13 @@ OperandMatchResultTy CSKYAsmParser::parseBaseRegImm(OperandVector &Operands) {
if (parseRegister(Operands) != MatchOperand_Success) {
getLexer().UnLex(Tok);
Operands.pop_back();
- return MatchOperand_ParseFail;
+ return MatchOperand_NoMatch;
+ }
+
+ if (getLexer().is(AsmToken::RParen)) {
+ Operands.push_back(CSKYOperand::createToken(")", getLoc()));
+ getParser().Lex(); // Eat ')'
+ return MatchOperand_Success;
}
if (getLexer().isNot(AsmToken::Comma)) {
@@ -495,8 +922,10 @@ OperandMatchResultTy CSKYAsmParser::parseImmediate(OperandVector &Operands) {
const MCExpr *IdVal;
SMLoc S = getLoc();
- if (getParser().parseExpression(IdVal))
+ if (getParser().parseExpression(IdVal)) {
+ Error(getLoc(), "unknown expression");
return MatchOperand_ParseFail;
+ }
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
Operands.push_back(CSKYOperand::createImm(IdVal, S, E));
@@ -517,17 +946,26 @@ bool CSKYAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
// Attempt to parse token as register
- if (parseRegister(Operands) == MatchOperand_Success)
+ auto Res = parseRegister(Operands);
+ if (Res == MatchOperand_Success)
return false;
+ else if (Res == MatchOperand_ParseFail)
+ return true;
// Attempt to parse token as (register, imm)
- if (getLexer().is(AsmToken::LParen))
- if (parseBaseRegImm(Operands) == MatchOperand_Success)
+ if (getLexer().is(AsmToken::LParen)) {
+ Res = parseBaseRegImm(Operands);
+ if (Res == MatchOperand_Success)
return false;
+ else if (Res == MatchOperand_ParseFail)
+ return true;
+ }
- // Attempt to parse token as a imm.
- if (parseImmediate(Operands) == MatchOperand_Success)
+ Res = parseImmediate(Operands);
+ if (Res == MatchOperand_Success)
return false;
+ else if (Res == MatchOperand_ParseFail)
+ return true;
// Finally we have exhausted all options and must declare defeat.
Error(getLoc(), "unknown operand");
@@ -537,16 +975,20 @@ bool CSKYAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
return MatchOperand_NoMatch;
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier))
+ AsmToken Tok = getLexer().getTok();
+
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier");
return MatchOperand_ParseFail;
+ }
CSKYMCExpr::VariantKind Kind = CSKYMCExpr::VK_CSKY_None;
-
if (Identifier.consume_back("@GOT"))
Kind = CSKYMCExpr::VK_CSKY_GOT;
else if (Identifier.consume_back("@GOTOFF"))
@@ -555,44 +997,377 @@ OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
Kind = CSKYMCExpr::VK_CSKY_PLT;
else if (Identifier.consume_back("@GOTPC"))
Kind = CSKYMCExpr::VK_CSKY_GOTPC;
+ else if (Identifier.consume_back("@TLSGD32"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSGD;
+ else if (Identifier.consume_back("@GOTTPOFF"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSIE;
+ else if (Identifier.consume_back("@TPOFF"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSLE;
+ else if (Identifier.consume_back("@TLSLDM32"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSLDM;
+ else if (Identifier.consume_back("@TLSLDO32"))
+ Kind = CSKYMCExpr::VK_CSKY_TLSLDO;
+
+ MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
+
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ }
+ Res = V;
+ } else
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- const MCExpr *Res =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ if (Kind != CSKYMCExpr::VK_CSKY_None)
+ Res = CSKYMCExpr::create(Res, Kind, getContext());
- if (Kind != CSKYMCExpr::VK_CSKY_None)
- Res = CSKYMCExpr::create(Res, Kind, getContext());
+ Operands.push_back(CSKYOperand::createImm(Res, S, E));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+ getLexer().Lex(); // eat + or -
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(CSKYOperand::createImm(Res, S, E));
return MatchOperand_Success;
}
+OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
+
+ if (getLexer().getKind() != AsmToken::LBrac)
+ return MatchOperand_NoMatch;
+
+ getLexer().Lex(); // Eat '['.
+
+ if (getLexer().getKind() != AsmToken::Identifier) {
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ]");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // Eat ']'.
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Expr, S, E));
+ return MatchOperand_Success;
+ }
+
+ AsmToken Tok = getLexer().getTok();
+ StringRef Identifier;
+
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier " + Identifier);
+ return MatchOperand_ParseFail;
+ }
+
+ CSKYMCExpr::VariantKind Kind = CSKYMCExpr::VK_CSKY_None;
+ if (Identifier.consume_back("@GOT"))
+ Kind = CSKYMCExpr::VK_CSKY_GOT_IMM18_BY4;
+ else if (Identifier.consume_back("@PLT"))
+ Kind = CSKYMCExpr::VK_CSKY_PLT_IMM18_BY4;
+
+ MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
+
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ }
+ Res = V;
+ } else {
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ }
+
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ case AsmToken::RBrac:
+
+ getLexer().Lex(); // Eat ']'.
+
+ if (Kind != CSKYMCExpr::VK_CSKY_None)
+ Res = CSKYMCExpr::create(Res, Kind, getContext());
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+
+ getLexer().Lex(); // eat + or -
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ']'");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // Eat ']'.
+
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+}
+
OperandMatchResultTy
CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+ const MCExpr *Res;
if (getLexer().getKind() != AsmToken::LBrac)
return MatchOperand_NoMatch;
getLexer().Lex(); // Eat '['.
- if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ if (getLexer().getKind() != AsmToken::Identifier) {
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ']'");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // Eat ']'.
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Expr, S, E));
+ return MatchOperand_Success;
+ }
+ AsmToken Tok = getLexer().getTok();
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier))
+
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier");
return MatchOperand_ParseFail;
+ }
- if (getLexer().getKind() != AsmToken::RBrac)
- return MatchOperand_NoMatch;
+ MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
+
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(Identifier);
+
+ if (Sym->isVariable()) {
+ const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
+ if (!isa<MCSymbolRefExpr>(V)) {
+ getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ }
+ Res = V;
+ } else {
+ Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+ }
+
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ Error(getLoc(), "unknown symbol");
+ return MatchOperand_ParseFail;
+ case AsmToken::RBrac:
+
+ getLexer().Lex(); // Eat ']'.
+
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+
+ getLexer().Lex(); // eat + or -
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(getLoc(), "unknown expression");
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().getKind() != AsmToken::RBrac) {
+ Error(getLoc(), "expected ']'");
+ return MatchOperand_ParseFail;
+ }
getLexer().Lex(); // Eat ']'.
- MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
- const MCExpr *Res =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- Operands.push_back(CSKYOperand::createImm(Res, S, E));
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
+ Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy CSKYAsmParser::parsePSRFlag(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
+
+ unsigned Flag = 0;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement)) {
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier)) {
+ Error(getLoc(), "unknown identifier " + Identifier);
+ return MatchOperand_ParseFail;
+ }
+
+ if (Identifier == "sie")
+ Flag = (1 << 4) | Flag;
+ else if (Identifier == "ee")
+ Flag = (1 << 3) | Flag;
+ else if (Identifier == "ie")
+ Flag = (1 << 2) | Flag;
+ else if (Identifier == "fe")
+ Flag = (1 << 1) | Flag;
+ else if (Identifier == "af")
+ Flag = (1 << 0) | Flag;
+ else {
+ Error(getLoc(), "expected " + Identifier);
+ return MatchOperand_ParseFail;
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex(); // eat ','
+ } else {
+ Error(getLoc(), "expected ,");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ Operands.push_back(
+ CSKYOperand::createImm(MCConstantExpr::create(Flag, getContext()), S, E));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy CSKYAsmParser::parseRegSeq(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ if (parseRegister(Operands) != MatchOperand_Success)
+ return MatchOperand_NoMatch;
+
+ auto Ry = Operands.back()->getReg();
+ Operands.pop_back();
+
+ if (getLexer().isNot(AsmToken::Minus)) {
+ Error(getLoc(), "expected '-'");
+ return MatchOperand_ParseFail;
+ }
+
+ getLexer().Lex(); // eat '-'
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
+ auto Rz = Operands.back()->getReg();
+ Operands.pop_back();
+
+ Operands.push_back(CSKYOperand::createRegSeq(Ry, Rz, S));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy CSKYAsmParser::parseRegList(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ SmallVector<unsigned, 4> reglist;
+
+ while (true) {
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
+ auto Ry = Operands.back()->getReg();
+ Operands.pop_back();
+
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex(); // eat '-'
+
+ if (parseRegister(Operands) != MatchOperand_Success) {
+ Error(getLoc(), "invalid register");
+ return MatchOperand_ParseFail;
+ }
+
+ auto Rz = Operands.back()->getReg();
+ Operands.pop_back();
+
+ reglist.push_back(Ry);
+ reglist.push_back(Rz);
+
+ if (getLexer().is(AsmToken::Comma))
+ getLexer().Lex(); // eat ','
+ else if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ } else if (getLexer().is(AsmToken::Comma)) {
+ reglist.push_back(Ry);
+ reglist.push_back(Ry);
+
+ getLexer().Lex(); // eat ','
+ } else if (getLexer().is(AsmToken::EndOfStatement)) {
+ reglist.push_back(Ry);
+ reglist.push_back(Ry);
+ break;
+ } else {
+ Error(getLoc(), "invalid register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ Operands.push_back(CSKYOperand::createRegList(reglist, S));
return MatchOperand_Success;
}
@@ -638,7 +1413,7 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(unsigned &RegNo,
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper((MCRegister &)RegNo, Name))
+ if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
return MatchOperand_NoMatch;
getParser().Lex(); // Eat identifier token.
diff --git a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h b/llvm/lib/Target/CSKY/CSKY.h
index 7ed254b3ee04..357b1e96e606 100644
--- a/llvm/include/llvm/ExecutionEngine/OrcV1Deprecation.h
+++ b/llvm/lib/Target/CSKY/CSKY.h
@@ -1,4 +1,4 @@
-//===------ OrcV1Deprecation.h - Memory manager for MC-JIT ------*- C++ -*-===//
+//===-- CSKY.h - Top-level interface for CSKY--------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,17 +6,22 @@
//
//===----------------------------------------------------------------------===//
//
-// Tag for suppressing ORCv1 deprecation warnings.
+// This file contains the entry points for global functions defined in the LLVM
+// CSKY back-end.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
-#define LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#ifndef LLVM_LIB_TARGET_CSKY_CSKY_H
+#define LLVM_LIB_TARGET_CSKY_CSKY_H
+
+#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class CSKYTargetMachine;
+class FunctionPass;
-enum ORCv1DeprecationAcknowledgement { AcknowledgeORCv1Deprecation };
+FunctionPass *createCSKYISelDag(CSKYTargetMachine &TM);
} // namespace llvm
-#endif // LLVM_EXECUTIONENGINE_ORCV1DEPRECATION_H
+#endif // LLVM_LIB_TARGET_CSKY_CSKY_H
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index 854a8b5f22a2..e26781ca6aa1 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -9,10 +9,97 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// CSKY subtarget features and instruction predicates.
+//===----------------------------------------------------------------------===//
+
+def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true",
+ "Use the 16-bit btsti instruction">;
+def HasBTST16 : Predicate<"Subtarget->hasBTST16()">,
+ AssemblerPredicate<(all_of FeatureBTST16),
+ "Use the 16-bit btsti instruction">;
+
+// Atomic Support
+def FeatureExtendLrw : SubtargetFeature<"elrw", "HasExtendLrw", "true",
+ "Use the extend LRW instruction">;
+def HasExtendLrw : Predicate<"Subtarget->hasExtendLrw()">,
+ AssemblerPredicate<(all_of FeatureExtendLrw),
+ "Use the extend LRW instruction">;
+
+def FeatureJAVA
+ : SubtargetFeature<"java", "HasJAVA", "true", "Enable java instructions">;
+def HasJAVA : Predicate<"Subtarget->hasJAVA()">,
+ AssemblerPredicate<(all_of FeatureJAVA),
+ "Enable java instructions">;
+
+def FeatureDoloop : SubtargetFeature<"doloop", "HasDoloop", "true",
+ "Enable doloop instructions">;
+def HasDoloop : Predicate<"Subtarget->hasDoloop()">,
+ AssemblerPredicate<(all_of FeatureDoloop),
+ "Enable doloop instructions">;
+
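+// Most of the ISA-level features below chain through their implied features,
+// e.g. e2 implies e1 and 2e3 implies e2.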
+def HasE1
+ : SubtargetFeature<"e1", "HasE1", "true", "Support CSKY e1 instructions",
+ [FeatureExtendLrw]>;
+def iHasE1 : Predicate<"Subtarget->hasE1()">,
+ AssemblerPredicate<(all_of HasE1),
+ "Support CSKY e1 instructions">;
+
+def HasE2
+ : SubtargetFeature<"e2", "HasE2", "true", "Support CSKY e2 instructions",
+ [HasE1]>;
+def iHasE2 : Predicate<"Subtarget->hasE2()">,
+ AssemblerPredicate<(all_of HasE2),
+ "Support CSKY e2 instructions">;
+
+def Has2E3 : SubtargetFeature<"2e3", "Has2E3", "true",
+ "Support CSKY 2e3 instructions", [HasE2]>;
+def iHas2E3 : Predicate<"Subtarget->has2E3()">,
+ AssemblerPredicate<(all_of Has2E3),
+ "Support CSKY 2e3 instructions">;
+
+def Has3E3r1 : SubtargetFeature<"3e3r1", "Has3E3r1", "true",
+ "Support CSKY 3e3r1 instructions">;
+def iHas3E3r1 : Predicate<"Subtarget->has3E3r1()">,
+ AssemblerPredicate<(all_of Has3E3r1),
+ "Support CSKY 3e3r1 instructions">;
+
+def Has3r2E3r3
+ : SubtargetFeature<"3e3r3", "Has3r2E3r3", "true",
+ "Support CSKY 3e3r3 instructions", [FeatureDoloop]>;
+def iHas3r2E3r3 : Predicate<"Subtarget->has3r2E3r3()">,
+ AssemblerPredicate<(all_of Has3r2E3r3),
+ "Support CSKY 3e3r3 instructions">;
+
+def Has3E7 : SubtargetFeature<"3e7", "Has3E7", "true",
+ "Support CSKY 3e7 instructions", [Has2E3]>;
+def iHas3E7 : Predicate<"Subtarget->has3E7()">,
+ AssemblerPredicate<(all_of Has3E7),
+ "Support CSKY 3e7 instructions">;
+
+def HasMP1E2 : SubtargetFeature<"mp1e2", "HasMP1E2", "true",
+ "Support CSKY mp1e2 instructions", [Has3E7]>;
+def iHasMP1E2 : Predicate<"Subtarget->hasMP1E2()">,
+ AssemblerPredicate<(all_of HasMP1E2),
+ "Support CSKY mp1e2 instructions">;
+
+def Has7E10 : SubtargetFeature<"7e10", "Has7E10", "true",
+ "Support CSKY 7e10 instructions", [Has3E7]>;
+def iHas7E10 : Predicate<"Subtarget->has7E10()">,
+ AssemblerPredicate<(all_of Has7E10),
+ "Support CSKY 7e10 instructions">;
+
+def Has10E60 : SubtargetFeature<"10e60", "Has10E60", "true",
+ "Support CSKY 10e60 instructions", [Has7E10]>;
+def iHas10E60 : Predicate<"Subtarget->has10E60()">,
+ AssemblerPredicate<(all_of Has10E60),
+ "Support CSKY 10e60 instructions">;
+
+//===----------------------------------------------------------------------===//
// Registers, calling conventions, instruction descriptions.
//===----------------------------------------------------------------------===//
include "CSKYRegisterInfo.td"
+include "CSKYCallingConv.td"
include "CSKYInstrInfo.td"
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
new file mode 100644
index 000000000000..1c38c5d1fde6
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -0,0 +1,58 @@
+//===-- CSKYAsmPrinter.cpp - CSKY LLVM assembly writer --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to the CSKY assembly language.
+//
+//===----------------------------------------------------------------------===//
+#include "CSKYAsmPrinter.h"
+#include "CSKY.h"
+#include "CSKYTargetMachine.h"
+#include "MCTargetDesc/CSKYInstPrinter.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "TargetInfo/CSKYTargetInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-asm-printer"
+
+CSKYAsmPrinter::CSKYAsmPrinter(llvm::TargetMachine &TM,
+ std::unique_ptr<llvm::MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this) {}
+
+bool CSKYAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<CSKYSubtarget>();
+ return AsmPrinter::runOnMachineFunction(MF);
+}
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "CSKYGenMCPseudoLowering.inc"
+
+void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
+ return;
+
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeCSKYAsmPrinter() {
+ RegisterAsmPrinter<CSKYAsmPrinter> X(getTheCSKYTarget());
+}
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
new file mode 100644
index 000000000000..f0f5d8657c04
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -0,0 +1,40 @@
+//===-- CSKYAsmPrinter.h - CSKY implementation of AsmPrinter ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYASMPRINTER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYASMPRINTER_H
+
+#include "CSKYMCInstLower.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCDirectives.h"
+
+namespace llvm {
+class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
+ CSKYMCInstLower MCInstLowering;
+
+ const CSKYSubtarget *Subtarget;
+
+public:
+ explicit CSKYAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer);
+
+ StringRef getPassName() const override { return "CSKY Assembly Printer"; }
+
+ /// tblgen'erated driver function for lowering simple MI->MC
+ /// pseudo instructions.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ void emitInstruction(const MachineInstr *MI) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYASMPRINTER_H
diff --git a/llvm/lib/Target/CSKY/CSKYCallingConv.h b/llvm/lib/Target/CSKY/CSKYCallingConv.h
new file mode 100644
index 000000000000..f1048f86264b
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYCallingConv.h
@@ -0,0 +1,63 @@
+//=== CSKYCallingConv.h - CSKY Custom Calling Convention Routines -*-C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the CSKY Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYCALLINGCONV_H
+#define LLVM_LIB_TARGET_CSKY_CSKYCALLINGCONV_H
+
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+static bool CC_CSKY_ABIV2_SOFT_64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+
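+  // Under the soft-float ABI an f64 is passed in a pair of GPRs when possible:
+  // allocate the first half here; the second half takes the next GPR or a
+  // 4-byte stack slot. If no GPR is free, the whole value goes on the stack.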
+ static const MCPhysReg ArgGPRs[] = {CSKY::R0, CSKY::R1, CSKY::R2, CSKY::R3};
+ Register Reg = State.AllocateReg(ArgGPRs);
+ LocVT = MVT::i32;
+ if (!Reg) {
+ unsigned StackOffset = State.AllocateStack(8, Align(4));
+ State.addLoc(
+ CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ return true;
+ }
+ if (!State.AllocateReg(ArgGPRs))
+ State.AllocateStack(4, Align(4));
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+}
+
+static bool Ret_CSKY_ABIV2_SOFT_64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+
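+  // An f64 is returned in the R0/R1 pair; give up (return false) if either
+  // register is unavailable.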
+ static const MCPhysReg ArgGPRs[] = {CSKY::R0, CSKY::R1};
+ Register Reg = State.AllocateReg(ArgGPRs);
+ LocVT = MVT::i32;
+ if (!Reg)
+ return false;
+
+ if (!State.AllocateReg(ArgGPRs))
+ return false;
+
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+}
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/CSKY/CSKYCallingConv.td b/llvm/lib/Target/CSKY/CSKYCallingConv.td
new file mode 100644
index 000000000000..87e2e6b9dc31
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYCallingConv.td
@@ -0,0 +1,82 @@
+//===-- CSKYCallingConv.td - Calling Conventions CSKY ----*- tablegen -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the CSKY architecture.
+//
+//===----------------------------------------------------------------------===//
+
+def CSR_I32 : CalleeSavedRegs<(add R8, R15, (sequence "R%u", 4, 7),
+ (sequence "R%u", 9, 11), (sequence "R%u", 16, 17), R28)>;
+def CSR_GPR_FPR32 : CalleeSavedRegs<(add CSR_I32, (sequence "F%u_32", 8, 15))>;
+def CSR_GPR_FPR64 : CalleeSavedRegs<(add CSR_I32,
+ (sequence "F%u_64", 8, 15))>;
+
+// Interrupt handler needs to save/restore all registers that are used,
+// both Caller and Callee saved registers.
+def CSR_GPR_ISR : CalleeSavedRegs<(add R8, R15,
+ (sequence "R%u", 0, 3),
+ (sequence "R%u", 4, 7),
+ (sequence "R%u", 9, 13),
+ (sequence "R%u", 16, 31))>;
+
+def CSR_GPR_FPR32_ISR: CalleeSavedRegs<(add CSR_GPR_ISR,
+ (sequence "F%u_32", 0, 15))>;
+def CSR_GPR_FPR64_ISR: CalleeSavedRegs<(add CSR_GPR_ISR,
+ (sequence "F%u_64", 0, 15))>;
+
+def CSR_GPR_FPR32v3_ISR: CalleeSavedRegs<(add CSR_GPR_FPR32_ISR,
+ (sequence "F%u_32", 16, 31))>;
+def CSR_GPR_FPR64v3_ISR: CalleeSavedRegs<(add CSR_GPR_FPR64_ISR,
+ (sequence "F%u_64", 16, 31))>;
+
+// Needed for implementation of CSKYRegisterInfo::getNoPreservedMask()
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
+def CC_CSKY_ABIV2_SOFT : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[v2i16, v4i8], CCAssignToStack<4, 4>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCCustom<"CC_CSKY_ABIV2_SOFT_64">>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_CSKY_ABIV2_SOFT : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[f64], CCCustom<"Ret_CSKY_ABIV2_SOFT_64">>
+]>;
+
+def CC_CSKY_ABIV2_FP : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[v2i16, v4i8], CCAssignToStack<4, 4>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[f32], CCAssignToReg<[F0_32, F1_32, F2_32, F3_32]>>,
+ CCIfType<[f32], CCAssignToStack<4, 4>>,
+ CCIfType<[f64], CCAssignToReg<[F0_64, F1_64, F2_64, F3_64]>>,
+ CCIfType<[f64], CCAssignToStack<8, 4>>
+]>;
+
+def RetCC_CSKY_ABIV2_FP : CallingConv<[
+ // DSP types
+ CCIfType<[v2i16, v4i8], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i32], CCAssignToReg<[R0, R1]>>,
+ CCIfType<[f32], CCAssignToReg<[F0_32]>>,
+ CCIfType<[f64], CCAssignToReg<[F0_64]>>
+]>;
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
new file mode 100644
index 000000000000..9b22c95cfe21
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.cpp
@@ -0,0 +1,57 @@
+//===-- CSKYFrameLowering.cpp - CSKY Frame Information ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYFrameLowering.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/MC/MCDwarf.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-frame-lowering"
+
+// Returns the register used to hold the frame pointer.
+static Register getFPReg(const CSKYSubtarget &STI) { return CSKY::R8; }
+
+// To avoid the BP value being clobbered by a function call, we need to choose
+// a callee-saved register to hold it.
+static Register getBPReg(const CSKYSubtarget &STI) { return CSKY::R7; }
+
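+// A frame pointer is needed when frame-pointer elimination is disabled, the
+// stack needs realignment, or the frame has variable-sized or address-taken
+// objects.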
+bool CSKYFrameLowering::hasFP(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken();
+}
+
+bool CSKYFrameLowering::hasBP(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ return MFI.hasVarSizedObjects();
+}
+
+void CSKYFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // FIXME: Implement this when we have function calls
+}
+
+void CSKYFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // FIXME: Implement this when we have function calls
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYFrameLowering.h b/llvm/lib/Target/CSKY/CSKYFrameLowering.h
new file mode 100644
index 000000000000..49921a1866bc
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYFrameLowering.h
@@ -0,0 +1,38 @@
+//===-- CSKYFrameLowering.h - Define frame lowering for CSKY -*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements CSKY-specific bits of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYFRAMELOWERING_H
+#define LLVM_LIB_TARGET_CSKY_CSKYFRAMELOWERING_H
+
+#include "llvm/CodeGen/TargetFrameLowering.h"
+
+namespace llvm {
+class CSKYSubtarget;
+
+class CSKYFrameLowering : public TargetFrameLowering {
+ const CSKYSubtarget &STI;
+
+public:
+ explicit CSKYFrameLowering(const CSKYSubtarget &STI)
+ : TargetFrameLowering(StackGrowsDown,
+ /*StackAlignment=*/Align(4),
+ /*LocalAreaOffset=*/0),
+ STI(STI) {}
+
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasBP(const MachineFunction &MF) const;
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
new file mode 100644
index 000000000000..fc9ef8bfd9d9
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -0,0 +1,75 @@
+//===-- CSKYISelDAGToDAG.cpp - A dag to dag inst selector for CSKY---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the CSKY target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
+#include "CSKYTargetMachine.h"
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-isel"
+
+namespace {
+class CSKYDAGToDAGISel : public SelectionDAGISel {
+ const CSKYSubtarget *Subtarget;
+
+public:
+ explicit CSKYDAGToDAGISel(CSKYTargetMachine &TM) : SelectionDAGISel(TM) {}
+
+ StringRef getPassName() const override {
+ return "CSKY DAG->DAG Pattern Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ Subtarget = &MF.getSubtarget<CSKYSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ void Select(SDNode *N) override;
+
+#include "CSKYGenDAGISel.inc"
+};
+} // namespace
+
+void CSKYDAGToDAGISel::Select(SDNode *N) {
+  // If the node already has a machine opcode, it has already been selected;
+  // nothing more to do.
+ if (N->isMachineOpcode()) {
+ LLVM_DEBUG(dbgs() << "== "; N->dump(CurDAG); dbgs() << "\n");
+ N->setNodeId(-1);
+ return;
+ }
+
+ SDLoc Dl(N);
+ unsigned Opcode = N->getOpcode();
+ bool IsSelected = false;
+
+ switch (Opcode) {
+ default:
+ break;
+ // FIXME: Add selection nodes needed later.
+ }
+
+ if (IsSelected)
+ return;
+
+ // Select the default instruction.
+ SelectCode(N);
+}
+
+FunctionPass *llvm::createCSKYISelDag(CSKYTargetMachine &TM) {
+ return new CSKYDAGToDAGISel(TM);
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
new file mode 100644
index 000000000000..ac6d069e592c
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -0,0 +1,346 @@
+//===-- CSKYISelLowering.cpp - CSKY DAG Lowering Implementation ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that CSKY uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYISelLowering.h"
+#include "CSKYCallingConv.h"
+#include "CSKYMachineFunctionInfo.h"
+#include "CSKYRegisterInfo.h"
+#include "CSKYSubtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-isel-lowering"
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+#include "CSKYGenCallingConv.inc"
+
+static const MCPhysReg GPRArgRegs[] = {CSKY::R0, CSKY::R1, CSKY::R2, CSKY::R3};
+
+CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
+ const CSKYSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
+ // Register Class
+ addRegisterClass(MVT::i32, &CSKY::GPRRegClass);
+
+ // Compute derived properties from the register classes.
+ computeRegisterProperties(STI.getRegisterInfo());
+
+ setBooleanContents(UndefinedBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ // TODO: Add atomic support fully.
+ setMaxAtomicSizeInBitsSupported(0);
+
+ setStackPointerRegisterToSaveRestore(CSKY::R14);
+ const Align FunctionAlignment(2);
+ setMinFunctionAlignment(FunctionAlignment);
+ setSchedulingPreference(Sched::Source);
+}
+
+EVT CSKYTargetLowering::getSetCCResultType(const DataLayout &DL,
+ LLVMContext &Context, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+
+ return VT.changeVectorElementTypeToInteger();
+}
+
+static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ EVT LocVT = VA.getLocVT();
+
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
+ break;
+ }
+ return Val;
+}
+
+static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
+ const CCValAssign &VA, const SDLoc &DL) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+ break;
+ }
+ return Val;
+}
+
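+// Copy a register-assigned argument out of its physical register: create a
+// virtual register of the matching class, mark the physreg as live-in and emit
+// a CopyFromReg.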
+static SDValue unpackFromRegLoc(const CSKYSubtarget &Subtarget,
+ SelectionDAG &DAG, SDValue Chain,
+ const CCValAssign &VA, const SDLoc &DL) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ EVT LocVT = VA.getLocVT();
+ SDValue Val;
+ const TargetRegisterClass *RC;
+
+ switch (LocVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected register type");
+ case MVT::i32:
+ RC = &CSKY::GPRRegClass;
+ break;
+ }
+
+ Register VReg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
+
+ return convertLocVTToValVT(DAG, Val, VA, DL);
+}
+
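+// Load a stack-assigned argument: create a fixed frame object at the
+// caller-provided offset and load the value from it.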
+static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
+ const CCValAssign &VA, const SDLoc &DL) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ EVT LocVT = VA.getLocVT();
+ EVT ValVT = VA.getValVT();
+ EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), /*Immutable=*/true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val;
+
+ ISD::LoadExtType ExtType;
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unexpected CCValAssign::LocInfo");
+ case CCValAssign::Full:
+ case CCValAssign::BCvt:
+ ExtType = ISD::NON_EXTLOAD;
+ break;
+ }
+ Val = DAG.getExtLoad(
+ ExtType, DL, LocVT, Chain, FIN,
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
+ return Val;
+}
+
+// Transform physical registers into virtual registers.
+SDValue CSKYTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+ switch (CallConv) {
+ default:
+ report_fatal_error("Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ break;
+ }
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+  // Used with varargs to accumulate store chains.
+ std::vector<SDValue> OutChains;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, IsVarArg));
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue ArgValue;
+
+ if (VA.isRegLoc())
+ ArgValue = unpackFromRegLoc(Subtarget, DAG, Chain, VA, DL);
+ else
+ ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
+
+ InVals.push_back(ArgValue);
+ }
+
+ if (IsVarArg) {
+ const unsigned XLenInBytes = 4;
+ const MVT XLenVT = MVT::i32;
+
+ ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(GPRArgRegs);
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
+ const TargetRegisterClass *RC = &CSKY::GPRRegClass;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ CSKYMachineFunctionInfo *CSKYFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+ // Offset of the first variable argument from stack pointer, and size of
+ // the vararg save area. For now, the varargs save area is either zero or
+    // large enough to hold r0-r3.
+ int VaArgOffset, VarArgsSaveSize;
+
+ // If all registers are allocated, then all varargs must be passed on the
+ // stack and we don't need to save any argregs.
+ if (ArgRegs.size() == Idx) {
+ VaArgOffset = CCInfo.getNextStackOffset();
+ VarArgsSaveSize = 0;
+ } else {
+ VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
+ VaArgOffset = -VarArgsSaveSize;
+ }
+
+    // Record the frame index of the first variable argument, which is
+    // needed by VASTART.
+ int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+ CSKYFI->setVarArgsFrameIndex(FI);
+
+ // Copy the integer registers that may have been used for passing varargs
+ // to the vararg save area.
+ for (unsigned I = Idx; I < ArgRegs.size();
+ ++I, VaArgOffset += XLenInBytes) {
+ const Register Reg = RegInfo.createVirtualRegister(RC);
+ RegInfo.addLiveIn(ArgRegs[I], Reg);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
+ FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
+ MachinePointerInfo::getFixedStack(MF, FI));
+ cast<StoreSDNode>(Store.getNode())
+ ->getMemOperand()
+ ->setValue((Value *)nullptr);
+ OutChains.push_back(Store);
+ }
+ CSKYFI->setVarArgsSaveSize(VarArgsSaveSize);
+ }
+
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens for vararg functions.
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+ }
+
+ return Chain;
+}
+
+bool CSKYTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> CSKYLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, CSKYLocs, Context);
+ return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+}
+
+SDValue
+CSKYTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &DL, SelectionDAG &DAG) const {
+ // Stores the assignment of the return value to a location.
+ SmallVector<CCValAssign, 16> CSKYLocs;
+
+ // Info about the registers and stack slot.
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), CSKYLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+
+ SDValue Glue;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, e = CSKYLocs.size(); i < e; ++i) {
+ SDValue Val = OutVals[i];
+ CCValAssign &VA = CSKYLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ bool IsF64OnCSKY = VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
+
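+    // An f64 assigned to i32 locations by the custom handler is split into its
+    // lo/hi halves and returned in a consecutive GPR pair.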
+ if (IsF64OnCSKY) {
+
+ assert(VA.isRegLoc() && "Expected return via registers");
+ SDValue Split64 = DAG.getNode(CSKYISD::BITCAST_TO_LOHI, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), Val);
+ SDValue Lo = Split64.getValue(0);
+ SDValue Hi = Split64.getValue(1);
+
+ Register RegLo = VA.getLocReg();
+ assert(RegLo < CSKY::R31 && "Invalid register pair");
+ Register RegHi = RegLo + 1;
+
+ Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
+ Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
+ } else {
+ // Handle a 'normal' return.
+ Val = convertValVTToLocVT(DAG, Val, VA, DL);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
+
+ // Guarantee that all emitted copies are stuck together.
+ Glue = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the glue node if we have it.
+ if (Glue.getNode()) {
+ RetOps.push_back(Glue);
+ }
+
+ // Interrupt service routines use different return instructions.
+ if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt"))
+ return DAG.getNode(CSKYISD::NIR, DL, MVT::Other, RetOps);
+
+ return DAG.getNode(CSKYISD::RET, DL, MVT::Other, RetOps);
+}
+
+CCAssignFn *CSKYTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool IsVarArg) const {
+ if (IsVarArg || !Subtarget.useHardFloatABI())
+ return RetCC_CSKY_ABIV2_SOFT;
+ else
+ return RetCC_CSKY_ABIV2_FP;
+}
+
+CCAssignFn *CSKYTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ if (IsVarArg || !Subtarget.useHardFloatABI())
+ return CC_CSKY_ABIV2_SOFT;
+ else
+ return CC_CSKY_ABIV2_FP;
+}
+
+const char *CSKYTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("unknown CSKYISD node");
+ case CSKYISD::NIE:
+ return "CSKYISD::NIE";
+ case CSKYISD::NIR:
+ return "CSKYISD::NIR";
+ case CSKYISD::RET:
+ return "CSKYISD::RET";
+ case CSKYISD::BITCAST_TO_LOHI:
+ return "CSKYISD::BITCAST_TO_LOHI";
+ }
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
new file mode 100644
index 000000000000..7557c11f50a8
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -0,0 +1,69 @@
+//===-- CSKYISelLowering.h - CSKY DAG Lowering Interface --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that CSKY uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYISELLOWERING_H
+#define LLVM_LIB_TARGET_CSKY_CSKYISELLOWERING_H
+
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/TargetLowering.h"
+
+namespace llvm {
+class CSKYSubtarget;
+
+namespace CSKYISD {
+enum NodeType : unsigned {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ NIE,
+ NIR,
+ RET,
+ BITCAST_TO_LOHI
+};
+}
+
+class CSKYTargetLowering : public TargetLowering {
+ const CSKYSubtarget &Subtarget;
+
+public:
+ explicit CSKYTargetLowering(const TargetMachine &TM,
+ const CSKYSubtarget &STI);
+
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+
+private:
+ SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const SDLoc &DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
+
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
+ CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYISELLOWERING_H
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats.td b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
index dd71b693bbbb..9b6ef9ca23db 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrFormats.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats.td
@@ -24,7 +24,7 @@ class CSKYInst<AddrMode am, int sz, dag outs, dag ins, string asmstr,
let Namespace = "CSKY";
int Size = sz;
AddrMode AM = am;
-
+ field bits<32> SoftFail = 0;
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asmstr;
@@ -46,6 +46,11 @@ class CSKY32Inst<AddrMode am, bits<6> opcode, dag outs, dag ins, string asmstr,
let Inst{31 - 26} = opcode;
}
+class CSKY16Inst<AddrMode am, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : CSKYInst<am, 2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+}
+
// CSKY 32-bit instruction
// Format< OP[6] | Offset[26] >
// Instruction(1): bsr32
@@ -157,19 +162,7 @@ class I_16_RET<bits<5> sop, bits<5> pcode, string op, list<dag> pattern>
let isTerminator = 1;
let isReturn = 1;
let isBarrier = 1;
-}
-
-// Instructions(1): rte32
-class I_16_RET_I<bits<5> sop, bits<5> pcode, string op, list<dag> pattern>
- : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), op, pattern> {
- let Inst{25 - 21} = sop;
- let Inst{20 - 16} = pcode;
- let Inst{15 - 10} = 0x10;
- let Inst{9 - 5} = 1;
- let Inst{4 - 0} = 0;
- let isTerminator = 1;
- let isReturn = 1;
- let isBarrier = 1;
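+  // Return instructions implicitly read the link register R15.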
+ let Uses = [ R15 ];
}
// Format< OP[6] | SOP[5] | RX[5] | IMM16[16] >
@@ -227,14 +220,27 @@ class I_LDST<AddrMode am, bits<6> opcode, bits<4> sop, dag outs, dag ins,
let Inst{11 - 0} = imm12;
}
+class I_PLDR<AddrMode am, bits<6> opcode, bits<4> sop, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<am, opcode, outs, ins, !strconcat(op, "\t($rx, ${imm12})"),
+ pattern> {
+ bits<5> rx;
+ bits<12> imm12;
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = imm12;
+}
+
+
// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | OFFSET[12] >
-// Instructions(6): ld32.b, ld32.bs, ld32.h, ld32.hs, ld32.w, ld32.d
+// Instructions(6): ld32.b, ld32.bs, ld32.h, ld32.hs, ld32.w
class I_LD<AddrMode am, bits<4> sop, string op, Operand operand>
: I_LDST<am, 0x36, sop,
(outs GPR:$rz), (ins GPR:$rx, operand:$imm12), op, []>;
// Format< OP[6] | RZ[5] | RX[5] | SOP[4] | OFFSET[12] >
-// Instructions(4): st32.b, st32.h, st32.w, st32.d
+// Instructions(4): st32.b, st32.h, st32.w
class I_ST<AddrMode am, bits<4> sop, string op, Operand operand>
: I_LDST<am, 0x37, sop, (outs),
(ins GPR:$rz, GPR:$rx, operand:$imm12), op, []>;
@@ -249,6 +255,8 @@ class I_12_PP<bits<5> sop, bits<5> pcode, dag outs, dag ins, string op>
let Inst{20 - 16} = pcode;
let Inst{15 - 12} = 0;
let Inst{11 - 0} = regs;
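+  // Push/pop implicitly read and update the stack pointer (R14).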
+ let Uses = [R14];
+ let Defs = [R14];
}
// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | PCODE[5] | IMM[5]>
@@ -256,7 +264,7 @@ class I_12_PP<bits<5> sop, bits<5> pcode, dag outs, dag ins, string op>
class I_5_ZX<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
list<dag> pattern>
: CSKY32Inst<AddrModeNone, 0x31, (outs GPR:$rz),
- (ins GPR:$false, GPR:$rx, ImmType:$imm5),
+ (ins CARRY:$cond, GPR:$false, GPR:$rx, ImmType:$imm5),
!strconcat(op, "\t$rz, $rx, $imm5"), pattern> {
bits<5> rz;
bits<5> rx;
@@ -272,9 +280,9 @@ class I_5_ZX<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
// Format< OP[6] | IMM[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5]>
// Instructions(13): decgt32, declt32, decne32, lsli32, lslc32, lsri32
// lsrc32, asri32, asrc32, rotli32, xsr32, bclri32, bseti32
-class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag ins, dag outs,
+class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag outs, dag ins,
list<dag> pattern>
- : CSKY32Inst<AddrModeNone, 0x31, ins, outs,
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins,
!strconcat(op, "\t$rz, $rx, $imm5"), pattern> {
bits<5> imm5;
bits<5> rx;
@@ -286,19 +294,107 @@ class I_5_XZ<bits<6> sop, bits<5> pcode, string op, dag ins, dag outs,
let Inst{4 - 0} = rz;
}
+// mtcr32, mfcr32
+class I_5_XZ_CR<bits<6> sop, bits<5> pcode, string opStr, dag outs, dag ins,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x30, outs, ins, opStr, pattern> {
+ bits<5> sel;
+ bits<5> rx;
+ bits<5> cr;
+ let Inst{25 - 21} = sel;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = cr;
+}
+
+// sync
+class I_5_XZ_SYNC<bits<6> sop, bits<5> pcode, string opStr, bits<1> S, bits<1> I>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), opStr, []> {
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+ let Inst{25} = S;
+ let Inst{21} = I;
+
+}
+
+// Privileged Instructions
+class I_5_XZ_PRIVI<bits<6> sop, bits<5> pcode, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), opStr, []> {
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+}
+
+class I_CP<bits<4> sop, dag outs, dag ins, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x3f, outs, ins, opStr, []> {
+ bits<5> cpid;
+ bits<12> usdef;
+ let Inst{25 - 21} = cpid;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = usdef;
+}
+
+class I_CPOP<dag outs, dag ins, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x3f, outs, ins, opStr, []> {
+ bits<5> cpid;
+ bits<20> usdef;
+ let Inst{25 - 21} = cpid;
+ let Inst{20 - 16} = usdef{19-15};
+ let Inst{15} = 1;
+ let Inst{14 - 0} = usdef{14-0};
+}
+
+class I_CP_Z<bits<4> sop, dag outs, dag ins, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x3f, outs, ins, opStr, []> {
+ bits<5> cpid;
+ bits<12> usdef;
+ bits<5> rz;
+
+ let Inst{25 - 21} = cpid;
+ let Inst{20 - 16} = rz;
+ let Inst{15 - 12} = sop;
+ let Inst{11 - 0} = usdef;
+}
+
+class I_5_CACHE<bits<6> sop, bits<5> pcode, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), opStr, []> {
+ let Inst{25 - 21} = pcode;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+class I_5_X_CACHE<bits<6> sop, bits<5> pcode, string opStr>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins GPR:$rx), opStr #"\t$rx", []> {
+ bits<5> rx;
+
+ let Inst{25 - 21} = pcode;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | IMM[5]>
// Instructions(2): ldm32, (ldq32), stm32, (stq32)
-class I_5_YX<bits<6> opcode, dag outs, dag ins, string op, list<dag> pattern,
- bits<5> imm5>
- : CSKY32Inst<AddrModeNone, opcode, outs, ins,
- op #"\t${ry}, (${rx}), " #!cast<int>(imm5), pattern> {
+class I_5_YX<bits<6> opcode, bits<6> sop, dag outs, dag ins, string opStr, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, outs, ins, opStr, pattern> {
+ bits<10> regs;
bits<5> rx;
- bits<5> ry;
- let Inst{25 - 21} = ry; // ry
+
+ let Inst{25 - 21} = regs{9 - 5}; // ry
let Inst{20 - 16} = rx;
- let Inst{15 - 10} = 0b000111;
+ let Inst{15 - 10} = sop;
let Inst{9 - 5} = 0b00001;
- let Inst{4 - 0} = imm5{4 - 0}; // imm5
+ let Inst{4 - 0} = regs{4 - 0}; // imm5
}
// Format< OP[6] | LSB[5] | RX[5] | SOP[6] | MSB[5] | RZ[5]>
@@ -317,14 +413,33 @@ class I_5_XZ_U<bits<6> sop, dag outs, dag ins, string op, list<dag> pattern>
let Inst{4 - 0} = rz;
}
-// sextb, sexth
-class I_5_XZ_US<bits<6> sop, string op, SDNode opnode,
- ValueType type> : I_5_XZ_U<sop, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), op,
- [(set GPR:$rz, (opnode GPR:$rx, type))]>;
+class I_5_XZ_INS<bits<6> sop, dag outs, dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, op #"\t$rz, $rx, $msb, $lsb",
+ pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ bits<5> msb;
+ bits<5> lsb;
+ let Inst{25 - 21} = rz;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = msb;
+ let Inst{4 - 0} = lsb;
+}
-class I_5_XZ_UZ<bits<6> sop, string op, int v>
- : I_5_XZ_U<sop, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), op,
- [(set GPR:$rz, (and GPR:$rx, (i32 v)))]>;
+// Format< OP[6] | LSB[5] | RX[5] | SOP[6] | MSB[5] | RZ[5]>
+// Instructions(6): zext32, zextb32, zexth32, sext32, sextb32, sexth32
+class I_5_XZ_U2<bits<6> sop, bits<5> lsb, bits<5> msb, dag outs, dag ins,
+ string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, !strconcat(op, "\t$rz, $rx"), pattern> {
+ bits<5> rx;
+ bits<5> rz;
+ let Inst{25 - 21} = lsb; // lsb
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = msb; // msb
+ let Inst{4 - 0} = rz;
+}
// Format< OP[6] | RZ[5] | RX[5] | SOP[6] | SIZE[5] | LSB[5]>
// Instructions(1): ins32
@@ -341,6 +456,16 @@ class I_5_ZX_U<bits<6> sop, string op, Operand operand, list<dag> pattern>
let Inst{4 - 0} = size_lsb{4 - 0}; // lsb
}
+// sextb, sexth
+class I_5_XZ_US<bits<6> sop, bits<5> lsb, bits<5> msb, string op,
+ SDNode opnode, ValueType type>
+ : I_5_XZ_U2<sop, lsb, msb, (outs GPR:$rz), (ins GPR:$rx), op,
+ [(set GPR:$rz, (opnode GPR:$rx, type))]>;
+
+class I_5_XZ_UZ<bits<6> sop, bits<5> lsb, bits<5> msb, string op, int v>
+ : I_5_XZ_U2<sop, lsb, msb, (outs GPR:$rz), (ins GPR:$rx), op,
+ [(set GPR:$rz, (and GPR:$rx, (i32 v)))]>;
+
// Format< OP[6] | IMM[5] | RX[5] | SOP[6] | PCODE[5] | 00000 >
// Instructions(1): btsti32
class I_5_X<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
@@ -373,6 +498,18 @@ class I_5_Z<bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
let Inst{4 - 0} = rz;
}
+class I_5_IMM5<bits<6> opcode, bits<6> sop, bits<5> pcode, string op, ImmLeaf ImmType,
+ list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, opcode, (outs), (ins ImmType:$imm5),
+ !strconcat(op, "\t$imm5"), pattern> {
+ bits<5> imm5;
+ let Inst{25 - 21} = imm5;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = sop;
+ let Inst{9 - 5} = pcode;
+ let Inst{4 - 0} = 0;
+}
+
// Format< OP[6] | RY[5] | RX[5] | SOP[6] | PCODE[5] | RZ[5] >
// Instructions(24): addu32, addc32, subu32, subc32, (rsub32), ixh32, ixw32,
// ixd32, and32, andn32, or32, xor32, nor32, lsl32, lsr32, asr32, rotl32
@@ -493,9 +630,8 @@ class R_ZX<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
// Format< OP[6] | 00000[5] | RX[5] | SOP[6] | PCODE[5] | 00000[5] >
// Instructions:(1) tstnbz32
-class R_X<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
- : CSKY32Inst<AddrModeNone, 0x31, (outs CARRY:$ca),(ins GPR:$rx),
- !strconcat(op, "\t$rx"), pattern> {
+class R_X<bits<6> sop, bits<5> pcode, dag outs, dag ins, string op, list<dag> pattern>
+ : CSKY32Inst<AddrModeNone, 0x31, outs, ins, !strconcat(op, "\t$rx"), pattern> {
bits<5> rx;
let Inst{25 - 21} = 0;
let Inst{20 - 16} = rx;
@@ -530,3 +666,14 @@ class R_Z_2<bits<6> sop, bits<5> pcode, string op, list<dag> pattern>
let Inst{4 - 0} = 0;
let Constraints = "$rz = $false";
}
+
+class BAR<bits<5> sop, string op, bits<1> signed>
+ : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins), op, []> {
+ let Inst{25} = signed;
+ let Inst{24 - 16} = 0;
+ let Inst{15 - 5} = 0x421;
+ let Inst{4 - 0} = sop;
+ let hasSideEffects = 1;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
new file mode 100644
index 000000000000..6d42bddcdd78
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormats16Instr.td
@@ -0,0 +1,219 @@
+//===- CSKYInstrFormats16Instr.td - 16-bit Instr. Formats -*- tablegen --*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+class J16<bits<5> sop, string opstr, dag ins>
+ : CSKY16Inst<AddrModeNone, (outs), ins,
+ !strconcat(opstr, "\t$offset"), []> {
+ bits<10> offset;
+ let Inst{15} = 0;
+ let Inst{14 - 10} = sop;
+ let Inst{9 - 0} = offset;
+}
+
+class J16_B<bits<5> sop, string opstr>
+ : CSKY16Inst<AddrModeNone, (outs), (ins CARRY:$ca, br_symbol_16bit:$offset),
+ !strconcat(opstr, "\t$offset"), []> {
+ bits<10> offset;
+ let Inst{15} = 0;
+ let Inst{14 - 10} = sop;
+ let Inst{9 - 0} = offset;
+}
+
+class R16_XYZ<bits<2> sop, string opstr, SDNode opnode> : CSKY16Inst<AddrModeNone,
+ (outs mGPR:$rz), (ins mGPR:$rx, mGPR:$ry), !strconcat(opstr, "\t$rz, $rx, $ry"),
+ [(set mGPR:$rz, (opnode mGPR:$rx, mGPR:$ry)) ]> {
+ bits<3> rz;
+ bits<3> rx;
+ bits<3> ry;
+ let Inst{15 - 11} = 0b01011;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 2} = ry;
+ let Inst{1, 0} = sop;
+}
+
+class R16_XZ_BINOP<bits<4> op, bits<2> sop, string opstr, PatFrag opnode> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rZ, sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"),
+ [(set sGPR:$rz, (opnode sGPR:$rZ, sGPR:$rx))]> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rZ";
+}
+
+class R16_XZ_BINOP_NOPat<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rZ, sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"),
+ []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rZ";
+}
+
+class R16_XZ_BINOP_C<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz, CARRY:$cout),
+ (ins sGPR:$rZ, sGPR:$rx, CARRY:$cin), !strconcat(opstr, "\t$rz, $rx"), []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let Constraints = "$rz = $rZ";
+}
+
+class R16_XZ_UNOP<bits<4> op, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rx), !strconcat(opstr, "\t$rz, $rx"),
+ []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = op;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+}
+
+class R16_XY_CMP<bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx, sGPR:$ry), !strconcat(opstr, "\t$rx, $ry"),
+ []> {
+ bits<4> ry;
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = ry;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+ let isCompare = 1;
+}
+
+class R16_X_J<bits<8> op_rz, bits<2> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs), (ins sGPR:$rx), !strconcat(opstr, "\t$rx"), []> {
+ bits<4> rx;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 6} = op_rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1, 0} = sop;
+}
+
+class I16_Z_8<bits<3> op, dag ins, string asmstr>
+ : CSKY16Inst<AddrModeNone, (outs mGPR:$rz), ins, asmstr, []> {
+ bits<3> rz;
+ bits<8> imm8;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 11} = op;
+ let Inst{10 - 8} = rz;
+ let Inst{7 - 0} = imm8;
+}
+
+class I16_Z_5<bits<3> sop, dag outs, dag ins,string opstr>
+ : CSKY16Inst<AddrModeNone, outs, ins,
+ !strconcat(opstr, "\t$rz, $imm5"), []> {
+ bits<3> rz;
+ bits<5> imm5;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 11} = 0b111;
+ let Inst{10 - 8} = rz;
+ let Inst{7 - 5} = sop;
+ let Inst{4 - 0} = imm5;
+}
+
+class I16_X_CMP<bits<3> sop, string opstr, Operand Immoperand> : CSKY16Inst<
+ AddrModeNone, (outs CARRY:$ca), (ins mGPR:$rx, Immoperand:$imm5),
+ !strconcat(opstr, "\t$rx, $imm5"), []> {
+ bits<3> rx;
+ bits<5> imm5;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 11} = 0b111;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = sop;
+ let Inst{4 - 0} = imm5;
+ let isCompare = 1;
+}
+
+class I16_SP_IMM7<bits<3> sop, string opstr> : CSKY16Inst<
+ AddrModeNone, (outs SPOp:$sp2), (ins SPOp:$sp1, uimm7_2:$imm7),
+ !strconcat(opstr, "\t$sp2, $sp1, $imm7"), []> {
+ bits<7> imm7;
+ let Inst{15, 14} = 0b00;
+ let Inst{13 - 10} = 0b0101;
+ let Inst{9, 8} = imm7{6,5};
+ let Inst{7 - 5} = sop;
+ let Inst{4 - 0} = imm7{4 - 0};
+}
+
+class I16_XZ_IMM5<bits<3> sop, string opstr, SDNode opnode> : CSKY16Inst<
+ AddrModeNone, (outs mGPR:$rz), (ins mGPR:$rx, uimm5:$imm5),
+ !strconcat(opstr, "\t$rz, $rx, $imm5"), [(set mGPR:$rz, (opnode mGPR:$rx, uimm5:$imm5))]> {
+ bits<3> rx;
+ bits<3> rz;
+ bits<5> imm5;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 11} = sop;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = imm5;
+}
+
+class I16_XZ_LDST<AddrMode am, bits<3> sop, string opstr, dag outs, dag ins>
+ : CSKY16Inst<am, outs, ins, !strconcat(opstr, "\t$rz, ($rx, ${imm})"),
+ []> {
+ bits<3> rx;
+ bits<3> rz;
+ bits<5> imm;
+ let Inst{15, 14} = 0b10;
+ let Inst{13 - 11} = sop;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = imm;
+}
+
+class I16_ZSP_LDST<AddrMode am, bits<3> sop, string opstr, dag outs, dag ins> : CSKY16Inst<
+ am, outs, ins, !strconcat(opstr, "\t$rz, ($sp, ${addr})"),
+ []> {
+ bits<3> rz;
+ bits<8> addr;
+ let Inst{15, 14} = 0b10;
+ let Inst{13 - 11} = sop;
+ let Inst{10 - 8} = addr{7 - 5};
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = addr{4 - 0};
+}
+
+class I16_XZ_IMM3<bits<2> sop, string opstr, SDNode opnode> : CSKY16Inst<
+ AddrModeNone, (outs mGPR:$rz), (ins mGPR:$rx, oimm3:$oimm3),
+ !strconcat(opstr, "\t$rz, $rx, $oimm3"), [(set mGPR:$rz, (opnode mGPR:$rx, oimm3:$oimm3))]> {
+ bits<3> rx;
+ bits<3> rz;
+ bits<3> oimm3;
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 11} = 0b011;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 2} = oimm3;
+ let Inst{1, 0} = sop;
+}
+
+class I16_BPushPop<bits<11> op, bits<2> uop, dag out, dag ins, string opstr> :
+ CSKY16Inst<AddrModeNone, out, ins, opstr, []>{
+ bits<3> rz;
+ let Inst{15- 5} = op;
+ let Inst{4 -2} = rz;
+ let Inst{1,0} = uop;
+ let Predicates = [HasJAVA];
+ let hasSideEffects = 1;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
new file mode 100644
index 000000000000..e12235cf9478
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -0,0 +1,25 @@
+//===-- CSKYInstrInfo.cpp - CSKY Instruction Information ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+
+#define DEBUG_TYPE "csky-instr-info"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_CTOR_DTOR
+#include "CSKYGenInstrInfo.inc"
+
+CSKYInstrInfo::CSKYInstrInfo(CSKYSubtarget &STI)
+ : CSKYGenInstrInfo(CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) {
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
new file mode 100644
index 000000000000..04be9da27b57
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -0,0 +1,36 @@
+//===-- CSKYInstrInfo.h - CSKY Instruction Information --------*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYINSTRINFO_H
+#define LLVM_LIB_TARGET_CSKY_CSKYINSTRINFO_H
+
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "CSKYGenInstrInfo.inc"
+
+namespace llvm {
+
+class CSKYSubtarget;
+
+class CSKYInstrInfo : public CSKYGenInstrInfo {
+protected:
+ const CSKYSubtarget &STI;
+
+public:
+ explicit CSKYInstrInfo(CSKYSubtarget &STI);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYINSTRINFO_H
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 20adda4f9ca2..9dda3159e446 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -15,6 +15,18 @@
// CSKY specific DAG Nodes.
//===----------------------------------------------------------------------===//
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
// Target-dependent nodes.
def CSKY_RET : SDNode<"CSKYISD::RET", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -44,6 +56,7 @@ class oimm<int num> : Operand<i32>,
ImmLeaf<i32, "return isUInt<"#num#">(Imm - 1);"> {
let EncoderMethod = "getOImmOpValue";
let ParserMatchClass = OImmAsmOperand<num>;
+ let DecoderMethod = "decodeOImmOperand<"#num#">";
}
class uimm<int num, int shift = 0> : Operand<i32>,
@@ -53,12 +66,14 @@ class uimm<int num, int shift = 0> : Operand<i32>,
!if(!ne(shift, 0),
UImmAsmOperand<num, "Shift"#shift>,
UImmAsmOperand<num>);
+ let DecoderMethod = "decodeUImmOperand<"#num#", "#shift#">";
}
class simm<int num, int shift = 0> : Operand<i32>,
ImmLeaf<i32, "return isShiftedInt<"#num#", "#shift#">(Imm);"> {
let EncoderMethod = "getImmOpValue<"#shift#">";
let ParserMatchClass = SImmAsmOperand<num>;
+ let DecoderMethod = "decodeSImmOperand<"#num#", "#shift#">";
}
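The DecoderMethod strings added to these operand classes name templated helpers that the generated disassembler calls for each immediate field. Those helpers live in the CSKY disassembler sources, which are not part of this hunk; the sketch below shows the conventional shape of one of them, with the exact signature treated as an assumption.

// Conventional shape of the helper named by decodeUImmOperand<num, shift>;
// the signature and error handling are assumptions, not this patch.
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/MathExtras.h"
#include <cstdint>

template <unsigned N, unsigned S = 0>
static llvm::MCDisassembler::DecodeStatus
decodeUImmOperand(llvm::MCInst &Inst, uint64_t Imm, int64_t /*Address*/,
                  const void * /*Decoder*/) {
  if (!llvm::isUInt<N>(Imm))
    return llvm::MCDisassembler::Fail;
  // The encoded field holds the operand value shifted right by S bits.
  Inst.addOperand(llvm::MCOperand::createImm(Imm << S));
  return llvm::MCDisassembler::Success;
}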
def nimm_XFORM : SDNodeXForm<imm, [{
@@ -73,14 +88,19 @@ def uimm32_hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((N->getZExtValue() >> 16) & 0xFFFF,
SDLoc(N), MVT::i32);
}]>;
+def uimm32_lo16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue() & 0xFFFF, SDLoc(N), MVT::i32);
+}]>;
def uimm16_16_xform : Operand<i32>,
ImmLeaf<i32, "return isShiftedUInt<16, 16>(Imm);", uimm32_hi16> {
let ParserMatchClass = UImmAsmOperand<16>;
+ let EncoderMethod = "getImmOpValue";
}
def uimm_shift : Operand<i32>, ImmLeaf<i32, "return isUInt<2>(Imm);"> {
let EncoderMethod = "getImmShiftOpValue";
let ParserMatchClass = UImmAsmOperand<2>;
+ let DecoderMethod = "decodeImmShiftOpValue";
}
def CSKYSymbol : AsmOperandClass {
@@ -94,16 +114,22 @@ def br_symbol : Operand<iPTR> {
let EncoderMethod =
"getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm16_scale2>";
let ParserMatchClass = CSKYSymbol;
+ let DecoderMethod = "decodeSImmOperand<16, 1>";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let OperandType = "OPERAND_PCREL";
}
def call_symbol : Operand<iPTR> {
let ParserMatchClass = CSKYSymbol;
let EncoderMethod = "getCallSymbolOpValue";
+ let DecoderMethod = "decodeSImmOperand<26, 1>";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let OperandType = "OPERAND_PCREL";
}
def Constpool : AsmOperandClass {
- let Name = "ConstpoolSymbol";
- let RenderMethod = "addImmOperands";
+ let Name = "Constpool";
+ let RenderMethod = "addConstpoolOperands";
let DiagnosticType = "InvalidConstpool";
let ParserMethod = "parseConstpoolSymbol";
}
@@ -112,24 +138,132 @@ def constpool_symbol : Operand<iPTR> {
let ParserMatchClass = Constpool;
let EncoderMethod =
"getConstpoolSymbolOpValue<CSKY::fixup_csky_pcrel_uimm16_scale4>";
+ let DecoderMethod = "decodeUImmOperand<16, 2>";
+ let PrintMethod = "printConstpool";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def DataAsmClass : AsmOperandClass {
+ let Name = "DataSymbol";
+ let RenderMethod = "addConstpoolOperands";
+ let DiagnosticType = "InvalidConstpool";
+ let ParserMethod = "parseDataSymbol";
+}
+
+class data_symbol<string reloc, int shift> : Operand<iPTR> {
+ let ParserMatchClass = Constpool;
+ let EncoderMethod =
+ "getDataSymbolOpValue<"#reloc#">";
+ let DecoderMethod = "decodeUImmOperand<18, "#shift#">";
+ let PrintMethod = "printDataSymbol";
}
def bare_symbol : Operand<iPTR> {
let ParserMatchClass = CSKYSymbol;
let EncoderMethod = "getBareSymbolOpValue";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let DecoderMethod = "decodeSImmOperand<18, 1>";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def oimm3 : oimm<3>;
+def oimm4 : oimm<4>;
+def oimm5 : oimm<5>;
+def oimm6 : oimm<6>;
+
+def imm5_idly : Operand<i32>, ImmLeaf<i32,
+ "return Imm <= 32 && Imm >= 0;"> {
+ let EncoderMethod = "getImmOpValueIDLY";
+ let DecoderMethod = "decodeOImmOperand<5>";
}
+def oimm8 : oimm<8>;
def oimm12 : oimm<12>;
def oimm16 : oimm<16>;
def nimm12 : nimm<12>;
+def uimm1 : uimm<1>;
+def uimm2 : uimm<2>;
+
+
+def uimm2_jmpix : Operand<i32>,
+ ImmLeaf<i32, "return Imm == 16 || Imm == 24 || Imm == 32 || Imm == 40;"> {
+ let EncoderMethod = "getImmJMPIX";
+ let DecoderMethod = "decodeJMPIXImmOperand";
+}
+
+def uimm3 : uimm<3>;
+def uimm4 : uimm<4>;
def uimm5 : uimm<5>;
+def uimm5_msb_size : uimm<5> {
+ let EncoderMethod = "getImmOpValueMSBSize";
+}
+
+def uimm5_1 : uimm<5, 1>;
+def uimm5_2 : uimm<5, 2>;
+def uimm6 : uimm<6>;
+def uimm7 : uimm<7>;
+def uimm7_1 : uimm<7, 1>;
+def uimm7_2 : uimm<7, 2>;
+def uimm7_3 : uimm<7, 3>;
+def uimm8 : uimm<8>;
+def uimm8_2 : uimm<8, 2>;
+def uimm8_3 : uimm<8, 3>;
+def uimm8_8 : uimm<8, 8>;
+def uimm8_16 : uimm<8, 16>;
+def uimm8_24 : uimm<8, 24>;
def uimm12 : uimm<12>;
def uimm12_1 : uimm<12, 1>;
def uimm12_2 : uimm<12, 2>;
def uimm16 : uimm<16>;
+def uimm16_8 : uimm<16, 8>;
+def uimm16_16 : uimm<16, 16>;
+def uimm20 : uimm<20>;
+def uimm24 : uimm<24>;
+def uimm24_8 : uimm<24, 8>;
+
+def simm8_2 : simm<8, 2>;
+
+class RegSeqAsmOperand<string Suffix = ""> : AsmOperandClass {
+ let Name = "RegSeq"#Suffix;
+ let RenderMethod = "addRegSeqOperands";
+ let DiagnosticType = "InvalidRegSeq";
+ let ParserMethod = "parseRegSeq";
+}
+
+def regseq : Operand<iPTR> {
+ let EncoderMethod = "getRegisterSeqOpValue";
+ let ParserMatchClass = RegSeqAsmOperand<"">;
+ let PrintMethod = "printRegisterSeq";
+ let DecoderMethod = "DecodeRegSeqOperand";
+ let MIOperandInfo = (ops GPR, uimm5);
+}
+def RegListAsmOperand : AsmOperandClass {
+ let Name = "RegList";
+ let RenderMethod = "addRegListOperands";
+ let DiagnosticType = "InvalidRegList";
+ let ParserMethod = "parseRegList";
+}
+
+def reglist : Operand<iPTR> {
+ let ParserMatchClass = RegListAsmOperand;
+ let PrintMethod = "printRegisterList";
+}
+
+def PSRFlag : AsmOperandClass {
+ let Name = "PSRFlag";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidPSRFlag";
+ let ParserMethod = "parsePSRFlag";
+}
+
+def psrflag : Operand<i32>, ImmLeaf<i32, "return isShiftedUInt<5, 0>(Imm);"> {
+ let EncoderMethod = "getImmOpValue";
+ let ParserMatchClass = PSRFlag;
+ let PrintMethod = "printPSRFlag";
+}
//===----------------------------------------------------------------------===//
// Instruction Formats
@@ -145,12 +279,33 @@ class TriOpFrag<dag res> : PatFrag<(ops node: $LHS, node:$MHS, node:$RHS), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
+def eqToAdd : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs), [{
+ return isOrEquivalentToAdd(N);
+}]>;
+
+def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
+
+
+//===----------------------------------------------------------------------===//
+// CSKYPseudo
+//===----------------------------------------------------------------------===//
+
+// Pessimistically assume the stack pointer will be clobbered
+let Defs = [R14], Uses = [R14] in {
+def ADJCALLSTACKDOWN : CSKYPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKDOWN $amt1, $amt2", [(callseq_start timm:$amt1, timm:$amt2)]>;
+def ADJCALLSTACKUP : CSKYPseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "!ADJCALLSTACKUP $amt1, $amt2", [(callseq_end timm:$amt1, timm:$amt2)]>;
+} // Defs = [R14], Uses = [R14]
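These pseudos are matched from the CALLSEQ_START/CALLSEQ_END nodes that call lowering wraps around every call; the producer side is not in this hunk. A hedged sketch of it, with the helper name purely illustrative:

// Illustrative producer for the callseq_start pattern above: call lowering
// emits ISD::CALLSEQ_START, which ISel then matches to ADJCALLSTACKDOWN.
#include "llvm/CodeGen/SelectionDAG.h"

static llvm::SDValue beginCallSequence(llvm::SelectionDAG &DAG,
                                       llvm::SDValue Chain, uint64_t NumBytes,
                                       const llvm::SDLoc &DL) {
  return DAG.getCALLSEQ_START(Chain, NumBytes, /*OutSize=*/0, DL);
}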
//===----------------------------------------------------------------------===//
// Basic ALU instructions.
//===----------------------------------------------------------------------===//
+let Predicates = [iHasE2] in {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ let isAdd = 1 in
def ADDI32 : I_12<0x0, "addi32", add, oimm12>;
def SUBI32 : I_12<0x1, "subi32", sub, oimm12>;
def ORI32 : I_16_ZX<"ori32", uimm16,
@@ -171,11 +326,15 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
(outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5),
[(set GPR:$rz, (rotl GPR:$rx, uimm5:$imm5))]>;
-
+ def ROTRI32 : CSKYPseudo<(outs GPR:$rz), (ins GPR:$rx, oimm5:$imm5),
+ "rotri32 $rz, $rx, $imm5", []>;
+ }
+ let isAdd = 1 in
def ADDU32 : R_YXZ_SP_F1<0x0, 0x1,
BinOpFrag<(add node:$LHS, node:$RHS)>, "addu32", 1>;
def SUBU32 : R_YXZ_SP_F1<0x0, 0x4,
BinOpFrag<(sub node:$LHS, node:$RHS)>, "subu32">;
+
def MULT32 : R_YXZ_SP_F1<0x21, 0x1,
BinOpFrag<(mul node:$LHS, node:$RHS)>, "mult32", 1>;
def AND32 : R_YXZ_SP_F1<0x8, 0x1,
@@ -188,8 +347,16 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
BinOpFrag<(xor node:$LHS, node:$RHS)>, "xor32", 1>;
def NOR32 : R_YXZ_SP_F1<0x9, 0x4,
BinOpFrag<(not (or node:$LHS, node:$RHS))>, "nor32", 1>;
+ let isCodeGenOnly = 1 in
def NOT32 : R_XXZ<0b001001, 0b00100, (outs GPR:$rz), (ins GPR:$rx),
"not32", [(set GPR:$rz, (not GPR:$rx))]>;
+
+ let Size = 8 in
+ def NEG32 : CSKYPseudo<(outs GPR:$rd), (ins GPR:$rx), "neg32 $rd, $rx", []>;
+
+ let Size = 8 in
+ def RSUBI32 : CSKYPseudo<(outs GPR:$rd), (ins GPR:$rx, uimm12:$imm12), "rsubi32 $rd, $rx, $imm12", []>;
+
def LSL32 : R_YXZ_SP_F1<0x10, 0x1,
BinOpFrag<(shl node:$LHS, node:$RHS)>, "lsl32">;
def LSR32 : R_YXZ_SP_F1<0x10, 0x2,
@@ -199,23 +366,37 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
def ROTL32 : R_YXZ_SP_F1<0x10, 0x8,
BinOpFrag<(rotl node:$LHS, (and node:$RHS, 0x1f))>, "rotl32">;
- // TODO: Shift series instr. with carry.
+ def BMASKI32 : I_5_Z<0b010100, 0x1, "bmaski32", oimm5, []>;
+ def LSLC32 : I_5_XZ<0x13, 0x1, "lslc32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5), []>;
+ def LSRC32 : I_5_XZ<0x13, 0x2, "lsrc32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5), []>;
+ def ASRC32 : I_5_XZ<0x13, 0x4, "asrc32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5), []>;
+ def XSR32 : I_5_XZ<0x13, 0x8, "xsr32",
+ (outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, oimm5:$imm5, CARRY:$cin), []>;
def IXH32 : R_YXZ_SP_F1<0x2, 0x1,
BinOpFrag<(add node:$LHS, (shl node:$RHS, (i32 1)))>, "ixh32">;
def IXW32 : R_YXZ_SP_F1<0x2, 0x2,
BinOpFrag<(add node:$LHS, (shl node:$RHS, (i32 2)))>, "ixw32">;
-
+ let Predicates = [iHas2E3] in
def IXD32 : R_YXZ_SP_F1<0x2, 0x4,
BinOpFrag<(add node:$LHS, (shl node:$RHS, (i32 3)))>, "ixd32">;
- let isCommutable = 1 in
+ let isCommutable = 1, isAdd = 1 in
def ADDC32 : R_YXZ<0x31, 0x0, 0x2, (outs GPR:$rz, CARRY:$cout),
(ins GPR:$rx, GPR:$ry, CARRY:$cin), "addc32", []>;
def SUBC32 : R_YXZ<0x31, 0x0, 0x8, (outs GPR:$rz, CARRY:$cout),
(ins GPR:$rx, GPR:$ry, CARRY:$cin), "subc32", []>;
- // TODO: incf32.
+ def INCF32 : I_5_ZX<0x3, 0x1, "incf32", uimm5, []>;
+ def INCT32 : I_5_ZX<0x3, 0x2, "inct32", uimm5, []>;
+ def DECF32 : I_5_ZX<0x3, 0x4, "decf32", uimm5, []>;
+ def DECT32 : I_5_ZX<0x3, 0x8, "dect32", uimm5, []>;
+}
+
+let Predicates = [iHas2E3] in {
def DIVS32 : R_YXZ_SP_F1<0x20, 0x2,
BinOpFrag<(sdiv node:$LHS, node:$RHS)>, "divs32">;
def DIVU32 : R_YXZ_SP_F1<0x20, 0x1,
@@ -228,11 +409,35 @@ class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
def DECNE32 : I_5_XZ<0x4, 0x4, "decne32",
(outs GPR:$rz, CARRY:$cout), (ins GPR:$rx, uimm5:$imm5), []>;
- // TODO: s/zext.
- def ZEXT32 : I_5_XZ_U<0x15, (outs GPR:$rz),
- (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "zext32",[]>;
- def SEXT32 : I_5_XZ_U<0x16, (outs GPR:$rz),
- (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "sext32", []>;
+ def SEXT32 : I_5_XZ_U<0x16, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "sext32", []>;
+ let isCodeGenOnly = 1 in {
+ def SEXTB32 : I_5_XZ_US<0x16, 0, 7, "sextb32", sext_inreg, i8>;
+ def SEXTH32 : I_5_XZ_US<0x16, 0, 15, "sexth32", sext_inreg, i16>;
+ def ZEXTB32 : I_5_XZ_UZ<0x15, 0, 7, "zextb32", 255>;
+ def ZEXTH32 : I_5_XZ_UZ<0x15, 0, 15, "zexth32", 65535>;
+ }
+ def ZEXT32 : I_5_XZ_U<0x15, (outs GPR:$rz), (ins GPR:$rx, uimm5:$msb, uimm5:$lsb), "zext32",[]>;
+
+ let Constraints = "$rZ = $rz" in
+ def INS32 : I_5_XZ_INS<0b010111, (outs GPR:$rz), (ins GPR:$rZ, GPR:$rx, uimm5_msb_size:$msb, uimm5:$lsb), "ins32", []>;
+}
+
+let Predicates = [iHas3E3r1] in {
+def MULTS32 : R_YXZ<0x3e, 0x20, 0x10, (outs GPRPair:$rz),
+ (ins GPR:$rx, GPR:$ry), "mul.s32", []>;
+def MULTU32 : R_YXZ<0x3e, 0x20, 0x00, (outs GPRPair:$rz),
+ (ins GPR:$rx, GPR:$ry), "mul.u32", []>;
+
+let Constraints = "$rZ = $rz" in {
+def MULATS32 : R_YXZ<0x3e, 0x20, 0x14, (outs GPRPair:$rZ),
+ (ins GPRPair:$rz, GPR:$rx, GPR:$ry), "mula.s32", []>;
+def MULATU32 : R_YXZ<0x3e, 0x20, 0x04, (outs GPRPair:$rZ),
+ (ins GPRPair:$rz, GPR:$rx, GPR:$ry), "mula.u32", []>;
+}
+}
+
+def MULSH32 : R_YXZ<0x31, 0b100100, 0b00001, (outs GPR:$rz),
+ (ins GPR:$rx, GPR:$ry), "mulsh32", []>;
//===----------------------------------------------------------------------===//
// Load & Store instructions.
@@ -242,18 +447,35 @@ def LD32B : I_LD<AddrMode32B, 0x0, "ld32.b", uimm12>;
def LD32H : I_LD<AddrMode32H, 0x1, "ld32.h", uimm12_1>;
def LD32W : I_LD<AddrMode32WD, 0x2, "ld32.w", uimm12_2>;
+let OutOperandList = (outs GPRPair:$rz) in
+def LD32D : I_LD<AddrMode32WD, 0x3, "ld32.d", uimm12_2>;
+let Predicates = [iHasE2] in {
def LD32BS : I_LD<AddrMode32B, 0x4, "ld32.bs", uimm12>;
def LD32HS : I_LD<AddrMode32H, 0x5, "ld32.hs", uimm12_1>;
- // TODO: LDM and STM.
+ def LDM32 : I_5_YX<0b110100, 0b000111,
+ (outs), (ins GPR:$rx, regseq:$regs, variable_ops), "ldm32\t$regs, (${rx})", []>;
+ def STM32 : I_5_YX<0b110101, 0b000111,
+ (outs), (ins GPR:$rx, regseq:$regs, variable_ops), "stm32\t$regs, (${rx})", []>;
+ let Size = 4, isCodeGenOnly = 0 in {
+ def LDQ32 : CSKYPseudo<(outs), (ins GPR:$rx, regseq:$regs, variable_ops),
+ "ldq32\t$regs, (${rx})", []>;
+ def STQ32 : CSKYPseudo<(outs), (ins GPR:$rx, regseq:$regs, variable_ops),
+ "stq32\t$regs, (${rx})", []>;
+ }
+
+}
def ST32B : I_ST<AddrMode32B, 0x0, "st32.b", uimm12>;
def ST32H : I_ST<AddrMode32H, 0x1, "st32.h", uimm12_1>;
def ST32W : I_ST<AddrMode32WD, 0x2, "st32.w", uimm12_2>;
+let InOperandList = (ins GPRPair:$rz, GPR:$rx, uimm12_2:$imm12 ) in
+def ST32D : I_ST<AddrMode32WD, 0x3, "st32.d", uimm12_2>;
+let Predicates = [iHas2E3] in {
def LDR32B : I_LDR<0x0, "ldr32.b">;
def LDR32BS : I_LDR<0x4, "ldr32.bs">;
def LDR32H : I_LDR<0x1, "ldr32.h">;
@@ -262,42 +484,100 @@ def ST32W : I_ST<AddrMode32WD, 0x2, "st32.w", uimm12_2>;
def STR32B : I_STR<0x0, "str32.b">;
def STR32H : I_STR<0x1, "str32.h">;
def STR32W : I_STR<0x2, "str32.w">;
+}
+
+// Indicate that we're spilling the CR (carry) register, so we'll need to
+// scavenge a GPR to stage its value.
+let mayStore = 1 in {
+def SPILL_CARRY : CSKYPseudo<(outs), (ins CARRY:$cond, GPR:$rx, uimm12_2:$imm),
+ "!SPILL_CARRY $cond, $rx, $imm", []>;
+}
+
+// Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in {
+def RESTORE_CARRY : CSKYPseudo<(outs CARRY:$cond), (ins GPR:$rx, uimm12_2:$imm),
+ "!RESTORE_CARRY $cond, $rx, $imm", []>;
+}
- //TODO: SPILL_CARRY and RESTORE_CARRY.
+let mayStore = 1 in {
+def STORE_PAIR : CSKYPseudo<(outs), (ins GPRPair:$rz, GPR:$rx, uimm12_2:$imm),
+ "!STORE_PAIR $rz, $rx, $imm", []>;
+}
+
+let mayLoad = 1 in {
+def LOAD_PAIR : CSKYPseudo<(outs GPRPair:$rz), (ins GPR:$rx, uimm12_2:$imm),
+ "!LOAD_PAIR $rz, $rx, $imm", []>;
+}
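As the comments above note, SPILL_CARRY and RESTORE_CARRY move the carry bit through memory, which means a GPR has to be scavenged to stage the value (mvc32/mvcv32 paired with an ordinary st32.w/ld32.w). The expansion itself is not in this hunk; one hypothetical way the scratch register could be obtained during frame-index elimination:

// Sketch only: obtaining the staging GPR that the carry spill/restore pseudos
// need. Where and how the expansion runs is an assumption, not this patch.
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

static llvm::Register
scavengeCarryScratch(llvm::RegScavenger &RS,
                     llvm::MachineBasicBlock::iterator MI,
                     const llvm::TargetRegisterClass &GPRClass) {
  // Picks a GPR that is free at MI (spilling one if necessary); the caller
  // then copies the carry through it with mvc32/mvcv32 and st32.w/ld32.w.
  return RS.scavengeRegister(&GPRClass, MI, /*SPAdj=*/0);
}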
//===----------------------------------------------------------------------===//
// Compare instructions.
//===----------------------------------------------------------------------===//
-
+let Predicates = [iHasE2] in {
def CMPNEI32 : I_16_X<0x1A, "cmpnei32", uimm16>;
def CMPHSI32 : I_16_X<0x18, "cmphsi32", oimm16>;
def CMPLTI32 : I_16_X<0x19, "cmplti32", oimm16>;
-
-
+ def CMPLEI32 : CSKYPseudo<(outs CARRY:$ca), (ins GPR:$rx, uimm16:$imm16),
+ "cmplei32\t$rx, $imm16", []>;
+}
+let Predicates = [iHas2E3] in {
def CMPNE32 : R_YX<0x1, 0x4, "cmpne32">;
def CMPHS32 : R_YX<0x1, 0x1, "cmphs32">;
def CMPLT32 : R_YX<0x1, 0x2, "cmplt32">;
- // TODO: setc and clrc.
- // TODO: test32 and tstnbz.
+ def SETC32 : CSKY32Inst<AddrModeNone, 0x31,
+ (outs CARRY:$ca), (ins), "setc32", []> {
+ let Inst{25 - 21} = 0; //rx
+ let Inst{20 - 16} = 0; //ry
+ let Inst{15 - 10} = 0x1;
+ let Inst{9 - 5} = 0x1;
+ let Inst{4 - 0} = 0;
+ let isCompare = 1;
+ }
+ def CLRC32 : CSKY32Inst<AddrModeNone, 0x31,
+ (outs CARRY:$ca), (ins), "clrc32", []> {
+ let Inst{25 - 21} = 0; //rx
+ let Inst{20 - 16} = 0; //ry
+ let Inst{15 - 10} = 0x1;
+ let Inst{9 - 5} = 0x4;
+ let Inst{4 - 0} = 0;
+ let isCompare = 1;
+ }
+
+ def TST32 : R_YX<0x8, 0x4, "tst32">;
+ def TSTNBZ32 : R_X<0x8, 0x8,
+ (outs CARRY:$ca), (ins GPR:$rx), "tstnbz32", []>;
+}
//===----------------------------------------------------------------------===//
// Data move instructions.
//===----------------------------------------------------------------------===//
+let Predicates= [iHasE2] in {
+ let isCodeGenOnly = 1 in {
def MOVT32 : R_ZX<0x3, 0x2, "movt32", []>;
def MOVF32 : R_ZX<0x3, 0x1, "movf32", []>;
+ }
def MOVI32 : I_16_MOV<0x10, "movi32", uimm16>;
+ let Size = 4, isCodeGenOnly = 0 in
+ def BGENI : CSKYPseudo<(outs GPR:$dst), (ins uimm5:$imm), "bgeni\t$dst, $imm", []>;
+ def : InstAlias<"bgeni16 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
+ def : InstAlias<"bgeni32 $dst, $imm", (BGENI GPR:$dst, uimm5:$imm)>;
def MOVIH32 : I_16_MOV<0x11, "movih32", uimm16_16_xform>;
def MVC32 : R_Z_1<0x1, 0x8, "mvc32">;
+ let isCodeGenOnly = 1 in
def MOV32 : R_XZ<0x12, 0x1, "mov32">;
- // TODO: ISEL Pseudo.
+ let usesCustomInserter = 1 in
+ def ISEL32 : CSKYPseudo<(outs GPR:$dst), (ins CARRY:$cond, GPR:$src1, GPR:$src2),
+ "!isel32\t$dst, $src1, src2", [(set GPR:$dst, (select CARRY:$cond, GPR:$src1, GPR:$src2))]>;
+}
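ISEL32 sets usesCustomInserter, so the select is expanded after instruction selection into a small branch diamond that ends in a PHI. The real EmitInstrWithCustomInserter code is not in this hunk; the sketch below shows only the control-flow surgery such an expansion typically performs, and deliberately leaves out the BT32 branch and the PHI construction because their exact operand order is not visible here.

// Illustrative CFG surgery for a select-on-carry pseudo such as ISEL32.
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <iterator>

static llvm::MachineBasicBlock *splitForSelect(llvm::MachineInstr &MI,
                                               llvm::MachineBasicBlock *BB) {
  using namespace llvm;
  // HeadBB:  conditional branch over FalseBB when the carry picks src1.
  // FalseBB: empty block supplying the edge that carries src2.
  // TailBB:  dst = PHI(src1 from HeadBB, src2 from FalseBB), rest of HeadBB.
  const BasicBlock *LLVMBB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineBasicBlock *FalseBB = MF->CreateMachineBasicBlock(LLVMBB);
  MachineBasicBlock *TailBB = MF->CreateMachineBasicBlock(LLVMBB);
  MF->insert(It, FalseBB);
  MF->insert(It, TailBB);

  // Everything after the select moves into the tail block.
  TailBB->splice(TailBB->begin(), BB,
                 std::next(MachineBasicBlock::iterator(MI)), BB->end());
  TailBB->transferSuccessorsAndUpdatePHIs(BB);

  BB->addSuccessor(FalseBB);
  BB->addSuccessor(TailBB);
  FalseBB->addSuccessor(TailBB);

  // BuildMI(...) calls for the conditional branch and the PHI would go here.
  MI.eraseFromParent();
  return TailBB;
}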
+let Predicates = [iHas2E3] in {
def MVCV32 : R_Z_1<0x1, 0x10, "mvcv32">;
- // TODO: clrf and clrt.
def CLRF32 : R_Z_2<0xB, 0x1, "clrf32", []>;
def CLRT32 : R_Z_2<0xB, 0x2, "clrt32", []>;
+}
//===----------------------------------------------------------------------===//
// Branch and call instructions.
@@ -309,12 +589,12 @@ let isBranch = 1, isTerminator = 1 in {
[(br bb:$imm16)]>;
def BT32 : I_16_L<0x3, (outs), (ins CARRY:$ca, br_symbol:$imm16),
- "bt32\t$imm16", [(brcond CARRY:$ca, bb:$imm16)]>;
+ "bt32\t$imm16", [(brcond CARRY:$ca, bb:$imm16)]>, Requires<[iHasE2]>;
def BF32 : I_16_L<0x2, (outs), (ins CARRY:$ca, br_symbol:$imm16),
- "bf32\t$imm16", []>;
+ "bf32\t$imm16", []>, Requires<[iHasE2]>;
}
-
+let Predicates = [iHas2E3] in {
def BEZ32 : I_16_X_L<0x8, "bez32", br_symbol>;
def BNEZ32 : I_16_X_L<0x9, "bnez32", br_symbol>;
def BHZ32 : I_16_X_L<0xA, "bhz32", br_symbol>;
@@ -334,10 +614,25 @@ let isBranch = 1, isTerminator = 1 in {
let isCall = 1, Defs = [ R15 ] , mayLoad = 1 in
def JSRI32: I_16_L<0x17, (outs),
(ins constpool_symbol:$imm16), "jsri32\t$imm16", []>;
+}
+def BNEZAD32 : CSKY32Inst<AddrModeNone, 0x3a,
+ (outs GPR:$rx_u), (ins GPR:$rx, br_symbol:$imm16), "bnezad32\t$rx, $imm16", []> {
+ bits<5> rx;
+ bits<16> imm16;
+ let Inst{25 - 21} = 0x1;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 0} = imm16;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let Constraints = "$rx_u = $rx";
+ let Predicates = [iHas2E3, iHas10E60];
+}
def BSR32 : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>;
+def : InstAlias<"bsr $dst", (BSR32 call_symbol:$dst)>;
+
def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
let isCodeGenOnly = 1;
let isBranch = 1;
@@ -347,27 +642,310 @@ def BSR32_BR : J<0x38, (outs), (ins call_symbol:$offset), "bsr32", []>{
let Defs = [ R15 ];
}
-
+let Predicates = [iHasE2], isCodeGenOnly = 1 in {
def RTS32 : I_16_RET<0x6, 0xF, "rts32", [(CSKY_RET)]>;
+}
-def RTE32 : I_16_RET_I<0, 0, "rte32", []>;
-
//===----------------------------------------------------------------------===//
// Symbol address instructions.
//===----------------------------------------------------------------------===//
+def data_symbol_b : data_symbol<"CSKY::fixup_csky_doffset_imm18", 0>;
+def data_symbol_h : data_symbol<"CSKY::fixup_csky_doffset_imm18_scale2", 1>;
+def data_symbol_w : data_symbol<"CSKY::fixup_csky_doffset_imm18_scale4", 2> {
+ let ParserMatchClass = DataAsmClass;
+}
+
+let Predicates = [iHas2E3] in {
+
def GRS32 : I_18_Z_L<0x3, "grs32\t$rz, $offset",
(outs GPR:$rz), (ins bare_symbol:$offset), []>;
+def : InstAlias<"grs\t$rz, $offset", (GRS32 GPR:$rz, bare_symbol:$offset)>;
+
+let Uses = [R28] in {
+def LRS32B : I_18_Z_L<0x0, "lrs32.b\t$rz, $offset",
+ (outs GPR:$rz), (ins data_symbol_b:$offset), []>;
+def LRS32H : I_18_Z_L<0x1, "lrs32.h\t$rz, $offset",
+ (outs GPR:$rz), (ins data_symbol_h:$offset), []>;
+def LRS32W : I_18_Z_L<0x2, "lrs32.w\t$rz, $offset",
+ (outs GPR:$rz), (ins data_symbol_w:$offset), []>;
+def SRS32B : I_18_Z_L<0x4, "srs32.b\t$rz, $offset",
+ (outs), (ins GPR:$rz, data_symbol_b:$offset), []>;
+def SRS32H : I_18_Z_L<0x5, "srs32.h\t$rz, $offset",
+ (outs), (ins GPR:$rz, data_symbol_h:$offset), []>;
+def SRS32W : I_18_Z_L<0x6, "srs32.w\t$rz, $offset",
+ (outs), (ins GPR:$rz, data_symbol_w:$offset), []>;
+}
+
+def PUSH32 : I_12_PP<0b11111, 0b00000, (outs), (ins reglist:$regs, variable_ops), "push32 $regs">;
+
+let Uses = [R14, R15], isReturn = 1, isTerminator = 1, isBarrier = 1 in
+def POP32 : I_12_PP<0b11110, 0b00000, (outs), (ins reglist:$regs, variable_ops), "pop32 $regs">;
+
+}
let mayLoad = 1, mayStore = 0 in {
def LRW32 : I_16_Z_L<0x14, "lrw32", (ins constpool_symbol:$imm16), []>;
let isCodeGenOnly = 1 in
-def LRW32_Gen : I_16_Z_L<0x14, "lrw32",
- (ins bare_symbol:$src1, constpool_symbol:$imm16), []>;
+def LRW32_Gen : I_16_Z_L<0x14, "lrw32", (ins bare_symbol:$src1, constpool_symbol:$imm16), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Atomic and fence instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [iHasMP1E2] in {
+ def BRWARW : BAR<0b01111, "bar.brwarw", 0>;
+ def BRWARWS : BAR<0b01111, "bar.brwarws", 1>;
+ def BRARW : BAR<0b00111, "bar.brarw", 0>;
+ def BRARWS : BAR<0b00111, "bar.brarws", 1>;
+ def BRWAW : BAR<0b01110, "bar.brwaw", 0>;
+ def BRWAWS : BAR<0b01110, "bar.brwaws", 1>;
+ def BRAR : BAR<0b00101, "bar.brar", 0>;
+ def BRARS : BAR<0b00101, "bar.brars", 1>;
+ def BWAW : BAR<0b01010, "bar.bwaw", 0>;
+ def BWAWS : BAR<0b01010, "bar.bwaws", 1>;
+
+ def LDEX32W : I_LD<AddrMode32WD, 0x7, "ldex32.w", uimm12_2>;
+ let Constraints = "$rd = $rz" in
+ def STEX32W : I_LDST<AddrMode32WD, 0x37, 7,
+ (outs GPR:$rd), (ins GPR:$rz, GPR:$rx, uimm12_2:$imm12), "stex32.w", []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Other operation instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [iHas2E3] in {
+ def BREV32 : R_XZ<0x18, 0x10, "brev32">;
+ def ABS32 : R_XZ<0x0, 0x10, "abs32">;
+ def BGENR32 : R_XZ<0x14, 0x2, "bgenr32">;
+}
+
+let Predicates = [iHasE2] in {
+ def REVB32 : R_XZ<0x18, 0x4, "revb32">;
+ def REVH32 : R_XZ<0x18, 0x8, "revh32">;
+ def FF0 : R_XZ<0x1F, 0x1, "ff0.32">;
+ def FF1 : R_XZ<0x1F, 0x2, "ff1.32">;
+ def XTRB0 : R_XZ<0x1C, 0x1, "xtrb0.32">;
+ def XTRB1 : R_XZ<0x1C, 0x2, "xtrb1.32">;
+ def XTRB2 : R_XZ<0x1C, 0x4, "xtrb2.32">;
+ def XTRB3 : R_XZ<0x1C, 0x8, "xtrb3.32">;
+ def BTSTI32 : I_5_X<0x0A, 0x4, "btsti32", uimm5, []>;
+ def BCLRI32 : I_5_XZ<0xA, 0x1, "bclri32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
+ def BSETI32 : I_5_XZ<0xA, 0x2, "bseti32",
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instructions.
+//===----------------------------------------------------------------------===//
+
+def MFFCR : CSKY32Inst<AddrModeNone, 0x30,
+ (outs GPR:$rx), (ins), "mfcr\t$rx, fcr", []> {
+ bits<5> rx;
+
+ let Inst{25 - 21} = 0b00010;
+ let Inst{20 - 16} = 0b00001;
+ let Inst{15 - 10} = 0b011000;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = rx;
+ let hasSideEffects = 1;
+ let isCodeGenOnly = 1;
+}
+
+def MTFCR : CSKY32Inst<AddrModeNone, 0x30,
+ (outs), (ins GPR:$rx), "mtcr\t$rx, fcr", []> {
+ bits<5> rx;
+
+ let Inst{25 - 21} = 0b00010;
+ let Inst{20 - 16} = rx;
+ let Inst{15 - 10} = 0b011001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0b00001;
+ let hasSideEffects = 1;
+ let isCodeGenOnly = 1;
+}
+
+def SYNC32 : I_5_IMM5<0x30, 0b000001, 0b00001, "sync32", uimm5, []>;
+
+def SYNC0_32 : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32", []> {
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+def SYNC_32_I : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32.i", []> {
+ let Inst{25 - 21} = 1;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+def SYNC_32_S : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32.s", []> {
+ let Inst{25 - 21} = 0b10000;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+}
+
+def SYNC_32_IS : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins),
+ "sync32.is", []> {
+ let Inst{25 - 21} = 0b10001;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 10} = 0b000001;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
}
-// TODO: Atomic and fence instructions.
-// TODO: Other operations.
-// TODO: Special instructions.
-// TODO: Pseudo for assembly.
+let Predicates = [iHas2E3] in {
+ def RFI32 : I_5_XZ_PRIVI<0x11, 0x1, "rfi32">;
+ def SCE32 : I_5_IMM5<0x30, 0b000110, 0b00001, "sce32", uimm4, []>;
+}
+let Predicates = [HasExtendLrw] in
+def IDLY32 : I_5_IMM5<0x30, 0b000111, 0b00001, "idly32", imm5_idly, []>;
+def STOP32 : I_5_XZ_PRIVI<0x12, 0x1, "stop32">;
+def WAIT32 : I_5_XZ_PRIVI<0x13, 0x1, "wait32">;
+def DOZE32 : I_5_XZ_PRIVI<0x14, 0x1, "doze32">;
+def WE32 : I_5_XZ_PRIVI<0b010101, 0x1, "we32">;
+def SE32 : I_5_XZ_PRIVI<0b010110, 0x1, "se32">;
+def WSC32 : I_5_XZ_PRIVI<0b001111, 0x1, "wsc32">;
+
+def CPOP32 : I_CPOP<(outs), (ins uimm5:$cpid, uimm20:$usdef), "cpop32 <$cpid, ${usdef}>">;
+def CPRC32 : I_CP<0b0100, (outs CARRY:$ca), (ins uimm5:$cpid, uimm12:$usdef), "cprc32 <$cpid, ${usdef}>">;
+def CPRCR32 : I_CP_Z<0b0010, (outs GPR:$rz), (ins uimm5:$cpid, uimm12:$usdef), "cprcr32 $rz, <$cpid, ${usdef}>">;
+def CPRGR32 : I_CP_Z<0b0000, (outs GPR:$rz), (ins uimm5:$cpid, uimm12:$usdef), "cprgr32 $rz, <$cpid, ${usdef}>">;
+def CPWCR32 : I_CP_Z<0b0011, (outs), (ins GPR:$rz, uimm5:$cpid, uimm12:$usdef), "cpwcr32 $rz, <$cpid, ${usdef}>">;
+def CPWGR32 : I_CP_Z<0b0001, (outs), (ins GPR:$rz, uimm5:$cpid, uimm12:$usdef), "cpwgr32 $rz, <$cpid, ${usdef}>">;
+
+let Predicates = [iHas3r2E3r3] in {
+def DCACHE_IALL32 : I_5_CACHE<0b100101, 0b01000, "dcache32.iall">;
+def DCACHE_CALL32 : I_5_CACHE<0b100101, 0b00100, "dcache32.call">;
+def DCACHE_CIALL32 : I_5_CACHE<0b100101, 0b01100, "dcache32.ciall">;
+def DCACHE_IVA32 : I_5_X_CACHE<0b100101, 0b01011, "dcache32.iva">;
+def DCACHE_ISW32: I_5_X_CACHE<0b100101, 0b01010, "dcache32.isw">;
+def DCACHE_CVA32 : I_5_X_CACHE<0b100101, 0b00111, "dcache32.cva">;
+def DCACHE_CVAL32 : I_5_X_CACHE<0b100101, 0b10111, "dcache32.cval1">;
+def DCACHE_CSW32 : I_5_X_CACHE<0b100101, 0b00110, "dcache32.csw">;
+def DCACHE_CIVA32 : I_5_X_CACHE<0b100101, 0b01111, "dcache32.civa">;
+def DCACHE_CISW32 : I_5_X_CACHE<0b100101, 0b01110, "dcache32.cisw">;
+
+def ICACHE_IALL32 : I_5_CACHE<0b100100, 0b01000, "icache32.iall">;
+def ICACHE_IALLS32 : I_5_CACHE<0b100100, 0b11000, "icache32.ialls">;
+def ICACHE_IVA32 : I_5_X_CACHE<0b100100, 0b01011, "icache32.iva">;
+
+def TLBI_VAA32 : I_5_X_CACHE<0b100010, 0b00010, "tlbi32.vaa">;
+def TLBI_VAAS32 : I_5_X_CACHE<0b100010, 0b10010, "tlbi32.vaas">;
+def TLBI_ASID32 : I_5_X_CACHE<0b100010, 0b00001, "tlbi32.asid">;
+def TLBI_ASIDS32 : I_5_X_CACHE<0b100010, 0b10001, "tlbi32.asids">;
+def TLBI_VA32 : I_5_X_CACHE<0b100010, 0b00011, "tlbi32.va">;
+def TLBI_VAS32 : I_5_X_CACHE<0b100010, 0b10011, "tlbi32.vas">;
+def TLBI_ALL32 : I_5_CACHE<0b100010, 0b00000, "tlbi32.all">;
+def TLBI_ALLS32 : I_5_CACHE<0b100010, 0b10000, "tlbi32.alls">;
+
+def L2CACHE_IALL : I_5_CACHE<0b100110, 0b01000, "l2cache.iall">;
+def L2CACHE_CALL : I_5_CACHE<0b100110, 0b00100, "l2cache.call">;
+def L2CACHE_CIALL : I_5_CACHE<0b100110, 0b01100, "l2cache.ciall">;
+}
+
+def PLDR32 :I_PLDR<AddrMode32WD, 0x36, 0b0110, (outs), (ins GPR:$rx, uimm12_2:$imm12), "pldr32", []>;
+def PLDW32 :I_PLDR<AddrMode32WD, 0x37, 0b0110, (outs), (ins GPR:$rx, uimm12_2:$imm12), "pldw32", []>;
+
+def TRAP32 : CSKY32Inst<AddrModeNone, 0x30, (outs), (ins uimm2:$imm2), "trap32 ${imm2}", []> {
+ bits<2> imm2;
+
+ let Inst{25 - 21} = 0;
+ let Inst{20 - 16} = 0;
+ let Inst{15 - 12} = 0b0010;
+ let Inst{11 - 10} = imm2;
+ let Inst{9 - 5} = 0b00001;
+ let Inst{4 - 0} = 0;
+
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo for assembly
+//===----------------------------------------------------------------------===//
+
+let isCall = 1, Defs = [ R15 ], mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
+def JBSR32 : CSKYPseudo<(outs), (ins call_symbol:$src1), "jbsr32\t$src1", []>;
+
+def : InstAlias<"jbsr\t$src1", (JBSR32 call_symbol:$src1)>;
+
+def JBR32 : CSKYPseudo<(outs), (ins br_symbol:$src1), "jbr32\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 4;
+}
+
+def JBT32 : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "jbt32\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 4;
+}
+
+def JBF32 : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "jbf32\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 4;
+}
+
+def JBT_E : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "!jbt_e\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 6;
+}
+
+def JBF_E : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "!jbf_e\t$src1", []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 6;
+}
+
+let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
+def PseudoLRW32 : CSKYPseudo<(outs GPR:$rz), (ins bare_symbol:$src), "lrw32 $rz, $src", []>;
+
+
+def : InstAlias<"lrw $rz, $src", (PseudoLRW32 GPR:$rz, bare_symbol:$src)>;
+def : InstAlias<"lrw $rz, $src", (LRW32 GPR:$rz, constpool_symbol:$src)>;
+
+let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
+def PseudoJSRI32 : CSKYPseudo<(outs), (ins call_symbol:$src), "jsri32 $src", []>;
+def : InstAlias<"jsri $dst", (PseudoJSRI32 call_symbol:$dst)>;
+def : InstAlias<"jsri $dst", (JSRI32 constpool_symbol:$dst)>;
+
+let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
+def PseudoJMPI32 : CSKYPseudo<(outs), (ins br_symbol:$src), "jmpi32 $src", []>;
+def : InstAlias<"jmpi $dst", (PseudoJMPI32 br_symbol:$dst)>;
+def : InstAlias<"jmpi $dst", (JMPI32 constpool_symbol:$dst)>;
+
+let isNotDuplicable = 1, mayLoad = 1, mayStore = 0, Size = 8 in
+def PseudoTLSLA32 : CSKYPseudo<(outs GPR:$dst1, GPR:$dst2),
+ (ins constpool_symbol:$src, i32imm:$label), "!tlslrw32\t$dst1, $dst2, $src, $label", []>;
+
+let hasSideEffects = 0, isNotDuplicable = 1 in
+def CONSTPOOL_ENTRY : CSKYPseudo<(outs),
+ (ins i32imm:$instid, i32imm:$cpidx, i32imm:$size), "", []>;
+
+include "CSKYInstrInfo16Instr.td"
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
new file mode 100644
index 000000000000..c98f43622155
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -0,0 +1,452 @@
+//===-- CSKYInstrInfo16Instr.td - CSKY 16-bit Instruction --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY 16-bit instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CSKY specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Target-dependent nodes.
+def CSKY_NIE : SDNode<"CSKYISD::NIE", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+def CSKY_NIR : SDNode<"CSKYISD::NIR", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def br_symbol_16bit : Operand<iPTR> {
+ let EncoderMethod =
+ "getBranchSymbolOpValue<CSKY::fixup_csky_pcrel_imm10_scale2>";
+ let ParserMatchClass = CSKYSymbol;
+ let DecoderMethod = "decodeSImmOperand<10, 1>";
+ let PrintMethod = "printCSKYSymbolOperand";
+ let OperandType = "OPERAND_PCREL";
+}
+
+def SPOperand : AsmOperandClass {
+ let Name = "SPOperand";
+ let RenderMethod = "addRegOperands";
+ let DiagnosticType = !strconcat("Invalid", Name);
+}
+
+def SPOp : RegisterOperand<GPR> {
+ let ParserMatchClass = SPOperand;
+}
+
+def constpool_symbol_16bit : Operand<iPTR> {
+ let ParserMatchClass = Constpool;
+ let EncoderMethod =
+ "getConstpoolSymbolOpValue<CSKY::fixup_csky_pcrel_uimm7_scale4>";
+ let DecoderMethod = "decodeLRW16Imm8";
+ let PrintMethod = "printConstpool";
+ let OperandType = "OPERAND_PCREL";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Formats
+//===----------------------------------------------------------------------===//
+
+include "CSKYInstrFormats16Instr.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Basic ALU instructions.
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAdd = 1 in
+ def ADDU16 : R16_XYZ<0, "addu16", add>;
+let Pattern = [(set mGPR:$rz, (sub mGPR:$rx, mGPR:$ry))] in
+ def SUBU16 : R16_XYZ<1, "subu16", sub>;
+
+let isCommutable = 1, isAdd = 1 in
+ def ADDC16 : R16_XZ_BINOP_C<0b1000, 0b01, "addc16">;
+def SUBC16 : R16_XZ_BINOP_C<0b1000, 0b11, "subc16">;
+
+let isCommutable = 1 in {
+ let isAdd = 1 in
+ def ADDU16XZ : R16_XZ_BINOP<0b1000, 0b00, "addu16", BinOpFrag<(add node:$LHS, node:$RHS)>>;
+ def AND16 : R16_XZ_BINOP<0b1010, 0b00, "and16", BinOpFrag<(and node:$LHS, node:$RHS)>>;
+ def OR16 : R16_XZ_BINOP<0b1011, 0b00, "or16", BinOpFrag<(or node:$LHS, node:$RHS)>>;
+ def XOR16 : R16_XZ_BINOP<0b1011, 0b01, "xor16", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+ def NOR16 : R16_XZ_BINOP<0b1011, 0b10, "nor16", BinOpFrag<(not (or node:$LHS, node:$RHS))>>;
+ let isCodeGenOnly = 1 in
+ def NOT16 : R16_XZ_UNOP<0b1011, 0b10, "not16">;
+ def MULT16 : R16_XZ_BINOP<0b1111, 0b00, "mult16", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+}
+def SUBU16XZ : R16_XZ_BINOP<0b1000, 0b10, "subu16", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+def ANDN16 : R16_XZ_BINOP<0b1010, 0b01, "andn16", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+def LSL16 : R16_XZ_BINOP<0b1100, 0b00, "lsl16", BinOpFrag<(shl node:$LHS, node:$RHS)>>;
+def LSR16 : R16_XZ_BINOP<0b1100, 0b01, "lsr16", BinOpFrag<(srl node:$LHS, node:$RHS)>>;
+def ASR16 : R16_XZ_BINOP<0b1100, 0b10, "asr16", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
+def ROTL16 : R16_XZ_BINOP<0b1100, 0b11, "rotl16", BinOpFrag<(rotl node:$LHS, (and node:$RHS, 0x1f))>>;
+
+def MULSH16 : R16_XZ_BINOP_NOPat<0b1111, 0b01, "mulsh16">;
+
+def ZEXTB16 : R16_XZ_UNOP<0b1101, 0b00, "zextb16">;
+def ZEXTH16 : R16_XZ_UNOP<0b1101, 0b01, "zexth16">;
+def SEXTB16 : R16_XZ_UNOP<0b1101, 0b10, "sextb16">;
+def SEXTH16 : R16_XZ_UNOP<0b1101, 0b11, "sexth16">;
+
+let Constraints = "$rZ = $rz", isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ let isAdd = 1, Pattern = [(set mGPR:$rz, (add mGPR:$rZ, oimm8:$imm8))] in
+ def ADDI16 : I16_Z_8<0b100, (ins mGPR:$rZ, oimm8:$imm8), "addi16\t$rz, $imm8">;
+ let Pattern = [(set mGPR:$rz, (sub mGPR:$rZ, oimm8:$imm8))] in
+ def SUBI16 : I16_Z_8<0b101, (ins mGPR:$rZ, oimm8:$imm8), "subi16\t$rz, $imm8">;
+}
+
+let isAdd = 1 in
+def ADDI16ZSP : I16_Z_8<0b011, (ins SPOp:$sp, uimm8_2:$imm8),
+ "addi16\t$rz, $sp, $imm8">;
+
+let isAdd = 1 in
+def ADDI16SPSP : I16_SP_IMM7<0b000,"addi16">;
+def SUBI16SPSP : I16_SP_IMM7<0b001,"subi16">;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def LSLI16 : I16_XZ_IMM5<0, "lsli16", shl>;
+ def LSRI16 : I16_XZ_IMM5<1, "lsri16", srl>;
+ def ASRI16 : I16_XZ_IMM5<2, "asri16", sra>;
+}
+
+let isAdd = 1 in
+def ADDI16XZ : I16_XZ_IMM3<0b10, "addi16", add>;
+def SUBI16XZ : I16_XZ_IMM3<0b11, "subi16", sub>;
+
+let Size = 4 in
+def NEG16 : CSKYPseudo<(outs mGPR:$rd), (ins mGPR:$rx), "neg16 $rd, $rx", []>;
+
+let Size = 4 in
+def RSUBI16 : CSKYPseudo<(outs mGPR:$rd),
+ (ins mGPR:$rx, uimm8:$imm8), "rsubi16 $rd, $rx, $imm8", []>;
+
+//===----------------------------------------------------------------------===//
+// Load & Store instructions.
+//===----------------------------------------------------------------------===//
+
+def LD16B : I16_XZ_LDST<AddrMode16B, 0b000, "ld16.b",
+ (outs mGPR:$rz), (ins mGPR:$rx, uimm5:$imm)>;
+def LD16H : I16_XZ_LDST<AddrMode16H, 0b001, "ld16.h",
+ (outs mGPR:$rz), (ins mGPR:$rx, uimm5_1:$imm)>;
+def LD16W : I16_XZ_LDST<AddrMode16W, 0b010, "ld16.w",
+ (outs mGPR:$rz), (ins mGPR:$rx, uimm5_2:$imm)>;
+def ST16B : I16_XZ_LDST<AddrMode16B, 0b100, "st16.b",
+ (outs), (ins mGPR:$rz, mGPR:$rx, uimm5:$imm)>;
+def ST16H : I16_XZ_LDST<AddrMode16H, 0b101, "st16.h",
+ (outs), (ins mGPR:$rz, mGPR:$rx, uimm5_1:$imm)>;
+def ST16W : I16_XZ_LDST<AddrMode16W, 0b110, "st16.w",
+ (outs), (ins mGPR:$rz, mGPR:$rx, uimm5_2:$imm)>;
+
+def LD16WSP : I16_ZSP_LDST<AddrMode16W, 0b011, "ld16.w",
+ (outs mGPR:$rz), (ins SPOp:$sp, uimm8_2:$addr)>;
+def ST16WSP : I16_ZSP_LDST<AddrMode16W, 0b111, "st16.w",
+ (outs), (ins mGPR:$rz, SPOp:$sp, uimm8_2:$addr)>;
+
+//===----------------------------------------------------------------------===//
+// Compare instructions.
+//===----------------------------------------------------------------------===//
+
+def CMPHS16 : R16_XY_CMP<0, "cmphs16">;
+def CMPLT16 : R16_XY_CMP<1, "cmplt16">;
+let isCommutable = 1 in
+def CMPNE16 : R16_XY_CMP<2, "cmpne16">;
+
+
+def CMPHSI16 : I16_X_CMP<0, "cmphsi16", oimm5>;
+def CMPLTI16 : I16_X_CMP<1, "cmplti16", oimm5>;
+def CMPLEI16 : CSKYPseudo<(outs CARRY:$ca), (ins mGPR:$rx, uimm5:$imm5),
+ "cmplei16\t$rx, $imm5", []>;
+def CMPNEI16 : I16_X_CMP<2, "cmpnei16", uimm5>;
+
+//===----------------------------------------------------------------------===//
+// Data move instructions.
+//===----------------------------------------------------------------------===//
+
+
+def MOVI16 : I16_Z_8<0b110, (ins uimm8:$imm8), "movi16\t$rz, $imm8"> {
+ let isReMaterializable = 1;
+ let isAsCheapAsAMove = 1;
+ let isMoveImm = 1;
+ let Pattern = [(set mGPR:$rz, uimm8:$imm8)];
+}
+
+def MOV16 : CSKY16Inst<AddrModeNone, (outs sGPR:$rz), (ins sGPR:$rx),
+ "mov16\t$rz, $rx", []> {
+ bits<4> rz;
+ bits<4> rx;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1011;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = rx;
+ let Inst{1,0} = 0b11;
+}
+
+// MVC16 is not in the "cskyv2 instructions reference manual".
+def MVCV16 : CSKY16Inst<AddrModeNone,
+ (outs sGPR:$rz), (ins CARRY:$ca), "mvcv16\t$rz", []> {
+ bits<4> rz;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = rz;
+ let Inst{5 - 2} = 0;
+ let Inst{1,0} = 0b11;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Branch and call instructions.
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1 in {
+ let isBarrier = 1, isPredicable = 1 in
+ def BR16 : J16<1, "br16", (ins br_symbol_16bit:$offset)>;
+
+ def BT16 : J16_B<2, "bt16">;
+ def BF16 : J16_B<3, "bf16">;
+}
+
+def JMP16 : R16_X_J<0b11100000, 0b00, "jmp16"> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isIndirectBranch = 1;
+ let Pattern = [(brind sGPR:$rx)];
+}
+
+def JSR16 : R16_X_J<0b11101111, 0b01, "jsr16"> {
+ let isCall = 1;
+ let Defs = [ R15 ];
+}
+
+def RTS16 : CSKY16Inst<AddrModeNone, (outs), (ins), "rts16", [(CSKY_RET)]> {
+ let isTerminator = 1;
+ let isReturn = 1;
+ let isBarrier = 1;
+ let Inst = 0b0111100000111100;
+ let Uses = [R15];
+ let isCodeGenOnly = 1;
+}
+
+def JMPIX16 : CSKY16Inst<AddrModeNone, (outs),
+ (ins mGPR:$rx, uimm2_jmpix:$indeximm2), "jmpix16\t$rx, $indeximm2", []> {
+ bits<3> rx;
+ bits<2> indeximm2;
+ let Inst{15,14} = 0b00;
+ let Inst{13 - 11} = 0b111;
+ let Inst{10 - 8} = rx;
+ let Inst{7 - 2} = 0b111000;
+ let Inst{1,0} = indeximm2;
+ let Predicates = [HasJAVA];
+ let Uses = [R30];
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol address instructions.
+//===----------------------------------------------------------------------===//
+
+def LRW16 : CSKY16Inst<AddrModeNone, (outs mGPR:$rz),
+ (ins constpool_symbol_16bit:$label), "lrw16\t$rz, $label", []> {
+ bits<3> rz;
+ bits<8> label;
+ let Inst{15 - 13} = 0b000;
+ let Inst{12} = label{7};
+ let Inst{11,10} = 0b00;
+ let Inst{9,8} = label{6,5};
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = label{4-0};
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+def LRW16_Gen : CSKY16Inst<AddrModeNone, (outs mGPR:$rz),
+ (ins bare_symbol:$src, constpool_symbol_16bit:$label),
+ "lrw16\t$rz, $label", []> {
+ bits<3> rz;
+ bits<8> label;
+ let Inst{15 - 13} = 0b000;
+ let Inst{12} = label{7};
+ let Inst{11,10} = 0b00;
+ let Inst{9,8} = label{6,5};
+ let Inst{7 - 5} = rz;
+ let Inst{4 - 0} = label{4-0};
+ let mayLoad = 1;
+ let mayStore = 0;
+ let isCodeGenOnly = 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other operation instructions.
+//===----------------------------------------------------------------------===//
+
+def REVB16 : R16_XZ_UNOP<0b1110, 0b10, "revb16">;
+def REVH16 : R16_XZ_UNOP<0b1110, 0b11, "revh16">;
+
+let isCodeGenOnly = 1 in
+def SETC16 : CSKY16Inst<AddrModeNone,
+ (outs CARRY:$ca), (ins), "setc16", []> {
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = 0;
+ let Inst{5 - 2} = 0;
+ let Inst{1, 0} = 0;
+ let isCompare = 1;
+}
+
+let isCodeGenOnly = 1 in
+def CLRC16 : CSKY16Inst<AddrModeNone,
+ (outs CARRY:$ca), (ins), "clrc16", []> {
+ let Inst{15, 14} = 0b01;
+ let Inst{13 - 10} = 0b1001;
+ let Inst{9 - 6} = 0;
+ let Inst{5 - 2} = 0;
+ let Inst{1, 0} = 2;
+ let isCompare = 1;
+}
+
+let Constraints = "$rZ = $rz" in {
+ def BCLRI16 : I16_Z_5<0b100, (outs mGPR:$rz), (ins mGPR:$rZ, uimm5:$imm5),
+ "bclri16">;
+ def BSETI16 : I16_Z_5<0b101, (outs mGPR:$rz), (ins mGPR:$rZ, uimm5:$imm5),
+ "bseti16">;
+}
+
+let Predicates = [HasBTST16] in
+ def BTSTI16 : I16_Z_5<0b110, (outs CARRY:$ca), (ins mGPR:$rz, uimm5:$imm5),
+ "btsti16">;
+
+def TST16 : CSKY16Inst<AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx, sGPR:$ry),
+ "tst16\t$rx, $ry", []> {
+ bits<4> ry;
+ bits<4> rx;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1010;
+ let Inst{9 - 6} = ry;
+ let Inst{5 - 2} = rx;
+ let Inst{1,0} = 0b10;
+ let isCompare = 1;
+}
+
+def TSTNBZ16 : CSKY16Inst<AddrModeNone, (outs CARRY:$ca), (ins sGPR:$rx),
+ "tstnbz16\t$rx", []> {
+ bits<4> rx;
+ let Inst{15,14} = 0b01;
+ let Inst{13 - 10} = 0b1010;
+ let Inst{9 - 6} = 0b0000;
+ let Inst{5 - 2} = rx;
+ let Inst{1,0} = 0b11;
+ let isCompare = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Special instructions.
+//===----------------------------------------------------------------------===//
+
+def BKPT : CSKY16Inst<AddrModeNone, (outs), (ins), "bkpt", []> {
+ let Inst = 0;
+}
+
+let mayStore = 1 in {
+def BPUSHH : I16_BPushPop<0b00010100111, 0, (outs), (ins mGPR:$rz), "bpush.h $rz">;
+def BPUSHW : I16_BPushPop<0b00010100111, 0b10, (outs), (ins mGPR:$rz), "bpush.w $rz">;
+}
+
+let mayLoad = 1 in {
+def BPOPH : I16_BPushPop<0b00010100101, 0, (outs mGPR:$rz), (ins), "bpop.h $rz">;
+def BPOPW : I16_BPushPop<0b00010100101, 0b10, (outs mGPR:$rz), (ins), "bpop.w $rz">;
+}
+
+def NIE : CSKY16Inst<AddrModeNone, (outs), (ins), "nie", [(CSKY_NIE)]> {
+ let Inst = 0b0001010001100000;
+}
+
+let isBarrier = 1, isReturn = 1, isTerminator = 1 in
+def NIR : CSKY16Inst<AddrModeNone, (outs), (ins), "nir", [(CSKY_NIR)]> {
+ let Inst = 0b0001010001100001;
+}
+
+def IPUSH16 : CSKY16Inst<AddrModeNone, (outs), (ins), "ipush16", []> {
+ let Inst{15- 5} = 0b00010100011;
+ let Inst{4-0} = 0b00010;
+ let Predicates = [iHasE1];
+ let Defs = [R14];
+ let Uses = [R14, R0, R1, R2, R3, R12, R13];
+ let mayStore = 1;
+}
+
+def IPOP16 : CSKY16Inst<AddrModeNone, (outs), (ins), "ipop16", []> {
+ let Inst{15- 5} = 0b00010100011;
+ let Inst{4-0} = 0b00011;
+ let Predicates = [iHasE1];
+ let Defs = [R14, R0, R1, R2, R3, R12, R13];
+ let Uses = [R14];
+ let mayLoad = 1;
+}
+
+def PUSH16 : CSKY16Inst<AddrModeNone, (outs),
+ (ins reglist:$regs, variable_ops), "push16 $regs", []> {
+ bits<5> regs;
+
+ let Inst{15- 5} = 0b00010100110;
+ let Inst{4-0} = regs;
+ let Predicates = [iHasE1];
+ let Defs = [R14];
+ let Uses = [R14];
+ let mayStore = 1;
+}
+
+def POP16 : CSKY16Inst<AddrModeNone, (outs),
+ (ins reglist:$regs, variable_ops), "pop16 $regs", []> {
+ bits<5> regs;
+
+ let Inst{15- 5} = 0b00010100100;
+ let Inst{4-0} = regs;
+ let Predicates = [iHasE1];
+ let Defs = [R14];
+ let Uses = [R14];
+ let mayLoad = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// CSKYPseudo
+//===----------------------------------------------------------------------===//
+
+let usesCustomInserter = 1 in {
+ def ISEL16 : CSKYPseudo<(outs sGPR:$dst),
+ (ins CARRY:$cond, sGPR:$src1, sGPR:$src2),
+ "!isel16\t$dst, $src1, src2",
+ [(set sGPR:$dst, (select CARRY:$cond, sGPR:$src1, sGPR:$src2))]>;
+}
+
+class JBranchPseudo<dag out, dag ins, string opstr> :
+ CSKYPseudo<out, ins, opstr, []> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isIndirectBranch = 1;
+ let mayLoad = 1;
+ let Size = 2;
+}
+
+let isBarrier = 1 in
+def JBR16 : JBranchPseudo<(outs),
+ (ins br_symbol_16bit:$src1), "jbr16\t$src1">;
+def JBT16 : JBranchPseudo<(outs),
+ (ins CARRY:$ca, br_symbol_16bit:$src1), "jbt16\t$src1">;
+def JBF16 : JBranchPseudo<(outs),
+ (ins CARRY:$ca, br_symbol_16bit:$src1), "jbf16\t$src1">;
+
+let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
+def PseudoLRW16 : CSKYPseudo<(outs mGPR:$rz),
+ (ins bare_symbol:$src), "lrw16 $rz, $src", []>;
diff --git a/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp b/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
new file mode 100644
index 000000000000..c42a56bfb04e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYMCInstLower.cpp
@@ -0,0 +1,117 @@
+//===-- CSKYMCInstLower.cpp - Convert CSKY MachineInstr to an MCInst --------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower CSKY MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYMCInstLower.h"
+#include "MCTargetDesc/CSKYBaseInfo.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+
+#define DEBUG_TYPE "csky-mcinst-lower"
+
+using namespace llvm;
+
+CSKYMCInstLower::CSKYMCInstLower(MCContext &Ctx, AsmPrinter &Printer)
+ : Ctx(Ctx), Printer(Printer) {}
+
+void CSKYMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (const MachineOperand &MO : MI->operands()) {
+ MCOperand MCOp;
+ if (lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+}
+
+MCOperand CSKYMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ CSKYMCExpr::VariantKind Kind;
+ MCContext &Ctx = Printer.OutContext;
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag.");
+ case CSKYII::MO_None:
+ Kind = CSKYMCExpr::VK_CSKY_None;
+ break;
+ case CSKYII::MO_GOT32:
+ Kind = CSKYMCExpr::VK_CSKY_GOT;
+ break;
+ case CSKYII::MO_GOTOFF:
+ Kind = CSKYMCExpr::VK_CSKY_GOTOFF;
+ break;
+ case CSKYII::MO_ADDR32:
+ Kind = CSKYMCExpr::VK_CSKY_ADDR;
+ break;
+ case CSKYII::MO_PLT32:
+ Kind = CSKYMCExpr::VK_CSKY_PLT;
+ break;
+ case CSKYII::MO_ADDR_HI16:
+ Kind = CSKYMCExpr::VK_CSKY_ADDR_HI16;
+ break;
+ case CSKYII::MO_ADDR_LO16:
+ Kind = CSKYMCExpr::VK_CSKY_ADDR_LO16;
+ break;
+ }
+ const MCExpr *ME =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (Kind != CSKYMCExpr::VK_CSKY_None)
+ ME = CSKYMCExpr::create(ME, Kind, Ctx);
+
+ return MCOperand::createExpr(ME);
+}
+
+bool CSKYMCInstLower::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_RegisterMask:
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_Register:
+ if (MO.isImplicit())
+ return false;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Printer.getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(
+ MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(
+ MO, Printer.GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_MCSymbol:
+ MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
+ break;
+ }
+ return true;
+}
\ No newline at end of file
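This lowering helper is driven by the target AsmPrinter, which is not included in this hunk. A minimal sketch of the expected call pattern (helper name and per-call construction are illustrative only):

// Illustrative caller; a real CSKYAsmPrinter would keep a single lowering
// object around instead of rebuilding it for every instruction.
#include "CSKYMCInstLower.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCInst.h"

static void emitLoweredInstruction(llvm::AsmPrinter &Printer,
                                   const llvm::MachineInstr *MI) {
  llvm::CSKYMCInstLower Lowering(Printer.OutContext, Printer);
  llvm::MCInst TmpInst;
  Lowering.Lower(MI, TmpInst);
  Printer.EmitToStreamer(*Printer.OutStreamer, TmpInst);
}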
diff --git a/llvm/lib/Target/CSKY/CSKYMCInstLower.h b/llvm/lib/Target/CSKY/CSKYMCInstLower.h
new file mode 100644
index 000000000000..ea76bd129d30
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYMCInstLower.h
@@ -0,0 +1,35 @@
+//===-- CSKYMCInstLower.h - Convert CSKY MachineInstr to an MCInst ----------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYMCINSTLOWER_H
+#define LLVM_LIB_TARGET_CSKY_CSKYMCINSTLOWER_H
+
+namespace llvm {
+class AsmPrinter;
+class MCContext;
+class MachineInstr;
+class MCInst;
+class MachineOperand;
+class MCOperand;
+class MCSymbol;
+
+class CSKYMCInstLower {
+ MCContext &Ctx;
+ AsmPrinter &Printer;
+
+public:
+ CSKYMCInstLower(MCContext &Ctx, AsmPrinter &Printer);
+
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+ MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYMCINSTLOWER_H
diff --git a/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
new file mode 100644
index 000000000000..b6e303f8ccfb
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h
@@ -0,0 +1,62 @@
+//=- CSKYMachineFunctionInfo.h - CSKY machine function info -------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares CSKY-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_CSKY_CSKYMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class CSKYMachineFunctionInfo : public MachineFunctionInfo {
+ MachineFunction &MF;
+
+ Register GlobalBaseReg = 0;
+ bool SpillsCR = false;
+
+ int VarArgsFrameIndex = 0;
+ unsigned VarArgsSaveSize = 0;
+
+ int spillAreaSize = 0;
+
+ bool LRSpilled = false;
+
+ unsigned PICLabelUId = 0;
+
+public:
+ CSKYMachineFunctionInfo(MachineFunction &MF) : MF(MF) {}
+
+ Register getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(Register Reg) { GlobalBaseReg = Reg; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ unsigned getVarArgsSaveSize() const { return VarArgsSaveSize; }
+ void setVarArgsSaveSize(int Size) { VarArgsSaveSize = Size; }
+
+ bool isLRSpilled() const { return LRSpilled; }
+ void setLRIsSpilled(bool s) { LRSpilled = s; }
+
+ void setCalleeSaveAreaSize(int v) { spillAreaSize = v; }
+ int getCalleeSaveAreaSize() const { return spillAreaSize; }
+
+ unsigned createPICLabelUId() { return ++PICLabelUId; }
+ void initPICLabelUId(unsigned UId) { PICLabelUId = UId; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYMACHINEFUNCTIONINFO_H
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
new file mode 100644
index 000000000000..a1d45fea534b
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp
@@ -0,0 +1,95 @@
+//===-- CSKYRegisterInfo.cpp - CSKY Register Information Impl -*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYRegisterInfo.h"
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCContext.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "CSKYGenRegisterInfo.inc"
+
+using namespace llvm;
+
+CSKYRegisterInfo::CSKYRegisterInfo()
+ : CSKYGenRegisterInfo(CSKY::R15, 0, 0, 0) {}
+
+const uint32_t *
+CSKYRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID Id) const {
+ const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+ return CSR_I32_RegMask;
+}
+
+Register CSKYRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = getFrameLowering(MF);
+ return TFI->hasFP(MF) ? CSKY::R8 : CSKY::R14;
+}
+
+BitVector CSKYRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ const CSKYFrameLowering *TFI = getFrameLowering(MF);
+ const CSKYSubtarget &STI = MF.getSubtarget<CSKYSubtarget>();
+ BitVector Reserved(getNumRegs());
+
+ // Reserve the base register if we need to allocate
+ // variable-sized objects at runtime.
+ if (TFI->hasBP(MF))
+ markSuperRegs(Reserved, CSKY::R7); // bp
+
+ if (TFI->hasFP(MF))
+ markSuperRegs(Reserved, CSKY::R8); // fp
+
+ if (!STI.hasE2()) {
+ for (unsigned i = 0; i < 6; i++)
+ markSuperRegs(Reserved, CSKY::R8 + i); // R8 - R13
+ }
+
+ markSuperRegs(Reserved, CSKY::R14); // sp
+ markSuperRegs(Reserved, CSKY::R15); // lr
+
+ if (!STI.hasHighRegisters()) {
+ for (unsigned i = 0; i < 10; i++)
+ markSuperRegs(Reserved, CSKY::R16 + i); // R16 - R25
+ }
+
+ markSuperRegs(Reserved, CSKY::R26);
+ markSuperRegs(Reserved, CSKY::R27);
+ markSuperRegs(Reserved, CSKY::R28); // gp
+ markSuperRegs(Reserved, CSKY::R29);
+ markSuperRegs(Reserved, CSKY::R30);
+ markSuperRegs(Reserved, CSKY::R31); // tp
+
+ assert(checkAllSuperRegsMarked(Reserved));
+ return Reserved;
+}
+
+const uint32_t *CSKYRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+const MCPhysReg *
+CSKYRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const CSKYSubtarget &STI = MF->getSubtarget<CSKYSubtarget>();
+ if (MF->getFunction().hasFnAttribute("interrupt")) {
+ return CSR_GPR_ISR_SaveList;
+ }
+
+ return CSR_I32_SaveList;
+}
+
+void CSKYRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected non-zero SPAdj value");
+}
\ No newline at end of file
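eliminateFrameIndex is left as an asserting stub at this stage. For context, the shape it usually grows into, replacing the abstract frame index with a frame register plus a concrete byte offset, is sketched below; the offset policy and the operand layout are assumptions, not part of this patch.

// Sketch of a typical frame-index rewrite; assumes the frame-index operand is
// immediately followed by an immediate offset operand.
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

static void resolveFrameIndex(llvm::MachineInstr &MI, unsigned FIOperandNum,
                              llvm::Register FrameReg) {
  llvm::MachineFunction &MF = *MI.getParent()->getParent();
  const llvm::MachineFrameInfo &MFI = MF.getFrameInfo();

  int FI = MI.getOperand(FIOperandNum).getIndex();
  int64_t Offset =
      MFI.getObjectOffset(FI) + MI.getOperand(FIOperandNum + 1).getImm();

  MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false);
  MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}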
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.h b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
new file mode 100644
index 000000000000..779ea6493c7e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.h
@@ -0,0 +1,45 @@
+//===-- CSKYRegisterInfo.h - CSKY Register Information Impl ---*- C++ -*---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CSKY implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYREGISTERINFO_H
+#define LLVM_LIB_TARGET_CSKY_CSKYREGISTERINFO_H
+
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_REGINFO_HEADER
+#include "CSKYGenRegisterInfo.inc"
+
+namespace llvm {
+class CSKYInstrInfo;
+
+class CSKYRegisterInfo : public CSKYGenRegisterInfo {
+public:
+ CSKYRegisterInfo();
+
+ const uint32_t *getCallPreservedMask(const MachineFunction &MF,
+ CallingConv::ID id) const override;
+ const uint32_t *getNoPreservedMask() const override;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ Register getFrameRegister(const MachineFunction &MF) const override;
+
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS) const override;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYREGISTERINFO_H
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
index aef4589a67f2..7548c22bb2c5 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -153,6 +153,21 @@ def GPR : RegisterClass<"CSKY", [i32], 32,
let Size = 32;
}
+// Register class for R0 - R15.
+// Some 16-bit integer instructions can only access R0 - R15.
+def sGPR : RegisterClass<"CSKY", [i32], 32,
+ (add (sequence "R%u", 0, 3), (sequence "R%u", 12, 13), R15,
+ (sequence "R%u", 4, 11), R14)> {
+ let Size = 32;
+}
+
+// Register class for R0 - R7.
+// Some 16-bit integer instructions can only access R0 - R7.
+def mGPR : RegisterClass<"CSKY", [i32], 32,
+ (add (sequence "R%u", 0, 7))> {
+ let Size = 32;
+}
+
def GPRPair : RegisterClass<"CSKY", [untyped], 32, (add GPRTuple)> {
let Size = 64;
}
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.cpp b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
new file mode 100644
index 000000000000..963c2ede9c44
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.cpp
@@ -0,0 +1,74 @@
+//===-- CSKYSubtarget.cpp - CSKY Subtarget Information -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CSKY specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CSKYSubtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "csky-subtarget"
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "CSKYGenSubtargetInfo.inc"
+
+void CSKYSubtarget::anchor() {}
+
+CSKYSubtarget &CSKYSubtarget::initializeSubtargetDependencies(
+ const Triple &TT, StringRef CPUName, StringRef TuneCPUName, StringRef FS) {
+
+ if (CPUName.empty())
+ CPUName = "generic";
+ if (TuneCPUName.empty())
+ TuneCPUName = CPUName;
+
+ UseHardFloat = false;
+ UseHardFloatABI = false;
+ HasFPUv2SingleFloat = false;
+ HasFPUv2DoubleFloat = false;
+ HasFPUv3SingleFloat = false;
+ HasFPUv3DoubleFloat = false;
+
+ HasBTST16 = false;
+ HasJAVA = false;
+ HasExtendLrw = false;
+ HasDoloop = false;
+ HasHighRegisters = false;
+
+ HasE1 = false;
+ HasE2 = false;
+ Has2E3 = false;
+ HasMP = false;
+ Has3E3r1 = false;
+ Has3r1E3r2 = false;
+ Has3r2E3r3 = false;
+ Has3E7 = false;
+ HasMP1E2 = false;
+ Has7E10 = false;
+ Has10E60 = false;
+
+ ParseSubtargetFeatures(CPUName, TuneCPUName, FS);
+ return *this;
+}
+
+CSKYSubtarget::CSKYSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, const TargetMachine &TM)
+ : CSKYGenSubtargetInfo(TT, CPU, TuneCPU, FS),
+ FrameLowering(initializeSubtargetDependencies(TT, CPU, TuneCPU, FS)),
+ InstrInfo(*this), RegInfo(), TLInfo(TM, *this) {}
+
+bool CSKYSubtarget::useHardFloatABI() const {
+ auto FloatABI = getTargetLowering()->getTargetMachine().Options.FloatABIType;
+
+ if (FloatABI == FloatABI::Default)
+ return UseHardFloatABI;
+ else
+ return FloatABI == FloatABI::Hard;
+}
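useHardFloatABI above resolves the effective float ABI: an explicit -float-abi=hard/soft option overrides the feature bit, and only the Default setting falls back to UseHardFloatABI from the feature string. A minimal sketch of that resolution, with a hypothetical FloatABIKind standing in for llvm::FloatABI::ABIType:

#include <cassert>

// Illustrative sketch of the ABI resolution in CSKYSubtarget::useHardFloatABI.
enum class FloatABIKind { Default, Soft, Hard };

static bool useHardFloatABI(FloatABIKind Option, bool SubtargetHardFloatABI) {
  // Only an unset (Default) option defers to the feature string.
  if (Option == FloatABIKind::Default)
    return SubtargetHardFloatABI;
  return Option == FloatABIKind::Hard;
}

int main() {
  assert(useHardFloatABI(FloatABIKind::Default, true));
  assert(!useHardFloatABI(FloatABIKind::Soft, true));
  assert(useHardFloatABI(FloatABIKind::Hard, false));
}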
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h
new file mode 100644
index 000000000000..4cd590e8e76e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h
@@ -0,0 +1,120 @@
+//===-- CSKYSubtarget.h - Define Subtarget for the CSKY----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CSKY specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_CSKYSUBTARGET_H
+#define LLVM_LIB_TARGET_CSKY_CSKYSUBTARGET_H
+
+#include "CSKYFrameLowering.h"
+#include "CSKYISelLowering.h"
+#include "CSKYInstrInfo.h"
+#include "CSKYRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "CSKYGenSubtargetInfo.inc"
+
+namespace llvm {
+class StringRef;
+
+class CSKYSubtarget : public CSKYGenSubtargetInfo {
+ virtual void anchor();
+
+ CSKYFrameLowering FrameLowering;
+ CSKYInstrInfo InstrInfo;
+ CSKYRegisterInfo RegInfo;
+ CSKYTargetLowering TLInfo;
+ SelectionDAGTargetInfo TSInfo;
+
+ bool UseHardFloat;
+ bool UseHardFloatABI;
+ bool HasFPUv2SingleFloat;
+ bool HasFPUv2DoubleFloat;
+ bool HasFPUv3SingleFloat;
+ bool HasFPUv3DoubleFloat;
+
+ bool HasBTST16;
+ bool HasJAVA;
+ bool HasExtendLrw;
+ bool HasDoloop;
+ bool HasHighRegisters;
+
+ bool HasE1;
+ bool HasE2;
+ bool Has2E3;
+ bool HasMP;
+ bool Has3E3r1;
+ bool Has3r1E3r2;
+ bool Has3r2E3r3;
+ bool Has3E7;
+ bool HasMP1E2;
+ bool Has7E10;
+ bool Has10E60;
+
+public:
+ CSKYSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
+ StringRef FS, const TargetMachine &TM);
+
+ const CSKYFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+ const CSKYInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const CSKYRegisterInfo *getRegisterInfo() const override { return &RegInfo; }
+ const CSKYTargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ /// Initializes using the passed in CPU and feature strings so that we can
+ /// use initializer lists for subtarget initialization.
+ CSKYSubtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef CPU,
+ StringRef TuneCPU,
+ StringRef FS);
+
+ // Generated by inc file
+ void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
+
+ bool useHardFloatABI() const;
+ bool useHardFloat() const { return UseHardFloat; }
+ bool hasFPUv2SingleFloat() const { return HasFPUv2SingleFloat; }
+ bool hasFPUv2DoubleFloat() const { return HasFPUv2DoubleFloat; }
+ bool hasFPUv2() const { return HasFPUv2SingleFloat || HasFPUv2DoubleFloat; }
+ bool hasFPUv3SingleFloat() const { return HasFPUv3SingleFloat; }
+ bool hasFPUv3DoubleFloat() const { return HasFPUv3DoubleFloat; }
+ bool hasFPUv3() const { return HasFPUv3SingleFloat || HasFPUv3DoubleFloat; }
+ bool hasAnyFloatExt() const { return hasFPUv2() || hasFPUv3(); };
+
+ bool hasBTST16() const { return HasBTST16; }
+ bool hasJAVA() const { return HasJAVA; }
+ bool hasExtendLrw() const { return HasExtendLrw; }
+ bool hasDoloop() const { return HasDoloop; }
+ bool hasHighRegisters() const { return HasHighRegisters; }
+
+ bool hasE1() const { return HasE1; }
+ bool hasE2() const { return HasE2; }
+ bool has2E3() const { return Has2E3; }
+ bool has3r1E3r2() const { return Has3r1E3r2; }
+ bool has3r2E3r3() const { return Has3r2E3r3; }
+ bool has3E3r1() const { return Has3E3r1; }
+ bool has3E7() const { return Has3E7; }
+ bool hasMP() const { return HasMP; }
+ bool hasMP1E2() const { return HasMP1E2; }
+ bool has7E10() const { return Has7E10; }
+ bool has10E60() const { return Has10E60; }
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_CSKYSUBTARGET_H
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
index 1c13796e84b6..8f61feb6506d 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.cpp
@@ -11,10 +11,13 @@
//===----------------------------------------------------------------------===//
#include "CSKYTargetMachine.h"
+#include "CSKY.h"
+#include "CSKYSubtarget.h"
#include "TargetInfo/CSKYTargetInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -50,6 +53,34 @@ CSKYTargetMachine::CSKYTargetMachine(const Target &T, const Triple &TT,
initAsmInfo();
}
+const CSKYSubtarget *
+CSKYTargetMachine::getSubtargetImpl(const Function &F) const {
+ Attribute CPUAttr = F.getFnAttribute("target-cpu");
+ Attribute TuneAttr = F.getFnAttribute("tune-cpu");
+ Attribute FSAttr = F.getFnAttribute("target-features");
+
+ std::string CPU =
+ CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
+ std::string TuneCPU =
+ TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
+ std::string FS =
+ FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
+
+ std::string Key = CPU + TuneCPU + FS;
+ auto &I = SubtargetMap[Key];
+ if (!I) {
+ // This needs to be done before we create a new subtarget since any
+ // creation will depend on the TM and the code generation flags on the
+ // function that reside in TargetOptions.
+ resetTargetOptions(F);
+ I = std::make_unique<CSKYSubtarget>(TargetTriple, CPU, TuneCPU, FS, *this);
+ if (I->useHardFloat() && !I->hasAnyFloatExt())
+      errs() << "Hard-float can't be used with the current CPU,"
+                " falling back to soft-float\n";
+ }
+ return I.get();
+}
+
namespace {
class CSKYPassConfig : public TargetPassConfig {
public:
@@ -59,6 +90,8 @@ public:
CSKYTargetMachine &getCSKYTargetMachine() const {
return getTM<CSKYTargetMachine>();
}
+
+ bool addInstSelector() override;
};
} // namespace
@@ -66,3 +99,9 @@ public:
TargetPassConfig *CSKYTargetMachine::createPassConfig(PassManagerBase &PM) {
return new CSKYPassConfig(*this, PM);
}
+
+bool CSKYPassConfig::addInstSelector() {
+ addPass(createCSKYISelDag(getCSKYTargetMachine()));
+
+ return false;
+}
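getSubtargetImpl above creates at most one CSKYSubtarget per distinct target-cpu/tune-cpu/target-features combination and caches it in a StringMap keyed by the concatenated strings, so functions with identical attributes share one subtarget object. A rough standalone equivalent of that caching scheme, using std::map and a hypothetical Subtarget struct instead of the LLVM types:

#include <cassert>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-in for the real CSKYSubtarget.
struct Subtarget {
  std::string CPU, TuneCPU, FS;
};

class SubtargetCache {
  std::map<std::string, std::unique_ptr<Subtarget>> Map;

public:
  const Subtarget *get(const std::string &CPU, const std::string &TuneCPU,
                       const std::string &FS) {
    // Same key scheme as the patch: plain concatenation of the three strings.
    auto &Slot = Map[CPU + TuneCPU + FS];
    if (!Slot)
      Slot = std::make_unique<Subtarget>(Subtarget{CPU, TuneCPU, FS});
    return Slot.get();
  }
};

int main() {
  SubtargetCache Cache;
  const Subtarget *A = Cache.get("ck810", "ck810", "+fpuv2_sf");
  const Subtarget *B = Cache.get("ck810", "ck810", "+fpuv2_sf");
  assert(A == B && "identical attribute sets share one subtarget");
}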
diff --git a/llvm/lib/Target/CSKY/CSKYTargetMachine.h b/llvm/lib/Target/CSKY/CSKYTargetMachine.h
index d50e3877b550..ecb9fe953077 100644
--- a/llvm/lib/Target/CSKY/CSKYTargetMachine.h
+++ b/llvm/lib/Target/CSKY/CSKYTargetMachine.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H
#define LLVM_LIB_TARGET_CSKY_CSKYTARGETMACHINE_H
+#include "CSKYSubtarget.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
@@ -20,6 +21,7 @@ namespace llvm {
class CSKYTargetMachine : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
+ mutable StringMap<std::unique_ptr<CSKYSubtarget>> SubtargetMap;
public:
CSKYTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
@@ -29,6 +31,12 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ const CSKYSubtarget *getSubtargetImpl(const Function &F) const override;
+ // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget,
+ // subtargets are per-function entities based on the target-specific
+ // attributes of each function.
+ const CSKYSubtarget *getSubtargetImpl() const = delete;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index 7fb5f35548b4..daa655416c47 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -30,25 +30,57 @@ CSKYAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static llvm::DenseMap<unsigned, MCFixupKindInfo> Infos = {
{CSKY::Fixups::fixup_csky_addr32, {"fixup_csky_addr32", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_addr_hi16, {"fixup_csky_addr_hi16", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_addr_lo16, {"fixup_csky_addr_lo16", 0, 32, 0}},
{CSKY::Fixups::fixup_csky_pcrel_imm16_scale2,
{"fixup_csky_pcrel_imm16_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
{CSKY::Fixups::fixup_csky_pcrel_uimm16_scale4,
- {"fixup_csky_pcrel_uimm16_scale4", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
+ {"fixup_csky_pcrel_uimm16_scale4", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}},
+ {CSKY::Fixups::fixup_csky_pcrel_uimm8_scale4,
+ {"fixup_csky_pcrel_uimm8_scale4", 0, 32,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}},
{CSKY::Fixups::fixup_csky_pcrel_imm26_scale2,
{"fixup_csky_pcrel_imm26_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
{CSKY::Fixups::fixup_csky_pcrel_imm18_scale2,
- {"fixup_csky_pcrel_imm18_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}}};
+ {"fixup_csky_pcrel_imm18_scale2", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
+ {CSKY::Fixups::fixup_csky_got32, {"fixup_csky_got32", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_got_imm18_scale4,
+ {"fixup_csky_got_imm18_scale4", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_gotoff, {"fixup_csky_gotoff", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_gotpc,
+ {"fixup_csky_gotpc", 0, 32, MCFixupKindInfo::FKF_IsPCRel}},
+ {CSKY::Fixups::fixup_csky_plt32, {"fixup_csky_plt32", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_plt_imm18_scale4,
+ {"fixup_csky_plt_imm18_scale4", 0, 32, 0}},
+ {CSKY::Fixups::fixup_csky_pcrel_imm10_scale2,
+ {"fixup_csky_pcrel_imm10_scale2", 0, 16, MCFixupKindInfo::FKF_IsPCRel}},
+ {CSKY::Fixups::fixup_csky_pcrel_uimm7_scale4,
+ {"fixup_csky_pcrel_uimm7_scale4", 0, 16,
+ MCFixupKindInfo::FKF_IsPCRel |
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}},
+ {CSKY::Fixups::fixup_csky_doffset_imm18,
+ {"fixup_csky_doffset_imm18", 0, 18, 0}},
+ {CSKY::Fixups::fixup_csky_doffset_imm18_scale2,
+ {"fixup_csky_doffset_imm18_scale2", 0, 18, 0}},
+ {CSKY::Fixups::fixup_csky_doffset_imm18_scale4,
+ {"fixup_csky_doffset_imm18_scale4", 0, 18, 0}}};
+
assert(Infos.size() == CSKY::NumTargetFixupKinds &&
"Not all fixup kinds added to Infos array");
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- if (FirstTargetFixupKind <= Kind && Kind < FirstLiteralRelocationKind)
+ if (FirstTargetFixupKind <= Kind && Kind < FirstLiteralRelocationKind) {
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+
return Infos[Kind];
- else if (Kind < FirstTargetFixupKind)
+ } else if (Kind < FirstTargetFixupKind) {
return MCAsmBackend::getFixupKindInfo(Kind);
- else
+ } else {
return MCAsmBackend::getFixupKindInfo(FK_NONE);
+ }
}
static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
@@ -145,7 +177,8 @@ void CSKYAsmBackend::relaxInstruction(MCInst &Inst,
llvm_unreachable("CSKYAsmBackend::relaxInstruction() unimplemented");
}
-bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool CSKYAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if (Count % 2)
return false;
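The reshuffled getFixupKindInfo above now classifies the kind before asserting: target-specific kinds index the Infos table, kinds below FirstTargetFixupKind defer to the generic MCAsmBackend table, and literal relocation kinds get the FK_NONE entry. A sketch of that three-way dispatch with made-up boundary constants (in LLVM they come from the MCFixupKind enum):

#include <cassert>
#include <cstdio>
#include <map>
#include <string>

constexpr unsigned FirstTargetFixupKind = 64;        // hypothetical value
constexpr unsigned FirstLiteralRelocationKind = 128; // hypothetical value

static std::string fixupName(unsigned Kind) {
  static const std::map<unsigned, std::string> Infos = {
      {FirstTargetFixupKind + 0, "fixup_csky_addr32"},
      {FirstTargetFixupKind + 1, "fixup_csky_addr_hi16"},
  };
  if (Kind >= FirstTargetFixupKind && Kind < FirstLiteralRelocationKind) {
    // Only assert validity once the kind is known to be in the target range.
    auto It = Infos.find(Kind);
    assert(It != Infos.end() && "Invalid kind!");
    return It->second;
  }
  if (Kind < FirstTargetFixupKind)
    return "<generic fixup>"; // MCAsmBackend::getFixupKindInfo(Kind)
  return "<none>";            // literal relocation: FK_NONE entry
}

int main() {
  std::printf("%s\n", fixupName(FirstTargetFixupKind + 1).c_str());
  std::printf("%s\n", fixupName(3).c_str());
  std::printf("%s\n", fixupName(200).c_str());
}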
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
index cdf688e9032a..e710954e9df8 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h
@@ -39,7 +39,8 @@ public:
void relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h
new file mode 100644
index 000000000000..fbfca4b6b85f
--- /dev/null
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- CSKYBaseInfo.h - Top level definitions for CSKY ---*- C++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the CSKY target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYBASEINFO_H
+#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYBASEINFO_H
+
+#include "MCTargetDesc/CSKYMCTargetDesc.h"
+#include "llvm/MC/MCInstrDesc.h"
+
+namespace llvm {
+
+// CSKYII - This namespace holds all of the target specific flags that
+// instruction info tracks. All definitions must match CSKYInstrFormats.td.
+namespace CSKYII {
+
+enum AddrMode {
+ AddrModeNone = 0,
+ AddrMode32B = 1, // ld32.b, ld32.bs, st32.b, st32.bs, +4kb
+ AddrMode32H = 2, // ld32.h, ld32.hs, st32.h, st32.hs, +8kb
+ AddrMode32WD = 3, // ld32.w, st32.w, ld32.d, st32.d, +16kb
+ AddrMode16B = 4, // ld16.b, +32b
+ AddrMode16H = 5, // ld16.h, +64b
+ AddrMode16W = 6, // ld16.w, +128b or +1kb
+ AddrMode32SDF = 7, // flds, fldd, +1kb
+};
+
+// CSKY Specific MachineOperand Flags.
+enum TOF {
+ MO_None = 0,
+ MO_ADDR32,
+ MO_GOT32,
+ MO_GOTOFF,
+ MO_PLT32,
+ MO_ADDR_HI16,
+ MO_ADDR_LO16,
+
+ // Used to differentiate between target-specific "direct" flags and "bitmask"
+ // flags. A machine operand can only have one "direct" flag, but can have
+ // multiple "bitmask" flags.
+ MO_DIRECT_FLAG_MASK = 15
+};
+
+enum {
+ AddrModeMask = 0x1f,
+};
+
+} // namespace CSKYII
+
+namespace CSKYOp {
+enum OperandType : unsigned {
+ OPERAND_BARESYMBOL = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_CONSTPOOL
+};
+} // namespace CSKYOp
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYBASEINFO_H
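CSKYBaseInfo.h above pairs the AddrMode enum with an AddrModeMask of 0x1f, which suggests the addressing mode is meant to live in the low bits of an instruction's TSFlags word and be recovered by masking (the packing itself would happen in CSKYInstrFormats.td, which is not part of this hunk). A hypothetical pack/unpack sketch under that assumption:

#include <cassert>
#include <cstdint>

// Values mirror the first entries of the AddrMode enum in the patch.
enum AddrMode : uint64_t {
  AddrModeNone = 0,
  AddrMode32B = 1,  // ld32.b/st32.b, +4kb range
  AddrMode32H = 2,  // ld32.h/st32.h, +8kb range
  AddrMode32WD = 3, // ld32.w/st32.w/ld32.d/st32.d, +16kb range
};

constexpr uint64_t AddrModeMask = 0x1f;

// OtherFlags is a hypothetical placeholder for any higher-bit flags.
constexpr uint64_t packTSFlags(AddrMode AM, uint64_t OtherFlags) {
  return (OtherFlags << 5) | (AM & AddrModeMask);
}

constexpr AddrMode unpackAddrMode(uint64_t TSFlags) {
  return static_cast<AddrMode>(TSFlags & AddrModeMask);
}

int main() {
  uint64_t Flags = packTSFlags(AddrMode32WD, /*OtherFlags=*/0b101);
  assert(unpackAddrMode(Flags) == AddrMode32WD);
}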
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h
index 917f940fcad4..434fd5481626 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h
@@ -16,6 +16,10 @@ namespace CSKY {
enum Fixups {
fixup_csky_addr32 = FirstTargetFixupKind,
+ fixup_csky_addr_hi16,
+
+ fixup_csky_addr_lo16,
+
fixup_csky_pcrel_imm16_scale2,
fixup_csky_pcrel_uimm16_scale4,
@@ -24,6 +28,29 @@ enum Fixups {
fixup_csky_pcrel_imm18_scale2,
+ fixup_csky_gotpc,
+
+ fixup_csky_gotoff,
+
+ fixup_csky_got32,
+
+ fixup_csky_got_imm18_scale4,
+
+ fixup_csky_plt32,
+
+ fixup_csky_plt_imm18_scale4,
+
+ fixup_csky_pcrel_imm10_scale2,
+
+ fixup_csky_pcrel_uimm7_scale4,
+
+ fixup_csky_pcrel_uimm8_scale4,
+
+ fixup_csky_doffset_imm18,
+
+ fixup_csky_doffset_imm18_scale2,
+
+ fixup_csky_doffset_imm18_scale4,
// Marker
fixup_csky_invalid,
NumTargetFixupKinds = fixup_csky_invalid - FirstTargetFixupKind
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index c8920fbb4b4c..7001de999a51 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -95,6 +96,107 @@ void CSKYInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
MO.getExpr()->print(O, &MAI);
}
+void CSKYInstPrinter::printDataSymbol(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ O << "[";
+ if (MO.isImm())
+ O << MO.getImm();
+ else
+ MO.getExpr()->print(O, &MAI);
+ O << "]";
+}
+
+void CSKYInstPrinter::printConstpool(const MCInst *MI, uint64_t Address,
+ unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+
+ if (MO.isImm()) {
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + MO.getImm();
+ Target &= 0xfffffffc;
+ O << formatHex(Target);
+ } else {
+ O << MO.getImm();
+ }
+ return;
+ }
+
+ assert(MO.isExpr() && "Unknown operand kind in printConstpool");
+
+ O << "[";
+ MO.getExpr()->print(O, &MAI);
+ O << "]";
+}
+
+void CSKYInstPrinter::printCSKYSymbolOperand(const MCInst *MI, uint64_t Address,
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNo);
+ if (!MO.isImm()) {
+ return printOperand(MI, OpNo, STI, O);
+ }
+
+ if (PrintBranchImmAsAddress) {
+ uint64_t Target = Address + MO.getImm();
+ Target &= 0xffffffff;
+ O << formatHex(Target);
+ } else {
+ O << MO.getImm();
+ }
+}
+
+void CSKYInstPrinter::printRegisterSeq(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printRegName(O, MI->getOperand(OpNum).getReg());
+ O << "-";
+ printRegName(O, MI->getOperand(OpNum + 1).getReg());
+}
+
+void CSKYInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ auto V = MI->getOperand(OpNum).getImm();
+ ListSeparator LS;
+
+ if (V & 0xf) {
+ O << LS;
+ printRegName(O, CSKY::R4);
+ auto Offset = (V & 0xf) - 1;
+ if (Offset) {
+ O << "-";
+ printRegName(O, CSKY::R4 + Offset);
+ }
+ }
+
+ if ((V >> 4) & 0x1) {
+ O << LS;
+ printRegName(O, CSKY::R15);
+ }
+
+ if ((V >> 5) & 0x7) {
+ O << LS;
+ printRegName(O, CSKY::R16);
+
+ auto Offset = ((V >> 5) & 0x7) - 1;
+
+ if (Offset) {
+ O << "-";
+ printRegName(O, CSKY::R16 + Offset);
+ }
+ }
+
+ if ((V >> 8) & 0x1) {
+ O << LS;
+ printRegName(O, CSKY::R28);
+ }
+}
+
const char *CSKYInstPrinter::getRegisterName(unsigned RegNo) {
return getRegisterName(RegNo, ArchRegNames ? CSKY::NoRegAltName
: CSKY::ABIRegAltName);
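printRegisterList above unpacks a push/pop register-list immediate: bits [3:0] give a run length starting at r4, bit 4 selects r15 (lr), bits [7:5] give a run length starting at r16, and bit 8 selects r28 (gp). A standalone decoder mirroring the same field layout, returning a string instead of printing through MCInstPrinter:

#include <cstdio>
#include <string>

static std::string decodeRegList(unsigned V) {
  std::string Out;
  auto Append = [&Out](const std::string &Piece) {
    if (!Out.empty())
      Out += ", ";
    Out += Piece;
  };
  if (unsigned N = V & 0xf)          // run of N registers starting at r4
    Append(N == 1 ? "r4" : "r4-r" + std::to_string(4 + N - 1));
  if ((V >> 4) & 0x1)                // lr
    Append("r15");
  if (unsigned N = (V >> 5) & 0x7)   // run of N registers starting at r16
    Append(N == 1 ? "r16" : "r16-r" + std::to_string(16 + N - 1));
  if ((V >> 8) & 0x1)                // gp
    Append("r28");
  return Out;
}

int main() {
  // 0x194: four registers from r4, lr, four registers from r16, and gp.
  std::printf("%s\n", decodeRegList(0x194).c_str()); // r4-r7, r15, r16-r19, r28
}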
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
index a28791a6d8e9..f93a342ec6a3 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
@@ -19,6 +19,9 @@
namespace llvm {
class CSKYInstPrinter : public MCInstPrinter {
+private:
+ bool ABIRegNames = false;
+
public:
CSKYInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
@@ -43,6 +46,20 @@ public:
unsigned OpIdx, unsigned PrintMethodIdx,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDataSymbol(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printConstpool(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printPSRFlag(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printRegisterSeq(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRegisterList(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printCSKYSymbolOperand(const MCInst *MI, uint64_t Address, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSPAddr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
};
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
index 1a5b0225e0b9..1d220b749cb1 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "CSKYMCCodeEmitter.h"
+#include "CSKYMCExpr.h"
#include "MCTargetDesc/CSKYMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -31,11 +32,46 @@ unsigned CSKYMCCodeEmitter::getOImmOpValue(const MCInst &MI, unsigned Idx,
return MO.getImm() - 1;
}
+unsigned
+CSKYMCCodeEmitter::getImmOpValueIDLY(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Idx);
+ assert(MO.isImm() && "Unexpected MO type.");
+
+ auto V = (MO.getImm() <= 3) ? 4 : MO.getImm();
+ return V - 1;
+}
+
+unsigned
+CSKYMCCodeEmitter::getImmOpValueMSBSize(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MSB = MI.getOperand(Idx);
+ const MCOperand &LSB = MI.getOperand(Idx + 1);
+ assert(MSB.isImm() && LSB.isImm() && "Unexpected MO type.");
+
+ return MSB.getImm() - LSB.getImm();
+}
+
+static void writeData(uint32_t Bin, unsigned Size, raw_ostream &OS) {
+ uint16_t LO16 = static_cast<uint16_t>(Bin);
+ uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
+
+ if (Size == 4)
+ support::endian::write<uint16_t>(OS, HI16, support::little);
+
+ support::endian::write<uint16_t>(OS, LO16, support::little);
+}
+
void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MII.get(MI.getOpcode());
unsigned Size = Desc.getSize();
+
+ ++MCNumEmitted;
+
uint32_t Bin = getBinaryCodeForInstr(MI, Fixups, STI);
uint16_t LO16 = static_cast<uint16_t>(Bin);
@@ -45,7 +81,6 @@ void CSKYMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
support::endian::write<uint16_t>(OS, HI16, support::little);
support::endian::write<uint16_t>(OS, LO16, support::little);
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
}
unsigned
@@ -62,6 +97,51 @@ CSKYMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return 0;
}
+unsigned
+CSKYMCCodeEmitter::getRegSeqImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(Idx).isReg() && "Unexpected MO type.");
+ assert(MI.getOperand(Idx + 1).isImm() && "Unexpected MO type.");
+
+ unsigned Ry = MI.getOperand(Idx).getReg();
+ unsigned Rz = MI.getOperand(Idx + 1).getImm();
+
+ unsigned Imm = Ctx.getRegisterInfo()->getEncodingValue(Rz) -
+ Ctx.getRegisterInfo()->getEncodingValue(Ry);
+
+ return ((Ctx.getRegisterInfo()->getEncodingValue(Ry) << 5) | Imm);
+}
+
+unsigned
+CSKYMCCodeEmitter::getRegisterSeqOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned Reg1 =
+ Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(Op).getReg());
+ unsigned Reg2 =
+ Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(Op + 1).getReg());
+
+ unsigned Binary = ((Reg1 & 0x1f) << 5) | (Reg2 - Reg1);
+
+ return Binary;
+}
+
+unsigned CSKYMCCodeEmitter::getImmJMPIX(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MI.getOperand(Idx).getImm() == 16)
+ return 0;
+ else if (MI.getOperand(Idx).getImm() == 24)
+ return 1;
+ else if (MI.getOperand(Idx).getImm() == 32)
+ return 2;
+ else if (MI.getOperand(Idx).getImm() == 40)
+ return 3;
+ else
+    llvm_unreachable("Unhandled JMPIX immediate.");
+}
+
MCFixupKind CSKYMCCodeEmitter::getTargetFixup(const MCExpr *Expr) const {
const CSKYMCExpr *CSKYExpr = cast<CSKYMCExpr>(Expr);
@@ -70,6 +150,22 @@ MCFixupKind CSKYMCCodeEmitter::getTargetFixup(const MCExpr *Expr) const {
llvm_unreachable("Unhandled fixup kind!");
case CSKYMCExpr::VK_CSKY_ADDR:
return MCFixupKind(CSKY::fixup_csky_addr32);
+ case CSKYMCExpr::VK_CSKY_ADDR_HI16:
+ return MCFixupKind(CSKY::fixup_csky_addr_hi16);
+ case CSKYMCExpr::VK_CSKY_ADDR_LO16:
+ return MCFixupKind(CSKY::fixup_csky_addr_lo16);
+ case CSKYMCExpr::VK_CSKY_GOT:
+ return MCFixupKind(CSKY::fixup_csky_got32);
+ case CSKYMCExpr::VK_CSKY_GOTPC:
+ return MCFixupKind(CSKY::fixup_csky_gotpc);
+ case CSKYMCExpr::VK_CSKY_GOTOFF:
+ return MCFixupKind(CSKY::fixup_csky_gotoff);
+ case CSKYMCExpr::VK_CSKY_PLT:
+ return MCFixupKind(CSKY::fixup_csky_plt32);
+ case CSKYMCExpr::VK_CSKY_PLT_IMM18_BY4:
+ return MCFixupKind(CSKY::fixup_csky_plt_imm18_scale4);
+ case CSKYMCExpr::VK_CSKY_GOT_IMM18_BY4:
+ return MCFixupKind(CSKY::fixup_csky_got_imm18_scale4);
}
}
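The writeData helper added above captures how CSKY encodings are byte-ordered: a 32-bit instruction is split into two 16-bit parcels, each written little-endian, with the high parcel emitted first, while a 16-bit instruction is a single parcel. A standalone mirror of that ordering using a byte vector instead of raw_ostream:

#include <cassert>
#include <cstdint>
#include <vector>

static void writeData(uint32_t Bin, unsigned Size, std::vector<uint8_t> &OS) {
  auto WriteLE16 = [&OS](uint16_t V) {
    OS.push_back(static_cast<uint8_t>(V & 0xff));
    OS.push_back(static_cast<uint8_t>(V >> 8));
  };
  uint16_t LO16 = static_cast<uint16_t>(Bin);
  uint16_t HI16 = static_cast<uint16_t>(Bin >> 16);
  if (Size == 4)
    WriteLE16(HI16); // high parcel first for 32-bit instructions
  WriteLE16(LO16);
}

int main() {
  std::vector<uint8_t> Out;
  writeData(0x11223344, 4, Out);
  // Byte stream: 22 11 44 33 (each parcel little-endian, high parcel first).
  assert(Out.size() == 4);
  assert(Out[0] == 0x22 && Out[1] == 0x11 && Out[2] == 0x44 && Out[3] == 0x33);
}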
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
index a4c50d992a07..bfba07bcb32a 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h
@@ -13,8 +13,8 @@
#ifndef LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
#define LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCCODEEMITTER_H
-#include "CSKYMCExpr.h"
#include "MCTargetDesc/CSKYFixupKinds.h"
+#include "MCTargetDesc/CSKYMCExpr.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -49,14 +49,40 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(Idx);
- assert(MO.isImm() && "Unexpected MO type.");
- return (MO.getImm() >> shift);
+ if (MO.isImm())
+ return (MO.getImm() >> shift);
+
+ assert(MO.isExpr() && "Unexpected MO type.");
+
+ MCFixupKind Kind = getTargetFixup(MO.getExpr());
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
+ return 0;
}
+ unsigned getRegSeqImmOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getRegisterSeqOpValue(const MCInst &MI, unsigned Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getOImmOpValue(const MCInst &MI, unsigned Idx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getImmOpValueIDLY(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getImmJMPIX(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getImmOpValueMSBSize(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getImmShiftOpValue(const MCInst &MI, unsigned Idx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -101,6 +127,21 @@ public:
return 0;
}
+ template <llvm::CSKY::Fixups FIXUP>
+ unsigned getDataSymbolOpValue(const MCInst &MI, unsigned Idx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(Idx);
+ assert(MO.isExpr() && "Unexpected MO type.");
+
+ MCFixupKind Kind = MCFixupKind(FIXUP);
+ if (MO.getExpr()->getKind() == MCExpr::Target)
+ Kind = getTargetFixup(MO.getExpr());
+
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), Kind, MI.getLoc()));
+ return 0;
+ }
+
unsigned getCallSymbolOpValue(const MCInst &MI, unsigned Idx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
index 59e630f43a42..7987613b0608 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp
@@ -26,22 +26,33 @@ StringRef CSKYMCExpr::getVariantKindName(VariantKind Kind) {
switch (Kind) {
default:
llvm_unreachable("Invalid ELF symbol kind");
+ case VK_CSKY_None:
case VK_CSKY_ADDR:
return "";
- case VK_CSKY_PCREL:
- return "";
+ case VK_CSKY_ADDR_HI16:
+ return "@HI16";
+ case VK_CSKY_ADDR_LO16:
+ return "@LO16";
+ case VK_CSKY_GOT_IMM18_BY4:
case VK_CSKY_GOT:
return "@GOT";
case VK_CSKY_GOTPC:
return "@GOTPC";
case VK_CSKY_GOTOFF:
return "@GOTOFF";
+ case VK_CSKY_PLT_IMM18_BY4:
case VK_CSKY_PLT:
return "@PLT";
- case VK_CSKY_TPOFF:
+ case VK_CSKY_TLSLE:
return "@TPOFF";
+ case VK_CSKY_TLSIE:
+ return "@GOTTPOFF";
case VK_CSKY_TLSGD:
- return "@TLSGD";
+ return "@TLSGD32";
+ case VK_CSKY_TLSLDO:
+ return "@TLSLDO32";
+ case VK_CSKY_TLSLDM:
+ return "@TLSLDM32";
}
}
@@ -87,7 +98,8 @@ void CSKYMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
switch (getKind()) {
default:
return;
- case VK_CSKY_TPOFF:
+ case VK_CSKY_TLSLE:
+ case VK_CSKY_TLSIE:
case VK_CSKY_TLSGD:
break;
}
@@ -106,17 +118,20 @@ bool CSKYMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
switch (getKind()) {
default:
return true;
-
- case VK_CSKY_ADDR:
- case VK_CSKY_PCREL:
case VK_CSKY_GOT:
+ case VK_CSKY_GOT_IMM18_BY4:
case VK_CSKY_GOTPC:
case VK_CSKY_GOTOFF:
- case VK_CSKY_TPOFF:
+ case VK_CSKY_PLT:
+ case VK_CSKY_PLT_IMM18_BY4:
+ case VK_CSKY_TLSIE:
+ case VK_CSKY_TLSLE:
case VK_CSKY_TLSGD:
+ case VK_CSKY_TLSLDO:
+ case VK_CSKY_TLSLDM:
return false;
}
}
return true;
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h
index 06fccada53ce..9e5b4ca7d9bb 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h
@@ -19,13 +19,20 @@ public:
enum VariantKind {
VK_CSKY_None,
VK_CSKY_ADDR,
+ VK_CSKY_ADDR_HI16,
+ VK_CSKY_ADDR_LO16,
VK_CSKY_PCREL,
VK_CSKY_GOT,
+ VK_CSKY_GOT_IMM18_BY4,
VK_CSKY_GOTPC,
VK_CSKY_GOTOFF,
VK_CSKY_PLT,
- VK_CSKY_TPOFF,
+ VK_CSKY_PLT_IMM18_BY4,
+ VK_CSKY_TLSIE,
+ VK_CSKY_TLSLE,
VK_CSKY_TLSGD,
+ VK_CSKY_TLSLDO,
+ VK_CSKY_TLSLDM,
VK_CSKY_Invalid
};
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
index 169e1e14eb0a..0901c0993607 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "CSKYGenInstrInfo.inc"
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
index da8a3b63a2f9..25bbd635fc58 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h
@@ -45,4 +45,7 @@ MCCodeEmitter *createCSKYMCCodeEmitter(const MCInstrInfo &MCII,
#define GET_INSTRINFO_ENUM
#include "CSKYGenInstrInfo.inc"
+#define GET_SUBTARGETINFO_ENUM
+#include "CSKYGenSubtargetInfo.inc"
+
#endif // LLVM_LIB_TARGET_CSKY_MCTARGETDESC_CSKYMCTARGETDESC_H
diff --git a/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp b/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
index 1af2e672ff42..40b7d493652d 100644
--- a/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
+++ b/llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/CSKYTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheCSKYTarget() {
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 7edc2a01eeb8..d131cf896834 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -37,6 +37,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -141,12 +141,6 @@ class HexagonAsmParser : public MCTargetAsmParser {
int processInstruction(MCInst &Inst, OperandVector const &Operands,
SMLoc IDLoc);
- // Check if we have an assembler and, if so, set the ELF e_header flags.
- void chksetELFHeaderEFlags(unsigned flags) {
- if (getAssembler())
- getAssembler()->setELFHeaderEFlags(flags);
- }
-
unsigned matchRegister(StringRef Name);
/// @name Auto-generated Match Functions
@@ -211,10 +205,6 @@ struct HexagonOperand : public MCParsedAsmOperand {
const MCExpr *Val;
};
- struct InstTy {
- OperandVector *SubInsts;
- };
-
union {
struct TokTy Tok;
struct RegTy Reg;
@@ -1498,7 +1488,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
MES->SwitchSection(mySection);
unsigned byteSize = is32bit ? 4 : 8;
- getStreamer().emitCodeAlignment(byteSize, byteSize);
+ getStreamer().emitCodeAlignment(byteSize, &getSTI(), byteSize);
MCSymbol *Sym;
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 80a987c3a549..3c742c98077b 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
@@ -131,6 +131,9 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
@@ -140,6 +143,10 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t Address, const void *Decoder);
@@ -760,6 +767,78 @@ static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
return MCDisassembler::Success;
}
+static const uint16_t SysRegDecoderTable[] = {
+ Hexagon::SGP0, Hexagon::SGP1, Hexagon::STID,
+ Hexagon::ELR, Hexagon::BADVA0, Hexagon::BADVA1,
+ Hexagon::SSR, Hexagon::CCR, Hexagon::HTID,
+ Hexagon::BADVA, Hexagon::IMASK, Hexagon::S11,
+ Hexagon::S12, Hexagon::S13, Hexagon::S14,
+ Hexagon::S15, Hexagon::EVB, Hexagon::MODECTL,
+ Hexagon::SYSCFG, Hexagon::S19, Hexagon::S20,
+ Hexagon::VID, Hexagon::S22, Hexagon::S23,
+ Hexagon::S24, Hexagon::S25, Hexagon::S26,
+ Hexagon::CFGBASE, Hexagon::DIAG, Hexagon::REV,
+ Hexagon::PCYCLELO, Hexagon::PCYCLEHI, Hexagon::ISDBST,
+ Hexagon::ISDBCFG0, Hexagon::ISDBCFG1, Hexagon::S35,
+ Hexagon::BRKPTPC0, Hexagon::BRKPTCFG0, Hexagon::BRKPTPC1,
+ Hexagon::BRKPTCFG1, Hexagon::ISDBMBXIN, Hexagon::ISDBMBXOUT,
+ Hexagon::ISDBEN, Hexagon::ISDBGPR, Hexagon::S44,
+ Hexagon::S45, Hexagon::S46, Hexagon::S47,
+ Hexagon::PMUCNT0, Hexagon::PMUCNT1, Hexagon::PMUCNT2,
+ Hexagon::PMUCNT3, Hexagon::PMUEVTCFG, Hexagon::PMUCFG,
+ Hexagon::S54, Hexagon::S55, Hexagon::S56,
+ Hexagon::S57, Hexagon::S58, Hexagon::S59,
+ Hexagon::S60, Hexagon::S61, Hexagon::S62,
+ Hexagon::S63, Hexagon::S64, Hexagon::S65,
+ Hexagon::S66, Hexagon::S67, Hexagon::S68,
+ Hexagon::S69, Hexagon::S70, Hexagon::S71,
+ Hexagon::S72, Hexagon::S73, Hexagon::S74,
+ Hexagon::S75, Hexagon::S76, Hexagon::S77,
+ Hexagon::S78, Hexagon::S79, Hexagon::S80,
+};
+
+static DecodeStatus DecodeSysRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ if (RegNo >= sizeof(SysRegDecoderTable) / sizeof(SysRegDecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (SysRegDecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SysRegDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t SysReg64DecoderTable[] = {
+ Hexagon::SGP1_0, Hexagon::S3_2, Hexagon::S5_4, Hexagon::S7_6,
+ Hexagon::S9_8, Hexagon::S11_10, Hexagon::S13_12, Hexagon::S15_14,
+ Hexagon::S17_16, Hexagon::S19_18, Hexagon::S21_20, Hexagon::S23_22,
+ Hexagon::S25_24, Hexagon::S27_26, Hexagon::S29_28, Hexagon::S31_30,
+ Hexagon::S33_32, Hexagon::S35_34, Hexagon::S37_36, Hexagon::S39_38,
+ Hexagon::S41_40, Hexagon::S43_42, Hexagon::S45_44, Hexagon::S47_46,
+ Hexagon::S49_48, Hexagon::S51_50, Hexagon::S53_52, Hexagon::S55_54,
+ Hexagon::S57_56, Hexagon::S59_58, Hexagon::S61_60, Hexagon::S63_62,
+ Hexagon::S65_64, Hexagon::S67_66, Hexagon::S69_68, Hexagon::S71_70,
+ Hexagon::S73_72, Hexagon::S75_74, Hexagon::S77_76, Hexagon::S79_78,
+};
+
+static DecodeStatus DecodeSysRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void *Decoder) {
+ RegNo = RegNo >> 1;
+ if (RegNo >= sizeof(SysReg64DecoderTable) / sizeof(SysReg64DecoderTable[0]))
+ return MCDisassembler::Fail;
+
+ if (SysReg64DecoderTable[RegNo] == Hexagon::NoRegister)
+ return MCDisassembler::Fail;
+
+ unsigned Register = SysReg64DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Register));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t /*Address*/,
const void *Decoder) {
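The new Hexagon system-register decoders above follow the usual table-driven pattern: the encoded field indexes a fixed table, and both out-of-range indices and table holes (NoRegister) make the decode fail rather than fabricate a register. A generic sketch of that pattern with hypothetical register numbers:

#include <cassert>
#include <cstddef>
#include <optional>

constexpr unsigned NoRegister = 0; // stand-in for Hexagon::NoRegister

static std::optional<unsigned> decodeFromTable(const unsigned *Table,
                                               size_t Size, unsigned RegNo) {
  if (RegNo >= Size)
    return std::nullopt; // field value outside the table
  if (Table[RegNo] == NoRegister)
    return std::nullopt; // hole in the table: invalid encoding
  return Table[RegNo];
}

int main() {
  // Hypothetical register numbers; index 2 is deliberately a hole.
  static const unsigned SysRegTable[] = {101, 102, NoRegister, 104};
  assert(decodeFromTable(SysRegTable, 4, 1).value() == 102);
  assert(!decodeFromTable(SysRegTable, 4, 2).has_value());
  assert(!decodeFromTable(SysRegTable, 4, 9).has_value());
}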
diff --git a/llvm/lib/Target/Hexagon/HexagonArch.h b/llvm/lib/Target/Hexagon/HexagonArch.h
index e5d528390c51..4a42ec98feb1 100644
--- a/llvm/lib/Target/Hexagon/HexagonArch.h
+++ b/llvm/lib/Target/Hexagon/HexagonArch.h
@@ -19,12 +19,6 @@ namespace llvm {
namespace Hexagon {
template <class ArchCont, typename Val>
-bool ValidArch(ArchCont const &ArchList, Val HexArch) {
- return std::any_of(std::begin(ArchList), std::end(ArchList),
- [HexArch](Val V) { return V == HexArch; });
-}
-
-template <class ArchCont, typename Val>
llvm::Optional<ArchEnum> GetCpu(ArchCont const &ArchList, Val CPUString) {
llvm::Optional<ArchEnum> Res;
auto Entry = ArchList.find(CPUString);
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index f3017d02995e..8e6a01e3a186 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -41,10 +41,10 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -179,7 +179,7 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
MCStreamer &OutStreamer, const MCOperand &Imm,
- int AlignSize) {
+ int AlignSize, const MCSubtargetInfo& STI) {
MCSymbol *Sym;
int64_t Value;
if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
@@ -209,7 +209,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
OutStreamer.emitLabel(Sym);
OutStreamer.emitSymbolAttribute(Sym, MCSA_Global);
OutStreamer.emitIntValue(Value, AlignSize);
- OutStreamer.emitCodeAlignment(AlignSize);
+ OutStreamer.emitCodeAlignment(AlignSize, &STI);
}
} else {
assert(Imm.isExpr() && "Expected expression and found none");
@@ -237,7 +237,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
OutStreamer.emitLabel(Sym);
OutStreamer.emitSymbolAttribute(Sym, MCSA_Local);
OutStreamer.emitValue(Imm.getExpr(), AlignSize);
- OutStreamer.emitCodeAlignment(AlignSize);
+ OutStreamer.emitCodeAlignment(AlignSize, &STI);
}
}
return Sym;
@@ -328,7 +328,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
const MCOperand &Imm = MappedInst.getOperand(1);
MCSectionSubPair Current = OutStreamer->getCurrentSection();
- MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+ MCSymbol *Sym =
+ smallData(*this, MI, *OutStreamer, Imm, 8, getSubtargetInfo());
OutStreamer->SwitchSection(Current.first, Current.second);
MCInst TmpInst;
@@ -345,7 +346,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
if (!OutStreamer->hasRawTextSupport()) {
MCOperand &Imm = MappedInst.getOperand(1);
MCSectionSubPair Current = OutStreamer->getCurrentSection();
- MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+ MCSymbol *Sym =
+ smallData(*this, MI, *OutStreamer, Imm, 4, getSubtargetInfo());
OutStreamer->SwitchSection(Current.first, Current.second);
MCInst TmpInst;
MCOperand &Reg = MappedInst.getOperand(0);
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 54aa14849dd9..2c5ad3b589d2 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -972,8 +972,8 @@ namespace {
} // end anonymous namespace
bool DeadCodeElimination::isDead(unsigned R) const {
- for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
- MachineInstr *UseI = I->getParent();
+ for (const MachineOperand &MO : MRI.use_operands(R)) {
+ const MachineInstr *UseI = MO.getParent();
if (UseI->isDebugValue())
continue;
if (UseI->isPHI()) {
@@ -1305,8 +1305,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
return false;
bool Changed = false;
- for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
- NextI = std::next(I);
+ for (auto I = B.begin(), E = B.end(); I != E; ++I) {
MachineInstr *MI = &*I;
if (MI->getOpcode() == TargetOpcode::COPY)
@@ -1598,9 +1597,7 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
bool Changed = false;
RegisterSet Defs;
- for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
- ++I, AVB.insert(Defs)) {
- NextI = std::next(I);
+ for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
Defs.clear();
HBS::getInstrDefs(*I, Defs);
@@ -1726,8 +1723,8 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
std::vector<MachineInstr*> Instrs;
- for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
- Instrs.push_back(&*I);
+ for (MachineInstr &MI : llvm::reverse(B))
+ Instrs.push_back(&MI);
bool Changed = false;
for (auto I : Instrs) {
@@ -3123,8 +3120,8 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
if (isConst(PR))
continue;
bool BadUse = false, GoodUse = false;
- for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseI = UI->getParent();
+ for (const MachineOperand &MO : MRI->use_operands(PR)) {
+ const MachineInstr *UseI = MO.getParent();
if (UseI->getParent() != C.LB) {
BadUse = true;
break;
@@ -3252,7 +3249,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
return G.Out.Reg == P.LR.Reg;
};
- if (llvm::find_if(Phis, LoopInpEq) == Phis.end())
+ if (llvm::none_of(Phis, LoopInpEq))
continue;
G.Inp.Reg = Inputs.find_first();
@@ -3338,9 +3335,9 @@ bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
continue;
MachineBasicBlock *PB = nullptr;
bool IsLoop = false;
- for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
- if (*PI != &B)
- PB = *PI;
+ for (MachineBasicBlock *Pred : B.predecessors()) {
+ if (Pred != &B)
+ PB = Pred;
else
IsLoop = true;
}
@@ -3348,13 +3345,13 @@ bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
continue;
MachineBasicBlock *EB = nullptr;
- for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
- if (*SI == &B)
+ for (MachineBasicBlock *Succ : B.successors()) {
+ if (Succ == &B)
continue;
// Set EP to the epilog block, if it has only 1 predecessor (i.e. the
// edge from B to EP is non-critical.
- if ((*SI)->pred_size() == 1)
- EB = *SI;
+ if (Succ->pred_size() == 1)
+ EB = Succ;
break;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 9f18d0b3162c..43f0758f6598 100644
--- a/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -386,9 +386,8 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
// dereferences the pointer operand.
GepNode *PN = N;
Type *PtrTy = GepI->getSourceElementType();
- for (User::op_iterator OI = GepI->idx_begin()+1, OE = GepI->idx_end();
- OI != OE; ++OI) {
- Value *Op = *OI;
+ for (Use &U : llvm::drop_begin(GepI->indices())) {
+ Value *Op = U;
GepNode *Nx = new (*Mem) GepNode;
Nx->Parent = PN; // Link Nx to the previous node.
Nx->Flags |= GepNode::Internal | InBounds;
diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 954e61563697..daf311fc49d4 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -863,14 +863,13 @@ void MachineConstPropagator::removeCFGEdge(MachineBasicBlock *From,
// First, remove the CFG successor/predecessor information.
From->removeSuccessor(To);
// Remove all corresponding PHI operands in the To block.
- for (auto I = To->begin(), E = To->getFirstNonPHI(); I != E; ++I) {
- MachineInstr *PN = &*I;
+ for (MachineInstr &PN : To->phis()) {
// reg0 = PHI reg1, bb2, reg3, bb4, ...
- int N = PN->getNumOperands()-2;
+ int N = PN.getNumOperands() - 2;
while (N > 0) {
- if (PN->getOperand(N+1).getMBB() == From) {
- PN->RemoveOperand(N+1);
- PN->RemoveOperand(N);
+ if (PN.getOperand(N + 1).getMBB() == From) {
+ PN.RemoveOperand(N + 1);
+ PN.RemoveOperand(N);
}
N -= 2;
}
@@ -996,8 +995,7 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) {
bool HaveTargets = computeBlockSuccessors(B, Targets);
// Rewrite the executable instructions. Skip branches if we don't
// have block successor information.
- for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::reverse(*B)) {
if (InstrExec.count(&MI)) {
if (MI.isBranch() && !HaveTargets)
continue;
@@ -1046,13 +1044,9 @@ bool MachineConstPropagator::rewrite(MachineFunction &MF) {
// erase instructions during rewriting, so this needs to be delayed until
// now.
for (MachineBasicBlock &B : MF) {
- MachineBasicBlock::iterator I = B.begin(), E = B.end();
- while (I != E) {
- auto Next = std::next(I);
- if (I->isBranch() && !InstrExec.count(&*I))
- B.erase(I);
- I = Next;
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(B))
+ if (MI.isBranch() && !InstrExec.count(&MI))
+ B.erase(&MI);
}
return Changed;
}
@@ -3133,11 +3127,9 @@ void HexagonConstEvaluator::replaceAllRegUsesWith(Register FromReg,
Register ToReg) {
assert(FromReg.isVirtual());
assert(ToReg.isVirtual());
- for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) {
- MachineOperand &O = *I;
- ++I;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI->use_operands(FromReg)))
O.setReg(ToReg);
- }
}
bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI,
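Several of the hunks above replace hand-rolled "remember the next iterator" loops with llvm::make_early_inc_range, which advances the iterator before the loop body runs so the current element can be erased safely. The same idea in plain C++ over a std::list:

#include <cassert>
#include <list>

int main() {
  std::list<int> Instrs = {1, 2, 3, 4, 5, 6};

  for (auto It = Instrs.begin(), End = Instrs.end(); It != End;) {
    auto Cur = It++;        // advance first, exactly what early-inc does
    if (*Cur % 2 == 0)
      Instrs.erase(Cur);    // erasing Cur does not invalidate It
  }

  assert(Instrs == std::list<int>({1, 3, 5}));
}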
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
index 4dd0110c4fed..b3f1b6638193 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td
@@ -1700,6 +1700,12 @@ class Enc_7b7ba8 : OpcodeHexagon {
bits <5> Vd32;
let Inst{4-0} = Vd32{4-0};
}
+class Enc_7d1542 : OpcodeHexagon {
+ bits <7> Ss128;
+ let Inst{22-16} = Ss128{6-0};
+ bits <5> Rd32;
+ let Inst{4-0} = Rd32{4-0};
+}
class Enc_7e5a82 : OpcodeHexagon {
bits <5> Ii;
let Inst{12-8} = Ii{4-0};
@@ -2011,6 +2017,12 @@ class Enc_8e583a : OpcodeHexagon {
let Inst{25-23} = n1{3-1};
let Inst{13-13} = n1{0-0};
}
+class Enc_8f7633 : OpcodeHexagon {
+ bits <5> Rs32;
+ let Inst{20-16} = Rs32{4-0};
+ bits <7> Sd128;
+ let Inst{6-0} = Sd128{6-0};
+}
class Enc_90cd8b : OpcodeHexagon {
bits <5> Rss32;
let Inst{20-16} = Rss32{4-0};
@@ -2346,6 +2358,12 @@ class Enc_a6ce9c : OpcodeHexagon {
bits <4> Rs16;
let Inst{7-4} = Rs16{3-0};
}
+class Enc_a705fc : OpcodeHexagon {
+ bits <5> Rss32;
+ let Inst{20-16} = Rss32{4-0};
+ bits <7> Sdd128;
+ let Inst{6-0} = Sdd128{6-0};
+}
class Enc_a7341a : OpcodeHexagon {
bits <5> Vu32;
let Inst{12-8} = Vu32{4-0};
@@ -3127,6 +3145,12 @@ class Enc_e26546 : OpcodeHexagon {
bits <5> Rx32;
let Inst{20-16} = Rx32{4-0};
}
+class Enc_e32517 : OpcodeHexagon {
+ bits <7> Sss128;
+ let Inst{22-16} = Sss128{6-0};
+ bits <5> Rdd32;
+ let Inst{4-0} = Rdd32{4-0};
+}
class Enc_e38e1f : OpcodeHexagon {
bits <8> Ii;
let Inst{12-5} = Ii{7-0};
diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
index bba36352815e..4f00409c336c 100644
--- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td
@@ -38870,6 +38870,26 @@ let Inst{13-0} = 0b00000000000000;
let Inst{31-16} = 0b1010100001000000;
let isSolo = 1;
}
+def Y2_tfrscrr : HInst<
+(outs IntRegs:$Rd32),
+(ins SysRegs:$Ss128),
+"$Rd32 = $Ss128",
+tc_fae9dfa5, TypeCR>, Enc_7d1542 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-23} = 0b011011101;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
+def Y2_tfrsrcr : HInst<
+(outs SysRegs:$Sd128),
+(ins IntRegs:$Rs32),
+"$Sd128 = $Rs32",
+tc_6ae3426b, TypeCR>, Enc_8f7633 {
+let Inst{13-7} = 0b0000000;
+let Inst{31-21} = 0b01100111000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
def Y2_wait : HInst<
(outs),
(ins IntRegs:$Rs32),
@@ -38891,6 +38911,24 @@ let isSoloAX = 1;
let hasSideEffects = 1;
let mayStore = 1;
}
+def Y4_tfrscpp : HInst<
+(outs DoubleRegs:$Rdd32),
+(ins SysRegs64:$Sss128),
+"$Rdd32 = $Sss128",
+tc_fae9dfa5, TypeCR>, Enc_e32517 {
+let Inst{13-5} = 0b000000000;
+let Inst{31-23} = 0b011011110;
+}
+def Y4_tfrspcp : HInst<
+(outs SysRegs64:$Sdd128),
+(ins DoubleRegs:$Rss32),
+"$Sdd128 = $Rss32",
+tc_6ae3426b, TypeCR>, Enc_a705fc {
+let Inst{13-7} = 0b0000000;
+let Inst{31-21} = 0b01101101000;
+let hasNewValue = 1;
+let opNewValue = 0;
+}
def Y4_trace : HInst<
(outs),
(ins IntRegs:$Rs32),
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index d36ffc3da641..9a3feb5b6af1 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -364,7 +364,7 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
return true;
if (B->isEHPad() || B->hasAddressTaken())
return false;
- if (B->succ_size() == 0)
+ if (B->succ_empty())
return false;
for (auto &MI : *B) {
@@ -390,8 +390,8 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
continue;
if (!isPredicate(R))
continue;
- for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U)
- if (U->getParent()->isPHI())
+ for (const MachineOperand &U : MRI->use_operands(R))
+ if (U.getParent()->isPHI())
return false;
}
}
@@ -570,12 +570,12 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
TotalPh = computePhiCost(FP.JoinB, FP);
PredDefs += countPredicateDefs(FP.JoinB);
} else {
- if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+ if (FP.TrueB && !FP.TrueB->succ_empty()) {
MachineBasicBlock *SB = *FP.TrueB->succ_begin();
TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
}
- if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+ if (FP.FalseB && !FP.FalseB->succ_empty()) {
MachineBasicBlock *SB = *FP.FalseB->succ_begin();
TotalPh += computePhiCost(SB, FP);
PredDefs += countPredicateDefs(SB);
@@ -877,7 +877,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
// existing terminators/successors from the split block.
MachineBasicBlock *SSB = nullptr;
FP.SplitB->erase(OldTI, FP.SplitB->end());
- while (FP.SplitB->succ_size() > 0) {
+ while (!FP.SplitB->succ_empty()) {
MachineBasicBlock *T = *FP.SplitB->succ_begin();
// It's possible that the split block had a successor that is not a pre-
// dicated block. This could only happen if there was only one block to
@@ -970,7 +970,7 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
}
}
- while (B->succ_size() > 0)
+ while (!B->succ_empty())
B->removeSuccessor(B->succ_begin());
for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
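The succ_size() == 0 to succ_empty() and use_begin()/use_end() to use_operands() rewrites in this file are behavior-preserving cleanups; the emptiness and range forms state the intent directly instead of going through iterator arithmetic. A tiny sketch of the style, using a hypothetical helper that is not part of the patch:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // A block with no successors whose last instruction returns ends the
  // function. succ_empty() reads more directly than succ_size() == 0.
  static bool isExitBlock(const MachineBasicBlock &MBB) {
    return MBB.succ_empty() && !MBB.empty() && MBB.back().isReturn();
  }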
diff --git a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index fcc880463925..c444cf557c21 100644
--- a/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -1070,20 +1070,18 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
std::set<Register> &UpdRegs) {
bool Changed = false;
- MachineBasicBlock::iterator I, E, NextI;
- for (I = B.begin(), E = B.end(); I != E; I = NextI) {
- NextI = std::next(I);
- unsigned Opc = I->getOpcode();
+ for (MachineInstr &MI : llvm::make_early_inc_range(B)) {
+ unsigned Opc = MI.getOpcode();
if (Opc == Hexagon::A2_tfrt || Opc == Hexagon::A2_tfrf) {
- bool Done = predicate(*I, (Opc == Hexagon::A2_tfrt), UpdRegs);
+ bool Done = predicate(MI, (Opc == Hexagon::A2_tfrt), UpdRegs);
if (!Done) {
// If we didn't predicate I, we may need to remove it in case it is
// an "identity" copy, e.g. %1 = A2_tfrt %2, %1.
- if (RegisterRef(I->getOperand(0)) == RegisterRef(I->getOperand(2))) {
- for (auto &Op : I->operands())
+ if (RegisterRef(MI.getOperand(0)) == RegisterRef(MI.getOperand(2))) {
+ for (auto &Op : MI.operands())
if (Op.isReg())
UpdRegs.insert(Op.getReg());
- removeInstr(*I);
+ removeInstr(MI);
}
}
Changed |= Done;
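Several files in this patch replace the manual pattern of caching std::next(I) before a possible erase with llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs, so the current element can be deleted safely. A minimal sketch of the idiom with a toy cleanup (the helper below is illustrative, not part of the patch):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // Erase COPY instructions whose destination is already marked dead.
  // make_early_inc_range increments its iterator before each body execution,
  // so erasing MI does not invalidate the traversal.
  static void removeDeadCopies(MachineBasicBlock &MBB) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      if (MI.isCopy() && MI.getOperand(0).isDead())
        MI.eraseFromParent();
    }
  }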
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 5b782543b3b4..bff596e69efd 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -281,11 +281,10 @@ static unsigned getMaxCalleeSavedReg(ArrayRef<CalleeSavedInfo> CSI,
/// frame to be already in place.
static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
const HexagonRegisterInfo &HRI) {
- for (auto &I : MBB) {
- const MachineInstr *MI = &I;
- if (MI->isCall())
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isCall())
return true;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::PS_alloca:
case Hexagon::PS_aligna:
@@ -294,7 +293,7 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
break;
}
// Check individual operands.
- for (const MachineOperand &MO : MI->operands()) {
+ for (const MachineOperand &MO : MI.operands()) {
// While the presence of a frame index does not prove that a stack
// frame will be required, all frame indexes should be within alloc-
// frame/deallocframe. Otherwise, the code that translates a frame
@@ -343,8 +342,8 @@ static bool hasTailCall(const MachineBasicBlock &MBB) {
/// Returns true if MBB contains an instruction that returns.
static bool hasReturn(const MachineBasicBlock &MBB) {
- for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
- if (I->isReturn())
+ for (const MachineInstr &MI : MBB.terminators())
+ if (MI.isReturn())
return true;
return false;
}
@@ -425,11 +424,10 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
// city don't do it right now.
for (auto &I : MF) {
unsigned BN = RPO[I.getNumber()];
- for (auto SI = I.succ_begin(), SE = I.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock *Succ : I.successors())
// If found a back-edge, return.
- if (RPO[(*SI)->getNumber()] <= BN)
+ if (RPO[Succ->getNumber()] <= BN)
return;
- }
}
// Collect the set of blocks that need a stack frame to execute. Scan
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index f2026877b22c..02da2f29591a 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -765,10 +765,7 @@ unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
unsigned MaxD = 0;
- using pred_iterator = MachineBasicBlock::const_pred_iterator;
-
- for (pred_iterator I = ToB->pred_begin(), E = ToB->pred_end(); I != E; ++I) {
- const MachineBasicBlock *PB = *I;
+ for (const MachineBasicBlock *PB : ToB->predecessors()) {
// Skip back edges. Also, if FromB is a predecessor of ToB, the distance
// along that path will be 0, and we don't need to do any calculations
// on it.
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index 07f85e69abba..cf4f13fb8c0d 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -183,12 +183,11 @@ void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
unsigned NR = HRI->getNumRegs();
BitVector Defs(NR), Uses(NR);
- for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- I2X.insert(std::make_pair(MI, Index));
+ for (MachineInstr &MI : B) {
+ I2X.insert(std::make_pair(&MI, Index));
Defs.reset();
Uses.reset();
- getDefsUses(MI, Defs, Uses);
+ getDefsUses(&MI, Defs, Uses);
DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses)));
Index++;
}
@@ -232,22 +231,19 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
CondsetMap CM;
MuxInfoList ML;
- MachineBasicBlock::iterator NextI, End = B.end();
- for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) {
- MachineInstr *MI = &*I;
- NextI = std::next(I);
- unsigned Opc = MI->getOpcode();
+ for (MachineInstr &MI : llvm::make_early_inc_range(B)) {
+ unsigned Opc = MI.getOpcode();
if (!isCondTransfer(Opc))
continue;
- Register DR = MI->getOperand(0).getReg();
+ Register DR = MI.getOperand(0).getReg();
if (isRegPair(DR))
continue;
- MachineOperand &PredOp = MI->getOperand(1);
+ MachineOperand &PredOp = MI.getOperand(1);
if (PredOp.isUndef())
continue;
Register PR = PredOp.getReg();
- unsigned Idx = I2X.lookup(MI);
+ unsigned Idx = I2X.lookup(&MI);
CondsetMap::iterator F = CM.find(DR);
bool IfTrue = HII->isPredicatedTrue(Opc);
@@ -360,21 +356,21 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
return true;
return false;
};
- for (auto I = B.rbegin(), E = B.rend(); I != E; ++I) {
- if (I->isDebugInstr())
+ for (MachineInstr &I : llvm::reverse(B)) {
+ if (I.isDebugInstr())
continue;
// This isn't 100% accurate, but it's safe.
// It won't detect (as a kill) a case like this
// r0 = add r0, 1 <-- r0 should be "killed"
// ... = r0
- for (MachineOperand &Op : I->operands()) {
+ for (MachineOperand &Op : I.operands()) {
if (!Op.isReg() || !Op.isUse())
continue;
assert(Op.getSubReg() == 0 && "Should have physical registers only");
bool Live = IsLive(Op.getReg());
Op.setIsKill(!Live);
}
- LPR.stepBackward(*I);
+ LPR.stepBackward(I);
}
return Changed;
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 58f3cd55ee9f..a4971ad712eb 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -468,7 +468,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
return false;
Register CmpReg1, CmpReg2;
- int CmpImm = 0, CmpMask = 0;
+ int64_t CmpImm = 0, CmpMask = 0;
bool CmpAnalyzed =
TII->analyzeCompare(*PredI, CmpReg1, CmpReg2, CmpMask, CmpImm);
// Fail if the compare was not analyzed, or it's not comparing a register
@@ -652,7 +652,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
unsigned CondOpc = CondI->getOpcode();
Register CmpReg1, CmpReg2;
- int Mask = 0, ImmValue = 0;
+ int64_t Mask = 0, ImmValue = 0;
bool AnalyzedCmp =
TII->analyzeCompare(*CondI, CmpReg1, CmpReg2, Mask, ImmValue);
if (!AnalyzedCmp)
@@ -1094,15 +1094,15 @@ void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
if (!MO.isReg() || !MO.isDef())
continue;
Register Reg = MO.getReg();
- MachineRegisterInfo::use_iterator nextI;
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
- E = MRI->use_end(); I != E; I = nextI) {
- nextI = std::next(I); // I is invalidated by the setReg
- MachineInstr *UseMI = I->getParent();
+ // We use make_early_inc_range here because setReg below invalidates the
+ // iterator.
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_operands(Reg))) {
+ MachineInstr *UseMI = MO.getParent();
if (UseMI == MI)
continue;
- if (I->isDebug())
- I->setReg(0U);
+ if (MO.isDebug())
+ MO.setReg(0U);
}
}
@@ -1453,7 +1453,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
E = MRI->use_instr_nodbg_end(); I != E; ++I) {
MachineInstr *MI = &*I;
Register CmpReg1, CmpReg2;
- int CmpMask = 0, CmpValue = 0;
+ int64_t CmpMask = 0, CmpValue = 0;
if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
continue;
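The int to int64_t changes in this file follow the widened TargetInstrInfo::analyzeCompare interface (see the HexagonInstrInfo hunks below), which keeps 64-bit compare immediates from being truncated through the out-parameters. A hedged sketch of a caller written against the new signature; the helper name and the SrcReg2 convention check are illustrative:

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/TargetInstrInfo.h"

  using namespace llvm;

  // Fill CmpValue when MI is an analyzable compare against an immediate.
  // Note the int64_t out-parameters required by the updated hook.
  static bool getCompareImmediate(const TargetInstrInfo &TII,
                                  const MachineInstr &MI, int64_t &CmpValue) {
    Register SrcReg, SrcReg2;
    int64_t Mask = 0, Value = 0;
    if (!TII.analyzeCompare(MI, SrcReg, SrcReg2, Mask, Value))
      return false;
    // Targets commonly report register-register compares by setting SrcReg2;
    // treat those as not having an immediate.
    if (SrcReg2.isValid())
      return false;
    CmpValue = Value;
    return true;
  }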
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index fd404a156903..2679e399852f 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -990,7 +990,7 @@ void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
auto IsZero = [] (const SDValue &V) -> bool {
if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
- return SC->isNullValue();
+ return SC->isZero();
return false;
};
auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
@@ -2247,8 +2247,8 @@ SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
}
void HexagonDAGToDAGISel::rebalanceAddressTrees() {
- for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
- SDNode *N = &*I++;
+ for (SDNode &Node : llvm::make_early_inc_range(CurDAG->allnodes())) {
+ SDNode *N = &Node;
if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 6ded323a34c3..29572e3106d1 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1231,7 +1231,7 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
if (RM == Reloc::Static) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
- const GlobalObject *GO = GV->getBaseObject();
+ const GlobalObject *GO = GV->getAliaseeObject();
if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
@@ -2556,7 +2556,7 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
// Extracting the lowest bit is a no-op, but it changes the type,
// so it must be kept as an operation to avoid errors related to
// type mismatches.
- if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
+ if (IdxN->isZero() && ValTy.getSizeInBits() == 1)
return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index e7d3c7c24f34..8900fca8bb78 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -525,7 +525,7 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
if (IsSplat) {
assert(SplatV.getNode());
auto *IdxN = dyn_cast<ConstantSDNode>(SplatV.getNode());
- if (IdxN && IdxN->isNullValue())
+ if (IdxN && IdxN->isZero())
return getZero(dl, VecTy, DAG);
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
@@ -743,12 +743,12 @@ HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
auto IsTrue = [] (SDValue V) {
if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
- return !N->isNullValue();
+ return !N->isZero();
return false;
};
auto IsFalse = [] (SDValue V) {
if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
- return N->isNullValue();
+ return N->isZero();
return false;
};
@@ -1065,7 +1065,7 @@ HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
// Convert IdxV to be index in bytes.
auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
- if (!IdxN || !IdxN->isNullValue()) {
+ if (!IdxN || !IdxN->isZero()) {
IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(ElemWidth/8, dl, MVT::i32));
SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
@@ -1088,7 +1088,7 @@ HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
RolBase = HwLen-4;
}
// If the vector wasn't ror'ed, don't ror it back.
- if (RolBase != 4 || !IdxN || !IdxN->isNullValue()) {
+ if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
@@ -1125,7 +1125,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
SDValue ByteIdx;
auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
- if (!IdxN || !IdxN->isNullValue()) {
+ if (!IdxN || !IdxN->isZero()) {
ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(BitBytes, dl, MVT::i32));
ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
@@ -1140,7 +1140,7 @@ HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
{DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
// Rotate ByteVec back, and convert to a vector predicate.
- if (!IdxN || !IdxN->isNullValue()) {
+ if (!IdxN || !IdxN->isZero()) {
SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
@@ -1594,15 +1594,15 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
- SDValue ValQ = Op.getOperand(0);
+ SDValue Val = Op.getOperand(0);
MVT ResTy = ty(Op);
- MVT VecTy = ty(ValQ);
+ MVT ValTy = ty(Val);
const SDLoc &dl(Op);
- if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
+ if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
unsigned HwLen = Subtarget.getVectorLength();
MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
- SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
+ SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
unsigned BitWidth = ResTy.getSizeInBits();
if (BitWidth < 64) {
@@ -1635,6 +1635,39 @@ HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
}
+ if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
+ // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
+ unsigned BitWidth = ValTy.getSizeInBits();
+ unsigned HwLen = Subtarget.getVectorLength();
+ assert(BitWidth == HwLen);
+
+ MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
+ SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
+ // Splat each byte of Val 8 times.
+ // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
+ // where b0, b1,..., b15 are least to most significant bytes of I.
+ SmallVector<SDValue, 128> Bytes;
+ // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
+ // These are bytes with the LSB rotated left with respect to their index.
+ SmallVector<SDValue, 128> Tmp;
+ for (unsigned I = 0; I != HwLen / 8; ++I) {
+ SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
+ SDValue Byte =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
+ for (unsigned J = 0; J != 8; ++J) {
+ Bytes.push_back(Byte);
+ Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
+ }
+ }
+
+ MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
+ SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
+ SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
+
+ // Each Byte in the I2V will be set iff corresponding bit is set in Val.
+ I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
+ return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
+ }
return Op;
}
@@ -2255,8 +2288,8 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case HexagonISD::V2Q:
if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
- return C->isNullValue() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
- : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
+ return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
+ : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
}
break;
case HexagonISD::Q2V:
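The new scalar-to-predicate bitcast path above turns each bit of the i64/i128 input into one byte of an HVX vector: every input byte is replicated eight times, ANDed with the repeating constant 0x01,0x02,...,0x80, and the resulting non-zero bytes become the set lanes handed to V2Q. A small scalar model of that expansion, written in plain C++ only to illustrate the arithmetic:

  #include <cstdint>
  #include <vector>

  // Expand the low NumBits bits of Val into one byte per bit: output byte I is
  // non-zero exactly when bit I of Val is set. Byte I of the splatted input is
  // Val's byte I/8, and the mask byte for lane I is 1 << (I % 8).
  std::vector<uint8_t> bitsToBytes(uint64_t Val, unsigned NumBits) {
    std::vector<uint8_t> Lanes(NumBits);
    for (unsigned I = 0; I != NumBits; ++I) {
      uint8_t SplatByte = (Val >> (8 * (I / 8))) & 0xFF;
      uint8_t MaskByte = uint8_t(1) << (I % 8);
      Lanes[I] = SplatByte & MaskByte;
    }
    return Lanes;
  }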
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index ef2b3040931d..45adaf50774f 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -268,8 +268,7 @@ class OpcodeDuplex {
let Inst{12-0} = ISubLo;
}
-class InstDuplex<bits<4> iClass, list<dag> pattern = [],
- string cstr = "">
+class InstDuplex<bits<4> iClass, string cstr = "">
: Instruction, OpcodeDuplex {
let Namespace = "Hexagon";
IType Type = TypeDUPLEX; // uses slot 0,1
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index f14eaacbf071..76220eff4d51 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -169,13 +169,13 @@ MachineInstr *HexagonInstrInfo::findLoopInstr(MachineBasicBlock *BB,
continue;
if (PB == BB)
continue;
- for (auto I = PB->instr_rbegin(), E = PB->instr_rend(); I != E; ++I) {
- unsigned Opc = I->getOpcode();
+ for (MachineInstr &I : llvm::reverse(PB->instrs())) {
+ unsigned Opc = I.getOpcode();
if (Opc == LOOPi || Opc == LOOPr)
- return &*I;
+ return &I;
// We've reached a different loop, which means the loop01 has been
// removed.
- if (Opc == EndLoopOp && I->getOperand(0).getMBB() != TargetBB)
+ if (Opc == EndLoopOp && I.getOperand(0).getMBB() != TargetBB)
return nullptr;
}
// Check the predecessors for the LOOP instruction.
@@ -1791,8 +1791,8 @@ HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask,
- int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
unsigned Opc = MI.getOpcode();
// Set mask and the first source register.
@@ -3627,8 +3627,8 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
- report_fatal_error(std::string("Unknown .new type: ") +
- std::to_string(MI.getOpcode()));
+ report_fatal_error(Twine("Unknown .new type: ") +
+ std::to_string(MI.getOpcode()));
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
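The report_fatal_error change above routes the message through a Twine rather than a concatenated std::string. Twine can also format integers itself, which would drop the std::to_string temporary entirely; a hedged alternative formulation (the helper name is illustrative):

  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/ErrorHandling.h"

  using namespace llvm;

  // Twine's integer constructors render the value lazily when the message is
  // materialized, so no temporary std::string is built.
  [[noreturn]] static void unknownDotNewOpcode(unsigned Opcode) {
    report_fatal_error(Twine("Unknown .new type: ") + Twine(Opcode));
  }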
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 11717996935d..eaaf9f7046c7 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -270,7 +270,8 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const override;
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const override;
/// Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 76cc8f402c5a..ccaf1aac1ce0 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -1351,8 +1351,8 @@ bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
// be unshifted.
if (!commutesWithShift(R))
return false;
- for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
- auto *T = cast<Instruction>(*I);
+ for (User *U : R->users()) {
+ auto *T = cast<Instruction>(U);
// Skip users from outside of the loop. They will be handled later.
// Also, skip the right-shifts and phi nodes, since they mix early
// and late values.
@@ -1490,10 +1490,8 @@ void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
I.replaceAllUsesWith(SV);
- for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
- N = std::next(I);
- RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
- }
+ for (Instruction &I : llvm::make_early_inc_range(*LoopB))
+ RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI);
}
unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
@@ -2247,8 +2245,7 @@ CleanupAndExit:
DT->addNewBlock(MemmoveB, Preheader);
// Find the new immediate dominator of the exit block.
BasicBlock *ExitD = Preheader;
- for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
- BasicBlock *PB = *PI;
+ for (BasicBlock *PB : predecessors(ExitB)) {
ExitD = DT->findNearestCommonDominator(ExitD, PB);
if (!ExitD)
break;
diff --git a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 0e6555024303..47bebf77b31b 100644
--- a/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -286,9 +286,6 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
}
void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
for (const SDep &PI : SU->Preds) {
unsigned PredReadyCycle = PI.getSUnit()->TopReadyCycle;
unsigned MinLatency = PI.getLatency();
@@ -298,13 +295,12 @@ void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
SU->TopReadyCycle = PredReadyCycle + MinLatency;
}
- Top.releaseNode(SU, SU->TopReadyCycle);
+
+ if (!SU->isScheduled)
+ Top.releaseNode(SU, SU->TopReadyCycle);
}
void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
assert(SU->getInstr() && "Scheduled SUnit must have instr");
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -317,7 +313,9 @@ void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
SU->BotReadyCycle = SuccReadyCycle + MinLatency;
}
- Bot.releaseNode(SU, SU->BotReadyCycle);
+
+ if (!SU->isScheduled)
+ Bot.releaseNode(SU, SU->BotReadyCycle);
}
/// Does this SU have a hazard within the current instruction group.
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 8dc1113194a8..8edcb745d654 100644
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -535,13 +535,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
// I am doing this only because LLVM does not provide LiveOut
// at the BB level.
bool predLive = false;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SIE = MBB->succ_end();
- SI != SIE; ++SI) {
- MachineBasicBlock *succMBB = *SI;
- if (succMBB->isLiveIn(predReg))
+ for (const MachineBasicBlock *SuccMBB : MBB->successors())
+ if (SuccMBB->isLiveIn(predReg))
predLive = true;
- }
if (predLive)
break;
diff --git a/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index e026bb6d601d..bfd02802b782 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -67,26 +67,23 @@ bool HexagonOptimizeSZextends::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- unsigned Idx = 1;
+ unsigned Idx = 0;
// Try to optimize sign extends in formal parameters. It's relying on
// callee already sign extending the values. I'm not sure if our ABI
// requires callee to sign extend though.
for (auto &Arg : F.args()) {
- if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) {
+ if (F.getAttributes().hasParamAttr(Idx, Attribute::SExt)) {
if (!isa<PointerType>(Arg.getType())) {
- for (auto UI = Arg.use_begin(); UI != Arg.use_end();) {
- if (isa<SExtInst>(*UI)) {
- Instruction* Use = cast<Instruction>(*UI);
+ for (Use &U : llvm::make_early_inc_range(Arg.uses())) {
+ if (isa<SExtInst>(U)) {
+ Instruction* Use = cast<Instruction>(U);
SExtInst* SI = new SExtInst(&Arg, Use->getType());
assert (EVT::getEVT(SI->getType()) ==
(EVT::getEVT(Use->getType())));
- ++UI;
Use->replaceAllUsesWith(SI);
Instruction* First = &F.getEntryBlock().front();
SI->insertBefore(First);
Use->eraseFromParent();
- } else {
- ++UI;
}
}
}
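The Idx adjustment above (starting at 0 instead of 1) goes with the move from the index-based hasAttribute call to hasParamAttr: the old attribute-list indices reserve 0 for return-value attributes and start parameters at 1, while hasParamAttr takes the zero-based argument number directly. A minimal sketch of the newer query (assuming an llvm::Function F):

  #include "llvm/IR/Attributes.h"
  #include "llvm/IR/Function.h"

  using namespace llvm;

  // True if the ArgNo-th formal parameter (zero-based) carries the signext
  // attribute. No +1 offset is needed with hasParamAttr.
  static bool paramIsSExt(const Function &F, unsigned ArgNo) {
    return F.getAttributes().hasParamAttr(ArgNo, Attribute::SExt);
  }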
diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td
index 20c939577586..11f8af7c41a0 100644
--- a/llvm/lib/Target/Hexagon/HexagonPseudo.td
+++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td
@@ -198,7 +198,7 @@ def PS_callr_nr: InstHexagon<(outs), (ins IntRegs:$Rs),
let isCall = 1, hasSideEffects = 1,
isExtended = 0, isExtendable = 1, opExtendable = 0, isCodeGenOnly = 1,
BaseOpcode = "PS_call_nr", isExtentSigned = 1, opExtentAlign = 2 in
-class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
+class Call_nr<bits<5> nbits, bit isFalse, dag iops,
InstrItinClass itin>
: Pseudo<(outs), iops, "">, PredRel {
bits<2> Pu;
@@ -210,7 +210,7 @@ class Call_nr<bits<5> nbits, bit isPred, bit isFalse, dag iops,
let Itinerary = itin;
}
-def PS_call_nr : Call_nr<24, 0, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
+def PS_call_nr : Call_nr<24, 0, (ins s32_0Imm:$Ii), J2_call.Itinerary>;
//def PS_call_nrt: Call_nr<17, 1, 0, (ins PredRegs:$Pu, s32_0Imm:$dst),
// J2_callt.Itinerary>;
//def PS_call_nrf: Call_nr<17, 1, 1, (ins PredRegs:$Pu, s32_0Imm:$dst),
@@ -267,7 +267,7 @@ let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
-class JumpOpcStr<string Mnemonic, bit New, bit Taken> {
+class JumpOpcStr<string Mnemonic, bit Taken> {
string S = Mnemonic # !if(Taken, ":t", ":nt");
}
let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
@@ -275,7 +275,7 @@ let isBranch = 1, isIndirectBranch = 1, Defs = [PC], isPredicated = 1,
class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak, InstHexagon rootInst>
: InstHexagon<(outs), (ins PredRegs:$src, IntRegs:$dst),
CondStr<"$src", !if(PredNot,0,1), isPredNew>.S #
- JumpOpcStr<"jumpr", isPredNew, isTak>.S # " $dst",
+ JumpOpcStr<"jumpr", isTak>.S # " $dst",
[], "", rootInst.Itinerary, rootInst.Type>, OpcodeHexagon {
let isTaken = isTak;
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 49428db223a1..8b7138d3c809 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -31,6 +31,19 @@ let Namespace = "Hexagon" in {
let HWEncoding{4-0} = num;
}
+ class HexagonSys<bits<7> num, string n, list<string> alt = [],
+ list<Register> alias = []> : Register<n, alt> {
+ let Aliases = alias;
+ let HWEncoding{6-0} = num;
+ }
+
+ class HexagonDoubleSys<bits<7> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ RegisterWithSubRegs<n, subregs> {
+ let AltNames = alt;
+ let HWEncoding{6-0} = num;
+ }
+
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers.
class Ri<bits<5> num, string n, list<string> alt = []> :
@@ -74,6 +87,18 @@ let Namespace = "Hexagon" in {
let SubRegs = subregs;
}
+ // Rs - system registers
+ class Rs<bits<7> num, string n,
+ list<string> alt = [], list<Register> alias = []> :
+ HexagonSys<num, n, alt, alias>;
+
+ // Rss - 64-bit system registers.
+ class Rss<bits<7> num, string n, list<Register> subregs,
+ list<string> alt = []> :
+ HexagonDoubleSys<num, n, subregs, alt> {
+ let SubRegs = subregs;
+ }
+
// Mx - address modifier registers
class Mx<bits<1> num, string n> : Register<n, []> {
let HWEncoding{0} = num;
@@ -260,6 +285,134 @@ let Namespace = "Hexagon" in {
def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>;
def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
+ // System registers.
+ def SGP0 : Rs<0, "sgp0", ["s0"]>, DwarfRegNum<[144]>;
+ def SGP1 : Rs<1, "sgp1", ["s1"]>, DwarfRegNum<[145]>;
+ def STID : Rs<2, "stid", ["s2"]>, DwarfRegNum<[146]>;
+ def ELR : Rs<3, "elr", ["s3"]>, DwarfRegNum<[147]>;
+ def BADVA0 : Rs<4, "badva0", ["s4"]>, DwarfRegNum<[148]>;
+ def BADVA1 : Rs<5, "badva1", ["s5"]>, DwarfRegNum<[149]>;
+ def SSR : Rs<6, "ssr", ["s6"]>, DwarfRegNum<[150]>;
+ def CCR : Rs<7, "ccr", ["s7"]>, DwarfRegNum<[151]>;
+ def HTID : Rs<8, "htid", ["s8"]>, DwarfRegNum<[152]>;
+ def BADVA : Rs<9, "badva", ["s9"]>, DwarfRegNum<[153]>;
+ def IMASK : Rs<10, "imask", ["s10"]>, DwarfRegNum<[154]>;
+ def S11 : Rs<11, "s11">, DwarfRegNum<[155]>;
+ def S12 : Rs<12, "s12">, DwarfRegNum<[156]>;
+ def S13 : Rs<13, "s13">, DwarfRegNum<[157]>;
+ def S14 : Rs<14, "s14">, DwarfRegNum<[158]>;
+ def S15 : Rs<15, "s15">, DwarfRegNum<[159]>;
+ def EVB : Rs<16, "evb", ["s16"]>, DwarfRegNum<[160]>;
+ def MODECTL : Rs<17, "modectl", ["s17"]>, DwarfRegNum<[161]>;
+ def SYSCFG : Rs<18, "syscfg", ["s18"]>, DwarfRegNum<[162]>;
+ def S19 : Rs<19, "s19", ["s19"]>, DwarfRegNum<[163]>;
+ def S20 : Rs<20, "s20", ["s20"]>, DwarfRegNum<[164]>;
+ def VID : Rs<21, "vid", ["s21"]>, DwarfRegNum<[165]>;
+ def S22 : Rs<22, "s22", ["s22"]>, DwarfRegNum<[166]>;
+ def S23 : Rs<23, "s23">, DwarfRegNum<[167]>;
+ def S24 : Rs<24, "s24">, DwarfRegNum<[168]>;
+ def S25 : Rs<25, "s25">, DwarfRegNum<[169]>;
+ def S26 : Rs<26, "s26">, DwarfRegNum<[170]>;
+ def CFGBASE : Rs<27, "cfgbase", ["s27"]>, DwarfRegNum<[171]>;
+ def DIAG : Rs<28, "diag", ["s28"]>, DwarfRegNum<[172]>;
+ def REV : Rs<29, "rev", ["s29"]>, DwarfRegNum<[173]>;
+ def PCYCLELO : Rs<30, "pcyclelo", ["s30"]>, DwarfRegNum<[174]>;
+ def PCYCLEHI : Rs<31, "pcyclehi", ["s31"]>, DwarfRegNum<[175]>;
+ def ISDBST : Rs<32, "isdbst", ["s32"]>, DwarfRegNum<[176]>;
+ def ISDBCFG0 : Rs<33, "isdbcfg0", ["s33"]>, DwarfRegNum<[177]>;
+ def ISDBCFG1 : Rs<34, "isdbcfg1", ["s34"]>, DwarfRegNum<[178]>;
+ def S35 : Rs<35, "s35">, DwarfRegNum<[179]>;
+ def BRKPTPC0 : Rs<36, "brkptpc0", ["s36"]>, DwarfRegNum<[180]>;
+ def BRKPTCFG0: Rs<37, "brkptcfg0", ["s37"]>, DwarfRegNum<[181]>;
+ def BRKPTPC1 : Rs<38, "brkptpc1", ["s38"]>, DwarfRegNum<[182]>;
+ def BRKPTCFG1: Rs<39, "brkptcfg1", ["s39"]>, DwarfRegNum<[183]>;
+ def ISDBMBXIN: Rs<40, "isdbmbxin", ["s40"]>, DwarfRegNum<[184]>;
+ def ISDBMBXOUT: Rs<41, "isdbmbxout", ["s41"]>, DwarfRegNum<[185]>;
+ def ISDBEN: Rs<42, "isdben", ["s42"]>, DwarfRegNum<[186]>;
+ def ISDBGPR: Rs<43, "isdbgpr", ["s43"]>, DwarfRegNum<[187]>;
+ def S44: Rs<44, "s44">, DwarfRegNum<[188]>;
+ def S45: Rs<45, "s45">, DwarfRegNum<[189]>;
+ def S46: Rs<46, "s46">, DwarfRegNum<[190]>;
+ def S47: Rs<47, "s47">, DwarfRegNum<[191]>;
+ def PMUCNT0: Rs<48, "pmucnt0", ["s48"]>, DwarfRegNum<[192]>;
+ def PMUCNT1: Rs<49, "pmucnt1", ["s49"]>, DwarfRegNum<[193]>;
+ def PMUCNT2: Rs<50, "pmucnt2", ["s50"]>, DwarfRegNum<[194]>;
+ def PMUCNT3: Rs<51, "pmucnt3", ["s51"]>, DwarfRegNum<[195]>;
+ def PMUEVTCFG: Rs<52, "pmuevtcfg", ["s52"]>, DwarfRegNum<[196]>;
+ def PMUCFG: Rs<53, "pmucfg", ["s53"]>, DwarfRegNum<[197]>;
+ def S54: Rs<54, "s54">, DwarfRegNum<[198]>;
+ def S55: Rs<55, "s55">, DwarfRegNum<[199]>;
+ def S56: Rs<56, "s56">, DwarfRegNum<[200]>;
+ def S57: Rs<57, "s57">, DwarfRegNum<[201]>;
+ def S58: Rs<58, "s58">, DwarfRegNum<[202]>;
+ def S59: Rs<59, "s59">, DwarfRegNum<[203]>;
+ def S60: Rs<60, "s60">, DwarfRegNum<[204]>;
+ def S61: Rs<61, "s61">, DwarfRegNum<[205]>;
+ def S62: Rs<62, "s62">, DwarfRegNum<[206]>;
+ def S63: Rs<63, "s63">, DwarfRegNum<[207]>;
+ def S64: Rs<64, "s64">, DwarfRegNum<[208]>;
+ def S65: Rs<65, "s65">, DwarfRegNum<[209]>;
+ def S66: Rs<66, "s66">, DwarfRegNum<[210]>;
+ def S67: Rs<67, "s67">, DwarfRegNum<[211]>;
+ def S68: Rs<68, "s68">, DwarfRegNum<[212]>;
+ def S69: Rs<69, "s69">, DwarfRegNum<[213]>;
+ def S70: Rs<70, "s70">, DwarfRegNum<[214]>;
+ def S71: Rs<71, "s71">, DwarfRegNum<[215]>;
+ def S72: Rs<72, "s72">, DwarfRegNum<[216]>;
+ def S73: Rs<73, "s73">, DwarfRegNum<[217]>;
+ def S74: Rs<74, "s74">, DwarfRegNum<[218]>;
+ def S75: Rs<75, "s75">, DwarfRegNum<[219]>;
+ def S76: Rs<76, "s76">, DwarfRegNum<[220]>;
+ def S77: Rs<77, "s77">, DwarfRegNum<[221]>;
+ def S78: Rs<78, "s78">, DwarfRegNum<[222]>;
+ def S79: Rs<79, "s79">, DwarfRegNum<[223]>;
+ def S80: Rs<80, "s80">, DwarfRegNum<[224]>;
+
+ // System Register Pair
+ let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
+ def SGP1_0 : Rss<0, "s1:0", [SGP0, SGP1], ["sgp1:0"]>, DwarfRegNum<[144]>;
+ def S3_2 : Rss<2, "s3:2", [STID, ELR]>, DwarfRegNum<[146]>;
+ def S5_4 : Rss<4, "s5:4", [BADVA0, BADVA1], ["badva1:0"]>,
+ DwarfRegNum<[148]>;
+ def S7_6 : Rss<6, "s7:6", [SSR, CCR], ["ccr:ssr"]>, DwarfRegNum<[150]>;
+ def S9_8 : Rss<8, "s9:8", [HTID, BADVA]>, DwarfRegNum<[152]>;
+ def S11_10 : Rss<10, "s11:10", [IMASK, S11]>, DwarfRegNum<[154]>;
+ def S13_12 : Rss<12, "s13:12", [S12, S13]>, DwarfRegNum<[156]>;
+ def S15_14 : Rss<14, "s15:14", [S14, S15]>, DwarfRegNum<[158]>;
+ def S17_16 : Rss<16, "s17:16", [EVB, MODECTL]>, DwarfRegNum<[160]>;
+ def S19_18 : Rss<18, "s19:18", [SYSCFG, S19]>, DwarfRegNum<[162]>;
+ def S21_20 : Rss<20, "s21:20", [S20, VID]>, DwarfRegNum<[164]>;
+ def S23_22 : Rss<22, "s23:22", [S22, S23]>, DwarfRegNum<[166]>;
+ def S25_24 : Rss<24, "s25:24", [S24, S25]>, DwarfRegNum<[168]>;
+ def S27_26 : Rss<26, "s27:26", [S26, CFGBASE]>, DwarfRegNum<[170]>;
+ def S29_28 : Rss<28, "s29:28", [DIAG, REV]>, DwarfRegNum<[172]>;
+ def S31_30 : Rss<30, "s31:30", [PCYCLELO, PCYCLEHI], ["pcycle"]>, DwarfRegNum<[174]>;
+ def S33_32 : Rss<32, "s33:32", [ISDBST, ISDBCFG0]>, DwarfRegNum<[176]>;
+ def S35_34 : Rss<34, "s35:34", [ISDBCFG1, S35]>, DwarfRegNum<[178]>;
+ def S37_36 : Rss<36, "s37:36", [BRKPTPC0, BRKPTCFG0]>, DwarfRegNum<[180]>;
+ def S39_38 : Rss<38, "s39:38", [BRKPTPC1, BRKPTCFG1]>, DwarfRegNum<[182]>;
+ def S41_40 : Rss<40, "s41:40", [ISDBMBXIN, ISDBMBXOUT]>, DwarfRegNum<[184]>;
+ def S43_42 : Rss<42, "s43:42", [ISDBEN, ISDBGPR]>, DwarfRegNum<[186]>;
+ def S45_44 : Rss<44, "s45:44", [S44, S45]>, DwarfRegNum<[188]>;
+ def S47_46 : Rss<46, "s47:46", [S46, S47]>, DwarfRegNum<[190]>;
+ def S49_48 : Rss<48, "s49:48", [PMUCNT0, PMUCNT1]>, DwarfRegNum<[192]>;
+ def S51_50 : Rss<50, "s51:50", [PMUCNT2, PMUCNT3]>, DwarfRegNum<[194]>;
+ def S53_52 : Rss<52, "s53:52", [PMUEVTCFG, PMUCFG]>, DwarfRegNum<[196]>;
+ def S55_54 : Rss<54, "s55:54", [S54, S55]>, DwarfRegNum<[198]>;
+ def S57_56 : Rss<56, "s57:56", [S56, S57]>, DwarfRegNum<[200]>;
+ def S59_58 : Rss<58, "s59:58", [S58, S59]>, DwarfRegNum<[202]>;
+ def S61_60 : Rss<60, "s61:60", [S60, S61]>, DwarfRegNum<[204]>;
+ def S63_62 : Rss<62, "s63:62", [S62, S63]>, DwarfRegNum<[206]>;
+ def S65_64 : Rss<64, "s65:64", [S64, S65]>, DwarfRegNum<[208]>;
+ def S67_66 : Rss<66, "s67:66", [S66, S67]>, DwarfRegNum<[210]>;
+ def S69_68 : Rss<68, "s69:68", [S68, S69]>, DwarfRegNum<[212]>;
+ def S71_70 : Rss<70, "s71:70", [S70, S71]>, DwarfRegNum<[214]>;
+ def S73_72 : Rss<72, "s73:72", [S72, S73]>, DwarfRegNum<[216]>;
+ def S75_74 : Rss<74, "s75:74", [S74, S75]>, DwarfRegNum<[218]>;
+ def S77_76 : Rss<76, "s77:76", [S77, S76]>, DwarfRegNum<[219]>;
+ def S79_78 : Rss<78, "s79:78", [S79, S78]>, DwarfRegNum<[220]>;
+ }
+
// Guest Registers
def GELR: Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>;
def GSR: Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>;
@@ -432,6 +585,40 @@ def GuestRegs64 : RegisterClass<"Hexagon", [i64], 64,
G25_24, G27_26, G29_28,
G31_30)>;
+let Size = 32, isAllocatable = 0 in
+def SysRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add SGP0, SGP1, STID, ELR, BADVA0, BADVA1,
+ SSR, CCR, HTID, BADVA, IMASK,
+ S11, S12, S13, S14, S15,
+ S19, S23, S25,
+ EVB, MODECTL, SYSCFG, S20, VID, S22, S24,
+ S26, CFGBASE, DIAG, REV, PCYCLEHI,
+ PCYCLELO, ISDBST, ISDBCFG0, ISDBCFG1, S35,
+ BRKPTPC0, BRKPTCFG0, BRKPTPC1, BRKPTCFG1,
+ ISDBMBXIN, ISDBMBXOUT, ISDBEN, ISDBGPR,
+ S44, S45, S46, S47,
+ PMUCNT0, PMUCNT1, PMUCNT2, PMUCNT3,
+ PMUEVTCFG, PMUCFG, S54, S55, S56, S57,
+ S58, S59, S60, S61, S62, S63, S64, S65, S66, S67,
+ S68, S69, S70, S71, S72, S73, S74, S75, S76, S77,
+ S78, S79, S80
+ )>;
+
+let Size = 64, isAllocatable = 0 in
+def SysRegs64 : RegisterClass<"Hexagon", [i64], 64,
+ (add SGP1_0,
+ S3_2, S5_4, S7_6, S9_8,
+ S11_10, S13_12, S15_14,
+ S17_16, S19_18, S21_20,
+ S23_22, S25_24,
+ S27_26, S29_28, S31_30, S33_32, S35_34,
+ S37_36, S39_38, S41_40, S43_42, S45_44,
+ S47_46, S49_48, S51_50, S53_52,
+ S55_54, S57_56, S59_58,
+ S61_60, S63_62, S65_64, S67_66, S69_68,
+ S71_70, S73_72, S75_74, S77_76, S79_78
+ )>;
+
// These registers are new for v62 and onward.
// The function RegisterMatchesArch() uses this list for validation.
let isAllocatable = 0 in
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index f9fb14c190ff..4890c3dbb7bc 100644
--- a/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -70,9 +70,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
// Loop over all of the basic blocks
for (MachineBasicBlock &B : Fn) {
- for (auto I = B.begin(), E = B.end(); I != E; ) {
- MachineInstr &MI = *I;
- ++I;
+ for (MachineInstr &MI : llvm::make_early_inc_range(B)) {
unsigned Opc = MI.getOpcode();
if (Opc == Hexagon::CONST32) {
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 52452e9c6cd5..9a0f57fce97d 100644
--- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -508,7 +508,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
while (CmpI->getOpcode() == Hexagon::C2_not)
CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg());
- int Mask = 0, Val = 0;
+ int64_t Mask = 0, Val = 0;
bool OkCI = TII->analyzeCompare(*CmpI, CmpR1, CmpR2, Mask, Val);
if (!OkCI)
return;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 3cbb4b591f8c..66de698182d7 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -23,9 +23,9 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
@@ -238,9 +238,9 @@ const HexagonSubtarget *
HexagonTargetMachine::getSubtargetImpl(const Function &F) const {
AttributeList FnAttrs = F.getAttributes();
Attribute CPUAttr =
- FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-cpu");
+ FnAttrs.getFnAttr("target-cpu");
Attribute FSAttr =
- FnAttrs.getAttribute(AttributeList::FunctionIndex, "target-features");
+ FnAttrs.getFnAttr("target-features");
std::string CPU =
CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
@@ -280,11 +280,11 @@ void HexagonTargetMachine::adjustPassManager(PassManagerBuilder &PMB) {
void HexagonTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerLateLoopOptimizationsEPCallback(
- [=](LoopPassManager &LPM, PassBuilder::OptimizationLevel Level) {
+ [=](LoopPassManager &LPM, OptimizationLevel Level) {
LPM.addPass(HexagonLoopIdiomRecognitionPass());
});
PB.registerLoopOptimizerEndEPCallback(
- [=](LoopPassManager &LPM, PassBuilder::OptimizationLevel Level) {
+ [=](LoopPassManager &LPM, OptimizationLevel Level) {
LPM.addPass(HexagonVectorLoopCarriedReusePass());
});
}
@@ -447,11 +447,11 @@ void HexagonPassConfig::addPreEmitPass() {
}
// Packetization is mandatory: it handles gather/scatter at all opt levels.
- addPass(createHexagonPacketizer(NoOpt), false);
+ addPass(createHexagonPacketizer(NoOpt));
if (EnableVectorPrint)
- addPass(createHexagonVectorPrint(), false);
+ addPass(createHexagonVectorPrint());
// Add CFI instructions if necessary.
- addPass(createHexagonCallFrameInformation(), false);
+ addPass(createHexagonCallFrameInformation());
}
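The hunks above track two tree-wide API moves: TargetRegistry.h now lives under llvm/MC, and the pass-builder extension-point callbacks take the standalone llvm::OptimizationLevel type instead of the old nested PassBuilder::OptimizationLevel. A hedged sketch of registering a callback against the updated signature; LoopRotatePass is only a stand-in for whichever loop pass a target wants to schedule:

  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Scalar/LoopPassManager.h"
  #include "llvm/Transforms/Scalar/LoopRotation.h"

  using namespace llvm;

  void registerExampleCallbacks(PassBuilder &PB) {
    PB.registerLateLoopOptimizationsEPCallback(
        [](LoopPassManager &LPM, OptimizationLevel Level) {
          // The level is available for gating; this sketch adds the pass
          // unconditionally.
          LPM.addPass(LoopRotatePass());
        });
  }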
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 25466786ee41..7df32e4072e3 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -90,9 +90,8 @@ static bool isSmallDataSection(StringRef Sec) {
return true;
// If either ".sdata." or ".sbss." is a substring of the section name
// then put the symbol in small data.
- return Sec.find(".sdata.") != StringRef::npos ||
- Sec.find(".sbss.") != StringRef::npos ||
- Sec.find(".scommon.") != StringRef::npos;
+ return Sec.contains(".sdata.") || Sec.contains(".sbss.") ||
+ Sec.contains(".scommon.");
}
static const char *getSectionSuffixForSize(unsigned Size) {
@@ -178,10 +177,10 @@ MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
if (GO->hasSection()) {
StringRef Section = GO->getSection();
- if (Section.find(".access.text.group") != StringRef::npos)
+ if (Section.contains(".access.text.group"))
return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
- if (Section.find(".access.data.group") != StringRef::npos)
+ if (Section.contains(".access.data.group"))
return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS,
ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
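The find(X) != StringRef::npos to contains(X) rewrites here are purely cosmetic; StringRef::contains is shorthand for the same test and reads as the membership check it is. A trivial sketch:

  #include "llvm/ADT/StringRef.h"

  using namespace llvm;

  // StringRef::contains(X) is equivalent to find(X) != StringRef::npos.
  static bool isSmallDataLikeSection(StringRef Sec) {
    return Sec.contains(".sdata.") || Sec.contains(".sbss.");
  }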
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h b/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
index a5b14a7e0764..a99aa4f16a08 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetStreamer.h
@@ -16,6 +16,7 @@ class HexagonTargetStreamer : public MCTargetStreamer {
public:
HexagonTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void emitCodeAlignment(unsigned ByteAlignment,
+ const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0){};
virtual void emitFAlign(unsigned Size, unsigned MaxBytesToEmit){};
virtual void emitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 108027d79754..1bdd8c3c513a 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -64,7 +64,8 @@ HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
// The Hexagon target can unroll loops with run-time trip counts.
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
UP.Runtime = UP.Partial = true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 2144fb27eb67..9e637dfc3e16 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -61,7 +61,8 @@ public:
// The Hexagon target can unroll loops with run-time trip counts.
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -120,10 +121,9 @@ public:
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
ArrayRef<int> Mask, int Index, Type *SubTp);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
@@ -133,16 +133,14 @@ public:
const Instruction *I);
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
CmpInst::Predicate VecPred,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index fa1ba4f2e469..1d325553f45a 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -203,6 +203,10 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
}
bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
+ // FIXME: This pass causes verification failures.
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailsVerification);
+
auto &HST = MF.getSubtarget<HexagonSubtarget>();
HII = HST.getInstrInfo();
HRI = HST.getRegisterInfo();
@@ -230,16 +234,9 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
// dependence between Insn 0 and Insn 2. This can lead to incorrect
// packetization
for (MachineBasicBlock &MB : MF) {
- auto End = MB.end();
- auto MI = MB.begin();
- while (MI != End) {
- auto NextI = std::next(MI);
- if (MI->isKill()) {
- MB.erase(MI);
- End = MB.end();
- }
- MI = NextI;
- }
+ for (MachineInstr &MI : llvm::make_early_inc_range(MB))
+ if (MI.isKill())
+ MB.erase(&MI);
}
// TinyCore with Duplexes: Translate to big-instructions.
@@ -1156,12 +1153,9 @@ bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI,
void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
for (auto &B : MF) {
MachineBasicBlock::iterator BundleIt;
- MachineBasicBlock::instr_iterator NextI;
- for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
- NextI = std::next(I);
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::make_early_inc_range(B.instrs())) {
if (MI.isBundle())
- BundleIt = I;
+ BundleIt = MI.getIterator();
if (!MI.isInsideBundle())
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index f949a9327f7a..897fb209a8bf 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -82,6 +82,7 @@ public:
int getSizeOf(const Value *Val) const;
int getSizeOf(const Type *Ty) const;
+ int getAllocSizeOf(const Type *Ty) const;
int getTypeAlignment(Type *Ty) const;
VectorType *getByteVectorTy(int ScLen) const;
@@ -443,8 +444,8 @@ auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
auto *PtrTy = cast<PointerType>(Ptr->getType());
if (!PtrTy->isOpaque()) {
Type *ElemTy = PtrTy->getElementType();
- int ElemSize = HVC.getSizeOf(ElemTy);
- if (Adjust % ElemSize == 0) {
+ int ElemSize = HVC.getAllocSizeOf(ElemTy);
+ if (Adjust % ElemSize == 0 && Adjust != 0) {
Value *Tmp0 =
Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
@@ -979,6 +980,10 @@ auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
}
+auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int {
+ return DL.getTypeAllocSize(const_cast<Type *>(Ty)).getFixedValue();
+}
+
auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
// The actual type may be shorter than the HVX vector, so determine
// the alignment based on subtarget info.
@@ -1326,7 +1331,7 @@ auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
return None;
Builder B(Gep0->getParent());
- int Scale = DL.getTypeStoreSize(Gep0->getSourceElementType());
+ int Scale = getAllocSizeOf(Gep0->getSourceElementType());
// FIXME: for now only check GEPs with a single index.
if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
@@ -1343,7 +1348,7 @@ auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
- if (Unknown.isAllOnesValue())
+ if (Unknown.isAllOnes())
return None;
Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
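The new getAllocSizeOf wrapper matters because GEP arithmetic scales indices by a type's alloc size (the store size rounded up to its alignment), while getSizeOf/getTypeStoreSize report the unpadded size; using the store size for pointer-difference and pointer-adjustment math goes wrong for padded types. A standalone illustration of the distinction using DataLayout directly (the struct type is just an example):

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/Support/raw_ostream.h"

  using namespace llvm;

  int main() {
    LLVMContext Ctx;
    DataLayout DL("");  // default layout: i32 is 4-byte aligned
    // {i32, i8} occupies 5 bytes when stored, but GEP steps in units of 8.
    auto *STy =
        StructType::get(Ctx, {Type::getInt32Ty(Ctx), Type::getInt8Ty(Ctx)});
    outs() << "store size: " << DL.getTypeStoreSize(STy).getFixedValue() << "\n";
    outs() << "alloc size: " << DL.getTypeAllocSize(STy).getFixedValue() << "\n";
    return 0;
  }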
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
index 310536458de9..f973862a0c9b 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.cpp
@@ -386,8 +386,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
<< " can be reused\n");
SmallVector<Instruction *, 4> PNUsers;
- for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
- Use &U = *UI;
+ for (Use &U : PN->uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (User->getParent() != BB)
@@ -415,9 +414,7 @@ void HexagonVectorLoopCarriedReuse::findValueToReuse() {
// rematerialized in OtherBB, we may find more such "fixup" opportunities
// in this block. So, we'll start over again.
for (Instruction *I : PNUsers) {
- for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
- ++UI) {
- Use &U = *UI;
+ for (Use &U : BEInst->uses()) {
Instruction *BEUser = cast<Instruction>(U.getUser());
if (BEUser->getParent() != BB)
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 627c53cadd84..5e5a26fea076 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include <sstream>
@@ -686,10 +686,11 @@ public:
assert(Update && "Didn't find relaxation target");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
- static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
- ParseIn = 0x00004000, // In packet parse-bits.
- ParseEnd = 0x0000c000; // End of packet parse-bits.
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
+ static const uint32_t Nopcode = 0x7f000000, // Hard-coded NOP.
+ ParseIn = 0x00004000, // In packet parse-bits.
+ ParseEnd = 0x0000c000; // End of packet parse-bits.
while (Count % HEXAGON_INSTR_SIZE) {
LLVM_DEBUG(dbgs() << "Alignment not a multiple of the instruction size:"
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 24169c83bdb9..33b2e9a9e302 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -448,13 +448,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(const MCInst &MI,
++MCNumEmitted;
}
-LLVM_ATTRIBUTE_NORETURN
-static void raise_relocation_error(unsigned Width, unsigned Kind) {
+[[noreturn]] static void raise_relocation_error(unsigned Width, unsigned Kind) {
std::string Text;
raw_string_ostream Stream(Text);
Stream << "Unrecognized relocation combination: width=" << Width
<< " kind=" << Kind;
- report_fatal_error(Stream.str());
+ report_fatal_error(Twine(Stream.str()));
}
/// Some insns are not extended and thus have no bits. These cases require
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 32b0c610d63d..d832a756cb92 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -10,13 +10,13 @@
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "HexagonArch.h"
#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "TargetInfo/HexagonTargetInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -32,8 +32,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 48770be3e301..ef9f9fd337fa 100644
--- a/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/HexagonTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheHexagonTarget() {
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index e2642ddf722b..a994bd7e57a4 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -24,11 +24,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index b6f372657d59..57343784237d 100644
--- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -19,17 +19,13 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
-namespace llvm {
-Target &getTheLanaiTarget();
-}
-
static MCDisassembler *createLanaiDisassembler(const Target & /*T*/,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
diff --git a/llvm/lib/Target/Lanai/LanaiAluCode.h b/llvm/lib/Target/Lanai/LanaiAluCode.h
index 728332bff00b..69be05542723 100644
--- a/llvm/lib/Target/Lanai/LanaiAluCode.h
+++ b/llvm/lib/Target/Lanai/LanaiAluCode.h
@@ -70,7 +70,7 @@ inline static unsigned makePostOp(unsigned AluOp) {
}
inline static bool modifiesOp(unsigned AluOp) {
- return isPreOp(AluOp) | isPostOp(AluOp);
+ return isPreOp(AluOp) || isPostOp(AluOp);
}
inline static const char *lanaiAluCodeToString(unsigned AluOp) {
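The | to || change in modifiesOp is behavior-neutral here, since both operands are bools with no side effects, but the logical form short-circuits and avoids bitwise-vs-logical warnings. A self-contained illustration of the difference when the operands do have side effects:

  #include <cstdio>

  static bool loud(const char *Name) {
    std::printf("%s\n", Name);
    return true;
  }

  int main() {
    (void)(loud("a") || loud("b")); // short-circuits: prints only "a"
    (void)(loud("c") | loud("d"));  // bitwise on bools: prints "c" and "d"
    return 0;
  }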
diff --git a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 6bac7c75853d..c0b7fd3fdd5d 100644
--- a/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/LanaiInstPrinter.h"
#include "LanaiAluCode.h"
#include "LanaiCondCode.h"
#include "LanaiInstrInfo.h"
#include "LanaiMCInstLower.h"
#include "LanaiTargetMachine.h"
+#include "MCTargetDesc/LanaiInstPrinter.h"
#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,7 +32,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "asm-printer"
diff --git a/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index aadcdc43f560..45af250b1410 100644
--- a/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -287,14 +287,14 @@ void LanaiDAGToDAGISel::Select(SDNode *Node) {
ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
// Materialize zero constants as copies from R0. This allows the coalescer
// to propagate these into other instructions.
- if (ConstNode->isNullValue()) {
+ if (ConstNode->isZero()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(Node), Lanai::R0, MVT::i32);
return ReplaceNode(Node, New.getNode());
}
// Materialize all ones constants as copies from R1. This allows the
// coalescer to propagate these into other instructions.
- if (ConstNode->isAllOnesValue()) {
+ if (ConstNode->isAllOnes()) {
SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(Node), Lanai::R1, MVT::i32);
return ReplaceNode(Node, New.getNode());
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index b96e178109d0..0d9e63c112fb 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -486,7 +486,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
llvm_unreachable("unhandled argument type");
}
} else {
- // Sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
@@ -530,6 +530,15 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
return Chain;
}
+bool LanaiTargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+
+ return CCInfo.CheckReturn(Outs, RetCC_Lanai32);
+}
+
SDValue
LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool IsVarArg,
@@ -1167,7 +1176,7 @@ SDValue LanaiTargetLowering::LowerGlobalAddress(SDValue Op,
// If the code model is small or global variable will be placed in the small
// section, then assume address will fit in 21-bits.
- const GlobalObject *GO = GV->getBaseObject();
+ const GlobalObject *GO = GV->getAliaseeObject();
if (TLOF->isGlobalInSmallSection(GO, getTargetMachine())) {
SDValue Small = DAG.getTargetGlobalAddress(
GV, DL, getPointerTy(DAG.getDataLayout()), Offset, LanaiII::MO_NO_FLAG);
@@ -1391,8 +1400,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC,
// value is 0.
OtherOp = DAG.getConstant(0, dl, VT);
else
- OtherOp =
- DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, VT);
+ OtherOp = DAG.getAllOnesConstant(dl, VT);
return true;
}
}
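The last hunk above replaces DAG.getConstant(APInt::getAllOnesValue(...)) with DAG.getAllOnesConstant(dl, VT); the same migration to APInt::getAllOnes appears again in M68kISelLowering.cpp further down. Independent of the LLVM API, the value involved is simply a mask of BitWidth one-bits; a small standalone sketch with an invented helper name:

    #include <cassert>
    #include <cstdint>

    // Returns the low BitWidth bits set, i.e. the "all ones" value of that width.
    // Guard the shift so BitWidth == 64 does not shift by the full word size (UB).
    static uint64_t allOnes(unsigned BitWidth) {
      assert(BitWidth >= 1 && BitWidth <= 64 && "unsupported width");
      return BitWidth == 64 ? ~0ULL : ((1ULL << BitWidth) - 1);
    }

    int main() {
      assert(allOnes(1) == 0x1);
      assert(allOnes(8) == 0xFF);
      assert(allOnes(32) == 0xFFFFFFFFULL);
      assert(allOnes(64) == ~0ULL);
      return 0;
    }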
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.h b/llvm/lib/Target/Lanai/LanaiISelLowering.h
index d29d69eaadb0..2f58560f4efe 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.h
@@ -90,6 +90,11 @@ public:
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
Register getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const override;
std::pair<unsigned, const TargetRegisterClass *>
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index c82142970357..21d035c7ee9c 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -19,8 +19,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -175,8 +175,8 @@ LanaiInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
}
bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
switch (MI.getOpcode()) {
default:
break;
@@ -203,7 +203,7 @@ bool LanaiInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
// * SFSUB_F_RR can be made redundant by SUB_RI if the operands are the same.
// * SFSUB_F_RI can be made redundant by SUB_I if the operands are the same.
inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
- unsigned SrcReg2, int ImmValue,
+ unsigned SrcReg2, int64_t ImmValue,
MachineInstr *OI) {
if (CmpI->getOpcode() == Lanai::SFSUB_F_RR &&
OI->getOpcode() == Lanai::SUB_R &&
@@ -281,8 +281,9 @@ inline static unsigned flagSettingOpcodeVariant(unsigned OldOpcode) {
}
bool LanaiInstrInfo::optimizeCompareInstr(
- MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int /*CmpMask*/,
- int CmpValue, const MachineRegisterInfo *MRI) const {
+ MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2,
+ int64_t /*CmpMask*/, int64_t CmpValue,
+ const MachineRegisterInfo *MRI) const {
// Get the unique definition of SrcReg.
MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
if (!MI)
@@ -418,10 +419,8 @@ bool LanaiInstrInfo::optimizeCompareInstr(
// live-out. If it is live-out, do not optimize.
if (!isSafe) {
MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI)
- if ((*SI)->isLiveIn(Lanai::SR))
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ if (Succ->isLiveIn(Lanai::SR))
return false;
}
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 44c1e629a8e6..5eef4474801d 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -96,14 +96,14 @@ public:
// SrcReg2 if having two register operands, and the value it compares against
// in CmpValue. Return true if the comparison instruction can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
// See if the comparison instruction can be converted into something more
// efficient. E.g., on Lanai register-register instructions can set the flag
// register, obviating the need for a separate compare.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
// Analyze the given select instruction, returning true if it cannot be
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.td b/llvm/lib/Target/Lanai/LanaiInstrInfo.td
index fcf89a0b52f6..d1fd327722ef 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.td
@@ -269,7 +269,7 @@ def splsIdempotent : InstrMapping {
// -------------------------------------------------- //
// ALU instructions
// -------------------------------------------------- //
-multiclass ALUbase<bits<3> subOp, string AsmStr, SDNode OpNode,
+multiclass ALUbase<bits<3> subOp, string AsmStr,
PatLeaf LoExt, PatLeaf HiExt,
list<dag> loPattern, list<dag> hiPattern> {
// Register Immediate
@@ -286,7 +286,7 @@ multiclass ALUbase<bits<3> subOp, string AsmStr, SDNode OpNode,
multiclass ALUarith<bits<3> subOp, string AsmStr, SDNode OpNode,
PatLeaf LoExt, PatLeaf HiExt> {
- defm I_ : ALUbase<subOp, AsmStr, OpNode, LoExt, HiExt, [], []>;
+ defm I_ : ALUbase<subOp, AsmStr, LoExt, HiExt, [], []>;
// Register Register
let JJJJJ = 0 in
@@ -297,7 +297,7 @@ multiclass ALUarith<bits<3> subOp, string AsmStr, SDNode OpNode,
multiclass ALUlogic<bits<3> subOp, string AsmStr, SDNode OpNode,
PatLeaf LoExt, PatLeaf HiExt> {
- defm I_ : ALUbase<subOp, AsmStr, OpNode, LoExt, HiExt,
+ defm I_ : ALUbase<subOp, AsmStr, LoExt, HiExt,
[(set GPR:$Rd, (OpNode GPR:$Rs1, LoExt:$imm16))],
[(set GPR:$Rd, (OpNode GPR:$Rs1, HiExt:$imm16))]>;
diff --git a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index a31f59214ec7..70b6fd2c185d 100644
--- a/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
index f1fcbe4f418a..19a3bf4455ad 100644
--- a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -52,6 +52,16 @@ public:
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
TTI::TargetCostKind CostKind) {
assert(Ty->isIntegerTy());
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return
+ // TCC_Free here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TTI::TCC_Free;
+ // There is no cost model yet for operations on integers wider than 64 bits.
+ if (BitSize > 64)
+ return TTI::TCC_Free;
+
if (Imm == 0)
return TTI::TCC_Free;
if (isInt<16>(Imm.getSExtValue()))
@@ -81,8 +91,7 @@ public:
}
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
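The getIntImmCost hunk above returns TCC_Free for zero-width and wider-than-64-bit immediates before looking at the value itself. A standalone model of that guard order (the enum and helper here are simplified stand-ins, not the TTI interface):

    #include <cassert>
    #include <cstdint>

    enum Cost { TCC_Free = 0, TCC_Basic = 1 };

    // Mirrors the guard order in the hunk: width checks first, then value checks.
    static Cost intImmCost(int64_t Imm, unsigned BitSize) {
      if (BitSize == 0)   // no cost model for zero-width constants
        return TCC_Free;
      if (BitSize > 64)   // no cost model for wide integers yet
        return TCC_Free;
      if (Imm == 0)       // materializing zero is free
        return TCC_Free;
      if (Imm >= INT16_MIN && Imm <= INT16_MAX) // fits a 16-bit immediate field
        return TCC_Free;
      return TCC_Basic;
    }

    int main() {
      assert(intImmCost(0, 0) == TCC_Free);
      assert(intImmCost(42, 128) == TCC_Free);
      assert(intImmCost(1234, 32) == TCC_Free);     // fits in 16 bits
      assert(intImmCost(1 << 20, 32) == TCC_Basic); // needs a wider encoding
      return 0;
    }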
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index a17afe5e62f6..3c2a3ac69224 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -69,10 +69,12 @@ public:
return Lanai::NumTargetFixupKinds;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
-bool LanaiAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool LanaiAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if ((Count % 4) != 0)
return false;
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index e850b98de806..eb6bf8d3836c 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -22,8 +22,8 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cstdint>
#include <string>
@@ -97,6 +97,9 @@ public:
uint64_t &Target) const override {
if (Inst.getNumOperands() == 0)
return false;
+ if (!isConditionalBranch(Inst) && !isUnconditionalBranch(Inst) &&
+ !isCall(Inst))
+ return false;
if (Info->get(Inst.getOpcode()).OpInfo[0].OperandType ==
MCOI::OPERAND_PCREL) {
diff --git a/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp b/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
index 2bb9f6ed1e97..5c63df670938 100644
--- a/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
+++ b/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/LanaiTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index d8465f6d682b..4db879c34ad9 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -14,7 +14,7 @@
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include <sstream>
@@ -52,6 +52,7 @@ class M68kAsmParser : public MCTargetAsmParser {
bool isExpr();
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseMemOp(OperandVector &Operands);
+ OperandMatchResultTy parseRegOrMoveMask(OperandVector &Operands);
public:
M68kAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -80,6 +81,7 @@ public:
struct M68kMemOp {
enum class Kind {
Addr,
+ RegMask,
Reg,
RegIndirect,
RegPostIncrement,
@@ -90,6 +92,7 @@ struct M68kMemOp {
// These variables are used for the following forms:
// Addr: (OuterDisp)
+ // RegMask: RegMask (as register mask)
// Reg: %OuterReg
// RegIndirect: (%OuterReg)
// RegPostIncrement: (%OuterReg)+
@@ -106,6 +109,7 @@ struct M68kMemOp {
uint8_t Size : 4;
uint8_t Scale : 4;
const MCExpr *Expr;
+ uint16_t RegMask;
M68kMemOp() {}
M68kMemOp(Kind Op) : Op(Op) {}
@@ -117,14 +121,14 @@ struct M68kMemOp {
class M68kOperand : public MCParsedAsmOperand {
typedef MCParsedAsmOperand Base;
- enum class Kind {
+ enum class KindTy {
Invalid,
Token,
Imm,
MemOp,
};
- Kind Kind;
+ KindTy Kind;
SMLoc Start, End;
union {
StringRef Token;
@@ -133,8 +137,10 @@ class M68kOperand : public MCParsedAsmOperand {
M68kMemOp MemOp;
};
+ template <unsigned N> bool isAddrN() const;
+
public:
- M68kOperand(enum Kind Kind, SMLoc Start, SMLoc End)
+ M68kOperand(KindTy Kind, SMLoc Start, SMLoc End)
: Base(), Kind(Kind), Start(Start), End(End) {}
SMLoc getStartLoc() const override { return Start; }
@@ -143,12 +149,14 @@ public:
void print(raw_ostream &OS) const override;
bool isMem() const override { return false; }
- bool isMemOp() const { return Kind == Kind::MemOp; }
+ bool isMemOp() const { return Kind == KindTy::MemOp; }
static void addExpr(MCInst &Inst, const MCExpr *Expr);
// Reg
bool isReg() const override;
+ bool isAReg() const;
+ bool isDReg() const;
unsigned getReg() const override;
void addRegOperands(MCInst &Inst, unsigned N) const;
@@ -168,8 +176,15 @@ public:
static std::unique_ptr<M68kOperand> createImm(const MCExpr *Expr, SMLoc Start,
SMLoc End);
+ // MoveMask
+ bool isMoveMask() const;
+ void addMoveMaskOperands(MCInst &Inst, unsigned N) const;
+
// Addr
bool isAddr() const;
+ bool isAddr8() const { return isAddrN<8>(); }
+ bool isAddr16() const { return isAddrN<16>(); }
+ bool isAddr32() const { return isAddrN<32>(); }
void addAddrOperands(MCInst &Inst, unsigned N) const;
// ARI
@@ -210,11 +225,45 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kAsmParser() {
#define GET_MATCHER_IMPLEMENTATION
#include "M68kGenAsmMatcher.inc"
+static inline unsigned getRegisterByIndex(unsigned RegisterIndex) {
+ static unsigned RegistersByIndex[] = {
+ M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
+ M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
+ M68k::A4, M68k::A5, M68k::A6, M68k::SP,
+ };
+ assert(RegisterIndex <
+ sizeof(RegistersByIndex) / sizeof(RegistersByIndex[0]));
+ return RegistersByIndex[RegisterIndex];
+}
+
+static inline unsigned getRegisterIndex(unsigned Register) {
+ if (Register >= M68k::D0 && Register <= M68k::D7)
+ return Register - M68k::D0;
+ if (Register >= M68k::A0 && Register <= M68k::A6)
+ return Register - M68k::A0 + 8;
+
+ switch (Register) {
+ case M68k::SP:
+ // SP is sadly not contiguous with the rest of the An registers
+ return 15;
+
+ case M68k::PC:
+ case M68k::CCR:
+ return 16;
+
+ default:
+ llvm_unreachable("unexpected register number");
+ }
+}
+
void M68kMemOp::print(raw_ostream &OS) const {
switch (Op) {
case Kind::Addr:
OS << OuterDisp;
break;
+ case Kind::RegMask:
+ OS << "RegMask(" << format("%04x", RegMask) << ")";
+ break;
case Kind::Reg:
OS << '%' << OuterReg;
break;
@@ -248,7 +297,7 @@ void M68kOperand::addExpr(MCInst &Inst, const MCExpr *Expr) {
// Reg
bool M68kOperand::isReg() const {
- return Kind == Kind::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
+ return Kind == KindTy::MemOp && MemOp.Op == M68kMemOp::Kind::Reg;
}
unsigned M68kOperand::getReg() const {
@@ -265,13 +314,13 @@ void M68kOperand::addRegOperands(MCInst &Inst, unsigned N) const {
std::unique_ptr<M68kOperand> M68kOperand::createMemOp(M68kMemOp MemOp,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::MemOp, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::MemOp, Start, End);
Op->MemOp = MemOp;
return Op;
}
// Token
-bool M68kOperand::isToken() const { return Kind == Kind::Token; }
+bool M68kOperand::isToken() const { return Kind == KindTy::Token; }
StringRef M68kOperand::getToken() const {
assert(isToken());
return Token;
@@ -279,15 +328,15 @@ StringRef M68kOperand::getToken() const {
std::unique_ptr<M68kOperand> M68kOperand::createToken(StringRef Token,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Token, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Token, Start, End);
Op->Token = Token;
return Op;
}
// Imm
-bool M68kOperand::isImm() const { return Kind == Kind::Imm; }
+bool M68kOperand::isImm() const { return Kind == KindTy::Imm; }
void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
- assert(isImm() && "wrong oeprand kind");
+ assert(isImm() && "wrong operand kind");
assert((N == 1) && "can only handle one register operand");
M68kOperand::addExpr(Inst, Expr);
@@ -295,15 +344,53 @@ void M68kOperand::addImmOperands(MCInst &Inst, unsigned N) const {
std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr,
SMLoc Start, SMLoc End) {
- auto Op = std::make_unique<M68kOperand>(Kind::Imm, Start, End);
+ auto Op = std::make_unique<M68kOperand>(KindTy::Imm, Start, End);
Op->Expr = Expr;
return Op;
}
+// MoveMask
+bool M68kOperand::isMoveMask() const {
+ if (!isMemOp())
+ return false;
+
+ if (MemOp.Op == M68kMemOp::Kind::RegMask)
+ return true;
+
+ if (MemOp.Op != M68kMemOp::Kind::Reg)
+ return false;
+
+ // Only regular address / data registers are allowed to be used
+ // in register masks.
+ return getRegisterIndex(MemOp.OuterReg) < 16;
+}
+
+void M68kOperand::addMoveMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(isMoveMask() && "wrong operand kind");
+ assert((N == 1) && "can only handle one immediate operand");
+
+ uint16_t MoveMask = MemOp.RegMask;
+ if (MemOp.Op == M68kMemOp::Kind::Reg)
+ MoveMask = 1 << getRegisterIndex(MemOp.OuterReg);
+
+ Inst.addOperand(MCOperand::createImm(MoveMask));
+}
+
// Addr
bool M68kOperand::isAddr() const {
return isMemOp() && MemOp.Op == M68kMemOp::Kind::Addr;
}
+// TODO: Maybe we can also store the size of OuterDisp
+// in Size?
+template <unsigned N> bool M68kOperand::isAddrN() const {
+ if (isAddr()) {
+ int64_t Res;
+ if (MemOp.OuterDisp->evaluateAsAbsolute(Res))
+ return isInt<N>(Res);
+ return true;
+ }
+ return false;
+}
void M68kOperand::addAddrOperands(MCInst &Inst, unsigned N) const {
M68kOperand::addExpr(Inst, MemOp.OuterDisp);
}
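isAddrN<N>() above accepts an absolute address whose displacement, when it folds to a constant, fits in N signed bits (the llvm::isInt<N> check). A standalone version of that range test, with an invented helper name:

    #include <cassert>
    #include <cstdint>

    // True when Value is representable as an N-bit two's-complement integer,
    // i.e. -2^(N-1) <= Value <= 2^(N-1) - 1. Matches the semantics of llvm::isInt<N>.
    template <unsigned N> static bool fitsInSignedBits(int64_t Value) {
      static_assert(N >= 1 && N < 64, "width out of range for this sketch");
      const int64_t Lo = -(INT64_C(1) << (N - 1));
      const int64_t Hi = (INT64_C(1) << (N - 1)) - 1;
      return Value >= Lo && Value <= Hi;
    }

    int main() {
      assert(fitsInSignedBits<8>(127) && !fitsInSignedBits<8>(128));
      assert(fitsInSignedBits<16>(-32768) && !fitsInSignedBits<16>(40000));
      assert(fitsInSignedBits<32>(INT64_C(2147483647)));
      assert(!fitsInSignedBits<32>(INT64_C(2147483648)));
      return 0;
    }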
@@ -412,6 +499,18 @@ static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
}
}
+bool M68kOperand::isAReg() const {
+ return isReg() && checkRegisterClass(getReg(),
+ /*Data=*/false,
+ /*Address=*/true, /*SP=*/true);
+}
+
+bool M68kOperand::isDReg() const {
+ return isReg() && checkRegisterClass(getReg(),
+ /*Data=*/true,
+ /*Address=*/false, /*SP=*/false);
+}
+
unsigned M68kAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
M68kOperand &Operand = (M68kOperand &)Op;
@@ -487,11 +586,6 @@ bool M68kAsmParser::parseRegisterName(unsigned &RegNo, SMLoc Loc,
// Parse simple general-purpose registers.
if (RegisterNameLower.size() == 2) {
- static unsigned RegistersByIndex[] = {
- M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
- M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
- M68k::A4, M68k::A5, M68k::A6, M68k::SP,
- };
switch (RegisterNameLower[0]) {
case 'd':
@@ -500,7 +594,7 @@ bool M68kAsmParser::parseRegisterName(unsigned &RegNo, SMLoc Loc,
unsigned IndexOffset = (RegisterNameLower[0] == 'a') ? 8 : 0;
unsigned RegIndex = (unsigned)(RegisterNameLower[1] - '0');
if (RegIndex < 8) {
- RegNo = RegistersByIndex[IndexOffset + RegIndex];
+ RegNo = getRegisterByIndex(IndexOffset + RegIndex);
return true;
}
}
@@ -616,16 +710,9 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
bool IsPD = false;
M68kMemOp MemOp;
- // Check for a plain register.
- auto Result = parseRegister(MemOp.OuterReg);
- if (Result == MatchOperand_Success) {
- MemOp.Op = M68kMemOp::Kind::Reg;
- Operands.push_back(
- M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
- return MatchOperand_Success;
- }
-
- if (Result == MatchOperand_ParseFail) {
+ // Check for a plain register or register mask.
+ auto Result = parseRegOrMoveMask(Operands);
+ if (Result != llvm::MatchOperand_NoMatch) {
return Result;
}
@@ -743,6 +830,87 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
return MatchOperand_Success;
}
+OperandMatchResultTy
+M68kAsmParser::parseRegOrMoveMask(OperandVector &Operands) {
+ SMLoc Start = getLexer().getLoc();
+ M68kMemOp MemOp(M68kMemOp::Kind::RegMask);
+ MemOp.RegMask = 0;
+
+ for (;;) {
+ bool IsFirstRegister =
+ (MemOp.Op == M68kMemOp::Kind::RegMask) && (MemOp.RegMask == 0);
+
+ unsigned FirstRegister;
+ auto Result = parseRegister(FirstRegister);
+ if (IsFirstRegister && (Result == llvm::MatchOperand_NoMatch)) {
+ return MatchOperand_NoMatch;
+ }
+ if (Result != llvm::MatchOperand_Success) {
+ Error(getLexer().getLoc(), "expected start register");
+ return MatchOperand_ParseFail;
+ }
+
+ unsigned LastRegister = FirstRegister;
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex();
+ Result = parseRegister(LastRegister);
+ if (Result != llvm::MatchOperand_Success) {
+ Error(getLexer().getLoc(), "expected end register");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ unsigned FirstRegisterIndex = getRegisterIndex(FirstRegister);
+ unsigned LastRegisterIndex = getRegisterIndex(LastRegister);
+
+ uint16_t NumNewBits = LastRegisterIndex - FirstRegisterIndex + 1;
+ uint16_t NewMaskBits = ((1 << NumNewBits) - 1) << FirstRegisterIndex;
+
+ if (IsFirstRegister && (FirstRegister == LastRegister)) {
+ // First register range is a single register, simplify to just Reg
+ // so that it matches more operands.
+ MemOp.Op = M68kMemOp::Kind::Reg;
+ MemOp.OuterReg = FirstRegister;
+ } else {
+ if (MemOp.Op == M68kMemOp::Kind::Reg) {
+ // This is the second register being specified - expand the Reg operand
+ // into a mask first.
+ MemOp.Op = M68kMemOp::Kind::RegMask;
+ MemOp.RegMask = 1 << getRegisterIndex(MemOp.OuterReg);
+
+ if (MemOp.RegMask == 0) {
+ Error(getLexer().getLoc(),
+ "special registers cannot be used in register masks");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ if ((FirstRegisterIndex >= 16) || (LastRegisterIndex >= 16)) {
+ Error(getLexer().getLoc(),
+ "special registers cannot be used in register masks");
+ return MatchOperand_ParseFail;
+ }
+
+ if (NewMaskBits & MemOp.RegMask) {
+ Error(getLexer().getLoc(), "conflicting masked registers");
+ return MatchOperand_ParseFail;
+ }
+
+ MemOp.RegMask |= NewMaskBits;
+ }
+
+ if (getLexer().isNot(AsmToken::Slash)) {
+ break;
+ }
+
+ getLexer().Lex();
+ }
+
+ Operands.push_back(
+ M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
+ return MatchOperand_Success;
+}
+
void M68kAsmParser::eatComma() {
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex();
@@ -842,19 +1010,19 @@ bool M68kAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
void M68kOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case Kind::Invalid:
+ case KindTy::Invalid:
OS << "invalid";
break;
- case Kind::Token:
+ case KindTy::Token:
OS << "token '" << Token << "'";
break;
- case Kind::Imm:
+ case KindTy::Imm:
OS << "immediate " << Imm;
break;
- case Kind::MemOp:
+ case KindTy::MemOp:
MemOp.print(OS);
break;
}
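parseRegOrMoveMask() above builds the 16-bit MOVEM mask by mapping D0-D7 to bits 0-7 and A0-A6/SP to bits 8-15, expanding a range first-last into ((1 << n) - 1) << firstIndex and rejecting overlaps. A standalone sketch of just that mask arithmetic, with registers reduced to plain indices:

    #include <cassert>
    #include <cstdint>

    // Add the registers with indices [First, Last] (0-15) to a MOVEM-style mask.
    // Returns false on an overlap with bits already present, mirroring the
    // "conflicting masked registers" diagnostic in the parser.
    static bool addRegisterRange(uint16_t &Mask, unsigned First, unsigned Last) {
      assert(First <= Last && Last < 16 && "only D0-D7/A0-A6/SP are maskable");
      uint16_t NumNewBits = Last - First + 1;
      uint16_t NewMaskBits = ((1u << NumNewBits) - 1) << First;
      if (NewMaskBits & Mask)
        return false; // conflicting masked registers
      Mask |= NewMaskBits;
      return true;
    }

    int main() {
      uint16_t Mask = 0;
      assert(addRegisterRange(Mask, 0, 3));   // d0-d3 -> bits 0..3
      assert(addRegisterRange(Mask, 10, 10)); // a2    -> bit 10
      assert(Mask == 0x040F);
      assert(!addRegisterRange(Mask, 3, 5));  // d3 already present
      return 0;
    }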
diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index a8453c838493..a08ffa787095 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -21,7 +21,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -451,7 +451,8 @@ void M68kDisassembler::decodeImm(MCInst &Instr, unsigned Bead,
llvm_unreachable("invalid imm");
}
- Scratch = (Scratch << NumToRead) | Reader.readBits(NumToRead);
+ Scratch = (NumToRead < 32) ? (Scratch << NumToRead) : 0;
+ Scratch |= Reader.readBits(NumToRead);
}
DecodeStatus M68kDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
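The decodeImm fix above avoids shifting the accumulator by its full width (undefined behaviour in C++) when NumToRead is 32; the accumulated value is cleared instead of shifted. A minimal standalone illustration of the guarded accumulate step, with the bead reader replaced by a plain argument:

    #include <cassert>
    #include <cstdint>

    // Append NumToRead freshly read bits to the low end of Scratch.
    // Shifting a 32-bit value by 32 is undefined, so the shift is guarded
    // exactly as in the disassembler hunk. ReadBits is assumed to hold only
    // NumToRead significant bits, as the real bit reader guarantees.
    static uint32_t appendBits(uint32_t Scratch, unsigned NumToRead,
                               uint32_t ReadBits) {
      assert(NumToRead <= 32 && "cannot read more than the accumulator width");
      Scratch = (NumToRead < 32) ? (Scratch << NumToRead) : 0;
      Scratch |= ReadBits;
      return Scratch;
    }

    int main() {
      assert(appendBits(0x1, 8, 0xAB) == 0x1AB);
      // A full-width read replaces the accumulator rather than invoking UB.
      assert(appendBits(0xDEADBEEF, 32, 0x12345678) == 0x12345678);
      return 0;
    }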
diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
index c5931cbfe04f..9cd959012e6f 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
@@ -33,7 +33,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -110,7 +110,7 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void M68kIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
diff --git a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
index 9e0d462db677..47cdefdba100 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.h
@@ -52,7 +52,7 @@ struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler {
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
diff --git a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
index 9ac4ab9a5ba1..9ac4ab9a5ba1 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kInstructionSelector.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
index bcbe62816beb..bcbe62816beb 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp
diff --git a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
index 205aa81aedcc..205aa81aedcc 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kLegalizerInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h
diff --git a/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
new file mode 100644
index 000000000000..5c0f5dae8e37
--- /dev/null
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp
@@ -0,0 +1,105 @@
+//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for M68k.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "M68kRegisterBankInfo.h"
+#include "M68kInstrInfo.h" // For the register classes
+#include "M68kSubtarget.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+
+#define GET_TARGET_REGBANK_IMPL
+#include "M68kGenRegisterBank.inc"
+
+using namespace llvm;
+
+// FIXME: TableGen this.
+// If it grows too much and TableGen still isn't ready to do the job, extract it
+// into an M68kGenRegisterBankInfo.def (similar to AArch64).
+namespace llvm {
+namespace M68k {
+enum PartialMappingIdx {
+ PMI_GPR,
+ PMI_Min = PMI_GPR,
+};
+
+RegisterBankInfo::PartialMapping PartMappings[]{
+ // GPR Partial Mapping
+ {0, 32, GPRRegBank},
+};
+
+enum ValueMappingIdx {
+ InvalidIdx = 0,
+ GPR3OpsIdx = 1,
+};
+
+RegisterBankInfo::ValueMapping ValueMappings[] = {
+ // invalid
+ {nullptr, 0},
+ // 3 operands in GPRs
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+ {&PartMappings[PMI_GPR - PMI_Min], 1},
+
+};
+} // end namespace M68k
+} // end namespace llvm
+
+M68kRegisterBankInfo::M68kRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : M68kGenRegisterBankInfo() {}
+
+const RegisterBank &
+M68kRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const {
+ return getRegBank(M68k::GPRRegBankID);
+}
+
+const RegisterBankInfo::InstructionMapping &
+M68kRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ auto Opc = MI.getOpcode();
+
+ if (!isPreISelGenericOpcode(Opc)) {
+ const InstructionMapping &Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ using namespace TargetOpcode;
+
+ unsigned NumOperands = MI.getNumOperands();
+ const ValueMapping *OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx];
+
+ switch (Opc) {
+ case G_ADD:
+ case G_SUB:
+ case G_MUL:
+ case G_SDIV:
+ case G_UDIV:
+ case G_LOAD:
+ case G_STORE: {
+ OperandsMapping = &M68k::ValueMappings[M68k::GPR3OpsIdx];
+ break;
+ }
+
+ case G_CONSTANT:
+ case G_FRAME_INDEX:
+ OperandsMapping =
+ getOperandsMapping({&M68k::ValueMappings[M68k::GPR3OpsIdx], nullptr});
+ break;
+ default:
+ return getInvalidInstructionMapping();
+ }
+
+ return getInstructionMapping(DefaultMappingID, /*Cost=*/1, OperandsMapping,
+ NumOperands);
+}
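The new M68kRegisterBankInfo.cpp describes every generic operand with the same table entry: a single partial mapping covering bits [0, 32) of the GPR bank, referenced three times for three-operand instructions. A stripped-down standalone model of that table layout (the structs are simplified stand-ins for the RegisterBankInfo types, not the LLVM API):

    #include <cassert>

    struct PartialMapping {
      unsigned StartIdx; // first bit of the value covered by this mapping
      unsigned Length;   // number of bits covered
      unsigned BankID;   // which register bank holds those bits
    };

    struct ValueMapping {
      const PartialMapping *Parts;
      unsigned NumParts;
    };

    enum { GPRBankID = 0 };
    enum PartialMappingIdx { PMI_GPR, PMI_Min = PMI_GPR };
    enum ValueMappingIdx { InvalidIdx = 0, GPR3OpsIdx = 1 };

    static const PartialMapping PartMappings[] = {
        {0, 32, GPRBankID}, // a 32-bit value living entirely in the GPR bank
    };

    static const ValueMapping ValueMappings[] = {
        {nullptr, 0},                          // invalid
        {&PartMappings[PMI_GPR - PMI_Min], 1}, // operand 0 in GPRs
        {&PartMappings[PMI_GPR - PMI_Min], 1}, // operand 1 in GPRs
        {&PartMappings[PMI_GPR - PMI_Min], 1}, // operand 2 in GPRs
    };

    int main() {
      // Three consecutive entries starting at GPR3OpsIdx describe a
      // three-operand instruction whose operands all map to the GPR bank.
      for (unsigned Op = 0; Op < 3; ++Op) {
        const ValueMapping &VM = ValueMappings[GPR3OpsIdx + Op];
        assert(VM.NumParts == 1 && VM.Parts[0].BankID == GPRBankID);
        assert(VM.Parts[0].StartIdx == 0 && VM.Parts[0].Length == 32);
      }
      return 0;
    }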
diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
index 9b97cc4a6dd4..853c75df2bb3 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.h
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h
@@ -34,6 +34,12 @@ protected:
class M68kRegisterBankInfo final : public M68kGenRegisterBankInfo {
public:
M68kRegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ const RegisterBank &getRegBankFromRegClass(const TargetRegisterClass &RC,
+ LLT) const override;
+
+ const InstructionMapping &
+ getInstrMapping(const MachineInstr &MI) const override;
};
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
index 2d1e74f78480..942677a60e6c 100644
--- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBanks.td
+++ b/llvm/lib/Target/M68k/GISel/M68kRegisterBanks.td
@@ -12,4 +12,4 @@
//===----------------------------------------------------------------------===//
/// General Purpose Registers. Here we define a register bank with name AnyGPR
-def GPRRegBank : RegisterBank<"AnyGPR", [DR8]>;
+def GPRRegBank : RegisterBank<"AnyGPR", [XR32]>;
diff --git a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp b/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp
deleted file mode 100644
index d12478624655..000000000000
--- a/llvm/lib/Target/M68k/GlSel/M68kRegisterBankInfo.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- M68kRegisterBankInfo.cpp -------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements the targeting of the RegisterBankInfo class for M68k.
-/// \todo This should be generated by TableGen.
-//===----------------------------------------------------------------------===//
-
-#include "M68kRegisterBankInfo.h"
-#include "MCTargetDesc/M68kMCTargetDesc.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
-#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetRegisterInfo.h"
-
-#define GET_TARGET_REGBANK_IMPL
-#include "M68kGenRegisterBank.inc"
-#undef GET_TARGET_REGBANK_IMPL
-
-using namespace llvm;
-
-M68kRegisterBankInfo::M68kRegisterBankInfo(const TargetRegisterInfo &TRI)
- : M68kGenRegisterBankInfo() {}
diff --git a/llvm/lib/Target/M68k/M68k.td b/llvm/lib/Target/M68k/M68k.td
index 669eb32f46f1..fde491e1b6d5 100644
--- a/llvm/lib/Target/M68k/M68k.td
+++ b/llvm/lib/Target/M68k/M68k.td
@@ -78,7 +78,7 @@ def : Proc<"M68060", [ FeatureISA60 ]>;
//===----------------------------------------------------------------------===//
include "M68kRegisterInfo.td"
-include "GlSel/M68kRegisterBanks.td"
+include "GISel/M68kRegisterBanks.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
index a6fc58b5a277..08b7153632b4 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
@@ -21,7 +21,7 @@
#include "MCTargetDesc/M68kInstPrinter.h"
#include "TargetInfo/M68kTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/M68kCallingConv.h b/llvm/lib/Target/M68k/M68kCallingConv.h
index 18f72c95cedb..20ffa993897f 100644
--- a/llvm/lib/Target/M68k/M68kCallingConv.h
+++ b/llvm/lib/Target/M68k/M68kCallingConv.h
@@ -24,14 +24,13 @@
namespace llvm {
/// Custom state to propagate llvm type info to register CC assigner
-class M68kCCState : public CCState {
-public:
- const llvm::Function &F;
+struct M68kCCState : public CCState {
+ ArrayRef<Type *> ArgTypeList;
- M68kCCState(const llvm::Function &F, CallingConv::ID CC, bool IsVarArg,
+ M68kCCState(ArrayRef<Type *> ArgTypes, CallingConv::ID CC, bool IsVarArg,
MachineFunction &MF, SmallVectorImpl<CCValAssign> &Locs,
LLVMContext &C)
- : CCState(CC, IsVarArg, MF, Locs, C), F(F) {}
+ : CCState(CC, IsVarArg, MF, Locs, C), ArgTypeList(ArgTypes) {}
};
/// NOTE this function is used to select registers for formal arguments and call
@@ -39,7 +38,7 @@ public:
inline bool CC_M68k_Any_AssignToReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
- M68kCCState CCInfo = static_cast<M68kCCState &>(State);
+ const M68kCCState &CCInfo = static_cast<M68kCCState &>(State);
static const MCPhysReg DataRegList[] = {M68k::D0, M68k::D1, M68k::A0,
M68k::A1};
@@ -52,14 +51,15 @@ inline bool CC_M68k_Any_AssignToReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
M68k::D1,
};
- auto I = CCInfo.F.arg_begin();
+ const auto &ArgTypes = CCInfo.ArgTypeList;
+ auto I = ArgTypes.begin(), End = ArgTypes.end();
int No = ValNo;
- while (No > 0) {
- No -= I->getType()->isIntegerTy(64) ? 2 : 1;
- I++;
+ while (No > 0 && I != End) {
+ No -= (*I)->isIntegerTy(64) ? 2 : 1;
+ ++I;
}
- bool IsPtr = I != CCInfo.F.arg_end() && I->getType()->isPointerTy();
+ bool IsPtr = I != End && (*I)->isPointerTy();
unsigned Reg =
IsPtr ? State.AllocateReg(AddrRegList) : State.AllocateReg(DataRegList);
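CC_M68k_Any_AssignToReg above recovers which argument a value number belongs to by walking the argument type list and letting 64-bit integers consume two value slots. A standalone model of that walk, with types reduced to a width flag and a pointer flag:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct ArgType {
      bool IsInt64;   // occupies two value slots, like i64 in the hunk
      bool IsPointer; // selects the address-register list in the hunk
    };

    // Returns the index of the argument that value number ValNo belongs to,
    // or ArgTypes.size() if ValNo runs past the argument list.
    static size_t argIndexForValNo(const std::vector<ArgType> &ArgTypes,
                                   int ValNo) {
      size_t I = 0;
      int No = ValNo;
      while (No > 0 && I != ArgTypes.size()) {
        No -= ArgTypes[I].IsInt64 ? 2 : 1;
        ++I;
      }
      return I;
    }

    int main() {
      // Arguments: i32, i64, ptr  -> value slots: [0], [1,2], [3]
      std::vector<ArgType> Args = {{false, false}, {true, false}, {false, true}};
      assert(argIndexForValNo(Args, 0) == 0); // i32
      assert(argIndexForValNo(Args, 1) == 1); // first half of the i64
      assert(argIndexForValNo(Args, 3) == 2); // the pointer argument
      assert(argIndexForValNo(Args, 4) == 3); // past the end
      return 0;
    }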
diff --git a/llvm/lib/Target/M68k/M68kFrameLowering.cpp b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
index 26262b9b573d..66ea6ae38f43 100644
--- a/llvm/lib/Target/M68k/M68kFrameLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kFrameLowering.cpp
@@ -357,7 +357,7 @@ void M68kFrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
if (Reg) {
unsigned Opc = M68k::MOV32ri;
BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg).addImm(Offset);
- Opc = IsSub ? M68k::SUB32rr : M68k::ADD32rr;
+ Opc = IsSub ? M68k::SUB32ar : M68k::ADD32ar;
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addReg(Reg);
@@ -400,13 +400,13 @@ int M68kFrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
return Offset;
}
- if (Opc == M68k::ADD32ri && PI->getOperand(0).getReg() == StackPtr) {
+ if (Opc == M68k::ADD32ai && PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset += PI->getOperand(2).getImm();
MBB.erase(PI);
if (!MergeWithPrevious)
MBBI = NI;
- } else if (Opc == M68k::SUB32ri && PI->getOperand(0).getReg() == StackPtr) {
+ } else if (Opc == M68k::SUB32ai && PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset -= PI->getOperand(2).getImm();
MBB.erase(PI);
@@ -426,7 +426,7 @@ MachineInstrBuilder M68kFrameLowering::BuildStackAdjustment(
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
- unsigned Opc = IsSub ? M68k::SUB32ri : M68k::ADD32ri;
+ unsigned Opc = IsSub ? M68k::SUB32ai : M68k::ADD32ai;
MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 3e7cee9889d7..79b395f8f984 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -519,9 +519,10 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- // It is empty for LibCall
- const Function *CalleeFunc = CLI.CB ? CLI.CB->getCalledFunction() : nullptr;
- M68kCCState CCInfo(*CalleeFunc, CallConv, IsVarArg, MF, ArgLocs,
+ SmallVector<Type *, 4> ArgTypes;
+ for (const auto &Arg : CLI.getArgs())
+ ArgTypes.emplace_back(Arg.Ty);
+ M68kCCState CCInfo(ArgTypes, CallConv, IsVarArg, MF, ArgLocs,
*DAG.getContext());
CCInfo.AnalyzeCallOperands(Outs, CC_M68k);
@@ -876,8 +877,10 @@ SDValue M68kTargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- M68kCCState CCInfo(MF.getFunction(), CCID, IsVarArg, MF, ArgLocs,
- *DAG.getContext());
+ SmallVector<Type *, 4> ArgTypes;
+ for (const Argument &Arg : MF.getFunction().args())
+ ArgTypes.emplace_back(Arg.getType());
+ M68kCCState CCInfo(ArgTypes, CCID, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_M68k);
@@ -1975,7 +1978,7 @@ SDValue M68kTargetLowering::LowerSETCCCARRY(SDValue Op,
M68k::CondCode CC = TranslateIntegerM68kCC(cast<CondCodeSDNode>(Cond)->get());
EVT CarryVT = Carry.getValueType();
- APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
+ APInt NegOne = APInt::getAllOnes(CarryVT.getScalarSizeInBits());
Carry = DAG.getNode(M68kISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), Carry,
DAG.getConstant(NegOne, DL, CarryVT));
@@ -2199,7 +2202,7 @@ SDValue M68kTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&
- // Blacklist CopyFromReg to avoid partial register stalls.
+ // Block CopyFromReg so partial register stalls are avoided.
T1.getOpcode() != ISD::CopyFromReg &&
T2.getOpcode() != ISD::CopyFromReg) {
SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index f65ad5729eb4..b2c05365d30b 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -88,14 +88,15 @@ let Defs = [CCR] in {
let Constraints = "$src = $dst" in {
// $reg, $ccr <- $reg op $reg
-class MxBiArOp_RFRR_xEA<string MN, SDNode NODE, MxType TYPE, bits<4> CMD, MxBead REG>
- : MxInst<(outs TYPE.ROp:$dst), (ins TYPE.ROp:$src, TYPE.ROp:$opd),
- MN#"."#TYPE.Prefix#"\t$opd, $dst",
- [(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.VT:$opd))],
+class MxBiArOp_RFRR_xEA<string MN, SDNode NODE, MxType DST_TYPE, MxType SRC_TYPE,
+ bits<4> CMD, MxBead REG>
+ : MxInst<(outs DST_TYPE.ROp:$dst), (ins DST_TYPE.ROp:$src, SRC_TYPE.ROp:$opd),
+ MN#"."#DST_TYPE.Prefix#"\t$opd, $dst",
+ [(set DST_TYPE.VT:$dst, CCR, (NODE DST_TYPE.VT:$src, SRC_TYPE.VT:$opd))],
MxArithEncoding<MxBead4Bits<CMD>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#TYPE.RLet#"EA"),
+ !cast<MxEncOpMode>("MxOpMode"#DST_TYPE.Size#DST_TYPE.RLet#"EA"),
REG,
- !cast<MxEncEA>("MxEncEA"#TYPE.RLet#"_2"),
+ !cast<MxEncEA>("MxEncEA"#SRC_TYPE.RLet#"_2"),
MxExtEmpty>>;
/// This Op is similar to the one above except it uses reversed opmode, some
@@ -260,11 +261,19 @@ multiclass MxBiArOp_DF<string MN, SDNode NODE, bit isComm,
def NAME#"32ji" : MxBiArOp_FMI<MN, NODE, MxType32, MxType32.JOp, MxType32.JPat,
CMDI, MxEncEAj_0, MxExtEmpty>;
+ def NAME#"16dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16r,
+ CMD, MxBeadDReg<0>>;
+ def NAME#"32dr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, MxType32r,
+ CMD, MxBeadDReg<0>>;
+
let isCommutable = isComm in {
- def NAME#"8dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType8d, CMD, MxBeadDReg<0>>;
- def NAME#"16dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, CMD, MxBeadDReg<0>>;
- def NAME#"32dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, CMD, MxBeadDReg<0>>;
+ def NAME#"8dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType8d, MxType8d,
+ CMD, MxBeadDReg<0>>;
+ def NAME#"16dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType16d, MxType16d,
+ CMD, MxBeadDReg<0>>;
+ def NAME#"32dd" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32d, MxType32d,
+ CMD, MxBeadDReg<0>>;
} // isComm
@@ -278,29 +287,29 @@ let Pattern = [(null_frag)] in
multiclass MxBiArOp_AF<string MN, SDNode NODE, bit isComm,
bits<4> CMD, bits<4> CMDI> {
- def NAME#"32rk" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.KOp, MxType32.KPat,
+ def NAME#"32ak" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.KOp, MxType32.KPat,
CMD, MxEncEAk, MxExtBrief_2>;
- def NAME#"32rq" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.QOp, MxType32.QPat,
+ def NAME#"32aq" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.QOp, MxType32.QPat,
CMD, MxEncEAq, MxExtI16_2>;
- def NAME#"32rf" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.FOp, MxType32.FPat,
+ def NAME#"32af" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.FOp, MxType32.FPat,
CMD, MxEncEAf_2, MxExtBrief_2>;
- def NAME#"32rp" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.POp, MxType32.PPat,
+ def NAME#"32ap" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.POp, MxType32.PPat,
CMD, MxEncEAp_2, MxExtI16_2>;
- def NAME#"32rj" : MxBiArOp_RFRM<MN, NODE, MxType32r, MxType32.JOp, MxType32.JPat,
+ def NAME#"32aj" : MxBiArOp_RFRM<MN, NODE, MxType32a, MxType32.JOp, MxType32.JPat,
CMD, MxEncEAj_2, MxExtEmpty>;
- def NAME#"32ri" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32r, CMD>;
+ def NAME#"32ai" : MxBiArOp_RFRI_xEA<MN, NODE, MxType32a, CMD>;
- let isCommutable = isComm in
- def NAME#"32rr" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32r, CMD, MxBeadReg<0>>;
+ def NAME#"32ar" : MxBiArOp_RFRR_xEA<MN, NODE, MxType32a, MxType32r,
+ CMD, MxBeadReg<0>>;
} // MxBiArOp_AF
// NOTE These naturally produce CCR
-defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>;
-defm ADD : MxBiArOp_AF<"add", MxAdd, 1, 0xD, 0x6>;
-defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
-defm SUB : MxBiArOp_AF<"sub", MxSub, 0, 0x9, 0x4>;
+defm ADD : MxBiArOp_DF<"add", MxAdd, 1, 0xD, 0x6>;
+defm ADD : MxBiArOp_AF<"adda", MxAdd, 1, 0xD, 0x6>;
+defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
+defm SUB : MxBiArOp_AF<"suba", MxSub, 0, 0x9, 0x4>;
let Uses = [CCR], Defs = [CCR] in {
@@ -366,13 +375,16 @@ defm XOR : MxBiArOp_DF_EAd<"eor", MxXor, 0xB, 0xA>;
//===----------------------------------------------------------------------===//
let Defs = [CCR] in {
-class MxCmp_RR<MxType TYPE>
- : MxInst<(outs), (ins TYPE.ROp:$lhs, TYPE.ROp:$rhs),
- "cmp."#TYPE.Prefix#"\t$lhs, $rhs",
- [(set CCR, (MxCmp TYPE.VT:$lhs, TYPE.VT:$rhs))],
+class MxCmp_RR<MxType LHS_TYPE, MxType RHS_TYPE = LHS_TYPE,
+ MxBead REG = MxBeadDReg<1>>
+ : MxInst<(outs), (ins LHS_TYPE.ROp:$lhs, RHS_TYPE.ROp:$rhs),
+ "cmp."#RHS_TYPE.Prefix#"\t$lhs, $rhs",
+ [(set CCR, (MxCmp LHS_TYPE.VT:$lhs, RHS_TYPE.VT:$rhs))],
MxArithEncoding<MxBead4Bits<0xB>,
- !cast<MxEncOpMode>("MxOpMode"#TYPE.Size#"dEA"),
- MxBeadDReg<1>, MxEncEAd_0, MxExtEmpty>>;
+ !cast<MxEncOpMode>("MxOpMode"#RHS_TYPE.Size#RHS_TYPE.RLet#"EA"),
+ REG,
+ !cast<MxEncEA>("MxEncEA"#LHS_TYPE.RLet#"_0"),
+ MxExtEmpty>>;
class MxCmp_RI<MxType TYPE>
: MxInst<(outs), (ins TYPE.IOp:$imm, TYPE.ROp:$reg),
@@ -444,11 +456,16 @@ multiclass MMxCmp_MI<MxType TYPE> {
}
foreach S = [8, 16, 32] in {
- def CMP#S#dd : MxCmp_RR<!cast<MxType>("MxType"#S#"d")>;
def CMP#S#di : MxCmp_RI<!cast<MxType>("MxType"#S#"d")>;
def CMP#S#bi : MxCmp_BI<!cast<MxType>("MxType"#S#"d")>;
} // foreach
+def CMP8dd : MxCmp_RR<MxType8d>;
+foreach S = [16, 32] in {
+ def CMP#S#dr : MxCmp_RR<!cast<MxType>("MxType"#S#"r"),
+ !cast<MxType>("MxType"#S#"d")>;
+}
+
// cmp mem, Dn
defm CMP8d : MMxCmp_RM<MxType8d>;
defm CMP16d : MMxCmp_RM<MxType16d>;
@@ -737,9 +754,9 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) i8 :$src, i8 :$opd),
(ADD8dd MxDRD8 :$src, MxDRD8 :$opd)>;
def : Pat<(!cast<SDNode>(N) i16:$src, i16:$opd),
- (ADD16dd MxDRD16:$src, MxDRD16:$opd)>;
+ (ADD16dr MxXRD16:$src, MxDRD16:$opd)>;
def : Pat<(!cast<SDNode>(N) i32:$src, i32:$opd),
- (ADD32rr MxXRD32:$src, MxXRD32:$opd)>;
+ (ADD32dr MxXRD32:$src, MxDRD32:$opd)>;
// add (An), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.JPat:$opd)),
@@ -747,7 +764,7 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.JPat:$opd)),
(ADD16dj MxDRD16:$src, MxType16.JOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.JPat:$opd)),
- (ADD32rj MxXRD32:$src, MxType32.JOp:$opd)>;
+ (ADD32dj MxDRD32:$src, MxType32.JOp:$opd)>;
// add (i,An), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.PPat:$opd)),
@@ -755,7 +772,7 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.PPat:$opd)),
(ADD16dp MxDRD16:$src, MxType16.POp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.PPat:$opd)),
- (ADD32rp MxXRD32:$src, MxType32.POp:$opd)>;
+ (ADD32dp MxDRD32:$src, MxType32.POp:$opd)>;
// add (i,An,Xn), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.FPat:$opd)),
@@ -763,7 +780,7 @@ foreach N = ["add", "addc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.FPat:$opd)),
(ADD16df MxDRD16:$src, MxType16.FOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.FPat:$opd)),
- (ADD32rf MxXRD32:$src, MxType32.FOp:$opd)>;
+ (ADD32df MxDRD32:$src, MxType32.FOp:$opd)>;
// add reg, imm
def : Pat<(!cast<SDNode>(N) i8: $src, MximmSExt8:$opd),
@@ -776,7 +793,7 @@ foreach N = ["add", "addc"] in {
// we make sure it will be selected over LEAp
let AddedComplexity = 15 in {
def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd),
- (ADD32ri MxXRD32:$src, imm:$opd)>;
+ (ADD32di MxDRD32:$src, imm:$opd)>;
} // AddedComplexity = 15
// add imm, (An)
@@ -806,7 +823,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) i16:$src, i16:$opd),
(SUB16dd MxDRD16:$src, MxDRD16:$opd)>;
def : Pat<(!cast<SDNode>(N) i32:$src, i32:$opd),
- (SUB32rr MxXRD32:$src, MxXRD32:$opd)>;
+ (SUB32dd MxDRD32:$src, MxDRD32:$opd)>;
// sub (An), reg
@@ -815,7 +832,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.JPat:$opd)),
(SUB16dj MxDRD16:$src, MxType16.JOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.JPat:$opd)),
- (SUB32rj MxXRD32:$src, MxType32.JOp:$opd)>;
+ (SUB32dj MxDRD32:$src, MxType32.JOp:$opd)>;
// sub (i,An), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.PPat:$opd)),
@@ -823,7 +840,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.PPat:$opd)),
(SUB16dp MxDRD16:$src, MxType16.POp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.PPat:$opd)),
- (SUB32rp MxXRD32:$src, MxType32.POp:$opd)>;
+ (SUB32dp MxDRD32:$src, MxType32.POp:$opd)>;
// sub (i,An,Xn), reg
def : Pat<(!cast<SDNode>(N) MxType8.VT:$src, (Mxloadi8 MxType8.FPat:$opd)),
@@ -831,7 +848,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) MxType16.VT:$src, (Mxloadi16 MxType16.FPat:$opd)),
(SUB16df MxDRD16:$src, MxType16.FOp:$opd)>;
def : Pat<(!cast<SDNode>(N) MxType32.VT:$src, (Mxloadi32 MxType32.FPat:$opd)),
- (SUB32rf MxXRD32:$src, MxType32.FOp:$opd)>;
+ (SUB32df MxDRD32:$src, MxType32.FOp:$opd)>;
// sub reg, imm
def : Pat<(!cast<SDNode>(N) i8 :$src, MximmSExt8 :$opd),
@@ -839,7 +856,7 @@ foreach N = ["sub", "subc"] in {
def : Pat<(!cast<SDNode>(N) i16:$src, MximmSExt16:$opd),
(SUB16di MxDRD16:$src, imm:$opd)>;
def : Pat<(!cast<SDNode>(N) i32:$src, MximmSExt32:$opd),
- (SUB32ri MxXRD32:$src, imm:$opd)>;
+ (SUB32di MxDRD32:$src, imm:$opd)>;
// sub imm, (An)
def : Pat<(store (!cast<SDNode>(N) (load MxType8.JPat:$dst), MxType8.IPat:$opd),
diff --git a/llvm/lib/Target/M68k/M68kInstrCompiler.td b/llvm/lib/Target/M68k/M68kInstrCompiler.td
index bcb815dbc4eb..8fb331dec0e9 100644
--- a/llvm/lib/Target/M68k/M68kInstrCompiler.td
+++ b/llvm/lib/Target/M68k/M68kInstrCompiler.td
@@ -23,15 +23,15 @@ def : Pat<(i32 (MxWrapper tjumptable :$src)), (MOV32ri tjumptable :$src)>;
def : Pat<(i32 (MxWrapper tblockaddress :$src)), (MOV32ri tblockaddress :$src)>;
def : Pat<(add MxDRD32:$src, (MxWrapper tconstpool:$opd)),
- (ADD32ri MxDRD32:$src, tconstpool:$opd)>;
+ (ADD32di MxDRD32:$src, tconstpool:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper tjumptable:$opd)),
- (ADD32ri MxARD32:$src, tjumptable:$opd)>;
+ (ADD32ai MxARD32:$src, tjumptable:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper tglobaladdr :$opd)),
- (ADD32ri MxARD32:$src, tglobaladdr:$opd)>;
+ (ADD32ai MxARD32:$src, tglobaladdr:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper texternalsym:$opd)),
- (ADD32ri MxARD32:$src, texternalsym:$opd)>;
+ (ADD32ai MxARD32:$src, texternalsym:$opd)>;
def : Pat<(add MxARD32:$src, (MxWrapper tblockaddress:$opd)),
- (ADD32ri MxARD32:$src, tblockaddress:$opd)>;
+ (ADD32ai MxARD32:$src, tblockaddress:$opd)>;
def : Pat<(store (i32 (MxWrapper tglobaladdr:$src)), iPTR:$dst),
(MOV32ji MxARI32:$dst, tglobaladdr:$src)>;
diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td
index 1d950bd0377a..99b7ffd17971 100644
--- a/llvm/lib/Target/M68k/M68kInstrFormats.td
+++ b/llvm/lib/Target/M68k/M68kInstrFormats.td
@@ -250,7 +250,7 @@ def MxOpMode16dEA : MxEncOpMode<MxBead3Bits<0b001>>;
def MxOpMode32dEA : MxEncOpMode<MxBead3Bits<0b010>>;
// op EA, An
-def MxOpMode16aEA : MxEncOpMode<MxBead3Bits<0b110>>;
+def MxOpMode16aEA : MxEncOpMode<MxBead3Bits<0b011>>;
def MxOpMode32aEA : MxEncOpMode<MxBead3Bits<0b111>>;
// op EA, Rn
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index 0eddd8ce5f4c..639bcd455687 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -24,8 +24,8 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include <functional>
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.h b/llvm/lib/Target/M68k/M68kInstrInfo.h
index a503b02c5a82..6aced1487365 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.h
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.h
@@ -173,7 +173,7 @@ static inline unsigned IsCMP(unsigned Op) {
case M68k::CMP8di:
case M68k::CMP8dj:
case M68k::CMP8dp:
- case M68k::CMP16dd:
+ case M68k::CMP16dr:
case M68k::CMP16df:
case M68k::CMP16di:
case M68k::CMP16dj:
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td
index e743213830de..ed6cd9ecf442 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -165,12 +165,23 @@ def MxSize8 : MxSize<8, "b", "byte">;
def MxSize16 : MxSize<16, "w", "word">;
def MxSize32 : MxSize<32, "l", "long">;
-class MxOpClass<string name> : AsmOperandClass {
+class MxOpClass<string name,
+ list<AsmOperandClass> superClasses = []> : AsmOperandClass {
let Name = name;
let ParserMethod = "parseMemOp";
+ let SuperClasses = superClasses;
}
def MxRegClass : MxOpClass<"Reg">;
+// Split the asm register class to avoid ambiguity in the operands'
+// MatchClassKind. For instance, without this separation,
+// both ADD32dd and ADD32dr have {MCK_RegClass, MCK_RegClass} for
+// their operands, which leaves the AsmParser unable to pick the correct
+// one deterministically.
+let RenderMethod = "addRegOperands", SuperClasses = [MxRegClass] in {
+ def MxARegClass : MxOpClass<"AReg">;
+ def MxDRegClass : MxOpClass<"DReg">;
+}
class MxOperand<ValueType vt, MxSize size, string letter, RegisterClass rc, dag pat = (null_frag)> {
ValueType VT = vt;
@@ -200,20 +211,24 @@ def MxXRD32_TC : MxRegOp<i32, XR32_TC, MxSize32, "r">;
// DATA REGISTER DIRECT. The operand is in the data register specified by
// the effective address register field.
-def MxDRD8 : MxRegOp<i8, DR8, MxSize8, "d">;
-def MxDRD16 : MxRegOp<i16, DR16, MxSize16, "d">;
-def MxDRD32 : MxRegOp<i32, DR32, MxSize32, "d">;
+let ParserMatchClass = MxDRegClass in {
+ def MxDRD8 : MxRegOp<i8, DR8, MxSize8, "d">;
+ def MxDRD16 : MxRegOp<i16, DR16, MxSize16, "d">;
+ def MxDRD32 : MxRegOp<i32, DR32, MxSize32, "d">;
-def MxDRD16_TC : MxRegOp<i16, DR16_TC, MxSize16, "d">;
-def MxDRD32_TC : MxRegOp<i32, DR32_TC, MxSize32, "d">;
+ def MxDRD16_TC : MxRegOp<i16, DR16_TC, MxSize16, "d">;
+ def MxDRD32_TC : MxRegOp<i32, DR32_TC, MxSize32, "d">;
+}
// ADDRESS REGISTER DIRECT. The operand is in the address register specified by
// the effective address register field.
-def MxARD16 : MxRegOp<i16, AR16, MxSize16, "a">;
-def MxARD32 : MxRegOp<i32, AR32, MxSize32, "a">;
+let ParserMatchClass = MxARegClass in {
+ def MxARD16 : MxRegOp<i16, AR16, MxSize16, "a">;
+ def MxARD32 : MxRegOp<i32, AR32, MxSize32, "a">;
-def MxARD16_TC : MxRegOp<i16, AR16_TC, MxSize16, "a">;
-def MxARD32_TC : MxRegOp<i32, AR32_TC, MxSize32, "a">;
+ def MxARD16_TC : MxRegOp<i16, AR16_TC, MxSize16, "a">;
+ def MxARD32_TC : MxRegOp<i32, AR32_TC, MxSize32, "a">;
+}
class MxMemOp<dag ops, MxSize size, string letter,
string printMethod = "printOperand",
@@ -304,9 +319,17 @@ def MxARII32_TC : MxMemOp<(ops i8imm, AR32_TC, XR32_TC), MxSize32, "f", "printA
// extended before it is used. The reference is classified as a data reference
// with the exception of the jump and jump-tosubroutine instructions.
def MxAddr : MxOpClass<"Addr">;
-def MxAS8 : MxMemOp<(ops OtherVT), MxSize8, "B", "printAS8Mem", MxAddr>;
-def MxAS16 : MxMemOp<(ops OtherVT), MxSize16, "B", "printAS16Mem", MxAddr>;
-def MxAS32 : MxMemOp<(ops OtherVT), MxSize32, "B", "printAS32Mem", MxAddr>;
+let RenderMethod = "addAddrOperands" in {
+ // This hierarchy ensures Addr8 will always be parsed
+ // before other larger-width variants.
+ def MxAddr32 : MxOpClass<"Addr32", [MxAddr]>;
+ def MxAddr16 : MxOpClass<"Addr16", [MxAddr32]>;
+ def MxAddr8 : MxOpClass<"Addr8", [MxAddr16]>;
+}
+
+def MxAS8 : MxMemOp<(ops OtherVT), MxSize8, "B", "printAS8Mem", MxAddr8>;
+def MxAS16 : MxMemOp<(ops OtherVT), MxSize16, "B", "printAS16Mem", MxAddr16>;
+def MxAS32 : MxMemOp<(ops OtherVT), MxSize32, "B", "printAS32Mem", MxAddr32>;
// ABSOLUTE LONG ADDRESS. This addressing mode requires two words of extension.
// The address of the operand is developed by the concatenation of the extension
@@ -314,9 +337,9 @@ def MxAS32 : MxMemOp<(ops OtherVT), MxSize32, "B", "printAS32Mem", MxAddr>;
// order part of the address is the second extension word. The reference is
// classified as a data reference with the exception of the jump and jump
// to-subroutine instructions.
-def MxAL8 : MxMemOp<(ops OtherVT), MxSize8, "b", "printAL8Mem", MxAddr>;
-def MxAL16 : MxMemOp<(ops OtherVT), MxSize16, "b", "printAL16Mem", MxAddr>;
-def MxAL32 : MxMemOp<(ops OtherVT), MxSize32, "b", "printAL32Mem", MxAddr>;
+def MxAL8 : MxMemOp<(ops OtherVT), MxSize8, "b", "printAL8Mem", MxAddr8>;
+def MxAL16 : MxMemOp<(ops OtherVT), MxSize16, "b", "printAL16Mem", MxAddr16>;
+def MxAL32 : MxMemOp<(ops OtherVT), MxSize32, "b", "printAL32Mem", MxAddr32>;
def MxPCD : MxOpClass<"PCD">;
def MxPCI : MxOpClass<"PCI">;
@@ -370,21 +393,22 @@ def Mxi16imm : MxOp<i16, MxSize16, "i">;
def Mxi32imm : MxOp<i32, MxSize32, "i">;
} // OPERAND_IMMEDIATE
-let OperandType = "OPERAND_PCREL",
- ParserMatchClass = MxAddr,
- PrintMethod = "printPCRelImm" in {
-
+class MxBrTargetOperand<int N> : Operand<OtherVT> {
+ let OperandType = "OPERAND_PCREL";
+ let PrintMethod = "printPCRelImm";
+ let ParserMatchClass = !cast<AsmOperandClass>("MxAddr"#N);
+}
// Branch targets have OtherVT type and print as pc-relative values.
-def MxBrTarget8 : Operand<OtherVT>;
-def MxBrTarget16 : Operand<OtherVT>;
-def MxBrTarget32 : Operand<OtherVT>;
-
-} // OPERAND_PCREL
+def MxBrTarget8 : MxBrTargetOperand<8>;
+def MxBrTarget16 : MxBrTargetOperand<16>;
+def MxBrTarget32 : MxBrTargetOperand<32>;
// Used with MOVEM
+def MxMoveMaskClass : MxOpClass<"MoveMask">;
def MxMoveMask : MxOp<i16, MxSize16, "m"> {
let OperandType = "OPERAND_IMMEDIATE";
let PrintMethod = "printMoveMask";
+ let ParserMatchClass = MxMoveMaskClass;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp
index 963e83cfbb07..991889706e67 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.cpp
+++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "M68kSubtarget.h"
-#include "GlSel/M68kCallLowering.h"
-#include "GlSel/M68kLegalizerInfo.h"
-#include "GlSel/M68kRegisterBankInfo.h"
+#include "GISel/M68kCallLowering.h"
+#include "GISel/M68kLegalizerInfo.h"
+#include "GISel/M68kRegisterBankInfo.h"
#include "M68k.h"
#include "M68kMachineFunction.h"
@@ -24,9 +24,9 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 5b8fd3d41b14..e8126c6219e8 100644
--- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -24,8 +24,8 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/PassRegistry.h"
-#include "llvm/Support/TargetRegistry.h"
#include <memory>
using namespace llvm;
@@ -49,10 +49,14 @@ std::string computeDataLayout(const Triple &TT, StringRef CPU,
// FIXME how to wire it with the used object format?
Ret += "-m:e";
- // M68k pointers are always 32 bit wide even for 16 bit cpus
- Ret += "-p:32:32";
+ // M68k pointers are always 32 bits wide even for 16-bit CPUs.
+ // The ABI only specifies 16-bit alignment.
+ // On at least the 68020+ with a 32-bit bus, there is a performance benefit
+ // to having 32-bit alignment.
+ Ret += "-p:32:16:32";
- // M68k requires i8 to align on 2 byte boundry
+ // Bytes do not require special alignment; words are word-aligned, and
+ // long words are word-aligned at minimum.
Ret += "-i8:8:8-i16:16:16-i32:16:32";
// FIXME no floats at the moment
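The hunk above changes the pointer specification from "-p:32:32" to "-p:32:16:32": 32-bit pointers with 16-bit ABI alignment and 32-bit preferred alignment. As a minimal standalone sketch (not part of the patch), the resulting layout can be inspected with llvm::DataLayout; the full layout string below is only an illustrative approximation of what computeDataLayout() assembles:

#include "llvm/IR/DataLayout.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  // Illustrative M68k-style layout string; alignments are reported in bytes.
  llvm::DataLayout DL("E-m:e-p:32:16:32-i8:8:8-i16:16:16-i32:16:32");
  llvm::outs() << "pointer size (bits): " << DL.getPointerSizeInBits(0) << "\n";            // 32
  llvm::outs() << "pointer ABI align:   " << DL.getPointerABIAlignment(0).value() << "\n";  // 2
  llvm::outs() << "pointer pref align:  " << DL.getPointerPrefAlignment(0).value() << "\n"; // 4
  return 0;
}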
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
index 8a0f32b58da4..c1f88fb78ee1 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp
@@ -29,9 +29,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -82,7 +82,8 @@ public:
/// Write a sequence of optimal nops to the output, covering \p Count bytes.
/// \return - true on success, false on failure
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
@@ -200,7 +201,8 @@ void M68kAsmBackend::relaxInstruction(MCInst &Inst,
Inst.setOpcode(RelaxedOp);
}
-bool M68kAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool M68kAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// Cannot emit NOP with size being not multiple of 16 bits.
if (Count % 2 != 0)
return false;
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
index e5f5909b5d79..a2e41437ee21 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
@@ -109,7 +109,7 @@ void M68kInstPrinter::printMoveMask(const MCInst *MI, unsigned opNum,
// Print separation comma only if
// both data & register parts have bit(s) set
if (s != 0 && (Mask & 0xFF) && HalfMask)
- O << ',';
+ O << '/';
for (int i = 0; HalfMask; ++i) {
if ((HalfMask >> i) & 0b1) {
@@ -130,7 +130,7 @@ void M68kInstPrinter::printMoveMask(const MCInst *MI, unsigned opNum,
i = j;
if (HalfMask)
- O << ',';
+ O << '/';
}
}
}
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
index 0a438ea042be..9f4db895a821 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp
@@ -23,10 +23,10 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
index 5f08b9044b4e..2a225b8a43cd 100644
--- a/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
+++ b/llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp
@@ -10,7 +10,7 @@
/// This file contains M68k target initializer.
///
//===----------------------------------------------------------------------===//
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 4bad0368505a..c1677baf52a7 100644
--- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -23,9 +23,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#define DEBUG_TYPE "msp430-asm-parser"
diff --git a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
index d2902189ec40..9bbb2938ab75 100644
--- a/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
+++ b/llvm/lib/Target/MSP430/Disassembler/MSP430Disassembler.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MCTargetDesc/MSP430MCTargetDesc.h"
+#include "MSP430.h"
#include "TargetInfo/MSP430TargetInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -19,8 +19,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 071e1484196b..953916776c57 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -90,7 +90,8 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
uint64_t MSP430AsmBackend::adjustFixupValue(const MCFixup &Fixup,
@@ -147,7 +148,8 @@ void MSP430AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
}
}
-bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool MSP430AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
if ((Count % 2) != 0)
return false;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
index 87ee312424c8..087045ccb1df 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFStreamer.cpp
@@ -17,8 +17,10 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MSP430Attributes.h"
using namespace llvm;
+using namespace llvm::MSP430Attrs;
namespace llvm {
@@ -54,15 +56,14 @@ MSP430TargetELFStreamer::MSP430TargetELFStreamer(MCStreamer &S,
Streamer.emitInt8(1);
// Attribute vector length.
Streamer.emitInt32(11);
- // OFBA_MSPABI_Tag_ISA(4) = 1, MSP430
- Streamer.emitInt8(4);
- Streamer.emitInt8(1);
- // OFBA_MSPABI_Tag_Code_Model(6) = 1, Small
- Streamer.emitInt8(6);
- Streamer.emitInt8(1);
- // OFBA_MSPABI_Tag_Data_Model(8) = 1, Small
- Streamer.emitInt8(8);
- Streamer.emitInt8(1);
+
+ Streamer.emitInt8(TagISA);
+ Streamer.emitInt8(STI.hasFeature(MSP430::FeatureX) ? ISAMSP430X : ISAMSP430);
+ Streamer.emitInt8(TagCodeModel);
+ Streamer.emitInt8(CMSmall);
+ Streamer.emitInt8(TagDataModel);
+ Streamer.emitInt8(DMSmall);
+ // Don't emit TagEnumSize, for full GCC compatibility.
}
MCELFStreamer &MSP430TargetELFStreamer::getStreamer() {
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index c352ea563454..3f006056955d 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -17,7 +17,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 459188434f2c..8eb3fbd58328 100644
--- a/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -32,7 +32,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index 4be8d0760e68..a83a5d2dfcc9 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -71,9 +71,8 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(MSP430::SP);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(MSP430::R4);
+ for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
+ MBBJ.addLiveIn(MSP430::R4);
} else
NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
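The loop rewrite above relies on llvm::drop_begin, which returns a view over a range minus its first N elements (N defaults to 1), here skipping the entry block. A small self-contained sketch of the same helper on an ordinary container, assuming nothing beyond ADT/STLExtras.h:

#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>

int main() {
  std::vector<int> Blocks = {0, 1, 2, 3};
  // drop_begin(Range) skips the first element, so block 0 (the "entry") is not visited.
  for (int B : llvm::drop_begin(Blocks))
    llvm::outs() << B << ' ';   // prints: 1 2 3
  llvm::outs() << '\n';
  return 0;
}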
diff --git a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7dabb9b4abae..abd48dfd5139 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -304,13 +304,11 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) {
switch (VT.getSimpleVT().SimpleTy) {
case MVT::i8:
- // Sanity check
if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
return false;
break;
case MVT::i16:
- // Sanity check
if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
return false;
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 9c6d44bf92de..c64a44a0ef95 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -670,7 +670,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
InVals.push_back(ArgValue);
}
} else {
- // Sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
SDValue InVal;
@@ -1150,7 +1150,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// lowering & isel wouldn't diverge.
bool andCC = false;
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ if (RHSC->isZero() && LHS.hasOneUse() &&
(LHS.getOpcode() == ISD::AND ||
(LHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getOpcode() == ISD::AND))) {
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index 130211878be1..e9e26e295fd5 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -18,8 +18,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -116,6 +116,7 @@ unsigned MSP430InstrInfo::removeBranch(MachineBasicBlock &MBB,
continue;
if (I->getOpcode() != MSP430::JMP &&
I->getOpcode() != MSP430::JCC &&
+ I->getOpcode() != MSP430::Bi &&
I->getOpcode() != MSP430::Br &&
I->getOpcode() != MSP430::Bm)
break;
@@ -189,7 +190,7 @@ bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
// Handle unconditional branches.
- if (I->getOpcode() == MSP430::JMP) {
+ if (I->getOpcode() == MSP430::JMP || I->getOpcode() == MSP430::Bi) {
if (!AllowModify) {
TBB = I->getOperand(0).getMBB();
continue;
diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 5a117404d772..2fd58717c4db 100644
--- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -12,7 +12,7 @@
#include "MSP430Subtarget.h"
#include "MSP430.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index 827f24daad16..a33146ce2239 100644
--- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -18,7 +18,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430Target() {
@@ -81,5 +81,5 @@ bool MSP430PassConfig::addInstSelector() {
void MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
- addPass(createMSP430BranchSelectionPass(), false);
+ addPass(createMSP430BranchSelectionPass());
}
diff --git a/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 9d4a8f141cc4..fc2b38f41c14 100644
--- a/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/MSP430TargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheMSP430Target() {
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index e4d61f8c210e..01b5dff2e448 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -48,7 +49,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 6f197e424561..9a66dd77c0d3 100644
--- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -20,11 +20,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -455,14 +455,6 @@ static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
template <typename InsnType>
-static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn,
- uint64_t Address, const void *Decoder);
-
-template <typename InsnType>
-static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
- const void *Decoder);
-
-template <typename InsnType>
static DecodeStatus
DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 94d338746a6c..bfe413a152b6 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -518,7 +518,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
/// it should return an error.
///
/// \return - True on success.
-bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool MipsAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
// Check for a less than instruction size number of bytes
// FIXME: 16 bit instructions are not handled yet here.
// We shouldn't be using a hard coded number for instruction size.
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 16c7befb2670..5a0da3bc49bf 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -63,7 +63,8 @@ public:
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target) override;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 454f79926dd0..6fc8fcb482cd 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -29,9 +29,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 232d0eb33164..57cd016da4dc 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -902,7 +902,7 @@ void MipsTargetELFStreamer::finish() {
if (Alignment) {
OS.SwitchSection(&Section);
if (Section.UseCodeAlign())
- OS.emitCodeAlignment(Alignment, Alignment);
+ OS.emitCodeAlignment(Alignment, &STI, Alignment);
else
OS.emitValueToAlignment(Alignment, 0, 1, Alignment);
}
diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td b/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
index da8a06b0cff8..00ac9bf99c92 100644
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -958,7 +958,7 @@ class POOL32A_DVPEVP_FM_MMR6<string instr_asm, bits<10> funct>
let Inst{5-0} = 0b111100;
}
-class CMP_BRANCH_OFF21_FM_MMR6<string opstr, bits<6> funct> : MipsR6Inst {
+class CMP_BRANCH_OFF21_FM_MMR6<bits<6> funct> : MipsR6Inst {
bits<5> rs;
bits<21> offset;
diff --git a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 832124cb3f57..b1a05388884b 100644
--- a/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -62,8 +62,8 @@ class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>;
class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>;
class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
-class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"beqzc", 0b100000>;
-class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"bnezc", 0b101000>;
+class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<0b100000>;
+class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<0b101000>;
class BGEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgec", 0b111101>,
DecodeDisambiguates<"POP75GroupBranchMMR6">;
class BGEUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgeuc", 0b110000>,
@@ -406,7 +406,7 @@ class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
class BRK_MMR6_DESC : BRK_FT<"break">;
class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd, InstrItinClass Itin>
+ InstrItinClass Itin>
: MMR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
@@ -416,10 +416,8 @@ class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
InstrItinClass Itinerary = Itin;
}
-class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd,
- II_CACHE>;
-class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd,
- II_PREF>;
+class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, II_CACHE>;
+class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, II_PREF>;
class LB_LBU_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
RegisterOperand GPROpnd, InstrItinClass Itin>
@@ -1197,21 +1195,21 @@ class SWM16_MMR6_DESC
ComplexPattern Addr = addr;
}
-class SB16_MMR6_DESC_BASE<string opstr, DAGOperand RTOpnd, DAGOperand RO,
- SDPatternOperator OpNode, InstrItinClass Itin,
- Operand MemOpnd>
+class SB16_MMR6_DESC_BASE<string opstr, DAGOperand RTOpnd,
+ InstrItinClass Itin, Operand MemOpnd>
: MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>,
MMR6Arch<opstr> {
let DecoderMethod = "DecodeMemMMImm4";
let mayStore = 1;
}
-class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd,
- truncstorei8, II_SB, mem_mm_4>;
-class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd,
- truncstorei16, II_SH, mem_mm_4_lsl1>;
-class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd,
- store, II_SW, mem_mm_4_lsl2>;
+
+class SB16_MMR6_DESC
+ : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, II_SB, mem_mm_4>;
+class SH16_MMR6_DESC
+ : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, II_SH, mem_mm_4_lsl1>;
+class SW16_MMR6_DESC
+ : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, II_SW, mem_mm_4_lsl2>;
class SWSP_MMR6_DESC
: MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset),
diff --git a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 9a1e47e5ecca..8950de230a01 100644
--- a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -281,57 +281,46 @@ class SHRLV_PH_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
class SHRLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
"shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>;
-class EXT_MM_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXT_MM_2R_DESC_BASE<string instr_asm> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $rs");
- InstrItinClass Itinerary = itin;
+ InstrItinClass Itinerary = NoItinerary;
}
-class EXT_MM_1R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXT_MM_1R_DESC_BASE<string instr_asm> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$imm);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $imm");
- InstrItinClass Itinerary = itin;
+ InstrItinClass Itinerary = NoItinerary;
}
-class EXTP_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPDP_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTPDPV_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTPV_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
- Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTR_W_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTR_R_W_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTR_RS_W_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTR_S_H_MM_DESC
- : EXT_MM_1R_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_W_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_R_W_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_RS_W_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>,
- Defs<[DSPOutFlag23]>;
-class EXTRV_S_H_MM_DESC
- : EXT_MM_2R_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>,
- Defs<[DSPOutFlag23]>;
+class EXTP_MM_DESC : EXT_MM_1R_DESC_BASE<"extp">,
+ Uses<[DSPPos]>,
+ Defs<[DSPEFI]>;
+class EXTPDP_MM_DESC : EXT_MM_1R_DESC_BASE<"extpdp">,
+ Uses<[DSPPos]>,
+ Defs<[DSPPos, DSPEFI]>;
+class EXTPDPV_MM_DESC : EXT_MM_2R_DESC_BASE<"extpdpv">,
+ Uses<[DSPPos]>,
+ Defs<[DSPPos, DSPEFI]>;
+class EXTPV_MM_DESC : EXT_MM_2R_DESC_BASE<"extpv">,
+ Uses<[DSPPos]>,
+ Defs<[DSPEFI]>;
+class EXTR_W_MM_DESC : EXT_MM_1R_DESC_BASE<"extr.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTR_R_W_MM_DESC : EXT_MM_1R_DESC_BASE<"extr_r.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTR_RS_W_MM_DESC : EXT_MM_1R_DESC_BASE<"extr_rs.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTR_S_H_MM_DESC : EXT_MM_1R_DESC_BASE<"extr_s.h">,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_W_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv.w">, Defs<[DSPOutFlag23]>;
+class EXTRV_R_W_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv_r.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_RS_W_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv_rs.w">,
+ Defs<[DSPOutFlag23]>;
+class EXTRV_S_H_MM_DESC : EXT_MM_2R_DESC_BASE<"extrv_s.h">,
+ Defs<[DSPOutFlag23]>;
class MFHI_MM_DESC_BASE<string instr_asm, RegisterOperand RO, SDNode OpNode,
InstrItinClass itin> {
diff --git a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index 269ad8b548a4..5f6354e19ebc 100644
--- a/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -195,8 +195,7 @@ def simm23_lsl2 : Operand<i32> {
let DecoderMethod = "DecodeSimm23Lsl2";
}
-class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
- RegisterOperand RO> :
+class CompactBranchMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
InstSE<(outs), (ins RO:$rs, opnd:$offset),
!strconcat(opstr, "\t$rs, $offset"), [], II_BCCZC, FrmI> {
let isBranch = 1;
@@ -240,7 +239,7 @@ MicroMipsInst16<(outs RO1:$rd1, RO2:$rd2), (ins RO3:$rs, RO3:$rt),
let DecoderMethod = "DecodeMovePOperands";
}
-class StorePairMM<string opstr, ComplexPattern Addr = addr>
+class StorePairMM<string opstr>
: InstSE<(outs), (ins GPR32Opnd:$rt, GPR32Opnd:$rt2, mem_simm12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], II_SWP, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
@@ -248,7 +247,7 @@ class StorePairMM<string opstr, ComplexPattern Addr = addr>
let AsmMatchConverter = "ConvertXWPOperands";
}
-class LoadPairMM<string opstr, ComplexPattern Addr = addr>
+class LoadPairMM<string opstr>
: InstSE<(outs GPR32Opnd:$rt, GPR32Opnd:$rt2), (ins mem_simm12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], II_LWP, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
@@ -332,7 +331,7 @@ class ShiftIMM16<string opstr, Operand ImmOpnd, RegisterOperand RO,
MicroMipsInst16<(outs RO:$rd), (ins RO:$rt, ImmOpnd:$shamt),
!strconcat(opstr, "\t$rd, $rt, $shamt"), [], Itin, FrmR>;
-class LoadMM16<string opstr, DAGOperand RO, SDPatternOperator OpNode,
+class LoadMM16<string opstr, DAGOperand RO,
InstrItinClass Itin, Operand MemOpnd> :
MicroMipsInst16<(outs RO:$rt), (ins MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
@@ -341,8 +340,7 @@ class LoadMM16<string opstr, DAGOperand RO, SDPatternOperator OpNode,
let mayLoad = 1;
}
-class StoreMM16<string opstr, DAGOperand RTOpnd, DAGOperand RO,
- SDPatternOperator OpNode, InstrItinClass Itin,
+class StoreMM16<string opstr, DAGOperand RTOpnd, InstrItinClass Itin,
Operand MemOpnd> :
MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
@@ -499,8 +497,7 @@ let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
!strconcat(opstr, "\t$rs, $offset"), [], II_BCCZALS, FrmI, opstr>;
}
-class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
- SDPatternOperator OpNode = null_frag> :
+class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
!strconcat(opstr, "\t$rd, ${index}(${base})"), [], II_LWXS, FrmFI>;
@@ -540,34 +537,28 @@ def reglist16 : Operand<i32> {
let ParserMatchClass = RegList16AsmOperand;
}
-class StoreMultMM<string opstr,
- InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+class StoreMultMM<string opstr, InstrItinClass Itin> :
InstSE<(outs), (ins reglist:$rt, mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
let mayStore = 1;
}
-class LoadMultMM<string opstr,
- InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
+class LoadMultMM<string opstr, InstrItinClass Itin> :
InstSE<(outs reglist:$rt), (ins mem_mm_12:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
let mayLoad = 1;
}
-class StoreMultMM16<string opstr,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> :
+class StoreMultMM16<string opstr, InstrItinClass Itin> :
MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
let mayStore = 1;
}
-class LoadMultMM16<string opstr,
- InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> :
+class LoadMultMM16<string opstr, InstrItinClass Itin> :
MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
@@ -636,21 +627,21 @@ let FastISelShouldIgnore = 1 in {
def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>,
LOGIC_FM_MM16<0x1>, ISA_MICROMIPS32_NOT_MIPS32R6;
}
-def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU,
- mem_mm_4>, LOAD_STORE_FM_MM16<0x02>, ISA_MICROMIPS;
-def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU,
- mem_mm_4_lsl1>, LOAD_STORE_FM_MM16<0x0a>, ISA_MICROMIPS;
-def LW16_MM : LoadMM16<"lw16", GPRMM16Opnd, load, II_LW, mem_mm_4_lsl2>,
+def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, II_LBU, mem_mm_4>,
+ LOAD_STORE_FM_MM16<0x02>, ISA_MICROMIPS;
+def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, II_LHU, mem_mm_4_lsl1>,
+ LOAD_STORE_FM_MM16<0x0a>, ISA_MICROMIPS;
+def LW16_MM : LoadMM16<"lw16", GPRMM16Opnd, II_LW, mem_mm_4_lsl2>,
LOAD_STORE_FM_MM16<0x1a>, ISA_MICROMIPS;
-def SB16_MM : StoreMM16<"sb16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei8,
- II_SB, mem_mm_4>, LOAD_STORE_FM_MM16<0x22>,
- ISA_MICROMIPS32_NOT_MIPS32R6;
-def SH16_MM : StoreMM16<"sh16", GPRMM16OpndZero, GPRMM16Opnd, truncstorei16,
- II_SH, mem_mm_4_lsl1>,
- LOAD_STORE_FM_MM16<0x2a>, ISA_MICROMIPS32_NOT_MIPS32R6;
-def SW16_MM : StoreMM16<"sw16", GPRMM16OpndZero, GPRMM16Opnd, store, II_SW,
- mem_mm_4_lsl2>, LOAD_STORE_FM_MM16<0x3a>,
- ISA_MICROMIPS32_NOT_MIPS32R6;
+def SB16_MM : StoreMM16<"sb16", GPRMM16OpndZero, II_SB, mem_mm_4>,
+ LOAD_STORE_FM_MM16<0x22>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+def SH16_MM : StoreMM16<"sh16", GPRMM16OpndZero, II_SH, mem_mm_4_lsl1>,
+ LOAD_STORE_FM_MM16<0x2a>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
+def SW16_MM : StoreMM16<"sw16", GPRMM16OpndZero, II_SW, mem_mm_4_lsl2>,
+ LOAD_STORE_FM_MM16<0x3a>,
+ ISA_MICROMIPS32_NOT_MIPS32R6;
def LWGP_MM : LoadGPMM16<"lw", GPRMM16Opnd, II_LW, mem_mm_gp_simm7_lsl2>,
LOAD_GP_FM_MM16<0x19>, ISA_MICROMIPS;
def LWSP_MM : LoadSPMM16<"lw", GPR32Opnd, II_LW, mem_mm_sp_imm5_lsl2>,
@@ -713,9 +704,9 @@ let DecoderNamespace = "MicroMips" in {
POOL32A_CFTC2_FM_MM<0b1101110100>, ISA_MICROMIPS;
/// Compact Branch Instructions
- def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
+ def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x7>, ISA_MICROMIPS32_NOT_MIPS32R6;
- def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, setne, GPR32Opnd>,
+ def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x5>, ISA_MICROMIPS32_NOT_MIPS32R6;
/// Arithmetic Instructions (ALU Immediate)
diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index 6c5f63804d19..203e05dde7ad 100644
--- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -408,12 +408,9 @@ static bool fixupFPReturnAndCall(Function &F, Module *M,
// during call setup, the proper call lowering to the helper
// functions will take place.
//
- A = A.addAttribute(C, AttributeList::FunctionIndex,
- "__Mips16RetHelper");
- A = A.addAttribute(C, AttributeList::FunctionIndex,
- Attribute::ReadNone);
- A = A.addAttribute(C, AttributeList::FunctionIndex,
- Attribute::NoInline);
+ A = A.addFnAttribute(C, "__Mips16RetHelper");
+ A = A.addFnAttribute(C, Attribute::ReadNone);
+ A = A.addFnAttribute(C, Attribute::NoInline);
FunctionCallee F = (M->getOrInsertFunction(Name, A, MyVoid, T));
CallInst::Create(F, Params, "", &I);
} else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
@@ -485,11 +482,11 @@ static void removeUseSoftFloat(Function &F) {
AttrBuilder B;
LLVM_DEBUG(errs() << "removing -use-soft-float\n");
B.addAttribute("use-soft-float", "false");
- F.removeAttributes(AttributeList::FunctionIndex, B);
+ F.removeFnAttrs(B);
if (F.hasFnAttribute("use-soft-float")) {
LLVM_DEBUG(errs() << "still has -use-soft-float\n");
}
- F.addAttributes(AttributeList::FunctionIndex, B);
+ F.addFnAttrs(B);
}
// This pass only makes sense when the underlying chip has floating point but
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.td b/llvm/lib/Target/Mips/Mips16InstrInfo.td
index 990202b23bc0..3410fcd85fdc 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.td
@@ -304,14 +304,14 @@ class FI8_MOV32R16_ins<string asmstr, InstrItinClass itin>:
//
// MULT
//
-class FMULT16_ins<string asmstr, InstrItinClass itin> :
+class FMULT16_ins<string asmstr> :
MipsPseudo16<(outs), (ins CPU16Regs:$rx, CPU16Regs:$ry),
!strconcat(asmstr, "\t$rx, $ry"), []>;
//
// MULT-LO
//
-class FMULT16_LO_ins<string asmstr, InstrItinClass itin> :
+class FMULT16_LO_ins<string asmstr> :
MipsPseudo16<(outs CPU16Regs:$rz), (ins CPU16Regs:$rx, CPU16Regs:$ry),
!strconcat(asmstr, "\t$rx, $ry\n\tmflo\t$rz"), []> {
let isCodeGenOnly=1;
@@ -895,13 +895,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIM16Alu> {
//
// Pseudo Instruction for mult
//
-def MultRxRy16: FMULT16_ins<"mult", IIM16Alu> {
+def MultRxRy16: FMULT16_ins<"mult"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
}
-def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> {
+def MultuRxRy16: FMULT16_ins<"multu"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -912,7 +912,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> {
// Purpose: Multiply Word
// To multiply 32-bit signed integers.
//
-def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> {
+def MultRxRyRz16: FMULT16_LO_ins<"mult"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
@@ -923,7 +923,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> {
// Purpose: Multiply Unsigned Word
// To multiply 32-bit unsigned integers.
//
-def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIM16Alu> {
+def MultuRxRyRz16: FMULT16_LO_ins<"multu"> {
let isCommutable = 1;
let hasSideEffects = 0;
let Defs = [HI0, LO0];
diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index 9607d008bc97..192d0013d89c 100644
--- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -700,8 +700,7 @@ class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd, II_RINT_D>;
class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd, II_CLASS_S>;
class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd, II_CLASS_D>;
-class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd, InstrItinClass itin>
+class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd, InstrItinClass itin>
: MipsR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
@@ -711,8 +710,8 @@ class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
InstrItinClass Itinerary = itin;
}
-class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd, II_CACHE>;
-class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd, II_PREF>;
+class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, II_CACHE>;
+class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, II_PREF>;
class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd,
InstrItinClass itin> {
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index b460bc71b11f..6d3f3adb2b7a 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -52,9 +52,9 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -1203,7 +1203,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) {
// LD RA, 8(SP)
// DADDIU SP, SP, 16
//
- OutStreamer->emitCodeAlignment(4);
+ OutStreamer->emitCodeAlignment(4, &getSubtargetInfo());
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 5c2549ee176b..f6ec34c7f403 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
MipsCallLowering::MipsCallLowering(const MipsTargetLowering &TLI)
: CallLowering(&TLI) {}
+namespace {
struct MipsOutgoingValueAssigner : public CallLowering::OutgoingValueAssigner {
/// This is the name of the function being called
/// FIXME: Relying on this is unsound
@@ -80,7 +81,6 @@ struct MipsIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
}
};
-namespace {
class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler {
const MipsSubtarget &STI;
@@ -92,7 +92,7 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
@@ -101,7 +101,8 @@ private:
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override;
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk = nullptr) override;
virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
@@ -127,7 +128,7 @@ private:
void MipsIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
@@ -163,7 +164,8 @@ void MipsIncomingValueHandler::assignValueToAddress(Register ValVReg,
/// dependent on other arguments.
unsigned
MipsIncomingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];
@@ -197,7 +199,7 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
@@ -206,7 +208,8 @@ private:
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) override;
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) override;
MachineInstrBuilder &MIB;
};
@@ -214,7 +217,7 @@ private:
void MipsOutgoingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
@@ -253,7 +256,8 @@ void MipsOutgoingValueHandler::assignValueToAddress(Register ValVReg,
unsigned
MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
- ArrayRef<CCValAssign> VAs) {
+ ArrayRef<CCValAssign> VAs,
+ std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];
@@ -271,6 +275,15 @@ MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
if (!STI.isLittle())
std::swap(Lo, Hi);
+ // If we can return a thunk, just include the register copies. The unmerge can
+ // be emitted earlier.
+ if (Thunk) {
+ *Thunk = [=]() {
+ MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
+ MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
+ };
+ return 2;
+ }
MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
return 2;
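The new Thunk parameter above lets assignCustomValue defer the two physical-register copies until after the unmerge has been emitted. A generic sketch of that deferred-emission pattern, with hypothetical names and plain standard C++ rather than the actual CallLowering API:

#include <functional>
#include <iostream>

// Hypothetical stand-in for a handler that either emits its copies in place or
// hands them back to the caller as a thunk to run later.
static unsigned assignPair(std::function<void()> *Thunk) {
  auto EmitCopies = [] {
    std::cout << "copy lo half\n";
    std::cout << "copy hi half\n";
  };
  if (Thunk) {
    *Thunk = EmitCopies;  // defer: the caller picks the emission point
    return 2;
  }
  EmitCopies();           // no thunk requested: emit immediately
  return 2;
}

int main() {
  std::function<void()> Thunk;
  unsigned Handled = assignPair(&Thunk);
  std::cout << "emit preparatory instructions (e.g. the unmerge)\n";
  Thunk();                // now run the deferred copies
  return Handled == 2 ? 0 : 1;
}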
diff --git a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index 727d47d06ad4..dd0b48573ef6 100644
--- a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -398,8 +398,7 @@ class APPEND_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string BaseOpcode = instr_asm;
}
-class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
@@ -407,8 +406,7 @@ class EXTR_W_TY1_R2_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
string BaseOpcode = instr_asm;
}
-class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
- InstrItinClass itin> {
+class EXTR_W_TY1_R1_DESC_BASE<string instr_asm, InstrItinClass itin> {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$shift_rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $shift_rs");
@@ -522,7 +520,7 @@ class MTHI_DESC_BASE<string instr_asm, RegisterOperand RO,
bit isMoveReg = 1;
}
-class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode, InstrItinClass itin> :
+class BPOSGE32_PSEUDO_DESC_BASE<SDPatternOperator OpNode> :
MipsPseudo<(outs GPR32Opnd:$dst), (ins), [(set GPR32Opnd:$dst, (OpNode))]> {
bit hasNoSchedulingInfo = 1;
bit usesCustomInserter = 1;
@@ -891,47 +889,40 @@ class LBUX_DESC : LX_DESC_BASE<"lbux", int_mips_lbux, NoItinerary>;
class BPOSGE32_DESC : BPOSGE32_DESC_BASE<"bposge32", brtarget, NoItinerary>;
// Extr
-class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+class EXTP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extp", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+class EXTPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpv", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPEFI]>;
-class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+class EXTPDP_DESC : EXTR_W_TY1_R1_DESC_BASE<"extpdp", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", MipsEXTPDP,
- NoItinerary>,
+class EXTPDPV_DESC : EXTR_W_TY1_R2_DESC_BASE<"extpdpv", NoItinerary>,
Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
-class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+class EXTR_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", MipsEXTR_W,
- NoItinerary>, Defs<[DSPOutFlag23]>;
+class EXTRV_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv.w", NoItinerary>,
+ Defs<[DSPOutFlag23]>;
-class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", MipsEXTR_R_W,
- NoItinerary>,
+class EXTR_R_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_r.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", MipsEXTR_R_W,
- NoItinerary>,
+class EXTRV_R_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_r.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W,
- NoItinerary>,
+class EXTR_RS_W_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_rs.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W,
- NoItinerary>,
+class EXTRV_RS_W_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_rs.w", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", MipsEXTR_S_H,
- NoItinerary>,
+class EXTR_S_H_DESC : EXTR_W_TY1_R1_DESC_BASE<"extr_s.h", NoItinerary>,
Defs<[DSPOutFlag23]>;
-class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", MipsEXTR_S_H,
- NoItinerary>,
+class EXTRV_S_H_DESC : EXTR_W_TY1_R2_DESC_BASE<"extrv_s.h", NoItinerary>,
Defs<[DSPOutFlag23]>;
class SHILO_DESC : SHILO_R1_DESC_BASE<"shilo", MipsSHILO>;
@@ -1115,8 +1106,8 @@ class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
timmZExt5, NoItinerary>;
// Pseudos.
-def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,
- NoItinerary>, Uses<[DSPPos]>;
+def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32>,
+ Uses<[DSPPos]>;
// Instruction defs.
// MIPS DSP Rev 1
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 797d81204305..c2e3d7393a6d 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -401,10 +401,9 @@ void RegDefsUses::setUnallocatableRegs(const MachineFunction &MF) {
void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
const MachineBasicBlock &SuccBB) {
- for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end(); SI != SE; ++SI)
- if (*SI != &SuccBB)
- for (const auto &LI : (*SI)->liveins())
+ for (const MachineBasicBlock *S : MBB.successors())
+ if (S != &SuccBB)
+ for (const auto &LI : S->liveins())
Uses.set(LI.PhysReg);
}
@@ -839,9 +838,8 @@ bool MipsDelaySlotFiller::searchSuccBBs(MachineBasicBlock &MBB,
auto *Fn = MBB.getParent();
// Iterate over SuccBB's predecessor list.
- for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
- PE = SuccBB->pred_end(); PI != PE; ++PI)
- if (!examinePred(**PI, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
+ for (MachineBasicBlock *Pred : SuccBB->predecessors())
+ if (!examinePred(*Pred, *SuccBB, RegDU, HasMultipleSuccs, BrMap))
return false;
// Do not allow moving instructions which have unallocatable register operands
diff --git a/llvm/lib/Target/Mips/MipsEVAInstrInfo.td b/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
index 73cca8cfa5d9..c697dc90c14c 100644
--- a/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -70,8 +70,7 @@ class LHuE_DESC : LOAD_EVA_DESC_BASE<"lhue", GPR32Opnd, II_LHUE>;
class LWE_DESC : LOAD_EVA_DESC_BASE<"lwe", GPR32Opnd, II_LWE>;
class STORE_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- SDPatternOperator OpNode = null_frag,
- InstrItinClass itin = NoItinerary> {
+ InstrItinClass itin> {
dag OutOperandList = (outs);
dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
@@ -82,9 +81,9 @@ class STORE_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
InstrItinClass Itinerary = itin;
}
-class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd, null_frag, II_SBE>;
-class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd, null_frag, II_SHE>;
-class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd, null_frag, II_SWE>;
+class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd, II_SBE>;
+class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd, II_SHE>;
+class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd, II_SWE>;
// Load/Store Left/Right EVA descriptions
class LOAD_LEFT_RIGHT_EVA_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index e963185eaeaa..05c1c06ffefe 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -1660,7 +1660,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!MTI->getLength()->getType()->isIntegerTy(32))
return false;
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
- return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
+ return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -1669,7 +1669,7 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
return false;
if (!MSI->getLength()->getType()->isIntegerTy(32))
return false;
- return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memset", II->arg_size() - 1);
}
}
return false;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 9399c949a3f2..4f364ef6afc7 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -509,6 +509,9 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
+ setLibcallName(RTLIB::MULO_I128, nullptr);
}
setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4));
@@ -2073,7 +2076,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
- const GlobalObject *GO = GV->getBaseObject();
+ const GlobalObject *GO = GV->getAliaseeObject();
if (GO && TLOF->IsGlobalInSmallSection(GO, getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG, ABI.IsN64());
@@ -3714,7 +3717,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(
LocVT = VA.getValVT();
}
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// The stack pointer offset is relative to the caller stack frame.
diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 256fb74c1d6c..6d44ce2ab563 100644
--- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -145,14 +145,14 @@ bool MipsInstructionSelector::materialize32BitImm(Register DestReg, APInt Imm,
MachineIRBuilder &B) const {
assert(Imm.getBitWidth() == 32 && "Unsupported immediate size.");
// Ori zero extends immediate. Used for values with zeros in high 16 bits.
- if (Imm.getHiBits(16).isNullValue()) {
+ if (Imm.getHiBits(16).isZero()) {
MachineInstr *Inst =
B.buildInstr(Mips::ORi, {DestReg}, {Register(Mips::ZERO)})
.addImm(Imm.getLoBits(16).getLimitedValue());
return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
}
// Lui places immediate in high 16 bits and sets low 16 bits to zero.
- if (Imm.getLoBits(16).isNullValue()) {
+ if (Imm.getLoBits(16).isZero()) {
MachineInstr *Inst = B.buildInstr(Mips::LUi, {DestReg}, {})
.addImm(Imm.getHiBits(16).getLimitedValue());
return constrainSelectedInstRegOperands(*Inst, TII, TRI, RBI);
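The comments above state when a single instruction suffices: ORi when the high 16 bits of the immediate are zero, LUi when the low 16 bits are zero. A short sketch of the same predicates using llvm::APInt, mirroring the checks in the hunk (the sample constants are arbitrary):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::APInt LowOnly(32, 0x00001234);  // high 16 bits zero -> one ORi
  llvm::APInt HighOnly(32, 0x56780000); // low 16 bits zero  -> one LUi
  llvm::outs() << "ORi candidate: " << (LowOnly.getHiBits(16).isZero() ? "yes" : "no") << "\n";
  llvm::outs() << "LUi candidate: " << (HighOnly.getLoBits(16).isZero() ? "yes" : "no") << "\n";
  // getHiBits(16) shifts the upper half down, giving the value for LUi's immediate field.
  llvm::outs() << "LUi immediate: " << HighOnly.getHiBits(16).getLimitedValue() << "\n"; // 22136 (0x5678)
  return 0;
}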
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index 301f1c158010..c4abccb24c6f 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1308,8 +1308,8 @@ class MSA_2R_FILL_DESC_BASE<string instr_asm, ValueType VT,
InstrItinClass Itinerary = itin;
}
-class MSA_2R_FILL_PSEUDO_BASE<ValueType VT, SDPatternOperator OpNode,
- RegisterClass RCWD, RegisterClass RCWS = RCWD> :
+class MSA_2R_FILL_PSEUDO_BASE<SDPatternOperator OpNode,
+ RegisterClass RCWD, RegisterClass RCWS> :
MSAPseudo<(outs RCWD:$wd), (ins RCWS:$fs),
[(set RCWD:$wd, (OpNode RCWS:$fs))]> {
let usesCustomInserter = 1;
@@ -2091,10 +2091,8 @@ class FILL_W_DESC : MSA_2R_FILL_DESC_BASE<"fill.w", v4i32, vsplati32,
class FILL_D_DESC : MSA_2R_FILL_DESC_BASE<"fill.d", v2i64, vsplati64,
MSA128DOpnd, GPR64Opnd>;
-class FILL_FW_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v4f32, vsplatf32, MSA128W,
- FGR32>;
-class FILL_FD_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<v2f64, vsplatf64, MSA128D,
- FGR64>;
+class FILL_FW_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<vsplatf32, MSA128W, FGR32>;
+class FILL_FD_PSEUDO_DESC : MSA_2R_FILL_PSEUDO_BASE<vsplatf64, MSA128D, FGR64>;
class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128WOpnd>;
class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128DOpnd>;
@@ -3755,8 +3753,7 @@ def : MSABitconvertReverseWInDPat<v4f32, v2f64, MSA128W>;
// Pseudos used to implement BNZ.df, and BZ.df
class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
- RegisterClass RCWS,
- InstrItinClass itin = NoItinerary> :
+ RegisterClass RCWS> :
MipsPseudo<(outs GPR32:$dst),
(ins RCWS:$ws),
[(set GPR32:$dst, (OpNode (TyNode RCWS:$ws)))]> {
@@ -3764,27 +3761,22 @@ class MSA_CBRANCH_PSEUDO_DESC_BASE<SDPatternOperator OpNode, ValueType TyNode,
bit hasNoSchedulingInfo = 1;
}
-def SNZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8,
- MSA128B, NoItinerary>;
-def SNZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16,
- MSA128H, NoItinerary>;
-def SNZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32,
- MSA128W, NoItinerary>;
-def SNZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64,
- MSA128D, NoItinerary>;
-def SNZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8,
- MSA128B, NoItinerary>;
-
-def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8,
- MSA128B, NoItinerary>;
-def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16,
- MSA128H, NoItinerary>;
-def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32,
- MSA128W, NoItinerary>;
-def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
- MSA128D, NoItinerary>;
-def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
- MSA128B, NoItinerary>;
+def SNZ_B_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v16i8, MSA128B>;
+def SNZ_H_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v8i16, MSA128H>;
+def SNZ_W_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v4i32, MSA128W>;
+def SNZ_D_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllNonZero, v2i64, MSA128D>;
+def SNZ_V_PSEUDO
+ : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyNonZero, v16i8, MSA128B>;
+
+def SZ_B_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v16i8, MSA128B>;
+def SZ_H_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v8i16, MSA128H>;
+def SZ_W_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v4i32, MSA128W>;
+def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64, MSA128D>;
+def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8, MSA128B>;
// Pseudos used to implement transparent fp16 support.
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 7be5fc33a0af..03a545605fe1 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -964,7 +964,7 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
// match the instruction.
case MipsISD::Ins: {
- // Sanity checking for the node operands.
+ // Validating the node operands.
if (Node->getValueType(0) != MVT::i32 && Node->getValueType(0) != MVT::i64)
return false;
@@ -1027,12 +1027,13 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
}
SDNode *Rdhwr =
- CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0),
+ CurDAG->getMachineNode(RdhwrOpc, DL, Node->getValueType(0), MVT::Glue,
CurDAG->getRegister(Mips::HWR29, MVT::i32),
CurDAG->getTargetConstant(0, DL, MVT::i32));
SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT);
+ SDValue(Rdhwr, 0), SDValue(Rdhwr, 1));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, DL, DestReg, PtrVT,
+ Chain.getValue(1));
ReplaceNode(Node, ResNode.getNode());
return true;
}
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 37d4313cc506..1fe6ab09804b 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -569,7 +569,7 @@ static bool isVectorAllOnes(SDValue N) {
// Endianness doesn't matter in this context because we are looking for
// an all-ones value.
if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
- return SplatValue.isAllOnesValue();
+ return SplatValue.isAllOnes();
return false;
}
@@ -701,7 +701,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
// Fold degenerate cases.
if (IsConstantMask) {
- if (Mask.isAllOnesValue())
+ if (Mask.isAllOnes())
return IfSet;
else if (Mask == 0)
return IfClr;
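Aside: the isNullValue()/isAllOnesValue() to isZero()/isAllOnes() renames applied throughout these Mips files are pure spelling changes on APInt predicates. A minimal standalone sketch (not part of the patch; it only assumes llvm/ADT/APInt.h from the same tree is on the include path):

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt Zero(32, 0);
  llvm::APInt Ones(32, 0xFFFFFFFFu);
  assert(Zero.isZero());             // formerly isNullValue()
  assert(Ones.isAllOnes());          // formerly isAllOnesValue()
  assert(Zero.getHiBits(16).isZero()); // the pattern used in materialize32BitImm
  return 0;
}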
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 901a4fe4e2ac..26b31cfa9f2a 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -18,9 +18,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 8bb9d75e9173..c285385a19dd 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -12,17 +12,17 @@
#include "MipsSubtarget.h"
#include "Mips.h"
-#include "MipsMachineFunction.h"
-#include "MipsRegisterInfo.h"
-#include "MipsTargetMachine.h"
#include "MipsCallLowering.h"
#include "MipsLegalizerInfo.h"
+#include "MipsMachineFunction.h"
#include "MipsRegisterBankInfo.h"
+#include "MipsRegisterInfo.h"
+#include "MipsTargetMachine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -78,7 +78,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false),
HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false),
InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false),
- HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16),
+ HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 || Mips_Os16),
Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasSym32(false),
HasEVA(false), DisableMadd4(false), HasMT(false), HasCRC(false),
HasVirt(false), HasGINV(false), UseIndirectJumpsHazard(false),
diff --git a/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 7dd030f73d55..8de3c9fd25bd 100644
--- a/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,9 +34,9 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
diff --git a/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 44041987ec76..db5f607bbb4f 100644
--- a/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/MipsTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheMipsTarget() {
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index d69166feb042..856d03f0b210 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -10,15 +10,15 @@
//
//===----------------------------------------------------------------------===//
+#include "NVPTXMCTargetDesc.h"
#include "NVPTXInstPrinter.h"
#include "NVPTXMCAsmInfo.h"
-#include "NVPTXMCTargetDesc.h"
#include "NVPTXTargetStreamer.h"
#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
index fe335f154703..1cbd650bdf06 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.cpp
@@ -26,7 +26,7 @@ NVPTXTargetStreamer::~NVPTXTargetStreamer() = default;
void NVPTXTargetStreamer::outputDwarfFileDirectives() {
for (const std::string &S : DwarfFiles)
- getStreamer().emitRawText(S.data());
+ getStreamer().emitRawText(S);
DwarfFiles.clear();
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 2b0972b8531e..7af927aba64e 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -89,6 +89,12 @@ def PTX71 : SubtargetFeature<"ptx71", "PTXVersion", "71",
"Use PTX version 7.1">;
def PTX72 : SubtargetFeature<"ptx72", "PTXVersion", "72",
"Use PTX version 7.2">;
+def PTX73 : SubtargetFeature<"ptx73", "PTXVersion", "73",
+ "Use PTX version 7.3">;
+def PTX74 : SubtargetFeature<"ptx74", "PTXVersion", "74",
+ "Use PTX version 7.4">;
+def PTX75 : SubtargetFeature<"ptx75", "PTXVersion", "75",
+ "Use PTX version 7.5">;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 38844ff4ddf9..aab6d2034f11 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -71,12 +71,12 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
@@ -417,8 +417,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
// llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
// we iterate through each back edge of the loop with header MBB, and check
// whether its metadata contains llvm.loop.unroll.disable.
- for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
- const MachineBasicBlock *PMBB = *I;
+ for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
// Edges from other loops to MBB are not back edges.
continue;
@@ -703,7 +702,7 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
const Function *F = &*FI;
- if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
+ if (F->getAttributes().hasFnAttr("nvptx-libcall-callee")) {
emitDeclaration(F, O);
continue;
}
@@ -1457,7 +1456,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
}
}
- if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
+ if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// Just print .param .align <a> .b8 .param[size];
// <a> = PAL.getparamalignment
@@ -1748,135 +1747,63 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
-// These utility functions assure we get the right sequence of bytes for a given
-// type even for big-endian machines
-template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
- int64_t vp = (int64_t)val;
- for (unsigned i = 0; i < sizeof(T); ++i) {
- p[i] = (unsigned char)vp;
- vp >>= 8;
- }
-}
-static void ConvertFloatToBytes(unsigned char *p, float val) {
- int32_t *vp = (int32_t *)&val;
- for (unsigned i = 0; i < sizeof(int32_t); ++i) {
- p[i] = (unsigned char)*vp;
- *vp >>= 8;
- }
-}
-static void ConvertDoubleToBytes(unsigned char *p, double val) {
- int64_t *vp = (int64_t *)&val;
- for (unsigned i = 0; i < sizeof(int64_t); ++i) {
- p[i] = (unsigned char)*vp;
- *vp >>= 8;
- }
-}
-
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
- AggBuffer *aggBuffer) {
+ AggBuffer *AggBuffer) {
const DataLayout &DL = getDataLayout();
-
+ int AllocSize = DL.getTypeAllocSize(CPV->getType());
if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
- int s = DL.getTypeAllocSize(CPV->getType());
- if (s < Bytes)
- s = Bytes;
- aggBuffer->addZeros(s);
+ // A non-zero Bytes value means the whole Bytes span must be zero-filled;
+ // otherwise, zero-fill only the space allocated for CPV.
+ AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
return;
}
- unsigned char ptr[8];
- switch (CPV->getType()->getTypeID()) {
+ // Helper for filling AggBuffer with APInts.
+ auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
+ size_t NumBytes = (Val.getBitWidth() + 7) / 8;
+ SmallVector<unsigned char, 16> Buf(NumBytes);
+ for (unsigned I = 0; I < NumBytes; ++I) {
+ Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
+ }
+ AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
+ };
- case Type::IntegerTyID: {
- Type *ETy = CPV->getType();
- if (ETy == Type::getInt8Ty(CPV->getContext())) {
- unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
- ConvertIntToBytes<>(ptr, c);
- aggBuffer->addBytes(ptr, 1, Bytes);
- } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
- short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
- ConvertIntToBytes<>(ptr, int16);
- aggBuffer->addBytes(ptr, 2, Bytes);
- } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
- if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- int int32 = (int)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int32);
- aggBuffer->addBytes(ptr, 4, Bytes);
+ switch (CPV->getType()->getTypeID()) {
+ case Type::IntegerTyID:
+ if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
+ AddIntToBuffer(CI->getValue());
+ break;
+ }
+ if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const auto *CI =
+ dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
+ AddIntToBuffer(CI->getValue());
break;
- } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstant(Cexpr, DL))) {
- int int32 = (int)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int32);
- aggBuffer->addBytes(ptr, 4, Bytes);
- break;
- }
- if (Cexpr->getOpcode() == Instruction::PtrToInt) {
- Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v, Cexpr->getOperand(0));
- aggBuffer->addZeros(4);
- break;
- }
}
- llvm_unreachable("unsupported integer const type");
- } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
- if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
- long long int64 = (long long)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int64);
- aggBuffer->addBytes(ptr, 8, Bytes);
+ if (Cexpr->getOpcode() == Instruction::PtrToInt) {
+ Value *V = Cexpr->getOperand(0)->stripPointerCasts();
+ AggBuffer->addSymbol(V, Cexpr->getOperand(0));
+ AggBuffer->addZeros(AllocSize);
break;
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const auto *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstant(Cexpr, DL))) {
- long long int64 = (long long)(constInt->getZExtValue());
- ConvertIntToBytes<>(ptr, int64);
- aggBuffer->addBytes(ptr, 8, Bytes);
- break;
- }
- if (Cexpr->getOpcode() == Instruction::PtrToInt) {
- Value *v = Cexpr->getOperand(0)->stripPointerCasts();
- aggBuffer->addSymbol(v, Cexpr->getOperand(0));
- aggBuffer->addZeros(8);
- break;
- }
}
- llvm_unreachable("unsupported integer const type");
- } else
- llvm_unreachable("unsupported integer const type");
+ }
+ llvm_unreachable("unsupported integer const type");
break;
- }
+
case Type::HalfTyID:
case Type::FloatTyID:
- case Type::DoubleTyID: {
- const auto *CFP = cast<ConstantFP>(CPV);
- Type *Ty = CFP->getType();
- if (Ty == Type::getHalfTy(CPV->getContext())) {
- APInt API = CFP->getValueAPF().bitcastToAPInt();
- uint16_t float16 = API.getLoBits(16).getZExtValue();
- ConvertIntToBytes<>(ptr, float16);
- aggBuffer->addBytes(ptr, 2, Bytes);
- } else if (Ty == Type::getFloatTy(CPV->getContext())) {
- float float32 = (float) CFP->getValueAPF().convertToFloat();
- ConvertFloatToBytes(ptr, float32);
- aggBuffer->addBytes(ptr, 4, Bytes);
- } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
- double float64 = CFP->getValueAPF().convertToDouble();
- ConvertDoubleToBytes(ptr, float64);
- aggBuffer->addBytes(ptr, 8, Bytes);
- } else {
- llvm_unreachable("unsupported fp const type");
- }
+ case Type::DoubleTyID:
+ AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
break;
- }
+
case Type::PointerTyID: {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- aggBuffer->addSymbol(GVar, GVar);
+ AggBuffer->addSymbol(GVar, GVar);
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
- aggBuffer->addSymbol(v, Cexpr);
+ AggBuffer->addSymbol(v, Cexpr);
}
- unsigned int s = DL.getTypeAllocSize(CPV->getType());
- aggBuffer->addZeros(s);
+ AggBuffer->addZeros(AllocSize);
break;
}
@@ -1884,12 +1811,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
case Type::FixedVectorTyID:
case Type::StructTyID: {
if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
- int ElementSize = DL.getTypeAllocSize(CPV->getType());
- bufferAggregateConstant(CPV, aggBuffer);
- if (Bytes > ElementSize)
- aggBuffer->addZeros(Bytes - ElementSize);
+ bufferAggregateConstant(CPV, AggBuffer);
+ if (Bytes > AllocSize)
+ AggBuffer->addZeros(Bytes - AllocSize);
} else if (isa<ConstantAggregateZero>(CPV))
- aggBuffer->addZeros(Bytes);
+ AggBuffer->addZeros(Bytes);
else
llvm_unreachable("Unexpected Constant type");
break;
@@ -1996,7 +1922,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::AddrSpaceCast: {
@@ -2010,7 +1936,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/ false,
!MF ? nullptr : MF->getFunction().getParent());
- report_fatal_error(OS.str());
+ report_fatal_error(Twine(OS.str()));
}
case Instruction::GetElementPtr: {
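Aside: the new AddIntToBuffer lambda replaces the deleted ConvertIntToBytes/ConvertFloatToBytes/ConvertDoubleToBytes helpers with a single APInt-based path. A standalone sketch of the same byte-extraction idiom (not part of the patch; it only assumes the LLVM ADT headers are available):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include <cstdio>

int main() {
  llvm::APInt Val(32, 0x11223344u);
  size_t NumBytes = (Val.getBitWidth() + 7) / 8;
  llvm::SmallVector<unsigned char, 16> Buf(NumBytes);
  for (unsigned I = 0; I < NumBytes; ++I)
    Buf[I] = Val.extractBitsAsZExtValue(8, I * 8); // byte I, least significant first
  for (unsigned char B : Buf)
    std::printf("%02x ", B); // prints: 44 33 22 11 (little-endian order)
  return 0;
}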
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 5c3a4eb470c1..5d680e731e4a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -106,6 +106,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
EmitGeneric = AP.EmitGeneric;
}
+ // Copy Num bytes from Ptr.
+ // If Bytes > Num, zero-fill up to Bytes.
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
assert((curpos + Num) <= size);
assert((curpos + Bytes) <= size);
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 024e51e5f488..1e19ef4116c3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -36,6 +36,9 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr *MI = &MBB.front();
MachineRegisterInfo &MR = MF.getRegInfo();
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
// This instruction really occurs before first instruction
// in the BB, so giving it no debug location.
DebugLoc dl = DebugLoc();
@@ -50,15 +53,15 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
(Is64Bit ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes);
unsigned MovDepotOpcode =
(Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
- if (!MR.use_empty(NVPTX::VRFrame)) {
+ if (!MR.use_empty(NRI->getFrameRegister(MF))) {
// If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
MI = BuildMI(MBB, MI, dl,
MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
- NVPTX::VRFrame)
- .addReg(NVPTX::VRFrameLocal);
+ NRI->getFrameRegister(MF))
+ .addReg(NRI->getFrameLocalRegister(MF));
}
BuildMI(MBB, MI, dl, MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
- NVPTX::VRFrameLocal)
+ NRI->getFrameLocalRegister(MF))
.addImm(MF.getFunctionNumber());
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 9078ff8cfb97..a9a5eae42c1d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -74,19 +74,16 @@ bool GenericToNVVM::runOnModule(Module &M) {
// of original global variable and its clone is placed in the GVMap for later
// use.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E;) {
- GlobalVariable *GV = &*I++;
- if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
- !llvm::isTexture(*GV) && !llvm::isSurface(*GV) &&
- !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) {
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
+ if (GV.getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC &&
+ !llvm::isTexture(GV) && !llvm::isSurface(GV) && !llvm::isSampler(GV) &&
+ !GV.getName().startswith("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
- M, GV->getValueType(), GV->isConstant(),
- GV->getLinkage(),
- GV->hasInitializer() ? GV->getInitializer() : nullptr,
- "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
- NewGV->copyAttributesFrom(GV);
- GVMap[GV] = NewGV;
+ M, GV.getValueType(), GV.isConstant(), GV.getLinkage(),
+ GV.hasInitializer() ? GV.getInitializer() : nullptr, "", &GV,
+ GV.getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
+ NewGV->copyAttributesFrom(&GV);
+ GVMap[&GV] = NewGV;
}
}
@@ -215,7 +212,7 @@ Value *GenericToNVVM::remapConstantVectorOrConstantAggregate(
// If any of the elements has been modified, construct the equivalent
// vector or aggregate value with a set instructions and the converted
// elements.
- Value *NewValue = UndefValue::get(C->getType());
+ Value *NewValue = PoisonValue::get(C->getType());
if (isa<ConstantVector>(C)) {
for (unsigned i = 0; i < NumOperands; ++i) {
Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i);
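Aside: the range-based rewrite of the globals loop above relies on llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs, so the element just visited can be removed or replaced safely. A standalone sketch (not part of the patch; it assumes llvm/ADT/STLExtras.h is available and uses a std::map purely for illustration):

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <map>

int main() {
  std::map<int, char> M = {{1, 'a'}, {2, 'b'}, {3, 'c'}, {4, 'd'}};
  for (auto &KV : llvm::make_early_inc_range(M)) {
    int Key = KV.first; // copy the key; erasing invalidates KV itself
    if (Key % 2 == 0)
      M.erase(Key); // safe: the range's iterator already points past this node
  }
  for (auto &KV : M)
    std::printf("%d:%c ", KV.first, KV.second); // prints: 1:a 3:c
  return 0;
}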
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 00913e93cfd3..dd4290a605a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2348,508 +2348,508 @@ bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
switch (N->getOpcode()) {
default: return false;
case NVPTXISD::Tex1DFloatS32:
- Opc = NVPTX::TEX_1D_F32_S32;
+ Opc = NVPTX::TEX_1D_F32_S32_RR;
break;
case NVPTXISD::Tex1DFloatFloat:
- Opc = NVPTX::TEX_1D_F32_F32;
+ Opc = NVPTX::TEX_1D_F32_F32_RR;
break;
case NVPTXISD::Tex1DFloatFloatLevel:
- Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DFloatFloatGrad:
- Opc = NVPTX::TEX_1D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DS32S32:
- Opc = NVPTX::TEX_1D_S32_S32;
+ Opc = NVPTX::TEX_1D_S32_S32_RR;
break;
case NVPTXISD::Tex1DS32Float:
- Opc = NVPTX::TEX_1D_S32_F32;
+ Opc = NVPTX::TEX_1D_S32_F32_RR;
break;
case NVPTXISD::Tex1DS32FloatLevel:
- Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DS32FloatGrad:
- Opc = NVPTX::TEX_1D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DU32S32:
- Opc = NVPTX::TEX_1D_U32_S32;
+ Opc = NVPTX::TEX_1D_U32_S32_RR;
break;
case NVPTXISD::Tex1DU32Float:
- Opc = NVPTX::TEX_1D_U32_F32;
+ Opc = NVPTX::TEX_1D_U32_F32_RR;
break;
case NVPTXISD::Tex1DU32FloatLevel:
- Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DU32FloatGrad:
- Opc = NVPTX::TEX_1D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DArrayFloatS32:
- Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_S32_RR;
break;
case NVPTXISD::Tex1DArrayFloatFloat:
- Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_RR;
break;
case NVPTXISD::Tex1DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DArrayS32S32:
- Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_S32_RR;
break;
case NVPTXISD::Tex1DArrayS32Float:
- Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_RR;
break;
case NVPTXISD::Tex1DArrayS32FloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DArrayS32FloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex1DArrayU32S32:
- Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_S32_RR;
break;
case NVPTXISD::Tex1DArrayU32Float:
- Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_RR;
break;
case NVPTXISD::Tex1DArrayU32FloatLevel:
- Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex1DArrayU32FloatGrad:
- Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DFloatS32:
- Opc = NVPTX::TEX_2D_F32_S32;
+ Opc = NVPTX::TEX_2D_F32_S32_RR;
break;
case NVPTXISD::Tex2DFloatFloat:
- Opc = NVPTX::TEX_2D_F32_F32;
+ Opc = NVPTX::TEX_2D_F32_F32_RR;
break;
case NVPTXISD::Tex2DFloatFloatLevel:
- Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DFloatFloatGrad:
- Opc = NVPTX::TEX_2D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DS32S32:
- Opc = NVPTX::TEX_2D_S32_S32;
+ Opc = NVPTX::TEX_2D_S32_S32_RR;
break;
case NVPTXISD::Tex2DS32Float:
- Opc = NVPTX::TEX_2D_S32_F32;
+ Opc = NVPTX::TEX_2D_S32_F32_RR;
break;
case NVPTXISD::Tex2DS32FloatLevel:
- Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DS32FloatGrad:
- Opc = NVPTX::TEX_2D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DU32S32:
- Opc = NVPTX::TEX_2D_U32_S32;
+ Opc = NVPTX::TEX_2D_U32_S32_RR;
break;
case NVPTXISD::Tex2DU32Float:
- Opc = NVPTX::TEX_2D_U32_F32;
+ Opc = NVPTX::TEX_2D_U32_F32_RR;
break;
case NVPTXISD::Tex2DU32FloatLevel:
- Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DU32FloatGrad:
- Opc = NVPTX::TEX_2D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DArrayFloatS32:
- Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_S32_RR;
break;
case NVPTXISD::Tex2DArrayFloatFloat:
- Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_RR;
break;
case NVPTXISD::Tex2DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DArrayS32S32:
- Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_S32_RR;
break;
case NVPTXISD::Tex2DArrayS32Float:
- Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_RR;
break;
case NVPTXISD::Tex2DArrayS32FloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DArrayS32FloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex2DArrayU32S32:
- Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_S32_RR;
break;
case NVPTXISD::Tex2DArrayU32Float:
- Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_RR;
break;
case NVPTXISD::Tex2DArrayU32FloatLevel:
- Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex2DArrayU32FloatGrad:
- Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR;
break;
case NVPTXISD::Tex3DFloatS32:
- Opc = NVPTX::TEX_3D_F32_S32;
+ Opc = NVPTX::TEX_3D_F32_S32_RR;
break;
case NVPTXISD::Tex3DFloatFloat:
- Opc = NVPTX::TEX_3D_F32_F32;
+ Opc = NVPTX::TEX_3D_F32_F32_RR;
break;
case NVPTXISD::Tex3DFloatFloatLevel:
- Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_3D_F32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex3DFloatFloatGrad:
- Opc = NVPTX::TEX_3D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_3D_F32_F32_GRAD_RR;
break;
case NVPTXISD::Tex3DS32S32:
- Opc = NVPTX::TEX_3D_S32_S32;
+ Opc = NVPTX::TEX_3D_S32_S32_RR;
break;
case NVPTXISD::Tex3DS32Float:
- Opc = NVPTX::TEX_3D_S32_F32;
+ Opc = NVPTX::TEX_3D_S32_F32_RR;
break;
case NVPTXISD::Tex3DS32FloatLevel:
- Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_3D_S32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex3DS32FloatGrad:
- Opc = NVPTX::TEX_3D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_3D_S32_F32_GRAD_RR;
break;
case NVPTXISD::Tex3DU32S32:
- Opc = NVPTX::TEX_3D_U32_S32;
+ Opc = NVPTX::TEX_3D_U32_S32_RR;
break;
case NVPTXISD::Tex3DU32Float:
- Opc = NVPTX::TEX_3D_U32_F32;
+ Opc = NVPTX::TEX_3D_U32_F32_RR;
break;
case NVPTXISD::Tex3DU32FloatLevel:
- Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_3D_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tex3DU32FloatGrad:
- Opc = NVPTX::TEX_3D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_3D_U32_F32_GRAD_RR;
break;
case NVPTXISD::TexCubeFloatFloat:
- Opc = NVPTX::TEX_CUBE_F32_F32;
+ Opc = NVPTX::TEX_CUBE_F32_F32_RR;
break;
case NVPTXISD::TexCubeFloatFloatLevel:
- Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeS32Float:
- Opc = NVPTX::TEX_CUBE_S32_F32;
+ Opc = NVPTX::TEX_CUBE_S32_F32_RR;
break;
case NVPTXISD::TexCubeS32FloatLevel:
- Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeU32Float:
- Opc = NVPTX::TEX_CUBE_U32_F32;
+ Opc = NVPTX::TEX_CUBE_U32_F32_RR;
break;
case NVPTXISD::TexCubeU32FloatLevel:
- Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeArrayFloatFloat:
- Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_RR;
break;
case NVPTXISD::TexCubeArrayFloatFloatLevel:
- Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeArrayS32Float:
- Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_RR;
break;
case NVPTXISD::TexCubeArrayS32FloatLevel:
- Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR;
break;
case NVPTXISD::TexCubeArrayU32Float:
- Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_RR;
break;
case NVPTXISD::TexCubeArrayU32FloatLevel:
- Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR;
break;
case NVPTXISD::Tld4R2DFloatFloat:
- Opc = NVPTX::TLD4_R_2D_F32_F32;
+ Opc = NVPTX::TLD4_R_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4G2DFloatFloat:
- Opc = NVPTX::TLD4_G_2D_F32_F32;
+ Opc = NVPTX::TLD4_G_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4B2DFloatFloat:
- Opc = NVPTX::TLD4_B_2D_F32_F32;
+ Opc = NVPTX::TLD4_B_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4A2DFloatFloat:
- Opc = NVPTX::TLD4_A_2D_F32_F32;
+ Opc = NVPTX::TLD4_A_2D_F32_F32_RR;
break;
case NVPTXISD::Tld4R2DS64Float:
- Opc = NVPTX::TLD4_R_2D_S32_F32;
+ Opc = NVPTX::TLD4_R_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4G2DS64Float:
- Opc = NVPTX::TLD4_G_2D_S32_F32;
+ Opc = NVPTX::TLD4_G_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4B2DS64Float:
- Opc = NVPTX::TLD4_B_2D_S32_F32;
+ Opc = NVPTX::TLD4_B_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4A2DS64Float:
- Opc = NVPTX::TLD4_A_2D_S32_F32;
+ Opc = NVPTX::TLD4_A_2D_S32_F32_RR;
break;
case NVPTXISD::Tld4R2DU64Float:
- Opc = NVPTX::TLD4_R_2D_U32_F32;
+ Opc = NVPTX::TLD4_R_2D_U32_F32_RR;
break;
case NVPTXISD::Tld4G2DU64Float:
- Opc = NVPTX::TLD4_G_2D_U32_F32;
+ Opc = NVPTX::TLD4_G_2D_U32_F32_RR;
break;
case NVPTXISD::Tld4B2DU64Float:
- Opc = NVPTX::TLD4_B_2D_U32_F32;
+ Opc = NVPTX::TLD4_B_2D_U32_F32_RR;
break;
case NVPTXISD::Tld4A2DU64Float:
- Opc = NVPTX::TLD4_A_2D_U32_F32;
+ Opc = NVPTX::TLD4_A_2D_U32_F32_RR;
break;
case NVPTXISD::TexUnified1DFloatS32:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_S32_R;
break;
case NVPTXISD::TexUnified1DFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_R;
break;
case NVPTXISD::TexUnified1DFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DS32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_S32_R;
break;
case NVPTXISD::TexUnified1DS32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_R;
break;
case NVPTXISD::TexUnified1DS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DU32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_S32_R;
break;
case NVPTXISD::TexUnified1DU32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_R;
break;
case NVPTXISD::TexUnified1DU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DArrayFloatS32:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R;
break;
case NVPTXISD::TexUnified1DArrayFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R;
break;
case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DArrayS32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R;
break;
case NVPTXISD::TexUnified1DArrayS32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R;
break;
case NVPTXISD::TexUnified1DArrayS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DArrayS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified1DArrayU32S32:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R;
break;
case NVPTXISD::TexUnified1DArrayU32Float:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R;
break;
case NVPTXISD::TexUnified1DArrayU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified1DArrayU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DFloatS32:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_S32_R;
break;
case NVPTXISD::TexUnified2DFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_R;
break;
case NVPTXISD::TexUnified2DFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DS32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_S32_R;
break;
case NVPTXISD::TexUnified2DS32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_R;
break;
case NVPTXISD::TexUnified2DS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DU32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_S32_R;
break;
case NVPTXISD::TexUnified2DU32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_R;
break;
case NVPTXISD::TexUnified2DU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DArrayFloatS32:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R;
break;
case NVPTXISD::TexUnified2DArrayFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R;
break;
case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DArrayS32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R;
break;
case NVPTXISD::TexUnified2DArrayS32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R;
break;
case NVPTXISD::TexUnified2DArrayS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DArrayS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified2DArrayU32S32:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R;
break;
case NVPTXISD::TexUnified2DArrayU32Float:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R;
break;
case NVPTXISD::TexUnified2DArrayU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified2DArrayU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified3DFloatS32:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_S32_R;
break;
case NVPTXISD::TexUnified3DFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_R;
break;
case NVPTXISD::TexUnified3DFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified3DFloatFloatGrad:
- Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified3DS32S32:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_S32_R;
break;
case NVPTXISD::TexUnified3DS32Float:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_R;
break;
case NVPTXISD::TexUnified3DS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified3DS32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R;
break;
case NVPTXISD::TexUnified3DU32S32:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_S32_R;
break;
case NVPTXISD::TexUnified3DU32Float:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_R;
break;
case NVPTXISD::TexUnified3DU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnified3DU32FloatGrad:
- Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
+ Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R;
break;
case NVPTXISD::TexUnifiedCubeFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeS32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeU32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeArrayS32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R;
break;
case NVPTXISD::TexUnifiedCubeArrayU32Float:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R;
break;
case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
- Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
+ Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R;
break;
case NVPTXISD::Tld4UnifiedR2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedG2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedB2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedA2DFloatFloat:
- Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R;
break;
case NVPTXISD::Tld4UnifiedR2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedG2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedB2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedA2DS64Float:
- Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R;
break;
case NVPTXISD::Tld4UnifiedR2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R;
break;
case NVPTXISD::Tld4UnifiedG2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R;
break;
case NVPTXISD::Tld4UnifiedB2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R;
break;
case NVPTXISD::Tld4UnifiedA2DU64Float:
- Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
+ Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R;
break;
}
@@ -2866,499 +2866,499 @@ bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
switch (N->getOpcode()) {
default: return false;
case NVPTXISD::Suld1DI8Clamp:
- Opc = NVPTX::SULD_1D_I8_CLAMP;
+ Opc = NVPTX::SULD_1D_I8_CLAMP_R;
break;
case NVPTXISD::Suld1DI16Clamp:
- Opc = NVPTX::SULD_1D_I16_CLAMP;
+ Opc = NVPTX::SULD_1D_I16_CLAMP_R;
break;
case NVPTXISD::Suld1DI32Clamp:
- Opc = NVPTX::SULD_1D_I32_CLAMP;
+ Opc = NVPTX::SULD_1D_I32_CLAMP_R;
break;
case NVPTXISD::Suld1DI64Clamp:
- Opc = NVPTX::SULD_1D_I64_CLAMP;
+ Opc = NVPTX::SULD_1D_I64_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I8Clamp:
- Opc = NVPTX::SULD_1D_V2I8_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I16Clamp:
- Opc = NVPTX::SULD_1D_V2I16_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I32Clamp:
- Opc = NVPTX::SULD_1D_V2I32_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld1DV2I64Clamp:
- Opc = NVPTX::SULD_1D_V2I64_CLAMP;
+ Opc = NVPTX::SULD_1D_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld1DV4I8Clamp:
- Opc = NVPTX::SULD_1D_V4I8_CLAMP;
+ Opc = NVPTX::SULD_1D_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld1DV4I16Clamp:
- Opc = NVPTX::SULD_1D_V4I16_CLAMP;
+ Opc = NVPTX::SULD_1D_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld1DV4I32Clamp:
- Opc = NVPTX::SULD_1D_V4I32_CLAMP;
+ Opc = NVPTX::SULD_1D_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI8Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI16Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI32Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayI64Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I8Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I16Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I32Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV2I64Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV4I8Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV4I16Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld1DArrayV4I32Clamp:
- Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld2DI8Clamp:
- Opc = NVPTX::SULD_2D_I8_CLAMP;
+ Opc = NVPTX::SULD_2D_I8_CLAMP_R;
break;
case NVPTXISD::Suld2DI16Clamp:
- Opc = NVPTX::SULD_2D_I16_CLAMP;
+ Opc = NVPTX::SULD_2D_I16_CLAMP_R;
break;
case NVPTXISD::Suld2DI32Clamp:
- Opc = NVPTX::SULD_2D_I32_CLAMP;
+ Opc = NVPTX::SULD_2D_I32_CLAMP_R;
break;
case NVPTXISD::Suld2DI64Clamp:
- Opc = NVPTX::SULD_2D_I64_CLAMP;
+ Opc = NVPTX::SULD_2D_I64_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I8Clamp:
- Opc = NVPTX::SULD_2D_V2I8_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I16Clamp:
- Opc = NVPTX::SULD_2D_V2I16_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I32Clamp:
- Opc = NVPTX::SULD_2D_V2I32_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld2DV2I64Clamp:
- Opc = NVPTX::SULD_2D_V2I64_CLAMP;
+ Opc = NVPTX::SULD_2D_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld2DV4I8Clamp:
- Opc = NVPTX::SULD_2D_V4I8_CLAMP;
+ Opc = NVPTX::SULD_2D_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld2DV4I16Clamp:
- Opc = NVPTX::SULD_2D_V4I16_CLAMP;
+ Opc = NVPTX::SULD_2D_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld2DV4I32Clamp:
- Opc = NVPTX::SULD_2D_V4I32_CLAMP;
+ Opc = NVPTX::SULD_2D_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI8Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI16Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI32Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayI64Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I8Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I16Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I32Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV2I64Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV4I8Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV4I16Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld2DArrayV4I32Clamp:
- Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld3DI8Clamp:
- Opc = NVPTX::SULD_3D_I8_CLAMP;
+ Opc = NVPTX::SULD_3D_I8_CLAMP_R;
break;
case NVPTXISD::Suld3DI16Clamp:
- Opc = NVPTX::SULD_3D_I16_CLAMP;
+ Opc = NVPTX::SULD_3D_I16_CLAMP_R;
break;
case NVPTXISD::Suld3DI32Clamp:
- Opc = NVPTX::SULD_3D_I32_CLAMP;
+ Opc = NVPTX::SULD_3D_I32_CLAMP_R;
break;
case NVPTXISD::Suld3DI64Clamp:
- Opc = NVPTX::SULD_3D_I64_CLAMP;
+ Opc = NVPTX::SULD_3D_I64_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I8Clamp:
- Opc = NVPTX::SULD_3D_V2I8_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I8_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I16Clamp:
- Opc = NVPTX::SULD_3D_V2I16_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I16_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I32Clamp:
- Opc = NVPTX::SULD_3D_V2I32_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I32_CLAMP_R;
break;
case NVPTXISD::Suld3DV2I64Clamp:
- Opc = NVPTX::SULD_3D_V2I64_CLAMP;
+ Opc = NVPTX::SULD_3D_V2I64_CLAMP_R;
break;
case NVPTXISD::Suld3DV4I8Clamp:
- Opc = NVPTX::SULD_3D_V4I8_CLAMP;
+ Opc = NVPTX::SULD_3D_V4I8_CLAMP_R;
break;
case NVPTXISD::Suld3DV4I16Clamp:
- Opc = NVPTX::SULD_3D_V4I16_CLAMP;
+ Opc = NVPTX::SULD_3D_V4I16_CLAMP_R;
break;
case NVPTXISD::Suld3DV4I32Clamp:
- Opc = NVPTX::SULD_3D_V4I32_CLAMP;
+ Opc = NVPTX::SULD_3D_V4I32_CLAMP_R;
break;
case NVPTXISD::Suld1DI8Trap:
- Opc = NVPTX::SULD_1D_I8_TRAP;
+ Opc = NVPTX::SULD_1D_I8_TRAP_R;
break;
case NVPTXISD::Suld1DI16Trap:
- Opc = NVPTX::SULD_1D_I16_TRAP;
+ Opc = NVPTX::SULD_1D_I16_TRAP_R;
break;
case NVPTXISD::Suld1DI32Trap:
- Opc = NVPTX::SULD_1D_I32_TRAP;
+ Opc = NVPTX::SULD_1D_I32_TRAP_R;
break;
case NVPTXISD::Suld1DI64Trap:
- Opc = NVPTX::SULD_1D_I64_TRAP;
+ Opc = NVPTX::SULD_1D_I64_TRAP_R;
break;
case NVPTXISD::Suld1DV2I8Trap:
- Opc = NVPTX::SULD_1D_V2I8_TRAP;
+ Opc = NVPTX::SULD_1D_V2I8_TRAP_R;
break;
case NVPTXISD::Suld1DV2I16Trap:
- Opc = NVPTX::SULD_1D_V2I16_TRAP;
+ Opc = NVPTX::SULD_1D_V2I16_TRAP_R;
break;
case NVPTXISD::Suld1DV2I32Trap:
- Opc = NVPTX::SULD_1D_V2I32_TRAP;
+ Opc = NVPTX::SULD_1D_V2I32_TRAP_R;
break;
case NVPTXISD::Suld1DV2I64Trap:
- Opc = NVPTX::SULD_1D_V2I64_TRAP;
+ Opc = NVPTX::SULD_1D_V2I64_TRAP_R;
break;
case NVPTXISD::Suld1DV4I8Trap:
- Opc = NVPTX::SULD_1D_V4I8_TRAP;
+ Opc = NVPTX::SULD_1D_V4I8_TRAP_R;
break;
case NVPTXISD::Suld1DV4I16Trap:
- Opc = NVPTX::SULD_1D_V4I16_TRAP;
+ Opc = NVPTX::SULD_1D_V4I16_TRAP_R;
break;
case NVPTXISD::Suld1DV4I32Trap:
- Opc = NVPTX::SULD_1D_V4I32_TRAP;
+ Opc = NVPTX::SULD_1D_V4I32_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI8Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI16Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI32Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP_R;
break;
case NVPTXISD::Suld1DArrayI64Trap:
- Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I8Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I16Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I32Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV2I64Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV4I8Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV4I16Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R;
break;
case NVPTXISD::Suld1DArrayV4I32Trap:
- Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R;
break;
case NVPTXISD::Suld2DI8Trap:
- Opc = NVPTX::SULD_2D_I8_TRAP;
+ Opc = NVPTX::SULD_2D_I8_TRAP_R;
break;
case NVPTXISD::Suld2DI16Trap:
- Opc = NVPTX::SULD_2D_I16_TRAP;
+ Opc = NVPTX::SULD_2D_I16_TRAP_R;
break;
case NVPTXISD::Suld2DI32Trap:
- Opc = NVPTX::SULD_2D_I32_TRAP;
+ Opc = NVPTX::SULD_2D_I32_TRAP_R;
break;
case NVPTXISD::Suld2DI64Trap:
- Opc = NVPTX::SULD_2D_I64_TRAP;
+ Opc = NVPTX::SULD_2D_I64_TRAP_R;
break;
case NVPTXISD::Suld2DV2I8Trap:
- Opc = NVPTX::SULD_2D_V2I8_TRAP;
+ Opc = NVPTX::SULD_2D_V2I8_TRAP_R;
break;
case NVPTXISD::Suld2DV2I16Trap:
- Opc = NVPTX::SULD_2D_V2I16_TRAP;
+ Opc = NVPTX::SULD_2D_V2I16_TRAP_R;
break;
case NVPTXISD::Suld2DV2I32Trap:
- Opc = NVPTX::SULD_2D_V2I32_TRAP;
+ Opc = NVPTX::SULD_2D_V2I32_TRAP_R;
break;
case NVPTXISD::Suld2DV2I64Trap:
- Opc = NVPTX::SULD_2D_V2I64_TRAP;
+ Opc = NVPTX::SULD_2D_V2I64_TRAP_R;
break;
case NVPTXISD::Suld2DV4I8Trap:
- Opc = NVPTX::SULD_2D_V4I8_TRAP;
+ Opc = NVPTX::SULD_2D_V4I8_TRAP_R;
break;
case NVPTXISD::Suld2DV4I16Trap:
- Opc = NVPTX::SULD_2D_V4I16_TRAP;
+ Opc = NVPTX::SULD_2D_V4I16_TRAP_R;
break;
case NVPTXISD::Suld2DV4I32Trap:
- Opc = NVPTX::SULD_2D_V4I32_TRAP;
+ Opc = NVPTX::SULD_2D_V4I32_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI8Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI16Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI32Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP_R;
break;
case NVPTXISD::Suld2DArrayI64Trap:
- Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I8Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I16Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I32Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV2I64Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV4I8Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV4I16Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R;
break;
case NVPTXISD::Suld2DArrayV4I32Trap:
- Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R;
break;
case NVPTXISD::Suld3DI8Trap:
- Opc = NVPTX::SULD_3D_I8_TRAP;
+ Opc = NVPTX::SULD_3D_I8_TRAP_R;
break;
case NVPTXISD::Suld3DI16Trap:
- Opc = NVPTX::SULD_3D_I16_TRAP;
+ Opc = NVPTX::SULD_3D_I16_TRAP_R;
break;
case NVPTXISD::Suld3DI32Trap:
- Opc = NVPTX::SULD_3D_I32_TRAP;
+ Opc = NVPTX::SULD_3D_I32_TRAP_R;
break;
case NVPTXISD::Suld3DI64Trap:
- Opc = NVPTX::SULD_3D_I64_TRAP;
+ Opc = NVPTX::SULD_3D_I64_TRAP_R;
break;
case NVPTXISD::Suld3DV2I8Trap:
- Opc = NVPTX::SULD_3D_V2I8_TRAP;
+ Opc = NVPTX::SULD_3D_V2I8_TRAP_R;
break;
case NVPTXISD::Suld3DV2I16Trap:
- Opc = NVPTX::SULD_3D_V2I16_TRAP;
+ Opc = NVPTX::SULD_3D_V2I16_TRAP_R;
break;
case NVPTXISD::Suld3DV2I32Trap:
- Opc = NVPTX::SULD_3D_V2I32_TRAP;
+ Opc = NVPTX::SULD_3D_V2I32_TRAP_R;
break;
case NVPTXISD::Suld3DV2I64Trap:
- Opc = NVPTX::SULD_3D_V2I64_TRAP;
+ Opc = NVPTX::SULD_3D_V2I64_TRAP_R;
break;
case NVPTXISD::Suld3DV4I8Trap:
- Opc = NVPTX::SULD_3D_V4I8_TRAP;
+ Opc = NVPTX::SULD_3D_V4I8_TRAP_R;
break;
case NVPTXISD::Suld3DV4I16Trap:
- Opc = NVPTX::SULD_3D_V4I16_TRAP;
+ Opc = NVPTX::SULD_3D_V4I16_TRAP_R;
break;
case NVPTXISD::Suld3DV4I32Trap:
- Opc = NVPTX::SULD_3D_V4I32_TRAP;
+ Opc = NVPTX::SULD_3D_V4I32_TRAP_R;
break;
case NVPTXISD::Suld1DI8Zero:
- Opc = NVPTX::SULD_1D_I8_ZERO;
+ Opc = NVPTX::SULD_1D_I8_ZERO_R;
break;
case NVPTXISD::Suld1DI16Zero:
- Opc = NVPTX::SULD_1D_I16_ZERO;
+ Opc = NVPTX::SULD_1D_I16_ZERO_R;
break;
case NVPTXISD::Suld1DI32Zero:
- Opc = NVPTX::SULD_1D_I32_ZERO;
+ Opc = NVPTX::SULD_1D_I32_ZERO_R;
break;
case NVPTXISD::Suld1DI64Zero:
- Opc = NVPTX::SULD_1D_I64_ZERO;
+ Opc = NVPTX::SULD_1D_I64_ZERO_R;
break;
case NVPTXISD::Suld1DV2I8Zero:
- Opc = NVPTX::SULD_1D_V2I8_ZERO;
+ Opc = NVPTX::SULD_1D_V2I8_ZERO_R;
break;
case NVPTXISD::Suld1DV2I16Zero:
- Opc = NVPTX::SULD_1D_V2I16_ZERO;
+ Opc = NVPTX::SULD_1D_V2I16_ZERO_R;
break;
case NVPTXISD::Suld1DV2I32Zero:
- Opc = NVPTX::SULD_1D_V2I32_ZERO;
+ Opc = NVPTX::SULD_1D_V2I32_ZERO_R;
break;
case NVPTXISD::Suld1DV2I64Zero:
- Opc = NVPTX::SULD_1D_V2I64_ZERO;
+ Opc = NVPTX::SULD_1D_V2I64_ZERO_R;
break;
case NVPTXISD::Suld1DV4I8Zero:
- Opc = NVPTX::SULD_1D_V4I8_ZERO;
+ Opc = NVPTX::SULD_1D_V4I8_ZERO_R;
break;
case NVPTXISD::Suld1DV4I16Zero:
- Opc = NVPTX::SULD_1D_V4I16_ZERO;
+ Opc = NVPTX::SULD_1D_V4I16_ZERO_R;
break;
case NVPTXISD::Suld1DV4I32Zero:
- Opc = NVPTX::SULD_1D_V4I32_ZERO;
+ Opc = NVPTX::SULD_1D_V4I32_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI8Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI16Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI32Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO_R;
break;
case NVPTXISD::Suld1DArrayI64Zero:
- Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I8Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I16Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I32Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV2I64Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV4I8Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV4I16Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R;
break;
case NVPTXISD::Suld1DArrayV4I32Zero:
- Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R;
break;
case NVPTXISD::Suld2DI8Zero:
- Opc = NVPTX::SULD_2D_I8_ZERO;
+ Opc = NVPTX::SULD_2D_I8_ZERO_R;
break;
case NVPTXISD::Suld2DI16Zero:
- Opc = NVPTX::SULD_2D_I16_ZERO;
+ Opc = NVPTX::SULD_2D_I16_ZERO_R;
break;
case NVPTXISD::Suld2DI32Zero:
- Opc = NVPTX::SULD_2D_I32_ZERO;
+ Opc = NVPTX::SULD_2D_I32_ZERO_R;
break;
case NVPTXISD::Suld2DI64Zero:
- Opc = NVPTX::SULD_2D_I64_ZERO;
+ Opc = NVPTX::SULD_2D_I64_ZERO_R;
break;
case NVPTXISD::Suld2DV2I8Zero:
- Opc = NVPTX::SULD_2D_V2I8_ZERO;
+ Opc = NVPTX::SULD_2D_V2I8_ZERO_R;
break;
case NVPTXISD::Suld2DV2I16Zero:
- Opc = NVPTX::SULD_2D_V2I16_ZERO;
+ Opc = NVPTX::SULD_2D_V2I16_ZERO_R;
break;
case NVPTXISD::Suld2DV2I32Zero:
- Opc = NVPTX::SULD_2D_V2I32_ZERO;
+ Opc = NVPTX::SULD_2D_V2I32_ZERO_R;
break;
case NVPTXISD::Suld2DV2I64Zero:
- Opc = NVPTX::SULD_2D_V2I64_ZERO;
+ Opc = NVPTX::SULD_2D_V2I64_ZERO_R;
break;
case NVPTXISD::Suld2DV4I8Zero:
- Opc = NVPTX::SULD_2D_V4I8_ZERO;
+ Opc = NVPTX::SULD_2D_V4I8_ZERO_R;
break;
case NVPTXISD::Suld2DV4I16Zero:
- Opc = NVPTX::SULD_2D_V4I16_ZERO;
+ Opc = NVPTX::SULD_2D_V4I16_ZERO_R;
break;
case NVPTXISD::Suld2DV4I32Zero:
- Opc = NVPTX::SULD_2D_V4I32_ZERO;
+ Opc = NVPTX::SULD_2D_V4I32_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI8Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI16Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI32Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO_R;
break;
case NVPTXISD::Suld2DArrayI64Zero:
- Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I8Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I16Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I32Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV2I64Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV4I8Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV4I16Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R;
break;
case NVPTXISD::Suld2DArrayV4I32Zero:
- Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R;
break;
case NVPTXISD::Suld3DI8Zero:
- Opc = NVPTX::SULD_3D_I8_ZERO;
+ Opc = NVPTX::SULD_3D_I8_ZERO_R;
break;
case NVPTXISD::Suld3DI16Zero:
- Opc = NVPTX::SULD_3D_I16_ZERO;
+ Opc = NVPTX::SULD_3D_I16_ZERO_R;
break;
case NVPTXISD::Suld3DI32Zero:
- Opc = NVPTX::SULD_3D_I32_ZERO;
+ Opc = NVPTX::SULD_3D_I32_ZERO_R;
break;
case NVPTXISD::Suld3DI64Zero:
- Opc = NVPTX::SULD_3D_I64_ZERO;
+ Opc = NVPTX::SULD_3D_I64_ZERO_R;
break;
case NVPTXISD::Suld3DV2I8Zero:
- Opc = NVPTX::SULD_3D_V2I8_ZERO;
+ Opc = NVPTX::SULD_3D_V2I8_ZERO_R;
break;
case NVPTXISD::Suld3DV2I16Zero:
- Opc = NVPTX::SULD_3D_V2I16_ZERO;
+ Opc = NVPTX::SULD_3D_V2I16_ZERO_R;
break;
case NVPTXISD::Suld3DV2I32Zero:
- Opc = NVPTX::SULD_3D_V2I32_ZERO;
+ Opc = NVPTX::SULD_3D_V2I32_ZERO_R;
break;
case NVPTXISD::Suld3DV2I64Zero:
- Opc = NVPTX::SULD_3D_V2I64_ZERO;
+ Opc = NVPTX::SULD_3D_V2I64_ZERO_R;
break;
case NVPTXISD::Suld3DV4I8Zero:
- Opc = NVPTX::SULD_3D_V4I8_ZERO;
+ Opc = NVPTX::SULD_3D_V4I8_ZERO_R;
break;
case NVPTXISD::Suld3DV4I16Zero:
- Opc = NVPTX::SULD_3D_V4I16_ZERO;
+ Opc = NVPTX::SULD_3D_V4I16_ZERO_R;
break;
case NVPTXISD::Suld3DV4I32Zero:
- Opc = NVPTX::SULD_3D_V4I32_ZERO;
+ Opc = NVPTX::SULD_3D_V4I32_ZERO_R;
break;
}
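
The block above renames every surface-load (SULD) opcode to its `_R` form. Judging by the `_RR`/`_RI`/`_IR`/`_II` texture variants defined later in this patch, the suffix appears to mark the form that takes its handle operand in a register, as opposed to an immediate form. A minimal sketch of that kind of opcode selection, with every enum and function name hypothetical rather than taken from the NVPTX backend:

```cpp
#include <optional>

// Hypothetical opcode enums, loosely modeled on the switch above; the real
// NVPTX opcode tables are generated by TableGen and not reproduced here.
enum class SurfLoadISD { Suld1DI32Trap };
enum class SurfLoadMI { SULD_1D_I32_TRAP_R, SULD_1D_I32_TRAP_I };

// Pick the register ("_R") or immediate ("_I") form of a surface load,
// depending on how the surface-handle operand is materialized.
std::optional<SurfLoadMI> pickSurfLoadOpcode(SurfLoadISD Op, bool HandleIsImm) {
  switch (Op) {
  case SurfLoadISD::Suld1DI32Trap:
    return HandleIsImm ? SurfLoadMI::SULD_1D_I32_TRAP_I
                       : SurfLoadMI::SULD_1D_I32_TRAP_R;
  }
  return std::nullopt; // unknown node kind: let the caller fall back
}
```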
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d4842c953ce7..e2f6b69fc530 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2530,7 +2530,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// to newly created nodes. The SDNodes for params have to
// appear in the same order as their order of appearance
// in the original function. "idx+1" holds that order.
- if (!PAL.hasParamAttribute(i, Attribute::ByVal)) {
+ if (!PAL.hasParamAttr(i, Attribute::ByVal)) {
bool aggregateIsPacked = false;
if (StructType *STy = dyn_cast<StructType>(Ty))
aggregateIsPacked = STy->isPacked();
@@ -3547,7 +3547,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col:
case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_col_stride:
case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row:
- case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride: {
+ case Intrinsic::nvvm_wmma_m16n16k8_load_b_tf32_row_stride:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_b16:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x4_trans_b16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::v4i32;
Info.ptrVal = I.getArgOperand(0);
@@ -3585,7 +3587,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col:
case Intrinsic::nvvm_wmma_m8n8k32_load_b_s4_col_stride:
case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col_stride:
- case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col: {
+ case Intrinsic::nvvm_wmma_m8n8k32_load_b_u4_col:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_b16:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x1_trans_b16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i32;
Info.ptrVal = I.getArgOperand(0);
@@ -3679,7 +3683,9 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col:
case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_col_stride:
case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row:
- case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride: {
+ case Intrinsic::nvvm_wmma_m8n8k32_load_c_s32_row_stride:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_b16:
+ case Intrinsic::nvvm_ldmatrix_sync_aligned_m8n8_x2_trans_b16: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::v2i32;
Info.ptrVal = I.getArgOperand(0);
@@ -4441,11 +4447,8 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
//
int numUses = 0;
int nonAddCount = 0;
- for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
- UE = N0.getNode()->use_end();
- UI != UE; ++UI) {
+ for (const SDNode *User : N0.getNode()->uses()) {
numUses++;
- SDNode *User = *UI;
if (User->getOpcode() != ISD::FADD)
++nonAddCount;
}
@@ -4471,8 +4474,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
opIsLive = true;
if (!opIsLive)
- for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : left->uses()) {
int orderNo3 = User->getIROrder();
if (orderNo3 > orderNo) {
opIsLive = true;
@@ -4481,8 +4483,7 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
}
if (!opIsLive)
- for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : right->uses()) {
int orderNo3 = User->getIROrder();
if (orderNo3 > orderNo) {
opIsLive = true;
diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index a846c2fada26..fc0d5cc6fbfa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -148,9 +148,8 @@ void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
// We implement "poor man's DCE" here to make sure any code that is no longer
// live is actually unreachable and can be trivially eliminated by the
// unreachable block elimination pass.
- for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE; ++UI) {
- if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
+ for (Use &U : From->uses()) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(U)) {
if (BI->isUnconditional()) continue;
BasicBlock *Dest;
if (To->isZero())
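
The two hunks above swap hand-written `use_iterator` loops for range-based `for` over `uses()`, which also lets the body drop the manual `SDNode *User = *UI;` step. A standalone sketch of the same before/after shape, using a hypothetical `Node` type rather than LLVM's `SDNode`/`Use` classes:

```cpp
#include <cstdio>
#include <vector>

// Stand-in for a DAG/IR node with a use list; LLVM's SDNode and Value expose
// the same idea through use_begin()/use_end() and a uses() range.
struct Node {
  int Opcode;
  std::vector<Node *> Users;
  const std::vector<Node *> &uses() const { return Users; }
};

// Old style: explicit iterator pair, user extracted by hand inside the loop.
int countNonAddUsersOld(const Node &N, int AddOpcode) {
  int NonAdd = 0;
  for (std::vector<Node *>::const_iterator UI = N.Users.begin(),
                                           UE = N.Users.end();
       UI != UE; ++UI) {
    const Node *User = *UI;
    if (User->Opcode != AddOpcode)
      ++NonAdd;
  }
  return NonAdd;
}

// New style: range-based for over uses(), as in the hunks above.
int countNonAddUsers(const Node &N, int AddOpcode) {
  int NonAdd = 0;
  for (const Node *User : N.uses())
    if (User->Opcode != AddOpcode)
      ++NonAdd;
  return NonAdd;
}

int main() {
  Node Add{1, {}}, Mul{2, {}}, Root{0, {&Add, &Mul}};
  std::printf("%d\n", countNonAddUsers(Root, /*AddOpcode=*/1)); // prints 1
  (void)countNonAddUsersOld;
  return 0;
}
```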
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index ec0c92ccf5c5..953d95e55f65 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -195,13 +195,12 @@ unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
else // Conditional branch
- BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
- .addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(NVPTX::CBranch)).add(Cond[0]).addMBB(TBB);
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
return 2;
}
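
Here the conditional-branch builder switches from `.addReg(Cond[0].getReg())` to `.add(Cond[0])`, forwarding the existing operand instead of rebuilding a bare register operand; presumably this keeps whatever flags the operand already carries. A self-contained analogy with hypothetical `Operand`/`InstBuilder` types (not LLVM's `MachineOperand`/`MachineInstrBuilder`):

```cpp
#include <cassert>
#include <vector>

// Toy operand carrying a flag alongside the register number.
struct Operand {
  unsigned Reg = 0;
  bool IsKill = false; // example of state that lives on the operand itself
};

struct InstBuilder {
  std::vector<Operand> Ops;
  // Rebuilds a bare register operand: any flags on the source are lost.
  InstBuilder &addReg(unsigned Reg) {
    Ops.push_back(Operand{Reg, false});
    return *this;
  }
  // Forwards the operand as-is, flags included (what .add() does above).
  InstBuilder &add(const Operand &Op) {
    Ops.push_back(Op);
    return *this;
  }
};

int main() {
  Operand Cond{/*Reg=*/5, /*IsKill=*/true};
  InstBuilder A, B;
  A.addReg(Cond.Reg); // drops IsKill
  B.add(Cond);        // preserves IsKill
  assert(!A.Ops[0].IsKill && B.Ops[0].IsKill);
  return 0;
}
```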
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 4834985b1019..96386af569de 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2247,8 +2247,18 @@ class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
!strconcat("mov", asmstr, " \t$dst, $src;"),
[(set regclass:$dst, (MoveParam regclass:$src))]>;
+class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty,
+ string asmstr> :
+ NVPTXInst<(outs regclass:$dst), (ins srcty:$src),
+ !strconcat("mov", asmstr, " \t$dst, $src;"),
+ [(set regclass:$dst, (MoveParam texternalsym:$src))]>;
+
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
+
+def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, ".b64">;
+def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, ".b32">;
+
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"cvt.u16.u32 \t$dst, $src;",
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index de4bf2ef3055..511cd875ac55 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1232,7 +1232,7 @@ multiclass F_ATOMIC_2<NVPTXRegClass regclass, string SpaceStr, string TypeStr,
// has 2 operands, neg the second one
multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
- Operand IMMType, list<Predicate> Pred> {
+ list<Predicate> Pred> {
def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
!strconcat(
"{{ \n\t",
@@ -1244,12 +1244,11 @@ multiclass F_ATOMIC_2_NEG_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
Requires<Pred>;
}
multiclass F_ATOMIC_2_NEG<NVPTXRegClass regclass, string SpaceStr,
- string TypeStr, string OpcStr, PatFrag IntOp, Operand IMMType,
- list<Predicate> Pred = []> {
+ string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
defm p32: F_ATOMIC_2_NEG_imp<Int32Regs, regclass, SpaceStr, TypeStr, OpcStr,
- IntOp, IMMType, Pred> ;
+ IntOp, Pred> ;
defm p64: F_ATOMIC_2_NEG_imp<Int64Regs, regclass, SpaceStr, TypeStr, OpcStr,
- IntOp, IMMType, Pred> ;
+ IntOp, Pred> ;
}
// has 3 operands
@@ -1357,21 +1356,21 @@ def atomic_load_sub_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
(atomic_load_sub_64 node:$a, node:$b)>;
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<Int32Regs, ".global", "32", ".add",
- atomic_load_sub_32_g, i32imm>;
+ atomic_load_sub_32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<Int64Regs, ".global", "64", ".add",
- atomic_load_sub_64_g, i64imm>;
+ atomic_load_sub_64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<Int32Regs, "", "32", ".add",
- atomic_load_sub_32_gen, i32imm>;
+ atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<Int32Regs, ".global", "32",
- ".add", atomic_load_sub_32_gen, i32imm>;
+ ".add", atomic_load_sub_32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<Int32Regs, ".shared", "32", ".add",
- atomic_load_sub_32_s, i32imm>;
+ atomic_load_sub_32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<Int64Regs, ".shared", "64", ".add",
- atomic_load_sub_64_s, i64imm>;
+ atomic_load_sub_64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<Int64Regs, "", "64", ".add",
- atomic_load_sub_64_gen, i64imm>;
+ atomic_load_sub_64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<Int64Regs, ".global", "64",
- ".add", atomic_load_sub_64_gen, i64imm>;
+ ".add", atomic_load_sub_64_gen>;
// atom_swap
@@ -2465,2303 +2464,1563 @@ def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt),
// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
-def TEX_1D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
- "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
- "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
- []>;
-def TEX_1D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], $lod;",
- []>;
-def TEX_1D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_1D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}];",
- []>;
-def TEX_1D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_1D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-
-def TEX_2D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TEX_2D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_2D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+class TEX_1D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+
+multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_1D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_2D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_2D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_2D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_2D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_1D_F32_S32 : TEX_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_F32_F32 : TEX_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_S32 : TEX_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_S32_F32 : TEX_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_S32 : TEX_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_U32_F32 : TEX_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
+ []>;
+
+multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_3D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_3D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_3D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_3D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_3D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+defm TEX_1D_F32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_LEVEL :
+ TEX_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
+ " \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_CUBE_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_CUBE_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_CUBE_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_CUBE_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+defm TEX_1D_F32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_S32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_U32_F32_GRAD
+ : TEX_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
+ []>;
+
+multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_CUBE_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_CUBE_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_CUBE_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_CUBE_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+defm TEX_1D_ARRAY_F32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_F32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_S32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_S32
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_1D_ARRAY_U32_F32
+ : TEX_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+
+multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TLD4_R_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_R_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_R_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_G_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_B_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
-def TLD4_A_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, $s, \\{$x, $y\\}];",
- []>;
+defm TEX_1D_ARRAY_F32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_LEVEL
+ : TEX_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x,
+ intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
+ " \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
}
+defm TEX_1D_ARRAY_F32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_S32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_1D_ARRAY_U32_F32_GRAD
+ : TEX_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
+ []>;
+
+multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
+}
-// texmode_unified
-let IsTex = true, IsTexModeUnified = true in {
-// Texture fetch instructions using handles
-def TEX_UNIFIED_1D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$lod),
- "tex.level.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x),
- "tex.1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x),
- "tex.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
- []>;
-def TEX_UNIFIED_1D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+defm TEX_2D_F32_F32 : TEX_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_F32_S32 : TEX_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_S32_S32 : TEX_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_S32_F32 : TEX_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_S32 : TEX_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_U32_F32 : TEX_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+
+multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_1D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x),
- "tex.a1d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x),
- "tex.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}];",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$lod),
- "tex.level.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], $lod;",
- []>;
-def TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$gradx, Float32Regs:$grady),
- "tex.grad.a1d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
- []>;
+defm TEX_2D_F32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_LEVEL :
+ TEX_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+
+multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_2D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y),
- "tex.2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tex.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$lod),
- "tex.level.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_2D_F32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_S32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_U32_F32_GRAD :
+ TEX_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_2D_ARRAY_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Int32Regs:$x,
- Int32Regs:$y),
- "tex.a2d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y),
- "tex.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}];",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y, Float32Regs:$lod),
- "tex.level.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], $lod;",
- []>;
-def TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l, Float32Regs:$x,
- Float32Regs:$y,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$grady0, Float32Regs:$grady1),
- "tex.grad.a2d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
- "\\{$grady0, $grady1\\};",
- []>;
+defm TEX_2D_ARRAY_F32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_F32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_S32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_S32
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_2D_ARRAY_U32_F32
+ : TEX_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+
+multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_3D_F32_S32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.f32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_F32_F32_GRAD
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_UNIFIED_3D_S32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.s32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_S32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
-def TEX_UNIFIED_3D_U32_S32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$z),
- "tex.3d.v4.u32.s32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z),
- "tex.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_3D_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z, Float32Regs:$lod),
- "tex.level.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_3D_U32_F32_GRAD
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y,
- Float32Regs:$z,
- Float32Regs:$gradx0, Float32Regs:$gradx1,
- Float32Regs:$gradx2, Float32Regs:$grady0,
- Float32Regs:$grady1, Float32Regs:$grady2),
- "tex.grad.3d.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], "
- "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
- "\\{$grady0, $grady1, $grady2, $grady2\\};",
- []>;
+defm TEX_2D_ARRAY_F32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_LEVEL
+ : TEX_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+
+multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_CUBE_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.cube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$x, $y, $z, $z\\}], $lod;",
- []>;
+defm TEX_2D_ARRAY_F32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_S32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_2D_ARRAY_U32_F32_GRAD
+ : TEX_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_3D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
- : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
- Float32Regs:$b, Float32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.f32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.s32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z),
- "tex.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}];",
- []>;
-def TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$t, Int32Regs:$l,
- Float32Regs:$x, Float32Regs:$y, Float32Regs:$z,
- Float32Regs:$lod),
- "tex.level.acube.v4.u32.f32 \t\\{$r, $g, $b, $a\\}, "
- "[$t, \\{$l, $x, $y, $z\\}], $lod;",
- []>;
+defm TEX_3D_F32_F32 : TEX_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_F32_S32 : TEX_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_3D_S32_S32 : TEX_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_S32_F32 : TEX_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_S32 : TEX_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_3D_U32_F32 : TEX_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def TLD4_UNIFIED_R_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_F32_F32
- : NVPTXInst<(outs Float32Regs:$v0, Float32Regs:$v1,
- Float32Regs:$v2, Float32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.f32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_R_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_S32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.s32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_R_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.r.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_G_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.g.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_B_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.b.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
-def TLD4_UNIFIED_A_2D_U32_F32
- : NVPTXInst<(outs Int32Regs:$v0, Int32Regs:$v1,
- Int32Regs:$v2, Int32Regs:$v3),
- (ins Int64Regs:$t, Float32Regs:$x, Float32Regs:$y),
- "tld4.a.2d.v4.u32.f32 \t\\{$v0, $v1, $v2, $v3\\}, "
- "[$t, \\{$x, $y\\}];",
- []>;
+defm TEX_3D_F32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_LEVEL
+ : TEX_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_3D_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
}
+defm TEX_3D_F32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_3D_S32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_3D_U32_F32_GRAD
+ : TEX_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
+defm TEX_CUBE_F32_F32
+ : TEX_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32
+ : TEX_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32
+ : TEX_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+
+multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-//=== Surface load instructions
-// .clamp variant
-let IsSuld = true in {
-def SULD_1D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.clamp \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_CUBE_F32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_S32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_U32_F32_LEVEL
+ : TEX_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $z\\}];",
+ []>;
+
+multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_1D_ARRAY_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.clamp \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_CUBE_ARRAY_F32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32
+ : TEX_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+
+multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _RR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TEX_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_2D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.clamp \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_CUBE_ARRAY_F32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_S32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_CUBE_ARRAY_U32_F32_LEVEL
+ : TEX_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TLD4_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag texsamp>
+ : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+ outtype:$v2, outtype:$v3),
+ !con(texsamp, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];",
+ []>;
+
+multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype> {
+ def _RR : TLD4_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, Int64Regs:$s)>;
+ def _RI : TLD4_2D_base<inst, outtype, intype,
+ (ins Int64Regs:$t, i64imm:$s)>;
+ def _IR : TLD4_2D_base<inst, outtype, intype,
+ (ins i64imm:$t, Int64Regs:$s)>;
+ def _II : TLD4_2D_base<inst, outtype, intype,
+ (ins i64imm:$t, i64imm:$s)>;
+}
-def SULD_2D_ARRAY_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.clamp \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TLD4_R_2D_F32_F32
+ : TLD4_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_G_2D_F32_F32
+ : TLD4_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_B_2D_F32_F32
+ : TLD4_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_A_2D_F32_F32
+ : TLD4_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_R_2D_S32_F32
+ : TLD4_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_S32_F32
+ : TLD4_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_S32_F32
+ : TLD4_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_S32_F32
+ : TLD4_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_R_2D_U32_F32
+ : TLD4_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_G_2D_U32_F32
+ : TLD4_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_B_2D_U32_F32
+ : TLD4_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_A_2D_U32_F32
+ : TLD4_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
-def SULD_3D_I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.clamp \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
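The cleanup above leans entirely on TableGen's multiclass/defm expansion plus !con to splice the per-variant texture/sampler operands onto a shared coordinate list. As a minimal, standalone sketch of that mechanism (every name here -- Inst, ins, GPR32, GPR64, i64imm, TEX_DEMO_base, TEX_DEMO, TEX_DEMO_F32_F32 -- is a hypothetical stand-in that only mirrors the shape of the patch, not the real NVPTX definitions), the snippet below can be fed directly to llvm-tblgen, which prints the four expanded records TEX_DEMO_F32_F32_RR/_RI/_IR/_II:

def ins;          // dag operator; normally provided by Target.td
def GPR64;        // stand-in for a 64-bit register class (texref/sampler handles)
def GPR32;        // stand-in for a 32-bit register class (coordinates)
def i64imm;       // stand-in for a 64-bit immediate operand

class Inst<dag iops, string asm> {
  dag InOperandList = iops;
  string AsmString = asm;
}

// Shared body: !con appends the coordinate operands to whatever
// texture/sampler operands a concrete variant passes in.
class TEX_DEMO_base<string inst, dag texsamp>
  : Inst<!con(texsamp, (ins GPR32:$x, GPR32:$y)),
         inst # " [$t, $s, $x, $y];">;

// One record per register/immediate combination of texture and sampler.
multiclass TEX_DEMO<string inst> {
  def _RR : TEX_DEMO_base<inst, (ins GPR64:$t, GPR64:$s)>;
  def _RI : TEX_DEMO_base<inst, (ins GPR64:$t, i64imm:$s)>;
  def _IR : TEX_DEMO_base<inst, (ins i64imm:$t, GPR64:$s)>;
  def _II : TEX_DEMO_base<inst, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_DEMO_F32_F32 : TEX_DEMO<"tex.2d.v4.f32.f32">;

The same shape is what lets each TEX_*/TLD4_* family in the patch collapse from four hand-written defs (two for the unified _R/_I forms, which take no sampler operand) into one multiclass plus a defm per output/coordinate type combination.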
-let IsSuld = 2 in {
-def SULD_1D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+// texmode_unified
+let IsTex = true, IsTexModeUnified = true in {
+// Texture fetch instructions using handles
-def SULD_2D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];",
+ []>;
+
+multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.clamp \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_1D_F32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_F32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_S32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_S32
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_U32_F32
+ : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;",
+ []>;
+
+multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_V2I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_CLAMP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.clamp \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_1D_F32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_LEVEL
+ : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 3 in {
-def SULD_1D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_1D_F32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_S32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_U32_F32_GRAD
+ : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];",
+ []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_S32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32
+ : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;",
+ []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x,
+ intype:$gradx, intype:$grady)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];",
+ []>;
+
+multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
+defm TEX_UNIFIED_2D_F32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_F32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_S32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_S32
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_U32_F32
+ : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;",
+ []>;
+
+multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_V4I8_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_CLAMP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_CLAMP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.clamp \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_2D_F32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_LEVEL
+ : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
+defm TEX_UNIFIED_2D_F32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_S32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_U32_F32_GRAD
+ : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-// .trap variant
-let IsSuld = true in {
-def SULD_1D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.trap \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_S32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32
+ : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y,
+ intype:$gradx0, intype:$gradx1,
+ intype:$grady0, intype:$grady1)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}],"
+ " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
+ []>;
+multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD
+ : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_F32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_S32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_S32
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs>;
+defm TEX_UNIFIED_3D_U32_F32
+ : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_3D_I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_LEVEL
+ : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z,
+ intype:$gradx0, intype:$gradx1,
+ intype:$gradx2, intype:$grady0,
+ intype:$grady1, intype:$grady2)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}],"
+ " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
+ " \\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 2 in {
-def SULD_1D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
+defm TEX_UNIFIED_3D_F32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_S32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_3D_U32_F32_GRAD
+ : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>;
+}
-def SULD_1D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_F32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32
+ : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_F32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_S32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_U32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
+ inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_2D_ARRAY_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.trap \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32
+ : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs>;
+
+class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$r, outtype:$g,
+ outtype:$b, outtype:$a),
+ !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
+ intype:$lod)),
+ inst # " \t\\{$r, $g, $b, $a\\},"
+ " [$t, \\{$l, $x, $y, $z\\}], $lod;",
+ []>;
+multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins Int64Regs:$t)>;
+ def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype,
+ (ins i64imm:$t)>;
+}
-def SULD_3D_V2I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_TRAP
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32",
+ Float32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32",
+ Int32Regs, Float32Regs>;
+defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL
+ : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32",
+ Int32Regs, Float32Regs>;
+
+class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype, dag tex>
+ : NVPTXInst<(outs outtype:$v0, outtype:$v1,
+ outtype:$v2, outtype:$v3),
+ !con(tex, (ins intype:$x, intype:$y)),
+ inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
+ []>;
+multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
+ NVPTXRegClass intype> {
+ def _R : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins Int64Regs:$t)>;
+ def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}
-let IsSuld = 3 in {
-def SULD_1D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
+defm TLD4_UNIFIED_R_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_F32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_S32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs>;
+
+defm TLD4_UNIFIED_R_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_G_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_B_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs>;
+defm TLD4_UNIFIED_A_2D_U32_F32
+ : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs>;
-def SULD_1D_ARRAY_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+}
-def SULD_2D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+//=== Surface load instructions
-def SULD_3D_V4I8_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_TRAP
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_TRAP
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
+let IsSuld = true in {
+
+class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}
-// .zero variant
-let IsSuld = true in {
-def SULD_1D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b8.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b16.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b32.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.b64.zero \\{$r\\}, [$s, \\{$x\\}];",
- []>;
+defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
+defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
+defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
+defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_I8_TRAP : SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
+defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
+defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
+defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
+defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
+defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
+defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_1D_ARRAY_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b8.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b16.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b32.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.b64.zero \\{$r\\}, [$s, \\{$l, $x\\}];",
- []>;
+defm SULD_1D_ARRAY_I8_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I16_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_I32_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_I64_CLAMP
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I16_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_I32_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_I64_TRAP
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_I8_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I16_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_I32_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_I64_ZERO
+ : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;
+
+class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b8.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b16.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b32.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.b64.zero \\{$r\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
+defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
+defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
+defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
+defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
+defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
+defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
+defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
+defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
+defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b8.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b16.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b32.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.b64.zero \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>;
+
+class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_3D_I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b8.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b16.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b32.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.b64.zero \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
+defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>;
+defm SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
+defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
+defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;
+
+defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
+defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
+defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
+defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;
+
+defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
+defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
+defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
+defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
}
let IsSuld = 2 in {
-def SULD_1D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
- []>;
+class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_V2I64_TRAP : SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;
+
+class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b8.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b16.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b32.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v2.b64.zero \\{$r, $g\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_1D_ARRAY_V2I8_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_CLAMP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_TRAP
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SULD_1D_ARRAY_V2I8_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I16_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V2I32_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
+defm SULD_1D_ARRAY_V2I64_ZERO
+ : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;
+
+class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_V2I8_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_V2I16_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_V2I32_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_V2I64_CLAMP
+ : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_V2I8_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_V2I16_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_V2I32_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_V2I64_TRAP
+ : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_V2I8_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_V2I16_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_V2I32_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_V2I64_ZERO
+ : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;
+
+class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_ARRAY_V2I8_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_CLAMP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_TRAP
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SULD_2D_ARRAY_V2I8_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I16_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V2I32_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
+defm SULD_2D_ARRAY_V2I64_ZERO
+ : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;
+
+class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D_V2<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
+defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
+defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
+defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
+defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
+defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
+defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
+defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
+defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
+defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;
-def SULD_3D_V2I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b8.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b16.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b32.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V2I64_ZERO
- : NVPTXInst<(outs Int64Regs:$r, Int64Regs:$g),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v2.b64.zero \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
let IsSuld = 3 in {
-def SULD_1D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x),
- "suld.b.1d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
-def SULD_1D_ARRAY_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
- "suld.b.a1d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x\\}];",
- []>;
+class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b8.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b16.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
-def SULD_2D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
- "suld.b.2d.v4.b32.zero \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
- []>;
+defm SULD_1D_V4I8_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;
+
+class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
+ []>;
+multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
-def SULD_2D_ARRAY_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
-def SULD_2D_ARRAY_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
- "suld.b.a2d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$l, $x, $y, $y\\}];",
- []>;
+defm SULD_1D_ARRAY_V4I8_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_CLAMP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_TRAP
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SULD_1D_ARRAY_V4I8_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I16_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
+defm SULD_1D_ARRAY_V4I32_ZERO
+ : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;
+
+class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;
+
+class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_2D_ARRAY_V4I8_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_CLAMP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_TRAP
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SULD_2D_ARRAY_V4I8_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I16_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
+defm SULD_2D_ARRAY_V4I32_ZERO
+ : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;
+
+class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf>
+ : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
+ inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
+ def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s)>;
+ def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
+}
+
+defm SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
+defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
+defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;
+
+defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
+defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
+defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;
+
+defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
+defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
+defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;
-def SULD_3D_V4I8_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b8.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I16_ZERO
- : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b16.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
-def SULD_3D_V4I32_ZERO
- : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
- "suld.b.3d.v4.b32.zero \\{$r, $g, $b, $a\\}, "
- "[$s, \\{$x, $y, $z, $z\\}];",
- []>;
}
//-----------------------------------
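
The hunk above applies a single TableGen idiom throughout: each former standalone def becomes a small _base class that takes the texture or surface handle as a dag parameter, and a multiclass instantiates it twice, once as _R with the handle in a 64-bit register and once as _I with the handle as a 64-bit immediate, while !con splices the handle operand onto the remaining coordinate operands. A minimal sketch of that shape, assuming the surrounding NVPTX TableGen definitions; the names EXAMPLE_base, EXAMPLE and EXAMPLE_I32_TRAP are illustrative only and not part of this change:

class EXAMPLE_base<string inst, NVPTXRegClass outtype, dag surf>
  : NVPTXInst<(outs outtype:$r),
              !con(surf, (ins Int32Regs:$x)), // append the coordinate to the handle operand
              inst # " \\{$r\\}, [$s, \\{$x\\}];",
              []>;
multiclass EXAMPLE<string inst, NVPTXRegClass outtype> {
  def _R : EXAMPLE_base<inst, outtype, (ins Int64Regs:$s)>; // handle held in a register
  def _I : EXAMPLE_base<inst, outtype, (ins i64imm:$s)>;    // handle as an immediate
}
// The defm expands to two records, EXAMPLE_I32_TRAP_R and EXAMPLE_I32_TRAP_I,
// which is why later selection patterns in this change name the _R forms explicitly.
defm EXAMPLE_I32_TRAP : EXAMPLE<"suld.b.1d.b32.trap", Int32Regs>;
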
@@ -4769,56 +4028,88 @@ def SULD_3D_V4I32_ZERO
//-----------------------------------
let IsSurfTexQuery = true in {
-def TXQ_CHANNEL_ORDER
+def TXQ_CHANNEL_ORDER_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_order.b32 \t$d, [$a];",
[]>;
-def TXQ_CHANNEL_DATA_TYPE
+def TXQ_CHANNEL_ORDER_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.channel_order.b32 \t$d, [$a];",
+ []>;
+def TXQ_CHANNEL_DATA_TYPE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.channel_data_type.b32 \t$d, [$a];",
[]>;
-def TXQ_WIDTH
+def TXQ_CHANNEL_DATA_TYPE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def TXQ_WIDTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.width.b32 \t$d, [$a];",
[]>;
-def TXQ_HEIGHT
+def TXQ_WIDTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.width.b32 \t$d, [$a];",
+ []>;
+def TXQ_HEIGHT_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.height.b32 \t$d, [$a];",
[]>;
-def TXQ_DEPTH
+def TXQ_HEIGHT_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.height.b32 \t$d, [$a];",
+ []>;
+def TXQ_DEPTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.depth.b32 \t$d, [$a];",
[]>;
-def TXQ_ARRAY_SIZE
+def TXQ_DEPTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.depth.b32 \t$d, [$a];",
+ []>;
+def TXQ_ARRAY_SIZE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.array_size.b32 \t$d, [$a];",
[]>;
-def TXQ_NUM_SAMPLES
+def TXQ_ARRAY_SIZE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.array_size.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_SAMPLES_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.num_samples.b32 \t$d, [$a];",
[]>;
-def TXQ_NUM_MIPMAP_LEVELS
+def TXQ_NUM_SAMPLES_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.num_samples.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_MIPMAP_LEVELS_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"txq.num_mipmap_levels.b32 \t$d, [$a];",
[]>;
+def TXQ_NUM_MIPMAP_LEVELS_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "txq.num_mipmap_levels.b32 \t$d, [$a];",
+ []>;
}
def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
- (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+ (TXQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
- (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+ (TXQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_width Int64Regs:$a),
- (TXQ_WIDTH Int64Regs:$a)>;
+ (TXQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_height Int64Regs:$a),
- (TXQ_HEIGHT Int64Regs:$a)>;
+ (TXQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
- (TXQ_DEPTH Int64Regs:$a)>;
+ (TXQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
- (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+ (TXQ_ARRAY_SIZE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
- (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+ (TXQ_NUM_SAMPLES_R Int64Regs:$a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
- (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
+ (TXQ_NUM_MIPMAP_LEVELS_R Int64Regs:$a)>;
//-----------------------------------
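
For the texture query instructions, the rewritten def : Pat lines in the hunk above keep lowering the nvvm query intrinsics to the register (_R) variants; no pattern in this excerpt matches the new _I forms. The shape, restated verbatim from the hunk above so the names are real:

def : Pat<(int_nvvm_txq_width Int64Regs:$a),
          (TXQ_WIDTH_R Int64Regs:$a)>;
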
@@ -4826,44 +4117,68 @@ def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
//-----------------------------------
let IsSurfTexQuery = true in {
-def SUQ_CHANNEL_ORDER
+def SUQ_CHANNEL_ORDER_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_order.b32 \t$d, [$a];",
[]>;
-def SUQ_CHANNEL_DATA_TYPE
+def SUQ_CHANNEL_ORDER_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.channel_order.b32 \t$d, [$a];",
+ []>;
+def SUQ_CHANNEL_DATA_TYPE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.channel_data_type.b32 \t$d, [$a];",
[]>;
-def SUQ_WIDTH
+def SUQ_CHANNEL_DATA_TYPE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def SUQ_WIDTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.width.b32 \t$d, [$a];",
[]>;
-def SUQ_HEIGHT
+def SUQ_WIDTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.width.b32 \t$d, [$a];",
+ []>;
+def SUQ_HEIGHT_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.height.b32 \t$d, [$a];",
[]>;
-def SUQ_DEPTH
+def SUQ_HEIGHT_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.height.b32 \t$d, [$a];",
+ []>;
+def SUQ_DEPTH_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.depth.b32 \t$d, [$a];",
[]>;
-def SUQ_ARRAY_SIZE
+def SUQ_DEPTH_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.depth.b32 \t$d, [$a];",
+ []>;
+def SUQ_ARRAY_SIZE_R
: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"suq.array_size.b32 \t$d, [$a];",
[]>;
+def SUQ_ARRAY_SIZE_I
+ : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
+ "suq.array_size.b32 \t$d, [$a];",
+ []>;
}
def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
- (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+ (SUQ_CHANNEL_ORDER_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
- (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+ (SUQ_CHANNEL_DATA_TYPE_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_width Int64Regs:$a),
- (SUQ_WIDTH Int64Regs:$a)>;
+ (SUQ_WIDTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_height Int64Regs:$a),
- (SUQ_HEIGHT Int64Regs:$a)>;
+ (SUQ_HEIGHT_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
- (SUQ_DEPTH Int64Regs:$a)>;
+ (SUQ_DEPTH_R Int64Regs:$a)>;
def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
- (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+ (SUQ_ARRAY_SIZE_R Int64Regs:$a)>;
//===- Handle Query -------------------------------------------------------===//
@@ -4885,1329 +4200,522 @@ def ISTYPEP_TEXTURE
//===- Surface Stores -----------------------------------------------------===//
let IsSust = true in {
-// Unformatted
-// .clamp variant
-def SUST_B_1D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.clamp \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.clamp \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.clamp \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_1D_ARRAY_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.clamp \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.clamp \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.clamp \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.clamp \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_ARRAY_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.clamp \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_3D_B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_CLAMP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.clamp \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-// .trap variant
-def SUST_B_1D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_1D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_2D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-def SUST_B_3D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-
-
-// .zero variant
-def SUST_B_1D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b8.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.b.1d.b16.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.b.1d.b32.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- "sust.b.1d.b64.zero \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b8.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.b.1d.v2.b16.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.b.1d.v2.b32.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- "sust.b.1d.v2.b64.zero \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b8.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.b.1d.v4.b16.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.b.1d.v4.b32.zero \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
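+// Generic surface store classes. Each SUST_* multiclass below expands to a
+// _R variant, which takes the surface handle in a 64-bit register, and an
+// _I variant, which takes it as a 64-bit immediate.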
+class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, intype:$r)),
+ inst # " \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+multiclass SUST_1D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_1D_ARRAY_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.b.a1d.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.b.a1d.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r),
- "sust.b.a1d.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b8.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.a1d.v2.b16.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.a1d.v2.b32.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.a1d.v2.b64.zero \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b8.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a1d.v4.b16.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_1D_ARRAY_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a1d.v4.b32.zero \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
+defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>;
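+
+// For illustration, the first defm above,
+//   defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>;
+// expands to two records roughly equivalent to:
+//   def SUST_B_1D_B8_CLAMP_R
+//     : NVPTXInst<(outs), (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+//                 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", []>;
+//   def SUST_B_1D_B8_CLAMP_I
+//     : NVPTXInst<(outs), (ins i64imm:$s, Int32Regs:$x, Int16Regs:$r),
+//                 "sust.b.1d.b8.clamp \t[$s, \\{$x\\}], \\{$r\\};", []>;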
+
+defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>;
+defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>;
+defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>;
+
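+// Formatted (sust.p) stores are defined here only for the .trap variant and
+// the b8/b16/b32 component types.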
+defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>;
+defm SUST_P_1D_B16_TRAP : SUST_1D<"sust.p.1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>;
+
+class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>;
+defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>;
+defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>;
+defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g,
+ intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_2D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b8.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.b.2d.b16.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.b.2d.b32.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- "sust.b.2d.b64.zero \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b8.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.b.2d.v2.b16.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.b.2d.v2.b32.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
- Int64Regs:$g),
- "sust.b.2d.v2.b64.zero \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_B_2D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b8.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.2d.v4.b16.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.2d.v4.b32.zero \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>;
+defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>;
+defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>;
+defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_1D_ARRAY_B8_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_CLAMP
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_TRAP
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_B8_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B16_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_B32_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_B64_ZERO
+ : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_B8_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B16_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_B32_TRAP
+ : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_2D_ARRAY_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.b.a2d.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.b.a2d.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r),
- "sust.b.a2d.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.a2d.v2.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.a2d.v2.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.a2d.v2.b64.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b8.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.a2d.v4.b16.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_2D_ARRAY_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.a2d.v4.b32.zero \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_1D_ARRAY_V2B8_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_CLAMP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_TRAP
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_1D_ARRAY_V2B8_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B16_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V2B32_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>;
+defm SUST_B_1D_ARRAY_V2B64_ZERO
+ : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_1D_ARRAY_V2B8_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B16_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V2B32_TRAP
+ : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>;
+
+class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_1D_ARRAY_V4B8_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_CLAMP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_TRAP
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_1D_ARRAY_V4B8_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B16_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>;
+defm SUST_B_1D_ARRAY_V4B32_ZERO
+ : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_1D_ARRAY_V4B8_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B16_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>;
+defm SUST_P_1D_ARRAY_V4B32_TRAP
+ : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>;
+
+class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+multiclass SUST_2D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_B_3D_B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.b.3d.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.b.3d.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r),
- "sust.b.3d.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_B_3D_V2B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.b.3d.v2.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.b.3d.v2.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V2B64_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int64Regs:$r, Int64Regs:$g),
- "sust.b.3d.v2.b64.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_B_3D_V4B8_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b8.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B16_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.b.3d.v4.b16.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_B_3D_V4B32_ZERO
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.b.3d.v4.b32.zero \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>;
+
+class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>;
+
+class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>;
+}
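+// In the a2d and 3d forms the last coordinate appears twice ($y, $y and
+// $z, $z): PTX takes a four-component address vector for these geometries
+// and ignores the final component, so the last coordinate is repeated as
+// padding.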
-// Formatted
+defm SUST_B_2D_ARRAY_B8_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_CLAMP
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_TRAP
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_B8_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B16_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_B32_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_B64_ZERO
+ : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_B8_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B16_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_B32_TRAP
+ : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_1D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
- Int16Regs:$b, Int16Regs:$a),
- "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
- Int32Regs:$b, Int32Regs:$a),
- "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_2D_ARRAY_V2B8_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_CLAMP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_TRAP
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_2D_ARRAY_V2B8_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B16_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V2B32_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
+defm SUST_B_2D_ARRAY_V2B64_ZERO
+ : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_2D_ARRAY_V2B8_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B16_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V2B32_TRAP
+ : SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;
+
+class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
+}
+defm SUST_B_2D_ARRAY_V4B8_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_CLAMP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_TRAP
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;
+
+defm SUST_B_2D_ARRAY_V4B8_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B16_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
+defm SUST_B_2D_ARRAY_V4B32_ZERO
+ : SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;
+
+defm SUST_P_2D_ARRAY_V4B8_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B16_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
+defm SUST_P_2D_ARRAY_V4B32_TRAP
+ : SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;
+
+class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+multiclass SUST_3D<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_1D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
- "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
- "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g),
- "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_1D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
+defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
+defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
+defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
+defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
+defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
+defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
+defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
+defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
+defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
+defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
+defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;
+
+class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r, intype:$g)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
+ []>;
+multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
+}
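+// The asm string above repeats $z on purpose: PTX 3d surface accesses take a
+// four-component coordinate vector whose last element is ignored, so the
+// third coordinate is simply duplicated as padding.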
+defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;
+
+defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
+defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
+defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
+defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;
+
+defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
+defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
+defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
+defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;
+
+defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
+defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
+defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;
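+
+// For reference, a sketch of what one of the defms above expands to (the _I
+// record is identical except that the surface operand is i64imm:$s; shown
+// only as illustration, not as a separate definition):
+//
+//   def SUST_B_3D_V2B32_TRAP_R
+//     : NVPTXInst<(outs),
+//                 (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+//                      Int32Regs:$r, Int32Regs:$g),
+//                 "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};",
+//                 []>;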
+
+class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
+ : NVPTXInst<(outs),
+ !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ intype:$r, intype:$g, intype:$b, intype:$a)),
+ inst # " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
+ def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
+ def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
+}
-def SUST_P_2D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g),
- "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g),
- "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
- []>;
-def SUST_P_2D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
- Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
- Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
+defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
+defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;
+defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
+defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
+defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;
-def SUST_P_2D_ARRAY_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r),
- "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r),
- "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g),
- "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_2D_ARRAY_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
+defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
+defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
+defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;
+defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
+defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
+defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;
-def SUST_P_3D_B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r),
- "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r),
- "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
- []>;
-def SUST_P_3D_V2B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V2B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g),
- "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V2B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g),
- "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g\\};",
- []>;
-def SUST_P_3D_V4B8_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_3D_V4B16_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
-def SUST_P_3D_V4B32_TRAP
- : NVPTXInst<(outs),
- (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
- Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
- "\\{$r, $g, $b, $a\\};",
- []>;
}
// Surface store instruction patterns
@@ -6217,248 +4725,248 @@ def SUST_P_3D_V4B32_TRAP
// .clamp variant
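// These patterns bind the NVVM surface-store intrinsics to the
// register-handle (_R) instruction variants defined above.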
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_CLAMP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_CLAMP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_CLAMP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_CLAMP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_CLAMP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6467,77 +4975,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
def : Pat<(int_nvvm_sust_b_3d_i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_CLAMP Int64Regs:$s,
+ (SUST_B_3D_B64_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_CLAMP Int64Regs:$s,
+ (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6545,248 +5053,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp
// .trap variant
def : Pat<(int_nvvm_sust_b_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6795,77 +5303,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
def : Pat<(int_nvvm_sust_b_3d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_TRAP Int64Regs:$s,
+ (SUST_B_3D_B64_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_TRAP Int64Regs:$s,
+ (SUST_B_3D_V2B64_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
+ (SUST_B_3D_V4B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -6873,248 +5381,248 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
// .zero variant
def : Pat<(int_nvvm_sust_b_1d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_B_1D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_B_1D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
+ (SUST_B_1D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x,
+ (SUST_B_1D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_B_1D_ARRAY_B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_B_1D_ARRAY_B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
- (SUST_B_1D_ARRAY_B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_1D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_1D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_1D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B8_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_1D_ARRAY_V4B16_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_1D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_B_1D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_V2B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_B_2D_V2B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g),
- (SUST_B_2D_V2B64_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B8_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B8_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_V4B16_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B16_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_V4B32_ZERO Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_B_2D_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B8_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B8_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_B_2D_ARRAY_B16_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B16_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_B_2D_ARRAY_B32_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B32_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
- (SUST_B_2D_ARRAY_B64_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_B64_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B8_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B8_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_2D_ARRAY_V2B16_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B16_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_B_2D_ARRAY_V2B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r,
Int64Regs:$g),
- (SUST_B_2D_ARRAY_V2B64_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V2B64_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B8_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B8_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_2D_ARRAY_V4B16_ZERO Int64Regs:$s,
+ (SUST_B_2D_ARRAY_V4B16_ZERO_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_2D_ARRAY_V4B32_ZERO Int64Regs:$s, Int32Regs:$l,
+ (SUST_B_2D_ARRAY_V4B32_ZERO_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7123,77 +5631,77 @@ def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
def : Pat<(int_nvvm_sust_b_3d_i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_B_3D_B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_B_3D_B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r),
- (SUST_B_3D_B64_ZERO Int64Regs:$s,
+ (SUST_B_3D_B64_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r)>;
def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_B_3D_V2B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_B_3D_V2B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g),
- (SUST_B_3D_V2B64_ZERO Int64Regs:$s,
+ (SUST_B_3D_V2B64_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int64Regs:$r, Int64Regs:$g)>;
def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B8_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B8_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_B_3D_V4B16_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B16_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_B_3D_V4B32_ZERO Int64Regs:$s,
+ (SUST_B_3D_V4B32_ZERO_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7202,207 +5710,207 @@ def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
def : Pat<(int_nvvm_sust_p_1d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_P_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+ (SUST_P_1D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
- (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+ (SUST_P_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ (SUST_P_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
- (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
- (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ (SUST_P_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
- (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
- (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
- (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
Int32Regs:$g),
- (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l,
Int32Regs:$x, Int32Regs:$y,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7411,63 +5919,63 @@ def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
def : Pat<(int_nvvm_sust_p_3d_i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_P_3D_B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r),
- (SUST_P_3D_B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r),
- (SUST_P_3D_B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r)>;
def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g),
- (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g),
- (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g)>;
def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
- (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
- (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+ (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
@@ -7578,6 +6086,7 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(ptx_elt_type, "bf16") : Int32Regs,
!eq(ptx_elt_type, "tf32") : Int32Regs,
!eq(ptx_elt_type, "s32") : Int32Regs,
+ !eq(ptx_elt_type, "b16") : Int32Regs,
!eq(ptx_elt_type, "s8") : Int32Regs,
!eq(ptx_elt_type, "u8") : Int32Regs,
!eq(ptx_elt_type, "s4") : Int32Regs,
@@ -7661,7 +6170,11 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(geom, "m16n8k64"),
!eq(geom, "m8n8k128"),
!eq(geom, "m16n8k128"),
- !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70]);
+ !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
+
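+ // Assumed reading of the predicate below: ldmatrix with b16 elements and
+ // m8n8 geometry requires sm_75 and PTX ISA 6.5 (hasSM75, hasPTX65).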
+ !and(!eq(op,"ldmatrix"),
+ !eq(ptx_elt_type,"b16"),
+ !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
// template DAGs for instruction inputs/output.
dag Outs = !dag(outs, ptx_regs, reg_names);
@@ -7910,6 +6423,44 @@ defset list<WMMA_INSTR> MMAs = {
} // layout_a
} // defset
+//
+// ldmatrix.sync.aligned.m8n8[|.trans][|.shared].b16
+//
+class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
+ DAGOperand SrcOp>
+ : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
+ Requires<Frag.Predicates> {
+ // Build PatFrag that only matches particular address space.
+ PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
+ !cond(!eq(Space, ".shared"): AS_match.shared,
+ true: AS_match.generic)>;
+ // Build AS-constrained pattern.
+ let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
+
+ let OutOperandList = Frag.Outs;
+ let InOperandList = !con(Args, (ins MmaCode:$ptx));
+ let AsmString = "ldmatrix.sync.aligned."
+ # Frag.geom
+ # "." # Frag.frag
+ # !if(Transposed, ".trans", "")
+ # Space
+ # "." # Frag.ptx_elt_type
+ # " " # Frag.regstring # ", [$src];";
+}
+
+// Create all ldmatrix variants
+defset list<WMMA_INSTR> LDMATRIXs = {
+ foreach transposed = [false, true] in {
+ foreach space = [".shared", ""] in {
+ foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
+ foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
+ if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
+ def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
+ addr>;
+ } // addr
+ } // space
+ } // transposed
+} // defset
// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
@@ -7921,5 +6472,5 @@ class MMA_PAT<WMMA_INSTR wi>
Requires<wi.Predicates>;
// Build intrinsic->instruction patterns for all MMA instructions.
-foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs) in
+foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
def : MMA_PAT<mma>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 1bd02552b666..369238436083 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -78,15 +78,12 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
new AddrSpaceCastInst(NewASCToLocal, GenericAddrTy, "");
NewASCToLocal->insertAfter(allocaInst);
NewASCToGeneric->insertAfter(NewASCToLocal);
- for (Value::use_iterator UI = allocaInst->use_begin(),
- UE = allocaInst->use_end();
- UI != UE;) {
+ for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXInferAddressSpaces. For other types
// of instructions this is unnecessary and may introduce redundant
// address cast.
- const auto &AllocaUse = *UI++;
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
if (LI && LI->getPointerOperand() == allocaInst &&
!LI->isVolatile()) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index 5e6411c61eab..1f3b4c9440d8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -21,17 +21,19 @@
// This peephole pass optimizes these cases, for example
//
// It will transform the following pattern
-// %0 = LEA_ADDRi64 %VRFrame, 4
+// %0 = LEA_ADDRi64 %VRFrame64, 4
// %1 = cvta_to_local_yes_64 %0
//
// into
-// %1 = LEA_ADDRi64 %VRFrameLocal, 4
+// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
//
-// %VRFrameLocal is the virtual register name of %SPL
+// %VRFrameLocal64 is the virtual register name of %SPL
//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
+#include "NVPTXRegisterInfo.h"
+#include "NVPTXSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -92,9 +94,12 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
return false;
}
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
// Check the LEA_ADDRi operand is Frame index
auto &BaseAddrOp = GenericAddrDef->getOperand(1);
- if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
+ if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
return true;
}
@@ -108,10 +113,13 @@ static void CombineCVTAToLocal(MachineInstr &Root) {
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
MachineInstrBuilder MIB =
BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
Root.getOperand(0).getReg())
- .addReg(NVPTX::VRFrameLocal)
+ .addReg(NRI->getFrameLocalRegister(MF))
.add(Prev.getOperand(2));
MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
@@ -142,10 +150,13 @@ bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
} // Instruction
} // Basic Block
+ const NVPTXRegisterInfo *NRI =
+ MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
+
// Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
const auto &MRI = MF.getRegInfo();
- if (MRI.use_empty(NVPTX::VRFrame)) {
- if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
+ if (MRI.use_empty(NRI->getFrameRegister(MF))) {
+ if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
MI->eraseFromParentAndMarkDBGValuesForRemoval();
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 8e2299e65222..16fbe1a65562 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -74,7 +74,6 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
auto Offset =
TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
Op.ChangeToRegister(Reg, /*isDef=*/false);
- Op.setIsDebug();
const DIExpression *DIExpr = MI.getDebugExpression();
if (MI.isNonListDebugValue()) {
DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 5cdec0925b26..ec7307265bca 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -13,6 +13,7 @@
#include "NVPTXRegisterInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
+#include "NVPTXTargetMachine.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -106,6 +107,14 @@ NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
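+ // Mark the environment registers and the virtual frame, frame-local, and
+ // depot registers as reserved.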
+ for (unsigned Reg = NVPTX::ENVREG0; Reg <= NVPTX::ENVREG31; ++Reg) {
+ markSuperRegs(Reserved, Reg);
+ }
+ markSuperRegs(Reserved, NVPTX::VRFrame32);
+ markSuperRegs(Reserved, NVPTX::VRFrameLocal32);
+ markSuperRegs(Reserved, NVPTX::VRFrame64);
+ markSuperRegs(Reserved, NVPTX::VRFrameLocal64);
+ markSuperRegs(Reserved, NVPTX::VRDepot);
return Reserved;
}
@@ -122,10 +131,19 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return NVPTX::VRFrame;
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ return TM.is64Bit() ? NVPTX::VRFrame64 : NVPTX::VRFrame32;
+}
+
+Register
+NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const {
+ const NVPTXTargetMachine &TM =
+ static_cast<const NVPTXTargetMachine &>(MF.getTarget());
+ return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 9ef6940daf86..c6dd647f4637 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -43,6 +43,7 @@ public:
RegScavenger *RS = nullptr) const override;
Register getFrameRegister(const MachineFunction &MF) const override;
+ Register getFrameLocalRegister(const MachineFunction &MF) const;
ManagedStringPool *getStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 19895a20bacf..162167e8720d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -22,8 +22,10 @@ class NVPTXRegClass<list<ValueType> regTypes, int alignment, dag regList>
//===----------------------------------------------------------------------===//
// Special Registers used as stack pointer
-def VRFrame : NVPTXReg<"%SP">;
-def VRFrameLocal : NVPTXReg<"%SPL">;
+def VRFrame32 : NVPTXReg<"%SP">;
+def VRFrame64 : NVPTXReg<"%SP">;
+def VRFrameLocal32 : NVPTXReg<"%SPL">;
+def VRFrameLocal64 : NVPTXReg<"%SPL">;
// Special Registers used as the stack
def VRDepot : NVPTXReg<"%Depot">;
@@ -56,8 +58,8 @@ foreach i = 0...31 in {
//===----------------------------------------------------------------------===//
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>;
-def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4))>;
-def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4))>;
+def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>;
+def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
def Float16Regs : NVPTXRegClass<[f16], 16, (add (sequence "H%u", 0, 4))>;
def Float16x2Regs : NVPTXRegClass<[v2f16], 32, (add (sequence "HH%u", 0, 4))>;
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
@@ -68,5 +70,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>;
def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>;
// Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used.
-def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRFrameLocal, VRDepot,
+def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame32, VRFrameLocal32, VRDepot,
(sequence "ENVREG%u", 0, 31))>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 8ae542130a14..e404cead344b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -41,7 +41,7 @@ public:
}
private:
bool processInstr(MachineInstr &MI);
- void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+ bool replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
bool findIndexForHandle(MachineOperand &Op, MachineFunction &MF,
unsigned &Idx);
};
@@ -76,19 +76,1675 @@ bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
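+// Map a surface-load (SULD) opcode taking its surface handle in a register
+// (_R suffix) to the variant taking an immediate handle (_I suffix).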
+static unsigned suldRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::SULD_1D_I8_CLAMP_R:
+ return NVPTX::SULD_1D_I8_CLAMP_I;
+ case NVPTX::SULD_1D_I16_CLAMP_R:
+ return NVPTX::SULD_1D_I16_CLAMP_I;
+ case NVPTX::SULD_1D_I32_CLAMP_R:
+ return NVPTX::SULD_1D_I32_CLAMP_I;
+ case NVPTX::SULD_1D_I64_CLAMP_R:
+ return NVPTX::SULD_1D_I64_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I8_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I8_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I16_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I16_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I32_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I32_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_I64_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_I64_CLAMP_I;
+ case NVPTX::SULD_2D_I8_CLAMP_R:
+ return NVPTX::SULD_2D_I8_CLAMP_I;
+ case NVPTX::SULD_2D_I16_CLAMP_R:
+ return NVPTX::SULD_2D_I16_CLAMP_I;
+ case NVPTX::SULD_2D_I32_CLAMP_R:
+ return NVPTX::SULD_2D_I32_CLAMP_I;
+ case NVPTX::SULD_2D_I64_CLAMP_R:
+ return NVPTX::SULD_2D_I64_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I8_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I8_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I16_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I16_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I32_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I32_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_I64_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_I64_CLAMP_I;
+ case NVPTX::SULD_3D_I8_CLAMP_R:
+ return NVPTX::SULD_3D_I8_CLAMP_I;
+ case NVPTX::SULD_3D_I16_CLAMP_R:
+ return NVPTX::SULD_3D_I16_CLAMP_I;
+ case NVPTX::SULD_3D_I32_CLAMP_R:
+ return NVPTX::SULD_3D_I32_CLAMP_I;
+ case NVPTX::SULD_3D_I64_CLAMP_R:
+ return NVPTX::SULD_3D_I64_CLAMP_I;
+ case NVPTX::SULD_1D_V2I8_CLAMP_R:
+ return NVPTX::SULD_1D_V2I8_CLAMP_I;
+ case NVPTX::SULD_1D_V2I16_CLAMP_R:
+ return NVPTX::SULD_1D_V2I16_CLAMP_I;
+ case NVPTX::SULD_1D_V2I32_CLAMP_R:
+ return NVPTX::SULD_1D_V2I32_CLAMP_I;
+ case NVPTX::SULD_1D_V2I64_CLAMP_R:
+ return NVPTX::SULD_1D_V2I64_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I8_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I16_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I32_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I64_CLAMP_I;
+ case NVPTX::SULD_2D_V2I8_CLAMP_R:
+ return NVPTX::SULD_2D_V2I8_CLAMP_I;
+ case NVPTX::SULD_2D_V2I16_CLAMP_R:
+ return NVPTX::SULD_2D_V2I16_CLAMP_I;
+ case NVPTX::SULD_2D_V2I32_CLAMP_R:
+ return NVPTX::SULD_2D_V2I32_CLAMP_I;
+ case NVPTX::SULD_2D_V2I64_CLAMP_R:
+ return NVPTX::SULD_2D_V2I64_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I8_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I16_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I32_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I64_CLAMP_I;
+ case NVPTX::SULD_3D_V2I8_CLAMP_R:
+ return NVPTX::SULD_3D_V2I8_CLAMP_I;
+ case NVPTX::SULD_3D_V2I16_CLAMP_R:
+ return NVPTX::SULD_3D_V2I16_CLAMP_I;
+ case NVPTX::SULD_3D_V2I32_CLAMP_R:
+ return NVPTX::SULD_3D_V2I32_CLAMP_I;
+ case NVPTX::SULD_3D_V2I64_CLAMP_R:
+ return NVPTX::SULD_3D_V2I64_CLAMP_I;
+ case NVPTX::SULD_1D_V4I8_CLAMP_R:
+ return NVPTX::SULD_1D_V4I8_CLAMP_I;
+ case NVPTX::SULD_1D_V4I16_CLAMP_R:
+ return NVPTX::SULD_1D_V4I16_CLAMP_I;
+ case NVPTX::SULD_1D_V4I32_CLAMP_R:
+ return NVPTX::SULD_1D_V4I32_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I8_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I16_CLAMP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I32_CLAMP_I;
+ case NVPTX::SULD_2D_V4I8_CLAMP_R:
+ return NVPTX::SULD_2D_V4I8_CLAMP_I;
+ case NVPTX::SULD_2D_V4I16_CLAMP_R:
+ return NVPTX::SULD_2D_V4I16_CLAMP_I;
+ case NVPTX::SULD_2D_V4I32_CLAMP_R:
+ return NVPTX::SULD_2D_V4I32_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I8_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I16_CLAMP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I32_CLAMP_I;
+ case NVPTX::SULD_3D_V4I8_CLAMP_R:
+ return NVPTX::SULD_3D_V4I8_CLAMP_I;
+ case NVPTX::SULD_3D_V4I16_CLAMP_R:
+ return NVPTX::SULD_3D_V4I16_CLAMP_I;
+ case NVPTX::SULD_3D_V4I32_CLAMP_R:
+ return NVPTX::SULD_3D_V4I32_CLAMP_I;
+ case NVPTX::SULD_1D_I8_TRAP_R:
+ return NVPTX::SULD_1D_I8_TRAP_I;
+ case NVPTX::SULD_1D_I16_TRAP_R:
+ return NVPTX::SULD_1D_I16_TRAP_I;
+ case NVPTX::SULD_1D_I32_TRAP_R:
+ return NVPTX::SULD_1D_I32_TRAP_I;
+ case NVPTX::SULD_1D_I64_TRAP_R:
+ return NVPTX::SULD_1D_I64_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I8_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I8_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I16_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I16_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I32_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I32_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_I64_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_I64_TRAP_I;
+ case NVPTX::SULD_2D_I8_TRAP_R:
+ return NVPTX::SULD_2D_I8_TRAP_I;
+ case NVPTX::SULD_2D_I16_TRAP_R:
+ return NVPTX::SULD_2D_I16_TRAP_I;
+ case NVPTX::SULD_2D_I32_TRAP_R:
+ return NVPTX::SULD_2D_I32_TRAP_I;
+ case NVPTX::SULD_2D_I64_TRAP_R:
+ return NVPTX::SULD_2D_I64_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I8_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I8_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I16_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I16_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I32_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I32_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_I64_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_I64_TRAP_I;
+ case NVPTX::SULD_3D_I8_TRAP_R:
+ return NVPTX::SULD_3D_I8_TRAP_I;
+ case NVPTX::SULD_3D_I16_TRAP_R:
+ return NVPTX::SULD_3D_I16_TRAP_I;
+ case NVPTX::SULD_3D_I32_TRAP_R:
+ return NVPTX::SULD_3D_I32_TRAP_I;
+ case NVPTX::SULD_3D_I64_TRAP_R:
+ return NVPTX::SULD_3D_I64_TRAP_I;
+ case NVPTX::SULD_1D_V2I8_TRAP_R:
+ return NVPTX::SULD_1D_V2I8_TRAP_I;
+ case NVPTX::SULD_1D_V2I16_TRAP_R:
+ return NVPTX::SULD_1D_V2I16_TRAP_I;
+ case NVPTX::SULD_1D_V2I32_TRAP_R:
+ return NVPTX::SULD_1D_V2I32_TRAP_I;
+ case NVPTX::SULD_1D_V2I64_TRAP_R:
+ return NVPTX::SULD_1D_V2I64_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I8_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I8_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I16_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I16_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I32_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I32_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V2I64_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V2I64_TRAP_I;
+ case NVPTX::SULD_2D_V2I8_TRAP_R:
+ return NVPTX::SULD_2D_V2I8_TRAP_I;
+ case NVPTX::SULD_2D_V2I16_TRAP_R:
+ return NVPTX::SULD_2D_V2I16_TRAP_I;
+ case NVPTX::SULD_2D_V2I32_TRAP_R:
+ return NVPTX::SULD_2D_V2I32_TRAP_I;
+ case NVPTX::SULD_2D_V2I64_TRAP_R:
+ return NVPTX::SULD_2D_V2I64_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I8_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I8_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I16_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I16_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I32_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I32_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V2I64_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V2I64_TRAP_I;
+ case NVPTX::SULD_3D_V2I8_TRAP_R:
+ return NVPTX::SULD_3D_V2I8_TRAP_I;
+ case NVPTX::SULD_3D_V2I16_TRAP_R:
+ return NVPTX::SULD_3D_V2I16_TRAP_I;
+ case NVPTX::SULD_3D_V2I32_TRAP_R:
+ return NVPTX::SULD_3D_V2I32_TRAP_I;
+ case NVPTX::SULD_3D_V2I64_TRAP_R:
+ return NVPTX::SULD_3D_V2I64_TRAP_I;
+ case NVPTX::SULD_1D_V4I8_TRAP_R:
+ return NVPTX::SULD_1D_V4I8_TRAP_I;
+ case NVPTX::SULD_1D_V4I16_TRAP_R:
+ return NVPTX::SULD_1D_V4I16_TRAP_I;
+ case NVPTX::SULD_1D_V4I32_TRAP_R:
+ return NVPTX::SULD_1D_V4I32_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I8_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I8_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I16_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I16_TRAP_I;
+ case NVPTX::SULD_1D_ARRAY_V4I32_TRAP_R:
+ return NVPTX::SULD_1D_ARRAY_V4I32_TRAP_I;
+ case NVPTX::SULD_2D_V4I8_TRAP_R:
+ return NVPTX::SULD_2D_V4I8_TRAP_I;
+ case NVPTX::SULD_2D_V4I16_TRAP_R:
+ return NVPTX::SULD_2D_V4I16_TRAP_I;
+ case NVPTX::SULD_2D_V4I32_TRAP_R:
+ return NVPTX::SULD_2D_V4I32_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I8_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I8_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I16_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I16_TRAP_I;
+ case NVPTX::SULD_2D_ARRAY_V4I32_TRAP_R:
+ return NVPTX::SULD_2D_ARRAY_V4I32_TRAP_I;
+ case NVPTX::SULD_3D_V4I8_TRAP_R:
+ return NVPTX::SULD_3D_V4I8_TRAP_I;
+ case NVPTX::SULD_3D_V4I16_TRAP_R:
+ return NVPTX::SULD_3D_V4I16_TRAP_I;
+ case NVPTX::SULD_3D_V4I32_TRAP_R:
+ return NVPTX::SULD_3D_V4I32_TRAP_I;
+ case NVPTX::SULD_1D_I8_ZERO_R:
+ return NVPTX::SULD_1D_I8_ZERO_I;
+ case NVPTX::SULD_1D_I16_ZERO_R:
+ return NVPTX::SULD_1D_I16_ZERO_I;
+ case NVPTX::SULD_1D_I32_ZERO_R:
+ return NVPTX::SULD_1D_I32_ZERO_I;
+ case NVPTX::SULD_1D_I64_ZERO_R:
+ return NVPTX::SULD_1D_I64_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I8_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I8_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I16_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I16_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I32_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I32_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_I64_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_I64_ZERO_I;
+ case NVPTX::SULD_2D_I8_ZERO_R:
+ return NVPTX::SULD_2D_I8_ZERO_I;
+ case NVPTX::SULD_2D_I16_ZERO_R:
+ return NVPTX::SULD_2D_I16_ZERO_I;
+ case NVPTX::SULD_2D_I32_ZERO_R:
+ return NVPTX::SULD_2D_I32_ZERO_I;
+ case NVPTX::SULD_2D_I64_ZERO_R:
+ return NVPTX::SULD_2D_I64_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I8_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I8_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I16_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I16_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I32_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I32_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_I64_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_I64_ZERO_I;
+ case NVPTX::SULD_3D_I8_ZERO_R:
+ return NVPTX::SULD_3D_I8_ZERO_I;
+ case NVPTX::SULD_3D_I16_ZERO_R:
+ return NVPTX::SULD_3D_I16_ZERO_I;
+ case NVPTX::SULD_3D_I32_ZERO_R:
+ return NVPTX::SULD_3D_I32_ZERO_I;
+ case NVPTX::SULD_3D_I64_ZERO_R:
+ return NVPTX::SULD_3D_I64_ZERO_I;
+ case NVPTX::SULD_1D_V2I8_ZERO_R:
+ return NVPTX::SULD_1D_V2I8_ZERO_I;
+ case NVPTX::SULD_1D_V2I16_ZERO_R:
+ return NVPTX::SULD_1D_V2I16_ZERO_I;
+ case NVPTX::SULD_1D_V2I32_ZERO_R:
+ return NVPTX::SULD_1D_V2I32_ZERO_I;
+ case NVPTX::SULD_1D_V2I64_ZERO_R:
+ return NVPTX::SULD_1D_V2I64_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I8_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I8_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I16_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I16_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I32_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I32_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V2I64_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V2I64_ZERO_I;
+ case NVPTX::SULD_2D_V2I8_ZERO_R:
+ return NVPTX::SULD_2D_V2I8_ZERO_I;
+ case NVPTX::SULD_2D_V2I16_ZERO_R:
+ return NVPTX::SULD_2D_V2I16_ZERO_I;
+ case NVPTX::SULD_2D_V2I32_ZERO_R:
+ return NVPTX::SULD_2D_V2I32_ZERO_I;
+ case NVPTX::SULD_2D_V2I64_ZERO_R:
+ return NVPTX::SULD_2D_V2I64_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I8_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I8_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I16_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I16_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I32_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I32_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V2I64_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V2I64_ZERO_I;
+ case NVPTX::SULD_3D_V2I8_ZERO_R:
+ return NVPTX::SULD_3D_V2I8_ZERO_I;
+ case NVPTX::SULD_3D_V2I16_ZERO_R:
+ return NVPTX::SULD_3D_V2I16_ZERO_I;
+ case NVPTX::SULD_3D_V2I32_ZERO_R:
+ return NVPTX::SULD_3D_V2I32_ZERO_I;
+ case NVPTX::SULD_3D_V2I64_ZERO_R:
+ return NVPTX::SULD_3D_V2I64_ZERO_I;
+ case NVPTX::SULD_1D_V4I8_ZERO_R:
+ return NVPTX::SULD_1D_V4I8_ZERO_I;
+ case NVPTX::SULD_1D_V4I16_ZERO_R:
+ return NVPTX::SULD_1D_V4I16_ZERO_I;
+ case NVPTX::SULD_1D_V4I32_ZERO_R:
+ return NVPTX::SULD_1D_V4I32_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V4I8_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V4I8_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V4I16_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V4I16_ZERO_I;
+ case NVPTX::SULD_1D_ARRAY_V4I32_ZERO_R:
+ return NVPTX::SULD_1D_ARRAY_V4I32_ZERO_I;
+ case NVPTX::SULD_2D_V4I8_ZERO_R:
+ return NVPTX::SULD_2D_V4I8_ZERO_I;
+ case NVPTX::SULD_2D_V4I16_ZERO_R:
+ return NVPTX::SULD_2D_V4I16_ZERO_I;
+ case NVPTX::SULD_2D_V4I32_ZERO_R:
+ return NVPTX::SULD_2D_V4I32_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V4I8_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V4I8_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V4I16_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V4I16_ZERO_I;
+ case NVPTX::SULD_2D_ARRAY_V4I32_ZERO_R:
+ return NVPTX::SULD_2D_ARRAY_V4I32_ZERO_I;
+ case NVPTX::SULD_3D_V4I8_ZERO_R:
+ return NVPTX::SULD_3D_V4I8_ZERO_I;
+ case NVPTX::SULD_3D_V4I16_ZERO_R:
+ return NVPTX::SULD_3D_V4I16_ZERO_I;
+ case NVPTX::SULD_3D_V4I32_ZERO_R:
+ return NVPTX::SULD_3D_V4I32_ZERO_I;
+ default:
+ llvm_unreachable("Unhandled SULD opcode");
+ }
+}
+
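+// Map a surface-store (SUST) opcode taking its surface handle in a register
+// (_R suffix) to the variant taking an immediate handle (_I suffix).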
+static unsigned sustRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::SUST_B_1D_B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_3D_B8_CLAMP_R:
+ return NVPTX::SUST_B_3D_B8_CLAMP_I;
+ case NVPTX::SUST_B_3D_B16_CLAMP_R:
+ return NVPTX::SUST_B_3D_B16_CLAMP_I;
+ case NVPTX::SUST_B_3D_B32_CLAMP_R:
+ return NVPTX::SUST_B_3D_B32_CLAMP_I;
+ case NVPTX::SUST_B_3D_B64_CLAMP_R:
+ return NVPTX::SUST_B_3D_B64_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B8_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B8_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B16_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B16_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B32_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B32_CLAMP_I;
+ case NVPTX::SUST_B_3D_V2B64_CLAMP_R:
+ return NVPTX::SUST_B_3D_V2B64_CLAMP_I;
+ case NVPTX::SUST_B_3D_V4B8_CLAMP_R:
+ return NVPTX::SUST_B_3D_V4B8_CLAMP_I;
+ case NVPTX::SUST_B_3D_V4B16_CLAMP_R:
+ return NVPTX::SUST_B_3D_V4B16_CLAMP_I;
+ case NVPTX::SUST_B_3D_V4B32_CLAMP_R:
+ return NVPTX::SUST_B_3D_V4B32_CLAMP_I;
+ case NVPTX::SUST_B_1D_B8_TRAP_R:
+ return NVPTX::SUST_B_1D_B8_TRAP_I;
+ case NVPTX::SUST_B_1D_B16_TRAP_R:
+ return NVPTX::SUST_B_1D_B16_TRAP_I;
+ case NVPTX::SUST_B_1D_B32_TRAP_R:
+ return NVPTX::SUST_B_1D_B32_TRAP_I;
+ case NVPTX::SUST_B_1D_B64_TRAP_R:
+ return NVPTX::SUST_B_1D_B64_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B8_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B8_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B16_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B16_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B32_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B32_TRAP_I;
+ case NVPTX::SUST_B_1D_V2B64_TRAP_R:
+ return NVPTX::SUST_B_1D_V2B64_TRAP_I;
+ case NVPTX::SUST_B_1D_V4B8_TRAP_R:
+ return NVPTX::SUST_B_1D_V4B8_TRAP_I;
+ case NVPTX::SUST_B_1D_V4B16_TRAP_R:
+ return NVPTX::SUST_B_1D_V4B16_TRAP_I;
+ case NVPTX::SUST_B_1D_V4B32_TRAP_R:
+ return NVPTX::SUST_B_1D_V4B32_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_B64_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_B64_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B64_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_B_2D_B8_TRAP_R:
+ return NVPTX::SUST_B_2D_B8_TRAP_I;
+ case NVPTX::SUST_B_2D_B16_TRAP_R:
+ return NVPTX::SUST_B_2D_B16_TRAP_I;
+ case NVPTX::SUST_B_2D_B32_TRAP_R:
+ return NVPTX::SUST_B_2D_B32_TRAP_I;
+ case NVPTX::SUST_B_2D_B64_TRAP_R:
+ return NVPTX::SUST_B_2D_B64_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B8_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B8_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B16_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B16_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B32_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B32_TRAP_I;
+ case NVPTX::SUST_B_2D_V2B64_TRAP_R:
+ return NVPTX::SUST_B_2D_V2B64_TRAP_I;
+ case NVPTX::SUST_B_2D_V4B8_TRAP_R:
+ return NVPTX::SUST_B_2D_V4B8_TRAP_I;
+ case NVPTX::SUST_B_2D_V4B16_TRAP_R:
+ return NVPTX::SUST_B_2D_V4B16_TRAP_I;
+ case NVPTX::SUST_B_2D_V4B32_TRAP_R:
+ return NVPTX::SUST_B_2D_V4B32_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_B64_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_B64_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B64_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_B_3D_B8_TRAP_R:
+ return NVPTX::SUST_B_3D_B8_TRAP_I;
+ case NVPTX::SUST_B_3D_B16_TRAP_R:
+ return NVPTX::SUST_B_3D_B16_TRAP_I;
+ case NVPTX::SUST_B_3D_B32_TRAP_R:
+ return NVPTX::SUST_B_3D_B32_TRAP_I;
+ case NVPTX::SUST_B_3D_B64_TRAP_R:
+ return NVPTX::SUST_B_3D_B64_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B8_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B8_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B16_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B16_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B32_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B32_TRAP_I;
+ case NVPTX::SUST_B_3D_V2B64_TRAP_R:
+ return NVPTX::SUST_B_3D_V2B64_TRAP_I;
+ case NVPTX::SUST_B_3D_V4B8_TRAP_R:
+ return NVPTX::SUST_B_3D_V4B8_TRAP_I;
+ case NVPTX::SUST_B_3D_V4B16_TRAP_R:
+ return NVPTX::SUST_B_3D_V4B16_TRAP_I;
+ case NVPTX::SUST_B_3D_V4B32_TRAP_R:
+ return NVPTX::SUST_B_3D_V4B32_TRAP_I;
+ case NVPTX::SUST_B_1D_B8_ZERO_R:
+ return NVPTX::SUST_B_1D_B8_ZERO_I;
+ case NVPTX::SUST_B_1D_B16_ZERO_R:
+ return NVPTX::SUST_B_1D_B16_ZERO_I;
+ case NVPTX::SUST_B_1D_B32_ZERO_R:
+ return NVPTX::SUST_B_1D_B32_ZERO_I;
+ case NVPTX::SUST_B_1D_B64_ZERO_R:
+ return NVPTX::SUST_B_1D_B64_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B8_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B8_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B16_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B16_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B32_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B32_ZERO_I;
+ case NVPTX::SUST_B_1D_V2B64_ZERO_R:
+ return NVPTX::SUST_B_1D_V2B64_ZERO_I;
+ case NVPTX::SUST_B_1D_V4B8_ZERO_R:
+ return NVPTX::SUST_B_1D_V4B8_ZERO_I;
+ case NVPTX::SUST_B_1D_V4B16_ZERO_R:
+ return NVPTX::SUST_B_1D_V4B16_ZERO_I;
+ case NVPTX::SUST_B_1D_V4B32_ZERO_R:
+ return NVPTX::SUST_B_1D_V4B32_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B8_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B8_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B16_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B16_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B32_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B32_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_B64_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_B64_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B8_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B16_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B32_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V2B64_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B8_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B16_ZERO_I;
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_R:
+ return NVPTX::SUST_B_1D_ARRAY_V4B32_ZERO_I;
+ case NVPTX::SUST_B_2D_B8_ZERO_R:
+ return NVPTX::SUST_B_2D_B8_ZERO_I;
+ case NVPTX::SUST_B_2D_B16_ZERO_R:
+ return NVPTX::SUST_B_2D_B16_ZERO_I;
+ case NVPTX::SUST_B_2D_B32_ZERO_R:
+ return NVPTX::SUST_B_2D_B32_ZERO_I;
+ case NVPTX::SUST_B_2D_B64_ZERO_R:
+ return NVPTX::SUST_B_2D_B64_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B8_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B8_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B16_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B16_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B32_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B32_ZERO_I;
+ case NVPTX::SUST_B_2D_V2B64_ZERO_R:
+ return NVPTX::SUST_B_2D_V2B64_ZERO_I;
+ case NVPTX::SUST_B_2D_V4B8_ZERO_R:
+ return NVPTX::SUST_B_2D_V4B8_ZERO_I;
+ case NVPTX::SUST_B_2D_V4B16_ZERO_R:
+ return NVPTX::SUST_B_2D_V4B16_ZERO_I;
+ case NVPTX::SUST_B_2D_V4B32_ZERO_R:
+ return NVPTX::SUST_B_2D_V4B32_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B8_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B8_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B16_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B16_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B32_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B32_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_B64_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_B64_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B8_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B16_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B32_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V2B64_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B8_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B16_ZERO_I;
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_R:
+ return NVPTX::SUST_B_2D_ARRAY_V4B32_ZERO_I;
+ case NVPTX::SUST_B_3D_B8_ZERO_R:
+ return NVPTX::SUST_B_3D_B8_ZERO_I;
+ case NVPTX::SUST_B_3D_B16_ZERO_R:
+ return NVPTX::SUST_B_3D_B16_ZERO_I;
+ case NVPTX::SUST_B_3D_B32_ZERO_R:
+ return NVPTX::SUST_B_3D_B32_ZERO_I;
+ case NVPTX::SUST_B_3D_B64_ZERO_R:
+ return NVPTX::SUST_B_3D_B64_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B8_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B8_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B16_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B16_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B32_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B32_ZERO_I;
+ case NVPTX::SUST_B_3D_V2B64_ZERO_R:
+ return NVPTX::SUST_B_3D_V2B64_ZERO_I;
+ case NVPTX::SUST_B_3D_V4B8_ZERO_R:
+ return NVPTX::SUST_B_3D_V4B8_ZERO_I;
+ case NVPTX::SUST_B_3D_V4B16_ZERO_R:
+ return NVPTX::SUST_B_3D_V4B16_ZERO_I;
+ case NVPTX::SUST_B_3D_V4B32_ZERO_R:
+ return NVPTX::SUST_B_3D_V4B32_ZERO_I;
+ case NVPTX::SUST_P_1D_B8_TRAP_R:
+ return NVPTX::SUST_P_1D_B8_TRAP_I;
+ case NVPTX::SUST_P_1D_B16_TRAP_R:
+ return NVPTX::SUST_P_1D_B16_TRAP_I;
+ case NVPTX::SUST_P_1D_B32_TRAP_R:
+ return NVPTX::SUST_P_1D_B32_TRAP_I;
+ case NVPTX::SUST_P_1D_V2B8_TRAP_R:
+ return NVPTX::SUST_P_1D_V2B8_TRAP_I;
+ case NVPTX::SUST_P_1D_V2B16_TRAP_R:
+ return NVPTX::SUST_P_1D_V2B16_TRAP_I;
+ case NVPTX::SUST_P_1D_V2B32_TRAP_R:
+ return NVPTX::SUST_P_1D_V2B32_TRAP_I;
+ case NVPTX::SUST_P_1D_V4B8_TRAP_R:
+ return NVPTX::SUST_P_1D_V4B8_TRAP_I;
+ case NVPTX::SUST_P_1D_V4B16_TRAP_R:
+ return NVPTX::SUST_P_1D_V4B16_TRAP_I;
+ case NVPTX::SUST_P_1D_V4B32_TRAP_R:
+ return NVPTX::SUST_P_1D_V4B32_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_P_2D_B8_TRAP_R:
+ return NVPTX::SUST_P_2D_B8_TRAP_I;
+ case NVPTX::SUST_P_2D_B16_TRAP_R:
+ return NVPTX::SUST_P_2D_B16_TRAP_I;
+ case NVPTX::SUST_P_2D_B32_TRAP_R:
+ return NVPTX::SUST_P_2D_B32_TRAP_I;
+ case NVPTX::SUST_P_2D_V2B8_TRAP_R:
+ return NVPTX::SUST_P_2D_V2B8_TRAP_I;
+ case NVPTX::SUST_P_2D_V2B16_TRAP_R:
+ return NVPTX::SUST_P_2D_V2B16_TRAP_I;
+ case NVPTX::SUST_P_2D_V2B32_TRAP_R:
+ return NVPTX::SUST_P_2D_V2B32_TRAP_I;
+ case NVPTX::SUST_P_2D_V4B8_TRAP_R:
+ return NVPTX::SUST_P_2D_V4B8_TRAP_I;
+ case NVPTX::SUST_P_2D_V4B16_TRAP_R:
+ return NVPTX::SUST_P_2D_V4B16_TRAP_I;
+ case NVPTX::SUST_P_2D_V4B32_TRAP_R:
+ return NVPTX::SUST_P_2D_V4B32_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_B8_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_B8_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_B16_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_B16_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_B32_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_B32_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP_I;
+ case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_R:
+ return NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP_I;
+ case NVPTX::SUST_P_3D_B8_TRAP_R:
+ return NVPTX::SUST_P_3D_B8_TRAP_I;
+ case NVPTX::SUST_P_3D_B16_TRAP_R:
+ return NVPTX::SUST_P_3D_B16_TRAP_I;
+ case NVPTX::SUST_P_3D_B32_TRAP_R:
+ return NVPTX::SUST_P_3D_B32_TRAP_I;
+ case NVPTX::SUST_P_3D_V2B8_TRAP_R:
+ return NVPTX::SUST_P_3D_V2B8_TRAP_I;
+ case NVPTX::SUST_P_3D_V2B16_TRAP_R:
+ return NVPTX::SUST_P_3D_V2B16_TRAP_I;
+ case NVPTX::SUST_P_3D_V2B32_TRAP_R:
+ return NVPTX::SUST_P_3D_V2B32_TRAP_I;
+ case NVPTX::SUST_P_3D_V4B8_TRAP_R:
+ return NVPTX::SUST_P_3D_V4B8_TRAP_I;
+ case NVPTX::SUST_P_3D_V4B16_TRAP_R:
+ return NVPTX::SUST_P_3D_V4B16_TRAP_I;
+ case NVPTX::SUST_P_3D_V4B32_TRAP_R:
+ return NVPTX::SUST_P_3D_V4B32_TRAP_I;
+ default:
+ llvm_unreachable("Unhandled SUST opcode");
+ }
+}
+
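+// Map a texture (TEX) opcode whose texture handle is in a register to the
+// corresponding opcode with an immediate texture handle (first suffix letter
+// R -> I); the sampler operand kind is left unchanged.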
+static unsigned texRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::TEX_1D_F32_S32_RR:
+ return NVPTX::TEX_1D_F32_S32_IR;
+ case NVPTX::TEX_1D_F32_S32_RI:
+ return NVPTX::TEX_1D_F32_S32_II;
+ case NVPTX::TEX_1D_F32_F32_RR:
+ return NVPTX::TEX_1D_F32_F32_IR;
+ case NVPTX::TEX_1D_F32_F32_RI:
+ return NVPTX::TEX_1D_F32_F32_II;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_F32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_F32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_S32_S32_RR:
+ return NVPTX::TEX_1D_S32_S32_IR;
+ case NVPTX::TEX_1D_S32_S32_RI:
+ return NVPTX::TEX_1D_S32_S32_II;
+ case NVPTX::TEX_1D_S32_F32_RR:
+ return NVPTX::TEX_1D_S32_F32_IR;
+ case NVPTX::TEX_1D_S32_F32_RI:
+ return NVPTX::TEX_1D_S32_F32_II;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_S32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_S32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_U32_S32_RR:
+ return NVPTX::TEX_1D_U32_S32_IR;
+ case NVPTX::TEX_1D_U32_S32_RI:
+ return NVPTX::TEX_1D_U32_S32_II;
+ case NVPTX::TEX_1D_U32_F32_RR:
+ return NVPTX::TEX_1D_U32_F32_IR;
+ case NVPTX::TEX_1D_U32_F32_RI:
+ return NVPTX::TEX_1D_U32_F32_II;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_U32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_U32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_U32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_F32_S32_RR:
+ return NVPTX::TEX_2D_F32_S32_IR;
+ case NVPTX::TEX_2D_F32_S32_RI:
+ return NVPTX::TEX_2D_F32_S32_II;
+ case NVPTX::TEX_2D_F32_F32_RR:
+ return NVPTX::TEX_2D_F32_F32_IR;
+ case NVPTX::TEX_2D_F32_F32_RI:
+ return NVPTX::TEX_2D_F32_F32_II;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_F32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_F32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_S32_S32_RR:
+ return NVPTX::TEX_2D_S32_S32_IR;
+ case NVPTX::TEX_2D_S32_S32_RI:
+ return NVPTX::TEX_2D_S32_S32_II;
+ case NVPTX::TEX_2D_S32_F32_RR:
+ return NVPTX::TEX_2D_S32_F32_IR;
+ case NVPTX::TEX_2D_S32_F32_RI:
+ return NVPTX::TEX_2D_S32_F32_II;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_S32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_S32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_U32_S32_RR:
+ return NVPTX::TEX_2D_U32_S32_IR;
+ case NVPTX::TEX_2D_U32_S32_RI:
+ return NVPTX::TEX_2D_U32_S32_II;
+ case NVPTX::TEX_2D_U32_F32_RR:
+ return NVPTX::TEX_2D_U32_F32_IR;
+ case NVPTX::TEX_2D_U32_F32_RI:
+ return NVPTX::TEX_2D_U32_F32_II;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_U32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_U32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_3D_F32_S32_RR:
+ return NVPTX::TEX_3D_F32_S32_IR;
+ case NVPTX::TEX_3D_F32_S32_RI:
+ return NVPTX::TEX_3D_F32_S32_II;
+ case NVPTX::TEX_3D_F32_F32_RR:
+ return NVPTX::TEX_3D_F32_F32_IR;
+ case NVPTX::TEX_3D_F32_F32_RI:
+ return NVPTX::TEX_3D_F32_F32_II;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_F32_F32_GRAD_IR;
+ case NVPTX::TEX_3D_F32_F32_GRAD_RI:
+ return NVPTX::TEX_3D_F32_F32_GRAD_II;
+ case NVPTX::TEX_3D_S32_S32_RR:
+ return NVPTX::TEX_3D_S32_S32_IR;
+ case NVPTX::TEX_3D_S32_S32_RI:
+ return NVPTX::TEX_3D_S32_S32_II;
+ case NVPTX::TEX_3D_S32_F32_RR:
+ return NVPTX::TEX_3D_S32_F32_IR;
+ case NVPTX::TEX_3D_S32_F32_RI:
+ return NVPTX::TEX_3D_S32_F32_II;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_S32_F32_GRAD_IR;
+ case NVPTX::TEX_3D_S32_F32_GRAD_RI:
+ return NVPTX::TEX_3D_S32_F32_GRAD_II;
+ case NVPTX::TEX_3D_U32_S32_RR:
+ return NVPTX::TEX_3D_U32_S32_IR;
+ case NVPTX::TEX_3D_U32_S32_RI:
+ return NVPTX::TEX_3D_U32_S32_II;
+ case NVPTX::TEX_3D_U32_F32_RR:
+ return NVPTX::TEX_3D_U32_F32_IR;
+ case NVPTX::TEX_3D_U32_F32_RI:
+ return NVPTX::TEX_3D_U32_F32_II;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_U32_F32_GRAD_IR;
+ case NVPTX::TEX_3D_U32_F32_GRAD_RI:
+ return NVPTX::TEX_3D_U32_F32_GRAD_II;
+ case NVPTX::TEX_CUBE_F32_F32_RR:
+ return NVPTX::TEX_CUBE_F32_F32_IR;
+ case NVPTX::TEX_CUBE_F32_F32_RI:
+ return NVPTX::TEX_CUBE_F32_F32_II;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_S32_F32_RR:
+ return NVPTX::TEX_CUBE_S32_F32_IR;
+ case NVPTX::TEX_CUBE_S32_F32_RI:
+ return NVPTX::TEX_CUBE_S32_F32_II;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_U32_F32_RR:
+ return NVPTX::TEX_CUBE_U32_F32_IR;
+ case NVPTX::TEX_CUBE_U32_F32_RI:
+ return NVPTX::TEX_CUBE_U32_F32_II;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_IR;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_RI:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_IR;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_RI:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_IR;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_RI:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TLD4_R_2D_F32_F32_RR:
+ return NVPTX::TLD4_R_2D_F32_F32_IR;
+ case NVPTX::TLD4_R_2D_F32_F32_RI:
+ return NVPTX::TLD4_R_2D_F32_F32_II;
+ case NVPTX::TLD4_G_2D_F32_F32_RR:
+ return NVPTX::TLD4_G_2D_F32_F32_IR;
+ case NVPTX::TLD4_G_2D_F32_F32_RI:
+ return NVPTX::TLD4_G_2D_F32_F32_II;
+ case NVPTX::TLD4_B_2D_F32_F32_RR:
+ return NVPTX::TLD4_B_2D_F32_F32_IR;
+ case NVPTX::TLD4_B_2D_F32_F32_RI:
+ return NVPTX::TLD4_B_2D_F32_F32_II;
+ case NVPTX::TLD4_A_2D_F32_F32_RR:
+ return NVPTX::TLD4_A_2D_F32_F32_IR;
+ case NVPTX::TLD4_A_2D_F32_F32_RI:
+ return NVPTX::TLD4_A_2D_F32_F32_II;
+ case NVPTX::TLD4_R_2D_S32_F32_RR:
+ return NVPTX::TLD4_R_2D_S32_F32_IR;
+ case NVPTX::TLD4_R_2D_S32_F32_RI:
+ return NVPTX::TLD4_R_2D_S32_F32_II;
+ case NVPTX::TLD4_G_2D_S32_F32_RR:
+ return NVPTX::TLD4_G_2D_S32_F32_IR;
+ case NVPTX::TLD4_G_2D_S32_F32_RI:
+ return NVPTX::TLD4_G_2D_S32_F32_II;
+ case NVPTX::TLD4_B_2D_S32_F32_RR:
+ return NVPTX::TLD4_B_2D_S32_F32_IR;
+ case NVPTX::TLD4_B_2D_S32_F32_RI:
+ return NVPTX::TLD4_B_2D_S32_F32_II;
+ case NVPTX::TLD4_A_2D_S32_F32_RR:
+ return NVPTX::TLD4_A_2D_S32_F32_IR;
+ case NVPTX::TLD4_A_2D_S32_F32_RI:
+ return NVPTX::TLD4_A_2D_S32_F32_II;
+ case NVPTX::TLD4_R_2D_U32_F32_RR:
+ return NVPTX::TLD4_R_2D_U32_F32_IR;
+ case NVPTX::TLD4_R_2D_U32_F32_RI:
+ return NVPTX::TLD4_R_2D_U32_F32_II;
+ case NVPTX::TLD4_G_2D_U32_F32_RR:
+ return NVPTX::TLD4_G_2D_U32_F32_IR;
+ case NVPTX::TLD4_G_2D_U32_F32_RI:
+ return NVPTX::TLD4_G_2D_U32_F32_II;
+ case NVPTX::TLD4_B_2D_U32_F32_RR:
+ return NVPTX::TLD4_B_2D_U32_F32_IR;
+ case NVPTX::TLD4_B_2D_U32_F32_RI:
+ return NVPTX::TLD4_B_2D_U32_F32_II;
+ case NVPTX::TLD4_A_2D_U32_F32_RR:
+ return NVPTX::TLD4_A_2D_U32_F32_IR;
+ case NVPTX::TLD4_A_2D_U32_F32_RI:
+ return NVPTX::TLD4_A_2D_U32_F32_II;
+ case NVPTX::TEX_UNIFIED_1D_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_S32_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_S32_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_S32_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_S32_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_S32_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_S32_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_R:
+ return NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD_I;
+ case NVPTX::TEX_UNIFIED_CUBE_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_I;
+ case NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_R:
+ return NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL_I;
+ case NVPTX::TLD4_UNIFIED_R_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_R_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_G_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_G_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_B_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_B_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_A_2D_F32_F32_R:
+ return NVPTX::TLD4_UNIFIED_A_2D_F32_F32_I;
+ case NVPTX::TLD4_UNIFIED_R_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_R_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_G_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_G_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_B_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_B_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_A_2D_S32_F32_R:
+ return NVPTX::TLD4_UNIFIED_A_2D_S32_F32_I;
+ case NVPTX::TLD4_UNIFIED_R_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_R_2D_U32_F32_I;
+ case NVPTX::TLD4_UNIFIED_G_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_G_2D_U32_F32_I;
+ case NVPTX::TLD4_UNIFIED_B_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_B_2D_U32_F32_I;
+ case NVPTX::TLD4_UNIFIED_A_2D_U32_F32_R:
+ return NVPTX::TLD4_UNIFIED_A_2D_U32_F32_I;
+ default:
+ llvm_unreachable("Unhandled TEX opcode");
+ }
+}
+
+static unsigned samplerRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::TEX_1D_F32_S32_RR:
+ return NVPTX::TEX_1D_F32_S32_RI;
+ case NVPTX::TEX_1D_F32_S32_IR:
+ return NVPTX::TEX_1D_F32_S32_II;
+ case NVPTX::TEX_1D_F32_F32_RR:
+ return NVPTX::TEX_1D_F32_F32_RI;
+ case NVPTX::TEX_1D_F32_F32_IR:
+ return NVPTX::TEX_1D_F32_F32_II;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_F32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_F32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_S32_S32_RR:
+ return NVPTX::TEX_1D_S32_S32_RI;
+ case NVPTX::TEX_1D_S32_S32_IR:
+ return NVPTX::TEX_1D_S32_S32_II;
+ case NVPTX::TEX_1D_S32_F32_RR:
+ return NVPTX::TEX_1D_S32_F32_RI;
+ case NVPTX::TEX_1D_S32_F32_IR:
+ return NVPTX::TEX_1D_S32_F32_II;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_S32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_S32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_U32_S32_RR:
+ return NVPTX::TEX_1D_U32_S32_RI;
+ case NVPTX::TEX_1D_U32_S32_IR:
+ return NVPTX::TEX_1D_U32_S32_II;
+ case NVPTX::TEX_1D_U32_F32_RR:
+ return NVPTX::TEX_1D_U32_F32_RI;
+ case NVPTX::TEX_1D_U32_F32_IR:
+ return NVPTX::TEX_1D_U32_F32_II;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_U32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_U32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_U32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_S32_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_S32_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_S32_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_RI;
+ case NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_IR:
+ return NVPTX::TEX_1D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_F32_S32_RR:
+ return NVPTX::TEX_2D_F32_S32_RI;
+ case NVPTX::TEX_2D_F32_S32_IR:
+ return NVPTX::TEX_2D_F32_S32_II;
+ case NVPTX::TEX_2D_F32_F32_RR:
+ return NVPTX::TEX_2D_F32_F32_RI;
+ case NVPTX::TEX_2D_F32_F32_IR:
+ return NVPTX::TEX_2D_F32_F32_II;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_F32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_F32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_S32_S32_RR:
+ return NVPTX::TEX_2D_S32_S32_RI;
+ case NVPTX::TEX_2D_S32_S32_IR:
+ return NVPTX::TEX_2D_S32_S32_II;
+ case NVPTX::TEX_2D_S32_F32_RR:
+ return NVPTX::TEX_2D_S32_F32_RI;
+ case NVPTX::TEX_2D_S32_F32_IR:
+ return NVPTX::TEX_2D_S32_F32_II;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_S32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_S32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_U32_S32_RR:
+ return NVPTX::TEX_2D_U32_S32_RI;
+ case NVPTX::TEX_2D_U32_S32_IR:
+ return NVPTX::TEX_2D_U32_S32_II;
+ case NVPTX::TEX_2D_U32_F32_RR:
+ return NVPTX::TEX_2D_U32_F32_RI;
+ case NVPTX::TEX_2D_U32_F32_IR:
+ return NVPTX::TEX_2D_U32_F32_II;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_U32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_U32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_U32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_S32_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_ARRAY_F32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_S32_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_ARRAY_S32_F32_GRAD_II;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_S32_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_S32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_RI;
+ case NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_IR:
+ return NVPTX::TEX_2D_ARRAY_U32_F32_GRAD_II;
+ case NVPTX::TEX_3D_F32_S32_RR:
+ return NVPTX::TEX_3D_F32_S32_RI;
+ case NVPTX::TEX_3D_F32_S32_IR:
+ return NVPTX::TEX_3D_F32_S32_II;
+ case NVPTX::TEX_3D_F32_F32_RR:
+ return NVPTX::TEX_3D_F32_F32_RI;
+ case NVPTX::TEX_3D_F32_F32_IR:
+ return NVPTX::TEX_3D_F32_F32_II;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_3D_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_3D_F32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_F32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_F32_F32_GRAD_RI;
+ case NVPTX::TEX_3D_F32_F32_GRAD_IR:
+ return NVPTX::TEX_3D_F32_F32_GRAD_II;
+ case NVPTX::TEX_3D_S32_S32_RR:
+ return NVPTX::TEX_3D_S32_S32_RI;
+ case NVPTX::TEX_3D_S32_S32_IR:
+ return NVPTX::TEX_3D_S32_S32_II;
+ case NVPTX::TEX_3D_S32_F32_RR:
+ return NVPTX::TEX_3D_S32_F32_RI;
+ case NVPTX::TEX_3D_S32_F32_IR:
+ return NVPTX::TEX_3D_S32_F32_II;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_3D_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_3D_S32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_S32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_S32_F32_GRAD_RI;
+ case NVPTX::TEX_3D_S32_F32_GRAD_IR:
+ return NVPTX::TEX_3D_S32_F32_GRAD_II;
+ case NVPTX::TEX_3D_U32_S32_RR:
+ return NVPTX::TEX_3D_U32_S32_RI;
+ case NVPTX::TEX_3D_U32_S32_IR:
+ return NVPTX::TEX_3D_U32_S32_II;
+ case NVPTX::TEX_3D_U32_F32_RR:
+ return NVPTX::TEX_3D_U32_F32_RI;
+ case NVPTX::TEX_3D_U32_F32_IR:
+ return NVPTX::TEX_3D_U32_F32_II;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_3D_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_3D_U32_F32_LEVEL_II;
+ case NVPTX::TEX_3D_U32_F32_GRAD_RR:
+ return NVPTX::TEX_3D_U32_F32_GRAD_RI;
+ case NVPTX::TEX_3D_U32_F32_GRAD_IR:
+ return NVPTX::TEX_3D_U32_F32_GRAD_II;
+ case NVPTX::TEX_CUBE_F32_F32_RR:
+ return NVPTX::TEX_CUBE_F32_F32_RI;
+ case NVPTX::TEX_CUBE_F32_F32_IR:
+ return NVPTX::TEX_CUBE_F32_F32_II;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_S32_F32_RR:
+ return NVPTX::TEX_CUBE_S32_F32_RI;
+ case NVPTX::TEX_CUBE_S32_F32_IR:
+ return NVPTX::TEX_CUBE_S32_F32_II;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_U32_F32_RR:
+ return NVPTX::TEX_CUBE_U32_F32_RI;
+ case NVPTX::TEX_CUBE_U32_F32_IR:
+ return NVPTX::TEX_CUBE_U32_F32_II;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_U32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_RI;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_IR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_RI;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_IR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_RI;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_IR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_II;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_RI;
+ case NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_IR:
+ return NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL_II;
+ case NVPTX::TLD4_R_2D_F32_F32_RR:
+ return NVPTX::TLD4_R_2D_F32_F32_RI;
+ case NVPTX::TLD4_R_2D_F32_F32_IR:
+ return NVPTX::TLD4_R_2D_F32_F32_II;
+ case NVPTX::TLD4_G_2D_F32_F32_RR:
+ return NVPTX::TLD4_G_2D_F32_F32_RI;
+ case NVPTX::TLD4_G_2D_F32_F32_IR:
+ return NVPTX::TLD4_G_2D_F32_F32_II;
+ case NVPTX::TLD4_B_2D_F32_F32_RR:
+ return NVPTX::TLD4_B_2D_F32_F32_RI;
+ case NVPTX::TLD4_B_2D_F32_F32_IR:
+ return NVPTX::TLD4_B_2D_F32_F32_II;
+ case NVPTX::TLD4_A_2D_F32_F32_RR:
+ return NVPTX::TLD4_A_2D_F32_F32_RI;
+ case NVPTX::TLD4_A_2D_F32_F32_IR:
+ return NVPTX::TLD4_A_2D_F32_F32_II;
+ case NVPTX::TLD4_R_2D_S32_F32_RR:
+ return NVPTX::TLD4_R_2D_S32_F32_RI;
+ case NVPTX::TLD4_R_2D_S32_F32_IR:
+ return NVPTX::TLD4_R_2D_S32_F32_II;
+ case NVPTX::TLD4_G_2D_S32_F32_RR:
+ return NVPTX::TLD4_G_2D_S32_F32_RI;
+ case NVPTX::TLD4_G_2D_S32_F32_IR:
+ return NVPTX::TLD4_G_2D_S32_F32_II;
+ case NVPTX::TLD4_B_2D_S32_F32_RR:
+ return NVPTX::TLD4_B_2D_S32_F32_RI;
+ case NVPTX::TLD4_B_2D_S32_F32_IR:
+ return NVPTX::TLD4_B_2D_S32_F32_II;
+ case NVPTX::TLD4_A_2D_S32_F32_RR:
+ return NVPTX::TLD4_A_2D_S32_F32_RI;
+ case NVPTX::TLD4_A_2D_S32_F32_IR:
+ return NVPTX::TLD4_A_2D_S32_F32_II;
+ case NVPTX::TLD4_R_2D_U32_F32_RR:
+ return NVPTX::TLD4_R_2D_U32_F32_RI;
+ case NVPTX::TLD4_R_2D_U32_F32_IR:
+ return NVPTX::TLD4_R_2D_U32_F32_II;
+ case NVPTX::TLD4_G_2D_U32_F32_RR:
+ return NVPTX::TLD4_G_2D_U32_F32_RI;
+ case NVPTX::TLD4_G_2D_U32_F32_IR:
+ return NVPTX::TLD4_G_2D_U32_F32_II;
+ case NVPTX::TLD4_B_2D_U32_F32_RR:
+ return NVPTX::TLD4_B_2D_U32_F32_RI;
+ case NVPTX::TLD4_B_2D_U32_F32_IR:
+ return NVPTX::TLD4_B_2D_U32_F32_II;
+ case NVPTX::TLD4_A_2D_U32_F32_RR:
+ return NVPTX::TLD4_A_2D_U32_F32_RI;
+ case NVPTX::TLD4_A_2D_U32_F32_IR:
+ return NVPTX::TLD4_A_2D_U32_F32_II;
+ default:
+ llvm_unreachable("Unhandled TEX opcode");
+ }
+}
+
+static unsigned queryRegisterToIndexOpcode(unsigned RegOC) {
+ switch (RegOC) {
+ case NVPTX::TXQ_CHANNEL_ORDER_R:
+ return NVPTX::TXQ_CHANNEL_ORDER_I;
+ case NVPTX::TXQ_CHANNEL_DATA_TYPE_R:
+ return NVPTX::TXQ_CHANNEL_DATA_TYPE_I;
+ case NVPTX::TXQ_WIDTH_R:
+ return NVPTX::TXQ_WIDTH_I;
+ case NVPTX::TXQ_HEIGHT_R:
+ return NVPTX::TXQ_HEIGHT_I;
+ case NVPTX::TXQ_DEPTH_R:
+ return NVPTX::TXQ_DEPTH_I;
+ case NVPTX::TXQ_ARRAY_SIZE_R:
+ return NVPTX::TXQ_ARRAY_SIZE_I;
+ case NVPTX::TXQ_NUM_SAMPLES_R:
+ return NVPTX::TXQ_NUM_SAMPLES_I;
+ case NVPTX::TXQ_NUM_MIPMAP_LEVELS_R:
+ return NVPTX::TXQ_NUM_MIPMAP_LEVELS_I;
+ case NVPTX::SUQ_CHANNEL_ORDER_R:
+ return NVPTX::SUQ_CHANNEL_ORDER_I;
+ case NVPTX::SUQ_CHANNEL_DATA_TYPE_R:
+ return NVPTX::SUQ_CHANNEL_DATA_TYPE_I;
+ case NVPTX::SUQ_WIDTH_R:
+ return NVPTX::SUQ_WIDTH_I;
+ case NVPTX::SUQ_HEIGHT_R:
+ return NVPTX::SUQ_HEIGHT_I;
+ case NVPTX::SUQ_DEPTH_R:
+ return NVPTX::SUQ_DEPTH_I;
+ case NVPTX::SUQ_ARRAY_SIZE_R:
+ return NVPTX::SUQ_ARRAY_SIZE_I;
+ default:
+ llvm_unreachable("Unhandled TXQ/SUQ opcode");
+ }
+}
+
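(Aside, not part of the patch: a note on how the opcode suffixes in the tables above compose. For the non-unified forms, the two trailing letters name the texref and samplerref operand kinds, R for register and I for immediate index; texRegisterToIndexOpcode flips the first letter and samplerRegisterToIndexOpcode flips the second, while the TEX_UNIFIED forms carry a single letter for the texref only. A minimal sketch using opcodes that appear in the tables:

    // Illustrative only; both helpers are the static functions defined above.
    unsigned OC = NVPTX::TEX_2D_F32_F32_RR;  // texref and samplerref in registers
    OC = texRegisterToIndexOpcode(OC);       // -> NVPTX::TEX_2D_F32_F32_IR
    OC = samplerRegisterToIndexOpcode(OC);   // -> NVPTX::TEX_2D_F32_F32_II

Folding both handles to immediates therefore walks RR -> IR -> II, which is what processInstr below does one operand at a time.)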
bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
MachineFunction &MF = *MI.getParent()->getParent();
const MCInstrDesc &MCID = MI.getDesc();
+ const NVPTXInstrInfo *TII = MF.getSubtarget<NVPTXSubtarget>().getInstrInfo();
if (MCID.TSFlags & NVPTXII::IsTexFlag) {
// This is a texture fetch, so operand 4 is a texref and operand 5 is
// a samplerref
MachineOperand &TexHandle = MI.getOperand(4);
- replaceImageHandle(TexHandle, MF);
+ if (replaceImageHandle(TexHandle, MF))
+ MI.setDesc(TII->get(texRegisterToIndexOpcode(MI.getOpcode())));
if (!(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
MachineOperand &SampHandle = MI.getOperand(5);
- replaceImageHandle(SampHandle, MF);
+ if (replaceImageHandle(SampHandle, MF))
+ MI.setDesc(TII->get(samplerRegisterToIndexOpcode(MI.getOpcode())));
}
return true;
@@ -99,21 +1755,24 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
// For a surface load of vector size N, the Nth operand will be the surfref
MachineOperand &SurfHandle = MI.getOperand(VecSize);
- replaceImageHandle(SurfHandle, MF);
+ if (replaceImageHandle(SurfHandle, MF))
+ MI.setDesc(TII->get(suldRegisterToIndexOpcode(MI.getOpcode())));
return true;
} else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
// This is a surface store, so operand 0 is a surfref
MachineOperand &SurfHandle = MI.getOperand(0);
- replaceImageHandle(SurfHandle, MF);
+ if (replaceImageHandle(SurfHandle, MF))
+ MI.setDesc(TII->get(sustRegisterToIndexOpcode(MI.getOpcode())));
return true;
} else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
// This is a query, so operand 1 is a surfref/texref
MachineOperand &Handle = MI.getOperand(1);
- replaceImageHandle(Handle, MF);
+ if (replaceImageHandle(Handle, MF))
+ MI.setDesc(TII->get(queryRegisterToIndexOpcode(MI.getOpcode())));
return true;
}
@@ -121,12 +1780,14 @@ bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
return false;
}
-void NVPTXReplaceImageHandles::
-replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+bool NVPTXReplaceImageHandles::replaceImageHandle(MachineOperand &Op,
+ MachineFunction &MF) {
unsigned Idx;
if (findIndexForHandle(Op, MF, Idx)) {
Op.ChangeToImmediate(Idx);
+ return true;
}
+ return false;
}
bool NVPTXReplaceImageHandles::
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index e3515f35d022..0a1c61a35795 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -23,11 +23,12 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
@@ -225,7 +226,7 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineStartEPCallback(
- [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
// FIXME: NVVMIntrRangePass is causing numerical discrepancies,
@@ -240,6 +241,25 @@ NVPTXTargetMachine::getTargetTransformInfo(const Function &F) {
return TargetTransformInfo(NVPTXTTIImpl(this, F));
}
+std::pair<const Value *, unsigned>
+NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::nvvm_isspacep_const:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
+ case Intrinsic::nvvm_isspacep_global:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
+ case Intrinsic::nvvm_isspacep_local:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
+ case Intrinsic::nvvm_isspacep_shared:
+ return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
+ default:
+ break;
+ }
+ }
+ return std::make_pair(nullptr, -1);
+}
+
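(Aside, not part of the patch: a hedged sketch of how a caller might consume getPredicatedAddrSpace. Everything below except the hook itself and the llvm.nvvm.isspacep.* intrinsics is a hypothetical name used for illustration.

    // If a dominating condition turns out to be one of the isspacep checks,
    // the pointer it tests can be treated as living in the returned address
    // space. TM, Cond and refineAddrSpace are assumed names.
    std::pair<const Value *, unsigned> P = TM.getPredicatedAddrSpace(Cond);
    if (P.first)                          // Cond was a recognized isspacep call
      refineAddrSpace(P.first, P.second); // hypothetical helper

On the failure path the hook returns {nullptr, -1}, so a caller only needs to test the pointer half of the pair.)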
void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
if (getOptLevel() == CodeGenOpt::Aggressive)
addPass(createGVNPass());
@@ -328,6 +348,7 @@ void NVPTXPassConfig::addIRPasses() {
addEarlyCSEOrGVNPass();
if (!DisableLoadStoreVectorizer)
addPass(createLoadStoreVectorizerPass());
+ addPass(createSROAPass());
}
}
@@ -350,7 +371,7 @@ void NVPTXPassConfig::addPreRegAlloc() {
}
void NVPTXPassConfig::addPostRegAlloc() {
- addPass(createNVPTXPrologEpilogPass(), false);
+ addPass(createNVPTXPrologEpilogPass());
if (getOptLevel() != CodeGenOpt::None) {
// NVPTXPrologEpilogPass calculates frame object offset and replace frame
// index with VRFrame register. NVPTXPeephole need to be run after that and
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 39647eb65c0c..7a69197abcff 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -16,6 +16,7 @@
#include "ManagedStringPool.h"
#include "NVPTXSubtarget.h"
#include "llvm/Target/TargetMachine.h"
+#include <utility>
namespace llvm {
@@ -69,6 +70,9 @@ public:
bool isMachineVerifierClean() const override {
return false;
}
+
+ std::pair<const Value *, unsigned>
+ getPredicatedAddrSpace(const Value *V) const override;
}; // NVPTXTargetMachine.
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 20bd227b4b16..466aa7130216 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -328,7 +328,7 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
// Simplify to target-generic intrinsic.
if (Action.IID) {
- SmallVector<Value *, 4> Args(II->arg_operands());
+ SmallVector<Value *, 4> Args(II->args());
// All the target-generic intrinsics currently of interest to us have one
// type argument, equal to that of the nvvm intrinsic's argument.
Type *Tys[] = {II->getArgOperand(0)->getType()};
@@ -402,8 +402,9 @@ InstructionCost NVPTXTTIImpl::getArithmeticInstrCost(
}
void NVPTXTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
- BaseT::getUnrollingPreferences(L, SE, UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
// Enable partial unrolling and runtime unrolling, but reduce the
// threshold. This partially unrolls small loops which are often
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index d5a52d42ca00..bf593af68f33 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -48,6 +48,11 @@ public:
return AddressSpace::ADDRESS_SPACE_GENERIC;
}
+ bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
+ return AS != AddressSpace::ADDRESS_SPACE_SHARED &&
+ AS != AddressSpace::ADDRESS_SPACE_LOCAL && AS != ADDRESS_SPACE_PARAM;
+ }
+
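(Aside, not part of the patch: this encodes the NVPTX constraint that the shared, local and param address spaces cannot hold globals with real initializers. A hedged sketch of how an IR transform might consult the hook; TTI, AS, Ty and NewGV are assumed names for illustration.

    Constant *Init;
    if (TTI.canHaveNonUndefGlobalInitializerInAddressSpace(AS))
      Init = Constant::getNullValue(Ty); // a real (zero) initializer is allowed
    else
      Init = UndefValue::get(Ty);        // shared/local/param: leave it undef
    NewGV->setInitializer(Init);

Per the predicate above, only address spaces other than shared, local and param accept a non-undef initializer.)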
Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
@@ -89,8 +94,7 @@ public:
unsigned getInliningThresholdMultiplier() { return 5; }
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -99,7 +103,8 @@ public:
const Instruction *CxtI = nullptr);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
diff --git a/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index 235be9c0dbbb..e4f0a517599f 100644
--- a/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/NVPTXTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheNVPTXTarget32() {
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 7631bb4bccfb..9e181d4052d6 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -23,8 +23,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 94416fc584b5..5a12c3f22dee 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -12,8 +12,8 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
index 22731bbd0f82..6b16af293244 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
@@ -80,7 +80,7 @@ bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
void PPCIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
- CCValAssign &VA) {
+ CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
index b045032bec06..cc2cb7b26e84 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
@@ -46,7 +46,7 @@ public:
private:
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override;
+ CCValAssign VA) override;
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 28294b4c00ed..9df94edc8cdf 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -20,8 +20,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
@@ -196,7 +196,8 @@ public:
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
support::endian::write<uint32_t>(OS, 0x60000000, Endian);
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index 386d59266096..0ca8587ba483 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -1,9 +1,8 @@
//===-------- PPCELFStreamer.cpp - ELF Object Output ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,7 +56,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const MCInst &Inst,
// all of the nops required as part of the alignment operation. In the case
// where no nops are added, the fragment is still created but it remains
// empty.
- emitCodeAlignment(64, 4);
+ emitCodeAlignment(64, &STI, 4);
// Emit the instruction.
// Since the previous emit created a new fragment, adding this instruction
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
index f44200104f32..b3e12413eacf 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.h
@@ -1,9 +1,8 @@
//===- PPCELFStreamer.h - ELF Object Output --------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 3f6497aa0e8f..67c02c17bc46 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -158,7 +158,10 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// dcbt ra, rb, th [server]
// dcbt th, ra, rb [embedded]
// where th can be omitted when it is 0. dcbtst is the same.
- if (MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) {
+ // On AIX, only emit the extended mnemonics for dcbt and dcbtst if
+ // the "modern assembler" is available.
+ if ((MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) &&
+ (!TT.isOSAIX() || STI.getFeatureBits()[PPC::FeatureModernAIXAs])) {
unsigned char TH = MI->getOperand(0).getImm();
O << "\tdcbt";
if (MI->getOpcode() == PPC::DCBTST)
@@ -628,8 +631,6 @@ const char *PPCInstPrinter::getVerboseConditionRegName(unsigned RegNum,
// showRegistersWithPrefix - This method determines whether registers
// should be number-only or include the prefix.
bool PPCInstPrinter::showRegistersWithPrefix() const {
- if (TT.getOS() == Triple::AIX)
- return false;
return FullRegNamesWithPercent || FullRegNames;
}
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index e9fc056a08f0..22b948a83c34 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -37,11 +37,11 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
index e582ddfca323..79db03b0331b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp
@@ -1,9 +1,8 @@
//===-------- PPCXCOFFStreamer.cpp - XCOFF Object Output ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,7 +26,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -46,7 +45,7 @@ void PPCXCOFFStreamer::emitPrefixedInstruction(const MCInst &Inst,
// prefixed instruction. Align to 64 bytes if possible but add a maximum of 4
// bytes when trying to do that. If alignment requires adding more than 4
// bytes then the instruction won't be aligned.
- emitCodeAlignment(64, 4);
+ emitCodeAlignment(64, &STI, 4);
// Emit the instruction.
// Since the previous emit created a new fragment then adding this instruction
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h
index f6eb5edfb7a7..5fa35127b70b 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h
@@ -1,9 +1,8 @@
//===- PPCXCOFFStreamer.h - XCOFF Object Output -----------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
new file mode 100644
index 000000000000..f43ba00ec373
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -0,0 +1,2075 @@
+//===--- P10InstrResources.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the itinerary class data for the POWER10 processor.
+//
+//===----------------------------------------------------------------------===//
+// 22 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FDIVS,
+ XSDIVSP
+)>;
+
+// 2-way crack instructions
+// 22 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_22C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FDIVS_rec
+)>;
+
+// 24 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_24C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ XVDIVSP
+)>;
+
+// 26 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_26C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FSQRTS,
+ XSSQRTSP
+)>;
+
+// 2-way crack instructions
+// 26 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_BF_26C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FSQRTS_rec
+)>;
+
+// 27 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ XVSQRTSP
+)>;
+
+// 27 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FDIV,
+ XSDIVDP,
+ XVDIVDP
+)>;
+
+// 2-way crack instructions
+// 27 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_27C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FDIV_rec
+)>;
+
+// 36 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_36C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FSQRT,
+ XSSQRTDP,
+ XVSQRTDP
+)>;
+
+// 2-way crack instructions
+// 36 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_BF_36C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FSQRT_rec
+)>;
+
+// 7 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ FCFID,
+ FCFIDS,
+ FCFIDU,
+ FCFIDUS,
+ FCTID,
+ FCTIDU,
+ FCTIDUZ,
+ FCTIDZ,
+ FCTIW,
+ FCTIWU,
+ FCTIWUZ,
+ FCTIWZ,
+ FRE,
+ FRES,
+ FRIMD, FRIMS,
+ FRIND, FRINS,
+ FRIPD, FRIPS,
+ FRIZD, FRIZS,
+ FRSP,
+ FRSQRTE,
+ FRSQRTES,
+ VCFSX, VCFSX_0,
+ VCFUX, VCFUX_0,
+ VCTSXS, VCTSXS_0,
+ VCTUXS, VCTUXS_0,
+ VLOGEFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSPN,
+ XSCVDPSXDS, XSCVDPSXDSs,
+ XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs,
+ XSCVDPUXWS, XSCVDPUXWSs,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVSPBF16,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read],
+ (instrs
+ FADD,
+ FADDS,
+ FMUL,
+ FMULS,
+ FSUB,
+ FSUBS,
+ VADDFP,
+ VSUBFP,
+ XSADDDP,
+ XSADDSP,
+ XSMULDP,
+ XSMULSP,
+ XSSUBDP,
+ XSSUBSP,
+ XVADDDP,
+ XVADDSP,
+ XVMULDP,
+ XVMULSP,
+ XVSUBDP,
+ XVSUBSP
+)>;
+
+// 7 Cycles Binary Floating Point operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_ANY, P10BF_Read, P10BF_Read, P10BF_Read],
+ (instrs
+ FMADD,
+ FMADDS,
+ FMSUB,
+ FMSUBS,
+ FNMADD,
+ FNMADDS,
+ FNMSUB,
+ FNMSUBS,
+ FSELD, FSELS,
+ VMADDFP,
+ VNMSUBFP,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 7 Cycles Binary Floating Point operations, 1 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_BF_7C, P10W_DISP_ANY, P10BF_Read],
+ (instrs
+ VEXPTEFP
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FADD_rec,
+ FADDS_rec,
+ FMUL_rec,
+ FMULS_rec,
+ FSUB_rec,
+ FSUBS_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FCFID_rec,
+ FCFIDS_rec,
+ FCFIDU_rec,
+ FCFIDUS_rec,
+ FCTID_rec,
+ FCTIDU_rec,
+ FCTIDUZ_rec,
+ FCTIDZ_rec,
+ FCTIW_rec,
+ FCTIWU_rec,
+ FCTIWUZ_rec,
+ FCTIWZ_rec,
+ FRE_rec,
+ FRES_rec,
+ FRIMD_rec, FRIMS_rec,
+ FRIND_rec, FRINS_rec,
+ FRIPD_rec, FRIPS_rec,
+ FRIZD_rec, FRIZS_rec,
+ FRSP_rec,
+ FRSQRTE_rec,
+ FRSQRTES_rec
+)>;
+
+// 2-way crack instructions
+// 7 Cycles Binary Floating Point operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ FMADD_rec,
+ FMADDS_rec,
+ FMSUB_rec,
+ FMSUBS_rec,
+ FNMADD_rec,
+ FNMADDS_rec,
+ FNMSUB_rec,
+ FNMSUBS_rec,
+ FSELD_rec, FSELS_rec
+)>;
+
+// 2 Cycles Branch operations, 0 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
+ (instrs
+ BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+ BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+ BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
+)>;
+
+// 2 Cycles Branch operations, 1 input operand
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
+ (instrs
+ B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
+ BA, TAILBA, TAILBA8,
+ BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
+ BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
+ BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
+)>;
+
+// 2 Cycles Branch operations, 3 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
+ (instrs
+ BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
+ BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+)>;
+
+// 2 Cycles Branch operations, 4 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+ (instrs
+ BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+ BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
+)>;
+
+// 7 Cycles Crypto operations, 1 input operand
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
+ (instrs
+ VSBOX
+)>;
+
+// 7 Cycles Crypto operations, 2 input operands
+def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
+ (instrs
+ CFUGED,
+ CNTLZDM,
+ CNTTZDM,
+ PDEPD,
+ PEXTD,
+ VCFUGED,
+ VCIPHER,
+ VCIPHERLAST,
+ VCLZDM,
+ VCTZDM,
+ VGNB,
+ VNCIPHER,
+ VNCIPHERLAST,
+ VPDEPD,
+ VPEXTD,
+ VPMSUMB,
+ VPMSUMD,
+ VPMSUMH,
+ VPMSUMW
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ XSCVDPQP,
+ XSCVQPDP,
+ XSCVQPDPO,
+ XSCVQPSDZ,
+ XSCVQPSQZ,
+ XSCVQPSWZ,
+ XSCVQPUDZ,
+ XSCVQPUQZ,
+ XSCVQPUWZ,
+ XSCVSDQP,
+ XSCVSQQP,
+ XSCVUDQP,
+ XSCVUQQP
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSADDQP,
+ XSADDQPO,
+ XSSUBQP,
+ XSSUBQPO
+)>;
+
+// 13 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+ (instrs
+ BCDSR_rec,
+ XSRQPI,
+ XSRQPIX,
+ XSRQPXP
+)>;
+
+// 2-way crack instructions
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ HASHST,
+ HASHSTP
+)>;
+
+// 24 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_24C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ BCDCTSQ_rec
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSMULQP,
+ XSMULQPO
+)>;
+
+// 25 Cycles Decimal Floating Point operations, 3 input operands
+def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
+ (instrs
+ XSMADDQP,
+ XSMADDQPO,
+ XSMSUBQP,
+ XSMSUBQPO,
+ XSNMADDQP,
+ XSNMADDQPO,
+ XSNMSUBQP,
+ XSNMSUBQPO
+)>;
+
+// 38 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ BCDCFSQ_rec
+)>;
+
+// 59 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_59C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ XSDIVQP,
+ XSDIVQPO
+)>;
+
+// 61 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_61C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ VDIVESQ,
+ VDIVEUQ,
+ VDIVSQ,
+ VDIVUQ
+)>;
+
+// 68 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_DF_68C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+ (instrs
+ VMODSQ,
+ VMODUQ
+)>;
+
+// 77 Cycles Decimal Floating Point operations, 1 input operand
+def : InstRW<[P10W_DF_77C, P10W_DISP_ANY, P10DF_Read],
+ (instrs
+ XSSQRTQP,
+ XSSQRTQPO
+)>;
+
+// 20 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVW,
+ DIVWO,
+ DIVWU,
+ DIVWUO,
+ MODSW
+)>;
+
+// 2-way crack instructions
+// 20 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_20C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVW_rec,
+ DIVWO_rec,
+ DIVWU_rec,
+ DIVWUO_rec
+)>;
+
+// 25 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVD,
+ DIVDO,
+ DIVDU,
+ DIVDUO,
+ DIVWE,
+ DIVWEO,
+ DIVWEU,
+ DIVWEUO
+)>;
+
+// 2-way crack instructions
+// 25 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_25C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVD_rec,
+ DIVDO_rec,
+ DIVDU_rec,
+ DIVDUO_rec,
+ DIVWE_rec,
+ DIVWEO_rec,
+ DIVWEU_rec,
+ DIVWEUO_rec
+)>;
+
+// 27 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_27C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ MODSD,
+ MODUD,
+ MODUW
+)>;
+
+// 41 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ DIVDE,
+ DIVDEO,
+ DIVDEU,
+ DIVDEUO
+)>;
+
+// 2-way crack instructions
+// 41 Cycles Scalar Fixed-Point Divide operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_DV_41C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ DIVDE_rec,
+ DIVDEO_rec,
+ DIVDEU_rec,
+ DIVDEUO_rec
+)>;
+
+// 43 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_43C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVSD,
+ VDIVUD
+)>;
+
+// 47 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_47C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VMODSD,
+ VMODUD
+)>;
+
+// 54 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_54C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVSW,
+ VDIVUW
+)>;
+
+// 60 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_60C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VMODSW,
+ VMODUW
+)>;
+
+// 75 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_75C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVESD,
+ VDIVEUD
+)>;
+
+// 83 Cycles Scalar Fixed-Point Divide operations, 2 input operands
+def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
+ (instrs
+ VDIVESW,
+ VDIVEUW
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 1 input operand
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
+ (instrs
+ BCDCTN_rec,
+ VMUL10CUQ,
+ VMUL10UQ,
+ XSXSIGQP
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 2 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
+ (instrs
+ BCDCFN_rec,
+ BCDCFZ_rec,
+ BCDCPSGN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
+ BCDUS_rec,
+ BCDUTRUNC_rec,
+ VADDCUQ,
+ VADDUQM,
+ VMUL10ECUQ,
+ VMUL10EUQ,
+ VSUBCUQ,
+ VSUBUQM,
+ XSCMPEXPQP,
+ XSCMPOQP,
+ XSCMPUQP,
+ XSTSTDCQP,
+ XXGENPCVBM
+)>;
+
+// 5 Cycles Fixed-Point and BCD operations, 3 input operands
+def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
+ (instrs
+ BCDS_rec,
+ BCDTRUNC_rec,
+ VADDECUQ,
+ VADDEUQM,
+ VSUBECUQ,
+ VSUBEUQM
+)>;
+
+// 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
+ (instrs
+ TRAP, TW
+)>;
+
+// 4 Cycles ALU2 operations, 1 input operand
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
+ (instrs
+ CNTLZD,
+ CNTLZD_rec,
+ CNTLZW, CNTLZW8,
+ CNTLZW8_rec, CNTLZW_rec,
+ CNTTZD,
+ CNTTZD_rec,
+ CNTTZW, CNTTZW8,
+ CNTTZW8_rec, CNTTZW_rec,
+ FTSQRT,
+ MTVSRBM,
+ MTVSRBMI,
+ MTVSRDM,
+ MTVSRHM,
+ MTVSRQM,
+ MTVSRWM,
+ POPCNTB, POPCNTB8,
+ POPCNTD,
+ POPCNTW,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VEXPANDBM,
+ VEXPANDDM,
+ VEXPANDHM,
+ VEXPANDQM,
+ VEXPANDWM,
+ VEXTRACTBM,
+ VEXTRACTDM,
+ VEXTRACTHM,
+ VEXTRACTQM,
+ VEXTRACTWM,
+ VPOPCNTB,
+ VPOPCNTD,
+ VPOPCNTH,
+ VPOPCNTW,
+ VPRTYBD,
+ VPRTYBW,
+ XSCVHPDP,
+ XSCVSPDPN,
+ XSTSQRTDP,
+ XVCVHPSP,
+ XVTLSBB,
+ XVTSQRTDP,
+ XVTSQRTSP
+)>;
+
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+ (instrs
+ CMPEQB,
+ EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+ FCMPOD, FCMPOS,
+ FCMPUD, FCMPUS,
+ FTDIV,
+ SLD_rec,
+ SLW8_rec, SLW_rec,
+ SRD_rec,
+ SRW8_rec, SRW_rec,
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDCUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBS,
+ VADDUHS,
+ VADDUWS,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VCMPBFP,
+ VCMPBFP_rec,
+ VCMPEQFP,
+ VCMPEQFP_rec,
+ VCMPEQUB_rec,
+ VCMPEQUD_rec,
+ VCMPEQUH_rec,
+ VCMPEQUQ,
+ VCMPEQUQ_rec,
+ VCMPEQUW_rec,
+ VCMPGEFP,
+ VCMPGEFP_rec,
+ VCMPGTFP,
+ VCMPGTFP_rec,
+ VCMPGTSB_rec,
+ VCMPGTSD_rec,
+ VCMPGTSH_rec,
+ VCMPGTSQ,
+ VCMPGTSQ_rec,
+ VCMPGTSW_rec,
+ VCMPGTUB_rec,
+ VCMPGTUD_rec,
+ VCMPGTUH_rec,
+ VCMPGTUQ,
+ VCMPGTUQ_rec,
+ VCMPGTUW_rec,
+ VCMPNEB_rec,
+ VCMPNEH_rec,
+ VCMPNEW_rec,
+ VCMPNEZB_rec,
+ VCMPNEZH_rec,
+ VCMPNEZW_rec,
+ VCMPSQ,
+ VCMPUQ,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
+ VMAXFP,
+ VMINFP,
+ VSUBCUW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBS,
+ VSUBUHS,
+ VSUBUWS,
+ XSCMPEQDP,
+ XSCMPEXPDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCMPODP,
+ XSCMPUDP,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSTDIVDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
+ XVCMPEQDP,
+ XVCMPEQDP_rec,
+ XVCMPEQSP,
+ XVCMPEQSP_rec,
+ XVCMPGEDP,
+ XVCMPGEDP_rec,
+ XVCMPGESP,
+ XVCMPGESP_rec,
+ XVCMPGTDP,
+ XVCMPGTDP_rec,
+ XVCMPGTSP,
+ XVCMPGTSP_rec,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTSTDCDP,
+ XVTSTDCSP
+)>;
+
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ CMPRB, CMPRB8,
+ RLDCL_rec,
+ RLDCR_rec,
+ RLDIC_rec,
+ RLDICL_32_rec, RLDICL_rec,
+ RLDICR_rec,
+ TD,
+ TDI,
+ TWI,
+ VSHASIGMAD,
+ VSHASIGMAW
+)>;
+
+// 4 Cycles ALU2 operations, 4 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ RLDIMI_rec,
+ RLWINM8_rec, RLWINM_rec,
+ RLWNM8_rec, RLWNM_rec
+)>;
+
+// 4 Cycles ALU2 operations, 5 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ RLWIMI8_rec, RLWIMI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
+ (instrs
+ SRAD_rec,
+ SRADI_rec,
+ SRAW_rec,
+ SRAWI_rec
+)>;
+
+// Single crack instructions
+// 4 Cycles ALU2 operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
+ (instrs
+ TABORTDC,
+ TABORTDCI,
+ TABORTWC,
+ TABORTWCI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VRLQ,
+ VRLQNM,
+ VSLQ,
+ VSRAQ,
+ VSRQ
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VRLQMI
+)>;
+
+// 2-way crack instructions
+// 4 Cycles ALU2 operations, and 4 Cycles ALU2 operations, 0 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ MFCR, MFCR8
+)>;
+
+// 2 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ MTCTR, MTCTR8, MTCTR8loop, MTCTRloop,
+ MTLR, MTLR8
+)>;
+
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ CR6SET, CREQV, CRSET,
+ DSS, DSSALL,
+ MCRXRX,
+ MFCTR, MFCTR8,
+ MFLR, MFLR8,
+ NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+ VXOR, V_SET0, V_SET0B, V_SET0H,
+ XXLEQV, XXLEQVOnes,
+ XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+)>;
+
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+ ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
+ ADDME, ADDME8,
+ ADDME8O, ADDMEO,
+ ADDZE, ADDZE8,
+ ADDZE8O, ADDZEO,
+ EXTSB, EXTSB8, EXTSB8_32_64,
+ EXTSB8_rec, EXTSB_rec,
+ EXTSH, EXTSH8, EXTSH8_32_64,
+ EXTSH8_rec, EXTSH_rec,
+ EXTSW, EXTSW_32, EXTSW_32_64,
+ EXTSW_32_64_rec, EXTSW_rec,
+ FABSD, FABSS,
+ FMR,
+ FNABSD, FNABSS,
+ FNEGD, FNEGS,
+ MCRF,
+ MFOCRF, MFOCRF8,
+ MFVRD, MFVSRD,
+ MFVRWZ, MFVSRWZ,
+ MTOCRF, MTOCRF8,
+ MTVRD, MTVSRD,
+ MTVRWA, MTVSRWA,
+ MTVRWZ, MTVSRWZ,
+ NEG, NEG8,
+ NEG8_rec, NEG_rec,
+ NEG8O, NEGO,
+ SETB, SETB8,
+ SETBC, SETBC8,
+ SETBCR, SETBCR8,
+ SETNBC, SETNBC8,
+ SETNBCR, SETNBCR8,
+ SUBFME, SUBFME8,
+ SUBFME8O, SUBFMEO,
+ SUBFZE, SUBFZE8,
+ SUBFZE8O, SUBFZEO,
+ VEXTSB2D, VEXTSB2Ds,
+ VEXTSB2W, VEXTSB2Ws,
+ VEXTSD2Q,
+ VEXTSH2D, VEXTSH2Ds,
+ VEXTSH2W, VEXTSH2Ws,
+ VEXTSW2D, VEXTSW2Ds,
+ VNEGD,
+ VNEGW,
+ WAIT,
+ XSABSDP,
+ XSABSQP,
+ XSNABSDP,
+ XSNABSQP,
+ XSNEGDP,
+ XSNEGQP,
+ XSXEXPDP,
+ XSXEXPQP,
+ XSXSIGDP,
+ XVABSDP,
+ XVABSSP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVXEXPDP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP
+)>;
+
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADD4, ADD4TLS, ADD8, ADD8TLS, ADD8TLS_,
+ ADD4_rec, ADD8_rec,
+ ADDE, ADDE8,
+ ADDE8O, ADDEO,
+ ADDIC, ADDIC8,
+ ADD4O, ADD8O,
+ AND, AND8,
+ AND8_rec, AND_rec,
+ ANDC, ANDC8,
+ ANDC8_rec, ANDC_rec,
+ ANDI8_rec, ANDI_rec,
+ ANDIS8_rec, ANDIS_rec,
+ CMPD, CMPW,
+ CMPB, CMPB8,
+ CMPDI, CMPWI,
+ CMPLD, CMPLW,
+ CMPLDI, CMPLWI,
+ CRAND,
+ CRANDC,
+ CRNAND,
+ CRNOR,
+ CROR,
+ CRORC,
+ CR6UNSET, CRUNSET, CRXOR,
+ EQV, EQV8,
+ EQV8_rec, EQV_rec,
+ EXTSWSLI, EXTSWSLI_32_64,
+ FCPSGND, FCPSGNS,
+ NAND, NAND8,
+ NAND8_rec, NAND_rec,
+ NOR, NOR8,
+ NOR8_rec, NOR_rec,
+ COPY, OR, OR8,
+ OR8_rec, OR_rec,
+ ORC, ORC8,
+ ORC8_rec, ORC_rec,
+ ORIS, ORIS8,
+ SLD,
+ SLW, SLW8,
+ SRAD,
+ SRADI, SRADI_32,
+ SRAW,
+ SRAWI,
+ SRD,
+ SRW, SRW8,
+ SUBF, SUBF8,
+ SUBF8_rec, SUBF_rec,
+ SUBFE, SUBFE8,
+ SUBFE8O, SUBFEO,
+ SUBFIC, SUBFIC8,
+ SUBF8O, SUBFO,
+ VADDUBM,
+ VADDUDM,
+ VADDUHM,
+ VADDUWM,
+ VAND,
+ VANDC,
+ VCMPEQUB,
+ VCMPEQUD,
+ VCMPEQUH,
+ VCMPEQUW,
+ VCMPGTSB,
+ VCMPGTSD,
+ VCMPGTSH,
+ VCMPGTSW,
+ VCMPGTUB,
+ VCMPGTUD,
+ VCMPGTUH,
+ VCMPGTUW,
+ VCMPNEB,
+ VCMPNEH,
+ VCMPNEW,
+ VCMPNEZB,
+ VCMPNEZH,
+ VCMPNEZW,
+ VEQV,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNOR,
+ VOR,
+ VORC,
+ VRLB,
+ VRLD,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWNM,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBUBM,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUWM,
+ XOR, XOR8,
+ XOR8_rec, XOR_rec,
+ XORI, XORI8,
+ XORIS, XORIS8,
+ XSCPSGNDP,
+ XSCPSGNQP,
+ XSIEXPDP,
+ XSIEXPQP,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVIEXPDP,
+ XVIEXPSP,
+ XXLAND,
+ XXLANDC,
+ XXLNAND,
+ XXLNOR,
+ XXLOR, XXLORf,
+ XXLORC
+)>;
+
+// 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDEX, ADDEX8,
+ DST, DST64, DSTT, DSTT64,
+ DSTST, DSTST64, DSTSTT, DSTSTT64,
+ ISEL, ISEL8,
+ RLDCL,
+ RLDCR,
+ RLDIC,
+ RLDICL, RLDICL_32, RLDICL_32_64,
+ RLDICR, RLDICR_32,
+ VRLDMI,
+ VRLWMI,
+ VSEL,
+ XXSEL
+)>;
+
+// 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ RLDIMI,
+ RLWINM, RLWINM8,
+ RLWNM, RLWNM8
+)>;
+
+// 3 Cycles ALU operations, 5 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ RLWIMI, RLWIMI8
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ MFFS,
+ MFFS_rec,
+ MFFSL,
+ MFVSCR,
+ TRECHKPT
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ ADDME8_rec, ADDME_rec,
+ ADDME8O_rec, ADDMEO_rec,
+ ADDZE8_rec, ADDZE_rec,
+ ADDZE8O_rec, ADDZEO_rec,
+ MCRFS,
+ MFFSCDRN,
+ MFFSCDRNI,
+ MFFSCRN,
+ MFFSCRNI,
+ MTFSB0,
+ MTVSCR,
+ NEG8O_rec, NEGO_rec,
+ SUBFME8_rec, SUBFME_rec,
+ SUBFME8O_rec, SUBFMEO_rec,
+ SUBFZE8_rec, SUBFZE_rec,
+ SUBFZE8O_rec, SUBFZEO_rec,
+ TABORT,
+ TBEGIN,
+ TRECLAIM,
+ TSR
+)>;
+
+// Single crack instructions
+// 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDE8_rec, ADDE_rec,
+ ADDE8O_rec, ADDEO_rec,
+ ADDIC_rec,
+ ADD4O_rec, ADD8O_rec,
+ SUBFE8_rec, SUBFE_rec,
+ SUBFE8O_rec, SUBFEO_rec,
+ SUBF8O_rec, SUBFO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ HRFID,
+ MFFSCE,
+ RFID,
+ STOP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
+ (instrs
+ FABSD_rec, FABSS_rec,
+ FMR_rec,
+ FNABSD_rec, FNABSS_rec,
+ FNEGD_rec, FNEGS_rec,
+ MTFSB1,
+ RFEBB,
+ SC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDC, ADDC8,
+ ADDC8_rec, ADDC_rec,
+ ADDC8O, ADDCO,
+ FCPSGND_rec, FCPSGNS_rec,
+ MTFSF, MTFSFb,
+ MTFSFI, MTFSFIb,
+ SUBFC, SUBFC8,
+ SUBFC8_rec, SUBFC_rec,
+ SUBFC8O, SUBFCO
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ MTFSFI_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
+ (instrs
+ MTFSF_rec
+)>;
+
+// 4-way crack instructions
+// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
+ (instrs
+ ADDC8O_rec, ADDCO_rec,
+ SUBFC8O_rec, SUBFCO_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 4 Cycles Permute operations, 1 input operand
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VSTRIBL_rec,
+ VSTRIBR_rec,
+ VSTRIHL_rec,
+ VSTRIHR_rec
+)>;
+
+// 2-way crack instructions
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+ (instrs
+ MTCRF, MTCRF8
+)>;
+
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ LBZ, LBZ8,
+ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ LDBRX,
+ DFLOADf32, DFLOADf64, LFD,
+ LFDX, XFLOADf32, XFLOADf64,
+ LFIWAX, LIWAX,
+ LFIWZX, LIWZX,
+ LHA, LHA8,
+ LHAX, LHAX8,
+ LHBRX, LHBRX8,
+ LHZ, LHZ8,
+ LVEBX,
+ LVEHX,
+ LVEWX,
+ LVX,
+ LVXL,
+ LWA, LWA_32,
+ LWAX, LWAX_32,
+ LWBRX, LWBRX8,
+ LWZ, LWZ8, LWZtoc, LWZtocL,
+ LXSD,
+ LXSDX,
+ LXSIBZX,
+ LXSIHZX,
+ LXSIWAX,
+ LXSIWZX,
+ LXV,
+ LXVB16X,
+ LXVD2X,
+ LXVDSX,
+ LXVH8X,
+ LXVRBX,
+ LXVRDX,
+ LXVRHX,
+ LXVRWX,
+ LXVW4X,
+ LXVWSX,
+ LXVX
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ DCBT,
+ DCBTST,
+ ICBT,
+ LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
+ LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+ LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
+ LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
+ LXVL,
+ LXVLL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
+ (instrs
+ HASHCHK,
+ HASHCHKP
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ SLBIA
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ DARN,
+ LBARX, LBARXL,
+ LDARX, LDARXL,
+ LHARX, LHARXL,
+ LWARX, LWARXL,
+ SLBFEE_rec,
+ SLBIE,
+ SLBMFEE,
+ SLBMFEV
+)>;
+
+// Single crack instructions
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ LBZCIX,
+ LDCIX,
+ LHZCIX,
+ LWZCIX,
+ MTSPR, MTSPR8, MTSR, MTVRSAVE, MTVRSAVEv
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+ (instrs
+ LMW
+)>;
+
+// Expand instructions
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ LSWI
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+ (instrs
+ LBZU, LBZU8,
+ LBZUX, LBZUX8,
+ LDU,
+ LDUX,
+ LFDU,
+ LFDUX,
+ LHAU, LHAU8,
+ LHAUX, LHAUX8,
+ LHZU, LHZU8,
+ LHZUX, LHZUX8,
+ LWAUX,
+ LWZU, LWZU8,
+ LWZUX, LWZUX8
+)>;
+
+// 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+ (instrs
+ PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
+ PLD, PLDpc,
+ PLFD, PLFDpc,
+ PLFS, PLFSpc,
+ PLHA, PLHA8, PLHA8pc, PLHApc,
+ PLHZ, PLHZ8, PLHZ8pc, PLHZpc,
+ PLWA, PLWA8, PLWA8pc, PLWApc,
+ PLWZ, PLWZ8, PLWZ8pc, PLWZpc,
+ PLXSD, PLXSDpc,
+ PLXSSP, PLXSSPpc,
+ PLXV, PLXVpc,
+ PLXVP, PLXVPpc
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ LFS,
+ LFSX,
+ LXSSP,
+ LXSSPX
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ LFSU,
+ LFSUX
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+ (instrs
+ TLBIEL
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read, P10LD_Read],
+ (instrs
+ SLBMTE
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operand
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+ (instrs
+ LXVP,
+ LXVPX
+)>;
+
+// Single crack instructions
+// 13 Cycles Unknown operations, 1 input operand
+def : InstRW<[P10W_MFL_13C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ MFSPR, MFSPR8, MFSR, MFTB8, MFVRSAVE, MFVRSAVEv
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 0 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY],
+ (instrs
+ XXSETACCZ
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read],
+ (instrs
+ XVBF16GER2,
+ XVF16GER2,
+ XVF32GER,
+ XVF64GER,
+ XVI16GER2,
+ XVI16GER2S,
+ XVI4GER8,
+ XVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ XVBF16GER2NN,
+ XVBF16GER2NP,
+ XVBF16GER2PN,
+ XVBF16GER2PP,
+ XVF16GER2NN,
+ XVF16GER2NP,
+ XVF16GER2PN,
+ XVF16GER2PP,
+ XVF32GERNN,
+ XVF32GERNP,
+ XVF32GERPN,
+ XVF32GERPP,
+ XVF64GERNN,
+ XVF64GERNP,
+ XVF64GERPN,
+ XVF64GERPP,
+ XVI16GER2PP,
+ XVI16GER2SPP,
+ XVI4GER8PP,
+ XVI8GER4PP,
+ XVI8GER4SPP
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVF32GER,
+ PMXVF64GER
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVBF16GER2,
+ PMXVF16GER2,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
+ PMXVI16GER2,
+ PMXVI16GER2S,
+ PMXVI4GER8,
+ PMXVI8GER4
+)>;
+
+// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+ (instrs
+ PMXVBF16GER2NN,
+ PMXVBF16GER2NP,
+ PMXVBF16GER2PN,
+ PMXVBF16GER2PP,
+ PMXVF16GER2NN,
+ PMXVF16GER2NP,
+ PMXVF16GER2PN,
+ PMXVF16GER2PP,
+ PMXVI16GER2PP,
+ PMXVI16GER2SPP,
+ PMXVI4GER8PP,
+ PMXVI8GER4PP,
+ PMXVI8GER4SPP
+)>;
+
+// 2-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+ (instrs
+ XXMTACC
+)>;
+
+// 4-way crack instructions
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 Cycles ALU operations, 10 Cycles SIMD Matrix Multiply Engine operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C],
+ (instrs
+ XXMFACC
+)>;
+
+// 5 Cycles GPR Multiply operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
+ (instrs
+ MULHD,
+ MULHDU,
+ MULHW,
+ MULHWU,
+ MULLD,
+ MULLDO,
+ MULLI, MULLI8,
+ MULLW,
+ MULLWO,
+ VMULHSD,
+ VMULHUD,
+ VMULLD
+)>;
+
+// 5 Cycles GPR Multiply operations, 3 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read, P10MU_Read],
+ (instrs
+ MADDHD,
+ MADDHDU,
+ MADDLD, MADDLD8
+)>;
+
+// 2-way crack instructions
+// 5 Cycles GPR Multiply operations, and 3 Cycles ALU operations, 2 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ MULHD_rec,
+ MULHDU_rec,
+ MULHW_rec,
+ MULHWU_rec,
+ MULLD_rec,
+ MULLDO_rec,
+ MULLW_rec,
+ MULLWO_rec
+)>;
+
+// 4 Cycles Permute operations, 0 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
+ (instrs
+ VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+)>;
+
+// 4 Cycles Permute operations, 1 input operand
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
+ (instrs
+ LVSL,
+ LVSR,
+ MFVSRLD,
+ MTVSRWS,
+ VCLZLSBB,
+ VCTZLSBB,
+ VGBBD,
+ VPRTYBQ,
+ VSPLTISB,
+ VSPLTISH,
+ VSTRIBL,
+ VSTRIBR,
+ VSTRIHL,
+ VSTRIHR,
+ VUPKHPX,
+ VUPKHSB,
+ VUPKHSH,
+ VUPKHSW,
+ VUPKLPX,
+ VUPKLSB,
+ VUPKLSH,
+ VUPKLSW,
+ XVCVBF16SPN,
+ XXBRD,
+ XXBRH,
+ XXBRQ,
+ XXBRW,
+ XXSPLTIB
+)>;
+
+// 4 Cycles Permute operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
+ (instrs
+ BPERMD,
+ MTVSRDD,
+ VBPERMD,
+ VBPERMQ,
+ VCLRLB,
+ VCLRRB,
+ VEXTRACTD,
+ VEXTRACTUB,
+ VEXTRACTUH,
+ VEXTRACTUW,
+ VEXTUBLX,
+ VEXTUBRX,
+ VEXTUHLX,
+ VEXTUHRX,
+ VEXTUWLX,
+ VEXTUWRX,
+ VINSERTD,
+ VINSERTW,
+ VMRGHB,
+ VMRGHH,
+ VMRGHW,
+ VMRGLB,
+ VMRGLH,
+ VMRGLW,
+ VPKPX,
+ VPKSDSS,
+ VPKSDUS,
+ VPKSHSS,
+ VPKSHUS,
+ VPKSWSS,
+ VPKSWUS,
+ VPKUDUM,
+ VPKUDUS,
+ VPKUHUM,
+ VPKUHUS,
+ VPKUWUM,
+ VPKUWUS,
+ VSL,
+ VSLO,
+ VSLV,
+ VSPLTB, VSPLTBs,
+ VSPLTH, VSPLTHs,
+ VSPLTW,
+ VSR,
+ VSRO,
+ VSRV,
+ XXEXTRACTUW,
+ XXGENPCVDM,
+ XXGENPCVHM,
+ XXGENPCVWM,
+ XXMRGHW,
+ XXMRGLW,
+ XXPERM,
+ XXPERMDI, XXPERMDIs,
+ XXPERMR,
+ XXSLDWI, XXSLDWIs,
+ XXSPLTW, XXSPLTWs
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ VEXTDDVLX,
+ VEXTDDVRX,
+ VEXTDUBVLX,
+ VEXTDUBVRX,
+ VEXTDUHVLX,
+ VEXTDUHVRX,
+ VEXTDUWVLX,
+ VEXTDUWVRX,
+ VINSBLX,
+ VINSBRX,
+ VINSBVLX,
+ VINSBVRX,
+ VINSD,
+ VINSDLX,
+ VINSDRX,
+ VINSERTB,
+ VINSERTH,
+ VINSHLX,
+ VINSHRX,
+ VINSHVLX,
+ VINSHVRX,
+ VINSW,
+ VINSWLX,
+ VINSWRX,
+ VINSWVLX,
+ VINSWVRX,
+ VPERM,
+ VPERMR,
+ VPERMXOR,
+ VSLDBI,
+ VSLDOI,
+ VSRDBI,
+ XXINSERTW
+)>;
+
+// 2-way crack instructions
+// 4 Cycles Permute operations, and 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
+ (instrs
+ VSUMSWS
+)>;
+
+// 4 Cycles Permute operations, 1 input operand
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+ (instrs
+ XXSPLTIDP,
+ XXSPLTIW
+)>;
+
+// 4 Cycles Permute operations, 3 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ XXBLENDVB,
+ XXBLENDVD,
+ XXBLENDVH,
+ XXBLENDVW,
+ XXSPLTI32DX
+)>;
+
+// 4 Cycles Permute operations, 4 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
+ (instrs
+ XXEVAL,
+ XXPERMX
+)>;
+
+// 3 Cycles Store operations, 1 input operand
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
+ (instrs
+ DCBST,
+ DCBZ,
+ ICBI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ DCBF,
+ PSTXVP, PSTXVPpc,
+ STB, STB8,
+ STBU, STBU8,
+ STBUX, STBUX8,
+ SPILLTOVSR_ST, STD,
+ STDBRX,
+ STDU,
+ STDUX,
+ DFSTOREf32, DFSTOREf64, STFD,
+ STFDU,
+ STFDUX,
+ STFDX,
+ STFIWX, STIWX,
+ STFS,
+ STFSU,
+ STFSUX,
+ STFSX,
+ STH, STH8,
+ STHBRX,
+ STHU, STHU8,
+ STHUX, STHUX8,
+ STVEBX,
+ STVEHX,
+ STVEWX,
+ STVX,
+ STVXL,
+ STW, STW8,
+ STWBRX,
+ STWU, STWU8,
+ STWUX, STWUX8,
+ STXSD,
+ STXSDX,
+ STXSIBX, STXSIBXv,
+ STXSIHX, STXSIHXv,
+ STXSIWX,
+ STXSSP,
+ STXSSPX,
+ STXV,
+ STXVB16X,
+ STXVD2X,
+ STXVH8X,
+ STXVRBX,
+ STXVRDX,
+ STXVRHX,
+ STXVRWX,
+ STXVW4X,
+ STXVX
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ CP_COPY, CP_COPY8,
+ STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
+ SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
+ STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
+ STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
+ STXVL,
+ STXVLL
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ EnforceIEIO,
+ MSGSYNC,
+ SLBSYNC,
+ TCHECK,
+ TLBSYNC
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 1 input operand
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
+ (instrs
+ TEND
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ SLBIEG,
+ STBCX,
+ STDCX,
+ STHCX,
+ STWCX,
+ TLBIE
+)>;
+
+// Single crack instructions
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ CP_PASTE8_rec, CP_PASTE_rec,
+ STBCIX,
+ STDCIX,
+ STHCIX,
+ STWCIX
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ ISYNC
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ SYNC
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ LDAT,
+ LWAT
+)>;
+
+// 4-way crack instructions
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
+ (instrs
+ STDAT,
+ STWAT
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+ (instrs
+ STMW
+)>;
+
+// Expand instructions
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
+ STSWI
+)>;
+
+// 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+ (instrs
+ PSTB, PSTB8, PSTB8pc, PSTBpc,
+ PSTD, PSTDpc,
+ PSTFD, PSTFDpc,
+ PSTFS, PSTFSpc,
+ PSTH, PSTH8, PSTH8pc, PSTHpc,
+ PSTW, PSTW8, PSTW8pc, PSTWpc,
+ PSTXSD, PSTXSDpc,
+ PSTXSSP, PSTXSSPpc,
+ PSTXV, PSTXVpc
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+ (instrs
+ STXVP,
+ STXVPX
+)>;
+
+// FIXME - Missing scheduling information from the datasheet.
+// Temporarily set as 1 Cycle Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX, P10W_DISP_ANY],
+ (instrs
+ ATTN,
+ CP_ABORT,
+ DCBA,
+ DCBI,
+ DCBZL,
+ DCCCI,
+ ICBLC,
+ ICBLQ,
+ ICBTLS,
+ ICCCI,
+ LA,
+ LDMX,
+ MFDCR,
+ MFPMR,
+ MFSRIN,
+ MSYNC,
+ MTDCR,
+ MTPMR,
+ MTSRIN,
+ NAP,
+ TLBIA,
+ TLBLD,
+ TLBLI,
+ TLBRE2,
+ TLBSX2,
+ TLBSX2D,
+ TLBWE2
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
+ (instrs
+ CLRBHRB,
+ MFMSR
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operand
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
+ (instrs
+ MFTB
+)>;
+
+// Single crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
+ (instrs
+ MFBHRBE,
+ MTMSR,
+ MTMSRD
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operand
+def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ ADDPCIS
+)>;
+
+// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operand
+def : InstRW<[P10W_SX_3C, P10W_DISP_PAIR, P10SX_Read],
+ (instrs
+ PADDI, PADDI8, PADDI8pc, PADDIpc, PLI, PLI8
+)>;
+
+// 7 Cycles VMX Multiply operations, 2 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read],
+ (instrs
+ VMULESB,
+ VMULESD,
+ VMULESH,
+ VMULESW,
+ VMULEUB,
+ VMULEUD,
+ VMULEUH,
+ VMULEUW,
+ VMULHSW,
+ VMULHUW,
+ VMULOSB,
+ VMULOSD,
+ VMULOSH,
+ VMULOSW,
+ VMULOUB,
+ VMULOUD,
+ VMULOUH,
+ VMULOUW,
+ VMULUWM,
+ VSUM2SWS,
+ VSUM4SBS,
+ VSUM4SHS,
+ VSUM4UBS
+)>;
+
+// 7 Cycles VMX Multiply operations, 3 input operands
+def : InstRW<[P10W_vMU_7C, P10W_DISP_ANY, P10vMU_Read, P10vMU_Read, P10vMU_Read],
+ (instrs
+ VMHADDSHS,
+ VMHRADDSHS,
+ VMLADDUHM,
+ VMSUMCUD,
+ VMSUMMBM,
+ VMSUMSHM,
+ VMSUMSHS,
+ VMSUMUBM,
+ VMSUMUDM,
+ VMSUMUHM,
+ VMSUMUHS
+)>;
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index 76663acf4782..c4f4a2b3d796 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -1302,15 +1302,15 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
(instregex "BCCTR(L)?(8)?(n)?$"),
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
- (instregex "BL(_TLS|_NOP)?$"),
- (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
- (instregex "BLA(8|8_NOP)?$"),
+ (instregex "BL(_TLS|_NOP)?(_RM)?$"),
+ (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
+ (instregex "BLA(8|8_NOP)?(_RM)?$"),
(instregex "BLR(8|L)?$"),
(instregex "TAILB(A)?(8)?$"),
(instregex "TAILBCTR(8)?$"),
(instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
(instregex "BCLR(L)?(n)?$"),
- (instregex "BCTR(L)?(8)?$"),
+ (instregex "BCTR(L)?(8)?(_RM)?$"),
B,
BA,
BC,
@@ -1321,6 +1321,8 @@ def : InstRW<[P9_BR_2C, DISP_BR_1C],
BCLn,
BCTRL8_LDinto_toc,
BCTRL_LWZinto_toc,
+ BCTRL8_LDinto_toc_RM,
+ BCTRL_LWZinto_toc_RM,
BCn,
CTRL_DEP
)>;
@@ -1430,5 +1432,6 @@ def : InstRW<[],
DCBI,
DCCCI,
ICCCI,
- ADDEX
+ ADDEX,
+ ADDEX8
)> { let Unsupported = 1; }
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index ce43ced57560..a1ff20bb3612 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -187,6 +187,22 @@ def FeatureAddisLoadFusion : SubtargetFeature<"fuse-addis-load",
def FeatureStoreFusion : SubtargetFeature<"fuse-store", "HasStoreFusion", "true",
"Target supports store clustering",
[FeatureFusion]>;
+def FeatureArithAddFusion :
+ SubtargetFeature<"fuse-arith-add", "HasArithAddFusion", "true",
+ "Target supports Arithmetic Operations with Add fusion",
+ [FeatureFusion]>;
+def FeatureAddLogicalFusion :
+ SubtargetFeature<"fuse-add-logical", "HasAddLogicalFusion", "true",
+ "Target supports Add with Logical Operations fusion",
+ [FeatureFusion]>;
+def FeatureLogicalAddFusion :
+ SubtargetFeature<"fuse-logical-add", "HasLogicalAddFusion", "true",
+ "Target supports Logical with Add Operations fusion",
+ [FeatureFusion]>;
+def FeatureLogicalFusion :
+ SubtargetFeature<"fuse-logical", "HasLogicalFusion", "true",
+ "Target supports Logical Operations fusion",
+ [FeatureFusion]>;
def FeatureUnalignedFloats :
SubtargetFeature<"allow-unaligned-fp-access", "AllowsUnalignedFPAccess",
"true", "CPU does not trap on unaligned FP access">;
@@ -213,6 +229,9 @@ def FeatureSlowPOPCNTD : SubtargetFeature<"slow-popcntd","HasPOPCNTD",
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
"Treat vector data stream cache control instructions as deprecated">;
+def FeatureISA2_06 : SubtargetFeature<"isa-v206-instructions", "IsISA2_06",
+ "true",
+ "Enable instructions in ISA 2.06.">;
def FeatureISA2_07 : SubtargetFeature<"isa-v207-instructions", "IsISA2_07",
"true",
"Enable instructions in ISA 2.07.">;
@@ -319,7 +338,8 @@ def ProcessorFeatures {
FeatureMFTB,
DeprecatedDST,
FeatureTwoConstNR,
- FeatureUnalignedFloats];
+ FeatureUnalignedFloats,
+ FeatureISA2_06];
list<SubtargetFeature> P7SpecificFeatures = [];
list<SubtargetFeature> P7Features =
!listconcat(P7InheritableFeatures, P7SpecificFeatures);
@@ -371,7 +391,10 @@ def ProcessorFeatures {
// Power10
// For P10 CPU we assume that all of the existing features from Power9
// still exist with the exception of those we know are Power9 specific.
- list<SubtargetFeature> FusionFeatures = [FeatureStoreFusion];
+ list<SubtargetFeature> FusionFeatures = [
+ FeatureStoreFusion, FeatureAddLogicalFusion, FeatureLogicalAddFusion,
+ FeatureLogicalFusion, FeatureArithAddFusion
+ ];
list<SubtargetFeature> P10AdditionalFeatures =
!listconcat(FusionFeatures, [
DirectivePwr10, FeatureISA3_1, FeaturePrefixInstrs,
@@ -586,8 +609,7 @@ def : ProcessorModel<"pwr6x", G5Model,
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
-// No scheduler model yet.
-def : ProcessorModel<"pwr10", P9Model, ProcessorFeatures.P10Features>;
+def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index d0109f968446..a76963abb8e4 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -58,13 +58,13 @@
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Process.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -1494,7 +1494,7 @@ void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
//
// Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number
// of instructions change.
- OutStreamer->emitCodeAlignment(8);
+ OutStreamer->emitCodeAlignment(8, &getSubtargetInfo());
MCSymbol *BeginOfSled = OutContext.createTempSymbol();
OutStreamer->emitLabel(BeginOfSled);
EmitToStreamer(*OutStreamer, RetInst);
@@ -2023,9 +2023,10 @@ void PPCAIXAsmPrinter::emitTracebackTable() {
// Set the 4th byte of the mandatory field.
FirstHalfOfMandatoryField |= TracebackTable::IsFunctionNamePresentMask;
- static_assert(XCOFF::AllocRegNo == 31, "Unexpected register usage!");
- if (MRI.isPhysRegUsed(Subtarget->isPPC64() ? PPC::X31 : PPC::R31,
- /* SkipRegMaskTest */ true))
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo *>(Subtarget->getRegisterInfo());
+ Register FrameReg = RegInfo->getFrameRegister(*MF);
+ if (FrameReg == (Subtarget->isPPC64() ? PPC::X31 : PPC::R31))
FirstHalfOfMandatoryField |= TracebackTable::IsAllocaUsedMask;
const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs();
@@ -2527,7 +2528,7 @@ bool PPCAIXAsmPrinter::doInitialization(Module &M) {
// Construct an aliasing list for each GlobalObject.
for (const auto &Alias : M.aliases()) {
- const GlobalObject *Base = Alias.getBaseObject();
+ const GlobalObject *Base = Alias.getAliaseeObject();
if (!Base)
report_fatal_error(
"alias without a base object is not yet supported on AIX");
diff --git a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
index 50ae4450a837..786a3e163540 100644
--- a/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp
@@ -291,7 +291,7 @@ bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) {
return false;
}
- // Sanity check - the block must be able to fall through
+ // The block must be able to fall through.
assert(Cand.BranchBlock->canFallThrough() &&
"Expecting the block to fall through!");
@@ -751,9 +751,8 @@ bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) {
if (!canCoalesceBranch(Cand2))
break;
- // Sanity check
// The branch-taken block of the second candidate should post-dominate the
- // first candidate
+ // first candidate.
assert(MPDT->dominates(Cand2.BranchTargetBlock, Cand1.BranchBlock) &&
"Branch-taken block should post-dominate first candidate");
diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index 08b7bdb3ac1e..ff3d36d39fb2 100644
--- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -25,9 +25,9 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -62,15 +62,14 @@ protected:
return Changed;
SmallVector<MachineBasicBlock*, 8> PredToRemove;
- for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
- PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ for (MachineBasicBlock *Pred : ReturnMBB.predecessors()) {
bool OtherReference = false, BlockChanged = false;
- if ((*PI)->empty())
+ if (Pred->empty())
continue;
- for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
- if (J == (*PI)->end())
+ for (MachineBasicBlock::iterator J = Pred->getLastNonDebugInstr();;) {
+ if (J == Pred->end())
break;
if (J->getOpcode() == PPC::B) {
@@ -78,7 +77,7 @@ protected:
// This is an unconditional branch to the return. Replace the
// branch with a blr.
MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I);
- (*PI)->insert(J, MI);
+ Pred->insert(J, MI);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -95,7 +94,7 @@ protected:
MachineInstrBuilder(*ReturnMBB.getParent(), MI)
.add(J->getOperand(0))
.add(J->getOperand(1));
- (*PI)->insert(J, MI);
+ Pred->insert(J, MI);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -112,7 +111,7 @@ protected:
TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn));
MachineInstrBuilder(*ReturnMBB.getParent(), MI)
.add(J->getOperand(0));
- (*PI)->insert(J, MI);
+ Pred->insert(J, MI);
MachineBasicBlock::iterator K = J--;
K->eraseFromParent();
@@ -132,18 +131,18 @@ protected:
} else if (!J->isTerminator() && !J->isDebugInstr())
break;
- if (J == (*PI)->begin())
+ if (J == Pred->begin())
break;
--J;
}
- if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ if (Pred->canFallThrough() && Pred->isLayoutSuccessor(&ReturnMBB))
OtherReference = true;
// Predecessors are stored in a vector and can't be removed here.
if (!OtherReference && BlockChanged) {
- PredToRemove.push_back(*PI);
+ PredToRemove.push_back(Pred);
}
if (BlockChanged)
@@ -185,12 +184,9 @@ public:
// nothing to do.
if (MF.size() < 2)
return Changed;
-
- // We can't use a range-based for loop due to clobbering the iterator.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E;) {
- MachineBasicBlock &B = *I++;
+
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
Changed |= processBlock(B);
- }
return Changed;
}
diff --git a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
index 9daef26ede47..be4c9dd60b00 100644
--- a/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp
@@ -102,6 +102,16 @@ bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
return expandAtomicRMW128(MBB, MI, NMBBI);
case PPC::ATOMIC_CMP_SWAP_I128:
return expandAtomicCmpSwap128(MBB, MI, NMBBI);
+ case PPC::BUILD_QUADWORD: {
+ Register Dst = MI.getOperand(0).getReg();
+ Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0);
+ Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1);
+ Register Lo = MI.getOperand(1).getReg();
+ Register Hi = MI.getOperand(2).getReg();
+ PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo);
+ MI.eraseFromParent();
+ return true;
+ }
default:
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index dfb2c1e5c0f5..856569bc8a73 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -987,15 +987,16 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
auto RC = MRI.getRegClass(SrcReg);
if (Subtarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(PPC::EFSCFD), DestReg)
- .addReg(SrcReg);
- } else if (isVSFRCRegClass(RC)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::EFSCFD),
+ DestReg)
+ .addReg(SrcReg);
+ } else if (Subtarget->hasP8Vector() && isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSSRCRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(PPC::XSRSP), DestReg)
- .addReg(SrcReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::XSRSP),
+ DestReg)
+ .addReg(SrcReg);
} else {
+ SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::FRSP), DestReg)
@@ -2467,9 +2468,9 @@ namespace llvm {
// Create the fast instruction selector for PowerPC64 ELF.
FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {
- // Only available on 64-bit ELF for now.
+ // Only available on 64-bit for now.
const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
- if (Subtarget.is64BitELFABI())
+ if (Subtarget.isPPC64())
return new PPCFastISel(FuncInfo, LibInfo);
return nullptr;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 1de6b633d20a..fc3c7ec35b8d 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -279,11 +279,11 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
-unsigned
+uint64_t
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
bool UseEstimate) const {
unsigned NewMaxCallFrameSize = 0;
- unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
+ uint64_t FrameSize = determineFrameLayout(MF, UseEstimate,
&NewMaxCallFrameSize);
MF.getFrameInfo().setStackSize(FrameSize);
MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
@@ -292,7 +292,7 @@ PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
-unsigned
+uint64_t
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
bool UseEstimate,
unsigned *NewMaxCallFrameSize) const {
@@ -300,7 +300,7 @@ PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Get the number of bytes to allocate from the FrameInfo
- unsigned FrameSize =
+ uint64_t FrameSize =
UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
// Get stack alignments. The frame must be aligned to the greatest of these:
@@ -624,9 +624,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI.");
// Work out frame sizes.
- unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
- int NegFrameSize = -FrameSize;
- if (!isInt<32>(NegFrameSize))
+ uint64_t FrameSize = determineFrameLayoutAndUpdate(MF);
+ int64_t NegFrameSize = -FrameSize;
+ if (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
if (MFI.isFrameAddressTaken())
@@ -692,9 +692,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
SingleScratchReg = ScratchReg == TempReg;
- int LROffset = getReturnSaveOffset();
+ int64_t LROffset = getReturnSaveOffset();
- int FPOffset = 0;
+ int64_t FPOffset = 0;
if (HasFP) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
@@ -702,7 +702,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
FPOffset = MFI.getObjectOffset(FPIndex);
}
- int BPOffset = 0;
+ int64_t BPOffset = 0;
if (HasBP) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
@@ -710,7 +710,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BPOffset = MFI.getObjectOffset(BPIndex);
}
- int PBPOffset = 0;
+ int64_t PBPOffset = 0;
if (FI->usesPICBase()) {
MachineFrameInfo &MFI = MF.getFrameInfo();
int PBPIndex = FI->getPICBasePointerSaveIndex();
@@ -854,7 +854,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// ABI.
if (HasROPProtect) {
const int SaveIndex = FI->getROPProtectionHashSaveIndex();
- const int ImmOffset = MFI.getObjectOffset(SaveIndex);
+ const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
assert((ImmOffset <= -8 && ImmOffset >= -512) &&
"ROP hash save offset out of range.");
assert(((ImmOffset & 0x7) == 0) &&
@@ -1212,7 +1212,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIRegister);
} else {
- int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
// We have changed the object offset above but we do not want to change
// the actual offsets in the CFI instruction so we have to undo the
// offset change here.
@@ -1550,7 +1550,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
const MachineFrameInfo &MFI = MF.getFrameInfo();
// Get the number of bytes allocated from the FrameInfo.
- int FrameSize = MFI.getStackSize();
+ int64_t FrameSize = MFI.getStackSize();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -1592,9 +1592,9 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
: PPC::MTOCRF);
const MCInstrDesc &HashChk =
TII.get(HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK);
- int LROffset = getReturnSaveOffset();
+ int64_t LROffset = getReturnSaveOffset();
- int FPOffset = 0;
+ int64_t FPOffset = 0;
// Using the same bool variable as below to suppress compiler warnings.
bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
@@ -1610,14 +1610,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
FPOffset = MFI.getObjectOffset(FPIndex);
}
- int BPOffset = 0;
+ int64_t BPOffset = 0;
if (HasBP) {
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
BPOffset = MFI.getObjectOffset(BPIndex);
}
- int PBPOffset = 0;
+ int64_t PBPOffset = 0;
if (FI->usesPICBase()) {
int PBPIndex = FI->getPICBasePointerSaveIndex();
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
@@ -1865,7 +1865,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// hash and then compare it to the hash stored in the prologue.
if (HasROPProtect) {
const int SaveIndex = FI->getROPProtectionHashSaveIndex();
- const int ImmOffset = MFI.getObjectOffset(SaveIndex);
+ const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex);
assert((ImmOffset <= -8 && ImmOffset >= -512) &&
"ROP hash check location offset out of range.");
assert(((ImmOffset & 0x7) == 0) &&
@@ -2680,15 +2680,15 @@ bool PPCFrameLowering::restoreCalleeSavedRegisters(
return true;
}
-unsigned PPCFrameLowering::getTOCSaveOffset() const {
+uint64_t PPCFrameLowering::getTOCSaveOffset() const {
return TOCSaveOffset;
}
-unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
+uint64_t PPCFrameLowering::getFramePointerSaveOffset() const {
return FramePointerSaveOffset;
}
-unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
+uint64_t PPCFrameLowering::getBasePointerSaveOffset() const {
return BasePointerSaveOffset;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index b378c2739925..21883b19a575 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -21,12 +21,12 @@ class PPCSubtarget;
class PPCFrameLowering: public TargetFrameLowering {
const PPCSubtarget &Subtarget;
- const unsigned ReturnSaveOffset;
- const unsigned TOCSaveOffset;
- const unsigned FramePointerSaveOffset;
+ const uint64_t ReturnSaveOffset;
+ const uint64_t TOCSaveOffset;
+ const uint64_t FramePointerSaveOffset;
const unsigned LinkageSize;
- const unsigned BasePointerSaveOffset;
- const unsigned CRSaveOffset;
+ const uint64_t BasePointerSaveOffset;
+ const uint64_t CRSaveOffset;
// Map each group of one or two GPRs to corresponding VSR for spilling.
// TODO: Use local table in methods to avoid this mutable member.
@@ -88,7 +88,7 @@ public:
/**
* Determine the frame layout and update the machine function.
*/
- unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
+ uint64_t determineFrameLayoutAndUpdate(MachineFunction &MF,
bool UseEstimate = false) const;
/**
@@ -96,7 +96,7 @@ public:
* The MachineFunction object can be const in this case as it is not
* modified.
*/
- unsigned determineFrameLayout(const MachineFunction &MF,
+ uint64_t determineFrameLayout(const MachineFunction &MF,
bool UseEstimate = false,
unsigned *NewMaxCallFrameSize = nullptr) const;
@@ -146,19 +146,19 @@ public:
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
- unsigned getReturnSaveOffset() const { return ReturnSaveOffset; }
+ uint64_t getReturnSaveOffset() const { return ReturnSaveOffset; }
/// getTOCSaveOffset - Return the previous frame offset to save the
/// TOC register -- 64-bit SVR4 ABI only.
- unsigned getTOCSaveOffset() const;
+ uint64_t getTOCSaveOffset() const;
/// getFramePointerSaveOffset - Return the previous frame offset to save the
/// frame pointer.
- unsigned getFramePointerSaveOffset() const;
+ uint64_t getFramePointerSaveOffset() const;
/// getBasePointerSaveOffset - Return the previous frame offset to save the
/// base pointer.
- unsigned getBasePointerSaveOffset() const;
+ uint64_t getBasePointerSaveOffset() const;
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 606aae66196c..0abdf81d0908 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -262,6 +262,21 @@ namespace {
None) == PPC::AM_DForm;
}
+ /// SelectPCRelForm - Returns true if address N can be represented by
+ /// PC-Relative addressing mode.
+ bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+ None) == PPC::AM_PCRel;
+ }
+
+ /// SelectPDForm - Returns true if address N can be represented by Prefixed
+ /// DForm addressing mode (a base register plus a signed 34-bit immediate).
+ bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
+ None) == PPC::AM_PrefixDForm;
+ }
+
/// SelectXForm - Returns true if address N can be represented by the
/// addressing mode of XForm instructions (an indexed [r+r] operation).
bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
@@ -3186,7 +3201,7 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
// by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3236,7 +3251,7 @@ IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
// (%b < %a) by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -3370,7 +3385,7 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
// by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3415,7 +3430,7 @@ IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
// (%b < %a) by swapping inputs and falling through.
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -3528,7 +3543,7 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3570,7 +3585,7 @@ IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
}
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -3687,7 +3702,7 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
LLVM_FALLTHROUGH;
}
case ISD::SETLE: {
@@ -3730,7 +3745,7 @@ IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
}
std::swap(LHS, RHS);
ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
- IsRHSZero = RHSConst && RHSConst->isNullValue();
+ IsRHSZero = RHSConst && RHSConst->isZero();
IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
LLVM_FALLTHROUGH;
}
@@ -4982,6 +4997,51 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
break;
+ case ISD::INTRINSIC_VOID: {
+ auto IntrinsicID = N->getConstantOperandVal(1);
+ if (IntrinsicID == Intrinsic::ppc_tdw || IntrinsicID == Intrinsic::ppc_tw) {
+ unsigned Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TDI : PPC::TWI;
+ SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
+ int16_t SImmOperand2;
+ int16_t SImmOperand3;
+ int16_t SImmOperand4;
+ bool isOperand2IntS16Immediate =
+ isIntS16Immediate(N->getOperand(2), SImmOperand2);
+ bool isOperand3IntS16Immediate =
+ isIntS16Immediate(N->getOperand(3), SImmOperand3);
+ // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
+ // reg or imm + imm. The imm + imm form will be optimized to either an
+ // unconditional trap or a nop in a later pass.
+ if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
+ Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
+ else if (isOperand3IntS16Immediate)
+ // The 2nd and 3rd operands are reg + imm.
+ Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
+ else {
+ // The 2nd and 3rd operands are imm + reg.
+ bool isOperand4IntS16Immediate =
+ isIntS16Immediate(N->getOperand(4), SImmOperand4);
+ (void)isOperand4IntS16Immediate;
+ assert(isOperand4IntS16Immediate &&
+ "The 4th operand is not an Immediate");
+ // We need to flip the condition immediate TO.
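+ // Swapping the compared operands also requires exchanging the less-than and
+ // greater-than trap conditions in TO (for both the signed and the unsigned
+ // pair); the equality bit is unaffected.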
+ int16_t TO = int(SImmOperand4) & 0x1F;
+ // We swap the first and second bits of TO if they are not the same.
+ if ((TO & 0x1) != ((TO & 0x2) >> 1))
+ TO = (TO & 0x1) ? TO + 1 : TO - 1;
+ // We swap the fourth and fifth bits of TO if they are not the same.
+ if ((TO & 0x8) != ((TO & 0x10) >> 1))
+ TO = (TO & 0x8) ? TO + 8 : TO - 8;
+ Ops[0] = getI32Imm(TO, dl);
+ Ops[1] = N->getOperand(3);
+ Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
+ }
+ CurDAG->SelectNodeTo(N, Opcode, MVT::Other, Ops);
+ return;
+ }
+ break;
+ }
+
case ISD::INTRINSIC_WO_CHAIN: {
// We emit the PPC::FSELS instruction here because of type conflicts with
// the comparison operand. The FSELS instruction is defined to use an 8-byte
@@ -5423,8 +5483,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- if (N1C->isNullValue() && N3C->isNullValue() &&
- N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
+ CC == ISD::SETNE &&
// FIXME: Implement this optzn for PPC64.
N->getValueType(0) == MVT::i32) {
SDNode *Tmp =
@@ -5810,6 +5870,69 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
}
}
+ case PPCISD::LD_SPLAT: {
+ // Here we want to handle splat loads of type v16i8 and v8i16 when there is
+ // no direct move; we don't need to use the stack for this case. If the target
+ // has direct move, we should be able to get the best selection in the .td file.
+ if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
+ break;
+
+ EVT Type = N->getValueType(0);
+ if (Type != MVT::v16i8 && Type != MVT::v8i16)
+ break;
+
+ SDValue ZeroReg =
+ CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
+ unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
+ // v16i8 LD_SPLAT addr
+ // ======>
+ // Mask = LVSR/LVSL 0, addr
+ // LoadLow = LVX 0, addr
+ // Perm = VPERM LoadLow, LoadLow, Mask
+ // Splat = VSPLTB 15/0, Perm
+ //
+ // v8i16 LD_SPLAT addr
+ // ======>
+ // Mask = LVSR/LVSL 0, addr
+ // LoadLow = LVX 0, addr
+ // LoadHigh = LVX (LI, 1), addr
+ // Perm = VPERM LoadLow, LoadHigh, Mask
+ // Splat = VSPLTH 7/0, Perm
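+ // For v8i16 the halfword at addr may straddle a 16-byte boundary, so a
+ // second LVX at addr + 1 fetches the following quadword (lvx ignores the
+ // low four bits of the effective address) and VPERM assembles the element
+ // from the two loads.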
+ unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
+ unsigned SplatElemIndex =
+ Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
+
+ SDNode *Mask = CurDAG->getMachineNode(
+ Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
+ N->getOperand(1));
+
+ SDNode *LoadLow =
+ CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
+ {ZeroReg, N->getOperand(1), N->getOperand(0)});
+
+ SDNode *LoadHigh = LoadLow;
+ if (Type == MVT::v8i16) {
+ LoadHigh = CurDAG->getMachineNode(
+ PPC::LVX, dl, MVT::v16i8, MVT::Other,
+ {SDValue(CurDAG->getMachineNode(
+ LIOpcode, dl, MVT::i32,
+ CurDAG->getTargetConstant(1, dl, MVT::i8)),
+ 0),
+ N->getOperand(1), SDValue(LoadLow, 1)});
+ }
+
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
+ transferMemOperands(N, LoadHigh);
+
+ SDNode *Perm =
+ CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
+ SDValue(LoadHigh, 0), SDValue(Mask, 0));
+ CurDAG->SelectNodeTo(N, SplatOp, Type,
+ CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
+ SDValue(Perm, 0));
+ return;
+ }
}
SelectCode(N);
@@ -6153,9 +6276,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// be folded with the isel so that we don't need to materialize a register
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : N->uses()) {
if (!User->isMachineOpcode())
return false;
if (User->getMachineOpcode() != PPC::SELECT_I4 &&
@@ -6180,7 +6301,7 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
if (!C)
return false;
- if (!C->isNullValue())
+ if (!C->isZero())
return false;
}
@@ -6189,18 +6310,14 @@ bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
SmallVector<SDNode *, 4> ToReplace;
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : N->uses()) {
assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
User->getMachineOpcode() == PPC::SELECT_I8) &&
"Must have all select users");
ToReplace.push_back(User);
}
- for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
- UE = ToReplace.end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : ToReplace) {
SDNode *ResNode =
CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
User->getValueType(0), User->getOperand(0),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 37358176f35e..ac952b240a48 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -601,6 +601,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
// To handle counter-based loop conditions.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
@@ -1245,9 +1247,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
}
if (Subtarget.hasP9Altivec()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
-
+ if (Subtarget.isISA3_1()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Legal);
+ } else {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
+ }
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
@@ -1256,9 +1265,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
-
- if (Subtarget.isISA3_1())
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
}
if (Subtarget.pairedVectorMemops()) {
@@ -1286,8 +1292,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
}
- if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics())
+ if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
setMaxAtomicSizeInBitsSupported(128);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
+ }
setBooleanContents(ZeroOrOneBooleanContent);
@@ -1301,6 +1311,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!isPPC64)
@@ -1513,10 +1524,10 @@ void PPCTargetLowering::initializeAddrModeMap() {
PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
- PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
- PPC::MOF_NotAddNorCst | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
- PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector256 | PPC::MOF_SubtargetP10,
};
+ AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
+ PPC::MOF_SubtargetP10};
+ // TODO: Add mapping for quadword load/store.
}
/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
@@ -1550,7 +1561,7 @@ static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
-unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
+uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
// 16byte and wider vectors are passed on 16byte boundary.
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
@@ -1623,9 +1634,19 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
+ case PPCISD::CALL_RM:
+ return "PPCISD::CALL_RM";
+ case PPCISD::CALL_NOP_RM:
+ return "PPCISD::CALL_NOP_RM";
+ case PPCISD::CALL_NOTOC_RM:
+ return "PPCISD::CALL_NOTOC_RM";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
+ case PPCISD::BCTRL_RM:
+ return "PPCISD::BCTRL_RM";
+ case PPCISD::BCTRL_LOAD_TOC_RM:
+ return "PPCISD::BCTRL_LOAD_TOC_RM";
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
@@ -1707,6 +1728,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
+ case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
+ case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
case PPCISD::STRICT_FADDRTZ:
return "PPCISD::STRICT_FADDRTZ";
@@ -2551,9 +2574,8 @@ static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
SDValue &Index,
SelectionDAG &DAG) const {
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
- UI != E; ++UI) {
- if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
+ for (SDNode *U : N->uses()) {
+ if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
if (Memop->getMemoryVT() == MVT::f64) {
Base = N.getOperand(0);
Index = N.getOperand(1);
@@ -3503,7 +3525,7 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.
- if (C->isAllOnesValue() || C->isNullValue())
+ if (C->isAllOnes() || C->isZero())
return SDValue();
}
@@ -4364,21 +4386,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
FuncInfo->addLiveInAttr(VReg, Flags);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
- SDValue Store;
-
- if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
- EVT ObjType = (ObjSize == 1 ? MVT::i8 :
- (ObjSize == 2 ? MVT::i16 : MVT::i32));
- Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
- MachinePointerInfo(&*FuncArg), ObjType);
- } else {
- // For sizes that don't fit a truncating store (3, 5, 6, 7),
- // store the whole register as-is to the parameter save area
- // slot.
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
- MachinePointerInfo(&*FuncArg));
- }
-
+ EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
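+ // A truncating store of the iN type matching ObjSize writes exactly ObjSize
+ // bytes, so this also covers the 3-, 5-, 6- and 7-byte cases that previously
+ // stored the whole register.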
+ SDValue Store =
+ DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
+ MachinePointerInfo(&*FuncArg), ObjType);
MemOps.push_back(Store);
}
// Whether we copied from a register or not, advance the offset
@@ -4649,7 +4660,7 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
// If we have an Alias we can try to get the function from there.
if (Alias) {
- const GlobalObject *GlobalObj = Alias->getBaseObject();
+ const GlobalObject *GlobalObj = Alias->getAliaseeObject();
F = dyn_cast<Function>(GlobalObj);
}
@@ -5174,13 +5185,14 @@ static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
}
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
- const Function &Caller,
- const SDValue &Callee,
+ const Function &Caller, const SDValue &Callee,
const PPCSubtarget &Subtarget,
- const TargetMachine &TM) {
+ const TargetMachine &TM,
+ bool IsStrictFPCall = false) {
if (CFlags.IsTailCall)
return PPCISD::TC_RETURN;
+ unsigned RetOpc = 0;
// This is a call through a function pointer.
if (CFlags.IsIndirect) {
// AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
@@ -5191,28 +5203,46 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// immediately followed by a load of the TOC pointer from the the stack save
// slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
// as it is not saved or used.
- return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
- : PPCISD::BCTRL;
- }
-
- if (Subtarget.isUsingPCRelativeCalls()) {
+ RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
+ : PPCISD::BCTRL;
+ } else if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
- return PPCISD::CALL_NOTOC;
+ RetOpc = PPCISD::CALL_NOTOC;
+ } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ // The ABIs that maintain a TOC pointer across calls need to have a nop
+ // immediately following the call instruction if the caller and callee may
+ // have different TOC bases. At link time, if the linker determines the calls
+ // may not share a TOC base, the call is redirected to a trampoline inserted
+ // by the linker. The trampoline will (among other things) save the caller's
+ // TOC pointer at an ABI-designated offset in the linkage area and the
+ // linker will rewrite the nop to be a load of the TOC pointer from the
+ // linkage area into gpr2.
+ RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
+ : PPCISD::CALL_NOP;
+ else
+ RetOpc = PPCISD::CALL;
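+ // Calls with strictfp semantics use the *_RM variants, which additionally
+ // mark the rounding-mode register (RM) as defined by the call.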
+ if (IsStrictFPCall) {
+ switch (RetOpc) {
+ default:
+ llvm_unreachable("Unknown call opcode");
+ case PPCISD::BCTRL_LOAD_TOC:
+ RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
+ break;
+ case PPCISD::BCTRL:
+ RetOpc = PPCISD::BCTRL_RM;
+ break;
+ case PPCISD::CALL_NOTOC:
+ RetOpc = PPCISD::CALL_NOTOC_RM;
+ break;
+ case PPCISD::CALL:
+ RetOpc = PPCISD::CALL_RM;
+ break;
+ case PPCISD::CALL_NOP:
+ RetOpc = PPCISD::CALL_NOP_RM;
+ break;
+ }
}
-
- // The ABIs that maintain a TOC pointer accross calls need to have a nop
- // immediately following the call instruction if the caller and callee may
- // have different TOC bases. At link time if the linker determines the calls
- // may not share a TOC base, the call is redirected to a trampoline inserted
- // by the linker. The trampoline will (among other things) save the callers
- // TOC pointer at an ABI designated offset in the linkage area and the linker
- // will rewrite the nop to be a load of the TOC pointer from the linkage area
- // into gpr2.
- if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
- return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
- : PPCISD::CALL_NOP;
-
- return PPCISD::CALL;
+ return RetOpc;
}
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
@@ -5228,7 +5258,7 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
const GlobalValue *GV = G ? G->getGlobal() : nullptr;
return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
- !dyn_cast_or_null<GlobalIFunc>(GV);
+ !isa_and_nonnull<GlobalIFunc>(GV);
};
// The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
@@ -5508,7 +5538,7 @@ SDValue PPCTargetLowering::FinishCall(
unsigned CallOpc =
getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
- Subtarget, DAG.getTarget());
+ Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
if (!CFlags.IsIndirect)
Callee = transformCallee(Callee, DAG, dl, Subtarget);
@@ -9066,6 +9096,34 @@ bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
return (!LosesInfo && !APFloatToConvert.isDenormal());
}
+static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
+ unsigned &Opcode) {
+ const SDNode *InputNode = Op.getOperand(0).getNode();
+ if (!InputNode || !ISD::isUNINDEXEDLoad(InputNode))
+ return false;
+
+ if (!Subtarget.hasVSX())
+ return false;
+
+ EVT Ty = Op->getValueType(0);
+ if (Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32 ||
+ Ty == MVT::v8i16 || Ty == MVT::v16i8)
+ return true;
+
+ if (Ty == MVT::v2i64) {
+ // Check the extension type when the input type is i32 and the output
+ // vector type is v2i64.
+ if (cast<LoadSDNode>(Op.getOperand(0))->getMemoryVT() == MVT::i32) {
+ if (ISD::isZEXTLoad(InputNode))
+ Opcode = PPCISD::ZEXT_LD_SPLAT;
+ if (ISD::isSEXTLoad(InputNode))
+ Opcode = PPCISD::SEXT_LD_SPLAT;
+ }
+ return true;
+ }
+ return false;
+}
+
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -9129,17 +9187,26 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
+ unsigned NewOpcode = PPCISD::LD_SPLAT;
- bool IsPermutedLoad = false;
- const SDValue *InputLoad =
- getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
// Handle load-and-splat patterns as we have instructions that will do this
// in one go.
- if (InputLoad && DAG.isSplatValue(Op, true)) {
+ if (DAG.isSplatValue(Op, true) &&
+ isValidSplatLoad(Subtarget, Op, NewOpcode)) {
+ const SDValue *InputLoad = &Op.getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
- // We have handling for 4 and 8 byte elements.
- unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
+ // If the input load is an extending load, it will be an i32 -> i64
+ // extending load and isValidSplatLoad() will update NewOpcode.
+ unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
+ unsigned ElementSize =
+ MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
+
+ assert(((ElementSize == 2 * MemorySize)
+ ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
+ NewOpcode == PPCISD::SEXT_LD_SPLAT)
+ : (NewOpcode == PPCISD::LD_SPLAT)) &&
+ "Unmatched element size and opcode!\n");
// Checking for a single use of this load, we have to check for vector
// width (128 bits) / ElementSize uses (since each operand of the
@@ -9148,18 +9215,45 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (SDValue BVInOp : Op->ops())
if (BVInOp.isUndef())
NumUsesOfInputLD--;
+
+ // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
+ // The cases below would also apply to "lfiwzx/lfiwax + LE target + index
+ // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
+ // 15", but isValidSplatLoad() currently only returns true when the element
+ // at index 0 is a valid load, so we will not get into trouble for these
+ // cases.
+ //
+ // case 1 - lfiwzx/lfiwax
+ // 1.1: load result is i32 and is sign/zero extend to i64;
+ // 1.2: build a v2i64 vector type with above loaded value;
+ // 1.3: the vector has only one value at index 0, others are all undef;
+ // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
+ if (NumUsesOfInputLD == 1 &&
+ (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
+ !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
+ Subtarget.hasLFIWAX()))
+ return SDValue();
+
+ // case 2 - lxvr[hb]x
+ // 2.1: load result is at most i16;
+ // 2.2: build a vector with above loaded value;
+ // 2.3: the vector has only one value at index 0, others are all undef;
+ // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
+ if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
+ Subtarget.isISA3_1() && ElementSize <= 16)
+ return SDValue();
+
assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
- ((Subtarget.hasVSX() && ElementSize == 64) ||
- (Subtarget.hasP9Vector() && ElementSize == 32))) {
+ Subtarget.hasVSX()) {
SDValue Ops[] = {
LD->getChain(), // Chain
LD->getBasePtr(), // Ptr
DAG.getValueType(Op.getValueType()) // VT
};
SDValue LdSplt = DAG.getMemIntrinsicNode(
- PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
- Ops, LD->getMemoryVT(), LD->getMemOperand());
+ NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
+ LD->getMemoryVT(), LD->getMemOperand());
// Replace all uses of the output chain of the original load with the
// output chain of the new load.
DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
@@ -10368,6 +10462,71 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
return DAG.getMergeValues(RetOps, dl);
}
+
+ case Intrinsic::ppc_unpack_longdouble: {
+ auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
+ "Argument of long double unpack must be 0 or 1!");
+ return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
+ DAG.getConstant(!!(Idx->getSExtValue()), dl,
+ Idx->getValueType(0)));
+ }
+
+ case Intrinsic::ppc_compare_exp_lt:
+ case Intrinsic::ppc_compare_exp_gt:
+ case Intrinsic::ppc_compare_exp_eq:
+ case Intrinsic::ppc_compare_exp_uo: {
+ unsigned Pred;
+ switch (IntrinsicID) {
+ case Intrinsic::ppc_compare_exp_lt:
+ Pred = PPC::PRED_LT;
+ break;
+ case Intrinsic::ppc_compare_exp_gt:
+ Pred = PPC::PRED_GT;
+ break;
+ case Intrinsic::ppc_compare_exp_eq:
+ Pred = PPC::PRED_EQ;
+ break;
+ case Intrinsic::ppc_compare_exp_uo:
+ Pred = PPC::PRED_UN;
+ break;
+ }
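+ // Compare the exponents with XSCMPEXPDP and then materialize an i32 0/1
+ // result by selecting on the requested CR predicate.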
+ return SDValue(
+ DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
+ Op.getOperand(1), Op.getOperand(2)),
+ 0),
+ DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(Pred, dl, MVT::i32)}),
+ 0);
+ }
+ case Intrinsic::ppc_test_data_class_d:
+ case Intrinsic::ppc_test_data_class_f: {
+ unsigned CmprOpc = PPC::XSTSTDCDP;
+ if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
+ CmprOpc = PPC::XSTSTDCSP;
+ return SDValue(
+ DAG.getMachineNode(
+ PPC::SELECT_CC_I4, dl, MVT::i32,
+ {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
+ Op.getOperand(1)),
+ 0),
+ DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
+ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
+ 0);
+ }
+ case Intrinsic::ppc_convert_f128_to_ppcf128:
+ case Intrinsic::ppc_convert_ppcf128_to_f128: {
+ RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
+ ? RTLIB::CONVERT_PPCF128_F128
+ : RTLIB::CONVERT_F128_PPCF128;
+ MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Result =
+ makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
+ dl, SDValue());
+ return Result.first;
+ }
}
// If this is a lowered altivec predicate compare, CompareOpc is set to the
@@ -10443,11 +10602,18 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
case Intrinsic::ppc_cfence: {
assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
- return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
- Op.getOperand(ArgStart + 1)),
- Op.getOperand(0)),
- 0);
+ SDValue Val = Op.getOperand(ArgStart + 1);
+ EVT Ty = Val.getValueType();
+ if (Ty == MVT::i128) {
+ // FIXME: Testing one of two paired registers is sufficient to guarantee
+ // ordering?
+ Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
+ }
+ return SDValue(
+ DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
+ Op.getOperand(0)),
+ 0);
}
default:
break;
@@ -10510,6 +10676,59 @@ SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
+SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
+ SelectionDAG &DAG) const {
+ AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
+ EVT MemVT = N->getMemoryVT();
+ assert(MemVT.getSimpleVT() == MVT::i128 &&
+ "Expect quadword atomic operations");
+ SDLoc dl(N);
+ unsigned Opc = N->getOpcode();
+ switch (Opc) {
+ case ISD::ATOMIC_LOAD: {
+ // Lower a quadword atomic load to int_ppc_atomic_load_i128, which will be
+ // lowered to PPC instructions by the pattern-matching instruction selector.
+ SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ N->getOperand(0),
+ DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
+ for (int I = 1, E = N->getNumOperands(); I < E; ++I)
+ Ops.push_back(N->getOperand(I));
+ SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
+ Ops, MemVT, N->getMemOperand());
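+ // The intrinsic yields the quadword as two i64 halves plus a chain; treat
+ // result 0 as the low doubleword and result 1 as the high doubleword,
+ // zero-extend both to i128 and OR in the high half shifted left by 64 to
+ // rebuild the value.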
+ SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
+ SDValue ValHi =
+ DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
+ ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
+ DAG.getConstant(64, dl, MVT::i32));
+ SDValue Val =
+ DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
+ return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
+ {Val, LoadedVal.getValue(2)});
+ }
+ case ISD::ATOMIC_STORE: {
+ // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
+ // lowered to PPC instructions by the pattern-matching instruction selector.
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SmallVector<SDValue, 4> Ops{
+ N->getOperand(0),
+ DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
+ SDValue Val = N->getOperand(2);
+ SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
+ SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
+ DAG.getConstant(64, dl, MVT::i32));
+ ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
+ Ops.push_back(ValLo);
+ Ops.push_back(ValHi);
+ Ops.push_back(N->getOperand(1));
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
+ N->getMemOperand());
+ }
+ default:
+ llvm_unreachable("Unexpected atomic opcode");
+ }
+}
+
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -10537,7 +10756,6 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
- SDValue V3 = Op.getOperand(2);
if (VT == MVT::v2f64 && C)
return Op;
@@ -10546,18 +10764,10 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
return SDValue();
// On P10, we have legal lowering for constant and variable indices for
- // integer vectors.
+ // all vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64)
- return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
- // For f32 and f64 vectors, we have legal lowering for variable indices.
- // For f32 we also have legal lowering when the element is loaded from
- // memory.
- if (VT == MVT::v4f32 || VT == MVT::v2f64) {
- if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2)))
- return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
return Op;
- }
}
// Before P10, we have legal lowering for constant indices but not for
@@ -10901,6 +11111,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerBSWAP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP:
return LowerATOMIC_CMP_SWAP(Op, DAG);
+ case ISD::ATOMIC_STORE:
+ return LowerATOMIC_LOAD_STORE(Op, DAG);
}
}
@@ -10911,6 +11123,12 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::ATOMIC_LOAD: {
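+ // i128 is not a legal type, so quadword atomic loads are legalized here;
+ // forward the lowered value and its chain as the results.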
+ SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
case ISD::READCYCLECOUNTER: {
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
@@ -10937,6 +11155,18 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(NewInt.getValue(1));
break;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
+ case Intrinsic::ppc_pack_longdouble:
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ N->getOperand(2), N->getOperand(1)));
+ break;
+ case Intrinsic::ppc_convert_f128_to_ppcf128:
+ Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
+ break;
+ }
+ break;
+ }
case ISD::VAARG: {
if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
return;
@@ -12647,6 +12877,24 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
.addDef(Hi)
.addUse(Src, 0, PPC::sub_gp8_x0);
+ } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
+ MI.getOpcode() == PPC::STQX_PSEUDO) {
+ DebugLoc DL = MI.getDebugLoc();
+ // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
+ // holds the result of adding RA and RB, so it has to be
+ // g8rc_and_g8rc_nox0.
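+ // lq/stq only accept a base-plus-displacement memory operand, so the indexed
+ // address is first materialized with an add and a zero displacement is used.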
+ Register Ptr =
+ F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ Register Val = MI.getOperand(0).getReg();
+ Register RA = MI.getOperand(1).getReg();
+ Register RB = MI.getOperand(2).getReg();
+ BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
+ BuildMI(*BB, MI, DL,
+ MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
+ : TII->get(PPC::STQ))
+ .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
+ .addImm(0)
+ .addReg(Ptr);
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -12951,12 +13199,12 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
return true;
- for (SDNode::use_iterator UI = LoadRoot->use_begin(),
- UE = LoadRoot->use_end(); UI != UE; ++UI)
- if (((isa<MemSDNode>(*UI) &&
- cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
- UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
- Queue.push_back(*UI);
+ for (SDNode *U : LoadRoot->uses())
+ if (((isa<MemSDNode>(U) &&
+ cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
+ U->getOpcode() == ISD::TokenFactor) &&
+ !Visited.count(U))
+ Queue.push_back(U);
}
}
@@ -13013,11 +13261,9 @@ SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
// If all users of SETCC extend its value to a legal integer type
// then we replace SETCC with a subtraction
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() != ISD::ZERO_EXTEND)
+ for (const SDNode *U : N->uses())
+ if (U->getOpcode() != ISD::ZERO_EXTEND)
return SDValue();
- }
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
auto OpSize = N->getOperand(0).getValueSizeInBits();
@@ -13194,10 +13440,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
if (isa<ConstantSDNode>(Inputs[i]))
continue;
- for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
- UE = Inputs[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : Inputs[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -13218,10 +13461,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
}
for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
- for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
- UE = PromOps[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (const SDNode *User : PromOps[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -13406,10 +13646,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
if (isa<ConstantSDNode>(Inputs[i]))
continue;
- for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
- UE = Inputs[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Inputs[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -13431,10 +13668,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
}
for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
- for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
- UE = PromOps[i].getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : PromOps[i].getNode()->uses()) {
if (User != N && !Visited.count(User))
return SDValue();
@@ -14753,8 +14987,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->isNullValue() || // 0 >>s V -> 0.
- C->isAllOnesValue()) // -1 >>s V -> -1.
+ if (C->isZero() || // 0 >>s V -> 0.
+ C->isAllOnes()) // -1 >>s V -> -1.
return N->getOperand(0);
}
break;
@@ -15126,39 +15360,36 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
int Bits = 4 /* 16 byte alignment */;
if (DAG.MaskedValueIsZero(Add->getOperand(1),
- APInt::getAllOnesValue(Bits /* alignment */)
+ APInt::getAllOnes(Bits /* alignment */)
.zext(Add.getScalarValueSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
- IID) {
+ for (SDNode *U : BasePtr->uses()) {
+ if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
// We've found another LVSL/LVSR, and this address is an aligned
// multiple of that one. The results will be the same, so use the
// one we've just found instead.
- return SDValue(*UI, 0);
+ return SDValue(U, 0);
}
}
}
if (isa<ConstantSDNode>(Add->getOperand(1))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
- for (SDNode::use_iterator UI = BasePtr->use_begin(),
- UE = BasePtr->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(UI->getOperand(1)) &&
+ for (SDNode *U : BasePtr->uses()) {
+ if (U->getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(U->getOperand(1)) &&
(cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
- cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
- (1ULL << Bits) == 0) {
- SDNode *OtherAdd = *UI;
- for (SDNode::use_iterator VI = OtherAdd->use_begin(),
- VE = OtherAdd->use_end(); VI != VE; ++VI) {
- if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
- cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
- return SDValue(*VI, 0);
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
+ (1ULL << Bits) ==
+ 0) {
+ SDNode *OtherAdd = U;
+ for (SDNode *V : OtherAdd->uses()) {
+ if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
+ IID) {
+ return SDValue(V, 0);
}
}
}
@@ -15482,13 +15713,13 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (VT == MVT::i64 && !Subtarget.isPPC64())
return SDValue();
if ((VT != MVT::i32 && VT != MVT::i64) ||
- !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+ !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
- bool IsNegPow2 = (-Divisor).isPowerOf2();
+ bool IsNegPow2 = Divisor.isNegatedPowerOf2();
unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
@@ -15546,6 +15777,18 @@ void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero = ~1U; // All bits but the low one are known to be zero.
break;
}
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_load2r:
+ // Top bits are cleared for load2r (which is the same as lhbrx).
+ Known.Zero = 0xFFFF0000;
+ break;
+ }
+ break;
}
}
}
@@ -15960,7 +16203,12 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
auto PtrVT = getPointerTy(MF.getDataLayout());
if (Depth > 0) {
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ // The link register (return address) is saved in the caller's frame
+ // not the callee's stack frame. So we must get the caller's frame
+ // address and load the return address at the LR offset from there.
+ SDValue FrameAddr =
+ DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
SDValue Offset =
DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
isPPC64 ? MVT::i64 : MVT::i32);
@@ -16077,6 +16325,22 @@ bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
MachineMemOperand::MOVolatile;
return true;
+ case Intrinsic::ppc_atomic_load_i128:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = Align(16);
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ return true;
+ case Intrinsic::ppc_atomic_store_i128:
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = Align(16);
+ Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
+ return true;
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_altivec_lvebx:
@@ -17146,6 +17410,9 @@ PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
if ((Flags & FlagSet) == FlagSet)
return PPC::AM_DQForm;
+ for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
+ if ((Flags & FlagSet) == FlagSet)
+ return PPC::AM_PrefixDForm;
// If no other forms are selected, return an X-Form as it is the most
// general addressing mode.
return PPC::AM_XForm;
@@ -17236,6 +17503,14 @@ static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
}
}
+static bool isPCRelNode(SDValue N) {
+ return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
+ isValidPCRelNode<ConstantPoolSDNode>(N) ||
+ isValidPCRelNode<GlobalAddressSDNode>(N) ||
+ isValidPCRelNode<JumpTableSDNode>(N) ||
+ isValidPCRelNode<BlockAddressSDNode>(N));
+}
+
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
@@ -17253,6 +17528,26 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
if (Subtarget.hasSPE())
FlagSet |= PPC::MOF_SubtargetSPE;
+ // Check if we have a PCRel node and return early.
+ if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
+ return FlagSet;
+
+ // If the node is the paired load/store intrinsics, compute flags for
+ // address computation and return early.
+ unsigned ParentOp = Parent->getOpcode();
+ if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
+ (ParentOp == ISD::INTRINSIC_VOID))) {
+ unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
+ assert(
+ ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) &&
+ "Only the paired load and store (lxvp/stxvp) intrinsics are valid.");
+ SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp) ? Parent->getOperand(2)
+ : Parent->getOperand(3);
+ computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
+ FlagSet |= PPC::MOF_Vector;
+ return FlagSet;
+ }
+
// Mark this as something we don't want to handle here if it is atomic
// or pre-increment instruction.
if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
@@ -17266,7 +17561,8 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
EVT MemVT = MN->getMemoryVT();
unsigned Size = MemVT.getSizeInBits();
if (MemVT.isScalarInteger()) {
- assert(Size <= 64 && "Not expecting scalar integers larger than 8 bytes!");
+ assert(Size <= 128 &&
+ "Not expecting scalar integers larger than 16 bytes!");
if (Size < 32)
FlagSet |= PPC::MOF_SubWordInt;
else if (Size == 32)
@@ -17276,9 +17572,12 @@ unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
} else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
if (Size == 128)
FlagSet |= PPC::MOF_Vector;
- else if (Size == 256)
- FlagSet |= PPC::MOF_Vector256;
- else
+ else if (Size == 256) {
+ assert(Subtarget.pairedVectorMemops() &&
+ "256-bit vectors are only available when paired vector memops is "
+ "enabled!");
+ FlagSet |= PPC::MOF_Vector;
+ } else
llvm_unreachable("Not expecting illegal vectors!");
} else { // Floating point type: can be scalar, f128 or vector types.
if (Size == 32 || Size == 64)
@@ -17396,6 +17695,14 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
// Select an X-Form load if it is not.
setXFormForUnalignedFI(N, Flags, Mode);
+ // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
+ if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
+ assert(Subtarget.isUsingPCRelativeCalls() &&
+ "Must be using PC-Relative calls when a valid PC-Relative node is "
+ "present!");
+ Mode = PPC::AM_PCRel;
+ }
+
// Set Base and Disp accordingly depending on the address mode.
switch (Mode) {
case PPC::AM_DForm:
@@ -17467,6 +17774,30 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
Base = N;
break;
}
+ case PPC::AM_PrefixDForm: {
+ int64_t Imm34 = 0;
+ unsigned Opcode = N.getOpcode();
+ if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
+ (isIntS34Immediate(N.getOperand(1), Imm34))) {
+ // N is an ADD/OR node and its second operand is a 34-bit signed immediate.
+ Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N.getOperand(0);
+ } else if (isIntS34Immediate(N, Imm34)) {
+ // The address is a 34-bit signed immediate.
+ Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
+ Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
+ }
+ break;
+ }
+ case PPC::AM_PCRel: {
+ // When selecting PC-Relative instructions, "Base" is not utilized as
+ // we select the address as [PC+imm].
+ Disp = N;
+ break;
+ }
case PPC::AM_None:
break;
default: { // By default, X-Form is always available to be selected.
@@ -17503,10 +17834,7 @@ PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
TargetLowering::AtomicExpansionKind
PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
- unsigned Size = AI->getPointerOperand()
- ->getType()
- ->getPointerElementType()
- ->getPrimitiveSizeInBits();
+ unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
return AtomicExpansionKind::MaskedIntrinsic;
return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 87579bad118f..34dce2c3172d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -200,6 +200,14 @@ namespace llvm {
/// and 64-bit AIX.
BCTRL_LOAD_TOC,
+ /// The variants that implicitly define rounding mode for calls with
+ /// strictfp semantics.
+ CALL_RM,
+ CALL_NOP_RM,
+ CALL_NOTOC_RM,
+ BCTRL_RM,
+ BCTRL_LOAD_TOC_RM,
+
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
@@ -494,6 +502,11 @@ namespace llvm {
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,
+ // NOTE: The nodes below may require PC-Rel specific patterns if the
+ // address could be PC-Relative. When adding new nodes below, consider
+ // whether or not the address can be PC-Relative and add the corresponding
+ // PC-relative patterns and tests.
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -554,6 +567,14 @@ namespace llvm {
/// instructions such as LXVDSX, LXVWSX.
LD_SPLAT,
+ /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load
+ /// that zero-extends.
+ ZEXT_LD_SPLAT,
+
+ /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load
+ /// that sign-extends.
+ SEXT_LD_SPLAT,
+
/// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
/// Maps directly to an stxvd2x instruction that will be preceded by
/// an xxswapd.
@@ -712,7 +733,9 @@ namespace llvm {
AM_DForm,
AM_DSForm,
AM_DQForm,
+ AM_PrefixDForm,
AM_XForm,
+ AM_PCRel
};
} // end namespace PPC
@@ -936,7 +959,7 @@ namespace llvm {
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(Type *Ty,
+ uint64_t getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -1246,6 +1269,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 92712c5c072b..417a6ce7e522 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -73,7 +73,7 @@ def SRL64 : SDNodeXForm<imm, [{
//
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
-let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let isReturn = 1, isPredicable = 1, Uses = [LR8, RM] in
def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In64BitMode]>;
@@ -100,7 +100,7 @@ let Defs = [LR8] in
def MovePCtoLR8 : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR8", []>,
PPC970_Unit_BRU;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let Defs = [CTR8], Uses = [CTR8] in {
def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
"bdz $dst">;
@@ -118,7 +118,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
-let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
@@ -178,6 +178,39 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
}
}
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8, RM], hasSideEffects = 0,
+ isCodeGenOnly = 1, Uses = [RM] in {
+ // Convenient aliases for call instructions
+ def BL8_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+
+ def BLA8_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall_rm (i64 imm:$func))]>;
+ def BL8_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+
+ def BLA8_NOP_RM : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins abscalltarget:$func),
+ "bla $func\n\tnop", IIC_BrB,
+ [(PPCcall_nop_rm (i64 imm:$func))]>;
+ let Predicates = [PCRelativeMemops] in {
+ // BL8_NOTOC means that the caller does not use the TOC pointer and if
+ // it does use R2 then it is just a caller saved register. Therefore it is
+ // safe to emit only the bl and not the nop for this instruction. The
+ // linker will not try to restore R2 after the call.
+ def BL8_NOTOC_RM : IForm<18, 0, 1, (outs),
+ (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>;
+ }
+ let Uses = [CTR8, RM] in {
+ let isPredicable = 1 in
+ def BCTRL8_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
+ Requires<[In64BitMode]>;
+ }
+}
+
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
def BCTRL8_LDinto_toc :
@@ -188,12 +221,22 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Requires<[In64BitMode]>;
}
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR8, X2, RM], Uses = [CTR8, RM], RST = 2 in {
+ def BCTRL8_LDinto_toc_RM :
+ XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
+ (ins memrix:$src),
+ "bctrl\n\tld 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc_rm iaddrX4:$src)]>,
+ Requires<[In64BitMode]>;
+}
+
} // Interpretation64Bit
// FIXME: Duplicating this for the asm parser should be unnecessary, but the
// previous definition must be marked as CodeGen only to prevent decoding
// conflicts.
-let Interpretation64Bit = 1, isAsmParserOnly = 1 in
+let Interpretation64Bit = 1, isAsmParserOnly = 1, hasSideEffects = 0 in
let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
"bl $func", IIC_BrB, []>;
@@ -214,12 +257,32 @@ def : Pat<(PPCcall_notoc (i64 tglobaladdr:$dst)),
def : Pat<(PPCcall_notoc (i64 texternalsym:$dst)),
(BL8_NOTOC texternalsym:$dst)>;
+def : Pat<(PPCcall_rm (i64 tglobaladdr:$dst)),
+ (BL8_RM tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop_rm (i64 tglobaladdr:$dst)),
+ (BL8_NOP_RM tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall_rm (i64 texternalsym:$dst)),
+ (BL8_RM texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_rm (i64 texternalsym:$dst)),
+ (BL8_NOP_RM texternalsym:$dst)>;
+
+def : Pat<(PPCcall_notoc_rm (i64 tglobaladdr:$dst)),
+ (BL8_NOTOC_RM tglobaladdr:$dst)>;
+def : Pat<(PPCcall_notoc_rm (i64 texternalsym:$dst)),
+ (BL8_NOTOC_RM texternalsym:$dst)>;
+
// Calls for AIX
def : Pat<(PPCcall (i64 mcsym:$dst)),
(BL8 mcsym:$dst)>;
def : Pat<(PPCcall_nop (i64 mcsym:$dst)),
(BL8_NOP mcsym:$dst)>;
+def : Pat<(PPCcall_rm (i64 mcsym:$dst)),
+ (BL8_RM mcsym:$dst)>;
+def : Pat<(PPCcall_nop_rm (i64 mcsym:$dst)),
+ (BL8_NOP_RM mcsym:$dst)>;
+
// Atomic operations
// FIXME: some of these might be used with constant operands. This will result
// in constant materialization instructions that may be redundant. We currently
@@ -408,6 +471,7 @@ def TCRETURNri8 : PPCEmitTimePseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
"#TC_RETURNr8 $dst $offset",
[]>;
+let hasSideEffects = 0 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
@@ -425,6 +489,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
"ba $dst", IIC_BrB,
[]>;
+}
} // Interpretation64Bit
def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
@@ -638,7 +703,7 @@ def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
[(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
let isCommutable = 1 in
-defm ADD8 : XOForm_1rx<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+defm ADD8 : XOForm_1rx<31, 266, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
@@ -717,7 +782,7 @@ defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"subfc", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
PPC970_DGroup_Cracked;
-defm SUBF8 : XOForm_1rx<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+defm SUBF8 : XOForm_1rx<31, 40, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
@@ -961,7 +1026,7 @@ defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
[(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
-defm MULLD : XOForm_1rx<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+defm MULLD : XOForm_1rx<31, 233, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IIC_IntMulHD,
[(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
@@ -1300,9 +1365,12 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
"ldx $rD, $src", IIC_LdStLD,
[(set i64:$rD, (load XForm:$src))]>, isPPC64;
+
+let Predicates = [IsISA2_06] in {
def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
"ldbrx $rD, $src", IIC_LdStLoad,
[(set i64:$rD, (PPClbrx ForceXForm:$src, i64))]>, isPPC64;
+}
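Guarding `LDBRX` behind the new `IsISA2_06` predicate reflects that the byte-reversed doubleword load only exists from ISA 2.06 onward; older subtargets must expand the byte swap. A hedged sketch of source whose bswap-of-load shape the `PPClbrx` pattern can turn into a single `ldbrx` when the predicate holds:

```cpp
// Sketch only: a 64-bit big-endian load on a little-endian target. The
// bswap-of-load shape is what the PPClbrx pattern matches into ldbrx when the
// subtarget implements ISA 2.06; otherwise it is expanded with shifts/rotates.
#include <cstdint>
#include <cstring>

uint64_t load_be64(const void *p) {
  uint64_t v;
  std::memcpy(&v, p, sizeof v);  // plain 64-bit load
  return __builtin_bswap64(v);   // byte reversal, foldable into ldbrx
}
```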
let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src),
@@ -1340,12 +1408,25 @@ def LQ : DQForm_RTp5_RA17_MEM<56, 0,
[]>,
RegConstraint<"@earlyclobber $RTp">,
isPPC64;
+// The ISA does not provide an LQX instruction, so define a pseudo that lets
+// us handle the X-form during isel. Keeping it a pre-RA pseudo may expose
+// opportunities for optimizations (CSE, LICM, etc.) on the result of adding
+// RA and RB.
+def LQX_PSEUDO : PPCCustomInserterPseudo<(outs g8prc:$RTp),
+ (ins memrr:$src), "#LQX_PSEUDO", []>;
+
def RESTORE_QUADWORD : PPCEmitTimePseudo<(outs g8prc:$RTp), (ins memrix:$src),
"#RESTORE_QUADWORD", []>;
}
}
+def : Pat<(int_ppc_atomic_load_i128 iaddrX16:$src),
+ (SPLIT_QUADWORD (LQ memrix16:$src))>;
+
+def : Pat<(int_ppc_atomic_load_i128 ForceXForm:$src),
+ (SPLIT_QUADWORD (LQX_PSEUDO memrr:$src))>;
+
// Support for medium and large code model.
let hasSideEffects = 0 in {
let isReMaterializable = 1 in {
@@ -1523,10 +1604,13 @@ def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
"stdx $rS, $dst", IIC_LdStSTD,
[(store i64:$rS, XForm:$dst)]>, isPPC64,
PPC970_DGroup_Cracked;
+
+let Predicates = [IsISA2_06] in {
def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
"stdbrx $rS, $dst", IIC_LdStStore,
[(PPCstbrx i64:$rS, ForceXForm:$dst, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
+}
let mayStore = 1, hasNoSchedulingInfo = 1 in {
// Normal 16-byte stores.
@@ -1534,12 +1618,28 @@ let mayStore = 1, hasNoSchedulingInfo = 1 in {
def STQ : DSForm_1<62, 2, (outs), (ins g8prc:$RSp, memrix:$dst),
"stq $RSp, $dst", IIC_LdStSTQ,
[]>, isPPC64;
+
+def STQX_PSEUDO : PPCCustomInserterPseudo<(outs),
+ (ins g8prc:$RSp, memrr:$dst),
+ "#STQX_PSEUDO", []>;
+
def SPILL_QUADWORD : PPCEmitTimePseudo<(outs), (ins g8prc:$RSp, memrix:$dst),
"#SPILL_QUADWORD", []>;
}
}
+def BUILD_QUADWORD : PPCPostRAExpPseudo<
+ (outs g8prc:$RTp),
+ (ins g8rc:$lo, g8rc:$hi),
+ "#BUILD_QUADWORD", []>;
+
+def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, DSForm:$dst),
+ (STQ (BUILD_QUADWORD g8rc:$lo, g8rc:$hi), memrix:$dst)>;
+
+def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, ForceXForm:$dst),
+ (STQX_PSEUDO (BUILD_QUADWORD g8rc:$lo, g8rc:$hi), memrr:$dst)>;
+
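Together with the `lq`/`LQX_PSEUDO` patterns above, these `stq`/`STQX_PSEUDO` patterns give the 128-bit atomic intrinsics a register-pair lowering via `SPLIT_QUADWORD` and `BUILD_QUADWORD`. A hedged user-level sketch, assuming the subtarget enables quadword atomics so that clang emits the `ppc.atomic.{load,store}.i128` intrinsics rather than calling libatomic:

```cpp
// Sketch only: 16-byte atomic accesses that the patterns above can lower to
// lq/stq (or their X-form pseudos) plus SPLIT_QUADWORD/BUILD_QUADWORD.
#include <atomic>

__int128 read_pair(const std::atomic<__int128> *p) {
  return p->load(std::memory_order_relaxed);  // -> ppc.atomic.load.i128
}

void write_pair(std::atomic<__int128> *p, __int128 v) {
  p->store(v, std::memory_order_relaxed);     // -> ppc.atomic.store.i128
}
```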
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
@@ -1670,6 +1770,13 @@ def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
"hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
}
+let Interpretation64Bit = 1, isCodeGenOnly = 1, hasSideEffects = 1 in
+def ADDEX8 : Z23Form_RTAB5_CY2<31, 170, (outs g8rc:$rT),
+ (ins g8rc:$rA, g8rc:$rB, u2imm:$CY),
+ "addex $rT, $rA, $rB, $CY", IIC_IntGeneral,
+ [(set i64:$rT, (int_ppc_addex i64:$rA, i64:$rB,
+ timm:$CY))]>;
+
//===----------------------------------------------------------------------===//
// Instruction Patterns
//
@@ -1833,8 +1940,6 @@ def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
(STDCX g8rc:$A, ForceXForm:$dst)>;
-def : Pat<(int_ppc_tdw g8rc:$A, g8rc:$B, i32:$IMM),
- (TD $IMM, $A, $B)>;
// trapd
def : Pat<(int_ppc_trapd g8rc:$A),
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 2bc7fb2a1a5f..1e0e2d88e54b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1518,8 +1518,8 @@ def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", [(set v1i128:$vD,
(int_ppc_altivec_vprtybq v1i128:$vB))]>;
// Vector (Bit) Permute (Right-indexed)
-def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vbpermd $vD, $vA, $vB", IIC_VecFP, []>;
+def VBPERMD : VX1_Int_Ty3<1484, "vbpermd", int_ppc_altivec_vbpermd,
+ v2i64, v2i64, v16i8>;
def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
"vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 91b507ea6c4c..f7e4c0708d7d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -889,7 +889,7 @@ class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
- string asmstr, InstrItinClass itin, list<dag> pattern>
+ string asmstr, InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bit R;
@@ -903,7 +903,7 @@ class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
- string asmstr, InstrItinClass itin, list<dag> pattern>
+ string asmstr, InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bit A;
@@ -916,7 +916,7 @@ class XForm_htm1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+ InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bit L;
@@ -930,7 +930,7 @@ class XForm_htm2<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XForm_htm3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+ InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> BF;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index e59a08774dc5..ec1c397ff57f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -29,10 +29,10 @@ let Predicates = [HasHTM] in {
let Defs = [CR0] in {
def TBEGIN : XForm_htm0 <31, 654,
- (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$R), "tbegin. $R", IIC_SprMTSPR>;
def TEND : XForm_htm1 <31, 686,
- (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR, []>;
+ (outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR>;
def TABORT : XForm_base_r3xo <31, 910,
(outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
@@ -62,7 +62,7 @@ def TABORTDCI : XForm_base_r3xo <31, 878,
isRecordForm;
def TSR : XForm_htm2 <31, 750,
- (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR, []>,
+ (outs), (ins u1imm:$L), "tsr. $L", IIC_SprMTSPR>,
isRecordForm;
def TRECLAIM : XForm_base_r3xo <31, 942,
@@ -84,7 +84,7 @@ def TRECHKPT : XForm_base_r3xo <31, 1006,
}
def TCHECK : XForm_htm3 <31, 718,
- (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR, []>;
+ (outs crrc:$BF), (ins), "tcheck $BF", IIC_SprMTSPR>;
// Builtins
// All HTM instructions, with the exception of tcheck, set CR0 with the
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9dd35d5f44d1..649a150866b4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -35,10 +35,10 @@
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -1109,6 +1109,8 @@ bool PPCInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case PPC::XXLXORdpz:
case PPC::XXLEQVOnes:
case PPC::XXSPLTI32DX:
+ case PPC::XXSPLTIW:
+ case PPC::XXSPLTIDP:
case PPC::V_SET0B:
case PPC::V_SET0H:
case PPC::V_SET0:
@@ -1541,6 +1543,11 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
return false;
+ // If the conditional branch uses a physical register, then it cannot be
+ // turned into a select.
+ if (Register::isPhysicalRegister(Cond[1].getReg()))
+ return false;
+
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
@@ -2239,11 +2246,13 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
return true;
} else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||
- OpC == PPC::BCTRL8) {
+ OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||
+ OpC == PPC::BCTRL8_RM) {
if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
- bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+ bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||
+ OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;
bool isPPC64 = Subtarget.isPPC64();
if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
@@ -2267,6 +2276,9 @@ bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)
.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);
+ if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)
+ MachineInstrBuilder(*MI.getParent()->getParent(), MI)
+ .addReg(PPC::RM, RegState::ImplicitDefine);
return true;
}
@@ -2343,8 +2355,8 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
}
bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask,
- int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
unsigned Opc = MI.getOpcode();
switch (Opc) {
@@ -2373,7 +2385,8 @@ bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
}
bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int Mask, int Value,
+ Register SrcReg2, int64_t Mask,
+ int64_t Value,
const MachineRegisterInfo *MRI) const {
if (DisableCmpOpt)
return false;
@@ -3009,7 +3022,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(SrcVSR + VecNo)
.addReg(SrcVSR + VecNo);
}
- // BUILD_UACC is expanded to 4 copies of the underlying vsx regisers.
+ // BUILD_UACC is expanded to 4 copies of the underlying vsx registers.
// So after building the 4 copies, we can replace the BUILD_UACC instruction
// with a NOP.
LLVM_FALLTHROUGH;
@@ -3103,6 +3116,7 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+  // FIXME: Maybe we can expand this in the 'PowerPC Expand Atomic' pass.
case PPC::CFENCE8: {
auto Val = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(PPC::CMPD), PPC::CR7).addReg(Val).addReg(Val);
@@ -3770,7 +3784,7 @@ bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,
bool Simplified = false;
// If final mask is 0, MI result should be 0 too.
- if (FinalMask.isNullValue()) {
+ if (FinalMask.isZero()) {
bool Is64Bit =
(MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);
Simplified = true;
@@ -5241,8 +5255,7 @@ PPCInstrInfo::isSignOrZeroExtended(const MachineInstr &MI, bool SignExt,
return false;
const IntegerType *IntTy =
dyn_cast<IntegerType>(CalleeFn->getReturnType());
- const AttributeSet &Attrs =
- CalleeFn->getAttributes().getRetAttributes();
+ const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
if (IntTy && IntTy->getBitWidth() <= 32)
return Attrs.hasAttribute(SignExt ? Attribute::SExt :
Attribute::ZExt);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 404156de232f..2cfd53de3290 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -524,10 +524,11 @@ public:
// Comparison optimization.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const override;
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const override;
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int Mask, int Value,
+ Register SrcReg2, int64_t Mask, int64_t Value,
const MachineRegisterInfo *MRI) const override;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index f53e1b89626f..d83ecc699b19 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -316,6 +316,24 @@ def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+// Call nodes for strictfp calls (that define RM).
+def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop_rm : SDNode<"PPCISD::CALL_NOP_RM", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_notoc_rm : SDNode<"PPCISD::CALL_NOTOC_RM", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCbctrl_rm : SDNode<"PPCISD::BCTRL_RM", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM",
+ SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -1152,15 +1170,14 @@ def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
/// This is just the offset part of iaddr, used for preinc.
def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
-// PC Relative Address
-def pcreladdr : ComplexPattern<iPTR, 1, "SelectAddrPCRel", [], []>;
-
// Load and Store Instruction Selection addressing modes.
def DForm : ComplexPattern<iPTR, 2, "SelectDForm", [], [SDNPWantParent]>;
def DSForm : ComplexPattern<iPTR, 2, "SelectDSForm", [], [SDNPWantParent]>;
def DQForm : ComplexPattern<iPTR, 2, "SelectDQForm", [], [SDNPWantParent]>;
def XForm : ComplexPattern<iPTR, 2, "SelectXForm", [], [SDNPWantParent]>;
def ForceXForm : ComplexPattern<iPTR, 2, "SelectForceXForm", [], [SDNPWantParent]>;
+def PCRelForm : ComplexPattern<iPTR, 2, "SelectPCRelForm", [], [SDNPWantParent]>;
+def PDForm : ComplexPattern<iPTR, 2, "SelectPDForm", [], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
@@ -1183,6 +1200,7 @@ def NaNsFPMath
: Predicate<"!Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
def HasBPERMD : Predicate<"Subtarget->hasBPERMD()">;
def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
+def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">;
def IsISA2_07 : Predicate<"Subtarget->isISA2_07()">;
def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
def HasFPU : Predicate<"Subtarget->hasFPU()">;
@@ -1272,7 +1290,7 @@ multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
// Multiclass for instructions which have a record overflow form as well
// as a record form but no carry (i.e. mulld, mulldo, subf, subfo, etc.)
-multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+multiclass XOForm_1rx<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
let BaseName = asmbase in {
@@ -1649,7 +1667,7 @@ def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
"#RESTORE_CRBIT", []>;
}
-let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
[(retflag)]>, Requires<[In32BitMode]>;
@@ -1690,7 +1708,8 @@ let Defs = [LR] in
def MoveGOTtoLR : PPCEmitTimePseudo<(outs), (ins), "#MoveGOTtoLR", []>,
PPC970_Unit_BRU;
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ hasSideEffects = 0 in {
let isBarrier = 1 in {
let isPredicable = 1 in
def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
@@ -1782,7 +1801,8 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
// The unconditional BCL used by the SjLj setjmp code.
-let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7,
+ hasSideEffects = 0 in {
let Defs = [LR], Uses = [RM] in {
def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
"bcl 20, 31, $dst">;
@@ -1890,6 +1910,26 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
}
}
+let isCall = 1, PPC970_Unit = 7, Defs = [LR, RM], isCodeGenOnly = 1 in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+ def BLA_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall_rm (i32 imm:$func))]>;
+
+ def BL_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+ }
+ let Uses = [CTR, RM] in {
+ let isPredicable = 1 in
+ def BCTRL_RM : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl_rm)]>,
+ Requires<[In32BitMode]>;
+ }
+}
+
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
def TCRETURNdi :PPCEmitTimePseudo< (outs),
(ins calltarget:$dst, i32imm:$offset),
@@ -1916,8 +1956,16 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
}
+let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
+ Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in {
+ def BCTRL_LWZinto_toc_RM:
+ XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
+ (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
+ [(PPCbctrl_load_toc_rm iaddr:$src)]>, Requires<[In32BitMode]>;
-let isCodeGenOnly = 1 in {
+}
+
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
@@ -3106,14 +3154,14 @@ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
-let Defs = [RM] in {
+let Defs = [RM], hasSideEffects = 1 in {
let isCodeGenOnly = 1 in
def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
"mtfsf $FM, $rT", IIC_IntMTFSB0,
[(int_ppc_mtfsf timm:$FM, f64:$rT)]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
-let Uses = [RM] in {
+let Uses = [RM], hasSideEffects = 1 in {
def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
"mffs $rT", IIC_IntMFFS,
[(set f64:$rT, (PPCmffs))]>,
@@ -3170,7 +3218,7 @@ def ADDEX : Z23Form_RTAB5_CY2<31, 170, (outs gprc:$rT),
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
let isCommutable = 1 in
-defm ADD4 : XOForm_1rx<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+defm ADD4 : XOForm_1rx<31, 266, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"add", "$rT, $rA, $rB", IIC_IntSimple,
[(set i32:$rT, (add i32:$rA, i32:$rB))]>;
let isCodeGenOnly = 1 in
@@ -3204,11 +3252,11 @@ defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
[(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
-defm MULLW : XOForm_1rx<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+defm MULLW : XOForm_1rx<31, 235, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"mullw", "$rT, $rA, $rB", IIC_IntMulHW,
[(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
} // isCommutable
-defm SUBF : XOForm_1rx<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+defm SUBF : XOForm_1rx<31, 40, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
"subf", "$rT, $rA, $rB", IIC_IntGeneral,
[(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
@@ -3433,6 +3481,12 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
+def : Pat<(PPCcall_rm (i32 tglobaladdr:$dst)),
+ (BL_RM tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall_rm (i32 texternalsym:$dst)),
+ (BL_RM texternalsym:$dst)>;
+
// Calls for AIX only
def : Pat<(PPCcall (i32 mcsym:$dst)),
(BL mcsym:$dst)>;
@@ -3443,6 +3497,15 @@ def : Pat<(PPCcall_nop (i32 mcsym:$dst)),
def : Pat<(PPCcall_nop (i32 texternalsym:$dst)),
(BL_NOP texternalsym:$dst)>;
+def : Pat<(PPCcall_rm (i32 mcsym:$dst)),
+ (BL_RM mcsym:$dst)>;
+
+def : Pat<(PPCcall_nop_rm (i32 mcsym:$dst)),
+ (BL_NOP_RM mcsym:$dst)>;
+
+def : Pat<(PPCcall_nop_rm (i32 texternalsym:$dst)),
+ (BL_NOP_RM texternalsym:$dst)>;
+
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@@ -4501,7 +4564,7 @@ def MCRFS : XLForm_3<63, 64, (outs crrc:$BF), (ins crrc:$BFA),
// All MTFSF variants may change the rounding mode so conservatively set it
// as an implicit def for all of them.
let Predicates = [HasFPU] in {
-let Defs = [RM] in {
+let Defs = [RM], hasSideEffects = 1 in {
let isCodeGenOnly = 1,
Pattern = [(int_ppc_mtfsfi timm:$BF, timm:$U)], W = 0 in
def MTFSFIb : XLForm_4<63, 134, (outs), (ins u3imm:$BF, u4imm:$U),
@@ -5059,7 +5122,7 @@ def RLWNMbm_rec : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
// These generic branch instruction forms are used for the assembler parser only.
// Defs and Uses are conservative, since we don't know the BO value.
-let PPC970_Unit = 7, isBranch = 1 in {
+let PPC970_Unit = 7, isBranch = 1, hasSideEffects = 0 in {
let Defs = [CTR], Uses = [CTR, RM] in {
def gBC : BForm_3<16, 0, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
@@ -5475,8 +5538,6 @@ def : Pat<(int_ppc_stwcx ForceXForm:$dst, gprc:$A),
(STWCX gprc:$A, ForceXForm:$dst)>;
def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
(STBCX gprc:$A, ForceXForm:$dst)>;
-def : Pat<(int_ppc_tw gprc:$A, gprc:$B, i32:$IMM),
- (TW $IMM, $A, $B)>;
def : Pat<(int_ppc_trap gprc:$A),
(TWI 24, $A, 0)>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index b183dbd4b3bb..a19289e96b3e 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -29,9 +29,6 @@ def SDT_PPCPairExtractVsx : SDTypeProfile<1, 2, [
def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;
-def SDT_PPCVecInsertElt : SDTypeProfile<1, 3, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<3>
-]>;
//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
@@ -45,7 +42,6 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
-def PPCvecinsertelt : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsertElt, []>;
//===----------------------------------------------------------------------===//
@@ -621,7 +617,7 @@ class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Inst{48-63} = D_RA{15-0}; // D
}
-multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> pref, bits<6> opcode, dag OOL,
+multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
dag IOL, dag PCRel_IOL,
string asmstr, InstrItinClass itin> {
def NAME : 8LS_DForm_R_XTp5_SI34_MEM<opcode, OOL, IOL,
@@ -1652,208 +1648,201 @@ let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in {
let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PLXVP :
- 8LS_DForm_R_XTp5_SI34_MEM_p<1, 58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
+ 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
(ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA",
IIC_LdStLFD>;
}
let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PSTXVP :
- 8LS_DForm_R_XTp5_SI34_MEM_p<1, 62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
+ 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
(ins vsrprc:$XTp, memri34_pcrel:$D_RA),
"pstxvp $XTp, $D_RA", IIC_LdStLFD>;
}
let Predicates = [PairedVectorMemops] in {
// Intrinsics for Paired Vector Loads.
- def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX16:$src)), (LXVP memrix16:$src)>;
- def : Pat<(v256i1 (int_ppc_vsx_lxvp xaddrX16:$src)), (LXVPX xaddrX16:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp DQForm:$src)), (LXVP memrix16:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp XForm:$src)), (LXVPX XForm:$src)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
- def : Pat<(v256i1 (int_ppc_vsx_lxvp iaddrX34:$src)), (PLXVP memri34:$src)>;
+ def : Pat<(v256i1 (int_ppc_vsx_lxvp PDForm:$src)), (PLXVP memri34:$src)>;
}
// Intrinsics for Paired Vector Stores.
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX16:$dst),
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, DQForm:$dst),
(STXVP $XSp, memrix16:$dst)>;
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, xaddrX16:$dst),
- (STXVPX $XSp, xaddrX16:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, XForm:$dst),
+ (STXVPX $XSp, XForm:$dst)>;
let Predicates = [PairedVectorMemops, PrefixInstrs] in {
- def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, iaddrX34:$dst),
+ def : Pat<(int_ppc_vsx_stxvp v256i1:$XSp, PDForm:$dst),
(PSTXVP $XSp, memri34:$dst)>;
}
}
-// TODO: We have an added complexity of 500 here. This is only a temporary
-// solution to have tablegen consider these patterns first. The way we do
-// addressing for PowerPC is complex depending on available D form, X form, or
-// aligned D form loads/stores like DS and DQ forms. The prefixed
-// instructions in this file also add additional PC Relative loads/stores
-// and D form loads/stores with 34 bit immediates. It is very difficult to force
-// instruction selection to consistently pick these first without the current
-// added complexity. Once pc-relative implementation is complete, a set of
-// follow-up patches will address this refactoring and the AddedComplexity will
-// be removed.
-let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
+let Predicates = [PCRelativeMemops] in {
// Load i32
- def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZpc $ga, 0)>;
- def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHApc $ga, 0)>;
- def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
- def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i32 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZpc $ga, 0)>;
- def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>;
+ def : Pat<(i32 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLWZpc $ga, 0)>;
// Store i32
- def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTBpc $RS, $ga, 0)>;
- def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTHpc $RS, $ga, 0)>;
- def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store i32:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTWpc $RS, $ga, 0)>;
// Load i64
- def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (extloadi1 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi1 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi8 (PPCmatpcreladdr PCRelForm:$ga))),
(PLBZ8pc $ga, 0)>;
- def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHA8pc $ga, 0)>;
- def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZ8pc $ga, 0)>;
- def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi16 (PPCmatpcreladdr PCRelForm:$ga))),
(PLHZ8pc $ga, 0)>;
- def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZ8pc $ga, 0)>;
- def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWA8pc $ga, 0)>;
- def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))),
+ def : Pat<(i64 (extloadi32 (PPCmatpcreladdr PCRelForm:$ga))),
(PLWZ8pc $ga, 0)>;
- def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>;
+ def : Pat<(i64 (load (PPCmatpcreladdr PCRelForm:$ga))), (PLDpc $ga, 0)>;
// Store i64
- def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTB8pc $RS, $ga, 0)>;
- def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTH8pc $RS, $ga, 0)>;
- def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTW8pc $RS, $ga, 0)>;
- def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store i64:$RS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTDpc $RS, $ga, 0)>;
// Load f32
- def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>;
+ def : Pat<(f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFSpc $addr, 0)>;
// Store f32
- def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store f32:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTFSpc $FRS, $ga, 0)>;
// Load f64
- def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))),
+ def : Pat<(f64 (extloadf32 (PPCmatpcreladdr PCRelForm:$addr))),
(COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>;
- def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>;
+ def : Pat<(f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLFDpc $addr, 0)>;
// Store f64
- def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store f64:$FRS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTFDpc $FRS, $ga, 0)>;
// Load f128
- def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))),
+ def : Pat<(f128 (load (PPCmatpcreladdr PCRelForm:$addr))),
(COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>;
// Store f128
- def : Pat<(store f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store f128:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>;
// Load v4i32
- def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v4i32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v4i32
- def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v4i32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v2i64
- def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v2i64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v2i64
- def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v2i64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v4f32
- def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v4f32 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v4f32
- def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v4f32:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Load v2f64
- def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>;
+ def : Pat<(v2f64 (load (PPCmatpcreladdr PCRelForm:$addr))), (PLXVpc $addr, 0)>;
// Store v2f64
- def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(store v2f64:$XS, (PPCmatpcreladdr PCRelForm:$ga)),
(PSTXVpc $XS, $ga, 0)>;
// Atomic Load
- def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_8 (PPCmatpcreladdr PCRelForm:$ga)),
(PLBZpc $ga, 0)>;
- def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_16 (PPCmatpcreladdr PCRelForm:$ga)),
(PLHZpc $ga, 0)>;
- def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_32 (PPCmatpcreladdr PCRelForm:$ga)),
(PLWZpc $ga, 0)>;
- def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)),
+ def : Pat<(atomic_load_64 (PPCmatpcreladdr PCRelForm:$ga)),
(PLDpc $ga, 0)>;
// Atomic Store
- def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTBpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTHpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS),
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i32:$RS),
(PSTWpc $RS, $ga, 0)>;
- def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_8 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTB8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_16 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTH8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_32 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTW8pc $RS, $ga, 0)>;
- def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS),
+ def : Pat<(atomic_store_64 (PPCmatpcreladdr PCRelForm:$ga), i64:$RS),
(PSTDpc $RS, $ga, 0)>;
// Special Cases For PPCstore_scal_int_from_vsr
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
def : Pat<(PPCstore_scal_int_from_vsr
(f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
- (PPCmatpcreladdr pcreladdr:$dst), 8),
+ (PPCmatpcreladdr PCRelForm:$dst), 8),
(PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
+ def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))),
+ (SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>;
+
// If the PPCmatpcreladdr node is not caught by any other pattern it should be
// caught here and turned into a paddi instruction to materialize the address.
- def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ def : Pat<(PPCmatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
// PPCtlsdynamatpcreladdr node is used for TLS dynamic models to materialize
// tls global address with paddi instruction.
- def : Pat<(PPCtlsdynamatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>;
+ def : Pat<(PPCtlsdynamatpcreladdr PCRelForm:$addr), (PADDI8pc 0, $addr)>;
// PPCtlslocalexecmataddr node is used for TLS local exec models to
// materialize tls global address with paddi instruction.
def : Pat<(PPCaddTls i64:$in, (PPCtlslocalexecmataddr tglobaltlsaddr:$addr)),
@@ -1861,15 +1850,6 @@ let Predicates = [PCRelativeMemops], AddedComplexity = 500 in {
}
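Switching these selections from `pcreladdr` to the `PCRelForm` complex pattern keeps PC-relative folding working under the new unified addressing-mode selection. As a reminder of what they cover, a hedged sketch with a hypothetical external global whose accesses can fold into `@pcrel` loads and stores on an ISA 3.1 target:

```cpp
// Sketch only: with PC-relative addressing available, the PPCmatpcreladdr
// patterns above can fold the address of a global directly into
// plwz/pstw @pcrel instead of going through the TOC.
extern int counter;  // hypothetical external global

int bump_counter(void) {
  return ++counter;  // load and store are both @pcrel candidates
}
```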
let Predicates = [PrefixInstrs] in {
- def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
- (ins i32imm:$IMM32),
- "xxspltiw $XT, $IMM32", IIC_VecGeneral,
- []>;
- def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
- (ins i32imm:$IMM32),
- "xxspltidp $XT, $IMM32", IIC_VecGeneral,
- [(set v2f64:$XT,
- (PPCxxspltidp i32:$IMM32))]>;
def XXPERMX :
8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
vsrc:$XC, u3imm:$UIM),
@@ -1893,9 +1873,18 @@ let Predicates = [PrefixInstrs] in {
IIC_VecGeneral, []>;
}
-// XXSPLI32DX needs extra flags to make sure the compiler does not attempt
+// XXSPLTIW/DP/32DX need extra flags to make sure the compiler does not attempt
// to spill part of the instruction when the values are similar.
-let isReMaterializable = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+ def XXSPLTIW : 8RR_DForm_IMM32_XT6<32, 3, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltiw $XT, $IMM32", IIC_VecGeneral,
+ []>;
+ def XXSPLTIDP : 8RR_DForm_IMM32_XT6<32, 2, (outs vsrc:$XT),
+ (ins i32imm:$IMM32),
+ "xxspltidp $XT, $IMM32", IIC_VecGeneral,
+ [(set v2f64:$XT,
+ (PPCxxspltidp i32:$IMM32))]>;
def XXSPLTI32DX :
8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
(ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
@@ -1934,7 +1923,7 @@ let Predicates = [IsISA3_1] in {
[(set v16i8:$VRT,
(int_ppc_altivec_vsldbi v16i8:$VRA,
v16i8:$VRB,
- i32:$SH))]>;
+ timm:$SH))]>;
def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT),
(ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
"vsrdbi $VRT, $VRA, $VRB, $SH",
@@ -1942,7 +1931,7 @@ let Predicates = [IsISA3_1] in {
[(set v16i8:$VRT,
(int_ppc_altivec_vsrdbi v16i8:$VRA,
v16i8:$VRB,
- i32:$SH))]>;
+ timm:$SH))]>;
defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
"vstribr", "$vT, $vB", IIC_VecGeneral,
[(set v16i8:$vT,
@@ -2678,6 +2667,45 @@ def : Pat<(f64 nzFPImmAsi64:$A),
// nand(A, nand(B, C))
def : xxevalPattern<(or (vnot v4i32:$vA), (and v4i32:$vB, v4i32:$vC)),
!sub(255, 14)>;
+
+ // Anonymous patterns to select prefixed VSX loads and stores.
+ // Load / Store f128
+ def : Pat<(f128 (load PDForm:$src)),
+ (COPY_TO_REGCLASS (PLXV memri34:$src), VRRC)>;
+ def : Pat<(store f128:$XS, PDForm:$dst),
+ (PSTXV (COPY_TO_REGCLASS $XS, VSRC), memri34:$dst)>;
+
+ // Load / Store v4i32
+ def : Pat<(v4i32 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v4i32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Load / Store v2i64
+ def : Pat<(v2i64 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v2i64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Load / Store v4f32
+ def : Pat<(v4f32 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v4f32:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Load / Store v2f64
+ def : Pat<(v2f64 (load PDForm:$src)), (PLXV memri34:$src)>;
+ def : Pat<(store v2f64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
+
+ // Cases For PPCstore_scal_int_from_vsr
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), PDForm:$dst, 8),
+ (PSTXSD (XSCVDPUXDS f64:$src), PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), PDForm:$dst, 8),
+ (PSTXSD (XSCVDPSXDS f64:$src), PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), PDForm:$dst, 8),
+ (PSTXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
+ PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr
+ (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), PDForm:$dst, 8),
+ (PSTXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
+ PDForm:$dst)>;
}
let Predicates = [PrefixInstrs] in {
@@ -2701,138 +2729,192 @@ let Predicates = [PrefixInstrs] in {
(XXBLENDVW $A, $B, $C)>;
def : Pat<(int_ppc_vsx_xxblendvd v2i64:$A, v2i64:$B, v2i64:$C),
(XXBLENDVD $A, $B, $C)>;
+
+ // Anonymous patterns to select prefixed loads and stores.
+ // Load i32
+ def : Pat<(i32 (extloadi1 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (zextloadi1 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (extloadi8 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (zextloadi8 PDForm:$src)), (PLBZ memri34:$src)>;
+ def : Pat<(i32 (extloadi16 PDForm:$src)), (PLHZ memri34:$src)>;
+ def : Pat<(i32 (zextloadi16 PDForm:$src)), (PLHZ memri34:$src)>;
+ def : Pat<(i32 (sextloadi16 PDForm:$src)), (PLHA memri34:$src)>;
+ def : Pat<(i32 (load PDForm:$src)), (PLWZ memri34:$src)>;
+
+ // Store i32
+ def : Pat<(truncstorei8 i32:$rS, PDForm:$dst), (PSTB gprc:$rS, memri34:$dst)>;
+ def : Pat<(truncstorei16 i32:$rS, PDForm:$dst), (PSTH gprc:$rS, memri34:$dst)>;
+ def : Pat<(store i32:$rS, PDForm:$dst), (PSTW gprc:$rS, memri34:$dst)>;
+
+ // Load i64
+ def : Pat<(i64 (extloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi1 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (extloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi8 PDForm:$src)), (PLBZ8 memri34:$src)>;
+ def : Pat<(i64 (extloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi16 PDForm:$src)), (PLHZ8 memri34:$src)>;
+ def : Pat<(i64 (sextloadi16 PDForm:$src)), (PLHA8 memri34:$src)>;
+ def : Pat<(i64 (extloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>;
+ def : Pat<(i64 (zextloadi32 PDForm:$src)), (PLWZ8 memri34:$src)>;
+ def : Pat<(i64 (sextloadi32 PDForm:$src)), (PLWA8 memri34:$src)>;
+ def : Pat<(i64 (load PDForm:$src)), (PLD memri34:$src)>;
+
+ // Store i64
+ def : Pat<(truncstorei8 i64:$rS, PDForm:$dst), (PSTB8 g8rc:$rS, memri34:$dst)>;
+ def : Pat<(truncstorei16 i64:$rS, PDForm:$dst), (PSTH8 g8rc:$rS, memri34:$dst)>;
+ def : Pat<(truncstorei32 i64:$rS, PDForm:$dst), (PSTW8 g8rc:$rS, memri34:$dst)>;
+ def : Pat<(store i64:$rS, PDForm:$dst), (PSTD g8rc:$rS, memri34:$dst)>;
+
+ // Load / Store f32
+ def : Pat<(f32 (load PDForm:$src)), (PLFS memri34:$src)>;
+ def : Pat<(store f32:$FRS, PDForm:$dst), (PSTFS $FRS, memri34:$dst)>;
+
+ // Load / Store f64
+ def : Pat<(f64 (extloadf32 PDForm:$src)),
+ (COPY_TO_REGCLASS (PLFS memri34:$src), VSFRC)>;
+ def : Pat<(f64 (load PDForm:$src)), (PLFD memri34:$src)>;
+ def : Pat<(store f64:$FRS, PDForm:$dst), (PSTFD $FRS, memri34:$dst)>;
+
+ // Atomic Load
+ def : Pat<(atomic_load_8 PDForm:$src), (PLBZ memri34:$src)>;
+ def : Pat<(atomic_load_16 PDForm:$src), (PLHZ memri34:$src)>;
+ def : Pat<(atomic_load_32 PDForm:$src), (PLWZ memri34:$src)>;
+ def : Pat<(atomic_load_64 PDForm:$src), (PLD memri34:$src)>;
+
+ // Atomic Store
+ def : Pat<(atomic_store_8 PDForm:$dst, i32:$RS), (PSTB $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_16 PDForm:$dst, i32:$RS), (PSTH $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_32 PDForm:$dst, i32:$RS), (PSTW $RS, memri34:$dst)>;
+ def : Pat<(atomic_store_64 PDForm:$dst, i64:$RS), (PSTD $RS, memri34:$dst)>;
+
+ // Prefixed fpext to v2f64
+ def : Pat<(v4f32 (PPCldvsxlh PDForm:$src)),
+ (SUBREG_TO_REG (i64 1), (PLFD PDForm:$src), sub_64)>;
}
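These `PDForm` patterns, together with the VSX ones added a little earlier, let instruction selection use the prefixed 34-bit displacement encodings directly. A hedged sketch (hypothetical helper names) of an access whose offset is too large for a 16-bit D-form but fits the prefixed form:

```cpp
// Sketch only: a 400000-byte displacement exceeds the 16-bit D-form range but
// fits the 34-bit prefixed form, so a pwr10 build can fold it into plwz/pstw
// instead of materializing the offset into a register first.
int read_far(const int *base) { return base[100000]; }

void write_far(int *base, int v) { base[100001] = v; }
```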
def InsertEltShift {
- dag Sub32Left0 = (EXTRACT_SUBREG $rB, sub_32);
+ dag Sub32 = (i32 (EXTRACT_SUBREG $rB, sub_32));
dag Sub32Left1 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 1, 0, 30);
dag Sub32Left2 = (RLWINM (EXTRACT_SUBREG $rB, sub_32), 2, 0, 29);
+ dag Left1 = (RLWINM $rB, 1, 0, 30);
+ dag Left2 = (RLWINM $rB, 2, 0, 29);
dag Left3 = (RLWINM8 $rB, 3, 0, 28);
}
let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
// Indexed vector insert element
- def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
- (VINSBRX $vDi, InsertEltShift.Sub32Left0, $rA)>;
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
+ (VINSBRX $vDi, InsertEltShift.Sub32, $rA)>;
+ def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHRX $vDi, InsertEltShift.Sub32Left1, $rA)>;
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, $rA)>;
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
- (VINSWRX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
+ (VINSWVRX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWRX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDRX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
-
- // Immediate vector insert element
- foreach i = [0, 1, 2, 3] in {
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(!sub(3, i), 4), (LWZX memrr:$rA))>;
+ let AddedComplexity = 400 in {
+ // Immediate vector insert element
+ foreach Idx = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), $rA)>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), Idx)),
+ (VINSW $vDi, !mul(!sub(3, Idx), 4), (LWZX memrr:$rA))>;
+ }
+ foreach i = [0, 1] in
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, (i64 i))),
+ (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
- foreach i = [0, 1] in
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
- (VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
// Indexed vector insert element
- def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
+ def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i32:$rB)),
(VINSBLX $vDi, $rB, $rA)>;
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
- (VINSHLX $vDi, $rB, $rA)>;
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
- (VINSWLX $vDi, $rB, $rA)>;
-
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
- (VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
- i32:$rB)),
- (VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
- i32:$rB)),
- (VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
- i32:$rB)),
- (VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
+ def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i32:$rB)),
+ (VINSHLX $vDi, InsertEltShift.Left1, $rA)>;
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i32:$rB)),
+ (VINSWLX $vDi, InsertEltShift.Left2, $rA)>;
- // Immediate vector insert element
- foreach i = [0, 1, 2, 3] in {
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
- (VINSW $vDi, !mul(i, 4), $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
- (i32 i))),
- (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
- (i32 i))),
- (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
- (i32 i))),
- (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
- }
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i32:$rB)),
+ (VINSWVLX $vDi, InsertEltShift.Left2, (XSCVDPSPN $rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i32:$rB)),
+ (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i32:$rB)),
+ (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (PLWZ memri34:$rA))>;
+  def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i32:$rB)),
+            (VINSWLX v4f32:$vDi, InsertEltShift.Left2, (LWZX memrr:$rA))>;
}
let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
// Indexed vector insert element
- def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
- (VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
- def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v16i8 (vector_insert v16i8:$vDi, i32:$rA, i64:$rB)),
+ (VINSBLX $vDi, InsertEltShift.Sub32, $rA)>;
+ def : Pat<(v8i16 (vector_insert v8i16:$vDi, i32:$rA, i64:$rB)),
(VINSHLX $vDi, InsertEltShift.Sub32Left1, $rA)>;
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i64:$rB)),
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, $rA)>;
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, i64:$rB)),
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i64:$rB)),
- (VINSWLX $vDi, InsertEltShift.Sub32Left2, Bitcast.FltToInt)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, f32:$rA, i64:$rB)),
+ (VINSWVLX $vDi, InsertEltShift.Sub32Left2, (XSCVDPSPN $rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)), i64:$rB)),
(VINSWLX $vDi, InsertEltShift.Sub32Left2, (LWZX memrr:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, f64:$A, i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, f64:$A, i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, Bitcast.DblToLong)>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LD memrix:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load iaddrX34:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (PLD memri34:$rA))>;
- def : Pat<(v2f64 (PPCvecinsertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
+ def : Pat<(v2f64 (insertelt v2f64:$vDi, (f64 (load xaddrX4:$rA)), i64:$rB)),
(VINSDLX $vDi, InsertEltShift.Left3, (LDX memrr:$rA))>;
+}
+let AddedComplexity = 400, Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
// Immediate vector insert element
- foreach i = [0, 1, 2, 3] in {
- def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i64 i))),
- (VINSW $vDi, !mul(i, 4), $rA)>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)), (i64 i))),
- (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
- def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)), (i64 i))),
- (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
+ foreach Ty = [i32, i64] in {
+ foreach Idx = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (insertelt v4i32:$vDi, i32:$rA, (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), $rA)>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
+ (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
+ (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (insertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
+ (Ty Idx))),
+ (VINSW $vDi, !mul(Idx, 4), (LWZX memrr:$rA))>;
+ }
}
- foreach i = [0, 1] in
- def : Pat<(v2i64 (PPCvecinsertelt v2i64:$vDi, i64:$rA, (i64 i))),
- (VINSD $vDi, !mul(i, 8), $rA)>;
+
+ foreach Idx = [0, 1] in
+ def : Pat<(v2i64 (insertelt v2i64:$vDi, i64:$rA, Idx)),
+ (VINSD $vDi, !mul(Idx, 8), $rA)>;
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index c0f2aed43a4d..d2d5ca92ca1c 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -138,6 +138,10 @@ def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCzextldsplat : SDNode<"PPCISD::ZEXT_LD_SPLAT", SDT_PPCldsplat,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED",
SDTypeProfile<1, 1, []>, []>;
@@ -902,16 +906,13 @@ let hasSideEffects = 0 in {
// Rounding Instructions respecting current rounding mode
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
- "xsrdpic $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ "xsrdpic $XT, $XB", IIC_VecFP, []>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvrdpic $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ "xvrdpic $XT, $XB", IIC_VecFP, []>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvrspic $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ "xvrspic $XT, $XB", IIC_VecFP, []>;
// Max/Min Instructions
let isCommutable = 1 in {
def XSMAXDP : XX3Form<60, 160,
@@ -1062,6 +1063,14 @@ let hasSideEffects = 0 in {
[(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
imm32SExt16:$DM))]>;
let isCodeGenOnly = 1 in
+ // Note that the input register class for `$XA` of XXPERMDIs is `vsfrc`, which
+ // is not the same as the input register class (`vsrc`) of the XXPERMDI
+ // instruction. This is done on purpose because:
+ // 1: The input is primarily for loads that load a partial vector (LFIWZX,
+ // etc.), so there is no need for SUBREG_TO_REG.
+ // 2: With the `vsfrc` register class, the final assembly uses float registers
+ // like `f0` instead of vector-scalar registers like `vs0`, which helps
+ // readability.
def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
"xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
def XXSEL : XX4Form<60, 3,
@@ -2771,9 +2780,6 @@ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be ForceXForm:$src)), (LXVD2X ForceXForm:$s
def : Pat<(f32 (any_fround f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPI
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
-def : Pat<(f32 (fnearbyint f32:$S)),
- (f32 (COPY_TO_REGCLASS (XSRDPIC
- (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
def : Pat<(f32 (any_ffloor f32:$S)),
(f32 (COPY_TO_REGCLASS (XSRDPIM
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
@@ -2792,6 +2798,19 @@ def : Pat<(v4f32 (any_frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
def : Pat<(f64 (any_frint f64:$S)), (f64 (XSRDPIC $S))>;
def : Pat<(v2f64 (any_frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
+// Rounding without exceptions (nearbyint). Due to strange tblgen behaviour,
+// these need to be defined after the any_frint versions so ISEL will correctly
+// add the chain to the strict versions.
+def : Pat<(f32 (fnearbyint f32:$S)),
+ (f32 (COPY_TO_REGCLASS (XSRDPIC
+ (COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
+def : Pat<(f64 (fnearbyint f64:$S)),
+ (f64 (XSRDPIC $S))>;
+def : Pat<(v2f64 (fnearbyint v2f64:$S)),
+ (v2f64 (XVRDPIC $S))>;
+def : Pat<(v4f32 (fnearbyint v4f32:$S)),
+ (v4f32 (XVRSPIC $S))>;
+
// Materialize a zero-vector of long long
def : Pat<(v2i64 immAllZerosV),
(v2i64 (XXLXORz))>;
@@ -2809,6 +2828,10 @@ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
(v2i64 (XXPERMDI (SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64),
(SUBREG_TO_REG (i64 1), (XSCVDPUXDS $A), sub_64), 0))>;
+def : Pat<(v4i32 (PPCSToV DblToInt.A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPSXWS f64:$A), sub_64))>;
+def : Pat<(v4i32 (PPCSToV DblToUInt.A)),
+ (v4i32 (SUBREG_TO_REG (i64 1), (XSCVDPUXWS f64:$A), sub_64))>;
defm : ScalToVecWPermute<
v4i32, FltToIntLoad.A,
(XXSPLTW (SUBREG_TO_REG (i64 1), (XSCVDPSXWSs (XFLOADf32 ForceXForm:$A)), sub_64), 1),
@@ -2823,10 +2846,20 @@ def : Pat<(v4f32 (build_vector (f32 (fpround f64:$A)), (f32 (fpround f64:$A)),
def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
(v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+
+// Splat loads.
def : Pat<(v2f64 (PPCldsplat ForceXForm:$A)),
(v2f64 (LXVDSX ForceXForm:$A))>;
+def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)),
+ (v4f32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>;
def : Pat<(v2i64 (PPCldsplat ForceXForm:$A)),
(v2i64 (LXVDSX ForceXForm:$A))>;
+def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)),
+ (v4i32 (XXSPLTW (SUBREG_TO_REG (i64 1), (LFIWZX ForceXForm:$A), sub_64), 1))>;
+def : Pat<(v2i64 (PPCzextldsplat ForceXForm:$A)),
+ (v2i64 (XXPERMDIs (LFIWZX ForceXForm:$A), 0))>;
+def : Pat<(v2i64 (PPCsextldsplat ForceXForm:$A)),
+ (v2i64 (XXPERMDIs (LFIWAX ForceXForm:$A), 0))>;
// Build vectors of floating point converted to i64.
def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
@@ -2962,11 +2995,11 @@ def : Pat<(v2i64 (fp_to_uint
def : Pat<WToDPExtractConv.BV02S,
(v2f64 (XVCVSXWDP $A))>;
def : Pat<WToDPExtractConv.BV13S,
- (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 3)))>;
+ (v2f64 (XVCVSXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV02U,
(v2f64 (XVCVUXWDP $A))>;
def : Pat<WToDPExtractConv.BV13U,
- (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+ (v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
(v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
@@ -3536,6 +3569,12 @@ def : Pat<(v16i8 (PPCmtvsrz i32:$A)),
def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
immSExt5NonZero:$A, immSExt5NonZero:$A)),
(v4i32 (VSPLTISW imm:$A))>;
+
+// Splat loads.
+def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
+ (v8i16 (VSPLTHs 3, (MTVSRWZ (LHZX ForceXForm:$A))))>;
+def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
+ (v16i8 (VSPLTBs 7, (MTVSRWZ (LBZX ForceXForm:$A))))>;
} // HasVSX, HasDirectMove
// Big endian VSX subtarget with direct moves.
@@ -3547,7 +3586,7 @@ defm : ScalToVecWPermute<
(SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
defm : ScalToVecWPermute<
v8i16, (i32 i32:$A),
- (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64),
+ (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64),
(SUBREG_TO_REG (i64 1), (MTVSRWZ $A), sub_64)>;
defm : ScalToVecWPermute<
v4i32, (i32 i32:$A),
@@ -4083,6 +4122,10 @@ def : Pat<(v4f32 (PPCldsplat ForceXForm:$A)),
(v4f32 (LXVWSX ForceXForm:$A))>;
def : Pat<(v4i32 (PPCldsplat ForceXForm:$A)),
(v4i32 (LXVWSX ForceXForm:$A))>;
+def : Pat<(v8i16 (PPCldsplat ForceXForm:$A)),
+ (v8i16 (VSPLTHs 3, (LXSIHZX ForceXForm:$A)))>;
+def : Pat<(v16i8 (PPCldsplat ForceXForm:$A)),
+ (v16i8 (VSPLTBs 7, (LXSIBZX ForceXForm:$A)))>;
} // HasVSX, HasP9Vector
// Any Power9 VSX subtarget with equivalent length but better Power10 VSX
@@ -4138,12 +4181,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 12)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 0))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 8))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
@@ -4382,12 +4465,52 @@ def : Pat<(f64 (PPCfcfidu (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 3)))))),
(f64 (XSCVUXDDP (XXEXTRACTUW $A, 0)))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 0)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 12))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 0)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 12))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 1)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 8))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 1)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 8))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 2)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 4))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 2)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 4))>;
def : Pat<(v4i32 (insertelt v4i32:$A, i32:$B, 3)),
(v4i32 (XXINSERTW v4i32:$A, AlignValues.I32_TO_BE_WORD1, 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPSXWS f64:$B), sub_64),
+ 0))>;
+def : Pat<(v4i32 (insertelt v4i32:$A, DblToUInt.B, 3)),
+ (v4i32 (XXINSERTW v4i32:$A,
+ (SUBREG_TO_REG (i64 1),
+ (XSCVDPUXWS f64:$B), sub_64),
+ 0))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 0)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 1)),
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 1d2b1ed3f626..7f63827afbd6 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -39,6 +39,40 @@
// T *p = array[-1];
// for (int i = 0; i < n; ++i)
// *++p = c;
+//
+// 3: Common multiple chains of loads/stores with the same offsets in the loop,
+// so that the offsets can be reused and the register pressure in the loop is
+// reduced. This transformation can also increase loop ILP, since each chain
+// now uses its own loop induction add/addi, at the cost of more add/addi
+// instructions in the loop.
+//
+// Generically, this means transforming loops like this:
+//
+// char *p;
+// A1 = p + base1
+// A2 = p + base1 + offset
+// B1 = p + base2
+// B2 = p + base2 + offset
+//
+// for (int i = 0; i < n; i++) {
+// unsigned long x1 = *(unsigned long *)(A1 + i);
+// unsigned long x2 = *(unsigned long *)(A2 + i);
+// unsigned long x3 = *(unsigned long *)(B1 + i);
+// unsigned long x4 = *(unsigned long *)(B2 + i);
+// }
+//
+// to look like this:
+//
+// A1_new = p + base1 // chain 1
+// B1_new = p + base2 // chain 2, now inside the loop, common offset is
+// // reused.
+//
+// for (long long i = 0; i < n; i+=count) {
+// unsigned long x1 = *(unsigned long *)(A1_new + i);
+// unsigned long x2 = *(unsigned long *)((A1_new + i) + offset);
+// unsigned long x3 = *(unsigned long *)(B1_new + i);
+// unsigned long x4 = *(unsigned long *)((B1_new + i) + offset);
+// }
//===----------------------------------------------------------------------===//
#include "PPC.h"
@@ -81,15 +115,25 @@
using namespace llvm;
-static cl::opt<unsigned> MaxVarsPrep("ppc-formprep-max-vars",
- cl::Hidden, cl::init(24),
- cl::desc("Potential common base number threshold per function for PPC loop "
- "prep"));
+static cl::opt<unsigned>
+ MaxVarsPrep("ppc-formprep-max-vars", cl::Hidden, cl::init(24),
+ cl::ZeroOrMore,
+ cl::desc("Potential common base number threshold per function "
+ "for PPC loop prep"));
static cl::opt<bool> PreferUpdateForm("ppc-formprep-prefer-update",
cl::init(true), cl::Hidden,
cl::desc("prefer update form when ds form is also a update form"));
+static cl::opt<bool> EnableUpdateFormForNonConstInc(
+ "ppc-formprep-update-nonconst-inc", cl::init(false), cl::Hidden,
+ cl::desc("prepare update form when the load/store increment is a loop "
+ "invariant non-const value."));
+
+static cl::opt<bool> EnableChainCommoning(
+ "ppc-formprep-chain-commoning", cl::init(false), cl::Hidden,
+ cl::desc("Enable chain commoning in PPC loop prepare pass."));
+
// Sum of following 3 per loop thresholds for all loops can not be larger
// than MaxVarsPrep.
// Now the thresholds for each kind of prep are experimental values on Power9.
@@ -106,6 +150,16 @@ static cl::opt<unsigned> MaxVarsDQForm("ppc-dqprep-max-vars",
cl::Hidden, cl::init(8),
cl::desc("Potential PHI threshold per loop for PPC loop prep of DQ form"));
+// Chain commoning reduces register pressure, so the number of PHI nodes is not
+// taken into account here.
+// However, chain commoning increases the number of add/addi instructions in
+// the loop and also increases loop ILP. The maximum number of chains should
+// match the hardware IssueWidth, because there is no ILP benefit once the
+// number of parallel chains exceeds the IssueWidth. Assuming 2 chains per
+// bucket, there would be at most 4 buckets on P9 (IssueWidth is 8).
+static cl::opt<unsigned> MaxVarsChainCommon(
+ "ppc-chaincommon-max-vars", cl::Hidden, cl::init(4),
+ cl::desc("Bucket number per loop for PPC loop chain common"));
// It would not be profitable if the common base has only one load/store; ISEL
// should already be able to choose best load/store form based on offset for
@@ -116,35 +170,54 @@ static cl::opt<unsigned> DispFormPrepMinThreshold("ppc-dispprep-min-threshold",
cl::desc("Minimal common base load/store instructions triggering DS/DQ form "
"preparation"));
+static cl::opt<unsigned> ChainCommonPrepMinThreshold(
+ "ppc-chaincommon-min-threshold", cl::Hidden, cl::init(4),
+ cl::desc("Minimal common base load/store instructions triggering chain "
+ "commoning preparation. Must be not smaller than 4"));
+
STATISTIC(PHINodeAlreadyExistsUpdate, "PHI node already in pre-increment form");
STATISTIC(PHINodeAlreadyExistsDS, "PHI node already in DS form");
STATISTIC(PHINodeAlreadyExistsDQ, "PHI node already in DQ form");
STATISTIC(DSFormChainRewritten, "Num of DS form chain rewritten");
STATISTIC(DQFormChainRewritten, "Num of DQ form chain rewritten");
STATISTIC(UpdFormChainRewritten, "Num of update form chain rewritten");
+STATISTIC(ChainCommoningRewritten, "Num of commoning chains");
namespace {
struct BucketElement {
- BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
+ BucketElement(const SCEV *O, Instruction *I) : Offset(O), Instr(I) {}
BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
- const SCEVConstant *Offset;
+ const SCEV *Offset;
Instruction *Instr;
};
struct Bucket {
- Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B),
- Elements(1, BucketElement(I)) {}
+ Bucket(const SCEV *B, Instruction *I)
+ : BaseSCEV(B), Elements(1, BucketElement(I)) {
+ ChainSize = 0;
+ }
+ // The base of the whole bucket.
const SCEV *BaseSCEV;
+
+ // All elements in the bucket. In the bucket, the element with the BaseSCEV
+ // has no offset and all other elements are stored as offsets to the
+ // BaseSCEV.
SmallVector<BucketElement, 16> Elements;
+
+ // The size of each potential chain. This is used for chain commoning only.
+ unsigned ChainSize;
+
+ // The base for each potential chain. This is used for chain commoning only.
+ SmallVector<BucketElement, 16> ChainBases;
};
// "UpdateForm" is not a real PPC instruction form, it stands for dform
// load/store with update like ldu/stdu, or Prefetch intrinsic.
// For DS form instructions, their displacements must be multiple of 4.
// For DQ form instructions, their displacements must be multiple of 16.
- enum InstrForm { UpdateForm = 1, DSForm = 4, DQForm = 16 };
+ enum PrepForm { UpdateForm = 1, DSForm = 4, DQForm = 16, ChainCommoning };
class PPCLoopInstrFormPrep : public FunctionPass {
public:
@@ -169,11 +242,12 @@ namespace {
private:
PPCTargetMachine *TM = nullptr;
- const PPCSubtarget *ST;
+ const PPCSubtarget *ST;
DominatorTree *DT;
LoopInfo *LI;
ScalarEvolution *SE;
bool PreserveLCSSA;
+ bool HasCandidateForPrepare;
/// Successful preparation number for Update/DS/DQ form in all inner most
/// loops. One successful preparation will put one common base out of loop,
@@ -184,22 +258,39 @@ namespace {
bool runOnLoop(Loop *L);
/// Check if required PHI node is already exist in Loop \p L.
- bool alreadyPrepared(Loop *L, Instruction* MemI,
+ bool alreadyPrepared(Loop *L, Instruction *MemI,
const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV,
- InstrForm Form);
+ const SCEV *BasePtrIncSCEV, PrepForm Form);
+
+ /// Get the value which defines the increment SCEV \p BasePtrIncSCEV.
+ Value *getNodeForInc(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrIncSCEV);
+
+ /// Common chains to reuse offsets for a loop to reduce register pressure.
+ bool chainCommoning(Loop *L, SmallVector<Bucket, 16> &Buckets);
+
+ /// Find out the potential commoning chains and their bases.
+ bool prepareBasesForCommoningChains(Bucket &BucketChain);
+
+ /// Rewrite load/store according to the common chains.
+ bool
+ rewriteLoadStoresForCommoningChains(Loop *L, Bucket &Bucket,
+ SmallSet<BasicBlock *, 16> &BBChanged);
/// Collect condition matched(\p isValidCandidate() returns true)
/// candidates in Loop \p L.
SmallVector<Bucket, 16> collectCandidates(
Loop *L,
- std::function<bool(const Instruction *, const Value *, const Type *)>
+ std::function<bool(const Instruction *, Value *, const Type *)>
isValidCandidate,
+ std::function<bool(const SCEV *)> isValidDiff,
unsigned MaxCandidateNum);
- /// Add a candidate to candidates \p Buckets.
+ /// Add a candidate to \p Buckets if the diff between the candidate and one
+ /// base in \p Buckets satisfies \p isValidDiff.
void addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
SmallVector<Bucket, 16> &Buckets,
+ std::function<bool(const SCEV *)> isValidDiff,
unsigned MaxCandidateNum);
/// Prepare all candidates in \p Buckets for update form.
@@ -207,8 +298,7 @@ namespace {
/// Prepare all candidates in \p Buckets for displacement form, now for
/// ds/dq.
- bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
- InstrForm Form);
+ bool dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets, PrepForm Form);
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
@@ -216,8 +306,7 @@ namespace {
/// If success, best base element must be stored as the first element of
/// \p BucketChain.
/// Return false if no base element found, otherwise return true.
- bool prepareBaseForDispFormChain(Bucket &BucketChain,
- InstrForm Form);
+ bool prepareBaseForDispFormChain(Bucket &BucketChain, PrepForm Form);
/// Prepare for one chain \p BucketChain, find the best base element and
/// update all other elements in \p BucketChain accordingly.
@@ -230,7 +319,20 @@ namespace {
/// preparation.
bool rewriteLoadStores(Loop *L, Bucket &BucketChain,
SmallSet<BasicBlock *, 16> &BBChanged,
- InstrForm Form);
+ PrepForm Form);
+
+ /// Rewrite for the base load/store of a chain.
+ std::pair<Instruction *, Instruction *>
+ rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
+ Instruction *BaseMemI, bool CanPreInc, PrepForm Form,
+ SCEVExpander &SCEVE, SmallPtrSet<Value *, 16> &DeletedPtrs);
+
+ /// Rewrite for the other load/stores of a chain according to the new \p
+ /// Base.
+ Instruction *
+ rewriteForBucketElement(std::pair<Instruction *, Instruction *> Base,
+ const BucketElement &Element, Value *OffToBase,
+ SmallPtrSet<Value *, 16> &DeletedPtrs);
};
} // end anonymous namespace
@@ -266,23 +368,35 @@ static std::string getInstrName(const Value *I, StringRef Suffix) {
if (I->hasName())
return (I->getName() + Suffix).str();
else
- return "";
+ return "";
}
-static Value *GetPointerOperand(Value *MemI) {
+static Value *getPointerOperandAndType(Value *MemI,
+ Type **PtrElementType = nullptr) {
+
+ Value *PtrValue = nullptr;
+ Type *PointerElementType = nullptr;
+
if (LoadInst *LMemI = dyn_cast<LoadInst>(MemI)) {
- return LMemI->getPointerOperand();
+ PtrValue = LMemI->getPointerOperand();
+ PointerElementType = LMemI->getType();
} else if (StoreInst *SMemI = dyn_cast<StoreInst>(MemI)) {
- return SMemI->getPointerOperand();
+ PtrValue = SMemI->getPointerOperand();
+ PointerElementType = SMemI->getValueOperand()->getType();
} else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(MemI)) {
+ PointerElementType = Type::getInt8Ty(MemI->getContext());
if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
- IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp)
- return IMemI->getArgOperand(0);
- if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp)
- return IMemI->getArgOperand(1);
+ IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
+ PtrValue = IMemI->getArgOperand(0);
+ } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
+ PtrValue = IMemI->getArgOperand(1);
+ }
}
+ // Get the element type if PtrElementType is not null.
+ if (PtrElementType)
+ *PtrElementType = PointerElementType;
- return nullptr;
+ return PtrValue;
}
bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
@@ -306,58 +420,460 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) {
return MadeChange;
}
-void PPCLoopInstrFormPrep::addOneCandidate(Instruction *MemI, const SCEV *LSCEV,
- SmallVector<Bucket, 16> &Buckets,
- unsigned MaxCandidateNum) {
- assert((MemI && GetPointerOperand(MemI)) &&
+// Finding the minimal (chain_number + reusable_offset_number) is a complicated
+// algorithmic problem.
+// For now, the algorithm used here only handles the common pattern produced by
+// manually unrolled loops.
+// FIXME: use a more powerful algorithm to find minimal sum of chain_number and
+// reusable_offset_number for one base with multiple offsets.
+bool PPCLoopInstrFormPrep::prepareBasesForCommoningChains(Bucket &CBucket) {
+ // The minimal size for profitable chain commoning:
+ // A1 = base + offset1
+ // A2 = base + offset2 (offset2 - offset1 = X)
+ // A3 = base + offset3
+ // A4 = base + offset4 (offset4 - offset3 = X)
+ // ======>
+ // base1 = base + offset1
+ // base2 = base + offset3
+ // A1 = base1
+ // A2 = base1 + X
+ // A3 = base2
+ // A4 = base2 + X
+ //
+ // The benefit comes from the reuse of offset 'X'.
+
+ assert(ChainCommonPrepMinThreshold >= 4 &&
+ "Thredhold can not be smaller than 4!\n");
+ if (CBucket.Elements.size() < ChainCommonPrepMinThreshold)
+ return false;
+
+ // We simply take FirstOffset, the offset between element 1 and element 0, as
+ // the reusable offset for every chain.
+ const SCEV *FirstOffset = CBucket.Elements[1].Offset;
+
+ // Figure out how many times FirstOffset is reused across the whole bucket.
+ // For a successful chain commoning candidate, the offset difference between
+ // element 1 and element 0 of each chain must also be FirstOffset.
+ unsigned FirstOffsetReusedCount = 1;
+
+ // Figure out how many times FirstOffset is reused within the first chain.
+ // The chain number is FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain.
+ unsigned FirstOffsetReusedCountInFirstChain = 1;
+
+ unsigned EleNum = CBucket.Elements.size();
+ bool SawChainSeparater = false;
+ for (unsigned j = 2; j != EleNum; ++j) {
+ if (SE->getMinusSCEV(CBucket.Elements[j].Offset,
+ CBucket.Elements[j - 1].Offset) == FirstOffset) {
+ if (!SawChainSeparater)
+ FirstOffsetReusedCountInFirstChain++;
+ FirstOffsetReusedCount++;
+ } else
+ // For now, if we meet any offset which is not FirstOffset, we assume we
+ // have found a new chain.
+ // This makes us miss some opportunities.
+ // For example, we can common:
+ //
+ // {OffsetA, OffsetA, OffsetB, OffsetA, OffsetA, OffsetB}
+ //
+ // as two chains:
+ // {{OffsetA, OffsetA, OffsetB}, {OffsetA, OffsetA, OffsetB}}
+ // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 2
+ //
+ // But we fail to common:
+ //
+ // {OffsetA, OffsetB, OffsetA, OffsetA, OffsetB, OffsetA}
+ // FirstOffsetReusedCount = 4; FirstOffsetReusedCountInFirstChain = 1
+
+ SawChainSeparater = true;
+ }
+
+ // FirstOffset is not reused, skip this bucket.
+ if (FirstOffsetReusedCount == 1)
+ return false;
+
+ unsigned ChainNum =
+ FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
+
+ // All elements are increased by FirstOffset.
+ // The number of chains should be sqrt(EleNum).
+ if (!SawChainSeparater)
+ ChainNum = (unsigned)sqrt((double)EleNum);
+
+ CBucket.ChainSize = (unsigned)(EleNum / ChainNum);
+
+ // If this is not a perfect chain (e.g., not all elements can be put into
+ // commoning chains), skip it for now.
+ if (CBucket.ChainSize * ChainNum != EleNum)
+ return false;
+
+ if (SawChainSeparater) {
+ // Check that the offset seqs are the same for all chains.
+ for (unsigned i = 1; i < CBucket.ChainSize; i++)
+ for (unsigned j = 1; j < ChainNum; j++)
+ if (CBucket.Elements[i].Offset !=
+ SE->getMinusSCEV(CBucket.Elements[i + j * CBucket.ChainSize].Offset,
+ CBucket.Elements[j * CBucket.ChainSize].Offset))
+ return false;
+ }
+
+ for (unsigned i = 0; i < ChainNum; i++)
+ CBucket.ChainBases.push_back(CBucket.Elements[i * CBucket.ChainSize]);
+
+ LLVM_DEBUG(dbgs() << "Bucket has " << ChainNum << " chains.\n");
+
+ return true;
+}
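A self-contained sketch of the chain-splitting bookkeeping above, with plain integers standing in for SCEV offsets; the function name, the integer simplification, and the omission of the final per-chain offset-sequence check are the editor's assumptions rather than part of the patch. It reproduces ChainNum = 2 and ChainSize = 2 for the A1..A4 bucket used in the profitability comment:

#include <cmath>
#include <cstdio>
#include <utility>
#include <vector>

// Offsets are relative to the bucket base; element 0 is the base itself (0).
// Returns {ChainNum, ChainSize}, or {0, 0} if the bucket does not split into
// perfect chains. Mirrors prepareBasesForCommoningChains, minus the final
// check that every chain repeats the same offset sequence.
static std::pair<unsigned, unsigned>
splitIntoChains(const std::vector<long> &Off) {
  unsigned EleNum = Off.size();
  if (EleNum < 4) // ChainCommonPrepMinThreshold
    return {0, 0};
  long FirstOffset = Off[1] - Off[0];
  unsigned FirstOffsetReusedCount = 1;
  unsigned FirstOffsetReusedCountInFirstChain = 1;
  bool SawChainSeparator = false;
  for (unsigned j = 2; j != EleNum; ++j) {
    if (Off[j] - Off[j - 1] == FirstOffset) {
      if (!SawChainSeparator)
        ++FirstOffsetReusedCountInFirstChain;
      ++FirstOffsetReusedCount;
    } else
      SawChainSeparator = true;
  }
  if (FirstOffsetReusedCount == 1)
    return {0, 0};
  unsigned ChainNum =
      FirstOffsetReusedCount / FirstOffsetReusedCountInFirstChain;
  if (!SawChainSeparator)
    ChainNum = (unsigned)std::sqrt((double)EleNum);
  unsigned ChainSize = EleNum / ChainNum;
  if (ChainSize * ChainNum != EleNum)
    return {0, 0};
  return {ChainNum, ChainSize};
}

int main() {
  // {A1, A2, A3, A4} = {base, base+16, base+1024, base+1040}: one reusable
  // offset (16) and two chains of two elements each.
  std::pair<unsigned, unsigned> R = splitIntoChains({0, 16, 1024, 1040});
  std::printf("ChainNum=%u ChainSize=%u\n", R.first, R.second); // 2 and 2
  return 0;
}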
+
+bool PPCLoopInstrFormPrep::chainCommoning(Loop *L,
+ SmallVector<Bucket, 16> &Buckets) {
+ bool MadeChange = false;
+
+ if (Buckets.empty())
+ return MadeChange;
+
+ SmallSet<BasicBlock *, 16> BBChanged;
+
+ for (auto &Bucket : Buckets) {
+ if (prepareBasesForCommoningChains(Bucket))
+ MadeChange |= rewriteLoadStoresForCommoningChains(L, Bucket, BBChanged);
+ }
+
+ if (MadeChange)
+ for (auto *BB : BBChanged)
+ DeleteDeadPHIs(BB);
+ return MadeChange;
+}
+
+bool PPCLoopInstrFormPrep::rewriteLoadStoresForCommoningChains(
+ Loop *L, Bucket &Bucket, SmallSet<BasicBlock *, 16> &BBChanged) {
+ bool MadeChange = false;
+
+ assert(Bucket.Elements.size() ==
+ Bucket.ChainBases.size() * Bucket.ChainSize &&
+ "invalid bucket for chain commoning!\n");
+ SmallPtrSet<Value *, 16> DeletedPtrs;
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
+ "loopprepare-chaincommon");
+
+ for (unsigned ChainIdx = 0; ChainIdx < Bucket.ChainBases.size(); ++ChainIdx) {
+ unsigned BaseElemIdx = Bucket.ChainSize * ChainIdx;
+ const SCEV *BaseSCEV =
+ ChainIdx ? SE->getAddExpr(Bucket.BaseSCEV,
+ Bucket.Elements[BaseElemIdx].Offset)
+ : Bucket.BaseSCEV;
+ const SCEVAddRecExpr *BasePtrSCEV = cast<SCEVAddRecExpr>(BaseSCEV);
+
+ // Make sure the base is able to expand.
+ if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
+ return MadeChange;
+
+ assert(BasePtrSCEV->isAffine() &&
+ "Invalid SCEV type for the base ptr for a candidate chain!\n");
+
+ std::pair<Instruction *, Instruction *> Base = rewriteForBase(
+ L, BasePtrSCEV, Bucket.Elements[BaseElemIdx].Instr,
+ false /* CanPreInc */, ChainCommoning, SCEVE, DeletedPtrs);
+
+ if (!Base.first || !Base.second)
+ return MadeChange;
+
+ // Keep track of the replacement pointer values we've inserted so that we
+ // don't generate more pointer values than necessary.
+ SmallPtrSet<Value *, 16> NewPtrs;
+ NewPtrs.insert(Base.first);
+
+ for (unsigned Idx = BaseElemIdx + 1; Idx < BaseElemIdx + Bucket.ChainSize;
+ ++Idx) {
+ BucketElement &I = Bucket.Elements[Idx];
+ Value *Ptr = getPointerOperandAndType(I.Instr);
+ assert(Ptr && "No pointer operand");
+ if (NewPtrs.count(Ptr))
+ continue;
+
+ const SCEV *OffsetSCEV =
+ BaseElemIdx ? SE->getMinusSCEV(Bucket.Elements[Idx].Offset,
+ Bucket.Elements[BaseElemIdx].Offset)
+ : Bucket.Elements[Idx].Offset;
+
+ // Make sure the offset can be expanded. This only needs to be checked once,
+ // as the offsets are reused between different chains.
+ if (!BaseElemIdx)
+ if (!isSafeToExpand(OffsetSCEV, *SE))
+ return false;
+
+ Value *OffsetValue = SCEVE.expandCodeFor(
+ OffsetSCEV, OffsetSCEV->getType(), LoopPredecessor->getTerminator());
+
+ Instruction *NewPtr = rewriteForBucketElement(Base, Bucket.Elements[Idx],
+ OffsetValue, DeletedPtrs);
+
+ assert(NewPtr && "Wrong rewrite!\n");
+ NewPtrs.insert(NewPtr);
+ }
+
+ ++ChainCommoningRewritten;
+ }
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted below, causing the AssertingVH in the cache to trigger.
+ SCEVE.clear();
+
+ for (auto *Ptr : DeletedPtrs) {
+ if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
+ BBChanged.insert(IDel->getParent());
+ RecursivelyDeleteTriviallyDeadInstructions(Ptr);
+ }
+
+ MadeChange = true;
+ return MadeChange;
+}
+
+// Rewrite the new base according to BasePtrSCEV.
+// bb.loop.preheader:
+// %newstart = ...
+// bb.loop.body:
+// %phinode = phi [ %newstart, %bb.loop.preheader ], [ %add, %bb.loop.body ]
+// ...
+// %add = getelementptr %phinode, %inc
+//
+// The first returned instruction is %phinode (or a type cast of %phinode); the
+// caller needs this value to rewrite the other load/stores in the same chain.
+// The second returned instruction is %add; the caller needs this value to
+// rewrite the other load/stores in the same chain.
+std::pair<Instruction *, Instruction *>
+PPCLoopInstrFormPrep::rewriteForBase(Loop *L, const SCEVAddRecExpr *BasePtrSCEV,
+ Instruction *BaseMemI, bool CanPreInc,
+ PrepForm Form, SCEVExpander &SCEVE,
+ SmallPtrSet<Value *, 16> &DeletedPtrs) {
+
+ LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
+
+ assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
+
+ Value *BasePtr = getPointerOperandAndType(BaseMemI);
+ assert(BasePtr && "No pointer operand");
+
+ Type *I8Ty = Type::getInt8Ty(BaseMemI->getParent()->getContext());
+ Type *I8PtrTy =
+ Type::getInt8PtrTy(BaseMemI->getParent()->getContext(),
+ BasePtr->getType()->getPointerAddressSpace());
+
+ bool IsConstantInc = false;
+ const SCEV *BasePtrIncSCEV = BasePtrSCEV->getStepRecurrence(*SE);
+ Value *IncNode = getNodeForInc(L, BaseMemI, BasePtrIncSCEV);
+
+ const SCEVConstant *BasePtrIncConstantSCEV =
+ dyn_cast<SCEVConstant>(BasePtrIncSCEV);
+ if (BasePtrIncConstantSCEV)
+ IsConstantInc = true;
+
+ // No valid representation for the increment.
+ if (!IncNode) {
+ LLVM_DEBUG(dbgs() << "Loop Increasement can not be represented!\n");
+ return std::make_pair(nullptr, nullptr);
+ }
+
+ if (Form == UpdateForm && !IsConstantInc && !EnableUpdateFormForNonConstInc) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Update form prepare for non-const increment is not enabled!\n");
+ return std::make_pair(nullptr, nullptr);
+ }
+
+ const SCEV *BasePtrStartSCEV = nullptr;
+ if (CanPreInc) {
+ assert(SE->isLoopInvariant(BasePtrIncSCEV, L) &&
+ "Increment is not loop invariant!\n");
+ BasePtrStartSCEV = SE->getMinusSCEV(BasePtrSCEV->getStart(),
+ IsConstantInc ? BasePtrIncConstantSCEV
+ : BasePtrIncSCEV);
+ } else
+ BasePtrStartSCEV = BasePtrSCEV->getStart();
+
+ if (alreadyPrepared(L, BaseMemI, BasePtrStartSCEV, BasePtrIncSCEV, Form)) {
+ LLVM_DEBUG(dbgs() << "Instruction form is already prepared!\n");
+ return std::make_pair(nullptr, nullptr);
+ }
+
+ LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
+
+ BasicBlock *Header = L->getHeader();
+ unsigned HeaderLoopPredCount = pred_size(Header);
+ BasicBlock *LoopPredecessor = L->getLoopPredecessor();
+
+ PHINode *NewPHI = PHINode::Create(I8PtrTy, HeaderLoopPredCount,
+ getInstrName(BaseMemI, PHINodeNameSuffix),
+ Header->getFirstNonPHI());
+
+ Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
+ LoopPredecessor->getTerminator());
+
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to add it the right number of times.
+ for (auto PI : predecessors(Header)) {
+ if (PI != LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
+ }
+
+ Instruction *PtrInc = nullptr;
+ Instruction *NewBasePtr = nullptr;
+ if (CanPreInc) {
+ Instruction *InsPoint = &*Header->getFirstInsertionPt();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
+ InsPoint);
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ if (PtrInc->getType() != BasePtr->getType())
+ NewBasePtr =
+ new BitCastInst(PtrInc, BasePtr->getType(),
+ getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
+ else
+ NewBasePtr = PtrInc;
+ } else {
+ // Note that LoopPredecessor might occur in the predecessor list multiple
+ // times, and we need to make sure no more incoming value for them in PHI.
+ for (auto PI : predecessors(Header)) {
+ if (PI == LoopPredecessor)
+ continue;
+
+ // For the latch predecessor, we need to insert a GEP just before the
+ // terminator to increase the address.
+ BasicBlock *BB = PI;
+ Instruction *InsPoint = BB->getTerminator();
+ PtrInc = GetElementPtrInst::Create(
+ I8Ty, NewPHI, IncNode, getInstrName(BaseMemI, GEPNodeIncNameSuffix),
+ InsPoint);
+ cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
+
+ NewPHI->addIncoming(PtrInc, PI);
+ }
+ PtrInc = NewPHI;
+ if (NewPHI->getType() != BasePtr->getType())
+ NewBasePtr = new BitCastInst(NewPHI, BasePtr->getType(),
+ getInstrName(NewPHI, CastNodeNameSuffix),
+ &*Header->getFirstInsertionPt());
+ else
+ NewBasePtr = NewPHI;
+ }
+
+ BasePtr->replaceAllUsesWith(NewBasePtr);
+
+ DeletedPtrs.insert(BasePtr);
+
+ return std::make_pair(NewBasePtr, PtrInc);
+}
+
+Instruction *PPCLoopInstrFormPrep::rewriteForBucketElement(
+ std::pair<Instruction *, Instruction *> Base, const BucketElement &Element,
+ Value *OffToBase, SmallPtrSet<Value *, 16> &DeletedPtrs) {
+ Instruction *NewBasePtr = Base.first;
+ Instruction *PtrInc = Base.second;
+ assert((NewBasePtr && PtrInc) && "base does not exist!\n");
+
+ Type *I8Ty = Type::getInt8Ty(PtrInc->getParent()->getContext());
+
+ Value *Ptr = getPointerOperandAndType(Element.Instr);
+ assert(Ptr && "No pointer operand");
+
+ Instruction *RealNewPtr;
+ if (!Element.Offset ||
+ (isa<SCEVConstant>(Element.Offset) &&
+ cast<SCEVConstant>(Element.Offset)->getValue()->isZero())) {
+ RealNewPtr = NewBasePtr;
+ } else {
+ Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
+ if (PtrIP && isa<Instruction>(NewBasePtr) &&
+ cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
+ PtrIP = nullptr;
+ else if (PtrIP && isa<PHINode>(PtrIP))
+ PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
+ else if (!PtrIP)
+ PtrIP = Element.Instr;
+
+ assert(OffToBase && "There should be an offset for non base element!\n");
+ GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
+ I8Ty, PtrInc, OffToBase,
+ getInstrName(Element.Instr, GEPNodeOffNameSuffix), PtrIP);
+ if (!PtrIP)
+ NewPtr->insertAfter(cast<Instruction>(PtrInc));
+ NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
+ RealNewPtr = NewPtr;
+ }
+
+ Instruction *ReplNewPtr;
+ if (Ptr->getType() != RealNewPtr->getType()) {
+ ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
+ getInstrName(Ptr, CastNodeNameSuffix));
+ ReplNewPtr->insertAfter(RealNewPtr);
+ } else
+ ReplNewPtr = RealNewPtr;
+
+ Ptr->replaceAllUsesWith(ReplNewPtr);
+ DeletedPtrs.insert(Ptr);
+
+ return ReplNewPtr;
+}
+
+void PPCLoopInstrFormPrep::addOneCandidate(
+ Instruction *MemI, const SCEV *LSCEV, SmallVector<Bucket, 16> &Buckets,
+ std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
+ assert((MemI && getPointerOperandAndType(MemI)) &&
"Candidate should be a memory instruction.");
assert(LSCEV && "Invalid SCEV for Ptr value.");
+
bool FoundBucket = false;
for (auto &B : Buckets) {
+ if (cast<SCEVAddRecExpr>(B.BaseSCEV)->getStepRecurrence(*SE) !=
+ cast<SCEVAddRecExpr>(LSCEV)->getStepRecurrence(*SE))
+ continue;
const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV);
- if (const auto *CDiff = dyn_cast<SCEVConstant>(Diff)) {
- B.Elements.push_back(BucketElement(CDiff, MemI));
+ if (isValidDiff(Diff)) {
+ B.Elements.push_back(BucketElement(Diff, MemI));
FoundBucket = true;
break;
}
}
if (!FoundBucket) {
- if (Buckets.size() == MaxCandidateNum)
+ if (Buckets.size() == MaxCandidateNum) {
+ LLVM_DEBUG(dbgs() << "Can not prepare more chains, reach maximum limit "
+ << MaxCandidateNum << "\n");
return;
+ }
Buckets.push_back(Bucket(LSCEV, MemI));
}
}
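With the step-recurrence check added to addOneCandidate above, two accesses that share a base pointer but advance by different amounts per iteration no longer land in the same bucket. A hedged illustration; the function and names are hypothetical, not taken from the patch or its tests:

extern void use(unsigned long, unsigned long);

// p[i] steps by 8 bytes per iteration while p[2 * i] steps by 16, so their
// address AddRecs have different step recurrences and addOneCandidate keeps
// them in separate buckets; only same-stride accesses are grouped together.
void strides(unsigned long *p, long n) {
  for (long i = 0; i < n; ++i)
    use(p[i], p[2 * i]);
}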
SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
Loop *L,
- std::function<bool(const Instruction *, const Value *, const Type *)>
+ std::function<bool(const Instruction *, Value *, const Type *)>
isValidCandidate,
- unsigned MaxCandidateNum) {
+ std::function<bool(const SCEV *)> isValidDiff, unsigned MaxCandidateNum) {
SmallVector<Bucket, 16> Buckets;
+
for (const auto &BB : L->blocks())
for (auto &J : *BB) {
- Value *PtrValue;
- Type *PointerElementType;
-
- if (LoadInst *LMemI = dyn_cast<LoadInst>(&J)) {
- PtrValue = LMemI->getPointerOperand();
- PointerElementType = LMemI->getType();
- } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&J)) {
- PtrValue = SMemI->getPointerOperand();
- PointerElementType = SMemI->getValueOperand()->getType();
- } else if (IntrinsicInst *IMemI = dyn_cast<IntrinsicInst>(&J)) {
- PointerElementType = Type::getInt8Ty(J.getContext());
- if (IMemI->getIntrinsicID() == Intrinsic::prefetch ||
- IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_lxvp) {
- PtrValue = IMemI->getArgOperand(0);
- } else if (IMemI->getIntrinsicID() == Intrinsic::ppc_vsx_stxvp) {
- PtrValue = IMemI->getArgOperand(1);
- } else continue;
- } else continue;
-
- unsigned PtrAddrSpace = PtrValue->getType()->getPointerAddressSpace();
- if (PtrAddrSpace)
+ Value *PtrValue = nullptr;
+ Type *PointerElementType = nullptr;
+ PtrValue = getPointerOperandAndType(&J, &PointerElementType);
+
+ if (!PtrValue)
+ continue;
+
+ if (PtrValue->getType()->getPointerAddressSpace())
continue;
if (L->isLoopInvariant(PtrValue))
@@ -368,14 +884,17 @@ SmallVector<Bucket, 16> PPCLoopInstrFormPrep::collectCandidates(
if (!LARSCEV || LARSCEV->getLoop() != L)
continue;
+ // Mark that we have candidates for preparing.
+ HasCandidateForPrepare = true;
+
if (isValidCandidate(&J, PtrValue, PointerElementType))
- addOneCandidate(&J, LSCEV, Buckets, MaxCandidateNum);
+ addOneCandidate(&J, LSCEV, Buckets, isValidDiff, MaxCandidateNum);
}
return Buckets;
}
bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
- InstrForm Form) {
+ PrepForm Form) {
// RemainderOffsetInfo details:
// key: value of (Offset urem DispConstraint). For DSForm, it can
// be [0, 4).
@@ -388,8 +907,9 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
if (!BucketChain.Elements[j].Offset)
RemainderOffsetInfo[0] = std::make_pair(0, 1);
else {
- unsigned Remainder =
- BucketChain.Elements[j].Offset->getAPInt().urem(Form);
+ unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)
+ ->getAPInt()
+ .urem(Form);
if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
else
@@ -404,13 +924,13 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
// contains following load/stores with different remainders:
// 1: 10 load/store whose remainder is 1;
// 2: 9 load/store whose remainder is 2;
- // 3: 1 for remainder 3 and 0 for remainder 0;
+ // 3: 1 for remainder 3 and 0 for remainder 0;
// Now we will choose the first load/store whose remainder is 1 as base and
// adjust all other load/stores according to new base, so we will get 10 DS
// form and 10 X form.
// But we should be more clever, for this case we could use two bases, one for
- // remainder 1 and the other for remainder 2, thus we could get 19 DS form and 1
- // X form.
+ // remainder 1 and the other for remainder 2, thus we could get 19 DS form and
+ // 1 X form.
unsigned MaxCountRemainder = 0;
for (unsigned j = 0; j < (unsigned)Form; j++)
if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
@@ -471,7 +991,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
// If our chosen element has no offset from the base pointer, there's
// nothing to do.
if (!BucketChain.Elements[j].Offset ||
- BucketChain.Elements[j].Offset->isZero())
+ cast<SCEVConstant>(BucketChain.Elements[j].Offset)->isZero())
break;
const SCEV *Offset = BucketChain.Elements[j].Offset;
@@ -489,191 +1009,76 @@ bool PPCLoopInstrFormPrep::prepareBaseForUpdateFormChain(Bucket &BucketChain) {
return true;
}
-bool PPCLoopInstrFormPrep::rewriteLoadStores(Loop *L, Bucket &BucketChain,
- SmallSet<BasicBlock *, 16> &BBChanged,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::rewriteLoadStores(
+ Loop *L, Bucket &BucketChain, SmallSet<BasicBlock *, 16> &BBChanged,
+ PrepForm Form) {
bool MadeChange = false;
+
const SCEVAddRecExpr *BasePtrSCEV =
cast<SCEVAddRecExpr>(BucketChain.BaseSCEV);
if (!BasePtrSCEV->isAffine())
return MadeChange;
- LLVM_DEBUG(dbgs() << "PIP: Transforming: " << *BasePtrSCEV << "\n");
-
- assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?");
-
- // The instruction corresponding to the Bucket's BaseSCEV must be the first
- // in the vector of elements.
- Instruction *MemI = BucketChain.Elements.begin()->Instr;
- Value *BasePtr = GetPointerOperand(MemI);
- assert(BasePtr && "No pointer operand");
-
- Type *I8Ty = Type::getInt8Ty(MemI->getParent()->getContext());
- Type *I8PtrTy = Type::getInt8PtrTy(MemI->getParent()->getContext(),
- BasePtr->getType()->getPointerAddressSpace());
-
- if (!SE->isLoopInvariant(BasePtrSCEV->getStart(), L))
+ if (!isSafeToExpand(BasePtrSCEV->getStart(), *SE))
return MadeChange;
- const SCEVConstant *BasePtrIncSCEV =
- dyn_cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE));
- if (!BasePtrIncSCEV)
- return MadeChange;
+ SmallPtrSet<Value *, 16> DeletedPtrs;
+
+ BasicBlock *Header = L->getHeader();
+ SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(),
+ "loopprepare-formrewrite");
// For some DS form load/store instructions, it can also be an update form,
- // if the stride is a multipler of 4. Use update form if prefer it.
+ // if the stride is constant and is a multiple of 4. Use the update form if it
+ // is preferred.
bool CanPreInc = (Form == UpdateForm ||
- ((Form == DSForm) && !BasePtrIncSCEV->getAPInt().urem(4) &&
+ ((Form == DSForm) &&
+ isa<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE)) &&
+ !cast<SCEVConstant>(BasePtrSCEV->getStepRecurrence(*SE))
+ ->getAPInt()
+ .urem(4) &&
PreferUpdateForm));
- const SCEV *BasePtrStartSCEV = nullptr;
- if (CanPreInc)
- BasePtrStartSCEV =
- SE->getMinusSCEV(BasePtrSCEV->getStart(), BasePtrIncSCEV);
- else
- BasePtrStartSCEV = BasePtrSCEV->getStart();
- if (!isSafeToExpand(BasePtrStartSCEV, *SE))
- return MadeChange;
+ std::pair<Instruction *, Instruction *> Base =
+ rewriteForBase(L, BasePtrSCEV, BucketChain.Elements.begin()->Instr,
+ CanPreInc, Form, SCEVE, DeletedPtrs);
- if (alreadyPrepared(L, MemI, BasePtrStartSCEV, BasePtrIncSCEV, Form))
+ if (!Base.first || !Base.second)
return MadeChange;
- LLVM_DEBUG(dbgs() << "PIP: New start is: " << *BasePtrStartSCEV << "\n");
-
- BasicBlock *Header = L->getHeader();
- unsigned HeaderLoopPredCount = pred_size(Header);
- BasicBlock *LoopPredecessor = L->getLoopPredecessor();
-
- PHINode *NewPHI =
- PHINode::Create(I8PtrTy, HeaderLoopPredCount,
- getInstrName(MemI, PHINodeNameSuffix),
- Header->getFirstNonPHI());
-
- SCEVExpander SCEVE(*SE, Header->getModule()->getDataLayout(), "pistart");
- Value *BasePtrStart = SCEVE.expandCodeFor(BasePtrStartSCEV, I8PtrTy,
- LoopPredecessor->getTerminator());
-
- // Note that LoopPredecessor might occur in the predecessor list multiple
- // times, and we need to add it the right number of times.
- for (auto PI : predecessors(Header)) {
- if (PI != LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(BasePtrStart, LoopPredecessor);
- }
-
- Instruction *PtrInc = nullptr;
- Instruction *NewBasePtr = nullptr;
- if (CanPreInc) {
- Instruction *InsPoint = &*Header->getFirstInsertionPt();
- PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
- cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
- for (auto PI : predecessors(Header)) {
- if (PI == LoopPredecessor)
- continue;
-
- NewPHI->addIncoming(PtrInc, PI);
- }
- if (PtrInc->getType() != BasePtr->getType())
- NewBasePtr = new BitCastInst(
- PtrInc, BasePtr->getType(),
- getInstrName(PtrInc, CastNodeNameSuffix), InsPoint);
- else
- NewBasePtr = PtrInc;
- } else {
- // Note that LoopPredecessor might occur in the predecessor list multiple
- // times, and we need to make sure no more incoming value for them in PHI.
- for (auto PI : predecessors(Header)) {
- if (PI == LoopPredecessor)
- continue;
-
- // For the latch predecessor, we need to insert a GEP just before the
- // terminator to increase the address.
- BasicBlock *BB = PI;
- Instruction *InsPoint = BB->getTerminator();
- PtrInc = GetElementPtrInst::Create(
- I8Ty, NewPHI, BasePtrIncSCEV->getValue(),
- getInstrName(MemI, GEPNodeIncNameSuffix), InsPoint);
-
- cast<GetElementPtrInst>(PtrInc)->setIsInBounds(IsPtrInBounds(BasePtr));
-
- NewPHI->addIncoming(PtrInc, PI);
- }
- PtrInc = NewPHI;
- if (NewPHI->getType() != BasePtr->getType())
- NewBasePtr =
- new BitCastInst(NewPHI, BasePtr->getType(),
- getInstrName(NewPHI, CastNodeNameSuffix),
- &*Header->getFirstInsertionPt());
- else
- NewBasePtr = NewPHI;
- }
-
- // Clear the rewriter cache, because values that are in the rewriter's cache
- // can be deleted below, causing the AssertingVH in the cache to trigger.
- SCEVE.clear();
-
- if (Instruction *IDel = dyn_cast<Instruction>(BasePtr))
- BBChanged.insert(IDel->getParent());
- BasePtr->replaceAllUsesWith(NewBasePtr);
- RecursivelyDeleteTriviallyDeadInstructions(BasePtr);
-
// Keep track of the replacement pointer values we've inserted so that we
// don't generate more pointer values than necessary.
SmallPtrSet<Value *, 16> NewPtrs;
- NewPtrs.insert(NewBasePtr);
+ NewPtrs.insert(Base.first);
for (auto I = std::next(BucketChain.Elements.begin()),
IE = BucketChain.Elements.end(); I != IE; ++I) {
- Value *Ptr = GetPointerOperand(I->Instr);
+ Value *Ptr = getPointerOperandAndType(I->Instr);
assert(Ptr && "No pointer operand");
if (NewPtrs.count(Ptr))
continue;
- Instruction *RealNewPtr;
- if (!I->Offset || I->Offset->getValue()->isZero()) {
- RealNewPtr = NewBasePtr;
- } else {
- Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
- if (PtrIP && isa<Instruction>(NewBasePtr) &&
- cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
- PtrIP = nullptr;
- else if (PtrIP && isa<PHINode>(PtrIP))
- PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
- else if (!PtrIP)
- PtrIP = I->Instr;
-
- GetElementPtrInst *NewPtr = GetElementPtrInst::Create(
- I8Ty, PtrInc, I->Offset->getValue(),
- getInstrName(I->Instr, GEPNodeOffNameSuffix), PtrIP);
- if (!PtrIP)
- NewPtr->insertAfter(cast<Instruction>(PtrInc));
- NewPtr->setIsInBounds(IsPtrInBounds(Ptr));
- RealNewPtr = NewPtr;
- }
+ Instruction *NewPtr = rewriteForBucketElement(
+ Base, *I,
+ I->Offset ? cast<SCEVConstant>(I->Offset)->getValue() : nullptr,
+ DeletedPtrs);
+ assert(NewPtr && "wrong rewrite!\n");
+ NewPtrs.insert(NewPtr);
+ }
+
+ // Clear the rewriter cache, because values that are in the rewriter's cache
+ // can be deleted below, causing the AssertingVH in the cache to trigger.
+ SCEVE.clear();
+ for (auto *Ptr : DeletedPtrs) {
if (Instruction *IDel = dyn_cast<Instruction>(Ptr))
BBChanged.insert(IDel->getParent());
-
- Instruction *ReplNewPtr;
- if (Ptr->getType() != RealNewPtr->getType()) {
- ReplNewPtr = new BitCastInst(RealNewPtr, Ptr->getType(),
- getInstrName(Ptr, CastNodeNameSuffix));
- ReplNewPtr->insertAfter(RealNewPtr);
- } else
- ReplNewPtr = RealNewPtr;
-
- Ptr->replaceAllUsesWith(ReplNewPtr);
RecursivelyDeleteTriviallyDeadInstructions(Ptr);
-
- NewPtrs.insert(RealNewPtr);
}
MadeChange = true;
- SuccPrepCount++;
+ SuccPrepCount++;
if (Form == DSForm && !CanPreInc)
DSFormChainRewritten++;
@@ -698,14 +1103,14 @@ bool PPCLoopInstrFormPrep::updateFormPrep(Loop *L,
MadeChange |= rewriteLoadStores(L, Bucket, BBChanged, UpdateForm);
if (MadeChange)
- for (auto &BB : L->blocks())
- if (BBChanged.count(BB))
- DeleteDeadPHIs(BB);
+ for (auto *BB : BBChanged)
+ DeleteDeadPHIs(BB);
return MadeChange;
}
-bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Buckets,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L,
+ SmallVector<Bucket, 16> &Buckets,
+ PrepForm Form) {
bool MadeChange = false;
if (Buckets.empty())
@@ -720,20 +1125,95 @@ bool PPCLoopInstrFormPrep::dispFormPrep(Loop *L, SmallVector<Bucket, 16> &Bucket
}
if (MadeChange)
- for (auto &BB : L->blocks())
- if (BBChanged.count(BB))
- DeleteDeadPHIs(BB);
+ for (auto *BB : BBChanged)
+ DeleteDeadPHIs(BB);
return MadeChange;
}
+// Find the loop invariant increment node for SCEV BasePtrIncSCEV.
+// bb.loop.preheader:
+// %start = ...
+// bb.loop.body:
+// %phinode = phi [ %start, %bb.loop.preheader ], [ %add, %bb.loop.body ]
+// ...
+// %add = add %phinode, %inc ; %inc is what we want to get.
+//
+Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrIncSCEV) {
+ // If the increment is a constant, no definition is needed.
+ // Return the value directly.
+ if (isa<SCEVConstant>(BasePtrIncSCEV))
+ return cast<SCEVConstant>(BasePtrIncSCEV)->getValue();
+
+ if (!SE->isLoopInvariant(BasePtrIncSCEV, L))
+ return nullptr;
+
+ BasicBlock *BB = MemI->getParent();
+ if (!BB)
+ return nullptr;
+
+ BasicBlock *LatchBB = L->getLoopLatch();
+
+ if (!LatchBB)
+ return nullptr;
+
+ // Run through the PHIs and check their operands to find valid representation
+ // for the increment SCEV.
+ iterator_range<BasicBlock::phi_iterator> PHIIter = BB->phis();
+ for (auto &CurrentPHI : PHIIter) {
+ PHINode *CurrentPHINode = dyn_cast<PHINode>(&CurrentPHI);
+ if (!CurrentPHINode)
+ continue;
+
+ if (!SE->isSCEVable(CurrentPHINode->getType()))
+ continue;
+
+ const SCEV *PHISCEV = SE->getSCEVAtScope(CurrentPHINode, L);
+
+ const SCEVAddRecExpr *PHIBasePtrSCEV = dyn_cast<SCEVAddRecExpr>(PHISCEV);
+ if (!PHIBasePtrSCEV)
+ continue;
+
+ const SCEV *PHIBasePtrIncSCEV = PHIBasePtrSCEV->getStepRecurrence(*SE);
+
+ if (!PHIBasePtrIncSCEV || (PHIBasePtrIncSCEV != BasePtrIncSCEV))
+ continue;
+
+ // Get the incoming value from the loop latch and check if the value has
+ // the add form with the required increment.
+ if (Instruction *I = dyn_cast<Instruction>(
+ CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
+ Value *StrippedBaseI = I;
+ while (BitCastInst *BC = dyn_cast<BitCastInst>(StrippedBaseI))
+ StrippedBaseI = BC->getOperand(0);
+
+ Instruction *StrippedI = dyn_cast<Instruction>(StrippedBaseI);
+ if (!StrippedI)
+ continue;
+
+ // The LSR pass may add a getelementptr instruction to perform the loop
+ // increment, so also search within that getelementptr instruction.
+ if (StrippedI->getOpcode() == Instruction::Add ||
+ (StrippedI->getOpcode() == Instruction::GetElementPtr &&
+ StrippedI->getNumOperands() == 2)) {
+ if (SE->getSCEVAtScope(StrippedI->getOperand(0), L) == BasePtrIncSCEV)
+ return StrippedI->getOperand(0);
+ if (SE->getSCEVAtScope(StrippedI->getOperand(1), L) == BasePtrIncSCEV)
+ return StrippedI->getOperand(1);
+ }
+ }
+ }
+ return nullptr;
+}
+
// In order to prepare for the preferred instruction form, a PHI is added.
// This function will check to see if that PHI already exists and will return
// true if it found an existing PHI with the matched start and increment as the
// one we wanted to create.
-bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
- const SCEV *BasePtrStartSCEV,
- const SCEVConstant *BasePtrIncSCEV,
- InstrForm Form) {
+bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction *MemI,
+ const SCEV *BasePtrStartSCEV,
+ const SCEV *BasePtrIncSCEV,
+ PrepForm Form) {
BasicBlock *BB = MemI->getParent();
if (!BB)
return false;
@@ -773,11 +1253,11 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
if (PHIBasePtrIncSCEV == BasePtrIncSCEV) {
// The existing PHI (CurrentPHINode) has the same start and increment
// as the PHI that we wanted to create.
- if (Form == UpdateForm &&
+ if ((Form == UpdateForm || Form == ChainCommoning) &&
PHIBasePtrSCEV->getStart() == BasePtrStartSCEV) {
++PHINodeAlreadyExistsUpdate;
return true;
- }
+ }
if (Form == DSForm || Form == DQForm) {
const SCEVConstant *Diff = dyn_cast<SCEVConstant>(
SE->getMinusSCEV(PHIBasePtrSCEV->getStart(), BasePtrStartSCEV));
@@ -788,7 +1268,7 @@ bool PPCLoopInstrFormPrep::alreadyPrepared(Loop *L, Instruction* MemI,
++PHINodeAlreadyExistsDQ;
return true;
}
- }
+ }
}
}
}
@@ -825,7 +1305,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
}
// Check if a load/store has update form. This lambda is used by function
// collectCandidates which can collect candidates for types defined by lambda.
- auto isUpdateFormCandidate = [&](const Instruction *I, const Value *PtrValue,
+ auto isUpdateFormCandidate = [&](const Instruction *I, Value *PtrValue,
const Type *PointerElementType) {
assert((PtrValue && I) && "Invalid parameter!");
// There are no update forms for Altivec vector load/stores.
@@ -857,7 +1337,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
};
// Check if a load/store has DS form.
- auto isDSFormCandidate = [](const Instruction *I, const Value *PtrValue,
+ auto isDSFormCandidate = [](const Instruction *I, Value *PtrValue,
const Type *PointerElementType) {
assert((PtrValue && I) && "Invalid parameter!");
if (isa<IntrinsicInst>(I))
@@ -871,7 +1351,7 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
};
// Check if a load/store has DQ form.
- auto isDQFormCandidate = [&](const Instruction *I, const Value *PtrValue,
+ auto isDQFormCandidate = [&](const Instruction *I, Value *PtrValue,
const Type *PointerElementType) {
assert((PtrValue && I) && "Invalid parameter!");
// Check if it is a P10 lxvp/stxvp intrinsic.
@@ -883,31 +1363,131 @@ bool PPCLoopInstrFormPrep::runOnLoop(Loop *L) {
return ST && ST->hasP9Vector() && (PointerElementType->isVectorTy());
};
- // intrinsic for update form.
- SmallVector<Bucket, 16> UpdateFormBuckets =
- collectCandidates(L, isUpdateFormCandidate, MaxVarsUpdateForm);
+ // Check if a load/store is a candidate for chain commoning.
+ // If the start of the SCEV contains only one pointer operand, we can use
+ // that start as a chain separator. Mark this load/store as a candidate.
+ auto isChainCommoningCandidate = [&](const Instruction *I, Value *PtrValue,
+ const Type *PointerElementType) {
+ const SCEVAddRecExpr *ARSCEV =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEVAtScope(PtrValue, L));
+ if (!ARSCEV)
+ return false;
+
+ if (!ARSCEV->isAffine())
+ return false;
+
+ const SCEV *Start = ARSCEV->getStart();
+
+ // A single pointer. We can treat it as offset 0.
+ if (isa<SCEVUnknown>(Start) && Start->getType()->isPointerTy())
+ return true;
+
+ const SCEVAddExpr *ASCEV = dyn_cast<SCEVAddExpr>(Start);
+
+ // We need a SCEVAddExpr to include both base and offset.
+ if (!ASCEV)
+ return false;
+
+ // Make sure there is only one pointer operand (base) and all other
+ // operands are of integer type.
+ bool SawPointer = false;
+ for (const SCEV *Op : ASCEV->operands()) {
+ if (Op->getType()->isPointerTy()) {
+ if (SawPointer)
+ return false;
+ SawPointer = true;
+ } else if (!Op->getType()->isIntegerTy())
+ return false;
+ }
+
+ return SawPointer;
+ };
+
+ // Check if the diff is a constant. This is used for update/DS/DQ form
+ // preparation.
+ auto isValidConstantDiff = [](const SCEV *Diff) {
+ return dyn_cast<SCEVConstant>(Diff) != nullptr;
+ };
+
+ // Make sure the diff between the base and the new candidate has the
+ // required form. This is used for chain commoning preparation.
+ auto isValidChainCommoningDiff = [](const SCEV *Diff) {
+ assert(Diff && "Invalid Diff!\n");
+
+ // Don't interfere with the earlier D-form preparation.
+ if (isa<SCEVConstant>(Diff))
+ return false;
+
+ // A single integer type offset.
+ if (isa<SCEVUnknown>(Diff) && Diff->getType()->isIntegerTy())
+ return true;
+
+ const SCEVNAryExpr *ADiff = dyn_cast<SCEVNAryExpr>(Diff);
+ if (!ADiff)
+ return false;
+
+ for (const SCEV *Op : ADiff->operands())
+ if (!Op->getType()->isIntegerTy())
+ return false;
+
+ return true;
+ };
+
+ HasCandidateForPrepare = false;
+
+ LLVM_DEBUG(dbgs() << "Start to prepare for update form.\n");
+ // Collect buckets of comparable addresses used by loads and stores for update
+ // form.
+ SmallVector<Bucket, 16> UpdateFormBuckets = collectCandidates(
+ L, isUpdateFormCandidate, isValidConstantDiff, MaxVarsUpdateForm);
// Prepare for update form.
if (!UpdateFormBuckets.empty())
MadeChange |= updateFormPrep(L, UpdateFormBuckets);
+ else if (!HasCandidateForPrepare) {
+ LLVM_DEBUG(
+ dbgs()
+ << "No prepare candidates found, stop preparation for current loop!\n");
+ // If there is no candidate to prepare, return early.
+ return MadeChange;
+ }
+ LLVM_DEBUG(dbgs() << "Start to prepare for DS form.\n");
// Collect buckets of comparable addresses used by loads and stores for DS
// form.
- SmallVector<Bucket, 16> DSFormBuckets =
- collectCandidates(L, isDSFormCandidate, MaxVarsDSForm);
+ SmallVector<Bucket, 16> DSFormBuckets = collectCandidates(
+ L, isDSFormCandidate, isValidConstantDiff, MaxVarsDSForm);
// Prepare for DS form.
if (!DSFormBuckets.empty())
MadeChange |= dispFormPrep(L, DSFormBuckets, DSForm);
+ LLVM_DEBUG(dbgs() << "Start to prepare for DQ form.\n");
// Collect buckets of comparable addresses used by loads and stores for DQ
// form.
- SmallVector<Bucket, 16> DQFormBuckets =
- collectCandidates(L, isDQFormCandidate, MaxVarsDQForm);
+ SmallVector<Bucket, 16> DQFormBuckets = collectCandidates(
+ L, isDQFormCandidate, isValidConstantDiff, MaxVarsDQForm);
// Prepare for DQ form.
if (!DQFormBuckets.empty())
MadeChange |= dispFormPrep(L, DQFormBuckets, DQForm);
+ // Collect buckets of comparable addresses used by loads and stores for chain
+ // commoning. With chain commoning, we reuse offsets between the chains, so
+ // the register pressure will be reduced.
+ if (!EnableChainCommoning) {
+ LLVM_DEBUG(dbgs() << "Chain commoning is not enabled.\n");
+ return MadeChange;
+ }
+
+ LLVM_DEBUG(dbgs() << "Start to prepare for chain commoning.\n");
+ SmallVector<Bucket, 16> Buckets =
+ collectCandidates(L, isChainCommoningCandidate, isValidChainCommoningDiff,
+ MaxVarsChainCommon);
+
+ // Prepare for chain commoning.
+ if (!Buckets.empty())
+ MadeChange |= chainCommoning(L, Buckets);
+
return MadeChange;
}
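
To make the chain-commoning path above concrete, here is a hypothetical loop shape (illustration only; the names Base, Stride and sumChains are made up, not taken from the patch or its tests) of the kind isChainCommoningCandidate accepts: several accesses share one pointer base, and the differences between their addresses are loop-invariant but not compile-time constants, so the prepared form can materialize those offsets once and reuse them across the chains.

    // Each address is an affine AddRec whose start is Base plus an integer
    // expression in Stride; the pairwise differences are multiples of Stride,
    // which is loop-invariant but not a SCEVConstant, so the buckets are built
    // with isValidChainCommoningDiff rather than isValidConstantDiff.
    double sumChains(const double *Base, long Stride, long N) {
      double S = 0.0;
      for (long I = 0; I < N; ++I)
        S += Base[I] + Base[I + Stride] + Base[I + 2 * Stride] +
             Base[I + 3 * Stride];
      return S;
    }
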
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 4bbb6ed85a6c..d12a9b806fd0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -79,6 +79,11 @@ static cl::opt<bool>
cl::desc("enable elimination of zero-extensions"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+ EnableTrapOptimization("ppc-opt-conditional-trap",
+ cl::desc("enable optimization of conditional traps"),
+ cl::init(false), cl::Hidden);
+
namespace {
struct PPCMIPeephole : public MachineFunctionPass {
@@ -322,8 +327,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
SmallVectorImpl<MachineInstr *> &PHIs,
Register Dst) {
DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
- for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
- MachineInstr *PHI = *It;
+ for (MachineInstr *PHI : llvm::reverse(PHIs)) {
SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
// We check if the current PHI node can be changed by looking at its
// operands. If all the operands are either copies from primed
@@ -379,6 +383,7 @@ static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
bool Simplified = false;
+ bool TrapOpt = false;
MachineInstr* ToErase = nullptr;
std::map<MachineInstr *, bool> TOCSaves;
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
@@ -420,6 +425,13 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
+ // If a conditional trap instruction got optimized to an
+ // unconditional trap, eliminate all the instructions after
+ // the trap.
+ if (EnableTrapOptimization && TrapOpt) {
+ ToErase = &MI;
+ continue;
+ }
// Ignore debug instructions.
if (MI.isDebugInstr())
@@ -603,14 +615,24 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase = &MI;
Simplified = true;
}
- } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
+ } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
+ DefOpc == PPC::XXPERMDIs &&
(DefMI->getOperand(2).getImm() == 0 ||
DefMI->getOperand(2).getImm() == 3)) {
+ ToErase = &MI;
+ Simplified = true;
+ // Swap of a splat, convert to copy.
+ if (Immed == 2) {
+ LLVM_DEBUG(dbgs() << "Optimizing swap(splat) => copy(splat): ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ break;
+ }
// Splat fed by another splat - switch the output of the first
// and remove the second.
DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
- ToErase = &MI;
- Simplified = true;
LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
LLVM_DEBUG(MI.dump());
}
@@ -997,6 +1019,51 @@ bool PPCMIPeephole::simplifyCode(void) {
++NumRotatesCollapsed;
break;
}
+ // We will replace TD/TW/TDI/TWI with an unconditional trap if it will
+ // always trap, and we will delete the node if it will never trap.
+ case PPC::TDI:
+ case PPC::TWI:
+ case PPC::TD:
+ case PPC::TW: {
+ if (!EnableTrapOptimization) break;
+ MachineInstr *LiMI1 = getVRegDefOrNull(&MI.getOperand(1), MRI);
+ MachineInstr *LiMI2 = getVRegDefOrNull(&MI.getOperand(2), MRI);
+ bool IsOperand2Immediate = MI.getOperand(2).isImm();
+ // We can only do the optimization if we can get immediates
+ // from both operands
+ if (!(LiMI1 && (LiMI1->getOpcode() == PPC::LI ||
+ LiMI1->getOpcode() == PPC::LI8)))
+ break;
+ if (!IsOperand2Immediate &&
+ !(LiMI2 && (LiMI2->getOpcode() == PPC::LI ||
+ LiMI2->getOpcode() == PPC::LI8)))
+ break;
+
+ auto ImmOperand0 = MI.getOperand(0).getImm();
+ auto ImmOperand1 = LiMI1->getOperand(1).getImm();
+ auto ImmOperand2 = IsOperand2Immediate ? MI.getOperand(2).getImm()
+ : LiMI2->getOperand(1).getImm();
+
+ // We will replace the MI with an unconditional trap if it will always
+ // trap.
+ if ((ImmOperand0 == 31) ||
+ ((ImmOperand0 & 0x10) &&
+ ((int64_t)ImmOperand1 < (int64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x8) &&
+ ((int64_t)ImmOperand1 > (int64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x2) &&
+ ((uint64_t)ImmOperand1 < (uint64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x1) &&
+ ((uint64_t)ImmOperand1 > (uint64_t)ImmOperand2)) ||
+ ((ImmOperand0 & 0x4) && (ImmOperand1 == ImmOperand2))) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::TRAP));
+ TrapOpt = true;
+ }
+ // Delete the original conditional trap either way: it has been replaced
+ // by an unconditional trap above, or it can never trap.
+ ToErase = &MI;
+ Simplified = true;
+ break;
+ }
}
}
@@ -1006,6 +1073,9 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase->eraseFromParent();
ToErase = nullptr;
}
+ // Reset TrapOpt to false at the end of the basic block.
+ if (EnableTrapOptimization)
+ TrapOpt = false;
}
// Eliminate all the TOC save instructions which are redundant.
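
A note on the conditional-trap peephole above: the first operand of TD/TW/TDI/TWI is the TO field, whose bits select which comparisons cause a trap. The following standalone sketch restates the always-traps test the peephole performs (alwaysTraps is my own name; the bit meanings mirror the code above):

    #include <cstdint>

    // TO-field bits as used above: 0x10 signed <, 0x8 signed >, 0x4 equal,
    // 0x2 unsigned <, 0x1 unsigned >. TO == 31 sets every comparison bit,
    // so the trap fires unconditionally.
    static bool alwaysTraps(uint64_t TO, int64_t A, int64_t B) {
      if (TO == 31)
        return true;
      return ((TO & 0x10) && A < B) || ((TO & 0x8) && A > B) ||
             ((TO & 0x2) && (uint64_t)A < (uint64_t)B) ||
             ((TO & 0x1) && (uint64_t)A > (uint64_t)B) ||
             ((TO & 0x4) && A == B);
    }

When both compared values are known immediates, the trap either always fires (and is replaced by an unconditional TRAP) or never fires; in both cases the original conditional trap is erased, which is why ToErase is set unconditionally.
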
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
index d12c6d9cd406..bdff5109c1e1 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.cpp
@@ -75,6 +75,19 @@ static bool matchingRegOps(const MachineInstr &FirstMI,
return Op1.getReg() == Op2.getReg();
}
+static bool matchingImmOps(const MachineInstr &MI,
+ int MIOpIndex,
+ int64_t Expect,
+ unsigned ExtendFrom = 64) {
+ const MachineOperand &Op = MI.getOperand(MIOpIndex);
+ if (!Op.isImm())
+ return false;
+ int64_t Imm = Op.getImm();
+ if (ExtendFrom < 64)
+ Imm = SignExtend64(Imm, ExtendFrom);
+ return Imm == Expect;
+}
+
// Return true if the FirstMI meets the constraints of SecondMI according to
// fusion specification.
static bool checkOpConstraints(FusionFeature::FusionKind Kd,
@@ -116,7 +129,7 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
if (((Imm & 0xFFF0) != 0) && ((Imm & 0xFFF0) != 0xFFF0))
return false;
- // If si = 1111111111110000 and the msb of the d/ds field of the load equals
+ // If si = 1111111111110000 and the msb of the d/ds field of the load equals
// 1, then fusion does not occur.
if ((Imm & 0xFFF0) == 0xFFF0) {
const MachineOperand &D = SecondMI.getOperand(1);
@@ -132,6 +145,10 @@ static bool checkOpConstraints(FusionFeature::FusionKind Kd,
}
return true;
}
+
+ case FusionFeature::FK_SldiAdd:
+ return (matchingImmOps(FirstMI, 2, 3) && matchingImmOps(FirstMI, 3, 60)) ||
+ (matchingImmOps(FirstMI, 2, 6) && matchingImmOps(FirstMI, 3, 57));
}
llvm_unreachable("All the cases should have been handled");
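
For the FK_SldiAdd constraint just above, the immediate pairs (3, 60) and (6, 57) are the rldicr encodings of sldi by 3 and by 6, since sldi rx, ra, n is an alias of rldicr rx, ra, n, 63-n (as the .def comment below also notes). A small self-contained check of that equivalence (my own sketch; rotl64 and the test values are made up):

    #include <cassert>
    #include <cstdint>

    // rldicr rotates left by SH and keeps bits 0..ME (IBM numbering, bit 0 is
    // the MSB), i.e. it clears the low 63-ME bits after the rotate.
    static uint64_t rotl64(uint64_t V, unsigned N) {
      return N == 0 ? V : (V << N) | (V >> (64 - N));
    }
    static uint64_t rldicr(uint64_t V, unsigned SH, unsigned ME) {
      uint64_t Mask = ~uint64_t(0) << (63 - ME);
      return rotl64(V, SH) & Mask;
    }

    int main() {
      for (unsigned N : {3u, 6u})
        for (uint64_t V : {0x123456789abcdef0ULL, ~0ULL, 1ULL})
          assert(rldicr(V, N, 63 - N) == (V << N)); // same as sldi V, N
      return 0;
    }
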
diff --git a/llvm/lib/Target/PowerPC/PPCMacroFusion.def b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
index c7e4e7c22e0a..469a24800423 100644
--- a/llvm/lib/Target/PowerPC/PPCMacroFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCMacroFusion.def
@@ -41,5 +41,42 @@ FUSION_FEATURE(AddisLoad, hasAddisLoadFusion, 2, \
FUSION_OP_SET(ADDIS, ADDIS8, ADDIStocHA8), \
FUSION_OP_SET(LD, LBZ, LBZ8, LHZ, LHZ8, LWZ, LWZ8))
+// Power10 User Manual Section 19.1.5.4, Fusion
+// {add, mulld} - add
+FUSION_FEATURE(ArithAdd, hasArithAddFusion, -1,
+ FUSION_OP_SET(ADD4, ADD8, MULLD), FUSION_OP_SET(ADD4, ADD8))
+
+// {add, subf} - {and, nand, nor, or}
+FUSION_FEATURE(ArithLogical, hasAddLogicalFusion, -1,
+ FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8),
+ FUSION_OP_SET(AND, AND8, OR, OR8, NAND, NAND8, NOR, NOR8))
+
+// {and, andc, eqv, nand, nor, or, orc, xor} - {add, subf}
+FUSION_FEATURE(LogicalArith, hasLogicalAddFusion, -1,
+ FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8,
+ ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8),
+ FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+
+// Either of {and, andc, eqv, nand, nor, or, orc, xor}
+FUSION_FEATURE(Logical, hasLogicalFusion, -1,
+ FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8,
+ ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8),
+ FUSION_OP_SET(AND, ANDC, EQV, NAND, NOR, OR, ORC, XOR, AND8,
+ ANDC8, EQV8, NAND8, NOR8, OR8, ORC8, XOR8))
+
+// vaddudm - vaddudm
+FUSION_FEATURE(VecAdd, hasArithAddFusion, -1, FUSION_OP_SET(VADDUDM),
+ FUSION_OP_SET(VADDUDM))
+
+// Either of {vand, vandc, veqv, vnand, vnor, vor, vorc, vxor}
+FUSION_FEATURE(VecLogical, hasLogicalFusion, -1,
+ FUSION_OP_SET(VAND, VANDC, VEQV, VNAND, VNOR, VOR, VORC, VXOR),
+ FUSION_OP_SET(VAND, VANDC, VEQV, VNAND, VNOR, VOR, VORC, VXOR))
+
+// sldi rx, ra, {3, 6} - {add, subf}
+// sldi rx, ra, n is an alias of rldicr rx, ra, n, 63-n
+FUSION_FEATURE(SldiAdd, hasArithAddFusion, -1, FUSION_OP_SET(RLDICR, RLDICR_32),
+ FUSION_OP_SET(ADD4, ADD8, SUBF, SUBF8))
+
#undef FUSION_FEATURE
#undef FUSION_OP_SET
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4f16c7f5ff17..4bccc5596d2b 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -135,6 +135,23 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
ImmToIdxMap[PPC::SPELWZ] = PPC::SPELWZX;
// Power10
+ ImmToIdxMap[PPC::PLBZ] = PPC::LBZX; ImmToIdxMap[PPC::PLBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::PLHZ] = PPC::LHZX; ImmToIdxMap[PPC::PLHZ8] = PPC::LHZX8;
+ ImmToIdxMap[PPC::PLHA] = PPC::LHAX; ImmToIdxMap[PPC::PLHA8] = PPC::LHAX8;
+ ImmToIdxMap[PPC::PLWZ] = PPC::LWZX; ImmToIdxMap[PPC::PLWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::PLWA] = PPC::LWAX; ImmToIdxMap[PPC::PLWA8] = PPC::LWAX;
+ ImmToIdxMap[PPC::PLD] = PPC::LDX; ImmToIdxMap[PPC::PSTD] = PPC::STDX;
+
+ ImmToIdxMap[PPC::PSTB] = PPC::STBX; ImmToIdxMap[PPC::PSTB8] = PPC::STBX8;
+ ImmToIdxMap[PPC::PSTH] = PPC::STHX; ImmToIdxMap[PPC::PSTH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::PSTW] = PPC::STWX; ImmToIdxMap[PPC::PSTW8] = PPC::STWX8;
+
+ ImmToIdxMap[PPC::PLFS] = PPC::LFSX; ImmToIdxMap[PPC::PSTFS] = PPC::STFSX;
+ ImmToIdxMap[PPC::PLFD] = PPC::LFDX; ImmToIdxMap[PPC::PSTFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::PLXSSP] = PPC::LXSSPX; ImmToIdxMap[PPC::PSTXSSP] = PPC::STXSSPX;
+ ImmToIdxMap[PPC::PLXSD] = PPC::LXSDX; ImmToIdxMap[PPC::PSTXSD] = PPC::STXSDX;
+ ImmToIdxMap[PPC::PLXV] = PPC::LXVX; ImmToIdxMap[PPC::PSTXV] = PPC::STXVX;
+
ImmToIdxMap[PPC::LXVP] = PPC::LXVPX;
ImmToIdxMap[PPC::STXVP] = PPC::STXVPX;
ImmToIdxMap[PPC::PLXVP] = PPC::LXVPX;
@@ -506,7 +523,9 @@ bool PPCRegisterInfo::getRegAllocationHints(Register VirtReg,
VRM->hasPhys(ResultReg)) {
Register UACCPhys = VRM->getPhys(ResultReg);
Register HintReg = getSubReg(UACCPhys, ResultOp->getSubReg());
- Hints.push_back(HintReg);
+ // Ensure that the hint is a VSRp register.
+ if (HintReg >= PPC::VSRp0 && HintReg <= PPC::VSRp31)
+ Hints.push_back(HintReg);
}
break;
}
@@ -1345,7 +1364,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineFunction &MF = *MBB.getParent();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+ const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
@@ -1457,7 +1476,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool OffsetFitsMnemonic = (OpC == PPC::EVSTDD || OpC == PPC::EVLDD) ?
isUInt<8>(Offset) :
isInt<16>(Offset);
- if (OpC == PPC::PLXVP || OpC == PPC::PSTXVP)
+ if (TII.isPrefixed(MI.getOpcode()))
OffsetFitsMnemonic = isInt<34>(Offset);
if (!noImmForm && ((OffsetFitsMnemonic &&
((Offset % offsetMinAlign(MI)) == 0)) ||
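
The isPrefixed change above generalizes the old PLXVP/PSTXVP special case: every Power10 prefixed load/store carries a 34-bit signed displacement. A one-line sketch of that range test (fitsPrefixedDisp is an illustrative name; isInt is the real llvm::isInt from Support/MathExtras.h):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Prefixed D-forms accept displacements in [-2^33, 2^33 - 1]; anything
    // larger falls back to the X-form chosen through ImmToIdxMap above.
    static bool fitsPrefixedDisp(int64_t Offset) {
      return llvm::isInt<34>(Offset);
    }
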
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index c22a5826337b..2e534dd1bcd5 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -147,8 +147,6 @@ public:
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
- bool addAllocPriorityToGlobalRanges() const override { return true; }
-
// Support for virtual base registers.
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx,
diff --git a/llvm/lib/Target/PowerPC/PPCSchedPredicates.td b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
new file mode 100644
index 000000000000..18f325e99a60
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCSchedPredicates.td
@@ -0,0 +1,294 @@
+//===--- PPCSchedPredicates.td - PowerPC Scheduling Preds -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines scheduling predicate definitions that are used by the
+// PowerPC subtargets.
+//===----------------------------------------------------------------------===//
+// Identify instructions that write BF pipelines with 7 cycles.
+def P10W_BF_7C_Pred : MCSchedPredicate<
+ CheckOpcode<[FADD,
+ FADDS,
+ FADDS_rec,
+ FADD_rec,
+ FCFID,
+ FCFIDS,
+ FCFIDS_rec,
+ FCFIDU,
+ FCFIDUS,
+ FCFIDUS_rec,
+ FCFIDU_rec,
+ FCFID_rec,
+ FCTID,
+ FCTIDU,
+ FCTIDUZ,
+ FCTIDUZ_rec,
+ FCTIDU_rec,
+ FCTIDZ,
+ FCTIDZ_rec,
+ FCTID_rec,
+ FCTIW,
+ FCTIWU,
+ FCTIWUZ,
+ FCTIWUZ_rec,
+ FCTIWU_rec,
+ FCTIWZ,
+ FCTIWZ_rec,
+ FCTIW_rec,
+ FMADD,
+ FMADDS,
+ FMADDS_rec,
+ FMADD_rec,
+ FMSUB,
+ FMSUBS,
+ FMSUBS_rec,
+ FMSUB_rec,
+ FMUL,
+ FMULS,
+ FMULS_rec,
+ FMUL_rec,
+ FNMADD,
+ FNMADDS,
+ FNMADDS_rec,
+ FNMADD_rec,
+ FNMSUB,
+ FNMSUBS,
+ FNMSUBS_rec,
+ FNMSUB_rec,
+ FRE,
+ FRES,
+ FRES_rec,
+ FRE_rec,
+ FRIMD, FRIMS,
+ FRIMD_rec, FRIMS_rec,
+ FRIND, FRINS,
+ FRIND_rec, FRINS_rec,
+ FRIPD, FRIPS,
+ FRIPD_rec, FRIPS_rec,
+ FRIZD, FRIZS,
+ FRIZD_rec, FRIZS_rec,
+ FRSP,
+ FRSP_rec,
+ FRSQRTE,
+ FRSQRTES,
+ FRSQRTES_rec,
+ FRSQRTE_rec,
+ FSELD, FSELS,
+ FSELD_rec, FSELS_rec,
+ FSUB,
+ FSUBS,
+ FSUBS_rec,
+ FSUB_rec,
+ VADDFP,
+ VCFSX, VCFSX_0,
+ VCFUX, VCFUX_0,
+ VCTSXS, VCTSXS_0,
+ VCTUXS, VCTUXS_0,
+ VEXPTEFP,
+ VEXPTEFP,
+ VLOGEFP,
+ VMADDFP,
+ VNMSUBFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ VSUBFP,
+ XSADDDP,
+ XSADDSP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSPN,
+ XSCVDPSXDS, XSCVDPSXDSs,
+ XSCVDPSXWS, XSCVDPSXWSs,
+ XSCVDPUXDS, XSCVDPUXDSs,
+ XSCVDPUXWS, XSCVDPUXWSs,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSMULDP,
+ XSMULSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XSSUBDP,
+ XSSUBSP,
+ XVADDDP,
+ XVADDSP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVSPBF16,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVMULDP,
+ XVMULSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP,
+ XVSUBDP,
+ XVSUBSP]>
+>;
+
+// Identify instructions that write CY pipelines with 7 cycles.
+def P10W_CY_7C_Pred : MCSchedPredicate<
+ CheckOpcode<[CFUGED,
+ CNTLZDM,
+ CNTTZDM,
+ PDEPD,
+ PEXTD,
+ VCFUGED,
+ VCIPHER,
+ VCIPHERLAST,
+ VCLZDM,
+ VCTZDM,
+ VGNB,
+ VNCIPHER,
+ VNCIPHERLAST,
+ VPDEPD,
+ VPEXTD,
+ VPMSUMB,
+ VPMSUMD,
+ VPMSUMH,
+ VPMSUMW,
+ VSBOX]>
+>;
+
+// Identify instructions that write MM pipelines with 10 cycles.
+def P10W_MM_10C_Pred : MCSchedPredicate<
+ CheckOpcode<[PMXVBF16GER2,
+ PMXVBF16GER2NN,
+ PMXVBF16GER2NP,
+ PMXVBF16GER2PN,
+ PMXVBF16GER2PP,
+ PMXVF16GER2,
+ PMXVF16GER2NN,
+ PMXVF16GER2NP,
+ PMXVF16GER2PN,
+ PMXVF16GER2PP,
+ PMXVF32GER,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GER,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
+ PMXVI16GER2,
+ PMXVI16GER2PP,
+ PMXVI16GER2S,
+ PMXVI16GER2SPP,
+ PMXVI4GER8,
+ PMXVI4GER8PP,
+ PMXVI8GER4,
+ PMXVI8GER4PP,
+ PMXVI8GER4SPP,
+ XVBF16GER2,
+ XVBF16GER2NN,
+ XVBF16GER2NP,
+ XVBF16GER2PN,
+ XVBF16GER2PP,
+ XVF16GER2,
+ XVF16GER2NN,
+ XVF16GER2NP,
+ XVF16GER2PN,
+ XVF16GER2PP,
+ XVF32GER,
+ XVF32GERNN,
+ XVF32GERNP,
+ XVF32GERPN,
+ XVF32GERPP,
+ XVF64GER,
+ XVF64GERNN,
+ XVF64GERNP,
+ XVF64GERPN,
+ XVF64GERPP,
+ XVI16GER2,
+ XVI16GER2PP,
+ XVI16GER2S,
+ XVI16GER2SPP,
+ XVI4GER8,
+ XVI4GER8PP,
+ XVI8GER4,
+ XVI8GER4PP,
+ XVI8GER4SPP,
+ XXMFACC,
+ XXMFACC,
+ XXMTACC,
+ XXSETACCZ]>
+>;
diff --git a/llvm/lib/Target/PowerPC/PPCSchedule.td b/llvm/lib/Target/PowerPC/PPCSchedule.td
index e378d57d325e..f65dbae16d3a 100644
--- a/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -128,7 +128,9 @@ def IIC_SprMTPMR : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
+include "PPCInstrInfo.td"
+include "PPCSchedPredicates.td"
include "PPCScheduleG3.td"
include "PPCSchedule440.td"
include "PPCScheduleG4.td"
@@ -137,6 +139,7 @@ include "PPCScheduleG5.td"
include "PPCScheduleP7.td"
include "PPCScheduleP8.td"
include "PPCScheduleP9.td"
+include "PPCScheduleP10.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500.td"
include "PPCScheduleE500mc.td"
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
new file mode 100644
index 000000000000..bf56491f373a
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -0,0 +1,416 @@
+//===--- PPCScheduleP10.td - P10 Scheduling Definitions -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Automatically generated file, do not edit!
+//
+// This file defines the resources required by P10 instructions.
+//===----------------------------------------------------------------------===//
+// Modeling pipeline forwarding logic.
+def P10BR_Read : SchedRead;
+def P10DF_Read : SchedRead;
+def P10DV_Read : SchedRead;
+def P10DX_Read : SchedRead;
+def P10F2_Read : SchedRead;
+def P10FX_Read : SchedRead;
+def P10LD_Read : SchedRead;
+def P10MU_Read : SchedRead;
+def P10PM_Read : SchedRead;
+def P10ST_Read : SchedRead;
+def P10SX_Read : SchedRead;
+def P10vMU_Read : SchedRead;
+
+def P10Model : SchedMachineModel {
+ let IssueWidth = 8;
+
+ // TODO - Need to be updated according to P10 UM.
+ let MicroOpBufferSize = 44;
+
+ // TODO - tune this on real HW once it arrives. For now, we will use the same
+ // value as we do on P9.
+ let LoopMicroOpBufferSize = 60;
+
+ let CompleteModel = 1;
+
+ // Do not support SPE (Signal Processing Engine) on Power 10.
+ let UnsupportedFeatures = [HasSPE, IsE500, IsBookE];
+}
+
+let SchedModel = P10Model in {
+
+ // ***************** Processor Resources *****************
+
+ // Pipeline Groups
+
+ def P10_BF : ProcResource<4>; // Four Binary Floating Point pipelines.
+ def P10_BR : ProcResource<2>; // Two Branch pipelines.
+ def P10_CY : ProcResource<4>; // Four Crypto pipelines.
+ def P10_DF : ProcResource<1>; // One Decimal Floating Point pipeline.
+ def P10_DV : ProcResource<2>; // Two Fixed-point divide (DIV) pipelines.
+ def P10_DX : ProcResource<2>; // Two 128-bit fixed-point and BCD pipelines.
+ def P10_FX : ProcResource<4>; // Four ALU pipelines.
+ def P10_LD : ProcResource<2>; // Two Load pipelines.
+ def P10_MM : ProcResource<2>; // Two 512-bit SIMD matrix multiply engine pipelines.
+ def P10_PM : ProcResource<4>; // Four 128-bit permute (PM) pipelines.
+ def P10_ST : ProcResource<2>; // Two ST-D pipelines.
+ def P10_SX : ProcResource<2>; // Two Simple Fixed-point (SFX) pipelines.
+
+ // Dispatch Groups
+
+ // Dispatch to any slots
+ def P10_ANY_SLOT : ProcResource<8>;
+
+ let Super = P10_ANY_SLOT in {
+
+ // Dispatch to even slots
+ def P10_EVEN_SLOT : ProcResource<4>;
+
+ // Dispatch to odd slots
+ def P10_ODD_SLOT : ProcResource<4>;
+ }
+
+ // Dispatch Rules
+ let NumMicroOps = 0, Latency = 1 in {
+ // Dispatch Rule '-'
+ def P10W_DISP_ANY : SchedWriteRes<[P10_ANY_SLOT]>;
+
+ // Dispatch Rule '-', even slot
+ def P10W_DISP_EVEN : SchedWriteRes<[P10_EVEN_SLOT]>;
+
+ // Dispatch Rule 'P'
+ def P10W_DISP_PAIR : SchedWriteRes<[P10_EVEN_SLOT, P10_ODD_SLOT]>;
+ }
+
+ // ***************** SchedWriteRes Definitions *****************
+
+ // A BF pipeline may take from 7 to 36 cycles to complete.
+ // Some BF operations may keep the pipeline busy for up to 10 cycles.
+ def P10W_BF_7C : SchedWriteRes<[P10_BF]> {
+ let Latency = 7;
+ }
+
+ def P10W_BF_22C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 5 ];
+ let Latency = 22;
+ }
+
+ def P10W_BF_24C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 8 ];
+ let Latency = 24;
+ }
+
+ def P10W_BF_26C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 5 ];
+ let Latency = 26;
+ }
+
+ def P10W_BF_27C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 7 ];
+ let Latency = 27;
+ }
+
+ def P10W_BF_36C : SchedWriteRes<[P10_BF]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 36;
+ }
+
+ // A BR pipeline may take 2 cycles to complete.
+ def P10W_BR_2C : SchedWriteRes<[P10_BR]> {
+ let Latency = 2;
+ }
+
+ // A CY pipeline may take 7 cycles to complete.
+ def P10W_CY_7C : SchedWriteRes<[P10_CY]> {
+ let Latency = 7;
+ }
+
+ // A DF pipeline may take from 13 to 174 cycles to complete.
+ // Some DF operations may keep the pipeline busy for up to 67 cycles.
+ def P10W_DF_13C : SchedWriteRes<[P10_DF]> {
+ let Latency = 13;
+ }
+
+ def P10W_DF_24C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 16 ];
+ let Latency = 24;
+ }
+
+ def P10W_DF_25C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 17 ];
+ let Latency = 25;
+ }
+
+ def P10W_DF_26C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 18 ];
+ let Latency = 26;
+ }
+
+ def P10W_DF_32C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 22 ];
+ let Latency = 32;
+ }
+
+ def P10W_DF_33C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 25 ];
+ let Latency = 33;
+ }
+
+ def P10W_DF_34C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 25 ];
+ let Latency = 34;
+ }
+
+ def P10W_DF_38C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 30 ];
+ let Latency = 38;
+ }
+
+ def P10W_DF_40C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 17 ];
+ let Latency = 40;
+ }
+
+ def P10W_DF_43C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 34 ];
+ let Latency = 43;
+ }
+
+ def P10W_DF_59C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 49 ];
+ let Latency = 59;
+ }
+
+ def P10W_DF_61C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 12 ];
+ let Latency = 61;
+ }
+
+ def P10W_DF_68C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 15 ];
+ let Latency = 68;
+ }
+
+ def P10W_DF_77C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 67 ];
+ let Latency = 77;
+ }
+
+ def P10W_DF_87C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 12 ];
+ let Latency = 87;
+ }
+
+ def P10W_DF_100C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 32 ];
+ let Latency = 100;
+ }
+
+ def P10W_DF_174C : SchedWriteRes<[P10_DF]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 174;
+ }
+
+ // A DV pipeline may take from 20 to 83 cycles to complete.
+ // Some DV operations may keep the pipeline busy for up to 33 cycles.
+ def P10W_DV_20C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 20;
+ }
+
+ def P10W_DV_25C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 25;
+ }
+
+ def P10W_DV_27C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 27;
+ }
+
+ def P10W_DV_41C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 10 ];
+ let Latency = 41;
+ }
+
+ def P10W_DV_43C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 43;
+ }
+
+ def P10W_DV_47C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 47;
+ }
+
+ def P10W_DV_54C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 54;
+ }
+
+ def P10W_DV_60C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 60;
+ }
+
+ def P10W_DV_75C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 21 ];
+ let Latency = 75;
+ }
+
+ def P10W_DV_83C : SchedWriteRes<[P10_DV]> {
+ let ResourceCycles = [ 33 ];
+ let Latency = 83;
+ }
+
+ // A DX pipeline may take 5 cycles to complete.
+ def P10W_DX_5C : SchedWriteRes<[P10_DX]> {
+ let Latency = 5;
+ }
+
+ // A F2 pipeline may take 4 cycles to complete.
+ def P10W_F2_4C : SchedWriteRes<[P10_FX]> {
+ let Latency = 4;
+ }
+
+ // A FX pipeline may take from 2 to 3 cycles to complete.
+ def P10W_FX_2C : SchedWriteRes<[P10_FX]> {
+ let Latency = 2;
+ }
+
+ def P10W_FX_3C : SchedWriteRes<[P10_FX]> {
+ let Latency = 3;
+ }
+
+ // A LD pipeline may take 6 cycles to complete.
+ def P10W_LD_6C : SchedWriteRes<[P10_LD]> {
+ let Latency = 6;
+ }
+
+ // A MF pipeline may take 13 cycles to complete.
+ def P10W_MF_13C : SchedWriteRes<[P10_SX]> {
+ let Latency = 13;
+ }
+
+ // A MFL pipeline may take 13 cycles to complete.
+ def P10W_MFL_13C : SchedWriteRes<[P10_SX]> {
+ let Latency = 13;
+ }
+
+ // A MM pipeline may take 10 cycles to complete.
+ def P10W_MM_10C : SchedWriteRes<[P10_MM]> {
+ let Latency = 10;
+ }
+
+ // A MU pipeline may take 5 cycles to complete.
+ def P10W_MU_5C : SchedWriteRes<[P10_BF]> {
+ let Latency = 5;
+ }
+
+ // A PM pipeline may take 4 cycles to complete.
+ def P10W_PM_4C : SchedWriteRes<[P10_PM]> {
+ let Latency = 4;
+ }
+
+ // A ST pipeline may take 3 cycles to complete.
+ def P10W_ST_3C : SchedWriteRes<[P10_ST]> {
+ let Latency = 3;
+ }
+
+ // A SX pipeline may take from 0 to 3 cycles to complete.
+ def P10W_SX : SchedWriteRes<[P10_SX]> {
+ let Latency = 0;
+ }
+
+ def P10W_SX_3C : SchedWriteRes<[P10_SX]> {
+ let Latency = 3;
+ }
+
+ // A vMU pipeline may take 7 cycles to complete.
+ def P10W_vMU_7C : SchedWriteRes<[P10_BF]> {
+ let Latency = 7;
+ }
+
+ // ***************** Read Advance Definitions *****************
+
+ // Modeling pipeline forwarding logic.
+ def P10BF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10BF_Read_2C : SchedReadAdvance<2, [P10W_BF_7C]>;
+ def P10BR_Read_1C : SchedReadAdvance<1, [P10W_FX_3C, P10W_F2_4C]>;
+ def P10CY_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10CY_Read_3C : SchedReadAdvance<3, [P10W_CY_7C]>;
+ def P10DF_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10DV_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10DX_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+ def P10F2_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10FX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10LD_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C]>;
+ def P10MM_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C]>;
+ def P10MM_Read_6C : SchedReadAdvance<6, [P10W_MM_10C]>;
+ def P10MU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_DF_13C]>;
+ def P10PM_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10ST_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C]>;
+ def P10SX_Read_1C : SchedReadAdvance<1, [P10W_ST_3C, P10W_SX_3C, P10W_FX_3C, P10W_F2_4C, P10W_PM_4C, P10W_MM_10C]>;
+ def P10vMU_Read_1C : SchedReadAdvance<1, [P10W_DX_5C, P10W_MU_5C, P10W_vMU_7C, P10W_BF_7C, P10W_CY_7C, P10W_DF_13C, P10W_MM_10C]>;
+
+ // Save 1 cycle if pipeline BF reads the data from pipelines DX, MU, vMU, CY, DF, MM.
+ // Save 2 cycles if pipeline BF reads the data from pipeline BF.
+ def P10BF_Read : SchedReadVariant<[
+ SchedVar<P10W_BF_7C_Pred, [P10BF_Read_2C]>,
+ SchedVar<NoSchedPred, [P10BF_Read_1C]>
+ ]>;
+
+ // Save 1 cycle if pipeline CY reads the data from pipelines DX, MU, vMU, BF, DF, MM.
+ // Save 3 cycles if pipeline CY reads the data from pipeline CY.
+ def P10CY_Read : SchedReadVariant<[
+ SchedVar<P10W_CY_7C_Pred, [P10CY_Read_3C]>,
+ SchedVar<NoSchedPred, [P10CY_Read_1C]>
+ ]>;
+
+ // Save 1 cycle if pipeline MM reads the data from pipelines DX, MU, vMU, BF, CY, DF.
+ // Save 6 cycles if pipeline MM reads the data from pipeline MM.
+ def P10MM_Read : SchedReadVariant<[
+ SchedVar<P10W_MM_10C_Pred, [P10MM_Read_6C]>,
+ SchedVar<NoSchedPred, [P10MM_Read_1C]>
+ ]>;
+
+ // Save 1 cycle if pipeline BR reads the data from pipelines FX, F2.
+ def : SchedAlias<P10BR_Read, P10BR_Read_1C>;
+
+ // Save 1 cycle if pipeline DF reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DF_Read, P10DF_Read_1C>;
+
+ // Save 1 cycle if pipeline DV reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DV_Read, P10DV_Read_1C>;
+
+ // Save 1 cycle if pipeline DX reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10DX_Read, P10DX_Read_1C>;
+
+ // Save 1 cycle if pipeline F2 reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10F2_Read, P10F2_Read_1C>;
+
+ // Save 1 cycle if pipeline FX reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10FX_Read, P10FX_Read_1C>;
+
+ // Save 1 cycle if pipeline LD reads the data from pipelines ST, SX, FX, F2.
+ def : SchedAlias<P10LD_Read, P10LD_Read_1C>;
+
+ // Save 1 cycle if pipeline MU reads the data from pipelines DX, MU, DF.
+ def : SchedAlias<P10MU_Read, P10MU_Read_1C>;
+
+ // Save 1 cycle if pipeline PM reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10PM_Read, P10PM_Read_1C>;
+
+ // Save 1 cycle if pipeline ST reads the data from pipelines ST, SX, FX, F2, PM.
+ def : SchedAlias<P10ST_Read, P10ST_Read_1C>;
+
+ // Save 1 cycle if pipeline SX reads the data from pipelines ST, SX, FX, F2, PM, MM.
+ def : SchedAlias<P10SX_Read, P10SX_Read_1C>;
+
+ // Save 1 cycle if pipeline vMU reads the data from pipelines DX, MU, vMU, BF, CY, DF, MM.
+ def : SchedAlias<P10vMU_Read, P10vMU_Read_1C>;
+
+ include "P10InstrResources.td"
+}
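
A worked reading of the forwarding entries above (my interpretation of the SchedReadAdvance semantics, not a statement from the P10 UM): the latency a consumer sees is the producer's SchedWriteRes latency minus the consumer's SchedReadAdvance, so a P10W_BF_7C producer feeding another BF operation through P10BF_Read_2C costs 7 - 2 = 5 cycles, while feeding a CY consumer through P10CY_Read_1C costs 7 - 1 = 6. The arithmetic only, as a compile-time check:

    // effectiveLatency is an illustrative helper, not part of the model.
    constexpr int effectiveLatency(int WriteLatency, int ReadAdvance) {
      return WriteLatency - ReadAdvance;
    }
    static_assert(effectiveLatency(7, 2) == 5, "P10W_BF_7C through P10BF_Read_2C");
    static_assert(effectiveLatency(7, 1) == 6, "P10W_BF_7C through P10CY_Read_1C");
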
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
index 571cc219ff2b..3dc069ecad8a 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP9.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -9,8 +9,6 @@
// This file defines the itinerary class data for the POWER9 processor.
//
//===----------------------------------------------------------------------===//
-include "PPCInstrInfo.td"
-
def P9Model : SchedMachineModel {
// The maximum number of instructions to be issued at the same time.
// While a value of 8 is technically correct since 8 instructions can be
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index 87ce32f027ab..dfc29dbb10f1 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -23,8 +23,8 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
@@ -127,6 +127,11 @@ void PPCSubtarget::initializeEnvironment() {
HasStoreFusion = false;
HasAddiLoadFusion = false;
HasAddisLoadFusion = false;
+ HasArithAddFusion = false;
+ HasAddLogicalFusion = false;
+ HasLogicalAddFusion = false;
+ HasLogicalFusion = false;
+ IsISA2_06 = false;
IsISA2_07 = false;
IsISA3_0 = false;
IsISA3_1 = false;
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index e916b0c02000..783ea121ccb8 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -147,6 +147,11 @@ protected:
bool HasStoreFusion;
bool HasAddiLoadFusion;
bool HasAddisLoadFusion;
+ bool HasArithAddFusion;
+ bool HasAddLogicalFusion;
+ bool HasLogicalAddFusion;
+ bool HasLogicalFusion;
+ bool IsISA2_06;
bool IsISA2_07;
bool IsISA3_0;
bool IsISA3_1;
@@ -322,6 +327,7 @@ public:
bool hasHTM() const { return HasHTM; }
bool hasFloat128() const { return HasFloat128; }
+ bool isISA2_06() const { return IsISA2_06; }
bool isISA2_07() const { return IsISA2_07; }
bool isISA3_0() const { return IsISA3_0; }
bool isISA3_1() const { return IsISA3_1; }
@@ -330,6 +336,10 @@ public:
bool hasStoreFusion() const { return HasStoreFusion; }
bool hasAddiLoadFusion() const { return HasAddiLoadFusion; }
bool hasAddisLoadFusion() const { return HasAddisLoadFusion; }
+ bool hasArithAddFusion() const { return HasArithAddFusion; }
+ bool hasAddLogicalFusion() const { return HasAddLogicalFusion; }
+ bool hasLogicalAddFusion() const { return HasLogicalAddFusion; }
+ bool hasLogicalFusion() const { return HasLogicalFusion; }
bool needsSwapsForVSXMemOps() const {
return hasVSX() && isLittleEndian() && !hasP9Vector();
}
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 3186d197931d..fbd487fbcfd5 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -208,11 +208,9 @@ public:
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
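
The llvm::make_early_inc_range rewrite here (and in the passes below) works because the adaptor advances the wrapped iterator when the element is dereferenced, so the body may erase or split the current element without invalidating the traversal. A small self-contained illustration against a std::map (the container and values are made up; make_early_inc_range is the real API from llvm/ADT/STLExtras.h):

    #include "llvm/ADT/STLExtras.h"
    #include <iostream>
    #include <map>

    int main() {
      std::map<int, const char *> M = {{1, "one"}, {2, "two"}, {3, "three"}};
      // Erasing the current entry is safe: the underlying iterator has already
      // moved past it by the time the loop body runs.
      for (auto &Entry : llvm::make_early_inc_range(M))
        if (Entry.first % 2 == 0)
          M.erase(Entry.first);
      for (const auto &Entry : M)
        std::cout << Entry.first << " -> " << Entry.second << "\n";
      return 0;
    }
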
diff --git a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
index 895ae6744421..8120975c4fb2 100644
--- a/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTOCRegDeps.cpp
@@ -73,9 +73,9 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -131,11 +131,9 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override {
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 82717300a480..3eff00fc3c05 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -36,10 +36,10 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
index ed9e74b72d1e..d3fe5362ccdc 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -68,6 +68,8 @@ public:
}
bool isLittleEndian() const;
+
+ int unqualifiedInlineAsmVariant() const override { return 1; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index d5a7873bd056..77d5a2668b60 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -485,6 +485,9 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
case Intrinsic::experimental_constrained_sin:
case Intrinsic::experimental_constrained_cos:
return true;
+ // There is no corresponding FMA instruction for PPC double double.
+ // Thus, we need to disable CTR loop generation for this type.
+ case Intrinsic::fmuladd:
case Intrinsic::copysign:
if (CI->getArgOperand(0)->getType()->getScalarType()->
isPPC_FP128Ty())
@@ -787,7 +790,8 @@ bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
}
void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
if (ST->getCPUDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
// helps expose latency-hiding opportunities to the instruction scheduler.
@@ -798,7 +802,7 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
UP.AllowExpensiveTripCount = true;
}
- BaseT::getUnrollingPreferences(L, SE, UP);
+ BaseT::getUnrollingPreferences(L, SE, UP, ORE);
}
void PPCTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
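
The new Intrinsic::fmuladd case shares the ppc_fp128 check with copysign: the IBM double-double type has no single FMA instruction, so the intrinsic may end up expanded through a call, and a call in the loop body rules out a CTR (bdnz) hardware loop. A hypothetical C/C++ illustration (my own; the function name dot and the build flags are assumptions, not from the patch or its tests):

    // Compile for a powerpc64 target where long double is IBM double-double,
    // with FP contraction enabled (e.g. -ffp-contract=fast); the multiply-add
    // may then be emitted as llvm.fmuladd on ppc_fp128.
    long double dot(const long double *A, const long double *B, int N) {
      long double S = 0.0L;
      for (int I = 0; I < N; ++I)
        S += A[I] * B[I];
      return S;
    }
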
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 8ac3038d51d6..aa84013803af 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -72,7 +72,8 @@ public:
TargetLibraryInfo *LibInfo);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
@@ -102,8 +103,7 @@ public:
InstructionCost vectorCostAdjustment(InstructionCost Cost, unsigned Opcode,
Type *Ty1, Type *Ty2);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -130,8 +130,7 @@ public:
const Instruction *I = nullptr);
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 3463bbbdc5f0..7272e6edefc5 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -27,9 +27,9 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -148,11 +148,9 @@ public:
bool Changed = false;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
@@ -169,4 +167,3 @@ INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
char PPCVSXCopy::ID = 0;
FunctionPass*
llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
-
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index e72e29112da7..0be35adc35c7 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -31,10 +31,10 @@
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -361,11 +361,9 @@ public:
if (DisableVSXFMAMutate)
return Changed;
- for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
- MachineBasicBlock &B = *I++;
+ for (MachineBasicBlock &B : llvm::make_early_inc_range(MF))
if (processBlock(B))
Changed = true;
- }
return Changed;
}
diff --git a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 6bb952f27fee..0bfa0bd5ec0e 100644
--- a/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/PowerPCTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getThePPC32Target() {
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 87496e0b9330..f00813f1301a 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -32,10 +32,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributes.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include <limits>
@@ -50,6 +51,10 @@ using namespace llvm;
STATISTIC(RISCVNumInstrsCompressed,
"Number of RISC-V Compressed instructions emitted");
+namespace llvm {
+extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
+} // namespace llvm
+
namespace {
struct RISCVOperand;
@@ -169,6 +174,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool parseDirectiveOption();
bool parseDirectiveAttribute();
+ bool parseDirectiveInsn(SMLoc L);
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
if (!(getSTI().getFeatureBits()[Feature])) {
@@ -504,6 +510,24 @@ public:
return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
}
+ bool isUImm2() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<2>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
+ bool isUImm3() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<3>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isUImm5() const {
int64_t Imm;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
@@ -513,6 +537,15 @@ public:
return IsConstantImm && isUInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isUImm7() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isSImm5() const {
if (!isImm())
return false;
@@ -960,10 +993,6 @@ bool RISCVAsmParser::generateImmOutOfRangeError(
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
-static std::string RISCVMnemonicSpellCheck(StringRef S,
- const FeatureBitset &FBS,
- unsigned VariantID = 0);
-
bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -996,13 +1025,13 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
}
case Match_MnemonicFail: {
FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
- std::string Suggestion =
- RISCVMnemonicSpellCheck(((RISCVOperand &)*Operands[0]).getToken(), FBS);
+ std::string Suggestion = RISCVMnemonicSpellCheck(
+ ((RISCVOperand &)*Operands[0]).getToken(), FBS, 0);
return Error(IDLoc, "unrecognized instruction mnemonic" + Suggestion);
}
case Match_InvalidOperand: {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
+ if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
@@ -1019,7 +1048,7 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// corresponding operand is missing.
if (Result > FIRST_TARGET_MATCH_RESULT_TY) {
SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
+ if (ErrorInfo != ~0ULL && ErrorInfo >= Operands.size())
return Error(ErrorLoc, "too few operands for instruction");
}
@@ -1050,8 +1079,14 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
+ case Match_InvalidUImm2:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1);
+ case Match_InvalidUImm3:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ case Match_InvalidUImm7:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 7) - 1);
case Match_InvalidSImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4),
(1 << 4) - 1);
@@ -1835,8 +1870,10 @@ bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".option")
return parseDirectiveOption();
- else if (IDVal == ".attribute")
+ if (IDVal == ".attribute")
return parseDirectiveAttribute();
+ if (IDVal == ".insn")
+ return parseDirectiveInsn(DirectiveID.getLoc());
return true;
}
@@ -2027,113 +2064,35 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
if (Tag == RISCVAttrs::ARCH) {
StringRef Arch = StringValue;
- if (Arch.consume_front("rv32"))
+ for (auto Feature : RISCVFeatureKV)
+ if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ clearFeatureBits(Feature.Value, Feature.Key);
+
+ auto ParseResult = llvm::RISCVISAInfo::parseArchString(
+ StringValue, /*EnableExperimentalExtension=*/true,
+ /*ExperimentalExtensionVersionCheck=*/false);
+ if (!ParseResult) {
+ std::string Buffer;
+ raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << "invalid arch name '" << Arch << "', "
+ << ErrMsg.getMessage();
+ });
+
+ return Error(ValueExprLoc, OutputErrMsg.str());
+ }
+ auto &ISAInfo = *ParseResult;
+
+ for (auto Feature : RISCVFeatureKV)
+ if (ISAInfo->hasExtension(Feature.Key))
+ setFeatureBits(Feature.Value, Feature.Key);
+
+ if (ISAInfo->getXLen() == 32)
clearFeatureBits(RISCV::Feature64Bit, "64bit");
- else if (Arch.consume_front("rv64"))
+ else if (ISAInfo->getXLen() == 64)
setFeatureBits(RISCV::Feature64Bit, "64bit");
else
return Error(ValueExprLoc, "bad arch string " + Arch);
-
- // .attribute arch overrides the current architecture, so unset all
- // currently enabled extensions
- clearFeatureBits(RISCV::FeatureRV32E, "e");
- clearFeatureBits(RISCV::FeatureStdExtM, "m");
- clearFeatureBits(RISCV::FeatureStdExtA, "a");
- clearFeatureBits(RISCV::FeatureStdExtF, "f");
- clearFeatureBits(RISCV::FeatureStdExtD, "d");
- clearFeatureBits(RISCV::FeatureStdExtC, "c");
- clearFeatureBits(RISCV::FeatureStdExtB, "experimental-b");
- clearFeatureBits(RISCV::FeatureStdExtV, "experimental-v");
- clearFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh");
- clearFeatureBits(RISCV::FeatureExtZba, "experimental-zba");
- clearFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb");
- clearFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc");
- clearFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe");
- clearFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf");
- clearFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm");
- clearFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp");
- clearFeatureBits(RISCV::FeatureExtZbproposedc, "experimental-zbproposedc");
- clearFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr");
- clearFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs");
- clearFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt");
- clearFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo");
- clearFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg");
-
- while (!Arch.empty()) {
- bool DropFirst = true;
- if (Arch[0] == 'i')
- clearFeatureBits(RISCV::FeatureRV32E, "e");
- else if (Arch[0] == 'e')
- setFeatureBits(RISCV::FeatureRV32E, "e");
- else if (Arch[0] == 'g') {
- clearFeatureBits(RISCV::FeatureRV32E, "e");
- setFeatureBits(RISCV::FeatureStdExtM, "m");
- setFeatureBits(RISCV::FeatureStdExtA, "a");
- setFeatureBits(RISCV::FeatureStdExtF, "f");
- setFeatureBits(RISCV::FeatureStdExtD, "d");
- } else if (Arch[0] == 'm')
- setFeatureBits(RISCV::FeatureStdExtM, "m");
- else if (Arch[0] == 'a')
- setFeatureBits(RISCV::FeatureStdExtA, "a");
- else if (Arch[0] == 'f')
- setFeatureBits(RISCV::FeatureStdExtF, "f");
- else if (Arch[0] == 'd') {
- setFeatureBits(RISCV::FeatureStdExtF, "f");
- setFeatureBits(RISCV::FeatureStdExtD, "d");
- } else if (Arch[0] == 'c') {
- setFeatureBits(RISCV::FeatureStdExtC, "c");
- } else if (Arch[0] == 'b') {
- setFeatureBits(RISCV::FeatureStdExtB, "experimental-b");
- } else if (Arch[0] == 'v') {
- setFeatureBits(RISCV::FeatureStdExtV, "experimental-v");
- } else if (Arch[0] == 's' || Arch[0] == 'x' || Arch[0] == 'z') {
- StringRef Ext =
- Arch.take_until([](char c) { return ::isdigit(c) || c == '_'; });
- if (Ext == "zba")
- setFeatureBits(RISCV::FeatureExtZba, "experimental-zba");
- else if (Ext == "zbb")
- setFeatureBits(RISCV::FeatureExtZbb, "experimental-zbb");
- else if (Ext == "zbc")
- setFeatureBits(RISCV::FeatureExtZbc, "experimental-zbc");
- else if (Ext == "zbe")
- setFeatureBits(RISCV::FeatureExtZbe, "experimental-zbe");
- else if (Ext == "zbf")
- setFeatureBits(RISCV::FeatureExtZbf, "experimental-zbf");
- else if (Ext == "zbm")
- setFeatureBits(RISCV::FeatureExtZbm, "experimental-zbm");
- else if (Ext == "zbp")
- setFeatureBits(RISCV::FeatureExtZbp, "experimental-zbp");
- else if (Ext == "zbproposedc")
- setFeatureBits(RISCV::FeatureExtZbproposedc,
- "experimental-zbproposedc");
- else if (Ext == "zbr")
- setFeatureBits(RISCV::FeatureExtZbr, "experimental-zbr");
- else if (Ext == "zbs")
- setFeatureBits(RISCV::FeatureExtZbs, "experimental-zbs");
- else if (Ext == "zbt")
- setFeatureBits(RISCV::FeatureExtZbt, "experimental-zbt");
- else if (Ext == "zfh")
- setFeatureBits(RISCV::FeatureExtZfh, "experimental-zfh");
- else if (Ext == "zvamo")
- setFeatureBits(RISCV::FeatureExtZvamo, "experimental-zvamo");
- else if (Ext == "zvlsseg")
- setFeatureBits(RISCV::FeatureStdExtZvlsseg, "experimental-zvlsseg");
- else
- return Error(ValueExprLoc, "bad arch string " + Ext);
- Arch = Arch.drop_until([](char c) { return ::isdigit(c) || c == '_'; });
- DropFirst = false;
- } else
- return Error(ValueExprLoc, "bad arch string " + Arch);
-
- if (DropFirst)
- Arch = Arch.drop_front(1);
- int major = 0;
- int minor = 0;
- Arch.consumeInteger(10, major);
- Arch.consume_front("p");
- Arch.consumeInteger(10, minor);
- Arch = Arch.drop_while([](char c) { return c == '_'; });
- }
}
if (IsIntegerValue)
@@ -2142,64 +2101,63 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
if (Tag != RISCVAttrs::ARCH) {
getTargetStreamer().emitTextAttribute(Tag, StringValue);
} else {
- std::string formalArchStr = "rv32";
- if (getFeatureBits(RISCV::Feature64Bit))
- formalArchStr = "rv64";
- if (getFeatureBits(RISCV::FeatureRV32E))
- formalArchStr = (Twine(formalArchStr) + "e1p9").str();
- else
- formalArchStr = (Twine(formalArchStr) + "i2p0").str();
-
- if (getFeatureBits(RISCV::FeatureStdExtM))
- formalArchStr = (Twine(formalArchStr) + "_m2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtA))
- formalArchStr = (Twine(formalArchStr) + "_a2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtF))
- formalArchStr = (Twine(formalArchStr) + "_f2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtD))
- formalArchStr = (Twine(formalArchStr) + "_d2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtC))
- formalArchStr = (Twine(formalArchStr) + "_c2p0").str();
- if (getFeatureBits(RISCV::FeatureStdExtB))
- formalArchStr = (Twine(formalArchStr) + "_b0p93").str();
- if (getFeatureBits(RISCV::FeatureStdExtV))
- formalArchStr = (Twine(formalArchStr) + "_v0p10").str();
- if (getFeatureBits(RISCV::FeatureExtZfh))
- formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str();
- if (getFeatureBits(RISCV::FeatureExtZba))
- formalArchStr = (Twine(formalArchStr) + "_zba0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbb))
- formalArchStr = (Twine(formalArchStr) + "_zbb0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbc))
- formalArchStr = (Twine(formalArchStr) + "_zbc0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbe))
- formalArchStr = (Twine(formalArchStr) + "_zbe0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbf))
- formalArchStr = (Twine(formalArchStr) + "_zbf0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbm))
- formalArchStr = (Twine(formalArchStr) + "_zbm0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbp))
- formalArchStr = (Twine(formalArchStr) + "_zbp0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbproposedc))
- formalArchStr = (Twine(formalArchStr) + "_zbproposedc0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbr))
- formalArchStr = (Twine(formalArchStr) + "_zbr0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbs))
- formalArchStr = (Twine(formalArchStr) + "_zbs0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZbt))
- formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str();
- if (getFeatureBits(RISCV::FeatureExtZvamo))
- formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str();
- if (getFeatureBits(RISCV::FeatureStdExtZvlsseg))
- formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str();
-
- getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
+ std::vector<std::string> FeatureVector;
+ RISCVFeatures::toFeatureVector(FeatureVector, getSTI().getFeatureBits());
+
+ // Parse the feature list with RISCVISAInfo.
+ unsigned XLen = getFeatureBits(RISCV::Feature64Bit) ? 64 : 32;
+ auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
+ if (!ParseResult) {
+ std::string Buffer;
+ raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(),
+ [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << ErrMsg.getMessage();
+ });
+
+ return Error(ValueExprLoc, OutputErrMsg.str());
+ }
+ auto &ISAInfo = *ParseResult;
+
+ // Then emit the arch string.
+ getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString());
}
}
return false;
}
+/// parseDirectiveInsn
+/// ::= .insn [ format encoding, (operands (, operands)*) ]
+bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ // Expect instruction format as identifier.
+ StringRef Format;
+ SMLoc ErrorLoc = Parser.getTok().getLoc();
+ if (Parser.parseIdentifier(Format))
+ return Error(ErrorLoc, "expected instruction format");
+
+ if (Format != "r" && Format != "r4" && Format != "i" && Format != "b" &&
+ Format != "sb" && Format != "u" && Format != "j" && Format != "uj" &&
+ Format != "s")
+ return Error(ErrorLoc, "invalid instruction format");
+
+ std::string FormatName = (".insn_" + Format).str();
+
+ ParseInstructionInfo Info;
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands;
+
+ if (ParseInstruction(Info, FormatName, L, Operands))
+ return true;
+
+ unsigned Opcode;
+ uint64_t ErrorInfo;
+ return MatchAndEmitInstruction(L, Opcode, Operands, Parser.getStreamer(),
+ ErrorInfo,
+ /*MatchingInlineAsm=*/false);
+}
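Editorial note: a usage sketch for this directive, assuming the operand order mirrors the GNU assembler's compact .insn forms (an assumption, not taken from this patch), is ".insn r 0x33, 0x0, 0x0, a0, a1, a2", which would assemble the same R-type encoding as "add a0, a1, a2".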
+
void RISCVAsmParser::emitToStreamer(MCStreamer &S, const MCInst &Inst) {
MCInst CInst;
bool Res = compressInst(CInst, Inst, getSTI(), S.getContext());
@@ -2223,6 +2181,11 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
.addReg(DestReg)
.addReg(SrcReg)
.addReg(RISCV::X0));
+ } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
+ Inst.Opc == RISCV::SH3ADD) {
+ emitToStreamer(
+ Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addReg(
+ SrcReg));
} else {
emitToStreamer(
Out, MCInstBuilder(Inst.Opc).addReg(DestReg).addReg(SrcReg).addImm(
@@ -2339,10 +2302,10 @@ void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode,
//
// TmpLabel: AUIPC tmp, %pcrel_hi(symbol)
// [S|L]X rd, %pcrel_lo(TmpLabel)(tmp)
- MCOperand DestReg = Inst.getOperand(0);
+ unsigned DestRegOpIdx = HasTmpReg ? 1 : 0;
+ MCOperand DestReg = Inst.getOperand(DestRegOpIdx);
unsigned SymbolOpIdx = HasTmpReg ? 2 : 1;
- unsigned TmpRegOpIdx = HasTmpReg ? 1 : 0;
- MCOperand TmpReg = Inst.getOperand(TmpRegOpIdx);
+ MCOperand TmpReg = Inst.getOperand(0);
const MCExpr *Symbol = Inst.getOperand(SymbolOpIdx).getExpr();
emitAuipcInstPair(DestReg, TmpReg, Symbol, RISCVMCExpr::VK_RISCV_PCREL_HI,
Opcode, IDLoc, Out);
@@ -2414,7 +2377,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
// masked va >= x, vd == v0
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt
+ // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
assert(Inst.getOperand(0).getReg() == RISCV::V0 &&
"The destination register should be V0.");
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
@@ -2424,7 +2387,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
.addOperand(Inst.getOperand(4)));
- emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(1)));
@@ -2432,7 +2395,7 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
// masked va >= x, any vd
//
// pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt
- // expansion: vmslt{u}.vx vt, va, x; vmandnot.mm vt, v0, vt; vmandnot.mm vd,
+ // expansion: vmslt{u}.vx vt, va, x; vmandn.mm vt, v0, vt; vmandn.mm vd,
// vd, v0; vmor.mm vd, vt, vd
assert(Inst.getOperand(1).getReg() != RISCV::V0 &&
"The temporary vector register should not be V0.");
@@ -2441,11 +2404,11 @@ void RISCVAsmParser::emitVMSGE(MCInst &Inst, unsigned Opcode, SMLoc IDLoc,
.addOperand(Inst.getOperand(2))
.addOperand(Inst.getOperand(3))
.addReg(RISCV::NoRegister));
- emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(1))
.addReg(RISCV::V0)
.addOperand(Inst.getOperand(1)));
- emitToStreamer(Out, MCInstBuilder(RISCV::VMANDNOT_MM)
+ emitToStreamer(Out, MCInstBuilder(RISCV::VMANDN_MM)
.addOperand(Inst.getOperand(0))
.addOperand(Inst.getOperand(0))
.addReg(RISCV::V0));
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 504a78d91f32..ff96b2b254ca 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -20,8 +20,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -449,19 +449,6 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- if (STI.getFeatureBits()[RISCV::FeatureExtZbproposedc] &&
- STI.getFeatureBits()[RISCV::FeatureStdExtC]) {
- LLVM_DEBUG(
- dbgs() << "Trying RVBC32 table (BitManip 16-bit Instruction):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableRVBC16, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 2;
- return Result;
- }
- }
-
LLVM_DEBUG(dbgs() << "Trying RISCV_C table (16-bit Instruction):\n");
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index b93197e713e5..514789b3f645 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -352,8 +352,9 @@ bool RISCVAsmBackend::mayNeedRelaxation(const MCInst &Inst,
return getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode();
}
-bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
- bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC];
+bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
+ bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC];
unsigned MinNopLen = HasStdExtC ? 2 : 4;
if ((Count % MinNopLen) != 0)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index e1628673419a..f04d2912f09d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -99,7 +99,8 @@ public:
bool relaxDwarfCFA(MCDwarfCallFrameFragment &DF, MCAsmLayout &Layout,
bool &WasRelaxed) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
RISCVABI::ABI getTargetABI() const { return TargetABI; }
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 60e86093d9f4..0aba18b20f0d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -14,9 +14,14 @@
#include "RISCVBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
+
+extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
+
namespace RISCVSysReg {
#define GET_SysRegsList_IMPL
#include "RISCVGenSearchableTables.inc"
@@ -96,6 +101,15 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
report_fatal_error("RV32E can't be enabled for an RV64 target");
}
+void toFeatureVector(std::vector<std::string> &FeatureVector,
+ const FeatureBitset &FeatureBits) {
+ for (auto Feature : RISCVFeatureKV) {
+ if (FeatureBits[Feature.Value] &&
+ llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ FeatureVector.push_back(std::string("+") + Feature.Key);
+ }
+}
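A minimal sketch of the result, assuming an RV64GC-style feature set (illustration only, not part of the patch):

  std::vector<std::string> FV;
  RISCVFeatures::toFeatureVector(FV, STI.getFeatureBits());
  // FV now holds roughly {"+m", "+a", "+f", "+d", "+c"}; non-extension
  // features such as "64bit" are dropped by isSupportedExtensionFeature().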
+
} // namespace RISCVFeatures
// Encode VTYPE into the binary format used by the VSETVLI instruction which
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 9bdd2003cb15..d8f4403c824f 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -68,14 +68,25 @@ enum {
HasMergeOpMask = 1 << HasMergeOpShift,
// Does this instruction have a SEW operand. It will be the last explicit
- // operand. Used by RVV Pseudos.
+ // operand unless there is a vector policy operand. Used by RVV Pseudos.
HasSEWOpShift = HasMergeOpShift + 1,
HasSEWOpMask = 1 << HasSEWOpShift,
// Does this instruction have a VL operand. It will be the second to last
- // explicit operand. Used by RVV Pseudos.
+ // explicit operand unless there is a vector policy operand. Used by RVV
+ // Pseudos.
HasVLOpShift = HasSEWOpShift + 1,
HasVLOpMask = 1 << HasVLOpShift,
+
+ // Does this instruction have a vector policy operand. It will be the last
+ // explicit operand. Used by RVV Pseudos.
+ HasVecPolicyOpShift = HasVLOpShift + 1,
+ HasVecPolicyOpMask = 1 << HasVecPolicyOpShift,
+
+ // Is this instruction a vector widening reduction instruction. Used by RVV
+ // Pseudos.
+ IsRVVWideningReductionShift = HasVecPolicyOpShift + 1,
+ IsRVVWideningReductionMask = 1 << IsRVVWideningReductionShift,
};
// Match with the definitions in RISCVInstrFormatsV.td
@@ -97,6 +108,11 @@ enum VLMUL : uint8_t {
LMUL_F2
};
+enum {
+ TAIL_UNDISTURBED = 0,
+ TAIL_AGNOSTIC = 1,
+};
+
// Helper functions to read TSFlags.
/// \returns the format of the instruction.
static inline unsigned getFormat(uint64_t TSFlags) {
@@ -131,6 +147,14 @@ static inline bool hasSEWOp(uint64_t TSFlags) {
static inline bool hasVLOp(uint64_t TSFlags) {
return TSFlags & HasVLOpMask;
}
+/// \returns true if there is a vector policy operand for this instruction.
+static inline bool hasVecPolicyOp(uint64_t TSFlags) {
+ return TSFlags & HasVecPolicyOpMask;
+}
+/// \returns true if it is a vector widening reduction instruction.
+static inline bool isRVVWideningReduction(uint64_t TSFlags) {
+ return TSFlags & IsRVVWideningReductionMask;
+}
// RISC-V Specific Machine Operand Flags
enum {
@@ -158,8 +182,11 @@ enum {
namespace RISCVOp {
enum OperandType : unsigned {
OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_UIMM4 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM2 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM3,
+ OPERAND_UIMM4,
OPERAND_UIMM5,
+ OPERAND_UIMM7,
OPERAND_UIMM12,
OPERAND_SIMM12,
OPERAND_UIMM20,
@@ -306,6 +333,10 @@ namespace RISCVFeatures {
// triple. Exits with report_fatal_error if not.
void validate(const Triple &TT, const FeatureBitset &FeatureBits);
+// Convert FeatureBitset to FeatureVector.
+void toFeatureVector(std::vector<std::string> &FeatureVector,
+ const FeatureBitset &FeatureBits);
+
} // namespace RISCVFeatures
namespace RISCVVType {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 1ef276b10100..14d0191a505f 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -358,7 +358,7 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
}
} else if (Kind == MCExpr::SymbolRef &&
cast<MCSymbolRefExpr>(Expr)->getKind() == MCSymbolRefExpr::VK_None) {
- if (Desc.getOpcode() == RISCV::JAL) {
+ if (MIFrm == RISCVII::InstFormatJ) {
FixupKind = RISCV::fixup_riscv_jal;
} else if (MIFrm == RISCVII::InstFormatB) {
FixupKind = RISCV::fixup_riscv_branch;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
new file mode 100644
index 000000000000..9c9d9221578c
--- /dev/null
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
@@ -0,0 +1,22 @@
+//===-- RISCVMCObjectFileInfo.cpp - RISCV object file properties ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the RISCVMCObjectFileInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMCObjectFileInfo.h"
+#include "RISCVMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+
+using namespace llvm;
+
+unsigned RISCVMCObjectFileInfo::getTextSectionAlignment() const {
+ const MCSubtargetInfo *STI = getContext().getSubtargetInfo();
+ return STI->hasFeature(RISCV::FeatureStdExtC) ? 2 : 4;
+}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
new file mode 100644
index 000000000000..2f6b10229864
--- /dev/null
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
@@ -0,0 +1,27 @@
+//===-- RISCVMCObjectFileInfo.h - RISCV object file Info -------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the RISCVMCObjectFileInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCOBJECTFILEINFO_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCOBJECTFILEINFO_H
+
+#include "llvm/MC/MCObjectFileInfo.h"
+
+namespace llvm {
+
+class RISCVMCObjectFileInfo : public MCObjectFileInfo {
+public:
+ unsigned getTextSectionAlignment() const override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index 38c32539833c..07c2be624932 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -15,6 +15,7 @@
#include "RISCVELFStreamer.h"
#include "RISCVInstPrinter.h"
#include "RISCVMCAsmInfo.h"
+#include "RISCVMCObjectFileInfo.h"
#include "RISCVTargetStreamer.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
@@ -23,12 +24,13 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "RISCVGenInstrInfo.inc"
@@ -65,6 +67,14 @@ static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
return MAI;
}
+static MCObjectFileInfo *
+createRISCVMCObjectFileInfo(MCContext &Ctx, bool PIC,
+ bool LargeCodeModel = false) {
+ MCObjectFileInfo *MOFI = new RISCVMCObjectFileInfo();
+ MOFI->initMCObjectFileInfo(Ctx, PIC, LargeCodeModel);
+ return MOFI;
+}
+
static MCSubtargetInfo *createRISCVMCSubtargetInfo(const Triple &TT,
StringRef CPU, StringRef FS) {
if (CPU.empty())
@@ -155,6 +165,7 @@ MCStreamer *createRISCVELFStreamer(const Triple &T, MCContext &Context,
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMC() {
for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
TargetRegistry::RegisterMCAsmInfo(*T, createRISCVMCAsmInfo);
+ TargetRegistry::RegisterMCObjectFileInfo(*T, createRISCVMCObjectFileInfo);
TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createRISCVMCRegisterInfo);
TargetRegistry::RegisterMCAsmBackend(*T, createRISCVAsmBackend);
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 2ca5eeb8392e..0ee6d8de78c9 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -20,7 +20,8 @@ static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
for (auto Instr : Res) {
bool Compressed;
switch (Instr.Opc) {
- default: llvm_unreachable("Unexpected opcode");
+ default:
+ llvm_unreachable("Unexpected opcode");
case RISCV::SLLI:
case RISCV::SRLI:
Compressed = true;
@@ -77,7 +78,7 @@ static void generateInstSeqImpl(int64_t Val,
assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");
// In the worst case, for a full 64-bit constant, a sequence of 8 instructions
- // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emmitted. Note
+ // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
// that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
// while the following ADDI instructions contribute up to 12 bits each.
//
@@ -106,15 +107,36 @@ static void generateInstSeqImpl(int64_t Val,
// If the remaining bits don't fit in 12 bits, we might be able to reduce the
// shift amount in order to use LUI which will zero the lower 12 bits.
- if (ShiftAmount > 12 && !isInt<12>(Hi52) && isInt<32>((uint64_t)Hi52 << 12)) {
- // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
- ShiftAmount -= 12;
- Hi52 = (uint64_t)Hi52 << 12;
+ bool Unsigned = false;
+ if (ShiftAmount > 12 && !isInt<12>(Hi52)) {
+ if (isInt<32>((uint64_t)Hi52 << 12)) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
+ ShiftAmount -= 12;
+ Hi52 = (uint64_t)Hi52 << 12;
+ } else if (isUInt<32>((uint64_t)Hi52 << 12) &&
+ ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Reduce the shift amount and add zeros to the LSBs so it will match
+ // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
+ ShiftAmount -= 12;
+ Hi52 = ((uint64_t)Hi52 << 12) | (0xffffffffull << 32);
+ Unsigned = true;
+ }
+ }
+
+ // Try to use SLLIUW for Hi52 when it is uint32 but not int32.
+ if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
+ ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW.
+ Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
+ Unsigned = true;
}
generateInstSeqImpl(Hi52, ActiveFeatures, Res);
- Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
+ if (Unsigned)
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount));
+ else
+ Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
if (Lo12)
Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}
@@ -165,7 +187,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// If we have exactly 32 leading zeros and Zba, we can try using zext.w at
// the end of the sequence.
- if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureExtZba]) {
+ if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
// Try replacing upper bits with 1.
uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
TmpSeq.clear();
@@ -182,12 +204,119 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
}
}
+ // Perform optimization with BCLRI/BSETI in the Zbs extension.
+ if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
+ assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ "Expected RV32 to only need 2 instructions");
+
+ // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
+ // call generateInstSeqImpl with Val|0x80000000 (which is expected to be
+ // an int32), then emit (BCLRI r, 31).
+ // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
+ // with Val&~0x80000000 (which is expected to be an int32), then
+ // emit (BSETI r, 31).
+ int64_t NewVal;
+ unsigned Opc;
+ if (Val < 0) {
+ Opc = RISCV::BCLRI;
+ NewVal = Val | 0x80000000ll;
+ } else {
+ Opc = RISCV::BSETI;
+ NewVal = Val & ~0x80000000ll;
+ }
+ if (isInt<32>(NewVal)) {
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+
+ // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
+ // negative int32, or use BSETI for upper 32 bits if the original lower
+ // 32 bits are positive int32.
+ int32_t Lo = Val;
+ uint32_t Hi = Val >> 32;
+ Opc = 0;
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
+ // Check if it is profitable to use BCLRI/BSETI.
+ if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
+ Opc = RISCV::BSETI;
+ } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
+ Opc = RISCV::BCLRI;
+ Hi = ~Hi;
+ }
+ // Search for each bit and build corresponding BCLRI/BSETI.
+ if (Opc > 0) {
+ while (Hi != 0) {
+ unsigned Bit = countTrailingZeros(Hi);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
+ Hi &= ~(1 << Bit);
+ }
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+ }
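A worked example of the Zbs path (editorial sketch, not from the patch): for Val = 0x100000001 the generic expansion is ADDI rd, x0, 1; SLLI rd, rd, 32; ADDI rd, rd, 1, while the code above materializes the low half with ADDI rd, x0, 1 and sets the single upper bit with BSETI rd, rd, 32, saving one instruction.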
+
+ // Perform optimization with SH*ADD in the Zba extension.
+ if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
+ assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ "Expected RV32 to only need 2 instructions");
+ int64_t Div = 0;
+ unsigned Opc = 0;
+ RISCVMatInt::InstSeq TmpSeq;
+ // Select the opcode and divisor.
+ if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
+ Div = 3;
+ Opc = RISCV::SH1ADD;
+ } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
+ Div = 5;
+ Opc = RISCV::SH2ADD;
+ } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
+ Div = 9;
+ Opc = RISCV::SH3ADD;
+ }
+ // Build the new instruction sequence.
+ if (Div > 0) {
+ generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+ // Try to use LUI+SH*ADD+ADDI.
+ int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
+ int64_t Lo12 = SignExtend64<12>(Val);
+ Div = 0;
+ if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
+ Div = 3;
+ Opc = RISCV::SH1ADD;
+ } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
+ Div = 5;
+ Opc = RISCV::SH2ADD;
+ } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
+ Div = 9;
+ Opc = RISCV::SH3ADD;
+ }
+ // Build the new instruction sequence.
+ if (Div > 0) {
+ // A Val whose Lo12 is zero (i.e. Val equals Hi52) should already have been
+ // lowered to LUI+SH*ADD by the previous optimization.
+ assert(Lo12 != 0 &&
+ "unexpected instruction sequence for immediate materialisation");
+ generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
+ TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
+ TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
+ if (TmpSeq.size() < Res.size())
+ Res = TmpSeq;
+ }
+ }
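A worked example of the SH*ADD path (editorial sketch): Val = 0x17FFFD003 = 3 * 0x7FFFF001 is divisible by 3 with an int32 quotient, so the sequence becomes LUI 0x7ffff; ADDIW 1; SH1ADD rd, rd, rd (rd + (rd << 1) = 3 * rd), three instructions instead of a four-instruction LUI/ADDIW/SLLI/ADDI expansion.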
+
return Res;
}
int getIntMatCost(const APInt &Val, unsigned Size,
- const FeatureBitset &ActiveFeatures,
- bool CompressionCost) {
+ const FeatureBitset &ActiveFeatures, bool CompressionCost) {
bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
int PlatRegSize = IsRV64 ? 64 : 32;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 13c4b84aa300..2f016374e6a2 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -11,9 +11,11 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetStreamer.h"
+#include "RISCVBaseInfo.h"
#include "RISCVMCTargetDesc.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/RISCVAttributes.h"
+#include "llvm/Support/RISCVISAInfo.h"
using namespace llvm;
@@ -43,57 +45,19 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
else
emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
- std::string Arch = "rv32";
- if (STI.hasFeature(RISCV::Feature64Bit))
- Arch = "rv64";
- if (STI.hasFeature(RISCV::FeatureRV32E))
- Arch += "e1p9";
- else
- Arch += "i2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtM))
- Arch += "_m2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtA))
- Arch += "_a2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtF))
- Arch += "_f2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtD))
- Arch += "_d2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtC))
- Arch += "_c2p0";
- if (STI.hasFeature(RISCV::FeatureStdExtB))
- Arch += "_b0p93";
- if (STI.hasFeature(RISCV::FeatureStdExtV))
- Arch += "_v0p10";
- if (STI.hasFeature(RISCV::FeatureExtZfh))
- Arch += "_zfh0p1";
- if (STI.hasFeature(RISCV::FeatureExtZba))
- Arch += "_zba0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbb))
- Arch += "_zbb0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbc))
- Arch += "_zbc0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbe))
- Arch += "_zbe0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbf))
- Arch += "_zbf0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbm))
- Arch += "_zbm0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbp))
- Arch += "_zbp0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbproposedc))
- Arch += "_zbproposedc0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbr))
- Arch += "_zbr0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbs))
- Arch += "_zbs0p93";
- if (STI.hasFeature(RISCV::FeatureExtZbt))
- Arch += "_zbt0p93";
- if (STI.hasFeature(RISCV::FeatureExtZvamo))
- Arch += "_zvamo0p10";
- if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg))
- Arch += "_zvlsseg0p10";
-
- emitTextAttribute(RISCVAttrs::ARCH, Arch);
+ unsigned XLen = STI.hasFeature(RISCV::Feature64Bit) ? 64 : 32;
+ std::vector<std::string> FeatureVector;
+ RISCVFeatures::toFeatureVector(FeatureVector, STI.getFeatureBits());
+
+ auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector);
+ if (!ParseResult) {
+ /* Any errors in the feature list should have been handled earlier. */
+ consumeError(ParseResult.takeError());
+ llvm_unreachable("Parsing feature error when emitTargetAttributes?");
+ } else {
+ auto &ISAInfo = *ParseResult;
+ emitTextAttribute(RISCVAttrs::ARCH, ISAInfo->toString());
+ }
}
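For an RV64GC subtarget the emitted attribute remains an ISA string of the same shape the deleted code concatenated by hand, for example rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0; the canonical ordering and version numbers are now owned by RISCVISAInfo::toString().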
// This part is for ascii assembly output
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index ef1f97067e12..b415c9f35e7f 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -37,6 +37,9 @@ bool LowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
+FunctionPass *createRISCVGatherScatterLoweringPass();
+void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
+
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 52e8d8cdc774..772a4f8ecd53 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -41,12 +41,20 @@ def HasStdExtD : Predicate<"Subtarget->hasStdExtD()">,
AssemblerPredicate<(all_of FeatureStdExtD),
"'D' (Double-Precision Floating-Point)">;
-def FeatureExtZfh
+def FeatureStdExtZfhmin
+ : SubtargetFeature<"experimental-zfhmin", "HasStdExtZfhmin", "true",
+ "'Zfhmin' (Half-Precision Floating-Point Minimal)",
+ [FeatureStdExtF]>;
+def HasStdExtZfhmin : Predicate<"Subtarget->hasStdExtZfhmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfhmin),
+ "'Zfhmin' (Half-Precision Floating-Point Minimal)">;
+
+def FeatureStdExtZfh
: SubtargetFeature<"experimental-zfh", "HasStdExtZfh", "true",
"'Zfh' (Half-Precision Floating-Point)",
- [FeatureStdExtF]>;
+ [FeatureStdExtZfhmin, FeatureStdExtF]>;
def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
- AssemblerPredicate<(all_of FeatureExtZfh),
+ AssemblerPredicate<(all_of FeatureStdExtZfh),
"'Zfh' (Half-Precision Floating-Point)">;
def FeatureStdExtC
@@ -56,109 +64,85 @@ def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
AssemblerPredicate<(all_of FeatureStdExtC),
"'C' (Compressed Instructions)">;
-def FeatureExtZba
+def FeatureStdExtZba
: SubtargetFeature<"experimental-zba", "HasStdExtZba", "true",
"'Zba' (Address calculation 'B' Instructions)">;
def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">,
- AssemblerPredicate<(all_of FeatureExtZba),
+ AssemblerPredicate<(all_of FeatureStdExtZba),
"'Zba' (Address calculation 'B' Instructions)">;
def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
-def FeatureExtZbb
+def FeatureStdExtZbb
: SubtargetFeature<"experimental-zbb", "HasStdExtZbb", "true",
"'Zbb' (Base 'B' Instructions)">;
def HasStdExtZbb : Predicate<"Subtarget->hasStdExtZbb()">,
- AssemblerPredicate<(all_of FeatureExtZbb),
+ AssemblerPredicate<(all_of FeatureStdExtZbb),
"'Zbb' (Base 'B' Instructions)">;
-def FeatureExtZbc
+def FeatureStdExtZbc
: SubtargetFeature<"experimental-zbc", "HasStdExtZbc", "true",
"'Zbc' (Carry-Less 'B' Instructions)">;
def HasStdExtZbc : Predicate<"Subtarget->hasStdExtZbc()">,
- AssemblerPredicate<(all_of FeatureExtZbc),
+ AssemblerPredicate<(all_of FeatureStdExtZbc),
"'Zbc' (Carry-Less 'B' Instructions)">;
-def FeatureExtZbe
+def FeatureStdExtZbe
: SubtargetFeature<"experimental-zbe", "HasStdExtZbe", "true",
"'Zbe' (Extract-Deposit 'B' Instructions)">;
def HasStdExtZbe : Predicate<"Subtarget->hasStdExtZbe()">,
- AssemblerPredicate<(all_of FeatureExtZbe),
+ AssemblerPredicate<(all_of FeatureStdExtZbe),
"'Zbe' (Extract-Deposit 'B' Instructions)">;
-def FeatureExtZbf
+def FeatureStdExtZbf
: SubtargetFeature<"experimental-zbf", "HasStdExtZbf", "true",
"'Zbf' (Bit-Field 'B' Instructions)">;
def HasStdExtZbf : Predicate<"Subtarget->hasStdExtZbf()">,
- AssemblerPredicate<(all_of FeatureExtZbf),
+ AssemblerPredicate<(all_of FeatureStdExtZbf),
"'Zbf' (Bit-Field 'B' Instructions)">;
-def FeatureExtZbm
+def FeatureStdExtZbm
: SubtargetFeature<"experimental-zbm", "HasStdExtZbm", "true",
"'Zbm' (Matrix 'B' Instructions)">;
def HasStdExtZbm : Predicate<"Subtarget->hasStdExtZbm()">,
- AssemblerPredicate<(all_of FeatureExtZbm),
+ AssemblerPredicate<(all_of FeatureStdExtZbm),
"'Zbm' (Matrix 'B' Instructions)">;
-def FeatureExtZbp
+def FeatureStdExtZbp
: SubtargetFeature<"experimental-zbp", "HasStdExtZbp", "true",
"'Zbp' (Permutation 'B' Instructions)">;
def HasStdExtZbp : Predicate<"Subtarget->hasStdExtZbp()">,
- AssemblerPredicate<(all_of FeatureExtZbp),
+ AssemblerPredicate<(all_of FeatureStdExtZbp),
"'Zbp' (Permutation 'B' Instructions)">;
-def FeatureExtZbr
+def FeatureStdExtZbr
: SubtargetFeature<"experimental-zbr", "HasStdExtZbr", "true",
"'Zbr' (Polynomial Reduction 'B' Instructions)">;
def HasStdExtZbr : Predicate<"Subtarget->hasStdExtZbr()">,
- AssemblerPredicate<(all_of FeatureExtZbr),
+ AssemblerPredicate<(all_of FeatureStdExtZbr),
"'Zbr' (Polynomial Reduction 'B' Instructions)">;
-def FeatureExtZbs
+def FeatureStdExtZbs
: SubtargetFeature<"experimental-zbs", "HasStdExtZbs", "true",
"'Zbs' (Single-Bit 'B' Instructions)">;
def HasStdExtZbs : Predicate<"Subtarget->hasStdExtZbs()">,
- AssemblerPredicate<(all_of FeatureExtZbs),
+ AssemblerPredicate<(all_of FeatureStdExtZbs),
"'Zbs' (Single-Bit 'B' Instructions)">;
-def FeatureExtZbt
+def FeatureStdExtZbt
: SubtargetFeature<"experimental-zbt", "HasStdExtZbt", "true",
"'Zbt' (Ternary 'B' Instructions)">;
def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">,
- AssemblerPredicate<(all_of FeatureExtZbt),
+ AssemblerPredicate<(all_of FeatureStdExtZbt),
"'Zbt' (Ternary 'B' Instructions)">;
// Some instructions belong to both the basic and the permutation
// subextensions. They should be enabled if either has been specified.
def HasStdExtZbbOrZbp
: Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">,
- AssemblerPredicate<(any_of FeatureExtZbb, FeatureExtZbp),
+ AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp),
"'Zbb' (Base 'B' Instructions) or "
"'Zbp' (Permutation 'B' Instructions)">;
-def FeatureExtZbproposedc
- : SubtargetFeature<"experimental-zbproposedc", "HasStdExtZbproposedc", "true",
- "'Zbproposedc' (Proposed Compressed 'B' Instructions)">;
-def HasStdExtZbproposedc : Predicate<"Subtarget->hasStdExtZbproposedc()">,
- AssemblerPredicate<(all_of FeatureExtZbproposedc),
- "'Zbproposedc' (Proposed Compressed 'B' Instructions)">;
-
-def FeatureStdExtB
- : SubtargetFeature<"experimental-b", "HasStdExtB", "true",
- "'B' (Bit Manipulation Instructions)",
- [FeatureExtZba,
- FeatureExtZbb,
- FeatureExtZbc,
- FeatureExtZbe,
- FeatureExtZbf,
- FeatureExtZbm,
- FeatureExtZbp,
- FeatureExtZbr,
- FeatureExtZbs,
- FeatureExtZbt]>;
-def HasStdExtB : Predicate<"Subtarget->hasStdExtB()">,
- AssemblerPredicate<(all_of FeatureStdExtB),
- "'B' (Bit Manipulation Instructions)">;
-
def FeatureNoRVCHints
: SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
"Disable RVC Hint Instructions.">;
@@ -173,6 +157,9 @@ def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">,
AssemblerPredicate<(all_of FeatureStdExtV),
"'V' (Vector Instructions)">;
+def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">;
+def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">;
+
def FeatureStdExtZvlsseg
: SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true",
"'Zvlsseg' (Vector segment load/store instructions)",
@@ -181,12 +168,12 @@ def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
"'Zvlsseg' (Vector segment load/store instructions)">;
-def FeatureExtZvamo
+def FeatureStdExtZvamo
: SubtargetFeature<"experimental-zvamo", "HasStdExtZvamo", "true",
"'Zvamo' (Vector AMO Operations)",
[FeatureStdExtV]>;
def HasStdExtZvamo : Predicate<"Subtarget->hasStdExtZvamo()">,
- AssemblerPredicate<(all_of FeatureExtZvamo),
+ AssemblerPredicate<(all_of FeatureStdExtZvamo),
"'Zvamo' (Vector AMO Operations)">;
def Feature64Bit
@@ -250,22 +237,63 @@ def : ProcessorModel<"rocket-rv64", RocketModel, [Feature64Bit]>;
def : ProcessorModel<"sifive-7-rv32", SiFive7Model, []>;
def : ProcessorModel<"sifive-7-rv64", SiFive7Model, [Feature64Bit]>;
+def : ProcessorModel<"sifive-e20", RocketModel, [FeatureStdExtM,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e21", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e24", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
def : ProcessorModel<"sifive-e31", RocketModel, [FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtC]>;
-def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
+def : ProcessorModel<"sifive-e34", RocketModel, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-s21", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-s51", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtC]>;
+
+def : ProcessorModel<"sifive-s54", RocketModel, [Feature64Bit,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtD,
FeatureStdExtC]>;
-def : ProcessorModel<"sifive-e76", SiFive7Model, [FeatureStdExtM,
+def : ProcessorModel<"sifive-s76", SiFive7Model, [Feature64Bit,
+ FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
+ FeatureStdExtD,
FeatureStdExtC]>;
+def : ProcessorModel<"sifive-u54", RocketModel, [Feature64Bit,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC]>;
+
def : ProcessorModel<"sifive-u74", SiFive7Model, [Feature64Bit,
FeatureStdExtM,
FeatureStdExtA,
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index bdf30f8eb1b3..9fed6e7baadc 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -27,7 +27,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 31ef752967cc..80340ee81509 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -105,6 +105,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoLA_TLS_GD:
return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
+ case RISCV::PseudoVSETVLIX0:
case RISCV::PseudoVSETIVLI:
return expandVSetVL(MBB, MBBI);
case RISCV::PseudoVMCLR_M_B1:
@@ -246,13 +247,14 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
DebugLoc DL = MBBI->getDebugLoc();
assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+ MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
MBBI->getOpcode() == RISCV::PseudoVSETIVLI) &&
"Unexpected pseudo instruction");
unsigned Opcode;
- if (MBBI->getOpcode() == RISCV::PseudoVSETVLI)
- Opcode = RISCV::VSETVLI;
- else
+ if (MBBI->getOpcode() == RISCV::PseudoVSETIVLI)
Opcode = RISCV::VSETIVLI;
+ else
+ Opcode = RISCV::VSETVLI;
const MCInstrDesc &Desc = TII->get(Opcode);
assert(Desc.getNumOperands() == 3 && "Unexpected instruction format");
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 188bd49595a5..595c3cdfbb1d 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -220,6 +220,10 @@ getRestoreLibCallName(const MachineFunction &MF,
return RestoreLibCalls[LibCallID];
}
+// Return true if the specified function should have a dedicated frame
+// pointer register. This is true if frame pointer elimination is
+// disabled, if it needs dynamic stack realignment, if the function has
+// variable sized allocas, or if the frame address is taken.
bool RISCVFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -671,15 +675,15 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- BP
@@ -696,15 +700,15 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |-- MFI.getStackSize()
// | RVV objects | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- SP
@@ -749,15 +753,15 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// |--------------------------| -- |
// | Padding after RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | RVV objects | | |-- MFI.getStackSize()
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | Padding before RVV | | |
// | (not counted in | | |
- // | MFI.getStackSize() | | |
+ // | MFI.getStackSize()) | | |
// |--------------------------| -- |
// | scalar local variables | | <----'
// |--------------------------| -- <-- SP
@@ -767,8 +771,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// objects to 8 bytes.
if (MFI.getStackID(FI) == TargetStackID::Default) {
if (MFI.isFixedObjectIndex(FI)) {
- Offset += StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding()
- + RVFI->getLibCallStackSize(), RVFI->getRVVStackSize());
+ Offset +=
+ StackOffset::get(MFI.getStackSize() + RVFI->getRVVPadding() +
+ RVFI->getLibCallStackSize(),
+ RVFI->getRVVStackSize());
} else {
Offset += StackOffset::getFixed(MFI.getStackSize());
}
@@ -860,7 +866,7 @@ RISCVFrameLowering::assignRVVStackObjectOffsets(MachineFrameInfo &MFI) const {
}
static bool hasRVVSpillWithFIs(MachineFunction &MF, const RISCVInstrInfo &TII) {
- if (!MF.getSubtarget<RISCVSubtarget>().hasStdExtV())
+ if (!MF.getSubtarget<RISCVSubtarget>().hasVInstructions())
return false;
return any_of(MF, [&TII](const MachineBasicBlock &MBB) {
return any_of(MBB, [&TII](const MachineInstr &MI) {
@@ -1040,7 +1046,8 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
// Insert the spill to the stack frame.
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CS.getFrameIdx(), RC, TRI);
+ TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
+ RC, TRI);
}
return true;
@@ -1087,6 +1094,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
return true;
}
+bool RISCVFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ // Keep the conventional code flow when not optimizing.
+ if (MF.getFunction().hasOptNone())
+ return false;
+
+ return true;
+}
+
bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const MachineFunction *MF = MBB.getParent();
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index bc3ace786272..1e94e34acf2f 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -65,6 +65,8 @@ public:
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
bool isSupportedStackID(TargetStackID::Value ID) const override;
TargetStackID::Value getStackIDForScalableVectors() const override;
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
new file mode 100644
index 000000000000..d47bd739235f
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -0,0 +1,475 @@
+//===- RISCVGatherScatterLowering.cpp - Gather/Scatter lowering -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass custom lowers llvm.masked.gather and llvm.masked.scatter
+// intrinsics to RISCV masked strided load/store intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-gather-scatter-lowering"
+
+namespace {
+
+class RISCVGatherScatterLowering : public FunctionPass {
+ const RISCVSubtarget *ST = nullptr;
+ const RISCVTargetLowering *TLI = nullptr;
+ LoopInfo *LI = nullptr;
+ const DataLayout *DL = nullptr;
+
+ SmallVector<WeakTrackingVH> MaybeDeadPHIs;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ RISCVGatherScatterLowering() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ }
+
+ StringRef getPassName() const override {
+ return "RISCV gather/scatter lowering";
+ }
+
+private:
+ bool isLegalTypeAndAlignment(Type *DataType, Value *AlignOp);
+
+ bool tryCreateStridedLoadStore(IntrinsicInst *II, Type *DataType, Value *Ptr,
+ Value *AlignOp);
+
+ std::pair<Value *, Value *> determineBaseAndStride(GetElementPtrInst *GEP,
+ IRBuilder<> &Builder);
+
+ bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride,
+ PHINode *&BasePtr, BinaryOperator *&Inc,
+ IRBuilder<> &Builder);
+};
+
+} // end anonymous namespace
+
+char RISCVGatherScatterLowering::ID = 0;
+
+INITIALIZE_PASS(RISCVGatherScatterLowering, DEBUG_TYPE,
+ "RISCV gather/scatter lowering pass", false, false)
+
+FunctionPass *llvm::createRISCVGatherScatterLoweringPass() {
+ return new RISCVGatherScatterLowering();
+}
+
+bool RISCVGatherScatterLowering::isLegalTypeAndAlignment(Type *DataType,
+ Value *AlignOp) {
+ Type *ScalarType = DataType->getScalarType();
+ if (!TLI->isLegalElementTypeForRVV(ScalarType))
+ return false;
+
+ MaybeAlign MA = cast<ConstantInt>(AlignOp)->getMaybeAlignValue();
+ if (MA && MA->value() < DL->getTypeStoreSize(ScalarType).getFixedSize())
+ return false;
+
+ // FIXME: Let the backend type legalize by splitting/widening?
+ EVT DataVT = TLI->getValueType(*DL, DataType);
+ if (!TLI->isTypeLegal(DataVT))
+ return false;
+
+ return true;
+}
+
+// TODO: Should we consider the mask when looking for a stride?
+static std::pair<Value *, Value *> matchStridedConstant(Constant *StartC) {
+ unsigned NumElts = cast<FixedVectorType>(StartC->getType())->getNumElements();
+
+ // Check that the start value is a strided constant.
+ auto *StartVal =
+ dyn_cast_or_null<ConstantInt>(StartC->getAggregateElement((unsigned)0));
+ if (!StartVal)
+ return std::make_pair(nullptr, nullptr);
+ APInt StrideVal(StartVal->getValue().getBitWidth(), 0);
+ ConstantInt *Prev = StartVal;
+ for (unsigned i = 1; i != NumElts; ++i) {
+ auto *C = dyn_cast_or_null<ConstantInt>(StartC->getAggregateElement(i));
+ if (!C)
+ return std::make_pair(nullptr, nullptr);
+
+ APInt LocalStride = C->getValue() - Prev->getValue();
+ if (i == 1)
+ StrideVal = LocalStride;
+ else if (StrideVal != LocalStride)
+ return std::make_pair(nullptr, nullptr);
+
+ Prev = C;
+ }
+
+ Value *Stride = ConstantInt::get(StartVal->getType(), StrideVal);
+
+ return std::make_pair(StartVal, Stride);
+}
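For instance (editorial note), a start constant such as <4 x i32> <i32 0, i32 4, i32 8, i32 12> yields the pair (0, 4), while <i32 0, i32 1, i32 4, i32 5> fails the equal-difference check and returns (nullptr, nullptr).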
+
+// Recursively walk up the use-def chain until we find a Phi with a strided
+// start value. Build and update a scalar recurrence as we unwind the recursion.
+// We also update the Stride as we unwind. Our goal is to move all of the
+// arithmetic out of the loop.
+bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
+ Value *&Stride,
+ PHINode *&BasePtr,
+ BinaryOperator *&Inc,
+ IRBuilder<> &Builder) {
+ // Our base case is a Phi.
+ if (auto *Phi = dyn_cast<PHINode>(Index)) {
+ // A phi node we want to perform this function on should be from the
+ // loop header.
+ if (Phi->getParent() != L->getHeader())
+ return false;
+
+ Value *Step, *Start;
+ if (!matchSimpleRecurrence(Phi, Inc, Start, Step) ||
+ Inc->getOpcode() != Instruction::Add)
+ return false;
+ assert(Phi->getNumIncomingValues() == 2 && "Expected 2 operand phi.");
+ unsigned IncrementingBlock = Phi->getIncomingValue(0) == Inc ? 0 : 1;
+ assert(Phi->getIncomingValue(IncrementingBlock) == Inc &&
+ "Expected one operand of phi to be Inc");
+
+ // Only proceed if the step is loop invariant.
+ if (!L->isLoopInvariant(Step))
+ return false;
+
+ // Step should be a splat.
+ Step = getSplatValue(Step);
+ if (!Step)
+ return false;
+
+ // Start should be a strided constant.
+ auto *StartC = dyn_cast<Constant>(Start);
+ if (!StartC)
+ return false;
+
+ std::tie(Start, Stride) = matchStridedConstant(StartC);
+ if (!Start)
+ return false;
+ assert(Stride != nullptr);
+
+ // Build scalar phi and increment.
+ BasePtr =
+ PHINode::Create(Start->getType(), 2, Phi->getName() + ".scalar", Phi);
+ Inc = BinaryOperator::CreateAdd(BasePtr, Step, Inc->getName() + ".scalar",
+ Inc);
+ BasePtr->addIncoming(Start, Phi->getIncomingBlock(1 - IncrementingBlock));
+ BasePtr->addIncoming(Inc, Phi->getIncomingBlock(IncrementingBlock));
+
+ // Note that this Phi might be eligible for removal.
+ MaybeDeadPHIs.push_back(Phi);
+ return true;
+ }
+
+ // Otherwise look for binary operator.
+ auto *BO = dyn_cast<BinaryOperator>(Index);
+ if (!BO)
+ return false;
+
+ if (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Or &&
+ BO->getOpcode() != Instruction::Mul &&
+ BO->getOpcode() != Instruction::Shl)
+ return false;
+
+ // Only support shift by constant.
+ if (BO->getOpcode() == Instruction::Shl && !isa<Constant>(BO->getOperand(1)))
+ return false;
+
+ // We need to be able to treat Or as Add.
+ if (BO->getOpcode() == Instruction::Or &&
+ !haveNoCommonBitsSet(BO->getOperand(0), BO->getOperand(1), *DL))
+ return false;
+
+ // We should have one operand in the loop and one splat.
+ Value *OtherOp;
+ if (isa<Instruction>(BO->getOperand(0)) &&
+ L->contains(cast<Instruction>(BO->getOperand(0)))) {
+ Index = cast<Instruction>(BO->getOperand(0));
+ OtherOp = BO->getOperand(1);
+ } else if (isa<Instruction>(BO->getOperand(1)) &&
+ L->contains(cast<Instruction>(BO->getOperand(1)))) {
+ Index = cast<Instruction>(BO->getOperand(1));
+ OtherOp = BO->getOperand(0);
+ } else {
+ return false;
+ }
+
+ // Make sure other op is loop invariant.
+ if (!L->isLoopInvariant(OtherOp))
+ return false;
+
+ // Make sure we have a splat.
+ Value *SplatOp = getSplatValue(OtherOp);
+ if (!SplatOp)
+ return false;
+
+ // Recurse up the use-def chain.
+ if (!matchStridedRecurrence(Index, L, Stride, BasePtr, Inc, Builder))
+ return false;
+
+ // Locate the Step and Start values from the recurrence.
+ unsigned StepIndex = Inc->getOperand(0) == BasePtr ? 1 : 0;
+ unsigned StartBlock = BasePtr->getOperand(0) == Inc ? 1 : 0;
+ Value *Step = Inc->getOperand(StepIndex);
+ Value *Start = BasePtr->getOperand(StartBlock);
+
+ // We need to adjust the start value in the preheader.
+ Builder.SetInsertPoint(
+ BasePtr->getIncomingBlock(StartBlock)->getTerminator());
+ Builder.SetCurrentDebugLocation(DebugLoc());
+
+ switch (BO->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case Instruction::Add:
+ case Instruction::Or: {
+ // An add only affects the start value. It's ok to do this for Or because
+ // we already checked that there are no common set bits.
+
+ // If the start value is Zero, just take the SplatOp.
+ if (isa<ConstantInt>(Start) && cast<ConstantInt>(Start)->isZero())
+ Start = SplatOp;
+ else
+ Start = Builder.CreateAdd(Start, SplatOp, "start");
+ BasePtr->setIncomingValue(StartBlock, Start);
+ break;
+ }
+ case Instruction::Mul: {
+ // If the start is zero we don't need to multiply.
+ if (!isa<ConstantInt>(Start) || !cast<ConstantInt>(Start)->isZero())
+ Start = Builder.CreateMul(Start, SplatOp, "start");
+
+ Step = Builder.CreateMul(Step, SplatOp, "step");
+
+ // If the Stride is 1 just take the SplatOp.
+ if (isa<ConstantInt>(Stride) && cast<ConstantInt>(Stride)->isOne())
+ Stride = SplatOp;
+ else
+ Stride = Builder.CreateMul(Stride, SplatOp, "stride");
+ Inc->setOperand(StepIndex, Step);
+ BasePtr->setIncomingValue(StartBlock, Start);
+ break;
+ }
+ case Instruction::Shl: {
+ // If the start is zero we don't need to shift.
+ if (!isa<ConstantInt>(Start) || !cast<ConstantInt>(Start)->isZero())
+ Start = Builder.CreateShl(Start, SplatOp, "start");
+ Step = Builder.CreateShl(Step, SplatOp, "step");
+ Stride = Builder.CreateShl(Stride, SplatOp, "stride");
+ Inc->setOperand(StepIndex, Step);
+ BasePtr->setIncomingValue(StartBlock, Start);
+ break;
+ }
+ }
+
+ return true;
+}
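As a sketch of the net effect (values illustrative): for a vector index phi starting at <0, 1, 2, 3> and incremented each iteration by a splat of 4, the routine builds a scalar phi starting at 0 with a scalar increment of 4, reports a Stride of 1, and records the original vector phi in MaybeDeadPHIs for later cleanup.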
+
+std::pair<Value *, Value *>
+RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
+ IRBuilder<> &Builder) {
+
+ SmallVector<Value *, 2> Ops(GEP->operands());
+
+ // Base pointer needs to be a scalar.
+ if (Ops[0]->getType()->isVectorTy())
+ return std::make_pair(nullptr, nullptr);
+
+ // Make sure we're in a loop and it is in loop simplify form.
+ Loop *L = LI->getLoopFor(GEP->getParent());
+ if (!L || !L->isLoopSimplifyForm())
+ return std::make_pair(nullptr, nullptr);
+
+ Optional<unsigned> VecOperand;
+ unsigned TypeScale = 0;
+
+ // Look for a vector operand and scale.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ if (!Ops[i]->getType()->isVectorTy())
+ continue;
+
+ if (VecOperand)
+ return std::make_pair(nullptr, nullptr);
+
+ VecOperand = i;
+
+ TypeSize TS = DL->getTypeAllocSize(GTI.getIndexedType());
+ if (TS.isScalable())
+ return std::make_pair(nullptr, nullptr);
+
+ TypeScale = TS.getFixedSize();
+ }
+
+ // We need to find a vector index to simplify.
+ if (!VecOperand)
+ return std::make_pair(nullptr, nullptr);
+
+ // We can't extract the stride if the arithmetic is done at a different size
+ // than the pointer type. Adding the stride later may not wrap correctly.
+ // Technically we could handle wider indices, but I don't expect that in
+ // practice.
+ Value *VecIndex = Ops[*VecOperand];
+ Type *VecIntPtrTy = DL->getIntPtrType(GEP->getType());
+ if (VecIndex->getType() != VecIntPtrTy)
+ return std::make_pair(nullptr, nullptr);
+
+ Value *Stride;
+ BinaryOperator *Inc;
+ PHINode *BasePhi;
+ if (!matchStridedRecurrence(VecIndex, L, Stride, BasePhi, Inc, Builder))
+ return std::make_pair(nullptr, nullptr);
+
+ assert(BasePhi->getNumIncomingValues() == 2 && "Expected 2 operand phi.");
+ unsigned IncrementingBlock = BasePhi->getOperand(0) == Inc ? 0 : 1;
+ assert(BasePhi->getIncomingValue(IncrementingBlock) == Inc &&
+ "Expected one operand of phi to be Inc");
+
+ Builder.SetInsertPoint(GEP);
+
+ // Replace the vector index with the scalar phi and build a scalar GEP.
+ Ops[*VecOperand] = BasePhi;
+ Type *SourceTy = GEP->getSourceElementType();
+ Value *BasePtr =
+ Builder.CreateGEP(SourceTy, Ops[0], makeArrayRef(Ops).drop_front());
+
+ // Cast the GEP to an i8*.
+ LLVMContext &Ctx = GEP->getContext();
+ Type *I8PtrTy =
+ Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
+ if (BasePtr->getType() != I8PtrTy)
+ BasePtr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
+
+ // Final adjustments to stride should go in the start block.
+ Builder.SetInsertPoint(
+ BasePhi->getIncomingBlock(1 - IncrementingBlock)->getTerminator());
+
+ // Convert stride to pointer size if needed.
+ Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
+ assert(Stride->getType() == IntPtrTy && "Unexpected type");
+
+ // Scale the stride by the size of the indexed type.
+ if (TypeScale != 1)
+ Stride = Builder.CreateMul(Stride, ConstantInt::get(IntPtrTy, TypeScale));
+
+ return std::make_pair(BasePtr, Stride);
+}
+
+bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
+ Type *DataType,
+ Value *Ptr,
+ Value *AlignOp) {
+ // Make sure the operation will be supported by the backend.
+ if (!isLegalTypeAndAlignment(DataType, AlignOp))
+ return false;
+
+ // Pointer should be a GEP.
+ auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return false;
+
+ IRBuilder<> Builder(GEP);
+
+ Value *BasePtr, *Stride;
+ std::tie(BasePtr, Stride) = determineBaseAndStride(GEP, Builder);
+ if (!BasePtr)
+ return false;
+ assert(Stride != nullptr);
+
+ Builder.SetInsertPoint(II);
+
+ CallInst *Call;
+ if (II->getIntrinsicID() == Intrinsic::masked_gather)
+ Call = Builder.CreateIntrinsic(
+ Intrinsic::riscv_masked_strided_load,
+ {DataType, BasePtr->getType(), Stride->getType()},
+ {II->getArgOperand(3), BasePtr, Stride, II->getArgOperand(2)});
+ else
+ Call = Builder.CreateIntrinsic(
+ Intrinsic::riscv_masked_strided_store,
+ {DataType, BasePtr->getType(), Stride->getType()},
+ {II->getArgOperand(0), BasePtr, Stride, II->getArgOperand(3)});
+
+ Call->takeName(II);
+ II->replaceAllUsesWith(Call);
+ II->eraseFromParent();
+
+ if (GEP->use_empty())
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+
+ return true;
+}
+
+bool RISCVGatherScatterLowering::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ auto &TPC = getAnalysis<TargetPassConfig>();
+ auto &TM = TPC.getTM<RISCVTargetMachine>();
+ ST = &TM.getSubtarget<RISCVSubtarget>(F);
+ if (!ST->hasVInstructions() || !ST->useRVVForFixedLengthVectors())
+ return false;
+
+ TLI = ST->getTargetLowering();
+ DL = &F.getParent()->getDataLayout();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+
+ SmallVector<IntrinsicInst *, 4> Gathers;
+ SmallVector<IntrinsicInst *, 4> Scatters;
+
+ bool Changed = false;
+
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::masked_gather &&
+ isa<FixedVectorType>(II->getType())) {
+ Gathers.push_back(II);
+ } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter &&
+ isa<FixedVectorType>(II->getArgOperand(0)->getType())) {
+ Scatters.push_back(II);
+ }
+ }
+ }
+
+ // Rewrite gather/scatter to form strided load/store if possible.
+ for (auto *II : Gathers)
+ Changed |= tryCreateStridedLoadStore(
+ II, II->getType(), II->getArgOperand(0), II->getArgOperand(1));
+ for (auto *II : Scatters)
+ Changed |=
+ tryCreateStridedLoadStore(II, II->getArgOperand(0)->getType(),
+ II->getArgOperand(1), II->getArgOperand(2));
+
+ // Remove any dead phis.
+ while (!MaybeDeadPHIs.empty()) {
+ if (auto *Phi = dyn_cast_or_null<PHINode>(MaybeDeadPHIs.pop_back_val()))
+ RecursivelyDeleteDeadPHINode(Phi);
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9866567ac1ee..66a34d73dd37 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -108,7 +108,21 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
- doPeepholeLoadStoreADDI();
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ MadeChange |= doPeepholeSExtW(N);
+ MadeChange |= doPeepholeLoadStoreADDI(N);
+ }
+
+ if (MadeChange)
+ CurDAG->RemoveDeadNodes();
}
static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
@@ -126,6 +140,9 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, int64_t Imm,
else if (Inst.Opc == RISCV::ADDUW)
Result = CurDAG->getMachineNode(RISCV::ADDUW, DL, XLenVT, SrcReg,
CurDAG->getRegister(RISCV::X0, XLenVT));
+ else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
+ Inst.Opc == RISCV::SH3ADD)
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
else
Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
@@ -199,7 +216,7 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDNode *Node, unsigned Log2SEW, const SDLoc &DL, unsigned CurOp,
bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
- MVT *IndexVT) {
+ bool IsLoad, MVT *IndexVT) {
SDValue Chain = Node->getOperand(0);
SDValue Glue;
@@ -228,6 +245,14 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
Operands.push_back(SEWOp);
+ // Masked load has the tail policy argument.
+ if (IsMasked && IsLoad) {
+ // Policy must be a constant.
+ uint64_t Policy = Node->getConstantOperandVal(CurOp++);
+ SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
+ Operands.push_back(PolicyOp);
+ }
+
Operands.push_back(Chain); // Chain.
if (Glue)
Operands.push_back(Glue);
@@ -252,7 +277,7 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
}
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
- Operands);
+ Operands, /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
@@ -293,7 +318,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
}
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ false, Operands);
+ /*IsStridedOrIndexed*/ false, Operands,
+ /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
@@ -338,7 +364,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ true, Operands, &IndexVT);
+ /*IsStridedOrIndexed*/ true, Operands,
+ /*IsLoad=*/true, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -415,7 +442,8 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ true, Operands, &IndexVT);
+ /*IsStridedOrIndexed*/ true, Operands,
+ /*IsLoad=*/false, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -453,14 +481,24 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
case ISD::Constant: {
auto *ConstNode = cast<ConstantSDNode>(Node);
- if (VT == XLenVT && ConstNode->isNullValue()) {
+ if (VT == XLenVT && ConstNode->isZero()) {
SDValue New =
CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
ReplaceNode(Node, New.getNode());
return;
}
- ReplaceNode(Node,
- selectImm(CurDAG, DL, ConstNode->getSExtValue(), *Subtarget));
+ int64_t Imm = ConstNode->getSExtValue();
+ // If the upper XLen-16 bits are not used, try to convert this to a simm12
+ // by sign extending bit 15.
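+ // e.g. 0xffff normally needs two instructions to materialize, but if only
+ // the lower 16 bits are consumed it can be treated as -1 (a single ADDI).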
+ if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
+ hasAllHUsers(Node))
+ Imm = SignExtend64(Imm, 16);
+ // If the upper 32 bits are not used, try to convert this into a simm32 by
+ // sign extending bit 31.
+ if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
+ Imm = SignExtend64(Imm, 32);
+
+ ReplaceNode(Node, selectImm(CurDAG, DL, Imm, *Subtarget));
return;
}
case ISD::FrameIndex: {
@@ -591,7 +629,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
}
- // Turn (and (shl x, c2) c1) -> (srli (slli c2+c3), c3) if c1 is a mask
+ // Turn (and (shl x, c2), c1) -> (srli (slli x, c2+c3), c3) if c1 is a mask
// shifted by c2 bits with c3 leading zeros.
if (LeftShift && isShiftedMask_64(C1)) {
uint64_t C3 = XLen - (64 - countLeadingZeros(C1));
@@ -621,6 +659,63 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
}
+ // Turn (and (shr x, c2), c1) -> (slli (srli x, c2+c3), c3) if c1 is a
+ // shifted mask with c2 leading zeros and c3 trailing zeros.
+ if (!LeftShift && isShiftedMask_64(C1)) {
+ uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
+ uint64_t C3 = countTrailingZeros(C1);
+ if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ // If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
+ if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
+ OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
+ // Turn (and (shl x, c2), c1) -> (slli (srli x, c3-c2), c3) if c1 is a
+ // shifted mask with no leading zeros and c3 trailing zeros.
+ if (LeftShift && isShiftedMask_64(C1)) {
+ uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
+ uint64_t C3 = countTrailingZeros(C1);
+ if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLI = CurDAG->getMachineNode(
+ RISCV::SRLI, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ // If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
+ if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, XLenVT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ SDNode *SLLI =
+ CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, XLenVT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
break;
}
case ISD::INTRINSIC_WO_CHAIN: {
@@ -713,7 +808,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
bool IsUnsigned = IntNo == Intrinsic::riscv_vmsgeu_mask;
MVT Src1VT = Src1.getSimpleValueType();
- unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOTOpcode;
+ unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
@@ -766,31 +861,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
llvm_unreachable("Unexpected LMUL!");
case RISCVII::VLMUL::LMUL_F8:
VMXOROpcode = RISCV::PseudoVMXOR_MM_MF8;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF8;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF8;
break;
case RISCVII::VLMUL::LMUL_F4:
VMXOROpcode = RISCV::PseudoVMXOR_MM_MF4;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF4;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF4;
break;
case RISCVII::VLMUL::LMUL_F2:
VMXOROpcode = RISCV::PseudoVMXOR_MM_MF2;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_MF2;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_MF2;
break;
case RISCVII::VLMUL::LMUL_1:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M1;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M1;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M1;
break;
case RISCVII::VLMUL::LMUL_2:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M2;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M2;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M2;
break;
case RISCVII::VLMUL::LMUL_4:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M4;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M4;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M4;
break;
case RISCVII::VLMUL::LMUL_8:
VMXOROpcode = RISCV::PseudoVMXOR_MM_M8;
- VMANDNOTOpcode = RISCV::PseudoVMANDNOT_MM_M8;
+ VMANDNOpcode = RISCV::PseudoVMANDN_MM_M8;
break;
}
SDValue SEW = CurDAG->getTargetConstant(
@@ -801,13 +896,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue MaskedOff = Node->getOperand(1);
SDValue Mask = Node->getOperand(4);
// If the MaskedOff value and the Mask are the same value use
- // vmslt{u}.vx vt, va, x; vmandnot.mm vd, vd, vt
+ // vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt
// This avoids needing to copy v0 to vd before starting the next sequence.
if (Mask == MaskedOff) {
SDValue Cmp = SDValue(
CurDAG->getMachineNode(VMSLTOpcode, DL, VT, {Src1, Src2, VL, SEW}),
0);
- ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOTOpcode, DL, VT,
+ ReplaceNode(Node, CurDAG->getMachineNode(VMANDNOpcode, DL, VT,
{Mask, Cmp, VL, MaskSEW}));
return;
}
@@ -840,7 +935,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax: {
- if (!Subtarget->hasStdExtV())
+ if (!Subtarget->hasVInstructions())
break;
bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
@@ -859,8 +954,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
SDValue VLOperand;
+ unsigned Opcode = RISCV::PseudoVSETVLI;
if (VLMax) {
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+ Opcode = RISCV::PseudoVSETVLIX0;
} else {
VLOperand = Node->getOperand(2);
@@ -878,7 +975,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
ReplaceNode(Node,
- CurDAG->getMachineNode(RISCV::PseudoVSETVLI, DL, XLenVT,
+ CurDAG->getMachineNode(Opcode, DL, XLenVT,
MVT::Other, VLOperand, VTypeIOp,
/* Chain */ Node->getOperand(0)));
return;
@@ -999,7 +1096,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ true, Operands,
- &IndexVT);
+ /*IsLoad=*/true, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -1019,7 +1116,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Load);
return;
}
- case Intrinsic::riscv_vle1:
+ case Intrinsic::riscv_vlm:
case Intrinsic::riscv_vle:
case Intrinsic::riscv_vle_mask:
case Intrinsic::riscv_vlse:
@@ -1038,7 +1135,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Operands.push_back(Node->getOperand(CurOp++));
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
- Operands);
+ Operands, /*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
@@ -1066,7 +1163,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Operands.push_back(Node->getOperand(CurOp++));
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
- /*IsStridedOrIndexed*/ false, Operands);
+ /*IsStridedOrIndexed*/ false, Operands,
+ /*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
@@ -1188,7 +1286,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ true, Operands,
- &IndexVT);
+ /*IsLoad=*/false, &IndexVT);
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Element count mismatch");
@@ -1208,7 +1306,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Store);
return;
}
- case Intrinsic::riscv_vse1:
+ case Intrinsic::riscv_vsm:
case Intrinsic::riscv_vse:
case Intrinsic::riscv_vse_mask:
case Intrinsic::riscv_vsse:
@@ -1496,6 +1594,97 @@ bool RISCVDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
return false;
}
+// Return true if all users of this SDNode* only consume the lower \p Bits.
+// This can be used to form W instructions for add/sub/mul/shl even when the
+// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
+// SimplifyDemandedBits has made it so some users see a sext_inreg and some
+// don't. The sext_inreg+add/sub/mul/shl will get selected, but still leave
+// the add/sub/mul/shl to become non-W instructions. By checking the users we
+// may be able to use a W instruction and CSE with the other instruction if
+// this has happened. We could try to detect that the CSE opportunity exists
+// before doing this, but that would be more complicated.
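+// For example, if every use of an ADD feeds an ADDW, an SRAIW, or the value
+// operand of an SW, only the lower 32 bits of the ADD are observed, so it is
+// safe to select the ADD itself as ADDW.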
+// TODO: Does this need to look through AND/OR/XOR to their users to find more
+// opportunities?
+bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
+ assert((Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::SUB ||
+ Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
+ Node->getOpcode() == ISD::SRL ||
+ Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ isa<ConstantSDNode>(Node)) &&
+ "Unexpected opcode");
+
+ for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ // Users of this node should have already been instruction selected
+ if (!User->isMachineOpcode())
+ return false;
+
+ // TODO: Add more opcodes?
+ switch (User->getMachineOpcode()) {
+ default:
+ return false;
+ case RISCV::ADDW:
+ case RISCV::ADDIW:
+ case RISCV::SUBW:
+ case RISCV::MULW:
+ case RISCV::SLLW:
+ case RISCV::SLLIW:
+ case RISCV::SRAW:
+ case RISCV::SRAIW:
+ case RISCV::SRLW:
+ case RISCV::SRLIW:
+ case RISCV::DIVW:
+ case RISCV::DIVUW:
+ case RISCV::REMW:
+ case RISCV::REMUW:
+ case RISCV::ROLW:
+ case RISCV::RORW:
+ case RISCV::RORIW:
+ case RISCV::CLZW:
+ case RISCV::CTZW:
+ case RISCV::CPOPW:
+ case RISCV::SLLIUW:
+ case RISCV::FCVT_H_W:
+ case RISCV::FCVT_H_WU:
+ case RISCV::FCVT_S_W:
+ case RISCV::FCVT_S_WU:
+ case RISCV::FCVT_D_W:
+ case RISCV::FCVT_D_WU:
+ if (Bits < 32)
+ return false;
+ break;
+ case RISCV::SLLI:
+ // SLLI only uses the lower (XLen - ShAmt) bits.
+ if (Bits < Subtarget->getXLen() - User->getConstantOperandVal(1))
+ return false;
+ break;
+ case RISCV::ADDUW:
+ case RISCV::SH1ADDUW:
+ case RISCV::SH2ADDUW:
+ case RISCV::SH3ADDUW:
+ // The first operand to add.uw/shXadd.uw is implicitly zero extended from
+ // 32 bits.
+ if (UI.getOperandNo() != 0 || Bits < 32)
+ return false;
+ break;
+ case RISCV::SB:
+ if (UI.getOperandNo() != 0 || Bits < 8)
+ return false;
+ break;
+ case RISCV::SH:
+ if (UI.getOperandNo() != 0 || Bits < 16)
+ return false;
+ break;
+ case RISCV::SW:
+ if (UI.getOperandNo() != 0 || Bits < 32)
+ return false;
+ break;
+ }
+ }
+
+ return true;
+}
+
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI and VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
@@ -1609,113 +1798,162 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
-void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
- SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
- ++Position;
+bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
+ int OffsetOpIdx;
+ int BaseOpIdx;
- while (Position != CurDAG->allnodes_begin()) {
- SDNode *N = &*--Position;
- // Skip dead nodes and any non-machine opcodes.
- if (N->use_empty() || !N->isMachineOpcode())
- continue;
+ // Only attempt this optimisation for I-type loads and S-type stores.
+ switch (N->getMachineOpcode()) {
+ default:
+ return false;
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ BaseOpIdx = 0;
+ OffsetOpIdx = 1;
+ break;
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ BaseOpIdx = 1;
+ OffsetOpIdx = 2;
+ break;
+ }
- int OffsetOpIdx;
- int BaseOpIdx;
+ if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
+ return false;
- // Only attempt this optimisation for I-type loads and S-type stores.
- switch (N->getMachineOpcode()) {
- default:
- continue;
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- break;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- break;
- }
+ SDValue Base = N->getOperand(BaseOpIdx);
- if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
- continue;
+ // If the base is an ADDI, we can merge it into the load/store.
+ if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
+ return false;
- SDValue Base = N->getOperand(BaseOpIdx);
+ SDValue ImmOperand = Base.getOperand(1);
+ uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
- // If the base is an ADDI, we can merge it in to the load/store.
- if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
- continue;
+ if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
+ int64_t Offset1 = Const->getSExtValue();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ if (!isInt<12>(CombinedOffset))
+ return false;
+ ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
+ ImmOperand.getValueType());
+ } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
+ // If the off1 in (addi base, off1) is a global variable's address (its
+ // low part, really), then we can rely on the alignment of that variable
+ // to provide a margin of safety before off1 can overflow the 12 bits.
+ // Check if off2 falls within that margin; if so off1+off2 can't overflow.
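+ // e.g. for a 16-byte-aligned global, any off2 up to 15 stays within the
+ // margin and is accepted below.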
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+ if (Offset2 != 0 && Alignment <= Offset2)
+ return false;
+ int64_t Offset1 = GA->getOffset();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ ImmOperand = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
+ CombinedOffset, GA->getTargetFlags());
+ } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
+ // Ditto.
+ Align Alignment = CP->getAlign();
+ if (Offset2 != 0 && Alignment <= Offset2)
+ return false;
+ int64_t Offset1 = CP->getOffset();
+ int64_t CombinedOffset = Offset1 + Offset2;
+ ImmOperand = CurDAG->getTargetConstantPool(
+ CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
+ CombinedOffset, CP->getTargetFlags());
+ } else {
+ return false;
+ }
- SDValue ImmOperand = Base.getOperand(1);
- uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
-
- if (auto *Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
- int64_t Offset1 = Const->getSExtValue();
- int64_t CombinedOffset = Offset1 + Offset2;
- if (!isInt<12>(CombinedOffset))
- continue;
- ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
- ImmOperand.getValueType());
- } else if (auto *GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
- // If the off1 in (addi base, off1) is a global variable's address (its
- // low part, really), then we can rely on the alignment of that variable
- // to provide a margin of safety before off1 can overflow the 12 bits.
- // Check if off2 falls within that margin; if so off1+off2 can't overflow.
- const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
- if (Offset2 != 0 && Alignment <= Offset2)
- continue;
- int64_t Offset1 = GA->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetGlobalAddress(
- GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
- CombinedOffset, GA->getTargetFlags());
- } else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
- // Ditto.
- Align Alignment = CP->getAlign();
- if (Offset2 != 0 && Alignment <= Offset2)
- continue;
- int64_t Offset1 = CP->getOffset();
- int64_t CombinedOffset = Offset1 + Offset2;
- ImmOperand = CurDAG->getTargetConstantPool(
- CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
- CombinedOffset, CP->getTargetFlags());
- } else {
- continue;
+ LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ LLVM_DEBUG(Base->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\nN: ");
+ LLVM_DEBUG(N->dump(CurDAG));
+ LLVM_DEBUG(dbgs() << "\n");
+
+ // Modify the offset operand of the load/store.
+ if (BaseOpIdx == 0) // Load
+ CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
+ N->getOperand(2));
+ else // Store
+ CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
+ ImmOperand, N->getOperand(3));
+
+ return true;
+}
+
+// Try to remove sext.w if the input is a W instruction or can be made into
+// a W instruction cheaply.
+bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
+ // Look for the sext.w pattern, addiw rd, rs1, 0.
+ if (N->getMachineOpcode() != RISCV::ADDIW ||
+ !isNullConstant(N->getOperand(1)))
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ if (!N0.isMachineOpcode())
+ return false;
+
+ switch (N0.getMachineOpcode()) {
+ default:
+ break;
+ case RISCV::ADD:
+ case RISCV::ADDI:
+ case RISCV::SUB:
+ case RISCV::MUL:
+ case RISCV::SLLI: {
+ // Convert sext.w+add/sub/mul to their W instructions. This will create
+ // a new independent instruction. This improves latency.
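+ // e.g. (addiw (add rs1, rs2), 0) becomes (addw rs1, rs2); the original add
+ // is left behind for any users that still need all XLen bits.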
+ unsigned Opc;
+ switch (N0.getMachineOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode!");
+ case RISCV::ADD: Opc = RISCV::ADDW; break;
+ case RISCV::ADDI: Opc = RISCV::ADDIW; break;
+ case RISCV::SUB: Opc = RISCV::SUBW; break;
+ case RISCV::MUL: Opc = RISCV::MULW; break;
+ case RISCV::SLLI: Opc = RISCV::SLLIW; break;
}
- LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
- LLVM_DEBUG(Base->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\nN: ");
- LLVM_DEBUG(N->dump(CurDAG));
- LLVM_DEBUG(dbgs() << "\n");
-
- // Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) // Load
- CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- else // Store
- CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
-
- // The add-immediate may now be dead, in which case remove it.
- if (Base.getNode()->use_empty())
- CurDAG->RemoveDeadNode(Base.getNode());
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ // Shift amount needs to be uimm5.
+ if (N0.getMachineOpcode() == RISCV::SLLI &&
+ !isUInt<5>(cast<ConstantSDNode>(N01)->getSExtValue()))
+ break;
+
+ SDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
+ N00, N01);
+ ReplaceUses(N, Result);
+ return true;
+ }
+ case RISCV::ADDW:
+ case RISCV::ADDIW:
+ case RISCV::SUBW:
+ case RISCV::MULW:
+ case RISCV::SLLIW:
+ // Result is already sign extended, so just remove the sext.w.
+ // NOTE: We only handle the nodes that are selected with hasAllWUsers.
+ ReplaceUses(N, N0.getNode());
+ return true;
}
+
+ return false;
}
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 56d072206316..a2770089995d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -58,6 +58,10 @@ public:
bool selectSExti32(SDValue N, SDValue &Val);
bool selectZExti32(SDValue N, SDValue &Val);
+ bool hasAllNBitUsers(SDNode *Node, unsigned Bits) const;
+ bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
+ bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
+
bool selectVLOp(SDValue N, SDValue &VL);
bool selectVSplat(SDValue N, SDValue &SplatVal);
@@ -75,7 +79,7 @@ public:
const SDLoc &DL, unsigned CurOp,
bool IsMasked, bool IsStridedOrIndexed,
SmallVectorImpl<SDValue> &Operands,
- MVT *IndexVT = nullptr);
+ bool IsLoad = false, MVT *IndexVT = nullptr);
void selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided);
void selectVLSEGFF(SDNode *Node, bool IsMasked);
@@ -83,11 +87,34 @@ public:
void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
+ // Return the RISC-V condition code that matches the given DAG integer
+ // condition code. The CondCode must be one of those supported by the RISC-V
+ // ISA (see translateSetCCForBranch).
+ static RISCVCC::CondCode getRISCVCCForIntCC(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported CondCode");
+ case ISD::SETEQ:
+ return RISCVCC::COND_EQ;
+ case ISD::SETNE:
+ return RISCVCC::COND_NE;
+ case ISD::SETLT:
+ return RISCVCC::COND_LT;
+ case ISD::SETGE:
+ return RISCVCC::COND_GE;
+ case ISD::SETULT:
+ return RISCVCC::COND_LTU;
+ case ISD::SETUGE:
+ return RISCVCC::COND_GEU;
+ }
+ }
+
// Include the pieces autogenerated from the target description.
#include "RISCVGenDAGISel.inc"
private:
- void doPeepholeLoadStoreADDI();
+ bool doPeepholeLoadStoreADDI(SDNode *Node);
+ bool doPeepholeSExtW(SDNode *Node);
};
namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d37ed584d9d2..0f1a6e5f9154 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20,6 +20,7 @@
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,8 +29,9 @@
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -107,7 +109,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const MVT::SimpleValueType F64VecVTs[] = {
MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
unsigned Size = VT.getSizeInBits().getKnownMinValue();
assert(Size <= 512 && isPowerOf2_32(Size));
@@ -126,18 +128,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (MVT VT : BoolVecVTs)
addRegClassForRVV(VT);
- for (MVT VT : IntVecVTs)
+ for (MVT VT : IntVecVTs) {
+ if (VT.getVectorElementType() == MVT::i64 &&
+ !Subtarget.hasVInstructionsI64())
+ continue;
addRegClassForRVV(VT);
+ }
- if (Subtarget.hasStdExtZfh())
+ if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasVInstructionsF32())
for (MVT VT : F32VecVTs)
addRegClassForRVV(VT);
- if (Subtarget.hasStdExtD())
+ if (Subtarget.hasVInstructionsF64())
for (MVT VT : F64VecVTs)
addRegClassForRVV(VT);
@@ -199,6 +205,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::USUBO, MVT::i32, Custom);
setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
+ } else {
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM()) {
@@ -299,14 +311,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
- ISD::CondCode FPCCToExpand[] = {
+ static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};
- ISD::NodeType FPOpToExpand[] = {
- ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
- ISD::FP_TO_FP16};
+ static const ISD::NodeType FPOpToExpand[] = {
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
+ ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
@@ -325,6 +337,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
for (auto Op : FPOpToExpand)
setOperationAction(Op, MVT::f16, Expand);
+
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
}
if (Subtarget.hasStdExtF()) {
@@ -376,6 +397,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtF()) {
+ setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
+ setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
+
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
@@ -407,7 +431,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setBooleanContents(ZeroOrOneBooleanContent);
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
setBooleanVectorContents(ZeroOrOneBooleanContent);
setOperationAction(ISD::VSCALE, XLenVT, Custom);
@@ -426,14 +450,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
-
- static unsigned IntegerVPOps[] = {
- ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
- ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR,
- ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};
-
- static unsigned FloatingPointVPOps[] = {ISD::VP_FADD, ISD::VP_FSUB,
- ISD::VP_FMUL, ISD::VP_FDIV};
+ setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+
+ static const unsigned IntegerVPOps[] = {
+ ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
+ ISD::VP_SDIV, ISD::VP_UDIV, ISD::VP_SREM,
+ ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,
+ ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR,
+ ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
+ ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
+ ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN};
+
+ static const unsigned FloatingPointVPOps[] = {
+ ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
+ ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -449,6 +480,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
+
+ setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
+ setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
@@ -471,6 +511,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
@@ -491,9 +535,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : IntVecVTs) {
+ if (VT.getVectorElementType() == MVT::i64 &&
+ !Subtarget.hasVInstructionsI64())
+ continue;
+
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
+ // Vectors implement MULHS/MULHU.
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
@@ -502,6 +554,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Expand);
setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
// Custom-lower extensions and truncations from/to mask types.
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
@@ -551,6 +609,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
@@ -567,6 +630,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
}
+
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
+ // type that can represent the value exactly.
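+ // (If the conversion to FP is exact, the exponent field of the result
+ // encodes floor(log2(x)), from which the zero count can be derived.)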
+ if (VT.getVectorElementType() != MVT::i64) {
+ MVT FloatEltVT =
+ VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
+ EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT)) {
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ }
+ }
}
// Expand various CCs to best match the RVV ISA, which natively supports UNE
@@ -576,7 +651,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// and we pattern-match those back to the "original", swapping operands once
// more. This way we catch both operations and both "vf" and "fv" forms with
// fewer patterns.
- ISD::CondCode VFPCCToExpand[] = {
+ static const ISD::CondCode VFPCCToExpand[] = {
ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
@@ -605,6 +680,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
setOperationAction(ISD::LOAD, VT, Custom);
@@ -615,6 +691,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -638,18 +719,18 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
- if (Subtarget.hasStdExtZfh())
+ if (Subtarget.hasVInstructionsF16())
for (MVT VT : F16VecVTs)
SetCommonVFPActions(VT);
for (MVT VT : F32VecVTs) {
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasVInstructionsF32())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
}
for (MVT VT : F64VecVTs) {
- if (Subtarget.hasStdExtD())
+ if (Subtarget.hasVInstructionsF64())
SetCommonVFPActions(VT);
SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
@@ -695,6 +776,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+
setOperationAction(ISD::SINT_TO_FP, VT, Custom);
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
setOperationAction(ISD::FP_TO_SINT, VT, Custom);
@@ -724,6 +809,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::SUB, VT, Custom);
@@ -769,6 +860,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
for (unsigned VPOpc : IntegerVPOps)
setOperationAction(VPOpc, VT, Custom);
+
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
+ // type that can represent the value exactly.
+ if (VT.getVectorElementType() != MVT::i64) {
+ MVT FloatEltVT =
+ VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
+ EVT FloatVT =
+ MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT)) {
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ }
+ }
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -788,6 +892,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -798,6 +903,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MGATHER, VT, Custom);
setOperationAction(ISD::MSCATTER, VT, Custom);
+
+ setOperationAction(ISD::VP_LOAD, VT, Custom);
+ setOperationAction(ISD::VP_STORE, VT, Custom);
+ setOperationAction(ISD::VP_GATHER, VT, Custom);
+ setOperationAction(ISD::VP_SCATTER, VT, Custom);
+
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FMUL, VT, Custom);
@@ -852,21 +963,23 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Jumps are expensive, compared to logic
setJumpIsExpensive();
- // We can use any register for comparisons
- setHasMultipleConditionRegisters();
-
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::XOR);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
setTargetDAGCombine(ISD::FCOPYSIGN);
setTargetDAGCombine(ISD::MGATHER);
setTargetDAGCombine(ISD::MSCATTER);
+ setTargetDAGCombine(ISD::VP_GATHER);
+ setTargetDAGCombine(ISD::VP_SCATTER);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::SHL);
+ setTargetDAGCombine(ISD::STORE);
}
}
@@ -875,7 +988,7 @@ EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
EVT VT) const {
if (!VT.isVector())
return getPointerTy(DL);
- if (Subtarget.hasStdExtV() &&
+ if (Subtarget.hasVInstructions() &&
(VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
return VT.changeVectorElementTypeToInteger();
@@ -889,6 +1002,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
+ auto &DL = I.getModule()->getDataLayout();
switch (Intrinsic) {
default:
return false;
@@ -911,6 +1025,25 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
}
+ case Intrinsic::riscv_masked_strided_load:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.memVT = getValueType(DL, I.getType()->getScalarType());
+ Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
+ case Intrinsic::riscv_masked_strided_store:
+ Info.opc = ISD::INTRINSIC_VOID;
+ Info.ptrVal = I.getArgOperand(1);
+ Info.memVT =
+ getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
+ Info.align = Align(
+ DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
+ 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOStore;
+ return true;
}
}
@@ -994,9 +1127,91 @@ bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasStdExtZbb();
}
+bool RISCVTargetLowering::hasAndNot(SDValue Y) const {
+ EVT VT = Y.getValueType();
+
+ // FIXME: Support vectors once we have tests.
+ if (VT.isVector())
+ return false;
+
+ return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y);
+}
+
+/// Check if sinking \p I's operands to I's basic block is profitable, because
+/// the operands can be folded into a target instruction, e.g.
+/// splats of scalars can fold into vector instructions.
+bool RISCVTargetLowering::shouldSinkOperands(
+ Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+ using namespace llvm::PatternMatch;
+
+ if (!I->getType()->isVectorTy() || !Subtarget.hasVInstructions())
+ return false;
+
+ auto IsSinker = [&](Instruction *I, int Operand) {
+ switch (I->getOpcode()) {
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ return true;
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Operand == 1;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fma:
+ return Operand == 0 || Operand == 1;
+ default:
+ return false;
+ }
+ }
+ return false;
+ default:
+ return false;
+ }
+ };
+
+ for (auto OpIdx : enumerate(I->operands())) {
+ if (!IsSinker(I, OpIdx.index()))
+ continue;
+
+ Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
+ // Make sure we are not already sinking this operand
+ if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ // We are looking for a splat that can be sunk.
+ if (!match(Op, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
+ m_Undef(), m_ZeroMask())))
+ continue;
+
+ // All uses of the shuffle should be sunk to avoid duplicating it across gpr
+ // and vector registers
+ for (Use &U : Op->uses()) {
+ Instruction *Insn = cast<Instruction>(U.getUser());
+ if (!IsSinker(Insn, U.getOperandNo()))
+ return false;
+ }
+
+ Ops.push_back(&Op->getOperandUse(0));
+ Ops.push_back(&OpIdx.value());
+ }
+ return true;
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
+ if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin())
return false;
if (VT == MVT::f32 && !Subtarget.hasStdExtF())
return false;
@@ -1016,9 +1231,9 @@ bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
- // end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
+ // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled.
+ // We might still end up using a GPR but that will be decided based on ABI.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -1027,9 +1242,9 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
- // end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
+ // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled.
+ // We might still end up using a GPR but that will be decided based on ABI.
+ if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -1068,28 +1283,6 @@ static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
}
}
-// Return the RISC-V branch opcode that matches the given DAG integer
-// condition code. The CondCode must be one of those supported by the RISC-V
-// ISA (see translateSetCCForBranch).
-static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
- switch (CC) {
- default:
- llvm_unreachable("Unsupported CondCode");
- case ISD::SETEQ:
- return RISCV::BEQ;
- case ISD::SETNE:
- return RISCV::BNE;
- case ISD::SETLT:
- return RISCV::BLT;
- case ISD::SETGE:
- return RISCV::BGE;
- case ISD::SETULT:
- return RISCV::BLTU;
- case ISD::SETUGE:
- return RISCV::BGEU;
- }
-}
-
RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
assert(VT.isScalableVector() && "Expecting a scalable vector type");
unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
@@ -1206,6 +1399,27 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
+bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
+ if (ScalarTy->isPointerTy())
+ return true;
+
+ if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
+ ScalarTy->isIntegerTy(32))
+ return true;
+
+ if (ScalarTy->isIntegerTy(64))
+ return Subtarget.hasVInstructionsI64();
+
+ if (ScalarTy->isHalfTy())
+ return Subtarget.hasVInstructionsF16();
+ if (ScalarTy->isFloatTy())
+ return Subtarget.hasVInstructionsF32();
+ if (ScalarTy->isDoubleTy())
+ return Subtarget.hasVInstructionsF64();
+
+ return false;
+}
+
static bool useRVVForFixedLengthVectorVT(MVT VT,
const RISCVSubtarget &Subtarget) {
assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
@@ -1221,8 +1435,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+ MVT EltVT = VT.getVectorElementType();
+
// Don't use RVV for vectors we cannot scalarize if required.
- switch (VT.getVectorElementType().SimpleTy) {
+ switch (EltVT.SimpleTy) {
// i1 is supported but has different rules.
default:
return false;
@@ -1235,22 +1451,29 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
case MVT::i8:
case MVT::i16:
case MVT::i32:
+ break;
case MVT::i64:
+ if (!Subtarget.hasVInstructionsI64())
+ return false;
break;
case MVT::f16:
- if (!Subtarget.hasStdExtZfh())
+ if (!Subtarget.hasVInstructionsF16())
return false;
break;
case MVT::f32:
- if (!Subtarget.hasStdExtF())
+ if (!Subtarget.hasVInstructionsF32())
return false;
break;
case MVT::f64:
- if (!Subtarget.hasStdExtD())
+ if (!Subtarget.hasVInstructionsF64())
return false;
break;
}
+ // Reject elements larger than ELEN.
+ if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
+ return false;
+
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
// Don't use RVV for types that don't fit.
if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
@@ -1277,6 +1500,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
"Expected legal fixed length vector!");
unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+ unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
@@ -1291,10 +1515,12 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
case MVT::f32:
case MVT::f64: {
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
- // narrower types, but we can't have a fractional LMUL with demoninator less
- // than 64/SEW.
+ // narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
+ // each fractional LMUL we support SEW between 8 and LMUL*ELEN.
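+ // e.g. with ELEN=64 the smallest container uses LMUL=1/8 and therefore only
+ // supports SEW=8.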
unsigned NumElts =
- divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
+ (VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
+ NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
+ assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");
return MVT::getScalableVectorVT(EltVT, NumElts);
}
}
@@ -1344,7 +1570,7 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
MVT XLenVT = Subtarget.getXLenVT();
SDValue VL = VecVT.isFixedLengthVector()
? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
- : DAG.getRegister(RISCV::X0, XLenVT);
+ : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
return {Mask, VL};
@@ -1379,6 +1605,32 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
return false;
}
+static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) {
+ // RISCV FP-to-int conversions saturate to the destination register size, but
+ // don't produce 0 for NaN. We can use a conversion instruction and fix the
+ // NaN case with a compare and a select.
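+ // i.e. produce select (setuo src, src), 0, (fcvt src); the unordered
+ // self-compare is true exactly when src is a NaN.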
+ SDValue Src = Op.getOperand(0);
+
+ EVT DstVT = Op.getValueType();
+ EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
+ unsigned Opc;
+ if (SatVT == DstVT)
+ Opc = IsSigned ? RISCVISD::FCVT_X_RTZ : RISCVISD::FCVT_XU_RTZ;
+ else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+ Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
+ else
+ return SDValue();
+ // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+ SDLoc DL(Op);
+ SDValue FpToInt = DAG.getNode(Opc, DL, DstVT, Src);
+
+ SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+ return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -1397,13 +1649,18 @@ static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
}
struct VIDSequence {
- int64_t Step;
+ int64_t StepNumerator;
+ unsigned StepDenominator;
int64_t Addend;
};
// Try to match an arithmetic-sequence BUILD_VECTOR [X,X+S,X+2*S,...,X+(N-1)*S]
// to the (non-zero) step S and start value X. This can be then lowered as the
// RVV sequence (VID * S) + X, for example.
+// The step S is represented as an integer numerator divided by a positive
+// denominator. Note that the implementation currently only identifies
+// sequences in which either the numerator is +/- 1 or the denominator is 1. It
+// cannot detect 2/3, for example.
// Note that this method will also match potentially unappealing index
// sequences, like <i32 0, i32 50939494>, however it is left to the caller to
// determine whether this is worth generating code for.
@@ -1413,7 +1670,8 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (!Op.getValueType().isInteger())
return None;
- Optional<int64_t> SeqStep, SeqAddend;
+ Optional<unsigned> SeqStepDenom;
+ Optional<int64_t> SeqStepNum, SeqAddend;
Optional<std::pair<uint64_t, unsigned>> PrevElt;
unsigned EltSizeInBits = Op.getValueType().getScalarSizeInBits();
for (unsigned Idx = 0; Idx < NumElts; Idx++) {
@@ -1431,26 +1689,40 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (PrevElt) {
// Calculate the step since the last non-undef element, and ensure
// it's consistent across the entire sequence.
- int64_t Diff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
- // The difference must cleanly divide the element span.
- if (Diff % (Idx - PrevElt->second) != 0)
- return None;
- int64_t Step = Diff / (Idx - PrevElt->second);
- // A zero step indicates we're either a not an index sequence, or we
- // have a fractional step. This must be handled by a more complex
- // pattern recognition (undefs complicate things here).
- if (Step == 0)
- return None;
- if (!SeqStep)
- SeqStep = Step;
- else if (Step != SeqStep)
- return None;
+ unsigned IdxDiff = Idx - PrevElt->second;
+ int64_t ValDiff = SignExtend64(Val - PrevElt->first, EltSizeInBits);
+
+ // A value difference of zero means that we're somewhere in the middle
+ // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
+ // step change before evaluating the sequence.
+ if (ValDiff != 0) {
+ int64_t Remainder = ValDiff % IdxDiff;
+ // Normalize the step if it's greater than 1.
+ if (Remainder != ValDiff) {
+ // The difference must cleanly divide the element span.
+ if (Remainder != 0)
+ return None;
+ ValDiff /= IdxDiff;
+ IdxDiff = 1;
+ }
+
+ if (!SeqStepNum)
+ SeqStepNum = ValDiff;
+ else if (ValDiff != SeqStepNum)
+ return None;
+
+ if (!SeqStepDenom)
+ SeqStepDenom = IdxDiff;
+ else if (IdxDiff != *SeqStepDenom)
+ return None;
+ }
}
// Record and/or check any addend.
- if (SeqStep) {
- int64_t Addend =
- SignExtend64(Val - (Idx * (uint64_t)*SeqStep), EltSizeInBits);
+ if (SeqStepNum && SeqStepDenom) {
+ uint64_t ExpectedVal =
+ (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
+ int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
if (!SeqAddend)
SeqAddend = Addend;
else if (SeqAddend != Addend)
@@ -1458,14 +1730,15 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
}
// Record this non-undef element for later.
- PrevElt = std::make_pair(Val, Idx);
+ if (!PrevElt || PrevElt->first != Val)
+ PrevElt = std::make_pair(Val, Idx);
}
// We need to have logged both a step and an addend for this to count as
// a legal index sequence.
- if (!SeqStep || !SeqAddend)
+ if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
return None;
- return VIDSequence{*SeqStep, *SeqAddend};
+ return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
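Two worked examples of what the extended matcher accepts, and one it still rejects (illustrative values, not taken from the patch):

//   <0, 0, 1, 1, 2, 2, 3, 3> -> StepNumerator 1, StepDenominator 2, Addend 0
//                               (lowered below as vid >> 1)
//   <1, 4, 7, 10>            -> StepNumerator 3, StepDenominator 1, Addend 1
//                               (lowered below as vid * 3 + 1)
//   <0, 0, 1, 2, 2, 3>       -> rejected: the 2/3 step needs both a non-unit
//                               numerator and a non-unit denominator.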
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@@ -1599,31 +1872,38 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// with optional modifications. An all-undef vector is matched by
// getSplatValue, above.
if (auto SimpleVID = isSimpleVIDSequence(Op)) {
- int64_t Step = SimpleVID->Step;
+ int64_t StepNumerator = SimpleVID->StepNumerator;
+ unsigned StepDenominator = SimpleVID->StepDenominator;
int64_t Addend = SimpleVID->Addend;
    // Only emit VIDs with suitably-small steps/addends. We use imm5 as a
// threshold since it's the immediate value many RVV instructions accept.
- if (isInt<5>(Step) && isInt<5>(Addend)) {
+ if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) &&
+ isInt<5>(Addend)) {
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
// these, we'd lose the fixed-length vector info and generate worse
// vsetvli code.
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
- assert(Step != 0 && "Invalid step");
+ assert(StepNumerator != 0 && "Invalid step");
bool Negate = false;
- if (Step != 1) {
- int64_t SplatStepVal = Step;
+ if (StepNumerator != 1) {
+ int64_t SplatStepVal = StepNumerator;
unsigned Opcode = ISD::MUL;
- if (isPowerOf2_64(std::abs(Step))) {
- Negate = Step < 0;
+ if (isPowerOf2_64(std::abs(StepNumerator))) {
+ Negate = StepNumerator < 0;
Opcode = ISD::SHL;
- SplatStepVal = Log2_64(std::abs(Step));
+ SplatStepVal = Log2_64(std::abs(StepNumerator));
}
SDValue SplatStep = DAG.getSplatVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep);
}
+ if (StepDenominator != 1) {
+ SDValue SplatStep = DAG.getSplatVector(
+ VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+ VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
+ }
if (Addend != 0 || Negate) {
SDValue SplatAddend =
DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
@@ -1704,6 +1984,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned NumUndefElts =
count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
+ // Track the number of scalar loads we know we'd be inserting, estimated as
+ // any non-zero floating-point constant. Other kinds of element are either
+ // already in registers or are materialized on demand. The threshold at which
+ // a vector load is more desirable than several scalar materialization and
+ // vector-insertion instructions is not known.
+ unsigned NumScalarLoads = 0;
+
for (SDValue V : Op->op_values()) {
if (V.isUndef())
continue;
@@ -1711,6 +1998,9 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
+ NumScalarLoads += !CFP->isExactlyValue(+0.0);
+
// Is this value dominant? In case of a tie, prefer the highest element as
// it's cheaper to insert near the beginning of a vector than it is at the
// end.
@@ -1726,7 +2016,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Don't perform this optimization when optimizing for size, since
// materializing elements and inserting them tends to cause code bloat.
- if (!DAG.shouldOptForSize() &&
+ if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
((MostCommonCount > DominantValueCountThreshold) ||
(ValueCounts.size() <= Log2_32(NumDefElts)))) {
// Start by splatting the most common element.
@@ -1926,6 +2216,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
bool InvertMask = IsSelect == SwapOps;
+ // Keep track of which non-undef indices are used by each LHS/RHS shuffle
+ // half.
+ DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
+
// Now construct the mask that will be used by the vselect or blended
// vrgather operation. For vrgathers, construct the appropriate indices into
// each vector.
@@ -1940,6 +2234,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
GatherIndicesRHS.push_back(
IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
: DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
+ if (IsLHSOrUndefIndex && MaskIndex >= 0)
+ ++LHSIndexCounts[MaskIndex];
+ if (!IsLHSOrUndefIndex)
+ ++RHSIndexCounts[MaskIndex - NumElts];
}
}
@@ -1963,13 +2261,14 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
- unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
+ unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
+ unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
MVT IndexVT = VT.changeTypeToInteger();
// Since we can't introduce illegal index types at this stage, use i16 and
// vrgatherei16 if the corresponding index type for plain vrgather is greater
// than XLenVT.
if (IndexVT.getScalarType().bitsGT(XLenVT)) {
- GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
+ GatherVVOpc = RISCVISD::VRGATHEREI16_VV_VL;
IndexVT = IndexVT.changeVectorElementType(MVT::i16);
}
@@ -1982,28 +2281,48 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
} else {
- SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
- LHSIndices =
- convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
-
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
- Gather =
- DAG.getNode(GatherOpc, DL, ContainerVT, V1, LHSIndices, TrueMask, VL);
+ // If only one index is used, we can use a "splat" vrgather.
+ // TODO: We can splat the most-common index and fix up any stragglers, if
+ // that's beneficial.
+ if (LHSIndexCounts.size() == 1) {
+ int SplatIndex = LHSIndexCounts.begin()->getFirst();
+ Gather =
+ DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
+ DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ } else {
+ SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
+ LHSIndices =
+ convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
+
+ Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
+ TrueMask, VL);
+ }
}
// If a second vector operand is used by this shuffle, blend it in with an
// additional vrgather.
if (!V2.isUndef()) {
+ V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+ // If only one index is used, we can use a "splat" vrgather.
+ // TODO: We can splat the most-common index and fix up any stragglers, if
+ // that's beneficial.
+ if (RHSIndexCounts.size() == 1) {
+ int SplatIndex = RHSIndexCounts.begin()->getFirst();
+ V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
+ DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ } else {
+ SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
+ RHSIndices =
+ convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
+ V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
+ VL);
+ }
+
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SelectMask =
convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
- SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
- RHSIndices =
- convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
-
- V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
- V2 = DAG.getNode(GatherOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, VL);
Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
Gather, VL);
}
@@ -2026,6 +2345,57 @@ static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
}
+// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
+// the exponent.
+static SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ unsigned EltSize = VT.getScalarSizeInBits();
+ SDValue Src = Op.getOperand(0);
+ SDLoc DL(Op);
+
+ // We need a FP type that can represent the value.
+ // TODO: Use f16 for i8 when possible?
+ MVT FloatEltVT = EltSize == 32 ? MVT::f64 : MVT::f32;
+ MVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
+
+ // Legal types should have been checked in the RISCVTargetLowering
+ // constructor.
+ // TODO: Splitting may make sense in some cases.
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(FloatVT) &&
+ "Expected legal float type!");
+
+ // For CTTZ_ZERO_UNDEF, we need to extract the lowest set bit using X & -X.
+ // The trailing zero count is equal to log2 of this single bit value.
+ if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
+ SDValue Neg =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
+ Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
+ }
+
+ // We have a legal FP type, convert to it.
+ SDValue FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+ // Bitcast to integer and shift the exponent to the LSB.
+ EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
+ SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
+ unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT));
+ // Truncate back to original type to allow vnsrl.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Shift);
+ // The exponent contains log2 of the value in biased form.
+ unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
+
+ // For trailing zeros, we just need to subtract the bias.
+ if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
+ return DAG.getNode(ISD::SUB, DL, VT, Trunc,
+ DAG.getConstant(ExponentBias, DL, VT));
+
+ // For leading zeros, we need to remove the bias and convert from log2 to
+ // leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
+ unsigned Adjust = ExponentBias + (EltSize - 1);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Trunc);
+}
+
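A standalone scalar sketch of the same exponent trick (my own code, assuming IEEE-754 doubles; it mirrors the UINT_TO_FP / shift / subtract sequence above but is not part of the patch):

#include <cassert>
#include <cstdint>
#include <cstring>

// Count trailing/leading zeros of a non-zero 32-bit value (the ZERO_UNDEF
// contract) by reading the exponent field of the value converted to double.
static unsigned cttzViaDouble(uint32_t X) {
  uint32_t LowBit = X & (~X + 1);          // isolate the lowest set bit
  double D = static_cast<double>(LowBit);  // exact for any 32-bit value
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return unsigned(Bits >> 52) - 1023;      // unbiased exponent == log2(LowBit)
}

static unsigned ctlzViaDouble(uint32_t X) {
  double D = static_cast<double>(X);
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  return (1023 + 31) - unsigned(Bits >> 52);  // 31 - floor(log2(X))
}

int main() {
  assert(cttzViaDouble(16) == 4);   // 16 == 1 << 4
  assert(ctlzViaDouble(16) == 27);  // 32-bit clz of 16
  return 0;
}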
// While RVV has alignment restrictions, we should always be able to load as a
// legal equivalently-sized byte-typed vector instead. This method is
// responsible for re-expressing a ISD::LOAD via a correctly-aligned type. If
@@ -2132,7 +2502,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// into a one-element vector of the result type, and perform a vector
// bitcast.
if (!Op0VT.isVector()) {
- auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
+ EVT BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
+ if (!isTypeLegal(BVT))
+ return SDValue();
return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
DAG.getUNDEF(BVT), Op0,
DAG.getConstant(0, DL, XLenVT)));
@@ -2143,8 +2515,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// thus: bitcast the vector to a one-element vector type whose element type
// is the same as the result type, and extract the first element.
if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
- LLVMContext &Context = *DAG.getContext();
- SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
+ EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
+ if (!isTypeLegal(BVT))
+ return SDValue();
+ SDValue BVec = DAG.getBitcast(BVT, Op0);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT));
}
@@ -2166,6 +2540,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_W_CHAIN:
return LowerINTRINSIC_W_CHAIN(Op, DAG);
+ case ISD::INTRINSIC_VOID:
+ return LowerINTRINSIC_VOID(Op, DAG);
case ISD::BSWAP:
case ISD::BITREVERSE: {
// Convert BSWAP/BITREVERSE to GREVI to enable GREVI combinining.
@@ -2479,6 +2855,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
+ return lowerFP_TO_INT_SAT(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -2489,13 +2868,29 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
- return lowerVectorMaskVECREDUCE(Op, DAG);
+ return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ false);
return lowerVECREDUCE(Op, DAG);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_SEQ_FADD:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
return lowerFPVECREDUCE(Op, DAG);
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMIN:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_FADD:
+ case ISD::VP_REDUCE_SEQ_FADD:
+ case ISD::VP_REDUCE_FMIN:
+ case ISD::VP_REDUCE_FMAX:
+ return lowerVPREDUCE(Op, DAG);
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
+ return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
+ return lowerVPREDUCE(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
@@ -2538,9 +2933,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorStoreToRVV(Op, DAG);
return Op;
case ISD::MLOAD:
- return lowerMLOAD(Op, DAG);
+ case ISD::VP_LOAD:
+ return lowerMaskedLoad(Op, DAG);
case ISD::MSTORE:
- return lowerMSTORE(Op, DAG);
+ case ISD::VP_STORE:
+ return lowerMaskedStore(Op, DAG);
case ISD::SETCC:
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
case ISD::ADD:
@@ -2617,14 +3014,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
case ISD::ABS:
return lowerABS(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
case ISD::VSELECT:
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
case ISD::MGATHER:
- return lowerMGATHER(Op, DAG);
+ case ISD::VP_GATHER:
+ return lowerMaskedGather(Op, DAG);
case ISD::MSCATTER:
- return lowerMSCATTER(Op, DAG);
+ case ISD::VP_SCATTER:
+ return lowerMaskedScatter(Op, DAG);
case ISD::FLT_ROUNDS_:
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
@@ -2932,7 +3334,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
- SDValue TargetCC = DAG.getTargetConstant(CCVal, DL, XLenVT);
+ SDValue TargetCC = DAG.getCondCode(CCVal);
SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}
@@ -2941,7 +3343,7 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// (select condv, truev, falsev)
// -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
- SDValue SetNE = DAG.getTargetConstant(ISD::SETNE, DL, XLenVT);
+ SDValue SetNE = DAG.getCondCode(ISD::SETNE);
SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};
@@ -3200,7 +3602,7 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
// Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
- DAG.getRegister(RISCV::X0, MVT::i64));
+ DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
}
// Custom-lower extensions from mask vectors by using a vselect either with 1
@@ -3483,7 +3885,7 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
- if (!Subtarget.hasStdExtV())
+ if (!Subtarget.hasVInstructions())
return SDValue();
bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
@@ -3645,7 +4047,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_vslide1down_mask: {
// We need to special case these when the scalar is larger than XLen.
unsigned NumOps = Op.getNumOperands();
- bool IsMasked = NumOps == 6;
+ bool IsMasked = NumOps == 7;
unsigned OpOffset = IsMasked ? 1 : 0;
SDValue Scalar = Op.getOperand(2 + OpOffset);
if (Scalar.getValueType().bitsLE(XLenVT))
@@ -3670,7 +4072,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(1, DL, XLenVT));
// Double the VL since we halved SEW.
- SDValue VL = Op.getOperand(NumOps - 1);
+ SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
SDValue I32VL =
DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
@@ -3699,7 +4101,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return Vec;
// Apply mask after the operation.
- SDValue Mask = Op.getOperand(NumOps - 2);
+ SDValue Mask = Op.getOperand(NumOps - 3);
SDValue MaskedOff = Op.getOperand(1);
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
}
@@ -3710,9 +4112,113 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::riscv_masked_strided_load: {
+ SDLoc DL(Op);
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // If the mask is known to be all ones, optimize to an unmasked intrinsic;
+ // the selection of the masked intrinsics doesn't do this for us.
+ SDValue Mask = Op.getOperand(5);
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue PassThru = Op.getOperand(2);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
+ }
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+
+ SDValue IntID = DAG.getTargetConstant(
+ IsUnmasked ? Intrinsic::riscv_vlse : Intrinsic::riscv_vlse_mask, DL,
+ XLenVT);
+
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
+ if (!IsUnmasked)
+ Ops.push_back(PassThru);
+ Ops.push_back(Op.getOperand(3)); // Ptr
+ Ops.push_back(Op.getOperand(4)); // Stride
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked) {
+ SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
+ Ops.push_back(Policy);
+ }
+
+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
+ SDValue Chain = Result.getValue(1);
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+ return DAG.getMergeValues({Result, Chain}, DL);
+ }
+ }
+
return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
}
+SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::riscv_masked_strided_store: {
+ SDLoc DL(Op);
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // If the mask is known to be all ones, optimize to an unmasked intrinsic;
+ // the selection of the masked intrinsics doesn't do this for us.
+ SDValue Mask = Op.getOperand(5);
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ SDValue Val = Op.getOperand(2);
+ MVT VT = Val.getSimpleValueType();
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+
+ SDValue IntID = DAG.getTargetConstant(
+ IsUnmasked ? Intrinsic::riscv_vsse : Intrinsic::riscv_vsse_mask, DL,
+ XLenVT);
+
+ auto *Store = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<SDValue, 8> Ops{Store->getChain(), IntID};
+ Ops.push_back(Val);
+ Ops.push_back(Op.getOperand(3)); // Ptr
+ Ops.push_back(Op.getOperand(4)); // Stride
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, Store->getVTList(),
+ Ops, Store->getMemoryVT(),
+ Store->getMemOperand());
+ }
+ }
+
+ return SDValue();
+}
+
static MVT getLMUL1VT(MVT VT) {
assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
"Unexpected vector MVT");
@@ -3744,14 +4250,18 @@ static unsigned getRVVReductionOp(unsigned ISDOpcode) {
}
}
-SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
+ SelectionDAG &DAG,
+ bool IsVP) const {
SDLoc DL(Op);
- SDValue Vec = Op.getOperand(0);
+ SDValue Vec = Op.getOperand(IsVP ? 1 : 0);
MVT VecVT = Vec.getSimpleValueType();
assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
- Op.getOpcode() == ISD::VECREDUCE_XOR) &&
+ Op.getOpcode() == ISD::VECREDUCE_XOR ||
+ Op.getOpcode() == ISD::VP_REDUCE_AND ||
+ Op.getOpcode() == ISD::VP_REDUCE_OR ||
+ Op.getOpcode() == ISD::VP_REDUCE_XOR) &&
"Unexpected reduction lowering");
MVT XLenVT = Subtarget.getXLenVT();
@@ -3765,29 +4275,62 @@ SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
}
SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ if (IsVP) {
+ Mask = Op.getOperand(2);
+ VL = Op.getOperand(3);
+ } else {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ }
+
+ unsigned BaseOpc;
+ ISD::CondCode CC;
SDValue Zero = DAG.getConstant(0, DL, XLenVT);
switch (Op.getOpcode()) {
default:
llvm_unreachable("Unhandled reduction");
case ISD::VECREDUCE_AND:
- // vpopc ~x == 0
- Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
- Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
- return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
+ case ISD::VP_REDUCE_AND: {
+ // vcpop ~x == 0
+ SDValue TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
+ Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, TrueMask, VL);
+ Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
+ CC = ISD::SETEQ;
+ BaseOpc = ISD::AND;
+ break;
+ }
case ISD::VECREDUCE_OR:
- // vpopc x != 0
- Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
- return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
- case ISD::VECREDUCE_XOR: {
- // ((vpopc x) & 1) != 0
+ case ISD::VP_REDUCE_OR:
+ // vcpop x != 0
+ Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
+ CC = ISD::SETNE;
+ BaseOpc = ISD::OR;
+ break;
+ case ISD::VECREDUCE_XOR:
+ case ISD::VP_REDUCE_XOR: {
+ // ((vcpop x) & 1) != 0
SDValue One = DAG.getConstant(1, DL, XLenVT);
- Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
+ Vec = DAG.getNode(RISCVISD::VCPOP_VL, DL, XLenVT, Vec, Mask, VL);
Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
- return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
+ CC = ISD::SETNE;
+ BaseOpc = ISD::XOR;
+ break;
}
}
+
+ SDValue SetCC = DAG.getSetCC(DL, XLenVT, Vec, Zero, CC);
+
+ if (!IsVP)
+ return SetCC;
+
+ // Now include the start value in the operation.
+ // Note that we must return the start value when no elements are operated
+ // upon. The vcpop instructions we've emitted in each case above will return
+ // 0 for an inactive vector, and so we've already received the neutral value:
+ // AND gives us (0 == 0) -> 1 and OR/XOR give us (0 != 0) -> 0. Therefore we
+ // can simply include the start value.
+ return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
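In effect the VP forms compute the following (illustrative pseudo-formulas; vcpop(m, v, evl) stands for the masked population count emitted above):

//   vp.reduce.and(start, x, m, evl) -> start & (vcpop(m, ~x, evl) == 0)
//   vp.reduce.or (start, x, m, evl) -> start | (vcpop(m,  x, evl) != 0)
//   vp.reduce.xor(start, x, m, evl) -> start ^ ((vcpop(m, x, evl) & 1) != 0)
// With evl == 0 every count is 0 and each formula collapses to the start
// value, which is exactly the behaviour the comment above requires.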
SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
@@ -3833,8 +4376,8 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
SDValue NeutralElem =
DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
- SDValue Reduction =
- DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
+ SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
+ IdentitySplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
@@ -3892,12 +4435,83 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
// FIXME: This is a VLMAX splat which might be too large and can prevent
// vsetvli removal.
SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
- SDValue Reduction =
- DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
+ SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
+ VectorVal, ScalarSplat, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
DAG.getConstant(0, DL, Subtarget.getXLenVT()));
}
+static unsigned getRVVVPReductionOp(unsigned ISDOpcode) {
+ switch (ISDOpcode) {
+ default:
+ llvm_unreachable("Unhandled reduction");
+ case ISD::VP_REDUCE_ADD:
+ return RISCVISD::VECREDUCE_ADD_VL;
+ case ISD::VP_REDUCE_UMAX:
+ return RISCVISD::VECREDUCE_UMAX_VL;
+ case ISD::VP_REDUCE_SMAX:
+ return RISCVISD::VECREDUCE_SMAX_VL;
+ case ISD::VP_REDUCE_UMIN:
+ return RISCVISD::VECREDUCE_UMIN_VL;
+ case ISD::VP_REDUCE_SMIN:
+ return RISCVISD::VECREDUCE_SMIN_VL;
+ case ISD::VP_REDUCE_AND:
+ return RISCVISD::VECREDUCE_AND_VL;
+ case ISD::VP_REDUCE_OR:
+ return RISCVISD::VECREDUCE_OR_VL;
+ case ISD::VP_REDUCE_XOR:
+ return RISCVISD::VECREDUCE_XOR_VL;
+ case ISD::VP_REDUCE_FADD:
+ return RISCVISD::VECREDUCE_FADD_VL;
+ case ISD::VP_REDUCE_SEQ_FADD:
+ return RISCVISD::VECREDUCE_SEQ_FADD_VL;
+ case ISD::VP_REDUCE_FMAX:
+ return RISCVISD::VECREDUCE_FMAX_VL;
+ case ISD::VP_REDUCE_FMIN:
+ return RISCVISD::VECREDUCE_FMIN_VL;
+ }
+}
+
+SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Vec = Op.getOperand(1);
+ EVT VecEVT = Vec.getValueType();
+
+ // TODO: The type may need to be widened rather than split. Or widened before
+ // it can be split.
+ if (!isTypeLegal(VecEVT))
+ return SDValue();
+
+ MVT VecVT = VecEVT.getSimpleVT();
+ MVT VecEltVT = VecVT.getVectorElementType();
+ unsigned RVVOpcode = getRVVVPReductionOp(Op.getOpcode());
+
+ MVT ContainerVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VecVT);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
+ SDValue VL = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(2);
+
+ MVT M1VT = getLMUL1VT(ContainerVT);
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
+
+ // FIXME: This is a VLMAX splat which might be too large and can prevent
+ // vsetvli removal.
+ SDValue StartSplat = DAG.getSplatVector(M1VT, DL, Op.getOperand(0));
+ SDValue Reduction =
+ DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
+ SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ if (!VecVT.isInteger())
+ return Elt0;
+ return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
+}
+
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
@@ -4338,36 +4952,63 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
Store->getMemoryVT(), Store->getMemOperand());
}
-SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
- auto *Load = cast<MaskedLoadSDNode>(Op);
-
+SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VT = Op.getSimpleValueType();
- MVT XLenVT = Subtarget.getXLenVT();
- SDValue Mask = Load->getMask();
- SDValue PassThru = Load->getPassThru();
- SDValue VL;
+ const auto *MemSD = cast<MemSDNode>(Op);
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+
+ SDValue Mask, PassThru, VL;
+ if (const auto *VPLoad = dyn_cast<VPLoadSDNode>(Op)) {
+ Mask = VPLoad->getMask();
+ PassThru = DAG.getUNDEF(VT);
+ VL = VPLoad->getVectorLength();
+ } else {
+ const auto *MLoad = cast<MaskedLoadSDNode>(Op);
+ Mask = MLoad->getMask();
+ PassThru = MLoad->getPassThru();
+ }
+
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+ MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
-
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ if (!IsUnmasked)
+ Ops.push_back(PassThru);
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
- SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
- SDValue Ops[] = {Load->getChain(), IntID, PassThru,
- Load->getBasePtr(), Mask, VL};
+
SDValue Result =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
- Load->getMemoryVT(), Load->getMemOperand());
- SDValue Chain = Result.getValue(1);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ Chain = Result.getValue(1);
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
@@ -4375,32 +5016,58 @@ SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Result, Chain}, DL);
}
-SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
- auto *Store = cast<MaskedStoreSDNode>(Op);
-
+SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
- SDValue Val = Store->getValue();
- SDValue Mask = Store->getMask();
+
+ const auto *MemSD = cast<MemSDNode>(Op);
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+ SDValue Val, Mask, VL;
+
+ if (const auto *VPStore = dyn_cast<VPStoreSDNode>(Op)) {
+ Val = VPStore->getValue();
+ Mask = VPStore->getMask();
+ VL = VPStore->getVectorLength();
+ } else {
+ const auto *MStore = cast<MaskedStoreSDNode>(Op);
+ Val = MStore->getValue();
+ Mask = MStore->getMask();
+ }
+
+ bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
MVT VT = Val.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
- SDValue VL;
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
- Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!IsUnmasked) {
+ MVT MaskVT =
+ MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
- SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
- return DAG.getMemIntrinsicNode(
- ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
- {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
- Store->getMemoryVT(), Store->getMemOperand());
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+
+ unsigned IntID =
+ IsUnmasked ? Intrinsic::riscv_vse : Intrinsic::riscv_vse_mask;
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+ Ops.push_back(Val);
+ Ops.push_back(BasePtr);
+ if (!IsUnmasked)
+ Ops.push_back(Mask);
+ Ops.push_back(VL);
+
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue
@@ -4596,36 +5263,57 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
-// Custom lower MGATHER to a legalized form for RVV. It will then be matched to
-// a RVV indexed load. The RVV indexed load instructions only support the
-// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
-// truncated to XLEN and are treated as byte offsets. Any signed or scaled
-// indexing is extended to the XLEN value type and scaled accordingly.
-SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
- auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
+// Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be
+// matched to a RVV indexed load. The RVV indexed load instructions only
+// support the "unsigned unscaled" addressing mode; indices are implicitly
+// zero-extended or truncated to XLEN and are treated as byte offsets. Any
+// signed or scaled indexing is extended to the XLEN value type and scaled
+// accordingly.
+SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
- SDValue Index = MGN->getIndex();
- SDValue Mask = MGN->getMask();
- SDValue PassThru = MGN->getPassThru();
+ const auto *MemSD = cast<MemSDNode>(Op.getNode());
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+
+ ISD::LoadExtType LoadExtType;
+ SDValue Index, Mask, PassThru, VL;
+
+ if (auto *VPGN = dyn_cast<VPGatherSDNode>(Op.getNode())) {
+ Index = VPGN->getIndex();
+ Mask = VPGN->getMask();
+ PassThru = DAG.getUNDEF(VT);
+ VL = VPGN->getVectorLength();
+ // VP doesn't support extending loads.
+ LoadExtType = ISD::NON_EXTLOAD;
+ } else {
+ // Else it must be a MGATHER.
+ auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
+ Index = MGN->getIndex();
+ Mask = MGN->getMask();
+ PassThru = MGN->getPassThru();
+ LoadExtType = MGN->getExtensionType();
+ }
- MVT VT = Op.getSimpleValueType();
MVT IndexVT = Index.getSimpleValueType();
MVT XLenVT = Subtarget.getXLenVT();
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Unexpected VTs!");
- assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
- "Unexpected pointer type");
+ assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
// Targets have to explicitly opt-in for extending vector loads.
- assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
- "Unexpected extending MGATHER");
+ assert(LoadExtType == ISD::NON_EXTLOAD &&
+ "Unexpected extending MGATHER/VP_GATHER");
+ (void)LoadExtType;
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
- SDValue VL;
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
// We need to use the larger of the result and index type to determine the
@@ -4648,28 +5336,28 @@ SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
+ }
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
- SmallVector<SDValue, 8> Ops{MGN->getChain(),
- DAG.getTargetConstant(IntID, DL, XLenVT)};
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
if (!IsUnmasked)
Ops.push_back(PassThru);
- Ops.push_back(MGN->getBasePtr());
+ Ops.push_back(BasePtr);
Ops.push_back(Index);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
+ if (!IsUnmasked)
+ Ops.push_back(DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT));
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
SDValue Result =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
- MGN->getMemoryVT(), MGN->getMemOperand());
- SDValue Chain = Result.getValue(1);
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+ Chain = Result.getValue(1);
if (VT.isFixedLengthVector())
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
@@ -4677,18 +5365,39 @@ SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues({Result, Chain}, DL);
}
-// Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
-// a RVV indexed store. The RVV indexed store instructions only support the
-// "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
-// truncated to XLEN and are treated as byte offsets. Any signed or scaled
-// indexing is extended to the XLEN value type and scaled accordingly.
-SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
- SelectionDAG &DAG) const {
- auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
+// Custom lower MSCATTER/VP_SCATTER to a legalized form for RVV. It will then be
+// matched to a RVV indexed store. The RVV indexed store instructions only
+// support the "unsigned unscaled" addressing mode; indices are implicitly
+// zero-extended or truncated to XLEN and are treated as byte offsets. Any
+// signed or scaled indexing is extended to the XLEN value type and scaled
+// accordingly.
+SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
- SDValue Index = MSN->getIndex();
- SDValue Mask = MSN->getMask();
- SDValue Val = MSN->getValue();
+ const auto *MemSD = cast<MemSDNode>(Op.getNode());
+ EVT MemVT = MemSD->getMemoryVT();
+ MachineMemOperand *MMO = MemSD->getMemOperand();
+ SDValue Chain = MemSD->getChain();
+ SDValue BasePtr = MemSD->getBasePtr();
+
+ bool IsTruncatingStore = false;
+ SDValue Index, Mask, Val, VL;
+
+ if (auto *VPSN = dyn_cast<VPScatterSDNode>(Op.getNode())) {
+ Index = VPSN->getIndex();
+ Mask = VPSN->getMask();
+ Val = VPSN->getValue();
+ VL = VPSN->getVectorLength();
+ // VP doesn't support truncating stores.
+ IsTruncatingStore = false;
+ } else {
+ // Else it must be a MSCATTER.
+ auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
+ Index = MSN->getIndex();
+ Mask = MSN->getMask();
+ Val = MSN->getValue();
+ IsTruncatingStore = MSN->isTruncatingStore();
+ }
MVT VT = Val.getSimpleValueType();
MVT IndexVT = Index.getSimpleValueType();
@@ -4696,21 +5405,20 @@ SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
"Unexpected VTs!");
- assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
- "Unexpected pointer type");
+ assert(BasePtr.getSimpleValueType() == XLenVT && "Unexpected pointer type");
// Targets have to explicitly opt-in for extending vector loads and
// truncating vector stores.
- assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER");
+ assert(!IsTruncatingStore && "Unexpected truncating MSCATTER/VP_SCATTER");
+ (void)IsTruncatingStore;
// If the mask is known to be all ones, optimize to an unmasked intrinsic;
// the selection of the masked intrinsics doesn't do this for us.
bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
- SDValue VL;
+ MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
// We need to use the larger of the value and index type to determine the
// scalable type to use so we don't increase LMUL for any operand/result.
- MVT ContainerVT;
if (VT.bitsGE(IndexVT)) {
ContainerVT = getContainerForFixedLengthVector(VT);
IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
@@ -4729,24 +5437,23 @@ SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
+ }
- VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
- } else
- VL = DAG.getRegister(RISCV::X0, XLenVT);
+ if (!VL)
+ VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
unsigned IntID =
IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
- SmallVector<SDValue, 8> Ops{MSN->getChain(),
- DAG.getTargetConstant(IntID, DL, XLenVT)};
+ SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
Ops.push_back(Val);
- Ops.push_back(MSN->getBasePtr());
+ Ops.push_back(BasePtr);
Ops.push_back(Index);
if (!IsUnmasked)
Ops.push_back(Mask);
Ops.push_back(VL);
- return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
- MSN->getMemoryVT(), MSN->getMemOperand());
+ return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL,
+ DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
}
SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
@@ -4754,7 +5461,7 @@ SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
const MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
- SDValue SysRegNo = DAG.getConstant(
+ SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
@@ -4786,7 +5493,7 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
SDLoc DL(Op);
SDValue Chain = Op->getOperand(0);
SDValue RMValue = Op->getOperand(1);
- SDValue SysRegNo = DAG.getConstant(
+ SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
// Encoding used for rounding mode in RISCV differs from that used in
@@ -4891,7 +5598,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
if (!isTypeLegal(Op0.getValueType()))
return;
- unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+ unsigned Opc =
+ IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64;
SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -4973,8 +5681,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SUB:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- if (N->getOperand(1).getOpcode() == ISD::Constant)
- return;
Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
break;
case ISD::SHL:
@@ -4982,9 +5688,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SRL:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- if (N->getOperand(1).getOpcode() == ISD::Constant)
- return;
- Results.push_back(customLegalizeToWOp(N, DAG));
+ if (N->getOperand(1).getOpcode() != ISD::Constant) {
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
+ }
+
+ // Custom legalize ISD::SHL by placing a SIGN_EXTEND_INREG after. This is
+ // similar to customLegalizeToWOpWithSExt, but we must zero_extend the
+ // shift amount.
+ if (N->getOpcode() == ISD::SHL) {
+ SDLoc DL(N);
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewWOp = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0, NewOp1);
+ SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
+ DAG.getValueType(MVT::i32));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ }
+
break;
case ISD::ROTL:
case ISD::ROTR:
@@ -5098,10 +5821,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// scalar types in order to improve codegen. Bitcast the vector to a
// one-element vector type whose element type is the same as the result
// type, and extract the first element.
- LLVMContext &Context = *DAG.getContext();
- SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
- Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
- DAG.getConstant(0, DL, XLenVT)));
+ EVT BVT = EVT::getVectorVT(*DAG.getContext(), VT, 1);
+ if (isTypeLegal(BVT)) {
+ SDValue BVec = DAG.getBitcast(BVT, Op0);
+ Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
+ DAG.getConstant(0, DL, XLenVT)));
+ }
}
break;
}
@@ -5211,7 +5936,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
MVT XLenVT = Subtarget.getXLenVT();
// Use a VL of 1 to avoid processing more elements than we need.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue VL = DAG.getConstant(1, DL, XLenVT);
SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
@@ -5354,6 +6079,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
Results.push_back(V);
break;
+ case ISD::VP_REDUCE_ADD:
+ case ISD::VP_REDUCE_AND:
+ case ISD::VP_REDUCE_OR:
+ case ISD::VP_REDUCE_XOR:
+ case ISD::VP_REDUCE_SMAX:
+ case ISD::VP_REDUCE_UMAX:
+ case ISD::VP_REDUCE_SMIN:
+ case ISD::VP_REDUCE_UMIN:
+ if (SDValue V = lowerVPREDUCE(SDValue(N, 0), DAG))
+ Results.push_back(V);
+ break;
case ISD::FLT_ROUNDS_: {
SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
@@ -5656,6 +6392,52 @@ static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(Match1->ShAmt, DL, VT));
}
+// Optimize (add (shl x, c0), (shl y, c1)) ->
+// (SLLI (SH*ADD x, y), c0), if c1-c0 is 1, 2, or 3.
+static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ // Perform this optimization only in the zba extension.
+ if (!Subtarget.hasStdExtZba())
+ return SDValue();
+
+ // Skip for vector types and larger types.
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
+ return SDValue();
+
+ // The two operand nodes must be SHL and have no other use.
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0->getOpcode() != ISD::SHL || N1->getOpcode() != ISD::SHL ||
+ !N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+
+ // Check c0 and c1.
+ auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ if (!N0C || !N1C)
+ return SDValue();
+ int64_t C0 = N0C->getSExtValue();
+ int64_t C1 = N1C->getSExtValue();
+ if (C0 <= 0 || C1 <= 0)
+ return SDValue();
+
+ // Skip if SH1ADD/SH2ADD/SH3ADD are not applicable.
+ int64_t Bits = std::min(C0, C1);
+ int64_t Diff = std::abs(C0 - C1);
+ if (Diff != 1 && Diff != 2 && Diff != 3)
+ return SDValue();
+
+ // Build nodes.
+ SDLoc DL(N);
+ SDValue NS = (C0 < C1) ? N0->getOperand(0) : N1->getOperand(0);
+ SDValue NL = (C0 > C1) ? N0->getOperand(0) : N1->getOperand(0);
+ SDValue NA0 =
+ DAG.getNode(ISD::SHL, DL, VT, NL, DAG.getConstant(Diff, DL, VT));
+ SDValue NA1 = DAG.getNode(ISD::ADD, DL, VT, NA0, NS);
+ return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
+}
+
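A worked instance (illustrative): with c0 = 5 and c1 = 6, so Diff = 1 and Bits = 5:

//   (add (shl x, 5), (shl y, 6)) -> (shl (add (shl y, 1), x), 5)
// which selects to sh1add + slli, one instruction fewer than the
// slli + slli + add sequence it replaces.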
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
@@ -5691,17 +6473,27 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
// Combine a constant select operand into its use:
//
-// (and (select_cc lhs, rhs, cc, -1, c), x)
-// -> (select_cc lhs, rhs, cc, x, (and, x, c)) [AllOnes=1]
-// (or (select_cc lhs, rhs, cc, 0, c), x)
-// -> (select_cc lhs, rhs, cc, x, (or, x, c)) [AllOnes=0]
-// (xor (select_cc lhs, rhs, cc, 0, c), x)
-// -> (select_cc lhs, rhs, cc, x, (xor, x, c)) [AllOnes=0]
-static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
- SelectionDAG &DAG, bool AllOnes) {
+// (and (select cond, -1, c), x)
+// -> (select cond, x, (and x, c)) [AllOnes=1]
+// (or (select cond, 0, c), x)
+// -> (select cond, x, (or x, c)) [AllOnes=0]
+// (xor (select cond, 0, c), x)
+// -> (select cond, x, (xor x, c)) [AllOnes=0]
+// (add (select cond, 0, c), x)
+// -> (select cond, x, (add x, c)) [AllOnes=0]
+// (sub x, (select cond, 0, c))
+// -> (select cond, x, (sub x, c)) [AllOnes=0]
+static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
+ SelectionDAG &DAG, bool AllOnes) {
EVT VT = N->getValueType(0);
- if (Slct.getOpcode() != RISCVISD::SELECT_CC || !Slct.hasOneUse())
+ // Skip vectors.
+ if (VT.isVector())
+ return SDValue();
+
+ if ((Slct.getOpcode() != ISD::SELECT &&
+ Slct.getOpcode() != RISCVISD::SELECT_CC) ||
+ !Slct.hasOneUse())
return SDValue();
auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
@@ -5709,8 +6501,9 @@ static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
};
bool SwapSelectOps;
- SDValue TrueVal = Slct.getOperand(3);
- SDValue FalseVal = Slct.getOperand(4);
+ unsigned OpOffset = Slct.getOpcode() == RISCVISD::SELECT_CC ? 2 : 0;
+ SDValue TrueVal = Slct.getOperand(1 + OpOffset);
+ SDValue FalseVal = Slct.getOperand(2 + OpOffset);
SDValue NonConstantVal;
if (isZeroOrAllOnes(TrueVal, AllOnes)) {
SwapSelectOps = false;
@@ -5724,40 +6517,120 @@ static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
  // Slct is now known to be the desired identity constant when CC is true.
TrueVal = OtherOp;
FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
- // Unless SwapSelectOps says CC should be false.
+ // Unless SwapSelectOps says the condition should be false.
if (SwapSelectOps)
std::swap(TrueVal, FalseVal);
- return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
- {Slct.getOperand(0), Slct.getOperand(1),
- Slct.getOperand(2), TrueVal, FalseVal});
+ if (Slct.getOpcode() == RISCVISD::SELECT_CC)
+ return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
+ {Slct.getOperand(0), Slct.getOperand(1),
+ Slct.getOperand(2), TrueVal, FalseVal});
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
+ {Slct.getOperand(0), TrueVal, FalseVal});
}
// Attempt combineSelectAndUse on each operand of a commutative operator N.
-static SDValue combineSelectCCAndUseCommutative(SDNode *N, SelectionDAG &DAG,
- bool AllOnes) {
+static SDValue combineSelectAndUseCommutative(SDNode *N, SelectionDAG &DAG,
+ bool AllOnes) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- if (SDValue Result = combineSelectCCAndUse(N, N0, N1, DAG, AllOnes))
+ if (SDValue Result = combineSelectAndUse(N, N0, N1, DAG, AllOnes))
return Result;
- if (SDValue Result = combineSelectCCAndUse(N, N1, N0, DAG, AllOnes))
+ if (SDValue Result = combineSelectAndUse(N, N1, N0, DAG, AllOnes))
return Result;
return SDValue();
}
-static SDValue performANDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
+// Transform (add (mul x, c0), c1) ->
+// (add (mul (add x, c1/c0), c0), c1%c0).
+// if c1/c0 and c1%c0 are simm12, while c1 is not. A special corner case
+// that should be excluded is when c0*(c1/c0) is simm12, which will lead
+// to an infinite loop in DAGCombine if transformed.
+// Or transform (add (mul x, c0), c1) ->
+// (add (mul (add x, c1/c0+1), c0), c1%c0-c0),
+// if c1/c0+1 and c1%c0-c0 are simm12, while c1 is not. A special corner
+// case that should be excluded is when c0*(c1/c0+1) is simm12, which will
+// lead to an infinite loop in DAGCombine if transformed.
+// Or transform (add (mul x, c0), c1) ->
+// (add (mul (add x, c1/c0-1), c0), c1%c0+c0),
+// if c1/c0-1 and c1%c0+c0 are simm12, while c1 is not. A special corner
+// case that should be excluded is when c0*(c1/c0-1) is simm12, which will
+// lead to an infinite loop in DAGCombine if transformed.
+// Or transform (add (mul x, c0), c1) ->
+// (mul (add x, c1/c0), c0).
+// if c1%c0 is zero, and c1/c0 is simm12 while c1 is not.
+static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ // Skip for vector types and larger types.
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() || VT.getSizeInBits() > Subtarget.getXLen())
+ return SDValue();
+ // The first operand node must be a MUL and have no other uses.
+ SDValue N0 = N->getOperand(0);
+ if (!N0->hasOneUse() || N0->getOpcode() != ISD::MUL)
+ return SDValue();
+ // Check whether c0 and c1 satisfy the conditions described above.
+ auto *N0C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!N0C || !N1C)
+ return SDValue();
+ int64_t C0 = N0C->getSExtValue();
+ int64_t C1 = N1C->getSExtValue();
+ int64_t CA, CB;
+ if (C0 == -1 || C0 == 0 || C0 == 1 || isInt<12>(C1))
+ return SDValue();
+ // Search for a suitable non-zero CA and a CB such that both are simm12.
+ if ((C1 / C0) != 0 && isInt<12>(C1 / C0) && isInt<12>(C1 % C0) &&
+ !isInt<12>(C0 * (C1 / C0))) {
+ CA = C1 / C0;
+ CB = C1 % C0;
+ } else if ((C1 / C0 + 1) != 0 && isInt<12>(C1 / C0 + 1) &&
+ isInt<12>(C1 % C0 - C0) && !isInt<12>(C0 * (C1 / C0 + 1))) {
+ CA = C1 / C0 + 1;
+ CB = C1 % C0 - C0;
+ } else if ((C1 / C0 - 1) != 0 && isInt<12>(C1 / C0 - 1) &&
+ isInt<12>(C1 % C0 + C0) && !isInt<12>(C0 * (C1 / C0 - 1))) {
+ CA = C1 / C0 - 1;
+ CB = C1 % C0 + C0;
+ } else
+ return SDValue();
+ // Build new nodes (add (mul (add x, c1/c0), c0), c1%c0).
+ SDLoc DL(N);
+ SDValue New0 = DAG.getNode(ISD::ADD, DL, VT, N0->getOperand(0),
+ DAG.getConstant(CA, DL, VT));
+ SDValue New1 =
+ DAG.getNode(ISD::MUL, DL, VT, New0, DAG.getConstant(C0, DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
+}
+
+static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
+ if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
+ return V;
+ if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
+ return V;
+ // fold (add (select lhs, rhs, cc, 0, y), x) ->
+ // (select lhs, rhs, cc, x, (add x, y))
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
+}
+
+static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
+ // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
+ // (select lhs, rhs, cc, x, (sub x, y))
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
+}
- // fold (and (select_cc lhs, rhs, cc, -1, y), x) ->
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
+ // fold (and (select lhs, rhs, cc, -1, y), x) ->
// (select lhs, rhs, cc, x, (and x, y))
- return combineSelectCCAndUseCommutative(N, DAG, true);
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
}
-static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
if (Subtarget.hasStdExtZbp()) {
if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
return GREV;
@@ -5767,19 +6640,15 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
return SHFL;
}
- // fold (or (select_cc lhs, rhs, cc, 0, y), x) ->
- // (select lhs, rhs, cc, x, (or x, y))
- return combineSelectCCAndUseCommutative(N, DAG, false);
+ // fold (or (select cond, 0, y), x) ->
+ // (select cond, x, (or x, y))
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
-static SDValue performXORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const RISCVSubtarget &Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
-
- // fold (xor (select_cc lhs, rhs, cc, 0, y), x) ->
- // (select lhs, rhs, cc, x, (xor x, y))
- return combineSelectCCAndUseCommutative(N, DAG, false);
+static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
+ // fold (xor (select cond, 0, y), x) ->
+ // (select cond, x, (xor x, y))
+ return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
@@ -5814,6 +6683,13 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
break;
}
+ // Only handle cases where the result is used by a CopyToReg. That likely
+ // means the value is a liveout of the basic block. This helps prevent
+ // infinite combine loops like PR51206.
+ if (none_of(N->uses(),
+ [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
+ return SDValue();
+
SmallVector<SDNode *, 4> SetCCs;
for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
UE = Src.getNode()->use_end();
@@ -5859,10 +6735,105 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
return SDValue(N, 0);
}
+// Try to form VWMUL or VWMULU.
+// FIXME: Support VWMULSU.
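+// Roughly, the pattern being matched is (writing the VL nodes informally):
+//   (mul_vl (vsext_vl a, mask, vl), (vsext_vl b, mask, vl), mask, vl)
+//     -> (vwmul_vl a, b, mask, vl)
+// and likewise a pair of vzext_vl operands becomes vwmulu_vl; a splatted
+// scalar with enough sign/zero bits can stand in for one of the extends.
+// This is only a sketch; the exact operand checks are performed below.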
+static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
+ SelectionDAG &DAG) {
+ assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
+ bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
+ bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
+ if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
+ return SDValue();
+
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ // Make sure the mask and VL match.
+ if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL)
+ return SDValue();
+
+ MVT VT = N->getSimpleValueType(0);
+
+ // Determine the narrow size for a widening multiply.
+ unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+ MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
+ VT.getVectorElementCount());
+
+ SDLoc DL(N);
+
+ // See if the other operand is the same opcode.
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Make sure the mask and VL match.
+ if (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
+ return SDValue();
+
+ Op1 = Op1.getOperand(0);
+ } else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
+ // The operand is a splat of a scalar.
+
+ // The VL must be the same.
+ if (Op1.getOperand(1) != VL)
+ return SDValue();
+
+ // Get the scalar value.
+ Op1 = Op1.getOperand(0);
+
+ // See if we have enough sign bits or zero bits in the scalar to use a
+ // widening multiply by splatting to a smaller element size.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned ScalarBits = Op1.getValueSizeInBits();
+ // Make sure we're getting all element bits from the scalar register.
+ // FIXME: Support implicit sign extension of vmv.v.x?
+ if (ScalarBits < EltBits)
+ return SDValue();
+
+ if (IsSignExt) {
+ if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
+ return SDValue();
+ } else {
+ APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
+ if (!DAG.MaskedValueIsZero(Op1, Mask))
+ return SDValue();
+ }
+
+ Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
+ } else
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+
+ // Re-introduce narrower extends if needed.
+ unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
+ if (Op0.getValueType() != NarrowVT)
+ Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ if (Op1.getValueType() != NarrowVT)
+ Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+
+ unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
+ return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ // Helper to call SimplifyDemandedBits on an operand of N where only some low
+ // bits are demanded. N will be added to the Worklist if it was not deleted.
+ // Caller should return SDValue(N, 0) if this returns true.
+ auto SimplifyDemandedLowBitsHelper = [&](unsigned OpNo, unsigned LowBits) {
+ SDValue Op = N->getOperand(OpNo);
+ APInt Mask = APInt::getLowBitsSet(Op.getValueSizeInBits(), LowBits);
+ if (!SimplifyDemandedBits(Op, Mask, DCI))
+ return false;
+
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ DCI.AddToWorklist(N);
+ return true;
+ };
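+ // For instance, the ROLW/RORW case below uses
+ // SimplifyDemandedLowBitsHelper(1, 5) since only the low 5 bits of the
+ // rotate amount are read, which lets SimplifyDemandedBits strip redundant
+ // masking of that operand.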
+
switch (N->getOpcode()) {
default:
break;
@@ -5914,147 +6885,101 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::ROLW:
case RISCVISD::RORW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
- if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
- SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
- SDValue Op0 = N->getOperand(0);
- APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
- if (SimplifyDemandedBits(Op0, Mask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::FSL:
case RISCVISD::FSR: {
// Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
- SDValue ShAmt = N->getOperand(2);
- unsigned BitWidth = ShAmt.getValueSizeInBits();
+ unsigned BitWidth = N->getOperand(2).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
- if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(2, Log2_32(BitWidth) + 1))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::FSLW:
case RISCVISD::FSRW: {
// Only the lower 32 bits of Values and lower 6 bits of shift amount are
// read.
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- SDValue ShAmt = N->getOperand(2);
- APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
- APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
- if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
- SimplifyDemandedBits(Op1, OpMask, DCI) ||
- SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 32) ||
+ SimplifyDemandedLowBitsHelper(2, 6))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::GREV:
case RISCVISD::GORC: {
// Only the lower log2(Bitwidth) bits of the shift amount are read.
- SDValue ShAmt = N->getOperand(1);
- unsigned BitWidth = ShAmt.getValueSizeInBits();
+ unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- APInt ShAmtMask(BitWidth, BitWidth - 1);
- if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth)))
return SDValue(N, 0);
- }
return combineGREVI_GORCI(N, DCI.DAG);
}
case RISCVISD::GREVW:
case RISCVISD::GORCW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
- if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
- SimplifyDemandedBits(RHS, RHSMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- }
return combineGREVI_GORCI(N, DCI.DAG);
}
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
- // Only the lower log2(Bitwidth) bits of the the shift amount are read.
- SDValue ShAmt = N->getOperand(1);
- unsigned BitWidth = ShAmt.getValueSizeInBits();
+ // Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
+ unsigned BitWidth = N->getOperand(1).getValueSizeInBits();
assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
- APInt ShAmtMask(BitWidth, (BitWidth / 2) - 1);
- if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) - 1))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::SHFLW:
case RISCVISD::UNSHFLW: {
- // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+ // Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
- if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
- SimplifyDemandedBits(RHS, RHSMask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 4))
return SDValue(N, 0);
- }
break;
}
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW: {
// Only the lower 32 bits of LHS and RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt Mask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- if (SimplifyDemandedBits(LHS, Mask, DCI) ||
- SimplifyDemandedBits(RHS, Mask, DCI)) {
- if (N->getOpcode() != ISD::DELETED_NODE)
- DCI.AddToWorklist(N);
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 32))
return SDValue(N, 0);
- }
break;
}
+ case RISCVISD::FMV_X_ANYEXTH:
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
+ MVT VT = N->getSimpleValueType(0);
// If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
- // conversion is unnecessary and can be replaced with an ANY_EXTEND
- // of the FMV_W_X_RV64 operand.
- if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
- assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
+ // conversion is unnecessary and can be replaced with the FMV_W_X_RV64
+ // operand. Similar for FMV_X_ANYEXTH and FMV_H_X.
+ if ((N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 &&
+ Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) ||
+ (N->getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
+ Op0->getOpcode() == RISCVISD::FMV_H_X)) {
+ assert(Op0.getOperand(0).getValueType() == VT &&
"Unexpected value type!");
return Op0.getOperand(0);
}
@@ -6066,23 +6991,27 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
!Op0.getNode()->hasOneUse())
break;
- SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
- Op0.getOperand(0));
- APInt SignBit = APInt::getSignMask(32).sext(64);
+ SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
+ unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
+ APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
if (Op0.getOpcode() == ISD::FNEG)
- return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
- DAG.getConstant(SignBit, DL, MVT::i64));
+ return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
+ DAG.getConstant(SignBit, DL, VT));
assert(Op0.getOpcode() == ISD::FABS);
- return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
- DAG.getConstant(~SignBit, DL, MVT::i64));
+ return DAG.getNode(ISD::AND, DL, VT, NewFMV,
+ DAG.getConstant(~SignBit, DL, VT));
}
+ case ISD::ADD:
+ return performADDCombine(N, DAG, Subtarget);
+ case ISD::SUB:
+ return performSUBCombine(N, DAG);
case ISD::AND:
- return performANDCombine(N, DCI, Subtarget);
+ return performANDCombine(N, DAG);
case ISD::OR:
- return performORCombine(N, DCI, Subtarget);
+ return performORCombine(N, DAG, Subtarget);
case ISD::XOR:
- return performXORCombine(N, DCI, Subtarget);
+ return performXORCombine(N, DAG);
case ISD::ANY_EXTEND:
return performANY_EXTENDCombine(N, DCI, Subtarget);
case ISD::ZERO_EXTEND:
@@ -6099,7 +7028,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Transform
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
+ SDValue TrueV = N->getOperand(3);
+ SDValue FalseV = N->getOperand(4);
+
+ // If the True and False values are the same, we don't need a select_cc.
+ if (TrueV == FalseV)
+ return TrueV;
+
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
if (!ISD::isIntEqualitySetCC(CCVal))
break;
@@ -6120,11 +7056,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
LHS = LHS.getOperand(0);
translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);
- SDValue TargetCC =
- DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
- return DAG.getNode(
- RISCVISD::SELECT_CC, DL, N->getValueType(0),
- {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
+ SDValue TargetCC = DAG.getCondCode(CCVal);
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
+ {LHS, RHS, TargetCC, TrueV, FalseV});
}
// Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
@@ -6132,8 +7066,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
{LHS.getOperand(0), LHS.getOperand(1),
- N->getOperand(2), N->getOperand(3),
- N->getOperand(4)});
+ N->getOperand(2), TrueV, FalseV});
// (select_cc X, 1, setne, trueV, falseV) ->
// (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
// This can occur when legalizing some floating point comparisons.
@@ -6141,12 +7074,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
SDLoc DL(N);
CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
- SDValue TargetCC =
- DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
+ SDValue TargetCC = DAG.getCondCode(CCVal);
RHS = DAG.getConstant(0, DL, LHS.getValueType());
- return DAG.getNode(
- RISCVISD::SELECT_CC, DL, N->getValueType(0),
- {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
+ return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
+ {LHS, RHS, TargetCC, TrueV, FalseV});
}
break;
@@ -6227,18 +7158,33 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
}
case ISD::MGATHER:
- case ISD::MSCATTER: {
+ case ISD::MSCATTER:
+ case ISD::VP_GATHER:
+ case ISD::VP_SCATTER: {
if (!DCI.isBeforeLegalize())
break;
- MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
- SDValue Index = MGSN->getIndex();
+ SDValue Index, ScaleOp;
+ bool IsIndexScaled = false;
+ bool IsIndexSigned = false;
+ if (const auto *VPGSN = dyn_cast<VPGatherScatterSDNode>(N)) {
+ Index = VPGSN->getIndex();
+ ScaleOp = VPGSN->getScale();
+ IsIndexScaled = VPGSN->isIndexScaled();
+ IsIndexSigned = VPGSN->isIndexSigned();
+ } else {
+ const auto *MGSN = cast<MaskedGatherScatterSDNode>(N);
+ Index = MGSN->getIndex();
+ ScaleOp = MGSN->getScale();
+ IsIndexScaled = MGSN->isIndexScaled();
+ IsIndexSigned = MGSN->isIndexSigned();
+ }
EVT IndexVT = Index.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
// RISCV indexed loads only support the "unsigned unscaled" addressing
// mode, so anything else must be manually legalized.
- bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
- (MGSN->isIndexSigned() &&
- IndexVT.getVectorElementType().bitsLT(XLenVT));
+ bool NeedsIdxLegalization =
+ IsIndexScaled ||
+ (IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
if (!NeedsIdxLegalization)
break;
@@ -6247,36 +7193,48 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// Any index legalization should first promote to XLenVT, so we don't lose
// bits when scaling. This may create an illegal index type so we let
// LLVM's legalization take care of the splitting.
+ // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
IndexVT = IndexVT.changeVectorElementType(XLenVT);
- Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND,
+ Index = DAG.getNode(IsIndexSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
DL, IndexVT, Index);
}
- unsigned Scale = N->getConstantOperandVal(5);
- if (MGSN->isIndexScaled() && Scale != 1) {
+ unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
+ if (IsIndexScaled && Scale != 1) {
// Manually scale the indices by the element size.
// TODO: Sanitize the scale operand here?
+ // TODO: For VP nodes, should we use VP_SHL here?
assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
}
ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
- if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
+ if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
+ return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
+ {VPGN->getChain(), VPGN->getBasePtr(), Index,
+ VPGN->getScale(), VPGN->getMask(),
+ VPGN->getVectorLength()},
+ VPGN->getMemOperand(), NewIndexTy);
+ if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
+ return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
+ {VPSN->getChain(), VPSN->getValue(),
+ VPSN->getBasePtr(), Index, VPSN->getScale(),
+ VPSN->getMask(), VPSN->getVectorLength()},
+ VPSN->getMemOperand(), NewIndexTy);
+ if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
return DAG.getMaskedGather(
- N->getVTList(), MGSN->getMemoryVT(), DL,
- {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
- MGSN->getBasePtr(), Index, MGN->getScale()},
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, MGN->getScale()},
MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
- }
const auto *MSN = cast<MaskedScatterSDNode>(N);
return DAG.getMaskedScatter(
- N->getVTList(), MGSN->getMemoryVT(), DL,
- {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
- Index, MGSN->getScale()},
- MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, MSN->getScale()},
+ MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
}
case RISCVISD::SRA_VL:
case RISCVISD::SRL_VL:
@@ -6309,45 +7267,37 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::MUL_VL: {
- // Try to form VWMUL or VWMULU.
- // FIXME: Look for splat of extended scalar as well.
- // FIXME: Support VWMULSU.
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
- bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
- bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
- if ((!IsSignExt && !IsZeroExt) || Op0.getOpcode() != Op1.getOpcode())
- return SDValue();
-
- // Make sure the extends have a single use.
- if (!Op0.hasOneUse() || !Op1.hasOneUse())
- return SDValue();
-
- SDValue Mask = N->getOperand(2);
- SDValue VL = N->getOperand(3);
- if (Op0.getOperand(1) != Mask || Op1.getOperand(1) != Mask ||
- Op0.getOperand(2) != VL || Op1.getOperand(2) != VL)
- return SDValue();
-
- Op0 = Op0.getOperand(0);
- Op1 = Op1.getOperand(0);
-
- MVT VT = N->getSimpleValueType(0);
- MVT NarrowVT =
- MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() / 2),
- VT.getVectorElementCount());
-
- SDLoc DL(N);
-
- // Re-introduce narrower extends if needed.
- unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
- if (Op0.getValueType() != NarrowVT)
- Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
- if (Op1.getValueType() != NarrowVT)
- Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+ if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
+ return V;
+ if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
+ return V;
+ return SDValue();
+ }
+ case ISD::STORE: {
+ auto *Store = cast<StoreSDNode>(N);
+ SDValue Val = Store->getValue();
+ // Combine store of vmv.x.s to vse with VL of 1.
+ // FIXME: Support FP.
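+ // Sketch of the intent: for
+ //   t = (vmv_x_s v) ; store t, ptr
+ // where the vector element type matches the memory type, store the vector
+ // source directly with a VL-of-1 vse instead, avoiding the scalar move.
+ // The checks below guard the cases where this is legal.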
+ if (Val.getOpcode() == RISCVISD::VMV_X_S) {
+ SDValue Src = Val.getOperand(0);
+ EVT VecVT = Src.getValueType();
+ EVT MemVT = Store->getMemoryVT();
+ // The memory VT and the element type must match.
+ if (VecVT.getVectorElementType() == MemVT) {
+ SDLoc DL(N);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+ return DAG.getStoreVP(Store->getChain(), DL, Src, Store->getBasePtr(),
+ DAG.getConstant(1, DL, MaskVT),
+ DAG.getConstant(1, DL, Subtarget.getXLenVT()),
+ Store->getPointerInfo(),
+ Store->getOriginalAlign(),
+ Store->getMemOperand()->getFlags());
+ }
+ }
- unsigned WMulOpc = IsSignExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
- return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
+ break;
}
}
@@ -6479,7 +7429,7 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
else
return false;
- // Sanity check that our new mask is a subset of the demanded mask.
+ // Check that our new mask is a subset of the demanded mask.
assert(IsLegalMask(NewMask));
return UseMask(NewMask);
}
@@ -6609,6 +7559,12 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
switch (Op.getOpcode()) {
default:
break;
+ case RISCVISD::SELECT_CC: {
+ unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth + 1);
+ if (Tmp == 1) return 1; // Early out.
+ unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
+ return std::min(Tmp, Tmp2);
+ }
case RISCVISD::SLLW:
case RISCVISD::SRAW:
case RISCVISD::SRLW:
@@ -6625,8 +7581,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::UNSHFLW:
case RISCVISD::BCOMPRESSW:
case RISCVISD::BDECOMPRESSW:
- case RISCVISD::FCVT_W_RV64:
- case RISCVISD::FCVT_WU_RV64:
+ case RISCVISD::FCVT_W_RTZ_RV64:
+ case RISCVISD::FCVT_WU_RTZ_RV64:
// TODO: As the result is sign-extended, this is conservatively correct. A
// more precise answer could be calculated for SRAW depending on known
// bits in the shift amount.
@@ -6803,7 +7759,8 @@ static bool isSelectPseudo(MachineInstr &MI) {
}
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
- MachineBasicBlock *BB) {
+ MachineBasicBlock *BB,
+ const RISCVSubtarget &Subtarget) {
// To "insert" Select_* instructions, we actually have to insert the triangle
// control-flow pattern. The incoming instructions know the destination vreg
// to set, the condition code register to branch on, the true/false values to
@@ -6830,7 +7787,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
// related approach and more information.
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
- auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
+ auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
SmallVector<MachineInstr *, 4> SelectDebugValues;
SmallSet<Register, 4> SelectDests;
@@ -6863,7 +7820,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
}
}
- const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+ const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
DebugLoc DL = MI.getDebugLoc();
MachineFunction::iterator I = ++BB->getIterator();
@@ -6892,9 +7849,7 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
HeadMBB->addSuccessor(TailMBB);
// Insert appropriate branch.
- unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
-
- BuildMI(HeadMBB, DL, TII.get(Opcode))
+ BuildMI(HeadMBB, DL, TII.getBrCond(CC))
.addReg(LHS)
.addReg(RHS)
.addMBB(TailMBB);
@@ -6939,7 +7894,7 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case RISCV::Select_FPR16_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
- return emitSelectPseudo(MI, BB);
+ return emitSelectPseudo(MI, BB, Subtarget);
case RISCV::BuildPairF64Pseudo:
return emitBuildPairF64Pseudo(MI, BB);
case RISCV::SplitF64Pseudo:
@@ -7258,7 +8213,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
}
assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
- (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
+ (TLI.getSubtarget().hasVInstructions() && ValVT.isVector())) &&
"Expected an XLenVT or vector types at this stage");
if (Reg) {
@@ -7294,7 +8249,7 @@ void RISCVTargetLowering::analyzeInputArgs(
FunctionType *FType = MF.getFunction().getFunctionType();
Optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasStdExtV())
+ if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Ins);
for (unsigned i = 0; i != NumArgs; ++i) {
@@ -7325,7 +8280,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
unsigned NumArgs = Outs.size();
Optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasStdExtV())
+ if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
for (unsigned i = 0; i != NumArgs; i++) {
@@ -8170,7 +9125,7 @@ bool RISCVTargetLowering::CanLowerReturn(
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
Optional<unsigned> FirstMaskArgument;
- if (Subtarget.hasStdExtV())
+ if (Subtarget.hasVInstructions())
FirstMaskArgument = preAssignMask(Outs);
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
@@ -8339,8 +9294,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FMV_X_ANYEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
- NODE_NAME_CASE(FCVT_W_RV64)
- NODE_NAME_CASE(FCVT_WU_RV64)
+ NODE_NAME_CASE(FCVT_X_RTZ)
+ NODE_NAME_CASE(FCVT_XU_RTZ)
+ NODE_NAME_CASE(FCVT_W_RTZ_RV64)
+ NODE_NAME_CASE(FCVT_WU_RTZ_RV64)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(GREV)
NODE_NAME_CASE(GREVW)
@@ -8435,7 +9392,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VRGATHEREI16_VV_VL)
NODE_NAME_CASE(VSEXT_VL)
NODE_NAME_CASE(VZEXT_VL)
- NODE_NAME_CASE(VPOPC_VL)
+ NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VLE_VL)
NODE_NAME_CASE(VSE_VL)
NODE_NAME_CASE(READ_CSR)
@@ -8456,7 +9413,6 @@ RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
default:
break;
case 'f':
- case 'v':
return C_RegisterClass;
case 'I':
case 'J':
@@ -8467,6 +9423,9 @@ RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
case 'S': // A symbolic address
return C_Other;
}
+ } else {
+ if (Constraint == "vr" || Constraint == "vm")
+ return C_RegisterClass;
}
return TargetLowering::getConstraintType(Constraint);
}
@@ -8489,16 +9448,19 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasStdExtD() && VT == MVT::f64)
return std::make_pair(0U, &RISCV::FPR64RegClass);
break;
- case 'v':
- for (const auto *RC :
- {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
- &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
+ default:
+ break;
+ }
+ } else {
+ if (Constraint == "vr") {
+ for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass,
+ &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
return std::make_pair(0U, RC);
}
- break;
- default:
- break;
+ } else if (Constraint == "vm") {
+ if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
+ return std::make_pair(0U, &RISCV::VMRegClass);
}
}
@@ -8596,7 +9558,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- if (Subtarget.hasStdExtV()) {
+ if (Subtarget.hasVInstructions()) {
Register VReg = StringSwitch<Register>(Constraint.lower())
.Case("{v0}", RISCV::V0)
.Case("{v1}", RISCV::V1)
@@ -8934,6 +9896,11 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
return true;
+ // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
+ if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
+ ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
+ (Imm - 8).isPowerOf2()))
+ return true;
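+ // For example (illustrative values only), Imm = 2050 = 2 + 2048 decomposes
+ // as (mul x, 2050) -> (SH1ADD x, (SLLI x, 11)), i.e. (x << 1) + (x << 11),
+ // avoiding materializing a constant that does not fit in simm12.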
// Omit the following optimization if the subtarget has the M extension
// and the data size >= XLen.
if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
@@ -8952,6 +9919,29 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
+bool RISCVTargetLowering::isMulAddWithConstProfitable(
+ const SDValue &AddNode, const SDValue &ConstNode) const {
+ // Let the DAGCombiner decide for vectors.
+ EVT VT = AddNode.getValueType();
+ if (VT.isVector())
+ return true;
+
+ // Let the DAGCombiner decide for larger types.
+ if (VT.getScalarSizeInBits() > Subtarget.getXLen())
+ return true;
+
+ // It is worse if c1 is simm12 while c1*c2 is not.
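+ // For example (values for illustration only): with c1 = 2000 and c2 = 3,
+ // (mul (add x, 2000), 3) keeps 2000 in an ADDI, whereas folding to
+ // (add (mul x, 3), 6000) would need 6000 materialized separately since it
+ // does not fit in simm12.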
+ ConstantSDNode *C1Node = cast<ConstantSDNode>(AddNode.getOperand(1));
+ ConstantSDNode *C2Node = cast<ConstantSDNode>(ConstNode);
+ const APInt &C1 = C1Node->getAPIntValue();
+ const APInt &C2 = C2Node->getAPIntValue();
+ if (C1.isSignedIntN(12) && !(C1 * C2).isSignedIntN(12))
+ return false;
+
+ // Default to true and let the DAGCombiner decide.
+ return true;
+}
+
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 0e71220da3b3..8e3d716ae919 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -84,10 +84,16 @@ enum NodeType : unsigned {
FMV_X_ANYEXTH,
FMV_W_X_RV64,
FMV_X_ANYEXTW_RV64,
+ // FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
+ // fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
+ // range inputs. These are used for FP_TO_S/UINT_SAT lowering.
+ FCVT_X_RTZ,
+ FCVT_XU_RTZ,
// FP to 32 bit int conversions for RV64. These are used to keep track of the
- // result being sign extended to 64 bit.
- FCVT_W_RV64,
- FCVT_WU_RV64,
+ // result being sign extended to 64 bit. These saturate out of range inputs.
+ // Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering.
+ FCVT_W_RTZ_RV64,
+ FCVT_WU_RTZ_RV64,
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
READ_CYCLE_WIDE,
@@ -158,12 +164,13 @@ enum NodeType : unsigned {
VFNCVT_ROD_VL,
// These nodes match the semantics of the corresponding RVV vector reduction
// instructions. They produce a vector result which is the reduction
- // performed over the first vector operand plus the first element of the
- // second vector operand. The first operand is an unconstrained vector type,
- // and the result and second operand's types are expected to be the
- // corresponding full-width LMUL=1 type for the first operand:
- // nxv8i8 = vecreduce_add nxv32i8, nxv8i8
- // nxv2i32 = vecreduce_add nxv8i32, nxv2i32
+ // performed over the second vector operand plus the first element of the
+ // third vector operand. The first operand is the pass-thru operand. The
+ // second operand is an unconstrained vector type, and the result, first, and
+ // third operand's types are expected to be the corresponding full-width
+ // LMUL=1 type for the second operand:
+ // nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
+ // nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
// The difference in types does introduce extra vsetvli instructions, but
// similarly it reduces the number of registers consumed per reduction.
// Also has a mask and VL operand.
@@ -256,8 +263,8 @@ enum NodeType : unsigned {
VSEXT_VL,
VZEXT_VL,
- // vpopc.m with additional mask and VL operands.
- VPOPC_VL,
+ // vcpop.m with additional mask and VL operands.
+ VCPOP_VL,
// Reads value of CSR.
// The first operand is a chain pointer. The second specifies address of the
@@ -308,6 +315,9 @@ public:
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ bool hasAndNot(SDValue Y) const override;
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
@@ -455,6 +465,9 @@ public:
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
+ bool isMulAddWithConstProfitable(const SDValue &AddNode,
+ const SDValue &ConstNode) const override;
+
TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
@@ -499,6 +512,8 @@ public:
bool shouldRemoveExtendFromGSIndex(EVT VT) const override;
+ bool isLegalElementTypeForRVV(Type *ScalarTy) const;
+
private:
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
@@ -547,20 +562,23 @@ private:
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerVectorMaskVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
+ bool IsVP) const;
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMLOAD(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
SelectionDAG &DAG) const;
- SDValue lowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
@@ -602,6 +620,14 @@ private:
/// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
/// this override can be removed.
bool mergeStoresAfterLegalization(EVT VT) const override;
+
+ /// Disable normalizing
+ /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
+ /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)).
+ /// RISCV doesn't have flags so it's better to perform the and/or in a GPR.
+ bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
+ return false;
+ };
};
namespace RISCV {
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index fb7cb408cade..dbfc90f36f80 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -58,12 +58,13 @@ class VSETVLIInfo {
uint8_t TailAgnostic : 1;
uint8_t MaskAgnostic : 1;
uint8_t MaskRegOp : 1;
+ uint8_t StoreOp : 1;
uint8_t SEWLMULRatioOnly : 1;
public:
VSETVLIInfo()
: AVLImm(0), TailAgnostic(false), MaskAgnostic(false), MaskRegOp(false),
- SEWLMULRatioOnly(false) {}
+ StoreOp(false), SEWLMULRatioOnly(false) {}
static VSETVLIInfo getUnknown() {
VSETVLIInfo Info;
@@ -118,7 +119,8 @@ public:
TailAgnostic = RISCVVType::isTailAgnostic(VType);
MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
}
- void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO) {
+ void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA, bool MRO,
+ bool IsStore) {
assert(isValid() && !isUnknown() &&
"Can't set VTYPE for uninitialized or unknown");
VLMul = L;
@@ -126,6 +128,7 @@ public:
TailAgnostic = TA;
MaskAgnostic = MA;
MaskRegOp = MRO;
+ StoreOp = IsStore;
}
unsigned encodeVTYPE() const {
@@ -148,10 +151,7 @@ public:
Other.MaskAgnostic);
}
- // Convert VLMUL to a fixed point value with 3 bits of fraction.
- unsigned getSEWLMULRatio() const {
- assert(isValid() && !isUnknown() &&
- "Can't use VTYPE for uninitialized or unknown");
+ static unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
unsigned LMul;
bool Fractional;
std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(VLMul);
@@ -163,6 +163,12 @@ public:
return (SEW * 8) / LMul;
}
+ unsigned getSEWLMULRatio() const {
+ assert(isValid() && !isUnknown() &&
+ "Can't use VTYPE for uninitialized or unknown");
+ return getSEWLMULRatio(SEW, VLMul);
+ }
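+ // For example, SEW=32/LMUL=1 and SEW=16/LMUL=1/2 both give the same
+ // SEW/LMUL ratio (and therefore the same VLMAX), while SEW=32/LMUL=2 does
+ // not.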
+
// Check if the VTYPE for these two VSETVLIInfos produce the same VLMAX.
bool hasSameVLMAX(const VSETVLIInfo &Other) const {
assert(isValid() && Other.isValid() &&
@@ -172,10 +178,30 @@ public:
return getSEWLMULRatio() == Other.getSEWLMULRatio();
}
+ bool hasCompatibleVTYPE(const VSETVLIInfo &InstrInfo, bool Strict) const {
+ // Simple case, see if full VTYPE matches.
+ if (hasSameVTYPE(InstrInfo))
+ return true;
+
+ if (Strict)
+ return false;
+
+ // If this is a mask reg operation, it only cares about VLMAX.
+ // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
+ // than "InstrInfo".
+ // FIXME: The policy bits can probably be ignored for mask reg operations.
+ if (InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
+ TailAgnostic == InstrInfo.TailAgnostic &&
+ MaskAgnostic == InstrInfo.MaskAgnostic)
+ return true;
+
+ return false;
+ }
+
// Determine whether the vector instructions requirements represented by
// InstrInfo are compatible with the previous vsetvli instruction represented
// by this.
- bool isCompatible(const VSETVLIInfo &InstrInfo) const {
+ bool isCompatible(const VSETVLIInfo &InstrInfo, bool Strict) const {
assert(isValid() && InstrInfo.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!InstrInfo.SEWLMULRatioOnly &&
@@ -190,22 +216,52 @@ public:
// If the instruction doesn't need an AVLReg and the SEW matches, consider
// it compatible.
- if (InstrInfo.hasAVLReg() && InstrInfo.AVLReg == RISCV::NoRegister) {
+ if (!Strict && InstrInfo.hasAVLReg() &&
+ InstrInfo.AVLReg == RISCV::NoRegister) {
if (SEW == InstrInfo.SEW)
return true;
}
- // VTypes must match unless the instruction is a mask reg operation, then it
- // only care about VLMAX.
- // FIXME: Mask reg operations are probably ok if "this" VLMAX is larger
- // than "InstrInfo".
- if (!hasSameVTYPE(InstrInfo) &&
- !(InstrInfo.MaskRegOp && hasSameVLMAX(InstrInfo) &&
- TailAgnostic == InstrInfo.TailAgnostic &&
- MaskAgnostic == InstrInfo.MaskAgnostic))
+ // The AVL must match.
+ if (!hasSameAVL(InstrInfo))
return false;
- return hasSameAVL(InstrInfo);
+ if (hasCompatibleVTYPE(InstrInfo, Strict))
+ return true;
+
+ // Strict matches must ensure a full VTYPE match.
+ if (Strict)
+ return false;
+
+ // Store instructions don't use the policy fields.
+ // TODO: Move into hasCompatibleVTYPE?
+ if (InstrInfo.StoreOp && VLMul == InstrInfo.VLMul && SEW == InstrInfo.SEW)
+ return true;
+
+ // Anything else is not compatible.
+ return false;
+ }
+
+ bool isCompatibleWithLoadStoreEEW(unsigned EEW,
+ const VSETVLIInfo &InstrInfo) const {
+ assert(isValid() && InstrInfo.isValid() &&
+ "Can't compare invalid VSETVLIInfos");
+ assert(!InstrInfo.SEWLMULRatioOnly &&
+ "Expected a valid VTYPE for instruction!");
+ assert(EEW == InstrInfo.SEW && "Mismatched EEW/SEW for store");
+
+ if (isUnknown() || hasSEWLMULRatioOnly())
+ return false;
+
+ if (!hasSameAVL(InstrInfo))
+ return false;
+
+ // Stores can ignore the tail and mask policies.
+ if (!InstrInfo.StoreOp && (TailAgnostic != InstrInfo.TailAgnostic ||
+ MaskAgnostic != InstrInfo.MaskAgnostic))
+ return false;
+
+ return getSEWLMULRatio() == getSEWLMULRatio(EEW, InstrInfo.VLMul);
}
bool operator==(const VSETVLIInfo &Other) const {
@@ -278,7 +334,7 @@ public:
// If the change is compatible with the input, we won't create a VSETVLI
// and should keep the predecessor.
- if (isCompatible(Other))
+ if (isCompatible(Other, /*Strict*/ true))
return *this;
// Otherwise just use whatever is in this block.
@@ -362,14 +418,7 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
unsigned NumOperands = MI.getNumExplicitOperands();
-
- RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
-
- unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
- // A Log2SEW of 0 is an operation on mask registers only.
- bool MaskRegOp = Log2SEW == 0;
- unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
- assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+ bool HasPolicy = RISCVII::hasVecPolicyOp(TSFlags);
// Default to tail agnostic unless the destination is tied to a source.
// Unless the source is undef. In that case the user would have some control
@@ -377,8 +426,15 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
// despite having a tied def.
bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
bool TailAgnostic = true;
+ // If the instruction has a policy argument, use it.
+ if (HasPolicy) {
+ const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
+ TailAgnostic = Op.getImm() & 0x1;
+ }
+
unsigned UseOpIdx;
- if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ if (!(ForceTailAgnostic || (HasPolicy && TailAgnostic)) &&
+ MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
TailAgnostic = false;
// If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
@@ -390,16 +446,38 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
}
}
+ // Remove the tail policy so we can find the SEW and VL.
+ if (HasPolicy)
+ --NumOperands;
+
+ RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
+
+ unsigned Log2SEW = MI.getOperand(NumOperands - 1).getImm();
+ // A Log2SEW of 0 is an operation on mask registers only.
+ bool MaskRegOp = Log2SEW == 0;
+ unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
+ assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
+
+ // If there are no explicit defs, this is a store instruction which can
+ // ignore the tail and mask policies.
+ bool StoreOp = MI.getNumExplicitDefs() == 0;
+
if (RISCVII::hasVLOp(TSFlags)) {
- const MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
- if (VLOp.isImm())
- InstrInfo.setAVLImm(VLOp.getImm());
- else
+ const MachineOperand &VLOp = MI.getOperand(NumOperands - 2);
+ if (VLOp.isImm()) {
+ int64_t Imm = VLOp.getImm();
+ // Convert the VLMax sentinel to the X0 register.
+ if (Imm == RISCV::VLMaxSentinel)
+ InstrInfo.setAVLReg(RISCV::X0);
+ else
+ InstrInfo.setAVLImm(Imm);
+ } else {
InstrInfo.setAVLReg(VLOp.getReg());
+ }
} else
InstrInfo.setAVLReg(RISCV::NoRegister);
InstrInfo.setVTYPE(VLMul, SEW, /*TailAgnostic*/ TailAgnostic,
- /*MaskAgnostic*/ false, MaskRegOp);
+ /*MaskAgnostic*/ false, MaskRegOp, StoreOp);
return InstrInfo;
}
@@ -413,7 +491,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// VLMAX.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
@@ -435,7 +513,7 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// the previous vl to become invalid.
if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLIX0))
.addReg(RISCV::X0, RegState::Define | RegState::Dead)
.addReg(RISCV::X0, RegState::Kill)
.addImm(Info.encodeVTYPE())
@@ -450,11 +528,19 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
return;
}
- // Use X0 as the DestReg unless AVLReg is X0.
+ if (AVLReg.isVirtual())
+ MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);
+
+ // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
+ // opcode if the AVLReg is X0 as they have different register classes for
+ // the AVL operand.
Register DestReg = RISCV::X0;
- if (AVLReg == RISCV::X0)
+ unsigned Opcode = RISCV::PseudoVSETVLI;
+ if (AVLReg == RISCV::X0) {
DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoVSETVLI))
+ Opcode = RISCV::PseudoVSETVLIX0;
+ }
+ BuildMI(MBB, MI, DL, TII->get(Opcode))
.addReg(DestReg, RegState::Define | RegState::Dead)
.addReg(AVLReg)
.addImm(Info.encodeVTYPE());
@@ -464,14 +550,15 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
VSETVLIInfo NewInfo;
- if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
Register AVLReg = MI.getOperand(1).getReg();
assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
"Can't handle X0, X0 vsetvli yet");
NewInfo.setAVLReg(AVLReg);
- } else {
- assert(MI.getOpcode() == RISCV::PseudoVSETIVLI);
- NewInfo.setAVLImm(MI.getOperand(1).getImm());
}
NewInfo.setVTYPE(MI.getOperand(2).getImm());
@@ -480,7 +567,7 @@ static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
const VSETVLIInfo &CurInfo) {
- if (CurInfo.isCompatible(Require))
+ if (CurInfo.isCompatible(Require, /*Strict*/ false))
return false;
// We didn't find a compatible value. If our AVL is a virtual register,
@@ -489,9 +576,10 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
// VSETVLI here.
if (!CurInfo.isUnknown() && Require.hasAVLReg() &&
Require.getAVLReg().isVirtual() && !CurInfo.hasSEWLMULRatioOnly() &&
- Require.hasSameVTYPE(CurInfo)) {
+ CurInfo.hasCompatibleVTYPE(Require, /*Strict*/ false)) {
if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
if (DefMI->getOpcode() == RISCV::PseudoVSETVLI ||
+ DefMI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
DefMI->getOpcode() == RISCV::PseudoVSETIVLI) {
VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVTYPE(CurInfo))
@@ -503,6 +591,202 @@ bool RISCVInsertVSETVLI::needVSETVLI(const VSETVLIInfo &Require,
return true;
}
+bool canSkipVSETVLIForLoadStore(const MachineInstr &MI,
+ const VSETVLIInfo &Require,
+ const VSETVLIInfo &CurInfo) {
+ unsigned EEW;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case RISCV::PseudoVLE8_V_M1:
+ case RISCV::PseudoVLE8_V_M1_MASK:
+ case RISCV::PseudoVLE8_V_M2:
+ case RISCV::PseudoVLE8_V_M2_MASK:
+ case RISCV::PseudoVLE8_V_M4:
+ case RISCV::PseudoVLE8_V_M4_MASK:
+ case RISCV::PseudoVLE8_V_M8:
+ case RISCV::PseudoVLE8_V_M8_MASK:
+ case RISCV::PseudoVLE8_V_MF2:
+ case RISCV::PseudoVLE8_V_MF2_MASK:
+ case RISCV::PseudoVLE8_V_MF4:
+ case RISCV::PseudoVLE8_V_MF4_MASK:
+ case RISCV::PseudoVLE8_V_MF8:
+ case RISCV::PseudoVLE8_V_MF8_MASK:
+ case RISCV::PseudoVLSE8_V_M1:
+ case RISCV::PseudoVLSE8_V_M1_MASK:
+ case RISCV::PseudoVLSE8_V_M2:
+ case RISCV::PseudoVLSE8_V_M2_MASK:
+ case RISCV::PseudoVLSE8_V_M4:
+ case RISCV::PseudoVLSE8_V_M4_MASK:
+ case RISCV::PseudoVLSE8_V_M8:
+ case RISCV::PseudoVLSE8_V_M8_MASK:
+ case RISCV::PseudoVLSE8_V_MF2:
+ case RISCV::PseudoVLSE8_V_MF2_MASK:
+ case RISCV::PseudoVLSE8_V_MF4:
+ case RISCV::PseudoVLSE8_V_MF4_MASK:
+ case RISCV::PseudoVLSE8_V_MF8:
+ case RISCV::PseudoVLSE8_V_MF8_MASK:
+ case RISCV::PseudoVSE8_V_M1:
+ case RISCV::PseudoVSE8_V_M1_MASK:
+ case RISCV::PseudoVSE8_V_M2:
+ case RISCV::PseudoVSE8_V_M2_MASK:
+ case RISCV::PseudoVSE8_V_M4:
+ case RISCV::PseudoVSE8_V_M4_MASK:
+ case RISCV::PseudoVSE8_V_M8:
+ case RISCV::PseudoVSE8_V_M8_MASK:
+ case RISCV::PseudoVSE8_V_MF2:
+ case RISCV::PseudoVSE8_V_MF2_MASK:
+ case RISCV::PseudoVSE8_V_MF4:
+ case RISCV::PseudoVSE8_V_MF4_MASK:
+ case RISCV::PseudoVSE8_V_MF8:
+ case RISCV::PseudoVSE8_V_MF8_MASK:
+ case RISCV::PseudoVSSE8_V_M1:
+ case RISCV::PseudoVSSE8_V_M1_MASK:
+ case RISCV::PseudoVSSE8_V_M2:
+ case RISCV::PseudoVSSE8_V_M2_MASK:
+ case RISCV::PseudoVSSE8_V_M4:
+ case RISCV::PseudoVSSE8_V_M4_MASK:
+ case RISCV::PseudoVSSE8_V_M8:
+ case RISCV::PseudoVSSE8_V_M8_MASK:
+ case RISCV::PseudoVSSE8_V_MF2:
+ case RISCV::PseudoVSSE8_V_MF2_MASK:
+ case RISCV::PseudoVSSE8_V_MF4:
+ case RISCV::PseudoVSSE8_V_MF4_MASK:
+ case RISCV::PseudoVSSE8_V_MF8:
+ case RISCV::PseudoVSSE8_V_MF8_MASK:
+ EEW = 8;
+ break;
+ case RISCV::PseudoVLE16_V_M1:
+ case RISCV::PseudoVLE16_V_M1_MASK:
+ case RISCV::PseudoVLE16_V_M2:
+ case RISCV::PseudoVLE16_V_M2_MASK:
+ case RISCV::PseudoVLE16_V_M4:
+ case RISCV::PseudoVLE16_V_M4_MASK:
+ case RISCV::PseudoVLE16_V_M8:
+ case RISCV::PseudoVLE16_V_M8_MASK:
+ case RISCV::PseudoVLE16_V_MF2:
+ case RISCV::PseudoVLE16_V_MF2_MASK:
+ case RISCV::PseudoVLE16_V_MF4:
+ case RISCV::PseudoVLE16_V_MF4_MASK:
+ case RISCV::PseudoVLSE16_V_M1:
+ case RISCV::PseudoVLSE16_V_M1_MASK:
+ case RISCV::PseudoVLSE16_V_M2:
+ case RISCV::PseudoVLSE16_V_M2_MASK:
+ case RISCV::PseudoVLSE16_V_M4:
+ case RISCV::PseudoVLSE16_V_M4_MASK:
+ case RISCV::PseudoVLSE16_V_M8:
+ case RISCV::PseudoVLSE16_V_M8_MASK:
+ case RISCV::PseudoVLSE16_V_MF2:
+ case RISCV::PseudoVLSE16_V_MF2_MASK:
+ case RISCV::PseudoVLSE16_V_MF4:
+ case RISCV::PseudoVLSE16_V_MF4_MASK:
+ case RISCV::PseudoVSE16_V_M1:
+ case RISCV::PseudoVSE16_V_M1_MASK:
+ case RISCV::PseudoVSE16_V_M2:
+ case RISCV::PseudoVSE16_V_M2_MASK:
+ case RISCV::PseudoVSE16_V_M4:
+ case RISCV::PseudoVSE16_V_M4_MASK:
+ case RISCV::PseudoVSE16_V_M8:
+ case RISCV::PseudoVSE16_V_M8_MASK:
+ case RISCV::PseudoVSE16_V_MF2:
+ case RISCV::PseudoVSE16_V_MF2_MASK:
+ case RISCV::PseudoVSE16_V_MF4:
+ case RISCV::PseudoVSE16_V_MF4_MASK:
+ case RISCV::PseudoVSSE16_V_M1:
+ case RISCV::PseudoVSSE16_V_M1_MASK:
+ case RISCV::PseudoVSSE16_V_M2:
+ case RISCV::PseudoVSSE16_V_M2_MASK:
+ case RISCV::PseudoVSSE16_V_M4:
+ case RISCV::PseudoVSSE16_V_M4_MASK:
+ case RISCV::PseudoVSSE16_V_M8:
+ case RISCV::PseudoVSSE16_V_M8_MASK:
+ case RISCV::PseudoVSSE16_V_MF2:
+ case RISCV::PseudoVSSE16_V_MF2_MASK:
+ case RISCV::PseudoVSSE16_V_MF4:
+ case RISCV::PseudoVSSE16_V_MF4_MASK:
+ EEW = 16;
+ break;
+ case RISCV::PseudoVLE32_V_M1:
+ case RISCV::PseudoVLE32_V_M1_MASK:
+ case RISCV::PseudoVLE32_V_M2:
+ case RISCV::PseudoVLE32_V_M2_MASK:
+ case RISCV::PseudoVLE32_V_M4:
+ case RISCV::PseudoVLE32_V_M4_MASK:
+ case RISCV::PseudoVLE32_V_M8:
+ case RISCV::PseudoVLE32_V_M8_MASK:
+ case RISCV::PseudoVLE32_V_MF2:
+ case RISCV::PseudoVLE32_V_MF2_MASK:
+ case RISCV::PseudoVLSE32_V_M1:
+ case RISCV::PseudoVLSE32_V_M1_MASK:
+ case RISCV::PseudoVLSE32_V_M2:
+ case RISCV::PseudoVLSE32_V_M2_MASK:
+ case RISCV::PseudoVLSE32_V_M4:
+ case RISCV::PseudoVLSE32_V_M4_MASK:
+ case RISCV::PseudoVLSE32_V_M8:
+ case RISCV::PseudoVLSE32_V_M8_MASK:
+ case RISCV::PseudoVLSE32_V_MF2:
+ case RISCV::PseudoVLSE32_V_MF2_MASK:
+ case RISCV::PseudoVSE32_V_M1:
+ case RISCV::PseudoVSE32_V_M1_MASK:
+ case RISCV::PseudoVSE32_V_M2:
+ case RISCV::PseudoVSE32_V_M2_MASK:
+ case RISCV::PseudoVSE32_V_M4:
+ case RISCV::PseudoVSE32_V_M4_MASK:
+ case RISCV::PseudoVSE32_V_M8:
+ case RISCV::PseudoVSE32_V_M8_MASK:
+ case RISCV::PseudoVSE32_V_MF2:
+ case RISCV::PseudoVSE32_V_MF2_MASK:
+ case RISCV::PseudoVSSE32_V_M1:
+ case RISCV::PseudoVSSE32_V_M1_MASK:
+ case RISCV::PseudoVSSE32_V_M2:
+ case RISCV::PseudoVSSE32_V_M2_MASK:
+ case RISCV::PseudoVSSE32_V_M4:
+ case RISCV::PseudoVSSE32_V_M4_MASK:
+ case RISCV::PseudoVSSE32_V_M8:
+ case RISCV::PseudoVSSE32_V_M8_MASK:
+ case RISCV::PseudoVSSE32_V_MF2:
+ case RISCV::PseudoVSSE32_V_MF2_MASK:
+ EEW = 32;
+ break;
+ case RISCV::PseudoVLE64_V_M1:
+ case RISCV::PseudoVLE64_V_M1_MASK:
+ case RISCV::PseudoVLE64_V_M2:
+ case RISCV::PseudoVLE64_V_M2_MASK:
+ case RISCV::PseudoVLE64_V_M4:
+ case RISCV::PseudoVLE64_V_M4_MASK:
+ case RISCV::PseudoVLE64_V_M8:
+ case RISCV::PseudoVLE64_V_M8_MASK:
+ case RISCV::PseudoVLSE64_V_M1:
+ case RISCV::PseudoVLSE64_V_M1_MASK:
+ case RISCV::PseudoVLSE64_V_M2:
+ case RISCV::PseudoVLSE64_V_M2_MASK:
+ case RISCV::PseudoVLSE64_V_M4:
+ case RISCV::PseudoVLSE64_V_M4_MASK:
+ case RISCV::PseudoVLSE64_V_M8:
+ case RISCV::PseudoVLSE64_V_M8_MASK:
+ case RISCV::PseudoVSE64_V_M1:
+ case RISCV::PseudoVSE64_V_M1_MASK:
+ case RISCV::PseudoVSE64_V_M2:
+ case RISCV::PseudoVSE64_V_M2_MASK:
+ case RISCV::PseudoVSE64_V_M4:
+ case RISCV::PseudoVSE64_V_M4_MASK:
+ case RISCV::PseudoVSE64_V_M8:
+ case RISCV::PseudoVSE64_V_M8_MASK:
+ case RISCV::PseudoVSSE64_V_M1:
+ case RISCV::PseudoVSSE64_V_M1_MASK:
+ case RISCV::PseudoVSSE64_V_M2:
+ case RISCV::PseudoVSSE64_V_M2_MASK:
+ case RISCV::PseudoVSSE64_V_M4:
+ case RISCV::PseudoVSSE64_V_M4_MASK:
+ case RISCV::PseudoVSSE64_V_M8:
+ case RISCV::PseudoVSSE64_V_M8_MASK:
+ EEW = 64;
+ break;
+ }
+
+ return CurInfo.isCompatibleWithLoadStoreEEW(EEW, Require);
+}
+
bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
bool HadVectorOp = false;
@@ -510,6 +794,7 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
for (const MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
MI.getOpcode() == RISCV::PseudoVSETIVLI) {
HadVectorOp = true;
BBInfo.Change = getInfoForVSETVLI(MI);
@@ -527,7 +812,13 @@ bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
} else {
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
- if (needVSETVLI(NewInfo, BBInfo.Change))
+ // If this is a unit-stride or strided load/store, we may be able to use
+ // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+ // NOTE: We only do this if the vtype we're comparing against was
+ // created in this block. We need the first and third phase to treat
+ // the store the same way.
+ if (!canSkipVSETVLIForLoadStore(MI, NewInfo, BBInfo.Change) &&
+ needVSETVLI(NewInfo, BBInfo.Change))
BBInfo.Change = NewInfo;
}
}
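
As an aside on the EMUL=(EEW/SEW)*LMUL relationship the comment above relies on: a unit-stride or strided vector load/store encodes its own element width (EEW), and the hardware derives EMUL from the current vtype, so any vtype with the same SEW:LMUL ratio yields the same EMUL and the same VLMAX. The standalone C++ sketch below is illustrative only (VLEN and the helper names are made up, not the pass's code); it works through one example.

#include <cassert>

// Model LMUL in eighths so fractional LMUL (1/8..1/2) stays integral.
// EMUL*8 = (EEW / SEW) * (LMUL*8); VLMAX = VLEN * LMUL / SEW.
constexpr unsigned emulEighths(unsigned EEW, unsigned SEW, unsigned LMulEighths) {
  return EEW * LMulEighths / SEW;
}
constexpr unsigned vlmax(unsigned VLEN, unsigned SEW, unsigned LMulEighths) {
  return VLEN * LMulEighths / (SEW * 8);
}

int main() {
  constexpr unsigned VLEN = 128; // illustrative; any legal VLEN works
  // vtype A: SEW=32, LMUL=2; vtype B: SEW=8, LMUL=1/2 -- same SEW:LMUL ratio.
  // An EEW=16 load gets the same EMUL (=1) and the same VLMAX under either,
  // which is why the vtype change can be skipped for it.
  assert(emulEighths(16, 32, 16) == emulEighths(16, 8, 4)); // both LMUL 1
  assert(vlmax(VLEN, 32, 16) == vlmax(VLEN, 8, 4));         // both 8
  return 0;
}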
@@ -609,12 +900,14 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
// If the exit from the predecessor has the VTYPE we are looking for
// we might be able to avoid a VSETVLI.
- if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
+ if (PBBInfo.Exit.isUnknown() ||
+ !PBBInfo.Exit.hasCompatibleVTYPE(Require, /*Strict*/ false))
return true;
// We need the PHI input to the be the output of a VSET(I)VLI.
MachineInstr *DefMI = MRI->getVRegDef(InReg);
if (!DefMI || (DefMI->getOpcode() != RISCV::PseudoVSETVLI &&
+ DefMI->getOpcode() != RISCV::PseudoVSETVLIX0 &&
DefMI->getOpcode() != RISCV::PseudoVSETIVLI))
return true;
@@ -633,10 +926,13 @@ bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
VSETVLIInfo CurInfo;
+ // Only set if the current VSETVLIInfo comes from an explicit VSET(I)VLI.
+ MachineInstr *PrevVSETVLIMI = nullptr;
for (MachineInstr &MI : MBB) {
// If this is an explicit VSETVLI or VSETIVLI, update our state.
if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
MI.getOpcode() == RISCV::PseudoVSETIVLI) {
// Conservatively, mark the VL and VTYPE as live.
assert(MI.getOperand(3).getReg() == RISCV::VL &&
@@ -645,6 +941,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
MI.getOperand(3).setIsDead(false);
MI.getOperand(4).setIsDead(false);
CurInfo = getInfoForVSETVLI(MI);
+ PrevVSETVLIMI = &MI;
continue;
}
@@ -652,7 +949,11 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (RISCVII::hasSEWOp(TSFlags)) {
VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
if (RISCVII::hasVLOp(TSFlags)) {
- MachineOperand &VLOp = MI.getOperand(MI.getNumExplicitOperands() - 2);
+ unsigned Offset = 2;
+ if (RISCVII::hasVecPolicyOp(TSFlags))
+ Offset = 3;
+ MachineOperand &VLOp =
+ MI.getOperand(MI.getNumExplicitOperands() - Offset);
if (VLOp.isReg()) {
// Erase the AVL operand from the instruction.
VLOp.setReg(RISCV::NoRegister);
@@ -677,11 +978,35 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
} else {
// If this instruction isn't compatible with the previous VL/VTYPE
// we need to insert a VSETVLI.
- if (needVSETVLI(NewInfo, CurInfo)) {
- insertVSETVLI(MBB, MI, NewInfo, CurInfo);
+ // If this is a unit-stride or strided load/store, we may be able to use
+ // the EMUL=(EEW/SEW)*LMUL relationship to avoid changing vtype.
+ // NOTE: We can't use predecessor information for the store. We must
+ // treat it the same as the first phase so that we produce the correct
+ // vl/vtype for successor blocks.
+ if (!canSkipVSETVLIForLoadStore(MI, NewInfo, CurInfo) &&
+ needVSETVLI(NewInfo, CurInfo)) {
+ // If the previous VL/VTYPE was set by a VSETVLI whose result is unused,
+ // merge it with the current VL/VTYPE instead of inserting a new VSETVLI.
+ bool NeedInsertVSETVLI = true;
+ if (PrevVSETVLIMI) {
+ bool HasSameAVL =
+ CurInfo.hasSameAVL(NewInfo) ||
+ (NewInfo.hasAVLReg() && NewInfo.getAVLReg().isVirtual() &&
+ NewInfo.getAVLReg() == PrevVSETVLIMI->getOperand(0).getReg());
+ // If the two VSETVLIs have the same AVL and the same VLMAX,
+ // we can merge them.
+ if (HasSameAVL &&
+ CurInfo.getSEWLMULRatio() == NewInfo.getSEWLMULRatio()) {
+ PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE());
+ NeedInsertVSETVLI = false;
+ }
+ }
+ if (NeedInsertVSETVLI)
+ insertVSETVLI(MBB, MI, NewInfo, CurInfo);
CurInfo = NewInfo;
}
}
+ PrevVSETVLIMI = nullptr;
}
// If this is something updates VL/VTYPE that we don't know about, set
@@ -689,6 +1014,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE)) {
CurInfo = VSETVLIInfo::getUnknown();
+ PrevVSETVLIMI = nullptr;
}
}
}
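
The PrevVSETVLIMI->getOperand(2).setImm(NewInfo.encodeVTYPE()) rewrite in the hunk above works because vtype is a plain immediate: vlmul in bits 2:0, vsew in bits 5:3, vta in bit 6, vma in bit 7. A minimal sketch of that packing, assuming the standard field layout (the helper name is made up):

#include <cassert>
#include <cstdint>

// vsew encoding: e8=0, e16=1, e32=2, e64=3.
// vlmul encoding: m1=0, m2=1, m4=2, m8=3 (fractional LMULs use 5..7).
static uint8_t encodeVType(unsigned VSew, unsigned VLMul, bool TailAgnostic,
                           bool MaskAgnostic) {
  return (MaskAgnostic << 7) | (TailAgnostic << 6) | (VSew << 3) | VLMul;
}

int main() {
  // Matches the "PseudoVSETIVLI 4, 73  // vsetivli zero, 4, e16,m2,ta,mu"
  // example that appears later in this patch: e16 (1), m2 (1), ta, mu -> 73.
  assert(encodeVType(/*e16*/ 1, /*m2*/ 1, /*ta*/ true, /*mu*/ false) == 73);
  return 0;
}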
@@ -696,7 +1022,7 @@ void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- if (!ST.hasStdExtV())
+ if (!ST.hasVInstructions())
return false;
TII = ST.getInstrInfo();
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 8e9d245f13eb..cfad4cdb9364 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -178,6 +178,12 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
bit HasVLOp = 0;
let TSFlags{15} = HasVLOp;
+
+ bit HasVecPolicyOp = 0;
+ let TSFlags{16} = HasVecPolicyOp;
+
+ bit IsRVVWideningReduction = 0;
+ let TSFlags{17} = IsRVVWideningReduction;
}
// Pseudo instructions
@@ -199,7 +205,7 @@ class PseudoLoad<string opcodestr, RegisterClass rdty = GPR>
}
class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
- : Pseudo<(outs rdty:$rd, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
+ : Pseudo<(outs GPR:$tmp, rdty:$rd), (ins bare_symbol:$addr), [], opcodestr, "$rd, $addr, $tmp"> {
let hasSideEffects = 0;
let mayLoad = 1;
let mayStore = 0;
@@ -209,7 +215,7 @@ class PseudoFloatLoad<string opcodestr, RegisterClass rdty = GPR>
// Pseudo store instructions.
class PseudoStore<string opcodestr, RegisterClass rsty = GPR>
- : Pseudo<(outs rsty:$rs, GPR:$tmp), (ins bare_symbol:$addr), [], opcodestr, "$rs, $addr, $tmp"> {
+ : Pseudo<(outs GPR:$tmp), (ins rsty:$rs, bare_symbol:$addr), [], opcodestr, "$rs, $addr, $tmp"> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 1;
@@ -406,3 +412,135 @@ class RVInstJ<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
let Inst{11-7} = rd;
let Opcode = opcode.Value;
}
+
+//===----------------------------------------------------------------------===//
+// Instruction classes for .insn directives
+//===----------------------------------------------------------------------===//
+
+class DirectiveInsnR<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatR> {
+ bits<7> opcode;
+ bits<7> funct7;
+ bits<3> funct3;
+
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn r " # argstr;
+}
+
+class DirectiveInsnR4<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatR4> {
+ bits<7> opcode;
+ bits<2> funct2;
+ bits<3> funct3;
+
+ bits<5> rs3;
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-27} = rs3;
+ let Inst{26-25} = funct2;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn r4 " # argstr;
+}
+
+class DirectiveInsnI<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatI> {
+ bits<7> opcode;
+ bits<3> funct3;
+
+ bits<12> imm12;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-20} = imm12;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn i " # argstr;
+}
+
+class DirectiveInsnS<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatS> {
+ bits<7> opcode;
+ bits<3> funct3;
+
+ bits<12> imm12;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-25} = imm12{11-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = imm12{4-0};
+ let Opcode = opcode;
+
+ let AsmString = ".insn s " # argstr;
+}
+
+class DirectiveInsnB<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatB> {
+ bits<7> opcode;
+ bits<3> funct3;
+
+ bits<12> imm12;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31} = imm12{11};
+ let Inst{30-25} = imm12{9-4};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-8} = imm12{3-0};
+ let Inst{7} = imm12{10};
+ let Opcode = opcode;
+
+ let AsmString = ".insn b " # argstr;
+}
+
+class DirectiveInsnU<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatU> {
+ bits<7> opcode;
+
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31-12} = imm20;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn u " # argstr;
+}
+
+class DirectiveInsnJ<dag outs, dag ins, string argstr>
+ : RVInst<outs, ins, "", "", [], InstFormatJ> {
+ bits<7> opcode;
+
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31-12} = imm20;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+
+ let AsmString = ".insn j " # argstr;
+}
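
For reference, the R-format layout that DirectiveInsnR describes (funct7 | rs2 | rs1 | funct3 | rd | opcode) can be packed by hand. The sketch below is an illustrative encoder, not the assembler's code; it reproduces the encoding of "add a0, a1, a2" from the ".insn r" operand order introduced later in this patch.

#include <cassert>
#include <cstdint>

// Pack an R-type word exactly as DirectiveInsnR lays it out:
// Inst{31-25}=funct7, {24-20}=rs2, {19-15}=rs1, {14-12}=funct3,
// {11-7}=rd, {6-0}=opcode.
static uint32_t encodeInsnR(uint32_t Opcode, uint32_t Funct3, uint32_t Funct7,
                            uint32_t Rd, uint32_t Rs1, uint32_t Rs2) {
  return (Funct7 << 25) | (Rs2 << 20) | (Rs1 << 15) | (Funct3 << 12) |
         (Rd << 7) | Opcode;
}

int main() {
  // ".insn r 0x33, 0, 0, a0, a1, a2" carries the same fields as
  // "add a0, a1, a2", which encodes to 0x00C58533 (a0=x10, a1=x11, a2=x12).
  assert(encodeInsnR(0x33, 0, 0, /*rd=*/10, /*rs1=*/11, /*rs2=*/12) ==
         0x00C58533u);
  return 0;
}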
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a541daaff9f4..547d82550cac 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,14 +19,15 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -36,6 +37,10 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "RISCVGenInstrInfo.inc"
+static cl::opt<bool> PreferWholeRegisterMove(
+ "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
+ cl::desc("Prefer whole register move for vector registers."));
+
namespace llvm {
namespace RISCVVPseudosTable {
@@ -113,9 +118,137 @@ unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg,
unsigned NumRegs) {
- // We really want the positive remainder mod 32 here, that happens to be
- // easily obtainable with a mask.
- return ((DstReg - SrcReg) & 0x1f) < NumRegs;
+ return DstReg > SrcReg && (DstReg - SrcReg) < NumRegs;
+}
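
The tightened forwardCopyWillClobberTuple check above asks a simple question: when copying NumRegs consecutive registers in ascending order, does the destination range start inside the source range, so that an early sub-copy overwrites a source register that has not been read yet? The standalone sketch below (registers modeled as array slots, names made up) shows why the reversed copy order is needed in that case.

#include <cassert>

static bool forwardCopyWillClobber(unsigned Dst, unsigned Src, unsigned N) {
  return Dst > Src && (Dst - Src) < N;
}

// Copy N consecutive "registers" starting at Src to Dst, forwards or backwards.
static void copyTuple(int *RegFile, unsigned Dst, unsigned Src, unsigned N,
                      bool Reverse) {
  if (Reverse)
    for (unsigned I = N; I-- > 0;)
      RegFile[Dst + I] = RegFile[Src + I];
  else
    for (unsigned I = 0; I < N; ++I)
      RegFile[Dst + I] = RegFile[Src + I];
}

int main() {
  // Copy the 4-register tuple v4..v7 into v6..v9 (overlapping, Dst > Src).
  int Regs[16] = {0};
  for (unsigned I = 0; I < 4; ++I)
    Regs[4 + I] = 100 + I;

  assert(forwardCopyWillClobber(6, 4, 4)); // so the pass reverses the order
  copyTuple(Regs, 6, 4, 4, /*Reverse=*/true);
  for (unsigned I = 0; I < 4; ++I)
    assert(Regs[6 + I] == 100 + (int)I); // values survive intact

  // A forward copy would have clobbered slots 6 and 7 before reading them.
  return 0;
}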
+
+static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
+ const MachineBasicBlock &MBB,
+ MachineBasicBlock::const_iterator MBBI,
+ MachineBasicBlock::const_iterator &DefMBBI,
+ RISCVII::VLMUL LMul) {
+ if (PreferWholeRegisterMove)
+ return false;
+
+ assert(MBBI->getOpcode() == TargetOpcode::COPY &&
+ "Unexpected COPY instruction.");
+ Register SrcReg = MBBI->getOperand(1).getReg();
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ bool FoundDef = false;
+ bool FirstVSetVLI = false;
+ unsigned FirstSEW = 0;
+ while (MBBI != MBB.begin()) {
+ --MBBI;
+ if (MBBI->isMetaInstruction())
+ continue;
+
+ if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+ MBBI->getOpcode() == RISCV::PseudoVSETVLIX0 ||
+ MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
+ // There is a vsetvli between the COPY and the source-defining instruction.
+ // vy = def_vop ... (producing instruction)
+ // ...
+ // vsetvli
+ // ...
+ // vx = COPY vy
+ if (!FoundDef) {
+ if (!FirstVSetVLI) {
+ FirstVSetVLI = true;
+ unsigned FirstVType = MBBI->getOperand(2).getImm();
+ RISCVII::VLMUL FirstLMul = RISCVVType::getVLMUL(FirstVType);
+ FirstSEW = RISCVVType::getSEW(FirstVType);
+ // The first encountered vsetvli must have the same lmul as the
+ // register class of COPY.
+ if (FirstLMul != LMul)
+ return false;
+ }
+ // Only permit `vsetvli x0, x0, vtype` between the COPY and the
+ // source-defining instruction.
+ if (MBBI->getOperand(0).getReg() != RISCV::X0)
+ return false;
+ if (MBBI->getOperand(1).isImm())
+ return false;
+ if (MBBI->getOperand(1).getReg() != RISCV::X0)
+ return false;
+ continue;
+ }
+
+ // MBBI is the first vsetvli before the producing instruction.
+ unsigned VType = MBBI->getOperand(2).getImm();
+ // If there is a vsetvli between COPY and the producing instruction.
+ if (FirstVSetVLI) {
+ // If SEW is different, return false.
+ if (RISCVVType::getSEW(VType) != FirstSEW)
+ return false;
+ }
+
+ // If the vsetvli is tail undisturbed, keep the whole register move.
+ if (!RISCVVType::isTailAgnostic(VType))
+ return false;
+
+ // The check is conservative. We only have register classes for
+ // LMUL = 1/2/4/8. We should be able to convert vmv1r.v to vmv.v.v
+ // for fractional LMUL operations as well. However, we cannot use the
+ // vsetvli's lmul for widening operations, because the result of a
+ // widening operation is 2 x LMUL.
+ return LMul == RISCVVType::getVLMUL(VType);
+ } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
+ return false;
+ } else if (MBBI->getNumDefs()) {
+ // Check all the instructions which will change VL.
+ // For example, vleff has implicit def VL.
+ if (MBBI->modifiesRegister(RISCV::VL))
+ return false;
+
+ // Go through all defined operands, including implicit defines.
+ for (const MachineOperand &MO : MBBI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ if (!FoundDef && TRI->isSubRegisterEq(MO.getReg(), SrcReg)) {
+ // We only permit the source of the COPY to have the same LMUL as the
+ // defined operand.
+ // There are cases where we need to keep the whole register copy if the
+ // LMUL is different.
+ // For example,
+ // $x0 = PseudoVSETIVLI 4, 73 // vsetivli zero, 4, e16,m2,ta,mu
+ // $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2
+ // # The COPY may be created by vlmul_trunc intrinsic.
+ // $v26m2 = COPY renamable $v28m2, implicit killed $v28m4
+ //
+ // After widening, the valid value will be 4 x e32 elements. If we
+ // convert the COPY to vmv.v.v, it will only copy 4 x e16 elements.
+ // FIXME: The COPY of subregister of Zvlsseg register will not be able
+ // to convert to vmv.v.[v|i] under the constraint.
+ if (MO.getReg() != SrcReg)
+ return false;
+
+ // For widening reduction instructions with an LMUL_1 input vector,
+ // checking only the LMUL is insufficient because the reduction result
+ // is always LMUL_1.
+ // For example,
+ // $x11 = PseudoVSETIVLI 1, 64 // vsetivli a1, 1, e8, m1, ta, mu
+ // $v8m1 = PseudoVWREDSUM_VS_M1 $v26, $v27
+ // $v26 = COPY killed renamable $v8
+ // After widening, the valid value will be 1 x e16 elements. If we
+ // convert the COPY to vmv.v.v, it will only copy 1 x e8 elements.
+ uint64_t TSFlags = MBBI->getDesc().TSFlags;
+ if (RISCVII::isRVVWideningReduction(TSFlags))
+ return false;
+
+ // Found the definition.
+ FoundDef = true;
+ DefMBBI = MBBI;
+ // If the producing instruction does not depend on vsetvli, do not
+ // convert COPY to vmv.v.v. For example, VL1R_V or PseudoVRELOAD.
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ break;
+ }
+ }
+ }
+ }
+
+ return false;
}
void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -133,7 +266,7 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Opc;
bool IsScalableVector = true;
unsigned NF = 1;
- unsigned LMul = 1;
+ RISCVII::VLMUL LMul = RISCVII::LMUL_1;
unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::FSGNJ_H;
@@ -146,91 +279,157 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
IsScalableVector = false;
} else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
+ LMul = RISCVII::LMUL_4;
} else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV8R_V;
+ LMul = RISCVII::LMUL_8;
} else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 2;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 2;
- LMul = 2;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV4R_V;
SubRegIdx = RISCV::sub_vrm4_0;
NF = 2;
- LMul = 4;
+ LMul = RISCVII::LMUL_4;
} else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 3;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 3;
- LMul = 2;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 4;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV2R_V;
SubRegIdx = RISCV::sub_vrm2_0;
NF = 4;
- LMul = 2;
+ LMul = RISCVII::LMUL_2;
} else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 5;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 6;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 7;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
Opc = RISCV::PseudoVMV1R_V;
SubRegIdx = RISCV::sub_vrm1_0;
NF = 8;
- LMul = 1;
+ LMul = RISCVII::LMUL_1;
} else {
llvm_unreachable("Impossible reg-to-reg copy");
}
if (IsScalableVector) {
+ bool UseVMV_V_V = false;
+ MachineBasicBlock::const_iterator DefMBBI;
+ unsigned DefExplicitOpNum;
+ unsigned VIOpc;
+ if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
+ UseVMV_V_V = true;
+ DefExplicitOpNum = DefMBBI->getNumExplicitOperands();
+ // We only need to handle LMUL = 1/2/4/8 here because we only define
+ // vector register classes for LMUL = 1/2/4/8.
+ switch (LMul) {
+ default:
+ llvm_unreachable("Impossible LMUL for vector register copy.");
+ case RISCVII::LMUL_1:
+ Opc = RISCV::PseudoVMV_V_V_M1;
+ VIOpc = RISCV::PseudoVMV_V_I_M1;
+ break;
+ case RISCVII::LMUL_2:
+ Opc = RISCV::PseudoVMV_V_V_M2;
+ VIOpc = RISCV::PseudoVMV_V_I_M2;
+ break;
+ case RISCVII::LMUL_4:
+ Opc = RISCV::PseudoVMV_V_V_M4;
+ VIOpc = RISCV::PseudoVMV_V_I_M4;
+ break;
+ case RISCVII::LMUL_8:
+ Opc = RISCV::PseudoVMV_V_V_M8;
+ VIOpc = RISCV::PseudoVMV_V_I_M8;
+ break;
+ }
+ }
+
+ bool UseVMV_V_I = false;
+ if (UseVMV_V_V && (DefMBBI->getOpcode() == VIOpc)) {
+ UseVMV_V_I = true;
+ Opc = VIOpc;
+ }
+
if (NF == 1) {
- BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(1));
+ else
+ MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ // The last two explicit operands of the vector pseudo are AVL and SEW.
+ // We also need to append the implicit uses of vl and vtype.
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 2)); // AVL
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 1)); // SEW
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
} else {
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
- if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) {
+ unsigned LMulVal;
+ bool Fractional;
+ std::tie(LMulVal, Fractional) = RISCVVType::decodeVLMUL(LMul);
+ assert(!Fractional && "Fractional LMUL is impossible here.");
+ if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
I = NF - 1;
End = -1;
Incr = -1;
}
for (; I != End; I += Incr) {
- BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I))
- .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
- getKillRegState(KillSrc));
+ auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
+ TRI->getSubReg(DstReg, SubRegIdx + I));
+ if (UseVMV_V_I)
+ MIB = MIB.add(DefMBBI->getOperand(1));
+ else
+ MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
+ getKillRegState(KillSrc));
+ if (UseVMV_V_V) {
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 2)); // AVL
+ MIB.add(DefMBBI->getOperand(DefExplicitOpNum - 1)); // SEW
+ MIB.addReg(RISCV::VL, RegState::Implicit);
+ MIB.addReg(RISCV::VTYPE, RegState::Implicit);
+ }
}
}
} else {
@@ -458,6 +657,12 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
.addReg(SrcReg, RegState::Kill)
.addReg(RISCV::X0)
.setMIFlag(Flag);
+ } else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
+ Inst.Opc == RISCV::SH3ADD) {
+ BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
+ .addReg(SrcReg, RegState::Kill)
+ .addReg(SrcReg, RegState::Kill)
+ .setMIFlag(Flag);
} else {
BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
.addReg(SrcReg, RegState::Kill)
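
The SH1ADD/SH2ADD/SH3ADD special case added above exists because "shNadd rd, rs, rs" with both source operands equal computes (rs << N) + rs = rs * (2^N + 1), i.e. a multiply by 3, 5 or 9, which is presumably a step in the Zba-aware materialization sequences this movImm loop expands. A quick arithmetic check (plain C++, nothing RISC-V specific):

#include <cassert>
#include <cstdint>

// shNadd rd, rs1, rs2 computes (rs1 << N) + rs2. With rs1 == rs2 == x it is
// x * (2^N + 1), i.e. x*3, x*5 or x*9.
static int64_t shNadd(int64_t Rs1, int64_t Rs2, unsigned N) {
  return (Rs1 << N) + Rs2;
}

int main() {
  for (int64_t X : {1, 7, 42, 123456789}) {
    assert(shNadd(X, X, 1) == X * 3);
    assert(shNadd(X, X, 2) == X * 5);
    assert(shNadd(X, X, 3) == X * 9);
  }
  return 0;
}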
@@ -469,6 +674,25 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
}
}
+static RISCVCC::CondCode getCondFromBranchOpc(unsigned Opc) {
+ switch (Opc) {
+ default:
+ return RISCVCC::COND_INVALID;
+ case RISCV::BEQ:
+ return RISCVCC::COND_EQ;
+ case RISCV::BNE:
+ return RISCVCC::COND_NE;
+ case RISCV::BLT:
+ return RISCVCC::COND_LT;
+ case RISCV::BGE:
+ return RISCVCC::COND_GE;
+ case RISCV::BLTU:
+ return RISCVCC::COND_LTU;
+ case RISCV::BGEU:
+ return RISCVCC::COND_GEU;
+ }
+}
+
// The contents of values added to Cond are not examined outside of
// RISCVInstrInfo, giving us flexibility in what to push to it. For RISCV, we
// push BranchOpcode, Reg1, Reg2.
@@ -478,27 +702,47 @@ static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
assert(LastInst.getDesc().isConditionalBranch() &&
"Unknown conditional branch");
Target = LastInst.getOperand(2).getMBB();
- Cond.push_back(MachineOperand::CreateImm(LastInst.getOpcode()));
+ unsigned CC = getCondFromBranchOpc(LastInst.getOpcode());
+ Cond.push_back(MachineOperand::CreateImm(CC));
Cond.push_back(LastInst.getOperand(0));
Cond.push_back(LastInst.getOperand(1));
}
-static unsigned getOppositeBranchOpcode(int Opc) {
- switch (Opc) {
+const MCInstrDesc &RISCVInstrInfo::getBrCond(RISCVCC::CondCode CC) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case RISCVCC::COND_EQ:
+ return get(RISCV::BEQ);
+ case RISCVCC::COND_NE:
+ return get(RISCV::BNE);
+ case RISCVCC::COND_LT:
+ return get(RISCV::BLT);
+ case RISCVCC::COND_GE:
+ return get(RISCV::BGE);
+ case RISCVCC::COND_LTU:
+ return get(RISCV::BLTU);
+ case RISCVCC::COND_GEU:
+ return get(RISCV::BGEU);
+ }
+}
+
+RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
+ switch (CC) {
default:
llvm_unreachable("Unrecognized conditional branch");
- case RISCV::BEQ:
- return RISCV::BNE;
- case RISCV::BNE:
- return RISCV::BEQ;
- case RISCV::BLT:
- return RISCV::BGE;
- case RISCV::BGE:
- return RISCV::BLT;
- case RISCV::BLTU:
- return RISCV::BGEU;
- case RISCV::BGEU:
- return RISCV::BLTU;
+ case RISCVCC::COND_EQ:
+ return RISCVCC::COND_NE;
+ case RISCVCC::COND_NE:
+ return RISCVCC::COND_EQ;
+ case RISCVCC::COND_LT:
+ return RISCVCC::COND_GE;
+ case RISCVCC::COND_GE:
+ return RISCVCC::COND_LT;
+ case RISCVCC::COND_LTU:
+ return RISCVCC::COND_GEU;
+ case RISCVCC::COND_GEU:
+ return RISCVCC::COND_LTU;
}
}
@@ -624,9 +868,9 @@ unsigned RISCVInstrInfo::insertBranch(
}
// Either a one or two-way conditional branch.
- unsigned Opc = Cond[0].getImm();
+ auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
MachineInstr &CondMI =
- *BuildMI(&MBB, DL, get(Opc)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
+ *BuildMI(&MBB, DL, getBrCond(CC)).add(Cond[1]).add(Cond[2]).addMBB(TBB);
if (BytesAdded)
*BytesAdded += getInstSizeInBytes(CondMI);
@@ -641,11 +885,11 @@ unsigned RISCVInstrInfo::insertBranch(
return 2;
}
-unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &DestBB,
- const DebugLoc &DL,
- int64_t BrOffset,
- RegScavenger *RS) const {
+void RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &DestBB,
+ MachineBasicBlock &RestoreBB,
+ const DebugLoc &DL, int64_t BrOffset,
+ RegScavenger *RS) const {
assert(RS && "RegScavenger required for long branching");
assert(MBB.empty() &&
"new block should be inserted for expanding unconditional branch");
@@ -671,16 +915,18 @@ unsigned RISCVInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
RS->enterBasicBlockEnd(MBB);
unsigned Scav = RS->scavengeRegisterBackwards(RISCV::GPRRegClass,
MI.getIterator(), false, 0);
+ // TODO: The case when there is no scavenged register needs special handling.
+ assert(Scav != RISCV::NoRegister && "No register is scavenged!");
MRI.replaceRegWith(ScratchReg, Scav);
MRI.clearVirtRegs();
RS->setRegUsed(Scav);
- return 8;
}
bool RISCVInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert((Cond.size() == 3) && "Invalid branch condition!");
- Cond[0].setImm(getOppositeBranchOpcode(Cond[0].getImm()));
+ auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+ Cond[0].setImm(getOppositeBranchCondition(CC));
return false;
}
@@ -866,12 +1112,21 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
switch (OpType) {
default:
llvm_unreachable("Unexpected operand type");
+ case RISCVOp::OPERAND_UIMM2:
+ Ok = isUInt<2>(Imm);
+ break;
+ case RISCVOp::OPERAND_UIMM3:
+ Ok = isUInt<3>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM4:
Ok = isUInt<4>(Imm);
break;
case RISCVOp::OPERAND_UIMM5:
Ok = isUInt<5>(Imm);
break;
+ case RISCVOp::OPERAND_UIMM7:
+ Ok = isUInt<7>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM12:
Ok = isUInt<12>(Imm);
break;
@@ -1086,7 +1341,7 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands())
- if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI())
+ if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI() || MO.isJTI())
return outliner::InstrType::Illegal;
// Don't allow instructions which won't be materialized to impact outlining
@@ -1139,7 +1394,7 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
// clang-format off
#define CASE_VFMA_OPCODE_COMMON(OP, TYPE, LMUL) \
- RISCV::PseudoV##OP##_##TYPE##_##LMUL##_COMMUTABLE
+ RISCV::PseudoV##OP##_##TYPE##_##LMUL
#define CASE_VFMA_OPCODE_LMULS(OP, TYPE) \
CASE_VFMA_OPCODE_COMMON(OP, TYPE, MF8): \
@@ -1182,6 +1437,11 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case CASE_VFMA_OPCODE_LMULS(NMSAC, VX):
case CASE_VFMA_OPCODE_LMULS(MACC, VV):
case CASE_VFMA_OPCODE_LMULS(NMSAC, VV): {
+ // If the tail policy is undisturbed we can't commute.
+ assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
+ if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
+ return false;
+
// For these instructions we can only swap operand 1 and operand 3 by
// changing the opcode.
unsigned CommutableOpIdx1 = 1;
@@ -1197,6 +1457,11 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case CASE_VFMA_OPCODE_LMULS(FNMSUB, VV):
case CASE_VFMA_OPCODE_LMULS(MADD, VV):
case CASE_VFMA_OPCODE_LMULS(NMSUB, VV): {
+ // If the tail policy is undisturbed we can't commute.
+ assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags));
+ if ((MI.getOperand(MI.getNumExplicitOperands() - 1).getImm() & 1) == 0)
+ return false;
+
// For these instructions we have more freedom. We can commute with the
// other multiplicand or with the addend/subtrahend/minuend.
@@ -1223,7 +1488,7 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
// Both of operands are not fixed. Set one of commutable
// operands to the tied source.
CommutableOpIdx1 = 1;
- } else if (SrcOpIdx1 == CommutableOpIdx1) {
+ } else if (SrcOpIdx1 == CommuteAnyOperandIndex) {
// Only one of the operands is not fixed.
CommutableOpIdx1 = SrcOpIdx2;
}
@@ -1261,8 +1526,8 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
}
#define CASE_VFMA_CHANGE_OPCODE_COMMON(OLDOP, NEWOP, TYPE, LMUL) \
- case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL##_COMMUTABLE: \
- Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL##_COMMUTABLE; \
+ case RISCV::PseudoV##OLDOP##_##TYPE##_##LMUL: \
+ Opc = RISCV::PseudoV##NEWOP##_##TYPE##_##LMUL; \
break;
#define CASE_VFMA_CHANGE_OPCODE_LMULS(OLDOP, NEWOP, TYPE) \
@@ -1409,8 +1674,9 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
-MachineInstr *RISCVInstrInfo::convertToThreeAddress(
- MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
switch (MI.getOpcode()) {
default:
break;
@@ -1434,7 +1700,8 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(
}
//clang-format on
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.add(MI.getOperand(2))
@@ -1451,6 +1718,20 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(
}
}
+ if (LIS) {
+ SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+
+ if (MI.getOperand(0).isEarlyClobber()) {
+ // Use operand 1 was tied to the early-clobber def operand 0, so its live
+ // interval could have ended at an early-clobber slot. Now that they are
+ // no longer tied, we need to update it to the normal register slot.
+ LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ LiveRange::Segment *S = LI.getSegmentContaining(Idx);
+ if (S->end == Idx.getRegSlot(true))
+ S->end = Idx.getRegSlot();
+ }
+ }
+
return MIB;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index d80fc483826f..2bfad7844c43 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -24,12 +24,29 @@ namespace llvm {
class RISCVSubtarget;
+namespace RISCVCC {
+
+enum CondCode {
+ COND_EQ,
+ COND_NE,
+ COND_LT,
+ COND_GE,
+ COND_LTU,
+ COND_GEU,
+ COND_INVALID
+};
+
+CondCode getOppositeBranchCondition(CondCode);
+
+} // end of namespace RISCVCC
+
class RISCVInstrInfo : public RISCVGenInstrInfo {
public:
explicit RISCVInstrInfo(RISCVSubtarget &STI);
MCInst getNop() const override;
+ const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
@@ -68,10 +85,10 @@ public:
const DebugLoc &dl,
int *BytesAdded = nullptr) const override;
- unsigned insertIndirectBranch(MachineBasicBlock &MBB,
- MachineBasicBlock &NewDestBB,
- const DebugLoc &DL, int64_t BrOffset,
- RegScavenger *RS = nullptr) const override;
+ void insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ MachineBasicBlock &RestoreBB, const DebugLoc &DL,
+ int64_t BrOffset, RegScavenger *RS) const override;
unsigned removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved = nullptr) const override;
@@ -143,9 +160,8 @@ public:
unsigned OpIdx1,
unsigned OpIdx2) const override;
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
Register getVLENFactoredAmount(
MachineFunction &MF, MachineBasicBlock &MBB,
@@ -164,6 +180,11 @@ protected:
const RISCVSubtarget &STI;
};
+namespace RISCV {
+// Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
+static constexpr int64_t VLMaxSentinel = -1LL;
+} // namespace RISCV
+
namespace RISCVVPseudosTable {
struct PseudoInfo {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 949fff25e9e0..b653928ccea9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>,
// Target-dependent type requirements.
def SDT_RISCVCall : SDTypeProfile<0, -1, [SDTCisVT<0, XLenVT>]>;
def SDT_RISCVSelectCC : SDTypeProfile<1, 5, [SDTCisSameAs<1, 2>,
+ SDTCisVT<3, OtherVT>,
SDTCisSameAs<0, 4>,
SDTCisSameAs<4, 5>]>;
def SDT_RISCVBrCC : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
@@ -152,6 +153,20 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
let OperandNamespace = "RISCVOp";
}
+def uimm2 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<2>;
+ let DecoderMethod = "decodeUImmOperand<2>";
+ let OperandType = "OPERAND_UIMM2";
+ let OperandNamespace = "RISCVOp";
+}
+
+def uimm3 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<3>;
+ let DecoderMethod = "decodeUImmOperand<3>";
+ let OperandType = "OPERAND_UIMM3";
+ let OperandNamespace = "RISCVOp";
+}
+
def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5>;
let DecoderMethod = "decodeUImmOperand<5>";
@@ -159,6 +174,13 @@ def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let OperandNamespace = "RISCVOp";
}
+def uimm7 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<7>;
+ let DecoderMethod = "decodeUImmOperand<7>";
+ let OperandType = "OPERAND_UIMM7";
+ let OperandNamespace = "RISCVOp";
+}
+
def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12>;
let EncoderMethod = "getImmOpValue";
@@ -849,6 +871,87 @@ def : MnemonicAlias<"sbreak", "ebreak">;
def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// isCodeGenOnly = 1 to hide them from the tablegened assembly parser.
+let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1,
+ hasNoSchedulingInfo = 1 in {
+def InsnR : DirectiveInsnR<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3,
+ uimm7:$funct7, AnyReg:$rs1,
+ AnyReg:$rs2),
+ "$opcode, $funct3, $funct7, $rd, $rs1, $rs2">;
+def InsnR4 : DirectiveInsnR4<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ uimm3:$funct3,
+ uimm2:$funct2,
+ AnyReg:$rs1, AnyReg:$rs2,
+ AnyReg:$rs3),
+ "$opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3">;
+def InsnI : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs1, simm12:$imm12),
+ "$opcode, $funct3, $rd, $rs1, $imm12">;
+def InsnI_Mem : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ uimm3:$funct3,
+ AnyReg:$rs1,
+ simm12:$imm12),
+ "$opcode, $funct3, $rd, ${imm12}(${rs1})">;
+def InsnB : DirectiveInsnB<(outs), (ins uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs1, AnyReg:$rs2,
+ simm13_lsb0:$imm12),
+ "$opcode, $funct3, $rs1, $rs2, $imm12">;
+def InsnU : DirectiveInsnU<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ uimm20_lui:$imm20),
+ "$opcode, $rd, $imm20">;
+def InsnJ : DirectiveInsnJ<(outs AnyReg:$rd), (ins uimm7:$opcode,
+ simm21_lsb0_jal:$imm20),
+ "$opcode, $rd, $imm20">;
+def InsnS : DirectiveInsnS<(outs), (ins uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs2, AnyReg:$rs1,
+ simm12:$imm12),
+ "$opcode, $funct3, $rs2, ${imm12}(${rs1})">;
+}
+
+// Use InstAliases to match these so that we can combine the insn and format
+// into a mnemonic to use as the key for the tablegened asm matcher table. The
+// parser will take care of creating these fake mnemonics and will only do it
+// for known formats.
+let EmitPriority = 0 in {
+def : InstAlias<".insn_r $opcode, $funct3, $funct7, $rd, $rs1, $rs2",
+ (InsnR AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm7:$funct7,
+ AnyReg:$rs1, AnyReg:$rs2)>;
+// Accept 4 register form of ".insn r" as alias for ".insn r4".
+def : InstAlias<".insn_r $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3",
+ (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2,
+ AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>;
+def : InstAlias<".insn_r4 $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3",
+ (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2,
+ AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>;
+def : InstAlias<".insn_i $opcode, $funct3, $rd, $rs1, $imm12",
+ (InsnI AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ simm12:$imm12)>;
+def : InstAlias<".insn_i $opcode, $funct3, $rd, ${imm12}(${rs1})",
+ (InsnI_Mem AnyReg:$rd, uimm7:$opcode, uimm3:$funct3,
+ AnyReg:$rs1, simm12:$imm12)>;
+def : InstAlias<".insn_b $opcode, $funct3, $rs1, $rs2, $imm12",
+ (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ AnyReg:$rs2, simm13_lsb0:$imm12)>;
+// Accept sb as an alias for b.
+def : InstAlias<".insn_sb $opcode, $funct3, $rs1, $rs2, $imm12",
+ (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1,
+ AnyReg:$rs2, simm13_lsb0:$imm12)>;
+def : InstAlias<".insn_u $opcode, $rd, $imm20",
+ (InsnU AnyReg:$rd, uimm7:$opcode, uimm20_lui:$imm20)>;
+def : InstAlias<".insn_j $opcode, $rd, $imm20",
+ (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>;
+// Accept uj as an alias for j.
+def : InstAlias<".insn_uj $opcode, $rd, $imm20",
+ (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>;
+def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
+ (InsnS uimm7:$opcode, uimm3:$funct3, AnyReg:$rs2,
+ AnyReg:$rs1, simm12:$imm12)>;
+}
+
+//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//
// Naming convention: For 'generic' pattern classes, we use the naming
@@ -893,6 +996,14 @@ def mul_oneuse : PatFrag<(ops node:$A, node:$B), (mul node:$A, node:$B), [{
return N->hasOneUse();
}]>;
+def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
+ (mul node:$A, node:$B), [{
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (N1C->hasOneUse())
+ return true;
+ return false;
+}]>;
+
/// Simple arithmetic operations
def : PatGprGpr<add, ADD>;
@@ -966,13 +1077,27 @@ def : Pat<(setgt GPR:$rs1, GPR:$rs2), (SLT GPR:$rs2, GPR:$rs1)>;
def : Pat<(setge GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs1, GPR:$rs2), 1)>;
def : Pat<(setle GPR:$rs1, GPR:$rs2), (XORI (SLT GPR:$rs2, GPR:$rs1), 1)>;
+def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ RISCVCC::CondCode BrCC = getRISCVCCForIntCC(CC);
+ return CurDAG->getTargetConstant(BrCC, SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc,
+ node:$truev, node:$falsev),
+ (riscv_selectcc node:$lhs, node:$rhs,
+ node:$cc, node:$truev,
+ node:$falsev), [{}],
+ IntCCtoRISCVCC>;
+
let usesCustomInserter = 1 in
class SelectCC_rrirr<RegisterClass valty, RegisterClass cmpty>
: Pseudo<(outs valty:$dst),
(ins cmpty:$lhs, cmpty:$rhs, ixlenimm:$imm,
valty:$truev, valty:$falsev),
- [(set valty:$dst, (riscv_selectcc cmpty:$lhs, cmpty:$rhs,
- (XLenVT timm:$imm), valty:$truev, valty:$falsev))]>;
+ [(set valty:$dst,
+ (riscv_selectcc_frag:$imm cmpty:$lhs, cmpty:$rhs, cond,
+ valty:$truev, valty:$falsev))]>;
def Select_GPR_Using_CC_GPR : SelectCC_rrirr<GPR, GPR>;
@@ -1231,22 +1356,30 @@ def : Pat<(i64 (shl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLI (SLLI GPR:$rs1, 32), (ImmSubFrom32 uimm5:$shamt))>;
}
+// PatFrag to allow ADDW/SUBW/MULW/SLLW to be selected from i64 add/sub/mul/shl
+// if only the lower 32 bits of their result are used.
+class binop_allwusers<SDPatternOperator operator>
+ : PatFrag<(ops node:$lhs, node:$rhs),
+ (operator node:$lhs, node:$rhs), [{
+ return hasAllWUsers(Node);
+}]>;
+
+def sexti32_allwusers : PatFrag<(ops node:$src),
+ (sext_inreg node:$src, i32), [{
+ return hasAllWUsers(Node);
+}]>;
+
let Predicates = [IsRV64] in {
/// sext and zext
+// Sign extend is not needed if all users are W instructions.
+def : Pat<(sexti32_allwusers GPR:$rs1), (XLenVT GPR:$rs1)>;
+
def : Pat<(sext_inreg GPR:$rs1, i32), (ADDIW GPR:$rs1, 0)>;
/// ALU operations
-def : Pat<(sext_inreg (add GPR:$rs1, GPR:$rs2), i32),
- (ADDW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (add GPR:$rs1, simm12:$imm12), i32),
- (ADDIW GPR:$rs1, simm12:$imm12)>;
-def : Pat<(sext_inreg (sub GPR:$rs1, GPR:$rs2), i32),
- (SUBW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (shl GPR:$rs1, uimm5:$shamt), i32),
- (SLLIW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (srl (and GPR:$rs1, 0xffffffff), uimm5:$shamt)),
(SRLIW GPR:$rs1, uimm5:$shamt)>;
def : Pat<(i64 (srl (shl GPR:$rs1, (i64 32)), uimm6gt32:$shamt)),
@@ -1260,6 +1393,18 @@ def : PatGprGpr<shiftopw<riscv_sllw>, SLLW>;
def : PatGprGpr<shiftopw<riscv_srlw>, SRLW>;
def : PatGprGpr<shiftopw<riscv_sraw>, SRAW>;
+// Select W instructions if only the lower 32 bits of the result are used.
+def : PatGprGpr<binop_allwusers<add>, ADDW>;
+def : PatGprSimm12<binop_allwusers<add>, ADDIW>;
+def : PatGprGpr<binop_allwusers<sub>, SUBW>;
+def : PatGprImm<binop_allwusers<shl>, SLLIW, uimm5>;
+
+// If this is a shr of a value sign extended from i32, and all the users only
+// use the lower 32 bits, we can use an sraiw to remove the sext_inreg. This
+// occurs because SimplifyDemandedBits prefers srl over sra.
+def : Pat<(binop_allwusers<srl> (sext_inreg GPR:$rs1, i32), uimm5:$shamt),
+ (SRAIW GPR:$rs1, uimm5:$shamt)>;
+
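
The binop_allwusers patterns above rest on a modular-arithmetic fact: the low 32 bits of a 64-bit add/sub/mul/shl depend only on the low 32 bits of the inputs, and ADDW/SUBW/MULW/SLLIW compute exactly that (then sign-extend), so they are interchangeable whenever every user reads only the low 32 bits. A small check of the add case (hasAllWUsers itself is the DAG-side query; this only demonstrates the arithmetic):

#include <cassert>
#include <cstdint>

// What ADDW produces: sign-extend of the 32-bit sum of the low halves.
static uint64_t addw(uint64_t A, uint64_t B) {
  return (uint64_t)(int64_t)(int32_t)((uint32_t)A + (uint32_t)B);
}

int main() {
  const uint64_t A = 0x123456789abcdef0ULL, B = 0xfedcba9876543210ULL;
  // The low 32 bits match the full 64-bit add, which is all a "W user" sees.
  assert((uint32_t)addw(A, B) == (uint32_t)(A + B));
  return 0;
}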
/// Loads
defm : LdPat<sextloadi32, LW, i64>;
@@ -1300,7 +1445,8 @@ def : Pat<(add GPR:$rs1, (AddiPair:$rs2)),
(AddiPairImmA GPR:$rs2))>;
let Predicates = [IsRV64] in {
-def : Pat<(sext_inreg (add_oneuse GPR:$rs1, (AddiPair:$rs2)), i32),
+// Select W instructions if only the lower 32 bits of the result are used.
+def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
(ADDIW (ADDIW GPR:$rs1, (AddiPairImmB AddiPair:$rs2)),
(AddiPairImmA AddiPair:$rs2))>;
}
@@ -1314,6 +1460,6 @@ include "RISCVInstrInfoA.td"
include "RISCVInstrInfoF.td"
include "RISCVInstrInfoD.td"
include "RISCVInstrInfoC.td"
-include "RISCVInstrInfoB.td"
+include "RISCVInstrInfoZb.td"
include "RISCVInstrInfoV.td"
include "RISCVInstrInfoZfh.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 86f96c1529b1..d204c85d6179 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -745,13 +745,6 @@ def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SP:$rs1, 0)>;
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
-class CompressPat<dag input, dag output> {
- dag Input = input;
- dag Output = output;
- list<Predicate> Predicates = [];
- bit isCompressOnly = false;
-}
-
// Patterns are defined in the same order the compressed instructions appear
// on page 82 of the ISA manual.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 41eff2ef7607..2cd011a02345 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -331,6 +331,10 @@ def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>;
def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+// Saturating double->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_WU_D $rs1, 0b001)>;
+
// float->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>;
@@ -354,13 +358,17 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>;
// [u]int32->fp
def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>;
def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>;
+// Saturating double->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>;
+
// double->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>;
def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 6b5c9617426a..3400c3be52bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -21,15 +21,21 @@ def SDT_RISCVFMV_X_ANYEXTW_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>;
def STD_RISCVFCVT_W_RV64
: SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>;
+def STD_RISCVFCVT_X
+ : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
def riscv_fmv_w_x_rv64
: SDNode<"RISCVISD::FMV_W_X_RV64", SDT_RISCVFMV_W_X_RV64>;
def riscv_fmv_x_anyextw_rv64
: SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>;
-def riscv_fcvt_w_rv64
- : SDNode<"RISCVISD::FCVT_W_RV64", STD_RISCVFCVT_W_RV64>;
-def riscv_fcvt_wu_rv64
- : SDNode<"RISCVISD::FCVT_WU_RV64", STD_RISCVFCVT_W_RV64>;
+def riscv_fcvt_w_rtz_rv64
+ : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>;
+def riscv_fcvt_wu_rtz_rv64
+ : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>;
+def riscv_fcvt_x_rtz
+ : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>;
+def riscv_fcvt_xu_rtz
+ : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -379,6 +385,10 @@ def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+// Saturating float->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+
// float->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>;
@@ -400,13 +410,17 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>;
// float->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+// Saturating float->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_xu_rtz FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+
// float->int64 with current rounding mode.
def : Pat<(i64 (lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
def : Pat<(i64 (llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index f654ed1949a4..a037dbf585ce 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -72,8 +72,8 @@ def : PatGprGpr<urem, REMU>;
} // Predicates = [HasStdExtM]
let Predicates = [HasStdExtM, IsRV64] in {
-def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
- (MULW GPR:$rs1, GPR:$rs2)>;
+// Select W instructions if only the lower 32 bits of the result are used.
+def : PatGprGpr<binop_allwusers<mul>, MULW>;
def : PatGprGpr<riscv_divw, DIVW>;
def : PatGprGpr<riscv_divuw, DIVUW>;
@@ -96,20 +96,24 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))),
(REMW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtM, IsRV64]
+// Pattern to detect constants with no more than 32 active bits that can't
+// be materialized with lui+addiw.
+def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) &&
+ !isInt<32>(C->getSExtValue());
+}]>;
+
let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in {
// Special case for calculating the full 64-bit product of a 32x32 unsigned
// multiply where the inputs aren't known to be zero extended. We can shift the
// inputs left by 32 and use a MULHU. This saves two SRLIs needed to finish
// zeroing the upper 32 bits.
-// TODO: If one of the operands is zero extended and the other isn't, we might
-// still be better off shifting both left by 32.
def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))),
(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
-// Prevent matching the first part of this pattern to mulw. The mul here has
-// additionals users or the ANDs would have been removed. The above pattern
-// will be used for the other users. If we form a mulw we'll keep the ANDs alive
-// and they'll still become SLLI+SRLI.
-def : Pat<(sext_inreg (mul (and GPR:$rs1, 0xffffffff),
- (and GPR:$rs2, 0xffffffff)), i32),
- (ADDIW (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32)), 0)>;
+// The RHS could also be a constant that is hard to materialize. By shifting
+// it left we can allow constant materialization to use LUI+ADDIW via
+// hasAllWUsers.
+def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)),
+ (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>;
} // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba]
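
The MULHU trick above works because SLLI by 32 leaves exactly the (possibly not zero-extended) low 32 bits of each operand in the high half, so the 128-bit product of the two shifted values is (a mod 2^32)*(b mod 2^32) shifted up by 64, and MULHU's upper 64 bits are precisely the full 64-bit product of the 32-bit inputs. A quick check using the GCC/Clang unsigned __int128 extension:

#include <cassert>
#include <cstdint>

// MULHU: upper 64 bits of the 128-bit product of two 64-bit operands.
static uint64_t mulhu(uint64_t X, uint64_t Y) {
  return (uint64_t)(((unsigned __int128)X * Y) >> 64);
}

int main() {
  const uint64_t A = 0xdeadbeef12345678ULL, B = 0x0000000df0f0f0f0ULL;
  // Neither operand is known zero-extended; shift both left by 32 first.
  uint64_t Full = mulhu(A << 32, B << 32);
  assert(Full == (A & 0xffffffffULL) * (B & 0xffffffffULL));
  return 0;
}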
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 342497150d49..3d5f9bc54731 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -78,65 +78,105 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
}
//===----------------------------------------------------------------------===//
+// Scheduling definitions.
+//===----------------------------------------------------------------------===//
+
+class VMVRSched<int n>: Sched <[!cast<SchedReadWrite>("WriteVMov" # n # "V"),
+ !cast<SchedReadWrite>("ReadVMov" # n # "V")]>;
+
+class VLESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDE" # n),
+ ReadVLDX, ReadVMask]>;
+
+class VSESched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTE" # n),
+ !cast<SchedReadWrite>("ReadVSTE" # n # "V"),
+ ReadVSTX, ReadVMask]>;
+
+class VLSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDS" # n),
+ ReadVLDX, ReadVLDSX, ReadVMask]>;
+
+class VSSSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVSTS" # n),
+ !cast<SchedReadWrite>("ReadVSTS" # n # "V"),
+ ReadVSTX, ReadVSTSX, ReadVMask]>;
+
+class VLXSched<int n, string o> :
+ Sched <[!cast<SchedReadWrite>("WriteVLD" # o # "X" # n),
+ ReadVLDX, !cast<SchedReadWrite>("ReadVLD" # o # "XV"), ReadVMask]>;
+
+class VSXSched<int n, string o> :
+ Sched <[!cast<SchedReadWrite>("WriteVST" # o # "X" # n),
+ !cast<SchedReadWrite>("ReadVST" # o # "X" # n),
+ ReadVSTX, !cast<SchedReadWrite>("ReadVST" # o # "XV"), ReadVMask]>;
+
+class VLFSched<int n> : Sched <[!cast<SchedReadWrite>("WriteVLDFF" # n),
+ ReadVLDX, ReadVMask]>;
+
+//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
-// load vd, (rs1)
+// unit-stride load vd, (rs1), vm
+class VUnitStrideLoad<RISCVWidth width, string opcodestr>
+ : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+
+let vm = 1, RVVConstraint = NoConstraint in {
+// unit-stride whole register load vl<nf>r.v vd, (rs1)
+class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
+ width.Value{2-0}, (outs VRC:$vd), (ins GPR:$rs1),
+ opcodestr, "$vd, (${rs1})"> {
+ let Uses = [];
+}
+
+// unit-stride mask load vd, (rs1)
class VUnitStrideLoadMask<string opcodestr>
: RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
(outs VR:$vd),
- (ins GPR:$rs1), opcodestr, "$vd, (${rs1})"> {
- let vm = 1;
- let RVVConstraint = NoConstraint;
-}
+ (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">;
+} // vm = 1, RVVConstraint = NoConstraint
-// load vd, (rs1), vm
-class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width,
- string opcodestr>
- : RVInstVLU<0b000, width.Value{3}, lumop, width.Value{2-0},
+// unit-stride fault-only-first load vd, (rs1), vm
+class VUnitStrideLoadFF<RISCVWidth width, string opcodestr>
+ : RVInstVLU<0b000, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
-// load vd, (rs1), rs2, vm
+// strided load vd, (rs1), rs2, vm
class VStridedLoad<RISCVWidth width, string opcodestr>
: RVInstVLS<0b000, width.Value{3}, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $rs2$vm">;
-// load vd, (rs1), vs2, vm
+// indexed load vd, (rs1), vs2, vm
class VIndexedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
: RVInstVLX<0b000, width.Value{3}, mop, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $vs2$vm">;
-// vl<nf>r.v vd, (rs1)
-class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC>
- : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
- width.Value{2-0}, (outs VRC:$vd), (ins GPR:$rs1),
- opcodestr, "$vd, (${rs1})"> {
- let vm = 1;
- let Uses = [];
- let RVVConstraint = NoConstraint;
-}
+// unit-stride segment load vd, (rs1), vm
+class VUnitStrideSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
-// segment load vd, (rs1), vm
-class VUnitStrideSegmentLoad<bits<3> nf, RISCVLSUMOP lumop,
- RISCVWidth width, string opcodestr>
- : RVInstVLU<nf, width.Value{3}, lumop, width.Value{2-0},
+// segment fault-only-first load vd, (rs1), vm
+class VUnitStrideSegmentLoadFF<bits<3> nf, RISCVWidth width, string opcodestr>
+ : RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
-// segment load vd, (rs1), rs2, vm
+// strided segment load vd, (rs1), rs2, vm
class VStridedSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVLS<nf, width.Value{3}, width.Value{2-0},
(outs VR:$vd),
(ins GPR:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
"$vd, (${rs1}), $rs2$vm">;
-// segment load vd, (rs1), vs2, vm
+// indexed segment load vd, (rs1), vs2, vm
class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
string opcodestr>
: RVInstVLX<nf, width.Value{3}, mop, width.Value{2-0},
@@ -146,42 +186,40 @@ class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
-// store vd, vs3, (rs1)
+// unit-stride store vd, vs3, (rs1), vm
+class VUnitStrideStore<RISCVWidth width, string opcodestr>
+ : RVInstVSU<0b000, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
+ (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
+ "$vs3, (${rs1})$vm">;
+
+let vm = 1 in {
+// vs<nf>r.v vd, (rs1)
+class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
+ : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
+ 0b000, (outs), (ins VRC:$vs3, GPR:$rs1),
+ opcodestr, "$vs3, (${rs1})"> {
+ let Uses = [];
+}
+
+// unit-stride mask store vd, vs3, (rs1)
class VUnitStrideStoreMask<string opcodestr>
: RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
(outs), (ins VR:$vs3, GPR:$rs1), opcodestr,
- "$vs3, (${rs1})"> {
- let vm = 1;
-}
-
-// store vd, vs3, (rs1), vm
-class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width,
- string opcodestr>
- : RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0},
- (outs), (ins VR:$vs3, GPR:$rs1, VMaskOp:$vm), opcodestr,
- "$vs3, (${rs1})$vm">;
+ "$vs3, (${rs1})">;
+} // vm = 1
-// store vd, vs3, (rs1), rs2, vm
+// strided store vd, vs3, (rs1), rs2, vm
class VStridedStore<RISCVWidth width, string opcodestr>
: RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs),
(ins VR:$vs3, GPR:$rs1, GPR:$rs2, VMaskOp:$vm),
opcodestr, "$vs3, (${rs1}), $rs2$vm">;
-// store vd, vs3, (rs1), vs2, vm
+// indexed store vd, vs3, (rs1), vs2, vm
class VIndexedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
: RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs),
(ins VR:$vs3, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
opcodestr, "$vs3, (${rs1}), $vs2$vm">;
-// vs<nf>r.v vd, (rs1)
-class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
- : RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
- 0b000, (outs), (ins VRC:$vs3, GPR:$rs1),
- opcodestr, "$vs3, (${rs1})"> {
- let vm = 1;
- let Uses = [];
-}
-
// segment store vd, vs3, (rs1), vm
class VUnitStrideSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVSU<nf, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
@@ -328,106 +366,417 @@ class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr>
// Use these multiclasses to define instructions more easily.
//===----------------------------------------------------------------------===//
multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
}
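// Illustrative sketch, not part of the patch: given the defm uses further down
// (e.g. `defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;`), the multiclass above
// is expected to expand roughly to the following records, with the scheduling
// classes taken from the Sched<> lists shown above:
//
//   def VADD_VV : VALUVV<0b000000, OPIVV, "vadd.vv">,
//                 Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
//   def VADD_VX : VALUVX<0b000000, OPIVX, "vadd.vx">,
//                 Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
//   def VADD_VI : VALUVI<0b000000, "vadd.vi", simm5>,
//                 Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;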
multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
}
-multiclass VALUr_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPIVV, opcodestr # "." # vw # "v">;
- def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
}
-multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>;
+multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
}
-multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">;
+multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>;
}
-multiclass VALUr_IV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPIVX, opcodestr # "." # vw # "x">;
+multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
}
-multiclass VALU_MV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">;
+multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVExtV, ReadVExtV, ReadVMask]>;
}
-multiclass VALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
- def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">;
+multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
}
-multiclass VALU_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
}
-multiclass VALUr_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">;
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+ def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
}
-multiclass VALUr_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">;
+multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
+ def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>,
+ Sched<[WriteVICALUI, ReadVIALUCV]>;
}
-multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
- def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>;
+multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+ def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
}
-multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
- def IM : VALUmVI<funct6, opcodestr # ".vim">;
+multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
}
-multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">;
+multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
- def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>;
+multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">;
+multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>;
}
-multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
}
-multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VRDIV_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>;
}
-multiclass VALUr_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">;
- def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">;
+multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>;
}
-multiclass VALU_FV_V<string opcodestr, bits<6> funct6> {
- def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">;
+multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>;
}
-multiclass VALU_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
- def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>;
+multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>;
+}
+
+multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>;
+}
+
+multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>;
+}
+
+multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VCMP_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>;
+}
+
+multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>;
+}
+
+multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>;
+}
+
+multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
+multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>;
+}
+
+multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>;
+}
+
+multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>;
+}
+
+multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>;
+}
+
+multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>;
+}
+
+multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
+ Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
+}
+
+multiclass VRED_MV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
+ Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>;
+}
+
+multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">,
+ Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>;
+}
+
+multiclass VRED_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>;
+}
+
+multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>;
+}
+
+multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>;
+}
+
+multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>;
+}
+
+multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
+ Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
+}
+
+multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>;
+}
+
+multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
+ def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
+ Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>;
+}
+
+multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>;
+}
+
+multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+}
+
+multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>;
+}
+
+multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>;
+}
+
+multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>;
+}
+
+multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>;
+}
+
+multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>;
+}
+
+multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>;
+}
+
+multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>;
+}
+
+multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>;
+}
+
+multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>;
+}
+
+multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVISlideI, ReadVISlideV, ReadVMask]>;
+}
+
+multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+}
+
+multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
+ Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>;
+}
+
+multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
+ Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
+ Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>;
+}
+
+multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
+ def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
+ Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>;
}
multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
@@ -435,11 +784,14 @@ multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> {
def _UNWD : VAMONoWd<amoop, width, opcodestr>;
}
-multiclass VWholeLoad<bits<3> nf, string opcodestr, RegisterClass VRC> {
- def E8_V : VWholeLoad<nf, LSWidth8, opcodestr # "e8.v", VRC>;
- def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v", VRC>;
- def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v", VRC>;
- def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>;
+multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
+ foreach l = [8, 16, 32, 64] in {
+ defvar w = !cast<RISCVWidth>("LSWidth" # l);
+ defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R" # l);
+
+ def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
+ Sched<[s, ReadVLDX]>;
+ }
}
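// Illustrative sketch, not part of the patch: for
// `defm VL1R : VWholeLoadN<0, "vl1r", VR>;` (defined further down), the
// foreach above is expected to expand roughly to
//
//   def VL1RE8_V  : VWholeLoad<0, LSWidth8,  "vl1re8.v",  VR>, Sched<[WriteVLD1R8,  ReadVLDX]>;
//   def VL1RE16_V : VWholeLoad<0, LSWidth16, "vl1re16.v", VR>, Sched<[WriteVLD1R16, ReadVLDX]>;
//   def VL1RE32_V : VWholeLoad<0, LSWidth32, "vl1re32.v", VR>, Sched<[WriteVLD1R32, ReadVLDX]>;
//   def VL1RE64_V : VWholeLoad<0, LSWidth64, "vl1re64.v", VR>, Sched<[WriteVLD1R64, ReadVLDX]>;
//
// assuming SchedWrite records named "WriteVLD" # (nf + 1) # "R" # eew exist in
// the scheduling model.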
//===----------------------------------------------------------------------===//
@@ -457,71 +809,58 @@ def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei)
def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
+foreach eew = [8, 16, 32, 64] in {
+ defvar w = !cast<RISCVWidth>("LSWidth" # eew);
+
+ // Vector Unit-Stride Instructions
+ def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched<eew>;
+ def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched<eew>;
+
+ // Vector Unit-Stride Fault-only-First Loads
+ def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched<eew>;
+
+ // Vector Strided Instructions
+ def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
+ def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
+
+ // Vector Indexed Instructions
+ def VLUXEI#eew#_V :
+ VIndexedLoad<MOPLDIndexedUnord, w, "vluxei"#eew#".v">, VLXSched<eew, "U">;
+ def VLOXEI#eew#_V :
+ VIndexedLoad<MOPLDIndexedOrder, w, "vloxei"#eew#".v">, VLXSched<eew, "O">;
+ def VSUXEI#eew#_V :
+ VIndexedStore<MOPSTIndexedUnord, w, "vsuxei"#eew#".v">, VSXSched<eew, "U">;
+ def VSOXEI#eew#_V :
+ VIndexedStore<MOPSTIndexedOrder, w, "vsoxei"#eew#".v">, VSXSched<eew, "O">;
+}
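// Illustrative sketch, not part of the patch: for eew = 8 the loop above is
// expected to produce the same instructions the removed block below listed
// explicitly, now with per-EEW scheduling annotations, roughly:
//
//   def VLE8_V    : VUnitStrideLoad<LSWidth8, "vle8.v">,     VLESched<8>;
//   def VSE8_V    : VUnitStrideStore<LSWidth8, "vse8.v">,    VSESched<8>;
//   def VLE8FF_V  : VUnitStrideLoadFF<LSWidth8, "vle8ff.v">, VLFSched<8>;
//   def VLSE8_V   : VStridedLoad<LSWidth8, "vlse8.v">,       VLSSched<8>;
//   def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">, VLXSched<8, "U">;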
+
+def VLM_V : VUnitStrideLoadMask<"vlm.v">,
+ Sched<[WriteVLDM, ReadVLDX]>;
+def VSM_V : VUnitStrideStoreMask<"vsm.v">,
+ Sched<[WriteVSTM, ReadVSTM, ReadVSTX]>;
+def : InstAlias<"vle1.v $vd, (${rs1})",
+ (VLM_V VR:$vd, GPR:$rs1), 0>;
+def : InstAlias<"vse1.v $vs3, (${rs1})",
+ (VSM_V VR:$vs3, GPR:$rs1), 0>;
+
+defm VL1R : VWholeLoadN<0, "vl1r", VR>;
+defm VL2R : VWholeLoadN<1, "vl2r", VRM2>;
+defm VL4R : VWholeLoadN<3, "vl4r", VRM4>;
+defm VL8R : VWholeLoadN<7, "vl8r", VRM8>;
-// Vector Unit-Stride Instructions
-def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
-def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
-def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
-def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-
-def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
-def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
-def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
-def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-
-def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
-def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
-
-def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
-def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
-def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
-def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
-
-// Vector Strided Instructions
-def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
-def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
-def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
-def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-
-def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
-def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
-def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
-def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
-
-// Vector Indexed Instructions
-def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
-def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">;
-def VLUXEI32_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth32, "vluxei32.v">;
-def VLUXEI64_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth64, "vluxei64.v">;
-
-def VLOXEI8_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth8, "vloxei8.v">;
-def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">;
-def VLOXEI32_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth32, "vloxei32.v">;
-def VLOXEI64_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth64, "vloxei64.v">;
-
-def VSUXEI8_V : VIndexedStore<MOPSTIndexedUnord, LSWidth8, "vsuxei8.v">;
-def VSUXEI16_V : VIndexedStore<MOPSTIndexedUnord, LSWidth16, "vsuxei16.v">;
-def VSUXEI32_V : VIndexedStore<MOPSTIndexedUnord, LSWidth32, "vsuxei32.v">;
-def VSUXEI64_V : VIndexedStore<MOPSTIndexedUnord, LSWidth64, "vsuxei64.v">;
-
-def VSOXEI8_V : VIndexedStore<MOPSTIndexedOrder, LSWidth8, "vsoxei8.v">;
-def VSOXEI16_V : VIndexedStore<MOPSTIndexedOrder, LSWidth16, "vsoxei16.v">;
-def VSOXEI32_V : VIndexedStore<MOPSTIndexedOrder, LSWidth32, "vsoxei32.v">;
-def VSOXEI64_V : VIndexedStore<MOPSTIndexedOrder, LSWidth64, "vsoxei64.v">;
-
-defm VL1R : VWholeLoad<0, "vl1r", VR>;
-defm VL2R : VWholeLoad<1, "vl2r", VRM2>;
-defm VL4R : VWholeLoad<3, "vl4r", VRM4>;
-defm VL8R : VWholeLoad<7, "vl8r", VRM8>;
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>;
def : InstAlias<"vl4r.v $vd, (${rs1})", (VL4RE8_V VRM4:$vd, GPR:$rs1)>;
def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
-def VS1R_V : VWholeStore<0, "vs1r.v", VR>;
-def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>;
-def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>;
-def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>;
+def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
+ Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
+def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
+ Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>;
+def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>,
+ Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>;
+def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>,
+ Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>;
// Vector Single-Width Integer Add and Subtract
defm VADD_V : VALU_IV_V_X_I<"vadd", 0b000000>;
@@ -588,9 +927,9 @@ def : InstAlias<"vnot.v $vd, $vs$vm",
(VXOR_VI VR:$vd, VR:$vs, -1, VMaskOp:$vm)>;
// Vector Single-Width Bit Shift Instructions
-defm VSLL_V : VALU_IV_V_X_I<"vsll", 0b100101, uimm5>;
-defm VSRL_V : VALU_IV_V_X_I<"vsrl", 0b101000, uimm5>;
-defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
+defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>;
+defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>;
+defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>;
// Vector Narrowing Integer Right Shift Instructions
// Refer to 11.3. Narrowing Vector Arithmetic Instructions
@@ -598,8 +937,8 @@ defm VSRA_V : VALU_IV_V_X_I<"vsra", 0b101001, uimm5>;
// vector register group (specified by vs2). The destination vector register
// group cannot overlap the mask register if used, unless LMUL=1.
let Constraints = "@earlyclobber $vd" in {
-defm VNSRL_W : VALU_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
-defm VNSRA_W : VALU_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
+defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
+defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
@@ -607,14 +946,14 @@ def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
// Vector Integer Comparison Instructions
let RVVConstraint = NoConstraint in {
-defm VMSEQ_V : VALU_IV_V_X_I<"vmseq", 0b011000>;
-defm VMSNE_V : VALU_IV_V_X_I<"vmsne", 0b011001>;
-defm VMSLTU_V : VALU_IV_V_X<"vmsltu", 0b011010>;
-defm VMSLT_V : VALU_IV_V_X<"vmslt", 0b011011>;
-defm VMSLEU_V : VALU_IV_V_X_I<"vmsleu", 0b011100>;
-defm VMSLE_V : VALU_IV_V_X_I<"vmsle", 0b011101>;
-defm VMSGTU_V : VALU_IV_X_I<"vmsgtu", 0b011110>;
-defm VMSGT_V : VALU_IV_X_I<"vmsgt", 0b011111>;
+defm VMSEQ_V : VCMP_IV_V_X_I<"vmseq", 0b011000>;
+defm VMSNE_V : VCMP_IV_V_X_I<"vmsne", 0b011001>;
+defm VMSLTU_V : VCMP_IV_V_X<"vmsltu", 0b011010>;
+defm VMSLT_V : VCMP_IV_V_X<"vmslt", 0b011011>;
+defm VMSLEU_V : VCMP_IV_V_X_I<"vmsleu", 0b011100>;
+defm VMSLE_V : VCMP_IV_V_X_I<"vmsle", 0b011101>;
+defm VMSGTU_V : VCMP_IV_X_I<"vmsgtu", 0b011110>;
+defm VMSGT_V : VCMP_IV_X_I<"vmsgt", 0b011111>;
} // RVVConstraint = NoConstraint
def : InstAlias<"vmsgtu.vv $vd, $va, $vb$vm",
@@ -672,84 +1011,87 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch),
}
// Vector Integer Min/Max Instructions
-defm VMINU_V : VALU_IV_V_X<"vminu", 0b000100>;
-defm VMIN_V : VALU_IV_V_X<"vmin", 0b000101>;
-defm VMAXU_V : VALU_IV_V_X<"vmaxu", 0b000110>;
-defm VMAX_V : VALU_IV_V_X<"vmax", 0b000111>;
+defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>;
+defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>;
+defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>;
+defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>;
// Vector Single-Width Integer Multiply Instructions
-defm VMUL_V : VALU_MV_V_X<"vmul", 0b100101>;
-defm VMULH_V : VALU_MV_V_X<"vmulh", 0b100111>;
-defm VMULHU_V : VALU_MV_V_X<"vmulhu", 0b100100>;
-defm VMULHSU_V : VALU_MV_V_X<"vmulhsu", 0b100110>;
+defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>;
+defm VMULH_V : VMUL_MV_V_X<"vmulh", 0b100111>;
+defm VMULHU_V : VMUL_MV_V_X<"vmulhu", 0b100100>;
+defm VMULHSU_V : VMUL_MV_V_X<"vmulhsu", 0b100110>;
// Vector Integer Divide Instructions
-defm VDIVU_V : VALU_MV_V_X<"vdivu", 0b100000>;
-defm VDIV_V : VALU_MV_V_X<"vdiv", 0b100001>;
-defm VREMU_V : VALU_MV_V_X<"vremu", 0b100010>;
-defm VREM_V : VALU_MV_V_X<"vrem", 0b100011>;
+defm VDIVU_V : VDIV_MV_V_X<"vdivu", 0b100000>;
+defm VDIV_V : VDIV_MV_V_X<"vdiv", 0b100001>;
+defm VREMU_V : VDIV_MV_V_X<"vremu", 0b100010>;
+defm VREM_V : VDIV_MV_V_X<"vrem", 0b100011>;
// Vector Widening Integer Multiply Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VWMUL_V : VALU_MV_V_X<"vwmul", 0b111011>;
-defm VWMULU_V : VALU_MV_V_X<"vwmulu", 0b111000>;
-defm VWMULSU_V : VALU_MV_V_X<"vwmulsu", 0b111010>;
+defm VWMUL_V : VWMUL_MV_V_X<"vwmul", 0b111011>;
+defm VWMULU_V : VWMUL_MV_V_X<"vwmulu", 0b111000>;
+defm VWMULSU_V : VWMUL_MV_V_X<"vwmulsu", 0b111010>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Integer Multiply-Add Instructions
-defm VMACC_V : VALUr_MV_V_X<"vmacc", 0b101101>;
-defm VNMSAC_V : VALUr_MV_V_X<"vnmsac", 0b101111>;
-defm VMADD_V : VALUr_MV_V_X<"vmadd", 0b101001>;
-defm VNMSUB_V : VALUr_MV_V_X<"vnmsub", 0b101011>;
+defm VMACC_V : VMAC_MV_V_X<"vmacc", 0b101101>;
+defm VNMSAC_V : VMAC_MV_V_X<"vnmsac", 0b101111>;
+defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
+defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
// Vector Widening Integer Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VWMACCU_V : VALUr_MV_V_X<"vwmaccu", 0b111100>;
-defm VWMACC_V : VALUr_MV_V_X<"vwmacc", 0b111101>;
-defm VWMACCSU_V : VALUr_MV_V_X<"vwmaccsu", 0b111111>;
-defm VWMACCUS_V : VALUr_MV_X<"vwmaccus", 0b111110>;
+defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
+defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
+defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
+defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Integer Merge Instructions
-defm VMERGE_V : VALUm_IV_V_X_I<"vmerge", 0b010111>;
+defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
// Vector Integer Move Instructions
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
RVVConstraint = NoConstraint in {
// op vd, vs1
def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
- (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">;
+ (ins VR:$vs1), "vmv.v.v", "$vd, $vs1">,
+ Sched<[WriteVIMovV, ReadVIMovV]>;
// op vd, rs1
def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
- (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">;
+ (ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">,
+ Sched<[WriteVIMovX, ReadVIMovX]>;
// op vd, imm
def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
- (ins simm5:$imm), "vmv.v.i", "$vd, $imm">;
+ (ins simm5:$imm), "vmv.v.i", "$vd, $imm">,
+ Sched<[WriteVIMovI]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Vector Fixed-Point Arithmetic Instructions
-defm VSADDU_V : VALU_IV_V_X_I<"vsaddu", 0b100000>;
-defm VSADD_V : VALU_IV_V_X_I<"vsadd", 0b100001>;
-defm VSSUBU_V : VALU_IV_V_X<"vssubu", 0b100010>;
-defm VSSUB_V : VALU_IV_V_X<"vssub", 0b100011>;
+defm VSADDU_V : VSALU_IV_V_X_I<"vsaddu", 0b100000>;
+defm VSADD_V : VSALU_IV_V_X_I<"vsadd", 0b100001>;
+defm VSSUBU_V : VSALU_IV_V_X<"vssubu", 0b100010>;
+defm VSSUB_V : VSALU_IV_V_X<"vssub", 0b100011>;
// Vector Single-Width Averaging Add and Subtract
-defm VAADDU_V : VALU_MV_V_X<"vaaddu", 0b001000>;
-defm VAADD_V : VALU_MV_V_X<"vaadd", 0b001001>;
-defm VASUBU_V : VALU_MV_V_X<"vasubu", 0b001010>;
-defm VASUB_V : VALU_MV_V_X<"vasub", 0b001011>;
+defm VAADDU_V : VAALU_MV_V_X<"vaaddu", 0b001000>;
+defm VAADD_V : VAALU_MV_V_X<"vaadd", 0b001001>;
+defm VASUBU_V : VAALU_MV_V_X<"vasubu", 0b001010>;
+defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>;
// Vector Single-Width Fractional Multiply with Rounding and Saturation
-defm VSMUL_V : VALU_IV_V_X<"vsmul", 0b100111>;
+defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>;
// Vector Single-Width Scaling Shift Instructions
-defm VSSRL_V : VALU_IV_V_X_I<"vssrl", 0b101010, uimm5>;
-defm VSSRA_V : VALU_IV_V_X_I<"vssra", 0b101011, uimm5>;
+defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>;
+defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>;
// Vector Narrowing Fixed-Point Clip Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VNCLIPU_W : VALU_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
-defm VNCLIP_W : VALU_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
+defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
+defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasStdExtV]
@@ -762,60 +1104,60 @@ defm VFRSUB_V : VALU_FV_F<"vfrsub", 0b100111>;
// Vector Widening Floating-Point Add/Subtract Instructions
let Constraints = "@earlyclobber $vd" in {
let RVVConstraint = WidenV in {
-defm VFWADD_V : VALU_FV_V_F<"vfwadd", 0b110000>;
-defm VFWSUB_V : VALU_FV_V_F<"vfwsub", 0b110010>;
+defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>;
+defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>;
} // RVVConstraint = WidenV
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
let RVVConstraint = WidenW in {
-defm VFWADD_W : VALU_FV_V_F<"vfwadd", 0b110100, "w">;
-defm VFWSUB_W : VALU_FV_V_F<"vfwsub", 0b110110, "w">;
+defm VFWADD_W : VWALU_FV_V_F<"vfwadd", 0b110100, "w">;
+defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">;
} // RVVConstraint = WidenW
} // Constraints = "@earlyclobber $vd"
// Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm VFMUL_V : VALU_FV_V_F<"vfmul", 0b100100>;
-defm VFDIV_V : VALU_FV_V_F<"vfdiv", 0b100000>;
-defm VFRDIV_V : VALU_FV_F<"vfrdiv", 0b100001>;
+defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>;
+defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>;
+defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>;
// Vector Widening Floating-Point Multiply
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VFWMUL_V : VALU_FV_V_F<"vfwmul", 0b111000>;
+defm VFWMUL_V : VWMUL_FV_V_F<"vfwmul", 0b111000>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Single-Width Floating-Point Fused Multiply-Add Instructions
-defm VFMACC_V : VALUr_FV_V_F<"vfmacc", 0b101100>;
-defm VFNMACC_V : VALUr_FV_V_F<"vfnmacc", 0b101101>;
-defm VFMSAC_V : VALUr_FV_V_F<"vfmsac", 0b101110>;
-defm VFNMSAC_V : VALUr_FV_V_F<"vfnmsac", 0b101111>;
-defm VFMADD_V : VALUr_FV_V_F<"vfmadd", 0b101000>;
-defm VFNMADD_V : VALUr_FV_V_F<"vfnmadd", 0b101001>;
-defm VFMSUB_V : VALUr_FV_V_F<"vfmsub", 0b101010>;
-defm VFNMSUB_V : VALUr_FV_V_F<"vfnmsub", 0b101011>;
+defm VFMACC_V : VMAC_FV_V_F<"vfmacc", 0b101100>;
+defm VFNMACC_V : VMAC_FV_V_F<"vfnmacc", 0b101101>;
+defm VFMSAC_V : VMAC_FV_V_F<"vfmsac", 0b101110>;
+defm VFNMSAC_V : VMAC_FV_V_F<"vfnmsac", 0b101111>;
+defm VFMADD_V : VMAC_FV_V_F<"vfmadd", 0b101000>;
+defm VFNMADD_V : VMAC_FV_V_F<"vfnmadd", 0b101001>;
+defm VFMSUB_V : VMAC_FV_V_F<"vfmsub", 0b101010>;
+defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
// Vector Widening Floating-Point Fused Multiply-Add Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
-defm VFWMACC_V : VALUr_FV_V_F<"vfwmacc", 0b111100>;
-defm VFWNMACC_V : VALUr_FV_V_F<"vfwnmacc", 0b111101>;
-defm VFWMSAC_V : VALUr_FV_V_F<"vfwmsac", 0b111110>;
-defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
+defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
+defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
+defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;
+defm VFWNMSAC_V : VWMAC_FV_V_F<"vfwnmsac", 0b111111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
// Vector Floating-Point Square-Root Instruction
-defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
-defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
+defm VFSQRT_V : VSQR_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
+defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VRCP_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
// Vector Floating-Point MIN/MAX Instructions
-defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
-defm VFMAX_V : VALU_FV_V_F<"vfmax", 0b000110>;
+defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>;
+defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>;
// Vector Floating-Point Sign-Injection Instructions
-defm VFSGNJ_V : VALU_FV_V_F<"vfsgnj", 0b001000>;
-defm VFSGNJN_V : VALU_FV_V_F<"vfsgnjn", 0b001001>;
-defm VFSGNJX_V : VALU_FV_V_F<"vfsgnjx", 0b001010>;
+defm VFSGNJ_V : VSGNJ_FV_V_F<"vfsgnj", 0b001000>;
+defm VFSGNJN_V : VSGNJ_FV_V_F<"vfsgnjn", 0b001001>;
+defm VFSGNJX_V : VSGNJ_FV_V_F<"vfsgnjx", 0b001010>;
def : InstAlias<"vfneg.v $vd, $vs$vm",
(VFSGNJN_VV VR:$vd, VR:$vs, VR:$vs, VMaskOp:$vm)>;
@@ -824,12 +1166,12 @@ def : InstAlias<"vfabs.v $vd, $vs$vm",
// Vector Floating-Point Compare Instructions
let RVVConstraint = NoConstraint in {
-defm VMFEQ_V : VALU_FV_V_F<"vmfeq", 0b011000>;
-defm VMFNE_V : VALU_FV_V_F<"vmfne", 0b011100>;
-defm VMFLT_V : VALU_FV_V_F<"vmflt", 0b011011>;
-defm VMFLE_V : VALU_FV_V_F<"vmfle", 0b011001>;
-defm VMFGT_V : VALU_FV_F<"vmfgt", 0b011101>;
-defm VMFGE_V : VALU_FV_F<"vmfge", 0b011111>;
+defm VMFEQ_V : VCMP_FV_V_F<"vmfeq", 0b011000>;
+defm VMFNE_V : VCMP_FV_V_F<"vmfne", 0b011100>;
+defm VMFLT_V : VCMP_FV_V_F<"vmflt", 0b011011>;
+defm VMFLE_V : VCMP_FV_V_F<"vmfle", 0b011001>;
+defm VMFGT_V : VCMP_FV_F<"vmfgt", 0b011101>;
+defm VMFGE_V : VCMP_FV_F<"vmfge", 0b011111>;
} // RVVConstraint = NoConstraint
def : InstAlias<"vmfgt.vv $vd, $va, $vb$vm",
@@ -838,68 +1180,70 @@ def : InstAlias<"vmfge.vv $vd, $va, $vb$vm",
(VMFLE_VV VR:$vd, VR:$vb, VR:$va, VMaskOp:$vm), 0>;
// Vector Floating-Point Classify Instruction
-defm VFCLASS_V : VALU_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
+defm VFCLASS_V : VCLS_FV_VS2<"vfclass.v", 0b010011, 0b10000>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+
// Vector Floating-Point Merge Instruction
+let vm = 0 in
def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
- "vfmerge.vfm", "$vd, $vs2, $rs1, v0"> {
- let vm = 0;
-}
+ "vfmerge.vfm", "$vd, $vs2, $rs1, v0">,
+ Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
// Vector Floating-Point Move Instruction
let RVVConstraint = NoConstraint in
+let vm = 1, vs2 = 0 in
def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
- (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1"> {
- let vs2 = 0;
- let vm = 1;
-}
+ (ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">,
+ Sched<[WriteVFMovV, ReadVFMovF]>;
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Single-Width Floating-Point/Integer Type-Convert Instructions
-defm VFCVT_XU_F_V : VALU_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
-defm VFCVT_X_F_V : VALU_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
-defm VFCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
-defm VFCVT_RTZ_X_F_V : VALU_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
-defm VFCVT_F_XU_V : VALU_FV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
-defm VFCVT_F_X_V : VALU_FV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
+defm VFCVT_XU_F_V : VCVTI_FV_VS2<"vfcvt.xu.f.v", 0b010010, 0b00000>;
+defm VFCVT_X_F_V : VCVTI_FV_VS2<"vfcvt.x.f.v", 0b010010, 0b00001>;
+defm VFCVT_RTZ_XU_F_V : VCVTI_FV_VS2<"vfcvt.rtz.xu.f.v", 0b010010, 0b00110>;
+defm VFCVT_RTZ_X_F_V : VCVTI_FV_VS2<"vfcvt.rtz.x.f.v", 0b010010, 0b00111>;
+defm VFCVT_F_XU_V : VCVTF_IV_VS2<"vfcvt.f.xu.v", 0b010010, 0b00010>;
+defm VFCVT_F_X_V : VCVTF_IV_VS2<"vfcvt.f.x.v", 0b010010, 0b00011>;
// Widening Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt in {
-defm VFWCVT_XU_F_V : VALU_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
-defm VFWCVT_X_F_V : VALU_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
-defm VFWCVT_RTZ_XU_F_V : VALU_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
-defm VFWCVT_RTZ_X_F_V : VALU_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
-defm VFWCVT_F_XU_V : VALU_FV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
-defm VFWCVT_F_X_V : VALU_FV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
-defm VFWCVT_F_F_V : VALU_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
+defm VFWCVT_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.xu.f.v", 0b010010, 0b01000>;
+defm VFWCVT_X_F_V : VWCVTI_FV_VS2<"vfwcvt.x.f.v", 0b010010, 0b01001>;
+defm VFWCVT_RTZ_XU_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.xu.f.v", 0b010010, 0b01110>;
+defm VFWCVT_RTZ_X_F_V : VWCVTI_FV_VS2<"vfwcvt.rtz.x.f.v", 0b010010, 0b01111>;
+defm VFWCVT_F_XU_V : VWCVTF_IV_VS2<"vfwcvt.f.xu.v", 0b010010, 0b01010>;
+defm VFWCVT_F_X_V : VWCVTF_IV_VS2<"vfwcvt.f.x.v", 0b010010, 0b01011>;
+defm VFWCVT_F_F_V : VWCVTF_FV_VS2<"vfwcvt.f.f.v", 0b010010, 0b01100>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenCvt
// Narrowing Floating-Point/Integer Type-Convert Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VFNCVT_XU_F_W : VALU_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
-defm VFNCVT_X_F_W : VALU_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
-defm VFNCVT_RTZ_XU_F_W : VALU_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
-defm VFNCVT_RTZ_X_F_W : VALU_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
-defm VFNCVT_F_XU_W : VALU_FV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
-defm VFNCVT_F_X_W : VALU_FV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
-defm VFNCVT_F_F_W : VALU_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
-defm VFNCVT_ROD_F_F_W : VALU_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
+defm VFNCVT_XU_F_W : VNCVTI_FV_VS2<"vfncvt.xu.f.w", 0b010010, 0b10000>;
+defm VFNCVT_X_F_W : VNCVTI_FV_VS2<"vfncvt.x.f.w", 0b010010, 0b10001>;
+defm VFNCVT_RTZ_XU_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.xu.f.w", 0b010010, 0b10110>;
+defm VFNCVT_RTZ_X_F_W : VNCVTI_FV_VS2<"vfncvt.rtz.x.f.w", 0b010010, 0b10111>;
+defm VFNCVT_F_XU_W : VNCVTF_IV_VS2<"vfncvt.f.xu.w", 0b010010, 0b10010>;
+defm VFNCVT_F_X_W : VNCVTF_IV_VS2<"vfncvt.f.x.w", 0b010010, 0b10011>;
+defm VFNCVT_F_F_W : VNCVTF_FV_VS2<"vfncvt.f.f.w", 0b010010, 0b10100>;
+defm VFNCVT_ROD_F_F_W : VNCVTF_FV_VS2<"vfncvt.rod.f.f.w", 0b010010, 0b10101>;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
+
// Vector Single-Width Integer Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VREDSUM : VALU_MV_V<"vredsum", 0b000000>;
-defm VREDMAXU : VALU_MV_V<"vredmaxu", 0b000110>;
-defm VREDMAX : VALU_MV_V<"vredmax", 0b000111>;
-defm VREDMINU : VALU_MV_V<"vredminu", 0b000100>;
-defm VREDMIN : VALU_MV_V<"vredmin", 0b000101>;
-defm VREDAND : VALU_MV_V<"vredand", 0b000001>;
-defm VREDOR : VALU_MV_V<"vredor", 0b000010>;
-defm VREDXOR : VALU_MV_V<"vredxor", 0b000011>;
+defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>;
+defm VREDMAXU : VRED_MV_V<"vredmaxu", 0b000110>;
+defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>;
+defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>;
+defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>;
+defm VREDAND : VRED_MV_V<"vredand", 0b000001>;
+defm VREDOR : VRED_MV_V<"vredor", 0b000010>;
+defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>;
} // RVVConstraint = NoConstraint
// Vector Widening Integer Reduction Instructions
@@ -908,42 +1252,49 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
-defm VWREDSUMU : VALU_IV_V<"vwredsumu", 0b110000>;
-defm VWREDSUM : VALU_IV_V<"vwredsum", 0b110001>;
+defm VWREDSUMU : VWRED_IV_V<"vwredsumu", 0b110000>;
+defm VWREDSUM : VWRED_IV_V<"vwredsum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
// Vector Single-Width Floating-Point Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VFREDOSUM : VALU_FV_V<"vfredosum", 0b000011>;
-defm VFREDSUM : VALU_FV_V<"vfredsum", 0b000001>;
-defm VFREDMAX : VALU_FV_V<"vfredmax", 0b000111>;
-defm VFREDMIN : VALU_FV_V<"vfredmin", 0b000101>;
+defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>;
+defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>;
+defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>;
+defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>;
} // RVVConstraint = NoConstraint
+def : InstAlias<"vfredsum.vs $vd, $vs2, $vs1$vm",
+ (VFREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
+
// Vector Widening Floating-Point Reduction Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint in {
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
// will impose unnecessary restrictions by not allowing the destination to
// overlap with the first (wide) operand.
-defm VFWREDOSUM : VALU_FV_V<"vfwredosum", 0b110011>;
-defm VFWREDSUM : VALU_FV_V<"vfwredsum", 0b110001>;
+defm VFWREDOSUM : VWREDO_FV_V<"vfwredosum", 0b110011>;
+defm VFWREDUSUM : VWRED_FV_V<"vfwredusum", 0b110001>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = NoConstraint
+
+def : InstAlias<"vfwredsum.vs $vd, $vs2, $vs1$vm",
+ (VFWREDUSUM_VS VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), 0>;
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Mask-Register Logical Instructions
let RVVConstraint = NoConstraint in {
-defm VMAND_M : VALU_MV_Mask<"vmand", 0b011001, "m">;
-defm VMNAND_M : VALU_MV_Mask<"vmnand", 0b011101, "m">;
-defm VMANDNOT_M : VALU_MV_Mask<"vmandnot", 0b011000, "m">;
-defm VMXOR_M : VALU_MV_Mask<"vmxor", 0b011011, "m">;
-defm VMOR_M : VALU_MV_Mask<"vmor", 0b011010, "m">;
-defm VMNOR_M : VALU_MV_Mask<"vmnor", 0b011110, "m">;
-defm VMORNOT_M : VALU_MV_Mask<"vmornot", 0b011100, "m">;
-defm VMXNOR_M : VALU_MV_Mask<"vmxnor", 0b011111, "m">;
+defm VMAND_M : VMALU_MV_Mask<"vmand", 0b011001, "m">;
+defm VMNAND_M : VMALU_MV_Mask<"vmnand", 0b011101, "m">;
+defm VMANDN_M : VMALU_MV_Mask<"vmandn", 0b011000, "m">;
+defm VMXOR_M : VMALU_MV_Mask<"vmxor", 0b011011, "m">;
+defm VMOR_M : VMALU_MV_Mask<"vmor", 0b011010, "m">;
+defm VMNOR_M : VMALU_MV_Mask<"vmnor", 0b011110, "m">;
+defm VMORN_M : VMALU_MV_Mask<"vmorn", 0b011100, "m">;
+defm VMXNOR_M : VMALU_MV_Mask<"vmxnor", 0b011111, "m">;
}
def : InstAlias<"vmmv.m $vd, $vs",
@@ -955,207 +1306,175 @@ def : InstAlias<"vmset.m $vd",
def : InstAlias<"vmnot.m $vd, $vs",
(VMNAND_MM VR:$vd, VR:$vs, VR:$vs)>;
+def : InstAlias<"vmandnot.mm $vd, $vs2, $vs1",
+ (VMANDN_MM VR:$vd, VR:$vs2, VR:$vs1), 0>;
+def : InstAlias<"vmornot.mm $vd, $vs2, $vs1",
+ (VMORN_MM VR:$vd, VR:$vs2, VR:$vs1), 0>;
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-// Vector mask population count vpopc
-def VPOPC_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2, VMaskOp:$vm),
- "vpopc.m", "$vd, $vs2$vm">;
+
+// Vector mask population count vcpop
+def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
+ (ins VR:$vs2, VMaskOp:$vm),
+ "vcpop.m", "$vd, $vs2$vm">,
+ Sched<[WriteVMPopV, ReadVMPopV, ReadVMask]>;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2, VMaskOp:$vm),
- "vfirst.m", "$vd, $vs2$vm">;
+ (ins VR:$vs2, VMaskOp:$vm),
+ "vfirst.m", "$vd, $vs2$vm">,
+ Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMask]>;
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+def : InstAlias<"vpopc.m $vd, $vs2$vm",
+ (VCPOP_M GPR:$vd, VR:$vs2, VMaskOp:$vm), 0>;
+
let Constraints = "@earlyclobber $vd", RVVConstraint = Iota in {
+
// vmsbf.m set-before-first mask bit
-defm VMSBF_M : VALU_MV_VS2<"vmsbf.m", 0b010100, 0b00001>;
+defm VMSBF_M : VMSFS_MV_V<"vmsbf.m", 0b010100, 0b00001>;
// vmsif.m set-including-first mask bit
-defm VMSIF_M : VALU_MV_VS2<"vmsif.m", 0b010100, 0b00011>;
+defm VMSIF_M : VMSFS_MV_V<"vmsif.m", 0b010100, 0b00011>;
// vmsof.m set-only-first mask bit
-defm VMSOF_M : VALU_MV_VS2<"vmsof.m", 0b010100, 0b00010>;
+defm VMSOF_M : VMSFS_MV_V<"vmsof.m", 0b010100, 0b00010>;
// Vector Iota Instruction
-defm VIOTA_M : VALU_MV_VS2<"viota.m", 0b010100, 0b10000>;
+defm VIOTA_M : VMIOT_MV_V<"viota.m", 0b010100, 0b10000>;
+
} // Constraints = "@earlyclobber $vd", RVVConstraint = Iota
// Vector Element Index Instruction
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+
+let vs2 = 0 in
def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
- (ins VMaskOp:$vm), "vid.v", "$vd$vm"> {
- let vs2 = 0;
-}
+ (ins VMaskOp:$vm), "vid.v", "$vd$vm">,
+ Sched<[WriteVMIdxV, ReadVMask]>;
// Integer Scalar Move Instructions
let vm = 1, RVVConstraint = NoConstraint in {
def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd),
- (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">;
+ (ins VR:$vs2), "vmv.x.s", "$vd, $vs2">,
+ Sched<[WriteVIMovVX, ReadVIMovVX]>;
let Constraints = "$vd = $vd_wb" in
def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb),
- (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">;
-
+ (ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">,
+ Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>;
}
+
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1,
RVVConstraint = NoConstraint in {
// Floating-Point Scalar Move Instructions
def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd),
- (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">;
+ (ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">,
+ Sched<[WriteVFMovVF, ReadVFMovVF]>;
let Constraints = "$vd = $vd_wb" in
def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
- (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">;
+ (ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">,
+ Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
+
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Slide Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VSLIDEUP_V : VALU_IV_X_I<"vslideup", 0b001110, uimm5>;
-defm VSLIDE1UP_V : VALU_MV_X<"vslide1up", 0b001110>;
+defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>;
+defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VSLIDEDOWN_V : VALU_IV_X_I<"vslidedown", 0b001111, uimm5>;
-defm VSLIDE1DOWN_V : VALU_MV_X<"vslide1down", 0b001111>;
+defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>;
+defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtV, HasStdExtF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VFSLIDE1UP_V : VALU_FV_F<"vfslide1up", 0b001110>;
+defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VFSLIDE1DOWN_V : VALU_FV_F<"vfslide1down", 0b001111>;
+defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
} // Predicates = [HasStdExtV, HasStdExtF]
let Predicates = [HasStdExtV] in {
// Vector Register Gather Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
-defm VRGATHER_V : VALU_IV_V_X_I<"vrgather", 0b001100, uimm5>;
-def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">;
+defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>;
+def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
+ Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in {
-defm VCOMPRESS_V : VALU_MV_Mask<"vcompress", 0b010111>;
+defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
RVVConstraint = NoConstraint in {
-foreach nf = [1, 2, 4, 8] in {
- def VMV#nf#R_V : RVInstV<0b100111, !add(nf, -1), OPIVI, (outs VR:$vd),
- (ins VR:$vs2), "vmv" # nf # "r.v",
- "$vd, $vs2"> {
- let Uses = [];
- let vm = 1;
- }
+foreach n = [1, 2, 4, 8] in {
+ def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd),
+ (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">,
+ VMVRSched<n> {
+ let Uses = [];
+ let vm = 1;
+}
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
} // Predicates = [HasStdExtV]
let Predicates = [HasStdExtZvlsseg] in {
foreach nf=2-8 in {
- def VLSEG#nf#E8_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth8, "vlseg"#nf#"e8.v">;
- def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">;
- def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">;
- def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">;
-
- def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">;
- def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">;
- def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">;
- def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">;
-
- def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">;
- def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">;
- def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">;
- def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
-
- // Vector Strided Instructions
- def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">;
- def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">;
- def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">;
- def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
-
- def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">;
- def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">;
- def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">;
- def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
-
- // Vector Indexed Instructions
- def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth8, "vluxseg"#nf#"ei8.v">;
- def VLUXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth16, "vluxseg"#nf#"ei16.v">;
- def VLUXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth32, "vluxseg"#nf#"ei32.v">;
- def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth64, "vluxseg"#nf#"ei64.v">;
-
- def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth8, "vloxseg"#nf#"ei8.v">;
- def VLOXSEG#nf#EI16_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth16, "vloxseg"#nf#"ei16.v">;
- def VLOXSEG#nf#EI32_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth32, "vloxseg"#nf#"ei32.v">;
- def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth64, "vloxseg"#nf#"ei64.v">;
-
- def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth8, "vsuxseg"#nf#"ei8.v">;
- def VSUXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth16, "vsuxseg"#nf#"ei16.v">;
- def VSUXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth32, "vsuxseg"#nf#"ei32.v">;
- def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth64, "vsuxseg"#nf#"ei64.v">;
-
- def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth8, "vsoxseg"#nf#"ei8.v">;
- def VSOXSEG#nf#EI16_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth16, "vsoxseg"#nf#"ei16.v">;
- def VSOXSEG#nf#EI32_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth32, "vsoxseg"#nf#"ei32.v">;
- def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth64, "vsoxseg"#nf#"ei64.v">;
+ foreach eew = [8, 16, 32, 64] in {
+ defvar w = !cast<RISCVWidth>("LSWidth"#eew);
+
+ def VLSEG#nf#E#eew#_V :
+ VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">;
+ def VLSEG#nf#E#eew#FF_V :
+ VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">;
+ def VSSEG#nf#E#eew#_V :
+ VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">;
+
+ // Vector Strided Instructions
+ def VLSSEG#nf#E#eew#_V :
+ VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">;
+ def VSSSEG#nf#E#eew#_V :
+ VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">;
+
+ // Vector Indexed Instructions
+ def VLUXSEG#nf#EI#eew#_V :
+ VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w,
+ "vluxseg"#nf#"ei"#eew#".v">;
+ def VLOXSEG#nf#EI#eew#_V :
+ VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w,
+ "vloxseg"#nf#"ei"#eew#".v">;
+ def VSUXSEG#nf#EI#eew#_V :
+ VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w,
+ "vsuxseg"#nf#"ei"#eew#".v">;
+ def VSOXSEG#nf#EI#eew#_V :
+ VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w,
+ "vsoxseg"#nf#"ei"#eew#".v">;
+ }
}
} // Predicates = [HasStdExtZvlsseg]
let Predicates = [HasStdExtZvamo, HasStdExtA] in {
- defm VAMOSWAPEI8 : VAMO<AMOOPVamoSwap, LSWidth8, "vamoswapei8.v">;
- defm VAMOSWAPEI16 : VAMO<AMOOPVamoSwap, LSWidth16, "vamoswapei16.v">;
- defm VAMOSWAPEI32 : VAMO<AMOOPVamoSwap, LSWidth32, "vamoswapei32.v">;
-
- defm VAMOADDEI8 : VAMO<AMOOPVamoAdd, LSWidth8, "vamoaddei8.v">;
- defm VAMOADDEI16 : VAMO<AMOOPVamoAdd, LSWidth16, "vamoaddei16.v">;
- defm VAMOADDEI32 : VAMO<AMOOPVamoAdd, LSWidth32, "vamoaddei32.v">;
-
- defm VAMOXOREI8 : VAMO<AMOOPVamoXor, LSWidth8, "vamoxorei8.v">;
- defm VAMOXOREI16 : VAMO<AMOOPVamoXor, LSWidth16, "vamoxorei16.v">;
- defm VAMOXOREI32 : VAMO<AMOOPVamoXor, LSWidth32, "vamoxorei32.v">;
-
- defm VAMOANDEI8 : VAMO<AMOOPVamoAnd, LSWidth8, "vamoandei8.v">;
- defm VAMOANDEI16 : VAMO<AMOOPVamoAnd, LSWidth16, "vamoandei16.v">;
- defm VAMOANDEI32 : VAMO<AMOOPVamoAnd, LSWidth32, "vamoandei32.v">;
-
- defm VAMOOREI8 : VAMO<AMOOPVamoOr, LSWidth8, "vamoorei8.v">;
- defm VAMOOREI16 : VAMO<AMOOPVamoOr, LSWidth16, "vamoorei16.v">;
- defm VAMOOREI32 : VAMO<AMOOPVamoOr, LSWidth32, "vamoorei32.v">;
-
- defm VAMOMINEI8 : VAMO<AMOOPVamoMin, LSWidth8, "vamominei8.v">;
- defm VAMOMINEI16 : VAMO<AMOOPVamoMin, LSWidth16, "vamominei16.v">;
- defm VAMOMINEI32 : VAMO<AMOOPVamoMin, LSWidth32, "vamominei32.v">;
-
- defm VAMOMAXEI8 : VAMO<AMOOPVamoMax, LSWidth8, "vamomaxei8.v">;
- defm VAMOMAXEI16 : VAMO<AMOOPVamoMax, LSWidth16, "vamomaxei16.v">;
- defm VAMOMAXEI32 : VAMO<AMOOPVamoMax, LSWidth32, "vamomaxei32.v">;
-
- defm VAMOMINUEI8 : VAMO<AMOOPVamoMinu, LSWidth8, "vamominuei8.v">;
- defm VAMOMINUEI16 : VAMO<AMOOPVamoMinu, LSWidth16, "vamominuei16.v">;
- defm VAMOMINUEI32 : VAMO<AMOOPVamoMinu, LSWidth32, "vamominuei32.v">;
-
- defm VAMOMAXUEI8 : VAMO<AMOOPVamoMaxu, LSWidth8, "vamomaxuei8.v">;
- defm VAMOMAXUEI16 : VAMO<AMOOPVamoMaxu, LSWidth16, "vamomaxuei16.v">;
- defm VAMOMAXUEI32 : VAMO<AMOOPVamoMaxu, LSWidth32, "vamomaxuei32.v">;
+ foreach eew = [8, 16, 32] in {
+ defvar w = !cast<RISCVWidth>("LSWidth"#eew);
+ defm VAMOSWAPEI#eew : VAMO<AMOOPVamoSwap, w, "vamoswapei"#eew#".v">;
+ defm VAMOADDEI#eew : VAMO<AMOOPVamoAdd, w, "vamoaddei"#eew#".v">;
+ defm VAMOXOREI#eew : VAMO<AMOOPVamoXor, w, "vamoxorei"#eew#".v">;
+ defm VAMOANDEI#eew : VAMO<AMOOPVamoAnd, w, "vamoandei"#eew#".v">;
+ defm VAMOOREI#eew : VAMO<AMOOPVamoOr, w, "vamoorei"#eew#".v">;
+ defm VAMOMINEI#eew : VAMO<AMOOPVamoMin, w, "vamominei"#eew#".v">;
+ defm VAMOMAXEI#eew : VAMO<AMOOPVamoMax, w, "vamomaxei"#eew#".v">;
+ defm VAMOMINUEI#eew : VAMO<AMOOPVamoMinu, w, "vamominuei"#eew#".v">;
+ defm VAMOMAXUEI#eew : VAMO<AMOOPVamoMaxu, w, "vamomaxuei"#eew#".v">;
+ }
} // Predicates = [HasStdExtZvamo, HasStdExtA]
let Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 0284ff6d1c6b..a82e333e6bab 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -23,7 +23,7 @@ def riscv_read_vlenb : SDNode<"RISCVISD::READ_VLENB",
// Operand that is allowed to be a register or a 5 bit immediate.
// This allows us to pick between VSETIVLI and VSETVLI opcodes using the same
// pseudo instructions.
-def AVL : RegisterOperand<GPR> {
+def AVL : RegisterOperand<GPRNoX0> {
let OperandNamespace = "RISCVOp";
let OperandType = "OPERAND_AVL";
}
@@ -40,6 +40,9 @@ def DecImm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+defvar TAIL_UNDISTURBED = 0;
+defvar TAIL_AGNOSTIC = 1;
+
//===----------------------------------------------------------------------===//
// Utilities.
//===----------------------------------------------------------------------===//
@@ -137,7 +140,9 @@ class octuple_to_str<int octuple> {
def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
// Output pattern for X0 used to represent VLMAX in the pseudo instructions.
-def VLMax : OutPatFrag<(ops), (XLenVT X0)>;
+// We can't use the X0 register because the AVL operands use GPRNoX0.
+// This must be kept in sync with RISCV::VLMaxSentinel.
+def VLMax : OutPatFrag<(ops), (XLenVT -1)>;
// List of EEW.
defvar EEWList = [8, 16, 32, 64];
@@ -577,13 +582,11 @@ class PseudoToVInst<string PseudoInst> {
!subst("_B32", "",
!subst("_B64", "",
!subst("_MASK", "",
- !subst("_COMMUTABLE", "",
- !subst("_TA", "",
!subst("_TIED", "",
!subst("F16", "F",
!subst("F32", "F",
!subst("F64", "F",
- !subst("Pseudo", "", PseudoInst))))))))))))))))))))));
+ !subst("Pseudo", "", PseudoInst))))))))))))))))))));
}
// The destination vector register group for a masked vector instruction cannot
@@ -643,7 +646,7 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -653,6 +656,7 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -674,7 +678,7 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, GPR:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -684,6 +688,7 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -708,7 +713,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
GPR:$rs1, IdxClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLX</*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
@@ -718,6 +723,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -861,6 +867,22 @@ class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
// mask unary operation without maskedoff
class VPseudoMaskUnarySOutMask:
Pseudo<(outs GPR:$rd),
@@ -976,6 +998,26 @@ class VPseudoBinaryMask<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoBinaryMaskTA<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
// Like VPseudoBinaryMask, but output can be V0.
class VPseudoBinaryMOutMask<VReg RetClass,
RegisterClass Op1Class,
@@ -1005,7 +1047,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge,
Op2Class:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
@@ -1014,6 +1056,7 @@ class VPseudoTiedBinaryMask<VReg RetClass,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 0; // Merge is also rs2.
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1060,6 +1103,27 @@ class VPseudoTernaryNoMask<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
+ []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
+ let HasVecPolicyOp = 1;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasMergeOp = 1;
+ let HasDummyMask = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoAMOWDNoMask<VReg RetClass,
VReg Op1Class> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd),
@@ -1139,7 +1203,7 @@ class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -1149,6 +1213,7 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1170,7 +1235,8 @@ class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
- GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
let mayLoad = 1;
@@ -1180,6 +1246,7 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1205,7 +1272,8 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
bits<4> NF, bit Ordered>:
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, GPR:$rs1,
- IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
+ IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy),[]>,
RISCVVPseudo,
RISCVVLXSEG<NF, /*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
let mayLoad = 1;
@@ -1217,6 +1285,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let HasVLOp = 1;
let HasSEWOp = 1;
let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
@@ -1492,8 +1561,8 @@ multiclass VPseudoBinary<VReg RetClass,
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskTA<RetClass, Op1Class, Op2Class,
+ Constraint>;
}
}
@@ -1520,8 +1589,8 @@ multiclass VPseudoBinaryEmul<VReg RetClass,
let VLMul = lmul.value in {
def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
Constraint>;
- def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskTA<RetClass, Op1Class, Op2Class,
+ Constraint>;
}
}
@@ -1713,6 +1782,15 @@ multiclass VPseudoUnaryV_F_NoDummyMask {
}
}
+multiclass VPseudoUnaryTAV_V {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>;
+ }
+ }
+}
+
multiclass VPseudoUnaryV_V {
foreach m = MxList.m in {
let VLMul = m.value in {
@@ -1728,8 +1806,8 @@ multiclass PseudoUnaryV_VF2 {
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f2vrclass,
- constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass,
+ constraints>;
}
}
}
@@ -1740,8 +1818,8 @@ multiclass PseudoUnaryV_VF4 {
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f4vrclass,
- constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass,
+ constraints>;
}
}
}
@@ -1752,8 +1830,8 @@ multiclass PseudoUnaryV_VF8 {
{
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.f8vrclass,
- constraints>;
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass,
+ constraints>;
}
}
}
@@ -1887,16 +1965,23 @@ multiclass VPseudoTernary<VReg RetClass,
}
}
-multiclass VPseudoTernaryV_VV<string Constraint = ""> {
+multiclass VPseudoTernaryWithPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = "",
+ bit Commutable = 0> {
+ let VLMul = MInfo.value in {
+ let isCommutable = Commutable in
+ def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, Constraint>;
+ }
+}
+
+multiclass VPseudoTernaryV_VV_AAXA<string Constraint = ""> {
foreach m = MxList.m in {
- defm _VV : VPseudoTernary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
-
- // Add a commutable version for use by IR mul+add.
- let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in
- def "_VV_" # m.MX # "_COMMUTABLE" : VPseudoTernaryNoMask<m.vrclass,
- m.vrclass,
- m.vrclass,
- Constraint>;
+ defm _VV : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint, /*Commutable*/1>;
}
}
@@ -1906,68 +1991,39 @@ multiclass VPseudoTernaryV_VX<string Constraint = ""> {
}
multiclass VPseudoTernaryV_VX_AAXA<string Constraint = ""> {
- foreach m = MxList.m in {
- defm "_VX" : VPseudoTernary<m.vrclass, GPR, m.vrclass, m, Constraint>;
-
- // Add a commutable version for use by IR mul+add.
- let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in
- def "_VX_" # m.MX # "_COMMUTABLE" :
- VPseudoTernaryNoMask<m.vrclass, GPR, m.vrclass, Constraint>;
- }
+ foreach m = MxList.m in
+ defm "_VX" : VPseudoTernaryWithPolicy<m.vrclass, GPR, m.vrclass, m,
+ Constraint, /*Commutable*/1>;
}
multiclass VPseudoTernaryV_VF_AAXA<string Constraint = ""> {
- foreach m = MxList.m in {
- foreach f = FPList.fpinfo in {
- defm "_V" # f.FX : VPseudoTernary<m.vrclass, f.fprclass, m.vrclass,
- m, Constraint>;
-
- // Add a commutable version for use by IR mul+add.
- let isCommutable = 1, ForceTailAgnostic = true, VLMul = m.value in
- def "_V" # f.FX # "_" # m.MX # "_COMMUTABLE" :
- VPseudoTernaryNoMask<m.vrclass, f.fprclass, m.vrclass, Constraint>;
- }
- }
+ foreach m = MxList.m in
+ foreach f = FPList.fpinfo in
+ defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.vrclass, f.fprclass,
+ m.vrclass, m, Constraint,
+ /*Commutable*/1>;
}
multiclass VPseudoTernaryW_VV {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in {
- defm _VV : VPseudoTernary<m.wvrclass, m.vrclass, m.vrclass, m, constraint>;
-
- // Add a tail agnostic version for us by IR mul+add.
- let ForceTailAgnostic = true, VLMul = m.value in
- def "_VV_" # m.MX # "_TA" : VPseudoTernaryNoMask<m.wvrclass,
- m.vrclass,
- m.vrclass,
- constraint>;
- }
+ foreach m = MxListW.m in
+ defm _VV : VPseudoTernaryWithPolicy<m.wvrclass, m.vrclass, m.vrclass, m,
+ constraint>;
}
multiclass VPseudoTernaryW_VX {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW.m in {
- defm "_VX" : VPseudoTernary<m.wvrclass, GPR, m.vrclass, m, constraint>;
-
- // Add a tail agnostic version for use by IR mul+add.
- let ForceTailAgnostic = true, VLMul = m.value in
- def "_VX_" # m.MX # "_TA" :
- VPseudoTernaryNoMask<m.wvrclass, GPR, m.vrclass, constraint>;
- }
+ foreach m = MxListW.m in
+ defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m,
+ constraint>;
}
multiclass VPseudoTernaryW_VF {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW.m in
- foreach f = FPListW.fpinfo in {
- defm "_V" # f.FX : VPseudoTernary<m.wvrclass, f.fprclass, m.vrclass, m,
- constraint>;
-
- // Add a tail agnostic version for use by IR mul+add.
- let ForceTailAgnostic = true, VLMul = m.value in
- def "_V" # f.FX # "_" # m.MX # "_TA" :
- VPseudoTernaryNoMask<m.vrclass, f.fprclass, m.vrclass, constraint>;
- }
+ foreach f = FPListW.fpinfo in
+ defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.wvrclass, f.fprclass,
+ m.vrclass, m, constraint>;
}
multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
@@ -1976,12 +2032,12 @@ multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
}
multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV<Constraint>;
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
defm "" : VPseudoTernaryV_VX_AAXA<Constraint>;
}
multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> {
- defm "" : VPseudoTernaryV_VV<Constraint>;
+ defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
defm "" : VPseudoTernaryV_VF_AAXA<Constraint>;
}
@@ -2033,8 +2089,8 @@ multiclass VPseudoConversion<VReg RetClass,
string Constraint = ""> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
- Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA<RetClass, Op1Class,
+ Constraint>;
}
}
@@ -2217,6 +2273,26 @@ class VPatUnaryMask<string intrinsic_name,
(op2_type op2_reg_class:$rs2),
(mask_type V0), GPR:$vl, sew)>;
+class VPatUnaryMaskTA<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ (mask_type V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_reg_class:$rs2),
+ (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+
class VPatMaskUnaryNoMask<string intrinsic_name,
string inst,
MTypeInfo mti> :
@@ -2318,6 +2394,28 @@ class VPatBinaryMask<string intrinsic_name,
(op2_type op2_kind:$rs2),
(mask_type V0), GPR:$vl, sew)>;
+class VPatBinaryMaskTA<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+
// Same as above but source operands are swapped.
class VPatBinaryMaskSwapped<string intrinsic_name,
string inst,
@@ -2370,11 +2468,11 @@ class VPatTiedBinaryMask<string intrinsic_name,
(result_type result_reg_class:$merge),
(op2_type op2_kind:$rs2),
(mask_type V0),
- VLOpFrag)),
+ VLOpFrag, (XLenVT timm:$policy))),
(!cast<Instruction>(inst#"_MASK_TIED")
(result_type result_reg_class:$merge),
(op2_type op2_kind:$rs2),
- (mask_type V0), GPR:$vl, sew)>;
+ (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
class VPatTernaryNoMask<string intrinsic,
string inst,
@@ -2382,7 +2480,6 @@ class VPatTernaryNoMask<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
VReg result_reg_class,
@@ -2399,6 +2496,28 @@ class VPatTernaryNoMask<string intrinsic,
op2_kind:$rs2,
GPR:$vl, sew)>;
+class VPatTernaryNoMaskWithPolicy<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ GPR:$vl, sew, TAIL_UNDISTURBED)>;
+
class VPatTernaryMask<string intrinsic,
string inst,
string kind,
@@ -2514,9 +2633,9 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
def : VPatUnaryNoMask<intrinsic, instruction, suffix,
vti.Vector, fti.Vector,
vti.Log2SEW, vti.LMul, fti.RegClass>;
- def : VPatUnaryMask<intrinsic, instruction, suffix,
- vti.Vector, fti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
+ def : VPatUnaryMaskTA<intrinsic, instruction, suffix,
+ vti.Vector, fti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
}
}
@@ -2526,9 +2645,9 @@ multiclass VPatUnaryV_V<string intrinsic, string instruction,
def : VPatUnaryNoMask<intrinsic, instruction, "V",
vti.Vector, vti.Vector,
vti.Log2SEW, vti.LMul, vti.RegClass>;
- def : VPatUnaryMask<intrinsic, instruction, "V",
- vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+ def : VPatUnaryMaskTA<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
}
}
@@ -2574,6 +2693,24 @@ multiclass VPatBinary<string intrinsic,
op2_kind>;
}
+multiclass VPatBinaryTA<string intrinsic,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind>
+{
+ def : VPatBinaryNoMask<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryMaskTA<intrinsic, inst, result_type, op1_type, op2_type,
+ mask_type, sew, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
multiclass VPatBinarySwapped<string intrinsic,
string inst,
ValueType result_type,
@@ -2653,23 +2790,40 @@ multiclass VPatConversion<string intrinsic,
mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
}
+multiclass VPatConversionTA<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class>
+{
+ def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
+ sew, vlmul, op1_reg_class>;
+ def : VPatUnaryMaskTA<intrinsic, inst, kind, result_type, op1_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+}
+
multiclass VPatBinaryV_VV<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Vector, vti.Vector, vti.Vector,vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
multiclass VPatBinaryV_VV_INT<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar ivti = GetIntVTypeInfo<vti>.Vti;
- defm : VPatBinary<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
}
@@ -2684,10 +2838,10 @@ multiclass VPatBinaryV_VV_INT_EEW<string intrinsic, string instruction,
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI" # eew # emul_str);
defvar inst = instruction # "_VV_" # vti.LMul.MX # "_" # emul_str;
- defm : VPatBinary<intrinsic, inst,
- vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, ivti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, inst,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, ivti.RegClass>;
}
}
}
@@ -2696,29 +2850,29 @@ multiclass VPatBinaryV_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar kind = "V"#vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
- vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
+ vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
}
}
multiclass VPatBinaryV_VX_INT<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VX_" # vti.LMul.MX,
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, GPR>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VX_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, GPR>;
}
multiclass VPatBinaryV_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand imm_type> {
foreach vti = vtilist in
- defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX,
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, imm_type>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, imm_type>;
}
multiclass VPatBinaryM_MM<string intrinsic, string instruction> {
@@ -2733,10 +2887,10 @@ multiclass VPatBinaryW_VV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinary<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Vti.RegClass, Vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.RegClass>;
}
}
@@ -2746,10 +2900,10 @@ multiclass VPatBinaryW_VX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "V"#Vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Vti.RegClass, Vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -2765,10 +2919,10 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction,
def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- def : VPatBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ def : VPatBinaryMaskTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
@@ -2778,10 +2932,10 @@ multiclass VPatBinaryW_WX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -2790,10 +2944,10 @@ multiclass VPatBinaryV_WV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
@@ -2803,10 +2957,10 @@ multiclass VPatBinaryV_WX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
- defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, Vti.ScalarRegClass>;
+ defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -2815,10 +2969,10 @@ multiclass VPatBinaryV_WI<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinary<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
- Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, uimm5>;
+ defm : VPatBinaryTA<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, uimm5>;
}
}
@@ -2989,20 +3143,40 @@ multiclass VPatTernary<string intrinsic,
RegisterClass op1_reg_class,
DAGOperand op2_kind> {
def : VPatTernaryNoMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class,
- op2_kind>;
+ sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
mask_type, sew, vlmul, result_reg_class, op1_reg_class,
op2_kind>;
}
-multiclass VPatTernaryV_VV<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+multiclass VPatTernaryWithPolicy<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMaskWithPolicy<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, sew, vlmul, result_reg_class,
+ op1_reg_class, op2_kind>;
+ def : VPatTernaryMask<intrinsic, inst, kind, result_type, op1_type, op2_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
+multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatTernary<intrinsic, instruction, "VV",
- vti.Vector, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
multiclass VPatTernaryV_VX<string intrinsic, string instruction,
@@ -3017,11 +3191,11 @@ multiclass VPatTernaryV_VX<string intrinsic, string instruction,
multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatTernary<intrinsic, instruction,
- "V"#vti.ScalarSuffix,
- vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.ScalarRegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.ScalarRegClass, vti.RegClass>;
}
multiclass VPatTernaryV_VI<string intrinsic, string instruction,
@@ -3038,10 +3212,10 @@ multiclass VPatTernaryW_VV<string intrinsic, string instruction,
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- defm : VPatTernary<intrinsic, instruction, "VV",
- wti.Vector, vti.Vector, vti.Vector,
- vti.Mask, vti.Log2SEW, vti.LMul,
- wti.RegClass, vti.RegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
+ wti.Vector, vti.Vector, vti.Vector,
+ vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.RegClass, vti.RegClass>;
}
}
@@ -3050,17 +3224,17 @@ multiclass VPatTernaryW_VX<string intrinsic, string instruction,
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- defm : VPatTernary<intrinsic, instruction,
- "V"#vti.ScalarSuffix,
- wti.Vector, vti.Scalar, vti.Vector,
- vti.Mask, vti.Log2SEW, vti.LMul,
- wti.RegClass, vti.ScalarRegClass, vti.RegClass>;
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ wti.Vector, vti.Scalar, vti.Vector,
+ vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.ScalarRegClass, vti.RegClass>;
}
}
multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
- : VPatTernaryV_VV<intrinsic, instruction, vtilist>,
+ : VPatTernaryV_VV_AAXA<intrinsic, instruction, vtilist>,
VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>;
multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
@@ -3131,8 +3305,8 @@ multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat =
}
}
-multiclass VPatConversionVI_VF<string intrinsic,
- string instruction>
+multiclass VPatClassifyVI_VF<string intrinsic,
+ string instruction>
{
foreach fvti = AllFloatVectors in
{
@@ -3144,6 +3318,19 @@ multiclass VPatConversionVI_VF<string intrinsic,
}
}
+multiclass VPatConversionVI_VF<string intrinsic,
+ string instruction>
+{
+ foreach fvti = AllFloatVectors in
+ {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
+
multiclass VPatConversionVF_VI<string intrinsic,
string instruction>
{
@@ -3151,9 +3338,9 @@ multiclass VPatConversionVF_VI<string intrinsic,
{
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
- defm : VPatConversion<intrinsic, instruction, "V",
- fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
- ivti.LMul, fvti.RegClass, ivti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
+ ivti.LMul, fvti.RegClass, ivti.RegClass>;
}
}
@@ -3163,9 +3350,9 @@ multiclass VPatConversionWI_VF<string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
- defm : VPatConversion<intrinsic, instruction, "V",
- iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
- fvti.LMul, iwti.RegClass, fvti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
+ fvti.LMul, iwti.RegClass, fvti.RegClass>;
}
}
@@ -3175,9 +3362,9 @@ multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "V",
- fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
- vti.LMul, fwti.RegClass, vti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
+ vti.LMul, fwti.RegClass, vti.RegClass>;
}
}
@@ -3187,9 +3374,9 @@ multiclass VPatConversionWF_VF <string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "V",
- fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
- fvti.LMul, fwti.RegClass, fvti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "V",
+ fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
+ fvti.LMul, fwti.RegClass, fvti.RegClass>;
}
}
@@ -3199,9 +3386,9 @@ multiclass VPatConversionVI_WF <string intrinsic, string instruction> {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "W",
- vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, fwti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
}
}
@@ -3211,9 +3398,9 @@ multiclass VPatConversionVF_WI <string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
- defm : VPatConversion<intrinsic, instruction, "W",
- fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, iwti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "W",
+ fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, iwti.RegClass>;
}
}
@@ -3223,9 +3410,9 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
- defm : VPatConversion<intrinsic, instruction, "W",
- fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ defm : VPatConversionTA<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
}
}
@@ -3271,7 +3458,7 @@ multiclass VPatAMOV_WD<string intrinsic,
// Pseudo instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// Pseudo Instructions for CodeGen
@@ -3326,7 +3513,12 @@ foreach lmul = MxList.m in {
// Pseudos.
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
-def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>;
+// Because rs1=X0 has a special meaning, we need a GPRNoX0 register class for
+// the case when we aren't using one of the special X0 encodings. Otherwise it
+// could accidentally be made X0 by MachineIR optimizations. To satisfy the
+// verifier, we also need a GPRX0 pseudo instruction for the special encodings.
+def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPRNoX0:$rs1, VTypeIOp:$vtypei), []>;
+def PseudoVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins GPRX0:$rs1, VTypeIOp:$vtypei), []>;
def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
}
@@ -3342,8 +3534,8 @@ def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei),
defm PseudoVL : VPseudoUSLoad</*isFF=*/false>;
defm PseudoVS : VPseudoUSStore;
-defm PseudoVLE1 : VPseudoLoadMask;
-defm PseudoVSE1 : VPseudoStoreMask;
+defm PseudoVLM : VPseudoLoadMask;
+defm PseudoVSM : VPseudoStoreMask;
//===----------------------------------------------------------------------===//
// 7.5 Vector Strided Instructions
@@ -3427,14 +3619,16 @@ foreach vti = AllIntegerVectors in {
(vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1),
(vti.Mask V0),
- VLOpFrag)),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
(!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK")
vti.RegClass:$merge,
vti.RegClass:$rs1,
vti.RegClass:$rs2,
(vti.Mask V0),
GPR:$vl,
- vti.Log2SEW)>;
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
// Match VSUB with a small immediate to vadd.vi by negating the immediate.
def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector vti.RegClass:$rs1),
@@ -3448,14 +3642,16 @@ foreach vti = AllIntegerVectors in {
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask V0),
- VLOpFrag)),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
(!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX#"_MASK")
vti.RegClass:$merge,
vti.RegClass:$rs1,
(NegImm simm5_plus1:$rs2),
(vti.Mask V0),
GPR:$vl,
- vti.Log2SEW)>;
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
}
//===----------------------------------------------------------------------===//
@@ -3623,9 +3819,9 @@ let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -3676,17 +3872,17 @@ defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VF;
//===----------------------------------------------------------------------===//
// 14.8. Vector Floating-Point Square-Root Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFSQRT : VPseudoUnaryV_V;
+defm PseudoVFSQRT : VPseudoUnaryTAV_V;
//===----------------------------------------------------------------------===//
// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFRSQRT7 : VPseudoUnaryV_V;
+defm PseudoVFRSQRT7 : VPseudoUnaryTAV_V;
//===----------------------------------------------------------------------===//
// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFREC7 : VPseudoUnaryV_V;
+defm PseudoVFREC7 : VPseudoUnaryTAV_V;
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
@@ -3758,9 +3954,9 @@ defm PseudoVFNCVT_F_XU : VPseudoConversionV_W;
defm PseudoVFNCVT_F_X : VPseudoConversionV_W;
defm PseudoVFNCVT_F_F : VPseudoConversionV_W;
defm PseudoVFNCVT_ROD_F_F : VPseudoConversionV_W;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 15.1. Vector Single-Width Integer Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -3776,26 +3972,30 @@ defm PseudoVREDMAX : VPseudoReductionV_VS;
//===----------------------------------------------------------------------===//
// 15.2. Vector Widening Integer Reduction Instructions
//===----------------------------------------------------------------------===//
+let IsRVVWideningReduction = 1 in {
defm PseudoVWREDSUMU : VPseudoReductionV_VS;
defm PseudoVWREDSUM : VPseudoReductionV_VS;
-} // Predicates = [HasStdExtV]
+}
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
defm PseudoVFREDOSUM : VPseudoReductionV_VS;
-defm PseudoVFREDSUM : VPseudoReductionV_VS;
+defm PseudoVFREDUSUM : VPseudoReductionV_VS;
defm PseudoVFREDMIN : VPseudoReductionV_VS;
defm PseudoVFREDMAX : VPseudoReductionV_VS;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm PseudoVFWREDSUM : VPseudoReductionV_VS;
+let IsRVVWideningReduction = 1 in {
+defm PseudoVFWREDUSUM : VPseudoReductionV_VS;
defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
+}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 16. Vector Mask Instructions
@@ -3807,11 +4007,11 @@ defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
defm PseudoVMAND: VPseudoBinaryM_MM;
defm PseudoVMNAND: VPseudoBinaryM_MM;
-defm PseudoVMANDNOT: VPseudoBinaryM_MM;
+defm PseudoVMANDN: VPseudoBinaryM_MM;
defm PseudoVMXOR: VPseudoBinaryM_MM;
defm PseudoVMOR: VPseudoBinaryM_MM;
defm PseudoVMNOR: VPseudoBinaryM_MM;
-defm PseudoVMORNOT: VPseudoBinaryM_MM;
+defm PseudoVMORN: VPseudoBinaryM_MM;
defm PseudoVMXNOR: VPseudoBinaryM_MM;
// Pseudo instructions
@@ -3819,10 +4019,10 @@ defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
//===----------------------------------------------------------------------===//
-// 16.2. Vector mask population count vpopc
+// 16.2. Vector mask population count vcpop
//===----------------------------------------------------------------------===//
-defm PseudoVPOPC: VPseudoUnaryS_M;
+defm PseudoVCPOP: VPseudoUnaryS_M;
//===----------------------------------------------------------------------===//
// 16.3. vfirst find-first-set mask bit
@@ -3863,7 +4063,7 @@ defm PseudoVID : VPseudoMaskNullaryV;
// 17.1. Integer Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
let VLMul = m.value in {
@@ -3880,13 +4080,13 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
}
}
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 17.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList.m in {
foreach f = FPList.fpinfo in {
@@ -3908,22 +4108,22 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
}
}
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.3. Vector Slide Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI<uimm5, "@earlyclobber $rd">;
defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI<uimm5>;
defm PseudoVSLIDE1UP : VPseudoBinaryV_VX<"@earlyclobber $rd">;
defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm PseudoVFSLIDE1UP : VPseudoBinaryV_VF<"@earlyclobber $rd">;
defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VF;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.4. Vector Register Gather Instructions
@@ -3955,15 +4155,15 @@ let Predicates = [HasStdExtZvamo] in {
defm : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
} // Predicates = [HasStdExtZvamo]
-let Predicates = [HasStdExtZvamo, HasStdExtF] in {
+let Predicates = [HasStdExtZvamo, HasVInstructionsAnyF] in {
defm : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
-} // Predicates = [HasStdExtZvamo, HasStdExtF]
+} // Predicates = [HasStdExtZvamo, HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 12. Vector Integer Arithmetic Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 12.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
@@ -4279,9 +4479,9 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vssra", "PseudoVSSRA", AllIntegerVectors,
defm : VPatBinaryV_WV_WX_WI<"int_riscv_vnclipu", "PseudoVNCLIPU", AllWidenableIntVectors>;
defm : VPatBinaryV_WV_WX_WI<"int_riscv_vnclip", "PseudoVNCLIP", AllWidenableIntVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
@@ -4372,12 +4572,16 @@ defm : VPatBinarySwappedM_VV<"int_riscv_vmfge", "PseudoVMFLE", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 14.14. Vector Floating-Point Classify Instruction
//===----------------------------------------------------------------------===//
-defm : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
+defm : VPatClassifyVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
//===----------------------------------------------------------------------===//
// 14.15. Vector Floating-Point Merge Instruction
//===----------------------------------------------------------------------===//
// We can use vmerge.vvm to support vector-vector vfmerge.
+// NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses
+// int_riscv_vmerge. Support both for compatibility.
+defm : VPatBinaryV_VM<"int_riscv_vmerge", "PseudoVMERGE",
+ /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
defm : VPatBinaryV_VM<"int_riscv_vfmerge", "PseudoVMERGE",
/*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
defm : VPatBinaryV_XM<"int_riscv_vfmerge", "PseudoVFMERGE",
@@ -4423,9 +4627,9 @@ defm : VPatConversionVF_WI <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
defm : VPatConversionVF_WI <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 15.1. Vector Single-Width Integer Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -4443,40 +4647,40 @@ defm : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">;
//===----------------------------------------------------------------------===//
defm : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">;
defm : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
defm : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=*/1>;
-defm : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>;
+defm : VPatReductionV_VS<"int_riscv_vfredusum", "PseudoVFREDUSUM", /*IsFloat=*/1>;
defm : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>;
defm : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>;
//===----------------------------------------------------------------------===//
// 15.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>;
+defm : VPatReductionW_VS<"int_riscv_vfwredusum", "PseudoVFWREDUSUM", /*IsFloat=*/1>;
defm : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 16. Vector Mask Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 16.1 Vector Mask-Register Logical Instructions
//===----------------------------------------------------------------------===//
defm : VPatBinaryM_MM<"int_riscv_vmand", "PseudoVMAND">;
defm : VPatBinaryM_MM<"int_riscv_vmnand", "PseudoVMNAND">;
-defm : VPatBinaryM_MM<"int_riscv_vmandnot", "PseudoVMANDNOT">;
+defm : VPatBinaryM_MM<"int_riscv_vmandn", "PseudoVMANDN">;
defm : VPatBinaryM_MM<"int_riscv_vmxor", "PseudoVMXOR">;
defm : VPatBinaryM_MM<"int_riscv_vmor", "PseudoVMOR">;
defm : VPatBinaryM_MM<"int_riscv_vmnor", "PseudoVMNOR">;
-defm : VPatBinaryM_MM<"int_riscv_vmornot", "PseudoVMORNOT">;
+defm : VPatBinaryM_MM<"int_riscv_vmorn", "PseudoVMORN">;
defm : VPatBinaryM_MM<"int_riscv_vmxnor", "PseudoVMXNOR">;
// pseudo instructions
@@ -4484,9 +4688,9 @@ defm : VPatNullaryM<"int_riscv_vmclr", "PseudoVMCLR">;
defm : VPatNullaryM<"int_riscv_vmset", "PseudoVMSET">;
//===----------------------------------------------------------------------===//
-// 16.2. Vector mask population count vpopc
+// 16.2. Vector count population in mask vcpop.m
//===----------------------------------------------------------------------===//
-defm : VPatUnaryS_M<"int_riscv_vpopc", "PseudoVPOPC">;
+defm : VPatUnaryS_M<"int_riscv_vcpop", "PseudoVCPOP">;
//===----------------------------------------------------------------------===//
// 16.3. vfirst find-first-set mask bit
@@ -4518,7 +4722,7 @@ defm : VPatUnaryV_M<"int_riscv_viota", "PseudoVIOTA">;
//===----------------------------------------------------------------------===//
defm : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 17. Vector Permutation Instructions
@@ -4528,19 +4732,19 @@ defm : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
// 17.1. Integer Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.Log2SEW)>;
// vmv.s.x is handled with a custom node in RISCVInstrInfoVVLPatterns.td
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 17.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
defvar instr = !cast<Instruction>("PseudoVFMV_"#fvti.ScalarSuffix#"_S_" #
fvti.LMul.MX);
@@ -4555,52 +4759,52 @@ foreach fvti = AllFloatVectors in {
(fvti.Scalar fvti.ScalarRegClass:$rs2),
GPR:$vl, fvti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.3. Vector Slide Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>;
defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>;
defm : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>;
defm : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>;
defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>;
defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>;
defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.4. Vector Register Gather Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
AllIntegerVectors, uimm5>;
defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
/* eew */ 16, AllIntegerVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
AllFloatVectors, uimm5>;
defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
/* eew */ 16, AllFloatVectors>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 17.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
// Include the non-intrinsic ISel patterns
include "RISCVInstrInfoVSDPatterns.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 483fc8bfecda..711ad4335ece 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -89,8 +89,8 @@ multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m>
{
- defvar load_instr = !cast<Instruction>("PseudoVLE1_V_"#m.BX);
- defvar store_instr = !cast<Instruction>("PseudoVSE1_V_"#m.BX);
+ defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX);
+ defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX);
// Load
def : Pat<(m.Mask (load BaseAddr:$rs1)),
(load_instr BaseAddr:$rs1, m.AVL, m.Log2SEW)>;
@@ -103,11 +103,9 @@ class VPatBinarySDNode_VV<SDNode vop,
string instruction_name,
ValueType result_type,
ValueType op_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg RetClass,
VReg op_reg_class> :
Pat<(result_type (vop
(op_type op_reg_class:$rs1),
@@ -122,11 +120,9 @@ class VPatBinarySDNode_XI<SDNode vop,
string suffix,
ValueType result_type,
ValueType vop_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg RetClass,
VReg vop_reg_class,
ComplexPattern SplatPatKind,
DAGOperand xop_kind> :
@@ -141,11 +137,11 @@ class VPatBinarySDNode_XI<SDNode vop,
multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
def : VPatBinarySDNode_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass>;
def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass,
SplatPat, GPR>;
}
}
@@ -155,8 +151,8 @@ multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
: VPatBinarySDNode_VV_VX<vop, instruction_name> {
foreach vti = AllIntegerVectors in {
def : VPatBinarySDNode_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
}
@@ -167,11 +163,9 @@ class VPatBinarySDNode_VF<SDNode vop,
ValueType result_type,
ValueType vop_type,
ValueType xop_type,
- ValueType mask_type,
int sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg RetClass,
VReg vop_reg_class,
DAGOperand xop_kind> :
Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
@@ -184,11 +178,11 @@ class VPatBinarySDNode_VF<SDNode vop,
multiclass VPatBinaryFPSDNode_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
def : VPatBinarySDNode_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass, vti.RegClass>;
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass>;
def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
- vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass, vti.RegClass,
+ vti.Vector, vti.Vector, vti.Scalar,
+ vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
vti.ScalarRegClass>;
}
}
@@ -373,7 +367,7 @@ multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
// Patterns.
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
@@ -491,17 +485,17 @@ defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
foreach vti = AllIntegerVectors in {
// NOTE: We choose VMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = vti.LMul.MX;
def : Pat<(vti.Vector (add vti.RegClass:$rs2,
(mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
(mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
// commutable.
@@ -510,32 +504,32 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVMADD_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (sub vti.RegClass:$rs2,
(mul_oneuse (SplatPat XLenVT:$rs1),
vti.RegClass:$rd))),
(!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW)>;
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1,
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1,
vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, VMV0:$vm,
+ vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1),
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1),
vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, VMV0:$vm, vti.AVL, vti.Log2SEW)>;
+ vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1),
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1),
vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, vti.AVL, vti.Log2SEW)>;
+ vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
}
// 12.1. Vector Single-Width Saturating Add and Subtract
@@ -567,10 +561,10 @@ foreach mti = AllMasks in {
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))),
- (!cast<Instruction>("PseudoVMANDNOT_MM_"#mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMANDN_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))),
- (!cast<Instruction>("PseudoVMORNOT_MM_"#mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMORN_MM_"#mti.LMul.MX)
VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
// Handle rvv_vnot the same as the vmnot.m pseudoinstruction.
@@ -579,9 +573,9 @@ foreach mti = AllMasks in {
VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
defm : VPatBinaryFPSDNode_VV_VF<fadd, "PseudoVFADD">;
@@ -597,27 +591,27 @@ defm : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">;
foreach fvti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = fvti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = fvti.LMul.MX;
def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
(fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
(fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
// commutable.
@@ -625,35 +619,35 @@ foreach fvti = AllFloatVectors in {
fvti.RegClass:$rd, fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
(fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (splat_vector fvti.ScalarRegClass:$rs1),
(fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
// The splat might be negated.
def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(fvti.Vector (fma (fneg (splat_vector fvti.ScalarRegClass:$rs1)),
fvti.RegClass:$rd, fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
foreach vti = AllFloatVectors in {
@@ -711,25 +705,25 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// 12.15. Vector Integer Merge Instructions
// 14.15. Vector Floating-Point Merge Instruction
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1,
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, VMV0:$vm,
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(splat_vector fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
- VMV0:$vm, fvti.AVL, fvti.Log2SEW)>;
+ (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
(splat_vector (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, 0, VMV0:$vm, fvti.AVL, fvti.Log2SEW)>;
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
}
// 14.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
@@ -763,13 +757,13 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
fwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (SplatPat GPR:$rs1)),
(!cast<Instruction>("PseudoVMV_V_X_" # vti.LMul.MX)
@@ -778,9 +772,9 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVMV_V_I_" # vti.LMul.MX)
simm5:$rs1, vti.AVL, vti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
def : Pat<(fvti.Vector (splat_vector fvti.ScalarRegClass:$rs1)),
(!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
@@ -791,12 +785,12 @@ foreach fvti = AllFloatVectors in {
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
0, fvti.AVL, fvti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Vector Element Extracts
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV, HasStdExtF] in
+let Predicates = [HasVInstructionsAnyF] in
foreach vti = AllFloatVectors in {
defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
vti.ScalarSuffix,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c9c42152c47b..73b97e1c3675 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -199,7 +199,7 @@ def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>;
def riscv_vmnot_vl : PatFrag<(ops node:$rs, node:$vl),
(riscv_vmxor_vl node:$rs, true_mask, node:$vl)>;
-def riscv_vpopc_vl : SDNode<"RISCVISD::VPOPC_VL",
+def riscv_vcpop_vl : SDNode<"RISCVISD::VCPOP_VL",
SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
SDTCisVec<1>, SDTCisInt<1>,
SDTCVecEltisVT<2, i1>,
@@ -230,9 +230,9 @@ def SDT_RISCVVWMUL_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
-def SDTRVVVecReduce : SDTypeProfile<1, 4, [
- SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>,
- SDTCisSameNumEltsAs<1, 3>, SDTCisVT<4, XLenVT>
+def SDTRVVVecReduce : SDTypeProfile<1, 5, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<2, 4>, SDTCisVT<5, XLenVT>
]>;
def riscv_mul_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D),
@@ -273,7 +273,6 @@ multiclass VPatBinaryVL_VV<SDNode vop,
ValueType mask_type,
int sew,
LMULInfo vlmul,
- VReg RetClass,
VReg op_reg_class> {
def : Pat<(result_type (vop
(op_type op_reg_class:$rs1),
@@ -287,13 +286,13 @@ multiclass VPatBinaryVL_VV<SDNode vop,
def : Pat<(result_type (vop
(op_type op_reg_class:$rs1),
(op_type op_reg_class:$rs2),
- (mask_type VMV0:$vm),
+ (mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX#"_MASK")
(result_type (IMPLICIT_DEF)),
op_reg_class:$rs1,
op_reg_class:$rs2,
- VMV0:$vm, GPR:$vl, sew)>;
+ (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
multiclass VPatBinaryVL_XI<SDNode vop,
@@ -304,7 +303,6 @@ multiclass VPatBinaryVL_XI<SDNode vop,
ValueType mask_type,
int sew,
LMULInfo vlmul,
- VReg RetClass,
VReg vop_reg_class,
ComplexPattern SplatPatKind,
DAGOperand xop_kind> {
@@ -320,24 +318,23 @@ multiclass VPatBinaryVL_XI<SDNode vop,
def : Pat<(result_type (vop
(vop_type vop_reg_class:$rs1),
(vop_type (SplatPatKind (XLenVT xop_kind:$rs2))),
- (mask_type VMV0:$vm),
+ (mask_type V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX#"_MASK")
(result_type (IMPLICIT_DEF)),
vop_reg_class:$rs1,
xop_kind:$rs2,
- VMV0:$vm, GPR:$vl, sew)>;
+ (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
}
multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
foreach vti = AllIntegerVectors in {
defm : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ vti.LMul, vti.RegClass, SplatPat, GPR>;
}
}
@@ -347,7 +344,7 @@ multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
foreach vti = AllIntegerVectors in {
defm : VPatBinaryVL_XI<vop, instruction_name, "VI",
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
+ vti.LMul, vti.RegClass,
!cast<ComplexPattern>(SplatPat#_#ImmType),
ImmType>;
}
@@ -359,11 +356,10 @@ multiclass VPatBinaryWVL_VV_VX<SDNode vop, string instruction_name> {
defvar wti = VtiToWti.Wti;
defm : VPatBinaryVL_VV<vop, instruction_name,
wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, wti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass>;
defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
wti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, wti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ vti.LMul, vti.RegClass, SplatPat, GPR>;
}
}
@@ -374,7 +370,6 @@ class VPatBinaryVL_VF<SDNode vop,
ValueType mask_type,
int sew,
LMULInfo vlmul,
- VReg RetClass,
VReg vop_reg_class,
RegisterClass scalar_reg_class> :
Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
@@ -390,11 +385,10 @@ multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defm : VPatBinaryVL_VV<vop, instruction_name,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass>;
+ vti.LMul, vti.RegClass>;
def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- vti.ScalarRegClass>;
+ vti.LMul, vti.RegClass, vti.ScalarRegClass>;
}
}
@@ -589,14 +583,22 @@ multiclass VPatNConvertI2FPSDNode_V_VL<SDNode vop, string instruction_name> {
multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in {
defvar vti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # vti.SEW # "M1");
- def: Pat<(vti_m1.Vector (vop (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2,
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
- (vti_m1.Vector (IMPLICIT_DEF)),
+ (vti_m1.Vector VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti_m1.Vector VR:$rs2),
GPR:$vl, vti.Log2SEW)>;
+
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
+ (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti_m1.Vector VR:$rs2),
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
}
@@ -604,7 +606,7 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
// Patterns.
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// 7.4. Vector Unit-Stride Instructions
foreach vti = AllVectors in {
@@ -620,8 +622,8 @@ foreach vti = AllVectors in {
}
foreach mti = AllMasks in {
- defvar load_instr = !cast<Instruction>("PseudoVLE1_V_"#mti.BX);
- defvar store_instr = !cast<Instruction>("PseudoVSE1_V_"#mti.BX);
+ defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#mti.BX);
+ defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#mti.BX);
def : Pat<(mti.Mask (riscv_vle_vl BaseAddr:$rs1, VLOpFrag)),
(load_instr BaseAddr:$rs1, GPR:$vl, mti.Log2SEW)>;
def : Pat<(riscv_vse_vl (mti.Mask VR:$rs2), BaseAddr:$rs1,
@@ -641,22 +643,22 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask V0),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
(vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>;
def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
- (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm),
+ (vti.Vector vti.RegClass:$rs1), (vti.Mask V0),
VLOpFrag),
(!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2,
- VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.3. Vector Integer Extension
@@ -794,7 +796,7 @@ defm : VPatBinaryWVL_VV_VX<riscv_vwmulu_vl, "PseudoVWMULU">;
foreach vti = AllIntegerVectors in {
// NOTE: We choose VMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = vti.LMul.MX;
def : Pat<(vti.Vector
(riscv_add_vl vti.RegClass:$rs2,
(riscv_mul_vl_oneuse vti.RegClass:$rs1,
@@ -803,7 +805,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector
(riscv_sub_vl vti.RegClass:$rs2,
(riscv_mul_vl_oneuse vti.RegClass:$rs1,
@@ -812,7 +814,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVNMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
// commutable.
@@ -824,7 +826,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVMADD_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector
(riscv_sub_vl vti.RegClass:$rs2,
(riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
@@ -834,7 +836,7 @@ foreach vti = AllIntegerVectors in {
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>("PseudoVNMSUB_VX_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.14. Vector Widening Integer Multiply-Add Instructions
@@ -847,18 +849,18 @@ foreach vtiTowti = AllWidenableIntVectors in {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACC_VV_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACC_VV_" # vti.LMul.MX)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
(riscv_vwmulu_vl_oneuse vti.RegClass:$rs1,
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCU_VV_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACCU_VV_" # vti.LMul.MX)
wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
@@ -866,43 +868,43 @@ foreach vtiTowti = AllWidenableIntVectors in {
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACC_VX_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACC_VX_" # vti.LMul.MX)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(wti.Vector
(riscv_add_vl wti.RegClass:$rd,
(riscv_vwmulu_vl_oneuse (SplatPat XLenVT:$rs1),
(vti.Vector vti.RegClass:$rs2),
(vti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCU_VX_" # vti.LMul.MX # "_TA")
+ (!cast<Instruction>("PseudoVWMACCU_VX_" # vti.LMul.MX)
wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 12.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
vti.RegClass:$rs1,
vti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, VMV0:$vm,
+ vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(SplatPat XLenVT:$rs1),
vti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(SplatPat_simm5 simm5:$rs1),
vti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, simm5:$rs1, VMV0:$vm, GPR:$vl, vti.Log2SEW)>;
+ vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
}
// 12.16. Vector Integer Move Instructions
@@ -923,10 +925,10 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
// 15.1. Vector Single-Width Integer Reduction Instructions
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
defm : VPatReductionVL<rvv_vecreduce_ADD_vl, "PseudoVREDSUM", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_UMAX_vl, "PseudoVREDMAXU", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_SMAX_vl, "PseudoVREDMAX", /*is_float*/0>;
@@ -935,17 +937,17 @@ defm : VPatReductionVL<rvv_vecreduce_SMIN_vl, "PseudoVREDMIN", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_AND_vl, "PseudoVREDAND", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_OR_vl, "PseudoVREDOR", /*is_float*/0>;
defm : VPatReductionVL<rvv_vecreduce_XOR_vl, "PseudoVREDXOR", /*is_float*/0>;
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
// 15.3. Vector Single-Width Floating-Point Reduction Instructions
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
defm : VPatReductionVL<rvv_vecreduce_SEQ_FADD_vl, "PseudoVFREDOSUM", /*is_float*/1>;
-defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDSUM", /*is_float*/1>;
+defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDUSUM", /*is_float*/1>;
defm : VPatReductionVL<rvv_vecreduce_FMIN_vl, "PseudoVFREDMIN", /*is_float*/1>;
defm : VPatReductionVL<rvv_vecreduce_FMAX_vl, "PseudoVFREDMAX", /*is_float*/1>;
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
defm : VPatBinaryFPVL_VV_VF<riscv_fadd_vl, "PseudoVFADD">;
@@ -961,13 +963,13 @@ defm : VPatBinaryFPVL_R_VF<riscv_fdiv_vl, "PseudoVFRDIV">;
foreach vti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
- defvar suffix = vti.LMul.MX # "_COMMUTABLE";
+ defvar suffix = vti.LMul.MX;
def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
vti.RegClass:$rs2, (vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl vti.RegClass:$rs1, vti.RegClass:$rd,
(riscv_fneg_vl vti.RegClass:$rs2,
(vti.Mask true_mask),
@@ -976,7 +978,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
(vti.Mask true_mask),
VLOpFrag),
@@ -988,7 +990,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl vti.RegClass:$rs1,
(vti.Mask true_mask),
VLOpFrag),
@@ -997,7 +999,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
// commutable.
@@ -1007,7 +1009,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
vti.RegClass:$rd,
(riscv_fneg_vl vti.RegClass:$rs2,
@@ -1017,7 +1019,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFMSUB_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
(riscv_fneg_vl vti.RegClass:$rd,
(vti.Mask true_mask),
@@ -1029,7 +1031,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1),
(riscv_fneg_vl vti.RegClass:$rd,
(vti.Mask true_mask),
@@ -1039,7 +1041,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// The splat might be negated.
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
@@ -1053,7 +1055,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMADD_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
def : Pat<(vti.Vector (riscv_fma_vl (riscv_fneg_vl (SplatFPOp vti.ScalarRegClass:$rs1),
(vti.Mask true_mask),
VLOpFrag),
@@ -1062,7 +1064,7 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVFNMSUB_V" # vti.ScalarSuffix # "_" # suffix)
vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW)>;
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 14.11. Vector Floating-Point MIN/MAX Instructions
@@ -1126,29 +1128,29 @@ foreach fvti = AllFloatVectors in {
// Floating-point vselects:
// 12.15. Vector Integer Merge Instructions
// 14.15. Vector Floating-Point Merge Instruction
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
fvti.RegClass:$rs1,
fvti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, VMV0:$vm,
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
(SplatFPOp fvti.ScalarRegClass:$rs1),
fvti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
fvti.RegClass:$rs2,
(fvti.Scalar fvti.ScalarRegClass:$rs1),
- VMV0:$vm, GPR:$vl, fvti.Log2SEW)>;
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask VMV0:$vm),
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
(SplatFPOp (fvti.Scalar fpimm0)),
fvti.RegClass:$rs2,
VLOpFrag)),
(!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, 0, VMV0:$vm, GPR:$vl, fvti.Log2SEW)>;
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
// 14.16. Vector Floating-Point Move Instruction
// If we're splatting fpimm0, use vmv.v.x vd, x0.
@@ -1207,9 +1209,9 @@ foreach fvti = AllFloatVectors in {
}
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach mti = AllMasks in {
// 16.1 Vector Mask-Register Logical Instructions
@@ -1231,12 +1233,12 @@ foreach mti = AllMasks in {
def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1,
(riscv_vmnot_vl VR:$rs2, VLOpFrag),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMANDNOT_MM_" # mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMANDN_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1,
(riscv_vmnot_vl VR:$rs2, VLOpFrag),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMORNOT_MM_" # mti.LMul.MX)
+ (!cast<Instruction>("PseudoVMORN_MM_" # mti.LMul.MX)
VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
// XOR is associative so we need 2 patterns for VMXNOR.
def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1,
@@ -1266,16 +1268,20 @@ foreach mti = AllMasks in {
(!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
VR:$rs, VR:$rs, GPR:$vl, mti.Log2SEW)>;
- // 16.2 Vector Mask Population Count vpopc
- def : Pat<(XLenVT (riscv_vpopc_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
+ // 16.2 Vector count population in mask vcpop.m
+ def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVPOPC_M_" # mti.BX)
+ (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX)
VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX # "_MASK")
+ VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
// 17.1. Integer Scalar Move Instructions
// 17.4. Vector Register Gather Instruction
foreach vti = AllIntegerVectors in {
@@ -1302,7 +1308,7 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgather_vv_vl
vti.RegClass:$rs2,
vti.RegClass:$rs1,
@@ -1312,7 +1318,19 @@ foreach vti = AllIntegerVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ (riscv_vrgather_vx_vl
+ vti.RegClass:$rs2,
+ uimm5:$imm,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ vti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
// emul = lmul * 16 / sew
defvar vlmul = vti.LMul;
@@ -1329,7 +1347,7 @@ foreach vti = AllIntegerVectors in {
(!cast<Instruction>(inst)
vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgatherei16_vv_vl
vti.RegClass:$rs2,
(ivti.Vector ivti.RegClass:$rs1),
@@ -1339,13 +1357,13 @@ foreach vti = AllIntegerVectors in {
VLOpFrag)),
(!cast<Instruction>(inst#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
-let Predicates = [HasStdExtV, HasStdExtF] in {
+let Predicates = [HasVInstructionsAnyF] in {
// 17.2. Floating-Point Scalar Move Instructions
foreach vti = AllFloatVectors in {
@@ -1373,7 +1391,7 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX)
vti.RegClass:$rs2, uimm5:$imm, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgather_vv_vl
vti.RegClass:$rs2,
(ivti.Vector vti.RegClass:$rs1),
@@ -1383,7 +1401,19 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ (riscv_vrgather_vx_vl
+ vti.RegClass:$rs2,
+ uimm5:$imm,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ vti.RegClass:$merge,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
defvar vlmul = vti.LMul;
defvar octuple_lmul = vlmul.octuple;
@@ -1399,7 +1429,7 @@ foreach vti = AllFloatVectors in {
(!cast<Instruction>(inst)
vti.RegClass:$rs2, ivti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask VMV0:$vm),
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
(riscv_vrgatherei16_vv_vl
vti.RegClass:$rs2,
(ivti.Vector ivti.RegClass:$rs1),
@@ -1409,11 +1439,11 @@ foreach vti = AllFloatVectors in {
VLOpFrag)),
(!cast<Instruction>(inst#"_MASK")
vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1,
- vti.Mask:$vm, GPR:$vl, vti.Log2SEW)>;
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
-} // Predicates = [HasStdExtV, HasStdExtF]
+} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Miscellaneous RISCVISD SDNodes
@@ -1437,7 +1467,7 @@ def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
-let Predicates = [HasStdExtV] in {
+let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask true_mask),
@@ -1490,4 +1520,4 @@ foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
GPR:$vl, vti.Log2SEW)>;
}
-} // Predicates = [HasStdExtV]
+} // Predicates = [HasVInstructions]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 7359e567a58d..461bdd348934 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrInfoB.td - RISC-V 'B' instructions -------*- tablegen -*-===//
+//===-- RISCVInstrInfoZb.td - RISC-V Bitmanip instructions -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,19 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the RISC-V instructions from the standard 'B' Bitmanip
-// extension, version 0.93.
-// This version is still experimental as the 'B' extension hasn't been
+// This file describes the RISC-V instructions from the standard Bitmanip
+// extensions, versions:
+// Zba - 1.0
+// Zbb - 1.0
+// Zbc - 1.0
+// Zbs - 1.0
+// Zbe - 0.93
+// Zbf - 0.93
+// Zbm - 0.93
+// Zbp - 0.93
+// Zbr - 0.93
+// Zbt - 0.93
+// This version is still experimental as the Bitmanip extensions haven't been
// ratified yet.
//
//===----------------------------------------------------------------------===//
@@ -186,6 +196,32 @@ def C9LeftShift : PatLeaf<(imm), [{
return C > 9 && ((C % 9) == 0) && isPowerOf2_64(C / 9);
}]>;
+def CSImm12MulBy4 : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ int64_t C = N->getSExtValue();
+ // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair.
+ return !isInt<13>(C) && isInt<14>(C) && (C & 3) == 0;
+}]>;
+
+def CSImm12MulBy8 : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ int64_t C = N->getSExtValue();
+ // Skip if C is simm12 or can be optimized by the PatLeaf AddiPair.
+ return !isInt<13>(C) && isInt<15>(C) && (C & 7) == 0;
+}]>;
+
+def SimmShiftRightBy2XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 2, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+def SimmShiftRightBy3XForm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 3, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
@@ -459,15 +495,6 @@ def RORW : ALUW_rr<0b0110000, 0b101, "rorw">,
Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
-let Predicates = [HasStdExtZbs, IsRV64] in {
-// NOTE: These instructions have been removed from the 0.94 spec. As a result
-// we have no isel patterns for them.
-def BCLRW : ALUW_rr<0b0100100, 0b001, "bclrw">, Sched<[]>;
-def BSETW : ALUW_rr<0b0010100, 0b001, "bsetw">, Sched<[]>;
-def BINVW : ALUW_rr<0b0110100, 0b001, "binvw">, Sched<[]>;
-def BEXTW : ALUW_rr<0b0100100, 0b101, "bextw">, Sched<[]>;
-} // Predicates = [HasStdExtZbs, IsRV64]
-
let Predicates = [HasStdExtZbp, IsRV64] in {
def GORCW : ALUW_rr<0b0010100, 0b101, "gorcw">, Sched<[]>;
def GREVW : ALUW_rr<0b0110100, 0b101, "grevw">, Sched<[]>;
@@ -481,17 +508,6 @@ let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
Sched<[WriteRotateImm32, ReadRotateImm32]>;
-let Predicates = [HasStdExtZbs, IsRV64] in {
-// NOTE: These instructions have been removed from the 0.94 spec. As a result
-// we have no isel patterns for them.
-def BCLRIW : RVBShiftW_ri<0b0100100, 0b001, OPC_OP_IMM_32, "bclriw">,
- Sched<[]>;
-def BSETIW : RVBShiftW_ri<0b0010100, 0b001, OPC_OP_IMM_32, "bsetiw">,
- Sched<[]>;
-def BINVIW : RVBShiftW_ri<0b0110100, 0b001, OPC_OP_IMM_32, "binviw">,
- Sched<[]>;
-} // Predicates = [HasStdExtZbs, IsRV64]
-
let Predicates = [HasStdExtZbp, IsRV64] in {
def GORCIW : RVBShiftW_ri<0b0010100, 0b101, OPC_OP_IMM_32, "gorciw">, Sched<[]>;
def GREVIW : RVBShiftW_ri<0b0110100, 0b101, OPC_OP_IMM_32, "greviw">, Sched<[]>;
@@ -585,43 +601,10 @@ def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
} // Predicates = [HasStdExtZbbOrZbp]
//===----------------------------------------------------------------------===//
-// Future compressed instructions
-//===----------------------------------------------------------------------===//
-
-// The presence of these instructions in the B extension is purely experimental
-// and they should be moved to the C extension as soon as they are ratified.
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBInstC<bits<2> funct2, string opcodestr>
- : RVInst16<(outs GPRC:$rs_wb), (ins GPRC:$rs), opcodestr, "$rs", [],
- InstFormatCR> {
- bits<3> rs;
- let Constraints = "$rs = $rs_wb";
-
- let Inst{15-12} = 0b0110;
- let Inst{11-10} = funct2;
- let Inst{9-7} = rs;
- let Inst{6-0} = 0b0000001;
-}
-
-// The namespace RVBC exists to avoid encoding conflicts with the compressed
-// instructions c.addi16sp and c.lui already implemented in the C extension.
-
-let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC] in {
-def C_NOT : RVBInstC<0b00, "c.not">, Sched<[]>;
-def C_NEG : RVBInstC<0b01, "c.neg">, Sched<[]>;
-} // DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtC]
-
-let DecoderNamespace = "RVBC", Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in
-def C_ZEXTW : RVBInstC<0b10, "c.zext.w">, Sched<[]>;
-
-//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZba, IsRV64] in {
-// NOTE: The 0.93 spec shows zext.w as an alias of pack/packw. It has been
-// changed to add.uw in a draft after 0.94.
def : InstAlias<"zext.w $rd, $rs", (ADDUW GPR:$rd, GPR:$rs, X0)>;
}
@@ -770,21 +753,6 @@ def : InstAlias<"bext $rd, $rs1, $shamt",
} // Predicates = [HasStdExtZbs]
//===----------------------------------------------------------------------===//
-// Compressed Instruction patterns
-//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZbproposedc, HasStdExtC] in {
-def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1),
- (C_NOT GPRC:$rs1)>;
-def : CompressPat<(SUB GPRC:$rs1, X0, GPRC:$rs1),
- (C_NEG GPRC:$rs1)>;
-} // Predicates = [HasStdExtZbproposedc, HasStdExtC]
-
-let Predicates = [HasStdExtZbproposedc, HasStdExtZba, HasStdExtC, IsRV64] in {
-def : CompressPat<(ADDUW GPRC:$rs1, GPRC:$rs1, X0),
- (C_ZEXTW GPRC:$rs1)>;
-} // Predicates = [HasStdExtZbproposedc, HasStdExtC, IsRV64]
-
-//===----------------------------------------------------------------------===//
// Codegen patterns
//===----------------------------------------------------------------------===//
@@ -1011,6 +979,13 @@ def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
(SH3ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
+def : Pat<(add GPR:$r, CSImm12MulBy4:$i),
+ (SH2ADD (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)),
+ GPR:$r)>;
+def : Pat<(add GPR:$r, CSImm12MulBy8:$i),
+ (SH3ADD (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)),
+ GPR:$r)>;
+
def : Pat<(mul GPR:$r, C3LeftShift:$i),
(SLLI (SH1ADD GPR:$r, GPR:$r),
(TrailingZerosXForm C3LeftShift:$i))>;
@@ -1020,6 +995,29 @@ def : Pat<(mul GPR:$r, C5LeftShift:$i),
def : Pat<(mul GPR:$r, C9LeftShift:$i),
(SLLI (SH3ADD GPR:$r, GPR:$r),
(TrailingZerosXForm C9LeftShift:$i))>;
+
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
+ (SH1ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
+ (SH1ADD (SH3ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
+ (SH2ADD (SH1ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
+ (SH2ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
+ (SH2ADD (SH3ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
+ (SH3ADD (SH1ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
+ (SH3ADD (SH2ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
+ (SH3ADD (SH3ADD GPR:$r, GPR:$r), GPR:$r)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
+ (SH1ADD (SH3ADD GPR:$r, GPR:$r), (SH3ADD GPR:$r, GPR:$r))>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
+ (SH2ADD (SH3ADD GPR:$r, GPR:$r), (SH3ADD GPR:$r, GPR:$r))>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
+ (SH3ADD (SH3ADD GPR:$r, GPR:$r), (SH3ADD GPR:$r, GPR:$r))>;
} // Predicates = [HasStdExtZba]
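[Editor's note] The shXadd chains above follow from simple identities: shNadd(a, b) = (a << N) + b, so SH1ADD(SH2ADD(r, r), r) = 2*(4r + r) + r = 11r and SH3ADD(SH3ADD(r, r), SH3ADD(r, r)) = 8*9r + 9r = 81r. A throwaway C++ check of the eleven constants, added here for illustration only (the shadd helper is made up for the example, not an LLVM API):

#include <cassert>

// shadd(N, A, B) mirrors the Zba shNadd semantics: (A << N) + B.
static long shadd(int N, long A, long B) { return (A << N) + B; }

int main() {
  long r = 7; // any value works; the identities are linear in r
  assert(shadd(1, shadd(2, r, r), r) == 11 * r);
  assert(shadd(2, shadd(1, r, r), r) == 13 * r);
  assert(shadd(1, shadd(3, r, r), r) == 19 * r);
  assert(shadd(2, shadd(2, r, r), r) == 21 * r);
  assert(shadd(3, shadd(1, r, r), r) == 25 * r);
  assert(shadd(2, shadd(3, r, r), r) == 37 * r);
  assert(shadd(3, shadd(2, r, r), r) == 41 * r);
  assert(shadd(3, shadd(3, r, r), r) == 73 * r);
  assert(shadd(1, shadd(3, r, r), shadd(3, r, r)) == 27 * r);
  assert(shadd(2, shadd(3, r, r), shadd(3, r, r)) == 45 * r);
  assert(shadd(3, shadd(3, r, r), shadd(3, r, r)) == 81 * r);
  return 0;
}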
let Predicates = [HasStdExtZba, IsRV64] in {
@@ -1085,6 +1083,9 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
(and GPR:$rs1, 0x000000000000FFFF)),
i32)),
(PACKW GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
+ (and GPR:$rs1, 0x000000000000FFFF))),
+ (PACKW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
(srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
(PACKUW GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 7316b7ad7674..a33494461869 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -66,7 +66,7 @@ class FPCmpH_rr<bits<3> funct3, string opcodestr>
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfh] in {
+let Predicates = [HasStdExtZfhmin] in {
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def FLH : RVInstI<0b001, OPC_LOAD_FP, (outs FPR16:$rd),
(ins GPR:$rs1, simm12:$imm12),
@@ -81,7 +81,9 @@ def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
(ins FPR16:$rs2, GPR:$rs1, simm12:$imm12),
"fsh", "$rs2, ${imm12}(${rs1})">,
Sched<[WriteFST16, ReadStoreData, ReadFMemBase]>;
+} // Predicates = [HasStdExtZfhmin]
+let Predicates = [HasStdExtZfh] in {
def FMADD_H : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">,
Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
def : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">;
@@ -148,7 +150,9 @@ def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">,
let rs2 = 0b00001;
}
def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
+} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZfhmin] in {
def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">,
Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]> {
let rs2 = 0b00000;
@@ -169,7 +173,9 @@ def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">,
Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]> {
let rs2 = 0b00000;
}
+} // Predicates = [HasStdExtZfhmin]
+let Predicates = [HasStdExtZfh] in {
def FEQ_H : FPCmpH_rr<0b010, "feq.h">;
def FLT_H : FPCmpH_rr<0b001, "flt.h">;
def FLE_H : FPCmpH_rr<0b000, "fle.h">;
@@ -206,7 +212,7 @@ def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">,
def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfh, HasStdExtD] in {
+let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
def FCVT_H_D : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">,
Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]> {
let rs2 = 0b00001;
@@ -217,16 +223,18 @@ def FCVT_D_H : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">,
Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]> {
let rs2 = 0b00010;
}
-} // Predicates = [HasStdExtZfh, HasStdExtD]
+} // Predicates = [HasStdExtZfhmin, HasStdExtD]
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfh] in {
+let Predicates = [HasStdExtZfhmin] in {
def : InstAlias<"flh $rd, (${rs1})", (FLH FPR16:$rd, GPR:$rs1, 0), 0>;
def : InstAlias<"fsh $rs2, (${rs1})", (FSH FPR16:$rs2, GPR:$rs1, 0), 0>;
+} // Predicates = [HasStdExtZfhmin]
+let Predicates = [HasStdExtZfh] in {
def : InstAlias<"fmv.h $rd, $rs", (FSGNJ_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
def : InstAlias<"fabs.h $rd, $rs", (FSGNJX_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
def : InstAlias<"fneg.h $rd, $rs", (FSGNJN_H FPR16:$rd, FPR16:$rs, FPR16:$rs)>;
@@ -237,10 +245,12 @@ def : InstAlias<"fgt.h $rd, $rs, $rt",
(FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
def : InstAlias<"fge.h $rd, $rs, $rt",
(FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZfhmin] in {
def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
def PseudoFSH : PseudoStore<"fsh", FPR16>;
-} // Predicates = [HasStdExtZfh]
+} // Predicates = [HasStdExtZfhmin]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
@@ -313,7 +323,9 @@ def : PatFpr16Fpr16<setle, FLE_H>;
def : PatFpr16Fpr16<setole, FLE_H>;
def Select_FPR16_Using_CC_GPR : SelectCC_rrirr<FPR16, GPR>;
+} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZfhmin] in {
/// Loads
defm : LdPat<load, FLH, f16>;
@@ -331,13 +343,17 @@ def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
-} // Predicates = [HasStdExtZfh]
+} // Predicates = [HasStdExtZfhmin]
let Predicates = [HasStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+// Saturating float->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(i32 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+
// half->int32 with current rounding mode.
def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
@@ -353,13 +369,17 @@ let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
// half->[u]int64. Round-to-zero must be used.
def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+// Saturating float->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(i64 (riscv_fcvt_xu_rtz FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+
// half->int64 with current rounding mode.
def : Pat<(i64 (lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
def : Pat<(i64 (llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
@@ -375,7 +395,7 @@ def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
} // Predicates = [HasStdExtZfh, IsRV64]
-let Predicates = [HasStdExtZfh, HasStdExtD] in {
+let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
@@ -385,4 +405,4 @@ def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
(FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
-}
+} // Predicates = [HasStdExtZfhmin, HasStdExtD]
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index 74d92468b9b9..dd084f53e511 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -148,17 +148,18 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
assert(TRI && "TargetRegisterInfo expected");
uint64_t TSFlags = MI->getDesc().TSFlags;
- int NumOps = MI->getNumExplicitOperands();
-
- for (const MachineOperand &MO : MI->explicit_operands()) {
- int OpNo = (int)MI->getOperandNo(&MO);
- assert(OpNo >= 0 && "Operand number doesn't fit in an 'int' type");
-
- // Skip VL and SEW operands which are the last two operands if present.
- if (RISCVII::hasVLOp(TSFlags) && OpNo == (NumOps - 2))
- continue;
- if (RISCVII::hasSEWOp(TSFlags) && OpNo == (NumOps - 1))
- continue;
+ unsigned NumOps = MI->getNumExplicitOperands();
+
+ // Skip policy, VL and SEW operands which are the last operands if present.
+ if (RISCVII::hasVecPolicyOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasVLOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasSEWOp(TSFlags))
+ --NumOps;
+
+ for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
// Skip merge op. It should be the first operand after the result.
if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1) {
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 87586023caa4..5f4022439abb 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -26,8 +26,8 @@
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include <set>
using namespace llvm;
@@ -38,7 +38,6 @@ namespace {
struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
static char ID;
- const MachineFunction *MF;
bool runOnMachineFunction(MachineFunction &Fn) override;
bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI);
@@ -53,6 +52,11 @@ struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
MachineFunctionProperties::Property::IsSSA);
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
StringRef getPassName() const override {
return RISCV_MERGE_BASE_OFFSET_NAME;
}
@@ -193,7 +197,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
foldOffset(HiLUI, LoADDI, Tail, Offset);
return true;
- } break;
+ }
case RISCV::ADD: {
// The offset is too large to fit in the immediate field of ADDI.
// This can be in two forms:
@@ -208,7 +212,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
return false;
foldOffset(HiLUI, LoADDI, Tail, Offset);
return true;
- } break;
+ }
case RISCV::LB:
case RISCV::LH:
case RISCV::LW:
@@ -252,7 +256,7 @@ bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI,
Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg());
DeadInstrs.insert(&LoADDI);
return true;
- } break;
+ }
}
return false;
}
@@ -261,6 +265,7 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(Fn.getFunction()))
return false;
+ bool MadeChange = false;
DeadInstrs.clear();
MRI = &Fn.getRegInfo();
for (MachineBasicBlock &MBB : Fn) {
@@ -272,13 +277,13 @@ bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
LLVM_DEBUG(dbgs() << " Found lowered global address with one use: "
<< *LoADDI->getOperand(2).getGlobal() << "\n");
// If the use count is only one, merge the offset
- detectAndFoldOffset(HiLUI, *LoADDI);
+ MadeChange |= detectAndFoldOffset(HiLUI, *LoADDI);
}
}
// Delete dead instructions.
for (auto *MI : DeadInstrs)
MI->eraseFromParent();
- return true;
+ return MadeChange;
}
/// Returns an instance of the Merge Base Offset Optimization pass.
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index fde75206889c..a915a572f3b7 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -372,7 +372,7 @@ class NFList<int lmul> {
}
// Generate [start, end) SubRegIndex list.
-class SubRegSet<list<SubRegIndex> LIn, int start, int nf, int lmul> {
+class SubRegSet<int nf, int lmul> {
list<SubRegIndex> L = !foldl([]<SubRegIndex>,
[0, 1, 2, 3, 4, 5, 6, 7],
AccList, i,
@@ -382,39 +382,61 @@ class SubRegSet<list<SubRegIndex> LIn, int start, int nf, int lmul> {
[])));
}
-class IndexSet<int index, int nf, int lmul> {
+// Collect the valid indexes into 'R' for the given NF and LMUL values,
+// starting at TUPLE_INDEX. When NF = 2, the valid TUPLE_INDEX values are 0
+// and 1. For example, when LMUL = 4, the potential valid indexes are
+// [8, 12, 16, 20, 24, 28, 4]. However, not all of these are valid for every
+// NF and TUPLE_INDEX; for instance, 28 is not valid under LMUL = 4, NF = 2
+// and TUPLE_INDEX = 0. The filter is
+// (tuple_index + i) x lmul <= (tuple_index x lmul) + 32 - (nf x lmul)
+//
+// Using TUPLE_INDEX = 0, LMUL = 4 and NF = 2 as the example,
+// i x 4 <= 24
+// the class will return [8, 12, 16, 20, 24, 4].
+// Using TUPLE_INDEX = 1, LMUL = 4 and NF = 2 as the example,
+// (1 + i) x 4 <= 28
+// the class will return [12, 16, 20, 24, 28, 8].
+//
+class IndexSet<int tuple_index, int nf, int lmul, bit isV0 = false> {
list<int> R =
!foldl([]<int>,
- [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
- 23, 24, 25, 26, 27, 28, 29, 30, 31],
+ !if(isV0, [0],
+ !cond(
+ !eq(lmul, 1):
+ [8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 1, 2, 3, 4, 5, 6, 7],
+ !eq(lmul, 2):
+ [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 2, 3],
+ !eq(lmul, 4):
+ [2, 3, 4, 5, 6, 7, 1])),
L, i,
!listconcat(L,
- !if(!and(
- !le(!mul(index, lmul), !mul(i, lmul)),
- !le(!mul(i, lmul),
- !sub(!add(32, !mul(index, lmul)), !mul(nf, lmul)))
- ), [!mul(i, lmul)], [])));
+ !if(!le(!mul(!add(i, tuple_index), lmul),
+ !sub(!add(32, !mul(tuple_index, lmul)), !mul(nf, lmul))),
+ [!mul(!add(i, tuple_index), lmul)], [])));
}
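[Editor's note] To make the TableGen fold above easier to follow, here is a standalone C++ sketch of the same index filter; the function and names are illustrative only and are not part of the patch:

#include <cstdio>
#include <vector>

// Mirrors the IndexSet filter:
// (tuple_index + i) * lmul <= tuple_index * lmul + 32 - nf * lmul.
static std::vector<int> indexSet(int TupleIndex, int NF, int LMUL,
                                 const std::vector<int> &Candidates) {
  std::vector<int> R;
  for (int I : Candidates)
    if ((TupleIndex + I) * LMUL <= TupleIndex * LMUL + 32 - NF * LMUL)
      R.push_back((TupleIndex + I) * LMUL);
  return R;
}

int main() {
  std::vector<int> Cand = {2, 3, 4, 5, 6, 7, 1}; // LMUL = 4 candidate order
  for (int V : indexSet(/*TupleIndex=*/0, /*NF=*/2, /*LMUL=*/4, Cand))
    printf("%d ", V); // prints: 8 12 16 20 24 4
  printf("\n");
  return 0;
}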
-class VRegList<list<dag> LIn, int start, int nf, int lmul, bit NoV0> {
+// This class returns a list of vector register collections.
+// For example, for NF = 2 and LMUL = 4,
+// it will return
+// ([ V8M4, V12M4, V16M4, V20M4, V24M4, V4M4],
+// [V12M4, V16M4, V20M4, V24M4, V28M4, V8M4])
+//
+class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> {
list<dag> L =
!if(!ge(start, nf),
LIn,
!listconcat(
[!dag(add,
- !foreach(i,
- !if(NoV0,
- !tail(IndexSet<start, nf, lmul>.R),
- [!head(IndexSet<start, nf, lmul>.R)]),
+ !foreach(i, IndexSet<start, nf, lmul, isV0>.R,
!cast<Register>("V" # i # !cond(!eq(lmul, 2): "M2",
!eq(lmul, 4): "M4",
true: ""))),
!listsplat("",
- !if(NoV0,
- !size(!tail(IndexSet<start, nf, lmul>.R)),
- !size([!head(IndexSet<start, nf, lmul>.R)]))))],
- VRegList<LIn, !add(start, 1), nf, lmul, NoV0>.L));
+ !size(IndexSet<start, nf, lmul, isV0>.R)))],
+ VRegList<LIn, !add(start, 1), nf, lmul, isV0>.L));
}
// Vector registers
@@ -463,11 +485,11 @@ let RegAltNameIndices = [ABIRegAltName] in {
foreach m = [1, 2, 4] in {
foreach n = NFList<m>.L in {
def "VN" # n # "M" # m # "NoV0": RegisterTuples<
- SubRegSet<[], 0, n, m>.L,
- VRegList<[], 0, n, m, 1>.L>;
+ SubRegSet<n, m>.L,
+ VRegList<[], 0, n, m, false>.L>;
def "VN" # n # "M" # m # "V0" : RegisterTuples<
- SubRegSet<[], 0, n, m>.L,
- VRegList<[], 0, n, m, 0>.L>;
+ SubRegSet<n, m>.L,
+ VRegList<[], 0, n, m, true>.L>;
}
}
@@ -487,8 +509,7 @@ def VR : VReg<[vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t,
vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
vbool2_t, vbool1_t],
- (add (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
+ (add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
def VRNoV0 : VReg<[vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
@@ -498,27 +519,26 @@ def VRNoV0 : VReg<[vint8m1_t, vint16m1_t, vint32m1_t, vint64m1_t,
vfloat16mf4_t, vfloat16mf2_t, vfloat32mf2_t,
vbool64_t, vbool32_t, vbool16_t, vbool8_t, vbool4_t,
vbool2_t, vbool1_t],
- (add (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
+ (add (sequence "V%u", 8, 31),
(sequence "V%u", 1, 7)), 1>;
def VRM2 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
vfloat16m2_t, vfloat32m2_t, vfloat64m2_t],
- (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
- V18M2, V20M2, V22M2, V24M2, V0M2, V2M2, V4M2, V6M2), 2>;
+ (add (sequence "V%uM2", 8, 31, 2),
+ (sequence "V%uM2", 0, 7, 2)), 2>;
def VRM2NoV0 : VReg<[vint8m2_t, vint16m2_t, vint32m2_t, vint64m2_t,
vfloat16m2_t, vfloat32m2_t, vfloat64m2_t],
- (add V26M2, V28M2, V30M2, V8M2, V10M2, V12M2, V14M2, V16M2,
- V18M2, V20M2, V22M2, V24M2, V2M2, V4M2, V6M2), 2>;
+ (add (sequence "V%uM2", 8, 31, 2),
+ (sequence "V%uM2", 2, 7, 2)), 2>;
def VRM4 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
vfloat16m4_t, vfloat32m4_t, vfloat64m4_t],
- (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V0M4, V4M4), 4>;
+ (add V8M4, V12M4, V16M4, V20M4, V24M4, V28M4, V0M4, V4M4), 4>;
def VRM4NoV0 : VReg<[vint8m4_t, vint16m4_t, vint32m4_t, vint64m4_t,
vfloat16m4_t, vfloat32m4_t, vfloat64m4_t],
- (add V28M4, V8M4, V12M4, V16M4, V20M4, V24M4, V4M4), 4>;
+ (add V8M4, V12M4, V16M4, V20M4, V24M4, V28M4, V4M4), 4>;
def VRM8 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
@@ -526,7 +546,7 @@ def VRM8 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
def VRM8NoV0 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
- (add V8M8, V16M8, V24M8), 8>;
+ (add V8M8, V16M8, V24M8), 8>;
defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t,
vbool4_t, vbool2_t, vbool1_t];
@@ -538,18 +558,18 @@ def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
// The register class is added for inline assembly for vector mask types.
def VM : VReg<[vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
vbool32_t, vbool64_t],
- (add (sequence "V%u", 25, 31),
- (sequence "V%u", 8, 24),
+ (add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
foreach m = LMULList.m in {
foreach nf = NFList<m>.L in {
- def "VRN" # nf # "M" # m: VReg<[untyped],
- (add !cast<RegisterTuples>("VN" # nf # "M" # m # "V0"), !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0")),
- !mul(nf, m)>;
def "VRN" # nf # "M" # m # "NoV0": VReg<[untyped],
(add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0")),
!mul(nf, m)>;
+ def "VRN" # nf # "M" # m: VReg<[untyped],
+ (add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0"),
+ !cast<RegisterTuples>("VN" # nf # "M" # m # "V0")),
+ !mul(nf, m)>;
}
}
@@ -557,3 +577,15 @@ foreach m = LMULList.m in {
def FFLAGS : RISCVReg<0, "fflags">;
def FRM : RISCVReg<0, "frm">;
def FCSR : RISCVReg<0, "fcsr">;
+
+// Any type register. Used for .insn directives when we don't know what the
+// register types could be.
+// NOTE: The alignment and size are bogus values. The Size needs to be non-zero
+// or tablegen will use "untyped" to determine the size which will assert.
+let isAllocatable = 0 in
+def AnyReg : RegisterClass<"RISCV", [untyped], 32,
+ (add (sequence "X%u", 0, 31),
+ (sequence "F%u_D", 0, 31),
+ (sequence "V%u", 0, 31))> {
+ let Size = 32;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index ed26a5026114..14f59152ed42 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -231,6 +231,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 314af180aca1..5b435fcb16a2 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -18,7 +18,7 @@ def SiFive7Model : SchedMachineModel {
let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
}
-// The SiFive7 microarchitecure has two pipelines: A and B.
+// The SiFive7 microarchitecture has two pipelines: A and B.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
@@ -219,6 +219,9 @@ def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
+//===----------------------------------------------------------------------===//
+// Unsupported extensions
+defm : UnsupportedSchedV;
defm : UnsupportedSchedZba;
defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZfh;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index f31e4af46c1b..4971ca1d4e3e 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -230,3 +230,4 @@ def : ReadAdvance<ReadFSqrt16, 0>;
// Include the scheduler resources for other instruction extensions.
include "RISCVScheduleB.td"
+include "RISCVScheduleV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
new file mode 100644
index 000000000000..43af1802d706
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -0,0 +1,820 @@
+//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+/// Define scheduler resources associated with def operands.
+
+// 7. Vector Loads and Stores
+// 7.4. Vector Unit-Stride Instructions
+def WriteVLDE8 : SchedWrite;
+def WriteVLDE16 : SchedWrite;
+def WriteVLDE32 : SchedWrite;
+def WriteVLDE64 : SchedWrite;
+def WriteVSTE8 : SchedWrite;
+def WriteVSTE16 : SchedWrite;
+def WriteVSTE32 : SchedWrite;
+def WriteVSTE64 : SchedWrite;
+// 7.4.1. Vector Unit-Strided Mask
+def WriteVLDM : SchedWrite;
+def WriteVSTM : SchedWrite;
+// 7.5. Vector Strided Instructions
+def WriteVLDS8 : SchedWrite;
+def WriteVLDS16 : SchedWrite;
+def WriteVLDS32 : SchedWrite;
+def WriteVLDS64 : SchedWrite;
+def WriteVSTS8 : SchedWrite;
+def WriteVSTS16 : SchedWrite;
+def WriteVSTS32 : SchedWrite;
+def WriteVSTS64 : SchedWrite;
+// 7.6. Vector Indexed Instructions
+def WriteVLDUX8 : SchedWrite;
+def WriteVLDUX16 : SchedWrite;
+def WriteVLDUX32 : SchedWrite;
+def WriteVLDUX64 : SchedWrite;
+def WriteVLDOX8 : SchedWrite;
+def WriteVLDOX16 : SchedWrite;
+def WriteVLDOX32 : SchedWrite;
+def WriteVLDOX64 : SchedWrite;
+def WriteVSTUX8 : SchedWrite;
+def WriteVSTUX16 : SchedWrite;
+def WriteVSTUX32 : SchedWrite;
+def WriteVSTUX64 : SchedWrite;
+def WriteVSTOX8 : SchedWrite;
+def WriteVSTOX16 : SchedWrite;
+def WriteVSTOX32 : SchedWrite;
+def WriteVSTOX64 : SchedWrite;
+// 7.7. Vector Unit-stride Fault-Only-First Loads
+def WriteVLDFF8 : SchedWrite;
+def WriteVLDFF16 : SchedWrite;
+def WriteVLDFF32 : SchedWrite;
+def WriteVLDFF64 : SchedWrite;
+// 7.9. Vector Whole Register Instructions
+def WriteVLD1R8 : SchedWrite;
+def WriteVLD1R16 : SchedWrite;
+def WriteVLD1R32 : SchedWrite;
+def WriteVLD1R64 : SchedWrite;
+def WriteVLD2R8 : SchedWrite;
+def WriteVLD2R16 : SchedWrite;
+def WriteVLD2R32 : SchedWrite;
+def WriteVLD2R64 : SchedWrite;
+def WriteVLD4R8 : SchedWrite;
+def WriteVLD4R16 : SchedWrite;
+def WriteVLD4R32 : SchedWrite;
+def WriteVLD4R64 : SchedWrite;
+def WriteVLD8R8 : SchedWrite;
+def WriteVLD8R16 : SchedWrite;
+def WriteVLD8R32 : SchedWrite;
+def WriteVLD8R64 : SchedWrite;
+def WriteVST1R : SchedWrite;
+def WriteVST2R : SchedWrite;
+def WriteVST4R : SchedWrite;
+def WriteVST8R : SchedWrite;
+
+// 11. Vector Integer Arithmetic Instructions
+// 11.1. Vector Single-Width Integer Add and Subtract
+// 11.5. Vector Bitwise Logical Instructions
+def WriteVIALUV : SchedWrite;
+def WriteVIALUX : SchedWrite;
+def WriteVIALUI : SchedWrite;
+// 11.2. Vector Widening Integer Add/Subtract
+def WriteVIWALUV : SchedWrite;
+def WriteVIWALUX : SchedWrite;
+def WriteVIWALUI : SchedWrite;
+// 11.3. Vector Integer Extension
+def WriteVExtV : SchedWrite;
+// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
+def WriteVICALUV : SchedWrite;
+def WriteVICALUX : SchedWrite;
+def WriteVICALUI : SchedWrite;
+// 11.6. Vector Single-Width Bit Shift Instructions
+def WriteVShiftV : SchedWrite;
+def WriteVShiftX : SchedWrite;
+def WriteVShiftI : SchedWrite;
+// 11.7. Vector Narrowing Integer Right Shift Instructions
+def WriteVNShiftV : SchedWrite;
+def WriteVNShiftX : SchedWrite;
+def WriteVNShiftI : SchedWrite;
+// 11.8. Vector Integer Comparison Instructions
+// 11.9. Vector Integer Min/Max Instructions
+def WriteVICmpV : SchedWrite;
+def WriteVICmpX : SchedWrite;
+def WriteVICmpI : SchedWrite;
+// 11.10. Vector Single-Width Integer Multiply Instructions
+def WriteVIMulV : SchedWrite;
+def WriteVIMulX : SchedWrite;
+// 11.11. Vector Integer Divide Instructions
+def WriteVIDivV : SchedWrite;
+def WriteVIDivX : SchedWrite;
+// 11.12. Vector Widening Integer Multiply Instructions
+def WriteVIWMulV : SchedWrite;
+def WriteVIWMulX : SchedWrite;
+// 11.13. Vector Single-Width Integer Multiply-Add Instructions
+def WriteVIMulAddV : SchedWrite;
+def WriteVIMulAddX : SchedWrite;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def WriteVIWMulAddV : SchedWrite;
+def WriteVIWMulAddX : SchedWrite;
+// 11.15. Vector Integer Merge Instructions
+def WriteVIMergeV : SchedWrite;
+def WriteVIMergeX : SchedWrite;
+def WriteVIMergeI : SchedWrite;
+// 11.16. Vector Integer Move Instructions
+def WriteVIMovV : SchedWrite;
+def WriteVIMovX : SchedWrite;
+def WriteVIMovI : SchedWrite;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def WriteVSALUV : SchedWrite;
+def WriteVSALUX : SchedWrite;
+def WriteVSALUI : SchedWrite;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def WriteVAALUV : SchedWrite;
+def WriteVAALUX : SchedWrite;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def WriteVSMulV : SchedWrite;
+def WriteVSMulX : SchedWrite;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def WriteVSShiftV : SchedWrite;
+def WriteVSShiftX : SchedWrite;
+def WriteVSShiftI : SchedWrite;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def WriteVNClipV : SchedWrite;
+def WriteVNClipX : SchedWrite;
+def WriteVNClipI : SchedWrite;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def WriteVFALUV : SchedWrite;
+def WriteVFALUF : SchedWrite;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def WriteVFWALUV : SchedWrite;
+def WriteVFWALUF : SchedWrite;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def WriteVFMulV : SchedWrite;
+def WriteVFMulF : SchedWrite;
+def WriteVFDivV : SchedWrite;
+def WriteVFDivF : SchedWrite;
+// 13.5. Vector Widening Floating-Point Multiply
+def WriteVFWMulV : SchedWrite;
+def WriteVFWMulF : SchedWrite;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def WriteVFMulAddV : SchedWrite;
+def WriteVFMulAddF : SchedWrite;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def WriteVFWMulAddV : SchedWrite;
+def WriteVFWMulAddF : SchedWrite;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def WriteVFSqrtV : SchedWrite;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def WriteVFRecpV : SchedWrite;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def WriteVFCmpV : SchedWrite;
+def WriteVFCmpF : SchedWrite;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def WriteVFSgnjV : SchedWrite;
+def WriteVFSgnjF : SchedWrite;
+// 13.14. Vector Floating-Point Classify Instruction
+def WriteVFClassV : SchedWrite;
+// 13.15. Vector Floating-Point Merge Instruction
+def WriteVFMergeV : SchedWrite;
+// 13.16. Vector Floating-Point Move Instruction
+def WriteVFMovV : SchedWrite;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def WriteVFCvtIToFV : SchedWrite;
+def WriteVFCvtFToIV : SchedWrite;
+def WriteVFCvtFToFV : SchedWrite;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def WriteVFWCvtIToFV : SchedWrite;
+def WriteVFWCvtFToIV : SchedWrite;
+def WriteVFWCvtFToFV : SchedWrite;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def WriteVFNCvtIToFV : SchedWrite;
+def WriteVFNCvtFToIV : SchedWrite;
+def WriteVFNCvtFToFV : SchedWrite;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def WriteVIRedV : SchedWrite;
+// 14.2. Vector Widening Integer Reduction Instructions
+def WriteVIWRedV : SchedWrite;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def WriteVFRedV : SchedWrite;
+def WriteVFRedOV : SchedWrite;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def WriteVFWRedV : SchedWrite;
+def WriteVFWRedOV : SchedWrite;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def WriteVMALUV : SchedWrite;
+// 15.2. Vector Mask Population Count
+def WriteVMPopV : SchedWrite;
+// 15.3. Vector Find-First-Set Mask Bit
+def WriteVMFFSV : SchedWrite;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def WriteVMSFSV : SchedWrite;
+// 15.8. Vector Iota Instruction
+def WriteVMIotV : SchedWrite;
+// 15.9. Vector Element Index Instruction
+def WriteVMIdxV : SchedWrite;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def WriteVIMovVX : SchedWrite;
+def WriteVIMovXV : SchedWrite;
+// 16.2. Floating-Point Scalar Move Instructions
+def WriteVFMovVF : SchedWrite;
+def WriteVFMovFV : SchedWrite;
+// 16.3. Vector Slide Instructions
+def WriteVISlideX : SchedWrite;
+def WriteVISlideI : SchedWrite;
+def WriteVISlide1X : SchedWrite;
+def WriteVFSlide1F : SchedWrite;
+// 16.4. Vector Register Gather Instructions
+def WriteVGatherV : SchedWrite;
+def WriteVGatherX : SchedWrite;
+def WriteVGatherI : SchedWrite;
+// 16.5. Vector Compress Instruction
+def WriteVCompressV : SchedWrite;
+// 16.6. Whole Vector Register Move
+def WriteVMov1V : SchedWrite;
+def WriteVMov2V : SchedWrite;
+def WriteVMov4V : SchedWrite;
+def WriteVMov8V : SchedWrite;
+
+//===----------------------------------------------------------------------===//
+/// Define scheduler resources associated with use operands.
+
+// 7. Vector Loads and Stores
+def ReadVLDX : SchedRead;
+def ReadVSTX : SchedRead;
+// 7.4. Vector Unit-Stride Instructions
+def ReadVSTE8V : SchedRead;
+def ReadVSTE16V : SchedRead;
+def ReadVSTE32V : SchedRead;
+def ReadVSTE64V : SchedRead;
+// 7.4.1. Vector Unit-Strided Mask
+def ReadVSTM : SchedRead;
+// 7.5. Vector Strided Instructions
+def ReadVLDSX : SchedRead;
+def ReadVSTSX : SchedRead;
+def ReadVSTS8V : SchedRead;
+def ReadVSTS16V : SchedRead;
+def ReadVSTS32V : SchedRead;
+def ReadVSTS64V : SchedRead;
+// 7.6. Vector Indexed Instructions
+def ReadVLDUXV : SchedRead;
+def ReadVLDOXV : SchedRead;
+def ReadVSTUX8 : SchedRead;
+def ReadVSTUX16 : SchedRead;
+def ReadVSTUX32 : SchedRead;
+def ReadVSTUX64 : SchedRead;
+def ReadVSTUXV : SchedRead;
+def ReadVSTUX8V : SchedRead;
+def ReadVSTUX16V : SchedRead;
+def ReadVSTUX32V : SchedRead;
+def ReadVSTUX64V : SchedRead;
+def ReadVSTOX8 : SchedRead;
+def ReadVSTOX16 : SchedRead;
+def ReadVSTOX32 : SchedRead;
+def ReadVSTOX64 : SchedRead;
+def ReadVSTOXV : SchedRead;
+def ReadVSTOX8V : SchedRead;
+def ReadVSTOX16V : SchedRead;
+def ReadVSTOX32V : SchedRead;
+def ReadVSTOX64V : SchedRead;
+// 7.9. Vector Whole Register Instructions
+def ReadVST1R : SchedRead;
+def ReadVST2R : SchedRead;
+def ReadVST4R : SchedRead;
+def ReadVST8R : SchedRead;
+
+// 11. Vector Integer Arithmetic Instructions
+// 11.1. Vector Single-Width Integer Add and Subtract
+// 11.5. Vector Bitwise Logical Instructions
+def ReadVIALUV : SchedRead;
+def ReadVIALUX : SchedRead;
+// 11.2. Vector Widening Integer Add/Subtract
+def ReadVIWALUV : SchedRead;
+def ReadVIWALUX : SchedRead;
+// 11.3. Vector Integer Extension
+def ReadVExtV : SchedRead;
+// 11.4. Vector Integer Arithmetic with Carry or Borrow Instructions
+def ReadVIALUCV : SchedRead;
+def ReadVIALUCX : SchedRead;
+// 11.6. Vector Single-Width Bit Shift Instructions
+def ReadVShiftV : SchedRead;
+def ReadVShiftX : SchedRead;
+// 11.7. Vector Narrowing Integer Right Shift Instructions
+def ReadVNShiftV : SchedRead;
+def ReadVNShiftX : SchedRead;
+// 11.8. Vector Integer Comparison Instructions
+// 11.9. Vector Integer Min/Max Instructions
+def ReadVICmpV : SchedRead;
+def ReadVICmpX : SchedRead;
+// 11.10. Vector Single-Width Integer Multiply Instructions
+def ReadVIMulV : SchedRead;
+def ReadVIMulX : SchedRead;
+// 11.11. Vector Integer Divide Instructions
+def ReadVIDivV : SchedRead;
+def ReadVIDivX : SchedRead;
+// 11.12. Vector Widening Integer Multiply Instructions
+def ReadVIWMulV : SchedRead;
+def ReadVIWMulX : SchedRead;
+// 11.13. Vector Single-Width Integer Multiply-Add Instructions
+def ReadVIMulAddV : SchedRead;
+def ReadVIMulAddX : SchedRead;
+// 11.14. Vector Widening Integer Multiply-Add Instructions
+def ReadVIWMulAddV : SchedRead;
+def ReadVIWMulAddX : SchedRead;
+// 11.15. Vector Integer Merge Instructions
+def ReadVIMergeV : SchedRead;
+def ReadVIMergeX : SchedRead;
+// 11.16. Vector Integer Move Instructions
+def ReadVIMovV : SchedRead;
+def ReadVIMovX : SchedRead;
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+// 12.1. Vector Single-Width Saturating Add and Subtract
+def ReadVSALUV : SchedRead;
+def ReadVSALUX : SchedRead;
+// 12.2. Vector Single-Width Averaging Add and Subtract
+def ReadVAALUV : SchedRead;
+def ReadVAALUX : SchedRead;
+// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+def ReadVSMulV : SchedRead;
+def ReadVSMulX : SchedRead;
+// 12.4. Vector Single-Width Scaling Shift Instructions
+def ReadVSShiftV : SchedRead;
+def ReadVSShiftX : SchedRead;
+// 12.5. Vector Narrowing Fixed-Point Clip Instructions
+def ReadVNClipV : SchedRead;
+def ReadVNClipX : SchedRead;
+
+// 13. Vector Floating-Point Instructions
+// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
+def ReadVFALUV : SchedRead;
+def ReadVFALUF : SchedRead;
+// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
+def ReadVFWALUV : SchedRead;
+def ReadVFWALUF : SchedRead;
+// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
+def ReadVFMulV : SchedRead;
+def ReadVFMulF : SchedRead;
+def ReadVFDivV : SchedRead;
+def ReadVFDivF : SchedRead;
+// 13.5. Vector Widening Floating-Point Multiply
+def ReadVFWMulV : SchedRead;
+def ReadVFWMulF : SchedRead;
+// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
+def ReadVFMulAddV : SchedRead;
+def ReadVFMulAddF : SchedRead;
+// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
+def ReadVFWMulAddV : SchedRead;
+def ReadVFWMulAddF : SchedRead;
+// 13.8. Vector Floating-Point Square-Root Instruction
+def ReadVFSqrtV : SchedRead;
+// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
+// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
+def ReadVFRecpV : SchedRead;
+// 13.11. Vector Floating-Point MIN/MAX Instructions
+// 13.13. Vector Floating-Point Compare Instructions
+def ReadVFCmpV : SchedRead;
+def ReadVFCmpF : SchedRead;
+// 13.12. Vector Floating-Point Sign-Injection Instructions
+def ReadVFSgnjV : SchedRead;
+def ReadVFSgnjF : SchedRead;
+// 13.14. Vector Floating-Point Classify Instruction
+def ReadVFClassV : SchedRead;
+// 13.15. Vector Floating-Point Merge Instruction
+def ReadVFMergeV : SchedRead;
+def ReadVFMergeF : SchedRead;
+// 13.16. Vector Floating-Point Move Instruction
+def ReadVFMovF : SchedRead;
+// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
+def ReadVFCvtIToFV : SchedRead;
+def ReadVFCvtFToIV : SchedRead;
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+def ReadVFWCvtIToFV : SchedRead;
+def ReadVFWCvtFToIV : SchedRead;
+def ReadVFWCvtFToFV : SchedRead;
+// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
+def ReadVFNCvtIToFV : SchedRead;
+def ReadVFNCvtFToIV : SchedRead;
+def ReadVFNCvtFToFV : SchedRead;
+
+// 14. Vector Reduction Operations
+// 14.1. Vector Single-Width Integer Reduction Instructions
+def ReadVIRedV : SchedRead;
+def ReadVIRedV0 : SchedRead;
+// 14.2. Vector Widening Integer Reduction Instructions
+def ReadVIWRedV : SchedRead;
+def ReadVIWRedV0 : SchedRead;
+// 14.3. Vector Single-Width Floating-Point Reduction Instructions
+def ReadVFRedV : SchedRead;
+def ReadVFRedV0 : SchedRead;
+def ReadVFRedOV : SchedRead;
+def ReadVFRedOV0 : SchedRead;
+// 14.4. Vector Widening Floating-Point Reduction Instructions
+def ReadVFWRedV : SchedRead;
+def ReadVFWRedV0 : SchedRead;
+def ReadVFWRedOV : SchedRead;
+def ReadVFWRedOV0 : SchedRead;
+
+// 15. Vector Mask Instructions
+// 15.1. Vector Mask-Register Logical Instructions
+def ReadVMALUV : SchedRead;
+// 15.2. Vector Mask Population Count
+def ReadVMPopV : SchedRead;
+// 15.3. Vector Find-First-Set Mask Bit
+def ReadVMFFSV : SchedRead;
+// 15.4. Vector Set-Before-First Mask Bit
+// 15.5. Vector Set-Including-First Mask Bit
+// 15.6. Vector Set-only-First Mask Bit
+def ReadVMSFSV : SchedRead;
+// 15.8. Vector Iota Instruction
+def ReadVMIotV : SchedRead;
+
+// 16. Vector Permutation Instructions
+// 16.1. Integer Scalar Move Instructions
+def ReadVIMovVX : SchedRead;
+def ReadVIMovXV : SchedRead;
+def ReadVIMovXX : SchedRead;
+// 16.2. Floating-Point Scalar Move Instructions
+def ReadVFMovVF : SchedRead;
+def ReadVFMovFV : SchedRead;
+def ReadVFMovFX : SchedRead;
+// 16.3. Vector Slide Instructions
+def ReadVISlideV : SchedRead;
+def ReadVISlideX : SchedRead;
+def ReadVFSlideV : SchedRead;
+def ReadVFSlideF : SchedRead;
+// 16.4. Vector Register Gather Instructions
+def ReadVGatherV : SchedRead;
+def ReadVGatherX : SchedRead;
+// 16.5. Vector Compress Instruction
+def ReadVCompressV : SchedRead;
+// 16.6. Whole Vector Register Move
+def ReadVMov1V : SchedRead;
+def ReadVMov2V : SchedRead;
+def ReadVMov4V : SchedRead;
+def ReadVMov8V : SchedRead;
+
+// Others
+def ReadVMask : SchedRead;
+
+//===----------------------------------------------------------------------===//
+/// Define default scheduler resources for V.
+
+multiclass UnsupportedSchedV {
+let Unsupported = true in {
+
+// 7. Vector Loads and Stores
+def : WriteRes<WriteVLDE8, []>;
+def : WriteRes<WriteVLDE16, []>;
+def : WriteRes<WriteVLDE32, []>;
+def : WriteRes<WriteVLDE64, []>;
+def : WriteRes<WriteVSTE8, []>;
+def : WriteRes<WriteVSTE16, []>;
+def : WriteRes<WriteVSTE32, []>;
+def : WriteRes<WriteVSTE64, []>;
+def : WriteRes<WriteVLDM, []>;
+def : WriteRes<WriteVSTM, []>;
+def : WriteRes<WriteVLDS8, []>;
+def : WriteRes<WriteVLDS16, []>;
+def : WriteRes<WriteVLDS32, []>;
+def : WriteRes<WriteVLDS64, []>;
+def : WriteRes<WriteVSTS8, []>;
+def : WriteRes<WriteVSTS16, []>;
+def : WriteRes<WriteVSTS32, []>;
+def : WriteRes<WriteVSTS64, []>;
+def : WriteRes<WriteVLDUX8, []>;
+def : WriteRes<WriteVLDUX16, []>;
+def : WriteRes<WriteVLDUX32, []>;
+def : WriteRes<WriteVLDUX64, []>;
+def : WriteRes<WriteVLDOX8, []>;
+def : WriteRes<WriteVLDOX16, []>;
+def : WriteRes<WriteVLDOX32, []>;
+def : WriteRes<WriteVLDOX64, []>;
+def : WriteRes<WriteVSTUX8, []>;
+def : WriteRes<WriteVSTUX16, []>;
+def : WriteRes<WriteVSTUX32, []>;
+def : WriteRes<WriteVSTUX64, []>;
+def : WriteRes<WriteVSTOX8, []>;
+def : WriteRes<WriteVSTOX16, []>;
+def : WriteRes<WriteVSTOX32, []>;
+def : WriteRes<WriteVSTOX64, []>;
+def : WriteRes<WriteVLDFF8, []>;
+def : WriteRes<WriteVLDFF16, []>;
+def : WriteRes<WriteVLDFF32, []>;
+def : WriteRes<WriteVLDFF64, []>;
+def : WriteRes<WriteVLD1R8, []>;
+def : WriteRes<WriteVLD1R16, []>;
+def : WriteRes<WriteVLD1R32, []>;
+def : WriteRes<WriteVLD1R64, []>;
+def : WriteRes<WriteVLD2R8, []>;
+def : WriteRes<WriteVLD2R16, []>;
+def : WriteRes<WriteVLD2R32, []>;
+def : WriteRes<WriteVLD2R64, []>;
+def : WriteRes<WriteVLD4R8, []>;
+def : WriteRes<WriteVLD4R16, []>;
+def : WriteRes<WriteVLD4R32, []>;
+def : WriteRes<WriteVLD4R64, []>;
+def : WriteRes<WriteVLD8R8, []>;
+def : WriteRes<WriteVLD8R16, []>;
+def : WriteRes<WriteVLD8R32, []>;
+def : WriteRes<WriteVLD8R64, []>;
+def : WriteRes<WriteVST1R, []>;
+def : WriteRes<WriteVST2R, []>;
+def : WriteRes<WriteVST4R, []>;
+def : WriteRes<WriteVST8R, []>;
+
+// 12. Vector Integer Arithmetic Instructions
+def : WriteRes<WriteVIALUV, []>;
+def : WriteRes<WriteVIALUX, []>;
+def : WriteRes<WriteVIALUI, []>;
+def : WriteRes<WriteVIWALUV, []>;
+def : WriteRes<WriteVIWALUX, []>;
+def : WriteRes<WriteVIWALUI, []>;
+def : WriteRes<WriteVExtV, []>;
+def : WriteRes<WriteVICALUV, []>;
+def : WriteRes<WriteVICALUX, []>;
+def : WriteRes<WriteVICALUI, []>;
+def : WriteRes<WriteVShiftV, []>;
+def : WriteRes<WriteVShiftX, []>;
+def : WriteRes<WriteVShiftI, []>;
+def : WriteRes<WriteVNShiftV, []>;
+def : WriteRes<WriteVNShiftX, []>;
+def : WriteRes<WriteVNShiftI, []>;
+def : WriteRes<WriteVICmpV, []>;
+def : WriteRes<WriteVICmpX, []>;
+def : WriteRes<WriteVICmpI, []>;
+def : WriteRes<WriteVIMulV, []>;
+def : WriteRes<WriteVIMulX, []>;
+def : WriteRes<WriteVIDivV, []>;
+def : WriteRes<WriteVIDivX, []>;
+def : WriteRes<WriteVIWMulV, []>;
+def : WriteRes<WriteVIWMulX, []>;
+def : WriteRes<WriteVIMulAddV, []>;
+def : WriteRes<WriteVIMulAddX, []>;
+def : WriteRes<WriteVIWMulAddV, []>;
+def : WriteRes<WriteVIWMulAddX, []>;
+def : WriteRes<WriteVIMergeV, []>;
+def : WriteRes<WriteVIMergeX, []>;
+def : WriteRes<WriteVIMergeI, []>;
+def : WriteRes<WriteVIMovV, []>;
+def : WriteRes<WriteVIMovX, []>;
+def : WriteRes<WriteVIMovI, []>;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+def : WriteRes<WriteVSALUV, []>;
+def : WriteRes<WriteVSALUX, []>;
+def : WriteRes<WriteVSALUI, []>;
+def : WriteRes<WriteVAALUV, []>;
+def : WriteRes<WriteVAALUX, []>;
+def : WriteRes<WriteVSMulV, []>;
+def : WriteRes<WriteVSMulX, []>;
+def : WriteRes<WriteVSShiftV, []>;
+def : WriteRes<WriteVSShiftX, []>;
+def : WriteRes<WriteVSShiftI, []>;
+def : WriteRes<WriteVNClipV, []>;
+def : WriteRes<WriteVNClipX, []>;
+def : WriteRes<WriteVNClipI, []>;
+
+// 14. Vector Floating-Point Instructions
+def : WriteRes<WriteVFALUV, []>;
+def : WriteRes<WriteVFALUF, []>;
+def : WriteRes<WriteVFWALUV, []>;
+def : WriteRes<WriteVFWALUF, []>;
+def : WriteRes<WriteVFMulV, []>;
+def : WriteRes<WriteVFMulF, []>;
+def : WriteRes<WriteVFDivV, []>;
+def : WriteRes<WriteVFDivF, []>;
+def : WriteRes<WriteVFWMulV, []>;
+def : WriteRes<WriteVFWMulF, []>;
+def : WriteRes<WriteVFMulAddV, []>;
+def : WriteRes<WriteVFMulAddF, []>;
+def : WriteRes<WriteVFWMulAddV, []>;
+def : WriteRes<WriteVFWMulAddF, []>;
+def : WriteRes<WriteVFSqrtV, []>;
+def : WriteRes<WriteVFRecpV, []>;
+def : WriteRes<WriteVFCmpV, []>;
+def : WriteRes<WriteVFCmpF, []>;
+def : WriteRes<WriteVFSgnjV, []>;
+def : WriteRes<WriteVFSgnjF, []>;
+def : WriteRes<WriteVFClassV, []>;
+def : WriteRes<WriteVFMergeV, []>;
+def : WriteRes<WriteVFMovV, []>;
+def : WriteRes<WriteVFCvtIToFV, []>;
+def : WriteRes<WriteVFCvtFToIV, []>;
+def : WriteRes<WriteVFCvtFToFV, []>;
+def : WriteRes<WriteVFWCvtIToFV, []>;
+def : WriteRes<WriteVFWCvtFToIV, []>;
+def : WriteRes<WriteVFWCvtFToFV, []>;
+def : WriteRes<WriteVFNCvtIToFV, []>;
+def : WriteRes<WriteVFNCvtFToIV, []>;
+def : WriteRes<WriteVFNCvtFToFV, []>;
+
+// 15. Vector Reduction Operations
+def : WriteRes<WriteVIRedV, []>;
+def : WriteRes<WriteVIWRedV, []>;
+def : WriteRes<WriteVFRedV, []>;
+def : WriteRes<WriteVFRedOV, []>;
+def : WriteRes<WriteVFWRedV, []>;
+def : WriteRes<WriteVFWRedOV, []>;
+
+// 16. Vector Mask Instructions
+def : WriteRes<WriteVMALUV, []>;
+def : WriteRes<WriteVMPopV, []>;
+def : WriteRes<WriteVMFFSV, []>;
+def : WriteRes<WriteVMSFSV, []>;
+def : WriteRes<WriteVMIotV, []>;
+def : WriteRes<WriteVMIdxV, []>;
+
+// 17. Vector Permutation Instructions
+def : WriteRes<WriteVIMovVX, []>;
+def : WriteRes<WriteVIMovXV, []>;
+def : WriteRes<WriteVFMovVF, []>;
+def : WriteRes<WriteVFMovFV, []>;
+def : WriteRes<WriteVISlideX, []>;
+def : WriteRes<WriteVISlideI, []>;
+def : WriteRes<WriteVISlide1X, []>;
+def : WriteRes<WriteVFSlide1F, []>;
+def : WriteRes<WriteVGatherV, []>;
+def : WriteRes<WriteVGatherX, []>;
+def : WriteRes<WriteVGatherI, []>;
+def : WriteRes<WriteVCompressV, []>;
+def : WriteRes<WriteVMov1V, []>;
+def : WriteRes<WriteVMov2V, []>;
+def : WriteRes<WriteVMov4V, []>;
+def : WriteRes<WriteVMov8V, []>;
+
+// 7. Vector Loads and Stores
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
+def : ReadAdvance<ReadVSTE8V, 0>;
+def : ReadAdvance<ReadVSTE16V, 0>;
+def : ReadAdvance<ReadVSTE32V, 0>;
+def : ReadAdvance<ReadVSTE64V, 0>;
+def : ReadAdvance<ReadVSTM, 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
+def : ReadAdvance<ReadVSTS8V, 0>;
+def : ReadAdvance<ReadVSTS16V, 0>;
+def : ReadAdvance<ReadVSTS32V, 0>;
+def : ReadAdvance<ReadVSTS64V, 0>;
+def : ReadAdvance<ReadVLDUXV, 0>;
+def : ReadAdvance<ReadVLDOXV, 0>;
+def : ReadAdvance<ReadVSTUXV, 0>;
+def : ReadAdvance<ReadVSTUX8, 0>;
+def : ReadAdvance<ReadVSTUX16, 0>;
+def : ReadAdvance<ReadVSTUX32, 0>;
+def : ReadAdvance<ReadVSTUX64, 0>;
+def : ReadAdvance<ReadVSTUX8V, 0>;
+def : ReadAdvance<ReadVSTUX16V, 0>;
+def : ReadAdvance<ReadVSTUX32V, 0>;
+def : ReadAdvance<ReadVSTUX64V, 0>;
+def : ReadAdvance<ReadVSTOX8, 0>;
+def : ReadAdvance<ReadVSTOX16, 0>;
+def : ReadAdvance<ReadVSTOX32, 0>;
+def : ReadAdvance<ReadVSTOX64, 0>;
+def : ReadAdvance<ReadVSTOXV, 0>;
+def : ReadAdvance<ReadVSTOX8V, 0>;
+def : ReadAdvance<ReadVSTOX16V, 0>;
+def : ReadAdvance<ReadVSTOX32V, 0>;
+def : ReadAdvance<ReadVSTOX64V, 0>;
+def : ReadAdvance<ReadVST1R, 0>;
+def : ReadAdvance<ReadVST2R, 0>;
+def : ReadAdvance<ReadVST4R, 0>;
+def : ReadAdvance<ReadVST8R, 0>;
+
+// 12. Vector Integer Arithmetic Instructions
+def : ReadAdvance<ReadVIALUV, 0>;
+def : ReadAdvance<ReadVIALUX, 0>;
+def : ReadAdvance<ReadVIWALUV, 0>;
+def : ReadAdvance<ReadVIWALUX, 0>;
+def : ReadAdvance<ReadVExtV, 0>;
+def : ReadAdvance<ReadVIALUCV, 0>;
+def : ReadAdvance<ReadVIALUCX, 0>;
+def : ReadAdvance<ReadVShiftV, 0>;
+def : ReadAdvance<ReadVShiftX, 0>;
+def : ReadAdvance<ReadVNShiftV, 0>;
+def : ReadAdvance<ReadVNShiftX, 0>;
+def : ReadAdvance<ReadVICmpV, 0>;
+def : ReadAdvance<ReadVICmpX, 0>;
+def : ReadAdvance<ReadVIMulV, 0>;
+def : ReadAdvance<ReadVIMulX, 0>;
+def : ReadAdvance<ReadVIDivV, 0>;
+def : ReadAdvance<ReadVIDivX, 0>;
+def : ReadAdvance<ReadVIWMulV, 0>;
+def : ReadAdvance<ReadVIWMulX, 0>;
+def : ReadAdvance<ReadVIMulAddV, 0>;
+def : ReadAdvance<ReadVIMulAddX, 0>;
+def : ReadAdvance<ReadVIWMulAddV, 0>;
+def : ReadAdvance<ReadVIWMulAddX, 0>;
+def : ReadAdvance<ReadVIMergeV, 0>;
+def : ReadAdvance<ReadVIMergeX, 0>;
+def : ReadAdvance<ReadVIMovV, 0>;
+def : ReadAdvance<ReadVIMovX, 0>;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+def : ReadAdvance<ReadVSALUV, 0>;
+def : ReadAdvance<ReadVSALUX, 0>;
+def : ReadAdvance<ReadVAALUV, 0>;
+def : ReadAdvance<ReadVAALUX, 0>;
+def : ReadAdvance<ReadVSMulV, 0>;
+def : ReadAdvance<ReadVSMulX, 0>;
+def : ReadAdvance<ReadVSShiftV, 0>;
+def : ReadAdvance<ReadVSShiftX, 0>;
+def : ReadAdvance<ReadVNClipV, 0>;
+def : ReadAdvance<ReadVNClipX, 0>;
+
+// 14. Vector Floating-Point Instructions
+def : ReadAdvance<ReadVFALUV, 0>;
+def : ReadAdvance<ReadVFALUF, 0>;
+def : ReadAdvance<ReadVFWALUV, 0>;
+def : ReadAdvance<ReadVFWALUF, 0>;
+def : ReadAdvance<ReadVFMulV, 0>;
+def : ReadAdvance<ReadVFMulF, 0>;
+def : ReadAdvance<ReadVFDivV, 0>;
+def : ReadAdvance<ReadVFDivF, 0>;
+def : ReadAdvance<ReadVFWMulV, 0>;
+def : ReadAdvance<ReadVFWMulF, 0>;
+def : ReadAdvance<ReadVFMulAddV, 0>;
+def : ReadAdvance<ReadVFMulAddF, 0>;
+def : ReadAdvance<ReadVFWMulAddV, 0>;
+def : ReadAdvance<ReadVFWMulAddF, 0>;
+def : ReadAdvance<ReadVFSqrtV, 0>;
+def : ReadAdvance<ReadVFRecpV, 0>;
+def : ReadAdvance<ReadVFCmpV, 0>;
+def : ReadAdvance<ReadVFCmpF, 0>;
+def : ReadAdvance<ReadVFSgnjV, 0>;
+def : ReadAdvance<ReadVFSgnjF, 0>;
+def : ReadAdvance<ReadVFClassV, 0>;
+def : ReadAdvance<ReadVFMergeV, 0>;
+def : ReadAdvance<ReadVFMergeF, 0>;
+def : ReadAdvance<ReadVFMovF, 0>;
+def : ReadAdvance<ReadVFCvtIToFV, 0>;
+def : ReadAdvance<ReadVFCvtFToIV, 0>;
+def : ReadAdvance<ReadVFWCvtIToFV, 0>;
+def : ReadAdvance<ReadVFWCvtFToIV, 0>;
+def : ReadAdvance<ReadVFWCvtFToFV, 0>;
+def : ReadAdvance<ReadVFNCvtIToFV, 0>;
+def : ReadAdvance<ReadVFNCvtFToIV, 0>;
+def : ReadAdvance<ReadVFNCvtFToFV, 0>;
+
+// 15. Vector Reduction Operations
+def : ReadAdvance<ReadVIRedV, 0>;
+def : ReadAdvance<ReadVIRedV0, 0>;
+def : ReadAdvance<ReadVIWRedV, 0>;
+def : ReadAdvance<ReadVIWRedV0, 0>;
+def : ReadAdvance<ReadVFRedV, 0>;
+def : ReadAdvance<ReadVFRedV0, 0>;
+def : ReadAdvance<ReadVFRedOV, 0>;
+def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFWRedV, 0>;
+def : ReadAdvance<ReadVFWRedV0, 0>;
+def : ReadAdvance<ReadVFWRedOV, 0>;
+def : ReadAdvance<ReadVFWRedOV0, 0>;
+
+// 16. Vector Mask Instructions
+def : ReadAdvance<ReadVMALUV, 0>;
+def : ReadAdvance<ReadVMPopV, 0>;
+def : ReadAdvance<ReadVMFFSV, 0>;
+def : ReadAdvance<ReadVMSFSV, 0>;
+def : ReadAdvance<ReadVMIotV, 0>;
+
+// 17. Vector Permutation Instructions
+def : ReadAdvance<ReadVIMovVX, 0>;
+def : ReadAdvance<ReadVIMovXV, 0>;
+def : ReadAdvance<ReadVIMovXX, 0>;
+def : ReadAdvance<ReadVFMovVF, 0>;
+def : ReadAdvance<ReadVFMovFV, 0>;
+def : ReadAdvance<ReadVFMovFX, 0>;
+def : ReadAdvance<ReadVISlideV, 0>;
+def : ReadAdvance<ReadVISlideX, 0>;
+def : ReadAdvance<ReadVFSlideV, 0>;
+def : ReadAdvance<ReadVFSlideF, 0>;
+def : ReadAdvance<ReadVGatherV, 0>;
+def : ReadAdvance<ReadVGatherX, 0>;
+def : ReadAdvance<ReadVCompressV, 0>;
+def : ReadAdvance<ReadVMov1V, 0>;
+def : ReadAdvance<ReadVMov2V, 0>;
+def : ReadAdvance<ReadVMov4V, 0>;
+def : ReadAdvance<ReadVMov8V, 0>;
+
+// Others
+def : ReadAdvance<ReadVMask, 0>;
+
+} // Unsupported
+} // UnsupportedSchedV
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index b19fdcb0082b..1063134b8a6c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -17,7 +17,7 @@
#include "RISCVLegalizerInfo.h"
#include "RISCVRegisterBankInfo.h"
#include "RISCVTargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -45,6 +45,11 @@ static cl::opt<unsigned> RVVVectorLMULMax(
"Fractional LMUL values are not supported."),
cl::init(8), cl::Hidden);
+static cl::opt<unsigned> RVVVectorELENMax(
+ "riscv-v-fixed-length-vector-elen-max",
+ cl::desc("The maximum ELEN value to use for fixed length vectors."),
+ cl::init(64), cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -106,7 +111,8 @@ const RegisterBankInfo *RISCVSubtarget::getRegBankInfo() const {
}
unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
- assert(hasStdExtV() && "Tried to get vector length without V support!");
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
if (RVVVectorBitsMax == 0)
return 0;
assert(RVVVectorBitsMax >= 128 && RVVVectorBitsMax <= 65536 &&
@@ -121,8 +127,8 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
}
unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
- assert(hasStdExtV() &&
- "Tried to get vector length without V extension support!");
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
assert((RVVVectorBitsMin == 0 ||
(RVVVectorBitsMin >= 128 && RVVVectorBitsMax <= 65536 &&
isPowerOf2_32(RVVVectorBitsMin))) &&
@@ -138,13 +144,24 @@ unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
}
unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
- assert(hasStdExtV() &&
- "Tried to get maximum LMUL without V extension support!");
+ assert(hasVInstructions() &&
+ "Tried to get vector length without Zve or V extension support!");
assert(RVVVectorLMULMax <= 8 && isPowerOf2_32(RVVVectorLMULMax) &&
"V extension requires a LMUL to be at most 8 and a power of 2!");
- return PowerOf2Floor(std::max<unsigned>(RVVVectorLMULMax, 1));
+ return PowerOf2Floor(
+ std::max<unsigned>(std::min<unsigned>(RVVVectorLMULMax, 8), 1));
+}
+
+unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
+ assert(hasVInstructions() &&
+ "Tried to get maximum ELEN without Zve or V extension support!");
+ assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 &&
+ isPowerOf2_32(RVVVectorELENMax) &&
+ "V extension requires a ELEN to be a power of 2 between 8 and 64!");
+ return PowerOf2Floor(
+ std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, 64), 8));
}
bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
- return hasStdExtV() && getMinRVVVectorSizeInBits() != 0;
+ return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
}
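[Editor's note] The new ELEN handling above reduces to "keep the flag value in [8, 64] and round down to a power of two". A minimal C++ sketch of that arithmetic, with PowerOf2Floor reimplemented locally rather than pulled from llvm/Support/MathExtras.h (sketch only, not the actual subtarget code):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::PowerOf2Floor: largest power of two <= X (X >= 1).
static uint64_t powerOf2Floor(uint64_t X) {
  uint64_t P = 1;
  while (P * 2 <= X)
    P *= 2;
  return P;
}

static unsigned maxELENForFixedLengthVectors(unsigned RVVVectorELENMax) {
  // Same expression as the subtarget change above, minus the asserts.
  return powerOf2Floor(
      std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, 64), 8));
}

int main() {
  // In the real code an assert rejects values outside [8, 64] or values that
  // are not powers of two before this clamp is ever reached.
  printf("%u %u %u\n", maxELENForFixedLengthVectors(64),
         maxELENForFixedLengthVectors(16), maxELENForFixedLengthVectors(8));
  // prints: 64 16 8
  return 0;
}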
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index ce36331e044d..deb2a11f98f1 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -39,7 +39,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtF = false;
bool HasStdExtD = false;
bool HasStdExtC = false;
- bool HasStdExtB = false;
bool HasStdExtZba = false;
bool HasStdExtZbb = false;
bool HasStdExtZbc = false;
@@ -50,10 +49,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
bool HasStdExtZbr = false;
bool HasStdExtZbs = false;
bool HasStdExtZbt = false;
- bool HasStdExtZbproposedc = false;
bool HasStdExtV = false;
bool HasStdExtZvlsseg = false;
bool HasStdExtZvamo = false;
+ bool HasStdExtZfhmin = false;
bool HasStdExtZfh = false;
bool HasRV64 = false;
bool IsRV32E = false;
@@ -107,7 +106,6 @@ public:
bool hasStdExtF() const { return HasStdExtF; }
bool hasStdExtD() const { return HasStdExtD; }
bool hasStdExtC() const { return HasStdExtC; }
- bool hasStdExtB() const { return HasStdExtB; }
bool hasStdExtZba() const { return HasStdExtZba; }
bool hasStdExtZbb() const { return HasStdExtZbb; }
bool hasStdExtZbc() const { return HasStdExtZbc; }
@@ -118,10 +116,10 @@ public:
bool hasStdExtZbr() const { return HasStdExtZbr; }
bool hasStdExtZbs() const { return HasStdExtZbs; }
bool hasStdExtZbt() const { return HasStdExtZbt; }
- bool hasStdExtZbproposedc() const { return HasStdExtZbproposedc; }
bool hasStdExtV() const { return HasStdExtV; }
bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
bool hasStdExtZvamo() const { return HasStdExtZvamo; }
+ bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
bool hasStdExtZfh() const { return HasStdExtZfh; }
bool is64Bit() const { return HasRV64; }
bool isRV32E() const { return IsRV32E; }
@@ -135,8 +133,17 @@ public:
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
return UserReservedRegister[i];
}
+
+ // Vector codegen related methods.
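+ // Note: these currently all key off HasStdExtV; the finer-grained names
+ // anticipate the Zve* subsets mentioned in the subtarget asserts.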
+ bool hasVInstructions() const { return HasStdExtV; }
+ bool hasVInstructionsI64() const { return HasStdExtV; }
+ bool hasVInstructionsF16() const { return HasStdExtV && hasStdExtZfh(); }
+ bool hasVInstructionsF32() const { return HasStdExtV && hasStdExtF(); }
+ bool hasVInstructionsF64() const { return HasStdExtV && hasStdExtD(); }
+ // F16 and F64 both require F32.
+ bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
unsigned getMaxInterleaveFactor() const {
- return hasStdExtV() ? MaxInterleaveFactor : 1;
+ return hasVInstructions() ? MaxInterleaveFactor : 1;
}
protected:
@@ -158,6 +165,7 @@ public:
unsigned getMaxRVVVectorSizeInBits() const;
unsigned getMinRVVVectorSizeInBits() const;
unsigned getMaxLMULForFixedLengthVectors() const;
+ unsigned getMaxELENForFixedLengthVectors() const;
bool useRVVForFixedLengthVectors() const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index a561772b650b..41599dd8bb3f 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -385,6 +385,7 @@ def : SysReg<"dscratch1", 0x7B3>;
def : SysReg<"vstart", 0x008>;
def : SysReg<"vxsat", 0x009>;
def : SysReg<"vxrm", 0x00A>;
+def : SysReg<"vcsr", 0x00F>;
def : SysReg<"vl", 0xC20>;
def : SysReg<"vtype", 0xC21>;
def : SysReg<"vlenb", 0xC22>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b18ee6009217..b421eba8d442 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -27,8 +27,8 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
@@ -37,6 +37,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
@@ -149,6 +150,9 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
+
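+ // RISCVGatherScatterLowering rewrites masked gathers/scatters whose indices
+ // form a strided pattern into RVV strided accesses; run it before the
+ // generic IR pipeline (descriptive note; see the pass for details).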
+ addPass(createRISCVGatherScatterLoweringPass());
+
TargetPassConfig::addIRPasses();
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index fd110db1064b..56f0952fafc9 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -52,8 +52,15 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
// split up large offsets in GEP into better parts than ConstantHoisting
// can.
return TTI::TCC_Free;
- case Instruction::Add:
case Instruction::And:
+ // zext.h
+ if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
+ return TTI::TCC_Free;
+ // zext.w (part of Zba)
+ if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
+ return TTI::TCC_Free;
+ LLVM_FALLTHROUGH;
+ case Instruction::Add:
case Instruction::Or:
case Instruction::Xor:
case Instruction::Mul:
@@ -125,7 +132,7 @@ Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
// know whether the LoopVectorizer is safe to do or not.
// We only consider to use single vector register (LMUL = 1) to vectorize.
unsigned MaxVectorSizeInBits = ST->getMaxRVVVectorSizeInBits();
- if (ST->hasStdExtV() && MaxVectorSizeInBits != 0)
+ if (ST->hasVInstructions() && MaxVectorSizeInBits != 0)
return MaxVectorSizeInBits / RISCV::RVVBitsPerBlock;
return BaseT::getMaxVScale();
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 95dacb1e6285..675681616d6e 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -18,6 +18,7 @@
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
@@ -54,7 +55,7 @@ public:
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
bool shouldExpandReduction(const IntrinsicInst *II) const;
- bool supportsScalableVectors() const { return ST->hasStdExtV(); }
+ bool supportsScalableVectors() const { return ST->hasVInstructions(); }
Optional<unsigned> getMaxVScale() const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
@@ -63,52 +64,44 @@ public:
return TypeSize::getFixed(ST->getXLen());
case TargetTransformInfo::RGK_FixedWidthVector:
return TypeSize::getFixed(
- ST->hasStdExtV() ? ST->getMinRVVVectorSizeInBits() : 0);
+ ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0);
case TargetTransformInfo::RGK_ScalableVector:
return TypeSize::getScalable(
- ST->hasStdExtV() ? ST->getMinRVVVectorSizeInBits() : 0);
+ ST->hasVInstructions() ? RISCV::RVVBitsPerBlock : 0);
}
llvm_unreachable("Unsupported register kind");
}
+ unsigned getMinVectorRegisterBitWidth() const {
+ return ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0;
+ }
+
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I);
- bool isLegalElementTypeForRVV(Type *ScalarTy) const {
- if (ScalarTy->isPointerTy())
- return true;
-
- if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
- ScalarTy->isIntegerTy(32) || ScalarTy->isIntegerTy(64))
- return true;
-
- if (ScalarTy->isHalfTy())
- return ST->hasStdExtZfh();
- if (ScalarTy->isFloatTy())
- return ST->hasStdExtF();
- if (ScalarTy->isDoubleTy())
- return ST->hasStdExtD();
-
- return false;
- }
-
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
- if (!ST->hasStdExtV())
+ if (!ST->hasVInstructions())
return false;
// Only support fixed vectors if we know the minimum vector size.
if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
return false;
+ // Don't allow elements larger than the ELEN.
+ // FIXME: How to limit for scalable vectors?
+ if (isa<FixedVectorType>(DataType) &&
+ DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ return false;
+
if (Alignment <
DL.getTypeStoreSize(DataType->getScalarType()).getFixedSize())
return false;
- return isLegalElementTypeForRVV(DataType->getScalarType());
+ return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -119,18 +112,24 @@ public:
}
bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) {
- if (!ST->hasStdExtV())
+ if (!ST->hasVInstructions())
return false;
// Only support fixed vectors if we know the minimum vector size.
if (isa<FixedVectorType>(DataType) && ST->getMinRVVVectorSizeInBits() == 0)
return false;
+ // Don't allow elements larger than the ELEN.
+ // FIXME: How to limit for scalable vectors?
+ if (isa<FixedVectorType>(DataType) &&
+ DataType->getScalarSizeInBits() > ST->getMaxELENForFixedLengthVectors())
+ return false;
+
if (Alignment <
DL.getTypeStoreSize(DataType->getScalarType()).getFixedSize())
return false;
- return isLegalElementTypeForRVV(DataType->getScalarType());
+ return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) {
@@ -150,14 +149,14 @@ public:
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const {
- if (!ST->hasStdExtV())
+ if (!ST->hasVInstructions())
return false;
if (!VF.isScalable())
return true;
Type *Ty = RdxDesc.getRecurrenceType();
- if (!isLegalElementTypeForRVV(Ty))
+ if (!TLI->isLegalElementTypeForRVV(Ty))
return false;
switch (RdxDesc.getRecurrenceKind()) {
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index 4f265d556380..27d1326d5f6c 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/RISCVTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheRISCV32Target() {
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 9a2df8ca7fe9..48e6903bd1b1 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -25,10 +25,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 5c4419c108c0..142124a8e0d9 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -17,7 +17,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 51eccfa52359..e950f9582f09 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -15,8 +15,8 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -274,7 +274,8 @@ namespace {
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
// Cannot emit NOP with size not multiple of 32 bits.
if (Count % 4 != 0)
return false;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index 9531e3105fe2..49b75b7e0bd1 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index 2006c9bede34..f6f9c0a1de81 100644
--- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -29,7 +29,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 2007303d9903..ed1faf6b1fe8 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1614,11 +1614,14 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::MULO_I64, nullptr);
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
}
+ setLibcallName(RTLIB::MULO_I128, nullptr);
+
if (!Subtarget->isV9()) {
// SparcV8 does not have FNEGD and FABSD.
setOperationAction(ISD::FNEG, MVT::f64, Custom);
@@ -2957,8 +2960,15 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
SDValue ShiftAmt = DAG.getConstant(63, dl, VT);
SDValue RHS = Op.getOperand(1);
- SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
- SDValue HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
+ SDValue HiLHS, HiRHS;
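+ // Build the high 64-bit halves of the 128-bit multiplicands: an arithmetic
+ // shift reproduces the sign bit for the signed case, while the unsigned
+ // case simply uses zero.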
+ if (isSigned) {
+ HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, ShiftAmt);
+ HiRHS = DAG.getNode(ISD::SRA, dl, MVT::i64, RHS, ShiftAmt);
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, MVT::i64);
+ }
+
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
TargetLowering::MakeLibCallOptions CallOptions;
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index dc3a41c63098..a8a0b2cc9e67 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index b161e2a9d087..5e305fc9df71 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -369,8 +369,7 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
// TODO: Instructions of the LoadASI class are currently asm only; hooking up
// CodeGen's address spaces to use these is a future task.
-class LoadASI<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
- RegisterClass RC, ValueType Ty, InstrItinClass itin = NoItinerary> :
+class LoadASI<string OpcStr, bits<6> Op3Val, RegisterClass RC> :
F3_1_asi<3, Op3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi),
!strconcat(OpcStr, "a [$addr] $asi, $dst"),
[]>;
@@ -380,7 +379,7 @@ multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val,
SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
InstrItinClass itin = NoItinerary> :
Load<OpcStr, Op3Val, OpNode, RC, Ty, itin> {
- def Arr : LoadASI<OpcStr, LoadAOp3Val, OpNode, RC, Ty>;
+ def Arr : LoadASI<OpcStr, LoadAOp3Val, RC>;
}
// The LDSTUB instruction is supported for asm only.
@@ -411,8 +410,7 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode,
// TODO: Instructions of the StoreASI class are currently asm only; hooking up
// CodeGen's address spaces to use these is a future task.
-class StoreASI<string OpcStr, bits<6> Op3Val,
- SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
+class StoreASI<string OpcStr, bits<6> Op3Val, RegisterClass RC,
InstrItinClass itin = IIC_st> :
F3_1_asi<3, Op3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi),
!strconcat(OpcStr, "a $rd, [$addr] $asi"),
@@ -420,10 +418,9 @@ class StoreASI<string OpcStr, bits<6> Op3Val,
itin>;
multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val,
- SDPatternOperator OpNode, RegisterClass RC, ValueType Ty,
- InstrItinClass itin = IIC_st> :
+ SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> :
Store<OpcStr, Op3Val, OpNode, RC, Ty> {
- def Arr : StoreASI<OpcStr, StoreAOp3Val, OpNode, RC, Ty, itin>;
+ def Arr : StoreASI<OpcStr, StoreAOp3Val, RC>;
}
//===----------------------------------------------------------------------===//
@@ -523,12 +520,12 @@ let DecoderMethod = "DecodeLoadIntPair" in
// Section B.2 - Load Floating-point Instructions, p. 92
let DecoderMethod = "DecodeLoadFP" in {
defm LDF : Load<"ld", 0b100000, load, FPRegs, f32, IIC_iu_or_fpu_instr>;
- def LDFArr : LoadASI<"ld", 0b110000, load, FPRegs, f32, IIC_iu_or_fpu_instr>,
+ def LDFArr : LoadASI<"ld", 0b110000, FPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeLoadDFP" in {
defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64, IIC_ldd>;
- def LDDFArr : LoadASI<"ldd", 0b110011, load, DFPRegs, f64>,
+ def LDDFArr : LoadASI<"ldd", 0b110011, DFPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeLoadQFP" in
@@ -573,17 +570,17 @@ let DecoderMethod = "DecodeStoreInt" in {
}
let DecoderMethod = "DecodeStoreIntPair" in
- defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32, IIC_std>;
+ defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>;
// Section B.5 - Store Floating-point Instructions, p. 97
let DecoderMethod = "DecodeStoreFP" in {
defm STF : Store<"st", 0b100100, store, FPRegs, f32>;
- def STFArr : StoreASI<"st", 0b110100, store, FPRegs, f32>,
+ def STFArr : StoreASI<"st", 0b110100, FPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeStoreDFP" in {
defm STDF : Store<"std", 0b100111, store, DFPRegs, f64, IIC_std>;
- def STDFArr : StoreASI<"std", 0b110111, store, DFPRegs, f64>,
+ def STDFArr : StoreASI<"std", 0b110111, DFPRegs>,
Requires<[HasV9]>;
}
let DecoderMethod = "DecodeStoreQFP" in
@@ -1623,6 +1620,17 @@ let hasSideEffects = 1 in {
}
}
+// Section A.42 - Prefetch Data
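+// These are asm-only for now; the 5-bit fcn field is exposed as a raw
+// immediate in $rd.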
+let Predicates = [HasV9] in {
+ def PREFETCHr : F3_1<3, 0b101101,
+ (outs), (ins MEMrr:$addr, shift_imm5:$rd),
+ "prefetch [$addr], $rd", []>;
+ def PREFETCHi : F3_2<3, 0b101101,
+ (outs), (ins MEMri:$addr, shift_imm5:$rd),
+ "prefetch [$addr], $rd", []>;
+}
+
+
// Section A.43 - Read Privileged Register Instructions
let Predicates = [HasV9] in {
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index abc47ef51563..618a8633f0a9 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -12,8 +12,8 @@
#include "SparcSubtarget.h"
#include "Sparc.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 083339bc157c..27c49a408a02 100644
--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTarget() {
diff --git a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index 3bf5907012da..1138788ac7fa 100644
--- a/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/SparcTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheSparcTarget() {
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 0de24245cfcc..40ed417d0817 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/SystemZInstPrinter.h"
#include "MCTargetDesc/SystemZMCAsmInfo.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZTargetStreamer.h"
#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,10 +26,10 @@
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetRegistry.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -39,13 +40,15 @@
using namespace llvm;
-// Return true if Expr is in the range [MinValue, MaxValue].
-static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue) {
+// Return true if Expr is in the range [MinValue, MaxValue]. If AllowSymbol
+// is true, any non-constant MCExpr is also accepted (symbolic address
+// displacement).
+static bool inRange(const MCExpr *Expr, int64_t MinValue, int64_t MaxValue,
+ bool AllowSymbol = false) {
if (auto *CE = dyn_cast<MCConstantExpr>(Expr)) {
int64_t Value = CE->getValue();
return Value >= MinValue && Value <= MaxValue;
}
- return false;
+ return AllowSymbol;
}
namespace {
@@ -264,10 +267,10 @@ public:
return isMem(MemKind) && Mem.RegKind == RegKind;
}
bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const {
- return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff);
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff, true);
}
bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const {
- return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287, true);
}
bool isMemDisp12Len4(RegisterKind RegKind) const {
return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x10);
@@ -405,6 +408,13 @@ private:
SMLoc StartLoc, EndLoc;
};
+ SystemZTargetStreamer &getTargetStreamer() {
+ assert(getParser().getStreamer().getTargetStreamer() &&
+ "do not have a target streamer");
+ MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
+ return static_cast<SystemZTargetStreamer &>(TS);
+ }
+
bool parseRegister(Register &Reg, bool RestoreOnFailure = false);
bool parseIntegerRegister(Register &Reg, RegisterGroup Group);
@@ -420,6 +430,7 @@ private:
bool parseAddressRegister(Register &Reg);
bool ParseDirectiveInsn(SMLoc L);
+ bool ParseDirectiveMachine(SMLoc L);
OperandMatchResultTy parseAddress(OperandVector &Operands,
MemoryKind MemKind,
@@ -1210,6 +1221,8 @@ bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".insn")
return ParseDirectiveInsn(DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
return true;
}
@@ -1322,6 +1335,28 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
return false;
}
+/// ParseDirectiveMachine
+/// ::= .machine [ mcpu ]
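+/// e.g. .machine z13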
+bool SystemZAsmParser::ParseDirectiveMachine(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+ if (Parser.getTok().isNot(AsmToken::Identifier) &&
+ Parser.getTok().isNot(AsmToken::String))
+ return Error(L, "unexpected token in '.machine' directive");
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.machine' directive");
+
+ MCSubtargetInfo &STI = copySTI();
+ STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+
+ getTargetStreamer().emitMachine(CPU);
+
+ return false;
+}
+
bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc, bool RestoreOnFailure) {
Register Reg;
@@ -1486,10 +1521,6 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
return false;
}
-static std::string SystemZMnemonicSpellCheck(StringRef S,
- const FeatureBitset &FBS,
- unsigned VariantID = 0);
-
bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index e81db1030c01..5eba150dadc3 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
#include <cassert>
#include <cstdint>
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index f3f3f096da33..0cb6bfaaebfb 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -24,9 +24,9 @@ using namespace llvm;
#include "SystemZGenAsmWriter.inc"
void SystemZInstPrinter::printAddress(const MCAsmInfo *MAI, unsigned Base,
- int64_t Disp, unsigned Index,
+ const MCOperand &DispMO, unsigned Index,
raw_ostream &O) {
- O << Disp;
+ printOperand(DispMO, MAI, O);
if (Base || Index) {
O << '(';
if (Index) {
@@ -194,23 +194,23 @@ void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printBDAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printAddress(&MAI, MI->getOperand(OpNum).getReg(),
- MI->getOperand(OpNum + 1).getImm(), 0, O);
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
+ 0, O);
}
void SystemZInstPrinter::printBDXAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printAddress(&MAI, MI->getOperand(OpNum).getReg(),
- MI->getOperand(OpNum + 1).getImm(),
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
MI->getOperand(OpNum + 2).getReg(), O);
}
void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
unsigned Base = MI->getOperand(OpNum).getReg();
- uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ const MCOperand &DispMO = MI->getOperand(OpNum + 1);
uint64_t Length = MI->getOperand(OpNum + 2).getImm();
- O << Disp << '(' << Length;
+ printOperand(DispMO, &MAI, O);
+ O << '(' << Length;
if (Base) {
O << ",";
printRegName(O, Base);
@@ -221,9 +221,10 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
unsigned Base = MI->getOperand(OpNum).getReg();
- uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ const MCOperand &DispMO = MI->getOperand(OpNum + 1);
unsigned Length = MI->getOperand(OpNum + 2).getReg();
- O << Disp << "(";
+ printOperand(DispMO, &MAI, O);
+ O << "(";
printRegName(O, Length);
if (Base) {
O << ",";
@@ -234,8 +235,7 @@ void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- printAddress(&MAI, MI->getOperand(OpNum).getReg(),
- MI->getOperand(OpNum + 1).getImm(),
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
MI->getOperand(OpNum + 2).getReg(), O);
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 0a57ca0082e6..008bf747e5a1 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -33,8 +33,9 @@ public:
static const char *getRegisterName(unsigned RegNo);
// Print an address with the given base, displacement and index.
- static void printAddress(const MCAsmInfo *MAI, unsigned Base, int64_t Disp,
- unsigned Index, raw_ostream &O);
+ static void printAddress(const MCAsmInfo *MAI, unsigned Base,
+ const MCOperand &DispMO, unsigned Index,
+ raw_ostream &O);
// Print the given operand.
static void printOperand(const MCOperand &MO, const MCAsmInfo *MAI,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index 134c85e822be..0f5e0b9672a9 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -10,6 +10,8 @@
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
@@ -21,7 +23,8 @@ using namespace llvm;
// Value is a fully-resolved relocation value: Symbol + Addend [- Pivot].
// Return the bits that should be installed in a relocation field for
// fixup kind Kind.
-static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
+static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value,
+ const MCFixup &Fixup, MCContext &Ctx) {
if (Kind < FirstTargetFixupKind)
return Value;
@@ -32,6 +35,24 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
case SystemZ::FK_390_PC32DBL:
return (int64_t)Value / 2;
+ case SystemZ::FK_390_12:
+ if (!isUInt<12>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "displacement exceeds uint12");
+ return 0;
+ }
+ return Value;
+
+ case SystemZ::FK_390_20: {
+ if (!isInt<20>(Value)) {
+ Ctx.reportError(Fixup.getLoc(), "displacement exceeds int20");
+ return 0;
+ }
+ // The high byte of a 20-bit displacement value comes first.
+ uint64_t DLo = Value & 0xfff;
+ uint64_t DHi = (Value >> 12) & 0xff;
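+ // e.g. Value = 0x2345A gives DLo = 0x45A and DHi = 0x23, encoded as 0x45A23.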
+ return (DLo << 8) | DHi;
+ }
+
case SystemZ::FK_390_TLS_CALL:
return 0;
}
@@ -63,7 +84,8 @@ public:
const MCAsmLayout &Layout) const override {
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override {
return createSystemZObjectWriter(OSABI);
@@ -94,7 +116,9 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{ "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_TLS_CALL", 0, 0, 0 }
+ { "FK_390_TLS_CALL", 0, 0, 0 },
+ { "FK_390_12", 4, 12, 0 },
+ { "FK_390_20", 4, 20, 0 }
};
// Fixup kinds from .reloc directive are like R_390_NONE. They
@@ -132,7 +156,7 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
assert(Offset + Size <= Data.size() && "Invalid fixup offset!");
// Big-endian insertion of Size bytes.
- Value = extractBitsForFixup(Kind, Value);
+ Value = extractBitsForFixup(Kind, Value, Fixup, Asm.getContext());
if (BitSize < 64)
Value &= ((uint64_t)1 << BitSize) - 1;
unsigned ShiftValue = (Size * 8) - 8;
@@ -142,7 +166,8 @@ void SystemZMCAsmBackend::applyFixup(const MCAssembler &Asm,
}
}
-bool SystemZMCAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool SystemZMCAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
for (uint64_t I = 0; I != Count; ++I)
OS << '\x7';
return true;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index fa4864299586..e61b07e973e9 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -12,37 +12,39 @@
using namespace llvm;
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Triple &TT) {
- CodePointerSize = 8;
+SystemZMCAsmInfoELF::SystemZMCAsmInfoELF(const Triple &TT) {
+ AssemblerDialect = AD_ATT;
CalleeSaveStackSlotSize = 8;
+ CodePointerSize = 8;
+ Data64bitsDirective = "\t.quad\t";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
IsLittleEndian = false;
-
- AssemblerDialect = TT.isOSzOS() ? AD_HLASM : AD_ATT;
-
MaxInstLength = 6;
-
- CommentString = AssemblerDialect == AD_HLASM ? "*" : "#";
- RestrictCommentStringToStartOfStatement = (AssemblerDialect == AD_HLASM);
- AllowAdditionalComments = (AssemblerDialect == AD_ATT);
- AllowAtAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
- AllowDollarAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
- AllowHashAtStartOfIdentifier = (AssemblerDialect == AD_HLASM);
- DotIsPC = (AssemblerDialect == AD_ATT);
- StarIsPC = (AssemblerDialect == AD_HLASM);
- EmitGNUAsmStartIndentationMarker = (AssemblerDialect == AD_ATT);
- AllowAtInName = (AssemblerDialect == AD_HLASM);
- EmitLabelsInUpperCase = (AssemblerDialect == AD_HLASM);
-
- ZeroDirective = "\t.space\t";
- Data64bitsDirective = "\t.quad\t";
- UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
- ExceptionsType = ExceptionHandling::DwarfCFI;
+ UsesELFSectionDirectiveForBSS = true;
+ ZeroDirective = "\t.space\t";
}
-bool SystemZMCAsmInfo::isAcceptableChar(char C) const {
- if (AssemblerDialect == AD_ATT)
- return MCAsmInfo::isAcceptableChar(C);
+SystemZMCAsmInfoGOFF::SystemZMCAsmInfoGOFF(const Triple &TT) {
+ AllowAdditionalComments = false;
+ AllowAtInName = true;
+ AllowAtAtStartOfIdentifier = true;
+ AllowDollarAtStartOfIdentifier = true;
+ AllowHashAtStartOfIdentifier = true;
+ AssemblerDialect = AD_HLASM;
+ CalleeSaveStackSlotSize = 8;
+ CodePointerSize = 8;
+ CommentString = "*";
+ DotIsPC = false;
+ EmitGNUAsmStartIndentationMarker = false;
+ EmitLabelsInUpperCase = true;
+ IsLittleEndian = false;
+ MaxInstLength = 6;
+ RestrictCommentStringToStartOfStatement = true;
+ StarIsPC = true;
+ SupportsDebugInformation = true;
+}
+bool SystemZMCAsmInfoGOFF::isAcceptableChar(char C) const {
return MCAsmInfo::isAcceptableChar(C) || C == '#';
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index 389575d14679..b2f191424d01 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
@@ -10,15 +10,21 @@
#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCASMINFO_H
#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoGOFF.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
class Triple;
enum SystemZAsmDialect { AD_ATT = 0, AD_HLASM = 1 };
-class SystemZMCAsmInfo : public MCAsmInfoELF {
+class SystemZMCAsmInfoELF : public MCAsmInfoELF {
public:
- explicit SystemZMCAsmInfo(const Triple &TT);
+ explicit SystemZMCAsmInfoELF(const Triple &TT);
+};
+
+class SystemZMCAsmInfoGOFF : public MCAsmInfoGOFF {
+public:
+ explicit SystemZMCAsmInfoGOFF(const Triple &TT);
bool isAcceptableChar(char C) const override;
};
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index a5ccf4f68ffd..e280e4aaf3d8 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -12,6 +12,7 @@
#include "MCTargetDesc/SystemZMCFixups.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZInstrInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -60,6 +61,12 @@ private:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // Return the displacement value for the OpNum operand. If it is a symbol,
+ // add a fixup for it and return 0.
+ uint64_t getDispOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ SystemZ::FixupKind Kind) const;
+
// Called by the TableGen code to get the binary encoding of an address.
// The index or length, if any, is encoded first, followed by the base,
// followed by the displacement. In a 20-bit displacement,
@@ -180,11 +187,29 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
uint64_t SystemZMCCodeEmitter::
+getDispOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ SystemZ::FixupKind Kind) const {
+ const MCOperand &MO = MI.getOperand(OpNum);
+ if (MO.isImm())
+ return static_cast<uint64_t>(MO.getImm());
+ if (MO.isExpr()) {
+ // All instructions follow the pattern where the first displacement field
+ // sits at a 2-byte offset into the instruction and the second at a
+ // 4-byte offset.
+ unsigned ByteOffs = Fixups.size() == 0 ? 2 : 4;
+ Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind));
+ assert(Fixups.size() <= 2 && "More than two memory operands in MI?");
+ return 0;
+ }
+ llvm_unreachable("Unexpected operand type!");
+}
+
+uint64_t SystemZMCCodeEmitter::
getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
assert(isUInt<4>(Base) && isUInt<12>(Disp));
return (Base << 12) | Disp;
}
@@ -194,7 +219,7 @@ getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_20);
assert(isUInt<4>(Base) && isInt<20>(Disp));
return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12);
}
@@ -204,7 +229,7 @@ getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index));
return (Index << 16) | (Base << 12) | Disp;
@@ -215,7 +240,7 @@ getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_20);
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index));
return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8)
@@ -227,7 +252,7 @@ getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
return (Len << 16) | (Base << 12) | Disp;
@@ -238,7 +263,7 @@ getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len));
return (Len << 16) | (Base << 12) | Disp;
@@ -249,7 +274,7 @@ getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
return (Len << 16) | (Base << 12) | Disp;
@@ -260,7 +285,7 @@ getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
return (Index << 16) | (Base << 12) | Disp;
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index 14f6198183b9..1f62baabb9e7 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -20,6 +20,8 @@ enum FixupKind {
FK_390_PC24DBL,
FK_390_PC32DBL,
FK_390_TLS_CALL,
+ FK_390_12,
+ FK_390_20,
// Marker
LastTargetFixupKind,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 0b3e7b15df13..c23463ab9bde 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -46,6 +46,8 @@ static unsigned getAbsoluteReloc(unsigned Kind) {
case FK_Data_2: return ELF::R_390_16;
case FK_Data_4: return ELF::R_390_32;
case FK_Data_8: return ELF::R_390_64;
+ case SystemZ::FK_390_12: return ELF::R_390_12;
+ case SystemZ::FK_390_20: return ELF::R_390_20;
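+ // These back symbolic displacements such as "l %r1,sym(%r2)" (12-bit)
+ // and "lg %r1,sym(%r2)" (20-bit).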
}
llvm_unreachable("Unsupported absolute address");
}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 2a53dda84144..c7b73fd3b805 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -9,13 +9,15 @@
#include "SystemZMCTargetDesc.h"
#include "SystemZInstPrinter.h"
#include "SystemZMCAsmInfo.h"
+#include "SystemZTargetStreamer.h"
#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -149,7 +151,10 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT,
const MCTargetOptions &Options) {
- MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
+ if (TT.isOSzOS())
+ return new SystemZMCAsmInfoGOFF(TT);
+
+ MCAsmInfo *MAI = new SystemZMCAsmInfoELF(TT);
MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
nullptr, MRI.getDwarfRegNum(SystemZ::R15D, true),
SystemZMC::ELFCFAOffsetFromInitialSP);
@@ -182,6 +187,53 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
return new SystemZInstPrinter(MAI, MII, MRI);
}
+void SystemZTargetStreamer::emitConstantPools() {
+ // Emit EXRL target instructions.
+ if (EXRLTargets2Sym.empty())
+ return;
+ // Switch to the .text section.
+ const MCObjectFileInfo &OFI = *Streamer.getContext().getObjectFileInfo();
+ Streamer.SwitchSection(OFI.getTextSection());
+ for (auto &I : EXRLTargets2Sym) {
+ Streamer.emitLabel(I.second);
+ const MCInstSTIPair &MCI_STI = I.first;
+ Streamer.emitInstruction(MCI_STI.first, *MCI_STI.second);
+ }
+ EXRLTargets2Sym.clear();
+}
+
+namespace {
+class SystemZTargetAsmStreamer : public SystemZTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ SystemZTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : SystemZTargetStreamer(S), OS(OS) {}
+ void emitMachine(StringRef CPU) override {
+ OS << "\t.machine " << CPU << "\n";
+ }
+};
+
+class SystemZTargetELFStreamer : public SystemZTargetStreamer {
+public:
+ SystemZTargetELFStreamer(MCStreamer &S) : SystemZTargetStreamer(S) {}
+ void emitMachine(StringRef CPU) override {}
+};
+} // end namespace
+
+static MCTargetStreamer *
+createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new SystemZTargetAsmStreamer(S, OS);
+}
+
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new SystemZTargetELFStreamer(S);
+}
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
@@ -210,4 +262,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTargetMC() {
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(getTheSystemZTarget(),
createSystemZMCInstPrinter);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(getTheSystemZTarget(),
+ createAsmTargetStreamer);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(getTheSystemZTarget(),
+ createObjectTargetStreamer);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 46ccd2129969..defab665f924 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -24,7 +24,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -549,15 +549,17 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
Register SrcReg = MI->getOperand(4).getReg();
int64_t SrcDisp = MI->getOperand(5).getImm();
+ SystemZTargetStreamer *TS = getTargetStreamer();
MCSymbol *DotSym = nullptr;
MCInst ET = MCInstBuilder(TargetInsOpc).addReg(DestReg)
.addImm(DestDisp).addImm(1).addReg(SrcReg).addImm(SrcDisp);
- MCInstSTIPair ET_STI(ET, &MF->getSubtarget());
- EXRLT2SymMap::iterator I = EXRLTargets2Sym.find(ET_STI);
- if (I != EXRLTargets2Sym.end())
+ SystemZTargetStreamer::MCInstSTIPair ET_STI(ET, &MF->getSubtarget());
+ SystemZTargetStreamer::EXRLT2SymMap::iterator I =
+ TS->EXRLTargets2Sym.find(ET_STI);
+ if (I != TS->EXRLTargets2Sym.end())
DotSym = I->second;
else
- EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol();
+ TS->EXRLTargets2Sym[ET_STI] = DotSym = OutContext.createTempSymbol();
const MCSymbolRefExpr *Dot = MCSymbolRefExpr::create(DotSym, OutContext);
EmitToStreamer(
*OutStreamer,
@@ -722,19 +724,6 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
-void SystemZAsmPrinter::emitEXRLTargetInstructions() {
- if (EXRLTargets2Sym.empty())
- return;
- // Switch to the .text section.
- OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
- for (auto &I : EXRLTargets2Sym) {
- OutStreamer->emitLabel(I.second);
- const MCInstSTIPair &MCI_STI = I.first;
- OutStreamer->emitInstruction(MCI_STI.first, *MCI_STI.second);
- }
- EXRLTargets2Sym.clear();
-}
-
// Convert a SystemZ-specific constant pool modifier into the associated
// MCSymbolRefExpr variant kind.
static MCSymbolRefExpr::VariantKind
@@ -786,14 +775,14 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
- SystemZInstPrinter::printAddress(MAI, MI->getOperand(OpNo).getReg(),
- MI->getOperand(OpNo + 1).getImm(),
- MI->getOperand(OpNo + 2).getReg(), OS);
+ SystemZInstPrinter::
+ printAddress(MAI, MI->getOperand(OpNo).getReg(),
+ MCOperand::createImm(MI->getOperand(OpNo + 1).getImm()),
+ MI->getOperand(OpNo + 2).getReg(), OS);
return false;
}
void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
- emitEXRLTargetInstructions();
emitStackMaps(SM);
}
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index 11b731103c17..6cfd7bd4c486 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -11,6 +11,7 @@
#include "SystemZMCInstLower.h"
#include "SystemZTargetMachine.h"
+#include "SystemZTargetStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -27,32 +28,11 @@ class LLVM_LIBRARY_VISIBILITY SystemZAsmPrinter : public AsmPrinter {
private:
StackMaps SM;
- typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair;
- struct CmpMCInst {
- bool operator()(const MCInstSTIPair &MCI_STI_A,
- const MCInstSTIPair &MCI_STI_B) const {
- if (MCI_STI_A.second != MCI_STI_B.second)
- return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second);
- const MCInst &A = MCI_STI_A.first;
- const MCInst &B = MCI_STI_B.first;
- assert(A.getNumOperands() == B.getNumOperands() &&
- A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 &&
- B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst");
- if (A.getOpcode() != B.getOpcode())
- return A.getOpcode() < B.getOpcode();
- if (A.getOperand(0).getReg() != B.getOperand(0).getReg())
- return A.getOperand(0).getReg() < B.getOperand(0).getReg();
- if (A.getOperand(1).getImm() != B.getOperand(1).getImm())
- return A.getOperand(1).getImm() < B.getOperand(1).getImm();
- if (A.getOperand(3).getReg() != B.getOperand(3).getReg())
- return A.getOperand(3).getReg() < B.getOperand(3).getReg();
- if (A.getOperand(4).getImm() != B.getOperand(4).getImm())
- return A.getOperand(4).getImm() < B.getOperand(4).getImm();
- return false;
- }
- };
- typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap;
- EXRLT2SymMap EXRLTargets2Sym;
+ SystemZTargetStreamer *getTargetStreamer() {
+ MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
+ assert(TS && "do not have a target streamer");
+ return static_cast<SystemZTargetStreamer *>(TS);
+ }
public:
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
@@ -77,7 +57,6 @@ private:
void LowerFENTRY_CALL(const MachineInstr &MI, SystemZMCInstLower &MCIL);
void LowerSTACKMAP(const MachineInstr &MI);
void LowerPATCHPOINT(const MachineInstr &MI, SystemZMCInstLower &Lower);
- void emitEXRLTargetInstructions();
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
index 86eb8365d527..9c73757d7f5c 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};
+
+const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 96c1080d5237..f82c61c0f344 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -27,6 +27,9 @@ namespace SystemZ {
const unsigned XPLINK64NumArgFPRs = 4;
extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];
+
+ const unsigned XPLINK64NumArgVRs = 8;
+ extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
@@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
else
llvm_unreachable("Unknown Calling Convention!");
- unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
+ unsigned Offset = Reg && !Subtarget.isTargetXPLINK64()
+ ? 0
+ : State.AllocateStack(8, Align(8));
// Use that same location for all the pending parts.
for (auto &It : PendingMembers) {
@@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- if (LocVT.getSizeInBits() < 128)
- return false;
-
- if (static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))
- return false;
-
// For any C or C++ program, this should always be
// false, since it is illegal to have a function
// where the first argument is variadic. Therefore
@@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
bool AllocGPR3 = State.AllocateReg(SystemZ::R3D);
// If GPR2 and GPR3 are available, then we may pass vararg in R2Q.
- if (AllocGPR2 && AllocGPR3) {
- State.addLoc(
- CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+ // If only GPR3 is available, we need to set custom handling to copy
+ // hi bits into GPR3.
+ // Either way, we allocate on the stack.
+ if (AllocGPR3) {
+ // For f128 and vector var arg case, set the bitcast flag to bitcast to
+ // i128.
+ LocVT = MVT::i128;
+ LocInfo = CCValAssign::BCvt;
+ auto Offset = State.AllocateStack(16, Align(8));
+ if (AllocGPR2)
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+ else
+ State.addLoc(
+ CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
- // If only GPR3 is available, we allocate on stack but need to
- // set custom handling to copy hi bits into GPR3.
- if (!AllocGPR2 && AllocGPR3) {
- auto Offset = State.AllocateStack(16, Align(8));
- State.addLoc(
- CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return true;
+ return false;
+}
+
+inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ ArrayRef<MCPhysReg> RegList;
+
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ RegList = SystemZ::XPLINK64ArgGPRs;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ RegList = SystemZ::XPLINK64ArgVRs;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::f128:
+ RegList = SystemZ::XPLINK64ArgFPRs;
+ break;
+ default:
+ return false;
}
+ unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
+ // Whenever a register is still available for this argument, also reserve
+ // its shadow slot on the stack.
+ if (UnallocatedRegisterIndex < RegList.size())
+ State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));
+
return false;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index 45e22b07be30..373023effb4a 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -162,12 +162,14 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 callee-saved registers
//===----------------------------------------------------------------------===//
-def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
- (sequence "F%dD", 8, 15))>;
+// %R7D is volatile by the spec, but it must be saved in the prologue by
+// any non-leaf function and restored in the epilogue for use by the
+// return instruction, so it functions exactly like a callee-saved register.
+def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
+ (sequence "F%dD", 15, 8))>;
-def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add (sequence "R%dD", 8, 15),
- (sequence "F%dD", 15, 8),
- (sequence "V%d", 23, 16))>;
+def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
+ (sequence "V%d", 23, 16))>;
//===----------------------------------------------------------------------===//
// z/OS XPLINK64 return value calling convention
@@ -222,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// XPLINK64 ABI compliant code widens integral types smaller than i64
// to i64 before placing the parameters either on the stack or in registers.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+ // Promote f32 to f64 and bitcast f64 to i64 if it needs to be passed in GPRs.
+ CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
+ CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
+ // A long double can only be passed in GPR2 and GPR3 (if available),
+ // hence R2Q.
+ CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
+ // Non-fixed vector arguments are treated the same way as long doubles.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
// A SwiftSelf is passed in callee-saved R10.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
@@ -236,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 3 integer arguments are passed in registers R1D-R3D.
// The rest will be passed in the user area. The address offset of the user
// area can be found in register R4D.
- CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>,
+ CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,
// The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
@@ -247,34 +260,24 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,
V28, V29, V30, V31]>>>>,
// The first 4 named float and double arguments are passed in registers FPR0-FPR6.
// The rest will be passed in the user area.
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
+ CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
// The first 2 long double arguments are passed in register FPR0/FPR2
// and FPR4/FPR6. The rest will be passed in the user area.
CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
+ CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,
- // Non fixed floats are passed in GPRs
- // Promote f32 to f64, if it needs to be passed in GPRs.
- CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
- // Assign f64 varargs to their proper GPRs.
- CCIfType<[f64], CCIfNotFixed<CCAssignToReg<[R1D, R2D, R3D]>>>,
- // long double, can only be passed in GPR2 and GPR3, if available,
- // hence R2Q
- CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
-
- // Non fixed vector arguments are treated in the same way as long
- // doubles.
- CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
-
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots.
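The CC_SystemZ_XPLINK64 changes route non-fixed f32/f64 varargs through GPRs by promoting to f64 and reinterpreting the bits as i64 (CCBitConvertToType). Below is a tiny host-side illustration of that bit reinterpretation, not the SelectionDAG lowering itself:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret a double's bit pattern as a 64-bit integer, the same idea as
// CCBitConvertToType<i64> applied to an f64 vararg before it lands in a GPR.
static uint64_t bitsOfDouble(double D) {
  uint64_t Bits;
  static_assert(sizeof(Bits) == sizeof(D), "f64 and i64 must match in size");
  std::memcpy(&Bits, &D, sizeof(Bits)); // bit-preserving, no value conversion
  return Bits;
}

int main() {
  double Vararg = 1.0f; // an f32 vararg, promoted to f64 first
  std::printf("f64 bits carried in a GPR: 0x%016llx\n",
              (unsigned long long)bitsOfDouble(Vararg));
}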
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 19b703bbb226..ac94570e568f 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -571,10 +571,9 @@ bool SystemZElimCompare::optimizeCompareZero(
// Also do a forward search to handle cases where an instruction after the
// compare can be converted, like
// LTEBRCompare %f0s, %f0s; %f2s = LER %f0s => LTEBRCompare %f2s, %f0s
- for (MachineBasicBlock::iterator MBBI =
- std::next(MachineBasicBlock::iterator(&Compare)), MBBE = MBB.end();
- MBBI != MBBE;) {
- MachineInstr &MI = *MBBI++;
+ auto MIRange = llvm::make_range(
+ std::next(MachineBasicBlock::iterator(&Compare)), MBB.end());
+ for (MachineInstr &MI : llvm::make_early_inc_range(MIRange)) {
if (preservesValueOf(MI, SrcReg)) {
// Try to eliminate Compare by reusing a CC result from MI.
if (convertToLoadAndTest(MI, Compare, CCUsers)) {
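llvm::make_early_inc_range advances its underlying iterator before the loop body runs, which is what lets the pass above erase or rewrite the instruction it is currently visiting. A small self-contained sketch of the same property over a std::set (it assumes an LLVM tree so llvm/ADT/STLExtras.h is on the include path):

#include "llvm/ADT/STLExtras.h"
#include <iostream>
#include <set>

int main() {
  std::set<int> Values = {1, 2, 3, 4, 5};

  // The iterator has already moved past the current element when the body
  // runs, so erasing that element does not break the traversal - the same
  // property SystemZElimCompare relies on when it deletes or converts the
  // instruction it is looking at.
  for (int V : llvm::make_early_inc_range(Values))
    if (V % 2 == 0)
      Values.erase(V); // erases only the node that was just visited

  for (int V : Values)
    std::cout << V << ' '; // prints: 1 3 5
  std::cout << '\n';
}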
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index d2f6ff96158d..d11d118fb8ee 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
// The ABI-defined register save slots, relative to the CFA (i.e.
// incoming stack pointer + SystemZMC::ELFCallFrameSize).
-static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
+static const TargetFrameLowering::SpillSlot ELFSpillOffsetTable[] = {
{ SystemZ::R2D, 0x10 },
{ SystemZ::R3D, 0x18 },
{ SystemZ::R4D, 0x20 },
@@ -44,29 +44,55 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = {
{ SystemZ::F4D, 0x90 },
{ SystemZ::F6D, 0x98 }
};
+
+static const TargetFrameLowering::SpillSlot XPLINKSpillOffsetTable[] = {
+ {SystemZ::R4D, 0x00}, {SystemZ::R5D, 0x08}, {SystemZ::R6D, 0x10},
+ {SystemZ::R7D, 0x18}, {SystemZ::R8D, 0x20}, {SystemZ::R9D, 0x28},
+ {SystemZ::R10D, 0x30}, {SystemZ::R11D, 0x38}, {SystemZ::R12D, 0x40},
+ {SystemZ::R13D, 0x48}, {SystemZ::R14D, 0x50}, {SystemZ::R15D, 0x58}};
} // end anonymous namespace
-SystemZFrameLowering::SystemZFrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8),
- 0, Align(8), false /* StackRealignable */),
- RegSpillOffsets(0) {
- // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
- // equal to the incoming stack pointer, but to incoming stack pointer plus
- // 160. Instead of using a Local Area Offset, the Register save area will
- // be occupied by fixed frame objects, and all offsets are actually
- // relative to CFA.
+SystemZFrameLowering::SystemZFrameLowering(StackDirection D, Align StackAl,
+ int LAO, Align TransAl,
+ bool StackReal)
+ : TargetFrameLowering(D, StackAl, LAO, TransAl, StackReal) {}
- // Create a mapping from register number to save slot offset.
- // These offsets are relative to the start of the register save area.
- RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
- for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I)
- RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset;
+std::unique_ptr<SystemZFrameLowering>
+SystemZFrameLowering::create(const SystemZSubtarget &STI) {
+ if (STI.isTargetXPLINK64())
+ return std::make_unique<SystemZXPLINKFrameLowering>();
+ return std::make_unique<SystemZELFFrameLowering>();
}
-bool SystemZFrameLowering::
-assignCalleeSavedSpillSlots(MachineFunction &MF,
- const TargetRegisterInfo *TRI,
- std::vector<CalleeSavedInfo> &CSI) const {
+MachineBasicBlock::iterator SystemZFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ switch (MI->getOpcode()) {
+ case SystemZ::ADJCALLSTACKDOWN:
+ case SystemZ::ADJCALLSTACKUP:
+ assert(hasReservedCallFrame(MF) &&
+ "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
+ return MBB.erase(MI);
+ break;
+
+ default:
+ llvm_unreachable("Unexpected call frame instruction");
+ }
+}
+
+bool SystemZFrameLowering::hasReservedCallFrame(
+ const MachineFunction &MF) const {
+ // The ELF ABI requires us to allocate 160 bytes of stack space for the
+ // callee, with any outgoing stack arguments being placed above that. It
+ // seems better to make that area a permanent feature of the frame even if
+ // we're using a frame pointer. Similarly, 64-bit XPLINK requires 96 bytes
+ // of stack space for the register save area.
+ return true;
+}
+
+bool SystemZELFFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
bool IsVarArg = MF.getFunction().isVarArg();
@@ -130,9 +156,9 @@ assignCalleeSavedSpillSlots(MachineFunction &MF,
return true;
}
-void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
- BitVector &SavedRegs,
- RegScavenger *RS) const {
+void SystemZELFFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
MachineFrameInfo &MFFrame = MF.getFrameInfo();
@@ -179,6 +205,24 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
+SystemZELFFrameLowering::SystemZELFFrameLowering()
+ : SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), 0,
+ Align(8), /* StackRealignable */ false),
+ RegSpillOffsets(0) {
+
+ // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not
+ // equal to the incoming stack pointer, but to incoming stack pointer plus
+ // 160. Instead of using a Local Area Offset, the Register save area will
+ // be occupied by fixed frame objects, and all offsets are actually
+ // relative to CFA.
+
+ // Create a mapping from register number to save slot offset.
+ // These offsets are relative to the start of the register save area.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(ELFSpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[ELFSpillOffsetTable[I].Reg] = ELFSpillOffsetTable[I].Offset;
+}
+
// Add GPR64 to the save instruction being built by MIB, which is in basic
// block MBB. IsImplicit says whether this is an explicit operand to the
// instruction, or an implicit one that comes between the explicit start
@@ -196,7 +240,7 @@ static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB,
}
}
-bool SystemZFrameLowering::spillCalleeSavedRegisters(
+bool SystemZELFFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
@@ -256,7 +300,7 @@ bool SystemZFrameLowering::spillCalleeSavedRegisters(
return true;
}
-bool SystemZFrameLowering::restoreCalleeSavedRegisters(
+bool SystemZELFFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
if (CSI.empty())
@@ -312,9 +356,8 @@ bool SystemZFrameLowering::restoreCalleeSavedRegisters(
return true;
}
-void SystemZFrameLowering::
-processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS) const {
+void SystemZELFFrameLowering::processFunctionBeforeFrameFinalized(
+ MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineRegisterInfo *MRI = &MF.getRegInfo();
@@ -410,8 +453,8 @@ static void buildDefCFAReg(MachineBasicBlock &MBB,
.addCFIIndex(CFIIndex);
}
-void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+void SystemZELFFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
const SystemZTargetLowering &TLI = *STI.getTargetLowering();
@@ -530,8 +573,8 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R11 as live on entry when
// saving the GPRs.)
- for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
- I->addLiveIn(SystemZ::R11D);
+ for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
+ MBBJ.addLiveIn(SystemZ::R11D);
}
// Skip over the FPR/VR saves.
@@ -573,15 +616,15 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
}
}
-void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+void SystemZELFFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
- // See SystemZFrameLowering::emitPrologue
+ // See SystemZELFFrameLowering::emitPrologue
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
return;
@@ -619,8 +662,8 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
- MachineBasicBlock &PrologMBB) const {
+void SystemZELFFrameLowering::inlineStackProbe(
+ MachineFunction &MF, MachineBasicBlock &PrologMBB) const {
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
const SystemZSubtarget &STI = MF.getSubtarget<SystemZSubtarget>();
@@ -719,24 +762,14 @@ void SystemZFrameLowering::inlineStackProbe(MachineFunction &MF,
}
}
-bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
+bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo().hasVarSizedObjects() ||
MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
}
-bool
-SystemZFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- // The ABI requires us to allocate 160 bytes of stack space for the callee,
- // with any outgoing stack arguments being placed above that. It seems
- // better to make that area a permanent feature of the frame even if
- // we're using a frame pointer.
- return true;
-}
-
-StackOffset
-SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
- Register &FrameReg) const {
+StackOffset SystemZELFFrameLowering::getFrameIndexReference(
+ const MachineFunction &MF, int FI, Register &FrameReg) const {
// Our incoming SP is actually SystemZMC::ELFCallFrameSize below the CFA, so
// add that difference here.
StackOffset Offset =
@@ -744,25 +777,8 @@ SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset + StackOffset::getFixed(SystemZMC::ELFCallFrameSize);
}
-MachineBasicBlock::iterator SystemZFrameLowering::
-eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- switch (MI->getOpcode()) {
- case SystemZ::ADJCALLSTACKDOWN:
- case SystemZ::ADJCALLSTACKUP:
- assert(hasReservedCallFrame(MF) &&
- "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
- return MBB.erase(MI);
- break;
-
- default:
- llvm_unreachable("Unexpected call frame instruction");
- }
-}
-
-unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF,
- Register Reg) const {
+unsigned SystemZELFFrameLowering::getRegSpillOffset(MachineFunction &MF,
+ Register Reg) const {
bool IsVarArg = MF.getFunction().isVarArg();
bool BackChain = MF.getFunction().hasFnAttribute("backchain");
bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
@@ -778,8 +794,8 @@ unsigned SystemZFrameLowering::getRegSpillOffset(MachineFunction &MF,
return Offset;
}
-int SystemZFrameLowering::
-getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
+int SystemZELFFrameLowering::getOrCreateFramePointerSaveIndex(
+ MachineFunction &MF) const {
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
int FI = ZFI->getFramePointerSaveIndex();
if (!FI) {
@@ -791,7 +807,7 @@ getOrCreateFramePointerSaveIndex(MachineFunction &MF) const {
return FI;
}
-bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const {
+bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack");
bool BackChain = MF.getFunction().hasFnAttribute("backchain");
bool SoftFloat = MF.getSubtarget<SystemZSubtarget>().hasSoftFloat();
@@ -800,3 +816,186 @@ bool SystemZFrameLowering::usePackedStack(MachineFunction &MF) const {
bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC;
return HasPackedStackAttr && CallConv;
}
+
+SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
+ : SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
+ Align(32), /* StackRealignable */ false),
+ RegSpillOffsets(-1) {
+
+ // Create a mapping from register number to save slot offset.
+ // These offsets are relative to the start of the local area.
+ RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
+ for (unsigned I = 0, E = array_lengthof(XPLINKSpillOffsetTable); I != E; ++I)
+ RegSpillOffsets[XPLINKSpillOffsetTable[I].Reg] =
+ XPLINKSpillOffsetTable[I].Offset;
+}
+
+bool SystemZXPLINKFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // Scan the call-saved GPRs and find the bounds of the register spill area.
+ unsigned LowGPR = 0;
+ int LowOffset = INT32_MAX;
+ unsigned HighGPR = LowGPR;
+ int HighOffset = -1;
+
+ unsigned RegSP = Regs.getStackPointerRegister();
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+ const unsigned RegSize = 8;
+
+ auto ProcessCSI = [&](std::vector<CalleeSavedInfo> &CSIList) {
+ for (auto &CS : CSIList) {
+ unsigned Reg = CS.getReg();
+ int Offset = RegSpillOffsets[Reg];
+ if (Offset >= 0) {
+ if (GRRegClass.contains(Reg)) {
+ if (LowOffset > Offset) {
+ LowOffset = Offset;
+ LowGPR = Reg;
+ }
+
+ if (Offset > HighOffset) {
+ HighOffset = Offset;
+ HighGPR = Reg;
+ }
+ }
+ int FrameIdx = MFFrame.CreateFixedSpillStackObject(RegSize, Offset);
+ CS.setFrameIdx(FrameIdx);
+ } else
+ CS.setFrameIdx(INT32_MAX);
+ }
+ };
+
+ std::vector<CalleeSavedInfo> Spills;
+
+ // For non-leaf functions:
+ // - the address of callee (entry point) register R6 must be saved
+ Spills.push_back(CalleeSavedInfo(Regs.getAddressOfCalleeRegister()));
+
+ // If the function needs a frame pointer, or if the backchain pointer should
+ // be stored, then save the stack pointer register R4.
+ if (hasFP(MF) || MF.getFunction().hasFnAttribute("backchain"))
+ Spills.push_back(CalleeSavedInfo(RegSP));
+
+ // Save the range of call-saved registers, for use by the
+ // epilogue inserter.
+ ProcessCSI(CSI);
+ MFI->setRestoreGPRRegs(LowGPR, HighGPR, LowOffset);
+
+ // Save the range of registers to be spilled, for use by the prologue inserter.
+ ProcessCSI(Spills);
+ MFI->setSpillGPRRegs(LowGPR, HighGPR, LowOffset);
+
+ // Create spill slots for the remaining registers.
+ for (auto &CS : CSI) {
+ if (CS.getFrameIdx() != INT32_MAX)
+ continue;
+ unsigned Reg = CS.getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ Align Alignment = TRI->getSpillAlign(*RC);
+ unsigned Size = TRI->getSpillSize(*RC);
+ Alignment = std::min(Alignment, getStackAlign());
+ int FrameIdx = MFFrame.CreateStackObject(Size, Alignment, true);
+ CS.setFrameIdx(FrameIdx);
+ }
+
+ return true;
+}
+
+void SystemZXPLINKFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ bool HasFP = hasFP(MF);
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+
+ // If the function requires a frame pointer, record that the hard
+ // frame pointer will be clobbered.
+ if (HasFP)
+ SavedRegs.set(Regs.getFramePointerRegister());
+
+ // If the function is not an XPLeaf function, we need to save the
+ // return address register. We also always use that register for
+ // the return instruction, so it needs to be restored in the
+ // epilogue even though that register is considered to be volatile.
+ // TODO: Implement leaf detection.
+ SavedRegs.set(Regs.getReturnFunctionAddressRegister());
+}
+
+bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return true;
+
+ MachineFunction &MF = *MBB.getParent();
+ SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
+ SystemZ::GPRRegs SpillGPRs = ZFI->getSpillGPRRegs();
+ DebugLoc DL;
+
+ // Save GPRs
+ if (SpillGPRs.LowGPR) {
+ assert(SpillGPRs.LowGPR != SpillGPRs.HighGPR &&
+ "Should be saving multiple registers");
+
+ // Build an STM/STMG instruction.
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG));
+
+ // Add the explicit register operands.
+ addSavedGPR(MBB, MIB, SpillGPRs.LowGPR, false);
+ addSavedGPR(MBB, MIB, SpillGPRs.HighGPR, false);
+
+ // Add the address r4
+ MIB.addReg(Regs.getStackPointerRegister());
+
+ // Add the partial offset.
+ // We cannot add the actual offset yet because the stack is not finalized.
+ MIB.addImm(SpillGPRs.GPROffset);
+
+ // Make sure all call-saved GPRs are included as operands and are
+ // marked as live on entry.
+ auto &GRRegClass = SystemZ::GR64BitRegClass;
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (GRRegClass.contains(Reg))
+ addSavedGPR(MBB, MIB, Reg, true);
+ }
+ }
+
+ // Spill FPRs to the stack in the normal TargetInstrInfo way
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (SystemZ::FP64BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::FP64BitRegClass, TRI);
+ }
+ if (SystemZ::VR128BitRegClass.contains(Reg)) {
+ MBB.addLiveIn(Reg);
+ TII->storeRegToStackSlot(MBB, MBBI, Reg, true, CSI[I].getFrameIdx(),
+ &SystemZ::VR128BitRegClass, TRI);
+ }
+ }
+
+ return true;
+}
+
+void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {}
+
+bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
+ return false;
+}
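SystemZFrameLowering now only carries the ABI-independent pieces, and the static create() hook picks the ELF or XPLINK implementation from the subtarget. Below is a minimal sketch of that factory shape, with illustrative class names rather than the real LLVM types:

#include <iostream>
#include <memory>

// Illustrative stand-in for the subtarget query; not the real LLVM class.
struct FakeSubtarget {
  bool IsXPLINK64;
  bool isTargetXPLINK64() const { return IsXPLINK64; }
};

struct FrameLoweringBase {
  virtual ~FrameLoweringBase() = default;
  virtual const char *name() const = 0;

  // Factory mirroring SystemZFrameLowering::create(): callers never need to
  // know which ABI-specific subclass they are holding.
  static std::unique_ptr<FrameLoweringBase> create(const FakeSubtarget &STI);
};

struct ELFFrameLowering : FrameLoweringBase {
  const char *name() const override { return "ELF"; }
};

struct XPLINKFrameLowering : FrameLoweringBase {
  const char *name() const override { return "XPLINK64"; }
};

std::unique_ptr<FrameLoweringBase>
FrameLoweringBase::create(const FakeSubtarget &STI) {
  if (STI.isTargetXPLINK64())
    return std::make_unique<XPLINKFrameLowering>();
  return std::make_unique<ELFFrameLowering>();
}

int main() {
  for (bool XPLINK : {false, true}) {
    auto TFL = FrameLoweringBase::create(FakeSubtarget{XPLINK});
    std::cout << "selected frame lowering: " << TFL->name() << "\n";
  }
}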
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
index c8312b836e57..6fddb4f81c41 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -10,6 +10,8 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZFRAMELOWERING_H
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "SystemZInstrBuilder.h"
+#include "SystemZMachineFunctionInfo.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/Support/TypeSize.h"
@@ -19,10 +21,26 @@ class SystemZTargetMachine;
class SystemZSubtarget;
class SystemZFrameLowering : public TargetFrameLowering {
+public:
+ SystemZFrameLowering(StackDirection D, Align StackAl, int LAO, Align TransAl,
+ bool StackReal);
+
+ static std::unique_ptr<SystemZFrameLowering>
+ create(const SystemZSubtarget &STI);
+
+ // Override TargetFrameLowering.
+ bool isFPCloseToIncomingSP() const override { return false; }
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ MachineBasicBlock::iterator
+ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+};
+
+class SystemZELFFrameLowering : public SystemZFrameLowering {
IndexedMap<unsigned> RegSpillOffsets;
public:
- SystemZFrameLowering();
+ SystemZELFFrameLowering();
// Override TargetFrameLowering.
bool isFPCloseToIncomingSP() const override { return false; }
@@ -48,21 +66,14 @@ public:
void inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const override;
bool hasFP(const MachineFunction &MF) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
- MachineBasicBlock::iterator
- eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const override;
// Return the byte offset from the incoming stack pointer of Reg's
// ABI-defined save slot. Return 0 if no slot is defined for Reg. Adjust
// the offset in case MF has packed-stack.
unsigned getRegSpillOffset(MachineFunction &MF, Register Reg) const;
- // Get or create the frame index of where the old frame pointer is stored.
- int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
-
bool usePackedStack(MachineFunction &MF) const;
// Return the offset of the backchain.
@@ -70,6 +81,35 @@ public:
// The back chain is stored topmost with packed-stack.
return usePackedStack(MF) ? SystemZMC::ELFCallFrameSize - 8 : 0;
}
+
+ // Get or create the frame index of where the old frame pointer is stored.
+ int getOrCreateFramePointerSaveIndex(MachineFunction &MF) const;
+};
+
+class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
+ IndexedMap<unsigned> RegSpillOffsets;
+
+public:
+ SystemZXPLINKFrameLowering();
+
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
+ void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
+ RegScavenger *RS) const override;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ ArrayRef<CalleeSavedInfo> CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
};
} // end namespace llvm
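Call sites that only run on one ABI ask the subtarget for the concrete subclass, as the ISelLowering changes below do with Subtarget.getFrameLowering<SystemZELFFrameLowering>(). The sketch below shows the templated-accessor pattern that call implies; the classes here are stand-ins, and the cast is assumed to be valid only on the matching ABI path:

#include <iostream>
#include <memory>

struct FrameLoweringBase { virtual ~FrameLoweringBase() = default; };
struct ELFFrameLowering : FrameLoweringBase { int BackchainOffset = 0; };
struct XPLINKFrameLowering : FrameLoweringBase {};

class FakeSubtarget {
  std::unique_ptr<FrameLoweringBase> FrameLowering;

public:
  explicit FakeSubtarget(std::unique_ptr<FrameLoweringBase> TFL)
      : FrameLowering(std::move(TFL)) {}

  // Callers that only execute for one ABI request the concrete subclass; the
  // cast is unchecked, so it is only meaningful on that ABI's code path.
  template <typename T> const T *getFrameLowering() const {
    return static_cast<const T *>(FrameLowering.get());
  }
};

int main() {
  FakeSubtarget ELF(std::make_unique<ELFFrameLowering>());
  const auto *TFL = ELF.getFrameLowering<ELFFrameLowering>();
  std::cout << "backchain offset: " << TFL->BackchainOffset << "\n";
}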
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index d70d48638b14..71432218068e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -82,6 +82,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
: TargetLowering(TM), Subtarget(STI) {
MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
+ auto *Regs = STI.getSpecialRegisters();
+
// Set up the register classes.
if (Subtarget.hasHighWord())
addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
@@ -115,7 +117,7 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
computeRegisterProperties(Subtarget.getRegisterInfo());
// Set up special registers.
- setStackPointerRegisterToSaveRestore(SystemZ::R15D);
+ setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
// TODO: It may be better to default to latency-oriented scheduling, however
// LLVM's current latency-oriented scheduler can't handle physreg definitions
@@ -293,6 +295,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ // Handle bitcast from fp128 to i128.
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
for (MVT VT : MVT::integer_valuetypes()) {
@@ -1353,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
- case CCValAssign::BCvt:
- // If this is a short vector argument to be stored to the stack,
+ case CCValAssign::BCvt: {
+ assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
+ assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
+ VA.getValVT() == MVT::f128);
+ MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
+ ? MVT::v2i64
+ : VA.getLocVT();
+ Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
+ // For ELF, this is a short vector argument to be stored to the stack,
// bitcast to v2i64 and then extract first element.
- assert(VA.getLocVT() == MVT::i64);
- assert(VA.getValVT().isVector());
- Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
- DAG.getConstant(0, DL, MVT::i32));
+ if (BitCastToType == MVT::v2i64)
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
+ return Value;
+ }
case CCValAssign::Full:
return Value;
default:
@@ -1426,8 +1438,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// Detect unsupported vector argument types.
@@ -1468,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
break;
+ case MVT::f128:
+ NumFixedFPRs += 2;
+ RC = &SystemZ::FP128BitRegClass;
+ break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -1521,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
}
- if (IsVarArg) {
+ // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
+ if (IsVarArg && Subtarget.isTargetELF()) {
// Save the number of non-varargs registers for later use by va_start, etc.
FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
@@ -1560,6 +1576,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
}
}
+ // FIXME: For XPLINK64, add support for handling the incoming "ADA" special
+ // register (R5).
return Chain;
}
@@ -1600,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
+ SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
+
+ // FIXME: z/OS tail call support to be added in a later patch.
+ if (Subtarget.isTargetXPLINK64())
+ IsTailCall = false;
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
@@ -1620,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ if (Subtarget.isTargetXPLINK64())
+ // Although the XPLINK specifications for AMODE64 state that the minimum
+ // size of the param area is 32 bytes and that no other rounding is
+ // specified, we round this area up in 64-byte increments to be compatible
+ // with existing compilers.
+ NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
+
// Mark the start of the call.
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
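A quick check of the new parameter-area rounding: the outgoing byte count is clamped to at least 64 and rounded up to a multiple of 64. The helper below re-implements llvm::alignTo locally so the sketch runs standalone:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Local stand-in for llvm::alignTo: round Value up to the next multiple of
// Align. Enough for this illustration; the real helper lives in MathExtras.h.
static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  for (unsigned NumBytes : {0u, 24u, 64u, 65u, 200u}) {
    unsigned Rounded = std::max(64u, (unsigned)alignTo(NumBytes, 64));
    std::printf("outgoing bytes %3u -> param area %3u\n", NumBytes, Rounded);
  }
  // Prints 0->64, 24->64, 64->64, 65->128, 200->256.
}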
@@ -1670,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
- if (VA.isRegLoc())
+ if (VA.isRegLoc()) {
+ // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
+ // MVT::i128 type. We decompose the 128-bit type into a pair of its high
+ // and low values.
+ if (VA.getLocVT() == MVT::i128)
+ ArgValue = lowerI128ToGR128(DAG, ArgValue);
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
- else {
+ } else {
assert(VA.isMemLoc() && "Argument not register or memory");
// Work out the address of the stack slot. Unpromoted ints and
// floats are passed as right-justified 8-byte values.
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
- unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
+ StackPtr = DAG.getCopyFromReg(Chain, DL,
+ Regs->getStackPointerRegister(), PtrVT);
+ unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
+ VA.getLocMemOffset();
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
@@ -1689,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+
+ // Although long doubles or vectors are passed through the stack when
+ // they are vararg (non-fixed arguments), if a long double or vector
+ // occupies the third and fourth slot of the argument list, GPR3 should
+ // still shadow the third slot of the argument list.
+ if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
+ SDValue ShadowArgValue =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
+ DAG.getIntPtrConstant(1, DL));
+ RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
+ }
}
}
@@ -1700,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// associated Target* opcodes. Force %r1 to be used for indirect
// tail calls.
SDValue Glue;
+ // FIXME: Add support for XPLINK using the ADA register.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
@@ -2282,8 +2331,7 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
Comparison &C) {
if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
C.CCMask == SystemZ::CCMASK_CMP_NE) {
- for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
- SDNode *N = *I;
+ for (SDNode *N : C.Op0->uses()) {
if (N->getOpcode() == ISD::SUB &&
((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
(N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
@@ -2306,8 +2354,7 @@ static void adjustForFNeg(Comparison &C) {
return;
auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
if (C1 && C1->isZero()) {
- for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
- SDNode *N = *I;
+ for (SDNode *N : C.Op0->uses()) {
if (N->getOpcode() == ISD::FNEG) {
C.Op0 = SDValue(N, 0);
C.CCMask = SystemZ::reverseCCMask(C.CCMask);
@@ -2333,8 +2380,7 @@ static void adjustForLTGFR(Comparison &C) {
if (C1 && C1->getZExtValue() == 32) {
SDValue ShlOp0 = C.Op0.getOperand(0);
// See whether X has any SIGN_EXTEND_INREG uses.
- for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
- SDNode *N = *I;
+ for (SDNode *N : ShlOp0->uses()) {
if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
C.Op0 = SDValue(N, 0);
@@ -3320,8 +3366,7 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setFrameAddressIsTaken(true);
@@ -4139,17 +4184,21 @@ SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ auto *Regs = Subtarget->getSpecialRegisters();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
report_fatal_error("Variable-sized stack allocations are not supported "
"in GHC calling convention");
return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
- SystemZ::R15D, Op.getValueType());
+ Regs->getStackPointerRegister(), Op.getValueType());
}
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
+ auto *Regs = Subtarget->getSpecialRegisters();
MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
@@ -4163,12 +4212,13 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SDLoc DL(Op);
if (StoreBackchain) {
- SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, MVT::i64);
+ SDValue OldSP = DAG.getCopyFromReg(
+ Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
MachinePointerInfo());
}
- Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R15D, NewSP);
+ Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
if (StoreBackchain)
Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
@@ -5589,6 +5639,32 @@ SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
Results.push_back(Res.getValue(2));
break;
}
+ case ISD::BITCAST: {
+ SDValue Src = N->getOperand(0);
+ if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
+ !useSoftFloat()) {
+ SDLoc DL(N);
+ SDValue Lo, Hi;
+ if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
+ SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
+ DAG.getConstant(1, DL, MVT::i32));
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
+ DAG.getConstant(0, DL, MVT::i32));
+ } else {
+ assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
+ "Unrecognized register class for f128.");
+ SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
+ DL, MVT::f64, Src);
+ SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
+ DL, MVT::f64, Src);
+ Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
+ Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
+ }
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
+ }
+ break;
+ }
default:
llvm_unreachable("Unexpected node to lower");
}
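The new ISD::BITCAST path splits an f128 into two i64 halves and rebuilds an i128 with BUILD_PAIR, whose second operand supplies the most-significant bits. The sketch below recombines two halves the same way using unsigned __int128 (a GCC/Clang extension, used purely for illustration):

#include <cstdint>
#include <cstdio>

// Recombine two 64-bit halves the way ISD::BUILD_PAIR(Lo, Hi) produces an
// i128: Hi provides the most-significant 64 bits.
static unsigned __int128 buildPair(uint64_t Lo, uint64_t Hi) {
  return ((unsigned __int128)Hi << 64) | Lo;
}

int main() {
  uint64_t Hi = 0x3fff000000000000ULL; // high 64 bits of an IEEE binary128 1.0
  uint64_t Lo = 0x0000000000000000ULL; // low 64 bits
  unsigned __int128 V = buildPair(Lo, Hi);
  std::printf("i128 = 0x%016llx%016llx\n",
              (unsigned long long)(V >> 64), (unsigned long long)V);
}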
@@ -5634,15 +5710,10 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(SUBCARRY);
OPCODE(GET_CCMASK);
OPCODE(MVC);
- OPCODE(MVC_LOOP);
OPCODE(NC);
- OPCODE(NC_LOOP);
OPCODE(OC);
- OPCODE(OC_LOOP);
OPCODE(XC);
- OPCODE(XC_LOOP);
OPCODE(CLC);
- OPCODE(CLC_LOOP);
OPCODE(STPCPY);
OPCODE(STRCMP);
OPCODE(SEARCH_STRING);
@@ -7071,13 +7142,19 @@ SystemZTargetLowering::getStackProbeSize(MachineFunction &MF) const {
// Force base value Base into a register before MI. Return the register.
static Register forceReg(MachineInstr &MI, MachineOperand &Base,
const SystemZInstrInfo *TII) {
- if (Base.isReg())
- return Base.getReg();
-
MachineBasicBlock *MBB = MI.getParent();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (Base.isReg()) {
+ // Copy Base into a new virtual register to help register coalescing in
+ // cases with multiple uses.
+ Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
+ BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
+ .add(Base);
+ return Reg;
+ }
+
Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
.add(Base)
@@ -7103,8 +7180,8 @@ static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
// If we hit the end of the block, check whether CC is live into a
// successor.
if (miI == MBB->end()) {
- for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
- if ((*SI)->isLiveIn(SystemZ::CC))
+ for (const MachineBasicBlock *Succ : MBB->successors())
+ if (Succ->isLiveIn(SystemZ::CC))
return false;
}
@@ -7796,26 +7873,67 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
MachineOperand SrcBase = earlyUseOperand(MI.getOperand(2));
uint64_t SrcDisp = MI.getOperand(3).getImm();
MachineOperand &LengthMO = MI.getOperand(4);
- uint64_t ImmLength = LengthMO.isImm() ? LengthMO.getImm() : 0;
- Register LenMinus1Reg =
- LengthMO.isReg() ? LengthMO.getReg() : SystemZ::NoRegister;
+ bool IsImmForm = LengthMO.isImm();
+ bool IsRegForm = !IsImmForm;
+
+ bool NeedsLoop = false;
+ uint64_t ImmLength = 0;
+ Register LenMinus1Reg = SystemZ::NoRegister;
+ if (IsImmForm) {
+ ImmLength = LengthMO.getImm();
+ ImmLength++; // Add back the '1' subtracted originally.
+ if (ImmLength == 0) {
+ MI.eraseFromParent();
+ return MBB;
+ }
+ if (Opcode == SystemZ::CLC) {
+ if (ImmLength > 3 * 256)
+ // A two-CLC sequence is a clear win over a loop, not least because
+ // it needs only one branch. A three-CLC sequence needs the same
+ // number of branches as a loop (i.e. 2), but is shorter. That
+ // brings us to lengths greater than 768 bytes. It seems relatively
+ // likely that a difference will be found within the first 768 bytes,
+ // so we just optimize for the smallest number of branch
+ // instructions, in order to avoid polluting the prediction buffer
+ // too much.
+ NeedsLoop = true;
+ } else if (ImmLength > 6 * 256)
+ // The heuristic we use is to prefer loops for anything that would
+ // require 7 or more MVCs. With these kinds of sizes there isn't much
+ // to choose between straight-line code and looping code, since the
+ // time will be dominated by the MVCs themselves.
+ NeedsLoop = true;
+ } else {
+ NeedsLoop = true;
+ LenMinus1Reg = LengthMO.getReg();
+ }
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
- MachineBasicBlock *EndMBB = (ImmLength > 256 && Opcode == SystemZ::CLC
- ? SystemZ::splitBlockAfter(MI, MBB)
- : nullptr);
-
- // Check for the loop form, in which operand 5 is the trip count.
- if (MI.getNumExplicitOperands() > 5) {
- Register StartCountReg = MI.getOperand(5).getReg();
- bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
+ MachineBasicBlock *EndMBB =
+ (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
+ ? SystemZ::splitBlockAfter(MI, MBB)
+ : nullptr);
+
+ if (NeedsLoop) {
+ Register StartCountReg =
+ MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ if (IsImmForm) {
+ TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
+ ImmLength &= 255;
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
+ .addReg(LenMinus1Reg)
+ .addReg(0)
+ .addImm(8);
+ }
auto loadZeroAddress = [&]() -> MachineOperand {
Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
return MachineOperand::CreateReg(Reg, false);
};
+ bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
DestBase = loadZeroAddress();
if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
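The immediate form above decides between straight-line expansion and a loop from the (length - 1) operand: more than 3*256 bytes for CLC, more than 6*256 bytes for the other block operations. A tiny sketch of that decision, including the add-back of the subtracted 1:

#include <cstdint>
#include <cstdio>

// Mirror of the NeedsLoop decision in emitMemMemWrapper for the immediate
// form. LengthMinus1 is the pseudo's operand (one less than the byte count).
static bool needsLoop(uint64_t LengthMinus1, bool IsCLC) {
  uint64_t Bytes = LengthMinus1 + 1;    // add back the subtracted 1
  uint64_t Threshold = IsCLC ? 3 * 256  // > 768 bytes: loop for CLC
                             : 6 * 256; // > 1536 bytes: loop for MVC/NC/OC/XC
  return Bytes > Threshold;
}

int main() {
  std::printf("CLC 768 bytes  -> %s\n", needsLoop(767, true) ? "loop" : "inline");
  std::printf("CLC 769 bytes  -> %s\n", needsLoop(768, true) ? "loop" : "inline");
  std::printf("MVC 1536 bytes -> %s\n", needsLoop(1535, false) ? "loop" : "inline");
  std::printf("MVC 1537 bytes -> %s\n", needsLoop(1536, false) ? "loop" : "inline");
}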
@@ -7842,12 +7960,12 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
Register ThisCountReg = MRI.createVirtualRegister(RC);
Register NextCountReg = MRI.createVirtualRegister(RC);
- if (LengthMO.isReg()) {
+ if (IsRegForm) {
AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
StartMBB = SystemZ::emitBlockAfter(MBB);
LoopMBB = SystemZ::emitBlockAfter(StartMBB);
- NextMBB = LoopMBB;
- DoneMBB = SystemZ::emitBlockAfter(LoopMBB);
+ NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
+ DoneMBB = SystemZ::emitBlockAfter(NextMBB);
// MBB:
// # Jump to AllDoneMBB if LenMinus1Reg is -1, or fall thru to StartMBB.
@@ -7882,7 +8000,6 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
DestBase = MachineOperand::CreateReg(NextDestReg, false);
SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
- ImmLength &= 255;
if (EndMBB && !ImmLength)
// If the loop handled the whole CLC range, DoneMBB will be empty with
// CC live-through into EndMBB, so add it as live-in.
@@ -7953,7 +8070,7 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
MBB->addSuccessor(DoneMBB);
MBB = DoneMBB;
- if (LengthMO.isReg()) {
+ if (IsRegForm) {
// DoneMBB:
// # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
// # Use EXecute Relative Long for the remainder of the bytes. The target
@@ -7966,19 +8083,23 @@ MachineBasicBlock *SystemZTargetLowering::emitMemMemWrapper(
: MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
.addReg(StartDestReg).addMBB(StartMBB)
- .addReg(NextDestReg).addMBB(LoopMBB);
+ .addReg(NextDestReg).addMBB(NextMBB);
if (!HaveSingleBase)
BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
.addReg(StartSrcReg).addMBB(StartMBB)
- .addReg(NextSrcReg).addMBB(LoopMBB);
- MRI.constrainRegClass(LenMinus1Reg, &SystemZ::ADDR64BitRegClass);
- BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
- .addImm(Opcode)
- .addReg(LenMinus1Reg)
- .addReg(RemDestReg).addImm(DestDisp)
- .addReg(RemSrcReg).addImm(SrcDisp);
+ .addReg(NextSrcReg).addMBB(NextMBB);
+ MachineInstrBuilder EXRL_MIB =
+ BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
+ .addImm(Opcode)
+ .addReg(LenMinus1Reg)
+ .addReg(RemDestReg).addImm(DestDisp)
+ .addReg(RemSrcReg).addImm(SrcDisp);
MBB->addSuccessor(AllDoneMBB);
MBB = AllDoneMBB;
+ if (EndMBB) {
+ EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
+ MBB->addLiveIn(SystemZ::CC);
+ }
}
}
@@ -8264,8 +8385,7 @@ MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- auto *TFL =
- static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
+ auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
SDLoc DL(SP);
return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
@@ -8497,21 +8617,18 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
case SystemZ::ATOMIC_CMP_SWAPW:
return emitAtomicCmpSwapW(MI, MBB);
- case SystemZ::MVCSequence:
- case SystemZ::MVCLoop:
+ case SystemZ::MVCImm:
+ case SystemZ::MVCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
- case SystemZ::NCSequence:
- case SystemZ::NCLoop:
+ case SystemZ::NCImm:
return emitMemMemWrapper(MI, MBB, SystemZ::NC);
- case SystemZ::OCSequence:
- case SystemZ::OCLoop:
+ case SystemZ::OCImm:
return emitMemMemWrapper(MI, MBB, SystemZ::OC);
- case SystemZ::XCSequence:
- case SystemZ::XCLoop:
- case SystemZ::XCLoopVarLen:
+ case SystemZ::XCImm:
+ case SystemZ::XCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::XC);
- case SystemZ::CLCSequence:
- case SystemZ::CLCLoop:
+ case SystemZ::CLCImm:
+ case SystemZ::CLCReg:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 248efc11b87f..461f804ca55e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -117,23 +117,14 @@ enum NodeType : unsigned {
// MachineMemOperands rather than one.
MVC,
- // Like MVC, but implemented as a loop that handles X*256 bytes
- // followed by straight-line code to handle the rest (if any).
- // The value of X is passed as an additional operand.
- MVC_LOOP,
-
- // Similar to MVC and MVC_LOOP, but for logic operations (AND, OR, XOR).
+ // Similar to MVC, but for logic operations (AND, OR, XOR).
NC,
- NC_LOOP,
OC,
- OC_LOOP,
XC,
- XC_LOOP,
// Use CLC to compare two blocks of memory, with the same comments
- // as for MVC and MVC_LOOP.
+ // as for MVC.
CLC,
- CLC_LOOP,
// Use an MVST-based sequence to implement stpcpy().
STPCPY,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 337164d55e5f..7cbe125533d3 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -128,9 +128,10 @@ let Predicates = [FeatureNoVectorEnhancements1] in {
(EXTRACT_SUBREG FP128:$src2, subreg_h64))>;
}
-defm LoadStoreF32 : MVCLoadStore<load, f32, MVCSequence, 4>;
-defm LoadStoreF64 : MVCLoadStore<load, f64, MVCSequence, 8>;
-defm LoadStoreF128 : MVCLoadStore<load, f128, MVCSequence, 16>;
+// The length is given as one less for MVCImm.
+defm LoadStoreF32 : MVCLoadStore<load, f32, MVCImm, 3>;
+defm LoadStoreF64 : MVCLoadStore<load, f64, MVCImm, 7>;
+defm LoadStoreF128 : MVCLoadStore<load, f128, MVCImm, 15>;
//===----------------------------------------------------------------------===//
// Load instructions
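The 3/7/15 lengths follow the new convention that the pseudos carry the byte count minus one, matching the SS-format length field (and the EXRL'd target instruction), which encodes bytes - 1. This standalone snippet simply prints that mapping for the three float widths:

#include <cstdio>

// The MVCImm pseudos carry (bytes - 1); the expansion adds the 1 back, so
// f32/f64/f128 copies use length operands 3, 7 and 15 respectively.
int main() {
  struct { const char *Ty; unsigned Bytes; } Cases[] = {
      {"f32", 4}, {"f64", 8}, {"f128", 16}};
  for (const auto &C : Cases)
    std::printf("%-4s : %2u bytes -> pseudo length operand %2u\n",
                C.Ty, C.Bytes, C.Bytes - 1);
}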
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 5cb46cdb36a6..cd60fff1ab11 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -5329,42 +5329,37 @@ multiclass CondUnaryRSYPseudoAndMemFold<string mnemonic,
// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
-// The Sequence form uses a straight-line sequence of instructions and
-// the Loop form uses a loop of length-256 instructions followed by
-// another instruction to handle the excess.
-// The LoopVarLen form is for a loop with a non-constant length parameter.
-multiclass MemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
+// There are two pseudos, one for each of the cases where the length is
+// constant or variable. The length operand of a pseudo is actually one less
+// than the intended number of bytes, since the register case needs to use an
+// EXRL with a target instruction that always adds one to the length.
+multiclass MemorySS<string mnemonic, bits<8> opcode, SDPatternOperator memop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length)]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256)]>;
- def LoopVarLen : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- GR64:$length, GR64:$count256),
- [(loop bdaddr12only:$dest, bdaddr12only:$src,
- GR64:$length, GR64:$count256)]>;
+ def Imm : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(memop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length)]>;
+ def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length),
+ [(memop bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length)]>;
}
}
// The same, but setting a CC result as comparison operator.
multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
- SDPatternOperator sequence, SDPatternOperator loop> {
+ SDPatternOperator memop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
- def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length),
- [(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length))]>;
- def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256),
- [(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
- imm64:$length, GR64:$count256))]>;
+ def Imm : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length),
+ [(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src,
+ imm64:$length))]>;
+ def Reg : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length),
+ [(set CC, (memop bdaddr12only:$dest, bdaddr12only:$src,
+ ADDR64:$length))]>;
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index b9f64198f4e5..2bf80882fa61 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -514,8 +515,8 @@ unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
}
bool SystemZInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask,
- int &Value) const {
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const {
assert(MI.isCompare() && "Caller should have checked for a comparison");
if (MI.getNumExplicitOperands() == 2 && MI.getOperand(0).isReg() &&
@@ -942,8 +943,9 @@ static void transferMIFlag(MachineInstr *OldMI, MachineInstr *NewMI,
NewMI->setFlag(Flag);
}
-MachineInstr *SystemZInstrInfo::convertToThreeAddress(
- MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *
+SystemZInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const {
MachineBasicBlock *MBB = MI.getParent();
// Try to convert an AND into an RISBG-type instruction.
@@ -984,6 +986,8 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
}
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *MIB);
transferDeadCC(&MI, MIB);
return MIB;
}
@@ -1515,6 +1519,13 @@ unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
+ else if (MI.getOpcode() == SystemZ::PATCHPOINT)
+ return PatchPointOpers(&MI).getNumPatchBytes();
+ else if (MI.getOpcode() == SystemZ::STACKMAP)
+ return MI.getOperand(1).getImm();
+ else if (MI.getOpcode() == SystemZ::FENTRY_CALL)
+ return 6;
+
return MI.getDesc().getSize();
}
@@ -1923,7 +1934,7 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- unsigned Opcode;
+ unsigned Opcode = 0;
if (isInt<16>(Value))
Opcode = SystemZ::LGHI;
else if (SystemZ::isImmLL(Value))
@@ -1931,11 +1942,23 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
else if (SystemZ::isImmLH(Value)) {
Opcode = SystemZ::LLILH;
Value >>= 16;
- } else {
- assert(isInt<32>(Value) && "Huge values not handled yet");
+ } else if (isInt<32>(Value))
Opcode = SystemZ::LGFI;
+ if (Opcode) {
+ BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+ return;
}
- BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ assert(MRI.isSSA() && "Huge values only handled before reg-alloc.");
+ Register Reg0 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ Register Reg1 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::IMPLICIT_DEF), Reg0);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::IIHF64), Reg1)
+ .addReg(Reg0).addImm(Value >> 32);
+ BuildMI(MBB, MBBI, DL, get(SystemZ::IILF64), Reg)
+ .addReg(Reg1).addImm(Value & ((uint64_t(1) << 32) - 1));
}
bool SystemZInstrInfo::verifyInstruction(const MachineInstr &MI,
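For 64-bit constants that fit none of the single-instruction forms, the extended loadImmediate now materializes the value with an IIHF64/IILF64 pair over the two 32-bit halves. The sketch below shows the same split and recombination on the host:

#include <cstdint>
#include <cstdio>

// Split a 64-bit immediate into the two 32-bit halves that IIHF64 and IILF64
// insert, mirroring the new fallback path in loadImmediate().
int main() {
  uint64_t Value = 0x123456789abcdef0ULL;
  uint64_t HighHalf = Value >> 32;                      // goes into IIHF64
  uint64_t LowHalf = Value & ((uint64_t(1) << 32) - 1); // goes into IILF64
  std::printf("IIHF immediate: 0x%08llx\n", (unsigned long long)HighHalf);
  std::printf("IILF immediate: 0x%08llx\n", (unsigned long long)LowHalf);
  std::printf("recombined:     0x%016llx\n",
              (unsigned long long)((HighHalf << 32) | LowHalf));
}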
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 72dafc3c93c2..396f56c7f59c 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -47,7 +47,8 @@ enum {
CCMaskFirst = (1 << 18),
CCMaskLast = (1 << 19),
IsLogical = (1 << 20),
- CCIfNoSignedWrap = (1 << 21)
+ CCIfNoSignedWrap = (1 << 21),
+ MemMemOp = (1 << 22)
};
static inline unsigned getAccessSize(unsigned int Flags) {
@@ -234,7 +235,8 @@ public:
const DebugLoc &DL,
int *BytesAdded = nullptr) const override;
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &Mask, int &Value) const override;
+ Register &SrcReg2, int64_t &Mask,
+ int64_t &Value) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
Register, Register, Register, int &, int &,
int &) const override;
@@ -270,9 +272,8 @@ public:
Register DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
MachineInstr *
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index 7df7cc93d6eb..e4760229fd6b 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -503,7 +503,7 @@ def MVGHI : StoreSIL<"mvghi", 0xE548, store, imm64sx16>;
// Memory-to-memory moves.
let mayLoad = 1, mayStore = 1 in
- defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>;
+ defm MVC : MemorySS<"mvc", 0xD2, z_mvc>;
let mayLoad = 1, mayStore = 1, Defs = [CC] in {
def MVCL : SideEffectBinaryMemMemRR<"mvcl", 0x0E, GR128, GR128>;
def MVCLE : SideEffectTernaryMemMemRS<"mvcle", 0xA8, GR128, GR128>;
@@ -1200,7 +1200,7 @@ let Defs = [CC] in {
// Block AND.
let mayLoad = 1, mayStore = 1 in
- defm NC : MemorySS<"nc", 0xD4, z_nc, z_nc_loop>;
+ defm NC : MemorySS<"nc", 0xD4, z_nc>;
}
defm : RMWIByte<and, bdaddr12pair, NI>;
defm : RMWIByte<and, bdaddr20pair, NIY>;
@@ -1257,7 +1257,7 @@ let Defs = [CC] in {
// Block OR.
let mayLoad = 1, mayStore = 1 in
- defm OC : MemorySS<"oc", 0xD6, z_oc, z_oc_loop>;
+ defm OC : MemorySS<"oc", 0xD6, z_oc>;
}
defm : RMWIByte<or, bdaddr12pair, OI>;
defm : RMWIByte<or, bdaddr20pair, OIY>;
@@ -1297,7 +1297,7 @@ let Defs = [CC] in {
// Block XOR.
let mayLoad = 1, mayStore = 1 in
- defm XC : MemorySS<"xc", 0xD7, z_xc, z_xc_loop>;
+ defm XC : MemorySS<"xc", 0xD7, z_xc>;
}
defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
@@ -1624,7 +1624,7 @@ defm : ZXB<z_ucmp, GR64, CLGFR>;
// Memory-to-memory comparison.
let mayLoad = 1, Defs = [CC] in {
- defm CLC : CompareMemorySS<"clc", 0xD5, z_clc, z_clc_loop>;
+ defm CLC : CompareMemorySS<"clc", 0xD5, z_clc>;
def CLCL : SideEffectBinaryMemMemRR<"clcl", 0x0F, GR128, GR128>;
def CLCLE : SideEffectTernaryMemMemRS<"clcle", 0xA9, GR128, GR128>;
def CLCLU : SideEffectTernaryMemMemRSY<"clclu", 0xEB8F, GR128, GR128>;
@@ -2173,7 +2173,7 @@ let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, ADDR64>;
def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, ADDR64>;
let hasNoSchedulingInfo = 1 in
- def EXRL_Pseudo : Pseudo<(outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
+ def EXRL_Pseudo : Alias<6, (outs), (ins i64imm:$TargetOpc, ADDR64:$lenMinus1,
bdaddr12only:$bdl1, bdaddr12only:$bd2),
[]>;
}
@@ -2355,21 +2355,15 @@ let AddedComplexity = 4 in {
(RLLG GR64:$val, (LCR GR32:$shift), 0)>;
}
-// Peepholes for turning scalar operations into block operations.
-defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence,
- XCSequence, 1>;
-defm : BlockLoadStore<anyextloadi16, i32, MVCSequence, NCSequence, OCSequence,
- XCSequence, 2>;
-defm : BlockLoadStore<load, i32, MVCSequence, NCSequence, OCSequence,
- XCSequence, 4>;
-defm : BlockLoadStore<anyextloadi8, i64, MVCSequence, NCSequence,
- OCSequence, XCSequence, 1>;
-defm : BlockLoadStore<anyextloadi16, i64, MVCSequence, NCSequence, OCSequence,
- XCSequence, 2>;
-defm : BlockLoadStore<anyextloadi32, i64, MVCSequence, NCSequence, OCSequence,
- XCSequence, 4>;
-defm : BlockLoadStore<load, i64, MVCSequence, NCSequence, OCSequence,
- XCSequence, 8>;
+// Peepholes for turning scalar operations into block operations. The length
+// is given as one less for these pseudos.
+defm : BlockLoadStore<anyextloadi8, i32, MVCImm, NCImm, OCImm, XCImm, 0>;
+defm : BlockLoadStore<anyextloadi16, i32, MVCImm, NCImm, OCImm, XCImm, 1>;
+defm : BlockLoadStore<load, i32, MVCImm, NCImm, OCImm, XCImm, 3>;
+defm : BlockLoadStore<anyextloadi8, i64, MVCImm, NCImm, OCImm, XCImm, 0>;
+defm : BlockLoadStore<anyextloadi16, i64, MVCImm, NCImm, OCImm, XCImm, 1>;
+defm : BlockLoadStore<anyextloadi32, i64, MVCImm, NCImm, OCImm, XCImm, 3>;
+defm : BlockLoadStore<load, i64, MVCImm, NCImm, OCImm, XCImm, 7>;
//===----------------------------------------------------------------------===//
// Mnemonic Aliases
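The table above encodes the new convention directly: the MVCImm/NCImm/OCImm/XCImm pseudos carry the access length minus one, and the expansion adds the one back. A trivial standalone sketch of that round trip (illustrative helper names, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // 1-byte accesses use length operand 0, 2-byte -> 1, 4-byte -> 3, 8-byte -> 7.
    static uint64_t pseudoLenOperand(uint64_t Bytes) { return Bytes - 1; }
    static uint64_t expandedLength(uint64_t LenMinus1) { return LenMinus1 + 1; }

    int main() {
      assert(pseudoLenOperand(4) == 3);
      assert(expandedLength(pseudoLenOperand(8)) == 8);
      return 0;
    }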
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index b1964321c78a..9c985c16f082 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -209,10 +209,24 @@ void SystemZLongBranch::skipTerminator(BlockPosition &Position,
Position.Address += Terminator.ExtraRelaxSize;
}
+static unsigned getInstSizeInBytes(const MachineInstr &MI,
+ const SystemZInstrInfo *TII) {
+ unsigned Size = TII->getInstSizeInBytes(MI);
+ assert((Size ||
+ // These do not have a size:
+ MI.isDebugOrPseudoInstr() || MI.isPosition() || MI.isKill() ||
+ MI.isImplicitDef() || MI.getOpcode() == SystemZ::MemBarrier ||
+ // These have a size that may be zero:
+ MI.isInlineAsm() || MI.getOpcode() == SystemZ::STACKMAP ||
+ MI.getOpcode() == SystemZ::PATCHPOINT) &&
+ "Missing size value for instruction.");
+ return Size;
+}
+
// Return a description of terminator instruction MI.
TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
TerminatorInfo Terminator;
- Terminator.Size = TII->getInstSizeInBytes(MI);
+ Terminator.Size = getInstSizeInBytes(MI, TII);
if (MI.isConditionalBranch() || MI.isUnconditionalBranch()) {
switch (MI.getOpcode()) {
case SystemZ::J:
@@ -287,7 +301,7 @@ uint64_t SystemZLongBranch::initMBBInfo() {
MachineBasicBlock::iterator MI = MBB->begin();
MachineBasicBlock::iterator End = MBB->end();
while (MI != End && !MI->isTerminator()) {
- Block.Size += TII->getInstSizeInBytes(*MI);
+ Block.Size += getInstSizeInBytes(*MI, TII);
++MI;
}
skipNonTerminators(Position, Block);
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
index 9bee5e8d1864..4bc979de795d 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -46,9 +46,9 @@ static MachineBasicBlock *getSingleSchedPred(MachineBasicBlock *MBB,
// The loop header has two predecessors, return the latch, but not for a
// single block loop.
if (MBB->pred_size() == 2 && Loop != nullptr && Loop->getHeader() == MBB) {
- for (auto I = MBB->pred_begin(); I != MBB->pred_end(); ++I)
- if (Loop->contains(*I))
- PredMBB = (*I == MBB ? nullptr : *I);
+ for (MachineBasicBlock *Pred : MBB->predecessors())
+ if (Loop->contains(Pred))
+ PredMBB = (Pred == MBB ? nullptr : Pred);
}
assert ((PredMBB == nullptr || !Loop || Loop->contains(PredMBB))
@@ -106,13 +106,12 @@ void SystemZPostRASchedStrategy::enterMBB(MachineBasicBlock *NextMBB) {
// Emit incoming terminator(s). Be optimistic and assume that branch
// prediction will generally do "the right thing".
- for (MachineBasicBlock::iterator I = SinglePredMBB->getFirstTerminator();
- I != SinglePredMBB->end(); I++) {
- LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; I->dump(););
- bool TakenBranch = (I->isBranch() &&
- (TII->getBranchInfo(*I).isIndirect() ||
- TII->getBranchInfo(*I).getMBBTarget() == MBB));
- HazardRec->emitInstruction(&*I, TakenBranch);
+ for (MachineInstr &MI : SinglePredMBB->terminators()) {
+ LLVM_DEBUG(dbgs() << "** Emitting incoming branch: "; MI.dump(););
+ bool TakenBranch = (MI.isBranch() &&
+ (TII->getBranchInfo(MI).isIndirect() ||
+ TII->getBranchInfo(MI).getMBBTarget() == MBB));
+ HazardRec->emitInstruction(&MI, TakenBranch);
if (TakenBranch)
break;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 992b1512a077..927d97233286 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -102,17 +102,6 @@ def SDT_ZMemMemLengthCC : SDTypeProfile<1, 3,
SDTCisPtrTy<1>,
SDTCisPtrTy<2>,
SDTCisVT<3, i64>]>;
-def SDT_ZMemMemLoop : SDTypeProfile<0, 4,
- [SDTCisPtrTy<0>,
- SDTCisPtrTy<1>,
- SDTCisVT<2, i64>,
- SDTCisVT<3, i64>]>;
-def SDT_ZMemMemLoopCC : SDTypeProfile<1, 4,
- [SDTCisVT<0, i32>,
- SDTCisPtrTy<1>,
- SDTCisPtrTy<2>,
- SDTCisVT<3, i64>,
- SDTCisVT<4, i64>]>;
def SDT_ZString : SDTypeProfile<1, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
@@ -416,24 +405,14 @@ def z_atomic_cmp_swap_128 : SDNode<"SystemZISD::ATOMIC_CMP_SWAP_128",
def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_mvc_loop : SDNode<"SystemZISD::MVC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_nc : SDNode<"SystemZISD::NC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_nc_loop : SDNode<"SystemZISD::NC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_oc : SDNode<"SystemZISD::OC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_oc_loop : SDNode<"SystemZISD::OC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_xc : SDNode<"SystemZISD::XC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
-def z_xc_loop : SDNode<"SystemZISD::XC_LOOP", SDT_ZMemMemLoop,
- [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLengthCC,
[SDNPHasChain, SDNPMayLoad]>;
-def z_clc_loop : SDNode<"SystemZISD::CLC_LOOP", SDT_ZMemMemLoopCC,
- [SDNPHasChain, SDNPMayLoad]>;
def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZStringCC,
[SDNPHasChain, SDNPMayLoad]>;
def z_stpcpy : SDNode<"SystemZISD::STPCPY", SDT_ZString,
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 0062e39602f5..48cec176b006 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -190,7 +190,9 @@ bool SystemZRegisterInfo::getRegAllocationHints(
const MCPhysReg *
SystemZXPLINK64Registers::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_SystemZ_XPLINK64_SaveList;
+ const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+ return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_SaveList
+ : CSR_SystemZ_XPLINK64_SaveList;
}
const MCPhysReg *
@@ -211,7 +213,9 @@ SystemZELFRegisters::getCalleeSavedRegs(const MachineFunction *MF) const {
const uint32_t *
SystemZXPLINK64Registers::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
- return CSR_SystemZ_XPLINK64_RegMask;
+ const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
+ return Subtarget.hasVector() ? CSR_SystemZ_XPLINK64_Vector_RegMask
+ : CSR_SystemZ_XPLINK64_RegMask;
}
const uint32_t *
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 122504d4b44b..8ce01074873a 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZREGISTERINFO_H
#include "SystemZ.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#define GET_REGINFO_HEADER
@@ -44,9 +45,9 @@ inline bool isHighReg(unsigned int Reg) {
/// It is abstract; all calling conventions must override and
/// define the pure virtual member functions declared in this class.
class SystemZCallingConventionRegisters {
+
public:
- /// \returns the register that keeps the
- /// return function address.
+ /// \returns the register that keeps the return function address.
virtual int getReturnFunctionAddressRegister() = 0;
/// \returns the register that keeps the
@@ -65,6 +66,12 @@ public:
virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const = 0;
+ /// \returns the offset to the locals area.
+ virtual int getCallFrameSize() = 0;
+
+ /// \returns the stack pointer bias.
+ virtual int getStackPointerBias() = 0;
+
/// Destroys the object. Bogus destructor allowing derived classes
/// to override it.
virtual ~SystemZCallingConventionRegisters(){};
@@ -82,12 +89,18 @@ public:
int getFramePointerRegister() override final { return SystemZ::R8D; };
+ int getAddressOfCalleeRegister() { return SystemZ::R6D; };
+
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction *MF) const override final;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
+ int getCallFrameSize() override final { return 128; }
+
+ int getStackPointerBias() override final { return 2048; }
+
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZXPLINK64Registers(){};
};
@@ -110,6 +123,10 @@ public:
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
+ int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
+
+ int getStackPointerBias() override final { return 0; }
+
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZELFRegisters(){};
};
diff --git a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 4a9ea69d101c..f38e93109967 100644
--- a/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -17,32 +17,29 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-selectiondag-info"
-// Decide whether it is best to use a loop or straight-line code for
-// a block operation of Size bytes with source address Src and destination
-// address Dest. Sequence is the opcode to use for straight-line code
-// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP).
-// Return the chain for the completed operation.
-static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence,
- unsigned Loop, SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size) {
- EVT PtrVT = Src.getValueType();
- // The heuristic we use is to prefer loops for anything that would
- // require 7 or more MVCs. With these kinds of sizes there isn't
- // much to choose between straight-line code and looping code,
- // since the time will be dominated by the MVCs themselves.
- // However, the loop has 4 or 5 instructions (depending on whether
- // the base addresses can be proved equal), so there doesn't seem
- // much point using a loop for 5 * 256 bytes or fewer. Anything in
- // the range (5 * 256, 6 * 256) will need another instruction after
- // the loop, so it doesn't seem worth using a loop then either.
- // The next value up, 6 * 256, can be implemented in the same
- // number of straight-line MVCs as 6 * 256 - 1.
- if (Size > 6 * 256)
- return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, DL, PtrVT),
- DAG.getConstant(Size / 256, DL, PtrVT));
- return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src,
- DAG.getConstant(Size, DL, PtrVT));
+static SDVTList getMemMemVTs(unsigned Op, SelectionDAG &DAG) {
+ return Op == SystemZISD::CLC ? DAG.getVTList(MVT::i32, MVT::Other)
+ : DAG.getVTList(MVT::Other);
+}
+
+// Emit a mem-mem operation after subtracting one from the size; the one is
+// added back during pseudo expansion. Since the Reg case emitted here may be
+// converted by DAGCombiner into an Imm length, both cases are emitted in the
+// same way.
+static SDValue emitMemMemImm(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ uint64_t Size) {
+ return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src,
+ DAG.getConstant(Size - 1, DL, Src.getValueType()));
+}
+
+static SDValue emitMemMemReg(SelectionDAG &DAG, const SDLoc &DL, unsigned Op,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size) {
+ SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
+ DAG.getZExtOrTrunc(Size, DL, MVT::i64),
+ DAG.getConstant(-1, DL, MVT::i64));
+ return DAG.getNode(Op, DL, getMemMemVTs(Op, DAG), Chain, Dst, Src, LenMinus1);
}
SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
@@ -53,9 +50,10 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy(
return SDValue();
if (auto *CSize = dyn_cast<ConstantSDNode>(Size))
- return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
- Chain, Dst, Src, CSize->getZExtValue());
- return SDValue();
+ return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, Dst, Src,
+ CSize->getZExtValue());
+
+ return emitMemMemReg(DAG, DL, SystemZISD::MVC, Chain, Dst, Src, Size);
}
// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by
@@ -127,52 +125,23 @@ SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset(
// Handle the special case of a memset of 0, which can use XC.
if (CByte && CByte->getZExtValue() == 0)
- return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP,
- Chain, Dst, Dst, Bytes);
+ return emitMemMemImm(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Bytes);
// Copy the byte to the first location and then use MVC to copy
// it to the rest.
Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, Alignment);
SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst,
DAG.getConstant(1, DL, PtrVT));
- return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP,
- Chain, DstPlus1, Dst, Bytes - 1);
+ return emitMemMemImm(DAG, DL, SystemZISD::MVC, Chain, DstPlus1, Dst,
+ Bytes - 1);
}
// Variable length
- if (CByte && CByte->getZExtValue() == 0) {
+ if (CByte && CByte->getZExtValue() == 0)
// Handle the special case of a variable length memset of 0 with XC.
- SDValue LenMinus1 = DAG.getNode(ISD::ADD, DL, MVT::i64,
- DAG.getZExtOrTrunc(Size, DL, MVT::i64),
- DAG.getConstant(-1, DL, MVT::i64));
- SDValue TripC = DAG.getNode(ISD::SRL, DL, MVT::i64, LenMinus1,
- DAG.getConstant(8, DL, MVT::i64));
- return DAG.getNode(SystemZISD::XC_LOOP, DL, MVT::Other, Chain, Dst, Dst,
- LenMinus1, TripC);
- }
- return SDValue();
-}
+ return emitMemMemReg(DAG, DL, SystemZISD::XC, Chain, Dst, Dst, Size);
-// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size),
-// deciding whether to use a loop or straight-line code.
-static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
- SDValue Src1, SDValue Src2, uint64_t Size) {
- SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
- EVT PtrVT = Src1.getValueType();
- // A two-CLC sequence is a clear win over a loop, not least because it
- // needs only one branch. A three-CLC sequence needs the same number
- // of branches as a loop (i.e. 2), but is shorter. That brings us to
- // lengths greater than 768 bytes. It seems relatively likely that
- // a difference will be found within the first 768 bytes, so we just
- // optimize for the smallest number of branch instructions, in order
- // to avoid polluting the prediction buffer too much. A loop only ever
- // needs 2 branches, whereas a straight-line sequence would need 3 or more.
- if (Size > 3 * 256)
- return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, DL, PtrVT),
- DAG.getConstant(Size / 256, DL, PtrVT));
- return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2,
- DAG.getConstant(Size, DL, PtrVT));
+ return SDValue();
}
// Convert the current CC value into an integer that is 0 if CC == 0,
@@ -193,15 +162,16 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp(
SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1,
SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const {
+ SDValue CCReg;
+ // Swap operands to invert CC == 1 vs. CC == 2 cases.
if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) {
uint64_t Bytes = CSize->getZExtValue();
assert(Bytes > 0 && "Caller should have handled 0-size case");
- // Swap operands to invert CC == 1 vs. CC == 2 cases.
- SDValue CCReg = emitCLC(DAG, DL, Chain, Src2, Src1, Bytes);
- Chain = CCReg.getValue(1);
- return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
- }
- return std::make_pair(SDValue(), SDValue());
+ CCReg = emitMemMemImm(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Bytes);
+ } else
+ CCReg = emitMemMemReg(DAG, DL, SystemZISD::CLC, Chain, Src2, Src1, Size);
+ Chain = CCReg.getValue(1);
+ return std::make_pair(addIPMSequence(DL, CCReg, DAG), Chain);
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
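The memcmp lowering above keeps the long-standing operand swap ("Swap operands to invert CC == 1 vs. CC == 2 cases"). A standalone reminder of what that swap buys, shown with plain memcmp rather than the CLC condition code:

    #include <cassert>
    #include <cstring>

    // Comparing (Src2, Src1) instead of (Src1, Src2) flips which input is
    // reported as "lower", i.e. it negates the sign of the result. The
    // lowering relies on exactly this when converting CC into a return value.
    int main() {
      const char A[] = "abc", B[] = "abd";
      assert(std::memcmp(A, B, 3) < 0); // A compares lower
      assert(std::memcmp(B, A, 3) > 0); // swapped operands: sign inverted
      return 0;
    }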
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 3d27b70d6ef9..254e5e92449b 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -211,8 +211,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
LiveRegs.addLiveOuts(MBB);
// Iterate backwards through the block looking for instructions to change.
- for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
- MachineInstr &MI = *MBBI;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
switch (MI.getOpcode()) {
case SystemZ::IILF:
Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH);
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index bfcdee270f29..0f03d96655bf 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -89,9 +89,7 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasSoftFloat(false), TargetTriple(TT),
SpecialRegisters(initializeSpecialRegisters()),
InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(), FrameLowering() {}
-
-SystemZSubtarget::~SystemZSubtarget() { delete getSpecialRegisters(); }
+ TSInfo(), FrameLowering(SystemZFrameLowering::create(*this)) {}
bool SystemZSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index f6c155de44a0..67c5b8eb09b6 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -77,11 +77,11 @@ protected:
private:
Triple TargetTriple;
- SystemZCallingConventionRegisters *SpecialRegisters;
+ std::unique_ptr<SystemZCallingConventionRegisters> SpecialRegisters;
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
SystemZSelectionDAGInfo TSInfo;
- SystemZFrameLowering FrameLowering;
+ std::unique_ptr<const SystemZFrameLowering> FrameLowering;
SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU,
StringRef FS);
@@ -91,16 +91,23 @@ public:
SystemZSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM);
- ~SystemZSubtarget();
-
SystemZCallingConventionRegisters *getSpecialRegisters() const {
assert(SpecialRegisters && "Unsupported SystemZ calling convention");
- return SpecialRegisters;
+ return SpecialRegisters.get();
+ }
+
+ template <class SR> SR &getSpecialRegisters() const {
+ return *static_cast<SR *>(getSpecialRegisters());
}
const TargetFrameLowering *getFrameLowering() const override {
- return &FrameLowering;
+ return FrameLowering.get();
}
+
+ template <class TFL> const TFL *getFrameLowering() const {
+ return static_cast<const TFL *>(getFrameLowering());
+ }
+
const SystemZInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const SystemZRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
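The new templated accessors exist so callers that already know the concrete calling convention or frame lowering can avoid a static_cast at every use site. A self-contained mock of the pattern, written on the assumption that it mirrors the accessors above (the class names here are stand-ins, not the LLVM types; the 128/2048 values come from the XPLINK64 overrides added in SystemZRegisterInfo.h):

    #include <cassert>
    #include <memory>

    struct CallingConvRegs {
      virtual ~CallingConvRegs() = default;
      virtual int getCallFrameSize() = 0;
      virtual int getStackPointerBias() = 0;
    };

    struct XPLINK64Regs final : CallingConvRegs {
      int getCallFrameSize() override { return 128; }
      int getStackPointerBias() override { return 2048; }
    };

    struct ToySubtarget {
      std::unique_ptr<CallingConvRegs> SpecialRegisters =
          std::make_unique<XPLINK64Regs>();
      CallingConvRegs *getSpecialRegisters() const {
        return SpecialRegisters.get();
      }
      // Typed view for callers that already know the calling convention.
      template <class SR> SR &getSpecialRegisters() const {
        return *static_cast<SR *>(getSpecialRegisters());
      }
    };

    int main() {
      ToySubtarget ST;
      auto &Regs = ST.getSpecialRegisters<XPLINK64Regs>();
      assert(Regs.getCallFrameSize() == 128 && Regs.getStackPointerBias() == 2048);
      return 0;
    }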
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index a886f9b9d814..deb3358102ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -21,8 +21,8 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Scalar.h"
#include <string>
@@ -84,8 +84,9 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
// 128-bit floats are aligned only to 64 bits.
Ret += "-f128:64";
- // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
- if (VectorABI)
+ // When using the vector ABI on Linux, 128-bit vectors are also aligned to 64
+ // bits. On z/OS, vector types are always aligned to 64 bits.
+ if (VectorABI || TT.isOSzOS())
Ret += "-v128:64";
// We prefer 16-bit alignment for all globals; see above.
@@ -284,7 +285,7 @@ void SystemZPassConfig::addPreEmitPass() {
// vector instructions will be shortened into opcodes that compare
// elimination recognizes.
if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+ addPass(createSystemZShortenInstPass(getSystemZTargetMachine()));
// We eliminate comparisons here rather than earlier because some
// transformations can change the set of available CC values and we
@@ -310,7 +311,7 @@ void SystemZPassConfig::addPreEmitPass() {
// between the comparison and the branch, but it isn't clear whether
// preventing that would be a win or not.
if (getOptLevel() != CodeGenOpt::None)
- addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
+ addPass(createSystemZElimComparePass(getSystemZTargetMachine()));
addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
// Do final scheduling after all other optimizations, to get an
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
new file mode 100644
index 000000000000..a610a90d2069
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/SystemZTargetStreamer.h
@@ -0,0 +1,55 @@
+//=- SystemZTargetStreamer.h - SystemZ Target Streamer ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class SystemZTargetStreamer : public MCTargetStreamer {
+public:
+ SystemZTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+
+ typedef std::pair<MCInst, const MCSubtargetInfo *> MCInstSTIPair;
+ struct CmpMCInst {
+ bool operator()(const MCInstSTIPair &MCI_STI_A,
+ const MCInstSTIPair &MCI_STI_B) const {
+ if (MCI_STI_A.second != MCI_STI_B.second)
+ return uintptr_t(MCI_STI_A.second) < uintptr_t(MCI_STI_B.second);
+ const MCInst &A = MCI_STI_A.first;
+ const MCInst &B = MCI_STI_B.first;
+ assert(A.getNumOperands() == B.getNumOperands() &&
+ A.getNumOperands() == 5 && A.getOperand(2).getImm() == 1 &&
+ B.getOperand(2).getImm() == 1 && "Unexpected EXRL target MCInst");
+ if (A.getOpcode() != B.getOpcode())
+ return A.getOpcode() < B.getOpcode();
+ if (A.getOperand(0).getReg() != B.getOperand(0).getReg())
+ return A.getOperand(0).getReg() < B.getOperand(0).getReg();
+ if (A.getOperand(1).getImm() != B.getOperand(1).getImm())
+ return A.getOperand(1).getImm() < B.getOperand(1).getImm();
+ if (A.getOperand(3).getReg() != B.getOperand(3).getReg())
+ return A.getOperand(3).getReg() < B.getOperand(3).getReg();
+ if (A.getOperand(4).getImm() != B.getOperand(4).getImm())
+ return A.getOperand(4).getImm() < B.getOperand(4).getImm();
+ return false;
+ }
+ };
+ typedef std::map<MCInstSTIPair, MCSymbol *, CmpMCInst> EXRLT2SymMap;
+ EXRLT2SymMap EXRLTargets2Sym;
+
+ void emitConstantPools() override;
+
+ virtual void emitMachine(StringRef CPU) = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZTARGETSTREAMER_H
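EXRLTargets2Sym maps each distinct (MCInst, MCSubtargetInfo) pair to a single out-of-line EXRL target symbol, so CmpMCInst only has to supply a strict weak ordering over the fields that distinguish targets. A self-contained mock of that deduplication (stand-in types, not the real MC classes):

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>

    struct FakeInst { unsigned Opcode; unsigned Reg; long Imm; };

    // Order first by the subtarget pointer, then by the instruction fields.
    struct CmpFake {
      bool operator()(const std::pair<FakeInst, const void *> &A,
                      const std::pair<FakeInst, const void *> &B) const {
        if (A.second != B.second)
          return A.second < B.second;
        if (A.first.Opcode != B.first.Opcode)
          return A.first.Opcode < B.first.Opcode;
        if (A.first.Reg != B.first.Reg)
          return A.first.Reg < B.first.Reg;
        return A.first.Imm < B.first.Imm;
      }
    };

    int main() {
      std::map<std::pair<FakeInst, const void *>, std::string, CmpFake> Targets;
      std::pair<FakeInst, const void *> K{{0xD2, 1, 7}, nullptr};
      Targets.emplace(K, ".Ltgt0");           // first use creates a symbol
      auto R = Targets.emplace(K, ".Ltgt1");  // duplicate reuses the first one
      assert(!R.second && Targets.size() == 1);
      return 0;
    }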
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 03c4da8495ab..6d66ebfced05 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -243,7 +243,8 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
}
void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) {
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) {
// Find out if L contains a call, what the machine instruction count
// estimate is, and how many stores there are.
bool HasCall = false;
@@ -423,8 +424,8 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
(C->getType()->isVectorTy()
? dyn_cast_or_null<const ConstantInt>(C->getSplatValue())
: dyn_cast<const ConstantInt>(C));
- if (CVal != nullptr &&
- (CVal->getValue().isPowerOf2() || (-CVal->getValue()).isPowerOf2()))
+ if (CVal && (CVal->getValue().isPowerOf2() ||
+ CVal->getValue().isNegatedPowerOf2()))
DivRemConstPow2 = true;
else
DivRemConst = true;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 51cf557ae99b..db4ec794b3e4 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -52,7 +52,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP);
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE);
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -82,8 +83,7 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -115,8 +115,7 @@ public:
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
diff --git a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 36291e079882..acfafd91bc17 100644
--- a/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/SystemZTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 0a655a82b889..390457dbb2bc 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -101,46 +101,41 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
// dso_preemptable. At this point in time, the various IR producers
// have not been transitioned to always produce a dso_local when it
// is possible to do so.
- // In the case of ExternalSymbolSDNode, GV is null and we should just return
- // false. However, COFF currently relies on this to be true
//
// As a result we still have some logic in here to improve the quality of the
// generated code.
- // FIXME: Add a module level metadata for whether intrinsics should be assumed
- // local.
if (!GV)
- return TT.isOSBinFormatCOFF();
+ return false;
// If the IR producer requested that this GV be treated as dso local, obey.
if (GV->isDSOLocal())
return true;
- // DLLImport explicitly marks the GV as external.
- if (GV->hasDLLImportStorageClass())
- return false;
-
- // On MinGW, variables that haven't been declared with DLLImport may still
- // end up automatically imported by the linker. To make this feasible,
- // don't assume the variables to be DSO local unless we actually know
- // that for sure. This only has to be done for variables; for functions
- // the linker can insert thunks for calling functions from another DLL.
- if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() &&
- GV->isDeclarationForLinker() && isa<GlobalVariable>(GV))
- return false;
-
- // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
- // remain unresolved in the link, they can be resolved to zero, which is
- // outside the current DSO.
- if (TT.isOSBinFormatCOFF() && GV->hasExternalWeakLinkage())
- return false;
+ if (TT.isOSBinFormatCOFF()) {
+ // DLLImport explicitly marks the GV as external.
+ if (GV->hasDLLImportStorageClass())
+ return false;
+
+ // On MinGW, variables that haven't been declared with DLLImport may still
+ // end up automatically imported by the linker. To make this feasible,
+ // don't assume the variables to be DSO local unless we actually know
+ // that for sure. This only has to be done for variables; for functions
+ // the linker can insert thunks for calling functions from another DLL.
+ if (TT.isWindowsGNUEnvironment() && GV->isDeclarationForLinker() &&
+ isa<GlobalVariable>(GV))
+ return false;
+
+ // Don't mark 'extern_weak' symbols as DSO local. If these symbols remain
+ // unresolved in the link, they can be resolved to zero, which is outside
+ // the current DSO.
+ if (GV->hasExternalWeakLinkage())
+ return false;
+
+ // Every other GV is local on COFF.
+ return true;
+ }
- // Every other GV is local on COFF.
- // Make an exception for windows OS in the triple: Some firmware builds use
- // *-win32-macho triples. This (accidentally?) produced windows relocations
- // without GOT tables in older clang versions; Keep this behaviour.
- // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables
- // either.
- if (TT.isOSBinFormatCOFF() || TT.isOSWindows())
+ if (TT.isOSBinFormatGOFF())
return true;
if (TT.isOSBinFormatMachO()) {
@@ -149,13 +144,8 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return GV->isStrongDefinitionForLinker();
}
- // Due to the AIX linkage model, any global with default visibility is
- // considered non-local.
- if (TT.isOSBinFormatXCOFF())
- return false;
-
- assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
- assert(RM != Reloc::DynamicNoPIC);
+ assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm() ||
+ TT.isOSBinFormatXCOFF());
return false;
}
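A compressed restatement of the reorganized logic above, as a sketch only (a toy function with illustrative parameters, not the TargetMachine API; the ELF, Mach-O, and GOFF details are elided): a null GV now answers false for every object format, and all COFF special cases sit under one branch that falls through to "local by default".

    #include <cassert>

    enum class Fmt { COFF, ELF };

    static bool toyShouldAssumeDSOLocal(Fmt F, bool HasGV, bool MarkedDSOLocal,
                                        bool DLLImport, bool MinGWDeclaredVar,
                                        bool ExternWeak) {
      if (!HasGV)
        return false; // previously COFF answered true for a null GV
      if (MarkedDSOLocal)
        return true;  // obey the IR producer
      if (F == Fmt::COFF)
        return !DLLImport && !MinGWDeclaredVar && !ExternWeak;
      return false;   // ELF and friends: left to the remaining checks
    }

    int main() {
      assert(!toyShouldAssumeDSOLocal(Fmt::COFF, false, false, false, false, false));
      assert(toyShouldAssumeDSOLocal(Fmt::COFF, true, false, false, false, false));
      assert(!toyShouldAssumeDSOLocal(Fmt::COFF, true, false, true, false, false));
      return 0;
    }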
diff --git a/llvm/lib/Target/TargetMachineC.cpp b/llvm/lib/Target/TargetMachineC.cpp
index 60fe84cadacc..55047a1bb3cd 100644
--- a/llvm/lib/Target/TargetMachineC.cpp
+++ b/llvm/lib/Target/TargetMachineC.cpp
@@ -18,10 +18,10 @@
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CodeGenCWrappers.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index a3309a68c76d..7e92e4b33812 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -25,7 +25,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
diff --git a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
index 20d609bc6b32..72c40cbe78c4 100644
--- a/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
+++ b/llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp
@@ -18,7 +18,7 @@
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
index 9a6ae90b5c73..29c209934680 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp
@@ -15,8 +15,8 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -164,7 +164,8 @@ public:
llvm_unreachable("relaxInstruction() should not be called");
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override {
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override {
if ((Count % 8) != 0)
return false;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
index 76824335239b..9f29fc092c69 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
@@ -37,5 +37,4 @@ VEELFMCAsmInfo::VEELFMCAsmInfo(const Triple &TheTriple) {
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
- UseIntegratedAssembler = false;
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index 4c480c050274..f4fbf763e59c 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -18,8 +18,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
index a95a299def88..7c4bf1cfd672 100644
--- a/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
+++ b/llvm/lib/Target/VE/TargetInfo/VETargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/VETargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VEAsmPrinter.cpp b/llvm/lib/Target/VE/VEAsmPrinter.cpp
index 08a75b6b8c55..af69d04a17ca 100644
--- a/llvm/lib/Target/VE/VEAsmPrinter.cpp
+++ b/llvm/lib/Target/VE/VEAsmPrinter.cpp
@@ -30,7 +30,7 @@
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index b297e0fcd1a2..32315543826a 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -2508,13 +2508,12 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) {
case ISD::CopyToReg:
// Check all uses of selections, bit operations, and copies. If all of them
// are safe, optimize the truncate to extract_subreg.
- for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
- UI != UE; ++UI) {
- switch ((*UI)->getOpcode()) {
+ for (const SDNode *U : User->uses()) {
+ switch (U->getOpcode()) {
default:
// If the use is an instruction which treats the source operand as i32,
// it is safe to avoid truncate here.
- if (isI32Insn(*UI, N))
+ if (isI32Insn(U, N))
continue;
break;
case ISD::ANY_EXTEND:
@@ -2561,10 +2560,7 @@ SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
return SDValue();
// Check all uses of this TRUNCATE.
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
- ++UI) {
- SDNode *User = *UI;
-
+ for (const SDNode *User : N->uses()) {
// Make sure that we're not going to replace TRUNCATE for non i32
// instructions.
//
diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp
index 9770052ff913..ddcfb9da8249 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.cpp
+++ b/llvm/lib/Target/VE/VEInstrInfo.cpp
@@ -20,10 +20,10 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#define DEBUG_TYPE "ve-instr-info"
@@ -99,7 +99,7 @@ static bool isUncondBranchOpcode(int Opc) {
#define BRKIND(NAME) (Opc == NAME##a || Opc == NAME##a_nt || Opc == NAME##a_t)
// VE has other branch relative always instructions for word/double/float,
- // but we use only long branches in our lower. So, sanity check it here.
+ // but we use only long branches in our lowering, so check it here.
assert(!BRKIND(BRCFW) && !BRKIND(BRCFD) && !BRKIND(BRCFS) &&
"Branch relative word/double/float always instructions should not be "
"used!");
@@ -127,7 +127,7 @@ static bool isIndirectBranchOpcode(int Opc) {
#define BRKIND(NAME) \
(Opc == NAME##ari || Opc == NAME##ari_nt || Opc == NAME##ari_t)
// VE has other branch always instructions for word/double/float, but
- // we use only long branches in our lower. So, sanity check it here.
+ // we use only long branches in our lowering, so check it here.
assert(!BRKIND(BCFW) && !BRKIND(BCFD) && !BRKIND(BCFS) &&
"Branch word/double/float always instructions should not be used!");
return BRKIND(BCFL);
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 2f77daae7130..c3abbe2cafab 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -634,9 +634,7 @@ multiclass RRIm<string opcStr, bits<8>opc,
// Special RR multiclass for 128 bits shift left instruction.
// e.g. SLD
let Constraints = "$hi = $sx", DisableEncoding = "$hi", hasSideEffects = 0 in
-multiclass RRILDm<string opcStr, bits<8>opc,
- RegisterClass RC, ValueType Ty,
- SDPatternOperator OpNode = null_frag> {
+multiclass RRILDm<string opcStr, bits<8>opc, RegisterClass RC> {
def rrr : RR<opc, (outs RC:$sx), (ins RC:$hi, RC:$sz, I32:$sy),
!strconcat(opcStr, " $sx, $sz, $sy")>;
let cz = 0 in
@@ -653,9 +651,7 @@ multiclass RRILDm<string opcStr, bits<8>opc,
// Special RR multiclass for 128 bits shift right instruction.
// e.g. SRD
let Constraints = "$low = $sx", DisableEncoding = "$low", hasSideEffects = 0 in
-multiclass RRIRDm<string opcStr, bits<8>opc,
- RegisterClass RC, ValueType Ty,
- SDPatternOperator OpNode = null_frag> {
+multiclass RRIRDm<string opcStr, bits<8>opc, RegisterClass RC> {
def rrr : RR<opc, (outs RC:$sx), (ins RC:$sz, RC:$low, I32:$sy),
!strconcat(opcStr, " $sx, $sz, $sy")>;
let cz = 0 in
@@ -685,7 +681,7 @@ multiclass RRI1m<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
// Special RR multiclass for MRG instruction.
// e.g. MRG
let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0 in
-multiclass RRMRGm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+multiclass RRMRGm<string opcStr, bits<8>opc, RegisterClass RC> {
def rr : RR<opc, (outs RC:$sx), (ins RC:$sy, RC:$sz, RC:$sd),
!strconcat(opcStr, " $sx, $sy, $sz")>;
let cy = 0 in
@@ -719,7 +715,7 @@ multiclass RRSWPm<string opcStr, bits<8>opc,
// e.g. CMOVL, CMOVW, CMOVD, and etc.
let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0,
cfw = ? in
-multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
+multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC> {
def rr : RR<opc, (outs I64:$sx), (ins CCOp:$cfw, RC:$sy, I64:$sz, I64:$sd),
!strconcat(opcStr, " $sx, $sz, $sy")>;
let cy = 0 in
@@ -740,8 +736,8 @@ multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty> {
// e.g. CVTWDSX, CVTWDZX, CVTWSSX, and etc.
// sz{3-0} = rounding mode
let cz = 0, hasSideEffects = 0 in
-multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
- RegisterClass RCi, ValueType Tyi> {
+multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo,
+ RegisterClass RCi> {
def r : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, RCi:$sy),
!strconcat(opcStr, "${rd} $sx, $sy")> {
bits<4> rd;
@@ -1265,7 +1261,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm NND : RRNCm<"nnd", 0x54, I64, i64, and_not>;
// Section 8.5.6 - MRG (Merge)
-defm MRG : RRMRGm<"mrg", 0x56, I64, i64>;
+defm MRG : RRMRGm<"mrg", 0x56, I64>;
// Section 8.5.7 - LDZ (Leading Zero Count)
def ctlz_pat : PatFrags<(ops node:$src),
@@ -1297,10 +1293,10 @@ def : Pat<(i32 (bswap (i32 mimm:$src))),
(EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>;
// Section 8.5.11 - CMOV (Conditional Move)
-let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>;
-let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>;
-let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64>;
-let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32>;
+let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64>;
+let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32>;
+let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64>;
+let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32>;
def : MnemonicAlias<"cmov.l", "cmov.l.at">;
def : MnemonicAlias<"cmov.w", "cmov.w.at">;
def : MnemonicAlias<"cmov.d", "cmov.d.at">;
@@ -1315,14 +1311,14 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SLL : RRIm<"sll", 0x65, I64, i64, shl>;
// Section 8.6.2 - SLD (Shift Left Double)
-defm SLD : RRILDm<"sld", 0x64, I64, i64>;
+defm SLD : RRILDm<"sld", 0x64, I64>;
// Section 8.6.3 - SRL (Shift Right Logical)
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm SRL : RRIm<"srl", 0x75, I64, i64, srl>;
// Section 8.6.4 - SRD (Shift Right Double)
-defm SRD : RRIRDm<"srd", 0x74, I64, i64>;
+defm SRD : RRIRDm<"srd", 0x74, I64>;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
@@ -1405,16 +1401,16 @@ defm FCMPQ : RRNCbm<"fcmp.q", 0x7D, I64, f64, F128, f128, null_frag, simm7fp,
// Section 8.7.11 - FIX (Convert to Fixed Point)
// cx: double/float, cw: sx/zx, sz{0-3} = round
let cx = 0, cw = 0 /* sign extend */ in
-defm CVTWDSX : CVTRDm<"cvt.w.d.sx", 0x4E, I32, i32, I64, f64>;
+defm CVTWDSX : CVTRDm<"cvt.w.d.sx", 0x4E, I32, I64>;
let cx = 0, cw = 1 /* zero extend */ in
-defm CVTWDZX : CVTRDm<"cvt.w.d.zx", 0x4E, I32, i32, I64, f64>;
+defm CVTWDZX : CVTRDm<"cvt.w.d.zx", 0x4E, I32, I64>;
let cx = 1, cw = 0 /* sign extend */ in
-defm CVTWSSX : CVTRDm<"cvt.w.s.sx", 0x4E, I32, i32, F32, f32>;
+defm CVTWSSX : CVTRDm<"cvt.w.s.sx", 0x4E, I32, F32>;
let cx = 1, cw = 1 /* zero extend */ in
-defm CVTWSZX : CVTRDm<"cvt.w.s.zx", 0x4E, I32, i32, F32, f32>;
+defm CVTWSZX : CVTRDm<"cvt.w.s.zx", 0x4E, I32, F32>;
// Section 8.7.12 - FIXX (Convert to Fixed Point)
-defm CVTLD : CVTRDm<"cvt.l.d", 0x4F, I64, i64, I64, f64>;
+defm CVTLD : CVTRDm<"cvt.l.d", 0x4F, I64, I64>;
// Section 8.7.13 - FLT (Convert to Floating Point)
defm CVTDW : CVTm<"cvt.d.w", 0x5E, I64, f64, I32, i32, sint_to_fp>;
@@ -1836,7 +1832,7 @@ multiclass ZXATMLDm<SDPatternOperator from, int VAL,
def : Pat<(i64 (and (anyext (from ADDRzii:$addr)), VAL)),
(i2l (tozii MEMzii:$addr))>;
}
-multiclass ZXATMLD32m<SDPatternOperator from, int VAL,
+multiclass ZXATMLD32m<SDPatternOperator from,
RM torri, RM torii,
RM tozri, RM tozii> {
def : Pat<(i64 (zext (from ADDRrri:$addr))),
@@ -1852,8 +1848,7 @@ defm : ZXATMLDm<atomic_load_8, 0xFF, LD1BZXrri, LD1BZXrii, LD1BZXzri,
LD1BZXzii>;
defm : ZXATMLDm<atomic_load_16, 0xFFFF, LD2BZXrri, LD2BZXrii, LD2BZXzri,
LD2BZXzii>;
-defm : ZXATMLD32m<atomic_load_32, 0xFFFFFFFF, LDLZXrri, LDLZXrii, LDLZXzri,
- LDLZXzii>;
+defm : ZXATMLD32m<atomic_load_32, LDLZXrri, LDLZXrii, LDLZXzri, LDLZXzii>;
// Atomic stores
multiclass ATMSTm<SDPatternOperator from, ValueType ty,
@@ -1871,7 +1866,6 @@ defm : ATMSTm<atomic_store_64, i64, STrri, STrii, STzri, STzii>;
// Optimized atomic stores with truncate
multiclass TRATMSTm<SDPatternOperator from,
- ValueType ty,
RM torri,
RM torii,
RM tozri,
@@ -1885,9 +1879,9 @@ multiclass TRATMSTm<SDPatternOperator from,
def : Pat<(from ADDRzii:$addr, (i32 (trunc i64:$src))),
(tozii MEMzii:$addr, (EXTRACT_SUBREG $src, sub_i32))>;
}
-defm : TRATMSTm<atomic_store_8, i32, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
-defm : TRATMSTm<atomic_store_16, i32, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
-defm : TRATMSTm<atomic_store_32, i32, STLrri, STLrii, STLzri, STLzii>;
+defm : TRATMSTm<atomic_store_8, ST1Brri, ST1Brii, ST1Bzri, ST1Bzii>;
+defm : TRATMSTm<atomic_store_16, ST2Brri, ST2Brii, ST2Bzri, ST2Bzii>;
+defm : TRATMSTm<atomic_store_32, STLrri, STLrii, STLzri, STLzii>;
// Atomic swaps
def : Pat<(i32 (ts1am i64:$src, i32:$flag, i32:$new)),
diff --git a/llvm/lib/Target/VE/VESubtarget.cpp b/llvm/lib/Target/VE/VESubtarget.cpp
index daa6cfb8aa84..78ac742ebf52 100644
--- a/llvm/lib/Target/VE/VESubtarget.cpp
+++ b/llvm/lib/Target/VE/VESubtarget.cpp
@@ -12,8 +12,8 @@
#include "VESubtarget.h"
#include "VE.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VETargetMachine.cpp b/llvm/lib/Target/VE/VETargetMachine.cpp
index 414ae09431c0..9f294f15da91 100644
--- a/llvm/lib/Target/VE/VETargetMachine.cpp
+++ b/llvm/lib/Target/VE/VETargetMachine.cpp
@@ -17,7 +17,7 @@
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
index 7003fb387670..ac03e0bf627e 100644
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -20,8 +20,7 @@ include "VVPInstrInfo.td"
multiclass VectorBinaryArith<
SDPatternOperator OpNode,
ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
- string OpBaseName,
- SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+ string OpBaseName> {
// No mask.
def : Pat<(OpNode
(any_broadcast ScalarVT:$sx),
@@ -56,10 +55,10 @@ multiclass VectorBinaryArith_ShortLong<
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
defm : VectorBinaryArith<OpNode,
LongScalarVT, LongDataVT, v256i1,
- LongOpBaseName, simm7, LO7>;
+ LongOpBaseName>;
defm : VectorBinaryArith<OpNode,
ShortScalarVT, ShortDataVT, v256i1,
- ShortOpBaseName, simm7, LO7>;
+ ShortOpBaseName>;
}
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index eb1dd879941a..7d1e6c553f81 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -31,9 +31,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -431,10 +431,10 @@ public:
bool checkForP2AlignIfLoadStore(OperandVector &Operands, StringRef InstName) {
// FIXME: there is probably a cleaner way to do this.
- auto IsLoadStore = InstName.find(".load") != StringRef::npos ||
- InstName.find(".store") != StringRef::npos ||
- InstName.find("prefetch") != StringRef::npos;
- auto IsAtomic = InstName.find("atomic.") != StringRef::npos;
+ auto IsLoadStore = InstName.contains(".load") ||
+ InstName.contains(".store") ||
+ InstName.contains("prefetch");
+ auto IsAtomic = InstName.contains("atomic.");
if (IsLoadStore || IsAtomic) {
// Parse load/store operands of the form: offset:p2align=align
if (IsLoadStore && isNext(AsmToken::Colon)) {
@@ -450,7 +450,7 @@ public:
// v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
// index. We need to avoid parsing an extra alignment operand for the
// lane index.
- auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos;
+ auto IsLoadStoreLane = InstName.contains("_lane");
if (IsLoadStoreLane && Operands.size() == 4)
return false;
// Alignment not specified (or atomics, must use default alignment).
@@ -1114,6 +1114,8 @@ public:
void onEndOfFunction(SMLoc ErrorLoc) {
TC.endOfFunction(ErrorLoc);
+ // Reset the type checker state.
+ TC.Clear();
// Automatically output a .size directive, so it becomes optional for the
// user.
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index 2f9245a7c66c..a6b5d4252f2f 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -31,10 +31,10 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -74,6 +74,9 @@ bool WebAssemblyAsmTypeCheck::typeError(SMLoc ErrorLoc, const Twine &Msg) {
// which are mostly not helpful.
if (TypeErrorThisFunction)
return true;
+ // If we're currently in unreachable code, we suppress errors as well.
+ if (Unreachable)
+ return true;
TypeErrorThisFunction = true;
dumpTypeStack("current stack: ");
return Parser.Error(ErrorLoc, Msg);
@@ -89,8 +92,7 @@ bool WebAssemblyAsmTypeCheck::popType(SMLoc ErrorLoc,
: StringRef(
"empty stack while popping value"));
}
- auto PVT = Stack.back();
- Stack.pop_back();
+ auto PVT = Stack.pop_back_val();
if (EVT.hasValue() && EVT.getValue() != PVT) {
return typeError(
ErrorLoc, StringRef("popped ") + WebAssembly::typeToString(PVT) +
@@ -155,8 +157,12 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
break;
case wasm::WASM_SYMBOL_TYPE_FUNCTION:
case wasm::WASM_SYMBOL_TYPE_DATA:
- if (SymRef->getKind() == MCSymbolRefExpr::VK_GOT) {
+ switch (SymRef->getKind()) {
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
Type = is64 ? wasm::ValType::I64 : wasm::ValType::I32;
+ return false;
+ default:
break;
}
LLVM_FALLTHROUGH;
@@ -167,17 +173,18 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
return false;
}
-void WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) {
+bool WebAssemblyAsmTypeCheck::endOfFunction(SMLoc ErrorLoc) {
// Check the return types.
for (auto RVT : llvm::reverse(ReturnTypes)) {
- popType(ErrorLoc, RVT);
+ if (popType(ErrorLoc, RVT))
+ return true;
}
if (!Stack.empty()) {
- typeError(ErrorLoc,
- std::to_string(Stack.size()) + " superfluous return values");
+ return typeError(ErrorLoc, std::to_string(Stack.size()) +
+ " superfluous return values");
}
- // Reset the type checker state.
- Clear();
+ Unreachable = true;
+ return false;
}
bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
@@ -213,13 +220,20 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
if (popType(ErrorLoc, {}))
return true;
} else if (Name == "end_block" || Name == "end_loop" || Name == "end_if" ||
- Name == "else") {
+ Name == "else" || Name == "end_try") {
if (checkEnd(ErrorLoc))
return true;
+ if (Name == "end_block")
+ Unreachable = false;
+ } else if (Name == "return") {
+ if (endOfFunction(ErrorLoc))
+ return true;
} else if (Name == "call_indirect" || Name == "return_call_indirect") {
// Function value.
if (popType(ErrorLoc, wasm::ValType::I32)) return true;
if (checkSig(ErrorLoc, LastSig)) return true;
+ if (Name == "return_call_indirect" && endOfFunction(ErrorLoc))
+ return true;
} else if (Name == "call" || Name == "return_call") {
const MCSymbolRefExpr *SymRef;
if (getSymRef(ErrorLoc, Inst, SymRef))
@@ -230,9 +244,25 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst) {
return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
" missing .functype");
if (checkSig(ErrorLoc, *Sig)) return true;
+ if (Name == "return_call" && endOfFunction(ErrorLoc))
+ return true;
+ } else if (Name == "catch") {
+ const MCSymbolRefExpr *SymRef;
+ if (getSymRef(ErrorLoc, Inst, SymRef))
+ return true;
+ const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ const auto *Sig = WasmSym->getSignature();
+ if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_TAG)
+ return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
+ " missing .tagtype");
+ // The catch instruction pushes values whose types are specified in the
+ // tag's "params" part.
+ Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end());
} else if (Name == "ref.null") {
auto VT = static_cast<wasm::ValType>(Inst.getOperand(0).getImm());
Stack.push_back(VT);
+ } else if (Name == "unreachable") {
+ Unreachable = true;
} else {
// The current instruction is a stack instruction which doesn't have
// explicit operands that indicate push/pop types, so we get those from
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
index a15a69b50418..aa35213ccca3 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
@@ -32,15 +32,9 @@ class WebAssemblyAsmTypeCheck final {
SmallVector<wasm::ValType, 4> ReturnTypes;
wasm::WasmSignature LastSig;
bool TypeErrorThisFunction = false;
+ bool Unreachable = false;
bool is64;
- void Clear() {
- Stack.clear();
- LocalTypes.clear();
- ReturnTypes.clear();
- TypeErrorThisFunction = false;
- }
-
void dumpTypeStack(Twine Msg);
bool typeError(SMLoc ErrorLoc, const Twine &Msg);
bool popType(SMLoc ErrorLoc, Optional<wasm::ValType> EVT);
@@ -57,8 +51,16 @@ public:
void funcDecl(const wasm::WasmSignature &Sig);
void localDecl(const SmallVector<wasm::ValType, 4> &Locals);
void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; }
- void endOfFunction(SMLoc ErrorLoc);
+ bool endOfFunction(SMLoc ErrorLoc);
bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst);
+
+ void Clear() {
+ Stack.clear();
+ LocalTypes.clear();
+ ReturnTypes.clear();
+ TypeErrorThisFunction = false;
+ Unreachable = false;
+ }
};
} // end namespace llvm
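The two hunks above convert the assembler type checker to an error-propagation style: every check returns true on failure so callers can bail out early, and a new Unreachable flag marks the value stack as meaningless after return, return_call, or unreachable. A minimal standalone sketch of that style follows (simplified stand-in types, not LLVM code):

#include <cstdio>
#include <string>
#include <vector>

struct MiniTypeChecker {
  std::vector<int> Stack;        // stand-in for the wasm value-type stack
  std::vector<int> ReturnTypes;  // declared result types of the function
  bool Unreachable = false;

  bool typeError(const std::string &Msg) {
    std::fprintf(stderr, "error: %s\n", Msg.c_str());
    return true;                 // true == an error was reported
  }

  bool popType(int Expected) {
    if (Stack.empty())
      return typeError("empty stack while popping a value");
    int Got = Stack.back();
    Stack.pop_back();
    if (Got != Expected)
      return typeError("popped wrong type");
    return false;
  }

  // Mirrors the new bool-returning endOfFunction(): pop the declared results,
  // complain about leftovers, and mark the following code as unreachable.
  bool endOfFunction() {
    for (auto It = ReturnTypes.rbegin(); It != ReturnTypes.rend(); ++It)
      if (popType(*It))
        return true;
    if (!Stack.empty())
      return typeError(std::to_string(Stack.size()) +
                       " superfluous return values");
    Unreachable = true;
    return false;
  }
};

int main() {
  MiniTypeChecker TC;
  TC.ReturnTypes = {1};               // one result expected
  TC.Stack = {1};                     // and one value of that type on the stack
  return TC.endOfFunction() ? 1 : 0;  // succeeds and sets Unreachable
}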
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 6770ccc9df6a..2e1e4f061219 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -24,9 +24,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/LEB128.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index d88311197c1a..85bb52c03e80 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -59,7 +59,8 @@ public:
return false;
}
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
const MCFixupKindInfo &
@@ -83,8 +84,8 @@ WebAssemblyAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
return Infos[Kind - FirstTargetFixupKind];
}
-bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS,
- uint64_t Count) const {
+bool WebAssemblyAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
for (uint64_t I = 0; I < Count; ++I)
OS << char(WebAssembly::Nop);
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 31cccb24d798..8f670ec88897 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -19,8 +19,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-mc-target-desc"
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 99defb42e380..d07bfce9abc1 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -95,6 +95,9 @@ enum TOF {
// platforms.
MO_GOT,
+ // Same as MO_GOT but the address stored in the global is a TLS address.
+ MO_GOT_TLS,
+
// On a symbol operand this indicates that the immediate is the symbol
// address relative the __memory_base wasm global.
// Only applicable to data symbols.
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index f67fab946746..405712906c40 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -74,6 +74,7 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(
switch (Modifier) {
case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_WASM_GOT_TLS:
return wasm::R_WASM_GLOBAL_INDEX_LEB;
case MCSymbolRefExpr::VK_WASM_TBREL:
assert(SymA.isFunction());
@@ -88,7 +89,10 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(
: wasm::R_WASM_MEMORY_ADDR_REL_SLEB;
case MCSymbolRefExpr::VK_WASM_TYPEINDEX:
return wasm::R_WASM_TYPE_INDEX_LEB;
+ case MCSymbolRefExpr::VK_None:
+ break;
default:
+ report_fatal_error("unknown VariantKind");
break;
}
diff --git a/llvm/lib/Target/WebAssembly/README.txt b/llvm/lib/Target/WebAssembly/README.txt
index 934a3ba3bc4a..ab1cd8f0f84a 100644
--- a/llvm/lib/Target/WebAssembly/README.txt
+++ b/llvm/lib/Target/WebAssembly/README.txt
@@ -2,11 +2,11 @@
The object format emitted by the WebAssembly backend is documented in:
- * https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ * https://github.com/WebAssembly/tool-conventions/blob/main/Linking.md
The C ABI is described in:
- * https://github.com/WebAssembly/tool-conventions/blob/master/BasicCABI.md
+ * https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md
For more information on WebAssembly itself, see the home page:
@@ -31,8 +31,8 @@ For more information, see:
The following documents contain some information on the semantics and binary
encoding of WebAssembly itself:
- * https://github.com/WebAssembly/design/blob/master/Semantics.md
- * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+ * https://github.com/WebAssembly/design/blob/main/Semantics.md
+ * https://github.com/WebAssembly/design/blob/main/BinaryEncoding.md
Some notes on ways that the generated code could be improved follow:
diff --git a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index f9a96819905f..e3daf6bfa72e 100644
--- a/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/WebAssemblyTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-target-info"
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index 673dc9521ced..f6e96d9b2877 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -15,6 +15,8 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYUTILITIES_H
+#include "llvm/IR/DerivedTypes.h"
+
namespace llvm {
class MachineBasicBlock;
@@ -35,18 +37,35 @@ enum WasmAddressSpace : unsigned {
// linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
// to these pointers are lowered to global.get / global.set or local.get /
// local.set, as appropriate.
- WASM_ADDRESS_SPACE_WASM_VAR = 1
+ WASM_ADDRESS_SPACE_VAR = 1,
+ // A non-integral address space for externref values
+ WASM_ADDRESS_SPACE_EXTERNREF = 10,
+ // A non-integral address space for funcref values
+ WASM_ADDRESS_SPACE_FUNCREF = 20,
};
inline bool isDefaultAddressSpace(unsigned AS) {
return AS == WASM_ADDRESS_SPACE_DEFAULT;
}
inline bool isWasmVarAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_WASM_VAR;
+ return AS == WASM_ADDRESS_SPACE_VAR;
}
inline bool isValidAddressSpace(unsigned AS) {
return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
}
+inline bool isFuncrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
+}
+inline bool isExternrefType(const Type *Ty) {
+ return isa<PointerType>(Ty) &&
+ Ty->getPointerAddressSpace() ==
+ WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
+}
+inline bool isRefType(const Type *Ty) {
+ return isFuncrefType(Ty) || isExternrefType(Ty);
+}
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
bool mayThrow(const MachineInstr &MI);
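The helpers added above classify IR pointer types purely by address space: externref values live in address space 10 and funcref values in address space 20. A small sketch of how such types would be built and recognised, assuming LLVM headers on the include path (the i8 pointee is arbitrary and chosen only for illustration):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cstdio>

int main() {
  llvm::LLVMContext Ctx;
  llvm::Type *I8 = llvm::Type::getInt8Ty(Ctx);
  // Reference values are modelled as pointers in dedicated address spaces.
  auto *ExternrefTy = llvm::PointerType::get(I8, /*AddressSpace=*/10);
  auto *FuncrefTy = llvm::PointerType::get(I8, /*AddressSpace=*/20);
  // isExternrefType/isFuncrefType from the patch would return true for these.
  std::printf("externref AS = %u, funcref AS = %u\n",
              ExternrefTy->getPointerAddressSpace(),
              FuncrefTy->getPointerAddressSpace()); // prints 10 and 20
  return 0;
}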
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h
index 9eb960d018d3..803786e0c9c2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -25,12 +25,12 @@ class ModulePass;
class FunctionPass;
// LLVM IR passes.
-ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH,
- bool EnableSjLj);
+ModulePass *createWebAssemblyLowerEmscriptenEHSjLj();
ModulePass *createWebAssemblyLowerGlobalDtors();
ModulePass *createWebAssemblyAddMissingPrototypes();
ModulePass *createWebAssemblyFixFunctionBitcasts();
FunctionPass *createWebAssemblyOptimizeReturned();
+FunctionPass *createWebAssemblyLowerRefTypesIntPtrConv();
// ISel and immediate followup passes.
FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
@@ -56,7 +56,7 @@ FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
FunctionPass *createWebAssemblyDebugFixup();
FunctionPass *createWebAssemblyPeephole();
-FunctionPass *createWebAssemblyMCLowerPrePass();
+ModulePass *createWebAssemblyMCLowerPrePass();
// PassRegistry initialization declarations.
void initializeWebAssemblyAddMissingPrototypesPass(PassRegistry &);
@@ -85,6 +85,7 @@ void initializeWebAssemblyRegNumberingPass(PassRegistry &);
void initializeWebAssemblyDebugFixupPass(PassRegistry &);
void initializeWebAssemblyPeepholePass(PassRegistry &);
void initializeWebAssemblyMCLowerPrePassPass(PassRegistry &);
+void initializeWebAssemblyLowerRefTypesIntPtrConvPass(PassRegistry &);
namespace WebAssembly {
enum TargetIndex {
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.td b/llvm/lib/Target/WebAssembly/WebAssembly.td
index c1872dd91c58..a529c6217189 100644
--- a/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -25,6 +25,9 @@ include "llvm/Target/Target.td"
def FeatureSIMD128 : SubtargetFeature<"simd128", "SIMDLevel", "SIMD128",
"Enable 128-bit SIMD">;
+def FeatureRelaxedSIMD : SubtargetFeature<"relaxed-simd", "SIMDLevel", "RelaxedSIMD",
+ "Enable relaxed-simd instructions">;
+
def FeatureAtomics : SubtargetFeature<"atomics", "HasAtomics", "true",
"Enable Atomics">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
index 530a55cda0e5..90e819912847 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp
@@ -86,27 +86,37 @@ bool WebAssemblyAddMissingPrototypes::runOnModule(Module &M) {
F.getName());
}
- // Create a function prototype based on the first call site (first bitcast)
- // that we find.
+ // Find calls of this function, looking through bitcasts.
+ SmallVector<CallBase *> Calls;
+ SmallVector<Value *> Worklist;
+ Worklist.push_back(&F);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ for (User *U : V->users()) {
+ if (auto *BC = dyn_cast<BitCastOperator>(U))
+ Worklist.push_back(BC);
+ else if (auto *CB = dyn_cast<CallBase>(U))
+ if (CB->getCalledOperand() == V)
+ Calls.push_back(CB);
+ }
+ }
+
+ // Create a function prototype based on the first call site that we find.
FunctionType *NewType = nullptr;
- for (Use &U : F.uses()) {
- LLVM_DEBUG(dbgs() << "prototype-less use: " << F.getName() << "\n");
- LLVM_DEBUG(dbgs() << *U.getUser() << "\n");
- if (auto *BC = dyn_cast<BitCastOperator>(U.getUser())) {
- if (auto *DestType = dyn_cast<FunctionType>(
- BC->getDestTy()->getPointerElementType())) {
- if (!NewType) {
- // Create a new function with the correct type
- NewType = DestType;
- LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
- } else if (NewType != DestType) {
- errs() << "warning: prototype-less function used with "
- "conflicting signatures: "
- << F.getName() << "\n";
- LLVM_DEBUG(dbgs() << " " << *DestType << "\n");
- LLVM_DEBUG(dbgs() << " "<< *NewType << "\n");
- }
- }
+ for (CallBase *CB : Calls) {
+ LLVM_DEBUG(dbgs() << "prototype-less call of " << F.getName() << ":\n");
+ LLVM_DEBUG(dbgs() << *CB << "\n");
+ FunctionType *DestType = CB->getFunctionType();
+ if (!NewType) {
+ // Create a new function with the correct type
+ NewType = DestType;
+ LLVM_DEBUG(dbgs() << "found function type: " << *NewType << "\n");
+ } else if (NewType != DestType) {
+ errs() << "warning: prototype-less function used with "
+ "conflicting signatures: "
+ << F.getName() << "\n";
+ LLVM_DEBUG(dbgs() << " " << *DestType << "\n");
+ LLVM_DEBUG(dbgs() << " " << *NewType << "\n");
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 56829eb45e21..0d3f51693261 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -42,8 +42,8 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolWasm.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -51,8 +51,8 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
extern cl::opt<bool> WasmKeepRegisters;
-extern cl::opt<bool> EnableEmException;
-extern cl::opt<bool> EnableEmSjLj;
+extern cl::opt<bool> WasmEnableEmEH;
+extern cl::opt<bool> WasmEnableEmSjLj;
//===----------------------------------------------------------------------===//
// Helpers.
@@ -161,7 +161,7 @@ MCSymbolWasm *WebAssemblyAsmPrinter::getMCSymbolForFunction(
"Emscripten EH/SjLj does not support multivalue returns: " +
std::string(F->getName()) + ": " +
WebAssembly::signatureToString(Sig);
- report_fatal_error(Msg);
+ report_fatal_error(Twine(Msg));
}
WasmSym = cast<MCSymbolWasm>(
GetExternalSymbolSymbol(getEmscriptenInvokeSymbolName(Sig)));
@@ -234,26 +234,32 @@ MCSymbol *WebAssemblyAsmPrinter::getOrCreateWasmSymbol(StringRef Name) {
return WasmSym;
}
+ if (Name.startswith("GCC_except_table")) {
+ WasmSym->setType(wasm::WASM_SYMBOL_TYPE_DATA);
+ return WasmSym;
+ }
+
SmallVector<wasm::ValType, 4> Returns;
SmallVector<wasm::ValType, 4> Params;
- if (Name == "__cpp_exception") {
+ if (Name == "__cpp_exception" || Name == "__c_longjmp") {
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TAG);
- // We can't confirm its signature index for now because there can be
- // imported exceptions. Set it to be 0 for now.
- WasmSym->setTagType(
- {wasm::WASM_TAG_ATTRIBUTE_EXCEPTION, /* SigIndex */ 0});
- // We may have multiple C++ compilation units to be linked together, each of
- // which defines the exception symbol. To resolve them, we declare them as
- // weak.
- WasmSym->setWeak(true);
+ // In static linking we define tag symbols in WasmException::endModule().
+ // But we may have multiple objects to be linked together, each of which
+ // defines the tag symbols. To resolve them, we declare them as weak. In
+ // dynamic linking we make tag symbols undefined in the backend, define it
+ // in JS, and feed them to each importing module.
+ if (!isPositionIndependent())
+ WasmSym->setWeak(true);
WasmSym->setExternal(true);
- // All C++ exceptions are assumed to have a single i32 (for wasm32) or i64
- // (for wasm64) param type and void return type. The reaon is, all C++
- // exception values are pointers, and to share the type section with
- // functions, exceptions are assumed to have void return type.
- Params.push_back(Subtarget.hasAddr64() ? wasm::ValType::I64
- : wasm::ValType::I32);
+ // Currently both C++ exceptions and C longjmps have a single pointer type
+ // param. For C++ exceptions it is a pointer to an exception object, and for
+ // C longjmps it is a pointer to a struct that contains a setjmp buffer and a
+ // longjmp return value. We may consider using multiple value parameters for
+ // longjmps later when multivalue support is ready.
+ wasm::ValType AddrType =
+ Subtarget.hasAddr64() ? wasm::ValType::I64 : wasm::ValType::I32;
+ Params.push_back(AddrType);
} else { // Function symbols
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
getLibcallSignature(Subtarget, Name, Returns, Params);
@@ -309,7 +315,7 @@ void WebAssemblyAsmPrinter::emitExternalDecls(const Module &M) {
// will discard it later if it turns out not to be necessary.
auto Signature = signatureFromMVTs(Results, Params);
bool InvokeDetected = false;
- auto *Sym = getMCSymbolForFunction(&F, EnableEmException || EnableEmSjLj,
+ auto *Sym = getMCSymbolForFunction(&F, WasmEnableEmEH || WasmEnableEmSjLj,
Signature.get(), InvokeDetected);
// Multiple functions can be mapped to the same invoke symbol. For
@@ -497,6 +503,15 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
// This pseudo-feature tells the linker whether shared memory would be safe
EmitFeature("shared-mem");
+ // This is an "architecture", not a "feature", but we emit it as such for
+ // the benefit of tools like Binaryen and consistency with other producers.
+ // FIXME: Subtarget is null here, so we can't use Subtarget->hasAddr64()?
+ if (M.getDataLayout().getPointerSize() == 8) {
+ // Can't use EmitFeature since "wasm-feature-memory64" is not a module
+ // flag.
+ EmittedFeatures.push_back({wasm::WASM_FEATURE_PREFIX_USED, "memory64"});
+ }
+
if (EmittedFeatures.size() == 0)
return;
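The comments in the tag-symbol hunk above say the __cpp_exception and __c_longjmp tags take a single address-sized parameter and return nothing. A short sketch of that signature, assuming the WasmSignature layout from llvm/BinaryFormat/Wasm.h (the helper name is made up for illustration):

#include "llvm/BinaryFormat/Wasm.h"

// Builds the signature described above: one address-sized parameter (i32 on
// wasm32, i64 on wasm64) and no results, so tags can share the type section
// encoding with void-returning functions.
llvm::wasm::WasmSignature makeTagSignature(bool IsWasm64) {
  llvm::wasm::WasmSignature Sig;
  Sig.Params.push_back(IsWasm64 ? llvm::wasm::ValType::I64
                                : llvm::wasm::ValType::I32);
  return Sig;
}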
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 59d69e48b775..7832f199a2cc 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -173,7 +173,7 @@ static bool explicitlyBranchesTo(MachineBasicBlock *Pred,
// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
// contains instructions that should go before the marker, and AfterSet contains
// ones that should go after the marker. In this function, AfterSet is only
-// used for sanity checking.
+// used for validation checking.
template <typename Container>
static MachineBasicBlock::iterator
getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
@@ -182,7 +182,7 @@ getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
while (InsertPos != MBB->begin()) {
if (BeforeSet.count(&*std::prev(InsertPos))) {
#ifndef NDEBUG
- // Sanity check
+ // Validation check
for (auto Pos = InsertPos, E = MBB->begin(); Pos != E; --Pos)
assert(!AfterSet.count(&*std::prev(Pos)));
#endif
@@ -197,7 +197,7 @@ getEarliestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
// satisfying the restrictions given by BeforeSet and AfterSet. BeforeSet
// contains instructions that should go before the marker, and AfterSet contains
// ones that should go after the marker. In this function, BeforeSet is only
-// used for sanity checking.
+// used for validation checking.
template <typename Container>
static MachineBasicBlock::iterator
getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
@@ -206,7 +206,7 @@ getLatestInsertPos(MachineBasicBlock *MBB, const Container &BeforeSet,
while (InsertPos != MBB->end()) {
if (AfterSet.count(&*InsertPos)) {
#ifndef NDEBUG
- // Sanity check
+ // Validation check
for (auto Pos = InsertPos, E = MBB->end(); Pos != E; ++Pos)
assert(!BeforeSet.count(&*Pos));
#endif
@@ -842,8 +842,7 @@ static void unstackifyVRegsUsedInSplitBB(MachineBasicBlock &MBB,
// INST ..., TeeReg, ...
// INST ..., Reg, ...
// INST ..., Reg, ...
- for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!WebAssembly::isTee(MI.getOpcode()))
continue;
Register TeeReg = MI.getOperand(0).getReg();
@@ -1671,8 +1670,7 @@ void WebAssemblyCFGStackify::rewriteDepthImmediates(MachineFunction &MF) {
SmallVector<EndMarkerInfo, 8> Stack;
SmallVector<const MachineBasicBlock *, 8> EHPadStack;
for (auto &MBB : reverse(MF)) {
- for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
- MachineInstr &MI = *I;
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
case WebAssembly::TRY:
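The tee-handling loop above, and a similar loop in WebAssemblyExplicitLocals.cpp below, now use llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs so that the current element can be erased safely. A self-contained sketch of the same idiom with a standard container, assuming only LLVM's header-only ADT:

#include "llvm/ADT/STLExtras.h"
#include <cstdio>
#include <map>

int main() {
  std::map<int, int> M = {{1, 10}, {2, 20}, {3, 30}, {4, 40}};
  // Safe erase-while-iterating: the range has already stepped past the
  // current element, and std::map::erase only invalidates that element.
  for (auto &KV : llvm::make_early_inc_range(M))
    if (KV.first % 2 == 0)
      M.erase(KV.first);
  for (auto &KV : M)
    std::printf("%d -> %d\n", KV.first, KV.second); // prints 1 -> 10, 3 -> 30
  return 0;
}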
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index 4a0738dc3b7a..910a4e5e0d1a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -252,8 +252,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Visit each instruction in the function.
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
- MachineInstr &MI = *I++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
assert(!WebAssembly::isArgument(MI.getOpcode()));
if (MI.isDebugInstr() || MI.isLabel())
@@ -380,9 +379,14 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
- InsertPt =
- BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
- .addImm(LocalId);
+ // Use InsertPt's DebugLoc rather than MI's, since MI may be discontinuous
+ // from where this local is being inserted, causing non-linear stepping in
+ // the debugger or at function entry points where variables aren't live
+ // yet. The alternative is the previous instruction's location, but that is
+ // strictly worse since it can point at the previous statement.
+ // See crbug.com/1251909, crbug.com/1249745
+ InsertPt = BuildMI(MBB, InsertPt, InsertPt->getDebugLoc(),
+ TII->get(Opc), NewReg).addImm(LocalId);
MO.setReg(NewReg);
MFI.stackifyVReg(MRI, NewReg);
Changed = true;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 171d59ae4c6b..642aa6b4028a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -157,7 +157,7 @@ private:
void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB,
MachineMemOperand *MMO);
unsigned maskI1Value(unsigned Reg, const Value *V);
- unsigned getRegForI1Value(const Value *V, bool &Not);
+ unsigned getRegForI1Value(const Value *V, const BasicBlock *BB, bool &Not);
unsigned zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From);
unsigned signExtendToI32(unsigned Reg, const Value *V,
@@ -418,20 +418,17 @@ unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) {
return zeroExtendToI32(Reg, V, MVT::i1);
}
-unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
+unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V,
+ const BasicBlock *BB,
+ bool &Not) {
if (const auto *ICmp = dyn_cast<ICmpInst>(V))
if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1)))
- if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) {
+ if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32) &&
+ ICmp->getParent() == BB) {
Not = ICmp->isTrueWhenEqual();
return getRegForValue(ICmp->getOperand(0));
}
- Value *NotV;
- if (match(V, m_Not(m_Value(NotV))) && V->getType()->isIntegerTy(32)) {
- Not = true;
- return getRegForValue(NotV);
- }
-
Not = false;
unsigned Reg = getRegForValue(V);
if (Reg == 0)
@@ -648,11 +645,11 @@ bool WebAssemblyFastISel::fastLowerArguments() {
unsigned I = 0;
for (auto const &Arg : F->args()) {
const AttributeList &Attrs = F->getAttributes();
- if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
- Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
- Attrs.hasParamAttribute(I, Attribute::Nest))
+ if (Attrs.hasParamAttr(I, Attribute::ByVal) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftError) ||
+ Attrs.hasParamAttr(I, Attribute::InAlloca) ||
+ Attrs.hasParamAttr(I, Attribute::Nest))
return false;
Type *ArgTy = Arg.getType();
@@ -825,25 +822,25 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
}
SmallVector<unsigned, 8> Args;
- for (unsigned I = 0, E = Call->getNumArgOperands(); I < E; ++I) {
+ for (unsigned I = 0, E = Call->arg_size(); I < E; ++I) {
Value *V = Call->getArgOperand(I);
MVT::SimpleValueType ArgTy = getSimpleType(V->getType());
if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE)
return false;
const AttributeList &Attrs = Call->getAttributes();
- if (Attrs.hasParamAttribute(I, Attribute::ByVal) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftSelf) ||
- Attrs.hasParamAttribute(I, Attribute::SwiftError) ||
- Attrs.hasParamAttribute(I, Attribute::InAlloca) ||
- Attrs.hasParamAttribute(I, Attribute::Nest))
+ if (Attrs.hasParamAttr(I, Attribute::ByVal) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftSelf) ||
+ Attrs.hasParamAttr(I, Attribute::SwiftError) ||
+ Attrs.hasParamAttr(I, Attribute::InAlloca) ||
+ Attrs.hasParamAttr(I, Attribute::Nest))
return false;
unsigned Reg;
- if (Attrs.hasParamAttribute(I, Attribute::SExt))
+ if (Attrs.hasParamAttr(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasParamAttribute(I, Attribute::ZExt))
+ else if (Attrs.hasParamAttr(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
@@ -912,7 +909,8 @@ bool WebAssemblyFastISel::selectSelect(const Instruction *I) {
const auto *Select = cast<SelectInst>(I);
bool Not;
- unsigned CondReg = getRegForI1Value(Select->getCondition(), Not);
+ unsigned CondReg =
+ getRegForI1Value(Select->getCondition(), I->getParent(), Not);
if (CondReg == 0)
return false;
@@ -1312,7 +1310,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) {
MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)];
bool Not;
- unsigned CondReg = getRegForI1Value(Br->getCondition(), Not);
+ unsigned CondReg = getRegForI1Value(Br->getCondition(), Br->getParent(), Not);
if (CondReg == 0)
return false;
@@ -1370,9 +1368,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
}
unsigned Reg;
- if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::SExt))
+ if (FuncInfo.Fn->getAttributes().hasRetAttr(Attribute::SExt))
Reg = getRegForSignedValue(RV);
- else if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::ZExt))
+ else if (FuncInfo.Fn->getAttributes().hasRetAttr(Attribute::ZExt))
Reg = getRegForUnsignedValue(RV);
else
Reg = getRegForValue(RV);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
index 52aa3534c78e..5bdec89f1125 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp
@@ -61,9 +61,13 @@ void fixBrTableIndex(MachineInstr &MI, MachineBasicBlock *MBB,
auto ExtMI = MF.getRegInfo().getVRegDef(MI.getOperand(0).getReg());
if (ExtMI->getOpcode() == WebAssembly::I64_EXTEND_U_I32) {
// Unnecessarily extending a 32-bit value to 64, remove it.
- assert(MI.getOperand(0).getReg() == ExtMI->getOperand(0).getReg());
+ auto ExtDefReg = ExtMI->getOperand(0).getReg();
+ assert(MI.getOperand(0).getReg() == ExtDefReg);
MI.getOperand(0).setReg(ExtMI->getOperand(1).getReg());
- ExtMI->eraseFromParent();
+ if (MF.getRegInfo().use_nodbg_empty(ExtDefReg)) {
+ // No more users of extend, delete it.
+ ExtMI->eraseFromParent();
+ }
} else {
// Incoming 64-bit value that needs to be truncated.
Register Reg32 =
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
index 7abb6fa8905c..2a4349e02f1b 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFixFunctionBitcasts.cpp
@@ -64,29 +64,21 @@ ModulePass *llvm::createWebAssemblyFixFunctionBitcasts() {
// Recursively descend the def-use lists from V to find non-bitcast users of
// bitcasts of V.
static void findUses(Value *V, Function &F,
- SmallVectorImpl<std::pair<Use *, Function *>> &Uses,
- SmallPtrSetImpl<Constant *> &ConstantBCs) {
- for (Use &U : V->uses()) {
- if (auto *BC = dyn_cast<BitCastOperator>(U.getUser()))
- findUses(BC, F, Uses, ConstantBCs);
- else if (auto *A = dyn_cast<GlobalAlias>(U.getUser()))
- findUses(A, F, Uses, ConstantBCs);
- else if (U.get()->getType() != F.getType()) {
- CallBase *CB = dyn_cast<CallBase>(U.getUser());
- if (!CB)
- // Skip uses that aren't immediately called
- continue;
+ SmallVectorImpl<std::pair<CallBase *, Function *>> &Uses) {
+ for (User *U : V->users()) {
+ if (auto *BC = dyn_cast<BitCastOperator>(U))
+ findUses(BC, F, Uses);
+ else if (auto *A = dyn_cast<GlobalAlias>(U))
+ findUses(A, F, Uses);
+ else if (auto *CB = dyn_cast<CallBase>(U)) {
Value *Callee = CB->getCalledOperand();
if (Callee != V)
// Skip calls where the function isn't the callee
continue;
- if (isa<Constant>(U.get())) {
- // Only add constant bitcasts to the list once; they get RAUW'd
- auto C = ConstantBCs.insert(cast<Constant>(U.get()));
- if (!C.second)
- continue;
- }
- Uses.push_back(std::make_pair(&U, &F));
+ if (CB->getFunctionType() == F.getValueType())
+ // Skip call sites whose function type already matches the callee
+ continue;
+ Uses.push_back(std::make_pair(CB, &F));
}
}
}
@@ -238,8 +230,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
Function *Main = nullptr;
CallInst *CallMain = nullptr;
- SmallVector<std::pair<Use *, Function *>, 0> Uses;
- SmallPtrSet<Constant *, 2> ConstantBCs;
+ SmallVector<std::pair<CallBase *, Function *>, 0> Uses;
// Collect all the places that need wrappers.
for (Function &F : M) {
@@ -247,7 +238,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
// bitcast type difference for swiftself and swifterror.
if (F.getCallingConv() == CallingConv::Swift)
continue;
- findUses(&F, F, Uses, ConstantBCs);
+ findUses(&F, F, Uses);
// If we have a "main" function, and its type isn't
// "int main(int argc, char *argv[])", create an artificial call with it
@@ -268,8 +259,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
Value *Casted =
ConstantExpr::getBitCast(Main, PointerType::get(MainTy, 0));
CallMain = CallInst::Create(MainTy, Casted, Args, "call_main");
- Use *UseMain = &CallMain->getOperandUse(2);
- Uses.push_back(std::make_pair(UseMain, &F));
+ Uses.push_back(std::make_pair(CallMain, &F));
}
}
}
@@ -277,16 +267,9 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
DenseMap<std::pair<Function *, FunctionType *>, Function *> Wrappers;
for (auto &UseFunc : Uses) {
- Use *U = UseFunc.first;
+ CallBase *CB = UseFunc.first;
Function *F = UseFunc.second;
- auto *PTy = cast<PointerType>(U->get()->getType());
- auto *Ty = dyn_cast<FunctionType>(PTy->getElementType());
-
- // If the function is casted to something like i8* as a "generic pointer"
- // to be later casted to something else, we can't generate a wrapper for it.
- // Just ignore such casts for now.
- if (!Ty)
- continue;
+ FunctionType *Ty = CB->getFunctionType();
auto Pair = Wrappers.insert(std::make_pair(std::make_pair(F, Ty), nullptr));
if (Pair.second)
@@ -296,10 +279,7 @@ bool FixFunctionBitcasts::runOnModule(Module &M) {
if (!Wrapper)
continue;
- if (isa<Constant>(U->get()))
- U->get()->replaceAllUsesWith(Wrapper);
- else
- U->set(Wrapper);
+ CB->setCalledOperand(Wrapper);
}
// If we created a wrapper for main, rename the wrapper so that it's the
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 21519d6135b7..1fa0ea3867c7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -21,9 +21,9 @@ HANDLE_NODETYPE(LOCAL_GET)
HANDLE_NODETYPE(LOCAL_SET)
// A wrapper node for TargetExternalSymbol, TargetGlobalAddress, and MCSymbol
HANDLE_NODETYPE(Wrapper)
-// A special wapper used in PIC code for __memory_base/__table_base relative
-// access.
-HANDLE_NODETYPE(WrapperPIC)
+// A special node for TargetGlobalAddress used in PIC code for
+// __memory_base/__table_base relative access.
+HANDLE_NODETYPE(WrapperREL)
HANDLE_NODETYPE(BR_IF)
HANDLE_NODETYPE(BR_TABLE)
HANDLE_NODETYPE(SHUFFLE)
@@ -41,8 +41,6 @@ HANDLE_NODETYPE(PROMOTE_LOW)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
HANDLE_NODETYPE(DEMOTE_ZERO)
-HANDLE_NODETYPE(THROW)
-HANDLE_NODETYPE(CATCH)
HANDLE_NODETYPE(MEMORY_COPY)
HANDLE_NODETYPE(MEMORY_FILL)
@@ -50,4 +48,5 @@ HANDLE_NODETYPE(MEMORY_FILL)
HANDLE_MEM_NODETYPE(LOAD_SPLAT)
HANDLE_MEM_NODETYPE(GLOBAL_GET)
HANDLE_MEM_NODETYPE(GLOBAL_SET)
+HANDLE_MEM_NODETYPE(TABLE_GET)
HANDLE_MEM_NODETYPE(TABLE_SET)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index f4bae59132e6..7e75989d3def 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -17,6 +17,7 @@
#include "WebAssemblyTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/IntrinsicsWebAssembly.h"
@@ -24,6 +25,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
#define DEBUG_TYPE "wasm-isel"
@@ -48,32 +50,11 @@ public:
return "WebAssembly Instruction Selection";
}
- void checkForInvalidNodes(const Function &F) {
- // This function will check for uses of ptrtoint on reference types and
- // report a fatal error if these are found.
- for (const BasicBlock &BB : F) {
- for (const Instruction &I : BB) {
- if (const PtrToIntInst *PTI = dyn_cast<const PtrToIntInst>(&I)) {
- const Value *V = PTI->getPointerOperand();
- if (WebAssemblyTargetLowering::isFuncrefType(V->getType()) ||
- WebAssemblyTargetLowering::isExternrefType(V->getType()))
- report_fatal_error("ptrtoint not allowed on reference types");
- } else if (const IntToPtrInst *ITP = dyn_cast<const IntToPtrInst>(&I)) {
- if (WebAssemblyTargetLowering::isFuncrefType(ITP->getDestTy()) ||
- WebAssemblyTargetLowering::isExternrefType(ITP->getDestTy()))
- report_fatal_error("inttoptr not allowed on reference types");
- }
- }
- }
- }
-
bool runOnMachineFunction(MachineFunction &MF) override {
LLVM_DEBUG(dbgs() << "********** ISelDAGToDAG **********\n"
"********** Function: "
<< MF.getName() << '\n');
- checkForInvalidNodes(MF.getFunction());
-
Subtarget = &MF.getSubtarget<WebAssemblySubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
@@ -85,7 +66,6 @@ public:
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- bool SelectExternRefAddr(const SDValue &Addr, const SDValue &Base);
// Include the pieces autogenerated from the target description.
#include "WebAssemblyGenDAGISel.inc"
@@ -107,6 +87,17 @@ void WebAssemblyDAGToDAGISel::PreprocessISelDAG() {
SelectionDAGISel::PreprocessISelDAG();
}
+static SDValue getTagSymNode(int Tag, SelectionDAG *DAG) {
+ assert(Tag == WebAssembly::CPP_EXCEPTION || Tag == WebAssembly::C_LONGJMP);
+ auto &MF = DAG->getMachineFunction();
+ const auto &TLI = DAG->getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(DAG->getDataLayout());
+ const char *SymName = Tag == WebAssembly::CPP_EXCEPTION
+ ? MF.createExternalSymbolName("__cpp_exception")
+ : MF.createExternalSymbolName("__c_longjmp");
+ return DAG->getTargetExternalSymbol(SymName, PtrVT);
+}
+
void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@@ -127,8 +118,7 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
if (!MF.getSubtarget<WebAssemblySubtarget>().hasAtomics())
break;
- uint64_t SyncScopeID =
- cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
+ uint64_t SyncScopeID = Node->getConstantOperandVal(2);
MachineSDNode *Fence = nullptr;
switch (SyncScopeID) {
case SyncScope::SingleThread:
@@ -162,7 +152,7 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
case Intrinsic::wasm_tls_size: {
MachineSDNode *TLSSize = CurDAG->getMachineNode(
@@ -171,6 +161,7 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, TLSSize);
return;
}
+
case Intrinsic::wasm_tls_align: {
MachineSDNode *TLSAlign = CurDAG->getMachineNode(
GlobalGetIns, DL, PtrVT,
@@ -181,8 +172,11 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+
case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ const auto &TLI = CurDAG->getTargetLoweringInfo();
+ MVT PtrVT = TLI.getPointerTy(CurDAG->getDataLayout());
switch (IntNo) {
case Intrinsic::wasm_tls_base: {
MachineSDNode *TLSBase = CurDAG->getMachineNode(
@@ -192,9 +186,48 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, TLSBase);
return;
}
+
+ case Intrinsic::wasm_catch: {
+ int Tag = Node->getConstantOperandVal(2);
+ SDValue SymNode = getTagSymNode(Tag, CurDAG);
+ MachineSDNode *Catch =
+ CurDAG->getMachineNode(WebAssembly::CATCH, DL,
+ {
+ PtrVT, // exception pointer
+ MVT::Other // outchain type
+ },
+ {
+ SymNode, // exception symbol
+ Node->getOperand(0) // inchain
+ });
+ ReplaceNode(Node, Catch);
+ return;
+ }
+ }
+ break;
+ }
+
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = Node->getConstantOperandVal(1);
+ switch (IntNo) {
+ case Intrinsic::wasm_throw: {
+ int Tag = Node->getConstantOperandVal(2);
+ SDValue SymNode = getTagSymNode(Tag, CurDAG);
+ MachineSDNode *Throw =
+ CurDAG->getMachineNode(WebAssembly::THROW, DL,
+ MVT::Other, // outchain type
+ {
+ SymNode, // exception symbol
+ Node->getOperand(3), // thrown value
+ Node->getOperand(0) // inchain
+ });
+ ReplaceNode(Node, Throw);
+ return;
+ }
}
break;
}
+
case WebAssemblyISD::CALL:
case WebAssemblyISD::RET_CALL: {
// CALL has both variable operands and variable results, but ISel only
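The new INTRINSIC_W_CHAIN and INTRINSIC_VOID cases above select @llvm.wasm.catch and @llvm.wasm.throw into CATCH/THROW machine nodes that carry the tag symbol. Roughly how a frontend could emit the throw intrinsic that feeds this path (a sketch only, not the actual Clang lowering; it assumes an existing IRBuilder, an i8* exception pointer, and that tag value 0 corresponds to WebAssembly::CPP_EXCEPTION):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"

// Emits: call void @llvm.wasm.throw(i32 0, i8* %exn)
// The i32 tag operand is what getTagSymNode() above maps to "__cpp_exception".
void emitCppThrow(llvm::IRBuilder<> &B, llvm::Value *ExnPtr) {
  B.CreateIntrinsic(llvm::Intrinsic::wasm_throw, /*Types=*/{},
                    {B.getInt32(0), ExnPtr});
}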
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 62c53c0051ae..0df8f3e0e09c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
@@ -33,6 +32,7 @@
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
@@ -88,7 +88,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
}
}
if (Subtarget->hasReferenceTypes()) {
- for (auto T : {MVT::externref, MVT::funcref}) {
+ // We need custom load and store lowering for externref, funcref, and
+ // Other. The MVT::Other here represents tables of reference types.
+ for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
setOperationAction(ISD::LOAD, T, Custom);
setOperationAction(ISD::STORE, T, Custom);
}
@@ -213,8 +215,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(ISD::SELECT_CC, T, Expand);
// Expand integer operations supported for scalars but not SIMD
- for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP, ISD::SDIV, ISD::UDIV,
- ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
+ for (auto Op :
+ {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
setOperationAction(Op, T, Expand);
@@ -223,8 +225,15 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(Op, T, Legal);
- // And we have popcnt for i8x16
+ // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
+ setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
+
+ // Custom lower bit counting operations for other types to scalarize them.
+ for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
+ for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
+ setOperationAction(Op, T, Custom);
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
@@ -303,9 +312,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
}
- // And some truncating stores are legal as well
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
- setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
}
// Don't do anything clever with build_pairs
@@ -338,6 +345,24 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setMinimumJumpTableEntries(2);
}
+MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
+ uint32_t AS) const {
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
+ return MVT::externref;
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
+ return MVT::funcref;
+ return TargetLowering::getPointerTy(DL, AS);
+}
+
+MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
+ uint32_t AS) const {
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
+ return MVT::externref;
+ if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
+ return MVT::funcref;
+ return TargetLowering::getPointerMemTy(DL, AS);
+}
+
TargetLowering::AtomicExpansionKind
WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// We have wasm instructions for these
@@ -551,7 +576,21 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
if (IsIndirect) {
auto FnPtr = CallParams.getOperand(0);
CallParams.RemoveOperand(0);
- CallParams.addOperand(FnPtr);
+
+ // For funcrefs, call_indirect is done through __funcref_call_table and the
+ // funcref is always installed in slot 0 of the table. Therefore, instead of
+ // adding the function pointer at the end of the params list, we add a zero
+ // (the index into __funcref_call_table).
+ if (IsFuncrefCall) {
+ Register RegZero =
+ MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
+ MachineInstrBuilder MIBC0 =
+ BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
+
+ BB->insert(CallResults.getIterator(), MIBC0);
+ MachineInstrBuilder(MF, CallParams).addReg(RegZero);
+ } else
+ CallParams.addOperand(FnPtr);
}
for (auto Def : CallResults.defs())
@@ -770,6 +809,13 @@ bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
(ExtT == MVT::v2i64 && MemT == MVT::v2i32);
}
+bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+ // Wasm doesn't support function addresses with offsets
+ const GlobalValue *GV = GA->getGlobal();
+ return isa<Function>(GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
+}
+
EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &C,
EVT VT) const {
@@ -823,6 +869,45 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
}
+void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
+ const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
+ const SelectionDAG &DAG, unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = Op.getConstantOperandVal(0);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::wasm_bitmask: {
+ unsigned BitWidth = Known.getBitWidth();
+ EVT VT = Op.getOperand(1).getSimpleValueType();
+ unsigned PossibleBits = VT.getVectorNumElements();
+ APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
+ Known.Zero |= ZeroMask;
+ break;
+ }
+ }
+ }
+ }
+}
+
+TargetLoweringBase::LegalizeTypeAction
+WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
+ if (VT.isFixedLengthVector()) {
+ MVT EltVT = VT.getVectorElementType();
+ // We have legal vector types with these lane types, so widening the
+ // vector would let us use some of the lanes directly without having to
+ // extend or truncate values.
+ if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
+ EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
+ return TypeWidenVector;
+ }
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//
@@ -1088,7 +1173,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Lastly, if this is a call to a funcref we need to add an instruction
// table.set to the chain and transform the call.
- if (CLI.CB && isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
+ if (CLI.CB &&
+ WebAssembly::isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
// In the absence of function references proposal where a funcref call is
// lowered to call_ref, using reference types we generate a table.set to set
// the funcref to a special table used solely for this purpose, followed by
@@ -1106,7 +1192,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
MVT::funcref,
// Machine Mem Operand args
- MachinePointerInfo(WasmAddressSpace::FUNCREF),
+ MachinePointerInfo(
+ WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
MachineMemOperand::MOStore);
@@ -1325,6 +1412,10 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerLoad(Op, DAG);
case ISD::STORE:
return LowerStore(Op, DAG);
+ case ISD::CTPOP:
+ case ISD::CTLZ:
+ case ISD::CTTZ:
+ return DAG.UnrollVectorOp(Op.getNode());
}
}
@@ -1344,14 +1435,78 @@ static Optional<unsigned> IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG) {
return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
}
-bool WebAssemblyTargetLowering::isFuncrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() == WasmAddressSpace::FUNCREF;
+static bool IsWebAssemblyTable(SDValue Op) {
+ const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+ if (GA && WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace())) {
+ const GlobalValue *Value = GA->getGlobal();
+ const Type *Ty = Value->getValueType();
+
+ if (Ty->isArrayTy() && WebAssembly::isRefType(Ty->getArrayElementType()))
+ return true;
+ }
+ return false;
+}
+
+// This function will accept as Op any access to a table, so Op can
+// be the actual table or an offset into the table.
+static bool IsWebAssemblyTableWithOffset(SDValue Op) {
+ if (Op->getOpcode() == ISD::ADD && Op->getNumOperands() == 2)
+ return (Op->getOperand(1).getSimpleValueType() == MVT::i32 &&
+ IsWebAssemblyTableWithOffset(Op->getOperand(0))) ||
+ (Op->getOperand(0).getSimpleValueType() == MVT::i32 &&
+ IsWebAssemblyTableWithOffset(Op->getOperand(1)));
+
+ return IsWebAssemblyTable(Op);
}
-bool WebAssemblyTargetLowering::isExternrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() == WasmAddressSpace::EXTERNREF;
+// Helper for table pattern matching used in LowerStore and LowerLoad
+bool WebAssemblyTargetLowering::MatchTableForLowering(SelectionDAG &DAG,
+ const SDLoc &DL,
+ const SDValue &Base,
+ GlobalAddressSDNode *&GA,
+ SDValue &Idx) const {
+ // We expect the following graph for a load of the form:
+ // table[<var> + <constant offset>]
+ //
+ // Case 1:
+ // externref = load t1
+ // t1: i32 = add t2, i32:<constant offset>
+ // t2: i32 = add tX, table
+ //
+ // This is in some cases simplified to just:
+ // Case 2:
+ // externref = load t1
+ // t1: i32 = add t2, i32:tX
+ //
+ // So, unfortunately, we need to check for both cases. If we are in the
+ // first case, we extract the table GlobalAddressSDNode and build a new node
+ // tY such that tY: i32 = add i32:<constant offset>, i32:tX
+ //
+ if (IsWebAssemblyTable(Base)) {
+ GA = cast<GlobalAddressSDNode>(Base);
+ Idx = DAG.getConstant(0, DL, MVT::i32);
+ } else {
+ GA = dyn_cast<GlobalAddressSDNode>(Base->getOperand(0));
+ if (GA) {
+ // We are in Case 2 above.
+ Idx = Base->getOperand(1);
+ if (!Idx || GA->getNumValues() != 1 || Idx->getNumValues() != 1)
+ return false;
+ } else {
+ // This might be Case 1 above (or an error)
+ SDValue V = Base->getOperand(0);
+ GA = dyn_cast<GlobalAddressSDNode>(V->getOperand(1));
+
+ if (V->getOpcode() != ISD::ADD || V->getNumOperands() != 2 || !GA)
+ return false;
+
+ SDValue IdxV = DAG.getNode(ISD::ADD, DL, MVT::i32, Base->getOperand(1),
+ V->getOperand(0));
+ Idx = IdxV;
+ }
+ }
+
+ return true;
}
SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
@@ -1362,6 +1517,26 @@ SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
const SDValue &Base = SN->getBasePtr();
const SDValue &Offset = SN->getOffset();
+ if (IsWebAssemblyTableWithOffset(Base)) {
+ if (!Offset->isUndef())
+ report_fatal_error(
+ "unexpected offset when loading from webassembly table", false);
+
+ SDValue Idx;
+ GlobalAddressSDNode *GA;
+
+ if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
+ report_fatal_error("failed pattern matching for lowering table store",
+ false);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue TableSetOps[] = {SN->getChain(), SDValue(GA, 0), Idx, Value};
+ SDValue TableSet =
+ DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_SET, DL, Tys, TableSetOps,
+ SN->getMemoryVT(), SN->getMemOperand());
+ return TableSet;
+ }
+
if (IsWebAssemblyGlobal(Base)) {
if (!Offset->isUndef())
report_fatal_error("unexpected offset when storing to webassembly global",
@@ -1394,6 +1569,26 @@ SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
const SDValue &Base = LN->getBasePtr();
const SDValue &Offset = LN->getOffset();
+ if (IsWebAssemblyTableWithOffset(Base)) {
+ if (!Offset->isUndef())
+ report_fatal_error(
+ "unexpected offset when loading from webassembly table", false);
+
+ GlobalAddressSDNode *GA;
+ SDValue Idx;
+
+ if (!MatchTableForLowering(DAG, DL, Base, GA, Idx))
+ report_fatal_error("failed pattern matching for lowering table load",
+ false);
+
+ SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
+ SDValue TableGetOps[] = {LN->getChain(), SDValue(GA, 0), Idx};
+ SDValue TableGet =
+ DAG.getMemIntrinsicNode(WebAssemblyISD::TABLE_GET, DL, Tys, TableGetOps,
+ LN->getMemoryVT(), LN->getMemOperand());
+ return TableGet;
+ }
+
if (IsWebAssemblyGlobal(Base)) {
if (!Offset->isUndef())
report_fatal_error(
@@ -1468,7 +1663,7 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned Depth = Op.getConstantOperandVal(0);
MakeLibCallOptions CallOptions;
return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
{DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
@@ -1495,7 +1690,6 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
const auto *GA = cast<GlobalAddressSDNode>(Op);
- MVT PtrVT = getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
@@ -1517,20 +1711,43 @@ WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
false);
}
- auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
- : WebAssembly::GLOBAL_GET_I32;
- const char *BaseName = MF.createExternalSymbolName("__tls_base");
+ auto model = GV->getThreadLocalMode();
- SDValue BaseAddr(
- DAG.getMachineNode(GlobalGet, DL, PtrVT,
- DAG.getTargetExternalSymbol(BaseName, PtrVT)),
- 0);
+ // Unsupported TLS modes
+ assert(model != GlobalValue::NotThreadLocal);
+ assert(model != GlobalValue::InitialExecTLSModel);
+
+ if (model == GlobalValue::LocalExecTLSModel ||
+ model == GlobalValue::LocalDynamicTLSModel ||
+ (model == GlobalValue::GeneralDynamicTLSModel &&
+ getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))) {
+ // For DSO-local TLS variables we use an offset from __tls_base
+
+ MVT PtrVT = getPointerTy(DAG.getDataLayout());
+ auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
+ : WebAssembly::GLOBAL_GET_I32;
+ const char *BaseName = MF.createExternalSymbolName("__tls_base");
+
+ SDValue BaseAddr(
+ DAG.getMachineNode(GlobalGet, DL, PtrVT,
+ DAG.getTargetExternalSymbol(BaseName, PtrVT)),
+ 0);
+
+ SDValue TLSOffset = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
+ SDValue SymOffset =
+ DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
+
+ return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
+ }
- SDValue TLSOffset = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
- SDValue SymAddr = DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, TLSOffset);
+ assert(model == GlobalValue::GeneralDynamicTLSModel);
- return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
+ EVT VT = Op.getValueType();
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+ DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
+ GA->getOffset(),
+ WebAssemblyII::MO_GOT_TLS));
}
SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
@@ -1563,14 +1780,13 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
DAG.getTargetExternalSymbol(BaseName, PtrVT));
SDValue SymAddr = DAG.getNode(
- WebAssemblyISD::WrapperPIC, DL, VT,
+ WebAssemblyISD::WrapperREL, DL, VT,
DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
OperandFlags));
return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
- } else {
- OperandFlags = WebAssemblyII::MO_GOT;
}
+ OperandFlags = WebAssemblyII::MO_GOT;
}
return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
@@ -1640,21 +1856,6 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
MachinePointerInfo(SV));
}
-static SDValue getCppExceptionSymNode(SDValue Op, unsigned TagIndex,
- SelectionDAG &DAG) {
- // We only support C++ exceptions for now
- int Tag =
- cast<ConstantSDNode>(Op.getOperand(TagIndex).getNode())->getZExtValue();
- if (Tag != WebAssembly::CPP_EXCEPTION)
- llvm_unreachable("Invalid tag: We only support C++ exceptions for now");
- auto &MF = DAG.getMachineFunction();
- const auto &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- const char *SymName = MF.createExternalSymbolName("__cpp_exception");
- return DAG.getNode(WebAssemblyISD::Wrapper, SDLoc(Op), PtrVT,
- DAG.getTargetExternalSymbol(SymName, PtrVT));
-}
-
SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1662,10 +1863,10 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
switch (Op.getOpcode()) {
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
- IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ IntNo = Op.getConstantOperandVal(1);
break;
case ISD::INTRINSIC_WO_CHAIN:
- IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ IntNo = Op.getConstantOperandVal(0);
break;
default:
llvm_unreachable("Invalid intrinsic");
@@ -1677,38 +1878,22 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
return SDValue(); // Don't custom lower most intrinsics.
case Intrinsic::wasm_lsda: {
- EVT VT = Op.getValueType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- auto &Context = MF.getMMI().getContext();
- MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") +
- Twine(MF.getFunctionNumber()));
- return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getMCSymbol(S, PtrVT));
- }
-
- case Intrinsic::wasm_throw: {
- SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
- return DAG.getNode(WebAssemblyISD::THROW, DL,
- MVT::Other, // outchain type
- {
- Op.getOperand(0), // inchain
- SymNode, // exception symbol
- Op.getOperand(3) // thrown value
- });
- }
-
- case Intrinsic::wasm_catch: {
- SDValue SymNode = getCppExceptionSymNode(Op, 2, DAG);
- return DAG.getNode(WebAssemblyISD::CATCH, DL,
- {
- MVT::i32, // outchain type
- MVT::Other // return value
- },
- {
- Op.getOperand(0), // inchain
- SymNode // exception symbol
- });
+ auto PtrVT = getPointerTy(MF.getDataLayout());
+ const char *SymName = MF.createExternalSymbolName(
+ "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
+ if (isPositionIndependent()) {
+ SDValue Node = DAG.getTargetExternalSymbol(
+ SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
+ const char *BaseName = MF.createExternalSymbolName("__memory_base");
+ SDValue BaseAddr =
+ DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
+ DAG.getTargetExternalSymbol(BaseName, PtrVT));
+ SDValue SymAddr =
+ DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
+ return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
+ }
+ SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
+ return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
}
case Intrinsic::wasm_shuffle: {
@@ -1774,8 +1959,76 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
Op.getOperand(1));
}
+static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ if (Op.getValueType() != MVT::v2f64)
+ return SDValue();
+
+ auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
+ unsigned &Index) -> bool {
+ switch (Op.getOpcode()) {
+ case ISD::SINT_TO_FP:
+ Opcode = WebAssemblyISD::CONVERT_LOW_S;
+ break;
+ case ISD::UINT_TO_FP:
+ Opcode = WebAssemblyISD::CONVERT_LOW_U;
+ break;
+ case ISD::FP_EXTEND:
+ Opcode = WebAssemblyISD::PROMOTE_LOW;
+ break;
+ default:
+ return false;
+ }
+
+ auto ExtractVector = Op.getOperand(0);
+ if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return false;
+
+ if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
+ return false;
+
+ SrcVec = ExtractVector.getOperand(0);
+ Index = ExtractVector.getConstantOperandVal(1);
+ return true;
+ };
+
+ unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
+ SDValue LHSSrcVec, RHSSrcVec;
+ if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
+ !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
+ return SDValue();
+
+ if (LHSOpcode != RHSOpcode)
+ return SDValue();
+
+ MVT ExpectedSrcVT;
+ switch (LHSOpcode) {
+ case WebAssemblyISD::CONVERT_LOW_S:
+ case WebAssemblyISD::CONVERT_LOW_U:
+ ExpectedSrcVT = MVT::v4i32;
+ break;
+ case WebAssemblyISD::PROMOTE_LOW:
+ ExpectedSrcVT = MVT::v4f32;
+ break;
+ }
+ if (LHSSrcVec.getValueType() != ExpectedSrcVT)
+ return SDValue();
+
+ auto Src = LHSSrcVec;
+ if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
+ // Shuffle the source vector so that the converted lanes are the low lanes.
+ Src = DAG.getVectorShuffle(
+ ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
+ {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
+ }
+ return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
+}
+
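When the two converted lanes are not already lanes 0 and 1 of a single source, LowerConvertLow shuffles them into place first; the mask indexes into the concatenation of both source vectors and marks the unused lanes as undefined. A small standalone sketch of that mask construction (the helper name is made up for illustration):

#include <array>
#include <cstdio>

// Mirrors the mask built above: entry 0 picks lane LHSIndex of the first
// source, entry 1 picks lane RHSIndex of the second source (offset by the
// 4-lane width), and -1 marks don't-care lanes.
static std::array<int, 4> convertLowMask(unsigned LHSIndex, unsigned RHSIndex) {
  return {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1};
}

int main() {
  auto M = convertLowMask(2, 3); // e.g. lanes 2 and 3 of the same source vector
  std::printf("{%d, %d, %d, %d}\n", M[0], M[1], M[2], M[3]); // {2, 7, -1, -1}
}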
SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
+ if (auto ConvertLow = LowerConvertLow(Op, DAG))
+ return ConvertLow;
+
SDLoc DL(Op);
const EVT VecT = Op.getValueType();
const EVT LaneT = Op.getOperand(0).getValueType();
@@ -1901,12 +2154,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
size_t NumShuffleLanes = 0;
if (ShuffleCounts.size()) {
std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
- ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(),
- ShuffleCounts.end(),
- [&](const auto &Pair) {
- return Pair.first == ShuffleSrc1;
- }),
- ShuffleCounts.end());
+ llvm::erase_if(ShuffleCounts,
+ [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
}
if (ShuffleCounts.size()) {
size_t AdditionalShuffleLanes;
@@ -1974,7 +2223,23 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SmallVector<SDValue, 16> ConstLanes;
for (const SDValue &Lane : Op->op_values()) {
if (IsConstant(Lane)) {
- ConstLanes.push_back(Lane);
+ // Values may need to be fixed so that they will sign extend to be
+ // within the expected range during ISel. Check whether the value is in
+ // bounds based on the lane bit width and if it is out of bounds, lop
+ // off the extra bits and subtract 2^n to reflect giving the high bit
+ // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
+ // cannot possibly be out of range.
+ auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
+ int64_t Val = Const ? Const->getSExtValue() : 0;
+ uint64_t LaneBits = 128 / Lanes;
+ assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
+ "Unexpected out of bounds negative value");
+ if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
+ auto NewVal = ((uint64_t)Val % (1ll << LaneBits)) - (1ll << LaneBits);
+ ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
+ } else {
+ ConstLanes.push_back(Lane);
+ }
} else if (LaneT.isFloatingPoint()) {
ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
} else {
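The wrap-around described in the comment above is easiest to see with numbers: for i8 lanes (LaneBits == 8), a lane constant of 200 becomes 200 % 256 - 256 = -56, whose low 8 bits are the intended pattern. A self-contained sketch of the same computation, with a hypothetical helper name:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Constants too large for the signed lane range are reduced mod 2^n and then
// shifted down by 2^n, so the high bit reads as -2^(n-1) instead of +2^(n-1).
static int64_t wrapToSignedLane(int64_t Val, unsigned LaneBits) {
  assert(LaneBits < 64 && "i64 lanes cannot be out of range");
  assert(Val >= -(1ll << (LaneBits - 1)) && "unexpected out-of-bounds negative");
  if (Val <= (1ll << (LaneBits - 1)) - 1)
    return Val; // already representable
  return static_cast<int64_t>(static_cast<uint64_t>(Val) % (1ull << LaneBits)) -
         (1ll << LaneBits);
}

int main() {
  std::printf("%lld\n", (long long)wrapToSignedLane(200, 8)); // -56
  std::printf("%lld\n", (long long)wrapToSignedLane(100, 8)); // 100
}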
@@ -2227,120 +2492,6 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
}
static SDValue
-performVectorConvertLowCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- auto &DAG = DCI.DAG;
-
- EVT ResVT = N->getValueType(0);
- if (ResVT != MVT::v2f64)
- return SDValue();
-
- auto GetWasmConversionOp = [](unsigned Op) {
- switch (Op) {
- case ISD::SINT_TO_FP:
- return WebAssemblyISD::CONVERT_LOW_S;
- case ISD::UINT_TO_FP:
- return WebAssemblyISD::CONVERT_LOW_U;
- case ISD::FP_EXTEND:
- return WebAssemblyISD::PROMOTE_LOW;
- }
- llvm_unreachable("unexpected op");
- };
-
- if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- // Combine this:
- //
- // (v2f64 (extract_subvector
- // (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
- //
- // into (f64x2.convert_low_i32x4_{s,u} $x).
- //
- // Or this:
- //
- // (v2f64 (extract_subvector
- // (v4f64 (fp_extend (v4f32 $x))), 0))
- //
- // into (f64x2.promote_low_f32x4 $x).
- auto Conversion = N->getOperand(0);
- auto ConversionOp = Conversion.getOpcode();
- MVT ExpectedSourceType;
- switch (ConversionOp) {
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- ExpectedSourceType = MVT::v4i32;
- break;
- case ISD::FP_EXTEND:
- ExpectedSourceType = MVT::v4f32;
- break;
- default:
- return SDValue();
- }
-
- if (Conversion.getValueType() != MVT::v4f64)
- return SDValue();
-
- auto Source = Conversion.getOperand(0);
- if (Source.getValueType() != ExpectedSourceType)
- return SDValue();
-
- auto IndexNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
- return SDValue();
-
- auto Op = GetWasmConversionOp(ConversionOp);
- return DAG.getNode(Op, SDLoc(N), ResVT, Source);
- }
-
- // Combine this:
- //
- // (v2f64 ({s,u}int_to_fp
- // (v2i32 (extract_subvector (v4i32 $x), 0))))
- //
- // into (f64x2.convert_low_i32x4_{s,u} $x).
- //
- // Or this:
- //
- // (v2f64 (fp_extend
- // (v2f32 (extract_subvector (v4f32 $x), 0))))
- //
- // into (f64x2.promote_low_f32x4 $x).
- auto ConversionOp = N->getOpcode();
- MVT ExpectedExtractType;
- MVT ExpectedSourceType;
- switch (ConversionOp) {
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- ExpectedExtractType = MVT::v2i32;
- ExpectedSourceType = MVT::v4i32;
- break;
- case ISD::FP_EXTEND:
- ExpectedExtractType = MVT::v2f32;
- ExpectedSourceType = MVT::v4f32;
- break;
- default:
- llvm_unreachable("unexpected opcode");
- }
-
- auto Extract = N->getOperand(0);
- if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return SDValue();
-
- if (Extract.getValueType() != ExpectedExtractType)
- return SDValue();
-
- auto Source = Extract.getOperand(0);
- if (Source.getValueType() != ExpectedSourceType)
- return SDValue();
-
- auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
- if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
- return SDValue();
-
- unsigned Op = GetWasmConversionOp(ConversionOp);
- return DAG.getNode(Op, SDLoc(N), ResVT, Source);
-}
-
-static SDValue
performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
auto &DAG = DCI.DAG;
@@ -2470,11 +2621,6 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
return performVectorExtendCombine(N, DCI);
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- case ISD::FP_EXTEND:
- case ISD::EXTRACT_SUBVECTOR:
- return performVectorConvertLowCombine(N, DCI);
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
case ISD::FP_ROUND:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 5d813fefb96b..f7b460f61dbb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -45,35 +45,8 @@ public:
WebAssemblyTargetLowering(const TargetMachine &TM,
const WebAssemblySubtarget &STI);
- enum WasmAddressSpace : unsigned {
- // WebAssembly uses the following address spaces:
- // AS 0 : is the default address space for values in linear memory
- DEFAULT = 0,
- // AS 1 : is a non-integral address space for global variables
- GLOBAL = 1,
- // AS 10 : is a non-integral address space for externref values
- EXTERNREF = 10,
- // AS 20 : is a non-integral address space for funcref values
- FUNCREF = 20,
- };
-
- MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
- if (AS == WasmAddressSpace::EXTERNREF)
- return MVT::externref;
- if (AS == WasmAddressSpace::FUNCREF)
- return MVT::funcref;
- return TargetLowering::getPointerTy(DL, AS);
- }
- MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override {
- if (AS == WasmAddressSpace::EXTERNREF)
- return MVT::externref;
- if (AS == WasmAddressSpace::FUNCREF)
- return MVT::funcref;
- return TargetLowering::getPointerMemTy(DL, AS);
- }
-
- static bool isFuncrefType(const Type *Ty);
- static bool isExternrefType(const Type *Ty);
+ MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override;
+ MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override;
private:
/// Keep a pointer to the WebAssemblySubtarget around so that we can make the
@@ -102,12 +75,21 @@ private:
bool *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
+
+ TargetLoweringBase::LegalizeTypeAction
+ getPreferredVectorAction(MVT VT) const override;
+
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
@@ -154,6 +136,11 @@ private:
SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
+ // Helper for LowerLoad and LowerStore
+ bool MatchTableForLowering(SelectionDAG &DAG, const SDLoc &DL,
+ const SDValue &Base, GlobalAddressSDNode *&GA,
+ SDValue &Idx) const;
+
// Custom DAG combine hooks
SDValue
PerformDAGCombine(SDNode *N,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index 1ee6ae196d02..42183d1645e1 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -114,13 +114,13 @@ def NotifyPatOffsetOnly_A64 :
Requires<[HasAddr64, HasAtomics]>;
def NotifyPatGlobalAddrOffOnly_A32 :
- Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblyWrapper tglobaladdr:$off),
I32:$count)),
(MEMORY_ATOMIC_NOTIFY_A32 0, tglobaladdr:$off, (CONST_I32 0), I32:$count)
>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
def NotifyPatGlobalAddrOffOnly_A64 :
- Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblywrapper tglobaladdr:$off),
+ Pat<(i32 (int_wasm_memory_atomic_notify (WebAssemblyWrapper tglobaladdr:$off),
I32:$count)),
(MEMORY_ATOMIC_NOTIFY_A64 0, tglobaladdr:$off, (CONST_I64 0), I32:$count)
>,
@@ -185,12 +185,12 @@ defm : WaitPatOffsetOnly<i64, int_wasm_memory_atomic_wait64,
"MEMORY_ATOMIC_WAIT64">;
multiclass WaitPatGlobalAddrOffOnly<ValueType ty, Intrinsic kind, string inst> {
- def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
+ def : Pat<(i32 (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp,
I64:$timeout)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
I64:$timeout)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(i32 (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp,
+ def : Pat<(i32 (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp,
I64:$timeout)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
I64:$timeout)>,
@@ -390,10 +390,10 @@ defm : AStorePatOffsetOnly<i32, atomic_store_32, "ATOMIC_STORE_I32">;
defm : AStorePatOffsetOnly<i64, atomic_store_64, "ATOMIC_STORE_I64">;
multiclass AStorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+ def : Pat<(kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(kind (WebAssemblywrapper tglobaladdr:$off), ty:$val),
+ def : Pat<(kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
@@ -592,10 +592,10 @@ multiclass BinRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
}
multiclass BinRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$val)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$val)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$val)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$val)>,
Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
}
@@ -659,7 +659,7 @@ class sext_bin_rmw_16_64<PatFrag kind> : sext_bin_rmw_8_64<kind>;
// Patterns for various addressing modes for truncating-extending binary RMWs.
multiclass BinRMWTruncExtPattern<
- PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32,
string inst8_32, string inst16_32, string inst8_64, string inst16_64, string inst32_64> {
// Truncating-extending binary RMWs with no constant offset
defm : BinRMWPatNoOffset<i32, zext_bin_rmw_8_32<rmw_8>, inst8_32>;
@@ -724,27 +724,27 @@ multiclass BinRMWTruncExtPattern<
}
defm : BinRMWTruncExtPattern<
- atomic_load_add_8, atomic_load_add_16, atomic_load_add_32, atomic_load_add_64,
+ atomic_load_add_8, atomic_load_add_16, atomic_load_add_32,
"ATOMIC_RMW8_U_ADD_I32", "ATOMIC_RMW16_U_ADD_I32",
"ATOMIC_RMW8_U_ADD_I64", "ATOMIC_RMW16_U_ADD_I64", "ATOMIC_RMW32_U_ADD_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32, atomic_load_sub_64,
+ atomic_load_sub_8, atomic_load_sub_16, atomic_load_sub_32,
"ATOMIC_RMW8_U_SUB_I32", "ATOMIC_RMW16_U_SUB_I32",
"ATOMIC_RMW8_U_SUB_I64", "ATOMIC_RMW16_U_SUB_I64", "ATOMIC_RMW32_U_SUB_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_and_8, atomic_load_and_16, atomic_load_and_32, atomic_load_and_64,
+ atomic_load_and_8, atomic_load_and_16, atomic_load_and_32,
"ATOMIC_RMW8_U_AND_I32", "ATOMIC_RMW16_U_AND_I32",
"ATOMIC_RMW8_U_AND_I64", "ATOMIC_RMW16_U_AND_I64", "ATOMIC_RMW32_U_AND_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_or_8, atomic_load_or_16, atomic_load_or_32, atomic_load_or_64,
+ atomic_load_or_8, atomic_load_or_16, atomic_load_or_32,
"ATOMIC_RMW8_U_OR_I32", "ATOMIC_RMW16_U_OR_I32",
"ATOMIC_RMW8_U_OR_I64", "ATOMIC_RMW16_U_OR_I64", "ATOMIC_RMW32_U_OR_I64">;
defm : BinRMWTruncExtPattern<
- atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32, atomic_load_xor_64,
+ atomic_load_xor_8, atomic_load_xor_16, atomic_load_xor_32,
"ATOMIC_RMW8_U_XOR_I32", "ATOMIC_RMW16_U_XOR_I32",
"ATOMIC_RMW8_U_XOR_I64", "ATOMIC_RMW16_U_XOR_I64", "ATOMIC_RMW32_U_XOR_I64">;
defm : BinRMWTruncExtPattern<
- atomic_swap_8, atomic_swap_16, atomic_swap_32, atomic_swap_64,
+ atomic_swap_8, atomic_swap_16, atomic_swap_32,
"ATOMIC_RMW8_U_XCHG_I32", "ATOMIC_RMW16_U_XCHG_I32",
"ATOMIC_RMW8_U_XCHG_I64", "ATOMIC_RMW16_U_XCHG_I64",
"ATOMIC_RMW32_U_XCHG_I64">;
@@ -826,11 +826,11 @@ multiclass TerRMWPatOffsetOnly<ValueType ty, PatFrag kind, string inst> {
}
multiclass TerRMWPatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A32) 0, tglobaladdr:$off, (CONST_I32 0), ty:$exp,
ty:$new)>,
Requires<[HasAddr32, HasAtomics, IsNotPIC]>;
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off), ty:$exp, ty:$new)),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off), ty:$exp, ty:$new)),
(!cast<NI>(inst#_A64) 0, tglobaladdr:$off, (CONST_I64 0), ty:$exp,
ty:$new)>,
Requires<[HasAddr64, HasAtomics, IsNotPIC]>;
@@ -895,7 +895,7 @@ class sext_ter_rmw_16_64<PatFrag kind> : sext_ter_rmw_8_64<kind>;
// Patterns for various addressing modes for truncating-extending ternary RMWs.
multiclass TerRMWTruncExtPattern<
- PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32, PatFrag rmw_64,
+ PatFrag rmw_8, PatFrag rmw_16, PatFrag rmw_32,
string inst8_32, string inst16_32, string inst8_64, string inst16_64,
string inst32_64> {
// Truncating-extending ternary RMWs with no constant offset
@@ -961,7 +961,7 @@ multiclass TerRMWTruncExtPattern<
}
defm : TerRMWTruncExtPattern<
- atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
+ atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32,
"ATOMIC_RMW8_U_CMPXCHG_I32", "ATOMIC_RMW16_U_CMPXCHG_I32",
"ATOMIC_RMW8_U_CMPXCHG_I64", "ATOMIC_RMW16_U_CMPXCHG_I64",
"ATOMIC_RMW32_U_CMPXCHG_I64">;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 437b07bf8baf..be6547007aaf 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -130,8 +130,7 @@ let Predicates = [HasExceptionHandling] in {
// Throwing an exception: throw / rethrow
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
defm THROW : I<(outs), (ins tag_op:$tag, variable_ops),
- (outs), (ins tag_op:$tag),
- [(WebAssemblythrow (WebAssemblywrapper texternalsym:$tag))],
+ (outs), (ins tag_op:$tag), [],
"throw \t$tag", "throw \t$tag", 0x08>;
defm RETHROW : NRI<(outs), (ins i32imm:$depth), [], "rethrow \t$depth", 0x09>;
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
@@ -147,14 +146,10 @@ defm END_TRY : NRI<(outs), (ins), [], "end_try", 0x0b>;
// Catching an exception: catch / catch_all
let hasCtrlDep = 1, hasSideEffects = 1 in {
-// Currently 'catch' can only extract an i32, which is sufficient for C++
-// support, but according to the spec 'catch' can extract any number of values
-// based on the tag type.
-defm CATCH : I<(outs I32:$dst), (ins tag_op:$tag),
- (outs), (ins tag_op:$tag),
- [(set I32:$dst,
- (WebAssemblycatch (WebAssemblywrapper texternalsym:$tag)))],
- "catch \t$dst, $tag", "catch \t$tag", 0x07>;
+let variadicOpsAreDefs = 1 in
+defm CATCH : I<(outs), (ins tag_op:$tag, variable_ops),
+ (outs), (ins tag_op:$tag), [],
+ "catch", "catch \t$tag", 0x07>;
defm CATCH_ALL : NRI<(outs), (ins), [], "catch_all", 0x19>;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 15748067f123..ee9247a8bef9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -26,6 +26,10 @@ def HasSIMD128 :
Predicate<"Subtarget->hasSIMD128()">,
AssemblerPredicate<(all_of FeatureSIMD128), "simd128">;
+def HasRelaxedSIMD :
+ Predicate<"Subtarget->hasRelaxedSIMD()">,
+ AssemblerPredicate<(all_of FeatureRelaxedSIMD), "relaxed-simd">;
+
def HasAtomics :
Predicate<"Subtarget->hasAtomics()">,
AssemblerPredicate<(all_of FeatureAtomics), "atomics">;
@@ -77,10 +81,6 @@ def SDT_WebAssemblyLocalSet : SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>;
def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>;
def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyWrapperPIC : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
- SDTCisPtrTy<0>]>;
-def SDT_WebAssemblyThrow : SDTypeProfile<0, -1, []>;
-def SDT_WebAssemblyCatch : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
def SDT_WebAssemblyGlobalGet : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>;
def SDT_WebAssemblyGlobalSet : SDTypeProfile<0, 2, [SDTCisPtrTy<1>]>;
@@ -102,14 +102,10 @@ def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
SDT_WebAssemblyReturn,
[SDNPHasChain, SDNPVariadic]>;
-def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
+def WebAssemblyWrapper : SDNode<"WebAssemblyISD::Wrapper",
SDT_WebAssemblyWrapper>;
-def WebAssemblywrapperPIC : SDNode<"WebAssemblyISD::WrapperPIC",
- SDT_WebAssemblyWrapperPIC>;
-def WebAssemblythrow : SDNode<"WebAssemblyISD::THROW", SDT_WebAssemblyThrow,
- [SDNPHasChain, SDNPVariadic]>;
-def WebAssemblycatch : SDNode<"WebAssemblyISD::CATCH", SDT_WebAssemblyCatch,
- [SDNPHasChain, SDNPSideEffect]>;
+def WebAssemblyWrapperREL : SDNode<"WebAssemblyISD::WrapperREL",
+ SDT_WebAssemblyWrapper>;
def WebAssemblyglobal_get :
SDNode<"WebAssemblyISD::GLOBAL_GET", SDT_WebAssemblyGlobalGet,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -348,10 +344,10 @@ multiclass LOCAL<WebAssemblyRegClass rc, Operand global_op> {
} // hasSideEffects = 0
foreach vt = rc.RegTypes in {
def : Pat<(vt (WebAssemblyglobal_get
- (WebAssemblywrapper tglobaladdr:$addr))),
+ (WebAssemblyWrapper tglobaladdr:$addr))),
(!cast<NI>("GLOBAL_GET_" # rc) tglobaladdr:$addr)>;
def : Pat<(WebAssemblyglobal_set
- vt:$src, (WebAssemblywrapper tglobaladdr:$addr)),
+ vt:$src, (WebAssemblyWrapper tglobaladdr:$addr)),
(!cast<NI>("GLOBAL_SET_" # rc) tglobaladdr:$addr, vt:$src)>;
def : Pat<(vt (WebAssemblylocal_get (i32 timm:$local))),
(!cast<NI>("LOCAL_GET_" # rc) timm:$local)>;
@@ -386,38 +382,45 @@ defm CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
"f64.const\t$res, $imm", "f64.const\t$imm", 0x44>;
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
-def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper tglobaladdr:$addr)),
(CONST_I64 tglobaladdr:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper tglobaladdr:$addr)),
(GLOBAL_GET_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper tglobaladdr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper tglobaladdr:$addr)),
(GLOBAL_GET_I64 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapperREL tglobaladdr:$addr)),
(CONST_I32 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapperPIC tglobaladdr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapperREL tglobaladdr:$addr)),
(CONST_I64 tglobaladdr:$addr)>, Requires<[IsPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper tglobaltlsaddr:$addr)),
+def : Pat<(i32 (WebAssemblyWrapperREL tglobaltlsaddr:$addr)),
(CONST_I32 tglobaltlsaddr:$addr)>, Requires<[HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper tglobaltlsaddr:$addr)),
+def : Pat<(i64 (WebAssemblyWrapperREL tglobaltlsaddr:$addr)),
(CONST_I64 tglobaltlsaddr:$addr)>, Requires<[HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper tglobaltlsaddr:$addr)),
+ (GLOBAL_GET_I32 tglobaltlsaddr:$addr)>, Requires<[HasAddr32]>;
+def : Pat<(i64 (WebAssemblyWrapper tglobaltlsaddr:$addr)),
+ (GLOBAL_GET_I64 tglobaltlsaddr:$addr)>, Requires<[HasAddr64]>;
+
+def : Pat<(i32 (WebAssemblyWrapper texternalsym:$addr)),
(GLOBAL_GET_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper texternalsym:$addr)),
(GLOBAL_GET_I64 texternalsym:$addr)>, Requires<[IsPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i32 (WebAssemblyWrapper texternalsym:$addr)),
(CONST_I32 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr32]>;
-def : Pat<(i64 (WebAssemblywrapper texternalsym:$addr)),
+def : Pat<(i64 (WebAssemblyWrapper texternalsym:$addr)),
(CONST_I64 texternalsym:$addr)>, Requires<[IsNotPIC, HasAddr64]>;
-def : Pat<(i32 (WebAssemblywrapper mcsym:$sym)), (CONST_I32 mcsym:$sym)>;
-def : Pat<(i64 (WebAssemblywrapper mcsym:$sym)), (CONST_I64 mcsym:$sym)>;
+def : Pat<(i32 (WebAssemblyWrapperREL texternalsym:$addr)),
+ (CONST_I32 texternalsym:$addr)>, Requires<[IsPIC, HasAddr32]>;
+def : Pat<(i64 (WebAssemblyWrapperREL texternalsym:$addr)),
+ (CONST_I64 texternalsym:$addr)>, Requires<[IsPIC, HasAddr64]>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 82f5e985c558..a70f62dde845 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -117,10 +117,10 @@ defm : LoadPatOffsetOnly<f32, load, "LOAD_F32">;
defm : LoadPatOffsetOnly<f64, load, "LOAD_F64">;
multiclass LoadPatGlobalAddrOffOnly<ValueType ty, SDPatternOperator kind, string inst> {
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
(!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>,
Requires<[IsNotPIC, HasAddr32]>;
- def : Pat<(ty (kind (WebAssemblywrapper tglobaladdr:$off))),
+ def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
(!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>,
Requires<[IsNotPIC, HasAddr64]>;
}
@@ -313,11 +313,11 @@ defm : StorePatOffsetOnly<f32, store, "STORE_F32">;
defm : StorePatOffsetOnly<f64, store, "STORE_F64">;
multiclass StorePatGlobalAddrOffOnly<ValueType ty, PatFrag kind, string inst> {
- def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)),
(!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0),
ty:$val)>,
Requires<[IsNotPIC, HasAddr32]>;
- def : Pat<(kind ty:$val, (WebAssemblywrapper tglobaladdr:$off)),
+ def : Pat<(kind ty:$val, (WebAssemblyWrapper tglobaladdr:$off)),
(!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0),
ty:$val)>,
Requires<[IsNotPIC, HasAddr64]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 6429b46673a6..30b99c3a69a9 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -11,17 +11,34 @@
///
//===----------------------------------------------------------------------===//
-// Instructions requiring HasSIMD128 and the simd128 prefix byte
-multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
- list<dag> pattern_r, string asmstr_r = "",
- string asmstr_s = "", bits<32> simdop = -1> {
+// Instructions using the SIMD opcode prefix and requiring one of the SIMD
+// feature predicates.
+multiclass ABSTRACT_SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r,
+ string asmstr_s, bits<32> simdop,
+ Predicate simd_level> {
defm "" : I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r, asmstr_s,
!if(!ge(simdop, 0x100),
!or(0xfd0000, !and(0xffff, simdop)),
!or(0xfd00, !and(0xff, simdop)))>,
- Requires<[HasSIMD128]>;
+ Requires<[simd_level]>;
+}
+
+multiclass SIMD_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
+ asmstr_s, simdop, HasSIMD128>;
}
+multiclass RELAXED_I<dag oops_r, dag iops_r, dag oops_s, dag iops_s,
+ list<dag> pattern_r, string asmstr_r = "",
+ string asmstr_s = "", bits<32> simdop = -1> {
+ defm "" : ABSTRACT_SIMD_I<oops_r, iops_r, oops_s, iops_s, pattern_r, asmstr_r,
+ asmstr_s, simdop, HasRelaxedSIMD>;
+}
+
+
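The !if in ABSTRACT_SIMD_I folds the 0xfd SIMD prefix into the stored opcode, giving sub-opcodes of 0x100 and above a two-byte slot. The same fold in plain C++, as a sketch (foldSimdPrefix is a made-up name, and 0x102 below is an invented sub-opcode used only to exercise the second branch):

#include <cstdint>
#include <cstdio>

// One-byte SIMD sub-opcodes follow a 0xfd prefix byte; sub-opcodes >= 0x100
// are kept in the low two bytes after the prefix instead.
static uint32_t foldSimdPrefix(uint32_t SimdOp) {
  return SimdOp >= 0x100 ? (0xfd0000u | (SimdOp & 0xffffu))
                         : (0xfd00u | (SimdOp & 0xffu));
}

int main() {
  std::printf("%#x\n", foldSimdPrefix(0x5c));  // 0xfd5c (the load_zero sub-opcode used below)
  std::printf("%#x\n", foldSimdPrefix(0x102)); // 0xfd0102
}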
defm "" : ARGUMENT<V128, v16i8>;
defm "" : ARGUMENT<V128, v8i16>;
defm "" : ARGUMENT<V128, v4i32>;
@@ -267,6 +284,16 @@ multiclass SIMDLoadZero<Vec vec, bits<32> simdop> {
defm "" : SIMDLoadZero<I32x4, 0x5c>;
defm "" : SIMDLoadZero<I64x2, 0x5d>;
+// Use load_zero to load scalars into vectors as well where possible.
+// TODO: i32, i16, and i8 scalars
+def load_scalar :
+ PatFrag<(ops node:$addr), (scalar_to_vector (i64 (load $addr)))>;
+defm : LoadPatNoOffset<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
+defm : LoadPatImmOff<v2i64, load_scalar, regPlusImm, "LOAD_ZERO_I64x2">;
+defm : LoadPatImmOff<v2i64, load_scalar, or_is_add, "LOAD_ZERO_I64x2">;
+defm : LoadPatOffsetOnly<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
+defm : LoadPatGlobalAddrOffOnly<v2i64, load_scalar, "LOAD_ZERO_I64x2">;
+
// TODO: f32x4 and f64x2 as well
foreach vec = [I32x4, I64x2] in {
defvar inst = "LOAD_ZERO_"#vec;
@@ -1165,6 +1192,16 @@ def : Pat<(vec.int_vt (vselect
(pmax $lhs, $rhs)>;
}
+// And match the pmin/pmax LLVM intrinsics as well
+def : Pat<(v4f32 (int_wasm_pmin (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (PMIN_F32x4 V128:$lhs, V128:$rhs)>;
+def : Pat<(v4f32 (int_wasm_pmax (v4f32 V128:$lhs), (v4f32 V128:$rhs))),
+ (PMAX_F32x4 V128:$lhs, V128:$rhs)>;
+def : Pat<(v2f64 (int_wasm_pmin (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (PMIN_F64x2 V128:$lhs, V128:$rhs)>;
+def : Pat<(v2f64 (int_wasm_pmax (v2f64 V128:$lhs), (v2f64 V128:$rhs))),
+ (PMAX_F64x2 V128:$lhs, V128:$rhs)>;
+
//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//
@@ -1241,87 +1278,6 @@ multiclass SIMDNarrow<Vec vec, bits<32> baseInst> {
defm "" : SIMDNarrow<I16x8, 101>;
defm "" : SIMDNarrow<I32x4, 133>;
-// Use narrowing operations for truncating stores. Since the narrowing
-// operations are saturating instead of truncating, we need to mask
-// the stored values first.
-def store_v8i8_trunc_v8i16 :
- OutPatFrag<(ops node:$val),
- (EXTRACT_LANE_I64x2
- (NARROW_U_I8x16
- (AND
- (CONST_V128_I16x8
- 0x00ff, 0x00ff, 0x00ff, 0x00ff,
- 0x00ff, 0x00ff, 0x00ff, 0x00ff),
- node:$val),
- $val), // Unused input
- 0)>;
-
-def store_v4i16_trunc_v4i32 :
- OutPatFrag<(ops node:$val),
- (EXTRACT_LANE_I64x2
- (NARROW_U_I16x8
- (AND
- (CONST_V128_I32x4
- 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff),
- node:$val),
- $val), // Unused input
- 0)>;
-
-// Store patterns adapted from WebAssemblyInstrMemory.td
-multiclass NarrowingStorePatNoOffset<Vec vec, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, I32:$addr),
- (STORE_I64_A32 0, 0, $addr, (out $val))>,
- Requires<[HasAddr32]>;
- def : Pat<(node vec.vt:$val, I64:$addr),
- (STORE_I64_A64 0, 0, $addr, (out $val))>,
- Requires<[HasAddr64]>;
-}
-
-defm : NarrowingStorePatNoOffset<I16x8, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatNoOffset<I32x4, store_v4i16_trunc_v4i32>;
-
-multiclass NarrowingStorePatImmOff<Vec vec, PatFrag operand, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, (operand I32:$addr, imm:$off)),
- (STORE_I64_A32 0, imm:$off, $addr, (out $val))>,
- Requires<[HasAddr32]>;
- def : Pat<(node vec.vt:$val, (operand I64:$addr, imm:$off)),
- (STORE_I64_A64 0, imm:$off, $addr, (out $val))>,
- Requires<[HasAddr64]>;
-}
-
-defm : NarrowingStorePatImmOff<I16x8, regPlusImm, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatImmOff<I32x4, regPlusImm, store_v4i16_trunc_v4i32>;
-defm : NarrowingStorePatImmOff<I16x8, or_is_add, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatImmOff<I32x4, or_is_add, store_v4i16_trunc_v4i32>;
-
-multiclass NarrowingStorePatOffsetOnly<Vec vec, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, imm:$off),
- (STORE_I64_A32 0, imm:$off, (CONST_I32 0), (out $val))>,
- Requires<[HasAddr32]>;
- def : Pat<(node vec.vt:$val, imm:$off),
- (STORE_I64_A64 0, imm:$off, (CONST_I64 0), (out $val))>,
- Requires<[HasAddr64]>;
-}
-
-defm : NarrowingStorePatOffsetOnly<I16x8, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatOffsetOnly<I32x4, store_v4i16_trunc_v4i32>;
-
-multiclass NarrowingStorePatGlobalAddrOffOnly<Vec vec, OutPatFrag out> {
- defvar node = !cast<PatFrag>("truncstorevi"#vec.split.lane_bits);
- def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_I64_A32 0, tglobaladdr:$off, (CONST_I32 0), (out $val))>,
- Requires<[IsNotPIC, HasAddr32]>;
- def : Pat<(node vec.vt:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_I64_A64 0, tglobaladdr:$off, (CONST_I64 0), (out $val))>,
- Requires<[IsNotPIC, HasAddr64]>;
-}
-
-defm : NarrowingStorePatGlobalAddrOffOnly<I16x8, store_v8i8_trunc_v8i16>;
-defm : NarrowingStorePatGlobalAddrOffOnly<I32x4, store_v4i16_trunc_v4i32>;
-
// Bitcasts are nops
// Matching bitcast t1 to t1 causes strange errors, so avoid repeating types
foreach t1 = AllVecs in
@@ -1349,9 +1305,107 @@ def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>;
defm "" : SIMDConvert<F64x2, F32x4, promote_low, "promote_low_f32x4", 0x5f>;
+// Lower extending loads to load64_zero + promote_low
+def extloadv2f32 : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
+ let MemoryVT = v2f32;
+}
+// Adapted from the body of LoadPatNoOffset
+// TODO: other addressing patterns
+def : Pat<(v2f64 (extloadv2f32 (i32 I32:$addr))),
+ (promote_low_F64x2 (LOAD_ZERO_I64x2_A32 0, 0, I32:$addr))>,
+ Requires<[HasAddr32]>;
+def : Pat<(v2f64 (extloadv2f32 (i64 I64:$addr))),
+ (promote_low_F64x2 (LOAD_ZERO_I64x2_A64 0, 0, I64:$addr))>,
+ Requires<[HasAddr64]>;
+
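Whether the extending-load pattern actually appears depends on earlier IR transforms, but a typical source shape that can produce it (an assumption, shown only for illustration) is widening a pair of floats to doubles:

#include <cstddef>

// May be vectorized into a v2f32 extending load (fpext to v2f64), which the
// patterns above then select as v128.load64_zero + f64x2.promote_low_f32x4.
void widen2(const float *In, double *Out) {
  Out[0] = static_cast<double>(In[0]);
  Out[1] = static_cast<double>(In[1]);
}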
//===----------------------------------------------------------------------===//
// Saturating Rounding Q-Format Multiplication
//===----------------------------------------------------------------------===//
defm Q15MULR_SAT_S :
SIMDBinary<I16x8, int_wasm_q15mulr_sat_signed, "q15mulr_sat_s", 0x82>;
+
+//===----------------------------------------------------------------------===//
+// Fused Multiply-Add and Subtract (FMA/FMS)
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDFM<Vec vec, bits<32> simdopA, bits<32> simdopS> {
+ defm FMA_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_fma
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".fma\t$dst, $a, $b, $c", vec.prefix#".fma", simdopA>;
+ defm FMS_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_fms
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".fms\t$dst, $a, $b, $c", vec.prefix#".fms", simdopS>;
+}
+
+defm "" : SIMDFM<F32x4, 0xaf, 0xb0>;
+defm "" : SIMDFM<F64x2, 0xcf, 0xd0>;
+
+//===----------------------------------------------------------------------===//
+// Laneselect
+//===----------------------------------------------------------------------===//
+
+multiclass SIMDLANESELECT<Vec vec, bits<32> op> {
+ defm LANESELECT_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_laneselect
+ (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
+ vec.prefix#".laneselect\t$dst, $a, $b, $c", vec.prefix#".laneselect", op>;
+}
+
+defm "" : SIMDLANESELECT<I8x16, 0xb2>;
+defm "" : SIMDLANESELECT<I16x8, 0xb3>;
+defm "" : SIMDLANESELECT<I32x4, 0xd2>;
+defm "" : SIMDLANESELECT<I64x2, 0xd3>;
+
+
+//===----------------------------------------------------------------------===//
+// Relaxed swizzle
+//===----------------------------------------------------------------------===//
+
+defm RELAXED_SWIZZLE :
+ RELAXED_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
+ [(set (v16i8 V128:$dst),
+ (int_wasm_relaxed_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
+ "i8x16.relaxed_swizzle\t$dst, $src, $mask", "i8x16.relaxed_swizzle", 162>;
+
+//===----------------------------------------------------------------------===//
+// Relaxed floating-point min and max.
+//===----------------------------------------------------------------------===//
+
+multiclass SIMD_RELAXED_FMINMAX<Vec vec, bits<32> simdopMin, bits<32> simdopMax> {
+ defm RELAXED_FMIN_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_relaxed_min
+ (vec.vt V128:$a), (vec.vt V128:$b)))],
+ vec.prefix#".relaxed_min\t$dst, $a, $b", vec.prefix#".relaxed_min", simdopMin>;
+ defm RELAXED_FMAX_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$a, V128:$b), (outs), (ins),
+ [(set (vec.vt V128:$dst), (int_wasm_relaxed_max
+ (vec.vt V128:$a), (vec.vt V128:$b)))],
+ vec.prefix#".relaxed_max\t$dst, $a, $b", vec.prefix#".relaxed_max", simdopMax>;
+}
+
+defm "" : SIMD_RELAXED_FMINMAX<F32x4, 0xb4, 0xe2>;
+defm "" : SIMD_RELAXED_FMINMAX<F64x2, 0xd4, 0xee>;
+
+//===----------------------------------------------------------------------===//
+// Relaxed floating-point to int conversions
+//===----------------------------------------------------------------------===//
+
+multiclass SIMD_RELAXED_CONVERT<Vec vec, Vec arg, SDPatternOperator op, string name, bits<32> simdop> {
+ defm op#_#vec :
+ RELAXED_I<(outs V128:$dst), (ins V128:$vec), (outs), (ins),
+ [(set (vec.vt V128:$dst), (vec.vt (op (arg.vt V128:$vec))))],
+ vec.prefix#"."#name#"\t$dst, $vec", vec.prefix#"."#name, simdop>;
+}
+
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F32x4, int_wasm_relaxed_trunc_signed, "relaxed_trunc_f32x4_s", 0xa5>;
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F32x4, int_wasm_relaxed_trunc_unsigned, "relaxed_trunc_f32x4_u", 0xa6>;
+
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F64x2, int_wasm_relaxed_trunc_zero_signed, "relaxed_trunc_f64x2_s_zero", 0xc5>;
+defm "" : SIMD_RELAXED_CONVERT<I32x4, F64x2, int_wasm_relaxed_trunc_zero_unsigned, "relaxed_trunc_f64x2_u_zero", 0xc6>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
index 2348bb165daf..e44c2073eaeb 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrTable.td
@@ -11,9 +11,18 @@
/// Instructions that handle tables
//===----------------------------------------------------------------------===//
-multiclass TABLE<WebAssemblyRegClass rt> {
+def WebAssemblyTableSet_t : SDTypeProfile<0, 3, [SDTCisPtrTy<1>]>;
+def WebAssemblyTableSet : SDNode<"WebAssemblyISD::TABLE_SET", WebAssemblyTableSet_t,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def WebAssemblyTableGet_t : SDTypeProfile<1, 2, [SDTCisPtrTy<1>]>;
+def WebAssemblyTableGet : SDNode<"WebAssemblyISD::TABLE_GET", WebAssemblyTableGet_t,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+
+multiclass TABLE<WebAssemblyRegClass rc> {
let mayLoad = 1 in
- defm TABLE_GET_#rt : I<(outs rt:$res), (ins table32_op:$table, I32:$i),
+ defm TABLE_GET_#rc : I<(outs rc:$res), (ins table32_op:$table, I32:$i),
(outs), (ins table32_op:$table),
[],
"table.get\t$res, $table, $i",
@@ -21,41 +30,43 @@ multiclass TABLE<WebAssemblyRegClass rt> {
0x25>;
let mayStore = 1 in
- defm TABLE_SET_#rt : I<(outs), (ins table32_op:$table, I32:$i, rt:$val),
+ defm TABLE_SET_#rc : I<(outs), (ins table32_op:$table, I32:$i, rc:$val),
(outs), (ins table32_op:$table),
[],
"table.set\t$table, $i, $val",
"table.set\t$table",
0x26>;
- defm TABLE_GROW_#rt : I<(outs I32:$sz), (ins table32_op:$table, rt:$val, I32:$n),
+ defm TABLE_GROW_#rc : I<(outs I32:$sz), (ins table32_op:$table, rc:$val, I32:$n),
(outs), (ins table32_op:$table),
[],
"table.grow\t$sz, $table, $val, $n",
"table.grow\t$table",
0xfc0f>;
- defm TABLE_FILL_#rt : I<(outs), (ins table32_op:$table, I32:$i, rt:$val, I32:$n),
+ defm TABLE_FILL_#rc : I<(outs), (ins table32_op:$table, I32:$i, rc:$val, I32:$n),
(outs), (ins table32_op:$table),
[],
"table.fill\t$table, $i, $val, $n",
"table.fill\t$table",
0xfc11>;
+ foreach vt = rc.RegTypes in {
+ def : Pat<(vt (WebAssemblyTableGet (WebAssemblyWrapper tglobaladdr:$table), i32:$idx)),
+ (!cast<NI>("TABLE_GET_" # rc) tglobaladdr:$table, i32:$idx)>;
+ def : Pat<(WebAssemblyTableSet
+ (WebAssemblyWrapper tglobaladdr:$table),
+ i32:$idx,
+ vt:$src),
+ (!cast<NI>("TABLE_SET_" # rc) tglobaladdr:$table, i32:$idx, vt:$src)>;
+ }
}
defm "" : TABLE<FUNCREF>, Requires<[HasReferenceTypes]>;
defm "" : TABLE<EXTERNREF>, Requires<[HasReferenceTypes]>;
-def wasm_table_set_t : SDTypeProfile<0, 3, []>;
-def wasm_table_set : SDNode<"WebAssemblyISD::TABLE_SET", wasm_table_set_t,
- [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-
-def : Pat<(wasm_table_set i32:$table, i32:$idx, funcref:$r),
- (TABLE_SET_FUNCREF i32:$table, i32:$idx, funcref:$r)>,
- Requires<[HasReferenceTypes]>;
-def : Pat<(wasm_table_set i32:$table, i32:$idx, externref:$r),
- (TABLE_SET_EXTERNREF i32:$table, i32:$idx, externref:$r)>,
+def : Pat<(WebAssemblyTableSet mcsym:$table, i32:$idx, funcref:$r),
+ (TABLE_SET_FUNCREF mcsym:$table, i32:$idx, funcref:$r)>,
Requires<[HasReferenceTypes]>;
defm TABLE_SIZE : I<(outs I32:$sz), (ins table32_op:$table),
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index 01b3aa887738..52226206eb32 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -63,12 +63,11 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
auto &MRI = MF.getRegInfo();
for (auto &MBB : MF) {
- for (auto MII = MBB.begin(); MII != MBB.end();) {
- MachineInstr *MI = &*MII++;
- if (MI->getOpcode() != WebAssembly::BR_UNLESS)
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ if (MI.getOpcode() != WebAssembly::BR_UNLESS)
continue;
- Register Cond = MI->getOperand(1).getReg();
+ Register Cond = MI.getOperand(1).getReg();
bool Inverted = false;
// Attempt to invert the condition in place.
@@ -189,7 +188,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// instruction to invert it.
if (!Inverted) {
Register Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
.addReg(Cond);
MFI.stackifyVReg(MRI, Tmp);
Cond = Tmp;
@@ -199,10 +198,10 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
// The br_unless condition has now been inverted. Insert a br_if and
// delete the br_unless.
assert(Inverted);
- BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF))
- .add(MI->getOperand(0))
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII.get(WebAssembly::BR_IF))
+ .add(MI.getOperand(0))
.addReg(Cond);
- MBB.erase(MI);
+ MBB.erase(&MI);
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 599829a9e474..4eacc921b6cd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -7,15 +7,12 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file lowers exception-related instructions and setjmp/longjmp
-/// function calls in order to use Emscripten's JavaScript try and catch
-/// mechanism.
+/// This file lowers exception-related instructions and setjmp/longjmp function
+/// calls to use Emscripten's library functions. The pass uses JavaScript's try
+/// and catch mechanism in the case of Emscripten EH/SjLj, and Wasm EH intrinsics in
+/// the case of Wasm SjLj.
///
-/// To handle exceptions and setjmp/longjmps, this scheme relies on JavaScript's
-/// try and catch syntax and relevant exception-related libraries implemented
-/// in JavaScript glue code that will be produced by Emscripten.
-///
-/// * Exception handling
+/// * Emscripten exception handling
/// This pass lowers invokes and landingpads into library functions in JS glue
/// code. Invokes are lowered into function wrappers called invoke wrappers that
/// exist in JS side, which wraps the original function call with JS try-catch.
@@ -23,7 +20,7 @@
/// variables (see below) so we can check whether an exception occurred from
/// wasm code and handle it appropriately.
///
-/// * Setjmp-longjmp handling
+/// * Emscripten setjmp-longjmp handling
/// This pass lowers setjmp to a reasonably-performant approach for emscripten.
/// The idea is that each block with a setjmp is broken up into two parts: the
/// part containing setjmp and the part right after the setjmp. The latter part
@@ -52,7 +49,7 @@
/// __threwValue is 0 for exceptions, and the argument to longjmp in case of
/// longjmp.
///
-/// * Exception handling
+/// * Emscripten exception handling
///
/// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions
/// at link time. setThrew exists in Emscripten's compiler-rt:
@@ -121,16 +118,16 @@
/// call @llvm_eh_typeid_for(type)
/// llvm_eh_typeid_for function will be generated in JS glue code.
///
-/// * Setjmp / Longjmp handling
+/// * Emscripten setjmp / longjmp handling
///
-/// In case calls to longjmp() exists
+/// If there are calls to longjmp()
///
/// 1) Lower
-/// longjmp(buf, value)
+/// longjmp(env, val)
/// into
-/// emscripten_longjmp(buf, value)
+/// emscripten_longjmp(env, val)
///
-/// In case calls to setjmp() exists
+/// If there are calls to setjmp()
///
/// 2) In the function entry that calls setjmp, initialize setjmpTable and
/// setjmpTableSize as follows:
@@ -141,9 +138,9 @@
/// Emscripten compiler-rt.
///
/// 3) Lower
-/// setjmp(buf)
+/// setjmp(env)
/// into
-/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
+/// setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize);
/// setjmpTableSize = getTempRet0();
/// For each dynamic setjmp call, setjmpTable stores its ID (a number which
/// is incrementally assigned from 0) and its label (a unique number that
@@ -151,10 +148,9 @@
/// setjmpTable, it is reallocated in saveSetjmp() in Emscripten's
/// compiler-rt and it will return the new table address, and assign the new
/// table size in setTempRet0(). saveSetjmp also stores the setjmp's ID into
-/// the buffer buf. A BB with setjmp is split into two after setjmp call in
+/// the buffer 'env'. A BB with setjmp is split into two after the setjmp call in
/// order to make the post-setjmp BB the possible destination of longjmp BB.
///
-///
/// 4) Lower every call that might longjmp into
/// __THREW__ = 0;
/// call @__invoke_SIG(func, arg1, arg2)
@@ -171,7 +167,7 @@
/// %label = -1;
/// }
/// longjmp_result = getTempRet0();
-/// switch label {
+/// switch %label {
/// label 1: goto post-setjmp BB 1
/// label 2: goto post-setjmp BB 2
/// ...
@@ -188,23 +184,114 @@
/// occurred. Otherwise we jump to the right post-setjmp BB based on the
/// label.
///
+/// * Wasm setjmp / longjmp handling
+/// This mode still uses some Emscripten library functions but not JavaScript's
+/// try-catch mechanism. It instead uses Wasm exception handling intrinsics,
+/// which will be lowered to exception handling instructions.
+///
+/// If there are calls to longjmp()
+///
+/// 1) Lower
+/// longjmp(env, val)
+/// into
+/// __wasm_longjmp(env, val)
+///
+/// If there are calls to setjmp()
+///
+/// 2) and 3): The same as 2) and 3) in Emscripten SjLj.
+/// (setjmpTable/setjmpTableSize initialization + setjmp callsite
+/// transformation)
+///
+/// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value
+/// thrown by the __wasm_longjmp function. In the Emscripten library, we have this
+/// struct:
+///
+/// struct __WasmLongjmpArgs {
+/// void *env;
+/// int val;
+/// };
+/// struct __WasmLongjmpArgs __wasm_longjmp_args;
+///
+/// The thrown value here is a pointer to a __wasm_longjmp_args struct object. We
+/// use this struct to transfer two values by throwing a single value. Wasm
+/// throw and catch instructions are capable of throwing and catching multiple
+/// values, but that also requires multivalue support, which is currently not
+/// very reliable.
+/// TODO Switch to throwing and catching two values without using the struct
+///
+/// All longjmpable function calls will be converted to an invoke that will
+/// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we
+/// test the thrown values using the testSetjmp function, as we do for Emscripten
+/// SjLj. The main difference is that in Emscripten SjLj we need to transform
+/// every longjmpable callsite into a sequence of code including a testSetjmp()
+/// call; in Wasm SjLj we do the testing in only one place, in this catchpad.
+///
+/// After calling testSetjmp(), if the longjmp does not correspond to one of
+/// the setjmps within the current function, we rethrow the longjmp by calling
+/// __wasm_longjmp(). If it corresponds to one of the setjmps in the
+/// function, we jump to the beginning of the function, which contains a switch
+/// to each post-setjmp BB. Again, in Emscripten SjLj, this switch is added for
+/// every longjmpable callsite; in Wasm SjLj we do this only once at the top of
+/// the function (after setjmpTable/setjmpTableSize initialization).
+///
+/// Below is the pseudocode for what we have described:
+///
+/// entry:
+/// Initialize setjmpTable and setjmpTableSize
+///
+/// setjmp.dispatch:
+/// switch %label {
+/// label 1: goto post-setjmp BB 1
+/// label 2: goto post-setjmp BB 2
+/// ...
+/// default: goto split next BB
+/// }
+/// ...
+///
+/// bb:
+/// invoke void @foo() ;; foo is a longjmpable function
+/// to label %next unwind label %catch.dispatch.longjmp
+/// ...
+///
+/// catch.dispatch.longjmp:
+/// %0 = catchswitch within none [label %catch.longjmp] unwind to caller
+///
+/// catch.longjmp:
+/// %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs
+/// %env = load 'env' field from __WasmLongjmpArgs
+/// %val = load 'val' field from __WasmLongjmpArgs
+/// %label = testSetjmp(mem[%env], setjmpTable, setjmpTableSize);
+/// if (%label == 0)
+/// __wasm_longjmp(%env, %val)
+/// catchret to %setjmp.dispatch
+///
///===----------------------------------------------------------------------===//
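For orientation, the snippet below shows the kind of source-level setjmp/longjmp use that ends up in the shapes described above; it is illustrative only (the pass operates on IR, not source), and the comments map each construct to the Wasm SjLj lowering:

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Env;

static void MayLongjmp(bool Fail) {
  if (Fail)
    std::longjmp(Env, 1); // becomes a call to __wasm_longjmp(env, val)
}

int main() {
  if (setjmp(Env)) {  // becomes saveSetjmp(...) plus the setjmp.dispatch switch
    std::puts("came back through longjmp");
    return 0;
  }
  MayLongjmp(true);   // longjmpable call: turned into an invoke that unwinds
                      // to catch.dispatch.longjmp
  return 1;
}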
#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/CodeGen/WasmEHFuncInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-lower-em-ehsjlj"
+// Emscripten's asm.js-style exception handling
+extern cl::opt<bool> WasmEnableEmEH;
+// Emscripten's asm.js-style setjmp/longjmp handling
+extern cl::opt<bool> WasmEnableEmSjLj;
+// Wasm setjmp/longjmp handling using wasm EH instructions
+extern cl::opt<bool> WasmEnableSjLj;
+
static cl::list<std::string>
EHAllowlist("emscripten-cxx-exceptions-allowed",
cl::desc("The list of function names in which Emscripten-style "
@@ -214,19 +301,25 @@ static cl::list<std::string>
namespace {
class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
- bool EnableEH; // Enable exception handling
- bool EnableSjLj; // Enable setjmp/longjmp handling
- bool DoSjLj; // Whether we actually perform setjmp/longjmp handling
-
- GlobalVariable *ThrewGV = nullptr;
- GlobalVariable *ThrewValueGV = nullptr;
- Function *GetTempRet0Func = nullptr;
- Function *SetTempRet0Func = nullptr;
- Function *ResumeF = nullptr;
- Function *EHTypeIDF = nullptr;
- Function *EmLongjmpF = nullptr;
- Function *SaveSetjmpF = nullptr;
- Function *TestSetjmpF = nullptr;
+ bool EnableEmEH; // Enable Emscripten exception handling
+ bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling
+ bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling
+ bool DoSjLj; // Whether we actually perform setjmp/longjmp handling
+
+ GlobalVariable *ThrewGV = nullptr; // __THREW__ (Emscripten)
+ GlobalVariable *ThrewValueGV = nullptr; // __threwValue (Emscripten)
+ Function *GetTempRet0F = nullptr; // getTempRet0() (Emscripten)
+ Function *SetTempRet0F = nullptr; // setTempRet0() (Emscripten)
+ Function *ResumeF = nullptr; // __resumeException() (Emscripten)
+ Function *EHTypeIDF = nullptr; // llvm.eh.typeid.for() (intrinsic)
+ Function *EmLongjmpF = nullptr; // emscripten_longjmp() (Emscripten)
+ Function *SaveSetjmpF = nullptr; // saveSetjmp() (Emscripten)
+ Function *TestSetjmpF = nullptr; // testSetjmp() (Emscripten)
+ Function *WasmLongjmpF = nullptr; // __wasm_longjmp() (Emscripten)
+ Function *CatchF = nullptr; // wasm.catch() (intrinsic)
+
+ // type of 'struct __WasmLongjmpArgs' defined in emscripten
+ Type *LongjmpArgsTy = nullptr;
// __cxa_find_matching_catch_N functions.
// Indexed by the number of clauses in an original landingpad instruction.
@@ -242,31 +335,47 @@ class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
return "WebAssembly Lower Emscripten Exceptions";
}
+ using InstVector = SmallVectorImpl<Instruction *>;
bool runEHOnFunction(Function &F);
bool runSjLjOnFunction(Function &F);
+ void handleLongjmpableCallsForEmscriptenSjLj(
+ Function &F, InstVector &SetjmpTableInsts,
+ InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs);
+ void
+ handleLongjmpableCallsForWasmSjLj(Function &F, InstVector &SetjmpTableInsts,
+ InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs);
Function *getFindMatchingCatch(Module &M, unsigned NumClauses);
Value *wrapInvoke(CallBase *CI);
void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw,
Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label,
- Value *&LongjmpResult, BasicBlock *&EndBB);
+ Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB,
+ PHINode *&CallEmLongjmpBBThrewPHI,
+ PHINode *&CallEmLongjmpBBThrewValuePHI,
+ BasicBlock *&EndBB);
Function *getInvokeWrapper(CallBase *CI);
bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); }
- bool canLongjmp(Module &M, const Value *Callee) const;
- bool isEmAsmCall(Module &M, const Value *Callee) const;
bool supportsException(const Function *F) const {
- return EnableEH && (areAllExceptionsAllowed() ||
- EHAllowlistSet.count(std::string(F->getName())));
+ return EnableEmEH && (areAllExceptionsAllowed() ||
+ EHAllowlistSet.count(std::string(F->getName())));
}
+ void replaceLongjmpWith(Function *LongjmpF, Function *NewF);
void rebuildSSA(Function &F);
public:
static char ID;
- WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
- : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj) {
+ WebAssemblyLowerEmscriptenEHSjLj()
+ : ModulePass(ID), EnableEmEH(WasmEnableEmEH),
+ EnableEmSjLj(WasmEnableEmSjLj), EnableWasmSjLj(WasmEnableSjLj) {
+ assert(!(EnableEmSjLj && EnableWasmSjLj) &&
+ "Two SjLj modes cannot be turned on at the same time");
+ assert(!(EnableEmEH && EnableWasmSjLj) &&
+ "Wasm SjLj should be only used with Wasm EH");
EHAllowlistSet.insert(EHAllowlist.begin(), EHAllowlist.end());
}
bool runOnModule(Module &M) override;
@@ -282,9 +391,8 @@ INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
"WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
false, false)
-ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH,
- bool EnableSjLj) {
- return new WebAssemblyLowerEmscriptenEHSjLj(EnableEH, EnableSjLj);
+ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() {
+ return new WebAssemblyLowerEmscriptenEHSjLj();
}
static bool canThrow(const Value *V) {
@@ -353,12 +461,12 @@ static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name,
if (!F->hasFnAttribute("wasm-import-module")) {
llvm::AttrBuilder B;
B.addAttribute("wasm-import-module", "env");
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
}
if (!F->hasFnAttribute("wasm-import-name")) {
llvm::AttrBuilder B;
B.addAttribute("wasm-import-name", F->getName());
- F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+ F->addFnAttrs(B);
}
return F;
}
@@ -415,15 +523,6 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
Module *M = CI->getModule();
LLVMContext &C = M->getContext();
- // If we are calling a function that is noreturn, we must remove that
- // attribute. The code we insert here does expect it to return, after we
- // catch the exception.
- if (CI->doesNotReturn()) {
- if (auto *F = CI->getCalledFunction())
- F->removeFnAttr(Attribute::NoReturn);
- CI->removeAttribute(AttributeList::FunctionIndex, Attribute::NoReturn);
- }
-
IRBuilder<> IRB(C);
IRB.SetInsertPoint(CI);
@@ -450,10 +549,10 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
// No attributes for the callee pointer.
ArgAttributes.push_back(AttributeSet());
// Copy the argument attributes from the original
- for (unsigned I = 0, E = CI->getNumArgOperands(); I < E; ++I)
- ArgAttributes.push_back(InvokeAL.getParamAttributes(I));
+ for (unsigned I = 0, E = CI->arg_size(); I < E; ++I)
+ ArgAttributes.push_back(InvokeAL.getParamAttrs(I));
- AttrBuilder FnAttrs(InvokeAL.getFnAttributes());
+ AttrBuilder FnAttrs(InvokeAL.getFnAttrs());
if (FnAttrs.contains(Attribute::AllocSize)) {
// The allocsize attribute (if any) referes to parameters by index and needs
// to be adjusted.
@@ -467,9 +566,8 @@ Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) {
}
// Reconstruct the AttributesList based on the vector we constructed.
- AttributeList NewCallAL =
- AttributeList::get(C, AttributeSet::get(C, FnAttrs),
- InvokeAL.getRetAttributes(), ArgAttributes);
+ AttributeList NewCallAL = AttributeList::get(
+ C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttrs(), ArgAttributes);
NewCall->setAttributes(NewCallAL);
CI->replaceAllUsesWith(NewCall);
@@ -504,8 +602,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) {
return F;
}
-bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
- const Value *Callee) const {
+static bool canLongjmp(const Value *Callee) {
if (auto *CalleeF = dyn_cast<Function>(Callee))
if (CalleeF->isIntrinsic())
return false;
@@ -543,8 +640,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
return true;
}
-bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
- const Value *Callee) const {
+static bool isEmAsmCall(const Value *Callee) {
StringRef CalleeName = Callee->getName();
// This is an exhaustive list from Emscripten's <emscripten/em_asm.h>.
return CalleeName == "emscripten_asm_const_int" ||
@@ -558,7 +654,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
// The code this generates is equivalent to the following JavaScript code:
// %__threwValue.val = __threwValue;
// if (%__THREW__.val != 0 & %__threwValue.val != 0) {
-// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
+// %label = testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
// emscripten_longjmp(%__THREW__.val, %__threwValue.val);
// setTempRet0(%__threwValue.val);
@@ -572,7 +668,8 @@ bool WebAssemblyLowerEmscriptenEHSjLj::isEmAsmCall(Module &M,
void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
BasicBlock *BB, DebugLoc DL, Value *Threw, Value *SetjmpTable,
Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult,
- BasicBlock *&EndBB) {
+ BasicBlock *&CallEmLongjmpBB, PHINode *&CallEmLongjmpBBThrewPHI,
+ PHINode *&CallEmLongjmpBBThrewValuePHI, BasicBlock *&EndBB) {
Function *F = BB->getParent();
Module *M = F->getParent();
LLVMContext &C = M->getContext();
@@ -591,10 +688,27 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
- // %label = _testSetjmp(mem[%__THREW__.val], _setjmpTable, _setjmpTableSize);
+ // Generate call.em.longjmp BB once and share it within the function
+ if (!CallEmLongjmpBB) {
+ // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
+ CallEmLongjmpBB = BasicBlock::Create(C, "call.em.longjmp", F);
+ IRB.SetInsertPoint(CallEmLongjmpBB);
+ CallEmLongjmpBBThrewPHI = IRB.CreatePHI(getAddrIntType(M), 4, "threw.phi");
+ CallEmLongjmpBBThrewValuePHI =
+ IRB.CreatePHI(IRB.getInt32Ty(), 4, "threwvalue.phi");
+ CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1);
+ CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1);
+ IRB.CreateCall(EmLongjmpF,
+ {CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI});
+ IRB.CreateUnreachable();
+ } else {
+ CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1);
+ CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1);
+ }
+
+ // %label = testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
// if (%label == 0)
IRB.SetInsertPoint(ThenBB1);
- BasicBlock *ThenBB2 = BasicBlock::Create(C, "if.then2", F);
BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
Value *ThrewPtr =
IRB.CreateIntToPtr(Threw, getAddrPtrType(M), Threw->getName() + ".p");
@@ -603,16 +717,11 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
Value *ThenLabel = IRB.CreateCall(
TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label");
Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
- IRB.CreateCondBr(Cmp2, ThenBB2, EndBB2);
-
- // emscripten_longjmp(%__THREW__.val, %__threwValue.val);
- IRB.SetInsertPoint(ThenBB2);
- IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
- IRB.CreateUnreachable();
+ IRB.CreateCondBr(Cmp2, CallEmLongjmpBB, EndBB2);
// setTempRet0(%__threwValue.val);
IRB.SetInsertPoint(EndBB2);
- IRB.CreateCall(SetTempRet0Func, ThrewValue);
+ IRB.CreateCall(SetTempRet0F, ThrewValue);
IRB.CreateBr(EndBB1);
IRB.SetInsertPoint(ElseBB1);
@@ -628,53 +737,67 @@ void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
// Output parameter assignment
Label = LabelPHI;
EndBB = EndBB1;
- LongjmpResult = IRB.CreateCall(GetTempRet0Func, None, "longjmp_result");
+ LongjmpResult = IRB.CreateCall(GetTempRet0F, None, "longjmp_result");
}
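
To make the control flow that wrapTestSetjmp() emits easier to follow, here is a minimal standalone C++ sketch of the same logic; it is not part of the patch. The testSetjmp, emscripten_longjmp, setTempRet0, and getTempRet0 stubs below are stand-ins for the Emscripten runtime helpers, and the real pass additionally loads the setjmp id from the address held in __THREW__ rather than using the raw value as this sketch does.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Stand-ins for the Emscripten runtime helpers; the real ones live in
// Emscripten's support library, not in this sketch.
static int TempRet0 = 0;
static void setTempRet0(int V) { TempRet0 = V; }
static int getTempRet0() { return TempRet0; }
static int testSetjmp(uintptr_t SetjmpId, int *Table, int Size) {
  (void)Table;
  (void)Size;
  return SetjmpId == 42 ? 1 : 0; // pretend id 42 is one of our setjmps
}
[[noreturn]] static void emscripten_longjmp(uintptr_t Env, int Val) {
  std::printf("rethrowing longjmp(env=%zu, val=%d)\n", (size_t)Env, Val);
  std::exit(0);
}

// Same branch structure as the IR built above: only when both __THREW__ and
// __threwValue are nonzero do we consult testSetjmp(); label 0 means the
// longjmp belongs to a caller and is rethrown via emscripten_longjmp().
static int checkCallResult(uintptr_t Threw, int ThrewValue, int *SetjmpTable,
                           int SetjmpTableSize, int &Label) {
  if (Threw != 0 && ThrewValue != 0) {
    Label = testSetjmp(/*mem[Threw]=*/Threw, SetjmpTable, SetjmpTableSize);
    if (Label == 0)
      emscripten_longjmp(Threw, ThrewValue);
    setTempRet0(ThrewValue);
  } else {
    Label = -1; // no longjmp: continue normally
  }
  return getTempRet0(); // the "longjmp_result" output parameter
}

int main() {
  int Table[4] = {};
  int Label = 0;
  int Result = checkCallResult(/*Threw=*/42, /*ThrewValue=*/7, Table, 4, Label);
  std::printf("label=%d longjmp_result=%d\n", Label, Result);
}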
void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
DT.recalculate(F); // CFG has been changed
- SSAUpdater SSA;
+
+ SSAUpdaterBulk SSA;
for (BasicBlock &BB : F) {
for (Instruction &I : BB) {
- SSA.Initialize(I.getType(), I.getName());
- SSA.AddAvailableValue(&BB, &I);
- for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
- Use &U = *UI;
- ++UI;
+ unsigned VarID = SSA.AddVariable(I.getName(), I.getType());
+ // If a value is defined by an invoke instruction, it is only available in
+ // its normal destination and not in its unwind destination.
+ if (auto *II = dyn_cast<InvokeInst>(&I))
+ SSA.AddAvailableValue(VarID, II->getNormalDest(), II);
+ else
+ SSA.AddAvailableValue(VarID, &BB, &I);
+ for (auto &U : I.uses()) {
auto *User = cast<Instruction>(U.getUser());
if (auto *UserPN = dyn_cast<PHINode>(User))
if (UserPN->getIncomingBlock(U) == &BB)
continue;
-
if (DT.dominates(&I, User))
continue;
- SSA.RewriteUseAfterInsertions(U);
+ SSA.AddUse(VarID, &U);
}
}
}
+ SSA.RewriteAllUses(&DT);
}
-// Replace uses of longjmp with emscripten_longjmp. emscripten_longjmp takes
-// arguments of type {i32, i32} (wasm32) / {i64, i32} (wasm64) and longjmp takes
-// {jmp_buf*, i32}, so we need a ptrtoint instruction here to make the type
-// match. jmp_buf* will eventually be lowered to i32 in the wasm backend.
-static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF,
- Function *EmLongjmpF) {
+// Replace uses of longjmp with a new longjmp function in the Emscripten
+// library. In Emscripten SjLj, the new function is
+// void emscripten_longjmp(uintptr_t, i32)
+// In Wasm SjLj, the new function is
+// void __wasm_longjmp(i8*, i32)
+// Because the original libc longjmp function takes (jmp_buf*, i32), we need a
+// ptrtoint/bitcast instruction here to make the type match. jmp_buf* will
+// eventually be lowered to i32/i64 in the wasm backend.
+void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF,
+ Function *NewF) {
+ assert(NewF == EmLongjmpF || NewF == WasmLongjmpF);
Module *M = LongjmpF->getParent();
SmallVector<CallInst *, 8> ToErase;
LLVMContext &C = LongjmpF->getParent()->getContext();
IRBuilder<> IRB(C);
- // For calls to longjmp, replace it with emscripten_longjmp and cast its first
- // argument (jmp_buf*) to int
+ // For calls to longjmp, replace it with emscripten_longjmp/__wasm_longjmp and
+ // cast its first argument (jmp_buf*) appropriately
for (User *U : LongjmpF->users()) {
auto *CI = dyn_cast<CallInst>(U);
if (CI && CI->getCalledFunction() == LongjmpF) {
IRB.SetInsertPoint(CI);
- Value *Jmpbuf =
- IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "jmpbuf");
- IRB.CreateCall(EmLongjmpF, {Jmpbuf, CI->getArgOperand(1)});
+ Value *Env = nullptr;
+ if (NewF == EmLongjmpF)
+ Env =
+ IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "env");
+ else // WasmLongjmpF
+ Env =
+ IRB.CreateBitCast(CI->getArgOperand(0), IRB.getInt8PtrTy(), "env");
+ IRB.CreateCall(NewF, {Env, CI->getArgOperand(1)});
ToErase.push_back(CI);
}
}
@@ -682,14 +805,23 @@ static void replaceLongjmpWithEmscriptenLongjmp(Function *LongjmpF,
I->eraseFromParent();
// If we have any remaining uses of longjmp's function pointer, replace it
- // with (int(*)(jmp_buf*, int))emscripten_longjmp.
+ // with (void(*)(jmp_buf*, int))emscripten_longjmp / __wasm_longjmp.
if (!LongjmpF->uses().empty()) {
- Value *EmLongjmp =
- IRB.CreateBitCast(EmLongjmpF, LongjmpF->getType(), "em_longjmp");
- LongjmpF->replaceAllUsesWith(EmLongjmp);
+ Value *NewLongjmp =
+ IRB.CreateBitCast(NewF, LongjmpF->getType(), "longjmp.cast");
+ LongjmpF->replaceAllUsesWith(NewLongjmp);
}
}
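
As a concrete illustration of the rewrite described in the comment above replaceLongjmpWith(), the following standalone C++ sketch shows the source-level equivalent; it is not part of the patch. The two replacement functions are stubbed out here, whereas the real emscripten_longjmp and __wasm_longjmp are provided by the Emscripten runtime and do not return.

#include <csetjmp>
#include <cstdint>
#include <cstdio>

// Stubs standing in for the runtime entry points; the real ones never return.
static void emscripten_longjmp(uintptr_t Env, int Val) {
  std::printf("emscripten_longjmp(env=%p, val=%d)\n", (void *)Env, Val);
}
static void wasm_longjmp(void *Env, int Val) { // __wasm_longjmp in the pass
  std::printf("__wasm_longjmp(env=%p, val=%d)\n", Env, Val);
}

int main() {
  std::jmp_buf Buf;
  if (setjmp(Buf) == 0) {
    // Original user code:       longjmp(Buf, 1);
    // Emscripten SjLj rewrite:  jmp_buf* is ptrtoint-ed to the address-sized
    //                           integer expected by emscripten_longjmp.
    emscripten_longjmp(reinterpret_cast<uintptr_t>(&Buf), 1);
    // Wasm SjLj rewrite:        jmp_buf* is bitcast to i8* (void* here).
    wasm_longjmp(static_cast<void *>(&Buf), 1);
  }
  return 0;
}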
+static bool containsLongjmpableCalls(const Function *F) {
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (canLongjmp(CB->getCalledOperand()))
+ return true;
+ return false;
+}
+
bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n");
@@ -698,39 +830,60 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
Function *SetjmpF = M.getFunction("setjmp");
Function *LongjmpF = M.getFunction("longjmp");
- bool SetjmpUsed = SetjmpF && !SetjmpF->use_empty();
- bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
- DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
+
+  // On some platforms _setjmp and _longjmp are used instead. Redirect these
+  // to setjmp/longjmp, because we later detect these functions by their
+  // names.
+ Function *SetjmpF2 = M.getFunction("_setjmp");
+ Function *LongjmpF2 = M.getFunction("_longjmp");
+ if (SetjmpF2) {
+ if (SetjmpF) {
+ if (SetjmpF->getFunctionType() != SetjmpF2->getFunctionType())
+ report_fatal_error("setjmp and _setjmp have different function types");
+ } else {
+ SetjmpF = Function::Create(SetjmpF2->getFunctionType(),
+ GlobalValue::ExternalLinkage, "setjmp", M);
+ }
+ SetjmpF2->replaceAllUsesWith(SetjmpF);
+ }
+ if (LongjmpF2) {
+ if (LongjmpF) {
+ if (LongjmpF->getFunctionType() != LongjmpF2->getFunctionType())
+ report_fatal_error(
+ "longjmp and _longjmp have different function types");
+ } else {
+ LongjmpF = Function::Create(LongjmpF2->getFunctionType(),
+                                  GlobalValue::ExternalLinkage, "longjmp", M);
+ }
+ LongjmpF2->replaceAllUsesWith(LongjmpF);
+ }
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
assert(TPC && "Expected a TargetPassConfig");
auto &TM = TPC->getTM<WebAssemblyTargetMachine>();
- if (EnableEH && TM.Options.ExceptionModel == ExceptionHandling::Wasm)
- report_fatal_error("-exception-model=wasm not allowed with "
- "-enable-emscripten-cxx-exceptions");
-
// Declare (or get) global variables __THREW__, __threwValue, and
// getTempRet0/setTempRet0 function which are used in common for both
// exception handling and setjmp/longjmp handling
ThrewGV = getGlobalVariable(M, getAddrIntType(&M), TM, "__THREW__");
ThrewValueGV = getGlobalVariable(M, IRB.getInt32Ty(), TM, "__threwValue");
- GetTempRet0Func = getEmscriptenFunction(
+ GetTempRet0F = getEmscriptenFunction(
FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M);
- SetTempRet0Func = getEmscriptenFunction(
+ SetTempRet0F = getEmscriptenFunction(
FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false),
"setTempRet0", &M);
- GetTempRet0Func->setDoesNotThrow();
- SetTempRet0Func->setDoesNotThrow();
+ GetTempRet0F->setDoesNotThrow();
+ SetTempRet0F->setDoesNotThrow();
bool Changed = false;
// Function registration for exception handling
- if (EnableEH) {
+ if (EnableEmEH) {
// Register __resumeException function
FunctionType *ResumeFTy =
FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M);
+ ResumeF->addFnAttr(Attribute::NoReturn);
// Register llvm_eh_typeid_for function
FunctionType *EHTypeIDTy =
@@ -738,20 +891,55 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M);
}
+ if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) {
+ // Precompute setjmp users
+ for (User *U : SetjmpF->users()) {
+ if (auto *CB = dyn_cast<CallBase>(U)) {
+ auto *UserF = CB->getFunction();
+ // If a function that calls setjmp does not contain any other calls that
+ // can longjmp, we don't need to do any transformation on that function,
+        // so we can ignore it
+ if (containsLongjmpableCalls(UserF))
+ SetjmpUsers.insert(UserF);
+ } else {
+ std::string S;
+ raw_string_ostream SS(S);
+ SS << *U;
+ report_fatal_error(Twine("Indirect use of setjmp is not supported: ") +
+ SS.str());
+ }
+ }
+ }
+
+ bool SetjmpUsed = SetjmpF && !SetjmpUsers.empty();
+ bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
+ DoSjLj = (EnableEmSjLj | EnableWasmSjLj) && (SetjmpUsed || LongjmpUsed);
+
// Function registration and data pre-gathering for setjmp/longjmp handling
if (DoSjLj) {
- // Register emscripten_longjmp function
- FunctionType *FTy = FunctionType::get(
- IRB.getVoidTy(), {getAddrIntType(&M), IRB.getInt32Ty()}, false);
- EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
+ assert(EnableEmSjLj || EnableWasmSjLj);
+ if (EnableEmSjLj) {
+ // Register emscripten_longjmp function
+ FunctionType *FTy = FunctionType::get(
+ IRB.getVoidTy(), {getAddrIntType(&M), IRB.getInt32Ty()}, false);
+ EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M);
+ EmLongjmpF->addFnAttr(Attribute::NoReturn);
+ } else { // EnableWasmSjLj
+ // Register __wasm_longjmp function, which calls __builtin_wasm_longjmp.
+ FunctionType *FTy = FunctionType::get(
+ IRB.getVoidTy(), {IRB.getInt8PtrTy(), IRB.getInt32Ty()}, false);
+ WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M);
+ WasmLongjmpF->addFnAttr(Attribute::NoReturn);
+ }
if (SetjmpF) {
// Register saveSetjmp function
FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
- FTy = FunctionType::get(Type::getInt32PtrTy(C),
- {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
- Type::getInt32PtrTy(C), IRB.getInt32Ty()},
- false);
+ FunctionType *FTy =
+ FunctionType::get(Type::getInt32PtrTy(C),
+ {SetjmpFTy->getParamType(0), IRB.getInt32Ty(),
+ Type::getInt32PtrTy(C), IRB.getInt32Ty()},
+ false);
SaveSetjmpF = getEmscriptenFunction(FTy, "saveSetjmp", &M);
// Register testSetjmp function
@@ -761,16 +949,18 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
false);
TestSetjmpF = getEmscriptenFunction(FTy, "testSetjmp", &M);
- // Precompute setjmp users
- for (User *U : SetjmpF->users()) {
- auto *UI = cast<Instruction>(U);
- SetjmpUsers.insert(UI->getFunction());
- }
+ // wasm.catch() will be lowered down to wasm 'catch' instruction in
+ // instruction selection.
+ CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
+ // Type for struct __WasmLongjmpArgs
+ LongjmpArgsTy = StructType::get(IRB.getInt8PtrTy(), // env
+ IRB.getInt32Ty() // val
+ );
}
}
// Exception handling transformation
- if (EnableEH) {
+ if (EnableEmEH) {
for (Function &F : M) {
if (F.isDeclaration())
continue;
@@ -782,7 +972,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
if (DoSjLj) {
Changed = true; // We have setjmp or longjmp somewhere
if (LongjmpF)
- replaceLongjmpWithEmscriptenLongjmp(LongjmpF, EmLongjmpF);
+ replaceLongjmpWith(LongjmpF, EnableEmSjLj ? EmLongjmpF : WasmLongjmpF);
     // Only traverse functions that use setjmp in order not to insert
// unnecessary prep / cleanup code in every function
if (SetjmpF)
@@ -816,6 +1006,12 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
SmallVector<Instruction *, 64> ToErase;
SmallPtrSet<LandingPadInst *, 32> LandingPads;
+ // rethrow.longjmp BB that will be shared within the function.
+ BasicBlock *RethrowLongjmpBB = nullptr;
+ // PHI node for the loaded value of __THREW__ global variable in
+ // rethrow.longjmp BB
+ PHINode *RethrowLongjmpBBThrewPHI = nullptr;
+
for (BasicBlock &BB : F) {
auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
if (!II)
@@ -836,37 +1032,48 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
// setjmp, it will be appropriately handled in runSjLjOnFunction. But even
// if the function does not contain setjmp calls, we shouldn't silently
// ignore longjmps; we should rethrow them so they can be correctly
- // handled in somewhere up the call chain where setjmp is.
- // __THREW__'s value is 0 when nothing happened, 1 when an exception is
- // thrown, other values when longjmp is thrown.
+    // handled somewhere up the call chain where setjmp is. __THREW__'s
+ // value is 0 when nothing happened, 1 when an exception is thrown, and
+ // other values when longjmp is thrown.
//
// if (%__THREW__.val == 0 || %__THREW__.val == 1)
// goto %tail
// else
// goto %longjmp.rethrow
//
- // longjmp.rethrow: ;; This is longjmp. Rethrow it
+ // rethrow.longjmp: ;; This is longjmp. Rethrow it
// %__threwValue.val = __threwValue
// emscripten_longjmp(%__THREW__.val, %__threwValue.val);
//
// tail: ;; Nothing happened or an exception is thrown
// ... Continue exception handling ...
- if (DoSjLj && !SetjmpUsers.count(&F) && canLongjmp(M, Callee)) {
+ if (DoSjLj && EnableEmSjLj && !SetjmpUsers.count(&F) &&
+ canLongjmp(Callee)) {
+ // Create longjmp.rethrow BB once and share it within the function
+ if (!RethrowLongjmpBB) {
+ RethrowLongjmpBB = BasicBlock::Create(C, "rethrow.longjmp", &F);
+ IRB.SetInsertPoint(RethrowLongjmpBB);
+ RethrowLongjmpBBThrewPHI =
+ IRB.CreatePHI(getAddrIntType(&M), 4, "threw.phi");
+ RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB);
+ Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
+ ThrewValueGV->getName() + ".val");
+ IRB.CreateCall(EmLongjmpF, {RethrowLongjmpBBThrewPHI, ThrewValue});
+ IRB.CreateUnreachable();
+ } else {
+ RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB);
+ }
+
+ IRB.SetInsertPoint(II); // Restore the insert point back
BasicBlock *Tail = BasicBlock::Create(C, "tail", &F);
- BasicBlock *RethrowBB = BasicBlock::Create(C, "longjmp.rethrow", &F);
Value *CmpEqOne =
IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one");
Value *CmpEqZero =
IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 0), "cmp.eq.zero");
Value *Or = IRB.CreateOr(CmpEqZero, CmpEqOne, "or");
- IRB.CreateCondBr(Or, Tail, RethrowBB);
- IRB.SetInsertPoint(RethrowBB);
- Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV,
- ThrewValueGV->getName() + ".val");
- IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
-
- IRB.CreateUnreachable();
+ IRB.CreateCondBr(Or, Tail, RethrowLongjmpBB);
IRB.SetInsertPoint(Tail);
+ BB.replaceSuccessorsPhiUsesWith(&BB, Tail);
}
// Insert a branch based on __THREW__ variable
@@ -961,7 +1168,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
Value *Undef = UndefValue::get(LPI->getType());
Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0");
- Value *TempRet0 = IRB.CreateCall(GetTempRet0Func, None, "tempret0");
+ Value *TempRet0 = IRB.CreateCall(GetTempRet0F, None, "tempret0");
Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
LPI->replaceAllUsesWith(Pair1);
@@ -997,14 +1204,15 @@ static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore,
}
bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
+ assert(EnableEmSjLj || EnableWasmSjLj);
Module &M = *F.getParent();
LLVMContext &C = F.getContext();
IRBuilder<> IRB(C);
SmallVector<Instruction *, 64> ToErase;
// Vector of %setjmpTable values
- std::vector<Instruction *> SetjmpTableInsts;
+ SmallVector<Instruction *, 4> SetjmpTableInsts;
// Vector of %setjmpTableSize values
- std::vector<Instruction *> SetjmpTableSizeInsts;
+ SmallVector<Instruction *, 4> SetjmpTableSizeInsts;
// Setjmp preparation
@@ -1012,11 +1220,13 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// We create this as an instruction intentionally, and we don't want to fold
// this instruction to a constant 4, because this value will be used in
// SSAUpdater.AddAvailableValue(...) later.
- BasicBlock &EntryBB = F.getEntryBlock();
- DebugLoc FirstDL = getOrCreateDebugLoc(&*EntryBB.begin(), F.getSubprogram());
- BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
- Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
- &*EntryBB.getFirstInsertionPt());
+ BasicBlock *Entry = &F.getEntryBlock();
+ DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram());
+ SplitBlock(Entry, &*Entry->getFirstInsertionPt());
+
+ BinaryOperator *SetjmpTableSize =
+ BinaryOperator::Create(Instruction::Add, IRB.getInt32(4), IRB.getInt32(0),
+ "setjmpTableSize", Entry->getTerminator());
SetjmpTableSize->setDebugLoc(FirstDL);
// setjmpTable = (int *) malloc(40);
Instruction *SetjmpTable = CallInst::CreateMalloc(
@@ -1036,13 +1246,14 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
SetjmpTableSizeInsts.push_back(SetjmpTableSize);
// Setjmp transformation
- std::vector<PHINode *> SetjmpRetPHIs;
+ SmallVector<PHINode *, 4> SetjmpRetPHIs;
Function *SetjmpF = M.getFunction("setjmp");
for (User *U : SetjmpF->users()) {
auto *CI = dyn_cast<CallInst>(U);
+    // FIXME 'invoke' to setjmp can happen when we use Wasm EH + Wasm SjLj, but
+    // we don't support the two being used together yet.
if (!CI)
- report_fatal_error("Does not support indirect calls to setjmp");
-
+ report_fatal_error("Wasm EH + Wasm SjLj is not fully supported yet");
BasicBlock *BB = CI->getParent();
if (BB->getParent() != &F) // in other function
continue;
@@ -1072,14 +1283,136 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Instruction *NewSetjmpTable =
IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
Instruction *NewSetjmpTableSize =
- IRB.CreateCall(GetTempRet0Func, None, "setjmpTableSize");
+ IRB.CreateCall(GetTempRet0F, None, "setjmpTableSize");
SetjmpTableInsts.push_back(NewSetjmpTable);
SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
ToErase.push_back(CI);
}
- // Update each call that can longjmp so it can return to a setjmp where
- // relevant.
+ // Handle longjmpable calls.
+ if (EnableEmSjLj)
+ handleLongjmpableCallsForEmscriptenSjLj(
+ F, SetjmpTableInsts, SetjmpTableSizeInsts, SetjmpRetPHIs);
+ else // EnableWasmSjLj
+ handleLongjmpableCallsForWasmSjLj(F, SetjmpTableInsts, SetjmpTableSizeInsts,
+ SetjmpRetPHIs);
+
+ // Erase everything we no longer need in this function
+ for (Instruction *I : ToErase)
+ I->eraseFromParent();
+
+ // Free setjmpTable buffer before each return instruction + function-exiting
+ // call
+ SmallVector<Instruction *, 16> ExitingInsts;
+ for (BasicBlock &BB : F) {
+ Instruction *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI))
+ ExitingInsts.push_back(TI);
+ // Any 'call' instruction with 'noreturn' attribute exits the function at
+ // this point. If this throws but unwinds to another EH pad within this
+ // function instead of exiting, this would have been an 'invoke', which
+    // happens if we use Wasm EH or Wasm SjLj.
+ for (auto &I : BB) {
+ if (auto *CI = dyn_cast<CallInst>(&I)) {
+ bool IsNoReturn = CI->hasFnAttr(Attribute::NoReturn);
+ if (Function *CalleeF = CI->getCalledFunction())
+ IsNoReturn |= CalleeF->hasFnAttribute(Attribute::NoReturn);
+ if (IsNoReturn)
+ ExitingInsts.push_back(&I);
+ }
+ }
+ }
+ for (auto *I : ExitingInsts) {
+ DebugLoc DL = getOrCreateDebugLoc(I, F.getSubprogram());
+    // If this exiting instruction is a call within a catchpad, we should add
+    // it as "funclet" to the operand bundle of the 'free' call
+ SmallVector<OperandBundleDef, 1> Bundles;
+ if (auto *CB = dyn_cast<CallBase>(I))
+ if (auto Bundle = CB->getOperandBundle(LLVMContext::OB_funclet))
+ Bundles.push_back(OperandBundleDef(*Bundle));
+ auto *Free = CallInst::CreateFree(SetjmpTable, Bundles, I);
+ Free->setDebugLoc(DL);
+ // CallInst::CreateFree may create a bitcast instruction if its argument
+ // types mismatch. We need to set the debug loc for the bitcast too.
+ if (auto *FreeCallI = dyn_cast<CallInst>(Free)) {
+ if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0)))
+ BitCastI->setDebugLoc(DL);
+ }
+ }
+
+ // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
+ // (when buffer reallocation occurs)
+ // entry:
+ // setjmpTableSize = 4;
+ // setjmpTable = (int *) malloc(40);
+ // setjmpTable[0] = 0;
+ // ...
+ // somebb:
+ // setjmpTable = saveSetjmp(env, label, setjmpTable, setjmpTableSize);
+ // setjmpTableSize = getTempRet0();
+ // So we need to make sure the SSA for these variables is valid so that every
+ // saveSetjmp and testSetjmp calls have the correct arguments.
+ SSAUpdater SetjmpTableSSA;
+ SSAUpdater SetjmpTableSizeSSA;
+ SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
+ SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
+ for (Instruction *I : SetjmpTableInsts)
+ SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
+ for (Instruction *I : SetjmpTableSizeInsts)
+ SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);
+
+ for (auto &U : make_early_inc_range(SetjmpTable->uses()))
+ if (auto *I = dyn_cast<Instruction>(U.getUser()))
+ if (I->getParent() != Entry)
+ SetjmpTableSSA.RewriteUse(U);
+ for (auto &U : make_early_inc_range(SetjmpTableSize->uses()))
+ if (auto *I = dyn_cast<Instruction>(U.getUser()))
+ if (I->getParent() != Entry)
+ SetjmpTableSizeSSA.RewriteUse(U);
+
+ // Finally, our modifications to the cfg can break dominance of SSA variables.
+ // For example, in this code,
+ // if (x()) { .. setjmp() .. }
+ // if (y()) { .. longjmp() .. }
+  // We must split the longjmp block, and it can jump into the block split
+  // from the setjmp one. But that means that when we split the setjmp block,
+  // its first part no longer dominates its second part - there is a
+  // theoretically possible control flow path where x() is false, then y() is
+  // true and we reach the second part of the setjmp block without ever
+  // reaching the first part. So we rebuild SSA form here.
+ rebuildSSA(F);
+ return true;
+}
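
The comments above describe the buffer that runSjLjOnFunction() threads through a function. A rough standalone C++ sketch of that shape is shown below; it is illustrative only, and the saveSetjmp/getTempRet0 pair are stubs standing in for the Emscripten helpers registered by the pass.

#include <cstdlib>

// Stubs for the Emscripten helpers. The real saveSetjmp() records
// (env, label) in the table, reallocating it if needed, and reports the
// (possibly grown) size through setTempRet0()/getTempRet0().
static int TempRet0 = 0;
static int getTempRet0() { return TempRet0; }
static int *saveSetjmp(void *Env, int Label, int *Table, int Size) {
  (void)Env;
  (void)Label;
  TempRet0 = Size; // no reallocation in this sketch
  return Table;
}

// Shape of a function after the transformation: the entry block sets up
// setjmpTable/setjmpTableSize, each setjmp call site registers itself, and
// the buffer is freed before every function exit.
int transformedFunctionShape() {
  int SetjmpTableSize = 4;                   // entry block
  int *SetjmpTable = (int *)std::malloc(40); // entry block
  SetjmpTable[0] = 0;

  int FakeJmpBuf = 0;                        // stands in for a jmp_buf
  SetjmpTable =
      saveSetjmp(&FakeJmpBuf, /*Label=*/1, SetjmpTable, SetjmpTableSize);
  SetjmpTableSize = getTempRet0();

  std::free(SetjmpTable);                    // before each return
  return SetjmpTableSize;
}

int main() { return transformedFunctionShape() == 4 ? 0 : 1; }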
+
+// Update each call that can longjmp so it can return to the corresponding
+// setjmp. Refer to 4) of the "Emscripten setjmp/longjmp handling" section in
+// the comments at the top of the file for details.
+void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj(
+ Function &F, InstVector &SetjmpTableInsts, InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {
+ Module &M = *F.getParent();
+ LLVMContext &C = F.getContext();
+ IRBuilder<> IRB(C);
+ SmallVector<Instruction *, 64> ToErase;
+
+  // We need to pass setjmpTable and setjmpTableSize to the testSetjmp
+  // function. These values are defined at the beginning of the function and
+  // also at each setjmp callsite, but we don't know which values we should
+  // use at this point. So here we arbitrarily use the ones defined at the
+  // beginning of the function, and SSAUpdater will later update them to the
+  // correct values.
+ Instruction *SetjmpTable = *SetjmpTableInsts.begin();
+ Instruction *SetjmpTableSize = *SetjmpTableSizeInsts.begin();
+
+ // call.em.longjmp BB that will be shared within the function.
+ BasicBlock *CallEmLongjmpBB = nullptr;
+ // PHI node for the loaded value of __THREW__ global variable in
+ // call.em.longjmp BB
+ PHINode *CallEmLongjmpBBThrewPHI = nullptr;
+ // PHI node for the loaded value of __threwValue global variable in
+ // call.em.longjmp BB
+ PHINode *CallEmLongjmpBBThrewValuePHI = nullptr;
+ // rethrow.exn BB that will be shared within the function.
+ BasicBlock *RethrowExnBB = nullptr;
// Because we are creating new BBs while processing and don't want to make
// all these newly created BBs candidates again for longjmp processing, we
@@ -1092,15 +1425,18 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
for (unsigned I = 0; I < BBs.size(); I++) {
BasicBlock *BB = BBs[I];
for (Instruction &I : *BB) {
- assert(!isa<InvokeInst>(&I));
+ if (isa<InvokeInst>(&I))
+ report_fatal_error("When using Wasm EH with Emscripten SjLj, there is "
+ "a restriction that `setjmp` function call and "
+ "exception cannot be used within the same function");
auto *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
const Value *Callee = CI->getCalledOperand();
- if (!canLongjmp(M, Callee))
+ if (!canLongjmp(Callee))
continue;
- if (isEmAsmCall(M, Callee))
+ if (isEmAsmCall(Callee))
report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
F.getName() +
". Please consider using EM_JS, or move the "
@@ -1171,19 +1507,26 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
// tail:
// ...
if (supportsException(&F) && canThrow(Callee)) {
- IRB.SetInsertPoint(CI);
// We will add a new conditional branch. So remove the branch created
// when we split the BB
ToErase.push_back(BB->getTerminator());
+
+ // Generate rethrow.exn BB once and share it within the function
+ if (!RethrowExnBB) {
+ RethrowExnBB = BasicBlock::Create(C, "rethrow.exn", &F);
+ IRB.SetInsertPoint(RethrowExnBB);
+ CallInst *Exn =
+ IRB.CreateCall(getFindMatchingCatch(M, 0), {}, "exn");
+ IRB.CreateCall(ResumeF, {Exn});
+ IRB.CreateUnreachable();
+ }
+
+ IRB.SetInsertPoint(CI);
BasicBlock *NormalBB = BasicBlock::Create(C, "normal", &F);
- BasicBlock *RethrowBB = BasicBlock::Create(C, "eh.rethrow", &F);
Value *CmpEqOne =
IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one");
- IRB.CreateCondBr(CmpEqOne, RethrowBB, NormalBB);
- IRB.SetInsertPoint(RethrowBB);
- CallInst *Exn = IRB.CreateCall(getFindMatchingCatch(M, 0), {}, "exn");
- IRB.CreateCall(ResumeF, {Exn});
- IRB.CreateUnreachable();
+ IRB.CreateCondBr(CmpEqOne, RethrowExnBB, NormalBB);
+
IRB.SetInsertPoint(NormalBB);
IRB.CreateBr(Tail);
BB = NormalBB; // New insertion point to insert testSetjmp()
@@ -1202,7 +1545,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
Value *LongjmpResult = nullptr;
BasicBlock *EndBB = nullptr;
wrapTestSetjmp(BB, CI->getDebugLoc(), Threw, SetjmpTable, SetjmpTableSize,
- Label, LongjmpResult, EndBB);
+ Label, LongjmpResult, CallEmLongjmpBB,
+ CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI,
+ EndBB);
assert(Label && LongjmpResult && EndBB);
// Create switch instruction
@@ -1224,76 +1569,184 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
}
}
- // Erase everything we no longer need in this function
for (Instruction *I : ToErase)
I->eraseFromParent();
+}
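
The switch that handleLongjmpableCallsForEmscriptenSjLj() appends after each longjmpable call can be summarized by the following standalone C++ sketch. Only the label convention and the idea that label N resumes the N-th setjmp with the longjmp value as its return value are taken from the code above; the function name and case bodies are illustrative.

#include <cstdio>

// After each wrapped call, the pass inserts roughly this dispatch: label -1
// (or any label with no matching case) falls through to the split "tail"
// block, while label N branches to the PHI of the N-th setjmp call site and
// feeds it the longjmp value obtained from getTempRet0().
static int dispatchAfterCall(int Label, int LongjmpResult) {
  switch (Label) {
  case 1:
    return 100 + LongjmpResult; // "post-setjmp BB 1" in this sketch
  case 2:
    return 200 + LongjmpResult; // "post-setjmp BB 2" in this sketch
  default:
    return 0;                   // tail block: no longjmp aimed at us
  }
}

int main() {
  std::printf("%d %d\n", dispatchAfterCall(1, 7), dispatchAfterCall(-1, 0));
}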
- // Free setjmpTable buffer before each return instruction
- for (BasicBlock &BB : F) {
- Instruction *TI = BB.getTerminator();
- if (isa<ReturnInst>(TI)) {
- DebugLoc DL = getOrCreateDebugLoc(TI, F.getSubprogram());
- auto *Free = CallInst::CreateFree(SetjmpTable, TI);
- Free->setDebugLoc(DL);
- // CallInst::CreateFree may create a bitcast instruction if its argument
- // types mismatch. We need to set the debug loc for the bitcast too.
- if (auto *FreeCallI = dyn_cast<CallInst>(Free)) {
- if (auto *BitCastI = dyn_cast<BitCastInst>(FreeCallI->getArgOperand(0)))
- BitCastI->setDebugLoc(DL);
- }
- }
+// Create a catchpad in which we catch a longjmp's env and val arguments, test
+// if the longjmp corresponds to one of the setjmps in the current function,
+// and if so, jump to the setjmp dispatch BB from which we go to one of the
+// post-setjmp BBs. Refer to 4) of the "Wasm setjmp/longjmp handling" section
+// in the comments at the top of the file for details.
+void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
+ Function &F, InstVector &SetjmpTableInsts, InstVector &SetjmpTableSizeInsts,
+ SmallVectorImpl<PHINode *> &SetjmpRetPHIs) {
+ Module &M = *F.getParent();
+ LLVMContext &C = F.getContext();
+ IRBuilder<> IRB(C);
+
+ // A function with catchswitch/catchpad instruction should have a personality
+ // function attached to it. Search for the wasm personality function, and if
+ // it exists, use it, and if it doesn't, create a dummy personality function.
+ // (SjLj is not going to call it anyway.)
+ if (!F.hasPersonalityFn()) {
+ StringRef PersName = getEHPersonalityName(EHPersonality::Wasm_CXX);
+ FunctionType *PersType =
+ FunctionType::get(IRB.getInt32Ty(), /* isVarArg */ true);
+ Value *PersF = M.getOrInsertFunction(PersName, PersType).getCallee();
+ F.setPersonalityFn(
+ cast<Constant>(IRB.CreateBitCast(PersF, IRB.getInt8PtrTy())));
}
- // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
- // (when buffer reallocation occurs)
+ // Use the entry BB's debugloc as a fallback
+ BasicBlock *Entry = &F.getEntryBlock();
+ DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram());
+ IRB.SetCurrentDebugLocation(FirstDL);
+
+ // Arbitrarily use the ones defined in the beginning of the function.
+ // SSAUpdater will later update them to the correct values.
+ Instruction *SetjmpTable = *SetjmpTableInsts.begin();
+ Instruction *SetjmpTableSize = *SetjmpTableSizeInsts.begin();
+
+ // Add setjmp.dispatch BB right after the entry block. Because we have
+ // initialized setjmpTable/setjmpTableSize in the entry block and split the
+ // rest into another BB, here 'OrigEntry' is the function's original entry
+ // block before the transformation.
+ //
// entry:
- // setjmpTableSize = 4;
- // setjmpTable = (int *) malloc(40);
- // setjmpTable[0] = 0;
- // ...
- // somebb:
- // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
- // setjmpTableSize = getTempRet0();
- // So we need to make sure the SSA for these variables is valid so that every
- // saveSetjmp and testSetjmp calls have the correct arguments.
- SSAUpdater SetjmpTableSSA;
- SSAUpdater SetjmpTableSizeSSA;
- SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
- SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
- for (Instruction *I : SetjmpTableInsts)
- SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
- for (Instruction *I : SetjmpTableSizeInsts)
- SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);
+ // setjmpTable / setjmpTableSize initialization
+ // setjmp.dispatch:
+ // switch will be inserted here later
+ // entry.split: (OrigEntry)
+ // the original function starts here
+ BasicBlock *OrigEntry = Entry->getNextNode();
+ BasicBlock *SetjmpDispatchBB =
+ BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry);
+ cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB);
+
+  // Create catch.dispatch.longjmp BB and a catchswitch instruction
+ BasicBlock *CatchSwitchBB =
+ BasicBlock::Create(C, "catch.dispatch.longjmp", &F);
+ IRB.SetInsertPoint(CatchSwitchBB);
+ CatchSwitchInst *CatchSwitch =
+ IRB.CreateCatchSwitch(ConstantTokenNone::get(C), nullptr, 1);
+
+ // Create catch.longjmp BB and a catchpad instruction
+ BasicBlock *CatchLongjmpBB = BasicBlock::Create(C, "catch.longjmp", &F);
+ CatchSwitch->addHandler(CatchLongjmpBB);
+ IRB.SetInsertPoint(CatchLongjmpBB);
+ CatchPadInst *CatchPad = IRB.CreateCatchPad(CatchSwitch, {});
+
+ // Wasm throw and catch instructions can throw and catch multiple values, but
+ // that requires multivalue support in the toolchain, which is currently not
+ // very reliable. We instead throw and catch a pointer to a struct value of
+ // type 'struct __WasmLongjmpArgs', which is defined in Emscripten.
+ Instruction *CatchCI =
+ IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::C_LONGJMP)}, "thrown");
+ Value *LongjmpArgs =
+ IRB.CreateBitCast(CatchCI, LongjmpArgsTy->getPointerTo(), "longjmp.args");
+ Value *EnvField =
+ IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 0, "env_gep");
+ Value *ValField =
+ IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 1, "val_gep");
+ // void *env = __wasm_longjmp_args.env;
+ Instruction *Env = IRB.CreateLoad(IRB.getInt8PtrTy(), EnvField, "env");
+ // int val = __wasm_longjmp_args.val;
+ Instruction *Val = IRB.CreateLoad(IRB.getInt32Ty(), ValField, "val");
+
+ // %label = testSetjmp(mem[%env], setjmpTable, setjmpTableSize);
+ // if (%label == 0)
+ // __wasm_longjmp(%env, %val)
+ // catchret to %setjmp.dispatch
+ BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", &F);
+ BasicBlock *EndBB = BasicBlock::Create(C, "if.end", &F);
+ Value *EnvP = IRB.CreateBitCast(Env, getAddrPtrType(&M), "env.p");
+ Value *SetjmpID = IRB.CreateLoad(getAddrIntType(&M), EnvP, "setjmp.id");
+ Value *Label =
+ IRB.CreateCall(TestSetjmpF, {SetjmpID, SetjmpTable, SetjmpTableSize},
+ OperandBundleDef("funclet", CatchPad), "label");
+ Value *Cmp = IRB.CreateICmpEQ(Label, IRB.getInt32(0));
+ IRB.CreateCondBr(Cmp, ThenBB, EndBB);
+
+ IRB.SetInsertPoint(ThenBB);
+ CallInst *WasmLongjmpCI = IRB.CreateCall(
+ WasmLongjmpF, {Env, Val}, OperandBundleDef("funclet", CatchPad));
+ IRB.CreateUnreachable();
- for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
- UI != UE;) {
- // Grab the use before incrementing the iterator.
- Use &U = *UI;
- // Increment the iterator before removing the use from the list.
- ++UI;
- if (auto *I = dyn_cast<Instruction>(U.getUser()))
- if (I->getParent() != &EntryBB)
- SetjmpTableSSA.RewriteUse(U);
+ IRB.SetInsertPoint(EndBB);
+ // Jump to setjmp.dispatch block
+ IRB.CreateCatchRet(CatchPad, SetjmpDispatchBB);
+
+ // Go back to setjmp.dispatch BB
+ // setjmp.dispatch:
+ // switch %label {
+ // label 1: goto post-setjmp BB 1
+ // label 2: goto post-setjmp BB 2
+ // ...
+  //     default: goto the split-off next BB
+ // }
+ IRB.SetInsertPoint(SetjmpDispatchBB);
+ PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label.phi");
+ LabelPHI->addIncoming(Label, EndBB);
+ LabelPHI->addIncoming(IRB.getInt32(-1), Entry);
+ SwitchInst *SI = IRB.CreateSwitch(LabelPHI, OrigEntry, SetjmpRetPHIs.size());
+ // -1 means no longjmp happened, continue normally (will hit the default
+ // switch case). 0 means a longjmp that is not ours to handle, needs a
+  // rethrow. Otherwise the case value is the index of the corresponding
+  // setjmp call site plus 1 (so that 0 is not used).
+ for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) {
+ SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent());
+ SetjmpRetPHIs[I]->addIncoming(Val, SetjmpDispatchBB);
}
- for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
- UI != UE;) {
- Use &U = *UI;
- ++UI;
- if (auto *I = dyn_cast<Instruction>(U.getUser()))
- if (I->getParent() != &EntryBB)
- SetjmpTableSizeSSA.RewriteUse(U);
+
+ // Convert all longjmpable call instructions to invokes that unwind to the
+ // newly created catch.dispatch.longjmp BB.
+ SmallVector<Instruction *, 64> ToErase;
+ for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) {
+ for (Instruction &I : *BB) {
+ auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+ const Value *Callee = CI->getCalledOperand();
+ if (!canLongjmp(Callee))
+ continue;
+ if (isEmAsmCall(Callee))
+ report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " +
+ F.getName() +
+ ". Please consider using EM_JS, or move the "
+ "EM_ASM into another function.",
+ false);
+      // This is the __wasm_longjmp() call we inserted in this function, which
+      // rethrows the longjmp when the longjmp does not correspond to one of
+      // the setjmps in this function. We should not convert this call to an
+      // invoke.
+ if (CI == WasmLongjmpCI)
+ continue;
+ ToErase.push_back(CI);
+
+ // Even if the callee function has attribute 'nounwind', which is true for
+ // all C functions, it can longjmp, which means it can throw a Wasm
+ // exception now.
+ CI->removeFnAttr(Attribute::NoUnwind);
+ if (Function *CalleeF = CI->getCalledFunction()) {
+ CalleeF->removeFnAttr(Attribute::NoUnwind);
+ }
+
+ IRB.SetInsertPoint(CI);
+ BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
+ // We will add a new invoke. So remove the branch created when we split
+ // the BB
+ ToErase.push_back(BB->getTerminator());
+ SmallVector<Value *, 8> Args(CI->args());
+ InvokeInst *II =
+ IRB.CreateInvoke(CI->getFunctionType(), CI->getCalledOperand(), Tail,
+ CatchSwitchBB, Args);
+ II->takeName(CI);
+ II->setDebugLoc(CI->getDebugLoc());
+ II->setAttributes(CI->getAttributes());
+ CI->replaceAllUsesWith(II);
+ }
}
- // Finally, our modifications to the cfg can break dominance of SSA variables.
- // For example, in this code,
- // if (x()) { .. setjmp() .. }
- // if (y()) { .. longjmp() .. }
- // We must split the longjmp block, and it can jump into the block splitted
- // from setjmp one. But that means that when we split the setjmp block, it's
- // first part no longer dominates its second part - there is a theoretically
- // possible control flow path where x() is false, then y() is true and we
- // reach the second part of the setjmp block, without ever reaching the first
- // part. So, we rebuild SSA form here.
- rebuildSSA(F);
- return true;
+ for (Instruction *I : ToErase)
+ I->eraseFromParent();
}
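
For the Wasm SjLj path, the payload thrown by __wasm_longjmp() and caught in catch.longjmp is a pointer to a two-field struct. The standalone sketch below mirrors the layout assumed by LongjmpArgsTy above; the authoritative definition lives in Emscripten's library code, the helper names and lambdas here are illustrative, and the real pass first loads the setjmp id stored at env before calling testSetjmp.

#include <cstdio>

// Layout matching LongjmpArgsTy: { i8* env, i32 val }.
struct WasmLongjmpArgs { // spelled __WasmLongjmpArgs in Emscripten
  void *env;             // jmp_buf* passed to longjmp
  int val;               // value passed to longjmp
};

// What catch.longjmp does with the caught pointer: decide via testSetjmp()
// whether the longjmp targets one of this function's setjmps, and either feed
// the label to the setjmp.dispatch switch or rethrow with __wasm_longjmp().
static int handleCaught(WasmLongjmpArgs *Args, int (*TestSetjmp)(void *Env),
                        void (*WasmLongjmp)(void *Env, int Val)) {
  int Label = TestSetjmp(Args->env);
  if (Label == 0) {
    WasmLongjmp(Args->env, Args->val); // not one of ours: rethrow
    return 0;
  }
  return Label; // ours: the dispatch switch resumes setjmp #Label
}

int main() {
  WasmLongjmpArgs Args{nullptr, 3};
  int Label = handleCaught(
      &Args, [](void *) { return 1; },
      [](void *, int) { std::printf("rethrown\n"); });
  std::printf("label=%d val=%d\n", Label, Args.val);
}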
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
index 9ccbee819c35..3a0bef8c765c 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerGlobalDtors.cpp
@@ -68,7 +68,7 @@ bool LowerGlobalDtors::runOnModule(Module &M) {
if (!InitList)
return false;
- // Sanity-check @llvm.global_dtor's type.
+ // Validate @llvm.global_dtor's type.
auto *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
if (!ETy || ETy->getNumElements() != 3 ||
!ETy->getTypeAtIndex(0U)->isIntegerTy() ||
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
new file mode 100644
index 000000000000..8ff916c28c4e
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
@@ -0,0 +1,84 @@
+//=== WebAssemblyLowerRefTypesIntPtrConv.cpp -
+// Lower IntToPtr and PtrToInt on Reference Types ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Lowers IntToPtr and PtrToInt instructions on reference types to
+/// Trap instructions since they have been allowed to operate
+/// on non-integral pointers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Utils/WebAssemblyUtilities.h"
+#include "WebAssembly.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-reftypes-intptr-conv"
+
+namespace {
+class WebAssemblyLowerRefTypesIntPtrConv final : public FunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Lower RefTypes Int-Ptr Conversions";
+ }
+
+ bool runOnFunction(Function &MF) override;
+
+public:
+ static char ID; // Pass identification
+ WebAssemblyLowerRefTypesIntPtrConv() : FunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyLowerRefTypesIntPtrConv::ID = 0;
+INITIALIZE_PASS(WebAssemblyLowerRefTypesIntPtrConv, DEBUG_TYPE,
+ "WebAssembly Lower RefTypes Int-Ptr Conversions", false, false)
+
+FunctionPass *llvm::createWebAssemblyLowerRefTypesIntPtrConv() {
+ return new WebAssemblyLowerRefTypesIntPtrConv();
+}
+
+bool WebAssemblyLowerRefTypesIntPtrConv::runOnFunction(Function &F) {
+ LLVM_DEBUG(dbgs() << "********** Lower RefTypes IntPtr Convs **********\n"
+ "********** Function: "
+ << F.getName() << '\n');
+
+  // This function will check for uses of ptrtoint and inttoptr on reference
+  // types and replace them with a trap instruction.
+  //
+  // We replace each such instruction with a call to llvm.debugtrap and
+  // replace its uses with an undef value of the same type.
+ std::set<Instruction *> worklist;
+
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ PtrToIntInst *PTI = dyn_cast<PtrToIntInst>(&*I);
+ IntToPtrInst *ITP = dyn_cast<IntToPtrInst>(&*I);
+ if (!(PTI && WebAssembly::isRefType(PTI->getPointerOperand()->getType())) &&
+ !(ITP && WebAssembly::isRefType(ITP->getDestTy())))
+ continue;
+
+ UndefValue *U = UndefValue::get(I->getType());
+ I->replaceAllUsesWith(U);
+
+ Function *TrapIntrin =
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::debugtrap);
+ CallInst::Create(TrapIntrin, {}, "", &*I);
+
+ worklist.insert(&*I);
+ }
+
+ // erase each instruction replaced by trap
+ for (Instruction *I : worklist)
+ I->eraseFromParent();
+
+ return !worklist.empty();
+}
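
The new pass collects the offending instructions into a worklist and erases them only after the scan, so the instruction iteration is never invalidated. The standalone sketch below shows the same idiom on a plain std::list, with negative numbers standing in for ptrtoint/inttoptr on reference types; nothing in it is WebAssembly-specific.

#include <cstdio>
#include <list>
#include <vector>

int main() {
  std::list<int> Insts = {1, -2, 3, -4}; // stand-ins for instructions
  std::vector<std::list<int>::iterator> Worklist;

  // First pass: record what must go, but do not mutate the list yet.
  for (auto It = Insts.begin(); It != Insts.end(); ++It)
    if (*It < 0) // "is a ptrtoint/inttoptr on a reference type"
      Worklist.push_back(It);

  // Second pass: erase. std::list iterators to other elements stay valid,
  // just as the Instruction pointers collected by the pass stay valid until
  // eraseFromParent() is called on them.
  for (auto It : Worklist)
    Insts.erase(It);

  for (int V : Insts)
    std::printf("%d ", V); // prints: 1 3
  std::printf("\n");
}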
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index ec2380a501ab..0b953a90aeab 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -17,6 +17,7 @@
#include "Utils/WebAssemblyTypeUtilities.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssemblyAsmPrinter.h"
+#include "WebAssemblyISelLowering.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,6 +29,7 @@
#include "llvm/MC/MCSymbolWasm.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
// This disables the removal of registers when lowering into MC, as required
@@ -38,8 +40,8 @@ cl::opt<bool>
" instruction output for test purposes only."),
cl::init(false));
-extern cl::opt<bool> EnableEmException;
-extern cl::opt<bool> EnableEmSjLj;
+extern cl::opt<bool> WasmEnableEmEH;
+extern cl::opt<bool> WasmEnableEmSjLj;
static void removeRegisterOperands(const MachineInstr *MI, MCInst &OutMI);
@@ -56,15 +58,36 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
const MachineFunction &MF = *MO.getParent()->getParent()->getParent();
const TargetMachine &TM = MF.getTarget();
const Function &CurrentFunc = MF.getFunction();
+ Type *GlobalVT = Global->getValueType();
SmallVector<MVT, 1> VTs;
- computeLegalValueVTs(CurrentFunc, TM, Global->getValueType(), VTs);
- if (VTs.size() != 1)
+ computeLegalValueVTs(CurrentFunc, TM, GlobalVT, VTs);
+
+  // Tables are represented as Arrays in LLVM IR; therefore they reach this
+  // point as aggregate Array types with an element type that is a reference
+  // type.
+ wasm::ValType Type;
+ if (GlobalVT->isArrayTy() &&
+ WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
+ MVT VT;
+ switch (GlobalVT->getArrayElementType()->getPointerAddressSpace()) {
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF:
+ VT = MVT::funcref;
+ break;
+ case WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF:
+ VT = MVT::externref;
+ break;
+ default:
+ report_fatal_error("unhandled address space type");
+ }
+ Type = WebAssembly::toValType(VT);
+ } else if (VTs.size() == 1) {
+ Type = WebAssembly::toValType(VTs[0]);
+ } else
report_fatal_error("Aggregate globals not yet implemented");
- bool Mutable = true;
- wasm::ValType Type = WebAssembly::toValType(VTs[0]);
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- WasmSym->setGlobalType(wasm::WasmGlobalType{uint8_t(Type), Mutable});
+ WasmSym->setGlobalType(
+ wasm::WasmGlobalType{uint8_t(Type), /*Mutable=*/true});
}
return WasmSym;
}
@@ -82,7 +105,7 @@ WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
bool InvokeDetected = false;
auto *WasmSym = Printer.getMCSymbolForFunction(
- F, EnableEmException || EnableEmSjLj, Signature.get(), InvokeDetected);
+ F, WasmEnableEmEH || WasmEnableEmSjLj, Signature.get(), InvokeDetected);
WasmSym->setSignature(Signature.get());
Printer.addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
@@ -102,6 +125,9 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
switch (TargetFlags) {
case WebAssemblyII::MO_NO_FLAG:
break;
+ case WebAssemblyII::MO_GOT_TLS:
+ Kind = MCSymbolRefExpr::VK_WASM_GOT_TLS;
+ break;
case WebAssemblyII::MO_GOT:
Kind = MCSymbolRefExpr::VK_GOT;
break;
@@ -275,15 +301,9 @@ void WebAssemblyMCInstLower::lower(const MachineInstr *MI,
MCOp = lowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
case MachineOperand::MO_ExternalSymbol:
- // The target flag indicates whether this is a symbol for a
- // variable or a function.
- assert(MO.getTargetFlags() == 0 &&
- "WebAssembly uses only symbol flags on ExternalSymbols");
MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
break;
case MachineOperand::MO_MCSymbol:
- // This is currently used only for LSDA symbols (GCC_except_table),
- // because global addresses or other external symbols are handled above.
assert(MO.getTargetFlags() == 0 &&
"WebAssembly does not use target flags on MCSymbol");
MCOp = lowerSymbolOperand(MO, MO.getMCSymbol());
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
index 3daffd1c23a2..37ac8e75f4b7 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCLowerPrePass.cpp
@@ -33,21 +33,21 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-mclower-prepass"
namespace {
-class WebAssemblyMCLowerPrePass final : public MachineFunctionPass {
+class WebAssemblyMCLowerPrePass final : public ModulePass {
StringRef getPassName() const override {
return "WebAssembly MC Lower Pre Pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
+ ModulePass::getAnalysisUsage(AU);
}
- bool runOnMachineFunction(MachineFunction &MF) override;
+ bool runOnModule(Module &M) override;
public:
static char ID; // Pass identification, replacement for typeid
- WebAssemblyMCLowerPrePass() : MachineFunctionPass(ID) {}
+ WebAssemblyMCLowerPrePass() : ModulePass(ID) {}
};
} // end anonymous namespace
@@ -57,30 +57,43 @@ INITIALIZE_PASS(
"Collects information ahead of time for MC lowering",
false, false)
-FunctionPass *llvm::createWebAssemblyMCLowerPrePass() {
+ModulePass *llvm::createWebAssemblyMCLowerPrePass() {
return new WebAssemblyMCLowerPrePass();
}
-bool WebAssemblyMCLowerPrePass::runOnMachineFunction(MachineFunction &MF) {
- LLVM_DEBUG(dbgs() << "********** MC Lower Pre Pass **********\n"
- "********** Function: "
- << MF.getName() << '\n');
+// NOTE: this is a ModulePass since we need to enforce that this code has run
+// for all functions before AsmPrinter. If this way of doing things is ever
+// suboptimal, we could opt to make it a MachineFunctionPass and instead use
+// something like createBarrierNoopPass() to enforce ordering.
+bool WebAssemblyMCLowerPrePass::runOnModule(Module &M) {
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ if (!MMIWP)
+ return true;
- MachineModuleInfo &MMI = MF.getMMI();
+ MachineModuleInfo &MMI = MMIWP->getMMI();
MachineModuleInfoWasm &MMIW = MMI.getObjFileInfo<MachineModuleInfoWasm>();
- for (MachineBasicBlock &MBB : MF) {
- for (auto &MI : MBB) {
- // FIXME: what should all be filtered out beyond these?
- if (MI.isDebugInstr() || MI.isInlineAsm())
- continue;
- for (MachineOperand &MO : MI.uses()) {
- if (MO.isSymbol()) {
- MMIW.MachineSymbolsUsed.insert(MO.getSymbolName());
+ for (Function &F : M) {
+ MachineFunction *MF = MMI.getMachineFunction(F);
+ if (!MF)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "********** MC Lower Pre Pass **********\n"
+ "********** Function: "
+ << MF->getName() << '\n');
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (auto &MI : MBB) {
+ // FIXME: what should all be filtered out beyond these?
+ if (MI.isDebugInstr() || MI.isInlineAsm())
+ continue;
+ for (MachineOperand &MO : MI.uses()) {
+ if (MO.isSymbol()) {
+ MMIW.MachineSymbolsUsed.insert(MO.getSymbolName());
+ }
}
}
}
}
-
return true;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
index 9aea65cba280..2180f57c106a 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp
@@ -96,9 +96,8 @@ static bool replaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
SmallVector<SlotIndex, 4> Indices;
- for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end();
- I != E;) {
- MachineOperand &O = *I++;
+ for (MachineOperand &O :
+ llvm::make_early_inc_range(MRI.use_nodbg_operands(FromReg))) {
MachineInstr *Where = O.getParent();
// Check that MI dominates the instruction in the normal way.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 6bfed1a7195c..9d83a75a8247 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -106,13 +106,12 @@ bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(
// instructions to satisfy LiveIntervals' requirement that all uses be
// dominated by defs. Now that LiveIntervals has computed which of these
// defs are actually needed and which are dead, remove the dead ones.
- for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE;) {
- MachineInstr *MI = &*MII++;
- if (MI->isImplicitDef() && MI->getOperand(0).isDead()) {
- LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg());
- LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot());
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
+ for (MachineInstr &MI : llvm::make_early_inc_range(MF.front())) {
+ if (MI.isImplicitDef() && MI.getOperand(0).isDead()) {
+ LiveInterval &LI = LIS.getInterval(MI.getOperand(0).getReg());
+ LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(MI).getRegSlot());
+ LIS.RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 96390de8f5e7..7912aeb4f502 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -56,7 +56,7 @@ FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
}
void OptimizeReturned::visitCallBase(CallBase &CB) {
- for (unsigned I = 0, E = CB.getNumArgOperands(); I < E; ++I)
+ for (unsigned I = 0, E = CB.arg_size(); I < E; ++I)
if (CB.paramHasAttr(I, Attribute::Returned)) {
Value *Arg = CB.getArgOperand(I);
// Ignore constants, globals, undef, etc.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index ed5f7ccc854f..8b8593ddcbdd 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -112,8 +112,7 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(
// Move ARGUMENT_* instructions to the top of the entry block, so that their
// liveness reflects the fact that these really are live-in values.
- for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE;) {
- MachineInstr &MI = *MII++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(Entry)) {
if (WebAssembly::isArgument(MI.getOpcode())) {
MI.removeFromParent();
Entry.insert(Entry.begin(), &MI);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index d6adc2fd155c..42419259802e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -497,6 +497,10 @@ static unsigned getTeeOpcode(const TargetRegisterClass *RC) {
return WebAssembly::TEE_F64;
if (RC == &WebAssembly::V128RegClass)
return WebAssembly::TEE_V128;
+ if (RC == &WebAssembly::EXTERNREFRegClass)
+ return WebAssembly::TEE_EXTERNREF;
+ if (RC == &WebAssembly::FUNCREFRegClass)
+ return WebAssembly::TEE_FUNCREF;
llvm_unreachable("Unexpected register class");
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 9f5d6b2a9a47..71f0bd28e1be 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -85,8 +85,8 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
// Replace explicit uses of the physical register with a virtual register.
const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg);
unsigned VReg = WebAssembly::NoRegister;
- for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E;) {
- MachineOperand &MO = *I++;
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI.reg_operands(PReg))) {
if (!MO.isImplicit()) {
if (VReg == WebAssembly::NoRegister) {
VReg = MRI.createVirtualRegister(RC);
@@ -101,8 +101,6 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
}
}
MO.setReg(VReg);
- if (MO.getParent()->isDebugValue())
- MO.setIsDebug();
Changed = true;
}
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 7943e1ecc8e1..add3c799f4aa 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -15,7 +15,7 @@
#include "WebAssemblySubtarget.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyInstrInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
#define DEBUG_TYPE "wasm-subtarget"
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 43d5871f0aa0..b553c8150652 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -36,6 +36,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo {
enum SIMDEnum {
NoSIMD,
SIMD128,
+ RelaxedSIMD,
} SIMDLevel = NoSIMD;
bool HasAtomics = false;
@@ -89,6 +90,7 @@ public:
// Predicates used by WebAssemblyInstrInfo.td.
bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
bool hasSIMD128() const { return SIMDLevel >= SIMD128; }
+ bool hasRelaxedSIMD() const { return SIMDLevel >= RelaxedSIMD; }
bool hasAtomics() const { return HasAtomics; }
bool hasNontrappingFPToInt() const { return HasNontrappingFPToInt; }
bool hasSignExt() const { return HasSignExt; }
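
A small hedged illustration of why the enumerator order matters for the predicate added above (the snippet is not from the patch):

// SIMDEnum is ordered NoSIMD < SIMD128 < RelaxedSIMD, so a subtarget built
// with relaxed SIMD also reports hasSIMD128(); both predicates are ordered
// comparisons against SIMDLevel.
enum SIMDEnum { NoSIMD, SIMD128, RelaxedSIMD } SIMDLevel = RelaxedSIMD;
bool HasSIMD128 = SIMDLevel >= SIMD128;         // true
bool HasRelaxedSIMD = SIMDLevel >= RelaxedSIMD; // true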
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 746a7599c58c..80abccd74782 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -24,7 +24,7 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
@@ -34,17 +34,27 @@ using namespace llvm;
#define DEBUG_TYPE "wasm"
// Emscripten's asm.js-style exception handling
-cl::opt<bool> EnableEmException(
- "enable-emscripten-cxx-exceptions",
- cl::desc("WebAssembly Emscripten-style exception handling"),
- cl::init(false));
+cl::opt<bool>
+ WasmEnableEmEH("enable-emscripten-cxx-exceptions",
+ cl::desc("WebAssembly Emscripten-style exception handling"),
+ cl::init(false));
// Emscripten's asm.js-style setjmp/longjmp handling
-cl::opt<bool> EnableEmSjLj(
+cl::opt<bool> WasmEnableEmSjLj(
"enable-emscripten-sjlj",
cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
cl::init(false));
+// Exception handling using wasm EH instructions
+cl::opt<bool> WasmEnableEH("wasm-enable-eh",
+ cl::desc("WebAssembly exception handling"),
+ cl::init(false));
+
+// setjmp/longjmp handling using wasm EH instructions
+cl::opt<bool> WasmEnableSjLj("wasm-enable-sjlj",
+ cl::desc("WebAssembly setjmp/longjmp handling"),
+ cl::init(false));
+
// A command-line option to keep implicit locals
// for the purpose of testing with lit/llc ONLY.
// This produces output which is not valid WebAssembly, and is not supported
@@ -123,12 +133,14 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
: LLVMTargetMachine(
T,
TT.isArch64Bit()
- ? (TT.isOSEmscripten()
- ? "e-m:e-p:64:64-i64:64-f128:64-n32:64-S128-ni:1:10:20"
- : "e-m:e-p:64:64-i64:64-n32:64-S128-ni:1:10:20")
- : (TT.isOSEmscripten()
- ? "e-m:e-p:32:32-i64:64-f128:64-n32:64-S128-ni:1:10:20"
- : "e-m:e-p:32:32-i64:64-n32:64-S128-ni:1:10:20"),
+ ? (TT.isOSEmscripten() ? "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-"
+ "f128:64-n32:64-S128-ni:1:10:20"
+ : "e-m:e-p:64:64-p10:8:8-p20:8:8-i64:64-"
+ "n32:64-S128-ni:1:10:20")
+ : (TT.isOSEmscripten() ? "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-"
+ "f128:64-n32:64-S128-ni:1:10:20"
+ : "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-"
+ "n32:64-S128-ni:1:10:20"),
TT, CPU, FS, Options, getEffectiveRelocModel(RM, TT),
getEffectiveCodeModel(CM, CodeModel::Large), OL),
TLOF(new WebAssemblyTargetObjectFile()) {
@@ -332,6 +344,7 @@ public:
void addPostRegAlloc() override;
bool addGCPasses() override { return false; }
void addPreEmitPass() override;
+ bool addPreISel() override;
// No reg alloc
bool addRegAssignAndRewriteFast() override { return false; }
@@ -355,6 +368,43 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
return nullptr; // No reg alloc
}
+static void checkSanityForEHAndSjLj(const TargetMachine *TM) {
+ // Sanity checking related to -exception-model
+ if (TM->Options.ExceptionModel != ExceptionHandling::None &&
+ TM->Options.ExceptionModel != ExceptionHandling::Wasm)
+ report_fatal_error("-exception-model should be either 'none' or 'wasm'");
+ if (WasmEnableEmEH && TM->Options.ExceptionModel == ExceptionHandling::Wasm)
+ report_fatal_error("-exception-model=wasm not allowed with "
+ "-enable-emscripten-cxx-exceptions");
+ if (WasmEnableEH && TM->Options.ExceptionModel != ExceptionHandling::Wasm)
+ report_fatal_error(
+ "-wasm-enable-eh only allowed with -exception-model=wasm");
+ if (WasmEnableSjLj && TM->Options.ExceptionModel != ExceptionHandling::Wasm)
+ report_fatal_error(
+ "-wasm-enable-sjlj only allowed with -exception-model=wasm");
+ if ((!WasmEnableEH && !WasmEnableSjLj) &&
+ TM->Options.ExceptionModel == ExceptionHandling::Wasm)
+ report_fatal_error(
+ "-exception-model=wasm only allowed with at least one of "
+ "-wasm-enable-eh or -wasm-enable-sjj");
+
+ // You can't enable two modes of EH at the same time
+ if (WasmEnableEmEH && WasmEnableEH)
+ report_fatal_error(
+ "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-eh");
+ // You can't enable two modes of SjLj at the same time
+ if (WasmEnableEmSjLj && WasmEnableSjLj)
+ report_fatal_error(
+ "-enable-emscripten-sjlj not allowed with -wasm-enable-sjlj");
+ // You can't mix Emscripten EH with Wasm SjLj.
+ if (WasmEnableEmEH && WasmEnableSjLj)
+ report_fatal_error(
+ "-enable-emscripten-cxx-exceptions not allowed with -wasm-enable-sjlj");
+ // Currently it is allowed to mix Wasm EH with Emscripten SjLj as an interim
+ // measure, but some code will error out at compile time in this combination.
+ // See WebAssemblyLowerEmscriptenEHSjLj pass for details.
+}
+
//===----------------------------------------------------------------------===//
// The following functions are called from lib/CodeGen/Passes.cpp to modify
// the CodeGen pass sequence.
@@ -381,23 +431,27 @@ void WebAssemblyPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createWebAssemblyOptimizeReturned());
+ checkSanityForEHAndSjLj(TM);
+
// If exception handling is not enabled and setjmp/longjmp handling is
// enabled, we lower invokes into calls and delete unreachable landingpad
// blocks. Lowering invokes when there is no EH support is done in
- // TargetPassConfig::addPassesToHandleExceptions, but this runs after this
- // function and SjLj handling expects all invokes to be lowered before.
- if (!EnableEmException &&
- TM->Options.ExceptionModel == ExceptionHandling::None) {
+ // TargetPassConfig::addPassesToHandleExceptions, but that runs after these IR
+ // passes and Emscripten SjLj handling expects all invokes to be lowered
+ // before.
+ if (!WasmEnableEmEH && !WasmEnableEH) {
addPass(createLowerInvokePass());
// The lower invoke pass may create unreachable code. Remove it in order not
// to process dead blocks in setjmp/longjmp handling.
addPass(createUnreachableBlockEliminationPass());
}
- // Handle exceptions and setjmp/longjmp if enabled.
- if (EnableEmException || EnableEmSjLj)
- addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
- EnableEmSjLj));
+ // Handle exceptions and setjmp/longjmp if enabled. Unlike Wasm EH preparation
+ // done in WasmEHPrepare pass, Wasm SjLj preparation shares libraries and
+ // transformation algorithms with Emscripten SjLj, so we run
+ // LowerEmscriptenEHSjLj pass also when Wasm SjLj is enabled.
+ if (WasmEnableEmEH || WasmEnableEmSjLj || WasmEnableSjLj)
+ addPass(createWebAssemblyLowerEmscriptenEHSjLj());
// Expand indirectbr instructions to switches.
addPass(createIndirectBrExpandPass());
@@ -518,6 +572,12 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyMCLowerPrePass());
}
+bool WebAssemblyPassConfig::addPreISel() {
+ TargetPassConfig::addPreISel();
+ addPass(createWebAssemblyLowerRefTypesIntPtrConv());
+ return false;
+}
+
yaml::MachineFunctionInfo *
WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const {
return new yaml::WebAssemblyFunctionInfo();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index d9bc7c6d2c3f..f1ebcbc6fc51 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -114,7 +114,8 @@ bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
}
void WebAssemblyTTIImpl::getUnrollingPreferences(
- Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) const {
+ Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const {
// Scan the loop: don't unroll loops with calls. This is a standard approach
// for most (all?) targets.
for (BasicBlock *BB : L->blocks())
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 1a33bd20d027..50036f7f7e98 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -49,7 +49,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
- TTI::UnrollingPreferences &UP) const;
+ TTI::UnrollingPreferences &UP,
+ OptimizationRemarkEmitter *ORE) const;
/// @}
@@ -59,8 +60,7 @@ public:
unsigned getNumberOfRegisters(unsigned ClassID) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 67ca67d6cee6..8ce6b47d10e8 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -31,10 +31,10 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>
@@ -1758,8 +1758,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
// It is widely common for MS InlineAsm to use a global variable and one/two
// registers in a memory expression, even though it is inaccessible via rip/eip.
if (IsGlobalLV && (BaseReg || IndexReg)) {
- Operands.push_back(
- X86Operand::CreateMem(getPointerWidth(), Disp, Start, End));
+ Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
+ End, Size, Identifier, Decl));
return false;
}
// Otherwise, we set the base register to a non-zero value
@@ -2551,6 +2551,8 @@ bool X86AsmParser::ParseIntelOperand(OperandVector &Operands) {
StringRef ErrMsg;
unsigned BaseReg = SM.getBaseReg();
unsigned IndexReg = SM.getIndexReg();
+ if (IndexReg && BaseReg == X86::RIP)
+ BaseReg = 0;
unsigned Scale = SM.getScale();
if (!PtrInOperand)
Size = SM.getElementSize() << 3;
@@ -2655,7 +2657,7 @@ bool X86AsmParser::ParseATTOperand(OperandVector &Operands) {
Expr = nullptr;
Reg = RE->getRegNo();
- // Sanity check register.
+ // Check the register.
if (Reg == X86::EIZ || Reg == X86::RIZ)
return Error(
Loc, "%eiz and %riz can only be used as index registers",
@@ -2753,6 +2755,7 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
.Case("1to4", "{1to4}")
.Case("1to8", "{1to8}")
.Case("1to16", "{1to16}")
+ .Case("1to32", "{1to32}")
.Default(nullptr);
if (!BroadcastPrimitive)
return TokError("Invalid memory broadcast primitive.");
@@ -2914,7 +2917,7 @@ bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
return true;
- // Sanity check register.
+ // Check the register.
BaseReg = cast<X86MCExpr>(E)->getRegNo();
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
return Error(BaseLoc, "eiz and riz can only be used as index registers",
@@ -3126,9 +3129,10 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
unsigned ComparisonPredicate = ~0U;
- // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
bool IsVCMP = PatchedName[0] == 'v';
unsigned CCIdx = IsVCMP ? 4 : 3;
@@ -3182,7 +3186,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (CC != ~0U && (IsVCMP || CC < 8)) {
+ if (CC != ~0U && (IsVCMP || CC < 8) &&
+ (IsVCMP || PatchedName.back() != 'h')) {
if (PatchedName.endswith("ss"))
PatchedName = IsVCMP ? "vcmpss" : "cmpss";
else if (PatchedName.endswith("sd"))
@@ -3191,6 +3196,10 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
PatchedName = IsVCMP ? "vcmpps" : "cmpps";
else if (PatchedName.endswith("pd"))
PatchedName = IsVCMP ? "vcmppd" : "cmppd";
+ else if (PatchedName.endswith("sh"))
+ PatchedName = "vcmpsh";
+ else if (PatchedName.endswith("ph"))
+ PatchedName = "vcmpph";
else
llvm_unreachable("Unexpected suffix!");
@@ -3859,6 +3868,176 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
}
break;
}
+ case X86::VFCMADDCPHZ128m:
+ case X86::VFCMADDCPHZ256m:
+ case X86::VFCMADDCPHZm:
+ case X86::VFCMADDCPHZ128mb:
+ case X86::VFCMADDCPHZ256mb:
+ case X86::VFCMADDCPHZmb:
+ case X86::VFCMADDCPHZ128mbk:
+ case X86::VFCMADDCPHZ256mbk:
+ case X86::VFCMADDCPHZmbk:
+ case X86::VFCMADDCPHZ128mbkz:
+ case X86::VFCMADDCPHZ256mbkz:
+ case X86::VFCMADDCPHZmbkz:
+ case X86::VFCMADDCPHZ128mk:
+ case X86::VFCMADDCPHZ256mk:
+ case X86::VFCMADDCPHZmk:
+ case X86::VFCMADDCPHZ128mkz:
+ case X86::VFCMADDCPHZ256mkz:
+ case X86::VFCMADDCPHZmkz:
+ case X86::VFCMADDCPHZ128r:
+ case X86::VFCMADDCPHZ256r:
+ case X86::VFCMADDCPHZr:
+ case X86::VFCMADDCPHZ128rk:
+ case X86::VFCMADDCPHZ256rk:
+ case X86::VFCMADDCPHZrk:
+ case X86::VFCMADDCPHZ128rkz:
+ case X86::VFCMADDCPHZ256rkz:
+ case X86::VFCMADDCPHZrkz:
+ case X86::VFCMADDCPHZrb:
+ case X86::VFCMADDCPHZrbk:
+ case X86::VFCMADDCPHZrbkz:
+ case X86::VFCMADDCSHZm:
+ case X86::VFCMADDCSHZmk:
+ case X86::VFCMADDCSHZmkz:
+ case X86::VFCMADDCSHZr:
+ case X86::VFCMADDCSHZrb:
+ case X86::VFCMADDCSHZrbk:
+ case X86::VFCMADDCSHZrbkz:
+ case X86::VFCMADDCSHZrk:
+ case X86::VFCMADDCSHZrkz:
+ case X86::VFMADDCPHZ128m:
+ case X86::VFMADDCPHZ256m:
+ case X86::VFMADDCPHZm:
+ case X86::VFMADDCPHZ128mb:
+ case X86::VFMADDCPHZ256mb:
+ case X86::VFMADDCPHZmb:
+ case X86::VFMADDCPHZ128mbk:
+ case X86::VFMADDCPHZ256mbk:
+ case X86::VFMADDCPHZmbk:
+ case X86::VFMADDCPHZ128mbkz:
+ case X86::VFMADDCPHZ256mbkz:
+ case X86::VFMADDCPHZmbkz:
+ case X86::VFMADDCPHZ128mk:
+ case X86::VFMADDCPHZ256mk:
+ case X86::VFMADDCPHZmk:
+ case X86::VFMADDCPHZ128mkz:
+ case X86::VFMADDCPHZ256mkz:
+ case X86::VFMADDCPHZmkz:
+ case X86::VFMADDCPHZ128r:
+ case X86::VFMADDCPHZ256r:
+ case X86::VFMADDCPHZr:
+ case X86::VFMADDCPHZ128rk:
+ case X86::VFMADDCPHZ256rk:
+ case X86::VFMADDCPHZrk:
+ case X86::VFMADDCPHZ128rkz:
+ case X86::VFMADDCPHZ256rkz:
+ case X86::VFMADDCPHZrkz:
+ case X86::VFMADDCPHZrb:
+ case X86::VFMADDCPHZrbk:
+ case X86::VFMADDCPHZrbkz:
+ case X86::VFMADDCSHZm:
+ case X86::VFMADDCSHZmk:
+ case X86::VFMADDCSHZmkz:
+ case X86::VFMADDCSHZr:
+ case X86::VFMADDCSHZrb:
+ case X86::VFMADDCSHZrbk:
+ case X86::VFMADDCSHZrbkz:
+ case X86::VFMADDCSHZrk:
+ case X86::VFMADDCSHZrkz: {
+ unsigned Dest = Inst.getOperand(0).getReg();
+ for (unsigned i = 2; i < Inst.getNumOperands(); i++)
+ if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
+ return Warning(Ops[0]->getStartLoc(), "Destination register should be "
+ "distinct from source registers");
+ break;
+ }
+ case X86::VFCMULCPHZ128rm:
+ case X86::VFCMULCPHZ256rm:
+ case X86::VFCMULCPHZrm:
+ case X86::VFCMULCPHZ128rmb:
+ case X86::VFCMULCPHZ256rmb:
+ case X86::VFCMULCPHZrmb:
+ case X86::VFCMULCPHZ128rmbk:
+ case X86::VFCMULCPHZ256rmbk:
+ case X86::VFCMULCPHZrmbk:
+ case X86::VFCMULCPHZ128rmbkz:
+ case X86::VFCMULCPHZ256rmbkz:
+ case X86::VFCMULCPHZrmbkz:
+ case X86::VFCMULCPHZ128rmk:
+ case X86::VFCMULCPHZ256rmk:
+ case X86::VFCMULCPHZrmk:
+ case X86::VFCMULCPHZ128rmkz:
+ case X86::VFCMULCPHZ256rmkz:
+ case X86::VFCMULCPHZrmkz:
+ case X86::VFCMULCPHZ128rr:
+ case X86::VFCMULCPHZ256rr:
+ case X86::VFCMULCPHZrr:
+ case X86::VFCMULCPHZ128rrk:
+ case X86::VFCMULCPHZ256rrk:
+ case X86::VFCMULCPHZrrk:
+ case X86::VFCMULCPHZ128rrkz:
+ case X86::VFCMULCPHZ256rrkz:
+ case X86::VFCMULCPHZrrkz:
+ case X86::VFCMULCPHZrrb:
+ case X86::VFCMULCPHZrrbk:
+ case X86::VFCMULCPHZrrbkz:
+ case X86::VFCMULCSHZrm:
+ case X86::VFCMULCSHZrmk:
+ case X86::VFCMULCSHZrmkz:
+ case X86::VFCMULCSHZrr:
+ case X86::VFCMULCSHZrrb:
+ case X86::VFCMULCSHZrrbk:
+ case X86::VFCMULCSHZrrbkz:
+ case X86::VFCMULCSHZrrk:
+ case X86::VFCMULCSHZrrkz:
+ case X86::VFMULCPHZ128rm:
+ case X86::VFMULCPHZ256rm:
+ case X86::VFMULCPHZrm:
+ case X86::VFMULCPHZ128rmb:
+ case X86::VFMULCPHZ256rmb:
+ case X86::VFMULCPHZrmb:
+ case X86::VFMULCPHZ128rmbk:
+ case X86::VFMULCPHZ256rmbk:
+ case X86::VFMULCPHZrmbk:
+ case X86::VFMULCPHZ128rmbkz:
+ case X86::VFMULCPHZ256rmbkz:
+ case X86::VFMULCPHZrmbkz:
+ case X86::VFMULCPHZ128rmk:
+ case X86::VFMULCPHZ256rmk:
+ case X86::VFMULCPHZrmk:
+ case X86::VFMULCPHZ128rmkz:
+ case X86::VFMULCPHZ256rmkz:
+ case X86::VFMULCPHZrmkz:
+ case X86::VFMULCPHZ128rr:
+ case X86::VFMULCPHZ256rr:
+ case X86::VFMULCPHZrr:
+ case X86::VFMULCPHZ128rrk:
+ case X86::VFMULCPHZ256rrk:
+ case X86::VFMULCPHZrrk:
+ case X86::VFMULCPHZ128rrkz:
+ case X86::VFMULCPHZ256rrkz:
+ case X86::VFMULCPHZrrkz:
+ case X86::VFMULCPHZrrb:
+ case X86::VFMULCPHZrrbk:
+ case X86::VFMULCPHZrrbkz:
+ case X86::VFMULCSHZrm:
+ case X86::VFMULCSHZrmk:
+ case X86::VFMULCSHZrmkz:
+ case X86::VFMULCSHZrr:
+ case X86::VFMULCSHZrrb:
+ case X86::VFMULCSHZrrbk:
+ case X86::VFMULCSHZrrbkz:
+ case X86::VFMULCSHZrrk:
+ case X86::VFMULCSHZrrkz: {
+ unsigned Dest = Inst.getOperand(0).getReg();
+ for (unsigned i = 1; i < Inst.getNumOperands(); i++)
+ if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
+ return Warning(Ops[0]->getStartLoc(), "Destination register should be "
+ "distinct from source registers");
+ break;
+ }
}
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
@@ -3916,12 +4095,12 @@ void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
// be found here:
// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
switch (Inst.getOpcode()) {
- case X86::RETW:
- case X86::RETL:
- case X86::RETQ:
- case X86::RETIL:
- case X86::RETIQ:
- case X86::RETIW: {
+ case X86::RET16:
+ case X86::RET32:
+ case X86::RET64:
+ case X86::RETI16:
+ case X86::RETI32:
+ case X86::RETI64: {
MCInst ShlInst, FenceInst;
bool Parse32 = is32BitMode() || Code16GCC;
unsigned Basereg =
@@ -4093,24 +4272,6 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
ForcedVEXEncoding != VEXEncoding_VEX3))
return Match_Unsupported;
- // These instructions match ambiguously with their VEX encoded counterparts
- // and appear first in the matching table. Reject them unless we're forcing
- // EVEX encoding.
- // FIXME: We really need a way to break the ambiguity.
- switch (Opc) {
- case X86::VCVTSD2SIZrm_Int:
- case X86::VCVTSD2SI64Zrm_Int:
- case X86::VCVTSS2SIZrm_Int:
- case X86::VCVTSS2SI64Zrm_Int:
- case X86::VCVTTSD2SIZrm: case X86::VCVTTSD2SIZrm_Int:
- case X86::VCVTTSD2SI64Zrm: case X86::VCVTTSD2SI64Zrm_Int:
- case X86::VCVTTSS2SIZrm: case X86::VCVTTSS2SIZrm_Int:
- case X86::VCVTTSS2SI64Zrm: case X86::VCVTTSS2SI64Zrm_Int:
- if (ForcedVEXEncoding != VEXEncoding_EVEX)
- return Match_Unsupported;
- break;
- }
-
return Match_Success;
}
@@ -4678,7 +4839,7 @@ bool X86AsmParser::parseDirectiveArch() {
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
int64_t NumBytes = 0, Control = 0;
SMLoc NumBytesLoc, ControlLoc;
- const MCSubtargetInfo STI = getSTI();
+ const MCSubtargetInfo& STI = getSTI();
NumBytesLoc = getTok().getLoc();
if (getParser().checkForValidSection() ||
getParser().parseAbsoluteExpression(NumBytes))
@@ -4704,7 +4865,7 @@ bool X86AsmParser::parseDirectiveNops(SMLoc L) {
}
/// Emit nops
- getParser().getStreamer().emitNops(NumBytes, Control, L);
+ getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
return false;
}
@@ -4717,11 +4878,11 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
const MCSection *Section = getStreamer().getCurrentSectionOnly();
if (!Section) {
- getStreamer().InitSections(false);
+ getStreamer().initSections(false, getSTI());
Section = getStreamer().getCurrentSectionOnly();
}
if (Section->UseCodeAlign())
- getStreamer().emitCodeAlignment(2, 0);
+ getStreamer().emitCodeAlignment(2, &getSTI(), 0);
else
getStreamer().emitValueToAlignment(2, 0, 1, 0);
return false;
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 2bc6492483c0..9164c699b569 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -34,7 +34,6 @@ struct X86Operand final : public MCParsedAsmOperand {
StringRef SymName;
void *OpDecl;
bool AddressOf;
- bool CallOperand;
struct TokOp {
const char *Data;
@@ -79,7 +78,7 @@ struct X86Operand final : public MCParsedAsmOperand {
X86Operand(KindTy K, SMLoc Start, SMLoc End)
: Kind(K), StartLoc(Start), EndLoc(End), OpDecl(nullptr),
- AddressOf(false), CallOperand(false) {}
+ AddressOf(false) {}
StringRef getSymName() override { return SymName; }
void *getOpDecl() override { return OpDecl; }
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 82581eb3c30a..908eb6d1fab1 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -83,9 +83,9 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -150,6 +150,12 @@ static InstrUID decode(OpcodeType type, InstructionContext insnContext,
dec =
&THREEDNOW_MAP_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case MAP5:
+ dec = &MAP5_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case MAP6:
+ dec = &MAP6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -332,7 +338,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
}
if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) &&
- ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) {
+ ((~byte1 & 0x8) == 0x8) && ((byte2 & 0x4) == 0x4)) {
insn->vectorExtensionType = TYPE_EVEX;
} else {
--insn->readerCursor; // unconsume byte1
@@ -800,10 +806,6 @@ static int readModRM(struct InternalInstruction *insn) {
return prefix##_DR0 + index; \
case TYPE_CONTROLREG: \
return prefix##_CR0 + index; \
- case TYPE_BNDR: \
- if (index > 3) \
- *valid = 0; \
- return prefix##_BND0 + index; \
case TYPE_MVSIBX: \
return prefix##_XMM0 + index; \
case TYPE_MVSIBY: \
@@ -876,11 +878,11 @@ static bool readOpcode(struct InternalInstruction *insn) {
insn->opcodeType = ONEBYTE;
if (insn->vectorExtensionType == TYPE_EVEX) {
- switch (mmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
+ switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
default:
LLVM_DEBUG(
- dbgs() << format("Unhandled mm field for instruction (0x%hhx)",
- mmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
+ dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
+ mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
return true;
case VEX_LOB_0F:
insn->opcodeType = TWOBYTE;
@@ -891,6 +893,12 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP5:
+ insn->opcodeType = MAP5;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_MAP6:
+ insn->opcodeType = MAP6;
+ return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_3B) {
switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
@@ -908,6 +916,12 @@ static bool readOpcode(struct InternalInstruction *insn) {
case VEX_LOB_0F3A:
insn->opcodeType = THREEBYTE_3A;
return consume(insn, insn->opcode);
+ case VEX_LOB_MAP5:
+ insn->opcodeType = MAP5;
+ return consume(insn, insn->opcode);
+ case VEX_LOB_MAP6:
+ insn->opcodeType = MAP6;
+ return consume(insn, insn->opcode);
}
} else if (insn->vectorExtensionType == TYPE_VEX_2B) {
insn->opcodeType = TWOBYTE;
@@ -1043,6 +1057,12 @@ static int getInstructionIDWithAttrMask(uint16_t *instructionID,
case THREEDNOW_MAP:
decision = &THREEDNOW_MAP_SYM;
break;
+ case MAP5:
+ decision = &MAP5_SYM;
+ break;
+ case MAP6:
+ decision = &MAP6_SYM;
+ break;
}
if (decision->opcodeDecisions[insnCtx]
diff --git a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 4318c17f03a0..95d3c8ede366 100644
--- a/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -37,7 +37,7 @@ namespace X86Disassembler {
#define xFromEVEX2of4(evex) (((~(evex)) & 0x40) >> 6)
#define bFromEVEX2of4(evex) (((~(evex)) & 0x20) >> 5)
#define r2FromEVEX2of4(evex) (((~(evex)) & 0x10) >> 4)
-#define mmFromEVEX2of4(evex) ((evex) & 0x3)
+#define mmmFromEVEX2of4(evex) ((evex) & 0x7)
#define wFromEVEX3of4(evex) (((evex) & 0x80) >> 7)
#define vvvvFromEVEX3of4(evex) (((~(evex)) & 0x78) >> 3)
#define ppFromEVEX3of4(evex) ((evex) & 0x3)
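
A hedged sketch of what the widened mmm field buys the disassembler; the case values come from readOpcode() above and the VEXLeadingOpcodeByte enum below, everything else is illustrative:

// Illustrative only: decode the opcode map from the second EVEX byte using the
// 3-bit mmm field (mmmFromEVEX2of4). Values 0x5/0x6 select the new MAP5/MAP6.
static const char *evexMapName(uint8_t EvexByte2) {
  switch (EvexByte2 & 0x7) { // mmmFromEVEX2of4
  case 0x1: return "0F";     // VEX_LOB_0F
  case 0x2: return "0F38";   // VEX_LOB_0F38
  case 0x3: return "0F3A";   // VEX_LOB_0F3A
  case 0x5: return "MAP5";   // VEX_LOB_MAP5
  case 0x6: return "MAP6";   // VEX_LOB_MAP6
  default:  return "reserved";
  }
}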
@@ -374,12 +374,6 @@ namespace X86Disassembler {
ENTRY(CR14) \
ENTRY(CR15)
-#define REGS_BOUND \
- ENTRY(BND0) \
- ENTRY(BND1) \
- ENTRY(BND2) \
- ENTRY(BND3)
-
#undef REGS_TMM
#define REGS_TMM \
ENTRY(TMM0) \
@@ -414,7 +408,6 @@ namespace X86Disassembler {
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
- REGS_BOUND \
REGS_TMM \
ENTRY(RIP)
@@ -489,7 +482,9 @@ enum SegmentOverride {
enum VEXLeadingOpcodeByte {
VEX_LOB_0F = 0x1,
VEX_LOB_0F38 = 0x2,
- VEX_LOB_0F3A = 0x3
+ VEX_LOB_0F3A = 0x3,
+ VEX_LOB_MAP5 = 0x5,
+ VEX_LOB_MAP6 = 0x6
};
enum XOPMapSelect {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index c685d7e0db81..baacf2f46183 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -153,6 +153,20 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
+ case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
+ case X86::VCMPSHZrm: case X86::VCMPSHZrr:
+ case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
+ case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
+ case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
+ case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
+ case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
+ case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
+ case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
+ case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
+ case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
+ case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
@@ -162,11 +176,15 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
if (Desc.TSFlags & X86II::EVEX_B) {
// Broadcast form.
- // Load size is based on W-bit.
- if (Desc.TSFlags & X86II::VEX_W)
+ // Load size is word for TA map. Otherwise it is based on W-bit.
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ printwordmem(MI, CurOp--, OS);
+ } else if (Desc.TSFlags & X86II::VEX_W) {
printqwordmem(MI, CurOp--, OS);
- else
+ } else {
printdwordmem(MI, CurOp--, OS);
+ }
// Print the number of elements broadcasted.
unsigned NumElts;
@@ -176,18 +194,28 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
else
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ NumElts *= 2;
+ }
OS << "{1to" << NumElts << "}";
} else {
- if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
- printdwordmem(MI, CurOp--, OS);
- else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS) {
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA)
+ printwordmem(MI, CurOp--, OS);
+ else
+ printdwordmem(MI, CurOp--, OS);
+ } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) {
+ assert((Desc.TSFlags & X86II::OpMapMask) != X86II::TA &&
+ "Unexpected op map!");
printqwordmem(MI, CurOp--, OS);
- else if (Desc.TSFlags & X86II::EVEX_L2)
+ } else if (Desc.TSFlags & X86II::EVEX_L2) {
printzmmwordmem(MI, CurOp--, OS);
- else if (Desc.TSFlags & X86II::VEX_L)
+ } else if (Desc.TSFlags & X86II::VEX_L) {
printymmwordmem(MI, CurOp--, OS);
- else
+ } else {
printxmmwordmem(MI, CurOp--, OS);
+ }
}
} else {
if (Desc.TSFlags & X86II::EVEX_B)
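
As a worked example of the broadcast-count logic above (hedged, reconstructed from the printed flags rather than taken from the patch):

// For the 512-bit fp16 broadcast compare VCMPPHZrmbi, EVEX_L2 is set and VEX_W
// is clear, so NumElts starts at 16; the TA opcode map marks the fp16 form, so
// it doubles to 32 and the printer emits "{1to32}", matching the "1to32"
// broadcast primitive added to the assembler earlier in this commit.
bool VEX_W = false;                // fp16 forms keep the W bit clear
unsigned NumElts = VEX_W ? 8 : 16; // EVEX_L2 (512-bit) branch above
bool IsTAMap = true;               // (TSFlags & OpMapMask) == X86II::TA
if (IsTAMap)
  NumElts *= 2;                    // 32, printed as "{1to32}"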
@@ -391,7 +419,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
uint64_t Target;
if (MIA->evaluateBranch(*MI, 0, 0, Target))
return;
- if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0))
+ if (MIA->evaluateMemoryOperandAddress(*MI, /*STI=*/nullptr, 0, 0))
return;
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 83f3614ded1a..d4f39b571394 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -29,9 +29,9 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -116,13 +116,6 @@ cl::opt<bool> X86PadForBranchAlign(
"x86-pad-for-branch-align", cl::init(true), cl::Hidden,
cl::desc("Pad previous instructions to implement branch alignment"));
-class X86ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
- bool HasRelocationAddend, bool foobar)
- : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
-};
-
class X86AsmBackend : public MCAsmBackend {
const MCSubtargetInfo &STI;
std::unique_ptr<const MCInstrInfo> MCII;
@@ -166,7 +159,8 @@ public:
bool allowAutoPadding() const override;
bool allowEnhancedRelaxation() const override;
- void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
+ void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
+ const MCSubtargetInfo &STI) override;
void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
unsigned getNumFixupKinds() const override {
@@ -207,9 +201,10 @@ public:
void finishLayout(MCAssembler const &Asm, MCAsmLayout &Layout) const override;
- unsigned getMaximumNopSize() const override;
+ unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;
- bool writeNopData(raw_ostream &OS, uint64_t Count) const override;
+ bool writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace
@@ -598,7 +593,7 @@ bool X86AsmBackend::needAlign(const MCInst &Inst) const {
/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
- const MCInst &Inst) {
+ const MCInst &Inst, const MCSubtargetInfo &STI) {
CanPadInst = canPadInst(Inst, OS);
if (!canPadBranches(OS))
@@ -637,7 +632,7 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
isFirstMacroFusibleInst(Inst, *MCII))) {
// If we meet an unfused branch or the first instruction in a fusible pair,
// insert a BoundaryAlign fragment.
- OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary));
+ OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI));
}
}
@@ -1081,16 +1076,16 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
}
}
-unsigned X86AsmBackend::getMaximumNopSize() const {
+unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
if (STI.hasFeature(X86::Mode16Bit))
return 4;
if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Mode64Bit))
return 1;
- if (STI.getFeatureBits()[X86::FeatureFast7ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
return 7;
- if (STI.getFeatureBits()[X86::FeatureFast15ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
return 15;
- if (STI.getFeatureBits()[X86::FeatureFast11ByteNOP])
+ if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
return 11;
// FIXME: handle 32-bit mode
// 15-bytes is the longest single NOP instruction, but 10-bytes is
@@ -1101,7 +1096,8 @@ unsigned X86AsmBackend::getMaximumNopSize() const {
/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
-bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
+bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
+ const MCSubtargetInfo *STI) const {
static const char Nops32Bit[10][11] = {
// nop
"\x90",
@@ -1138,9 +1134,9 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count) const {
};
const char(*Nops)[11] =
- STI.getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
+ STI->getFeatureBits()[X86::Mode16Bit] ? Nops16Bit : Nops32Bit;
- uint64_t MaxNopLength = (uint64_t)getMaximumNopSize();
+ uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
// Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
// length.
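
A minimal sketch of the chunking strategy the comment above describes, assuming only the names that appear in this hunk; the real pass additionally prepends 0x66 prefixes to form NOPs longer than the ten-byte table entries:

// Illustrative only: cover Count bytes with NOPs drawn from the 10-entry table,
// emitting the longest usable entry each time and a shorter tail at the end.
uint64_t Remaining = Count;          // bytes still to cover
const uint64_t LongestTableNop = 10; // Nops32Bit/Nops16Bit hold 1..10-byte NOPs
uint64_t MaxLen = std::min<uint64_t>(getMaximumNopSize(*STI), LongestTableNop);
while (Remaining != 0) {
  uint64_t ThisNop = std::min(Remaining, MaxLen);
  OS.write(Nops[ThisNop - 1], ThisNop); // table entry of exactly ThisNop bytes
  Remaining -= ThisNop;
}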
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 58e233d86da1..4161765fc1ae 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -441,6 +441,11 @@ namespace X86II {
/// SYMBOL_LABEL @GOTPCREL
MO_GOTPCREL,
+ /// MO_GOTPCREL_NORELAX - Same as MO_GOTPCREL except that R_X86_64_GOTPCREL
+ /// relocations are guaranteed to be emitted by the integrated assembler
+ /// instead of the relaxable R_X86_64[_REX]_GOTPCRELX relocations.
+ MO_GOTPCREL_NORELAX,
+
/// MO_PLT - On a symbol operand this indicates that the immediate is
/// offset to the PLT entry of symbol name from the current code location.
///
@@ -790,7 +795,7 @@ namespace X86II {
// belongs to. i.e. one-byte, two-byte, 0x0f 0x38, 0x0f 0x3a, etc.
//
OpMapShift = OpPrefixShift + 2,
- OpMapMask = 0x7 << OpMapShift,
+ OpMapMask = 0xF << OpMapShift,
// OB - OneByte - Set if this instruction has a one byte opcode.
OB = 0 << OpMapShift,
@@ -819,13 +824,17 @@ namespace X86II {
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
ThreeDNow = 7 << OpMapShift,
+ // MAP5, MAP6 - Prefix after the 0x0F prefix.
+ T_MAP5 = 8 << OpMapShift,
+ T_MAP6 = 9 << OpMapShift,
+
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
// They are used to specify GPRs and SSE registers, 64-bit operand size,
// etc. We only care about the REX.W and REX.R bits, and only the former is
// statically determined.
//
- REXShift = OpMapShift + 3,
+ REXShift = OpMapShift + 4,
REX_W = 1 << REXShift,
//===------------------------------------------------------------------===//
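
A hedged illustration of what the widened field means for TSFlags packing; the enum values are the ones declared above, the surrounding code is not from the patch:

// Illustrative only: with OpMapMask widened to four bits, the new map values
// 8 (T_MAP5) and 9 (T_MAP6) survive a mask-and-compare; under the old 3-bit
// mask they would alias lower maps. REXShift moves to OpMapShift + 4 so the
// REX_W bit no longer overlaps the widened map field.
uint64_t TSFlags = X86II::T_MAP5;            // 8 << OpMapShift
uint64_t OpMap = TSFlags & X86II::OpMapMask; // == X86II::T_MAP5
bool IsMap5or6 = OpMap == X86II::T_MAP5 || OpMap == X86II::T_MAP6;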
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index fa937d381613..8ab86f46ffe6 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -218,6 +218,9 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
return ELF::R_X86_64_REX_GOTPCRELX;
}
llvm_unreachable("unexpected relocation type!");
+ case MCSymbolRefExpr::VK_GOTPCREL_NORELAX:
+ checkIs32(Ctx, Loc, Type);
+ return ELF::R_X86_64_GOTPCREL;
case MCSymbolRefExpr::VK_X86_PLTOFF:
checkIs64(Ctx, Loc, Type);
return ELF::R_X86_64_PLTOFF64;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index d8dbbbbf2779..167580ec1ed0 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -264,6 +264,24 @@ void X86InstPrinterCommon::printCMPMnemonic(const MCInst *MI, bool IsVCmp,
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
OS << "ss\t";
break;
+ case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
+ case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
+ case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
+ case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
+ case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
+ case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
+ case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
+ case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
+ case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
+ OS << "ph\t";
+ break;
+ case X86::VCMPSHZrm: case X86::VCMPSHZrr:
+ case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
+ case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
+ case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
+ OS << "sh\t";
+ break;
}
}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index d5b205ad9a63..48c335f9a777 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -132,6 +132,20 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
case X86::VCMPPSZrrib: case X86::VCMPPSZrribk:
case X86::VCMPSDZrrb_Int: case X86::VCMPSDZrrb_Intk:
case X86::VCMPSSZrrb_Int: case X86::VCMPSSZrrb_Intk:
+ case X86::VCMPPHZ128rmi: case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rmi: case X86::VCMPPHZ256rri:
+ case X86::VCMPPHZrmi: case X86::VCMPPHZrri:
+ case X86::VCMPSHZrm: case X86::VCMPSHZrr:
+ case X86::VCMPSHZrm_Int: case X86::VCMPSHZrr_Int:
+ case X86::VCMPPHZ128rmik: case X86::VCMPPHZ128rrik:
+ case X86::VCMPPHZ256rmik: case X86::VCMPPHZ256rrik:
+ case X86::VCMPPHZrmik: case X86::VCMPPHZrrik:
+ case X86::VCMPSHZrm_Intk: case X86::VCMPSHZrr_Intk:
+ case X86::VCMPPHZ128rmbi: case X86::VCMPPHZ128rmbik:
+ case X86::VCMPPHZ256rmbi: case X86::VCMPPHZ256rmbik:
+ case X86::VCMPPHZrmbi: case X86::VCMPPHZrmbik:
+ case X86::VCMPPHZrrib: case X86::VCMPPHZrribk:
+ case X86::VCMPSHZrrb_Int: case X86::VCMPSHZrrb_Intk:
if (Imm >= 0 && Imm <= 31) {
OS << '\t';
printCMPMnemonic(MI, /*IsVCMP*/true, OS);
@@ -152,11 +166,15 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
if ((Desc.TSFlags & X86II::FormMask) == X86II::MRMSrcMem) {
if (Desc.TSFlags & X86II::EVEX_B) {
// Broadcast form.
- // Load size is based on W-bit.
- if (Desc.TSFlags & X86II::VEX_W)
+ // Load size is word for TA map. Otherwise it is based on W-bit.
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ printwordmem(MI, CurOp++, OS);
+ } else if (Desc.TSFlags & X86II::VEX_W) {
printqwordmem(MI, CurOp++, OS);
- else
+ } else {
printdwordmem(MI, CurOp++, OS);
+ }
// Print the number of elements broadcasted.
unsigned NumElts;
@@ -166,18 +184,28 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
else
NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
+ assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ NumElts *= 2;
+ }
OS << "{1to" << NumElts << "}";
} else {
- if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS)
- printdwordmem(MI, CurOp++, OS);
- else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD)
+ if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XS) {
+ if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA)
+ printwordmem(MI, CurOp++, OS);
+ else
+ printdwordmem(MI, CurOp++, OS);
+ } else if ((Desc.TSFlags & X86II::OpPrefixMask) == X86II::XD) {
+ assert((Desc.TSFlags & X86II::OpMapMask) != X86II::TA &&
+ "Unexpected op map!");
printqwordmem(MI, CurOp++, OS);
- else if (Desc.TSFlags & X86II::EVEX_L2)
+ } else if (Desc.TSFlags & X86II::EVEX_L2) {
printzmmwordmem(MI, CurOp++, OS);
- else if (Desc.TSFlags & X86II::VEX_L)
+ } else if (Desc.TSFlags & X86II::VEX_L) {
printymmwordmem(MI, CurOp++, OS);
- else
+ } else {
printxmmwordmem(MI, CurOp++, OS);
+ }
}
} else {
printOperand(MI, CurOp++, OS);
@@ -349,7 +377,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
uint64_t Target;
if (MIA->evaluateBranch(*MI, 0, 0, Target))
return;
- if (MIA->evaluateMemoryOperandAddress(*MI, 0, 0))
+ if (MIA->evaluateMemoryOperandAddress(*MI, /*STI=*/nullptr, 0, 0))
return;
}
const MCOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 714d2d839054..4fa8bc64b245 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -799,7 +799,10 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// 0b00001: implied 0F leading opcode
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
- // 0b00100-0b11111: Reserved for future use
+ // 0b00100: Reserved for future use
+ // 0b00101: VEX MAP5
+ // 0b00110: VEX MAP6
+ // 0b00111-0b11111: Reserved for future use
// 0b01000: XOP map select - 08h instructions with imm byte
// 0b01001: XOP map select - 09h instructions with no imm byte
// 0b01010: XOP map select - 0Ah instructions with imm dword
@@ -825,6 +828,12 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::XOPA:
VEX_5M = 0xA;
break;
+ case X86II::T_MAP5:
+ VEX_5M = 0x5;
+ break;
+ case X86II::T_MAP6:
+ VEX_5M = 0x6;
+ break;
}
// VEX_4V (VEX vvvv field): a register specifier
@@ -1173,10 +1182,10 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// EVEX opcode prefix can have 4 bytes
//
// +-----+ +--------------+ +-------------------+ +------------------------+
- // | 62h | | RXBR' | 00mm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
+ // | 62h | | RXBR' | 0mmm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
// +-----+ +--------------+ +-------------------+ +------------------------+
- assert((VEX_5M & 0x3) == VEX_5M &&
- "More than 2 significant bits in VEX.m-mmmm fields for EVEX!");
+ assert((VEX_5M & 0x7) == VEX_5M &&
+ "More than 3 significant bits in VEX.m-mmmm fields for EVEX!");
emitByte(0x62, OS);
emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) |
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 12dc053cd970..9da0a8129f23 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -26,9 +26,9 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -405,8 +405,12 @@ public:
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
uint64_t &Target) const override;
Optional<uint64_t> evaluateMemoryOperandAddress(const MCInst &Inst,
+ const MCSubtargetInfo *STI,
uint64_t Addr,
uint64_t Size) const override;
+ Optional<uint64_t>
+ getMemoryOperandRelocationOffset(const MCInst &Inst,
+ uint64_t Size) const override;
};
#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS
@@ -532,7 +536,8 @@ bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
}
Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
- const MCInst &Inst, uint64_t Addr, uint64_t Size) const {
+ const MCInst &Inst, const MCSubtargetInfo *STI, uint64_t Addr,
+ uint64_t Size) const {
const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags);
if (MemOpStart == -1)
@@ -555,6 +560,30 @@ Optional<uint64_t> X86MCInstrAnalysis::evaluateMemoryOperandAddress(
return None;
}
+Optional<uint64_t>
+X86MCInstrAnalysis::getMemoryOperandRelocationOffset(const MCInst &Inst,
+ uint64_t Size) const {
+ if (Inst.getOpcode() != X86::LEA64r)
+ return None;
+ const MCInstrDesc &MCID = Info->get(Inst.getOpcode());
+ int MemOpStart = X86II::getMemoryOperandNo(MCID.TSFlags);
+ if (MemOpStart == -1)
+ return None;
+ MemOpStart += X86II::getOperandBias(MCID);
+ const MCOperand &SegReg = Inst.getOperand(MemOpStart + X86::AddrSegmentReg);
+ const MCOperand &BaseReg = Inst.getOperand(MemOpStart + X86::AddrBaseReg);
+ const MCOperand &IndexReg = Inst.getOperand(MemOpStart + X86::AddrIndexReg);
+ const MCOperand &ScaleAmt = Inst.getOperand(MemOpStart + X86::AddrScaleAmt);
+ const MCOperand &Disp = Inst.getOperand(MemOpStart + X86::AddrDisp);
+ // Must be a simple rip-relative address.
+ if (BaseReg.getReg() != X86::RIP || SegReg.getReg() != 0 ||
+ IndexReg.getReg() != 0 || ScaleAmt.getImm() != 1 || !Disp.isImm())
+ return None;
+ // rip-relative ModR/M immediate is 32 bits.
+ assert(Size > 4 && "invalid instruction size for rip-relative lea");
+ return Size - 4;
+}
+
} // end of namespace X86_MC
} // end of namespace llvm
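
A hedged usage sketch for the new hook; the MIA analysis pointer, the 7-byte instruction length, and the variable names are assumptions for illustration, only the hook itself comes from the patch:

// For a rip-relative "lea 0x0(%rip), %rax" of 7 bytes, the 32-bit displacement
// occupies the last 4 bytes, so the hook returns 7 - 4 = 3: the offset within
// the instruction where a relocation against the displacement would apply.
if (Optional<uint64_t> Off =
        MIA->getMemoryOperandRelocationOffset(Inst, /*Size=*/7)) {
  uint64_t RelocOffsetInInst = *Off; // == 3 for the example above
}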
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
index 201b22d6232d..82f4460a42e7 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ShuffleDecode.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index 075e85f4e243..10fc176b59d8 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -100,7 +100,7 @@ unsigned X86WinCOFFObjectWriter::getRelocType(MCContext &Ctx,
if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
return COFF::IMAGE_REL_I386_DIR32NB;
if (Modifier == MCSymbolRefExpr::VK_SECREL)
- return COFF::IMAGE_REL_AMD64_SECREL;
+ return COFF::IMAGE_REL_I386_SECREL;
return COFF::IMAGE_REL_I386_DIR32;
case FK_SecRel_2:
return COFF::IMAGE_REL_I386_SECTION;
diff --git a/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index 18cda8f591c3..7490703251e9 100644
--- a/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/X86TargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheX86_32Target() {
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index eba5b6ce7836..10e1c5d6ed38 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -73,8 +73,8 @@ FunctionPass *createX86AvoidStoreForwardingBlocks();
/// Return a pass that lowers EFLAGS copy pseudo instructions.
FunctionPass *createX86FlagsCopyLoweringPass();
-/// Return a pass that expands WinAlloca pseudo-instructions.
-FunctionPass *createX86WinAllocaExpander();
+/// Return a pass that expands DynAlloca pseudo-instructions.
+FunctionPass *createX86DynAllocaExpander();
/// Return a pass that config the tile registers.
FunctionPass *createX86TileConfigPass();
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 53bbd93798ac..380507308c3d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -27,7 +27,7 @@ def Mode16Bit : SubtargetFeature<"16bit-mode", "In16BitMode", "true",
"16-bit mode (i8086)">;
//===----------------------------------------------------------------------===//
-// X86 Subtarget features
+// X86 Subtarget ISA features
//===----------------------------------------------------------------------===//
def FeatureX87 : SubtargetFeature<"x87","HasX87", "true",
@@ -42,6 +42,9 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
def FeatureCMPXCHG8B : SubtargetFeature<"cx8", "HasCmpxchg8b", "true",
"Support CMPXCHG8B instructions">;
+def FeatureCRC32 : SubtargetFeature<"crc32", "HasCRC32", "true",
+ "Enable SSE 4.2 CRC32 instruction">;
+
def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
"Support POPCNT instruction">;
@@ -100,20 +103,6 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[FeatureCMPXCHG8B]>;
-def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
- "SHLD instruction is slow">;
-def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
- "PMULLD instruction is slow">;
-def FeatureSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
- "true",
- "PMADDWD is slower than PMULLD">;
-// FIXME: This should not apply to CPUs that do not have SSE.
-def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
- "IsUAMem16Slow", "true",
- "Slow unaligned 16-byte memory access">;
-def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
- "IsUAMem32Slow", "true",
- "Slow unaligned 32-byte memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
[FeatureSSE3]>;
@@ -184,6 +173,14 @@ def FeatureVP2INTERSECT : SubtargetFeature<"avx512vp2intersect",
"HasVP2INTERSECT", "true",
"Enable AVX-512 vp2intersect",
[FeatureAVX512]>;
+// FIXME: FP16 scalar intrinsics use the type v8f16, which is supposed to be
+// guarded under condition hasVLX. So we imply it in FeatureFP16 currently.
+// FIXME: FP16 conversion between f16 and i64 customize type v8i64, which is
+// supposed to be guarded under condition hasDQI. So we imply it in FeatureFP16
+// currently.
+def FeatureFP16 : SubtargetFeature<"avx512fp16", "HasFP16", "true",
+ "Support 16-bit floating point",
+ [FeatureBWI, FeatureVLX, FeatureDQI]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -255,17 +252,6 @@ def FeatureAMXINT8 : SubtargetFeature<"amx-int8", "HasAMXINT8", "true",
def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
"Support AMX-BF16 instructions",
[FeatureAMXTILE]>;
-def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
- "Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
- "HasSlowDivide32", "true",
- "Use 8-bit divide for positive values less than 256">;
-def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divl",
- "HasSlowDivide64", "true",
- "Use 32-bit divide for positive values less than 2^32">;
-def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
- "PadShortFunctions", "true",
- "Pad short functions">;
def FeatureINVPCID : SubtargetFeature<"invpcid", "HasINVPCID", "true",
"Invalidate Process-Context Identifier">;
def FeatureSGX : SubtargetFeature<"sgx", "HasSGX", "true",
@@ -296,116 +282,244 @@ def FeatureTSXLDTRK : SubtargetFeature<"tsxldtrk", "HasTSXLDTRK", "true",
"Support TSXLDTRK instructions">;
def FeatureUINTR : SubtargetFeature<"uintr", "HasUINTR", "true",
"Has UINTR Instructions">;
+def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
+ "platform configuration instruction">;
+def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
+ "Support movdiri instruction">;
+def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
+ "Support movdir64b instruction">;
+
+// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
+// "string operations"). See "REP String Enhancement" in the Intel Software
+// Development Manual. This feature essentially means that REP MOVSB will copy
+// using the largest available size instead of copying bytes one by one, making
+// it at least as fast as REP MOVS{W,D,Q}.
+def FeatureERMSB
+ : SubtargetFeature<
+ "ermsb", "HasERMSB", "true",
+ "REP MOVS/STOS are fast">;
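+// Illustrative sketch (an assumption, not part of the original change): with
+// ERMSB available the backend may lower a memcpy intrinsic such as
+//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %n, i1 false)
+// to a "rep movsb" sequence instead of an unrolled or SSE copy loop, since the
+// enhanced string operation copies in large chunks internally.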
+
+// Icelake and newer processors have Fast Short REP MOV.
+def FeatureFSRM
+ : SubtargetFeature<
+ "fsrm", "HasFSRM", "true",
+ "REP MOVSB of short lengths is faster">;
+
+def FeatureSoftFloat
+ : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software floating point features">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget Security Mitigation features
+//===----------------------------------------------------------------------===//
+
+// Lower indirect calls using a special construct called a `retpoline` to
+// mitigate potential Spectre v2 attacks against them.
+def FeatureRetpolineIndirectCalls
+ : SubtargetFeature<
+ "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
+ "Remove speculation of indirect calls from the generated code">;
+
+// Lower indirect branches and switches either using conditional branch trees
+// or using a special construct called a `retpoline` to mitigate potential
+// Spectre v2 attacks against them.
+def FeatureRetpolineIndirectBranches
+ : SubtargetFeature<
+ "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
+ "Remove speculation of indirect branches from the generated code">;
+
+// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
+// `retpoline-indirect-branches` above.
+def FeatureRetpoline
+ : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
+ "Remove speculation of indirect branches from the "
+ "generated code, either by avoiding them entirely or "
+ "lowering them with a speculation blocking construct",
+ [FeatureRetpolineIndirectCalls,
+ FeatureRetpolineIndirectBranches]>;
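+// Illustrative usage (an assumption, not part of the original change): these
+// mitigations are ordinary subtarget features, so they can be enabled per
+// module or per function via the usual feature strings, e.g.
+//   llc -mattr=+retpoline-indirect-calls,+retpoline-indirect-branches foo.ll
+// or an IR attribute
+//   "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches"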
+
+// Rely on external thunks for the emitted retpoline calls. This allows users
+// to provide their own custom thunk definitions in highly specialized
+// environments such as a kernel that does boot-time hot patching.
+def FeatureRetpolineExternalThunk
+ : SubtargetFeature<
+ "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
+ "When lowering an indirect call or branch using a `retpoline`, rely "
+ "on the specified user provided thunk rather than emitting one "
+ "ourselves. Only has effect when combined with some other retpoline "
+ "feature", [FeatureRetpolineIndirectCalls]>;
+
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+ : SubtargetFeature<
+ "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+ "Prevent indirect calls/branches from using a memory operand, and "
+ "precede all indirect calls/branches from a register with an "
+ "LFENCE instruction to serialize control flow. Also decompose RET "
+ "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Enable SESES to mitigate speculative execution attacks
+def FeatureSpeculativeExecutionSideEffectSuppression
+ : SubtargetFeature<
+ "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
+ "Prevent speculative execution side channel timing attacks by "
+ "inserting a speculation barrier before memory reads, memory writes, "
+ "and conditional branches. Implies LVI Control Flow integrity.",
+ [FeatureLVIControlFlowIntegrity]>;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+ : SubtargetFeature<
+ "lvi-load-hardening", "UseLVILoadHardening", "true",
+ "Insert LFENCE instructions to prevent data speculatively injected "
+ "into loads from being used maliciously.">;
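+// Illustrative usage (an assumption, not part of the original change): as with
+// the retpoline features, a single function can opt into these mitigations via
+// its attributes, e.g.
+//   define void @f() "target-features"="+lvi-cfi,+lvi-load-hardening" {
+//     ret void
+//   }
+// and "+seses" pulls in "+lvi-cfi" through the implication list above.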
+
+def FeatureTaggedGlobals
+ : SubtargetFeature<
+ "tagged-globals", "AllowTaggedGlobals", "true",
+ "Use an instruction sequence for taking the address of a global "
+ "that allows a memory tag in the upper address bits.">;
+
+//===----------------------------------------------------------------------===//
+// X86 Subtarget Tuning features
+//===----------------------------------------------------------------------===//
+
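+// Illustrative note (not part of the original change): tuning features carry
+// no ISA semantics. They go in the trailing tuning list of Proc/ProcModel,
+// separate from the ISA feature list, e.g. a hypothetical CPU would look like
+//   def : ProcModel<"mycpu", SandyBridgeModel,
+//                   [FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
+//                   [TuningMacroFusion, TuningInsertVZEROUPPER]>;
+// so tuning choices never change which instructions may be emitted.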
+def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
+ "SHLD instruction is slow">;
+
+def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+ "PMULLD instruction is slow">;
+
+def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow",
+ "true",
+ "PMADDWD is slower than PMULLD">;
+
+// FIXME: This should not apply to CPUs that do not have SSE.
+def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+ "IsUAMem16Slow", "true",
+ "Slow unaligned 16-byte memory access">;
+
+def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
+ "IsUAMem32Slow", "true",
+ "Slow unaligned 32-byte memory access">;
+
+def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
+
+def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+ "HasSlowDivide32", "true",
+ "Use 8-bit divide for positive values less than 256">;
+
+def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl",
+ "HasSlowDivide64", "true",
+ "Use 32-bit divide for positive values less than 2^32">;
+
+def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions",
+ "PadShortFunctions", "true",
+ "Pad short functions">;
+
// On some processors, instructions that implicitly take two memory operands are
// slow. In practice, this means that CALL, PUSH, and POP with memory operands
// should be avoided in favor of a MOV + register CALL/PUSH/POP.
-def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
+def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops",
"SlowTwoMemOps", "true",
"Two memory operand instructions are slow">;
-def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
+
+def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
-def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
+
+def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
"LEA instruction with certain arguments is slow">;
-def FeatureSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
+
+def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true",
"LEA instruction with 3 ops or certain registers is slow">;
-def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
+
+def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
"INC and DEC instructions are slower than ADD and SUB">;
-def FeatureSoftFloat
- : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software floating point features">;
-def FeaturePOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
+
+def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt",
"HasPOPCNTFalseDeps", "true",
"POPCNT has a false dependency on dest register">;
-def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
+
+def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"HasLZCNTFalseDeps", "true",
"LZCNT/TZCNT have a false dependency on dest register">;
-def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
- "platform configuration instruction">;
+
// On recent (port-bound) X86 processors, it's preferable to combine into a
// single shuffle using a variable mask rather than multiple fixed shuffles.
-def FeatureFastVariableCrossLaneShuffle
+def TuningFastVariableCrossLaneShuffle
: SubtargetFeature<"fast-variable-crosslane-shuffle",
"HasFastVariableCrossLaneShuffle",
"true", "Cross-lane shuffles with variable masks are fast">;
-def FeatureFastVariablePerLaneShuffle
+def TuningFastVariablePerLaneShuffle
: SubtargetFeature<"fast-variable-perlane-shuffle",
"HasFastVariablePerLaneShuffle",
"true", "Per-lane shuffles with variable masks are fast">;
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
-def FeatureInsertVZEROUPPER
+def TuningInsertVZEROUPPER
: SubtargetFeature<"vzeroupper",
"InsertVZEROUPPER",
"true", "Should insert vzeroupper instructions">;
-// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
-// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
+
+// TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
+// than the corresponding NR code. TuningFastVectorFSQRT should be enabled if
// vector FSQRT has higher throughput than the corresponding NR code.
// The idea is that throughput bound code is likely to be vectorized, so for
// vectorized code we should care about the throughput of SQRT operations.
// But if the code is scalar that probably means that the code has some kind of
// dependency and we should care more about reducing the latency.
-def FeatureFastScalarFSQRT
+def TuningFastScalarFSQRT
: SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
"true", "Scalar SQRT is fast (disable Newton-Raphson)">;
-def FeatureFastVectorFSQRT
+def TuningFastVectorFSQRT
: SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
"true", "Vector SQRT is fast (disable Newton-Raphson)">;
+
// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
// be used to replace test/set sequences.
-def FeatureFastLZCNT
+def TuningFastLZCNT
: SubtargetFeature<
"fast-lzcnt", "HasFastLZCNT", "true",
"LZCNT instructions are as fast as most simple integer ops">;
+
// Set if the target can efficiently decode NOPs up to 7 bytes in length.
-def FeatureFast7ByteNOP
+def TuningFast7ByteNOP
: SubtargetFeature<
"fast-7bytenop", "HasFast7ByteNOP", "true",
"Target can quickly decode up to 7 byte NOPs">;
+
// Set if the target can efficiently decode NOPs up to 11 bytes in length.
-def FeatureFast11ByteNOP
+def TuningFast11ByteNOP
: SubtargetFeature<
"fast-11bytenop", "HasFast11ByteNOP", "true",
"Target can quickly decode up to 11 byte NOPs">;
+
// Set if the target can efficiently decode NOPs up to 15 bytes in length.
-def FeatureFast15ByteNOP
+def TuningFast15ByteNOP
: SubtargetFeature<
"fast-15bytenop", "HasFast15ByteNOP", "true",
"Target can quickly decode up to 15 byte NOPs">;
+
// Sandy Bridge and newer processors can use SHLD with the same source on both
// inputs to implement rotate to avoid the partial flag update of the normal
// rotate instructions.
-def FeatureFastSHLDRotate
+def TuningFastSHLDRotate
: SubtargetFeature<
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
-// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
-// "string operations"). See "REP String Enhancement" in the Intel Software
-// Development Manual. This feature essentially means that REP MOVSB will copy
-// using the largest available size instead of copying bytes one by one, making
-// it at least as fast as REPMOVS{W,D,Q}.
-def FeatureERMSB
- : SubtargetFeature<
- "ermsb", "HasERMSB", "true",
- "REP MOVS/STOS are fast">;
-
-// Icelake and newer processors have Fast Short REP MOV.
-def FeatureFSRM
- : SubtargetFeature<
- "fsrm", "HasFSRM", "true",
- "REP MOVSB of short lengths is faster">;
-
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
-def FeatureBranchFusion
+def TuningBranchFusion
: SubtargetFeature<"branchfusion", "HasBranchFusion", "true",
"CMP/TEST can be fused with conditional branches">;
// Sandy Bridge and newer processors have many instructions that can be
// fused with conditional branches and pass through the CPU as a single
// operation.
-def FeatureMacroFusion
+def TuningMacroFusion
: SubtargetFeature<"macrofusion", "HasMacroFusion", "true",
"Various instructions can be fused with conditional branches">;
@@ -413,117 +527,54 @@ def FeatureMacroFusion
// generate Gathers on all AVX2 processors. But the overhead on HSW is high.
// Skylake Client processor has faster Gathers than HSW and performance is
// similar to Skylake Server (AVX-512).
-def FeatureHasFastGather
+def TuningFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast">;
-def FeaturePrefer128Bit
+def TuningPrefer128Bit
: SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true",
"Prefer 128-bit AVX instructions">;
-def FeaturePrefer256Bit
+def TuningPrefer256Bit
: SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
"Prefer 256-bit AVX instructions">;
-def FeaturePreferMaskRegisters
+def TuningPreferMaskRegisters
: SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true",
"Prefer AVX512 mask registers over PTEST/MOVMSK">;
-// Lower indirect calls using a special construct called a `retpoline` to
-// mitigate potential Spectre v2 attacks against them.
-def FeatureRetpolineIndirectCalls
- : SubtargetFeature<
- "retpoline-indirect-calls", "UseRetpolineIndirectCalls", "true",
- "Remove speculation of indirect calls from the generated code">;
-
-// Lower indirect branches and switches either using conditional branch trees
-// or using a special construct called a `retpoline` to mitigate potential
-// Spectre v2 attacks against them.
-def FeatureRetpolineIndirectBranches
- : SubtargetFeature<
- "retpoline-indirect-branches", "UseRetpolineIndirectBranches", "true",
- "Remove speculation of indirect branches from the generated code">;
-
-// Deprecated umbrella feature for enabling both `retpoline-indirect-calls` and
-// `retpoline-indirect-branches` above.
-def FeatureRetpoline
- : SubtargetFeature<"retpoline", "DeprecatedUseRetpoline", "true",
- "Remove speculation of indirect branches from the "
- "generated code, either by avoiding them entirely or "
- "lowering them with a speculation blocking construct",
- [FeatureRetpolineIndirectCalls,
- FeatureRetpolineIndirectBranches]>;
-
-// Rely on external thunks for the emitted retpoline calls. This allows users
-// to provide their own custom thunk definitions in highly specialized
-// environments such as a kernel that does boot-time hot patching.
-def FeatureRetpolineExternalThunk
- : SubtargetFeature<
- "retpoline-external-thunk", "UseRetpolineExternalThunk", "true",
- "When lowering an indirect call or branch using a `retpoline`, rely "
- "on the specified user provided thunk rather than emitting one "
- "ourselves. Only has effect when combined with some other retpoline "
- "feature", [FeatureRetpolineIndirectCalls]>;
-
-// Mitigate LVI attacks against indirect calls/branches and call returns
-def FeatureLVIControlFlowIntegrity
- : SubtargetFeature<
- "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
- "Prevent indirect calls/branches from using a memory operand, and "
- "precede all indirect calls/branches from a register with an "
- "LFENCE instruction to serialize control flow. Also decompose RET "
- "instructions into a POP+LFENCE+JMP sequence.">;
-
-// Enable SESES to mitigate speculative execution attacks
-def FeatureSpeculativeExecutionSideEffectSuppression
- : SubtargetFeature<
- "seses", "UseSpeculativeExecutionSideEffectSuppression", "true",
- "Prevent speculative execution side channel timing attacks by "
- "inserting a speculation barrier before memory reads, memory writes, "
- "and conditional branches. Implies LVI Control Flow integrity.",
- [FeatureLVIControlFlowIntegrity]>;
-
-// Mitigate LVI attacks against data loads
-def FeatureLVILoadHardening
- : SubtargetFeature<
- "lvi-load-hardening", "UseLVILoadHardening", "true",
- "Insert LFENCE instructions to prevent data speculatively injected "
- "into loads from being used maliciously.">;
-
-// Direct Move instructions.
-def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
- "Support movdiri instruction">;
-def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
- "Support movdir64b instruction">;
-
-def FeatureFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
+def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true",
"Indicates that the BEXTR instruction is implemented as a single uop "
"with good throughput">;
// Combine vector math operations with shuffles into horizontal math
// instructions if a CPU implements horizontal operations (introduced with
// SSE3) with better latency/throughput than the alternative sequence.
-def FeatureFastHorizontalOps
+def TuningFastHorizontalOps
: SubtargetFeature<
"fast-hops", "HasFastHorizontalOps", "true",
"Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
"normal vector instructions with shuffles">;
-def FeatureFastScalarShiftMasks
+def TuningFastScalarShiftMasks
: SubtargetFeature<
"fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
"Prefer a left/right scalar logical shift pair over a shift+and pair">;
-def FeatureFastVectorShiftMasks
+def TuningFastVectorShiftMasks
: SubtargetFeature<
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
-def FeatureFastMOVBE
+def TuningFastMOVBE
: SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
"Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
-def FeatureUseGLMDivSqrtCosts
+def TuningUseSLMArithCosts
+ : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true",
+ "Use Silvermont specific arithmetic costs">;
+
+def TuningUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
@@ -531,10 +582,13 @@ def FeatureUseGLMDivSqrtCosts
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
+//===----------------------------------------------------------------------===//
+// X86 CPU Families
+// TODO: Remove these - use general tuning features to determine codegen.
+//===----------------------------------------------------------------------===//
+
// Bonnell
def ProcIntelAtom : SubtargetFeature<"", "X86ProcFamily", "IntelAtom", "">;
-// Silvermont
-def ProcIntelSLM : SubtargetFeature<"", "X86ProcFamily", "IntelSLM", "">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -569,6 +623,7 @@ include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
+include "X86SchedIceLake.td"
//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
@@ -580,9 +635,10 @@ def ProcessorFeatures {
FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, Feature64Bit
];
- list<SubtargetFeature> X86_64V2Features = !listconcat(
- X86_64V1Features,
- [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
+ list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
+ FeatureCMPXCHG16B, FeatureLAHFSAHF, FeatureCRC32, FeaturePOPCNT,
+ FeatureSSE42
+ ]);
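+  // Illustrative note (not part of the original change): !listconcat simply
+  // appends its operands, so X86_64V2Features is everything in
+  // X86_64V1Features followed by [FeatureCMPXCHG16B, FeatureLAHFSAHF,
+  // FeatureCRC32, FeaturePOPCNT, FeatureSSE42], and each later x86-64 level
+  // keeps stacking on the previous one in the same way.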
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
FeatureMOVBE, FeatureXSAVE
@@ -596,8 +652,8 @@ def ProcessorFeatures {
// Nehalem
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
- list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
+ TuningInsertVZEROUPPER];
// Westmere
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -609,15 +665,15 @@ def ProcessorFeatures {
list<SubtargetFeature> SNBAdditionalFeatures = [FeatureAVX,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> SNBTuning = [FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowUAMem32,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SNBTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowUAMem32,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SNBFeatures =
!listconcat(WSMFeatures, SNBAdditionalFeatures);
@@ -638,17 +694,17 @@ def ProcessorFeatures {
FeatureINVPCID,
FeatureLZCNT,
FeatureMOVBE];
- list<SubtargetFeature> HSWTuning = [FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePOPCNTFalseDeps,
- FeatureLZCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> HSWTuning = [TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningLZCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> HSWFeatures =
!listconcat(IVBFeatures, HSWAdditionalFeatures);
@@ -665,18 +721,18 @@ def ProcessorFeatures {
FeatureXSAVEC,
FeatureXSAVES,
FeatureCLFLUSHOPT];
- list<SubtargetFeature> SKLTuning = [FeatureHasFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SKLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SKLFeatures =
!listconcat(BDWFeatures, SKLAdditionalFeatures);
@@ -692,19 +748,19 @@ def ProcessorFeatures {
FeatureVLX,
FeaturePKU,
FeatureCLWB];
- list<SubtargetFeature> SKXTuning = [FeatureHasFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePrefer256Bit,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SKXTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
@@ -730,18 +786,18 @@ def ProcessorFeatures {
FeatureVBMI,
FeatureIFMA,
FeatureSHA];
- list<SubtargetFeature> CNLTuning = [FeatureHasFastGather,
- FeatureMacroFusion,
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureFastScalarFSQRT,
- FeatureFastVectorFSQRT,
- FeatureFastSHLDRotate,
- FeatureFast15ByteNOP,
- FeatureFastVariableCrossLaneShuffle,
- FeatureFastVariablePerLaneShuffle,
- FeaturePrefer256Bit,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> CNLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> CNLFeatures =
!listconcat(SKLFeatures, CNLAdditionalFeatures);
@@ -755,7 +811,18 @@ def ProcessorFeatures {
FeatureGFNI,
FeatureRDPID,
FeatureFSRM];
- list<SubtargetFeature> ICLTuning = CNLTuning;
+ list<SubtargetFeature> ICLTuning = [TuningFastGather,
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> ICLFeatures =
!listconcat(CNLFeatures, ICLAdditionalFeatures);
@@ -763,7 +830,7 @@ def ProcessorFeatures {
list<SubtargetFeature> ICXAdditionalFeatures = [FeaturePCONFIG,
FeatureCLWB,
FeatureWBNOINVD];
- list<SubtargetFeature> ICXTuning = CNLTuning;
+ list<SubtargetFeature> ICXTuning = ICLTuning;
list<SubtargetFeature> ICXFeatures =
!listconcat(ICLFeatures, ICXAdditionalFeatures);
@@ -773,7 +840,7 @@ def ProcessorFeatures {
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureSHSTK];
- list<SubtargetFeature> TGLTuning = CNLTuning;
+ list<SubtargetFeature> TGLTuning = ICLTuning;
list<SubtargetFeature> TGLFeatures =
!listconcat(ICLFeatures, TGLAdditionalFeatures );
@@ -786,6 +853,7 @@ def ProcessorFeatures {
FeatureCLDEMOTE,
FeatureWAITPKG,
FeaturePTWRITE,
+ FeatureFP16,
FeatureAVXVNNI,
FeatureTSXLDTRK,
FeatureENQCMD,
@@ -811,31 +879,32 @@ def ProcessorFeatures {
FeatureMOVBE,
FeatureLAHFSAHF];
list<SubtargetFeature> AtomTuning = [ProcIntelAtom,
- FeatureSlowUAMem16,
- FeatureLEAForSP,
- FeatureSlowDivide32,
- FeatureSlowDivide64,
- FeatureSlowTwoMemOps,
- FeatureLEAUsesAG,
- FeaturePadShortFunctions,
- FeatureInsertVZEROUPPER];
+ TuningSlowUAMem16,
+ TuningLEAForSP,
+ TuningSlowDivide32,
+ TuningSlowDivide64,
+ TuningSlowTwoMemOps,
+ TuningLEAUsesAG,
+ TuningPadShortFunctions,
+ TuningInsertVZEROUPPER];
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
+ FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeaturePRFCHW,
FeatureRDRAND];
- list<SubtargetFeature> SLMTuning = [ProcIntelSLM,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureSlowDivide64,
- FeatureSlowPMULLD,
- FeatureFast7ByteNOP,
- FeatureFastMOVBE,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> SLMTuning = [TuningUseSLMArithCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningSlowDivide64,
+ TuningSlowPMULLD,
+ TuningFast7ByteNOP,
+ TuningFastMOVBE,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> SLMFeatures =
!listconcat(AtomFeatures, SLMAdditionalFeatures);
@@ -849,25 +918,25 @@ def ProcessorFeatures {
FeatureXSAVES,
FeatureCLFLUSHOPT,
FeatureFSGSBase];
- list<SubtargetFeature> GLMTuning = [FeatureUseGLMDivSqrtCosts,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureFastMOVBE,
- FeaturePOPCNTFalseDeps,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> GLMTuning = [TuningUseGLMDivSqrtCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningFastMOVBE,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> GLMFeatures =
!listconcat(SLMFeatures, GLMAdditionalFeatures);
// Goldmont Plus
list<SubtargetFeature> GLPAdditionalFeatures = [FeaturePTWRITE,
FeatureRDPID];
- list<SubtargetFeature> GLPTuning = [FeatureUseGLMDivSqrtCosts,
- FeatureSlowTwoMemOps,
- FeatureSlowLEA,
- FeatureSlowIncDec,
- FeatureFastMOVBE,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> GLPTuning = [TuningUseGLMDivSqrtCosts,
+ TuningSlowTwoMemOps,
+ TuningSlowLEA,
+ TuningSlowIncDec,
+ TuningFastMOVBE,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
@@ -912,6 +981,7 @@ def ProcessorFeatures {
FeatureNOPL,
Feature64Bit,
FeatureCMPXCHG16B,
+ FeatureCRC32,
FeaturePOPCNT,
FeaturePCLMUL,
FeatureXSAVE,
@@ -934,14 +1004,14 @@ def ProcessorFeatures {
FeatureBMI2,
FeatureFMA,
FeaturePRFCHW];
- list<SubtargetFeature> KNLTuning = [FeatureSlowDivide64,
- FeatureSlow3OpsLEA,
- FeatureSlowIncDec,
- FeatureSlowTwoMemOps,
- FeaturePreferMaskRegisters,
- FeatureHasFastGather,
- FeatureFastMOVBE,
- FeatureSlowPMADDWD];
+ list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
+ TuningSlow3OpsLEA,
+ TuningSlowIncDec,
+ TuningSlowTwoMemOps,
+ TuningPreferMaskRegisters,
+ TuningFastGather,
+ TuningFastMOVBE,
+ TuningSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
!listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
@@ -960,9 +1030,9 @@ def ProcessorFeatures {
FeatureLAHFSAHF,
FeatureCMOV,
Feature64Bit];
- list<SubtargetFeature> BarcelonaTuning = [FeatureFastScalarShiftMasks,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BarcelonaTuning = [TuningFastScalarShiftMasks,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
// Bobcat
list<SubtargetFeature> BtVer1Features = [FeatureX87,
@@ -979,29 +1049,30 @@ def ProcessorFeatures {
FeatureLZCNT,
FeaturePOPCNT,
FeatureLAHFSAHF];
- list<SubtargetFeature> BtVer1Tuning = [FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BtVer1Tuning = [TuningFast15ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningFastVectorShiftMasks,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
// Jaguar
list<SubtargetFeature> BtVer2AdditionalFeatures = [FeatureAVX,
FeatureAES,
+ FeatureCRC32,
FeaturePCLMUL,
FeatureBMI,
FeatureF16C,
FeatureMOVBE,
FeatureXSAVE,
FeatureXSAVEOPT];
- list<SubtargetFeature> BtVer2Tuning = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFastHorizontalOps,
- FeatureFast15ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureFastVectorShiftMasks,
- FeatureFastMOVBE,
- FeatureSlowSHLD];
+ list<SubtargetFeature> BtVer2Tuning = [TuningFastLZCNT,
+ TuningFastBEXTR,
+ TuningFastHorizontalOps,
+ TuningFast15ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningFastVectorShiftMasks,
+ TuningFastMOVBE,
+ TuningSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1013,6 +1084,7 @@ def ProcessorFeatures {
Feature64Bit,
FeatureCMPXCHG16B,
FeatureAES,
+ FeatureCRC32,
FeaturePRFCHW,
FeaturePCLMUL,
FeatureMMX,
@@ -1023,19 +1095,19 @@ def ProcessorFeatures {
FeatureXSAVE,
FeatureLWP,
FeatureLAHFSAHF];
- list<SubtargetFeature> BdVer1Tuning = [FeatureSlowSHLD,
- FeatureFast11ByteNOP,
- FeatureFastScalarShiftMasks,
- FeatureBranchFusion,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> BdVer1Tuning = [TuningSlowSHLD,
+ TuningFast11ByteNOP,
+ TuningFastScalarShiftMasks,
+ TuningBranchFusion,
+ TuningInsertVZEROUPPER];
// PileDriver
list<SubtargetFeature> BdVer2AdditionalFeatures = [FeatureF16C,
FeatureBMI,
FeatureTBM,
- FeatureFMA,
- FeatureFastBEXTR];
- list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastMOVBE];
+ FeatureFMA];
+ list<SubtargetFeature> BdVer2AdditionalTuning = [TuningFastBEXTR,
+ TuningFastMOVBE];
list<SubtargetFeature> BdVer2Tuning =
!listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features =
@@ -1070,6 +1142,7 @@ def ProcessorFeatures {
FeatureCMOV,
Feature64Bit,
FeatureCMPXCHG16B,
+ FeatureCRC32,
FeatureF16C,
FeatureFMA,
FeatureFSGSBase,
@@ -1092,14 +1165,14 @@ def ProcessorFeatures {
FeatureXSAVEC,
FeatureXSAVEOPT,
FeatureXSAVES];
- list<SubtargetFeature> ZNTuning = [FeatureFastLZCNT,
- FeatureFastBEXTR,
- FeatureFast15ByteNOP,
- FeatureBranchFusion,
- FeatureFastScalarShiftMasks,
- FeatureFastMOVBE,
- FeatureSlowSHLD,
- FeatureInsertVZEROUPPER];
+ list<SubtargetFeature> ZNTuning = [TuningFastLZCNT,
+ TuningFastBEXTR,
+ TuningFast15ByteNOP,
+ TuningBranchFusion,
+ TuningFastScalarShiftMasks,
+ TuningFastMOVBE,
+ TuningSlowSHLD,
+ TuningInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
FeatureRDPID,
FeatureWBNOINVD];
@@ -1112,8 +1185,8 @@ def ProcessorFeatures {
FeatureVAES,
FeatureVPCLMULQDQ];
list<SubtargetFeature> ZN3AdditionalTuning =
- [FeatureMacroFusion,
- FeatureFastVariablePerLaneShuffle];
+ [TuningMacroFusion,
+ TuningFastVariablePerLaneShuffle];
list<SubtargetFeature> ZN3Tuning =
!listconcat(ZNTuning, ZN3AdditionalTuning);
list<SubtargetFeature> ZN3Features =
@@ -1140,37 +1213,37 @@ class ProcModel<string Name, SchedMachineModel Model,
// It has no effect on code generation.
def : ProcModel<"generic", SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B, Feature64Bit],
- [FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowIncDec,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER]>;
+ [TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningMacroFusion,
+ TuningInsertVZEROUPPER]>;
def : Proc<"i386", [FeatureX87],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i486", [FeatureX87],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i586", [FeatureX87, FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium-mmx", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"i686", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentiumpro", [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
FeatureNOPL],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureCMOV,
FeatureFXSR, FeatureNOPL],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -1186,30 +1259,30 @@ foreach P = ["pentium3", "pentium3m"] in {
def : ProcModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
// Intel Quark.
def : Proc<"lakemont", [FeatureCMPXCHG8B],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// Intel Core Duo.
def : ProcModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// NetBurst.
def : ProcModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureCMPXCHG8B, FeatureMMX, FeatureSSE3,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureCMPXCHG8B,
@@ -1222,8 +1295,8 @@ def : ProcModel<"nocona", GenericPostRAModel, [
FeatureCMPXCHG16B,
],
[
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
// Intel Core 2 Solo/Duo.
@@ -1240,9 +1313,9 @@ def : ProcModel<"core2", SandyBridgeModel, [
FeatureLAHFSAHF
],
[
- FeatureMacroFusion,
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningMacroFusion,
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
def : ProcModel<"penryn", SandyBridgeModel, [
FeatureX87,
@@ -1257,9 +1330,9 @@ def : ProcModel<"penryn", SandyBridgeModel, [
FeatureLAHFSAHF
],
[
- FeatureMacroFusion,
- FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER
+ TuningMacroFusion,
+ TuningSlowUAMem16,
+ TuningInsertVZEROUPPER
]>;
// Atom CPUs.
@@ -1328,13 +1401,13 @@ def : ProcModel<"cooperlake", SkylakeServerModel,
ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
-def : ProcModel<"icelake-client", SkylakeServerModel,
+def : ProcModel<"icelake-client", IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
-def : ProcModel<"rocketlake", SkylakeServerModel,
+def : ProcModel<"rocketlake", IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
-def : ProcModel<"icelake-server", SkylakeServerModel,
+def : ProcModel<"icelake-server", IceLakeModel,
ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
-def : ProcModel<"tigerlake", SkylakeServerModel,
+def : ProcModel<"tigerlake", IceLakeModel,
ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
def : ProcModel<"sapphirerapids", SkylakeServerModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
@@ -1344,37 +1417,37 @@ def : ProcModel<"alderlake", SkylakeClientModel,
// AMD CPUs.
def : Proc<"k6", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-2", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"k6-3", [FeatureX87, FeatureCMPXCHG8B, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
foreach P = ["athlon", "athlon-tbird"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, Feature3DNowA,
FeatureNOPL],
- [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureCMOV,
FeatureSSE1, Feature3DNowA, FeatureFXSR, FeatureNOPL],
- [FeatureSlowSHLD, FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowSHLD, TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE2, Feature3DNowA,
FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureCMOV],
- [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
+ [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
+ TuningInsertVZEROUPPER]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE3, Feature3DNowA,
FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureCMOV,
Feature64Bit],
- [FeatureFastScalarShiftMasks, FeatureSlowSHLD, FeatureSlowUAMem16,
- FeatureInsertVZEROUPPER]>;
+ [TuningFastScalarShiftMasks, TuningSlowSHLD, TuningSlowUAMem16,
+ TuningInsertVZEROUPPER]>;
}
foreach P = ["amdfam10", "barcelona"] in {
@@ -1410,17 +1483,17 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCMPXCHG8B, Feature3DNowA],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip-c6", [FeatureX87, FeatureMMX],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"winchip2", [FeatureX87, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3", [FeatureX87, Feature3DNow],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureCMOV],
- [FeatureSlowUAMem16, FeatureInsertVZEROUPPER]>;
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1434,11 +1507,11 @@ def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
[
- FeatureSlow3OpsLEA,
- FeatureSlowDivide64,
- FeatureSlowIncDec,
- FeatureMacroFusion,
- FeatureInsertVZEROUPPER
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningMacroFusion,
+ TuningInsertVZEROUPPER
]>;
// x86-64 micro-architecture levels.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index a27645389dd4..2e08482e4ff6 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -37,10 +37,10 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -190,6 +190,7 @@ void X86AsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;
+ case X86II::MO_GOTPCREL_NORELAX: O << "@GOTPCREL_NORELAX"; break;
case X86II::MO_GOT: O << "@GOT"; break;
case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
case X86II::MO_PLT: O << "@PLT"; break;
@@ -753,6 +754,8 @@ static void emitNonLazyStubs(MachineModuleInfo *MMI, MCStreamer &OutStreamer) {
void X86AsmPrinter::emitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
+ emitAsanMemaccessSymbols(M);
+
if (TT.isOSBinFormatMachO()) {
// Mach-O uses non-lazy symbol stubs to encode per-TU information into
// global table for symbol lookup.
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.h b/llvm/lib/Target/X86/X86AsmPrinter.h
index a3b74c8ee387..3b0983a7d935 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -23,6 +23,7 @@ class MCCodeEmitter;
class MCStreamer;
class X86Subtarget;
class TargetMachine;
+struct ASanAccessInfo;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget = nullptr;
@@ -30,7 +31,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
FaultMaps FM;
std::unique_ptr<MCCodeEmitter> CodeEmitter;
bool EmitFPOData = false;
- bool NeedsRetpoline = false;
// This utility class tracks the length of a stackmap instruction's 'shadow'.
// It is used by the X86AsmPrinter to ensure that the stackmap shadow
@@ -98,6 +98,23 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+ // Address sanitizer specific lowering for X86.
+ void LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI);
+ void emitAsanMemaccessSymbols(Module &M);
+ void emitAsanMemaccessPartial(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI);
+ void emitAsanMemaccessFull(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI);
+ void emitAsanReportError(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI);
+
+ typedef std::tuple<unsigned /*Reg*/, uint32_t /*AccessInfo*/>
+ AsanMemaccessTuple;
+ std::map<AsanMemaccessTuple, MCSymbol *> AsanMemaccessSymbols;
+
// Choose between emitting .seh_ directives and .cv_fpo_ directives.
void EmitSEHInstruction(const MachineInstr *MI);
diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index b6a37f08d7e9..04931afdec51 100644
--- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -360,22 +360,17 @@ findPotentialBlockers(MachineInstr *LoadInst) {
if (BlockCount < InspectionLimit) {
MachineBasicBlock *MBB = LoadInst->getParent();
int LimitLeft = InspectionLimit - BlockCount;
- for (MachineBasicBlock::pred_iterator PB = MBB->pred_begin(),
- PE = MBB->pred_end();
- PB != PE; ++PB) {
- MachineBasicBlock *PMBB = *PB;
+ for (MachineBasicBlock *PMBB : MBB->predecessors()) {
int PredCount = 0;
- for (MachineBasicBlock::reverse_iterator PBInst = PMBB->rbegin(),
- PME = PMBB->rend();
- PBInst != PME; ++PBInst) {
- if (PBInst->isMetaInstruction())
+ for (MachineInstr &PBInst : llvm::reverse(*PMBB)) {
+ if (PBInst.isMetaInstruction())
continue;
PredCount++;
if (PredCount >= LimitLeft)
break;
- if (PBInst->getDesc().isCall())
+ if (PBInst.getDesc().isCall())
break;
- PotentialBlockers.push_back(&*PBInst);
+ PotentialBlockers.push_back(&PBInst);
}
}
}
@@ -542,9 +537,8 @@ void X86AvoidSFBPass::findPotentiallylBlockedCopies(MachineFunction &MF) {
int DefVR = MI.getOperand(0).getReg();
if (!MRI->hasOneNonDBGUse(DefVR))
continue;
- for (auto UI = MRI->use_nodbg_begin(DefVR), UE = MRI->use_nodbg_end();
- UI != UE;) {
- MachineOperand &StoreMO = *UI++;
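+    // llvm::make_early_inc_range advances the iterator before each body runs,
+    // mirroring the old "*UI++" pattern, so the walk stays valid even if the
+    // current use's instruction is touched inside the loop.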
+ for (MachineOperand &StoreMO :
+ llvm::make_early_inc_range(MRI->use_nodbg_operands(DefVR))) {
MachineInstr &StoreMI = *StoreMO.getParent();
// Skip cases where the memcpy may overlap.
if (StoreMI.getParent() == MI.getParent() &&
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index c8bffb4d4d37..a14ce82313cb 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -105,7 +105,7 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
@@ -195,7 +195,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
}
void assignValueToReg(Register ValVReg, Register PhysReg,
- CCValAssign &VA) override {
+ CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 98883bbf59a8..4dd8a6cdd898 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -23,6 +23,13 @@ class CCIfNotSubtarget<string F, CCAction A>
"(State.getMachineFunction().getSubtarget()).", F),
A>;
+/// CCIfIsVarArgOnWin - Match if this is a vararg call on 32-bit Windows.
+class CCIfIsVarArgOnWin<CCAction A>
+ : CCIf<"State.isVarArg() && "
+ "State.getMachineFunction().getSubtarget().getTargetTriple()."
+ "isWindowsMSVCEnvironment()",
+ A>;
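+
+// Illustrative note (not part of the original change): this predicate is used
+// below as CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>> so that vector
+// arguments of variadic calls on 32-bit Windows fall back to 4-byte-aligned
+// stack slots instead of XMM/YMM/ZMM registers.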
+
// Register classes for RegCall
class RC_X86_RegCall {
list<Register> GPR_8 = [];
@@ -233,19 +240,19 @@ def RetCC_X86Common : CallingConv<[
// Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
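+  // Illustrative note (not part of the original change): the v8f16 / v16f16 /
+  // v32f16 types added throughout this file are the AVX512FP16 vector types,
+  // so e.g. an IR function returning <8 x half> is now assigned XMM0 here just
+  // like one returning <4 x float>.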
// 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX target feature.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX-512 target feature.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
// MMX vector types are always returned in MM0. If the target doesn't have
@@ -266,7 +273,11 @@ def RetCC_X86_32_C : CallingConv<[
// conv.
CCIfInReg<CCIfSubtarget<"hasSSE2()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
- CCIfType<[f32,f64], CCAssignToReg<[FP0, FP1]>>,
+ CCIfSubtarget<"hasX87()",
+ CCIfType<[f32, f64], CCAssignToReg<[FP0, FP1]>>>,
+ CCIfNotSubtarget<"hasX87()",
+ CCIfType<[f32], CCAssignToReg<[EAX, EDX, ECX]>>>,
+ CCIfType<[f16], CCAssignToReg<[XMM0,XMM1,XMM2]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -329,6 +340,7 @@ def RetCC_X86_32_VectorCall : CallingConv<[
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
+ CCIfType<[f16], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,
@@ -552,7 +564,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfType<[f16, f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
@@ -561,33 +573,33 @@ def CC_X86_64_C : CallingConv<[
// FIXME: This isn't precisely correct; the x86-64 ABI document says that
// fixed arguments to vararg functions are supposed to be passed in
// registers. Actually modeling that would be a lot of work, though.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
YMM4, YMM5, YMM6, YMM7]>>>>,
// The first 8 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCIfSubtarget<"hasAVX512()",
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+ CCIfType<[i32, i64, f16, f32, f64], CCAssignToStack<8, 8>>,
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
// Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCAssignToStack<16, 16>>,
// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCAssignToStack<32, 32>>,
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;
@@ -635,13 +647,13 @@ def CC_X86_Win64_C : CallingConv<[
CCIfCFGuardTarget<CCAssignToReg<[RAX]>>,
// 128 bit vectors are passed by pointer
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCPassIndirect<i64>>,
// 256 bit vectors are passed by pointer
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64], CCPassIndirect<i64>>,
// 512 bit vectors are passed by pointer
- CCIfType<[v64i8, v32i16, v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+ CCIfType<[v64i8, v32i16, v16i32, v32f16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
// Long doubles are passed by pointer
CCIfType<[f80], CCPassIndirect<i64>>,
@@ -655,7 +667,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[f64], CCIfNotSubtarget<"hasSSE1()", CCBitConvertToType<i64>>>,
// The first 4 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64],
+ CCIfType<[f16, f32, f64],
CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
[RCX , RDX , R8 , R9 ]>>,
@@ -678,7 +690,7 @@ def CC_X86_Win64_C : CallingConv<[
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i8, i16, i32, i64, f32, f64], CCAssignToStack<8, 8>>
+ CCIfType<[i8, i16, i32, i64, f16, f32, f64], CCAssignToStack<8, 8>>
]>;
def CC_X86_Win64_VectorCall : CallingConv<[
@@ -757,33 +769,51 @@ def CC_X86_64_AnyReg : CallingConv<[
/// values are spilled on the stack.
def CC_X86_32_Vector_Common : CallingConv<[
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCAssignToStack<16, 16>>,
// 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCAssignToStack<32, 32>>,
// 512-bit AVX-512 vectors get 64-byte stack slots that are 64-byte aligned.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToStack<64, 64>>
]>;
+/// CC_X86_Win32_Vector - In X86 Win32 calling conventions, extra vector
+/// values are spilled on the stack.
+def CC_X86_Win32_Vector : CallingConv<[
+ // Other SSE vectors get 16-byte stack slots that are 4-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
+ CCAssignToStack<16, 4>>,
+
+ // 256-bit AVX vectors get 32-byte stack slots that are 4-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
+ CCAssignToStack<32, 4>>,
+
+  // 512-bit AVX-512 vectors get 64-byte stack slots that are 4-byte aligned.
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
+ CCAssignToStack<64, 4>>
+]>;
+
// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
// vector registers
def CC_X86_32_Vector_Standard : CallingConv<[
// SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2]>>>,
// AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,
// AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
+ CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>>,
CCDelegateTo<CC_X86_32_Vector_Common>
]>;
@@ -791,16 +821,16 @@ def CC_X86_32_Vector_Standard : CallingConv<[
// vector registers.
def CC_X86_32_Vector_Darwin : CallingConv<[
// SSE vector arguments are passed in XMM registers.
- CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
// AVX 256-bit vector arguments are passed in YMM registers.
- CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
CCIfSubtarget<"hasAVX()",
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
// AVX 512-bit vector arguments are passed in ZMM registers.
- CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
CCDelegateTo<CC_X86_32_Vector_Common>
@@ -819,11 +849,15 @@ def CC_X86_32_Common : CallingConv<[
CCIfSubtarget<"hasSSE2()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
+ CCIfNotVarArg<CCIfInReg<CCIfType<[f16], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
+
// The first 3 __m64 vector arguments are passed in mmx registers if the
// call is not a vararg call.
CCIfNotVarArg<CCIfType<[x86mmx],
CCAssignToReg<[MM0, MM1, MM2]>>>,
+ CCIfType<[f16], CCAssignToStack<4, 4>>,
+
// Integer/Float values get stored in stack slots that are 4 bytes in
// size and 4-byte aligned.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
@@ -831,8 +865,8 @@ def CC_X86_32_Common : CallingConv<[
// Doubles get 8-byte slots that are 4-byte aligned.
CCIfType<[f64], CCAssignToStack<8, 4>>,
- // Long doubles get slots whose size depends on the subtarget.
- CCIfType<[f80], CCAssignToStack<0, 4>>,
+ // Long doubles get slots whose size and alignment depends on the subtarget.
+ CCIfType<[f80], CCAssignToStack<0, 0>>,
// Boolean vectors of AVX-512 are passed in SIMD registers.
// The call from AVX to AVX-512 function should work,
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 05349a7c01f8..863438793acf 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -582,10 +582,9 @@ static bool checkEFLAGSLive(MachineInstr *MI) {
}
// We hit the end of the block, check whether EFLAGS is live into a successor.
- for (auto I = BB->succ_begin(), E = BB->succ_end(); I != E; ++I) {
- if ((*I)->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *Succ : BB->successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
return true;
- }
return false;
}
@@ -797,8 +796,7 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
MOp.setIsKill(false);
}
}
- MBB->erase(MachineBasicBlock::iterator(MI),
- std::next(MachineBasicBlock::iterator(MI)));
+ MBB->erase(&MI);
// Add this PHI to the rewrite table.
FalseBBRegRewriteTable[NewCMOV->getOperand(0).getReg()] = TmpReg;
diff --git a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
index 9ada0a8dd412..df8df1e3a65d 100644
--- a/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
@@ -1,4 +1,4 @@
-//===----- X86WinAllocaExpander.cpp - Expand WinAlloca pseudo instruction -===//
+//===----- X86DynAllocaExpander.cpp - Expand DynAlloca pseudo instruction -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines a pass that expands WinAlloca pseudo-instructions.
+// This file defines a pass that expands DynAlloca pseudo-instructions.
//
// It performs a conservative analysis to determine whether each allocation
// falls within a region of the stack that is safe to use, or whether stack
@@ -33,26 +33,26 @@ using namespace llvm;
namespace {
-class X86WinAllocaExpander : public MachineFunctionPass {
+class X86DynAllocaExpander : public MachineFunctionPass {
public:
- X86WinAllocaExpander() : MachineFunctionPass(ID) {}
+ X86DynAllocaExpander() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- /// Strategies for lowering a WinAlloca.
+ /// Strategies for lowering a DynAlloca.
enum Lowering { TouchAndSub, Sub, Probe };
- /// Deterministic-order map from WinAlloca instruction to desired lowering.
+ /// Deterministic-order map from DynAlloca instruction to desired lowering.
typedef MapVector<MachineInstr*, Lowering> LoweringMap;
- /// Compute which lowering to use for each WinAlloca instruction.
+ /// Compute which lowering to use for each DynAlloca instruction.
void computeLowerings(MachineFunction &MF, LoweringMap& Lowerings);
/// Get the appropriate lowering based on current offset and amount.
Lowering getLowering(int64_t CurrentOffset, int64_t AllocaAmount);
- /// Lower a WinAlloca instruction.
+ /// Lower a DynAlloca instruction.
void lower(MachineInstr* MI, Lowering L);
MachineRegisterInfo *MRI = nullptr;
@@ -64,22 +64,22 @@ private:
int64_t StackProbeSize = 0;
bool NoStackArgProbe = false;
- StringRef getPassName() const override { return "X86 WinAlloca Expander"; }
+ StringRef getPassName() const override { return "X86 DynAlloca Expander"; }
static char ID;
};
-char X86WinAllocaExpander::ID = 0;
+char X86DynAllocaExpander::ID = 0;
} // end anonymous namespace
-FunctionPass *llvm::createX86WinAllocaExpander() {
- return new X86WinAllocaExpander();
+FunctionPass *llvm::createX86DynAllocaExpander() {
+ return new X86DynAllocaExpander();
}
-/// Return the allocation amount for a WinAlloca instruction, or -1 if unknown.
-static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
- assert(MI->getOpcode() == X86::WIN_ALLOCA_32 ||
- MI->getOpcode() == X86::WIN_ALLOCA_64);
+/// Return the allocation amount for a DynAlloca instruction, or -1 if unknown.
+static int64_t getDynAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ assert(MI->getOpcode() == X86::DYN_ALLOCA_32 ||
+ MI->getOpcode() == X86::DYN_ALLOCA_64);
assert(MI->getOperand(0).isReg());
Register AmountReg = MI->getOperand(0).getReg();
@@ -93,8 +93,8 @@ static int64_t getWinAllocaAmount(MachineInstr *MI, MachineRegisterInfo *MRI) {
return Def->getOperand(1).getImm();
}
-X86WinAllocaExpander::Lowering
-X86WinAllocaExpander::getLowering(int64_t CurrentOffset,
+X86DynAllocaExpander::Lowering
+X86DynAllocaExpander::getLowering(int64_t CurrentOffset,
int64_t AllocaAmount) {
// For a non-constant amount or a large amount, we have to probe.
if (AllocaAmount < 0 || AllocaAmount > StackProbeSize)
@@ -128,11 +128,11 @@ static bool isPushPop(const MachineInstr &MI) {
}
}
-void X86WinAllocaExpander::computeLowerings(MachineFunction &MF,
+void X86DynAllocaExpander::computeLowerings(MachineFunction &MF,
LoweringMap &Lowerings) {
// Do a one-pass reverse post-order walk of the CFG to conservatively estimate
// the offset between the stack pointer and the lowest touched part of the
- // stack, and use that to decide how to lower each WinAlloca instruction.
+ // stack, and use that to decide how to lower each DynAlloca instruction.
// Initialize OutOffset[B], the stack offset at exit from B, to something big.
DenseMap<MachineBasicBlock *, int64_t> OutOffset;
@@ -153,10 +153,10 @@ void X86WinAllocaExpander::computeLowerings(MachineFunction &MF,
if (Offset == -1) Offset = INT32_MAX;
for (MachineInstr &MI : *MBB) {
- if (MI.getOpcode() == X86::WIN_ALLOCA_32 ||
- MI.getOpcode() == X86::WIN_ALLOCA_64) {
- // A WinAlloca moves StackPtr, and potentially touches it.
- int64_t Amount = getWinAllocaAmount(&MI, MRI);
+ if (MI.getOpcode() == X86::DYN_ALLOCA_32 ||
+ MI.getOpcode() == X86::DYN_ALLOCA_64) {
+ // A DynAlloca moves StackPtr, and potentially touches it.
+ int64_t Amount = getDynAllocaAmount(&MI, MRI);
Lowering L = getLowering(Offset, Amount);
Lowerings[&MI] = L;
switch (L) {
@@ -195,12 +195,12 @@ static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
}
-void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
+void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
const DebugLoc &DL = MI->getDebugLoc();
MachineBasicBlock *MBB = MI->getParent();
MachineBasicBlock::iterator I = *MI;
- int64_t Amount = getWinAllocaAmount(MI, MRI);
+ int64_t Amount = getDynAllocaAmount(MI, MRI);
if (Amount == 0) {
MI->eraseFromParent();
return;
@@ -209,7 +209,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
// These two variables differ on x32, which is a 64-bit target with a
// 32-bit alloca.
bool Is64Bit = STI->is64Bit();
- bool Is64BitAlloca = MI->getOpcode() == X86::WIN_ALLOCA_64;
+ bool Is64BitAlloca = MI->getOpcode() == X86::DYN_ALLOCA_64;
assert(SlotSize == 4 || SlotSize == 8);
switch (L) {
@@ -271,8 +271,8 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
AmountDef->eraseFromParent();
}
-bool X86WinAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
- if (!MF.getInfo<X86MachineFunctionInfo>()->hasWinAlloca())
+bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getInfo<X86MachineFunctionInfo>()->hasDynAlloca())
return false;
MRI = &MF.getRegInfo();
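
For context on the three Lowering strategies named above, here is a simplified, self-contained sketch of the kind of decision the pass makes; the 4096-byte probe size and the exact comparisons are assumptions for illustration, not the pass's code:

  #include <cstdint>

  enum class Lowering { TouchAndSub, Sub, Probe };

  // CurrentOffset is the conservatively estimated distance between the stack
  // pointer and the lowest part of the stack that has already been touched.
  Lowering pickLowering(int64_t CurrentOffset, int64_t AllocaAmount,
                        int64_t StackProbeSize = 4096) {
    if (AllocaAmount < 0 || AllocaAmount > StackProbeSize)
      return Lowering::Probe;       // unknown or large allocation: must probe
    if (CurrentOffset + AllocaAmount <= StackProbeSize)
      return Lowering::Sub;         // stays within the already-safe region
    return Lowering::TouchAndSub;   // touch the stack tip first, then subtract
  }

  int main() {
    return pickLowering(/*CurrentOffset=*/0, /*AllocaAmount=*/128) == Lowering::Sub ? 0 : 1;
  }
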
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 4add8d30e010..01dc509df795 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -236,19 +236,10 @@ void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
MBB.getParent()->moveCallSiteInfo(&MI, Marker);
// Emit call to ObjC runtime.
- unsigned RuntimeCallType = MI.getOperand(0).getImm();
- assert(RuntimeCallType <= 1 && "objc runtime call type must be 0 or 1");
- Module *M = MBB.getParent()->getFunction().getParent();
- auto &Context = M->getContext();
- auto *I8PtrTy = PointerType::get(IntegerType::get(Context, 8), 0);
- FunctionCallee Fn = M->getOrInsertFunction(
- RuntimeCallType == 0 ? "objc_retainAutoreleasedReturnValue"
- : "objc_unsafeClaimAutoreleasedReturnValue",
- FunctionType::get(I8PtrTy, {I8PtrTy}, false));
const uint32_t *RegMask =
TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
- .addGlobalAddress(cast<GlobalValue>(Fn.getCallee()), 0, 0)
+ .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
.addRegMask(RegMask)
.addReg(X86::RAX,
RegState::Implicit |
@@ -403,10 +394,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB;
if (StackAdj == 0) {
MIB = BuildMI(MBB, MBBI, DL,
- TII->get(STI->is64Bit() ? X86::RETQ : X86::RETL));
+ TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
} else if (isUInt<16>(StackAdj)) {
MIB = BuildMI(MBB, MBBI, DL,
- TII->get(STI->is64Bit() ? X86::RETIQ : X86::RETIL))
+ TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
.addImm(StackAdj);
} else {
assert(!STI->is64Bit() &&
@@ -416,7 +407,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r)).addReg(X86::ECX, RegState::Define);
X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
- MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RETL));
+ MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
}
for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
MIB.add(MBBI->getOperand(I));
@@ -657,35 +648,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
EntryBlk->end());
TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);
- int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm();
- Register BaseReg;
- uint64_t FrameOffset =
- X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed();
- uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm();
+ uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
+ uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();
// TODO: add support for YMM and ZMM here.
unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
// In the XMM save block, save all the XMM argument registers.
- for (int64_t OpndIdx = 3, RegIdx = 0;
+ for (int64_t OpndIdx = 7, RegIdx = 0;
OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
OpndIdx++, RegIdx++) {
-
- int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;
-
- MachineMemOperand *MMO = Func->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
- MachineMemOperand::MOStore,
- /*Size=*/16, Align(16));
-
- BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc))
- .addReg(BaseReg)
- .addImm(/*Scale=*/1)
- .addReg(/*IndexReg=*/0)
- .addImm(/*Disp=*/Offset)
- .addReg(/*Segment=*/0)
- .addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg())
- .addMemOperand(MMO);
+ auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
+ for (int i = 0; i < X86::AddrNumOperands; ++i) {
+ if (i == X86::AddrDisp)
+ NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
+ else
+ NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
+ }
+ NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
assert(Register::isPhysicalRegister(
VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
}
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index bb95ed3ccdc5..1ac998b7ff7e 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -55,6 +55,7 @@ class X86FastISel final : public FastISel {
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf64;
bool X86ScalarSSEf32;
+ bool X86ScalarSSEf16;
public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo,
@@ -63,6 +64,7 @@ public:
Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
X86ScalarSSEf64 = Subtarget->hasSSE2();
X86ScalarSSEf32 = Subtarget->hasSSE1();
+ X86ScalarSSEf16 = Subtarget->hasFP16();
}
bool fastSelectInstruction(const Instruction *I) override;
@@ -157,7 +159,8 @@ private:
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
+ (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
}
bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
@@ -786,7 +789,8 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
RC = &X86::GR32RegClass;
}
- if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL)
+ if (Subtarget->isPICStyleRIPRel() || GVFlags == X86II::MO_GOTPCREL ||
+ GVFlags == X86II::MO_GOTPCREL_NORELAX)
StubAM.Base.Reg = X86::RIP;
LoadReg = createResultReg(RC);
@@ -1301,11 +1305,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
MachineInstrBuilder MIB;
if (X86MFInfo->getBytesToPopOnReturn()) {
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->is64Bit() ? X86::RETIQ : X86::RETIL))
+ TII.get(Subtarget->is64Bit() ? X86::RETI64 : X86::RETI32))
.addImm(X86MFInfo->getBytesToPopOnReturn());
} else {
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
+ TII.get(Subtarget->is64Bit() ? X86::RET64 : X86::RET32));
}
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
@@ -2283,9 +2287,10 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
unsigned Opc;
switch (RetVT.SimpleTy) {
default: return false;
- case MVT::i8: Opc = X86::CMOV_GR8; break;
- case MVT::i16: Opc = X86::CMOV_GR16; break;
- case MVT::i32: Opc = X86::CMOV_GR32; break;
+ case MVT::i8: Opc = X86::CMOV_GR8; break;
+ case MVT::i16: Opc = X86::CMOV_GR16; break;
+ case MVT::f16: Opc = X86::CMOV_FR16X; break;
+ case MVT::i32: Opc = X86::CMOV_GR32; break;
case MVT::f32: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR32X
: X86::CMOV_FR32; break;
case MVT::f64: Opc = Subtarget->hasAVX512() ? X86::CMOV_FR64X
@@ -2741,7 +2746,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
return false;
- return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memcpy", II->arg_size() - 1);
}
case Intrinsic::memset: {
const MemSetInst *MSI = cast<MemSetInst>(II);
@@ -2756,7 +2761,7 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (MSI->getDestAddressSpace() > 255)
return false;
- return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
+ return lowerCallTo(II, "memset", II->arg_size() - 1);
}
case Intrinsic::stackprotector: {
// Emit code to store the stack guard onto the stack.
@@ -2780,8 +2785,6 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!X86SelectAddress(DI->getAddress(), AM))
return false;
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
- // FIXME may need to add RegState::Debug to any registers produced,
- // although ESP/EBP should be the only ones at the moment.
assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
"Expected inlined-at fields to agree");
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
@@ -3484,6 +3487,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
// NonLazyBind calls or dllimport calls.
bool NeedLoad = OpFlags == X86II::MO_DLLIMPORT ||
OpFlags == X86II::MO_GOTPCREL ||
+ OpFlags == X86II::MO_GOTPCREL_NORELAX ||
OpFlags == X86II::MO_COFFSTUB;
unsigned CallOpc = NeedLoad
? (Is64Bit ? X86::CALL64m : X86::CALL32m)
@@ -3838,11 +3842,11 @@ unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
if (const auto *CI = dyn_cast<ConstantInt>(C))
return X86MaterializeInt(CI, VT);
- else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ if (const auto *CFP = dyn_cast<ConstantFP>(C))
return X86MaterializeFP(CFP, VT);
- else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ if (const auto *GV = dyn_cast<GlobalValue>(C))
return X86MaterializeGV(GV, VT);
- else if (isa<UndefValue>(C)) {
+ if (isa<UndefValue>(C)) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default:
diff --git a/llvm/lib/Target/X86/X86FastTileConfig.cpp b/llvm/lib/Target/X86/X86FastTileConfig.cpp
index 7031bd40215d..87c04a07cd13 100644
--- a/llvm/lib/Target/X86/X86FastTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86FastTileConfig.cpp
@@ -44,6 +44,7 @@ class X86FastTileConfig : public MachineFunctionPass {
const TargetRegisterInfo *TRI = nullptr;
const TargetInstrInfo *TII = nullptr;
MachineRegisterInfo *MRI = nullptr;
+ X86MachineFunctionInfo *X86FI = nullptr;
MachineInstr *getTileConfigPoint();
void tileConfig();
@@ -289,6 +290,8 @@ bool X86FastTileConfig::fastTileConfig() {
if (!CFGs.empty())
Changed = true;
}
+ if (Changed)
+ X86FI->setHasVirtualTileReg(true);
return Changed;
}
@@ -298,6 +301,7 @@ bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
ST = &MFunc.getSubtarget<X86Subtarget>();
TRI = ST->getRegisterInfo();
TII = MFunc.getSubtarget().getInstrInfo();
+ X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
return fastTileConfig();
}
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 05cab776e0b7..9a63cffe0a09 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -212,8 +212,7 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
// These instructions are all fine to convert.
break;
}
- MachineFunction::iterator MFI = MBB.getIterator();
- return TII->convertToThreeAddress(MFI, MI, nullptr);
+ return TII->convertToThreeAddress(MI, nullptr, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index 2d9886e3f238..f24dbcfe972d 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -964,7 +964,11 @@ void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
if (!SetCCI.mayStore()) {
assert(SetCCI.getOperand(0).isReg() &&
"Cannot have a non-register defined operand to SETcc!");
- MRI->replaceRegWith(SetCCI.getOperand(0).getReg(), CondReg);
+ Register OldReg = SetCCI.getOperand(0).getReg();
+ // Drop Kill flags on the old register before replacing. CondReg may have
+ // a longer live range.
+ MRI->clearKillFlags(OldReg);
+ MRI->replaceRegWith(OldReg, CondReg);
SetCCI.eraseFromParent();
return;
}
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index e0f30f090171..60e1b37ed61c 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -832,6 +832,24 @@ static const TableEntry PopTable[] = {
{ X86::UCOM_Fr , X86::UCOM_FPr },
};
+static bool doesInstructionSetFPSW(MachineInstr &MI) {
+ if (const MachineOperand *MO = MI.findRegisterDefOperand(X86::FPSW))
+ if (!MO->isDead())
+ return true;
+ return false;
+}
+
+static MachineBasicBlock::iterator
+getNextFPInstruction(MachineBasicBlock::iterator I) {
+ MachineBasicBlock &MBB = *I->getParent();
+ while (++I != MBB.end()) {
+ MachineInstr &MI = *I;
+ if (X86::isX87Instruction(MI))
+ return I;
+ }
+ return MBB.end();
+}
+
/// popStackAfter - Pop the current value off of the top of the FP stack after
/// the specified instruction. This attempts to be sneaky and combine the pop
/// into the instruction itself if possible. The iterator is left pointing to
@@ -853,6 +871,14 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
I->RemoveOperand(0);
MI.dropDebugNumber();
} else { // Insert an explicit pop
+ // If this instruction sets FPSW, which is read by the following instruction,
+ // insert the pop after that reader.
+ if (doesInstructionSetFPSW(MI)) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineBasicBlock::iterator Next = getNextFPInstruction(I);
+ if (Next != MBB.end() && Next->readsRegister(X86::FPSW))
+ I = Next;
+ }
I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0);
}
}
@@ -1038,9 +1064,10 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
for (unsigned I = 0; I < N; ++I)
pushReg(N - I - 1);
- // Drop all variable values defined by this call -- we can't track them
- // once they've been stackified.
- I->dropDebugNumber();
+ // If this call has been modified, drop all variable values defined by it.
+ // We can't track them once they've been stackified.
+ if (STReturns)
+ I->dropDebugNumber();
}
/// If RET has an FP register use operand, pass the first one in ST(0) and
@@ -1732,16 +1759,14 @@ void FPS::setKillFlags(MachineBasicBlock &MBB) const {
LPR.addLiveOuts(MBB);
- for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
- I != E; ++I) {
- if (I->isDebugInstr())
+ for (MachineInstr &MI : llvm::reverse(MBB)) {
+ if (MI.isDebugInstr())
continue;
std::bitset<8> Defs;
SmallVector<MachineOperand *, 2> Uses;
- MachineInstr &MI = *I;
- for (auto &MO : I->operands()) {
+ for (auto &MO : MI.operands()) {
if (!MO.isReg())
continue;
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 4cde7971e597..bd780273509f 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
+ : Is64Bit ? X86::R11D
+ : X86::EAX;
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MF.insert(MBBIter, bodyMBB);
MF.insert(MBBIter, footMBB);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
+ : Is64Bit ? X86::R11D
+ : X86::EAX;
// Setup entry block
{
@@ -1349,25 +1353,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc DL;
- // Add RETADDR move area to callee saved frame size.
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
- if (TailCallReturnAddrDelta && IsWin64Prologue)
+ // Space reserved for stack-based arguments when making an (ABI-guaranteed)
+ // tail call.
+ unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
+ if (TailCallArgReserveSize && IsWin64Prologue)
report_fatal_error("Can't handle guaranteed tail call under win64 yet");
- if (TailCallReturnAddrDelta < 0)
- X86FI->setCalleeSavedFrameSize(
- X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
-
const bool EmitStackProbeCall =
STI.getTargetLowering()->hasStackProbeSymbol(MF);
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
if (HasFP && X86FI->hasSwiftAsyncContext()) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8),
- MachineFramePtr)
- .addUse(MachineFramePtr)
- .addImm(60)
- .setMIFlag(MachineInstr::FrameSetup);
+ switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
+ case SwiftAsyncFramePointerMode::DeploymentBased:
+ if (STI.swiftAsyncContextIsDynamicallySet()) {
+ // The special symbol below is absolute and has a *value* suitable to be
+ // combined with the frame pointer directly.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
+ .addUse(MachineFramePtr)
+ .addUse(X86::RIP)
+ .addImm(1)
+ .addUse(X86::NoRegister)
+ .addExternalSymbol("swift_async_extendedFramePointerFlags",
+ X86II::MO_GOTPCREL)
+ .addUse(X86::NoRegister);
+ break;
+ }
+ LLVM_FALLTHROUGH;
+
+ case SwiftAsyncFramePointerMode::Always:
+ BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
+ .addUse(MachineFramePtr)
+ .addImm(60)
+ .setMIFlag(MachineInstr::FrameSetup);
+ break;
+
+ case SwiftAsyncFramePointerMode::Never:
+ break;
+ }
}
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
@@ -1391,7 +1414,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
!EmitStackProbeCall && // No stack probes.
!MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
!MF.shouldSplitStack()) { // Regular stack
- uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
+ uint64_t MinSize =
+ X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
if (HasFP) MinSize += SlotSize;
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -1401,8 +1425,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Insert stack pointer adjustment for later moving of return addr. Only
// applies to tail call optimized functions where the callee argument stack
// size is bigger than the callers.
- if (TailCallReturnAddrDelta < 0) {
- BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta,
+ if (TailCallArgReserveSize != 0) {
+ BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
/*InEpilogue=*/false)
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -1451,7 +1475,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (X86FI->getRestoreBasePointer())
FrameSize += SlotSize;
- NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
+ NumBytes = FrameSize -
+ (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
// Callee-saved registers are pushed on stack before the stack is realigned.
if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
@@ -1554,7 +1579,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
} else {
assert(!IsFunclet && "funclets without FPs not yet implemented");
- NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
+ NumBytes = StackSize -
+ (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
}
// Update the offset adjustment, which is mainly used by codeview to translate
@@ -2011,6 +2037,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
uint64_t StackSize = MFI.getStackSize();
uint64_t MaxAlign = calculateMaxStackAlign(MF);
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
+ unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
bool HasFP = hasFP(MF);
uint64_t NumBytes = 0;
@@ -2024,14 +2051,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (HasFP) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- NumBytes = FrameSize - CSSize;
+ NumBytes = FrameSize - CSSize - TailCallArgReserveSize;
// Callee-saved registers were pushed on stack before the stack was
// realigned.
if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
NumBytes = alignTo(FrameSize, MaxAlign);
} else {
- NumBytes = StackSize - CSSize;
+ NumBytes = StackSize - CSSize - TailCallArgReserveSize;
}
uint64_t SEHStackAllocAmt = NumBytes;
@@ -2098,7 +2125,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
-
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
if (NumBytes || MFI.hasVarSizedObjects())
@@ -2140,10 +2166,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
- if (!hasFP(MF) && NeedsDwarfCFI) {
+ if (!HasFP && NeedsDwarfCFI) {
// Define the current CFA rule to use the provided offset.
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize));
+ MCCFIInstruction::cfiDefCfaOffset(
+ nullptr, CSSize + TailCallArgReserveSize + SlotSize));
}
--MBBI;
}
@@ -2157,7 +2184,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (NeedsWin64CFI && MF.hasWinCFI())
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
- if (!hasFP(MF) && NeedsDwarfCFI) {
+ if (!HasFP && NeedsDwarfCFI) {
MBBI = FirstCSPop;
int64_t Offset = -CSSize - SlotSize;
// Mark callee-saved pop instruction.
@@ -2177,9 +2204,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Emit DWARF info specifying the restores of the callee-saved registers.
// For epilogue with return inside or being other block without successor,
// no need to generate .cfi_restore for callee-saved registers.
- if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) {
+ if (NeedsDwarfCFI && !MBB.succ_empty())
emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
- }
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
// Add the return addr area delta back since we are not tail calling.
@@ -2193,13 +2219,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
// Emit tilerelease for AMX kernel.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
- for (unsigned I = 0; I < RC->getNumRegs(); I++)
- if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
- BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
- break;
- }
+ if (X86FI->hasVirtualTileReg())
+ BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
}
StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
@@ -2226,7 +2247,6 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t StackSize = MFI.getStackSize();
- bool HasFP = hasFP(MF);
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;
@@ -2262,39 +2282,27 @@ StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
"FPDelta isn't aligned per the Win64 ABI!");
}
-
- if (TRI->hasBasePointer(MF)) {
- assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
- if (FI < 0) {
- // Skip the saved EBP.
- return StackOffset::getFixed(Offset + SlotSize + FPDelta);
- } else {
- assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
- return StackOffset::getFixed(Offset + StackSize);
- }
- } else if (TRI->hasStackRealignment(MF)) {
- if (FI < 0) {
- // Skip the saved EBP.
- return StackOffset::getFixed(Offset + SlotSize + FPDelta);
- } else {
- assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
- return StackOffset::getFixed(Offset + StackSize);
- }
- // FIXME: Support tail calls
- } else {
- if (!HasFP)
- return StackOffset::getFixed(Offset + StackSize);
-
- // Skip the saved EBP.
+ if (FrameReg == TRI->getFramePtr()) {
+ // Skip saved EBP/RBP
Offset += SlotSize;
+ // Account for restricted Windows prologue.
+ Offset += FPDelta;
+
// Skip the RETADDR move area
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
Offset -= TailCallReturnAddrDelta;
+
+ return StackOffset::getFixed(Offset);
}
- return StackOffset::getFixed(Offset + FPDelta);
+ // FrameReg is either the stack pointer or a base pointer. But the base is
+ // located at the end of the statically known StackSize so the distinction
+ // doesn't really matter.
+ if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
+ assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
+ return StackOffset::getFixed(Offset + StackSize);
}
int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
@@ -3091,8 +3099,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
// having a ".", such as a simple <Module>.<Function>.<Arity>, or an
// "_", such as the BIF "suspend_0") as they are executed on another
// stack.
- if (F->getName().find("erlang.") != StringRef::npos ||
- F->getName().find("bif_") != StringRef::npos ||
+ if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
F->getName().find_first_of("._") == StringRef::npos)
continue;
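
To make the reworked tail-call bookkeeping above concrete, here is a small worked example with assumed numbers (purely illustrative); it only re-derives the arithmetic visible in the hunks above:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Assumed inputs for a 32-bit frame with a guaranteed tail call.
    uint64_t StackSize = 96;            // MFI.getStackSize()
    uint64_t SlotSize = 4;
    uint64_t CSSize = 12;               // callee-saved frame size
    int64_t  TCReturnAddrDelta = -16;   // 16 bytes reserved for tail-call args
    uint64_t TailCallArgReserveSize = -TCReturnAddrDelta;

    // Prologue (frame pointer case): the reserve is no longer folded into
    // CSSize; it is subtracted from the frame size explicitly.
    uint64_t FrameSize = StackSize - SlotSize;
    uint64_t NumBytes = FrameSize - (CSSize + TailCallArgReserveSize);
    std::printf("prologue local allocation: %llu bytes\n",
                (unsigned long long)NumBytes);

    // Epilogue: the CFA offset emitted after restoring the stack pointer also
    // accounts for the reserve.
    uint64_t CfaOffset = CSSize + TailCallArgReserveSize + SlotSize;
    std::printf("cfi_def_cfa_offset %llu\n", (unsigned long long)CfaOffset);
    return 0;
  }
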
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index e9c7ba44b524..7ed05fd0331d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -216,6 +216,8 @@ namespace {
bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
+ bool matchVectorAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+ unsigned Depth);
bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -336,10 +338,9 @@ namespace {
return false;
// Walk all the users of the immediate.
- for (SDNode::use_iterator UI = N->use_begin(),
- UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {
-
- SDNode *User = *UI;
+ for (const SDNode *User : N->uses()) {
+ if (UseCount >= 2)
+ break;
// This user is already selected. Count it as a legitimate use and
// move on.
@@ -433,6 +434,18 @@ namespace {
return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL);
}
+ SDValue getPermuteVINSERTCommutedImmediate(SDNode *N, unsigned VecWidth,
+ const SDLoc &DL) {
+ assert(VecWidth == 128 && "Unexpected vector width");
+ uint64_t Index = N->getConstantOperandVal(2);
+ MVT VecVT = N->getSimpleValueType(0);
+ uint64_t InsertIdx = (Index * VecVT.getScalarSizeInBits()) / VecWidth;
+ assert((InsertIdx == 0 || InsertIdx == 1) && "Bad insertf128 index");
+ // vinsert(0,sub,vec) -> [sub0][vec1] -> vperm2x128(0x30,vec,sub)
+ // vinsert(1,sub,vec) -> [vec0][sub0] -> vperm2x128(0x02,vec,sub)
+ return getI8Imm(InsertIdx ? 0x02 : 0x30, DL);
+ }
+
// Helper to detect unneeded and instructions on shift amounts. Called
// from PatFrags in tablegen.
bool isUnneededShiftMask(SDNode *N, unsigned Width) const {
@@ -504,8 +517,9 @@ namespace {
bool tryShiftAmountMod(SDNode *N);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTERNLOG(SDNode *N);
- bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC,
- SDValue A, SDValue B, SDValue C, uint8_t Imm);
+ bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentB,
+ SDNode *ParentC, SDValue A, SDValue B, SDValue C,
+ uint8_t Imm);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
bool tryMatchBitSelect(SDNode *N);
@@ -877,19 +891,34 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
continue;
}
- /// Convert vector increment or decrement to sub/add with an all-ones
- /// constant:
- /// add X, <1, 1...> --> sub X, <-1, -1...>
- /// sub X, <1, 1...> --> add X, <-1, -1...>
- /// The all-ones vector constant can be materialized using a pcmpeq
- /// instruction that is commonly recognized as an idiom (has no register
- /// dependency), so that's better/smaller than loading a splat 1 constant.
+ // Convert vector increment or decrement to sub/add with an all-ones
+ // constant:
+ // add X, <1, 1...> --> sub X, <-1, -1...>
+ // sub X, <1, 1...> --> add X, <-1, -1...>
+ // The all-ones vector constant can be materialized using a pcmpeq
+ // instruction that is commonly recognized as an idiom (has no register
+ // dependency), so that's better/smaller than loading a splat 1 constant.
+ //
+ // But don't do this if it would inhibit a potentially profitable load
+ // folding opportunity for the other operand. That only occurs with the
+ // intersection of:
+ // (1) The other operand (op0) is load foldable.
+ // (2) The op is an add (otherwise, we are *creating* an add and can still
+ // load fold the other op).
+ // (3) The target has AVX (otherwise, we have a destructive add and can't
+ // load fold the other op without killing the constant op).
+ // (4) The constant 1 vector has multiple uses (so it is profitable to load
+ // into a register anyway).
+ auto mayPreventLoadFold = [&]() {
+ return X86::mayFoldLoad(N->getOperand(0), *Subtarget) &&
+ N->getOpcode() == ISD::ADD && Subtarget->hasAVX() &&
+ !N->getOperand(1).hasOneUse();
+ };
if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
- N->getSimpleValueType(0).isVector()) {
-
+ N->getSimpleValueType(0).isVector() && !mayPreventLoadFold()) {
APInt SplatVal;
if (X86::isConstantSplat(N->getOperand(1), SplatVal) &&
- SplatVal.isOneValue()) {
+ SplatVal.isOne()) {
SDLoc DL(N);
MVT VT = N->getSimpleValueType(0);
@@ -1121,7 +1150,10 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
if (VT.isVector() || VT == MVT::f128)
break;
- MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32;
+ MVT VecVT = VT == MVT::f64 ? MVT::v2f64
+ : VT == MVT::f32 ? MVT::v4f32
+ : MVT::v8f16;
+
SDLoc dl(N);
SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT,
N->getOperand(0));
@@ -2464,10 +2496,18 @@ bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
return false;
}
-/// Helper for selectVectorAddr. Handles things that can be folded into a
-/// gather scatter address. The index register and scale should have already
-/// been handled.
-bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+bool X86DAGToDAGISel::matchVectorAddressRecursively(SDValue N,
+ X86ISelAddressMode &AM,
+ unsigned Depth) {
+ SDLoc dl(N);
+ LLVM_DEBUG({
+ dbgs() << "MatchVectorAddress: ";
+ AM.dump(CurDAG);
+ });
+ // Limit recursion.
+ if (Depth > 5)
+ return matchAddressBase(N, AM);
+
// TODO: Support other operations.
switch (N.getOpcode()) {
case ISD::Constant: {
@@ -2480,11 +2520,41 @@ bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
if (!matchWrapper(N, AM))
return false;
break;
+ case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
+ X86ISelAddressMode Backup = AM;
+ if (!matchVectorAddressRecursively(N.getOperand(0), AM, Depth + 1) &&
+ !matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
+ Depth + 1))
+ return false;
+ AM = Backup;
+
+ // Try again after commuting the operands.
+ if (!matchVectorAddressRecursively(Handle.getValue().getOperand(1), AM,
+ Depth + 1) &&
+ !matchVectorAddressRecursively(Handle.getValue().getOperand(0), AM,
+ Depth + 1))
+ return false;
+ AM = Backup;
+
+ N = Handle.getValue();
+ break;
+ }
}
return matchAddressBase(N, AM);
}
+/// Helper for selectVectorAddr. Handles things that can be folded into a
+/// gather/scatter address. The index register and scale should have already
+/// been handled.
+bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+ return matchVectorAddressRecursively(N, AM, 0);
+}
+
bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr,
SDValue IndexOp, SDValue ScaleOp,
SDValue &Base, SDValue &Scale,
@@ -3387,16 +3457,24 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
return false;
SDValue NBits;
+ bool NegateNBits;
// If we have BMI2's BZHI, we are ok with multi-use patterns.
// Else, if we only have BMI1's BEXTR, we require one-use.
- const bool CanHaveExtraUses = Subtarget->hasBMI2();
- auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) {
- return CanHaveExtraUses ||
+ const bool AllowExtraUsesByDefault = Subtarget->hasBMI2();
+ auto checkUses = [AllowExtraUsesByDefault](SDValue Op, unsigned NUses,
+ Optional<bool> AllowExtraUses) {
+ return AllowExtraUses.getValueOr(AllowExtraUsesByDefault) ||
Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo());
};
- auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); };
- auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); };
+ auto checkOneUse = [checkUses](SDValue Op,
+ Optional<bool> AllowExtraUses = None) {
+ return checkUses(Op, 1, AllowExtraUses);
+ };
+ auto checkTwoUse = [checkUses](SDValue Op,
+ Optional<bool> AllowExtraUses = None) {
+ return checkUses(Op, 2, AllowExtraUses);
+ };
auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) {
if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) {
@@ -3409,8 +3487,8 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
};
// a) x & ((1 << nbits) + (-1))
- auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation,
- &NBits](SDValue Mask) -> bool {
+ auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, &NBits,
+ &NegateNBits](SDValue Mask) -> bool {
// Match `add`. Must only have one use!
if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask))
return false;
@@ -3424,6 +3502,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (!isOneConstant(M0->getOperand(0)))
return false;
NBits = M0->getOperand(1);
+ NegateNBits = false;
return true;
};
@@ -3436,7 +3515,7 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// b) x & ~(-1 << nbits)
auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation,
- &NBits](SDValue Mask) -> bool {
+ &NBits, &NegateNBits](SDValue Mask) -> bool {
// Match `~()`. Must only have one use!
if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask))
return false;
@@ -3451,32 +3530,35 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (!isAllOnes(M0->getOperand(0)))
return false;
NBits = M0->getOperand(1);
+ NegateNBits = false;
return true;
};
- // Match potentially-truncated (bitwidth - y)
- auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt,
- unsigned Bitwidth) {
- // Skip over a truncate of the shift amount.
- if (ShiftAmt.getOpcode() == ISD::TRUNCATE) {
- ShiftAmt = ShiftAmt.getOperand(0);
- // The trunc should have been the only user of the real shift amount.
- if (!checkOneUse(ShiftAmt))
- return false;
- }
- // Match the shift amount as: (bitwidth - y). It should go away, too.
- if (ShiftAmt.getOpcode() != ISD::SUB)
- return false;
- auto *V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0));
+ // Try to match potentially-truncated shift amount as `(bitwidth - y)`,
+ // or leave the shift amount as-is, but then we'll have to negate it.
+ auto canonicalizeShiftAmt = [&NBits, &NegateNBits](SDValue ShiftAmt,
+ unsigned Bitwidth) {
+ NBits = ShiftAmt;
+ NegateNBits = true;
+ // Skip over a truncate of the shift amount, if any.
+ if (NBits.getOpcode() == ISD::TRUNCATE)
+ NBits = NBits.getOperand(0);
+ // Try to match the shift amount as (bitwidth - y). It should go away, too.
+ // If it doesn't match, that's fine, we'll just negate it ourselves.
+ if (NBits.getOpcode() != ISD::SUB)
+ return;
+ auto *V0 = dyn_cast<ConstantSDNode>(NBits.getOperand(0));
if (!V0 || V0->getZExtValue() != Bitwidth)
- return false;
- NBits = ShiftAmt.getOperand(1);
- return true;
+ return;
+ NBits = NBits.getOperand(1);
+ NegateNBits = false;
};
+ // c) x & (-1 >> z) but then we'll have to subtract z from bitwidth
+ // or
// c) x & (-1 >> (32 - y))
- auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation,
- matchShiftAmt](SDValue Mask) -> bool {
+ auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, &NegateNBits,
+ canonicalizeShiftAmt](SDValue Mask) -> bool {
// The mask itself may be truncated.
Mask = peekThroughOneUseTruncation(Mask);
unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits();
@@ -3490,27 +3572,39 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
// The shift amount should not be used externally.
if (!checkOneUse(M1))
return false;
- return matchShiftAmt(M1, Bitwidth);
+ canonicalizeShiftAmt(M1, Bitwidth);
+ // Pattern c. is non-canonical, and is expanded into pattern d. iff there
+ // is no extra use of the mask. Clearly, there was one since we are here.
+ // But at the same time, if we need to negate the shift amount,
+ // then we don't want the mask to stick around, else it's unprofitable.
+ return !NegateNBits;
};
SDValue X;
+ // d) x << z >> z but then we'll have to subtract z from bitwidth
+ // or
// d) x << (32 - y) >> (32 - y)
- auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt,
+ auto matchPatternD = [checkOneUse, checkTwoUse, canonicalizeShiftAmt,
+ AllowExtraUsesByDefault, &NegateNBits,
&X](SDNode *Node) -> bool {
if (Node->getOpcode() != ISD::SRL)
return false;
SDValue N0 = Node->getOperand(0);
- if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0))
+ if (N0->getOpcode() != ISD::SHL)
return false;
unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits();
SDValue N1 = Node->getOperand(1);
SDValue N01 = N0->getOperand(1);
// Both of the shifts must be by the exact same value.
- // There should not be any uses of the shift amount outside of the pattern.
- if (N1 != N01 || !checkTwoUse(N1))
+ if (N1 != N01)
return false;
- if (!matchShiftAmt(N1, Bitwidth))
+ canonicalizeShiftAmt(N1, Bitwidth);
+ // There should not be any external uses of the inner shift / shift amount.
+ // Note that while we are generally okay with external uses given BMI2,
+ // if we need to negate the shift amount, we are not okay with extra uses.
+ const bool AllowExtraUses = AllowExtraUsesByDefault && !NegateNBits;
+ if (!checkOneUse(N0, AllowExtraUses) || !checkTwoUse(N1, AllowExtraUses))
return false;
X = N0->getOperand(0);
return true;
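
The four shapes matched above (patterns a) through d)) are all ways of keeping the low n bits of a value; a quick stand-alone check of that equivalence, illustrative only and separate from the patch itself:

  #include <cassert>
  #include <cstdint>

  static uint32_t lowBitsA(uint32_t X, unsigned N) { return X & ((1u << N) - 1u); }        // a) x & ((1 << n) + (-1))
  static uint32_t lowBitsB(uint32_t X, unsigned N) { return X & ~(~0u << N); }             // b) x & ~(-1 << n)
  static uint32_t lowBitsC(uint32_t X, unsigned N) { return X & (~0u >> (32 - N)); }       // c) x & (-1 >> (32 - n))
  static uint32_t lowBitsD(uint32_t X, unsigned N) { return (X << (32 - N)) >> (32 - N); } // d) x << (32 - n) >> (32 - n)

  int main() {
    const uint32_t X = 0xDEADBEEFu;
    for (unsigned N = 1; N < 32; ++N) { // N == 0 and N == 32 would be UB for some forms
      uint32_t R = lowBitsA(X, N);
      assert(R == lowBitsB(X, N) && R == lowBitsC(X, N) && R == lowBitsD(X, N));
    }
    return 0;
  }
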
@@ -3535,6 +3629,11 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
} else if (!matchPatternD(Node))
return false;
+ // If we need to negate the shift amount, require BMI2 BZHI support.
+ // It's just too unprofitable for BMI1 BEXTR.
+ if (NegateNBits && !Subtarget->hasBMI2())
+ return false;
+
SDLoc DL(Node);
// Truncate the shift amount.
@@ -3549,11 +3648,21 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
- NBits = SDValue(
- CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
- NBits, SRIdxVal), 0);
+ NBits = SDValue(CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
+ MVT::i32, ImplDef, NBits, SRIdxVal),
+ 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
+ // We might have matched the number of high bits to be cleared,
+ // but we want the number of low bits to be kept, so negate it.
+ if (NegateNBits) {
+ SDValue BitWidthC = CurDAG->getConstant(NVT.getSizeInBits(), DL, MVT::i32);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), BitWidthC);
+
+ NBits = CurDAG->getNode(ISD::SUB, DL, MVT::i32, BitWidthC, NBits);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
+ }
+
if (Subtarget->hasBMI2()) {
// Great, just emit the BZHI.
if (NVT != MVT::i32) {
@@ -4040,11 +4149,11 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
}
bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
- SDNode *ParentBC, SDValue A, SDValue B,
- SDValue C, uint8_t Imm) {
- assert(A.isOperandOf(ParentA));
- assert(B.isOperandOf(ParentBC));
- assert(C.isOperandOf(ParentBC));
+ SDNode *ParentB, SDNode *ParentC,
+ SDValue A, SDValue B, SDValue C,
+ uint8_t Imm) {
+ assert(A.isOperandOf(ParentA) && B.isOperandOf(ParentB) &&
+ C.isOperandOf(ParentC) && "Incorrect parent node");
auto tryFoldLoadOrBCast =
[this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale,
@@ -4072,7 +4181,7 @@ bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
bool FoldedLoad = false;
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
- if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+ if (tryFoldLoadOrBCast(Root, ParentC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
FoldedLoad = true;
} else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3,
Tmp4)) {
@@ -4085,7 +4194,7 @@ bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
if (OldImm & 0x10) Imm |= 0x02;
if (OldImm & 0x08) Imm |= 0x40;
if (OldImm & 0x40) Imm |= 0x08;
- } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3,
+ } else if (tryFoldLoadOrBCast(Root, ParentB, B, Tmp0, Tmp1, Tmp2, Tmp3,
Tmp4)) {
FoldedLoad = true;
std::swap(B, C);
@@ -4163,7 +4272,6 @@ bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA,
}
// Try to match two logic ops to a VPTERNLOG.
-// FIXME: Handle inverted inputs?
// FIXME: Handle more complex patterns that use an operand more than once?
bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
MVT NVT = N->getSimpleValueType(0);
@@ -4206,12 +4314,31 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
SDValue B = FoldableOp.getOperand(0);
SDValue C = FoldableOp.getOperand(1);
+ SDNode *ParentA = N;
+ SDNode *ParentB = FoldableOp.getNode();
+ SDNode *ParentC = FoldableOp.getNode();
// We can build the appropriate control immediate by performing the logic
// operation we're matching using these constants for A, B, and C.
- const uint8_t TernlogMagicA = 0xf0;
- const uint8_t TernlogMagicB = 0xcc;
- const uint8_t TernlogMagicC = 0xaa;
+ uint8_t TernlogMagicA = 0xf0;
+ uint8_t TernlogMagicB = 0xcc;
+ uint8_t TernlogMagicC = 0xaa;
+
+ // Some of the inputs may be inverted, peek through them and invert the
+ // magic values accordingly.
+ // TODO: There may be a bitcast before the xor that we should peek through.
+ auto PeekThroughNot = [](SDValue &Op, SDNode *&Parent, uint8_t &Magic) {
+ if (Op.getOpcode() == ISD::XOR && Op.hasOneUse() &&
+ ISD::isBuildVectorAllOnes(Op.getOperand(1).getNode())) {
+ Magic = ~Magic;
+ Parent = Op.getNode();
+ Op = Op.getOperand(0);
+ }
+ };
+
+ PeekThroughNot(A, ParentA, TernlogMagicA);
+ PeekThroughNot(B, ParentB, TernlogMagicB);
+ PeekThroughNot(C, ParentC, TernlogMagicC);
uint8_t Imm;
switch (FoldableOp.getOpcode()) {
@@ -4235,7 +4362,7 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) {
case ISD::XOR: Imm ^= TernlogMagicA; break;
}
- return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm);
+ return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm);
}
/// If the high bits of an 'and' operand are known zero, try setting the
@@ -4295,7 +4422,7 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
// Check if the mask is -1. In that case, this is an unnecessary instruction
// that escaped earlier analysis.
- if (NegMaskVal.isAllOnesValue()) {
+ if (NegMaskVal.isAllOnes()) {
ReplaceNode(And, And0.getNode());
return true;
}
@@ -4572,7 +4699,7 @@ bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
ReplaceNode(N, Ternlog.getNode());
return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(),
- A, B, C, 0xCA);
+ Ternlog.getNode(), A, B, C, 0xCA);
}
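
The magic constants introduced above make it easy to derive a VPTERNLOG immediate by hand: evaluating the desired boolean expression bitwise on A = 0xF0, B = 0xCC, C = 0xAA yields the 8-bit truth table the instruction expects, and peeking through a NOT is the same as inverting that input's magic value. A minimal check, separate from the patch:

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const uint8_t A = 0xF0, B = 0xCC, C = 0xAA; // TernlogMagicA/B/C
    // Bit-select (A & B) | (~A & C) evaluates to 0xCA, the immediate used by
    // tryMatchBitSelect above.
    uint8_t Select = (A & B) | (uint8_t(~A) & C);
    assert(Select == 0xCA);
    // Inverting an input corresponds to inverting its magic constant (what
    // PeekThroughNot does); selecting "not C" gives 0xC5 instead.
    uint8_t SelectNotC = (A & B) | (uint8_t(~A) & uint8_t(~C));
    assert(SelectNotC == 0xC5);
    std::printf("0x%02X 0x%02X\n", Select, SelectNotC);
    return 0;
  }
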
void X86DAGToDAGISel::Select(SDNode *Node) {
@@ -4807,7 +4934,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case X86ISD::VPTERNLOG: {
uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue();
- if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0),
+ if (matchVPTERNLOG(Node, Node, Node, Node, Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2), Imm))
return;
break;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3a64b3460030..dba0321d9431 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48,9 +48,10 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -71,14 +72,6 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<int> ExperimentalPrefLoopAlignment(
- "x86-experimental-pref-loop-alignment", cl::init(4),
- cl::desc(
- "Sets the preferable loop alignment for experiments (as log2 bytes)"
- "(the last x86-experimental-pref-loop-alignment bits"
- " of the loop header PC will be 0)."),
- cl::Hidden);
-
static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
"x86-experimental-pref-innermost-loop-alignment", cl::init(4),
cl::desc(
@@ -117,6 +110,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
X86ScalarSSEf64 = Subtarget.hasSSE2();
X86ScalarSSEf32 = Subtarget.hasSSE1();
+ X86ScalarSSEf16 = Subtarget.hasFP16();
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
// Set up the TargetLowering object.
@@ -213,6 +207,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Signed saturation subtraction.
+ setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
+ setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
+ setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
+
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
// For slow shld targets we only lower for code size.
@@ -424,8 +425,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::f128, MVT::f16, Expand);
setOperationAction(ISD::PARITY, MVT::i8, Custom);
+ setOperationAction(ISD::PARITY, MVT::i16, Custom);
+ setOperationAction(ISD::PARITY, MVT::i32, Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::PARITY, MVT::i64, Custom);
if (Subtarget.hasPOPCNT()) {
setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
+ // popcntw is longer to encode than popcntl and also has a false dependency
+ // on the destination that popcntl has not had since Cannon Lake.
+ setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
@@ -434,11 +442,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
else
setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
-
- setOperationAction(ISD::PARITY, MVT::i16, Custom);
- setOperationAction(ISD::PARITY, MVT::i32, Custom);
- if (Subtarget.is64Bit())
- setOperationAction(ISD::PARITY, MVT::i64, Custom);
}
setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
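
A user-level illustration of that promotion (a hedged sketch, not part of the patch): zero-extending to 32 bits does not change the population count, so the i16 case can be computed with the 32-bit instruction.

  #include <cassert>
  #include <cstdint>

  // Zero-extension preserves the number of set bits, so a 16-bit popcount can
  // use the 32-bit form (popcntl rather than popcntw), which is what the
  // promotion above aims for when POPCNT is available.
  static unsigned popcount16(uint16_t X) {
    return __builtin_popcount(static_cast<uint32_t>(X));
  }

  int main() {
    assert(popcount16(0xF00F) == 8);
    return 0;
  }
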
@@ -532,7 +535,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRAP, MVT::Other, Legal);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
- setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
+ if (Subtarget.getTargetTriple().isPS4CPU())
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
+ else
+ setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -968,6 +974,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
@@ -1147,6 +1154,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
+ setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -1172,10 +1181,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
}
- // i8 vectors are custom because the source register and source
- // source memory operand types are not the same width.
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
-
if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
// We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
// do the pre and post work in the vector domain.
@@ -1677,6 +1682,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
+ // With BWI, expanding (and promoting the shifts) is the better option.
+ if (!Subtarget.useBWIRegs())
+ setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
+
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
@@ -1903,6 +1912,155 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
}
+ if (!Subtarget.useSoftFloat() && Subtarget.hasFP16()) {
+ auto setGroup = [&] (MVT VT) {
+ setOperationAction(ISD::FADD, VT, Legal);
+ setOperationAction(ISD::STRICT_FADD, VT, Legal);
+ setOperationAction(ISD::FSUB, VT, Legal);
+ setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+ setOperationAction(ISD::FMUL, VT, Legal);
+ setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+ setOperationAction(ISD::FDIV, VT, Legal);
+ setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+ setOperationAction(ISD::FSQRT, VT, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+
+ setOperationAction(ISD::FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::FTRUNC, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ setOperationAction(ISD::FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+
+ setOperationAction(ISD::LOAD, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Legal);
+
+ setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+
+ setOperationAction(ISD::FNEG, VT, Custom);
+ setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ };
+
+ // AVX512_FP16 scalar operations
+ setGroup(MVT::f16);
+ addRegisterClass(MVT::f16, &X86::FR16XRegClass);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
+ setOperationAction(ISD::FROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
+ if (isTypeLegal(MVT::f80)) {
+ setOperationAction(ISD::FP_EXTEND, MVT::f80, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Custom);
+ }
+
+ setCondCodeAction(ISD::SETOEQ, MVT::f16, Expand);
+ setCondCodeAction(ISD::SETUNE, MVT::f16, Expand);
+
+ if (Subtarget.useAVX512Regs()) {
+ setGroup(MVT::v32f16);
+ addRegisterClass(MVT::v32f16, &X86::VR512RegClass);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v32i16, Custom);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i8,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i8, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i8,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v32i1,
+ MVT::v32i16);
+ setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v32i1, MVT::v32i16);
+ setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v32i1,
+ MVT::v32i16);
+
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32f16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
+
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
+ }
+
+ if (Subtarget.hasVLX()) {
+ addRegisterClass(MVT::v8f16, &X86::VR128XRegClass);
+ addRegisterClass(MVT::v16f16, &X86::VR256XRegClass);
+ setGroup(MVT::v8f16);
+ setGroup(MVT::v16f16);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i16, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Legal);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i16, Legal);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal);
+
+ // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16f16, Custom);
+
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16f16, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f16, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Legal);
+
+ // Need to custom widen these to prevent scalarization.
+ setOperationAction(ISD::LOAD, MVT::v4f16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4f16, Custom);
+ }
+
+ // Support the fp16 zero immediate.
+ addLegalFPImmediate(APFloat::getZero(APFloat::IEEEhalf()));
+ }
+
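At the source level, the FP16 legalization above is what lets half-precision arithmetic select native AVX512-FP16 instructions instead of being promoted to float. A minimal sketch, assuming a compiler targeting AVX512-FP16 (e.g. built with -mavx512fp16):

// Scalar half-precision add; with f16 legal as above this can select vaddsh.
_Float16 addh(_Float16 a, _Float16 b) { return a + b; }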
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
@@ -1921,6 +2079,37 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
}
+ if (Subtarget.hasFP16()) {
+ // vcvttph2[u]dq v4f16 -> v4i32/64, v2f16 -> v2i32/64
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f16, Custom);
+ // vcvt[u]dq2ph v4i32/64 -> v4f16, v2i32/64 -> v2f16
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f16, Custom);
+ // vcvtps2phx v4f32 -> v4f16, v2f32 -> v2f16
+ setOperationAction(ISD::FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f16, Custom);
+ // vcvtph2psx v4f16 -> v4f32, v2f16 -> v2f32
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
+ setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
+ }
+
setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
@@ -1969,7 +2158,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
setLibcallName(RTLIB::MUL_I128, nullptr);
+ // The MULO libcall is not part of libgcc, only compiler-rt.
+ setLibcallName(RTLIB::MULO_I64, nullptr);
}
+ // The MULO libcall is not part of libgcc, only compiler-rt.
+ setLibcallName(RTLIB::MULO_I128, nullptr);
// Combine sin / cos into _sincos_stret if it is available.
if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
@@ -1983,6 +2176,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UDIV, MVT::i128, Custom);
setOperationAction(ISD::SREM, MVT::i128, Custom);
setOperationAction(ISD::UREM, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
+ setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
}
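A minimal source-level sketch of what exercises the i128 handling above, assuming a 64-bit target with __int128 support (function names are illustrative):

// 128-bit signed division has no native instruction on x86-64 and is
// expected to lower to a call to the __divti3 runtime helper.
__int128 div128(__int128 a, __int128 b) { return a / b; }

// i128 -> double conversion typically goes through a runtime helper as well
// (__floattidf in compiler-rt/libgcc).
double toDouble(__int128 a) { return (double)a; }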
// On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
@@ -2070,8 +2271,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
MaxLoadsPerMemcmp = 2;
MaxLoadsPerMemcmpOptSize = 2;
- // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
- setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
+ // Default loop alignment, which can be overridden by -align-loops.
+ setPrefLoopAlignment(Align(16));
// An out-of-order CPU can speculatively execute past a predictable branch,
// but a conditional move could be stalled by an expensive earlier operation.
@@ -2165,6 +2366,16 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
return RegisterVT;
}
+ // v3f16 will be widened to v4f16, but we don't assign a register class for
+ // v4f16, so its default register type is f16. Override the type to v8f16 here.
+ if (VT == MVT::v3f16 && Subtarget.hasFP16())
+ return MVT::v8f16;
+
+ // We use more GPRs for f64 and f80 on 32-bit targets when x87 is disabled.
+ if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
+ !Subtarget.hasX87())
+ return MVT::i32;
+
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
@@ -2183,6 +2394,20 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
return NumRegisters;
}
+ // v3f16 will be widened to v4f16, but we don't assign a register class for
+ // v4f16, so its default register count is 3. Override the count to 1 here.
+ if (VT == MVT::v3f16 && Subtarget.hasFP16())
+ return 1;
+
+ // We have to split f64 into 2 registers and f80 into 3 registers on 32-bit
+ // targets if x87 is disabled.
+ if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
+ if (VT == MVT::f64)
+ return 2;
+ if (VT == MVT::f80)
+ return 3;
+ }
+
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
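A quick check of the register counts used above: with 32-bit GPRs a value needs ceil(bits/32) registers, which a sketch can assert directly:

static_assert((64 + 31) / 32 == 2, "f64 splits into two 32-bit GPRs");
static_assert((80 + 31) / 32 == 3, "f80 splits into three 32-bit GPRs");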
@@ -2272,7 +2497,7 @@ static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
-unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
+uint64_t X86TargetLowering::getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const {
if (Subtarget.is64Bit()) {
// Max of 8 and alignment of type.
@@ -2294,7 +2519,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
/// preferred vector width.
EVT X86TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
- if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
+ if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
if (Op.size() >= 16 &&
(!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
// FIXME: Check if unaligned 64-byte accesses are slow.
@@ -2547,7 +2772,7 @@ void X86TargetLowering::insertSSPDeclarations(Module &M) const {
Type::getInt8PtrTy(M.getContext()));
if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
F->setCallingConv(CallingConv::X86_FastCall);
- F->addAttribute(1, Attribute::AttrKind::InReg);
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
return;
}
@@ -2898,16 +3123,15 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return false;
bool HasRet = false;
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != X86ISD::RET_FLAG)
+ for (const SDNode *U : Copy->uses()) {
+ if (U->getOpcode() != X86ISD::RET_FLAG)
return false;
// If we are returning more than one value, we can definitely
// not make a tail call see PR19530
- if (UI->getNumOperands() > 4)
+ if (U->getNumOperands() > 4)
return false;
- if (UI->getNumOperands() == 4 &&
- UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
+ if (U->getNumOperands() == 4 &&
+ U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
return false;
HasRet = true;
}
@@ -3137,38 +3361,40 @@ SDValue X86TargetLowering::LowerCallResult(
// For info on fast calling convention see Fast Calling Convention (tail call)
// implementation LowerX86_32FastCCCallTo.
-/// CallIsStructReturn - Determines whether a call uses struct return
-/// semantics.
-enum StructReturnType {
- NotStructReturn,
- RegStructReturn,
- StackStructReturn
-};
-static StructReturnType
-callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
- if (Outs.empty())
- return NotStructReturn;
+/// Determines whether Args, either the outgoing arguments of a call or the
+/// incoming arguments of a callee, contains an sret pointer that the callee
+/// pops.
+template <typename T>
+static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
+ const X86Subtarget &Subtarget) {
+ // Not C++20 (yet), so no concepts available.
+ static_assert(std::is_same<T, ISD::OutputArg>::value ||
+ std::is_same<T, ISD::InputArg>::value,
+ "requires ISD::OutputArg or ISD::InputArg");
- const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
- if (!Flags.isSRet())
- return NotStructReturn;
- if (Flags.isInReg() || IsMCU)
- return RegStructReturn;
- return StackStructReturn;
-}
+ // Only 32-bit targets pop the sret. It's a 64-bit world these days, so
+ // early-out for most compilations.
+ if (!Subtarget.is32Bit())
+ return false;
+
+ if (Args.empty())
+ return false;
-/// Determines whether a function uses struct return semantics.
-static StructReturnType
-argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
- if (Ins.empty())
- return NotStructReturn;
+ // Most calls do not have an sret argument, check the arg next.
+ const ISD::ArgFlagsTy &Flags = Args[0].Flags;
+ if (!Flags.isSRet() || Flags.isInReg())
+ return false;
+
+ // The MSVC ABI does not pop the sret.
+ if (Subtarget.getTargetTriple().isOSMSVCRT())
+ return false;
+
+ // MCUs don't pop the sret either.
+ if (Subtarget.isTargetMCU())
+ return false;
- const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
- if (!Flags.isSRet())
- return NotStructReturn;
- if (Flags.isInReg() || IsMCU)
- return RegStructReturn;
- return StackStructReturn;
+ // Otherwise, the callee pops the sret argument.
+ return true;
}
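The static_assert above stands in for the C++20 concept the comment alludes to; a sketch of the equivalent constraint once concepts are available (declaration only, using the same names as the helper above):

#include <type_traits>

template <typename T>
concept CallArg = std::is_same_v<T, ISD::OutputArg> ||
                  std::is_same_v<T, ISD::InputArg>;

template <CallArg T>
static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
                             const X86Subtarget &Subtarget);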
/// Make a copy of an aggregate at address specified by "Src" to address
@@ -3533,13 +3759,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
- SaveXMMOps.push_back(
- DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
+ SaveXMMOps.push_back(RSFIN);
SaveXMMOps.push_back(
DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
llvm::append_range(SaveXMMOps, LiveXMMRegs);
- MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
- MVT::Other, SaveXMMOps));
+ MachineMemOperand *StoreMMO =
+ DAG.getMachineFunction().getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
+ Offset),
+ MachineMemOperand::MOStore, 128, Align(16));
+ MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
+ DL, DAG.getVTList(MVT::Other),
+ SaveXMMOps, MVT::i8, StoreMMO));
}
if (!MemOps.empty())
@@ -3670,6 +3902,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
RC = &X86::GR32RegClass;
else if (Is64Bit && RegVT == MVT::i64)
RC = &X86::GR64RegClass;
+ else if (RegVT == MVT::f16)
+ RC = &X86::FR16XRegClass;
else if (RegVT == MVT::f32)
RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
else if (RegVT == MVT::f64)
@@ -3767,12 +4001,12 @@ SDValue X86TargetLowering::LowerFormalArguments(
// the argument into a virtual register so that we can access it from the
// return points.
if (Ins[I].Flags.isSRet()) {
- Register Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- MVT PtrTy = getPointerTy(DAG.getDataLayout());
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
- FuncInfo->setSRetReturnReg(Reg);
- }
+ assert(!FuncInfo->getSRetReturnReg() &&
+ "SRet return has already been set");
+ MVT PtrTy = getPointerTy(DAG.getDataLayout());
+ Register Reg =
+ MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
@@ -3800,9 +4034,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
- !Subtarget.getTargetTriple().isOSMSVCRT() &&
- argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
+ if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -3921,10 +4153,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
- StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
bool IsSibcall = false;
bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
+ bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
bool HasNCSR = (CB && isa<CallInst>(CB) &&
CB->hasFnAttr("no_caller_saved_registers"));
@@ -3950,13 +4182,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
}
-
if (isTailCall && !IsMustTail) {
// Check if it's really possible to do a tail call.
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, SR != NotStructReturn,
- MF.getFunction().hasStructRetAttr(), CLI.RetTy,
- Outs, OutVals, Ins, DAG);
+ isTailCall = IsEligibleForTailCallOptimization(
+ Callee, CallConv, IsCalleePopSRet, isVarArg, CLI.RetTy, Outs, OutVals,
+ Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -4199,7 +4429,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
+ if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
+ (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -4324,7 +4555,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// address into a register.
Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
} else if (Subtarget.isTarget64BitILP32() &&
- Callee->getValueType(0) == MVT::i32) {
+ Callee.getValueType() == MVT::i32) {
// Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
@@ -4436,14 +4667,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
"tail calls cannot be marked with clang.arc.attachedcall");
assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
- // Add target constant to select ObjC runtime call just before the call
- // target. RuntimeCallType == 0 selects objc_retainAutoreleasedReturnValue,
- // RuntimeCallType == 0 selects objc_unsafeClaimAutoreleasedReturnValue when
- // epxanding the pseudo.
- unsigned RuntimeCallType =
- objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
- Ops.insert(Ops.begin() + 1,
- DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+ auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
+ Ops.insert(Ops.begin() + 1, GA);
Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
} else {
Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
@@ -4459,20 +4688,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
// Create the CALLSEQ_END node.
- unsigned NumBytesForCalleeToPop;
+ unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
DAG.getTarget().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPop = NumBytes; // Callee pops everything
- else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
- !Subtarget.getTargetTriple().isOSMSVCRT() &&
- SR == StackStructReturn)
- // If this is a call to a struct-return function, the callee
- // pops the hidden struct pointer, so we have to push it back.
- // This is common for Darwin/X86, Linux & Mingw32 targets.
- // For MSVC Win32 targets, the caller pops the hidden struct pointer.
+ else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
+ // If this call passes a struct-return pointer, the callee
+ // pops that struct pointer.
NumBytesForCalleeToPop = 4;
- else
- NumBytesForCalleeToPop = 0; // Callee pops nothing.
// Returns a flag for retval copy to use.
if (!IsSibcall) {
@@ -4631,9 +4854,8 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
bool X86TargetLowering::IsEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleePopSRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
if (!mayTailCallThisCC(CalleeCC))
@@ -4677,9 +4899,17 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (RegInfo->hasStackRealignment(MF))
return false;
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
- if (isCalleeStructRet || isCallerStructRet)
+ // Also avoid sibcall optimization if we're an sret-returning function and
+ // the callee is incompatible. See the comment in LowerReturn about why
+ // hasStructRetAttr is insufficient.
+ if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
+ // For a compatible tail call the callee must return our sret pointer. So it
+ // needs to be (a) an sret function itself and (b) we pass our sret as its
+ // sret. Condition #b is harder to determine.
+ return false;
+ } else if (IsCalleePopSRet)
+ // The callee pops an sret, so we cannot tail-call, as our caller doesn't
+ // expect that.
return false;
// Do not sibcall optimize vararg calls unless all arguments are passed via
@@ -4833,15 +5063,44 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
-static bool MayFoldLoad(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+bool X86::mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+ bool AssumeSingleUse) {
+ if (!AssumeSingleUse && !Op.hasOneUse())
+ return false;
+ if (!ISD::isNormalLoad(Op.getNode()))
+ return false;
+
+ // If this is an unaligned vector load, make sure the target supports folding it.
+ auto *Ld = cast<LoadSDNode>(Op.getNode());
+ if (!Subtarget.hasAVX() && !Subtarget.hasSSEUnalignedMem() &&
+ Ld->getValueSizeInBits(0) == 128 && Ld->getAlignment() < 16)
+ return false;
+
+ // TODO: If this is a non-temporal load and the target has an instruction
+ // for it, it should not be folded. See "useNonTemporalLoad()".
+
+ return true;
+}
+
+bool X86::mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+ const X86Subtarget &Subtarget,
+ bool AssumeSingleUse) {
+ assert(Subtarget.hasAVX() && "Expected AVX for broadcast from memory");
+ if (!X86::mayFoldLoad(Op, Subtarget, AssumeSingleUse))
+ return false;
+
+ // We cannot replace a wide volatile load with a broadcast-from-memory,
+ // because that would narrow the load, which isn't legal for volatiles.
+ auto *Ld = cast<LoadSDNode>(Op.getNode());
+ return !Ld->isVolatile() ||
+ Ld->getValueSizeInBits(0) == EltVT.getScalarSizeInBits();
}
-static bool MayFoldIntoStore(SDValue Op) {
+bool X86::mayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
-static bool MayFoldIntoZeroExtend(SDValue Op) {
+bool X86::mayFoldIntoZeroExtend(SDValue Op) {
if (Op.hasOneUse()) {
unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
return (ISD::ZERO_EXTEND == Opcode);
@@ -4872,6 +5131,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
case X86ISD::UNPCKL:
case X86ISD::UNPCKH:
case X86ISD::VBROADCAST:
@@ -5023,20 +5283,20 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
- bool isFP, SDValue &LHS, SDValue &RHS,
- SelectionDAG &DAG) {
+ bool isFP, SDValue &LHS, SDValue &RHS,
+ SelectionDAG &DAG) {
if (!isFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return X86::COND_NS;
}
- if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
// X < 0 -> X == 0, jump on sign.
return X86::COND_S;
}
- if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
+ if (SetCCOpcode == ISD::SETGE && RHSC->isZero()) {
// X >= 0 -> X == 0, jump on !sign.
return X86::COND_NS;
}
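A scalar sanity check of the first fold above: x > -1 holds exactly when x >= 0, i.e. when the sign bit is clear, which is why the compare collapses into a sign test (COND_NS). A sketch with made-up helper names:

#include <cstdint>

static bool gtMinusOne(int32_t x) { return x > -1; }
// Same predicate expressed as "sign bit is clear".
static bool signClear(int32_t x) { return (x & 0x80000000u) == 0; }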
@@ -5119,6 +5379,10 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
+static bool useVPTERNLOG(const X86Subtarget &Subtarget, MVT VT) {
+ return Subtarget.hasVLX() || Subtarget.canExtendTo512DQ() ||
+ VT.is512BitVector();
+}
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
@@ -5312,10 +5576,13 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
VT = getTypeToTransformTo(Context, VT);
// If vector multiply is legal, assume that's faster than shl + add/sub.
- // TODO: Multiply is a complex op with higher latency and lower throughput in
- // most implementations, so this check could be loosened based on type
- // and/or a CPU attribute.
- if (isOperationLegal(ISD::MUL, VT))
+ // Multiply is a complex op with higher latency and lower throughput in
+ // most implementations; sub-vXi32 vector multiplies are always fast,
+ // vXi32 is fast as long as PMULLD isn't slow on the target, and anything
+ // larger (vXi64) is always going to be slow.
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if (isOperationLegal(ISD::MUL, VT) && EltSizeInBits <= 32 &&
+ (EltSizeInBits != 32 || !Subtarget.isPMULLDSlow()))
return false;
// shl+add, shl+sub, shl+add+neg
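The decompositions listed above correspond to scalar rewrites of the following shape (a sketch using wrapping unsigned arithmetic):

#include <cstdint>

static uint64_t mulBy9(uint64_t x) { return (x << 3) + x; }            // shl+add
static uint64_t mulBy7(uint64_t x) { return (x << 3) - x; }            // shl+sub
static uint64_t mulByMinus9(uint64_t x) { return 0 - ((x << 3) + x); } // shl+add+neg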
@@ -5393,11 +5660,10 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
}
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const {
+ const MachineFunction &MF) const {
// Do not merge to float value size (128 bytes) if no implicit
// float attribute is set.
- bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
+ bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
if (NoFloat) {
unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
@@ -5731,7 +5997,7 @@ static bool canWidenShuffleElements(ArrayRef<int> Mask,
// Here we do not set undef elements as zeroable.
SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
if (V2IsZero) {
- assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!");
+ assert(!Zeroable.isZero() && "V2's non-undef elements are used?!");
for (int i = 0, Size = Mask.size(); i != Size; ++i)
if (Mask[i] != SM_SentinelUndef && Zeroable[i])
ZeroableMask[i] = SM_SentinelZero;
@@ -6037,62 +6303,67 @@ static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
"Can't split odd sized vector");
+ // If this is a splat value (with no undefs), then use the lower subvector,
+ // which should be a free extraction.
SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
+ if (DAG.isSplatValue(Op, /*AllowUndefs*/ false))
+ return std::make_pair(Lo, Lo);
+
SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
return std::make_pair(Lo, Hi);
}
-// Split an unary integer op into 2 half sized ops.
-static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
+/// Break an operation into 2 half-sized ops and then concatenate the results.
+static SDValue splitVectorOp(SDValue Op, SelectionDAG &DAG) {
+ unsigned NumOps = Op.getNumOperands();
EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ // Split each vector operand into Lo/Hi halves; scalar operands pass through.
+ SmallVector<SDValue> LoOps(NumOps, SDValue());
+ SmallVector<SDValue> HiOps(NumOps, SDValue());
+ for (unsigned I = 0; I != NumOps; ++I) {
+ SDValue SrcOp = Op.getOperand(I);
+ if (!SrcOp.getValueType().isVector()) {
+ LoOps[I] = HiOps[I] = SrcOp;
+ continue;
+ }
+ std::tie(LoOps[I], HiOps[I]) = splitVector(SrcOp, DAG, dl);
+ }
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+ DAG.getNode(Op.getOpcode(), dl, LoVT, LoOps),
+ DAG.getNode(Op.getOpcode(), dl, HiVT, HiOps));
+}
+
+/// Break a unary integer operation into 2 half-sized ops and then
+/// concatenate the result back.
+static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
// Make sure we only try to split 256/512-bit types to avoid creating
// narrow vectors.
+ EVT VT = Op.getValueType();
+ (void)VT;
assert((Op.getOperand(0).getValueType().is256BitVector() ||
Op.getOperand(0).getValueType().is512BitVector()) &&
(VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
VT.getVectorNumElements() &&
"Unexpected VTs!");
-
- SDLoc dl(Op);
-
- // Extract the Lo/Hi vectors
- SDValue Lo, Hi;
- std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
- DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
+ return splitVectorOp(Op, DAG);
}
/// Break a binary integer operation into 2 half sized ops and then
/// concatenate the result back.
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
+ // Assert that all the types match.
EVT VT = Op.getValueType();
-
- // Sanity check that all the types match.
+ (void)VT;
assert(Op.getOperand(0).getValueType() == VT &&
Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
-
- SDLoc dl(Op);
-
- // Extract the LHS Lo/Hi vectors
- SDValue LHS1, LHS2;
- std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
-
- // Extract the RHS Lo/Hi vectors
- SDValue RHS1, RHS2;
- std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
- DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
+ return splitVectorOp(Op, DAG);
}
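The new splitVectorOp generalizes splitVectorIntUnary/splitVectorIntBinary above: every vector operand is split in half, scalars pass through, the op runs per half, and the halves are concatenated. A container analogue as a sketch:

#include <vector>

template <typename BinaryOp>
static std::vector<int> splitAndApply(const std::vector<int> &A,
                                      const std::vector<int> &B, BinaryOp Op) {
  const auto Half = A.size() / 2;
  std::vector<int> ALo(A.begin(), A.begin() + Half), AHi(A.begin() + Half, A.end());
  std::vector<int> BLo(B.begin(), B.begin() + Half), BHi(B.begin() + Half, B.end());
  std::vector<int> Res = Op(ALo, BLo), Hi = Op(AHi, BHi);
  Res.insert(Res.end(), Hi.begin(), Hi.end()); // concatenate the halves
  return Res;
}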
// Helper for splitting operands of an operation to legal target size and
@@ -6143,6 +6414,71 @@ SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}
+// Helper function that extends a non-512-bit vector op to 512-bits on non-VLX
+// targets.
+static SDValue getAVX512Node(unsigned Opcode, const SDLoc &DL, MVT VT,
+ ArrayRef<SDValue> Ops, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.hasAVX512() && "AVX512 target expected");
+ MVT SVT = VT.getScalarType();
+
+ // If we have a 32/64-bit splatted constant, splat it to DstTy to
+ // encourage a foldable broadcasted operand.
+ auto MakeBroadcastOp = [&](SDValue Op, MVT OpVT, MVT DstVT) {
+ unsigned OpEltSizeInBits = OpVT.getScalarSizeInBits();
+ // AVX512 broadcasts 32/64-bit operands.
+ // TODO: Support float once getAVX512Node is used by fp-ops.
+ if (!OpVT.isInteger() || OpEltSizeInBits < 32 ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(SVT))
+ return SDValue();
+ // If we're not widening, don't bother if we're not bitcasting.
+ if (OpVT == DstVT && Op.getOpcode() != ISD::BITCAST)
+ return SDValue();
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(peekThroughBitcasts(Op))) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, OpEltSizeInBits) &&
+ !HasAnyUndefs && SplatValue.getBitWidth() == OpEltSizeInBits)
+ return DAG.getConstant(SplatValue, DL, DstVT);
+ }
+ return SDValue();
+ };
+
+ bool Widen = !(Subtarget.hasVLX() || VT.is512BitVector());
+
+ MVT DstVT = VT;
+ if (Widen)
+ DstVT = MVT::getVectorVT(SVT, 512 / SVT.getSizeInBits());
+
+ // Canonicalize src operands.
+ SmallVector<SDValue> SrcOps(Ops.begin(), Ops.end());
+ for (SDValue &Op : SrcOps) {
+ MVT OpVT = Op.getSimpleValueType();
+ // Just pass through scalar operands.
+ if (!OpVT.isVector())
+ continue;
+ assert(OpVT == VT && "Vector type mismatch");
+
+ if (SDValue BroadcastOp = MakeBroadcastOp(Op, OpVT, DstVT)) {
+ Op = BroadcastOp;
+ continue;
+ }
+
+ // Just widen the subvector by inserting into an undef wide vector.
+ if (Widen)
+ Op = widenSubVector(Op, false, Subtarget, DAG, DL, 512);
+ }
+
+ SDValue Res = DAG.getNode(Opcode, DL, DstVT, SrcOps);
+
+ // Perform the 512-bit op then extract the bottom subvector.
+ if (Widen)
+ Res = extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
+ return Res;
+}
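A hypothetical call-site sketch for getAVX512Node (the opcode and operands are placeholders, not taken from this change): on an AVX512F-only target a 256-bit op is widened to 512 bits, executed there, and the low 256 bits extracted.

// Src and Amt are assumed to be v8i32 SDValues inside some lowering routine.
SDValue Res =
    getAVX512Node(X86ISD::VSHLV, DL, MVT::v8i32, {Src, Amt}, DAG, Subtarget);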
+
/// Insert i1-subvector to i1-vector.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -6214,14 +6550,21 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
assert(IdxVal != 0 && "Unexpected index");
- NumElems = WideOpVT.getVectorNumElements();
- unsigned ShiftLeft = NumElems - SubVecNumElems;
- unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
- SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
- DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
- if (ShiftRight != 0)
- SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
- DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+ // If upper elements of Vec are known undef, then just shift into place.
+ if (llvm::all_of(Vec->ops().slice(IdxVal + SubVecNumElems),
+ [](SDValue V) { return V.isUndef(); })) {
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(IdxVal, dl, MVT::i8));
+ } else {
+ NumElems = WideOpVT.getVectorNumElements();
+ unsigned ShiftLeft = NumElems - SubVecNumElems;
+ unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
+ SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
+ if (ShiftRight != 0)
+ SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
+ DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
+ }
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
@@ -6323,7 +6666,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Expected a 128/256/512-bit vector type");
- APInt Ones = APInt::getAllOnesValue(32);
+ APInt Ones = APInt::getAllOnes(32);
unsigned NumElts = VT.getSizeInBits() / 32;
SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
return DAG.getBitcast(VT, Vec);
@@ -6461,6 +6804,58 @@ static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT,
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
+/// Returns a node that packs the LHS + RHS nodes together at half width.
+/// May return X86ISD::PACKSS/PACKUS, packing the top/bottom half.
+/// TODO: Add vXi64 -> vXi32 pack support with vector_shuffle node.
+/// TODO: Add subvector splitting if/when we have a need for it.
+static SDValue getPack(SelectionDAG &DAG, const X86Subtarget &Subtarget,
+ const SDLoc &dl, MVT VT, SDValue LHS, SDValue RHS,
+ bool PackHiHalf = false) {
+ MVT OpVT = LHS.getSimpleValueType();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool UsePackUS = Subtarget.hasSSE41() || EltSizeInBits == 8;
+ assert(OpVT == RHS.getSimpleValueType() &&
+ VT.getSizeInBits() == OpVT.getSizeInBits() &&
+ (EltSizeInBits * 2) == OpVT.getScalarSizeInBits() &&
+ "Unexpected PACK operand types");
+ assert((EltSizeInBits == 8 || EltSizeInBits == 16) &&
+ "Unexpected PACK result type");
+
+ // See if we already have sufficient leading bits for PACKSS/PACKUS.
+ if (!PackHiHalf) {
+ if (UsePackUS &&
+ DAG.computeKnownBits(LHS).countMaxActiveBits() <= EltSizeInBits &&
+ DAG.computeKnownBits(RHS).countMaxActiveBits() <= EltSizeInBits)
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
+
+ if (DAG.ComputeMinSignedBits(LHS) <= EltSizeInBits &&
+ DAG.ComputeMinSignedBits(RHS) <= EltSizeInBits)
+ return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
+ }
+
+ // Fall back to sign/zero-extending the requested half and packing.
+ SDValue Amt = DAG.getTargetConstant(EltSizeInBits, dl, MVT::i8);
+ if (UsePackUS) {
+ if (PackHiHalf) {
+ LHS = DAG.getNode(X86ISD::VSRLI, dl, OpVT, LHS, Amt);
+ RHS = DAG.getNode(X86ISD::VSRLI, dl, OpVT, RHS, Amt);
+ } else {
+ SDValue Mask = DAG.getConstant((1ULL << EltSizeInBits) - 1, dl, OpVT);
+ LHS = DAG.getNode(ISD::AND, dl, OpVT, LHS, Mask);
+ RHS = DAG.getNode(ISD::AND, dl, OpVT, RHS, Mask);
+ };
+ return DAG.getNode(X86ISD::PACKUS, dl, VT, LHS, RHS);
+ };
+
+ if (!PackHiHalf) {
+ LHS = DAG.getNode(X86ISD::VSHLI, dl, OpVT, LHS, Amt);
+ RHS = DAG.getNode(X86ISD::VSHLI, dl, OpVT, RHS, Amt);
+ }
+ LHS = DAG.getNode(X86ISD::VSRAI, dl, OpVT, LHS, Amt);
+ RHS = DAG.getNode(X86ISD::VSRAI, dl, OpVT, RHS, Amt);
+ return DAG.getNode(X86ISD::PACKSS, dl, VT, LHS, RHS);
+}
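As a scalar model of the signed fallback path in getPack for 32->16-bit lanes (real PACKSS works lane-wise on vectors; two's-complement assumed): shifting left then arithmetic-shifting right sign-extends the low 16 bits, after which the PACKSS saturation never clamps, so the low half is kept exactly.

#include <algorithm>
#include <cstdint>

static int16_t packLowHalf(int32_t Lane) {
  // VSHLI + VSRAI: sign-extend the low 16 bits of the 32-bit lane.
  int32_t SignExtended = (int32_t)((uint32_t)Lane << 16) >> 16;
  // PACKSS saturates to the i16 range; a no-op after the sign-extension.
  return (int16_t)std::clamp(SignExtended, -32768, 32767);
}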
+
/// Return a vector_shuffle of the specified vector of zero or undef vector.
/// This produces a shuffle where the low element of V2 is swizzled into the
/// zero/undef vector, landing at element Idx.
@@ -6563,7 +6958,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
// Only treat an element as UNDEF if all bits are UNDEF.
- if (UndefEltBits.isAllOnesValue()) {
+ if (UndefEltBits.isAllOnes()) {
if (!AllowWholeUndefs)
return false;
UndefElts.setBit(i);
@@ -6602,59 +6997,36 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
// Handle UNDEFs.
if (Op.isUndef()) {
- APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
+ APInt UndefSrcElts = APInt::getAllOnes(NumElts);
SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract scalar constant bits.
if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
- APInt UndefSrcElts = APInt::getNullValue(1);
+ APInt UndefSrcElts = APInt::getZero(1);
SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
return CastBitData(UndefSrcElts, SrcEltBits);
}
if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
- APInt UndefSrcElts = APInt::getNullValue(1);
+ APInt UndefSrcElts = APInt::getZero(1);
APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
SmallVector<APInt, 64> SrcEltBits(1, RawBits);
return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from build vector.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Op)) {
+ BitVector Undefs;
+ SmallVector<APInt> SrcEltBits;
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
- unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
-
- APInt UndefSrcElts(NumSrcElts, 0);
- SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- const SDValue &Src = Op.getOperand(i);
- if (Src.isUndef()) {
- UndefSrcElts.setBit(i);
- continue;
- }
- auto *Cst = cast<ConstantSDNode>(Src);
- SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
+ if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) {
+ APInt UndefSrcElts = APInt::getNullValue(SrcEltBits.size());
+ for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
+ if (Undefs[I])
+ UndefSrcElts.setBit(I);
+ return CastBitData(UndefSrcElts, SrcEltBits);
}
- return CastBitData(UndefSrcElts, SrcEltBits);
- }
- if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
- unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
- unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
-
- APInt UndefSrcElts(NumSrcElts, 0);
- SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
- const SDValue &Src = Op.getOperand(i);
- if (Src.isUndef()) {
- UndefSrcElts.setBit(i);
- continue;
- }
- auto *Cst = cast<ConstantFPSDNode>(Src);
- APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
- SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
- }
- return CastBitData(UndefSrcElts, SrcEltBits);
}
// Extract constant bits from constant pool vector.
@@ -6704,17 +7076,21 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
SDValue Ptr = MemIntr->getBasePtr();
+ // The source constant may be larger than the subvector broadcast, so ensure
+ // we extract the correct subvector constants.
if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
Type *CstTy = Cst->getType();
unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
- if (!CstTy->isVectorTy() || (SizeInBits % CstSizeInBits) != 0)
+ unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
+ if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
+ (SizeInBits % SubVecSizeInBits) != 0)
return false;
- unsigned SubEltSizeInBits = CstTy->getScalarSizeInBits();
- unsigned NumSubElts = CstSizeInBits / SubEltSizeInBits;
- unsigned NumSubVecs = SizeInBits / CstSizeInBits;
+ unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
+ unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
+ unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
APInt UndefSubElts(NumSubElts, 0);
SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
- APInt(SubEltSizeInBits, 0));
+ APInt(CstEltSizeInBits, 0));
for (unsigned i = 0; i != NumSubElts; ++i) {
if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
UndefSubElts, i))
@@ -6814,12 +7190,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
AllowPartialUndefs))
return false;
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
if (M < 0) {
UndefElts.setBit(i);
- EltBits.push_back(APInt::getNullValue(EltSizeInBits));
+ EltBits.push_back(APInt::getZero(EltSizeInBits));
} else if (M < (int)NumElts) {
if (UndefElts0[M])
UndefElts.setBit(i);
@@ -6916,8 +7292,8 @@ static void getPackDemandedElts(EVT VT, const APInt &DemandedElts,
int NumEltsPerLane = NumElts / NumLanes;
int NumInnerEltsPerLane = NumInnerElts / NumLanes;
- DemandedLHS = APInt::getNullValue(NumInnerElts);
- DemandedRHS = APInt::getNullValue(NumInnerElts);
+ DemandedLHS = APInt::getZero(NumInnerElts);
+ DemandedRHS = APInt::getZero(NumInnerElts);
// Map DemandedElts to the packed operands.
for (int Lane = 0; Lane != NumLanes; ++Lane) {
@@ -6940,8 +7316,8 @@ static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts,
int NumEltsPerLane = NumElts / NumLanes;
int HalfEltsPerLane = NumEltsPerLane / 2;
- DemandedLHS = APInt::getNullValue(NumElts);
- DemandedRHS = APInt::getNullValue(NumElts);
+ DemandedLHS = APInt::getZero(NumElts);
+ DemandedRHS = APInt::getZero(NumElts);
// Map DemandedElts to the horizontal operands.
for (int Idx = 0; Idx != NumElts; ++Idx) {
@@ -7148,6 +7524,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
break;
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
DecodeScalarMoveMask(NumElems, /* IsLoad */ false, Mask);
@@ -7287,7 +7664,7 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask,
SDValue V1, SDValue V2,
APInt &KnownUndef, APInt &KnownZero) {
int Size = Mask.size();
- KnownUndef = KnownZero = APInt::getNullValue(Size);
+ KnownUndef = KnownZero = APInt::getZero(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
@@ -7380,7 +7757,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
int Size = Mask.size();
SDValue V1 = Ops[0];
SDValue V2 = IsUnary ? V1 : Ops[1];
- KnownUndef = KnownZero = APInt::getNullValue(Size);
+ KnownUndef = KnownZero = APInt::getZero(Size);
V1 = peekThroughBitcasts(V1);
V2 = peekThroughBitcasts(V2);
@@ -7487,7 +7864,7 @@ static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask,
APInt &KnownUndef,
APInt &KnownZero) {
unsigned NumElts = Mask.size();
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
int M = Mask[i];
@@ -7760,9 +8137,9 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
// lanes), we can treat this as a truncation shuffle.
bool Offset0 = false, Offset1 = false;
if (Opcode == X86ISD::PACKSS) {
- if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
+ if ((!(N0.isUndef() || EltsLHS.isZero()) &&
DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
- (!(N1.isUndef() || EltsRHS.isNullValue()) &&
+ (!(N1.isUndef() || EltsRHS.isZero()) &&
DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
return false;
// We can't easily fold ASHR into a shuffle, but if it was feeding a
@@ -7780,9 +8157,9 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
}
} else {
APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
- if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
+ if ((!(N0.isUndef() || EltsLHS.isZero()) &&
!DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
- (!(N1.isUndef() || EltsRHS.isNullValue()) &&
+ (!(N1.isUndef() || EltsRHS.isZero()) &&
!DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
return false;
}
@@ -7983,7 +8360,7 @@ static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs,
APInt KnownUndef, KnownZero;
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef,
KnownZero, DAG, Depth, ResolveKnownElts);
}
@@ -8467,10 +8844,10 @@ static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
case ISD::SCALAR_TO_VECTOR:
return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset);
case ISD::SRL:
- if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
- uint64_t Idx = IdxC->getZExtValue();
- if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
- ByteOffset += Idx / 8;
+ if (auto *AmtC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
+ uint64_t Amt = AmtC->getZExtValue();
+ if ((Amt % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) {
+ ByteOffset += Amt / 8;
return true;
}
}
@@ -8508,9 +8885,9 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
unsigned NumElems = Elts.size();
int LastLoadedElt = -1;
- APInt LoadMask = APInt::getNullValue(NumElems);
- APInt ZeroMask = APInt::getNullValue(NumElems);
- APInt UndefMask = APInt::getNullValue(NumElems);
+ APInt LoadMask = APInt::getZero(NumElems);
+ APInt ZeroMask = APInt::getZero(NumElems);
+ APInt UndefMask = APInt::getZero(NumElems);
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
@@ -8671,7 +9048,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// If the upper half of a ymm/zmm load is undef then just load the lower half.
if (VT.is256BitVector() || VT.is512BitVector()) {
unsigned HalfNumElems = NumElems / 2;
- if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) {
+ if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnes()) {
EVT HalfVT =
EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
SDValue HalfLD =
@@ -8685,7 +9062,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
- (LoadSizeInBits == 32 || LoadSizeInBits == 64) &&
+ ((LoadSizeInBits == 16 && Subtarget.hasFP16()) || LoadSizeInBits == 32 ||
+ LoadSizeInBits == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits)
: MVT::getIntegerVT(LoadSizeInBits);
@@ -8709,7 +9087,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
// BROADCAST - match the smallest possible repetition pattern, load that
// scalar/subvector element and then broadcast to the entire vector.
- if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
+ if (ZeroMask.isZero() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() &&
(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) {
for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) {
unsigned RepeatSize = SubElems * BaseSizeInBits;
@@ -8758,6 +9136,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
while (Broadcast.getValueSizeInBits() < VT.getSizeInBits())
Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL);
} else {
+ if (!Subtarget.hasAVX2() &&
+ !X86::mayFoldLoadIntoBroadcastFromMem(
+ RepeatLoad, RepeatVT.getScalarType().getSimpleVT(),
+ Subtarget,
+ /*AssumeSingleUse=*/true))
+ return SDValue();
Broadcast =
DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad);
}
@@ -8800,7 +9184,9 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
- if (ScalarSize == 32) {
+ if (ScalarSize == 16) {
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+ } else if (ScalarSize == 32) {
Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
} else {
assert(ScalarSize == 64 && "Unsupported floating point scalar size");
@@ -9009,6 +9395,7 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// with AVX2, also splat i8 and i16.
// With pattern matching, the VBROADCAST node may become a VMOVDDUP.
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
+ (ScalarSize == 16 && Subtarget.hasFP16() && CVT.isFloatingPoint()) ||
(OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
@@ -9071,6 +9458,9 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
return BCast;
}
+ if (ScalarSize == 16 && Subtarget.hasFP16() && IsGE256)
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
// Unsupported broadcast.
return SDValue();
}
@@ -9760,7 +10150,7 @@ static SDValue getHopForBuildVector(const BuildVectorSDNode *BV,
V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width);
unsigned NumElts = VT.getVectorNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ APInt DemandedElts = APInt::getAllOnes(NumElts);
for (unsigned i = 0; i != NumElts; ++i)
if (BV->getOperand(i).isUndef())
DemandedElts.clearBit(i);
@@ -10335,9 +10725,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return VectorConstant;
unsigned EVTBits = EltVT.getSizeInBits();
- APInt UndefMask = APInt::getNullValue(NumElems);
- APInt ZeroMask = APInt::getNullValue(NumElems);
- APInt NonZeroMask = APInt::getNullValue(NumElems);
+ APInt UndefMask = APInt::getZero(NumElems);
+ APInt ZeroMask = APInt::getZero(NumElems);
+ APInt NonZeroMask = APInt::getZero(NumElems);
bool IsAllConstants = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
@@ -10361,7 +10751,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// All undef vector. Return an UNDEF. All zero vectors were handled above.
if (NonZeroMask == 0) {
- assert(UndefMask.isAllOnesValue() && "Fully undef mask expected");
+ assert(UndefMask.isAllOnes() && "Fully undef mask expected");
return DAG.getUNDEF(VT);
}
@@ -10471,13 +10861,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
- (EltVT == MVT::i64 && Subtarget.is64Bit())) {
+ if (EltVT == MVT::i32 || EltVT == MVT::f16 || EltVT == MVT::f32 ||
+ EltVT == MVT::f64 || (EltVT == MVT::i64 && Subtarget.is64Bit()) ||
+ (EltVT == MVT::i16 && Subtarget.hasFP16())) {
assert((VT.is128BitVector() || VT.is256BitVector() ||
VT.is512BitVector()) &&
"Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+ // Turn it into a MOVL (i.e. movsh, movss, movsd, movw or movd) to a
+ // zero vector.
return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
}
@@ -10607,7 +10999,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG, Subtarget))
return V;
- if (EVTBits == 16 && NumElems == 8)
+ if (EltVT == MVT::i16 && NumElems == 8)
if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
DAG, Subtarget))
return V;
@@ -10664,7 +11056,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return Sh;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (Subtarget.hasSSE41()) {
+ if (Subtarget.hasSSE41() && EltVT != MVT::f16) {
SDValue Result;
if (!Op.getOperand(0).isUndef())
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -11206,7 +11598,7 @@ static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
// Arbitrarily choose from the 2nd operand if the select condition element
// is undef.
// TODO: Can we do better by matching patterns such as even/odd?
- if (UndefElts[i] || EltBits[i].isNullValue())
+ if (UndefElts[i] || EltBits[i].isZero())
Mask[i] += NumElts;
}
@@ -11575,7 +11967,7 @@ static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale))
continue;
unsigned UpperElts = NumElts - NumSrcElts;
- if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale);
SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts);
@@ -11672,7 +12064,7 @@ static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
unsigned NumSrcElts = NumElts / Scale;
unsigned UpperElts = NumElts - NumSrcElts;
if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
- !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
SDValue Src = V1;
@@ -11729,7 +12121,7 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
// The elements beyond the truncation must be undef/zero.
unsigned UpperElts = NumElts - NumSrcElts;
if (UpperElts > 0 &&
- !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
+ !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnes())
continue;
bool UndefUppers =
UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
@@ -11955,8 +12347,8 @@ static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
MVT LogicVT = VT;
if (EltVT == MVT::f32 || EltVT == MVT::f64) {
Zero = DAG.getConstantFP(0.0, DL, EltVT);
- APFloat AllOnesValue = APFloat::getAllOnesValue(
- SelectionDAG::EVTToAPFloatSemantics(EltVT), EltVT.getSizeInBits());
+ APFloat AllOnesValue =
+ APFloat::getAllOnesValue(SelectionDAG::EVTToAPFloatSemantics(EltVT));
AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT);
LogicVT =
MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
@@ -12038,10 +12430,15 @@ static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
int M = Mask[i];
if (M == SM_SentinelUndef)
continue;
- if (M == i)
+ if (M == i ||
+ (0 <= M && M < Size && IsElementEquivalent(Size, V1, V1, M, i))) {
+ Mask[i] = i;
continue;
- if (M == i + Size) {
+ }
+ if (M == (i + Size) ||
+ (Size <= M && IsElementEquivalent(Size, V2, V2, M - Size, i))) {
BlendMask |= 1ull << i;
+ Mask[i] = i + Size;
continue;
}
if (Zeroable[i]) {
@@ -12424,6 +12821,14 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
return SDValue();
}
+static bool isBroadcastShuffleMask(ArrayRef<int> Mask) {
+ return isUndefOrEqual(Mask, 0);
+}
+
+static bool isNoopOrBroadcastShuffleMask(ArrayRef<int> Mask) {
+ return isNoopShuffleMask(Mask) || isBroadcastShuffleMask(Mask);
+}
+
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
@@ -12457,6 +12862,38 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
}
}
+ // If \p InputMask demands only the 0'th element of \p Input (i.e. it is a
+ // broadcast mask), and is not already a no-op, then broadcast said input
+ // and change \p InputMask to be a no-op (identity) mask.
+ auto canonicalizeBroadcastableInput = [DL, VT, &Subtarget,
+ &DAG](SDValue &Input,
+ MutableArrayRef<int> InputMask) {
+ unsigned EltSizeInBits = Input.getScalarValueSizeInBits();
+ if (!Subtarget.hasAVX2() && (!Subtarget.hasAVX() || EltSizeInBits < 32 ||
+ !X86::mayFoldLoad(Input, Subtarget)))
+ return;
+ if (isNoopShuffleMask(InputMask))
+ return;
+ assert(isBroadcastShuffleMask(InputMask) &&
+ "Expected to demand only the 0'th element.");
+ Input = DAG.getNode(X86ISD::VBROADCAST, DL, VT, Input);
+ for (auto I : enumerate(InputMask)) {
+ int &InputMaskElt = I.value();
+ if (InputMaskElt >= 0)
+ InputMaskElt = I.index();
+ }
+ };
+
+ // Currently, we may need to produce one shuffle per input, and blend results.
+ // It is possible that the shuffle for one of the inputs is already a no-op.
+ // See if we can simplify non-no-op shuffles into broadcasts,
+ // which we consider to be strictly better than an arbitrary shuffle.
+ if (isNoopOrBroadcastShuffleMask(V1Mask) &&
+ isNoopOrBroadcastShuffleMask(V2Mask)) {
+ canonicalizeBroadcastableInput(V1, V1Mask);
+ canonicalizeBroadcastableInput(V2, V2Mask);
+ }
+
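// A standalone sketch (not from this patch) of the mask rewrite performed by
// canonicalizeBroadcastableInput: a broadcast mask such as {0, -1, 0, 0}
// (only element 0 demanded, -1 = undef) becomes the identity mask {0, -1, 2, 3}
// once the input itself has been broadcast.
#include <vector>
static void rewriteBroadcastMaskToIdentity(std::vector<int> &Mask) {
  for (unsigned I = 0, E = Mask.size(); I != E; ++I)
    if (Mask[I] >= 0)   // Leave undef (-1) lanes untouched.
      Mask[I] = (int)I; // Every demanded lane now reads its own element.
}
// Usage: std::vector<int> M = {0, -1, 0, 0};
//        rewriteBroadcastMaskToIdentity(M); // M == {0, -1, 2, 3}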
// Try to lower with the simpler initial blend/unpack/rotate strategies unless
// one of the input shuffles would be a no-op. We prefer to shuffle inputs as
// the shuffle may be able to fold with a load or other benefit. However, when
@@ -12974,7 +13411,7 @@ static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
- assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");
+ assert(!Zeroable.isAllOnes() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefUpperHalf(Mask))
@@ -13462,7 +13899,7 @@ static SDValue lowerShuffleAsElementInsertion(
if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
// We need to zext the scalar if it is smaller than an i32.
V2S = DAG.getBitcast(EltVT, V2S);
- if (EltVT == MVT::i8 || EltVT == MVT::i16) {
+ if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
// Using zext to expand a narrow element won't work for non-zero
// insertions.
if (!IsV1Zeroable)
@@ -13494,11 +13931,17 @@ static SDValue lowerShuffleAsElementInsertion(
if (!VT.is128BitVector())
return SDValue();
- // Otherwise, use MOVSD or MOVSS.
- assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
- "Only two types of floating point element types to handle!");
- return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
- ExtVT, V1, V2);
+ // Otherwise, use MOVSD, MOVSS or MOVSH.
+ unsigned MovOpc = 0;
+ if (EltVT == MVT::f16)
+ MovOpc = X86ISD::MOVSH;
+ else if (EltVT == MVT::f32)
+ MovOpc = X86ISD::MOVSS;
+ else if (EltVT == MVT::f64)
+ MovOpc = X86ISD::MOVSD;
+ else
+ llvm_unreachable("Unsupported floating point element type to handle!");
+ return DAG.getNode(MovOpc, DL, ExtVT, V1, V2);
}
// This lowering only works for the low element with floating point vectors.
@@ -15264,14 +15707,28 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false);
if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
!Subtarget.hasVLX()) {
- SmallVector<SDValue, 8> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
- for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
- DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
- SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
- V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
- DWordClearMask);
- V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
- DWordClearMask);
+ // Check if this is part of a 256-bit vector truncation.
+ if (NumEvenDrops == 2 && Subtarget.hasAVX2() &&
+ peekThroughBitcasts(V1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ peekThroughBitcasts(V2).getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ SDValue V1V2 = concatSubVectors(V1, V2, DAG, DL);
+ V1V2 = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1V2,
+ getZeroVector(MVT::v16i16, Subtarget, DAG, DL),
+ DAG.getTargetConstant(0xEE, DL, MVT::i8));
+ V1V2 = DAG.getBitcast(MVT::v8i32, V1V2);
+ V1 = extract128BitVector(V1V2, 0, DAG, DL);
+ V2 = extract128BitVector(V1V2, 4, DAG, DL);
+ } else {
+ SmallVector<SDValue> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
+ for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
+ DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
+ SDValue DWordClearMask =
+ DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
+ V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
+ DWordClearMask);
+ V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
+ DWordClearMask);
+ }
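// For illustration (not part of the change): why clearing the upper half of
// each 32-bit lane makes PACKUS act as a plain truncation. PACKUS packs
// i32 -> i16 with unsigned saturation; once the high 16 bits are masked off,
// every lane already fits in 16 bits, so the saturation never fires.
#include <cassert>
#include <cstdint>
static uint16_t packus32to16(uint32_t V) { // scalar model of one PACKUS lane
  return V > 0xFFFFu ? (uint16_t)0xFFFFu : (uint16_t)V;
}
static void checkDWordClearThenPack(uint32_t Lane) {
  assert(packus32to16(Lane & 0xFFFFu) == (uint16_t)Lane &&
         "masked PACKUS keeps exactly the low i16 element of each dword");
}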
// Now pack things back together.
SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
if (NumEvenDrops == 2) {
@@ -15300,6 +15757,33 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Mask, Subtarget, DAG);
}
+/// Lower 8-lane 16-bit floating point shuffles.
+static SDValue lowerV8F16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const APInt &Zeroable, SDValue V1, SDValue V2,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert(V1.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
+ assert(V2.getSimpleValueType() == MVT::v8f16 && "Bad operand type!");
+ assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
+
+ if (NumV2Elements == 0) {
+ // Check for being able to broadcast a single element.
+ if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f16, V1, V2,
+ Mask, Subtarget, DAG))
+ return Broadcast;
+ }
+ if (NumV2Elements == 1 && Mask[0] >= 8)
+ if (SDValue V = lowerShuffleAsElementInsertion(DL, MVT::v8f16, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return V;
+
+ V1 = DAG.getBitcast(MVT::v8i16, V1);
+ V2 = DAG.getBitcast(MVT::v8i16, V2);
+ return DAG.getBitcast(MVT::v8f16,
+ DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, Mask));
+}
+
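// A minimal sketch (not from this patch) of why the v8f16 shuffle can be done
// on v8i16: a shuffle only moves lanes, so operating on the raw 16-bit
// patterns (the analogue of the bitcasts above) is lossless and involves no
// floating-point semantics.
#include <array>
#include <cstdint>
static std::array<uint16_t, 8> shuffleF16Bits(const std::array<uint16_t, 8> &V1,
                                              const std::array<uint16_t, 8> &V2,
                                              const std::array<int, 8> &Mask) {
  std::array<uint16_t, 8> R{};
  for (int I = 0; I != 8; ++I) {
    int M = Mask[I];
    if (M < 0)
      continue; // undef lane: any value is acceptable
    R[I] = M < 8 ? V1[M] : V2[M - 8];
  }
  return R;
}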
// Lowers unary/binary shuffle as VPERMV/VPERMV3, for non-VLX targets,
// sub-512-bit shuffles are padded to 512-bits for the shuffle and then
// the active subvector is extracted.
@@ -15705,6 +16189,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
+ case MVT::v8f16:
+ return lowerV8F16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
@@ -16083,22 +16569,13 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
- MayFoldLoad(peekThroughOneUseBitcasts(V1))) {
+ X86::mayFoldLoad(peekThroughOneUseBitcasts(V1), Subtarget)) {
+ MVT MemVT = VT.getHalfNumVectorElementsVT();
+ unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
- if (!Ld->isNonTemporal()) {
- MVT MemVT = VT.getHalfNumVectorElementsVT();
- unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
- TypeSize::Fixed(Ofs), DL);
- SDValue Ops[] = {Ld->getChain(), Ptr};
- SDValue BcastLd = DAG.getMemIntrinsicNode(
- X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT,
- DAG.getMachineFunction().getMachineMemOperand(
- Ld->getMemOperand(), Ofs, MemVT.getStoreSize()));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
- return BcastLd;
- }
+ if (SDValue BcstLd = getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, DL,
+ VT, MemVT, Ld, Ofs, DAG))
+ return BcstLd;
}
// With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding.
@@ -17569,6 +18046,13 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
+ if (VT == MVT::v16f16) {
+ V1 = DAG.getBitcast(MVT::v16i16, V1);
+ V2 = DAG.getBitcast(MVT::v16i16, V2);
+ return DAG.getBitcast(MVT::v16f16,
+ DAG.getVectorShuffle(MVT::v16i16, DL, V1, V2, Mask));
+ }
+
switch (VT.SimpleTy) {
case MVT::v4f64:
return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
@@ -18135,6 +18619,13 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
}
+ if (VT == MVT::v32f16) {
+ V1 = DAG.getBitcast(MVT::v32i16, V1);
+ V2 = DAG.getBitcast(MVT::v32i16, V2);
+ return DAG.getBitcast(MVT::v32f16,
+ DAG.getVectorShuffle(MVT::v32i16, DL, V1, V2, Mask));
+ }
+
// Dispatch to each element type for lowering. If we don't have support for
// specific element type shuffles at 512 bits, immediately split them and
// lower them. Each lowering routine of a given type is allowed to assume that
@@ -18431,7 +18922,13 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
return false;
}
-/// Top-level lowering for x86 vector shuffles.
+// Forward declaration.
+static SDValue canonicalizeShuffleMaskWithHorizOp(
+ MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
+ unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget);
+
+/// Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
/// vector shuffles. Most of the specific lowering strategies are encapsulated
@@ -18489,7 +18986,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero);
APInt Zeroable = KnownUndef | KnownZero;
- if (Zeroable.isAllOnesValue())
+ if (Zeroable.isAllOnes())
return getZeroVector(VT, Subtarget, DAG, DL);
bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode());
@@ -18540,8 +19037,22 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ SmallVector<SDValue> Ops = {V1, V2};
+ SmallVector<int> Mask(OrigMask.begin(), OrigMask.end());
+
+ // Canonicalize the shuffle with any horizontal ops inputs.
+ // NOTE: This may update Ops and Mask.
+ if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp(
+ Ops, Mask, VT.getSizeInBits(), DL, DAG, Subtarget))
+ return DAG.getBitcast(VT, HOp);
+
+ V1 = DAG.getBitcast(VT, Ops[0]);
+ V2 = DAG.getBitcast(VT, Ops[1]);
+ assert(NumElements == (int)Mask.size() &&
+ "canonicalizeShuffleMaskWithHorizOp "
+ "shouldn't alter the shuffle mask size");
+
// Commute the shuffle if it will improve canonicalization.
- SmallVector<int, 64> Mask(OrigMask.begin(), OrigMask.end());
if (canonicalizeShuffleMaskWithCommute(Mask)) {
ShuffleVectorSDNode::commuteMask(Mask);
std::swap(V1, V2);
@@ -18686,8 +19197,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
if (VT.getSizeInBits() == 8) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
// we're going to zero extend the register or fold the store.
- if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
- !MayFoldIntoStore(Op))
+ if (llvm::isNullConstant(Idx) && !X86::mayFoldIntoZeroExtend(Op) &&
+ !X86::mayFoldIntoStore(Op))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
@@ -18840,14 +19351,18 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
MVT VT = Op.getSimpleValueType();
- if (VT.getSizeInBits() == 16) {
+ if (VT == MVT::i16) {
// If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
// we're going to zero extend the register or fold the store (SSE41 only).
- if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
- !(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
+ if (IdxVal == 0 && !X86::mayFoldIntoZeroExtend(Op) &&
+ !(Subtarget.hasSSE41() && X86::mayFoldIntoStore(Op))) {
+ if (Subtarget.hasFP16())
+ return Op;
+
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
+ }
SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
@@ -18886,12 +19401,13 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
}
- if (VT.getSizeInBits() == 32) {
+ if (VT == MVT::f16 || VT.getSizeInBits() == 32) {
if (IdxVal == 0)
return Op;
- // SHUFPS the element to the lowest double word, then movss.
- int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 };
+ // Shuffle the element to the lowest element, then movss or movsh.
+ SmallVector<int, 8> Mask(VecVT.getVectorNumElements(), -1);
+ Mask[0] = static_cast<int>(IdxVal);
Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
DAG.getIntPtrConstant(0, dl));
@@ -18994,17 +19510,28 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
bool IsZeroElt = X86::isZeroNode(N1);
bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);
- // If we are inserting a element, see if we can do this more efficiently with
- // a blend shuffle with a rematerializable vector than a costly integer
- // insertion.
- if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
- (16 <= EltSizeInBits || (IsZeroElt && !VT.is128BitVector()))) {
- SmallVector<int, 8> BlendMask;
- for (unsigned i = 0; i != NumElts; ++i)
- BlendMask.push_back(i == IdxVal ? i + NumElts : i);
- SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
- : getOnesVector(VT, DAG, dl);
- return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
+ if (IsZeroElt || IsAllOnesElt) {
+ // Lower insertion of i8 -1 as an 'OR' blend.
+ // We don't deal with i8 0 since it appears to be handled elsewhere.
+ if (IsAllOnesElt && EltSizeInBits == 8 && !Subtarget.hasSSE41()) {
+ SDValue ZeroCst = DAG.getConstant(0, dl, VT.getScalarType());
+ SDValue OnesCst = DAG.getAllOnesConstant(dl, VT.getScalarType());
+ SmallVector<SDValue, 8> CstVectorElts(NumElts, ZeroCst);
+ CstVectorElts[IdxVal] = OnesCst;
+ SDValue CstVector = DAG.getBuildVector(VT, dl, CstVectorElts);
+ return DAG.getNode(ISD::OR, dl, VT, N0, CstVector);
+ }
+ // See if we can do this more efficiently with a blend shuffle with a
+ // rematerializable vector.
+ if (Subtarget.hasSSE41() &&
+ (EltSizeInBits >= 16 || (IsZeroElt && !VT.is128BitVector()))) {
+ SmallVector<int, 8> BlendMask;
+ for (unsigned i = 0; i != NumElts; ++i)
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
+ : getOnesVector(VT, DAG, dl);
+ return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
+ }
}
// If the vector is wider than 128 bits, extract the 128-bit subvector, insert
@@ -19024,12 +19551,28 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
}
+ unsigned NumEltsIn128 = 128 / EltSizeInBits;
+ assert(isPowerOf2_32(NumEltsIn128) &&
+ "Vectors will always have power-of-two number of elements.");
+
+ // If we are not inserting into the low 128-bit vector chunk,
+ // then prefer the broadcast+blend sequence.
+ // FIXME: relax the profitability check iff all N1 uses are insertions.
+ if (!VT.is128BitVector() && IdxVal >= NumEltsIn128 &&
+ ((Subtarget.hasAVX2() && EltSizeInBits != 8) ||
+ (Subtarget.hasAVX() && (EltSizeInBits >= 32) &&
+ X86::mayFoldLoad(N1, Subtarget)))) {
+ SDValue N1SplatVec = DAG.getSplatBuildVector(VT, dl, N1);
+ SmallVector<int, 8> BlendMask;
+ for (unsigned i = 0; i != NumElts; ++i)
+ BlendMask.push_back(i == IdxVal ? i + NumElts : i);
+ return DAG.getVectorShuffle(VT, dl, N0, N1SplatVec, BlendMask);
+ }
+
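// For illustration (not part of the change): the blend mask built above picks
// lane IdxVal from the splatted scalar and every other lane from the original
// vector, e.g. for 8 elements and IdxVal == 5 it is {0, 1, 2, 3, 4, 13, 6, 7}
// (13 == 5 + NumElts).
#include <vector>
static std::vector<int> insertionBlendMask(unsigned NumElts, unsigned IdxVal) {
  std::vector<int> BlendMask;
  for (unsigned I = 0; I != NumElts; ++I)
    BlendMask.push_back(I == IdxVal ? (int)(I + NumElts) : (int)I);
  return BlendMask;
}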
// Get the desired 128-bit vector chunk.
SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);
// Insert the element into the desired chunk.
- unsigned NumEltsIn128 = 128 / EltSizeInBits;
- assert(isPowerOf2_32(NumEltsIn128));
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
@@ -19041,10 +19584,10 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
}
assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");
- // This will be just movd/movq/movss/movsd.
+ // This will be just movw/movd/movq/movsh/movss/movsd.
if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
- EltVT == MVT::i64) {
+ EltVT == MVT::f16 || EltVT == MVT::i64) {
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
}
@@ -19091,7 +19634,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// combine either bitwise AND or insert of float 0.0 to set these bits.
bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
- if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
+ if (IdxVal == 0 && (!MinSize || !X86::mayFoldLoad(N1, Subtarget))) {
// If this is an insertion of 32-bits into the low 32-bits of
// a vector, we prefer to generate a blend with immediate rather
// than an insertps. Blends are simpler operations in hardware and so
@@ -19143,8 +19686,9 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
"Expected an SSE type!");
- // Pass through a v4i32 SCALAR_TO_VECTOR as that's what we use in tblgen.
- if (OpVT == MVT::v4i32)
+ // Pass through a v4i32 or v8i16 SCALAR_TO_VECTOR as that's what we use in
+ // tblgen.
+ if (OpVT == MVT::v4i32 || (OpVT == MVT::v8i16 && Subtarget.hasFP16()))
return Op;
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
@@ -19207,7 +19751,7 @@ unsigned X86TargetLowering::getGlobalWrapperKind(
return X86ISD::WrapperRIP;
// GOTPCREL references must always use RIP.
- if (OpFlags == X86II::MO_GOTPCREL)
+ if (OpFlags == X86II::MO_GOTPCREL || OpFlags == X86II::MO_GOTPCREL_NORELAX)
return X86ISD::WrapperRIP;
return X86ISD::Wrapper;
@@ -19682,92 +20226,6 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
-static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
- "Unexpected funnel shift opcode!");
-
- SDLoc DL(Op);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- SDValue Amt = Op.getOperand(2);
-
- bool IsFSHR = Op.getOpcode() == ISD::FSHR;
-
- if (VT.isVector()) {
- assert(Subtarget.hasVBMI2() && "Expected VBMI2");
-
- if (IsFSHR)
- std::swap(Op0, Op1);
-
- // With AVX512, but not VLX we need to widen to get a 512-bit result type.
- if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
- Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
- Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
- }
-
- SDValue Funnel;
- APInt APIntShiftAmt;
- MVT ResultVT = Op0.getSimpleValueType();
- if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
- uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
- Funnel =
- DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
- Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
- } else {
- if (!Subtarget.hasVLX() && !VT.is512BitVector())
- Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
- Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
- ResultVT, Op0, Op1, Amt);
- }
- if (!Subtarget.hasVLX() && !VT.is512BitVector())
- Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
- return Funnel;
- }
- assert(
- (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
- "Unexpected funnel shift type!");
-
- // Expand slow SHLD/SHRD cases if we are not optimizing for size.
- bool OptForSize = DAG.shouldOptForSize();
- bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
-
- // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
- // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
- if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
- !isa<ConstantSDNode>(Amt)) {
- unsigned EltSizeInBits = VT.getScalarSizeInBits();
- SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
- SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
- Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
- Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
- Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
- SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
- Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
- if (IsFSHR) {
- Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
- } else {
- Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
- Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
- }
- return DAG.getZExtOrTrunc(Res, DL, VT);
- }
-
- if (VT == MVT::i8 || ExpandFunnel)
- return SDValue();
-
- // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
- if (VT == MVT::i16) {
- Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
- DAG.getConstant(15, DL, Amt.getValueType()));
- unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
- return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
- }
-
- return Op;
-}
-
// Try to use a packed vector operation to handle i64 on 32-bit targets when
// AVX512DQ is enabled.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
@@ -19811,6 +20269,43 @@ static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
DAG.getIntPtrConstant(0, dl));
}
+// Try to use a packed vector operation to handle i64 on 32-bit targets.
+static SDValue LowerI64IntToFP16(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((Op.getOpcode() == ISD::SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
+ Op.getOpcode() == ISD::UINT_TO_FP) &&
+ "Unexpected opcode!");
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
+
+ if (SrcVT != MVT::i64 || Subtarget.is64Bit() || VT != MVT::f16)
+ return SDValue();
+
+ // Pack the i64 into a vector, do the operation and extract.
+
+ assert(Subtarget.hasFP16() && "Expected FP16");
+
+ SDLoc dl(Op);
+ SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
+ if (IsStrict) {
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {MVT::v2f16, MVT::Other},
+ {Op.getOperand(0), InVec});
+ SDValue Chain = CvtVec.getValue(1);
+ SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+ return DAG.getMergeValues({Value, Chain}, dl);
+ }
+
+ SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, MVT::v2f16, InVec);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
+ DAG.getIntPtrConstant(0, dl));
+}
+
static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT,
const X86Subtarget &Subtarget) {
switch (Opcode) {
@@ -20024,6 +20519,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
+ if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
+ return LowerWin64_INT128_TO_FP(Op, DAG);
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
@@ -20063,6 +20561,8 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
+ if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+ return V;
// SSE doesn't have an i16 conversion so we need to promote.
if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
@@ -20521,6 +21021,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (DstVT.isVector())
return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
+ if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
+ return LowerWin64_INT128_TO_FP(Op, DAG);
+
if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
return Extract;
@@ -20542,6 +21045,8 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
return V;
+ if (SDValue V = LowerI64IntToFP16(Op, DAG, Subtarget))
+ return V;
// The transform for i64->f64 isn't correct for 0 when rounding to negative
// infinity. It produces -0.0, so disable under strictfp.
@@ -21323,9 +21828,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
MVT VT = Op->getSimpleValueType(0);
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ SDValue Chain = IsStrict ? Op->getOperand(0) : SDValue();
MVT SrcVT = Src.getSimpleValueType();
SDLoc dl(Op);
+ SDValue Res;
if (VT.isVector()) {
if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
MVT ResVT = MVT::v4i32;
@@ -21350,10 +21857,8 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
}
- SDValue Res, Chain;
if (IsStrict) {
- Res =
- DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
+ Res = DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Opc, dl, ResVT, Src);
@@ -21367,6 +21872,67 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
+ if (Subtarget.hasFP16() && SrcVT.getVectorElementType() == MVT::f16) {
+ if (VT == MVT::v8i16 || VT == MVT::v16i16 || VT == MVT::v32i16)
+ return Op;
+
+ MVT ResVT = VT;
+ MVT EleVT = VT.getVectorElementType();
+ if (EleVT != MVT::i64)
+ ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+ if (SrcVT != MVT::v8f16) {
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+ SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+ }
+
+ if (IsStrict) {
+ Res = DAG.getNode(IsSigned ? X86ISD::STRICT_CVTTP2SI
+ : X86ISD::STRICT_CVTTP2UI,
+ dl, {ResVT, MVT::Other}, {Chain, Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI, dl,
+ ResVT, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ if (EleVT.getSizeInBits() < 16) {
+ ResVT = MVT::getVectorVT(EleVT, 8);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, ResVT, Res);
+ }
+
+ if (ResVT != VT)
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
+ DAG.getIntPtrConstant(0, dl));
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
+ if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+ if (IsStrict) {
+ Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
+ : ISD::STRICT_FP_TO_UINT,
+ dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+ Chain = Res.getValue(1);
+ } else {
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
+ MVT::v8i32, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+
+ if (IsStrict)
+ return DAG.getMergeValues({Res, Chain}, dl);
+ return Res;
+ }
+
// v8f64->v8i32 is legal, but we need v8i32 to be custom for v8f32.
if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
assert(!IsSigned && "Expected unsigned conversion!");
@@ -21390,10 +21956,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
- {Op->getOperand(0), Src});
+ {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
@@ -21421,10 +21986,9 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
DAG.getIntPtrConstant(0, dl));
- SDValue Res, Chain;
if (IsStrict) {
Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
- {Op->getOperand(0), Src});
+ {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
@@ -21449,7 +22013,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
{Src, Zero, Zero, Zero});
Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
- {Op->getOperand(0), Tmp});
+ {Chain, Tmp});
SDValue Chain = Tmp.getValue(1);
Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
DAG.getIntPtrConstant(0, dl));
@@ -21532,17 +22096,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// FIXME: This does not generate an invalid exception if the input does not
// fit in i32. PR44019
if (Subtarget.is64Bit()) {
- SDValue Res, Chain;
if (IsStrict) {
- Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
- { Op.getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i64, MVT::Other},
+ {Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
- return DAG.getMergeValues({ Res, Chain }, dl);
+ return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
@@ -21557,17 +22120,16 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
// fit in i16. PR44019
if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
- SDValue Res, Chain;
if (IsStrict) {
- Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
- { Op.getOperand(0), Src });
+ Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {MVT::i32, MVT::Other},
+ {Chain, Src});
Chain = Res.getValue(1);
} else
Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
- return DAG.getMergeValues({ Res, Chain }, dl);
+ return DAG.getMergeValues({Res, Chain}, dl);
return Res;
}
@@ -21583,7 +22145,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
else
LC = RTLIB::getFPTOUINT(SrcVT, VT);
- SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
MakeLibCallOptions CallOptions;
std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
SDLoc(Op), Chain);
@@ -21595,7 +22156,6 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
}
// Fall back to X87.
- SDValue Chain;
if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
if (IsStrict)
return DAG.getMergeValues({V, Chain}, dl);
@@ -21822,6 +22382,35 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f128)
return SDValue();
+ if (VT == MVT::f80) {
+ if (SVT == MVT::f16) {
+ assert(Subtarget.hasFP16() && "Unexpected features!");
+ RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, VT);
+ MakeLibCallOptions CallOptions;
+ std::pair<SDValue, SDValue> Tmp =
+ makeLibCall(DAG, LC, VT, In, CallOptions, DL,
+ IsStrict ? Op.getOperand(0) : SDValue());
+ if (IsStrict)
+ return DAG.getMergeValues({Tmp.first, Tmp.second}, DL);
+ else
+ return Tmp.first;
+ }
+ return Op;
+ }
+
+ if (SVT.getVectorElementType() == MVT::f16) {
+ assert(Subtarget.hasFP16() && Subtarget.hasVLX() && "Unexpected features!");
+ if (SVT == MVT::v2f16)
+ In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In,
+ DAG.getUNDEF(MVT::v2f16));
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8f16, In,
+ DAG.getUNDEF(MVT::v4f16));
+ if (IsStrict)
+ return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other},
+ {Op->getOperand(0), Res});
+ return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res);
+ }
+
assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
SDValue Res =
@@ -21835,8 +22424,11 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
bool IsStrict = Op->isStrictFPOpcode();
SDValue In = Op.getOperand(IsStrict ? 1 : 0);
- // It's legal except when f128 is involved
- if (In.getSimpleValueType() != MVT::f128)
+ MVT VT = Op.getSimpleValueType();
+ MVT SVT = In.getSimpleValueType();
+
+ // It's legal except when f128 is involved or we're converting f80->f16.
+ if (SVT != MVT::f128 && !(VT == MVT::f16 && SVT == MVT::f80))
return Op;
return SDValue();
@@ -22026,9 +22618,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);
- assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
- VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
- VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ assert(VT.isFloatingPoint() && VT != MVT::f80 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFABSorFNEG");
// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
@@ -22042,7 +22633,9 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
- LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
+ LogicVT = (VT == MVT::f64) ? MVT::v2f64
+ : (VT == MVT::f32) ? MVT::v4f32
+ : MVT::v8f16;
unsigned EltBits = VT.getScalarSizeInBits();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
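// A scalar sketch (not from this patch) of what the "fake vector" logic op
// computes: FABS clears the sign bit with an AND of 0x7f..., FNEG flips it
// with an XOR of 0x80..., shown here on the raw bits of a float.
#include <cstdint>
#include <cstring>
static float fabsViaMask(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits &= 0x7FFFFFFFu; // FABS: clear the sign bit
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}
static float fnegViaMask(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits ^= 0x80000000u; // FNEG: flip the sign bit
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}
// e.g. fabsViaMask(-1.5f) == 1.5f and fnegViaMask(2.0f) == -2.0f.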
@@ -22087,9 +22680,8 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
bool IsF128 = (VT == MVT::f128);
- assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
- VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
- VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
+ assert(VT.isFloatingPoint() && VT != MVT::f80 &&
+ DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
"Unexpected type in LowerFCOPYSIGN");
const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
@@ -22102,7 +22694,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
bool IsFakeVector = !VT.isVector() && !IsF128;
MVT LogicVT = VT;
if (IsFakeVector)
- LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
+ LogicVT = (VT == MVT::f64) ? MVT::v2f64
+ : (VT == MVT::f32) ? MVT::v4f32
+ : MVT::v8f16;
// The mask constants are automatically splatted for vector types.
unsigned EltSizeInBits = VT.getScalarSizeInBits();
@@ -22208,7 +22802,7 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType())
return false;
unsigned NumElts = VT.getVectorNumElements();
- APInt EltCount = APInt::getNullValue(NumElts);
+ APInt EltCount = APInt::getZero(NumElts);
M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first;
SrcOps.push_back(Src);
}
@@ -22227,7 +22821,7 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
} else {
// Quit if not all elements are used.
for (const auto &I : SrcOpMap)
- if (!I.second.isAllOnesValue())
+ if (!I.second.isAllOnes())
return false;
}
@@ -22250,7 +22844,7 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);
auto MaskBits = [&](SDValue Src) {
- if (Mask.isAllOnesValue())
+ if (Mask.isAllOnes())
return Src;
EVT SrcVT = Src.getValueType();
SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
@@ -22288,8 +22882,8 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
// Without PTEST, a masked v2i64 or-reduction is not faster than
// scalarization.
- if (!Mask.isAllOnesValue() && VT.getScalarSizeInBits() > 32)
- return SDValue();
+ if (!Mask.isAllOnes() && VT.getScalarSizeInBits() > 32)
+ return SDValue();
V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
@@ -22312,7 +22906,7 @@ static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
// Check whether we're masking/truncating an OR-reduction result, in which
// case track the masked bits.
- APInt Mask = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt Mask = APInt::getAllOnes(Op.getScalarValueSizeInBits());
switch (Op.getOpcode()) {
case ISD::TRUNCATE: {
SDValue Src = Op.getOperand(0);
@@ -22543,16 +23137,10 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// For equality comparisons try to use SIGN_EXTEND if the input was
// truncate from something with enough sign bits.
if (Op0.getOpcode() == ISD::TRUNCATE) {
- SDValue In = Op0.getOperand(0);
- unsigned EffBits =
- In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
- if (EffBits <= 16)
+ if (DAG.ComputeMinSignedBits(Op0.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
} else if (Op1.getOpcode() == ISD::TRUNCATE) {
- SDValue In = Op1.getOperand(0);
- unsigned EffBits =
- In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
- if (EffBits <= 16)
+ if (DAG.ComputeMinSignedBits(Op1.getOperand(0)) <= 16)
ExtendOp = ISD::SIGN_EXTEND;
}
}
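// A standalone sketch (not from this patch) of the "minimum signed bits" idea
// used above: if a 32-bit value needs at most 16 signed bits, sign-extending
// its 16-bit truncation reproduces it, so the equality compare can be done on
// the sign-extended form. __builtin_clrsb is the GCC/Clang builtin counting
// redundant leading sign bits.
#include <cassert>
static unsigned minSignedBits(int X) {
  return 32u - (unsigned)__builtin_clrsb(X);
}
static void checkSExtRoundTrip(int X) {
  if (minSignedBits(X) <= 16)
    assert((int)(short)X == X && "16-bit sign-extension is lossless");
}
// e.g. minSignedBits(1000) == 11, minSignedBits(-1) == 1,
//      minSignedBits(40000) == 17.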
@@ -22618,6 +23206,7 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
int &RefinementSteps,
bool &UseOneConstNR,
bool Reciprocal) const {
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
@@ -22639,7 +23228,23 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
UseOneConstNR = false;
// There is no FSQRT for 512-bits, but there is RSQRT14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
- return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
+ return DAG.getNode(Opcode, DL, VT, Op);
+ }
+
+ if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
+ Subtarget.hasFP16()) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 0;
+
+ if (VT == MVT::f16) {
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ SDValue Undef = DAG.getUNDEF(MVT::v8f16);
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, Op);
+ Op = DAG.getNode(X86ISD::RSQRT14S, DL, MVT::v8f16, Undef, Op);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Op, Zero);
+ }
+
+ return DAG.getNode(X86ISD::RSQRT14, DL, VT, Op);
}
return SDValue();
}
@@ -22649,6 +23254,7 @@ SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
int Enabled,
int &RefinementSteps) const {
+ SDLoc DL(Op);
EVT VT = Op.getValueType();
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
@@ -22673,7 +23279,23 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
// There is no FSQRT for 512-bits, but there is RCP14.
unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP;
- return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
+ return DAG.getNode(Opcode, DL, VT, Op);
+ }
+
+ if (VT.getScalarType() == MVT::f16 && isTypeLegal(VT) &&
+ Subtarget.hasFP16()) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 0;
+
+ if (VT == MVT::f16) {
+ SDValue Zero = DAG.getIntPtrConstant(0, DL);
+ SDValue Undef = DAG.getUNDEF(MVT::v8f16);
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v8f16, Op);
+ Op = DAG.getNode(X86ISD::RCP14S, DL, MVT::v8f16, Undef, Op);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Op, Zero);
+ }
+
+ return DAG.getNode(X86ISD::RCP14, DL, VT, Op);
}
return SDValue();
}
@@ -22696,7 +23318,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
if (isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
- assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+ assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
"Unexpected divisor!");
// Only perform this transform if CMOV is supported otherwise the select
@@ -22956,7 +23578,7 @@ static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
// Avoid overflow/underflow.
const APInt &EltC = Elt->getAPIntValue();
- if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue()))
+ if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
return SDValue();
NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
@@ -23037,7 +23659,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (isFP) {
#ifndef NDEBUG
MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64);
#endif
bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
@@ -23051,7 +23673,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
(!IsStrict || Subtarget.hasVLX() ||
Op0.getSimpleValueType().is512BitVector())) {
- assert(VT.getVectorNumElements() <= 16);
+#ifndef NDEBUG
+ unsigned Num = VT.getVectorNumElements();
+ assert(Num <= 16 || (Num == 32 && EltVT == MVT::f16));
+#endif
Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
} else {
Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP;
@@ -23272,7 +23897,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
Cond = ISD::SETGT;
else if (ConstValue.isMaxSignedValue())
Cond = ISD::SETLT;
- else if (ConstValue.isNullValue() && DAG.SignBitIsZero(Op0))
+ else if (ConstValue.isZero() && DAG.SignBitIsZero(Op0))
Cond = ISD::SETGT;
}
@@ -23625,7 +24250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// TODO: Can we move this to TranslateX86CC to handle jumps/branches too?
if (auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
const APInt &Op1Val = Op1C->getAPIntValue();
- if (!Op1Val.isNullValue()) {
+ if (!Op1Val.isZero()) {
// Ensure the constant+1 doesn't overflow.
if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) ||
(CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) {
@@ -24053,8 +24678,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// being inserted between two CMOV's. (in i16 case too TBN)
// https://bugs.llvm.org/show_bug.cgi?id=40974
if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
- (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) &&
- !MayFoldLoad(Op2))) {
+ (Op.getValueType() == MVT::i16 && !X86::mayFoldLoad(Op1, Subtarget) &&
+ !X86::mayFoldLoad(Op2, Subtarget))) {
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
SDValue Ops[] = { Op2, Op1, CC, Cond };
@@ -24699,8 +25324,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
DAG.getRegister(Vreg, SPTy));
} else {
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
- MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);
+ Chain = DAG.getNode(X86ISD::DYN_ALLOCA, dl, NodeTys, Chain, Size);
+ MF.getInfo<X86MachineFunctionInfo>()->setHasDynAlloca(true);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
Register SPReg = RegInfo->getStackRegister();
@@ -24814,7 +25439,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
}
if (ArgMode == 2) {
- // Sanity Check: Make sure using fp_offset makes sense.
+ // Make sure using fp_offset makes sense.
assert(!Subtarget.useSoftFloat() &&
!(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) &&
Subtarget.hasSSE1());
@@ -25554,6 +26179,35 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Swap Src1 and Src2 in the node creation
return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1);
}
+ case CFMA_OP_MASKZ:
+ case CFMA_OP_MASK: {
+ SDValue Src1 = Op.getOperand(1);
+ SDValue Src2 = Op.getOperand(2);
+ SDValue Src3 = Op.getOperand(3);
+ SDValue Mask = Op.getOperand(4);
+ MVT VT = Op.getSimpleValueType();
+
+ SDValue PassThru = Src3;
+ if (IntrData->Type == CFMA_OP_MASKZ)
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+
+ // We add rounding mode to the Node when
+ // - RC Opcode is specified and
+ // - RC is not "current direction".
+ SDValue NewOp;
+ if (IntrData->Opc1 != 0) {
+ SDValue Rnd = Op.getOperand(5);
+ unsigned RC = 0;
+ if (isRoundModeSAEToX(Rnd, RC))
+ NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, Src3,
+ DAG.getTargetConstant(RC, dl, MVT::i32));
+ else if (!isRoundModeCurDirection(Rnd))
+ return SDValue();
+ }
+ if (!NewOp)
+ NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2, Src3);
+ return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG);
+ }
case IFMA_OP:
// NOTE: We need to swizzle the operands to pass the multiply operands
// first.
@@ -26165,6 +26819,19 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
getPointerTy(DAG.getDataLayout())),
Op.getOperand(1), ShAmt);
}
+ case Intrinsic::thread_pointer: {
+ if (Subtarget.isTargetELF()) {
+ SDLoc dl(Op);
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ // Get the Thread Pointer, which is %gs:0 (32-bit) or %fs:0 (64-bit).
+ Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(
+ *DAG.getContext(), Subtarget.is64Bit() ? X86AS::FS : X86AS::GS));
+ return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+ DAG.getIntPtrConstant(0, dl), MachinePointerInfo(Ptr));
+ }
+ report_fatal_error(
+ "Target OS doesn't support __builtin_thread_pointer() yet.");
+ }
}
}
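// A hedged sketch (not part of this patch) of what the ELF lowering above
// amounts to, assuming x86-64 Linux and GCC/Clang extended asm: the thread
// pointer is the value stored at offset 0 from the %fs segment base (or %gs
// on 32-bit), so reading it is a single segment-relative load.
static void *readThreadPointerX86_64() {
  void *TP;
  asm("movq %%fs:0, %0" : "=r"(TP));
  return TP;
}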
@@ -26469,6 +27136,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
DAG.getConstant(0, dl, MVT::i32),
DAG.getConstant(0, dl, MVT::i32));
}
+ case llvm::Intrinsic::asan_check_memaccess: {
+ // Mark this as adjustsStack because it will be lowered to a call.
+ DAG.getMachineFunction().getFrameInfo().setAdjustsStack(true);
+ // Don't do anything here, we will expand these intrinsics out later.
+ return Op;
+ }
case llvm::Intrinsic::x86_flags_read_u32:
case llvm::Intrinsic::x86_flags_read_u64:
case llvm::Intrinsic::x86_flags_write_u32:
@@ -27044,11 +27717,11 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
if (!Attrs.isEmpty() && !Func->isVarArg()) {
unsigned InRegCount = 0;
- unsigned Idx = 1;
+ unsigned Idx = 0;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I, ++Idx)
- if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
+ if (Attrs.hasParamAttr(Idx, Attribute::InReg)) {
const DataLayout &DL = DAG.getDataLayout();
// FIXME: should only count parameters that are lowered to integers.
InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
@@ -27517,15 +28190,51 @@ static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
EVT SetCCResultType =
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
- // usubsat X, Y --> (X >u Y) ? X - Y : 0
- SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
- SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
- // TODO: Move this to DAGCombiner?
- if (SetCCResultType == VT &&
- DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
- return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
- return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ if (Opcode == ISD::USUBSAT) {
+ if (!TLI.isOperationLegal(ISD::UMAX, VT) || useVPTERNLOG(Subtarget, VT)) {
+ // Handle a special-case with a bit-hack instead of cmp+select:
+ // usubsat X, SMIN --> (X ^ SMIN) & (X s>> BW-1)
+ // If the target can use VPTERNLOG, DAGToDAG will match this as
+ // "vpsra + vpternlog" which is better than "vpmax + vpsub" with a
+ // "broadcast" constant load.
+ ConstantSDNode *C = isConstOrConstSplat(Y, true);
+ if (C && C->getAPIntValue().isSignMask()) {
+ SDValue SignMask = DAG.getConstant(C->getAPIntValue(), DL, VT);
+ SDValue ShiftAmt = DAG.getConstant(BitWidth - 1, DL, VT);
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, X, SignMask);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShiftAmt);
+ return DAG.getNode(ISD::AND, DL, VT, Xor, Sra);
+ }
+ }
+ if (!TLI.isOperationLegal(ISD::UMAX, VT)) {
+ // usubsat X, Y --> (X >u Y) ? X - Y : 0
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
+ SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
+ // TODO: Move this to DAGCombiner?
+ if (SetCCResultType == VT &&
+ DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
+ return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
+ return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
+ }
+ }
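// The bit-hack above, verified exhaustively for i8 in a standalone sketch
// (not from this patch): usubsat(X, 0x80) is X - 0x80 when the sign bit of X
// is set and 0 otherwise, and (X ^ 0x80) & (X s>> 7) computes exactly that,
// because the arithmetic shift yields an all-ones or all-zeros mask.
#include <cassert>
#include <cstdint>
static void checkUSubSatSignMaskHack() {
  for (unsigned V = 0; V != 256; ++V) {
    uint8_t X = (uint8_t)V;
    uint8_t Expected = X >= 0x80 ? (uint8_t)(X - 0x80) : 0; // usubsat X, SMIN
    uint8_t Sra = (uint8_t)((int8_t)X >> 7); // all-ones iff sign bit set
    uint8_t Hack = (uint8_t)((X ^ 0x80) & Sra);
    assert(Hack == Expected);
  }
}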
+
+ if ((Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) &&
+ (!VT.isVector() || VT == MVT::v2i64)) {
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Result =
+ DAG.getNode(Opcode == ISD::SADDSAT ? ISD::SADDO : ISD::SSUBO, DL,
+ DAG.getVTList(VT, SetCCResultType), X, Y);
+ SDValue SumDiff = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue SatMin = DAG.getConstant(MinVal, DL, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, DL, VT);
+ SDValue SumNeg =
+ DAG.getSetCC(DL, SetCCResultType, SumDiff, Zero, ISD::SETLT);
+ Result = DAG.getSelect(DL, VT, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(DL, VT, Overflow, Result, SumDiff);
}
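// A scalar sketch (not from this patch) of the saddsat/ssubsat expansion
// above, with the compiler overflow builtin standing in for ISD::SADDO: on
// overflow the sign of the wrapped sum is inverted, so "wrapped sum negative"
// selects the positive saturation value.
#include <cstdint>
#include <limits>
static int8_t saddsat8(int8_t X, int8_t Y) {
  int8_t SumDiff;
  bool Overflow = __builtin_add_overflow(X, Y, &SumDiff); // SADDO
  int8_t SatMax = std::numeric_limits<int8_t>::max();     // 127
  int8_t SatMin = std::numeric_limits<int8_t>::min();     // -128
  int8_t Saturated = SumDiff < 0 ? SatMax : SatMin; // select(SumNeg, SatMax, SatMin)
  return Overflow ? Saturated : SumDiff;            // select(Overflow, Saturated, SumDiff)
}
// e.g. saddsat8(100, 100) == 127 and saddsat8(-100, -100) == -128.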
// Use default expansion.
@@ -27542,7 +28251,7 @@ static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
SDValue N0 = Op.getOperand(0);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
DAG.getConstant(0, DL, VT), N0);
- SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8),
+ SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_NS, DL, MVT::i8),
SDValue(Neg.getNode(), 1)};
return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
}
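// For illustration (not part of the change): the scalar select structure of
// the CMOV-based ABS lowering. COND_NS tests only the sign flag of the
// negation, i.e. "take 0 - X when that value is non-negative, otherwise keep
// X"; INT_MIN stays INT_MIN, matching ISD::ABS.
#include <cstdint>
static int32_t absViaNegCmov(int32_t X) {
  int32_t Neg = (int32_t)(0u - (uint32_t)X); // X86ISD::SUB 0, X (may wrap)
  return Neg >= 0 ? Neg : X;                 // CMOV on COND_NS of that SUB
}
// e.g. absViaNegCmov(-7) == 7, absViaNegCmov(7) == 7,
//      absViaNegCmov(INT32_MIN) == INT32_MIN.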
@@ -27646,9 +28355,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
// Multiply, mask the lower 8bits of the lo/hi results and pack.
SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
- RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
- RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
- return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ return getPack(DAG, Subtarget, dl, VT, RLo, RHi);
}
// Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle.
@@ -27801,19 +28508,10 @@ static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo);
SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi);
- if (Low) {
- // Mask the lower bits and pack the results to rejoin the halves.
- SDValue Mask = DAG.getConstant(255, dl, ExVT);
- SDValue LLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, Mask);
- SDValue LHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, Mask);
- *Low = DAG.getNode(X86ISD::PACKUS, dl, VT, LLo, LHi);
- }
-
- RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
- RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);
+ if (Low)
+ *Low = getPack(DAG, Subtarget, dl, VT, RLo, RHi);
- // Bitcast back to VT and then pack all the even elements from Lo and Hi.
- return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
+ return getPack(DAG, Subtarget, dl, VT, RLo, RHi, /*PackHiHalf*/ true);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
@@ -28111,9 +28809,80 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons
return DAG.getBitcast(VT, CallInfo.first);
}
+SDValue X86TargetLowering::LowerWin64_FP_TO_INT128(SDValue Op,
+ SelectionDAG &DAG,
+ SDValue &Chain) const {
+ assert(Subtarget.isTargetWin64() && "Unexpected target");
+ EVT VT = Op.getValueType();
+ bool IsStrict = Op->isStrictFPOpcode();
+
+ SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
+ EVT ArgVT = Arg.getValueType();
+
+ assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
+ "Unexpected return type for lowering");
+
+ RTLIB::Libcall LC;
+ if (Op->getOpcode() == ISD::FP_TO_SINT ||
+ Op->getOpcode() == ISD::STRICT_FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(ArgVT, VT);
+ else
+ LC = RTLIB::getFPTOUINT(ArgVT, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
+
+ SDLoc dl(Op);
+ MakeLibCallOptions CallOptions;
+ Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+ SDValue Result;
+ // The i128 result is returned as a v2i64 in xmm0; cast it back to the
+ // expected VT (i128).
+ std::tie(Result, Chain) =
+ makeLibCall(DAG, LC, MVT::v2i64, Arg, CallOptions, dl, Chain);
+ Result = DAG.getBitcast(VT, Result);
+ return Result;
+}
+
+SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget.isTargetWin64() && "Unexpected target");
+ EVT VT = Op.getValueType();
+ bool IsStrict = Op->isStrictFPOpcode();
+
+ SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
+ EVT ArgVT = Arg.getValueType();
+
+ assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
+ "Unexpected argument type for lowering");
+
+ RTLIB::Libcall LC;
+ if (Op->getOpcode() == ISD::SINT_TO_FP ||
+ Op->getOpcode() == ISD::STRICT_SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(ArgVT, VT);
+ else
+ LC = RTLIB::getUINTTOFP(ArgVT, VT);
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
+
+ SDLoc dl(Op);
+ MakeLibCallOptions CallOptions;
+ SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+ // Pass the i128 argument as an indirect argument on the stack.
+ SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
+ int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain = DAG.getStore(Chain, dl, Arg, StackPtr, MPI, Align(16));
+
+ SDValue Result;
+ std::tie(Result, Chain) =
+ makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);
+ return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
+}
+
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
-static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
+static bool supportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (VT.getScalarSizeInBits() < 16)
return false;
@@ -28133,14 +28902,14 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
-bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
+bool supportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
- return SupportedVectorShiftWithImm(VT, Subtarget, Opcode);
+ return supportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
-static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
+static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16)
@@ -28216,7 +28985,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();
- if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
+ if (supportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);
// i64 SRA needs to be performed as partial shifts.
@@ -28231,8 +29000,15 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
// Simple i8 add case
- if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
+ if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) {
+ // R may be undef at run-time, but (shl R, 1) must be an even number (LSB
+ // must be 0). (add undef, undef) however can be any value. To make this
+ // safe, we must freeze R to ensure that register allocation uses the same
+ // register for an undefined value. This ensures that the result will
+ // still be even and preserves the original semantics.
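+ // e.g. if the two uses of an unfrozen undef R were given different values
+ // (say 1 and 2), the add would produce 3, which is odd; freezing R pins
+ // both uses to the same (arbitrary) value, so the sum stays even.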
+ R = DAG.getNode(ISD::FREEZE, dl, VT, R);
return DAG.getNode(ISD::ADD, dl, VT, R, R);
+ }
// ashr(R, 7) === cmp_slt(R, 0)
if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
@@ -28293,7 +29069,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);
if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
+ if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
MVT EltVT = VT.getVectorElementType();
assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
@@ -28311,7 +29087,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
!Subtarget.hasXOP()) {
unsigned NumElts = VT.getVectorNumElements();
MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
+ if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);
@@ -28363,7 +29139,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
- if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
+ if (supportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
}
return SDValue();
@@ -28376,8 +29152,10 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
MVT VT = Amt.getSimpleValueType();
if (!(VT == MVT::v8i16 || VT == MVT::v4i32 ||
(Subtarget.hasInt256() && VT == MVT::v16i16) ||
- (Subtarget.hasVBMI2() && VT == MVT::v32i16) ||
- (!Subtarget.hasAVX512() && VT == MVT::v16i8)))
+ (Subtarget.hasAVX512() && VT == MVT::v32i16) ||
+ (!Subtarget.hasAVX512() && VT == MVT::v16i8) ||
+ (Subtarget.hasInt256() && VT == MVT::v32i8) ||
+ (Subtarget.hasBWI() && VT == MVT::v64i8)))
return SDValue();
if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
@@ -28425,10 +29203,7 @@ static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl,
Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG);
if (Subtarget.hasSSE41())
return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi);
-
- return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo),
- DAG.getBitcast(VT, Hi),
- {0, 2, 4, 6, 8, 10, 12, 14});
+ return getPack(DAG, Subtarget, dl, VT, Lo, Hi);
}
return SDValue();
@@ -28456,9 +29231,23 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget))
return V;
- if (SupportedVectorVarShift(VT, Subtarget, Opc))
+ if (supportedVectorVarShift(VT, Subtarget, Opc))
return Op;
+ // i64 vector arithmetic shift can be emulated with the transform:
+ // M = lshr(SIGN_MASK, Amt)
+ // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
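+ // e.g. with 8-bit values for brevity, ashr(0xF0, 4):
+ // M = 0x80 >> 4 = 0x08, lshr(0xF0, 4) = 0x0F, 0x0F ^ 0x08 = 0x07,
+ // 0x07 - 0x08 = 0xFF (-1), matching ashr(-16, 4) == -1.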
+ if (((VT == MVT::v2i64 && !Subtarget.hasXOP()) ||
+ (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
+ Opc == ISD::SRA) {
+ SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
+ SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
+ R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ R = DAG.getNode(ISD::XOR, dl, VT, R, M);
+ R = DAG.getNode(ISD::SUB, dl, VT, R, M);
+ return R;
+ }
+
// XOP has 128-bit variable logical/arithmetic shifts.
// +ve/-ve Amt = shift left/right.
if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 ||
@@ -28484,19 +29273,6 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3});
}
- // i64 vector arithmetic shift can be emulated with the transform:
- // M = lshr(SIGN_MASK, Amt)
- // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M)
- if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) &&
- Opc == ISD::SRA) {
- SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT);
- SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt);
- R = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
- R = DAG.getNode(ISD::XOR, dl, VT, R, M);
- R = DAG.getNode(ISD::SUB, dl, VT, R, M);
- return R;
- }
-
// If possible, lower this shift as a sequence of two shifts by
// constant plus a BLENDing shuffle instead of scalarizing it.
// Example:
@@ -28552,7 +29328,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
// If possible, lower this packed shift into a vector multiply instead of
// expanding it into a sequence of scalar shifts.
- if (Opc == ISD::SHL)
+ // For v32i8 cases, it might be quicker to split/extend to vXi16 shifts.
+ if (Opc == ISD::SHL && !(VT == MVT::v32i8 && (Subtarget.hasXOP() ||
+ Subtarget.canExtendTo512BW())))
if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG))
return DAG.getNode(ISD::MUL, dl, VT, R, Scale);
@@ -28920,6 +29698,77 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
+static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
+ "Unexpected funnel shift opcode!");
+
+ SDLoc DL(Op);
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ bool IsFSHR = Op.getOpcode() == ISD::FSHR;
+
+ if (VT.isVector()) {
+ assert(Subtarget.hasVBMI2() && "Expected VBMI2");
+
+ if (IsFSHR)
+ std::swap(Op0, Op1);
+
+ APInt APIntShiftAmt;
+ if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
+ uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
+ SDValue Imm = DAG.getTargetConstant(ShiftAmt, DL, MVT::i8);
+ return getAVX512Node(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
+ {Op0, Op1, Imm}, DAG, Subtarget);
+ }
+ return getAVX512Node(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
+ {Op0, Op1, Amt}, DAG, Subtarget);
+ }
+ assert(
+ (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ "Unexpected funnel shift type!");
+
+ // Expand slow SHLD/SHRD cases if we are not optimizing for size.
+ bool OptForSize = DAG.shouldOptForSize();
+ bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();
+
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z & (bw-1))) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z & (bw-1))).
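+ // e.g. i8 fshl(0xAB, 0xCD, 3): concat = 0xABCD, (0xABCD << 3) >> 8 = 0x55E,
+ // truncated to i8 gives 0x5E == (0xAB << 3) | (0xCD >> 5).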
+ if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
+ !isa<ConstantSDNode>(Amt)) {
+ SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
+ SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
+ Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
+ Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
+ SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
+ if (IsFSHR) {
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
+ } else {
+ Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
+ }
+ return DAG.getZExtOrTrunc(Res, DL, VT);
+ }
+
+ if (VT == MVT::i8 || ExpandFunnel)
+ return SDValue();
+
+ // i16 needs to modulo the shift amount, but i32/i64 have implicit modulo.
+ if (VT == MVT::i16) {
+ Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
+ DAG.getConstant(15, DL, Amt.getValueType()));
+ unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
+ return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
+ }
+
+ return Op;
+}
+
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
@@ -28931,6 +29780,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Opcode = Op.getOpcode();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
int NumElts = VT.getVectorNumElements();
+ bool IsROTL = Opcode == ISD::ROTL;
// Check for constant splat rotation amount.
APInt CstSplatValue;
@@ -28944,7 +29794,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
// Attempt to rotate by immediate.
if (IsCstSplat) {
- unsigned RotOpc = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
+ unsigned RotOpc = IsROTL ? X86ISD::VROTLI : X86ISD::VROTRI;
uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
return DAG.getNode(RotOpc, DL, VT, R,
DAG.getTargetConstant(RotAmt, DL, MVT::i8));
@@ -28956,11 +29806,11 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// AVX512 VBMI2 vXi16 - lower to funnel shifts.
if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
- unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
+ unsigned FunnelOpc = IsROTL ? ISD::FSHL : ISD::FSHR;
return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
}
- assert((Opcode == ISD::ROTL) && "Only ROTL supported");
+ assert(IsROTL && "Only ROTL supported");
// XOP has 128-bit vector variable + immediate rotates.
// +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL.
@@ -28996,16 +29846,41 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
bool IsSplatAmt = DAG.isSplatValue(Amt);
+ SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
// v16i8/v32i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
- if (EltSizeInBits == 8 && !IsSplatAmt) {
+ if (EltSizeInBits == 8) {
if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
return SDValue();
- // We don't need ModuloAmt here as we just peek at individual bits.
+ // Check for a hidden ISD::ROTR; vXi8 lowering can handle both, but we
+ // currently hit infinite loops in legalization if we allow ISD::ROTR.
+ // FIXME: Infinite ROTL<->ROTR legalization in TargetLowering::expandROT.
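+ // (rotl(x, sub(0, y)) == rotr(x, y) since rotate amounts are taken modulo
+ // the element width, which is why a zero-minus pattern hides a ROTR here.)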
+ SDValue HiddenROTRAmt;
+ if (Amt.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(Amt.getOperand(0).getNode()))
+ HiddenROTRAmt = Amt.getOperand(1);
+
MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
+ // If the amount is a splat, attempt to fold as unpack(x,x) << zext(y):
+ // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.
+ // rotr(x,y) -> (((aext(x) << bw) | zext(x)) >> (y & (bw-1))).
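+ // e.g. i8 rotl(0xAB, 3): unpack gives 0xABAB, (0xABAB << 3) >> 8 = 0x55D,
+ // truncated to i8 gives 0x5D == (0xAB << 3) | (0xAB >> 5).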
+ if (SDValue BaseRotAmt = DAG.getSplatValue(DAG.getNode(
+ ISD::AND, DL, VT, HiddenROTRAmt ? HiddenROTRAmt : Amt, AmtMask))) {
+ unsigned ShiftX86Opc = HiddenROTRAmt ? X86ISD::VSRLI : X86ISD::VSHLI;
+ BaseRotAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseRotAmt);
+ SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
+ SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
+ Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, BaseRotAmt,
+ Subtarget, DAG);
+ Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt,
+ Subtarget, DAG);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt);
+ }
+
+ // We don't need ModuloAmt here as we just peek at individual bits.
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
// On SSE41 targets we can use PBLENDVB which selects bytes based just
@@ -29024,6 +29899,15 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getSelect(DL, SelVT, C, V0, V1);
};
+ // 'Hidden' ROTR is currently only profitable on AVX512 targets where we
+ // have VPTERNLOG.
+ unsigned ShiftLHS = ISD::SHL;
+ unsigned ShiftRHS = ISD::SRL;
+ if (HiddenROTRAmt && useVPTERNLOG(Subtarget, VT)) {
+ std::swap(ShiftLHS, ShiftRHS);
+ Amt = HiddenROTRAmt;
+ }
+
// Turn 'a' into a mask suitable for VSELECT: a = a << 5;
// We can safely do this using i16 shifts as we're only interested in
// the 3 lower bits of each byte.
@@ -29035,8 +29919,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
SDValue M;
M = DAG.getNode(
ISD::OR, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(4, DL, VT)),
- DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(4, DL, VT)));
+ DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(4, DL, VT)),
+ DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(4, DL, VT)));
R = SignBitSelect(VT, Amt, M, R);
// a += a
@@ -29045,8 +29929,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// r = VSELECT(r, rot(r, 2), a);
M = DAG.getNode(
ISD::OR, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(2, DL, VT)),
- DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(6, DL, VT)));
+ DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(2, DL, VT)),
+ DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(6, DL, VT)));
R = SignBitSelect(VT, Amt, M, R);
// a += a
@@ -29055,8 +29939,8 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// return VSELECT(r, rot(r, 1), a);
M = DAG.getNode(
ISD::OR, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(1, DL, VT)),
- DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(7, DL, VT)));
+ DAG.getNode(ShiftLHS, DL, VT, R, DAG.getConstant(1, DL, VT)),
+ DAG.getNode(ShiftRHS, DL, VT, R, DAG.getConstant(7, DL, VT)));
return SignBitSelect(VT, Amt, M, R);
}
@@ -29065,18 +29949,16 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
// If the amount is a splat, perform the modulo BEFORE the splat,
// this helps LowerScalarVariableShift to remove the splat later.
Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt);
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
- DAG.getConstant(EltSizeInBits - 1, DL, VT));
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT),
SmallVector<int>(NumElts, 0));
} else {
- Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
- DAG.getConstant(EltSizeInBits - 1, DL, VT));
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
}
bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
- bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
- SupportedVectorVarShift(VT, Subtarget, ISD::SRL);
+ bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
+ supportedVectorVarShift(VT, Subtarget, ISD::SRL);
// Fallback for splats + all supported variable shifts.
// Fallback for non-constants AVX2 vXi16 as well.
@@ -29088,9 +29970,11 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
}
- // As with shifts, convert the rotation amount to a multiplication factor.
+ // As with shifts, attempt to convert the rotation amount to a multiplication
+ // factor; otherwise fall back to the general expansion.
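+ // (a per-lane rotate amount of k becomes a per-lane multiplier of 1 << k).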
SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
- assert(Scale && "Failed to convert ROTL amount to scale");
+ if (!Scale)
+ return SDValue();
// v8i16/v16i16: perform unsigned multiply hi/lo and OR the results.
if (EltSizeInBits == 16) {
@@ -29803,6 +30687,10 @@ static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
}
+ // If we have POPCNT, use the default expansion.
+ if (Subtarget.hasPOPCNT())
+ return SDValue();
+
if (VT == MVT::i64) {
// Xor the high and low 16-bits together using a 32-bit operation.
SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
@@ -30358,6 +31246,10 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
Mask = ExtendToType(Mask, MaskVT, DAG, true);
}
+ // Break dependency on the data register.
+ if (PassThru.isUndef())
+ PassThru = getZeroVector(VT, Subtarget, DAG, dl);
+
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
N->getScale() };
SDValue NewGather = DAG.getMemIntrinsicNode(
@@ -30886,6 +31778,51 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
+ if (VT.isVector() && Subtarget.hasFP16() &&
+ SrcVT.getVectorElementType() == MVT::f16) {
+ EVT EleVT = VT.getVectorElementType();
+ EVT ResVT = EleVT == MVT::i32 ? MVT::v4i32 : MVT::v8i16;
+
+ if (SrcVT != MVT::v8f16) {
+ SDValue Tmp =
+ IsStrict ? DAG.getConstantFP(0.0, dl, SrcVT) : DAG.getUNDEF(SrcVT);
+ SmallVector<SDValue, 4> Ops(SrcVT == MVT::v2f16 ? 4 : 2, Tmp);
+ Ops[0] = Src;
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f16, Ops);
+ }
+
+ SDValue Res, Chain;
+ if (IsStrict) {
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ Res =
+ DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {N->getOperand(0), Src});
+ Chain = Res.getValue(1);
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ Res = DAG.getNode(Opc, dl, ResVT, Src);
+ }
+
+ // TODO: Need to add exception check code for strict FP.
+ if (EleVT.getSizeInBits() < 16) {
+ MVT TmpVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, TmpVT, Res);
+
+ // Now widen to 128 bits.
+ unsigned NumConcats = 128 / TmpVT.getSizeInBits();
+ MVT ConcatVT = MVT::getVectorVT(EleVT.getSimpleVT(), 8 * NumConcats);
+ SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(TmpVT));
+ ConcatOps[0] = Res;
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps);
+ }
+
+ Results.push_back(Res);
+ if (IsStrict)
+ Results.push_back(Chain);
+
+ return;
+ }
+
if (VT.isVector() && VT.getScalarSizeInBits() < 32) {
assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector &&
"Unexpected type action!");
@@ -31001,8 +31938,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(!VT.isVector() && "Vectors should have been handled above!");
- if (Subtarget.hasDQI() && VT == MVT::i64 &&
- (SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
+ if ((Subtarget.hasDQI() && VT == MVT::i64 &&
+ (SrcVT == MVT::f32 || SrcVT == MVT::f64)) ||
+ (Subtarget.hasFP16() && SrcVT == MVT::f16)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
// If we use a 128-bit result we might need to use a target specific node.
@@ -31036,6 +31974,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
+ if (VT == MVT::i128 && Subtarget.isTargetWin64()) {
+ SDValue Chain;
+ SDValue V = LowerWin64_FP_TO_INT128(SDValue(N, 0), DAG, Chain);
+ Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(Chain);
+ return;
+ }
+
SDValue Chain;
if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) {
Results.push_back(V);
@@ -31059,9 +32006,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
N->getOpcode() == ISD::STRICT_SINT_TO_FP;
EVT VT = N->getValueType(0);
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ if (VT.getVectorElementType() == MVT::f16 && Subtarget.hasFP16() &&
+ Subtarget.hasVLX()) {
+ if (Src.getValueType().getVectorElementType() == MVT::i16)
+ return;
+
+ if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2i32)
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
+ IsStrict ? DAG.getConstant(0, dl, MVT::v2i32)
+ : DAG.getUNDEF(MVT::v2i32));
+ if (IsStrict) {
+ unsigned Opc =
+ IsSigned ? X86ISD::STRICT_CVTSI2P : X86ISD::STRICT_CVTUI2P;
+ SDValue Res = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {N->getOperand(0), Src});
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ } else {
+ unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P;
+ Results.push_back(DAG.getNode(Opc, dl, MVT::v8f16, Src));
+ }
+ return;
+ }
if (VT != MVT::v2f32)
return;
- SDValue Src = N->getOperand(IsStrict ? 1 : 0);
EVT SrcVT = Src.getValueType();
if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
if (IsStrict) {
@@ -31162,14 +32131,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_ROUND: {
bool IsStrict = N->isStrictFPOpcode();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ EVT VT = N->getValueType(0);
+ EVT NewVT = VT.getVectorElementType() == MVT::f16 ? MVT::v8f16 : MVT::v4f32;
+ if (VT == MVT::v2f16 && Src.getValueType() == MVT::v2f32) {
+ SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f32)
+ : DAG.getUNDEF(MVT::v2f32);
+ Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, Ext);
+ }
if (!isTypeLegal(Src.getValueType()))
return;
SDValue V;
if (IsStrict)
- V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other},
- {N->getOperand(0), N->getOperand(1)});
+ V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {NewVT, MVT::Other},
+ {N->getOperand(0), Src});
else
- V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0));
+ V = DAG.getNode(X86ISD::VFPROUND, dl, NewVT, Src);
Results.push_back(V);
if (IsStrict)
Results.push_back(V.getValue(1));
@@ -31181,6 +32157,21 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
+ if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
+ return;
+ bool IsStrict = N->isStrictFPOpcode();
+ SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
+ : DAG.getUNDEF(MVT::v2f16);
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
+ if (IsStrict)
+ V = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::v4f32, MVT::Other},
+ {N->getOperand(0), V});
+ else
+ V = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, V);
+ Results.push_back(V);
+ if (IsStrict)
+ Results.push_back(V.getValue(1));
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -31656,6 +32647,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MOVSLDUP)
NODE_NAME_CASE(MOVSD)
NODE_NAME_CASE(MOVSS)
+ NODE_NAME_CASE(MOVSH)
NODE_NAME_CASE(UNPCKL)
NODE_NAME_CASE(UNPCKH)
NODE_NAME_CASE(VBROADCAST)
@@ -31684,7 +32676,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
NODE_NAME_CASE(VAARG_64)
NODE_NAME_CASE(VAARG_X32)
- NODE_NAME_CASE(WIN_ALLOCA)
+ NODE_NAME_CASE(DYN_ALLOCA)
NODE_NAME_CASE(MEMBARRIER)
NODE_NAME_CASE(MFENCE)
NODE_NAME_CASE(SEG_ALLOCA)
@@ -31714,6 +32706,22 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FNMSUB_RND)
NODE_NAME_CASE(FMADDSUB_RND)
NODE_NAME_CASE(FMSUBADD_RND)
+ NODE_NAME_CASE(VFMADDC)
+ NODE_NAME_CASE(VFMADDC_RND)
+ NODE_NAME_CASE(VFCMADDC)
+ NODE_NAME_CASE(VFCMADDC_RND)
+ NODE_NAME_CASE(VFMULC)
+ NODE_NAME_CASE(VFMULC_RND)
+ NODE_NAME_CASE(VFCMULC)
+ NODE_NAME_CASE(VFCMULC_RND)
+ NODE_NAME_CASE(VFMULCSH)
+ NODE_NAME_CASE(VFMULCSH_RND)
+ NODE_NAME_CASE(VFCMULCSH)
+ NODE_NAME_CASE(VFCMULCSH_RND)
+ NODE_NAME_CASE(VFMADDCSH)
+ NODE_NAME_CASE(VFMADDCSH_RND)
+ NODE_NAME_CASE(VFCMADDCSH)
+ NODE_NAME_CASE(VFCMADDCSH_RND)
NODE_NAME_CASE(VPMADD52H)
NODE_NAME_CASE(VPMADD52L)
NODE_NAME_CASE(VRNDSCALE)
@@ -31954,6 +32962,7 @@ bool X86TargetLowering::isBinOp(unsigned Opcode) const {
bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const {
switch (Opcode) {
// TODO: Add more X86ISD opcodes once we have test coverage.
+ case X86ISD::AVG:
case X86ISD::PCMPEQ:
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ:
@@ -32047,6 +33056,36 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
bool X86TargetLowering::shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const {
+ using namespace llvm::PatternMatch;
+
+ FixedVectorType *VTy = dyn_cast<FixedVectorType>(I->getType());
+ if (!VTy)
+ return false;
+
+ if (I->getOpcode() == Instruction::Mul &&
+ VTy->getElementType()->isIntegerTy(64)) {
+ for (auto &Op : I->operands()) {
+ // Make sure we are not already sinking this operand
+ if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
+ continue;
+
+ // Look for PMULDQ pattern where the input is a sext_inreg from vXi32 or
+ // the PMULUDQ pattern where the input is a zext_inreg from vXi32.
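+ // i.e. (ashr (shl %x, 32), 32) is a sext_inreg from i32, and
+ // (and %x, 0xffffffff) is a zext_inreg from i32.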
+ if (Subtarget.hasSSE41() &&
+ match(Op.get(), m_AShr(m_Shl(m_Value(), m_SpecificInt(32)),
+ m_SpecificInt(32)))) {
+ Ops.push_back(&cast<Instruction>(Op)->getOperandUse(0));
+ Ops.push_back(&Op);
+ } else if (Subtarget.hasSSE2() &&
+ match(Op.get(),
+ m_And(m_Value(), m_SpecificInt(UINT64_C(0xffffffff))))) {
+ Ops.push_back(&Op);
+ }
+ }
+
+ return !Ops.empty();
+ }
+
// A uniform shift amount in a vector shift or funnel shift may be much
// cheaper than a generic variable vector shift, so make that pattern visible
// to SDAG by sinking the shuffle instruction next to the shift.
@@ -32102,6 +33141,8 @@ bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ return Subtarget.hasFP16();
case MVT::f32:
case MVT::f64:
return true;
@@ -32180,13 +33221,9 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
// If we hit the end of the block, check whether EFLAGS is live into a
// successor.
- for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
- sEnd = BB->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock* succ = *sItr;
- if (succ->isLiveIn(X86::EFLAGS))
+ for (MachineBasicBlock *Succ : BB->successors())
+ if (Succ->isLiveIn(X86::EFLAGS))
return true;
- }
return false;
}
@@ -32576,6 +33613,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
// conditional jump around it.
static bool isCMOVPseudo(MachineInstr &MI) {
switch (MI.getOpcode()) {
+ case X86::CMOV_FR16X:
case X86::CMOV_FR32:
case X86::CMOV_FR32X:
case X86::CMOV_FR64:
@@ -32922,14 +33960,11 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
}
// Transfer any debug instructions inside the CMOV sequence to the sunk block.
- auto DbgEnd = MachineBasicBlock::iterator(LastCMOV);
- auto DbgIt = MachineBasicBlock::iterator(MI);
- while (DbgIt != DbgEnd) {
- auto Next = std::next(DbgIt);
- if (DbgIt->isDebugInstr())
- SinkMBB->push_back(DbgIt->removeFromParent());
- DbgIt = Next;
- }
+ auto DbgRange = llvm::make_range(MachineBasicBlock::iterator(MI),
+ MachineBasicBlock::iterator(LastCMOV));
+ for (MachineInstr &MI : llvm::make_early_inc_range(DbgRange))
+ if (MI.isDebugInstr())
+ SinkMBB->push_back(MI.removeFromParent());
// Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
SinkMBB->splice(SinkMBB->end(), ThisMBB,
@@ -34576,6 +35611,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPBF16PS: {
unsigned Opc;
switch (MI.getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
@@ -34603,6 +35639,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED: {
unsigned Opc;
switch (MI.getOpcode()) {
+ default: llvm_unreachable("illegal opcode!");
case X86::PTILELOADD: Opc = X86::TILELOADD; break;
case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
case X86::PTILESTORED: Opc = X86::TILESTORED; break;
@@ -34795,8 +35832,8 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
APInt DemandedLHS, DemandedRHS;
getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);
- Known.One = APInt::getAllOnesValue(BitWidth * 2);
- Known.Zero = APInt::getAllOnesValue(BitWidth * 2);
+ Known.One = APInt::getAllOnes(BitWidth * 2);
+ Known.Zero = APInt::getAllOnes(BitWidth * 2);
KnownBits Known2;
if (!!DemandedLHS) {
@@ -35197,17 +36234,16 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned NumMaskElts = Mask.size();
unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
- // Match against a VZEXT_MOVL vXi32 zero-extending instruction.
- if (MaskEltSize == 32 && Mask[0] == 0) {
- if (isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
+ // Match against a VZEXT_MOVL vXi32 and vXi16 zero-extending instruction.
+ if (Mask[0] == 0 &&
+ (MaskEltSize == 32 || (MaskEltSize == 16 && Subtarget.hasFP16()))) {
+ if ((isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) ||
+ (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1))) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
- return true;
- }
- if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
- Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
+ : !Subtarget.hasSSE2() ? MVT::v4f32
+ : MaskVT;
return true;
}
}
@@ -35251,11 +36287,14 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
- if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+ if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
+ (MaskEltSize == 16 && Subtarget.hasFP16())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
+ SrcVT = DstVT = MaskEltSize == 16 ? MVT::v8f16
+ : !Subtarget.hasSSE2() ? MVT::v4f32
+ : MaskVT;
return true;
}
@@ -35501,6 +36540,12 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
SrcVT = DstVT = MVT::v4f32;
return true;
}
+ if (isTargetShuffleEquivalent(MaskVT, Mask, {8, 1, 2, 3, 4, 5, 6, 7}) &&
+ Subtarget.hasFP16()) {
+ Shuffle = X86ISD::MOVSH;
+ SrcVT = DstVT = MVT::v8f16;
+ return true;
+ }
}
// Attempt to match against either an unary or binary PACKSS/PACKUS shuffle.
@@ -35538,8 +36583,8 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
unsigned Scale1 = NumV1Elts / NumMaskElts;
unsigned Scale2 = NumV2Elts / NumMaskElts;
- APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
- APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
+ APInt DemandedZeroV1 = APInt::getZero(NumV1Elts);
+ APInt DemandedZeroV2 = APInt::getZero(NumV2Elts);
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef)
@@ -35560,12 +36605,58 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
IsBlend = false;
break;
}
- if (IsBlend &&
- DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
- DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
- Shuffle = ISD::OR;
- SrcVT = DstVT = MaskVT.changeTypeToInteger();
- return true;
+ if (IsBlend) {
+ if (DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
+ DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
+ Shuffle = ISD::OR;
+ SrcVT = DstVT = MaskVT.changeTypeToInteger();
+ return true;
+ }
+ if (NumV1Elts == NumV2Elts && NumV1Elts == NumMaskElts) {
+ // FIXME: handle mismatched sizes?
+ // TODO: investigate if `ISD::OR` handling in
+ // `TargetLowering::SimplifyDemandedVectorElts` can be improved instead.
+ auto computeKnownBitsElementWise = [&DAG](SDValue V) {
+ unsigned NumElts = V.getValueType().getVectorNumElements();
+ KnownBits Known(NumElts);
+ for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
+ APInt Mask = APInt::getOneBitSet(NumElts, EltIdx);
+ KnownBits PeepholeKnown = DAG.computeKnownBits(V, Mask);
+ if (PeepholeKnown.isZero())
+ Known.Zero.setBit(EltIdx);
+ if (PeepholeKnown.isAllOnes())
+ Known.One.setBit(EltIdx);
+ }
+ return Known;
+ };
+
+ KnownBits V1Known = computeKnownBitsElementWise(V1);
+ KnownBits V2Known = computeKnownBitsElementWise(V2);
+
+ for (unsigned i = 0; i != NumMaskElts && IsBlend; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ if (M == SM_SentinelZero) {
+ IsBlend &= V1Known.Zero[i] && V2Known.Zero[i];
+ continue;
+ }
+ if (M == (int)i) {
+ IsBlend &= V2Known.Zero[i] || V1Known.One[i];
+ continue;
+ }
+ if (M == (int)(i + NumMaskElts)) {
+ IsBlend &= V1Known.Zero[i] || V2Known.One[i];
+ continue;
+ }
+ llvm_unreachable("will not get here.");
+ }
+ if (IsBlend) {
+ Shuffle = ISD::OR;
+ SrcVT = DstVT = MaskVT.changeTypeToInteger();
+ return true;
+ }
+ }
}
}
@@ -35817,13 +36908,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return CanonicalizeShuffleInput(RootVT, V1);
}
+ SmallVector<int, 64> Mask(BaseMask.begin(), BaseMask.end());
+
// See if the shuffle is a hidden identity shuffle - repeated args in HOPs
// etc. can be simplified.
- if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits) {
+ if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits && VT1.isVector()) {
SmallVector<int> ScaledMask, IdentityMask;
unsigned NumElts = VT1.getVectorNumElements();
- if (BaseMask.size() <= NumElts &&
- scaleShuffleElements(BaseMask, NumElts, ScaledMask)) {
+ if (Mask.size() <= NumElts &&
+ scaleShuffleElements(Mask, NumElts, ScaledMask)) {
for (unsigned i = 0; i != NumElts; ++i)
IdentityMask.push_back(i);
if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2))
@@ -35837,35 +36930,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// If the upper subvectors are zeroable, then an extract+insert is more
// optimal than using X86ISD::SHUF128. The insertion is free, even if it has
// to zero the upper subvectors.
- if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) {
+ if (isUndefOrZeroInRange(Mask, 1, NumBaseMaskElts - 1)) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) &&
+ assert(isInRange(Mask[0], 0, NumBaseMaskElts) &&
"Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
- unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts);
- bool UseZero = isAnyZero(BaseMask);
+ unsigned SubIdx = Mask[0] * (NumRootElts / NumBaseMaskElts);
+ bool UseZero = isAnyZero(Mask);
Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits);
return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits);
}
// Narrow shuffle mask to v4x128.
- SmallVector<int, 4> Mask;
+ SmallVector<int, 4> ScaledMask;
assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size");
- narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask);
+ narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, Mask, ScaledMask);
// Try to lower to vshuf64x2/vshuf32x4.
- auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask,
- SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL,
+ ArrayRef<int> ScaledMask, SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
unsigned PermMask = 0;
// Insure elements came from the same Op.
SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)};
for (int i = 0; i < 4; ++i) {
- assert(Mask[i] >= -1 && "Illegal shuffle sentinel value");
- if (Mask[i] < 0)
+ assert(ScaledMask[i] >= -1 && "Illegal shuffle sentinel value");
+ if (ScaledMask[i] < 0)
continue;
- SDValue Op = Mask[i] >= 4 ? V2 : V1;
+ SDValue Op = ScaledMask[i] >= 4 ? V2 : V1;
unsigned OpIndex = i / 2;
if (Ops[OpIndex].isUndef())
Ops[OpIndex] = Op;
@@ -35875,7 +36969,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Convert the 128-bit shuffle mask selection values into 128-bit
// selection bits defined by a vshuf64x2 instruction's immediate control
// byte.
- PermMask |= (Mask[i] % 4) << (i * 2);
+ PermMask |= (ScaledMask[i] % 4) << (i * 2);
}
return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT,
@@ -35887,18 +36981,20 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// FIXME: Is there a better way to do this? is256BitLaneRepeatedShuffleMask
// doesn't work because our mask is for 128 bits and we don't have an MVT
// to match that.
- bool PreferPERMQ =
- UnaryShuffle && isUndefOrInRange(Mask[0], 0, 2) &&
- isUndefOrInRange(Mask[1], 0, 2) && isUndefOrInRange(Mask[2], 2, 4) &&
- isUndefOrInRange(Mask[3], 2, 4) &&
- (Mask[0] < 0 || Mask[2] < 0 || Mask[0] == (Mask[2] % 2)) &&
- (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2));
-
- if (!isAnyZero(Mask) && !PreferPERMQ) {
+ bool PreferPERMQ = UnaryShuffle && isUndefOrInRange(ScaledMask[0], 0, 2) &&
+ isUndefOrInRange(ScaledMask[1], 0, 2) &&
+ isUndefOrInRange(ScaledMask[2], 2, 4) &&
+ isUndefOrInRange(ScaledMask[3], 2, 4) &&
+ (ScaledMask[0] < 0 || ScaledMask[2] < 0 ||
+ ScaledMask[0] == (ScaledMask[2] % 2)) &&
+ (ScaledMask[1] < 0 || ScaledMask[3] < 0 ||
+ ScaledMask[1] == (ScaledMask[3] % 2));
+
+ if (!isAnyZero(ScaledMask) && !PreferPERMQ) {
if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128)
return SDValue(); // Nothing to do!
MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64);
- if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG))
+ if (SDValue V = MatchSHUF128(ShuffleVT, DL, ScaledMask, V1, V2, DAG))
return DAG.getBitcast(RootVT, V);
}
}
@@ -35908,25 +37004,27 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// If the upper half is zeroable, then an extract+insert is more optimal
// than using X86ISD::VPERM2X128. The insertion is free, even if it has to
// zero the upper half.
- if (isUndefOrZero(BaseMask[1])) {
+ if (isUndefOrZero(Mask[1])) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle");
+ assert(isInRange(Mask[0], 0, 2) && "Unexpected lane shuffle");
Res = CanonicalizeShuffleInput(RootVT, V1);
- Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL);
- return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG,
- DL, 256);
+ Res = extract128BitVector(Res, Mask[0] * (NumRootElts / 2), DAG, DL);
+ return widenSubVector(Res, Mask[1] == SM_SentinelZero, Subtarget, DAG, DL,
+ 256);
}
- // If we're splatting the low subvector, an insert-subvector 'concat'
+ // If we're inserting the low subvector, an insert-subvector 'concat'
// pattern is quicker than VPERM2X128.
// TODO: Add AVX2 support instead of VPERMQ/VPERMPD.
- if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) {
+ if (BaseMask[0] == 0 && (BaseMask[1] == 0 || BaseMask[1] == 2) &&
+ !Subtarget.hasAVX2()) {
if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR)
return SDValue(); // Nothing to do!
- Res = CanonicalizeShuffleInput(RootVT, V1);
- Res = extractSubVector(Res, 0, DAG, DL, 128);
- return concatSubVectors(Res, Res, DAG, DL);
+ SDValue Lo = CanonicalizeShuffleInput(RootVT, V1);
+ SDValue Hi = CanonicalizeShuffleInput(RootVT, BaseMask[1] == 0 ? V1 : V2);
+ Hi = extractSubVector(Hi, 0, DAG, DL, 128);
+ return insertSubVector(Lo, Hi, NumRootElts / 2, DAG, DL, 128);
}
if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128)
@@ -35936,11 +37034,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// we need to use the zeroing feature.
// Prefer blends for sequential shuffles unless we are optimizing for size.
if (UnaryShuffle &&
- !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) &&
- (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) {
+ !(Subtarget.hasAVX2() && isUndefOrInRange(Mask, 0, 2)) &&
+ (OptForSize || !isSequentialOrUndefOrZeroInRange(Mask, 0, 2, 0))) {
unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
- PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
+ PermMask |= ((Mask[0] < 0 ? 0x8 : (Mask[0] & 1)) << 0);
+ PermMask |= ((Mask[1] < 0 ? 0x8 : (Mask[1] & 1)) << 4);
return DAG.getNode(
X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1),
DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8));
@@ -35951,16 +37049,15 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// TODO - handle AVX512VL cases with X86ISD::SHUF128.
if (!UnaryShuffle && !IsMaskedShuffle) {
- assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) &&
+ assert(llvm::all_of(Mask, [](int M) { return 0 <= M && M < 4; }) &&
"Unexpected shuffle sentinel value");
// Prefer blends to X86ISD::VPERM2X128.
- if (!((BaseMask[0] == 0 && BaseMask[1] == 3) ||
- (BaseMask[0] == 2 && BaseMask[1] == 1))) {
+ if (!((Mask[0] == 0 && Mask[1] == 3) || (Mask[0] == 2 && Mask[1] == 1))) {
unsigned PermMask = 0;
- PermMask |= ((BaseMask[0] & 3) << 0);
- PermMask |= ((BaseMask[1] & 3) << 4);
- SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2;
- SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2;
+ PermMask |= ((Mask[0] & 3) << 0);
+ PermMask |= ((Mask[1] & 3) << 4);
+ SDValue LHS = isInRange(Mask[0], 0, 2) ? V1 : V2;
+ SDValue RHS = isInRange(Mask[1], 0, 2) ? V1 : V2;
return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT,
CanonicalizeShuffleInput(RootVT, LHS),
CanonicalizeShuffleInput(RootVT, RHS),
@@ -35971,13 +37068,12 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// For masks that have been widened to 128-bit elements or more,
// narrow back down to 64-bit elements.
- SmallVector<int, 64> Mask;
if (BaseMaskEltSizeInBits > 64) {
assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size");
int MaskScale = BaseMaskEltSizeInBits / 64;
- narrowShuffleMaskElts(MaskScale, BaseMask, Mask);
- } else {
- Mask.assign(BaseMask.begin(), BaseMask.end());
+ SmallVector<int, 64> ScaledMask;
+ narrowShuffleMaskElts(MaskScale, Mask, ScaledMask);
+ Mask = std::move(ScaledMask);
}
// For masked shuffles, we're trying to match the root width for better
@@ -36029,7 +37125,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (isUndefOrEqual(Mask, 0)) {
if (V1.getValueType() == MaskVT &&
V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
- MayFoldLoad(V1.getOperand(0))) {
+ X86::mayFoldLoad(V1.getOperand(0), Subtarget)) {
if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST)
return SDValue(); // Nothing to do!
Res = V1.getOperand(0);
@@ -36306,8 +37402,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask &&
isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
- APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
- APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
+ APInt Zero = APInt::getZero(MaskEltSizeInBits);
+ APInt AllOnes = APInt::getAllOnes(MaskEltSizeInBits);
APInt UndefElts(NumMaskElts, 0);
SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
for (unsigned i = 0; i != NumMaskElts; ++i) {
@@ -36804,10 +37900,11 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
return SDValue();
}
- // Only fold if at least one of the constants is only used once or
- // the combined shuffle has included a variable mask shuffle, this
- // is to avoid constant pool bloat.
- if (!OneUseConstantOp && !HasVariableMask)
+ // If we're optimizing for size, only fold if at least one of the constants is
+ // only used once or the combined shuffle has included a variable mask
+ // shuffle; this avoids constant pool bloat.
+ bool IsOptimizingSize = DAG.shouldOptForSize();
+ if (IsOptimizingSize && !OneUseConstantOp && !HasVariableMask)
return SDValue();
// Shuffle the constant bits according to the mask.
@@ -36816,7 +37913,7 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
APInt ZeroElts(NumMaskElts, 0);
APInt ConstantElts(NumMaskElts, 0);
SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
- APInt::getNullValue(MaskSizeInBits));
+ APInt::getZero(MaskSizeInBits));
for (unsigned i = 0; i != NumMaskElts; ++i) {
int M = Mask[i];
if (M == SM_SentinelUndef) {
@@ -36847,10 +37944,10 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
ConstantElts.setBit(i);
ConstantBitData[i] = Bits;
}
- assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());
+ assert((UndefElts | ZeroElts | ConstantElts).isAllOnes());
// Attempt to create a zero vector.
- if ((UndefElts | ZeroElts).isAllOnesValue())
+ if ((UndefElts | ZeroElts).isAllOnes())
return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);
// Create the constant data.
@@ -36931,6 +38028,10 @@ static SDValue combineX86ShufflesRecursively(
if (!VT.isVector() || !VT.isSimple())
return SDValue(); // Bail if we hit a non-simple non-vector.
+ // FIXME: Just bail on f16 for now.
+ if (VT.getVectorElementType() == MVT::f16)
+ return SDValue();
+
assert((RootSizeInBits % VT.getSizeInBits()) == 0 &&
"Can only combine shuffles upto size of the root op.");
@@ -36939,7 +38040,7 @@ static SDValue combineX86ShufflesRecursively(
SmallVector<int, 64> OpMask;
SmallVector<SDValue, 2> OpInputs;
APInt OpUndef, OpZero;
- APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt OpDemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode());
if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef,
OpZero, DAG, Depth, false))
@@ -36981,14 +38082,14 @@ static SDValue combineX86ShufflesRecursively(
// Only resolve zeros if it will remove an input, otherwise we might end
// up in an infinite loop.
bool ResolveKnownZeros = true;
- if (!OpZero.isNullValue()) {
- APInt UsedInputs = APInt::getNullValue(OpInputs.size());
+ if (!OpZero.isZero()) {
+ APInt UsedInputs = APInt::getZero(OpInputs.size());
for (int i = 0, e = OpMask.size(); i != e; ++i) {
int M = OpMask[i];
if (OpUndef[i] || OpZero[i] || isUndefOrZero(M))
continue;
UsedInputs.setBit(M / OpMask.size());
- if (UsedInputs.isAllOnesValue()) {
+ if (UsedInputs.isAllOnes()) {
ResolveKnownZeros = false;
break;
}
@@ -37178,6 +38279,48 @@ static SDValue combineX86ShufflesRecursively(
Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget))
return DAG.getBitcast(Root.getValueType(), HOp);
+ // Try to refine our inputs given our knowledge of target shuffle mask.
+ for (auto I : enumerate(Ops)) {
+ int OpIdx = I.index();
+ SDValue &Op = I.value();
+
+ // What range of shuffle mask element values results in picking from Op?
+ int Lo = OpIdx * Mask.size();
+ int Hi = Lo + Mask.size();
+
+ // Which elements of Op do we demand, given the mask's granularity?
+ APInt OpDemandedElts(Mask.size(), 0);
+ for (int MaskElt : Mask) {
+ if (isInRange(MaskElt, Lo, Hi)) { // Picks from Op?
+ int OpEltIdx = MaskElt - Lo;
+ OpDemandedElts.setBit(OpEltIdx);
+ }
+ }
+
+ // Is the shuffle result smaller than the root?
+ if (Op.getValueSizeInBits() < RootSizeInBits) {
+ // We padded the mask with undefs. But we now need to undo that.
+ unsigned NumExpectedVectorElts = Mask.size();
+ unsigned EltSizeInBits = RootSizeInBits / NumExpectedVectorElts;
+ unsigned NumOpVectorElts = Op.getValueSizeInBits() / EltSizeInBits;
+ assert(!OpDemandedElts.extractBits(
+ NumExpectedVectorElts - NumOpVectorElts, NumOpVectorElts) &&
+ "Demanding the virtual undef widening padding?");
+ OpDemandedElts = OpDemandedElts.trunc(NumOpVectorElts); // NUW
+ }
+
+ // The Op itself may be of different VT, so we need to scale the mask.
+ unsigned NumOpElts = Op.getValueType().getVectorNumElements();
+ APInt OpScaledDemandedElts =
+ APIntOps::ScaleBitMask(OpDemandedElts, NumOpElts);
+
+ // Can this operand be simplified any further, given its demanded elements?
+ if (SDValue NewOp =
+ DAG.getTargetLoweringInfo().SimplifyMultipleUseDemandedVectorElts(
+ Op, OpScaledDemandedElts, DAG))
+ Op = NewOp;
+ }
+ // FIXME: should we rerun resolveTargetShuffleInputsAndMask() now?
+
// Widen any subvector shuffle inputs we've collected.
if (any_of(Ops, [RootSizeInBits](SDValue Op) {
return Op.getValueSizeInBits() < RootSizeInBits;
@@ -37424,8 +38567,10 @@ static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL,
SDValue N0 = V.getOperand(0);
SDValue N1 = V.getOperand(1);
unsigned Imm = V.getConstantOperandVal(2);
- if (!MayFoldLoad(peekThroughOneUseBitcasts(N0)) ||
- MayFoldLoad(peekThroughOneUseBitcasts(N1)))
+ const X86Subtarget &Subtarget =
+ static_cast<const X86Subtarget &>(DAG.getSubtarget());
+ if (!X86::mayFoldLoad(peekThroughOneUseBitcasts(N0), Subtarget) ||
+ X86::mayFoldLoad(peekThroughOneUseBitcasts(N1), Subtarget))
return SDValue();
Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4);
return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0,
@@ -37721,6 +38866,13 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR)
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+ // broadcast(extract_vector_elt(x, 0)) -> broadcast(x).
+ if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(Src.getOperand(1)) &&
+ DAG.getTargetLoweringInfo().isTypeLegal(
+ Src.getOperand(0).getValueType()))
+ return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
+
// Share broadcast with the longest vector and extract low subvector (free).
// Ensure the same SDValue from the SDNode use is being used.
for (SDNode *User : Src->uses())
@@ -37988,6 +39140,41 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
+ case X86ISD::SHUFP: {
+ // Fold shufps(shuffle(x),shuffle(y)) -> shufps(x,y).
+ // This is a more relaxed shuffle combiner that can ignore oneuse limits.
+ // TODO: Support types other than v4f32.
+ if (VT == MVT::v4f32) {
+ bool Updated = false;
+ SmallVector<int> Mask;
+ SmallVector<SDValue> Ops;
+ if (getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask) &&
+ Ops.size() == 2) {
+ for (int i = 0; i != 2; ++i) {
+ SmallVector<SDValue> SubOps;
+ SmallVector<int> SubMask, SubScaledMask;
+ SDValue Sub = peekThroughBitcasts(Ops[i]);
+ // TODO: Scaling might be easier if we specify the demanded elts.
+ if (getTargetShuffleInputs(Sub, SubOps, SubMask, DAG, 0, false) &&
+ scaleShuffleElements(SubMask, 4, SubScaledMask) &&
+ SubOps.size() == 1 && isUndefOrInRange(SubScaledMask, 0, 4)) {
+ int Ofs = i * 2;
+ Mask[Ofs + 0] = SubScaledMask[Mask[Ofs + 0] % 4] + (i * 4);
+ Mask[Ofs + 1] = SubScaledMask[Mask[Ofs + 1] % 4] + (i * 4);
+ Ops[i] = DAG.getBitcast(VT, SubOps[0]);
+ Updated = true;
+ }
+ }
+ }
+ if (Updated) {
+ for (int &M : Mask)
+ M %= 4;
+ Ops.push_back(getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, Ops);
+ }
+ }
+ return SDValue();
+ }
case X86ISD::VPERMI: {
// vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
// TODO: Remove when we have preferred domains in combineX86ShuffleChain.
@@ -38057,6 +39244,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
assert(Mask.size() == 4);
break;
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
case X86ISD::MOVSS: {
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
@@ -38441,6 +39629,12 @@ static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N,
if (VT.is512BitVector())
return SDValue();
+ // Do not generate an X86ISD::ADDSUB node for FP16 vector types even though
+ // the ADDSUB idiom has been successfully recognized. There are no known
+ // X86 targets with FP16 ADDSUB instructions!
+ if (VT.getVectorElementType() == MVT::f16)
+ return SDValue();
+
return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
}
@@ -38568,7 +39762,7 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// Simplify source operands based on shuffle mask.
// TODO - merge this into combineX86ShufflesRecursively.
APInt KnownUndef, KnownZero;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
DCI))
return SDValue(N, 0);
@@ -38584,7 +39778,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const {
// If we're demanding all elements don't bother trying to simplify the mask.
unsigned NumElts = DemandedElts.getBitWidth();
- if (DemandedElts.isAllOnesValue())
+ if (DemandedElts.isAllOnes())
return false;
SDValue Mask = Op.getOperand(MaskIndex);
@@ -38671,6 +39865,58 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
KnownZero = LHSZero | RHSZero;
break;
}
+ case X86ISD::VPMADDWD: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
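+ // Each output i32 element is the sum of a pair of adjacent i16 products,
+ // so each demanded output element demands two adjacent source elements.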
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, 2 * NumElts);
+
+ if (SimplifyDemandedVectorElts(LHS, DemandedSrcElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedSrcElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+
+ // TODO: Multiply by zero.
+
+ // If RHS/LHS elements are known zero then we don't need the LHS/RHS equivalent.
+ APInt DemandedLHSElts = DemandedSrcElts & ~RHSZero;
+ if (SimplifyDemandedVectorElts(LHS, DemandedLHSElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ APInt DemandedRHSElts = DemandedSrcElts & ~LHSZero;
+ if (SimplifyDemandedVectorElts(RHS, DemandedRHSElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ break;
+ }
+ case X86ISD::PSADBW: {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ assert(VT.getScalarType() == MVT::i64 &&
+ LHS.getValueType() == RHS.getValueType() &&
+ LHS.getValueType().getScalarType() == MVT::i8 &&
+ "Unexpected PSADBW types");
+
+ // Aggressively peek through ops to get at the demanded elts.
+ if (!DemandedElts.isAllOnes()) {
+ unsigned NumSrcElts = LHS.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
+ SDValue NewLHS = SimplifyMultipleUseDemandedVectorElts(
+ LHS, DemandedSrcElts, TLO.DAG, Depth + 1);
+ SDValue NewRHS = SimplifyMultipleUseDemandedVectorElts(
+ RHS, DemandedSrcElts, TLO.DAG, Depth + 1);
+ if (NewLHS || NewRHS) {
+ NewLHS = NewLHS ? NewLHS : LHS;
+ NewRHS = NewRHS ? NewRHS : RHS;
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewLHS, NewRHS));
+ }
+ }
+ break;
+ }
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA: {
@@ -38706,7 +39952,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
// Aggressively peek through ops to get at the demanded elts.
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedElts, TLO.DAG, Depth + 1))
return TLO.CombineTo(
@@ -38823,7 +40069,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Aggressively peek through ops to get at the demanded elts.
// TODO - we should do this for all target/faux shuffles ops.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
TLO.DAG, Depth + 1);
SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
@@ -38860,7 +40106,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// Aggressively peek through ops to get at the demanded elts.
// TODO: Handle repeated operands.
- if (N0 != N1 && !DemandedElts.isAllOnesValue()) {
+ if (N0 != N1 && !DemandedElts.isAllOnes()) {
SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS,
TLO.DAG, Depth + 1);
SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS,
@@ -39019,15 +40265,11 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
SDLoc DL(Op);
EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(),
ExtSizeInBits / VT.getScalarSizeInBits());
- SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other);
- SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)};
- SDValue Bcst =
- TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys,
- Ops, MemVT, MemIntr->getMemOperand());
- TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1),
- Bcst.getValue(1));
- return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0,
- TLO.DAG, DL, ExtSizeInBits));
+ if (SDValue BcstLd =
+ getBROADCAST_LOAD(Opc, DL, BcstVT, MemVT, MemIntr, 0, TLO.DAG))
+ return TLO.CombineTo(Op,
+ insertSubVector(TLO.DAG.getUNDEF(VT), BcstLd, 0,
+ TLO.DAG, DL, ExtSizeInBits));
}
break;
}
@@ -39130,6 +40372,12 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
}
+ // For broadcasts, unless we *only* demand the 0th element, stop attempts
+ // at simplification here; we aren't going to improve things, and this is
+ // better than any potential shuffle.
+ if (isTargetShuffleSplat(Op) && !DemandedElts.isOne())
+ return false;
+
// Get target/faux shuffle mask.
APInt OpUndef, OpZero;
SmallVector<int, 64> OpMask;
@@ -39175,7 +40423,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
continue;
int Lo = Src * NumElts;
- APInt SrcElts = APInt::getNullValue(NumElts);
+ APInt SrcElts = APInt::getZero(NumElts);
for (int i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
int M = OpMask[i] - Lo;
@@ -39197,7 +40445,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
// to match. This prevents combineX86ShuffleChain from returning a
// combined shuffle that's the same as the original root, causing an
// infinite loop.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range");
SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef);
@@ -39492,7 +40740,8 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Don't attempt this on AVX512 as it might affect broadcast folding.
// TODO: Should we attempt this for i32/i16 splats? They tend to be slower.
if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() &&
- OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) {
+ OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2) &&
+ Src->hasOneUse()) {
MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2);
SDValue NewSrc =
TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src);
@@ -39697,7 +40946,7 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));
// Bitmask that indicates which ops have only been accessed 'inline'.
- APInt IdentityOp = APInt::getAllOnesValue(NumOps);
+ APInt IdentityOp = APInt::getAllOnes(NumOps);
for (int i = 0; i != NumElts; ++i) {
int M = ShuffleMask[i];
if (!DemandedElts[i] || ShuffleUndef[i])
@@ -40351,9 +41600,9 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
isa<ConstantSDNode>(N0)) {
auto *C = cast<ConstantSDNode>(N0);
- if (C->isAllOnesValue())
+ if (C->isAllOnes())
return DAG.getConstant(1, SDLoc(N0), VT);
- if (C->isNullValue())
+ if (C->isZero())
return DAG.getConstant(0, SDLoc(N0), VT);
}
@@ -40419,6 +41668,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
// Check if we have a bitcast from another integer type as well.
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64) ||
+ (Subtarget.hasFP16() && VT == MVT::f16) ||
(Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() &&
TLI.isTypeLegal(VT))))
return SDValue();
@@ -40547,7 +41797,7 @@ static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
else if (BinOp == ISD::SMIN)
Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
else if (BinOp == ISD::UMAX)
- Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);
+ Mask = DAG.getAllOnesConstant(DL, SrcVT);
if (Mask)
MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);
@@ -40994,7 +42244,8 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
/// Extracting a scalar FP value from vector element 0 is free, so extract each
/// operand first, then perform the math as a scalar op.
-static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
+static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
@@ -41022,7 +42273,8 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
}
- if (VT != MVT::f32 && VT != MVT::f64)
+ if (!(VT == MVT::f16 && Subtarget.hasFP16()) && VT != MVT::f32 &&
+ VT != MVT::f64)
return SDValue();
// Vector FP selects don't fit the pattern of FP math ops (because the
@@ -41277,8 +42529,8 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (IsPextr) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(
- SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0),
+ APInt::getAllOnes(VT.getSizeInBits()), DCI))
return SDValue(N, 0);
// PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).
@@ -41336,7 +42588,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineArithReduction(N, DAG, Subtarget))
return V;
- if (SDValue V = scalarizeExtEltFP(N, DAG))
+ if (SDValue V = scalarizeExtEltFP(N, DAG, Subtarget))
return V;
// Attempt to extract a i1 element by using MOVMSK to extract the signbits
@@ -41573,11 +42825,11 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
// Multiply condition by the difference if non-one.
- if (!AbsDiff.isOneValue())
+ if (!AbsDiff.isOne())
R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));
// Add the base if non-zero.
- if (!FalseC->isNullValue())
+ if (!FalseC->isZero())
R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));
return R;
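
For reference, a minimal standalone sketch (not part of this patch) of the arithmetic identity the combine above relies on: select(Cond, TrueC, FalseC) can be computed as the base plus the zero-extended condition times the difference. It assumes TrueC >= FalseC so AbsDiff is their plain difference; the helper name is illustrative only.

#include <cassert>
#include <cstdint>

// Model of the select-of-two-constants lowering for unsigned 32-bit constants.
static uint32_t selectOfTwoConstants(bool Cond, uint32_t TrueC, uint32_t FalseC) {
  uint32_t AbsDiff = TrueC - FalseC; // assumes TrueC >= FalseC
  uint32_t R = Cond ? 1u : 0u;       // zero-extend the i1 condition
  if (AbsDiff != 1)                  // multiply by the difference if non-one
    R *= AbsDiff;
  if (FalseC != 0)                   // add the base if non-zero
    R += FalseC;
  return R;
}

int main() {
  assert(selectOfTwoConstants(true, 7, 3) == 7);
  assert(selectOfTwoConstants(false, 7, 3) == 3);
  assert(selectOfTwoConstants(true, 4, 3) == 4);  // AbsDiff == 1 path
  assert(selectOfTwoConstants(false, 5, 0) == 0); // FalseC == 0 path
  return 0;
}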
@@ -41794,10 +43046,15 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) {
int NumElts = VT.getVectorNumElements();
for (int i = 0; i != NumElts; ++i) {
- if (CondMask[i] < NumElts)
+ // getConstVector sets negative shuffle mask values as undef, so ensure
+ // we hardcode SM_SentinelZero values to zero (0x80).
+ if (CondMask[i] < NumElts) {
+ LHSMask[i] = (LHSMask[i] == SM_SentinelZero) ? 0x80 : LHSMask[i];
RHSMask[i] = 0x80;
- else
+ } else {
LHSMask[i] = 0x80;
+ RHSMask[i] = (RHSMask[i] == SM_SentinelZero) ? 0x80 : RHSMask[i];
+ }
}
LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
getConstVector(LHSMask, SimpleVT, DAG, DL, true));
@@ -42331,7 +43588,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
// We can handle comparisons with zero in a number of cases by manipulating
// the CC used.
- if (!Comparison.isNullValue())
+ if (!Comparison.isZero())
return SDValue();
if (CC == X86::COND_S && Addend == 1)
@@ -42737,7 +43994,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
unsigned NumElts = VecVT.getVectorNumElements();
unsigned NumEltBits = VecVT.getScalarSizeInBits();
- bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isNullValue();
+ bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isZero();
bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
CmpVal.isMask(NumElts);
if (!IsAnyOf && !IsAllOf)
@@ -42830,12 +44087,12 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
unsigned NumShuffleElts = ShuffleMask.size();
- APInt DemandedElts = APInt::getNullValue(NumShuffleElts);
+ APInt DemandedElts = APInt::getZero(NumShuffleElts);
for (int M : ShuffleMask) {
assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
DemandedElts.setBit(M);
}
- if (DemandedElts.isAllOnesValue()) {
+ if (DemandedElts.isAllOnes()) {
SDLoc DL(EFLAGS);
SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
@@ -43316,8 +44573,9 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-// If the upper 17 bits of each element are zero then we can use PMADDWD,
-// which is always at least as quick as PMULLD, except on KNL.
+// If the upper 17 bits of either element are zero and the upper bits of the
+// other element are all zero or sign bits, then we can use PMADDWD, which is
+// always at least as quick as PMULLD, except on KNL.
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasSSE2())
@@ -43332,33 +44590,92 @@ static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)
return SDValue();
- // Make sure the type is legal or will be widened to a legal type.
- if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ // Make sure the type is legal or can split/widen to a legal type.
+ // With AVX512 but without BWI, we would need to split v32i16.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
return SDValue();
- MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());
+ EVT WVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, 2 * NumElts);
- // Without BWI, we would need to split v32i16.
- if (WVT == MVT::v32i16 && !Subtarget.hasBWI())
+ // With AVX512 but without BWI, we would need to split v32i16.
+ if (32 <= (2 * NumElts) && Subtarget.hasAVX512() && !Subtarget.hasBWI())
return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- // If we are zero extending two steps without SSE4.1, its better to reduce
+ // If we are zero/sign extending two steps without SSE4.1, it's better to
+ // reduce the vmul width instead.
+ if (!Subtarget.hasSSE41() &&
+ (((N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8)) ||
+ ((N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
+ (N1.getOpcode() == ISD::SIGN_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() <= 8))))
+ return SDValue();
+
+ // If we are sign extending a wide vector without SSE4.1, it's better to reduce
// the vmul width instead.
if (!Subtarget.hasSSE41() &&
- (N0.getOpcode() == ISD::ZERO_EXTEND &&
- N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
- (N1.getOpcode() == ISD::ZERO_EXTEND &&
- N1.getOperand(0).getScalarValueSizeInBits() <= 8))
+ (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueSizeInBits() > 128) &&
+ (N1.getOpcode() == ISD::SIGN_EXTEND &&
+ N1.getOperand(0).getValueSizeInBits() > 128))
return SDValue();
- APInt Mask17 = APInt::getHighBitsSet(32, 17);
- if (!DAG.MaskedValueIsZero(N1, Mask17) ||
- !DAG.MaskedValueIsZero(N0, Mask17))
+ // Sign bits must extend down to the lowest i16.
+ if (DAG.ComputeMinSignedBits(N1) > 16 || DAG.ComputeMinSignedBits(N0) > 16)
return SDValue();
+ // At least one of the operands must have the upper 17 bits of each element
+ // zero, or those bits must be safely zeroable without altering the final
+ // result.
+ auto GetZeroableOp = [&](SDValue Op) {
+ APInt Mask17 = APInt::getHighBitsSet(32, 17);
+ if (DAG.MaskedValueIsZero(Op, Mask17))
+ return Op;
+ // Mask off upper 16-bits of sign-extended constants.
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
+ return DAG.getNode(ISD::AND, SDLoc(N), VT, Op,
+ DAG.getConstant(0xFFFF, SDLoc(N), VT));
+ if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) {
+ SDValue Src = Op.getOperand(0);
+ // Convert sext(vXi16) to zext(vXi16).
+ if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
+ // Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets
+ // which will expand the extension.
+ if (Src.getScalarValueSizeInBits() < 16 && !Subtarget.hasSSE41()) {
+ EVT ExtVT = VT.changeVectorElementType(MVT::i16);
+ Src = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), ExtVT, Src);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Src);
+ }
+ }
+ // Convert SIGN_EXTEND_VECTOR_INREG to ZEXT_EXTEND_VECTOR_INREG.
+ if (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
+ N->isOnlyUserOf(Op.getNode())) {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getScalarValueSizeInBits() == 16)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT, Src);
+ }
+ // Convert VSRAI(Op, 16) to VSRLI(Op, 16).
+ if (Op.getOpcode() == X86ISD::VSRAI && Op.getConstantOperandVal(1) == 16 &&
+ N->isOnlyUserOf(Op.getNode())) {
+ return DAG.getNode(X86ISD::VSRLI, SDLoc(N), VT, Op.getOperand(0),
+ Op.getOperand(1));
+ }
+ return SDValue();
+ };
+ SDValue ZeroN0 = GetZeroableOp(N0);
+ SDValue ZeroN1 = GetZeroableOp(N1);
+ if (!ZeroN0 && !ZeroN1)
+ return SDValue();
+ N0 = ZeroN0 ? ZeroN0 : N0;
+ N1 = ZeroN1 ? ZeroN1 : N1;
+
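
As an aside, a standalone sketch (not part of this patch) of why "upper 17 bits zero" is the threshold that lets a plain i32 multiply be expressed as one PMADDWD lane: with 17 leading zero bits each value is at most 32767, so its low 16 bits are non-negative as a signed i16 and the high halves contribute nothing to the pair-multiply-add. The helper name is illustrative only.

#include <cassert>
#include <cstdint>

// One 32-bit lane of (V)PMADDWD: multiply two pairs of signed i16 values and
// add the two products.
static int32_t pmaddwdLane(int16_t A0, int16_t A1, int16_t B0, int16_t B1) {
  return (int32_t)A0 * B0 + (int32_t)A1 * B1;
}

int main() {
  for (uint32_t A : {0u, 1u, 12345u, 32767u})     // upper 17 bits all zero
    for (uint32_t B : {0u, 7u, 30000u, 32767u}) {
      int16_t ALo = (int16_t)(A & 0xFFFF), AHi = (int16_t)(A >> 16); // AHi == 0
      int16_t BLo = (int16_t)(B & 0xFFFF), BHi = (int16_t)(B >> 16); // BHi == 0
      assert(pmaddwdLane(ALo, AHi, BLo, BHi) == (int32_t)(A * B));
    }
  return 0;
}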
// Use SplitOpsAndApply to handle AVX splitting.
auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
ArrayRef<SDValue> Ops) {
@@ -43412,8 +44729,6 @@ static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Optimize a single multiply with constant into two operations in order to
-/// implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -43428,8 +44743,11 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() && VT.isVector())
return reduceVMULWidth(N, DAG, Subtarget);
+ // Optimize a single multiply with constant into two operations in order to
+ // implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA.
if (!MulConstantOptimization)
return SDValue();
+
// An imul is usually smaller than the alternative sequence.
if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
@@ -43569,9 +44887,7 @@ static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
"SRL or SRA node is required here!");
SDLoc DL(N);
- // Only do this with SSE4.1. On earlier targets reduceVMULWidth will expand
- // the multiply.
- if (!Subtarget.hasSSE41())
+ if (!Subtarget.hasSSE2())
return SDValue();
// The operation feeding into the shift must be a multiply.
@@ -43964,7 +45280,7 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
APInt Undefs(NumDstElts, 0);
- SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
+ SmallVector<APInt, 32> Bits(NumDstElts, APInt::getZero(DstBitsPerElt));
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
@@ -43994,9 +45310,9 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
if (Val.isIntN(DstBitsPerElt))
Val = Val.trunc(DstBitsPerElt);
else if (Val.isNegative())
- Val = APInt::getNullValue(DstBitsPerElt);
+ Val = APInt::getZero(DstBitsPerElt);
else
- Val = APInt::getAllOnesValue(DstBitsPerElt);
+ Val = APInt::getAllOnes(DstBitsPerElt);
}
Bits[Lane * NumDstEltsPerLane + Elt] = Val;
}
@@ -44048,6 +45364,14 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
}
+
+ // Try again with pack(*_extend_vector_inreg, undef).
+ unsigned VecInRegOpc = IsSigned ? ISD::SIGN_EXTEND_VECTOR_INREG
+ : ISD::ZERO_EXTEND_VECTOR_INREG;
+ if (N0.getOpcode() == VecInRegOpc && N1.isUndef() &&
+ N0.getOperand(0).getScalarValueSizeInBits() < DstBitsPerElt)
+ return getEXTEND_VECTOR_INREG(ExtOpc, SDLoc(N), VT, N0.getOperand(0),
+ DAG);
}
// Attempt to combine as shuffle.
@@ -44066,47 +45390,25 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
"Unexpected horizontal add/sub opcode");
if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
- // For slow-hop targets, if we have a hop with a single op, see if we already
- // have another user that we can reuse and shuffle the result.
MVT VT = N->getSimpleValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (VT.is128BitVector() && LHS == RHS) {
- for (SDNode *User : LHS->uses()) {
- if (User != N && User->getOpcode() == N->getOpcode()) {
- MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
- if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
- return DAG.getBitcast(
- VT,
- DAG.getVectorShuffle(ShufVT, SDLoc(N),
- DAG.getBitcast(ShufVT, SDValue(User, 0)),
- DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
- }
- if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
- return DAG.getBitcast(
- VT,
- DAG.getVectorShuffle(ShufVT, SDLoc(N),
- DAG.getBitcast(ShufVT, SDValue(User, 0)),
- DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
- }
- }
- }
- }
// HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
LHS.getOpcode() == RHS.getOpcode() &&
- LHS.getValueType() == RHS.getValueType()) {
+ LHS.getValueType() == RHS.getValueType() &&
+ N->isOnlyUserOf(LHS.getNode()) && N->isOnlyUserOf(RHS.getNode())) {
SDValue LHS0 = LHS.getOperand(0);
- SDValue RHS0 = LHS.getOperand(1);
- SDValue LHS1 = RHS.getOperand(0);
+ SDValue LHS1 = LHS.getOperand(1);
+ SDValue RHS0 = RHS.getOperand(0);
SDValue RHS1 = RHS.getOperand(1);
- if ((LHS0 == RHS0 || LHS0.isUndef() || RHS0.isUndef()) &&
- (LHS1 == RHS1 || LHS1.isUndef() || RHS1.isUndef())) {
+ if ((LHS0 == LHS1 || LHS0.isUndef() || LHS1.isUndef()) &&
+ (RHS0 == RHS1 || RHS0.isUndef() || RHS1.isUndef())) {
SDLoc DL(N);
SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(),
- LHS0.isUndef() ? RHS0 : LHS0,
- LHS1.isUndef() ? RHS1 : LHS1);
+ LHS0.isUndef() ? LHS1 : LHS0,
+ RHS0.isUndef() ? RHS1 : RHS0);
MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
Res = DAG.getBitcast(ShufVT, Res);
SDValue NewLHS =
@@ -44115,9 +45417,8 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
SDValue NewRHS =
DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
getV4X86ShuffleImm8ForMask({2, 3, 2, 3}, DL, DAG));
- DAG.ReplaceAllUsesOfValueWith(LHS, DAG.getBitcast(VT, NewLHS));
- DAG.ReplaceAllUsesOfValueWith(RHS, DAG.getBitcast(VT, NewRHS));
- return SDValue(N, 0);
+ return DAG.getNode(N->getOpcode(), DL, VT, DAG.getBitcast(VT, NewLHS),
+ DAG.getBitcast(VT, NewRHS));
}
}
}
@@ -44154,7 +45455,7 @@ static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
KnownZero, DCI))
return SDValue(N, 0);
@@ -44256,8 +45557,8 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBitsPerElt),
+ DCI))
return SDValue(N, 0);
return SDValue();
@@ -44276,7 +45577,7 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBitsPerElt), DCI))
+ APInt::getAllOnes(NumBitsPerElt), DCI))
return SDValue(N, 0);
}
@@ -44315,12 +45616,15 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
SDValue CMP01 = CMP0->getOperand(1);
EVT VT = CMP00.getValueType();
- if (VT == MVT::f32 || VT == MVT::f64) {
+ if (VT == MVT::f32 || VT == MVT::f64 ||
+ (VT == MVT::f16 && Subtarget.hasFP16())) {
bool ExpectingFlags = false;
// Check for any users that want flags:
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- !ExpectingFlags && UI != UE; ++UI)
- switch (UI->getOpcode()) {
+ for (const SDNode *U : N->uses()) {
+ if (ExpectingFlags)
+ break;
+
+ switch (U->getOpcode()) {
default:
case ISD::BR_CC:
case ISD::BRCOND:
@@ -44333,6 +45637,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
case ISD::ANY_EXTEND:
break;
}
+ }
if (!ExpectingFlags) {
enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
@@ -44396,7 +45701,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
}
/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
-static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineAndNotIntoANDNP(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::AND);
MVT VT = N->getSimpleValueType(0);
@@ -44543,17 +45848,19 @@ static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) {
return FPOpcode;
}
-/// If both input operands of a logic op are being cast from floating point
-/// types, try to convert this into a floating point logic node to avoid
-/// unnecessary moves from SSE to integer registers.
+/// If both input operands of a logic op are being cast from floating-point
+/// types or FP compares, try to convert this into a floating-point logic node
+/// to avoid unnecessary moves from SSE to integer registers.
static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST)
+ if (!((N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) ||
+ (N0.getOpcode() == ISD::SETCC && N1.getOpcode() == ISD::SETCC)))
return SDValue();
SDValue N00 = N0.getOperand(0);
@@ -44562,14 +45869,44 @@ static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG,
EVT N10Type = N10.getValueType();
// Ensure that both types are the same and are legal scalar fp types.
- if (N00Type != N10Type ||
- !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
- (Subtarget.hasSSE2() && N00Type == MVT::f64)))
+ if (N00Type != N10Type || !((Subtarget.hasSSE1() && N00Type == MVT::f32) ||
+ (Subtarget.hasSSE2() && N00Type == MVT::f64) ||
+ (Subtarget.hasFP16() && N00Type == MVT::f16)))
return SDValue();
- unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
- SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
- return DAG.getBitcast(VT, FPLogic);
+ if (N0.getOpcode() == ISD::BITCAST && !DCI.isBeforeLegalizeOps()) {
+ unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode());
+ SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10);
+ return DAG.getBitcast(VT, FPLogic);
+ }
+
+ // The vector ISA for FP predicates is incomplete before AVX, so converting
+ // COMIS* to CMPS* may not be a win before AVX.
+ // TODO: Check types/predicates to see if they are available with SSE/SSE2.
+ if (!Subtarget.hasAVX() || VT != MVT::i1 || N0.getOpcode() != ISD::SETCC ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Convert scalar FP compares and logic to vector compares (COMIS* to CMPS*)
+ // and vector logic:
+ // logic (setcc N00, N01), (setcc N10, N11) -->
+ // extelt (logic (setcc (s2v N00), (s2v N01)), (setcc (s2v N10), (s2v N11))), 0
+ unsigned NumElts = 128 / N00Type.getSizeInBits();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), N00Type, NumElts);
+ EVT BoolVecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+ SDValue ZeroIndex = DAG.getVectorIdxConstant(0, DL);
+ SDValue N01 = N0.getOperand(1);
+ SDValue N11 = N1.getOperand(1);
+ SDValue Vec00 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N00);
+ SDValue Vec01 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N01);
+ SDValue Vec10 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N10);
+ SDValue Vec11 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, N11);
+ SDValue Setcc0 = DAG.getSetCC(DL, BoolVecVT, Vec00, Vec01,
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ SDValue Setcc1 = DAG.getSetCC(DL, BoolVecVT, Vec10, Vec11,
+ cast<CondCodeSDNode>(N1.getOperand(2))->get());
+ SDValue Logic = DAG.getNode(N->getOpcode(), DL, BoolVecVT, Setcc0, Setcc1);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Logic, ZeroIndex);
}
// Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y))
@@ -44613,12 +45950,40 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
- EVT VT0 = Op0.getValueType();
- EVT VT1 = Op1.getValueType();
-
- if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
+ EVT VT = Op0.getValueType();
+ if (VT != Op1.getValueType() || !VT.isSimple() || !VT.isInteger())
return SDValue();
+ // Try to convert an "is positive" signbit masking operation into arithmetic
+ // shift and "andn". This saves a materialization of a -1 vector constant.
+ // The "is negative" variant should be handled more generally because it only
+ // requires "and" rather than "andn":
+ // and (pcmpgt X, -1), Y --> pandn (vsrai X, BitWidth - 1), Y
+ //
+ // This is limited to the original type to avoid producing even more bitcasts.
+ // If the bitcasts can't be eliminated, then it is unlikely that this fold
+ // will be profitable.
+ if (N->getValueType(0) == VT &&
+ supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRA)) {
+ SDValue X, Y;
+ if (Op1.hasOneUse() && Op1.getOpcode() == X86ISD::PCMPGT &&
+ isAllOnesOrAllOnesSplat(Op1.getOperand(1))) {
+ X = Op1.getOperand(0);
+ Y = Op0;
+ } else if (Op0.hasOneUse() && Op0.getOpcode() == X86ISD::PCMPGT &&
+ isAllOnesOrAllOnesSplat(Op0.getOperand(1))) {
+ X = Op0.getOperand(0);
+ Y = Op1;
+ }
+ if (X && Y) {
+ SDLoc DL(N);
+ SDValue Sra =
+ getTargetVShiftByConstNode(X86ISD::VSRAI, DL, VT.getSimpleVT(), X,
+ VT.getScalarSizeInBits() - 1, DAG);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, Sra, Y);
+ }
+ }
+
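
A standalone sketch (not part of this patch) of the scalar identity behind the fold just above, shown for one 32-bit lane and assuming an arithmetic right shift of signed values as on x86: the all-ones mask produced by pcmpgt(X, -1) selects Y exactly when X is non-negative, which matches andn of the sign-replicated shift with Y. Helper names are illustrative only.

#include <cassert>
#include <cstdint>

static uint32_t maskIfPositiveCmp(int32_t X, uint32_t Y) {
  uint32_t Mask = (X > -1) ? 0xFFFFFFFFu : 0u; // pcmpgt X, -1
  return Mask & Y;                             // and (pcmpgt X, -1), Y
}

static uint32_t maskIfPositiveShift(int32_t X, uint32_t Y) {
  // vsrai X, 31: the arithmetic shift replicates the sign bit into every bit.
  uint32_t SignMask = (uint32_t)(X >> 31);
  return ~SignMask & Y;                        // pandn (vsrai X, 31), Y
}

int main() {
  for (int32_t X : {-5, -1, 0, 1, 12345})
    for (uint32_t Y : {0u, 0xDEADBEEFu, 0xFFFFFFFFu})
      assert(maskIfPositiveCmp(X, Y) == maskIfPositiveShift(X, Y));
  return 0;
}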
APInt SplatVal;
if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
!SplatVal.isMask())
@@ -44628,17 +45993,17 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
if (isBitwiseNot(Op0))
return SDValue();
- if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
+ if (!supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRL))
return SDValue();
- unsigned EltBitWidth = VT0.getScalarSizeInBits();
+ unsigned EltBitWidth = VT.getScalarSizeInBits();
if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
return SDValue();
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
- SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
+ SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}
@@ -44881,16 +46246,16 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
+ return FPLogic;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
- if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
+ if (SDValue R = combineAndNotIntoANDNP(N, DAG))
return R;
if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
@@ -44921,7 +46286,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
llvm::all_of(EltBits, [](const APInt &M) {
- return M.isNullValue() || M.isAllOnesValue();
+ return M.isZero() || M.isAllOnes();
})) {
unsigned NumElts = SrcVecVT.getVectorNumElements();
unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
@@ -44933,8 +46298,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (UndefElts[i])
continue;
int VecIdx = Scale * Idx + i;
- ShuffleMask[VecIdx] =
- EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx;
+ ShuffleMask[VecIdx] = EltBits[i].isZero() ? SM_SentinelZero : VecIdx;
}
if (SDValue Shuffle = combineX86ShufflesRecursively(
@@ -44956,7 +46320,8 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");
MVT VT = N->getSimpleValueType(0);
- if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if (!VT.isVector() || (EltSizeInBits % 8) != 0)
return SDValue();
SDValue N0 = peekThroughBitcasts(N->getOperand(0));
@@ -44966,9 +46331,7 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
// On XOP we'll lower to PCMOV so accept one use. With AVX512, we can use
// VPTERNLOG. Otherwise only do this if either mask has multiple uses already.
- bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
- Subtarget.hasVLX();
- if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
+ if (!(Subtarget.hasXOP() || useVPTERNLOG(Subtarget, VT) ||
!N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
return SDValue();
@@ -44992,13 +46355,19 @@ static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
- if (UseVPTERNLOG) {
- // Emit a VPTERNLOG node directly.
- SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
- SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
- SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
+ if (useVPTERNLOG(Subtarget, VT)) {
+ // Emit a VPTERNLOG node directly - 0xCA is the imm code for A?B:C.
+ // VPTERNLOG is only available for vXi32/vXi64 vector types.
+ MVT OpSVT = EltSizeInBits == 32 ? MVT::i32 : MVT::i64;
+ MVT OpVT =
+ MVT::getVectorVT(OpSVT, VT.getSizeInBits() / OpSVT.getSizeInBits());
+ SDValue A = DAG.getBitcast(OpVT, N0.getOperand(1));
+ SDValue B = DAG.getBitcast(OpVT, N0.getOperand(0));
+ SDValue C = DAG.getBitcast(OpVT, N1.getOperand(0));
SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
- return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm);
+ SDValue Res = getAVX512Node(X86ISD::VPTERNLOG, DL, OpVT, {A, B, C, Imm},
+ DAG, Subtarget);
+ return DAG.getBitcast(VT, Res);
}
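
A standalone sketch (not part of this patch) of how the 0xCA immediate encodes the A?B:C bit-select: bit (A<<2 | B<<1 | C) of the VPTERNLOG immediate holds the truth-table result for that combination of input bits.

#include <cassert>
#include <cstdint>

int main() {
  uint8_t Imm = 0;
  for (unsigned A = 0; A <= 1; ++A)
    for (unsigned B = 0; B <= 1; ++B)
      for (unsigned C = 0; C <= 1; ++C) {
        unsigned Result = A ? B : C;              // the desired bit-select
        Imm |= Result << ((A << 2) | (B << 1) | C);
      }
  assert(Imm == 0xCA);
  return 0;
}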
SDValue X = N->getOperand(0);
@@ -45247,15 +46616,15 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
+ return FPLogic;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
return R;
@@ -45476,7 +46845,7 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) {
APInt SignedMax, SignedMin;
if (MatchPackUS) {
- SignedMax = APInt::getAllOnesValue(NumDstBits).zext(NumSrcBits);
+ SignedMax = APInt::getAllOnes(NumDstBits).zext(NumSrcBits);
SignedMin = APInt(NumSrcBits, 0);
} else {
SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
@@ -45641,6 +47010,11 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
});
};
+ auto IsZExtLike = [DAG = &DAG, ScalarVT](SDValue V) {
+ unsigned MaxActiveBits = DAG->computeKnownBits(V).countMaxActiveBits();
+ return MaxActiveBits <= ScalarVT.getSizeInBits();
+ };
+
// Check if each element of the vector is right-shifted by one.
SDValue LHS = In.getOperand(0);
SDValue RHS = In.getOperand(1);
@@ -45659,23 +47033,25 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
};
- auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
+ auto AVGSplitter = [&](std::array<SDValue, 2> Ops) {
+ for (SDValue &Op : Ops)
+ if (Op.getValueType() != VT)
+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
// Pad to a power-of-2 vector, split+apply and extract the original vector.
unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
if (NumElemsPow2 != NumElems) {
- SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
- SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
- for (unsigned i = 0; i != NumElems; ++i) {
- SDValue Idx = DAG.getIntPtrConstant(i, DL);
- Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
- Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
+ for (SDValue &Op : Ops) {
+ SmallVector<SDValue, 32> EltsOfOp(NumElemsPow2, DAG.getUNDEF(ScalarVT));
+ for (unsigned i = 0; i != NumElems; ++i) {
+ SDValue Idx = DAG.getIntPtrConstant(i, DL);
+ EltsOfOp[i] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op, Idx);
+ }
+ Op = DAG.getBuildVector(Pow2VT, DL, EltsOfOp);
}
- Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
- Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
}
- SDValue Res =
- SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
+ SDValue Res = SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, Ops, AVGBuilder);
if (NumElemsPow2 == NumElems)
return Res;
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
@@ -45685,14 +47061,12 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// Take care of the case when one of the operands is a constant vector whose
// element is in the range [1, 256].
if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
- Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
- Operands[0].getOperand(0).getValueType() == VT) {
+ IsZExtLike(Operands[0])) {
// The pattern is detected. Subtract one from the constant vector, then
// demote it and emit X86ISD::AVG instruction.
SDValue VecOnes = DAG.getConstant(1, DL, InVT);
Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
- Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
- return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
+ return AVGSplitter({Operands[0], Operands[1]});
}
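
A standalone sketch (not part of this patch) of the identity used just above for the constant case, assuming i8 elements: because PAVGB computes the rounding average (a + b + 1) >> 1 in a wider type, (x + c) >> 1 with c in [1, 256] equals pavg(x, c - 1), which is why the constant is decremented before emitting X86ISD::AVG. The helper name is illustrative only.

#include <cassert>
#include <cstdint>

// Scalar model of one PAVGB lane: rounding average computed without overflow.
static uint8_t pavgb(uint8_t A, uint8_t B) {
  return (uint8_t)(((unsigned)A + (unsigned)B + 1) >> 1);
}

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 1; C <= 256; ++C)
      assert((uint8_t)((X + C) >> 1) == pavgb((uint8_t)X, (uint8_t)(C - 1)));
  return 0;
}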
// Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
@@ -45731,15 +47105,12 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
// Check if Operands[0] and Operands[1] are results of type promotion.
for (int j = 0; j < 2; ++j)
- if (Operands[j].getValueType() != VT) {
- if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
- Operands[j].getOperand(0).getValueType() != VT)
+ if (Operands[j].getValueType() != VT)
+ if (!IsZExtLike(Operands[j]))
return SDValue();
- Operands[j] = Operands[j].getOperand(0);
- }
// The pattern is detected, emit X86ISD::AVG instruction(s).
- return AVGSplitter(Operands[0], Operands[1]);
+ return AVGSplitter({Operands[0], Operands[1]});
}
return SDValue();
@@ -46685,11 +48056,171 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Try to combine the following nodes
+// t29: i64 = X86ISD::Wrapper TargetConstantPool:i64
+// <i32 -2147483648[float -0.000000e+00]> 0
+// t27: v16i32[v16f32],ch = X86ISD::VBROADCAST_LOAD
+// <(load 4 from constant-pool)> t0, t29
+// [t30: v16i32 = bitcast t27]
+// t6: v16i32 = xor t7, t27[t30]
+// t11: v16f32 = bitcast t6
+// t21: v16f32 = X86ISD::VFMULC[X86ISD::VCFMULC] t11, t8
+// into X86ISD::VFCMULC[X86ISD::VFMULC] if possible:
+// t22: v16f32 = bitcast t7
+// t23: v16f32 = X86ISD::VFCMULC[X86ISD::VFMULC] t8, t22
+// t24: v32f16 = bitcast t23
+static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ int CombineOpcode =
+ N->getOpcode() == X86ISD::VFCMULC ? X86ISD::VFMULC : X86ISD::VFCMULC;
+ auto isConjugationConstant = [](const Constant *c) {
+ if (const auto *CI = dyn_cast<ConstantInt>(c)) {
+ APInt ConjugationInt32 = APInt(32, 0x80000000, true);
+ APInt ConjugationInt64 = APInt(64, 0x8000000080000000ULL, true);
+ switch (CI->getBitWidth()) {
+ case 16:
+ return false;
+ case 32:
+ return CI->getValue() == ConjugationInt32;
+ case 64:
+ return CI->getValue() == ConjugationInt64;
+ default:
+ llvm_unreachable("Unexpected bit width");
+ }
+ }
+ if (const auto *CF = dyn_cast<ConstantFP>(c))
+ return CF->isNegativeZeroValue();
+ return false;
+ };
+ auto combineConjugation = [&](SDValue &r) {
+ if (LHS->getOpcode() == ISD::BITCAST && RHS.hasOneUse()) {
+ SDValue XOR = LHS.getOperand(0);
+ if (XOR->getOpcode() == ISD::XOR && XOR.hasOneUse()) {
+ SDValue XORRHS = XOR.getOperand(1);
+ if (XORRHS.getOpcode() == ISD::BITCAST && XORRHS.hasOneUse())
+ XORRHS = XORRHS.getOperand(0);
+ if (XORRHS.getOpcode() == X86ISD::VBROADCAST_LOAD &&
+ XORRHS.getOperand(1).getNumOperands()) {
+ ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(XORRHS.getOperand(1).getOperand(0));
+ if (CP && isConjugationConstant(CP->getConstVal())) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ SDValue I2F = DAG.getBitcast(VT, LHS.getOperand(0).getOperand(0));
+ SDValue FCMulC = DAG.getNode(CombineOpcode, SDLoc(N), VT, RHS, I2F);
+ r = DAG.getBitcast(VT, FCMulC);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ };
+ SDValue Res;
+ if (combineConjugation(Res))
+ return Res;
+ std::swap(LHS, RHS);
+ if (combineConjugation(Res))
+ return Res;
+ return Res;
+}
+
+// Try to combine the following nodes:
+// FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C)) to FMA(B, C, A)
+static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ auto AllowContract = [&DAG](const SDNodeFlags &Flags) {
+ return DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Flags.hasAllowContract();
+ };
+
+ auto HasNoSignedZero = [&DAG](const SDNodeFlags &Flags) {
+ return DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros();
+ };
+ auto IsVectorAllNegativeZero = [](const SDNode *N) {
+ if (N->getOpcode() != X86ISD::VBROADCAST_LOAD)
+ return false;
+ assert(N->getSimpleValueType(0).getScalarType() == MVT::f32 &&
+ "Unexpected vector type!");
+ if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(N->getOperand(1)->getOperand(0))) {
+ APInt AI = APInt(32, 0x80008000, true);
+ if (const auto *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
+ return CI->getValue() == AI;
+ if (const auto *CF = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CF->getValue() == APFloat(APFloat::IEEEsingle(), AI);
+ }
+ return false;
+ };
+
+ if (N->getOpcode() != ISD::FADD || !Subtarget.hasFP16() ||
+ !AllowContract(N->getFlags()))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v8f16 && VT != MVT::v16f16 && VT != MVT::v32f16)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ bool IsConj;
+ SDValue FAddOp1, MulOp0, MulOp1;
+ auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &AllowContract,
+ &IsVectorAllNegativeZero,
+ &HasNoSignedZero](SDValue N) -> bool {
+ if (!N.hasOneUse() || N.getOpcode() != ISD::BITCAST)
+ return false;
+ SDValue Op0 = N.getOperand(0);
+ unsigned Opcode = Op0.getOpcode();
+ if (Op0.hasOneUse() && AllowContract(Op0->getFlags())) {
+ if ((Opcode == X86ISD::VFMULC || Opcode == X86ISD::VFCMULC)) {
+ MulOp0 = Op0.getOperand(0);
+ MulOp1 = Op0.getOperand(1);
+ IsConj = Opcode == X86ISD::VFCMULC;
+ return true;
+ }
+ if ((Opcode == X86ISD::VFMADDC || Opcode == X86ISD::VFCMADDC) &&
+ ((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) &&
+ HasNoSignedZero(Op0->getFlags())) ||
+ IsVectorAllNegativeZero(Op0->getOperand(2).getNode()))) {
+ MulOp0 = Op0.getOperand(0);
+ MulOp1 = Op0.getOperand(1);
+ IsConj = Opcode == X86ISD::VFCMADDC;
+ return true;
+ }
+ }
+ return false;
+ };
+
+ if (GetCFmulFrom(LHS))
+ FAddOp1 = RHS;
+ else if (GetCFmulFrom(RHS))
+ FAddOp1 = LHS;
+ else
+ return SDValue();
+
+ MVT CVT = MVT::getVectorVT(MVT::f32, VT.getVectorNumElements() / 2);
+ FAddOp1 = DAG.getBitcast(CVT, FAddOp1);
+ unsigned NewOp = IsConj ? X86ISD::VFCMADDC : X86ISD::VFMADDC;
+ // FIXME: How do we handle the case where the fast-math flags of FADD differ
+ // from CFMUL's?
+ SDValue CFmul =
+ DAG.getNode(NewOp, SDLoc(N), CVT, MulOp0, MulOp1, FAddOp1, N->getFlags());
+ return DAG.getBitcast(VT, CFmul);
+}
+
/// Do target-specific dag combines on floating-point adds/subs.
static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget))
return HOp;
+
+ if (SDValue COp = combineFaddCFmul(N, DAG, Subtarget))
+ return COp;
+
return SDValue();
}
@@ -46922,7 +48453,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
- In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
+ In, APInt::getAllOnes(VT.getVectorNumElements()))) {
if (*ShAmt == MinSignBits) {
SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
@@ -47178,7 +48709,7 @@ static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
+ APInt DemandedMask(APInt::getAllOnes(VT.getScalarSizeInBits()));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
@@ -47498,6 +49029,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
return R;
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
+ return FPLogic;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -47546,9 +49080,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
}
}
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
return combineFneg(N, DAG, DCI, Subtarget);
}
@@ -47562,7 +49093,7 @@ static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(NumBits));
+ APInt DemandedMask(APInt::getAllOnes(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
@@ -47704,6 +49235,7 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
(Subtarget.hasSSE2() && VT == MVT::f64) ||
+ (Subtarget.hasFP16() && VT == MVT::f16) ||
(VT.isVector() && TLI.isTypeLegal(VT))))
return SDValue();
@@ -47765,7 +49297,7 @@ static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt KnownUndef, KnownZero;
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
KnownZero, DCI))
return SDValue(N, 0);
@@ -48265,6 +49797,9 @@ static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
return SDValue();
+ // We don't have a CMPP instruction for vXf16.
+ if (N0.getOperand(0).getValueType().getVectorElementType() == MVT::f16)
+ return SDValue();
// We can only do this if the vector size is 256 bits or less.
unsigned Size = VT.getSizeInBits();
if (Size > 256 && Subtarget.useAVX512Regs())
@@ -48366,7 +49901,9 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
}
EVT ScalarVT = VT.getScalarType();
- if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
+ if (((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
+ !Subtarget.hasAnyFMA()) &&
+ !(ScalarVT == MVT::f16 && Subtarget.hasFP16()))
return SDValue();
auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
@@ -48873,7 +50410,7 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
// Simplify the inputs.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedMask(APInt::getAllOnesValue(NumBits));
+ APInt DemandedMask(APInt::getAllOnes(NumBits));
if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
return SDValue(N, 0);
@@ -48881,9 +50418,44 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ auto *MemOp = cast<X86MaskedGatherScatterSDNode>(N);
+ SDValue BasePtr = MemOp->getBasePtr();
+ SDValue Index = MemOp->getIndex();
+ SDValue Scale = MemOp->getScale();
+ SDValue Mask = MemOp->getMask();
+
+ // Attempt to fold a constant scaling of the index into the scale value
+ // directly. For smaller indices, the implicit sext is performed BEFORE the
+ // scale, preventing this fold under most circumstances.
+ // TODO: Move this into X86DAGToDAGISel::matchVectorAddressRecursively?
+ if ((Index.getOpcode() == X86ISD::VSHLI ||
+ (Index.getOpcode() == ISD::ADD &&
+ Index.getOperand(0) == Index.getOperand(1))) &&
+ isa<ConstantSDNode>(Scale) &&
+ BasePtr.getScalarValueSizeInBits() == Index.getScalarValueSizeInBits()) {
+ unsigned ShiftAmt =
+ Index.getOpcode() == ISD::ADD ? 1 : Index.getConstantOperandVal(1);
+ uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ uint64_t NewScaleAmt = ScaleAmt * (1ULL << ShiftAmt);
+ if (isPowerOf2_64(NewScaleAmt) && NewScaleAmt <= 8) {
+ SDValue NewIndex = Index.getOperand(0);
+ SDValue NewScale =
+ DAG.getTargetConstant(NewScaleAmt, SDLoc(N), Scale.getValueType());
+ if (N->getOpcode() == X86ISD::MGATHER)
+ return getAVX2GatherNode(N->getOpcode(), SDValue(N, 0), DAG,
+ MemOp->getOperand(1), Mask,
+ MemOp->getBasePtr(), NewIndex, NewScale,
+ MemOp->getChain(), Subtarget);
+ if (N->getOpcode() == X86ISD::MSCATTER)
+ return getScatterNode(N->getOpcode(), SDValue(N, 0), DAG,
+ MemOp->getOperand(1), Mask, MemOp->getBasePtr(),
+ NewIndex, NewScale, MemOp->getChain(), Subtarget);
+ }
+ }
+
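
A standalone sketch (not part of this patch) of the address identity used above, with illustrative helper names: shifting the index left by a constant and then scaling is the same as scaling by Scale << ShiftAmt, which is only legal to fold when the result is still one of the hardware scales {1, 2, 4, 8} and the index is already pointer-width, so no sign-extension sits between the shift and the scale.

#include <cassert>
#include <cstdint>

static uint64_t addrBefore(uint64_t Base, uint64_t Index, unsigned ShiftAmt,
                           uint64_t Scale) {
  return Base + (Index << ShiftAmt) * Scale;
}

static uint64_t addrAfter(uint64_t Base, uint64_t Index, unsigned ShiftAmt,
                          uint64_t Scale) {
  uint64_t NewScale = Scale * (1ULL << ShiftAmt); // must stay in {1,2,4,8}
  return Base + Index * NewScale;
}

int main() {
  assert(addrBefore(0x1000, 5, 1, 4) == addrAfter(0x1000, 5, 1, 4)); // 4 -> 8
  assert(addrBefore(0x2000, 9, 2, 1) == addrAfter(0x2000, 9, 2, 1)); // 1 -> 4
  return 0;
}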
// With vector masks we only demand the upper bit of the mask.
- SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask();
if (Mask.getScalarValueSizeInBits() != 1) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
@@ -48962,6 +50534,48 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
}
}
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
+ // Try to move splat constant adders from the index operand to the base
+ // pointer operand, taking care to multiply by the scale. We can only do
+ // this when the index element type is the same as the pointer type;
+ // otherwise we would need to be sure the math doesn't wrap before the scale.
+ if (Index.getOpcode() == ISD::ADD &&
+ Index.getValueType().getVectorElementType() == PtrVT &&
+ isa<ConstantSDNode>(Scale)) {
+ uint64_t ScaleAmt = cast<ConstantSDNode>(Scale)->getZExtValue();
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(Index.getOperand(1))) {
+ BitVector UndefElts;
+ if (ConstantSDNode *C = BV->getConstantSplatNode(&UndefElts)) {
+ // FIXME: Allow non-constant?
+ if (UndefElts.none()) {
+ // Apply the scale.
+ APInt Adder = C->getAPIntValue() * ScaleAmt;
+ // Add it to the existing base.
+ Base = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
+ DAG.getConstant(Adder, DL, PtrVT));
+ Index = Index.getOperand(0);
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
+ }
+ }
+
+ // It's also possible that the base is just a constant. In that case, just
+ // replace it with 0 and move the displacement into the index.
+ if (BV->isConstant() && isa<ConstantSDNode>(Base) &&
+ isOneConstant(Scale)) {
+ SDValue Splat = DAG.getSplatBuildVector(Index.getValueType(), DL, Base);
+ // Combine the constant build_vector and the constant base.
+ Splat = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
+ Index.getOperand(1), Splat);
+ // Add to the LHS of the original Index add.
+ Index = DAG.getNode(ISD::ADD, DL, Index.getValueType(),
+ Index.getOperand(0), Splat);
+ Base = DAG.getConstant(0, DL, Base.getValueType());
+ return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
+ }
+ }
+ }
+
if (DCI.isBeforeLegalizeOps()) {
unsigned IndexWidth = Index.getScalarValueSizeInBits();
@@ -49120,10 +50734,31 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
+ // UINT_TO_FP(vXi1~15) -> UINT_TO_FP(ZEXT(vXi1~15 to vXi16))
+ // UINT_TO_FP(vXi17~31) -> UINT_TO_FP(ZEXT(vXi17~31 to vXi32))
+ // UINT_TO_FP(vXi33~63) -> UINT_TO_FP(ZEXT(vXi33~63 to vXi64))
+ if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+ unsigned ScalarSize = InVT.getScalarSizeInBits();
+ if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ return SDValue();
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+ ScalarSize < 16 ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_UINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
+ return DAG.getNode(ISD::UINT_TO_FP, dl, VT, P);
+ }
+
// UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32))
// UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32))
// UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
- if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+ VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);
@@ -49162,10 +50797,31 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT InVT = Op0.getValueType();
+ // SINT_TO_FP(vXi1~15) -> SINT_TO_FP(SEXT(vXi1~15 to vXi16))
+ // SINT_TO_FP(vXi17~31) -> SINT_TO_FP(SEXT(vXi17~31 to vXi32))
+ // SINT_TO_FP(vXi33~63) -> SINT_TO_FP(SEXT(vXi33~63 to vXi64))
+ if (InVT.isVector() && VT.getVectorElementType() == MVT::f16) {
+ unsigned ScalarSize = InVT.getScalarSizeInBits();
+ if (ScalarSize == 16 || ScalarSize == 32 || ScalarSize >= 64)
+ return SDValue();
+ SDLoc dl(N);
+ EVT DstVT = EVT::getVectorVT(*DAG.getContext(),
+ ScalarSize < 16 ? MVT::i16
+ : ScalarSize < 32 ? MVT::i32
+ : MVT::i64,
+ InVT.getVectorNumElements());
+ SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
+ if (IsStrict)
+ return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+ {N->getOperand(0), P});
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
+ }
+
// SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
+ if (InVT.isVector() && InVT.getScalarSizeInBits() < 32 &&
+ VT.getScalarType() != MVT::f16) {
SDLoc dl(N);
EVT DstVT = InVT.changeVectorElementType(MVT::i32);
SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
@@ -49244,10 +50900,7 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
static bool needCarryOrOverflowFlag(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
- for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
-
+ for (const SDNode *User : Flags->uses()) {
X86::CondCode CC;
switch (User->getOpcode()) {
default:
@@ -49282,10 +50935,7 @@ static bool needCarryOrOverflowFlag(SDValue Flags) {
static bool onlyZeroFlagUsed(SDValue Flags) {
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
- for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
-
+ for (const SDNode *User : Flags->uses()) {
unsigned CCOpNo;
switch (User->getOpcode()) {
default:
@@ -49534,8 +51184,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// the general case below.
auto *ConstantX = dyn_cast<ConstantSDNode>(X);
if (ConstantX) {
- if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
- (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
+ if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
// This is a complicated way to get -1 or 0 from the carry flag:
// -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
// 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
@@ -49544,8 +51194,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
Y.getOperand(1));
}
- if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
- (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
+ if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_A && ConstantX->isZero())) {
SDValue EFLAGS = Y->getOperand(1);
if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
EFLAGS.getValueType().isInteger() &&
@@ -49643,8 +51293,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// fake operands:
// 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
// -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
- if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
- (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
+ if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
SDValue Zero = DAG.getConstant(0, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
@@ -49657,8 +51307,8 @@ static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
// with fake operands:
// 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
// -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
- if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
- (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
+ if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
SDValue One = DAG.getConstant(1, DL, ZVT);
SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
@@ -49932,6 +51582,50 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
PMADDBuilder);
}
+// ADD(VPMADDWD(X,Y),VPMADDWD(Z,W)) -> VPMADDWD(SHUFFLE(X,Z), SHUFFLE(Y,W))
+// If the upper element in each pair of both VPMADDWD operands is zero then we
+// can merge the operand elements and use the implicit add of VPMADDWD.
+// TODO: Add support for VPMADDUBSW (which isn't commutable).
+static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1,
+ const SDLoc &DL, EVT VT) {
+ if (N0.getOpcode() != N1.getOpcode() || N0.getOpcode() != X86ISD::VPMADDWD)
+ return SDValue();
+
+ // TODO: Add 256/512-bit support once VPMADDWD combines with shuffles.
+ if (VT.getSizeInBits() > 128)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT OpVT = N0.getOperand(0).getSimpleValueType();
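+ // Each VPMADDWD operand has 2 * NumElts i16 elements; splatting the 2-bit
+ // pattern 0b10 marks the upper (odd-index) element of each pair as demanded.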
+ APInt DemandedBits = APInt::getAllOnes(OpVT.getScalarSizeInBits());
+ APInt DemandedHiElts = APInt::getSplat(2 * NumElts, APInt(2, 2));
+
+ bool Op0HiZero =
+ DAG.MaskedValueIsZero(N0.getOperand(0), DemandedBits, DemandedHiElts) ||
+ DAG.MaskedValueIsZero(N0.getOperand(1), DemandedBits, DemandedHiElts);
+ bool Op1HiZero =
+ DAG.MaskedValueIsZero(N1.getOperand(0), DemandedBits, DemandedHiElts) ||
+ DAG.MaskedValueIsZero(N1.getOperand(1), DemandedBits, DemandedHiElts);
+
+ // TODO: Check for zero lower elements once we have actual codegen that
+ // creates them.
+ if (!Op0HiZero || !Op1HiZero)
+ return SDValue();
+
+ // Create a shuffle mask packing the lower elements from each VPMADDWD.
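+ // For example, with v4i32 this builds the mask <0,8,2,10,4,12,6,14>,
+ // interleaving the even (lower) i16 lanes of the two operand vectors.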
+ SmallVector<int> Mask;
+ for (int i = 0; i != (int)NumElts; ++i) {
+ Mask.push_back(2 * i);
+ Mask.push_back(2 * (i + NumElts));
+ }
+
+ SDValue LHS =
+ DAG.getVectorShuffle(OpVT, DL, N0.getOperand(0), N1.getOperand(0), Mask);
+ SDValue RHS =
+ DAG.getVectorShuffle(OpVT, DL, N0.getOperand(1), N1.getOperand(1), Mask);
+ return DAG.getNode(X86ISD::VPMADDWD, DL, VT, LHS, RHS);
+}
+
/// CMOV of constants requires materializing constant operands in registers.
/// Try to fold those constants into an 'add' instruction to reduce instruction
/// count. We do this with CMOV rather than the generic 'select' because there are
@@ -49961,11 +51655,34 @@ static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
if (!isSuitableCmov(Cmov))
return SDValue();
- // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FalseOp = Cmov.getOperand(0);
SDValue TrueOp = Cmov.getOperand(1);
+
+ // We will push the add through the select, but we can potentially do better
+ // if we know there is another add in the sequence and this is pointer math.
+ // In that case, we can absorb an add into the trailing memory op and avoid
+ // a 3-operand LEA which is likely slower than a 2-operand LEA.
+ // TODO: If target has "slow3OpsLEA", do this even without the trailing memop?
+ if (OtherOp.getOpcode() == ISD::ADD && OtherOp.hasOneUse() &&
+ !isa<ConstantSDNode>(OtherOp.getOperand(0)) &&
+ all_of(N->uses(), [&](SDNode *Use) {
+ auto *MemNode = dyn_cast<MemSDNode>(Use);
+ return MemNode && MemNode->getBasePtr().getNode() == N;
+ })) {
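+ // All uses of N are loads/stores addressed through N, so the trailing add
+ // can be absorbed into their memory operands.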
+ // add (cmov C1, C2), add (X, Y) --> add (cmov (add X, C1), (add X, C2)), Y
+ // TODO: We are arbitrarily choosing op0 as the 1st piece of the sum, but
+ // it is possible that choosing op1 might be better.
+ SDValue X = OtherOp.getOperand(0), Y = OtherOp.getOperand(1);
+ FalseOp = DAG.getNode(ISD::ADD, DL, VT, X, FalseOp);
+ TrueOp = DAG.getNode(ISD::ADD, DL, VT, X, TrueOp);
+ Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp,
+ Cmov.getOperand(2), Cmov.getOperand(3));
+ return DAG.getNode(ISD::ADD, DL, VT, Cmov, Y);
+ }
+
+ // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
@@ -49978,13 +51695,16 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ SDLoc DL(N);
if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
return Select;
- if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
+ if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, DL, VT, Subtarget))
+ return MAdd;
+ if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, DL, VT, Subtarget))
return MAdd;
- if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
+ if (SDValue MAdd = combineAddOfPMADDWD(DAG, Op0, Op1, DL, VT))
return MAdd;
// Try to synthesize horizontal adds from adds of shuffles.
@@ -50001,7 +51721,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (Op0.getOpcode() == ISD::ZERO_EXTEND &&
Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(Op0.getOperand(0).getValueType())) {
- SDLoc DL(N);
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt);
}
@@ -50009,7 +51728,6 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (Op1.getOpcode() == ISD::ZERO_EXTEND &&
Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(Op1.getOperand(0).getValueType())) {
- SDLoc DL(N);
SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt);
}
@@ -50018,6 +51736,47 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
return combineAddOrSubToADCOrSBB(N, DAG);
}
+// Try to fold (sub Y, cmovns X, -X) -> (add Y, cmovns -X, X) if the cmov
+// condition comes from the subtract node that produced -X. This matches the
+// cmov expansion for absolute value. By swapping the operands we convert abs
+// to nabs.
+static SDValue combineSubABS(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N1.getOpcode() != X86ISD::CMOV || !N1.hasOneUse())
+ return SDValue();
+
+ X86::CondCode CC = (X86::CondCode)N1.getConstantOperandVal(2);
+ if (CC != X86::COND_S && CC != X86::COND_NS)
+ return SDValue();
+
+ // Condition should come from a negate operation.
+ SDValue Cond = N1.getOperand(3);
+ if (Cond.getOpcode() != X86ISD::SUB || !isNullConstant(Cond.getOperand(0)))
+ return SDValue();
+ assert(Cond.getResNo() == 1 && "Unexpected result number");
+
+ // Get the X and -X from the negate.
+ SDValue NegX = Cond.getValue(0);
+ SDValue X = Cond.getOperand(1);
+
+ SDValue FalseOp = N1.getOperand(0);
+ SDValue TrueOp = N1.getOperand(1);
+
+ // Cmov operands should be X and NegX. Order doesn't matter.
+ if (!(TrueOp == X && FalseOp == NegX) && !(TrueOp == NegX && FalseOp == X))
+ return SDValue();
+
+ // Build a new CMOV with the operands swapped.
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, TrueOp, FalseOp,
+ N1.getOperand(2), Cond);
+ // Convert sub to add.
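+ // The swapped arms are X and -X, so the new CMOV is the negation of the old
+ // one and (sub N0, OldCmov) == (add N0, NewCmov).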
+ return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);
+}
+
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -50049,6 +51808,9 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd);
}
+ if (SDValue V = combineSubABS(N, DAG))
+ return V;
+
// Try to synthesize horizontal subs from subs of shuffles.
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
@@ -50099,43 +51861,30 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (Op0.getOpcode() == X86ISD::VBROADCAST)
return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));
- // If this scalar/subvector broadcast_load is inserted into both halves, use
- // a larger broadcast_load. Update other uses to use an extracted subvector.
- if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
+ // If this simple subvector load or scalar/subvector broadcast_load is
+ // inserted into both halves, use a larger broadcast_load. Update other uses
+ // to use an extracted subvector.
+ if (ISD::isNormalLoad(Op0.getNode()) ||
+ Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
- auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
- SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
- MemIntr->getMemoryVT(),
- MemIntr->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(
- Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
- DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
- return BcastLd;
- }
-
- // If this is a simple subvector load repeated across multiple lanes, then
- // broadcast the load. Update other uses to use an extracted subvector.
- if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
- if (Ld->isSimple() && !Ld->isNonTemporal() &&
- Ld->getExtensionType() == ISD::NON_EXTLOAD) {
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
- SDValue BcastLd =
- DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
- Ld->getMemoryVT(), Ld->getMemOperand());
- DAG.ReplaceAllUsesOfValueWith(
- Op0,
- extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
+ auto *Mem = cast<MemSDNode>(Op0);
+ unsigned Opc = Op0.getOpcode() == X86ISD::VBROADCAST_LOAD
+ ? X86ISD::VBROADCAST_LOAD
+ : X86ISD::SUBV_BROADCAST_LOAD;
+ if (SDValue BcastLd =
+ getBROADCAST_LOAD(Opc, DL, VT, Mem->getMemoryVT(), Mem, 0, DAG)) {
+ SDValue BcastSrc =
+ extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits());
+ DAG.ReplaceAllUsesOfValueWith(Op0, BcastSrc);
return BcastLd;
}
}
// concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
- (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
+ (Subtarget.hasAVX2() ||
+ X86::mayFoldLoadIntoBroadcastFromMem(Op0.getOperand(0),
+ VT.getScalarType(), Subtarget)))
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
Op0.getOperand(0),
@@ -50144,7 +51893,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
(Subtarget.hasAVX2() ||
- (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
+ (EltSizeInBits >= 32 &&
+ X86::mayFoldLoad(Op0.getOperand(0), Subtarget))) &&
Op0.getOperand(0).getValueType() == VT.getScalarType())
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));
@@ -50773,7 +52523,7 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
// TODO: SimplifyDemandedBits instead?
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
- if (C->getAPIntValue().isOneValue())
+ if (C->getAPIntValue().isOne())
return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
Src.getOperand(0));
@@ -50782,7 +52532,7 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
- if (C->isNullValue())
+ if (C->isZero())
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
Src.getOperand(1));
@@ -50851,7 +52601,7 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
// PMULDQ/PMULUDQ only use the lower 32 bits from each vector element.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(64), DCI))
return SDValue(N, 0);
// If the input is an extend_invec and the SimplifyDemandedBits call didn't
@@ -50885,6 +52635,29 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Simplify VPMADDUBSW/VPMADDWD operations.
+static SDValue combineVPMADD(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Multiply by zero.
+ // Don't return LHS/RHS as it may contain UNDEFs.
+ if (ISD::isBuildVectorAllZeros(LHS.getNode()) ||
+ ISD::isBuildVectorAllZeros(RHS.getNode()))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ APInt KnownUndef, KnownZero;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
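+ // Let the generic demanded-vector-elements logic simplify the operands.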
+ if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
+ KnownZero, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -50950,7 +52723,7 @@ static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG,
APInt KnownUndef, KnownZero;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ APInt DemandedElts = APInt::getAllOnes(VT.getVectorNumElements());
if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
KnownZero, DCI))
return SDValue(N, 0);
@@ -50988,6 +52761,9 @@ static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
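+ // With AVX512-FP16 these f16 conversions are handled natively, so skip the
+ // F16C-based combine.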
+ if (Subtarget.hasFP16())
+ return SDValue();
+
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
@@ -51096,6 +52872,9 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
+ if (Subtarget.hasFP16())
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(0);
EVT SrcVT = Src.getValueType();
@@ -51156,8 +52935,7 @@ static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
unsigned NumBits = N->getSimpleValueType(0).getSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.SimplifyDemandedBits(SDValue(N, 0),
- APInt::getAllOnesValue(NumBits), DCI))
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(NumBits), DCI))
return SDValue(N, 0);
return SDValue();
@@ -51215,6 +52993,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return combineUIntToFP(N, DAG, Subtarget);
case ISD::FADD:
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
+ case X86ISD::VFCMULC:
+ case X86ISD::VFMULC: return combineFMulcFCMulc(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI);
@@ -51289,6 +53069,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
+ case X86ISD::MOVSH:
case X86ISD::VBROADCAST:
case X86ISD::VPPERM:
case X86ISD::VPERMI:
@@ -51319,13 +53100,16 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
- case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI);
+ case X86ISD::MSCATTER:
+ return combineX86GatherScatter(N, DAG, DCI, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
case X86ISD::PCMPEQ:
case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
case X86ISD::PMULDQ:
case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
+ case X86ISD::VPMADDUBSW:
+ case X86ISD::VPMADDWD: return combineVPMADD(N, DAG, DCI);
case X86ISD::KSHIFTL:
case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
@@ -51451,7 +53235,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
- if (MayFoldLoad(N0) && IsFoldableRMW(N0, Op))
+ if (X86::mayFoldLoad(N0, Subtarget) && IsFoldableRMW(N0, Op))
return false;
break;
}
@@ -51466,11 +53250,11 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
// Avoid disabling potential load folding opportunities.
- if (MayFoldLoad(N1) &&
+ if (X86::mayFoldLoad(N1, Subtarget) &&
(!Commute || !isa<ConstantSDNode>(N0) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
return false;
- if (MayFoldLoad(N0) &&
+ if (X86::mayFoldLoad(N0, Subtarget) &&
((Commute && !isa<ConstantSDNode>(N1)) ||
(Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
return false;
@@ -51510,13 +53294,13 @@ static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) {
static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) {
if (AsmPieces.size() == 3 || AsmPieces.size() == 4) {
- if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") &&
- std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") &&
- std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) {
+ if (llvm::is_contained(AsmPieces, "~{cc}") &&
+ llvm::is_contained(AsmPieces, "~{flags}") &&
+ llvm::is_contained(AsmPieces, "~{fpsr}")) {
if (AsmPieces.size() == 3)
return true;
- else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}"))
+ else if (llvm::is_contained(AsmPieces, "~{dirflag}"))
return true;
}
}
@@ -52041,7 +53825,8 @@ static bool isGRClass(const TargetRegisterClass &RC) {
/// Check if \p RC is a vector register class.
/// I.e., FR* / VR* or one of their variant.
static bool isFRClass(const TargetRegisterClass &RC) {
- return RC.hasSuperClassEq(&X86::FR32XRegClass) ||
+ return RC.hasSuperClassEq(&X86::FR16XRegClass) ||
+ RC.hasSuperClassEq(&X86::FR32XRegClass) ||
RC.hasSuperClassEq(&X86::FR64XRegClass) ||
RC.hasSuperClassEq(&X86::VR128XRegClass) ||
RC.hasSuperClassEq(&X86::VR256XRegClass) ||
@@ -52166,6 +53951,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
+ case MVT::f16:
+ if (VConstraint && Subtarget.hasFP16())
+ return std::make_pair(0U, &X86::FR16XRegClass);
+ break;
case MVT::f32:
case MVT::i32:
if (VConstraint && Subtarget.hasVLX())
@@ -52184,6 +53973,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
// Vector types and fp128.
+ case MVT::v8f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
@@ -52195,6 +53988,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
+ case MVT::v16f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -52206,6 +54003,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasAVX())
return std::make_pair(0U, &X86::VR256RegClass);
break;
+ case MVT::v32f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
@@ -52235,12 +54036,20 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
+ case MVT::f16:
+ if (!Subtarget.hasFP16())
+ break;
+ return std::make_pair(X86::XMM0, &X86::FR16XRegClass);
case MVT::f32:
case MVT::i32:
return std::make_pair(X86::XMM0, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
return std::make_pair(X86::XMM0, &X86::FR64RegClass);
+ case MVT::v8f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::f128:
case MVT::v16i8:
case MVT::v8i16:
@@ -52250,6 +54059,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v2f64:
return std::make_pair(X86::XMM0, &X86::VR128RegClass);
// AVX types.
+ case MVT::v16f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v32i8:
case MVT::v16i16:
case MVT::v8i32:
@@ -52259,6 +54072,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (Subtarget.hasAVX())
return std::make_pair(X86::YMM0, &X86::VR256RegClass);
break;
+ case MVT::v32f16:
+ if (!Subtarget.hasFP16())
+ break;
+ LLVM_FALLTHROUGH;
case MVT::v64i8:
case MVT::v32i16:
case MVT::v8f64:
@@ -52416,7 +54233,9 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// find, ignoring the required type.
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
- if (VT == MVT::f32 || VT == MVT::i32)
+ if (VT == MVT::f16)
+ Res.second = &X86::FR16XRegClass;
+ else if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32XRegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
Res.second = &X86::FR64XRegClass;
@@ -52489,7 +54308,7 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
// integer division, leaving the division as-is is a loss even in terms of
// size, because it will have to be scalarized, while the alternative code
// sequence can be performed in vector form.
- bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
+ bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
return OptSize && !VT.isVector();
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 869857bcc0d6..6805cb75f0f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -14,6 +14,7 @@
#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"
namespace llvm {
@@ -460,6 +461,7 @@ namespace llvm {
MOVHLPS,
MOVSD,
MOVSS,
+ MOVSH,
UNPCKL,
UNPCKH,
VPERMILPV,
@@ -564,6 +566,27 @@ namespace llvm {
FMADDSUB_RND,
FMSUBADD_RND,
+ // AVX512-FP16 complex addition and multiplication.
+ VFMADDC,
+ VFMADDC_RND,
+ VFCMADDC,
+ VFCMADDC_RND,
+
+ VFMULC,
+ VFMULC_RND,
+ VFCMULC,
+ VFCMULC_RND,
+
+ VFMADDCSH,
+ VFMADDCSH_RND,
+ VFCMADDCSH,
+ VFCMADDCSH_RND,
+
+ VFMULCSH,
+ VFMULCSH_RND,
+ VFCMULCSH,
+ VFCMULCSH_RND,
+
// Compress and expand.
COMPRESS,
EXPAND,
@@ -627,12 +650,8 @@ namespace llvm {
// packed single precision.
DPBF16PS,
- // Save xmm argument registers to the stack, according to %al. An operator
- // is needed so that this can be expanded with control flow.
- VASTART_SAVE_XMM_REGS,
-
- // Windows's _chkstk call to do stack probing.
- WIN_ALLOCA,
+ // A stack-checking function call. On Windows, it's the _chkstk call.
+ DYN_ALLOCA,
// For allocating variable amounts of stack space when using
// segmented stacks. Check if the current stacklet has enough space, and
@@ -848,6 +867,10 @@ namespace llvm {
AESENCWIDE256KL,
AESDECWIDE256KL,
+ // Save xmm argument registers to the stack, according to %al. An operator
+ // is needed so that this can be expanded with control flow.
+ VASTART_SAVE_XMM_REGS,
+
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be treated as target memory ops!
@@ -888,6 +911,25 @@ namespace llvm {
/// as zero if AllowPartialUndefs is set, else we fail and return false.
bool isConstantSplat(SDValue Op, APInt &SplatVal,
bool AllowPartialUndefs = true);
+
+ /// Check if Op is a load operation that could be folded into some other x86
+ /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
+ bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
+ bool AssumeSingleUse = false);
+
+ /// Check if Op is a load operation that could be folded into a vector splat
+ /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
+ bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
+ const X86Subtarget &Subtarget,
+ bool AssumeSingleUse = false);
+
+ /// Check if Op is a value that could be used to fold a store into some
+ /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
+ bool mayFoldIntoStore(SDValue Op);
+
+ /// Check if Op is an operation that could be folded into a zero extend x86
+ /// instruction.
+ bool mayFoldIntoZeroExtend(SDValue Op);
} // end namespace X86
//===--------------------------------------------------------------------===//
@@ -923,7 +965,7 @@ namespace llvm {
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest are at
/// 4-byte boundaries.
- unsigned getByValTypeAlignment(Type *Ty,
+ uint64_t getByValTypeAlignment(Type *Ty,
const DataLayout &DL) const override;
EVT getOptimalMemOpType(const MemOp &Op,
@@ -989,7 +1031,7 @@ namespace llvm {
}
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
- const SelectionDAG &DAG) const override;
+ const MachineFunction &MF) const override;
bool isCheapToSpeculateCttz() const override;
@@ -998,7 +1040,8 @@ namespace llvm {
bool isCtlzFast() const override;
bool hasBitPreservingFPLogic(EVT VT) const override {
- return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
+ return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
+ (VT == MVT::f16 && X86ScalarSSEf16);
}
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
@@ -1282,7 +1325,8 @@ namespace llvm {
/// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
- (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
+ (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
+ (VT == MVT::f16 && X86ScalarSSEf16); // f16 is when AVX512FP16
}
/// Returns true if it is beneficial to convert a load of a constant
@@ -1442,6 +1486,7 @@ namespace llvm {
/// When SSE2 is available, use it for f64 operations.
bool X86ScalarSSEf32;
bool X86ScalarSSEf64;
+ bool X86ScalarSSEf16;
/// A list of legal FP immediates.
std::vector<APFloat> LegalFPImmediates;
@@ -1472,16 +1517,11 @@ namespace llvm {
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
- bool IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
- Type *RetTy,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
+ bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
SDValue Chain, bool IsTailCall,
bool Is64Bit, int FPDiff,
@@ -1540,6 +1580,9 @@ namespace llvm {
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
+ SDValue &Chain) const;
+ SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 85410c54a4d2..732b2b1a5ada 100644
--- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -92,7 +92,7 @@ static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
if (!CalleeFn)
return false;
AttributeList Attrs = CalleeFn->getAttributes();
- return Attrs.hasFnAttribute(Attribute::ReturnsTwice);
+ return Attrs.hasFnAttr(Attribute::ReturnsTwice);
}
bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/lib/Target/X86/X86IndirectThunks.cpp b/llvm/lib/Target/X86/X86IndirectThunks.cpp
index 3d96d198b409..e08b4b7c03c6 100644
--- a/llvm/lib/Target/X86/X86IndirectThunks.cpp
+++ b/llvm/lib/Target/X86/X86IndirectThunks.cpp
@@ -212,7 +212,7 @@ void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
MF.push_back(CallTarget);
const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- const unsigned RetOpc = Is64Bit ? X86::RETQ : X86::RETL;
+ const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;
Entry->addLiveIn(ThunkReg);
BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);
diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp
index 56d2709f5937..69a3d32a9314 100644
--- a/llvm/lib/Target/X86/X86InsertWait.cpp
+++ b/llvm/lib/Target/X86/X86InsertWait.cpp
@@ -55,23 +55,6 @@ char WaitInsert::ID = 0;
FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
-/// Return true if the Reg is X87 register.
-static bool isX87Reg(unsigned Reg) {
- return (Reg == X86::FPCW || Reg == X86::FPSW ||
- (Reg >= X86::ST0 && Reg <= X86::ST7));
-}
-
-/// check if the instruction is X87 instruction
-static bool isX87Instruction(MachineInstr &MI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- if (isX87Reg(MO.getReg()))
- return true;
- }
- return false;
-}
-
static bool isX87ControlInstruction(MachineInstr &MI) {
switch (MI.getOpcode()) {
case X86::FNINIT:
@@ -121,7 +104,7 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF) {
for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
// Skip non-X87 instructions.
- if (!isX87Instruction(*MI))
+ if (!X86::isX87Instruction(*MI))
continue;
// If the instruction neither has a float exception nor is
// a load/store instruction, or the instruction is an x87 control
@@ -132,7 +115,7 @@ bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
// If the following instruction is an X87 instruction and isn't an X87
// non-waiting control instruction, we can omit inserting a wait instruction.
MachineBasicBlock::iterator AfterMI = std::next(MI);
- if (AfterMI != MBB.end() && isX87Instruction(*AfterMI) &&
+ if (AfterMI != MBB.end() && X86::isX87Instruction(*AfterMI) &&
!isX87NonWaitingControlInstruction(*AfterMI))
continue;
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 7d9466f0d181..ff8710634e89 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -239,7 +239,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
KnownBits KnownUpperBits = llvm::computeKnownBits(
Amt, DemandedUpper, II.getModule()->getDataLayout());
if (KnownLowerBits.getMaxValue().ult(BitWidth) &&
- (DemandedUpper.isNullValue() || KnownUpperBits.isZero())) {
+ (DemandedUpper.isZero() || KnownUpperBits.isZero())) {
SmallVector<int, 16> ZeroSplat(VWidth, 0);
Amt = Builder.CreateShuffleVector(Amt, ZeroSplat);
return (LogicalShift ? (ShiftLeft ? Builder.CreateShl(Vec, Amt)
@@ -269,7 +269,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
}
// If shift-by-zero then just return the original value.
- if (Count.isNullValue())
+ if (Count.isZero())
return Vec;
// Handle cases when Shift >= BitWidth.
@@ -476,7 +476,7 @@ static Value *simplifyX86pack(IntrinsicInst &II,
// PACKUS: Truncate signed value with unsigned saturation.
// Source values less than zero are saturated to zero.
// Source values greater than dst maxuint are saturated to maxuint.
- MinValue = APInt::getNullValue(SrcScalarSizeInBits);
+ MinValue = APInt::getZero(SrcScalarSizeInBits);
MaxValue = APInt::getLowBitsSet(SrcScalarSizeInBits, DstScalarSizeInBits);
}
@@ -1764,7 +1764,7 @@ Optional<Value *> X86TTIImpl::simplifyDemandedUseBitsIntrinsic(
// we know that DemandedMask is non-zero already.
APInt DemandedElts = DemandedMask.zextOrTrunc(ArgWidth);
Type *VTy = II.getType();
- if (DemandedElts.isNullValue()) {
+ if (DemandedElts.isZero()) {
return ConstantInt::getNullValue(VTy);
}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index dd61d91c3a62..8aee96e1c504 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -44,8 +44,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
// It is a little bit complex for scalar types, where NumElts = 1.
// In this case we build v8f16, v4f32 or v2f64
string VTName = "v" # !if (!eq (NumElts, 1),
+ !if (!eq (EltVT.Size, 16), 8,
!if (!eq (EltVT.Size, 32), 4,
- !if (!eq (EltVT.Size, 64), 2, NumElts)), NumElts) # EltVT;
+ !if (!eq (EltVT.Size, 64), 2, NumElts))), NumElts) # EltVT;
// The vector VT.
ValueType VT = !cast<ValueType>(VTName);
@@ -65,8 +66,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
X86MemOperand MemOp = !cast<X86MemOperand>(TypeVariantName # Size # "mem");
X86MemOperand ScalarMemOp = !cast<X86MemOperand>(EltVT # "mem");
// FP scalar memory operand for intrinsics - shmem/ssmem/sdmem.
- Operand IntScalarMemOp = !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
- !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?));
+ Operand IntScalarMemOp = !if (!eq (EltTypeName, "f16"), !cast<Operand>("shmem"),
+ !if (!eq (EltTypeName, "f32"), !cast<Operand>("ssmem"),
+ !if (!eq (EltTypeName, "f64"), !cast<Operand>("sdmem"), ?)));
// Load patterns
PatFrag LdFrag = !cast<PatFrag>("load" # VTName);
@@ -76,11 +78,9 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
PatFrag BroadcastLdFrag = !cast<PatFrag>("X86VBroadcastld" # EltSizeName);
- PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f32"),
- !cast<PatFrags>("sse_load_f32"),
- !if (!eq (EltTypeName, "f64"),
- !cast<PatFrags>("sse_load_f64"),
- ?));
+ PatFrags ScalarIntMemFrags = !if (!eq (EltTypeName, "f16"), !cast<PatFrags>("sse_load_f16"),
+ !if (!eq (EltTypeName, "f32"), !cast<PatFrags>("sse_load_f32"),
+ !if (!eq (EltTypeName, "f64"), !cast<PatFrags>("sse_load_f64"), ?)));
// The string to specify embedded broadcast in assembly.
string BroadcastStr = "{1to" # NumElts # "}";
@@ -95,9 +95,12 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
Domain ExeDomain = !if (!eq (EltTypeName, "f32"), SSEPackedSingle,
!if (!eq (EltTypeName, "f64"), SSEPackedDouble,
- SSEPackedInt));
+ !if (!eq (EltTypeName, "f16"), SSEPackedSingle, // FIXME?
+ SSEPackedInt)));
- RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
+ RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X,
+ !if (!eq (EltTypeName, "f16"), FR16X,
+ FR64X));
dag ImmAllZerosV = (VT immAllZerosV);
@@ -109,6 +112,7 @@ def v64i8_info : X86VectorVTInfo<64, i8, VR512, "b">;
def v32i16_info : X86VectorVTInfo<32, i16, VR512, "w">;
def v16i32_info : X86VectorVTInfo<16, i32, VR512, "d">;
def v8i64_info : X86VectorVTInfo<8, i64, VR512, "q">;
+def v32f16_info : X86VectorVTInfo<32, f16, VR512, "ph">;
def v16f32_info : X86VectorVTInfo<16, f32, VR512, "ps">;
def v8f64_info : X86VectorVTInfo<8, f64, VR512, "pd">;
@@ -117,6 +121,7 @@ def v32i8x_info : X86VectorVTInfo<32, i8, VR256X, "b">;
def v16i16x_info : X86VectorVTInfo<16, i16, VR256X, "w">;
def v8i32x_info : X86VectorVTInfo<8, i32, VR256X, "d">;
def v4i64x_info : X86VectorVTInfo<4, i64, VR256X, "q">;
+def v16f16x_info : X86VectorVTInfo<16, f16, VR256X, "ph">;
def v8f32x_info : X86VectorVTInfo<8, f32, VR256X, "ps">;
def v4f64x_info : X86VectorVTInfo<4, f64, VR256X, "pd">;
@@ -124,6 +129,7 @@ def v16i8x_info : X86VectorVTInfo<16, i8, VR128X, "b">;
def v8i16x_info : X86VectorVTInfo<8, i16, VR128X, "w">;
def v4i32x_info : X86VectorVTInfo<4, i32, VR128X, "d">;
def v2i64x_info : X86VectorVTInfo<2, i64, VR128X, "q">;
+def v8f16x_info : X86VectorVTInfo<8, f16, VR128X, "ph">;
def v4f32x_info : X86VectorVTInfo<4, f32, VR128X, "ps">;
def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
@@ -131,6 +137,7 @@ def v2f64x_info : X86VectorVTInfo<2, f64, VR128X, "pd">;
// with the appropriate element type. This allows us to use the same masking logic.
def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">;
def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">;
+def f16x_info : X86VectorVTInfo<1, f16, VR128X, "sh">;
def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">;
def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">;
@@ -149,6 +156,8 @@ def avx512vl_i32_info : AVX512VLVectorVTInfo<v16i32_info, v8i32x_info,
v4i32x_info>;
def avx512vl_i64_info : AVX512VLVectorVTInfo<v8i64_info, v4i64x_info,
v2i64x_info>;
+def avx512vl_f16_info : AVX512VLVectorVTInfo<v32f16_info, v16f16x_info,
+ v8f16x_info>;
def avx512vl_f32_info : AVX512VLVectorVTInfo<v16f32_info, v8f32x_info,
v4f32x_info>;
def avx512vl_f64_info : AVX512VLVectorVTInfo<v8f64_info, v4f64x_info,
@@ -196,8 +205,9 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
- bit IsKZCommutable = IsCommutable> {
- let isCommutable = IsCommutable in
+ bit IsKZCommutable = IsCommutable,
+ string ClobberConstraint = ""> {
+ let isCommutable = IsCommutable, Constraints = ClobberConstraint in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
@@ -211,12 +221,15 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
MaskingPattern>,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
- string Constraints = MaskingConstraint;
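+ // Combine the clobber constraint with the masking constraint, joining them
+ // with ", " when both are present.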
+ string Constraints = !if(!eq(ClobberConstraint, ""), MaskingConstraint,
+ !if(!eq(MaskingConstraint, ""), ClobberConstraint,
+ !strconcat(ClobberConstraint, ", ", MaskingConstraint)));
}
// A zero mask does not add any restrictions to the commute-operands
// transformation, so it is OK to use IsCommutable instead of IsKCommutable.
- let isCommutable = IsKZCommutable in // Prefer over VMOV*rrkz Pat<>
+ let isCommutable = IsKZCommutable, // Prefer over VMOV*rrkz Pat<>
+ Constraints = ClobberConstraint in
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
@@ -236,7 +249,8 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
string MaskingConstraint = "",
bit IsCommutable = 0,
bit IsKCommutable = 0,
- bit IsKZCommutable = IsCommutable> :
+ bit IsKZCommutable = IsCommutable,
+ string ClobberConstraint = ""> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
@@ -244,7 +258,7 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
MaskingConstraint, IsCommutable,
- IsKCommutable, IsKZCommutable>;
+ IsKCommutable, IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -254,6 +268,7 @@ multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
dag RHS, dag MaskRHS,
+ string ClobberConstraint = "",
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable> :
AVX512_maskable_custom<O, F, Outs, Ins,
@@ -266,7 +281,7 @@ multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
[(set _.RC:$dst,
(vselect_mask _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
"$src0 = $dst", IsCommutable, IsKCommutable,
- IsKZCommutable>;
+ IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -277,14 +292,15 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag RHS,
bit IsCommutable = 0, bit IsKCommutable = 0,
bit IsKZCommutable = IsCommutable,
- SDPatternOperator Select = vselect_mask> :
+ SDPatternOperator Select = vselect_mask,
+ string ClobberConstraint = ""> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0),
Select, "$src0 = $dst", IsCommutable, IsKCommutable,
- IsKZCommutable>;
+ IsKZCommutable, ClobberConstraint>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
@@ -501,6 +517,12 @@ def : Pat<(v8f32 immAllZerosV), (AVX512_256_SET0)>;
def : Pat<(v4f64 immAllZerosV), (AVX512_256_SET0)>;
}
+let Predicates = [HasFP16] in {
+def : Pat<(v8f16 immAllZerosV), (AVX512_128_SET0)>;
+def : Pat<(v16f16 immAllZerosV), (AVX512_256_SET0)>;
+def : Pat<(v32f16 immAllZerosV), (AVX512_512_SET0)>;
+}
+
// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -513,6 +535,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
[(set VR128X:$dst, fp128imm0)]>;
}
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasFP16] in {
+ def AVX512_FsFLD0SH : I<0, Pseudo, (outs FR16X:$dst), (ins), "",
+ [(set FR16X:$dst, fp16imm0)]>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
@@ -649,16 +677,22 @@ defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v8f16x_info, v16f16x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16, HasVLX]>;
// Codegen pattern with the alternative types insert VEC128 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v8f16x_info, v32f16_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasFP16]>;
// Codegen pattern with the alternative types insert VEC256 into VEC512
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v16f16x_info, v32f16_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasFP16]>;
multiclass vinsert_for_mask_cast<string InstrStr, X86VectorVTInfo From,
@@ -944,17 +978,23 @@ defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v16f16x_info, v8f16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16, HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v32f16_info, v8f16x_info,
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasFP16]>;
// Codegen pattern with the alternative types extract VEC256 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
+defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v32f16_info, v16f16x_info,
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasFP16]>;
// A 128-bit extract from bits [255:128] of a 512-bit vector should use a
@@ -1015,6 +1055,12 @@ def : Pat<(v16i8 (extract_subvector (v64i8 VR512:$src), (iPTR 16))),
(iPTR 1)))>;
}
+let Predicates = [HasFP16, HasVLX] in
+def : Pat<(v8f16 (extract_subvector (v32f16 VR512:$src), (iPTR 8))),
+ (v8f16 (VEXTRACTF32x4Z256rr
+ (v16f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_ymm)),
+ (iPTR 1)))>;
+
// Additional patterns for handling a bitcast between the vselect and the
// extract_subvector.
@@ -1140,9 +1186,8 @@ def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
// AVX-512 BROADCAST
//---
// broadcast with a scalar argument.
-multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
- string Name,
- X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> {
+multiclass avx512_broadcast_scalar<string Name, X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo> {
def : Pat<(DestInfo.VT (X86VBroadcast SrcInfo.FRC:$src)),
(!cast<Instruction>(Name#DestInfo.ZSuffix#rr)
(SrcInfo.VT (COPY_TO_REGCLASS SrcInfo.FRC:$src, SrcInfo.RC)))>;
@@ -1162,7 +1207,6 @@ multiclass avx512_broadcast_scalar<bits<8> opc, string OpcodeStr,
// Split version to allow mask and broadcast node to be different types. This
// helps support the 32x2 broadcasts.
multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
- string Name,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo MaskInfo,
X86VectorVTInfo DestInfo,
@@ -1251,54 +1295,49 @@ multiclass avx512_broadcast_rm_split<bits<8> opc, string OpcodeStr,
}
// Helper class to force mask and broadcast result to same type.
-multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, string Name,
+multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr,
SchedWrite SchedRR, SchedWrite SchedRM,
X86VectorVTInfo DestInfo,
X86VectorVTInfo SrcInfo,
bit IsConvertibleToThreeAddress> :
- avx512_broadcast_rm_split<opc, OpcodeStr, Name, SchedRR, SchedRM,
+ avx512_broadcast_rm_split<opc, OpcodeStr, SchedRR, SchedRM,
DestInfo, DestInfo, SrcInfo,
IsConvertibleToThreeAddress>;
multiclass avx512_fp_broadcast_sd<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _> {
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info512, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info512, _.info128>,
EVEX_V512;
}
let Predicates = [HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info256, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info256, _.info128>,
EVEX_V256;
}
}
multiclass avx512_fp_broadcast_ss<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _> {
+ AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info512, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info512,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info512, _.info128>,
EVEX_V512;
}
let Predicates = [HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info256, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info256,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info256, _.info128>,
EVEX_V256;
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteFShuffle256,
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteFShuffle256,
WriteFShuffle256Ld, _.info128, _.info128, 1>,
- avx512_broadcast_scalar<opc, OpcodeStr, NAME, _.info128,
- _.info128>,
+ avx512_broadcast_scalar<NAME, _.info128, _.info128>,
EVEX_V128;
}
}
@@ -1384,20 +1423,20 @@ defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
X86VBroadcast, GR64, HasAVX512>, VEX_W;
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _, Predicate prd,
- bit IsConvertibleToThreeAddress> {
+ AVX512VLVectorVTInfo _, Predicate prd,
+ bit IsConvertibleToThreeAddress> {
let Predicates = [prd] in {
- defm Z : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _.info512, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V512;
}
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _.info256, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V256;
- defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, NAME, WriteShuffle,
+ defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, WriteShuffle,
WriteShuffleXLd, _.info128, _.info128,
IsConvertibleToThreeAddress>,
EVEX_V128;
@@ -1439,6 +1478,31 @@ multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
Sched<[SchedWriteShuffle.YMM.Folded]>,
AVX5128IBase, EVEX;
}
+let Predicates = [HasFP16] in {
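+ // f16 broadcasts reuse the integer VPBROADCASTW instructions.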
+ def : Pat<(v32f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZrm addr:$src)>;
+
+ def : Pat<(v32f16 (X86VBroadcast (v8f16 VR128X:$src))),
+ (VPBROADCASTWZrr VR128X:$src)>;
+ def : Pat<(v32f16 (X86VBroadcast (f16 FR16X:$src))),
+ (VPBROADCASTWZrr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
+}
+let Predicates = [HasVLX, HasFP16] in {
+ def : Pat<(v8f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ128rm addr:$src)>;
+ def : Pat<(v16f16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ256rm addr:$src)>;
+
+ def : Pat<(v8f16 (X86VBroadcast (v8f16 VR128X:$src))),
+ (VPBROADCASTWZ128rr VR128X:$src)>;
+ def : Pat<(v16f16 (X86VBroadcast (v8f16 VR128X:$src))),
+ (VPBROADCASTWZ256rr VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86VBroadcast (f16 FR16X:$src))),
+ (VPBROADCASTWZ128rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
+ def : Pat<(v16f16 (X86VBroadcast (f16 FR16X:$src))),
+ (VPBROADCASTWZ256rr (COPY_TO_REGCLASS FR16X:$src, VR128X))>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 BROADCAST SUBVECTORS
@@ -1462,6 +1526,8 @@ def : Pat<(v8f64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTF64X4rm addr:$src)>;
+def : Pat<(v32f16 (X86SubVBroadcastld256 addr:$src)),
+ (VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld256 addr:$src)),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld256 addr:$src)),
@@ -1475,6 +1541,8 @@ def : Pat<(v8f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4rm addr:$src)>;
+def : Pat<(v32f16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcastld128 addr:$src)),
@@ -1532,6 +1600,8 @@ def : Pat<(v4f64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v8f32 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTF32X4Z256rm addr:$src)>;
+def : Pat<(v16f16 (X86SubVBroadcastld128 addr:$src)),
+ (VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcastld128 addr:$src)),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v8i32 (X86SubVBroadcastld128 addr:$src)),
@@ -1638,25 +1708,27 @@ def : Pat<(vselect_mask VK8WM:$mask,
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> {
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src> {
let Predicates = [HasDQI] in
- defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _Dst.info512,
_Src.info512, _Src.info128, 0, null_frag, null_frag>,
EVEX_V512;
let Predicates = [HasDQI, HasVLX] in
- defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle256,
+ defm Z256 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle256,
WriteShuffle256Ld, _Dst.info256,
_Src.info256, _Src.info128, 0, null_frag, null_frag>,
EVEX_V256;
}
multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> :
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src> :
avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> {
let Predicates = [HasDQI, HasVLX] in
- defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, NAME, WriteShuffle,
+ defm Z128 : avx512_broadcast_rm_split<opc, OpcodeStr, WriteShuffle,
WriteShuffleXLd, _Dst.info128,
_Src.info128, _Src.info128, 0, null_frag, null_frag>,
EVEX_V128;
@@ -2099,6 +2171,10 @@ let Predicates = [HasAVX512] in {
X86cmpms_su, X86cmpmsSAE_su,
SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
}
+let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
+ defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
+ X86cmpms_su, X86cmpmsSAE_su,
+ SchedWriteFCmp.Scl>, AVX512XSIi8Base, TA;
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
@@ -2561,13 +2637,14 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
EVEX_B, Sched<[sched]>;
}
-multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _> {
- let Predicates = [HasAVX512] in {
+multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
+ Predicate Pred = HasAVX512> {
+ let Predicates = [Pred] in {
defm Z : avx512_vcmp_common<sched.ZMM, _.info512, NAME>,
avx512_vcmp_sae<sched.ZMM, _.info512>, EVEX_V512;
}
- let Predicates = [HasAVX512,HasVLX] in {
+ let Predicates = [Pred,HasVLX] in {
defm Z128 : avx512_vcmp_common<sched.XMM, _.info128, NAME>, EVEX_V128;
defm Z256 : avx512_vcmp_common<sched.YMM, _.info256, NAME>, EVEX_V256;
}
@@ -2577,18 +2654,23 @@ defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
+ AVX512PSIi8Base, EVEX_4V, EVEX_CD8<16, CD8VF>, TA;
// Patterns to select fp compares with load as first operand.
let Predicates = [HasAVX512] in {
- def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1,
- timm:$cc)),
+ def : Pat<(v1i1 (X86cmpms (loadf64 addr:$src2), FR64X:$src1, timm:$cc)),
(VCMPSDZrm FR64X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
- def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1,
- timm:$cc)),
+ def : Pat<(v1i1 (X86cmpms (loadf32 addr:$src2), FR32X:$src1, timm:$cc)),
(VCMPSSZrm FR32X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
}
+let Predicates = [HasFP16] in {
+ def : Pat<(v1i1 (X86cmpms (loadf16 addr:$src2), FR16X:$src1, timm:$cc)),
+ (VCMPSHZrm FR16X:$src1, addr:$src2, (X86cmpm_imm_commute timm:$cc))>;
+}
+
// ----------------------------------------------------------------
// FPClass
@@ -2736,24 +2818,28 @@ multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _,
}
multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
- bits<8> opcScalar, X86SchedWriteWidths sched,
- Predicate prd> {
+ bits<8> opcScalar, X86SchedWriteWidths sched> {
+ defm PH : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f16_info, opcVec,
+ sched, HasFP16>,
+ EVEX_CD8<16, CD8VF>, AVX512PSIi8Base, TA;
+ defm SHZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
+ sched.Scl, f16x_info, HasFP16>,
+ EVEX_CD8<16, CD8VT1>, AVX512PSIi8Base, TA;
defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec,
- sched, prd>,
- EVEX_CD8<32, CD8VF>;
+ sched, HasDQI>,
+ EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
- sched, prd>,
- EVEX_CD8<64, CD8VF> , VEX_W;
+ sched, HasDQI>,
+ EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
- sched.Scl, f32x_info, prd>, VEX_LIG,
- EVEX_CD8<32, CD8VT1>;
+ sched.Scl, f32x_info, HasDQI>, VEX_LIG,
+ EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
- sched.Scl, f64x_info, prd>, VEX_LIG,
- EVEX_CD8<64, CD8VT1>, VEX_W;
+ sched.Scl, f64x_info, HasDQI>, VEX_LIG,
+ EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
}
-defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp,
- HasDQI>, AVX512AIi8Base, EVEX;
+defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
//-----------------------------------------------------------------
// Mask register copy, including
@@ -3766,6 +3852,110 @@ let Predicates = [HasVLX] in {
def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
(VMOVDQU64Z256mr addr:$dst, VR256X:$src)>;
}
+let Predicates = [HasFP16] in {
+ def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrrk VR512:$src0, VK32WM:$mask, VR512:$src1)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask, (v32f16 VR512:$src1), v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrrkz VK32WM:$mask, VR512:$src1)>;
+ def : Pat<(v32f16 (alignedloadv32f16 addr:$src)),
+ (VMOVAPSZrm addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (alignedloadv32f16 addr:$src)), (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (alignedloadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (loadv32f16 addr:$src)),
+ (VMOVUPSZrm addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (loadv32f16 addr:$src)), (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (vselect VK32WM:$mask,
+ (v32f16 (loadv32f16 addr:$src)), v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, (v32f16 VR512:$src0))),
+ (VMOVDQU16Zrmk VR512:$src0, VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, undef)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+ def : Pat<(v32f16 (masked_load addr:$src, VK32WM:$mask, v32f16_info.ImmAllZerosV)),
+ (VMOVDQU16Zrmkz VK32WM:$mask, addr:$src)>;
+
+ def : Pat<(alignedstore (v32f16 VR512:$src), addr:$dst),
+ (VMOVAPSZmr addr:$dst, VR512:$src)>;
+ def : Pat<(store (v32f16 VR512:$src), addr:$dst),
+ (VMOVUPSZmr addr:$dst, VR512:$src)>;
+ def : Pat<(masked_store (v32f16 VR512:$src), addr:$dst, VK32WM:$mask),
+ (VMOVDQU16Zmrk addr:$dst, VK32WM:$mask, VR512:$src)>;
+}
+let Predicates = [HasFP16, HasVLX] in {
+ def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rrk VR256X:$src0, VK16WM:$mask, VR256X:$src1)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask, (v16f16 VR256X:$src1), v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rrkz VK16WM:$mask, VR256X:$src1)>;
+ def : Pat<(v16f16 (alignedloadv16f16 addr:$src)),
+ (VMOVAPSZ256rm addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (alignedloadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (alignedloadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (loadv16f16 addr:$src)),
+ (VMOVUPSZ256rm addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (loadv16f16 addr:$src)), (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (vselect VK16WM:$mask,
+ (v16f16 (loadv16f16 addr:$src)), v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, (v16f16 VR256X:$src0))),
+ (VMOVDQU16Z256rmk VR256X:$src0, VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, undef)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+ def : Pat<(v16f16 (masked_load addr:$src, VK16WM:$mask, v16f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z256rmkz VK16WM:$mask, addr:$src)>;
+
+ def : Pat<(alignedstore (v16f16 VR256X:$src), addr:$dst),
+ (VMOVAPSZ256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(store (v16f16 VR256X:$src), addr:$dst),
+ (VMOVUPSZ256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(masked_store (v16f16 VR256X:$src), addr:$dst, VK16WM:$mask),
+ (VMOVDQU16Z256mrk addr:$dst, VK16WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rrk VR128X:$src0, VK8WM:$mask, VR128X:$src1)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask, (v8f16 VR128X:$src1), v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rrkz VK8WM:$mask, VR128X:$src1)>;
+ def : Pat<(v8f16 (alignedloadv8f16 addr:$src)),
+ (VMOVAPSZ128rm addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (alignedloadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (alignedloadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (loadv8f16 addr:$src)),
+ (VMOVUPSZ128rm addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (loadv8f16 addr:$src)), (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (vselect VK8WM:$mask,
+ (v8f16 (loadv8f16 addr:$src)), v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, (v8f16 VR128X:$src0))),
+ (VMOVDQU16Z128rmk VR128X:$src0, VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, undef)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+ def : Pat<(v8f16 (masked_load addr:$src, VK8WM:$mask, v8f16x_info.ImmAllZerosV)),
+ (VMOVDQU16Z128rmkz VK8WM:$mask, addr:$src)>;
+
+ def : Pat<(alignedstore (v8f16 VR128X:$src), addr:$dst),
+ (VMOVAPSZ128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(store (v8f16 VR128X:$src), addr:$dst),
+ (VMOVUPSZ128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(masked_store (v8f16 VR128X:$src), addr:$dst, VK8WM:$mask),
+ (VMOVDQU16Z128mrk addr:$dst, VK8WM:$mask, VR128X:$src)>;
+}
// Move Int Doubleword to Packed Double Int
//
@@ -3905,12 +4095,13 @@ def : Pat<(f64 (bitconvert VK64:$src)),
(VMOV64toSDZrr (KMOVQrk VK64:$src))>;
//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
+// AVX-512 MOVSH, MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
- X86VectorVTInfo _> {
- let Predicates = [HasAVX512, OptForSize] in
+ X86VectorVTInfo _,
+ list<Predicate> prd = [HasAVX512, OptForSize]> {
+ let Predicates = prd in
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -3976,6 +4167,9 @@ defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
+ [HasFP16]>,
+ VEX_LIG, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
PatLeaf ZeroFP, X86VectorVTInfo _> {
@@ -4144,9 +4338,14 @@ def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
addr:$srcAddr)>;
}
+defm : avx512_move_scalar_lowering<"VMOVSHZ", X86Movsh, fp16imm0, v8f16x_info>;
defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
+defm : avx512_store_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
+defm : avx512_store_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -4154,6 +4353,13 @@ defm : avx512_store_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_store_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+defm : avx512_store_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (insert_subvector
+ (v32i1 immAllZerosV),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ GR8, sub_8bit>;
defm : avx512_store_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
@@ -4179,6 +4385,10 @@ defm : avx512_store_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
+defm : avx512_load_scalar_lowering<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32>;
+defm : avx512_load_scalar_lowering_subreg<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (bitconvert (and GR32:$mask, (i32 1)))), GR32, sub_32bit>;
defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
@@ -4186,6 +4396,13 @@ defm : avx512_load_scalar_lowering_subreg<"VMOVSSZ", avx512vl_f32_info,
defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
+defm : avx512_load_scalar_lowering_subreg2<"VMOVSHZ", avx512vl_f16_info,
+ (v32i1 (insert_subvector
+ (v32i1 immAllZerosV),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ (iPTR 0))),
+ (v8i1 (bitconvert (and GR8:$mask, (i8 1)))),
+ GR8, sub_8bit>;
defm : avx512_load_scalar_lowering_subreg2<"VMOVSSZ", avx512vl_f32_info,
(v16i1 (insert_subvector
(v16i1 immAllZerosV),
@@ -4211,6 +4428,16 @@ defm : avx512_load_scalar_lowering_subreg2<"VMOVSDZ", avx512vl_f64_info,
(v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))),
(iPTR 0))), GR8, sub_8bit>;
+def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), (f16 FR16X:$src2))),
+ (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrk
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src2, VR128X)),
+ VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
+
+def : Pat<(f16 (X86selects VK1WM:$mask, (f16 FR16X:$src1), fp16imm0)),
+ (COPY_TO_REGCLASS (v8f16 (VMOVSHZrrkz VK1WM:$mask, (v8f16 (IMPLICIT_DEF)),
+ (v8f16 (COPY_TO_REGCLASS FR16X:$src1, VR128X)))), FR16X)>;
+
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (v4f32 (VMOVSSZrrk
(v4f32 (COPY_TO_REGCLASS FR32X:$src2, VR128X)),
@@ -4259,6 +4486,32 @@ def : Pat<(v2f64 (X86selects VK1WM:$mask, (v2f64 VR128X:$src1), (v2f64 immAllZer
(VMOVSDZrrkz VK1WM:$mask, VR128X:$src1, VR128X:$src1)>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
+ let Predicates = [HasFP16] in {
+ def VMOVSHZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2),
+ "vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, T_MAP5XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSHZrr">,
+ Sched<[SchedWriteFShuffle.XMM]>;
+
+ let Constraints = "$src0 = $dst" in
+ def VMOVSHZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f16x_info.RC:$src0, f16x_info.KRCWM:$mask,
+ VR128X:$src1, VR128X:$src2),
+ "vmovsh\t{$src2, $src1, $dst {${mask}}|"#
+ "$dst {${mask}}, $src1, $src2}",
+ []>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSHZrrk">,
+ Sched<[SchedWriteFShuffle.XMM]>;
+
+ def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
+ (ins f16x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
+ "vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, $src1, $src2}",
+ []>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
+ FoldGenData<"VMOVSHZrrkz">,
+ Sched<[SchedWriteFShuffle.XMM]>;
+ }
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4311,6 +4564,16 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
Sched<[SchedWriteFShuffle.XMM]>;
}
+def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ (VMOVSHZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
+def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}}|"#
+ "$dst {${mask}}, $src1, $src2}",
+ (VMOVSHZrrk_REV VR128X:$dst, VK1WM:$mask,
+ VR128X:$src1, VR128X:$src2), 0>;
+def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst {${mask}} {z}|"#
+ "$dst {${mask}} {z}, $src1, $src2}",
+ (VMOVSHZrrkz_REV VR128X:$dst, VK1WM:$mask,
+ VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
(VMOVSSZrr_REV VR128X:$dst, VR128X:$src1, VR128X:$src2), 0>;
def : InstAlias<"vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
@@ -4393,6 +4656,29 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8f64 (X86vzload64 addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
}
+let Predicates = [HasFP16] in {
+ def : Pat<(v8f16 (X86vzmovl (v8f16 VR128X:$src))),
+ (VMOVSHZrr (v8f16 (AVX512_128_SET0)), VR128X:$src)>;
+
+  // FIXME: We need better canonicalization in DAG combine.
+ def : Pat<(v16f16 (X86vzmovl (v16f16 VR256X:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
+ (v8f16 (EXTRACT_SUBREG (v16f16 VR256X:$src), sub_xmm)))), sub_xmm)>;
+ def : Pat<(v32f16 (X86vzmovl (v32f16 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (v8f16 (VMOVSHZrr (v8f16 (AVX512_128_SET0)),
+ (v8f16 (EXTRACT_SUBREG (v32f16 VR512:$src), sub_xmm)))), sub_xmm)>;
+
+ def : Pat<(v8f16 (X86vzload16 addr:$src)),
+ (VMOVSHZrm addr:$src)>;
+
+ def : Pat<(v16f16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
+
+ def : Pat<(v32f16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSHZrm addr:$src), sub_xmm)>;
+}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
@@ -5295,8 +5581,7 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
- SDNode VecNode, X86FoldableSchedWrite sched,
- bit IsCommutable = 0> {
+ SDNode VecNode, X86FoldableSchedWrite sched> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
@@ -5357,13 +5642,19 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator
defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode,
sched.PS.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, RndNode,
- sched.PS.Scl, IsCommutable>,
+ sched.PS.Scl>,
XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode,
sched.PD.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
- sched.PD.Scl, IsCommutable>,
+ sched.PD.Scl>,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ let Predicates = [HasFP16] in
+ defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
+ VecNode, sched.PH.Scl, IsCommutable>,
+ avx512_fp_scalar_round<opc, OpcodeStr#"sh", f16x_info, RndNode,
+ sched.PH.Scl>,
+ T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>;
}
multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5377,6 +5668,13 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
VecNode, SaeNode, sched.PD.Scl, IsCommutable,
NAME#"SD">,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ let Predicates = [HasFP16] in {
+ defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
+ VecNode, SaeNode, sched.PH.Scl, IsCommutable,
+ NAME#"SH">,
+ T_MAP5XS, EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>,
+ NotEVEX2VEXConvertible;
+ }
}
defm VADD : avx512_binop_s_round<0x58, "vadd", any_fadd, X86fadds, X86faddRnds,
SchedWriteFAddSizes, 1>;
@@ -5432,47 +5730,60 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
+defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
+ SchedWriteFCmp.Scl, "VMINCSH">, T_MAP5XS,
+ EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
+ NotEVEX2VEXConvertible;
+defm VMAXCSHZ : avx512_comutable_binop_s<0x5F, "vmaxsh", f16x_info, X86fmaxc,
+ SchedWriteFCmp.Scl, "VMAXCSH">, T_MAP5XS,
+ EVEX_4V, VEX_LIG, EVEX_CD8<16, CD8VT1>, SIMD_EXC,
+ NotEVEX2VEXConvertible;
+
multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode,
X86VectorVTInfo _, X86FoldableSchedWrite sched,
bit IsCommutable,
- bit IsKCommutable = IsCommutable> {
+ bit IsKCommutable = IsCommutable,
+ string suffix = _.Suffix,
+ string ClobberConstraint = "",
+ bit MayRaiseFPException = 1> {
let ExeDomain = _.ExeDomain, hasSideEffects = 0,
- Uses = [MXCSR], mayRaiseFPException = 1 in {
+ Uses = [MXCSR], mayRaiseFPException = MayRaiseFPException in {
defm rr: AVX512_maskable_split<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2), OpcodeStr#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
- (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), IsCommutable,
- IsKCommutable, IsKCommutable>,
- EVEX_4V, Sched<[sched]>;
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr#suffix,
+ "$src2, $src1", "$src1, $src2",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT (MaskOpNode _.RC:$src1, _.RC:$src2)), ClobberConstraint,
+ IsCommutable, IsKCommutable, IsKCommutable>, EVEX_4V, Sched<[sched]>;
let mayLoad = 1 in {
defm rm: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
- (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2))>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr#suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
+ (MaskOpNode _.RC:$src1, (_.LdFrag addr:$src2)),
+ ClobberConstraint>, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmb: AVX512_maskable_split<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#_.Suffix,
- "${src2}"#_.BroadcastStr#", $src1",
- "$src1, ${src2}"#_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
- (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2)))>,
- EVEX_4V, EVEX_B,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr#suffix,
+ "${src2}"#_.BroadcastStr#", $src1",
+ "$src1, ${src2}"#_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
+ (MaskOpNode _.RC:$src1, (_.VT (_.BroadcastLdFrag addr:$src2))),
+ ClobberConstraint>, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNodeRnd,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
+ X86FoldableSchedWrite sched, X86VectorVTInfo _,
+ string suffix = _.Suffix,
+ string ClobberConstraint = ""> {
let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#_.Suffix,
+ (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr#suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
- (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc)))>,
+ (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 timm:$rc))),
+ 0, 0, 0, vselect_mask, ClobberConstraint>,
EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
@@ -5519,9 +5830,32 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
}
}
+multiclass avx512_fp_binop_ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode,
+ X86SchedWriteSizes sched, bit IsCommutable = 0> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v32f16_info,
+ sched.PH.ZMM, IsCommutable>, EVEX_V512, T_MAP5PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+ let Predicates = [HasVLX, HasFP16] in {
+ defm PHZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f16x_info,
+ sched.PH.XMM, IsCommutable>, EVEX_V128, T_MAP5PS,
+ EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f16x_info,
+ sched.PH.YMM, IsCommutable>, EVEX_V256, T_MAP5PS,
+ EVEX_CD8<16, CD8VF>;
+ }
+}
+
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
+ v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ }
defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -5533,6 +5867,11 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
let Uses = [MXCSR] in
multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PH.ZMM,
+ v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ }
defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PS.ZMM,
v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -5543,26 +5882,36 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
SchedWriteFAddSizes, 1>,
+ avx512_fp_binop_ph<0x58, "vadd", any_fadd, fadd, SchedWriteFAddSizes, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAddSizes>;
defm VMUL : avx512_fp_binop_p<0x59, "vmul", any_fmul, fmul, HasAVX512,
SchedWriteFMulSizes, 1>,
+ avx512_fp_binop_ph<0x59, "vmul", any_fmul, fmul, SchedWriteFMulSizes, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMulSizes>;
defm VSUB : avx512_fp_binop_p<0x5C, "vsub", any_fsub, fsub, HasAVX512,
SchedWriteFAddSizes>,
+ avx512_fp_binop_ph<0x5C, "vsub", any_fsub, fsub, SchedWriteFAddSizes>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAddSizes>;
defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", any_fdiv, fdiv, HasAVX512,
SchedWriteFDivSizes>,
+ avx512_fp_binop_ph<0x5E, "vdiv", any_fdiv, fdiv, SchedWriteFDivSizes>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDivSizes>;
defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, X86fmin, HasAVX512,
SchedWriteFCmpSizes, 0>,
+ avx512_fp_binop_ph<0x5D, "vmin", X86fmin, X86fmin, SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminSAE, SchedWriteFCmpSizes>;
defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, X86fmax, HasAVX512,
SchedWriteFCmpSizes, 0>,
+ avx512_fp_binop_ph<0x5F, "vmax", X86fmax, X86fmax, SchedWriteFCmpSizes, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxSAE, SchedWriteFCmpSizes>;
let isCodeGenOnly = 1 in {
defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, X86fminc, HasAVX512,
+ SchedWriteFCmpSizes, 1>,
+ avx512_fp_binop_ph<0x5D, "vmin", X86fminc, X86fminc,
SchedWriteFCmpSizes, 1>;
defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, X86fmaxc, HasAVX512,
+ SchedWriteFCmpSizes, 1>,
+ avx512_fp_binop_ph<0x5F, "vmax", X86fmaxc, X86fmaxc,
SchedWriteFCmpSizes, 1>;
}
let Uses = []<Register>, mayRaiseFPException = 0 in {
@@ -5616,43 +5965,57 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr,
X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm PHZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v32f16_info>,
+ avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v32f16_info>,
+ EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ defm SHZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f16x_info>,
+ avx512_fp_scalar_round<opcScaler, OpcodeStr#"sh", f16x_info, X86scalefsRnd, sched.Scl>,
+ EVEX_4V, T_MAP6PD, EVEX_CD8<16, CD8VT1>;
+ }
defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v16f32_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v16f32_info>,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
+ EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>;
+ EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>, T8PD;
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W;
+ EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v4f32x_info>,
- EVEX_V128, EVEX_CD8<32, CD8VF>;
+ EVEX_V128, EVEX_CD8<32, CD8VF>, T8PD;
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
- EVEX_V256, EVEX_CD8<32, CD8VF>;
+ EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ }
+
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v8f16x_info>,
+ EVEX_V128, EVEX_CD8<16, CD8VF>, T_MAP6PD;
+ defm PHZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v16f16x_info>,
+ EVEX_V256, EVEX_CD8<16, CD8VF>, T_MAP6PD;
}
}
defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef",
- SchedWriteFAdd>, T8PD, NotEVEX2VEXConvertible;
+ SchedWriteFAdd>, NotEVEX2VEXConvertible;
//===----------------------------------------------------------------------===//
// AVX-512 VPTESTM instructions
//===----------------------------------------------------------------------===//
multiclass avx512_vptest<bits<8> opc, string OpcodeStr,
- X86FoldableSchedWrite sched, X86VectorVTInfo _,
- string Name> {
+ X86FoldableSchedWrite sched, X86VectorVTInfo _> {
// NOTE: Patterns are omitted in favor of manual selection in X86ISelDAGToDAG.
// There are just too many permutations due to commutability and bitcasts.
let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
@@ -5687,13 +6050,13 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _> {
let Predicates = [HasAVX512] in
- defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512, NAME>,
+ defm Z : avx512_vptest<opc, OpcodeStr, sched.ZMM, _.info512>,
avx512_vptest_mb<opc, OpcodeStr, sched.ZMM, _.info512>, EVEX_V512;
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256, NAME>,
+ defm Z256 : avx512_vptest<opc, OpcodeStr, sched.YMM, _.info256>,
avx512_vptest_mb<opc, OpcodeStr, sched.YMM, _.info256>, EVEX_V256;
- defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128, NAME>,
+ defm Z128 : avx512_vptest<opc, OpcodeStr, sched.XMM, _.info128>,
avx512_vptest_mb<opc, OpcodeStr, sched.XMM, _.info128>, EVEX_V128;
}
}
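Editorial aside, not part of the patch: the NOTE in avx512_vptest above defers selection to X86ISelDAGToDAG. As a rough C sketch of what gets selected, here is the per-element semantics of a 128-bit vptestmd (vptestnm inverts the test); the widths and function name are illustrative only.

#include <stdint.h>

/* Mask bit i is set when (a[i] & b[i]) != 0. */
uint8_t vptestmd_xmm(const uint32_t a[4], const uint32_t b[4]) {
  uint8_t k = 0;
  for (int i = 0; i < 4; ++i)
    if (a[i] & b[i])
      k |= (uint8_t)(1u << i);
  return k;
}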
@@ -5710,20 +6073,20 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
- v32i16_info, NAME#"W">, EVEX_V512, VEX_W;
+ v32i16_info>, EVEX_V512, VEX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
- v64i8_info, NAME#"B">, EVEX_V512;
+ v64i8_info>, EVEX_V512;
}
- let Predicates = [HasVLX, HasBWI] in {
+ let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
- v16i16x_info, NAME#"W">, EVEX_V256, VEX_W;
+ v16i16x_info>, EVEX_V256, VEX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
- v8i16x_info, NAME#"W">, EVEX_V128, VEX_W;
+ v8i16x_info>, EVEX_V128, VEX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
- v32i8x_info, NAME#"B">, EVEX_V256;
+ v32i8x_info>, EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
- v16i8x_info, NAME#"B">, EVEX_V128;
+ v16i8x_info>, EVEX_V128;
}
}
@@ -6392,7 +6755,7 @@ let Predicates = [HasAVX512] in {
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6400,14 +6763,14 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6417,13 +6780,13 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
(MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6431,38 +6794,42 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))),
(_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 timm:$rc))), 1, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _, string Suff> {
- let Predicates = [HasAVX512] in {
+ AVX512VLVectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, _.info512, Suff>,
+ sched.ZMM, _.info512>,
avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
- _.info512, Suff>,
+ _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
- let Predicates = [HasVLX, HasAVX512] in {
+ let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, _.info256, Suff>,
+ sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, _.info128, Suff>,
+ sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd> {
+ defm PH : avx512_fma3p_213_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info, "PS">;
+ avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info, "PD">, VEX_W;
+ avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
@@ -6481,7 +6848,7 @@ defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86any_Fnmsub,
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6489,14 +6856,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6507,14 +6874,14 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
- _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B,
+ _.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6522,38 +6889,42 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc))),
- 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _, string Suff> {
- let Predicates = [HasAVX512] in {
+ AVX512VLVectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, _.info512, Suff>,
+ sched.ZMM, _.info512>,
avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
- _.info512, Suff>,
+ _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
- let Predicates = [HasVLX, HasAVX512] in {
+ let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, _.info256, Suff>,
+ sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, _.info128, Suff>,
+ sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PH : avx512_fma3p_231_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info, "PS">;
+ avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info, "PD">, VEX_W;
+ avx512vl_f64_info>, T8PD, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
@@ -6571,7 +6942,7 @@ defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86any_Fnmsub,
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
defm r: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6579,7 +6950,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(null_frag),
(_.VT (MaskOpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ EVEX_4V, Sched<[sched]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
@@ -6588,7 +6959,7 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns; this helps TableGen's duplicate pattern detection.
@@ -6600,13 +6971,13 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Suff> {
+ X86VectorVTInfo _> {
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, hasSideEffects = 0,
Uses = [MXCSR] in
defm rb: AVX512_maskable_fma<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -6614,38 +6985,42 @@ multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
(null_frag),
(_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 timm:$rc))),
- 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[sched]>;
+ 1, 1>, EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>;
}
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd,
X86SchedWriteWidths sched,
- AVX512VLVectorVTInfo _, string Suff> {
- let Predicates = [HasAVX512] in {
+ AVX512VLVectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.ZMM, _.info512, Suff>,
+ sched.ZMM, _.info512>,
avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, sched.ZMM,
- _.info512, Suff>,
+ _.info512>,
EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
- let Predicates = [HasVLX, HasAVX512] in {
+ let Predicates = [HasVLX, prd] in {
defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.YMM, _.info256, Suff>,
+ sched.YMM, _.info256>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, MaskOpNode,
- sched.XMM, _.info128, Suff>,
+ sched.XMM, _.info128>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
SDNode MaskOpNode, SDNode OpNodeRnd > {
+ defm PH : avx512_fma3p_132_common<opc, OpcodeStr#"ph", OpNode, MaskOpNode,
+ OpNodeRnd, SchedWriteFMA,
+ avx512vl_f16_info, HasFP16>, T_MAP6PD;
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f32_info, "PS">;
+ avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info, "PD">, VEX_W;
+ avx512vl_f64_info>, T8PD, VEX_W;
}
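Editorial aside, not part of the patch: the "312 order" comments in avx512_fma3p_132_rm refer to the three FMA operand arrangements shared by the 132/213/231 families. A scalar C sketch, with a standing for the destination ($src1) and b, c for $src2 and $src3 (the foldable memory operand is always $src3):

/* vfmadd132: dst = dst * src3 + src2 */
float fmadd132(float a, float b, float c) { return a * c + b; }
/* vfmadd213: dst = src2 * dst + src3 */
float fmadd213(float a, float b, float c) { return b * a + c; }
/* vfmadd231: dst = src2 * src3 + dst */
float fmadd231(float a, float b, float c) { return b * c + a; }

Writing the 132 memory pattern with operands in (load, $src1, $src2) order puts the load in a different fma operand slot than the 213 and 231 patterns, which is what keeps TableGen's duplicate-pattern detection quiet.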
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
@@ -6668,39 +7043,39 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
+ EVEX_4V, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
let mayLoad = 1 in
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- AVX512FMA3Base, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", (null_frag), 1, 1>,
- AVX512FMA3Base, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
+ EVEX_4V, EVEX_B, EVEX_RC, Sched<[SchedWriteFMA.Scl]>;
let isCodeGenOnly = 1, isCommutable = 1 in {
- def r : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
+ def r : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, SIMD_EXC;
- def m : AVX512FMA3S<opc, MRMSrcMem, (outs _.FRC:$dst),
+ !if(MaskOnlyReg, [], [RHS_r])>, Sched<[SchedWriteFMA.Scl]>, EVEX_4V, SIMD_EXC;
+ def m : AVX512<opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
let Uses = [MXCSR] in
- def rb : AVX512FMA3S<opc, MRMSrcReg, (outs _.FRC:$dst),
+ def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3, AVX512RC:$rc),
!strconcat(OpcodeStr,
"\t{$rc, $src3, $src2, $dst|$dst, $src2, $src3, $rc}"),
!if(MaskOnlyReg, [], [RHS_b])>, EVEX_B, EVEX_RC,
- Sched<[SchedWriteFMA.Scl]>;
+ Sched<[SchedWriteFMA.Scl]>, EVEX_4V;
}// isCodeGenOnly = 1
}// Constraints = "$src1 = $dst"
}
@@ -6744,10 +7119,15 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f32x_info, "SS">,
- EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f64x_info, "SD">,
- EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
+ }
+ let Predicates = [HasFP16] in {
+ defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
+ OpNodeRnd, f16x_info, "SH">,
+ EVEX_CD8<16, CD8VT1>, VEX_LIG, T_MAP6PD;
}
}
@@ -6759,8 +7139,9 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86any_Fnmsub, X86Fnmsu
multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
SDNode RndOp, string Prefix,
string Suffix, SDNode Move,
- X86VectorVTInfo _, PatLeaf ZeroFP> {
- let Predicates = [HasAVX512] in {
+ X86VectorVTInfo _, PatLeaf ZeroFP,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
def : Pat<(_.VT (Move (_.VT VR128X:$src1), (_.VT (scalar_to_vector
(Op _.FRC:$src2,
(_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))),
@@ -6958,6 +7339,14 @@ multiclass avx512_scalar_fma_patterns<SDPatternOperator Op, SDNode MaskedOp,
(_.VT (COPY_TO_REGCLASS _.FRC:$src3, VR128X)), AVX512RC:$rc)>;
}
}
+defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
+defm : avx512_scalar_fma_patterns<X86any_Fmsub, X86Fmsub, X86FmsubRnd, "VFMSUB", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmadd, X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
+defm : avx512_scalar_fma_patterns<X86any_Fnmsub, X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SH",
+ X86Movsh, v8f16x_info, fp16imm0, HasFP16>;
defm : avx512_scalar_fma_patterns<any_fma, fma, X86FmaddRnd, "VFMADD",
"SS", X86Movss, v4f32x_info, fp32imm0>;
@@ -6990,13 +7379,13 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
- AVX512FMA3Base, Sched<[sched]>;
+ T8PD, EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- AVX512FMA3Base, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7005,7 +7394,7 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
- AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
@@ -7190,8 +7579,8 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
SDNode OpNodeRnd,
X86FoldableSchedWrite sched, string asm,
- string aliasStr> {
- let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+ string aliasStr, Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SrcVT.ExeDomain in {
def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
@@ -7207,7 +7596,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
[(set DstVT.RC:$dst, (OpNode
(SrcVT.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
- } // Predicates = [HasAVX512]
+ } // Predicates = [prd]
def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0, "att">;
@@ -7246,8 +7635,7 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2u
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
- X86FoldableSchedWrite sched,
- string aliasStr> {
+ X86FoldableSchedWrite sched> {
let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
@@ -7263,17 +7651,13 @@ multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
}
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
- lrint, WriteCvtSS2I,
- "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+ lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
- llrint, WriteCvtSS2I,
- "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
- lrint, WriteCvtSD2I,
- "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+ lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
- llrint, WriteCvtSD2I,
- "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
@@ -7371,8 +7755,9 @@ def : Pat<(v2f64 (X86Movsd
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDPatternOperator OpNode,
SDNode OpNodeInt, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
+ X86FoldableSchedWrite sched, string aliasStr,
+ Predicate prd = HasAVX512> {
+let Predicates = [prd], ExeDomain = _SrcRC.ExeDomain in {
let isCodeGenOnly = 1 in {
def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
@@ -7399,7 +7784,7 @@ let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
[(set _DstRC.RC:$dst,
(OpNodeInt (_SrcRC.ScalarIntMemFrags addr:$src)))]>,
EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
-} //HasAVX512
+} // Predicates = [prd]
def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
(!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0, "att">;
@@ -7497,33 +7882,47 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
EVEX_4V, VEX_LIG, Sched<[sched]>,
EVEX_B, EVEX_RC;
}
-multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
+multiclass avx512_cvt_fp_scalar_trunc<bits<8> opc, string OpcodeStr,
SDNode OpNode, SDNode OpNodeRnd,
X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+ X86VectorVTInfo _src, X86VectorVTInfo _dst,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
- OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
+ OpNodeRnd, sched>, EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeSAE,
- X86FoldableSchedWrite sched,
- X86VectorVTInfo _src, X86VectorVTInfo _dst> {
- let Predicates = [HasAVX512], ExeDomain = SSEPackedSingle in {
+multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, SDNode OpNodeSAE,
+ X86FoldableSchedWrite sched,
+ X86VectorVTInfo _src, X86VectorVTInfo _dst,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = SSEPackedSingle in {
defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode, sched>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeSAE, sched>,
- EVEX_CD8<32, CD8VT1>, XS;
+ EVEX_CD8<_src.EltSize, CD8VT1>;
}
}
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86frounds,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpexts,
+ f32x_info>, XD, VEX_W;
+defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
- f64x_info>;
+ f64x_info>, XS;
+defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f64x_info,
+ f16x_info, HasFP16>, T_MAP5XD, VEX_W;
+defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+ f64x_info, HasFP16>, T_MAP5XS;
+defm VCVTSS2SH : avx512_cvt_fp_scalar_trunc<0x1D, "vcvtss2sh", X86frounds,
+ X86froundsRnd, WriteCvtSD2SS, f32x_info,
+ f16x_info, HasFP16>, T_MAP5PS;
+defm VCVTSH2SS : avx512_cvt_fp_scalar_extend<0x13, "vcvtsh2ss", X86fpexts,
+ X86fpextsSAE, WriteCvtSS2SD, f16x_info,
+ f32x_info, HasFP16>, T_MAP6PS;
def : Pat<(f64 (any_fpextend FR32X:$src)),
(VCVTSS2SDZrr (f64 (IMPLICIT_DEF)), FR32X:$src)>,
@@ -7536,6 +7935,27 @@ def : Pat<(f32 (any_fpround FR64X:$src)),
(VCVTSD2SSZrr (f32 (IMPLICIT_DEF)), FR64X:$src)>,
Requires<[HasAVX512]>;
+def : Pat<(f32 (any_fpextend FR16X:$src)),
+ (VCVTSH2SSZrr (f32 (IMPLICIT_DEF)), FR16X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f32 (any_fpextend (loadf16 addr:$src))),
+ (VCVTSH2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f64 (any_fpextend FR16X:$src)),
+ (VCVTSH2SDZrr (f64 (IMPLICIT_DEF)), FR16X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f64 (any_fpextend (loadf16 addr:$src))),
+ (VCVTSH2SDZrm (f64 (IMPLICIT_DEF)), addr:$src)>,
+ Requires<[HasFP16, OptForSize]>;
+
+def : Pat<(f16 (any_fpround FR32X:$src)),
+ (VCVTSS2SHZrr (f16 (IMPLICIT_DEF)), FR32X:$src)>,
+ Requires<[HasFP16]>;
+def : Pat<(f16 (any_fpround FR64X:$src)),
+ (VCVTSD2SHZrr (f16 (IMPLICIT_DEF)), FR64X:$src)>,
+ Requires<[HasFP16]>;
+
def : Pat<(v4f32 (X86Movss
(v4f32 VR128X:$dst),
(v4f32 (scalar_to_vector
@@ -7649,39 +8069,76 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src)),
(_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
-// Extend Float to Double
-multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
- X86SchedWriteWidths sched> {
- let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
+// Extend [Float to Double, Half to Float]
+multiclass avx512_cvt_extend<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasAVX512> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info512, _src.info256,
any_fpextend, fpextend, sched.ZMM>,
- avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, _dst.info512, _src.info256,
X86vfpextSAE, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86any_vfpext, X86vfpext, sched.XMM, "{1to2}",
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info128, _src.info128,
+ X86any_vfpext, X86vfpext, sched.XMM,
+ _dst.info128.BroadcastStr,
"", f64mem>, EVEX_V128;
- defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info,
- any_fpextend, fpextend, sched.YMM>, EVEX_V256;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, _dst.info256, _src.info128,
+ any_fpextend, fpextend, sched.YMM>, EVEX_V256;
}
}
-// Truncate Double to Float
-multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
- let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info,
+// Truncate [Double to Float, Float to Half]
+multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasAVX512,
+ PatFrag bcast128 = _src.info128.BroadcastLdFrag,
+ PatFrag loadVT128 = _src.info128.LdFrag,
+ RegisterClass maskRC128 = _src.info128.KRCWM> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512,
X86any_vfpround, X86vfpround, sched.ZMM>,
- avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
X86vfproundRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasVLX] in {
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
- null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
- f128mem, VK2WM>, EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info,
+ let Predicates = [prd, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128,
+ null_frag, null_frag, sched.XMM,
+ _src.info128.BroadcastStr, "{x}",
+ f128mem, maskRC128>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256,
X86any_vfpround, X86vfpround,
- sched.YMM, "{1to4}", "{y}">, EVEX_V256;
+ sched.YMM, _src.info256.BroadcastStr, "{y}">, EVEX_V256;
+
+ // Special patterns to allow use of X86vmfpround for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT VR128X:$src))),
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, maskRC128:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rrkz") maskRC128:$mask, VR128X:$src)>;
+
+ def : Pat<(_dst.info128.VT (X86any_vfpround (loadVT128 addr:$src))),
+ (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+ def : Pat<(X86vmfpround (loadVT128 addr:$src), (_dst.info128.VT VR128X:$src0),
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadVT128 addr:$src), _dst.info128.ImmAllZerosV,
+ maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmkz") maskRC128:$mask, addr:$src)>;
+
+ def : Pat<(_dst.info128.VT (X86any_vfpround (_src.info128.VT (bcast128 addr:$src)))),
+ (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+ (_dst.info128.VT VR128X:$src0), maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, maskRC128:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (_src.info128.VT (bcast128 addr:$src)),
+ _dst.info128.ImmAllZerosV, maskRC128:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbkz") maskRC128:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -7725,40 +8182,185 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sc
VK4WM:$mask, f64mem:$src), 0, "att">;
}
-defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SchedWriteCvtPD2PS>,
+defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
+ avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
VEX_W, PD, EVEX_CD8<64, CD8VF>;
-defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SchedWriteCvtPS2PD>,
- PS, EVEX_CD8<32, CD8VH>;
+defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
+ avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
+ PS, EVEX_CD8<32, CD8VH>;
-let Predicates = [HasVLX] in {
+// Extend Half to Double
+multiclass avx512_cvtph2pd<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f16x_info,
+ any_fpextend, fpextend, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f16x_info,
+ X86vfpextSAE, sched.ZMM>, EVEX_V512;
+ def : Pat<(v8f64 (extloadv8f16 addr:$src)),
+ (!cast<Instruction>(NAME # "Zrm") addr:$src)>;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v8f16x_info,
+ X86any_vfpext, X86vfpext, sched.XMM, "{1to2}", "",
+ f32mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v8f16x_info,
+ X86any_vfpext, X86vfpext, sched.YMM, "{1to4}", "",
+ f64mem>, EVEX_V256;
+ }
+}
+
+// Truncate Double to Half
+multiclass avx512_cvtpd2ph<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8f64_info,
+ X86any_vfpround, X86vfpround, sched.ZMM, "{1to8}", "{z}">,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8f64_info,
+ X86vfproundRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2f64x_info, null_frag,
+ null_frag, sched.XMM, "{1to2}", "{x}", f128mem,
+ VK2WM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4f64x_info, null_frag,
+ null_frag, sched.YMM, "{1to4}", "{y}", f256mem,
+ VK4WM>, EVEX_V256;
+ }
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+ VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTPS2PHX : avx512_cvt_trunc<0x1D, "vcvtps2phx", avx512vl_f16_info,
+ avx512vl_f32_info, SchedWriteCvtPD2PS,
+ HasFP16>, T_MAP5PD, EVEX_CD8<32, CD8VF>;
+defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
+ avx512vl_f16_info, SchedWriteCvtPS2PD,
+ HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
+defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
+ VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
+defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
+ T_MAP5PS, EVEX_CD8<16, CD8VQ>;
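+// These expand to the usual Z/Z256/Z128 instruction variants (e.g.
+// VCVTPD2PHZ256rr, VCVTPD2PHZ128rmb), which the masking patterns below
+// reference directly.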
+
+let Predicates = [HasFP16, HasVLX] in {
// Special patterns to allow use of X86vmfpround for masking. Instruction
// patterns have been disabled with null_frag.
- def : Pat<(X86any_vfpround (v2f64 VR128X:$src)),
- (VCVTPD2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v4f32 VR128X:$src0),
+ def : Pat<(v8f16 (X86any_vfpround (v4f64 VR256X:$src))),
+ (VCVTPD2PHZ256rr VR256X:$src)>;
+ def : Pat<(v8f16 (X86vmfpround (v4f64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask)),
+ (VCVTPD2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86vmfpround (v4f64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (loadv4f64 addr:$src))),
+ (VCVTPD2PHZ256rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv4f64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv4f64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTPD2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (v4f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTPD2PHZ256rmb addr:$src)>;
+ def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTPD2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (v4f64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTPD2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_vfpround (v2f64 VR128X:$src))),
+ (VCVTPD2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
- (VCVTPD2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86vmfpround (v2f64 VR128X:$src), v4f32x_info.ImmAllZerosV,
+ (VCVTPD2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86vmfpround (v2f64 VR128X:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
- (VCVTPD2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
+ (VCVTPD2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86any_vfpround (loadv2f64 addr:$src)),
- (VCVTPD2PSZ128rm addr:$src)>;
- def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v4f32 VR128X:$src0),
+ def : Pat<(v8f16 (X86any_vfpround (loadv2f64 addr:$src))),
+ (VCVTPD2PHZ128rm addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), (v8f16 VR128X:$src0),
VK2WM:$mask),
- (VCVTPD2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86vmfpround (loadv2f64 addr:$src), v4f32x_info.ImmAllZerosV,
+ (VCVTPD2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86vmfpround (loadv2f64 addr:$src), v8f16x_info.ImmAllZerosV,
VK2WM:$mask),
- (VCVTPD2PSZ128rmkz VK2WM:$mask, addr:$src)>;
+ (VCVTPD2PHZ128rmkz VK2WM:$mask, addr:$src)>;
- def : Pat<(X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src))),
- (VCVTPD2PSZ128rmb addr:$src)>;
+ def : Pat<(v8f16 (X86any_vfpround (v2f64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTPD2PHZ128rmb addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTPD2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTPD2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
def : Pat<(X86vmfpround (v2f64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTPD2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTPD2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
}
// Convert Signed/Unsigned Doubleword to Double
@@ -8079,26 +8681,60 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpN
}
// Convert Signed/Unsigned Quadword to Float
-multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
- SDNode MaskOpNode, SDNode OpNodeRnd,
- X86SchedWriteWidths sched> {
- let Predicates = [HasDQI] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode,
+// Also Convert Signed/Unsigned Doubleword to Half
+multiclass avx512_cvtqq2ps_dq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDPatternOperator OpNode128,
+ SDPatternOperator OpNode128M, SDPatternOperator OpNodeRnd,
+ AVX512VLVectorVTInfo _dst, AVX512VLVectorVTInfo _src,
+ X86SchedWriteWidths sched, Predicate prd = HasDQI> {
+ let Predicates = [prd] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _dst.info256, _src.info512, OpNode,
MaskOpNode, sched.ZMM>,
- avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _dst.info256, _src.info512,
OpNodeRnd, sched.ZMM>, EVEX_V512;
}
- let Predicates = [HasDQI, HasVLX] in {
+ let Predicates = [prd, HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// for the same reason.
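// For example, the AT&T aliases "vcvtqq2psx"/"vcvtqq2psy" let the assembler
// tell the 128-bit and 256-bit memory sources apart, since both forms write
// an XMM destination.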
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, null_frag,
- null_frag, sched.XMM, "{1to2}", "{x}", i128mem, VK2WM>,
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info128, null_frag,
+ null_frag, sched.XMM, _src.info128.BroadcastStr,
+ "{x}", i128mem, _src.info128.KRCWM>,
EVEX_V128, NotEVEX2VEXConvertible;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
- MaskOpNode, sched.YMM, "{1to4}", "{y}">, EVEX_V256,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _dst.info128, _src.info256, OpNode,
+ MaskOpNode, sched.YMM, _src.info256.BroadcastStr,
+ "{y}">, EVEX_V256,
NotEVEX2VEXConvertible;
+
+ // Special patterns to allow use of X86VM[SU]intToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT VR128X:$src))),
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), (_dst.info128.VT VR128X:$src0),
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$src0, _src.info128.KRCWM:$mask, VR128X:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT VR128X:$src), _dst.info128.ImmAllZerosV,
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rrkz") _src.info128.KRCWM:$mask, VR128X:$src)>;
+
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.LdFrag addr:$src))),
+ (!cast<Instruction>(NAME # "Z128rm") addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), (_dst.info128.VT VR128X:$src0),
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.LdFrag addr:$src), _dst.info128.ImmAllZerosV,
+ _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmkz") _src.info128.KRCWM:$mask, addr:$src)>;
+
+ def : Pat<(_dst.info128.VT (OpNode128 (_src.info128.VT (X86VBroadcastld64 addr:$src)))),
+ (!cast<Instruction>(NAME # "Z128rmb") addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+ (_dst.info128.VT VR128X:$src0), _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$src0, _src.info128.KRCWM:$mask, addr:$src)>;
+ def : Pat<(OpNode128M (_src.info128.VT (X86VBroadcastld64 addr:$src)),
+ _dst.info128.ImmAllZerosV, _src.info128.KRCWM:$mask),
+ (!cast<Instruction>(NAME # "Z128rmbkz") _src.info128.KRCWM:$mask, addr:$src)>;
}
def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
@@ -8240,13 +8876,29 @@ defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
VEX_W, XS, EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", any_sint_to_fp,
- sint_to_fp, X86VSintToFpRnd, SchedWriteCvtDQ2PS>,
- VEX_W, PS, EVEX_CD8<64, CD8VF>;
+defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VMSintToFP,
+ X86VSintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+ SchedWriteCvtDQ2PS, HasFP16>,
+ T_MAP5PS, EVEX_CD8<32, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", any_uint_to_fp,
- uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PS>,
- VEX_W, XD, EVEX_CD8<64, CD8VF>;
+defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint_to_fp,
+ X86any_VUintToFP, X86VMUintToFP,
+ X86VUintToFpRnd, avx512vl_f16_info, avx512vl_i32_info,
+ SchedWriteCvtDQ2PS, HasFP16>, T_MAP5XD,
+ EVEX_CD8<32, CD8VF>;
+
+defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
+ X86any_VSintToFP, X86VMSintToFP,
+ X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+ SchedWriteCvtDQ2PS>, VEX_W, PS,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
+ X86any_VUintToFP, X86VMUintToFP,
+ X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
+ SchedWriteCvtDQ2PS>, VEX_W, XD,
+ EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
// Special patterns to allow use of X86mcvtp2Int for masking. Instruction
@@ -8436,66 +9088,6 @@ let Predicates = [HasVLX] in {
(VCVTUDQ2PDZ128rmkz VK2WM:$mask, addr:$src)>;
}
-let Predicates = [HasDQI, HasVLX] in {
- // Special patterns to allow use of X86VMSintToFP for masking. Instruction
- // patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86any_VSintToFP (v2i64 VR128X:$src))),
- (VCVTQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
- def : Pat<(v4f32 (X86any_VSintToFP (loadv2i64 addr:$src))),
- (VCVTQQ2PSZ128rm addr:$src)>;
- def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
- (VCVTQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-
- // Special patterns to allow use of X86VMUintToFP for masking. Instruction
- // patterns have been disabled with null_frag.
- def : Pat<(v4f32 (X86any_VUintToFP (v2i64 VR128X:$src))),
- (VCVTUQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rrkz VK2WM:$mask, VR128X:$src)>;
-
- def : Pat<(v4f32 (X86any_VUintToFP (loadv2i64 addr:$src))),
- (VCVTUQQ2PSZ128rm addr:$src)>;
- def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v4f32 VR128X:$src0),
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v4f32x_info.ImmAllZerosV,
- VK2WM:$mask),
- (VCVTUQQ2PSZ128rmkz VK2WM:$mask, addr:$src)>;
-
- def : Pat<(v4f32 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
- (VCVTUQQ2PSZ128rmb addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- (v4f32 VR128X:$src0), VK2WM:$mask),
- (VCVTUQQ2PSZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
- def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
- v4f32x_info.ImmAllZerosV, VK2WM:$mask),
- (VCVTUQQ2PSZ128rmbkz VK2WM:$mask, addr:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
@@ -8626,9 +9218,9 @@ let Predicates = [HasVLX] in {
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
- string OpcodeStr, Domain d,
- X86FoldableSchedWrite sched = WriteFComX> {
- let hasSideEffects = 0, Uses = [MXCSR] in
+ string OpcodeStr, Domain d,
+ X86FoldableSchedWrite sched = WriteFComX> {
+ let ExeDomain = d, hasSideEffects = 0, Uses = [MXCSR] in
def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>,
EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>;
@@ -8675,10 +9267,35 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
}
}
-/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd
+let Defs = [EFLAGS], Predicates = [HasFP16] in {
+ defm VUCOMISHZ : avx512_ord_cmp_sae<0x2E, v8f16x_info, "vucomish",
+ SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
+ EVEX_CD8<16, CD8VT1>;
+ defm VCOMISHZ : avx512_ord_cmp_sae<0x2F, v8f16x_info, "vcomish",
+ SSEPackedSingle>, AVX512PSIi8Base, T_MAP5PS,
+ EVEX_CD8<16, CD8VT1>;
+ defm VUCOMISHZ : sse12_ord_cmp<0x2E, FR16X, X86any_fcmp, f16, f16mem, loadf16,
+ "ucomish", SSEPackedSingle>, T_MAP5PS, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ defm VCOMISHZ : sse12_ord_cmp<0x2F, FR16X, X86strict_fcmps, f16, f16mem, loadf16,
+ "comish", SSEPackedSingle>, T_MAP5PS, EVEX,
+ VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ let isCodeGenOnly = 1 in {
+ defm VUCOMISHZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v8f16, shmem,
+ sse_load_f16, "ucomish", SSEPackedSingle>,
+ T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+
+ defm VCOMISHZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v8f16, shmem,
+ sse_load_f16, "comish", SSEPackedSingle>,
+ T_MAP5PS, EVEX, VEX_LIG, EVEX_CD8<16, CD8VT1>;
+ }
+}
+
+/// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd, rcpsh, rsqrtsh
multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86FoldableSchedWrite sched, X86VectorVTInfo _> {
- let Predicates = [HasAVX512], ExeDomain = _.ExeDomain, Uses = [MXCSR] in {
+ X86FoldableSchedWrite sched, X86VectorVTInfo _,
+ Predicate prd = HasAVX512> {
+ let Predicates = [prd], ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -8693,6 +9310,13 @@ multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
+defm VRCPSHZ : avx512_fp14_s<0x4D, "vrcpsh", X86rcp14s, SchedWriteFRcp.Scl,
+ f16x_info, HasFP16>, EVEX_CD8<16, CD8VT1>,
+ T_MAP6PD;
+defm VRSQRTSHZ : avx512_fp14_s<0x4F, "vrsqrtsh", X86rsqrt14s,
+ SchedWriteFRsqrt.Scl, f16x_info, HasFP16>,
+ EVEX_CD8<16, CD8VT1>, T_MAP6PD;
+let Uses = [MXCSR] in {
defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
f32x_info>, EVEX_CD8<32, CD8VT1>,
T8PD;
@@ -8705,6 +9329,7 @@ defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
EVEX_CD8<64, CD8VT1>, T8PD;
+}
/// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd
multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -8728,33 +9353,45 @@ multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
-let Uses = [MXCSR] in
multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86SchedWriteWidths sched> {
- defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched.ZMM,
- v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched.ZMM,
- v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-
- // Define only if AVX512VL feature is present.
- let Predicates = [HasVLX] in {
- defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, sched.XMM, v4f32x_info>,
- EVEX_V128, EVEX_CD8<32, CD8VF>;
- defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"),
- OpNode, sched.YMM, v8f32x_info>,
- EVEX_V256, EVEX_CD8<32, CD8VF>;
- defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, sched.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
- defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"),
- OpNode, sched.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ let Uses = [MXCSR] in {
+ defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
+ v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
+ v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
}
-}
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
+ v32f16_info>, EVEX_V512, T_MAP6PD, EVEX_CD8<16, CD8VF>;
-defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SchedWriteFRsqrt>;
-defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SchedWriteFRcp>;
+ // Define only if AVX512VL feature is present.
+ let Predicates = [HasVLX], Uses = [MXCSR] in {
+ defm 14PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
+ OpNode, sched.XMM, v4f32x_info>,
+ EVEX_V128, EVEX_CD8<32, CD8VF>;
+ defm 14PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"),
+ OpNode, sched.YMM, v8f32x_info>,
+ EVEX_V256, EVEX_CD8<32, CD8VF>;
+ defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
+ OpNode, sched.XMM, v2f64x_info>,
+ EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
+ OpNode, sched.YMM, v4f64x_info>,
+ EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
+ OpNode, sched.XMM, v8f16x_info>,
+ EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
+ OpNode, sched.YMM, v16f16x_info>,
+ EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ }
+}
+
+defm VRSQRT : avx512_fp14_p_vl_all<0x4E, "vrsqrt", X86rsqrt14, SchedWriteFRsqrt>;
+defm VRCP : avx512_fp14_p_vl_all<0x4C, "vrcp", X86rcp14, SchedWriteFRcp>;
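+// The "14" moved from the outer defm names into the per-size suffixes, so
+// the existing instructions keep their names (VRCP14PSZ128, VRSQRT14PDZ,
+// etc.) while the new FP16 forms become VRCPPHZ/VRSQRTPHZ and friends.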
/// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd
multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
@@ -8784,20 +9421,29 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
+}
+
+multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
+ let Predicates = [HasFP16] in
+ defm SHZ : avx512_fp28_s<opc, OpcodeStr#"sh", f16x_info, OpNode, OpNodeSAE, sched>,
+ EVEX_CD8<16, CD8VT1>, T_MAP6PD, EVEX_4V;
}
let Predicates = [HasERI] in {
defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
- SchedWriteFRcp.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRcp.Scl>;
defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
- SchedWriteFRsqrt.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRsqrt.Scl>;
}
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
- SchedWriteFRnd.Scl>, T8PD, EVEX_4V;
+ SchedWriteFRnd.Scl>,
+ avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
+ SchedWriteFRnd.Scl>;
/// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd
multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
@@ -8861,6 +9507,19 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
}
}
+multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeSAE, X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_fp28_p<opc, OpcodeStr#"ph", v32f16_info, OpNode, sched.ZMM>,
+ avx512_fp28_p_sae<opc, OpcodeStr#"ph", v32f16_info, OpNodeSAE, sched.ZMM>,
+ T_MAP6PD, EVEX_V512, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_fp28_p<opc, OpcodeStr#"ph", v8f16x_info, OpNode, sched.XMM>,
+ EVEX_V128, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_fp28_p<opc, OpcodeStr#"ph", v16f16x_info, OpNode, sched.YMM>,
+ EVEX_V256, T_MAP6PD, EVEX_CD8<16, CD8VF>;
+ }
+}
let Predicates = [HasERI] in {
defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
SchedWriteFRsqrt>, EVEX;
@@ -8871,6 +9530,8 @@ let Predicates = [HasERI] in {
}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
SchedWriteFRnd>,
+ avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
+ SchedWriteFRnd>,
avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexp,
SchedWriteFRnd>, EVEX;
@@ -8908,6 +9569,18 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR], mayRaiseFPException = 1 in
multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.ZMM, v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ let Predicates = [HasFP16, HasVLX] in {
+ defm PHZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.XMM, v8f16x_info>,
+ EVEX_V128, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ defm PHZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.YMM, v16f16x_info>,
+ EVEX_V256, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+ }
defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -8934,6 +9607,10 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR] in
multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
+ let Predicates = [HasFP16] in
+ defm PHZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ph"),
+ sched.PH.ZMM, v32f16_info>,
+ EVEX_V512, T_MAP5PS, EVEX_CD8<16, CD8VF>;
defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"),
sched.PS.ZMM, v16f32_info>,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
@@ -8943,8 +9620,8 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
- X86VectorVTInfo _, string Name> {
- let ExeDomain = _.ExeDomain in {
+ X86VectorVTInfo _, string Name, Predicate prd = HasAVX512> {
+ let ExeDomain = _.ExeDomain, Predicates = [prd] in {
defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -8966,7 +9643,7 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
(i32 timm:$rc))>,
EVEX_B, EVEX_RC, Sched<[sched]>;
- let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in {
+ let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -8979,13 +9656,13 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
}
}
- let Predicates = [HasAVX512] in {
+ let Predicates = [prd] in {
def : Pat<(_.EltVT (any_fsqrt _.FRC:$src)),
(!cast<Instruction>(Name#Zr)
(_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>;
}
- let Predicates = [HasAVX512, OptForSize] in {
+ let Predicates = [prd, OptForSize] in {
def : Pat<(_.EltVT (any_fsqrt (load addr:$src))),
(!cast<Instruction>(Name#Zm)
(_.EltVT (IMPLICIT_DEF)), addr:$src)>;
@@ -8994,6 +9671,8 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWri
multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
X86SchedWriteSizes sched> {
+ defm SHZ : avx512_sqrt_scalar<opc, OpcodeStr#"sh", sched.PH.Scl, f16x_info, NAME#"SH", HasFP16>,
+ EVEX_CD8<16, CD8VT1>, EVEX_4V, T_MAP5XS;
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
@@ -9058,6 +9737,12 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
}
+let Predicates = [HasFP16] in
+defm VRNDSCALESHZ : avx512_rndscale_scalar<0x0A, "vrndscalesh",
+ SchedWriteFRnd.Scl, f16x_info>,
+ AVX512PSIi8Base, TA, EVEX_4V,
+ EVEX_CD8<16, CD8VT1>;
+
defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
SchedWriteFRnd.Scl, f32x_info>,
AVX512AIi8Base, EVEX_4V, VEX_LIG,
@@ -9086,6 +9771,9 @@ multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
}
}
+defm : avx512_masked_scalar<fsqrt, "SQRTSHZ", X86Movsh,
+ (v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v8f16x_info,
+ fp16imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasFP16>;
defm : avx512_masked_scalar<fsqrt, "SQRTSSZ", X86Movss,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v4f32x_info,
fp32imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
@@ -9154,7 +9842,6 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
- X86VectorVTInfo DestInfo,
PatFrag truncFrag, PatFrag mtruncFrag,
string Name> {
@@ -9184,23 +9871,22 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128,
let Predicates = [HasVLX, prd] in {
defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, MaskNode128, sched,
VTSrcInfo.info128, DestInfoZ128, x86memopZ128>,
- avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
- truncFrag, mtruncFrag, NAME>, EVEX_V128;
+ avx512_trunc_mr_lowering<VTSrcInfo.info128, truncFrag,
+ mtruncFrag, NAME>, EVEX_V128;
defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, MaskNode256, sched,
VTSrcInfo.info256, DestInfoZ256, x86memopZ256>,
- avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
- truncFrag, mtruncFrag, NAME>, EVEX_V256;
+ avx512_trunc_mr_lowering<VTSrcInfo.info256, truncFrag,
+ mtruncFrag, NAME>, EVEX_V256;
}
let Predicates = [prd] in
defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, MaskNode512, sched,
VTSrcInfo.info512, DestInfoZ, x86memopZ>,
- avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ,
- truncFrag, mtruncFrag, NAME>, EVEX_V512;
+ avx512_trunc_mr_lowering<VTSrcInfo.info512, truncFrag,
+ mtruncFrag, NAME>, EVEX_V512;
}
-multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDPatternOperator MaskNode,
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, PatFrag StoreNode,
PatFrag MaskedStoreNode, SDNode InVecNode,
SDPatternOperator InVecMaskNode> {
@@ -9271,17 +9957,16 @@ multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
}
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, select_trunc,
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb",
WriteShuffle256, truncstorevi8,
masked_truncstorevi8, X86vtrunc, X86vmtrunc>;
-defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, select_truncs,
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb",
WriteShuffle256, truncstore_s_vi8,
masked_truncstore_s_vi8, X86vtruncs,
X86vmtruncs>;
-defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
- select_truncus, WriteShuffle256,
- truncstore_us_vi8, masked_truncstore_us_vi8,
- X86vtruncus, X86vmtruncus>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb",
+ WriteShuffle256, truncstore_us_vi8,
+ masked_truncstore_us_vi8, X86vtruncus, X86vmtruncus>;
defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, select_trunc,
WriteShuffle256, truncstorevi16,
@@ -9454,8 +10139,9 @@ multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr,
}
multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode InVecNode, string ExtTy,
- X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ SDNode InVecNode, string ExtTy,
+ X86FoldableSchedWrite sched,
+ PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
@@ -9532,14 +10218,14 @@ multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr,
defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", zext, zext_invec, "z", WriteShuffle256>;
-defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext, zext_invec, "z", WriteShuffle256>;
+defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", zext, zext_invec, "z", WriteShuffle256>;
defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", sext, sext_invec, "s", WriteShuffle256>;
-defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext, sext_invec, "s", WriteShuffle256>;
+defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", sext, sext_invec, "s", WriteShuffle256>;
defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", sext, sext_invec, "s", WriteShuffle256>;
@@ -10304,24 +10990,26 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
bits<8> opcPs, bits<8> opcPd, SDPatternOperator OpNode,
SDPatternOperator MaskOpNode, SDNode OpNodeSAE,
X86SchedWriteWidths sched, Predicate prd>{
+ defm PH : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f16_info,
+ opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, HasFP16>,
+ AVX512PSIi8Base, TA, EVEX, EVEX_CD8<16, CD8VF>;
defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info,
opcPs, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
- EVEX_CD8<32, CD8VF>;
+ AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
- EVEX_CD8<64, CD8VF>, VEX_W;
+ AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
X86VReduce, X86VReduce, X86VReduceSAE,
- SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX;
+ SchedWriteFRnd, HasDQI>;
defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09,
X86any_VRndScale, X86VRndScale, X86VRndScaleSAE,
- SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, EVEX;
+ SchedWriteFRnd, HasAVX512>;
defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26,
X86VGetMant, X86VGetMant, X86VGetMantSAE,
- SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX;
+ SchedWriteFRnd, HasAVX512>;
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
@@ -10345,6 +11033,9 @@ defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
+ 0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasFP16>,
+ AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
@@ -10352,6 +11043,9 @@ defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+defm VGETMANTSH: avx512_common_fp_sae_scalar_imm<"vgetmantsh", f16x_info,
+ 0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasFP16>,
+ AVX512PSIi8Base, TA, VEX_LIG, EVEX_4V, EVEX_CD8<16, CD8VT1>;
multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched,
@@ -10770,7 +11464,7 @@ multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr,
}
}
-multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo> {
defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched.ZMM,
VTInfo.info512>, EVEX_V512;
@@ -10783,13 +11477,13 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
}
-multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
- defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched,
+ defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
avx512vl_f64_info>, XD, VEX_W;
}
-defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SchedWriteFShuffle>;
+defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
let Predicates = [HasVLX] in {
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
@@ -10956,16 +11650,15 @@ defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
-multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
- AVX512VLVectorVTInfo VTInfo_FP>{
+multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
SchedWriteFShuffle>,
EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
AVX512AIi8Base, EVEX_4V;
}
-defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
-defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
+defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
@@ -11598,6 +12291,11 @@ defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSD", X86Movsd, v2f64x_
defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSD", X86Movsd, v2f64x_info, fp64imm0>;
defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSD", X86Movsd, v2f64x_info, fp64imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fadd, fadd, "ADDSH", X86Movsh, v8f16x_info, fp16imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fsub, fsub, "SUBSH", X86Movsh, v8f16x_info, fp16imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fmul, fmul, "MULSH", X86Movsh, v8f16x_info, fp16imm0>;
+defm : AVX512_scalar_math_fp_patterns<any_fdiv, fdiv, "DIVSH", X86Movsh, v8f16x_info, fp16imm0>;
+
multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string OpcPrefix,
SDNode Move, X86VectorVTInfo _> {
let Predicates = [HasAVX512] in {
@@ -11609,6 +12307,7 @@ multiclass AVX512_scalar_unary_math_patterns<SDPatternOperator OpNode, string Op
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
+defm : AVX512_scalar_unary_math_patterns<any_fsqrt, "SQRTSH", X86Movsh, v8f16x_info>;
//===----------------------------------------------------------------------===//
// AES instructions
@@ -11671,13 +12370,13 @@ multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
- AVX512FMA3Base, Sched<[sched]>;
+ T8PD, EVEX_4V, Sched<[sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
- AVX512FMA3Base,
+ T8PD, EVEX_4V,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@@ -11693,7 +12392,7 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
"$src2, ${src3}"#VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
- AVX512FMA3Base, EVEX_B,
+ T8PD, EVEX_4V, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@@ -12200,3 +12899,732 @@ let ExeDomain = SSEPackedSingle in
defm VDPBF16PS : avx512_dpbf16ps_sizes<0x52, "vdpbf16ps", X86dpbf16ps, SchedWriteFMA,
avx512vl_f32_info, avx512vl_i32_info,
HasBF16>, T8XS, EVEX_CD8<32, CD8VF>;
+
+//===----------------------------------------------------------------------===//
+// AVX512FP16
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasFP16] in {
+// Move word (r/m16) to packed word
+def VMOVW2SHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveFromGpr]>;
+def VMOVWrm : AVX512<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v8i16 (scalar_to_vector (loadi16 addr:$src))))]>,
+ T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFLoad]>;
+
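+// Lower i16-to-f16 bitcasts and GR16 scalar_to_vector through GR32 via
+// vmovw (the value is widened with INSERT_SUBREG into a 32-bit register).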
+def : Pat<(f16 (bitconvert GR16:$src)),
+ (f16 (COPY_TO_REGCLASS
+ (VMOVW2SHrr
+ (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit)),
+ FR16X))>;
+def : Pat<(v8i16 (scalar_to_vector (i16 GR16:$src))),
+ (VMOVW2SHrr (INSERT_SUBREG (IMPLICIT_DEF), GR16:$src, sub_16bit))>;
+def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (and GR32:$src, 0xffff)))),
+ (VMOVW2SHrr GR32:$src)>;
+// FIXME: We should really find a way to improve these patterns.
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (and GR32:$src, 0xffff))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
+def : Pat<(v16i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (and GR32:$src, 0xffff))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVW2SHrr GR32:$src), sub_xmm)>;
+
+def : Pat<(v8i16 (X86vzmovl (v8i16 (scalar_to_vector (i16 (trunc GR32:$src)))))),
+ (VMOVW2SHrr GR32:$src)>;
+
+// The AVX512 128-bit vmovw instruction writes zeros in the upper part of the
+// destination register.
+def : Pat<(v8i16 (X86vzload16 addr:$src)),
+ (VMOVWrm addr:$src)>;
+def : Pat<(v16i16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
+
+// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
+def : Pat<(v32i16 (X86vzload16 addr:$src)),
+ (SUBREG_TO_REG (i32 0), (v8i16 (VMOVWrm addr:$src)), sub_xmm)>;
+
+def : Pat<(v4i32 (scalar_to_vector (i32 (extloadi16 addr:$src)))),
+ (VMOVWrm addr:$src)>;
+def : Pat<(v4i32 (X86vzmovl (scalar_to_vector (i32 (zextloadi16 addr:$src))))),
+ (VMOVWrm addr:$src)>;
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (i32 (zextloadi16 addr:$src)))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
+def : Pat<(v16i32 (X86vzmovl
+ (insert_subvector undef,
+ (v4i32 (scalar_to_vector
+ (i32 (zextloadi16 addr:$src)))),
+ (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVWrm addr:$src), sub_xmm)>;
+
+// Move word from xmm register to r/m16
+def VMOVSH2Wrr : AVX512<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, Sched<[WriteVecMoveToGpr]>;
+def VMOVWmr : AVX512<0x7E, MRMDestMem, (outs),
+ (ins i16mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(store (i16 (extractelt (v8i16 VR128X:$src),
+ (iPTR 0))), addr:$dst)]>,
+ T_MAP5PD, EVEX, EVEX_CD8<16, CD8VT1>, Sched<[WriteFStore]>;
+
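+// Lower f16-to-i16 bitcasts and low-element extracts by moving through a
+// GR32 with vmovw and taking the low 16 bits via sub_16bit.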
+def : Pat<(i16 (bitconvert FR16X:$src)),
+ (i16 (EXTRACT_SUBREG
+ (VMOVSH2Wrr (COPY_TO_REGCLASS FR16X:$src, VR128X)),
+ sub_16bit))>;
+def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
+ (i16 (EXTRACT_SUBREG (VMOVSH2Wrr VR128X:$src), sub_16bit))>;
+}
+
+// Allow "vmovw" to use GR64
+let hasSideEffects = 0 in {
+ def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+ def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
+}
+
+// Convert 16-bit float to i16/u16
+multiclass avx512_cvtph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ AVX512VLVectorVTInfo _Dst,
+ AVX512VLVectorVTInfo _Src,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNode, MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+ OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+ OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert 16-bit float to i16/u16 with truncation
+multiclass avx512_cvttph2w<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNode, MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, _Dst.info512, _Src.info512,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info128, _Src.info128,
+ OpNode, MaskOpNode, sched.XMM>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, _Dst.info256, _Src.info256,
+ OpNode, MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+defm VCVTPH2UW : avx512_cvtph2w<0x7D, "vcvtph2uw", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, avx512vl_i16_info,
+ avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTUW2PH : avx512_cvtph2w<0x7D, "vcvtuw2ph", any_uint_to_fp, uint_to_fp,
+ X86VUintToFpRnd, avx512vl_f16_info,
+ avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5XD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2W : avx512_cvttph2w<0x7C, "vcvttph2w", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ avx512vl_i16_info, avx512vl_f16_info,
+ SchedWriteCvtPD2DQ>, T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTTPH2UW : avx512_cvttph2w<0x7C, "vcvttph2uw", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ avx512vl_i16_info, avx512vl_f16_info,
+ SchedWriteCvtPD2DQ>, T_MAP5PS, EVEX_CD8<16, CD8VF>;
+defm VCVTPH2W : avx512_cvtph2w<0x7D, "vcvtph2w", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, avx512vl_i16_info,
+ avx512vl_f16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5PD, EVEX_CD8<16, CD8VF>;
+defm VCVTW2PH : avx512_cvtph2w<0x7D, "vcvtw2ph", any_sint_to_fp, sint_to_fp,
+ X86VSintToFpRnd, avx512vl_f16_info,
+ avx512vl_i16_info, SchedWriteCvtPD2DQ>,
+ T_MAP5XS, EVEX_CD8<16, CD8VF>;
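+// vcvtph2w/vcvtph2uw use the rounding conversion nodes (X86cvtp2Int,
+// X86cvtp2UInt) while vcvttph2w/vcvttph2uw use the truncating ones;
+// vcvtw2ph/vcvtuw2ph reuse the same multiclass for the int-to-half direction.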
+
+// Convert Half to Signed/Unsigned Doubleword
+multiclass avx512_cvtph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+// Convert Half to Signed/Unsigned Doubleword with truncation
+multiclass avx512_cvttph2dq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to4}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM>, EVEX_V256;
+ }
+}
+
+
+defm VCVTPH2DQ : avx512_cvtph2dq<0x5B, "vcvtph2dq", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VH>;
+defm VCVTPH2UDQ : avx512_cvtph2dq<0x79, "vcvtph2udq", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PS,
+ EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2DQ : avx512_cvttph2dq<0x5B, "vcvttph2dq", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5XS,
+ EVEX_CD8<16, CD8VH>;
+
+defm VCVTTPH2UDQ : avx512_cvttph2dq<0x78, "vcvttph2udq", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PS,
+ EVEX_CD8<16, CD8VH>;
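+// The 128-bit ph2dq forms read only the low four half elements of the
+// source, hence the f64mem operand and the explicit "{1to4}" broadcast
+// string above.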
+
+// Convert Half to Signed/Unsigned Quadword
+multiclass avx512_cvtph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v8f16x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f32mem>,
+ EVEX_V128;
+ // Explicitly specified broadcast string, since we take only 4 elements
+ // from v8f16x_info source
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "", f64mem>,
+ EVEX_V256;
+ }
+}
+
+// Convert Half to Signed/Unsigned Quadword with truncation
+multiclass avx512_cvttph2qq<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.ZMM>,
+ avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f16x_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ // Explicitly specified broadcast string, since we take only 2 elements
+ // from v8f16x_info source
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.XMM, "{1to2}", "", f32mem>, EVEX_V128;
+ // Explicitly specified broadcast string, since we take only 4 elements
+ // from v8f16x_info source
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v8f16x_info, OpNode,
+ MaskOpNode, sched.YMM, "{1to4}", "", f64mem>, EVEX_V256;
+ }
+}
+
+defm VCVTPH2QQ : avx512_cvtph2qq<0x7B, "vcvtph2qq", X86cvtp2Int, X86cvtp2Int,
+ X86cvtp2IntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTPH2UQQ : avx512_cvtph2qq<0x79, "vcvtph2uqq", X86cvtp2UInt, X86cvtp2UInt,
+ X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2QQ : avx512_cvttph2qq<0x7A, "vcvttph2qq", X86any_cvttp2si,
+ X86cvttp2si, X86cvttp2siSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+defm VCVTTPH2UQQ : avx512_cvttph2qq<0x78, "vcvttph2uqq", X86any_cvttp2ui,
+ X86cvttp2ui, X86cvttp2uiSAE,
+ SchedWriteCvtPS2DQ>, T_MAP5PD,
+ EVEX_CD8<16, CD8VQ>;
+
+// Convert Signed/Unsigned Quadword to Half
+multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ SDPatternOperator MaskOpNode, SDNode OpNodeRnd,
+ X86SchedWriteWidths sched> {
+ // we need "x"/"y"/"z" suffixes in order to distinguish between 128, 256 and
+ // 512 memory forms of these instructions in Asm Parser. They have the same
+ // dest type - 'v8f16x_info'. We also specify the broadcast string explicitly
+ // for the same reason.
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v8i64_info, OpNode,
+ MaskOpNode, sched.ZMM, "{1to8}", "{z}">,
+ avx512_vcvt_fp_rc<opc, OpcodeStr, v8f16x_info, v8i64_info,
+ OpNodeRnd, sched.ZMM>, EVEX_V512;
+ }
+ let Predicates = [HasFP16, HasVLX] in {
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v2i64x_info,
+ null_frag, null_frag, sched.XMM, "{1to2}", "{x}",
+ i128mem, VK2WM>,
+ EVEX_V128, NotEVEX2VEXConvertible;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f16x_info, v4i64x_info,
+ null_frag, null_frag, sched.YMM, "{1to4}", "{y}",
+ i256mem, VK4WM>,
+ EVEX_V256, NotEVEX2VEXConvertible;
+ }
+
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst,
+ VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrk") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z128rrkz") VR128X:$dst,
+ VK2WM:$mask, VR128X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst|$dst, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbk") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"x\t{${src}{1to2}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to2}}",
+ (!cast<Instruction>(NAME # "Z128rmbkz") VR128X:$dst,
+ VK2WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst,
+ VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrk") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Z256rrkz") VR128X:$dst,
+ VK4WM:$mask, VR256X:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst|$dst, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbk") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"y\t{${src}{1to4}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to4}}",
+ (!cast<Instruction>(NAME # "Z256rmbkz") VR128X:$dst,
+ VK4WM:$mask, i64mem:$src), 0, "att">;
+
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Zrr") VR128X:$dst,
+ VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}}|"
+ "$dst {${mask}}, $src}",
+ (!cast<Instruction>(NAME # "Zrrk") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{$src, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, $src}",
+ (!cast<Instruction>(NAME # "Zrrkz") VR128X:$dst,
+ VK8WM:$mask, VR512:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst|$dst, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmb") VR128X:$dst,
+ i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}}|"
+ "$dst {${mask}}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbk") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+ def : InstAlias<OpcodeStr#"z\t{${src}{1to8}, $dst {${mask}} {z}|"
+ "$dst {${mask}} {z}, ${src}{1to8}}",
+ (!cast<Instruction>(NAME # "Zrmbkz") VR128X:$dst,
+ VK8WM:$mask, i64mem:$src), 0, "att">;
+}
+
+defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
+ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
+ EVEX_CD8<64, CD8VF>;
+
+defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
+ X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
+ EVEX_CD8<64, CD8VF>;
+
+// Convert half to signed/unsigned int 32/64
+defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{l}", HasFP16>,
+ T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
+ X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
+ T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
+ T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
+ X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
+ T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
+ any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
+ "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ "{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
+ any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
+ "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+
+let Predicates = [HasFP16] in {
+ defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
+ v8f16x_info, i32mem, loadi32, "cvtsi2sh", "l">,
+ T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
+ v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
+ T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
+ v8f16x_info, i32mem, loadi32,
+ "cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
+ defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
+ v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
+ T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+ def : InstAlias<"vcvtusi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTUSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
+
+ def : Pat<(f16 (any_sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f16 (any_sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f16 (any_sint_to_fp GR32:$src)),
+ (VCVTSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f16 (any_sint_to_fp GR64:$src)),
+ (VCVTSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+ def : Pat<(f16 (any_uint_to_fp (loadi32 addr:$src))),
+ (VCVTUSI2SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f16 (any_uint_to_fp (loadi64 addr:$src))),
+ (VCVTUSI642SHZrm (f16 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f16 (any_uint_to_fp GR32:$src)),
+ (VCVTUSI2SHZrr (f16 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f16 (any_uint_to_fp GR64:$src)),
+ (VCVTUSI642SHZrr (f16 (IMPLICIT_DEF)), GR64:$src)>;
+
+ // Patterns used for matching vcvtsi2sh intrinsic sequences from clang
+ // which produce unnecessary vmovsh instructions
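+ // The X86Movsh merge is absorbed by selecting the _Int form, which takes the
+ // original vector as its first source operand and preserves its upper
+ // elements, so no separate vmovsh is emitted.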
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR64:$src)))))),
+ (VCVTSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi64 addr:$src))))))),
+ (VCVTSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp GR32:$src)))))),
+ (VCVTSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_sint_to_fp (loadi32 addr:$src))))))),
+ (VCVTSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR64:$src)))))),
+ (VCVTUSI642SHZrr_Int VR128X:$dst, GR64:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi64 addr:$src))))))),
+ (VCVTUSI642SHZrm_Int VR128X:$dst, addr:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp GR32:$src)))))),
+ (VCVTUSI2SHZrr_Int VR128X:$dst, GR32:$src)>;
+
+ def : Pat<(v8f16 (X86Movsh
+ (v8f16 VR128X:$dst),
+ (v8f16 (scalar_to_vector (f16 (any_uint_to_fp (loadi32 addr:$src))))))),
+ (VCVTUSI2SHZrm_Int VR128X:$dst, addr:$src)>;
+} // Predicates = [HasFP16]
+
+let Predicates = [HasFP16, HasVLX] in {
+ // Special patterns to allow use of X86VMSintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
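+ // The unmasked patterns below select the plain forms directly, while the
+ // masked patterns select the merge-masked (k) and zero-masked (kz) variants.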
+ def : Pat<(v8f16 (X86any_VSintToFP (v4i64 VR256X:$src))),
+ (VCVTQQ2PHZ256rr VR256X:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (loadv4i64 addr:$src))),
+ (VCVTQQ2PHZ256rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTQQ2PHZ256rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v2i64 VR128X:$src))),
+ (VCVTQQ2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (loadv2i64 addr:$src))),
+ (VCVTQQ2PHZ128rm addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VSintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTQQ2PHZ128rmb addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMSintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+
+ // Special patterns to allow use of X86VMUintToFP for masking. Instruction
+ // patterns have been disabled with null_frag.
+ def : Pat<(v8f16 (X86any_VUintToFP (v4i64 VR256X:$src))),
+ (VCVTUQQ2PHZ256rr VR256X:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rrk VR128X:$src0, VK4WM:$mask, VR256X:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 VR256X:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rrkz VK4WM:$mask, VR256X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (loadv4i64 addr:$src))),
+ (VCVTUQQ2PHZ256rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), (v8f16 VR128X:$src0),
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv4i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v4i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTUQQ2PHZ256rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmbk VR128X:$src0, VK4WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v4i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK4WM:$mask),
+ (VCVTUQQ2PHZ256rmbkz VK4WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v2i64 VR128X:$src))),
+ (VCVTUQQ2PHZ128rr VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 VR128X:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rrkz VK2WM:$mask, VR128X:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (loadv2i64 addr:$src))),
+ (VCVTUQQ2PHZ128rm addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), (v8f16 VR128X:$src0),
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (loadv2i64 addr:$src), v8f16x_info.ImmAllZerosV,
+ VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmkz VK2WM:$mask, addr:$src)>;
+
+ def : Pat<(v8f16 (X86any_VUintToFP (v2i64 (X86VBroadcastld64 addr:$src)))),
+ (VCVTUQQ2PHZ128rmb addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ (v8f16 VR128X:$src0), VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmbk VR128X:$src0, VK2WM:$mask, addr:$src)>;
+ def : Pat<(X86VMUintToFP (v2i64 (X86VBroadcastld64 addr:$src)),
+ v8f16x_info.ImmAllZerosV, VK2WM:$mask),
+ (VCVTUQQ2PHZ128rmbkz VK2WM:$mask, addr:$src)>;
+}
+
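+// For the complex FMA instructions the destination (tied to the accumulator
+// $src1) must not overlap $src2 or $src3, which is modeled below with the
+// @earlyclobber constraint on $dst.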
+let Constraints = "@earlyclobber $dst, $src1 = $dst" in {
+ multiclass avx512_cfmaop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, bit IsCommutable> {
+ defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), IsCommutable>, EVEX_4V;
+
+ defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, EVEX_4V;
+
+ defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr),
+ (_.VT (OpNode _.RC:$src2, (_.VT (_.BroadcastLdFrag addr:$src3)), _.RC:$src1))>, EVEX_B, EVEX_4V;
+ }
+} // Constraints = "@earlyclobber $dst, $src1 = $dst"
+
+multiclass avx512_cfmaop_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _> {
+ let Constraints = "@earlyclobber $dst, $src1 = $dst" in
+ defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 timm:$rc)))>,
+ EVEX_4V, EVEX_B, EVEX_RC;
+}
+
+multiclass avx512_cfmaop_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, bit IsCommutable> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v16f32_info, IsCommutable>,
+ avx512_cfmaop_round<opc, OpcodeStr, OpNodeRnd, v16f32_info>,
+ EVEX_V512, Sched<[WriteFMAZ]>;
+ }
+ let Predicates = [HasVLX, HasFP16] in {
+ defm Z256 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v8f32x_info, IsCommutable>, EVEX_V256, Sched<[WriteFMAY]>;
+ defm Z128 : avx512_cfmaop_rm<opc, OpcodeStr, OpNode, v4f32x_info, IsCommutable>, EVEX_V128, Sched<[WriteFMAX]>;
+ }
+}
+
+multiclass avx512_cfmulop_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode MaskOpNode, SDNode OpNodeRnd, bit IsCommutable> {
+ let Predicates = [HasFP16] in {
+ defm Z : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v16f32_info,
+ WriteFMAZ, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>,
+ avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, WriteFMAZ, v16f32_info,
+ "", "@earlyclobber $dst">, EVEX_V512;
+ }
+ let Predicates = [HasVLX, HasFP16] in {
+ defm Z256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f32x_info,
+ WriteFMAY, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V256;
+ defm Z128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f32x_info,
+ WriteFMAX, IsCommutable, IsCommutable, "", "@earlyclobber $dst", 0>, EVEX_V128;
+ }
+}
+
+let Uses = [MXCSR] in {
+ defm VFMADDCPH : avx512_cfmaop_common<0x56, "vfmaddcph", x86vfmaddc, x86vfmaddcRnd, 1>,
+ T_MAP6XS, EVEX_CD8<32, CD8VF>;
+ defm VFCMADDCPH : avx512_cfmaop_common<0x56, "vfcmaddcph", x86vfcmaddc, x86vfcmaddcRnd, 0>,
+ T_MAP6XD, EVEX_CD8<32, CD8VF>;
+
+ defm VFMULCPH : avx512_cfmulop_common<0xD6, "vfmulcph", x86vfmulc, x86vfmulc,
+ x86vfmulcRnd, 1>, T_MAP6XS, EVEX_CD8<32, CD8VF>;
+ defm VFCMULCPH : avx512_cfmulop_common<0xD6, "vfcmulcph", x86vfcmulc,
+ x86vfcmulc, x86vfcmulcRnd, 0>, T_MAP6XD, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx512_cfmaop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd,
+ bit IsCommutable> {
+ let Predicates = [HasFP16], Constraints = "@earlyclobber $dst, $src1 = $dst" in {
+ defm r : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src2, VR128X:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3",
+ (v4f32 (OpNode VR128X:$src2, VR128X:$src3, VR128X:$src1)), IsCommutable>,
+ Sched<[WriteFMAX]>;
+ defm m : AVX512_maskable_3src<opc, MRMSrcMem, v4f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src2, ssmem:$src3), OpcodeStr,
+ "$src3, $src2", "$src2, $src3",
+ (v4f32 (OpNode VR128X:$src2, (sse_load_f32 addr:$src3), VR128X:$src1))>,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ defm rb : AVX512_maskable_3src<opc, MRMSrcReg, v4f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src2, VR128X:$src3, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (v4f32 (OpNodeRnd VR128X:$src2, VR128X:$src3, VR128X:$src1, (i32 timm:$rc)))>,
+ EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
+ }
+}
+
+multiclass avx512_cfmbinop_sh_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNodeRnd, bit IsCommutable> {
+ let Predicates = [HasFP16] in {
+ defm rr : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (v4f32 (OpNode VR128X:$src1, VR128X:$src2)),
+ IsCommutable, IsCommutable, IsCommutable,
+ X86selects, "@earlyclobber $dst">, Sched<[WriteFMAX]>;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src1, ssmem:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (v4f32 (OpNode VR128X:$src1, (sse_load_f32 addr:$src2))),
+ 0, 0, 0, X86selects, "@earlyclobber $dst">,
+ Sched<[WriteFMAX.Folded, WriteFMAX.ReadAfterFold]>;
+ defm rrb : AVX512_maskable<opc, MRMSrcReg, f32x_info, (outs VR128X:$dst),
+ (ins VR128X:$src1, VR128X:$src2, AVX512RC:$rc), OpcodeStr,
+ "$rc, $src2, $src1", "$src1, $src2, $rc",
+ (OpNodeRnd (v4f32 VR128X:$src1), (v4f32 VR128X:$src2), (i32 timm:$rc)),
+ 0, 0, 0, X86selects, "@earlyclobber $dst">,
+ EVEX_B, EVEX_RC, Sched<[WriteFMAX]>;
+ }
+}
+
+let Uses = [MXCSR] in {
+ defm VFMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfmaddcsh", x86vfmaddcSh, x86vfmaddcShRnd, 1>,
+ T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
+ defm VFCMADDCSHZ : avx512_cfmaop_sh_common<0x57, "vfcmaddcsh", x86vfcmaddcSh, x86vfcmaddcShRnd, 0>,
+ T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, EVEX_4V;
+
+ defm VFMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfmulcsh", x86vfmulcSh, x86vfmulcShRnd, 1>,
+ T_MAP6XS, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
+ defm VFCMULCSHZ : avx512_cfmbinop_sh_common<0xD7, "vfcmulcsh", x86vfcmulcSh, x86vfcmulcShRnd, 0>,
+ T_MAP6XD, EVEX_CD8<32, CD8VT1>, EVEX_V128, VEX_LIG, EVEX_4V;
+}
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index e83e1e74ff52..8337d2b37383 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -708,6 +708,19 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+// BinOpRM_ImplicitUse - Instructions like "adc reg, reg, [mem]".
+// There is an implicit register read at the end of the operand sequence.
+class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
+ dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
+ : ITy<opcode, MRMSrcMem, typeinfo, outlist,
+ (ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ // base, scale, index, offset, segment.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // implicit register read.
+ sched.ReadAfterFold]>;
+
// BinOpRM_F - Instructions like "cmp reg, [mem]".
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
@@ -725,7 +738,7 @@ class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
- : BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
+ : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
EFLAGS))]>;
@@ -805,7 +818,11 @@ class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>; // reg
// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -813,7 +830,12 @@ class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
+ (implicit EFLAGS)]>, Sched<[WriteADCRMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold, // reg
+ WriteALU.ReadAfterFold]>; // EFLAGS
// BinOpMR_F - Instructions like "cmp [mem], reg".
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -1475,13 +1497,17 @@ multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
let hasSideEffects = 0 in {
def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
- []>, T8XD, VEX_4V, Sched<[sched, WriteIMulH]>;
+ []>, T8XD, VEX_4V, Sched<[WriteIMulH, sched]>;
let mayLoad = 1 in
def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
!strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
-
- []>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>;
+ []>, T8XD, VEX_4V,
+ Sched<[WriteIMulHLd, sched.Folded,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Implicit read of EDX/RDX
+ sched.ReadAfterFold]>;
// Pseudo instructions to be used when the low result isn't used. The
// instruction is defined to keep the high if both destinations are the same.
@@ -1496,9 +1522,9 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
let Uses = [EDX] in
- defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>;
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
let Uses = [RDX] in
- defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W;
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, VEX_W;
}
//===----------------------------------------------------------------------===//
@@ -1525,7 +1551,12 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
"adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
} // SchedRW
- let mayLoad = 1, SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold] in {
+ let mayLoad = 1,
+ SchedRW = [WriteADC.Folded, WriteADC.ReadAfterFold,
+ // Memory operand.
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ // Implicit read of EFLAGS
+ WriteADC.ReadAfterFold] in {
def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 202d320cd731..ba52283b570d 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
let SchedRW = [WriteSystem] in {
// x86-64 va_start lowering magic.
-let hasSideEffects = 1, Defs = [EFLAGS] in {
+let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
- (ins GR8:$al,
- i32imm:$regsavefi, i32imm:$offset,
- variable_ops),
- "#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
- [(X86vastart_save_xmm_regs GR8:$al,
- timm:$regsavefi,
- timm:$offset),
+ (ins GR8:$al, i8mem:$regsavefi, variable_ops),
+ "#VASTART_SAVE_XMM_REGS $al, $regsavefi",
+ [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi),
(implicit EFLAGS)]>;
}
@@ -153,15 +149,15 @@ def STACKALLOC_W_PROBING : I<0, Pseudo, (outs), (ins i64imm:$stacksize),
// (compared to ordinary calls) like stack pointer change.
let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
-def WIN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
+def DYN_ALLOCA_32 : I<0, Pseudo, (outs), (ins GR32:$size),
"# dynamic stack allocation",
- [(X86WinAlloca GR32:$size)]>,
+ [(X86DynAlloca GR32:$size)]>,
Requires<[NotLP64]>;
let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
-def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
+def DYN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size),
"# dynamic stack allocation",
- [(X86WinAlloca GR64:$size)]>,
+ [(X86DynAlloca GR64:$size)]>,
Requires<[In64BitMode]>;
} // SchedRW
@@ -261,6 +257,17 @@ let isPseudo = 1, SchedRW = [WriteSystem] in {
}
//===----------------------------------------------------------------------===//
+// Pseudo instructions used by address sanitizer.
+//===----------------------------------------------------------------------===//
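+// This pseudo is expanded during lowering into a call to an ASan check
+// routine, which may clobber R8 and EFLAGS; the address therefore uses
+// GR64NoR8 so that it is never passed in R8.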
+let Defs = [R8, EFLAGS] in {
+def ASAN_CHECK_MEMACCESS : PseudoI<
+ (outs), (ins GR64NoR8:$addr, i32imm:$accessinfo),
+ [(int_asan_check_memaccess GR64NoR8:$addr, (i32 timm:$accessinfo))]>,
+ Sched<[]>;
+}
+
+//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//
@@ -555,6 +562,7 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS] in {
let Predicates = [HasMMX] in
defm _VR64 : CMOVrr_PSEUDO<VR64, x86mmx>;
+ defm _FR16X : CMOVrr_PSEUDO<FR16X, f16>;
let Predicates = [HasSSE1,NoAVX512] in
defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
let Predicates = [HasSSE2,NoAVX512] in
@@ -612,6 +620,8 @@ let Predicates = [HasVLX] in {
(CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
def : Pat<(v8i16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
(CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
+ def : Pat<(v8f16 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
def : Pat<(v4i32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
(CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;
def : Pat<(v4f32 (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
@@ -623,6 +633,8 @@ let Predicates = [HasVLX] in {
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
def : Pat<(v16i16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
+ def : Pat<(v16f16 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
def : Pat<(v8i32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
(CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
def : Pat<(v8f32 (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
@@ -635,6 +647,8 @@ def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
(CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
(CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
+def : Pat<(v32f16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
+ (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
(CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
@@ -953,7 +967,7 @@ multiclass ATOMIC_RMW_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
!strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
[(set
GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))]>,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))]>,
OpSize32;
def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$val, i64mem:$ptr),
@@ -1197,10 +1211,10 @@ def : Pat<(X86call (i64 tglobaladdr:$dst)),
def : Pat<(X86call (i64 texternalsym:$dst)),
(CALL64pcrel32 texternalsym:$dst)>;
-def : Pat<(X86call_rvmarker (timm:$sel), (i64 texternalsym:$dst)),
- (CALL64pcrel32_RVMARKER timm:$sel, texternalsym:$dst)>;
-def : Pat<(X86call_rvmarker (timm:$sel), (i64 tglobaladdr:$dst)),
- (CALL64pcrel32_RVMARKER timm:$sel, tglobaladdr:$dst)>;
+def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
+ (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, texternalsym:$dst)>;
+def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
+ (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index a6cb17f17a17..6d969962afff 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -20,30 +20,30 @@
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
- def RETL : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ def RET32 : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{l}", []>, OpSize32, Requires<[Not64BitMode]>;
- def RETQ : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ def RET64 : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret{q}", []>, OpSize32, Requires<[In64BitMode]>;
- def RETW : I <0xC3, RawFrm, (outs), (ins),
+ def RET16 : I <0xC3, RawFrm, (outs), (ins),
"ret{w}", []>, OpSize16;
- def RETIL : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ def RETI32 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{l}\t$amt", []>, OpSize32, Requires<[Not64BitMode]>;
- def RETIQ : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
+ def RETI64 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret{q}\t$amt", []>, OpSize32, Requires<[In64BitMode]>;
- def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
+ def RETI16 : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt),
"ret{w}\t$amt", []>, OpSize16;
- def LRETL : I <0xCB, RawFrm, (outs), (ins),
+ def LRET32 : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{l|f}", []>, OpSize32;
- def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
+ def LRET64 : RI <0xCB, RawFrm, (outs), (ins),
"{l}ret{|f}q", []>, Requires<[In64BitMode]>;
- def LRETW : I <0xCB, RawFrm, (outs), (ins),
+ def LRET16 : I <0xCB, RawFrm, (outs), (ins),
"{l}ret{w|f}", []>, OpSize16;
- def LRETIL : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "{l}ret{l|f}\t$amt", []>, OpSize32;
- def LRETIQ : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
- def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "{l}ret{w|f}\t$amt", []>, OpSize16;
+ def LRETI32 : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{l|f}\t$amt", []>, OpSize32;
+ def LRETI64 : RIi16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{|f}q\t$amt", []>, Requires<[In64BitMode]>;
+ def LRETI16 : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
+ "{l}ret{w|f}\t$amt", []>, OpSize16;
// The machine return from interrupt instruction, but sometimes we need to
// perform a post-epilogue stack adjustment. Codegen emits the pseudo form
@@ -419,15 +419,15 @@ let isPseudo = 1, isCall = 1, isCodeGenOnly = 1,
Uses = [RSP, SSP],
SchedRW = [WriteJump] in {
def CALL64m_RVMARKER :
- PseudoI<(outs), (ins i32imm:$sel, i64mem:$dst), [(X86call_rvmarker timm:$sel, (loadi64 addr:$dst))]>,
+ PseudoI<(outs), (ins i64imm:$rvfunc, i64mem:$dst), [(X86call_rvmarker tglobaladdr:$rvfunc, (loadi64 addr:$dst))]>,
Requires<[In64BitMode]>;
def CALL64r_RVMARKER :
- PseudoI<(outs), (ins i32imm:$sel, GR64:$dst), [(X86call_rvmarker timm:$sel, GR64:$dst)]>,
+ PseudoI<(outs), (ins i64imm:$rvfunc, GR64:$dst), [(X86call_rvmarker tglobaladdr:$rvfunc, GR64:$dst)]>,
Requires<[In64BitMode]>;
def CALL64pcrel32_RVMARKER :
- PseudoI<(outs), (ins i32imm:$sel, i64i32imm_brtarget:$dst), []>,
+ PseudoI<(outs), (ins i64imm:$rvfunc, i64i32imm_brtarget:$dst), []>,
Requires<[In64BitMode]>;
}
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 27328fe42c44..1f92293fa73f 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -427,7 +427,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
- ValueType VT, X86FoldableSchedWrite sched> {
+ X86FoldableSchedWrite sched> {
let isCodeGenOnly = 1, hasSideEffects = 0,
Uses = [MXCSR], mayRaiseFPException = 1 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
@@ -540,20 +540,16 @@ let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, any_fma, loadf32,
SchedWriteFMA.Scl>,
- fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6A, "vfmaddss", ssmem, SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86any_Fmsub, loadf32,
SchedWriteFMA.Scl>,
- fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6E, "vfmsubss", ssmem, SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86any_Fnmadd, loadf32, SchedWriteFMA.Scl>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7A, "vfnmaddss", ssmem, SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86any_Fnmsub, loadf32, SchedWriteFMA.Scl>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7E, "vfnmsubss", ssmem, SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", any_fma, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -573,20 +569,16 @@ let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, any_fma, loadf64,
SchedWriteFMA.Scl>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6B, "vfmaddsd", sdmem, SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86any_Fmsub, loadf64,
SchedWriteFMA.Scl>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x6F, "vfmsubsd", sdmem, SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86any_Fnmadd, loadf64, SchedWriteFMA.Scl>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86any_Fnmsub, loadf64, SchedWriteFMA.Scl>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
- SchedWriteFMA.Scl>;
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", any_fma, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -603,8 +595,8 @@ let ExeDomain = SSEPackedDouble in {
}
multiclass scalar_fma4_patterns<SDPatternOperator Op, string Name,
- ValueType VT, ValueType EltVT,
- RegisterClass RC, PatFrag mem_frag> {
+ ValueType VT, RegisterClass RC,
+ PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
@@ -629,12 +621,12 @@ multiclass scalar_fma4_patterns<SDPatternOperator Op, string Name,
}
}
-defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSS4", v4f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSS4", v4f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSS4", v4f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSS4", v4f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
-defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<any_fma, "VFMADDSD4", v2f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fmsub, "VFMSUBSD4", v2f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fnmadd, "VFNMADDSD4", v2f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fnmsub, "VFNMSUBSD4", v2f64, FR64, loadf64>;
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index 6d803e931b68..52b2a62316cd 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -28,35 +28,43 @@ using namespace llvm;
FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
-#define FMA3GROUP_PACKED_WIDTHS(Name, Suf, Attrs) \
- FMA3GROUP(Name, Suf##Ym, Attrs) \
- FMA3GROUP(Name, Suf##Yr, Attrs) \
+#define FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
+
+#define FMA3GROUP_PACKED_WIDTHS_ALL(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf##Ym, Attrs) \
+ FMA3GROUP(Name, Suf##Yr, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##m, Attrs) \
FMA3GROUP(Name, Suf##r, Attrs)
#define FMA3GROUP_PACKED(Name, Attrs) \
- FMA3GROUP_PACKED_WIDTHS(Name, PD, Attrs) \
- FMA3GROUP_PACKED_WIDTHS(Name, PS, Attrs)
+ FMA3GROUP_PACKED_WIDTHS_ALL(Name, PD, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_Z(Name, PH, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS_ALL(Name, PS, Attrs)
-#define FMA3GROUP_SCALAR_WIDTHS(Name, Suf, Attrs) \
+#define FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##Zm, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
FMA3GROUP(Name, Suf##Zr, Attrs) \
FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+
+#define FMA3GROUP_SCALAR_WIDTHS_ALL(Name, Suf, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS_Z(Name, Suf, Attrs) \
FMA3GROUP(Name, Suf##m, Attrs) \
FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
FMA3GROUP(Name, Suf##r, Attrs) \
FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
#define FMA3GROUP_SCALAR(Name, Attrs) \
- FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
- FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs)
+ FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SD, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS_Z(Name, SH, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS_ALL(Name, SS, Attrs)
#define FMA3GROUP_FULL(Name, Attrs) \
FMA3GROUP_PACKED(Name, Attrs) \
@@ -78,15 +86,19 @@ static const X86InstrFMA3Group Groups[] = {
#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PH, Suf, Attrs) \
FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, PHZ##Suf, Attrs) \
FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
#define FMA3GROUP_SCALAR_AVX512_ROUND(Name, Suf, Attrs) \
FMA3GROUP(Name, SDZ##Suf, Attrs) \
FMA3GROUP_MASKED(Name, SDZ##Suf##_Int, Attrs) \
+ FMA3GROUP(Name, SHZ##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, SHZ##Suf##_Int, Attrs) \
FMA3GROUP(Name, SSZ##Suf, Attrs) \
FMA3GROUP_MASKED(Name, SSZ##Suf##_Int, Attrs)
@@ -130,14 +142,16 @@ const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
// FMA3 instructions have a well defined encoding pattern we can exploit.
uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
- bool IsFMA3 = ((TSFlags & X86II::EncodingMask) == X86II::VEX ||
- (TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
- (TSFlags & X86II::OpMapMask) == X86II::T8 &&
- (TSFlags & X86II::OpPrefixMask) == X86II::PD &&
- ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
- (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
- (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
- if (!IsFMA3)
+ bool IsFMA3Opcode = ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
+ (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
+ (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
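+ // The AVX512-FP16 FMA3 forms are EVEX-encoded in opcode map 6 (T_MAP6)
+ // rather than map 0F38 (T8), so accept EVEX with T_MAP6 as well.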
+ bool IsFMA3Encoding = ((TSFlags & X86II::EncodingMask) == X86II::VEX &&
+ (TSFlags & X86II::OpMapMask) == X86II::T8) ||
+ ((TSFlags & X86II::EncodingMask) == X86II::EVEX &&
+ ((TSFlags & X86II::OpMapMask) == X86II::T8 ||
+ (TSFlags & X86II::OpMapMask) == X86II::T_MAP6));
+ bool IsFMA3Prefix = (TSFlags & X86II::OpPrefixMask) == X86II::PD;
+ if (!IsFMA3Opcode || !IsFMA3Encoding || !IsFMA3Prefix)
return nullptr;
verifyTables();
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index cda28d18f4aa..e310f369be08 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -377,7 +377,7 @@ def TST_F : FPI<0xD9, MRM_E4, (outs), (ins), "ftst">;
} // SchedRW
} // Uses = [FPCW], mayRaiseFPException = 1
-let SchedRW = [WriteFTest] in {
+let SchedRW = [WriteFTest], Defs = [FPSW] in {
def XAM_Fp32 : FpIf32<(outs), (ins RFP32:$src), OneArgFP, []>;
def XAM_Fp64 : FpIf64<(outs), (ins RFP64:$src), OneArgFP, []>;
def XAM_Fp80 : FpI_<(outs), (ins RFP80:$src), OneArgFP, []>;
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 17fe7f0bd310..6d4ad08842c7 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -815,10 +815,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0 },
{ X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0 },
{ X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0 },
+ { X86::VFPCLASSPHZ128rr, X86::VFPCLASSPHZ128rm, 0 },
+ { X86::VFPCLASSPHZ256rr, X86::VFPCLASSPHZ256rm, 0 },
+ { X86::VFPCLASSPHZrr, X86::VFPCLASSPHZrm, 0 },
{ X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rm, 0 },
{ X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rm, 0 },
{ X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrm, 0 },
{ X86::VFPCLASSSDZrr, X86::VFPCLASSSDZrm, TB_NO_REVERSE },
+ { X86::VFPCLASSSHZrr, X86::VFPCLASSSHZrm, TB_NO_REVERSE },
{ X86::VFPCLASSSSZrr, X86::VFPCLASSSSZrm, TB_NO_REVERSE },
{ X86::VFRCZPDYrr, X86::VFRCZPDYrm, 0 },
{ X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
@@ -829,12 +833,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0 },
{ X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0 },
{ X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0 },
+ { X86::VGETEXPPHZ128r, X86::VGETEXPPHZ128m, 0 },
+ { X86::VGETEXPPHZ256r, X86::VGETEXPPHZ256m, 0 },
+ { X86::VGETEXPPHZr, X86::VGETEXPPHZm, 0 },
{ X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0 },
{ X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0 },
{ X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0 },
{ X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0 },
{ X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0 },
{ X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0 },
+ { X86::VGETMANTPHZ128rri, X86::VGETMANTPHZ128rmi, 0 },
+ { X86::VGETMANTPHZ256rri, X86::VGETMANTPHZ256rmi, 0 },
+ { X86::VGETMANTPHZrri, X86::VGETMANTPHZrmi, 0 },
{ X86::VGETMANTPSZ128rri, X86::VGETMANTPSZ128rmi, 0 },
{ X86::VGETMANTPSZ256rri, X86::VGETMANTPSZ256rmi, 0 },
{ X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
@@ -1161,17 +1171,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VRCP14PSZr, X86::VRCP14PSZm, 0 },
{ X86::VRCP28PDZr, X86::VRCP28PDZm, 0 },
{ X86::VRCP28PSZr, X86::VRCP28PSZm, 0 },
+ { X86::VRCPPHZ128r, X86::VRCPPHZ128m, 0 },
+ { X86::VRCPPHZ256r, X86::VRCPPHZ256m, 0 },
+ { X86::VRCPPHZr, X86::VRCPPHZm, 0 },
{ X86::VRCPPSYr, X86::VRCPPSYm, 0 },
{ X86::VRCPPSr, X86::VRCPPSm, 0 },
{ X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0 },
{ X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0 },
{ X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0 },
+ { X86::VREDUCEPHZ128rri, X86::VREDUCEPHZ128rmi, 0 },
+ { X86::VREDUCEPHZ256rri, X86::VREDUCEPHZ256rmi, 0 },
+ { X86::VREDUCEPHZrri, X86::VREDUCEPHZrmi, 0 },
{ X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0 },
{ X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0 },
{ X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0 },
{ X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0 },
{ X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0 },
{ X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0 },
+ { X86::VRNDSCALEPHZ128rri, X86::VRNDSCALEPHZ128rmi, 0 },
+ { X86::VRNDSCALEPHZ256rri, X86::VRNDSCALEPHZ256rmi, 0 },
+ { X86::VRNDSCALEPHZrri, X86::VRNDSCALEPHZrmi, 0 },
{ X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0 },
{ X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0 },
{ X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0 },
@@ -1187,6 +1206,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0 },
{ X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0 },
{ X86::VRSQRT28PSZr, X86::VRSQRT28PSZm, 0 },
+ { X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128m, 0 },
+ { X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256m, 0 },
+ { X86::VRSQRTPHZr, X86::VRSQRTPHZm, 0 },
{ X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
{ X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
{ X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
@@ -1194,6 +1216,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
{ X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0 },
{ X86::VSQRTPDZr, X86::VSQRTPDZm, 0 },
{ X86::VSQRTPDr, X86::VSQRTPDm, 0 },
+ { X86::VSQRTPHZ128r, X86::VSQRTPHZ128m, 0 },
+ { X86::VSQRTPHZ256r, X86::VSQRTPHZ256m, 0 },
+ { X86::VSQRTPHZr, X86::VSQRTPHZm, 0 },
{ X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
{ X86::VSQRTPSZ128r, X86::VSQRTPSZ128m, 0 },
{ X86::VSQRTPSZ256r, X86::VSQRTPSZ256m, 0 },
@@ -1550,6 +1575,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
{ X86::VADDPDrr, X86::VADDPDrm, 0 },
+ { X86::VADDPHZ128rr, X86::VADDPHZ128rm, 0 },
+ { X86::VADDPHZ256rr, X86::VADDPHZ256rm, 0 },
+ { X86::VADDPHZrr, X86::VADDPHZrm, 0 },
{ X86::VADDPSYrr, X86::VADDPSYrm, 0 },
{ X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
{ X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
@@ -1559,6 +1587,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
{ X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
+ { X86::VADDSHZrr, X86::VADDSHZrm, 0 },
+ { X86::VADDSHZrr_Int, X86::VADDSHZrm_Int, TB_NO_REVERSE },
{ X86::VADDSSZrr, X86::VADDSSZrm, 0 },
{ X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
@@ -1642,6 +1672,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
{ X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
{ X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
+ { X86::VCMPPHZ128rri, X86::VCMPPHZ128rmi, 0 },
+ { X86::VCMPPHZ256rri, X86::VCMPPHZ256rmi, 0 },
+ { X86::VCMPPHZrri, X86::VCMPPHZrmi, 0 },
{ X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
{ X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
{ X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
@@ -1651,6 +1684,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
{ X86::VCMPSDrr_Int, X86::VCMPSDrm_Int, TB_NO_REVERSE },
+ { X86::VCMPSHZrr, X86::VCMPSHZrm, 0 },
+ { X86::VCMPSHZrr_Int, X86::VCMPSHZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
{ X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
{ X86::VCMPSSrr, X86::VCMPSSrm, 0 },
@@ -1782,6 +1817,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
{ X86::VDIVPDrr, X86::VDIVPDrm, 0 },
+ { X86::VDIVPHZ128rr, X86::VDIVPHZ128rm, 0 },
+ { X86::VDIVPHZ256rr, X86::VDIVPHZ256rm, 0 },
+ { X86::VDIVPHZrr, X86::VDIVPHZrm, 0 },
{ X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
{ X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
{ X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
@@ -1791,6 +1829,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
{ X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
+ { X86::VDIVSHZrr, X86::VDIVSHZrm, 0 },
+ { X86::VDIVSHZrr_Int, X86::VDIVSHZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
{ X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
@@ -1806,6 +1846,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VEXPANDPSZ128rrkz, X86::VEXPANDPSZ128rmkz, TB_NO_REVERSE },
{ X86::VEXPANDPSZ256rrkz, X86::VEXPANDPSZ256rmkz, TB_NO_REVERSE },
{ X86::VEXPANDPSZrrkz, X86::VEXPANDPSZrmkz, TB_NO_REVERSE },
+ { X86::VFCMULCPHZ128rr, X86::VFCMULCPHZ128rm, 0 },
+ { X86::VFCMULCPHZ256rr, X86::VFCMULCPHZ256rm, 0 },
+ { X86::VFCMULCPHZrr, X86::VFCMULCPHZrm, 0 },
+ { X86::VFCMULCSHZrr, X86::VFCMULCSHZrm, TB_NO_REVERSE },
{ X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, 0 },
{ X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 },
{ X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, 0 },
@@ -1830,6 +1874,10 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
{ X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
+ { X86::VFMULCPHZ128rr, X86::VFMULCPHZ128rm, 0 },
+ { X86::VFMULCPHZ256rr, X86::VFMULCPHZ256rm, 0 },
+ { X86::VFMULCPHZrr, X86::VFMULCPHZrm, 0 },
+ { X86::VFMULCSHZrr, X86::VFMULCSHZrm, TB_NO_REVERSE },
{ X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, 0 },
{ X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 },
{ X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, 0 },
@@ -1849,26 +1897,38 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0 },
{ X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0 },
{ X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0 },
+ { X86::VFPCLASSPHZ128rrk, X86::VFPCLASSPHZ128rmk, 0 },
+ { X86::VFPCLASSPHZ256rrk, X86::VFPCLASSPHZ256rmk, 0 },
+ { X86::VFPCLASSPHZrrk, X86::VFPCLASSPHZrmk, 0 },
{ X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmk, 0 },
{ X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmk, 0 },
{ X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmk, 0 },
{ X86::VFPCLASSSDZrrk, X86::VFPCLASSSDZrmk, TB_NO_REVERSE },
+ { X86::VFPCLASSSHZrrk, X86::VFPCLASSSHZrmk, TB_NO_REVERSE },
{ X86::VFPCLASSSSZrrk, X86::VFPCLASSSSZrmk, TB_NO_REVERSE },
{ X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0 },
{ X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0 },
{ X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0 },
+ { X86::VGETEXPPHZ128rkz, X86::VGETEXPPHZ128mkz, 0 },
+ { X86::VGETEXPPHZ256rkz, X86::VGETEXPPHZ256mkz, 0 },
+ { X86::VGETEXPPHZrkz, X86::VGETEXPPHZmkz, 0 },
{ X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mkz, 0 },
{ X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mkz, 0 },
{ X86::VGETEXPPSZrkz, X86::VGETEXPPSZmkz, 0 },
{ X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE },
+ { X86::VGETEXPSHZr, X86::VGETEXPSHZm, TB_NO_REVERSE },
{ X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE },
{ X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmikz, 0 },
{ X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmikz, 0 },
{ X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0 },
+ { X86::VGETMANTPHZ128rrikz, X86::VGETMANTPHZ128rmikz, 0 },
+ { X86::VGETMANTPHZ256rrikz, X86::VGETMANTPHZ256rmikz, 0 },
+ { X86::VGETMANTPHZrrikz, X86::VGETMANTPHZrmikz, 0 },
{ X86::VGETMANTPSZ128rrikz, X86::VGETMANTPSZ128rmikz, 0 },
{ X86::VGETMANTPSZ256rrikz, X86::VGETMANTPSZ256rmikz, 0 },
{ X86::VGETMANTPSZrrikz, X86::VGETMANTPSZrmikz, 0 },
{ X86::VGETMANTSDZrri, X86::VGETMANTSDZrmi, TB_NO_REVERSE },
+ { X86::VGETMANTSHZrri, X86::VGETMANTSHZrmi, TB_NO_REVERSE },
{ X86::VGETMANTSSZrri, X86::VGETMANTSSZrmi, TB_NO_REVERSE },
{ X86::VGF2P8AFFINEINVQBYrri, X86::VGF2P8AFFINEINVQBYrmi, 0 },
{ X86::VGF2P8AFFINEINVQBZ128rri, X86::VGF2P8AFFINEINVQBZ128rmi, 0 },
@@ -1912,6 +1972,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
{ X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
{ X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
+ { X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rm, 0 },
+ { X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rm, 0 },
+ { X86::VMAXCPHZrr, X86::VMAXCPHZrm, 0 },
{ X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
{ X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
{ X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
@@ -1919,6 +1982,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
{ X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
{ X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
+ { X86::VMAXCSHZrr, X86::VMAXCSHZrm, 0 },
{ X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
{ X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
{ X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
@@ -1926,6 +1990,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
{ X86::VMAXPDrr, X86::VMAXPDrm, 0 },
+ { X86::VMAXPHZ128rr, X86::VMAXPHZ128rm, 0 },
+ { X86::VMAXPHZ256rr, X86::VMAXPHZ256rm, 0 },
+ { X86::VMAXPHZrr, X86::VMAXPHZrm, 0 },
{ X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
{ X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
{ X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
@@ -1935,6 +2002,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
{ X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
+ { X86::VMAXSHZrr, X86::VMAXSHZrm, 0 },
+ { X86::VMAXSHZrr_Int, X86::VMAXSHZrm_Int, TB_NO_REVERSE },
{ X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
{ X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
@@ -1944,6 +2013,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
{ X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
{ X86::VMINCPDrr, X86::VMINCPDrm, 0 },
+ { X86::VMINCPHZ128rr, X86::VMINCPHZ128rm, 0 },
+ { X86::VMINCPHZ256rr, X86::VMINCPHZ256rm, 0 },
+ { X86::VMINCPHZrr, X86::VMINCPHZrm, 0 },
{ X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
{ X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
{ X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
@@ -1951,6 +2023,7 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINCPSrr, X86::VMINCPSrm, 0 },
{ X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
{ X86::VMINCSDrr, X86::VMINCSDrm, 0 },
+ { X86::VMINCSHZrr, X86::VMINCSHZrm, 0 },
{ X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
{ X86::VMINCSSrr, X86::VMINCSSrm, 0 },
{ X86::VMINPDYrr, X86::VMINPDYrm, 0 },
@@ -1958,6 +2031,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
{ X86::VMINPDZrr, X86::VMINPDZrm, 0 },
{ X86::VMINPDrr, X86::VMINPDrm, 0 },
+ { X86::VMINPHZ128rr, X86::VMINPHZ128rm, 0 },
+ { X86::VMINPHZ256rr, X86::VMINPHZ256rm, 0 },
+ { X86::VMINPHZrr, X86::VMINPHZrm, 0 },
{ X86::VMINPSYrr, X86::VMINPSYrm, 0 },
{ X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
{ X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
@@ -1967,6 +2043,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
+ { X86::VMINSHZrr, X86::VMINSHZrm, 0 },
+ { X86::VMINSHZrr_Int, X86::VMINSHZrm_Int, TB_NO_REVERSE },
{ X86::VMINSSZrr, X86::VMINSSZrm, 0 },
{ X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
@@ -2021,6 +2099,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
{ X86::VMULPDZrr, X86::VMULPDZrm, 0 },
{ X86::VMULPDrr, X86::VMULPDrm, 0 },
+ { X86::VMULPHZ128rr, X86::VMULPHZ128rm, 0 },
+ { X86::VMULPHZ256rr, X86::VMULPHZ256rm, 0 },
+ { X86::VMULPHZrr, X86::VMULPHZrm, 0 },
{ X86::VMULPSYrr, X86::VMULPSYrm, 0 },
{ X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
{ X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
@@ -2030,6 +2111,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
{ X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
+ { X86::VMULSHZrr, X86::VMULSHZrm, 0 },
+ { X86::VMULSHZrr_Int, X86::VMULSHZrm_Int, TB_NO_REVERSE },
{ X86::VMULSSZrr, X86::VMULSSZrm, 0 },
{ X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
@@ -2861,24 +2944,37 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0 },
{ X86::VRCP28SDZr, X86::VRCP28SDZm, TB_NO_REVERSE },
{ X86::VRCP28SSZr, X86::VRCP28SSZm, TB_NO_REVERSE },
+ { X86::VRCPPHZ128rkz, X86::VRCPPHZ128mkz, 0 },
+ { X86::VRCPPHZ256rkz, X86::VRCPPHZ256mkz, 0 },
+ { X86::VRCPPHZrkz, X86::VRCPPHZmkz, 0 },
+ { X86::VRCPSHZrr, X86::VRCPSHZrm, TB_NO_REVERSE },
{ X86::VRCPSSr, X86::VRCPSSm, 0 },
{ X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
{ X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmikz, 0 },
{ X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmikz, 0 },
{ X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0 },
+ { X86::VREDUCEPHZ128rrikz, X86::VREDUCEPHZ128rmikz, 0 },
+ { X86::VREDUCEPHZ256rrikz, X86::VREDUCEPHZ256rmikz, 0 },
+ { X86::VREDUCEPHZrrikz, X86::VREDUCEPHZrmikz, 0 },
{ X86::VREDUCEPSZ128rrikz, X86::VREDUCEPSZ128rmikz, 0 },
{ X86::VREDUCEPSZ256rrikz, X86::VREDUCEPSZ256rmikz, 0 },
{ X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmikz, 0 },
{ X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE },
+ { X86::VREDUCESHZrri, X86::VREDUCESHZrmi, TB_NO_REVERSE },
{ X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE },
{ X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0 },
{ X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0 },
{ X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0 },
+ { X86::VRNDSCALEPHZ128rrikz, X86::VRNDSCALEPHZ128rmikz, 0 },
+ { X86::VRNDSCALEPHZ256rrikz, X86::VRNDSCALEPHZ256rmikz, 0 },
+ { X86::VRNDSCALEPHZrrikz, X86::VRNDSCALEPHZrmikz, 0 },
{ X86::VRNDSCALEPSZ128rrikz, X86::VRNDSCALEPSZ128rmikz, 0 },
{ X86::VRNDSCALEPSZ256rrikz, X86::VRNDSCALEPSZ256rmikz, 0 },
{ X86::VRNDSCALEPSZrrikz, X86::VRNDSCALEPSZrmikz, 0 },
{ X86::VRNDSCALESDZr, X86::VRNDSCALESDZm, 0 },
{ X86::VRNDSCALESDZr_Int, X86::VRNDSCALESDZm_Int, TB_NO_REVERSE },
+ { X86::VRNDSCALESHZr, X86::VRNDSCALESHZm, 0 },
+ { X86::VRNDSCALESHZr_Int, X86::VRNDSCALESHZm_Int, TB_NO_REVERSE },
{ X86::VRNDSCALESSZr, X86::VRNDSCALESSZm, 0 },
{ X86::VRNDSCALESSZr_Int, X86::VRNDSCALESSZm_Int, TB_NO_REVERSE },
{ X86::VROUNDSDr, X86::VROUNDSDm, 0 },
@@ -2897,15 +2993,23 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0 },
{ X86::VRSQRT28SDZr, X86::VRSQRT28SDZm, TB_NO_REVERSE },
{ X86::VRSQRT28SSZr, X86::VRSQRT28SSZm, TB_NO_REVERSE },
+ { X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mkz, 0 },
+ { X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mkz, 0 },
+ { X86::VRSQRTPHZrkz, X86::VRSQRTPHZmkz, 0 },
+ { X86::VRSQRTSHZrr, X86::VRSQRTSHZrm, TB_NO_REVERSE },
{ X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
{ X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
{ X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0 },
{ X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0 },
{ X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0 },
+ { X86::VSCALEFPHZ128rr, X86::VSCALEFPHZ128rm, 0 },
+ { X86::VSCALEFPHZ256rr, X86::VSCALEFPHZ256rm, 0 },
+ { X86::VSCALEFPHZrr, X86::VSCALEFPHZrm, 0 },
{ X86::VSCALEFPSZ128rr, X86::VSCALEFPSZ128rm, 0 },
{ X86::VSCALEFPSZ256rr, X86::VSCALEFPSZ256rm, 0 },
{ X86::VSCALEFPSZrr, X86::VSCALEFPSZrm, 0 },
{ X86::VSCALEFSDZrr, X86::VSCALEFSDZrm, TB_NO_REVERSE },
+ { X86::VSCALEFSHZrr, X86::VSCALEFSHZrm, TB_NO_REVERSE },
{ X86::VSCALEFSSZrr, X86::VSCALEFSSZrm, TB_NO_REVERSE },
{ X86::VSHUFF32X4Z256rri, X86::VSHUFF32X4Z256rmi, 0 },
{ X86::VSHUFF32X4Zrri, X86::VSHUFF32X4Zrmi, 0 },
@@ -2928,6 +3032,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0 },
{ X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0 },
{ X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0 },
+ { X86::VSQRTPHZ128rkz, X86::VSQRTPHZ128mkz, 0 },
+ { X86::VSQRTPHZ256rkz, X86::VSQRTPHZ256mkz, 0 },
+ { X86::VSQRTPHZrkz, X86::VSQRTPHZmkz, 0 },
{ X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mkz, 0 },
{ X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mkz, 0 },
{ X86::VSQRTPSZrkz, X86::VSQRTPSZmkz, 0 },
@@ -2935,6 +3042,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSQRTSDZr_Int, X86::VSQRTSDZm_Int, TB_NO_REVERSE },
{ X86::VSQRTSDr, X86::VSQRTSDm, 0 },
{ X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
+ { X86::VSQRTSHZr, X86::VSQRTSHZm, 0 },
+ { X86::VSQRTSHZr_Int, X86::VSQRTSHZm_Int, TB_NO_REVERSE },
{ X86::VSQRTSSZr, X86::VSQRTSSZm, 0 },
{ X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE },
{ X86::VSQRTSSr, X86::VSQRTSSm, 0 },
@@ -2944,6 +3053,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
{ X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
+ { X86::VSUBPHZ128rr, X86::VSUBPHZ128rm, 0 },
+ { X86::VSUBPHZ256rr, X86::VSUBPHZ256rm, 0 },
+ { X86::VSUBPHZrr, X86::VSUBPHZrm, 0 },
{ X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
{ X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
{ X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
@@ -2953,6 +3065,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
{ X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
{ X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
+ { X86::VSUBSHZrr, X86::VSUBSHZrm, 0 },
+ { X86::VSUBSHZrr_Int, X86::VSUBSHZrm_Int, TB_NO_REVERSE },
{ X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
{ X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
@@ -2999,10 +3113,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
{ X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
+ { X86::VADDPHZ128rrkz, X86::VADDPHZ128rmkz, 0 },
+ { X86::VADDPHZ256rrkz, X86::VADDPHZ256rmkz, 0 },
+ { X86::VADDPHZrrkz, X86::VADDPHZrmkz, 0 },
{ X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
{ X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
{ X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
{ X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VADDSHZrr_Intkz, X86::VADDSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
{ X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
@@ -3041,10 +3159,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
{ X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 },
{ X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 },
+ { X86::VCMPPHZ128rrik, X86::VCMPPHZ128rmik, 0 },
+ { X86::VCMPPHZ256rrik, X86::VCMPPHZ256rmik, 0 },
+ { X86::VCMPPHZrrik, X86::VCMPPHZrmik, 0 },
{ X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 },
{ X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 },
{ X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 },
{ X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCMPSHZrr_Intk, X86::VCMPSHZrm_Intk, TB_NO_REVERSE },
{ X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmk, TB_NO_REVERSE },
{ X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmk, 0 },
@@ -3141,10 +3263,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
{ X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
+ { X86::VDIVPHZ128rrkz, X86::VDIVPHZ128rmkz, 0 },
+ { X86::VDIVPHZ256rrkz, X86::VDIVPHZ256rmkz, 0 },
+ { X86::VDIVPHZrrkz, X86::VDIVPHZrmkz, 0 },
{ X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
{ X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
{ X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
{ X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VDIVSHZrr_Intkz, X86::VDIVSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VDPBF16PSZ128r, X86::VDPBF16PSZ128m, 0 },
{ X86::VDPBF16PSZ256r, X86::VDPBF16PSZ256m, 0 },
@@ -3157,6 +3283,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VEXPANDPSZ128rrk, X86::VEXPANDPSZ128rmk, TB_NO_REVERSE },
{ X86::VEXPANDPSZ256rrk, X86::VEXPANDPSZ256rmk, TB_NO_REVERSE },
{ X86::VEXPANDPSZrrk, X86::VEXPANDPSZrmk, TB_NO_REVERSE },
+ { X86::VFCMADDCPHZ128r, X86::VFCMADDCPHZ128m, 0 },
+ { X86::VFCMADDCPHZ256r, X86::VFCMADDCPHZ256m, 0 },
+ { X86::VFCMADDCPHZr, X86::VFCMADDCPHZm, 0 },
+ { X86::VFCMADDCSHZr, X86::VFCMADDCSHZm, TB_NO_REVERSE },
+ { X86::VFCMULCPHZ128rrkz, X86::VFCMULCPHZ128rmkz, 0 },
+ { X86::VFCMULCPHZ256rrkz, X86::VFCMULCPHZ256rmkz, 0 },
+ { X86::VFCMULCPHZrrkz, X86::VFCMULCPHZrmkz, 0 },
+ { X86::VFCMULCSHZrrkz, X86::VFCMULCSHZrmkz, TB_NO_REVERSE },
{ X86::VFIXUPIMMPDZ128rri, X86::VFIXUPIMMPDZ128rmi, 0 },
{ X86::VFIXUPIMMPDZ256rri, X86::VFIXUPIMMPDZ256rmi, 0 },
{ X86::VFIXUPIMMPDZrri, X86::VFIXUPIMMPDZrmi, 0 },
@@ -3170,6 +3304,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0 },
{ X86::VFMADD132PDZr, X86::VFMADD132PDZm, 0 },
{ X86::VFMADD132PDr, X86::VFMADD132PDm, 0 },
+ { X86::VFMADD132PHZ128r, X86::VFMADD132PHZ128m, 0 },
+ { X86::VFMADD132PHZ256r, X86::VFMADD132PHZ256m, 0 },
+ { X86::VFMADD132PHZr, X86::VFMADD132PHZm, 0 },
{ X86::VFMADD132PSYr, X86::VFMADD132PSYm, 0 },
{ X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, 0 },
{ X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, 0 },
@@ -3179,6 +3316,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_NO_REVERSE },
{ X86::VFMADD132SDr, X86::VFMADD132SDm, 0 },
{ X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_NO_REVERSE },
+ { X86::VFMADD132SHZr, X86::VFMADD132SHZm, 0 },
+ { X86::VFMADD132SHZr_Int, X86::VFMADD132SHZm_Int, TB_NO_REVERSE },
{ X86::VFMADD132SSZr, X86::VFMADD132SSZm, 0 },
{ X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE },
{ X86::VFMADD132SSr, X86::VFMADD132SSm, 0 },
@@ -3188,6 +3327,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0 },
{ X86::VFMADD213PDZr, X86::VFMADD213PDZm, 0 },
{ X86::VFMADD213PDr, X86::VFMADD213PDm, 0 },
+ { X86::VFMADD213PHZ128r, X86::VFMADD213PHZ128m, 0 },
+ { X86::VFMADD213PHZ256r, X86::VFMADD213PHZ256m, 0 },
+ { X86::VFMADD213PHZr, X86::VFMADD213PHZm, 0 },
{ X86::VFMADD213PSYr, X86::VFMADD213PSYm, 0 },
{ X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, 0 },
{ X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, 0 },
@@ -3197,6 +3339,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_NO_REVERSE },
{ X86::VFMADD213SDr, X86::VFMADD213SDm, 0 },
{ X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_NO_REVERSE },
+ { X86::VFMADD213SHZr, X86::VFMADD213SHZm, 0 },
+ { X86::VFMADD213SHZr_Int, X86::VFMADD213SHZm_Int, TB_NO_REVERSE },
{ X86::VFMADD213SSZr, X86::VFMADD213SSZm, 0 },
{ X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE },
{ X86::VFMADD213SSr, X86::VFMADD213SSm, 0 },
@@ -3206,6 +3350,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0 },
{ X86::VFMADD231PDZr, X86::VFMADD231PDZm, 0 },
{ X86::VFMADD231PDr, X86::VFMADD231PDm, 0 },
+ { X86::VFMADD231PHZ128r, X86::VFMADD231PHZ128m, 0 },
+ { X86::VFMADD231PHZ256r, X86::VFMADD231PHZ256m, 0 },
+ { X86::VFMADD231PHZr, X86::VFMADD231PHZm, 0 },
{ X86::VFMADD231PSYr, X86::VFMADD231PSYm, 0 },
{ X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, 0 },
{ X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, 0 },
@@ -3215,10 +3362,16 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_NO_REVERSE },
{ X86::VFMADD231SDr, X86::VFMADD231SDm, 0 },
{ X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_NO_REVERSE },
+ { X86::VFMADD231SHZr, X86::VFMADD231SHZm, 0 },
+ { X86::VFMADD231SHZr_Int, X86::VFMADD231SHZm_Int, TB_NO_REVERSE },
{ X86::VFMADD231SSZr, X86::VFMADD231SSZm, 0 },
{ X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_NO_REVERSE },
{ X86::VFMADD231SSr, X86::VFMADD231SSm, 0 },
{ X86::VFMADD231SSr_Int, X86::VFMADD231SSm_Int, TB_NO_REVERSE },
+ { X86::VFMADDCPHZ128r, X86::VFMADDCPHZ128m, 0 },
+ { X86::VFMADDCPHZ256r, X86::VFMADDCPHZ256m, 0 },
+ { X86::VFMADDCPHZr, X86::VFMADDCPHZm, 0 },
+ { X86::VFMADDCSHZr, X86::VFMADDCSHZm, TB_NO_REVERSE },
{ X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, 0 },
{ X86::VFMADDPD4rr, X86::VFMADDPD4rm, 0 },
{ X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, 0 },
@@ -3232,6 +3385,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, 0 },
{ X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, 0 },
{ X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, 0 },
+ { X86::VFMADDSUB132PHZ128r, X86::VFMADDSUB132PHZ128m, 0 },
+ { X86::VFMADDSUB132PHZ256r, X86::VFMADDSUB132PHZ256m, 0 },
+ { X86::VFMADDSUB132PHZr, X86::VFMADDSUB132PHZm, 0 },
{ X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, 0 },
{ X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, 0 },
{ X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, 0 },
@@ -3242,6 +3398,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, 0 },
{ X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, 0 },
{ X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, 0 },
+ { X86::VFMADDSUB213PHZ128r, X86::VFMADDSUB213PHZ128m, 0 },
+ { X86::VFMADDSUB213PHZ256r, X86::VFMADDSUB213PHZ256m, 0 },
+ { X86::VFMADDSUB213PHZr, X86::VFMADDSUB213PHZm, 0 },
{ X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, 0 },
{ X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, 0 },
{ X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, 0 },
@@ -3252,6 +3411,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, 0 },
{ X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, 0 },
{ X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, 0 },
+ { X86::VFMADDSUB231PHZ128r, X86::VFMADDSUB231PHZ128m, 0 },
+ { X86::VFMADDSUB231PHZ256r, X86::VFMADDSUB231PHZ256m, 0 },
+ { X86::VFMADDSUB231PHZr, X86::VFMADDSUB231PHZm, 0 },
{ X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, 0 },
{ X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, 0 },
{ X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, 0 },
@@ -3266,6 +3428,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0 },
{ X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, 0 },
{ X86::VFMSUB132PDr, X86::VFMSUB132PDm, 0 },
+ { X86::VFMSUB132PHZ128r, X86::VFMSUB132PHZ128m, 0 },
+ { X86::VFMSUB132PHZ256r, X86::VFMSUB132PHZ256m, 0 },
+ { X86::VFMSUB132PHZr, X86::VFMSUB132PHZm, 0 },
{ X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, 0 },
{ X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, 0 },
{ X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, 0 },
@@ -3275,6 +3440,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB132SDr, X86::VFMSUB132SDm, 0 },
{ X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_NO_REVERSE },
+ { X86::VFMSUB132SHZr, X86::VFMSUB132SHZm, 0 },
+ { X86::VFMSUB132SHZr_Int, X86::VFMSUB132SHZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, 0 },
{ X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0 },
@@ -3284,6 +3451,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0 },
{ X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, 0 },
{ X86::VFMSUB213PDr, X86::VFMSUB213PDm, 0 },
+ { X86::VFMSUB213PHZ128r, X86::VFMSUB213PHZ128m, 0 },
+ { X86::VFMSUB213PHZ256r, X86::VFMSUB213PHZ256m, 0 },
+ { X86::VFMSUB213PHZr, X86::VFMSUB213PHZm, 0 },
{ X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, 0 },
{ X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, 0 },
{ X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, 0 },
@@ -3293,6 +3463,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB213SDr, X86::VFMSUB213SDm, 0 },
{ X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_NO_REVERSE },
+ { X86::VFMSUB213SHZr, X86::VFMSUB213SHZm, 0 },
+ { X86::VFMSUB213SHZr_Int, X86::VFMSUB213SHZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, 0 },
{ X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0 },
@@ -3302,6 +3474,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0 },
{ X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, 0 },
{ X86::VFMSUB231PDr, X86::VFMSUB231PDm, 0 },
+ { X86::VFMSUB231PHZ128r, X86::VFMSUB231PHZ128m, 0 },
+ { X86::VFMSUB231PHZ256r, X86::VFMSUB231PHZ256m, 0 },
+ { X86::VFMSUB231PHZr, X86::VFMSUB231PHZm, 0 },
{ X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, 0 },
{ X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, 0 },
{ X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, 0 },
@@ -3311,6 +3486,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB231SDr, X86::VFMSUB231SDm, 0 },
{ X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_NO_REVERSE },
+ { X86::VFMSUB231SHZr, X86::VFMSUB231SHZm, 0 },
+ { X86::VFMSUB231SHZr_Int, X86::VFMSUB231SHZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, 0 },
{ X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_NO_REVERSE },
{ X86::VFMSUB231SSr, X86::VFMSUB231SSm, 0 },
@@ -3320,6 +3497,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, 0 },
{ X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, 0 },
{ X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, 0 },
+ { X86::VFMSUBADD132PHZ128r, X86::VFMSUBADD132PHZ128m, 0 },
+ { X86::VFMSUBADD132PHZ256r, X86::VFMSUBADD132PHZ256m, 0 },
+ { X86::VFMSUBADD132PHZr, X86::VFMSUBADD132PHZm, 0 },
{ X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, 0 },
{ X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, 0 },
{ X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, 0 },
@@ -3330,6 +3510,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, 0 },
{ X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, 0 },
{ X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, 0 },
+ { X86::VFMSUBADD213PHZ128r, X86::VFMSUBADD213PHZ128m, 0 },
+ { X86::VFMSUBADD213PHZ256r, X86::VFMSUBADD213PHZ256m, 0 },
+ { X86::VFMSUBADD213PHZr, X86::VFMSUBADD213PHZm, 0 },
{ X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, 0 },
{ X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, 0 },
{ X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, 0 },
@@ -3340,6 +3523,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, 0 },
{ X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, 0 },
{ X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, 0 },
+ { X86::VFMSUBADD231PHZ128r, X86::VFMSUBADD231PHZ128m, 0 },
+ { X86::VFMSUBADD231PHZ256r, X86::VFMSUBADD231PHZ256m, 0 },
+ { X86::VFMSUBADD231PHZr, X86::VFMSUBADD231PHZm, 0 },
{ X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, 0 },
{ X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, 0 },
{ X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, 0 },
@@ -3357,11 +3543,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
{ X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
+ { X86::VFMULCPHZ128rrkz, X86::VFMULCPHZ128rmkz, 0 },
+ { X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmkz, 0 },
+ { X86::VFMULCPHZrrkz, X86::VFMULCPHZrmkz, 0 },
+ { X86::VFMULCSHZrrkz, X86::VFMULCSHZrmkz, TB_NO_REVERSE },
{ X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, 0 },
{ X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, 0 },
{ X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0 },
{ X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, 0 },
{ X86::VFNMADD132PDr, X86::VFNMADD132PDm, 0 },
+ { X86::VFNMADD132PHZ128r, X86::VFNMADD132PHZ128m, 0 },
+ { X86::VFNMADD132PHZ256r, X86::VFNMADD132PHZ256m, 0 },
+ { X86::VFNMADD132PHZr, X86::VFNMADD132PHZm, 0 },
{ X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, 0 },
{ X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, 0 },
{ X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, 0 },
@@ -3371,6 +3564,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD132SDr, X86::VFNMADD132SDm, 0 },
{ X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMADD132SHZr, X86::VFNMADD132SHZm, 0 },
+ { X86::VFNMADD132SHZr_Int, X86::VFNMADD132SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, 0 },
{ X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0 },
@@ -3380,6 +3575,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, 0 },
{ X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, 0 },
{ X86::VFNMADD213PDr, X86::VFNMADD213PDm, 0 },
+ { X86::VFNMADD213PHZ128r, X86::VFNMADD213PHZ128m, 0 },
+ { X86::VFNMADD213PHZ256r, X86::VFNMADD213PHZ256m, 0 },
+ { X86::VFNMADD213PHZr, X86::VFNMADD213PHZm, 0 },
{ X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, 0 },
{ X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, 0 },
{ X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, 0 },
@@ -3389,6 +3587,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD213SDr, X86::VFNMADD213SDm, 0 },
{ X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMADD213SHZr, X86::VFNMADD213SHZm, 0 },
+ { X86::VFNMADD213SHZr_Int, X86::VFNMADD213SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, 0 },
{ X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0 },
@@ -3398,6 +3598,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0 },
{ X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, 0 },
{ X86::VFNMADD231PDr, X86::VFNMADD231PDm, 0 },
+ { X86::VFNMADD231PHZ128r, X86::VFNMADD231PHZ128m, 0 },
+ { X86::VFNMADD231PHZ256r, X86::VFNMADD231PHZ256m, 0 },
+ { X86::VFNMADD231PHZr, X86::VFNMADD231PHZm, 0 },
{ X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, 0 },
{ X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, 0 },
{ X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, 0 },
@@ -3407,6 +3610,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD231SDr, X86::VFNMADD231SDm, 0 },
{ X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMADD231SHZr, X86::VFNMADD231SHZm, 0 },
+ { X86::VFNMADD231SHZr_Int, X86::VFNMADD231SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, 0 },
{ X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMADD231SSr, X86::VFNMADD231SSm, 0 },
@@ -3424,6 +3629,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0 },
{ X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, 0 },
{ X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, 0 },
+ { X86::VFNMSUB132PHZ128r, X86::VFNMSUB132PHZ128m, 0 },
+ { X86::VFNMSUB132PHZ256r, X86::VFNMSUB132PHZ256m, 0 },
+ { X86::VFNMSUB132PHZr, X86::VFNMSUB132PHZm, 0 },
{ X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, 0 },
{ X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, 0 },
{ X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, 0 },
@@ -3433,6 +3641,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB132SDZr_Int, X86::VFNMSUB132SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, 0 },
{ X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUB132SHZr, X86::VFNMSUB132SHZm, 0 },
+ { X86::VFNMSUB132SHZr_Int, X86::VFNMSUB132SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB132SSZr, X86::VFNMSUB132SSZm, 0 },
{ X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0 },
@@ -3442,6 +3652,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0 },
{ X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, 0 },
{ X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, 0 },
+ { X86::VFNMSUB213PHZ128r, X86::VFNMSUB213PHZ128m, 0 },
+ { X86::VFNMSUB213PHZ256r, X86::VFNMSUB213PHZ256m, 0 },
+ { X86::VFNMSUB213PHZr, X86::VFNMSUB213PHZm, 0 },
{ X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, 0 },
{ X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, 0 },
{ X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, 0 },
@@ -3451,6 +3664,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB213SDZr_Int, X86::VFNMSUB213SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, 0 },
{ X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUB213SHZr, X86::VFNMSUB213SHZm, 0 },
+ { X86::VFNMSUB213SHZr_Int, X86::VFNMSUB213SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB213SSZr, X86::VFNMSUB213SSZm, 0 },
{ X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0 },
@@ -3460,6 +3675,9 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0 },
{ X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, 0 },
{ X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, 0 },
+ { X86::VFNMSUB231PHZ128r, X86::VFNMSUB231PHZ128m, 0 },
+ { X86::VFNMSUB231PHZ256r, X86::VFNMSUB231PHZ256m, 0 },
+ { X86::VFNMSUB231PHZr, X86::VFNMSUB231PHZm, 0 },
{ X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, 0 },
{ X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, 0 },
{ X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, 0 },
@@ -3469,6 +3687,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VFNMSUB231SDZr_Int, X86::VFNMSUB231SDZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, 0 },
{ X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_NO_REVERSE },
+ { X86::VFNMSUB231SHZr, X86::VFNMSUB231SHZm, 0 },
+ { X86::VFNMSUB231SHZr_Int, X86::VFNMSUB231SHZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB231SSZr, X86::VFNMSUB231SSZm, 0 },
{ X86::VFNMSUB231SSZr_Int, X86::VFNMSUB231SSZm_Int, TB_NO_REVERSE },
{ X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, 0 },
@@ -3484,18 +3704,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0 },
{ X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0 },
{ X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0 },
+ { X86::VGETEXPPHZ128rk, X86::VGETEXPPHZ128mk, 0 },
+ { X86::VGETEXPPHZ256rk, X86::VGETEXPPHZ256mk, 0 },
+ { X86::VGETEXPPHZrk, X86::VGETEXPPHZmk, 0 },
{ X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mk, 0 },
{ X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mk, 0 },
{ X86::VGETEXPPSZrk, X86::VGETEXPPSZmk, 0 },
{ X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE },
+ { X86::VGETEXPSHZrkz, X86::VGETEXPSHZmkz, TB_NO_REVERSE },
{ X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE },
{ X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0 },
{ X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0 },
{ X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0 },
+ { X86::VGETMANTPHZ128rrik, X86::VGETMANTPHZ128rmik, 0 },
+ { X86::VGETMANTPHZ256rrik, X86::VGETMANTPHZ256rmik, 0 },
+ { X86::VGETMANTPHZrrik, X86::VGETMANTPHZrmik, 0 },
{ X86::VGETMANTPSZ128rrik, X86::VGETMANTPSZ128rmik, 0 },
{ X86::VGETMANTPSZ256rrik, X86::VGETMANTPSZ256rmik, 0 },
{ X86::VGETMANTPSZrrik, X86::VGETMANTPSZrmik, 0 },
{ X86::VGETMANTSDZrrikz, X86::VGETMANTSDZrmikz, TB_NO_REVERSE },
+ { X86::VGETMANTSHZrrikz, X86::VGETMANTSHZrmikz, TB_NO_REVERSE },
{ X86::VGETMANTSSZrrikz, X86::VGETMANTSSZrmikz, TB_NO_REVERSE },
{ X86::VGF2P8AFFINEINVQBZ128rrikz, X86::VGF2P8AFFINEINVQBZ128rmikz, 0 },
{ X86::VGF2P8AFFINEINVQBZ256rrikz, X86::VGF2P8AFFINEINVQBZ256rmikz, 0 },
@@ -3521,30 +3749,44 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
{ X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
{ X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
+ { X86::VMAXCPHZ128rrkz, X86::VMAXCPHZ128rmkz, 0 },
+ { X86::VMAXCPHZ256rrkz, X86::VMAXCPHZ256rmkz, 0 },
+ { X86::VMAXCPHZrrkz, X86::VMAXCPHZrmkz, 0 },
{ X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
{ X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
{ X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
{ X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
+ { X86::VMAXPHZ128rrkz, X86::VMAXPHZ128rmkz, 0 },
+ { X86::VMAXPHZ256rrkz, X86::VMAXPHZ256rmkz, 0 },
+ { X86::VMAXPHZrrkz, X86::VMAXPHZrmkz, 0 },
{ X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
{ X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
{ X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
{ X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMAXSHZrr_Intkz, X86::VMAXSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
{ X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
{ X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
+ { X86::VMINCPHZ128rrkz, X86::VMINCPHZ128rmkz, 0 },
+ { X86::VMINCPHZ256rrkz, X86::VMINCPHZ256rmkz, 0 },
+ { X86::VMINCPHZrrkz, X86::VMINCPHZrmkz, 0 },
{ X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
{ X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
{ X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
{ X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
{ X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
{ X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
+ { X86::VMINPHZ128rrkz, X86::VMINPHZ128rmkz, 0 },
+ { X86::VMINPHZ256rrkz, X86::VMINPHZ256rmkz, 0 },
+ { X86::VMINPHZrrkz, X86::VMINPHZrmkz, 0 },
{ X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
{ X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
{ X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
{ X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMINSHZrr_Intkz, X86::VMINSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rrk, X86::VMOVAPDZ128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
{ X86::VMOVAPDZ256rrk, X86::VMOVAPDZ256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
@@ -3588,10 +3830,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
{ X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
{ X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
+ { X86::VMULPHZ128rrkz, X86::VMULPHZ128rmkz, 0 },
+ { X86::VMULPHZ256rrkz, X86::VMULPHZ256rmkz, 0 },
+ { X86::VMULPHZrrkz, X86::VMULPHZrmkz, 0 },
{ X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
{ X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
{ X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
{ X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VMULSHZrr_Intkz, X86::VMULSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
{ X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
@@ -4258,21 +4504,33 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0 },
{ X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE },
{ X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE },
+ { X86::VRCPPHZ128rk, X86::VRCPPHZ128mk, 0 },
+ { X86::VRCPPHZ256rk, X86::VRCPPHZ256mk, 0 },
+ { X86::VRCPPHZrk, X86::VRCPPHZmk, 0 },
+ { X86::VRCPSHZrrkz, X86::VRCPSHZrmkz, TB_NO_REVERSE },
{ X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0 },
{ X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0 },
{ X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0 },
+ { X86::VREDUCEPHZ128rrik, X86::VREDUCEPHZ128rmik, 0 },
+ { X86::VREDUCEPHZ256rrik, X86::VREDUCEPHZ256rmik, 0 },
+ { X86::VREDUCEPHZrrik, X86::VREDUCEPHZrmik, 0 },
{ X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmik, 0 },
{ X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmik, 0 },
{ X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmik, 0 },
{ X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE },
+ { X86::VREDUCESHZrrikz, X86::VREDUCESHZrmikz, TB_NO_REVERSE },
{ X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE },
{ X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0 },
{ X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0 },
{ X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0 },
+ { X86::VRNDSCALEPHZ128rrik, X86::VRNDSCALEPHZ128rmik, 0 },
+ { X86::VRNDSCALEPHZ256rrik, X86::VRNDSCALEPHZ256rmik, 0 },
+ { X86::VRNDSCALEPHZrrik, X86::VRNDSCALEPHZrmik, 0 },
{ X86::VRNDSCALEPSZ128rrik, X86::VRNDSCALEPSZ128rmik, 0 },
{ X86::VRNDSCALEPSZ256rrik, X86::VRNDSCALEPSZ256rmik, 0 },
{ X86::VRNDSCALEPSZrrik, X86::VRNDSCALEPSZrmik, 0 },
{ X86::VRNDSCALESDZr_Intkz, X86::VRNDSCALESDZm_Intkz, TB_NO_REVERSE },
+ { X86::VRNDSCALESHZr_Intkz, X86::VRNDSCALESHZm_Intkz, TB_NO_REVERSE },
{ X86::VRNDSCALESSZr_Intkz, X86::VRNDSCALESSZm_Intkz, TB_NO_REVERSE },
{ X86::VRSQRT14PDZ128rk, X86::VRSQRT14PDZ128mk, 0 },
{ X86::VRSQRT14PDZ256rk, X86::VRSQRT14PDZ256mk, 0 },
@@ -4286,13 +4544,21 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0 },
{ X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE },
{ X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE },
+ { X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mk, 0 },
+ { X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mk, 0 },
+ { X86::VRSQRTPHZrk, X86::VRSQRTPHZmk, 0 },
+ { X86::VRSQRTSHZrrkz, X86::VRSQRTSHZrmkz, TB_NO_REVERSE },
{ X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0 },
{ X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0 },
{ X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0 },
+ { X86::VSCALEFPHZ128rrkz, X86::VSCALEFPHZ128rmkz, 0 },
+ { X86::VSCALEFPHZ256rrkz, X86::VSCALEFPHZ256rmkz, 0 },
+ { X86::VSCALEFPHZrrkz, X86::VSCALEFPHZrmkz, 0 },
{ X86::VSCALEFPSZ128rrkz, X86::VSCALEFPSZ128rmkz, 0 },
{ X86::VSCALEFPSZ256rrkz, X86::VSCALEFPSZ256rmkz, 0 },
{ X86::VSCALEFPSZrrkz, X86::VSCALEFPSZrmkz, 0 },
{ X86::VSCALEFSDZrrkz, X86::VSCALEFSDZrmkz, TB_NO_REVERSE },
+ { X86::VSCALEFSHZrrkz, X86::VSCALEFSHZrmkz, TB_NO_REVERSE },
{ X86::VSCALEFSSZrrkz, X86::VSCALEFSSZrmkz, TB_NO_REVERSE },
{ X86::VSHUFF32X4Z256rrikz, X86::VSHUFF32X4Z256rmikz, 0 },
{ X86::VSHUFF32X4Zrrikz, X86::VSHUFF32X4Zrmikz, 0 },
@@ -4311,18 +4577,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
{ X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0 },
{ X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0 },
{ X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0 },
+ { X86::VSQRTPHZ128rk, X86::VSQRTPHZ128mk, 0 },
+ { X86::VSQRTPHZ256rk, X86::VSQRTPHZ256mk, 0 },
+ { X86::VSQRTPHZrk, X86::VSQRTPHZmk, 0 },
{ X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mk, 0 },
{ X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mk, 0 },
{ X86::VSQRTPSZrk, X86::VSQRTPSZmk, 0 },
{ X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE },
+ { X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE },
{ X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE },
{ X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
{ X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
{ X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
+ { X86::VSUBPHZ128rrkz, X86::VSUBPHZ128rmkz, 0 },
+ { X86::VSUBPHZ256rrkz, X86::VSUBPHZ256rmkz, 0 },
+ { X86::VSUBPHZrrkz, X86::VSUBPHZrmkz, 0 },
{ X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
{ X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
{ X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
{ X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
+ { X86::VSUBSHZrr_Intkz, X86::VSUBSHZrm_Intkz, TB_NO_REVERSE },
{ X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
{ X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
{ X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
@@ -4348,10 +4622,14 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
{ X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
+ { X86::VADDPHZ128rrk, X86::VADDPHZ128rmk, 0 },
+ { X86::VADDPHZ256rrk, X86::VADDPHZ256rmk, 0 },
+ { X86::VADDPHZrrk, X86::VADDPHZrmk, 0 },
{ X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
{ X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
{ X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
{ X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VADDSHZrr_Intk, X86::VADDSHZrm_Intk, TB_NO_REVERSE },
{ X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
{ X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
{ X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
@@ -4374,18 +4652,26 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0 },
{ X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0 },
{ X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0 },
+ { X86::VCVTSD2SHZrr_Intk, X86::VCVTSD2SHZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSH2SDZrr_Intk, X86::VCVTSH2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSH2SSZrr_Intk, X86::VCVTSH2SSZrm_Intk, TB_NO_REVERSE },
{ X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
+ { X86::VCVTSS2SHZrr_Intk, X86::VCVTSS2SHZrm_Intk, TB_NO_REVERSE },
{ X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
{ X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0 },
{ X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0 },
{ X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
{ X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
+ { X86::VDIVPHZ128rrk, X86::VDIVPHZ128rmk, 0 },
+ { X86::VDIVPHZ256rrk, X86::VDIVPHZ256rmk, 0 },
+ { X86::VDIVPHZrrk, X86::VDIVPHZrmk, 0 },
{ X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
{ X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
{ X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
{ X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VDIVSHZrr_Intk, X86::VDIVSHZrm_Intk, TB_NO_REVERSE },
{ X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
{ X86::VDPBF16PSZ128rk, X86::VDPBF16PSZ128mk, 0 },
{ X86::VDPBF16PSZ128rkz, X86::VDPBF16PSZ128mkz, 0 },
@@ -4393,6 +4679,18 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VDPBF16PSZ256rkz, X86::VDPBF16PSZ256mkz, 0 },
{ X86::VDPBF16PSZrk, X86::VDPBF16PSZmk, 0 },
{ X86::VDPBF16PSZrkz, X86::VDPBF16PSZmkz, 0 },
+ { X86::VFCMADDCPHZ128rk, X86::VFCMADDCPHZ128mk, 0 },
+ { X86::VFCMADDCPHZ128rkz, X86::VFCMADDCPHZ128mkz, 0 },
+ { X86::VFCMADDCPHZ256rk, X86::VFCMADDCPHZ256mk, 0 },
+ { X86::VFCMADDCPHZ256rkz, X86::VFCMADDCPHZ256mkz, 0 },
+ { X86::VFCMADDCPHZrk, X86::VFCMADDCPHZmk, 0 },
+ { X86::VFCMADDCPHZrkz, X86::VFCMADDCPHZmkz, 0 },
+ { X86::VFCMADDCSHZrk, X86::VFCMADDCSHZmk, TB_NO_REVERSE },
+ { X86::VFCMADDCSHZrkz, X86::VFCMADDCSHZmkz, TB_NO_REVERSE },
+ { X86::VFCMULCPHZ128rrk, X86::VFCMULCPHZ128rmk, 0 },
+ { X86::VFCMULCPHZ256rrk, X86::VFCMULCPHZ256rmk, 0 },
+ { X86::VFCMULCPHZrrk, X86::VFCMULCPHZrmk, 0 },
+ { X86::VFCMULCSHZrrk, X86::VFCMULCSHZrmk, TB_NO_REVERSE },
{ X86::VFIXUPIMMPDZ128rrik, X86::VFIXUPIMMPDZ128rmik, 0 },
{ X86::VFIXUPIMMPDZ128rrikz, X86::VFIXUPIMMPDZ128rmikz, 0 },
{ X86::VFIXUPIMMPDZ256rrik, X86::VFIXUPIMMPDZ256rmik, 0 },
@@ -4415,6 +4713,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD132PDZ256rkz, X86::VFMADD132PDZ256mkz, 0 },
{ X86::VFMADD132PDZrk, X86::VFMADD132PDZmk, 0 },
{ X86::VFMADD132PDZrkz, X86::VFMADD132PDZmkz, 0 },
+ { X86::VFMADD132PHZ128rk, X86::VFMADD132PHZ128mk, 0 },
+ { X86::VFMADD132PHZ128rkz, X86::VFMADD132PHZ128mkz, 0 },
+ { X86::VFMADD132PHZ256rk, X86::VFMADD132PHZ256mk, 0 },
+ { X86::VFMADD132PHZ256rkz, X86::VFMADD132PHZ256mkz, 0 },
+ { X86::VFMADD132PHZrk, X86::VFMADD132PHZmk, 0 },
+ { X86::VFMADD132PHZrkz, X86::VFMADD132PHZmkz, 0 },
{ X86::VFMADD132PSZ128rk, X86::VFMADD132PSZ128mk, 0 },
{ X86::VFMADD132PSZ128rkz, X86::VFMADD132PSZ128mkz, 0 },
{ X86::VFMADD132PSZ256rk, X86::VFMADD132PSZ256mk, 0 },
@@ -4423,6 +4727,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD132PSZrkz, X86::VFMADD132PSZmkz, 0 },
{ X86::VFMADD132SDZr_Intk, X86::VFMADD132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD132SDZr_Intkz, X86::VFMADD132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADD132SHZr_Intk, X86::VFMADD132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0 },
@@ -4431,6 +4737,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD213PDZ256rkz, X86::VFMADD213PDZ256mkz, 0 },
{ X86::VFMADD213PDZrk, X86::VFMADD213PDZmk, 0 },
{ X86::VFMADD213PDZrkz, X86::VFMADD213PDZmkz, 0 },
+ { X86::VFMADD213PHZ128rk, X86::VFMADD213PHZ128mk, 0 },
+ { X86::VFMADD213PHZ128rkz, X86::VFMADD213PHZ128mkz, 0 },
+ { X86::VFMADD213PHZ256rk, X86::VFMADD213PHZ256mk, 0 },
+ { X86::VFMADD213PHZ256rkz, X86::VFMADD213PHZ256mkz, 0 },
+ { X86::VFMADD213PHZrk, X86::VFMADD213PHZmk, 0 },
+ { X86::VFMADD213PHZrkz, X86::VFMADD213PHZmkz, 0 },
{ X86::VFMADD213PSZ128rk, X86::VFMADD213PSZ128mk, 0 },
{ X86::VFMADD213PSZ128rkz, X86::VFMADD213PSZ128mkz, 0 },
{ X86::VFMADD213PSZ256rk, X86::VFMADD213PSZ256mk, 0 },
@@ -4439,6 +4751,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD213PSZrkz, X86::VFMADD213PSZmkz, 0 },
{ X86::VFMADD213SDZr_Intk, X86::VFMADD213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD213SDZr_Intkz, X86::VFMADD213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADD213SHZr_Intk, X86::VFMADD213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0 },
@@ -4447,6 +4761,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD231PDZ256rkz, X86::VFMADD231PDZ256mkz, 0 },
{ X86::VFMADD231PDZrk, X86::VFMADD231PDZmk, 0 },
{ X86::VFMADD231PDZrkz, X86::VFMADD231PDZmkz, 0 },
+ { X86::VFMADD231PHZ128rk, X86::VFMADD231PHZ128mk, 0 },
+ { X86::VFMADD231PHZ128rkz, X86::VFMADD231PHZ128mkz, 0 },
+ { X86::VFMADD231PHZ256rk, X86::VFMADD231PHZ256mk, 0 },
+ { X86::VFMADD231PHZ256rkz, X86::VFMADD231PHZ256mkz, 0 },
+ { X86::VFMADD231PHZrk, X86::VFMADD231PHZmk, 0 },
+ { X86::VFMADD231PHZrkz, X86::VFMADD231PHZmkz, 0 },
{ X86::VFMADD231PSZ128rk, X86::VFMADD231PSZ128mk, 0 },
{ X86::VFMADD231PSZ128rkz, X86::VFMADD231PSZ128mkz, 0 },
{ X86::VFMADD231PSZ256rk, X86::VFMADD231PSZ256mk, 0 },
@@ -4455,14 +4775,30 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADD231PSZrkz, X86::VFMADD231PSZmkz, 0 },
{ X86::VFMADD231SDZr_Intk, X86::VFMADD231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD231SDZr_Intkz, X86::VFMADD231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADD231SHZr_Intk, X86::VFMADD231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMADD231SHZr_Intkz, X86::VFMADD231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMADD231SSZr_Intk, X86::VFMADD231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMADD231SSZr_Intkz, X86::VFMADD231SSZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMADDCPHZ128rk, X86::VFMADDCPHZ128mk, 0 },
+ { X86::VFMADDCPHZ128rkz, X86::VFMADDCPHZ128mkz, 0 },
+ { X86::VFMADDCPHZ256rk, X86::VFMADDCPHZ256mk, 0 },
+ { X86::VFMADDCPHZ256rkz, X86::VFMADDCPHZ256mkz, 0 },
+ { X86::VFMADDCPHZrk, X86::VFMADDCPHZmk, 0 },
+ { X86::VFMADDCPHZrkz, X86::VFMADDCPHZmkz, 0 },
+ { X86::VFMADDCSHZrk, X86::VFMADDCSHZmk, TB_NO_REVERSE },
+ { X86::VFMADDCSHZrkz, X86::VFMADDCSHZmkz, TB_NO_REVERSE },
{ X86::VFMADDSUB132PDZ128rk, X86::VFMADDSUB132PDZ128mk, 0 },
{ X86::VFMADDSUB132PDZ128rkz, X86::VFMADDSUB132PDZ128mkz, 0 },
{ X86::VFMADDSUB132PDZ256rk, X86::VFMADDSUB132PDZ256mk, 0 },
{ X86::VFMADDSUB132PDZ256rkz, X86::VFMADDSUB132PDZ256mkz, 0 },
{ X86::VFMADDSUB132PDZrk, X86::VFMADDSUB132PDZmk, 0 },
{ X86::VFMADDSUB132PDZrkz, X86::VFMADDSUB132PDZmkz, 0 },
+ { X86::VFMADDSUB132PHZ128rk, X86::VFMADDSUB132PHZ128mk, 0 },
+ { X86::VFMADDSUB132PHZ128rkz, X86::VFMADDSUB132PHZ128mkz, 0 },
+ { X86::VFMADDSUB132PHZ256rk, X86::VFMADDSUB132PHZ256mk, 0 },
+ { X86::VFMADDSUB132PHZ256rkz, X86::VFMADDSUB132PHZ256mkz, 0 },
+ { X86::VFMADDSUB132PHZrk, X86::VFMADDSUB132PHZmk, 0 },
+ { X86::VFMADDSUB132PHZrkz, X86::VFMADDSUB132PHZmkz, 0 },
{ X86::VFMADDSUB132PSZ128rk, X86::VFMADDSUB132PSZ128mk, 0 },
{ X86::VFMADDSUB132PSZ128rkz, X86::VFMADDSUB132PSZ128mkz, 0 },
{ X86::VFMADDSUB132PSZ256rk, X86::VFMADDSUB132PSZ256mk, 0 },
@@ -4475,6 +4811,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADDSUB213PDZ256rkz, X86::VFMADDSUB213PDZ256mkz, 0 },
{ X86::VFMADDSUB213PDZrk, X86::VFMADDSUB213PDZmk, 0 },
{ X86::VFMADDSUB213PDZrkz, X86::VFMADDSUB213PDZmkz, 0 },
+ { X86::VFMADDSUB213PHZ128rk, X86::VFMADDSUB213PHZ128mk, 0 },
+ { X86::VFMADDSUB213PHZ128rkz, X86::VFMADDSUB213PHZ128mkz, 0 },
+ { X86::VFMADDSUB213PHZ256rk, X86::VFMADDSUB213PHZ256mk, 0 },
+ { X86::VFMADDSUB213PHZ256rkz, X86::VFMADDSUB213PHZ256mkz, 0 },
+ { X86::VFMADDSUB213PHZrk, X86::VFMADDSUB213PHZmk, 0 },
+ { X86::VFMADDSUB213PHZrkz, X86::VFMADDSUB213PHZmkz, 0 },
{ X86::VFMADDSUB213PSZ128rk, X86::VFMADDSUB213PSZ128mk, 0 },
{ X86::VFMADDSUB213PSZ128rkz, X86::VFMADDSUB213PSZ128mkz, 0 },
{ X86::VFMADDSUB213PSZ256rk, X86::VFMADDSUB213PSZ256mk, 0 },
@@ -4487,6 +4829,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMADDSUB231PDZ256rkz, X86::VFMADDSUB231PDZ256mkz, 0 },
{ X86::VFMADDSUB231PDZrk, X86::VFMADDSUB231PDZmk, 0 },
{ X86::VFMADDSUB231PDZrkz, X86::VFMADDSUB231PDZmkz, 0 },
+ { X86::VFMADDSUB231PHZ128rk, X86::VFMADDSUB231PHZ128mk, 0 },
+ { X86::VFMADDSUB231PHZ128rkz, X86::VFMADDSUB231PHZ128mkz, 0 },
+ { X86::VFMADDSUB231PHZ256rk, X86::VFMADDSUB231PHZ256mk, 0 },
+ { X86::VFMADDSUB231PHZ256rkz, X86::VFMADDSUB231PHZ256mkz, 0 },
+ { X86::VFMADDSUB231PHZrk, X86::VFMADDSUB231PHZmk, 0 },
+ { X86::VFMADDSUB231PHZrkz, X86::VFMADDSUB231PHZmkz, 0 },
{ X86::VFMADDSUB231PSZ128rk, X86::VFMADDSUB231PSZ128mk, 0 },
{ X86::VFMADDSUB231PSZ128rkz, X86::VFMADDSUB231PSZ128mkz, 0 },
{ X86::VFMADDSUB231PSZ256rk, X86::VFMADDSUB231PSZ256mk, 0 },
@@ -4499,6 +4847,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB132PDZ256rkz, X86::VFMSUB132PDZ256mkz, 0 },
{ X86::VFMSUB132PDZrk, X86::VFMSUB132PDZmk, 0 },
{ X86::VFMSUB132PDZrkz, X86::VFMSUB132PDZmkz, 0 },
+ { X86::VFMSUB132PHZ128rk, X86::VFMSUB132PHZ128mk, 0 },
+ { X86::VFMSUB132PHZ128rkz, X86::VFMSUB132PHZ128mkz, 0 },
+ { X86::VFMSUB132PHZ256rk, X86::VFMSUB132PHZ256mk, 0 },
+ { X86::VFMSUB132PHZ256rkz, X86::VFMSUB132PHZ256mkz, 0 },
+ { X86::VFMSUB132PHZrk, X86::VFMSUB132PHZmk, 0 },
+ { X86::VFMSUB132PHZrkz, X86::VFMSUB132PHZmkz, 0 },
{ X86::VFMSUB132PSZ128rk, X86::VFMSUB132PSZ128mk, 0 },
{ X86::VFMSUB132PSZ128rkz, X86::VFMSUB132PSZ128mkz, 0 },
{ X86::VFMSUB132PSZ256rk, X86::VFMSUB132PSZ256mk, 0 },
@@ -4507,6 +4861,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmkz, 0 },
{ X86::VFMSUB132SDZr_Intk, X86::VFMSUB132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB132SDZr_Intkz, X86::VFMSUB132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMSUB132SHZr_Intk, X86::VFMSUB132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0 },
@@ -4515,6 +4871,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB213PDZ256rkz, X86::VFMSUB213PDZ256mkz, 0 },
{ X86::VFMSUB213PDZrk, X86::VFMSUB213PDZmk, 0 },
{ X86::VFMSUB213PDZrkz, X86::VFMSUB213PDZmkz, 0 },
+ { X86::VFMSUB213PHZ128rk, X86::VFMSUB213PHZ128mk, 0 },
+ { X86::VFMSUB213PHZ128rkz, X86::VFMSUB213PHZ128mkz, 0 },
+ { X86::VFMSUB213PHZ256rk, X86::VFMSUB213PHZ256mk, 0 },
+ { X86::VFMSUB213PHZ256rkz, X86::VFMSUB213PHZ256mkz, 0 },
+ { X86::VFMSUB213PHZrk, X86::VFMSUB213PHZmk, 0 },
+ { X86::VFMSUB213PHZrkz, X86::VFMSUB213PHZmkz, 0 },
{ X86::VFMSUB213PSZ128rk, X86::VFMSUB213PSZ128mk, 0 },
{ X86::VFMSUB213PSZ128rkz, X86::VFMSUB213PSZ128mkz, 0 },
{ X86::VFMSUB213PSZ256rk, X86::VFMSUB213PSZ256mk, 0 },
@@ -4523,6 +4885,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmkz, 0 },
{ X86::VFMSUB213SDZr_Intk, X86::VFMSUB213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB213SDZr_Intkz, X86::VFMSUB213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMSUB213SHZr_Intk, X86::VFMSUB213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0 },
@@ -4531,6 +4895,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB231PDZ256rkz, X86::VFMSUB231PDZ256mkz, 0 },
{ X86::VFMSUB231PDZrk, X86::VFMSUB231PDZmk, 0 },
{ X86::VFMSUB231PDZrkz, X86::VFMSUB231PDZmkz, 0 },
+ { X86::VFMSUB231PHZ128rk, X86::VFMSUB231PHZ128mk, 0 },
+ { X86::VFMSUB231PHZ128rkz, X86::VFMSUB231PHZ128mkz, 0 },
+ { X86::VFMSUB231PHZ256rk, X86::VFMSUB231PHZ256mk, 0 },
+ { X86::VFMSUB231PHZ256rkz, X86::VFMSUB231PHZ256mkz, 0 },
+ { X86::VFMSUB231PHZrk, X86::VFMSUB231PHZmk, 0 },
+ { X86::VFMSUB231PHZrkz, X86::VFMSUB231PHZmkz, 0 },
{ X86::VFMSUB231PSZ128rk, X86::VFMSUB231PSZ128mk, 0 },
{ X86::VFMSUB231PSZ128rkz, X86::VFMSUB231PSZ128mkz, 0 },
{ X86::VFMSUB231PSZ256rk, X86::VFMSUB231PSZ256mk, 0 },
@@ -4539,6 +4909,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUB231PSZrkz, X86::VFMSUB231PSZmkz, 0 },
{ X86::VFMSUB231SDZr_Intk, X86::VFMSUB231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB231SDZr_Intkz, X86::VFMSUB231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFMSUB231SHZr_Intk, X86::VFMSUB231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFMSUB231SHZr_Intkz, X86::VFMSUB231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUB231SSZr_Intk, X86::VFMSUB231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFMSUB231SSZr_Intkz, X86::VFMSUB231SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFMSUBADD132PDZ128rk, X86::VFMSUBADD132PDZ128mk, 0 },
@@ -4547,6 +4919,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUBADD132PDZ256rkz, X86::VFMSUBADD132PDZ256mkz, 0 },
{ X86::VFMSUBADD132PDZrk, X86::VFMSUBADD132PDZmk, 0 },
{ X86::VFMSUBADD132PDZrkz, X86::VFMSUBADD132PDZmkz, 0 },
+ { X86::VFMSUBADD132PHZ128rk, X86::VFMSUBADD132PHZ128mk, 0 },
+ { X86::VFMSUBADD132PHZ128rkz, X86::VFMSUBADD132PHZ128mkz, 0 },
+ { X86::VFMSUBADD132PHZ256rk, X86::VFMSUBADD132PHZ256mk, 0 },
+ { X86::VFMSUBADD132PHZ256rkz, X86::VFMSUBADD132PHZ256mkz, 0 },
+ { X86::VFMSUBADD132PHZrk, X86::VFMSUBADD132PHZmk, 0 },
+ { X86::VFMSUBADD132PHZrkz, X86::VFMSUBADD132PHZmkz, 0 },
{ X86::VFMSUBADD132PSZ128rk, X86::VFMSUBADD132PSZ128mk, 0 },
{ X86::VFMSUBADD132PSZ128rkz, X86::VFMSUBADD132PSZ128mkz, 0 },
{ X86::VFMSUBADD132PSZ256rk, X86::VFMSUBADD132PSZ256mk, 0 },
@@ -4559,6 +4937,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUBADD213PDZ256rkz, X86::VFMSUBADD213PDZ256mkz, 0 },
{ X86::VFMSUBADD213PDZrk, X86::VFMSUBADD213PDZmk, 0 },
{ X86::VFMSUBADD213PDZrkz, X86::VFMSUBADD213PDZmkz, 0 },
+ { X86::VFMSUBADD213PHZ128rk, X86::VFMSUBADD213PHZ128mk, 0 },
+ { X86::VFMSUBADD213PHZ128rkz, X86::VFMSUBADD213PHZ128mkz, 0 },
+ { X86::VFMSUBADD213PHZ256rk, X86::VFMSUBADD213PHZ256mk, 0 },
+ { X86::VFMSUBADD213PHZ256rkz, X86::VFMSUBADD213PHZ256mkz, 0 },
+ { X86::VFMSUBADD213PHZrk, X86::VFMSUBADD213PHZmk, 0 },
+ { X86::VFMSUBADD213PHZrkz, X86::VFMSUBADD213PHZmkz, 0 },
{ X86::VFMSUBADD213PSZ128rk, X86::VFMSUBADD213PSZ128mk, 0 },
{ X86::VFMSUBADD213PSZ128rkz, X86::VFMSUBADD213PSZ128mkz, 0 },
{ X86::VFMSUBADD213PSZ256rk, X86::VFMSUBADD213PSZ256mk, 0 },
@@ -4571,18 +4955,34 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFMSUBADD231PDZ256rkz, X86::VFMSUBADD231PDZ256mkz, 0 },
{ X86::VFMSUBADD231PDZrk, X86::VFMSUBADD231PDZmk, 0 },
{ X86::VFMSUBADD231PDZrkz, X86::VFMSUBADD231PDZmkz, 0 },
+ { X86::VFMSUBADD231PHZ128rk, X86::VFMSUBADD231PHZ128mk, 0 },
+ { X86::VFMSUBADD231PHZ128rkz, X86::VFMSUBADD231PHZ128mkz, 0 },
+ { X86::VFMSUBADD231PHZ256rk, X86::VFMSUBADD231PHZ256mk, 0 },
+ { X86::VFMSUBADD231PHZ256rkz, X86::VFMSUBADD231PHZ256mkz, 0 },
+ { X86::VFMSUBADD231PHZrk, X86::VFMSUBADD231PHZmk, 0 },
+ { X86::VFMSUBADD231PHZrkz, X86::VFMSUBADD231PHZmkz, 0 },
{ X86::VFMSUBADD231PSZ128rk, X86::VFMSUBADD231PSZ128mk, 0 },
{ X86::VFMSUBADD231PSZ128rkz, X86::VFMSUBADD231PSZ128mkz, 0 },
{ X86::VFMSUBADD231PSZ256rk, X86::VFMSUBADD231PSZ256mk, 0 },
{ X86::VFMSUBADD231PSZ256rkz, X86::VFMSUBADD231PSZ256mkz, 0 },
{ X86::VFMSUBADD231PSZrk, X86::VFMSUBADD231PSZmk, 0 },
{ X86::VFMSUBADD231PSZrkz, X86::VFMSUBADD231PSZmkz, 0 },
+ { X86::VFMULCPHZ128rrk, X86::VFMULCPHZ128rmk, 0 },
+ { X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmk, 0 },
+ { X86::VFMULCPHZrrk, X86::VFMULCPHZrmk, 0 },
+ { X86::VFMULCSHZrrk, X86::VFMULCSHZrmk, TB_NO_REVERSE },
{ X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mk, 0 },
{ X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mkz, 0 },
{ X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mk, 0 },
{ X86::VFNMADD132PDZ256rkz, X86::VFNMADD132PDZ256mkz, 0 },
{ X86::VFNMADD132PDZrk, X86::VFNMADD132PDZmk, 0 },
{ X86::VFNMADD132PDZrkz, X86::VFNMADD132PDZmkz, 0 },
+ { X86::VFNMADD132PHZ128rk, X86::VFNMADD132PHZ128mk, 0 },
+ { X86::VFNMADD132PHZ128rkz, X86::VFNMADD132PHZ128mkz, 0 },
+ { X86::VFNMADD132PHZ256rk, X86::VFNMADD132PHZ256mk, 0 },
+ { X86::VFNMADD132PHZ256rkz, X86::VFNMADD132PHZ256mkz, 0 },
+ { X86::VFNMADD132PHZrk, X86::VFNMADD132PHZmk, 0 },
+ { X86::VFNMADD132PHZrkz, X86::VFNMADD132PHZmkz, 0 },
{ X86::VFNMADD132PSZ128rk, X86::VFNMADD132PSZ128mk, 0 },
{ X86::VFNMADD132PSZ128rkz, X86::VFNMADD132PSZ128mkz, 0 },
{ X86::VFNMADD132PSZ256rk, X86::VFNMADD132PSZ256mk, 0 },
@@ -4591,6 +4991,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmkz, 0 },
{ X86::VFNMADD132SDZr_Intk, X86::VFNMADD132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD132SDZr_Intkz, X86::VFNMADD132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMADD132SHZr_Intk, X86::VFNMADD132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0 },
@@ -4599,6 +5001,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD213PDZ256rkz, X86::VFNMADD213PDZ256mkz, 0 },
{ X86::VFNMADD213PDZrk, X86::VFNMADD213PDZmk, 0 },
{ X86::VFNMADD213PDZrkz, X86::VFNMADD213PDZmkz, 0 },
+ { X86::VFNMADD213PHZ128rk, X86::VFNMADD213PHZ128mk, 0 },
+ { X86::VFNMADD213PHZ128rkz, X86::VFNMADD213PHZ128mkz, 0 },
+ { X86::VFNMADD213PHZ256rk, X86::VFNMADD213PHZ256mk, 0 },
+ { X86::VFNMADD213PHZ256rkz, X86::VFNMADD213PHZ256mkz, 0 },
+ { X86::VFNMADD213PHZrk, X86::VFNMADD213PHZmk, 0 },
+ { X86::VFNMADD213PHZrkz, X86::VFNMADD213PHZmkz, 0 },
{ X86::VFNMADD213PSZ128rk, X86::VFNMADD213PSZ128mk, 0 },
{ X86::VFNMADD213PSZ128rkz, X86::VFNMADD213PSZ128mkz, 0 },
{ X86::VFNMADD213PSZ256rk, X86::VFNMADD213PSZ256mk, 0 },
@@ -4607,6 +5015,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmkz, 0 },
{ X86::VFNMADD213SDZr_Intk, X86::VFNMADD213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD213SDZr_Intkz, X86::VFNMADD213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMADD213SHZr_Intk, X86::VFNMADD213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0 },
@@ -4615,6 +5025,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD231PDZ256rkz, X86::VFNMADD231PDZ256mkz, 0 },
{ X86::VFNMADD231PDZrk, X86::VFNMADD231PDZmk, 0 },
{ X86::VFNMADD231PDZrkz, X86::VFNMADD231PDZmkz, 0 },
+ { X86::VFNMADD231PHZ128rk, X86::VFNMADD231PHZ128mk, 0 },
+ { X86::VFNMADD231PHZ128rkz, X86::VFNMADD231PHZ128mkz, 0 },
+ { X86::VFNMADD231PHZ256rk, X86::VFNMADD231PHZ256mk, 0 },
+ { X86::VFNMADD231PHZ256rkz, X86::VFNMADD231PHZ256mkz, 0 },
+ { X86::VFNMADD231PHZrk, X86::VFNMADD231PHZmk, 0 },
+ { X86::VFNMADD231PHZrkz, X86::VFNMADD231PHZmkz, 0 },
{ X86::VFNMADD231PSZ128rk, X86::VFNMADD231PSZ128mk, 0 },
{ X86::VFNMADD231PSZ128rkz, X86::VFNMADD231PSZ128mkz, 0 },
{ X86::VFNMADD231PSZ256rk, X86::VFNMADD231PSZ256mk, 0 },
@@ -4623,6 +5039,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmkz, 0 },
{ X86::VFNMADD231SDZr_Intk, X86::VFNMADD231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD231SDZr_Intkz, X86::VFNMADD231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMADD231SHZr_Intk, X86::VFNMADD231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0 },
@@ -4631,6 +5049,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB132PDZ256rkz, X86::VFNMSUB132PDZ256mkz, 0 },
{ X86::VFNMSUB132PDZrk, X86::VFNMSUB132PDZmk, 0 },
{ X86::VFNMSUB132PDZrkz, X86::VFNMSUB132PDZmkz, 0 },
+ { X86::VFNMSUB132PHZ128rk, X86::VFNMSUB132PHZ128mk, 0 },
+ { X86::VFNMSUB132PHZ128rkz, X86::VFNMSUB132PHZ128mkz, 0 },
+ { X86::VFNMSUB132PHZ256rk, X86::VFNMSUB132PHZ256mk, 0 },
+ { X86::VFNMSUB132PHZ256rkz, X86::VFNMSUB132PHZ256mkz, 0 },
+ { X86::VFNMSUB132PHZrk, X86::VFNMSUB132PHZmk, 0 },
+ { X86::VFNMSUB132PHZrkz, X86::VFNMSUB132PHZmkz, 0 },
{ X86::VFNMSUB132PSZ128rk, X86::VFNMSUB132PSZ128mk, 0 },
{ X86::VFNMSUB132PSZ128rkz, X86::VFNMSUB132PSZ128mkz, 0 },
{ X86::VFNMSUB132PSZ256rk, X86::VFNMSUB132PSZ256mk, 0 },
@@ -4639,6 +5063,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmkz, 0 },
{ X86::VFNMSUB132SDZr_Intk, X86::VFNMSUB132SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB132SDZr_Intkz, X86::VFNMSUB132SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMSUB132SHZr_Intk, X86::VFNMSUB132SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0 },
@@ -4647,6 +5073,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB213PDZ256rkz, X86::VFNMSUB213PDZ256mkz, 0 },
{ X86::VFNMSUB213PDZrk, X86::VFNMSUB213PDZmk, 0 },
{ X86::VFNMSUB213PDZrkz, X86::VFNMSUB213PDZmkz, 0 },
+ { X86::VFNMSUB213PHZ128rk, X86::VFNMSUB213PHZ128mk, 0 },
+ { X86::VFNMSUB213PHZ128rkz, X86::VFNMSUB213PHZ128mkz, 0 },
+ { X86::VFNMSUB213PHZ256rk, X86::VFNMSUB213PHZ256mk, 0 },
+ { X86::VFNMSUB213PHZ256rkz, X86::VFNMSUB213PHZ256mkz, 0 },
+ { X86::VFNMSUB213PHZrk, X86::VFNMSUB213PHZmk, 0 },
+ { X86::VFNMSUB213PHZrkz, X86::VFNMSUB213PHZmkz, 0 },
{ X86::VFNMSUB213PSZ128rk, X86::VFNMSUB213PSZ128mk, 0 },
{ X86::VFNMSUB213PSZ128rkz, X86::VFNMSUB213PSZ128mkz, 0 },
{ X86::VFNMSUB213PSZ256rk, X86::VFNMSUB213PSZ256mk, 0 },
@@ -4655,6 +5087,8 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmkz, 0 },
{ X86::VFNMSUB213SDZr_Intk, X86::VFNMSUB213SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB213SDZr_Intkz, X86::VFNMSUB213SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMSUB213SHZr_Intk, X86::VFNMSUB213SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0 },
@@ -4663,6 +5097,12 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB231PDZ256rkz, X86::VFNMSUB231PDZ256mkz, 0 },
{ X86::VFNMSUB231PDZrk, X86::VFNMSUB231PDZmk, 0 },
{ X86::VFNMSUB231PDZrkz, X86::VFNMSUB231PDZmkz, 0 },
+ { X86::VFNMSUB231PHZ128rk, X86::VFNMSUB231PHZ128mk, 0 },
+ { X86::VFNMSUB231PHZ128rkz, X86::VFNMSUB231PHZ128mkz, 0 },
+ { X86::VFNMSUB231PHZ256rk, X86::VFNMSUB231PHZ256mk, 0 },
+ { X86::VFNMSUB231PHZ256rkz, X86::VFNMSUB231PHZ256mkz, 0 },
+ { X86::VFNMSUB231PHZrk, X86::VFNMSUB231PHZmk, 0 },
+ { X86::VFNMSUB231PHZrkz, X86::VFNMSUB231PHZmkz, 0 },
{ X86::VFNMSUB231PSZ128rk, X86::VFNMSUB231PSZ128mk, 0 },
{ X86::VFNMSUB231PSZ128rkz, X86::VFNMSUB231PSZ128mkz, 0 },
{ X86::VFNMSUB231PSZ256rk, X86::VFNMSUB231PSZ256mk, 0 },
@@ -4671,11 +5111,15 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VFNMSUB231PSZrkz, X86::VFNMSUB231PSZmkz, 0 },
{ X86::VFNMSUB231SDZr_Intk, X86::VFNMSUB231SDZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB231SDZr_Intkz, X86::VFNMSUB231SDZm_Intkz, TB_NO_REVERSE },
+ { X86::VFNMSUB231SHZr_Intk, X86::VFNMSUB231SHZm_Intk, TB_NO_REVERSE },
+ { X86::VFNMSUB231SHZr_Intkz, X86::VFNMSUB231SHZm_Intkz, TB_NO_REVERSE },
{ X86::VFNMSUB231SSZr_Intk, X86::VFNMSUB231SSZm_Intk, TB_NO_REVERSE },
{ X86::VFNMSUB231SSZr_Intkz, X86::VFNMSUB231SSZm_Intkz, TB_NO_REVERSE },
{ X86::VGETEXPSDZrk, X86::VGETEXPSDZmk, TB_NO_REVERSE },
+ { X86::VGETEXPSHZrk, X86::VGETEXPSHZmk, TB_NO_REVERSE },
{ X86::VGETEXPSSZrk, X86::VGETEXPSSZmk, TB_NO_REVERSE },
{ X86::VGETMANTSDZrrik, X86::VGETMANTSDZrmik, TB_NO_REVERSE },
+ { X86::VGETMANTSHZrrik, X86::VGETMANTSHZrmik, TB_NO_REVERSE },
{ X86::VGETMANTSSZrrik, X86::VGETMANTSSZrmik, TB_NO_REVERSE },
{ X86::VGF2P8AFFINEINVQBZ128rrik, X86::VGF2P8AFFINEINVQBZ128rmik, 0 },
{ X86::VGF2P8AFFINEINVQBZ256rrik, X86::VGF2P8AFFINEINVQBZ256rmik, 0 },
@@ -4701,38 +5145,56 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
{ X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
{ X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
+ { X86::VMAXCPHZ128rrk, X86::VMAXCPHZ128rmk, 0 },
+ { X86::VMAXCPHZ256rrk, X86::VMAXCPHZ256rmk, 0 },
+ { X86::VMAXCPHZrrk, X86::VMAXCPHZrmk, 0 },
{ X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
{ X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
{ X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
{ X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
+ { X86::VMAXPHZ128rrk, X86::VMAXPHZ128rmk, 0 },
+ { X86::VMAXPHZ256rrk, X86::VMAXPHZ256rmk, 0 },
+ { X86::VMAXPHZrrk, X86::VMAXPHZrmk, 0 },
{ X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
{ X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
{ X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
{ X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMAXSHZrr_Intk, X86::VMAXSHZrm_Intk, TB_NO_REVERSE },
{ X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, TB_NO_REVERSE },
{ X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
{ X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
{ X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
+ { X86::VMINCPHZ128rrk, X86::VMINCPHZ128rmk, 0 },
+ { X86::VMINCPHZ256rrk, X86::VMINCPHZ256rmk, 0 },
+ { X86::VMINCPHZrrk, X86::VMINCPHZrmk, 0 },
{ X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
{ X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
{ X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
{ X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
{ X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
{ X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
+ { X86::VMINPHZ128rrk, X86::VMINPHZ128rmk, 0 },
+ { X86::VMINPHZ256rrk, X86::VMINPHZ256rmk, 0 },
+ { X86::VMINPHZrrk, X86::VMINPHZrmk, 0 },
{ X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
{ X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
{ X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
{ X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMINSHZrr_Intk, X86::VMINSHZrm_Intk, TB_NO_REVERSE },
{ X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, TB_NO_REVERSE },
{ X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
{ X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
{ X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
+ { X86::VMULPHZ128rrk, X86::VMULPHZ128rmk, 0 },
+ { X86::VMULPHZ256rrk, X86::VMULPHZ256rmk, 0 },
+ { X86::VMULPHZrrk, X86::VMULPHZrmk, 0 },
{ X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
{ X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
{ X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
{ X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VMULSHZrr_Intk, X86::VMULSHZrm_Intk, TB_NO_REVERSE },
{ X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
{ X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
{ X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
@@ -5213,21 +5675,29 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VRCP14SSZrrk, X86::VRCP14SSZrmk, TB_NO_REVERSE },
{ X86::VRCP28SDZrk, X86::VRCP28SDZmk, TB_NO_REVERSE },
{ X86::VRCP28SSZrk, X86::VRCP28SSZmk, TB_NO_REVERSE },
+ { X86::VRCPSHZrrk, X86::VRCPSHZrmk, TB_NO_REVERSE },
{ X86::VREDUCESDZrrik, X86::VREDUCESDZrmik, TB_NO_REVERSE },
+ { X86::VREDUCESHZrrik, X86::VREDUCESHZrmik, TB_NO_REVERSE },
{ X86::VREDUCESSZrrik, X86::VREDUCESSZrmik, TB_NO_REVERSE },
{ X86::VRNDSCALESDZr_Intk, X86::VRNDSCALESDZm_Intk, TB_NO_REVERSE },
+ { X86::VRNDSCALESHZr_Intk, X86::VRNDSCALESHZm_Intk, TB_NO_REVERSE },
{ X86::VRNDSCALESSZr_Intk, X86::VRNDSCALESSZm_Intk, TB_NO_REVERSE },
{ X86::VRSQRT14SDZrrk, X86::VRSQRT14SDZrmk, TB_NO_REVERSE },
{ X86::VRSQRT14SSZrrk, X86::VRSQRT14SSZrmk, TB_NO_REVERSE },
{ X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE },
{ X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE },
+ { X86::VRSQRTSHZrrk, X86::VRSQRTSHZrmk, TB_NO_REVERSE },
{ X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0 },
{ X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0 },
{ X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0 },
+ { X86::VSCALEFPHZ128rrk, X86::VSCALEFPHZ128rmk, 0 },
+ { X86::VSCALEFPHZ256rrk, X86::VSCALEFPHZ256rmk, 0 },
+ { X86::VSCALEFPHZrrk, X86::VSCALEFPHZrmk, 0 },
{ X86::VSCALEFPSZ128rrk, X86::VSCALEFPSZ128rmk, 0 },
{ X86::VSCALEFPSZ256rrk, X86::VSCALEFPSZ256rmk, 0 },
{ X86::VSCALEFPSZrrk, X86::VSCALEFPSZrmk, 0 },
{ X86::VSCALEFSDZrrk, X86::VSCALEFSDZrmk, TB_NO_REVERSE },
+ { X86::VSCALEFSHZrrk, X86::VSCALEFSHZrmk, TB_NO_REVERSE },
{ X86::VSCALEFSSZrrk, X86::VSCALEFSSZrmk, TB_NO_REVERSE },
{ X86::VSHUFF32X4Z256rrik, X86::VSHUFF32X4Z256rmik, 0 },
{ X86::VSHUFF32X4Zrrik, X86::VSHUFF32X4Zrmik, 0 },
@@ -5244,14 +5714,19 @@ static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
{ X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
{ X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
{ X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE },
+ { X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE },
{ X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE },
{ X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
{ X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
{ X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
+ { X86::VSUBPHZ128rrk, X86::VSUBPHZ128rmk, 0 },
+ { X86::VSUBPHZ256rrk, X86::VSUBPHZ256rmk, 0 },
+ { X86::VSUBPHZrrk, X86::VSUBPHZrmk, 0 },
{ X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
{ X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
{ X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
{ X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
+ { X86::VSUBSHZrr_Intk, X86::VSUBSHZrm_Intk, TB_NO_REVERSE },
{ X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
{ X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
{ X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index dba13720cbd2..0e7033fc233a 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -149,8 +149,8 @@ def PS : Prefix<4>; // Similar to NoPrfx, but disassembler uses this to know
// disable to ANDPS.
// Class specifying the opcode map.
-class Map<bits<3> val> {
- bits<3> Value = val;
+class Map<bits<4> val> {
+ bits<4> Value = val;
}
def OB : Map<0>;
def TB : Map<1>;
@@ -160,6 +160,8 @@ def XOP8 : Map<4>;
def XOP9 : Map<5>;
def XOPA : Map<6>;
def ThreeDNow : Map<7>;
+def T_MAP5 : Map<8>;
+def T_MAP6 : Map<9>;
// Class specifying the encoding
class Encoding<bits<2> val> {
@@ -204,6 +206,16 @@ class XOP8 { Map OpMap = XOP8; Prefix OpPrefix = PS; }
class XOP9 { Map OpMap = XOP9; Prefix OpPrefix = PS; }
class XOPA { Map OpMap = XOPA; Prefix OpPrefix = PS; }
class ThreeDNow { Map OpMap = ThreeDNow; }
+class T_MAP5 { Map OpMap = T_MAP5; }
+class T_MAP5PS : T_MAP5 { Prefix OpPrefix = PS; } // none
+class T_MAP5PD : T_MAP5 { Prefix OpPrefix = PD; } // 0x66
+class T_MAP5XS : T_MAP5 { Prefix OpPrefix = XS; } // 0xF3
+class T_MAP5XD : T_MAP5 { Prefix OpPrefix = XD; } // 0xF2
+class T_MAP6 { Map OpMap = T_MAP6; }
+class T_MAP6PS : T_MAP6 { Prefix OpPrefix = PS; }
+class T_MAP6PD : T_MAP6 { Prefix OpPrefix = PD; }
+class T_MAP6XS : T_MAP6 { Prefix OpPrefix = XS; }
+class T_MAP6XD : T_MAP6 { Prefix OpPrefix = XD; }
class OBXS { Prefix OpPrefix = XS; }
class PS : TB { Prefix OpPrefix = PS; }
class PD : TB { Prefix OpPrefix = PD; }
@@ -284,6 +296,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+ let HasPositionOrder = 1;
+
//
// Attributes specific to X86 instructions...
//
@@ -301,7 +315,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
Prefix OpPrefix = NoPrfx; // Which prefix byte does this inst have?
bits<3> OpPrefixBits = OpPrefix.Value;
Map OpMap = OB; // Which opcode map does this inst have?
- bits<3> OpMapBits = OpMap.Value;
+ bits<4> OpMapBits = OpMap.Value;
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
@@ -360,28 +374,28 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{10-9} = AdSizeBits;
// No need for 3rd bit, we don't need to distinguish NoPrfx from PS.
let TSFlags{12-11} = OpPrefixBits{1-0};
- let TSFlags{15-13} = OpMapBits;
- let TSFlags{16} = hasREX_WPrefix;
- let TSFlags{20-17} = ImmT.Value;
- let TSFlags{23-21} = FPForm.Value;
- let TSFlags{24} = hasLockPrefix;
- let TSFlags{25} = hasREPPrefix;
- let TSFlags{27-26} = ExeDomain.Value;
- let TSFlags{29-28} = OpEncBits;
- let TSFlags{37-30} = Opcode;
+ let TSFlags{16-13} = OpMapBits;
+ let TSFlags{17} = hasREX_WPrefix;
+ let TSFlags{21-18} = ImmT.Value;
+ let TSFlags{24-22} = FPForm.Value;
+ let TSFlags{25} = hasLockPrefix;
+ let TSFlags{26} = hasREPPrefix;
+ let TSFlags{28-27} = ExeDomain.Value;
+ let TSFlags{30-29} = OpEncBits;
+ let TSFlags{38-31} = Opcode;
// Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
- let TSFlags{38} = HasVEX_W;
- let TSFlags{39} = hasVEX_4V;
- let TSFlags{40} = hasVEX_L;
- let TSFlags{41} = hasEVEX_K;
- let TSFlags{42} = hasEVEX_Z;
- let TSFlags{43} = hasEVEX_L2;
- let TSFlags{44} = hasEVEX_B;
+ let TSFlags{39} = HasVEX_W;
+ let TSFlags{40} = hasVEX_4V;
+ let TSFlags{41} = hasVEX_L;
+ let TSFlags{42} = hasEVEX_K;
+ let TSFlags{43} = hasEVEX_Z;
+ let TSFlags{44} = hasEVEX_L2;
+ let TSFlags{45} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
- let TSFlags{51-45} = CD8_Scale;
- let TSFlags{52} = hasEVEX_RC;
- let TSFlags{53} = hasNoTrackPrefix;
- let TSFlags{54} = ExplicitVEXPrefix;
+ let TSFlags{52-46} = CD8_Scale;
+ let TSFlags{53} = hasEVEX_RC;
+ let TSFlags{54} = hasNoTrackPrefix;
+ let TSFlags{55} = ExplicitVEXPrefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -738,18 +752,19 @@ class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8PD,
Requires<[UseSSE42]>;
-// SS42FI - SSE 4.2 instructions with T8XD prefix.
-// NOTE: 'HasSSE42' is used as SS42FI is only used for CRC32 insns.
-class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
-
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TAPD,
Requires<[UseSSE42]>;
+// CRC32I - SSE 4.2 CRC32 instructions.
+// NOTE: 'HasCRC32' is used as CRC32 instructions are GPR only and not directly
+// controlled by the SSE42 flag.
+class CRC32I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasCRC32]>;
+
// AVX Instruction Templates:
// Instructions introduced in AVX (no SSE equivalent forms)
//
@@ -870,7 +885,6 @@ class AVX512FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern>, T8PD,
EVEX_4V, Requires<[HasAVX512]>;
-class AVX512FMA3Base : T8PD, EVEX_4V;
class AVX512<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
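
Because the opcode-map field grew from 3 to 4 bits, every TSFlags field after it moves up by one bit in the layout above, and the C++ side that decodes TSFlags has to agree bit for bit with the new positions (OpMap now in bits 16-13, REX.W at bit 17, and so on). Below is a minimal sketch of that agreement, using the bit positions taken from the hunk above; the constant and accessor names are hypothetical, not the ones the backend actually defines:

// Minimal sketch: packing and decoding the widened OpMap field. Bit positions
// come from the TSFlags assignments above (OpMapBits in TSFlags{16-13},
// hasREX_WPrefix in TSFlags{17}); all names here are illustrative.
#include <cassert>
#include <cstdint>

namespace sketch {
constexpr unsigned OpMapShift = 13;                   // TSFlags{16-13}
constexpr uint64_t OpMapMask  = 0xFULL << OpMapShift; // now 4 bits wide
constexpr unsigned REXWShift  = OpMapShift + 4;       // was OpMapShift + 3

constexpr uint64_t getOpMap(uint64_t TSFlags) {
  return (TSFlags & OpMapMask) >> OpMapShift;
}
} // namespace sketch

int main() {
  uint64_t Flags = 0;
  Flags |= uint64_t{9} << sketch::OpMapShift; // T_MAP6 has Map value 9 above
  Flags |= uint64_t{1} << sketch::REXWShift;  // hasREX_WPrefix
  assert(sketch::getOpMap(Flags) == 9);
  return 0;
}
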
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 777c5a158b4c..166f1f8c3251 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -130,14 +130,12 @@ def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>;
def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>;
def X86vfpext : SDNode<"X86ISD::VFPEXT",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
- SDTCisSameSizeAs<0, 1>]>>;
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>]>>;
def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
- SDTCisSameSizeAs<0, 1>]>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>]>,
[SDNPHasChain]>;
def X86any_vfpext : PatFrags<(ops node:$src),
@@ -145,13 +143,13 @@ def X86any_vfpext : PatFrags<(ops node:$src),
(X86vfpext node:$src)]>;
def X86vfpround: SDNode<"X86ISD::VFPROUND",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>>;
def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>]>,
[SDNPHasChain]>;
@@ -160,33 +158,32 @@ def X86any_vfpround : PatFrags<(ops node:$src),
(X86vfpround node:$src)]>;
def X86frounds : SDNode<"X86ISD::VFPROUNDS",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
def X86fpexts : SDNode<"X86ISD::VFPEXTS",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
+ SDTCisFP<2>, SDTCisVec<2>,
SDTCisSameSizeAs<0, 2>]>>;
def X86vmfpround: SDNode<"X86ISD::VMFPROUND",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
- SDTCisSameSizeAs<0, 1>,
+ SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>>;
@@ -417,6 +414,11 @@ def X86Movss : SDNode<"X86ISD::MOVSS",
SDTCisVT<1, v4f32>,
SDTCisVT<2, v4f32>]>>;
+def X86Movsh : SDNode<"X86ISD::MOVSH",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v8f16>,
+ SDTCisVT<1, v8f16>,
+ SDTCisVT<2, v8f16>]>>;
+
def X86Movlhps : SDNode<"X86ISD::MOVLHPS",
SDTypeProfile<1, 2, [SDTCisVT<0, v4f32>,
SDTCisVT<1, v4f32>,
@@ -570,6 +572,24 @@ def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>,
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>;
def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>;
+def x86vfmaddc : SDNode<"X86ISD::VFMADDC", SDTFPTernaryOp, [SDNPCommutative]>;
+def x86vfmaddcRnd : SDNode<"X86ISD::VFMADDC_RND", SDTFmaRound, [SDNPCommutative]>;
+def x86vfcmaddc : SDNode<"X86ISD::VFCMADDC", SDTFPTernaryOp>;
+def x86vfcmaddcRnd : SDNode<"X86ISD::VFCMADDC_RND", SDTFmaRound>;
+def x86vfmulc : SDNode<"X86ISD::VFMULC", SDTFPBinOp, [SDNPCommutative]>;
+def x86vfmulcRnd : SDNode<"X86ISD::VFMULC_RND", SDTFPBinOpRound, [SDNPCommutative]>;
+def x86vfcmulc : SDNode<"X86ISD::VFCMULC", SDTFPBinOp>;
+def x86vfcmulcRnd : SDNode<"X86ISD::VFCMULC_RND", SDTFPBinOpRound>;
+
+def x86vfmaddcSh : SDNode<"X86ISD::VFMADDCSH", SDTFPTernaryOp, [SDNPCommutative]>;
+def x86vfcmaddcSh : SDNode<"X86ISD::VFCMADDCSH", SDTFPTernaryOp>;
+def x86vfmulcSh : SDNode<"X86ISD::VFMULCSH", SDTFPBinOp, [SDNPCommutative]>;
+def x86vfcmulcSh : SDNode<"X86ISD::VFCMULCSH", SDTFPBinOp>;
+def x86vfmaddcShRnd : SDNode<"X86ISD::VFMADDCSH_RND", SDTFmaRound, [SDNPCommutative]>;
+def x86vfcmaddcShRnd : SDNode<"X86ISD::VFCMADDCSH_RND",SDTFmaRound>;
+def x86vfmulcShRnd : SDNode<"X86ISD::VFMULCSH_RND", SDTFPBinOpRound, [SDNPCommutative]>;
+def x86vfcmulcShRnd : SDNode<"X86ISD::VFCMULCSH_RND", SDTFPBinOpRound>;
+
def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>;
def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>;
@@ -704,7 +724,6 @@ def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
// Masked versions of above
def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisInt<1>,
- SDTCisSameSizeAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCVecEltisVT<3, i1>,
SDTCisSameNumEltsAs<1, 3>]>;
@@ -752,12 +771,12 @@ def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH",
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<1, 4>]> >;
def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE",
- SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f64>,
- SDTCVecEltisVT<1, f32>,
+ SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<1, 0>]>>;
def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
- SDTCVecEltisVT<1, f64>,
+ SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
SDTCisOpSmallerThanOp<0, 1>,
SDTCisVT<2, i32>]>>;
@@ -796,6 +815,7 @@ def SDTX86MaskedStore: SDTypeProfile<0, 3, [ // masked store
//===----------------------------------------------------------------------===//
// 128-bit load pattern fragments
+def loadv8f16 : PatFrag<(ops node:$ptr), (v8f16 (load node:$ptr))>;
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
@@ -804,6 +824,7 @@ def loadv8i16 : PatFrag<(ops node:$ptr), (v8i16 (load node:$ptr))>;
def loadv16i8 : PatFrag<(ops node:$ptr), (v16i8 (load node:$ptr))>;
// 256-bit load pattern fragments
+def loadv16f16 : PatFrag<(ops node:$ptr), (v16f16 (load node:$ptr))>;
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
@@ -812,6 +833,7 @@ def loadv16i16 : PatFrag<(ops node:$ptr), (v16i16 (load node:$ptr))>;
def loadv32i8 : PatFrag<(ops node:$ptr), (v32i8 (load node:$ptr))>;
// 512-bit load pattern fragments
+def loadv32f16 : PatFrag<(ops node:$ptr), (v32f16 (load node:$ptr))>;
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
@@ -823,6 +845,10 @@ def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
def extloadv2f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (extloadvf32 node:$ptr)>;
+def extloadv2f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
+def extloadv4f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
+def extloadv8f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
+def extloadv16f16 : PatFrag<(ops node:$ptr), (extloadvf16 node:$ptr)>;
// Like 'store', but always requires vector size alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
@@ -839,6 +865,8 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
+def alignedloadv8f16 : PatFrag<(ops node:$ptr),
+ (v8f16 (alignedload node:$ptr))>;
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
@@ -854,6 +882,8 @@ def alignedloadv16i8 : PatFrag<(ops node:$ptr),
// 256-bit aligned load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
+def alignedloadv16f16 : PatFrag<(ops node:$ptr),
+ (v16f16 (alignedload node:$ptr))>;
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
@@ -868,6 +898,8 @@ def alignedloadv32i8 : PatFrag<(ops node:$ptr),
(v32i8 (alignedload node:$ptr))>;
// 512-bit aligned load pattern fragments
+def alignedloadv32f16 : PatFrag<(ops node:$ptr),
+ (v32f16 (alignedload node:$ptr))>;
def alignedloadv16f32 : PatFrag<(ops node:$ptr),
(v16f32 (alignedload node:$ptr))>;
def alignedloadv8f64 : PatFrag<(ops node:$ptr),
@@ -926,6 +958,11 @@ def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32 : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;
+def X86vzload16 : PatFrag<(ops node:$src),
+ (X86vzld node:$src), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 2;
+}]>;
+
def X86vzload32 : PatFrag<(ops node:$src),
(X86vzld node:$src), [{
return cast<MemIntrinsicSDNode>(N)->getMemoryVT().getStoreSize() == 4;
@@ -976,6 +1013,10 @@ def X86SubVBroadcastld256 : PatFrag<(ops node:$src),
// only load a single element.
// FIXME: We should add more canonicalizing in DAGCombine. Particularly removing
// the simple_load case.
+def sse_load_f16 : PatFrags<(ops node:$ptr),
+ [(v8f16 (simple_load node:$ptr)),
+ (v8f16 (X86vzload16 node:$ptr)),
+ (v8f16 (scalar_to_vector (loadf16 node:$ptr)))]>;
def sse_load_f32 : PatFrags<(ops node:$ptr),
[(v4f32 (simple_load node:$ptr)),
(v4f32 (X86vzload32 node:$ptr)),
@@ -985,9 +1026,13 @@ def sse_load_f64 : PatFrags<(ops node:$ptr),
(v2f64 (X86vzload64 node:$ptr)),
(v2f64 (scalar_to_vector (loadf64 node:$ptr)))]>;
+def shmem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
def ssmem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
def sdmem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
+def fp16imm0 : PatLeaf<(f16 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
@@ -1013,6 +1058,12 @@ def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
return getInsertVINSERTImmediate(N, 128, SDLoc(N));
}]>;
+// INSERT_get_vperm2x128_imm xform function: convert insert_subvector index to
+// commuted VPERM2F128/VPERM2I128 imm.
+def INSERT_get_vperm2x128_commutedimm : SDNodeXForm<insert_subvector, [{
+ return getPermuteVINSERTCommutedImmediate(N, 128, SDLoc(N));
+}]>;
+
// EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
// to VEXTRACTF64x4 imm.
def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 12a2d92fd888..639aa5199ea5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Sequence.h"
+#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -82,7 +83,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
(STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64
: X86::ADJCALLSTACKUP32),
X86::CATCHRET,
- (STI.is64Bit() ? X86::RETQ : X86::RETL)),
+ (STI.is64Bit() ? X86::RET64 : X86::RET32)),
Subtarget(STI), RI(STI.getTargetTriple()) {
}
@@ -699,6 +700,8 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
return true;
case X86::MOV16rm:
case X86::KMOVWkm:
+ case X86::VMOVSHZrm:
+ case X86::VMOVSHZrm_alt:
MemBytes = 2;
return true;
case X86::MOV32rm:
@@ -795,6 +798,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
return true;
case X86::MOV16mr:
case X86::KMOVWmk:
+ case X86::VMOVSHZmr:
MemBytes = 2;
return true;
case X86::MOV32mr:
@@ -980,6 +984,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
case X86::AVX512_FsFLD0SD:
+ case X86::AVX512_FsFLD0SH:
case X86::AVX512_FsFLD0SS:
case X86::AVX512_FsFLD0F128:
case X86::AVX_SET0:
@@ -1047,6 +1052,8 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::VMOVSSZrm_alt:
case X86::VMOVSDZrm:
case X86::VMOVSDZrm_alt:
+ case X86::VMOVSHZrm:
+ case X86::VMOVSHZrm_alt:
case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVAPDZrm:
@@ -1189,7 +1196,7 @@ inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) {
bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned Opc, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
- LiveVariables *LV) const {
+ LiveVariables *LV, LiveIntervals *LIS) const {
MachineFunction &MF = *MI.getParent()->getParent();
const TargetRegisterClass *RC;
if (AllowSP) {
@@ -1199,12 +1206,12 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
&X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass;
}
Register SrcReg = Src.getReg();
+ isKill = MI.killsRegister(SrcReg);
// For both LEA64 and LEA32 the register already has essentially the right
// type (32-bit or 64-bit) we may just need to forbid SP.
if (Opc != X86::LEA64_32r) {
NewSrc = SrcReg;
- isKill = Src.isKill();
assert(!Src.isUndef() && "Undef op doesn't need optimization");
if (NewSrc.isVirtual() && !MF.getRegInfo().constrainRegClass(NewSrc, RC))
@@ -1219,8 +1226,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
ImplicitOp = Src;
ImplicitOp.setImplicit();
- NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
- isKill = Src.isKill();
+ NewSrc = getX86SubSuperRegister(SrcReg, 64);
assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
@@ -1229,24 +1235,36 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
MachineInstr *Copy =
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(NewSrc, RegState::Define | RegState::Undef, X86::sub_32bit)
- .add(Src);
+ .addReg(SrcReg, getKillRegState(isKill));
// Which is obviously going to be dead after we're done with it.
isKill = true;
if (LV)
LV->replaceKillInstruction(SrcReg, MI, *Copy);
+
+ if (LIS) {
+ SlotIndex CopyIdx = LIS->InsertMachineInstrInMaps(*Copy);
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ LiveInterval &LI = LIS->getInterval(SrcReg);
+ LiveRange::Segment *S = LI.getSegmentContaining(Idx);
+ if (S->end.getBaseIndex() == Idx)
+ S->end = CopyIdx.getRegSlot();
+ }
}
// We've set all the parameters without issue.
return true;
}
-MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
- unsigned MIOpc, MachineFunction::iterator &MFI, MachineInstr &MI,
- LiveVariables *LV, bool Is8BitOp) const {
+MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS,
+ bool Is8BitOp) const {
// We handle 8-bit adds and various 16-bit opcodes in the switch below.
- MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineRegisterInfo &RegInfo = MBB.getParent()->getRegInfo();
assert((Is8BitOp || RegInfo.getTargetRegisterInfo()->getRegSizeInBits(
*RegInfo.getRegClass(MI.getOperand(0).getReg())) == 16) &&
"Unexpected type for LEA transform");
@@ -1264,6 +1282,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
unsigned Opcode = X86::LEA64_32r;
Register InRegLEA = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass);
Register OutRegLEA = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ Register InRegLEA2;
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
@@ -1275,18 +1294,22 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineBasicBlock::iterator MBBI = MI.getIterator();
Register Dest = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
+ Register Src2;
bool IsDead = MI.getOperand(0).isDead();
bool IsKill = MI.getOperand(1).isKill();
unsigned SubReg = Is8BitOp ? X86::sub_8bit : X86::sub_16bit;
assert(!MI.getOperand(1).isUndef() && "Undef op doesn't need optimization");
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
+ MachineInstr *ImpDef =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA);
MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA, RegState::Define, SubReg)
.addReg(Src, getKillRegState(IsKill));
+ MachineInstr *ImpDef2 = nullptr;
+ MachineInstr *InsMI2 = nullptr;
MachineInstrBuilder MIB =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(Opcode), OutRegLEA);
switch (MIOpc) {
default: llvm_unreachable("Unreachable!");
case X86::SHL8ri:
@@ -1316,11 +1339,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
case X86::ADD8rr_DB:
case X86::ADD16rr:
case X86::ADD16rr_DB: {
- Register Src2 = MI.getOperand(2).getReg();
+ Src2 = MI.getOperand(2).getReg();
bool IsKill2 = MI.getOperand(2).isKill();
assert(!MI.getOperand(2).isUndef() && "Undef op doesn't need optimization");
- unsigned InRegLEA2 = 0;
- MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
// ADD8rr/ADD16rr killed %reg1028, %reg1028
// just a single insert_subreg.
@@ -1332,8 +1353,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
InRegLEA2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we will be shifting and then extracting the lower 8/16-bits.
- BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF), InRegLEA2);
- InsMI2 = BuildMI(*MFI, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ ImpDef2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(X86::IMPLICIT_DEF),
+ InRegLEA2);
+ InsMI2 = BuildMI(MBB, &*MIB, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(InRegLEA2, RegState::Define, SubReg)
.addReg(Src2, getKillRegState(IsKill2));
addRegReg(MIB, InRegLEA, true, InRegLEA2, true);
@@ -1346,7 +1368,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(IsDead))
.addReg(OutRegLEA, RegState::Kill, SubReg);
@@ -1360,6 +1382,45 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
LV->replaceKillInstruction(Dest, MI, *ExtMI);
}
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*ImpDef);
+ SlotIndex InsIdx = LIS->InsertMachineInstrInMaps(*InsMI);
+ if (ImpDef2)
+ LIS->InsertMachineInstrInMaps(*ImpDef2);
+ SlotIndex Ins2Idx;
+ if (InsMI2)
+ Ins2Idx = LIS->InsertMachineInstrInMaps(*InsMI2);
+ SlotIndex NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ SlotIndex ExtIdx = LIS->InsertMachineInstrInMaps(*ExtMI);
+ LIS->getInterval(InRegLEA);
+ LIS->getInterval(OutRegLEA);
+ if (InRegLEA2)
+ LIS->getInterval(InRegLEA2);
+
+ // Move the use of Src up to InsMI.
+ LiveInterval &SrcLI = LIS->getInterval(Src);
+ LiveRange::Segment *SrcSeg = SrcLI.getSegmentContaining(NewIdx);
+ if (SrcSeg->end == NewIdx.getRegSlot())
+ SrcSeg->end = InsIdx.getRegSlot();
+
+ if (InsMI2) {
+ // Move the use of Src2 up to InsMI2.
+ LiveInterval &Src2LI = LIS->getInterval(Src2);
+ LiveRange::Segment *Src2Seg = Src2LI.getSegmentContaining(NewIdx);
+ if (Src2Seg->end == NewIdx.getRegSlot())
+ Src2Seg->end = Ins2Idx.getRegSlot();
+ }
+
+ // Move the definition of Dest down to ExtMI.
+ LiveInterval &DestLI = LIS->getInterval(Dest);
+ LiveRange::Segment *DestSeg =
+ DestLI.getSegmentContaining(NewIdx.getRegSlot());
+ assert(DestSeg->start == NewIdx.getRegSlot() &&
+ DestSeg->valno->def == NewIdx.getRegSlot());
+ DestSeg->start = ExtIdx.getRegSlot();
+ DestSeg->valno->def = ExtIdx.getRegSlot();
+ }
+
return ExtMI;
}
@@ -1373,9 +1434,9 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
-MachineInstr *
-X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI, LiveVariables *LV) const {
+MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
+ LiveVariables *LV,
+ LiveIntervals *LIS) const {
// The following opcodes also set the condition code register(s). Only
// convert them to equivalent lea if the condition code register defs
// are dead!
@@ -1398,6 +1459,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return nullptr;
MachineInstr *NewMI = nullptr;
+ Register SrcReg, SrcReg2;
bool Is64Bit = Subtarget.is64Bit();
bool Is8BitOp = false;
@@ -1432,10 +1494,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// LEA can't handle ESP.
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
- SrcReg, isKill, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB =
@@ -1460,7 +1521,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
if (!isTruncatedShiftCountForLEA(ShAmt))
return nullptr;
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
}
case X86::INC64r:
case X86::INC32r: {
@@ -1468,10 +1529,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r :
(Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
- ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB =
@@ -1491,10 +1551,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
: (Is64Bit ? X86::LEA64_32r : X86::LEA32r);
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false, SrcReg, isKill,
- ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/false, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1513,7 +1572,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LLVM_FALLTHROUGH;
case X86::DEC16r:
case X86::INC16r:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64rr:
case X86::ADD64rr_DB:
case X86::ADD32rr:
@@ -1525,21 +1584,26 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
else
Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
- bool isKill;
- Register SrcReg;
- MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, ImplicitOp, LV))
- return nullptr;
-
const MachineOperand &Src2 = MI.getOperand(2);
bool isKill2;
- Register SrcReg2;
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
- SrcReg2, isKill2, ImplicitOp2, LV))
+ if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/false, SrcReg2, isKill2,
+ ImplicitOp2, LV, LIS))
return nullptr;
+ bool isKill;
+ MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
+ if (Src.getReg() == Src2.getReg()) {
+ // Don't call classifyLEAReg a second time on the same register, in case
+ // the first call inserted a COPY from Src2 and marked it as killed.
+ isKill = isKill2;
+ SrcReg = SrcReg2;
+ } else {
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
+ return nullptr;
+ }
+
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc)).add(Dest);
if (ImplicitOp.getReg() != 0)
MIB.add(ImplicitOp);
@@ -1557,7 +1621,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LLVM_FALLTHROUGH;
case X86::ADD16rr:
case X86::ADD16rr_DB:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64ri32_DB:
@@ -1575,10 +1639,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1598,7 +1661,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD16ri8:
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
- return convertToThreeAddressWithLEA(MIOpc, MFI, MI, LV, Is8BitOp);
+ return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::SUB8ri:
case X86::SUB16ri8:
case X86::SUB16ri:
@@ -1616,10 +1679,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
bool isKill;
- Register SrcReg;
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
- if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
- SrcReg, isKill, ImplicitOp, LV))
+ if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/true, SrcReg, isKill,
+ ImplicitOp, LV, LIS))
return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI.getDebugLoc(), get(Opc))
@@ -1806,7 +1868,17 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
LV->replaceKillInstruction(Dest.getReg(), MI, *NewMI);
}
- MFI->insert(MI.getIterator(), NewMI); // Insert the new inst
+ MachineBasicBlock &MBB = *MI.getParent();
+ MBB.insert(MI.getIterator(), NewMI); // Insert the new inst
+
+ if (LIS) {
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
+ if (SrcReg)
+ LIS->getInterval(SrcReg);
+ if (SrcReg2)
+ LIS->getInterval(SrcReg2);
+ }
+
return NewMI;
}
@@ -2235,6 +2307,10 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VCMPSSZrr:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
+ case X86::VCMPSHZrr:
+ case X86::VCMPPHZrri:
+ case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rri:
case X86::VCMPPDZ128rri:
case X86::VCMPPSZ128rri:
case X86::VCMPPDZ256rri:
@@ -2481,6 +2557,10 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VCMPSSZrr:
case X86::VCMPPDZrri:
case X86::VCMPPSZrri:
+ case X86::VCMPSHZrr:
+ case X86::VCMPPHZrri:
+ case X86::VCMPPHZ128rri:
+ case X86::VCMPPHZ256rri:
case X86::VCMPPDZ128rri:
case X86::VCMPPSZ128rri:
case X86::VCMPPDZ256rri:
@@ -2606,7 +2686,19 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPMADD52LUQZ256rkz:
case X86::VPMADD52LUQZr:
case X86::VPMADD52LUQZrk:
- case X86::VPMADD52LUQZrkz: {
+ case X86::VPMADD52LUQZrkz:
+ case X86::VFMADDCPHZr:
+ case X86::VFMADDCPHZrk:
+ case X86::VFMADDCPHZrkz:
+ case X86::VFMADDCPHZ128r:
+ case X86::VFMADDCPHZ128rk:
+ case X86::VFMADDCPHZ128rkz:
+ case X86::VFMADDCPHZ256r:
+ case X86::VFMADDCPHZ256rk:
+ case X86::VFMADDCPHZ256rkz:
+ case X86::VFMADDCSHZr:
+ case X86::VFMADDCSHZrk:
+ case X86::VFMADDCSHZrkz: {
unsigned CommutableOpIdx1 = 2;
unsigned CommutableOpIdx2 = 3;
if (X86II::isKMasked(Desc.TSFlags)) {
@@ -2834,11 +2926,6 @@ X86::getX86ConditionCode(CmpInst::Predicate Predicate) {
return std::make_pair(CC, NeedSwap);
}
-/// Return a setcc opcode based on whether it has memory operand.
-unsigned X86::getSETOpc(bool HasMemoryOperand) {
- return HasMemoryOperand ? X86::SETCCr : X86::SETCCm;
-}
-
/// Return a cmov opcode for the given register size in bytes, and operand type.
unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand) {
switch(RegBytes) {
@@ -2919,6 +3006,23 @@ unsigned X86::getSwappedVCMPImm(unsigned Imm) {
return Imm;
}
+/// Return true if Reg is an X87 register.
+static bool isX87Reg(unsigned Reg) {
+ return (Reg == X86::FPCW || Reg == X86::FPSW ||
+ (Reg >= X86::ST0 && Reg <= X86::ST7));
+}
+
+/// Check if the instruction is an X87 instruction.
+bool X86::isX87Instruction(MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (isX87Reg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
case X86::TCRETURNdi:
@@ -3018,13 +3122,13 @@ static MachineBasicBlock *getFallThroughMBB(MachineBasicBlock *MBB,
// and fallthrough MBB. If we find more than one, we cannot identify the
// fallthrough MBB and should return nullptr.
MachineBasicBlock *FallthroughBB = nullptr;
- for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
- if ((*SI)->isEHPad() || (*SI == TBB && FallthroughBB))
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ->isEHPad() || (Succ == TBB && FallthroughBB))
continue;
// Return a nullptr if we found more than one fallthrough successor.
if (FallthroughBB && FallthroughBB != TBB)
return nullptr;
- FallthroughBB = *SI;
+ FallthroughBB = Succ;
}
return FallthroughBB;
}
@@ -3228,13 +3332,13 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MachineInstr *ConditionDef = nullptr;
bool SingleUseCondition = true;
- for (auto I = std::next(MBB.rbegin()), E = MBB.rend(); I != E; ++I) {
- if (I->modifiesRegister(X86::EFLAGS, TRI)) {
- ConditionDef = &*I;
+ for (MachineInstr &MI : llvm::drop_begin(llvm::reverse(MBB))) {
+ if (MI.modifiesRegister(X86::EFLAGS, TRI)) {
+ ConditionDef = &MI;
break;
}
- if (I->readsRegister(X86::EFLAGS, TRI))
+ if (MI.readsRegister(X86::EFLAGS, TRI))
SingleUseCondition = false;
}
@@ -3605,6 +3709,10 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
case 2:
if (X86::VK16RegClass.hasSubClassEq(RC))
return load ? X86::KMOVWkm : X86::KMOVWmk;
+ if (X86::FR16XRegClass.hasSubClassEq(RC)) {
+ assert(STI.hasFP16());
+ return load ? X86::VMOVSHZrm_alt : X86::VMOVSHZmr;
+ }
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return load ? X86::MOV16rm : X86::MOV16mr;
case 4:
@@ -3680,12 +3788,6 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
HasAVX ? X86::VMOVUPSmr :
X86::MOVUPSmr);
}
- if (X86::BNDRRegClass.hasSubClassEq(RC)) {
- if (STI.is64Bit())
- return load ? X86::BNDMOV64rm : X86::BNDMOV64mr;
- else
- return load ? X86::BNDMOV32rm : X86::BNDMOV32mr;
- }
llvm_unreachable("Unknown 16-byte regclass");
}
case 32:
@@ -3904,8 +4006,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
}
bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const {
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const {
switch (MI.getOpcode()) {
default: break;
case X86::CMP64ri32:
@@ -3984,42 +4086,83 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
return false;
}
-/// Check whether the first instruction, whose only
-/// purpose is to update flags, can be made redundant.
-/// CMPrr can be made redundant by SUBrr if the operands are the same.
-/// This function can be extended later on.
-/// SrcReg, SrcRegs: register operands for FlagI.
-/// ImmValue: immediate for FlagI if it takes an immediate.
-inline static bool isRedundantFlagInstr(const MachineInstr &FlagI,
+bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
Register SrcReg, Register SrcReg2,
- int ImmMask, int ImmValue,
- const MachineInstr &OI) {
- if (((FlagI.getOpcode() == X86::CMP64rr && OI.getOpcode() == X86::SUB64rr) ||
- (FlagI.getOpcode() == X86::CMP32rr && OI.getOpcode() == X86::SUB32rr) ||
- (FlagI.getOpcode() == X86::CMP16rr && OI.getOpcode() == X86::SUB16rr) ||
- (FlagI.getOpcode() == X86::CMP8rr && OI.getOpcode() == X86::SUB8rr)) &&
- ((OI.getOperand(1).getReg() == SrcReg &&
- OI.getOperand(2).getReg() == SrcReg2) ||
- (OI.getOperand(1).getReg() == SrcReg2 &&
- OI.getOperand(2).getReg() == SrcReg)))
- return true;
-
- if (ImmMask != 0 &&
- ((FlagI.getOpcode() == X86::CMP64ri32 &&
- OI.getOpcode() == X86::SUB64ri32) ||
- (FlagI.getOpcode() == X86::CMP64ri8 &&
- OI.getOpcode() == X86::SUB64ri8) ||
- (FlagI.getOpcode() == X86::CMP32ri && OI.getOpcode() == X86::SUB32ri) ||
- (FlagI.getOpcode() == X86::CMP32ri8 &&
- OI.getOpcode() == X86::SUB32ri8) ||
- (FlagI.getOpcode() == X86::CMP16ri && OI.getOpcode() == X86::SUB16ri) ||
- (FlagI.getOpcode() == X86::CMP16ri8 &&
- OI.getOpcode() == X86::SUB16ri8) ||
- (FlagI.getOpcode() == X86::CMP8ri && OI.getOpcode() == X86::SUB8ri)) &&
- OI.getOperand(1).getReg() == SrcReg &&
- OI.getOperand(2).getImm() == ImmValue)
- return true;
- return false;
+ int64_t ImmMask, int64_t ImmValue,
+ const MachineInstr &OI, bool *IsSwapped,
+ int64_t *ImmDelta) const {
+ switch (OI.getOpcode()) {
+ case X86::CMP64rr:
+ case X86::CMP32rr:
+ case X86::CMP16rr:
+ case X86::CMP8rr:
+ case X86::SUB64rr:
+ case X86::SUB32rr:
+ case X86::SUB16rr:
+ case X86::SUB8rr: {
+ Register OISrcReg;
+ Register OISrcReg2;
+ int64_t OIMask;
+ int64_t OIValue;
+ if (!analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) ||
+ OIMask != ImmMask || OIValue != ImmValue)
+ return false;
+ if (SrcReg == OISrcReg && SrcReg2 == OISrcReg2) {
+ *IsSwapped = false;
+ return true;
+ }
+ if (SrcReg == OISrcReg2 && SrcReg2 == OISrcReg) {
+ *IsSwapped = true;
+ return true;
+ }
+ return false;
+ }
+ case X86::CMP64ri32:
+ case X86::CMP64ri8:
+ case X86::CMP32ri:
+ case X86::CMP32ri8:
+ case X86::CMP16ri:
+ case X86::CMP16ri8:
+ case X86::CMP8ri:
+ case X86::SUB64ri32:
+ case X86::SUB64ri8:
+ case X86::SUB32ri:
+ case X86::SUB32ri8:
+ case X86::SUB16ri:
+ case X86::SUB16ri8:
+ case X86::SUB8ri:
+ case X86::TEST64rr:
+ case X86::TEST32rr:
+ case X86::TEST16rr:
+ case X86::TEST8rr: {
+ if (ImmMask != 0) {
+ Register OISrcReg;
+ Register OISrcReg2;
+ int64_t OIMask;
+ int64_t OIValue;
+ if (analyzeCompare(OI, OISrcReg, OISrcReg2, OIMask, OIValue) &&
+ SrcReg == OISrcReg && ImmMask == OIMask) {
+ if (OIValue == ImmValue) {
+ *ImmDelta = 0;
+ return true;
+ } else if (static_cast<uint64_t>(ImmValue) ==
+ static_cast<uint64_t>(OIValue) - 1) {
+ *ImmDelta = -1;
+ return true;
+ } else if (static_cast<uint64_t>(ImmValue) ==
+ static_cast<uint64_t>(OIValue) + 1) {
+ *ImmDelta = 1;
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+ return FlagI.isIdenticalTo(OI);
+ }
+ default:
+ return false;
+ }
}
/// Check whether the definition can be converted
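
The immediate cases above also let a later compare reuse flags when its immediate differs from the earlier one by exactly one, reporting the difference through *ImmDelta so the caller can remap the consumer's condition codes. The underlying equivalence, for signed values with the smaller constant still representable, is that "x < C" and "x <= C - 1" test the same condition. A standalone illustration of that equivalence (not code from the patch):

// Standalone illustration (not the patch's code): why an ImmDelta of -1 is
// recoverable. For 32-bit signed X and a constant C with C - 1 representable,
// (X < C) == (X <= C - 1), so a consumer of "cmp $C, %reg ; jl" could instead
// read the flags of an earlier "cmp $(C-1), %reg" and use jle.
#include <cassert>
#include <cstdint>

int main() {
  const int32_t C = 3;
  for (int32_t X : {-1000, -1, 0, 1, 2, 3, 4, 1000}) {
    bool UsesLaterCmp   = (X < C);      // jl against the flags of cmp $3
    bool UsesEarlierCmp = (X <= C - 1); // jle against the flags of cmp $2
    assert(UsesLaterCmp == UsesEarlierCmp);
  }
  return 0;
}
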
@@ -4189,8 +4332,8 @@ static X86::CondCode isUseDefConvertible(const MachineInstr &MI) {
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask,
- int CmpValue,
+ Register SrcReg2, int64_t CmpMask,
+ int64_t CmpValue,
const MachineRegisterInfo *MRI) const {
// Check whether we can replace SUB with CMP.
switch (CmpInstr.getOpcode()) {
@@ -4243,114 +4386,117 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
}
}
- // Get the unique definition of SrcReg.
- MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
- if (!MI) return false;
-
- // CmpInstr is the first instruction of the BB.
- MachineBasicBlock::iterator I = CmpInstr, Def = MI;
+ // The following code tries to remove the comparison by re-using EFLAGS
+ // from earlier instructions.
- // If we are comparing against zero, check whether we can use MI to update
- // EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
bool IsCmpZero = (CmpMask != 0 && CmpValue == 0);
- if (IsCmpZero && MI->getParent() != CmpInstr.getParent())
+
+ // Transformation currently requires SSA values.
+ if (SrcReg2.isPhysical())
return false;
+ MachineInstr *SrcRegDef = MRI->getVRegDef(SrcReg);
+ assert(SrcRegDef && "Must have a definition (SSA)");
- // If we have a use of the source register between the def and our compare
- // instruction we can eliminate the compare iff the use sets EFLAGS in the
- // right way.
- bool ShouldUpdateCC = false;
+ MachineInstr *MI = nullptr;
+ MachineInstr *Sub = nullptr;
+ MachineInstr *Movr0Inst = nullptr;
bool NoSignFlag = false;
bool ClearsOverflowFlag = false;
+ bool ShouldUpdateCC = false;
+ bool IsSwapped = false;
X86::CondCode NewCC = X86::COND_INVALID;
- if (IsCmpZero && !isDefConvertible(*MI, NoSignFlag, ClearsOverflowFlag)) {
- // Scan forward from the use until we hit the use we're looking for or the
- // compare instruction.
- for (MachineBasicBlock::iterator J = MI;; ++J) {
- // Do we have a convertible instruction?
- NewCC = isUseDefConvertible(*J);
- if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
- J->getOperand(1).getReg() == SrcReg) {
- assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
- ShouldUpdateCC = true; // Update CC later on.
- // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
- // with the new def.
- Def = J;
- MI = &*Def;
- break;
- }
+ int64_t ImmDelta = 0;
- if (J == I)
+ // Search backward from CmpInstr for the next instruction defining EFLAGS.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ MachineBasicBlock &CmpMBB = *CmpInstr.getParent();
+ MachineBasicBlock::reverse_iterator From =
+ std::next(MachineBasicBlock::reverse_iterator(CmpInstr));
+ for (MachineBasicBlock *MBB = &CmpMBB;;) {
+ for (MachineInstr &Inst : make_range(From, MBB->rend())) {
+ // Try to use EFLAGS from the instruction defining %SrcReg. Example:
+ // %eax = addl ...
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (&Inst == SrcRegDef) {
+ if (IsCmpZero &&
+ isDefConvertible(Inst, NoSignFlag, ClearsOverflowFlag)) {
+ MI = &Inst;
+ break;
+ }
+ // Cannot find other candidates before definition of SrcReg.
return false;
- }
- }
+ }
- // We are searching for an earlier instruction that can make CmpInstr
- // redundant and that instruction will be saved in Sub.
- MachineInstr *Sub = nullptr;
- const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (Inst.modifiesRegister(X86::EFLAGS, TRI)) {
+ // Try to use EFLAGS produced by an instruction reading %SrcReg.
+ // Example:
+ // %eax = ...
+ // ...
+ // popcntl %eax
+ // ... // EFLAGS not changed
+ // testl %eax, %eax // <-- can be removed
+ if (IsCmpZero) {
+ NewCC = isUseDefConvertible(Inst);
+ if (NewCC != X86::COND_INVALID && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(1).getReg() == SrcReg) {
+ ShouldUpdateCC = true;
+ MI = &Inst;
+ break;
+ }
+ }
- // We iterate backward, starting from the instruction before CmpInstr and
- // stop when reaching the definition of a source register or done with the BB.
- // RI points to the instruction before CmpInstr.
- // If the definition is in this basic block, RE points to the definition;
- // otherwise, RE is the rend of the basic block.
- MachineBasicBlock::reverse_iterator
- RI = ++I.getReverse(),
- RE = CmpInstr.getParent() == MI->getParent()
- ? Def.getReverse() /* points to MI */
- : CmpInstr.getParent()->rend();
- MachineInstr *Movr0Inst = nullptr;
- for (; RI != RE; ++RI) {
- MachineInstr &Instr = *RI;
- // Check whether CmpInstr can be made redundant by the current instruction.
- if (!IsCmpZero && isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask,
- CmpValue, Instr)) {
- Sub = &Instr;
- break;
- }
+ // Try to use EFLAGS from an instruction with similar flag results.
+ // Example:
+ // sub x, y or cmp x, y
+ // ... // EFLAGS not changed
+ // cmp x, y // <-- can be removed
+ if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpMask, CmpValue,
+ Inst, &IsSwapped, &ImmDelta)) {
+ Sub = &Inst;
+ break;
+ }
- if (Instr.modifiesRegister(X86::EFLAGS, TRI) ||
- Instr.readsRegister(X86::EFLAGS, TRI)) {
- // This instruction modifies or uses EFLAGS.
+      // MOV32r0 is implemented with xor, which clobbers the condition code.
+      // It is safe to move it up if the definition of EFLAGS is dead and
+      // earlier instructions do not read or write EFLAGS.
+ if (!Movr0Inst && Inst.getOpcode() == X86::MOV32r0 &&
+ Inst.registerDefIsDead(X86::EFLAGS, TRI)) {
+ Movr0Inst = &Inst;
+ continue;
+ }
- // MOV32r0 etc. are implemented with xor which clobbers condition code.
- // They are safe to move up, if the definition to EFLAGS is dead and
- // earlier instructions do not read or write EFLAGS.
- if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
- Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
- Movr0Inst = &Instr;
- continue;
+      // Cannot do anything for any other EFLAGS changes.
+ return false;
}
-
- // We can't remove CmpInstr.
- return false;
}
- }
- // Return false if no candidates exist.
- if (!IsCmpZero && !Sub)
- return false;
+ if (MI || Sub)
+ break;
- bool IsSwapped =
- (SrcReg2 != 0 && Sub && Sub->getOperand(1).getReg() == SrcReg2 &&
- Sub->getOperand(2).getReg() == SrcReg);
+    // Reached the beginning of the basic block. Continue in the predecessor
+    // if there is exactly one.
+ if (MBB->pred_size() != 1)
+ return false;
+ MBB = *MBB->pred_begin();
+ From = MBB->rbegin();
+ }
// Scan forward from the instruction after CmpInstr for uses of EFLAGS.
// It is safe to remove CmpInstr if EFLAGS is redefined or killed.
// If we are done with the basic block, we need to check whether EFLAGS is
// live-out.
- bool IsSafe = false;
+ bool FlagsMayLiveOut = true;
SmallVector<std::pair<MachineInstr*, X86::CondCode>, 4> OpsToUpdate;
- MachineBasicBlock::iterator E = CmpInstr.getParent()->end();
- for (++I; I != E; ++I) {
- const MachineInstr &Instr = *I;
+ MachineBasicBlock::iterator AfterCmpInstr =
+ std::next(MachineBasicBlock::iterator(CmpInstr));
+ for (MachineInstr &Instr : make_range(AfterCmpInstr, CmpMBB.end())) {
bool ModifyEFLAGS = Instr.modifiesRegister(X86::EFLAGS, TRI);
bool UseEFLAGS = Instr.readsRegister(X86::EFLAGS, TRI);
// We should check the usage if this instruction uses and updates EFLAGS.
if (!UseEFLAGS && ModifyEFLAGS) {
// It is safe to remove CmpInstr if EFLAGS is updated again.
- IsSafe = true;
+ FlagsMayLiveOut = false;
break;
}
if (!UseEFLAGS && !ModifyEFLAGS)
@@ -4358,7 +4504,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// EFLAGS is used by this instruction.
X86::CondCode OldCC = X86::COND_INVALID;
- if (IsCmpZero || IsSwapped) {
+ if (MI || IsSwapped || ImmDelta != 0) {
// We decode the condition code from opcode.
if (Instr.isBranch())
OldCC = X86::getCondFromBranch(Instr);
@@ -4370,7 +4516,7 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
if (OldCC == X86::COND_INVALID) return false;
}
X86::CondCode ReplacementCC = X86::COND_INVALID;
- if (IsCmpZero) {
+ if (MI) {
switch (OldCC) {
default: break;
case X86::COND_A: case X86::COND_AE:
@@ -4411,43 +4557,97 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
// We swap the condition code and synthesize the new opcode.
ReplacementCC = getSwappedCondition(OldCC);
if (ReplacementCC == X86::COND_INVALID) return false;
+ ShouldUpdateCC = true;
+ } else if (ImmDelta != 0) {
+ unsigned BitWidth = TRI->getRegSizeInBits(*MRI->getRegClass(SrcReg));
+        // The register width determines the signed/unsigned min/max constants
+        // used in the wrap-around checks below.
+ switch (OldCC) {
+ case X86::COND_L: // x <s (C + 1) --> x <=s C
+ if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_LE;
+ break;
+ case X86::COND_B: // x <u (C + 1) --> x <=u C
+ if (ImmDelta != 1 || CmpValue == 0)
+ return false;
+ ReplacementCC = X86::COND_BE;
+ break;
+ case X86::COND_GE: // x >=s (C + 1) --> x >s C
+ if (ImmDelta != 1 || APInt::getSignedMinValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_G;
+ break;
+ case X86::COND_AE: // x >=u (C + 1) --> x >u C
+ if (ImmDelta != 1 || CmpValue == 0)
+ return false;
+ ReplacementCC = X86::COND_A;
+ break;
+ case X86::COND_G: // x >s (C - 1) --> x >=s C
+ if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_GE;
+ break;
+ case X86::COND_A: // x >u (C - 1) --> x >=u C
+ if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_AE;
+ break;
+ case X86::COND_LE: // x <=s (C - 1) --> x <s C
+ if (ImmDelta != -1 || APInt::getSignedMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_L;
+ break;
+ case X86::COND_BE: // x <=u (C - 1) --> x <u C
+ if (ImmDelta != -1 || APInt::getMaxValue(BitWidth) == CmpValue)
+ return false;
+ ReplacementCC = X86::COND_B;
+ break;
+ default:
+ return false;
+ }
+ ShouldUpdateCC = true;
}
- if ((ShouldUpdateCC || IsSwapped) && ReplacementCC != OldCC) {
+ if (ShouldUpdateCC && ReplacementCC != OldCC) {
// Push the MachineInstr to OpsToUpdate.
// If it is safe to remove CmpInstr, the condition code of these
// instructions will be modified.
- OpsToUpdate.push_back(std::make_pair(&*I, ReplacementCC));
+ OpsToUpdate.push_back(std::make_pair(&Instr, ReplacementCC));
}
if (ModifyEFLAGS || Instr.killsRegister(X86::EFLAGS, TRI)) {
// It is safe to remove CmpInstr if EFLAGS is updated again or killed.
- IsSafe = true;
+ FlagsMayLiveOut = false;
break;
}
}
- // If EFLAGS is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if ((IsCmpZero || IsSwapped) && !IsSafe) {
- MachineBasicBlock *MBB = CmpInstr.getParent();
- for (MachineBasicBlock *Successor : MBB->successors())
+  // If we have to update users but EFLAGS is live-out, abort, since we
+  // cannot easily find all of the users.
+ if (ShouldUpdateCC && FlagsMayLiveOut) {
+ for (MachineBasicBlock *Successor : CmpMBB.successors())
if (Successor->isLiveIn(X86::EFLAGS))
return false;
}
// The instruction to be updated is either Sub or MI.
- Sub = IsCmpZero ? MI : Sub;
+ assert((MI == nullptr || Sub == nullptr) && "Should not have Sub and MI set");
+ Sub = MI != nullptr ? MI : Sub;
+ MachineBasicBlock *SubBB = Sub->getParent();
// Move Movr0Inst to the appropriate place before Sub.
if (Movr0Inst) {
+ // Only move within the same block so we don't accidentally move to a
+ // block with higher execution frequency.
+ if (&CmpMBB != SubBB)
+ return false;
// Look backwards until we find a def that doesn't use the current EFLAGS.
- Def = Sub;
- MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
+ MachineBasicBlock::reverse_iterator InsertI = Sub,
InsertE = Sub->getParent()->rend();
for (; InsertI != InsertE; ++InsertI) {
MachineInstr *Instr = &*InsertI;
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
Instr->modifiesRegister(X86::EFLAGS, TRI)) {
- Sub->getParent()->remove(Movr0Inst);
+ Movr0Inst->getParent()->remove(Movr0Inst);
Instr->getParent()->insert(MachineBasicBlock::iterator(Instr),
Movr0Inst);
break;
@@ -4469,6 +4669,13 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
Op.first->getOperand(Op.first->getDesc().getNumOperands() - 1)
.setImm(Op.second);
}
+  // Add EFLAGS to block live-ins between CmpMBB and the flags producer's block.
+ for (MachineBasicBlock *MBB = &CmpMBB; MBB != SubBB;
+ MBB = *MBB->pred_begin()) {
+ assert(MBB->pred_size() == 1 && "Expected exactly one predecessor");
+ if (!MBB->isLiveIn(X86::EFLAGS))
+ MBB->addLiveIn(X86::EFLAGS);
+ }
return true;
}
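As a sanity check on the new ImmDelta cases above, the following standalone snippet (illustrative only, not part of the patch; the 8-bit narrowing and all names are my own) exhaustively verifies the signed and unsigned identities behind the COND_L and COND_B rewrites, including the wrap-around cases the code rejects:

#include <cassert>
#include <cstdint>

int main() {
  for (int C = INT8_MIN; C <= INT8_MAX; ++C) {
    // The removed CMP's immediate is C + 1; the surviving flag producer
    // compared against C.
    int8_t CPlus1 = static_cast<int8_t>(C + 1);
    uint8_t UC = static_cast<uint8_t>(C);
    uint8_t UCPlus1 = static_cast<uint8_t>(UC + 1);
    for (int X = INT8_MIN; X <= INT8_MAX; ++X) {
      int8_t SX = static_cast<int8_t>(X);
      uint8_t UX = static_cast<uint8_t>(X);
      if (C != INT8_MAX) // CmpValue == signed-min is rejected by the code above
        assert((SX < CPlus1) == (SX <= C)); // x <s (C + 1)  <->  x <=s C
      if (UCPlus1 != 0)  // CmpValue == 0 is rejected by the code above
        assert((UX < UCPlus1) == (UX <= UC)); // x <u (C + 1)  <->  x <=u C
    }
  }
  return 0;
}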
@@ -4755,6 +4962,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
case X86::AVX512_128_SET0:
+ case X86::AVX512_FsFLD0SH:
case X86::AVX512_FsFLD0SS:
case X86::AVX512_FsFLD0SD:
case X86::AVX512_FsFLD0F128: {
@@ -5158,6 +5366,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VCVTUSI642SDZrr_Int:
case X86::VCVTUSI642SDZrrb_Int:
case X86::VCVTUSI642SDZrm_Int:
+ case X86::VCVTSI2SHZrr:
+ case X86::VCVTSI2SHZrm:
+ case X86::VCVTSI2SHZrr_Int:
+ case X86::VCVTSI2SHZrrb_Int:
+ case X86::VCVTSI2SHZrm_Int:
+ case X86::VCVTSI642SHZrr:
+ case X86::VCVTSI642SHZrm:
+ case X86::VCVTSI642SHZrr_Int:
+ case X86::VCVTSI642SHZrrb_Int:
+ case X86::VCVTSI642SHZrm_Int:
+ case X86::VCVTUSI2SHZrr:
+ case X86::VCVTUSI2SHZrm:
+ case X86::VCVTUSI2SHZrr_Int:
+ case X86::VCVTUSI2SHZrrb_Int:
+ case X86::VCVTUSI2SHZrm_Int:
+ case X86::VCVTUSI642SHZrr:
+ case X86::VCVTUSI642SHZrm:
+ case X86::VCVTUSI642SHZrr_Int:
+ case X86::VCVTUSI642SHZrrb_Int:
+ case X86::VCVTUSI642SHZrm_Int:
    // Load folding won't affect the undef register update since the input is
// a GPR.
return OpNum == 1 && !ForLoadFold;
@@ -5230,6 +5458,29 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VRCP14SDZrm:
case X86::VRCP14SSZrr:
case X86::VRCP14SSZrm:
+ case X86::VRCPSHZrr:
+ case X86::VRCPSHZrm:
+ case X86::VRSQRTSHZrr:
+ case X86::VRSQRTSHZrm:
+ case X86::VREDUCESHZrmi:
+ case X86::VREDUCESHZrri:
+ case X86::VREDUCESHZrrib:
+ case X86::VGETEXPSHZr:
+ case X86::VGETEXPSHZrb:
+ case X86::VGETEXPSHZm:
+ case X86::VGETMANTSHZrri:
+ case X86::VGETMANTSHZrrib:
+ case X86::VGETMANTSHZrmi:
+ case X86::VRNDSCALESHZr:
+ case X86::VRNDSCALESHZr_Int:
+ case X86::VRNDSCALESHZrb_Int:
+ case X86::VRNDSCALESHZm:
+ case X86::VRNDSCALESHZm_Int:
+ case X86::VSQRTSHZr:
+ case X86::VSQRTSHZr_Int:
+ case X86::VSQRTSHZrb_Int:
+ case X86::VSQRTSHZm:
+ case X86::VSQRTSHZm_Int:
case X86::VRCP28SDZr:
case X86::VRCP28SDZrb:
case X86::VRCP28SDZm:
@@ -5259,6 +5510,26 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum,
case X86::VSQRTSDZrb_Int:
case X86::VSQRTSDZm:
case X86::VSQRTSDZm_Int:
+ case X86::VCVTSD2SHZrr:
+ case X86::VCVTSD2SHZrr_Int:
+ case X86::VCVTSD2SHZrrb_Int:
+ case X86::VCVTSD2SHZrm:
+ case X86::VCVTSD2SHZrm_Int:
+ case X86::VCVTSS2SHZrr:
+ case X86::VCVTSS2SHZrr_Int:
+ case X86::VCVTSS2SHZrrb_Int:
+ case X86::VCVTSS2SHZrm:
+ case X86::VCVTSS2SHZrm_Int:
+ case X86::VCVTSH2SDZrr:
+ case X86::VCVTSH2SDZrr_Int:
+ case X86::VCVTSH2SDZrrb_Int:
+ case X86::VCVTSH2SDZrm:
+ case X86::VCVTSH2SDZrm_Int:
+ case X86::VCVTSH2SSZrr:
+ case X86::VCVTSH2SSZrr_Int:
+ case X86::VCVTSH2SSZrrb_Int:
+ case X86::VCVTSH2SSZrm:
+ case X86::VCVTSH2SSZrm_Int:
return OpNum == 1;
case X86::VMOVSSZrrk:
case X86::VMOVSDZrrk:
@@ -6036,6 +6307,49 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
}
}
+ if ((Opc == X86::VMOVSHZrm || Opc == X86::VMOVSHZrm_alt) && RegSize > 16) {
+    // These instructions only load 16 bits, so we cannot fold them if the
+    // destination register is wider than 16 bits (2 bytes) and the user
+    // instruction is not a scalar FP16 (SH) instruction.
+ switch (UserOpc) {
+ case X86::VADDSHZrr_Int:
+ case X86::VCMPSHZrr_Int:
+ case X86::VDIVSHZrr_Int:
+ case X86::VMAXSHZrr_Int:
+ case X86::VMINSHZrr_Int:
+ case X86::VMULSHZrr_Int:
+ case X86::VSUBSHZrr_Int:
+ case X86::VADDSHZrr_Intk: case X86::VADDSHZrr_Intkz:
+ case X86::VCMPSHZrr_Intk:
+ case X86::VDIVSHZrr_Intk: case X86::VDIVSHZrr_Intkz:
+ case X86::VMAXSHZrr_Intk: case X86::VMAXSHZrr_Intkz:
+ case X86::VMINSHZrr_Intk: case X86::VMINSHZrr_Intkz:
+ case X86::VMULSHZrr_Intk: case X86::VMULSHZrr_Intkz:
+ case X86::VSUBSHZrr_Intk: case X86::VSUBSHZrr_Intkz:
+ case X86::VFMADD132SHZr_Int: case X86::VFNMADD132SHZr_Int:
+ case X86::VFMADD213SHZr_Int: case X86::VFNMADD213SHZr_Int:
+ case X86::VFMADD231SHZr_Int: case X86::VFNMADD231SHZr_Int:
+ case X86::VFMSUB132SHZr_Int: case X86::VFNMSUB132SHZr_Int:
+ case X86::VFMSUB213SHZr_Int: case X86::VFNMSUB213SHZr_Int:
+ case X86::VFMSUB231SHZr_Int: case X86::VFNMSUB231SHZr_Int:
+ case X86::VFMADD132SHZr_Intk: case X86::VFNMADD132SHZr_Intk:
+ case X86::VFMADD213SHZr_Intk: case X86::VFNMADD213SHZr_Intk:
+ case X86::VFMADD231SHZr_Intk: case X86::VFNMADD231SHZr_Intk:
+ case X86::VFMSUB132SHZr_Intk: case X86::VFNMSUB132SHZr_Intk:
+ case X86::VFMSUB213SHZr_Intk: case X86::VFNMSUB213SHZr_Intk:
+ case X86::VFMSUB231SHZr_Intk: case X86::VFNMSUB231SHZr_Intk:
+ case X86::VFMADD132SHZr_Intkz: case X86::VFNMADD132SHZr_Intkz:
+ case X86::VFMADD213SHZr_Intkz: case X86::VFNMADD213SHZr_Intkz:
+ case X86::VFMADD231SHZr_Intkz: case X86::VFNMADD231SHZr_Intkz:
+ case X86::VFMSUB132SHZr_Intkz: case X86::VFNMSUB132SHZr_Intkz:
+ case X86::VFMSUB213SHZr_Intkz: case X86::VFNMSUB213SHZr_Intkz:
+ case X86::VFMSUB231SHZr_Intkz: case X86::VFNMSUB231SHZr_Intkz:
+ return false;
+ default:
+ return true;
+ }
+ }
+
return false;
}
@@ -6101,6 +6415,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_FsFLD0SS:
Alignment = Align(4);
break;
+ case X86::AVX512_FsFLD0SH:
+ Alignment = Align(2);
+ break;
default:
return nullptr;
}
@@ -6136,6 +6453,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_256_SET0:
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
+ case X86::AVX512_FsFLD0SH:
case X86::FsFLD0SD:
case X86::AVX512_FsFLD0SD:
case X86::FsFLD0SS:
@@ -6174,6 +6492,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Ty = Type::getDoubleTy(MF.getFunction().getContext());
else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
Ty = Type::getFP128Ty(MF.getFunction().getContext());
+ else if (Opc == X86::AVX512_FsFLD0SH)
+ Ty = Type::getHalfTy(MF.getFunction().getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = FixedVectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),
16);
@@ -8384,6 +8704,14 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VMINCSSrr:
case X86::VMINCSDZrr:
case X86::VMINCSSZrr:
+ case X86::VMAXCPHZ128rr:
+ case X86::VMAXCPHZ256rr:
+ case X86::VMAXCPHZrr:
+ case X86::VMAXCSHZrr:
+ case X86::VMINCPHZ128rr:
+ case X86::VMINCPHZ256rr:
+ case X86::VMINCPHZrr:
+ case X86::VMINCSHZrr:
return true;
case X86::ADDPDrr:
case X86::ADDPSrr:
@@ -8421,6 +8749,14 @@ bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
case X86::VMULSSrr:
case X86::VMULSDZrr:
case X86::VMULSSZrr:
+ case X86::VADDPHZ128rr:
+ case X86::VADDPHZ256rr:
+ case X86::VADDPHZrr:
+ case X86::VADDSHZrr:
+ case X86::VMULPHZ128rr:
+ case X86::VMULPHZ256rr:
+ case X86::VMULPHZrr:
+ case X86::VMULSHZrr:
return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
Inst.getFlag(MachineInstr::MIFlag::FmNsz);
default:
@@ -8667,6 +9003,7 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_GOT, "x86-got"},
{MO_GOTOFF, "x86-gotoff"},
{MO_GOTPCREL, "x86-gotpcrel"},
+ {MO_GOTPCREL_NORELAX, "x86-gotpcrel-norelax"},
{MO_PLT, "x86-plt"},
{MO_TLSGD, "x86-tlsgd"},
{MO_TLSLD, "x86-tlsld"},
@@ -8966,13 +9303,8 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
- const std::vector<MCCFIInstruction> &CFIInstructions =
- RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
- if (MBBI->isCFIInstruction()) {
- unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
- MCCFIInstruction CFI = CFIInstructions[CFIIndex];
+ if (MBBI->isCFIInstruction())
CFICount++;
- }
MBBI++;
}
@@ -9102,7 +9434,7 @@ void X86InstrInfo::buildOutlinedFrame(MachineBasicBlock &MBB,
// We're a normal call, so our sequence doesn't have a return instruction.
// Add it in.
- MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RETQ));
+ MachineInstr *retq = BuildMI(MF, DebugLoc(), get(X86::RET64));
MBB.insert(MBB.end(), retq);
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index c663bb32af37..33ce55bbdb2b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -37,9 +37,6 @@ enum AsmComments {
/// the instruction operands should be swapped to match the condition code.
std::pair<CondCode, bool> getX86ConditionCode(CmpInst::Predicate Predicate);
-/// Return a setcc opcode based on whether it has a memory operand.
-unsigned getSETOpc(bool HasMemoryOperand = false);
-
/// Return a cmov opcode for the given register size in bytes, and operand type.
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false);
@@ -68,6 +65,8 @@ unsigned getSwappedVPCOMImm(unsigned Imm);
/// Get the VCMP immediate if the opcodes are swapped.
unsigned getSwappedVCMPImm(unsigned Imm);
+/// Check if the instruction is an X87 instruction.
+bool isX87Instruction(MachineInstr &MI);
} // namespace X86
/// isGlobalStubReference - Return true if the specified TargetFlag operand is
@@ -76,6 +75,7 @@ inline static bool isGlobalStubReference(unsigned char TargetFlag) {
switch (TargetFlag) {
case X86II::MO_DLLIMPORT: // dllimport stub.
case X86II::MO_GOTPCREL: // rip-relative GOT reference.
+ case X86II::MO_GOTPCREL_NORELAX: // rip-relative GOT reference.
case X86II::MO_GOT: // normal GOT reference.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Normal $non_lazy_ptr ref.
case X86II::MO_DARWIN_NONLAZY: // Normal $non_lazy_ptr ref.
@@ -250,7 +250,7 @@ public:
bool classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
unsigned LEAOpcode, bool AllowSP, Register &NewSrc,
bool &isKill, MachineOperand &ImplicitOp,
- LiveVariables *LV) const;
+ LiveVariables *LV, LiveIntervals *LIS) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -262,9 +262,8 @@ public:
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
- MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
- MachineInstr &MI,
- LiveVariables *LV) const override;
+ MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
+ LiveIntervals *LIS) const override;
/// Returns true iff the routine could find two commutable operands in the
/// given machine instruction.
@@ -510,14 +509,14 @@ public:
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
- Register &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
+ Register &SrcReg2, int64_t &CmpMask,
+ int64_t &CmpValue) const override;
/// optimizeCompareInstr - Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
- Register SrcReg2, int CmpMask, int CmpValue,
+ Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
const MachineRegisterInfo *MRI) const override;
/// optimizeLoadInstr - Try to remove the load by folding it to a register
@@ -591,10 +590,9 @@ private:
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
/// super-register and then truncating back down to a 8/16-bit sub-register.
- MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc,
- MachineFunction::iterator &MFI,
- MachineInstr &MI,
+ MachineInstr *convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr &MI,
LiveVariables *LV,
+ LiveIntervals *LIS,
bool Is8BitOp) const;
/// Handles memory folding for special case instructions, for instance those
@@ -631,6 +629,22 @@ private:
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2,
bool IsIntrinsic = false) const;
+
+ /// Returns true when instruction \p FlagI produces the same flags as \p OI.
+ /// The caller should pass in the results of calling analyzeCompare on \p OI:
+ /// \p SrcReg, \p SrcReg2, \p ImmMask, \p ImmValue.
+ /// If the flags match \p OI as if it had the input operands swapped then the
+ /// function succeeds and sets \p IsSwapped to true.
+ ///
+ /// Examples of OI, FlagI pairs returning true:
+ /// CMP %1, 42 and CMP %1, 42
+ /// CMP %1, %2 and %3 = SUB %1, %2
+ /// TEST %1, %1 and %2 = SUB %1, 0
+ /// CMP %1, %2 and %3 = SUB %2, %1 ; IsSwapped=true
+ bool isRedundantFlagInstr(const MachineInstr &FlagI, Register SrcReg,
+ Register SrcReg2, int64_t ImmMask, int64_t ImmValue,
+ const MachineInstr &OI, bool *IsSwapped,
+ int64_t *ImmDelta) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 34afedb5bad2..fee9939b8dfc 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
- SDTCisVT<1, iPTR>,
- SDTCisVT<2, iPTR>]>;
+ SDTCisPtrTy<1>]>;
def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
@@ -112,7 +111,7 @@ def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_X86WIN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
+def SDT_X86DYN_ALLOCA : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
@@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
- [SDNPHasChain, SDNPVariadic]>;
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
def X86vaarg64 :
SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
@@ -294,7 +293,7 @@ def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
-def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA,
+def X86DynAlloca : SDNode<"X86ISD::DYN_ALLOCA", SDT_X86DYN_ALLOCA,
[SDNPHasChain, SDNPOutGlue]>;
def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
@@ -421,6 +420,7 @@ def i64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
def i128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand>;
def i256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand>;
def i512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand>;
+def f16mem : X86MemOperand<"printwordmem", X86Mem16AsmOperand>;
def f32mem : X86MemOperand<"printdwordmem", X86Mem32AsmOperand>;
def f64mem : X86MemOperand<"printqwordmem", X86Mem64AsmOperand>;
def f80mem : X86MemOperand<"printtbytemem", X86Mem80AsmOperand>;
@@ -919,6 +919,7 @@ def PKU : Predicate<"Subtarget->hasPKU()">;
def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">;
def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">;
def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
@@ -993,6 +994,7 @@ def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
+def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<(all_of (not Mode64Bit)), "Not 64-bit mode">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
@@ -1193,6 +1195,7 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
}]>;
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
+def loadf16 : PatFrag<(ops node:$ptr), (f16 (load node:$ptr))>;
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
@@ -3155,9 +3158,6 @@ include "X86InstrAVX512.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
-// MPX instructions
-include "X86InstrMPX.td"
-
include "X86InstrVMX.td"
include "X86InstrSVM.td"
include "X86InstrSNP.td"
diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td
index b91e563a15f3..a716aab4260b 100644
--- a/llvm/lib/Target/X86/X86InstrKL.td
+++ b/llvm/lib/Target/X86/X86InstrKL.td
@@ -1,10 +1,9 @@
//===---------------------------*-tablegen-*-------------------------------===//
//===------------- X86InstrKL.td - KL Instruction Set Extension -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Target/X86/X86InstrMPX.td b/llvm/lib/Target/X86/X86InstrMPX.td
deleted file mode 100644
index 44ba071947c2..000000000000
--- a/llvm/lib/Target/X86/X86InstrMPX.td
+++ /dev/null
@@ -1,77 +0,0 @@
-//===-- X86InstrMPX.td - MPX Instruction Set ---------*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the X86 MPX instruction set, defining the
-// instructions, and properties of the instructions which are needed for code
-// generation, machine code emission, and analysis.
-//
-//===----------------------------------------------------------------------===//
-
-// FIXME: Investigate a better scheduler class if MPX is ever used inside LLVM.
-let SchedRW = [WriteSystem] in {
-
-multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
- def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
- Requires<[Not64BitMode]>;
- def 64rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
- Requires<[In64BitMode]>;
-}
-
-defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
-
-multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
- def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[Not64BitMode]>;
- def 64rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, anymem:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[In64BitMode]>;
-
- def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[Not64BitMode]>;
- def 64rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
- OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
- Requires<[In64BitMode]>;
-}
-defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS, NotMemoryFoldable;
-defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD, NotMemoryFoldable;
-defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD, NotMemoryFoldable;
-
-def BNDMOVrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- NotMemoryFoldable;
-let mayLoad = 1 in {
-def BNDMOV32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[Not64BitMode]>, NotMemoryFoldable;
-def BNDMOV64rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[In64BitMode]>, NotMemoryFoldable;
-}
-let isCodeGenOnly = 1, ForceDisassemble = 1 in
-def BNDMOVrr_REV : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- NotMemoryFoldable;
-let mayStore = 1 in {
-def BNDMOV32mr : I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[Not64BitMode]>, NotMemoryFoldable;
-def BNDMOV64mr : I<0x1B, MRMDestMem, (outs), (ins i128mem:$dst, BNDR:$src),
- "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
- Requires<[In64BitMode]>, NotMemoryFoldable;
-
-def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins anymem:$dst, BNDR:$src),
- "bndstx\t{$src, $dst|$dst, $src}", []>, PS;
-}
-let mayLoad = 1 in
-def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins anymem:$src),
- "bndldx\t{$src, $dst|$dst, $src}", []>, PS;
-} // SchedRW
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 41fda603d5a9..035f139e6f33 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -40,7 +40,7 @@ let isCodeGenOnly = 1 in {
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
-multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
+multiclass sse12_fp_scalar_int<bits<8> opc,
SDPatternOperator OpNode, RegisterClass RC,
ValueType VT, string asm, Operand memopr,
PatFrags mem_frags, Domain d,
@@ -187,8 +187,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
- X86MemOperand x86memop, string base_opc,
+multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
string asm_opr, Domain d, string Name> {
let isCommutable = 1 in
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
@@ -210,7 +209,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
Domain d, string Name, Predicate pred> {
// AVX
let Predicates = [UseAVX, OptForSize] in
- defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
+ defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
"V"#Name>,
VEX_4V, VEX_LIG, VEX_WIG;
@@ -222,7 +221,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
let Predicates = [pred, NoSSE41_Or_OptForSize] in
- defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
+ defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
"\t{$src2, $dst|$dst, $src2}", d, Name>;
}
@@ -1747,20 +1746,20 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
// XMM only
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (loadv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround VR256:$src))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (loadv4f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
} // Predicates = [HasAVX, NoVLX]
@@ -1771,11 +1770,11 @@ def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (v2f64 VR128:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
Sched<[WriteCvtPD2PS]>, SIMD_EXC;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (X86any_vfpround (memopv2f64 addr:$src)))]>,
+ [(set VR128:$dst, (v4f32 (X86any_vfpround (memopv2f64 addr:$src))))]>,
Sched<[WriteCvtPD2PS.Folded]>, SIMD_EXC;
//===----------------------------------------------------------------------===//
@@ -2266,7 +2265,7 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
/// There are no patterns here because isel prefers integer versions for SSE2
/// and later. There are SSE1 v4f32 patterns later.
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86SchedWriteWidths sched> {
+ X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
@@ -2296,11 +2295,11 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
}
}
-defm AND : sse12_fp_packed_logical<0x54, "and", and, SchedWriteFLogic>;
-defm OR : sse12_fp_packed_logical<0x56, "or", or, SchedWriteFLogic>;
-defm XOR : sse12_fp_packed_logical<0x57, "xor", xor, SchedWriteFLogic>;
+defm AND : sse12_fp_packed_logical<0x54, "and", SchedWriteFLogic>;
+defm OR : sse12_fp_packed_logical<0x56, "or", SchedWriteFLogic>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", SchedWriteFLogic>;
let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp, SchedWriteFLogic>;
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", SchedWriteFLogic>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v32i8 (and VR256:$src1, VR256:$src2)),
@@ -2643,18 +2642,18 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
SDPatternOperator OpNode,
X86SchedWriteSizes sched> {
let Uses = [MXCSR], mayRaiseFPException = 1 in {
- defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
- defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
let Constraints = "$src1 = $dst" in {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32,
+ defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, sched.PS.Scl>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64,
+ defm SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, sched.PD.Scl>, XD;
}
@@ -2790,8 +2789,8 @@ defm : scalar_math_patterns<any_fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf
/// For the non-AVX defs, we need $src1 to be tied to $dst because
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
- ValueType ScalarVT, X86MemOperand x86memop,
- Operand intmemop, SDPatternOperator OpNode, Domain d,
+ X86MemOperand x86memop, Operand intmemop,
+ SDPatternOperator OpNode, Domain d,
X86FoldableSchedWrite sched, Predicate target> {
let isCodeGenOnly = 1, hasSideEffects = 0 in {
def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1),
@@ -2818,9 +2817,8 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
-multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
- PatFrags mem_frags, Intrinsic Intr,
- Predicate target, string Suffix> {
+multiclass sse_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
+ Intrinsic Intr, Predicate target> {
let Predicates = [target] in {
// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.
@@ -2841,7 +2839,7 @@ multiclass sse_fp_unop_s_intr<RegisterClass RC, ValueType vt,
}
}
-multiclass avx_fp_unop_s_intr<RegisterClass RC, ValueType vt, PatFrags mem_frags,
+multiclass avx_fp_unop_s_intr<ValueType vt, PatFrags mem_frags,
Intrinsic Intr, Predicate target> {
let Predicates = [target] in {
def : Pat<(Intr VR128:$src),
@@ -2972,12 +2970,11 @@ let Predicates = [HasAVX, NoVLX] in {
Sched<[sched.XMM.Folded]>;
}
-multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SS : sse_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
+multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
+ defm SS : sse_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
- UseSSE1, "SS">, XS;
- defm V#NAME#SS : avx_fp_unop_s_intr<FR32, v4f32, sse_load_f32,
+ UseSSE1>, XS;
+ defm V#NAME#SS : avx_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
AVXTarget>,
XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
@@ -2985,7 +2982,7 @@ multiclass sse1_fp_unop_s_intr<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32, f32mem,
+ defm SS : sse_fp_unop_s<opc, OpcodeStr#ss, FR32, f32mem,
ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
@@ -2994,7 +2991,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNod
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
X86SchedWriteWidths sched, Predicate AVXTarget> {
- defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64, f64mem,
+ defm SD : sse_fp_unop_s<opc, OpcodeStr#sd, FR64, f64mem,
sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
@@ -3010,10 +3007,10 @@ defm SQRT : sse1_fp_unop_s<0x51, "sqrt", any_fsqrt, SchedWriteFSqrt, UseAVX>,
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
- sse1_fp_unop_s_intr<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, HasAVX>,
+ sse1_fp_unop_s_intr<"rsqrt", HasAVX>,
sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SchedWriteFRsqrt, [HasAVX]>;
defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
- sse1_fp_unop_s_intr<0x53, "rcp", X86frcp, SchedWriteFRcp, HasAVX>,
+ sse1_fp_unop_s_intr<"rcp", HasAVX>,
sse1_fp_unop_p<0x53, "rcp", X86frcp, SchedWriteFRcp, [HasAVX]>;
// There is no f64 version of the reciprocal approximation instructions.
@@ -6588,14 +6585,14 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
// of r and m.
class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
RegisterClass RCIn, SDPatternOperator Int> :
- SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
+ CRC32I<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))]>,
Sched<[WriteCRC32]>;
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
X86MemOperand x86memop, SDPatternOperator Int> :
- SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
+ CRC32I<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>,
Sched<[WriteCRC32.Folded, WriteCRC32.ReadAfterFold]>;
@@ -7049,6 +7046,50 @@ def : Pat<(v32i8 (X86SubVBroadcastld128 addr:$src)),
}
//===----------------------------------------------------------------------===//
+// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
+//
+
+let ExeDomain = SSEPackedSingle in {
+let isCommutable = 1 in
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, u8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
+ VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
+}
+
+// Immediate transform to help with commuting.
+def Perm2XCommuteImm : SDNodeXForm<timm, [{
+ return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
+}]>;
+
+multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
+ def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
+ // Pattern with load in other operand.
+ def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
+ (Perm2XCommuteImm timm:$imm))>;
+}
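The Perm2XCommuteImm transform in the block above relies on the vperm2f128/vperm2i128 immediate layout (as I read the encoding: bits 1:0 and 5:4 select the source 128-bit lane for each destination half, bits 3 and 7 zero a half), so XOR with 0x22 flips exactly the source-select bit of each field when the two sources are commuted. A small standalone model of that encoding (helper names are mine, illustrative only):

#include <cassert>

// One result half of VPERM2F128: 'Sel' is the 2-bit lane selector,
// 'Zero' the corresponding zeroing bit; 128-bit halves are modeled as ints.
static int Half(unsigned Sel, bool Zero, int ALo, int AHi, int BLo, int BHi) {
  if (Zero)
    return 0;
  switch (Sel & 3) {
  case 0: return ALo;
  case 1: return AHi;
  case 2: return BLo;
  default: return BHi;
  }
}

int main() {
  const int ALo = 1, AHi = 2, BLo = 3, BHi = 4;
  for (unsigned Imm = 0; Imm < 256; ++Imm) {
    unsigned Commuted = Imm ^ 0x22; // flip the source-select bit of each field
    // Low destination half: selector Imm[1:0], zero bit Imm[3].
    assert(Half(Imm & 3, Imm & 0x8, ALo, AHi, BLo, BHi) ==
           Half(Commuted & 3, Commuted & 0x8, BLo, BHi, ALo, AHi));
    // High destination half: selector Imm[5:4], zero bit Imm[7].
    assert(Half((Imm >> 4) & 3, Imm & 0x80, ALo, AHi, BLo, BHi) ==
           Half((Commuted >> 4) & 3, Commuted & 0x80, BLo, BHi, ALo, AHi));
  }
  return 0;
}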
+
+let Predicates = [HasAVX] in {
+ defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
+}
+
+let Predicates = [HasAVX1Only] in {
+ defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
+ defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
+ defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
+ defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
+}
+
+//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
let hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
@@ -7070,29 +7111,37 @@ let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 immAllOnesV), (VCMPPSYrri (AVX_SET0), (AVX_SET0), 0xf)>;
}
-multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
- PatFrag memop_frag> {
+multiclass vinsert_lowering<string InstrStr, string PermStr,
+ ValueType From, ValueType To,
+ PatFrag frommemop_frag, PatFrag tomemop_frag> {
def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
- (From (memop_frag addr:$src2)),
+ (From (frommemop_frag addr:$src2)),
(iPTR imm)),
(!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
(INSERT_get_vinsert128_imm VR256:$ins))>;
+ // Folding "To" vector - convert to perm2x128 and commute inputs.
+ def : Pat<(vinsert128_insert:$ins (To (tomemop_frag addr:$src1)),
+ (From VR128:$src2),
+ (iPTR imm)),
+ (!cast<Instruction>(PermStr#rm)
+ (INSERT_SUBREG (To (IMPLICIT_DEF)), VR128:$src2, sub_xmm),
+ addr:$src1, (INSERT_get_vperm2x128_commutedimm VR256:$ins))>;
}
let Predicates = [HasAVX, NoVLX] in {
- defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
- defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4f32, v8f32, loadv4f32, loadv8f32>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2f64, v4f64, loadv2f64, loadv4f64>;
}
let Predicates = [HasAVX1Only] in {
- defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv4i32>;
- defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv8i16>;
- defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv16i8>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v2i64, v4i64, loadv2i64, loadv4i64>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v4i32, v8i32, loadv4i32, loadv8i32>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v8i16, v16i16, loadv8i16, loadv16i16>;
+ defm : vinsert_lowering<"VINSERTF128", "VPERM2F128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
//===----------------------------------------------------------------------===//
@@ -7297,50 +7346,6 @@ let ExeDomain = SSEPackedDouble in {
}
//===----------------------------------------------------------------------===//
-// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
-//
-
-let ExeDomain = SSEPackedSingle in {
-let isCommutable = 1 in
-def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src1, VR256:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256]>;
-def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
- (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
- "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>,
- VEX_4V, VEX_L, Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
-}
-
-// Immediate transform to help with commuting.
-def Perm2XCommuteImm : SDNodeXForm<timm, [{
- return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
-}]>;
-
-multiclass vperm2x128_lowering<string InstrStr, ValueType VT, PatFrag memop_frag> {
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR256:$src2, timm:$imm)>;
- def : Pat<(VT (X86VPerm2x128 VR256:$src1, (memop_frag addr:$src2), (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2, timm:$imm)>;
- // Pattern with load in other operand.
- def : Pat<(VT (X86VPerm2x128 (memop_frag addr:$src2), VR256:$src1, (i8 timm:$imm))),
- (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
- (Perm2XCommuteImm timm:$imm))>;
-}
-
-let Predicates = [HasAVX] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4f64, loadv4f64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8f32, loadv8f32>;
-}
-
-let Predicates = [HasAVX1Only] in {
- defm : vperm2x128_lowering<"VPERM2F128", v4i64, loadv4i64>;
- defm : vperm2x128_lowering<"VPERM2F128", v8i32, loadv8i32>;
- defm : vperm2x128_lowering<"VPERM2F128", v16i16, loadv16i16>;
- defm : vperm2x128_lowering<"VPERM2F128", v32i8, loadv32i8>;
-}
-
-//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
// Note: These instructions do not affect YMM16-YMM31.
//
@@ -7625,10 +7630,18 @@ let Predicates = [HasAVX1Only] in {
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
(v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), sub_xmm),
(v4f32 (VPERMILPSri (v4f32 (COPY_TO_REGCLASS FR32:$src, VR128)), 0)), 1)>;
+ def : Pat<(v8f32 (X86VBroadcast v4f32:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
+ (v4f32 (VPERMILPSri VR128:$src, 0)), sub_xmm),
+ (v4f32 (VPERMILPSri VR128:$src, 0)), 1)>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
(v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), sub_xmm),
(v2f64 (VMOVDDUPrr (v2f64 (COPY_TO_REGCLASS FR64:$src, VR128)))), 1)>;
+ def : Pat<(v4f64 (X86VBroadcast v2f64:$src)),
+ (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
+ (v2f64 (VMOVDDUPrr VR128:$src)), sub_xmm),
+ (v2f64 (VMOVDDUPrr VR128:$src)), 1)>;
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VPSHUFDri (VMOVDI2PDIrr GR32:$src), 0)>;
@@ -7741,10 +7754,10 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
}
let Predicates = [HasAVX2, NoVLX] in {
- defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
- defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv4i32>;
- defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv8i16>;
- defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv16i8>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v2i64, v4i64, loadv2i64, loadv4i64>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v4i32, v8i32, loadv4i32, loadv8i32>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v8i16, v16i16, loadv8i16, loadv16i16>;
+ defm : vinsert_lowering<"VINSERTI128", "VPERM2I128", v16i8, v32i8, loadv16i8, loadv32i8>;
}
//===----------------------------------------------------------------------===//
@@ -7889,10 +7902,8 @@ let Predicates = [HasAVX2, NoVLX] in {
// VGATHER - GATHER Operations
// FIXME: Improve scheduling of gather instructions.
-multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
- ValueType VTy, RegisterClass RC256,
- X86MemOperand memop128, X86MemOperand memop256,
- ValueType MTx = VTx, ValueType MTy = VTy> {
+multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
+ X86MemOperand memop128, X86MemOperand memop256> {
let mayLoad = 1, hasSideEffects = 0 in {
def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
(ins VR128:$src1, memop128:$src2, VR128:$mask),
@@ -7911,27 +7922,27 @@ let Predicates = [HasAVX2] in {
let mayLoad = 1, hasSideEffects = 0, Constraints
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
in {
- defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", v2i64, v4i64,
- VR256, vx128mem, vx256mem>, VEX_W;
- defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", v2i64, v4i64,
- VR256, vx128mem, vy256mem>, VEX_W;
- defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", v4i32, v8i32,
- VR256, vx128mem, vy256mem>;
- defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", v4i32, v4i32,
- VR128, vx64mem, vy128mem>;
+ defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
+ VR256, vx128mem, vx256mem>, VEX_W;
+ defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
+ VR256, vx128mem, vy256mem>, VEX_W;
+ defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
+ VR256, vx128mem, vy256mem>;
+ defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
+ VR128, vx64mem, vy128mem>;
let ExeDomain = SSEPackedDouble in {
- defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", v2f64, v4f64,
- VR256, vx128mem, vx256mem, v2i64, v4i64>, VEX_W;
- defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", v2f64, v4f64,
- VR256, vx128mem, vy256mem, v2i64, v4i64>, VEX_W;
+ defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
+ VR256, vx128mem, vx256mem>, VEX_W;
+ defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
+ VR256, vx128mem, vy256mem>, VEX_W;
}
let ExeDomain = SSEPackedSingle in {
- defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", v4f32, v8f32,
- VR256, vx128mem, vy256mem, v4i32, v8i32>;
- defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", v4f32, v4f32,
- VR128, vx64mem, vy128mem, v4i32, v4i32>;
+ defm VGATHERDPS : avx2_gather<0x92, "vgatherdps",
+ VR256, vx128mem, vy256mem>;
+ defm VGATHERQPS : avx2_gather<0x93, "vgatherqps",
+ VR128, vx64mem, vy128mem>;
}
}
}
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 48c27051a872..b4dd99d08a62 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -529,16 +529,17 @@ let SchedRW = [WriteSystem] in {
//===----------------------------------------------------------------------===//
// XSAVE instructions
let SchedRW = [WriteSystem] in {
-let Predicates = [HasXSAVE] in {
+// NOTE: No HasXSAVE predicate, so that these can be used with _xgetbv/_xsetbv
+// on Windows without needing to enable the xsave feature, for compatibility
+// with MSVC.
let Defs = [EDX, EAX], Uses = [ECX] in
- def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS;
+def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, PS;
let Uses = [EDX, EAX, ECX] in
- def XSETBV : I<0x01, MRM_D1, (outs), (ins),
- "xsetbv",
- [(int_x86_xsetbv ECX, EDX, EAX)]>, PS;
+def XSETBV : I<0x01, MRM_D1, (outs), (ins),
+ "xsetbv",
+ [(int_x86_xsetbv ECX, EDX, EAX)]>, PS;
-} // HasXSAVE
let Uses = [EDX, EAX] in {
def XSAVE : I<0xAE, MRM4m, (outs), (ins opaquemem:$dst),
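For context on the note above: the point is to let MSVC-compatible intrinsic callers compile without an explicit xsave feature flag. A minimal hedged sketch of such a caller (assuming the _xgetbv intrinsic declared in <immintrin.h>/<intrin.h>; not part of the patch):

#include <immintrin.h> // MSVC also declares _xgetbv in <intrin.h>
#include <cstdio>

int main() {
  // Read XCR0 (register 0, a.k.a. _XCR_XFEATURE_ENABLED_MASK) without the
  // translation unit being compiled with the xsave feature enabled.
  unsigned long long XCR0 = _xgetbv(0);
  std::printf("XCR0 = 0x%llx\n", XCR0);
  return 0;
}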
@@ -583,7 +584,7 @@ def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaquemem:$dst),
//===----------------------------------------------------------------------===//
// VIA PadLock crypto instructions
let Defs = [RAX, RDI], Uses = [RDX, RDI], SchedRW = [WriteSystem] in
- def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB, REP;
+ def XSTORE : I<0xa7, MRM_C0, (outs), (ins), "xstore", []>, TB;
def : InstAlias<"xstorerng", (XSTORE)>;
diff --git a/llvm/lib/Target/X86/X86InstrVecCompiler.td b/llvm/lib/Target/X86/X86InstrVecCompiler.td
index e98843bd3ae3..2429aa113fb1 100644
--- a/llvm/lib/Target/X86/X86InstrVecCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrVecCompiler.td
@@ -25,6 +25,8 @@ let Predicates = [NoAVX512] in {
let Predicates = [HasAVX512] in {
// A vector extract of the first f32/f64 position is a subregister copy
+ def : Pat<(f16 (extractelt (v8f16 VR128X:$src), (iPTR 0))),
+ (COPY_TO_REGCLASS (v8f16 VR128X:$src), FR16X)>;
def : Pat<(f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
(COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X)>;
def : Pat<(f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))),
@@ -32,6 +34,8 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [NoVLX] in {
+ def : Pat<(v8f16 (scalar_to_vector FR16X:$src)),
+ (COPY_TO_REGCLASS FR16X:$src, VR128)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
@@ -41,6 +45,8 @@ let Predicates = [NoVLX] in {
}
let Predicates = [HasVLX] in {
+ def : Pat<(v8f16 (scalar_to_vector FR16X:$src)),
+ (COPY_TO_REGCLASS FR16X:$src, VR128X)>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32X:$src)),
(COPY_TO_REGCLASS FR32X:$src, VR128X)>;
@@ -74,6 +80,7 @@ defm : subvector_subreg_lowering<VR128, v2i64, VR256, v4i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR256, v4f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR256, v16i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR256, v32i8, sub_xmm>;
+defm : subvector_subreg_lowering<VR128, v8f16, VR256, v16f16, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -85,6 +92,7 @@ defm : subvector_subreg_lowering<VR128, v2i64, VR512, v8i64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v2f64, VR512, v8f64, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v8i16, VR512, v32i16, sub_xmm>;
defm : subvector_subreg_lowering<VR128, v16i8, VR512, v64i8, sub_xmm>;
+defm : subvector_subreg_lowering<VR128, v8f16, VR512, v32f16, sub_xmm>;
// A 128-bit subvector extract from the first 512-bit vector position is a
// subregister copy that needs no instruction. Likewise, a 128-bit subvector
@@ -96,6 +104,7 @@ defm : subvector_subreg_lowering<VR256, v4i64, VR512, v8i64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v4f64, VR512, v8f64, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v16i16, VR512, v32i16, sub_ymm>;
defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
+defm : subvector_subreg_lowering<VR256, v16f16, VR512, v32f16, sub_ymm>;
// If we're inserting into an all zeros vector, just use a plain move which
@@ -103,8 +112,7 @@ defm : subvector_subreg_lowering<VR256, v32i8, VR512, v64i8, sub_ymm>;
// any moves that we can prove are unnecessary.
multiclass subvec_zero_lowering<string MoveStr,
RegisterClass RC, ValueType DstTy,
- ValueType SrcTy, ValueType ZeroTy,
- SubRegIndex SubIdx> {
+ ValueType SrcTy, SubRegIndex SubIdx> {
def : Pat<(DstTy (insert_subvector immAllZerosV,
(SrcTy RC:$src), (iPTR 0))),
(SUBREG_TO_REG (i64 0),
@@ -112,51 +120,57 @@ multiclass subvec_zero_lowering<string MoveStr,
}
let Predicates = [HasAVX, NoVLX] in {
- defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, v8i32, sub_xmm>;
+ defm : subvec_zero_lowering<"APD", VR128, v4f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APS", VR128, v8f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v4i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v8i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v16i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v32i8, v16i8, sub_xmm>;
}
let Predicates = [HasVLX] in {
- defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, v8i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, v8i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, v16i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APDZ128", VR128X, v4f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v8f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v4i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDZ128", VR128X, v8f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v8i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v16i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v32i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA64Z128", VR128X, v64i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDZ256", VR256X, v8f64, v4f64, sub_ymm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v16f32, v8f32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v8i64, v4i64, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v16i32, v8i32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v32i16, v16i16, sub_ymm>;
+ defm : subvec_zero_lowering<"DQA64Z256", VR256X, v64i8, v32i8, sub_ymm>;
}
let Predicates = [HasAVX512, NoVLX] in {
- defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, v16i32, sub_xmm>;
- defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, v16i32, sub_xmm>;
-
- defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, v16i32, sub_ymm>;
- defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, v16i32, sub_ymm>;
+ defm : subvec_zero_lowering<"APD", VR128, v8f64, v2f64, sub_xmm>;
+ defm : subvec_zero_lowering<"APS", VR128, v16f32, v4f32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v8i64, v2i64, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v16i32, v4i32, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v32i16, v8i16, sub_xmm>;
+ defm : subvec_zero_lowering<"DQA", VR128, v64i8, v16i8, sub_xmm>;
+
+ defm : subvec_zero_lowering<"APDY", VR256, v8f64, v4f64, sub_ymm>;
+ defm : subvec_zero_lowering<"APSY", VR256, v16f32, v8f32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v8i64, v4i64, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v16i32, v8i32, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v32i16, v16i16, sub_ymm>;
+ defm : subvec_zero_lowering<"DQAY", VR256, v64i8, v32i8, sub_ymm>;
+}
+
+let Predicates = [HasFP16, HasVLX] in {
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v16f16, v8f16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ128", VR128X, v32f16, v8f16, sub_xmm>;
+ defm : subvec_zero_lowering<"APSZ256", VR256X, v32f16, v16f16, sub_ymm>;
}
class maskzeroupper<ValueType vt, RegisterClass RC> :
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index ff531713037c..8abbaa92c8cf 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -479,7 +479,7 @@ static void X86SelectAddress(const MachineInstr &I,
"unsupported type.");
if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
- if (auto COff = getConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
+ if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
int64_t Imm = *COff;
if (isInt<32>(Imm)) { // Check for displacement overflow.
AM.Disp = static_cast<int32_t>(Imm);
@@ -1065,7 +1065,7 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I,
return false;
Opcode = X86::ADC32rr;
- } else if (auto val = getConstantVRegVal(CarryInReg, MRI)) {
+ } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
// carry is constant, support only 0.
if (*val != 0)
return false;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index de2500b8e1bd..1edec96bbec3 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -24,6 +24,7 @@ enum IntrinsicType : uint16_t {
GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASSS,
INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP_IMM8,
INTR_TYPE_3OP_IMM8,
+ CFMA_OP_MASK, CFMA_OP_MASKZ,
CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM, BLENDV, BEXTRI,
CVTPD2PS_MASK,
INTR_TYPE_1OP_SAE, INTR_TYPE_2OP_SAE,
@@ -987,6 +988,236 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_256, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
X86_INTRINSIC_DATA(avx512bf16_dpbf16ps_512, INTR_TYPE_3OP, X86ISD::DPBF16PS, 0),
X86_INTRINSIC_DATA(avx512bf16_mask_cvtneps2bf16_128, CVTNEPS2BF16_MASK, X86ISD::CVTNEPS2BF16, X86ISD::MCVTNEPS2BF16),
+ X86_INTRINSIC_DATA(avx512fp16_add_ph_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
+ X86_INTRINSIC_DATA(avx512fp16_div_ph_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512fp16_fpclass_ph_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_fpclass_ph_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_fpclass_ph_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_add_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FADDS, X86ISD::FADDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_ph_128, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_ph_256, CMP_MASK_CC, X86ISD::CMPMM, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_ph_512, CMP_MASK_CC, X86ISD::CMPMM, X86ISD::CMPMM_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_cmp_sh, CMP_MASK_SCALAR_CC,
+ X86ISD::FSETCCM, X86ISD::FSETCCM_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_div_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FDIVS, X86ISD::FDIVS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_fpclass_sh, FPCLASSS, X86ISD::VFPCLASSS, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_ph_128, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_ph_256, INTR_TYPE_1OP_MASK, X86ISD::FGETEXP, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_ph_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::FGETEXP, X86ISD::FGETEXP_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getexp_sh, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FGETEXPS, X86ISD::FGETEXPS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_ph_128, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_ph_256, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_ph_512, INTR_TYPE_2OP_MASK_SAE,
+ X86ISD::VGETMANT, X86ISD::VGETMANT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_getmant_sh, INTR_TYPE_3OP_SCALAR_MASK_SAE,
+ X86ISD::VGETMANTS, X86ISD::VGETMANTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_max_sh_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMAXS, X86ISD::FMAXS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_min_sh_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::FMINS, X86ISD::FMINS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_mul_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FMULS, X86ISD::FMULS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_ph_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_ph_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_ph_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rcp_sh, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_ph_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_ph_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_ph_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VREDUCE, X86ISD::VREDUCE_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_reduce_sh, INTR_TYPE_SCALAR_MASK, X86ISD::VREDUCES, X86ISD::VREDUCES_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_ph_128, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_ph_256, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_ph_512, INTR_TYPE_2OP_MASK_SAE, X86ISD::VRNDSCALE, X86ISD::VRNDSCALE_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rndscale_sh, INTR_TYPE_SCALAR_MASK,
+ X86ISD::VRNDSCALES, X86ISD::VRNDSCALES_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_ph_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_ph_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_ph_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_rsqrt_sh, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_ph_128, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_ph_256, INTR_TYPE_2OP_MASK, X86ISD::SCALEF, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_ph_512, INTR_TYPE_2OP_MASK,
+ X86ISD::SCALEF, X86ISD::SCALEF_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_scalef_sh, INTR_TYPE_SCALAR_MASK,
+ X86ISD::SCALEFS, X86ISD::SCALEFS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_sqrt_sh, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSQRTS, X86ISD::FSQRTS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_sub_sh_round, INTR_TYPE_SCALAR_MASK,
+ X86ISD::FSUBS, X86ISD::FSUBS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtdq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_128, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_256, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtpd2ph_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2dq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_256, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2pd_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_128, INTR_TYPE_1OP_MASK, X86ISD::VFPEXT, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_256, INTR_TYPE_1OP_MASK, ISD::FP_EXTEND, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2psx_512, INTR_TYPE_1OP_MASK_SAE,
+ ISD::FP_EXTEND, X86ISD::VFPEXT_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2qq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2udq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uqq_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2uw_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtph2w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_128, TRUNCATE_TO_REG,
+ X86ISD::VFPROUND, X86ISD::VMFPROUND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_256, INTR_TYPE_1OP_MASK, X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtps2phx_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VFPROUND, X86ISD::VFPROUND_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtqq2ph_256, TRUNCATE_TO_REG,
+ X86ISD::CVTSI2P, X86ISD::MCVTSI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsd2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2sd_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtsh2ss_round, INTR_TYPE_SCALAR_MASK_SAE,
+ X86ISD::VFPEXTS, X86ISD::VFPEXTS_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtss2sh_round, INTR_TYPE_SCALAR_MASK_RND,
+ X86ISD::VFPROUNDS, X86ISD::VFPROUNDS_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2dq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2qq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2udq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uqq_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2UI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2uw_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2UI, X86ISD::CVTTP2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvttph2w_512, INTR_TYPE_1OP_MASK_SAE,
+ X86ISD::CVTTP2SI, X86ISD::CVTTP2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtudq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_128, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vcvtuqq2ph_256, TRUNCATE_TO_REG,
+ X86ISD::CVTUI2P, X86ISD::MCVTUI2P),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_cph_128, CFMA_OP_MASK, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_cph_256, CFMA_OP_MASK, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_cph_512, CFMA_OP_MASK, X86ISD::VFCMADDC, X86ISD::VFCMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmadd_csh, CFMA_OP_MASK, X86ISD::VFCMADDCSH, X86ISD::VFCMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_cph_128, INTR_TYPE_2OP_MASK, X86ISD::VFCMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_cph_256, INTR_TYPE_2OP_MASK, X86ISD::VFCMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_cph_512, INTR_TYPE_2OP_MASK, X86ISD::VFCMULC, X86ISD::VFCMULC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfcmul_csh, INTR_TYPE_SCALAR_MASK, X86ISD::VFCMULCSH, X86ISD::VFCMULCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_cph_128, CFMA_OP_MASK, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_cph_256, CFMA_OP_MASK, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_cph_512, CFMA_OP_MASK, X86ISD::VFMADDC, X86ISD::VFMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmadd_csh, CFMA_OP_MASK, X86ISD::VFMADDCSH, X86ISD::VFMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_cph_128, INTR_TYPE_2OP_MASK, X86ISD::VFMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_cph_256, INTR_TYPE_2OP_MASK, X86ISD::VFMULC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_cph_512, INTR_TYPE_2OP_MASK, X86ISD::VFMULC, X86ISD::VFMULC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_mask_vfmul_csh, INTR_TYPE_SCALAR_MASK, X86ISD::VFMULCSH, X86ISD::VFMULCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_cph_128, CFMA_OP_MASKZ, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_cph_256, CFMA_OP_MASKZ, X86ISD::VFCMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_cph_512, CFMA_OP_MASKZ, X86ISD::VFCMADDC, X86ISD::VFCMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfcmadd_csh, CFMA_OP_MASKZ, X86ISD::VFCMADDCSH, X86ISD::VFCMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_cph_128, CFMA_OP_MASKZ, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_cph_256, CFMA_OP_MASKZ, X86ISD::VFMADDC, 0),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_cph_512, CFMA_OP_MASKZ, X86ISD::VFMADDC, X86ISD::VFMADDC_RND),
+ X86_INTRINSIC_DATA(avx512fp16_maskz_vfmadd_csh, CFMA_OP_MASKZ, X86ISD::VFMADDCSH, X86ISD::VFMADDCSH_RND),
+ X86_INTRINSIC_DATA(avx512fp16_max_ph_128, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512fp16_max_ph_256, INTR_TYPE_2OP, X86ISD::FMAX, 0),
+ X86_INTRINSIC_DATA(avx512fp16_max_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMAX, X86ISD::FMAX_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_min_ph_128, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512fp16_min_ph_256, INTR_TYPE_2OP, X86ISD::FMIN, 0),
+ X86_INTRINSIC_DATA(avx512fp16_min_ph_512, INTR_TYPE_2OP_SAE, X86ISD::FMIN, X86ISD::FMIN_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_mul_ph_512, INTR_TYPE_2OP, ISD::FMUL, X86ISD::FMUL_RND),
+ X86_INTRINSIC_DATA(avx512fp16_sqrt_ph_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
+ X86_INTRINSIC_DATA(avx512fp16_sub_ph_512, INTR_TYPE_2OP, ISD::FSUB, X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcomi_sh, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
+ /*fp16 scalar convert instruction*/
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si32, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2si64, INTR_TYPE_1OP, X86ISD::CVTS2SI, X86ISD::CVTS2SI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi32, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsh2usi64, INTR_TYPE_1OP, X86ISD::CVTS2UI, X86ISD::CVTS2UI_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsi2sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtsi642sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_SINT_TO_FP, X86ISD::SCALAR_SINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2si64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2SI, X86ISD::CVTTS2SI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi32, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvttsh2usi64, INTR_TYPE_1OP_SAE, X86ISD::CVTTS2UI, X86ISD::CVTTS2UI_SAE),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtusi2sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vcvtusi642sh, INTR_TYPE_2OP,
+ X86ISD::SCALAR_UINT_TO_FP, X86ISD::SCALAR_UINT_TO_FP_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vfmadd_f16, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vfmadd_ph_512, INTR_TYPE_3OP, ISD::FMA, X86ISD::FMADD_RND),
+ X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_128, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_256, INTR_TYPE_3OP, X86ISD::FMADDSUB, 0),
+ X86_INTRINSIC_DATA(avx512fp16_vfmaddsub_ph_512, INTR_TYPE_3OP, X86ISD::FMADDSUB,
+ X86ISD::FMADDSUB_RND),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0),
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 165533eba346..4710e524931c 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -306,7 +306,8 @@ bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction(
OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
OptimizePluginPath.c_str(), &ErrorMsg);
if (!ErrorMsg.empty())
- report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+ report_fatal_error(Twine("Failed to load opt plugin: \"") + ErrorMsg +
+ "\"");
OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
if (!OptimizeCut)
report_fatal_error("Invalid optimization plugin");
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
index 7b6276c1d87e..e562748c98fe 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -76,7 +76,7 @@ bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
bool Modified = false;
for (auto &MBB : MF) {
for (auto MBBI = MBB.begin(); MBBI != MBB.end(); ++MBBI) {
- if (MBBI->getOpcode() != X86::RETQ)
+ if (MBBI->getOpcode() != X86::RET64)
continue;
unsigned ClobberReg = TRI->findDeadCallerSavedReg(MBB, MBBI);
diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index 248069f4deb4..6b564a0356a6 100644
--- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -498,8 +498,8 @@ X86LowerAMXIntrinsics::lowerTileDP(Instruction *TileDP) {
Value *ResAMX =
Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));
// Delete TileDP intrinsic and do some clean-up.
- for (auto UI = TileDP->use_begin(), UE = TileDP->use_end(); UI != UE;) {
- Instruction *I = cast<Instruction>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(TileDP->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
Value *Vec;
if (match(I, m_BitCast(m_Value(Vec)))) {
I->replaceAllUsesWith(ResVec);
@@ -542,9 +542,8 @@ bool X86LowerAMXIntrinsics::lowerTileLoadStore(Instruction *TileLoadStore) {
Value *ResAMX =
Builder.CreateBitCast(ResVec, Type::getX86_AMXTy(Builder.getContext()));
// Delete tileloadd6 intrinsic and do some clean-up
- for (auto UI = TileLoadStore->use_begin(), UE = TileLoadStore->use_end();
- UI != UE;) {
- Instruction *I = cast<Instruction>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(TileLoadStore->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
Value *Vec;
if (match(I, m_BitCast(m_Value(Vec)))) {
I->replaceAllUsesWith(ResVec);
@@ -561,8 +560,8 @@ bool X86LowerAMXIntrinsics::lowerTileZero(Instruction *TileZero) {
IRBuilder<> Builder(TileZero);
FixedVectorType *V256I32Ty = FixedVectorType::get(Builder.getInt32Ty(), 256);
Value *VecZero = Constant::getNullValue(V256I32Ty);
- for (auto UI = TileZero->use_begin(), UE = TileZero->use_end(); UI != UE;) {
- Instruction *I = cast<Instruction>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(TileZero->uses())) {
+ Instruction *I = cast<Instruction>(U.getUser());
Value *Vec;
if (match(I, m_BitCast(m_Value(Vec)))) {
I->replaceAllUsesWith(VecZero);
@@ -631,6 +630,7 @@ bool X86LowerAMXIntrinsics::visit() {
return C;
}
+namespace {
class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
public:
static char ID;
@@ -665,6 +665,7 @@ public:
AU.addRequired<TargetPassConfig>();
}
};
+} // namespace
static const char PassName[] = "Lower AMX intrinsics";
char X86LowerAMXIntrinsicsLegacyPass::ID = 0;
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 4ba44ccb6c16..7368b64efd9a 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -40,8 +40,10 @@
//
#include "X86.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -56,66 +58,44 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
#define DEBUG_TYPE "lower-amx-type"
-static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder,
- BasicBlock *BB) {
+static bool isAMXCast(Instruction *II) {
+ return match(II,
+ m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value())) ||
+ match(II, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(m_Value()));
+}
+
+static AllocaInst *createAllocaInstAtEntry(IRBuilder<> &Builder, BasicBlock *BB,
+ Type *Ty) {
Function &F = *BB->getParent();
Module *M = BB->getModule();
const DataLayout &DL = M->getDataLayout();
- Type *V256I32Ty = VectorType::get(Builder.getInt32Ty(), 256, false);
LLVMContext &Ctx = Builder.getContext();
auto AllocaAlignment = DL.getPrefTypeAlign(Type::getX86_AMXTy(Ctx));
unsigned AllocaAS = DL.getAllocaAddrSpace();
AllocaInst *AllocaRes =
- new AllocaInst(V256I32Ty, AllocaAS, "", &F.getEntryBlock().front());
+ new AllocaInst(Ty, AllocaAS, "", &F.getEntryBlock().front());
AllocaRes->setAlignment(AllocaAlignment);
return AllocaRes;
}
-namespace {
-class X86LowerAMXType {
- Function &Func;
- TargetMachine *TM = nullptr;
-
- // In AMX intrinsics we let Shape = {Row, Col}, but the
- // RealCol = Col / ElementSize. We may use the RealCol
- // as a new Row for other new created AMX intrinsics.
- std::map<Value *, Value *> Col2Row;
-
-public:
- X86LowerAMXType(Function &F, TargetMachine *TargetM) : Func(F), TM(TargetM) {}
- bool visit();
- void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
- void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
- bool transformBitcast(BitCastInst *Bitcast);
- std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
- Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
-};
-
-Value *X86LowerAMXType::getRowFromCol(Instruction *II, Value *V,
- unsigned Granularity) {
- if (Col2Row.count(V))
- return Col2Row[V];
- IRBuilder<> Builder(&*II->getParent()->getFirstInsertionPt());
- if (auto *I = dyn_cast<Instruction>(V)) {
- BasicBlock::iterator Iter = I->getIterator();
- ++Iter;
- Builder.SetInsertPoint(&*Iter);
- }
- ConstantInt *Gran = Builder.getInt16(Granularity);
- Value *RealRow = Builder.CreateUDiv(V, Gran);
- Col2Row[V] = RealRow;
- return RealRow;
+static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
+ for (Instruction &I : F.getEntryBlock())
+ if (!isa<AllocaInst>(&I))
+ return &I;
+ llvm_unreachable("No terminator in the entry block!");
}
-std::pair<Value *, Value *> X86LowerAMXType::getShape(IntrinsicInst *II,
- unsigned OpNo) {
+static std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
+ IRBuilder<> Builder(II);
Value *Row = nullptr, *Col = nullptr;
switch (II->getIntrinsicID()) {
default:
@@ -144,14 +124,32 @@ std::pair<Value *, Value *> X86LowerAMXType::getShape(IntrinsicInst *II,
Col = II->getArgOperand(2);
break;
case 5:
- Row = II->getArgOperand(2);
- // FIXME: There is a design bug for AMX shape, which the Col should be
- // Col/4 if it will be used as Row, but current Greedy RA can't handle
- // this case well, it may failed if we generate a new Shape definition.
- // So Let's just do it in O0 first.
- // Row = Row / 4
- if (TM->getOptLevel() == CodeGenOpt::None)
- Row = getRowFromCol(II, Row, 4);
+ if (isa<ConstantInt>(II->getArgOperand(2)))
+ Row = Builder.getInt16(
+ (cast<ConstantInt>(II->getOperand(2))->getSExtValue()) / 4);
+ else if (isa<Instruction>(II->getArgOperand(2))) {
+ // When it is not a const value and it is not a function argument, we
+ // create Row after the definition of II->getOperand(2) instead of
+      // before II. For example, if II is %118, we get the shape for %117:
+ // %117 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x
+ // i32> %115).
+ // %118 = call x86_amx @llvm.x86.tdpbf16ps.internal(i16
+ // %104, i16 %105, i16 %106, x86_amx %110, x86_amx %114, x86_amx
+ // %117).
+      // If we create %row = udiv i16 %106, 4 before %118 (i.e. II), then its
+      // definition is after its user (the new tileload for %117).
+ // So, the best choice is to create %row right after the definition of
+ // %106.
+ Builder.SetInsertPoint(cast<Instruction>(II->getOperand(2)));
+ Row = Builder.CreateUDiv(II->getOperand(2), Builder.getInt16(4));
+ cast<Instruction>(Row)->moveAfter(cast<Instruction>(II->getOperand(2)));
+ } else {
+ // When it is not a const value and it is a function argument, we create
+ // Row at the entry bb.
+ IRBuilder<> NewBuilder(
+ getFirstNonAllocaInTheEntryBlock(*II->getFunction()));
+ Row = NewBuilder.CreateUDiv(II->getOperand(2), NewBuilder.getInt16(4));
+ }
Col = II->getArgOperand(1);
break;
}
@@ -162,6 +160,23 @@ std::pair<Value *, Value *> X86LowerAMXType::getShape(IntrinsicInst *II,
return std::make_pair(Row, Col);
}
+namespace {
+class X86LowerAMXType {
+ Function &Func;
+
+ // In AMX intrinsics we let Shape = {Row, Col}, but the
+ // RealCol = Col / ElementSize. We may use the RealCol
+  // as a new Row for other newly created AMX intrinsics.
+ std::map<Value *, Value *> Col2Row;
+
+public:
+ X86LowerAMXType(Function &F) : Func(F) {}
+ bool visit();
+ void combineLoadBitcast(LoadInst *LD, BitCastInst *Bitcast);
+ void combineBitcastStore(BitCastInst *Bitcast, StoreInst *ST);
+ bool transformBitcast(BitCastInst *Bitcast);
+};
+
// %src = load <256 x i32>, <256 x i32>* %addr, align 64
// %2 = bitcast <256 x i32> %src to x86_amx
// -->
@@ -230,8 +245,8 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
Value *I8Ptr, *Stride;
auto *Src = Bitcast->getOperand(0);
- auto Prepare = [&]() {
- AllocaAddr = createAllocaInstAtEntry(Builder, Bitcast->getParent());
+ auto Prepare = [&](Type *MemTy) {
+ AllocaAddr = createAllocaInstAtEntry(Builder, Bitcast->getParent(), MemTy);
I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
Stride = Builder.getInt64(64);
};
@@ -250,7 +265,7 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
if (!II)
return false; // May be bitcast from x86amx to <256 x i32>.
- Prepare();
+ Prepare(Bitcast->getOperand(0)->getType());
Builder.CreateStore(Src, AllocaAddr);
// TODO we can pick an constant operand for the shape.
Value *Row = nullptr, *Col = nullptr;
@@ -270,7 +285,7 @@ bool X86LowerAMXType::transformBitcast(BitCastInst *Bitcast) {
auto *II = dyn_cast<IntrinsicInst>(Src);
if (!II)
return false; // May be bitcast from <256 x i32> to x86amx.
- Prepare();
+ Prepare(Bitcast->getType());
Value *Row = II->getOperand(0);
Value *Col = II->getOperand(1);
std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Src};
@@ -287,9 +302,7 @@ bool X86LowerAMXType::visit() {
Col2Row.clear();
for (BasicBlock *BB : post_order(&Func)) {
- for (BasicBlock::reverse_iterator II = BB->rbegin(), IE = BB->rend();
- II != IE;) {
- Instruction &Inst = *II++;
+ for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(*BB))) {
auto *Bitcast = dyn_cast<BitCastInst>(&Inst);
if (!Bitcast)
continue;
@@ -332,10 +345,8 @@ bool X86LowerAMXType::visit() {
continue;
}
StoreInst *ST = nullptr;
- for (auto UI = Bitcast->use_begin(), UE = Bitcast->use_end();
- UI != UE;) {
- Value *I = (UI++)->getUser();
- ST = dyn_cast<StoreInst>(I);
+ for (Use &U : Bitcast->uses()) {
+ ST = dyn_cast<StoreInst>(U.getUser());
if (ST)
break;
}
@@ -637,6 +648,366 @@ bool X86VolatileTileData::volatileTileData() {
namespace {
+class X86LowerAMXCast {
+ Function &Func;
+
+public:
+ X86LowerAMXCast(Function &F) : Func(F) {}
+ bool combineAMXcast(TargetLibraryInfo *TLI);
+ bool transformAMXCast(IntrinsicInst *AMXCast);
+ bool transformAllAMXCast();
+ bool optimizeAMXCastFromPhi(IntrinsicInst *CI, PHINode *PN,
+ SmallSetVector<Instruction *, 16> &DeadInst);
+};
+
+static bool DCEInstruction(Instruction *I,
+ SmallSetVector<Instruction *, 16> &WorkList,
+ const TargetLibraryInfo *TLI) {
+ if (isInstructionTriviallyDead(I, TLI)) {
+ salvageDebugInfo(*I);
+ salvageKnowledge(I);
+
+ // Null out all of the instruction's operands to see if any operand becomes
+ // dead as we go.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ Value *OpV = I->getOperand(i);
+ I->setOperand(i, nullptr);
+
+ if (!OpV->use_empty() || I == OpV)
+ continue;
+
+ // If the operand is an instruction that became dead as we nulled out the
+ // operand, and if it is 'trivially' dead, delete it in a future loop
+ // iteration.
+ if (Instruction *OpI = dyn_cast<Instruction>(OpV)) {
+ if (isInstructionTriviallyDead(OpI, TLI)) {
+ WorkList.insert(OpI);
+ }
+ }
+ }
+ I->eraseFromParent();
+ return true;
+ }
+ return false;
+}
+
+/// This function handles the following case:
+///
+/// A -> B amxcast
+/// PHI
+/// B -> A amxcast
+///
+/// All the related PHI nodes can be replaced by new PHI nodes with type A.
+/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
+bool X86LowerAMXCast::optimizeAMXCastFromPhi(
+ IntrinsicInst *CI, PHINode *PN,
+ SmallSetVector<Instruction *, 16> &DeadInst) {
+ IRBuilder<> Builder(CI);
+ Value *Src = CI->getOperand(0);
+ Type *SrcTy = Src->getType(); // Type B
+ Type *DestTy = CI->getType(); // Type A
+
+ SmallVector<PHINode *, 4> PhiWorklist;
+ SmallSetVector<PHINode *, 4> OldPhiNodes;
+
+ // Find all of the A->B casts and PHI nodes.
+  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
+  // OldPhiNodes is used to track all known PHI nodes; before adding a new
+  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
+ PhiWorklist.push_back(PN);
+ OldPhiNodes.insert(PN);
+ while (!PhiWorklist.empty()) {
+ auto *OldPN = PhiWorklist.pop_back_val();
+ for (Value *IncValue : OldPN->incoming_values()) {
+      // TODO: currently, we ignore cases where the incoming value is a
+      // constant. In the future, we might support constants.
+ if (isa<Constant>(IncValue))
+ return false;
+
+ if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
+ if (OldPhiNodes.insert(PNode))
+ PhiWorklist.push_back(PNode);
+ continue;
+ }
+ Instruction *ACI = dyn_cast<Instruction>(IncValue);
+ if (ACI && isAMXCast(ACI)) {
+ // Verify it's a A->B cast.
+ Type *TyA = ACI->getOperand(0)->getType();
+ Type *TyB = ACI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return false;
+ continue;
+ }
+ return false;
+ }
+ }
+
+ // Check that each user of each old PHI node is something that we can
+ // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
+ for (auto *OldPN : OldPhiNodes) {
+ for (User *V : OldPN->users()) {
+ Instruction *ACI = dyn_cast<Instruction>(V);
+ if (ACI && isAMXCast(ACI)) {
+ // Verify it's a B->A cast.
+ Type *TyB = ACI->getOperand(0)->getType();
+ Type *TyA = ACI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return false;
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+ // As long as the user is another old PHI node, then even if we don't
+ // rewrite it, the PHI web we're considering won't have any users
+ // outside itself, so it'll be dead.
+ // example:
+ // bb.0:
+ // %0 = amxcast ...
+ // bb.1:
+ // %1 = amxcast ...
+ // bb.2:
+ // %goodphi = phi %0, %1
+ // %3 = amxcast %goodphi
+ // bb.3:
+ // %goodphi2 = phi %0, %goodphi
+ // %4 = amxcast %goodphi2
+        // When optimizeAMXCastFromPhi processes %3 and %goodphi, %goodphi2 is
+        // outside the phi-web, so the combination stops. When
+        // optimizeAMXCastFromPhi processes %4 and %goodphi2, the optimization
+        // will be done.
+ if (OldPhiNodes.count(PHI) == 0)
+ return false;
+ } else
+ return false;
+ }
+ }
+
+ // For each old PHI node, create a corresponding new PHI node with a type A.
+ SmallDenseMap<PHINode *, PHINode *> NewPNodes;
+ for (auto *OldPN : OldPhiNodes) {
+ Builder.SetInsertPoint(OldPN);
+ PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
+ NewPNodes[OldPN] = NewPN;
+ }
+
+ // Fill in the operands of new PHI nodes.
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
+ Value *V = OldPN->getOperand(j);
+ Value *NewV = nullptr;
+ Instruction *ACI = dyn_cast<Instruction>(V);
+      // There should not be an AMXCast from a constant.
+ if (ACI && isAMXCast(ACI))
+ NewV = ACI->getOperand(0);
+ else if (auto *PrevPN = dyn_cast<PHINode>(V))
+ NewV = NewPNodes[PrevPN];
+ assert(NewV);
+ NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
+ }
+ }
+
+  // Traverse all accumulated PHI nodes and process their users, which are
+  // Stores and BitCasts. Without this processing, NewPHI nodes could be
+  // replicated and could lead to extra moves generated after DeSSA.
+  // If there is a store with type B, change it to type A.
+
+ // Replace users of BitCast B->A with NewPHI. These will help
+ // later to get rid of a closure formed by OldPHI nodes.
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (User *V : make_early_inc_range(OldPN->users())) {
+ Instruction *ACI = dyn_cast<Instruction>(V);
+ if (ACI && isAMXCast(ACI)) {
+ Type *TyB = ACI->getOperand(0)->getType();
+ Type *TyA = ACI->getType();
+ assert(TyA == DestTy && TyB == SrcTy);
+ (void)TyA;
+ (void)TyB;
+ ACI->replaceAllUsesWith(NewPN);
+ DeadInst.insert(ACI);
+ } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+        // We don't need to push the PHINode into DeadInst since it is an
+        // operand of rootPN; DCE can safely delete rootPN's operands if
+        // rootPN is dead.
+ assert(OldPhiNodes.contains(PHI));
+ (void)PHI;
+ } else
+ llvm_unreachable("all uses should be handled");
+ }
+ }
+ return true;
+}
+
+bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
+ bool Change = false;
+ // Collect tile cast instruction.
+ SmallVector<Instruction *, 8> Vec2TileInsts;
+ SmallVector<Instruction *, 8> Tile2VecInsts;
+ SmallVector<Instruction *, 8> PhiCastWorkList;
+ SmallSetVector<Instruction *, 16> DeadInst;
+ for (BasicBlock &BB : Func) {
+ for (Instruction &I : BB) {
+ Value *Vec;
+ if (match(&I,
+ m_Intrinsic<Intrinsic::x86_cast_vector_to_tile>(m_Value(Vec))))
+ Vec2TileInsts.push_back(&I);
+ else if (match(&I, m_Intrinsic<Intrinsic::x86_cast_tile_to_vector>(
+ m_Value(Vec))))
+ Tile2VecInsts.push_back(&I);
+ }
+ }
+
+ auto Convert = [&](SmallVectorImpl<Instruction *> &Insts, Intrinsic::ID IID) {
+ for (auto *Inst : Insts) {
+ for (User *U : Inst->users()) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
+ if (!II || II->getIntrinsicID() != IID)
+ continue;
+ // T1 = vec2tile V0
+ // V2 = tile2vec T1
+ // V3 = OP V2
+ // -->
+ // T1 = vec2tile V0
+ // V2 = tile2vec T1
+ // V3 = OP V0
+ II->replaceAllUsesWith(Inst->getOperand(0));
+ Change = true;
+ }
+ }
+ };
+
+ Convert(Vec2TileInsts, Intrinsic::x86_cast_tile_to_vector);
+ Convert(Tile2VecInsts, Intrinsic::x86_cast_vector_to_tile);
+
+ auto EraseInst = [&](SmallVectorImpl<Instruction *> &Insts) {
+ for (auto *Inst : Insts) {
+ if (Inst->use_empty()) {
+ Inst->eraseFromParent();
+ Change = true;
+ }
+ }
+ };
+
+ EraseInst(Vec2TileInsts);
+ EraseInst(Tile2VecInsts);
+
+ // Handle the A->B->A cast, and there is an intervening PHI node.
+ for (BasicBlock &BB : Func) {
+ for (Instruction &I : BB) {
+ if (isAMXCast(&I)) {
+ if (isa<PHINode>(I.getOperand(0)))
+ PhiCastWorkList.push_back(&I);
+ }
+ }
+ }
+ for (auto *I : PhiCastWorkList) {
+    // We skip the dead AMXCast.
+ if (DeadInst.contains(I))
+ continue;
+ PHINode *PN = cast<PHINode>(I->getOperand(0));
+ if (optimizeAMXCastFromPhi(cast<IntrinsicInst>(I), PN, DeadInst)) {
+ DeadInst.insert(PN);
+ Change = true;
+ }
+ }
+
+  // Since we create new PHIs and merge AMXCasts, some old PHIs and AMXCasts
+  // might have no uses. We do some dead code elimination for them.
+ while (!DeadInst.empty()) {
+ Instruction *I = DeadInst.pop_back_val();
+ Change |= DCEInstruction(I, DeadInst, TLI);
+ }
+ return Change;
+}
+
+// There might be remaining AMXCasts after combineAMXcast, and they should be
+// handled elegantly.
+bool X86LowerAMXCast::transformAMXCast(IntrinsicInst *AMXCast) {
+ IRBuilder<> Builder(AMXCast);
+ AllocaInst *AllocaAddr;
+ Value *I8Ptr, *Stride;
+ auto *Src = AMXCast->getOperand(0);
+
+ auto Prepare = [&](Type *MemTy) {
+ AllocaAddr = createAllocaInstAtEntry(Builder, AMXCast->getParent(), MemTy);
+ I8Ptr = Builder.CreateBitCast(AllocaAddr, Builder.getInt8PtrTy());
+ Stride = Builder.getInt64(64);
+ };
+
+ if (AMXCast->getType()->isX86_AMXTy()) {
+ // %2 = amxcast <225 x i32> %src to x86_amx
+ // call void @llvm.x86.tilestored64.internal(i16 15, i16 60,
+ // i8* %addr3, i64 60, x86_amx %2)
+ // -->
+ // %addr = alloca <225 x i32>, align 64
+ // store <225 x i32> %src, <225 x i32>* %addr, align 64
+ // %addr2 = bitcast <225 x i32>* %addr to i8*
+ // %2 = call x86_amx @llvm.x86.tileloadd64.internal(i16 15, i16 60,
+ // i8* %addr2,
+ // i64 60)
+ // call void @llvm.x86.tilestored64.internal(i16 15, i16 60,
+ // i8* %addr3, i64 60, x86_amx %2)
+ Use &U = *(AMXCast->use_begin());
+ unsigned OpNo = U.getOperandNo();
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ if (!II)
+ return false; // May be bitcast from x86amx to <256 x i32>.
+ Prepare(AMXCast->getOperand(0)->getType());
+ Builder.CreateStore(Src, AllocaAddr);
+    // TODO: we can pick a constant operand for the shape.
+ Value *Row = nullptr, *Col = nullptr;
+ std::tie(Row, Col) = getShape(II, OpNo);
+ std::array<Value *, 4> Args = {
+ Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty())};
+ Value *NewInst = Builder.CreateIntrinsic(
+ Intrinsic::x86_tileloadd64_internal, None, Args);
+ AMXCast->replaceAllUsesWith(NewInst);
+ AMXCast->eraseFromParent();
+ } else {
+ // %2 = amxcast x86_amx %src to <225 x i32>
+ // -->
+ // %addr = alloca <225 x i32>, align 64
+ // %addr2 = bitcast <225 x i32>* to i8*
+ // call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col,
+ // i8* %addr2, i64 %stride)
+ // %2 = load <225 x i32>, <225 x i32>* %addr, align 64
+ auto *II = dyn_cast<IntrinsicInst>(Src);
+ if (!II)
+ return false; // May be bitcast from <256 x i32> to x86amx.
+ Prepare(AMXCast->getType());
+ Value *Row = II->getOperand(0);
+ Value *Col = II->getOperand(1);
+ std::array<Value *, 5> Args = {
+ Row, Col, I8Ptr, Builder.CreateSExt(Col, Builder.getInt64Ty()), Src};
+ Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, None, Args);
+ Value *NewInst = Builder.CreateLoad(AMXCast->getType(), AllocaAddr);
+ AMXCast->replaceAllUsesWith(NewInst);
+ AMXCast->eraseFromParent();
+ }
+
+ return true;
+}
+
+bool X86LowerAMXCast::transformAllAMXCast() {
+ bool Change = false;
+ // Collect tile cast instruction.
+ SmallVector<Instruction *, 8> WorkLists;
+ for (BasicBlock &BB : Func) {
+ for (Instruction &I : BB) {
+ if (isAMXCast(&I))
+ WorkLists.push_back(&I);
+ }
+ }
+
+ for (auto *Inst : WorkLists) {
+ Change |= transformAMXCast(cast<IntrinsicInst>(Inst));
+ }
+
+ return Change;
+}
+
+} // anonymous namespace
+
+namespace {
+
class X86LowerAMXTypeLegacyPass : public FunctionPass {
public:
static char ID;
@@ -646,10 +1017,18 @@ public:
}
bool runOnFunction(Function &F) override {
+ bool C = false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
+ TargetLibraryInfo *TLI =
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ X86LowerAMXCast LAC(F);
+ C |= LAC.combineAMXcast(TLI);
+    // There might be remaining AMXCasts after combineAMXcast, and they should
+    // be handled elegantly.
+ C |= LAC.transformAllAMXCast();
- X86LowerAMXType LAT(F, TM);
- bool C = LAT.visit();
+ X86LowerAMXType LAT(F);
+ C |= LAT.visit();
// Prepare for fast register allocation at O0.
// Todo: May better check the volatile model of AMX code, not just
@@ -671,6 +1050,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
@@ -681,6 +1061,7 @@ char X86LowerAMXTypeLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
false)
diff --git a/llvm/lib/Target/X86/X86LowerTileCopy.cpp b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
index 03692d195768..d6b42145859d 100644
--- a/llvm/lib/Target/X86/X86LowerTileCopy.cpp
+++ b/llvm/lib/Target/X86/X86LowerTileCopy.cpp
@@ -75,9 +75,7 @@ bool X86LowerTileCopy::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
- MII != MIE;) {
- MachineInstr &MI = *MII++;
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
if (!MI.isCopy())
continue;
MachineOperand &DstMO = MI.getOperand(0);
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 7d916f917d5e..c3cd634612a4 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -43,8 +43,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
using namespace llvm;
@@ -274,6 +277,9 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_GOTPCREL:
RefKind = MCSymbolRefExpr::VK_GOTPCREL;
break;
+ case X86II::MO_GOTPCREL_NORELAX:
+ RefKind = MCSymbolRefExpr::VK_GOTPCREL_NORELAX;
+ break;
case X86II::MO_GOT:
RefKind = MCSymbolRefExpr::VK_GOT;
break;
@@ -418,7 +424,7 @@ static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
}
static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
- return Subtarget.is64Bit() ? X86::RETQ : X86::RETL;
+ return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}
Optional<MCOperand>
@@ -1094,11 +1100,11 @@ static unsigned emitNop(MCStreamer &OS, unsigned NumBytes,
if (Subtarget->is64Bit()) {
// FIXME: We can use NOOPL on 32-bit targets with FeatureNOPL, but the
// IndexReg/BaseReg below need to be updated.
- if (Subtarget->hasFeature(X86::FeatureFast7ByteNOP))
+ if (Subtarget->hasFeature(X86::TuningFast7ByteNOP))
MaxNopLength = 7;
- else if (Subtarget->hasFeature(X86::FeatureFast15ByteNOP))
+ else if (Subtarget->hasFeature(X86::TuningFast15ByteNOP))
MaxNopLength = 15;
- else if (Subtarget->hasFeature(X86::FeatureFast11ByteNOP))
+ else if (Subtarget->hasFeature(X86::TuningFast11ByteNOP))
MaxNopLength = 11;
else
MaxNopLength = 10;
@@ -1323,6 +1329,244 @@ void X86AsmPrinter::LowerFENTRY_CALL(const MachineInstr &MI,
.addExpr(Op));
}
+void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
+ // FIXME: Make this work on non-ELF.
+ if (!TM.getTargetTriple().isOSBinFormatELF()) {
+ report_fatal_error("llvm.asan.check.memaccess only supported on ELF");
+ return;
+ }
+
+ unsigned Reg = MI.getOperand(0).getReg().id();
+ ASanAccessInfo AccessInfo(MI.getOperand(1).getImm());
+
+ MCSymbol *&Sym =
+ AsanMemaccessSymbols[AsanMemaccessTuple(Reg, AccessInfo.Packed)];
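+  // The check routine is named __asan_check_{load,store}<Size>_rn<Reg> and is
+  // emitted lazily, once per (register, access info) pair, in
+  // emitAsanMemaccessSymbols().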
+ if (!Sym) {
+ std::string Name = AccessInfo.IsWrite ? "store" : "load";
+ std::string SymName = "__asan_check_" + Name +
+ utostr(1ULL << AccessInfo.AccessSizeIndex) + "_rn" +
+ utostr(Reg);
+ Sym = OutContext.getOrCreateSymbol(SymName);
+ }
+
+ EmitAndCountInstruction(
+ MCInstBuilder(X86::CALL64pcrel32)
+ .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
+}
+
+void X86AsmPrinter::emitAsanMemaccessPartial(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI) {
+ assert(AccessInfo.AccessSizeIndex == 0 || AccessInfo.AccessSizeIndex == 1 ||
+ AccessInfo.AccessSizeIndex == 2);
+ assert(Reg != X86::R8);
+
+ uint64_t ShadowBase;
+ int MappingScale;
+ bool OrShadowOffset;
+ getAddressSanitizerParams(
+ Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
+ AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
+
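+
+  // Compute the shadow byte address in R8: shadow = Addr >> MappingScale,
+  // with ShadowBase either OR-ed in or folded into the load displacement.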
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(MappingScale),
+ STI);
+ if (OrShadowOffset) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(ShadowBase),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOV8rm)
+ .addReg(X86::R8B)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(0)
+ .addReg(X86::NoRegister),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::TEST8rr).addReg(X86::R8B).addReg(X86::R8B), STI);
+ } else {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOVSX32rm8)
+ .addReg(X86::R8D)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(ShadowBase)
+ .addReg(X86::NoRegister),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::TEST32rr).addReg(X86::R8D).addReg(X86::R8D), STI);
+ }
+ MCSymbol *AdditionalCheck = OutContext.createTempSymbol();
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JCC_1)
+ .addExpr(MCSymbolRefExpr::create(AdditionalCheck, OutContext))
+ .addImm(X86::COND_NE),
+ STI);
+ MCSymbol *ReturnSym = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(ReturnSym);
+ OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
+
+ // Shadow byte is non-zero so we need to perform additional checks.
+ OutStreamer->emitLabel(AdditionalCheck);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RCX),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::RCX)
+ .addReg(X86::NoRegister + Reg),
+ STI);
+ const size_t Granularity = 1ULL << MappingScale;
+ OutStreamer->emitInstruction(MCInstBuilder(X86::AND32ri8)
+ .addReg(X86::NoRegister)
+ .addReg(X86::ECX)
+ .addImm(Granularity - 1),
+ STI);
+ if (AccessInfo.AccessSizeIndex == 1) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
+ .addReg(X86::NoRegister)
+ .addReg(X86::ECX)
+ .addImm(1),
+ STI);
+ } else if (AccessInfo.AccessSizeIndex == 2) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::ADD32ri8)
+ .addReg(X86::NoRegister)
+ .addReg(X86::ECX)
+ .addImm(3),
+ STI);
+ }
+
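+  // ECX now holds the offset of the last byte of the access within its shadow
+  // granule; the access is fine if that offset is below the shadow value (the
+  // number of addressable bytes in the granule).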
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::R8D).addImm(1),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RCX),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JCC_1)
+ .addExpr(MCSymbolRefExpr::create(ReturnSym, OutContext))
+ .addImm(X86::COND_L),
+ STI);
+
+ emitAsanReportError(M, Reg, AccessInfo, STI);
+}
+
+void X86AsmPrinter::emitAsanMemaccessFull(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI) {
+ assert(AccessInfo.AccessSizeIndex == 3 || AccessInfo.AccessSizeIndex == 4);
+ assert(Reg != X86::R8);
+
+ uint64_t ShadowBase;
+ int MappingScale;
+ bool OrShadowOffset;
+ getAddressSanitizerParams(
+ Triple(M.getTargetTriple()), M.getDataLayout().getPointerSizeInBits(),
+ AccessInfo.CompileKernel, &ShadowBase, &MappingScale, &OrShadowOffset);
+
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::MOV64rr).addReg(X86::R8).addReg(X86::NoRegister + Reg),
+ STI);
+ OutStreamer->emitInstruction(MCInstBuilder(X86::SHR64ri)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(MappingScale),
+ STI);
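+
+  // An 8- or 16-byte access covers whole shadow granules, so the shadow byte
+  // (CMP8mi) or shadow word (CMP16mi8) must compare equal to zero; otherwise
+  // branch to the error-reporting path.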
+ if (OrShadowOffset) {
+ OutStreamer->emitInstruction(MCInstBuilder(X86::OR64ri32)
+ .addReg(X86::R8)
+ .addReg(X86::R8)
+ .addImm(ShadowBase),
+ STI);
+ auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
+ OutStreamer->emitInstruction(MCInstBuilder(OpCode)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(0)
+ .addReg(X86::NoRegister)
+ .addImm(0),
+ STI);
+ } else {
+ auto OpCode = AccessInfo.AccessSizeIndex == 3 ? X86::CMP8mi : X86::CMP16mi8;
+ OutStreamer->emitInstruction(MCInstBuilder(OpCode)
+ .addReg(X86::R8)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(ShadowBase)
+ .addReg(X86::NoRegister)
+ .addImm(0),
+ STI);
+ }
+ MCSymbol *ReportCode = OutContext.createTempSymbol();
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JCC_1)
+ .addExpr(MCSymbolRefExpr::create(ReportCode, OutContext))
+ .addImm(X86::COND_NE),
+ STI);
+ MCSymbol *ReturnSym = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(ReturnSym);
+ OutStreamer->emitInstruction(MCInstBuilder(getRetOpcode(*Subtarget)), STI);
+
+ OutStreamer->emitLabel(ReportCode);
+ emitAsanReportError(M, Reg, AccessInfo, STI);
+}
+
+void X86AsmPrinter::emitAsanReportError(Module &M, unsigned Reg,
+ const ASanAccessInfo &AccessInfo,
+ MCSubtargetInfo &STI) {
+ std::string Name = AccessInfo.IsWrite ? "store" : "load";
+ MCSymbol *ReportError = OutContext.getOrCreateSymbol(
+ "__asan_report_" + Name + utostr(1ULL << AccessInfo.AccessSizeIndex));
+ OutStreamer->emitInstruction(MCInstBuilder(X86::MOV64rr)
+ .addReg(X86::RDI)
+ .addReg(X86::NoRegister + Reg),
+ STI);
+ OutStreamer->emitInstruction(
+ MCInstBuilder(X86::JMP_4)
+ .addExpr(MCSymbolRefExpr::create(ReportError, MCSymbolRefExpr::VK_PLT,
+ OutContext)),
+ STI);
+}
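The reporting thunk moves the faulting address into RDI (the first SysV AMD64 argument register) and tail-jumps through the PLT to the matching ASan runtime entry; the symbol name encodes the access kind plus the size 1 << AccessSizeIndex. A small illustration of the name construction used above:

#include "llvm/ADT/StringExtras.h"   // llvm::utostr, as used above
// Example: the callee built for a 4-byte write (IsWrite == true, AccessSizeIndex == 2).
std::string Name = /*IsWrite=*/true ? "store" : "load";
std::string Callee = "__asan_report_" + Name + llvm::utostr(1ULL << 2);
// Callee == "__asan_report_store4"; an 8-byte load would give "__asan_report_load8".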
+
+void X86AsmPrinter::emitAsanMemaccessSymbols(Module &M) {
+ if (AsanMemaccessSymbols.empty())
+ return;
+
+ const Triple &TT = TM.getTargetTriple();
+ assert(TT.isOSBinFormatELF());
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TM.getTarget().createMCSubtargetInfo(TT.str(), "", ""));
+ assert(STI && "Unable to create subtarget info");
+
+ for (auto &P : AsanMemaccessSymbols) {
+ MCSymbol *Sym = P.second;
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".text.hot", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_GROUP, 0, Sym->getName(),
+ /*IsComdat=*/true));
+
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_ELF_TypeFunction);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Weak);
+ OutStreamer->emitSymbolAttribute(Sym, MCSA_Hidden);
+ OutStreamer->emitLabel(Sym);
+
+ unsigned Reg = std::get<0>(P.first);
+ ASanAccessInfo AccessInfo(std::get<1>(P.first));
+
+ if (AccessInfo.AccessSizeIndex < 3) {
+ emitAsanMemaccessPartial(M, Reg, AccessInfo, *STI);
+ } else {
+ emitAsanMemaccessFull(M, Reg, AccessInfo, *STI);
+ }
+ }
+}
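emitAsanMemaccessSymbols only materialises callbacks that were actually requested; the requests come from lowering the ASAN_CHECK_MEMACCESS pseudo (see the emitInstruction case further down). The real LowerASAN_CHECK_MEMACCESS body is not part of this excerpt, so the following is only a hedged sketch of the shape such a hook could take; the map key type and the callback-name scheme are assumptions made for illustration:

// Hypothetical sketch: intern one weak, hidden callback per
// (register, packed ASanAccessInfo) pair, then call it.
void X86AsmPrinter::LowerASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
  unsigned Reg = MI.getOperand(0).getReg();      // pointer to be checked
  uint32_t Packed = MI.getOperand(1).getImm();   // packed ASanAccessInfo
  MCSymbol *&Sym = AsanMemaccessSymbols[std::make_tuple(Reg, Packed)];
  if (!Sym)
    Sym = OutContext.getOrCreateSymbol("__asan_check_r" + Twine(Reg) + "_" +
                                       Twine(Packed)); // illustrative name only
  EmitAndCountInstruction(
      MCInstBuilder(X86::CALL64pcrel32)
          .addExpr(MCSymbolRefExpr::create(Sym, OutContext)));
}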
+
void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// PATCHABLE_OP minsize, opcode, operands
@@ -1477,7 +1721,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
// First we emit the label and the jump.
auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
OutStreamer->AddComment("# XRay Custom Event Log");
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1573,7 +1817,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// First we emit the label and the jump.
auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true);
OutStreamer->AddComment("# XRay Typed Event Log");
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1675,7 +1919,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
// call <relative offset, 32-bits> // 5 bytes
//
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1705,7 +1949,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
//
// This just makes sure that the alignment for the next instruction is 2.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
unsigned OpCode = MI.getOperand(0).getImm();
MCInst Ret;
@@ -1729,7 +1973,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
// the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
// tail call much like how we have it in PATCHABLE_RET.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
- OutStreamer->emitCodeAlignment(2);
+ OutStreamer->emitCodeAlignment(2, &getSubtargetInfo());
OutStreamer->emitLabel(CurSled);
auto Target = OutContext.createTempSymbol();
@@ -2563,6 +2807,9 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
return;
+ case X86::ASAN_CHECK_MEMACCESS:
+ return LowerASAN_CHECK_MEMACCESS(*MI);
+
case X86::MORESTACK_RET_RESTORE_R10:
// Return, then restore R10.
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 46d2e2a66fd6..99d1a97380dd 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -102,8 +102,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// True if this function uses the red zone.
bool UsesRedZone = false;
- /// True if this function has WIN_ALLOCA instructions.
- bool HasWinAlloca = false;
+ /// True if this function has DYN_ALLOCA instructions.
+ bool HasDynAlloca = false;
/// True if this function has any preallocated calls.
bool HasPreallocatedCall = false;
@@ -113,6 +113,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// other tools to detect the extended record.
bool HasSwiftAsyncContext = false;
+ /// True if this function has a virtual tile register. This is used to
+ /// determine whether we should insert a tilerelease in frame lowering.
+ bool HasVirtualTileReg = false;
+
Optional<int> SwiftAsyncContextFrameIdx;
ValueMap<const Value *, size_t> PreallocatedIds;
@@ -198,8 +202,8 @@ public:
bool getUsesRedZone() const { return UsesRedZone; }
void setUsesRedZone(bool V) { UsesRedZone = V; }
- bool hasWinAlloca() const { return HasWinAlloca; }
- void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+ bool hasDynAlloca() const { return HasDynAlloca; }
+ void setHasDynAlloca(bool v) { HasDynAlloca = v; }
bool hasPreallocatedCall() const { return HasPreallocatedCall; }
void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
@@ -207,6 +211,9 @@ public:
bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
+ bool hasVirtualTileReg() const { return HasVirtualTileReg; }
+ void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
+
Optional<int> getSwiftAsyncContextFrameIdx() const {
return SwiftAsyncContextFrameIdx;
}
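The new HasVirtualTileReg flag (set by X86PreTileConfig further down) is meant for frame lowering, so that AMX tile state can be released when leaving a function that configured tiles. The frame-lowering side is not shown in this excerpt; a hedged sketch of how an epilogue hook could consult the flag:

// Sketch only; in-tree such logic would belong in X86FrameLowering.cpp.
static void emitTileReleaseIfNeeded(MachineFunction &MF, MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const TargetInstrInfo &TII) {
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  if (X86FI->hasVirtualTileReg())
    BuildMI(MBB, MBBI, DebugLoc(), TII.get(X86::TILERELEASE));
}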
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index ab4d2bd05772..6967a96ce83b 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -503,9 +503,7 @@ bool X86OptimizeLEAPass::removeRedundantAddrCalc(MemOpMap &LEAs) {
MachineBasicBlock *MBB = (*LEAs.begin()->second.begin())->getParent();
// Process all instructions in basic block.
- for (auto I = MBB->begin(), E = MBB->end(); I != E;) {
- MachineInstr &MI = *I++;
-
+ for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
// Instruction must be load or store.
if (!MI.mayLoadOrStore())
continue;
@@ -655,9 +653,8 @@ bool X86OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
// isReplaceable function.
Register FirstVReg = First.getOperand(0).getReg();
Register LastVReg = Last.getOperand(0).getReg();
- for (auto UI = MRI->use_begin(LastVReg), UE = MRI->use_end();
- UI != UE;) {
- MachineOperand &MO = *UI++;
+ for (MachineOperand &MO :
+ llvm::make_early_inc_range(MRI->use_operands(LastVReg))) {
MachineInstr &MI = *MO.getParent();
if (MI.isDebugValue()) {
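Several loops in this patch are converted to llvm::make_early_inc_range, which advances the underlying iterator before yielding each element, so the current element can be erased or re-linked without invalidating the traversal. A minimal usage sketch (the isDead predicate is hypothetical):

#include "llvm/ADT/STLExtras.h"        // llvm::make_early_inc_range
// Delete instructions while walking a block, without manual iterator bookkeeping.
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
  if (isDead(MI))                      // hypothetical predicate
    MI.eraseFromParent();              // safe: the range already moved past MI
}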
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index e10dab72078d..47ae517ae76d 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -174,12 +174,9 @@ void PadShortFunc::findReturns(MachineBasicBlock *MBB, unsigned int Cycles) {
}
// Follow branches in BB and look for returns
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin();
- I != MBB->succ_end(); ++I) {
- if (*I == MBB)
- continue;
- findReturns(*I, Cycles);
- }
+ for (MachineBasicBlock *Succ : MBB->successors())
+ if (Succ != MBB)
+ findReturns(Succ, Cycles);
}
/// cyclesUntilReturn - return true if the MBB has a return instruction,
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 3844667ccc74..25fcba1a7581 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -142,8 +142,24 @@ def SkylakeServerPfmCounters : ProcPfmCounters {
def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cannonlake", SkylakeServerPfmCounters>;
-def : PfmCountersBinding<"icelake-client", SkylakeServerPfmCounters>;
-def : PfmCountersBinding<"icelake-server", SkylakeServerPfmCounters>;
+
+def IceLakePfmCounters : ProcPfmCounters {
+ let CycleCounter = UnhaltedCoreCyclesPfmCounter;
+ let UopsCounter = UopsIssuedPfmCounter;
+ let IssueCounters = [
+ PfmIssueCounter<"ICXPort0", "uops_dispatched_port:port_0">,
+ PfmIssueCounter<"ICXPort1", "uops_dispatched_port:port_1">,
+ PfmIssueCounter<"ICXPort23", "uops_dispatched_port:port_2_3">,
+ PfmIssueCounter<"ICXPort49", "uops_dispatched_port:port_4_9">,
+ PfmIssueCounter<"ICXPort5", "uops_dispatched_port:port_5">,
+ PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">,
+ PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
+ ];
+}
+def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
+def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
+def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>;
+def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>;
// AMD X86 Counters.
// Set basic counters for AMD cpus that we know libpfm4 supports.
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index b85a0b61d6f6..5d21f8666ec6 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -25,6 +25,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -235,6 +236,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
const TargetInstrInfo *TII = ST.getInstrInfo();
const TargetRegisterInfo *TRI = ST.getRegisterInfo();
const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
BitVector AMXRegs(TRI->getNumRegs());
for (unsigned I = 0; I < RC->getNumRegs(); I++)
@@ -294,6 +296,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
// There's no AMX instruction if we didn't find a tile config live in point.
if (CfgNeedInsert.empty())
return false;
+ X86FI->setHasVirtualTileReg(true);
// Avoid to insert ldtilecfg before any shape defs.
SmallVector<MachineBasicBlock *, 8> WorkList;
@@ -323,7 +326,7 @@ bool X86PreTileConfig::runOnMachineFunction(MachineFunction &MF) {
ST.getTileConfigSize(), ST.getTileConfigAlignment(), false);
// Try to insert for the tile config live in points.
- for (auto I : CfgNeedInsert) {
+ for (const auto &I : CfgNeedInsert) {
SmallSet<MIRef, 8> InsertPoints;
SmallVector<MIRef, 8> WorkList({I});
while (!WorkList.empty()) {
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index c4748423baea..130cb61cdde2 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -816,10 +816,10 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
return 0;
case TargetOpcode::PATCHABLE_RET:
case X86::RET:
- case X86::RETL:
- case X86::RETQ:
- case X86::RETIL:
- case X86::RETIQ:
+ case X86::RET32:
+ case X86::RET64:
+ case X86::RETI32:
+ case X86::RETI64:
case X86::TCRETURNdi:
case X86::TCRETURNri:
case X86::TCRETURNmi:
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 1ab9d2588a90..d835f452b67e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -373,12 +373,6 @@ def CR15 : X86Reg<"cr15", 15>;
def EIZ : X86Reg<"eiz", 4>;
def RIZ : X86Reg<"riz", 4>;
-// Bound registers, used in MPX instructions
-def BND0 : X86Reg<"bnd0", 0>;
-def BND1 : X86Reg<"bnd1", 1>;
-def BND2 : X86Reg<"bnd2", 2>;
-def BND3 : X86Reg<"bnd3", 3>;
-
// CET registers - Shadow Stack Pointer
def SSP : X86Reg<"ssp", 0>;
@@ -436,6 +430,12 @@ def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+// GR64NoR8 - 64-bit GPRs without R8 and RIP. Could be used when emitting code for
+// intrinsics, which use implicit input registers.
+def GR64NoR8 : RegisterClass<"X86", [i64], 64,
+ (add RAX, RCX, RDX, RSI, RDI, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP)>;
+
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
@@ -567,9 +567,9 @@ def RSTi : RegisterOperand<RST, "printSTiRegOperand">;
// Generic vector registers: VR64 and VR128.
// Ensure that float types are declared first - only float is legal on SSE1.
def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
-def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
+def VR128 : RegisterClass<"X86", [v4f32, v2f64, v8f16, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32)>;
-def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+def VR256 : RegisterClass<"X86", [v8f32, v4f64, v16f16, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 15)>;
// Status flags registers.
@@ -587,7 +587,7 @@ def DFCCR : RegisterClass<"X86", [i32], 32, (add DF)> {
}
// AVX-512 vector/mask registers.
-def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64],
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v32f16, v64i8, v32i16, v16i32, v8i64],
512, (sequence "ZMM%u", 0, 31)>;
// Represents the lower 16 registers that have VEX/legacy encodable subregs.
@@ -599,10 +599,12 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
+def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)>;
+
// Extended VR128 and VR256 for AVX-512 instructions
-def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, f128],
+def VR128X : RegisterClass<"X86", [v4f32, v2f64, v8f16, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
-def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+def VR256X : RegisterClass<"X86", [v8f32, v4f64, v16f16, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
// Mask registers
@@ -632,9 +634,6 @@ def VK16WM : RegisterClass<"X86", [v16i1], 16, (add VK8WM)> {let Size = 16;}
def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;}
def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;}
-// Bound registers
-def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>;
-
// Tiles
let CopyCost = -1 in // Don't allow copying of tile registers
def TILE : RegisterClass<"X86", [x86amx], 8192,
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index d2ced1c67407..2827981b7fb0 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -112,6 +112,25 @@ multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW,
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [BWPort237,BWPort4]>;
+// Loads, stores, and moves, not folded with other operations.
+// Store_addr on 237.
+// Store_data on 4.
+defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1], 1>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def : WriteRes<WriteZero, []>;
+
+// Model the effect of clobbering the read-write mask operand of the GATHER operation.
+// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
+defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
+
// Arithmetic.
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op.
@@ -123,41 +142,41 @@ defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>;
+defm : BWWriteResPair<WriteMULX32, [BWPort1,BWPort06,BWPort0156], 3, [1,1,1], 3>;
defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : BWWriteResPair<WriteMULX64, [BWPort1,BWPort5], 3, [1,1], 2>;
defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>;
defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
-
-// TODO: Why isn't the BWDivider used consistently?
-defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
-defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
-defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
-defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
-defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
-
-defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
-defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
-defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+def BWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(BWWriteIMulH.Latency, BroadwellModel.LoadLatency);
+}
-defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
-defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteBSWAP32, [BWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteCMPXCHG,[BWPort06, BWPort0156], 5, [2, 3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[BWPort23, BWPort06, BWPort0156, BWPort237, BWPort4], 8, [1, 2, 1, 1, 1], 6>;
defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>;
-defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
+// Integer shifts and rotates.
+defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
+defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
+defm : BWWriteResPair<WriteRotate, [BWPort06], 1, [1], 1>;
+defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
+defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
+
+defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
defm : BWWriteResPair<WriteCMOV, [BWPort06], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [BWPort1], 3, [1], 1>; // x87 conditional move.
@@ -176,6 +195,11 @@ defm : X86WriteRes<WriteBitTestSet, [BWPort06], 1, [1], 1>; // Bit Test + S
defm : X86WriteRes<WriteBitTestSetImmLd, [BWPort06,BWPort23], 5, [1,1], 3>;
defm : X86WriteRes<WriteBitTestSetRegLd, [BWPort0156,BWPort23], 5, [1,1], 2>;
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [BWPort15]>;
+
// Bit counts.
defm : BWWriteResPair<WriteBSF, [BWPort1], 3>;
defm : BWWriteResPair<WriteBSR, [BWPort1], 3>;
@@ -183,43 +207,29 @@ defm : BWWriteResPair<WriteLZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WriteTZCNT, [BWPort1], 3>;
defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>;
-// Integer shifts and rotates.
-defm : BWWriteResPair<WriteShift, [BWPort06], 1>;
-defm : BWWriteResPair<WriteShiftCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-defm : BWWriteResPair<WriteRotate, [BWPort06], 1, [1], 1>;
-defm : BWWriteResPair<WriteRotateCL, [BWPort06,BWPort0156], 3, [2,1], 3>;
-
-// SHLD/SHRD.
-defm : X86WriteRes<WriteSHDrri, [BWPort1], 3, [1], 1>;
-defm : X86WriteRes<WriteSHDrrcl,[BWPort1,BWPort06,BWPort0156], 6, [1, 1, 2], 4>;
-defm : X86WriteRes<WriteSHDmri, [BWPort1,BWPort23,BWPort237,BWPort0156], 9, [1, 1, 1, 1], 4>;
-defm : X86WriteRes<WriteSHDmrcl,[BWPort1,BWPort23,BWPort237,BWPort06,BWPort0156], 11, [1, 1, 1, 1, 2], 6>;
-
// BMI1 BEXTR/BLS, BMI2 BZHI
defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
defm : BWWriteResPair<WriteBLS, [BWPort15], 1>;
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
-// Loads, stores, and moves, not folded with other operations.
-defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
-defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1], 1>;
-
-// Model the effect of clobbering the read-write mask operand of the GATHER operation.
-// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
-defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
-
-// Idioms that clear a register, like xorps %xmm0, %xmm0.
-// These can often bypass execution ports completely.
-def : WriteRes<WriteZero, []>;
-
-// Treat misc copies as a move.
-def : InstRW<[WriteMove], (instrs COPY)>;
-
-// Branches don't produce values, so they have no latency, but they still
-// consume resources. Indirect branches can fold loads.
-defm : BWWriteResPair<WriteJump, [BWPort06], 1>;
+// TODO: Why isn't the BWDivider used consistently?
+defm : X86WriteRes<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10], 1>;
+defm : X86WriteRes<WriteDiv16, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [BWPort0,BWPort1,BWPort5,BWPort6,BWPort01,BWPort0156], 80, [7,7,3,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 34, [2,2,2,1,1], 8>;
+
+defm : X86WriteRes<WriteIDiv8, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv32, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv64, [BWPort0, BWDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv8Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [BWPort0,BWPort1,BWPort5,BWPort23,BWPort0156], 35, [2,2,2,1,1], 8>;
// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [BWPort01], 1, [1], 1>;
@@ -245,6 +255,7 @@ defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub.
defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM).
@@ -285,6 +296,16 @@ defm : BWWriteResPair<WriteFDiv64X, [BWPort0,BWFPDivider], 14, [1,8], 1, 5>; //
defm : BWWriteResPair<WriteFDiv64Y, [BWPort0,BWPort015,BWFPDivider], 23, [2,1,16], 3, 6>; // Floating point division (YMM).
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
+defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
+defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
+defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRcpZ>;
+
+defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
+defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
+defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
+
defm : X86WriteRes<WriteFSqrt, [BWPort0,BWFPDivider], 11, [1,4], 1>; // Floating point square root.
defm : X86WriteRes<WriteFSqrtLd, [BWPort0,BWPort23,BWFPDivider], 16, [1,1,7], 2>;
defm : BWWriteResPair<WriteFSqrtX, [BWPort0,BWFPDivider], 11, [1,7], 1, 5>; // Floating point square root (XMM).
@@ -297,16 +318,6 @@ defm : BWWriteResPair<WriteFSqrt64Y, [BWPort0,BWPort015,BWFPDivider], 29, [2,1,2
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : BWWriteResPair<WriteFSqrt80, [BWPort0,BWFPDivider], 23, [1,9]>; // Floating point long double square root.
-defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate.
-defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM).
-defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM).
-defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-
-defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate.
-defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM).
-defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM).
-defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-
defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add.
defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM).
defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM).
@@ -336,6 +347,8 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends.
defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends.
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
+defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends.
defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends.
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
@@ -343,6 +356,48 @@ defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
// FMA Scheduling helper class.
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+// Conversion between integer and float.
+defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
+defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
+
+defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
+defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>;
+defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>;
+defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
+
+defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
+defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
+defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
+defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
+
+defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
+defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
+
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
@@ -366,12 +421,6 @@ defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveToGpr, [BWPort0], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveFromGpr, [BWPort5], 1, [1], 1>;
-defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>;
-
-defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
-defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
-defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
-defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor.
defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM).
@@ -379,6 +428,10 @@ defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : BWWriteResPair<WriteVecTest, [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions.
defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
+defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM).
+defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulX, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply.
defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply.
@@ -397,6 +450,9 @@ defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : BWWriteResPair<WriteBlend, [BWPort5], 1, [1], 1, 5>; // Vector blends.
defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteBlendZ>;
+defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector shuffles.
+defm : BWWriteResPair<WriteVPMOV256, [BWPort5], 3, [1], 1, 6>; // 256-bit width packed vector width-changing move.
+defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector variable shuffles.
defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends.
defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM).
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
@@ -444,49 +500,7 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> {
let NumMicroOps = 3;
}
-// Conversion between integer and float.
-defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>;
-defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>;
-
-defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>;
-defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>;
-defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>;
-defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>;
-defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>;
-
-defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>;
-defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>;
-defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>;
-defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
-
-defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
-defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
-defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
-defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
-
-defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>;
-defm : X86WriteResUnsupported<WriteCvtPS2PHZ>;
-defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>;
-defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
-
-// Strings instructions.
+// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
def : WriteRes<WritePCmpIStrM, [BWPort0]> {
@@ -542,7 +556,7 @@ def : WriteRes<WriteVecMOVMSK, [BWPort0]> { let Latency = 3; }
def : WriteRes<WriteVecMOVMSKY, [BWPort0]> { let Latency = 3; }
def : WriteRes<WriteMMXMOVMSK, [BWPort0]> { let Latency = 1; }
-// AES instructions.
+// AES Instructions.
def : WriteRes<WriteAESDecEnc, [BWPort5]> { // Decryption, encryption.
let Latency = 7;
let NumMicroOps = 1;
@@ -578,27 +592,19 @@ def : WriteRes<WriteAESKeyGenLd, [BWPort0, BWPort5, BWPort23, BWPort015]> {
// Carry-less multiplication instructions.
defm : BWWriteResPair<WriteCLMul, [BWPort0], 5>;
+// Load/store MXCSR.
+def : WriteRes<WriteLDMXCSR, [BWPort0,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [BWPort4,BWPort5,BWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
// Catch-all for expensive system instructions.
-def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
-
-// AVX2.
-defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector shuffles.
-defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3, [1], 1, 6>; // Fp 256-bit width vector variable shuffles.
-defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector shuffles.
-defm : BWWriteResPair<WriteVPMOV256, [BWPort5], 3, [1], 1, 6>; // 256-bit width packed vector width-changing move.
-defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3, [1], 1, 6>; // 256-bit width vector variable shuffles.
+def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; }
// Old microcoded instructions that nobody uses.
-def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
+def : WriteRes<WriteMicrocoded, [BWPort0156]> { let Latency = 100; }
// Fence instructions.
def : WriteRes<WriteFence, [BWPort23, BWPort4]>;
-// Load/store MXCSR.
-def : WriteRes<WriteLDMXCSR, [BWPort0,BWPort23,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [BWPort4,BWPort5,BWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-
// Nop, not very useful except that it provides a model for nops!
def : WriteRes<WriteNop, []>;
@@ -1104,7 +1110,7 @@ def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[BWWriteResGroup84], (instrs LRETQ, RETQ)>;
+def: InstRW<[BWWriteResGroup84], (instrs LRET64, RET64)>;
def BWWriteResGroup87 : SchedWriteRes<[BWPort4,BWPort23,BWPort237,BWPort06]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 99fddcd4b2d5..68961d6245ab 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -117,12 +117,16 @@ multiclass HWWriteResPair<X86FoldableSchedWrite SchedRW,
// 2/3/7 cycle to recompute the address.
def : WriteRes<WriteRMW, [HWPort237,HWPort4]>;
+// Loads, stores, and moves, not folded with other operations.
// Store_addr on 237.
// Store_data on 4.
defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
def : WriteRes<WriteZero, []>;
// Model the effect of clobbering the read-write mask operand of the GATHER operation.
@@ -140,12 +144,17 @@ defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>;
+defm : HWWriteResPair<WriteMULX32, [HWPort1,HWPort06,HWPort0156], 3, [1,1,1], 3>;
defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>;
+defm : HWWriteResPair<WriteMULX64, [HWPort1,HWPort6], 3, [1,1], 2>;
defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>;
defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def HWWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(HWWriteIMulH.Latency, HaswellModel.LoadLatency);
+}
defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>;
@@ -165,11 +174,15 @@ defm : X86WriteRes<WriteSHDrrcl,[HWPort1, HWPort06, HWPort0156], 6, [1, 1, 2], 4
defm : X86WriteRes<WriteSHDmri, [HWPort1, HWPort23, HWPort237, HWPort0156], 10, [1, 1, 1, 1], 4>;
defm : X86WriteRes<WriteSHDmrcl,[HWPort1, HWPort23, HWPort237, HWPort06, HWPort0156], 12, [1, 1, 1, 1, 2], 6>;
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
+
defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
defm : HWWriteResPair<WriteCMOV, [HWPort06,HWPort0156], 2, [1,1], 2>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [HWPort1], 3, [1], 1>; // x87 conditional move.
+
def : WriteRes<WriteSETCC, [HWPort06]>; // Setcc.
def : WriteRes<WriteSETCCStore, [HWPort06,HWPort4,HWPort237]> {
let Latency = 2;
@@ -220,7 +233,7 @@ defm : X86WriteRes<WriteIDiv16Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>
defm : X86WriteRes<WriteIDiv32Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
defm : X86WriteRes<WriteIDiv64Ld, [HWPort0,HWPort23,HWDivider], 29, [1,1,10], 2>;
-// Scalar and vector floating point.
+// Floating point. This covers both scalar and vector operations.
defm : X86WriteRes<WriteFLD0, [HWPort01], 1, [1], 1>;
defm : X86WriteRes<WriteFLD1, [HWPort01], 1, [2], 2>;
defm : X86WriteRes<WriteFLDC, [HWPort01], 1, [2], 2>;
@@ -305,14 +318,14 @@ defm : HWWriteResPair<WriteFSqrt64Y, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28
defm : HWWriteResPair<WriteFSqrt64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28], 3, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>;
-defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
-defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
-defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
-defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
-defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
-defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
-defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
-defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1
+defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>;
+defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>;
+defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>;
+defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1
+defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>;
+defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>;
+defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>;
+defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1
defm : HWWriteResPair<WriteFSign, [HWPort0], 1>;
defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>;
@@ -593,11 +606,28 @@ def : WriteRes<WriteCLMulLd, [HWPort0, HWPort5, HWPort23]> {
def : WriteRes<WriteLDMXCSR, [HWPort0,HWPort23,HWPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
def : WriteRes<WriteSTMXCSR, [HWPort4,HWPort5,HWPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+// Catch-all for expensive system instructions.
def : WriteRes<WriteSystem, [HWPort0156]> { let Latency = 100; }
+
+// Old microcoded instructions that nobody uses.
def : WriteRes<WriteMicrocoded, [HWPort0156]> { let Latency = 100; }
+
+// Fence instructions.
def : WriteRes<WriteFence, [HWPort23, HWPort4]>;
+
+// Nop, not very useful except that it provides a model for nops!
def : WriteRes<WriteNop, []>;
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
+defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
+defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 5>;
+defm : HWWriteResPair<WritePHAddX, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
+defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
+
//================ Exceptions ================//
//-- Specific Scheduling Models --//
@@ -680,7 +710,7 @@ def HWWriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> {
let NumMicroOps = 4;
let ResourceCycles = [1, 2, 1];
}
-def : InstRW<[HWWriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>;
+def : InstRW<[HWWriteRETI], (instregex "RETI(16|32|64)", "LRETI(16|32|64)")>;
// BOUND.
// r,m.
@@ -821,16 +851,6 @@ def HWWriteFXTRACT : SchedWriteRes<[]> {
}
def : InstRW<[HWWriteFXTRACT], (instrs FXTRACT)>;
-////////////////////////////////////////////////////////////////////////////////
-// Horizontal add/sub instructions.
-////////////////////////////////////////////////////////////////////////////////
-
-defm : HWWriteResPair<WriteFHAdd, [HWPort1, HWPort5], 5, [1,2], 3, 6>;
-defm : HWWriteResPair<WriteFHAddY, [HWPort1, HWPort5], 5, [1,2], 3, 7>;
-defm : HWWriteResPair<WritePHAdd, [HWPort5, HWPort15], 3, [2,1], 3, 5>;
-defm : HWWriteResPair<WritePHAddX, [HWPort5, HWPort15], 3, [2,1], 3, 6>;
-defm : HWWriteResPair<WritePHAddY, [HWPort5, HWPort15], 3, [2,1], 3, 7>;
-
//=== Floating Point XMM and YMM Instructions ===//
// Remaining instrs.
@@ -1168,7 +1188,7 @@ def HWWriteResGroup41 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[HWWriteResGroup41], (instrs LRETQ, RETL, RETQ)>;
+def: InstRW<[HWWriteResGroup41], (instrs LRET64, RET32, RET64)>;
def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> {
let Latency = 3;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
new file mode 100644
index 000000000000..889b9b7fa666
--- /dev/null
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -0,0 +1,2636 @@
+//=- X86SchedIceLake.td - X86 Ice Lake Scheduling ------------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Ice Lake to support
+// instruction scheduling and other instruction cost heuristics.
+//
+// TODO: This is mainly a copy of X86SchedSkylakeServer.td, but it allows us to
+// iteratively improve the scheduling model toward better modelling of the
+// Ice Lake (Sunny/Cypress Cove) microarchitecture.
+//
+//===----------------------------------------------------------------------===//
+
+def IceLakeModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and Ice Lake can
+ // decode 6 instructions per cycle.
+ let IssueWidth = 6;
+ let MicroOpBufferSize = 224; // Based on the reorder buffer.
+ let LoadLatency = 5;
+ let MispredictPenalty = 14;
+
+ // Based on the LSD (loop-stream detector) queue size and benchmarking data.
+ let LoopMicroOpBufferSize = 50;
+
+ // This flag is set to allow the scheduler to assign a default model to
+ // unrecognized opcodes.
+ let CompleteModel = 0;
+}
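The fields of this SchedMachineModel surface at runtime through MCSchedModel. Assuming the icelake CPUs are rebound to IceLakeModel in X86.td as part of this change, a client could read the values back roughly as follows (TM and TT as in emitAsanMemaccessSymbols earlier in this diff):

// Hedged sketch: query the generated scheduling model for an Ice Lake CPU.
std::unique_ptr<MCSubtargetInfo> STI(
    TM.getTarget().createMCSubtargetInfo(TT.str(), "icelake-server", ""));
const MCSchedModel &SM = STI->getSchedModel();
// Expected to mirror the definition above:
//   SM.IssueWidth == 6, SM.LoadLatency == 5, SM.MicroOpBufferSize == 224.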
+
+let SchedModel = IceLakeModel in {
+
+// Ice Lake can issue micro-ops to 8 different ports in one cycle.
+
+// Ports 0, 1, 5, and 6 handle all computation.
+// Ports 4 and 9 get the data half of stores. Store data can be available later
+// than the store address, but since we don't model the latency of stores, we
+// can ignore that.
+// Ports 2 and 3 are identical. They handle loads and address calculations.
+// Ports 7 and 8 are identical. They handle store address calculations.
+def ICXPort0 : ProcResource<1>;
+def ICXPort1 : ProcResource<1>;
+def ICXPort2 : ProcResource<1>;
+def ICXPort3 : ProcResource<1>;
+def ICXPort4 : ProcResource<1>;
+def ICXPort5 : ProcResource<1>;
+def ICXPort6 : ProcResource<1>;
+def ICXPort7 : ProcResource<1>;
+def ICXPort8 : ProcResource<1>;
+def ICXPort9 : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def ICXPort01 : ProcResGroup<[ICXPort0, ICXPort1]>;
+def ICXPort23 : ProcResGroup<[ICXPort2, ICXPort3]>;
+def ICXPort237 : ProcResGroup<[ICXPort2, ICXPort3, ICXPort7]>;
+def ICXPort04 : ProcResGroup<[ICXPort0, ICXPort4]>;
+def ICXPort05 : ProcResGroup<[ICXPort0, ICXPort5]>;
+def ICXPort06 : ProcResGroup<[ICXPort0, ICXPort6]>;
+def ICXPort15 : ProcResGroup<[ICXPort1, ICXPort5]>;
+def ICXPort16 : ProcResGroup<[ICXPort1, ICXPort6]>;
+def ICXPort49 : ProcResGroup<[ICXPort4, ICXPort9]>;
+def ICXPort56 : ProcResGroup<[ICXPort5, ICXPort6]>;
+def ICXPort78 : ProcResGroup<[ICXPort7, ICXPort8]>;
+def ICXPort015 : ProcResGroup<[ICXPort0, ICXPort1, ICXPort5]>;
+def ICXPort056 : ProcResGroup<[ICXPort0, ICXPort5, ICXPort6]>;
+def ICXPort0156: ProcResGroup<[ICXPort0, ICXPort1, ICXPort5, ICXPort6]>;
+
+def ICXDivider : ProcResource<1>; // Integer division issued on port 0.
+// FP division and sqrt on port 0.
+def ICXFPDivider : ProcResource<1>;
+
+// 60 Entry Unified Scheduler
+def ICXPortAny : ProcResGroup<[ICXPort0, ICXPort1, ICXPort2, ICXPort3, ICXPort4,
+ ICXPort5, ICXPort6, ICXPort7, ICXPort8, ICXPort9]> {
+ let BufferSize=60;
+}
+
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available until 5
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 5>;
+
+// Vector loads are 5/6/7 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 5/6/7 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 5>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 7>;
+
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5> {
+ // Register variant is using a single cycle on ExePort.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+ // Memory variant also uses a cycle on port 2/3 and adds LoadLat cycles to
+ // the latency (default = 5).
+ def : WriteRes<SchedRW.Folded, !listconcat([ICXPort23], ExePorts)> {
+ let Latency = !add(Lat, LoadLat);
+ let ResourceCycles = !listconcat([1], Res);
+ let NumMicroOps = !add(UOps, 1);
+ }
+}
+
+// A folded store needs a cycle on port 4 for the store data, and an extra port
+// 2/3/7 cycle to recompute the address.
+def : WriteRes<WriteRMW, [ICXPort237,ICXPort4]>;
+
+// Arithmetic.
+defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op.
+defm : ICXWriteResPair<WriteADC, [ICXPort06], 1>; // Integer ALU + flags op.
+
+// Integer multiplication.
+defm : ICXWriteResPair<WriteIMul8, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul16, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,2], 4>;
+defm : X86WriteRes<WriteIMul16Imm, [ICXPort1,ICXPort0156], 4, [1,1], 2>;
+defm : X86WriteRes<WriteIMul16ImmLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
+defm : X86WriteRes<WriteIMul16Reg, [ICXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteIMul16RegLd, [ICXPort1,ICXPort0156,ICXPort23], 8, [1,1,1], 3>;
+defm : ICXWriteResPair<WriteIMul32, [ICXPort1,ICXPort06,ICXPort0156], 4, [1,1,1], 3>;
+defm : ICXWriteResPair<WriteMULX32, [ICXPort1,ICXPort06,ICXPort0156], 3, [1,1,1], 3>;
+defm : ICXWriteResPair<WriteIMul32Imm, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul32Reg, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul64, [ICXPort1,ICXPort5], 4, [1,1], 2>;
+defm : ICXWriteResPair<WriteMULX64, [ICXPort1,ICXPort5], 3, [1,1], 2>;
+defm : ICXWriteResPair<WriteIMul64Imm, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteIMul64Reg, [ICXPort1], 3>;
+def ICXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(ICXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
+}
+
+defm : X86WriteRes<WriteBSWAP32, [ICXPort15], 1, [1], 1>;
+defm : X86WriteRes<WriteBSWAP64, [ICXPort06, ICXPort15], 2, [1,1], 2>;
+defm : X86WriteRes<WriteCMPXCHG,[ICXPort06, ICXPort0156], 5, [2,3], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort237,ICXPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteXCHG, [ICXPort0156], 2, [3], 3>;
+
+// TODO: Why isn't the ICXDivider used?
+defm : ICXWriteResPair<WriteDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1, 4>;
+defm : X86WriteRes<WriteDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156], 76, [7,2,8,3,1,11], 32>;
+defm : X86WriteRes<WriteDiv16Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv32Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
+defm : X86WriteRes<WriteDiv64Ld, [ICXPort0,ICXPort23,ICXDivider], 29, [1,1,10], 2>;
+
+defm : X86WriteRes<WriteIDiv8, [ICXPort0, ICXDivider], 25, [1,10], 1>;
+defm : X86WriteRes<WriteIDiv16, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv32, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv64, [ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort06,ICXPort0156], 102, [4,2,4,8,14,34], 66>;
+defm : X86WriteRes<WriteIDiv8Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv16Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv32Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+defm : X86WriteRes<WriteIDiv64Ld, [ICXPort0,ICXPort5,ICXPort23,ICXPort0156], 28, [2,4,1,1], 8>;
+
+defm : ICXWriteResPair<WriteCRC32, [ICXPort1], 3>;
+
+def : WriteRes<WriteLEA, [ICXPort15]>; // LEA instructions can't fold loads.
+
+defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>; // Conditional move.
+defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>; // x87 conditional move.
+def : WriteRes<WriteSETCC, [ICXPort06]>; // Setcc.
+def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort4,ICXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+defm : X86WriteRes<WriteLAHFSAHF, [ICXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTest, [ICXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [ICXPort06,ICXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [ICXPort0156,ICXPort23], 6, [1,1], 2>;
+defm : X86WriteRes<WriteBitTestSet, [ICXPort06], 1, [1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [ICXPort06,ICXPort23], 5, [1,1], 3>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [ICXPort0156,ICXPort23], 5, [1,1], 2>;
+
+// Integer shifts and rotates.
+defm : ICXWriteResPair<WriteShift, [ICXPort06], 1>;
+defm : ICXWriteResPair<WriteShiftCL, [ICXPort06], 3, [3], 3>;
+defm : ICXWriteResPair<WriteRotate, [ICXPort06], 1, [1], 1>;
+defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>;
+
+// SHLD/SHRD.
+defm : X86WriteRes<WriteSHDrri, [ICXPort1], 3, [1], 1>;
+defm : X86WriteRes<WriteSHDrrcl,[ICXPort1,ICXPort06,ICXPort0156], 6, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort237,ICXPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;
+
+// Bit counts.
+defm : ICXWriteResPair<WriteBSF, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteBSR, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteLZCNT, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteTZCNT, [ICXPort1], 3>;
+defm : ICXWriteResPair<WritePOPCNT, [ICXPort1], 3>;
+
+// BMI1 BEXTR/BLS, BMI2 BZHI
+defm : ICXWriteResPair<WriteBEXTR, [ICXPort06,ICXPort15], 2, [1,1], 2>;
+defm : ICXWriteResPair<WriteBLS, [ICXPort15], 1>;
+defm : ICXWriteResPair<WriteBZHI, [ICXPort15], 1>;
+
+// Loads, stores, and moves, not folded with other operations.
+defm : X86WriteRes<WriteLoad, [ICXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [ICXPort237, ICXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [ICXPort237, ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [ICXPort0156], 1, [1], 1>;
+
+// Model the effect of clobbering the read-write mask operand of the GATHER operation.
+// It does not cost anything by itself; it only has latency, matching that of WriteLoad.
+defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+def : WriteRes<WriteZero, []>;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm : ICXWriteResPair<WriteJump, [ICXPort06], 1>;
+
+// Floating point. This covers both scalar and vector operations.
+defm : X86WriteRes<WriteFLD0, [ICXPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteFLD1, [ICXPort05], 1, [2], 2>;
+defm : X86WriteRes<WriteFLDC, [ICXPort05], 1, [2], 2>;
+defm : X86WriteRes<WriteFLoad, [ICXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteFLoadX, [ICXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteFLoadY, [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteFMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteFStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteFMoveY, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteEMMS, [ICXPort05,ICXPort0156], 10, [9,1], 10>;
+
+defm : ICXWriteResPair<WriteFAdd, [ICXPort01], 4, [1], 1, 5>; // Floating point add/sub.
+defm : ICXWriteResPair<WriteFAddX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFAddY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFAddZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFAdd64, [ICXPort01], 4, [1], 1, 5>; // Floating point double add/sub.
+defm : ICXWriteResPair<WriteFAdd64X, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFAdd64Y, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFAdd64Z, [ICXPort05], 4, [1], 1, 7>;
+
+defm : ICXWriteResPair<WriteFCmp, [ICXPort01], 4, [1], 1, 5>; // Floating point compare.
+defm : ICXWriteResPair<WriteFCmpX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFCmpY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFCmpZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFCmp64, [ICXPort01], 4, [1], 1, 5>; // Floating point double compare.
+defm : ICXWriteResPair<WriteFCmp64X, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFCmp64Y, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFCmp64Z, [ICXPort05], 4, [1], 1, 7>;
+
+defm : ICXWriteResPair<WriteFCom, [ICXPort0], 2>; // Floating point compare to flags (X87).
+defm : ICXWriteResPair<WriteFComX, [ICXPort0], 2>; // Floating point compare to flags (SSE).
+
+defm : ICXWriteResPair<WriteFMul, [ICXPort01], 4, [1], 1, 5>; // Floating point multiplication.
+defm : ICXWriteResPair<WriteFMulX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFMulY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMulZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMul64, [ICXPort01], 4, [1], 1, 5>; // Floating point double multiplication.
+defm : ICXWriteResPair<WriteFMul64X, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFMul64Y, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMul64Z, [ICXPort05], 4, [1], 1, 7>;
+
+defm : ICXWriteResPair<WriteFDiv, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
+//defm : ICXWriteResPair<WriteFDivX, [ICXPort0,ICXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles.
+defm : ICXWriteResPair<WriteFDivY, [ICXPort0,ICXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles.
+defm : ICXWriteResPair<WriteFDivZ, [ICXPort0,ICXPort5,ICXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles.
+//defm : ICXWriteResPair<WriteFDiv64, [ICXPort0,ICXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division.
+//defm : ICXWriteResPair<WriteFDiv64X, [ICXPort0,ICXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles.
+//defm : ICXWriteResPair<WriteFDiv64Y, [ICXPort0,ICXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles.
+defm : ICXWriteResPair<WriteFDiv64Z, [ICXPort0,ICXPort5,ICXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles.
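+// The commented-out FDiv pairs above appear to be provided via SchedAlias
+// instead (e.g. WriteFDivX is aliased to ICXWriteResGroup159 further down);
+// presumably the WriteFDiv64* variants are handled the same way later in the
+// file.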
+
+defm : ICXWriteResPair<WriteFSqrt, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 5>; // Floating point square root.
+defm : ICXWriteResPair<WriteFSqrtX, [ICXPort0,ICXFPDivider], 12, [1,3], 1, 6>;
+defm : ICXWriteResPair<WriteFSqrtY, [ICXPort0,ICXFPDivider], 12, [1,6], 1, 7>;
+defm : ICXWriteResPair<WriteFSqrtZ, [ICXPort0,ICXPort5,ICXFPDivider], 20, [2,1,12], 3, 7>;
+defm : ICXWriteResPair<WriteFSqrt64, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root.
+defm : ICXWriteResPair<WriteFSqrt64X, [ICXPort0,ICXFPDivider], 18, [1,6], 1, 6>;
+defm : ICXWriteResPair<WriteFSqrt64Y, [ICXPort0,ICXFPDivider], 18, [1,12],1, 7>;
+defm : ICXWriteResPair<WriteFSqrt64Z, [ICXPort0,ICXPort5,ICXFPDivider], 32, [2,1,24], 3, 7>;
+defm : ICXWriteResPair<WriteFSqrt80, [ICXPort0,ICXFPDivider], 21, [1,7]>; // Floating point long double square root.
+
+defm : ICXWriteResPair<WriteFRcp, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate.
+defm : ICXWriteResPair<WriteFRcpX, [ICXPort0], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFRcpY, [ICXPort0], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFRcpZ, [ICXPort0,ICXPort5], 4, [2,1], 3, 7>;
+
+defm : ICXWriteResPair<WriteFRsqrt, [ICXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate.
+defm : ICXWriteResPair<WriteFRsqrtX,[ICXPort0], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFRsqrtY,[ICXPort0], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFRsqrtZ,[ICXPort0,ICXPort5], 9, [2,1], 3, 7>;
+
+defm : ICXWriteResPair<WriteFMA, [ICXPort01], 4, [1], 1, 5>; // Fused Multiply Add.
+defm : ICXWriteResPair<WriteFMAX, [ICXPort01], 4, [1], 1, 6>;
+defm : ICXWriteResPair<WriteFMAY, [ICXPort01], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFMAZ, [ICXPort05], 4, [1], 1, 7>;
+defm : ICXWriteResPair<WriteDPPD, [ICXPort5,ICXPort015], 9, [1,2], 3, 6>; // Floating point double dot product.
+defm : ICXWriteResPair<WriteDPPS, [ICXPort5,ICXPort015], 13, [1,3], 4, 6>;
+defm : ICXWriteResPair<WriteDPPSY,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
+defm : ICXWriteResPair<WriteDPPSZ,[ICXPort5,ICXPort015], 13, [1,3], 4, 7>;
+defm : ICXWriteResPair<WriteFSign, [ICXPort0], 1>; // Floating point fabs/fchs.
+defm : ICXWriteResPair<WriteFRnd, [ICXPort01], 8, [2], 2, 6>; // Floating point rounding.
+defm : ICXWriteResPair<WriteFRndY, [ICXPort01], 8, [2], 2, 7>;
+defm : ICXWriteResPair<WriteFRndZ, [ICXPort05], 8, [2], 2, 7>;
+defm : ICXWriteResPair<WriteFLogic, [ICXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals.
+defm : ICXWriteResPair<WriteFLogicY, [ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFLogicZ, [ICXPort05], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFTest, [ICXPort0], 2, [1], 1, 6>; // Floating point TEST instructions.
+defm : ICXWriteResPair<WriteFTestY, [ICXPort0], 2, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFTestZ, [ICXPort0], 2, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector shuffles.
+defm : ICXWriteResPair<WriteFShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarShuffle, [ICXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles.
+defm : ICXWriteResPair<WriteFVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFBlend, [ICXPort015], 1, [1], 1, 6>; // Floating point vector blends.
+defm : ICXWriteResPair<WriteFBlendY,[ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFBlendZ,[ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteFVarBlend, [ICXPort015], 2, [2], 2, 6>; // Fp vector variable blends.
+defm : ICXWriteResPair<WriteFVarBlendY,[ICXPort015], 2, [2], 2, 7>;
+defm : ICXWriteResPair<WriteFVarBlendZ,[ICXPort015], 2, [2], 2, 7>;
+
+// FMA Scheduling helper class.
+// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
+
+// Vector integer operations.
+defm : X86WriteRes<WriteVecLoad, [ICXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadX, [ICXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadY, [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [ICXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [ICXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
+defm : X86WriteRes<WriteVecMoveToGpr, [ICXPort0], 2, [1], 1>;
+defm : X86WriteRes<WriteVecMoveFromGpr, [ICXPort5], 1, [1], 1>;
+
+defm : ICXWriteResPair<WriteVecALU, [ICXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals.
+defm : ICXWriteResPair<WriteVecALUX, [ICXPort01], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVecALUY, [ICXPort01], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecALUZ, [ICXPort0], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecLogic, [ICXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor.
+defm : ICXWriteResPair<WriteVecLogicX,[ICXPort015], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVecLogicY,[ICXPort015], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecLogicZ,[ICXPort05], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecTest, [ICXPort0,ICXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions.
+defm : ICXWriteResPair<WriteVecTestY, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
+defm : ICXWriteResPair<WriteVecTestZ, [ICXPort0,ICXPort5], 3, [1,1], 2, 7>;
+defm : ICXWriteResPair<WriteVecIMul, [ICXPort0], 5, [1], 1, 5>; // Vector integer multiply.
+defm : ICXWriteResPair<WriteVecIMulX, [ICXPort01], 5, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVecIMulY, [ICXPort01], 5, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecIMulZ, [ICXPort05], 5, [1], 1, 7>;
+defm : ICXWriteResPair<WritePMULLD, [ICXPort01], 10, [2], 2, 6>; // Vector PMULLD.
+defm : ICXWriteResPair<WritePMULLDY, [ICXPort01], 10, [2], 2, 7>;
+defm : ICXWriteResPair<WritePMULLDZ, [ICXPort05], 10, [2], 2, 7>;
+defm : ICXWriteResPair<WriteShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector shuffles.
+defm : ICXWriteResPair<WriteShuffleX, [ICXPort5], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarShuffle, [ICXPort5], 1, [1], 1, 5>; // Vector variable shuffles.
+defm : ICXWriteResPair<WriteVarShuffleX, [ICXPort5], 1, [1], 1, 6>;
+defm : ICXWriteResPair<WriteVarShuffleY, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarShuffleZ, [ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteBlend, [ICXPort5], 1, [1], 1, 6>; // Vector blends.
+defm : ICXWriteResPair<WriteBlendY,[ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteBlendZ,[ICXPort5], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarBlend, [ICXPort015], 2, [2], 2, 6>; // Vector variable blends.
+defm : ICXWriteResPair<WriteVarBlendY,[ICXPort015], 2, [2], 2, 6>;
+defm : ICXWriteResPair<WriteVarBlendZ,[ICXPort05], 2, [1], 1, 6>;
+defm : ICXWriteResPair<WriteMPSAD, [ICXPort5], 4, [2], 2, 6>; // Vector MPSAD.
+defm : ICXWriteResPair<WriteMPSADY, [ICXPort5], 4, [2], 2, 7>;
+defm : ICXWriteResPair<WriteMPSADZ, [ICXPort5], 4, [2], 2, 7>;
+defm : ICXWriteResPair<WritePSADBW, [ICXPort5], 3, [1], 1, 5>; // Vector PSADBW.
+defm : ICXWriteResPair<WritePSADBWX, [ICXPort5], 3, [1], 1, 6>;
+defm : ICXWriteResPair<WritePSADBWY, [ICXPort5], 3, [1], 1, 7>;
+defm : ICXWriteResPair<WritePSADBWZ, [ICXPort5], 3, [1], 1, 7>;
+defm : ICXWriteResPair<WritePHMINPOS, [ICXPort0], 4, [1], 1, 6>; // Vector PHMINPOS.
+
+// Vector integer shifts.
+defm : ICXWriteResPair<WriteVecShift, [ICXPort0], 1, [1], 1, 5>;
+defm : X86WriteRes<WriteVecShiftX, [ICXPort5,ICXPort01], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftY, [ICXPort5,ICXPort01], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZ, [ICXPort5,ICXPort0], 4, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftXLd, [ICXPort01,ICXPort23], 7, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [ICXPort01,ICXPort23], 8, [1,1], 2>;
+defm : X86WriteRes<WriteVecShiftZLd, [ICXPort0,ICXPort23], 8, [1,1], 2>;
+
+defm : ICXWriteResPair<WriteVecShiftImm, [ICXPort0], 1, [1], 1, 5>;
+defm : ICXWriteResPair<WriteVecShiftImmX, [ICXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts.
+defm : ICXWriteResPair<WriteVecShiftImmY, [ICXPort01], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVecShiftImmZ, [ICXPort0], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarVecShift, [ICXPort01], 1, [1], 1, 6>; // Variable vector shifts.
+defm : ICXWriteResPair<WriteVarVecShiftY, [ICXPort01], 1, [1], 1, 7>;
+defm : ICXWriteResPair<WriteVarVecShiftZ, [ICXPort0], 1, [1], 1, 7>;
+
+// Vector insert/extract operations.
+def : WriteRes<WriteVecInsert, [ICXPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteVecInsertLd, [ICXPort5,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def: InstRW<[WriteVecInsertLd], (instregex "(V?)MOV(H|L)(PD|PS)rm")>;
+
+def : WriteRes<WriteVecExtract, [ICXPort0,ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : WriteRes<WriteVecExtractSt, [ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+// Conversion between integer and float.
+defm : ICXWriteResPair<WriteCvtSS2I, [ICXPort01], 6, [2], 2>; // Needs more work: DD vs DQ.
+defm : ICXWriteResPair<WriteCvtPS2I, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPS2IY, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPS2IZ, [ICXPort05], 3>;
+defm : ICXWriteResPair<WriteCvtSD2I, [ICXPort01], 6, [2], 2>;
+defm : ICXWriteResPair<WriteCvtPD2I, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPD2IY, [ICXPort01], 3>;
+defm : ICXWriteResPair<WriteCvtPD2IZ, [ICXPort05], 3>;
+
+defm : ICXWriteResPair<WriteCvtI2SS, [ICXPort1], 4>;
+defm : ICXWriteResPair<WriteCvtI2PS, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PSY, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PSZ, [ICXPort05], 4>; // Needs more work: DD vs DQ.
+defm : ICXWriteResPair<WriteCvtI2SD, [ICXPort1], 4>;
+defm : ICXWriteResPair<WriteCvtI2PD, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PDY, [ICXPort01], 4>;
+defm : ICXWriteResPair<WriteCvtI2PDZ, [ICXPort05], 4>;
+
+defm : ICXWriteResPair<WriteCvtSS2SD, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPS2PD, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPS2PDY, [ICXPort5,ICXPort01], 3, [1,1], 2>;
+defm : ICXWriteResPair<WriteCvtPS2PDZ, [ICXPort05], 3, [2], 2>;
+defm : ICXWriteResPair<WriteCvtSD2SS, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPD2PS, [ICXPort1], 3>;
+defm : ICXWriteResPair<WriteCvtPD2PSY, [ICXPort5,ICXPort01], 3, [1,1], 2>;
+defm : ICXWriteResPair<WriteCvtPD2PSZ, [ICXPort05], 3, [2], 2>;
+
+defm : X86WriteRes<WriteCvtPH2PS, [ICXPort5,ICXPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZ, [ICXPort5,ICXPort0], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [ICXPort23,ICXPort01], 9, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [ICXPort23,ICXPort01], 10, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23,ICXPort05], 10, [1,1], 2>;
+
+defm : X86WriteRes<WriteCvtPS2PH, [ICXPort5,ICXPort01], 5, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [ICXPort5,ICXPort05], 7, [1,1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 8, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort05], 8, [1,1,1,1], 4>;
+
+// String instructions.
+
+// Packed Compare Implicit Length Strings, Return Mask
+def : WriteRes<WritePCmpIStrM, [ICXPort0]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def : WriteRes<WritePCmpIStrMLd, [ICXPort0, ICXPort23]> {
+ let Latency = 16;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+def : WriteRes<WritePCmpEStrM, [ICXPort0, ICXPort5, ICXPort015, ICXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 9;
+ let ResourceCycles = [4,3,1,1];
+}
+def : WriteRes<WritePCmpEStrMLd, [ICXPort0, ICXPort5, ICXPort23, ICXPort015, ICXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 10;
+ let ResourceCycles = [4,3,1,1,1];
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+def : WriteRes<WritePCmpIStrI, [ICXPort0]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def : WriteRes<WritePCmpIStrILd, [ICXPort0, ICXPort23]> {
+ let Latency = 16;
+ let NumMicroOps = 4;
+ let ResourceCycles = [3,1];
+}
+
+// Packed Compare Explicit Length Strings, Return Index
+def : WriteRes<WritePCmpEStrI, [ICXPort0,ICXPort5,ICXPort0156]> {
+ let Latency = 18;
+ let NumMicroOps = 8;
+ let ResourceCycles = [4,3,1];
+}
+def : WriteRes<WritePCmpEStrILd, [ICXPort0, ICXPort5, ICXPort23, ICXPort0156]> {
+ let Latency = 24;
+ let NumMicroOps = 9;
+ let ResourceCycles = [4,3,1,1];
+}
+
+// MOVMSK Instructions.
+def : WriteRes<WriteFMOVMSK, [ICXPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSK, [ICXPort0]> { let Latency = 2; }
+def : WriteRes<WriteVecMOVMSKY, [ICXPort0]> { let Latency = 2; }
+def : WriteRes<WriteMMXMOVMSK, [ICXPort0]> { let Latency = 2; }
+
+// AES instructions.
+def : WriteRes<WriteAESDecEnc, [ICXPort0]> { // Decryption, encryption.
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteAESDecEncLd, [ICXPort0, ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+
+def : WriteRes<WriteAESIMC, [ICXPort0]> { // InvMixColumn.
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def : WriteRes<WriteAESIMCLd, [ICXPort0, ICXPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+
+def : WriteRes<WriteAESKeyGen, [ICXPort0,ICXPort5,ICXPort015]> { // Key Generation.
+ let Latency = 20;
+ let NumMicroOps = 11;
+ let ResourceCycles = [3,6,2];
+}
+def : WriteRes<WriteAESKeyGenLd, [ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 25;
+ let NumMicroOps = 11;
+ let ResourceCycles = [3,6,1,1];
+}
+
+// Carry-less multiplication instructions.
+def : WriteRes<WriteCLMul, [ICXPort5]> {
+ let Latency = 6;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def : WriteRes<WriteCLMulLd, [ICXPort5, ICXPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+
+// Catch-all for expensive system instructions.
+def : WriteRes<WriteSystem, [ICXPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
+
+// AVX2.
+defm : ICXWriteResPair<WriteFShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector shuffles.
+defm : ICXWriteResPair<WriteFVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // Fp 256-bit width vector variable shuffles.
+defm : ICXWriteResPair<WriteShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector shuffles.
+defm : ICXWriteResPair<WriteVPMOV256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width packed vector width-changing move.
+defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bit width vector variable shuffles.
+
+// Old microcoded instructions that nobody uses.
+def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
+
+// Fence instructions.
+def : WriteRes<WriteFence, [ICXPort23, ICXPort4]>;
+
+// Load/store MXCSR.
+def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [ICXPort4,ICXPort5,ICXPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+
+// Nop, not very useful except that it provides a model for nops!
+def : WriteRes<WriteNop, []>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Horizontal add/sub instructions.
+////////////////////////////////////////////////////////////////////////////////
+
+defm : ICXWriteResPair<WriteFHAdd, [ICXPort5,ICXPort015], 6, [2,1], 3, 6>;
+defm : ICXWriteResPair<WriteFHAddY, [ICXPort5,ICXPort015], 6, [2,1], 3, 7>;
+defm : ICXWriteResPair<WritePHAdd, [ICXPort5,ICXPort05], 3, [2,1], 3, 5>;
+defm : ICXWriteResPair<WritePHAddX, [ICXPort5,ICXPort015], 3, [2,1], 3, 6>;
+defm : ICXWriteResPair<WritePHAddY, [ICXPort5,ICXPort015], 3, [2,1], 3, 7>;
+
+// Remaining instrs.
+
+def ICXWriteResGroup1 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup1], (instregex "KAND(B|D|Q|W)rr",
+ "KANDN(B|D|Q|W)rr",
+ "KMOV(B|D|Q|W)kk",
+ "KNOT(B|D|Q|W)rr",
+ "KOR(B|D|Q|W)rr",
+ "KXNOR(B|D|Q|W)rr",
+ "KXOR(B|D|Q|W)rr",
+ "KSET0(B|D|Q|W)", // Same as KXOR
+ "KSET1(B|D|Q|W)", // Same as KXNOR
+ "MMX_PADDS(B|W)irr",
+ "MMX_PADDUS(B|W)irr",
+ "MMX_PAVG(B|W)irr",
+ "MMX_PCMPEQ(B|D|W)irr",
+ "MMX_PCMPGT(B|D|W)irr",
+ "MMX_P(MAX|MIN)SWirr",
+ "MMX_P(MAX|MIN)UBirr",
+ "MMX_PSUBS(B|W)irr",
+ "MMX_PSUBUS(B|W)irr",
+ "VPMOVB2M(Z|Z128|Z256)rr",
+ "VPMOVD2M(Z|Z128|Z256)rr",
+ "VPMOVQ2M(Z|Z128|Z256)rr",
+ "VPMOVW2M(Z|Z128|Z256)rr")>;
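+// (Pattern for the groups that follow: each ICXWriteResGroupNN is a
+// SchedWriteRes giving the latency, uop count and per-port resource cycles,
+// and the accompanying InstRW lines bind instructions to it, either by name
+// via instrs or by regular expression via instregex.)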
+
+def ICXWriteResGroup3 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup3], (instregex "COM(P?)_FST0r",
+ "KMOV(B|D|Q|W)kr",
+ "UCOM_F(P?)r")>;
+
+def ICXWriteResGroup4 : SchedWriteRes<[ICXPort6]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup4], (instregex "JMP(16|32|64)r")>;
+
+def ICXWriteResGroup6 : SchedWriteRes<[ICXPort05]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup6], (instrs FINCSTP, FNOP)>;
+
+def ICXWriteResGroup7 : SchedWriteRes<[ICXPort06]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>;
+
+def ICXWriteResGroup8 : SchedWriteRes<[ICXPort15]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup8], (instregex "ANDN(32|64)rr")>;
+
+def ICXWriteResGroup9 : SchedWriteRes<[ICXPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr",
+ "VBLENDMPS(Z128|Z256)rr",
+ "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr",
+ "(V?)PADD(B|D|Q|W)rr",
+ "VPBLENDD(Y?)rri",
+ "VPBLENDMB(Z128|Z256)rr",
+ "VPBLENDMD(Z128|Z256)rr",
+ "VPBLENDMQ(Z128|Z256)rr",
+ "VPBLENDMW(Z128|Z256)rr",
+ "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk",
+ "VPTERNLOGD(Z|Z128|Z256)rri",
+ "VPTERNLOGQ(Z|Z128|Z256)rri")>;
+
+def ICXWriteResGroup10 : SchedWriteRes<[ICXPort0156]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup10], (instrs CBW, CWDE, CDQE,
+ CMC, STC,
+ SGDT64m,
+ SIDT64m,
+ SMSW16m,
+ STRm,
+ SYSCALL)>;
+
+def ICXWriteResGroup11 : SchedWriteRes<[ICXPort4,ICXPort237]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup11], (instrs FBSTPm, VMPTRSTm)>;
+def: InstRW<[ICXWriteResGroup11], (instregex "KMOV(B|D|Q|W)mk",
+ "ST_FP(32|64|80)m")>;
+
+def ICXWriteResGroup13 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup13], (instrs MMX_MOVQ2DQrr)>;
+
+def ICXWriteResGroup14 : SchedWriteRes<[ICXPort05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup14], (instrs FDECSTP,
+ MMX_MOVDQ2Qrr)>;
+
+def ICXWriteResGroup17 : SchedWriteRes<[ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup17], (instrs LFENCE,
+ WAIT,
+ XGETBV)>;
+
+def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;
+
+def ICXWriteResGroup21 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup21], (instrs SFENCE)>;
+
+def ICXWriteResGroup23 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup23], (instrs CWD,
+ JCXZ, JECXZ, JRCXZ,
+ ADC8i8, SBB8i8,
+ ADC16i16, SBB16i16,
+ ADC32i32, SBB32i32,
+ ADC64i32, SBB64i32)>;
+
+def ICXWriteResGroup25 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;
+
+def ICXWriteResGroup27 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
+
+def ICXWriteResGroup28 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
+ STOSB, STOSL, STOSQ, STOSW)>;
+def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
+
+def ICXWriteResGroup29 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
+ let Latency = 2;
+ let NumMicroOps = 5;
+ let ResourceCycles = [2,2,1];
+}
+def: InstRW<[ICXWriteResGroup29], (instregex "VMOVDQU8Zmr(b?)")>;
+
+def ICXWriteResGroup30 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk",
+ "KORTEST(B|D|Q|W)rr",
+ "KTEST(B|D|Q|W)rr")>;
+
+def ICXWriteResGroup31 : SchedWriteRes<[ICXPort1]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup31], (instregex "PDEP(32|64)rr",
+ "PEXT(32|64)rr")>;
+
+def ICXWriteResGroup32 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup32], (instrs VPSADBWZrr)>; // TODO: 512-bit ops require ports 0/1 to be joined.
+def: InstRW<[ICXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)",
+ "VALIGND(Z|Z128|Z256)rri",
+ "VALIGNQ(Z|Z128|Z256)rri",
+ "VDBPSADBWZrri", // TODO: 512-bit ops require ports 0/1 to be joined.
+ "VPBROADCAST(B|W)rr",
+ "VP(MAX|MIN)(S|U)Q(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup33 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup33], (instregex "KADD(B|D|Q|W)rr",
+ "KSHIFTL(B|D|Q|W)ri",
+ "KSHIFTR(B|D|Q|W)ri",
+ "KUNPCK(BW|DQ|WD)rr",
+ "VCMPPD(Z|Z128|Z256)rri",
+ "VCMPPS(Z|Z128|Z256)rri",
+ "VCMP(SD|SS)Zrr",
+ "VFPCLASS(PD|PS)(Z|Z128|Z256)rr",
+ "VFPCLASS(SD|SS)Zrr",
+ "VPCMPB(Z|Z128|Z256)rri",
+ "VPCMPD(Z|Z128|Z256)rri",
+ "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr",
+ "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr",
+ "VPCMPQ(Z|Z128|Z256)rri",
+ "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri",
+ "VPCMPW(Z|Z128|Z256)rri",
+ "VPTEST(N?)M(B|D|Q|W)(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup34 : SchedWriteRes<[ICXPort0,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup34], (instrs FNSTSW16r)>;
+
+def ICXWriteResGroup37 : SchedWriteRes<[ICXPort0,ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup37], (instregex "MMX_PH(ADD|SUB)SWrr")>;
+
+def ICXWriteResGroup38 : SchedWriteRes<[ICXPort5,ICXPort01]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup38], (instregex "(V?)PH(ADD|SUB)SW(Y?)rr")>;
+
+def ICXWriteResGroup41 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup41], (instrs MMX_PACKSSDWirr,
+ MMX_PACKSSWBirr,
+ MMX_PACKUSWBirr)>;
+
+def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>;
+
+def ICXWriteResGroup43 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup43], (instrs MFENCE)>;
+
+def ICXWriteResGroup44 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup44], (instregex "RCL(8|16|32|64)r(1|i)",
+ "RCR(8|16|32|64)r(1|i)")>;
+
+def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;
+
+def ICXWriteResGroup47 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
+
+def ICXWriteResGroup48 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 3;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup48], (instrs CALL64pcrel32)>;
+
+def ICXWriteResGroup49 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>;
+
+def ICXWriteResGroup50 : SchedWriteRes<[ICXPort01]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr",
+ "(V?)CVTDQ2PSrr",
+ "VCVTPD2QQ(Z128|Z256)rr",
+ "VCVTPD2UQQ(Z128|Z256)rr",
+ "VCVTPS2DQ(Y|Z128|Z256)rr",
+ "(V?)CVTPS2DQrr",
+ "VCVTPS2UDQ(Z128|Z256)rr",
+ "VCVTQQ2PD(Z128|Z256)rr",
+ "VCVTTPD2QQ(Z128|Z256)rr",
+ "VCVTTPD2UQQ(Z128|Z256)rr",
+ "VCVTTPS2DQ(Z128|Z256)rr",
+ "(V?)CVTTPS2DQrr",
+ "VCVTTPS2UDQ(Z128|Z256)rr",
+ "VCVTUDQ2PS(Z128|Z256)rr",
+ "VCVTUQQ2PD(Z128|Z256)rr")>;
+
+def ICXWriteResGroup50z : SchedWriteRes<[ICXPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup50z], (instrs VCVTDQ2PSZrr,
+ VCVTPD2QQZrr,
+ VCVTPD2UQQZrr,
+ VCVTPS2DQZrr,
+ VCVTPS2UDQZrr,
+ VCVTQQ2PDZrr,
+ VCVTTPD2QQZrr,
+ VCVTTPD2UQQZrr,
+ VCVTTPS2DQZrr,
+ VCVTTPS2UDQZrr,
+ VCVTUDQ2PSZrr,
+ VCVTUQQ2PDZrr)>;
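+// The 512-bit (Z) conversions above use ICXPort05 rather than ICXPort01,
+// consistent with the notes elsewhere in this file that 512-bit ops require
+// ports 0 and 1 to be joined.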
+
+def ICXWriteResGroup51 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
+ "VEXPANDPS(Z|Z128|Z256)rr",
+ "VPEXPANDD(Z|Z128|Z256)rr",
+ "VPEXPANDQ(Z|Z128|Z256)rr",
+ "VPMOVDB(Z|Z128|Z256)rr",
+ "VPMOVDW(Z|Z128|Z256)rr",
+ "VPMOVQB(Z|Z128|Z256)rr",
+ "VPMOVQW(Z|Z128|Z256)rr",
+ "VPMOVSDB(Z|Z128|Z256)rr",
+ "VPMOVSDW(Z|Z128|Z256)rr",
+ "VPMOVSQB(Z|Z128|Z256)rr",
+ "VPMOVSQD(Z|Z128|Z256)rr",
+ "VPMOVSQW(Z|Z128|Z256)rr",
+ "VPMOVSWB(Z|Z128|Z256)rr",
+ "VPMOVUSDB(Z|Z128|Z256)rr",
+ "VPMOVUSDW(Z|Z128|Z256)rr",
+ "VPMOVUSQB(Z|Z128|Z256)rr",
+ "VPMOVUSQD(Z|Z128|Z256)rr",
+ "VPMOVUSWB(Z|Z128|Z256)rr",
+ "VPMOVWB(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup54 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup54], (instregex "IST(T?)_FP(16|32|64)m",
+ "IST_F(16|32)m",
+ "VPMOVQD(Z|Z128|Z256)mr(b?)")>;
+
+def ICXWriteResGroup55 : SchedWriteRes<[ICXPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [4];
+}
+def: InstRW<[ICXWriteResGroup55], (instrs FNCLEX)>;
+
+def ICXWriteResGroup56 : SchedWriteRes<[]> {
+ let Latency = 0;
+ let NumMicroOps = 4;
+ let ResourceCycles = [];
+}
+def: InstRW<[ICXWriteResGroup56], (instrs VZEROUPPER)>;
+
+def ICXWriteResGroup57 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort0156]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,2];
+}
+def: InstRW<[ICXWriteResGroup57], (instregex "LAR(16|32|64)rr")>;
+
+def ICXWriteResGroup58 : SchedWriteRes<[ICXPort23]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup58], (instregex "MOVSX(16|32|64)rm(8|16|32)",
+ "MOVZX(16|32|64)rm(8|16)",
+ "(V?)MOVDDUPrm")>; // TODO: Should this be ICXWriteResGroup71?
+
+def ICXWriteResGroup61 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr",
+ "MMX_CVT(T?)PS2PIirr",
+ "VCVTDQ2PDZ128rr",
+ "VCVTPD2DQZ128rr",
+ "(V?)CVT(T?)PD2DQrr",
+ "VCVTPD2PSZ128rr",
+ "(V?)CVTPD2PSrr",
+ "VCVTPD2UDQZ128rr",
+ "VCVTPS2PDZ128rr",
+ "(V?)CVTPS2PDrr",
+ "VCVTPS2QQZ128rr",
+ "VCVTPS2UQQZ128rr",
+ "VCVTQQ2PSZ128rr",
+ "(V?)CVTSD2SS(Z?)rr",
+ "(V?)CVTSI(64)?2SDrr",
+ "VCVTSI2SSZrr",
+ "(V?)CVTSI2SSrr",
+ "VCVTSI(64)?2SDZrr",
+ "VCVTSS2SDZrr",
+ "(V?)CVTSS2SDrr",
+ "VCVTTPD2DQZ128rr",
+ "VCVTTPD2UDQZ128rr",
+ "VCVTTPS2QQZ128rr",
+ "VCVTTPS2UQQZ128rr",
+ "VCVTUDQ2PDZ128rr",
+ "VCVTUQQ2PSZ128rr",
+ "VCVTUSI2SSZrr",
+ "VCVTUSI(64)?2SDZrr")>;
+
+def ICXWriteResGroup62 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup62], (instregex "VPCONFLICTQZ128rr")>;
+
+def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;
+
+def ICXWriteResGroup65 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort015]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
+ "VCVTPS2PHZ256mr(b?)",
+ "VCVTPS2PHZmr(b?)")>;
+
+def ICXWriteResGroup66 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[ICXWriteResGroup66], (instregex "VPMOVDB(Z|Z128|Z256)mr(b?)",
+ "VPMOVDW(Z|Z128|Z256)mr(b?)",
+ "VPMOVQB(Z|Z128|Z256)mr(b?)",
+ "VPMOVQW(Z|Z128|Z256)mr(b?)",
+ "VPMOVSDB(Z|Z128|Z256)mr(b?)",
+ "VPMOVSDW(Z|Z128|Z256)mr(b?)",
+ "VPMOVSQB(Z|Z128|Z256)mr(b?)",
+ "VPMOVSQD(Z|Z128|Z256)mr(b?)",
+ "VPMOVSQW(Z|Z128|Z256)mr(b?)",
+ "VPMOVSWB(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSDB(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSDW(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSQB(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSQD(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSQW(Z|Z128|Z256)mr(b?)",
+ "VPMOVUSWB(Z|Z128|Z256)mr(b?)",
+ "VPMOVWB(Z|Z128|Z256)mr(b?)")>;
+
+def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,4];
+}
+def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;
+
+def ICXWriteResGroup69 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,4];
+}
+def: InstRW<[ICXWriteResGroup69], (instregex "PUSHF(16|64)")>;
+
+def ICXWriteResGroup71 : SchedWriteRes<[ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup71], (instrs VBROADCASTSSrm,
+ VPBROADCASTDrm,
+ VPBROADCASTQrm,
+ VMOVSHDUPrm,
+ VMOVSLDUPrm,
+ MOVSHDUPrm,
+ MOVSLDUPrm)>;
+
+def ICXWriteResGroup72 : SchedWriteRes<[ICXPort5]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup72], (instrs MMX_CVTPI2PSirr)>;
+def: InstRW<[ICXWriteResGroup72], (instregex "VCOMPRESSPD(Z|Z128|Z256)rr",
+ "VCOMPRESSPS(Z|Z128|Z256)rr",
+ "VPCOMPRESSD(Z|Z128|Z256)rr",
+ "VPCOMPRESSQ(Z|Z128|Z256)rr",
+ "VPERMW(Z|Z128|Z256)rr")>;
+
+def ICXWriteResGroup73 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup73], (instrs MMX_PADDSBirm,
+ MMX_PADDSWirm,
+ MMX_PADDUSBirm,
+ MMX_PADDUSWirm,
+ MMX_PAVGBirm,
+ MMX_PAVGWirm,
+ MMX_PCMPEQBirm,
+ MMX_PCMPEQDirm,
+ MMX_PCMPEQWirm,
+ MMX_PCMPGTBirm,
+ MMX_PCMPGTDirm,
+ MMX_PCMPGTWirm,
+ MMX_PMAXSWirm,
+ MMX_PMAXUBirm,
+ MMX_PMINSWirm,
+ MMX_PMINUBirm,
+ MMX_PSUBSBirm,
+ MMX_PSUBSWirm,
+ MMX_PSUBUSBirm,
+ MMX_PSUBUSWirm)>;
+
+def ICXWriteResGroup76 : SchedWriteRes<[ICXPort6,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup76], (instrs FARJMP64m)>;
+def: InstRW<[ICXWriteResGroup76], (instregex "JMP(16|32|64)m")>;
+
+def ICXWriteResGroup79 : SchedWriteRes<[ICXPort23,ICXPort15]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup79], (instregex "ANDN(32|64)rm",
+ "MOVBE(16|32|64)rm")>;
+
+def ICXWriteResGroup80 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup80], (instregex "VMOV(64to|QI2)PQIZrm(b?)")>;
+def: InstRW<[ICXWriteResGroup80], (instrs VMOVDI2PDIZrm)>;
+
+def ICXWriteResGroup81 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup81], (instrs POP16r, POP32r, POP64r)>;
+def: InstRW<[ICXWriteResGroup81], (instregex "POP(16|32|64)rmr")>;
+
+def ICXWriteResGroup82 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup82], (instregex "(V?)CVTSI642SSrr",
+ "VCVTSI642SSZrr",
+ "VCVTUSI642SSZrr")>;
+
+def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
+
+def ICXWriteResGroup86 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
+ "SHL(8|16|32|64)m(1|i)",
+ "SHR(8|16|32|64)m(1|i)")>;
+
+def ICXWriteResGroup87 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup87], (instregex "POP(16|32|64)rmm",
+ "PUSH(16|32|64)rmm")>;
+
+def ICXWriteResGroup88 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 6;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,5];
+}
+def: InstRW<[ICXWriteResGroup88], (instrs STD)>;
+
+def ICXWriteResGroup89 : SchedWriteRes<[ICXPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup89], (instregex "LD_F(32|64|80)m")>;
+def: InstRW<[ICXWriteResGroup89], (instrs VBROADCASTF128,
+ VBROADCASTI128,
+ VBROADCASTSDYrm,
+ VBROADCASTSSYrm,
+ VMOVDDUPYrm,
+ VMOVSHDUPYrm,
+ VMOVSLDUPYrm,
+ VPBROADCASTDYrm,
+ VPBROADCASTQYrm)>;
+
+def ICXWriteResGroup90 : SchedWriteRes<[ICXPort01,ICXPort5]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup90], (instrs VCVTDQ2PDYrr)>;
+
+def ICXWriteResGroup92 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup92], (instregex "VMOVSDZrm(b?)",
+ "VMOVSSZrm(b?)")>;
+
+def ICXWriteResGroup92a : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup92a], (instregex "(V?)PMOV(SX|ZX)BDrm",
+ "(V?)PMOV(SX|ZX)BQrm",
+ "(V?)PMOV(SX|ZX)BWrm",
+ "(V?)PMOV(SX|ZX)DQrm",
+ "(V?)PMOV(SX|ZX)WDrm",
+ "(V?)PMOV(SX|ZX)WQrm")>;
+
+def ICXWriteResGroup93 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr",
+ "VCVTPD2DQ(Y|Z256)rr",
+ "VCVTPD2PS(Y|Z256)rr",
+ "VCVTPD2UDQZ256rr",
+ "VCVTPS2PD(Y|Z256)rr",
+ "VCVTPS2QQZ256rr",
+ "VCVTPS2UQQZ256rr",
+ "VCVTQQ2PSZ256rr",
+ "VCVTTPD2DQ(Y|Z256)rr",
+ "VCVTTPD2UDQZ256rr",
+ "VCVTTPS2QQZ256rr",
+ "VCVTTPS2UQQZ256rr",
+ "VCVTUDQ2PDZ256rr",
+ "VCVTUQQ2PSZ256rr")>;
+
+def ICXWriteResGroup93z : SchedWriteRes<[ICXPort5,ICXPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup93z], (instrs VCVTDQ2PDZrr,
+ VCVTPD2DQZrr,
+ VCVTPD2PSZrr,
+ VCVTPD2UDQZrr,
+ VCVTPS2PDZrr,
+ VCVTPS2QQZrr,
+ VCVTPS2UQQZrr,
+ VCVTQQ2PSZrr,
+ VCVTTPD2DQZrr,
+ VCVTTPD2UDQZrr,
+ VCVTTPS2QQZrr,
+ VCVTTPS2UQQZrr,
+ VCVTUDQ2PDZrr,
+ VCVTUQQ2PSZrr)>;
+
+def ICXWriteResGroup95 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup95], (instrs VMOVNTDQAZ128rm,
+ VPBLENDDrmi)>;
+def: InstRW<[ICXWriteResGroup95, ReadAfterVecXLd],
+ (instregex "VBLENDMPDZ128rm(b?)",
+ "VBLENDMPSZ128rm(b?)",
+ "VBROADCASTI32X2Z128rm(b?)",
+ "VBROADCASTSSZ128rm(b?)",
+ "VINSERT(F|I)128rm",
+ "VMOVAPDZ128rm(b?)",
+ "VMOVAPSZ128rm(b?)",
+ "VMOVDDUPZ128rm(b?)",
+ "VMOVDQA32Z128rm(b?)",
+ "VMOVDQA64Z128rm(b?)",
+ "VMOVDQU16Z128rm(b?)",
+ "VMOVDQU32Z128rm(b?)",
+ "VMOVDQU64Z128rm(b?)",
+ "VMOVDQU8Z128rm(b?)",
+ "VMOVSHDUPZ128rm(b?)",
+ "VMOVSLDUPZ128rm(b?)",
+ "VMOVUPDZ128rm(b?)",
+ "VMOVUPSZ128rm(b?)",
+ "VPADD(B|D|Q|W)Z128rm(b?)",
+ "(V?)PADD(B|D|Q|W)rm",
+ "VPBLENDM(B|D|Q|W)Z128rm(b?)",
+ "VPBROADCASTDZ128rm(b?)",
+ "VPBROADCASTQZ128rm(b?)",
+ "VPSUB(B|D|Q|W)Z128rm(b?)",
+ "(V?)PSUB(B|D|Q|W)rm",
+ "VPTERNLOGDZ128rm(b?)i",
+ "VPTERNLOGQZ128rm(b?)i")>;
+
+def ICXWriteResGroup96 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup96], (instrs MMX_PACKSSDWirm,
+ MMX_PACKSSWBirm,
+ MMX_PACKUSWBirm)>;
+
+def ICXWriteResGroup97 : SchedWriteRes<[ICXPort5,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup97], (instregex "VPERMI2W128rr",
+ "VPERMI2W256rr",
+ "VPERMI2Wrr",
+ "VPERMT2W128rr",
+ "VPERMT2W256rr",
+ "VPERMT2Wrr")>;
+
+def ICXWriteResGroup99 : SchedWriteRes<[ICXPort23,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup99], (instrs LEAVE, LEAVE64,
+ SCASB, SCASL, SCASQ, SCASW)>;
+
+def ICXWriteResGroup100 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort015]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup100], (instregex "VCVTSS2USI64Zrr",
+ "(V?)CVTSS2SI64(Z?)rr",
+ "(V?)CVTTSS2SI64(Z?)rr",
+ "VCVTTSS2USI64Zrr")>;
+
+def ICXWriteResGroup101 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup101], (instrs FLDCW16m)>;
+
+def ICXWriteResGroup103 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup103], (instregex "KMOV(B|D|Q|W)km")>;
+
+def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;
+
+def ICXWriteResGroup106 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
+ "VCOMPRESSPS(Z|Z128|Z256)mr(b?)",
+ "VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
+ "VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;
+
+def ICXWriteResGroup107 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup107], (instregex "ROL(8|16|32|64)m(1|i)",
+ "ROR(8|16|32|64)m(1|i)")>;
+
+def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+ let ResourceCycles = [2];
+}
+def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
+ ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
+
+def ICXWriteResGroup108 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
+
+def ICXWriteResGroup109 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
+def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
+
+def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1,2,2,2];
+}
+def: InstRW<[ICXWriteResGroup110], (instrs VPSCATTERDQZ128mr,
+ VPSCATTERQQZ128mr,
+ VSCATTERDPDZ128mr,
+ VSCATTERQPDZ128mr)>;
+
+def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 7;
+ let ResourceCycles = [1,3,1,2];
+}
+def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>;
+
+def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 11;
+ let ResourceCycles = [1,4,4,2];
+}
+def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
+ VPSCATTERQQZ256mr,
+ VSCATTERDPDZ256mr,
+ VSCATTERQPDZ256mr)>;
+
+def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 19;
+ let ResourceCycles = [1,8,8,2];
+}
+def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
+ VPSCATTERQQZmr,
+ VSCATTERDPDZmr,
+ VSCATTERQPDZmr)>;
+
+def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 7;
+ let NumMicroOps = 36;
+ let ResourceCycles = [1,16,1,16,2];
+}
+def: InstRW<[ICXWriteResGroup114], (instrs VSCATTERDPSZmr)>;
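+// The scatter groups above scale with the number of scattered elements:
+// roughly two uops per element (store-address on port 237, store-data on
+// port 4) plus a couple of setup uops, which is where the 7/11/19/36 uop
+// counts come from.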
+
+def ICXWriteResGroup118 : SchedWriteRes<[ICXPort1,ICXPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup118], (instregex "PDEP(32|64)rm",
+ "PEXT(32|64)rm")>;
+
+def ICXWriteResGroup119 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup119], (instregex "FCOM(P?)(32|64)m",
+ "VPBROADCASTB(Z|Z256)rm(b?)",
+ "VPBROADCASTW(Z|Z256)rm(b?)")>;
+def: InstRW<[ICXWriteResGroup119], (instrs VPBROADCASTBYrm,
+ VPBROADCASTWYrm,
+ VPMOVSXBDYrm,
+ VPMOVSXBQYrm,
+ VPMOVSXWQYrm)>;
+
+def ICXWriteResGroup121 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup121], (instrs VMOVNTDQAZ256rm,
+ VPBLENDDYrmi)>;
+def: InstRW<[ICXWriteResGroup121, ReadAfterVecYLd],
+ (instregex "VBLENDMPD(Z|Z256)rm(b?)",
+ "VBLENDMPS(Z|Z256)rm(b?)",
+ "VBROADCASTF32X2Z256rm(b?)",
+ "VBROADCASTF32X2Zrm(b?)",
+ "VBROADCASTF32X4Z256rm(b?)",
+ "VBROADCASTF32X4rm(b?)",
+ "VBROADCASTF32X8rm(b?)",
+ "VBROADCASTF64X2Z128rm(b?)",
+ "VBROADCASTF64X2rm(b?)",
+ "VBROADCASTF64X4rm(b?)",
+ "VBROADCASTI32X2Z256rm(b?)",
+ "VBROADCASTI32X2Zrm(b?)",
+ "VBROADCASTI32X4Z256rm(b?)",
+ "VBROADCASTI32X4rm(b?)",
+ "VBROADCASTI32X8rm(b?)",
+ "VBROADCASTI64X2Z128rm(b?)",
+ "VBROADCASTI64X2rm(b?)",
+ "VBROADCASTI64X4rm(b?)",
+ "VBROADCASTSD(Z|Z256)rm(b?)",
+ "VBROADCASTSS(Z|Z256)rm(b?)",
+ "VINSERTF32x4(Z|Z256)rm(b?)",
+ "VINSERTF32x8Zrm(b?)",
+ "VINSERTF64x2(Z|Z256)rm(b?)",
+ "VINSERTF64x4Zrm(b?)",
+ "VINSERTI32x4(Z|Z256)rm(b?)",
+ "VINSERTI32x8Zrm(b?)",
+ "VINSERTI64x2(Z|Z256)rm(b?)",
+ "VINSERTI64x4Zrm(b?)",
+ "VMOVAPD(Z|Z256)rm(b?)",
+ "VMOVAPS(Z|Z256)rm(b?)",
+ "VMOVDDUP(Z|Z256)rm(b?)",
+ "VMOVDQA32(Z|Z256)rm(b?)",
+ "VMOVDQA64(Z|Z256)rm(b?)",
+ "VMOVDQU16(Z|Z256)rm(b?)",
+ "VMOVDQU32(Z|Z256)rm(b?)",
+ "VMOVDQU64(Z|Z256)rm(b?)",
+ "VMOVDQU8(Z|Z256)rm(b?)",
+ "VMOVSHDUP(Z|Z256)rm(b?)",
+ "VMOVSLDUP(Z|Z256)rm(b?)",
+ "VMOVUPD(Z|Z256)rm(b?)",
+ "VMOVUPS(Z|Z256)rm(b?)",
+ "VPADD(B|D|Q|W)Yrm",
+ "VPADD(B|D|Q|W)(Z|Z256)rm(b?)",
+ "VPBLENDM(B|D|Q|W)(Z|Z256)rm(b?)",
+ "VPBROADCASTD(Z|Z256)rm(b?)",
+ "VPBROADCASTQ(Z|Z256)rm(b?)",
+ "VPSUB(B|D|Q|W)Yrm",
+ "VPSUB(B|D|Q|W)(Z|Z256)rm(b?)",
+ "VPTERNLOGD(Z|Z256)rm(b?)i",
+ "VPTERNLOGQ(Z|Z256)rm(b?)i")>;
+
+def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,2,1];
+}
+def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
+
+def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
+ "RCR(8|16|32|64)m(1|i)")>;
+
+def ICXWriteResGroup128 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,1,3];
+}
+def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
+ "ROR(8|16|32|64)mCL",
+ "SAR(8|16|32|64)mCL",
+ "SHL(8|16|32|64)mCL",
+ "SHR(8|16|32|64)mCL")>;
+
+def ICXWriteResGroup130 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+ let ResourceCycles = [1,1,1,2,1];
+}
+def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>;
+
+def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,2,1,2,2];
+}
+def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
+ VPSCATTERQDZ256mr,
+ VSCATTERQPSZ128mr,
+ VSCATTERQPSZ256mr)>;
+
+def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 12;
+ let ResourceCycles = [1,4,1,4,2];
+}
+def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
+ VSCATTERDPSZ128mr)>;
+
+def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 20;
+ let ResourceCycles = [1,8,1,8,2];
+}
+def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
+ VSCATTERDPSZ256mr)>;
+
+def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+ let Latency = 8;
+ let NumMicroOps = 36;
+ let ResourceCycles = [1,16,1,16,2];
+}
+def: InstRW<[ICXWriteResGroup134], (instrs VPSCATTERDDZmr)>;
+
+def ICXWriteResGroup135 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup135], (instrs MMX_CVTPI2PSirm)>;
+
+def ICXWriteResGroup136 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup136], (instrs VPMOVSXBWYrm,
+ VPMOVSXDQYrm,
+ VPMOVSXWDYrm,
+ VPMOVZXWDYrm)>;
+def: InstRW<[ICXWriteResGroup136], (instregex "VALIGN(D|Q)Z128rm(b?)i",
+ "VFPCLASSSDZrm(b?)",
+ "VFPCLASSSSZrm(b?)",
+ "(V?)PCMPGTQrm",
+ "VPERMI2D128rm(b?)",
+ "VPERMI2PD128rm(b?)",
+ "VPERMI2PS128rm(b?)",
+ "VPERMI2Q128rm(b?)",
+ "VPERMT2D128rm(b?)",
+ "VPERMT2PD128rm(b?)",
+ "VPERMT2PS128rm(b?)",
+ "VPERMT2Q128rm(b?)",
+ "VPMAXSQZ128rm(b?)",
+ "VPMAXUQZ128rm(b?)",
+ "VPMINSQZ128rm(b?)",
+ "VPMINUQZ128rm(b?)",
+ "VPMOVSXBDZ128rm(b?)",
+ "VPMOVSXBQZ128rm(b?)",
+ "VPMOVSXBWZ128rm(b?)",
+ "VPMOVSXDQZ128rm(b?)",
+ "VPMOVSXWDZ128rm(b?)",
+ "VPMOVSXWQZ128rm(b?)",
+ "VPMOVZXBDZ128rm(b?)",
+ "VPMOVZXBQZ128rm(b?)",
+ "VPMOVZXBWZ128rm(b?)",
+ "VPMOVZXDQZ128rm(b?)",
+ "VPMOVZXWDZ128rm(b?)",
+ "VPMOVZXWQZ128rm(b?)")>;
+
+def ICXWriteResGroup136_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup136_2], (instregex "VCMP(PD|PS)Z128rm(b?)i",
+ "VCMP(SD|SS)Zrm",
+ "VFPCLASSPDZ128rm(b?)",
+ "VFPCLASSPSZ128rm(b?)",
+ "VPCMPBZ128rmi(b?)",
+ "VPCMPDZ128rmi(b?)",
+ "VPCMPEQ(B|D|Q|W)Z128rm(b?)",
+ "VPCMPGT(B|D|Q|W)Z128rm(b?)",
+ "VPCMPQZ128rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z128rmi(b?)",
+ "VPCMPWZ128rmi(b?)",
+ "VPTESTMBZ128rm(b?)",
+ "VPTESTMDZ128rm(b?)",
+ "VPTESTMQZ128rm(b?)",
+ "VPTESTMWZ128rm(b?)",
+ "VPTESTNMBZ128rm(b?)",
+ "VPTESTNMDZ128rm(b?)",
+ "VPTESTNMQZ128rm(b?)",
+ "VPTESTNMWZ128rm(b?)")>;
+
+def ICXWriteResGroup137 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm",
+ "(V?)CVTPS2PDrm")>;
+
+def ICXWriteResGroup143 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup143], (instregex "(V?)PHADDSWrm",
+ "(V?)PHSUBSWrm")>;
+
+def ICXWriteResGroup146 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[ICXWriteResGroup146], (instregex "LAR(16|32|64)rm",
+ "LSL(16|32|64)rm")>;
+
+def ICXWriteResGroup148 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup148], (instrs VPCMPGTQYrm)>;
+def: InstRW<[ICXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m",
+ "ILD_F(16|32|64)m",
+ "VALIGND(Z|Z256)rm(b?)i",
+ "VALIGNQ(Z|Z256)rm(b?)i",
+ "VPMAXSQ(Z|Z256)rm(b?)",
+ "VPMAXUQ(Z|Z256)rm(b?)",
+ "VPMINSQ(Z|Z256)rm(b?)",
+ "VPMINUQ(Z|Z256)rm(b?)")>;
+
+def ICXWriteResGroup148_2 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup148_2], (instregex "VCMPPD(Z|Z256)rm(b?)i",
+ "VCMPPS(Z|Z256)rm(b?)i",
+ "VFPCLASSPD(Z|Z256)rm(b?)",
+ "VFPCLASSPS(Z|Z256)rm(b?)",
+ "VPCMPB(Z|Z256)rmi(b?)",
+ "VPCMPD(Z|Z256)rmi(b?)",
+ "VPCMPEQB(Z|Z256)rm(b?)",
+ "VPCMPEQD(Z|Z256)rm(b?)",
+ "VPCMPEQQ(Z|Z256)rm(b?)",
+ "VPCMPEQW(Z|Z256)rm(b?)",
+ "VPCMPGTB(Z|Z256)rm(b?)",
+ "VPCMPGTD(Z|Z256)rm(b?)",
+ "VPCMPGTQ(Z|Z256)rm(b?)",
+ "VPCMPGTW(Z|Z256)rm(b?)",
+ "VPCMPQ(Z|Z256)rmi(b?)",
+ "VPCMPU(B|D|Q|W)Z256rmi(b?)",
+ "VPCMPU(B|D|Q|W)Zrmi(b?)",
+ "VPCMPW(Z|Z256)rmi(b?)",
+ "VPTESTM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTM(B|D|Q|W)Zrm(b?)",
+ "VPTESTNM(B|D|Q|W)Z256rm(b?)",
+ "VPTESTNM(B|D|Q|W)Zrm(b?)")>;
+
+def ICXWriteResGroup149 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup149], (instregex "VCVTDQ2PDZ128rm(b?)",
+ "VCVTDQ2PSZ128rm(b?)",
+ "(V?)CVTDQ2PSrm",
+ "VCVTPD2QQZ128rm(b?)",
+ "VCVTPD2UQQZ128rm(b?)",
+ "VCVTPH2PSZ128rm(b?)",
+ "VCVTPS2DQZ128rm(b?)",
+ "(V?)CVTPS2DQrm",
+ "VCVTPS2PDZ128rm(b?)",
+ "VCVTPS2QQZ128rm(b?)",
+ "VCVTPS2UDQZ128rm(b?)",
+ "VCVTPS2UQQZ128rm(b?)",
+ "VCVTQQ2PDZ128rm(b?)",
+ "VCVTQQ2PSZ128rm(b?)",
+ "VCVTSS2SDZrm",
+ "(V?)CVTSS2SDrm",
+ "VCVTTPD2QQZ128rm(b?)",
+ "VCVTTPD2UQQZ128rm(b?)",
+ "VCVTTPS2DQZ128rm(b?)",
+ "(V?)CVTTPS2DQrm",
+ "VCVTTPS2QQZ128rm(b?)",
+ "VCVTTPS2UDQZ128rm(b?)",
+ "VCVTTPS2UQQZ128rm(b?)",
+ "VCVTUDQ2PDZ128rm(b?)",
+ "VCVTUDQ2PSZ128rm(b?)",
+ "VCVTUQQ2PDZ128rm(b?)",
+ "VCVTUQQ2PSZ128rm(b?)")>;
+
+def ICXWriteResGroup151 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup151], (instregex "VEXPANDPDZ128rm(b?)",
+ "VEXPANDPSZ128rm(b?)",
+ "VPEXPANDDZ128rm(b?)",
+ "VPEXPANDQZ128rm(b?)")>;
+
+def ICXWriteResGroup153 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup153], (instregex "(V?)CVTSD2SSrm")>;
+
+def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
+ VPHSUBSWYrm)>;
+
+def ICXWriteResGroup157 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 10;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,1,1,1,3];
+}
+def: InstRW<[ICXWriteResGroup157], (instregex "XCHG(8|16|32|64)rm")>;
+
+def ICXWriteResGroup159 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
+ let Latency = 11;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1,3];
+}
+def : SchedAlias<WriteFDivX, ICXWriteResGroup159>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup160 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup160], (instregex "MUL_F(32|64)m")>;
+
+def ICXWriteResGroup161 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup161], (instrs VCVTDQ2PSYrm,
+ VCVTPS2PDYrm)>;
+def: InstRW<[ICXWriteResGroup161], (instregex "VCVTDQ2(PD|PS)(Z|Z256)rm(b?)",
+ "VCVTPH2PS(Z|Z256)rm(b?)",
+ "VCVTPS2PD(Z|Z256)rm(b?)",
+ "VCVTQQ2PD(Z|Z256)rm(b?)",
+ "VCVTQQ2PSZ256rm(b?)",
+ "VCVT(T?)PD2QQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PD2UQQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PS2DQYrm",
+ "VCVT(T?)PS2DQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PS2QQZ256rm(b?)",
+ "VCVT(T?)PS2UDQ(Z|Z256)rm(b?)",
+ "VCVT(T?)PS2UQQZ256rm(b?)",
+ "VCVTUDQ2(PD|PS)(Z|Z256)rm(b?)",
+ "VCVTUQQ2PD(Z|Z256)rm(b?)",
+ "VCVTUQQ2PSZ256rm(b?)")>;
+
+def ICXWriteResGroup162 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup162], (instregex "FICOM(P?)(16|32)m",
+ "VEXPANDPD(Z|Z256)rm(b?)",
+ "VEXPANDPS(Z|Z256)rm(b?)",
+ "VPEXPANDD(Z|Z256)rm(b?)",
+ "VPEXPANDQ(Z|Z256)rm(b?)")>;
+
+def ICXWriteResGroup163 : SchedWriteRes<[ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,2];
+}
+def: InstRW<[ICXWriteResGroup163], (instregex "VCVTSD2SSZrm")>;
+
+def ICXWriteResGroup164 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup164], (instregex "(V?)CVTDQ2PDrm")>;
+
+def ICXWriteResGroup166 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup166], (instrs CVTPD2PSrm,
+ CVTPD2DQrm,
+ CVTTPD2DQrm,
+ MMX_CVTPD2PIirm,
+ MMX_CVTTPD2PIirm)>;
+
+def ICXWriteResGroup167 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 11;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup167], (instregex "VPCONFLICTQZ128rm(b?)")>;
+
+def ICXWriteResGroup169 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
+ let Latency = 11;
+ let NumMicroOps = 7;
+ let ResourceCycles = [2,3,2];
+}
+def: InstRW<[ICXWriteResGroup169], (instregex "RCL(16|32|64)rCL",
+ "RCR(16|32|64)rCL")>;
+
+def ICXWriteResGroup170 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 11;
+ let NumMicroOps = 9;
+ let ResourceCycles = [1,5,1,2];
+}
+def: InstRW<[ICXWriteResGroup170], (instrs RCL8rCL)>;
+
+def ICXWriteResGroup171 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
+ let Latency = 11;
+ let NumMicroOps = 11;
+ let ResourceCycles = [2,9];
+}
+def: InstRW<[ICXWriteResGroup171], (instrs LOOPE, LOOPNE)>;
+
+def ICXWriteResGroup174 : SchedWriteRes<[ICXPort01]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[ICXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>;
+
+def ICXWriteResGroup174z : SchedWriteRes<[ICXPort05]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+ let ResourceCycles = [3];
+}
+def: InstRW<[ICXWriteResGroup174z], (instregex "VPMULLQZrr")>;
+
+def ICXWriteResGroup175 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup175], (instregex "VPERMWZ128rm(b?)")>;
+
+def ICXWriteResGroup176 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup176], (instregex "VCVT(T?)SD2USIZrm(b?)",
+ "VCVT(T?)SS2USI64Zrm(b?)")>;
+
+def ICXWriteResGroup177 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup177], (instregex "VCVT(T?)PS2QQZrm(b?)",
+ "VCVT(T?)PS2UQQZrm(b?)")>;
+
+def ICXWriteResGroup179 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,1,1,1];
+}
+def: InstRW<[ICXWriteResGroup179], (instregex "CVTTSS2SI64rm")>;
+
+def ICXWriteResGroup180 : SchedWriteRes<[ICXPort5,ICXPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2,1];
+}
+def: InstRW<[ICXWriteResGroup180], (instregex "(ADD|SUB|SUBR)_FI(16|32)m",
+ "VPERMWZ256rm(b?)",
+ "VPERMWZrm(b?)")>;
+
+def ICXWriteResGroup181 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup181], (instrs VCVTDQ2PDYrm)>;
+
+def ICXWriteResGroup183 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup183], (instregex "VPERMI2W128rm(b?)",
+ "VPERMT2W128rm(b?)")>;
+
+def ICXWriteResGroup184 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
+ let Latency = 14;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1,3];
+}
+def : SchedAlias<WriteFDiv64, ICXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
+def : SchedAlias<WriteFDiv64X, ICXWriteResGroup184>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup184_1 : SchedWriteRes<[ICXPort0,ICXFPDivider]> {
+ let Latency = 14;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1,5];
+}
+def : SchedAlias<WriteFDiv64Y, ICXWriteResGroup184_1>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup187 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup187], (instregex "MUL_FI(16|32)m")>;
+
+def ICXWriteResGroup188 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup188], (instregex "VCVTPD2DQZrm(b?)",
+ "VCVTPD2PSZrm(b?)",
+ "VCVTPD2UDQZrm(b?)",
+ "VCVTQQ2PSZrm(b?)",
+ "VCVTTPD2DQZrm(b?)",
+ "VCVTTPD2UDQZrm(b?)",
+ "VCVTUQQ2PSZrm(b?)")>;
+
+def ICXWriteResGroup189 : SchedWriteRes<[ICXPort5,ICXPort23,ICXPort015]> {
+ let Latency = 14;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2,1,1];
+}
+def: InstRW<[ICXWriteResGroup189], (instregex "VPERMI2W256rm(b?)",
+ "VPERMI2Wrm(b?)",
+ "VPERMT2W256rm(b?)",
+ "VPERMT2Wrm(b?)")>;
+
+def ICXWriteResGroup190 : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 14;
+ let NumMicroOps = 10;
+ let ResourceCycles = [2,4,1,3];
+}
+def: InstRW<[ICXWriteResGroup190], (instrs RCR8rCL)>;
+
+def ICXWriteResGroup191 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 15;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup191], (instregex "DIVR_(FPrST0|FST0r|FrST0)")>;
+
+def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 15;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,2,2,1,2];
+}
+def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
+
+def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 15;
+ let NumMicroOps = 10;
+ let ResourceCycles = [1,1,1,5,1,1];
+}
+def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
+
+def ICXWriteResGroup199 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 16;
+ let NumMicroOps = 14;
+ let ResourceCycles = [1,1,1,4,2,5];
+}
+def: InstRW<[ICXWriteResGroup199], (instrs CMPXCHG8B)>;
+
+def ICXWriteResGroup200 : SchedWriteRes<[ICXPort1, ICXPort05, ICXPort6]> {
+ let Latency = 12;
+ let NumMicroOps = 34;
+ let ResourceCycles = [1, 4, 5];
+}
+def: InstRW<[ICXWriteResGroup200], (instrs VZEROALL)>;
+
+def ICXWriteResGroup201 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 17;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,5];
+}
+def : SchedAlias<WriteFDivXLd, ICXWriteResGroup201>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup202 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort5,ICXPort6,ICXPort05,ICXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 15;
+ let ResourceCycles = [2,1,2,4,2,4];
+}
+def: InstRW<[ICXWriteResGroup202], (instrs XCH_F)>;
+
+def ICXWriteResGroup205 : SchedWriteRes<[ICXPort23,ICXPort01]> {
+ let Latency = 21;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup205], (instregex "VPMULLQZ128rm(b?)")>;
+
+def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156]> {
+ let Latency = 18;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,1,5];
+}
+def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>;
+
+def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 18;
+ let NumMicroOps = 11;
+ let ResourceCycles = [2,1,1,4,1,2];
+}
+def: InstRW<[ICXWriteResGroup208], (instregex "RCR(8|16|32|64)mCL")>;
+
+def ICXWriteResGroup209 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 19;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,4];
+}
+def : SchedAlias<WriteFDiv64Ld, ICXWriteResGroup209>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup211 : SchedWriteRes<[ICXPort23,ICXPort01]> {
+ let Latency = 22;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup211], (instregex "VPMULLQZ256rm(b?)")>;
+
+def ICXWriteResGroup211_1 : SchedWriteRes<[ICXPort23,ICXPort05]> {
+ let Latency = 22;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup211_1], (instregex "VPMULLQZrm(b?)")>;
+
+def ICXWriteResGroup215 : SchedWriteRes<[ICXPort0]> {
+ let Latency = 20;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def: InstRW<[ICXWriteResGroup215], (instregex "DIV_(FPrST0|FST0r|FrST0)")>;
+
+def ICXWriteResGroup216 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 20;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,4];
+}
+def : SchedAlias<WriteFDiv64XLd, ICXWriteResGroup216>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteGatherEVEX2 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 17;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX2], (instrs VGATHERQPSZ128rm, VPGATHERQDZ128rm,
+ VGATHERDPDZ128rm, VPGATHERDQZ128rm,
+ VGATHERQPDZ128rm, VPGATHERQQZ128rm)>;
+
+def ICXWriteGatherEVEX4 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 19;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX4], (instrs VGATHERQPSZ256rm, VPGATHERQDZ256rm,
+ VGATHERQPDZ256rm, VPGATHERQQZ256rm,
+ VGATHERDPSZ128rm, VPGATHERDDZ128rm,
+ VGATHERDPDZ256rm, VPGATHERDQZ256rm)>;
+
+def ICXWriteGatherEVEX8 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 21;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX8], (instrs VGATHERDPSZ256rm, VPGATHERDDZ256rm,
+ VGATHERDPDZrm, VPGATHERDQZrm,
+ VGATHERQPDZrm, VPGATHERQQZrm,
+ VGATHERQPSZrm, VPGATHERQDZrm)>;
+
+def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0156]> {
+ let Latency = 25;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,16,1,1];
+}
+def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
+
+def ICXWriteResGroup219 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 20;
+ let NumMicroOps = 8;
+ let ResourceCycles = [1,1,1,1,1,1,2];
+}
+def: InstRW<[ICXWriteResGroup219], (instrs INSB, INSL, INSW)>;
+
+def ICXWriteResGroup220 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort0156]> {
+ let Latency = 20;
+ let NumMicroOps = 10;
+ let ResourceCycles = [1,2,7];
+}
+def: InstRW<[ICXWriteResGroup220], (instrs MWAITrr)>;
+
+def ICXWriteResGroup222 : SchedWriteRes<[ICXPort0,ICXPort23,ICXFPDivider]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1,8];
+}
+def : SchedAlias<WriteFDiv64YLd, ICXWriteResGroup222>; // TODO - convert to ZnWriteResFpuPair
+
+def ICXWriteResGroup223 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 22;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup223], (instregex "DIV_F(32|64)m")>;
+
+def ICXWriteResGroupVEX2 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
+ let Latency = 18;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,2,1,1];
+}
+def: InstRW<[ICXWriteResGroupVEX2], (instrs VGATHERDPDrm, VPGATHERDQrm,
+ VGATHERQPDrm, VPGATHERQQrm,
+ VGATHERQPSrm, VPGATHERQDrm)>;
+
+def ICXWriteResGroupVEX4 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
+ let Latency = 20;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,4,1,1];
+}
+def: InstRW<[ICXWriteResGroupVEX4], (instrs VGATHERDPDYrm, VPGATHERDQYrm,
+ VGATHERDPSrm, VPGATHERDDrm,
+ VGATHERQPDYrm, VPGATHERQQYrm,
+ VGATHERQPSYrm, VPGATHERQDYrm)>;
+
+def ICXWriteResGroupVEX8 : SchedWriteRes<[ICXPort0, ICXPort23, ICXPort5, ICXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 5; // 2 uops perform multiple loads
+ let ResourceCycles = [1,8,1,1];
+}
+def: InstRW<[ICXWriteResGroupVEX8], (instrs VGATHERDPSYrm, VPGATHERDDYrm)>;
+
+def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
+ let Latency = 22;
+ let NumMicroOps = 14;
+ let ResourceCycles = [5,5,4];
+}
+def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
+ "VPCONFLICTQZ256rr")>;
+
+def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 23;
+ let NumMicroOps = 19;
+ let ResourceCycles = [2,1,4,1,1,4,6];
+}
+def: InstRW<[ICXWriteResGroup228], (instrs CMPXCHG16B)>;
+
+def ICXWriteResGroup233 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 25;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup233], (instregex "DIV_FI(16|32)m")>;
+
+def ICXWriteResGroup239 : SchedWriteRes<[ICXPort0,ICXPort23]> {
+ let Latency = 27;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1,1];
+}
+def: InstRW<[ICXWriteResGroup239], (instregex "DIVR_F(32|64)m")>;
+
+def ICXWriteResGroup242 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 29;
+ let NumMicroOps = 15;
+ let ResourceCycles = [5,5,1,4];
+}
+def: InstRW<[ICXWriteResGroup242], (instregex "VPCONFLICTQZ256rm(b?)")>;
+
+def ICXWriteResGroup243 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
+ let Latency = 30;
+ let NumMicroOps = 3;
+ let ResourceCycles = [1,1,1];
+}
+def: InstRW<[ICXWriteResGroup243], (instregex "DIVR_FI(16|32)m")>;
+
+def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,ICXPort0156]> {
+ let Latency = 35;
+ let NumMicroOps = 23;
+ let ResourceCycles = [1,5,3,4,10];
+}
+def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
+ "IN(8|16|32)rr")>;
+
+def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 35;
+ let NumMicroOps = 23;
+ let ResourceCycles = [1,5,2,1,4,10];
+}
+def: InstRW<[ICXWriteResGroup248], (instregex "OUT(8|16|32)ir",
+ "OUT(8|16|32)rr")>;
+
+def ICXWriteResGroup249 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
+ let Latency = 37;
+ let NumMicroOps = 21;
+ let ResourceCycles = [9,7,5];
+}
+def: InstRW<[ICXWriteResGroup249], (instregex "VPCONFLICTDZ256rr",
+ "VPCONFLICTQZrr")>;
+
+def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156]> {
+ let Latency = 37;
+ let NumMicroOps = 31;
+ let ResourceCycles = [1,8,1,21];
+}
+def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>;
+
+def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort15,ICXPort0156]> {
+ let Latency = 40;
+ let NumMicroOps = 18;
+ let ResourceCycles = [1,1,2,3,1,1,1,8];
+}
+def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>;
+
+def ICXWriteResGroup253 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 41;
+ let NumMicroOps = 39;
+ let ResourceCycles = [1,10,1,1,26];
+}
+def: InstRW<[ICXWriteResGroup253], (instrs XSAVE64)>;
+
+def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
+ let Latency = 42;
+ let NumMicroOps = 22;
+ let ResourceCycles = [2,20];
+}
+def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>;
+
+def ICXWriteResGroup255 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+ let Latency = 42;
+ let NumMicroOps = 40;
+ let ResourceCycles = [1,11,1,1,26];
+}
+def: InstRW<[ICXWriteResGroup255], (instrs XSAVE)>;
+def: InstRW<[ICXWriteResGroup255], (instregex "XSAVEC", "XSAVES", "XSAVEOPT")>;
+
+def ICXWriteResGroup256 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 44;
+ let NumMicroOps = 22;
+ let ResourceCycles = [9,7,1,5];
+}
+def: InstRW<[ICXWriteResGroup256], (instregex "VPCONFLICTDZ256rm(b?)",
+ "VPCONFLICTQZrm(b?)")>;
+
+def ICXWriteResGroup258 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort05,ICXPort06,ICXPort0156]> {
+ let Latency = 62;
+ let NumMicroOps = 64;
+ let ResourceCycles = [2,8,5,10,39];
+}
+def: InstRW<[ICXWriteResGroup258], (instrs FLDENVm)>;
+
+def ICXWriteResGroup259 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 63;
+ let NumMicroOps = 88;
+ let ResourceCycles = [4,4,31,1,2,1,45];
+}
+def: InstRW<[ICXWriteResGroup259], (instrs FXRSTOR64)>;
+
+def ICXWriteResGroup260 : SchedWriteRes<[ICXPort0,ICXPort6,ICXPort23,ICXPort05,ICXPort06,ICXPort15,ICXPort0156]> {
+ let Latency = 63;
+ let NumMicroOps = 90;
+ let ResourceCycles = [4,2,33,1,2,1,47];
+}
+def: InstRW<[ICXWriteResGroup260], (instrs FXRSTOR)>;
+
+def ICXWriteResGroup261 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
+ let Latency = 67;
+ let NumMicroOps = 35;
+ let ResourceCycles = [17,11,7];
+}
+def: InstRW<[ICXWriteResGroup261], (instregex "VPCONFLICTDZrr")>;
+
+def ICXWriteResGroup262 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23,ICXPort015]> {
+ let Latency = 74;
+ let NumMicroOps = 36;
+ let ResourceCycles = [17,11,1,7];
+}
+def: InstRW<[ICXWriteResGroup262], (instregex "VPCONFLICTDZrm(b?)")>;
+
+def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> {
+ let Latency = 75;
+ let NumMicroOps = 15;
+ let ResourceCycles = [6,3,6];
+}
+def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>;
+
+def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort237,ICXPort06,ICXPort0156]> {
+ let Latency = 106;
+ let NumMicroOps = 100;
+ let ResourceCycles = [9,1,11,16,1,11,21,30];
+}
+def: InstRW<[ICXWriteResGroup266], (instrs FSTENVm)>;
+
+def ICXWriteResGroup267 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
+ let Latency = 140;
+ let NumMicroOps = 4;
+ let ResourceCycles = [1,3];
+}
+def: InstRW<[ICXWriteResGroup267], (instrs PAUSE)>;
+
+def: InstRW<[WriteZero], (instrs CLC)>;
+
+
+// Instruction variants handled by the renamer. These might not need execution
+// ports in certain conditions.
+// See Agner Fog's "The microarchitecture of Intel, AMD and VIA CPUs",
+// section "Skylake Pipeline" > "Register allocation and renaming".
+// These can be investigated with llvm-exegesis, e.g.
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=-
+
+def ICXWriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+}
+
+def ICXWriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[ICXWriteZeroIdiom], (instrs SUB32rr, SUB64rr,
+ XOR32rr, XOR64rr)>;
+
+def ICXWriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+def : InstRW<[ICXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr,
+ XORPDrr, VXORPDrr,
+ VXORPSZ128rr,
+ VXORPDZ128rr)>;
+
+def ICXWriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[ICXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VXORPSZ256rr, VXORPDZ256rr)>;
+
+def ICXWriteFZeroIdiomZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicZ]>
+]>;
+def : InstRW<[ICXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>;
+
+def ICXWriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr,
+ VPXORDZ128rr, VPXORQZ128rr)>;
+
+def ICXWriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomLogicY], (instrs VPXORYrr,
+ VPXORDZ256rr, VPXORQZ256rr)>;
+
+def ICXWriteVZeroIdiomLogicZ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicZ]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>;
+
+def ICXWriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr,
+ PCMPGTDrr, VPCMPGTDrr,
+ PCMPGTWrr, VPCMPGTWrr)>;
+
+def ICXWriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr,
+ VPCMPGTDYrr,
+ VPCMPGTWYrr)>;
+
+def ICXWritePSUB : SchedWriteRes<[ICXPort015]> {
+ let Latency = 1;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def ICXWriteVZeroIdiomPSUB : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [ICXWritePSUB]>
+]>;
+
+def : InstRW<[ICXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr,
+ PSUBDrr, VPSUBDrr, VPSUBDZ128rr,
+ PSUBQrr, VPSUBQrr, VPSUBQZ128rr,
+ PSUBWrr, VPSUBWrr, VPSUBWZ128rr,
+ VPSUBBYrr, VPSUBBZ256rr,
+ VPSUBDYrr, VPSUBDZ256rr,
+ VPSUBQYrr, VPSUBQZ256rr,
+ VPSUBWYrr, VPSUBWZ256rr,
+ VPSUBBZrr,
+ VPSUBDZrr,
+ VPSUBQZrr,
+ VPSUBWZrr)>;
+def ICXWritePCMPGTQ : SchedWriteRes<[ICXPort5]> {
+ let Latency = 3;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+
+def ICXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
+ SchedVar<NoSchedPred, [ICXWritePCMPGTQ]>
+]>;
+def : InstRW<[ICXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr,
+ VPCMPGTQYrr)>;
+
+
+// CMOVs that use both the Z and C flags require an extra uop.
+def ICXWriteCMOVA_CMOVBErr : SchedWriteRes<[ICXPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def ICXWriteCMOVA_CMOVBErm : SchedWriteRes<[ICXPort23,ICXPort06]> {
+ let Latency = 7;
+ let ResourceCycles = [1,2];
+ let NumMicroOps = 3;
+}
+
+def ICXCMOVA_CMOVBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArr_Or_CMOVBErr>, [ICXWriteCMOVA_CMOVBErr]>,
+ SchedVar<NoSchedPred, [WriteCMOV]>
+]>;
+
+def ICXCMOVA_CMOVBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsCMOVArm_Or_CMOVBErm>, [ICXWriteCMOVA_CMOVBErm]>,
+ SchedVar<NoSchedPred, [WriteCMOV.Folded]>
+]>;
+
+def : InstRW<[ICXCMOVA_CMOVBErr], (instrs CMOV16rr, CMOV32rr, CMOV64rr)>;
+def : InstRW<[ICXCMOVA_CMOVBErm], (instrs CMOV16rm, CMOV32rm, CMOV64rm)>;
+
+// SETCCs that use both the Z and C flags require an extra uop.
+def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+
+def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06]> {
+ let Latency = 3;
+ let ResourceCycles = [1,1,2];
+ let NumMicroOps = 4;
+}
+
+def ICXSETA_SETBErr : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAr_Or_SETBEr>, [ICXWriteSETA_SETBEr]>,
+ SchedVar<NoSchedPred, [WriteSETCC]>
+]>;
+
+def ICXSETA_SETBErm : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<IsSETAm_Or_SETBEm>, [ICXWriteSETA_SETBEm]>,
+ SchedVar<NoSchedPred, [WriteSETCCStore]>
+]>;
+
+def : InstRW<[ICXSETA_SETBErr], (instrs SETCCr)>;
+def : InstRW<[ICXSETA_SETBErm], (instrs SETCCm)>;
+
+} // SchedModel
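
The zero-idiom variants near the end of the Ice Lake model above all follow one shape: when the renamer can eliminate the instruction (ZeroIdiomPredicate), the write resolves to ICXWriteZeroLatency (zero latency, no execution ports); otherwise it falls back to the ordinary write class. A minimal sketch of that pattern, using a hypothetical write class WriteFooX and instruction FOOrr as placeholders (neither is defined by this patch):

def ICXWriteVZeroIdiomFooX : SchedWriteVariant<[
  // Recognized zero idiom: handled at register rename, no execution needed.
  SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [ICXWriteZeroLatency]>,
  // Otherwise fall back to the placeholder write class.
  SchedVar<NoSchedPred, [WriteFooX]>
]>;
def : InstRW<[ICXWriteVZeroIdiomFooX], (instrs FOOrr)>;
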
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 2f7157f43268..c8d7b0f72c1c 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -124,12 +124,17 @@ defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>;
+defm : SBWriteResPair<WriteMULX32, [SBPort1,SBPort05,SBPort015], 3, [1,1,1], 3>;
defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>;
+defm : SBWriteResPair<WriteMULX64, [SBPort1,SBPort0], 3, [1,1], 2>;
defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>;
defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SBWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(SBWriteIMulH.Latency, SandyBridgeModel.LoadLatency);
+}
defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>;
defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>;
@@ -601,7 +606,7 @@ def SBWriteResGroup2 : SchedWriteRes<[SBPort5]> {
def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP,
LD_Frr, ST_Frr, ST_FPrr)>;
def: InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs.
-def: InstRW<[SBWriteResGroup2], (instrs RETQ)>;
+def: InstRW<[SBWriteResGroup2], (instrs RET64)>;
def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> {
let Latency = 1;
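
The WriteIMulH/WriteIMulHLd change above derives the folded latency instead of hard-coding it: the model's LoadLatency is added to the register-form latency with TableGen's !add operator. A hedged sketch of the same idiom for a made-up model FooModel, assuming a LoadLatency of 5 (both names and the value are placeholders):

def FooWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
def : WriteRes<WriteIMulHLd, []> {
  // 4 (register-form high-part latency) + 5 (assumed FooModel.LoadLatency) = 9.
  let Latency = !add(FooWriteIMulH.Latency, FooModel.LoadLatency);
}
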
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 8486bdda0349..7d3229c3b023 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -122,12 +122,17 @@ defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>;
defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>;
+defm : SKLWriteResPair<WriteMULX32, [SKLPort1,SKLPort06,SKLPort0156], 3, [1,1,1], 3>;
defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>;
+defm : SKLWriteResPair<WriteMULX64, [SKLPort1,SKLPort5], 3, [1,1], 2>;
defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>;
defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SKLWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(SKLWriteIMulH.Latency, SkylakeClientModel.LoadLatency);
+}
defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>;
@@ -1170,7 +1175,7 @@ def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKLWriteResGroup98], (instrs LRETQ, RETQ)>;
+def: InstRW<[SKLWriteResGroup98], (instrs LRET64, RET64)>;
def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index ba80d47c4eb6..1d8417aef41e 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -123,12 +123,17 @@ defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1
defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>;
+defm : SKXWriteResPair<WriteMULX32, [SKXPort1,SKXPort06,SKXPort0156], 3, [1,1,1], 3>;
defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>;
+defm : SKXWriteResPair<WriteMULX64, [SKXPort1,SKXPort5], 3, [1,1], 2>;
defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>;
defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>;
-def : WriteRes<WriteIMulH, []> { let Latency = 3; }
+def SKXWriteIMulH : WriteRes<WriteIMulH, []> { let Latency = 4; }
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = !add(SKXWriteIMulH.Latency, SkylakeServerModel.LoadLatency);
+}
defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>;
@@ -1431,7 +1436,7 @@ def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> {
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
-def: InstRW<[SKXWriteResGroup104], (instrs LRETQ, RETQ)>;
+def: InstRW<[SKXWriteResGroup104], (instrs LRET64, RET64)>;
def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> {
let Latency = 7;
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 09148fc19e57..1cb48175260a 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -87,8 +87,10 @@ class X86SchedWriteWidths<X86FoldableSchedWrite sScl,
}
// Multiclass that wraps X86SchedWriteWidths for each fp vector type.
-class X86SchedWriteSizes<X86SchedWriteWidths sPS,
+class X86SchedWriteSizes<X86SchedWriteWidths sPH,
+ X86SchedWriteWidths sPS,
X86SchedWriteWidths sPD> {
+ X86SchedWriteWidths PH = sPH;
X86SchedWriteWidths PS = sPS;
X86SchedWriteWidths PD = sPD;
}
@@ -146,7 +148,10 @@ defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by reg
defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication.
defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate.
defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register.
-def WriteIMulH : SchedWrite; // Integer multiplication, high part.
+defm WriteMULX32 : X86SchedWritePair; // Integer 32-bit Multiplication without affecting flags.
+defm WriteMULX64 : X86SchedWritePair; // Integer 64-bit Multiplication without affecting flags.
+def WriteIMulH : SchedWrite; // Integer multiplication, high part (only used by the RR variant of MULX).
+def WriteIMulHLd : SchedWrite; // Integer multiplication, high part (only used by the RM variant of MULX).
def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap.
def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap.
@@ -681,20 +686,22 @@ def SchedWriteVarBlend
WriteVarBlendY, WriteVarBlendZ>;
// Vector size wrappers.
+// FIXME: Currently PH uses the same scheduling classes as PS.
+// We may refine them later.
def SchedWriteFAddSizes
- : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd64>;
+ : X86SchedWriteSizes<SchedWriteFAdd, SchedWriteFAdd, SchedWriteFAdd64>;
def SchedWriteFCmpSizes
- : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp64>;
+ : X86SchedWriteSizes<SchedWriteFCmp, SchedWriteFCmp, SchedWriteFCmp64>;
def SchedWriteFMulSizes
- : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul64>;
+ : X86SchedWriteSizes<SchedWriteFMul, SchedWriteFMul, SchedWriteFMul64>;
def SchedWriteFDivSizes
- : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv64>;
+ : X86SchedWriteSizes<SchedWriteFDiv, SchedWriteFDiv, SchedWriteFDiv64>;
def SchedWriteFSqrtSizes
- : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt64>;
+ : X86SchedWriteSizes<SchedWriteFSqrt, SchedWriteFSqrt, SchedWriteFSqrt64>;
def SchedWriteFLogicSizes
- : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic>;
+ : X86SchedWriteSizes<SchedWriteFLogic, SchedWriteFLogic, SchedWriteFLogic>;
def SchedWriteFShuffleSizes
- : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle>;
+ : X86SchedWriteSizes<SchedWriteFShuffle, SchedWriteFShuffle, SchedWriteFShuffle>;
//===----------------------------------------------------------------------===//
// Generic Processor Scheduler Models.
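
With the extra sPH parameter, every X86SchedWriteSizes bundle now carries three width groups (PH, PS, PD), and for now PH simply reuses the PS schedules. Assuming X86SchedWriteWidths keeps its usual per-width fields (Scl/XMM/YMM/ZMM), a hypothetical new bundle could be sketched as:

// Placeholder names: SchedWriteFoo and SchedWriteFoo64 stand in for real
// X86SchedWriteWidths groups.
def SchedWriteFooSizes
    : X86SchedWriteSizes<SchedWriteFoo, SchedWriteFoo, SchedWriteFoo64>;
// Consumers select a type and width, e.g. SchedWriteFooSizes.PH.XMM, which
// for now names the same write as SchedWriteFooSizes.PS.XMM.
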
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index d00c2e3718d3..6fd98280f560 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -56,17 +56,21 @@ multiclass AtomWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> RMPorts,
int RRLat = 1, int RMLat = 1,
list<int> RRRes = [1],
- list<int> RMRes = [1]> {
+ list<int> RMRes = [1],
+ int RRUOps = 1,
+ int RMUOps = 1> {
// Register variant.
def : WriteRes<SchedRW, RRPorts> {
let Latency = RRLat;
let ResourceCycles = RRRes;
+ let NumMicroOps = RRUOps;
}
// Memory variant.
def : WriteRes<SchedRW.Folded, RMPorts> {
let Latency = RMLat;
let ResourceCycles = RMRes;
+ let NumMicroOps = RMUOps;
}
}
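
With the new RRUOps/RMUOps parameters, each AtomWriteResPair instantiation expands into a register record and a folded (memory) record with independent uop counts. As a rough sketch, the WriteIMul32 pair further down (latencies 6/7, resource cycles [6,6]/[7,7], 3/4 uops) expands to approximately:

def : WriteRes<WriteIMul32, [AtomPort0, AtomPort1]> {
  let Latency = 6;              // RRLat
  let ResourceCycles = [6, 6];  // RRRes
  let NumMicroOps = 3;          // RRUOps
}
def : WriteRes<WriteIMul32.Folded, [AtomPort0, AtomPort1]> {
  let Latency = 7;              // RMLat
  let ResourceCycles = [7, 7];  // RMRes
  let NumMicroOps = 4;          // RMUOps
}
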
@@ -80,17 +84,20 @@ def : WriteRes<WriteRMW, [AtomPort0]>;
defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>;
defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>;
-defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>;
-defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
-defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
-defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
-defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>;
+defm : AtomWriteResPair<WriteIMul8, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 7, [7,7], [7,7], 3, 3>;
+defm : AtomWriteResPair<WriteIMul16, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [7,7], [8,8], 4, 5>;
+defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 2, 3>;
+defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 2, 3>;
+defm : AtomWriteResPair<WriteIMul32, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [6,6], [7,7], 3, 4>;
defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
-defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
-defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>;
-defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>;
+defm : AtomWriteResPair<WriteIMul64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 12, 12, [12,12], [12,12], 8, 8>;
+defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 14, 14, [14,14], [14,14], 7, 7>;
+defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 12, 12, [12,12], [12,12], 6, 6>;
defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>;
defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>;
@@ -98,14 +105,14 @@ defm : X86WriteRes<WriteBSWAP64, [AtomPort0], 1, [1], 1>;
defm : AtomWriteResPair<WriteCMPXCHG, [AtomPort01], [AtomPort01], 15, 15, [15]>;
defm : X86WriteRes<WriteCMPXCHGRMW, [AtomPort01, AtomPort0], 1, [1, 1], 1>;
-defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>;
-defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
-defm : AtomWriteResPair<WriteDiv32, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>;
-defm : AtomWriteResPair<WriteDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
-defm : AtomWriteResPair<WriteIDiv8, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteIDiv16, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteIDiv32, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteIDiv64, [AtomPort01], [AtomPort01],130,130,[130],[130]>;
+defm : AtomWriteResPair<WriteDiv8, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 50, 68, [50,50], [68,68], 9, 9>;
+defm : AtomWriteResPair<WriteDiv16, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 50, 50, [50,50], [50,50], 12, 12>;
+defm : AtomWriteResPair<WriteDiv32, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 50, 50, [50,50], [50,50], 12, 12>;
+defm : AtomWriteResPair<WriteDiv64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],130,130,[130,130],[130,130], 38, 38>;
+defm : AtomWriteResPair<WriteIDiv8, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 26, 26>;
+defm : AtomWriteResPair<WriteIDiv16, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 29, 29>;
+defm : AtomWriteResPair<WriteIDiv32, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 29, 29>;
+defm : AtomWriteResPair<WriteIDiv64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],130,130,[130,130],[130,130], 60, 60>;
defm : X86WriteResPairUnsupported<WriteCRC32>;
@@ -132,8 +139,8 @@ defm : X86WriteRes<WriteBitTestSet, [AtomPort1], 1, [1], 1>;
def : WriteRes<WriteLEA, [AtomPort1]>;
// Bit counts.
-defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
-defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>;
+defm : AtomWriteResPair<WriteBSF, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 16, 16, [16,16], [16,16], 10, 10>;
+defm : AtomWriteResPair<WriteBSR, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 16, 16, [16,16], [16,16], 10, 10>;
defm : X86WriteResPairUnsupported<WritePOPCNT>;
defm : X86WriteResPairUnsupported<WriteLZCNT>;
defm : X86WriteResPairUnsupported<WriteTZCNT>;
@@ -230,52 +237,52 @@ defm : AtomWriteResPair<WriteFAddX, [AtomPort1], [AtomPort0,AtomPort1],
defm : X86WriteResPairUnsupported<WriteFAddY>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : AtomWriteResPair<WriteFAdd64, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
-defm : AtomWriteResPair<WriteFAdd64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteFAdd64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6], 3, 4>;
defm : X86WriteResPairUnsupported<WriteFAdd64Y>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : AtomWriteResPair<WriteFCmp, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
-defm : AtomWriteResPair<WriteFCmpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteFCmpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6], 3, 4>;
defm : X86WriteResPairUnsupported<WriteFCmpY>;
defm : X86WriteResPairUnsupported<WriteFCmpZ>;
defm : AtomWriteResPair<WriteFCmp64, [AtomPort1], [AtomPort0,AtomPort1], 5, 5, [1], [1,1]>;
-defm : AtomWriteResPair<WriteFCmp64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6]>;
+defm : AtomWriteResPair<WriteFCmp64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 6, 7, [5,5], [6,6], 3, 4>;
defm : X86WriteResPairUnsupported<WriteFCmp64Y>;
defm : X86WriteResPairUnsupported<WriteFCmp64Z>;
defm : AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
-defm : AtomWriteResPair<WriteFComX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
+defm : AtomWriteResPair<WriteFComX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9],[10,10], 4, 5>;
defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [2], [2]>;
defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
defm : X86WriteResPairUnsupported<WriteFMulY>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [2], [2]>;
-defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10]>;
+defm : AtomWriteResPair<WriteFMul64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9],[10,10], 6, 7>;
defm : X86WriteResPairUnsupported<WriteFMul64Y>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
-defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFRcpX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>;
-defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>;
+defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 9, 10, [9,9], [10,10], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFDiv, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 34, 34, [34,34], [34,34], 3, 4>;
+defm : AtomWriteResPair<WriteFDivX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 70, 70, [70,70], [70,70], 6, 7>;
defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
-defm : AtomWriteResPair<WriteFDiv64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteFDiv64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFDiv64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 3, 4>;
+defm : AtomWriteResPair<WriteFDiv64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],125,125,[125,125],[125,125], 6, 7>;
defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : AtomWriteResPair<WriteFSqrt, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>;
-defm : AtomWriteResPair<WriteFSqrtX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>;
+defm : AtomWriteResPair<WriteFSqrt, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 34, 34, [34,34], [34,34], 3, 4>;
+defm : AtomWriteResPair<WriteFSqrtX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 70, 70, [70,70], [70,70], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : AtomWriteResPair<WriteFSqrt64, [AtomPort01], [AtomPort01], 62, 62, [62], [62]>;
-defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort01], [AtomPort01],125,125,[125],[125]>;
+defm : AtomWriteResPair<WriteFSqrt64, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 62, 62, [62,62], [62,62], 3, 4>;
+defm : AtomWriteResPair<WriteFSqrt64X, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1],125,125,[125,125],[125,125], 5, 6>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
-defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, [71], [71]>;
+defm : AtomWriteResPair<WriteFSqrt80, [AtomPort0], [AtomPort0], 71, 71, [71], [71]>;
defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>;
defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>;
defm : X86WriteResPairUnsupported<WriteFRndY>;
@@ -389,8 +396,8 @@ defm : X86WriteResPairUnsupported<WriteVecLogicZ>;
defm : X86WriteResPairUnsupported<WriteVecTest>;
defm : X86WriteResPairUnsupported<WriteVecTestY>;
defm : X86WriteResPairUnsupported<WriteVecTestZ>;
-defm : AtomWriteResPair<WriteVecShift, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2]>;
-defm : AtomWriteResPair<WriteVecShiftX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2]>;
+defm : AtomWriteResPair<WriteVecShift, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2], 2, 3>;
+defm : AtomWriteResPair<WriteVecShiftX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 2, 3, [1,1], [2,2], 2, 3>;
defm : X86WriteResPairUnsupported<WriteVecShiftY>;
defm : X86WriteResPairUnsupported<WriteVecShiftZ>;
defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort0], [AtomPort0], 1, 1>;
@@ -417,7 +424,7 @@ defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>;
defm : X86WriteResPairUnsupported<WriteShuffleY>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>;
-defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4]>;
+defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 4, 5, [3,3], [4,4], 4, 5>;
defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : X86WriteResPairUnsupported<WriteBlend>;
@@ -471,11 +478,11 @@ defm : X86WriteResPairUnsupported<WriteAESDecEnc>;
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : AtomWriteResPair<WriteFHAdd, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WriteFHAddY, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>;
-defm : AtomWriteResPair<WritePHAdd, [AtomPort01], [AtomPort01], 3, 4, [3], [4]>;
-defm : AtomWriteResPair<WritePHAddX, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
-defm : AtomWriteResPair<WritePHAddY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>;
+defm : AtomWriteResPair<WriteFHAdd, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 8, 9, [8,8], [9,9], 5, 6>;
+defm : X86WriteResPairUnsupported<WriteFHAddY>;
+defm : AtomWriteResPair<WritePHAdd, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 3, 4, [3,3], [4,4], 3, 4>;
+defm : AtomWriteResPair<WritePHAddX, [AtomPort0,AtomPort1], [AtomPort0,AtomPort1], 7, 8, [7,7], [8,8], 3, 4>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
////////////////////////////////////////////////////////////////////////////////
// Carry-less multiplication instructions.
@@ -487,8 +494,8 @@ defm : X86WriteResPairUnsupported<WriteCLMul>;
// Load/store MXCSR.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLDMXCSR, [AtomPort01]> { let Latency = 5; let ResourceCycles = [5]; }
-def : WriteRes<WriteSTMXCSR, [AtomPort01]> { let Latency = 15; let ResourceCycles = [15]; }
+defm : X86WriteRes<WriteLDMXCSR, [AtomPort0,AtomPort1], 5, [5,5], 4>;
+defm : X86WriteRes<WriteSTMXCSR, [AtomPort0,AtomPort1], 15, [15,15], 4>;
////////////////////////////////////////////////////////////////////////////////
// Special Cases.
@@ -533,7 +540,7 @@ def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
-def : InstRW<[AtomWrite0_1_1], (instregex "RETI(L|Q|W)$",
+def : InstRW<[AtomWrite0_1_1], (instregex "RETI(16|32|64)$",
"IRET(16|32|64)?")>;
def AtomWrite0_1_5 : SchedWriteRes<[AtomPort0, AtomPort1]> {
@@ -551,10 +558,7 @@ def AtomWrite01_1 : SchedWriteRes<[AtomPort01]> {
def : InstRW<[AtomWrite01_1], (instrs FDECSTP, FFREE, FFREEP, FINCSTP, WAIT,
LFENCE,
STOSB, STOSL, STOSQ, STOSW,
- MOVSSrr, MOVSSrr_REV,
- PSLLDQri, PSRLDQri)>;
-def : InstRW<[AtomWrite01_1], (instregex "MMX_PACK(SSDW|SSWB|USWB)irr",
- "MMX_PUNPCKH(BW|DQ|WD)irr")>;
+ MOVSSrr, MOVSSrr_REV)>;
def AtomWrite01_2 : SchedWriteRes<[AtomPort01]> {
let Latency = 2;
@@ -644,7 +648,6 @@ def : InstRW<[AtomWrite01_9], (instrs POPA16, POPA32,
SHLD64rri8, SHRD64rri8,
CMPXCHG8rr)>;
def : InstRW<[AtomWrite01_9], (instregex "(U)?COM_FI", "TST_F",
- "(U)?COMIS(D|S)rr",
"CVT(T)?SS2SI64rr(_Int)?")>;
def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
@@ -652,8 +655,7 @@ def AtomWrite01_10 : SchedWriteRes<[AtomPort01]> {
let ResourceCycles = [10];
}
def : SchedAlias<WriteFLDC, AtomWrite01_10>;
-def : InstRW<[AtomWrite01_10], (instregex "(U)?COMIS(D|S)rm",
- "CVT(T)?SS2SI64rm(_Int)?")>;
+def : InstRW<[AtomWrite01_10], (instregex "CVT(T)?SS2SI64rm(_Int)?")>;
def AtomWrite01_11 : SchedWriteRes<[AtomPort01]> {
let Latency = 11;
@@ -817,8 +819,8 @@ def AtomWrite01_79 : SchedWriteRes<[AtomPort01]> {
let Latency = 79;
let ResourceCycles = [79];
}
-def : InstRW<[AtomWrite01_79], (instregex "RET(L|Q|W)?$",
- "LRETI?(L|Q|W)")>;
+def : InstRW<[AtomWrite01_79], (instregex "RET(16|32|64)?$",
+ "LRETI?(16|32|64)")>;
def AtomWrite01_92 : SchedWriteRes<[AtomPort01]> {
let Latency = 92;
diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 99d4011dae77..4c16b5b52b1d 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -435,7 +435,12 @@ defm : PdWriteResExPair<WriteIMul32Reg, [PdEX1, PdMul], 4, [1, 2]>;
defm : PdWriteResExPair<WriteIMul64, [PdEX1, PdMul], 6, [1, 6]>;
defm : PdWriteResExPair<WriteIMul64Imm, [PdEX1, PdMul], 6, [1, 4],1, 1>;
defm : PdWriteResExPair<WriteIMul64Reg, [PdEX1, PdMul], 6, [1, 4]>;
-defm : X86WriteResUnsupported<WriteIMulH>; // BMI2 MULX
+
+// BMI2 MULX
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : PdWriteResExPair<WriteDiv8, [PdEX1, PdDiv], 12, [1, 12]>;
defm : PdWriteResExPair<WriteDiv16, [PdEX1, PdDiv], 15, [1, 15], 2>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index cdd03830bcad..68ebaa244acf 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -209,7 +209,10 @@ defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 1>;
defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>;
defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 1>;
defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 1>;
-defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>;
defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>;
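
MULX is a BMI2 instruction, so models without it mark WriteMULX32/WriteMULX64 (and the WriteIMulH/WriteIMulHLd writes they pair with) as unsupported. Assuming the X86WriteResUnsupported/X86WriteResPairUnsupported helpers in X86Schedule.td keep their usual shape, the effect is roughly equivalent to emitting empty WriteRes records with the Unsupported bit set:

// Hedged sketch of what the *Unsupported markers amount to for one pair.
let Unsupported = 1 in {
  def : WriteRes<WriteMULX32, []>;
  def : WriteRes<WriteMULX32.Folded, []>;
}
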
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 123844a73a59..5af9835f75a7 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -62,7 +62,7 @@ def : ReadAdvance<ReadInt2Fpu, 0>;
multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
list<ProcResourceKind> ExePorts,
int Lat, list<int> Res = [1], int UOps = 1,
- int LoadLat = 3> {
+ int LoadUOps = 0, int LoadLat = 3> {
// Register variant is using a single cycle on ExePort.
def : WriteRes<SchedRW, ExePorts> {
let Latency = Lat;
@@ -75,13 +75,13 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
def : WriteRes<SchedRW.Folded, !listconcat([SLM_MEC_RSV], ExePorts)> {
let Latency = !add(Lat, LoadLat);
let ResourceCycles = !listconcat([1], Res);
- let NumMicroOps = UOps;
+ let NumMicroOps = !add(UOps, LoadUOps);
}
}
-// A folded store needs a cycle on MEC_RSV for the store data, but it does not
-// need an extra port cycle to recompute the address.
-def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
+// A folded store needs a cycle on MEC_RSV for the store data (using the same uop),
+// but it does not need an extra port cycle to recompute the address.
+def : WriteRes<WriteRMW, [SLM_MEC_RSV]> { let NumMicroOps = 0; }
def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
@@ -101,17 +101,20 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>;
-defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>;
+defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 5, [5], 3>;
+defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 5, [5], 4, 1>;
+defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 4, [4], 2, 1>;
+defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 4, [4], 2, 1>;
+defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 5, [5], 3, 1>;
defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>;
-defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>;
-def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>;
+defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 7, [7], 3>;
+defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 5, [2]>;
+defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 5, [2]>;
+defm : X86WriteResUnsupported<WriteIMulH>;
+defm : X86WriteResUnsupported<WriteIMulHLd>;
+defm : X86WriteResPairUnsupported<WriteMULX32>;
+defm : X86WriteResPairUnsupported<WriteMULX64>;
defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>;
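
The added LoadUOps argument only changes the folded (memory) record; the register record keeps UOps as before. Taking the WriteIMul16 pair above (latency 5, resource cycles [5], 4 uops, 1 extra load uop, default 3-cycle load latency), the memory form expands to roughly:

def : WriteRes<WriteIMul16.Folded, [SLM_MEC_RSV, SLM_IEC_RSV1]> {
  let Latency = 8;             // Lat (5) + default LoadLat (3)
  let ResourceCycles = [1, 5]; // [1] for SLM_MEC_RSV prepended to Res
  let NumMicroOps = 5;         // UOps (4) + LoadUOps (1)
}
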
@@ -140,12 +143,12 @@ def : WriteRes<WriteSETCCStore, [SLM_IEC_RSV01, SLM_MEC_RSV]> {
let ResourceCycles = [2,1];
}
defm : X86WriteRes<WriteLAHFSAHF, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
-defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 4, [1,1], 1>;
-defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV01], 1, [1], 1>;
-defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
-defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1], 1>;
+defm : X86WriteRes<WriteBitTest, [SLM_IEC_RSV0, SLM_IEC_RSV1], 1, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestImmLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 4, [1,1,1], 1>;
+defm : X86WriteRes<WriteBitTestRegLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 4, [1,1,1], 7>;
+defm : X86WriteRes<WriteBitTestSet, [SLM_IEC_RSV0, SLM_IEC_RSV1], 1, [1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetImmLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 3, [1,1,1], 1>;
+defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV0, SLM_IEC_RSV1, SLM_MEC_RSV], 3, [1,1,1], 7>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -153,8 +156,8 @@ defm : X86WriteRes<WriteBitTestSetRegLd, [SLM_IEC_RSV01, SLM_MEC_RSV], 3, [1,1],
def : WriteRes<WriteLEA, [SLM_IEC_RSV1]>;
// Bit counts.
-defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV01], 10, [20], 10>;
-defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV01], 10, [20], 10>;
+defm : SLMWriteResPair<WriteBSF, [SLM_IEC_RSV0, SLM_IEC_RSV1], 10, [10,10], 10>;
+defm : SLMWriteResPair<WriteBSR, [SLM_IEC_RSV0, SLM_IEC_RSV1], 10, [10,10], 10>;
defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>;
defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>;
@@ -164,14 +167,14 @@ defm : X86WriteResPairUnsupported<WriteBEXTR>;
defm : X86WriteResPairUnsupported<WriteBLS>;
defm : X86WriteResPairUnsupported<WriteBZHI>;
-defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
-defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>;
+defm : SLMWriteResPair<WriteDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv8, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv16, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv32, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
+defm : SLMWriteResPair<WriteIDiv64, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 0, 4>;
// Scalar and vector floating point.
defm : X86WriteRes<WriteFLD0, [SLM_FPC_RSV01], 1, [1], 1>;
@@ -230,33 +233,33 @@ defm : X86WriteResPairUnsupported<WriteFMAX>;
defm : X86WriteResPairUnsupported<WriteFMAY>;
defm : X86WriteResPairUnsupported<WriteFMAZ>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
-defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
-defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
+defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDivY>;
defm : X86WriteResPairUnsupported<WriteFDivZ>;
defm : SLMWriteResPair<WriteFDiv64, [SLM_FPC_RSV0, SLMFPDivider], 34, [1,32]>;
-defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
-defm : SLMWriteResPair<WriteFDiv64Y, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69]>;
+defm : SLMWriteResPair<WriteFDiv64X, [SLM_FPC_RSV0, SLMFPDivider], 69, [1,69], 6, 1>;
+defm : X86WriteResPairUnsupported<WriteFDiv64Y>;
defm : X86WriteResPairUnsupported<WriteFDiv64Z>;
-defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRcpY>;
defm : X86WriteResPairUnsupported<WriteFRcpZ>;
-defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>;
-defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>;
+defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 9, [8], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFRsqrtY>;
defm : X86WriteResPairUnsupported<WriteFRsqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0, SLMFPDivider], 20, [1,20]>;
+defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0, SLMFPDivider], 41, [1,40], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrtY>;
defm : X86WriteResPairUnsupported<WriteFSqrtZ>;
-defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0,SLMFPDivider], 35, [1,35], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
-defm : SLMWriteResPair<WriteFSqrt64Y, [SLM_FPC_RSV0,SLMFPDivider], 71, [1,70], 1, 3>;
+defm : SLMWriteResPair<WriteFSqrt64, [SLM_FPC_RSV0, SLMFPDivider], 35, [1,35]>;
+defm : SLMWriteResPair<WriteFSqrt64X, [SLM_FPC_RSV0, SLMFPDivider], 71, [1,70], 5, 1>;
+defm : X86WriteResPairUnsupported<WriteFSqrt64Y>;
defm : X86WriteResPairUnsupported<WriteFSqrt64Z>;
defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>;
-defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 12, [8], 5, 1>;
+defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 15, [12], 9, 1>;
+defm : X86WriteResPairUnsupported<WriteDPPSY>;
defm : X86WriteResPairUnsupported<WriteDPPSZ>;
defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>;
@@ -277,7 +280,7 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>;
defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteFBlendY>;
defm : X86WriteResPairUnsupported<WriteFBlendZ>;
-defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>;
+defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 2, 1>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
@@ -369,8 +372,8 @@ defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>;
defm : X86WriteResPairUnsupported<WriteVecALUZ>;
defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2], 2>;
-defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2], 2>;
+defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 5, [2]>;
+defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 5, [2]>;
defm : X86WriteResPairUnsupported<WriteVecIMulZ>;
// FIXME: The below is closer to correct, but caused some perf regressions.
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>;
@@ -382,21 +385,21 @@ defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteShuffleZ>;
defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>;
-defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 5, [5], 4>;
-defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 5, [5], 4>;
+defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 5, [5], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteVarShuffleY>;
defm : X86WriteResPairUnsupported<WriteVarShuffleZ>;
defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>;
defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteBlendZ>;
-defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 4, [4], 2, 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
-defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>;
-defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>;
+defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7, [5], 3, 1>;
+defm : X86WriteResPairUnsupported<WriteMPSADY>;
defm : X86WriteResPairUnsupported<WriteMPSADZ>;
defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>;
-defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>;
+defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 5, [2]>;
+defm : X86WriteResPairUnsupported<WritePSADBWY>;
defm : X86WriteResPairUnsupported<WritePSADBWZ>;
defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>;
defm : X86WriteResPairUnsupported<WriteShuffle256>;
@@ -417,26 +420,26 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>;
-defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV1], 6, [6], 4, 1>;
+defm : X86WriteResPairUnsupported<WriteFHAddY>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
-defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
-defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>;
+defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 6, [6], 3, 1>;
+defm : X86WriteResPairUnsupported<WritePHAddY>;
defm : X86WriteResPairUnsupported<WritePHAddZ>;
// String instructions.
// Packed Compare Implicit Length Strings, Return Mask
-defm : SLMWriteResPair<WritePCmpIStrM, [SLM_FPC_RSV0], 13, [13]>;
+defm : SLMWriteResPair<WritePCmpIStrM, [SLM_FPC_RSV0], 13, [13], 5, 1>;
// Packed Compare Explicit Length Strings, Return Mask
-defm : SLMWriteResPair<WritePCmpEStrM, [SLM_FPC_RSV0], 17, [17]>;
+defm : SLMWriteResPair<WritePCmpEStrM, [SLM_FPC_RSV0], 17, [17], 8, 1>;
// Packed Compare Implicit Length Strings, Return Index
-defm : SLMWriteResPair<WritePCmpIStrI, [SLM_FPC_RSV0], 17, [17]>;
+defm : SLMWriteResPair<WritePCmpIStrI, [SLM_FPC_RSV0], 17, [17], 6, 1>;
// Packed Compare Explicit Length Strings, Return Index
-defm : SLMWriteResPair<WritePCmpEStrI, [SLM_FPC_RSV0], 21, [21]>;
+defm : SLMWriteResPair<WritePCmpEStrI, [SLM_FPC_RSV0], 21, [21], 9, 1>;
// MOVMSK Instructions.
def : WriteRes<WriteFMOVMSK, [SLM_FPC_RSV1]> { let Latency = 4; }
@@ -450,7 +453,7 @@ defm : SLMWriteResPair<WriteAESIMC, [SLM_FPC_RSV0], 8, [5]>;
defm : SLMWriteResPair<WriteAESKeyGen, [SLM_FPC_RSV0], 8, [5]>;
// Carry-less multiplication instructions.
-defm : SLMWriteResPair<WriteCLMul, [SLM_FPC_RSV0], 10, [10]>;
+defm : SLMWriteResPair<WriteCLMul, [SLM_FPC_RSV0], 10, [10], 8, 1>;
def : WriteRes<WriteSystem, [SLM_FPC_RSV0]> { let Latency = 100; }
def : WriteRes<WriteMicrocoded, [SLM_FPC_RSV0]> { let Latency = 100; }
@@ -462,15 +465,19 @@ def : WriteRes<WriteNop, []>;
def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> {
let Latency = 4;
let NumMicroOps = 2;
- let ResourceCycles = [4];
+ let ResourceCycles = [8];
}
-def: InstRW<[SLMWriteResGroup1rr], (instrs PADDQrr, PSUBQrr, PCMPEQQrr)>;
+def: InstRW<[SLMWriteResGroup1rr], (instrs MMX_PADDQirr, PADDQrr,
+ MMX_PSUBQirr, PSUBQrr,
+ PCMPEQQrr)>;
def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
let Latency = 7;
let NumMicroOps = 3;
- let ResourceCycles = [1,4];
+ let ResourceCycles = [1,8];
}
-def: InstRW<[SLMWriteResGroup1rm], (instrs PADDQrm, PSUBQrm, PCMPEQQrm)>;
+def: InstRW<[SLMWriteResGroup1rm], (instrs MMX_PADDQirm, PADDQrm,
+ MMX_PSUBQirm, PSUBQrm,
+ PCMPEQQrm)>;
} // SchedModel
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 12f8e7cc76f7..8e30e5e10ca8 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -256,8 +256,13 @@ defm : ZnWriteResPair<WriteIDiv32, [ZnALU2, ZnDivider], 25, [1,25], 2>;
defm : ZnWriteResPair<WriteIDiv64, [ZnALU2, ZnDivider], 41, [1,41], 2>;
// IMULH
-def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
- let Latency = 4;
+def ZnWriteIMulH : WriteRes<WriteIMulH, [ZnMultiplier]>{
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
+def : WriteRes<WriteIMulHLd, [ZnMultiplier]> {
+ let Latency = !add(ZnWriteIMulH.Latency, Znver1Model.LoadLatency);
+ let NumMicroOps = ZnWriteIMulH.NumMicroOps;
}
// Floating point operations
@@ -659,32 +664,10 @@ def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
}
def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>;
-// MULX.
-// r32,r32,r32.
-def ZnWriteMulX32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> {
- let Latency = 3;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[ZnWriteMulX32], (instrs MULX32rr)>;
-
-// r32,r32,m32.
-def ZnWriteMulX32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
- let Latency = 8;
- let ResourceCycles = [1, 2, 2];
-}
-def : InstRW<[ZnWriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
-
-// r64,r64,r64.
-def ZnWriteMulX64 : SchedWriteRes<[ZnALU1]> {
- let Latency = 3;
-}
-def : InstRW<[ZnWriteMulX64], (instrs MULX64rr)>;
-
-// r64,r64,m64.
-def ZnWriteMulX64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> {
- let Latency = 8;
-}
-def : InstRW<[ZnWriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+// MULX
+// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
+defm : ZnWriteResPair<WriteMULX32, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
+defm : ZnWriteResPair<WriteMULX64, [ZnALU1, ZnMultiplier], 3, [1, 1], 1, 5, 0>;
//-- Control transfer instructions --//
@@ -714,7 +697,7 @@ def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
def ZnWriteRET : SchedWriteRes<[ZnALU03]> {
let NumMicroOps = 2;
}
-def : InstRW<[ZnWriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
+def : InstRW<[ZnWriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)",
"IRET(16|32|64)")>;
//-- Logic instructions --//
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index 5b4b151d2938..a83c89e2f28a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -243,10 +243,17 @@ defm : Zn2WriteResPair<WriteIDiv32, [Zn2ALU2, Zn2Divider], 25, [1,25], 2>;
defm : Zn2WriteResPair<WriteIDiv64, [Zn2ALU2, Zn2Divider], 41, [1,41], 2>;
// IMULH
-def : WriteRes<WriteIMulH, [Zn2ALU1, Zn2Multiplier]>{
- let Latency = 4;
+def Zn2WriteIMulH : WriteRes<WriteIMulH, [Zn2Multiplier]>{
+ let Latency = 3;
+ let NumMicroOps = 0;
}
+def : WriteRes<WriteIMulHLd, [Zn2Multiplier]>{
+ let Latency = !add(Zn2WriteIMulH.Latency, Znver2Model.LoadLatency);
+ let NumMicroOps = Zn2WriteIMulH.NumMicroOps;
+}
+
+
// Floating point operations
defm : X86WriteRes<WriteFLoad, [Zn2AGU], 8, [1], 1>;
defm : X86WriteRes<WriteFLoadX, [Zn2AGU], 8, [1], 1>;
@@ -658,31 +665,9 @@ def : SchedAlias<WriteIMul64ImmLd, Zn2WriteMul64Ld>;
def : SchedAlias<WriteIMul64RegLd, Zn2WriteMul64Ld>;
// MULX.
-// r32,r32,r32.
-def Zn2WriteMulX32 : SchedWriteRes<[Zn2ALU1, Zn2Multiplier]> {
- let Latency = 3;
- let ResourceCycles = [1, 2];
-}
-def : InstRW<[Zn2WriteMulX32], (instrs MULX32rr)>;
-
-// r32,r32,m32.
-def Zn2WriteMulX32Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
- let Latency = 7;
- let ResourceCycles = [1, 2, 2];
-}
-def : InstRW<[Zn2WriteMulX32Ld, ReadAfterLd], (instrs MULX32rm)>;
-
-// r64,r64,r64.
-def Zn2WriteMulX64 : SchedWriteRes<[Zn2ALU1]> {
- let Latency = 3;
-}
-def : InstRW<[Zn2WriteMulX64], (instrs MULX64rr)>;
-
-// r64,r64,m64.
-def Zn2WriteMulX64Ld : SchedWriteRes<[Zn2AGU, Zn2ALU1, Zn2Multiplier]> {
- let Latency = 7;
-}
-def : InstRW<[Zn2WriteMulX64Ld, ReadAfterLd], (instrs MULX64rm)>;
+// Numbers are based on the AMD SOG for Family 17h - Instruction Latencies.
+defm : Zn2WriteResPair<WriteMULX32, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
+defm : Zn2WriteResPair<WriteMULX64, [Zn2ALU1, Zn2Multiplier], 3, [1, 1], 1, 4, 0>;
//-- Control transfer instructions --//
@@ -712,7 +697,7 @@ def : InstRW<[WriteMicrocoded], (instregex "CALL(16|32)m")>;
def Zn2WriteRET : SchedWriteRes<[Zn2ALU03]> {
let NumMicroOps = 2;
}
-def : InstRW<[Zn2WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)",
+def : InstRW<[Zn2WriteRET], (instregex "RET(16|32|64)", "LRET(16|32|64)",
"IRET(16|32|64)")>;
//-- Logic instructions --//
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td
index 4a91a91a0f0f..be07c069aae1 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver3.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td
@@ -617,42 +617,15 @@ defm : Zn3WriteResIntPair<WriteIMul16, [Zn3Multiplier], 3, [3], 3, /*LoadUOps=*/
defm : Zn3WriteResIntPair<WriteIMul16Imm, [Zn3Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul16Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul32, [Zn3Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
-
-def Zn3MULX32rr : SchedWriteRes<[Zn3Multiplier]> {
- let Latency = 4;
- let ResourceCycles = [1];
- let NumMicroOps = 2;
-}
-def : InstRW<[Zn3MULX32rr, WriteIMulH], (instrs MULX32rr)>;
-
-def Zn3MULX32rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3MULX32rr.Latency);
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = Zn3MULX32rr.NumMicroOps;
-}
-def : InstRW<[Zn3MULX32rm, WriteIMulH], (instrs MULX32rm)>;
-
+defm : Zn3WriteResIntPair<WriteMULX32, [Zn3Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul32Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul32Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
defm : Zn3WriteResIntPair<WriteIMul64, [Zn3Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
-
-def Zn3MULX64rr : SchedWriteRes<[Zn3Multiplier]> {
- let Latency = 4;
- let ResourceCycles = [1];
- let NumMicroOps = 2;
-}
-def : InstRW<[Zn3MULX64rr, WriteIMulH], (instrs MULX64rr)>;
-
-def Zn3MULX64rm : SchedWriteRes<[Zn3AGU012, Zn3Load, Zn3Multiplier]> {
- let Latency = !add(Znver3Model.LoadLatency, Zn3MULX64rr.Latency);
- let ResourceCycles = [1, 1, 2];
- let NumMicroOps = Zn3MULX64rr.NumMicroOps;
-}
-def : InstRW<[Zn3MULX64rm, WriteIMulH], (instrs MULX64rm)>;
-
+defm : Zn3WriteResIntPair<WriteMULX64, [Zn3Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
defm : Zn3WriteResIntPair<WriteIMul64Imm, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
defm : Zn3WriteResIntPair<WriteIMul64Reg, [Zn3Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
-defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
+defm : Zn3WriteResInt<WriteIMulHLd, [], !add(4, Znver3Model.LoadLatency), [], 0>; // Integer multiplication, high part.
+defm : Zn3WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
defm : Zn3WriteResInt<WriteBSWAP32, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
defm : Zn3WriteResInt<WriteBSWAP64, [Zn3ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index a3238e6317a0..5e59081c63b0 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -71,9 +71,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
// Check to see if there is a specialized entry-point for memory zeroing.
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
- if (const char *bzeroName = (ValC && ValC->isNullValue())
- ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
- : nullptr) {
+ if (const char *bzeroName =
+ (ValC && ValC->isZero())
+ ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
+ : nullptr) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 14a3fea240e7..1a97904e9bc9 100644
--- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -100,7 +100,7 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
// Only treat the element as UNDEF if all bits are UNDEF, otherwise
// treat it as zero.
- if (EltUndef.isAllOnesValue()) {
+ if (EltUndef.isAllOnes()) {
UndefElts.setBit(i);
RawMask[i] = 0;
continue;
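
The spelling changes in the two hunks above are part of a wider APInt/ConstantSDNode predicate rename (isNullValue -> isZero, isAllOnesValue -> isAllOnes, getAllOnesValue -> getAllOnes) that also appears further down in X86TargetTransformInfo.cpp. A minimal sketch of the new spellings, assuming only the LLVM ADT headers; the function itself is illustrative and not part of the patch:

    #include "llvm/ADT/APInt.h"

    // Exercises the renamed APInt value predicates used throughout this patch.
    static bool demoRenamedPredicates() {
      llvm::APInt AllSet = llvm::APInt::getAllOnes(32); // was getAllOnesValue(32)
      llvm::APInt Zero(32, 0);
      return AllSet.isAllOnes() &&                      // was isAllOnesValue()
             Zero.isZero() &&                           // was isNullValue()
             AllSet.countPopulation() == 32;
    }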
diff --git a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
index fcaf7c86128a..83a4a025f518 100644
--- a/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -850,11 +850,9 @@ getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
- for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) {
- // Grab a reference and increment the iterator so we can remove this
- // instruction if needed without disturbing the iteration.
- MachineInstr &MI = *MII++;
-
+ // We use make_early_inc_range here so we can remove instructions if needed
+ // without disturbing the iteration.
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs())) {
// Must either be a call or a branch.
if (!MI.isCall() && !MI.isBranch())
continue;
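
For reference, llvm::make_early_inc_range (from llvm/ADT/STLExtras.h) captures the manual pre-increment idiom this hunk removes. A minimal sketch of the pattern, assuming LLVM headers; the ShouldRemove predicate is a hypothetical stand-in for the call/branch check above:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"

    // make_early_inc_range advances the underlying iterator before yielding each
    // element, so erasing the current instruction cannot invalidate the traversal.
    static void eraseMatching(
        llvm::MachineBasicBlock &MBB,
        llvm::function_ref<bool(const llvm::MachineInstr &)> ShouldRemove) {
      for (llvm::MachineInstr &MI : llvm::make_early_inc_range(MBB.instrs()))
        if (ShouldRemove(MI))
          MI.eraseFromParent();
    }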
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 4af0ac238f59..a3d4d04b1e0d 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -67,6 +67,13 @@ X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
unsigned char
X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
+ // Tagged globals have non-zero upper bits, which makes direct references
+ // require a 64-bit immediate. On the small code model this causes relocation
+ // errors, so we go through the GOT instead.
+ if (AllowTaggedGlobals && TM.getCodeModel() == CodeModel::Small && GV &&
+ !isa<Function>(GV))
+ return X86II::MO_GOTPCREL_NORELAX;
+
// If we're not PIC, it's not very interesting.
if (!isPositionIndependent())
return X86II::MO_NO_FLAG;
@@ -143,6 +150,9 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
return classifyLocalReference(GV);
if (isTargetCOFF()) {
+ // ExternalSymbolSDNode like _tls_index.
+ if (!GV)
+ return X86II::MO_NO_FLAG;
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
return X86II::MO_COFFSTUB;
@@ -157,6 +167,11 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
// reference for them.
if (TM.getCodeModel() == CodeModel::Large)
return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
+ // Tagged globals have non-zero upper bits, which makes direct references
+ // require a 64-bit immediate. So we can't let the linker relax the
+ // relocation to a 32-bit RIP-relative direct reference.
+ if (AllowTaggedGlobals && GV && !isa<Function>(GV))
+ return X86II::MO_GOTPCREL_NORELAX;
return X86II::MO_GOTPCREL;
}
@@ -184,10 +199,13 @@ X86Subtarget::classifyGlobalFunctionReference(const GlobalValue *GV,
if (TM.shouldAssumeDSOLocal(M, GV))
return X86II::MO_NO_FLAG;
- // Functions on COFF can be non-DSO local for two reasons:
+ // Functions on COFF can be non-DSO local for three reasons:
+ // - They are intrinsic functions (!GV)
// - They are marked dllimport
// - They are extern_weak, and a stub is needed
if (isTargetCOFF()) {
+ if (!GV)
+ return X86II::MO_NO_FLAG;
if (GV->hasDLLImportStorageClass())
return X86II::MO_DLLIMPORT;
return X86II::MO_COFFSTUB;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 935dbd882a44..9da54dc2e9b7 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -54,8 +54,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// are not a good idea. We should be migrating away from these.
enum X86ProcFamilyEnum {
Others,
- IntelAtom,
- IntelSLM
+ IntelAtom
};
enum X86SSEEnum {
@@ -353,6 +352,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
 /// Processor has AVX-512 Vector Length eXtensions
bool HasVLX = false;
+ /// Processor has AVX-512 16-bit floating-point extensions
+ bool HasFP16 = false;
+
 /// Processor has PKU extensions
bool HasPKU = false;
@@ -425,6 +427,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Processor supports User Level Interrupt instructions
bool HasUINTR = false;
+ /// Enable SSE4.2 CRC32 instruction (Used when SSE4.2 is supported but
+ /// function is GPR only)
+ bool HasCRC32 = false;
+
/// Processor has a single uop BEXTR implementation.
bool HasFastBEXTR = false;
@@ -469,6 +475,10 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// loads from being used maliciously.
bool UseLVILoadHardening = false;
+ /// Use an instruction sequence for taking the address of a global that allows
+ /// a memory tag in the upper address bits.
+ bool AllowTaggedGlobals = false;
+
/// Use software floating point for code generation.
bool UseSoftFloat = false;
@@ -495,6 +505,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Indicates target prefers AVX512 mask registers.
bool PreferMaskRegisters = false;
+ /// Use Silvermont specific arithmetic costs.
+ bool UseSLMArithCosts = false;
+
/// Use Goldmont specific floating point div/sqrt costs.
bool UseGLMDivSqrtCosts = false;
@@ -742,6 +755,7 @@ public:
bool hasDQI() const { return HasDQI; }
bool hasBWI() const { return HasBWI; }
bool hasVLX() const { return HasVLX; }
+ bool hasFP16() const { return HasFP16; }
bool hasPKU() const { return HasPKU; }
bool hasVNNI() const { return HasVNNI; }
bool hasBF16() const { return HasBF16; }
@@ -763,6 +777,7 @@ public:
bool hasSERIALIZE() const { return HasSERIALIZE; }
bool hasTSXLDTRK() const { return HasTSXLDTRK; }
bool hasUINTR() const { return HasUINTR; }
+ bool hasCRC32() const { return HasCRC32; }
bool useRetpolineIndirectCalls() const { return UseRetpolineIndirectCalls; }
bool useRetpolineIndirectBranches() const {
return UseRetpolineIndirectBranches;
@@ -784,8 +799,10 @@ public:
}
bool preferMaskRegisters() const { return PreferMaskRegisters; }
+ bool useSLMArithCosts() const { return UseSLMArithCosts; }
bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+ bool allowTaggedGlobals() const { return AllowTaggedGlobals; }
bool useLVILoadHardening() const { return UseLVILoadHardening; }
bool useSpeculativeExecutionSideEffectSuppression() const {
return UseSpeculativeExecutionSideEffectSuppression;
@@ -819,7 +836,6 @@ public:
/// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
- bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
bool useAA() const override { return UseAA; }
@@ -933,6 +949,31 @@ public:
/// Return true if the subtarget allows calls to immediate address.
bool isLegalToCallImmediateAddr() const;
+ /// Return whether FrameLowering should always set the "extended frame
+ /// present" bit in FP, or set it based on a symbol in the runtime.
+ bool swiftAsyncContextIsDynamicallySet() const {
+ // Older OS versions (particularly system unwinders) are confused by the
+ // Swift extended frame, so when building code that might be run on them we
+ // must dynamically query the concurrency library to determine whether
+ // extended frames should be flagged as present.
+ const Triple &TT = getTargetTriple();
+
+ unsigned Major, Minor, Micro;
+ TT.getOSVersion(Major, Minor, Micro);
+ switch(TT.getOS()) {
+ default:
+ return false;
+ case Triple::IOS:
+ case Triple::TvOS:
+ return Major < 15;
+ case Triple::WatchOS:
+ return Major < 8;
+ case Triple::MacOSX:
+ case Triple::Darwin:
+ return Major < 12;
+ }
+ }
+
/// If we are using indirect thunks, we need to expand indirectbr to avoid it
/// lowering to an actual indirect jump.
bool enableIndirectBrExpand() const override {
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index ee8cff3e008b..336985f3bf9d 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -39,11 +39,11 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
@@ -503,7 +503,7 @@ void X86PassConfig::addPreRegAlloc() {
addPass(createX86SpeculativeLoadHardeningPass());
addPass(createX86FlagsCopyLoweringPass());
- addPass(createX86WinAllocaExpander());
+ addPass(createX86DynAllocaExpander());
if (getOptLevel() != CodeGenOpt::None) {
addPass(createX86PreTileConfigPass());
@@ -585,6 +585,9 @@ void X86PassConfig::addPreEmitPass2() {
addPass(createEHContGuardCatchretPass());
}
addPass(createX86LoadValueInjectionRetHardeningPass());
+
+ // Insert pseudo probe annotation for callsite profiling
+ addPass(createPseudoProbeInserter());
}
bool X86PassConfig::addPostFastRegAllocRewrite() {
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 971c430d73b1..06dacb638d16 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -206,6 +206,87 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
+ LT.second.getScalarType() == MVT::i32) {
+ // Check if the operands can be represented as a smaller datatype.
+ bool Op1Signed = false, Op2Signed = false;
+ unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
+ unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed);
+ unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
+
+ // If both are representable as i15 and at least one is constant,
+ // zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
+ // can treat this as PMADDWD which has the same costs as a vXi16 multiply.
+ if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
+ bool Op1Constant =
+ isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
+ bool Op2Constant =
+ isa<ConstantDataVector>(Args[1]) || isa<ConstantVector>(Args[1]);
+ bool Op1Sext = isa<SExtInst>(Args[0]) &&
+ (Op1MinSize == 15 || (Op1MinSize < 15 && !ST->hasSSE41()));
+ bool Op2Sext = isa<SExtInst>(Args[1]) &&
+ (Op2MinSize == 15 || (Op2MinSize < 15 && !ST->hasSSE41()));
+
+ bool IsZeroExtended = !Op1Signed || !Op2Signed;
+ bool IsConstant = Op1Constant || Op2Constant;
+ bool IsSext = Op1Sext || Op2Sext;
+ if (IsConstant || IsZeroExtended || IsSext)
+ LT.second =
+ MVT::getVectorVT(MVT::i16, 2 * LT.second.getVectorNumElements());
+ }
+ }
+
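
The i15 bound in the block above is what makes the narrower costing safe arithmetically: a value representable in 15 signed bits has magnitude at most 2^14, so a product of two such values is at most 2^28, and even the sum of two lane products stays below 2^31, which is what PMADDWD's signed 32-bit accumulation needs. A standalone check of that bound (illustrative only, not part of the patch):

    #include <cstdint>

    // Two operands representable as i15 multiply to at most 2^28 in magnitude,
    // so a pairwise 16-bit multiply-add cannot overflow its signed 32-bit result.
    static_assert(INT64_C(1) << 14 == 16384, "i15 magnitude bound");
    static_assert((INT64_C(16384) * 16384) * 2 < (INT64_C(1) << 31),
                  "two i15*i15 products fit in a signed 32-bit accumulator");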
+ if ((ISD == ISD::MUL || ISD == ISD::SDIV || ISD == ISD::SREM ||
+ ISD == ISD::UDIV || ISD == ISD::UREM) &&
+ (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
+ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
+ Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
+ // Vector multiply by pow2 will be simplified to shifts.
+ if (ISD == ISD::MUL) {
+ InstructionCost Cost = getArithmeticInstrCost(
+ Instruction::Shl, Ty, CostKind, Op1Info, Op2Info,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None);
+ return Cost;
+ }
+
+ if (ISD == ISD::SDIV || ISD == ISD::SREM) {
+ // On X86, vector signed division by a constant power-of-two is
+ // normally expanded to the sequence SRA + SRL + ADD + SRA.
+ // The OperandValue properties may not be the same as that of the previous
+ // operation; conservatively assume OP_None.
+ InstructionCost Cost =
+ 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+
+ if (ISD == ISD::SREM) {
+ // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
+ Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
+ Op2Info);
+ Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
+ Op2Info);
+ }
+
+ return Cost;
+ }
+
+ // Vector unsigned division/remainder will be simplified to shifts/masks.
+ if (ISD == ISD::UDIV)
+ return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ // UREM
+ return getArithmeticInstrCost(Instruction::And, Ty, CostKind, Op1Info,
+ Op2Info, TargetTransformInfo::OP_None,
+ TargetTransformInfo::OP_None);
+ }
+
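
The comment above refers to the standard expansion of a signed division by a power of two (SRA + SRL + ADD + SRA), with SREM adding the Mul + Sub pair via X - (X/C)*C. A scalar, standalone sketch of the sequence the cost composition mirrors; 32-bit ints, k in [1, 30], and the helper names are illustrative:

    #include <cstdint>

    // Signed division by 2^k, expanded as the cost model assumes:
    // SRA (sign) + SRL (bias) + ADD + SRA. Arithmetic right shift of negative
    // values is assumed (guaranteed since C++20, universal in practice).
    static int32_t sdivPow2(int32_t X, unsigned K) {
      int32_t Sign = X >> 31;                      // SRA: all-ones if X < 0
      uint32_t Bias = uint32_t(Sign) >> (32 - K);  // SRL: 2^k - 1 for negatives
      return (X + int32_t(Bias)) >> K;             // ADD + SRA
    }

    // SREM costed as X - (X / 2^k) * 2^k: the extra Mul + Sub noted above.
    static int32_t sremPow2(int32_t X, unsigned K) {
      return X - sdivPow2(X, K) * (int32_t(1) << K);  // Mul + Sub
    }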
static const CostTblEntry GLMCostTable[] = {
{ ISD::FDIV, MVT::f32, 18 }, // divss
{ ISD::FDIV, MVT::v4f32, 35 }, // divps
@@ -241,9 +322,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::v2i64, 4 },
};
- if (ST->isSLM()) {
+ if (ST->useSLMArithCosts()) {
if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) {
// Check if the operands can be shrunk into a smaller datatype.
+ // TODO: Merge this into generic vXi32 MUL patterns above.
bool Op1Signed = false;
unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
bool Op2Signed = false;
@@ -268,54 +350,6 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
}
}
- if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV ||
- ISD == ISD::UREM) &&
- (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
- Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) &&
- Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
- if (ISD == ISD::SDIV || ISD == ISD::SREM) {
- // On X86, vector signed division by constants power-of-two are
- // normally expanded to the sequence SRA + SRL + ADD + SRA.
- // The OperandValue properties may not be the same as that of the previous
- // operation; conservatively assume OP_None.
- InstructionCost Cost =
- 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info,
- Op2Info, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info,
- Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info,
- Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
-
- if (ISD == ISD::SREM) {
- // For SREM: (X % C) is the equivalent of (X - (X/C)*C)
- Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info,
- Op2Info);
- Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info,
- Op2Info);
- }
-
- return Cost;
- }
-
- // Vector unsigned division/remainder will be simplified to shifts/masks.
- if (ISD == ISD::UDIV)
- return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind,
- Op1Info, Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
-
- else // UREM
- return getArithmeticInstrCost(Instruction::And, Ty, CostKind,
- Op1Info, Op2Info,
- TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None);
- }
-
static const CostTblEntry AVX512BWUniformConstCostTable[] = {
{ ISD::SHL, MVT::v64i8, 2 }, // psllw + pand.
{ ISD::SRL, MVT::v64i8, 2 }, // psrlw + pand.
@@ -1005,6 +1039,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ADD, MVT::i64, 1 }, // Core (Merom) from http://www.agner.org/
{ ISD::SUB, MVT::i64, 1 }, // Core (Merom) from http://www.agner.org/
+ { ISD::MUL, MVT::i64, 2 }, // Nehalem from http://www.agner.org/
};
if (ST->is64Bit())
@@ -1121,6 +1156,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
return SubLT.first;
}
+
+ // If the insertion isn't aligned, treat it like a 2-op shuffle.
+ Kind = TTI::SK_PermuteTwoSrc;
}
// Handle some common (illegal) sub-vector types as they are often very cheap
@@ -1196,6 +1234,29 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.first = NumOfDests * NumOfShufflesPerDest;
}
+ static const CostTblEntry AVX512FP16ShuffleTbl[] = {
+ {TTI::SK_Broadcast, MVT::v32f16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v16f16, 1}, // vpbroadcastw
+ {TTI::SK_Broadcast, MVT::v8f16, 1}, // vpbroadcastw
+
+ {TTI::SK_Reverse, MVT::v32f16, 2}, // vpermw
+ {TTI::SK_Reverse, MVT::v16f16, 2}, // vpermw
+ {TTI::SK_Reverse, MVT::v8f16, 1}, // vpshufb
+
+ {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, // vpermw
+ {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // vpshufb
+
+ {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, // vpermt2w
+ {TTI::SK_PermuteTwoSrc, MVT::v8f16, 2} // vpermt2w
+ };
+
+ if (!ST->useSoftFloat() && ST->hasFP16())
+ if (const auto *Entry =
+ CostTableLookup(AVX512FP16ShuffleTbl, Kind, LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry AVX512VBMIShuffleTbl[] = {
{TTI::SK_Reverse, MVT::v64i8, 1}, // vpermb
{TTI::SK_Reverse, MVT::v32i8, 1}, // vpermb
@@ -1533,6 +1594,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 },
+ { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v64i1, 1 },
{ ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 },
// Mask zero extend is a sext + shift.
@@ -1546,6 +1608,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 },
+ { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v64i1, 2 },
{ ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 },
{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 },
@@ -1557,12 +1620,14 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, // vpmovwb
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, // widen to zmm
+ { ISD::TRUNCATE, MVT::v8i1, MVT::v16i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, // vpmovwb
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, // widen to zmm
{ ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 },
+ { ISD::TRUNCATE, MVT::v64i1, MVT::v32i16, 2 },
{ ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 },
};
@@ -1606,17 +1671,26 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, // vpmovdb
{ ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 2 }, // vpmovdb
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, // vpmovdb
- { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, // vpmovdb
+ { ISD::TRUNCATE, MVT::v32i8, MVT::v16i32, 2 }, // vpmovdb
+ { ISD::TRUNCATE, MVT::v64i8, MVT::v16i32, 2 }, // vpmovdb
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, // vpmovdw
+ { ISD::TRUNCATE, MVT::v32i16, MVT::v16i32, 2 }, // vpmovdw
{ ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 2 }, // vpmovqb
{ ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, // vpshufb
{ ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, // vpmovqb
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v8i64, 2 }, // vpmovqb
+ { ISD::TRUNCATE, MVT::v32i8, MVT::v8i64, 2 }, // vpmovqb
+ { ISD::TRUNCATE, MVT::v64i8, MVT::v8i64, 2 }, // vpmovqb
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 }, // vpmovqw
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v8i64, 2 }, // vpmovqw
+ { ISD::TRUNCATE, MVT::v32i16, MVT::v8i64, 2 }, // vpmovqw
{ ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, // vpmovqd
{ ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, // zmm vpmovqd
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 },// 2*vpmovqd+concat+vpmovdb
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, // extend to v16i32
{ ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 },
+ { ISD::TRUNCATE, MVT::v64i8, MVT::v32i16, 8 },
// Sign extend is zmm vpternlogd+vptruncdb.
// Zero extend is zmm broadcast load+vptruncdw.
@@ -1889,6 +1963,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 4 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 4 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 1 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 1 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 1 },
@@ -1964,6 +2040,8 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 },
{ ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 },
+ { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 },
+ { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
{ ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, // and+extract+packuswb
{ ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 5 },
{ ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 },
@@ -2365,13 +2443,21 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
assert(ISD && "Invalid opcode");
unsigned ExtraCost = 0;
- if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) {
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
// Some vector comparison predicates cost extra instructions.
+ // TODO: Should we invert this and assume worst case cmp costs
+ // and reduce for particular predicates?
if (MTy.isVector() &&
!((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) ||
(ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) ||
ST->hasBWI())) {
- switch (cast<CmpInst>(I)->getPredicate()) {
+ // Fall back to I if a specific predicate wasn't specified.
+ CmpInst::Predicate Pred = VecPred;
+ if (I && (Pred == CmpInst::BAD_ICMP_PREDICATE ||
+ Pred == CmpInst::BAD_FCMP_PREDICATE))
+ Pred = cast<CmpInst>(I)->getPredicate();
+
+ switch (Pred) {
case CmpInst::Predicate::ICMP_NE:
// xor(cmpeq(x,y),-1)
ExtraCost = 1;
@@ -2399,6 +2485,11 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
ExtraCost = 3;
}
break;
+ case CmpInst::Predicate::BAD_ICMP_PREDICATE:
+ case CmpInst::Predicate::BAD_FCMP_PREDICATE:
+ // Assume worst case scenario and add the maximum extra cost.
+ ExtraCost = 3;
+ break;
default:
break;
}
@@ -2502,7 +2593,7 @@ InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
{ ISD::SELECT, MVT::v4f32, 3 }, // andps + andnps + orps
};
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
return LT.first * (ExtraCost + Entry->Cost);
@@ -2556,6 +2647,22 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
// TODO: Overflow intrinsics (*ADDO, *SUBO, *MULO) with vector types are not
// specialized in these tables yet.
+ static const CostTblEntry AVX512BITALGCostTbl[] = {
+ { ISD::CTPOP, MVT::v32i16, 1 },
+ { ISD::CTPOP, MVT::v64i8, 1 },
+ { ISD::CTPOP, MVT::v16i16, 1 },
+ { ISD::CTPOP, MVT::v32i8, 1 },
+ { ISD::CTPOP, MVT::v8i16, 1 },
+ { ISD::CTPOP, MVT::v16i8, 1 },
+ };
+ static const CostTblEntry AVX512VPOPCNTDQCostTbl[] = {
+ { ISD::CTPOP, MVT::v8i64, 1 },
+ { ISD::CTPOP, MVT::v16i32, 1 },
+ { ISD::CTPOP, MVT::v4i64, 1 },
+ { ISD::CTPOP, MVT::v8i32, 1 },
+ { ISD::CTPOP, MVT::v2i64, 1 },
+ { ISD::CTPOP, MVT::v4i32, 1 },
+ };
static const CostTblEntry AVX512CDCostTbl[] = {
{ ISD::CTLZ, MVT::v8i64, 1 },
{ ISD::CTLZ, MVT::v16i32, 1 },
@@ -2573,10 +2680,10 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostTblEntry AVX512BWCostTbl[] = {
{ ISD::ABS, MVT::v32i16, 1 },
{ ISD::ABS, MVT::v64i8, 1 },
- { ISD::BITREVERSE, MVT::v8i64, 5 },
- { ISD::BITREVERSE, MVT::v16i32, 5 },
- { ISD::BITREVERSE, MVT::v32i16, 5 },
- { ISD::BITREVERSE, MVT::v64i8, 5 },
+ { ISD::BITREVERSE, MVT::v8i64, 3 },
+ { ISD::BITREVERSE, MVT::v16i32, 3 },
+ { ISD::BITREVERSE, MVT::v32i16, 3 },
+ { ISD::BITREVERSE, MVT::v64i8, 2 },
{ ISD::BSWAP, MVT::v8i64, 1 },
{ ISD::BSWAP, MVT::v16i32, 1 },
{ ISD::BSWAP, MVT::v32i16, 1 },
@@ -2612,8 +2719,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostTblEntry AVX512CostTbl[] = {
{ ISD::ABS, MVT::v8i64, 1 },
{ ISD::ABS, MVT::v16i32, 1 },
- { ISD::ABS, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::ABS, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::ABS, MVT::v32i16, 2 },
+ { ISD::ABS, MVT::v64i8, 2 },
{ ISD::ABS, MVT::v4i64, 1 },
{ ISD::ABS, MVT::v2i64, 1 },
{ ISD::BITREVERSE, MVT::v8i64, 36 },
@@ -2637,26 +2744,26 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::CTTZ, MVT::v64i8, 18 },
{ ISD::SMAX, MVT::v8i64, 1 },
{ ISD::SMAX, MVT::v16i32, 1 },
- { ISD::SMAX, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMAX, MVT::v32i16, 2 },
+ { ISD::SMAX, MVT::v64i8, 2 },
{ ISD::SMAX, MVT::v4i64, 1 },
{ ISD::SMAX, MVT::v2i64, 1 },
{ ISD::SMIN, MVT::v8i64, 1 },
{ ISD::SMIN, MVT::v16i32, 1 },
- { ISD::SMIN, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SMIN, MVT::v32i16, 2 },
+ { ISD::SMIN, MVT::v64i8, 2 },
{ ISD::SMIN, MVT::v4i64, 1 },
{ ISD::SMIN, MVT::v2i64, 1 },
{ ISD::UMAX, MVT::v8i64, 1 },
{ ISD::UMAX, MVT::v16i32, 1 },
- { ISD::UMAX, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::UMAX, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMAX, MVT::v32i16, 2 },
+ { ISD::UMAX, MVT::v64i8, 2 },
{ ISD::UMAX, MVT::v4i64, 1 },
{ ISD::UMAX, MVT::v2i64, 1 },
{ ISD::UMIN, MVT::v8i64, 1 },
{ ISD::UMIN, MVT::v16i32, 1 },
- { ISD::UMIN, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::UMIN, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::UMIN, MVT::v32i16, 2 },
+ { ISD::UMIN, MVT::v64i8, 2 },
{ ISD::UMIN, MVT::v4i64, 1 },
{ ISD::UMIN, MVT::v2i64, 1 },
{ ISD::USUBSAT, MVT::v16i32, 2 }, // pmaxud + psubd
@@ -2667,14 +2774,14 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::UADDSAT, MVT::v2i64, 3 }, // not + pminuq + paddq
{ ISD::UADDSAT, MVT::v4i64, 3 }, // not + pminuq + paddq
{ ISD::UADDSAT, MVT::v8i64, 3 }, // not + pminuq + paddq
- { ISD::SADDSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SADDSAT, MVT::v64i8, 2 }, // FIXME: include split
- { ISD::SSUBSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::SSUBSAT, MVT::v64i8, 2 }, // FIXME: include split
- { ISD::UADDSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::UADDSAT, MVT::v64i8, 2 }, // FIXME: include split
- { ISD::USUBSAT, MVT::v32i16, 2 }, // FIXME: include split
- { ISD::USUBSAT, MVT::v64i8, 2 }, // FIXME: include split
+ { ISD::SADDSAT, MVT::v32i16, 2 },
+ { ISD::SADDSAT, MVT::v64i8, 2 },
+ { ISD::SSUBSAT, MVT::v32i16, 2 },
+ { ISD::SSUBSAT, MVT::v64i8, 2 },
+ { ISD::UADDSAT, MVT::v32i16, 2 },
+ { ISD::UADDSAT, MVT::v64i8, 2 },
+ { ISD::USUBSAT, MVT::v32i16, 2 },
+ { ISD::USUBSAT, MVT::v64i8, 2 },
{ ISD::FMAXNUM, MVT::f32, 2 },
{ ISD::FMAXNUM, MVT::v4f32, 2 },
{ ISD::FMAXNUM, MVT::v8f32, 2 },
@@ -2703,25 +2810,41 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::v8i32, 1 },
{ ISD::ABS, MVT::v16i16, 1 },
{ ISD::ABS, MVT::v32i8, 1 },
- { ISD::BITREVERSE, MVT::v4i64, 5 },
- { ISD::BITREVERSE, MVT::v8i32, 5 },
- { ISD::BITREVERSE, MVT::v16i16, 5 },
- { ISD::BITREVERSE, MVT::v32i8, 5 },
+ { ISD::BITREVERSE, MVT::v2i64, 3 },
+ { ISD::BITREVERSE, MVT::v4i64, 3 },
+ { ISD::BITREVERSE, MVT::v4i32, 3 },
+ { ISD::BITREVERSE, MVT::v8i32, 3 },
+ { ISD::BITREVERSE, MVT::v8i16, 3 },
+ { ISD::BITREVERSE, MVT::v16i16, 3 },
+ { ISD::BITREVERSE, MVT::v16i8, 3 },
+ { ISD::BITREVERSE, MVT::v32i8, 3 },
{ ISD::BSWAP, MVT::v4i64, 1 },
{ ISD::BSWAP, MVT::v8i32, 1 },
{ ISD::BSWAP, MVT::v16i16, 1 },
- { ISD::CTLZ, MVT::v4i64, 23 },
- { ISD::CTLZ, MVT::v8i32, 18 },
- { ISD::CTLZ, MVT::v16i16, 14 },
- { ISD::CTLZ, MVT::v32i8, 9 },
- { ISD::CTPOP, MVT::v4i64, 7 },
- { ISD::CTPOP, MVT::v8i32, 11 },
- { ISD::CTPOP, MVT::v16i16, 9 },
- { ISD::CTPOP, MVT::v32i8, 6 },
- { ISD::CTTZ, MVT::v4i64, 10 },
- { ISD::CTTZ, MVT::v8i32, 14 },
- { ISD::CTTZ, MVT::v16i16, 12 },
- { ISD::CTTZ, MVT::v32i8, 9 },
+ { ISD::CTLZ, MVT::v2i64, 7 },
+ { ISD::CTLZ, MVT::v4i64, 7 },
+ { ISD::CTLZ, MVT::v4i32, 5 },
+ { ISD::CTLZ, MVT::v8i32, 5 },
+ { ISD::CTLZ, MVT::v8i16, 4 },
+ { ISD::CTLZ, MVT::v16i16, 4 },
+ { ISD::CTLZ, MVT::v16i8, 3 },
+ { ISD::CTLZ, MVT::v32i8, 3 },
+ { ISD::CTPOP, MVT::v2i64, 3 },
+ { ISD::CTPOP, MVT::v4i64, 3 },
+ { ISD::CTPOP, MVT::v4i32, 7 },
+ { ISD::CTPOP, MVT::v8i32, 7 },
+ { ISD::CTPOP, MVT::v8i16, 3 },
+ { ISD::CTPOP, MVT::v16i16, 3 },
+ { ISD::CTPOP, MVT::v16i8, 2 },
+ { ISD::CTPOP, MVT::v32i8, 2 },
+ { ISD::CTTZ, MVT::v2i64, 4 },
+ { ISD::CTTZ, MVT::v4i64, 4 },
+ { ISD::CTTZ, MVT::v4i32, 7 },
+ { ISD::CTTZ, MVT::v8i32, 7 },
+ { ISD::CTTZ, MVT::v8i16, 4 },
+ { ISD::CTTZ, MVT::v16i16, 4 },
+ { ISD::CTTZ, MVT::v16i8, 3 },
+ { ISD::CTTZ, MVT::v32i8, 3 },
{ ISD::SADDSAT, MVT::v16i16, 1 },
{ ISD::SADDSAT, MVT::v32i8, 1 },
{ ISD::SMAX, MVT::v8i32, 1 },
@@ -3093,10 +3216,18 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+ if (ST->hasBITALG())
+ if (const auto *Entry = CostTableLookup(AVX512BITALGCostTbl, ISD, MTy))
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+
+ if (ST->hasVPOPCNTDQ())
+ if (const auto *Entry = CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy))
+ return adjustTableCost(*Entry, LT.first, ICA.getFlags());
+
if (ST->hasCDI())
if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
@@ -3179,8 +3310,6 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
}
- // TODO - add BMI (TZCNT) scalar handling
-
if (ST->is64Bit())
if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy))
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
@@ -3312,7 +3441,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
if (Index == -1U && (Opcode == Instruction::ExtractElement ||
Opcode == Instruction::InsertElement)) {
// TODO: On some SSE41+ targets, we expand to cmp+splat+select patterns:
- // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
+ // inselt N0, N1, N2 --> select (SplatN2 == {0,1,2...}) ? SplatN1 : N0.
// TODO: Move this to BasicTTIImpl.h? We'd need better gep + index handling.
assert(isa<FixedVectorType>(Val) && "Fixed vector type expected");
@@ -3378,7 +3507,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Unexpected vector opcode");
MVT MScalarTy = LT.second.getScalarType();
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy))
return Entry->Cost + RegisterFileMoveCost;
@@ -3505,6 +3634,112 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
return Cost;
}
+InstructionCost
+X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF, const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind) {
+ const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy);
+ // We don't differentiate element types here, only element bit width.
+ EltTy = IntegerType::getIntNTy(EltTy->getContext(), EltTyBits);
+
+ auto bailout = [&]() {
+ return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ };
+
+ // For now, only deal with AVX512 cases.
+ if (!ST->hasAVX512())
+ return bailout();
+
+ // Do we have a native shuffle for this element type, or should we promote?
+ unsigned PromEltTyBits = EltTyBits;
+ switch (EltTyBits) {
+ case 32:
+ case 64:
+ break; // AVX512F.
+ case 16:
+ if (!ST->hasBWI())
+ PromEltTyBits = 32; // promote to i32, AVX512F.
+ break; // AVX512BW
+ case 8:
+ if (!ST->hasVBMI())
+ PromEltTyBits = 32; // promote to i32, AVX512F.
+ break; // AVX512VBMI
+ case 1:
+ // There is no support for shuffling i1 elements. We *must* promote.
+ if (ST->hasBWI()) {
+ if (ST->hasVBMI())
+ PromEltTyBits = 8; // promote to i8, AVX512VBMI.
+ else
+ PromEltTyBits = 16; // promote to i16, AVX512BW.
+ break;
+ }
+ return bailout();
+ default:
+ return bailout();
+ }
+ auto *PromEltTy = IntegerType::getIntNTy(EltTy->getContext(), PromEltTyBits);
+
+ auto *SrcVecTy = FixedVectorType::get(EltTy, VF);
+ auto *PromSrcVecTy = FixedVectorType::get(PromEltTy, VF);
+
+ int NumDstElements = VF * ReplicationFactor;
+ auto *PromDstVecTy = FixedVectorType::get(PromEltTy, NumDstElements);
+ auto *DstVecTy = FixedVectorType::get(EltTy, NumDstElements);
+
+ // Legalize the types.
+ MVT LegalSrcVecTy = TLI->getTypeLegalizationCost(DL, SrcVecTy).second;
+ MVT LegalPromSrcVecTy = TLI->getTypeLegalizationCost(DL, PromSrcVecTy).second;
+ MVT LegalPromDstVecTy = TLI->getTypeLegalizationCost(DL, PromDstVecTy).second;
+ MVT LegalDstVecTy = TLI->getTypeLegalizationCost(DL, DstVecTy).second;
+ // They should have legalized into vector types.
+ if (!LegalSrcVecTy.isVector() || !LegalPromSrcVecTy.isVector() ||
+ !LegalPromDstVecTy.isVector() || !LegalDstVecTy.isVector())
+ return bailout();
+
+ if (PromEltTyBits != EltTyBits) {
+ // If we have to perform the shuffle with wider elt type than our data type,
+ // then we will first need to anyext (we don't care about the new bits)
+ // the source elements, and then truncate Dst elements.
+ InstructionCost PromotionCost;
+ PromotionCost += getCastInstrCost(
+ Instruction::SExt, /*Dst=*/PromSrcVecTy, /*Src=*/SrcVecTy,
+ TargetTransformInfo::CastContextHint::None, CostKind);
+ PromotionCost +=
+ getCastInstrCost(Instruction::Trunc, /*Dst=*/DstVecTy,
+ /*Src=*/PromDstVecTy,
+ TargetTransformInfo::CastContextHint::None, CostKind);
+ return PromotionCost + getReplicationShuffleCost(PromEltTy,
+ ReplicationFactor, VF,
+ DemandedDstElts, CostKind);
+ }
+
+ assert(LegalSrcVecTy.getScalarSizeInBits() == EltTyBits &&
+ LegalSrcVecTy.getScalarType() == LegalDstVecTy.getScalarType() &&
+ "We expect that the legalization doesn't affect the element width, "
+ "doesn't coalesce/split elements.");
+
+ unsigned NumEltsPerDstVec = LegalDstVecTy.getVectorNumElements();
+ unsigned NumDstVectors =
+ divideCeil(DstVecTy->getNumElements(), NumEltsPerDstVec);
+
+ auto *SingleDstVecTy = FixedVectorType::get(EltTy, NumEltsPerDstVec);
+
+ // Not all the produced Dst elements may be demanded. Since each single
+ // Dst vector is formed by a single shuffle, if none of the elements that
+ // would form a given Dst vector is demanded, we won't need to do that
+ // shuffle at all, so adjust the cost accordingly.
+ APInt DemandedDstVectors = APIntOps::ScaleBitMask(
+ DemandedDstElts.zextOrSelf(NumDstVectors * NumEltsPerDstVec),
+ NumDstVectors);
+ unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
+
+ InstructionCost SingleShuffleCost =
+ getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy,
+ /*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr);
+ return NumDstVectorsDemanded * SingleShuffleCost;
+}
+
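A hedged sketch (not part of the patch) of the mask shape this hook prices: a replication shuffle repeats every source lane ReplicationFactor times, so ReplicationFactor = 3, VF = 4 corresponds to <0,0,0,1,1,1,2,2,2,3,3,3>. The helper name below is illustrative only and assumes llvm/ADT/SmallVector.h.

// Illustrative only; not part of the diff above.
static SmallVector<int> buildReplicationMaskForIllustration(int ReplicationFactor,
                                                            int VF) {
  SmallVector<int> Mask;
  Mask.reserve(ReplicationFactor * VF);
  for (int Elt = 0; Elt < VF; ++Elt)
    Mask.append(ReplicationFactor, Elt); // each source lane appears RF times
  return Mask; // RF=3, VF=4 -> <0,0,0,1,1,1,2,2,2,3,3,3>
}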
InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment,
unsigned AddressSpace,
@@ -3677,7 +3912,7 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
if ((IsLoad && !isLegalMaskedLoad(SrcVTy, Alignment)) ||
(IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) {
// Scalarization
- APInt DemandedElts = APInt::getAllOnesValue(NumElem);
+ APInt DemandedElts = APInt::getAllOnes(NumElem);
InstructionCost MaskSplitCost =
getScalarizationOverhead(MaskTy, DemandedElts, false, true);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
@@ -3795,7 +4030,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
EVT VT = TLI->getValueType(DL, ValTy);
if (VT.isSimple()) {
MVT MTy = VT.getSimpleVT();
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
return Entry->Cost;
@@ -3834,7 +4069,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
ArithmeticCost *= LT.first - 1;
}
- if (ST->isSLM())
+ if (ST->useSLMArithCosts())
if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
return ArithmeticCost + Entry->Cost;
@@ -4589,16 +4824,17 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
bool VariableMask, Align Alignment,
unsigned AddressSpace) {
+ Type *ScalarTy = SrcVTy->getScalarType();
unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements();
- APInt DemandedElts = APInt::getAllOnesValue(VF);
+ APInt DemandedElts = APInt::getAllOnes(VF);
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
InstructionCost MaskUnpackCost = 0;
if (VariableMask) {
auto *MaskTy =
FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
- MaskUnpackCost =
- getScalarizationOverhead(MaskTy, DemandedElts, false, true);
+ MaskUnpackCost = getScalarizationOverhead(
+ MaskTy, DemandedElts, /*Insert=*/false, /*Extract=*/true);
InstructionCost ScalarCompareCost = getCmpSelInstrCost(
Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr,
CmpInst::BAD_ICMP_PREDICATE, CostKind);
@@ -4606,24 +4842,23 @@ InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy,
MaskUnpackCost += VF * (BranchCost + ScalarCompareCost);
}
+ InstructionCost AddressUnpackCost = getScalarizationOverhead(
+ FixedVectorType::get(ScalarTy->getPointerTo(), VF), DemandedElts,
+ /*Insert=*/false, /*Extract=*/true);
+
// The cost of the scalar loads/stores.
InstructionCost MemoryOpCost =
- VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
- MaybeAlign(Alignment), AddressSpace, CostKind);
-
- InstructionCost InsertExtractCost = 0;
- if (Opcode == Instruction::Load)
- for (unsigned i = 0; i < VF; ++i)
- // Add the cost of inserting each scalar load into the vector
- InsertExtractCost +=
- getVectorInstrCost(Instruction::InsertElement, SrcVTy, i);
- else
- for (unsigned i = 0; i < VF; ++i)
- // Add the cost of extracting each element out of the data vector
- InsertExtractCost +=
- getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i);
+ VF * getMemoryOpCost(Opcode, ScalarTy, MaybeAlign(Alignment),
+ AddressSpace, CostKind);
- return MemoryOpCost + MaskUnpackCost + InsertExtractCost;
+ // The cost of forming the vector from the loaded scalars, or of
+ // scalarizing the vector to perform the scalar stores.
+ InstructionCost InsertExtractCost =
+ getScalarizationOverhead(cast<FixedVectorType>(SrcVTy), DemandedElts,
+ /*Insert=*/Opcode == Instruction::Load,
+ /*Extract=*/Opcode == Instruction::Store);
+
+ return AddressUnpackCost + MemoryOpCost + MaskUnpackCost + InsertExtractCost;
}
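For intuition, the sum returned above corresponds to a fully scalarized gather/scatter. The following is a self-contained, hedged illustration of the assumed shape of that lowering (plain C++, not an LLVM API): each lane unpacks an address (AddressUnpackCost), tests its mask bit (MaskUnpackCost plus the compare/branch), then does a scalar memory op and an insert/extract (MemoryOpCost and InsertExtractCost).

#include <array>
#include <cstddef>

// Illustrative scalarized gather with a variable mask; VF scalar iterations.
template <std::size_t VF>
std::array<float, VF>
scalarizedGather(const std::array<const float *, VF> &Ptrs,
                 const std::array<bool, VF> &Mask,
                 std::array<float, VF> PassThru) {
  for (std::size_t Lane = 0; Lane < VF; ++Lane) // one scalar iteration per lane
    if (Mask[Lane])                             // mask unpack + compare + branch
      PassThru[Lane] = *Ptrs[Lane];             // scalar load + vector insert
  return PassThru;
}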
/// Calculate the cost of Gather / Scatter operation
@@ -4690,6 +4925,9 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
return true;
+ if (ScalarTy->isHalfTy() && ST->hasBWI() && ST->hasFP16())
+ return true;
+
if (!ScalarTy->isIntegerTy())
return false;
@@ -4732,7 +4970,7 @@ bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) {
// loads require AVX2).
if (DataSize == 32)
return ST->hasAVX();
- else if (DataSize == 16)
+ if (DataSize == 16)
return ST->hasSSE1();
return true;
}
@@ -4765,11 +5003,15 @@ bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) {
return isLegalMaskedExpandLoad(DataTy);
}
-bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
+bool X86TTIImpl::supportsGather() const {
// Some CPUs have better gather performance than others.
// TODO: Remove the explicit ST->hasAVX512()? That would mean we would only
// enable gather with an explicit -march.
- if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2())))
+ return ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2());
+}
+
+bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) {
+ if (!supportsGather())
return false;
// This function is called now in two cases: from the Loop Vectorizer
@@ -4893,6 +5135,14 @@ X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+bool X86TTIImpl::prefersVectorizedAddressing() const {
+ return supportsGather();
+}
+
+bool X86TTIImpl::supportsEfficientVectorElementLoadStore() const {
+ return false;
+}
+
bool X86TTIImpl::enableInterleavedAccessVectorization() {
// TODO: We expect this to be beneficial regardless of arch,
// but there are currently some unexplained performance artifacts on Atom.
@@ -4900,122 +5150,6 @@ bool X86TTIImpl::enableInterleavedAccessVectorization() {
return !(ST->isAtom());
}
-// Get estimation for interleaved load/store operations for AVX2.
-// \p Factor is the interleaved-access factor (stride) - number of
-// (interleaved) elements in the group.
-// \p Indices contains the indices for a strided load: when the
-// interleaved load has gaps they indicate which elements are used.
-// If Indices is empty (or if the number of indices is equal to the size
-// of the interleaved-access as given in \p Factor) the access has no gaps.
-//
-// As opposed to AVX-512, AVX2 does not have generic shuffles that allow
-// computing the cost using a generic formula as a function of generic
-// shuffles. We therefore use a lookup table instead, filled according to
-// the instruction sequences that codegen currently generates.
-InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2(
- unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
-
- if (UseMaskForCond || UseMaskForGaps)
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind,
- UseMaskForCond, UseMaskForGaps);
-
- // We currently Support only fully-interleaved groups, with no gaps.
- // TODO: Support also strided loads (interleaved-groups with gaps).
- if (Indices.size() && Indices.size() != Factor)
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-
- // VecTy for interleave memop is <VF*Factor x Elt>.
- // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
- // VecTy = <12 x i32>.
- MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
-
- // This function can be called with VecTy=<6xi128>, Factor=3, in which case
- // the VF=2, while v2i128 is an unsupported MVT vector type
- // (see MachineValueType.h::getVectorVT()).
- if (!LegalVT.isVector())
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-
- unsigned VF = VecTy->getNumElements() / Factor;
- Type *ScalarTy = VecTy->getElementType();
- // Deduplicate entries, model floats/pointers as appropriately-sized integers.
- if (!ScalarTy->isIntegerTy())
- ScalarTy =
- Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
-
- // Get the cost of all the memory operations.
- InstructionCost MemOpCosts = getMemoryOpCost(
- Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
-
- auto *VT = FixedVectorType::get(ScalarTy, VF);
- EVT ETy = TLI->getValueType(DL, VT);
- if (!ETy.isSimple())
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-
- // TODO: Complete for other data-types and strides.
- // Each combination of Stride, element bit width and VF results in a different
- // sequence; The cost tables are therefore accessed with:
- // Factor (stride) and VectorType=VFxiN.
- // The Cost accounts only for the shuffle sequence;
- // The cost of the loads/stores is accounted for separately.
- //
- static const CostTblEntry AVX2InterleavedLoadTbl[] = {
- {2, MVT::v4i64, 6}, // (load 8i64 and) deinterleave into 2 x 4i64
-
- {3, MVT::v2i8, 10}, // (load 6i8 and) deinterleave into 3 x 2i8
- {3, MVT::v4i8, 4}, // (load 12i8 and) deinterleave into 3 x 4i8
- {3, MVT::v8i8, 9}, // (load 24i8 and) deinterleave into 3 x 8i8
- {3, MVT::v16i8, 11}, // (load 48i8 and) deinterleave into 3 x 16i8
- {3, MVT::v32i8, 13}, // (load 96i8 and) deinterleave into 3 x 32i8
-
- {3, MVT::v8i32, 17}, // (load 24i32 and) deinterleave into 3 x 8i32
-
- {4, MVT::v2i8, 12}, // (load 8i8 and) deinterleave into 4 x 2i8
- {4, MVT::v4i8, 4}, // (load 16i8 and) deinterleave into 4 x 4i8
- {4, MVT::v8i8, 20}, // (load 32i8 and) deinterleave into 4 x 8i8
- {4, MVT::v16i8, 39}, // (load 64i8 and) deinterleave into 4 x 16i8
- {4, MVT::v32i8, 80}, // (load 128i8 and) deinterleave into 4 x 32i8
-
- {8, MVT::v8i32, 40} // (load 64i32 and) deinterleave into 8 x 8i32
- };
-
- static const CostTblEntry AVX2InterleavedStoreTbl[] = {
- {2, MVT::v4i64, 6}, // interleave 2 x 4i64 into 8i64 (and store)
-
- {3, MVT::v2i8, 7}, // interleave 3 x 2i8 into 6i8 (and store)
- {3, MVT::v4i8, 8}, // interleave 3 x 4i8 into 12i8 (and store)
- {3, MVT::v8i8, 11}, // interleave 3 x 8i8 into 24i8 (and store)
- {3, MVT::v16i8, 11}, // interleave 3 x 16i8 into 48i8 (and store)
- {3, MVT::v32i8, 13}, // interleave 3 x 32i8 into 96i8 (and store)
-
- {4, MVT::v2i8, 12}, // interleave 4 x 2i8 into 8i8 (and store)
- {4, MVT::v4i8, 9}, // interleave 4 x 4i8 into 16i8 (and store)
- {4, MVT::v8i8, 10}, // interleave 4 x 8i8 into 32i8 (and store)
- {4, MVT::v16i8, 10}, // interleave 4 x 16i8 into 64i8 (and store)
- {4, MVT::v32i8, 12} // interleave 4 x 32i8 into 128i8 (and store)
- };
-
- if (Opcode == Instruction::Load) {
- if (const auto *Entry =
- CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT()))
- return MemOpCosts + Entry->Cost;
- } else {
- assert(Opcode == Instruction::Store &&
- "Expected Store Instruction at this point");
- if (const auto *Entry =
- CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT()))
- return MemOpCosts + Entry->Cost;
- }
-
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind);
-}
-
// Get estimation for interleaved load/store operations and strided load.
// \p Indices contains indices for strided load.
// \p Factor - the factor of interleaving.
@@ -5024,12 +5158,6 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) {
-
- if (UseMaskForCond || UseMaskForGaps)
- return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, CostKind,
- UseMaskForCond, UseMaskForGaps);
-
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
// VecTy = <12 x i32>.
@@ -5044,12 +5172,46 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// Get the cost of one memory operation.
auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(),
LegalVT.getVectorNumElements());
- InstructionCost MemOpCost = getMemoryOpCost(
- Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind);
+ InstructionCost MemOpCost;
+ if (UseMaskForCond || UseMaskForGaps)
+ MemOpCost = getMaskedMemoryOpCost(Opcode, SingleMemOpTy, Alignment,
+ AddressSpace, CostKind);
+ else
+ MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, MaybeAlign(Alignment),
+ AddressSpace, CostKind);
unsigned VF = VecTy->getNumElements() / Factor;
MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF);
+ // FIXME: this is the most conservative estimate for the mask cost.
+ InstructionCost MaskCost;
+ if (UseMaskForCond || UseMaskForGaps) {
+ APInt DemandedLoadStoreElts = APInt::getZero(VecTy->getNumElements());
+ for (unsigned Index : Indices) {
+ assert(Index < Factor && "Invalid index for interleaved memory op");
+ for (unsigned Elm = 0; Elm < VF; Elm++)
+ DemandedLoadStoreElts.setBit(Index + Elm * Factor);
+ }
+
+ Type *I8Type = Type::getInt8Ty(VecTy->getContext());
+
+ MaskCost = getReplicationShuffleCost(
+ I8Type, Factor, VF,
+ UseMaskForGaps ? DemandedLoadStoreElts
+ : APInt::getAllOnes(VecTy->getNumElements()),
+ CostKind);
+
+ // The Gaps mask is invariant and created outside the loop, therefore the
+ // cost of creating it is not accounted for here. However, if we have both
+ // a MaskForGaps and some other mask that guards the execution of the
+ // memory access, we need to account for the cost of And-ing the two masks
+ // inside the loop.
+ if (UseMaskForGaps) {
+ auto *MaskVT = FixedVectorType::get(I8Type, VecTy->getNumElements());
+ MaskCost += getArithmeticInstrCost(BinaryOperator::And, MaskVT, CostKind);
+ }
+ }
+
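A hedged worked example of the demanded-elements mask built above (the Factor, VF and Indices values are picked purely for illustration): with Factor = 3, VF = 4 and Indices = {0, 2}, the loop sets bits 0,3,6,9 and 2,5,8,11 of a 12-bit mask, leaving member 1 of every group undemanded.

// Standalone illustration; assumes llvm/ADT/APInt.h.
APInt Demanded = APInt::getZero(/*numBits=*/12);  // VF * Factor
for (unsigned Index : {0u, 2u})                   // Indices = {0, 2}
  for (unsigned Elm = 0; Elm < 4; ++Elm)          // VF = 4
    Demanded.setBit(Index + Elm * 3);             // Factor = 3
// Demanded == 0b101101101101 (bit 0 is element 0).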
if (Opcode == Instruction::Load) {
// The tables (AVX512InterleavedLoadTbl and AVX512InterleavedStoreTbl)
// contain the cost of the optimized shuffle sequence that the
@@ -5065,7 +5227,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
if (const auto *Entry =
CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT))
- return NumOfMemOps * MemOpCost + Entry->Cost;
+ return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
// If an entry does not exist, fall back to the default implementation.
// The kind of shuffle depends on the number of loaded values.
@@ -5102,7 +5264,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
- NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
+ MaskCost + NumOfUnfoldedLoads * MemOpCost +
+ NumOfMoves;
return Cost;
}
@@ -5124,7 +5287,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
if (const auto *Entry =
CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT))
- return NumOfMemOps * MemOpCost + Entry->Cost;
+ return MaskCost + NumOfMemOps * MemOpCost + Entry->Cost;
// If an entry does not exist, fall back to the default implementation.
// There are no strided stores at the moment. And a store can't be folded in
@@ -5138,33 +5301,321 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// We need additional instructions to keep sources.
unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
InstructionCost Cost =
+ MaskCost +
NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
NumOfMoves;
return Cost;
}
InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
- unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Opcode, Type *BaseTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
- auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
+ auto *VecTy = cast<FixedVectorType>(BaseTy);
+
+ auto isSupportedOnAVX512 = [&](Type *VecTy, bool HasBW) {
Type *EltTy = cast<VectorType>(VecTy)->getElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
EltTy->isIntegerTy(32) || EltTy->isPointerTy())
return true;
- if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8))
+ if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) ||
+ (!ST->useSoftFloat() && ST->hasFP16() && EltTy->isHalfTy()))
return HasBW;
return false;
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(
- Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
- AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
- if (ST->hasAVX2())
- return getInterleavedMemoryOpCostAVX2(
- Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment,
+ Opcode, VecTy, Factor, Indices, Alignment,
AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
+ if (UseMaskForCond || UseMaskForGaps)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind,
+ UseMaskForCond, UseMaskForGaps);
+
+ // Get a cost estimate for interleaved load/store operations on SSE-AVX2.
+ // As opposed to AVX-512, the SSE-AVX2 targets do not have generic shuffles
+ // that allow computing the cost using a generic formula as a function of
+ // generic shuffles. We therefore use a lookup table instead, filled
+ // according to the instruction sequences that codegen currently generates.
+
+ // VecTy for interleave memop is <VF*Factor x Elt>.
+ // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
+ // VecTy = <12 x i32>.
+ MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+
+ // This function can be called with VecTy=<6xi128>, Factor=3, in which case
+ // the VF=2, while v2i128 is an unsupported MVT vector type
+ // (see MachineValueType.h::getVectorVT()).
+ if (!LegalVT.isVector())
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind);
+
+ unsigned VF = VecTy->getNumElements() / Factor;
+ Type *ScalarTy = VecTy->getElementType();
+ // Deduplicate entries, model floats/pointers as appropriately-sized integers.
+ if (!ScalarTy->isIntegerTy())
+ ScalarTy =
+ Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy));
+
+ // Get the cost of all the memory operations.
+ // FIXME: discount dead loads.
+ InstructionCost MemOpCosts = getMemoryOpCost(
+ Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
+
+ auto *VT = FixedVectorType::get(ScalarTy, VF);
+ EVT ETy = TLI->getValueType(DL, VT);
+ if (!ETy.isSimple())
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind);
+
+ // TODO: Complete for other data-types and strides.
+ // Each combination of Stride, element bit width and VF results in a
+ // different sequence; the cost tables are therefore accessed with
+ // Factor (stride) and VectorType=VFxiN.
+ // The Cost accounts only for the shuffle sequence;
+ // the cost of the loads/stores is accounted for separately.
+ //
+ static const CostTblEntry AVX2InterleavedLoadTbl[] = {
+ {2, MVT::v2i8, 2}, // (load 4i8 and) deinterleave into 2 x 2i8
+ {2, MVT::v4i8, 2}, // (load 8i8 and) deinterleave into 2 x 4i8
+ {2, MVT::v8i8, 2}, // (load 16i8 and) deinterleave into 2 x 8i8
+ {2, MVT::v16i8, 4}, // (load 32i8 and) deinterleave into 2 x 16i8
+ {2, MVT::v32i8, 6}, // (load 64i8 and) deinterleave into 2 x 32i8
+
+ {2, MVT::v8i16, 6}, // (load 16i16 and) deinterleave into 2 x 8i16
+ {2, MVT::v16i16, 9}, // (load 32i16 and) deinterleave into 2 x 16i16
+ {2, MVT::v32i16, 18}, // (load 64i16 and) deinterleave into 2 x 32i16
+
+ {2, MVT::v8i32, 4}, // (load 16i32 and) deinterleave into 2 x 8i32
+ {2, MVT::v16i32, 8}, // (load 32i32 and) deinterleave into 2 x 16i32
+ {2, MVT::v32i32, 16}, // (load 64i32 and) deinterleave into 2 x 32i32
+
+ {2, MVT::v4i64, 4}, // (load 8i64 and) deinterleave into 2 x 4i64
+ {2, MVT::v8i64, 8}, // (load 16i64 and) deinterleave into 2 x 8i64
+ {2, MVT::v16i64, 16}, // (load 32i64 and) deinterleave into 2 x 16i64
+ {2, MVT::v32i64, 32}, // (load 64i64 and) deinterleave into 2 x 32i64
+
+ {3, MVT::v2i8, 3}, // (load 6i8 and) deinterleave into 3 x 2i8
+ {3, MVT::v4i8, 3}, // (load 12i8 and) deinterleave into 3 x 4i8
+ {3, MVT::v8i8, 6}, // (load 24i8 and) deinterleave into 3 x 8i8
+ {3, MVT::v16i8, 11}, // (load 48i8 and) deinterleave into 3 x 16i8
+ {3, MVT::v32i8, 14}, // (load 96i8 and) deinterleave into 3 x 32i8
+
+ {3, MVT::v2i16, 5}, // (load 6i16 and) deinterleave into 3 x 2i16
+ {3, MVT::v4i16, 7}, // (load 12i16 and) deinterleave into 3 x 4i16
+ {3, MVT::v8i16, 9}, // (load 24i16 and) deinterleave into 3 x 8i16
+ {3, MVT::v16i16, 28}, // (load 48i16 and) deinterleave into 3 x 16i16
+ {3, MVT::v32i16, 56}, // (load 96i16 and) deinterleave into 3 x 32i16
+
+ {3, MVT::v2i32, 3}, // (load 6i32 and) deinterleave into 3 x 2i32
+ {3, MVT::v4i32, 3}, // (load 12i32 and) deinterleave into 3 x 4i32
+ {3, MVT::v8i32, 7}, // (load 24i32 and) deinterleave into 3 x 8i32
+ {3, MVT::v16i32, 14}, // (load 48i32 and) deinterleave into 3 x 16i32
+ {3, MVT::v32i32, 32}, // (load 96i32 and) deinterleave into 3 x 32i32
+
+ {3, MVT::v2i64, 1}, // (load 6i64 and) deinterleave into 3 x 2i64
+ {3, MVT::v4i64, 5}, // (load 12i64 and) deinterleave into 3 x 4i64
+ {3, MVT::v8i64, 10}, // (load 24i64 and) deinterleave into 3 x 8i64
+ {3, MVT::v16i64, 20}, // (load 48i64 and) deinterleave into 3 x 16i64
+
+ {4, MVT::v2i8, 4}, // (load 8i8 and) deinterleave into 4 x 2i8
+ {4, MVT::v4i8, 4}, // (load 16i8 and) deinterleave into 4 x 4i8
+ {4, MVT::v8i8, 12}, // (load 32i8 and) deinterleave into 4 x 8i8
+ {4, MVT::v16i8, 24}, // (load 64i8 and) deinterleave into 4 x 16i8
+ {4, MVT::v32i8, 56}, // (load 128i8 and) deinterleave into 4 x 32i8
+
+ {4, MVT::v2i16, 6}, // (load 8i16 and) deinterleave into 4 x 2i16
+ {4, MVT::v4i16, 17}, // (load 16i16 and) deinterleave into 4 x 4i16
+ {4, MVT::v8i16, 33}, // (load 32i16 and) deinterleave into 4 x 8i16
+ {4, MVT::v16i16, 75}, // (load 64i16 and) deinterleave into 4 x 16i16
+ {4, MVT::v32i16, 150}, // (load 128i16 and) deinterleave into 4 x 32i16
+
+ {4, MVT::v2i32, 4}, // (load 8i32 and) deinterleave into 4 x 2i32
+ {4, MVT::v4i32, 8}, // (load 16i32 and) deinterleave into 4 x 4i32
+ {4, MVT::v8i32, 16}, // (load 32i32 and) deinterleave into 4 x 8i32
+ {4, MVT::v16i32, 32}, // (load 64i32 and) deinterleave into 4 x 16i32
+ {4, MVT::v32i32, 68}, // (load 128i32 and) deinterleave into 4 x 32i32
+
+ {4, MVT::v2i64, 6}, // (load 8i64 and) deinterleave into 4 x 2i64
+ {4, MVT::v4i64, 8}, // (load 16i64 and) deinterleave into 4 x 4i64
+ {4, MVT::v8i64, 20}, // (load 32i64 and) deinterleave into 4 x 8i64
+ {4, MVT::v16i64, 40}, // (load 64i64 and) deinterleave into 4 x 16i64
+
+ {6, MVT::v2i8, 6}, // (load 12i8 and) deinterleave into 6 x 2i8
+ {6, MVT::v4i8, 14}, // (load 24i8 and) deinterleave into 6 x 4i8
+ {6, MVT::v8i8, 18}, // (load 48i8 and) deinterleave into 6 x 8i8
+ {6, MVT::v16i8, 43}, // (load 96i8 and) deinterleave into 6 x 16i8
+ {6, MVT::v32i8, 82}, // (load 192i8 and) deinterleave into 6 x 32i8
+
+ {6, MVT::v2i16, 13}, // (load 12i16 and) deinterleave into 6 x 2i16
+ {6, MVT::v4i16, 9}, // (load 24i16 and) deinterleave into 6 x 4i16
+ {6, MVT::v8i16, 39}, // (load 48i16 and) deinterleave into 6 x 8i16
+ {6, MVT::v16i16, 106}, // (load 96i16 and) deinterleave into 6 x 16i16
+ {6, MVT::v32i16, 212}, // (load 192i16 and) deinterleave into 6 x 32i16
+
+ {6, MVT::v2i32, 6}, // (load 12i32 and) deinterleave into 6 x 2i32
+ {6, MVT::v4i32, 15}, // (load 24i32 and) deinterleave into 6 x 4i32
+ {6, MVT::v8i32, 31}, // (load 48i32 and) deinterleave into 6 x 8i32
+ {6, MVT::v16i32, 64}, // (load 96i32 and) deinterleave into 6 x 16i32
+
+ {6, MVT::v2i64, 6}, // (load 12i64 and) deinterleave into 6 x 2i64
+ {6, MVT::v4i64, 18}, // (load 24i64 and) deinterleave into 6 x 4i64
+ {6, MVT::v8i64, 36}, // (load 48i64 and) deinterleave into 6 x 8i64
+
+ {8, MVT::v8i32, 40} // (load 64i32 and) deinterleave into 8 x 8i32
+ };
+
+ static const CostTblEntry SSSE3InterleavedLoadTbl[] = {
+ {2, MVT::v4i16, 2}, // (load 8i16 and) deinterleave into 2 x 4i16
+ };
+
+ static const CostTblEntry SSE2InterleavedLoadTbl[] = {
+ {2, MVT::v2i16, 2}, // (load 4i16 and) deinterleave into 2 x 2i16
+ {2, MVT::v4i16, 7}, // (load 8i16 and) deinterleave into 2 x 4i16
+
+ {2, MVT::v2i32, 2}, // (load 4i32 and) deinterleave into 2 x 2i32
+ {2, MVT::v4i32, 2}, // (load 8i32 and) deinterleave into 2 x 4i32
+
+ {2, MVT::v2i64, 2}, // (load 4i64 and) deinterleave into 2 x 2i64
+ };
+
+ static const CostTblEntry AVX2InterleavedStoreTbl[] = {
+ {2, MVT::v16i8, 3}, // interleave 2 x 16i8 into 32i8 (and store)
+ {2, MVT::v32i8, 4}, // interleave 2 x 32i8 into 64i8 (and store)
+
+ {2, MVT::v8i16, 3}, // interleave 2 x 8i16 into 16i16 (and store)
+ {2, MVT::v16i16, 4}, // interleave 2 x 16i16 into 32i16 (and store)
+ {2, MVT::v32i16, 8}, // interleave 2 x 32i16 into 64i16 (and store)
+
+ {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32 (and store)
+ {2, MVT::v8i32, 4}, // interleave 2 x 8i32 into 16i32 (and store)
+ {2, MVT::v16i32, 8}, // interleave 2 x 16i32 into 32i32 (and store)
+ {2, MVT::v32i32, 16}, // interleave 2 x 32i32 into 64i32 (and store)
+
+ {2, MVT::v2i64, 2}, // interleave 2 x 2i64 into 4i64 (and store)
+ {2, MVT::v4i64, 4}, // interleave 2 x 4i64 into 8i64 (and store)
+ {2, MVT::v8i64, 8}, // interleave 2 x 8i64 into 16i64 (and store)
+ {2, MVT::v16i64, 16}, // interleave 2 x 16i64 into 32i64 (and store)
+ {2, MVT::v32i64, 32}, // interleave 2 x 32i64 into 64i64 (and store)
+
+ {3, MVT::v2i8, 4}, // interleave 3 x 2i8 into 6i8 (and store)
+ {3, MVT::v4i8, 4}, // interleave 3 x 4i8 into 12i8 (and store)
+ {3, MVT::v8i8, 6}, // interleave 3 x 8i8 into 24i8 (and store)
+ {3, MVT::v16i8, 11}, // interleave 3 x 16i8 into 48i8 (and store)
+ {3, MVT::v32i8, 13}, // interleave 3 x 32i8 into 96i8 (and store)
+
+ {3, MVT::v2i16, 4}, // interleave 3 x 2i16 into 6i16 (and store)
+ {3, MVT::v4i16, 6}, // interleave 3 x 4i16 into 12i16 (and store)
+ {3, MVT::v8i16, 12}, // interleave 3 x 8i16 into 24i16 (and store)
+ {3, MVT::v16i16, 27}, // interleave 3 x 16i16 into 48i16 (and store)
+ {3, MVT::v32i16, 54}, // interleave 3 x 32i16 into 96i16 (and store)
+
+ {3, MVT::v2i32, 4}, // interleave 3 x 2i32 into 6i32 (and store)
+ {3, MVT::v4i32, 5}, // interleave 3 x 4i32 into 12i32 (and store)
+ {3, MVT::v8i32, 11}, // interleave 3 x 8i32 into 24i32 (and store)
+ {3, MVT::v16i32, 22}, // interleave 3 x 16i32 into 48i32 (and store)
+ {3, MVT::v32i32, 48}, // interleave 3 x 32i32 into 96i32 (and store)
+
+ {3, MVT::v2i64, 4}, // interleave 3 x 2i64 into 6i64 (and store)
+ {3, MVT::v4i64, 6}, // interleave 3 x 4i64 into 12i64 (and store)
+ {3, MVT::v8i64, 12}, // interleave 3 x 8i64 into 24i64 (and store)
+ {3, MVT::v16i64, 24}, // interleave 3 x 16i64 into 48i64 (and store)
+
+ {4, MVT::v2i8, 4}, // interleave 4 x 2i8 into 8i8 (and store)
+ {4, MVT::v4i8, 4}, // interleave 4 x 4i8 into 16i8 (and store)
+ {4, MVT::v8i8, 4}, // interleave 4 x 8i8 into 32i8 (and store)
+ {4, MVT::v16i8, 8}, // interleave 4 x 16i8 into 64i8 (and store)
+ {4, MVT::v32i8, 12}, // interleave 4 x 32i8 into 128i8 (and store)
+
+ {4, MVT::v2i16, 2}, // interleave 4 x 2i16 into 8i16 (and store)
+ {4, MVT::v4i16, 6}, // interleave 4 x 4i16 into 16i16 (and store)
+ {4, MVT::v8i16, 10}, // interleave 4 x 8i16 into 32i16 (and store)
+ {4, MVT::v16i16, 32}, // interleave 4 x 16i16 into 64i16 (and store)
+ {4, MVT::v32i16, 64}, // interleave 4 x 32i16 into 128i16 (and store)
+
+ {4, MVT::v2i32, 5}, // interleave 4 x 2i32 into 8i32 (and store)
+ {4, MVT::v4i32, 6}, // interleave 4 x 4i32 into 16i32 (and store)
+ {4, MVT::v8i32, 16}, // interleave 4 x 8i32 into 32i32 (and store)
+ {4, MVT::v16i32, 32}, // interleave 4 x 16i32 into 64i32 (and store)
+ {4, MVT::v32i32, 64}, // interleave 4 x 32i32 into 128i32 (and store)
+
+ {4, MVT::v2i64, 6}, // interleave 4 x 2i64 into 8i64 (and store)
+ {4, MVT::v4i64, 8}, // interleave 4 x 4i64 into 16i64 (and store)
+ {4, MVT::v8i64, 20}, // interleave 4 x 8i64 into 32i64 (and store)
+ {4, MVT::v16i64, 40}, // interleave 4 x 16i64 into 64i64 (and store)
+
+ {6, MVT::v2i8, 7}, // interleave 6 x 2i8 into 12i8 (and store)
+ {6, MVT::v4i8, 9}, // interleave 6 x 4i8 into 24i8 (and store)
+ {6, MVT::v8i8, 16}, // interleave 6 x 8i8 into 48i8 (and store)
+ {6, MVT::v16i8, 27}, // interleave 6 x 16i8 into 96i8 (and store)
+ {6, MVT::v32i8, 90}, // interleave 6 x 32i8 into 192i8 (and store)
+
+ {6, MVT::v2i16, 10}, // interleave 6 x 2i16 into 12i16 (and store)
+ {6, MVT::v4i16, 15}, // interleave 6 x 4i16 into 24i16 (and store)
+ {6, MVT::v8i16, 21}, // interleave 6 x 8i16 into 48i16 (and store)
+ {6, MVT::v16i16, 58}, // interleave 6 x 16i16 into 96i16 (and store)
+ {6, MVT::v32i16, 90}, // interleave 6 x 32i16 into 192i16 (and store)
+
+ {6, MVT::v2i32, 9}, // interleave 6 x 2i32 into 12i32 (and store)
+ {6, MVT::v4i32, 12}, // interleave 6 x 4i32 into 24i32 (and store)
+ {6, MVT::v8i32, 33}, // interleave 6 x 8i32 into 48i32 (and store)
+ {6, MVT::v16i32, 66}, // interleave 6 x 16i32 into 96i32 (and store)
+
+ {6, MVT::v2i64, 8}, // interleave 6 x 2i64 into 12i64 (and store)
+ {6, MVT::v4i64, 15}, // interleave 6 x 4i64 into 24i64 (and store)
+ {6, MVT::v8i64, 30}, // interleave 6 x 8i64 into 48i64 (and store)
+ };
+
+ static const CostTblEntry SSE2InterleavedStoreTbl[] = {
+ {2, MVT::v2i8, 1}, // interleave 2 x 2i8 into 4i8 (and store)
+ {2, MVT::v4i8, 1}, // interleave 2 x 4i8 into 8i8 (and store)
+ {2, MVT::v8i8, 1}, // interleave 2 x 8i8 into 16i8 (and store)
+
+ {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16 (and store)
+ {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16 (and store)
+
+ {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32 (and store)
+ };
+
+ if (Opcode == Instruction::Load) {
+ auto GetDiscountedCost = [Factor, NumMembers = Indices.size(),
+ MemOpCosts](const CostTblEntry *Entry) {
+ // NOTE: this is just an approximation!
+ // It can over- or under-estimate the cost!
+ return MemOpCosts + divideCeil(NumMembers * Entry->Cost, Factor);
+ };
+
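A hedged reading of the discount above, with numbers chosen purely for illustration: for a stride-4 load group that only uses 2 of its members and whose table entry costs 24 shuffles, the shuffle portion is pro-rated to divideCeil(2 * 24, 4) = 12 on top of MemOpCosts.

// Illustrative arithmetic; divideCeil comes from llvm/Support/MathExtras.h.
unsigned Factor = 4, NumMembers = 2, EntryCost = 24;
unsigned ShufflePart = divideCeil(NumMembers * EntryCost, Factor); // == 12
// Total charged: MemOpCosts + 12, instead of MemOpCosts + 24.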
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2InterleavedLoadTbl, Factor,
+ ETy.getSimpleVT()))
+ return GetDiscountedCost(Entry);
+
+ if (ST->hasSSSE3())
+ if (const auto *Entry = CostTableLookup(SSSE3InterleavedLoadTbl, Factor,
+ ETy.getSimpleVT()))
+ return GetDiscountedCost(Entry);
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2InterleavedLoadTbl, Factor,
+ ETy.getSimpleVT()))
+ return GetDiscountedCost(Entry);
+ } else {
+ assert(Opcode == Instruction::Store &&
+ "Expected Store Instruction at this point");
+ assert((!Indices.size() || Indices.size() == Factor) &&
+ "Interleaved store only supports fully-interleaved groups.");
+ if (ST->hasAVX2())
+ if (const auto *Entry = CostTableLookup(AVX2InterleavedStoreTbl, Factor,
+ ETy.getSimpleVT()))
+ return MemOpCosts + Entry->Cost;
+
+ if (ST->hasSSE2())
+ if (const auto *Entry = CostTableLookup(SSE2InterleavedStoreTbl, Factor,
+ ETy.getSimpleVT()))
+ return MemOpCosts + Entry->Cost;
+ }
+
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
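For context, a hedged usage sketch of how a client would reach this hook through the TargetTransformInfo facade; the TTI object, the context Ctx, and the chosen types and cost kind are assumptions, not taken from the patch.

// Interleaved load of 3 members of <4 x i32> out of one <12 x i32> wide load.
auto *WideTy = FixedVectorType::get(Type::getInt32Ty(Ctx), /*NumElts=*/12);
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
    Instruction::Load, WideTy, /*Factor=*/3, /*Indices=*/{0, 1, 2},
    Align(4), /*AddressSpace=*/0, TTI::TCK_RecipThroughput);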
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 69ff6584316e..c53424ec0026 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -45,52 +45,54 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::FeatureCMPXCHG16B,
X86::FeatureLAHFSAHF,
- // Codegen control options.
- X86::FeatureFast11ByteNOP,
- X86::FeatureFast15ByteNOP,
- X86::FeatureFastBEXTR,
- X86::FeatureFastHorizontalOps,
- X86::FeatureFastLZCNT,
- X86::FeatureFastScalarFSQRT,
- X86::FeatureFastSHLDRotate,
- X86::FeatureFastScalarShiftMasks,
- X86::FeatureFastVectorShiftMasks,
- X86::FeatureFastVariableCrossLaneShuffle,
- X86::FeatureFastVariablePerLaneShuffle,
- X86::FeatureFastVectorFSQRT,
- X86::FeatureLEAForSP,
- X86::FeatureLEAUsesAG,
- X86::FeatureLZCNTFalseDeps,
- X86::FeatureBranchFusion,
- X86::FeatureMacroFusion,
- X86::FeaturePadShortFunctions,
- X86::FeaturePOPCNTFalseDeps,
+ // Some older targets can be set up to fold unaligned loads.
X86::FeatureSSEUnalignedMem,
- X86::FeatureSlow3OpsLEA,
- X86::FeatureSlowDivide32,
- X86::FeatureSlowDivide64,
- X86::FeatureSlowIncDec,
- X86::FeatureSlowLEA,
- X86::FeatureSlowPMADDWD,
- X86::FeatureSlowPMULLD,
- X86::FeatureSlowSHLD,
- X86::FeatureSlowTwoMemOps,
- X86::FeatureSlowUAMem16,
- X86::FeaturePreferMaskRegisters,
- X86::FeatureInsertVZEROUPPER,
- X86::FeatureUseGLMDivSqrtCosts,
+
+ // Codegen control options.
+ X86::TuningFast11ByteNOP,
+ X86::TuningFast15ByteNOP,
+ X86::TuningFastBEXTR,
+ X86::TuningFastHorizontalOps,
+ X86::TuningFastLZCNT,
+ X86::TuningFastScalarFSQRT,
+ X86::TuningFastSHLDRotate,
+ X86::TuningFastScalarShiftMasks,
+ X86::TuningFastVectorShiftMasks,
+ X86::TuningFastVariableCrossLaneShuffle,
+ X86::TuningFastVariablePerLaneShuffle,
+ X86::TuningFastVectorFSQRT,
+ X86::TuningLEAForSP,
+ X86::TuningLEAUsesAG,
+ X86::TuningLZCNTFalseDeps,
+ X86::TuningBranchFusion,
+ X86::TuningMacroFusion,
+ X86::TuningPadShortFunctions,
+ X86::TuningPOPCNTFalseDeps,
+ X86::TuningSlow3OpsLEA,
+ X86::TuningSlowDivide32,
+ X86::TuningSlowDivide64,
+ X86::TuningSlowIncDec,
+ X86::TuningSlowLEA,
+ X86::TuningSlowPMADDWD,
+ X86::TuningSlowPMULLD,
+ X86::TuningSlowSHLD,
+ X86::TuningSlowTwoMemOps,
+ X86::TuningSlowUAMem16,
+ X86::TuningPreferMaskRegisters,
+ X86::TuningInsertVZEROUPPER,
+ X86::TuningUseSLMArithCosts,
+ X86::TuningUseGLMDivSqrtCosts,
// Perf-tuning flags.
- X86::FeatureHasFastGather,
- X86::FeatureSlowUAMem32,
+ X86::TuningFastGather,
+ X86::TuningSlowUAMem32,
// Based on whether the user set the -mprefer-vector-width command line option.
- X86::FeaturePrefer128Bit,
- X86::FeaturePrefer256Bit,
+ X86::TuningPrefer128Bit,
+ X86::TuningPrefer256Bit,
// CPU name enums. These just follow the CPU string.
- X86::ProcIntelAtom,
- X86::ProcIntelSLM,
+ X86::ProcIntelAtom
};
public:
@@ -120,8 +122,7 @@ public:
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
InstructionCost getArithmeticInstrCost(
- unsigned Opcode, Type *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -144,14 +145,17 @@ public:
InstructionCost getScalarizationOverhead(VectorType *Ty,
const APInt &DemandedElts,
bool Insert, bool Extract);
+ InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+ int VF,
+ const APInt &DemandedDstElts,
+ TTI::TargetCostKind CostKind);
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
@@ -180,9 +184,9 @@ public:
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
- InstructionCost getArithmeticReductionCost(
- unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+ InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ Optional<FastMathFlags> FMF,
+ TTI::TargetCostKind CostKind);
InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
@@ -192,19 +196,13 @@ public:
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond = false, bool UseMaskForGaps = false);
InstructionCost getInterleavedMemoryOpCostAVX512(
unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false, bool UseMaskForGaps = false);
- InstructionCost getInterleavedMemoryOpCostAVX2(
- unsigned Opcode, FixedVectorType *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
- bool UseMaskForCond = false, bool UseMaskForGaps = false);
+ TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
+ bool UseMaskForGaps = false);
InstructionCost getIntImmCost(int64_t);
@@ -241,9 +239,12 @@ public:
SmallPtrSetImpl<Argument *> &Args) const;
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
+ bool prefersVectorizedAddressing() const;
+ bool supportsEfficientVectorElementLoadStore() const;
bool enableInterleavedAccessVectorization();
private:
+ bool supportsGather() const;
InstructionCost getGSScalarCost(unsigned Opcode, Type *DataTy,
bool VariableMask, Align Alignment,
unsigned AddressSpace);
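A hedged usage sketch for the newly declared getReplicationShuffleCost hook as seen from a TargetTransformInfo client; the TTI object, the context Ctx, and the demanded-element values below are assumptions for illustration.

// Cost of replicating each of 4 x i8 source lanes 3 times (12 results),
// with every destination element demanded.
APInt DemandedDst = APInt::getAllOnes(/*numBits=*/12); // ReplicationFactor * VF
InstructionCost C = TTI.getReplicationShuffleCost(
    Type::getInt8Ty(Ctx), /*ReplicationFactor=*/3, /*VF=*/4, DemandedDst,
    TTI::TCK_RecipThroughput);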
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86VZeroUpper.cpp
index c3031b698552..59b5dc111ce3 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -271,10 +271,8 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
<< getBlockExitStateName(CurState) << '\n');
if (CurState == EXITS_DIRTY)
- for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end();
- SI != SE; ++SI)
- addDirtySuccessor(**SI);
+ for (MachineBasicBlock *Succ : MBB.successors())
+ addDirtySuccessor(*Succ);
BlockStates[MBB.getNumber()].ExitState = CurState;
}
diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp
index 8d8bd5e6b326..02186949960d 100644
--- a/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -458,7 +458,7 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) {
void WinEHStatePass::rewriteSetJmpCall(IRBuilder<> &Builder, Function &F,
CallBase &Call, Value *State) {
// Don't rewrite calls with a weird number of arguments.
- if (Call.getNumArgOperands() != 2)
+ if (Call.arg_size() != 2)
return;
SmallVector<OperandBundleDef, 1> OpBundles;
diff --git a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 0505686e645b..f6b97e9e84b3 100644
--- a/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index b44984ff6b4c..c286b747a271 100644
--- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -20,10 +20,10 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index d5f66c2bd824..8916c6ca7be7 100644
--- a/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "TargetInfo/XCoreTargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
Target &llvm::getTheXCoreTarget() {
diff --git a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index 4ea775305e12..38b613700674 100644
--- a/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -38,8 +38,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <algorithm>
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 51fdfe54db18..7c86262269fc 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1315,7 +1315,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
CFRegNode.push_back(ArgIn.getValue(ArgIn->getNumValues() - 1));
}
} else {
- // sanity check
+ // Only arguments passed on the stack should make it here.
assert(VA.isMemLoc());
// Load the argument to a virtual register
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
@@ -1643,7 +1643,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N1, N0, N2);
// fold (ladd 0, 0, x) -> 0, x & 1
- if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ if (N0C && N0C->isZero() && N1C && N1C->isZero()) {
SDValue Carry = DAG.getConstant(0, dl, VT);
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
DAG.getConstant(1, dl, VT));
@@ -1653,7 +1653,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
+ if (N1C && N1C->isZero() && N->hasNUsesOfValue(0, 1)) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
KnownBits Known = DAG.computeKnownBits(N2);
@@ -1675,7 +1675,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N0.getValueType();
// fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set
- if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) {
+ if (N0C && N0C->isZero() && N1C && N1C->isZero()) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
KnownBits Known = DAG.computeKnownBits(N2);
@@ -1690,7 +1690,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
// fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the
// low bit set
- if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 1)) {
+ if (N1C && N1C->isZero() && N->hasNUsesOfValue(0, 1)) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
KnownBits Known = DAG.computeKnownBits(N2);
@@ -1719,7 +1719,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
N1, N0, N2, N3);
// lmul(x, 0, a, b)
- if (N1C && N1C->isNullValue()) {
+ if (N1C && N1C->isZero()) {
// If the high result is unused fold to add(a, b)
if (N->hasNUsesOfValue(0, 0)) {
SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
diff --git a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index 1b21e1ce195b..1b53d593c130 100644
--- a/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -21,9 +21,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index 6528154ab0e2..b5a683de33ab 100644
--- a/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/llvm/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -21,7 +21,6 @@
#include "llvm/IR/IntrinsicsXCore.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/NoFolder.h"
-#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
@@ -90,11 +89,11 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
if (PredBB->getTerminator()->getNumSuccessors() > 1)
PredBB = SplitEdge(PredBB, PN->getParent());
Instruction *InsertPos = PredBB->getTerminator();
- Instruction *NewInst = createReplacementInstr(CE, InsertPos);
+ Instruction *NewInst = CE->getAsInstruction(InsertPos);
PN->setOperand(I, NewInst);
}
} else if (Instruction *Instr = dyn_cast<Instruction>(WU)) {
- Instruction *NewInst = createReplacementInstr(CE, Instr);
+ Instruction *NewInst = CE->getAsInstruction(Instr);
Instr->replaceUsesOfWith(CE, NewInst);
} else {
ConstantExpr *CExpr = dyn_cast<ConstantExpr>(WU);
@@ -103,7 +102,7 @@ static bool replaceConstantExprOp(ConstantExpr *CE, Pass *P) {
}
}
} while (CE->hasNUsesOrMore(1)); // We need to check because a recursive
- // sibling may have used 'CE' when createReplacementInstr was called.
+ // sibling may have used 'CE' when getAsInstruction was called.
CE->destroyConstant();
return true;
}
diff --git a/llvm/lib/Target/XCore/XCoreSubtarget.cpp b/llvm/lib/Target/XCore/XCoreSubtarget.cpp
index 4b29751c7d06..1be707cb488c 100644
--- a/llvm/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/llvm/lib/Target/XCore/XCoreSubtarget.cpp
@@ -12,7 +12,7 @@
#include "XCoreSubtarget.h"
#include "XCore.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
diff --git a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index 046cd6b5db7d..2e49627a19bf 100644
--- a/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -20,8 +20,8 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -99,7 +99,7 @@ bool XCorePassConfig::addInstSelector() {
}
void XCorePassConfig::addPreEmitPass() {
- addPass(createXCoreFrameToArgsOffsetEliminationPass(), false);
+ addPass(createXCoreFrameToArgsOffsetEliminationPass());
}
// Force static initialization.
diff --git a/llvm/lib/TextAPI/TextStub.cpp b/llvm/lib/TextAPI/TextStub.cpp
index 5d85342adb26..b64f19ab65cc 100644
--- a/llvm/lib/TextAPI/TextStub.cpp
+++ b/llvm/lib/TextAPI/TextStub.cpp
@@ -1121,9 +1121,9 @@ TextAPIReader::get(MemoryBufferRef InputBuffer) {
auto File = std::unique_ptr<InterfaceFile>(
const_cast<InterfaceFile *>(Files.front()));
- for (auto Iter = std::next(Files.begin()); Iter != Files.end(); ++Iter)
+ for (const InterfaceFile *FI : llvm::drop_begin(Files))
File->addDocument(
- std::shared_ptr<InterfaceFile>(const_cast<InterfaceFile *>(*Iter)));
+ std::shared_ptr<InterfaceFile>(const_cast<InterfaceFile *>(FI)));
if (YAMLIn.error())
return make_error<StringError>(Ctx.ErrorMessage, YAMLIn.error());
diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index 961577f126ba..de1634ebed3c 100644
--- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -12,6 +12,7 @@
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/COFFModuleDefinition.h"
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 85abbf6d86e0..7243e39c9029 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,7 @@
#include "llvm-c/Transforms/AggressiveInstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -205,8 +206,8 @@ struct MaskOps {
bool FoundAnd1;
MaskOps(unsigned BitWidth, bool MatchAnds)
- : Root(nullptr), Mask(APInt::getNullValue(BitWidth)),
- MatchAndChain(MatchAnds), FoundAnd1(false) {}
+ : Root(nullptr), Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds),
+ FoundAnd1(false) {}
};
/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
@@ -377,10 +378,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
// Also, we want to avoid matching partial patterns.
// TODO: It would be more efficient if we removed dead instructions
// iteratively in this loop rather than waiting until the end.
- for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
+ for (Instruction &I : llvm::reverse(BB)) {
MadeChange |= foldAnyOrAllBitsSet(I);
MadeChange |= foldGuardedFunnelShift(I, DT);
- MadeChange |= tryToRecognizePopCount(I);
+ MadeChange |= tryToRecognizePopCount(I);
}
}
@@ -394,10 +395,11 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
-static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
+static bool runImpl(Function &F, AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
- TruncInstCombine TIC(TLI, DL, DT);
+ TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
MadeChange |= foldUnusualPatterns(F, DT);
return MadeChange;
@@ -406,6 +408,7 @@ static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT) {
void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
@@ -415,16 +418,18 @@ void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
}
bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return runImpl(F, TLI, DT);
+ return runImpl(F, AC, TLI, DT);
}
PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
FunctionAnalysisManager &AM) {
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!runImpl(F, TLI, DT)) {
+ if (!runImpl(F, AC, TLI, DT)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
@@ -438,6 +443,7 @@ char AggressiveInstCombinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
"aggressive-instcombine",
"Combine pattern based expressions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
index 42bcadfc7dcd..5d69e26d6ecc 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h
@@ -17,6 +17,8 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -39,16 +41,18 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
namespace llvm {
- class DataLayout;
- class DominatorTree;
- class Function;
- class Instruction;
- class TargetLibraryInfo;
- class TruncInst;
- class Type;
- class Value;
+class AssumptionCache;
+class DataLayout;
+class DominatorTree;
+class Function;
+class Instruction;
+class TargetLibraryInfo;
+class TruncInst;
+class Type;
+class Value;
class TruncInstCombine {
+ AssumptionCache &AC;
TargetLibraryInfo &TLI;
const DataLayout &DL;
const DominatorTree &DT;
@@ -75,9 +79,9 @@ class TruncInstCombine {
MapVector<Instruction *, Info> InstInfoMap;
public:
- TruncInstCombine(TargetLibraryInfo &TLI, const DataLayout &DL,
- const DominatorTree &DT)
- : TLI(TLI), DL(DL), DT(DT), CurrentTruncInst(nullptr) {}
+ TruncInstCombine(AssumptionCache &AC, TargetLibraryInfo &TLI,
+ const DataLayout &DL, const DominatorTree &DT)
+ : AC(AC), TLI(TLI), DL(DL), DT(DT), CurrentTruncInst(nullptr) {}
/// Perform TruncInst pattern optimization on given function.
bool run(Function &F);
@@ -104,6 +108,18 @@ private:
/// to be reduced.
Type *getBestTruncatedType();
+ KnownBits computeKnownBits(const Value *V) const {
+ return llvm::computeKnownBits(V, DL, /*Depth=*/0, &AC,
+ /*CtxI=*/cast<Instruction>(CurrentTruncInst),
+ &DT);
+ }
+
+ unsigned ComputeNumSignBits(const Value *V) const {
+ return llvm::ComputeNumSignBits(
+ V, DL, /*Depth=*/0, &AC, /*CtxI=*/cast<Instruction>(CurrentTruncInst),
+ &DT);
+ }
+
/// Given a \p V value and a \p SclTy scalar type, return the generated reduced
/// value of \p V based on the type \p SclTy.
///
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
index 16b82219e8ca..abac3f801a22 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/Support/KnownBits.h"
using namespace llvm;
@@ -61,9 +62,18 @@ static void getRelevantOperands(Instruction *I, SmallVectorImpl<Value *> &Ops) {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::InsertElement:
Ops.push_back(I->getOperand(0));
Ops.push_back(I->getOperand(1));
break;
+ case Instruction::ExtractElement:
+ Ops.push_back(I->getOperand(0));
+ break;
case Instruction::Select:
Ops.push_back(I->getOperand(1));
Ops.push_back(I->getOperand(2));
@@ -127,6 +137,13 @@ bool TruncInstCombine::buildTruncExpressionDag() {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::InsertElement:
+ case Instruction::ExtractElement:
case Instruction::Select: {
SmallVector<Value *, 2> Operands;
getRelevantOperands(I, Operands);
@@ -135,10 +152,9 @@ bool TruncInstCombine::buildTruncExpressionDag() {
}
default:
// TODO: Can handle more cases here:
- // 1. shufflevector, extractelement, insertelement
- // 2. udiv, urem
- // 3. shl, lshr, ashr
- // 4. phi node(and loop handling)
+ // 1. shufflevector
+ // 2. sdiv, srem
+ // 3. phi node (and loop handling)
// ...
return false;
}
@@ -270,6 +286,50 @@ Type *TruncInstCombine::getBestTruncatedType() {
unsigned OrigBitWidth =
CurrentTruncInst->getOperand(0)->getType()->getScalarSizeInBits();
+ // Initialize MinBitWidth for shift instructions with the minimum number
+ // that is greater than the shift amount (i.e. shift amount + 1).
+ // For `lshr`, raise MinBitWidth so that all potentially truncated
+ // bits of the value-to-be-shifted are zeros.
+ // For `ashr`, raise MinBitWidth so that all potentially truncated
+ // bits of the value-to-be-shifted are sign bits (all zeros or all ones),
+ // and so that the lowest untruncated bit is also a sign bit.
+ // Exit early if MinBitWidth is not less than the original bit width.
+ for (auto &Itr : InstInfoMap) {
+ Instruction *I = Itr.first;
+ if (I->isShift()) {
+ KnownBits KnownRHS = computeKnownBits(I->getOperand(1));
+ unsigned MinBitWidth = KnownRHS.getMaxValue()
+ .uadd_sat(APInt(OrigBitWidth, 1))
+ .getLimitedValue(OrigBitWidth);
+ if (MinBitWidth == OrigBitWidth)
+ return nullptr;
+ if (I->getOpcode() == Instruction::LShr) {
+ KnownBits KnownLHS = computeKnownBits(I->getOperand(0));
+ MinBitWidth =
+ std::max(MinBitWidth, KnownLHS.getMaxValue().getActiveBits());
+ }
+ if (I->getOpcode() == Instruction::AShr) {
+ unsigned NumSignBits = ComputeNumSignBits(I->getOperand(0));
+ MinBitWidth = std::max(MinBitWidth, OrigBitWidth - NumSignBits + 1);
+ }
+ if (MinBitWidth >= OrigBitWidth)
+ return nullptr;
+ Itr.second.MinBitWidth = MinBitWidth;
+ }
+ if (I->getOpcode() == Instruction::UDiv ||
+ I->getOpcode() == Instruction::URem) {
+ unsigned MinBitWidth = 0;
+ for (const auto &Op : I->operands()) {
+ KnownBits Known = computeKnownBits(Op);
+ MinBitWidth =
+ std::max(Known.getMaxValue().getActiveBits(), MinBitWidth);
+ if (MinBitWidth >= OrigBitWidth)
+ return nullptr;
+ }
+ Itr.second.MinBitWidth = MinBitWidth;
+ }
+ }
+
// Calculate minimum allowed bit-width allowed for shrinking the currently
// visited truncate's operand.
unsigned MinBitWidth = getMinBitWidth();
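A minimal sketch, not from the patch, of the bit-width reasoning the shift handling above relies on (the helper name is illustrative): the largest shift amount consistent with the known bits bounds the narrowest type the shift can be performed in, i.e. shift amount + 1 bits, clamped to the original width.

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // KnownAmt is assumed to already have OrigBitWidth bits.
    static unsigned minWidthForShiftAmount(const KnownBits &KnownAmt,
                                           unsigned OrigBitWidth) {
      // getMaxValue() is the largest amount consistent with the known bits;
      // the shifted value needs at least one more bit than that amount.
      return KnownAmt.getMaxValue()
          .uadd_sat(APInt(OrigBitWidth, 1))
          .getLimitedValue(OrigBitWidth);
    }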
@@ -356,10 +416,32 @@ void TruncInstCombine::ReduceExpressionDag(Type *SclTy) {
case Instruction::Mul:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor: {
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::UDiv:
+ case Instruction::URem: {
Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
+ // Preserve `exact` flag since truncation doesn't change exactness
+ if (auto *PEO = dyn_cast<PossiblyExactOperator>(I))
+ if (auto *ResI = dyn_cast<Instruction>(Res))
+ ResI->setIsExact(PEO->isExact());
+ break;
+ }
+ case Instruction::ExtractElement: {
+ Value *Vec = getReducedOperand(I->getOperand(0), SclTy);
+ Value *Idx = I->getOperand(1);
+ Res = Builder.CreateExtractElement(Vec, Idx);
+ break;
+ }
+ case Instruction::InsertElement: {
+ Value *Vec = getReducedOperand(I->getOperand(0), SclTy);
+ Value *NewElt = getReducedOperand(I->getOperand(1), SclTy);
+ Value *Idx = I->getOperand(2);
+ Res = Builder.CreateInsertElement(Vec, NewElt, Idx);
break;
}
case Instruction::Select: {
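A hypothetical IR illustration, not from the patch, of what the new shift handling permits once the known-bits checks succeed, for instance when bits 8..31 of %x are proven zero:

    // Before:
    //   %s = lshr i32 %x, 3
    //   %t = trunc i32 %s to i8
    // After (valid because the truncated-away bits are known zero):
    //   %x.t = trunc i32 %x to i8
    //   %t   = lshr i8 %x.t, 3
    // An `exact` flag on the original udiv/lshr/ashr is copied onto the
    // narrowed instruction, since narrowing both operands does not change
    // exactness.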
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 5b09cdb35791..67f8828e4c75 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -56,8 +56,10 @@ static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
bool Changed = false;
- for (auto IB = inst_begin(F), E = inst_end(F); IB != E;) {
- Instruction &I = *IB++;
+ bool IsPrivateAndUnprocessed =
+ F.hasFnAttribute(CORO_PRESPLIT_ATTR) && F.hasLocalLinkage();
+
+ for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
default:
@@ -71,6 +73,10 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
case Intrinsic::coro_alloc:
II->replaceAllUsesWith(ConstantInt::getTrue(Context));
break;
+ case Intrinsic::coro_async_resume:
+ II->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(I.getType())));
+ break;
case Intrinsic::coro_id:
case Intrinsic::coro_id_retcon:
case Intrinsic::coro_id_retcon_once:
@@ -80,6 +86,13 @@ bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
case Intrinsic::coro_subfn_addr:
lowerSubFn(Builder, cast<CoroSubFnInst>(II));
break;
+ case Intrinsic::coro_end:
+ case Intrinsic::coro_suspend_retcon:
+ if (IsPrivateAndUnprocessed) {
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ } else
+ continue;
+ break;
case Intrinsic::coro_async_size_replace:
auto *Target = cast<ConstantStruct>(
cast<GlobalVariable>(II->getArgOperand(0)->stripPointerCasts())
@@ -115,7 +128,8 @@ static bool declaresCoroCleanupIntrinsics(const Module &M) {
return coro::declaresIntrinsics(
M, {"llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.subfn.addr",
"llvm.coro.free", "llvm.coro.id", "llvm.coro.id.retcon",
- "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace"});
+ "llvm.coro.id.retcon.once", "llvm.coro.async.size.replace",
+ "llvm.coro.async.resume"});
}
PreservedAnalyses CoroCleanupPass::run(Function &F,
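A self-contained sketch, not from the patch, of the llvm::make_early_inc_range idiom adopted above (helper and callee check are illustrative); the range advances the iterator before the body runs, so the current instruction can be erased safely:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    static void eraseDeadCallsTo(Function &F, const Function *Callee) {
      for (Instruction &I : make_early_inc_range(instructions(F)))
        if (auto *CB = dyn_cast<CallBase>(&I))
          if (CB->getCalledFunction() == Callee && CB->use_empty())
            CB->eraseFromParent(); // Safe: the range already stepped past I.
    }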
diff --git a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
index 5e5e513cdfda..68a34bdcb1cd 100644
--- a/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -150,8 +150,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
CoroIdInst *CoroId = nullptr;
SmallVector<CoroFreeInst *, 4> CoroFrees;
bool HasCoroSuspend = false;
- for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) {
- Instruction &I = *IB++;
+ for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
if (auto *CB = dyn_cast<CallBase>(&I)) {
switch (CB->getIntrinsicID()) {
default:
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index beae5fdac8ab..ac3d078714ce 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -16,6 +16,7 @@
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
#include "llvm/Analysis/StackLifetime.h"
@@ -435,7 +436,7 @@ private:
DenseMap<Value*, unsigned> FieldIndexByKey;
public:
- FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL,
+ FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
Optional<Align> MaxFrameAlignment)
: DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
@@ -576,13 +577,8 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
using AllocaSetType = SmallVector<AllocaInst *, 4>;
SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
- // We need to add field for allocas at the end of this function. However, this
- // function has multiple exits, so we use this helper to avoid redundant code.
- struct RTTIHelper {
- std::function<void()> func;
- RTTIHelper(std::function<void()> &&func) : func(func) {}
- ~RTTIHelper() { func(); }
- } Helper([&]() {
+ // We need to add fields for the allocas at the end of this function.
+ auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
for (auto AllocaList : NonOverlapedAllocas) {
auto *LargestAI = *AllocaList.begin();
FieldIDType Id = addFieldForAlloca(LargestAI);
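A minimal sketch of the llvm::make_scope_exit idiom that replaces the hand-rolled RAII helper above; the callable runs when the guard goes out of scope, covering every early return without duplicating the cleanup:

    #include "llvm/ADT/ScopeExit.h"
    #include <cstdio>

    static int process(bool Bail) {
      auto Cleanup = llvm::make_scope_exit([] { std::puts("cleanup runs"); });
      if (Bail)
        return 1; // Cleanup still runs here...
      return 0;   // ...and here.
    }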
@@ -840,8 +836,9 @@ static StringRef solveTypeName(Type *Ty) {
return "UnknownType";
}
-static DIType *solveDIType(DIBuilder &Builder, Type *Ty, DataLayout &Layout,
- DIScope *Scope, unsigned LineNum,
+static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
+ const DataLayout &Layout, DIScope *Scope,
+ unsigned LineNum,
DenseMap<Type *, DIType *> &DITypeCache) {
if (DIType *DT = DITypeCache.lookup(Ty))
return DT;
@@ -1348,13 +1345,17 @@ struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
}
void visitIntrinsicInst(IntrinsicInst &II) {
- if (II.getIntrinsicID() != Intrinsic::lifetime_start)
+ // If the lifetime markers refer to a subrange of the
+ // original alloca, ignore them to avoid misleading
+ // the analysis.
+ if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
+ !Offset.isZero())
return Base::visitIntrinsicInst(II);
LifetimeStarts.insert(&II);
}
void visitCallBase(CallBase &CB) {
- for (unsigned Op = 0, OpCount = CB.getNumArgOperands(); Op < OpCount; ++Op)
+ for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
PI.setEscaped(&CB);
handleMayWrite(CB);
@@ -1868,8 +1869,7 @@ static void cleanupSinglePredPHIs(Function &F) {
}
}
while (!Worklist.empty()) {
- auto *Phi = Worklist.back();
- Worklist.pop_back();
+ auto *Phi = Worklist.pop_back_val();
auto *OriginalValue = Phi->getIncomingValue(0);
Phi->replaceAllUsesWith(OriginalValue);
}
@@ -1984,14 +1984,15 @@ static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
if (CurrentBlock != U->getParent()) {
bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
- CurrentBlock = IsInCoroSuspendBlock
- ? U->getParent()->getSinglePredecessor()
- : U->getParent();
+ CurrentBlock = U->getParent();
+ auto *InsertBlock = IsInCoroSuspendBlock
+ ? CurrentBlock->getSinglePredecessor()
+ : CurrentBlock;
CurrentMaterialization = cast<Instruction>(Def)->clone();
CurrentMaterialization->setName(Def->getName());
CurrentMaterialization->insertBefore(
- IsInCoroSuspendBlock ? CurrentBlock->getTerminator()
- : &*CurrentBlock->getFirstInsertionPt());
+ IsInCoroSuspendBlock ? InsertBlock->getTerminator()
+ : &*InsertBlock->getFirstInsertionPt());
}
if (auto *PN = dyn_cast<PHINode>(U)) {
assert(PN->getNumIncomingValues() == 1 &&
@@ -2244,12 +2245,7 @@ static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
/// intrinsics and attempting to MemToReg the alloca away.
static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
coro::Shape &Shape) {
- for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
- // We're likely changing the use list, so use a mutation-safe
- // iteration pattern.
- auto &Use = *UI;
- ++UI;
-
+ for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
// swifterror values can only be used in very specific ways.
// We take advantage of that here.
auto User = Use.getUser();
@@ -2510,11 +2506,11 @@ void coro::salvageDebugInfo(
DIExpression *Expr = DVI->getExpression();
// Follow the pointer arithmetic all the way to the incoming
// function argument and convert into a DIExpression.
- bool OutermostLoad = true;
+ bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
Value *Storage = DVI->getVariableLocationOp(0);
Value *OriginalStorage = Storage;
- while (Storage) {
- if (auto *LdInst = dyn_cast<LoadInst>(Storage)) {
+ while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
+ if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
Storage = LdInst->getOperand(0);
// FIXME: This is a heuristic that works around the fact that
// LLVM IR debug intrinsics cannot yet distinguish between
@@ -2522,26 +2518,25 @@ void coro::salvageDebugInfo(
// implicitly a memory location no DW_OP_deref operation for the
// last direct load from an alloca is necessary. This condition
// effectively drops the *last* DW_OP_deref in the expression.
- if (!OutermostLoad)
+ if (!SkipOutermostLoad)
Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
- OutermostLoad = false;
- } else if (auto *StInst = dyn_cast<StoreInst>(Storage)) {
+ } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
Storage = StInst->getOperand(0);
- } else if (auto *GEPInst = dyn_cast<GetElementPtrInst>(Storage)) {
- SmallVector<Value *> AdditionalValues;
- DIExpression *SalvagedExpr = llvm::salvageDebugInfoImpl(
- *GEPInst, Expr,
- /*WithStackValue=*/false, 0, AdditionalValues);
- // Debug declares cannot currently handle additional location
- // operands.
- if (!SalvagedExpr || !AdditionalValues.empty())
+ } else {
+ SmallVector<uint64_t, 16> Ops;
+ SmallVector<Value *, 0> AdditionalValues;
+ Value *Op = llvm::salvageDebugInfoImpl(
+ *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
+ AdditionalValues);
+ if (!Op || !AdditionalValues.empty()) {
+ // If salvaging failed or salvaging produced more than one location
+ // operand, give up.
break;
- Expr = SalvagedExpr;
- Storage = GEPInst->getOperand(0);
- } else if (auto *BCInst = dyn_cast<llvm::BitCastInst>(Storage))
- Storage = BCInst->getOperand(0);
- else
- break;
+ }
+ Storage = Op;
+ Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
+ }
+ SkipOutermostLoad = false;
}
if (!Storage)
return;
diff --git a/llvm/lib/Transforms/Coroutines/CoroInstr.h b/llvm/lib/Transforms/Coroutines/CoroInstr.h
index 5ed800d67fe9..bf3d781ba43e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInstr.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -638,7 +638,7 @@ public:
void checkWellFormed() const;
Function *getMustTailCallFunction() const {
- if (getNumArgOperands() < 3)
+ if (arg_size() < 3)
return nullptr;
return cast<Function>(
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index b6932dbbfc3f..fa1d92f439b8 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -520,8 +520,8 @@ void CoroCloner::replaceRetconOrAsyncSuspendUses() {
}
// Try to peephole extracts of an aggregate return.
- for (auto UI = NewS->use_begin(), UE = NewS->use_end(); UI != UE; ) {
- auto EVI = dyn_cast<ExtractValueInst>((UI++)->getUser());
+ for (Use &U : llvm::make_early_inc_range(NewS->uses())) {
+ auto *EVI = dyn_cast<ExtractValueInst>(U.getUser());
if (!EVI || EVI->getNumIndices() != 1)
continue;
@@ -622,12 +622,12 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
// If there are no arguments, this is a 'get' operation.
Value *MappedResult;
- if (Op->getNumArgOperands() == 0) {
+ if (Op->arg_empty()) {
auto ValueTy = Op->getType();
auto Slot = getSwiftErrorSlot(ValueTy);
MappedResult = Builder.CreateLoad(ValueTy, Slot);
} else {
- assert(Op->getNumArgOperands() == 1);
+ assert(Op->arg_size() == 1);
auto Value = MappedOp->getArgOperand(0);
auto ValueTy = Value->getType();
auto Slot = getSwiftErrorSlot(ValueTy);
@@ -669,7 +669,7 @@ void CoroCloner::salvageDebugInfo() {
for (DbgVariableIntrinsic *DVI : Worklist) {
if (IsUnreachableBlock(DVI->getParent()))
DVI->eraseFromParent();
- else if (dyn_cast_or_null<AllocaInst>(DVI->getVariableLocationOp(0))) {
+ else if (isa_and_nonnull<AllocaInst>(DVI->getVariableLocationOp(0))) {
// Count all non-debuginfo uses in reachable blocks.
unsigned Uses = 0;
for (auto *User : DVI->getVariableLocationOp(0)->users())
@@ -738,8 +738,7 @@ void CoroCloner::replaceEntryBlock() {
// entry needs to be moved to the new entry.
Function *F = OldEntry->getParent();
DominatorTree DT{*F};
- for (auto IT = inst_begin(F), End = inst_end(F); IT != End;) {
- Instruction &I = *IT++;
+ for (Instruction &I : llvm::make_early_inc_range(instructions(F))) {
auto *Alloca = dyn_cast<AllocaInst>(&I);
if (!Alloca || I.use_empty())
continue;
@@ -773,9 +772,8 @@ Value *CoroCloner::deriveNewFramePointer() {
auto DbgLoc =
cast<CoroSuspendAsyncInst>(VMap[ActiveSuspend])->getDebugLoc();
// Calling i8* (i8*)
- auto *CallerContext = Builder.CreateCall(
- cast<FunctionType>(ProjectionFunc->getType()->getPointerElementType()),
- ProjectionFunc, CalleeContext);
+ auto *CallerContext = Builder.CreateCall(ProjectionFunc->getFunctionType(),
+ ProjectionFunc, CalleeContext);
CallerContext->setCallingConv(ProjectionFunc->getCallingConv());
CallerContext->setDebugLoc(DbgLoc);
// The frame is located after the async_context header.
@@ -906,8 +904,7 @@ void CoroCloner::create() {
case coro::ABI::Switch:
// Bootstrap attributes by copying function attributes from the
// original function. This should include optimization settings and so on.
- NewAttrs = NewAttrs.addAttributes(Context, AttributeList::FunctionIndex,
- OrigAttrs.getFnAttributes());
+ NewAttrs = NewAttrs.addFnAttributes(Context, OrigAttrs.getFnAttrs());
addFramePointerAttrs(NewAttrs, Context, 0,
Shape.FrameSize, Shape.FrameAlign);
@@ -929,9 +926,8 @@ void CoroCloner::create() {
}
// Transfer the original function's attributes.
- auto FnAttrs = OrigF.getAttributes().getFnAttributes();
- NewAttrs =
- NewAttrs.addAttributes(Context, AttributeList::FunctionIndex, FnAttrs);
+ auto FnAttrs = OrigF.getAttributes().getFnAttrs();
+ NewAttrs = NewAttrs.addFnAttributes(Context, FnAttrs);
break;
}
case coro::ABI::Retcon:
@@ -1144,11 +1140,13 @@ static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
static void postSplitCleanup(Function &F) {
removeUnreachableBlocks(F);
+#ifndef NDEBUG
// For now, we do a mandatory verification step because we don't
// entirely trust this pass. Note that we don't want to add a verifier
// pass to FPM below because it will also verify all the global data.
if (verifyFunction(F, &errs()))
report_fatal_error("Broken function");
+#endif
}
// Assuming we arrived at the block NewBlock from Prev instruction, store
@@ -1262,7 +1260,7 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
Attribute::SwiftSelf, Attribute::SwiftError};
AttributeList Attrs = CI.getAttributes();
for (auto AK : ABIAttrs)
- if (Attrs.hasParamAttribute(0, AK))
+ if (Attrs.hasParamAttr(0, AK))
return false;
return true;
@@ -1357,7 +1355,7 @@ static bool hasCallsInBlocksBetween(BasicBlock *SaveBB, BasicBlock *ResDesBB) {
auto *BB = Worklist.pop_back_val();
Set.insert(BB);
for (auto *Pred : predecessors(BB))
- if (Set.count(Pred) == 0)
+ if (!Set.contains(Pred))
Worklist.push_back(Pred);
}
@@ -1547,8 +1545,7 @@ static void coerceArguments(IRBuilder<> &Builder, FunctionType *FnTy,
CallInst *coro::createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
ArrayRef<Value *> Arguments,
IRBuilder<> &Builder) {
- auto *FnTy =
- cast<FunctionType>(MustTailCallFn->getType()->getPointerElementType());
+ auto *FnTy = MustTailCallFn->getFunctionType();
// Coerce the arguments, llvm optimizations seem to ignore the types in
// vaarg functions and throws away casts in optimized mode.
SmallVector<Value *, 8> CallArgs;
@@ -1568,8 +1565,8 @@ static void splitAsyncCoroutine(Function &F, coro::Shape &Shape,
// Reset various things that the optimizer might have decided it
// "knows" about the coroutine function due to not seeing a return.
F.removeFnAttr(Attribute::NoReturn);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F.removeRetAttr(Attribute::NoAlias);
+ F.removeRetAttr(Attribute::NonNull);
auto &Context = F.getContext();
auto *Int8PtrTy = Type::getInt8PtrTy(Context);
@@ -1667,8 +1664,8 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
// Reset various things that the optimizer might have decided it
// "knows" about the coroutine function due to not seeing a return.
F.removeFnAttr(Attribute::NoReturn);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- F.removeAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F.removeRetAttr(Attribute::NoAlias);
+ F.removeRetAttr(Attribute::NonNull);
// Allocate the frame.
auto *Id = cast<AnyCoroIdRetconInst>(Shape.CoroBegin->getId());
@@ -1977,9 +1974,9 @@ static void replacePrepare(CallInst *Prepare, LazyCallGraph &CG,
// %2 = bitcast %1 to [[TYPE]]
// ==>
// %2 = @some_function
- for (auto UI = Prepare->use_begin(), UE = Prepare->use_end(); UI != UE;) {
+ for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
// Look for bitcasts back to the original function type.
- auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+ auto *Cast = dyn_cast<BitCastInst>(U.getUser());
if (!Cast || Cast->getType() != Fn->getType())
continue;
@@ -2019,10 +2016,9 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
// %2 = bitcast %1 to [[TYPE]]
// ==>
// %2 = @some_function
- for (auto UI = Prepare->use_begin(), UE = Prepare->use_end();
- UI != UE; ) {
+ for (Use &U : llvm::make_early_inc_range(Prepare->uses())) {
// Look for bitcasts back to the original function type.
- auto *Cast = dyn_cast<BitCastInst>((UI++)->getUser());
+ auto *Cast = dyn_cast<BitCastInst>(U.getUser());
if (!Cast || Cast->getType() != Fn->getType()) continue;
// Check whether the replacement will introduce new direct calls.
@@ -2059,9 +2055,9 @@ static void replacePrepare(CallInst *Prepare, CallGraph &CG) {
static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
LazyCallGraph::SCC &C) {
bool Changed = false;
- for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end(); PI != PE;) {
+ for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
// Intrinsics can only be used in calls.
- auto *Prepare = cast<CallInst>((PI++)->getUser());
+ auto *Prepare = cast<CallInst>(P.getUser());
replacePrepare(Prepare, CG, C);
Changed = true;
}
@@ -2077,10 +2073,9 @@ static bool replaceAllPrepares(Function *PrepareFn, LazyCallGraph &CG,
/// switch coroutines, which are lowered in multiple stages).
static bool replaceAllPrepares(Function *PrepareFn, CallGraph &CG) {
bool Changed = false;
- for (auto PI = PrepareFn->use_begin(), PE = PrepareFn->use_end();
- PI != PE; ) {
+ for (Use &P : llvm::make_early_inc_range(PrepareFn->uses())) {
// Intrinsics can only be used in calls.
- auto *Prepare = cast<CallInst>((PI++)->getUser());
+ auto *Prepare = cast<CallInst>(P.getUser());
replacePrepare(Prepare, CG);
Changed = true;
}
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index ae2d9e192c87..e4883ef89db7 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -126,6 +126,7 @@ static bool isCoroutineIntrinsicName(StringRef Name) {
"llvm.coro.alloc",
"llvm.coro.async.context.alloc",
"llvm.coro.async.context.dealloc",
+ "llvm.coro.async.resume",
"llvm.coro.async.size.replace",
"llvm.coro.async.store_resume",
"llvm.coro.begin",
@@ -311,10 +312,9 @@ void coro::Shape::buildFrom(Function &F) {
if (CoroBegin)
report_fatal_error(
"coroutine should have exactly one defining @llvm.coro.begin");
- CB->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- CB->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
- CB->removeAttribute(AttributeList::FunctionIndex,
- Attribute::NoDuplicate);
+ CB->addRetAttr(Attribute::NonNull);
+ CB->addRetAttr(Attribute::NoAlias);
+ CB->removeFnAttr(Attribute::NoDuplicate);
CoroBegin = CB;
break;
}
@@ -571,8 +571,8 @@ void coro::Shape::emitDealloc(IRBuilder<> &Builder, Value *Ptr,
llvm_unreachable("Unknown coro::ABI enum");
}
-LLVM_ATTRIBUTE_NORETURN
-static void fail(const Instruction *I, const char *Reason, Value *V) {
+[[noreturn]] static void fail(const Instruction *I, const char *Reason,
+ Value *V) {
#ifndef NDEBUG
I->dump();
if (V) {
@@ -722,7 +722,7 @@ void CoroAsyncEndInst::checkWellFormed() const {
return;
auto *FnTy =
cast<FunctionType>(MustTailCallFunc->getType()->getPointerElementType());
- if (FnTy->getNumParams() != (getNumArgOperands() - 3))
+ if (FnTy->getNumParams() != (arg_size() - 3))
fail(this,
"llvm.coro.end.async must tail call function argument type must "
"match the tail arguments",
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 532599b42e0d..01e724e22dcf 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -73,8 +73,8 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
},
ORE);
assert(OIC);
- emitInlinedInto(ORE, CB->getDebugLoc(), CB->getParent(), F, *Caller,
- *OIC, false, DEBUG_TYPE);
+ emitInlinedIntoBasedOnCost(ORE, CB->getDebugLoc(), CB->getParent(), F,
+ *Caller, *OIC, false, DEBUG_TYPE);
InlineFunctionInfo IFI(
/*cg=*/nullptr, GetAssumptionCache, &PSI,
@@ -108,8 +108,10 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// Delete the non-comdat ones from the module and also from our vector.
auto NonComdatBegin = partition(
InlinedFunctions, [&](Function *F) { return F->hasComdat(); });
- for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end()))
+ for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end())) {
M.getFunctionList().erase(F);
+ Changed = true;
+ }
InlinedFunctions.erase(NonComdatBegin, InlinedFunctions.end());
if (!InlinedFunctions.empty()) {
@@ -117,8 +119,10 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
// are not actually dead.
filterDeadComdatFunctions(M, InlinedFunctions);
// The remaining functions are actually dead.
- for (Function *F : InlinedFunctions)
+ for (Function *F : InlinedFunctions) {
M.getFunctionList().erase(F);
+ Changed = true;
+ }
}
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f670a101767e..93bb11433775 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -148,7 +148,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
} else if (!ArgsToPromote.count(&*I)) {
// Unchanged argument
Params.push_back(I->getType());
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));
} else if (I->use_empty()) {
// Dead argument (which are always marked as promotable)
++NumArgumentsDead;
@@ -177,9 +177,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Since loads will only have a single operand, and GEPs only a single
// non-index operand, this will record direct loads without any indices,
// and gep+loads with the GEP indices.
- for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end();
- II != IE; ++II)
- Indices.push_back(cast<ConstantInt>(*II)->getSExtValue());
+ for (const Use &I : llvm::drop_begin(UI->operands()))
+ Indices.push_back(cast<ConstantInt>(I)->getSExtValue());
// GEPs with a single 0 index can be merged with direct loads
if (Indices.size() == 1 && Indices.front() == 0)
Indices.clear();
@@ -231,8 +230,8 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Recompute the parameter attributes list based on the new arguments for
// the function.
- NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttributes(),
- PAL.getRetAttributes(), ArgAttrVec));
+ NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),
+ PAL.getRetAttrs(), ArgAttrVec));
ArgAttrVec.clear();
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
@@ -257,7 +256,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
++I, ++AI, ++ArgNo)
if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {
Args.push_back(*AI); // Unmodified argument
- ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
} else if (ByValArgsToTransform.count(&*I)) {
// Emit a GEP and load for each element of the struct.
Type *AgTy = I->getParamByValType();
@@ -313,9 +312,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
IRB.CreateLoad(OrigLoad->getType(), V, V->getName() + ".val");
newLoad->setAlignment(OrigLoad->getAlign());
// Transfer the AA info too.
- AAMDNodes AAInfo;
- OrigLoad->getAAMetadata(AAInfo);
- newLoad->setAAMetadata(AAInfo);
+ newLoad->setAAMetadata(OrigLoad->getAAMetadata());
Args.push_back(newLoad);
ArgAttrVec.push_back(AttributeSet());
@@ -325,7 +322,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
// Push any varargs arguments on the list.
for (; AI != CB.arg_end(); ++AI, ++ArgNo) {
Args.push_back(*AI);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -341,9 +338,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
NewCS = NewCall;
}
NewCS->setCallingConv(CB.getCallingConv());
- NewCS->setAttributes(
- AttributeList::get(F->getContext(), CallPAL.getFnAttributes(),
- CallPAL.getRetAttributes(), ArgAttrVec));
+ NewCS->setAttributes(AttributeList::get(F->getContext(),
+ CallPAL.getFnAttrs(),
+ CallPAL.getRetAttrs(), ArgAttrVec));
NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});
Args.clear();
ArgAttrVec.clear();
@@ -1018,11 +1015,12 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
do {
LocalChange = false;
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
for (LazyCallGraph::Node &N : C) {
Function &OldF = N.getFunction();
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
// FIXME: This lambda must only be used with this function. We should
// skip the lambda and just get the AA results directly.
auto AARGetter = [&](Function &F) -> AAResults & {
@@ -1045,6 +1043,13 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
C.getOuterRefSCC().replaceNodeFunction(N, *NewF);
FAM.clear(OldF, OldF.getName());
OldF.eraseFromParent();
+
+ PreservedAnalyses FuncPA;
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (auto *U : NewF->users()) {
+ auto *UserF = cast<CallBase>(U)->getFunction();
+ FAM.invalidate(*UserF, FuncPA);
+ }
}
Changed |= LocalChange;
@@ -1053,7 +1058,12 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
if (!Changed)
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+ PreservedAnalyses PA;
+ // We've cleared out analyses for deleted functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We've manually invalidated analyses for functions we've modified.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
namespace {
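A small sketch, not from the patch, of the manual-invalidation idiom used in the run() hunk above (helper name is illustrative); preserving CFGAnalyses tells the function analysis manager that only non-CFG results need recomputing:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    static void invalidateNonCFGAnalyses(FunctionAnalysisManager &FAM,
                                         Function &ModifiedF) {
      PreservedAnalyses OnlyCFG;
      OnlyCFG.preserveSet<CFGAnalyses>();
      FAM.invalidate(ModifiedF, OnlyCFG);
    }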
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 762317425026..edadc79e3a9f 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/ValueHandle.h"
@@ -250,10 +251,12 @@ Value *AA::getWithType(Value &V, Type &Ty) {
return Constant::getNullValue(&Ty);
if (C->getType()->isPointerTy() && Ty.isPointerTy())
return ConstantExpr::getPointerCast(C, &Ty);
- if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
- return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
- if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
- return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ if (C->getType()->getPrimitiveSizeInBits() >= Ty.getPrimitiveSizeInBits()) {
+ if (C->getType()->isIntegerTy() && Ty.isIntegerTy())
+ return ConstantExpr::getTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ if (C->getType()->isFloatingPointTy() && Ty.isFloatingPointTy())
+ return ConstantExpr::getFPTrunc(C, &Ty, /* OnlyIfReduced */ true);
+ }
}
return nullptr;
}
@@ -379,30 +382,30 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isStringAttribute()) {
StringRef Kind = Attr.getKindAsString();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
if (Attr.isIntAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttribute(AttrIdx, Kind))
+ if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
+ isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
return false;
- Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
- Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
+ Attrs = Attrs.removeAttributeAtIndex(Ctx, AttrIdx, Kind);
+ Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
return true;
}
@@ -655,9 +658,9 @@ bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK,
else
AttrList = getAssociatedFunction()->getAttributes();
- bool HasAttr = AttrList.hasAttribute(getAttrIdx(), AK);
+ bool HasAttr = AttrList.hasAttributeAtIndex(getAttrIdx(), AK);
if (HasAttr)
- Attrs.push_back(AttrList.getAttribute(getAttrIdx(), AK));
+ Attrs.push_back(AttrList.getAttributeAtIndex(getAttrIdx(), AK));
return HasAttr;
}
@@ -1023,7 +1026,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
while (!Worklist.empty()) {
const Use *U = Worklist.pop_back_val();
- if (!Visited.insert(U).second)
+ if (isa<PHINode>(U->getUser()) && !Visited.insert(U).second)
continue;
LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
<< *U->getUser() << "\n");
@@ -1040,6 +1043,8 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
if (auto *SI = dyn_cast<StoreInst>(U->getUser())) {
if (&SI->getOperandUse(0) == U) {
+ if (!Visited.insert(U).second)
+ continue;
SmallSetVector<Value *, 4> PotentialCopies;
if (AA::getPotentialCopiesOfStoredValue(*this, *SI, PotentialCopies,
QueryingAA,
@@ -1118,6 +1123,10 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) {
if (CE->isCast() && CE->getType()->isPointerTy() &&
CE->getType()->getPointerElementType()->isFunctionTy()) {
+ LLVM_DEBUG(
+ dbgs() << "[Attributor] Use, is constant cast expression, add "
+ << CE->getNumUses()
+ << " uses of that expression instead!\n");
for (const Use &CEU : CE->uses())
Uses.push_back(&CEU);
continue;
@@ -1138,9 +1147,13 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
const Use *EffectiveUse =
ACS.isCallbackCall() ? &ACS.getCalleeUseForCallback() : &U;
if (!ACS.isCallee(EffectiveUse)) {
- if (!RequireAllCallSites)
+ if (!RequireAllCallSites) {
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
+ << " is not a call of " << Fn.getName()
+ << ", skip use\n");
continue;
- LLVM_DEBUG(dbgs() << "[Attributor] User " << EffectiveUse->getUser()
+ }
+ LLVM_DEBUG(dbgs() << "[Attributor] User " << *EffectiveUse->getUser()
<< " is an invalid use of " << Fn.getName() << "\n");
return false;
}
@@ -1410,6 +1423,16 @@ void Attributor::runTillFixpoint() {
} while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
VerifyMaxFixpointIterations));
+ if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
+ auto Remark = [&](OptimizationRemarkMissed ORM) {
+ return ORM << "Attributor did not reach a fixpoint after "
+ << ore::NV("Iterations", MaxFixedPointIterations)
+ << " iterations.";
+ };
+ Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
+ emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
+ }
+
LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
<< IterationCounter << "/" << MaxFixpointIterations
<< " iterations\n");
@@ -1919,55 +1942,91 @@ void Attributor::createShallowWrapper(Function &F) {
CallInst *CI = CallInst::Create(&F, Args, "", EntryBB);
CI->setTailCall(true);
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoInline);
+ CI->addFnAttr(Attribute::NoInline);
ReturnInst::Create(Ctx, CI->getType()->isVoidTy() ? nullptr : CI, EntryBB);
NumFnShallowWrappersCreated++;
}
+bool Attributor::isInternalizable(Function &F) {
+ if (F.isDeclaration() || F.hasLocalLinkage() ||
+ GlobalValue::isInterposableLinkage(F.getLinkage()))
+ return false;
+ return true;
+}
+
Function *Attributor::internalizeFunction(Function &F, bool Force) {
if (!AllowDeepWrapper && !Force)
return nullptr;
- if (F.isDeclaration() || F.hasLocalLinkage() ||
- GlobalValue::isInterposableLinkage(F.getLinkage()))
+ if (!isInternalizable(F))
return nullptr;
- Module &M = *F.getParent();
- FunctionType *FnTy = F.getFunctionType();
+ SmallPtrSet<Function *, 2> FnSet = {&F};
+ DenseMap<Function *, Function *> InternalizedFns;
+ internalizeFunctions(FnSet, InternalizedFns);
- // create a copy of the current function
- Function *Copied = Function::Create(FnTy, F.getLinkage(), F.getAddressSpace(),
- F.getName() + ".internalized");
- ValueToValueMapTy VMap;
- auto *NewFArgIt = Copied->arg_begin();
- for (auto &Arg : F.args()) {
- auto ArgName = Arg.getName();
- NewFArgIt->setName(ArgName);
- VMap[&Arg] = &(*NewFArgIt++);
- }
- SmallVector<ReturnInst *, 8> Returns;
-
- // Copy the body of the original function to the new one
- CloneFunctionInto(Copied, &F, VMap, CloneFunctionChangeType::LocalChangesOnly,
- Returns);
-
- // Set the linakage and visibility late as CloneFunctionInto has some implicit
- // requirements.
- Copied->setVisibility(GlobalValue::DefaultVisibility);
- Copied->setLinkage(GlobalValue::PrivateLinkage);
+ return InternalizedFns[&F];
+}
- // Copy metadata
- SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
- F.getAllMetadata(MDs);
- for (auto MDIt : MDs)
- if (!Copied->hasMetadata())
- Copied->addMetadata(MDIt.first, *MDIt.second);
+bool Attributor::internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
+ DenseMap<Function *, Function *> &FnMap) {
+ for (Function *F : FnSet)
+ if (!Attributor::isInternalizable(*F))
+ return false;
- M.getFunctionList().insert(F.getIterator(), Copied);
- F.replaceAllUsesWith(Copied);
- Copied->setDSOLocal(true);
+ FnMap.clear();
+ // Generate the internalized version of each function.
+ for (Function *F : FnSet) {
+ Module &M = *F->getParent();
+ FunctionType *FnTy = F->getFunctionType();
+
+ // Create a copy of the current function
+ Function *Copied =
+ Function::Create(FnTy, F->getLinkage(), F->getAddressSpace(),
+ F->getName() + ".internalized");
+ ValueToValueMapTy VMap;
+ auto *NewFArgIt = Copied->arg_begin();
+ for (auto &Arg : F->args()) {
+ auto ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ // Copy the body of the original function to the new one
+ CloneFunctionInto(Copied, F, VMap,
+ CloneFunctionChangeType::LocalChangesOnly, Returns);
+
+ // Set the linkage and visibility late as CloneFunctionInto has some
+ // implicit requirements.
+ Copied->setVisibility(GlobalValue::DefaultVisibility);
+ Copied->setLinkage(GlobalValue::PrivateLinkage);
+
+ // Copy metadata
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ F->getAllMetadata(MDs);
+ for (auto MDIt : MDs)
+ if (!Copied->hasMetadata())
+ Copied->addMetadata(MDIt.first, *MDIt.second);
+
+ M.getFunctionList().insert(F->getIterator(), Copied);
+ Copied->setDSOLocal(true);
+ FnMap[F] = Copied;
+ }
+
+ // Replace all uses of the old function with the new internalized function
+ // unless the caller is a function that was just internalized.
+ for (Function *F : FnSet) {
+ auto &InternalizedFn = FnMap[F];
+ auto IsNotInternalized = [&](Use &U) -> bool {
+ if (auto *CB = dyn_cast<CallBase>(U.getUser()))
+ return !FnMap.lookup(CB->getCaller());
+ return false;
+ };
+ F->replaceUsesWithIf(InternalizedFn, IsNotInternalized);
+ }
- return Copied;
+ return true;
}
bool Attributor::isValidFunctionSignatureRewrite(
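A sketch, not from the patch, of the use-rewriting step in internalizeFunctions above (helper name is illustrative); only call-site uses whose caller is not itself being internalized are redirected, so internalized copies keep calling each other:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    static void redirectUses(Function &OldF, Function &NewF,
                             const DenseMap<Function *, Function *> &FnMap) {
      OldF.replaceUsesWithIf(&NewF, [&](Use &U) {
        if (auto *CB = dyn_cast<CallBase>(U.getUser()))
          return !FnMap.lookup(CB->getCaller());
        return false; // Non-call uses are left untouched.
      });
    }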
@@ -1976,7 +2035,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
if (!RewriteSignatures)
return false;
- auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
+ Function *Fn = Arg.getParent();
+ auto CallSiteCanBeChanged = [Fn](AbstractCallSite ACS) {
// Forbid the call site to cast the function return type. If we need to
// rewrite these functions we need to re-create a cast for the new call site
// (if the old had uses).
@@ -1984,11 +2044,12 @@ bool Attributor::isValidFunctionSignatureRewrite(
ACS.getInstruction()->getType() !=
ACS.getCalledFunction()->getReturnType())
return false;
+ if (ACS.getCalledOperand()->getType() != Fn->getType())
+ return false;
// Forbid must-tail calls for now.
return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
};
- Function *Fn = Arg.getParent();
// Avoid var-arg functions for now.
if (Fn->isVarArg()) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite var-args functions\n");
@@ -2118,7 +2179,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
} else {
NewArgumentTypes.push_back(Arg.getType());
NewArgumentAttributes.push_back(
- OldFnAttributeList.getParamAttributes(Arg.getArgNo()));
+ OldFnAttributeList.getParamAttrs(Arg.getArgNo()));
}
}
@@ -2149,8 +2210,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
// the function.
LLVMContext &Ctx = OldFn->getContext();
NewFn->setAttributes(AttributeList::get(
- Ctx, OldFnAttributeList.getFnAttributes(),
- OldFnAttributeList.getRetAttributes(), NewArgumentAttributes));
+ Ctx, OldFnAttributeList.getFnAttrs(), OldFnAttributeList.getRetAttrs(),
+ NewArgumentAttributes));
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
@@ -2195,7 +2256,7 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
} else {
NewArgOperands.push_back(ACS.getCallArgOperand(OldArgNum));
NewArgOperandAttributes.push_back(
- OldCallAttributeList.getParamAttributes(OldArgNum));
+ OldCallAttributeList.getParamAttrs(OldArgNum));
}
}
@@ -2225,8 +2286,8 @@ ChangeStatus Attributor::rewriteFunctionSignatures(
NewCB->setCallingConv(OldCB->getCallingConv());
NewCB->takeName(OldCB);
NewCB->setAttributes(AttributeList::get(
- Ctx, OldCallAttributeList.getFnAttributes(),
- OldCallAttributeList.getRetAttributes(), NewArgOperandAttributes));
+ Ctx, OldCallAttributeList.getFnAttrs(),
+ OldCallAttributeList.getRetAttrs(), NewArgOperandAttributes));
CallSitePairs.push_back({OldCB, NewCB});
return true;
@@ -2441,6 +2502,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Every function can be "readnone/argmemonly/inaccessiblememonly/...".
getOrCreateAAFor<AAMemoryLocation>(FPos);
+ // Every function can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(FPos);
+
// Every function might be applicable for Heap-To-Stack conversion.
if (EnableHeapToStack)
getOrCreateAAFor<AAHeapToStack>(FPos);
@@ -2526,6 +2590,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
auto CallSitePred = [&](Instruction &I) -> bool {
auto &CB = cast<CallBase>(I);
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
+ IRPosition CBFnPos = IRPosition::callsite_function(CB);
// Call sites might be dead if they do not have side effects and no live
// users. The return value might be dead if there are no live users.
@@ -2537,6 +2602,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!Callee)
return true;
+ // Every call site can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(CBFnPos);
+
// Skip declarations except if annotations on their call sites were
// explicitly requested.
if (!AnnotateDeclarationCallSites && Callee->isDeclaration() &&
@@ -2549,7 +2617,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAValueSimplify>(CBRetPos);
}
- for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) {
+ for (int I = 0, E = CB.arg_size(); I < E; ++I) {
IRPosition CBArgPos = IRPosition::callsite_argument(CB, I);
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 98ce286d5139..ec08287393de 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Assumptions.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -146,6 +148,7 @@ PIPE_OPERATOR(AANoUndef)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAFunctionReachability)
PIPE_OPERATOR(AAPointerInfo)
+PIPE_OPERATOR(AAAssumptionInfo)
#undef PIPE_OPERATOR
@@ -203,46 +206,25 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
<< "-bytes as " << *ResTy << "\n");
if (Offset) {
- SmallVector<Value *, 4> Indices;
- std::string GEPName = Ptr->getName().str() + ".0";
-
- // Add 0 index to look through the pointer.
- assert((uint64_t)Offset < DL.getTypeAllocSize(PtrElemTy) &&
- "Offset out of bounds");
- Indices.push_back(Constant::getNullValue(IRB.getInt32Ty()));
-
Type *Ty = PtrElemTy;
- do {
- auto *STy = dyn_cast<StructType>(Ty);
- if (!STy)
- // Non-aggregate type, we cast and make byte-wise progress now.
- break;
-
- const StructLayout *SL = DL.getStructLayout(STy);
- if (int64_t(SL->getSizeInBytes()) < Offset)
- break;
-
- uint64_t Idx = SL->getElementContainingOffset(Offset);
- assert(Idx < STy->getNumElements() && "Offset calculation error!");
- uint64_t Rem = Offset - SL->getElementOffset(Idx);
- Ty = STy->getElementType(Idx);
-
- LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
- << " Idx: " << Idx << " Rem: " << Rem << "\n");
+ APInt IntOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
+ SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(Ty, IntOffset);
- GEPName += "." + std::to_string(Idx);
- Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
- Offset = Rem;
- } while (Offset);
+ SmallVector<Value *, 4> ValIndices;
+ std::string GEPName = Ptr->getName().str();
+ for (const APInt &Index : IntIndices) {
+ ValIndices.push_back(IRB.getInt(Index));
+ GEPName += "." + std::to_string(Index.getZExtValue());
+ }
// Create a GEP for the indices collected above.
- Ptr = IRB.CreateGEP(PtrElemTy, Ptr, Indices, GEPName);
+ Ptr = IRB.CreateGEP(PtrElemTy, Ptr, ValIndices, GEPName);
// If an offset is left we use byte-wise adjustment.
- if (Offset) {
+ if (IntOffset != 0) {
Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
- Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt32(Offset),
- GEPName + ".b" + Twine(Offset));
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(IntOffset),
+ GEPName + ".b" + Twine(IntOffset.getZExtValue()));
}
}
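A thin sketch, not from the patch, of DataLayout::getGEPIndicesForOffset as used in the rewritten constructPointer above; the structured indices cover as much of the byte offset as the layout allows, and any remainder left in the APInt must be applied as a raw i8 GEP:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    static SmallVector<APInt> splitOffsetIntoIndices(const DataLayout &DL,
                                                     Type *&ElemTy,
                                                     APInt &ByteOffset) {
      // E.g. for { i32, i32, [4 x i8] } and ByteOffset == 9 this should
      // return {0, 2, 1} (element 1 of the trailing array) and leave no
      // remainder in ByteOffset; ElemTy is updated to the reached type.
      return DL.getGEPIndicesForOffset(ElemTy, ByteOffset);
    }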
@@ -431,6 +413,7 @@ const Value *stripAndAccumulateMinimalOffsets(
};
return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds,
+ /* AllowInvariant */ false,
AttributorAnalysis);
}
@@ -503,6 +486,7 @@ static void clampReturnedValueStates(
S ^= *T;
}
+namespace {
/// Helper class for generic deduction: return value -> returned position.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
@@ -661,6 +645,7 @@ struct AACallSiteReturnedFromReturned : public BaseType {
return clampStateAndIndicateChange(S, AA.getState());
}
};
+} // namespace
/// Helper function to accumulate uses.
template <class AAType, typename StateType = typename AAType::StateType>
@@ -1051,6 +1036,7 @@ private:
BooleanState BS;
};
+namespace {
struct AAPointerInfoImpl
: public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
@@ -1149,19 +1135,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
return true;
};
+ /// Helper struct, will support ranges eventually.
+ struct OffsetInfo {
+ int64_t Offset = AA::PointerInfo::OffsetAndSize::Unknown;
+
+ bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
+ };
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
using namespace AA::PointerInfo;
State S = getState();
ChangeStatus Changed = ChangeStatus::UNCHANGED;
Value &AssociatedValue = getAssociatedValue();
- struct OffsetInfo {
- int64_t Offset = 0;
- };
const DataLayout &DL = A.getDataLayout();
DenseMap<Value *, OffsetInfo> OffsetInfoMap;
- OffsetInfoMap[&AssociatedValue] = {};
+ OffsetInfoMap[&AssociatedValue] = OffsetInfo{0};
auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
bool &Follow) {
@@ -1203,7 +1193,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
}
SmallVector<Value *, 8> Indices;
- for (Use &Idx : llvm::make_range(GEP->idx_begin(), GEP->idx_end())) {
+ for (Use &Idx : GEP->indices()) {
if (auto *CIdx = dyn_cast<ConstantInt>(Idx)) {
Indices.push_back(CIdx);
continue;
@@ -1219,8 +1209,52 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
Follow = true;
return true;
}
- if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr))
+ if (isa<CastInst>(Usr) || isa<SelectInst>(Usr))
return HandlePassthroughUser(Usr, PtrOI, Follow);
+
+ // For PHIs we need to take care of the recurrence explicitly as the value
+ // might change while we iterate through a loop. For now, we give up if
+ // the PHI is not invariant.
+ if (isa<PHINode>(Usr)) {
+ // Check if the PHI is invariant (so far).
+ OffsetInfo &UsrOI = OffsetInfoMap[Usr];
+ if (UsrOI == PtrOI)
+ return true;
+
+ // Check if the PHI operand has already an unknown offset as we can't
+ // improve on that anymore.
+ if (PtrOI.Offset == OffsetAndSize::Unknown) {
+ UsrOI = PtrOI;
+ Follow = true;
+ return true;
+ }
+
+ // Check if the PHI operand is not dependent on the PHI itself.
+ // TODO: This is not great as we look at the pointer type. However, it
+ // is unclear where the Offset size comes from with typeless pointers.
+ APInt Offset(
+ DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
+ 0);
+ if (&AssociatedValue == CurPtr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /* AllowNonInbounds */ true)) {
+ if (Offset != PtrOI.Offset) {
+ LLVM_DEBUG(dbgs()
+ << "[AAPointerInfo] PHI operand pointer offset mismatch "
+ << *CurPtr << " in " << *Usr << "\n");
+ return false;
+ }
+ return HandlePassthroughUser(Usr, PtrOI, Follow);
+ }
+
+ // TODO: Approximate in case we know the direction of the recurrence.
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
+ << *CurPtr << " in " << *Usr << "\n");
+ UsrOI = PtrOI;
+ UsrOI.Offset = OffsetAndSize::Unknown;
+ Follow = true;
+ return true;
+ }
+
if (auto *LoadI = dyn_cast<LoadInst>(Usr))
return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
AccessKind::AK_READ, PtrOI.Offset, Changed,
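Two hypothetical IR fragments, not from the patch, showing the PHI cases distinguished above:

    // Invariant PHI: every incoming pointer carries the same constant offset
    // from the underlying object, so the precise offset is kept:
    //   %p = phi i8* [ %base.plus8, %then ], [ %base.plus8, %else ]
    // Recurrence: the offset changes on every loop iteration, so it is
    // recorded as OffsetAndSize::Unknown and treated conservatively:
    //   %p = phi i8* [ %base, %entry ], [ %p.next, %loop ]
    //   %p.next = getelementptr inbounds i8, i8* %p, i64 1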
@@ -2388,6 +2422,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
const size_t NoUBPrevSize = AssumedNoUBInsts.size();
auto InspectMemAccessInstForUB = [&](Instruction &I) {
+ // The LangRef now states that volatile stores are not UB, so skip them.
+ if (I.isVolatile() && I.mayWriteToMemory())
+ return true;
+
// Skip instructions that are already saved.
if (AssumedNoUBInsts.count(&I) || KnownUBInsts.count(&I))
return true;
@@ -2467,7 +2505,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
Function *Callee = CB.getCalledFunction();
if (!Callee)
return true;
- for (unsigned idx = 0; idx < CB.getNumArgOperands(); idx++) {
+ for (unsigned idx = 0; idx < CB.arg_size(); idx++) {
// If current argument is known to be simplified to null pointer and the
// corresponding argument position is known to have nonnull attribute,
// the argument is poison. Furthermore, if the argument is poison and
@@ -3135,8 +3173,7 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
// value passed at this call site.
// TODO: AbstractCallSite
const auto &CB = cast<CallBase>(getAnchorValue());
- for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands();
- OtherArgNo++)
+ for (unsigned OtherArgNo = 0; OtherArgNo < CB.arg_size(); OtherArgNo++)
if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo))
return false;
@@ -3354,6 +3391,10 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
bool isDeadStore(Attributor &A, StoreInst &SI) {
+ // The LangRef now states that volatile stores are not UB/dead, so skip them.
+ if (SI.isVolatile())
+ return false;
+
bool UsedAssumedInformation = false;
SmallSetVector<Value *, 4> PotentialCopies;
if (!AA::getPotentialCopiesOfStoredValue(A, SI, PotentialCopies, *this,
@@ -5039,6 +5080,7 @@ struct AANoCaptureCallSiteReturned final : AANoCaptureImpl {
STATS_DECLTRACK_CSRET_ATTR(nocapture)
}
};
+} // namespace
/// ------------------ Value Simplify Attribute ----------------------------
@@ -5059,6 +5101,7 @@ bool ValueSimplifyStateType::unionAssumed(Optional<Value *> Other) {
return true;
}
+namespace {
struct AAValueSimplifyImpl : AAValueSimplify {
AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A)
: AAValueSimplify(IRP, A) {}
@@ -6464,7 +6507,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
CallBase *DC = cast<CallBase>(ACS.getInstruction());
int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
- assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() &&
+ assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->arg_size() &&
"Expected a direct call operand for callback call operand");
LLVM_DEBUG({
@@ -7287,10 +7330,12 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
case Instruction::Store:
// Stores cause the NO_WRITES property to disappear if the use is the
- // pointer operand. Note that we do assume that capturing was taken care of
- // somewhere else.
+ // pointer operand. Note that while capturing is taken care of elsewhere,
+ // we still need to handle stores of the value itself, which is not looked through.
if (cast<StoreInst>(UserI)->getPointerOperand() == U.get())
removeAssumedBits(NO_WRITES);
+ else
+ indicatePessimisticFixpoint();
return;
case Instruction::Call:
@@ -7336,6 +7381,7 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
if (UserI->mayWriteToMemory())
removeAssumedBits(NO_WRITES);
}
+} // namespace
/// -------------------- Memory Locations Attributes ---------------------------
/// Includes read-none, argmemonly, inaccessiblememonly,
@@ -7628,11 +7674,14 @@ void AAMemoryLocationImpl::categorizePtrValue(
assert(!isa<GEPOperator>(Obj) && "GEPs should have been stripped.");
if (isa<UndefValue>(Obj))
continue;
- if (auto *Arg = dyn_cast<Argument>(Obj)) {
- if (Arg->hasByValAttr())
- MLK = NO_LOCAL_MEM;
- else
- MLK = NO_ARGUMENT_MEM;
+ if (isa<Argument>(Obj)) {
+ // TODO: For now we do not treat byval arguments as local copies performed
+ // on the call edge, though we should. To make that happen we need to
+ // teach various passes, e.g., DSE, about the copy effect of a byval. That
+ // would also allow us to mark functions only accessing byval arguments as
+ // readnone again; arguably their accesses have no effect outside of the
+ // function, like accesses to allocas.
+ MLK = NO_ARGUMENT_MEM;
} else if (auto *GV = dyn_cast<GlobalValue>(Obj)) {
// Reading constant memory is not treated as a read "effect" by the
// function attr pass so we won't neither. Constants defined by TBAA are
@@ -7678,7 +7727,7 @@ void AAMemoryLocationImpl::categorizePtrValue(
void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
Attributor &A, CallBase &CB, AAMemoryLocation::StateType &AccessedLocs,
bool &Changed) {
- for (unsigned ArgNo = 0, E = CB.getNumArgOperands(); ArgNo < E; ++ArgNo) {
+ for (unsigned ArgNo = 0, E = CB.arg_size(); ArgNo < E; ++ArgNo) {
// Skip non-pointer arguments.
const Value *ArgOp = CB.getArgOperand(ArgNo);
@@ -8611,31 +8660,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
static bool calculateICmpInst(const ICmpInst *ICI, const APInt &LHS,
const APInt &RHS) {
- ICmpInst::Predicate Pred = ICI->getPredicate();
- switch (Pred) {
- case ICmpInst::ICMP_UGT:
- return LHS.ugt(RHS);
- case ICmpInst::ICMP_SGT:
- return LHS.sgt(RHS);
- case ICmpInst::ICMP_EQ:
- return LHS.eq(RHS);
- case ICmpInst::ICMP_UGE:
- return LHS.uge(RHS);
- case ICmpInst::ICMP_SGE:
- return LHS.sge(RHS);
- case ICmpInst::ICMP_ULT:
- return LHS.ult(RHS);
- case ICmpInst::ICMP_SLT:
- return LHS.slt(RHS);
- case ICmpInst::ICMP_NE:
- return LHS.ne(RHS);
- case ICmpInst::ICMP_ULE:
- return LHS.ule(RHS);
- case ICmpInst::ICMP_SLE:
- return LHS.sle(RHS);
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- }
+ return ICmpInst::compare(LHS, RHS, ICI->getPredicate());
}
static APInt calculateCastInst(const CastInst *CI, const APInt &Src,
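A minimal use, not from the patch, of the ICmpInst::compare helper adopted above; the predicate is evaluated directly on two APInts, replacing the hand-written switch:

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool demoCompare() {
      APInt A(32, 5), B(32, 7);
      return ICmpInst::compare(A, B, ICmpInst::ICMP_ULT); // true: 5 u< 7
    }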
@@ -8675,25 +8700,25 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
case Instruction::Mul:
return LHS * RHS;
case Instruction::UDiv:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.udiv(RHS);
case Instruction::SDiv:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.sdiv(RHS);
case Instruction::URem:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
return LHS.urem(RHS);
case Instruction::SRem:
- if (RHS.isNullValue()) {
+ if (RHS.isZero()) {
SkipOperation = true;
return LHS;
}
@@ -9292,32 +9317,69 @@ struct AANoUndefCallSiteReturned final
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
};
-struct AACallEdgesFunction : public AACallEdges {
- AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
- : AACallEdges(IRP, A) {}
+struct AACallEdgesImpl : public AACallEdges {
+ AACallEdgesImpl(const IRPosition &IRP, Attributor &A) : AACallEdges(IRP, A) {}
+
+ virtual const SetVector<Function *> &getOptimisticEdges() const override {
+ return CalledFunctions;
+ }
+
+ virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+
+ virtual bool hasNonAsmUnknownCallee() const override {
+ return HasUnknownCalleeNonAsm;
+ }
+
+ const std::string getAsStr() const override {
+ return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
+ std::to_string(CalledFunctions.size()) + "]";
+ }
+ void trackStatistics() const override {}
+
+protected:
+ void addCalledFunction(Function *Fn, ChangeStatus &Change) {
+ if (CalledFunctions.insert(Fn)) {
+ Change = ChangeStatus::CHANGED;
+ LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
+ << "\n");
+ }
+ }
+
+ void setHasUnknownCallee(bool NonAsm, ChangeStatus &Change) {
+ if (!HasUnknownCallee)
+ Change = ChangeStatus::CHANGED;
+ if (NonAsm && !HasUnknownCalleeNonAsm)
+ Change = ChangeStatus::CHANGED;
+ HasUnknownCalleeNonAsm |= NonAsm;
+ HasUnknownCallee = true;
+ }
+
+private:
+ /// Optimistic set of functions that might be called by this position.
+ SetVector<Function *> CalledFunctions;
+
+ /// Is there any call with an unknown callee.
+ bool HasUnknownCallee = false;
+
+ /// Is there any call with an unknown callee, excluding any inline asm.
+ bool HasUnknownCalleeNonAsm = false;
+};
+
+struct AACallEdgesCallSite : public AACallEdgesImpl {
+ AACallEdgesCallSite(const IRPosition &IRP, Attributor &A)
+ : AACallEdgesImpl(IRP, A) {}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Change = ChangeStatus::UNCHANGED;
- bool OldHasUnknownCallee = HasUnknownCallee;
- bool OldHasUnknownCalleeNonAsm = HasUnknownCalleeNonAsm;
-
- auto AddCalledFunction = [&](Function *Fn) {
- if (CalledFunctions.insert(Fn)) {
- Change = ChangeStatus::CHANGED;
- LLVM_DEBUG(dbgs() << "[AACallEdges] New call edge: " << Fn->getName()
- << "\n");
- }
- };
auto VisitValue = [&](Value &V, const Instruction *CtxI, bool &HasUnknown,
bool Stripped) -> bool {
if (Function *Fn = dyn_cast<Function>(&V)) {
- AddCalledFunction(Fn);
+ addCalledFunction(Fn, Change);
} else {
LLVM_DEBUG(dbgs() << "[AACallEdges] Unrecognized value: " << V << "\n");
- HasUnknown = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
// Explore all values.
@@ -9325,44 +9387,67 @@ struct AACallEdgesFunction : public AACallEdges {
};
// Process any value that we might call.
- auto ProcessCalledOperand = [&](Value *V, Instruction *Ctx) {
+ auto ProcessCalledOperand = [&](Value *V) {
+ bool DummyValue = false;
if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
- HasUnknownCallee, VisitValue, nullptr,
+ DummyValue, VisitValue, nullptr,
false)) {
// If we haven't gone through all values, assume that there are unknown
// callees.
- HasUnknownCallee = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
};
- auto ProcessCallInst = [&](Instruction &Inst) {
- CallBase &CB = static_cast<CallBase &>(Inst);
- if (CB.isInlineAsm()) {
- HasUnknownCallee = true;
- return true;
- }
+ CallBase *CB = static_cast<CallBase *>(getCtxI());
- // Process callee metadata if available.
- if (auto *MD = Inst.getMetadata(LLVMContext::MD_callees)) {
- for (auto &Op : MD->operands()) {
- Function *Callee = mdconst::extract_or_null<Function>(Op);
- if (Callee)
- AddCalledFunction(Callee);
- }
- // Callees metadata grantees that the called function is one of its
- // operands, So we are done.
- return true;
+ if (CB->isInlineAsm()) {
+ setHasUnknownCallee(false, Change);
+ return Change;
+ }
+
+ // Process callee metadata if available.
+ if (auto *MD = getCtxI()->getMetadata(LLVMContext::MD_callees)) {
+ for (auto &Op : MD->operands()) {
+ Function *Callee = mdconst::dyn_extract_or_null<Function>(Op);
+ if (Callee)
+ addCalledFunction(Callee, Change);
}
+ return Change;
+ }
- // The most simple case.
- ProcessCalledOperand(CB.getCalledOperand(), &Inst);
+ // The most simple case.
+ ProcessCalledOperand(CB->getCalledOperand());
- // Process callback functions.
- SmallVector<const Use *, 4u> CallbackUses;
- AbstractCallSite::getCallbackUses(CB, CallbackUses);
- for (const Use *U : CallbackUses)
- ProcessCalledOperand(U->get(), &Inst);
+ // Process callback functions.
+ SmallVector<const Use *, 4u> CallbackUses;
+ AbstractCallSite::getCallbackUses(*CB, CallbackUses);
+ for (const Use *U : CallbackUses)
+ ProcessCalledOperand(U->get());
+
+ return Change;
+ }
+};
+
+struct AACallEdgesFunction : public AACallEdgesImpl {
+ AACallEdgesFunction(const IRPosition &IRP, Attributor &A)
+ : AACallEdgesImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto ProcessCallInst = [&](Instruction &Inst) {
+ CallBase &CB = static_cast<CallBase &>(Inst);
+
+ auto &CBEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
+ if (CBEdges.hasNonAsmUnknownCallee())
+ setHasUnknownCallee(true, Change);
+ if (CBEdges.hasUnknownCallee())
+ setHasUnknownCallee(false, Change);
+
+ for (Function *F : CBEdges.getOptimisticEdges())
+ addCalledFunction(F, Change);
return true;
};
@@ -9373,155 +9458,323 @@ struct AACallEdgesFunction : public AACallEdges {
UsedAssumedInformation)) {
// If we haven't looked at all call like instructions, assume that there
// are unknown callees.
- HasUnknownCallee = true;
- HasUnknownCalleeNonAsm = true;
+ setHasUnknownCallee(true, Change);
}
- // Track changes.
- if (OldHasUnknownCallee != HasUnknownCallee ||
- OldHasUnknownCalleeNonAsm != HasUnknownCalleeNonAsm)
- Change = ChangeStatus::CHANGED;
-
return Change;
}
+};
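The refactor above splits AACallEdges into a call-site attribute and a function attribute that merely aggregates the per-call-site results. A minimal, self-contained sketch of that aggregation shape follows; the struct names and fields are illustrative stand-ins, not the Attributor API.

#include <set>
#include <string>
#include <vector>

// Hypothetical per-call-site result, standing in for AACallEdgesCallSite.
struct CallSiteEdges {
  std::set<std::string> Callees;       // optimistic set of known callees
  bool HasUnknownCallee = false;       // e.g. inline asm or unresolved value
  bool HasNonAsmUnknownCallee = false; // unknown callee that is not inline asm
};

// Stand-in for AACallEdgesFunction: union the call-site sets, OR the flags.
struct FunctionEdges {
  std::set<std::string> Callees;
  bool HasUnknownCallee = false;
  bool HasNonAsmUnknownCallee = false;

  void aggregate(const std::vector<CallSiteEdges> &CallSites) {
    for (const CallSiteEdges &CS : CallSites) {
      Callees.insert(CS.Callees.begin(), CS.Callees.end());
      HasUnknownCallee |= CS.HasUnknownCallee;
      HasNonAsmUnknownCallee |= CS.HasNonAsmUnknownCallee;
    }
  }
};

In the real pass the aggregation is incremental and reports a ChangeStatus so the fixpoint iteration knows whether dependent attributes need to be revisited.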
- virtual const SetVector<Function *> &getOptimisticEdges() const override {
- return CalledFunctions;
- };
+struct AAFunctionReachabilityFunction : public AAFunctionReachability {
+private:
+ struct QuerySet {
+ void markReachable(Function *Fn) {
+ Reachable.insert(Fn);
+ Unreachable.erase(Fn);
+ }
+
+ ChangeStatus update(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList) {
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ for (auto *AAEdges : AAEdgesList) {
+ if (AAEdges->hasUnknownCallee()) {
+ if (!CanReachUnknownCallee)
+ Change = ChangeStatus::CHANGED;
+ CanReachUnknownCallee = true;
+ return Change;
+ }
+ }
- virtual bool hasUnknownCallee() const override { return HasUnknownCallee; }
+ for (Function *Fn : make_early_inc_range(Unreachable)) {
+ if (checkIfReachable(A, AA, AAEdgesList, Fn)) {
+ Change = ChangeStatus::CHANGED;
+ markReachable(Fn);
+ }
+ }
+ return Change;
+ }
- virtual bool hasNonAsmUnknownCallee() const override {
- return HasUnknownCalleeNonAsm;
- }
+ bool isReachable(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList, Function *Fn) {
+ // Assume that we can reach the function.
+ // TODO: Be more specific with the unknown callee.
+ if (CanReachUnknownCallee)
+ return true;
- const std::string getAsStr() const override {
- return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
- std::to_string(CalledFunctions.size()) + "]";
- }
+ if (Reachable.count(Fn))
+ return true;
- void trackStatistics() const override {}
+ if (Unreachable.count(Fn))
+ return false;
- /// Optimistic set of functions that might be called by this function.
- SetVector<Function *> CalledFunctions;
+ // We need to assume that this function can't reach Fn to prevent
+ // an infinite loop if this function is recursive.
+ Unreachable.insert(Fn);
- /// Is there any call with a unknown callee.
- bool HasUnknownCallee = false;
+ bool Result = checkIfReachable(A, AA, AAEdgesList, Fn);
+ if (Result)
+ markReachable(Fn);
+ return Result;
+ }
- /// Is there any call with a unknown callee, excluding any inline asm.
- bool HasUnknownCalleeNonAsm = false;
-};
+ bool checkIfReachable(Attributor &A, const AAFunctionReachability &AA,
+ ArrayRef<const AACallEdges *> AAEdgesList,
+ Function *Fn) const {
-struct AAFunctionReachabilityFunction : public AAFunctionReachability {
- AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
- : AAFunctionReachability(IRP, A) {}
+ // Handle the most trivial case first.
+ for (auto *AAEdges : AAEdgesList) {
+ const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
- bool canReach(Attributor &A, Function *Fn) const override {
- // Assume that we can reach any function if we can reach a call with
- // unknown callee.
- if (CanReachUnknownCallee)
- return true;
+ if (Edges.count(Fn))
+ return true;
+ }
- if (ReachableQueries.count(Fn))
- return true;
+ SmallVector<const AAFunctionReachability *, 8> Deps;
+ for (auto &AAEdges : AAEdgesList) {
+ const SetVector<Function *> &Edges = AAEdges->getOptimisticEdges();
+
+ for (Function *Edge : Edges) {
+ // We don't need a dependency if the result is reachable.
+ const AAFunctionReachability &EdgeReachability =
+ A.getAAFor<AAFunctionReachability>(
+ AA, IRPosition::function(*Edge), DepClassTy::NONE);
+ Deps.push_back(&EdgeReachability);
+
+ if (EdgeReachability.canReach(A, Fn))
+ return true;
+ }
+ }
+
+ // The result is false for now, set dependencies and leave.
+ for (auto Dep : Deps)
+ A.recordDependence(AA, *Dep, DepClassTy::REQUIRED);
- if (UnreachableQueries.count(Fn))
return false;
+ }
+
+ /// Set of functions that we know for sure is reachable.
+ DenseSet<Function *> Reachable;
+
+ /// Set of functions that are unreachable, but might become reachable.
+ DenseSet<Function *> Unreachable;
+
+ /// If we can reach a function with a call to an unknown function we assume
+ /// that we can reach any function.
+ bool CanReachUnknownCallee = false;
+ };
+
+public:
+ AAFunctionReachabilityFunction(const IRPosition &IRP, Attributor &A)
+ : AAFunctionReachability(IRP, A) {}
+ bool canReach(Attributor &A, Function *Fn) const override {
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
- const SetVector<Function *> &Edges = AAEdges.getOptimisticEdges();
- bool Result = checkIfReachable(A, Edges, Fn);
+ // Attributor returns attributes as const, so this function has to be
+ // const for users of this attribute to use it without having to do
+ // a const_cast.
+ // This is a hack for us to be able to cache queries.
+ auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ bool Result =
+ NonConstThis->WholeFunction.isReachable(A, *this, {&AAEdges}, Fn);
+
+ return Result;
+ }
+
+ /// Can \p CB reach \p Fn
+ bool canReach(Attributor &A, CallBase &CB, Function *Fn) const override {
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
// Attributor returns attributes as const, so this function has to be
// const for users of this attribute to use it without having to do
// a const_cast.
// This is a hack for us to be able to cache queries.
auto *NonConstThis = const_cast<AAFunctionReachabilityFunction *>(this);
+ QuerySet &CBQuery = NonConstThis->CBQueries[&CB];
- if (Result)
- NonConstThis->ReachableQueries.insert(Fn);
- else
- NonConstThis->UnreachableQueries.insert(Fn);
+ bool Result = CBQuery.isReachable(A, *this, {&AAEdges}, Fn);
return Result;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- if (CanReachUnknownCallee)
- return ChangeStatus::UNCHANGED;
-
const AACallEdges &AAEdges =
A.getAAFor<AACallEdges>(*this, getIRPosition(), DepClassTy::REQUIRED);
- const SetVector<Function *> &Edges = AAEdges.getOptimisticEdges();
ChangeStatus Change = ChangeStatus::UNCHANGED;
- if (AAEdges.hasUnknownCallee()) {
- bool OldCanReachUnknown = CanReachUnknownCallee;
- CanReachUnknownCallee = true;
- return OldCanReachUnknown ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
+ Change |= WholeFunction.update(A, *this, {&AAEdges});
- // Check if any of the unreachable functions become reachable.
- for (auto Current = UnreachableQueries.begin();
- Current != UnreachableQueries.end();) {
- if (!checkIfReachable(A, Edges, *Current)) {
- Current++;
- continue;
- }
- ReachableQueries.insert(*Current);
- UnreachableQueries.erase(*Current++);
- Change = ChangeStatus::CHANGED;
+ for (auto CBPair : CBQueries) {
+ const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ *this, IRPosition::callsite_function(*CBPair.first),
+ DepClassTy::REQUIRED);
+
+ Change |= CBPair.second.update(A, *this, {&AAEdges});
}
return Change;
}
const std::string getAsStr() const override {
- size_t QueryCount = ReachableQueries.size() + UnreachableQueries.size();
+ size_t QueryCount =
+ WholeFunction.Reachable.size() + WholeFunction.Unreachable.size();
- return "FunctionReachability [" + std::to_string(ReachableQueries.size()) +
- "," + std::to_string(QueryCount) + "]";
+ return "FunctionReachability [" +
+ std::to_string(WholeFunction.Reachable.size()) + "," +
+ std::to_string(QueryCount) + "]";
}
void trackStatistics() const override {}
private:
- bool canReachUnknownCallee() const override { return CanReachUnknownCallee; }
+ bool canReachUnknownCallee() const override {
+ return WholeFunction.CanReachUnknownCallee;
+ }
- bool checkIfReachable(Attributor &A, const SetVector<Function *> &Edges,
- Function *Fn) const {
- if (Edges.count(Fn))
- return true;
+ /// Used to answer if the whole function can reach a specific function.
+ QuerySet WholeFunction;
- for (Function *Edge : Edges) {
- // We don't need a dependency if the result is reachable.
- const AAFunctionReachability &EdgeReachability =
- A.getAAFor<AAFunctionReachability>(*this, IRPosition::function(*Edge),
- DepClassTy::NONE);
+ /// Used to answer if a call base inside this function can reach a specific
+ /// function.
+ DenseMap<CallBase *, QuerySet> CBQueries;
+};
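The QuerySet above memoizes reachability answers and inserts the target into Unreachable before recursing so that recursive call graphs terminate; the Attributor's fixpoint iteration later migrates entries from Unreachable to Reachable as the edge sets grow. A standalone sketch of the same caching idea on a toy call graph (the names and types are hypothetical, not the Attributor interface):

#include <map>
#include <set>
#include <string>
#include <vector>

struct ToyReachability {
  // Toy call graph: caller name -> direct callees.
  std::map<std::string, std::vector<std::string>> Edges;
  // Per-root caches, mirroring QuerySet::Reachable / QuerySet::Unreachable.
  std::map<std::string, std::set<std::string>> Reachable, Unreachable;

  bool canReach(const std::string &From, const std::string &To) {
    if (Reachable[From].count(To))
      return true;
    if (Unreachable[From].count(To))
      return false;
    // Tentatively assume 'To' is unreachable so a cycle through 'From'
    // terminates instead of recursing forever.
    Unreachable[From].insert(To);
    for (const std::string &Callee : Edges[From]) {
      if (Callee == To || canReach(Callee, To)) {
        Unreachable[From].erase(To);
        Reachable[From].insert(To);
        return true;
      }
    }
    return false; // stays cached as unreachable until a later update
  }
};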
- if (EdgeReachability.canReach(A, Fn))
- return true;
- }
- for (Function *Fn : Edges)
- A.getAAFor<AAFunctionReachability>(*this, IRPosition::function(*Fn),
- DepClassTy::REQUIRED);
+/// ---------------------- Assumption Propagation ------------------------------
+struct AAAssumptionInfoImpl : public AAAssumptionInfo {
+ AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A,
+ const DenseSet<StringRef> &Known)
+ : AAAssumptionInfo(IRP, A, Known) {}
- return false;
+ bool hasAssumption(const StringRef Assumption) const override {
+ return isValidState() && setContains(Assumption);
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr() const override {
+ const SetContents &Known = getKnown();
+ const SetContents &Assumed = getAssumed();
+
+ const std::string KnownStr =
+ llvm::join(Known.getSet().begin(), Known.getSet().end(), ",");
+ const std::string AssumedStr =
+ (Assumed.isUniversal())
+ ? "Universal"
+ : llvm::join(Assumed.getSet().begin(), Assumed.getSet().end(), ",");
+
+ return "Known [" + KnownStr + "]," + " Assumed [" + AssumedStr + "]";
+ }
+};
+
+/// Propagates assumption information from parent functions to all of their
+/// successors. An assumption can be propagated if the containing function
+/// dominates the called function.
+///
+/// We start with a "known" set of assumptions already valid for the associated
+/// function and an "assumed" set that initially contains all possible
+/// assumptions. The assumed set is inter-procedurally updated by narrowing its
+/// contents as concrete values are known. The concrete values are seeded by the
+/// first nodes that are either entries into the call graph or contain no
+/// assumptions. Each node is updated as the intersection of the assumed state
+/// with all of its predecessors.
+struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
+ AAAssumptionInfoFunction(const IRPosition &IRP, Attributor &A)
+ : AAAssumptionInfoImpl(IRP, A,
+ getAssumptions(*IRP.getAssociatedFunction())) {}
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ const auto &Assumptions = getKnown();
+
+ // Don't manifest a universal set if it somehow made it here.
+ if (Assumptions.isUniversal())
+ return ChangeStatus::UNCHANGED;
+
+ Function *AssociatedFunction = getAssociatedFunction();
+
+ bool Changed = addAssumptions(*AssociatedFunction, Assumptions.getSet());
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool Changed = false;
+
+ auto CallSitePred = [&](AbstractCallSite ACS) {
+ const auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ *this, IRPosition::callsite_function(*ACS.getInstruction()),
+ DepClassTy::REQUIRED);
+ // Get the set of assumptions shared by all of this function's callers.
+ Changed |= getIntersection(AssumptionAA.getAssumed());
+ return !getAssumed().empty() || !getKnown().empty();
+ };
+
+ bool AllCallSitesKnown;
+ // Get the intersection of all assumptions held by this node's predecessors.
+ // If we don't know all the call sites then this is either an entry into the
+ // call graph or an empty node. This node is known to only contain its own
+ // assumptions and can be propagated to its successors.
+ if (!A.checkForAllCallSites(CallSitePred, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override {}
+};
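A compact sketch of the narrowing described in the comment above: the assumed set starts universal and is intersected with the assumptions of every caller. This is simplified to plain string sets; the helper name is made up for illustration.

#include <optional>
#include <set>
#include <string>
#include <vector>

using AssumptionSet = std::set<std::string>;

// nullopt plays the role of the "universal" assumed set.
AssumptionSet intersectCallerAssumptions(
    const std::vector<AssumptionSet> &Callers) {
  std::optional<AssumptionSet> Assumed;
  for (const AssumptionSet &C : Callers) {
    if (!Assumed) {
      Assumed = C; // first caller seeds the concrete set
      continue;
    }
    AssumptionSet Narrowed;
    for (const std::string &S : *Assumed)
      if (C.count(S))
        Narrowed.insert(S);
    *Assumed = std::move(Narrowed);
  }
  return Assumed.value_or(AssumptionSet{});
}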
+
+/// Assumption Info defined for call sites.
+struct AAAssumptionInfoCallSite final : AAAssumptionInfoImpl {
+
+ AAAssumptionInfoCallSite(const IRPosition &IRP, Attributor &A)
+ : AAAssumptionInfoImpl(IRP, A, getInitialAssumptions(IRP)) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
}
- /// Set of functions that we know for sure is reachable.
- SmallPtrSet<Function *, 8> ReachableQueries;
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // Don't manifest a universal set if it somehow made it here.
+ if (getKnown().isUniversal())
+ return ChangeStatus::UNCHANGED;
+
+ CallBase &AssociatedCall = cast<CallBase>(getAssociatedValue());
+ bool Changed = addAssumptions(AssociatedCall, getAssumed().getSet());
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ auto &AssumptionAA =
+ A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
+ bool Changed = getIntersection(AssumptionAA.getAssumed());
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
- /// Set of functions that are unreachable, but might become reachable.
- SmallPtrSet<Function *, 8> UnreachableQueries;
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
- /// If we can reach a function with a call to a unknown function we assume
- /// that we can reach any function.
- bool CanReachUnknownCallee = false;
+private:
+ /// Helper to initialize the known set to all the assumptions this call and
+ /// the callee contain.
+ DenseSet<StringRef> getInitialAssumptions(const IRPosition &IRP) {
+ const CallBase &CB = cast<CallBase>(IRP.getAssociatedValue());
+ auto Assumptions = getAssumptions(CB);
+ if (Function *F = IRP.getAssociatedFunction())
+ set_union(Assumptions, getAssumptions(*F));
+ return Assumptions;
+ }
};
} // namespace
@@ -9559,6 +9812,7 @@ const char AANoUndef::ID = 0;
const char AACallEdges::ID = 0;
const char AAFunctionReachability::ID = 0;
const char AAPointerInfo::ID = 0;
+const char AAAssumptionInfo::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -9660,6 +9914,8 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
@@ -9679,7 +9935,6 @@ CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFree)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReachability)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
-CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAFunctionReachability)
CREATE_NON_RET_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryBehavior)
diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 8e81f4bad4af..178d3f41963e 100644
--- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -153,33 +153,30 @@ static bool mergeConstants(Module &M) {
// were just merged.
while (true) {
// Find the canonical constants others will be merged with.
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
-
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// If this GV is dead, remove it.
- GV->removeDeadConstantUsers();
- if (GV->use_empty() && GV->hasLocalLinkage()) {
- GV->eraseFromParent();
+ GV.removeDeadConstantUsers();
+ if (GV.use_empty() && GV.hasLocalLinkage()) {
+ GV.eraseFromParent();
++ChangesMade;
continue;
}
- if (isUnmergeableGlobal(GV, UsedGlobals))
+ if (isUnmergeableGlobal(&GV, UsedGlobals))
continue;
// This transformation is legal for weak ODR globals in the sense it
// doesn't change semantics, but we really don't want to perform it
// anyway; it's likely to pessimize code generation, and some tools
// (like the Darwin linker in cases involving CFString) don't expect it.
- if (GV->isWeakForLinker())
+ if (GV.isWeakForLinker())
continue;
// Don't touch globals with metadata other than !dbg.
- if (hasMetadataOtherThanDebugLoc(GV))
+ if (hasMetadataOtherThanDebugLoc(&GV))
continue;
- Constant *Init = GV->getInitializer();
+ Constant *Init = GV.getInitializer();
// Check to see if the initializer is already known.
GlobalVariable *&Slot = CMap[Init];
@@ -188,9 +185,9 @@ static bool mergeConstants(Module &M) {
// replace with the current one. If the current is externally visible
// it cannot be replaced, but can be the canonical constant we merge with.
bool FirstConstantFound = !Slot;
- if (FirstConstantFound || IsBetterCanonical(*GV, *Slot)) {
- Slot = GV;
- LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV->getName()
+ if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) {
+ Slot = &GV;
+ LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName()
<< (FirstConstantFound ? "\n" : " (updated)\n"));
}
}
@@ -199,18 +196,15 @@ static bool mergeConstants(Module &M) {
// SameContentReplacements vector. We cannot do the replacement in this pass
// because doing so may cause initializers of other globals to be rewritten,
// invalidating the Constant* pointers in CMap.
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
-
- if (isUnmergeableGlobal(GV, UsedGlobals))
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
+ if (isUnmergeableGlobal(&GV, UsedGlobals))
continue;
// We can only replace constant with local linkage.
- if (!GV->hasLocalLinkage())
+ if (!GV.hasLocalLinkage())
continue;
- Constant *Init = GV->getInitializer();
+ Constant *Init = GV.getInitializer();
// Check to see if the initializer is already known.
auto Found = CMap.find(Init);
@@ -218,16 +212,16 @@ static bool mergeConstants(Module &M) {
continue;
GlobalVariable *Slot = Found->second;
- if (Slot == GV)
+ if (Slot == &GV)
continue;
- if (makeMergeable(GV, Slot) == CanMerge::No)
+ if (makeMergeable(&GV, Slot) == CanMerge::No)
continue;
// Make all uses of the duplicate constant use the canonical version.
- LLVM_DEBUG(dbgs() << "Will replace: @" << GV->getName() << " -> @"
+ LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @"
<< Slot->getName() << "\n");
- SameContentReplacements.push_back(std::make_pair(GV, Slot));
+ SameContentReplacements.push_back(std::make_pair(&GV, Slot));
}
// Now that we have figured out which replacements must be made, do them all
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d95fd55870f8..fb9ab7954e36 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -175,8 +175,8 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
// to pass in a smaller number of arguments into the new function.
//
std::vector<Value *> Args;
- for (Value::user_iterator I = Fn.user_begin(), E = Fn.user_end(); I != E; ) {
- CallBase *CB = dyn_cast<CallBase>(*I++);
+ for (User *U : llvm::make_early_inc_range(Fn.users())) {
+ CallBase *CB = dyn_cast<CallBase>(U);
if (!CB)
continue;
@@ -188,9 +188,9 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
if (!PAL.isEmpty()) {
SmallVector<AttributeSet, 8> ArgAttrs;
for (unsigned ArgNo = 0; ArgNo < NumArgs; ++ArgNo)
- ArgAttrs.push_back(PAL.getParamAttributes(ArgNo));
- PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttributes(),
- PAL.getRetAttributes(), ArgAttrs);
+ ArgAttrs.push_back(PAL.getParamAttrs(ArgNo));
+ PAL = AttributeList::get(Fn.getContext(), PAL.getFnAttrs(),
+ PAL.getRetAttrs(), ArgAttrs);
}
SmallVector<OperandBundleDef, 1> OpBundles;
@@ -762,8 +762,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (LiveValues.erase(Arg)) {
Params.push_back(I->getType());
ArgAlive[ArgI] = true;
- ArgAttrVec.push_back(PAL.getParamAttributes(ArgI));
- HasLiveReturnedArg |= PAL.hasParamAttribute(ArgI, Attribute::Returned);
+ ArgAttrVec.push_back(PAL.getParamAttrs(ArgI));
+ HasLiveReturnedArg |= PAL.hasParamAttr(ArgI, Attribute::Returned);
} else {
++NumArgumentsEliminated;
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Removing argument "
@@ -838,7 +838,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
assert(NRetTy && "No new return type found?");
// The existing function return attributes.
- AttrBuilder RAttrs(PAL.getRetAttributes());
+ AttrBuilder RAttrs(PAL.getRetAttrs());
// Remove any incompatible attributes, but only if we removed all return
// values. Otherwise, ensure that we don't have any conflicting attributes
@@ -853,8 +853,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
// Strip allocsize attributes. They might refer to the deleted arguments.
- AttributeSet FnAttrs = PAL.getFnAttributes().removeAttribute(
- F->getContext(), Attribute::AllocSize);
+ AttributeSet FnAttrs =
+ PAL.getFnAttrs().removeAttribute(F->getContext(), Attribute::AllocSize);
// Reconstruct the AttributesList based on the vector we constructed.
assert(ArgAttrVec.size() == Params.size());
@@ -889,7 +889,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Adjust the call return attributes in case the function was changed to
// return void.
- AttrBuilder RAttrs(CallPAL.getRetAttributes());
+ AttrBuilder RAttrs(CallPAL.getRetAttrs());
RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy));
AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs);
@@ -903,7 +903,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
if (ArgAlive[Pi]) {
Args.push_back(*I);
// Get original parameter attributes, but skip return attributes.
- AttributeSet Attrs = CallPAL.getParamAttributes(Pi);
+ AttributeSet Attrs = CallPAL.getParamAttrs(Pi);
if (NRetTy != RetTy && Attrs.hasAttribute(Attribute::Returned)) {
// If the return type has changed, then get rid of 'returned' on the
// call site. The alternative is to make all 'returned' attributes on
@@ -922,7 +922,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Push any varargs arguments on the list. Don't forget their attributes.
for (auto E = CB.arg_end(); I != E; ++I, ++Pi) {
Args.push_back(*I);
- ArgAttrVec.push_back(CallPAL.getParamAttributes(Pi));
+ ArgAttrVec.push_back(CallPAL.getParamAttrs(Pi));
}
// Reconstruct the AttributesList based on the vector we constructed.
@@ -930,7 +930,7 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
// Again, be sure to remove any allocsize attributes, since their indices
// may now be incorrect.
- AttributeSet FnAttrs = CallPAL.getFnAttributes().removeAttribute(
+ AttributeSet FnAttrs = CallPAL.getFnAttrs().removeAttribute(
F->getContext(), Attribute::AllocSize);
AttributeList NewCallPAL = AttributeList::get(
@@ -1094,11 +1094,9 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
// fused with the next loop, because deleting a function invalidates
// information computed while surveying other functions.
LLVM_DEBUG(dbgs() << "DeadArgumentEliminationPass - Deleting dead varargs\n");
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function &F = *I++;
+ for (Function &F : llvm::make_early_inc_range(M))
if (F.getFunctionType()->isVarArg())
Changed |= DeleteDeadVarargs(F);
- }
// Second phase: loop through the module, determining which arguments are live.
// We assume all arguments are dead unless proven otherwise (allowing us to
@@ -1109,13 +1107,10 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
SurveyFunction(F);
// Now, remove all dead arguments and return values from each function in
- // turn.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- // Increment now, because the function will probably get removed (ie.
- // replaced by a new one).
- Function *F = &*I++;
- Changed |= RemoveDeadStuffFromFunction(F);
- }
+ // turn. We use make_early_inc_range here because functions will probably get
+ // removed (i.e. replaced by new ones).
+ for (Function &F : llvm::make_early_inc_range(M))
+ Changed |= RemoveDeadStuffFromFunction(&F);
// Finally, look for any unused parameters in functions with non-local
// linkage and replace the passed in parameters with undef.
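The loops above switch to llvm::make_early_inc_range because the current element may be erased or replaced mid-iteration. The same idiom, shown without LLVM's ADT on a plain std::list (a hypothetical helper, purely illustrative):

#include <list>
#include <string>

// Advance the iterator before possibly erasing the current element, so the
// erase never invalidates the iterator the loop continues with.
void dropEmptyNames(std::list<std::string> &Names) {
  for (auto It = Names.begin(), End = Names.end(); It != End;) {
    auto Cur = It++; // increment first ...
    if (Cur->empty())
      Names.erase(Cur); // ... so erasing *Cur is safe
  }
}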
diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp
index ba0efd46af16..387f114f6ffa 100644
--- a/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -121,32 +121,27 @@ namespace {
}
// Visit the Aliases.
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E;) {
- Module::alias_iterator CurI = I;
- ++I;
-
- bool Delete = deleteStuff == (bool)Named.count(&*CurI);
- makeVisible(*CurI, Delete);
+ for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
+ bool Delete = deleteStuff == (bool)Named.count(&GA);
+ makeVisible(GA, Delete);
if (Delete) {
- Type *Ty = CurI->getValueType();
+ Type *Ty = GA.getValueType();
- CurI->removeFromParent();
+ GA.removeFromParent();
llvm::Value *Declaration;
if (FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
- Declaration = Function::Create(FTy, GlobalValue::ExternalLinkage,
- CurI->getAddressSpace(),
- CurI->getName(), &M);
+ Declaration =
+ Function::Create(FTy, GlobalValue::ExternalLinkage,
+ GA.getAddressSpace(), GA.getName(), &M);
} else {
Declaration =
- new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
- nullptr, CurI->getName());
-
+ new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GA.getName());
}
- CurI->replaceAllUsesWith(Declaration);
- delete &*CurI;
+ GA.replaceAllUsesWith(Declaration);
+ delete &GA;
}
}
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 47fdf042f9d4..16d00a0c89e1 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -50,14 +50,14 @@ static void forceAttributes(Function &F) {
return Kind;
};
- for (auto &S : ForceAttributes) {
+ for (const auto &S : ForceAttributes) {
auto Kind = ParseFunctionAndAttr(S);
if (Kind == Attribute::None || F.hasFnAttribute(Kind))
continue;
F.addFnAttr(Kind);
}
- for (auto &S : ForceRemoveAttributes) {
+ for (const auto &S : ForceRemoveAttributes) {
auto Kind = ParseFunctionAndAttr(S);
if (Kind == Attribute::None || !F.hasFnAttribute(Kind))
continue;
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index ca8660a98ded..cde78713b554 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -14,10 +14,12 @@
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -82,6 +84,11 @@ STATISTIC(NumNoFree, "Number of functions marked as nofree");
STATISTIC(NumWillReturn, "Number of functions marked as willreturn");
STATISTIC(NumNoSync, "Number of functions marked as nosync");
+STATISTIC(NumThinLinkNoRecurse,
+ "Number of functions marked as norecurse during thinlink");
+STATISTIC(NumThinLinkNoUnwind,
+ "Number of functions marked as nounwind during thinlink");
+
static cl::opt<bool> EnableNonnullArgPropagation(
"enable-nonnull-arg-prop", cl::init(true), cl::Hidden,
cl::desc("Try to propagate nonnull argument attributes from callsites to "
@@ -95,6 +102,10 @@ static cl::opt<bool> DisableNoFreeInference(
"disable-nofree-inference", cl::Hidden,
cl::desc("Stop inferring nofree attribute during function-attrs pass"));
+static cl::opt<bool> DisableThinLTOPropagation(
+ "disable-thinlto-funcattrs", cl::init(true), cl::Hidden,
+ cl::desc("Don't propagate function-attrs in thinLTO"));
+
namespace {
using SCCNodeSet = SmallSetVector<Function *, 8>;
@@ -131,12 +142,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Scan the function body for instructions that may read or write memory.
bool ReadsMemory = false;
bool WritesMemory = false;
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
- Instruction *I = &*II;
-
+ for (Instruction &I : instructions(F)) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- if (auto *Call = dyn_cast<CallBase>(I)) {
+ if (auto *Call = dyn_cast<CallBase>(&I)) {
// Ignore calls to functions in the same SCC, as long as the call sites
// don't have operand bundles. Calls with operand bundles are allowed to
// have memory effects not described by the memory effects of the call
@@ -170,14 +179,13 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- for (auto CI = Call->arg_begin(), CE = Call->arg_end(); CI != CE; ++CI) {
- Value *Arg = *CI;
+ for (const Use &U : Call->args()) {
+ const Value *Arg = U;
if (!Arg->getType()->isPtrOrPtrVectorTy())
continue;
- AAMDNodes AAInfo;
- I->getAAMetadata(AAInfo);
- MemoryLocation Loc = MemoryLocation::getBeforeOrAfter(Arg, AAInfo);
+ MemoryLocation Loc =
+ MemoryLocation::getBeforeOrAfter(Arg, I.getAAMetadata());
// Skip accesses to local or constant memory as they don't impact the
// externally visible mod/ref behavior.
@@ -192,21 +200,21 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
ReadsMemory = true;
}
continue;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
// Ignore non-volatile loads from local memory. (Atomic is okay here.)
if (!LI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(LI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
// Ignore non-volatile stores to local memory. (Atomic is okay here.)
if (!SI->isVolatile()) {
MemoryLocation Loc = MemoryLocation::get(SI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
continue;
}
- } else if (VAArgInst *VI = dyn_cast<VAArgInst>(I)) {
+ } else if (VAArgInst *VI = dyn_cast<VAArgInst>(&I)) {
// Ignore vaargs on local memory.
MemoryLocation Loc = MemoryLocation::get(VI);
if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true))
@@ -217,10 +225,10 @@ static MemoryAccessKind checkFunctionMemoryAccess(Function &F, bool ThisBody,
// read or write memory.
//
// Writes memory, remember that.
- WritesMemory |= I->mayWriteToMemory();
+ WritesMemory |= I.mayWriteToMemory();
// If this instruction may read memory, remember that.
- ReadsMemory |= I->mayReadFromMemory();
+ ReadsMemory |= I.mayReadFromMemory();
}
if (WritesMemory) {
@@ -240,7 +248,8 @@ MemoryAccessKind llvm::computeFunctionBodyMemoryAccess(Function &F,
/// Deduce readonly/readnone attributes for the SCC.
template <typename AARGetterT>
-static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
+static void addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter,
+ SmallSet<Function *, 8> &Changed) {
// Check if any of the functions in the SCC read or write memory. If they
// write memory then they can't be marked readnone or readonly.
bool ReadsMemory = false;
@@ -255,7 +264,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
AAR, SCCNodes)) {
case MAK_MayWrite:
- return false;
+ return;
case MAK_ReadOnly:
ReadsMemory = true;
break;
@@ -271,11 +280,10 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
// If the SCC contains both functions that read and functions that write, then
// we cannot add readonly attributes.
if (ReadsMemory && WritesMemory)
- return false;
+ return;
// Success! Functions in this SCC do not access memory, or only read memory.
// Give them the appropriate attribute.
- bool MadeChange = false;
for (Function *F : SCCNodes) {
if (F->doesNotAccessMemory())
@@ -289,7 +297,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
if (F->doesNotReadMemory() && WritesMemory)
continue;
- MadeChange = true;
+ Changed.insert(F);
// Clear out any existing attributes.
AttrBuilder AttrsToRemove;
@@ -303,7 +311,7 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOnly);
AttrsToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
- F->removeAttributes(AttributeList::FunctionIndex, AttrsToRemove);
+ F->removeFnAttrs(AttrsToRemove);
// Add in the new attribute.
if (WritesMemory && !ReadsMemory)
@@ -318,8 +326,195 @@ static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT &&AARGetter) {
else
++NumReadNone;
}
+}
+
+// Compute definitive function attributes for a function taking into account
+// prevailing definitions and linkage types
+static FunctionSummary *calculatePrevailingSummary(
+ ValueInfo VI,
+ DenseMap<ValueInfo, FunctionSummary *> &CachedPrevailingSummary,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing) {
+
+ if (CachedPrevailingSummary.count(VI))
+ return CachedPrevailingSummary[VI];
+
+ /// At this point, prevailing symbols have been resolved. The following leads
+ /// to returning a conservative result:
+ /// - Multiple instances with local linkage. Normally local linkage would be
+ /// unique per module
+ /// as the GUID includes the module path. We could have a guid alias if
+ /// there wasn't any distinguishing path when each file was compiled, but
+ /// that should be rare so we'll punt on those.
+
+ /// These next 2 cases should not happen and will assert:
+ /// - Multiple instances with external linkage. This should be caught in
+ /// symbol resolution
+ /// - Non-existent FunctionSummary for Aliasee. This presents a hole in our
+ /// knowledge meaning we have to go conservative.
+
+ /// Otherwise, we calculate attributes for a function as:
+ /// 1. If we have a local linkage, take its attributes. If there's somehow
+ /// multiple, bail and go conservative.
+ /// 2. If we have an external/WeakODR/LinkOnceODR linkage check that it is
+ /// prevailing, take its attributes.
+ /// 3. If we have a Weak/LinkOnce linkage the copies can have semantic
+ /// differences. However, if the prevailing copy is known it will be used
+ /// so take its attributes. If the prevailing copy is in a native file
+ /// all IR copies will be dead and propagation will go conservative.
+ /// 4. AvailableExternally summaries without a prevailing copy are known to
+ /// occur in a couple of circumstances:
+ /// a. An internal function gets imported due to its caller getting
+ /// imported, it becomes AvailableExternally but no prevailing
+ /// definition exists. Because it has to get imported along with its
+ /// caller the attributes will be captured by propagating on its
+ /// caller.
+ /// b. C++11 [temp.explicit]p10 can generate AvailableExternally
+ /// definitions of explicitly instantiated template declarations
+ /// for inlining which are ultimately dropped from the TU. Since this
+ /// is localized to the TU the attributes will have already made it to
+ /// the callers.
+ /// These are edge cases and already captured by their callers so we
+ /// ignore these for now. If they become relevant to optimize in the
+ /// future this can be revisited.
+ /// 5. Otherwise, go conservative.
+
+ CachedPrevailingSummary[VI] = nullptr;
+ FunctionSummary *Local = nullptr;
+ FunctionSummary *Prevailing = nullptr;
+
+ for (const auto &GVS : VI.getSummaryList()) {
+ if (!GVS->isLive())
+ continue;
+
+ FunctionSummary *FS = dyn_cast<FunctionSummary>(GVS->getBaseObject());
+ // Virtual and Unknown (e.g. indirect) calls require going conservative
+ if (!FS || FS->fflags().HasUnknownCall)
+ return nullptr;
+
+ const auto &Linkage = GVS->linkage();
+ if (GlobalValue::isLocalLinkage(Linkage)) {
+ if (Local) {
+ LLVM_DEBUG(
+ dbgs()
+ << "ThinLTO FunctionAttrs: Multiple Local Linkage, bailing on "
+ "function "
+ << VI.name() << " from " << FS->modulePath() << ". Previous module "
+ << Local->modulePath() << "\n");
+ return nullptr;
+ }
+ Local = FS;
+ } else if (GlobalValue::isExternalLinkage(Linkage)) {
+ assert(IsPrevailing(VI.getGUID(), GVS.get()));
+ Prevailing = FS;
+ break;
+ } else if (GlobalValue::isWeakODRLinkage(Linkage) ||
+ GlobalValue::isLinkOnceODRLinkage(Linkage) ||
+ GlobalValue::isWeakAnyLinkage(Linkage) ||
+ GlobalValue::isLinkOnceAnyLinkage(Linkage)) {
+ if (IsPrevailing(VI.getGUID(), GVS.get())) {
+ Prevailing = FS;
+ break;
+ }
+ } else if (GlobalValue::isAvailableExternallyLinkage(Linkage)) {
+ // TODO: Handle these cases if they become meaningful
+ continue;
+ }
+ }
+
+ if (Local) {
+ assert(!Prevailing);
+ CachedPrevailingSummary[VI] = Local;
+ } else if (Prevailing) {
+ assert(!Local);
+ CachedPrevailingSummary[VI] = Prevailing;
+ }
- return MadeChange;
+ return CachedPrevailingSummary[VI];
+}
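The rules spelled out in the comment block above boil down to a small decision procedure. Below is a simplified restatement with made-up types (not the ThinLTO summary API), useful only to see the control flow.

#include <cstddef>
#include <optional>
#include <vector>

enum class Linkage {
  Local, External, WeakODR, LinkOnceODR, WeakAny, LinkOnceAny,
  AvailableExternally
};

struct ToyCopy {
  Linkage L;
  bool IsPrevailing;
  bool HasUnknownCall; // virtual or indirect call in the body
};

// Returns the index of the copy whose attributes may be trusted, or nullopt
// to signal "go conservative".
std::optional<std::size_t> pickCopy(const std::vector<ToyCopy> &Copies) {
  std::optional<std::size_t> Local, Prevailing;
  for (std::size_t I = 0; I < Copies.size(); ++I) {
    const ToyCopy &C = Copies[I];
    if (C.HasUnknownCall)
      return std::nullopt;              // unknown callee: bail
    if (C.L == Linkage::Local) {
      if (Local)
        return std::nullopt;            // multiple local copies: bail
      Local = I;
    } else if (C.L == Linkage::AvailableExternally) {
      continue;                         // attributes captured via the caller
    } else if (C.IsPrevailing) {
      Prevailing = I;                   // external/ODR/weak prevailing copy
      break;
    }
  }
  return Local ? Local : Prevailing;
}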
+
+bool llvm::thinLTOPropagateFunctionAttrs(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing) {
+ // TODO: implement addNoAliasAttrs once
+ // there's more information about the return type in the summary
+ if (DisableThinLTOPropagation)
+ return false;
+
+ DenseMap<ValueInfo, FunctionSummary *> CachedPrevailingSummary;
+ bool Changed = false;
+
+ auto PropagateAttributes = [&](std::vector<ValueInfo> &SCCNodes) {
+ // Assume we can propagate unless we discover otherwise
+ FunctionSummary::FFlags InferredFlags;
+ InferredFlags.NoRecurse = (SCCNodes.size() == 1);
+ InferredFlags.NoUnwind = true;
+
+ for (auto &V : SCCNodes) {
+ FunctionSummary *CallerSummary =
+ calculatePrevailingSummary(V, CachedPrevailingSummary, IsPrevailing);
+
+ // Function summaries can fail to contain information such as declarations
+ if (!CallerSummary)
+ return;
+
+ if (CallerSummary->fflags().MayThrow)
+ InferredFlags.NoUnwind = false;
+
+ for (const auto &Callee : CallerSummary->calls()) {
+ FunctionSummary *CalleeSummary = calculatePrevailingSummary(
+ Callee.first, CachedPrevailingSummary, IsPrevailing);
+
+ if (!CalleeSummary)
+ return;
+
+ if (!CalleeSummary->fflags().NoRecurse)
+ InferredFlags.NoRecurse = false;
+
+ if (!CalleeSummary->fflags().NoUnwind)
+ InferredFlags.NoUnwind = false;
+
+ if (!InferredFlags.NoUnwind && !InferredFlags.NoRecurse)
+ break;
+ }
+ }
+
+ if (InferredFlags.NoUnwind || InferredFlags.NoRecurse) {
+ Changed = true;
+ for (auto &V : SCCNodes) {
+ if (InferredFlags.NoRecurse) {
+ LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoRecurse to "
+ << V.name() << "\n");
+ ++NumThinLinkNoRecurse;
+ }
+
+ if (InferredFlags.NoUnwind) {
+ LLVM_DEBUG(dbgs() << "ThinLTO FunctionAttrs: Propagated NoUnwind to "
+ << V.name() << "\n");
+ ++NumThinLinkNoUnwind;
+ }
+
+ for (auto &S : V.getSummaryList()) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get())) {
+ if (InferredFlags.NoRecurse)
+ FS->setNoRecurse();
+
+ if (InferredFlags.NoUnwind)
+ FS->setNoUnwind();
+ }
+ }
+ }
+ }
+ };
+
+ // Call propagation functions on each SCC in the Index
+ for (scc_iterator<ModuleSummaryIndex *> I = scc_begin(&Index); !I.isAtEnd();
+ ++I) {
+ std::vector<ValueInfo> Nodes(*I);
+ PropagateAttributes(Nodes);
+ }
+ return Changed;
}
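The PropagateAttributes lambda above starts optimistic per SCC and clears a flag as soon as any member or callee refutes it. The core update, reduced to a free function over stand-in flags (not FunctionSummary::FFlags):

#include <vector>

struct ToyFlags {
  bool NoRecurse = true;
  bool NoUnwind = true;
};

ToyFlags inferSCCFlags(bool SingleNodeSCC, bool AnyMemberMayThrow,
                       const std::vector<ToyFlags> &CalleeFlags) {
  ToyFlags Inferred;
  Inferred.NoRecurse = SingleNodeSCC;     // a multi-node SCC is recursive
  Inferred.NoUnwind = !AnyMemberMayThrow; // a throwing member refutes nounwind
  for (const ToyFlags &F : CalleeFlags) {
    Inferred.NoRecurse &= F.NoRecurse;
    Inferred.NoUnwind &= F.NoUnwind;
    if (!Inferred.NoRecurse && !Inferred.NoUnwind)
      break;                              // both refuted, nothing left to learn
  }
  return Inferred;
}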
namespace {
@@ -395,7 +590,7 @@ struct ArgumentUsesTracker : public CaptureTracker {
assert(UseIndex < CB->data_operands_size() &&
"Indirect function calls should have been filtered above!");
- if (UseIndex >= CB->getNumArgOperands()) {
+ if (UseIndex >= CB->arg_size()) {
// Data operand, but not an argument operand -- must be a bundle operand
assert(CB->hasOperandBundles() && "Must be!");
@@ -530,7 +725,7 @@ determinePointerReadAttrs(Argument *A,
assert(UseIndex < CB.data_operands_size() &&
"Data operand use expected!");
- bool IsOperandBundleUse = UseIndex >= CB.getNumArgOperands();
+ bool IsOperandBundleUse = UseIndex >= CB.arg_size();
if (UseIndex >= F->arg_size() && !IsOperandBundleUse) {
assert(F->isVarArg() && "More params than args in non-varargs call");
@@ -581,9 +776,8 @@ determinePointerReadAttrs(Argument *A,
}
/// Deduce returned attributes for the SCC.
-static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining if an argument is always returned.
for (Function *F : SCCNodes) {
// We can infer and propagate function attributes only when we know that the
@@ -623,11 +817,9 @@ static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
auto *A = cast<Argument>(RetArg);
A->addAttr(Attribute::Returned);
++NumReturned;
- Changed = true;
+ Changed.insert(F);
}
}
-
- return Changed;
}
/// If a callsite has arguments that are also arguments to the parent function,
@@ -693,9 +885,8 @@ static bool addReadAttr(Argument *A, Attribute::AttrKind R) {
}
/// Deduce nocapture attributes for the SCC.
-static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addArgumentAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
ArgumentGraph AG;
// Check each function in turn, determining which pointer arguments are not
@@ -707,7 +898,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (!F->hasExactDefinition())
continue;
- Changed |= addArgumentAttrsFromCallsites(*F);
+ if (addArgumentAttrsFromCallsites(*F))
+ Changed.insert(F);
// Functions that are readonly (or readnone) and nounwind and don't return
// a value can't capture arguments. Don't analyze them.
@@ -718,7 +910,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(F);
}
}
continue;
@@ -737,7 +929,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
// If it's trivially not captured, mark it nocapture now.
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(F);
} else {
// If it's not trivially captured and not trivially not captured,
// then it must be calling into another function in our SCC. Save
@@ -761,7 +953,8 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Self.insert(&*A);
Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self);
if (R != Attribute::None)
- Changed = addReadAttr(A, R);
+ if (addReadAttr(A, R))
+ Changed.insert(F);
}
}
}
@@ -785,7 +978,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Argument *A = ArgumentSCC[0]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(A->getParent());
}
continue;
}
@@ -827,7 +1020,7 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
Argument *A = ArgumentSCC[i]->Definition;
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
- Changed = true;
+ Changed.insert(A->getParent());
}
// We also want to compute readonly/readnone. With a small number of false
@@ -858,12 +1051,11 @@ static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
if (ReadAttr != Attribute::None) {
for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
Argument *A = ArgumentSCC[i]->Definition;
- Changed = addReadAttr(A, ReadAttr);
+ if (addReadAttr(A, ReadAttr))
+ Changed.insert(A->getParent());
}
}
}
-
- return Changed;
}
/// Tests whether a function is "malloc-like".
@@ -934,7 +1126,8 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
}
/// Deduce noalias attributes for the SCC.
-static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
+static void addNoAliasAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Check each function in turn, determining which functions return noalias
// pointers.
for (Function *F : SCCNodes) {
@@ -946,7 +1139,7 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
- return false;
+ return;
// We annotate noalias return values, which are only applicable to
// pointer types.
@@ -954,10 +1147,9 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
continue;
if (!isFunctionMallocLike(F, SCCNodes))
- return false;
+ return;
}
- bool MadeChange = false;
for (Function *F : SCCNodes) {
if (F->returnDoesNotAlias() ||
!F->getReturnType()->isPointerTy())
@@ -965,10 +1157,8 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {
F->setReturnDoesNotAlias();
++NumNoAlias;
- MadeChange = true;
+ Changed.insert(F);
}
-
- return MadeChange;
}
/// Tests whether this function is known to not return null.
@@ -1044,26 +1234,24 @@ static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes,
}
/// Deduce nonnull attributes for the SCC.
-static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
+static void addNonNullAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Speculative that all functions in the SCC return only nonnull
// pointers. We may refute this as we analyze functions.
bool SCCReturnsNonNull = true;
- bool MadeChange = false;
-
// Check each function in turn, determining which functions return nonnull
// pointers.
for (Function *F : SCCNodes) {
// Already nonnull.
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull))
+ if (F->getAttributes().hasRetAttr(Attribute::NonNull))
continue;
// We can infer and propagate function attributes only when we know that the
// definition we'll get at link time is *exactly* the definition we see now.
// For more details, see GlobalValue::mayBeDerefined.
if (!F->hasExactDefinition())
- return false;
+ return;
// We annotate nonnull return values, which are only applicable to
// pointer types.
@@ -1077,9 +1265,9 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
// which prevents us from speculating about the entire SCC
LLVM_DEBUG(dbgs() << "Eagerly marking " << F->getName()
<< " as nonnull\n");
- F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
- MadeChange = true;
+ Changed.insert(F);
}
continue;
}
@@ -1090,19 +1278,16 @@ static bool addNonNullAttrs(const SCCNodeSet &SCCNodes) {
if (SCCReturnsNonNull) {
for (Function *F : SCCNodes) {
- if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
- Attribute::NonNull) ||
+ if (F->getAttributes().hasRetAttr(Attribute::NonNull) ||
!F->getReturnType()->isPointerTy())
continue;
LLVM_DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
- F->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ F->addRetAttr(Attribute::NonNull);
++NumNonNullReturn;
- MadeChange = true;
+ Changed.insert(F);
}
}
-
- return MadeChange;
}
namespace {
@@ -1155,12 +1340,13 @@ public:
InferenceDescriptors.push_back(AttrInference);
}
- bool run(const SCCNodeSet &SCCNodes);
+ void run(const SCCNodeSet &SCCNodes, SmallSet<Function *, 8> &Changed);
};
/// Perform all the requested attribute inference actions according to the
/// attribute predicates stored before.
-bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
+void AttributeInferer::run(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
SmallVector<InferenceDescriptor, 4> InferInSCC = InferenceDescriptors;
// Go through all the functions in SCC and check corresponding attribute
// assumptions for each of them. Attributes that are invalid for this SCC
@@ -1169,7 +1355,7 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
// No attributes whose assumptions are still valid - done.
if (InferInSCC.empty())
- return false;
+ return;
// Check if our attributes ever need scanning/can be scanned.
llvm::erase_if(InferInSCC, [F](const InferenceDescriptor &ID) {
@@ -1212,9 +1398,8 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
}
if (InferInSCC.empty())
- return false;
+ return;
- bool Changed = false;
for (Function *F : SCCNodes)
// At this point InferInSCC contains only functions that were either:
// - explicitly skipped from scan/inference, or
@@ -1223,10 +1408,9 @@ bool AttributeInferer::run(const SCCNodeSet &SCCNodes) {
for (auto &ID : InferInSCC) {
if (ID.SkipFunction(*F))
continue;
- Changed = true;
+ Changed.insert(F);
ID.SetAttribute(*F);
}
- return Changed;
}
struct SCCNodesResult {
@@ -1243,7 +1427,7 @@ static bool InstrBreaksNonConvergent(Instruction &I,
// Breaks non-convergent assumption if CS is a convergent call to a function
// not in the SCC.
return CB && CB->isConvergent() &&
- SCCNodes.count(CB->getCalledFunction()) == 0;
+ !SCCNodes.contains(CB->getCalledFunction());
}
/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
@@ -1282,7 +1466,8 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
-static bool inferConvergent(const SCCNodeSet &SCCNodes) {
+static void inferConvergent(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
// Request to remove the convergent attribute from all functions in the SCC
@@ -1305,7 +1490,7 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
},
/* RequiresExactDefinition= */ false});
// Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
/// Infer attributes from all functions in the SCC by scanning every
@@ -1314,7 +1499,8 @@ static bool inferConvergent(const SCCNodeSet &SCCNodes) {
/// - addition of NoUnwind attribute
///
/// Returns true if any changes to function attributes were made.
-static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
+static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
if (!DisableNoUnwindInference)
@@ -1363,19 +1549,20 @@ static bool inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes) {
/* RequiresExactDefinition= */ true});
// Perform all the requested attribute inference actions.
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
-static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
+static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
if (SCCNodes.size() != 1)
- return false;
+ return;
Function *F = *SCCNodes.begin();
if (!F || !F->hasExactDefinition() || F->doesNotRecurse())
- return false;
+ return;
// If all of the calls in F are identifiable and are to norecurse functions, F
// is norecurse. This check also detects self-recursion as F is not currently
@@ -1386,7 +1573,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
Function *Callee = CB->getCalledFunction();
if (!Callee || Callee == F || !Callee->doesNotRecurse())
// Function calls a potentially recursive function.
- return false;
+ return;
}
// Every call was to a non-recursive function other than this function, and
@@ -1394,7 +1581,7 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
// recurse.
F->setDoesNotRecurse();
++NumNoRecurse;
- return true;
+ Changed.insert(F);
}
static bool instructionDoesNotReturn(Instruction &I) {
@@ -1412,9 +1599,8 @@ static bool basicBlockCanReturn(BasicBlock &BB) {
}
// Set the noreturn function attribute if possible.
-static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addNoReturnAttrs(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || !F->hasExactDefinition() || F->hasFnAttribute(Attribute::Naked) ||
F->doesNotReturn())
@@ -1424,11 +1610,9 @@ static bool addNoReturnAttrs(const SCCNodeSet &SCCNodes) {
// FIXME: this doesn't handle recursion or unreachable blocks.
if (none_of(*F, basicBlockCanReturn)) {
F->setDoesNotReturn();
- Changed = true;
+ Changed.insert(F);
}
}
-
- return Changed;
}
static bool functionWillReturn(const Function &F) {
@@ -1461,19 +1645,16 @@ static bool functionWillReturn(const Function &F) {
}
// Set the willreturn function attribute if possible.
-static bool addWillReturn(const SCCNodeSet &SCCNodes) {
- bool Changed = false;
-
+static void addWillReturn(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
for (Function *F : SCCNodes) {
if (!F || F->willReturn() || !functionWillReturn(*F))
continue;
F->setWillReturn();
NumWillReturn++;
- Changed = true;
+ Changed.insert(F);
}
-
- return Changed;
}
// Return true if this is an atomic which has an ordering stronger than
@@ -1532,7 +1713,8 @@ static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
}
// Infer the nosync attribute.
-static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
+static void addNoSyncAttr(const SCCNodeSet &SCCNodes,
+ SmallSet<Function *, 8> &Changed) {
AttributeInferer AI;
AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
Attribute::NoSync,
@@ -1549,14 +1731,15 @@ static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
++NumNoSync;
},
/* RequiresExactDefinition= */ true});
- return AI.run(SCCNodes);
+ AI.run(SCCNodes, Changed);
}
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
SCCNodesResult Res;
Res.HasUnknownCall = false;
for (Function *F : Functions) {
- if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked)) {
+ if (!F || F->hasOptNone() || F->hasFnAttribute(Attribute::Naked) ||
+ F->isPresplitCoroutine()) {
// Treat any function we're trying not to optimize as if it were an
// indirect call and omit it from the node set used below.
Res.HasUnknownCall = true;
@@ -1582,32 +1765,33 @@ static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
}
template <typename AARGetterT>
-static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
- AARGetterT &&AARGetter) {
+static SmallSet<Function *, 8>
+deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
SCCNodesResult Nodes = createSCCNodeSet(Functions);
- bool Changed = false;
// Bail if the SCC only contains optnone functions.
if (Nodes.SCCNodes.empty())
- return Changed;
+ return {};
+
+ SmallSet<Function *, 8> Changed;
- Changed |= addArgumentReturnedAttrs(Nodes.SCCNodes);
- Changed |= addReadAttrs(Nodes.SCCNodes, AARGetter);
- Changed |= addArgumentAttrs(Nodes.SCCNodes);
- Changed |= inferConvergent(Nodes.SCCNodes);
- Changed |= addNoReturnAttrs(Nodes.SCCNodes);
- Changed |= addWillReturn(Nodes.SCCNodes);
+ addArgumentReturnedAttrs(Nodes.SCCNodes, Changed);
+ addReadAttrs(Nodes.SCCNodes, AARGetter, Changed);
+ addArgumentAttrs(Nodes.SCCNodes, Changed);
+ inferConvergent(Nodes.SCCNodes, Changed);
+ addNoReturnAttrs(Nodes.SCCNodes, Changed);
+ addWillReturn(Nodes.SCCNodes, Changed);
// If we have no external nodes participating in the SCC, we can deduce some
// more precise attributes as well.
if (!Nodes.HasUnknownCall) {
- Changed |= addNoAliasAttrs(Nodes.SCCNodes);
- Changed |= addNonNullAttrs(Nodes.SCCNodes);
- Changed |= inferAttrsFromFunctionBodies(Nodes.SCCNodes);
- Changed |= addNoRecurseAttrs(Nodes.SCCNodes);
+ addNoAliasAttrs(Nodes.SCCNodes, Changed);
+ addNonNullAttrs(Nodes.SCCNodes, Changed);
+ inferAttrsFromFunctionBodies(Nodes.SCCNodes, Changed);
+ addNoRecurseAttrs(Nodes.SCCNodes, Changed);
}
- Changed |= addNoSyncAttr(Nodes.SCCNodes);
+ addNoSyncAttr(Nodes.SCCNodes, Changed);
// Finally, infer the maximal set of attributes from the ones we've inferred
// above. This is handling the cases where one attribute on a signature
@@ -1615,7 +1799,8 @@ static bool deriveAttrsInPostOrder(ArrayRef<Function *> Functions,
// the latter is missing (or simply less sophisticated).
for (Function *F : Nodes.SCCNodes)
if (F)
- Changed |= inferAttributesFromOthers(*F);
+ if (inferAttributesFromOthers(*F))
+ Changed.insert(F);
return Changed;
}
@@ -1638,14 +1823,35 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
Functions.push_back(&N.getFunction());
}
- if (deriveAttrsInPostOrder(Functions, AARGetter)) {
- // We have not changed the call graph or removed/added functions.
- PreservedAnalyses PA;
- PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
- return PA;
+ auto ChangedFunctions = deriveAttrsInPostOrder(Functions, AARGetter);
+ if (ChangedFunctions.empty())
+ return PreservedAnalyses::all();
+
+ // Invalidate analyses for modified functions so that we don't have to
+ // invalidate all analyses for all functions in this SCC.
+ PreservedAnalyses FuncPA;
+ // We haven't changed the CFG for modified functions.
+ FuncPA.preserveSet<CFGAnalyses>();
+ for (Function *Changed : ChangedFunctions) {
+ FAM.invalidate(*Changed, FuncPA);
+ // Also invalidate any direct callers of changed functions since analyses
+ // may care about attributes of direct callees. For example, MemorySSA cares
+ // about whether or not a call's callee modifies memory and queries that
+ // through function attributes.
+ for (auto *U : Changed->users()) {
+ if (auto *Call = dyn_cast<CallBase>(U)) {
+ if (Call->getCalledFunction() == Changed)
+ FAM.invalidate(*Call->getFunction(), FuncPA);
+ }
+ }
}
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ // We have not added or removed functions.
+ PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We already invalidated all relevant function analyses above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
namespace {
@@ -1690,7 +1896,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
Functions.push_back(I->getFunction());
}
- return deriveAttrsInPostOrder(Functions, AARGetter);
+ return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}
bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
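A minimal sketch (hypothetical code, not part of this patch) of why the run()
method above also invalidates analyses of direct callers: MemorySSA built for
a caller models a call as a clobber until the callee is known not to write
memory, so inferring readnone for the callee makes the caller's cached
MemorySSA stale.

    int callee(int x);        // assume this pass later infers readnone for it

    int caller(int *p) {
      int v = *p;
      int r = callee(v);      // treated as a clobber before the inference,
                              // not a clobber once callee is readnone
      return r + *p;          // the reload of *p may then be forwarded from v
    }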
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 2f6cf0ca7087..d9b43109f629 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
@@ -187,23 +188,6 @@ selectCallee(const ModuleSummaryIndex &Index,
return false;
}
- // For SamplePGO, in computeImportForFunction the OriginalId
- // may have been used to locate the callee summary list (See
- // comment there).
- // The mapping from OriginalId to GUID may return a GUID
- // that corresponds to a static variable. Filter it out here.
- // This can happen when
- // 1) There is a call to a library function which is not defined
- // in the index.
- // 2) There is a static variable with the OriginalGUID identical
- // to the GUID of the library function in 1);
- // When this happens, the logic for SamplePGO kicks in and
- // the static variable in 2) will be found, which needs to be
- // filtered out.
- if (GVSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind) {
- Reason = FunctionImporter::ImportFailureReason::GlobalVar;
- return false;
- }
if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) {
Reason = FunctionImporter::ImportFailureReason::InterposableLinkage;
// There is no point in importing these, we can't inline them
@@ -264,21 +248,6 @@ using EdgeInfo =
} // anonymous namespace
-static ValueInfo
-updateValueInfoForIndirectCalls(const ModuleSummaryIndex &Index, ValueInfo VI) {
- if (!VI.getSummaryList().empty())
- return VI;
- // For SamplePGO, the indirect call targets for local functions will
- // have its original name annotated in profile. We try to find the
- // corresponding PGOFuncName as the GUID.
- // FIXME: Consider updating the edges in the graph after building
- // it, rather than needing to perform this mapping on each walk.
- auto GUID = Index.getGUIDFromOriginalID(VI.getGUID());
- if (GUID == 0)
- return ValueInfo();
- return Index.getValueInfo(GUID);
-}
-
static bool shouldImportGlobal(const ValueInfo &VI,
const GVSummaryMapTy &DefinedGVSummaries) {
const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
@@ -400,10 +369,6 @@ static void computeImportForFunction(
continue;
}
- VI = updateValueInfoForIndirectCalls(Index, VI);
- if (!VI)
- continue;
-
if (DefinedGVSummaries.count(VI.getGUID())) {
// FIXME: Consider not skipping import if the module contains
// a non-prevailing def with interposable linkage. The prevailing copy
@@ -496,7 +461,7 @@ static void computeImportForFunction(
VI.name().str() + " due to " +
getFailureName(Reason);
auto Error = make_error<StringError>(
- Msg, std::make_error_code(std::errc::operation_not_supported));
+ Msg, make_error_code(errc::not_supported));
logAllUnhandledErrors(std::move(Error), errs(),
"Error importing module: ");
break;
@@ -839,16 +804,61 @@ void llvm::ComputeCrossModuleImportForModuleFromIndex(
#endif
}
-void llvm::computeDeadSymbols(
+// For SamplePGO, the indirect call targets for local functions will
+// have its original name annotated in profile. We try to find the
+// corresponding PGOFuncName as the GUID, and fix up the edges
+// accordingly.
+void updateValueInfoForIndirectCalls(ModuleSummaryIndex &Index,
+ FunctionSummary *FS) {
+ for (auto &EI : FS->mutableCalls()) {
+ if (!EI.first.getSummaryList().empty())
+ continue;
+ auto GUID = Index.getGUIDFromOriginalID(EI.first.getGUID());
+ if (GUID == 0)
+ continue;
+ // Update the edge to point directly to the correct GUID.
+ auto VI = Index.getValueInfo(GUID);
+ if (llvm::any_of(
+ VI.getSummaryList(),
+ [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
+ // The mapping from OriginalId to GUID may return a GUID
+ // that corresponds to a static variable. Filter it out here.
+ // This can happen when
+ // 1) There is a call to a library function which is not defined
+ // in the index.
+ // 2) There is a static variable with the OriginalGUID identical
+ // to the GUID of the library function in 1);
+ // When this happens the static variable in 2) will be found,
+ // which needs to be filtered out.
+ return SummaryPtr->getSummaryKind() ==
+ GlobalValueSummary::GlobalVarKind;
+ }))
+ continue;
+ EI.first = VI;
+ }
+}
+
+void llvm::updateIndirectCalls(ModuleSummaryIndex &Index) {
+ for (const auto &Entry : Index) {
+ for (auto &S : Entry.second.SummaryList) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
+ updateValueInfoForIndirectCalls(Index, FS);
+ }
+ }
+}
+
+void llvm::computeDeadSymbolsAndUpdateIndirectCalls(
ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing) {
assert(!Index.withGlobalValueDeadStripping());
- if (!ComputeDead)
- return;
- if (GUIDPreservedSymbols.empty())
- // Don't do anything when nothing is live, this is friendly with tests.
+ if (!ComputeDead ||
+ // Don't do anything when nothing is live, this is friendly with tests.
+ GUIDPreservedSymbols.empty()) {
+ // Still need to update indirect calls.
+ updateIndirectCalls(Index);
return;
+ }
unsigned LiveSymbols = 0;
SmallVector<ValueInfo, 128> Worklist;
Worklist.reserve(GUIDPreservedSymbols.size() * 2);
@@ -863,13 +873,16 @@ void llvm::computeDeadSymbols(
// Add values flagged in the index as live roots to the worklist.
for (const auto &Entry : Index) {
auto VI = Index.getValueInfo(Entry);
- for (auto &S : Entry.second.SummaryList)
+ for (auto &S : Entry.second.SummaryList) {
+ if (auto *FS = dyn_cast<FunctionSummary>(S.get()))
+ updateValueInfoForIndirectCalls(Index, FS);
if (S->isLive()) {
LLVM_DEBUG(dbgs() << "Live root: " << VI << "\n");
Worklist.push_back(VI);
++LiveSymbols;
break;
}
+ }
}
// Make value live and add it to the worklist if it was not live before.
@@ -882,9 +895,6 @@ void llvm::computeDeadSymbols(
// binary, which increases the binary size unnecessarily. Note that
// if this code changes, the importer needs to change so that edges
// to functions marked dead are skipped.
- VI = updateValueInfoForIndirectCalls(Index, VI);
- if (!VI)
- return;
if (llvm::any_of(VI.getSummaryList(),
[](const std::unique_ptr<llvm::GlobalValueSummary> &S) {
@@ -958,7 +968,8 @@ void llvm::computeDeadSymbolsWithConstProp(
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
bool ImportEnabled) {
- computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+ computeDeadSymbolsAndUpdateIndirectCalls(Index, GUIDPreservedSymbols,
+ isPrevailing);
if (ImportEnabled)
Index.propagateAttributes(GUIDPreservedSymbols);
}
@@ -1040,13 +1051,33 @@ bool llvm::convertToDeclaration(GlobalValue &GV) {
return true;
}
-void llvm::thinLTOResolvePrevailingInModule(
- Module &TheModule, const GVSummaryMapTy &DefinedGlobals) {
- auto updateLinkage = [&](GlobalValue &GV) {
+void llvm::thinLTOFinalizeInModule(Module &TheModule,
+ const GVSummaryMapTy &DefinedGlobals,
+ bool PropagateAttrs) {
+ auto FinalizeInModule = [&](GlobalValue &GV, bool Propagate = false) {
// See if the global summary analysis computed a new resolved linkage.
const auto &GS = DefinedGlobals.find(GV.getGUID());
if (GS == DefinedGlobals.end())
return;
+
+ if (Propagate)
+ if (FunctionSummary *FS = dyn_cast<FunctionSummary>(GS->second)) {
+ if (Function *F = dyn_cast<Function>(&GV)) {
+ // TODO: propagate ReadNone and ReadOnly.
+ if (FS->fflags().ReadNone && !F->doesNotAccessMemory())
+ F->setDoesNotAccessMemory();
+
+ if (FS->fflags().ReadOnly && !F->onlyReadsMemory())
+ F->setOnlyReadsMemory();
+
+ if (FS->fflags().NoRecurse && !F->doesNotRecurse())
+ F->setDoesNotRecurse();
+
+ if (FS->fflags().NoUnwind && !F->doesNotThrow())
+ F->setDoesNotThrow();
+ }
+ }
+
auto NewLinkage = GS->second->linkage();
if (GlobalValue::isLocalLinkage(GV.getLinkage()) ||
// Don't internalize anything here, because the code below
@@ -1105,11 +1136,11 @@ void llvm::thinLTOResolvePrevailingInModule(
// Process functions and global now
for (auto &GV : TheModule)
- updateLinkage(GV);
+ FinalizeInModule(GV, PropagateAttrs);
for (auto &GV : TheModule.globals())
- updateLinkage(GV);
+ FinalizeInModule(GV);
for (auto &GV : TheModule.aliases())
- updateLinkage(GV);
+ FinalizeInModule(GV);
}
/// Run internalization on \p TheModule based on summary analysis.
@@ -1153,7 +1184,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
/// Make alias a clone of its aliasee.
static Function *replaceAliasWithAliasee(Module *SrcModule, GlobalAlias *GA) {
- Function *Fn = cast<Function>(GA->getBaseObject());
+ Function *Fn = cast<Function>(GA->getAliaseeObject());
ValueToValueMapTy VMap;
Function *NewFn = CloneFunction(Fn, VMap);
@@ -1259,12 +1290,12 @@ Expected<bool> FunctionImporter::importFunctions(
if (Error Err = GA.materialize())
return std::move(Err);
// Import alias as a copy of its aliasee.
- GlobalObject *Base = GA.getBaseObject();
- if (Error Err = Base->materialize())
+ GlobalObject *GO = GA.getAliaseeObject();
+ if (Error Err = GO->materialize())
return std::move(Err);
auto *Fn = replaceAliasWithAliasee(SrcModule.get(), &GA);
- LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << Base->getGUID()
- << " " << Base->getName() << " from "
+ LLVM_DEBUG(dbgs() << "Is importing aliasee fn " << GO->getGUID() << " "
+ << GO->getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
if (EnableImportMetadata) {
// Add 'thinlto_src_module' metadata for statistics and debugging.
@@ -1303,7 +1334,7 @@ Expected<bool> FunctionImporter::importFunctions(
std::move(SrcModule), GlobalsToImport.getArrayRef(),
[](GlobalValue &, IRMover::ValueAdder) {},
/*IsPerformingImport=*/true))
- report_fatal_error("Function Import: link error: " +
+ report_fatal_error(Twine("Function Import: link error: ") +
toString(std::move(Err)));
ImportedCount += GlobalsToImport.size();
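A condensed sketch of the SamplePGO edge fix-up added earlier in this file
(same ModuleSummaryIndex APIs as the patch uses; the GlobalVarKind filter is
omitted here for brevity):

    static void remapEdge(ModuleSummaryIndex &Index, ValueInfo &Callee) {
      if (!Callee.getSummaryList().empty())
        return;                                 // edge is already resolved
      GlobalValue::GUID G = Index.getGUIDFromOriginalID(Callee.getGUID());
      if (G == 0)
        return;                                 // no matching PGOFuncName
      Callee = Index.getValueInfo(G);           // point the edge at the target
    }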
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index f61f4312b777..fbd083bb9bbf 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -11,7 +11,6 @@
// are propagated to the callee by specializing the function.
//
// Current limitations:
-// - It does not handle specialization of recursive functions,
// - It does not yet handle integer ranges.
// - Only 1 argument per function is specialised,
// - The cost-model could be further looked into,
@@ -22,6 +21,18 @@
// a direct way to steer function specialization, avoiding the cost-model,
// and thus control compile-times / code-size.
//
+// Todos:
+// - Specializing recursive functions relies on running the transformation a
+// number of times, which is controlled by option
+//   `func-specialization-max-iters`. Thus, increasing the number of
+//   iterations linearly increases the number of times recursive functions
+//   get specialized; see also the discussion in
+//   https://reviews.llvm.org/D106426 for details. Perhaps there is a more
+//   compile-time-friendly way to control/limit the number of specialisations
+//   for recursive functions.
+// - Don't transform the function if no function specialization happens.
+//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
@@ -59,20 +70,166 @@ static cl::opt<unsigned> MaxConstantsThreshold(
"specialization"),
cl::init(3));
+static cl::opt<unsigned> SmallFunctionThreshold(
+ "func-specialization-size-threshold", cl::Hidden,
+ cl::desc("Don't specialize functions that have less than this theshold "
+ "number of instructions"),
+ cl::init(100));
+
static cl::opt<unsigned>
AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
cl::desc("Average loop iteration count cost"),
cl::init(10));
+static cl::opt<bool> SpecializeOnAddresses(
+ "func-specialization-on-address", cl::init(false), cl::Hidden,
+ cl::desc("Enable function specialization on the address of global values"));
+
+// TODO: This needs checking to see the impact on compile-times, which is why
+// this is off by default for now.
static cl::opt<bool> EnableSpecializationForLiteralConstant(
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
- cl::desc("Make function specialization available for literal constant."));
+ cl::desc("Enable specialization of functions that take a literal constant "
+ "as an argument."));
+
+// Helper to check if \p LV is either a constant or a constant
+// range with a single element. This should cover exactly the same cases as the
+// old ValueLatticeElement::isConstant() and is intended to be used in the
+// transition to ValueLatticeElement.
+static bool isConstant(const ValueLatticeElement &LV) {
+ return LV.isConstant() ||
+ (LV.isConstantRange() && LV.getConstantRange().isSingleElement());
+}
// Helper to check if \p LV is either overdefined or a constant int.
static bool isOverdefined(const ValueLatticeElement &LV) {
- return !LV.isUnknownOrUndef() && !LV.isConstant();
+ return !LV.isUnknownOrUndef() && !isConstant(LV);
+}
+
+static Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call) {
+ Value *StoreValue = nullptr;
+ for (auto *User : Alloca->users()) {
+ // We can't use llvm::isAllocaPromotable() as that would fail because of
+ // the usage in the CallInst, which is what we check here.
+ if (User == Call)
+ continue;
+ if (auto *Bitcast = dyn_cast<BitCastInst>(User)) {
+ if (!Bitcast->hasOneUse() || *Bitcast->user_begin() != Call)
+ return nullptr;
+ continue;
+ }
+
+ if (auto *Store = dyn_cast<StoreInst>(User)) {
+ // This is a duplicate store, bail out.
+ if (StoreValue || Store->isVolatile())
+ return nullptr;
+ StoreValue = Store->getValueOperand();
+ continue;
+ }
+ // Bail if there is any other unknown usage.
+ return nullptr;
+ }
+ return dyn_cast_or_null<Constant>(StoreValue);
}
+// A constant stack value is an AllocaInst that has a single constant
+// value stored to it. Return this constant if such an alloca stack value
+// is a function argument.
+static Constant *getConstantStackValue(CallInst *Call, Value *Val,
+ SCCPSolver &Solver) {
+ if (!Val)
+ return nullptr;
+ Val = Val->stripPointerCasts();
+ if (auto *ConstVal = dyn_cast<ConstantInt>(Val))
+ return ConstVal;
+ auto *Alloca = dyn_cast<AllocaInst>(Val);
+ if (!Alloca || !Alloca->getAllocatedType()->isIntegerTy())
+ return nullptr;
+ return getPromotableAlloca(Alloca, Call);
+}
+
+// To support specializing recursive functions, it is important to propagate
+// constant arguments because after a first iteration of specialisation, a
+// reduced example may look like this:
+//
+// define internal void @RecursiveFn(i32* arg1) {
+// %temp = alloca i32, align 4
+// store i32 2 i32* %temp, align 4
+// call void @RecursiveFn.1(i32* nonnull %temp)
+// ret void
+// }
+//
+// Before the next iteration, we need to propagate the constant as shown
+// below, which allows further specialization in later iterations.
+//
+// @funcspec.arg = internal constant i32 2
+//
+// define internal void @someFunc(i32* arg1) {
+// call void @otherFunc(i32* nonnull @funcspec.arg)
+// ret void
+// }
+//
+static void constantArgPropagation(SmallVectorImpl<Function *> &WorkList,
+ Module &M, SCCPSolver &Solver) {
+ // Iterate over the argument-tracked functions to see if there are any new
+ // constant values for the call instruction via stack variables.
+ for (auto *F : WorkList) {
+ // TODO: Generalize for any read only arguments.
+ if (F->arg_size() != 1)
+ continue;
+
+ auto &Arg = *F->arg_begin();
+ if (!Arg.onlyReadsMemory() || !Arg.getType()->isPointerTy())
+ continue;
+
+ for (auto *User : F->users()) {
+ auto *Call = dyn_cast<CallInst>(User);
+ if (!Call)
+ break;
+ auto *ArgOp = Call->getArgOperand(0);
+ auto *ArgOpType = ArgOp->getType();
+ auto *ConstVal = getConstantStackValue(Call, ArgOp, Solver);
+ if (!ConstVal)
+ break;
+
+ Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
+ GlobalValue::InternalLinkage, ConstVal,
+ "funcspec.arg");
+
+ if (ArgOpType != ConstVal->getType())
+ GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOp->getType());
+
+ Call->setArgOperand(0, GV);
+
+ // Add the changed CallInst to Solver Worklist
+ Solver.visitCall(*Call);
+ }
+ }
+}
+
+// ssa_copy intrinsics are introduced by the SCCP solver. These intrinsics
+// interfere with the constantArgPropagation optimization.
+static void removeSSACopy(Function &F) {
+ for (BasicBlock &BB : F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!II)
+ continue;
+ if (II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+ }
+}
+
+static void removeSSACopy(Module &M) {
+ for (Function &F : M)
+ removeSSACopy(F);
+}
+
+namespace {
class FunctionSpecializer {
/// The IPSCCP Solver.
@@ -115,9 +272,14 @@ public:
for (auto *SpecializedFunc : CurrentSpecializations) {
SpecializedFuncs.insert(SpecializedFunc);
- // TODO: If we want to support specializing specialized functions,
- // initialize here the state of the newly created functions, marking
- // them argument-tracked and executable.
+ // Initialize the state of the newly created functions, marking them
+ // argument-tracked and executable.
+ if (SpecializedFunc->hasExactDefinition() &&
+ !SpecializedFunc->hasFnAttribute(Attribute::Naked))
+ Solver.addTrackedFunction(SpecializedFunc);
+ Solver.addArgumentTrackedFunction(SpecializedFunc);
+ FuncDecls.push_back(SpecializedFunc);
+ Solver.markBlockExecutable(&SpecializedFunc->front());
// Replace the function arguments for the specialized functions.
for (Argument &Arg : SpecializedFunc->args())
@@ -138,12 +300,22 @@ public:
const ValueLatticeElement &IV = Solver.getLatticeValueFor(V);
if (isOverdefined(IV))
return false;
- auto *Const = IV.isConstant() ? Solver.getConstant(IV)
- : UndefValue::get(V->getType());
+ auto *Const =
+ isConstant(IV) ? Solver.getConstant(IV) : UndefValue::get(V->getType());
V->replaceAllUsesWith(Const);
- // TODO: Update the solver here if we want to specialize specialized
- // functions.
+ for (auto *U : Const->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ if (Solver.isBlockExecutable(I->getParent()))
+ Solver.visit(I);
+
+ // Remove the instruction from Block and Solver.
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (I->isSafeToRemove()) {
+ I->eraseFromParent();
+ Solver.removeLatticeValueFor(I);
+ }
+ }
return true;
}
@@ -152,6 +324,15 @@ private:
// also in the cost model.
unsigned NbFunctionsSpecialized = 0;
+ /// Clone the function \p F and remove the ssa_copy intrinsics added by
+ /// the SCCPSolver in the cloned version.
+ Function *cloneCandidateFunction(Function *F) {
+ ValueToValueMapTy EmptyMap;
+ Function *Clone = CloneFunction(F, EmptyMap);
+ removeSSACopy(*Clone);
+ return Clone;
+ }
+
/// This function decides whether to specialize function \p F based on the
/// known constant values its arguments can take on. Specialization is
/// performed on the first interesting argument. Specializations based on
@@ -162,9 +343,8 @@ private:
SmallVectorImpl<Function *> &Specializations) {
// Do not specialize the cloned function again.
- if (SpecializedFuncs.contains(F)) {
+ if (SpecializedFuncs.contains(F))
return false;
- }
// If we're optimizing the function for size, we shouldn't specialize it.
if (F->hasOptSize() ||
@@ -176,8 +356,25 @@ private:
if (!Solver.isBlockExecutable(&F->getEntryBlock()))
return false;
+ // There is no point specializing a function that will ultimately be inlined.
+ if (F->hasFnAttribute(Attribute::AlwaysInline))
+ return false;
+
LLVM_DEBUG(dbgs() << "FnSpecialization: Try function: " << F->getName()
<< "\n");
+
+ // Determine if it would be profitable to create a specialization of the
+ // function where the argument takes on the given constant value. If so,
+ // add the constant to Constants.
+ auto FnSpecCost = getSpecializationCost(F);
+ if (!FnSpecCost.isValid()) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
+ FnSpecCost.print(dbgs()); dbgs() << "\n");
+
// Determine if we should specialize the function based on the values the
// argument can take on. If specialization is not profitable, we continue
// on to the next argument.
@@ -195,7 +392,7 @@ private:
// be set to false by isArgumentInteresting (that function only adds
// values to the Constants list that are deemed profitable).
SmallVector<Constant *, 4> Constants;
- if (!isArgumentInteresting(&A, Constants, IsPartial)) {
+ if (!isArgumentInteresting(&A, Constants, FnSpecCost, IsPartial)) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Argument is not interesting\n");
continue;
}
@@ -214,8 +411,7 @@ private:
for (auto *C : Constants) {
// Clone the function. We leave the ValueToValueMap empty to allow
// IPSCCP to propagate the constant arguments.
- ValueToValueMapTy EmptyMap;
- Function *Clone = CloneFunction(F, EmptyMap);
+ Function *Clone = cloneCandidateFunction(F);
Argument *ClonedArg = Clone->arg_begin() + A.getArgNo();
// Rewrite calls to the function so that they call the clone instead.
@@ -231,9 +427,10 @@ private:
NbFunctionsSpecialized++;
}
- // TODO: if we want to support specialize specialized functions, and if
- // the function has been completely specialized, the original function is
- // no longer needed, so we would need to mark it unreachable here.
+ // If the function has been completely specialized, the original function
+ // is no longer needed. Mark it unreachable.
+ if (!IsPartial)
+ Solver.markFunctionUnreachable(F);
// FIXME: Only one argument per function.
return true;
@@ -253,7 +450,11 @@ private:
// If the code metrics reveal that we shouldn't duplicate the function, we
// shouldn't specialize it. Set the specialization cost to Invalid.
- if (Metrics.notDuplicatable) {
+ // Likewise, if the function is small enough that it is likely to be fully
+ // inlined, we shouldn't specialize it.
+ if (Metrics.notDuplicatable ||
+ (!ForceFunctionSpecialization &&
+ Metrics.NumInsts < SmallFunctionThreshold)) {
InstructionCost C{};
C.setInvalid();
return C;
@@ -379,9 +580,8 @@ private:
/// argument.
bool isArgumentInteresting(Argument *A,
SmallVectorImpl<Constant *> &Constants,
+ const InstructionCost &FnSpecCost,
bool &IsPartial) {
- Function *F = A->getParent();
-
// For now, don't attempt to specialize functions based on the values of
// composite types.
if (!A->getType()->isSingleValueType() || A->user_empty())
@@ -420,18 +620,6 @@ private:
return false;
}
- // Determine if it would be profitable to create a specialization of the
- // function where the argument takes on the given constant value. If so,
- // add the constant to Constants.
- auto FnSpecCost = getSpecializationCost(F);
- if (!FnSpecCost.isValid()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialisation cost.\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: func specialisation cost: ";
- FnSpecCost.print(dbgs()); dbgs() << "\n");
-
for (auto *C : PossibleConstants) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Constant: " << *C << "\n");
if (ForceFunctionSpecialization) {
@@ -475,6 +663,12 @@ private:
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
auto &CS = *cast<CallBase>(U);
+ // If the call site has attribute minsize set, that callsite won't be
+ // specialized.
+ if (CS.hasFnAttr(Attribute::MinSize)) {
+ AllConstant = false;
+ continue;
+ }
// If the parent of the call site will never be executed, we don't need
// to worry about the passed value.
@@ -482,11 +676,25 @@ private:
continue;
auto *V = CS.getArgOperand(A->getArgNo());
+ if (isa<PoisonValue>(V))
+ return false;
+
+ // For now, constant expressions are fine, but only if they wrap a
+ // function (e.g. a bitcast of a function pointer).
+ if (auto *CE = dyn_cast<ConstantExpr>(V))
+ if (!isa<Function>(CE->getOperand(0)))
+ return false;
+
// TrackValueOfGlobalVariable only tracks scalar global variables.
if (auto *GV = dyn_cast<GlobalVariable>(V)) {
- if (!GV->getValueType()->isSingleValueType()) {
+ // Check if we want to specialize on the address of non-constant
+ // global values.
+ if (!GV->isConstant())
+ if (!SpecializeOnAddresses)
+ return false;
+
+ if (!GV->getValueType()->isSingleValueType())
return false;
- }
}
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
@@ -506,6 +714,9 @@ private:
/// This function modifies calls to function \p F whose argument at index \p
/// ArgNo is equal to constant \p C. The calls are rewritten to call function
/// \p Clone instead.
+ ///
+ /// Callsites that have been marked with the MinSize function attribute won't
+ /// be specialized and rewritten.
void rewriteCallSites(Function *F, Function *Clone, Argument &Arg,
Constant *C) {
unsigned ArgNo = Arg.getArgNo();
@@ -527,24 +738,7 @@ private:
}
}
};
-
-/// Function to clean up the left over intrinsics from SCCP util.
-static void cleanup(Module &M) {
- for (Function &F : M) {
- for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
- Instruction *Inst = &*BI++;
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
- if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
- Value *Op = II->getOperand(0);
- Inst->replaceAllUsesWith(Op);
- Inst->eraseFromParent();
- }
- }
- }
- }
- }
-}
+} // namespace
bool llvm::runFunctionSpecialization(
Module &M, const DataLayout &DL,
@@ -597,12 +791,27 @@ bool llvm::runFunctionSpecialization(
Solver.trackValueOfGlobalVariable(&G);
}
+ auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
+ SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
+ TrackedFuncs.end());
+
+ // No tracked functions, so nothing to do: don't run the solver; just remove
+ // any ssa_copy intrinsics that may have been introduced.
+ if (TrackedFuncs.empty()) {
+ removeSSACopy(M);
+ return false;
+ }
+
// Solve for constants.
auto RunSCCPSolver = [&](auto &WorkList) {
bool ResolvedUndefs = true;
while (ResolvedUndefs) {
+ // The regression test nothing-to-do.ll checks that the solver is not run
+ // unnecessarily, so if this debug message is changed, that regression test
+ // needs updating too.
LLVM_DEBUG(dbgs() << "FnSpecialization: Running solver\n");
+
Solver.solve();
LLVM_DEBUG(dbgs() << "FnSpecialization: Resolving undefs\n");
ResolvedUndefs = false;
@@ -615,15 +824,14 @@ bool llvm::runFunctionSpecialization(
for (BasicBlock &BB : *F) {
if (!Solver.isBlockExecutable(&BB))
continue;
+ // FIXME: The solver may make changes to the function here, so set
+ // Changed, even if later function specialization does not trigger.
for (auto &I : make_early_inc_range(BB))
- FS.tryToReplaceWithConstant(&I);
+ Changed |= FS.tryToReplaceWithConstant(&I);
}
}
};
- auto &TrackedFuncs = Solver.getArgumentTrackedFunctions();
- SmallVector<Function *, 16> FuncDecls(TrackedFuncs.begin(),
- TrackedFuncs.end());
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "FnSpecialization: Worklist fn decls:\n");
for (auto *F : FuncDecls)
@@ -637,14 +845,18 @@ bool llvm::runFunctionSpecialization(
unsigned I = 0;
while (FuncSpecializationMaxIters != I++ &&
FS.specializeFunctions(FuncDecls, CurrentSpecializations)) {
- // TODO: run the solver here for the specialized functions only if we want
- // to specialize recursively.
+
+ // Run the solver for the specialized functions.
+ RunSCCPSolver(CurrentSpecializations);
+
+ // Replace some unresolved constant arguments.
+ constantArgPropagation(FuncDecls, M, Solver);
CurrentSpecializations.clear();
Changed = true;
}
// Clean up the IR by removing ssa_copy intrinsics.
- cleanup(M);
+ removeSSACopy(M);
return Changed;
}
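A source-level analogue (hypothetical C++, mirroring the IR example in the
comment above constantArgPropagation) of the alloca pattern that
getPromotableAlloca recognizes:

    void recurse(int *p);     // an argument-tracked function

    void start() {
      int arg = 2;            // a single alloca with one constant store
      recurse(&arg);          // the only other use is the call, so the stored
                              // constant can be promoted to an internal global
                              // and the call argument rewritten to point at it
    }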
diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index fb4cb23b837e..5e5d2086adc2 100644
--- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -88,7 +88,7 @@ ModulePass *llvm::createGlobalDCEPass() {
static bool isEmptyFunction(Function *F) {
BasicBlock &Entry = F->getEntryBlock();
for (auto &I : Entry) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
if (auto *RI = dyn_cast<ReturnInst>(&I))
return !RI->getReturnValue();
@@ -210,7 +210,7 @@ void GlobalDCEPass::ScanVTableLoad(Function *Caller, Metadata *TypeId,
Constant *Ptr =
getPointerAtOffset(VTable->getInitializer(), VTableOffset + CallOffset,
- *Caller->getParent());
+ *Caller->getParent(), VTable);
if (!Ptr) {
LLVM_DEBUG(dbgs() << "can't find pointer in vtable!\n");
VFESafeVTables.erase(VTable);
@@ -416,6 +416,16 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
// virtual function pointers with null, allowing us to remove the
// function itself.
++NumVFuncs;
+
+ // Detect vfuncs that are referenced as "relative pointers" which are used
+ // in Swift vtables, i.e. entries in the form of:
+ //
+ // i32 trunc (i64 sub (i64 ptrtoint @f, i64 ptrtoint ...)) to i32)
+ //
+ // In this case, replace the whole "sub" expression with constant 0 to
+ // avoid leaving a weird sub(0, symbol) expression behind.
+ replaceRelativePointerUsersWithZero(F);
+
F->replaceNonMetadataUsesWith(ConstantPointerNull::get(F->getType()));
}
EraseUnusedGlobalValue(F);
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 8750eb9ecc4e..b2c2efed7db8 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -208,9 +208,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
// Constants can't be pointers to dynamically allocated memory.
- for (Value::user_iterator UI = GV->user_begin(), E = GV->user_end();
- UI != E;) {
- User *U = *UI++;
+ for (User *U : llvm::make_early_inc_range(GV->users())) {
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getValueOperand();
if (isa<Constant>(V)) {
@@ -703,8 +701,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
!ICmpInst::isSigned(cast<ICmpInst>(U)->getPredicate()) &&
isa<LoadInst>(U->getOperand(0)) &&
isa<ConstantPointerNull>(U->getOperand(1))) {
- assert(isa<GlobalValue>(
- cast<LoadInst>(U->getOperand(0))->getPointerOperand()) &&
+ assert(isa<GlobalValue>(cast<LoadInst>(U->getOperand(0))
+ ->getPointerOperand()
+ ->stripPointerCasts()) &&
"Should be GlobalVariable");
// This and only this kind of non-signed ICmpInst is to be replaced with
// the comparing of the value of the created global init bool later in
@@ -720,22 +719,55 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
/// Return true if all uses of any loads from GV will trap if the loaded value
/// is null. Note that this also permits comparisons of the loaded value
/// against null, as a special case.
-static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
- for (const User *U : GV->users())
- if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
- SmallPtrSet<const PHINode*, 8> PHIs;
- if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+static bool allUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) {
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(GV);
+ while (!Worklist.empty()) {
+ const Value *P = Worklist.pop_back_val();
+ for (auto *U : P->users()) {
+ if (auto *LI = dyn_cast<LoadInst>(U)) {
+ SmallPtrSet<const PHINode *, 8> PHIs;
+ if (!AllUsesOfValueWillTrapIfNull(LI, PHIs))
+ return false;
+ } else if (auto *SI = dyn_cast<StoreInst>(U)) {
+ // Ignore stores to the global.
+ if (SI->getPointerOperand() != P)
+ return false;
+ } else if (auto *CE = dyn_cast<ConstantExpr>(U)) {
+ if (CE->stripPointerCasts() != GV)
+ return false;
+ // Check further the ConstantExpr.
+ Worklist.push_back(CE);
+ } else {
+ // We don't know or understand this user, bail out.
return false;
- } else if (isa<StoreInst>(U)) {
- // Ignore stores to the global.
- } else {
- // We don't know or understand this user, bail out.
- //cerr << "UNKNOWN USER OF GLOBAL!: " << *U;
- return false;
+ }
}
+ }
+
return true;
}
+/// Get all the load and store uses of global variable \p GV.
+static void allUsesOfLoadAndStores(GlobalVariable *GV,
+ SmallVector<Value *, 4> &Uses) {
+ SmallVector<Value *, 4> Worklist;
+ Worklist.push_back(GV);
+ while (!Worklist.empty()) {
+ auto *P = Worklist.pop_back_val();
+ for (auto *U : P->users()) {
+ if (auto *CE = dyn_cast<ConstantExpr>(U)) {
+ Worklist.push_back(CE);
+ continue;
+ }
+
+ assert((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
+ "Expect only load or store instructions");
+ Uses.push_back(U);
+ }
+ }
+}
+
static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (auto UI = V->user_begin(), E = V->user_end(); UI != E; ) {
@@ -817,8 +849,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(
bool AllNonStoreUsesGone = true;
// Replace all uses of loads with uses of uses of the stored value.
- for (Value::user_iterator GUI = GV->user_begin(), E = GV->user_end(); GUI != E;){
- User *GlobalUser = *GUI++;
+ for (User *GlobalUser : llvm::make_early_inc_range(GV->users())) {
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the loads
@@ -934,9 +965,8 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
}
}
- Constant *RepValue = NewGV;
- if (NewGV->getType() != GV->getValueType())
- RepValue = ConstantExpr::getBitCast(RepValue, GV->getValueType());
+ SmallPtrSet<Constant *, 1> RepValues;
+ RepValues.insert(NewGV);
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
@@ -947,9 +977,11 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
GV->getName()+".init", GV->getThreadLocalMode());
bool InitBoolUsed = false;
- // Loop over all uses of GV, processing them in turn.
- while (!GV->use_empty()) {
- if (StoreInst *SI = dyn_cast<StoreInst>(GV->user_back())) {
+ // Loop over all instruction uses of GV, processing them in turn.
+ SmallVector<Value *, 4> Guses;
+ allUsesOfLoadAndStores(GV, Guses);
+ for (auto *U : Guses) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// The global is initialized when the store to it occurs. If the stored
// value is null value, the global bool is set to false, otherwise true.
new StoreInst(ConstantInt::getBool(
@@ -961,12 +993,14 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
continue;
}
- LoadInst *LI = cast<LoadInst>(GV->user_back());
+ LoadInst *LI = cast<LoadInst>(U);
while (!LI->use_empty()) {
Use &LoadUse = *LI->use_begin();
ICmpInst *ICI = dyn_cast<ICmpInst>(LoadUse.getUser());
if (!ICI) {
- LoadUse = RepValue;
+ auto *CE = ConstantExpr::getBitCast(NewGV, LI->getType());
+ RepValues.insert(CE);
+ LoadUse.set(CE);
continue;
}
@@ -1012,40 +1046,53 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, DL, TLI);
- if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, DL, TLI);
+ for (auto *CE : RepValues)
+ ConstantPropUsersOf(CE, DL, TLI);
return NewGV;
}
-/// Scan the use-list of V checking to make sure that there are no complex uses
-/// of V. We permit simple things like dereferencing the pointer, but not
+/// Scan the use-list of GV checking to make sure that there are no complex uses
+/// of GV. We permit simple things like dereferencing the pointer, but not
/// storing through the address, unless it is to the specified global.
static bool
-valueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V,
+valueIsOnlyUsedLocallyOrStoredToOneGlobal(const CallInst *CI,
const GlobalVariable *GV) {
- for (const User *U : V->users()) {
- const Instruction *Inst = cast<Instruction>(U);
+ SmallPtrSet<const Value *, 4> Visited;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(CI);
- if (isa<LoadInst>(Inst) || isa<CmpInst>(Inst)) {
- continue; // Fine, ignore.
- }
+ while (!Worklist.empty()) {
+ const Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
- if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (SI->getOperand(0) == V && SI->getOperand(1) != GV)
- return false; // Storing the pointer itself... bad.
- continue; // Otherwise, storing through it, or storing into GV... fine.
- }
+ for (const Use &VUse : V->uses()) {
+ const User *U = VUse.getUser();
+ if (isa<LoadInst>(U) || isa<CmpInst>(U))
+ continue; // Fine, ignore.
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Inst)) {
- if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV))
- return false;
- continue;
- }
+ if (auto *SI = dyn_cast<StoreInst>(U)) {
+ if (SI->getValueOperand() == V &&
+ SI->getPointerOperand()->stripPointerCasts() != GV)
+ return false; // Storing the pointer somewhere other than GV... bad.
+ continue; // Otherwise, storing through it, or storing into GV... fine.
+ }
- return false;
+ if (auto *BCI = dyn_cast<BitCastInst>(U)) {
+ Worklist.push_back(BCI);
+ continue;
+ }
+
+ if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ Worklist.push_back(GEPI);
+ continue;
+ }
+
+ return false;
+ }
}
+
return true;
}
@@ -1066,12 +1113,12 @@ static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI,
// been reached). To do this, we check to see if all uses of the global
// would trap if the global were null: this proves that they must all
// happen after the malloc.
- if (!AllUsesOfLoadedValueWillTrapIfNull(GV))
+ if (!allUsesOfLoadedValueWillTrapIfNull(GV))
return false;
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This allows the
- // malloc to be stored into the specified global, loaded icmp'd.
+ // malloc to be stored into the specified global, loaded, gep, icmp'd.
// These are all things we could transform to using the global for.
if (!valueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV))
return false;
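A source-level analogue (hypothetical C++, not from this patch) of the
single-malloc global these checks guard: every load of G is immediately
dereferenced, so each use would trap if G were still null, which proves the
uses happen after the store of the malloc result.

    #include <cstdlib>

    static int *G;

    void init() { G = static_cast<int *>(std::malloc(16 * sizeof(int))); }

    int get(unsigned i) { return G[i]; }  // the loaded pointer is always
                                          // dereferenced, so it traps if null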
@@ -1112,6 +1159,7 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// value was null.
if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue() &&
+ StoredOnceVal->getType()->isPointerTy() &&
!NullPointerIsDefined(
nullptr /* F */,
GV->getInitializer()->getType()->getPointerAddressSpace())) {
@@ -1442,8 +1490,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
append_range(UUsers, U->users());
for (auto *UU : UUsers) {
Instruction *UI = cast<Instruction>(UU);
- Instruction *NewU = U->getAsInstruction();
- NewU->insertBefore(UI);
+ Instruction *NewU = U->getAsInstruction(UI);
UI->replaceUsesOfWith(U, NewU);
}
// We've replaced all the uses, so destroy the constant. (destroyConstant
@@ -1456,6 +1503,7 @@ static void makeAllConstantUsesInstructions(Constant *C) {
/// it if possible. If we make a change, return true.
static bool
processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
auto &DL = GV->getParent()->getDataLayout();
@@ -1554,43 +1602,57 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
if (SRAGlobal(GV, DL))
return true;
}
- if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) {
+ Value *StoredOnceValue = GS.getStoredOnceValue();
+ if (GS.StoredType == GlobalStatus::StoredOnce && StoredOnceValue) {
+ // Avoid speculating constant expressions that might trap (div/rem).
+ auto *SOVConstant = dyn_cast<Constant>(StoredOnceValue);
+ if (SOVConstant && SOVConstant->canTrap())
+ return Changed;
+
+ Function &StoreFn =
+ const_cast<Function &>(*GS.StoredOnceStore->getFunction());
+ bool CanHaveNonUndefGlobalInitializer =
+ GetTTI(StoreFn).canHaveNonUndefGlobalInitializerInAddressSpace(
+ GV->getType()->getAddressSpace());
// If the initial value for the global was an undef value, and if only
// one other value was stored into it, we can just change the
// initializer to be the stored value, then delete all stores to the
// global. This allows us to mark it constant.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (isa<UndefValue>(GV->getInitializer())) {
- // Change the initial value here.
- GV->setInitializer(SOVConstant);
-
- // Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
-
- if (GV->use_empty()) {
- LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
- << "simplify all users and delete global!\n");
- GV->eraseFromParent();
- ++NumDeleted;
- }
- ++NumSubstitute;
- return true;
+ // This is restricted to address spaces that allow globals to have
+ // initializers. NVPTX, for example, does not support initializers for
+ // shared memory (AS 3).
+ if (SOVConstant && SOVConstant->getType() == GV->getValueType() &&
+ isa<UndefValue>(GV->getInitializer()) &&
+ CanHaveNonUndefGlobalInitializer) {
+ // Change the initial value here.
+ GV->setInitializer(SOVConstant);
+
+ // Clean up any obviously simplifiable users now.
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), DL, GetTLI);
+
+ if (GV->use_empty()) {
+ LLVM_DEBUG(dbgs() << " *** Substituting initializer allowed us to "
+ << "simplify all users and delete global!\n");
+ GV->eraseFromParent();
+ ++NumDeleted;
}
+ ++NumSubstitute;
+ return true;
+ }
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL,
- GetTLI))
+ if (optimizeOnceStoredGlobal(GV, StoredOnceValue, GS.Ordering, DL, GetTLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
- // boolean.
- if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue)) {
- if (GS.Ordering == AtomicOrdering::NotAtomic) {
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
- ++NumShrunkToBool;
- return true;
- }
+ // boolean. Skip this optimization for AS that doesn't allow an initializer.
+ if (SOVConstant && GS.Ordering == AtomicOrdering::NotAtomic &&
+ (!isa<UndefValue>(GV->getInitializer()) ||
+ CanHaveNonUndefGlobalInitializer)) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
+ ++NumShrunkToBool;
+ return true;
}
}
}
@@ -1602,6 +1664,7 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
/// make a change, return true.
static bool
processGlobal(GlobalValue &GV,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree) {
if (GV.getName().startswith("llvm."))
@@ -1634,7 +1697,8 @@ processGlobal(GlobalValue &GV,
if (GVar->isConstant() || !GVar->hasInitializer())
return Changed;
- return processInternalGlobal(GVar, GS, GetTLI, LookupDomTree) || Changed;
+ return processInternalGlobal(GVar, GS, GetTTI, GetTLI, LookupDomTree) ||
+ Changed;
}
/// Walk all of the direct calls of the specified function, changing them to
@@ -1651,7 +1715,7 @@ static AttributeList StripAttr(LLVMContext &C, AttributeList Attrs,
Attribute::AttrKind A) {
unsigned AttrIndex;
if (Attrs.hasAttrSomewhere(A, &AttrIndex))
- return Attrs.removeAttribute(C, AttrIndex, A);
+ return Attrs.removeAttributeAtIndex(C, AttrIndex, A);
return Attrs;
}
@@ -1864,10 +1928,8 @@ static void RemovePreallocated(Function *F) {
Value *AllocaReplacement = ArgAllocas[AllocArgIndex];
if (!AllocaReplacement) {
auto AddressSpace = UseCall->getType()->getPointerAddressSpace();
- auto *ArgType = UseCall
- ->getAttribute(AttributeList::FunctionIndex,
- Attribute::Preallocated)
- .getValueAsType();
+ auto *ArgType =
+ UseCall->getFnAttr(Attribute::Preallocated).getValueAsType();
auto *InsertBefore = PreallocatedSetup->getNextNonDebugInstruction();
Builder.SetInsertPoint(InsertBefore);
auto *Alloca =
@@ -1897,26 +1959,22 @@ OptimizeFunctions(Module &M,
bool Changed = false;
std::vector<Function *> AllCallsCold;
- for (Module::iterator FI = M.begin(), E = M.end(); FI != E;) {
- Function *F = &*FI++;
- if (hasOnlyColdCalls(*F, GetBFI))
- AllCallsCold.push_back(F);
- }
+ for (Function &F : llvm::make_early_inc_range(M))
+ if (hasOnlyColdCalls(F, GetBFI))
+ AllCallsCold.push_back(&F);
// Optimize functions.
- for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) {
- Function *F = &*FI++;
-
+ for (Function &F : llvm::make_early_inc_range(M)) {
// Don't perform global opt pass on naked functions; we don't want fast
// calling conventions for naked functions.
- if (F->hasFnAttribute(Attribute::Naked))
+ if (F.hasFnAttribute(Attribute::Naked))
continue;
// Functions without names cannot be referenced outside this module.
- if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage())
- F->setLinkage(GlobalValue::InternalLinkage);
+ if (!F.hasName() && !F.isDeclaration() && !F.hasLocalLinkage())
+ F.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*F, NotDiscardableComdats)) {
+ if (deleteIfDead(F, NotDiscardableComdats)) {
Changed = true;
continue;
}
@@ -1931,17 +1989,17 @@ OptimizeFunctions(Module &M,
// some more complicated logic to break these cycles.
// Removing unreachable blocks might invalidate the dominator so we
// recalculate it.
- if (!F->isDeclaration()) {
- if (removeUnreachableBlocks(*F)) {
- auto &DT = LookupDomTree(*F);
- DT.recalculate(*F);
+ if (!F.isDeclaration()) {
+ if (removeUnreachableBlocks(F)) {
+ auto &DT = LookupDomTree(F);
+ DT.recalculate(F);
Changed = true;
}
}
- Changed |= processGlobal(*F, GetTLI, LookupDomTree);
+ Changed |= processGlobal(F, GetTTI, GetTLI, LookupDomTree);
- if (!F->hasLocalLinkage())
+ if (!F.hasLocalLinkage())
continue;
// If we have an inalloca parameter that we can safely remove the
@@ -1949,56 +2007,55 @@ OptimizeFunctions(Module &M,
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
- if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
- !F->hasAddressTaken() && !hasMustTailCallers(F)) {
- RemoveAttribute(F, Attribute::InAlloca);
+ if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
+ !F.hasAddressTaken() && !hasMustTailCallers(&F)) {
+ RemoveAttribute(&F, Attribute::InAlloca);
Changed = true;
}
// FIXME: handle invokes
// FIXME: handle musttail
- if (F->getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
- if (!F->hasAddressTaken() && !hasMustTailCallers(F) &&
- !hasInvokeCallers(F)) {
- RemovePreallocated(F);
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
+ if (!F.hasAddressTaken() && !hasMustTailCallers(&F) &&
+ !hasInvokeCallers(&F)) {
+ RemovePreallocated(&F);
Changed = true;
}
continue;
}
- if (hasChangeableCC(F) && !F->isVarArg() && !F->hasAddressTaken()) {
+ if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
NumInternalFunc++;
- TargetTransformInfo &TTI = GetTTI(*F);
+ TargetTransformInfo &TTI = GetTTI(F);
// Change the calling convention to coldcc if either stress testing is
// enabled or the target would like to use coldcc on functions which are
// cold at all call sites and the callers contain no other non coldcc
// calls.
if (EnableColdCCStressTest ||
- (TTI.useColdCCForColdCall(*F) &&
- isValidCandidateForColdCC(*F, GetBFI, AllCallsCold))) {
- F->setCallingConv(CallingConv::Cold);
- changeCallSitesToColdCC(F);
+ (TTI.useColdCCForColdCall(F) &&
+ isValidCandidateForColdCC(F, GetBFI, AllCallsCold))) {
+ F.setCallingConv(CallingConv::Cold);
+ changeCallSitesToColdCC(&F);
Changed = true;
NumColdCC++;
}
}
- if (hasChangeableCC(F) && !F->isVarArg() &&
- !F->hasAddressTaken()) {
+ if (hasChangeableCC(&F) && !F.isVarArg() && !F.hasAddressTaken()) {
// If this function has a calling convention worth changing, is not a
// varargs function, and is only called directly, promote it to use the
// Fast calling convention.
- F->setCallingConv(CallingConv::Fast);
- ChangeCalleesToFastCall(F);
+ F.setCallingConv(CallingConv::Fast);
+ ChangeCalleesToFastCall(&F);
++NumFastCallFns;
Changed = true;
}
- if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) &&
- !F->hasAddressTaken()) {
+ if (F.getAttributes().hasAttrSomewhere(Attribute::Nest) &&
+ !F.hasAddressTaken()) {
// The function is not used by a trampoline intrinsic, so it is safe
// to remove the 'nest' attribute.
- RemoveAttribute(F, Attribute::Nest);
+ RemoveAttribute(&F, Attribute::Nest);
++NumNestRemoved;
Changed = true;
}
@@ -2008,35 +2065,34 @@ OptimizeFunctions(Module &M,
static bool
OptimizeGlobalVars(Module &M,
+ function_ref<TargetTransformInfo &(Function &)> GetTTI,
function_ref<TargetLibraryInfo &(Function &)> GetTLI,
function_ref<DominatorTree &(Function &)> LookupDomTree,
SmallPtrSetImpl<const Comdat *> &NotDiscardableComdats) {
bool Changed = false;
- for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
- GVI != E; ) {
- GlobalVariable *GV = &*GVI++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global variables without names cannot be referenced outside this module.
- if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage())
- GV->setLinkage(GlobalValue::InternalLinkage);
+ if (!GV.hasName() && !GV.isDeclaration() && !GV.hasLocalLinkage())
+ GV.setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
- if (GV->hasInitializer())
- if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
+ if (GV.hasInitializer())
+ if (auto *C = dyn_cast<Constant>(GV.getInitializer())) {
auto &DL = M.getDataLayout();
// TLI is not used in the case of a Constant, so use default nullptr
// for that optional parameter, since we don't have a Function to
// provide GetTLI anyway.
Constant *New = ConstantFoldConstant(C, DL, /*TLI*/ nullptr);
if (New != C)
- GV->setInitializer(New);
+ GV.setInitializer(New);
}
- if (deleteIfDead(*GV, NotDiscardableComdats)) {
+ if (deleteIfDead(GV, NotDiscardableComdats)) {
Changed = true;
continue;
}
- Changed |= processGlobal(*GV, GetTLI, LookupDomTree);
+ Changed |= processGlobal(GV, GetTTI, GetTLI, LookupDomTree);
}
return Changed;
}
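
The loops above are the core mechanical change in this file: manual `GVI != E; GVI++`-style bookkeeping is replaced with llvm::make_early_inc_range, which advances the underlying iterator before the loop body runs, so the current entry can be erased from the module's symbol-table list without invalidating the traversal. A minimal standalone sketch of the idiom (not part of the patch; the helper name and the filtering condition are made up for illustration):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Hypothetical helper: erase unused declarations while iterating the module.
// make_early_inc_range pre-increments the iterator, so eraseFromParent() on
// the current Function does not break the loop.
static bool dropUnusedDeclarations(Module &M) {
  bool Changed = false;
  for (Function &F : make_early_inc_range(M)) {
    if (F.isDeclaration() && F.use_empty()) {
      F.eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}

The same idiom appears below for M.globals(), M.aliases(), the users of __cxa_atexit, and the block and instruction traversals in GlobalSplit.cpp and IROutliner.cpp.
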
@@ -2425,24 +2481,21 @@ OptimizeGlobalAliases(Module &M,
for (GlobalValue *GV : Used.used())
Used.compilerUsedErase(GV);
- for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E;) {
- GlobalAlias *J = &*I++;
-
+ for (GlobalAlias &J : llvm::make_early_inc_range(M.aliases())) {
// Aliases without names cannot be referenced outside this module.
- if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage())
- J->setLinkage(GlobalValue::InternalLinkage);
+ if (!J.hasName() && !J.isDeclaration() && !J.hasLocalLinkage())
+ J.setLinkage(GlobalValue::InternalLinkage);
- if (deleteIfDead(*J, NotDiscardableComdats)) {
+ if (deleteIfDead(J, NotDiscardableComdats)) {
Changed = true;
continue;
}
// If the alias can change at link time, nothing can be done - bail out.
- if (J->isInterposable())
+ if (J.isInterposable())
continue;
- Constant *Aliasee = J->getAliasee();
+ Constant *Aliasee = J.getAliasee();
GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts());
// We can't trivially replace the alias with the aliasee if the aliasee is
// non-trivial in some way. We also can't replace the alias with the aliasee
@@ -2455,31 +2508,31 @@ OptimizeGlobalAliases(Module &M,
// Make all users of the alias use the aliasee instead.
bool RenameTarget;
- if (!hasUsesToReplace(*J, Used, RenameTarget))
+ if (!hasUsesToReplace(J, Used, RenameTarget))
continue;
- J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
+ J.replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J.getType()));
++NumAliasesResolved;
Changed = true;
if (RenameTarget) {
// Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(&*J);
- Target->setLinkage(J->getLinkage());
- Target->setDSOLocal(J->isDSOLocal());
- Target->setVisibility(J->getVisibility());
- Target->setDLLStorageClass(J->getDLLStorageClass());
+ Target->takeName(&J);
+ Target->setLinkage(J.getLinkage());
+ Target->setDSOLocal(J.isDSOLocal());
+ Target->setVisibility(J.getVisibility());
+ Target->setDLLStorageClass(J.getDLLStorageClass());
- if (Used.usedErase(&*J))
+ if (Used.usedErase(&J))
Used.usedInsert(Target);
- if (Used.compilerUsedErase(&*J))
+ if (Used.compilerUsedErase(&J))
Used.compilerUsedInsert(Target);
- } else if (mayHaveOtherReferences(*J, Used))
+ } else if (mayHaveOtherReferences(J, Used))
continue;
// Delete the alias.
- M.getAliasList().erase(J);
+ M.getAliasList().erase(&J);
++NumAliasesRemoved;
Changed = true;
}
@@ -2526,7 +2579,7 @@ static bool cxxDtorIsEmpty(const Function &Fn) {
return false;
for (auto &I : Fn.getEntryBlock()) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
if (isa<ReturnInst>(I))
return true;
@@ -2552,12 +2605,11 @@ static bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
// and remove them.
bool Changed = false;
- for (auto I = CXAAtExitFn->user_begin(), E = CXAAtExitFn->user_end();
- I != E;) {
+ for (User *U : llvm::make_early_inc_range(CXAAtExitFn->users())) {
// We're only interested in calls. Theoretically, we could handle invoke
// instructions as well, but neither llvm-gcc nor clang generate invokes
// to __cxa_atexit.
- CallInst *CI = dyn_cast<CallInst>(*I++);
+ CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
@@ -2614,8 +2666,8 @@ static bool optimizeGlobalsInModule(
});
// Optimize non-address-taken globals.
- LocalChange |=
- OptimizeGlobalVars(M, GetTLI, LookupDomTree, NotDiscardableComdats);
+ LocalChange |= OptimizeGlobalVars(M, GetTTI, GetTLI, LookupDomTree,
+ NotDiscardableComdats);
// Resolve aliases, when possible.
LocalChange |= OptimizeGlobalAliases(M, NotDiscardableComdats);
diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index 365b269dc3bf..e7d698c42fcf 100644
--- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -154,11 +154,8 @@ static bool splitGlobals(Module &M) {
return false;
bool Changed = false;
- for (auto I = M.global_begin(); I != M.global_end();) {
- GlobalVariable &GV = *I;
- ++I;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals()))
Changed |= splitGlobal(GV);
- }
return Changed;
}
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index adf9ffba5780..b8a314c54f18 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
@@ -33,6 +34,10 @@
using namespace llvm;
using namespace IRSimilarity;
+// A command-line flag used for debugging to exclude branches from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableBranches;
+
// Set to true if the user wants the ir outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
// functions. Since the outliner is confined to a single module (modulo LTO),
@@ -71,8 +76,12 @@ struct OutlinableGroup {
/// for extraction.
bool IgnoreGroup = false;
- /// The return block for the overall function.
- BasicBlock *EndBB = nullptr;
+ /// The return blocks for the overall function.
+ DenseMap<Value *, BasicBlock *> EndBBs;
+
+ /// The PHI blocks for the overall function, keyed by the return value of
+ /// their corresponding return block.
+ DenseMap<Value *, BasicBlock *> PHIBlocks;
/// A set containing the different GVN store sets needed. Each array contains
/// a sorted list of the different values that need to be stored into output
@@ -87,6 +96,14 @@ struct OutlinableGroup {
/// index in ArgumentTypes is an output argument.
unsigned NumAggregateInputs = 0;
+ /// The mapping of the canonical numbering of the values in outlined sections
+ /// to specific arguments.
+ DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
+
+ /// The number of branches in the region that target a basic block outside
+ /// of the region.
+ unsigned BranchesToOutside = 0;
+
/// The number of instructions that will be outlined by extracting \ref
/// Regions.
InstructionCost Benefit = 0;
@@ -118,20 +135,67 @@ struct OutlinableGroup {
/// \param SourceBB - the BasicBlock to pull Instructions from.
/// \param TargetBB - the BasicBlock to put Instructions into.
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
- BasicBlock::iterator BBCurr, BBEnd, BBNext;
- for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
- BBCurr = BBNext) {
- BBNext = std::next(BBCurr);
- BBCurr->moveBefore(TargetBB, TargetBB.end());
- }
+ for (Instruction &I : llvm::make_early_inc_range(SourceBB))
+ I.moveBefore(TargetBB, TargetBB.end());
+}
+
+/// A function to sort the keys of \p Map, which must be a mapping of constant
+/// values to basic blocks, and return them in \p SortedKeys.
+///
+/// \param SortedKeys - The vector in which the sorted keys will be returned.
+/// \param Map - The DenseMap containing keys to sort.
+static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
+ DenseMap<Value *, BasicBlock *> &Map) {
+ for (auto &VtoBB : Map)
+ SortedKeys.push_back(VtoBB.first);
+
+ stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
+ const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
+ const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ assert(RHSC && "Not a constant integer in return value?");
+ assert(LHSC && "Not a constant integer in return value?");
+
+ return LHSC->getLimitedValue() < RHSC->getLimitedValue();
+ });
+}
+
+Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
+ Value *V) {
+ Optional<unsigned> GVN = Candidate->getGVN(V);
+ assert(GVN.hasValue() && "No GVN for incoming value");
+ Optional<unsigned> CanonNum = Candidate->getCanonicalNum(*GVN);
+ Optional<unsigned> FirstGVN = Other.Candidate->fromCanonicalNum(*CanonNum);
+ Optional<Value *> FoundValueOpt = Other.Candidate->fromGVN(*FirstGVN);
+ return FoundValueOpt.getValueOr(nullptr);
}
void OutlinableRegion::splitCandidate() {
assert(!CandidateSplit && "Candidate already split!");
+ Instruction *BackInst = Candidate->backInstruction();
+
+ Instruction *EndInst = nullptr;
+ // Check whether the last instruction is a terminator; if it is, we do
+ // not split on the following instruction and leave the block as it is. We
+ // also check that the last instruction is not in the final basic block of
+ // the function, otherwise the check for whether the current following
+ // instruction matches the previously recorded instruction will be incorrect.
+ if (!BackInst->isTerminator() ||
+ BackInst->getParent() != &BackInst->getFunction()->back()) {
+ EndInst = Candidate->end()->Inst;
+ assert(EndInst && "Expected an end instruction?");
+ }
+
+ // We check if the current instruction following the last instruction in the
+ // region is the same as the recorded instruction following the last
+ // instruction. If they do not match, there could be problems in rewriting
+ // the program after outlining, so we ignore it.
+ if (!BackInst->isTerminator() &&
+ EndInst != BackInst->getNextNonDebugInstruction())
+ return;
+
Instruction *StartInst = (*Candidate->begin()).Inst;
- Instruction *EndInst = (*Candidate->end()).Inst;
- assert(StartInst && EndInst && "Expected a start and end instruction?");
+ assert(StartInst && "Expected a start instruction?");
StartBB = StartInst->getParent();
PrevBB = StartBB;
@@ -153,13 +217,20 @@ void OutlinableRegion::splitCandidate() {
std::string OriginalName = PrevBB->getName().str();
StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
-
- // This is the case for the inner block since we do not have to include
- // multiple blocks.
- EndBB = StartBB;
- FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+ PrevBB->replaceSuccessorsPhiUsesWith(PrevBB, StartBB);
CandidateSplit = true;
+ if (!BackInst->isTerminator()) {
+ EndBB = EndInst->getParent();
+ FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
+ EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
+ FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
+ return;
+ }
+
+ EndBB = BackInst->getParent();
+ EndsInBranch = true;
+ FollowBB = nullptr;
}
void OutlinableRegion::reattachCandidate() {
@@ -180,7 +251,6 @@ void OutlinableRegion::reattachCandidate() {
// inst3
// inst4
assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
- assert(FollowBB != nullptr && "StartBB for Candidate is not defined!");
// StartBB should only have one predecessor since we put an unconditional
// branch at the end of PrevBB when we split the BasicBlock.
@@ -189,21 +259,24 @@ void OutlinableRegion::reattachCandidate() {
"No Predecessor for the region start basic block!");
assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
- assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
PrevBB->getTerminator()->eraseFromParent();
- EndBB->getTerminator()->eraseFromParent();
moveBBContents(*StartBB, *PrevBB);
BasicBlock *PlacementBB = PrevBB;
if (StartBB != EndBB)
PlacementBB = EndBB;
- moveBBContents(*FollowBB, *PlacementBB);
+ if (!EndsInBranch && PlacementBB->getUniqueSuccessor() != nullptr) {
+ assert(FollowBB != nullptr && "FollowBB for Candidate is not defined!");
+ assert(PlacementBB->getTerminator() && "Terminator removed from EndBB!");
+ PlacementBB->getTerminator()->eraseFromParent();
+ moveBBContents(*FollowBB, *PlacementBB);
+ PlacementBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
+ FollowBB->eraseFromParent();
+ }
PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
- PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
StartBB->eraseFromParent();
- FollowBB->eraseFromParent();
// Make sure to save changes back to the StartBB.
StartBB = PrevBB;
@@ -261,8 +334,9 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
// division instruction for targets that have a native division instruction.
// To be overly conservative, we only add 1 to the number of instructions for
// each division instruction.
- for (Instruction &I : *StartBB) {
- switch (I.getOpcode()) {
+ for (IRInstructionData &ID : *Candidate) {
+ Instruction *I = ID.Inst;
+ switch (I->getOpcode()) {
case Instruction::FDiv:
case Instruction::FRem:
case Instruction::SDiv:
@@ -272,7 +346,7 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
Benefit += 1;
break;
default:
- Benefit += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+ Benefit += TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
break;
}
}
@@ -373,8 +447,24 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
unsigned FunctionNameSuffix) {
assert(!Group.OutlinedFunction && "Function is already defined!");
+ Type *RetTy = Type::getVoidTy(M.getContext());
+ // All extracted functions _should_ have the same return type at this point
+ // since the similarity identifier ensures that all branches outside of the
+ // region occur in the same place.
+
+ // NOTE: Should we ever move to the model that uses a switch at every point
+ // needed, meaning that we could branch within the region or out, it is
+ // possible that we will need to switch to using the most general case all of
+ // the time.
+ for (OutlinableRegion *R : Group.Regions) {
+ Type *ExtractedFuncType = R->ExtractedFunction->getReturnType();
+ if ((RetTy->isVoidTy() && !ExtractedFuncType->isVoidTy()) ||
+ (RetTy->isIntegerTy(1) && ExtractedFuncType->isIntegerTy(16)))
+ RetTy = ExtractedFuncType;
+ }
+
Group.OutlinedFunctionType = FunctionType::get(
- Type::getVoidTy(M.getContext()), Group.ArgumentTypes, false);
+ RetTy, Group.ArgumentTypes, false);
// These functions will only be called from within the same module, so
// we can set an internal linkage.
@@ -430,21 +520,23 @@ Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
///
/// \param [in] Old - The function to move the basic blocks from.
/// \param [in] New - The function to move the basic blocks to.
-/// \returns the first return block for the function in New.
-static BasicBlock *moveFunctionData(Function &Old, Function &New) {
- Function::iterator CurrBB, NextBB, FinalBB;
- BasicBlock *NewEnd = nullptr;
- std::vector<Instruction *> DebugInsts;
- for (CurrBB = Old.begin(), FinalBB = Old.end(); CurrBB != FinalBB;
- CurrBB = NextBB) {
- NextBB = std::next(CurrBB);
- CurrBB->removeFromParent();
- CurrBB->insertInto(&New);
- Instruction *I = CurrBB->getTerminator();
- if (isa<ReturnInst>(I))
- NewEnd = &(*CurrBB);
-
- for (Instruction &Val : *CurrBB) {
+/// \param [out] NewEnds - The return blocks of the new overall function.
+static void moveFunctionData(Function &Old, Function &New,
+ DenseMap<Value *, BasicBlock *> &NewEnds) {
+ for (BasicBlock &CurrBB : llvm::make_early_inc_range(Old)) {
+ CurrBB.removeFromParent();
+ CurrBB.insertInto(&New);
+ Instruction *I = CurrBB.getTerminator();
+
+ // Each block in which we find a return instruction is a potential exit
+ // path for the function. We keep track of each of these blocks here, keyed
+ // by the return value.
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
+ NewEnds.insert(std::make_pair(RI->getReturnValue(), &CurrBB));
+
+ std::vector<Instruction *> DebugInsts;
+
+ for (Instruction &Val : CurrBB) {
// We must handle the scoping of called functions differently than
// other outlined instructions.
if (!isa<CallInst>(&Val)) {
@@ -476,8 +568,7 @@ static BasicBlock *moveFunctionData(Function &Old, Function &New) {
I->eraseFromParent();
}
- assert(NewEnd && "No return instruction for new function?");
- return NewEnd;
+ assert(NewEnds.size() > 0 && "No return instruction for new function?");
}
/// Find the constants that will need to be lifted into arguments
@@ -664,11 +755,22 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// function to account for the extracted constants, we have two different
// counters as we find extracted arguments, and as we come across overall
// arguments.
+
+ // Additionally, in our first pass, for the first extracted function,
+ // we find argument locations for the canonical value numbering. This
+ // numbering overrides any discovered location for the extracted code.
for (unsigned InputVal : InputGVNs) {
+ Optional<unsigned> CanonicalNumberOpt = C.getCanonicalNum(InputVal);
+ assert(CanonicalNumberOpt.hasValue() && "Canonical number not found?");
+ unsigned CanonicalNumber = CanonicalNumberOpt.getValue();
+
Optional<Value *> InputOpt = C.fromGVN(InputVal);
assert(InputOpt.hasValue() && "Global value number not found?");
Value *Input = InputOpt.getValue();
+ DenseMap<unsigned, unsigned>::iterator AggArgIt =
+ Group.CanonicalNumberToAggArg.find(CanonicalNumber);
+
if (!Group.InputTypesSet) {
Group.ArgumentTypes.push_back(Input->getType());
// If the input value has a swifterr attribute, make sure to mark the
@@ -684,17 +786,34 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
// Check if we have a constant. If we do add it to the overall argument
// number to Constant map for the region, and continue to the next input.
if (Constant *CST = dyn_cast<Constant>(Input)) {
- Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ if (AggArgIt != Group.CanonicalNumberToAggArg.end())
+ Region.AggArgToConstant.insert(std::make_pair(AggArgIt->second, CST));
+ else {
+ Group.CanonicalNumberToAggArg.insert(
+ std::make_pair(CanonicalNumber, TypeIndex));
+ Region.AggArgToConstant.insert(std::make_pair(TypeIndex, CST));
+ }
TypeIndex++;
continue;
}
// It is not a constant, so we create the mapping from extracted argument list
- // to the overall argument list.
+ // to the overall argument list, using the canonical location, if it exists.
assert(ArgInputs.count(Input) && "Input cannot be found!");
- Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
- Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ if (AggArgIt != Group.CanonicalNumberToAggArg.end()) {
+ if (OriginalIndex != AggArgIt->second)
+ Region.ChangedArgOrder = true;
+ Region.ExtractedArgToAgg.insert(
+ std::make_pair(OriginalIndex, AggArgIt->second));
+ Region.AggArgToExtracted.insert(
+ std::make_pair(AggArgIt->second, OriginalIndex));
+ } else {
+ Group.CanonicalNumberToAggArg.insert(
+ std::make_pair(CanonicalNumber, TypeIndex));
+ Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, TypeIndex));
+ Region.AggArgToExtracted.insert(std::make_pair(TypeIndex, OriginalIndex));
+ }
OriginalIndex++;
TypeIndex++;
}
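
The lookup-or-insert pattern above keeps one CanonicalNumberToAggArg map per group: the first region seeds it, and later regions whose extractor reports the same inputs in a different order hit the already-recorded slots, which is also how ChangedArgOrder gets set. A small self-contained sketch with made-up canonical numbers and indices (not taken from the patch):

#include "llvm/ADT/DenseMap.h"
#include <cassert>
using namespace llvm;

int main() {
  // Aggregate argument slots keyed by canonical value number, as seeded by
  // the first extracted region: canonical 5 -> slot 0, canonical 7 -> slot 1.
  DenseMap<unsigned, unsigned> CanonicalNumberToAggArg;
  CanonicalNumberToAggArg.insert({5, 0});
  CanonicalNumberToAggArg.insert({7, 1});

  // A later region's extractor reports the same values in the opposite order,
  // so extracted index 0 carries canonical number 7 and index 1 carries 5.
  unsigned CanonicalForExtractedIdx[2] = {7, 5};

  bool ChangedArgOrder = false;
  DenseMap<unsigned, unsigned> ExtractedArgToAgg;
  for (unsigned ExtractedIdx = 0; ExtractedIdx < 2; ++ExtractedIdx) {
    auto It = CanonicalNumberToAggArg.find(CanonicalForExtractedIdx[ExtractedIdx]);
    assert(It != CanonicalNumberToAggArg.end() && "Canonical number not seeded");
    if (It->second != ExtractedIdx)
      ChangedArgOrder = true;
    ExtractedArgToAgg.insert({ExtractedIdx, It->second});
  }

  // ChangedArgOrder is now true, so a later region cannot simply swap the
  // callee and must rebuild its argument list in aggregate order.
  assert(ChangedArgOrder);
  return 0;
}
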
@@ -718,10 +837,41 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
/// \param [in] Outputs - The values found by the code extractor.
static void
findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
- ArrayRef<Value *> Outputs) {
+ SetVector<Value *> &Outputs) {
OutlinableGroup &Group = *Region.Parent;
IRSimilarityCandidate &C = *Region.Candidate;
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ C.getBasicBlocks(BBSet, BE);
+
+ // Find the exits to the region.
+ SmallPtrSet<BasicBlock *, 1> Exits;
+ for (BasicBlock *Block : BE)
+ for (BasicBlock *Succ : successors(Block))
+ if (!BBSet.contains(Succ))
+ Exits.insert(Succ);
+
+ // After determining which blocks exit to PHINodes, we add these PHINodes to
+ // the set of outputs to be processed. We also check the incoming values of
+ // the PHINodes for whether they should no longer be considered outputs.
+ for (BasicBlock *ExitBB : Exits) {
+ for (PHINode &PN : ExitBB->phis()) {
+ // Find all incoming values from the outlining region.
+ SmallVector<unsigned, 2> IncomingVals;
+ for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
+ if (BBSet.contains(PN.getIncomingBlock(Idx)))
+ IncomingVals.push_back(Idx);
+
+ // Do not process PHI if there is one (or fewer) predecessor from region.
+ if (IncomingVals.size() <= 1)
+ continue;
+
+ Region.IgnoreRegion = true;
+ return;
+ }
+ }
+
// This counts the argument number in the extracted function.
unsigned OriginalIndex = Region.NumExtractedInputs;
@@ -797,7 +947,7 @@ void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
// Map the outputs found by the CodeExtractor to the arguments found for
// the overall function.
- findExtractedOutputToOverallOutputMapping(Region, Outputs.getArrayRef());
+ findExtractedOutputToOverallOutputMapping(Region, Outputs);
}
/// Replace the extracted function in the Region with a call to the overall
@@ -820,9 +970,10 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
assert(AggFunc && "Function to replace with is nullptr?");
// If the arguments are the same size, there are no values that need to be
- // made argument, or different output registers to handle. We can simply
- // replace the called function in this case.
- if (AggFunc->arg_size() == Call->arg_size()) {
+ // made into an argument, the argument ordering has not been changed, and
+ // there are no different output registers to handle. We can simply replace
+ // the called function in this case.
+ if (!Region.ChangedArgOrder && AggFunc->arg_size() == Call->arg_size()) {
LLVM_DEBUG(dbgs() << "Replace call to " << *Call << " with call to "
<< *AggFunc << " with same number of arguments\n");
Call->setCalledFunction(AggFunc);
@@ -895,6 +1046,9 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// Transfer any debug information.
Call->setDebugLoc(Region.Call->getDebugLoc());
+ // Since our output may determine which branch we go to, we make sure to
+ // propagate this new call value through the module.
+ OldCall->replaceAllUsesWith(Call);
// Remove the old instruction.
OldCall->eraseFromParent();
@@ -913,13 +1067,23 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
// region with the arguments of the function for an OutlinableGroup.
//
/// \param [in] Region - The region of extracted code to be changed.
-/// \param [in,out] OutputBB - The BasicBlock for the output stores for this
+/// \param [in,out] OutputBBs - The BasicBlocks for the output stores for this
/// region.
-static void replaceArgumentUses(OutlinableRegion &Region,
- BasicBlock *OutputBB) {
+/// \param [in] FirstFunction - A flag to indicate whether we are using this
+/// function to define the overall outlined function for all the regions, or
+/// if we are operating on one of the following regions.
+static void
+replaceArgumentUses(OutlinableRegion &Region,
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ bool FirstFunction = false) {
OutlinableGroup &Group = *Region.Parent;
assert(Region.ExtractedFunction && "Region has no extracted function?");
+ Function *DominatingFunction = Region.ExtractedFunction;
+ if (FirstFunction)
+ DominatingFunction = Group.OutlinedFunction;
+ DominatorTree DT(*DominatingFunction);
+
for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
ArgIdx++) {
assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
@@ -946,11 +1110,53 @@ static void replaceArgumentUses(OutlinableRegion &Region,
assert(InstAsUser && "User is nullptr!");
Instruction *I = cast<Instruction>(InstAsUser);
- I->setDebugLoc(DebugLoc());
- LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
- << *OutputBB << "\n");
+ BasicBlock *BB = I->getParent();
+ SmallVector<BasicBlock *, 4> Descendants;
+ DT.getDescendants(BB, Descendants);
+ bool EdgeAdded = false;
+ if (Descendants.size() == 0) {
+ EdgeAdded = true;
+ DT.insertEdge(&DominatingFunction->getEntryBlock(), BB);
+ DT.getDescendants(BB, Descendants);
+ }
+
+ // Iterate over the following blocks, looking for return instructions.
+ // If we find one, find the corresponding output block for the return value
+ // and move our store instruction there.
+ for (BasicBlock *DescendBB : Descendants) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(DescendBB->getTerminator());
+ if (!RI)
+ continue;
+ Value *RetVal = RI->getReturnValue();
+ auto VBBIt = OutputBBs.find(RetVal);
+ assert(VBBIt != OutputBBs.end() && "Could not find output value!");
+
+ // If this is storing a PHINode, we must make sure it is included in the
+ // overall function.
+ StoreInst *SI = cast<StoreInst>(I);
+
+ Value *ValueOperand = SI->getValueOperand();
+
+ StoreInst *NewI = cast<StoreInst>(I->clone());
+ NewI->setDebugLoc(DebugLoc());
+ BasicBlock *OutputBB = VBBIt->second;
+ OutputBB->getInstList().push_back(NewI);
+ LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
+ << *OutputBB << "\n");
- I->moveBefore(*OutputBB, OutputBB->end());
+ if (FirstFunction)
+ continue;
+ Value *CorrVal =
+ Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
+ assert(CorrVal && "Value is nullptr?");
+ NewI->setOperand(0, CorrVal);
+ }
+
+ // If we added an edge for basic blocks without a predecessor, we remove it
+ // here.
+ if (EdgeAdded)
+ DT.deleteEdge(&DominatingFunction->getEntryBlock(), BB);
+ I->eraseFromParent();
LLVM_DEBUG(dbgs() << "Replacing uses of output " << *Arg << " in function "
<< *Region.ExtractedFunction << " with " << *AggArg
@@ -990,69 +1196,53 @@ void replaceConstants(OutlinableRegion &Region) {
}
}
-/// For the given function, find all the nondebug or lifetime instructions,
-/// and return them as a vector. Exclude any blocks in \p ExludeBlocks.
-///
-/// \param [in] F - The function we collect the instructions from.
-/// \param [in] ExcludeBlocks - BasicBlocks to ignore.
-/// \returns the list of instructions extracted.
-static std::vector<Instruction *>
-collectRelevantInstructions(Function &F,
- DenseSet<BasicBlock *> &ExcludeBlocks) {
- std::vector<Instruction *> RelevantInstructions;
-
- for (BasicBlock &BB : F) {
- if (ExcludeBlocks.contains(&BB))
- continue;
-
- for (Instruction &Inst : BB) {
- if (Inst.isLifetimeStartOrEnd())
- continue;
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- RelevantInstructions.push_back(&Inst);
- }
- }
-
- return RelevantInstructions;
-}
-
/// It is possible that there is a basic block that already performs the same
/// stores. This returns a duplicate block, if it exists.
///
-/// \param OutputBB [in] the block we are looking for a duplicate of.
+/// \param OutputBBs [in] the blocks we are looking for a duplicate of.
/// \param OutputStoreBBs [in] The existing output blocks.
/// \returns an optional value with the number of the output block if there is a match.
-Optional<unsigned>
-findDuplicateOutputBlock(BasicBlock *OutputBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
+Optional<unsigned> findDuplicateOutputBlock(
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
- bool WrongInst = false;
- bool WrongSize = false;
+ bool Mismatch = false;
unsigned MatchingNum = 0;
- for (BasicBlock *CompBB : OutputStoreBBs) {
- WrongInst = false;
- if (CompBB->size() - 1 != OutputBB->size()) {
- WrongSize = true;
- MatchingNum++;
- continue;
- }
-
- WrongSize = false;
- BasicBlock::iterator NIt = OutputBB->begin();
- for (Instruction &I : *CompBB) {
- if (isa<BranchInst>(&I))
- continue;
+ // We compare the new set of output blocks to the other sets of output blocks.
+ // If they are the same number, and have identical instructions, they are
+ // considered to be the same.
+ for (DenseMap<Value *, BasicBlock *> &CompBBs : OutputStoreBBs) {
+ Mismatch = false;
+ for (std::pair<Value *, BasicBlock *> &VToB : CompBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator OutputBBIt =
+ OutputBBs.find(VToB.first);
+ if (OutputBBIt == OutputBBs.end()) {
+ Mismatch = true;
+ break;
+ }
- if (!I.isIdenticalTo(&(*NIt))) {
- WrongInst = true;
+ BasicBlock *CompBB = VToB.second;
+ BasicBlock *OutputBB = OutputBBIt->second;
+ if (CompBB->size() - 1 != OutputBB->size()) {
+ Mismatch = true;
break;
}
- NIt++;
+ BasicBlock::iterator NIt = OutputBB->begin();
+ for (Instruction &I : *CompBB) {
+ if (isa<BranchInst>(&I))
+ continue;
+
+ if (!I.isIdenticalTo(&(*NIt))) {
+ Mismatch = true;
+ break;
+ }
+
+ NIt++;
+ }
}
- if (!WrongInst && !WrongSize)
+
+ if (!Mismatch)
return MatchingNum;
MatchingNum++;
@@ -1061,95 +1251,130 @@ findDuplicateOutputBlock(BasicBlock *OutputBB,
return None;
}
+/// Remove empty output blocks from the outlined region.
+///
+/// \param BlocksToPrune - Mapping of return values to output blocks for the \p
+/// Region.
+/// \param Region - The OutlinableRegion we are analyzing.
+static bool
+analyzeAndPruneOutputBlocks(DenseMap<Value *, BasicBlock *> &BlocksToPrune,
+ OutlinableRegion &Region) {
+ bool AllRemoved = true;
+ Value *RetValueForBB;
+ BasicBlock *NewBB;
+ SmallVector<Value *, 4> ToRemove;
+ // Iterate over the output blocks created in the outlined section.
+ for (std::pair<Value *, BasicBlock *> &VtoBB : BlocksToPrune) {
+ RetValueForBB = VtoBB.first;
+ NewBB = VtoBB.second;
+
+ // If the block has no instructions, we remove it from the module, and also
+ // mark the value for removal from the return value to output block mapping.
+ if (NewBB->size() == 0) {
+ NewBB->eraseFromParent();
+ ToRemove.push_back(RetValueForBB);
+ continue;
+ }
+
+ // Mark that we could not remove all the blocks since they were not all
+ // empty.
+ AllRemoved = false;
+ }
+
+ // Remove the return value from the mapping.
+ for (Value *V : ToRemove)
+ BlocksToPrune.erase(V);
+
+ // Mark the region as having the no output scheme.
+ if (AllRemoved)
+ Region.OutputBlockNum = -1;
+
+ return AllRemoved;
+}
+
/// For the outlined section, move the needed StoreInsts for the output
/// registers into their own block. Then, determine if there is a duplicate
/// output block already created.
///
/// \param [in] OG - The OutlinableGroup of regions to be outlined.
/// \param [in] Region - The OutlinableRegion that is being analyzed.
-/// \param [in,out] OutputBB - the block that stores for this region will be
+/// \param [in,out] OutputBBs - the blocks that the stores for this region will be
/// placed in.
-/// \param [in] EndBB - the final block of the extracted function.
+/// \param [in] EndBBs - the final blocks of the extracted function.
/// \param [in] OutputMappings - The mapping of values that have
/// been replaced by a new output value.
/// \param [in,out] OutputStoreBBs - The existing output blocks.
-static void
-alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
- BasicBlock *OutputBB, BasicBlock *EndBB,
- const DenseMap<Value *, Value *> &OutputMappings,
- std::vector<BasicBlock *> &OutputStoreBBs) {
- DenseSet<unsigned> ValuesToFind(Region.GVNStores.begin(),
- Region.GVNStores.end());
-
- // We iterate over the instructions in the extracted function, and find the
- // global value number of the instructions. If we find a value that should
- // be contained in a store, we replace the uses of the value with the value
- // from the overall function, so that the store is storing the correct
- // value from the overall function.
- DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
- OutputStoreBBs.end());
- ExcludeBBs.insert(OutputBB);
- std::vector<Instruction *> ExtractedFunctionInsts =
- collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
- std::vector<Instruction *> OverallFunctionInsts =
- collectRelevantInstructions(*OG.OutlinedFunction, ExcludeBBs);
-
- assert(ExtractedFunctionInsts.size() == OverallFunctionInsts.size() &&
- "Number of relevant instructions not equal!");
-
- unsigned NumInstructions = ExtractedFunctionInsts.size();
- for (unsigned Idx = 0; Idx < NumInstructions; Idx++) {
- Value *V = ExtractedFunctionInsts[Idx];
-
- if (OutputMappings.find(V) != OutputMappings.end())
- V = OutputMappings.find(V)->second;
- Optional<unsigned> GVN = Region.Candidate->getGVN(V);
-
- // If we have found one of the stored values for output, replace the value
- // with the corresponding one from the overall function.
- if (GVN.hasValue() && ValuesToFind.erase(GVN.getValue())) {
- V->replaceAllUsesWith(OverallFunctionInsts[Idx]);
- if (ValuesToFind.size() == 0)
- break;
- }
-
- if (ValuesToFind.size() == 0)
- break;
- }
-
- assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
-
- // If the size of the block is 0, then there are no stores, and we do not
- // need to save this block.
- if (OutputBB->size() == 0) {
- Region.OutputBlockNum = -1;
- OutputBB->eraseFromParent();
+static void alignOutputBlockWithAggFunc(
+ OutlinableGroup &OG, OutlinableRegion &Region,
+ DenseMap<Value *, BasicBlock *> &OutputBBs,
+ DenseMap<Value *, BasicBlock *> &EndBBs,
+ const DenseMap<Value *, Value *> &OutputMappings,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
+ // If none of the output blocks have any instructions, this means that we do
+ // not have to determine whether they match any of the other output schemes,
+ // and we don't have to do anything else.
+ if (analyzeAndPruneOutputBlocks(OutputBBs, Region))
return;
- }
- // Determine is there is a duplicate block.
+ // Determine if there is a duplicate set of blocks.
Optional<unsigned> MatchingBB =
- findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+ findDuplicateOutputBlock(OutputBBs, OutputStoreBBs);
- // If there is, we remove the new output block. If it does not,
- // we add it to our list of output blocks.
+ // If there is, we remove the new output blocks. If there is not,
+ // we add them to our list of sets of output blocks.
if (MatchingBB.hasValue()) {
LLVM_DEBUG(dbgs() << "Set output block for region in function"
<< Region.ExtractedFunction << " to "
<< MatchingBB.getValue());
Region.OutputBlockNum = MatchingBB.getValue();
- OutputBB->eraseFromParent();
+ for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs)
+ VtoBB.second->eraseFromParent();
return;
}
Region.OutputBlockNum = OutputStoreBBs.size();
- LLVM_DEBUG(dbgs() << "Create output block for region in"
- << Region.ExtractedFunction << " to "
- << *OutputBB);
- OutputStoreBBs.push_back(OutputBB);
- BranchInst::Create(EndBB, OutputBB);
+ Value *RetValueForBB;
+ BasicBlock *NewBB;
+ OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+ for (std::pair<Value *, BasicBlock *> &VtoBB : OutputBBs) {
+ RetValueForBB = VtoBB.first;
+ NewBB = VtoBB.second;
+ DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+ EndBBs.find(RetValueForBB);
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *NewBB);
+ BranchInst::Create(VBBIt->second, NewBB);
+ OutputStoreBBs.back().insert(std::make_pair(RetValueForBB, NewBB));
+ }
+}
+
+/// Takes in a mapping, \p OldMap, of ConstantValues to BasicBlocks, sorts the
+/// keys, creates a new basic block for each key, and inserts the mapping from
+/// key to new block into \p NewMap. Each BasicBlock is named with the scheme
+/// "<basename>_<key_idx>".
+///
+/// \param OldMap [in] - The mapping to base the new mapping off of.
+/// \param NewMap [out] - The output mapping using the keys of \p OldMap.
+/// \param ParentFunc [in] - The function to put the new basic block in.
+/// \param BaseName [in] - The start of the BasicBlock names to be appended to
+/// by an index value.
+static void createAndInsertBasicBlocks(DenseMap<Value *, BasicBlock *> &OldMap,
+ DenseMap<Value *, BasicBlock *> &NewMap,
+ Function *ParentFunc, Twine BaseName) {
+ unsigned Idx = 0;
+ std::vector<Value *> SortedKeys;
+
+ getSortedConstantKeys(SortedKeys, OldMap);
+
+ for (Value *RetVal : SortedKeys) {
+ BasicBlock *NewBB = BasicBlock::Create(
+ ParentFunc->getContext(),
+ Twine(BaseName) + Twine("_") + Twine(static_cast<unsigned>(Idx++)),
+ ParentFunc);
+ NewMap.insert(std::make_pair(RetVal, NewBB));
+ }
}
/// Create the switch statement for the outlined function to differentiate between
@@ -1159,50 +1384,74 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
/// matches the needed stores for the extracted section.
/// \param [in] M - The module we are outlining from.
/// \param [in] OG - The group of regions to be outlined.
-/// \param [in] EndBB - The final block of the extracted function.
+/// \param [in] EndBBs - The final blocks of the extracted function.
/// \param [in,out] OutputStoreBBs - The existing output blocks.
-void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
- ArrayRef<BasicBlock *> OutputStoreBBs) {
+void createSwitchStatement(
+ Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
// We only need the switch statement if there is more than one store
// combination.
if (OG.OutputGVNCombinations.size() > 1) {
Function *AggFunc = OG.OutlinedFunction;
- // Create a final block
- BasicBlock *ReturnBlock =
- BasicBlock::Create(M.getContext(), "final_block", AggFunc);
- Instruction *Term = EndBB->getTerminator();
- Term->moveBefore(*ReturnBlock, ReturnBlock->end());
- // Put the switch statement in the old end basic block for the function with
- // a fall through to the new return block
- LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
- << OutputStoreBBs.size() << "\n");
- SwitchInst *SwitchI =
- SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
- ReturnBlock, OutputStoreBBs.size(), EndBB);
-
- unsigned Idx = 0;
- for (BasicBlock *BB : OutputStoreBBs) {
- SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
- BB);
- Term = BB->getTerminator();
- Term->setSuccessor(0, ReturnBlock);
- Idx++;
+ // Create a final block for each different return block.
+ DenseMap<Value *, BasicBlock *> ReturnBBs;
+ createAndInsertBasicBlocks(OG.EndBBs, ReturnBBs, AggFunc, "final_block");
+
+ for (std::pair<Value *, BasicBlock *> &RetBlockPair : ReturnBBs) {
+ std::pair<Value *, BasicBlock *> &OutputBlock =
+ *OG.EndBBs.find(RetBlockPair.first);
+ BasicBlock *ReturnBlock = RetBlockPair.second;
+ BasicBlock *EndBB = OutputBlock.second;
+ Instruction *Term = EndBB->getTerminator();
+ // Move the return instruction to the final block instead of the original
+ // exit stub.
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function
+ // with a fall through to the new return block.
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (DenseMap<Value *, BasicBlock *> &OutputStoreBB : OutputStoreBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator OSBBIt =
+ OutputStoreBB.find(OutputBlock.first);
+
+ if (OSBBIt == OutputStoreBB.end())
+ continue;
+
+ BasicBlock *BB = OSBBIt->second;
+ SwitchI->addCase(
+ ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx), BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
}
return;
}
- // If there needs to be stores, move them from the output block to the end
- // block to save on branching instructions.
+ // If stores are needed, move them from the output blocks to their
+ // corresponding ending block.
if (OutputStoreBBs.size() == 1) {
LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
<< *OG.OutlinedFunction << "\n");
- BasicBlock *OutputBlock = OutputStoreBBs[0];
- Instruction *Term = OutputBlock->getTerminator();
- Term->eraseFromParent();
- Term = EndBB->getTerminator();
- moveBBContents(*OutputBlock, *EndBB);
- Term->moveBefore(*EndBB, EndBB->end());
- OutputBlock->eraseFromParent();
+ DenseMap<Value *, BasicBlock *> OutputBlocks = OutputStoreBBs[0];
+ for (std::pair<Value *, BasicBlock *> &VBPair : OutputBlocks) {
+ DenseMap<Value *, BasicBlock *>::iterator EndBBIt =
+ EndBBs.find(VBPair.first);
+ assert(EndBBIt != EndBBs.end() && "Could not find end block");
+ BasicBlock *EndBB = EndBBIt->second;
+ BasicBlock *OutputBB = VBPair.second;
+ Instruction *Term = OutputBB->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBB, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBB->eraseFromParent();
+ }
}
}
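
createAndInsertBasicBlocks and the switch construction above both go through getSortedConstantKeys so that blocks keyed by a DenseMap, whose iteration order is not deterministic, are created and named in a stable order. A standalone sketch of that sorting step, using made-up constant return values (the nullptr block pointers are placeholders, not how the pass populates the map):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <vector>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Type *Int32Ty = Type::getInt32Ty(Ctx);

  // Stand-ins for the return-value keys of an EndBBs/OutputBBs-style map.
  // The BasicBlock values are irrelevant for the sort, so nullptr is used.
  DenseMap<Value *, BasicBlock *> Map;
  Map.insert({ConstantInt::get(Int32Ty, 2), nullptr});
  Map.insert({ConstantInt::get(Int32Ty, 0), nullptr});
  Map.insert({ConstantInt::get(Int32Ty, 1), nullptr});

  // Same idea as getSortedConstantKeys: collect the keys and stable-sort them
  // by their constant value so block creation and naming are deterministic.
  std::vector<Value *> SortedKeys;
  for (auto &VToBB : Map)
    SortedKeys.push_back(VToBB.first);
  stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
    return cast<ConstantInt>(LHS)->getLimitedValue() <
           cast<ConstantInt>(RHS)->getLimitedValue();
  });

  // SortedKeys now holds the constants 0, 1, 2 in that order, regardless of
  // the DenseMap's internal iteration order.
  return 0;
}
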
@@ -1217,42 +1466,44 @@ void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
/// set of stores needed for the different functions.
/// \param [in,out] FuncsToRemove - Extracted functions to erase from module
/// once outlining is complete.
-static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
- std::vector<BasicBlock *> &OutputStoreBBs,
- std::vector<Function *> &FuncsToRemove) {
+static void fillOverallFunction(
+ Module &M, OutlinableGroup &CurrentGroup,
+ std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
+ std::vector<Function *> &FuncsToRemove) {
OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
// Move first extracted function's instructions into new function.
LLVM_DEBUG(dbgs() << "Move instructions from "
<< *CurrentOS->ExtractedFunction << " to instruction "
<< *CurrentGroup.OutlinedFunction << "\n");
-
- CurrentGroup.EndBB = moveFunctionData(*CurrentOS->ExtractedFunction,
- *CurrentGroup.OutlinedFunction);
+ moveFunctionData(*CurrentOS->ExtractedFunction,
+ *CurrentGroup.OutlinedFunction, CurrentGroup.EndBBs);
// Transfer the attributes from the function to the new function.
- for (Attribute A :
- CurrentOS->ExtractedFunction->getAttributes().getFnAttributes())
+ for (Attribute A : CurrentOS->ExtractedFunction->getAttributes().getFnAttrs())
CurrentGroup.OutlinedFunction->addFnAttr(A);
- // Create an output block for the first extracted function.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), Twine("output_block_") + Twine(static_cast<unsigned>(0)),
- CurrentGroup.OutlinedFunction);
+ // Create a new set of output blocks for the first extracted function.
+ DenseMap<Value *, BasicBlock *> NewBBs;
+ createAndInsertBasicBlocks(CurrentGroup.EndBBs, NewBBs,
+ CurrentGroup.OutlinedFunction, "output_block_0");
CurrentOS->OutputBlockNum = 0;
- replaceArgumentUses(*CurrentOS, NewBB);
+ replaceArgumentUses(*CurrentOS, NewBBs, true);
replaceConstants(*CurrentOS);
- // If the new basic block has no new stores, we can erase it from the module.
- // It it does, we create a branch instruction to the last basic block from the
- // new one.
- if (NewBB->size() == 0) {
- CurrentOS->OutputBlockNum = -1;
- NewBB->eraseFromParent();
- } else {
- BranchInst::Create(CurrentGroup.EndBB, NewBB);
- OutputStoreBBs.push_back(NewBB);
+ // We first identify any empty output blocks and remove them. We then create
+ // a branch instruction from each non-empty output block to the corresponding
+ // return block for the function.
+ if (!analyzeAndPruneOutputBlocks(NewBBs, *CurrentOS)) {
+ OutputStoreBBs.push_back(DenseMap<Value *, BasicBlock *>());
+ for (std::pair<Value *, BasicBlock *> &VToBB : NewBBs) {
+ DenseMap<Value *, BasicBlock *>::iterator VBBIt =
+ CurrentGroup.EndBBs.find(VToBB.first);
+ BasicBlock *EndBB = VBBIt->second;
+ BranchInst::Create(EndBB, VToBB.second);
+ OutputStoreBBs.back().insert(VToBB);
+ }
}
// Replace the call to the extracted function with the outlined function.
@@ -1268,25 +1519,28 @@ void IROutliner::deduplicateExtractedSections(
std::vector<Function *> &FuncsToRemove, unsigned &OutlinedFunctionNum) {
createFunction(M, CurrentGroup, OutlinedFunctionNum);
- std::vector<BasicBlock *> OutputStoreBBs;
+ std::vector<DenseMap<Value *, BasicBlock *>> OutputStoreBBs;
OutlinableRegion *CurrentOS;
fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+ std::vector<Value *> SortedKeys;
for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
CurrentOS = CurrentGroup.Regions[Idx];
AttributeFuncs::mergeAttributesForOutlining(*CurrentGroup.OutlinedFunction,
*CurrentOS->ExtractedFunction);
- // Create a new BasicBlock to hold the needed store instructions.
- BasicBlock *NewBB = BasicBlock::Create(
- M.getContext(), "output_block_" + std::to_string(Idx),
- CurrentGroup.OutlinedFunction);
- replaceArgumentUses(*CurrentOS, NewBB);
+ // Create a set of BasicBlocks, one for each return block, to hold the
+ // needed store instructions.
+ DenseMap<Value *, BasicBlock *> NewBBs;
+ createAndInsertBasicBlocks(
+ CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
+ "output_block_" + Twine(static_cast<unsigned>(Idx)));
- alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
- CurrentGroup.EndBB, OutputMappings,
+ replaceArgumentUses(*CurrentOS, NewBBs);
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
+ CurrentGroup.EndBBs, OutputMappings,
OutputStoreBBs);
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
@@ -1294,11 +1548,78 @@ void IROutliner::deduplicateExtractedSections(
}
// Create a switch statement to handle the different output schemes.
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBBs, OutputStoreBBs);
OutlinedFunctionNum++;
}
+/// Checks that the next instruction in the InstructionDataList matches the
+/// next instruction in the module. If they do not match, it is possible that
+/// extra code has been inserted, and we must ignore it.
+///
+/// \param ID - The IRInstructionData to check the next instruction of.
+/// \returns true if the InstructionDataList and actual instruction match.
+static bool nextIRInstructionDataMatchesNextInst(IRInstructionData &ID) {
+ // We check if there is a discrepancy between the InstructionDataList
+ // and the actual next instruction in the module. If there is, it means
+ // that an extra instruction was added, likely by the CodeExtractor.
+
+ // Since we do not have any similarity data about this particular
+ // instruction, we cannot confidently outline it, and must discard this
+ // candidate.
+ IRInstructionDataList::iterator NextIDIt = std::next(ID.getIterator());
+ Instruction *NextIDLInst = NextIDIt->Inst;
+ Instruction *NextModuleInst = nullptr;
+ if (!ID.Inst->isTerminator())
+ NextModuleInst = ID.Inst->getNextNonDebugInstruction();
+ else if (NextIDLInst != nullptr)
+ NextModuleInst =
+ &*NextIDIt->Inst->getParent()->instructionsWithoutDebug().begin();
+
+ if (NextIDLInst && NextIDLInst != NextModuleInst)
+ return false;
+
+ return true;
+}
+
+bool IROutliner::isCompatibleWithAlreadyOutlinedCode(
+ const OutlinableRegion &Region) {
+ IRSimilarityCandidate *IRSC = Region.Candidate;
+ unsigned StartIdx = IRSC->getStartIdx();
+ unsigned EndIdx = IRSC->getEndIdx();
+
+ // A check to make sure that we are not about to attempt to outline something
+ // that has already been outlined.
+ for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
+ if (Outlined.contains(Idx))
+ return false;
+
+ // We check if the recorded instruction matches the actual next instruction;
+ // if it does not, we fix it in the InstructionDataList.
+ if (!Region.Candidate->backInstruction()->isTerminator()) {
+ Instruction *NewEndInst =
+ Region.Candidate->backInstruction()->getNextNonDebugInstruction();
+ assert(NewEndInst && "Next instruction is a nullptr?");
+ if (Region.Candidate->end()->Inst != NewEndInst) {
+ IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
+ IRInstructionData *NewEndIRID = new (InstDataAllocator.Allocate())
+ IRInstructionData(*NewEndInst,
+ InstructionClassifier.visit(*NewEndInst), *IDL);
+
+ // Insert the first IRInstructionData of the new region after the
+ // last IRInstructionData of the IRSimilarityCandidate.
+ IDL->insert(Region.Candidate->end(), *NewEndIRID);
+ }
+ }
+
+ return none_of(*IRSC, [this](IRInstructionData &ID) {
+ if (!nextIRInstructionDataMatchesNextInst(ID))
+ return true;
+
+ return !this->InstructionClassifier.visit(ID.Inst);
+ });
+}
+
void IROutliner::pruneIncompatibleRegions(
std::vector<IRSimilarityCandidate> &CandidateVec,
OutlinableGroup &CurrentGroup) {
@@ -1310,6 +1631,15 @@ void IROutliner::pruneIncompatibleRegions(
return LHS.getStartIdx() < RHS.getStartIdx();
});
+ IRSimilarityCandidate &FirstCandidate = CandidateVec[0];
+ // Since outlining a call and a branch instruction is the same as outlining
+ // only a call instruction, it offers no space savings, so we ignore it.
+ if (FirstCandidate.getLength() == 2) {
+ if (isa<CallInst>(FirstCandidate.front()->Inst) &&
+ isa<BranchInst>(FirstCandidate.back()->Inst))
+ return;
+ }
+
unsigned CurrentEndIdx = 0;
for (IRSimilarityCandidate &IRSC : CandidateVec) {
PreviouslyOutlined = false;
@@ -1325,9 +1655,13 @@ void IROutliner::pruneIncompatibleRegions(
if (PreviouslyOutlined)
continue;
- // TODO: If in the future we can outline across BasicBlocks, we will need to
- // check all BasicBlocks contained in the region.
- if (IRSC.getStartBB()->hasAddressTaken())
+ // Check over the instructions, and if the basic block has its address
+ // taken for use somewhere else, we do not outline that block.
+ bool BBHasAddressTaken = any_of(IRSC, [](IRInstructionData &ID){
+ return ID.Inst->getParent()->hasAddressTaken();
+ });
+
+ if (BBHasAddressTaken)
continue;
if (IRSC.front()->Inst->getFunction()->hasLinkOnceODRLinkage() &&
@@ -1340,16 +1674,9 @@ void IROutliner::pruneIncompatibleRegions(
continue;
bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) {
- // We check if there is a discrepancy between the InstructionDataList
- // and the actual next instruction in the module. If there is, it means
- // that an extra instruction was added, likely by the CodeExtractor.
-
- // Since we do not have any similarity data about this particular
- // instruction, we cannot confidently outline it, and must discard this
- // candidate.
- if (std::next(ID.getIterator())->Inst !=
- ID.Inst->getNextNonDebugInstruction())
+ if (!nextIRInstructionDataMatchesNextInst(ID))
return true;
+
return !this->InstructionClassifier.visit(ID.Inst);
});
@@ -1416,10 +1743,33 @@ static InstructionCost findCostForOutputBlocks(Module &M,
OutlinableGroup &CurrentGroup,
TargetTransformInfo &TTI) {
InstructionCost OutputCost = 0;
+ unsigned NumOutputBranches = 0;
+
+ IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
+ DenseSet<BasicBlock *> CandidateBlocks;
+ Candidate.getBasicBlocks(CandidateBlocks);
+
+ // Count the number of different output branches that point to blocks outside
+ // of the region.
+ DenseSet<BasicBlock *> FoundBlocks;
+ for (IRInstructionData &ID : Candidate) {
+ if (!isa<BranchInst>(ID.Inst))
+ continue;
+
+ for (Value *V : ID.OperVals) {
+ BasicBlock *BB = static_cast<BasicBlock *>(V);
+ DenseSet<BasicBlock *>::iterator CBIt = CandidateBlocks.find(BB);
+ if (CBIt != CandidateBlocks.end() || FoundBlocks.contains(BB))
+ continue;
+ FoundBlocks.insert(BB);
+ NumOutputBranches++;
+ }
+ }
+
+ CurrentGroup.BranchesToOutside = NumOutputBranches;
for (const ArrayRef<unsigned> &OutputUse :
CurrentGroup.OutputGVNCombinations) {
- IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
for (unsigned GVN : OutputUse) {
Optional<Value *> OV = Candidate.fromGVN(GVN);
assert(OV.hasValue() && "Could not find value for GVN?");
@@ -1434,14 +1784,14 @@ static InstructionCost findCostForOutputBlocks(Module &M,
LLVM_DEBUG(dbgs() << "Adding: " << StoreCost
<< " instructions to cost for output of type "
<< *V->getType() << "\n");
- OutputCost += StoreCost;
+ OutputCost += StoreCost * NumOutputBranches;
}
InstructionCost BranchCost =
TTI.getCFInstrCost(Instruction::Br, TargetTransformInfo::TCK_CodeSize);
LLVM_DEBUG(dbgs() << "Adding " << BranchCost << " to the current cost for"
<< " a branch instruction\n");
- OutputCost += BranchCost;
+ OutputCost += BranchCost * NumOutputBranches;
}
// If there is more than one output scheme, we must have a comparison and
@@ -1460,7 +1810,7 @@ static InstructionCost findCostForOutputBlocks(Module &M,
LLVM_DEBUG(dbgs() << "Adding: " << TotalCost
<< " instructions for each switch case for each different"
<< " output path in a function\n");
- OutputCost += TotalCost;
+ OutputCost += TotalCost * NumOutputBranches;
}
return OutputCost;
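
The multiplications added above scale every per-scheme store and branch cost by the number of branches that leave the region, since each exit path now needs its own copy of the output handling. A rough worked example with made-up unit costs (real values come from TargetTransformInfo, and only the store-and-branch part of the accounting is modelled here):

#include <cassert>

int main() {
  // Hypothetical unit costs; the real values come from TargetTransformInfo.
  const int StoreCost = 1, BranchCost = 1;
  const int NumOutputBranches = 2; // distinct branches leaving the region
  const int NumSchemes = 2, StoresPerScheme = 2;

  // Rough model of the visible accounting: every store and branch in an
  // output scheme is now paid once per exit branch rather than once overall.
  int OutputCost = 0;
  for (int Scheme = 0; Scheme < NumSchemes; ++Scheme)
    OutputCost += (StoresPerScheme * StoreCost + BranchCost) * NumOutputBranches;

  assert(OutputCost == 12); // (2*1 + 1) * 2 exits * 2 schemes
  return 0;
}
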
@@ -1548,13 +1898,12 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
bool IROutliner::extractSection(OutlinableRegion &Region) {
SetVector<Value *> ArgInputs, Outputs, SinkCands;
- Region.CE->findInputsOutputs(ArgInputs, Outputs, SinkCands);
-
assert(Region.StartBB && "StartBB for the OutlinableRegion is nullptr!");
- assert(Region.FollowBB && "FollowBB for the OutlinableRegion is nullptr!");
+ BasicBlock *InitialStart = Region.StartBB;
Function *OrigF = Region.StartBB->getParent();
CodeExtractorAnalysisCache CEAC(*OrigF);
- Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
+ Region.ExtractedFunction =
+ Region.CE->extractCodeRegion(CEAC, ArgInputs, Outputs);
// If the extraction was successful, find the BasicBlock, and reassign the
// OutlinableRegion blocks
@@ -1565,7 +1914,23 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
return false;
}
- BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
+ // Get the block containing the call to the extracted function, and reassign
+ // the blocks as necessary. If the original block still exists, it is because
+ // we ended on a branch instruction, so we move its contents into the block
+ // before it and assign the previous block correctly.
+ User *InstAsUser = Region.ExtractedFunction->user_back();
+ BasicBlock *RewrittenBB = cast<Instruction>(InstAsUser)->getParent();
+ Region.PrevBB = RewrittenBB->getSinglePredecessor();
+ assert(Region.PrevBB && "PrevBB is nullptr?");
+ if (Region.PrevBB == InitialStart) {
+ BasicBlock *NewPrev = InitialStart->getSinglePredecessor();
+ Instruction *BI = NewPrev->getTerminator();
+ BI->eraseFromParent();
+ moveBBContents(*InitialStart, *NewPrev);
+ Region.PrevBB = NewPrev;
+ InitialStart->eraseFromParent();
+ }
+
Region.StartBB = RewrittenBB;
Region.EndBB = RewrittenBB;
@@ -1608,6 +1973,7 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
unsigned IROutliner::doOutline(Module &M) {
// Find the possible similarity sections.
+ InstructionClassifier.EnableBranches = !DisableBranches;
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
@@ -1622,12 +1988,17 @@ unsigned IROutliner::doOutline(Module &M) {
return LHS[0].getLength() * LHS.size() >
RHS[0].getLength() * RHS.size();
});
+ // Create an OutlinableGroup for each SimilarityCandidate, to be used in
+ // each of the following for loops, avoiding the need for an allocator.
+ std::vector<OutlinableGroup> PotentialGroups(SimilarityCandidates.size());
DenseSet<unsigned> NotSame;
- std::vector<Function *> FuncsToRemove;
+ std::vector<OutlinableGroup *> NegativeCostGroups;
+ std::vector<OutlinableRegion *> OutlinedRegions;
// Iterate over the possible sets of similarity.
+ unsigned PotentialGroupIdx = 0;
for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
- OutlinableGroup CurrentGroup;
+ OutlinableGroup &CurrentGroup = PotentialGroups[PotentialGroupIdx++];
// Remove entries that were previously outlined
pruneIncompatibleRegions(CandidateVec, CurrentGroup);
@@ -1649,20 +2020,31 @@ unsigned IROutliner::doOutline(Module &M) {
// Create a CodeExtractor for each outlinable region. Identify inputs and
// outputs for each section using the code extractor and create the argument
// types for the Aggregate Outlining Function.
- std::vector<OutlinableRegion *> OutlinedRegions;
+ OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
// Break the outlinable region out of its parent BasicBlock into its own
// BasicBlocks (see function implementation).
OS->splitCandidate();
- std::vector<BasicBlock *> BE = {OS->StartBB};
+
+    // Splitting the region can add extra instructions to it. When that
+    // happens, the region is no longer viable for outlining, so we skip it.
+ if (!OS->CandidateSplit)
+ continue;
+
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ OS->Candidate->getBasicBlocks(BBSet, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
false, "outlined");
findAddInputsOutputs(M, *OS, NotSame);
if (!OS->IgnoreRegion)
OutlinedRegions.push_back(OS);
- else
- OS->reattachCandidate();
+
+ // We recombine the blocks together now that we have gathered all the
+ // needed information.
+ OS->reattachCandidate();
}
CurrentGroup.Regions = std::move(OutlinedRegions);
@@ -1675,12 +2057,11 @@ unsigned IROutliner::doOutline(Module &M) {
if (CostModel)
findCostBenefit(M, CurrentGroup);
- // If we are adhering to the cost model, reattach all the candidates
+ // If we are adhering to the cost model, skip those groups where the cost
+ // outweighs the benefits.
if (CurrentGroup.Cost >= CurrentGroup.Benefit && CostModel) {
- for (OutlinableRegion *OS : CurrentGroup.Regions)
- OS->reattachCandidate();
- OptimizationRemarkEmitter &ORE = getORE(
- *CurrentGroup.Regions[0]->Candidate->getFunction());
+ OptimizationRemarkEmitter &ORE =
+ getORE(*CurrentGroup.Regions[0]->Candidate->getFunction());
ORE.emit([&]() {
IRSimilarityCandidate *C = CurrentGroup.Regions[0]->Candidate;
OptimizationRemarkMissed R(DEBUG_TYPE, "WouldNotDecreaseSize",
@@ -1704,12 +2085,70 @@ unsigned IROutliner::doOutline(Module &M) {
continue;
}
+ NegativeCostGroups.push_back(&CurrentGroup);
+ }
+
+ ExtractorAllocator.DestroyAll();
+
+ if (NegativeCostGroups.size() > 1)
+ stable_sort(NegativeCostGroups,
+ [](const OutlinableGroup *LHS, const OutlinableGroup *RHS) {
+ return LHS->Benefit - LHS->Cost > RHS->Benefit - RHS->Cost;
+ });
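// Editor's illustration (not part of the patch): this ordering visits the
// groups with the largest net benefit (Benefit - Cost) first. For example,
// groups with (Benefit, Cost) of (10, 4), (20, 3) and (8, 1) have net benefits
// 6, 17 and 7, so they are outlined in the order (20, 3), (8, 1), (10, 4).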
+
+ std::vector<Function *> FuncsToRemove;
+ for (OutlinableGroup *CG : NegativeCostGroups) {
+ OutlinableGroup &CurrentGroup = *CG;
+
+ OutlinedRegions.clear();
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ // We check whether our region is compatible with what has already been
+ // outlined, and whether we need to ignore this item.
+ if (!isCompatibleWithAlreadyOutlinedCode(*Region))
+ continue;
+ OutlinedRegions.push_back(Region);
+ }
+
+ if (OutlinedRegions.size() < 2)
+ continue;
+
+    // Reestimate the cost and benefit of the OutlinableGroup, and skip the
+    // group if the remaining regions no longer make up for the added cost.
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ if (CostModel) {
+ CurrentGroup.Benefit = 0;
+ CurrentGroup.Cost = 0;
+ findCostBenefit(M, CurrentGroup);
+ if (CurrentGroup.Cost >= CurrentGroup.Benefit)
+ continue;
+ }
+ OutlinedRegions.clear();
+ for (OutlinableRegion *Region : CurrentGroup.Regions) {
+ Region->splitCandidate();
+ if (!Region->CandidateSplit)
+ continue;
+ OutlinedRegions.push_back(Region);
+ }
+
+ CurrentGroup.Regions = std::move(OutlinedRegions);
+ if (CurrentGroup.Regions.size() < 2) {
+ for (OutlinableRegion *R : CurrentGroup.Regions)
+ R->reattachCandidate();
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << "Outlining regions with cost " << CurrentGroup.Cost
<< " and benefit " << CurrentGroup.Benefit << "\n");
// Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear();
for (OutlinableRegion *OS : CurrentGroup.Regions) {
+ SmallVector<BasicBlock *> BE;
+ DenseSet<BasicBlock *> BBSet;
+ OS->Candidate->getBasicBlocks(BBSet, BE);
+ OS->CE = new (ExtractorAllocator.Allocate())
+ CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
+ false, "outlined");
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
@@ -1767,6 +2206,7 @@ bool IROutliner::run(Module &M) {
}
// Pass Manager Boilerplate
+namespace {
class IROutlinerLegacyPass : public ModulePass {
public:
static char ID;
@@ -1782,6 +2222,7 @@ public:
bool runOnModule(Module &M) override;
};
+} // namespace
bool IROutlinerLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 59260af88832..992c2b292e1e 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -31,9 +31,11 @@
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineOrder.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/ReplayInlineAdvisor.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Utils/ImportedFunctionsInliningStatistics.h"
@@ -96,9 +98,53 @@ static cl::opt<std::string> CGSCCInlineReplayFile(
"cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
"Optimization remarks file containing inline remarks to be replayed "
- "by inlining from cgscc inline remarks."),
+ "by cgscc inlining."),
cl::Hidden);
+static cl::opt<ReplayInlinerSettings::Scope> CGSCCInlineReplayScope(
+ "cgscc-inline-replay-scope",
+ cl::init(ReplayInlinerSettings::Scope::Function),
+ cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
+ "Replay on functions that have remarks associated "
+ "with them (default)"),
+ clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
+ "Replay on the entire module")),
+ cl::desc("Whether inline replay should be applied to the entire "
+ "Module or just the Functions (default) that are present as "
+ "callers in remarks during cgscc inlining."),
+ cl::Hidden);
+
+static cl::opt<ReplayInlinerSettings::Fallback> CGSCCInlineReplayFallback(
+ "cgscc-inline-replay-fallback",
+ cl::init(ReplayInlinerSettings::Fallback::Original),
+ cl::values(
+ clEnumValN(
+ ReplayInlinerSettings::Fallback::Original, "Original",
+ "All decisions not in replay send to original advisor (default)"),
+ clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
+ "AlwaysInline", "All decisions not in replay are inlined"),
+ clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
+ "All decisions not in replay are not inlined")),
+ cl::desc(
+ "How cgscc inline replay treats sites that don't come from the replay. "
+ "Original: defers to original advisor, AlwaysInline: inline all sites "
+ "not in replay, NeverInline: inline no sites not in replay"),
+ cl::Hidden);
+
+static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
+ "cgscc-inline-replay-format",
+ cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
+ cl::values(
+ clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
+ clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
+ "<Line Number>:<Column Number>"),
+ clEnumValN(CallSiteFormat::Format::LineDiscriminator,
+ "LineDiscriminator", "<Line Number>.<Discriminator>"),
+ clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
+ "LineColumnDiscriminator",
+ "<Line Number>:<Column Number>.<Discriminator> (default)")),
+ cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
+
static cl::opt<bool> InlineEnablePriorityOrder(
"inline-enable-priority-order", cl::Hidden, cl::init(false),
cl::desc("Enable the priority inline order for the inliner"));
@@ -463,7 +509,7 @@ inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
}
++NumInlined;
- emitInlinedInto(ORE, DLoc, Block, *Callee, *Caller, *OIC);
+ emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -661,9 +707,12 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
std::make_unique<DefaultInlineAdvisor>(M, FAM, getInlineParams());
if (!CGSCCInlineReplayFile.empty())
- OwnedAdvisor = std::make_unique<ReplayInlineAdvisor>(
+ OwnedAdvisor = getReplayInlineAdvisor(
M, FAM, M.getContext(), std::move(OwnedAdvisor),
- CGSCCInlineReplayFile,
+ ReplayInlinerSettings{CGSCCInlineReplayFile,
+ CGSCCInlineReplayScope,
+ CGSCCInlineReplayFallback,
+ {CGSCCInlineReplayFormat}},
/*EmitRemarks=*/true);
return *OwnedAdvisor;
@@ -674,153 +723,6 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
return *IAA->getAdvisor();
}
-template <typename T> class InlineOrder {
-public:
- using reference = T &;
- using const_reference = const T &;
-
- virtual ~InlineOrder() {}
-
- virtual size_t size() = 0;
-
- virtual void push(const T &Elt) = 0;
-
- virtual T pop() = 0;
-
- virtual const_reference front() = 0;
-
- virtual void erase_if(function_ref<bool(T)> Pred) = 0;
-
- bool empty() { return !size(); }
-};
-
-template <typename T, typename Container = SmallVector<T, 16>>
-class DefaultInlineOrder : public InlineOrder<T> {
- using reference = T &;
- using const_reference = const T &;
-
-public:
- size_t size() override { return Calls.size() - FirstIndex; }
-
- void push(const T &Elt) override { Calls.push_back(Elt); }
-
- T pop() override {
- assert(size() > 0);
- return Calls[FirstIndex++];
- }
-
- const_reference front() override {
- assert(size() > 0);
- return Calls[FirstIndex];
- }
-
- void erase_if(function_ref<bool(T)> Pred) override {
- Calls.erase(std::remove_if(Calls.begin() + FirstIndex, Calls.end(), Pred),
- Calls.end());
- }
-
-private:
- Container Calls;
- size_t FirstIndex = 0;
-};
-
-class Priority {
-public:
- Priority(int Size) : Size(Size) {}
-
- static bool isMoreDesirable(const Priority &S1, const Priority &S2) {
- return S1.Size < S2.Size;
- }
-
- static Priority evaluate(CallBase *CB) {
- Function *Callee = CB->getCalledFunction();
- return Priority(Callee->getInstructionCount());
- }
-
- int Size;
-};
-
-template <typename PriorityT>
-class PriorityInlineOrder : public InlineOrder<std::pair<CallBase *, int>> {
- using T = std::pair<CallBase *, int>;
- using HeapT = std::pair<CallBase *, PriorityT>;
- using reference = T &;
- using const_reference = const T &;
-
- static bool cmp(const HeapT &P1, const HeapT &P2) {
- return PriorityT::isMoreDesirable(P2.second, P1.second);
- }
-
- // A call site could become less desirable for inlining because of the size
- // growth from prior inlining into the callee. This method is used to lazily
- // update the desirability of a call site if it's decreasing. It is only
- // called on pop() or front(), not every time the desirability changes. When
- // the desirability of the front call site decreases, an updated one would be
- // pushed right back into the heap. For simplicity, those cases where
- // the desirability of a call site increases are ignored here.
- void adjust() {
- bool Changed = false;
- do {
- CallBase *CB = Heap.front().first;
- const PriorityT PreviousGoodness = Heap.front().second;
- const PriorityT CurrentGoodness = PriorityT::evaluate(CB);
- Changed = PriorityT::isMoreDesirable(PreviousGoodness, CurrentGoodness);
- if (Changed) {
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- Heap.push_back({CB, CurrentGoodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- }
- } while (Changed);
- }
-
-public:
- size_t size() override { return Heap.size(); }
-
- void push(const T &Elt) override {
- CallBase *CB = Elt.first;
- const int InlineHistoryID = Elt.second;
- const PriorityT Goodness = PriorityT::evaluate(CB);
-
- Heap.push_back({CB, Goodness});
- std::push_heap(Heap.begin(), Heap.end(), cmp);
- InlineHistoryMap[CB] = InlineHistoryID;
- }
-
- T pop() override {
- assert(size() > 0);
- adjust();
-
- CallBase *CB = Heap.front().first;
- T Result = std::make_pair(CB, InlineHistoryMap[CB]);
- InlineHistoryMap.erase(CB);
- std::pop_heap(Heap.begin(), Heap.end(), cmp);
- Heap.pop_back();
- return Result;
- }
-
- const_reference front() override {
- assert(size() > 0);
- adjust();
-
- CallBase *CB = Heap.front().first;
- return *InlineHistoryMap.find(CB);
- }
-
- void erase_if(function_ref<bool(T)> Pred) override {
- auto PredWrapper = [=](HeapT P) -> bool {
- return Pred(std::make_pair(P.first, 0));
- };
- Heap.erase(std::remove_if(Heap.begin(), Heap.end(), PredWrapper),
- Heap.end());
- std::make_heap(Heap.begin(), Heap.end(), cmp);
- }
-
-private:
- SmallVector<HeapT, 16> Heap;
- DenseMap<CallBase *, int> InlineHistoryMap;
-};
-
PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
CGSCCAnalysisManager &AM, LazyCallGraph &CG,
CGSCCUpdateResult &UR) {
@@ -868,7 +770,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// incrementally making a single function grow in a super linear fashion.
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
if (InlineEnablePriorityOrder)
- Calls = std::make_unique<PriorityInlineOrder<Priority>>();
+ Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
else
Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
assert(Calls != nullptr && "Expected an initialized InlineOrder");
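// Editor's sketch (not part of the patch): InlineSizePriority is assumed to
// keep the behaviour of the Priority class removed above, i.e. call sites
// whose callees have fewer instructions are popped first. A stand-alone
// analogue of that ordering:
#include <queue>
#include <vector>

struct CallSiteSketch {
  int CalleeSize; // stand-in for Callee->getInstructionCount()
  int Id;
};

int main() {
  auto Cmp = [](const CallSiteSketch &A, const CallSiteSketch &B) {
    return A.CalleeSize > B.CalleeSize; // smallest callee on top
  };
  std::priority_queue<CallSiteSketch, std::vector<CallSiteSketch>,
                      decltype(Cmp)>
      Worklist(Cmp);
  Worklist.push({120, 0});
  Worklist.push({15, 1});
  Worklist.push({60, 2});
  // Pops in the order Id 1 (size 15), Id 2 (size 60), Id 0 (size 120).
  return 0;
}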
@@ -972,8 +874,13 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
continue;
}
- auto Advice = Advisor.getAdvice(*CB, OnlyMandatory);
+ std::unique_ptr<InlineAdvice> Advice =
+ Advisor.getAdvice(*CB, OnlyMandatory);
+
// Check whether we want to inline this callsite.
+ if (!Advice)
+ continue;
+
if (!Advice->isInliningRecommended()) {
Advice->recordUnattemptedInlining();
continue;
@@ -1104,6 +1011,10 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
UR.InlinedInternalEdges.insert({&N, OldC});
}
InlinedCallees.clear();
+
+ // Invalidate analyses for this function now so that we don't have to
+ // invalidate analyses for all functions in this SCC later.
+ FAM.invalidate(F, PreservedAnalyses::none());
}
// Now that we've finished inlining all of the calls across this SCC, delete
@@ -1147,10 +1058,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (!Changed)
return PreservedAnalyses::all();
+ PreservedAnalyses PA;
// Even if we change the IR, we update the core CGSCC data structures and so
// can preserve the proxy to the function analysis manager.
- PreservedAnalyses PA;
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+ // We have already invalidated all analyses on modified functions.
+ PA.preserveSet<AllAnalysesOn<Function>>();
return PA;
}
@@ -1173,7 +1086,11 @@ ModuleInlinerWrapperPass::ModuleInlinerWrapperPass(InlineParams Params,
PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
ModuleAnalysisManager &MAM) {
auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
- if (!IAA.tryCreate(Params, Mode, CGSCCInlineReplayFile)) {
+ if (!IAA.tryCreate(Params, Mode,
+ {CGSCCInlineReplayFile,
+ CGSCCInlineReplayScope,
+ CGSCCInlineReplayFallback,
+ {CGSCCInlineReplayFormat}})) {
M.getContext().emitError(
"Could not setup Inlining Advisor for the requested "
"mode and/or options");
@@ -1192,10 +1109,39 @@ PreservedAnalyses ModuleInlinerWrapperPass::run(Module &M,
else
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
createDevirtSCCRepeatedPass(std::move(PM), MaxDevirtIterations)));
+
+ MPM.addPass(std::move(AfterCGMPM));
MPM.run(M, MAM);
- IAA.clear();
+ // Discard the InlineAdvisor, a subsequent inlining session should construct
+ // its own.
+ auto PA = PreservedAnalyses::all();
+ PA.abandon<InlineAdvisorAnalysis>();
+ return PA;
+}
- // The ModulePassManager has already taken care of invalidating analyses.
- return PreservedAnalyses::all();
+void InlinerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<InlinerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ if (OnlyMandatory)
+ OS << "<only-mandatory>";
+}
+
+void ModuleInlinerWrapperPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+  // Print some info about passes added to the wrapper. This is, however,
+  // incomplete, as the InlineAdvisorAnalysis part isn't included (it also
+  // depends on Params and Mode).
+ if (!MPM.isEmpty()) {
+ MPM.printPipeline(OS, MapClassName2PassName);
+ OS << ",";
+ }
+ OS << "cgscc(";
+ if (MaxDevirtIterations != 0)
+ OS << "devirt<" << MaxDevirtIterations << ">(";
+ PM.printPipeline(OS, MapClassName2PassName);
+ if (MaxDevirtIterations != 0)
+ OS << ")";
+ OS << ")";
}
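// Editor's note (not part of the patch): assuming the inliner pass prints as
// "inline" and nothing was added to the wrapper's late module pass manager, a
// wrapper built with MaxDevirtIterations = 4 prints its pipeline as
// "cgscc(devirt<4>(inline))", and as "cgscc(inline)" when MaxDevirtIterations
// is 0.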
diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index db3b4384ce67..692e445cb7cb 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -201,21 +201,6 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
AlwaysPreserved.insert(V->getName());
}
- // Mark all functions not in the api as internal.
- IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
- for (Function &I : M) {
- if (!maybeInternalize(I, ComdatMap))
- continue;
- Changed = true;
-
- if (ExternalNode)
- // Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
-
- ++NumFunctions;
- LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
- }
-
// Never internalize the llvm.used symbol. It is used to implement
// attribute((used)).
// FIXME: Shouldn't this just filter on llvm.metadata section??
@@ -237,6 +222,21 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
else
AlwaysPreserved.insert("__stack_chk_guard");
+ // Mark all functions not in the api as internal.
+ IsWasm = Triple(M.getTargetTriple()).isOSBinFormatWasm();
+ for (Function &I : M) {
+ if (!maybeInternalize(I, ComdatMap))
+ continue;
+ Changed = true;
+
+ if (ExternalNode)
+ // Remove a callgraph edge from the external node to this function.
+ ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
+
+ ++NumFunctions;
+ LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
+ }
+
// Mark all global variables with initializers that are not in the api as
// internal as well.
for (auto &GV : M.globals()) {
diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index a497c0390bce..d9a59dd35fde 100644
--- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -283,3 +283,13 @@ PreservedAnalyses LoopExtractorPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
+
+void LoopExtractorPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopExtractorPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (NumLoops == 1)
+ OS << "single";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index b492b200c6d5..f78971f0e586 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -342,7 +342,8 @@ private:
struct ScopedSaveAliaseesAndUsed {
Module &M;
SmallVector<GlobalValue *, 4> Used, CompilerUsed;
- std::vector<std::pair<GlobalIndirectSymbol *, Function *>> FunctionAliases;
+ std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
+ std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;
ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
// The users of this class want to replace all function references except
@@ -362,13 +363,16 @@ struct ScopedSaveAliaseesAndUsed {
if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
GV->eraseFromParent();
- for (auto &GIS : concat<GlobalIndirectSymbol>(M.aliases(), M.ifuncs())) {
+ for (auto &GA : M.aliases()) {
// FIXME: This should look past all aliases not just interposable ones,
// see discussion on D65118.
- if (auto *F =
- dyn_cast<Function>(GIS.getIndirectSymbol()->stripPointerCasts()))
- FunctionAliases.push_back({&GIS, F});
+ if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
+ FunctionAliases.push_back({&GA, F});
}
+
+ for (auto &GI : M.ifuncs())
+ if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
+ ResolverIFuncs.push_back({&GI, F});
}
~ScopedSaveAliaseesAndUsed() {
@@ -376,8 +380,15 @@ struct ScopedSaveAliaseesAndUsed {
appendToCompilerUsed(M, CompilerUsed);
for (auto P : FunctionAliases)
- P.first->setIndirectSymbol(
+ P.first->setAliasee(
ConstantExpr::getBitCast(P.second, P.first->getType()));
+
+ for (auto P : ResolverIFuncs) {
+ // This does not preserve pointer casts that may have been stripped by the
+ // constructor, but the resolver's type is different from that of the
+ // ifunc anyway.
+ P.first->setResolver(P.second);
+ }
}
};
@@ -1550,17 +1561,28 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
F->getType());
- if (Functions[I]->isExported()) {
- if (IsJumpTableCanonical) {
- ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
- } else {
- GlobalAlias *JtAlias = GlobalAlias::create(
- F->getValueType(), 0, GlobalValue::ExternalLinkage,
- F->getName() + ".cfi_jt", CombinedGlobalElemPtr, &M);
+
+ const bool IsExported = Functions[I]->isExported();
+ if (!IsJumpTableCanonical) {
+ GlobalValue::LinkageTypes LT = IsExported
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
+ GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT,
+ F->getName() + ".cfi_jt",
+ CombinedGlobalElemPtr, &M);
+ if (IsExported)
JtAlias->setVisibility(GlobalValue::HiddenVisibility);
+ else
+ appendToUsed(M, {JtAlias});
+ }
+
+ if (IsExported) {
+ if (IsJumpTableCanonical)
+ ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
+ else
ExportSummary->cfiFunctionDecls().insert(std::string(F->getName()));
- }
}
+
if (!IsJumpTableCanonical) {
if (F->hasExternalWeakLinkage())
replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
@@ -1751,11 +1773,7 @@ static bool isDirectCall(Use& U) {
void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
bool IsJumpTableCanonical) {
SmallSetVector<Constant *, 4> Constants;
- auto UI = Old->use_begin(), E = Old->use_end();
- for (; UI != E;) {
- Use &U = *UI;
- ++UI;
-
+ for (Use &U : llvm::make_early_inc_range(Old->uses())) {
// Skip block addresses
if (isa<BlockAddress>(U.getUser()))
continue;
@@ -1792,12 +1810,11 @@ bool LowerTypeTestsModule::lower() {
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
if (DropTypeTests && TypeTestFunc) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = cast<CallInst>(U.getUser());
// Find and erase llvm.assume intrinsics for this llvm.type.test call.
- for (auto CIU = CI->use_begin(), CIUE = CI->use_end(); CIU != CIUE;)
- if (auto *Assume = dyn_cast<AssumeInst>((*CIU++).getUser()))
+ for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
+ if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
Assume->eraseFromParent();
// If the assume was merged with another assume, we might have a use on a
// phi (which will feed the assume). Simply replace the use on the phi
@@ -1835,13 +1852,9 @@ bool LowerTypeTestsModule::lower() {
return false;
if (ImportSummary) {
- if (TypeTestFunc) {
- for (auto UI = TypeTestFunc->use_begin(), UE = TypeTestFunc->use_end();
- UI != UE;) {
- auto *CI = cast<CallInst>((*UI++).getUser());
- importTypeTest(CI);
- }
- }
+ if (TypeTestFunc)
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
+ importTypeTest(cast<CallInst>(U.getUser()));
if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
report_fatal_error(
@@ -2100,11 +2113,11 @@ bool LowerTypeTestsModule::lower() {
auto CI = cast<CallInst>(U.getUser());
std::vector<GlobalTypeMember *> Targets;
- if (CI->getNumArgOperands() % 2 != 1)
+ if (CI->arg_size() % 2 != 1)
report_fatal_error("number of arguments should be odd");
GlobalClassesTy::member_iterator CurSet;
- for (unsigned I = 1; I != CI->getNumArgOperands(); I += 2) {
+ for (unsigned I = 1; I != CI->arg_size(); I += 2) {
int64_t Offset;
auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
CI->getOperand(I), Offset, M.getDataLayout()));
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 9e6dd879ac01..97ef872c5499 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -463,17 +463,15 @@ bool MergeFunctions::runOnModule(Module &M) {
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
- for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
- Use *U = &*UI;
- ++UI;
- CallBase *CB = dyn_cast<CallBase>(U->getUser());
- if (CB && CB->isCallee(U)) {
+ for (Use &U : llvm::make_early_inc_range(Old->uses())) {
+ CallBase *CB = dyn_cast<CallBase>(U.getUser());
+ if (CB && CB->isCallee(&U)) {
// Do not copy attributes from the called function to the call-site.
// Function comparison ensures that the attributes are the same up to
// type congruences in byval(), in which case we need to keep the byval
// type of the call-site, not the callee function.
remove(CB->getFunction());
- U->set(BitcastNew);
+ U.set(BitcastNew);
}
}
}
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
new file mode 100644
index 000000000000..ebf080e87c3b
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -0,0 +1,354 @@
+//===- ModuleInliner.cpp - Code related to module inliner -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the mechanics required to implement inlining without
+// missing any calls at the module level. It doesn't need any information about
+// SCC or call graph, which is different from the SCC inliner. The decisions of
+// which calls are profitable to inline are implemented elsewhere.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/ModuleInliner.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineAdvisor.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineOrder.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <cassert>
+#include <functional>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "module-inline"
+
+STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
+
+static cl::opt<bool> InlineEnablePriorityOrder(
+ "module-inline-enable-priority-order", cl::Hidden, cl::init(true),
+ cl::desc("Enable the priority inline order for the module inliner"));
+
+/// Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool inlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
+ while (InlineHistoryID != -1) {
+ assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+ "Invalid inline history ID");
+ if (InlineHistory[InlineHistoryID].first == F)
+ return true;
+ InlineHistoryID = InlineHistory[InlineHistoryID].second;
+ }
+ return false;
+}
+
+InlineAdvisor &ModuleInlinerPass::getAdvisor(const ModuleAnalysisManager &MAM,
+ FunctionAnalysisManager &FAM,
+ Module &M) {
+ if (OwnedAdvisor)
+ return *OwnedAdvisor;
+
+ auto *IAA = MAM.getCachedResult<InlineAdvisorAnalysis>(M);
+ if (!IAA) {
+ // It should still be possible to run the inliner as a stand-alone module
+ // pass, for test scenarios. In that case, we default to the
+ // DefaultInlineAdvisor, which doesn't need to keep state between module
+ // pass runs. It also uses just the default InlineParams. In this case, we
+ // need to use the provided FAM, which is valid for the duration of the
+ // inliner pass, and thus the lifetime of the owned advisor. The one we
+ // would get from the MAM can be invalidated as a result of the inliner's
+ // activity.
+ OwnedAdvisor = std::make_unique<DefaultInlineAdvisor>(M, FAM, Params);
+
+ return *OwnedAdvisor;
+ }
+ assert(IAA->getAdvisor() &&
+         "Expected a present InlineAdvisorAnalysis to also have an "
+ "InlineAdvisor initialized");
+ return *IAA->getAdvisor();
+}
+
+static bool isKnownLibFunction(Function &F, TargetLibraryInfo &TLI) {
+ LibFunc LF;
+
+ // Either this is a normal library function or a "vectorizable"
+ // function. Not using the VFDatabase here because this query
+ // is related only to libraries handled via the TLI.
+ return TLI.getLibFunc(F, LF) ||
+ TLI.isKnownVectorFunctionInLibrary(F.getName());
+}
+
+PreservedAnalyses ModuleInlinerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ LLVM_DEBUG(dbgs() << "---- Module Inliner is Running ---- \n");
+
+ auto &IAA = MAM.getResult<InlineAdvisorAnalysis>(M);
+ if (!IAA.tryCreate(Params, Mode, {})) {
+ M.getContext().emitError(
+ "Could not setup Inlining Advisor for the requested "
+ "mode and/or options");
+ return PreservedAnalyses::all();
+ }
+
+ bool Changed = false;
+
+ ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ InlineAdvisor &Advisor = getAdvisor(MAM, FAM, M);
+ Advisor.onPassEntry();
+
+ auto AdvisorOnExit = make_scope_exit([&] { Advisor.onPassExit(); });
+
+ // In the module inliner, a priority-based worklist is used for calls across
+ // the entire Module. With this module inliner, the inline order is not
+  // limited to bottom-up order. A more globally scoped inline order is
+  // enabled. Also, the inline deferral logic becomes unnecessary in this
+  // module inliner.
+ // It is possible to use other priority heuristics, e.g. profile-based
+ // heuristic.
+ //
+  // TODO: There is a huge amount of duplicate code between the module inliner
+  // and the SCC inliner, which needs refactoring.
+ std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> Calls;
+ if (InlineEnablePriorityOrder)
+ Calls = std::make_unique<PriorityInlineOrder<InlineSizePriority>>();
+ else
+ Calls = std::make_unique<DefaultInlineOrder<std::pair<CallBase *, int>>>();
+ assert(Calls != nullptr && "Expected an initialized InlineOrder");
+
+ // Populate the initial list of calls in this module.
+ for (Function &F : M) {
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions.
+ // FIXME: Using instructions sequence is a really bad way to do this.
+ // Instead we should do an actual RPO walk of the function body.
+ for (Instruction &I : instructions(F))
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *Callee = CB->getCalledFunction()) {
+ if (!Callee->isDeclaration())
+ Calls->push({CB, -1});
+ else if (!isa<IntrinsicInst>(I)) {
+ using namespace ore;
+ setInlineRemark(*CB, "unavailable definition");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", CB->getCaller())
+ << " because its definition is unavailable"
+ << setIsVerbose();
+ });
+ }
+ }
+ }
+ if (Calls->empty())
+ return PreservedAnalyses::all();
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+ // Track a set vector of inlined callees so that we can augment the caller
+ // with all of their edges in the call graph before pruning out the ones that
+ // got simplified away.
+ SmallSetVector<Function *, 4> InlinedCallees;
+
+ // Track the dead functions to delete once finished with inlining calls. We
+ // defer deleting these to make it easier to handle the call graph updates.
+ SmallVector<Function *, 4> DeadFunctions;
+
+ // Loop forward over all of the calls.
+ while (!Calls->empty()) {
+ // We expect the calls to typically be batched with sequences of calls that
+ // have the same caller, so we first set up some shared infrastructure for
+ // this caller. We also do any pruning we can at this layer on the caller
+ // alone.
+ Function &F = *Calls->front().first->getCaller();
+
+ LLVM_DEBUG(dbgs() << "Inlining calls in: " << F.getName() << "\n"
+ << " Function size: " << F.getInstructionCount()
+ << "\n");
+
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ // Now process as many calls as we have within this caller in the sequence.
+ // We bail out as soon as the caller has to change so we can
+ // prepare the context of that new caller.
+ bool DidInline = false;
+ while (!Calls->empty() && Calls->front().first->getCaller() == &F) {
+ auto P = Calls->pop();
+ CallBase *CB = P.first;
+ const int InlineHistoryID = P.second;
+ Function &Callee = *CB->getCalledFunction();
+
+ if (InlineHistoryID != -1 &&
+ inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
+ setInlineRemark(*CB, "recursive");
+ continue;
+ }
+
+ auto Advice = Advisor.getAdvice(*CB, /*OnlyMandatory*/ false);
+ // Check whether we want to inline this callsite.
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ continue;
+ }
+
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(
+ /*cg=*/nullptr, GetAssumptionCache, PSI,
+ &FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
+ &FAM.getResult<BlockFrequencyAnalysis>(Callee));
+
+ InlineResult IR =
+ InlineFunction(*CB, IFI, &FAM.getResult<AAManager>(*CB->getCaller()));
+ if (!IR.isSuccess()) {
+ Advice->recordUnsuccessfulInlining(IR);
+ continue;
+ }
+
+ DidInline = true;
+ InlinedCallees.insert(&Callee);
+ ++NumInlined;
+
+ LLVM_DEBUG(dbgs() << " Size after inlining: "
+ << F.getInstructionCount() << "\n");
+
+ // Add any new callsites to defined functions to the worklist.
+ if (!IFI.InlinedCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({&Callee, InlineHistoryID});
+
+ for (CallBase *ICB : reverse(IFI.InlinedCallSites)) {
+ Function *NewCallee = ICB->getCalledFunction();
+ if (!NewCallee) {
+ // Try to promote an indirect (virtual) call without waiting for
+ // the post-inline cleanup and the next DevirtSCCRepeatedPass
+ // iteration because the next iteration may not happen and we may
+ // miss inlining it.
+ if (tryPromoteCall(*ICB))
+ NewCallee = ICB->getCalledFunction();
+ }
+ if (NewCallee)
+ if (!NewCallee->isDeclaration())
+ Calls->push({ICB, NewHistoryID});
+ }
+ }
+
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(F, Callee);
+
+ // For local functions, check whether this makes the callee trivially
+ // dead. In that case, we can drop the body of the function eagerly
+ // which may reduce the number of callers of other functions to one,
+ // changing inline cost thresholds.
+ bool CalleeWasDeleted = false;
+ if (Callee.hasLocalLinkage()) {
+ // To check this we also need to nuke any dead constant uses (perhaps
+ // made dead by this operation on other functions).
+ Callee.removeDeadConstantUsers();
+ // if (Callee.use_empty() && !CG.isLibFunction(Callee)) {
+ if (Callee.use_empty() && !isKnownLibFunction(Callee, GetTLI(Callee))) {
+ Calls->erase_if([&](const std::pair<CallBase *, int> &Call) {
+ return Call.first->getCaller() == &Callee;
+ });
+ // Clear the body and queue the function itself for deletion when we
+ // finish inlining.
+ // Note that after this point, it is an error to do anything other
+ // than use the callee's address or delete it.
+ Callee.dropAllReferences();
+ assert(!is_contained(DeadFunctions, &Callee) &&
+               "Cannot cause a function to become dead twice!");
+ DeadFunctions.push_back(&Callee);
+ CalleeWasDeleted = true;
+ }
+ }
+ if (CalleeWasDeleted)
+ Advice->recordInliningWithCalleeDeleted();
+ else
+ Advice->recordInlining();
+ }
+
+ if (!DidInline)
+ continue;
+ Changed = true;
+
+ InlinedCallees.clear();
+ }
+
+ // Now that we've finished inlining all of the calls across this module,
+ // delete all of the trivially dead functions.
+ //
+ // Note that this walks a pointer set which has non-deterministic order but
+ // that is OK as all we do is delete things and add pointers to unordered
+ // sets.
+ for (Function *DeadF : DeadFunctions) {
+ // Clear out any cached analyses.
+ FAM.clear(*DeadF, DeadF->getName());
+
+ // And delete the actual function from the module.
+ // The Advisor may use Function pointers to efficiently index various
+ // internal maps, e.g. for memoization. Function cleanup passes like
+ // argument promotion create new functions. It is possible for a new
+ // function to be allocated at the address of a deleted function. We could
+ // index using names, but that's inefficient. Alternatively, we let the
+ // Advisor free the functions when it sees fit.
+ DeadF->getBasicBlockList().clear();
+ M.getFunctionList().remove(DeadF);
+
+ ++NumDeleted;
+ }
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
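// Editor's illustration (not part of the patch): a stand-alone analogue of the
// inline-history chain walked by inlineHistoryIncludes above, using ints in
// place of Function pointers.
#include <cassert>
#include <utility>
#include <vector>

static bool historyIncludes(int Callee, int HistoryID,
                            const std::vector<std::pair<int, int>> &History) {
  while (HistoryID != -1) {
    if (History[HistoryID].first == Callee)
      return true;
    HistoryID = History[HistoryID].second; // walk to the parent entry
  }
  return false;
}

int main() {
  // Entry 0: callee 7 inlined at the top level; entry 1: callee 9 inlined from
  // the body produced by entry 0.
  std::vector<std::pair<int, int>> History = {{7, -1}, {9, 0}};
  assert(historyIncludes(7, 1, History));  // 7 is an ancestor of entry 1
  assert(!historyIncludes(3, 1, History)); // 3 never appears in the chain
  return 0;
}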
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index b80349352719..f342c35fa283 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -33,6 +34,8 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
@@ -41,6 +44,8 @@
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include <algorithm>
+
using namespace llvm;
using namespace omp;
@@ -72,6 +77,46 @@ static cl::opt<bool> HideMemoryTransferLatency(
" transfers"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> DisableOpenMPOptDeglobalization(
+ "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving deglobalization."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptSPMDization(
+ "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptFolding(
+ "openmp-opt-disable-folding", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
+ "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+ cl::desc("Disable OpenMP optimizations that replace the state machine."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> PrintModuleAfterOptimizations(
+ "openmp-opt-print-module", cl::ZeroOrMore,
+ cl::desc("Print the current module after OpenMP optimizations."),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> AlwaysInlineDeviceFunctions(
+ "openmp-opt-inline-device", cl::ZeroOrMore,
+    cl::desc("Inline all applicable functions on the device."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<bool>
+ EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+ cl::desc("Enables more verbose remarks."), cl::Hidden,
+ cl::init(false));
+
+static cl::opt<unsigned>
+ SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
+ cl::desc("Maximal number of attributor iterations."),
+ cl::init(256));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -328,7 +373,7 @@ struct OMPInformationCache : public InformationCache {
if (F->arg_size() != RTFArgTypes.size())
return false;
- auto RTFTyIt = RTFArgTypes.begin();
+ auto *RTFTyIt = RTFArgTypes.begin();
for (Argument &Arg : F->args()) {
if (Arg.getType() != *RTFTyIt)
return false;
@@ -503,7 +548,7 @@ struct KernelInfoState : AbstractState {
/// State to track if we are in SPMD-mode, assumed or known, and why we decided
/// we cannot be. If it is assumed, then RequiresFullRuntime should also be
/// false.
- BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
+ BooleanStateWithPtrSetVector<Instruction, false> SPMDCompatibilityTracker;
/// The __kmpc_target_init call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
@@ -542,7 +587,9 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ ReachedKnownParallelRegions.indicatePessimisticFixpoint();
ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
return ChangeStatus::CHANGED;
}
@@ -550,6 +597,10 @@ struct KernelInfoState : AbstractState {
/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
IsAtFixpoint = true;
+ ReachingKernelEntries.indicateOptimisticFixpoint();
+ SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ ReachedKnownParallelRegions.indicateOptimisticFixpoint();
+ ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -569,6 +620,12 @@ struct KernelInfoState : AbstractState {
return true;
}
+ /// Returns true if this kernel contains any OpenMP parallel regions.
+ bool mayContainParallelRegion() {
+ return !ReachedKnownParallelRegions.empty() ||
+ !ReachedUnknownParallelRegions.empty();
+ }
+
/// Return empty set as the best state of potential values.
static KernelInfoState getBestState() { return KernelInfoState(true); }
@@ -584,12 +641,14 @@ struct KernelInfoState : AbstractState {
// Do not merge two different _init and _deinit call sites.
if (KIS.KernelInitCB) {
if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
- indicatePessimisticFixpoint();
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
KernelInitCB = KIS.KernelInitCB;
}
if (KIS.KernelDeinitCB) {
if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
- indicatePessimisticFixpoint();
+ llvm_unreachable("Kernel that calls another kernel violates OpenMP-Opt "
+ "assumptions.");
KernelDeinitCB = KIS.KernelDeinitCB;
}
SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
@@ -1032,8 +1091,8 @@ private:
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
+ for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
+ ++U)
Args.push_back(CI->getArgOperand(U));
CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
@@ -1041,9 +1100,9 @@ private:
NewCI->setDebugLoc(CI->getDebugLoc());
// Forward parameter attributes from the callback to the callee.
- for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
- U < E; ++U)
- for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
+ for (unsigned U = CallbackFirstArgOperand, E = CI->arg_size(); U < E;
+ ++U)
+ for (const Attribute &A : CI->getAttributes().getParamAttrs(U))
NewCI->addParamAttr(
U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
@@ -1563,13 +1622,13 @@ private:
// TODO: Use dominance to find a good position instead.
auto CanBeMoved = [this](CallBase &CB) {
- unsigned NumArgs = CB.getNumArgOperands();
+ unsigned NumArgs = CB.arg_size();
if (NumArgs == 0)
return true;
if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
return false;
- for (unsigned u = 1; u < NumArgs; ++u)
- if (isa<Instruction>(CB.getArgOperand(u)))
+ for (unsigned U = 1; U < NumArgs; ++U)
+ if (isa<Instruction>(CB.getArgOperand(U)))
return false;
return true;
};
@@ -1612,7 +1671,7 @@ private:
// valid at the new location. For now we just pick a global one, either
// existing and used by one of the calls, or created from scratch.
if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
- if (CI->getNumArgOperands() > 0 &&
+ if (!CI->arg_empty() &&
CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
/* GlobalOnly */ true);
@@ -1695,8 +1754,8 @@ private:
// Transitively search for more arguments by looking at the users of the
// ones we know already. During the search the GTIdArgs vector is extended
// so we cannot cache the size nor can we use a range based for.
- for (unsigned u = 0; u < GTIdArgs.size(); ++u)
- AddUserArgs(*GTIdArgs[u]);
+ for (unsigned U = 0; U < GTIdArgs.size(); ++U)
+ AddUserArgs(*GTIdArgs[U]);
}
/// Kernel (=GPU) optimizations and utility functions
@@ -1822,6 +1881,10 @@ private:
OMPRTL___kmpc_kernel_end_parallel);
ExternalizationRAII BarrierSPMD(OMPInfoCache,
OMPRTL___kmpc_barrier_simple_spmd);
+ ExternalizationRAII BarrierGeneric(OMPInfoCache,
+ OMPRTL___kmpc_barrier_simple_generic);
+ ExternalizationRAII ThreadId(OMPInfoCache,
+ OMPRTL___kmpc_get_hardware_thread_id_in_block);
registerAAs(IsModulePass);
@@ -1918,6 +1981,10 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
if (!KernelParallelRFI)
return Changed;
+ // If we have disabled state machine changes, exit
+ if (DisableOpenMPOptStateMachineRewrite)
+ return Changed;
+
for (Function *F : SCC) {
// Check if the function is a use in a __kmpc_parallel_51 call at
@@ -1996,7 +2063,8 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
UndefValue::get(Int8Ty), F->getName() + ".ID");
for (Use *U : ToBeReplacedStateMachineUses)
- U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
+ U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ ID, U->get()->getType()));
++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
@@ -2508,9 +2576,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- // Check if the edge into the successor block compares the __kmpc_target_init
- // result with -1. If we are in non-SPMD-mode that signals only the main
- // thread will execute the edge.
+ // Check if the edge into the successor block contains a condition that only
+ // lets the main thread execute it.
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
if (!Edge || !Edge->isConditional())
return false;
@@ -2525,16 +2592,27 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
if (!C)
return false;
- // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
+ // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
if (C->isAllOnesValue()) {
auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
if (!CB)
return false;
- const int InitIsSPMDArgNo = 1;
- auto *IsSPMDModeCI =
- dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
- return IsSPMDModeCI && IsSPMDModeCI->isZero();
+ const int InitModeArgNo = 1;
+ auto *ModeCI = dyn_cast<ConstantInt>(CB->getOperand(InitModeArgNo));
+ return ModeCI && (ModeCI->getSExtValue() & OMP_TGT_EXEC_MODE_GENERIC);
+ }
+
+ if (C->isZero()) {
+ // Match: 0 == llvm.nvvm.read.ptx.sreg.tid.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
+ return true;
+
+ // Match: 0 == llvm.amdgcn.workitem.id.x()
+ if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
+ return true;
}
return false;
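    // Editor's note (not part of the patch): in IR, the thread-id guard
    // recognized above looks roughly like this hand-written example:
    //   %tid  = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
    //   %only = icmp eq i32 %tid, 0
    //   br i1 %only, label %initial.thread, label %worker.threads
    // The generic-mode variant instead compares the result of the
    // __kmpc_target_init call against -1.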
@@ -2543,15 +2621,14 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// Merge all the predecessor states into the current basic block. A basic
// block is executed by a single thread if all of its predecessors are.
auto MergePredecessorStates = [&](BasicBlock *BB) {
- if (pred_begin(BB) == pred_end(BB))
+ if (pred_empty(BB))
return SingleThreadedBBs.contains(BB);
bool IsInitialThread = true;
- for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
- PredBB != PredEndBB; ++PredBB) {
- if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
+ for (BasicBlock *PredBB : predecessors(BB)) {
+ if (!IsInitialThreadOnly(dyn_cast<BranchInst>(PredBB->getTerminator()),
BB))
- IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
+ IsInitialThread &= SingleThreadedBBs.contains(PredBB);
}
return IsInitialThread;
@@ -2683,9 +2760,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
- LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "
- << CB->getCaller()->getName() << " with "
- << AllocSize->getZExtValue()
+ LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
+ << " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
// Create a new shared memory buffer of the same size as the allocation
@@ -2734,7 +2810,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
const auto &ED = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
if (CallBase *CB = dyn_cast<CallBase>(U))
- if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
+ if (!isa<ConstantInt>(CB->getArgOperand(0)) ||
!ED.isExecutedByInitialThreadOnly(*CB))
MallocCalls.erase(CB);
}
@@ -2769,9 +2845,17 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
: "") +
std::string(" #PRs: ") +
- std::to_string(ReachedKnownParallelRegions.size()) +
+ (ReachedKnownParallelRegions.isValidState()
+ ? std::to_string(ReachedKnownParallelRegions.size())
+ : "<invalid>") +
", #Unknown PRs: " +
- std::to_string(ReachedUnknownParallelRegions.size());
+ (ReachedUnknownParallelRegions.isValidState()
+ ? std::to_string(ReachedUnknownParallelRegions.size())
+ : "<invalid>") +
+ ", #Reaching Kernels: " +
+ (ReachingKernelEntries.isValidState()
+ ? std::to_string(ReachingKernelEntries.size())
+ : "<invalid>");
}
/// Create an abstract attribute view for the position \p IRP.
@@ -2797,6 +2881,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
: AAKernelInfo(IRP, A) {}
+ SmallPtrSet<Instruction *, 4> GuardedInstructions;
+
+ SmallPtrSetImpl<Instruction *> &getGuardedInstructions() {
+ return GuardedInstructions;
+ }
+
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
// This is a high-level transform that might change the constant arguments
@@ -2843,8 +2933,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
},
Fn);
- assert((KernelInitCB && KernelDeinitCB) &&
- "Kernel without __kmpc_target_init or __kmpc_target_deinit!");
+ // Ignore kernels without initializers such as global constructors.
+ if (!KernelInitCB || !KernelDeinitCB) {
+ indicateOptimisticFixpoint();
+ return;
+ }
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
@@ -2859,7 +2952,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
// state. As long as we are not in an invalid state, we will create a
// custom state machine so the value should be a `i1 false`. If we are
// in an invalid state, we won't change the value that is in the IR.
- if (!isValidState())
+ if (!ReachedKnownParallelRegions.isValidState())
+ return nullptr;
+ // If we have disabled state machine rewrites, don't make a custom one.
+ if (DisableOpenMPOptStateMachineRewrite)
return nullptr;
if (AA)
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
@@ -2869,7 +2965,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
return FalseVal;
};
- Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
+ Attributor::SimplifictionCallbackTy ModeSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
// IRP represents the "SPMDCompatibilityTracker" argument of an
@@ -2885,8 +2981,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
} else {
UsedAssumedInformation = false;
}
- auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
- SPMDCompatibilityTracker.isAssumed());
+ auto *Val = ConstantInt::getSigned(
+ IntegerType::getInt8Ty(IRP.getAnchorValue().getContext()),
+ SPMDCompatibilityTracker.isAssumed() ? OMP_TGT_EXEC_MODE_SPMD
+ : OMP_TGT_EXEC_MODE_GENERIC);
return Val;
};
@@ -2911,8 +3009,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
return Val;
};
- constexpr const int InitIsSPMDArgNo = 1;
- constexpr const int DeinitIsSPMDArgNo = 1;
+ constexpr const int InitModeArgNo = 1;
+ constexpr const int DeinitModeArgNo = 1;
constexpr const int InitUseStateMachineArgNo = 2;
constexpr const int InitRequiresFullRuntimeArgNo = 3;
constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
@@ -2920,11 +3018,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
StateMachineSimplifyCB);
A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
- IsSPMDModeSimplifyCB);
+ IRPosition::callsite_argument(*KernelInitCB, InitModeArgNo),
+ ModeSimplifyCB);
A.registerSimplificationCallback(
- IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
- IsSPMDModeSimplifyCB);
+ IRPosition::callsite_argument(*KernelDeinitCB, DeinitModeArgNo),
+ ModeSimplifyCB);
A.registerSimplificationCallback(
IRPosition::callsite_argument(*KernelInitCB,
InitRequiresFullRuntimeArgNo),
@@ -2935,10 +3033,25 @@ struct AAKernelInfoFunction : AAKernelInfo {
IsGenericModeSimplifyCB);
// Check if we know we are in SPMD-mode already.
- ConstantInt *IsSPMDArg =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
- if (IsSPMDArg && !IsSPMDArg->isZero())
+ ConstantInt *ModeArg =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
+ if (ModeArg && (ModeArg->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ // This is a generic region but SPMDization is disabled so stop tracking.
+ else if (DisableOpenMPOptSPMDization)
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
+ }
+
+ /// Sanitize the string \p S such that it is a suitable global symbol name.
+ static std::string sanitizeForGlobalName(std::string S) {
+ std::replace_if(
+ S.begin(), S.end(),
+ [](const char C) {
+ return !((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
+ (C >= '0' && C <= '9') || C == '_');
+ },
+ '.');
+ return S;
}
/// Modify the IR based on the KernelInfoState as the fixpoint iteration is
@@ -2949,19 +3062,16 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!KernelInitCB || !KernelDeinitCB)
return ChangeStatus::UNCHANGED;
- // Known SPMD-mode kernels need no manifest changes.
- if (SPMDCompatibilityTracker.isKnown())
- return ChangeStatus::UNCHANGED;
-
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
- if (!changeToSPMDMode(A))
- buildCustomStateMachine(A);
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ if (!changeToSPMDMode(A, Changed))
+ return buildCustomStateMachine(A);
- return ChangeStatus::CHANGED;
+ return Changed;
}
- bool changeToSPMDMode(Attributor &A) {
+ bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
if (!SPMDCompatibilityTracker.isAssumed()) {
@@ -2993,38 +3103,259 @@ struct AAKernelInfoFunction : AAKernelInfo {
return false;
}
- // Adjust the global exec mode flag that tells the runtime what mode this
- // kernel is executed in.
+ // Check if the kernel is already in SPMD mode; if so, return success.
Function *Kernel = getAnchorScope();
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
- assert(ExecMode->getInitializer() &&
- ExecMode->getInitializer()->isOneValue() &&
- "Initially non-SPMD kernel has SPMD exec mode!");
+ assert(ExecMode->getInitializer() && "ExecMode doesn't have initializer!");
// Set the global exec mode flag to indicate SPMD-Generic mode.
- constexpr int SPMDGeneric = 2;
- if (!ExecMode->getInitializer()->isZeroValue())
- ExecMode->setInitializer(
- ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));
+ assert(isa<ConstantInt>(ExecMode->getInitializer()) &&
+ "ExecMode is not an integer!");
+ const int8_t ExecModeVal =
+ cast<ConstantInt>(ExecMode->getInitializer())->getSExtValue();
+ if (ExecModeVal != OMP_TGT_EXEC_MODE_GENERIC)
+ return true;
+
+ // We will now unconditionally modify the IR, indicate a change.
+ Changed = ChangeStatus::CHANGED;
+
+ auto CreateGuardedRegion = [&](Instruction *RegionStartI,
+ Instruction *RegionEndI) {
+ LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
+ MemorySSAUpdater *MSU = nullptr;
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+
+ BasicBlock *ParentBB = RegionStartI->getParent();
+ Function *Fn = ParentBB->getParent();
+ Module &M = *Fn->getParent();
+
+ // Create all the blocks and logic.
+ // ParentBB:
+ // goto RegionCheckTidBB
+ // RegionCheckTidBB:
+ // Tid = __kmpc_hardware_thread_id()
+ // if (Tid != 0)
+ // goto RegionBarrierBB
+ // RegionStartBB:
+ // <execute instructions guarded>
+ // goto RegionEndBB
+ // RegionEndBB:
+ // <store escaping values to shared mem>
+ // goto RegionBarrierBB
+ // RegionBarrierBB:
+ // __kmpc_simple_barrier_spmd()
+ // // second barrier is omitted if there are no escaping values.
+ // <load escaping values from shared mem>
+ // __kmpc_simple_barrier_spmd()
+ // goto RegionExitBB
+ // RegionExitBB:
+ // <execute rest of instructions>
+
+ BasicBlock *RegionEndBB = SplitBlock(ParentBB, RegionEndI->getNextNode(),
+ DT, LI, MSU, "region.guarded.end");
+ BasicBlock *RegionBarrierBB =
+ SplitBlock(RegionEndBB, &*RegionEndBB->getFirstInsertionPt(), DT, LI,
+ MSU, "region.barrier");
+ BasicBlock *RegionExitBB =
+ SplitBlock(RegionBarrierBB, &*RegionBarrierBB->getFirstInsertionPt(),
+ DT, LI, MSU, "region.exit");
+ BasicBlock *RegionStartBB =
+ SplitBlock(ParentBB, RegionStartI, DT, LI, MSU, "region.guarded");
+
+ assert(ParentBB->getUniqueSuccessor() == RegionStartBB &&
+ "Expected a different CFG");
+
+ BasicBlock *RegionCheckTidBB = SplitBlock(
+ ParentBB, ParentBB->getTerminator(), DT, LI, MSU, "region.check.tid");
+
+ // Register basic blocks with the Attributor.
+ A.registerManifestAddedBasicBlock(*RegionEndBB);
+ A.registerManifestAddedBasicBlock(*RegionBarrierBB);
+ A.registerManifestAddedBasicBlock(*RegionExitBB);
+ A.registerManifestAddedBasicBlock(*RegionStartBB);
+ A.registerManifestAddedBasicBlock(*RegionCheckTidBB);
+
+ bool HasBroadcastValues = false;
+ // Find escaping outputs from the guarded region to outside users and
+ // broadcast their values to them.
+ for (Instruction &I : *RegionStartBB) {
+ SmallPtrSet<Instruction *, 4> OutsideUsers;
+ for (User *Usr : I.users()) {
+ Instruction &UsrI = *cast<Instruction>(Usr);
+ if (UsrI.getParent() != RegionStartBB)
+ OutsideUsers.insert(&UsrI);
+ }
+
+ if (OutsideUsers.empty())
+ continue;
+
+ HasBroadcastValues = true;
+
+ // Emit a global variable in shared memory to store the broadcasted
+ // value.
+ auto *SharedMem = new GlobalVariable(
+ M, I.getType(), /* IsConstant */ false,
+ GlobalValue::InternalLinkage, UndefValue::get(I.getType()),
+ sanitizeForGlobalName(
+ (I.getName() + ".guarded.output.alloc").str()),
+ nullptr, GlobalValue::NotThreadLocal,
+ static_cast<unsigned>(AddressSpace::Shared));
+
+ // Emit a store instruction to update the value.
+ new StoreInst(&I, SharedMem, RegionEndBB->getTerminator());
+
+ LoadInst *LoadI = new LoadInst(I.getType(), SharedMem,
+ I.getName() + ".guarded.output.load",
+ RegionBarrierBB->getTerminator());
+
+ // Emit a load instruction and replace uses of the output value.
+ for (Instruction *UsrI : OutsideUsers)
+ UsrI->replaceUsesOfWith(&I, LoadI);
+ }
+
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+
+ // Go to tid check BB in ParentBB.
+ const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
+ ParentBB->getTerminator()->eraseFromParent();
+ OpenMPIRBuilder::LocationDescription Loc(
+ InsertPointTy(ParentBB, ParentBB->end()), DL);
+ OMPInfoCache.OMPBuilder.updateToLocation(Loc);
+ auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc);
+ Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr);
+ BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
+
+ // Add check for Tid in RegionCheckTidBB
+ RegionCheckTidBB->getTerminator()->eraseFromParent();
+ OpenMPIRBuilder::LocationDescription LocRegionCheckTid(
+ InsertPointTy(RegionCheckTidBB, RegionCheckTidBB->end()), DL);
+ OMPInfoCache.OMPBuilder.updateToLocation(LocRegionCheckTid);
+ FunctionCallee HardwareTidFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ Value *Tid =
+ OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
+ Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
+ OMPInfoCache.OMPBuilder.Builder
+ .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
+ ->setDebugLoc(DL);
+
+ // First barrier for synchronization; it ensures the main thread has
+ // updated the values.
+ FunctionCallee BarrierFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_barrier_simple_spmd);
+ OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
+ RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
+ OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
+ ->setDebugLoc(DL);
+
+ // Second barrier ensures workers have read broadcast values.
+ if (HasBroadcastValues)
+ CallInst::Create(BarrierFn, {Ident, Tid}, "",
+ RegionBarrierBB->getTerminator())
+ ->setDebugLoc(DL);
+ };
+
+ auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+ SmallPtrSet<BasicBlock *, 8> Visited;
+ for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+ BasicBlock *BB = GuardedI->getParent();
+ if (!Visited.insert(BB).second)
+ continue;
+
+ SmallVector<std::pair<Instruction *, Instruction *>> Reorders;
+ Instruction *LastEffect = nullptr;
+ BasicBlock::reverse_iterator IP = BB->rbegin(), IPEnd = BB->rend();
+ while (++IP != IPEnd) {
+ if (!IP->mayHaveSideEffects() && !IP->mayReadFromMemory())
+ continue;
+ Instruction *I = &*IP;
+ if (OpenMPOpt::getCallIfRegularCall(*I, &AllocSharedRFI))
+ continue;
+ if (!I->user_empty() || !SPMDCompatibilityTracker.contains(I)) {
+ LastEffect = nullptr;
+ continue;
+ }
+ if (LastEffect)
+ Reorders.push_back({I, LastEffect});
+ LastEffect = &*IP;
+ }
+ for (auto &Reorder : Reorders)
+ Reorder.first->moveBefore(Reorder.second);
+ }
+
+ SmallVector<std::pair<Instruction *, Instruction *>, 4> GuardedRegions;
+
+ for (Instruction *GuardedI : SPMDCompatibilityTracker) {
+ BasicBlock *BB = GuardedI->getParent();
+ auto *CalleeAA = A.lookupAAFor<AAKernelInfo>(
+ IRPosition::function(*GuardedI->getFunction()), nullptr,
+ DepClassTy::NONE);
+ assert(CalleeAA != nullptr && "Expected Callee AAKernelInfo");
+ auto &CalleeAAFunction = *cast<AAKernelInfoFunction>(CalleeAA);
+ // Continue if instruction is already guarded.
+ if (CalleeAAFunction.getGuardedInstructions().contains(GuardedI))
+ continue;
+
+ Instruction *GuardedRegionStart = nullptr, *GuardedRegionEnd = nullptr;
+ for (Instruction &I : *BB) {
+ // If instruction I needs to be guarded, update the guarded region
+ // bounds.
+ if (SPMDCompatibilityTracker.contains(&I)) {
+ CalleeAAFunction.getGuardedInstructions().insert(&I);
+ if (GuardedRegionStart)
+ GuardedRegionEnd = &I;
+ else
+ GuardedRegionStart = GuardedRegionEnd = &I;
+
+ continue;
+ }
+
+ // Instruction I does not need guarding; store
+ // any region found and reset bounds.
+ if (GuardedRegionStart) {
+ GuardedRegions.push_back(
+ std::make_pair(GuardedRegionStart, GuardedRegionEnd));
+ GuardedRegionStart = nullptr;
+ GuardedRegionEnd = nullptr;
+ }
+ }
+ }
+
+ for (auto &GR : GuardedRegions)
+ CreateGuardedRegion(GR.first, GR.second);
+
+ // Adjust the global exec mode flag that tells the runtime what mode this
+ // kernel is executed in.
+ assert(ExecModeVal == OMP_TGT_EXEC_MODE_GENERIC &&
+ "Initially non-SPMD kernel has SPMD exec mode!");
+ ExecMode->setInitializer(
+ ConstantInt::get(ExecMode->getInitializer()->getType(),
+ ExecModeVal | OMP_TGT_EXEC_MODE_GENERIC_SPMD));
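  // Editor's note (not part of the patch): this assumes the exec-mode flags
  // from llvm/Frontend/OpenMP are single bits that can be OR'ed, roughly:
  //   OMP_TGT_EXEC_MODE_GENERIC      = 1 << 0
  //   OMP_TGT_EXEC_MODE_SPMD         = 1 << 1
  //   OMP_TGT_EXEC_MODE_GENERIC_SPMD = GENERIC | SPMD
  // so OR-ing GENERIC_SPMD into the original GENERIC value both records that
  // the kernel was compiled as generic and tells the runtime it may now be
  // launched in SPMD fashion.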
// Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
- const int InitIsSPMDArgNo = 1;
- const int DeinitIsSPMDArgNo = 1;
+ const int InitModeArgNo = 1;
+ const int DeinitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
const int InitRequiresFullRuntimeArgNo = 3;
const int DeinitRequiresFullRuntimeArgNo = 2;
auto &Ctx = getAnchorValue().getContext();
- A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
- *ConstantInt::getBool(Ctx, 1));
+ A.changeUseAfterManifest(
+ KernelInitCB->getArgOperandUse(InitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
*ConstantInt::getBool(Ctx, 0));
A.changeUseAfterManifest(
- KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
- *ConstantInt::getBool(Ctx, 1));
+ KernelDeinitCB->getArgOperandUse(DeinitModeArgNo),
+ *ConstantInt::getSigned(IntegerType::getInt8Ty(Ctx),
+ OMP_TGT_EXEC_MODE_SPMD));
A.changeUseAfterManifest(
KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
*ConstantInt::getBool(Ctx, 0));
@@ -3042,10 +3373,15 @@ struct AAKernelInfoFunction : AAKernelInfo {
};
ChangeStatus buildCustomStateMachine(Attributor &A) {
- assert(ReachedKnownParallelRegions.isValidState() &&
- "Custom state machine with invalid parallel region states?");
+ // If we have disabled state machine rewrites, don't make a custom one.
+ if (DisableOpenMPOptStateMachineRewrite)
+ return ChangeStatus::UNCHANGED;
- const int InitIsSPMDArgNo = 1;
+ // Don't rewrite the state machine if we are not in a valid state.
+ if (!ReachedKnownParallelRegions.isValidState())
+ return ChangeStatus::UNCHANGED;
+
+ const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
// Check if the current configuration is non-SPMD and generic state machine.
@@ -3054,14 +3390,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// we give up.
ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
- ConstantInt *IsSPMD =
- dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
+ ConstantInt *Mode =
+ dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitModeArgNo));
// If we are stuck with generic mode, try to create a custom device (=GPU)
// state machine which is specialized for the parallel regions that are
// reachable by the kernel.
- if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
- !IsSPMD->isZero())
+ if (!UseStateMachine || UseStateMachine->isZero() || !Mode ||
+ (Mode->getSExtValue() & OMP_TGT_EXEC_MODE_SPMD))
return ChangeStatus::UNCHANGED;
// If not SPMD mode, indicate we use a custom state machine now.
@@ -3074,8 +3410,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// happen if there simply are no parallel regions. In the resulting kernel
// all worker threads will simply exit right away, leaving the main thread
// to do the work alone.
- if (ReachedKnownParallelRegions.empty() &&
- ReachedUnknownParallelRegions.empty()) {
+ if (!mayContainParallelRegion()) {
++NumOpenMPTargetRegionKernelsWithoutStateMachine;
auto Remark = [&](OptimizationRemark OR) {
@@ -3121,9 +3456,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Create all the blocks:
//
// InitCB = __kmpc_target_init(...)
- // bool IsWorker = InitCB >= 0;
+ // BlockHwSize =
+ // __kmpc_get_hardware_num_threads_in_block();
+ // WarpSize = __kmpc_get_warp_size();
+ // BlockSize = BlockHwSize - WarpSize;
+ // if (InitCB >= BlockSize) return;
+ // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
// if (IsWorker) {
- // SMBeginBB: __kmpc_barrier_simple_spmd(...);
+ // SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
// if (!WorkFn) return;
@@ -3137,7 +3477,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// ((WorkFnTy*)WorkFn)(...);
// SMEndParallelBB: __kmpc_kernel_end_parallel(...);
// }
- // SMDoneBB: __kmpc_barrier_simple_spmd(...);
+ // SMDoneBB: __kmpc_barrier_simple_generic(...);
// goto SMBeginBB;
// }
// UserCodeEntryBB: // user code
@@ -3149,6 +3489,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
BasicBlock *InitBB = KernelInitCB->getParent();
BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
KernelInitCB->getNextNode(), "thread.user_code.check");
+ BasicBlock *IsWorkerCheckBB =
+ BasicBlock::Create(Ctx, "is_worker_check", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineBeginBB = BasicBlock::Create(
Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
@@ -3165,6 +3507,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
A.registerManifestAddedBasicBlock(*InitBB);
A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
+ A.registerManifestAddedBasicBlock(*IsWorkerCheckBB);
A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
@@ -3174,22 +3517,47 @@ struct AAKernelInfoFunction : AAKernelInfo {
const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
-
InitBB->getTerminator()->eraseFromParent();
+
+ Module &M = *Kernel->getParent();
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ FunctionCallee BlockHwSizeFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
+ FunctionCallee WarpSizeFn =
+ OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+ M, OMPRTL___kmpc_get_warp_size);
+ Instruction *BlockHwSize =
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ BlockHwSize->setDebugLoc(DLoc);
+ Instruction *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ WarpSize->setDebugLoc(DLoc);
+ Instruction *BlockSize =
+ BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ BlockSize->setDebugLoc(DLoc);
+ Instruction *IsMainOrWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
+ BlockSize, "thread.is_main_or_worker", InitBB);
+ IsMainOrWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
+ InitBB);
+
Instruction *IsWorker =
ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", InitBB);
+ "thread.is_worker", IsWorkerCheckBB);
IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
+ BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
+ IsWorkerCheckBB);
// Create local storage for the work function pointer.
+ const DataLayout &DL = M.getDataLayout();
Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
- AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr",
- &Kernel->getEntryBlock().front());
+ Instruction *WorkFnAI =
+ new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
+ "worker.work_fn.addr", &Kernel->getEntryBlock().front());
WorkFnAI->setDebugLoc(DLoc);
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
OMPInfoCache.OMPBuilder.updateToLocation(
OpenMPIRBuilder::LocationDescription(
IRBuilder<>::InsertPoint(StateMachineBeginBB,
@@ -3199,13 +3567,23 @@ struct AAKernelInfoFunction : AAKernelInfo {
Value *Ident = KernelInitCB->getArgOperand(0);
Value *GTid = KernelInitCB;
- Module &M = *Kernel->getParent();
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
- M, OMPRTL___kmpc_barrier_simple_spmd);
+ M, OMPRTL___kmpc_barrier_simple_generic);
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
->setDebugLoc(DLoc);
+ if (WorkFnAI->getType()->getPointerAddressSpace() !=
+ (unsigned int)AddressSpace::Generic) {
+ WorkFnAI = new AddrSpaceCastInst(
+ WorkFnAI,
+ PointerType::getWithSamePointeeType(
+ cast<PointerType>(WorkFnAI->getType()),
+ (unsigned int)AddressSpace::Generic),
+ WorkFnAI->getName() + ".generic", StateMachineBeginBB);
+ WorkFnAI->setDebugLoc(DLoc);
+ }
+
FunctionCallee KernelParallelFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_kernel_parallel);
@@ -3243,8 +3621,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Now that we have most of the CFG skeleton it is time for the if-cascade
// that checks the function pointer we got from the runtime against the
// parallel regions we expect, if there are any.
- for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) {
- auto *ParallelRegion = ReachedKnownParallelRegions[i];
+ for (int I = 0, E = ReachedKnownParallelRegions.size(); I < E; ++I) {
+ auto *ParallelRegion = ReachedKnownParallelRegions[I];
BasicBlock *PRExecuteBB = BasicBlock::Create(
Ctx, "worker_state_machine.parallel_region.execute", Kernel,
StateMachineEndParallelBB);
@@ -3260,7 +3638,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
// Check if we need to compare the pointer at all or if we can just
// call the parallel region function.
Value *IsPR;
- if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) {
+ if (I + 1 < E || !ReachedUnknownParallelRegions.empty()) {
Instruction *CmpI = ICmpInst::Create(
ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
"worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
@@ -3324,8 +3702,21 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (llvm::all_of(Objects,
[](const Value *Obj) { return isa<AllocaInst>(Obj); }))
return true;
+ // Check for AAHeapToStack moved objects which must not be guarded.
+ auto &HS = A.getAAFor<AAHeapToStack>(
+ *this, IRPosition::function(*I.getFunction()),
+ DepClassTy::OPTIONAL);
+ if (llvm::all_of(Objects, [&HS](const Value *Obj) {
+ auto *CB = dyn_cast<CallBase>(Obj);
+ if (!CB)
+ return false;
+ return HS.isAssumedHeapToStack(*CB);
+ })) {
+ return true;
+ }
}
- // For now we give up on everything but stores.
+
+ // Insert instruction that needs guarding.
SPMDCompatibilityTracker.insert(&I);
return true;
};
@@ -3339,9 +3730,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!IsKernelEntry) {
updateReachingKernelEntries(A);
updateParallelLevels(A);
+
+ if (!ParallelLevels.isValidState())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
}
// Callback to check a call instruction.
+ bool AllParallelRegionStatesWereFixed = true;
bool AllSPMDStatesWereFixed = true;
auto CheckCallInst = [&](Instruction &I) {
auto &CB = cast<CallBase>(I);
@@ -3349,13 +3744,37 @@ struct AAKernelInfoFunction : AAKernelInfo {
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
getState() ^= CBAA.getState();
AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
+ AllParallelRegionStatesWereFixed &=
+ CBAA.ReachedKnownParallelRegions.isAtFixpoint();
+ AllParallelRegionStatesWereFixed &=
+ CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
return true;
};
bool UsedAssumedInformationInCheckCallInst = false;
if (!A.checkForAllCallLikeInstructions(
- CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
+ CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) {
+ LLVM_DEBUG(dbgs() << TAG
+ << "Failed to visit all call-like instructions!\n";);
return indicatePessimisticFixpoint();
+ }
+
+ // If we haven't used any assumed information for the reached parallel
+ // region states we can fix it.
+ if (!UsedAssumedInformationInCheckCallInst &&
+ AllParallelRegionStatesWereFixed) {
+ ReachedKnownParallelRegions.indicateOptimisticFixpoint();
+ ReachedUnknownParallelRegions.indicateOptimisticFixpoint();
+ }
+
+ // If we are sure there are no parallel regions in the kernel we do not
+ // want SPMD mode.
+ if (IsKernelEntry && ReachedUnknownParallelRegions.isAtFixpoint() &&
+ ReachedKnownParallelRegions.isAtFixpoint() &&
+ ReachedUnknownParallelRegions.isValidState() &&
+ ReachedKnownParallelRegions.isValidState() &&
+ !mayContainParallelRegion())
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
// If we haven't used any assumed information for the SPMD state we can fix
// it.
@@ -3454,14 +3873,14 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *Callee = getAssociatedFunction();
- // Helper to lookup an assumption string.
- auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
- return Fn && hasAssumption(*Fn, AssumptionStr);
- };
+ auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
// Check for SPMD-mode assumptions.
- if (HasAssumption(Callee, "ompx_spmd_amenable"))
+ if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
+ indicateOptimisticFixpoint();
+ }
// First weed out calls we do not care about, that is readonly/readnone
// calls, intrinsics, and "no_openmp" calls. None of these can reach a
@@ -3483,14 +3902,16 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// Unknown callees might contain parallel regions, except if they have
// an appropriate assumption attached.
- if (!(HasAssumption(Callee, "omp_no_openmp") ||
- HasAssumption(Callee, "omp_no_parallelism")))
+ if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
+ AssumptionAA.hasAssumption("omp_no_parallelism")))
ReachedUnknownParallelRegions.insert(&CB);
// If SPMDCompatibilityTracker is not fixed, we need to give up on the
// idea we can run something unknown in SPMD-mode.
- if (!SPMDCompatibilityTracker.isAtFixpoint())
+ if (!SPMDCompatibilityTracker.isAtFixpoint()) {
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
+ }
// We have updated the state for this unknown call properly, there won't
// be any change so we indicate a fixpoint.
@@ -3506,6 +3927,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
switch (RF) {
// All the functions we know are compatible with SPMD mode.
case OMPRTL___kmpc_is_spmd_exec_mode:
+ case OMPRTL___kmpc_distribute_static_fini:
case OMPRTL___kmpc_for_static_fini:
case OMPRTL___kmpc_global_thread_num:
case OMPRTL___kmpc_get_hardware_num_threads_in_block:
@@ -3516,6 +3938,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPRTL___kmpc_end_master:
case OMPRTL___kmpc_barrier:
break;
+ case OMPRTL___kmpc_distribute_static_init_4:
+ case OMPRTL___kmpc_distribute_static_init_4u:
+ case OMPRTL___kmpc_distribute_static_init_8:
+ case OMPRTL___kmpc_distribute_static_init_8u:
case OMPRTL___kmpc_for_static_init_4:
case OMPRTL___kmpc_for_static_init_4u:
case OMPRTL___kmpc_for_static_init_8:
@@ -3533,6 +3959,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
case OMPScheduleType::DistributeChunked:
break;
default:
+ SPMDCompatibilityTracker.indicatePessimisticFixpoint();
SPMDCompatibilityTracker.insert(&CB);
break;
};
@@ -3565,7 +3992,7 @@ struct AAKernelInfoCallSite : AAKernelInfo {
return;
default:
// Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
- // generally.
+ // generally. However, they do not hide parallel regions.
SPMDCompatibilityTracker.insert(&CB);
break;
}
@@ -3685,6 +4112,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
}
void initialize(Attributor &A) override {
+ if (DisableOpenMPOptFolding)
+ indicatePessimisticFixpoint();
+
Function *Callee = getAssociatedFunction();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -3741,11 +4171,24 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
- Instruction &CB = *getCtxI();
- A.changeValueAfterManifest(CB, **SimplifiedValue);
- A.deleteAfterManifest(CB);
+ Instruction &I = *getCtxI();
+ A.changeValueAfterManifest(I, **SimplifiedValue);
+ A.deleteAfterManifest(I);
+
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ auto Remark = [&](OptimizationRemark OR) {
+ if (auto *C = dyn_cast<ConstantInt>(*SimplifiedValue))
+ return OR << "Replacing OpenMP runtime call "
+ << CB->getCalledFunction()->getName() << " with "
+ << ore::NV("FoldedValue", C->getZExtValue()) << ".";
+ return OR << "Replacing OpenMP runtime call "
+ << CB->getCalledFunction()->getName() << ".";
+ };
- LLVM_DEBUG(dbgs() << TAG << "Folding runtime call: " << CB << " with "
+ if (CB && EnableVerboseRemarks)
+ A.emitRemark<OptimizationRemark>(CB, "OMP180", Remark);
+
+ LLVM_DEBUG(dbgs() << TAG << "Replacing runtime call: " << I << " with "
<< **SimplifiedValue << "\n");
Changed = ChangeStatus::CHANGED;
@@ -3979,7 +4422,6 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
-
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
@@ -4012,7 +4454,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
return false;
};
- GlobalizationRFI.foreachUse(SCC, CreateAA);
+ if (!DisableOpenMPOptDeglobalization)
+ GlobalizationRFI.foreachUse(SCC, CreateAA);
// Create an ExecutionDomain AA for every function and a HeapToStack AA for
// every function if there is a device kernel.
@@ -4024,7 +4467,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
continue;
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
- A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
+ if (!DisableOpenMPOptDeglobalization)
+ A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
for (auto &I : instructions(*F)) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
@@ -4176,28 +4620,32 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
ORE.emit([&]() {
OptimizationRemarkAnalysis ORA(DEBUG_TYPE, "OMP140", &F);
return ORA << "Could not internalize function. "
- << "Some optimizations may not be possible.";
+ << "Some optimizations may not be possible. [OMP140]";
});
};
// Create internal copies of each function if this is a kernel Module. This
// allows interprocedural passes to see every call edge.
- DenseSet<const Function *> InternalizedFuncs;
- if (isOpenMPDevice(M))
+ DenseMap<Function *, Function *> InternalizedMap;
+ if (isOpenMPDevice(M)) {
+ SmallPtrSet<Function *, 16> InternalizeFns;
for (Function &F : M)
if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
!DisableInternalization) {
- if (Attributor::internalizeFunction(F, /* Force */ true)) {
- InternalizedFuncs.insert(&F);
+ if (Attributor::isInternalizable(F)) {
+ InternalizeFns.insert(&F);
} else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
EmitRemark(F);
}
}
+ Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+ }
+
// Look at every function in the Module unless it was internalized.
SmallVector<Function *, 16> SCC;
for (Function &F : M)
- if (!F.isDeclaration() && !InternalizedFuncs.contains(&F))
+ if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
SCC.push_back(&F);
if (SCC.empty())
@@ -4215,12 +4663,24 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
+
+ // Optionally inline device functions for potentially better performance.
+ if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
+ for (Function &F : M)
+ if (!F.isDeclaration() && !Kernels.contains(&F) &&
+ !F.hasFnAttribute(Attribute::NoInline))
+ F.addFnAttr(Attribute::AlwaysInline);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt Module Pass:\n" << M);
+
if (Changed)
return PreservedAnalyses::none();
@@ -4267,12 +4727,17 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
/*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
+
if (Changed)
return PreservedAnalyses::none();
@@ -4333,12 +4798,18 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
Allocator,
/*CGSCC*/ Functions, Kernels);
- unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+ unsigned MaxFixpointIterations =
+ (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
MaxFixpointIterations, OREGetter, DEBUG_TYPE);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
- return OMPOpt.run(false);
+ bool Result = OMPOpt.run(false);
+
+ if (PrintModuleAfterOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module after OpenMPOpt CGSCC Pass:\n" << M);
+
+ return Result;
}
bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index d517de38ace3..7402e399a88a 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -441,9 +441,7 @@ PartialInlinerImpl::computeOutliningColdRegionsInfo(
};
auto BBProfileCount = [BFI](BasicBlock *BB) {
- return BFI->getBlockProfileCount(BB)
- ? BFI->getBlockProfileCount(BB).getValue()
- : 0;
+ return BFI->getBlockProfileCount(BB).getValueOr(0);
};
// Use the same computeBBInlineCost function to compute the cost savings of
@@ -1413,7 +1411,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
uint64_t CalleeEntryCountV =
- (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
+ (CalleeEntryCount ? CalleeEntryCount->getCount() : 0);
bool AnyInline = false;
for (User *User : Users) {
@@ -1461,8 +1459,8 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
if (AnyInline) {
Cloner.IsFunctionInlined = true;
if (CalleeEntryCount)
- Cloner.OrigFunc->setEntryCount(
- CalleeEntryCount.setCount(CalleeEntryCountV));
+ Cloner.OrigFunc->setEntryCount(Function::ProfileCount(
+ CalleeEntryCountV, CalleeEntryCount->getType()));
OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
OrigFuncORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc)
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index aa916345954d..74f68531b89a 100644
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -437,6 +437,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
+ // The matrix extension can introduce large vector operations early, which can
+ // benefit from running vector-combine early on.
+ if (EnableMatrix)
+ MPM.add(createVectorCombinePass());
+
// Begin the loop pass pipeline.
if (EnableSimpleLoopUnswitch) {
// The simple loop unswitch pass relies on separate cleanup passes. Schedule
@@ -1012,7 +1017,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
createPGOIndirectCallPromotionLegacyPass(true, !PGOSampleUse.empty()));
// Propagate constant function arguments by specializing the functions.
- if (EnableFunctionSpecialization)
+ if (EnableFunctionSpecialization && OptLevel > 2)
PM.add(createFunctionSpecializationPass());
// Propagate constants at call sites into the functions they call. This
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index 081398a390fa..5779553ee732 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -135,6 +135,7 @@ PreservedAnalyses FunctionSpecializationPass::run(Module &M,
return PA;
}
+namespace {
struct FunctionSpecializationLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
FunctionSpecializationLegacyPass() : ModulePass(ID) {}
@@ -175,6 +176,7 @@ struct FunctionSpecializationLegacyPass : public ModulePass {
return runFunctionSpecialization(M, DL, GetTLI, GetTTI, GetAC, GetAnalysis);
}
};
+} // namespace
char FunctionSpecializationLegacyPass::ID = 0;
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 55b88ac14da5..bae9a1e27e75 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -32,7 +32,7 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
if (CalleeName.empty())
return getHottestChildContext(CallSite);
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end())
return &It->second;
@@ -64,8 +64,8 @@ ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
ContextTrieNode &ContextTrieNode::moveToChildContext(
const LineLocation &CallSite, ContextTrieNode &&NodeToMove,
- StringRef ContextStrToRemove, bool DeleteNode) {
- uint32_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
+ uint32_t ContextFramesToRemove, bool DeleteNode) {
+ uint64_t Hash = nodeHash(NodeToMove.getFuncName(), CallSite);
assert(!AllChildContext.count(Hash) && "Node to remove must exist");
LineLocation OldCallSite = NodeToMove.CallSiteLoc;
ContextTrieNode &OldParentContext = *NodeToMove.getParentContext();
@@ -86,10 +86,10 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
FunctionSamples *FSamples = Node->getFunctionSamples();
if (FSamples) {
- FSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FSamples->getContext().promoteOnPath(ContextFramesToRemove);
FSamples->getContext().setState(SyntheticContext);
- LLVM_DEBUG(dbgs() << " Context promoted to: " << FSamples->getContext()
- << "\n");
+ LLVM_DEBUG(dbgs() << " Context promoted to: "
+ << FSamples->getContext().toString() << "\n");
}
for (auto &It : Node->getAllChildContext()) {
@@ -108,12 +108,12 @@ ContextTrieNode &ContextTrieNode::moveToChildContext(
void ContextTrieNode::removeChildContext(const LineLocation &CallSite,
StringRef CalleeName) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
// Note this essentially calls dtor and destroys that child context
AllChildContext.erase(Hash);
}
-std::map<uint32_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
+std::map<uint64_t, ContextTrieNode> &ContextTrieNode::getAllChildContext() {
return AllChildContext;
}
@@ -127,6 +127,15 @@ void ContextTrieNode::setFunctionSamples(FunctionSamples *FSamples) {
FuncSamples = FSamples;
}
+Optional<uint32_t> ContextTrieNode::getFunctionSize() const { return FuncSize; }
+
+void ContextTrieNode::addFunctionSize(uint32_t FSize) {
+ if (!FuncSize.hasValue())
+ FuncSize = 0;
+
+ FuncSize = FuncSize.getValue() + FSize;
+}
+
LineLocation ContextTrieNode::getCallSiteLoc() const { return CallSiteLoc; }
ContextTrieNode *ContextTrieNode::getParentContext() const {
@@ -137,9 +146,10 @@ void ContextTrieNode::setParentContext(ContextTrieNode *Parent) {
ParentContext = Parent;
}
-void ContextTrieNode::dump() {
+void ContextTrieNode::dumpNode() {
dbgs() << "Node: " << FuncName << "\n"
<< " Callsite: " << CallSiteLoc << "\n"
+ << " Size: " << FuncSize << "\n"
<< " Children:\n";
for (auto &It : AllChildContext) {
@@ -147,20 +157,38 @@ void ContextTrieNode::dump() {
}
}
-uint32_t ContextTrieNode::nodeHash(StringRef ChildName,
+void ContextTrieNode::dumpTree() {
+ dbgs() << "Context Profile Tree:\n";
+ std::queue<ContextTrieNode *> NodeQueue;
+ NodeQueue.push(this);
+
+ while (!NodeQueue.empty()) {
+ ContextTrieNode *Node = NodeQueue.front();
+ NodeQueue.pop();
+ Node->dumpNode();
+
+ for (auto &It : Node->getAllChildContext()) {
+ ContextTrieNode *ChildNode = &It.second;
+ NodeQueue.push(ChildNode);
+ }
+ }
+}
+
+uint64_t ContextTrieNode::nodeHash(StringRef ChildName,
const LineLocation &Callsite) {
// We still use the child's name for the child hash; this is
// because for children of root node, we don't have
// different line/discriminator, and we'll rely on name
// to differentiate children.
- uint32_t NameHash = std::hash<std::string>{}(ChildName.str());
- uint32_t LocId = (Callsite.LineOffset << 16) | Callsite.Discriminator;
+ uint64_t NameHash = std::hash<std::string>{}(ChildName.str());
+ uint64_t LocId =
+ (((uint64_t)Callsite.LineOffset) << 32) | Callsite.Discriminator;
return NameHash + (LocId << 5) + LocId;
}
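// Editor's sketch (not part of the patch): widening the hash to 64 bits keeps
// the line offset and the discriminator in separate halves. Under the old
// 32-bit packing (LineOffset << 16) | Discriminator, two hypothetical call
// sites such as
//   LineLocation A(/*LineOffset=*/0, /*Discriminator=*/0x10000);
//   LineLocation B(/*LineOffset=*/1, /*Discriminator=*/0);
// produced the same LocId (0x10000); with (LineOffset << 32) | Discriminator
// they remain distinct.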
ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
const LineLocation &CallSite, StringRef CalleeName, bool AllowCreate) {
- uint32_t Hash = nodeHash(CalleeName, CallSite);
+ uint64_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
if (It != AllChildContext.end()) {
assert(It->second.getFuncName() == CalleeName &&
@@ -177,13 +205,16 @@ ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
// Profile tracker that manages profiles and their associated contexts
SampleContextTracker::SampleContextTracker(
- StringMap<FunctionSamples> &Profiles) {
+ SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap)
+ : GUIDToFuncNameMap(GUIDToFuncNameMap) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
- SampleContext Context(FuncSample.first(), RawContext);
- LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
+ SampleContext Context = FuncSample.first;
+ LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
+ << "\n");
if (!Context.isBaseContext())
- FuncToCtxtProfiles[Context.getNameWithoutContext()].push_back(FSamples);
+ FuncToCtxtProfiles[Context.getName()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
@@ -200,6 +231,10 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
return nullptr;
CalleeName = FunctionSamples::getCanonicalFnName(CalleeName);
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID);
// For indirect call, CalleeName will be empty, in which case the context
// profile for callee with largest total samples will be returned.
@@ -207,7 +242,8 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
LLVM_DEBUG(if (FSamples) {
- dbgs() << " Callee context found: " << FSamples->getContext() << "\n";
+ dbgs() << " Callee context found: " << FSamples->getContext().toString()
+ << "\n";
});
return FSamples;
}
@@ -285,6 +321,11 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
bool MergeContext) {
LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID);
+
// Base profile is top-level node (child of root node), so try to retrieve
// existing top-level node for given function first. If it exists, it could be
// that we've merged base profile before, or there's actually context-less
@@ -299,14 +340,14 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
// into base profile.
for (auto *CSamples : FuncToCtxtProfiles[Name]) {
SampleContext &Context = CSamples->getContext();
- ContextTrieNode *FromNode = getContextFor(Context);
- if (FromNode == Node)
- continue;
-
// Skip inlined context profile and also don't re-merge any context
if (Context.hasState(InlinedContext) || Context.hasState(MergedContext))
continue;
+ ContextTrieNode *FromNode = getContextFor(Context);
+ if (FromNode == Node)
+ continue;
+
ContextTrieNode &ToNode = promoteMergeContextSamplesTree(*FromNode);
assert((!Node || Node == &ToNode) && "Expect only one base profile");
Node = &ToNode;
@@ -324,7 +365,7 @@ void SampleContextTracker::markContextSamplesInlined(
const FunctionSamples *InlinedSamples) {
assert(InlinedSamples && "Expect non-null inlined samples");
LLVM_DEBUG(dbgs() << "Marking context profile as inlined: "
- << InlinedSamples->getContext() << "\n");
+ << InlinedSamples->getContext().toString() << "\n");
InlinedSamples->getContext().setState(InlinedContext);
}
@@ -376,30 +417,23 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
FunctionSamples *FromSamples = NodeToPromo.getFunctionSamples();
assert(FromSamples && "Shouldn't promote a context without profile");
LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
- << FromSamples->getContext() << "\n");
+ << FromSamples->getContext().toString() << "\n");
assert(!FromSamples->getContext().hasState(InlinedContext) &&
"Shouldn't promote inlined context profile");
- StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
+ uint32_t ContextFramesToRemove =
+ FromSamples->getContext().getContextFrames().size() - 1;
return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
- ContextStrToRemove);
+ ContextFramesToRemove);
}
-void SampleContextTracker::dump() {
- dbgs() << "Context Profile Tree:\n";
- std::queue<ContextTrieNode *> NodeQueue;
- NodeQueue.push(&RootContext);
-
- while (!NodeQueue.empty()) {
- ContextTrieNode *Node = NodeQueue.front();
- NodeQueue.pop();
- Node->dump();
+void SampleContextTracker::dump() { RootContext.dumpTree(); }
- for (auto &It : Node->getAllChildContext()) {
- ContextTrieNode *ChildNode = &It.second;
- NodeQueue.push(ChildNode);
- }
- }
+StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
+ if (!FunctionSamples::UseMD5)
+ return Node->getFuncName();
+ assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
+ return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().data()));
}
ContextTrieNode *
@@ -444,11 +478,22 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
RootName = PrevDIL->getScope()->getSubprogram()->getName();
S.push_back(std::make_pair(LineLocation(0, 0), RootName));
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::list<std::string> MD5Names;
+ if (FunctionSamples::UseMD5) {
+ for (auto &Location : S) {
+ MD5Names.emplace_back();
+ getRepInFormat(Location.second, FunctionSamples::UseMD5, MD5Names.back());
+ Location.second = MD5Names.back();
+ }
+ }
+
ContextTrieNode *ContextNode = &RootContext;
int I = S.size();
while (--I >= 0 && ContextNode) {
LineLocation &CallSite = S[I].first;
- StringRef &CalleeName = S[I].second;
+ StringRef CalleeName = S[I].second;
ContextNode = ContextNode->getChildContext(CallSite, CalleeName);
}
@@ -462,27 +507,18 @@ ContextTrieNode *
SampleContextTracker::getOrCreateContextPath(const SampleContext &Context,
bool AllowCreate) {
ContextTrieNode *ContextNode = &RootContext;
- StringRef ContextRemain = Context;
- StringRef ChildContext;
- StringRef CalleeName;
LineLocation CallSiteLoc(0, 0);
- while (ContextNode && !ContextRemain.empty()) {
- auto ContextSplit = SampleContext::splitContextString(ContextRemain);
- ChildContext = ContextSplit.first;
- ContextRemain = ContextSplit.second;
- LineLocation NextCallSiteLoc(0, 0);
- SampleContext::decodeContextString(ChildContext, CalleeName,
- NextCallSiteLoc);
-
+ for (auto &Callsite : Context.getContextFrames()) {
// Create child node at parent line/disc location
if (AllowCreate) {
ContextNode =
- ContextNode->getOrCreateChildContext(CallSiteLoc, CalleeName);
+ ContextNode->getOrCreateChildContext(CallSiteLoc, Callsite.FuncName);
} else {
- ContextNode = ContextNode->getChildContext(CallSiteLoc, CalleeName);
+ ContextNode =
+ ContextNode->getChildContext(CallSiteLoc, Callsite.FuncName);
}
- CallSiteLoc = NextCallSiteLoc;
+ CallSiteLoc = Callsite.Location;
}
assert((!AllowCreate || ContextNode) &&
@@ -502,7 +538,7 @@ ContextTrieNode &SampleContextTracker::addTopLevelContextNode(StringRef FName) {
void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
ContextTrieNode &ToNode,
- StringRef ContextStrToRemove) {
+ uint32_t ContextFramesToRemove) {
FunctionSamples *FromSamples = FromNode.getFunctionSamples();
FunctionSamples *ToSamples = ToNode.getFunctionSamples();
if (FromSamples && ToSamples) {
@@ -510,19 +546,21 @@ void SampleContextTracker::mergeContextNode(ContextTrieNode &FromNode,
ToSamples->merge(*FromSamples);
ToSamples->getContext().setState(SyntheticContext);
FromSamples->getContext().setState(MergedContext);
+ if (FromSamples->getContext().hasAttribute(ContextShouldBeInlined))
+ ToSamples->getContext().setAttribute(ContextShouldBeInlined);
} else if (FromSamples) {
// Transfer FromSamples from FromNode to ToNode
ToNode.setFunctionSamples(FromSamples);
FromSamples->getContext().setState(SyntheticContext);
- FromSamples->getContext().promoteOnPath(ContextStrToRemove);
+ FromSamples->getContext().promoteOnPath(ContextFramesToRemove);
FromNode.setFunctionSamples(nullptr);
}
}
ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
ContextTrieNode &FromNode, ContextTrieNode &ToNodeParent,
- StringRef ContextStrToRemove) {
- assert(!ContextStrToRemove.empty() && "Context to remove can't be empty");
+ uint32_t ContextFramesToRemove) {
+ assert(ContextFramesToRemove && "Context to remove can't be empty");
// Ignore call site location if destination is top level under root
LineLocation NewCallSiteLoc = LineLocation(0, 0);
@@ -540,21 +578,21 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
// Do not delete node to move from its parent here because
// caller is iterating over children of that parent node.
ToNode = &ToNodeParent.moveToChildContext(
- NewCallSiteLoc, std::move(FromNode), ContextStrToRemove, false);
+ NewCallSiteLoc, std::move(FromNode), ContextFramesToRemove, false);
} else {
// Destination node exists, merge samples for the context tree
- mergeContextNode(FromNode, *ToNode, ContextStrToRemove);
+ mergeContextNode(FromNode, *ToNode, ContextFramesToRemove);
LLVM_DEBUG({
if (ToNode->getFunctionSamples())
dbgs() << " Context promoted and merged to: "
- << ToNode->getFunctionSamples()->getContext() << "\n";
+ << ToNode->getFunctionSamples()->getContext().toString() << "\n";
});
// Recursively promote and merge children
for (auto &It : FromNode.getAllChildContext()) {
ContextTrieNode &FromChildNode = It.second;
promoteMergeContextSamplesTree(FromChildNode, *ToNode,
- ContextStrToRemove);
+ ContextFramesToRemove);
}
// Remove children once they're all merged
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 8e9c79fc7bbb..a961c47a7501 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -143,6 +143,12 @@ static cl::opt<bool> ProfileSampleAccurate(
"callsite and function as having 0 samples. Otherwise, treat "
"un-sampled callsites and functions conservatively as unknown. "));
+static cl::opt<bool> ProfileSampleBlockAccurate(
+ "profile-sample-block-accurate", cl::Hidden, cl::init(false),
+ cl::desc("If the sample profile is accurate, we will mark all un-sampled "
+ "branches and calls as having 0 samples. Otherwise, treat "
+ "them conservatively as unknown. "));
+
static cl::opt<bool> ProfileAccurateForSymsInList(
"profile-accurate-for-symsinlist", cl::Hidden, cl::ZeroOrMore,
cl::init(true),
@@ -214,6 +220,16 @@ static cl::opt<bool> CallsitePrioritizedInline(
cl::desc("Use call site prioritized inlining for sample profile loader."
"Currently only CSSPGO is supported."));
+static cl::opt<bool> UsePreInlinerDecision(
+ "sample-profile-use-preinliner", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Use the preinliner decisions stored in profile context."));
+
+static cl::opt<bool> AllowRecursiveInline(
+ "sample-profile-recursive-inline", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Allow sample loader inliner to inline recursive calls."));
+
static cl::opt<std::string> ProfileInlineReplayFile(
"sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"),
cl::desc(
@@ -221,6 +237,50 @@ static cl::opt<std::string> ProfileInlineReplayFile(
"by inlining from sample profile loader."),
cl::Hidden);
+static cl::opt<ReplayInlinerSettings::Scope> ProfileInlineReplayScope(
+ "sample-profile-inline-replay-scope",
+ cl::init(ReplayInlinerSettings::Scope::Function),
+ cl::values(clEnumValN(ReplayInlinerSettings::Scope::Function, "Function",
+ "Replay on functions that have remarks associated "
+ "with them (default)"),
+ clEnumValN(ReplayInlinerSettings::Scope::Module, "Module",
+ "Replay on the entire module")),
+ cl::desc("Whether inline replay should be applied to the entire "
+ "Module or just the Functions (default) that are present as "
+ "callers in remarks during sample profile inlining."),
+ cl::Hidden);
+
+static cl::opt<ReplayInlinerSettings::Fallback> ProfileInlineReplayFallback(
+ "sample-profile-inline-replay-fallback",
+ cl::init(ReplayInlinerSettings::Fallback::Original),
+ cl::values(
+ clEnumValN(
+ ReplayInlinerSettings::Fallback::Original, "Original",
+ "All decisions not in replay send to original advisor (default)"),
+ clEnumValN(ReplayInlinerSettings::Fallback::AlwaysInline,
+ "AlwaysInline", "All decisions not in replay are inlined"),
+ clEnumValN(ReplayInlinerSettings::Fallback::NeverInline, "NeverInline",
+ "All decisions not in replay are not inlined")),
+ cl::desc("How sample profile inline replay treats sites that don't come "
+ "from the replay. Original: defers to original advisor, "
+ "AlwaysInline: inline all sites not in replay, NeverInline: "
+ "inline no sites not in replay"),
+ cl::Hidden);
+
+static cl::opt<CallSiteFormat::Format> ProfileInlineReplayFormat(
+ "sample-profile-inline-replay-format",
+ cl::init(CallSiteFormat::Format::LineColumnDiscriminator),
+ cl::values(
+ clEnumValN(CallSiteFormat::Format::Line, "Line", "<Line Number>"),
+ clEnumValN(CallSiteFormat::Format::LineColumn, "LineColumn",
+ "<Line Number>:<Column Number>"),
+ clEnumValN(CallSiteFormat::Format::LineDiscriminator,
+ "LineDiscriminator", "<Line Number>.<Discriminator>"),
+ clEnumValN(CallSiteFormat::Format::LineColumnDiscriminator,
+ "LineColumnDiscriminator",
+ "<Line Number>:<Column Number>.<Discriminator> (default)")),
+ cl::desc("How sample profile inline replay file is formatted"), cl::Hidden);
+
static cl::opt<unsigned>
MaxNumPromotions("sample-profile-icp-max-prom", cl::init(3), cl::Hidden,
cl::ZeroOrMore,
@@ -358,10 +418,10 @@ public:
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
- : SampleProfileLoaderBaseImpl(std::string(Name)),
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
- RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
+ LTOPhase(LTOPhase) {}
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -377,7 +437,7 @@ protected:
findFunctionSamples(const Instruction &I) const override;
std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
- void findExternalInlineCandidate(const FunctionSamples *Samples,
+ void findExternalInlineCandidate(CallBase *CB, const FunctionSamples *Samples,
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap,
uint64_t Threshold);
@@ -385,8 +445,11 @@ protected:
bool tryPromoteAndInlineCandidate(
Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
uint64_t &Sum, SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ Optional<InlineCost> getExternalInlineAdvisorCost(CallBase &CB);
+ bool getExternalInlineAdvisorShouldInline(CallBase &CB);
InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
bool
@@ -417,9 +480,6 @@ protected:
/// Profile tracker for different contexts.
std::unique_ptr<SampleContextTracker> ContextTracker;
- /// Name of the profile remapping file to load.
- std::string RemappingFilename;
-
/// Flag indicating whether input profile is context-sensitive
bool ProfileIsCS = false;
@@ -464,7 +524,7 @@ protected:
bool ProfAccForSymsInList;
// External inline advisor used to replay inline decision from remarks.
- std::unique_ptr<ReplayInlineAdvisor> ExternalInlineAdvisor;
+ std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
// A pseudo probe helper to correlate the imported sample counts.
std::unique_ptr<PseudoProbeManager> ProbeManager;
@@ -953,8 +1013,24 @@ void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates(
}
void SampleProfileLoader::findExternalInlineCandidate(
- const FunctionSamples *Samples, DenseSet<GlobalValue::GUID> &InlinedGUIDs,
+ CallBase *CB, const FunctionSamples *Samples,
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
+
+ // If the ExternalInlineAdvisor wants to inline an external function,
+ // make sure it's imported.
+ if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
+ // Samples may not exist for the replayed function; if so,
+ // just add the direct GUID and move on.
+ if (!Samples) {
+ InlinedGUIDs.insert(
+ FunctionSamples::getGUID(CB->getCalledFunction()->getName()));
+ return;
+ }
+ // Otherwise, drop the threshold to import everything that we can
+ Threshold = 0;
+ }
+
assert(Samples && "expect non-null caller profile");
// For AutoFDO profile, retrieve candidate profiles by walking over
@@ -975,14 +1051,21 @@ void SampleProfileLoader::findExternalInlineCandidate(
// For CSSPGO profile, retrieve candidate profile by walking over the
// trie built for context profile. Note that also take call targets
// even if callee doesn't have a corresponding context profile.
- if (!CalleeSample || CalleeSample->getEntrySamples() < Threshold)
+ if (!CalleeSample)
+ continue;
+
+ // If pre-inliner decision is used, honor that for importing as well.
+ bool PreInline =
+ UsePreInlinerDecision &&
+ CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
+ if (!PreInline && CalleeSample->getEntrySamples() < Threshold)
continue;
StringRef Name = CalleeSample->getFuncName();
Function *Func = SymbolMap.lookup(Name);
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
@@ -992,7 +1075,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
const Function *Callee = SymbolMap.lookup(CalleeName);
if (!Callee || Callee->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
}
// Import hot child context profile associated with callees. Note that this
@@ -1042,16 +1125,20 @@ bool SampleProfileLoader::inlineHotFunctions(
for (auto &I : BB.getInstList()) {
const FunctionSamples *FS = nullptr;
if (auto *CB = dyn_cast<CallBase>(&I)) {
- if (!isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(*CB))) {
- assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
- "GUIDToFuncNameMap has to be populated");
- AllCandidates.push_back(CB);
- if (FS->getEntrySamples() > 0 || ProfileIsCS)
- LocalNotInlinedCallSites.try_emplace(CB, FS);
- if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
- Hot = true;
- else if (shouldInlineColdCallee(*CB))
- ColdCandidates.push_back(CB);
+ if (!isa<IntrinsicInst>(I)) {
+ if ((FS = findCalleeFunctionSamples(*CB))) {
+ assert((!FunctionSamples::UseMD5 || FS->GUIDToFuncNameMap) &&
+ "GUIDToFuncNameMap has to be populated");
+ AllCandidates.push_back(CB);
+ if (FS->getEntrySamples() > 0 || ProfileIsCS)
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
+ if (callsiteIsHot(FS, PSI, ProfAccForSymsInList))
+ Hot = true;
+ else if (shouldInlineColdCallee(*CB))
+ ColdCandidates.push_back(CB);
+ } else if (getExternalInlineAdvisorShouldInline(*CB)) {
+ AllCandidates.push_back(CB);
+ }
}
}
}
@@ -1078,7 +1165,7 @@ bool SampleProfileLoader::inlineHotFunctions(
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1098,8 +1185,8 @@ bool SampleProfileLoader::inlineHotFunctions(
LocalChanged = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(findCalleeFunctionSamples(*I), InlinedGUIDs,
- SymbolMap,
+ findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
+ InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
}
}
@@ -1184,8 +1271,8 @@ bool SampleProfileLoader::tryInlineCandidate(
*CalledFunction);
// The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
+ emitInlinedIntoBasedOnCost(*ORE, DLoc, BB, *CalledFunction,
+ *BB->getParent(), Cost, true, CSINLINE_DEBUG);
// Now populate the list of newly exposed call sites.
if (InlinedCallSites) {
@@ -1228,7 +1315,9 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
// Find the callee's profile. For indirect call, find hottest target profile.
const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
- if (!CalleeSamples)
+ // If ExternalInlineAdvisor wants to inline this site, do so even
+ // if Samples are not present.
+ if (!CalleeSamples && !getExternalInlineAdvisorShouldInline(*CB))
return false;
float Factor = 1.0;
@@ -1247,19 +1336,34 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
return true;
}
-InlineCost
-SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+Optional<InlineCost>
+SampleProfileLoader::getExternalInlineAdvisorCost(CallBase &CB) {
std::unique_ptr<InlineAdvice> Advice = nullptr;
if (ExternalInlineAdvisor) {
- Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return InlineCost::getNever("not previously inlined");
+ Advice = ExternalInlineAdvisor->getAdvice(CB);
+ if (Advice) {
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ return InlineCost::getNever("not previously inlined");
+ }
+ Advice->recordInlining();
+ return InlineCost::getAlways("previously inlined");
}
- Advice->recordInlining();
- return InlineCost::getAlways("previously inlined");
}
+ return {};
+}
+
+bool SampleProfileLoader::getExternalInlineAdvisorShouldInline(CallBase &CB) {
+ Optional<InlineCost> Cost = getExternalInlineAdvisorCost(CB);
+ return Cost ? !!Cost.getValue() : false;
+}
+
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+ if (Optional<InlineCost> ReplayCost =
+ getExternalInlineAdvisorCost(*Candidate.CallInstr))
+ return ReplayCost.getValue();
// Adjust threshold based on call site hotness, only do this for callsite
// prioritized inliner because otherwise cost-benefit check is done earlier.
int SampleThreshold = SampleColdCallSiteThreshold;
@@ -1274,7 +1378,9 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
assert(Callee && "Expect a definition for inline candidate of direct call");
InlineParams Params = getInlineParams();
+ // We will ignore the threshold from inline cost, so always get full cost.
Params.ComputeFullInlineCost = true;
+ Params.AllowRecursiveCall = AllowRecursiveInline;
// Checks if there is anything in the reachable portion of the callee at
// this callsite that makes this inlining potentially illegal. Need to
// set ComputeFullInlineCost, otherwise getInlineCost may return early
@@ -1288,6 +1394,25 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
if (Cost.isNever() || Cost.isAlways())
return Cost;
+ // With CSSPGO, the preinliner in llvm-profgen can estimate global inline
+ // decisions based on hotness as well as accurate function byte sizes for a
+ // given context, using function/inlinee sizes from a previous build. It
+ // stores the decision in the profile, and also adjusts/merges context
+ // profiles aiming at better context-sensitive post-inline profile quality,
+ // assuming all inline decision estimates are going to be honored by the
+ // compiler. Here we replay those inline decisions under
+ // `sample-profile-use-preinliner`. Note that we don't need to handle
+ // negative decisions from the preinliner, because context profiles for
+ // not-inlined calls are already merged by the preinliner.
+ if (UsePreInlinerDecision && Candidate.CalleeSamples) {
+ // Once two nodes are merged due to promotion, we lose some context, so the
+ // original context-sensitive preinliner decision should be ignored for
+ // SyntheticContext.
+ SampleContext &Context = Candidate.CalleeSamples->getContext();
+ if (!Context.hasState(SyntheticContext) &&
+ Context.hasAttribute(ContextShouldBeInlined))
+ return InlineCost::getAlways("preinliner");
+ }
+
// For old FDO inliner, we inline the call site as long as cost is not
// "Never". The cost-benefit check is done earlier.
if (!CallsitePrioritizedInline) {
@@ -1357,7 +1482,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
for (const auto *FS : CalleeSamples) {
// TODO: Consider disabling pre-LTO ICP for MonoLTO as well
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(FS, InlinedGUIDs, SymbolMap,
+ findExternalInlineCandidate(I, FS, InlinedGUIDs, SymbolMap,
PSI->getOrCompHotCountThreshold());
continue;
}
@@ -1405,8 +1530,9 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
Changed = true;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
- findExternalInlineCandidate(Candidate.CalleeSamples, InlinedGUIDs,
- SymbolMap, PSI->getOrCompHotCountThreshold());
+ findExternalInlineCandidate(I, findCalleeFunctionSamples(*I),
+ InlinedGUIDs, SymbolMap,
+ PSI->getOrCompHotCountThreshold());
}
}
@@ -1494,7 +1620,7 @@ void SampleProfileLoader::generateMDProfMetadata(Function &F) {
{static_cast<uint32_t>(BlockWeights[BB])}));
}
}
- } else if (OverwriteExistingWeights) {
+ } else if (OverwriteExistingWeights || ProfileSampleBlockAccurate) {
// Set profile metadata (possibly annotated by LTO prelink) to zero or
// clear it for cold code.
for (auto &I : BB->getInstList()) {
@@ -1792,11 +1918,13 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
if (FAM && !ProfileInlineReplayFile.empty()) {
- ExternalInlineAdvisor = std::make_unique<ReplayInlineAdvisor>(
- M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr, ProfileInlineReplayFile,
+ ExternalInlineAdvisor = getReplayInlineAdvisor(
+ M, *FAM, Ctx, /*OriginalAdvisor=*/nullptr,
+ ReplayInlinerSettings{ProfileInlineReplayFile,
+ ProfileInlineReplayScope,
+ ProfileInlineReplayFallback,
+ {ProfileInlineReplayFormat}},
/*EmitRemarks=*/false);
- if (!ExternalInlineAdvisor->areReplayRemarksLoaded())
- ExternalInlineAdvisor.reset();
}
// Apply tweaks if context-sensitive profile is available.
@@ -1810,13 +1938,21 @@ bool SampleProfileLoader::doInitialization(Module &M,
if (!CallsitePrioritizedInline.getNumOccurrences())
CallsitePrioritizedInline = true;
+ // For CSSPGO, use preinliner decision by default when available.
+ if (!UsePreInlinerDecision.getNumOccurrences())
+ UsePreInlinerDecision = true;
+
+ // For CSSPGO, we also allow recursive inlining to best use context profiles.
+ if (!AllowRecursiveInline.getNumOccurrences())
+ AllowRecursiveInline = true;
+
// Enable iterative-BFI by default for CSSPGO.
if (!UseIterativeBFIInference.getNumOccurrences())
UseIterativeBFIInference = true;
// Tracker for profiles under different context
- ContextTracker =
- std::make_unique<SampleContextTracker>(Reader->getProfiles());
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
}
// Load pseudo probe descriptors for probe-based function samples.
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 08d316337ef5..21395460bccb 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -415,9 +415,7 @@ void PseudoProbeUpdatePass::runOnFunction(Function &F,
FunctionAnalysisManager &FAM) {
BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto BBProfileCount = [&BFI](BasicBlock *BB) {
- return BFI.getBlockProfileCount(BB)
- ? BFI.getBlockProfileCount(BB).getValue()
- : 0;
+ return BFI.getBlockProfileCount(BB).getValueOr(0);
};
// Collect the sum of execution weight for each probe.
diff --git a/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 655a7a404951..0f2412dce1c9 100644
--- a/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -30,23 +30,20 @@ static bool stripDeadPrototypes(Module &M) {
bool MadeChange = false;
// Erase dead function prototypes.
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- Function *F = &*I++;
+ for (Function &F : llvm::make_early_inc_range(M)) {
// Function must be a prototype and unused.
- if (F->isDeclaration() && F->use_empty()) {
- F->eraseFromParent();
+ if (F.isDeclaration() && F.use_empty()) {
+ F.eraseFromParent();
++NumDeadPrototypes;
MadeChange = true;
}
}
// Erase dead global var prototypes.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ) {
- GlobalVariable *GV = &*I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
// Global must be a prototype and unused.
- if (GV->isDeclaration() && GV->use_empty())
- GV->eraseFromParent();
+ if (GV.isDeclaration() && GV.use_empty())
+ GV.eraseFromParent();
}
// Return an indication of whether we changed anything or not.
diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 168740a1158e..9d4e9464f361 100644
--- a/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -214,13 +214,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
findUsedValues(M.getGlobalVariable("llvm.compiler.used"), llvmUsedValues);
for (GlobalVariable &GV : M.globals()) {
- if (GV.hasLocalLinkage() && llvmUsedValues.count(&GV) == 0)
+ if (GV.hasLocalLinkage() && !llvmUsedValues.contains(&GV))
if (!PreserveDbgInfo || !GV.getName().startswith("llvm.dbg"))
GV.setName(""); // Internal symbols can't participate in linkage
}
for (Function &I : M) {
- if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
+ if (I.hasLocalLinkage() && !llvmUsedValues.contains(&I))
if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
I.setName(""); // Internal symbols can't participate in linkage
if (auto *Symtab = I.getValueSymbolTable())
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 37329b489555..0cc1b37844f6 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -33,6 +33,19 @@ using namespace llvm;
namespace {
+// Determine if a promotion alias should be created for a symbol name.
+static bool allowPromotionAlias(const std::string &Name) {
+ // Promotion aliases are used only in inline assembly. It's safe to
+ // simply skip unusual names. This check is a subset of
+ // MCAsmInfo::isAcceptableChar() and MCAsmInfoXCOFF::isAcceptableChar().
+ for (const char &C : Name) {
+ if (isAlnum(C) || C == '_' || C == '.')
+ continue;
+ return false;
+ }
+ return true;
+}
+
// Promote each local-linkage entity defined by ExportM and used by ImportM by
// changing visibility and appending the given ModuleId.
void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
@@ -55,6 +68,7 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
}
}
+ std::string OldName = Name.str();
std::string NewName = (Name + ModuleId).str();
if (const auto *C = ExportGV.getComdat())
@@ -69,6 +83,13 @@ void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId,
ImportGV->setName(NewName);
ImportGV->setVisibility(GlobalValue::HiddenVisibility);
}
+
+ if (isa<Function>(&ExportGV) && allowPromotionAlias(OldName)) {
+ // Create a local alias with the original name to avoid breaking
+ // references from inline assembly.
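+ // For example, with a hypothetical local function "foo" promoted to
+ // "foo<ModuleId>", the emitted module asm is ".set foo,foo<ModuleId>", so
+ // inline asm that still refers to "foo" keeps resolving.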
+ std::string Alias = ".set " + OldName + "," + NewName + "\n";
+ ExportM.appendModuleInlineAsm(Alias);
+ }
}
if (!RenamedComdats.empty())
@@ -143,8 +164,7 @@ void simplifyExternals(Module &M) {
FunctionType *EmptyFT =
FunctionType::get(Type::getVoidTy(M.getContext()), false);
- for (auto I = M.begin(), E = M.end(); I != E;) {
- Function &F = *I++;
+ for (Function &F : llvm::make_early_inc_range(M)) {
if (F.isDeclaration() && F.use_empty()) {
F.eraseFromParent();
continue;
@@ -160,16 +180,15 @@ void simplifyExternals(Module &M) {
F.getAddressSpace(), "", &M);
NewF->copyAttributesFrom(&F);
// Only copy function attributes.
- NewF->setAttributes(
- AttributeList::get(M.getContext(), AttributeList::FunctionIndex,
- F.getAttributes().getFnAttributes()));
+ NewF->setAttributes(AttributeList::get(M.getContext(),
+ AttributeList::FunctionIndex,
+ F.getAttributes().getFnAttrs()));
NewF->takeName(&F);
F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
F.eraseFromParent();
}
- for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
- GlobalVariable &GV = *I++;
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (GV.isDeclaration() && GV.use_empty()) {
GV.eraseFromParent();
continue;
@@ -304,7 +323,8 @@ void splitAndWriteThinLTOBitcode(
return true;
if (auto *F = dyn_cast<Function>(GV))
return EligibleVirtualFns.count(F);
- if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (auto *GVar =
+ dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))
return HasTypeMetadata(GVar);
return false;
}));
@@ -333,7 +353,7 @@ void splitAndWriteThinLTOBitcode(
// Remove all globals with type metadata, globals with comdats that live in
// MergedM, and aliases pointing to such globals from the thin LTO module.
filterModule(&M, [&](const GlobalValue *GV) {
- if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject()))
+ if (auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getAliaseeObject()))
if (HasTypeMetadata(GVar))
return false;
if (const auto *C = GV->getComdat())
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 7a8946110785..61054e7ae46f 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1288,7 +1288,7 @@ void DevirtModule::tryICallBranchFunnel(
M.getDataLayout().getProgramAddressSpace(),
"branch_funnel", &M);
}
- JT->addAttribute(1, Attribute::Nest);
+ JT->addParamAttr(0, Attribute::Nest);
std::vector<Value *> JTArgs;
JTArgs.push_back(JT->arg_begin());
@@ -1361,10 +1361,10 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
M.getContext(), ArrayRef<Attribute>{Attribute::get(
M.getContext(), Attribute::Nest)}));
for (unsigned I = 0; I + 2 < Attrs.getNumAttrSets(); ++I)
- NewArgAttrs.push_back(Attrs.getParamAttributes(I));
+ NewArgAttrs.push_back(Attrs.getParamAttrs(I));
NewCS->setAttributes(
- AttributeList::get(M.getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), NewArgAttrs));
+ AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), NewArgAttrs));
CB.replaceAllUsesWith(NewCS);
CB.eraseFromParent();
@@ -1786,10 +1786,8 @@ void DevirtModule::scanTypeTestUsers(
// points to a member of the type identifier %md. Group calls by (type ID,
// offset) pair (effectively the identity of the virtual function) and store
// to CallSlots.
- for (auto I = TypeTestFunc->use_begin(), E = TypeTestFunc->use_end();
- I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
@@ -1858,11 +1856,8 @@ void DevirtModule::scanTypeTestUsers(
void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
Function *TypeTestFunc = Intrinsic::getDeclaration(&M, Intrinsic::type_test);
- for (auto I = TypeCheckedLoadFunc->use_begin(),
- E = TypeCheckedLoadFunc->use_end();
- I != E;) {
- auto CI = dyn_cast<CallInst>(I->getUser());
- ++I;
+ for (Use &U : llvm::make_early_inc_range(TypeCheckedLoadFunc->uses())) {
+ auto *CI = dyn_cast<CallInst>(U.getUser());
if (!CI)
continue;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d01a021bf3f4..eb1b8a29cfc5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -939,7 +939,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
// add (xor X, LowMaskC), C --> sub (LowMaskC + C), X
if (C2->isMask()) {
KnownBits LHSKnown = computeKnownBits(X, 0, &Add);
- if ((*C2 | LHSKnown.Zero).isAllOnesValue())
+ if ((*C2 | LHSKnown.Zero).isAllOnes())
return BinaryOperator::CreateSub(ConstantInt::get(Ty, *C2 + *C), X);
}
@@ -963,7 +963,7 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
}
}
- if (C->isOneValue() && Op0->hasOneUse()) {
+ if (C->isOne() && Op0->hasOneUse()) {
// add (sext i1 X), 1 --> zext (not X)
// TODO: The smallest IR representation is (select X, 0, 1), and that would
// not require the one-use check. But we need to remove a transform in
@@ -1355,6 +1355,17 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (match(RHS, m_OneUse(m_c_Add(m_Value(A), m_Specific(LHS)))))
return BinaryOperator::CreateAdd(A, Builder.CreateShl(LHS, 1, "reass.add"));
+ {
+ // (A + C1) + (C2 - B) --> (A - B) + (C1 + C2)
+ Constant *C1, *C2;
+ if (match(&I, m_c_Add(m_Add(m_Value(A), m_ImmConstant(C1)),
+ m_Sub(m_ImmConstant(C2), m_Value(B)))) &&
+ (LHS->hasOneUse() || RHS->hasOneUse())) {
+ Value *Sub = Builder.CreateSub(A, B);
+ return BinaryOperator::CreateAdd(Sub, ConstantExpr::getAdd(C1, C2));
+ }
+ }
+
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V);
@@ -1817,12 +1828,8 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
if (match(Op0, m_AllOnes()))
return BinaryOperator::CreateNot(Op1);
- // (~X) - (~Y) --> Y - X
- Value *X, *Y;
- if (match(Op0, m_Not(m_Value(X))) && match(Op1, m_Not(m_Value(Y))))
- return BinaryOperator::CreateSub(Y, X);
-
// (X + -1) - Y --> ~Y + X
+ Value *X, *Y;
if (match(Op0, m_OneUse(m_Add(m_Value(X), m_AllOnes()))))
return BinaryOperator::CreateAdd(Builder.CreateNot(Op1), X);
@@ -1843,6 +1850,17 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateSub(X, Add);
}
+ // (~X) - (~Y) --> Y - X
+ // This is placed after the other reassociations and explicitly excludes a
+ // sub-of-sub pattern to avoid infinite looping.
+ if (isFreeToInvert(Op0, Op0->hasOneUse()) &&
+ isFreeToInvert(Op1, Op1->hasOneUse()) &&
+ !match(Op0, m_Sub(m_ImmConstant(), m_Value()))) {
+ Value *NotOp0 = Builder.CreateNot(Op0);
+ Value *NotOp1 = Builder.CreateNot(Op1);
+ return BinaryOperator::CreateSub(NotOp1, NotOp0);
+ }
+
auto m_AddRdx = [](Value *&Vec) {
return m_OneUse(m_Intrinsic<Intrinsic::vector_reduce_add>(m_Value(Vec)));
};
@@ -1892,7 +1910,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
KnownBits RHSKnown = computeKnownBits(Op1, 0, &I);
- if ((*Op0C | RHSKnown.Zero).isAllOnesValue())
+ if ((*Op0C | RHSKnown.Zero).isAllOnes())
return BinaryOperator::CreateXor(Op1, Op0);
}
@@ -2039,12 +2057,31 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return BinaryOperator::CreateAnd(
Op0, Builder.CreateNot(Y, Y->getName() + ".not"));
+ // ~X - Min/Max(~X, Y) -> ~Min/Max(X, ~Y) - X
+ // ~X - Min/Max(Y, ~X) -> ~Min/Max(X, ~Y) - X
+ // Min/Max(~X, Y) - ~X -> X - ~Min/Max(X, ~Y)
+ // Min/Max(Y, ~X) - ~X -> X - ~Min/Max(X, ~Y)
+ // As long as Y is freely invertible, this will be neutral or a win.
+ // Note: We don't generate the inverse max/min, just create the 'not' of
+ // it and let other folds do the rest.
+ if (match(Op0, m_Not(m_Value(X))) &&
+ match(Op1, m_c_MaxOrMin(m_Specific(Op0), m_Value(Y))) &&
+ !Op0->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *Not = Builder.CreateNot(Op1);
+ return BinaryOperator::CreateSub(Not, X);
+ }
+ if (match(Op1, m_Not(m_Value(X))) &&
+ match(Op0, m_c_MaxOrMin(m_Specific(Op1), m_Value(Y))) &&
+ !Op1->hasNUsesOrMore(3) && isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *Not = Builder.CreateNot(Op0);
+ return BinaryOperator::CreateSub(X, Not);
+ }
+
+ // TODO: This is the same logic as above but handles the cmp-select idioms
+ // for min/max, so the use checks are increased to account for the
+ // extra instructions. If we canonicalize to intrinsics, this block
+ // can likely be removed.
{
- // ~A - Min/Max(~A, O) -> Max/Min(A, ~O) - A
- // ~A - Min/Max(O, ~A) -> Max/Min(A, ~O) - A
- // Min/Max(~A, O) - ~A -> A - Max/Min(A, ~O)
- // Min/Max(O, ~A) - ~A -> A - Max/Min(A, ~O)
- // So long as O here is freely invertible, this will be neutral or a win.
Value *LHS, *RHS, *A;
Value *NotA = Op0, *MinMax = Op1;
SelectPatternFlavor SPF = matchSelectPattern(MinMax, LHS, RHS).Flavor;
@@ -2057,12 +2094,10 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
match(NotA, m_Not(m_Value(A))) && (NotA == LHS || NotA == RHS)) {
if (NotA == LHS)
std::swap(LHS, RHS);
- // LHS is now O above and expected to have at least 2 uses (the min/max)
- // NotA is epected to have 2 uses from the min/max and 1 from the sub.
+ // LHS is now Y above and expected to have at least 2 uses (the min/max)
+ // NotA is expected to have 2 uses from the min/max and 1 from the sub.
if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
!NotA->hasNUsesOrMore(4)) {
- // Note: We don't generate the inverse max/min, just create the not of
- // it and let other folds do the rest.
Value *Not = Builder.CreateNot(MinMax);
if (NotA == Op0)
return BinaryOperator::CreateSub(Not, A);
@@ -2119,7 +2154,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
unsigned BitWidth = Ty->getScalarSizeInBits();
unsigned Cttz = AddC->countTrailingZeros();
APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz));
- if ((HighMask & *AndC).isNullValue())
+ if ((HighMask & *AndC).isZero())
return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC)));
}
@@ -2133,6 +2168,19 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(
I, Builder.CreateIntrinsic(Intrinsic::umin, {I.getType()}, {Op0, Y}));
+ // umax(X, Op1) - Op1 --> usub.sat(X, Op1)
+ // TODO: The one-use restriction is not strictly necessary, but it may
+ // require improving other pattern matching and/or codegen.
+ if (match(Op0, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op1)))))
+ return replaceInstUsesWith(
+ I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
+
+ // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
+ if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
+ Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
+ return BinaryOperator::CreateNeg(USub);
+ }
+
// C - ctpop(X) => ctpop(~X) if C is bitwidth
if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) &&
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))
@@ -2173,8 +2221,8 @@ static Instruction *foldFNegIntoConstant(Instruction &I) {
// TODO: We could propagate nsz/ninf from fdiv alone?
FastMathFlags FMF = I.getFastMathFlags();
FastMathFlags OpFMF = FNegOp->getFastMathFlags();
- FDiv->setHasNoSignedZeros(FMF.noSignedZeros() & OpFMF.noSignedZeros());
- FDiv->setHasNoInfs(FMF.noInfs() & OpFMF.noInfs());
+ FDiv->setHasNoSignedZeros(FMF.noSignedZeros() && OpFMF.noSignedZeros());
+ FDiv->setHasNoInfs(FMF.noInfs() && OpFMF.noInfs());
return FDiv;
}
// With NSZ [ counter-example with -0.0: -(-0.0 + 0.0) != 0.0 + -0.0 ]:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 120852c44474..06c9bf650f37 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -185,14 +185,15 @@ enum MaskedICmpType {
/// satisfies.
static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
ICmpInst::Predicate Pred) {
- ConstantInt *ACst = dyn_cast<ConstantInt>(A);
- ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- ConstantInt *CCst = dyn_cast<ConstantInt>(C);
+ const APInt *ConstA = nullptr, *ConstB = nullptr, *ConstC = nullptr;
+ match(A, m_APInt(ConstA));
+ match(B, m_APInt(ConstB));
+ match(C, m_APInt(ConstC));
bool IsEq = (Pred == ICmpInst::ICMP_EQ);
- bool IsAPow2 = (ACst && !ACst->isZero() && ACst->getValue().isPowerOf2());
- bool IsBPow2 = (BCst && !BCst->isZero() && BCst->getValue().isPowerOf2());
+ bool IsAPow2 = ConstA && ConstA->isPowerOf2();
+ bool IsBPow2 = ConstB && ConstB->isPowerOf2();
unsigned MaskVal = 0;
- if (CCst && CCst->isZero()) {
+ if (ConstC && ConstC->isZero()) {
// if C is zero, then both A and B qualify as mask
MaskVal |= (IsEq ? (Mask_AllZeros | AMask_Mixed | BMask_Mixed)
: (Mask_NotAllZeros | AMask_NotMixed | BMask_NotMixed));
@@ -211,7 +212,7 @@ static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
if (IsAPow2)
MaskVal |= (IsEq ? (Mask_NotAllZeros | AMask_NotMixed)
: (Mask_AllZeros | AMask_Mixed));
- } else if (ACst && CCst && ConstantExpr::getAnd(ACst, CCst) == CCst) {
+ } else if (ConstA && ConstC && ConstC->isSubsetOf(*ConstA)) {
MaskVal |= (IsEq ? AMask_Mixed : AMask_NotMixed);
}
@@ -221,7 +222,7 @@ static unsigned getMaskedICmpType(Value *A, Value *B, Value *C,
if (IsBPow2)
MaskVal |= (IsEq ? (Mask_NotAllZeros | BMask_NotMixed)
: (Mask_AllZeros | BMask_Mixed));
- } else if (BCst && CCst && ConstantExpr::getAnd(BCst, CCst) == CCst) {
+ } else if (ConstB && ConstC && ConstC->isSubsetOf(*ConstB)) {
MaskVal |= (IsEq ? BMask_Mixed : BMask_NotMixed);
}
@@ -269,9 +270,9 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
ICmpInst *RHS,
ICmpInst::Predicate &PredL,
ICmpInst::Predicate &PredR) {
- // vectors are not (yet?) supported. Don't support pointers either.
- if (!LHS->getOperand(0)->getType()->isIntegerTy() ||
- !RHS->getOperand(0)->getType()->isIntegerTy())
+ // Don't allow pointers. Splat vectors are fine.
+ if (!LHS->getOperand(0)->getType()->isIntOrIntVectorTy() ||
+ !RHS->getOperand(0)->getType()->isIntOrIntVectorTy())
return None;
// Here comes the tricky part:
@@ -367,9 +368,9 @@ getMaskedTypeForICmpPair(Value *&A, Value *&B, Value *&C,
} else {
return None;
}
+
+ assert(Ok && "Failed to find AND on the right side of the RHS icmp.");
}
- if (!Ok)
- return None;
if (L11 == A) {
B = L12;
@@ -619,8 +620,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// Remaining cases assume at least that B and D are constant, and depend on
// their actual values. This isn't strictly necessary, just a "handle the
// easy cases for now" decision.
- ConstantInt *BCst, *DCst;
- if (!match(B, m_ConstantInt(BCst)) || !match(D, m_ConstantInt(DCst)))
+ const APInt *ConstB, *ConstD;
+ if (!match(B, m_APInt(ConstB)) || !match(D, m_APInt(ConstD)))
return nullptr;
if (Mask & (Mask_NotAllZeros | BMask_NotAllOnes)) {
@@ -629,11 +630,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// -> (icmp ne (A & B), 0) or (icmp ne (A & D), 0)
// Only valid if one of the masks is a superset of the other (check "B&D" is
// the same as either B or D).
- APInt NewMask = BCst->getValue() & DCst->getValue();
-
- if (NewMask == BCst->getValue())
+ APInt NewMask = *ConstB & *ConstD;
+ if (NewMask == *ConstB)
return LHS;
- else if (NewMask == DCst->getValue())
+ else if (NewMask == *ConstD)
return RHS;
}
@@ -642,11 +642,10 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// -> (icmp ne (A & B), A) or (icmp ne (A & D), A)
// Only valid if one of the masks is a superset of the other (check "B|D" is
// the same as either B or D).
- APInt NewMask = BCst->getValue() | DCst->getValue();
-
- if (NewMask == BCst->getValue())
+ APInt NewMask = *ConstB | *ConstD;
+ if (NewMask == *ConstB)
return LHS;
- else if (NewMask == DCst->getValue())
+ else if (NewMask == *ConstD)
return RHS;
}
@@ -661,23 +660,21 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// We can't simply use C and E because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set.
- ConstantInt *CCst, *ECst;
- if (!match(C, m_ConstantInt(CCst)) || !match(E, m_ConstantInt(ECst)))
+ const APInt *OldConstC, *OldConstE;
+ if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
return nullptr;
- if (PredL != NewCC)
- CCst = cast<ConstantInt>(ConstantExpr::getXor(BCst, CCst));
- if (PredR != NewCC)
- ECst = cast<ConstantInt>(ConstantExpr::getXor(DCst, ECst));
+
+ const APInt ConstC = PredL != NewCC ? *ConstB ^ *OldConstC : *OldConstC;
+ const APInt ConstE = PredR != NewCC ? *ConstD ^ *OldConstE : *OldConstE;
// If there is a conflict, we should actually return a false for the
// whole construct.
- if (((BCst->getValue() & DCst->getValue()) &
- (CCst->getValue() ^ ECst->getValue())).getBoolValue())
+ if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
return ConstantInt::get(LHS->getType(), !IsAnd);
Value *NewOr1 = Builder.CreateOr(B, D);
- Value *NewOr2 = ConstantExpr::getOr(CCst, ECst);
Value *NewAnd = Builder.CreateAnd(A, NewOr1);
+ Constant *NewOr2 = ConstantInt::get(A->getType(), ConstC | ConstE);
return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
}
@@ -777,20 +774,6 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS,
return Builder.CreateICmp(Pred, Or, ConstantInt::get(X->getType(), *C2));
}
- // Special case: get the ordering right when the values wrap around zero.
- // Ie, we assumed the constants were unsigned when swapping earlier.
- if (C1->isNullValue() && C2->isAllOnesValue())
- std::swap(C1, C2);
-
- if (*C1 == *C2 - 1) {
- // (X == 13 || X == 14) --> X - 13 <=u 1
- // (X != 13 && X != 14) --> X - 13 >u 1
- // An 'add' is the canonical IR form, so favor that over a 'sub'.
- Value *Add = Builder.CreateAdd(X, ConstantInt::get(X->getType(), -(*C1)));
- auto NewPred = JoinedByAnd ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE;
- return Builder.CreateICmp(NewPred, Add, ConstantInt::get(X->getType(), 1));
- }
-
return nullptr;
}
@@ -923,7 +906,7 @@ static Value *foldSignedTruncationCheck(ICmpInst *ICmp0, ICmpInst *ICmp1,
if (!tryToDecompose(OtherICmp, X0, UnsetBitsMask))
return nullptr;
- assert(!UnsetBitsMask.isNullValue() && "empty mask makes no sense.");
+ assert(!UnsetBitsMask.isZero() && "empty mask makes no sense.");
// Are they working on the same value?
Value *X;
@@ -1113,8 +1096,8 @@ static Value *extractIntPart(const IntPart &P, IRBuilderBase &Builder) {
/// (icmp eq X0, Y0) & (icmp eq X1, Y1) -> icmp eq X01, Y01
/// (icmp ne X0, Y0) | (icmp ne X1, Y1) -> icmp ne X01, Y01
/// where X0, X1 and Y0, Y1 are adjacent parts extracted from an integer.
-static Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd,
- InstCombiner::BuilderTy &Builder) {
+Value *InstCombinerImpl::foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool IsAnd) {
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
return nullptr;
@@ -1202,6 +1185,51 @@ static Value *foldAndOrOfICmpsWithConstEq(ICmpInst *Cmp0, ICmpInst *Cmp1,
return Builder.CreateBinOp(Logic.getOpcode(), Cmp0, SubstituteCmp);
}
+/// Fold (icmp Pred1 V1, C1) & (icmp Pred2 V2, C2)
+/// or (icmp Pred1 V1, C1) | (icmp Pred2 V2, C2)
+/// into a single comparison using range-based reasoning.
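+/// For instance (an illustrative sketch, not an exhaustive description):
+///   (icmp ult X, 10) & (icmp ugt X, 3)
+/// describes the range [4, 10) and is emitted as
+///   icmp ult (X + -4), 6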
+static Value *foldAndOrOfICmpsUsingRanges(
+ ICmpInst::Predicate Pred1, Value *V1, const APInt &C1,
+ ICmpInst::Predicate Pred2, Value *V2, const APInt &C2,
+ IRBuilderBase &Builder, bool IsAnd) {
+ // Look through add of a constant offset on V1, V2, or both operands. This
+ // allows us to interpret the V + C' < C'' range idiom into a proper range.
+ const APInt *Offset1 = nullptr, *Offset2 = nullptr;
+ if (V1 != V2) {
+ Value *X;
+ if (match(V1, m_Add(m_Value(X), m_APInt(Offset1))))
+ V1 = X;
+ if (match(V2, m_Add(m_Value(X), m_APInt(Offset2))))
+ V2 = X;
+ }
+
+ if (V1 != V2)
+ return nullptr;
+
+ ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, C1);
+ if (Offset1)
+ CR1 = CR1.subtract(*Offset1);
+
+ ConstantRange CR2 = ConstantRange::makeExactICmpRegion(Pred2, C2);
+ if (Offset2)
+ CR2 = CR2.subtract(*Offset2);
+
+ Optional<ConstantRange> CR =
+ IsAnd ? CR1.exactIntersectWith(CR2) : CR1.exactUnionWith(CR2);
+ if (!CR)
+ return nullptr;
+
+ CmpInst::Predicate NewPred;
+ APInt NewC, Offset;
+ CR->getEquivalentICmp(NewPred, NewC, Offset);
+
+ Type *Ty = V1->getType();
+ Value *NewV = V1;
+ if (Offset != 0)
+ NewV = Builder.CreateAdd(NewV, ConstantInt::get(Ty, Offset));
+ return Builder.CreateICmp(NewPred, NewV, ConstantInt::get(Ty, NewC));
+}
+
/// Fold (icmp)&(icmp) if possible.
Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
BinaryOperator &And) {
@@ -1262,170 +1290,64 @@ Value *InstCombinerImpl::foldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS,
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/true, Q, Builder))
return X;
- if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true, Builder))
+ if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/true))
return X;
// This only handles icmp of constants: (icmp1 A, C1) & (icmp2 B, C2).
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
- ConstantInt *LHSC, *RHSC;
- if (!match(LHS->getOperand(1), m_ConstantInt(LHSC)) ||
- !match(RHS->getOperand(1), m_ConstantInt(RHSC)))
- return nullptr;
-
- if (LHSC == RHSC && PredL == PredR) {
- // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
- // where C is a power of 2 or
- // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
- if ((PredL == ICmpInst::ICMP_ULT && LHSC->getValue().isPowerOf2()) ||
- (PredL == ICmpInst::ICMP_EQ && LHSC->isZero())) {
- Value *NewOr = Builder.CreateOr(LHS0, RHS0);
- return Builder.CreateICmp(PredL, NewOr, LHSC);
- }
+ // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
+ // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
+ if (PredL == ICmpInst::ICMP_EQ && match(LHS->getOperand(1), m_ZeroInt()) &&
+ PredR == ICmpInst::ICMP_EQ && match(RHS->getOperand(1), m_ZeroInt()) &&
+ LHS0->getType() == RHS0->getType()) {
+ Value *NewOr = Builder.CreateOr(LHS0, RHS0);
+ return Builder.CreateICmp(PredL, NewOr,
+ Constant::getNullValue(NewOr->getType()));
}
+ const APInt *LHSC, *RHSC;
+ if (!match(LHS->getOperand(1), m_APInt(LHSC)) ||
+ !match(RHS->getOperand(1), m_APInt(RHSC)))
+ return nullptr;
+
// (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
// where CMAX is the all ones value for the truncated type,
// iff the lower bits of C2 and CA are zero.
if (PredL == ICmpInst::ICMP_EQ && PredL == PredR && LHS->hasOneUse() &&
RHS->hasOneUse()) {
Value *V;
- ConstantInt *AndC, *SmallC = nullptr, *BigC = nullptr;
+ const APInt *AndC, *SmallC = nullptr, *BigC = nullptr;
// (trunc x) == C1 & (and x, CA) == C2
// (and x, CA) == C2 & (trunc x) == C1
if (match(RHS0, m_Trunc(m_Value(V))) &&
- match(LHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
+ match(LHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
SmallC = RHSC;
BigC = LHSC;
} else if (match(LHS0, m_Trunc(m_Value(V))) &&
- match(RHS0, m_And(m_Specific(V), m_ConstantInt(AndC)))) {
+ match(RHS0, m_And(m_Specific(V), m_APInt(AndC)))) {
SmallC = LHSC;
BigC = RHSC;
}
if (SmallC && BigC) {
- unsigned BigBitSize = BigC->getType()->getBitWidth();
- unsigned SmallBitSize = SmallC->getType()->getBitWidth();
+ unsigned BigBitSize = BigC->getBitWidth();
+ unsigned SmallBitSize = SmallC->getBitWidth();
// Check that the low bits are zero.
APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize);
- if ((Low & AndC->getValue()).isNullValue() &&
- (Low & BigC->getValue()).isNullValue()) {
- Value *NewAnd = Builder.CreateAnd(V, Low | AndC->getValue());
- APInt N = SmallC->getValue().zext(BigBitSize) | BigC->getValue();
- Value *NewVal = ConstantInt::get(AndC->getType()->getContext(), N);
+ if ((Low & *AndC).isZero() && (Low & *BigC).isZero()) {
+ Value *NewAnd = Builder.CreateAnd(V, Low | *AndC);
+ APInt N = SmallC->zext(BigBitSize) | *BigC;
+ Value *NewVal = ConstantInt::get(NewAnd->getType(), N);
return Builder.CreateICmp(PredL, NewAnd, NewVal);
}
}
}
- // From here on, we only handle:
- // (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
- if (LHS0 != RHS0)
- return nullptr;
-
- // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
- if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
- PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
- PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
- PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
- return nullptr;
-
- // We can't fold (ugt x, C) & (sgt x, C2).
- if (!predicatesFoldable(PredL, PredR))
- return nullptr;
-
- // Ensure that the larger constant is on the RHS.
- bool ShouldSwap;
- if (CmpInst::isSigned(PredL) ||
- (ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
- ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
- else
- ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
-
- if (ShouldSwap) {
- std::swap(LHS, RHS);
- std::swap(LHSC, RHSC);
- std::swap(PredL, PredR);
- }
-
- // At this point, we know we have two icmp instructions
- // comparing a value against two constants and and'ing the result
- // together. Because of the above check, we know that we only have
- // icmp eq, icmp ne, icmp [su]lt, and icmp [SU]gt here. We also know
- // (from the icmp folding check above), that the two constants
- // are not equal and that the larger constant is on the RHS
- assert(LHSC != RHSC && "Compares not folded above?");
-
- switch (PredL) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_ULT:
- // (X != 13 & X u< 14) -> X < 13
- if (LHSC->getValue() == (RHSC->getValue() - 1))
- return Builder.CreateICmpULT(LHS0, LHSC);
- if (LHSC->isZero()) // (X != 0 & X u< C) -> X-1 u< C-1
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- false, true);
- break; // (X != 13 & X u< 15) -> no change
- case ICmpInst::ICMP_SLT:
- // (X != 13 & X s< 14) -> X < 13
- if (LHSC->getValue() == (RHSC->getValue() - 1))
- return Builder.CreateICmpSLT(LHS0, LHSC);
- // (X != INT_MIN & X s< C) -> X-(INT_MIN+1) u< (C-(INT_MIN+1))
- if (LHSC->isMinValue(true))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- true, true);
- break; // (X != 13 & X s< 15) -> no change
- case ICmpInst::ICMP_NE:
- // Potential folds for this case should already be handled.
- break;
- }
- break;
- case ICmpInst::ICMP_UGT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE:
- // (X u> 13 & X != 14) -> X u> 14
- if (RHSC->getValue() == (LHSC->getValue() + 1))
- return Builder.CreateICmp(PredL, LHS0, RHSC);
- // X u> C & X != UINT_MAX -> (X-(C+1)) u< UINT_MAX-(C+1)
- if (RHSC->isMaxValue(false))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- false, true);
- break; // (X u> 13 & X != 15) -> no change
- case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) u< 1
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- false, true);
- }
- break;
- case ICmpInst::ICMP_SGT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_NE:
- // (X s> 13 & X != 14) -> X s> 14
- if (RHSC->getValue() == (LHSC->getValue() + 1))
- return Builder.CreateICmp(PredL, LHS0, RHSC);
- // X s> C & X != INT_MAX -> (X-(C+1)) u< INT_MAX-(C+1)
- if (RHSC->isMaxValue(true))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(),
- true, true);
- break; // (X s> 13 & X != 15) -> no change
- case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) u< 1
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue(), true,
- true);
- }
- break;
- }
-
- return nullptr;
+ return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC,
+ Builder, /* IsAnd */ true);
}
Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
@@ -1496,15 +1418,15 @@ static Instruction *reassociateFCmps(BinaryOperator &BO,
std::swap(Op0, Op1);
// Match inner binop and the predicate for combining 2 NAN checks into 1.
- BinaryOperator *BO1;
+ Value *BO10, *BO11;
FCmpInst::Predicate NanPred = Opcode == Instruction::And ? FCmpInst::FCMP_ORD
: FCmpInst::FCMP_UNO;
if (!match(Op0, m_FCmp(Pred, m_Value(X), m_AnyZeroFP())) || Pred != NanPred ||
- !match(Op1, m_BinOp(BO1)) || BO1->getOpcode() != Opcode)
+ !match(Op1, m_BinOp(Opcode, m_Value(BO10), m_Value(BO11))))
return nullptr;
// The inner logic op must have a matching fcmp operand.
- Value *BO10 = BO1->getOperand(0), *BO11 = BO1->getOperand(1), *Y;
+ Value *Y;
if (!match(BO10, m_FCmp(Pred, m_Value(Y), m_AnyZeroFP())) ||
Pred != NanPred || X->getType() != Y->getType())
std::swap(BO10, BO11);
@@ -1524,27 +1446,42 @@ static Instruction *reassociateFCmps(BinaryOperator &BO,
return BinaryOperator::Create(Opcode, NewFCmp, BO11);
}
-/// Match De Morgan's Laws:
+/// Match variations of De Morgan's Laws:
/// (~A & ~B) == (~(A | B))
/// (~A | ~B) == (~(A & B))
static Instruction *matchDeMorgansLaws(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
- auto Opcode = I.getOpcode();
+ const Instruction::BinaryOps Opcode = I.getOpcode();
assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
"Trying to match De Morgan's Laws with something other than and/or");
// Flip the logic operation.
- Opcode = (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
+ const Instruction::BinaryOps FlippedOpcode =
+ (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Value *A, *B;
- if (match(I.getOperand(0), m_OneUse(m_Not(m_Value(A)))) &&
- match(I.getOperand(1), m_OneUse(m_Not(m_Value(B)))) &&
+ if (match(Op0, m_OneUse(m_Not(m_Value(A)))) &&
+ match(Op1, m_OneUse(m_Not(m_Value(B)))) &&
!InstCombiner::isFreeToInvert(A, A->hasOneUse()) &&
!InstCombiner::isFreeToInvert(B, B->hasOneUse())) {
- Value *AndOr = Builder.CreateBinOp(Opcode, A, B, I.getName() + ".demorgan");
+ Value *AndOr =
+ Builder.CreateBinOp(FlippedOpcode, A, B, I.getName() + ".demorgan");
return BinaryOperator::CreateNot(AndOr);
}
+ // The 'not' ops may require reassociation.
+ // (A & ~B) & ~C --> A & ~(B | C)
+ // (~B & A) & ~C --> A & ~(B | C)
+ // (A | ~B) | ~C --> A | ~(B & C)
+ // (~B | A) | ~C --> A | ~(B & C)
+ Value *C;
+ if (match(Op0, m_OneUse(m_c_BinOp(Opcode, m_Value(A), m_Not(m_Value(B))))) &&
+ match(Op1, m_Not(m_Value(C)))) {
+ Value *FlippedBO = Builder.CreateBinOp(FlippedOpcode, B, C);
+ return BinaryOperator::Create(Opcode, A, Builder.CreateNot(FlippedBO));
+ }
+
return nullptr;
}
@@ -1778,6 +1715,72 @@ Instruction *InstCombinerImpl::narrowMaskedBinOp(BinaryOperator &And) {
return new ZExtInst(Builder.CreateAnd(NewBO, X), Ty);
}
+/// Try folding relatively complex patterns for both And and Or operations
+/// with all And and Or swapped.
+static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ const Instruction::BinaryOps Opcode = I.getOpcode();
+ assert(Opcode == Instruction::And || Opcode == Instruction::Or);
+
+ // Flip the logic operation.
+ const Instruction::BinaryOps FlippedOpcode =
+ (Opcode == Instruction::And) ? Instruction::Or : Instruction::And;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *A, *B, *C;
+
+ // (~(A | B) & C) | ... --> ...
+ // (~(A & B) | C) & ... --> ...
+ // TODO: The one-use checks are conservative. We only need to check that the
+ // total number of multiply-used values does not exceed the reduction in the
+ // number of operations.
+ if (match(Op0, m_c_BinOp(FlippedOpcode,
+ m_Not(m_BinOp(Opcode, m_Value(A), m_Value(B))),
+ m_Value(C)))) {
+ // (~(A | B) & C) | (~(A | C) & B) --> (B ^ C) & ~A
+ // (~(A & B) | C) & (~(A & C) | B) --> ~((B ^ C) & A)
+ if (match(Op1,
+ m_OneUse(m_c_BinOp(FlippedOpcode,
+ m_OneUse(m_Not(m_c_BinOp(Opcode, m_Specific(A),
+ m_Specific(C)))),
+ m_Specific(B))))) {
+ Value *Xor = Builder.CreateXor(B, C);
+ return (Opcode == Instruction::Or)
+ ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(A))
+ : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, A));
+ }
+
+ // (~(A | B) & C) | (~(B | C) & A) --> (A ^ C) & ~B
+ // (~(A & B) | C) & (~(B & C) | A) --> ~((A ^ C) & B)
+ if (match(Op1,
+ m_OneUse(m_c_BinOp(FlippedOpcode,
+ m_OneUse(m_Not(m_c_BinOp(Opcode, m_Specific(B),
+ m_Specific(C)))),
+ m_Specific(A))))) {
+ Value *Xor = Builder.CreateXor(A, C);
+ return (Opcode == Instruction::Or)
+ ? BinaryOperator::CreateAnd(Xor, Builder.CreateNot(B))
+ : BinaryOperator::CreateNot(Builder.CreateAnd(Xor, B));
+ }
+
+ // (~(A | B) & C) | ~(A | C) --> ~((B & C) | A)
+ // (~(A & B) | C) & ~(A & C) --> ~((B | C) & A)
+ if (match(Op1, m_OneUse(m_Not(m_OneUse(
+ m_c_BinOp(Opcode, m_Specific(A), m_Specific(C)))))))
+ return BinaryOperator::CreateNot(Builder.CreateBinOp(
+ Opcode, Builder.CreateBinOp(FlippedOpcode, B, C), A));
+
+ // (~(A | B) & C) | ~(B | C) --> ~((A & C) | B)
+ // (~(A & B) | C) & ~(B & C) --> ~((A | C) & B)
+ if (match(Op1, m_OneUse(m_Not(m_OneUse(
+ m_c_BinOp(Opcode, m_Specific(B), m_Specific(C)))))))
+ return BinaryOperator::CreateNot(Builder.CreateBinOp(
+ Opcode, Builder.CreateBinOp(FlippedOpcode, A, C), B));
+ }
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -1803,6 +1806,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *Xor = foldAndToXor(I, Builder))
return Xor;
+ if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
+ return X;
+
// (A|B)&(A|C) -> A|(B&C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
@@ -1883,7 +1889,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
// (X + AddC) & LowMaskC --> X & LowMaskC
unsigned Ctlz = C->countLeadingZeros();
APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz));
- if ((*AddC & LowMask).isNullValue())
+ if ((*AddC & LowMask).isZero())
return BinaryOperator::CreateAnd(X, Op1);
// If we are masking the result of the add down to exactly one bit and
@@ -1896,44 +1902,37 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateXor(NewAnd, Op1);
}
}
- }
- ConstantInt *AndRHS;
- if (match(Op1, m_ConstantInt(AndRHS))) {
- const APInt &AndRHSMask = AndRHS->getValue();
-
- // Optimize a variety of ((val OP C1) & C2) combinations...
- if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) {
- // ((C1 OP zext(X)) & C2) -> zext((C1-X) & C2) if C2 fits in the bitwidth
- // of X and OP behaves well when given trunc(C1) and X.
- // TODO: Do this for vectors by using m_APInt instead of m_ConstantInt.
- switch (Op0I->getOpcode()) {
- default:
- break;
+ // ((C1 OP zext(X)) & C2) -> zext((C1 OP X) & C2) if C2 fits in the
+ // bitwidth of X and OP behaves well when given trunc(C1) and X.
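+ // For instance (illustrative), ((zext i8 X to i32) + 300) & 15 becomes
+ // zext ((X + 44) & 15) to i32, since 300 truncates to 44 in i8 and 15 fits
+ // in 8 bits.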
+ auto isSuitableBinOpcode = [](BinaryOperator *B) {
+ switch (B->getOpcode()) {
case Instruction::Xor:
case Instruction::Or:
case Instruction::Mul:
case Instruction::Add:
case Instruction::Sub:
- Value *X;
- ConstantInt *C1;
- // TODO: The one use restrictions could be relaxed a little if the AND
- // is going to be removed.
- if (match(Op0I, m_OneUse(m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))),
- m_ConstantInt(C1))))) {
- if (AndRHSMask.isIntN(X->getType()->getScalarSizeInBits())) {
- auto *TruncC1 = ConstantExpr::getTrunc(C1, X->getType());
- Value *BinOp;
- Value *Op0LHS = Op0I->getOperand(0);
- if (isa<ZExtInst>(Op0LHS))
- BinOp = Builder.CreateBinOp(Op0I->getOpcode(), X, TruncC1);
- else
- BinOp = Builder.CreateBinOp(Op0I->getOpcode(), TruncC1, X);
- auto *TruncC2 = ConstantExpr::getTrunc(AndRHS, X->getType());
- auto *And = Builder.CreateAnd(BinOp, TruncC2);
- return new ZExtInst(And, Ty);
- }
- }
+ return true;
+ default:
+ return false;
+ }
+ };
+ BinaryOperator *BO;
+ if (match(Op0, m_OneUse(m_BinOp(BO))) && isSuitableBinOpcode(BO)) {
+ Value *X;
+ const APInt *C1;
+ // TODO: The one-use restrictions could be relaxed a little if the AND
+ // is going to be removed.
+ if (match(BO, m_c_BinOp(m_OneUse(m_ZExt(m_Value(X))), m_APInt(C1))) &&
+ C->isIntN(X->getType()->getScalarSizeInBits())) {
+ unsigned XWidth = X->getType()->getScalarSizeInBits();
+ Constant *TruncC1 = ConstantInt::get(X->getType(), C1->trunc(XWidth));
+ Value *BinOp = isa<ZExtInst>(BO->getOperand(0))
+ ? Builder.CreateBinOp(BO->getOpcode(), X, TruncC1)
+ : Builder.CreateBinOp(BO->getOpcode(), TruncC1, X);
+ Constant *TruncC = ConstantInt::get(X->getType(), C->trunc(XWidth));
+ Value *And = Builder.CreateAnd(BinOp, TruncC);
+ return new ZExtInst(And, Ty);
}
}
}
@@ -2071,13 +2070,13 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
A->getType()->isIntOrIntVectorTy(1))
return SelectInst::Create(A, Op0, Constant::getNullValue(Ty));
- // and(ashr(subNSW(Y, X), ScalarSizeInBits(Y)-1), X) --> X s> Y ? X : 0.
- if (match(&I, m_c_And(m_OneUse(m_AShr(
- m_NSWSub(m_Value(Y), m_Value(X)),
- m_SpecificInt(Ty->getScalarSizeInBits() - 1))),
- m_Deferred(X)))) {
- Value *NewICmpInst = Builder.CreateICmpSGT(X, Y);
- return SelectInst::Create(NewICmpInst, X, ConstantInt::getNullValue(Ty));
+ // (iN X s>> (N-1)) & Y --> (X s< 0) ? Y : 0
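+ // e.g. for i32: (X s>> 31) & Y --> (X s< 0) ? Y : 0, because an arithmetic
+ // shift by the bit width minus one yields either all-ones or zero.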
+ unsigned FullShift = Ty->getScalarSizeInBits() - 1;
+ if (match(&I, m_c_And(m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))),
+ m_Value(Y)))) {
+ Constant *Zero = ConstantInt::getNullValue(Ty);
+ Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg");
+ return SelectInst::Create(Cmp, Y, Zero);
}
// (~x) & y --> ~(x | (~y)) iff that gets rid of inversions
@@ -2284,28 +2283,38 @@ static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
/// vector composed of all-zeros or all-ones values and is the bitwise 'not' of
/// B, it can be used as the condition operand of a select instruction.
Value *InstCombinerImpl::getSelectCondition(Value *A, Value *B) {
- // Step 1: We may have peeked through bitcasts in the caller.
+ // We may have peeked through bitcasts in the caller.
// Exit immediately if we don't have (vector) integer types.
Type *Ty = A->getType();
if (!Ty->isIntOrIntVectorTy() || !B->getType()->isIntOrIntVectorTy())
return nullptr;
- // Step 2: We need 0 or all-1's bitmasks.
- if (ComputeNumSignBits(A) != Ty->getScalarSizeInBits())
- return nullptr;
-
- // Step 3: If B is the 'not' value of A, we have our answer.
- if (match(A, m_Not(m_Specific(B)))) {
+ // If A is the 'not' operand of B and has enough sign bits, we have our answer.
+ if (match(B, m_Not(m_Specific(A)))) {
// If these are scalars or vectors of i1, A can be used directly.
if (Ty->isIntOrIntVectorTy(1))
return A;
- return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(Ty));
+
+ // If we look through a vector bitcast, the caller will bitcast the operands
+ // to match the condition's number of bits (N x i1).
+ // To make this poison-safe, disallow a bitcast from wide elements to narrow
+ // elements, since that could introduce poison in lanes where it was not
+ // present in the original code.
+ A = peekThroughBitcast(A);
+ if (A->getType()->isIntOrIntVectorTy()) {
+ unsigned NumSignBits = ComputeNumSignBits(A);
+ if (NumSignBits == A->getType()->getScalarSizeInBits() &&
+ NumSignBits <= Ty->getScalarSizeInBits())
+ return Builder.CreateTrunc(A, CmpInst::makeCmpResultType(A->getType()));
+ }
+ return nullptr;
}
// If both operands are constants, see if the constants are inverse bitmasks.
Constant *AConst, *BConst;
if (match(A, m_Constant(AConst)) && match(B, m_Constant(BConst)))
- if (AConst == ConstantExpr::getNot(BConst))
+ if (AConst == ConstantExpr::getNot(BConst) &&
+ ComputeNumSignBits(A) == Ty->getScalarSizeInBits())
return Builder.CreateZExtOrTrunc(A, CmpInst::makeCmpResultType(Ty));
// Look for more complex patterns. The 'not' op may be hidden behind various
@@ -2349,10 +2358,17 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
B = peekThroughBitcast(B, true);
if (Value *Cond = getSelectCondition(A, B)) {
// ((bc Cond) & C) | ((bc ~Cond) & D) --> bc (select Cond, (bc C), (bc D))
+ // If this is a vector, we may need to cast to match the condition's length.
// The bitcasts will either all exist or all not exist. The builder will
// not create unnecessary casts if the types already match.
- Value *BitcastC = Builder.CreateBitCast(C, A->getType());
- Value *BitcastD = Builder.CreateBitCast(D, A->getType());
+ Type *SelTy = A->getType();
+ if (auto *VecTy = dyn_cast<VectorType>(Cond->getType())) {
+ unsigned Elts = VecTy->getElementCount().getKnownMinValue();
+ Type *EltTy = Builder.getIntNTy(SelTy->getPrimitiveSizeInBits() / Elts);
+ SelTy = VectorType::get(EltTy, VecTy->getElementCount());
+ }
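+    // Editor's illustration of the adjustment above: if A is an i32 that was
+    // bitcast from a <4 x i8> sign-mask, Cond comes back as <4 x i1>, so
+    // SelTy becomes <4 x i8>; C and D are bitcast to that type for the
+    // select, and the result is bitcast back to OrigType afterwards.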
+ Value *BitcastC = Builder.CreateBitCast(C, SelTy);
+ Value *BitcastD = Builder.CreateBitCast(D, SelTy);
Value *Select = Builder.CreateSelect(Cond, BitcastC, BitcastD);
return Builder.CreateBitCast(Select, OrigType);
}
@@ -2374,8 +2390,9 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
Value *LHS0 = LHS->getOperand(0), *RHS0 = RHS->getOperand(0);
Value *LHS1 = LHS->getOperand(1), *RHS1 = RHS->getOperand(1);
- auto *LHSC = dyn_cast<ConstantInt>(LHS1);
- auto *RHSC = dyn_cast<ConstantInt>(RHS1);
+ const APInt *LHSC = nullptr, *RHSC = nullptr;
+ match(LHS1, m_APInt(LHSC));
+ match(RHS1, m_APInt(RHSC));
// Fold (icmp ult/ule (A + C1), C3) | (icmp ult/ule (A + C2), C3)
// --> (icmp ult/ule ((A & ~(C1 ^ C2)) + max(C1, C2)), C3)
@@ -2389,40 +2406,41 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// This implies all values in the two ranges differ by exactly one bit.
if ((PredL == ICmpInst::ICMP_ULT || PredL == ICmpInst::ICMP_ULE) &&
PredL == PredR && LHSC && RHSC && LHS->hasOneUse() && RHS->hasOneUse() &&
- LHSC->getType() == RHSC->getType() &&
- LHSC->getValue() == (RHSC->getValue())) {
+ LHSC->getBitWidth() == RHSC->getBitWidth() && *LHSC == *RHSC) {
Value *AddOpnd;
- ConstantInt *LAddC, *RAddC;
- if (match(LHS0, m_Add(m_Value(AddOpnd), m_ConstantInt(LAddC))) &&
- match(RHS0, m_Add(m_Specific(AddOpnd), m_ConstantInt(RAddC))) &&
- LAddC->getValue().ugt(LHSC->getValue()) &&
- RAddC->getValue().ugt(LHSC->getValue())) {
+ const APInt *LAddC, *RAddC;
+ if (match(LHS0, m_Add(m_Value(AddOpnd), m_APInt(LAddC))) &&
+ match(RHS0, m_Add(m_Specific(AddOpnd), m_APInt(RAddC))) &&
+ LAddC->ugt(*LHSC) && RAddC->ugt(*LHSC)) {
- APInt DiffC = LAddC->getValue() ^ RAddC->getValue();
+ APInt DiffC = *LAddC ^ *RAddC;
if (DiffC.isPowerOf2()) {
- ConstantInt *MaxAddC = nullptr;
- if (LAddC->getValue().ult(RAddC->getValue()))
+ const APInt *MaxAddC = nullptr;
+ if (LAddC->ult(*RAddC))
MaxAddC = RAddC;
else
MaxAddC = LAddC;
- APInt RRangeLow = -RAddC->getValue();
- APInt RRangeHigh = RRangeLow + LHSC->getValue();
- APInt LRangeLow = -LAddC->getValue();
- APInt LRangeHigh = LRangeLow + LHSC->getValue();
+ APInt RRangeLow = -*RAddC;
+ APInt RRangeHigh = RRangeLow + *LHSC;
+ APInt LRangeLow = -*LAddC;
+ APInt LRangeHigh = LRangeLow + *LHSC;
APInt LowRangeDiff = RRangeLow ^ LRangeLow;
APInt HighRangeDiff = RRangeHigh ^ LRangeHigh;
APInt RangeDiff = LRangeLow.sgt(RRangeLow) ? LRangeLow - RRangeLow
: RRangeLow - LRangeLow;
if (LowRangeDiff.isPowerOf2() && LowRangeDiff == HighRangeDiff &&
- RangeDiff.ugt(LHSC->getValue())) {
- Value *MaskC = ConstantInt::get(LAddC->getType(), ~DiffC);
+ RangeDiff.ugt(*LHSC)) {
+ Type *Ty = AddOpnd->getType();
+ Value *MaskC = ConstantInt::get(Ty, ~DiffC);
Value *NewAnd = Builder.CreateAnd(AddOpnd, MaskC);
- Value *NewAdd = Builder.CreateAdd(NewAnd, MaxAddC);
- return Builder.CreateICmp(LHS->getPredicate(), NewAdd, LHSC);
+ Value *NewAdd = Builder.CreateAdd(NewAnd,
+ ConstantInt::get(Ty, *MaxAddC));
+ return Builder.CreateICmp(LHS->getPredicate(), NewAdd,
+ ConstantInt::get(Ty, *LHSC));
}
}
}
@@ -2496,14 +2514,13 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
foldUnsignedUnderflowCheck(RHS, LHS, /*IsAnd=*/false, Q, Builder))
return X;
- if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false, Builder))
+ if (Value *X = foldEqOfParts(LHS, RHS, /*IsAnd=*/false))
return X;
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- // TODO: Remove this when foldLogOpOfMaskedICmps can handle vectors.
- if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_Zero()) &&
- PredR == ICmpInst::ICMP_NE && match(RHS1, m_Zero()) &&
- LHS0->getType()->isIntOrIntVectorTy() &&
+ // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
+ if (PredL == ICmpInst::ICMP_NE && match(LHS1, m_ZeroInt()) &&
+ PredR == ICmpInst::ICMP_NE && match(RHS1, m_ZeroInt()) &&
LHS0->getType() == RHS0->getType()) {
Value *NewOr = Builder.CreateOr(LHS0, RHS0);
return Builder.CreateICmp(PredL, NewOr,
@@ -2514,114 +2531,8 @@ Value *InstCombinerImpl::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (!LHSC || !RHSC)
return nullptr;
- // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
- // iff C2 + CA == C1.
- if (PredL == ICmpInst::ICMP_ULT && PredR == ICmpInst::ICMP_EQ) {
- ConstantInt *AddC;
- if (match(LHS0, m_Add(m_Specific(RHS0), m_ConstantInt(AddC))))
- if (RHSC->getValue() + AddC->getValue() == LHSC->getValue())
- return Builder.CreateICmpULE(LHS0, LHSC);
- }
-
- // From here on, we only handle:
- // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
- if (LHS0 != RHS0)
- return nullptr;
-
- // ICMP_[US][GL]E X, C is folded to ICMP_[US][GL]T elsewhere.
- if (PredL == ICmpInst::ICMP_UGE || PredL == ICmpInst::ICMP_ULE ||
- PredR == ICmpInst::ICMP_UGE || PredR == ICmpInst::ICMP_ULE ||
- PredL == ICmpInst::ICMP_SGE || PredL == ICmpInst::ICMP_SLE ||
- PredR == ICmpInst::ICMP_SGE || PredR == ICmpInst::ICMP_SLE)
- return nullptr;
-
- // We can't fold (ugt x, C) | (sgt x, C2).
- if (!predicatesFoldable(PredL, PredR))
- return nullptr;
-
- // Ensure that the larger constant is on the RHS.
- bool ShouldSwap;
- if (CmpInst::isSigned(PredL) ||
- (ICmpInst::isEquality(PredL) && CmpInst::isSigned(PredR)))
- ShouldSwap = LHSC->getValue().sgt(RHSC->getValue());
- else
- ShouldSwap = LHSC->getValue().ugt(RHSC->getValue());
-
- if (ShouldSwap) {
- std::swap(LHS, RHS);
- std::swap(LHSC, RHSC);
- std::swap(PredL, PredR);
- }
-
- // At this point, we know we have two icmp instructions
- // comparing a value against two constants and or'ing the result
- // together. Because of the above check, we know that we only have
- // ICMP_EQ, ICMP_NE, ICMP_LT, and ICMP_GT here. We also know (from the
- // icmp folding check above), that the two constants are not
- // equal.
- assert(LHSC != RHSC && "Compares not folded above?");
-
- switch (PredL) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- // Potential folds for this case should already be handled.
- break;
- case ICmpInst::ICMP_UGT:
- // (X == 0 || X u> C) -> (X-1) u>= C
- if (LHSC->isMinValue(false))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1,
- false, false);
- // (X == 13 | X u> 14) -> no change
- break;
- case ICmpInst::ICMP_SGT:
- // (X == INT_MIN || X s> C) -> (X-(INT_MIN+1)) u>= C-INT_MIN
- if (LHSC->isMinValue(true))
- return insertRangeTest(LHS0, LHSC->getValue() + 1, RHSC->getValue() + 1,
- true, false);
- // (X == 13 | X s> 14) -> no change
- break;
- }
- break;
- case ICmpInst::ICMP_ULT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ: // (X u< 13 | X == 14) -> no change
- // (X u< C || X == UINT_MAX) => (X-C) u>= UINT_MAX-C
- if (RHSC->isMaxValue(false))
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(),
- false, false);
- break;
- case ICmpInst::ICMP_UGT: // (X u< 13 | X u> 15) -> (X-13) u> 2
- assert(!RHSC->isMaxValue(false) && "Missed icmp simplification");
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1,
- false, false);
- }
- break;
- case ICmpInst::ICMP_SLT:
- switch (PredR) {
- default:
- llvm_unreachable("Unknown integer condition code!");
- case ICmpInst::ICMP_EQ:
- // (X s< C || X == INT_MAX) => (X-C) u>= INT_MAX-C
- if (RHSC->isMaxValue(true))
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue(),
- true, false);
- // (X s< 13 | X == 14) -> no change
- break;
- case ICmpInst::ICMP_SGT: // (X s< 13 | X s> 15) -> (X-13) u> 2
- assert(!RHSC->isMaxValue(true) && "Missed icmp simplification");
- return insertRangeTest(LHS0, LHSC->getValue(), RHSC->getValue() + 1, true,
- false);
- }
- break;
- }
- return nullptr;
+ return foldAndOrOfICmpsUsingRanges(PredL, LHS0, *LHSC, PredR, RHS0, *RHSC,
+ Builder, /* IsAnd */ false);
}
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -2647,6 +2558,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *Xor = foldOrToXor(I, Builder))
return Xor;
+ if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
+ return X;
+
// (A&B)|(A&C) -> A&(B|C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
@@ -2684,69 +2598,63 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
Value *X, *Y;
const APInt *CV;
if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
- !CV->isAllOnesValue() && MaskedValueIsZero(Y, *CV, 0, &I)) {
+ !CV->isAllOnes() && MaskedValueIsZero(Y, *CV, 0, &I)) {
// (X ^ C) | Y -> (X | Y) ^ C iff Y & C == 0
// The check for a 'not' op is for efficiency (if Y is known zero --> ~X).
Value *Or = Builder.CreateOr(X, Y);
return BinaryOperator::CreateXor(Or, ConstantInt::get(I.getType(), *CV));
}
- // (A & C)|(B & D)
+ // (A & C) | (B & D)
Value *A, *B, *C, *D;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- // (A & C1)|(B & C2)
- ConstantInt *C1, *C2;
- if (match(C, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2))) {
- Value *V1 = nullptr, *V2 = nullptr;
- if ((C1->getValue() & C2->getValue()).isNullValue()) {
- // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2)
- // iff (C1&C2) == 0 and (N&~C1) == 0
- if (match(A, m_Or(m_Value(V1), m_Value(V2))) &&
- ((V1 == B &&
- MaskedValueIsZero(V2, ~C1->getValue(), 0, &I)) || // (V|N)
- (V2 == B &&
- MaskedValueIsZero(V1, ~C1->getValue(), 0, &I)))) // (N|V)
- return BinaryOperator::CreateAnd(A,
- Builder.getInt(C1->getValue()|C2->getValue()));
- // Or commutes, try both ways.
- if (match(B, m_Or(m_Value(V1), m_Value(V2))) &&
- ((V1 == A &&
- MaskedValueIsZero(V2, ~C2->getValue(), 0, &I)) || // (V|N)
- (V2 == A &&
- MaskedValueIsZero(V1, ~C2->getValue(), 0, &I)))) // (N|V)
- return BinaryOperator::CreateAnd(B,
- Builder.getInt(C1->getValue()|C2->getValue()));
-
- // ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
- // iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
- ConstantInt *C3 = nullptr, *C4 = nullptr;
- if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
- (C3->getValue() & ~C1->getValue()).isNullValue() &&
- match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
- (C4->getValue() & ~C2->getValue()).isNullValue()) {
- V2 = Builder.CreateOr(V1, ConstantExpr::getOr(C3, C4), "bitfield");
- return BinaryOperator::CreateAnd(V2,
- Builder.getInt(C1->getValue()|C2->getValue()));
- }
- }
- if (C1->getValue() == ~C2->getValue()) {
- Value *X;
-
- // ((X|B)&C1)|(B&C2) -> (X&C1) | B iff C1 == ~C2
+ // (A & C0) | (B & C1)
+ const APInt *C0, *C1;
+ if (match(C, m_APInt(C0)) && match(D, m_APInt(C1))) {
+ Value *X;
+ if (*C0 == ~*C1) {
+ // ((X | B) & MaskC) | (B & ~MaskC) -> (X & MaskC) | B
if (match(A, m_c_Or(m_Value(X), m_Specific(B))))
- return BinaryOperator::CreateOr(Builder.CreateAnd(X, C1), B);
- // (A&C2)|((X|A)&C1) -> (X&C2) | A iff C1 == ~C2
+ return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C0), B);
+ // (A & MaskC) | ((X | A) & ~MaskC) -> (X & ~MaskC) | A
if (match(B, m_c_Or(m_Specific(A), m_Value(X))))
- return BinaryOperator::CreateOr(Builder.CreateAnd(X, C2), A);
+ return BinaryOperator::CreateOr(Builder.CreateAnd(X, *C1), A);
- // ((X^B)&C1)|(B&C2) -> (X&C1) ^ B iff C1 == ~C2
+ // ((X ^ B) & MaskC) | (B & ~MaskC) -> (X & MaskC) ^ B
if (match(A, m_c_Xor(m_Value(X), m_Specific(B))))
- return BinaryOperator::CreateXor(Builder.CreateAnd(X, C1), B);
- // (A&C2)|((X^A)&C1) -> (X&C2) ^ A iff C1 == ~C2
+ return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C0), B);
+ // (A & MaskC) | ((X ^ A) & ~MaskC) -> (X & ~MaskC) ^ A
if (match(B, m_c_Xor(m_Specific(A), m_Value(X))))
- return BinaryOperator::CreateXor(Builder.CreateAnd(X, C2), A);
+ return BinaryOperator::CreateXor(Builder.CreateAnd(X, *C1), A);
+ }
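+      // Editor's worked example for the masks above (i8, C0 = 0x0F,
+      // C1 = 0xF0): ((X | B) & 0x0F) | (B & 0xF0) --> (X & 0x0F) | B,
+      // since the bits of B outside the mask are re-supplied by the bare B.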
+
+ if ((*C0 & *C1).isZero()) {
+ // ((X | B) & C0) | (B & C1) --> (X | B) & (C0 | C1)
+ // iff (C0 & C1) == 0 and (X & ~C0) == 0
+ if (match(A, m_c_Or(m_Value(X), m_Specific(B))) &&
+ MaskedValueIsZero(X, ~*C0, 0, &I)) {
+ Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ return BinaryOperator::CreateAnd(A, C01);
+ }
+ // (A & C0) | ((X | A) & C1) --> (X | A) & (C0 | C1)
+ // iff (C0 & C1) == 0 and (X & ~C1) == 0
+ if (match(B, m_c_Or(m_Value(X), m_Specific(A))) &&
+ MaskedValueIsZero(X, ~*C1, 0, &I)) {
+ Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ return BinaryOperator::CreateAnd(B, C01);
+ }
+ // ((X | C2) & C0) | ((X | C3) & C1) --> (X | C2 | C3) & (C0 | C1)
+ // iff (C0 & C1) == 0 and (C2 & ~C0) == 0 and (C3 & ~C1) == 0.
+ const APInt *C2, *C3;
+ if (match(A, m_Or(m_Value(X), m_APInt(C2))) &&
+ match(B, m_Or(m_Specific(X), m_APInt(C3))) &&
+ (*C2 & ~*C0).isZero() && (*C3 & ~*C1).isZero()) {
+ Value *Or = Builder.CreateOr(X, *C2 | *C3, "bitfield");
+ Constant *C01 = ConstantInt::get(I.getType(), *C0 | *C1);
+ return BinaryOperator::CreateAnd(Or, C01);
+ }
}
}
@@ -2801,6 +2709,8 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
// A | ( A ^ B) -> A | B
// A | (~A ^ B) -> A | ~B
// (A & B) | (A ^ B)
+ // ~A | (A ^ B) -> ~(A & B)
+ // The swap above should always make Op0 the 'not' for the last case.
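+  // (Editor's note on the last fold: ~A | (A ^ B) is false only when A and B
+  // are both true, which is exactly ~(A & B).)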
if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) {
if (Op0 == A || Op0 == B)
return BinaryOperator::CreateOr(A, B);
@@ -2809,6 +2719,10 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
match(Op0, m_And(m_Specific(B), m_Specific(A))))
return BinaryOperator::CreateOr(A, B);
+ if ((Op0->hasOneUse() || Op1->hasOneUse()) &&
+ (match(Op0, m_Not(m_Specific(A))) || match(Op0, m_Not(m_Specific(B)))))
+ return BinaryOperator::CreateNot(Builder.CreateAnd(A, B));
+
if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) {
Value *Not = Builder.CreateNot(B, B->getName() + ".not");
return BinaryOperator::CreateOr(Not, Op0);
@@ -3275,71 +3189,45 @@ bool InstCombinerImpl::sinkNotIntoOtherHandOfAndOrOr(BinaryOperator &I) {
return true;
}
-// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
-// here. We should standardize that construct where it is needed or choose some
-// other way to ensure that commutated variants of patterns are not missed.
-Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
- if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
- SQ.getWithInstruction(&I)))
- return replaceInstUsesWith(I, V);
-
- if (SimplifyAssociativeOrCommutative(I))
- return &I;
-
- if (Instruction *X = foldVectorBinop(I))
- return X;
-
- if (Instruction *NewXor = foldXorToXor(I, Builder))
- return NewXor;
-
- // (A&B)^(A&C) -> A&(B^C) etc
- if (Value *V = SimplifyUsingDistributiveLaws(I))
- return replaceInstUsesWith(I, V);
-
- // See if we can simplify any instructions used by the instruction whose sole
- // purpose is to compute bits we don't care about.
- if (SimplifyDemandedInstructionBits(I))
- return &I;
-
- if (Value *V = SimplifyBSwap(I, Builder))
- return replaceInstUsesWith(I, V);
-
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- Type *Ty = I.getType();
-
- // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
- // This it a special case in haveNoCommonBitsSet, but the computeKnownBits
- // calls in there are unnecessary as SimplifyDemandedInstructionBits should
- // have already taken care of those cases.
- Value *M;
- if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
- m_c_And(m_Deferred(M), m_Value()))))
- return BinaryOperator::CreateOr(Op0, Op1);
+Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
+ Value *NotOp;
+ if (!match(&I, m_Not(m_Value(NotOp))))
+ return nullptr;
// Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand.
- Value *X, *Y;
-
// We must eliminate the and/or (one-use) for these transforms to not increase
// the instruction count.
+ //
// ~(~X & Y) --> (X | ~Y)
// ~(Y & ~X) --> (X | ~Y)
- if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) {
+ //
+ // Note: The logical matches do not check for the commuted patterns because
+ // those are handled via SimplifySelectsFeedingBinaryOp().
+ Type *Ty = I.getType();
+ Value *X, *Y;
+ if (match(NotOp, m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y))))) {
Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
return BinaryOperator::CreateOr(X, NotY);
}
+ if (match(NotOp, m_OneUse(m_LogicalAnd(m_Not(m_Value(X)), m_Value(Y))))) {
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ return SelectInst::Create(X, ConstantInt::getTrue(Ty), NotY);
+ }
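+  // Editor's illustrative IR for the logical form above (i1 scalars assumed):
+  //   %na = xor i1 %x, true
+  //   %la = select i1 %na, i1 %y, i1 false    ; logical and: !x && y
+  //   %r  = xor i1 %la, true
+  // -->
+  //   %ny = xor i1 %y, true
+  //   %r  = select i1 %x, i1 true, i1 %ny     ; logical or:  x || !y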
+
// ~(~X | Y) --> (X & ~Y)
// ~(Y | ~X) --> (X & ~Y)
- if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) {
+ if (match(NotOp, m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y))))) {
Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
return BinaryOperator::CreateAnd(X, NotY);
}
-
- if (Instruction *Xor = visitMaskedMerge(I, Builder))
- return Xor;
+ if (match(NotOp, m_OneUse(m_LogicalOr(m_Not(m_Value(X)), m_Value(Y))))) {
+ Value *NotY = Builder.CreateNot(Y, Y->getName() + ".not");
+ return SelectInst::Create(X, NotY, ConstantInt::getFalse(Ty));
+ }
// Is this a 'not' (~) fed by a binary operator?
BinaryOperator *NotVal;
- if (match(&I, m_Not(m_BinOp(NotVal)))) {
+ if (match(NotOp, m_BinOp(NotVal))) {
if (NotVal->getOpcode() == Instruction::And ||
NotVal->getOpcode() == Instruction::Or) {
// Apply DeMorgan's Law when inverts are free:
@@ -3411,9 +3299,164 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
NotVal);
}
- // Use DeMorgan and reassociation to eliminate a 'not' op.
+ // not (cmp A, B) = !cmp A, B
+ CmpInst::Predicate Pred;
+ if (match(NotOp, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
+ cast<CmpInst>(NotOp)->setPredicate(CmpInst::getInversePredicate(Pred));
+ return replaceInstUsesWith(I, NotOp);
+ }
+
+ // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
+ // ~min(~X, ~Y) --> max(X, Y)
+ // ~max(~X, Y) --> min(X, ~Y)
+ auto *II = dyn_cast<IntrinsicInst>(NotOp);
+ if (II && II->hasOneUse()) {
+ if (match(NotOp, m_MaxOrMin(m_Value(X), m_Value(Y))) &&
+ isFreeToInvert(X, X->hasOneUse()) &&
+ isFreeToInvert(Y, Y->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+ Value *NotX = Builder.CreateNot(X);
+ Value *NotY = Builder.CreateNot(Y);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, NotX, NotY);
+ return replaceInstUsesWith(I, InvMaxMin);
+ }
+ if (match(NotOp, m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y)))) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
+ Value *NotY = Builder.CreateNot(Y);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
+ return replaceInstUsesWith(I, InvMaxMin);
+ }
+ }
+
+ // TODO: Remove folds if we canonicalize to intrinsics (see above).
+ // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
+ //
+ // %notx = xor i32 %x, -1
+ // %cmp1 = icmp sgt i32 %notx, %y
+ // %smax = select i1 %cmp1, i32 %notx, i32 %y
+ // %res = xor i32 %smax, -1
+ // =>
+ // %noty = xor i32 %y, -1
+ // %cmp2 = icmp slt %x, %noty
+ // %res = select i1 %cmp2, i32 %x, i32 %noty
+ //
+ // Same is applicable for smin/umax/umin.
+ if (NotOp->hasOneUse()) {
+ Value *LHS, *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(NotOp, LHS, RHS).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
+ Value *NotY = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
+ }
+
+ // It's possible we get here before the not has been simplified, so make
+ // sure the input to the not isn't freely invertible.
+ if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *NotX = Builder.CreateNot(LHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
+ }
+
+ // If both sides are freely invertible, then we can get rid of the xor
+ // completely.
+ if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
+ isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
+ Value *NotLHS = Builder.CreateNot(LHS);
+ Value *NotRHS = Builder.CreateNot(RHS);
+ return SelectInst::Create(
+ Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
+ NotLHS, NotRHS);
+ }
+ }
+
+ // Pull 'not' into operands of select if both operands are one-use compares
+ // or one is one-use compare and the other one is a constant.
+ // Inverting the predicates eliminates the 'not' operation.
+ // Example:
+  //   not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?)) -->
+  //   select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
+  //   not (select ?, (cmp TPred, ?, ?), true) -->
+  //   select ?, (cmp InvTPred, ?, ?), false
+ if (auto *Sel = dyn_cast<SelectInst>(NotOp)) {
+ Value *TV = Sel->getTrueValue();
+ Value *FV = Sel->getFalseValue();
+ auto *CmpT = dyn_cast<CmpInst>(TV);
+ auto *CmpF = dyn_cast<CmpInst>(FV);
+ bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
+ bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
+ if (InvertibleT && InvertibleF) {
+ if (CmpT)
+ CmpT->setPredicate(CmpT->getInversePredicate());
+ else
+ Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
+ if (CmpF)
+ CmpF->setPredicate(CmpF->getInversePredicate());
+ else
+ Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
+ return replaceInstUsesWith(I, Sel);
+ }
+ }
+ }
+
+ if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
+ return NewXor;
+
+ return nullptr;
+}
+
+// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
+// here. We should standardize that construct where it is needed or choose some
+// other way to ensure that commutated variants of patterns are not missed.
+Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
+ if (Value *V = SimplifyXorInst(I.getOperand(0), I.getOperand(1),
+ SQ.getWithInstruction(&I)))
+ return replaceInstUsesWith(I, V);
+
+ if (SimplifyAssociativeOrCommutative(I))
+ return &I;
+
+ if (Instruction *X = foldVectorBinop(I))
+ return X;
+
+ if (Instruction *NewXor = foldXorToXor(I, Builder))
+ return NewXor;
+
+ // (A&B)^(A&C) -> A&(B^C) etc
+ if (Value *V = SimplifyUsingDistributiveLaws(I))
+ return replaceInstUsesWith(I, V);
+
+ // See if we can simplify any instructions used by the instruction whose sole
+ // purpose is to compute bits we don't care about.
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+
+ if (Value *V = SimplifyBSwap(I, Builder))
+ return replaceInstUsesWith(I, V);
+
+ if (Instruction *R = foldNot(I))
+ return R;
+
+ // Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
+  // This is a special case in haveNoCommonBitsSet, but the computeKnownBits
+ // calls in there are unnecessary as SimplifyDemandedInstructionBits should
+ // have already taken care of those cases.
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *M;
+ if (match(&I, m_c_Xor(m_c_And(m_Not(m_Value(M)), m_Value()),
+ m_c_And(m_Deferred(M), m_Value()))))
+ return BinaryOperator::CreateOr(Op0, Op1);
+
+ if (Instruction *Xor = visitMaskedMerge(I, Builder))
+ return Xor;
+
+ Value *X, *Y;
Constant *C1;
if (match(Op1, m_Constant(C1))) {
+ // Use DeMorgan and reassociation to eliminate a 'not' op.
Constant *C2;
if (match(Op0, m_OneUse(m_Or(m_Not(m_Value(X)), m_Constant(C2))))) {
// (~X | C2) ^ C1 --> ((X & ~C2) ^ -1) ^ C1 --> (X & ~C2) ^ ~C1
@@ -3425,15 +3468,24 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
Value *Or = Builder.CreateOr(X, ConstantExpr::getNot(C2));
return BinaryOperator::CreateXor(Or, ConstantExpr::getNot(C1));
}
- }
- // not (cmp A, B) = !cmp A, B
- CmpInst::Predicate Pred;
- if (match(&I, m_Not(m_OneUse(m_Cmp(Pred, m_Value(), m_Value()))))) {
- cast<CmpInst>(Op0)->setPredicate(CmpInst::getInversePredicate(Pred));
- return replaceInstUsesWith(I, Op0);
+ // Convert xor ([trunc] (ashr X, BW-1)), C =>
+ // select(X >s -1, C, ~C)
+    // The ashr produces an all-zeros or all-ones value, which optionally
+    // inverts the constant depending on whether the input is negative.
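+    // Editor's illustrative IR (i32, C1 == 7):
+    //   %s = ashr i32 %x, 31
+    //   %r = xor i32 %s, 7
+    // -->
+    //   %cmp = icmp sgt i32 %x, -1
+    //   %r   = select i1 %cmp, i32 7, i32 -8    ; -8 == ~7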
+ const APInt *CA;
+ if (match(Op0, m_OneUse(m_TruncOrSelf(
+ m_AShr(m_Value(X), m_APIntAllowUndef(CA))))) &&
+ *CA == X->getType()->getScalarSizeInBits() - 1 &&
+ !match(C1, m_AllOnes())) {
+ assert(!C1->isZeroValue() && "Unexpected xor with 0");
+ Value *ICmp =
+ Builder.CreateICmpSGT(X, Constant::getAllOnesValue(X->getType()));
+ return SelectInst::Create(ICmp, Op1, Builder.CreateNot(Op1));
+ }
}
+ Type *Ty = I.getType();
{
const APInt *RHSC;
if (match(Op1, m_APInt(RHSC))) {
@@ -3456,13 +3508,13 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
// canonicalize to a 'not' before the shift to help SCEV and codegen:
// (X << C) ^ RHSC --> ~X << C
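      // e.g. for i8 with C == 3 (editor's illustration):
      //   (X << 3) ^ -8 --> (~X) << 3, since -8 == 0xFF << 3.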
if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_APInt(C)))) &&
- *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).shl(*C)) {
+ *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).shl(*C)) {
Value *NotX = Builder.CreateNot(X);
return BinaryOperator::CreateShl(NotX, ConstantInt::get(Ty, *C));
}
// (X >>u C) ^ RHSC --> ~X >>u C
if (match(Op0, m_OneUse(m_LShr(m_Value(X), m_APInt(C)))) &&
- *RHSC == APInt::getAllOnesValue(Ty->getScalarSizeInBits()).lshr(*C)) {
+ *RHSC == APInt::getAllOnes(Ty->getScalarSizeInBits()).lshr(*C)) {
Value *NotX = Builder.CreateNot(X);
return BinaryOperator::CreateLShr(NotX, ConstantInt::get(Ty, *C));
}
@@ -3572,101 +3624,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *CastedXor = foldCastedBitwiseLogic(I))
return CastedXor;
- // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
- // ~min(~X, ~Y) --> max(X, Y)
- // ~max(~X, Y) --> min(X, ~Y)
- auto *II = dyn_cast<IntrinsicInst>(Op0);
- if (II && match(Op1, m_AllOnes())) {
- if (match(Op0, m_MaxOrMin(m_Not(m_Value(X)), m_Not(m_Value(Y))))) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
- return replaceInstUsesWith(I, InvMaxMin);
- }
- if (match(Op0, m_OneUse(m_c_MaxOrMin(m_Not(m_Value(X)), m_Value(Y))))) {
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
- Value *NotY = Builder.CreateNot(Y);
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotY);
- return replaceInstUsesWith(I, InvMaxMin);
- }
- }
-
- // TODO: Remove folds if we canonicalize to intrinsics (see above).
- // Eliminate a bitwise 'not' op of 'not' min/max by inverting the min/max:
- //
- // %notx = xor i32 %x, -1
- // %cmp1 = icmp sgt i32 %notx, %y
- // %smax = select i1 %cmp1, i32 %notx, i32 %y
- // %res = xor i32 %smax, -1
- // =>
- // %noty = xor i32 %y, -1
- // %cmp2 = icmp slt %x, %noty
- // %res = select i1 %cmp2, i32 %x, i32 %noty
- //
- // Same is applicable for smin/umax/umin.
- if (match(Op1, m_AllOnes()) && Op0->hasOneUse()) {
- Value *LHS, *RHS;
- SelectPatternFlavor SPF = matchSelectPattern(Op0, LHS, RHS).Flavor;
- if (SelectPatternResult::isMinOrMax(SPF)) {
- // It's possible we get here before the not has been simplified, so make
- // sure the input to the not isn't freely invertible.
- if (match(LHS, m_Not(m_Value(X))) && !isFreeToInvert(X, X->hasOneUse())) {
- Value *NotY = Builder.CreateNot(RHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), X, NotY), X, NotY);
- }
-
- // It's possible we get here before the not has been simplified, so make
- // sure the input to the not isn't freely invertible.
- if (match(RHS, m_Not(m_Value(Y))) && !isFreeToInvert(Y, Y->hasOneUse())) {
- Value *NotX = Builder.CreateNot(LHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), NotX, Y), NotX, Y);
- }
-
- // If both sides are freely invertible, then we can get rid of the xor
- // completely.
- if (isFreeToInvert(LHS, !LHS->hasNUsesOrMore(3)) &&
- isFreeToInvert(RHS, !RHS->hasNUsesOrMore(3))) {
- Value *NotLHS = Builder.CreateNot(LHS);
- Value *NotRHS = Builder.CreateNot(RHS);
- return SelectInst::Create(
- Builder.CreateICmp(getInverseMinMaxPred(SPF), NotLHS, NotRHS),
- NotLHS, NotRHS);
- }
- }
-
- // Pull 'not' into operands of select if both operands are one-use compares
- // or one is one-use compare and the other one is a constant.
- // Inverting the predicates eliminates the 'not' operation.
- // Example:
- // not (select ?, (cmp TPred, ?, ?), (cmp FPred, ?, ?) -->
- // select ?, (cmp InvTPred, ?, ?), (cmp InvFPred, ?, ?)
- // not (select ?, (cmp TPred, ?, ?), true -->
- // select ?, (cmp InvTPred, ?, ?), false
- if (auto *Sel = dyn_cast<SelectInst>(Op0)) {
- Value *TV = Sel->getTrueValue();
- Value *FV = Sel->getFalseValue();
- auto *CmpT = dyn_cast<CmpInst>(TV);
- auto *CmpF = dyn_cast<CmpInst>(FV);
- bool InvertibleT = (CmpT && CmpT->hasOneUse()) || isa<Constant>(TV);
- bool InvertibleF = (CmpF && CmpF->hasOneUse()) || isa<Constant>(FV);
- if (InvertibleT && InvertibleF) {
- if (CmpT)
- CmpT->setPredicate(CmpT->getInversePredicate());
- else
- Sel->setTrueValue(ConstantExpr::getNot(cast<Constant>(TV)));
- if (CmpF)
- CmpF->setPredicate(CmpF->getInversePredicate());
- else
- Sel->setFalseValue(ConstantExpr::getNot(cast<Constant>(FV)));
- return replaceInstUsesWith(I, Sel);
- }
- }
- }
-
- if (Instruction *NewXor = sinkNotIntoXor(I, Builder))
- return NewXor;
-
if (Instruction *Abs = canonicalizeAbs(I, Builder))
return Abs;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 726bb545be12..bfa7bfa2290a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -67,7 +67,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -79,11 +78,12 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumSimplified, "Number of library calls simplified");
static cl::opt<unsigned> GuardWideningWindow(
@@ -513,7 +513,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
// If the input to cttz/ctlz is known to be non-zero,
// then change the 'ZeroIsUndef' parameter to 'true'
// because we know the zero behavior can't affect the result.
- if (!Known.One.isNullValue() ||
+ if (!Known.One.isZero() ||
isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
&IC.getDominatorTree())) {
if (!match(II.getArgOperand(1), m_One()))
@@ -656,8 +656,8 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II,
// comparison to the first NumOperands.
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
unsigned NumOperands) {
- assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
- assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
+ assert(I.arg_size() >= NumOperands && "Not enough operands");
+ assert(E.arg_size() >= NumOperands && "Not enough operands");
for (unsigned i = 0; i < NumOperands; i++)
if (I.getArgOperand(i) != E.getArgOperand(i))
return false;
@@ -682,11 +682,11 @@ removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
for (; BI != BE; ++BI) {
if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
- if (isa<DbgInfoIntrinsic>(I) ||
+ if (I->isDebugOrPseudoInst() ||
I->getIntrinsicID() == EndI.getIntrinsicID())
continue;
if (IsStart(*I)) {
- if (haveSameOperands(EndI, *I, EndI.getNumArgOperands())) {
+ if (haveSameOperands(EndI, *I, EndI.arg_size())) {
IC.eraseInstFromFunction(*I);
IC.eraseInstFromFunction(EndI);
return true;
@@ -710,7 +710,7 @@ Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
}
static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
- assert(Call.getNumArgOperands() > 1 && "Need at least 2 args to swap");
+ assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
Call.setArgOperand(0, Arg1);
@@ -754,6 +754,45 @@ static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
+/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
+/// can trigger other combines.
+static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
+ InstCombiner::BuilderTy &Builder) {
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
+ MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
+ "Expected a min or max intrinsic");
+
+ // TODO: Match vectors with undef elements, but undef may not propagate.
+ Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
+ Value *X;
+ const APInt *C0, *C1;
+ if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
+ !match(Op1, m_APInt(C1)))
+ return nullptr;
+
+ // Check for necessary no-wrap and overflow constraints.
+ bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
+ auto *Add = cast<BinaryOperator>(Op0);
+ if ((IsSigned && !Add->hasNoSignedWrap()) ||
+ (!IsSigned && !Add->hasNoUnsignedWrap()))
+ return nullptr;
+
+ // If the constant difference overflows, then instsimplify should reduce the
+ // min/max to the add or C1.
+ bool Overflow;
+ APInt CDiff =
+ IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
+ assert(!Overflow && "Expected simplify of min/max");
+
+ // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
+ // Note: the "mismatched" no-overflow setting does not propagate.
+ Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
+ Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
+ return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
+ : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
+}
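+// Editor's worked example of the fold above:
+//   umin (add nuw %x, 5), 20  -->  add nuw (umin %x, 15), 5
+// since 20 - 5 = 15 does not underflow and the nuw add cannot wrap.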
+
/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
/// can only be one of two possible constant values -- turn that into a select
/// of constants.
@@ -795,6 +834,63 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
}
+/// Reduce a sequence of min/max intrinsics with a common operand.
+static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
+ // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
+ auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
+ auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
+ Intrinsic::ID MinMaxID = II->getIntrinsicID();
+ if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
+ RHS->getIntrinsicID() != MinMaxID ||
+ (!LHS->hasOneUse() && !RHS->hasOneUse()))
+ return nullptr;
+
+ Value *A = LHS->getArgOperand(0);
+ Value *B = LHS->getArgOperand(1);
+ Value *C = RHS->getArgOperand(0);
+ Value *D = RHS->getArgOperand(1);
+
+ // Look for a common operand.
+ Value *MinMaxOp = nullptr;
+ Value *ThirdOp = nullptr;
+ if (LHS->hasOneUse()) {
+ // If the LHS is only used in this chain and the RHS is used outside of it,
+ // reuse the RHS min/max because that will eliminate the LHS.
+ if (D == A || C == A) {
+ // min(min(a, b), min(c, a)) --> min(min(c, a), b)
+ // min(min(a, b), min(a, d)) --> min(min(a, d), b)
+ MinMaxOp = RHS;
+ ThirdOp = B;
+ } else if (D == B || C == B) {
+ // min(min(a, b), min(c, b)) --> min(min(c, b), a)
+ // min(min(a, b), min(b, d)) --> min(min(b, d), a)
+ MinMaxOp = RHS;
+ ThirdOp = A;
+ }
+ } else {
+ assert(RHS->hasOneUse() && "Expected one-use operand");
+ // Reuse the LHS. This will eliminate the RHS.
+ if (D == A || D == B) {
+ // min(min(a, b), min(c, a)) --> min(min(a, b), c)
+ // min(min(a, b), min(c, b)) --> min(min(a, b), c)
+ MinMaxOp = LHS;
+ ThirdOp = C;
+ } else if (C == A || C == B) {
+ // min(min(a, b), min(b, d)) --> min(min(a, b), d)
+      // min(min(a, b), min(a, d)) --> min(min(a, b), d)
+ MinMaxOp = LHS;
+ ThirdOp = D;
+ }
+ }
+
+ if (!MinMaxOp || !ThirdOp)
+ return nullptr;
+
+ Module *Mod = II->getModule();
+ Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
+ return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -896,7 +992,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
auto VWidth = IIFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
if (V != II)
return replaceInstUsesWith(*II, V);
@@ -1007,21 +1103,45 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
- if (match(I0, m_Not(m_Value(X)))) {
- // max (not X), (not Y) --> not (min X, Y)
- Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
- if (match(I1, m_Not(m_Value(Y))) &&
+ if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
+ // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
+ // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
+ // TODO: Canonicalize neg after min/max if I1 is constant.
+ if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
(I0->hasOneUse() || I1->hasOneUse())) {
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
- return BinaryOperator::CreateNot(InvMaxMin);
+ return BinaryOperator::CreateNSWNeg(InvMaxMin);
}
- // max (not X), C --> not(min X, ~C)
- if (match(I1, m_Constant(C)) && I0->hasOneUse()) {
- Constant *NotC = ConstantExpr::getNot(C);
- Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, NotC);
+ }
+
+ // If we can eliminate ~A and Y is free to invert:
+ // max ~A, Y --> ~(min A, ~Y)
+ //
+ // Examples:
+ // max ~A, ~Y --> ~(min A, Y)
+ // max ~A, C --> ~(min A, ~C)
+  // max ~A, (max ~Y, ~Z) --> ~(min A, (min Y, Z))
+ auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
+ Value *A;
+ if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
+ !isFreeToInvert(A, A->hasOneUse()) &&
+ isFreeToInvert(Y, Y->hasOneUse())) {
+ Value *NotY = Builder.CreateNot(Y);
+ Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
+ Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
return BinaryOperator::CreateNot(InvMaxMin);
}
- }
+ return nullptr;
+ };
+
+ if (Instruction *I = moveNotAfterMinMax(I0, I1))
+ return I;
+ if (Instruction *I = moveNotAfterMinMax(I1, I0))
+ return I;
+
+ if (Instruction *I = moveAddAfterMinMax(II, Builder))
+ return I;
// smax(X, -X) --> abs(X)
// smin(X, -X) --> -abs(X)
@@ -1051,11 +1171,17 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
return Sel;
+ if (Instruction *SAdd = matchSAddSubSat(*II))
+ return SAdd;
+
if (match(I1, m_ImmConstant()))
if (auto *Sel = dyn_cast<SelectInst>(I0))
if (Instruction *R = FoldOpIntoSelect(*II, Sel))
return R;
+ if (Instruction *NewMinMax = factorizeMinMaxTree(II))
+ return NewMinMax;
+
break;
}
case Intrinsic::bswap: {
@@ -1098,6 +1224,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Power->equalsInt(2))
return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
II->getArgOperand(0), II);
+
+ if (!Power->getValue()[0]) {
+ Value *X;
+ // If power is even:
+ // powi(-x, p) -> powi(x, p)
+ // powi(fabs(x), p) -> powi(x, p)
+ // powi(copysign(x, y), p) -> powi(x, p)
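+      // Worked example (editor's note): powi(-2.0, 4) == powi(2.0, 4) == 16.0,
+      // so the sign of the base is irrelevant whenever the exponent's low bit
+      // is clear.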
+ if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
+ match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
+ match(II->getArgOperand(0),
+ m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
+ return replaceOperand(*II, 0, X);
+ }
}
break;
@@ -1637,14 +1776,66 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::stackrestore: {
- // If the save is right next to the restore, remove the restore. This can
- // happen when variable allocas are DCE'd.
+ enum class ClassifyResult {
+ None,
+ Alloca,
+ StackRestore,
+ CallWithSideEffects,
+ };
+ auto Classify = [](const Instruction *I) {
+ if (isa<AllocaInst>(I))
+ return ClassifyResult::Alloca;
+
+ if (auto *CI = dyn_cast<CallInst>(I)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
+ if (II->getIntrinsicID() == Intrinsic::stackrestore)
+ return ClassifyResult::StackRestore;
+
+ if (II->mayHaveSideEffects())
+ return ClassifyResult::CallWithSideEffects;
+ } else {
+        // Consider all non-intrinsic calls to have side effects
+ return ClassifyResult::CallWithSideEffects;
+ }
+ }
+
+ return ClassifyResult::None;
+ };
+
+ // If the stacksave and the stackrestore are in the same BB, and there is
+ // no intervening call, alloca, or stackrestore of a different stacksave,
+ // remove the restore. This can happen when variable allocas are DCE'd.
if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
- if (SS->getIntrinsicID() == Intrinsic::stacksave) {
- // Skip over debug info.
- if (SS->getNextNonDebugInstruction() == II) {
- return eraseInstFromFunction(CI);
+ if (SS->getIntrinsicID() == Intrinsic::stacksave &&
+ SS->getParent() == II->getParent()) {
+ BasicBlock::iterator BI(SS);
+ bool CannotRemove = false;
+ for (++BI; &*BI != II; ++BI) {
+ switch (Classify(&*BI)) {
+ case ClassifyResult::None:
+ // So far so good, look at next instructions.
+ break;
+
+ case ClassifyResult::StackRestore:
+ // If we found an intervening stackrestore for a different
+ // stacksave, we can't remove the stackrestore. Otherwise, continue.
+ if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
+ CannotRemove = true;
+ break;
+
+ case ClassifyResult::Alloca:
+ case ClassifyResult::CallWithSideEffects:
+ // If we found an alloca, a non-intrinsic call, or an intrinsic
+ // call with side effects, we can't remove the stackrestore.
+ CannotRemove = true;
+ break;
+ }
+ if (CannotRemove)
+ break;
}
+
+ if (!CannotRemove)
+ return eraseInstFromFunction(CI);
}
}
@@ -1654,29 +1845,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Instruction *TI = II->getParent()->getTerminator();
bool CannotRemove = false;
for (++BI; &*BI != TI; ++BI) {
- if (isa<AllocaInst>(BI)) {
+ switch (Classify(&*BI)) {
+ case ClassifyResult::None:
+ // So far so good, look at next instructions.
+ break;
+
+ case ClassifyResult::StackRestore:
+ // If there is a stackrestore below this one, remove this one.
+ return eraseInstFromFunction(CI);
+
+ case ClassifyResult::Alloca:
+ case ClassifyResult::CallWithSideEffects:
+ // If we found an alloca, a non-intrinsic call, or an intrinsic call
+ // with side effects (such as llvm.stacksave and llvm.read_register),
+ // we can't remove the stack restore.
CannotRemove = true;
break;
}
- if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
- if (auto *II2 = dyn_cast<IntrinsicInst>(BCI)) {
- // If there is a stackrestore below this one, remove this one.
- if (II2->getIntrinsicID() == Intrinsic::stackrestore)
- return eraseInstFromFunction(CI);
-
- // Bail if we cross over an intrinsic with side effects, such as
- // llvm.stacksave, or llvm.read_register.
- if (II2->mayHaveSideEffects()) {
- CannotRemove = true;
- break;
- }
- } else {
- // If we found a non-intrinsic call, we can't remove the stack
- // restore.
- CannotRemove = true;
- break;
- }
- }
+ if (CannotRemove)
+ break;
}
// If the stack restore is in a return, resume, or unwind block and if there
@@ -1963,6 +2150,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::experimental_vector_reverse: {
+ Value *BO0, *BO1, *X, *Y;
+ Value *Vec = II->getArgOperand(0);
+ if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
+ auto *OldBinOp = cast<BinaryOperator>(Vec);
+ if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(X)))) {
+ // rev(binop rev(X), rev(Y)) --> binop X, Y
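+        // Editor's illustrative IR for this fold (v4i32 assumed):
+        //   %xr = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %x)
+        //   %yr = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %y)
+        //   %b  = add <4 x i32> %xr, %yr
+        //   %r  = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %b)
+        // --> %r = add <4 x i32> %x, %y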
+ if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(Y))))
+ return replaceInstUsesWith(CI,
+ BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), X, Y, OldBinOp,
+ OldBinOp->getName(), II));
+ // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
+ if (isSplatValue(BO1))
+ return replaceInstUsesWith(CI,
+ BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), X, BO1,
+ OldBinOp, OldBinOp->getName(), II));
+ }
+ // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
+ if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(Y))) &&
+ isSplatValue(BO0))
+ return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
+ OldBinOp->getOpcode(), BO0, Y,
+ OldBinOp, OldBinOp->getName(), II));
+ }
+ // rev(unop rev(X)) --> unop X
+ if (match(Vec, m_OneUse(m_UnOp(
+ m_Intrinsic<Intrinsic::experimental_vector_reverse>(
+ m_Value(X)))))) {
+ auto *OldUnOp = cast<UnaryOperator>(Vec);
+ auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
+ OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
+ return replaceInstUsesWith(CI, NewUnOp);
+ }
+ break;
+ }
case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_and: {
// Canonicalize logical or/and reductions:
@@ -1973,21 +2200,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// %val = bitcast <ReduxWidth x i1> to iReduxWidth
// %res = cmp eq iReduxWidth %val, 11111
Value *Arg = II->getArgOperand(0);
- Type *RetTy = II->getType();
- if (RetTy == Builder.getInt1Ty())
- if (auto *FVTy = dyn_cast<FixedVectorType>(Arg->getType())) {
- Value *Res = Builder.CreateBitCast(
- Arg, Builder.getIntNTy(FVTy->getNumElements()));
- if (IID == Intrinsic::vector_reduce_and) {
- Res = Builder.CreateICmpEQ(
- Res, ConstantInt::getAllOnesValue(Res->getType()));
- } else {
- assert(IID == Intrinsic::vector_reduce_or &&
- "Expected or reduction.");
- Res = Builder.CreateIsNotNull(Res);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = Builder.CreateBitCast(
+ Vect, Builder.getIntNTy(FTy->getNumElements()));
+ if (IID == Intrinsic::vector_reduce_and) {
+ Res = Builder.CreateICmpEQ(
+ Res, ConstantInt::getAllOnesValue(Res->getType()));
+ } else {
+ assert(IID == Intrinsic::vector_reduce_or &&
+ "Expected or reduction.");
+ Res = Builder.CreateIsNotNull(Res);
+ }
+ if (Arg != Vect)
+ Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
+ II->getType());
+ return replaceInstUsesWith(CI, Res);
}
- return replaceInstUsesWith(CI, Res);
- }
+ }
LLVM_FALLTHROUGH;
}
case Intrinsic::vector_reduce_add: {
@@ -2017,12 +2249,117 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
LLVM_FALLTHROUGH;
}
- case Intrinsic::vector_reduce_mul:
- case Intrinsic::vector_reduce_xor:
- case Intrinsic::vector_reduce_umax:
+ case Intrinsic::vector_reduce_xor: {
+ if (IID == Intrinsic::vector_reduce_xor) {
+ // Exclusive disjunction reduction over the vector with
+ // (potentially-extended) i1 element type is actually a
+ // (potentially-extended) arithmetic `add` reduction over the original
+ // non-extended value:
+ // vector_reduce_xor(?ext(<n x i1>))
+ // -->
+ // ?ext(vector_reduce_add(<n x i1>))
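+      // Editor's worked example: for <4 x i1> <1, 1, 1, 0> the xor reduction
+      // is 1^1^1^0 == 1, and the i1 add reduction 1+1+1+0 also wraps to 1.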
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = Builder.CreateAddReduce(Vect);
+ if (Arg != Vect)
+ Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
+ II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
+ case Intrinsic::vector_reduce_mul: {
+ if (IID == Intrinsic::vector_reduce_mul) {
+ // Multiplicative reduction over the vector with (potentially-extended)
+ // i1 element type is actually a (potentially zero-extended)
+ // logical `and` reduction over the original non-extended value:
+ // vector_reduce_mul(?ext(<n x i1>))
+ // -->
+ // zext(vector_reduce_and(<n x i1>))
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = Builder.CreateAndReduce(Vect);
+ if (Res->getType() != II->getType())
+ Res = Builder.CreateZExt(Res, II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::vector_reduce_umin:
- case Intrinsic::vector_reduce_smax:
+ case Intrinsic::vector_reduce_umax: {
+ if (IID == Intrinsic::vector_reduce_umin ||
+ IID == Intrinsic::vector_reduce_umax) {
+ // UMin/UMax reduction over the vector with (potentially-extended)
+ // i1 element type is actually a (potentially-extended)
+ // logical `and`/`or` reduction over the original non-extended value:
+ // vector_reduce_u{min,max}(?ext(<n x i1>))
+ // -->
+ // ?ext(vector_reduce_{and,or}(<n x i1>))
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Value *Res = IID == Intrinsic::vector_reduce_umin
+ ? Builder.CreateAndReduce(Vect)
+ : Builder.CreateOrReduce(Vect);
+ if (Arg != Vect)
+ Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
+ II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::vector_reduce_smin:
+ case Intrinsic::vector_reduce_smax: {
+ if (IID == Intrinsic::vector_reduce_smin ||
+ IID == Intrinsic::vector_reduce_smax) {
+ // SMin/SMax reduction over the vector with (potentially-extended)
+ // i1 element type is actually a (potentially-extended)
+ // logical `and`/`or` reduction over the original non-extended value:
+ // vector_reduce_s{min,max}(<n x i1>)
+ // -->
+ // vector_reduce_{or,and}(<n x i1>)
+ // and
+ // vector_reduce_s{min,max}(sext(<n x i1>))
+ // -->
+ // sext(vector_reduce_{or,and}(<n x i1>))
+ // and
+ // vector_reduce_s{min,max}(zext(<n x i1>))
+ // -->
+ // zext(vector_reduce_{and,or}(<n x i1>))
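+      // Editor's worked example: for <2 x i1> <1, 0>, smin of the sext'd
+      // values is min(-1, 0) == -1 (the OR, sign-extended), while smin of the
+      // zext'd values is min(1, 0) == 0 (the AND, zero-extended).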
+ Value *Arg = II->getArgOperand(0);
+ Value *Vect;
+ if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
+ if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
+ if (Arg != Vect)
+ ExtOpc = cast<CastInst>(Arg)->getOpcode();
+ Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
+ (ExtOpc == Instruction::CastOps::ZExt))
+ ? Builder.CreateAndReduce(Vect)
+ : Builder.CreateOrReduce(Vect);
+ if (Arg != Vect)
+ Res = Builder.CreateCast(ExtOpc, Res, II->getType());
+ return replaceInstUsesWith(CI, Res);
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ }
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
case Intrinsic::vector_reduce_fadd:
@@ -2228,7 +2565,7 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
}
void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI) {
- unsigned NumArgs = Call.getNumArgOperands();
+ unsigned NumArgs = Call.arg_size();
ConstantInt *Op0C = dyn_cast<ConstantInt>(Call.getOperand(0));
ConstantInt *Op1C =
(NumArgs == 1) ? nullptr : dyn_cast<ConstantInt>(Call.getOperand(1));
@@ -2239,55 +2576,46 @@ void InstCombinerImpl::annotateAnyAllocSite(CallBase &Call, const TargetLibraryI
if (isMallocLikeFn(&Call, TLI) && Op0C) {
if (isOpNewLikeFn(&Call, TLI))
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableBytes(
- Call.getContext(), Op0C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableBytes(
+ Call.getContext(), Op0C->getZExtValue()));
else
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op0C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op0C->getZExtValue()));
} else if (isAlignedAllocLikeFn(&Call, TLI)) {
if (Op1C)
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op1C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op1C->getZExtValue()));
// Add alignment attribute if alignment is a power of two constant.
if (Op0C && Op0C->getValue().ult(llvm::Value::MaximumAlignment) &&
isKnownNonZero(Call.getOperand(1), DL, 0, &AC, &Call, &DT)) {
uint64_t AlignmentVal = Op0C->getZExtValue();
if (llvm::isPowerOf2_64(AlignmentVal)) {
- Call.removeAttribute(AttributeList::ReturnIndex, Attribute::Alignment);
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithAlignment(Call.getContext(),
- Align(AlignmentVal)));
+ Call.removeRetAttr(Attribute::Alignment);
+ Call.addRetAttr(Attribute::getWithAlignment(Call.getContext(),
+ Align(AlignmentVal)));
}
}
} else if (isReallocLikeFn(&Call, TLI) && Op1C) {
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Op1C->getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Op1C->getZExtValue()));
} else if (isCallocLikeFn(&Call, TLI) && Op0C && Op1C) {
bool Overflow;
const APInt &N = Op0C->getValue();
APInt Size = N.umul_ov(Op1C->getValue(), Overflow);
if (!Overflow)
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Size.getZExtValue()));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Size.getZExtValue()));
} else if (isStrdupLikeFn(&Call, TLI)) {
uint64_t Len = GetStringLength(Call.getOperand(0));
if (Len) {
// strdup
if (NumArgs == 1)
- Call.addAttribute(AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), Len));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), Len));
// strndup
else if (NumArgs == 2 && Op1C)
- Call.addAttribute(
- AttributeList::ReturnIndex,
- Attribute::getWithDereferenceableOrNullBytes(
- Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1)));
+ Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
+ Call.getContext(), std::min(Len, Op1C->getZExtValue() + 1)));
}
}
}
@@ -2489,7 +2817,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
// isKnownNonNull -> nonnull attribute
if (!GCR.hasRetAttr(Attribute::NonNull) &&
isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
- GCR.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+ GCR.addRetAttr(Attribute::NonNull);
// We discovered new fact, re-check users.
Worklist.pushUsersToWorkList(GCR);
}
@@ -2646,19 +2974,19 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
return false; // Cannot transform this parameter value.
- if (AttrBuilder(CallerPAL.getParamAttributes(i))
+ if (AttrBuilder(CallerPAL.getParamAttrs(i))
.overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
return false; // Attribute not compatible with transformed value.
if (Call.isInAllocaArgument(i))
return false; // Cannot transform to and from inalloca.
- if (CallerPAL.hasParamAttribute(i, Attribute::SwiftError))
+ if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
return false;
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
+ if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (!ParamPTy || !ParamPTy->getElementType()->isSized())
return false;
@@ -2699,7 +3027,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// that are compatible with being a vararg call argument.
unsigned SRetIdx;
if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
- SRetIdx > FT->getNumParams())
+ SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
return false;
}
@@ -2728,12 +3056,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Args.push_back(NewArg);
// Add any parameter attributes.
- if (CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
- AttrBuilder AB(CallerPAL.getParamAttributes(i));
+ if (CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
+ AttrBuilder AB(CallerPAL.getParamAttrs(i));
AB.addByValAttr(NewArg->getType()->getPointerElementType());
ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
} else
- ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+ ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
}
// If the function takes more arguments than the call was taking, add them
@@ -2760,12 +3088,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
Args.push_back(NewArg);
// Add any parameter attributes.
- ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
+ ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
}
}
}
- AttributeSet FnAttrs = CallerPAL.getFnAttributes();
+ AttributeSet FnAttrs = CallerPAL.getFnAttrs();
if (NewRetTy->isVoidTy())
Caller->setName(""); // Void type should not have a name.
@@ -2866,7 +3194,7 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
for (FunctionType::param_iterator I = NestFTy->param_begin(),
E = NestFTy->param_end();
I != E; ++NestArgNo, ++I) {
- AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
+ AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
if (AS.hasAttribute(Attribute::Nest)) {
// Record the parameter type and any other attributes.
NestTy = *I;
@@ -2902,7 +3230,7 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
// Add the original argument and attributes.
NewArgs.push_back(*I);
- NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
+ NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
++ArgNo;
++I;
@@ -2948,8 +3276,8 @@ InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
AttributeList NewPAL =
- AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), NewArgAttrs);
+ AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), NewArgAttrs);
SmallVector<OperandBundleDef, 1> OpBundles;
Call.getOperandBundlesAsDefs(OpBundles);
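Aside (not part of the patch): the calloc-like fold earlier in this file only attaches a dereferenceable_or_null(N * Size) return attribute when the APInt multiplication does not overflow (umul_ov). A minimal stand-alone C++ sketch of that guard, using the GCC/Clang builtin __builtin_mul_overflow as a stand-in for APInt::umul_ov; the helper name is illustrative only:

#include <cstdint>
#include <cstdio>

// Sketch of the calloc-size guard: refuse to report a byte count (and hence
// refuse to add the attribute) when NMemb * Size wraps in 64 bits.
static bool computeCallocBytes(uint64_t NMemb, uint64_t Size, uint64_t &Bytes) {
  return !__builtin_mul_overflow(NMemb, Size, &Bytes); // true if no overflow
}

int main() {
  uint64_t Bytes;
  if (computeCallocBytes(16, 32, Bytes))
    std::printf("dereferenceable_or_null(%llu)\n", (unsigned long long)Bytes);
  if (!computeCallocBytes(UINT64_MAX, 2, Bytes))
    std::printf("overflow: no attribute added\n");
  return 0;
}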
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 04877bec94ec..ca87477c5d81 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -333,7 +333,7 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
SrcTy->getNumElements() == DestTy->getNumElements() &&
SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
- return new ShuffleVectorInst(CastX, UndefValue::get(DestTy), Mask);
+ return new ShuffleVectorInst(CastX, Mask);
}
}
@@ -701,10 +701,10 @@ static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
is_splat(Shuf->getShuffleMask()) &&
Shuf->getType() == Shuf->getOperand(0)->getType()) {
- // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
- Constant *NarrowUndef = UndefValue::get(Trunc.getType());
+ // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
+ // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
- return new ShuffleVectorInst(NarrowOp, NarrowUndef, Shuf->getShuffleMask());
+ return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
}
return nullptr;
@@ -961,14 +961,25 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
}
}
+
+ if (match(Src, m_VScale(DL))) {
+ if (Trunc.getFunction() &&
+ Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned MaxVScale = Trunc.getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (MaxVScale > 0 && Log2_32(MaxVScale) < DestWidth) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(Trunc, VScale);
+ }
+ }
+ }
+
return nullptr;
}
-/// Transform (zext icmp) to bitwise / integer operations in order to
-/// eliminate it. If DoTransform is false, just test whether the given
-/// (zext icmp) can be transformed.
-Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
- bool DoTransform) {
+Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
@@ -977,10 +988,8 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
// zext (x <s 0) to i32 --> x>>u31 true if signbit set.
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
- if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
- (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
- if (!DoTransform) return Cmp;
-
+ if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) ||
+ (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnes())) {
Value *In = Cmp->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
In->getType()->getScalarSizeInBits() - 1);
@@ -1004,7 +1013,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
// zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
// zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
// zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
- if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
+ if ((Op1CV->isZero() || Op1CV->isPowerOf2()) &&
// This only works for EQ and NE
Cmp->isEquality()) {
// If Op1C some other power of two, convert:
@@ -1012,10 +1021,8 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
APInt KnownZeroMask(~Known.Zero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoTransform) return Cmp;
-
bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
- if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
+ if (!Op1CV->isZero() && (*Op1CV != KnownZeroMask)) {
// (X&4) == 2 --> false
// (X&4) != 2 --> true
Constant *Res = ConstantInt::get(Zext.getType(), isNE);
@@ -1031,7 +1038,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
In->getName() + ".lobit");
}
- if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
+ if (!Op1CV->isZero() == isNE) { // Toggle the low bit.
Constant *One = ConstantInt::get(In->getType(), 1);
In = Builder.CreateXor(In, One);
}
@@ -1053,9 +1060,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
match(Cmp->getOperand(0),
m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
- if (!DoTransform)
- return Cmp;
-
if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
X = Builder.CreateNot(X);
Value *Lshr = Builder.CreateLShr(X, ShAmt);
@@ -1077,8 +1081,6 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
- if (!DoTransform) return Cmp;
-
Value *Result = Builder.CreateXor(LHS, RHS);
// Mask off any bits that are set and won't be shifted away.
@@ -1316,51 +1318,37 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
return transformZExtICmp(Cmp, CI);
- BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
- if (SrcI && SrcI->getOpcode() == Instruction::Or) {
- // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
- // of the (zext icmp) can be eliminated. If so, immediately perform the
- // according elimination.
- ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
- ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
- if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
- LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
- (transformZExtICmp(LHS, CI, false) ||
- transformZExtICmp(RHS, CI, false))) {
- // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
- Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
- Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
- Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
- if (auto *OrInst = dyn_cast<Instruction>(Or))
- Builder.SetInsertPoint(OrInst);
-
- // Perform the elimination.
- if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
- transformZExtICmp(LHS, *LZExt);
- if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
- transformZExtICmp(RHS, *RZExt);
-
- return replaceInstUsesWith(CI, Or);
- }
- }
-
// zext(trunc(X) & C) -> (X & zext(C)).
Constant *C;
Value *X;
- if (SrcI &&
- match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
+ if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
X->getType() == CI.getType())
return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
// zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
Value *And;
- if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
+ if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
X->getType() == CI.getType()) {
Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
}
+ if (match(Src, m_VScale(DL))) {
+ if (CI.getFunction() &&
+ CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned MaxVScale = CI.getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
+ if (MaxVScale > 0 && Log2_32(MaxVScale) < TypeWidth) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(CI, VScale);
+ }
+ }
+ }
+
return nullptr;
}
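Aside (not part of the patch): a stand-alone check of the zext(trunc(X) & C) --> X & zext(C) fold kept in the hunk above, using i32 X and an i8 truncation as a concrete instance, with plain C++ standing in for the IR:

#include <cassert>
#include <cstdint>

// zext(trunc(X) & C) == X & zext(C): both sides ignore the high bits of X
// and keep only the low byte masked by C.
int main() {
  const uint8_t TestC[] = {0x00, 0x01, 0x3C, 0x80, 0xFF};
  for (uint32_t Low = 0; Low <= 0xFFFF; ++Low) {
    // Exercise high bits of X too; they must be ignored by both forms.
    uint32_t Xs[2] = {Low, Low | 0xABCD0000u};
    for (uint32_t X : Xs) {
      for (uint8_t C : TestC) {
        uint32_t Narrow = static_cast<uint32_t>(static_cast<uint8_t>(X) & C);
        uint32_t Wide = X & static_cast<uint32_t>(C);
        assert(Narrow == Wide);
      }
    }
  }
  return 0;
}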
@@ -1605,6 +1593,32 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
return BinaryOperator::CreateAShr(A, NewShAmt);
}
+ // Splatting a bit of constant-index across a value:
+ // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
+ // TODO: If the dest type is different, use a cast (adjust use check).
+ if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
+ m_SpecificInt(SrcBitSize - 1)))) &&
+ X->getType() == DestTy) {
+ Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
+ Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
+ Value *Shl = Builder.CreateShl(X, ShlAmtC);
+ return BinaryOperator::CreateAShr(Shl, AshrAmtC);
+ }
+
+ if (match(Src, m_VScale(DL))) {
+ if (CI.getFunction() &&
+ CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned MaxVScale = CI.getFunction()
+ ->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (MaxVScale > 0 && Log2_32(MaxVScale) < (SrcBitSize - 1)) {
+ Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
+ return replaceInstUsesWith(CI, VScale);
+ }
+ }
+ }
+
return nullptr;
}
@@ -2060,6 +2074,19 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
+ // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
+ // While this can increase the number of instructions it doesn't actually
+ // increase the overall complexity since the arithmetic is just part of
+ // the GEP otherwise.
+ if (GEP->hasOneUse() &&
+ isa<ConstantPointerNull>(GEP->getPointerOperand())) {
+ return replaceInstUsesWith(CI,
+ Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
+ /*isSigned=*/false));
+ }
+ }
+
Value *Vec, *Scalar, *Index;
if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
m_Value(Scalar), m_Value(Index)))) &&
@@ -2133,9 +2160,9 @@ optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
if (SrcElts > DestElts) {
// If we're shrinking the number of elements (rewriting an integer
// truncate), just shuffle in the elements corresponding to the least
- // significant bits from the input and use undef as the second shuffle
+ // significant bits from the input and use poison as the second shuffle
// input.
- V2 = UndefValue::get(SrcTy);
+ V2 = PoisonValue::get(SrcTy);
// Make sure the shuffle mask selects the "least significant bits" by
// keeping elements from back of the src vector for big endian, and from the
// front for little endian.
@@ -2528,7 +2555,7 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
// As long as the user is another old PHI node, then even if we don't
// rewrite it, the PHI web we're considering won't have any users
// outside itself, so it'll be dead.
- if (OldPhiNodes.count(PHI) == 0)
+ if (!OldPhiNodes.contains(PHI))
return nullptr;
} else {
return nullptr;
@@ -2736,6 +2763,30 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
return new BitCastInst(InsElt->getOperand(1), DestTy);
}
+
+ // Convert an artificial vector insert into more analyzable bitwise logic.
+ unsigned BitWidth = DestTy->getScalarSizeInBits();
+ Value *X, *Y;
+ uint64_t IndexC;
+ if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
+ m_Value(Y), m_ConstantInt(IndexC)))) &&
+ DestTy->isIntegerTy() && X->getType() == DestTy &&
+ isDesirableIntType(BitWidth)) {
+ // Adjust for big endian - the LSBs are at the high index.
+ if (DL.isBigEndian())
+ IndexC = SrcVTy->getNumElements() - 1 - IndexC;
+
+ // We only handle (endian-normalized) insert to index 0. Any other insert
+ // would require a left-shift, so that is an extra instruction.
+ if (IndexC == 0) {
+ // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
+ unsigned EltWidth = Y->getType()->getScalarSizeInBits();
+ APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
+ Value *AndX = Builder.CreateAnd(X, MaskC);
+ Value *ZextY = Builder.CreateZExt(Y, DestTy);
+ return BinaryOperator::CreateOr(AndX, ZextY);
+ }
+ }
}
if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
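Aside (not part of the patch): the new bitcast(inselt(bitcast X), Y, 0) --> or(and X, MaskC), (zext Y) fold above can be checked on a host by viewing an i32 as a <4 x i8>. This sketch assumes a little-endian host, matching the index adjustment done via DL.isBigEndian() in the code:

#include <cassert>
#include <cstdint>
#include <cstring>

// Writing element 0 of a <4 x i8> viewed as an i32 equals the bitwise form
// produced by the combine: (X & 0xFFFFFF00) | zext(Y).
int main() {
  uint32_t X = 0xDEADBEEF;
  uint8_t Y = 0x42;

  // "insertelement" through memory: overwrite byte 0 of the i32.
  uint8_t Bytes[4];
  std::memcpy(Bytes, &X, 4);
  Bytes[0] = Y;                               // inselt (bitcast X), Y, 0
  uint32_t ViaVector;
  std::memcpy(&ViaVector, Bytes, 4);

  // Bitwise form produced by the combine.
  uint32_t ViaMask = (X & 0xFFFFFF00u) | static_cast<uint32_t>(Y);

  assert(ViaVector == ViaMask);
  return 0;
}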
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2b0ef0c5f2cc..7a9e177f19da 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -78,15 +78,15 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
if (!ICmpInst::isSigned(Pred))
return false;
- if (C.isNullValue())
+ if (C.isZero())
return ICmpInst::isRelational(Pred);
- if (C.isOneValue()) {
+ if (C.isOne()) {
if (Pred == ICmpInst::ICMP_SLT) {
Pred = ICmpInst::ICMP_SLE;
return true;
}
- } else if (C.isAllOnesValue()) {
+ } else if (C.isAllOnes()) {
if (Pred == ICmpInst::ICMP_SGT) {
Pred = ICmpInst::ICMP_SGE;
return true;
@@ -541,7 +541,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
if (!CI->isNoopCast(DL))
return false;
- if (Explored.count(CI->getOperand(0)) == 0)
+ if (!Explored.contains(CI->getOperand(0)))
WorkList.push_back(CI->getOperand(0));
}
@@ -553,7 +553,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
GEP->getType() != Start->getType())
return false;
- if (Explored.count(GEP->getOperand(0)) == 0)
+ if (!Explored.contains(GEP->getOperand(0)))
WorkList.push_back(GEP->getOperand(0));
}
@@ -575,7 +575,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
// Explore the PHI nodes further.
for (auto *PN : PHIs)
for (Value *Op : PN->incoming_values())
- if (Explored.count(Op) == 0)
+ if (!Explored.contains(Op))
WorkList.push_back(Op);
}
@@ -589,7 +589,7 @@ static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
auto *Inst = dyn_cast<Instruction>(Val);
if (Inst == Base || Inst == PHI || !Inst || !PHI ||
- Explored.count(PHI) == 0)
+ !Explored.contains(PHI))
continue;
if (PHI->getParent() == Inst->getParent())
@@ -1147,12 +1147,12 @@ Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
};
// Don't bother doing any work for cases which InstSimplify handles.
- if (AP2.isNullValue())
+ if (AP2.isZero())
return nullptr;
bool IsAShr = isa<AShrOperator>(I.getOperand(0));
if (IsAShr) {
- if (AP2.isAllOnesValue())
+ if (AP2.isAllOnes())
return nullptr;
if (AP2.isNegative() != AP1.isNegative())
return nullptr;
@@ -1178,7 +1178,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
if (IsAShr && AP1 == AP2.ashr(Shift)) {
// There are multiple solutions if we are comparing against -1 and the LHS
// of the ashr is not a power of two.
- if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
+ if (AP1.isAllOnes() && !AP2.isPowerOf2())
return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
} else if (AP1 == AP2.lshr(Shift)) {
@@ -1206,7 +1206,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
};
// Don't bother doing any work for cases which InstSimplify handles.
- if (AP2.isNullValue())
+ if (AP2.isZero())
return nullptr;
unsigned AP2TrailingZeros = AP2.countTrailingZeros();
@@ -1270,9 +1270,8 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// This is only really a signed overflow check if the inputs have been
// sign-extended; check for that condition. For example, if CI2 is 2^31 and
// the operands of the add are 64 bits wide, we need at least 33 sign bits.
- unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
- if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
- IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
+ if (IC.ComputeMinSignedBits(A, 0, &I) > NewWidth ||
+ IC.ComputeMinSignedBits(B, 0, &I) > NewWidth)
return nullptr;
// In order to replace the original add with a narrower
@@ -1544,7 +1543,7 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Trunc->getOperand(0);
- if (C.isOneValue() && C.getBitWidth() > 1) {
+ if (C.isOne() && C.getBitWidth() > 1) {
// icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
Value *V = nullptr;
if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
@@ -1725,7 +1724,7 @@ Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
// Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is
// preferable because it allows the C2 << Y expression to be hoisted out of a
// loop if Y is invariant and X is not.
- if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
+ if (Shift->hasOneUse() && C1.isZero() && Cmp.isEquality() &&
!Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
// Compute C2 << Y.
Value *NewShift =
@@ -1749,7 +1748,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
// For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
// TODO: We canonicalize to the longer form for scalars because we have
// better analysis/folds for icmp, and codegen may be better with icmp.
- if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isNullValue() &&
+ if (isICMP_NE && Cmp.getType()->isVectorTy() && C1.isZero() &&
match(And->getOperand(1), m_One()))
return new TruncInst(And->getOperand(0), Cmp.getType());
@@ -1762,7 +1761,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
if (!And->hasOneUse())
return nullptr;
- if (Cmp.isEquality() && C1.isNullValue()) {
+ if (Cmp.isEquality() && C1.isZero()) {
// Restrict this fold to single-use 'and' (PR10267).
// Replace (and X, (1 << size(X)-1) != 0) with X s< 0
if (C2->isSignMask()) {
@@ -1812,7 +1811,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
// (icmp pred (and A, (or (shl 1, B), 1), 0))
//
// iff pred isn't signed
- if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
+ if (!Cmp.isSigned() && C1.isZero() && And->getOperand(0)->hasOneUse() &&
match(And->getOperand(1), m_One())) {
Constant *One = cast<Constant>(And->getOperand(1));
Value *Or = And->getOperand(0);
@@ -1889,7 +1888,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
// X & -C == -C -> X > u ~C
// X & -C != -C -> X <= u ~C
// iff C is a power of 2
- if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
+ if (Cmp.getOperand(1) == Y && C.isNegatedPowerOf2()) {
auto NewPred =
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
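Aside (not part of the patch): the isNegatedPowerOf2() condition above covers the comment's "X & -C == -C -> X >u ~C" pattern. A stand-alone exhaustive check on i8 with C = 8 (so the mask -C is 0xF8 and ~C == mask - 1, which is what SubOne produces):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t MaskC = 0xF8;              // -C with C = 8, a power of 2
  const uint8_t NotC = 0xF7;               // ~C == MaskC - 1
  for (unsigned V = 0; V <= 0xFF; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    bool AndForm = (X & MaskC) == MaskC;   // X & -C == -C
    bool CmpForm = X > NotC;               // X >u ~C
    assert(AndForm == CmpForm);
  }
  return 0;
}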
@@ -1899,7 +1898,7 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
// (X & C2) != 0 -> (trunc X) < 0
// iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
const APInt *C2;
- if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
+ if (And->hasOneUse() && C.isZero() && match(Y, m_APInt(C2))) {
int32_t ExactLogBase2 = C2->exactLogBase2();
if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
@@ -1920,7 +1919,7 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
BinaryOperator *Or,
const APInt &C) {
ICmpInst::Predicate Pred = Cmp.getPredicate();
- if (C.isOneValue()) {
+ if (C.isOne()) {
// icmp slt signum(V) 1 --> icmp slt V, 1
Value *V = nullptr;
if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
@@ -1950,7 +1949,18 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
}
}
- if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
+ // (X | (X-1)) s< 0 --> X s< 1
+ // (X | (X-1)) s> -1 --> X s> 0
+ Value *X;
+ bool TrueIfSigned;
+ if (isSignBitCheck(Pred, C, TrueIfSigned) &&
+ match(Or, m_c_Or(m_Add(m_Value(X), m_AllOnes()), m_Deferred(X)))) {
+ auto NewPred = TrueIfSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGT;
+ Constant *NewC = ConstantInt::get(X->getType(), TrueIfSigned ? 1 : 0);
+ return new ICmpInst(NewPred, X, NewC);
+ }
+
+ if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse())
return nullptr;
Value *P, *Q;
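Aside (not part of the patch): the (X | (X-1)) sign-bit fold added in this hunk collapses to a compare on X itself. An exhaustive i8 check, assuming the usual two's-complement narrowing (guaranteed since C++20) so that X - 1 wraps like the IR "add X, -1":

#include <cassert>
#include <cstdint>

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    int8_t Dec = static_cast<int8_t>(X - 1);           // X + (-1), wrapping
    int8_t Or = static_cast<int8_t>(X | Dec);
    assert((Or < 0) == (X < 1));     // (X | (X-1)) s< 0  --> X s< 1
    assert((Or > -1) == (X > 0));    // (X | (X-1)) s> -1 --> X s> 0
  }
  return 0;
}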
@@ -2001,14 +2011,14 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
// If the multiply does not wrap, try to divide the compare constant by the
// multiplication factor.
- if (Cmp.isEquality() && !MulC->isNullValue()) {
+ if (Cmp.isEquality() && !MulC->isZero()) {
// (mul nsw X, MulC) == C --> X == C /s MulC
- if (Mul->hasNoSignedWrap() && C.srem(*MulC).isNullValue()) {
+ if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.sdiv(*MulC));
return new ICmpInst(Pred, Mul->getOperand(0), NewC);
}
// (mul nuw X, MulC) == C --> X == C /u MulC
- if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isNullValue()) {
+ if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(Mul->getType(), C.udiv(*MulC));
return new ICmpInst(Pred, Mul->getOperand(0), NewC);
}
@@ -2053,7 +2063,7 @@ static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
} else if (Cmp.isSigned()) {
Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
- if (C.isAllOnesValue()) {
+ if (C.isAllOnes()) {
// (1 << Y) <= -1 -> Y == 31
if (Pred == ICmpInst::ICMP_SLE)
return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
@@ -2227,8 +2237,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
// icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
Value *X = Shr->getOperand(0);
CmpInst::Predicate Pred = Cmp.getPredicate();
- if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
- C.isNullValue())
+ if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && C.isZero())
return new ICmpInst(Pred, X, Cmp.getOperand(1));
const APInt *ShiftVal;
@@ -2316,7 +2325,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
if (Shr->isExact())
return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
- if (C.isNullValue()) {
+ if (C.isZero()) {
// == 0 is u< 1.
if (Pred == CmpInst::ICMP_EQ)
return new ICmpInst(CmpInst::ICMP_ULT, X,
@@ -2355,7 +2364,7 @@ Instruction *InstCombinerImpl::foldICmpSRemConstant(ICmpInst &Cmp,
return nullptr;
const APInt *DivisorC;
- if (!C.isNullValue() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
+ if (!C.isZero() || !match(SRem->getOperand(1), m_Power2(DivisorC)))
return nullptr;
// Mask off the sign bit and the modulo bits (low-bits).
@@ -2435,8 +2444,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
// INT_MIN will also fail if the divisor is 1. Although folds of all these
// division-by-constant cases should be present, we can not assert that they
// have happened before we reach this icmp instruction.
- if (C2->isNullValue() || C2->isOneValue() ||
- (DivIsSigned && C2->isAllOnesValue()))
+ if (C2->isZero() || C2->isOne() || (DivIsSigned && C2->isAllOnes()))
return nullptr;
// Compute Prod = C * C2. We are essentially solving an equation of
@@ -2476,16 +2484,16 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
}
} else if (C2->isStrictlyPositive()) { // Divisor is > 0.
- if (C.isNullValue()) { // (X / pos) op 0
+ if (C.isZero()) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
LoBound = -(RangeSize - 1);
HiBound = RangeSize;
- } else if (C.isStrictlyPositive()) { // (X / pos) op pos
+ } else if (C.isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
- } else { // (X / pos) op neg
+ } else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
HiBound = Prod + 1;
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
@@ -2497,7 +2505,7 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
} else if (C2->isNegative()) { // Divisor is < 0.
if (Div->isExact())
RangeSize.negate();
- if (C.isNullValue()) { // (X / neg) op 0
+ if (C.isZero()) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
LoBound = RangeSize + 1;
HiBound = -RangeSize;
@@ -2505,13 +2513,13 @@ Instruction *InstCombinerImpl::foldICmpDivConstant(ICmpInst &Cmp,
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
}
- } else if (C.isStrictlyPositive()) { // (X / neg) op pos
+ } else if (C.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
HiBound = Prod + 1;
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
- } else { // (X / neg) op neg
+ } else { // (X / neg) op neg
LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
@@ -2581,42 +2589,54 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
const APInt &C) {
Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
ICmpInst::Predicate Pred = Cmp.getPredicate();
- const APInt *C2;
- APInt SubResult;
+ Type *Ty = Sub->getType();
- // icmp eq/ne (sub C, Y), C -> icmp eq/ne Y, 0
- if (match(X, m_APInt(C2)) && *C2 == C && Cmp.isEquality())
- return new ICmpInst(Cmp.getPredicate(), Y,
- ConstantInt::get(Y->getType(), 0));
+ // (SubC - Y) == C) --> Y == (SubC - C)
+ // (SubC - Y) != C) --> Y != (SubC - C)
+ Constant *SubC;
+ if (Cmp.isEquality() && match(X, m_ImmConstant(SubC))) {
+ return new ICmpInst(Pred, Y,
+ ConstantExpr::getSub(SubC, ConstantInt::get(Ty, C)));
+ }
// (icmp P (sub nuw|nsw C2, Y), C) -> (icmp swap(P) Y, C2-C)
+ const APInt *C2;
+ APInt SubResult;
+ ICmpInst::Predicate SwappedPred = Cmp.getSwappedPredicate();
+ bool HasNSW = Sub->hasNoSignedWrap();
+ bool HasNUW = Sub->hasNoUnsignedWrap();
if (match(X, m_APInt(C2)) &&
- ((Cmp.isUnsigned() && Sub->hasNoUnsignedWrap()) ||
- (Cmp.isSigned() && Sub->hasNoSignedWrap())) &&
+ ((Cmp.isUnsigned() && HasNUW) || (Cmp.isSigned() && HasNSW)) &&
!subWithOverflow(SubResult, *C2, C, Cmp.isSigned()))
- return new ICmpInst(Cmp.getSwappedPredicate(), Y,
- ConstantInt::get(Y->getType(), SubResult));
+ return new ICmpInst(SwappedPred, Y, ConstantInt::get(Ty, SubResult));
// The following transforms are only worth it if the only user of the subtract
// is the icmp.
+ // TODO: This is an artificial restriction for all of the transforms below
+ // that only need a single replacement icmp.
if (!Sub->hasOneUse())
return nullptr;
+ // X - Y == 0 --> X == Y.
+ // X - Y != 0 --> X != Y.
+ if (Cmp.isEquality() && C.isZero())
+ return new ICmpInst(Pred, X, Y);
+
if (Sub->hasNoSignedWrap()) {
// (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
- if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
+ if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
// (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
- if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
+ if (Pred == ICmpInst::ICMP_SGT && C.isZero())
return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
// (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
- if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
+ if (Pred == ICmpInst::ICMP_SLT && C.isZero())
return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
// (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
- if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
+ if (Pred == ICmpInst::ICMP_SLT && C.isOne())
return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
}
@@ -2634,7 +2654,12 @@ Instruction *InstCombinerImpl::foldICmpSubConstant(ICmpInst &Cmp,
if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
- return nullptr;
+ // We have handled special cases that reduce.
+ // Canonicalize any remaining sub to add as:
+ // (C2 - Y) > C --> (Y + ~C2) < ~C
+ Value *Add = Builder.CreateAdd(Y, ConstantInt::get(Ty, ~(*C2)), "notsub",
+ HasNUW, HasNSW);
+ return new ICmpInst(SwappedPred, Add, ConstantInt::get(Ty, ~C));
}
/// Fold icmp (add X, Y), C.
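Aside (not part of the patch): the "(C2 - Y) > C --> (Y + ~C2) < ~C" canonicalization above rests on two facts for the unsigned case: Y + ~C2 is exactly ~(C2 - Y) in two's complement, and bitwise NOT reverses unsigned order (so the predicate is swapped). A stand-alone i8 check:

#include <cassert>
#include <cstdint>

int main() {
  // NOT reverses unsigned order: A >u C  <->  ~A <u ~C.
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned C = 0; C <= 0xFF; ++C)
      assert((A > C) == (static_cast<uint8_t>(~A) < static_cast<uint8_t>(~C)));

  // Spot-check the rewritten form itself on i8.
  uint8_t C2 = 100, Y = 37, C = 50;
  bool SubForm = static_cast<uint8_t>(C2 - Y) > C;                         // (C2 - Y) >u C
  bool AddForm = static_cast<uint8_t>(Y + static_cast<uint8_t>(~C2)) <
                 static_cast<uint8_t>(~C);                                 // (Y + ~C2) <u ~C
  assert(SubForm == AddForm);
  return 0;
}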
@@ -2723,6 +2748,14 @@ Instruction *InstCombinerImpl::foldICmpAddConstant(ICmpInst &Cmp,
return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
ConstantExpr::getNeg(cast<Constant>(Y)));
+ // The range test idiom can use either ult or ugt. Arbitrarily canonicalize
+ // to the ult form.
+ // X+C2 >u C -> X+(C2-C-1) <u ~C
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_ULT,
+ Builder.CreateAdd(X, ConstantInt::get(Ty, *C2 - C - 1)),
+ ConstantInt::get(Ty, ~C));
+
return nullptr;
}
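Aside (not part of the patch): the range-test canonicalization added above, "X+C2 >u C -> X+(C2-C-1) <u ~C", checked exhaustively on i8 for one (C2, C) pair chosen arbitrarily:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C2 = 0x30, C = 0x9C;
  const uint8_t NewC2 = static_cast<uint8_t>(C2 - C - 1);
  const uint8_t NotC = static_cast<uint8_t>(~C);
  for (unsigned V = 0; V <= 0xFF; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    bool Ugt = static_cast<uint8_t>(X + C2) > C;       // X+C2 >u C
    bool Ult = static_cast<uint8_t>(X + NewC2) < NotC; // X+(C2-C-1) <u ~C
    assert(Ugt == Ult);
  }
  return 0;
}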
@@ -2830,8 +2863,7 @@ Instruction *InstCombinerImpl::foldICmpSelectConstant(ICmpInst &Cmp,
return nullptr;
}
-static Instruction *foldICmpBitCast(ICmpInst &Cmp,
- InstCombiner::BuilderTy &Builder) {
+Instruction *InstCombinerImpl::foldICmpBitCast(ICmpInst &Cmp) {
auto *Bitcast = dyn_cast<BitCastInst>(Cmp.getOperand(0));
if (!Bitcast)
return nullptr;
@@ -2917,6 +2949,39 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
return new ICmpInst(Pred, BCSrcOp, Op1);
}
+ const APInt *C;
+ if (!match(Cmp.getOperand(1), m_APInt(C)) ||
+ !Bitcast->getType()->isIntegerTy() ||
+ !Bitcast->getSrcTy()->isIntOrIntVectorTy())
+ return nullptr;
+
+ // If this is checking if all elements of a vector compare are set or not,
+ // invert the casted vector equality compare and test if all compare
+ // elements are clear or not. Compare against zero is generally easier for
+ // analysis and codegen.
+ // icmp eq/ne (bitcast (not X) to iN), -1 --> icmp eq/ne (bitcast X to iN), 0
+ // Example: are all elements equal? --> are zero elements not equal?
+ // TODO: Try harder to reduce compare of 2 freely invertible operands?
+ if (Cmp.isEquality() && C->isAllOnes() && Bitcast->hasOneUse() &&
+ isFreeToInvert(BCSrcOp, BCSrcOp->hasOneUse())) {
+ Type *ScalarTy = Bitcast->getType();
+ Value *Cast = Builder.CreateBitCast(Builder.CreateNot(BCSrcOp), ScalarTy);
+ return new ICmpInst(Pred, Cast, ConstantInt::getNullValue(ScalarTy));
+ }
+
+ // If this is checking if all elements of an extended vector are clear or not,
+ // compare in a narrow type to eliminate the extend:
+ // icmp eq/ne (bitcast (ext X) to iN), 0 --> icmp eq/ne (bitcast X to iM), 0
+ Value *X;
+ if (Cmp.isEquality() && C->isZero() && Bitcast->hasOneUse() &&
+ match(BCSrcOp, m_ZExtOrSExt(m_Value(X)))) {
+ if (auto *VecTy = dyn_cast<FixedVectorType>(X->getType())) {
+ Type *NewType = Builder.getIntNTy(VecTy->getPrimitiveSizeInBits());
+ Value *NewCast = Builder.CreateBitCast(X, NewType);
+ return new ICmpInst(Pred, NewCast, ConstantInt::getNullValue(NewType));
+ }
+ }
+
// Folding: icmp <pred> iN X, C
// where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
// and C is a splat of a K-bit pattern
@@ -2924,12 +2989,6 @@ static Instruction *foldICmpBitCast(ICmpInst &Cmp,
// Into:
// %E = extractelement <M x iK> %vec, i32 C'
// icmp <pred> iK %E, trunc(C)
- const APInt *C;
- if (!match(Cmp.getOperand(1), m_APInt(C)) ||
- !Bitcast->getType()->isIntegerTy() ||
- !Bitcast->getSrcTy()->isIntOrIntVectorTy())
- return nullptr;
-
Value *Vec;
ArrayRef<int> Mask;
if (match(BCSrcOp, m_Shuffle(m_Value(Vec), m_Undef(), m_Mask(Mask)))) {
@@ -3055,7 +3114,7 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
switch (BO->getOpcode()) {
case Instruction::SRem:
// If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
- if (C.isNullValue() && BO->hasOneUse()) {
+ if (C.isZero() && BO->hasOneUse()) {
const APInt *BOC;
if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
@@ -3069,7 +3128,7 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
if (BO->hasOneUse())
return new ICmpInst(Pred, BOp0, ConstantExpr::getSub(RHS, BOC));
- } else if (C.isNullValue()) {
+ } else if (C.isZero()) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
if (Value *NegVal = dyn_castNegVal(BOp1))
@@ -3090,25 +3149,12 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
// For the xor case, we can xor two constants together, eliminating
// the explicit xor.
return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
- } else if (C.isNullValue()) {
+ } else if (C.isZero()) {
// Replace ((xor A, B) != 0) with (A != B)
return new ICmpInst(Pred, BOp0, BOp1);
}
}
break;
- case Instruction::Sub:
- if (BO->hasOneUse()) {
- // Only check for constant LHS here, as constant RHS will be canonicalized
- // to add and use the fold above.
- if (Constant *BOC = dyn_cast<Constant>(BOp0)) {
- // Replace ((sub BOC, B) != C) with (B != BOC-C).
- return new ICmpInst(Pred, BOp1, ConstantExpr::getSub(BOC, RHS));
- } else if (C.isNullValue()) {
- // Replace ((sub A, B) != 0) with (A != B).
- return new ICmpInst(Pred, BOp0, BOp1);
- }
- }
- break;
case Instruction::Or: {
const APInt *BOC;
if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
@@ -3132,7 +3178,7 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
break;
}
case Instruction::UDiv:
- if (C.isNullValue()) {
+ if (C.isZero()) {
// (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
return new ICmpInst(NewPred, BOp1, BOp0);
@@ -3149,25 +3195,26 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
+ const ICmpInst::Predicate Pred = Cmp.getPredicate();
+
switch (II->getIntrinsicID()) {
case Intrinsic::abs:
// abs(A) == 0 -> A == 0
// abs(A) == INT_MIN -> A == INT_MIN
- if (C.isNullValue() || C.isMinSignedValue())
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
- ConstantInt::get(Ty, C));
+ if (C.isZero() || C.isMinSignedValue())
+ return new ICmpInst(Pred, II->getArgOperand(0), ConstantInt::get(Ty, C));
break;
case Intrinsic::bswap:
// bswap(A) == C -> A == bswap(C)
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
+ return new ICmpInst(Pred, II->getArgOperand(0),
ConstantInt::get(Ty, C.byteSwap()));
case Intrinsic::ctlz:
case Intrinsic::cttz: {
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
if (C == BitWidth)
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
+ return new ICmpInst(Pred, II->getArgOperand(0),
ConstantInt::getNullValue(Ty));
// ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set
@@ -3181,9 +3228,8 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
APInt Mask2 = IsTrailing
? APInt::getOneBitSet(BitWidth, Num)
: APInt::getOneBitSet(BitWidth, BitWidth - Num - 1);
- return new ICmpInst(Cmp.getPredicate(),
- Builder.CreateAnd(II->getArgOperand(0), Mask1),
- ConstantInt::get(Ty, Mask2));
+ return new ICmpInst(Pred, Builder.CreateAnd(II->getArgOperand(0), Mask1),
+ ConstantInt::get(Ty, Mask2));
}
break;
}
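Aside (not part of the patch): for the cttz case of the fold above, "ctz(A) == C -> A & Mask1 == Mask2". Mask1 is defined just outside the shown context; this sketch assumes it is the low Num+1-bit mask implied by that comment, and uses the GCC/Clang builtin __builtin_ctz as a stand-in for the cttz intrinsic:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned Num = 3;
  const uint8_t Mask1 = 0x0F;              // assumed: low Num+1 bits
  const uint8_t Mask2 = 1u << Num;         // only bit Num set
  for (unsigned V = 1; V <= 0xFF; ++V) {   // skip 0: __builtin_ctz(0) is UB
    uint8_t A = static_cast<uint8_t>(V);
    bool CttzForm = static_cast<unsigned>(__builtin_ctz(A)) == Num;
    bool MaskForm = (A & Mask1) == Mask2;
    assert(CttzForm == MaskForm);
  }
  return 0;
}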
@@ -3191,28 +3237,49 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
case Intrinsic::ctpop: {
// popcount(A) == 0 -> A == 0 and likewise for !=
// popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
- bool IsZero = C.isNullValue();
+ bool IsZero = C.isZero();
if (IsZero || C == BitWidth)
- return new ICmpInst(Cmp.getPredicate(), II->getArgOperand(0),
- IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty));
+ return new ICmpInst(Pred, II->getArgOperand(0),
+ IsZero ? Constant::getNullValue(Ty)
+ : Constant::getAllOnesValue(Ty));
break;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ if (II->getArgOperand(0) == II->getArgOperand(1)) {
+ // (rot X, ?) == 0/-1 --> X == 0/-1
+ // TODO: This transform is safe to re-use undef elts in a vector, but
+ // the constant value passed in by the caller doesn't allow that.
+ if (C.isZero() || C.isAllOnes())
+ return new ICmpInst(Pred, II->getArgOperand(0), Cmp.getOperand(1));
+
+ const APInt *RotAmtC;
+ // ror(X, RotAmtC) == C --> X == rol(C, RotAmtC)
+ // rol(X, RotAmtC) == C --> X == ror(C, RotAmtC)
+ if (match(II->getArgOperand(2), m_APInt(RotAmtC)))
+ return new ICmpInst(Pred, II->getArgOperand(0),
+ II->getIntrinsicID() == Intrinsic::fshl
+ ? ConstantInt::get(Ty, C.rotr(*RotAmtC))
+ : ConstantInt::get(Ty, C.rotl(*RotAmtC)));
+ }
+ break;
+
case Intrinsic::uadd_sat: {
// uadd.sat(a, b) == 0 -> (a | b) == 0
- if (C.isNullValue()) {
+ if (C.isZero()) {
Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
- return new ICmpInst(Cmp.getPredicate(), Or, Constant::getNullValue(Ty));
+ return new ICmpInst(Pred, Or, Constant::getNullValue(Ty));
}
break;
}
case Intrinsic::usub_sat: {
// usub.sat(a, b) == 0 -> a <= b
- if (C.isNullValue()) {
- ICmpInst::Predicate NewPred = Cmp.getPredicate() == ICmpInst::ICMP_EQ
- ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
+ if (C.isZero()) {
+ ICmpInst::Predicate NewPred =
+ Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
return new ICmpInst(NewPred, II->getArgOperand(0), II->getArgOperand(1));
}
break;
@@ -3224,6 +3291,42 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
return nullptr;
}
+/// Fold an icmp with LLVM intrinsics
+static Instruction *foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp) {
+ assert(Cmp.isEquality());
+
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op0 = Cmp.getOperand(0);
+ Value *Op1 = Cmp.getOperand(1);
+ const auto *IIOp0 = dyn_cast<IntrinsicInst>(Op0);
+ const auto *IIOp1 = dyn_cast<IntrinsicInst>(Op1);
+ if (!IIOp0 || !IIOp1 || IIOp0->getIntrinsicID() != IIOp1->getIntrinsicID())
+ return nullptr;
+
+ switch (IIOp0->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ case Intrinsic::bitreverse:
+ // If both operands are byte-swapped or bit-reversed, just compare the
+ // original values.
+ return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
+ case Intrinsic::fshl:
+ case Intrinsic::fshr:
+ // If both operands are rotated by same amount, just compare the
+ // original values.
+ if (IIOp0->getOperand(0) != IIOp0->getOperand(1))
+ break;
+ if (IIOp1->getOperand(0) != IIOp1->getOperand(1))
+ break;
+ if (IIOp0->getOperand(2) != IIOp1->getOperand(2))
+ break;
+ return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
+ default:
+ break;
+ }
+
+ return nullptr;
+}
+
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
IntrinsicInst *II,
@@ -3663,7 +3766,7 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
(WidestTy->getScalarSizeInBits() - 1) +
(NarrowestTy->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
- APInt::getAllOnesValue(XShAmt->getType()->getScalarSizeInBits());
+ APInt::getAllOnes(XShAmt->getType()->getScalarSizeInBits());
if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
return nullptr;
@@ -3746,19 +3849,22 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
/// Fold
/// (-1 u/ x) u< y
-/// ((x * y) u/ x) != y
+/// ((x * y) ?/ x) != y
/// to
-/// @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
+/// @llvm.?mul.with.overflow(x, y) plus extraction of overflow bit
/// Note that the comparison is commutative, while inverted (u>=, ==) predicate
/// will mean that we are looking for the opposite answer.
-Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
+Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) {
ICmpInst::Predicate Pred;
Value *X, *Y;
Instruction *Mul;
+ Instruction *Div;
bool NeedNegation;
// Look for: (-1 u/ x) u</u>= y
if (!I.isEquality() &&
- match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
+ match(&I, m_c_ICmp(Pred,
+ m_CombineAnd(m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
+ m_Instruction(Div)),
m_Value(Y)))) {
Mul = nullptr;
@@ -3773,13 +3879,16 @@ Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
default:
return nullptr; // Wrong predicate.
}
- } else // Look for: ((x * y) u/ x) !=/== y
+ } else // Look for: ((x * y) / x) !=/== y
if (I.isEquality() &&
- match(&I, m_c_ICmp(Pred, m_Value(Y),
- m_OneUse(m_UDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
+ match(&I,
+ m_c_ICmp(Pred, m_Value(Y),
+ m_CombineAnd(
+ m_OneUse(m_IDiv(m_CombineAnd(m_c_Mul(m_Deferred(Y),
m_Value(X)),
m_Instruction(Mul)),
- m_Deferred(X)))))) {
+ m_Deferred(X))),
+ m_Instruction(Div))))) {
NeedNegation = Pred == ICmpInst::Predicate::ICMP_EQ;
} else
return nullptr;
@@ -3791,19 +3900,22 @@ Value *InstCombinerImpl::foldUnsignedMultiplicationOverflowCheck(ICmpInst &I) {
if (MulHadOtherUses)
Builder.SetInsertPoint(Mul);
- Function *F = Intrinsic::getDeclaration(
- I.getModule(), Intrinsic::umul_with_overflow, X->getType());
- CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul");
+ Function *F = Intrinsic::getDeclaration(I.getModule(),
+ Div->getOpcode() == Instruction::UDiv
+ ? Intrinsic::umul_with_overflow
+ : Intrinsic::smul_with_overflow,
+ X->getType());
+ CallInst *Call = Builder.CreateCall(F, {X, Y}, "mul");
// If the multiplication was used elsewhere, to ensure that we don't leave
// "duplicate" instructions, replace uses of that original multiplication
// with the multiplication result from the with.overflow intrinsic.
if (MulHadOtherUses)
- replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "umul.val"));
+ replaceInstUsesWith(*Mul, Builder.CreateExtractValue(Call, 0, "mul.val"));
- Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov");
+ Value *Res = Builder.CreateExtractValue(Call, 1, "mul.ov");
if (NeedNegation) // This technically increases instruction count.
- Res = Builder.CreateNot(Res, "umul.not.ov");
+ Res = Builder.CreateNot(Res, "mul.not.ov");
// If we replaced the mul, erase it. Do this after all uses of Builder,
// as the mul is used as insertion point.
@@ -4079,8 +4191,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 &&
match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality())
if (!C->countTrailingZeros() ||
- (BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
- (BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
+ (BO0 && BO1 && BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
+ (BO0 && BO1 && BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
return new ICmpInst(Pred, X, Y);
}
@@ -4146,8 +4258,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
break;
const APInt *C;
- if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
- !C->isOneValue()) {
+ if (match(BO0->getOperand(1), m_APInt(C)) && !C->isZero() &&
+ !C->isOne()) {
// icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
// Mask = -1 >> count-trailing-zeros(C).
if (unsigned TZs = C->countTrailingZeros()) {
@@ -4200,7 +4312,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
}
- if (Value *V = foldUnsignedMultiplicationOverflowCheck(I))
+ if (Value *V = foldMultiplicationOverflowCheck(I))
return replaceInstUsesWith(I, V);
if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
@@ -4373,6 +4485,19 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
+ {
+ // Similar to above, but specialized for constant because invert is needed:
+ // (X | C) == (Y | C) --> (X ^ Y) & ~C == 0
+ Value *X, *Y;
+ Constant *C;
+ if (match(Op0, m_OneUse(m_Or(m_Value(X), m_Constant(C)))) &&
+ match(Op1, m_OneUse(m_Or(m_Value(Y), m_Specific(C))))) {
+ Value *Xor = Builder.CreateXor(X, Y);
+ Value *And = Builder.CreateAnd(Xor, ConstantExpr::getNot(C));
+ return new ICmpInst(Pred, And, Constant::getNullValue(And->getType()));
+ }
+ }
+
// Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
// and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
ConstantInt *Cst1;
@@ -4441,14 +4566,8 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
- // If both operands are byte-swapped or bit-reversed, just compare the
- // original values.
- // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
- // and handle more intrinsics.
- if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
- (match(Op0, m_BitReverse(m_Value(A))) &&
- match(Op1, m_BitReverse(m_Value(B)))))
- return new ICmpInst(Pred, A, B);
+ if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I))
+ return ICmp;
// Canonicalize checking for a power-of-2-or-zero value:
// (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
@@ -4474,6 +4593,74 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
: new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
}
+ // Match icmp eq (trunc (lshr A, BW), (ashr (trunc A), BW-1)), which checks the
+ // top BW/2 + 1 bits are all the same. Create "A >=s INT_MIN && A <=s INT_MAX",
+ // which we generate as "icmp ult (add A, 2^(BW-1)), 2^BW" to skip a few steps
+ // of instcombine.
+ unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
+ if (match(Op0, m_AShr(m_Trunc(m_Value(A)), m_SpecificInt(BitWidth - 1))) &&
+ match(Op1, m_Trunc(m_LShr(m_Specific(A), m_SpecificInt(BitWidth)))) &&
+ A->getType()->getScalarSizeInBits() == BitWidth * 2 &&
+ (I.getOperand(0)->hasOneUse() || I.getOperand(1)->hasOneUse())) {
+ APInt C = APInt::getOneBitSet(BitWidth * 2, BitWidth - 1);
+ Value *Add = Builder.CreateAdd(A, ConstantInt::get(A->getType(), C));
+ return new ICmpInst(Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT
+ : ICmpInst::ICMP_UGE,
+ Add, ConstantInt::get(A->getType(), C.shl(1)));
+ }
+
+ return nullptr;
+}
+
+static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
+ InstCombiner::BuilderTy &Builder) {
+ const ICmpInst::Predicate Pred = ICmp.getPredicate();
+ Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1);
+
+ // Try to canonicalize trunc + compare-to-constant into a mask + cmp.
+ // The trunc masks high bits while the compare may effectively mask low bits.
+ Value *X;
+ const APInt *C;
+ if (!match(Op0, m_OneUse(m_Trunc(m_Value(X)))) || !match(Op1, m_APInt(C)))
+ return nullptr;
+
+ unsigned SrcBits = X->getType()->getScalarSizeInBits();
+ if (Pred == ICmpInst::ICMP_ULT) {
+ if (C->isPowerOf2()) {
+ // If C is a power-of-2 (one set bit):
+ // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
+ Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ Constant *Zero = ConstantInt::getNullValue(X->getType());
+ return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero);
+ }
+ // If C is a negative power-of-2 (high-bit mask):
+ // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
+ if (C->isNegatedPowerOf2()) {
+ Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
+ }
+ }
+
+ if (Pred == ICmpInst::ICMP_UGT) {
+ // If C is a low-bit-mask (C+1 is a power-of-2):
+ // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
+ if (C->isMask()) {
+ Constant *MaskC = ConstantInt::get(X->getType(), (~*C).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ Constant *Zero = ConstantInt::getNullValue(X->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+ // If C is not-of-power-of-2 (one clear bit):
+ // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
+ if ((~*C).isPowerOf2()) {
+ Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
+ Value *And = Builder.CreateAnd(X, MaskC);
+ return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
+ }
+ }
+
return nullptr;
}
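Aside (not part of the patch): two of the foldICmpWithTrunc rewrites introduced above, checked exhaustively for an i16 -> i8 truncation. C = 16 is a power of 2 for the u< case and C = 15 is a low-bit mask for the u> case; the mask is -C / ~C zero-extended back to the source type, as in the code, which gives 0x00F0 for both:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V <= 0xFFFF; ++V) {
    uint16_t X = static_cast<uint16_t>(V);
    uint8_t T = static_cast<uint8_t>(X);

    // (trunc X) u< 16  -->  (X & 0x00F0) == 0
    assert((T < 16) == ((X & 0x00F0) == 0));

    // (trunc X) u> 15  -->  (X & 0x00F0) != 0
    assert((T > 15) == ((X & 0x00F0) != 0));
  }
  return 0;
}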
@@ -4620,6 +4807,9 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
}
+ if (Instruction *R = foldICmpWithTrunc(ICmp, Builder))
+ return R;
+
return foldICmpWithZextOrSext(ICmp, Builder);
}
@@ -4943,7 +5133,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
const APInt *RHS;
if (!match(I.getOperand(1), m_APInt(RHS)))
- return APInt::getAllOnesValue(BitWidth);
+ return APInt::getAllOnes(BitWidth);
// If this is a normal comparison, it demands all bits. If it is a sign bit
// comparison, it only demands the sign bit.
@@ -4965,7 +5155,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
default:
- return APInt::getAllOnesValue(BitWidth);
+ return APInt::getAllOnes(BitWidth);
}
}
@@ -5129,8 +5319,7 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
Op0Known, 0))
return &I;
- if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
- Op1Known, 0))
+ if (SimplifyDemandedBits(&I, 1, APInt::getAllOnes(BitWidth), Op1Known, 0))
return &I;
// Given the known and unknown bits, compute a range that the LHS could be
@@ -5158,6 +5347,83 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
if (!isa<Constant>(Op1) && Op1Min == Op1Max)
return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
+ // Don't break up a clamp pattern -- (min(max X, Y), Z) -- by replacing a
+ // min/max canonical compare with some other compare. That could lead to
+ // conflict with select canonicalization and infinite looping.
+ // FIXME: This constraint may go away if min/max intrinsics are canonical.
+ auto isMinMaxCmp = [&](Instruction &Cmp) {
+ if (!Cmp.hasOneUse())
+ return false;
+ Value *A, *B;
+ SelectPatternFlavor SPF = matchSelectPattern(Cmp.user_back(), A, B).Flavor;
+ if (!SelectPatternResult::isMinOrMax(SPF))
+ return false;
+ return match(Op0, m_MaxOrMin(m_Value(), m_Value())) ||
+ match(Op1, m_MaxOrMin(m_Value(), m_Value()));
+ };
+ if (!isMinMaxCmp(I)) {
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_ULT: {
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A <u C -> A == C-1 if min(A)+1 == C
+ if (*CmpC == Op0Min + 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC - 1));
+ // X <u C --> X == 0, if the number of zero bits in the bottom of X
+ // exceeds the log2 of C.
+ if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Constant::getNullValue(Op1->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A >u C -> A == C+1 if max(a)-1 == C
+ if (*CmpC == Op0Max - 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+ // X >u C --> X != 0, if the number of zero bits in the bottom of X
+ // exceeds the log2 of C.
+ if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0,
+ Constant::getNullValue(Op1->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SLT: {
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC - 1));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SGT: {
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+ }
+ break;
+ }
+ }
+ }
+
// Based on the range information we know about the LHS, see if we can
// simplify this comparison. For example, (x&4) < 8 is always true.
switch (Pred) {
@@ -5203,7 +5469,7 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
// Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
const APInt *CI;
- if (Op0KnownZeroInverted.isOneValue() &&
+ if (Op0KnownZeroInverted.isOne() &&
match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
// ((8 >>u X) & 1) == 0 -> X != 3
// ((8 >>u X) & 1) != 0 -> X == 3
@@ -5219,21 +5485,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- // A <u C -> A == C-1 if min(A)+1 == C
- if (*CmpC == Op0Min + 1)
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC - 1));
- // X <u C --> X == 0, if the number of zero bits in the bottom of X
- // exceeds the log2 of C.
- if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Constant::getNullValue(Op1->getType()));
- }
break;
}
case ICmpInst::ICMP_UGT: {
@@ -5241,21 +5492,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
-
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- // A >u C -> A == C+1 if max(a)-1 == C
- if (*CmpC == Op0Max - 1)
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC + 1));
- // X >u C --> X != 0, if the number of zero bits in the bottom of X
- // exceeds the log2 of C.
- if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
- return new ICmpInst(ICmpInst::ICMP_NE, Op0,
- Constant::getNullValue(Op1->getType()));
- }
break;
}
case ICmpInst::ICMP_SLT: {
@@ -5263,14 +5499,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC - 1));
- }
break;
}
case ICmpInst::ICMP_SGT: {
@@ -5278,14 +5506,6 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- const APInt *CmpC;
- if (match(Op1, m_APInt(CmpC))) {
- if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- ConstantInt::get(Op1->getType(), *CmpC + 1));
- }
break;
}
case ICmpInst::ICMP_SGE:
@@ -5587,7 +5807,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
if (match(RHS, m_Shuffle(m_Value(V2), m_Undef(), m_SpecificMask(M))) &&
V1Ty == V2->getType() && (LHS->hasOneUse() || RHS->hasOneUse())) {
Value *NewCmp = Builder.CreateCmp(Pred, V1, V2);
- return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
+ return new ShuffleVectorInst(NewCmp, M);
}
// Try to canonicalize compare with splatted operand and splat constant.
@@ -5608,8 +5828,7 @@ static Instruction *foldVectorCmp(CmpInst &Cmp,
ScalarC);
SmallVector<int, 8> NewM(M.size(), MaskSplatIndex);
Value *NewCmp = Builder.CreateCmp(Pred, V1, C);
- return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()),
- NewM);
+ return new ShuffleVectorInst(NewCmp, NewM);
}
return nullptr;
@@ -5645,6 +5864,23 @@ static Instruction *foldICmpOfUAddOv(ICmpInst &I) {
return ExtractValueInst::Create(UAddOv, 1);
}
+static Instruction *foldICmpInvariantGroup(ICmpInst &I) {
+ if (!I.getOperand(0)->getType()->isPointerTy() ||
+ NullPointerIsDefined(
+ I.getParent()->getParent(),
+ I.getOperand(0)->getType()->getPointerAddressSpace())) {
+ return nullptr;
+ }
+ Instruction *Op;
+ if (match(I.getOperand(0), m_Instruction(Op)) &&
+ match(I.getOperand(1), m_Zero()) &&
+ Op->isLaunderOrStripInvariantGroup()) {
+ return ICmpInst::Create(Instruction::ICmp, I.getPredicate(),
+ Op->getOperand(0), I.getOperand(1));
+ }
+ return nullptr;
+}
+
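A hedged sketch of what the new foldICmpInvariantGroup helper does (illustrative only; 'barrier' is an invented stand-in for the llvm.launder/strip.invariant.group intrinsics, which behave as identity functions at run time): a null check may look through the barrier, provided null is not a defined address in the pointer's address space.

    // Run-time model of the invariant.group barrier; the real intrinsic also
    // carries metadata semantics that this sketch ignores.
    static void *barrier(void *p) { return p; }
    bool null_check_before(void *p) { return barrier(p) == nullptr; }
    bool null_check_after(void *p) { return p == nullptr; }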
Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
const SimplifyQuery Q = SQ.getWithInstruction(&I);
@@ -5698,9 +5934,6 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithDominatingICmp(I))
return Res;
- if (Instruction *Res = foldICmpBinOp(I, Q))
- return Res;
-
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
@@ -5746,6 +5979,15 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
+ // The folds in here may rely on wrapping flags and special constants, so
+ // they can break up min/max idioms in some cases but not seemingly similar
+ // patterns.
+ // FIXME: It may be possible to enhance select folding to make this
+ // unnecessary. It may also be moot if we canonicalize to min/max
+ // intrinsics.
+ if (Instruction *Res = foldICmpBinOp(I, Q))
+ return Res;
+
if (Instruction *Res = foldICmpInstWithConstant(I))
return Res;
@@ -5757,13 +5999,12 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
return Res;
- // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
+ // Try to optimize 'icmp GEP, P' or 'icmp P, GEP'.
+ if (auto *GEP = dyn_cast<GEPOperator>(Op0))
if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
return NI;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
- if (Instruction *NI = foldGEPICmp(GEP, Op0,
- ICmpInst::getSwappedPredicate(I.getPredicate()), I))
+ if (auto *GEP = dyn_cast<GEPOperator>(Op1))
+ if (Instruction *NI = foldGEPICmp(GEP, Op0, I.getSwappedPredicate(), I))
return NI;
// Try to optimize equality comparisons against alloca-based pointers.
@@ -5777,7 +6018,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
return New;
}
- if (Instruction *Res = foldICmpBitCast(I, Builder))
+ if (Instruction *Res = foldICmpBitCast(I))
return Res;
// TODO: Hoist this above the min/max bailout.
@@ -5879,6 +6120,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldVectorCmp(I, Builder))
return Res;
+ if (Instruction *Res = foldICmpInvariantGroup(I))
+ return Res;
+
return Changed ? &I : nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index eaa53348028d..72e1b21e8d49 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -22,14 +22,15 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
using namespace llvm::PatternMatch;
@@ -61,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
: public InstCombiner,
public InstVisitor<InstCombinerImpl, Instruction *> {
public:
- InstCombinerImpl(InstCombineWorklist &Worklist, BuilderTy &Builder,
+ InstCombinerImpl(InstructionWorklist &Worklist, BuilderTy &Builder,
bool MinimizeSize, AAResults *AA, AssumptionCache &AC,
TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE,
@@ -190,6 +191,7 @@ public:
private:
void annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
+ bool isDesirableIntType(unsigned BitWidth) const;
bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
bool shouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
@@ -240,15 +242,11 @@ private:
///
/// \param ICI The icmp of the (zext icmp) pair we are interested in.
/// \param CI The zext of the (zext icmp) pair we are interested in.
- /// \param DoTransform Pass false to just test whether the given (zext icmp)
- /// would be transformed. Pass true to actually perform the transformation.
///
/// \return null if the transformation cannot be performed. If the
/// transformation can be performed the new instruction that replaces the
- /// (zext icmp) pair will be returned (if \p DoTransform is false the
- /// unmodified \p ICI will be returned in this case).
- Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
- bool DoTransform = true);
+ /// (zext icmp) pair will be returned.
+ Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI);
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
@@ -319,13 +317,15 @@ private:
Value *EmitGEPOffset(User *GEP);
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
+ Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
Instruction *narrowBinOp(TruncInst &Trunc);
Instruction *narrowMaskedBinOp(BinaryOperator &And);
Instruction *narrowMathIfNoOverflow(BinaryOperator &I);
Instruction *narrowFunnelShift(TruncInst &Trunc);
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
- Instruction *matchSAddSubSat(SelectInst &MinMax1);
+ Instruction *matchSAddSubSat(Instruction &MinMax1);
+ Instruction *foldNot(BinaryOperator &I);
void freelyInvertAllUsersOf(Value *V);
@@ -347,6 +347,8 @@ private:
Value *foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Or);
Value *foldXorOfICmps(ICmpInst *LHS, ICmpInst *RHS, BinaryOperator &Xor);
+ Value *foldEqOfParts(ICmpInst *Cmp0, ICmpInst *Cmp1, bool IsAnd);
+
/// Optimize (fcmp)&(fcmp) or (fcmp)|(fcmp).
/// NOTE: Unlike most of instcombine, this returns a Value which should
/// already be inserted into the function.
@@ -623,6 +625,7 @@ public:
Instruction *foldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *foldPHIArgLoadIntoPHI(PHINode &PN);
Instruction *foldPHIArgZextsIntoPHI(PHINode &PN);
+ Instruction *foldPHIArgIntToPtrToPHI(PHINode &PN);
/// If an integer typed PHI has only one use which is an IntToPtr operation,
/// replace the PHI with an existing pointer typed PHI if it exists. Otherwise
@@ -657,7 +660,7 @@ public:
Instruction *foldSignBitTest(ICmpInst &I);
Instruction *foldICmpWithZero(ICmpInst &Cmp);
- Value *foldUnsignedMultiplicationOverflowCheck(ICmpInst &Cmp);
+ Value *foldMultiplicationOverflowCheck(ICmpInst &Cmp);
Instruction *foldICmpSelectConstant(ICmpInst &Cmp, SelectInst *Select,
ConstantInt *C);
@@ -701,6 +704,7 @@ public:
const APInt &C);
Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
const APInt &C);
+ Instruction *foldICmpBitCast(ICmpInst &Cmp);
// Helpers of visitSelectInst().
Instruction *foldSelectExtConst(SelectInst &Sel);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index a8474e27383d..79a8a065d02a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -261,8 +261,8 @@ private:
bool PointerReplacer::collectUsers(Instruction &I) {
for (auto U : I.users()) {
- Instruction *Inst = cast<Instruction>(&*U);
- if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
+ auto *Inst = cast<Instruction>(&*U);
+ if (auto *Load = dyn_cast<LoadInst>(Inst)) {
if (Load->isVolatile())
return false;
Worklist.insert(Load);
@@ -270,7 +270,9 @@ bool PointerReplacer::collectUsers(Instruction &I) {
Worklist.insert(Inst);
if (!collectUsers(*Inst))
return false;
- } else if (isa<MemTransferInst>(Inst)) {
+ } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
+ if (MI->isVolatile())
+ return false;
Worklist.insert(Inst);
} else if (Inst->isLifetimeStartOrEnd()) {
continue;
@@ -335,8 +337,7 @@ void PointerReplacer::replace(Instruction *I) {
MemCpy->getIntrinsicID(), MemCpy->getRawDest(), MemCpy->getDestAlign(),
SrcV, MemCpy->getSourceAlign(), MemCpy->getLength(),
MemCpy->isVolatile());
- AAMDNodes AAMD;
- MemCpy->getAAMetadata(AAMD);
+ AAMDNodes AAMD = MemCpy->getAAMetadata();
if (AAMD)
NewI->setAAMetadata(AAMD);
@@ -647,9 +648,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
if (NumElements == 1) {
LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
".unpack");
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- NewLoad->setAAMetadata(AAMD);
+ NewLoad->setAAMetadata(LI.getAAMetadata());
return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -678,9 +677,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
ST->getElementType(i), Ptr,
commonAlignment(Align, SL->getElementOffset(i)), Name + ".unpack");
// Propagate AA metadata. It'll still be valid on the narrowed load.
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- L->setAAMetadata(AAMD);
+ L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
}
@@ -693,9 +690,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto NumElements = AT->getNumElements();
if (NumElements == 1) {
LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- NewLoad->setAAMetadata(AAMD);
+ NewLoad->setAAMetadata(LI.getAAMetadata());
return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
UndefValue::get(T), NewLoad, 0, Name));
}
@@ -727,9 +722,7 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
commonAlignment(Align, Offset),
Name + ".unpack");
- AAMDNodes AAMD;
- LI.getAAMetadata(AAMD);
- L->setAAMetadata(AAMD);
+ L->setAAMetadata(LI.getAAMetadata());
V = IC.Builder.CreateInsertValue(V, L, i);
Offset += EltSize;
}
@@ -1206,9 +1199,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = commonAlignment(Align, SL->getElementOffset(i));
llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
- AAMDNodes AAMD;
- SI.getAAMetadata(AAMD);
- NS->setAAMetadata(AAMD);
+ NS->setAAMetadata(SI.getAAMetadata());
}
return true;
@@ -1254,9 +1245,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
auto EltAlign = commonAlignment(Align, Offset);
Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
- AAMDNodes AAMD;
- SI.getAAMetadata(AAMD);
- NS->setAAMetadata(AAMD);
+ NS->setAAMetadata(SI.getAAMetadata());
Offset += EltSize;
}
@@ -1498,8 +1487,8 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
StoreInst *OtherStore = nullptr;
if (OtherBr->isUnconditional()) {
--BBI;
- // Skip over debugging info.
- while (isa<DbgInfoIntrinsic>(BBI) ||
+ // Skip over debugging info and pseudo probes.
+ while (BBI->isDebugOrPseudoInst() ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
if (BBI==OtherBB->begin())
return false;
@@ -1567,12 +1556,9 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
NewSI->setDebugLoc(MergedLoc);
// If the two stores had AA tags, merge them.
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
- if (AATags) {
- OtherStore->getAAMetadata(AATags, /* Merge = */ true);
- NewSI->setAAMetadata(AATags);
- }
+ AAMDNodes AATags = SI.getAAMetadata();
+ if (AATags)
+ NewSI->setAAMetadata(AATags.merge(OtherStore->getAAMetadata()));
// Nuke the old stores.
eraseInstFromFunction(SI);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6f2a8ebf839a..779d298da7a4 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -31,7 +31,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include <cassert>
@@ -39,11 +38,12 @@
#include <cstdint>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
/// The specific integer value is used in a context where it is known to be
/// non-zero. If this allows us to simplify the computation, do so and return
/// the new operand, otherwise return null.
@@ -107,14 +107,19 @@ static Value *foldMulSelectToNegate(BinaryOperator &I,
// mul (select Cond, 1, -1), OtherOp --> select Cond, OtherOp, -OtherOp
// mul OtherOp, (select Cond, 1, -1) --> select Cond, OtherOp, -OtherOp
if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_One(), m_AllOnes())),
- m_Value(OtherOp))))
- return Builder.CreateSelect(Cond, OtherOp, Builder.CreateNeg(OtherOp));
-
+ m_Value(OtherOp)))) {
+ bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
+ Value *Neg = Builder.CreateNeg(OtherOp, "", false, HasAnyNoWrap);
+ return Builder.CreateSelect(Cond, OtherOp, Neg);
+ }
// mul (select Cond, -1, 1), OtherOp --> select Cond, -OtherOp, OtherOp
// mul OtherOp, (select Cond, -1, 1) --> select Cond, -OtherOp, OtherOp
if (match(&I, m_c_Mul(m_OneUse(m_Select(m_Value(Cond), m_AllOnes(), m_One())),
- m_Value(OtherOp))))
- return Builder.CreateSelect(Cond, Builder.CreateNeg(OtherOp), OtherOp);
+ m_Value(OtherOp)))) {
+ bool HasAnyNoWrap = I.hasNoSignedWrap() || I.hasNoUnsignedWrap();
+ Value *Neg = Builder.CreateNeg(OtherOp, "", false, HasAnyNoWrap);
+ return Builder.CreateSelect(Cond, Neg, OtherOp);
+ }
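A C-level sketch of the select-to-negate fold above (illustrative only; function names invented): multiplying by a select of 1 and -1 is a conditional negation, and the added lines also carry the mul's no-wrap information over to the negation they create.

    int mul_before(bool c, int x) { return (c ? 1 : -1) * x; }
    int mul_after(bool c, int x) { return c ? x : -x; }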
// fmul (select Cond, 1.0, -1.0), OtherOp --> select Cond, OtherOp, -OtherOp
// fmul OtherOp, (select Cond, 1.0, -1.0) --> select Cond, OtherOp, -OtherOp
@@ -564,6 +569,16 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
return replaceInstUsesWith(I, NewPow);
}
+ // powi(x, y) * powi(x, z) -> powi(x, y + z)
+ if (match(Op0, m_Intrinsic<Intrinsic::powi>(m_Value(X), m_Value(Y))) &&
+ match(Op1, m_Intrinsic<Intrinsic::powi>(m_Specific(X), m_Value(Z))) &&
+ Y->getType() == Z->getType()) {
+ auto *YZ = Builder.CreateAdd(Y, Z);
+ auto *NewPow = Builder.CreateIntrinsic(
+ Intrinsic::powi, {X->getType(), YZ->getType()}, {X, YZ}, &I);
+ return replaceInstUsesWith(I, NewPow);
+ }
+
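A sketch of the new powi combine (illustrative only, written with std::pow since C++ has no powi; the IR combine sits alongside other fast-math-only folds, and this sketch ignores floating-point rounding differences):

    #include <cmath>
    // Two powers of the same base multiply into one power with the exponents added.
    double powi_before(double x, int y, int z) { return std::pow(x, y) * std::pow(x, z); }
    double powi_after(double x, int y, int z) { return std::pow(x, y + z); }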
// exp(X) * exp(Y) -> exp(X + Y)
if (match(Op0, m_Intrinsic<Intrinsic::exp>(m_Value(X))) &&
match(Op1, m_Intrinsic<Intrinsic::exp>(m_Value(Y)))) {
@@ -706,11 +721,11 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
assert(C1.getBitWidth() == C2.getBitWidth() && "Constant widths not equal");
// Bail if we will divide by zero.
- if (C2.isNullValue())
+ if (C2.isZero())
return false;
// Bail if we would divide INT_MIN by -1.
- if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
+ if (IsSigned && C1.isMinSignedValue() && C2.isAllOnes())
return false;
APInt Remainder(C1.getBitWidth(), /*val=*/0ULL, IsSigned);
@@ -778,11 +793,12 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
}
if ((IsSigned && match(Op0, m_NSWShl(m_Value(X), m_APInt(C1))) &&
- *C1 != C1->getBitWidth() - 1) ||
- (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))))) {
+ C1->ult(C1->getBitWidth() - 1)) ||
+ (!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))) &&
+ C1->ult(C1->getBitWidth()))) {
APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
APInt C1Shifted = APInt::getOneBitSet(
- C1->getBitWidth(), static_cast<unsigned>(C1->getLimitedValue()));
+ C1->getBitWidth(), static_cast<unsigned>(C1->getZExtValue()));
// (X << C1) / C2 -> X / (C2 >> C1) if C2 is a multiple of 1 << C1.
if (isMultiple(*C2, C1Shifted, Quotient, IsSigned)) {
@@ -803,7 +819,7 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
}
}
- if (!C2->isNullValue()) // avoid X udiv 0
+ if (!C2->isZero()) // avoid X udiv 0
if (Instruction *FoldedDiv = foldBinOpIntoSelectOrPhi(I))
return FoldedDiv;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
index 37c7e6135501..7dc516c6fdc3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp
@@ -215,6 +215,20 @@ LLVM_NODISCARD Value *Negator::visitImpl(Value *V, unsigned Depth) {
: Builder.CreateSExt(I->getOperand(0), I->getType(),
I->getName() + ".neg");
break;
+ case Instruction::Select: {
+ // If both arms of the select are constants, we don't need to recurse.
+ // Therefore, this transform is not limited by uses.
+ auto *Sel = cast<SelectInst>(I);
+ Constant *TrueC, *FalseC;
+ if (match(Sel->getTrueValue(), m_ImmConstant(TrueC)) &&
+ match(Sel->getFalseValue(), m_ImmConstant(FalseC))) {
+ Constant *NegTrueC = ConstantExpr::getNeg(TrueC);
+ Constant *NegFalseC = ConstantExpr::getNeg(FalseC);
+ return Builder.CreateSelect(Sel->getCondition(), NegTrueC, NegFalseC,
+ I->getName() + ".neg", /*MDFrom=*/I);
+ }
+ break;
+ }
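A sketch of the new Negator case above (illustrative only): when both arms of a select are immediate constants, the negation folds directly into the constants, so no recursion is needed.

    int neg_before(bool c) { return -(c ? 5 : -3); }
    int neg_after(bool c) { return c ? -5 : 3; }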
default:
break; // Other instructions require recursive reasoning.
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 6c6351c70e3a..35739c3b9a21 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -299,6 +299,29 @@ Instruction *InstCombinerImpl::foldIntegerTypedPHI(PHINode &PN) {
IntToPtr->getOperand(0)->getType());
}
+// Remove a round-trip IntToPtr/PtrToInt cast on a PHI operand and
+// fold the PHI operand to a bitcast.
+Instruction *InstCombinerImpl::foldPHIArgIntToPtrToPHI(PHINode &PN) {
+ // convert ptr2int ( phi[ int2ptr(ptr2int(x))] ) --> ptr2int ( phi [ x ] )
+ // Make sure all uses of phi are ptr2int.
+ if (!all_of(PN.users(), [](User *U) { return isa<PtrToIntInst>(U); }))
+ return nullptr;
+
+ // Iterate over all operands to check for round-trip casts that can be
+ // simplified.
+ bool OperandWithRoundTripCast = false;
+ for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) {
+ if (auto *NewOp =
+ simplifyIntToPtrRoundTripCast(PN.getIncomingValue(OpNum))) {
+ PN.setIncomingValue(OpNum, NewOp);
+ OperandWithRoundTripCast = true;
+ }
+ }
+ if (!OperandWithRoundTripCast)
+ return nullptr;
+ return &PN;
+}
+
/// If we have something like phi [insertvalue(a,b,0), insertvalue(c,d,0)],
/// turn this into a phi[a,c] and phi[b,d] and a single insertvalue.
Instruction *
@@ -1306,6 +1329,9 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
if (Instruction *Result = foldPHIArgZextsIntoPHI(PN))
return Result;
+ if (Instruction *Result = foldPHIArgIntToPtrToPHI(PN))
+ return Result;
+
// If all PHI operands are the same operation, pull them through the PHI,
// reducing code size.
if (isa<Instruction>(PN.getIncomingValue(0)) &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ce2b913dba61..4a1e82ae9c1d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -38,15 +38,16 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
static Value *createMinMax(InstCombiner::BuilderTy &Builder,
SelectPatternFlavor SPF, Value *A, Value *B) {
@@ -165,7 +166,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
// simplify/reduce the instructions.
APInt TC = *SelTC;
APInt FC = *SelFC;
- if (!TC.isNullValue() && !FC.isNullValue()) {
+ if (!TC.isZero() && !FC.isZero()) {
// If the select constants differ by exactly one bit and that's the same
// bit that is masked and checked by the select condition, the select can
// be replaced by bitwise logic to set/clear one bit of the constant result.
@@ -202,7 +203,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
// Determine which shift is needed to transform result of the 'and' into the
// desired result.
- const APInt &ValC = !TC.isNullValue() ? TC : FC;
+ const APInt &ValC = !TC.isZero() ? TC : FC;
unsigned ValZeros = ValC.logBase2();
unsigned AndZeros = AndMask.logBase2();
@@ -224,7 +225,7 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
// Okay, now we know that everything is set up, we just don't know whether we
// have a icmp_ne or icmp_eq and whether the true or false val is the zero.
- bool ShouldNotVal = !TC.isNullValue();
+ bool ShouldNotVal = !TC.isZero();
ShouldNotVal ^= Pred == ICmpInst::ICMP_NE;
if (ShouldNotVal)
V = Builder.CreateXor(V, ValC);
@@ -319,8 +320,16 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
Value *X, *Y;
if (match(TI, m_FNeg(m_Value(X))) && match(FI, m_FNeg(m_Value(Y))) &&
(TI->hasOneUse() || FI->hasOneUse())) {
+ // Intersect FMF from the fneg instructions and union those with the select.
+ FastMathFlags FMF = TI->getFastMathFlags();
+ FMF &= FI->getFastMathFlags();
+ FMF |= SI.getFastMathFlags();
Value *NewSel = Builder.CreateSelect(Cond, X, Y, SI.getName() + ".v", &SI);
- return UnaryOperator::CreateFNegFMF(NewSel, TI);
+ if (auto *NewSelI = dyn_cast<Instruction>(NewSel))
+ NewSelI->setFastMathFlags(FMF);
+ Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewSel);
+ NewFNeg->setFastMathFlags(FMF);
+ return NewFNeg;
}
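A sketch of the fneg hoisting handled above (illustrative only): a select of two negations becomes one negation of a select, and the changed lines intersect the fast-math flags of the two fneg instructions and union them with the select's flags for the new instructions.

    // IEEE negation only flips the sign bit, so both forms agree bit for bit.
    double fneg_before(bool c, double x, double y) { return c ? -x : -y; }
    double fneg_after(bool c, double x, double y) { return -(c ? x : y); }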
// Min/max intrinsic with a common operand can have the common operand pulled
@@ -420,10 +429,9 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
}
static bool isSelect01(const APInt &C1I, const APInt &C2I) {
- if (!C1I.isNullValue() && !C2I.isNullValue()) // One side must be zero.
+ if (!C1I.isZero() && !C2I.isZero()) // One side must be zero.
return false;
- return C1I.isOneValue() || C1I.isAllOnesValue() ||
- C2I.isOneValue() || C2I.isAllOnesValue();
+ return C1I.isOne() || C1I.isAllOnes() || C2I.isOne() || C2I.isAllOnes();
}
/// Try to fold the select into one of the operands to allow further
@@ -715,6 +723,58 @@ static Instruction *foldSetClearBits(SelectInst &Sel,
return nullptr;
}
+// select (x == 0), 0, x * y --> freeze(y) * x
+// select (y == 0), 0, x * y --> freeze(x) * y
+// select (x == 0), undef, x * y --> freeze(y) * x
+// select (x == undef), 0, x * y --> freeze(y) * x
+// Using the mul result instead of 0 makes the result more poisonous,
+// so the operand that was not checked in the condition should be frozen.
+// The latter folding is applied only when the constant compared with x is
+// a vector consisting of 0 and undefs. If the constant compared with x is
+// a scalar undefined value or an undefined vector, the expression should
+// already have been folded into a constant.
+static Instruction *foldSelectZeroOrMul(SelectInst &SI, InstCombinerImpl &IC) {
+ auto *CondVal = SI.getCondition();
+ auto *TrueVal = SI.getTrueValue();
+ auto *FalseVal = SI.getFalseValue();
+ Value *X, *Y;
+ ICmpInst::Predicate Predicate;
+
+ // Assume that the constant compared with x is not undef (but it may be
+ // a vector with some undef elements). Otherwise (when that constant is
+ // undef) the select expression should already have been simplified.
+ if (!match(CondVal, m_ICmp(Predicate, m_Value(X), m_Zero())) ||
+ !ICmpInst::isEquality(Predicate))
+ return nullptr;
+
+ if (Predicate == ICmpInst::ICMP_NE)
+ std::swap(TrueVal, FalseVal);
+
+ // Check that TrueVal is a constant instead of matching it with m_Zero()
+ // to handle the case when it is a scalar undef value or a vector containing
+ // non-zero elements that are masked by undef elements in the compare
+ // constant.
+ auto *TrueValC = dyn_cast<Constant>(TrueVal);
+ if (TrueValC == nullptr ||
+ !match(FalseVal, m_c_Mul(m_Specific(X), m_Value(Y))) ||
+ !isa<Instruction>(FalseVal))
+ return nullptr;
+
+ auto *ZeroC = cast<Constant>(cast<Instruction>(CondVal)->getOperand(1));
+ auto *MergedC = Constant::mergeUndefsWith(TrueValC, ZeroC);
+ // If X is compared with 0 then TrueVal could be either zero or undef.
+ // m_Zero matches vectors containing some undef elements, but for scalars
+ // m_Undef should be used explicitly.
+ if (!match(MergedC, m_Zero()) && !match(MergedC, m_Undef()))
+ return nullptr;
+
+ auto *FalseValI = cast<Instruction>(FalseVal);
+ auto *FrY = IC.InsertNewInstBefore(new FreezeInst(Y, Y->getName() + ".fr"),
+ *FalseValI);
+ IC.replaceOperand(*FalseValI, FalseValI->getOperand(0) == Y ? 0 : 1, FrY);
+ return IC.replaceInstUsesWith(SI, FalseValI);
+}
+
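A C-level sketch of foldSelectZeroOrMul (illustrative only): when x == 0 the multiply already produces 0, so the select is redundant; the freeze of the unchecked operand in the real transform has no C counterpart and is omitted here.

    // For x == 0 both sides yield 0; for any other x both compute x * y.
    long long sel_before(long long x, long long y) { return x == 0 ? 0 : x * y; }
    long long sel_after(long long x, long long y) { return x * y; }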
/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
/// There are 8 commuted/swapped variants of this pattern.
/// TODO: Also support a - UMIN(a,b) patterns.
@@ -1229,8 +1289,8 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// Iff -C1 s<= C2 s<= C0-C1
// Also ULT predicate can also be UGT iff C0 != -1 (+invert result)
// SLT predicate can also be SGT iff C2 != INT_MAX (+invert res.)
-static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
- InstCombiner::BuilderTy &Builder) {
+static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
+ InstCombiner::BuilderTy &Builder) {
Value *X = Sel0.getTrueValue();
Value *Sel1 = Sel0.getFalseValue();
@@ -1238,36 +1298,42 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// Said condition must be one-use.
if (!Cmp0.hasOneUse())
return nullptr;
+ ICmpInst::Predicate Pred0 = Cmp0.getPredicate();
Value *Cmp00 = Cmp0.getOperand(0);
Constant *C0;
if (!match(Cmp0.getOperand(1),
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
return nullptr;
- // Canonicalize Cmp0 into the form we expect.
+
+ if (!isa<SelectInst>(Sel1)) {
+ Pred0 = ICmpInst::getInversePredicate(Pred0);
+ std::swap(X, Sel1);
+ }
+
+ // Canonicalize Cmp0 into ult or uge.
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
- switch (Cmp0.getPredicate()) {
+ switch (Pred0) {
case ICmpInst::Predicate::ICMP_ULT:
+ case ICmpInst::Predicate::ICMP_UGE:
+ // Although icmp ult %x, 0 is an unusual thing to try and should generally
+ // have been simplified, it does not verify with undef inputs so ensure we
+ // are not in a strange state.
+ if (!match(C0, m_SpecificInt_ICMP(
+ ICmpInst::Predicate::ICMP_NE,
+ APInt::getZero(C0->getType()->getScalarSizeInBits()))))
+ return nullptr;
break; // Great!
case ICmpInst::Predicate::ICMP_ULE:
- // We'd have to increment C0 by one, and for that it must not have all-ones
- // element, but then it would have been canonicalized to 'ult' before
- // we get here. So we can't do anything useful with 'ule'.
- return nullptr;
case ICmpInst::Predicate::ICMP_UGT:
- // We want to canonicalize it to 'ult', so we'll need to increment C0,
- // which again means it must not have any all-ones elements.
+ // We want to canonicalize it to 'ult' or 'uge', so we'll need to increment
+ // C0, which again means it must not have any all-ones elements.
if (!match(C0,
- m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_NE,
- APInt::getAllOnesValue(
- C0->getType()->getScalarSizeInBits()))))
+ m_SpecificInt_ICMP(
+ ICmpInst::Predicate::ICMP_NE,
+ APInt::getAllOnes(C0->getType()->getScalarSizeInBits()))))
return nullptr; // Can't do, have all-ones element[s].
C0 = InstCombiner::AddOne(C0);
- std::swap(X, Sel1);
break;
- case ICmpInst::Predicate::ICMP_UGE:
- // The only way we'd get this predicate if this `icmp` has extra uses,
- // but then we won't be able to do this fold.
- return nullptr;
default:
return nullptr; // Unknown predicate.
}
@@ -1277,11 +1343,16 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!Sel1->hasOneUse())
return nullptr;
+ // If the types do not match, look through any truncs to the underlying
+ // instruction.
+ if (Cmp00->getType() != X->getType() && X->hasOneUse())
+ match(X, m_TruncOrSelf(m_Value(X)));
+
// We now can finish matching the condition of the outermost select:
// it should either be the X itself, or an addition of some constant to X.
Constant *C1;
if (Cmp00 == X)
- C1 = ConstantInt::getNullValue(Sel0.getType());
+ C1 = ConstantInt::getNullValue(X->getType());
else if (!match(Cmp00,
m_Add(m_Specific(X),
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
@@ -1335,6 +1406,8 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// The thresholds of this clamp-like pattern.
auto *ThresholdLowIncl = ConstantExpr::getNeg(C1);
auto *ThresholdHighExcl = ConstantExpr::getSub(C0, C1);
+ if (Pred0 == ICmpInst::Predicate::ICMP_UGE)
+ std::swap(ThresholdLowIncl, ThresholdHighExcl);
// The fold has a precondition 1: C2 s>= ThresholdLow
auto *Precond1 = ConstantExpr::getICmp(ICmpInst::Predicate::ICMP_SGE, C2,
@@ -1347,15 +1420,29 @@ static Instruction *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!match(Precond2, m_One()))
return nullptr;
+ // If we are matching from a truncated input, we need to sext the
+ // ReplacementLow and ReplacementHigh values. Only do the transform if they
+ // are free to extend due to being constants.
+ if (X->getType() != Sel0.getType()) {
+ Constant *LowC, *HighC;
+ if (!match(ReplacementLow, m_ImmConstant(LowC)) ||
+ !match(ReplacementHigh, m_ImmConstant(HighC)))
+ return nullptr;
+ ReplacementLow = ConstantExpr::getSExt(LowC, X->getType());
+ ReplacementHigh = ConstantExpr::getSExt(HighC, X->getType());
+ }
+
// All good, finally emit the new pattern.
Value *ShouldReplaceLow = Builder.CreateICmpSLT(X, ThresholdLowIncl);
Value *ShouldReplaceHigh = Builder.CreateICmpSGE(X, ThresholdHighExcl);
Value *MaybeReplacedLow =
Builder.CreateSelect(ShouldReplaceLow, ReplacementLow, X);
- Instruction *MaybeReplacedHigh =
- SelectInst::Create(ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
- return MaybeReplacedHigh;
+ // Create the final select. If we looked through a truncate above, we will
+ // need to retruncate the result.
+ Value *MaybeReplacedHigh = Builder.CreateSelect(
+ ShouldReplaceHigh, ReplacementHigh, MaybeReplacedLow);
+ return Builder.CreateTrunc(MaybeReplacedHigh, Sel0.getType());
}
// If we have
@@ -1446,8 +1533,8 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewAbs = canonicalizeAbsNabs(SI, *ICI, *this))
return NewAbs;
- if (Instruction *NewAbs = canonicalizeClampLike(SI, *ICI, Builder))
- return NewAbs;
+ if (Value *V = canonicalizeClampLike(SI, *ICI, Builder))
+ return replaceInstUsesWith(SI, V);
if (Instruction *NewSel =
tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
@@ -1816,9 +1903,7 @@ foldOverflowingAddSubSelect(SelectInst &SI, InstCombiner::BuilderTy &Builder) {
m_Value(TrueVal), m_Value(FalseVal))))
return false;
- auto IsZeroOrOne = [](const APInt &C) {
- return C.isNullValue() || C.isOneValue();
- };
+ auto IsZeroOrOne = [](const APInt &C) { return C.isZero() || C.isOne(); };
auto IsMinMax = [&](Value *Min, Value *Max) {
APInt MinVal = APInt::getSignedMinValue(Ty->getScalarSizeInBits());
APInt MaxVal = APInt::getSignedMaxValue(Ty->getScalarSizeInBits());
@@ -2182,7 +2267,7 @@ static Instruction *moveAddAfterMinMax(SelectPatternFlavor SPF, Value *X,
}
/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
-Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
+Instruction *InstCombinerImpl::matchSAddSubSat(Instruction &MinMax1) {
Type *Ty = MinMax1.getType();
// We are looking for a tree of:
@@ -2212,23 +2297,14 @@ Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
return nullptr;
- // Also make sure that the number of uses is as expected. The "3"s are for the
- // the two items of min/max (the compare and the select).
- if (MinMax2->hasNUsesOrMore(3) || AddSub->hasNUsesOrMore(3))
+ // Also make sure that the number of uses is as expected. The 3 is for the
+ // two items of the compare and the select, or 2 for a min/max intrinsic.
+ unsigned ExpUses = isa<IntrinsicInst>(MinMax1) ? 2 : 3;
+ if (MinMax2->hasNUsesOrMore(ExpUses) || AddSub->hasNUsesOrMore(ExpUses))
return nullptr;
// Create the new type (which can be a vector type)
Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
- // Match the two extends from the add/sub
- Value *A, *B;
- if(!match(AddSub, m_BinOp(m_SExt(m_Value(A)), m_SExt(m_Value(B)))))
- return nullptr;
- // And check the incoming values are of a type smaller than or equal to the
- // size of the saturation. Otherwise the higher bits can cause different
- // results.
- if (A->getType()->getScalarSizeInBits() > NewBitWidth ||
- B->getType()->getScalarSizeInBits() > NewBitWidth)
- return nullptr;
Intrinsic::ID IntrinsicID;
if (AddSub->getOpcode() == Instruction::Add)
@@ -2238,10 +2314,16 @@ Instruction *InstCombinerImpl::matchSAddSubSat(SelectInst &MinMax1) {
else
return nullptr;
+ // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
+ // is usually achieved via a sext from a smaller type.
+ if (ComputeMinSignedBits(AddSub->getOperand(0), 0, AddSub) > NewBitWidth ||
+ ComputeMinSignedBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
+ return nullptr;
+
// Finally create and return the sat intrinsic, truncated to the new type
Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
- Value *AT = Builder.CreateSExt(A, NewTy);
- Value *BT = Builder.CreateSExt(B, NewTy);
+ Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
+ Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
Value *Sat = Builder.CreateCall(F, {AT, BT});
return CastInst::Create(Instruction::SExt, Sat, Ty);
}
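A sketch of the pattern matchSAddSubSat now recognizes (illustrative only; the 'after' version spells out the saturation that llvm.sadd.sat would perform, since C++ has no portable saturating add): a widened add that is clamped back to the narrow type's signed range is a saturating add in that type, and after this change the operands only need to fit in the narrow width by known-bits reasoning instead of being literal sext instructions.

    #include <algorithm>
    #include <cstdint>
    int32_t sat_before(int8_t a, int8_t b) {
      return std::clamp<int32_t>(int32_t(a) + int32_t(b), -128, 127);
    }
    int32_t sat_after(int8_t a, int8_t b) {
      int32_t s = int32_t(a) + int32_t(b);            // exact sum of the narrow values
      return s < -128 ? -128 : (s > 127 ? 127 : s);   // i8 saturation, widened back
    }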
@@ -2432,7 +2514,7 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
unsigned NumElts = VecTy->getNumElements();
APInt UndefElts(NumElts, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(NumElts));
+ APInt AllOnesEltMask(APInt::getAllOnes(NumElts));
if (Value *V = SimplifyDemandedVectorElts(&Sel, AllOnesEltMask, UndefElts)) {
if (V != &Sel)
return replaceInstUsesWith(Sel, V);
@@ -2754,11 +2836,16 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
/* IsAnd */ IsAnd))
return I;
- if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal))
- if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1))
+ if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) {
+ if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) {
if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd,
/* IsLogical */ true))
return replaceInstUsesWith(SI, V);
+
+ if (auto *V = foldEqOfParts(ICmp0, ICmp1, IsAnd))
+ return replaceInstUsesWith(SI, V);
+ }
+ }
}
// select (select a, true, b), c, false -> select a, c, false
@@ -2863,14 +2950,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
}
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
- // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work. We
- // also require nnan because we do not want to unintentionally change the
- // sign of a NaN value.
+ // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
// (X <= +/-0.0) ? (0.0 - X) : X --> fabs(X)
- Instruction *FSub;
if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(FalseVal))) &&
- match(TrueVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
(Pred == FCmpInst::FCMP_OLE || Pred == FCmpInst::FCMP_ULE)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
return replaceInstUsesWith(SI, Fabs);
@@ -2878,7 +2961,6 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// (X > +/-0.0) ? X : (0.0 - X) --> fabs(X)
if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
match(FalseVal, m_FSub(m_PosZeroFP(), m_Specific(TrueVal))) &&
- match(FalseVal, m_Instruction(FSub)) && FSub->hasNoNaNs() &&
(Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_UGT)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI);
return replaceInstUsesWith(SI, Fabs);
@@ -2886,11 +2968,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// With nnan and nsz:
// (X < +/-0.0) ? -X : X --> fabs(X)
// (X <= +/-0.0) ? -X : X --> fabs(X)
- Instruction *FNeg;
if (match(CondVal, m_FCmp(Pred, m_Specific(FalseVal), m_AnyZeroFP())) &&
- match(TrueVal, m_FNeg(m_Specific(FalseVal))) &&
- match(TrueVal, m_Instruction(FNeg)) && FNeg->hasNoNaNs() &&
- FNeg->hasNoSignedZeros() && SI.hasNoSignedZeros() &&
+ match(TrueVal, m_FNeg(m_Specific(FalseVal))) && SI.hasNoSignedZeros() &&
(Pred == FCmpInst::FCMP_OLT || Pred == FCmpInst::FCMP_OLE ||
Pred == FCmpInst::FCMP_ULT || Pred == FCmpInst::FCMP_ULE)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, FalseVal, &SI);
@@ -2900,9 +2979,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// (X > +/-0.0) ? X : -X --> fabs(X)
// (X >= +/-0.0) ? X : -X --> fabs(X)
if (match(CondVal, m_FCmp(Pred, m_Specific(TrueVal), m_AnyZeroFP())) &&
- match(FalseVal, m_FNeg(m_Specific(TrueVal))) &&
- match(FalseVal, m_Instruction(FNeg)) && FNeg->hasNoNaNs() &&
- FNeg->hasNoSignedZeros() && SI.hasNoSignedZeros() &&
+ match(FalseVal, m_FNeg(m_Specific(TrueVal))) && SI.hasNoSignedZeros() &&
(Pred == FCmpInst::FCMP_OGT || Pred == FCmpInst::FCMP_OGE ||
Pred == FCmpInst::FCMP_UGT || Pred == FCmpInst::FCMP_UGE)) {
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, TrueVal, &SI);
@@ -2920,6 +2997,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
return Add;
if (Instruction *Or = foldSetClearBits(SI, Builder))
return Or;
+ if (Instruction *Mul = foldSelectZeroOrMul(SI, *this))
+ return Mul;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
auto *TI = dyn_cast<Instruction>(TrueVal);
@@ -2939,8 +3018,10 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (Gep->getNumOperands() != 2 || Gep->getPointerOperand() != Base ||
!Gep->hasOneUse())
return nullptr;
- Type *ElementType = Gep->getResultElementType();
Value *Idx = Gep->getOperand(1);
+ if (isa<VectorType>(CondVal->getType()) && !isa<VectorType>(Idx->getType()))
+ return nullptr;
+ Type *ElementType = Gep->getResultElementType();
Value *NewT = Idx;
Value *NewF = Constant::getNullValue(Idx->getType());
if (Swap)
@@ -3188,9 +3269,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (!CondVal->getType()->isVectorTy() && !AC.assumptions().empty()) {
KnownBits Known(1);
computeKnownBits(CondVal, Known, 0, &SI);
- if (Known.One.isOneValue())
+ if (Known.One.isOne())
return replaceInstUsesWith(SI, TrueVal);
- if (Known.Zero.isOneValue())
+ if (Known.Zero.isOne())
return replaceInstUsesWith(SI, FalseVal);
}
@@ -3230,7 +3311,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *Mask;
if (match(TrueVal, m_Zero()) &&
match(FalseVal, m_MaskedLoad(m_Value(), m_Value(), m_Value(Mask),
- m_CombineOr(m_Undef(), m_Zero())))) {
+ m_CombineOr(m_Undef(), m_Zero()))) &&
+ (CondVal->getType() == Mask->getType())) {
// We can remove the select by ensuring the load zeros all lanes the
// select would have. We determine this by proving there is no overlap
// between the load and select masks.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index ca5e473fdecb..06421d553915 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -41,7 +41,7 @@ bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
(Sh0->getType()->getScalarSizeInBits() - 1) +
(Sh1->getType()->getScalarSizeInBits() - 1);
APInt MaximalRepresentableShiftAmount =
- APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
+ APInt::getAllOnes(ShAmt0->getType()->getScalarSizeInBits());
return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
}
@@ -172,8 +172,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
// There are many variants to this pattern:
// a) (x & ((1 << MaskShAmt) - 1)) << ShiftShAmt
// b) (x & (~(-1 << MaskShAmt))) << ShiftShAmt
-// c) (x & (-1 >> MaskShAmt)) << ShiftShAmt
-// d) (x & ((-1 << MaskShAmt) >> MaskShAmt)) << ShiftShAmt
+// c) (x & (-1 l>> MaskShAmt)) << ShiftShAmt
+// d) (x & ((-1 << MaskShAmt) l>> MaskShAmt)) << ShiftShAmt
// e) ((x << MaskShAmt) l>> MaskShAmt) << ShiftShAmt
// f) ((x << MaskShAmt) a>> MaskShAmt) << ShiftShAmt
// All these patterns can be simplified to just:
@@ -213,11 +213,11 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
auto MaskA = m_Add(m_Shl(m_One(), m_Value(MaskShAmt)), m_AllOnes());
// (~(-1 << maskNbits))
auto MaskB = m_Xor(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_AllOnes());
- // (-1 >> MaskShAmt)
- auto MaskC = m_Shr(m_AllOnes(), m_Value(MaskShAmt));
- // ((-1 << MaskShAmt) >> MaskShAmt)
+ // (-1 l>> MaskShAmt)
+ auto MaskC = m_LShr(m_AllOnes(), m_Value(MaskShAmt));
+ // ((-1 << MaskShAmt) l>> MaskShAmt)
auto MaskD =
- m_Shr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
+ m_LShr(m_Shl(m_AllOnes(), m_Value(MaskShAmt)), m_Deferred(MaskShAmt));
Value *X;
Constant *NewMask;
@@ -240,7 +240,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// that shall remain in the root value (OuterShift).
// An extend of an undef value becomes zero because the high bits are never
- // completely unknown. Replace the the `undef` shift amounts with final
+ // completely unknown. Replace the `undef` shift amounts with final
// shift bitwidth to ensure that the value remains undef when creating the
// subsequent shift op.
SumOfShAmts = Constant::replaceUndefsWith(
@@ -272,7 +272,7 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// shall be unset in the root value (OuterShift).
// An extend of an undef value becomes zero because the high bits are never
- // completely unknown. Replace the the `undef` shift amounts with negated
+ // completely unknown. Replace the `undef` shift amounts with negated
// bitwidth of innermost shift to ensure that the value remains undef when
// creating the subsequent shift op.
unsigned WidestTyBitWidth = WidestTy->getScalarSizeInBits();
@@ -346,9 +346,8 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
// TODO: Remove the one-use check if the other logic operand (Y) is constant.
Value *X, *Y;
auto matchFirstShift = [&](Value *V) {
- BinaryOperator *BO;
APInt Threshold(Ty->getScalarSizeInBits(), Ty->getScalarSizeInBits());
- return match(V, m_BinOp(BO)) && BO->getOpcode() == ShiftOpcode &&
+ return match(V, m_BinOp(ShiftOpcode, m_Value(), m_Value())) &&
match(V, m_OneUse(m_Shift(m_Value(X), m_Constant(C0)))) &&
match(ConstantExpr::getAdd(C0, C1),
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
@@ -661,23 +660,22 @@ static bool canShiftBinOpWithConstantRHS(BinaryOperator &Shift,
Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I) {
- bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
const APInt *Op1C;
if (!match(Op1, m_APInt(Op1C)))
return nullptr;
// See if we can propagate this shift into the input, this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
+ bool IsLeftShift = I.getOpcode() == Instruction::Shl;
if (I.getOpcode() != Instruction::AShr &&
- canEvaluateShifted(Op0, Op1C->getZExtValue(), isLeftShift, *this, &I)) {
+ canEvaluateShifted(Op0, Op1C->getZExtValue(), IsLeftShift, *this, &I)) {
LLVM_DEBUG(
dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: "
<< *Op0 << "\n SH: " << I << "\n");
return replaceInstUsesWith(
- I, getShiftedValue(Op0, Op1C->getZExtValue(), isLeftShift, *this, DL));
+ I, getShiftedValue(Op0, Op1C->getZExtValue(), IsLeftShift, *this, DL));
}
// See if we can simplify any instructions used by the instruction whose sole
@@ -686,202 +684,72 @@ Instruction *InstCombinerImpl::FoldShiftByConstant(Value *Op0, Constant *Op1,
unsigned TypeBits = Ty->getScalarSizeInBits();
assert(!Op1C->uge(TypeBits) &&
"Shift over the type width should have been removed already");
+ (void)TypeBits;
if (Instruction *FoldedShift = foldBinOpIntoSelectOrPhi(I))
return FoldedShift;
- // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
- if (auto *TI = dyn_cast<TruncInst>(Op0)) {
- // If 'shift2' is an ashr, we would have to get the sign bit into a funny
- // place. Don't try to do this transformation in this case. Also, we
- // require that the input operand is a shift-by-constant so that we have
- // confidence that the shifts will get folded together. We could do this
- // xform in more cases, but it is unlikely to be profitable.
- const APInt *TrShiftAmt;
- if (I.isLogicalShift() &&
- match(TI->getOperand(0), m_Shift(m_Value(), m_APInt(TrShiftAmt)))) {
- auto *TrOp = cast<Instruction>(TI->getOperand(0));
- Type *SrcTy = TrOp->getType();
-
- // Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = ConstantExpr::getZExt(Op1, SrcTy);
- // (shift2 (shift1 & 0x00FF), c2)
- Value *NSh = Builder.CreateBinOp(I.getOpcode(), TrOp, ShAmt, I.getName());
-
- // For logical shifts, the truncation has the effect of making the high
- // part of the register be zeros. Emulate this by inserting an AND to
- // clear the top bits as needed. This 'and' will usually be zapped by
- // other xforms later if dead.
- unsigned SrcSize = SrcTy->getScalarSizeInBits();
- Constant *MaskV =
- ConstantInt::get(SrcTy, APInt::getLowBitsSet(SrcSize, TypeBits));
-
- // The mask we constructed says what the trunc would do if occurring
- // between the shifts. We want to know the effect *after* the second
- // shift. We know that it is a logical shift by a constant, so adjust the
- // mask as appropriate.
- MaskV = ConstantExpr::get(I.getOpcode(), MaskV, ShAmt);
- // shift1 & 0x00FF
- Value *And = Builder.CreateAnd(NSh, MaskV, TI->getName());
- // Return the value truncated to the interesting size.
- return new TruncInst(And, Ty);
- }
- }
-
- if (Op0->hasOneUse()) {
- if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- Value *V1;
- const APInt *CC;
- switch (Op0BO->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- // These operators commute.
- // Turn (Y + (X >> C)) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(1)->hasOneUse() &&
- match(Op0BO->getOperand(1), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), YS, V1,
- Op0BO->getOperand(1)->getName());
- unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
- APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
- Constant *Mask = ConstantInt::get(Ty, Bits);
- return BinaryOperator::CreateAnd(X, Mask);
- }
-
- // Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
- Value *Op0BOOp1 = Op0BO->getOperand(1);
- if (isLeftShift && Op0BOOp1->hasOneUse() &&
- match(Op0BOOp1, m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
- m_APInt(CC)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(0), Op1, Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder.CreateAnd(
- V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1),
- V1->getName() + ".mask");
- return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
- }
- LLVM_FALLTHROUGH;
- }
-
- case Instruction::Sub: {
- // Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0), m_Shr(m_Value(V1),
- m_Specific(Op1)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // (X + (Y << C))
- Value *X = Builder.CreateBinOp(Op0BO->getOpcode(), V1, YS,
- Op0BO->getOperand(0)->getName());
- unsigned Op1Val = Op1C->getLimitedValue(TypeBits);
- APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
- Constant *Mask = ConstantInt::get(Ty, Bits);
- return BinaryOperator::CreateAnd(X, Mask);
- }
-
- // Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
- if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
- match(Op0BO->getOperand(0),
- m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
- m_APInt(CC)))) {
- Value *YS = // (Y << C)
- Builder.CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
- // X & (CC << C)
- Value *XM = Builder.CreateAnd(
- V1, ConstantExpr::getShl(ConstantInt::get(Ty, *CC), Op1),
- V1->getName() + ".mask");
- return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
- }
-
- break;
- }
- }
+ if (!Op0->hasOneUse())
+ return nullptr;
- // If the operand is a bitwise operator with a constant RHS, and the
- // shift is the only use, we can pull it out of the shift.
- const APInt *Op0C;
- if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
- if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(Op0BO->getOperand(1)), Op1);
+ if (auto *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
+ // If the operand is a bitwise operator with a constant RHS, and the
+ // shift is the only use, we can pull it out of the shift.
+ const APInt *Op0C;
+ if (match(Op0BO->getOperand(1), m_APInt(Op0C))) {
+ if (canShiftBinOpWithConstantRHS(I, Op0BO)) {
+ Constant *NewRHS = ConstantExpr::get(
+ I.getOpcode(), cast<Constant>(Op0BO->getOperand(1)), Op1);
- Value *NewShift =
+ Value *NewShift =
Builder.CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
- NewShift->takeName(Op0BO);
-
- return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
- NewRHS);
- }
- }
-
- // If the operand is a subtract with a constant LHS, and the shift
- // is the only use, we can pull it out of the shift.
- // This folds (shl (sub C1, X), C2) -> (sub (C1 << C2), (shl X, C2))
- if (isLeftShift && Op0BO->getOpcode() == Instruction::Sub &&
- match(Op0BO->getOperand(0), m_APInt(Op0C))) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(Op0BO->getOperand(0)), Op1);
-
- Value *NewShift = Builder.CreateShl(Op0BO->getOperand(1), Op1);
NewShift->takeName(Op0BO);
- return BinaryOperator::CreateSub(NewRHS, NewShift);
+ return BinaryOperator::Create(Op0BO->getOpcode(), NewShift, NewRHS);
}
}
+ }
- // If we have a select that conditionally executes some binary operator,
- // see if we can pull it the select and operator through the shift.
- //
- // For example, turning:
- // shl (select C, (add X, C1), X), C2
- // Into:
- // Y = shl X, C2
- // select C, (add Y, C1 << C2), Y
- Value *Cond;
- BinaryOperator *TBO;
- Value *FalseVal;
- if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
- m_Value(FalseVal)))) {
- const APInt *C;
- if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
- match(TBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, TBO)) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(TBO->getOperand(1)), Op1);
-
- Value *NewShift =
- Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1);
- Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift,
- NewRHS);
- return SelectInst::Create(Cond, NewOp, NewShift);
- }
+ // If we have a select that conditionally executes some binary operator,
+ // see if we can pull the select and the operator through the shift.
+ //
+ // For example, turning:
+ // shl (select C, (add X, C1), X), C2
+ // Into:
+ // Y = shl X, C2
+ // select C, (add Y, C1 << C2), Y
+ Value *Cond;
+ BinaryOperator *TBO;
+ Value *FalseVal;
+ if (match(Op0, m_Select(m_Value(Cond), m_OneUse(m_BinOp(TBO)),
+ m_Value(FalseVal)))) {
+ const APInt *C;
+ if (!isa<Constant>(FalseVal) && TBO->getOperand(0) == FalseVal &&
+ match(TBO->getOperand(1), m_APInt(C)) &&
+ canShiftBinOpWithConstantRHS(I, TBO)) {
+ Constant *NewRHS = ConstantExpr::get(
+ I.getOpcode(), cast<Constant>(TBO->getOperand(1)), Op1);
+
+ Value *NewShift = Builder.CreateBinOp(I.getOpcode(), FalseVal, Op1);
+ Value *NewOp = Builder.CreateBinOp(TBO->getOpcode(), NewShift, NewRHS);
+ return SelectInst::Create(Cond, NewOp, NewShift);
}
+ }
- BinaryOperator *FBO;
- Value *TrueVal;
- if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal),
- m_OneUse(m_BinOp(FBO))))) {
- const APInt *C;
- if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
- match(FBO->getOperand(1), m_APInt(C)) &&
- canShiftBinOpWithConstantRHS(I, FBO)) {
- Constant *NewRHS = ConstantExpr::get(I.getOpcode(),
- cast<Constant>(FBO->getOperand(1)), Op1);
-
- Value *NewShift =
- Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1);
- Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift,
- NewRHS);
- return SelectInst::Create(Cond, NewShift, NewOp);
- }
+ BinaryOperator *FBO;
+ Value *TrueVal;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(TrueVal),
+ m_OneUse(m_BinOp(FBO))))) {
+ const APInt *C;
+ if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal &&
+ match(FBO->getOperand(1), m_APInt(C)) &&
+ canShiftBinOpWithConstantRHS(I, FBO)) {
+ Constant *NewRHS = ConstantExpr::get(
+ I.getOpcode(), cast<Constant>(FBO->getOperand(1)), Op1);
+
+ Value *NewShift = Builder.CreateBinOp(I.getOpcode(), TrueVal, Op1);
+ Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS);
+ return SelectInst::Create(Cond, NewShift, NewOp);
}
}
@@ -908,41 +776,41 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
Type *Ty = I.getType();
unsigned BitWidth = Ty->getScalarSizeInBits();
- const APInt *ShAmtAPInt;
- if (match(Op1, m_APInt(ShAmtAPInt))) {
- unsigned ShAmt = ShAmtAPInt->getZExtValue();
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ unsigned ShAmtC = C->getZExtValue();
- // shl (zext X), ShAmt --> zext (shl X, ShAmt)
+ // shl (zext X), C --> zext (shl X, C)
// This is only valid if X would have zeros shifted out.
Value *X;
if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
- if (ShAmt < SrcWidth &&
- MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmt), 0, &I))
- return new ZExtInst(Builder.CreateShl(X, ShAmt), Ty);
+ if (ShAmtC < SrcWidth &&
+ MaskedValueIsZero(X, APInt::getHighBitsSet(SrcWidth, ShAmtC), 0, &I))
+ return new ZExtInst(Builder.CreateShl(X, ShAmtC), Ty);
}
// (X >> C) << C --> X & (-1 << C)
if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1)))) {
- APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
}
- const APInt *ShOp1;
- if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(ShOp1)))) &&
- ShOp1->ult(BitWidth)) {
- unsigned ShrAmt = ShOp1->getZExtValue();
- if (ShrAmt < ShAmt) {
- // If C1 < C2: (X >>?,exact C1) << C2 --> X << (C2 - C1)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt);
+ const APInt *C1;
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_APInt(C1)))) &&
+ C1->ult(BitWidth)) {
+ unsigned ShrAmt = C1->getZExtValue();
+ if (ShrAmt < ShAmtC) {
+ // If C1 < C: (X >>?,exact C1) << C --> X << (C - C1)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
return NewShl;
}
- if (ShrAmt > ShAmt) {
- // If C1 > C2: (X >>?exact C1) << C2 --> X >>?exact (C1 - C2)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt);
+ if (ShrAmt > ShAmtC) {
+ // If C1 > C: (X >>?exact C1) << C --> X >>?exact (C1 - C)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
auto *NewShr = BinaryOperator::Create(
cast<BinaryOperator>(Op0)->getOpcode(), X, ShiftDiff);
NewShr->setIsExact(true);
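
As a small worked example of the (X >> C) << C --> X & (-1 << C) case above (hypothetical function name):

  define i32 @shr_then_shl(i32 %x) {
    %s = lshr i32 %x, 4
    %r = shl i32 %s, 4
    ret i32 %r
  }

should reduce to roughly and i32 %x, -16, i.e. the mask -1 << 4.
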
@@ -950,49 +818,135 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
}
}
- if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(ShOp1)))) &&
- ShOp1->ult(BitWidth)) {
- unsigned ShrAmt = ShOp1->getZExtValue();
- if (ShrAmt < ShAmt) {
- // If C1 < C2: (X >>? C1) << C2 --> X << (C2 - C1) & (-1 << C2)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShrAmt);
+ if (match(Op0, m_OneUse(m_Shr(m_Value(X), m_APInt(C1)))) &&
+ C1->ult(BitWidth)) {
+ unsigned ShrAmt = C1->getZExtValue();
+ if (ShrAmt < ShAmtC) {
+ // If C1 < C: (X >>? C1) << C --> (X << (C - C1)) & (-1 << C)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShrAmt);
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
Builder.Insert(NewShl);
- APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
- if (ShrAmt > ShAmt) {
- // If C1 > C2: (X >>? C1) << C2 --> X >>? (C1 - C2) & (-1 << C2)
- Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmt);
+ if (ShrAmt > ShAmtC) {
+ // If C1 > C: (X >>? C1) << C --> (X >>? (C1 - C)) & (-1 << C)
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShrAmt - ShAmtC);
auto *OldShr = cast<BinaryOperator>(Op0);
auto *NewShr =
BinaryOperator::Create(OldShr->getOpcode(), X, ShiftDiff);
NewShr->setIsExact(OldShr->isExact());
Builder.Insert(NewShr);
- APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewShr, ConstantInt::get(Ty, Mask));
}
}
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
- unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // Similar to above, but look through an intermediate trunc instruction.
+ BinaryOperator *Shr;
+ if (match(Op0, m_OneUse(m_Trunc(m_OneUse(m_BinOp(Shr))))) &&
+ match(Shr, m_Shr(m_Value(X), m_APInt(C1)))) {
+ // The larger shift direction survives through the transform.
+ unsigned ShrAmtC = C1->getZExtValue();
+ unsigned ShDiff = ShrAmtC > ShAmtC ? ShrAmtC - ShAmtC : ShAmtC - ShrAmtC;
+ Constant *ShiftDiffC = ConstantInt::get(X->getType(), ShDiff);
+ auto ShiftOpc = ShrAmtC > ShAmtC ? Shr->getOpcode() : Instruction::Shl;
+
+ // If C1 > C:
+ // (trunc (X >> C1)) << C --> (trunc (X >> (C1 - C))) & (-1 << C)
+ // If C > C1:
+ // (trunc (X >> C1)) << C --> (trunc (X << (C - C1))) & (-1 << C)
+ Value *NewShift = Builder.CreateBinOp(ShiftOpc, X, ShiftDiffC, "sh.diff");
+ Value *Trunc = Builder.CreateTrunc(NewShift, Ty, "tr.sh.diff");
+ APInt Mask(APInt::getHighBitsSet(BitWidth, BitWidth - ShAmtC));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, Mask));
+ }
+
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
// Oversized shifts are simplified to zero in InstSimplify.
if (AmtSum < BitWidth)
// (X << C1) << C2 --> X << (C1 + C2)
return BinaryOperator::CreateShl(X, ConstantInt::get(Ty, AmtSum));
}
+ // If we have an opposite shift by the same amount, we may be able to
+ // reorder binops and shifts to eliminate math/logic.
+ auto isSuitableBinOpcode = [](Instruction::BinaryOps BinOpcode) {
+ switch (BinOpcode) {
+ default:
+ return false;
+ case Instruction::Add:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Sub:
+ // NOTE: Sub is not commutative and the transforms below may not be valid
+ // when the shift-right is operand 1 (RHS) of the sub.
+ return true;
+ }
+ };
+ BinaryOperator *Op0BO;
+ if (match(Op0, m_OneUse(m_BinOp(Op0BO))) &&
+ isSuitableBinOpcode(Op0BO->getOpcode())) {
+ // Commute so shift-right is on LHS of the binop.
+ // (Y bop (X >> C)) << C -> ((X >> C) bop Y) << C
+ // (Y bop ((X >> C) & CC)) << C -> (((X >> C) & CC) bop Y) << C
+ Value *Shr = Op0BO->getOperand(0);
+ Value *Y = Op0BO->getOperand(1);
+ Value *X;
+ const APInt *CC;
+ if (Op0BO->isCommutative() && Y->hasOneUse() &&
+ (match(Y, m_Shr(m_Value(), m_Specific(Op1))) ||
+ match(Y, m_And(m_OneUse(m_Shr(m_Value(), m_Specific(Op1))),
+ m_APInt(CC)))))
+ std::swap(Shr, Y);
+
+ // ((X >> C) bop Y) << C -> (X bop (Y << C)) & (~0 << C)
+ if (match(Shr, m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))))) {
+ // Y << C
+ Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
+ // (X bop (Y << C))
+ Value *B =
+ Builder.CreateBinOp(Op0BO->getOpcode(), X, YS, Shr->getName());
+ unsigned Op1Val = C->getLimitedValue(BitWidth);
+ APInt Bits = APInt::getHighBitsSet(BitWidth, BitWidth - Op1Val);
+ Constant *Mask = ConstantInt::get(Ty, Bits);
+ return BinaryOperator::CreateAnd(B, Mask);
+ }
+
+ // (((X >> C) & CC) bop Y) << C -> (X & (CC << C)) bop (Y << C)
+ if (match(Shr,
+ m_OneUse(m_And(m_OneUse(m_Shr(m_Value(X), m_Specific(Op1))),
+ m_APInt(CC))))) {
+ // Y << C
+ Value *YS = Builder.CreateShl(Y, Op1, Op0BO->getName());
+ // X & (CC << C)
+ Value *M = Builder.CreateAnd(X, ConstantInt::get(Ty, CC->shl(*C)),
+ X->getName() + ".mask");
+ return BinaryOperator::Create(Op0BO->getOpcode(), M, YS);
+ }
+ }
+
+ // (C1 - X) << C --> (C1 << C) - (X << C)
+ if (match(Op0, m_OneUse(m_Sub(m_APInt(C1), m_Value(X))))) {
+ Constant *NewLHS = ConstantInt::get(Ty, C1->shl(*C));
+ Value *NewShift = Builder.CreateShl(X, Op1);
+ return BinaryOperator::CreateSub(NewLHS, NewShift);
+ }
+
// If the shifted-out value is known-zero, then this is a NUW shift.
if (!I.hasNoUnsignedWrap() &&
- MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmt), 0, &I)) {
+ MaskedValueIsZero(Op0, APInt::getHighBitsSet(BitWidth, ShAmtC), 0,
+ &I)) {
I.setHasNoUnsignedWrap();
return &I;
}
// If the shifted-out value is all signbits, then this is a NSW shift.
- if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmt) {
+ if (!I.hasNoSignedWrap() && ComputeNumSignBits(Op0, 0, &I) > ShAmtC) {
I.setHasNoSignedWrap();
return &I;
}
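
For the new (C1 - X) << C fold, a minimal sketch (illustrative names only):

  define i8 @sub_from_const_shl(i8 %x) {
    %s = sub i8 10, %x
    %r = shl i8 %s, 3
    ret i8 %r
  }

is expected to become roughly %sh = shl i8 %x, 3 followed by sub i8 80, %sh, with the constant pre-shifted (10 << 3 = 80).
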
@@ -1048,12 +1002,12 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
- const APInt *ShAmtAPInt;
- if (match(Op1, m_APInt(ShAmtAPInt))) {
- unsigned ShAmt = ShAmtAPInt->getZExtValue();
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ unsigned ShAmtC = C->getZExtValue();
unsigned BitWidth = Ty->getScalarSizeInBits();
auto *II = dyn_cast<IntrinsicInst>(Op0);
- if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmt &&
+ if (II && isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == ShAmtC &&
(II->getIntrinsicID() == Intrinsic::ctlz ||
II->getIntrinsicID() == Intrinsic::cttz ||
II->getIntrinsicID() == Intrinsic::ctpop)) {
@@ -1067,78 +1021,81 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
}
Value *X;
- const APInt *ShOp1;
- if (match(Op0, m_Shl(m_Value(X), m_APInt(ShOp1))) && ShOp1->ult(BitWidth)) {
- if (ShOp1->ult(ShAmt)) {
- unsigned ShlAmt = ShOp1->getZExtValue();
- Constant *ShiftDiff = ConstantInt::get(Ty, ShAmt - ShlAmt);
+ const APInt *C1;
+ if (match(Op0, m_Shl(m_Value(X), m_APInt(C1))) && C1->ult(BitWidth)) {
+ if (C1->ult(ShAmtC)) {
+ unsigned ShlAmtC = C1->getZExtValue();
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShAmtC - ShlAmtC);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C2 --> X >>u (C2 - C1)
+ // (X <<nuw C1) >>u C --> X >>u (C - C1)
auto *NewLShr = BinaryOperator::CreateLShr(X, ShiftDiff);
NewLShr->setIsExact(I.isExact());
return NewLShr;
}
- // (X << C1) >>u C2 --> (X >>u (C2 - C1)) & (-1 >> C2)
+ // (X << C1) >>u C --> (X >>u (C - C1)) & (-1 >> C)
Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
- APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
}
- if (ShOp1->ugt(ShAmt)) {
- unsigned ShlAmt = ShOp1->getZExtValue();
- Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmt - ShAmt);
+ if (C1->ugt(ShAmtC)) {
+ unsigned ShlAmtC = C1->getZExtValue();
+ Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C2 --> X <<nuw (C1 - C2)
+ // (X <<nuw C1) >>u C --> X <<nuw (C1 - C)
auto *NewShl = BinaryOperator::CreateShl(X, ShiftDiff);
NewShl->setHasNoUnsignedWrap(true);
return NewShl;
}
- // (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2)
+ // (X << C1) >>u C --> X << (C1 - C) & (-1 >> C)
Value *NewShl = Builder.CreateShl(X, ShiftDiff);
- APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
}
- assert(*ShOp1 == ShAmt);
+ assert(*C1 == ShAmtC);
// (X << C) >>u C --> X & (-1 >>u C)
- APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt));
+ APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
}
if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) &&
(!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
- assert(ShAmt < X->getType()->getScalarSizeInBits() &&
+ assert(ShAmtC < X->getType()->getScalarSizeInBits() &&
"Big shift not simplified to zero?");
// lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
- Value *NewLShr = Builder.CreateLShr(X, ShAmt);
+ Value *NewLShr = Builder.CreateLShr(X, ShAmtC);
return new ZExtInst(NewLShr, Ty);
}
- if (match(Op0, m_SExt(m_Value(X))) &&
- (!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType()))) {
- // Are we moving the sign bit to the low bit and widening with high zeros?
+ if (match(Op0, m_SExt(m_Value(X)))) {
unsigned SrcTyBitWidth = X->getType()->getScalarSizeInBits();
- if (ShAmt == BitWidth - 1) {
- // lshr (sext i1 X to iN), N-1 --> zext X to iN
- if (SrcTyBitWidth == 1)
- return new ZExtInst(X, Ty);
+ // lshr (sext i1 X to iN), C --> select (X, -1 >> C, 0)
+ if (SrcTyBitWidth == 1) {
+ auto *NewC = ConstantInt::get(
+ Ty, APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
+ return SelectInst::Create(X, NewC, ConstantInt::getNullValue(Ty));
+ }
- // lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
- if (Op0->hasOneUse()) {
+ if ((!Ty->isIntegerTy() || shouldChangeType(Ty, X->getType())) &&
+ Op0->hasOneUse()) {
+ // Are we moving the sign bit to the low bit and widening with high
+ // zeros? lshr (sext iM X to iN), N-1 --> zext (lshr X, M-1) to iN
+ if (ShAmtC == BitWidth - 1) {
Value *NewLShr = Builder.CreateLShr(X, SrcTyBitWidth - 1);
return new ZExtInst(NewLShr, Ty);
}
- }
- // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
- if (ShAmt == BitWidth - SrcTyBitWidth && Op0->hasOneUse()) {
- // The new shift amount can't be more than the narrow source type.
- unsigned NewShAmt = std::min(ShAmt, SrcTyBitWidth - 1);
- Value *AShr = Builder.CreateAShr(X, NewShAmt);
- return new ZExtInst(AShr, Ty);
+ // lshr (sext iM X to iN), N-M --> zext (ashr X, min(N-M, M-1)) to iN
+ if (ShAmtC == BitWidth - SrcTyBitWidth) {
+ // The new shift amount can't be more than the narrow source type.
+ unsigned NewShAmt = std::min(ShAmtC, SrcTyBitWidth - 1);
+ Value *AShr = Builder.CreateAShr(X, NewShAmt);
+ return new ZExtInst(AShr, Ty);
+ }
}
}
Value *Y;
- if (ShAmt == BitWidth - 1) {
+ if (ShAmtC == BitWidth - 1) {
// lshr i32 or(X,-X), 31 --> zext (X != 0)
if (match(Op0, m_OneUse(m_c_Or(m_Neg(m_Value(X)), m_Deferred(X)))))
return new ZExtInst(Builder.CreateIsNotNull(X), Ty);
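
The new sext-i1 handling can be illustrated with (hypothetical function):

  define i32 @lshr_of_sext_i1(i1 %b) {
    %s = sext i1 %b to i32
    %r = lshr i32 %s, 8
    ret i32 %r
  }

which is expected to become roughly select i1 %b, i32 16777215, i32 0, where 16777215 is -1 >>u 8.
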
@@ -1150,32 +1107,55 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
// Check if a number is negative and odd:
// lshr i32 (srem X, 2), 31 --> and (X >> 31), X
if (match(Op0, m_OneUse(m_SRem(m_Value(X), m_SpecificInt(2))))) {
- Value *Signbit = Builder.CreateLShr(X, ShAmt);
+ Value *Signbit = Builder.CreateLShr(X, ShAmtC);
return BinaryOperator::CreateAnd(Signbit, X);
}
}
- if (match(Op0, m_LShr(m_Value(X), m_APInt(ShOp1)))) {
- unsigned AmtSum = ShAmt + ShOp1->getZExtValue();
+ // (X >>u C1) >>u C --> X >>u (C1 + C)
+ if (match(Op0, m_LShr(m_Value(X), m_APInt(C1)))) {
// Oversized shifts are simplified to zero in InstSimplify.
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
if (AmtSum < BitWidth)
- // (X >>u C1) >>u C2 --> X >>u (C1 + C2)
return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
}
+ Instruction *TruncSrc;
+ if (match(Op0, m_OneUse(m_Trunc(m_Instruction(TruncSrc)))) &&
+ match(TruncSrc, m_LShr(m_Value(X), m_APInt(C1)))) {
+ unsigned SrcWidth = X->getType()->getScalarSizeInBits();
+ unsigned AmtSum = ShAmtC + C1->getZExtValue();
+
+ // If the combined shift fits in the source width:
+ // (trunc (X >>u C1)) >>u C --> and (trunc (X >>u (C1 + C))), MaskC
+ //
+ // If the first shift covers the number of bits truncated, then the
+ // mask instruction is eliminated (and so the use check is relaxed).
+ if (AmtSum < SrcWidth &&
+ (TruncSrc->hasOneUse() || C1->uge(SrcWidth - BitWidth))) {
+ Value *SumShift = Builder.CreateLShr(X, AmtSum, "sum.shift");
+ Value *Trunc = Builder.CreateTrunc(SumShift, Ty, I.getName());
+
+ // If the first shift does not cover the number of bits truncated, then
+ // we require a mask to get rid of high bits in the result.
+ APInt MaskC = APInt::getAllOnes(BitWidth).lshr(ShAmtC);
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Ty, MaskC));
+ }
+ }
+
// Look for a "splat" mul pattern - it replicates bits across each half of
// a value, so a right shift is just a mask of the low bits:
// lshr i32 (mul nuw X, Pow2+1), 16 --> and X, Pow2-1
// TODO: Generalize to allow more than just half-width shifts?
const APInt *MulC;
if (match(Op0, m_NUWMul(m_Value(X), m_APInt(MulC))) &&
- ShAmt * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
- MulC->logBase2() == ShAmt)
+ ShAmtC * 2 == BitWidth && (*MulC - 1).isPowerOf2() &&
+ MulC->logBase2() == ShAmtC)
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *MulC - 2));
// If the shifted-out value is known-zero, then this is an exact shift.
if (!I.isExact() &&
- MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmt), 0, &I)) {
+ MaskedValueIsZero(Op0, APInt::getLowBitsSet(BitWidth, ShAmtC), 0, &I)) {
I.setIsExact();
return &I;
}
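
A sketch of the splat-mul pattern mentioned above (illustrative only): with Pow2 = 65536,

  define i32 @splat_mul_high_half(i32 %x) {
    %m = mul nuw i32 %x, 65537
    %r = lshr i32 %m, 16
    ret i32 %r
  }

is expected to fold to roughly and i32 %x, 65535, matching the MulC - 2 constant built by the code.
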
@@ -1346,6 +1326,22 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
}
}
+ // Prefer `-(x & 1)` over `(x << (bitwidth(x)-1)) a>> (bitwidth(x)-1)`
+ // as the pattern to splat the lowest bit.
+ // FIXME: iff X is already masked, we don't need the one-use check.
+ Value *X;
+ if (match(Op1, m_SpecificIntAllowUndef(BitWidth - 1)) &&
+ match(Op0, m_OneUse(m_Shl(m_Value(X),
+ m_SpecificIntAllowUndef(BitWidth - 1))))) {
+ Constant *Mask = ConstantInt::get(Ty, 1);
+ // Retain the knowledge about the ignored lanes.
+ Mask = Constant::mergeUndefsWith(
+ Constant::mergeUndefsWith(Mask, cast<Constant>(Op1)),
+ cast<Constant>(cast<Instruction>(Op0)->getOperand(1)));
+ X = Builder.CreateAnd(X, Mask);
+ return BinaryOperator::CreateNeg(X);
+ }
+
if (Instruction *R = foldVariableSignZeroExtensionOfVariableHighBitExtract(I))
return R;
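
The preferred -(x & 1) form added above corresponds to IR like (hand-written example):

  define i32 @splat_lowest_bit(i32 %x) {
    %s = shl i32 %x, 31
    %r = ashr i32 %s, 31
    ret i32 %r
  }

which is expected to become roughly %m = and i32 %x, 1 followed by sub i32 0, %m.
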
@@ -1354,7 +1350,6 @@ Instruction *InstCombinerImpl::visitAShr(BinaryOperator &I) {
return BinaryOperator::CreateLShr(Op0, Op1);
// ashr (xor %x, -1), %y --> xor (ashr %x, %y), -1
- Value *X;
if (match(Op0, m_OneUse(m_Not(m_Value(X))))) {
// Note that we must drop 'exact'-ness of the shift!
// Note that we can't keep undef's in -1 vector constant!
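
For the ashr-of-not fold referenced in this hunk, a minimal example (names are illustrative):

  define i32 @ashr_of_not(i32 %x, i32 %y) {
    %n = xor i32 %x, -1
    %r = ashr i32 %n, %y
    ret i32 %r
  }

is expected to become roughly xor (ashr i32 %x, %y), -1, with any exact flag dropped as the comment warns.
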
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 15b51ae8a5ee..e357a9da8b12 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -55,7 +55,7 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
KnownBits Known(BitWidth);
- APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
+ APInt DemandedMask(APInt::getAllOnes(BitWidth));
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
@@ -124,7 +124,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
Known.resetAll();
- if (DemandedMask.isNullValue()) // Not demanding any bits from V.
+ if (DemandedMask.isZero()) // Not demanding any bits from V.
return UndefValue::get(VTy);
if (Depth == MaxAnalysisRecursionDepth)
@@ -274,8 +274,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// constant because that's a canonical 'not' op, and that is better for
// combining, SCEV, and codegen.
const APInt *C;
- if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) {
- if ((*C | ~DemandedMask).isAllOnesValue()) {
+ if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnes()) {
+ if ((*C | ~DemandedMask).isAllOnes()) {
// Force bits to 1 to create a 'not' op.
I->setOperand(1, ConstantInt::getAllOnesValue(VTy));
return I;
@@ -385,8 +385,26 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Known = KnownBits::commonBits(LHSKnown, RHSKnown);
break;
}
- case Instruction::ZExt:
case Instruction::Trunc: {
+ // If we do not demand the high bits of a right-shifted and truncated value,
+ // then we may be able to truncate it before the shift.
+ Value *X;
+ const APInt *C;
+ if (match(I->getOperand(0), m_OneUse(m_LShr(m_Value(X), m_APInt(C))))) {
+ // The shift amount must be valid (not poison) in the narrow type, and
+ // it must not be greater than the number of high bits that are not
+ // demanded in the result.
+ if (C->ult(I->getType()->getScalarSizeInBits()) &&
+ C->ule(DemandedMask.countLeadingZeros())) {
+ // trunc (lshr X, C) --> lshr (trunc X), C
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(I);
+ Value *Trunc = Builder.CreateTrunc(X, I->getType());
+ return Builder.CreateLShr(Trunc, C->getZExtValue());
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+ case Instruction::ZExt: {
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth);
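
A sketch of when the new Trunc handling fires; the trailing and supplies the demanded-bits mask, and the names are made up:

  define i8 @trunc_of_lshr(i32 %x) {
    %s = lshr i32 %x, 2
    %t = trunc i32 %s to i8
    %r = and i8 %t, 15
    ret i8 %r
  }

Only the low 4 bits of the trunc are demanded and the shift amount (2) does not exceed the 4 undemanded high bits, so this is expected to become roughly trunc i32 %x to i8, then lshr i8 by 2, then the same and i8 15.
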
@@ -516,8 +534,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I->getOperand(0);
// We can't do this with the LHS for subtraction, unless we are only
// demanding the LSB.
- if ((I->getOpcode() == Instruction::Add ||
- DemandedFromOps.isOneValue()) &&
+ if ((I->getOpcode() == Instruction::Add || DemandedFromOps.isOne()) &&
DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
@@ -615,7 +632,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask.isOneValue()) {
+ if (DemandedMask.isOne()) {
// Perform the logical shift right.
Instruction *NewVal = BinaryOperator::CreateLShr(
I->getOperand(0), I->getOperand(1), I->getName());
@@ -743,7 +760,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::URem: {
KnownBits Known2(BitWidth);
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt AllOnes = APInt::getAllOnes(BitWidth);
if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
return I;
@@ -829,6 +846,29 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
KnownBitsComputed = true;
break;
}
+ case Intrinsic::umax: {
+ // UMax(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-zero bit of C.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(II->getArgOperand(1), m_APInt(C)) &&
+ CTZ >= C->getActiveBits())
+ return II->getArgOperand(0);
+ break;
+ }
+ case Intrinsic::umin: {
+ // UMin(A, C) == A if ...
+ // The lowest non-zero bit of DemandedMask is higher than the highest
+ // non-one bit of C.
+ // This comes from applying De Morgan's laws to the umax case above.
+ const APInt *C;
+ unsigned CTZ = DemandedMask.countTrailingZeros();
+ if (match(II->getArgOperand(1), m_APInt(C)) &&
+ CTZ >= C->getBitWidth() - C->countLeadingOnes())
+ return II->getArgOperand(0);
+ break;
+ }
default: {
// Handle target specific intrinsics
Optional<Value *> V = targetSimplifyDemandedUseBitsIntrinsic(
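
For the new umax case, a hand-written illustration (assuming the usual llvm.umax overload):

  declare i32 @llvm.umax.i32(i32, i32)

  define i32 @umax_low_bits_unused(i32 %a) {
    %m = call i32 @llvm.umax.i32(i32 %a, i32 7)
    %r = and i32 %m, -8
    ret i32 %r
  }

Only bits 3 and up are demanded, and the constant 7 has no bits at or above bit 3, so the umax is expected to be replaced by %a, leaving roughly and i32 %a, -8.
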
@@ -1021,8 +1061,8 @@ Value *InstCombinerImpl::simplifyShrShlDemandedBits(
Known.Zero.setLowBits(ShlAmt - 1);
Known.Zero &= DemandedMask;
- APInt BitMask1(APInt::getAllOnesValue(BitWidth));
- APInt BitMask2(APInt::getAllOnesValue(BitWidth));
+ APInt BitMask1(APInt::getAllOnes(BitWidth));
+ APInt BitMask2(APInt::getAllOnes(BitWidth));
bool isLshr = (Shr->getOpcode() == Instruction::LShr);
BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
@@ -1088,7 +1128,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return nullptr;
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
- APInt EltMask(APInt::getAllOnesValue(VWidth));
+ APInt EltMask(APInt::getAllOnes(VWidth));
assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!");
if (match(V, m_Undef())) {
@@ -1097,7 +1137,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
return nullptr;
}
- if (DemandedElts.isNullValue()) { // If nothing is demanded, provide poison.
+ if (DemandedElts.isZero()) { // If nothing is demanded, provide poison.
UndefElts = EltMask;
return PoisonValue::get(V->getType());
}
@@ -1107,7 +1147,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
if (auto *C = dyn_cast<Constant>(V)) {
// Check if this is identity. If so, return 0 since we are not simplifying
// anything.
- if (DemandedElts.isAllOnesValue())
+ if (DemandedElts.isAllOnes())
return nullptr;
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
@@ -1260,7 +1300,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Handle trivial case of a splat. Only check the first element of LHS
// operand.
if (all_of(Shuffle->getShuffleMask(), [](int Elt) { return Elt == 0; }) &&
- DemandedElts.isAllOnesValue()) {
+ DemandedElts.isAllOnes()) {
if (!match(I->getOperand(1), m_Undef())) {
I->setOperand(1, PoisonValue::get(I->getOperand(1)->getType()));
MadeChange = true;
@@ -1515,8 +1555,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Subtlety: If we load from a pointer, the pointer must be valid
// regardless of whether the element is demanded. Doing otherwise risks
// segfaults which didn't exist in the original program.
- APInt DemandedPtrs(APInt::getAllOnesValue(VWidth)),
- DemandedPassThrough(DemandedElts);
+ APInt DemandedPtrs(APInt::getAllOnes(VWidth)),
+ DemandedPassThrough(DemandedElts);
if (auto *CV = dyn_cast<ConstantVector>(II->getOperand(2)))
for (unsigned i = 0; i < VWidth; i++) {
Constant *CElt = CV->getAggregateElement(i);
@@ -1568,7 +1608,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// If we've proven all of the lanes undef, return an undef value.
// TODO: Intersect w/demanded lanes
- if (UndefElts.isAllOnesValue())
+ if (UndefElts.isAllOnes())
      return UndefValue::get(I->getType());
return MadeChange ? I : nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 32b15376f898..32e537897140 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -35,37 +35,46 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumAggregateReconstructionsSimplified,
"Number of aggregate reconstructions turned into reuse of the "
"original aggregate");
/// Return true if the value is cheaper to scalarize than it is to leave as a
-/// vector operation. IsConstantExtractIndex indicates whether we are extracting
-/// one known element from a vector constant.
+/// vector operation. If the extract index \p EI is a constant integer then
+/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
-static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
+static bool cheapToScalarize(Value *V, Value *EI) {
+ ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
+
// If we can pick a scalar constant value out of a vector, that is free.
if (auto *C = dyn_cast<Constant>(V))
- return IsConstantExtractIndex || C->getSplatValue();
+ return CEI || C->getSplatValue();
+
+ if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
+ ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
+ // The index needs to be lower than the minimum size of the vector,
+ // because for scalable vectors the actual size is only known at run time.
+ return CEI->getValue().ult(EC.getKnownMinValue());
+ }
// An insertelement to the same constant index as our extract will simplify
// to the scalar inserted element. An insertelement to a different constant
// index is irrelevant to our extract.
if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
- return IsConstantExtractIndex;
+ return CEI;
if (match(V, m_OneUse(m_Load(m_Value()))))
return true;
@@ -75,14 +84,12 @@ static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
Value *V0, *V1;
if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
- if (cheapToScalarize(V0, IsConstantExtractIndex) ||
- cheapToScalarize(V1, IsConstantExtractIndex))
+ if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
return true;
CmpInst::Predicate UnusedPred;
if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
- if (cheapToScalarize(V0, IsConstantExtractIndex) ||
- cheapToScalarize(V1, IsConstantExtractIndex))
+ if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
return true;
return false;
@@ -119,7 +126,8 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
// and that it is a binary operation which is cheap to scalarize.
// otherwise return nullptr.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
- !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
+ !(isa<BinaryOperator>(PHIUser)) ||
+ !cheapToScalarize(PHIUser, EI.getIndexOperand()))
return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
@@ -170,24 +178,46 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
return &EI;
}
-static Instruction *foldBitcastExtElt(ExtractElementInst &Ext,
- InstCombiner::BuilderTy &Builder,
- bool IsBigEndian) {
+Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
Value *X;
uint64_t ExtIndexC;
if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
- !X->getType()->isVectorTy() ||
!match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
return nullptr;
+ ElementCount NumElts =
+ cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
+ Type *DestTy = Ext.getType();
+ bool IsBigEndian = DL.isBigEndian();
+
+ // If we are casting an integer to a vector and extracting a portion, that is
+ // a shift-right and truncate.
+ // TODO: Allow FP dest type by casting the trunc to FP?
+ if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() &&
+ isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) {
+ assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
+ "Expected fixed vector type for bitcast from scalar integer");
+
+ // Big endian requires adjusting the extract index since MSB is at index 0.
+ // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
+ // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
+ if (IsBigEndian)
+ ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
+ unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits();
+ if (!ShiftAmountC || Ext.getVectorOperand()->hasOneUse()) {
+ Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
+ return new TruncInst(Lshr, DestTy);
+ }
+ }
+
+ if (!X->getType()->isVectorTy())
+ return nullptr;
+
// If this extractelement is using a bitcast from a vector of the same number
// of elements, see if we can find the source element from the source vector:
// extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
auto *SrcTy = cast<VectorType>(X->getType());
- Type *DestTy = Ext.getType();
ElementCount NumSrcElts = SrcTy->getElementCount();
- ElementCount NumElts =
- cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
if (NumSrcElts == NumElts)
if (Value *Elt = findScalarElement(X, ExtIndexC))
return new BitCastInst(Elt, DestTy);
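
The scalar-integer bitcast case added above can be pictured as (hypothetical function; result depends on target endianness):

  define i8 @extract_second_byte(i32 %x) {
    %v = bitcast i32 %x to <4 x i8>
    %e = extractelement <4 x i8> %v, i64 1
    ret i8 %e
  }

On a little-endian target this is expected to become roughly lshr i32 %x, 8 followed by trunc to i8; on big-endian the index is mirrored, so the shift amount would be 16.
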
@@ -274,7 +304,7 @@ static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
// Conservatively assume that all elements are needed.
- APInt UsedElts(APInt::getAllOnesValue(VWidth));
+ APInt UsedElts(APInt::getAllOnes(VWidth));
switch (UserInstr->getOpcode()) {
case Instruction::ExtractElement: {
@@ -322,11 +352,11 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
} else {
- UnionUsedElts = APInt::getAllOnesValue(VWidth);
+ UnionUsedElts = APInt::getAllOnes(VWidth);
break;
}
- if (UnionUsedElts.isAllOnesValue())
+ if (UnionUsedElts.isAllOnes())
break;
}
@@ -388,7 +418,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// If the input vector has multiple uses, simplify it based on a union
// of all elements used.
APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
APInt UndefElts(NumElts, 0);
if (Value *V = SimplifyDemandedVectorElts(
SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
@@ -402,7 +432,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
}
- if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
+ if (Instruction *I = foldBitcastExtElt(EI))
return I;
// If there's a vector PHI feeding a scalar use through this extractelement
@@ -415,7 +445,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
// TODO come up with a n-ary matcher that subsumes both unary and
// binary matchers.
UnaryOperator *UO;
- if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
+ if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
// extelt (unop X), Index --> unop (extelt X, Index)
Value *X = UO->getOperand(0);
Value *E = Builder.CreateExtractElement(X, Index);
@@ -423,7 +453,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
}
BinaryOperator *BO;
- if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
+ if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
// extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
Value *E0 = Builder.CreateExtractElement(X, Index);
@@ -434,7 +464,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
Value *X, *Y;
CmpInst::Predicate Pred;
if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
- cheapToScalarize(SrcVec, IndexC)) {
+ cheapToScalarize(SrcVec, Index)) {
// extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
Value *E0 = Builder.CreateExtractElement(X, Index);
Value *E1 = Builder.CreateExtractElement(Y, Index);
@@ -651,8 +681,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
return;
- auto *WideVec =
- new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
+ auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
// Insert the new shuffle after the vector operand of the extract is defined
// (as long as it's not a PHI) or at the start of the basic block of the
@@ -913,7 +942,7 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
"We don't store nullptr in SourceAggregate!");
assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
(I.index() != 0) &&
- "SourceAggregate should be valid after the the first element,");
+ "SourceAggregate should be valid after the first element,");
// For this element, is there a plausible source aggregate?
// FIXME: we could special-case undef element, IFF we know that in the
@@ -1179,7 +1208,7 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
if (!ElementPresent[i])
Mask[i] = -1;
- return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
+ return new ShuffleVectorInst(FirstIE, Mask);
}
/// Try to fold an insert element into an existing splat shuffle by changing
@@ -1208,15 +1237,15 @@ static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
// Replace the shuffle mask element at the index of this insert with a zero.
// For example:
- // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
- // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
+ // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
+ // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
unsigned NumMaskElts =
cast<FixedVectorType>(Shuf->getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts);
for (unsigned i = 0; i != NumMaskElts; ++i)
NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
- return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
+ return new ShuffleVectorInst(Op0, NewMask);
}
/// Try to fold an extract+insert element into an existing identity shuffle by
@@ -1348,6 +1377,10 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
NewShufElts[I] = ShufConstVec->getAggregateElement(I);
NewMaskElts[I] = Mask[I];
}
+
+ // Bail if we failed to find an element.
+ if (!NewShufElts[I])
+ return nullptr;
}
// Create new operands for a shuffle that includes the constant of the
@@ -1399,6 +1432,41 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
return nullptr;
}
+/// If both the base vector and the inserted element are extended from the same
+/// type, do the insert element in the narrow source type followed by extend.
+/// TODO: This can be extended to include other cast opcodes, but particularly
+/// if we create a wider insertelement, make sure codegen is not harmed.
+static Instruction *narrowInsElt(InsertElementInst &InsElt,
+ InstCombiner::BuilderTy &Builder) {
+ // We are creating a vector extend. If the original vector extend has another
+ // use, that would mean we end up with 2 vector extends, so avoid that.
+ // TODO: We could ease the use-clause to "if at least one op has one use"
+ // (assuming that the source types match - see next TODO comment).
+ Value *Vec = InsElt.getOperand(0);
+ if (!Vec->hasOneUse())
+ return nullptr;
+
+ Value *Scalar = InsElt.getOperand(1);
+ Value *X, *Y;
+ CastInst::CastOps CastOpcode;
+ if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
+ CastOpcode = Instruction::FPExt;
+ else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
+ CastOpcode = Instruction::SExt;
+ else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
+ CastOpcode = Instruction::ZExt;
+ else
+ return nullptr;
+
+ // TODO: We can allow mismatched types by creating an intermediate cast.
+ if (X->getType()->getScalarType() != Y->getType())
+ return nullptr;
+
+ // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
+ Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
+ return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
+}
+
Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
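
A minimal sketch of the inselt-of-extends fold introduced by narrowInsElt (illustrative names; zext chosen arbitrarily among the three handled casts):

  define <4 x i32> @narrow_insert(<4 x i16> %v, i16 %s) {
    %ve = zext <4 x i16> %v to <4 x i32>
    %se = zext i16 %s to i32
    %r = insertelement <4 x i32> %ve, i32 %se, i32 2
    ret <4 x i32> %r
  }

is expected to become roughly an insertelement of %s into %v at index 2 followed by a single zext of the narrow vector.
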
@@ -1495,7 +1563,7 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
if (V != &IE)
return replaceInstUsesWith(IE, V);
@@ -1518,6 +1586,9 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
return IdentityShuf;
+ if (Instruction *Ext = narrowInsElt(IE, Builder))
+ return Ext;
+
return nullptr;
}
@@ -1924,8 +1995,8 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
// Splat from element 0. Any mask element that is undefined remains undefined.
// For example:
- // shuf (inselt undef, X, 2), undef, <2,2,undef>
- // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
+ // shuf (inselt undef, X, 2), _, <2,2,undef>
+ // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
unsigned NumMaskElts =
cast<FixedVectorType>(Shuf.getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts, 0);
@@ -1933,7 +2004,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
if (Mask[i] == UndefMaskElem)
NewMask[i] = Mask[i];
- return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
+ return new ShuffleVectorInst(NewIns, NewMask);
}
/// Try to fold shuffles that are the equivalent of a vector select.
@@ -2197,12 +2268,8 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
SmallVector<int, 16> Mask;
Shuf.getShuffleMask(Mask);
- // The shuffle must not change vector sizes.
- // TODO: This restriction could be removed if the insert has only one use
- // (because the transform would require a new length-changing shuffle).
int NumElts = Mask.size();
- if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
- return nullptr;
+ int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
// This is a specialization of a fold in SimplifyDemandedVectorElts. We may
// not be able to handle it there if the insertelement has >1 use.
@@ -2219,11 +2286,16 @@ static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
// Offset the index constant by the vector width because we are checking for
// accesses to the 2nd vector input of the shuffle.
- IdxC += NumElts;
+ IdxC += InpNumElts;
// shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
if (!is_contained(Mask, (int)IdxC))
return IC.replaceOperand(Shuf, 1, X);
}
+ // For the rest of the transform, the shuffle must not change vector sizes.
+ // TODO: This restriction could be removed if the insert has only one use
+ // (because the transform would require a new length-changing shuffle).
+ if (NumElts != InpNumElts)
+ return nullptr;
// shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
@@ -2413,16 +2485,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHS == RHS) {
assert(!match(RHS, m_Undef()) &&
"Shuffle with 2 undef ops not simplified?");
- // Remap any references to RHS to use LHS.
- SmallVector<int, 16> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- // Propagate undef elements or force mask to LHS.
- if (Mask[i] < 0)
- Elts.push_back(UndefMaskElem);
- else
- Elts.push_back(Mask[i] % LHSWidth);
- }
- return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
+ return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
}
// shuffle undef, x, mask --> shuffle x, undef, mask'
@@ -2444,7 +2507,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return I;
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
if (V != &SVI)
return replaceInstUsesWith(SVI, V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4e3b18e805ee..47b6dcb67a78 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -100,7 +100,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -109,11 +108,12 @@
#include <string>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumWorklistIterations,
"Number of instruction combining iterations performed");
@@ -202,23 +202,37 @@ Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(&Builder, DL, GEP);
}
+/// Legal integers and common types are considered desirable. This is used to
+/// avoid creating instructions with types that may not be supported well by the
+/// the backend.
+/// NOTE: This treats i8, i16 and i32 specially because they are common
+/// types in frontend languages.
+bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
+ switch (BitWidth) {
+ case 8:
+ case 16:
+ case 32:
+ return true;
+ default:
+ return DL.isLegalInteger(BitWidth);
+ }
+}
+
/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal to an illegal type or from a smaller
-/// to a larger illegal type. A width of '1' is always treated as a legal type
-/// because i1 is a fundamental type in IR, and there are many specialized
-/// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as
+/// to a larger illegal type. A width of '1' is always treated as a desirable
+/// type because i1 is a fundamental type in IR, and there are many specialized
+/// optimizations for i1 types. Common/desirable widths are equally treated as
/// legal to convert to, in order to open up more combining opportunities.
-/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
-/// from frontend languages.
bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
unsigned ToWidth) const {
bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
- // Convert to widths of 8, 16 or 32 even if they are not legal types. Only
- // shrink types, to prevent infinite loops.
- if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32))
+ // Convert to desirable widths even if they are not legal types.
+ // Only shrink types, to prevent infinite loops.
+ if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
return true;
// If this is a legal integer from type, and the result would be an illegal
@@ -359,7 +373,8 @@ Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
PtrToInt->getSrcTy()->getPointerAddressSpace() &&
DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
- return Builder.CreateBitCast(PtrToInt->getOperand(0), CastTy);
+ return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
+ "", PtrToInt);
}
}
return nullptr;
@@ -961,14 +976,14 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
"Expected constant-foldable intrinsic");
Intrinsic::ID IID = II->getIntrinsicID();
- if (II->getNumArgOperands() == 1)
+ if (II->arg_size() == 1)
return Builder.CreateUnaryIntrinsic(IID, SO);
// This works for real binary ops like min/max (where we always expect the
// constant operand to be canonicalized as op1) and unary ops with a bonus
// constant argument like ctlz/cttz.
// TODO: Handle non-commutative binary intrinsics as below for binops.
- assert(II->getNumArgOperands() == 2 && "Expected binary intrinsic");
+ assert(II->arg_size() == 2 && "Expected binary intrinsic");
assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
}
@@ -1058,7 +1073,7 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
// Compare for equality including undefs as equal.
auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
const APInt *C;
- return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
};
if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
@@ -1120,9 +1135,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
BasicBlock *NonConstBB = nullptr;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
- // If I is a freeze instruction, count undef as a non-constant.
- if (match(InVal, m_ImmConstant()) &&
- (!isa<FreezeInst>(I) || isGuaranteedNotToBeUndefOrPoison(InVal)))
+ // For a non-freeze instruction, require a constant operand.
+ // For freeze, require a non-undef, non-poison operand.
+ if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
+ continue;
+ if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
continue;
if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
@@ -1268,61 +1285,19 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
/// specified offset. If so, fill them into NewIndices and return the resultant
/// element type, otherwise return null.
Type *
-InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
SmallVectorImpl<Value *> &NewIndices) {
Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
- // Start with the index over the outer type. Note that the type size
- // might be zero (even if the offset isn't zero) if the indexed type
- // is something like [0 x {int, int}]
- Type *IndexTy = DL.getIndexType(PtrTy);
- int64_t FirstIdx = 0;
- if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
- FirstIdx = Offset/TySize;
- Offset -= FirstIdx*TySize;
-
- // Handle hosts where % returns negative instead of values [0..TySize).
- if (Offset < 0) {
- --FirstIdx;
- Offset += TySize;
- assert(Offset >= 0);
- }
- assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
- }
-
- NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));
-
- // Index into the types. If we fail, set OrigBase to null.
- while (Offset) {
- // Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
- return nullptr;
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- assert(Offset < (int64_t)SL->getSizeInBytes() &&
- "Offset must stay within the indexed type");
-
- unsigned Elt = SL->getElementContainingOffset(Offset);
- NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
- Elt));
-
- Offset -= SL->getElementOffset(Elt);
- Ty = STy->getElementType(Elt);
- } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
- assert(EltSize && "Cannot index into a zero-sized array");
- NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize));
- Offset %= EltSize;
- Ty = AT->getElementType();
- } else {
- // Otherwise, we can't index into the middle of this atomic type, bail.
- return nullptr;
- }
- }
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
+ if (!Offset.isZero())
+ return nullptr;
+ for (const APInt &Index : Indices)
+ NewIndices.push_back(Builder.getInt(Index));
return Ty;
}
@@ -1623,7 +1598,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Value *XY = Builder.CreateBinOp(Opcode, X, Y);
if (auto *BO = dyn_cast<BinaryOperator>(XY))
BO->copyIRFlags(&Inst);
- return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
+ return new ShuffleVectorInst(XY, M);
};
// If both arguments of the binary operation are shuffles that use the same
@@ -1754,25 +1729,20 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Value *X;
ArrayRef<int> MaskC;
int SplatIndex;
- BinaryOperator *BO;
+ Value *Y, *OtherOp;
if (!match(LHS,
m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
!match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
- X->getType() != Inst.getType() || !match(RHS, m_OneUse(m_BinOp(BO))) ||
- BO->getOpcode() != Opcode)
+ X->getType() != Inst.getType() ||
+ !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
return nullptr;
// FIXME: This may not be safe if the analysis allows undef elements. By
// moving 'Y' before the splat shuffle, we are implicitly assuming
// that it is not undef/poison at the splat index.
- Value *Y, *OtherOp;
- if (isSplatValue(BO->getOperand(0), SplatIndex)) {
- Y = BO->getOperand(0);
- OtherOp = BO->getOperand(1);
- } else if (isSplatValue(BO->getOperand(1), SplatIndex)) {
- Y = BO->getOperand(1);
- OtherOp = BO->getOperand(0);
- } else {
+ if (isSplatValue(OtherOp, SplatIndex)) {
+ std::swap(Y, OtherOp);
+ } else if (!isSplatValue(Y, SplatIndex)) {
return nullptr;
}
@@ -1788,7 +1758,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// dropped to be safe.
if (isa<FPMathOperator>(R)) {
R->copyFastMathFlags(&Inst);
- R->andIRFlags(BO);
+ R->andIRFlags(RHS);
}
if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
NewInstBO->copyIRFlags(R);
@@ -1896,7 +1866,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
- if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
+ if (Value *V = SimplifyGEPInst(GEPEltType, Ops, GEP.isInBounds(),
+ SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
// For vector geps, use the generic demanded vector support.
@@ -1905,7 +1876,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
auto VWidth = GEPFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
UndefElts)) {
if (V != &GEP)
@@ -2117,10 +2088,12 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// -- have to recreate %src & %gep
// put NewSrc at same location as %src
Builder.SetInsertPoint(cast<Instruction>(PtrOp));
- auto *NewSrc = cast<GetElementPtrInst>(
- Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
- NewSrc->setIsInBounds(Src->isInBounds());
- auto *NewGEP =
+ Value *NewSrc =
+ Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName());
+ // Propagate 'inbounds' if the new source was not constant-folded.
+ if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
+ NewSrcGEPI->setIsInBounds(Src->isInBounds());
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
NewGEP->setIsInBounds(GEP.isInBounds());
return NewGEP;
@@ -2128,18 +2101,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
}
-
- // Fold (gep(gep(Ptr,Idx0),Idx1) -> gep(Ptr,add(Idx0,Idx1))
- if (GO1->getType() == SO1->getType()) {
- bool NewInBounds = GEP.isInBounds() && Src->isInBounds();
- auto *NewIdx =
- Builder.CreateAdd(GO1, SO1, GEP.getName() + ".idx",
- /*HasNUW*/ false, /*HasNSW*/ NewInBounds);
- auto *NewGEP = GetElementPtrInst::Create(
- GEPEltType, Src->getPointerOperand(), {NewIdx});
- NewGEP->setIsInBounds(NewInBounds);
- return NewGEP;
- }
}
// Note that if our source is a gep chain itself then we wait for that
@@ -2647,6 +2608,13 @@ static bool isAllocSiteRemovable(Instruction *AI,
Users.emplace_back(I);
continue;
}
+
+ if (isReallocLikeFn(I, TLI, true)) {
+ Users.emplace_back(I);
+ Worklist.push_back(I);
+ continue;
+ }
+
return false;
case Instruction::Store: {
@@ -2834,15 +2802,33 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
// At this point, we know that everything in FreeInstrBB can be moved
// before TI.
- for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
- It != End;) {
- Instruction &Instr = *It++;
+ for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
if (&Instr == FreeInstrBBTerminator)
break;
Instr.moveBefore(TI);
}
assert(FreeInstrBB->size() == 1 &&
"Only the branch instruction should remain");
+
+ // Now that we've moved the call to free before the NULL check, we have to
+ // remove any attributes on its parameter that imply it's non-null, because
+ // those attributes might have only been valid because of the NULL check, and
+ // we can get miscompiles if we keep them. This is conservative if non-null is
+ // also implied by something other than the NULL check, but it's guaranteed to
+ // be correct, and the conservativeness won't matter in practice, since the
+ // attributes are irrelevant for the call to free itself and the pointer
+ // shouldn't be used after the call.
+ AttributeList Attrs = FI.getAttributes();
+ Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
+ Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
+ if (Dereferenceable.isValid()) {
+ uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
+ Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
+ Attribute::Dereferenceable);
+ Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
+ }
+ FI.setAttributes(Attrs);
+
return &FI;
}
@@ -2861,6 +2847,15 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
if (isa<ConstantPointerNull>(Op))
return eraseInstFromFunction(FI);
+ // If we had free(realloc(...)) with no intervening uses, then eliminate the
+ // realloc() entirely.
+ if (CallInst *CI = dyn_cast<CallInst>(Op)) {
+ if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI, true)) {
+ return eraseInstFromFunction(
+ *replaceInstUsesWith(*CI, CI->getOperand(0)));
+ }
+ }
+
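A source-level illustration of the new fold (hypothetical function, not from the patch): when the realloc result is only ever passed to free, the pair reduces to freeing the original pointer.

#include <cstdlib>

void free_after_realloc(void *P, std::size_t N) {
  void *Tmp = std::realloc(P, N); // single use of the realloc result...
  std::free(Tmp);                 // ...so the pair folds to std::free(P)
}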
// If we optimize for code size, try to move the call to free before the null
// test so that simplify cfg can remove the empty block and dead code
// elimination the branch. I.e., helps to turn something like:
@@ -2947,7 +2942,7 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
- return isa<DbgInfoIntrinsic>(BBI) ||
+ return BBI->isDebugOrPseudoInst() ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
};
@@ -3138,26 +3133,21 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
// checking for overflow.
const APInt *C;
if (match(WO->getRHS(), m_APInt(C))) {
- // Compute the no-wrap range [X,Y) for LHS given RHS=C, then
- // check for the inverted range using range offset trick (i.e.
- // use a subtract to shift the range to bottom of either the
- // signed or unsigned domain and then use a single compare to
- // check range membership).
+ // Compute the no-wrap range for LHS given RHS=C, then construct an
+ // equivalent icmp, potentially using an offset.
ConstantRange NWR =
ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
WO->getNoWrapKind());
- APInt Min = WO->isSigned() ? NWR.getSignedMin() : NWR.getUnsignedMin();
- NWR = NWR.subtract(Min);
CmpInst::Predicate Pred;
- APInt NewRHSC;
- if (NWR.getEquivalentICmp(Pred, NewRHSC)) {
- auto *OpTy = WO->getRHS()->getType();
- auto *NewLHS = Builder.CreateSub(WO->getLHS(),
- ConstantInt::get(OpTy, Min));
- return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
- ConstantInt::get(OpTy, NewRHSC));
- }
+ APInt NewRHSC, Offset;
+ NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
+ auto *OpTy = WO->getRHS()->getType();
+ auto *NewLHS = WO->getLHS();
+ if (Offset != 0)
+ NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
+ return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
+ ConstantInt::get(OpTy, NewRHSC));
}
}
}
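A standalone sanity check of the equivalence used above (illustrative only; it assumes the Clang/GCC __builtin_add_overflow builtin): for an unsigned i8 add with the constant 200, the no-wrap range of the variable operand is [0, 56), so the extracted overflow bit is simply the inverted compare X uge 56, with no offset needed.

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X) {
    uint8_t Sum;
    bool Overflow = __builtin_add_overflow(uint8_t(X), uint8_t(200), &Sum);
    assert(Overflow == (X >= 56)); // overflow <=> X lies outside [0, 56)
  }
  return 0;
}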
@@ -3183,9 +3173,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
// Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
- AAMDNodes Nodes;
- L->getAAMetadata(Nodes);
- NL->setAAMetadata(Nodes);
+ NL->setAAMetadata(L->getAAMetadata());
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use replaceInstUsesWith().
return replaceInstUsesWith(EV, NL);
@@ -3568,8 +3556,14 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
// While we could change the other users of OrigOp to use freeze(OrigOp), that
// potentially reduces their optimization potential, so let's only do this iff
// the OrigOp is only used by the freeze.
- if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp) ||
- canCreateUndefOrPoison(dyn_cast<Operator>(OrigOp)))
+ if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
+ return nullptr;
+
+ // We can't push the freeze through an instruction which can itself create
+ // poison. If the only source of new poison is flags, we can simply
+ // strip them (since we know the only use is the freeze and nothing can
+ // benefit from them).
+ if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
return nullptr;
// If operand is guaranteed not to be poison, there is no need to add freeze
@@ -3585,6 +3579,8 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
return nullptr;
}
+ OrigOpInst->dropPoisonGeneratingFlags();
+
// If all operands are guaranteed to be non-poison, we can drop freeze.
if (!MaybePoisonOperand)
return OrigOp;
@@ -3668,7 +3664,7 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
/// instruction past all of the instructions between it and the end of its
/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
- assert(I->getSingleUndroppableUse() && "Invariants didn't hold!");
+ assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -3822,51 +3818,71 @@ bool InstCombinerImpl::run() {
// See if we can trivially sink this instruction to its user if we can
// prove that the successor is not executed more frequently than our block.
- if (EnableCodeSinking)
- if (Use *SingleUse = I->getSingleUndroppableUse()) {
- BasicBlock *BB = I->getParent();
- Instruction *UserInst = cast<Instruction>(SingleUse->getUser());
- BasicBlock *UserParent;
-
- // Get the block the use occurs in.
- if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UserParent = PN->getIncomingBlock(*SingleUse);
- else
- UserParent = UserInst->getParent();
-
- // Try sinking to another block. If that block is unreachable, then do
- // not bother. SimplifyCFG should handle it.
- if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
- // See if the user is one of our successors that has only one
- // predecessor, so that we don't have to split the critical edge.
- bool ShouldSink = UserParent->getUniquePredecessor() == BB;
- // Another option where we can sink is a block that ends with a
- // terminator that does not pass control to other block (such as
- // return or unreachable). In this case:
- // - I dominates the User (by SSA form);
- // - the User will be executed at most once.
- // So sinking I down to User is always profitable or neutral.
- if (!ShouldSink) {
- auto *Term = UserParent->getTerminator();
- ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
- }
- if (ShouldSink) {
- assert(DT.dominates(BB, UserParent) &&
- "Dominance relation broken?");
- // Okay, the CFG is simple enough, try to sink this instruction.
- if (TryToSinkInstruction(I, UserParent)) {
- LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
- MadeIRChange = true;
- // We'll add uses of the sunk instruction below, but since sinking
- // can expose opportunities for it's *operands* add them to the
- // worklist
- for (Use &U : I->operands())
- if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
- Worklist.push(OpI);
- }
+ // Return the UserBlock if successful.
+ auto getOptionalSinkBlockForInst =
+ [this](Instruction *I) -> Optional<BasicBlock *> {
+ if (!EnableCodeSinking)
+ return None;
+ auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
+ if (!UserInst)
+ return None;
+
+ BasicBlock *BB = I->getParent();
+ BasicBlock *UserParent = nullptr;
+
+ // Special handling for Phi nodes - get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (PN->getIncomingValue(i) == I) {
+ // Bail out if we have uses in different blocks. We don't do any
+ // sophisticated analysis (i.e., finding NearestCommonDominator of these
+ // use blocks).
+ if (UserParent && UserParent != PN->getIncomingBlock(i))
+ return None;
+ UserParent = PN->getIncomingBlock(i);
}
}
+ assert(UserParent && "expected to find user block!");
+ } else
+ UserParent = UserInst->getParent();
+
+ // Try sinking to another block. If that block is unreachable, then do
+ // not bother. SimplifyCFG should handle it.
+ if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
+ return None;
+
+ auto *Term = UserParent->getTerminator();
+ // See if the user is one of our successors that has only one
+ // predecessor, so that we don't have to split the critical edge.
+ // Another option where we can sink is a block that ends with a
+ // terminator that does not pass control to other block (such as
+ // return or unreachable). In this case:
+ // - I dominates the User (by SSA form);
+ // - the User will be executed at most once.
+ // So sinking I down to User is always profitable or neutral.
+ if (UserParent->getUniquePredecessor() == BB ||
+ (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term))) {
+ assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
+ return UserParent;
}
+ return None;
+ };
+
+ auto OptBB = getOptionalSinkBlockForInst(I);
+ if (OptBB) {
+ auto *UserParent = *OptBB;
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ if (TryToSinkInstruction(I, UserParent)) {
+ LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+ MadeIRChange = true;
+ // We'll add uses of the sunk instruction below, but since
+ // sinking can expose opportunities for its *operands*, add
+ // them to the worklist
+ for (Use &U : I->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+ Worklist.push(OpI);
+ }
+ }
// Now that we have an instruction, try combining it to simplify it.
Builder.SetInsertPoint(I);
@@ -3994,13 +4010,13 @@ public:
/// whose condition is a known constant, we only visit the reachable successors.
static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
const TargetLibraryInfo *TLI,
- InstCombineWorklist &ICWorklist) {
+ InstructionWorklist &ICWorklist) {
bool MadeIRChange = false;
SmallPtrSet<BasicBlock *, 32> Visited;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(&F.front());
- SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
DenseMap<Constant *, Constant *> FoldedConstants;
AliasScopeTracker SeenAliasScopes;
@@ -4011,25 +4027,23 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
if (!Visited.insert(BB).second)
continue;
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = &*BBI++;
-
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
// ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() &&
- (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
- if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst
+ if (!Inst.use_empty() &&
+ (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
+ if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
+ LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
<< '\n');
- Inst->replaceAllUsesWith(C);
+ Inst.replaceAllUsesWith(C);
++NumConstProp;
- if (isInstructionTriviallyDead(Inst, TLI))
- Inst->eraseFromParent();
+ if (isInstructionTriviallyDead(&Inst, TLI))
+ Inst.eraseFromParent();
MadeIRChange = true;
continue;
}
// See if we can constant fold its operands.
- for (Use &U : Inst->operands()) {
+ for (Use &U : Inst.operands()) {
if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
continue;
@@ -4039,7 +4053,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
FoldRes = ConstantFoldConstant(C, DL, TLI);
if (FoldRes != C) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
+ LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
<< "\n Old = " << *C
<< "\n New = " << *FoldRes << '\n');
U = FoldRes;
@@ -4050,9 +4064,9 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// Skip processing debug and pseudo intrinsics in InstCombine. Processing
// these call instructions consumes non-trivial amount of time and
// provides no value for the optimization.
- if (!Inst->isDebugOrPseudoInst()) {
- InstrsForInstCombineWorklist.push_back(Inst);
- SeenAliasScopes.analyse(Inst);
+ if (!Inst.isDebugOrPseudoInst()) {
+ InstrsForInstructionWorklist.push_back(&Inst);
+ SeenAliasScopes.analyse(&Inst);
}
}
@@ -4097,8 +4111,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
- ICWorklist.reserve(InstrsForInstCombineWorklist.size());
- for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+ ICWorklist.reserve(InstrsForInstructionWorklist.size());
+ for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
// DCE instruction if trivially dead. As we iterate in reverse program
// order here, we will clean up whole chains of dead instructions.
if (isInstructionTriviallyDead(Inst, TLI) ||
@@ -4118,7 +4132,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
static bool combineInstructionsOverFunction(
- Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
+ Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 0d4ca0bcecfb..b56329ad76ae 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -47,6 +48,7 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -176,7 +178,15 @@ const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private";
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
-static const unsigned kAllocaRzSize = 32;
+static const uint64_t kAllocaRzSize = 32;
+
+// ASanAccessInfo implementation constants.
+constexpr size_t kCompileKernelShift = 0;
+constexpr size_t kCompileKernelMask = 0x1;
+constexpr size_t kAccessSizeIndexShift = 1;
+constexpr size_t kAccessSizeIndexMask = 0xf;
+constexpr size_t kIsWriteShift = 5;
+constexpr size_t kIsWriteMask = 0x1;
// Command-line flags.
@@ -203,6 +213,11 @@ static cl::opt<bool> ClInstrumentWrites(
"asan-instrument-writes", cl::desc("instrument write instructions"),
cl::Hidden, cl::init(true));
+static cl::opt<bool>
+ ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(false),
+ cl::desc("Use Stack Safety analysis results"),
+ cl::Optional);
+
static cl::opt<bool> ClInstrumentAtomics(
"asan-instrument-atomics",
cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
@@ -348,6 +363,10 @@ static cl::opt<uint64_t>
static cl::opt<bool> ClOpt("asan-opt", cl::desc("Optimize instrumentation"),
cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptimizeCallbacks("asan-optimize-callbacks",
+ cl::desc("Optimize callbacks"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClOptSameTemp(
"asan-opt-same-temp", cl::desc("Instrument the same temp just once"),
cl::Hidden, cl::init(true));
@@ -442,7 +461,7 @@ struct ShadowMapping {
} // end anonymous namespace
-static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
+static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
bool IsKasan) {
bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS();
@@ -559,6 +578,32 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
return Mapping;
}
+namespace llvm {
+void getAddressSanitizerParams(const Triple &TargetTriple, int LongSize,
+ bool IsKasan, uint64_t *ShadowBase,
+ int *MappingScale, bool *OrShadowOffset) {
+ auto Mapping = getShadowMapping(TargetTriple, LongSize, IsKasan);
+ *ShadowBase = Mapping.Offset;
+ *MappingScale = Mapping.Scale;
+ *OrShadowOffset = Mapping.OrShadowOffset;
+}
+
+ASanAccessInfo::ASanAccessInfo(int32_t Packed)
+ : Packed(Packed),
+ AccessSizeIndex((Packed >> kAccessSizeIndexShift) & kAccessSizeIndexMask),
+ IsWrite((Packed >> kIsWriteShift) & kIsWriteMask),
+ CompileKernel((Packed >> kCompileKernelShift) & kCompileKernelMask) {}
+
+ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
+ uint8_t AccessSizeIndex)
+ : Packed((IsWrite << kIsWriteShift) +
+ (CompileKernel << kCompileKernelShift) +
+ (AccessSizeIndex << kAccessSizeIndexShift)),
+ AccessSizeIndex(AccessSizeIndex), IsWrite(IsWrite),
+ CompileKernel(CompileKernel) {}
+
+} // namespace llvm
+
static uint64_t getRedzoneSizeForScale(int MappingScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
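A standalone sketch of the packing scheme defined by the shift/mask constants introduced above (the constants are copied from this patch; the surrounding example is hypothetical): a 4-byte write in userspace code has AccessSizeIndex 2 and packs to 36.

#include <cassert>
#include <cstddef>

int main() {
  constexpr std::size_t kCompileKernelShift = 0;
  constexpr std::size_t kAccessSizeIndexShift = 1;
  constexpr std::size_t kIsWriteShift = 5;

  const bool IsWrite = true;          // a store
  const bool CompileKernel = false;   // userspace ASan, not KASan
  const unsigned AccessSizeIndex = 2; // log2 of the 4-byte access size

  const int Packed = (IsWrite << kIsWriteShift) +
                     (CompileKernel << kCompileKernelShift) +
                     (AccessSizeIndex << kAccessSizeIndexShift);
  assert(Packed == 36); // (1 << 5) + (0 << 0) + (2 << 1)
  return 0;
}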
@@ -609,6 +654,7 @@ char ASanGlobalsMetadataWrapperPass::ID = 0;
/// AddressSanitizer: instrument the code in module to find memory bugs.
struct AddressSanitizer {
AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD,
+ const StackSafetyGlobalInfo *SSGI,
bool CompileKernel = false, bool Recover = false,
bool UseAfterScope = false,
AsanDetectStackUseAfterReturnMode UseAfterReturn =
@@ -619,10 +665,12 @@ struct AddressSanitizer {
UseAfterScope(UseAfterScope || ClUseAfterScope),
UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn
: UseAfterReturn),
- GlobalsMD(*GlobalsMD) {
+ GlobalsMD(*GlobalsMD), SSGI(SSGI) {
C = &(M.getContext());
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
+ Int8PtrTy = Type::getInt8PtrTy(*C);
+ Int32Ty = Type::getInt32Ty(*C);
TargetTriple = Triple(M.getTargetTriple());
Mapping = getShadowMapping(TargetTriple, LongSize, this->CompileKernel);
@@ -646,7 +694,7 @@ struct AddressSanitizer {
/// Check if we want (and can) handle this alloca.
bool isInterestingAlloca(const AllocaInst &AI);
- bool ignoreAccess(Value *Ptr);
+ bool ignoreAccess(Instruction *Inst, Value *Ptr);
void getInterestingMemoryOperands(
Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
@@ -713,6 +761,8 @@ private:
bool UseAfterScope;
AsanDetectStackUseAfterReturnMode UseAfterReturn;
Type *IntptrTy;
+ Type *Int8PtrTy;
+ Type *Int32Ty;
ShadowMapping Mapping;
FunctionCallee AsanHandleNoReturnFunc;
FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction;
@@ -729,6 +779,7 @@ private:
FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset;
Value *LocalDynamicShadow = nullptr;
const GlobalsMetadata &GlobalsMD;
+ const StackSafetyGlobalInfo *SSGI;
DenseMap<const AllocaInst *, bool> ProcessedAllocas;
FunctionCallee AMDGPUAddressShared;
@@ -755,16 +806,22 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<ASanGlobalsMetadataWrapperPass>();
+ if (ClUseStackSafety)
+ AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
bool runOnFunction(Function &F) override {
GlobalsMetadata &GlobalsMD =
getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD();
+ const StackSafetyGlobalInfo *const SSGI =
+ ClUseStackSafety
+ ? &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult()
+ : nullptr;
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover,
- UseAfterScope, UseAfterReturn);
+ AddressSanitizer ASan(*F.getParent(), &GlobalsMD, SSGI, CompileKernel,
+ Recover, UseAfterScope, UseAfterReturn);
return ASan.instrumentFunction(F, TLI);
}
@@ -1212,20 +1269,15 @@ GlobalsMetadata ASanGlobalsMetadataAnalysis::run(Module &M,
return GlobalsMetadata(M);
}
-AddressSanitizerPass::AddressSanitizerPass(
- bool CompileKernel, bool Recover, bool UseAfterScope,
- AsanDetectStackUseAfterReturnMode UseAfterReturn)
- : CompileKernel(CompileKernel), Recover(Recover),
- UseAfterScope(UseAfterScope), UseAfterReturn(UseAfterReturn) {}
-
PreservedAnalyses AddressSanitizerPass::run(Function &F,
AnalysisManager<Function> &AM) {
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
Module &M = *F.getParent();
if (auto *R = MAMProxy.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) {
const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
- AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope,
- UseAfterReturn);
+ AddressSanitizer Sanitizer(M, R, nullptr, Options.CompileKernel,
+ Options.Recover, Options.UseAfterScope,
+ Options.UseAfterReturn);
if (Sanitizer.instrumentFunction(F, TLI))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
@@ -1237,21 +1289,51 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
+void AddressSanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<AddressSanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.CompileKernel)
+ OS << "kernel";
+ OS << ">";
+}
+
+void ModuleAddressSanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<ModuleAddressSanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.CompileKernel)
+ OS << "kernel";
+ OS << ">";
+}
+
ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(
- bool CompileKernel, bool Recover, bool UseGlobalGC, bool UseOdrIndicator,
- AsanDtorKind DestructorKind)
- : CompileKernel(CompileKernel), Recover(Recover), UseGlobalGC(UseGlobalGC),
+ const AddressSanitizerOptions &Options, bool UseGlobalGC,
+ bool UseOdrIndicator, AsanDtorKind DestructorKind)
+ : Options(Options), UseGlobalGC(UseGlobalGC),
UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind) {}
PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M,
- AnalysisManager<Module> &AM) {
- GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M);
- ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover,
- UseGlobalGC, UseOdrIndicator,
- DestructorKind);
- if (Sanitizer.instrumentModule(M))
- return PreservedAnalyses::none();
- return PreservedAnalyses::all();
+ ModuleAnalysisManager &MAM) {
+ GlobalsMetadata &GlobalsMD = MAM.getResult<ASanGlobalsMetadataAnalysis>(M);
+ ModuleAddressSanitizer ModuleSanitizer(M, &GlobalsMD, Options.CompileKernel,
+ Options.Recover, UseGlobalGC,
+ UseOdrIndicator, DestructorKind);
+ bool Modified = false;
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ const StackSafetyGlobalInfo *const SSGI =
+ ClUseStackSafety ? &MAM.getResult<StackSafetyGlobalAnalysis>(M) : nullptr;
+ for (Function &F : M) {
+ AddressSanitizer FunctionSanitizer(
+ M, &GlobalsMD, SSGI, Options.CompileKernel, Options.Recover,
+ Options.UseAfterScope, Options.UseAfterReturn);
+ const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ Modified |= FunctionSanitizer.instrumentFunction(F, &TLI);
+ }
+ Modified |= ModuleSanitizer.instrumentModule(M);
+ return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
INITIALIZE_PASS(ASanGlobalsMetadataWrapperPass, "asan-globals-md",
@@ -1266,6 +1348,7 @@ INITIALIZE_PASS_BEGIN(
"AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false,
false)
INITIALIZE_PASS_DEPENDENCY(ASanGlobalsMetadataWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
AddressSanitizerLegacyPass, "asan",
@@ -1404,7 +1487,7 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
return IsInteresting;
}
-bool AddressSanitizer::ignoreAccess(Value *Ptr) {
+bool AddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
// Instrument accesses from different address spaces only for AMDGPU.
Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
if (PtrTy->getPointerAddressSpace() != 0 &&
@@ -1425,6 +1508,10 @@ bool AddressSanitizer::ignoreAccess(Value *Ptr) {
if (ClSkipPromotableAllocas && !isInterestingAlloca(*AI))
return true;
+ if (SSGI != nullptr && SSGI->stackAccessIsSafe(*Inst) &&
+ findAllocaForValue(Ptr))
+ return true;
+
return false;
}
@@ -1439,22 +1526,22 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(LI, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(SI, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(RMW, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(XCHG, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
@@ -1469,7 +1556,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
return;
auto BasePtr = CI->getOperand(OpOffset);
- if (ignoreAccess(BasePtr))
+ if (ignoreAccess(CI, BasePtr))
return;
auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
MaybeAlign Alignment = Align(1);
@@ -1479,9 +1566,9 @@ void AddressSanitizer::getInterestingMemoryOperands(
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
} else {
- for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
+ for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(CI->getArgOperand(ArgNo)))
+ ignoreAccess(CI, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
@@ -1738,9 +1825,20 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
}
IRBuilder<> IRB(InsertBefore);
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+ const ASanAccessInfo AccessInfo(IsWrite, CompileKernel, AccessSizeIndex);
+
+ if (UseCalls && ClOptimizeCallbacks) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ IRB.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::asan_check_memaccess),
+ {IRB.CreatePointerCast(Addr, Int8PtrTy),
+ ConstantInt::get(Int32Ty, AccessInfo.Packed)});
+ return;
+ }
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (UseCalls) {
if (Exp == 0)
IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][0][AccessSizeIndex],
@@ -1936,7 +2034,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
// Globals from llvm.metadata aren't emitted, do not instrument them.
if (Section == "llvm.metadata") return false;
// Do not instrument globals from special LLVM sections.
- if (Section.find("__llvm") != StringRef::npos || Section.find("__LLVM") != StringRef::npos) return false;
+ if (Section.contains("__llvm") || Section.contains("__LLVM"))
+ return false;
// Do not instrument function pointers to initialization and termination
// routines: dynamic linker will not properly handle redzones.
@@ -2133,8 +2232,7 @@ Instruction *ModuleAddressSanitizer::CreateAsanModuleDtor(Module &M) {
AsanDtorFunction = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(*C), false),
GlobalValue::InternalLinkage, 0, kAsanModuleDtorName, &M);
- AsanDtorFunction->addAttribute(AttributeList::FunctionIndex,
- Attribute::NoUnwind);
+ AsanDtorFunction->addFnAttr(Attribute::NoUnwind);
// Ensure Dtor cannot be discarded, even if in a comdat.
appendToUsed(M, {AsanDtorFunction});
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
@@ -2753,7 +2851,7 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I);
if (II && II->getIntrinsicID() == Intrinsic::localescape) {
// We found a call. Mark all the allocas passed in as uninteresting.
- for (Value *Arg : II->arg_operands()) {
+ for (Value *Arg : II->args()) {
AllocaInst *AI = dyn_cast<AllocaInst>(Arg->stripPointerCasts());
assert(AI && AI->isStaticAlloca() &&
"non-static alloca arg to localescape");
@@ -2774,6 +2872,8 @@ bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) {
bool AddressSanitizer::instrumentFunction(Function &F,
const TargetLibraryInfo *TLI) {
+ if (F.empty())
+ return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
if (F.getName().startswith("__asan_")) return false;
@@ -2916,7 +3016,8 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
if (LongSize != 32) return false;
CallInst *CI = dyn_cast<CallInst>(I);
if (!CI || !CI->isInlineAsm()) return false;
- if (CI->getNumArgOperands() <= 5) return false;
+ if (CI->arg_size() <= 5)
+ return false;
// We have inline assembly with quite a few arguments.
return true;
}
@@ -3112,7 +3213,7 @@ Value *FunctionStackPoisoner::createAllocaForLayout(
assert(Alloca->isStaticAlloca());
}
assert((ClRealignStack & (ClRealignStack - 1)) == 0);
- size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
+ uint64_t FrameAlignment =
+     std::max(L.FrameAlignment, uint64_t(ClRealignStack));
Alloca->setAlignment(Align(FrameAlignment));
return IRB.CreatePointerCast(Alloca, IntptrTy);
}
@@ -3256,8 +3357,8 @@ void FunctionStackPoisoner::processStaticAllocas() {
// Minimal header size (left redzone) is 4 pointers,
// i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms.
- size_t Granularity = 1ULL << Mapping.Scale;
- size_t MinHeaderSize = std::max((size_t)ASan.LongSize / 2, Granularity);
+ uint64_t Granularity = 1ULL << Mapping.Scale;
+ uint64_t MinHeaderSize = std::max((uint64_t)ASan.LongSize / 2, Granularity);
const ASanStackFrameLayout &L =
ComputeASanStackFrameLayout(SVD, Granularity, MinHeaderSize);
@@ -3511,7 +3612,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
IRBuilder<> IRB(AI);
- const unsigned Alignment = std::max(kAllocaRzSize, AI->getAlignment());
+ const uint64_t Alignment = std::max(kAllocaRzSize, AI->getAlignment());
const uint64_t AllocaRedzoneMask = kAllocaRzSize - 1;
Value *Zero = Constant::getNullValue(IntptrTy);
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 9acd82c005e6..1a7f7a365ce4 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -53,6 +53,8 @@ static bool runCGProfilePass(
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
Function *CalledF, uint64_t NewCount) {
+ if (NewCount == 0)
+ return;
if (!CalledF || !TTI.isLoweredToCall(CalledF) ||
CalledF->hasDLLImportStorageClass())
return;
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index 3b4d80dc8023..497aac30c3f6 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1553,11 +1553,11 @@ static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
SI->swapValues();
SI->swapProfMetadata();
if (Scope->TrueBiasedSelects.count(SI)) {
- assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
+ assert(!Scope->FalseBiasedSelects.contains(SI) &&
"Must not be already in");
Scope->FalseBiasedSelects.insert(SI);
} else if (Scope->FalseBiasedSelects.count(SI)) {
- assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
+ assert(!Scope->TrueBiasedSelects.contains(SI) &&
"Must not be already in");
Scope->TrueBiasedSelects.insert(SI);
}
@@ -1592,7 +1592,7 @@ static void insertTrivialPHIs(CHRScope *Scope,
SmallVector<Instruction *, 8> Users;
for (User *U : I.users()) {
if (auto *UI = dyn_cast<Instruction>(U)) {
- if (BlocksInScope.count(UI->getParent()) == 0 &&
+ if (!BlocksInScope.contains(UI->getParent()) &&
// Unless there's already a phi for I at the exit block.
!(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
CHR_DEBUG(dbgs() << "V " << I << "\n");
@@ -1752,7 +1752,7 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
// Create the combined branch condition and constant-fold the branches/selects
// in the hot path.
fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
- ProfileCount ? ProfileCount.getValue() : 0);
+ ProfileCount.getValueOr(0));
}
// A helper for transformScopes. Clone the blocks in the scope (excluding the
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index 63aa84e4a77c..38c219ce3465 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -18,6 +18,9 @@
/// The analysis is based on automatic propagation of data flow labels (also
/// known as taint labels) through a program as it performs computation.
///
+/// Argument and return value labels are passed through TLS variables
+/// __dfsan_arg_tls and __dfsan_retval_tls.
+///
/// Each byte of application memory is backed by a shadow memory byte. The
/// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
/// laid out as follows:
@@ -144,20 +147,22 @@ static cl::opt<bool> ClPreserveAlignment(
// to the "native" (i.e. unsanitized) ABI. Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
-// unknown. The other supported annotations are "functional" and "discard",
-// which are described below under DataFlowSanitizer::WrapperKind.
+// unknown. The other supported annotations for uninstrumented functions are
+// "functional" and "discard", which are described below under
+// DataFlowSanitizer::WrapperKind.
+// Functions will often be labelled with both "uninstrumented" and one of
+// "functional" or "discard". This will leave the function unchanged by this
+// pass, and create a wrapper function that will call the original.
+//
+// Instrumented functions can also be annotated as "force_zero_labels", which
+// makes the pass use zero labels for all of their shadow and return values.
+// Functions should never be labelled with both "force_zero_labels" and
+// "uninstrumented" or any of the uninstrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
"dfsan-abilist",
cl::desc("File listing native ABI functions and how the pass treats them"),
cl::Hidden);
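A hypothetical ABI list file for -dfsan-abilist, illustrating the annotations described in the comment above (the function names are made up; the category keywords are the ones this pass recognizes):

# getenv is uninstrumented and its result labels are discarded
fun:getenv=uninstrumented
fun:getenv=discard
# crc32 computes its result purely from its arguments, so unioning the
# argument labels ("functional") is an accurate model
fun:crc32=uninstrumented
fun:crc32=functional
# instrument my_hash, but force its shadow and return labels to zero
fun:my_hash=force_zero_labels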
-// Controls whether the pass uses IA_Args or IA_TLS as the ABI for instrumented
-// functions (see DataFlowSanitizer::InstrumentedABI below).
-static cl::opt<bool>
- ClArgsABI("dfsan-args-abi",
- cl::desc("Use the argument ABI rather than the TLS ABI"),
- cl::Hidden);
-
// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
@@ -349,18 +354,18 @@ transformFunctionAttributes(const TransformedFunction &TransformedFunction,
for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
I < IE; ++I) {
unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
- ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttributes(I);
+ ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
}
// Copy annotations on varargs arguments.
for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
IE = CallSiteAttrs.getNumAttrSets();
I < IE; ++I) {
- ArgumentAttributes.push_back(CallSiteAttrs.getParamAttributes(I));
+ ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
}
- return AttributeList::get(Ctx, CallSiteAttrs.getFnAttributes(),
- CallSiteAttrs.getRetAttributes(),
+ return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
+ CallSiteAttrs.getRetAttrs(),
llvm::makeArrayRef(ArgumentAttributes));
}
@@ -372,17 +377,6 @@ class DataFlowSanitizer {
enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };
- /// Which ABI should be used for instrumented functions?
- enum InstrumentedABI {
- /// Argument and return value labels are passed through additional
- /// arguments and by modifying the return type.
- IA_Args,
-
- /// Argument and return value labels are passed through TLS variables
- /// __dfsan_arg_tls and __dfsan_retval_tls.
- IA_TLS
- };
-
/// How should calls to uninstrumented functions be handled?
enum WrapperKind {
/// This function is present in an uninstrumented form but we don't know
@@ -400,9 +394,7 @@ class DataFlowSanitizer {
/// Instead of calling the function, a custom wrapper __dfsw_F is called,
/// where F is the name of the function. This function may wrap the
- /// original function or provide its own implementation. This is similar to
- /// the IA_Args ABI, except that IA_Args uses a struct return type to
- /// pass the return value shadow in a register, while WK_Custom uses an
+ /// original function or provide its own implementation. WK_Custom uses an
/// extra pointer argument to return the shadow. This allows the wrapped
/// form of the function type to be expressed in C.
WK_Custom
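For reference, a hypothetical WK_Custom wrapper declaration in the shape this comment describes (assuming the dfsan_label type from compiler-rt's <sanitizer/dfsan_interface.h>; the function name is made up):

#include <sanitizer/dfsan_interface.h>

// Original function:  int parse(const char *Buf);
// Its custom wrapper takes one label per argument plus an extra pointer
// through which it returns the shadow of the return value.
extern "C" int __dfsw_parse(const char *Buf, dfsan_label BufLabel,
                            dfsan_label *RetLabel);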
@@ -469,10 +461,9 @@ class DataFlowSanitizer {
getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
bool isInstrumented(const Function *F);
bool isInstrumented(const GlobalAlias *GA);
- FunctionType *getArgsFunctionType(FunctionType *T);
+ bool isForceZeroLabels(const Function *F);
FunctionType *getTrampolineFunctionType(FunctionType *T);
TransformedFunction getCustomFunctionType(FunctionType *T);
- InstrumentedABI getInstrumentedABI();
WrapperKind getWrapperKind(Function *F);
void addGlobalNameSuffix(GlobalValue *GV);
Function *buildWrapperFunction(Function *F, StringRef NewFName,
@@ -496,18 +487,11 @@ class DataFlowSanitizer {
/// Returns whether the pass tracks origins. Supports only TLS ABI mode.
bool shouldTrackOrigins();
- /// Returns whether the pass tracks labels for struct fields and array
- /// indices. Supports only TLS ABI mode.
- bool shouldTrackFieldsAndIndices();
-
/// Returns a zero constant with the shadow type of OrigTy.
///
/// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2,...}
/// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
/// getZeroShadow(other type) = i16(0)
- ///
- /// Note that a zero shadow is always i16(0) when shouldTrackFieldsAndIndices
- /// returns false.
Constant *getZeroShadow(Type *OrigTy);
/// Returns a zero constant with the shadow type of V's type.
Constant *getZeroShadow(Value *V);
@@ -520,9 +504,6 @@ class DataFlowSanitizer {
/// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
/// getShadowTy([n x T]) = [n x getShadowTy(T)]
/// getShadowTy(other type) = i16
- ///
- /// Note that a shadow type is always i16 when shouldTrackFieldsAndIndices
- /// returns false.
Type *getShadowTy(Type *OrigTy);
/// Returns the shadow type of of V's type.
Type *getShadowTy(Value *V);
@@ -539,8 +520,8 @@ struct DFSanFunction {
DataFlowSanitizer &DFS;
Function *F;
DominatorTree DT;
- DataFlowSanitizer::InstrumentedABI IA;
bool IsNativeABI;
+ bool IsForceZeroLabels;
AllocaInst *LabelReturnAlloca = nullptr;
AllocaInst *OriginReturnAlloca = nullptr;
DenseMap<Value *, Value *> ValShadowMap;
@@ -571,8 +552,10 @@ struct DFSanFunction {
DenseMap<Value *, Value *> CachedCollapsedShadows;
DenseMap<Value *, std::set<Value *>> ShadowElements;
- DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
- : DFS(DFS), F(F), IA(DFS.getInstrumentedABI()), IsNativeABI(IsNativeABI) {
+ DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
+ bool IsForceZeroLabels)
+ : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
+ IsForceZeroLabels(IsForceZeroLabels) {
DT.recalculate(*F);
}
@@ -787,17 +770,6 @@ DataFlowSanitizer::DataFlowSanitizer(
SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
}
-FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
- SmallVector<Type *, 4> ArgTypes(T->param_begin(), T->param_end());
- ArgTypes.append(T->getNumParams(), PrimitiveShadowTy);
- if (T->isVarArg())
- ArgTypes.push_back(PrimitiveShadowPtrTy);
- Type *RetType = T->getReturnType();
- if (!RetType->isVoidTy())
- RetType = StructType::get(RetType, PrimitiveShadowTy);
- return FunctionType::get(RetType, ArgTypes, T->isVarArg());
-}
-
FunctionType *DataFlowSanitizer::getTrampolineFunctionType(FunctionType *T) {
assert(!T->isVarArg());
SmallVector<Type *, 4> ArgTypes;
@@ -861,9 +833,6 @@ TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
}
bool DataFlowSanitizer::isZeroShadow(Value *V) {
- if (!shouldTrackFieldsAndIndices())
- return ZeroPrimitiveShadow == V;
-
Type *T = V->getType();
if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
@@ -880,19 +849,11 @@ bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
}
bool DataFlowSanitizer::shouldTrackOrigins() {
- static const bool ShouldTrackOrigins =
- ClTrackOrigins && getInstrumentedABI() == DataFlowSanitizer::IA_TLS;
+ static const bool ShouldTrackOrigins = ClTrackOrigins;
return ShouldTrackOrigins;
}
-bool DataFlowSanitizer::shouldTrackFieldsAndIndices() {
- return getInstrumentedABI() == DataFlowSanitizer::IA_TLS;
-}
-
Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
- if (!shouldTrackFieldsAndIndices())
- return ZeroPrimitiveShadow;
-
if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
return ZeroPrimitiveShadow;
Type *ShadowTy = getShadowTy(OrigTy);
@@ -992,8 +953,6 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
return Shadow;
- assert(DFS.shouldTrackFieldsAndIndices());
-
// Checks if the cached collapsed shadow value dominates Pos.
Value *&CS = CachedCollapsedShadows[Shadow];
if (CS && DT.dominates(CS, Pos))
@@ -1007,9 +966,6 @@ Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
}
Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
- if (!shouldTrackFieldsAndIndices())
- return PrimitiveShadowTy;
-
if (!OrigTy->isSized())
return PrimitiveShadowTy;
if (isa<IntegerType>(OrigTy))
@@ -1107,8 +1063,8 @@ bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
return !ABIList.isIn(*GA, "uninstrumented");
}
-DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() {
- return ClArgsABI ? IA_Args : IA_TLS;
+bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
+ return ABIList.isIn(*F, "force_zero_labels");
}
DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
@@ -1139,7 +1095,7 @@ void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
Pos = Asm.find("@");
if (Pos == std::string::npos)
- report_fatal_error("unsupported .symver: " + Asm);
+ report_fatal_error(Twine("unsupported .symver: ", Asm));
Asm.replace(Pos, 1, Suffix + "@");
GV->getParent()->setModuleInlineAsm(Asm);
@@ -1154,14 +1110,12 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
NewFName, F->getParent());
NewF->copyAttributesFrom(F);
- NewF->removeAttributes(
- AttributeList::ReturnIndex,
+ NewF->removeRetAttrs(
AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
if (F->isVarArg()) {
- NewF->removeAttributes(AttributeList::FunctionIndex,
- AttrBuilder().addAttribute("split-stack"));
+ NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack"));
CallInst::Create(DFSanVarargWrapperFn,
IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
BB);
@@ -1199,7 +1153,8 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
// F is called by a wrapped custom function with primitive shadows. So
// its arguments and return value need conversion.
- DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true);
+ DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true,
+ /*ForceZeroLabels=*/false);
Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI;
++ValAI;
for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) {
@@ -1238,23 +1193,17 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT,
void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
{
AttributeList AL;
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::ReadOnly);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanUnionLoadFn =
Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
}
{
AttributeList AL;
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
- AL = AL.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::ReadOnly);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
+ AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
"__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
}
@@ -1274,8 +1223,7 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
{
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
DFSanChainOriginFnTy, AL);
}
@@ -1283,8 +1231,7 @@ void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
AttributeList AL;
AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
- AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex,
- Attribute::ZExt);
+ AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
"__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
}
@@ -1409,34 +1356,32 @@ bool DataFlowSanitizer::runImpl(Module &M) {
std::vector<Function *> FnsToInstrument;
SmallPtrSet<Function *, 2> FnsWithNativeABI;
+ SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
for (Function &F : M)
if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F))
FnsToInstrument.push_back(&F);
// Give function aliases prefixes when necessary, and build wrappers where the
// instrumentedness is inconsistent.
- for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end();
- AI != AE;) {
- GlobalAlias *GA = &*AI;
- ++AI;
+ for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
// Don't stop on weak. We assume people aren't playing games with the
// instrumentedness of overridden weak aliases.
- auto *F = dyn_cast<Function>(GA->getBaseObject());
+ auto *F = dyn_cast<Function>(GA.getAliaseeObject());
if (!F)
continue;
- bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
+ bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
if (GAInst && FInst) {
- addGlobalNameSuffix(GA);
+ addGlobalNameSuffix(&GA);
} else if (GAInst != FInst) {
// Non-instrumented alias of an instrumented function, or vice versa.
// Replace the alias with a native-ABI wrapper of the aliasee. The pass
// below will take care of instrumenting it.
Function *NewF =
- buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType());
- GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType()));
- NewF->takeName(GA);
- GA->eraseFromParent();
+ buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
+ GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
+ NewF->takeName(&GA);
+ GA.eraseFromParent();
FnsToInstrument.push_back(NewF);
}
}
@@ -1456,50 +1401,17 @@ bool DataFlowSanitizer::runImpl(Module &M) {
FT->getReturnType()->isVoidTy());
if (isInstrumented(&F)) {
+ if (isForceZeroLabels(&F))
+ FnsWithForceZeroLabel.insert(&F);
+
// Instrumented functions get a '.dfsan' suffix. This allows us to more
// easily identify cases of mismatching ABIs. This naming scheme is
// mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
- if (getInstrumentedABI() == IA_Args && !IsZeroArgsVoidRet) {
- FunctionType *NewFT = getArgsFunctionType(FT);
- Function *NewF = Function::Create(NewFT, F.getLinkage(),
- F.getAddressSpace(), "", &M);
- NewF->copyAttributesFrom(&F);
- NewF->removeAttributes(
- AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewFT->getReturnType()));
- for (Function::arg_iterator FArg = F.arg_begin(),
- NewFArg = NewF->arg_begin(),
- FArgEnd = F.arg_end();
- FArg != FArgEnd; ++FArg, ++NewFArg) {
- FArg->replaceAllUsesWith(&*NewFArg);
- }
- NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList());
-
- for (Function::user_iterator UI = F.user_begin(), UE = F.user_end();
- UI != UE;) {
- BlockAddress *BA = dyn_cast<BlockAddress>(*UI);
- ++UI;
- if (BA) {
- BA->replaceAllUsesWith(
- BlockAddress::get(NewF, BA->getBasicBlock()));
- delete BA;
- }
- }
- F.replaceAllUsesWith(
- ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)));
- NewF->takeName(&F);
- F.eraseFromParent();
- *FI = NewF;
- addGlobalNameSuffix(NewF);
- } else {
- addGlobalNameSuffix(&F);
- }
+ addGlobalNameSuffix(&F);
} else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
// Build a wrapper function for F. The wrapper simply calls F, and is
// added to FnsToInstrument so that any instrumentation according to its
// WrapperKind is done in the second pass below.
- FunctionType *NewFT =
- getInstrumentedABI() == IA_Args ? getArgsFunctionType(FT) : FT;
// If the function being wrapped has local linkage, then preserve the
// function's linkage in the wrapper function.
@@ -1511,9 +1423,8 @@ bool DataFlowSanitizer::runImpl(Module &M) {
&F,
(shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
std::string(F.getName()),
- WrapperLinkage, NewFT);
- if (getInstrumentedABI() == IA_TLS)
- NewF->removeAttributes(AttributeList::FunctionIndex, ReadOnlyNoneAttrs);
+ WrapperLinkage, FT);
+ NewF->removeFnAttrs(ReadOnlyNoneAttrs);
Value *WrappedFnCst =
ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
@@ -1552,7 +1463,8 @@ bool DataFlowSanitizer::runImpl(Module &M) {
removeUnreachableBlocks(*F);
- DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F));
+ DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
+ FnsWithForceZeroLabel.count(F));
// DFSanVisitor may create new basic blocks, which confuses df_iterator.
// Build a copy of the list before iterating over it.
@@ -1649,23 +1561,14 @@ Value *DFSanFunction::getOrigin(Value *V) {
if (Argument *A = dyn_cast<Argument>(V)) {
if (IsNativeABI)
return DFS.ZeroOrigin;
- switch (IA) {
- case DataFlowSanitizer::IA_TLS: {
- if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
- Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
- IRBuilder<> IRB(ArgOriginTLSPos);
- Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
- Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
- } else {
- // Overflow
- Origin = DFS.ZeroOrigin;
- }
- break;
- }
- case DataFlowSanitizer::IA_Args: {
+ if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
+ Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
+ IRBuilder<> IRB(ArgOriginTLSPos);
+ Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
+ Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
+ } else {
+ // Overflow
Origin = DFS.ZeroOrigin;
- break;
- }
}
} else {
Origin = DFS.ZeroOrigin;
@@ -1716,25 +1619,14 @@ Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
Value *DFSanFunction::getShadow(Value *V) {
if (!isa<Argument>(V) && !isa<Instruction>(V))
return DFS.getZeroShadow(V);
+ if (IsForceZeroLabels)
+ return DFS.getZeroShadow(V);
Value *&Shadow = ValShadowMap[V];
if (!Shadow) {
if (Argument *A = dyn_cast<Argument>(V)) {
if (IsNativeABI)
return DFS.getZeroShadow(V);
- switch (IA) {
- case DataFlowSanitizer::IA_TLS: {
- Shadow = getShadowForTLSArgument(A);
- break;
- }
- case DataFlowSanitizer::IA_Args: {
- unsigned ArgIdx = A->getArgNo() + F->arg_size() / 2;
- Function::arg_iterator Arg = F->arg_begin();
- std::advance(Arg, ArgIdx);
- Shadow = &*Arg;
- assert(Shadow->getType() == DFS.PrimitiveShadowTy);
- break;
- }
- }
+ Shadow = getShadowForTLSArgument(A);
NonZeroChecks.push_back(Shadow);
} else {
Shadow = DFS.getZeroShadow(V);
@@ -1745,8 +1637,6 @@ Value *DFSanFunction::getShadow(Value *V) {
void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
assert(!ValShadowMap.count(I));
- assert(DFS.shouldTrackFieldsAndIndices() ||
- Shadow->getType() == DFS.PrimitiveShadowTy);
ValShadowMap[I] = Shadow;
}
@@ -2124,7 +2014,7 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
{IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
ConstantInt::get(DFS.IntptrTy, Size)});
- Call->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ Call->addRetAttr(Attribute::ZExt);
return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
DFS.PrimitiveShadowTy),
IRB.CreateTrunc(Call, DFS.OriginTy)};
@@ -2171,7 +2061,7 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
IRBuilder<> IRB(Pos);
CallInst *FallbackCall = IRB.CreateCall(
DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
- FallbackCall->addAttribute(AttributeList::ReturnIndex, Attribute::ZExt);
+ FallbackCall->addRetAttr(Attribute::ZExt);
return {FallbackCall, Origin};
}
@@ -2563,15 +2453,12 @@ void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
}
void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- // Special case: if this is the bitcast (there is exactly 1 allowed) between
- // a musttail call and a ret, don't instrument. New instructions are not
- // allowed after a musttail call.
- if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
- if (CI->isMustTailCall())
- return;
- }
- // TODO: handle musttail call returns for IA_Args.
+ // Special case: if this is the bitcast (there is exactly 1 allowed) between
+ // a musttail call and a ret, don't instrument. New instructions are not
+ // allowed after a musttail call.
+ if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
+ if (CI->isMustTailCall())
+ return;
visitInstOperands(BCI);
}
@@ -2629,11 +2516,6 @@ void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
}
void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
- if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
- visitInstOperands(I);
- return;
- }
-
IRBuilder<> IRB(&I);
Value *Agg = I.getAggregateOperand();
Value *AggShadow = DFSF.getShadow(Agg);
@@ -2643,11 +2525,6 @@ void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
}
void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
- if (!DFSF.DFS.shouldTrackFieldsAndIndices()) {
- visitInstOperands(I);
- return;
- }
-
IRBuilder<> IRB(&I);
Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
@@ -2798,41 +2675,22 @@ static bool isAMustTailRetVal(Value *RetVal) {
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
if (!DFSF.IsNativeABI && RI.getReturnValue()) {
- switch (DFSF.IA) {
- case DataFlowSanitizer::IA_TLS: {
- // Don't emit the instrumentation for musttail call returns.
- if (isAMustTailRetVal(RI.getReturnValue()))
- return;
-
- Value *S = DFSF.getShadow(RI.getReturnValue());
- IRBuilder<> IRB(&RI);
- Type *RT = DFSF.F->getFunctionType()->getReturnType();
- unsigned Size =
- getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
- if (Size <= RetvalTLSSize) {
- // If the size overflows, stores nothing. At callsite, oversized return
- // shadows are set to zero.
- IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB),
- ShadowTLSAlignment);
- }
- if (DFSF.DFS.shouldTrackOrigins()) {
- Value *O = DFSF.getOrigin(RI.getReturnValue());
- IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
- }
- break;
- }
- case DataFlowSanitizer::IA_Args: {
- // TODO: handle musttail call returns for IA_Args.
-
- IRBuilder<> IRB(&RI);
- Type *RT = DFSF.F->getFunctionType()->getReturnType();
- Value *InsVal =
- IRB.CreateInsertValue(UndefValue::get(RT), RI.getReturnValue(), 0);
- Value *InsShadow =
- IRB.CreateInsertValue(InsVal, DFSF.getShadow(RI.getReturnValue()), 1);
- RI.setOperand(0, InsShadow);
- break;
+ // Don't emit the instrumentation for musttail call returns.
+ if (isAMustTailRetVal(RI.getReturnValue()))
+ return;
+
+ Value *S = DFSF.getShadow(RI.getReturnValue());
+ IRBuilder<> IRB(&RI);
+ Type *RT = DFSF.F->getFunctionType()->getReturnType();
+ unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
+ if (Size <= RetvalTLSSize) {
+ // If the size overflows, store nothing. At the call site, oversized return
+ // shadows are set to zero.
+ IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
}
+ if (DFSF.DFS.shouldTrackOrigins()) {
+ Value *O = DFSF.getOrigin(RI.getReturnValue());
+ IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
}
}
}
@@ -2953,8 +2811,7 @@ bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
// Custom functions returning non-void will write to the return label.
if (!FT->getReturnType()->isVoidTy()) {
- CustomFn->removeAttributes(AttributeList::FunctionIndex,
- DFSF.DFS.ReadOnlyNoneAttrs);
+ CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
}
}
@@ -3056,32 +2913,30 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
FunctionType *FT = CB.getFunctionType();
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- // Stores argument shadows.
- unsigned ArgOffset = 0;
- const DataLayout &DL = getDataLayout();
- for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
- if (ShouldTrackOrigins) {
- // Ignore overflowed origins
- Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
- if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
- !DFSF.DFS.isZeroShadow(ArgShadow))
- IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
- DFSF.getArgOriginTLS(I, IRB));
- }
+ const DataLayout &DL = getDataLayout();
- unsigned Size =
- DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
- // Stop storing if arguments' size overflows. Inside a function, arguments
- // after overflow have zero shadow values.
- if (ArgOffset + Size > ArgTLSSize)
- break;
- IRB.CreateAlignedStore(
- DFSF.getShadow(CB.getArgOperand(I)),
- DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
- ShadowTLSAlignment);
- ArgOffset += alignTo(Size, ShadowTLSAlignment);
+ // Stores argument shadows.
+ unsigned ArgOffset = 0;
+ for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
+ if (ShouldTrackOrigins) {
+ // Ignore overflowed origins
+ Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
+ if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
+ !DFSF.DFS.isZeroShadow(ArgShadow))
+ IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
+ DFSF.getArgOriginTLS(I, IRB));
}
+
+ unsigned Size =
+ DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
+ // Stop storing if arguments' size overflows. Inside a function, arguments
+ // after overflow have zero shadow values.
+ if (ArgOffset + Size > ArgTLSSize)
+ break;
+ IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
+ DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
+ ShadowTLSAlignment);
+ ArgOffset += alignTo(Size, ShadowTLSAlignment);
}
Instruction *Next = nullptr;
@@ -3099,99 +2954,31 @@ void DFSanVisitor::visitCallBase(CallBase &CB) {
Next = CB.getNextNode();
}
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_TLS) {
- // Don't emit the epilogue for musttail call returns.
- if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
- return;
-
- // Loads the return value shadow.
- IRBuilder<> NextIRB(Next);
- const DataLayout &DL = getDataLayout();
- unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
- if (Size > RetvalTLSSize) {
- // Set overflowed return shadow to be zero.
- DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
- } else {
- LoadInst *LI = NextIRB.CreateAlignedLoad(
- DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
- ShadowTLSAlignment, "_dfsret");
- DFSF.SkipInsts.insert(LI);
- DFSF.setShadow(&CB, LI);
- DFSF.NonZeroChecks.push_back(LI);
- }
-
- if (ShouldTrackOrigins) {
- LoadInst *LI = NextIRB.CreateLoad(
- DFSF.DFS.OriginTy, DFSF.getRetvalOriginTLS(), "_dfsret_o");
- DFSF.SkipInsts.insert(LI);
- DFSF.setOrigin(&CB, LI);
- }
- }
- }
-
- // Do all instrumentation for IA_Args down here to defer tampering with the
- // CFG in a way that SplitEdge may be able to detect.
- if (DFSF.DFS.getInstrumentedABI() == DataFlowSanitizer::IA_Args) {
- // TODO: handle musttail call returns for IA_Args.
-
- FunctionType *NewFT = DFSF.DFS.getArgsFunctionType(FT);
- Value *Func =
- IRB.CreateBitCast(CB.getCalledOperand(), PointerType::getUnqual(NewFT));
-
- const unsigned NumParams = FT->getNumParams();
-
- // Copy original arguments.
- auto *ArgIt = CB.arg_begin(), *ArgEnd = CB.arg_end();
- std::vector<Value *> Args(NumParams);
- std::copy_n(ArgIt, NumParams, Args.begin());
-
- // Add shadow arguments by transforming original arguments.
- std::generate_n(std::back_inserter(Args), NumParams,
- [&]() { return DFSF.getShadow(*ArgIt++); });
-
- if (FT->isVarArg()) {
- unsigned VarArgSize = CB.arg_size() - NumParams;
- ArrayType *VarArgArrayTy =
- ArrayType::get(DFSF.DFS.PrimitiveShadowTy, VarArgSize);
- AllocaInst *VarArgShadow =
- new AllocaInst(VarArgArrayTy, getDataLayout().getAllocaAddrSpace(),
- "", &DFSF.F->getEntryBlock().front());
- Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0));
-
- // Copy remaining var args.
- unsigned GepIndex = 0;
- std::for_each(ArgIt, ArgEnd, [&](Value *Arg) {
- IRB.CreateStore(
- DFSF.getShadow(Arg),
- IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, GepIndex++));
- Args.push_back(Arg);
- });
- }
+ // Don't emit the epilogue for musttail call returns.
+ if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
+ return;
- CallBase *NewCB;
- if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
- NewCB = IRB.CreateInvoke(NewFT, Func, II->getNormalDest(),
- II->getUnwindDest(), Args);
+ // Loads the return value shadow.
+ IRBuilder<> NextIRB(Next);
+ unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
+ if (Size > RetvalTLSSize) {
+ // Set overflowed return shadow to be zero.
+ DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
} else {
- NewCB = IRB.CreateCall(NewFT, Func, Args);
- }
- NewCB->setCallingConv(CB.getCallingConv());
- NewCB->setAttributes(CB.getAttributes().removeAttributes(
- *DFSF.DFS.Ctx, AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCB->getType())));
-
- if (Next) {
- ExtractValueInst *ExVal = ExtractValueInst::Create(NewCB, 0, "", Next);
- DFSF.SkipInsts.insert(ExVal);
- ExtractValueInst *ExShadow = ExtractValueInst::Create(NewCB, 1, "", Next);
- DFSF.SkipInsts.insert(ExShadow);
- DFSF.setShadow(ExVal, ExShadow);
- DFSF.NonZeroChecks.push_back(ExShadow);
-
- CB.replaceAllUsesWith(ExVal);
+ LoadInst *LI = NextIRB.CreateAlignedLoad(
+ DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
+ ShadowTLSAlignment, "_dfsret");
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setShadow(&CB, LI);
+ DFSF.NonZeroChecks.push_back(LI);
}
- CB.eraseFromParent();
+ if (ShouldTrackOrigins) {
+ LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
+ DFSF.getRetvalOriginTLS(), "_dfsret_o");
+ DFSF.SkipInsts.insert(LI);
+ DFSF.setOrigin(&CB, LI);
+ }
}
}
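With the IA_Args ABI removed above, argument and return shadows always travel through the TLS buffers, and the store loop in visitCallBase simply walks a running offset until the buffer would overflow. A toy model of that bookkeeping; the 800-byte buffer size and 2-byte alignment are assumptions standing in for the pass's ArgTLSSize and ShadowTLSAlignment constants:

#include <cstdio>
#include <vector>

constexpr unsigned kArgTLSSize = 800;       // assumed; see ArgTLSSize
constexpr unsigned kShadowTLSAlignment = 2; // assumed; see ShadowTLSAlignment

unsigned alignTo(unsigned X, unsigned A) { return (X + A - 1) / A * A; }

int main() {
  std::vector<unsigned> ShadowSizes = {8, 8, 32, 8}; // per-argument shadow bytes
  unsigned ArgOffset = 0;
  for (unsigned Size : ShadowSizes) {
    if (ArgOffset + Size > kArgTLSSize)
      break; // overflowed arguments keep a zero shadow in the callee
    std::printf("shadow stored at arg TLS offset %u\n", ArgOffset);
    ArgOffset += alignTo(Size, kShadowTLSAlignment);
  }
}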
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index c99f2e66b1cc..325089fc4402 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -86,7 +86,7 @@ GCOVOptions GCOVOptions::getDefault() {
Options.Atomic = AtomicCounter;
if (DefaultGCOVVersion.size() != 4) {
- llvm::report_fatal_error(std::string("Invalid -default-gcov-version: ") +
+ llvm::report_fatal_error(Twine("Invalid -default-gcov-version: ") +
DefaultGCOVVersion);
}
memcpy(Options.Version, DefaultGCOVVersion.c_str(), 4);
@@ -1373,12 +1373,16 @@ Function *GCOVProfiler::insertReset(
BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", ResetF);
IRBuilder<> Builder(Entry);
+ LLVMContext &C = Entry->getContext();
// Zero out the counters.
for (const auto &I : CountersBySP) {
GlobalVariable *GV = I.first;
- Constant *Null = Constant::getNullValue(GV->getValueType());
- Builder.CreateStore(Null, GV);
+ auto *GVTy = cast<ArrayType>(GV->getValueType());
+ Builder.CreateMemSet(GV, Constant::getNullValue(Type::getInt8Ty(C)),
+ GVTy->getNumElements() *
+ GVTy->getElementType()->getScalarSizeInBits() / 8,
+ GV->getAlign());
}
Type *RetTy = ResetF->getReturnType();
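The reset function above now clears each counter array with a single memset instead of storing a zero aggregate, and the length handed to CreateMemSet comes from the array type. Since gcov counters are 64-bit, the size computation reduces to the small model below (it assumes, as the pass does, that the element width is a multiple of 8 bits):

#include <cstdio>

unsigned counterBytes(unsigned NumElements, unsigned ElementBits = 64) {
  return NumElements * ElementBits / 8;
}

int main() {
  std::printf("%u\n", counterBytes(5)); // a function with 5 counters -> 40 bytes
}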
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 60a4ee8811fb..62c265e40dab 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -17,7 +17,10 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
@@ -26,6 +29,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
@@ -41,6 +45,7 @@
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -115,6 +120,17 @@ static cl::opt<bool>
cl::Hidden, cl::desc("Use Stack Safety analysis results"),
cl::Optional);
+static cl::opt<size_t> ClMaxLifetimes(
+ "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
+ cl::ReallyHidden,
+ cl::desc("How many lifetime ends to handle for a single alloca."),
+ cl::Optional);
+
+static cl::opt<bool>
+ ClUseAfterScope("hwasan-use-after-scope",
+ cl::desc("detect use after scope within function"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClUARRetagToZero(
"hwasan-uar-retag-to-zero",
cl::desc("Clear alloca tags before returning from the function to allow "
@@ -220,9 +236,21 @@ bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
return shouldInstrumentStack(TargetTriple) &&
mightUseStackSafetyAnalysis(DisableOptimization);
}
+
+bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
+ return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
+}
+
/// An instrumentation pass implementing detection of addressability bugs
/// using tagged pointers.
class HWAddressSanitizer {
+private:
+ struct AllocaInfo {
+ AllocaInst *AI;
+ SmallVector<IntrinsicInst *, 2> LifetimeStart;
+ SmallVector<IntrinsicInst *, 2> LifetimeEnd;
+ };
+
public:
HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
const StackSafetyGlobalInfo *SSI)
@@ -237,7 +265,11 @@ public:
void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
- bool sanitizeFunction(Function &F);
+ DenseMap<AllocaInst *, AllocaInst *> padInterestingAllocas(
+ const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument);
+ bool sanitizeFunction(Function &F,
+ llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT);
void initializeModule();
void createHwasanCtorComdat();
@@ -250,23 +282,34 @@ public:
void untagPointerOperand(Instruction *I, Value *Addr);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+
+ int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
+ void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
+ unsigned AccessSizeIndex,
+ Instruction *InsertBefore);
void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore);
+ bool ignoreMemIntrinsic(MemIntrinsic *MI);
void instrumentMemIntrinsic(MemIntrinsic *MI);
bool instrumentMemAccess(InterestingMemoryOperand &O);
- bool ignoreAccess(Value *Ptr);
+ bool ignoreAccess(Instruction *Inst, Value *Ptr);
void getInterestingMemoryOperands(
Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
bool isInterestingAlloca(const AllocaInst &AI);
- bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
+ void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
+ static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
+ const DominatorTree &DT);
bool instrumentStack(
- SmallVectorImpl<AllocaInst *> &Allocas,
+ MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
+ SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
+ llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT);
Value *readRegister(IRBuilder<> &IRB, StringRef Name);
bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
@@ -313,8 +356,9 @@ private:
bool WithFrameRecord;
void init(Triple &TargetTriple, bool InstrumentWithCalls);
- unsigned getObjectAlignment() const { return 1U << Scale; }
+ uint64_t getObjectAlignment() const { return 1ULL << Scale; }
};
+
ShadowMapping Mapping;
Type *VoidTy = Type::getVoidTy(M.getContext());
@@ -331,6 +375,7 @@ private:
bool InstrumentLandingPads;
bool InstrumentWithCalls;
bool InstrumentStack;
+ bool DetectUseAfterScope;
bool UsePageAliases;
bool HasMatchAllTag = false;
@@ -377,14 +422,21 @@ public:
}
bool runOnFunction(Function &F) override {
- if (shouldUseStackSafetyAnalysis(Triple(F.getParent()->getTargetTriple()),
- DisableOptimization)) {
+ auto TargetTriple = Triple(F.getParent()->getTargetTriple());
+ if (shouldUseStackSafetyAnalysis(TargetTriple, DisableOptimization)) {
// We cannot call getAnalysis in doInitialization, that would cause a
// crash as the required analyses are not initialized yet.
HWASan->setSSI(
&getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult());
}
- return HWASan->sanitizeFunction(F);
+ return HWASan->sanitizeFunction(
+ F,
+ [&]() -> const DominatorTree & {
+ return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ },
+ [&]() -> const PostDominatorTree & {
+ return getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ });
}
bool doFinalization(Module &M) override {
@@ -399,6 +451,8 @@ public:
// This is so we don't need to plumb TargetTriple all the way to here.
if (mightUseStackSafetyAnalysis(DisableOptimization))
AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<PostDominatorTreeWrapperPass>();
}
private:
@@ -417,6 +471,8 @@ INITIALIZE_PASS_BEGIN(
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
false)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(
HWAddressSanitizerLegacyPass, "hwasan",
"HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
@@ -430,25 +486,41 @@ llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover,
DisableOptimization);
}
-HWAddressSanitizerPass::HWAddressSanitizerPass(bool CompileKernel, bool Recover,
- bool DisableOptimization)
- : CompileKernel(CompileKernel), Recover(Recover),
- DisableOptimization(DisableOptimization) {}
-
PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
const StackSafetyGlobalInfo *SSI = nullptr;
- if (shouldUseStackSafetyAnalysis(llvm::Triple(M.getTargetTriple()),
- DisableOptimization))
+ auto TargetTriple = llvm::Triple(M.getTargetTriple());
+ if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
- HWAddressSanitizer HWASan(M, CompileKernel, Recover, SSI);
+
+ HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
bool Modified = false;
- for (Function &F : M)
- Modified |= HWASan.sanitizeFunction(F);
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ for (Function &F : M) {
+ Modified |= HWASan.sanitizeFunction(
+ F,
+ [&]() -> const DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
+ },
+ [&]() -> const PostDominatorTree & {
+ return FAM.getResult<PostDominatorTreeAnalysis>(F);
+ });
+ }
if (Modified)
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
+void HWAddressSanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.CompileKernel)
+ OS << "kernel;";
+ if (Options.Recover)
+ OS << "recover";
+ OS << ">";
+}
void HWAddressSanitizer::createHwasanCtorComdat() {
std::tie(HwasanCtorFunction, std::ignore) =
@@ -566,6 +638,7 @@ void HWAddressSanitizer::initializeModule() {
UsePageAliases = shouldUsePageAliases(TargetTriple);
InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
InstrumentStack = shouldInstrumentStack(TargetTriple);
+ DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
PointerTagShift = IsX86_64 ? 57 : 56;
TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
@@ -712,7 +785,7 @@ Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
}
}
-bool HWAddressSanitizer::ignoreAccess(Value *Ptr) {
+bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
// Do not instrument accesses from different address spaces; we cannot deal
// with them.
Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
@@ -726,6 +799,12 @@ bool HWAddressSanitizer::ignoreAccess(Value *Ptr) {
if (Ptr->isSwiftError())
return true;
+ if (findAllocaForValue(Ptr)) {
+ if (!InstrumentStack)
+ return true;
+ if (SSI && SSI->stackAccessIsSafe(*Inst))
+ return true;
+ }
return false;
}
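The extended ignoreAccess() above skips any access whose pointer is rooted at an alloca when stack instrumentation is disabled, or when stack-safety results (SSI) prove the access safe. A sketch of the kind of source pattern this covers; it illustrates the intent only, not the analysis itself:

int provablySafe() {
  int buf[4] = {1, 2, 3, 4};
  return buf[2]; // constant, in-bounds index into a non-escaping local:
                 // no HWASan tag check is needed for this load
}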
@@ -740,29 +819,29 @@ void HWAddressSanitizer::getInterestingMemoryOperands(
return;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads || ignoreAccess(LI->getPointerOperand()))
+ if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
return;
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites || ignoreAccess(SI->getPointerOperand()))
+ if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
return;
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(RMW->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
return;
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), None);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics || ignoreAccess(XCHG->getPointerOperand()))
+ if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
return;
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(), None);
} else if (auto CI = dyn_cast<CallInst>(I)) {
- for (unsigned ArgNo = 0; ArgNo < CI->getNumArgOperands(); ArgNo++) {
+ for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
- ignoreAccess(CI->getArgOperand(ArgNo)))
+ ignoreAccess(I, CI->getArgOperand(ArgNo)))
continue;
Type *Ty = CI->getParamByValType(ArgNo);
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
@@ -809,30 +888,38 @@ Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
}
+int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
+ unsigned AccessSizeIndex) {
+ return (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
+ (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
+ (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
+ (Recover << HWASanAccessInfo::RecoverShift) +
+ (IsWrite << HWASanAccessInfo::IsWriteShift) +
+ (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
+}
+
+void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
+ unsigned AccessSizeIndex,
+ Instruction *InsertBefore) {
+ assert(!UsePageAliases);
+ const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
+ IRBuilder<> IRB(InsertBefore);
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
+ IRB.CreateCall(Intrinsic::getDeclaration(
+ M, UseShortGranules
+ ? Intrinsic::hwasan_check_memaccess_shortgranules
+ : Intrinsic::hwasan_check_memaccess),
+ {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
+}
+
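getAccessInfo() packs the per-check flags and the access-size index into one constant that either parameterizes the check intrinsic (outlined path) or ends up in the trap encoding (inline path). A standalone model of the packing; the shift values below are placeholders for illustration and do not claim to match the real HWASanAccessInfo constants:

#include <cstdint>
#include <cstdio>

// Placeholder shifts; the real layout lives in the HWASanAccessInfo namespace.
enum : unsigned {
  kAccessSizeShift = 0, // log2 of the access size
  kIsWriteShift = 4,
  kRecoverShift = 5,
};

int64_t packAccessInfo(bool IsWrite, unsigned AccessSizeIndex, bool Recover) {
  return (int64_t(Recover) << kRecoverShift) +
         (int64_t(IsWrite) << kIsWriteShift) +
         (int64_t(AccessSizeIndex) << kAccessSizeShift);
}

int main() {
  // A recoverable 8-byte write: AccessSizeIndex == 3 (log2(8)).
  std::printf("0x%llx\n", (unsigned long long)packAccessInfo(true, 3, true));
}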
void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
unsigned AccessSizeIndex,
Instruction *InsertBefore) {
assert(!UsePageAliases);
- const int64_t AccessInfo =
- (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
- (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
- (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
- (Recover << HWASanAccessInfo::RecoverShift) +
- (IsWrite << HWASanAccessInfo::IsWriteShift) +
- (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
+ const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
IRBuilder<> IRB(InsertBefore);
- if (OutlinedChecks) {
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
- IRB.CreateCall(Intrinsic::getDeclaration(
- M, UseShortGranules
- ? Intrinsic::hwasan_check_memaccess_shortgranules
- : Intrinsic::hwasan_check_memaccess),
- {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
- return;
- }
-
Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
IRB.getInt8Ty());
@@ -908,6 +995,16 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
}
+bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
+ return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
+ (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
+ }
+ if (isa<MemSetInst>(MI))
+ return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
+ return false;
+}
+
void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
@@ -943,6 +1040,8 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
if (InstrumentWithCalls) {
IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
IRB.CreatePointerCast(Addr, IntptrTy));
+ } else if (OutlinedChecks) {
+ instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
} else {
instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
}
@@ -968,7 +1067,7 @@ static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
return SizeInBytes * ArraySize;
}
-bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
+void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
size_t Size) {
size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
if (!UseShortGranules)
@@ -999,7 +1098,6 @@ bool HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
AlignedSize - 1));
}
}
- return true;
}
unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
@@ -1231,17 +1329,53 @@ bool HWAddressSanitizer::instrumentLandingPads(
return true;
}
+static bool
+maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts,
+ const DominatorTree &DT) {
+ // If we have too many lifetime ends, give up, as the algorithm below is N^2.
+ if (Insts.size() > ClMaxLifetimes)
+ return true;
+ for (size_t I = 0; I < Insts.size(); ++I) {
+ for (size_t J = 0; J < Insts.size(); ++J) {
+ if (I == J)
+ continue;
+ if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, &DT))
+ return true;
+ }
+ }
+ return false;
+}
+
+// static
+bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
+ const DominatorTree &DT) {
+ // An alloca that has exactly one start and end in every possible execution.
+ // If it has multiple ends, they have to be unreachable from each other, so
+ // at most one of them is actually used for each execution of the function.
+ return AllocaInfo.LifetimeStart.size() == 1 &&
+ (AllocaInfo.LifetimeEnd.size() == 1 ||
+ (AllocaInfo.LifetimeEnd.size() > 0 &&
+ !maybeReachableFromEachOther(AllocaInfo.LifetimeEnd, DT)));
+}
+
bool HWAddressSanitizer::instrumentStack(
- SmallVectorImpl<AllocaInst *> &Allocas,
+ MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
+ SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
- SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
+ SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
+ llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT) {
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
// alloca addresses using that. Unfortunately, offsets are not known yet
// (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
// temp, shift-OR it into each alloca address and xor with the retag mask.
// This generates one extra instruction per alloca use.
- for (unsigned N = 0; N < Allocas.size(); ++N) {
- auto *AI = Allocas[N];
+ unsigned int I = 0;
+
+ for (auto &KV : AllocasToInstrument) {
+ auto N = I++;
+ auto *AI = KV.first;
+ AllocaInfo &Info = KV.second;
IRBuilder<> IRB(AI->getNextNode());
// Replace uses of the alloca with tagged address.
@@ -1268,17 +1402,40 @@ bool HWAddressSanitizer::instrumentStack(
}
size_t Size = getAllocaSizeInBytes(*AI);
- tagAlloca(IRB, AI, Tag, Size);
-
- for (auto RI : RetVec) {
- IRB.SetInsertPoint(RI);
-
- // Re-tag alloca memory with the special UAR tag.
- Value *Tag = getUARTag(IRB, StackTag);
- tagAlloca(IRB, AI, Tag, alignTo(Size, Mapping.getObjectAlignment()));
+ size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ bool StandardLifetime =
+ UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
+ if (DetectUseAfterScope && StandardLifetime) {
+ IntrinsicInst *Start = Info.LifetimeStart[0];
+ IRB.SetInsertPoint(Start->getNextNode());
+ auto TagEnd = [&](Instruction *Node) {
+ IRB.SetInsertPoint(Node);
+ Value *UARTag = getUARTag(IRB, StackTag);
+ tagAlloca(IRB, AI, UARTag, AlignedSize);
+ };
+ tagAlloca(IRB, AI, Tag, Size);
+ if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd,
+ RetVec, TagEnd)) {
+ for (auto *End : Info.LifetimeEnd)
+ End->eraseFromParent();
+ }
+ } else {
+ tagAlloca(IRB, AI, Tag, Size);
+ for (auto *RI : RetVec) {
+ IRB.SetInsertPoint(RI);
+ Value *UARTag = getUARTag(IRB, StackTag);
+ tagAlloca(IRB, AI, UARTag, AlignedSize);
+ }
+ if (!StandardLifetime) {
+ for (auto &II : Info.LifetimeStart)
+ II->eraseFromParent();
+ for (auto &II : Info.LifetimeEnd)
+ II->eraseFromParent();
+ }
}
}
-
+ for (auto &I : UnrecognizedLifetimes)
+ I->eraseFromParent();
return true;
}
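With -hwasan-use-after-scope (off by default per ClUseAfterScope above), an alloca with a recognized lifetime gets its tag at llvm.lifetime.start and is retagged with the UAR tag at every reachable lifetime end or function exit, as instrumentStack() now does. A sketch of the bug class this is meant to catch:

// Deliberately buggy: the pointer outlives the scope of the object it points to.
int useAfterScope() {
  int *p = nullptr;
  {
    int x = 42; // llvm.lifetime.start of x: memory gets a fresh tag
    p = &x;
  }             // llvm.lifetime.end of x: memory is retagged with the UAR tag
  return *p;    // tag mismatch -> HWASan reports instead of silently reading
                // stale stack memory
}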
@@ -1300,7 +1457,42 @@ bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
!(SSI && SSI->isSafe(AI));
}
-bool HWAddressSanitizer::sanitizeFunction(Function &F) {
+DenseMap<AllocaInst *, AllocaInst *> HWAddressSanitizer::padInterestingAllocas(
+ const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument) {
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
+ for (auto &KV : AllocasToInstrument) {
+ AllocaInst *AI = KV.first;
+ uint64_t Size = getAllocaSizeInBytes(*AI);
+ uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+ AI->setAlignment(
+ Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
+ if (Size != AlignedSize) {
+ Type *AllocatedType = AI->getAllocatedType();
+ if (AI->isArrayAllocation()) {
+ uint64_t ArraySize =
+ cast<ConstantInt>(AI->getArraySize())->getZExtValue();
+ AllocatedType = ArrayType::get(AllocatedType, ArraySize);
+ }
+ Type *TypeWithPadding = StructType::get(
+ AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
+ auto *NewAI = new AllocaInst(
+ TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
+ NewAI->takeName(AI);
+ NewAI->setAlignment(AI->getAlign());
+ NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
+ NewAI->setSwiftError(AI->isSwiftError());
+ NewAI->copyMetadata(*AI);
+ auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
+ AI->replaceAllUsesWith(Bitcast);
+ AllocaToPaddedAllocaMap[AI] = NewAI;
+ }
+ }
+ return AllocaToPaddedAllocaMap;
+}
+
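padInterestingAllocas() (factored out above) rounds every instrumented alloca up to the tagging granule so that small, uninstrumented objects cannot hide in the padding and short granules have room for a real tag byte. A toy model of the rounding, assuming the usual shadow scale of 4 (16-byte granule); the real value is Mapping.getObjectAlignment():

#include <cstdint>
#include <cstdio>

constexpr uint64_t kGranule = 16; // 1ULL << Scale, assuming Scale == 4

uint64_t alignToGranule(uint64_t Size) {
  return (Size + kGranule - 1) / kGranule * kGranule;
}

int main() {
  for (uint64_t Size : {1, 12, 16, 24})
    std::printf("alloca of %llu bytes -> padded to %llu\n",
                (unsigned long long)Size,
                (unsigned long long)alignToGranule(Size));
}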
+bool HWAddressSanitizer::sanitizeFunction(
+ Function &F, llvm::function_ref<const DominatorTree &()> GetDT,
+ llvm::function_ref<const PostDominatorTree &()> GetPDT) {
if (&F == HwasanCtorFunction)
return false;
@@ -1311,18 +1503,36 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
- SmallVector<AllocaInst *, 8> AllocasToInstrument;
+ MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
SmallVector<Instruction *, 8> RetVec;
SmallVector<Instruction *, 8> LandingPadVec;
+ SmallVector<Instruction *, 4> UnrecognizedLifetimes;
DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
for (auto &BB : F) {
for (auto &Inst : BB) {
- if (InstrumentStack)
+ if (InstrumentStack) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
if (isInterestingAlloca(*AI))
- AllocasToInstrument.push_back(AI);
+ AllocasToInstrument.insert({AI, {}});
+ continue;
+ }
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end)) {
+ AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
+ if (!AI) {
+ UnrecognizedLifetimes.push_back(&Inst);
+ continue;
+ }
+ if (!isInterestingAlloca(*AI))
+ continue;
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start)
+ AllocasToInstrument[AI].LifetimeStart.push_back(II);
+ else
+ AllocasToInstrument[AI].LifetimeEnd.push_back(II);
continue;
}
+ }
if (isa<ReturnInst>(Inst) || isa<ResumeInst>(Inst) ||
isa<CleanupReturnInst>(Inst))
@@ -1343,7 +1553,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
getInterestingMemoryOperands(&Inst, OperandsToInstrument);
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
- IntrinToInstrument.push_back(MI);
+ if (!ignoreMemIntrinsic(MI))
+ IntrinToInstrument.push_back(MI);
}
}
@@ -1377,38 +1588,14 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
if (!AllocasToInstrument.empty()) {
Value *StackTag =
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- instrumentStack(AllocasToInstrument, AllocaDbgMap, RetVec, StackTag);
+ instrumentStack(AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
+ RetVec, StackTag, GetDT, GetPDT);
}
// Pad and align each of the allocas that we instrumented to stop small
// uninteresting allocas from hiding in instrumented alloca's padding and so
// that we have enough space to store real tags for short granules.
- DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
- for (AllocaInst *AI : AllocasToInstrument) {
- uint64_t Size = getAllocaSizeInBytes(*AI);
- uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
- AI->setAlignment(
- Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
- if (Size != AlignedSize) {
- Type *AllocatedType = AI->getAllocatedType();
- if (AI->isArrayAllocation()) {
- uint64_t ArraySize =
- cast<ConstantInt>(AI->getArraySize())->getZExtValue();
- AllocatedType = ArrayType::get(AllocatedType, ArraySize);
- }
- Type *TypeWithPadding = StructType::get(
- AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
- auto *NewAI = new AllocaInst(
- TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
- NewAI->takeName(AI);
- NewAI->setAlignment(AI->getAlign());
- NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
- NewAI->setSwiftError(AI->isSwiftError());
- NewAI->copyMetadata(*AI);
- auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
- AI->replaceAllUsesWith(Bitcast);
- AllocaToPaddedAllocaMap[AI] = NewAI;
- }
- }
+ DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap =
+ padInterestingAllocas(AllocasToInstrument);
if (!AllocaToPaddedAllocaMap.empty()) {
for (auto &BB : F) {
@@ -1434,13 +1621,11 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
// dynamic allocas.
if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
InsertPt = &*F.getEntryBlock().begin();
- for (auto II = EntryIRB.GetInsertBlock()->begin(),
- IE = EntryIRB.GetInsertBlock()->end();
- II != IE;) {
- Instruction *I = &*II++;
- if (auto *AI = dyn_cast<AllocaInst>(I))
+ for (Instruction &I :
+ llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
+ if (auto *AI = dyn_cast<AllocaInst>(&I))
if (isa<ConstantInt>(AI->getArraySize()))
- I->moveBefore(InsertPt);
+ I.moveBefore(InsertPt);
}
}
@@ -1586,9 +1771,10 @@ void HWAddressSanitizer::instrumentGlobals() {
Hasher.update(M.getSourceFileName());
MD5::MD5Result Hash;
Hasher.final(Hash);
- uint8_t Tag = Hash[0] & TagMaskByte;
+ uint8_t Tag = Hash[0];
for (GlobalVariable *GV : Globals) {
+ Tag &= TagMaskByte;
// Skip tag 0 in order to avoid collisions with untagged memory.
if (Tag == 0)
Tag = 1;
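The instrumentGlobals() change above moves the TagMaskByte masking inside the per-global loop, so however the tag evolves from one global to the next (the update itself is outside the lines shown here), it always stays within the mask and tag 0 remains reserved for untagged memory. A toy model of the per-global selection; 0x3F is the x86_64 mask mentioned earlier in this file's diff, 0xFF the default:

#include <cstdint>
#include <cstdio>

uint8_t nextTag(uint8_t Tag, uint8_t TagMaskByte) {
  Tag &= TagMaskByte;
  if (Tag == 0)
    Tag = 1; // skip 0 to avoid colliding with untagged memory
  return Tag;
}

int main() {
  std::printf("%d\n", nextTag(0x40, 0x3F)); // 0x40 & 0x3F == 0 -> remapped to 1
}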
diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index 071feb876540..3ea314329079 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -1,9 +1,8 @@
//===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 0d257bb6bd52..ad21fec269ec 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -446,13 +446,12 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
bool MadeChange = false;
PromotionCandidates.clear();
for (BasicBlock &BB : *F) {
- for (auto I = BB.begin(), E = BB.end(); I != E;) {
- auto Instr = I++;
- InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+ for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
+ InstrProfIncrementInst *Inc = castToIncrementInst(&Instr);
if (Inc) {
lowerIncrement(Inc);
MadeChange = true;
- } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
+ } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
lowerValueProfileInst(Ind);
MadeChange = true;
}
@@ -520,6 +519,14 @@ void InstrProfiling::promoteCounterLoadStores(Function *F) {
}
}
+static bool needsRuntimeHookUnconditionally(const Triple &TT) {
+ // On Fuchsia, we only need the runtime hook if any counters are present.
+ if (TT.isOSFuchsia())
+ return false;
+
+ return true;
+}
+
/// Check if the module contains uses of any profiling intrinsics.
static bool containsProfilingIntrinsics(Module &M) {
if (auto *F = M.getFunction(
@@ -548,8 +555,11 @@ bool InstrProfiling::run(
UsedVars.clear();
TT = Triple(M.getTargetTriple());
+ bool MadeChange = false;
+
// Emit the runtime hook even if no counters are present.
- bool MadeChange = emitRuntimeHook();
+ if (needsRuntimeHookUnconditionally(TT))
+ MadeChange = emitRuntimeHook();
// Improve compile time by avoiding linear scans when there is no work.
GlobalVariable *CoverageNamesVar =
@@ -588,6 +598,7 @@ bool InstrProfiling::run(
emitVNodes();
emitNameData();
+ emitRuntimeHook();
emitRegistration();
emitUses();
emitInitialization();
@@ -692,7 +703,6 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
LoadInst *LI = dyn_cast<LoadInst>(&I);
if (!LI) {
IRBuilder<> Builder(&I);
- Type *Int64Ty = Type::getInt64Ty(M->getContext());
GlobalVariable *Bias = M->getGlobalVariable(getInstrProfCounterBiasVarName());
if (!Bias) {
// Compiler must define this variable when runtime counter relocation
@@ -747,14 +757,18 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
}
/// Get the name of a profiling variable for a particular function.
-static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
+static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix,
+ bool &Renamed) {
StringRef NamePrefix = getInstrProfNameVarPrefix();
StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
Function *F = Inc->getParent()->getParent();
Module *M = F->getParent();
if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
- !canRenameComdatFunc(*F))
+ !canRenameComdatFunc(*F)) {
+ Renamed = false;
return (Prefix + Name).str();
+ }
+ Renamed = true;
uint64_t FuncHash = Inc->getHash()->getZExtValue();
SmallVector<char, 24> HashPostfix;
if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
@@ -848,6 +862,15 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
+ // Due to a limitation of the binder as of 2021/09/28, duplicate weak symbols
+ // in the same csect are not discarded. When there are duplicate weak symbols,
+ // we cannot guarantee that the relocations resolve to the intended weak
+ // symbol, so we cannot ensure the correctness of the relative CounterPtr and
+ // therefore have to use private linkage for counter and data symbols.
+ if (TT.isOSBinFormatXCOFF()) {
+ Linkage = GlobalValue::PrivateLinkage;
+ Visibility = GlobalValue::DefaultVisibility;
+ }
// Move the name variable to the right section. Place them in a COMDAT group
// if the associated function is a COMDAT. This will make sure that only one
// copy of counters of the COMDAT function will be emitted after linking. Keep
@@ -867,8 +890,11 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
// discarded.
bool DataReferencedByCode = profDataReferencedByCode(*M);
bool NeedComdat = needsComdatForCounter(*Fn, *M);
- std::string CntsVarName = getVarName(Inc, getInstrProfCountersVarPrefix());
- std::string DataVarName = getVarName(Inc, getInstrProfDataVarPrefix());
+ bool Renamed;
+ std::string CntsVarName =
+ getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
+ std::string DataVarName =
+ getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);
auto MaybeSetComdat = [&](GlobalVariable *GV) {
bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
if (UseComdat) {
@@ -909,7 +935,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
auto *ValuesVar = new GlobalVariable(
*M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
- getVarName(Inc, getInstrProfValuesVarPrefix()));
+ getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
ValuesVar->setVisibility(Visibility);
ValuesVar->setSection(
getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
@@ -920,6 +946,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
}
// Create data variable.
+ auto *IntPtrTy = M->getDataLayout().getIntPtrType(M->getContext());
auto *Int16Ty = Type::getInt16Ty(Ctx);
auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
Type *DataTypes[] = {
@@ -936,10 +963,6 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
- Constant *DataVals[] = {
-#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
-#include "llvm/ProfileData/InstrProfData.inc"
- };
// If the data variable is not referenced by code (if we don't emit
// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
// data variable live under linker GC, the data variable can be private. This
@@ -947,14 +970,30 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
//
// On COFF, a comdat leader cannot be local so we require DataReferencedByCode
// to be false.
- if (NS == 0 && (TT.isOSBinFormatELF() ||
- (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
+ //
+ // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
+ // that other copies must have the same CFG and cannot have value profiling.
+ // If no hash suffix, other profd copies may be referenced by code.
+ if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
+ (TT.isOSBinFormatELF() ||
+ (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
Linkage = GlobalValue::PrivateLinkage;
Visibility = GlobalValue::DefaultVisibility;
}
auto *Data =
- new GlobalVariable(*M, DataTy, false, Linkage,
- ConstantStruct::get(DataTy, DataVals), DataVarName);
+ new GlobalVariable(*M, DataTy, false, Linkage, nullptr, DataVarName);
+ // Reference the counter variable with a label difference (link-time
+ // constant).
+ auto *RelativeCounterPtr =
+ ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
+ ConstantExpr::getPtrToInt(Data, IntPtrTy));
+
+ Constant *DataVals[] = {
+#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
+#include "llvm/ProfileData/InstrProfData.inc"
+ };
+ Data->setInitializer(ConstantStruct::get(DataTy, DataVals));
+
Data->setVisibility(Visibility);
Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
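The data record above now refers to its counters through a label difference (ptrtoint(CounterPtr) - ptrtoint(Data)), a link-time constant, instead of an absolute pointer; a reader recovers the counter address by adding that offset back to the record's own address. A toy model of the round trip; the struct and field names here are made up, the real __profd_* layout comes from InstrProfData.inc:

#include <cstdio>

struct ProfDataStub {
  long long CounterOffset; // ptrtoint(CounterPtr) - ptrtoint(Data)
};

unsigned long long Counters[4];
ProfDataStub Data = {reinterpret_cast<long long>(&Counters[0]) -
                     reinterpret_cast<long long>(&Data)};

int main() {
  auto *C = reinterpret_cast<unsigned long long *>(
      reinterpret_cast<long long>(&Data) + Data.CounterOffset);
  std::printf("%d\n", C == &Counters[0]); // prints 1: the offset round-trips
}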
@@ -1035,7 +1074,7 @@ void InstrProfiling::emitNameData() {
std::string CompressedNameStr;
if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
DoInstrProfNameCompression)) {
- report_fatal_error(toString(std::move(E)), false);
+ report_fatal_error(Twine(toString(std::move(E))), false);
}
auto &Ctx = M->getContext();
@@ -1102,9 +1141,9 @@ void InstrProfiling::emitRegistration() {
}
bool InstrProfiling::emitRuntimeHook() {
- // We expect the linker to be invoked with -u<hook_var> flag for Linux or
- // Fuchsia, in which case there is no need to emit the user function.
- if (TT.isOSLinux() || TT.isOSFuchsia())
+ // We expect the linker to be invoked with the -u<hook_var> flag on Linux,
+ // in which case there is no need to emit the external variable.
+ if (TT.isOSLinux())
return false;
// If the module's provided its own runtime, we don't need to do anything.
@@ -1117,23 +1156,28 @@ bool InstrProfiling::emitRuntimeHook() {
new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
nullptr, getInstrProfRuntimeHookVarName());
- // Make a function that uses it.
- auto *User = Function::Create(FunctionType::get(Int32Ty, false),
- GlobalValue::LinkOnceODRLinkage,
- getInstrProfRuntimeHookVarUseFuncName(), M);
- User->addFnAttr(Attribute::NoInline);
- if (Options.NoRedZone)
- User->addFnAttr(Attribute::NoRedZone);
- User->setVisibility(GlobalValue::HiddenVisibility);
- if (TT.supportsCOMDAT())
- User->setComdat(M->getOrInsertComdat(User->getName()));
-
- IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
- auto *Load = IRB.CreateLoad(Int32Ty, Var);
- IRB.CreateRet(Load);
-
- // Mark the user variable as used so that it isn't stripped out.
- CompilerUsedVars.push_back(User);
+ if (TT.isOSBinFormatELF()) {
+ // Mark the user variable as used so that it isn't stripped out.
+ CompilerUsedVars.push_back(Var);
+ } else {
+ // Make a function that uses it.
+ auto *User = Function::Create(FunctionType::get(Int32Ty, false),
+ GlobalValue::LinkOnceODRLinkage,
+ getInstrProfRuntimeHookVarUseFuncName(), M);
+ User->addFnAttr(Attribute::NoInline);
+ if (Options.NoRedZone)
+ User->addFnAttr(Attribute::NoRedZone);
+ User->setVisibility(GlobalValue::HiddenVisibility);
+ if (TT.supportsCOMDAT())
+ User->setComdat(M->getOrInsertComdat(User->getName()));
+
+ IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
+ auto *Load = IRB.CreateLoad(Int32Ty, Var);
+ IRB.CreateRet(Load);
+
+ // Mark the function as used so that it isn't stripped out.
+ CompilerUsedVars.push_back(User);
+ }
return true;
}
@@ -1142,12 +1186,12 @@ void InstrProfiling::emitUses() {
// GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
// we conservatively retain all unconditionally in the compiler.
//
- // On ELF, the linker can guarantee the associated sections will be retained
- // or discarded as a unit, so llvm.compiler.used is sufficient. Similarly on
- // COFF, if prof data is not referenced by code we use one comdat and ensure
- // this GC property as well. Otherwise, we have to conservatively make all of
- // the sections retained by the linker.
- if (TT.isOSBinFormatELF() ||
+ // On ELF and Mach-O, the linker can guarantee the associated sections will be
+ // retained or discarded as a unit, so llvm.compiler.used is sufficient.
+ // Similarly on COFF, if prof data is not referenced by code we use one comdat
+ // and ensure this GC property as well. Otherwise, we have to conservatively
+ // make all of the sections retained by the linker.
+ if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
(TT.isOSBinFormatCOFF() && !profDataReferencedByCode(*M)))
appendToCompilerUsed(*M, CompilerUsedVars);
else
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 0e6a404a9e0b..727672fa0605 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -107,6 +108,10 @@ static cl::opt<int>
cl::desc("granularity of memprof shadow mapping"),
cl::Hidden, cl::init(DefaultShadowGranularity));
+static cl::opt<bool> ClStack("memprof-instrument-stack",
+ cl::desc("Instrument scalar stack variables"),
+ cl::Hidden, cl::init(false));
+
// Debug flags.
static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,
@@ -123,6 +128,8 @@ static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
+STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
+STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
namespace {
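The new statistics above count accesses that the -memprof-instrument-stack flag (default off) leaves uninstrumented: in the instrumentMop hunk further down, any access whose address is traced back to an alloca by getUnderlyingObject() is skipped and only tallied. A sketch of the source pattern affected:

int stackOnly() {
  int local[8] = {0};
  local[3] = 7;    // counted in NumSkippedStackWrites, not instrumented
  return local[3]; // counted in NumSkippedStackReads
}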
@@ -255,8 +262,6 @@ PreservedAnalyses MemProfilerPass::run(Function &F,
if (Profiler.instrumentFunction(F))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
-
- return PreservedAnalyses::all();
}
ModuleMemProfilerPass::ModuleMemProfilerPass() {}
@@ -448,6 +453,15 @@ void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
InterestingMemoryAccess &Access) {
+ // Skip instrumentation of stack accesses unless requested.
+ if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {
+ if (Access.IsWrite)
+ ++NumSkippedStackWrites;
+ else
+ ++NumSkippedStackReads;
+ return;
+ }
+
if (Access.IsWrite)
NumInstrumentedWrites++;
else
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4e755bab15f3..4d15b784f486 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -673,14 +673,27 @@ PreservedAnalyses MemorySanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
-PreservedAnalyses MemorySanitizerPass::run(Module &M,
- ModuleAnalysisManager &AM) {
+PreservedAnalyses
+ModuleMemorySanitizerPass::run(Module &M, ModuleAnalysisManager &AM) {
if (Options.Kernel)
return PreservedAnalyses::all();
insertModuleCtor(M);
return PreservedAnalyses::none();
}
+void MemorySanitizerPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Options.Recover)
+ OS << "recover;";
+ if (Options.Kernel)
+ OS << "kernel;";
+ OS << "track-origins=" << Options.TrackOrigins;
+ OS << ">";
+}
+
char MemorySanitizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(MemorySanitizerLegacyPass, "msan",
@@ -1695,7 +1708,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (FArgEagerCheck) {
*ShadowPtr = getCleanShadow(V);
setOrigin(A, getCleanOrigin());
- continue;
+ break;
} else if (FArgByVal) {
Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
// ByVal pointer itself has clean shadow. We copy the actual
@@ -1745,8 +1758,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
- if (!FArgEagerCheck)
- ArgOffset += alignTo(Size, kShadowTLSAlignment);
+ ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
assert(*ShadowPtr && "Could not find shadow for an argument");
return *ShadowPtr;
@@ -2661,7 +2673,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
RetTy->isX86_MMXTy()))
return false;
- unsigned NumArgOperands = I.getNumArgOperands();
+ unsigned NumArgOperands = I.arg_size();
for (unsigned i = 0; i < NumArgOperands; ++i) {
Type *Ty = I.getArgOperand(i)->getType();
if (Ty != RetTy)
@@ -2688,7 +2700,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// We special-case intrinsics where this approach fails. See llvm.bswap
/// handling as an example of that.
bool handleUnknownIntrinsic(IntrinsicInst &I) {
- unsigned NumArgOperands = I.getNumArgOperands();
+ unsigned NumArgOperands = I.arg_size();
if (NumArgOperands == 0)
return false;
@@ -2762,10 +2774,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *CopyOp, *ConvertOp;
assert((!HasRoundingMode ||
- isa<ConstantInt>(I.getArgOperand(I.getNumArgOperands() - 1))) &&
+ isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
"Invalid rounding mode");
- switch (I.getNumArgOperands() - HasRoundingMode) {
+ switch (I.arg_size() - HasRoundingMode) {
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
@@ -2854,7 +2866,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// size, and the rest is ignored. Behavior is defined even if shift size is
// greater than register (or field) width.
void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
- assert(I.getNumArgOperands() == 2);
+ assert(I.arg_size() == 2);
IRBuilder<> IRB(&I);
// If any of the S2 bits are poisoned, the whole thing is poisoned.
// Otherwise perform the same shift on S1.
@@ -2919,7 +2931,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
// EltSizeInBits is used only for x86mmx arguments.
void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
- assert(I.getNumArgOperands() == 2);
+ assert(I.arg_size() == 2);
bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
IRBuilder<> IRB(&I);
Value *S1 = getShadow(&I, 0);
@@ -3653,9 +3665,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
.addAttribute(Attribute::ArgMemOnly)
.addAttribute(Attribute::Speculatable);
- Call->removeAttributes(AttributeList::FunctionIndex, B);
+ Call->removeFnAttrs(B);
if (Function *Func = Call->getCalledFunction()) {
- Func->removeAttributes(AttributeList::FunctionIndex, B);
+ Func->removeFnAttrs(B);
}
maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
@@ -3696,42 +3708,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (EagerCheck) {
insertShadowCheck(A, &CB);
- continue;
- }
- if (ByVal) {
- // ByVal requires some special handling as it's too big for a single
- // load
- assert(A->getType()->isPointerTy() &&
- "ByVal argument is not a pointer!");
- Size = DL.getTypeAllocSize(CB.getParamByValType(i));
- if (ArgOffset + Size > kParamTLSSize) break;
- const MaybeAlign ParamAlignment(CB.getParamAlign(i));
- MaybeAlign Alignment = llvm::None;
- if (ParamAlignment)
- Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
- Value *AShadowPtr =
- getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
- /*isStore*/ false)
- .first;
-
- Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
- Alignment, Size);
- // TODO(glider): need to copy origins.
- } else {
- // Any other parameters mean we need bit-grained tracking of uninit data
Size = DL.getTypeAllocSize(A->getType());
- if (ArgOffset + Size > kParamTLSSize) break;
- Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
- kShadowTLSAlignment);
- Constant *Cst = dyn_cast<Constant>(ArgShadow);
- if (Cst && Cst->isNullValue()) ArgIsInitialized = true;
+ } else {
+ if (ByVal) {
+ // ByVal requires some special handling as it's too big for a single
+ // load
+ assert(A->getType()->isPointerTy() &&
+ "ByVal argument is not a pointer!");
+ Size = DL.getTypeAllocSize(CB.getParamByValType(i));
+ if (ArgOffset + Size > kParamTLSSize)
+ break;
+ const MaybeAlign ParamAlignment(CB.getParamAlign(i));
+ MaybeAlign Alignment = llvm::None;
+ if (ParamAlignment)
+ Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
+ Value *AShadowPtr =
+ getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
+ /*isStore*/ false)
+ .first;
+
+ Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
+ Alignment, Size);
+ // TODO(glider): need to copy origins.
+ } else {
+ // Any other parameters mean we need bit-grained tracking of uninit
+ // data
+ Size = DL.getTypeAllocSize(A->getType());
+ if (ArgOffset + Size > kParamTLSSize)
+ break;
+ Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
+ kShadowTLSAlignment);
+ Constant *Cst = dyn_cast<Constant>(ArgShadow);
+ if (Cst && Cst->isNullValue())
+ ArgIsInitialized = true;
+ }
+ if (MS.TrackOrigins && !ArgIsInitialized)
+ IRB.CreateStore(getOrigin(A),
+ getOriginPtrForArgument(A, IRB, ArgOffset));
+ (void)Store;
+ assert(Store != nullptr);
+ LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
}
- if (MS.TrackOrigins && !ArgIsInitialized)
- IRB.CreateStore(getOrigin(A),
- getOriginPtrForArgument(A, IRB, ArgOffset));
- (void)Store;
- assert(Size != 0 && Store != nullptr);
- LLVM_DEBUG(dbgs() << " Param:" << *Store << "\n");
+ assert(Size != 0);
ArgOffset += alignTo(Size, kShadowTLSAlignment);
}
LLVM_DEBUG(dbgs() << " done with call args\n");
@@ -3807,7 +3825,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (isAMustTailRetVal(RetVal)) return;
Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
bool HasNoUndef =
- F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+ F.hasRetAttribute(Attribute::NoUndef);
bool StoreShadow = !(ClEagerChecks && HasNoUndef);
// FIXME: Consider using SpecialCaseList to specify a list of functions that
// must always return fully initialized values. For now, we hardcode "main".
@@ -4176,7 +4194,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
MemorySanitizerVisitor &MSV)
: F(F), MS(MS), MSV(MSV) {
AMD64FpEndOffset = AMD64FpEndOffsetSSE;
- for (const auto &Attr : F.getAttributes().getFnAttributes()) {
+ for (const auto &Attr : F.getAttributes().getFnAttrs()) {
if (Attr.isStringAttribute() &&
(Attr.getKindAsString() == "target-features")) {
if (Attr.getValueAsString().contains("-sse"))
@@ -5330,6 +5348,9 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
if (!CompileKernel && F.getName() == kMsanModuleCtorName)
return false;
+ if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+ return false;
+
MemorySanitizerVisitor Visitor(F, *this, TLI);
// Clear out readonly/readnone attributes.
@@ -5339,7 +5360,7 @@ bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
.addAttribute(Attribute::WriteOnly)
.addAttribute(Attribute::ArgMemOnly)
.addAttribute(Attribute::Speculatable);
- F.removeAttributes(AttributeList::FunctionIndex, B);
+ F.removeFnAttrs(B);
return Visitor.runOnFunction();
}
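
The MemorySanitizer hunks above are mostly a mechanical migration to the newer attribute helpers, plus a new early exit for functions that opt out of sanitizer instrumentation. A minimal sketch of those three calls against the API as of this patch (the helper name and the attributes placed in the AttrBuilder are illustrative, not the pass's exact list):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Sketch only: the renamed attribute helpers used in the hunks above.
static bool prepareFunctionForMsan(llvm::Function &F) {
  // New bail-out: respect the per-function sanitizer opt-out attribute.
  if (F.hasFnAttribute(llvm::Attribute::DisableSanitizerInstrumentation))
    return false;

  // removeAttributes(AttributeList::FunctionIndex, B) -> removeFnAttrs(B).
  llvm::AttrBuilder B;
  B.addAttribute(llvm::Attribute::ReadOnly);
  B.addAttribute(llvm::Attribute::ReadNone);
  F.removeFnAttrs(B);

  // hasAttribute(AttributeList::ReturnIndex, ...) -> hasRetAttribute(...).
  bool HasNoUndefRet = F.hasRetAttribute(llvm::Attribute::NoUndef);
  (void)HasNoUndefRet; // Queried here only to show the new accessor.
  return true;
}
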
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3d9261eb99ba..af5946325bbb 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -110,6 +110,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -198,12 +199,14 @@ static cl::opt<bool>
"warnings about missing profile data for "
"functions."));
+namespace llvm {
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data.
-static cl::opt<bool>
+cl::opt<bool>
NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
cl::desc("Use this option to turn off/on "
"warnings about profile cfg mismatch."));
+} // namespace llvm
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data for Comdat functions, which often turns out to be false
@@ -462,7 +465,10 @@ public:
private:
bool runOnModule(Module &M) override {
createProfileFileNameVar(M, InstrProfileOutput);
- createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
+ // The variable in a comdat may be discarded by LTO. Ensure the
+ // declaration will be retained.
+ appendToCompilerUsed(
+ M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
return false;
}
std::string InstrProfileOutput;
@@ -1610,7 +1616,7 @@ static bool InstrumentAllFunctions(
// For the context-sensitve instrumentation, we should have a separated pass
// (before LTO/ThinLTO linking) to create these variables.
if (!IsCS)
- createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
+ createIRLevelProfileFlagVar(M, /*IsCS=*/false, PGOInstrumentEntry);
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
collectComdatMembers(M, ComdatMembers);
@@ -1630,7 +1636,10 @@ static bool InstrumentAllFunctions(
PreservedAnalyses
PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
createProfileFileNameVar(M, CSInstrName);
- createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
+ // The variable in a comdat may be discarded by LTO. Ensure the declaration
+ // will be retained.
+ appendToCompilerUsed(
+ M, createIRLevelProfileFlagVar(M, /*IsCS=*/true, PGOInstrumentEntry));
return PreservedAnalyses::all();
}
@@ -1677,7 +1686,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
BlockFrequencyInfo NBFI(F, NBPI, LI);
#ifndef NDEBUG
auto BFIEntryCount = F.getEntryCount();
- assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&
+ assert(BFIEntryCount.hasValue() && (BFIEntryCount->getCount() > 0) &&
"Invalid BFI Entrycount");
#endif
auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
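
Because the profile-flag variable lives in a comdat, LTO can discard it together with its group; adding it to llvm.compiler.used via appendToCompilerUsed keeps the symbol alive, which is what the two hunks above do. A rough sketch of that pattern with a hypothetical flag variable (name, type, and linkage are illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

// Sketch: create a comdat'ed flag variable and pin it via @llvm.compiler.used
// so LTO symbol GC cannot discard it. The name below is hypothetical.
static llvm::GlobalVariable *createPinnedFlag(llvm::Module &M) {
  auto *Int64Ty = llvm::Type::getInt64Ty(M.getContext());
  auto *GV = new llvm::GlobalVariable(
      M, Int64Ty, /*isConstant=*/true, llvm::GlobalValue::WeakAnyLinkage,
      llvm::ConstantInt::get(Int64Ty, 0), "__example_profile_flag");
  GV->setComdat(M.getOrInsertComdat(GV->getName()));
  llvm::appendToCompilerUsed(M, GV); // Adds GV to @llvm.compiler.used.
  return GV;
}
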
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 7607464cc0b9..da8ee1f15bf8 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -55,6 +55,16 @@ const char SanCovTraceConstCmp1[] = "__sanitizer_cov_trace_const_cmp1";
const char SanCovTraceConstCmp2[] = "__sanitizer_cov_trace_const_cmp2";
const char SanCovTraceConstCmp4[] = "__sanitizer_cov_trace_const_cmp4";
const char SanCovTraceConstCmp8[] = "__sanitizer_cov_trace_const_cmp8";
+const char SanCovLoad1[] = "__sanitizer_cov_load1";
+const char SanCovLoad2[] = "__sanitizer_cov_load2";
+const char SanCovLoad4[] = "__sanitizer_cov_load4";
+const char SanCovLoad8[] = "__sanitizer_cov_load8";
+const char SanCovLoad16[] = "__sanitizer_cov_load16";
+const char SanCovStore1[] = "__sanitizer_cov_store1";
+const char SanCovStore2[] = "__sanitizer_cov_store2";
+const char SanCovStore4[] = "__sanitizer_cov_store4";
+const char SanCovStore8[] = "__sanitizer_cov_store8";
+const char SanCovStore16[] = "__sanitizer_cov_store16";
const char SanCovTraceDiv4[] = "__sanitizer_cov_trace_div4";
const char SanCovTraceDiv8[] = "__sanitizer_cov_trace_div8";
const char SanCovTraceGep[] = "__sanitizer_cov_trace_gep";
@@ -122,6 +132,14 @@ static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
cl::desc("Tracing of DIV instructions"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClLoadTracing("sanitizer-coverage-trace-loads",
+ cl::desc("Tracing of load instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClStoreTracing("sanitizer-coverage-trace-stores",
+ cl::desc("Tracing of store instructions"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
cl::desc("Tracing of GEP instructions"),
cl::Hidden, cl::init(false));
@@ -175,9 +193,11 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.PCTable |= ClCreatePCTable;
Options.NoPrune |= !ClPruneBlocks;
Options.StackDepth |= ClStackDepth;
+ Options.TraceLoads |= ClLoadTracing;
+ Options.TraceStores |= ClStoreTracing;
if (!Options.TracePCGuard && !Options.TracePC &&
!Options.Inline8bitCounters && !Options.StackDepth &&
- !Options.InlineBoolFlag)
+ !Options.InlineBoolFlag && !Options.TraceLoads && !Options.TraceStores)
Options.TracePCGuard = true; // TracePCGuard is default.
return Options;
}
@@ -207,6 +227,8 @@ private:
ArrayRef<BinaryOperator *> DivTraceTargets);
void InjectTraceForGep(Function &F,
ArrayRef<GetElementPtrInst *> GepTraceTargets);
+ void InjectTraceForLoadsAndStores(Function &F, ArrayRef<LoadInst *> Loads,
+ ArrayRef<StoreInst *> Stores);
void InjectTraceForSwitch(Function &F,
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks,
@@ -234,14 +256,17 @@ private:
std::string getSectionEnd(const std::string &Section) const;
FunctionCallee SanCovTracePCIndir;
FunctionCallee SanCovTracePC, SanCovTracePCGuard;
- FunctionCallee SanCovTraceCmpFunction[4];
- FunctionCallee SanCovTraceConstCmpFunction[4];
- FunctionCallee SanCovTraceDivFunction[2];
+ std::array<FunctionCallee, 4> SanCovTraceCmpFunction;
+ std::array<FunctionCallee, 4> SanCovTraceConstCmpFunction;
+ std::array<FunctionCallee, 5> SanCovLoadFunction;
+ std::array<FunctionCallee, 5> SanCovStoreFunction;
+ std::array<FunctionCallee, 2> SanCovTraceDivFunction;
FunctionCallee SanCovTraceGepFunction;
FunctionCallee SanCovTraceSwitchFunction;
GlobalVariable *SanCovLowestStack;
- Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
- *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty, *Int1PtrTy;
+ Type *Int128PtrTy, *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty,
+ *Int32PtrTy, *Int16PtrTy, *Int16Ty, *Int8Ty, *Int8PtrTy, *Int1Ty,
+ *Int1PtrTy;
Module *CurModule;
std::string CurModuleUniqueId;
Triple TargetTriple;
@@ -411,7 +436,9 @@ bool ModuleSanitizerCoverage::instrumentModule(
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
+ Int128PtrTy = PointerType::getUnqual(IRB.getInt128Ty());
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+ Int16PtrTy = PointerType::getUnqual(IRB.getInt16Ty());
Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Int1PtrTy = PointerType::getUnqual(IRB.getInt1Ty());
@@ -452,6 +479,28 @@ bool ModuleSanitizerCoverage::instrumentModule(
SanCovTraceConstCmpFunction[3] =
M.getOrInsertFunction(SanCovTraceConstCmp8, VoidTy, Int64Ty, Int64Ty);
+ // Loads.
+ SanCovLoadFunction[0] = M.getOrInsertFunction(SanCovLoad1, VoidTy, Int8PtrTy);
+ SanCovLoadFunction[1] =
+ M.getOrInsertFunction(SanCovLoad2, VoidTy, Int16PtrTy);
+ SanCovLoadFunction[2] =
+ M.getOrInsertFunction(SanCovLoad4, VoidTy, Int32PtrTy);
+ SanCovLoadFunction[3] =
+ M.getOrInsertFunction(SanCovLoad8, VoidTy, Int64PtrTy);
+ SanCovLoadFunction[4] =
+ M.getOrInsertFunction(SanCovLoad16, VoidTy, Int128PtrTy);
+ // Stores.
+ SanCovStoreFunction[0] =
+ M.getOrInsertFunction(SanCovStore1, VoidTy, Int8PtrTy);
+ SanCovStoreFunction[1] =
+ M.getOrInsertFunction(SanCovStore2, VoidTy, Int16PtrTy);
+ SanCovStoreFunction[2] =
+ M.getOrInsertFunction(SanCovStore4, VoidTy, Int32PtrTy);
+ SanCovStoreFunction[3] =
+ M.getOrInsertFunction(SanCovStore8, VoidTy, Int64PtrTy);
+ SanCovStoreFunction[4] =
+ M.getOrInsertFunction(SanCovStore16, VoidTy, Int128PtrTy);
+
{
AttributeList AL;
AL = AL.addParamAttribute(*C, 0, Attribute::ZExt);
@@ -632,6 +681,8 @@ void ModuleSanitizerCoverage::instrumentFunction(
SmallVector<Instruction *, 8> SwitchTraceTargets;
SmallVector<BinaryOperator *, 8> DivTraceTargets;
SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
+ SmallVector<LoadInst *, 8> Loads;
+ SmallVector<StoreInst *, 8> Stores;
const DominatorTree *DT = DTCallback(F);
const PostDominatorTree *PDT = PDTCallback(F);
@@ -661,6 +712,12 @@ void ModuleSanitizerCoverage::instrumentFunction(
if (Options.TraceGep)
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))
GepTraceTargets.push_back(GEP);
+ if (Options.TraceLoads)
+ if (LoadInst *LI = dyn_cast<LoadInst>(&Inst))
+ Loads.push_back(LI);
+ if (Options.TraceStores)
+ if (StoreInst *SI = dyn_cast<StoreInst>(&Inst))
+ Stores.push_back(SI);
if (Options.StackDepth)
if (isa<InvokeInst>(Inst) ||
(isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))
@@ -674,6 +731,7 @@ void ModuleSanitizerCoverage::instrumentFunction(
InjectTraceForSwitch(F, SwitchTraceTargets);
InjectTraceForDiv(F, DivTraceTargets);
InjectTraceForGep(F, GepTraceTargets);
+ InjectTraceForLoadsAndStores(F, Loads, Stores);
}
GlobalVariable *ModuleSanitizerCoverage::CreateFunctionLocalArrayInSection(
@@ -857,6 +915,40 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
}
}
+void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
+ Function &, ArrayRef<LoadInst *> Loads, ArrayRef<StoreInst *> Stores) {
+ auto CallbackIdx = [&](const Value *Ptr) -> int {
+ auto ElementTy = cast<PointerType>(Ptr->getType())->getElementType();
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(ElementTy);
+ return TypeSize == 8 ? 0
+ : TypeSize == 16 ? 1
+ : TypeSize == 32 ? 2
+ : TypeSize == 64 ? 3
+ : TypeSize == 128 ? 4
+ : -1;
+ };
+ Type *PointerType[5] = {Int8PtrTy, Int16PtrTy, Int32PtrTy, Int64PtrTy,
+ Int128PtrTy};
+ for (auto LI : Loads) {
+ IRBuilder<> IRB(LI);
+ auto Ptr = LI->getPointerOperand();
+ int Idx = CallbackIdx(Ptr);
+ if (Idx < 0)
+ continue;
+ IRB.CreateCall(SanCovLoadFunction[Idx],
+ IRB.CreatePointerCast(Ptr, PointerType[Idx]));
+ }
+ for (auto SI : Stores) {
+ IRBuilder<> IRB(SI);
+ auto Ptr = SI->getPointerOperand();
+ int Idx = CallbackIdx(Ptr);
+ if (Idx < 0)
+ continue;
+ IRB.CreateCall(SanCovStoreFunction[Idx],
+ IRB.CreatePointerCast(Ptr, PointerType[Idx]));
+ }
+}
+
void ModuleSanitizerCoverage::InjectTraceForCmp(
Function &, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto I : CmpTraceTargets) {
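
With -sanitizer-coverage-trace-loads / -sanitizer-coverage-trace-stores, each instrumented access now calls one of the __sanitizer_cov_loadN / __sanitizer_cov_storeN functions declared above, passing the accessed pointer cast to the matching width. The runtime linked into the target is expected to define these symbols; a minimal stub sketch, with signatures inferred from the getOrInsertFunction declarations above (the printf bodies are placeholders):

#include <cstdint>
#include <cstdio>

// Placeholder runtime stubs for the new load/store tracing hooks. Each hook
// receives the address being accessed; the access width is in the name.
extern "C" void __sanitizer_cov_load1(uint8_t *Addr)  { std::printf("load1  %p\n", (void *)Addr); }
extern "C" void __sanitizer_cov_load2(uint16_t *Addr) { std::printf("load2  %p\n", (void *)Addr); }
extern "C" void __sanitizer_cov_load4(uint32_t *Addr) { std::printf("load4  %p\n", (void *)Addr); }
extern "C" void __sanitizer_cov_load8(uint64_t *Addr) { std::printf("load8  %p\n", (void *)Addr); }
extern "C" void __sanitizer_cov_load16(__int128 *Addr) { std::printf("load16 %p\n", (void *)Addr); }
extern "C" void __sanitizer_cov_store1(uint8_t *Addr) { std::printf("store1 %p\n", (void *)Addr); }
// __sanitizer_cov_store2/4/8/16 follow the same pattern with wider types.
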
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 063999a68236..f98e39d751f4 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -206,8 +206,8 @@ PreservedAnalyses ThreadSanitizerPass::run(Function &F,
return PreservedAnalyses::all();
}
-PreservedAnalyses ThreadSanitizerPass::run(Module &M,
- ModuleAnalysisManager &MAM) {
+PreservedAnalyses ModuleThreadSanitizerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
insertModuleCtor(M);
return PreservedAnalyses::none();
}
@@ -249,8 +249,7 @@ void ThreadSanitizer::initialize(Module &M) {
IRBuilder<> IRB(M.getContext());
AttributeList Attr;
- Attr = Attr.addAttribute(M.getContext(), AttributeList::FunctionIndex,
- Attribute::NoUnwind);
+ Attr = Attr.addFnAttribute(M.getContext(), Attribute::NoUnwind);
// Initialize the callbacks.
TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", Attr,
IRB.getVoidTy(), IRB.getInt8PtrTy());
@@ -563,6 +562,12 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
// all.
if (F.hasFnAttribute(Attribute::Naked))
return false;
+
+ // __attribute__((disable_sanitizer_instrumentation)) prevents all kinds of
+ // instrumentation.
+ if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
+ return false;
+
initialize(*F.getParent());
SmallVector<InstructionInfo, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
@@ -580,7 +585,8 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
AtomicAccesses.push_back(&Inst);
else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
LocalLoadsAndStores.push_back(&Inst);
- else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ else if ((isa<CallInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) ||
+ isa<InvokeInst>(Inst)) {
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
if (isa<MemIntrinsic>(Inst))
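
Like the MemorySanitizer change earlier in this patch, ThreadSanitizer now bails out on functions carrying the DisableSanitizerInstrumentation attribute, which Clang attaches for the source-level attribute of the same name. A hypothetical usage sketch (the function below is not from this patch):

// Hypothetical example: the body below stays free of __tsan_* / __msan_*
// calls in a -fsanitize=thread or -fsanitize=memory build, because Clang
// lowers the attribute to DisableSanitizerInstrumentation on the IR function.
__attribute__((disable_sanitizer_instrumentation))
void raw_runtime_hook(volatile long *Counter) {
  *Counter += 1; // Left uninstrumented by TSan and MSan.
}
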
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
index 06b12149f597..1ca6ddabac5b 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.cpp
@@ -103,9 +103,8 @@ CallInst *BundledRetainClaimRVs::insertRVCallWithColors(
Instruction *InsertPt, CallBase *AnnotatedCall,
const DenseMap<BasicBlock *, ColorVector> &BlockColors) {
IRBuilder<> Builder(InsertPt);
- bool IsRetainRV = objcarc::hasAttachedCallOpBundle(AnnotatedCall, true);
- Function *Func = EP.get(IsRetainRV ? ARCRuntimeEntryPointKind::RetainRV
- : ARCRuntimeEntryPointKind::ClaimRV);
+ Function *Func = *objcarc::getAttachedARCFunction(AnnotatedCall);
+ assert(Func && "operand isn't a Function");
Type *ParamTy = Func->getArg(0)->getType();
Value *CallArg = Builder.CreateBitCast(AnnotatedCall, ParamTy);
auto *Call =
@@ -115,16 +114,28 @@ CallInst *BundledRetainClaimRVs::insertRVCallWithColors(
}
BundledRetainClaimRVs::~BundledRetainClaimRVs() {
- if (ContractPass) {
- // At this point, we know that the annotated calls can't be tail calls as
- // they are followed by marker instructions and retainRV/claimRV calls. Mark
- // them as notail, so that the backend knows these calls can't be tail
- // calls.
- for (auto P : RVCalls)
- if (auto *CI = dyn_cast<CallInst>(P.second))
+ for (auto P : RVCalls) {
+ if (ContractPass) {
+ CallBase *CB = P.second;
+ // At this point, we know that the annotated calls can't be tail calls
+ // as they are followed by marker instructions and retainRV/claimRV
+ // calls. Mark them as notail so that the backend knows these calls
+ // can't be tail calls.
+ if (auto *CI = dyn_cast<CallInst>(CB))
CI->setTailCallKind(CallInst::TCK_NoTail);
- } else {
- for (auto P : RVCalls)
+
+ if (UseMarker) {
+ // Remove the retainRV/claimRV function operand from the operand bundle
+ // to reflect the fact that the backend is responsible for emitting only
+ // the marker instruction, but not the retainRV/claimRV call.
+ OperandBundleDef OB("clang.arc.attachedcall", None);
+ auto *NewCB = CallBase::Create(CB, OB, CB);
+ CB->replaceAllUsesWith(NewCB);
+ CB->eraseFromParent();
+ }
+ }
+
+ if (!ContractPass || !UseMarker)
EraseInstruction(P.first);
}
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index 1f9d76969bfd..2b47bec7ffe8 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -105,8 +105,8 @@ CallInst *createCallInstWithColors(
class BundledRetainClaimRVs {
public:
- BundledRetainClaimRVs(ARCRuntimeEntryPoints &P, bool ContractPass)
- : EP(P), ContractPass(ContractPass) {}
+ BundledRetainClaimRVs(bool ContractPass, bool UseMarker)
+ : ContractPass(ContractPass), UseMarker(UseMarker) {}
~BundledRetainClaimRVs();
/// Insert a retainRV/claimRV call to the normal destination blocks of invokes
@@ -155,8 +155,10 @@ private:
/// A map of inserted retainRV/claimRV calls to annotated calls/invokes.
DenseMap<CallInst *, CallBase *> RVCalls;
- ARCRuntimeEntryPoints &EP;
bool ContractPass;
+
+ /// Indicates whether the target uses a special inline-asm marker.
+ bool UseMarker;
};
} // end namespace objcarc
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index 6a928f2c7ffb..210ec60f2f87 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -64,30 +64,29 @@ bool OptimizeBB(BasicBlock *BB) {
bool Changed = false;
Instruction *Push = nullptr;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
- Instruction *Inst = &*I++;
- switch (GetBasicARCInstKind(Inst)) {
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ switch (GetBasicARCInstKind(&Inst)) {
case ARCInstKind::AutoreleasepoolPush:
- Push = Inst;
+ Push = &Inst;
break;
case ARCInstKind::AutoreleasepoolPop:
// If this pop matches a push and nothing in between can autorelease,
// zap the pair.
- if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ if (Push && cast<CallInst>(&Inst)->getArgOperand(0) == Push) {
Changed = true;
LLVM_DEBUG(dbgs() << "ObjCARCAPElim::OptimizeBB: Zapping push pop "
"autorelease pair:\n"
" Pop: "
- << *Inst << "\n"
+ << Inst << "\n"
<< " Push: " << *Push
<< "\n");
- Inst->eraseFromParent();
+ Inst.eraseFromParent();
Push->eraseFromParent();
}
Push = nullptr;
break;
case ARCInstKind::CallOrUser:
- if (MayAutorelease(cast<CallBase>(*Inst)))
+ if (MayAutorelease(cast<CallBase>(Inst)))
Push = nullptr;
break;
default:
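
Several loops in this patch switch to llvm::make_early_inc_range, which advances the iterator before the body runs so the current instruction can be erased without invalidating the traversal. An illustrative helper showing the idiom (not part of the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

// Illustrative helper: erase trivially dead calls. The early-inc adaptor
// increments the iterator before the body runs, so eraseFromParent() on the
// current instruction is safe inside the loop.
static void zapUnusedCalls(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (auto *CI = llvm::dyn_cast<llvm::CallInst>(&I))
      if (CI->use_empty() && !CI->mayHaveSideEffects())
        CI->eraseFromParent();
}
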
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 62161b5b6b40..c2ed94e8e1f6 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -226,13 +226,6 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
// of Inst.
ARCInstKind Class = GetBasicARCInstKind(Inst);
- // If Inst is an unrelated retain, we don't care about it.
- //
- // TODO: This is one area where the optimization could be made more
- // aggressive.
- if (IsRetain(Class))
- continue;
-
// If we have seen the store, but not the release...
if (Store) {
// We need to make sure that it is safe to move the release from its
@@ -248,8 +241,18 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
return nullptr;
}
- // Ok, now we know we have not seen a store yet. See if Inst can write to
- // our load location, if it can not, just ignore the instruction.
+ // Ok, now we know we have not seen a store yet.
+
+ // If Inst is a retain, we don't care about it as it doesn't prevent moving
+ // the load to the store.
+ //
+ // TODO: This is one area where the optimization could be made more
+ // aggressive.
+ if (IsRetain(Class))
+ continue;
+
+ // See if Inst can write to our load location; if it cannot, just ignore
+ // the instruction.
if (!isModSet(AA->getModRefInfo(Inst, Loc)))
continue;
@@ -431,13 +434,21 @@ bool ObjCARCContract::tryToPeepholeInstruction(
LLVM_FALLTHROUGH;
case ARCInstKind::RetainRV:
case ARCInstKind::ClaimRV: {
- // If we're compiling for a target which needs a special inline-asm
- // marker to do the return value optimization and the retainRV/claimRV call
- // wasn't bundled with a call, insert the marker now.
+ bool IsInstContainedInBundle = BundledInsts->contains(Inst);
+
+ // Return now if the target doesn't need a special inline-asm marker. Return
+ // true if this is a bundled retainRV/claimRV call, which is going to be
+ // erased at the end of this pass, to avoid undoing objc-arc-expand and
+ // replacing uses of the retainRV/claimRV call's argument with its result.
if (!RVInstMarker)
- return false;
+ return IsInstContainedInBundle;
+
+ // The target needs a special inline-asm marker.
- if (BundledInsts->contains(Inst))
+ // We don't have to emit the marker if this is a bundled call since the
+ // backend is responsible for emitting it. Return false to undo
+ // objc-arc-expand.
+ if (IsInstContainedInBundle)
return false;
BasicBlock::iterator BBI = Inst->getIterator();
@@ -537,7 +548,7 @@ bool ObjCARCContract::run(Function &F, AAResults *A, DominatorTree *D) {
AA = A;
DT = D;
PA.setAA(A);
- BundledRetainClaimRVs BRV(EP, true);
+ BundledRetainClaimRVs BRV(true, RVInstMarker);
BundledInsts = &BRV;
std::pair<bool, bool> R = BundledInsts->insertAfterInvokes(F, DT);
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index d2121dcebe91..6b074ac5adab 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -56,12 +56,10 @@ static bool runImpl(Function &F) {
LLVM_DEBUG(dbgs() << "ObjCARCExpand: Visiting Function: " << F.getName()
<< "\n");
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- Instruction *Inst = &*I;
+ for (Instruction &Inst : instructions(&F)) {
+ LLVM_DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << Inst << "\n");
- LLVM_DEBUG(dbgs() << "ObjCARCExpand: Visiting: " << *Inst << "\n");
-
- switch (GetBasicARCInstKind(Inst)) {
+ switch (GetBasicARCInstKind(&Inst)) {
case ARCInstKind::Retain:
case ARCInstKind::RetainRV:
case ARCInstKind::Autorelease:
@@ -73,12 +71,12 @@ static bool runImpl(Function &F) {
// harder. Undo any uses of this optimization that the front-end
// emitted here. We'll redo them in the contract pass.
Changed = true;
- Value *Value = cast<CallInst>(Inst)->getArgOperand(0);
- LLVM_DEBUG(dbgs() << "ObjCARCExpand: Old = " << *Inst
+ Value *Value = cast<CallInst>(&Inst)->getArgOperand(0);
+ LLVM_DEBUG(dbgs() << "ObjCARCExpand: Old = " << Inst
<< "\n"
" New = "
<< *Value << "\n");
- Inst->replaceAllUsesWith(Value);
+ Inst.replaceAllUsesWith(Value);
break;
}
default:
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index ada6aa8d9b6d..0fa4904456cd 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -2229,13 +2229,12 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
// Then, for each destroyWeak with an alloca operand, check to see if
// the alloca and all its users can be zapped.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
- Instruction *Inst = &*I++;
- ARCInstKind Class = GetBasicARCInstKind(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+ ARCInstKind Class = GetBasicARCInstKind(&Inst);
if (Class != ARCInstKind::DestroyWeak)
continue;
- CallInst *Call = cast<CallInst>(Inst);
+ CallInst *Call = cast<CallInst>(&Inst);
Value *Arg = Call->getArgOperand(0);
if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
for (User *U : Alloca->users()) {
@@ -2250,8 +2249,8 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
}
}
Changed = true;
- for (auto UI = Alloca->user_begin(), UE = Alloca->user_end(); UI != UE;) {
- CallInst *UserInst = cast<CallInst>(*UI++);
+ for (User *U : llvm::make_early_inc_range(Alloca->users())) {
+ CallInst *UserInst = cast<CallInst>(U);
switch (GetBasicARCInstKind(UserInst)) {
case ARCInstKind::InitWeak:
case ARCInstKind::StoreWeak:
@@ -2462,7 +2461,7 @@ bool ObjCARCOpt::run(Function &F, AAResults &AA) {
return false;
Changed = CFGChanged = false;
- BundledRetainClaimRVs BRV(EP, false);
+ BundledRetainClaimRVs BRV(false, objcarc::getRVInstMarker(*F.getParent()));
BundledInsts = &BRV;
LLVM_DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName()
diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
index a63e356ce1fc..6d0a67c91cfa 100644
--- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
+++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h
@@ -56,7 +56,8 @@ class ProvenanceAnalysis {
CachedResultsTy CachedResults;
- DenseMap<const Value *, WeakTrackingVH> UnderlyingObjCPtrCache;
+ DenseMap<const Value *, std::pair<WeakVH, WeakTrackingVH>>
+ UnderlyingObjCPtrCache;
bool relatedCheck(const Value *A, const Value *B);
bool relatedSelect(const SelectInst *A, const Value *B);
diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
index 6fdfe787d438..fe637ee066a4 100644
--- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp
@@ -58,11 +58,11 @@ bool PAEval::runOnFunction(Function &F) {
for (auto &Arg : F.args())
insertIfNamed(Values, &Arg);
- for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- insertIfNamed(Values, &*I);
+ for (Instruction &I : instructions(F)) {
+ insertIfNamed(Values, &I);
- for (auto &Op : I->operands())
- insertIfNamed(Values, Op);
+ for (auto &Op : I.operands())
+ insertIfNamed(Values, Op);
}
ProvenanceAnalysis PA;
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 6f3fdb88eda5..b693acceb3f6 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -538,7 +538,7 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
// that have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
- for (Instruction &I : instructions(F)) {
+ for (Instruction &I : llvm::reverse(instructions(F))) {
// Check if the instruction is alive.
if (isLive(&I))
continue;
@@ -554,9 +554,11 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
// Prepare to delete.
Worklist.push_back(&I);
salvageDebugInfo(I);
- I.dropAllReferences();
}
+ for (Instruction *&I : Worklist)
+ I->dropAllReferences();
+
for (Instruction *&I : Worklist) {
++NumRemoved;
I->eraseFromParent();
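
The ADCE change above (and the similar BDCE change further down) defers dropAllReferences() to a separate pass over the worklist, so every dead instruction is still fully formed while debug info is salvaged, and only afterwards are operand references broken and the instructions erased. A condensed sketch of the final two phases, assuming Worklist already holds only dead instructions (the helper name is illustrative):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"

// Sketch of the two-phase deletion used above; Worklist is assumed to hold
// only instructions already proven dead.
static void deleteDeadInstructions(
    llvm::SmallVectorImpl<llvm::Instruction *> &Worklist) {
  // Phase 1: break every operand reference while all instructions still
  // exist, so nothing is erased while another dead instruction points at it.
  for (llvm::Instruction *I : Worklist)
    I->dropAllReferences();
  // Phase 2: the instructions can now be erased in any order.
  for (llvm::Instruction *I : Worklist)
    I->eraseFromParent();
}
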
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index be21db9087d2..e4ec5f266eb8 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -221,6 +221,10 @@ bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I,
AAPtr = AAPtr->stripPointerCastsSameRepresentation();
AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get());
AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty);
+ if (!isa<SCEVConstant>(AlignSCEV))
+ // Added to suppress a crash because the consumer doesn't expect non-constant
+ // alignments in the assume bundle. TODO: Consider generalizing the caller.
+ return false;
if (AlignOB.Inputs.size() == 3)
OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get());
else
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index c06125788f37..6c2467db79f7 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -53,7 +53,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// in the def-use chain needs to be changed.
auto *J = dyn_cast<Instruction>(JU);
if (J && J->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(J).isAllOnesValue()) {
+ !DB.getDemandedBits(J).isAllOnes()) {
Visited.insert(J);
WorkList.push_back(J);
}
@@ -84,7 +84,7 @@ static void clearAssumptionsOfUsers(Instruction *I, DemandedBits &DB) {
// that in the def-use chain needs to be changed.
auto *K = dyn_cast<Instruction>(KU);
if (K && Visited.insert(K).second && K->getType()->isIntOrIntVectorTy() &&
- !DB.getDemandedBits(K).isAllOnesValue())
+ !DB.getDemandedBits(K).isAllOnes())
WorkList.push_back(K);
}
}
@@ -103,12 +103,9 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
// Remove instructions that are dead, either because they were not reached
// during analysis or have no demanded bits.
if (DB.isInstructionDead(&I) ||
- (I.getType()->isIntOrIntVectorTy() &&
- DB.getDemandedBits(&I).isNullValue() &&
+ (I.getType()->isIntOrIntVectorTy() && DB.getDemandedBits(&I).isZero() &&
wouldInstructionBeTriviallyDead(&I))) {
- salvageDebugInfo(I);
Worklist.push_back(&I);
- I.dropAllReferences();
Changed = true;
continue;
}
@@ -155,6 +152,11 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
}
}
+ for (Instruction *&I : llvm::reverse(Worklist)) {
+ salvageDebugInfo(*I);
+ I->dropAllReferences();
+ }
+
for (Instruction *&I : Worklist) {
++NumRemoved;
I->eraseFromParent();
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 2eb94b721d96..95de59fa8262 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -467,7 +467,7 @@ static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallBase &CB,
BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
- for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
+ for (auto *Pred : llvm::reverse(Preds)) {
ConditionsTy Conditions;
// Record condition on edge BB(CS) <- Pred
recordCondition(CB, Pred, CB.getParent(), Conditions);
@@ -505,8 +505,7 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
- BasicBlock &BB = *BI++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
auto II = BB.getFirstNonPHIOrDbg()->getIterator();
auto IE = BB.getTerminator()->getIterator();
// Iterate until we reach the terminator instruction. tryToSplitCallSite
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 535f50d4f904..27f54f8026e1 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -762,7 +762,7 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
cast<PointerType>(Ty)->getAddressSpace());
Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
- Mat = GetElementPtrInst::Create(Int8PtrTy->getElementType(), Base,
+ Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base,
Offset, "mat_gep", InsertionPt);
Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
} else
@@ -819,10 +819,9 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
// Aside from constant GEPs, only constant cast expressions are collected.
assert(ConstExpr->isCast() && "ConstExpr should be a cast");
- Instruction *ConstExprInst = ConstExpr->getAsInstruction();
+ Instruction *ConstExprInst = ConstExpr->getAsInstruction(
+ findMatInsertPt(ConstUser.Inst, ConstUser.OpndIdx));
ConstExprInst->setOperand(0, Mat);
- ConstExprInst->insertBefore(findMatInsertPt(ConstUser.Inst,
- ConstUser.OpndIdx));
// Use the same debug location as the instruction we are about to update.
ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index efd1c025d0cd..7f2d5d7d9987 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -268,6 +269,31 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
continue;
WorkList.emplace_back(DT.getNode(&BB));
+ // True as long as the current instruction is guaranteed to execute.
+ bool GuaranteedToExecute = true;
+ // Scan BB for assume calls.
+ // TODO: also use this scan to queue conditions to simplify, so we can
+ // interleave facts from assumes and conditions to simplify in a single
+ // basic block, and to skip another traversal of each basic block when
+ // simplifying.
+ for (Instruction &I : BB) {
+ Value *Cond;
+ // For now, just handle assumes with a single compare as condition.
+ if (match(&I, m_Intrinsic<Intrinsic::assume>(m_Value(Cond))) &&
+ isa<CmpInst>(Cond)) {
+ if (GuaranteedToExecute) {
+ // The assume is guaranteed to execute when BB is entered, hence Cond
+ // holds on entry to BB.
+ WorkList.emplace_back(DT.getNode(&BB), cast<CmpInst>(Cond), false);
+ } else {
+ // Otherwise the condition only holds in the successors.
+ for (BasicBlock *Succ : successors(&BB))
+ WorkList.emplace_back(DT.getNode(Succ), cast<CmpInst>(Cond), false);
+ }
+ }
+ GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I);
+ }
+
auto *Br = dyn_cast<BranchInst>(BB.getTerminator());
if (!Br || !Br->isConditional())
continue;
@@ -395,8 +421,13 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
for (auto &E : reverse(DFSInStack))
dbgs() << " C " << *E.Condition << " " << E.IsNot << "\n";
});
- Cmp->replaceAllUsesWith(
- ConstantInt::getTrue(F.getParent()->getContext()));
+ Cmp->replaceUsesWithIf(
+ ConstantInt::getTrue(F.getParent()->getContext()), [](Use &U) {
+ // Conditions in an assume trivially simplify to true. Skip uses
+ // in assume calls to not destroy the available information.
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ return !II || II->getIntrinsicID() != Intrinsic::assume;
+ });
NumCondsRemoved++;
Changed = true;
}
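
Rather than replaceAllUsesWith, the proven comparison is now folded with Value::replaceUsesWithIf and a predicate that skips uses inside llvm.assume, so the assumed condition survives for later queries. A standalone sketch of the same call (helper name illustrative):

#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicInst.h"

// Sketch: fold a proven comparison to 'true' everywhere except inside
// llvm.assume, which must keep the original condition to stay informative.
static void foldProvenCondition(llvm::CmpInst *Cmp) {
  auto *True = llvm::ConstantInt::getTrue(Cmp->getContext());
  Cmp->replaceUsesWithIf(True, [](llvm::Use &U) {
    auto *II = llvm::dyn_cast<llvm::IntrinsicInst>(U.getUser());
    return !II || II->getIntrinsicID() != llvm::Intrinsic::assume;
  });
}
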
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 36cbd42a5fdd..ca9567dc7ac8 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -67,6 +67,7 @@ STATISTIC(NumUDivURemsNarrowed,
STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
STATISTIC(NumSExt, "Number of sext converted to zext");
+STATISTIC(NumSICmps, "Number of signed icmp preds simplified to unsigned");
STATISTIC(NumAnd, "Number of ands removed");
STATISTIC(NumNW, "Number of no-wrap deductions");
STATISTIC(NumNSW, "Number of no-signed-wrap deductions");
@@ -295,11 +296,34 @@ static bool processMemAccess(Instruction *I, LazyValueInfo *LVI) {
return true;
}
+static bool processICmp(ICmpInst *Cmp, LazyValueInfo *LVI) {
+ // Only for signed relational comparisons of scalar integers.
+ if (Cmp->getType()->isVectorTy() ||
+ !Cmp->getOperand(0)->getType()->isIntegerTy())
+ return false;
+
+ if (!Cmp->isSigned())
+ return false;
+
+ ICmpInst::Predicate UnsignedPred =
+ ConstantRange::getEquivalentPredWithFlippedSignedness(
+ Cmp->getPredicate(), LVI->getConstantRange(Cmp->getOperand(0), Cmp),
+ LVI->getConstantRange(Cmp->getOperand(1), Cmp));
+
+ if (UnsignedPred == ICmpInst::Predicate::BAD_ICMP_PREDICATE)
+ return false;
+
+ ++NumSICmps;
+ Cmp->setPredicate(UnsignedPred);
+
+ return true;
+}
+
/// See if LazyValueInfo's ability to exploit edge conditions or range
/// information is sufficient to prove this comparison. Even for local
/// conditions, this can sometimes prove conditions instcombine can't by
/// exploiting range information.
-static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+static bool constantFoldCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
Value *Op0 = Cmp->getOperand(0);
auto *C = dyn_cast<Constant>(Cmp->getOperand(1));
if (!C)
@@ -318,6 +342,17 @@ static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
return true;
}
+static bool processCmp(CmpInst *Cmp, LazyValueInfo *LVI) {
+ if (constantFoldCmp(Cmp, LVI))
+ return true;
+
+ if (auto *ICmp = dyn_cast<ICmpInst>(Cmp))
+ if (processICmp(ICmp, LVI))
+ return true;
+
+ return false;
+}
+
/// Simplify a switch instruction by removing cases which can never fire. If the
/// uselessness of a case could be determined locally then constant propagation
/// would already have figured it out. Instead, walk the predecessors and
@@ -341,7 +376,13 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
// ConstantFoldTerminator() as the underlying SwitchInst can be changed.
SwitchInstProfUpdateWrapper SI(*I);
- for (auto CI = SI->case_begin(), CE = SI->case_end(); CI != CE;) {
+ APInt Low =
+ APInt::getSignedMaxValue(Cond->getType()->getScalarSizeInBits());
+ APInt High =
+ APInt::getSignedMinValue(Cond->getType()->getScalarSizeInBits());
+
+ SwitchInst::CaseIt CI = SI->case_begin();
+ for (auto CE = SI->case_end(); CI != CE;) {
ConstantInt *Case = CI->getCaseValue();
LazyValueInfo::Tristate State =
LVI->getPredicateAt(CmpInst::ICMP_EQ, Cond, Case, I,
@@ -374,9 +415,28 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
break;
}
+ // Get Lower/Upper bound from switch cases.
+ Low = APIntOps::smin(Case->getValue(), Low);
+ High = APIntOps::smax(Case->getValue(), High);
+
// Increment the case iterator since we didn't delete it.
++CI;
}
+
+ // Try to simplify default case as unreachable
+ if (CI == SI->case_end() && SI->getNumCases() != 0 &&
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg())) {
+ const ConstantRange SIRange =
+ LVI->getConstantRange(SI->getCondition(), SI);
+
+ // If the numbered switch cases cover the entire range of the condition,
+ // then the default case is not reachable.
+ if (SIRange.getSignedMin() == Low && SIRange.getSignedMax() == High &&
+ SI->getNumCases() == High - Low + 1) {
+ createUnreachableSwitchDefault(SI, &DTU);
+ Changed = true;
+ }
+ }
}
if (Changed)
@@ -690,7 +750,7 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, LazyValueInfo *LVI) {
// sdiv/srem is UB if divisor is -1 and dividend is INT_MIN, so unless we can
// prove that such a combination is impossible, we need to bump the bitwidth.
- if (CRs[1]->contains(APInt::getAllOnesValue(OrigWidth)) &&
+ if (CRs[1]->contains(APInt::getAllOnes(OrigWidth)) &&
CRs[0]->contains(
APInt::getSignedMinValue(MinSignedBits).sextOrSelf(OrigWidth)))
++MinSignedBits;
@@ -1023,49 +1083,48 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
// blocks.
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
bool BBChanged = false;
- for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
- Instruction *II = &*BI++;
- switch (II->getOpcode()) {
+ for (Instruction &II : llvm::make_early_inc_range(*BB)) {
+ switch (II.getOpcode()) {
case Instruction::Select:
- BBChanged |= processSelect(cast<SelectInst>(II), LVI);
+ BBChanged |= processSelect(cast<SelectInst>(&II), LVI);
break;
case Instruction::PHI:
- BBChanged |= processPHI(cast<PHINode>(II), LVI, DT, SQ);
+ BBChanged |= processPHI(cast<PHINode>(&II), LVI, DT, SQ);
break;
case Instruction::ICmp:
case Instruction::FCmp:
- BBChanged |= processCmp(cast<CmpInst>(II), LVI);
+ BBChanged |= processCmp(cast<CmpInst>(&II), LVI);
break;
case Instruction::Load:
case Instruction::Store:
- BBChanged |= processMemAccess(II, LVI);
+ BBChanged |= processMemAccess(&II, LVI);
break;
case Instruction::Call:
case Instruction::Invoke:
- BBChanged |= processCallSite(cast<CallBase>(*II), LVI);
+ BBChanged |= processCallSite(cast<CallBase>(II), LVI);
break;
case Instruction::SRem:
case Instruction::SDiv:
- BBChanged |= processSDivOrSRem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processSDivOrSRem(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::UDiv:
case Instruction::URem:
- BBChanged |= processUDivOrURem(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processUDivOrURem(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::AShr:
- BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processAShr(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::SExt:
- BBChanged |= processSExt(cast<SExtInst>(II), LVI);
+ BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
break;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
- BBChanged |= processBinOp(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processBinOp(cast<BinaryOperator>(&II), LVI);
break;
case Instruction::And:
- BBChanged |= processAnd(cast<BinaryOperator>(II), LVI);
+ BBChanged |= processAnd(cast<BinaryOperator>(&II), LVI);
break;
}
}
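
The new processICmp relies on ConstantRange::getEquivalentPredWithFlippedSignedness: if the operand ranges reported by LVI make a signed predicate and its unsigned counterpart agree, the predicate is rewritten in place. A small, self-contained example of that helper with fixed ranges (the values are chosen for illustration; the pass feeds in LVI results instead):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"

// Worked example: when both operands are known to lie in [0, 100), a signed
// "slt" compare behaves exactly like an unsigned "ult" compare, and the
// helper returns the flipped predicate (BAD_ICMP_PREDICATE otherwise).
static llvm::CmpInst::Predicate flippedPredicateExample() {
  llvm::ConstantRange NonNeg(llvm::APInt(32, 0), llvm::APInt(32, 100));
  return llvm::ConstantRange::getEquivalentPredWithFlippedSignedness(
      llvm::CmpInst::ICMP_SLT, NonNeg, NonNeg); // Expected: ICMP_ULT.
}
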
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 90679bcac4b7..8c4523206070 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -1,9 +1,8 @@
//===- DFAJumpThreading.cpp - Threads a switch statement inside a loop ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -84,8 +83,6 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <deque>
-#include <unordered_map>
-#include <unordered_set>
using namespace llvm;
@@ -147,8 +144,7 @@ private:
Stack.push_back(SIToUnfold);
while (!Stack.empty()) {
- SelectInstToUnfold SIToUnfold = Stack.back();
- Stack.pop_back();
+ SelectInstToUnfold SIToUnfold = Stack.pop_back_val();
std::vector<SelectInstToUnfold> NewSIsToUnfold;
std::vector<BasicBlock *> NewBBs;
@@ -174,6 +170,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
@@ -350,7 +347,7 @@ struct ClonedBlock {
typedef std::deque<BasicBlock *> PathType;
typedef std::vector<PathType> PathsType;
-typedef std::set<const BasicBlock *> VisitedBlocks;
+typedef SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
typedef std::vector<ClonedBlock> CloneList;
// This data structure keeps track of all blocks that have been cloned. If two
@@ -493,7 +490,7 @@ private:
}
bool isPredictableValue(Value *InpVal, SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.find(InpVal) != SeenValues.end())
+ if (SeenValues.contains(InpVal))
return true;
if (isa<ConstantInt>(InpVal))
@@ -508,7 +505,7 @@ private:
void addInstToQueue(Value *Val, std::deque<Instruction *> &Q,
SmallSet<Value *, 16> &SeenValues) {
- if (SeenValues.find(Val) != SeenValues.end())
+ if (SeenValues.contains(Val))
return;
if (Instruction *I = dyn_cast<Instruction>(Val))
Q.push_back(I);
@@ -533,7 +530,7 @@ private:
return false;
if (isa<PHINode>(SIUse) &&
- SIBB->getSingleSuccessor() != dyn_cast<Instruction>(SIUse)->getParent())
+ SIBB->getSingleSuccessor() != cast<Instruction>(SIUse)->getParent())
return false;
// If select will not be sunk during unfolding, and it is in the same basic
@@ -621,13 +618,9 @@ private:
// Some blocks have multiple edges to the same successor, and this set
// is used to prevent a duplicate path from being generated
SmallSet<BasicBlock *, 4> Successors;
-
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
- BasicBlock *Succ = *SI;
-
- if (Successors.find(Succ) != Successors.end())
+ for (BasicBlock *Succ : successors(BB)) {
+ if (!Successors.insert(Succ).second)
continue;
- Successors.insert(Succ);
// Found a cycle through the SwitchBlock
if (Succ == SwitchBlock) {
@@ -636,7 +629,7 @@ private:
}
// We have encountered a cycle, do not get caught in it
- if (Visited.find(Succ) != Visited.end())
+ if (Visited.contains(Succ))
continue;
PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
@@ -668,15 +661,14 @@ private:
SmallSet<Value *, 16> SeenValues;
while (!Stack.empty()) {
- PHINode *CurPhi = Stack.back();
- Stack.pop_back();
+ PHINode *CurPhi = Stack.pop_back_val();
Res[CurPhi->getParent()] = CurPhi;
SeenValues.insert(CurPhi);
for (Value *Incoming : CurPhi->incoming_values()) {
if (Incoming == FirstDef || isa<ConstantInt>(Incoming) ||
- SeenValues.find(Incoming) != SeenValues.end()) {
+ SeenValues.contains(Incoming)) {
continue;
}
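
The DFAJumpThreading cleanups swap std::set for SmallPtrSet, find() != end() for contains()/insert().second, and back()+pop_back() for pop_back_val(). A compact sketch of a worklist written with these idioms (the helper is illustrative, not from the patch):

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"

// Illustrative DFS worklist written with the idioms adopted above.
static void visitOnce(llvm::BasicBlock *Entry) {
  llvm::SmallPtrSet<const llvm::BasicBlock *, 8> Visited;
  llvm::SmallVector<llvm::BasicBlock *, 16> Stack{Entry};
  while (!Stack.empty()) {
    llvm::BasicBlock *BB = Stack.pop_back_val(); // back() + pop_back() in one.
    if (!Visited.insert(BB).second) // insert().second instead of find()!=end().
      continue;
    for (llvm::BasicBlock *Succ : llvm::successors(BB))
      Stack.push_back(Succ);
  }
}
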
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index d22b3f409585..a8ec8bb97970 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -13,10 +13,10 @@
// in between both MemoryDefs. A bit more concretely:
//
// For all MemoryDefs StartDef:
-// 1. Get the next dominating clobbering MemoryDef (EarlierAccess) by walking
+// 1. Get the next dominating clobbering MemoryDef (MaybeDeadAccess) by walking
// upwards.
-// 2. Check that there are no reads between EarlierAccess and the StartDef by
-// checking all uses starting at EarlierAccess and walking until we see
+// 2. Check that there are no reads between MaybeDeadAccess and the StartDef by
+// checking all uses starting at MaybeDeadAccess and walking until we see
// StartDef.
// 3. For each found CurrentDef, check that:
// 1. There are no barrier instructions between CurrentDef and StartDef (like
@@ -56,6 +56,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
@@ -78,6 +79,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -122,7 +124,7 @@ EnablePartialStoreMerging("enable-dse-partial-store-merging",
static cl::opt<unsigned>
MemorySSAScanLimit("dse-memoryssa-scanlimit", cl::init(150), cl::Hidden,
cl::desc("The number of memory instructions to scan for "
- "dead store elimination (default = 100)"));
+ "dead store elimination (default = 150)"));
static cl::opt<unsigned> MemorySSAUpwardsStepLimit(
"dse-memoryssa-walklimit", cl::init(90), cl::Hidden,
cl::desc("The maximum number of steps while walking upwards to find "
@@ -203,39 +205,6 @@ static bool hasAnalyzableMemoryWrite(Instruction *I,
return false;
}
-/// Return a Location stored to by the specified instruction. If isRemovable
-/// returns true, this function and getLocForRead completely describe the memory
-/// operations for this instruction.
-static MemoryLocation getLocForWrite(Instruction *Inst,
- const TargetLibraryInfo &TLI) {
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return MemoryLocation::get(SI);
-
- // memcpy/memmove/memset.
- if (auto *MI = dyn_cast<AnyMemIntrinsic>(Inst))
- return MemoryLocation::getForDest(MI);
-
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- default:
- return MemoryLocation(); // Unhandled intrinsic.
- case Intrinsic::init_trampoline:
- return MemoryLocation::getAfter(II->getArgOperand(0));
- case Intrinsic::masked_store:
- return MemoryLocation::getForArgument(II, 1, TLI);
- case Intrinsic::lifetime_end: {
- uint64_t Len = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- return MemoryLocation(II->getArgOperand(1), Len);
- }
- }
- }
- if (auto *CB = dyn_cast<CallBase>(Inst))
- // All the supported TLI functions so far happen to have dest as their
- // first argument.
- return MemoryLocation::getAfter(CB->getArgOperand(0));
- return MemoryLocation();
-}
-
/// If the value of this instruction and the memory it writes to is unused, may
/// we delete this instruction?
static bool isRemovable(Instruction *I) {
@@ -333,147 +302,146 @@ enum OverwriteResult {
} // end anonymous namespace
/// Check if two instruction are masked stores that completely
-/// overwrite one another. More specifically, \p Later has to
-/// overwrite \p Earlier.
-static OverwriteResult isMaskedStoreOverwrite(const Instruction *Later,
- const Instruction *Earlier,
+/// overwrite one another. More specifically, \p KillingI has to
+/// overwrite \p DeadI.
+static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
BatchAAResults &AA) {
- const auto *IIL = dyn_cast<IntrinsicInst>(Later);
- const auto *IIE = dyn_cast<IntrinsicInst>(Earlier);
- if (IIL == nullptr || IIE == nullptr)
+ const auto *KillingII = dyn_cast<IntrinsicInst>(KillingI);
+ const auto *DeadII = dyn_cast<IntrinsicInst>(DeadI);
+ if (KillingII == nullptr || DeadII == nullptr)
return OW_Unknown;
- if (IIL->getIntrinsicID() != Intrinsic::masked_store ||
- IIE->getIntrinsicID() != Intrinsic::masked_store)
+ if (KillingII->getIntrinsicID() != Intrinsic::masked_store ||
+ DeadII->getIntrinsicID() != Intrinsic::masked_store)
return OW_Unknown;
// Pointers.
- Value *LP = IIL->getArgOperand(1)->stripPointerCasts();
- Value *EP = IIE->getArgOperand(1)->stripPointerCasts();
- if (LP != EP && !AA.isMustAlias(LP, EP))
+ Value *KillingPtr = KillingII->getArgOperand(1)->stripPointerCasts();
+ Value *DeadPtr = DeadII->getArgOperand(1)->stripPointerCasts();
+ if (KillingPtr != DeadPtr && !AA.isMustAlias(KillingPtr, DeadPtr))
return OW_Unknown;
// Masks.
- // TODO: check that Later's mask is a superset of the Earlier's mask.
- if (IIL->getArgOperand(3) != IIE->getArgOperand(3))
+ // TODO: check that KillingII's mask is a superset of the DeadII's mask.
+ if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
return OW_Unknown;
return OW_Complete;
}
-/// Return 'OW_Complete' if a store to the 'Later' location completely
-/// overwrites a store to the 'Earlier' location, 'OW_End' if the end of the
-/// 'Earlier' location is completely overwritten by 'Later', 'OW_Begin' if the
-/// beginning of the 'Earlier' location is overwritten by 'Later'.
-/// 'OW_PartialEarlierWithFullLater' means that an earlier (big) store was
-/// overwritten by a latter (smaller) store which doesn't write outside the big
+/// Return 'OW_Complete' if a store to the 'KillingLoc' location completely
+/// overwrites a store to the 'DeadLoc' location, 'OW_End' if the end of the
+/// 'DeadLoc' location is completely overwritten by 'KillingLoc', 'OW_Begin'
+/// if the beginning of the 'DeadLoc' location is overwritten by 'KillingLoc'.
+/// 'OW_PartialEarlierWithFullLater' means that a dead (big) store was
+/// overwritten by a killing (smaller) store which doesn't write outside the big
/// store's memory locations. Returns 'OW_Unknown' if nothing can be determined.
-/// NOTE: This function must only be called if both \p Later and \p Earlier
-/// write to the same underlying object with valid \p EarlierOff and \p
-/// LaterOff.
-static OverwriteResult isPartialOverwrite(const MemoryLocation &Later,
- const MemoryLocation &Earlier,
- int64_t EarlierOff, int64_t LaterOff,
- Instruction *DepWrite,
+/// NOTE: This function must only be called if both \p KillingLoc and \p
+/// DeadLoc belong to the same underlying object with valid \p KillingOff and
+/// \p DeadOff.
+static OverwriteResult isPartialOverwrite(const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t KillingOff, int64_t DeadOff,
+ Instruction *DeadI,
InstOverlapIntervalsTy &IOL) {
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// We may now overlap, although the overlap is not complete. There might also
// be other incomplete overlaps, and together, they might cover the complete
- // earlier write.
+ // dead store.
// Note: The correctness of this logic depends on the fact that this function
// is not even called providing DepWrite when there are any intervening reads.
if (EnablePartialOverwriteTracking &&
- LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= EarlierOff) {
+ KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= DeadOff) {
// Insert our part of the overlap into the map.
- auto &IM = IOL[DepWrite];
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: Earlier [" << EarlierOff
- << ", " << int64_t(EarlierOff + EarlierSize)
- << ") Later [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ auto &IM = IOL[DeadI];
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite: DeadLoc [" << DeadOff << ", "
+ << int64_t(DeadOff + DeadSize) << ") KillingLoc ["
+ << KillingOff << ", " << int64_t(KillingOff + KillingSize)
+ << ")\n");
// Make sure that we only insert non-overlapping intervals and combine
// adjacent intervals. The intervals are stored in the map with the ending
// offset as the key (in the half-open sense) and the starting offset as
// the value.
- int64_t LaterIntStart = LaterOff, LaterIntEnd = LaterOff + LaterSize;
+ int64_t KillingIntStart = KillingOff;
+ int64_t KillingIntEnd = KillingOff + KillingSize;
- // Find any intervals ending at, or after, LaterIntStart which start
- // before LaterIntEnd.
- auto ILI = IM.lower_bound(LaterIntStart);
- if (ILI != IM.end() && ILI->second <= LaterIntEnd) {
+ // Find any intervals ending at, or after, KillingIntStart which start
+ // before KillingIntEnd.
+ auto ILI = IM.lower_bound(KillingIntStart);
+ if (ILI != IM.end() && ILI->second <= KillingIntEnd) {
// This existing interval is overlapped with the current store somewhere
- // in [LaterIntStart, LaterIntEnd]. Merge them by erasing the existing
+ // in [KillingIntStart, KillingIntEnd]. Merge them by erasing the existing
// intervals and adjusting our start and end.
- LaterIntStart = std::min(LaterIntStart, ILI->second);
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ KillingIntStart = std::min(KillingIntStart, ILI->second);
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
// Continue erasing and adjusting our end in case other previous
// intervals are also overlapped with the current store.
//
- // |--- ealier 1 ---| |--- ealier 2 ---|
- // |------- later---------|
+ // |--- dead 1 ---| |--- dead 2 ---|
+ // |------- killing---------|
//
- while (ILI != IM.end() && ILI->second <= LaterIntEnd) {
- assert(ILI->second > LaterIntStart && "Unexpected interval");
- LaterIntEnd = std::max(LaterIntEnd, ILI->first);
+ while (ILI != IM.end() && ILI->second <= KillingIntEnd) {
+ assert(ILI->second > KillingIntStart && "Unexpected interval");
+ KillingIntEnd = std::max(KillingIntEnd, ILI->first);
ILI = IM.erase(ILI);
}
}
- IM[LaterIntEnd] = LaterIntStart;
+ IM[KillingIntEnd] = KillingIntStart;
ILI = IM.begin();
- if (ILI->second <= EarlierOff &&
- ILI->first >= int64_t(EarlierOff + EarlierSize)) {
- LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: Earlier ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") Composite Later [" << ILI->second << ", "
+ if (ILI->second <= DeadOff && ILI->first >= int64_t(DeadOff + DeadSize)) {
+ LLVM_DEBUG(dbgs() << "DSE: Full overwrite from partials: DeadLoc ["
+ << DeadOff << ", " << int64_t(DeadOff + DeadSize)
+ << ") Composite KillingLoc [" << ILI->second << ", "
<< ILI->first << ")\n");
++NumCompletePartials;
return OW_Complete;
}
}
- // Check for an earlier store which writes to all the memory locations that
- // the later store writes to.
- if (EnablePartialStoreMerging && LaterOff >= EarlierOff &&
- int64_t(EarlierOff + EarlierSize) > LaterOff &&
- uint64_t(LaterOff - EarlierOff) + LaterSize <= EarlierSize) {
- LLVM_DEBUG(dbgs() << "DSE: Partial overwrite an earlier load ["
- << EarlierOff << ", "
- << int64_t(EarlierOff + EarlierSize)
- << ") by a later store [" << LaterOff << ", "
- << int64_t(LaterOff + LaterSize) << ")\n");
+ // Check for a dead store which writes to all the memory locations that
+ // the killing store writes to.
+ if (EnablePartialStoreMerging && KillingOff >= DeadOff &&
+ int64_t(DeadOff + DeadSize) > KillingOff &&
+ uint64_t(KillingOff - DeadOff) + KillingSize <= DeadSize) {
+ LLVM_DEBUG(dbgs() << "DSE: Partial overwrite a dead load [" << DeadOff
+ << ", " << int64_t(DeadOff + DeadSize)
+ << ") by a killing store [" << KillingOff << ", "
+ << int64_t(KillingOff + KillingSize) << ")\n");
// TODO: Maybe come up with a better name?
return OW_PartialEarlierWithFullLater;
}
- // Another interesting case is if the later store overwrites the end of the
- // earlier store.
+ // Another interesting case is if the killing store overwrites the end of the
+ // dead store.
//
- // |--earlier--|
- // |-- later --|
+  //    |--dead--|
+  //         |-- killing --|
//
- // In this case we may want to trim the size of earlier to avoid generating
- // writes to addresses which will definitely be overwritten later
+  // In this case we may want to trim the size of the dead store to avoid
+  // generating stores to addresses which will definitely be overwritten by
+  // the killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + EarlierSize) &&
- int64_t(LaterOff + LaterSize) >= int64_t(EarlierOff + EarlierSize)))
+ (KillingOff > DeadOff && KillingOff < int64_t(DeadOff + DeadSize) &&
+ int64_t(KillingOff + KillingSize) >= int64_t(DeadOff + DeadSize)))
return OW_End;
- // Finally, we also need to check if the later store overwrites the beginning
- // of the earlier store.
+ // Finally, we also need to check if the killing store overwrites the
+ // beginning of the dead store.
//
- // |--earlier--|
- // |-- later --|
+  //          |--dead--|
+  //  |-- killing --|
//
// In this case we may want to move the destination address and trim the size
- // of earlier to avoid generating writes to addresses which will definitely
- // be overwritten later.
+  // of the dead store to avoid generating stores to addresses which will
+  // definitely be overwritten by the killing store.
if (!EnablePartialOverwriteTracking &&
- (LaterOff <= EarlierOff && int64_t(LaterOff + LaterSize) > EarlierOff)) {
- assert(int64_t(LaterOff + LaterSize) < int64_t(EarlierOff + EarlierSize) &&
+ (KillingOff <= DeadOff && int64_t(KillingOff + KillingSize) > DeadOff)) {
+ assert(int64_t(KillingOff + KillingSize) < int64_t(DeadOff + DeadSize) &&
"Expect to be handled as OW_Complete");
return OW_Begin;
}
@@ -505,7 +473,12 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
BasicBlock::iterator SecondBBI(SecondI);
BasicBlock *FirstBB = FirstI->getParent();
BasicBlock *SecondBB = SecondI->getParent();
- MemoryLocation MemLoc = MemoryLocation::get(SecondI);
+ MemoryLocation MemLoc;
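+  // MemoryLocation::get is not applicable to memset calls, so use the memset
+  // destination as the location to check.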
+ if (auto *MemSet = dyn_cast<MemSetInst>(SecondI))
+ MemLoc = MemoryLocation::getForDest(MemSet);
+ else
+ MemLoc = MemoryLocation::get(SecondI);
+
auto *MemLocPtr = const_cast<Value *>(MemLoc.Ptr);
// Start checking the SecondBB.
@@ -568,11 +541,11 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
return true;
}
-static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
- uint64_t &EarlierSize, int64_t LaterStart,
- uint64_t LaterSize, bool IsOverwriteEnd) {
- auto *EarlierIntrinsic = cast<AnyMemIntrinsic>(EarlierWrite);
- Align PrefAlign = EarlierIntrinsic->getDestAlign().valueOrOne();
+static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
+ uint64_t &DeadSize, int64_t KillingStart,
+ uint64_t KillingSize, bool IsOverwriteEnd) {
+ auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
+ Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
  // We assume that memset/memcpy operate in chunks of the "largest" native
// type size and aligned on the same value. That means optimal start and size
@@ -593,19 +566,19 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
// Compute start and size of the region to remove. Make sure 'PrefAlign' is
// maintained on the remaining store.
if (IsOverwriteEnd) {
- // Calculate required adjustment for 'LaterStart'in order to keep remaining
- // store size aligned on 'PerfAlign'.
+ // Calculate required adjustment for 'KillingStart' in order to keep
+    // remaining store size aligned on 'PrefAlign'.
uint64_t Off =
- offsetToAlignment(uint64_t(LaterStart - EarlierStart), PrefAlign);
- ToRemoveStart = LaterStart + Off;
- if (EarlierSize <= uint64_t(ToRemoveStart - EarlierStart))
+ offsetToAlignment(uint64_t(KillingStart - DeadStart), PrefAlign);
+ ToRemoveStart = KillingStart + Off;
+ if (DeadSize <= uint64_t(ToRemoveStart - DeadStart))
return false;
- ToRemoveSize = EarlierSize - uint64_t(ToRemoveStart - EarlierStart);
+ ToRemoveSize = DeadSize - uint64_t(ToRemoveStart - DeadStart);
} else {
- ToRemoveStart = EarlierStart;
- assert(LaterSize >= uint64_t(EarlierStart - LaterStart) &&
+ ToRemoveStart = DeadStart;
+ assert(KillingSize >= uint64_t(DeadStart - KillingStart) &&
"Not overlapping accesses?");
- ToRemoveSize = LaterSize - uint64_t(EarlierStart - LaterStart);
+ ToRemoveSize = KillingSize - uint64_t(DeadStart - KillingStart);
    // Calculate required adjustment for 'ToRemoveSize' in order to keep
    // start of the remaining store aligned on 'PrefAlign'.
uint64_t Off = offsetToAlignment(ToRemoveSize, PrefAlign);
@@ -619,10 +592,10 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
assert(ToRemoveSize > 0 && "Shouldn't reach here if nothing to remove");
- assert(EarlierSize > ToRemoveSize && "Can't remove more than original size");
+ assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
- uint64_t NewSize = EarlierSize - ToRemoveSize;
- if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(EarlierWrite)) {
+ uint64_t NewSize = DeadSize - ToRemoveSize;
+ if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
// When shortening an atomic memory intrinsic, the newly shortened
// length must remain an integer multiple of the element size.
const uint32_t ElementSize = AMI->getElementSizeInBytes();
@@ -631,65 +604,62 @@ static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierStart,
}
LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
- << *EarlierWrite << "\n KILLER [" << ToRemoveStart << ", "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *DeadI
+ << "\n KILLER [" << ToRemoveStart << ", "
<< int64_t(ToRemoveStart + ToRemoveSize) << ")\n");
- Value *EarlierWriteLength = EarlierIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(EarlierWriteLength->getType(), NewSize);
- EarlierIntrinsic->setLength(TrimmedLength);
- EarlierIntrinsic->setDestAlignment(PrefAlign);
+ Value *DeadWriteLength = DeadIntrinsic->getLength();
+ Value *TrimmedLength = ConstantInt::get(DeadWriteLength->getType(), NewSize);
+ DeadIntrinsic->setLength(TrimmedLength);
+ DeadIntrinsic->setDestAlignment(PrefAlign);
if (!IsOverwriteEnd) {
- Value *OrigDest = EarlierIntrinsic->getRawDest();
+ Value *OrigDest = DeadIntrinsic->getRawDest();
Type *Int8PtrTy =
- Type::getInt8PtrTy(EarlierIntrinsic->getContext(),
+ Type::getInt8PtrTy(DeadIntrinsic->getContext(),
OrigDest->getType()->getPointerAddressSpace());
Value *Dest = OrigDest;
if (OrigDest->getType() != Int8PtrTy)
- Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", EarlierWrite);
+ Dest = CastInst::CreatePointerCast(OrigDest, Int8PtrTy, "", DeadI);
Value *Indices[1] = {
- ConstantInt::get(EarlierWriteLength->getType(), ToRemoveSize)};
+ ConstantInt::get(DeadWriteLength->getType(), ToRemoveSize)};
Instruction *NewDestGEP = GetElementPtrInst::CreateInBounds(
- Type::getInt8Ty(EarlierIntrinsic->getContext()),
- Dest, Indices, "", EarlierWrite);
- NewDestGEP->setDebugLoc(EarlierIntrinsic->getDebugLoc());
+ Type::getInt8Ty(DeadIntrinsic->getContext()), Dest, Indices, "", DeadI);
+ NewDestGEP->setDebugLoc(DeadIntrinsic->getDebugLoc());
if (NewDestGEP->getType() != OrigDest->getType())
NewDestGEP = CastInst::CreatePointerCast(NewDestGEP, OrigDest->getType(),
- "", EarlierWrite);
- EarlierIntrinsic->setDest(NewDestGEP);
+ "", DeadI);
+ DeadIntrinsic->setDest(NewDestGEP);
}
- // Finally update start and size of earlier access.
+ // Finally update start and size of dead access.
if (!IsOverwriteEnd)
- EarlierStart += ToRemoveSize;
- EarlierSize = NewSize;
+ DeadStart += ToRemoveSize;
+ DeadSize = NewSize;
return true;
}
-static bool tryToShortenEnd(Instruction *EarlierWrite,
- OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
return false;
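+  // Intervals are keyed by their end offset, so the last map entry is the
+  // killing interval that reaches furthest into the end of the dead store.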
OverlapIntervalsTy::iterator OII = --IntervalMap.end();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart > EarlierStart &&
- // Note: "LaterStart - EarlierStart" is known to be positive due to
+ if (KillingStart > DeadStart &&
+      // Note: "KillingStart - DeadStart" is known to be positive due to
// preceding check.
- (uint64_t)(LaterStart - EarlierStart) < EarlierSize &&
- // Note: "EarlierSize - (uint64_t)(LaterStart - EarlierStart)" is known to
+ (uint64_t)(KillingStart - DeadStart) < DeadSize &&
+ // Note: "DeadSize - (uint64_t)(KillingStart - DeadStart)" is known to
// be non negative due to preceding checks.
- LaterSize >= EarlierSize - (uint64_t)(LaterStart - EarlierStart)) {
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, true)) {
+ KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ true)) {
IntervalMap.erase(OII);
return true;
}
@@ -697,28 +667,28 @@ static bool tryToShortenEnd(Instruction *EarlierWrite,
return false;
}
-static bool tryToShortenBegin(Instruction *EarlierWrite,
+static bool tryToShortenBegin(Instruction *DeadI,
OverlapIntervalsTy &IntervalMap,
- int64_t &EarlierStart, uint64_t &EarlierSize) {
- if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+ int64_t &DeadStart, uint64_t &DeadSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
return false;
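+  // The first map entry has the smallest end offset and therefore covers the
+  // lowest offsets of the dead store, making it the candidate for trimming
+  // the beginning.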
OverlapIntervalsTy::iterator OII = IntervalMap.begin();
- int64_t LaterStart = OII->second;
- uint64_t LaterSize = OII->first - LaterStart;
+ int64_t KillingStart = OII->second;
+ uint64_t KillingSize = OII->first - KillingStart;
- assert(OII->first - LaterStart >= 0 && "Size expected to be positive");
+ assert(OII->first - KillingStart >= 0 && "Size expected to be positive");
- if (LaterStart <= EarlierStart &&
- // Note: "EarlierStart - LaterStart" is known to be non negative due to
+ if (KillingStart <= DeadStart &&
+ // Note: "DeadStart - KillingStart" is known to be non negative due to
// preceding check.
- LaterSize > (uint64_t)(EarlierStart - LaterStart)) {
- // Note: "LaterSize - (uint64_t)(EarlierStart - LaterStart)" is known to be
- // positive due to preceding checks.
- assert(LaterSize - (uint64_t)(EarlierStart - LaterStart) < EarlierSize &&
+ KillingSize > (uint64_t)(DeadStart - KillingStart)) {
+    // Note: "KillingSize - (uint64_t)(DeadStart - KillingStart)" is known to
+ // be positive due to preceding checks.
+ assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
"Should have been handled as OW_Complete");
- if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
- LaterSize, false)) {
+ if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
+ false)) {
IntervalMap.erase(OII);
return true;
}
@@ -726,71 +696,48 @@ static bool tryToShortenBegin(Instruction *EarlierWrite,
return false;
}
-static bool removePartiallyOverlappedStores(const DataLayout &DL,
- InstOverlapIntervalsTy &IOL,
- const TargetLibraryInfo &TLI) {
- bool Changed = false;
- for (auto OI : IOL) {
- Instruction *EarlierWrite = OI.first;
- MemoryLocation Loc = getLocForWrite(EarlierWrite, TLI);
- assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
-
- const Value *Ptr = Loc.Ptr->stripPointerCasts();
- int64_t EarlierStart = 0;
- uint64_t EarlierSize = Loc.Size.getValue();
- GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
- OverlapIntervalsTy &IntervalMap = OI.second;
- Changed |=
- tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- if (IntervalMap.empty())
- continue;
- Changed |=
- tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
- }
- return Changed;
-}
-
-static Constant *tryToMergePartialOverlappingStores(
- StoreInst *Earlier, StoreInst *Later, int64_t InstWriteOffset,
- int64_t DepWriteOffset, const DataLayout &DL, BatchAAResults &AA,
- DominatorTree *DT) {
-
- if (Earlier && isa<ConstantInt>(Earlier->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Earlier->getValueOperand()->getType()) &&
- Later && isa<ConstantInt>(Later->getValueOperand()) &&
- DL.typeSizeEqualsStoreSize(Later->getValueOperand()->getType()) &&
- memoryIsNotModifiedBetween(Earlier, Later, AA, DL, DT)) {
+static Constant *
+tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
+ int64_t KillingOffset, int64_t DeadOffset,
+ const DataLayout &DL, BatchAAResults &AA,
+ DominatorTree *DT) {
+
+ if (DeadI && isa<ConstantInt>(DeadI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(DeadI->getValueOperand()->getType()) &&
+ KillingI && isa<ConstantInt>(KillingI->getValueOperand()) &&
+ DL.typeSizeEqualsStoreSize(KillingI->getValueOperand()->getType()) &&
+ memoryIsNotModifiedBetween(DeadI, KillingI, AA, DL, DT)) {
// If the store we find is:
// a) partially overwritten by the store to 'Loc'
- // b) the later store is fully contained in the earlier one and
+ // b) the killing store is fully contained in the dead one and
// c) they both have a constant value
// d) none of the two stores need padding
- // Merge the two stores, replacing the earlier store's value with a
+ // Merge the two stores, replacing the dead store's value with a
// merge of both values.
// TODO: Deal with other constant types (vectors, etc), and probably
// some mem intrinsics (if needed)
- APInt EarlierValue =
- cast<ConstantInt>(Earlier->getValueOperand())->getValue();
- APInt LaterValue = cast<ConstantInt>(Later->getValueOperand())->getValue();
- unsigned LaterBits = LaterValue.getBitWidth();
- assert(EarlierValue.getBitWidth() > LaterValue.getBitWidth());
- LaterValue = LaterValue.zext(EarlierValue.getBitWidth());
+ APInt DeadValue = cast<ConstantInt>(DeadI->getValueOperand())->getValue();
+ APInt KillingValue =
+ cast<ConstantInt>(KillingI->getValueOperand())->getValue();
+ unsigned KillingBits = KillingValue.getBitWidth();
+ assert(DeadValue.getBitWidth() > KillingValue.getBitWidth());
+ KillingValue = KillingValue.zext(DeadValue.getBitWidth());
// Offset of the smaller store inside the larger store
- unsigned BitOffsetDiff = (InstWriteOffset - DepWriteOffset) * 8;
- unsigned LShiftAmount = DL.isBigEndian() ? EarlierValue.getBitWidth() -
- BitOffsetDiff - LaterBits
- : BitOffsetDiff;
- APInt Mask = APInt::getBitsSet(EarlierValue.getBitWidth(), LShiftAmount,
- LShiftAmount + LaterBits);
+ unsigned BitOffsetDiff = (KillingOffset - DeadOffset) * 8;
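+    // On big-endian targets the byte at the lowest address holds the most
+    // significant bits, so the shift amount is measured from the top of the
+    // wider value rather than from bit zero.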
+ unsigned LShiftAmount =
+ DL.isBigEndian() ? DeadValue.getBitWidth() - BitOffsetDiff - KillingBits
+ : BitOffsetDiff;
+ APInt Mask = APInt::getBitsSet(DeadValue.getBitWidth(), LShiftAmount,
+ LShiftAmount + KillingBits);
// Clear the bits we'll be replacing, then OR with the smaller
// store, shifted appropriately.
- APInt Merged = (EarlierValue & ~Mask) | (LaterValue << LShiftAmount);
- LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Earlier: " << *Earlier
- << "\n Later: " << *Later
+ APInt Merged = (DeadValue & ~Mask) | (KillingValue << LShiftAmount);
+ LLVM_DEBUG(dbgs() << "DSE: Merge Stores:\n Dead: " << *DeadI
+ << "\n Killing: " << *KillingI
<< "\n Merged Value: " << Merged << '\n');
- return ConstantInt::get(Earlier->getValueOperand()->getType(), Merged);
+ return ConstantInt::get(DeadI->getValueOperand()->getType(), Merged);
}
return nullptr;
}
@@ -819,14 +766,17 @@ bool isNoopIntrinsic(Instruction *I) {
}
// Check if we can ignore \p D for DSE.
-bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
+bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller,
+ const TargetLibraryInfo &TLI) {
Instruction *DI = D->getMemoryInst();
// Calls that only access inaccessible memory cannot read or write any memory
// locations we consider for elimination.
if (auto *CB = dyn_cast<CallBase>(DI))
- if (CB->onlyAccessesInaccessibleMemory())
+ if (CB->onlyAccessesInaccessibleMemory()) {
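+      // Allocation-like calls only access inaccessible memory themselves, but
+      // the object they return is still relevant for DSE, so do not skip them.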
+ if (isAllocLikeFn(DI, &TLI))
+ return false;
return true;
-
+ }
// We can eliminate stores to locations not visible to the caller across
// throwing instructions.
if (DI->mayThrow() && !DefVisibleToCaller)
@@ -841,7 +791,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
return true;
// Skip intrinsics that do not really read or modify memory.
- if (isNoopIntrinsic(D->getMemoryInst()))
+ if (isNoopIntrinsic(DI))
return true;
return false;
@@ -850,6 +800,7 @@ bool canSkipDef(MemoryDef *D, bool DefVisibleToCaller) {
struct DSEState {
Function &F;
AliasAnalysis &AA;
+ EarliestEscapeInfo EI;
/// The single BatchAA instance that is used to cache AA queries. It will
/// not be invalidated over the whole run. This is safe, because:
@@ -892,30 +843,29 @@ struct DSEState {
/// basic block.
DenseMap<BasicBlock *, InstOverlapIntervalsTy> IOLs;
+ // Class contains self-reference, make sure it's not copied/moved.
+ DSEState(const DSEState &) = delete;
+ DSEState &operator=(const DSEState &) = delete;
+
DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
const LoopInfo &LI)
- : F(F), AA(AA), BatchAA(AA), MSSA(MSSA), DT(DT), PDT(PDT), TLI(TLI),
- DL(F.getParent()->getDataLayout()), LI(LI) {}
-
- static DSEState get(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
- DominatorTree &DT, PostDominatorTree &PDT,
- const TargetLibraryInfo &TLI, const LoopInfo &LI) {
- DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+ : F(F), AA(AA), EI(DT, LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
+ PDT(PDT), TLI(TLI), DL(F.getParent()->getDataLayout()), LI(LI) {
// Collect blocks with throwing instructions not modeled in MemorySSA and
// alloc-like objects.
unsigned PO = 0;
for (BasicBlock *BB : post_order(&F)) {
- State.PostOrderNumbers[BB] = PO++;
+ PostOrderNumbers[BB] = PO++;
for (Instruction &I : *BB) {
MemoryAccess *MA = MSSA.getMemoryAccess(&I);
if (I.mayThrow() && !MA)
- State.ThrowingBlocks.insert(I.getParent());
+ ThrowingBlocks.insert(I.getParent());
auto *MD = dyn_cast_or_null<MemoryDef>(MA);
- if (MD && State.MemDefs.size() < MemorySSADefsPerBlockLimit &&
- (State.getLocForWriteEx(&I) || State.isMemTerminatorInst(&I)))
- State.MemDefs.push_back(MD);
+ if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit &&
+ (getLocForWriteEx(&I) || isMemTerminatorInst(&I)))
+ MemDefs.push_back(MD);
}
}
@@ -925,131 +875,134 @@ struct DSEState {
if (AI.hasPassPointeeByValueCopyAttr()) {
// For byval, the caller doesn't know the address of the allocation.
if (AI.hasByValAttr())
- State.InvisibleToCallerBeforeRet.insert({&AI, true});
- State.InvisibleToCallerAfterRet.insert({&AI, true});
+ InvisibleToCallerBeforeRet.insert({&AI, true});
+ InvisibleToCallerAfterRet.insert({&AI, true});
}
// Collect whether there is any irreducible control flow in the function.
- State.ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
-
- return State;
+ ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
}
- /// Return 'OW_Complete' if a store to the 'Later' location (by \p LaterI
- /// instruction) completely overwrites a store to the 'Earlier' location.
- /// (by \p EarlierI instruction).
- /// Return OW_MaybePartial if \p Later does not completely overwrite
- /// \p Earlier, but they both write to the same underlying object. In that
- /// case, use isPartialOverwrite to check if \p Later partially overwrites
- /// \p Earlier. Returns 'OW_Unknown' if nothing can be determined.
- OverwriteResult
- isOverwrite(const Instruction *LaterI, const Instruction *EarlierI,
- const MemoryLocation &Later, const MemoryLocation &Earlier,
- int64_t &EarlierOff, int64_t &LaterOff) {
+ /// Return 'OW_Complete' if a store to the 'KillingLoc' location (by \p
+ /// KillingI instruction) completely overwrites a store to the 'DeadLoc'
+ /// location (by \p DeadI instruction).
+ /// Return OW_MaybePartial if \p KillingI does not completely overwrite
+ /// \p DeadI, but they both write to the same underlying object. In that
+ /// case, use isPartialOverwrite to check if \p KillingI partially overwrites
+ /// \p DeadI. Returns 'OW_Unknown' if nothing can be determined.
+ OverwriteResult isOverwrite(const Instruction *KillingI,
+ const Instruction *DeadI,
+ const MemoryLocation &KillingLoc,
+ const MemoryLocation &DeadLoc,
+ int64_t &KillingOff, int64_t &DeadOff) {
// AliasAnalysis does not always account for loops. Limit overwrite checks
- // to dependencies for which we can guarantee they are independant of any
+ // to dependencies for which we can guarantee they are independent of any
// loops they are in.
- if (!isGuaranteedLoopIndependent(EarlierI, LaterI, Earlier))
+ if (!isGuaranteedLoopIndependent(DeadI, KillingI, DeadLoc))
return OW_Unknown;
// FIXME: Vet that this works for size upper-bounds. Seems unlikely that we'll
// get imprecise values here, though (except for unknown sizes).
- if (!Later.Size.isPrecise() || !Earlier.Size.isPrecise()) {
+ if (!KillingLoc.Size.isPrecise() || !DeadLoc.Size.isPrecise()) {
      // In case no constant size is known, try to use the IR values for the number
// of bytes written and check if they match.
- const auto *LaterMemI = dyn_cast<MemIntrinsic>(LaterI);
- const auto *EarlierMemI = dyn_cast<MemIntrinsic>(EarlierI);
- if (LaterMemI && EarlierMemI) {
- const Value *LaterV = LaterMemI->getLength();
- const Value *EarlierV = EarlierMemI->getLength();
- if (LaterV == EarlierV && BatchAA.isMustAlias(Earlier, Later))
+ const auto *KillingMemI = dyn_cast<MemIntrinsic>(KillingI);
+ const auto *DeadMemI = dyn_cast<MemIntrinsic>(DeadI);
+ if (KillingMemI && DeadMemI) {
+ const Value *KillingV = KillingMemI->getLength();
+ const Value *DeadV = DeadMemI->getLength();
+ if (KillingV == DeadV && BatchAA.isMustAlias(DeadLoc, KillingLoc))
return OW_Complete;
}
// Masked stores have imprecise locations, but we can reason about them
// to some extent.
- return isMaskedStoreOverwrite(LaterI, EarlierI, BatchAA);
+ return isMaskedStoreOverwrite(KillingI, DeadI, BatchAA);
}
- const uint64_t LaterSize = Later.Size.getValue();
- const uint64_t EarlierSize = Earlier.Size.getValue();
+ const uint64_t KillingSize = KillingLoc.Size.getValue();
+ const uint64_t DeadSize = DeadLoc.Size.getValue();
// Query the alias information
- AliasResult AAR = BatchAA.alias(Later, Earlier);
+ AliasResult AAR = BatchAA.alias(KillingLoc, DeadLoc);
// If the start pointers are the same, we just have to compare sizes to see if
- // the later store was larger than the earlier store.
+ // the killing store was larger than the dead store.
if (AAR == AliasResult::MustAlias) {
- // Make sure that the Later size is >= the Earlier size.
- if (LaterSize >= EarlierSize)
+      // Make sure that KillingSize is >= DeadSize.
+ if (KillingSize >= DeadSize)
return OW_Complete;
}
// If we hit a partial alias we may have a full overwrite
if (AAR == AliasResult::PartialAlias && AAR.hasOffset()) {
int32_t Off = AAR.getOffset();
- if (Off >= 0 && (uint64_t)Off + EarlierSize <= LaterSize)
+ if (Off >= 0 && (uint64_t)Off + DeadSize <= KillingSize)
return OW_Complete;
}
- // Check to see if the later store is to the entire object (either a global,
- // an alloca, or a byval/inalloca argument). If so, then it clearly
+ // Check to see if the killing store is to the entire object (either a
+ // global, an alloca, or a byval/inalloca argument). If so, then it clearly
// overwrites any other store to the same object.
- const Value *P1 = Earlier.Ptr->stripPointerCasts();
- const Value *P2 = Later.Ptr->stripPointerCasts();
- const Value *UO1 = getUnderlyingObject(P1), *UO2 = getUnderlyingObject(P2);
+ const Value *DeadPtr = DeadLoc.Ptr->stripPointerCasts();
+ const Value *KillingPtr = KillingLoc.Ptr->stripPointerCasts();
+ const Value *DeadUndObj = getUnderlyingObject(DeadPtr);
+ const Value *KillingUndObj = getUnderlyingObject(KillingPtr);
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
- if (UO1 != UO2)
+ if (DeadUndObj != KillingUndObj)
return OW_Unknown;
- // If the "Later" store is to a recognizable object, get its size.
- uint64_t ObjectSize = getPointerSize(UO2, DL, TLI, &F);
- if (ObjectSize != MemoryLocation::UnknownSize)
- if (ObjectSize == LaterSize && ObjectSize >= EarlierSize)
+ // If the KillingI store is to a recognizable object, get its size.
+ uint64_t KillingUndObjSize = getPointerSize(KillingUndObj, DL, TLI, &F);
+ if (KillingUndObjSize != MemoryLocation::UnknownSize)
+ if (KillingUndObjSize == KillingSize && KillingUndObjSize >= DeadSize)
return OW_Complete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- EarlierOff = 0;
- LaterOff = 0;
- const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, DL);
- const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, DL);
-
- // If the base pointers still differ, we have two completely different stores.
- if (BP1 != BP2)
+ DeadOff = 0;
+ KillingOff = 0;
+ const Value *DeadBasePtr =
+ GetPointerBaseWithConstantOffset(DeadPtr, DeadOff, DL);
+ const Value *KillingBasePtr =
+ GetPointerBaseWithConstantOffset(KillingPtr, KillingOff, DL);
+
+ // If the base pointers still differ, we have two completely different
+ // stores.
+ if (DeadBasePtr != KillingBasePtr)
return OW_Unknown;
- // The later access completely overlaps the earlier store if and only if
- // both start and end of the earlier one is "inside" the later one:
- // |<->|--earlier--|<->|
- // |-------later-------|
+ // The killing access completely overlaps the dead store if and only if
+    // both start and end of the dead one are "inside" the killing one:
+ // |<->|--dead--|<->|
+ // |-----killing------|
// Accesses may overlap if and only if start of one of them is "inside"
// another one:
- // |<->|--earlier--|<----->|
- // |-------later-------|
+ // |<->|--dead--|<-------->|
+ // |-------killing--------|
// OR
- // |----- earlier -----|
- // |<->|---later---|<----->|
+ // |-------dead-------|
+ // |<->|---killing---|<----->|
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
- // Check if the earlier access starts "not before" the later one.
- if (EarlierOff >= LaterOff) {
- // If the earlier access ends "not after" the later access then the earlier
- // one is completely overwritten by the later one.
- if (uint64_t(EarlierOff - LaterOff) + EarlierSize <= LaterSize)
+ // Check if the dead access starts "not before" the killing one.
+ if (DeadOff >= KillingOff) {
+ // If the dead access ends "not after" the killing access then the
+ // dead one is completely overwritten by the killing one.
+ if (uint64_t(DeadOff - KillingOff) + DeadSize <= KillingSize)
return OW_Complete;
- // If start of the earlier access is "before" end of the later access then
- // accesses overlap.
- else if ((uint64_t)(EarlierOff - LaterOff) < LaterSize)
+ // If start of the dead access is "before" end of the killing access
+ // then accesses overlap.
+ else if ((uint64_t)(DeadOff - KillingOff) < KillingSize)
return OW_MaybePartial;
}
- // If start of the later access is "before" end of the earlier access then
+ // If start of the killing access is "before" end of the dead access then
// accesses overlap.
- else if ((uint64_t)(LaterOff - EarlierOff) < EarlierSize) {
+ else if ((uint64_t)(KillingOff - DeadOff) < DeadSize) {
return OW_MaybePartial;
}
@@ -1106,8 +1059,13 @@ struct DSEState {
LibFunc LF;
if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) {
switch (LF) {
- case LibFunc_strcpy:
case LibFunc_strncpy:
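+      // strncpy with a constant length writes exactly that many bytes, so a
+      // precise location can be used; otherwise fall through and treat the
+      // write as covering an unknown number of bytes after the destination.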
+ if (const auto *Len = dyn_cast<ConstantInt>(CB->getArgOperand(2)))
+ return MemoryLocation(CB->getArgOperand(0),
+ LocationSize::precise(Len->getZExtValue()),
+ CB->getAAMetadata());
+ LLVM_FALLTHROUGH;
+ case LibFunc_strcpy:
case LibFunc_strcat:
case LibFunc_strncat:
return {MemoryLocation::getAfter(CB->getArgOperand(0))};
@@ -1145,8 +1103,8 @@ struct DSEState {
int64_t InstWriteOffset, DepWriteOffset;
if (auto CC = getLocForWriteEx(UseInst))
- return isOverwrite(UseInst, DefInst, *CC, DefLoc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
return false;
}
@@ -1248,9 +1206,10 @@ struct DSEState {
const Value *LocUO = getUnderlyingObject(Loc.Ptr);
return BatchAA.isMustAlias(TermLoc.Ptr, LocUO);
}
- int64_t InstWriteOffset, DepWriteOffset;
- return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, DepWriteOffset,
- InstWriteOffset) == OW_Complete;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ return isOverwrite(MaybeTerm, AccessI, TermLoc, Loc, InstWriteOffset,
+ DepWriteOffset) == OW_Complete;
}
// Returns true if \p Use may read from \p DefLoc.
@@ -1270,10 +1229,6 @@ struct DSEState {
if (CB->onlyAccessesInaccessibleMemory())
return false;
- // NOTE: For calls, the number of stores removed could be slightly improved
- // by using AA.callCapturesBefore(UseInst, DefLoc, &DT), but that showed to
- // be expensive compared to the benefits in practice. For now, avoid more
- // expensive analysis to limit compile-time.
return isRefSet(BatchAA.getModRefInfo(UseInst, DefLoc));
}
@@ -1329,15 +1284,15 @@ struct DSEState {
return IsGuaranteedLoopInvariantBase(Ptr);
}
- // Find a MemoryDef writing to \p DefLoc and dominating \p StartAccess, with
- // no read access between them or on any other path to a function exit block
- // if \p DefLoc is not accessible after the function returns. If there is no
- // such MemoryDef, return None. The returned value may not (completely)
- // overwrite \p DefLoc. Currently we bail out when we encounter an aliasing
- // MemoryUse (read).
+ // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess,
+ // with no read access between them or on any other path to a function exit
+ // block if \p KillingLoc is not accessible after the function returns. If
+ // there is no such MemoryDef, return None. The returned value may not
+ // (completely) overwrite \p KillingLoc. Currently we bail out when we
+ // encounter an aliasing MemoryUse (read).
Optional<MemoryAccess *>
getDomMemoryDef(MemoryDef *KillingDef, MemoryAccess *StartAccess,
- const MemoryLocation &DefLoc, const Value *DefUO,
+ const MemoryLocation &KillingLoc, const Value *KillingUndObj,
unsigned &ScanLimit, unsigned &WalkerStepLimit,
bool IsMemTerm, unsigned &PartialLimit) {
if (ScanLimit == 0 || WalkerStepLimit == 0) {
@@ -1389,19 +1344,20 @@ struct DSEState {
MemoryDef *CurrentDef = cast<MemoryDef>(Current);
Instruction *CurrentI = CurrentDef->getMemoryInst();
- if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(DefUO)))
+ if (canSkipDef(CurrentDef, !isInvisibleToCallerBeforeRet(KillingUndObj),
+ TLI))
continue;
// Before we try to remove anything, check for any extra throwing
// instructions that block us from DSEing
- if (mayThrowBetween(KillingI, CurrentI, DefUO)) {
+ if (mayThrowBetween(KillingI, CurrentI, KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... skip, may throw!\n");
return None;
}
// Check for anything that looks like it will be a barrier to further
// removal
- if (isDSEBarrier(DefUO, CurrentI)) {
+ if (isDSEBarrier(KillingUndObj, CurrentI)) {
LLVM_DEBUG(dbgs() << " ... skip, barrier\n");
return None;
}
@@ -1410,14 +1366,14 @@ struct DSEState {
// clobber, bail out, as the path is not profitable. We skip this check
// for intrinsic calls, because the code knows how to handle memcpy
// intrinsics.
- if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(DefLoc, CurrentI))
+ if (!isa<IntrinsicInst>(CurrentI) && isReadClobber(KillingLoc, CurrentI))
return None;
// Quick check if there are direct uses that are read-clobbers.
- if (any_of(Current->uses(), [this, &DefLoc, StartAccess](Use &U) {
+ if (any_of(Current->uses(), [this, &KillingLoc, StartAccess](Use &U) {
if (auto *UseOrDef = dyn_cast<MemoryUseOrDef>(U.getUser()))
return !MSSA.dominates(StartAccess, UseOrDef) &&
- isReadClobber(DefLoc, UseOrDef->getMemoryInst());
+ isReadClobber(KillingLoc, UseOrDef->getMemoryInst());
return false;
})) {
LLVM_DEBUG(dbgs() << " ... found a read clobber\n");
@@ -1450,9 +1406,10 @@ struct DSEState {
if (!isMemTerminator(*CurrentLoc, CurrentI, KillingI))
continue;
} else {
- int64_t InstWriteOffset, DepWriteOffset;
- auto OR = isOverwrite(KillingI, CurrentI, DefLoc, *CurrentLoc,
- DepWriteOffset, InstWriteOffset);
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ auto OR = isOverwrite(KillingI, CurrentI, KillingLoc, *CurrentLoc,
+ KillingOffset, DeadOffset);
// If Current does not write to the same object as KillingDef, check
// the next candidate.
if (OR == OW_Unknown)
@@ -1473,30 +1430,25 @@ struct DSEState {
};
// Accesses to objects accessible after the function returns can only be
- // eliminated if the access is killed along all paths to the exit. Collect
+ // eliminated if the access is dead along all paths to the exit. Collect
// the blocks with killing (=completely overwriting MemoryDefs) and check if
- // they cover all paths from EarlierAccess to any function exit.
+ // they cover all paths from MaybeDeadAccess to any function exit.
SmallPtrSet<Instruction *, 16> KillingDefs;
KillingDefs.insert(KillingDef->getMemoryInst());
- MemoryAccess *EarlierAccess = Current;
- Instruction *EarlierMemInst =
- cast<MemoryDef>(EarlierAccess)->getMemoryInst();
- LLVM_DEBUG(dbgs() << " Checking for reads of " << *EarlierAccess << " ("
- << *EarlierMemInst << ")\n");
+ MemoryAccess *MaybeDeadAccess = Current;
+ MemoryLocation MaybeDeadLoc = *CurrentLoc;
+ Instruction *MaybeDeadI = cast<MemoryDef>(MaybeDeadAccess)->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " Checking for reads of " << *MaybeDeadAccess << " ("
+ << *MaybeDeadI << ")\n");
SmallSetVector<MemoryAccess *, 32> WorkList;
auto PushMemUses = [&WorkList](MemoryAccess *Acc) {
for (Use &U : Acc->uses())
WorkList.insert(cast<MemoryAccess>(U.getUser()));
};
- PushMemUses(EarlierAccess);
-
- // Optimistically collect all accesses for reads. If we do not find any
- // read clobbers, add them to the cache.
- SmallPtrSet<MemoryAccess *, 16> KnownNoReads;
- if (!EarlierMemInst->mayReadFromMemory())
- KnownNoReads.insert(EarlierAccess);
- // Check if EarlierDef may be read.
+ PushMemUses(MaybeDeadAccess);
+
+ // Check if DeadDef may be read.
for (unsigned I = 0; I < WorkList.size(); I++) {
MemoryAccess *UseAccess = WorkList[I];
@@ -1508,7 +1460,6 @@ struct DSEState {
}
--ScanLimit;
NumDomMemDefChecks++;
- KnownNoReads.insert(UseAccess);
if (isa<MemoryPhi>(UseAccess)) {
if (any_of(KillingDefs, [this, UseAccess](Instruction *KI) {
@@ -1535,7 +1486,7 @@ struct DSEState {
      // A memory terminator kills all preceding MemoryDefs and all succeeding
      // MemoryAccesses. We do not have to check its users.
- if (isMemTerminator(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isMemTerminator(MaybeDeadLoc, MaybeDeadI, UseInst)) {
LLVM_DEBUG(
dbgs()
<< " ... skipping, memterminator invalidates following accesses\n");
@@ -1548,14 +1499,14 @@ struct DSEState {
continue;
}
- if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(DefUO)) {
+ if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
LLVM_DEBUG(dbgs() << " ... found throwing instruction\n");
return None;
}
// Uses which may read the original MemoryDef mean we cannot eliminate the
// original MD. Stop walk.
- if (isReadClobber(*CurrentLoc, UseInst)) {
+ if (isReadClobber(MaybeDeadLoc, UseInst)) {
LLVM_DEBUG(dbgs() << " ... found read clobber\n");
return None;
}
@@ -1563,16 +1514,16 @@ struct DSEState {
// If this worklist walks back to the original memory access (and the
      // pointer is not guaranteed loop invariant) then we cannot assume that a
// store kills itself.
- if (EarlierAccess == UseAccess &&
- !isGuaranteedLoopInvariant(CurrentLoc->Ptr)) {
+ if (MaybeDeadAccess == UseAccess &&
+ !isGuaranteedLoopInvariant(MaybeDeadLoc.Ptr)) {
LLVM_DEBUG(dbgs() << " ... found not loop invariant self access\n");
return None;
}
- // Otherwise, for the KillingDef and EarlierAccess we only have to check
+ // Otherwise, for the KillingDef and MaybeDeadAccess we only have to check
// if it reads the memory location.
// TODO: It would probably be better to check for self-reads before
// calling the function.
- if (KillingDef == UseAccess || EarlierAccess == UseAccess) {
+ if (KillingDef == UseAccess || MaybeDeadAccess == UseAccess) {
LLVM_DEBUG(dbgs() << " ... skipping killing def/dom access\n");
continue;
}
@@ -1581,18 +1532,18 @@ struct DSEState {
// the original location. Otherwise we have to check uses of *all*
// MemoryDefs we discover, including non-aliasing ones. Otherwise we might
// miss cases like the following
- // 1 = Def(LoE) ; <----- EarlierDef stores [0,1]
+ // 1 = Def(LoE) ; <----- DeadDef stores [0,1]
// 2 = Def(1) ; (2, 1) = NoAlias, stores [2,3]
// Use(2) ; MayAlias 2 *and* 1, loads [0, 3].
// (The Use points to the *first* Def it may alias)
// 3 = Def(1) ; <---- Current (3, 2) = NoAlias, (3,1) = MayAlias,
// stores [0,1]
if (MemoryDef *UseDef = dyn_cast<MemoryDef>(UseAccess)) {
- if (isCompleteOverwrite(*CurrentLoc, EarlierMemInst, UseInst)) {
+ if (isCompleteOverwrite(MaybeDeadLoc, MaybeDeadI, UseInst)) {
BasicBlock *MaybeKillingBlock = UseInst->getParent();
if (PostOrderNumbers.find(MaybeKillingBlock)->second <
- PostOrderNumbers.find(EarlierAccess->getBlock())->second) {
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ PostOrderNumbers.find(MaybeDeadAccess->getBlock())->second) {
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
LLVM_DEBUG(dbgs()
<< " ... found killing def " << *UseInst << "\n");
KillingDefs.insert(UseInst);
@@ -1608,9 +1559,9 @@ struct DSEState {
}
// For accesses to locations visible after the function returns, make sure
- // that the location is killed (=overwritten) along all paths from
- // EarlierAccess to the exit.
- if (!isInvisibleToCallerAfterRet(DefUO)) {
+ // that the location is dead (=overwritten) along all paths from
+ // MaybeDeadAccess to the exit.
+ if (!isInvisibleToCallerAfterRet(KillingUndObj)) {
SmallPtrSet<BasicBlock *, 16> KillingBlocks;
for (Instruction *KD : KillingDefs)
KillingBlocks.insert(KD->getParent());
@@ -1619,25 +1570,24 @@ struct DSEState {
// Find the common post-dominator of all killing blocks.
BasicBlock *CommonPred = *KillingBlocks.begin();
- for (auto I = std::next(KillingBlocks.begin()), E = KillingBlocks.end();
- I != E; I++) {
+ for (BasicBlock *BB : llvm::drop_begin(KillingBlocks)) {
if (!CommonPred)
break;
- CommonPred = PDT.findNearestCommonDominator(CommonPred, *I);
+ CommonPred = PDT.findNearestCommonDominator(CommonPred, BB);
}
// If CommonPred is in the set of killing blocks, just check if it
- // post-dominates EarlierAccess.
+ // post-dominates MaybeDeadAccess.
if (KillingBlocks.count(CommonPred)) {
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock()))
- return {EarlierAccess};
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock()))
+ return {MaybeDeadAccess};
return None;
}
- // If the common post-dominator does not post-dominate EarlierAccess,
- // there is a path from EarlierAccess to an exit not going through a
+ // If the common post-dominator does not post-dominate MaybeDeadAccess,
+ // there is a path from MaybeDeadAccess to an exit not going through a
// killing block.
- if (PDT.dominates(CommonPred, EarlierAccess->getBlock())) {
+ if (PDT.dominates(CommonPred, MaybeDeadAccess->getBlock())) {
SetVector<BasicBlock *> WorkList;
// If CommonPred is null, there are multiple exits from the function.
@@ -1650,16 +1600,16 @@ struct DSEState {
NumCFGTries++;
// Check if all paths starting from an exit node go through one of the
- // killing blocks before reaching EarlierAccess.
+ // killing blocks before reaching MaybeDeadAccess.
for (unsigned I = 0; I < WorkList.size(); I++) {
NumCFGChecks++;
BasicBlock *Current = WorkList[I];
if (KillingBlocks.count(Current))
continue;
- if (Current == EarlierAccess->getBlock())
+ if (Current == MaybeDeadAccess->getBlock())
return None;
- // EarlierAccess is reachable from the entry, so we don't have to
+ // MaybeDeadAccess is reachable from the entry, so we don't have to
// explore unreachable blocks further.
if (!DT.isReachableFromEntry(Current))
continue;
@@ -1671,14 +1621,14 @@ struct DSEState {
return None;
}
NumCFGSuccess++;
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
return None;
}
- // No aliasing MemoryUses of EarlierAccess found, EarlierAccess is
+ // No aliasing MemoryUses of MaybeDeadAccess found, MaybeDeadAccess is
// potentially dead.
- return {EarlierAccess};
+ return {MaybeDeadAccess};
}
// Delete dead memory defs
@@ -1701,6 +1651,7 @@ struct DSEState {
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
SkipStores.insert(MD);
}
+
Updater.removeMemoryAccess(MA);
}
@@ -1715,47 +1666,49 @@ struct DSEState {
NowDeadInsts.push_back(OpI);
}
+ EI.removeInstruction(DeadInst);
DeadInst->eraseFromParent();
}
}
- // Check for any extra throws between SI and NI that block DSE. This only
- // checks extra maythrows (those that aren't MemoryDef's). MemoryDef that may
- // throw are handled during the walk from one def to the next.
- bool mayThrowBetween(Instruction *SI, Instruction *NI,
- const Value *SILocUnd) {
- // First see if we can ignore it by using the fact that SI is an
+ // Check for any extra throws between \p KillingI and \p DeadI that block
+ // DSE. This only checks extra maythrows (those that aren't MemoryDef's).
+ // MemoryDef that may throw are handled during the walk from one def to the
+ // next.
+ bool mayThrowBetween(Instruction *KillingI, Instruction *DeadI,
+ const Value *KillingUndObj) {
+ // First see if we can ignore it by using the fact that KillingI is an
// alloca/alloca like object that is not visible to the caller during
// execution of the function.
- if (SILocUnd && isInvisibleToCallerBeforeRet(SILocUnd))
+ if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
return false;
- if (SI->getParent() == NI->getParent())
- return ThrowingBlocks.count(SI->getParent());
+ if (KillingI->getParent() == DeadI->getParent())
+ return ThrowingBlocks.count(KillingI->getParent());
return !ThrowingBlocks.empty();
}
- // Check if \p NI acts as a DSE barrier for \p SI. The following instructions
- // act as barriers:
- // * A memory instruction that may throw and \p SI accesses a non-stack
+ // Check if \p DeadI acts as a DSE barrier for \p KillingI. The following
+ // instructions act as barriers:
+ // * A memory instruction that may throw and \p KillingI accesses a non-stack
// object.
  // * Atomic stores stronger than monotonic.
- bool isDSEBarrier(const Value *SILocUnd, Instruction *NI) {
- // If NI may throw it acts as a barrier, unless we are to an alloca/alloca
- // like object that does not escape.
- if (NI->mayThrow() && !isInvisibleToCallerBeforeRet(SILocUnd))
+ bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
+    // If DeadI may throw it acts as a barrier, unless the access is to an
+    // alloca or alloca-like object that does not escape.
+ if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
return true;
- // If NI is an atomic load/store stronger than monotonic, do not try to
+ // If DeadI is an atomic load/store stronger than monotonic, do not try to
// eliminate/reorder it.
- if (NI->isAtomic()) {
- if (auto *LI = dyn_cast<LoadInst>(NI))
+ if (DeadI->isAtomic()) {
+ if (auto *LI = dyn_cast<LoadInst>(DeadI))
return isStrongerThanMonotonic(LI->getOrdering());
- if (auto *SI = dyn_cast<StoreInst>(NI))
+ if (auto *SI = dyn_cast<StoreInst>(DeadI))
return isStrongerThanMonotonic(SI->getOrdering());
- if (auto *ARMW = dyn_cast<AtomicRMWInst>(NI))
+ if (auto *ARMW = dyn_cast<AtomicRMWInst>(DeadI))
return isStrongerThanMonotonic(ARMW->getOrdering());
- if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(NI))
+ if (auto *CmpXchg = dyn_cast<AtomicCmpXchgInst>(DeadI))
return isStrongerThanMonotonic(CmpXchg->getSuccessOrdering()) ||
isStrongerThanMonotonic(CmpXchg->getFailureOrdering());
llvm_unreachable("other instructions should be skipped in MemorySSA");
@@ -1776,7 +1729,6 @@ struct DSEState {
continue;
Instruction *DefI = Def->getMemoryInst();
- SmallVector<const Value *, 4> Pointers;
auto DefLoc = getLocForWriteEx(DefI);
if (!DefLoc)
continue;
@@ -1787,7 +1739,7 @@ struct DSEState {
// uncommon. If it turns out to be important, we can use
// getUnderlyingObjects here instead.
const Value *UO = getUnderlyingObject(DefLoc->Ptr);
- if (!UO || !isInvisibleToCallerAfterRet(UO))
+ if (!isInvisibleToCallerAfterRet(UO))
continue;
if (isWriteAtEndOfFunction(Def)) {
@@ -1804,8 +1756,7 @@ struct DSEState {
/// \returns true if \p Def is a no-op store, either because it
/// directly stores back a loaded value or stores zero to a calloced object.
- bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
- const Value *DefUO) {
+ bool storeIsNoop(MemoryDef *Def, const Value *DefUO) {
StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
Constant *StoredConstant = nullptr;
@@ -1816,13 +1767,78 @@ struct DSEState {
if (StoredConstant && StoredConstant->isNullValue()) {
auto *DefUOInst = dyn_cast<Instruction>(DefUO);
- if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
- auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
- // If UnderlyingDef is the clobbering access of Def, no instructions
- // between them can modify the memory location.
- auto *ClobberDef =
- MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
- return UnderlyingDef == ClobberDef;
+ if (DefUOInst) {
+ if (isCallocLikeFn(DefUOInst, &TLI)) {
+ auto *UnderlyingDef =
+ cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+ // If UnderlyingDef is the clobbering access of Def, no instructions
+ // between them can modify the memory location.
+ auto *ClobberDef =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+ return UnderlyingDef == ClobberDef;
+ }
+
+ if (MemSet) {
+ if (F.hasFnAttribute(Attribute::SanitizeMemory) ||
+ F.hasFnAttribute(Attribute::SanitizeAddress) ||
+ F.hasFnAttribute(Attribute::SanitizeHWAddress) ||
+ F.getName() == "calloc")
+ return false;
+ auto *Malloc = const_cast<CallInst *>(dyn_cast<CallInst>(DefUOInst));
+ if (!Malloc)
+ return false;
+ auto *InnerCallee = Malloc->getCalledFunction();
+ if (!InnerCallee)
+ return false;
+ LibFunc Func;
+ if (!TLI.getLibFunc(*InnerCallee, Func) || !TLI.has(Func) ||
+ Func != LibFunc_malloc)
+ return false;
+
+ auto shouldCreateCalloc = [](CallInst *Malloc, CallInst *Memset) {
+            // Check for a br(icmp(ptr, null), truebb, falsebb) pattern at
+            // the end of the malloc block.
+ auto *MallocBB = Malloc->getParent(),
+ *MemsetBB = Memset->getParent();
+ if (MallocBB == MemsetBB)
+ return true;
+ auto *Ptr = Memset->getArgOperand(0);
+ auto *TI = MallocBB->getTerminator();
+ ICmpInst::Predicate Pred;
+ BasicBlock *TrueBB, *FalseBB;
+ if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Ptr), m_Zero()), TrueBB,
+ FalseBB)))
+ return false;
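+            // The memset must sit on the branch taken when the malloc result
+            // is non-null, i.e. the false successor of the 'ptr == null'
+            // compare; otherwise it is not guaranteed to execute.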
+ if (Pred != ICmpInst::ICMP_EQ || MemsetBB != FalseBB)
+ return false;
+ return true;
+ };
+
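+          // Fold malloc followed by a zero memset into calloc only when the
+          // memset length matches the malloc size, the malloc dominates the
+          // memset, and nothing may modify the memory in between.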
+ if (Malloc->getOperand(0) == MemSet->getLength()) {
+ if (shouldCreateCalloc(Malloc, MemSet) &&
+ DT.dominates(Malloc, MemSet) &&
+ memoryIsNotModifiedBetween(Malloc, MemSet, BatchAA, DL, &DT)) {
+ IRBuilder<> IRB(Malloc);
+ const auto &DL = Malloc->getModule()->getDataLayout();
+ if (auto *Calloc =
+ emitCalloc(ConstantInt::get(IRB.getIntPtrTy(DL), 1),
+ Malloc->getArgOperand(0), IRB, TLI)) {
+ MemorySSAUpdater Updater(&MSSA);
+ auto *LastDef = cast<MemoryDef>(
+ Updater.getMemorySSA()->getMemoryAccess(Malloc));
+ auto *NewAccess = Updater.createMemoryAccessAfter(
+ cast<Instruction>(Calloc), LastDef, LastDef);
+ auto *NewAccessMD = cast<MemoryDef>(NewAccess);
+ Updater.insertDef(NewAccessMD, /*RenameUses=*/true);
+ Updater.removeMemoryAccess(Malloc);
+ Malloc->replaceAllUsesWith(Calloc);
+ Malloc->eraseFromParent();
+ return true;
+ }
+ return false;
+ }
+ }
+ }
}
}
@@ -1875,6 +1891,76 @@ struct DSEState {
return false;
}
+
+ bool removePartiallyOverlappedStores(InstOverlapIntervalsTy &IOL) {
+ bool Changed = false;
+ for (auto OI : IOL) {
+ Instruction *DeadI = OI.first;
+ MemoryLocation Loc = *getLocForWriteEx(DeadI);
+ assert(isRemovable(DeadI) && "Expect only removable instruction");
+
+ const Value *Ptr = Loc.Ptr->stripPointerCasts();
+ int64_t DeadStart = 0;
+ uint64_t DeadSize = Loc.Size.getValue();
+ GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
+ OverlapIntervalsTy &IntervalMap = OI.second;
+ Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+ if (IntervalMap.empty())
+ continue;
+ Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+ }
+ return Changed;
+ }
+
+ /// Eliminates writes to locations where the value that is being written
+ /// is already stored at the same location.
+ bool eliminateRedundantStoresOfExistingValues() {
+ bool MadeChange = false;
+ LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the "
+ "already existing value\n");
+ for (auto *Def : MemDefs) {
+ if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) ||
+ !isRemovable(Def->getMemoryInst()))
+ continue;
+ auto *UpperDef = dyn_cast<MemoryDef>(Def->getDefiningAccess());
+ if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef))
+ continue;
+
+ Instruction *DefInst = Def->getMemoryInst();
+ Instruction *UpperInst = UpperDef->getMemoryInst();
+ auto IsRedundantStore = [this, DefInst,
+ UpperInst](MemoryLocation UpperLoc) {
+ if (DefInst->isIdenticalTo(UpperInst))
+ return true;
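+        // A store is also redundant if a preceding memset already filled the
+        // whole location with the very byte value the store writes.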
+ if (auto *MemSetI = dyn_cast<MemSetInst>(UpperInst)) {
+ if (auto *SI = dyn_cast<StoreInst>(DefInst)) {
+ auto MaybeDefLoc = getLocForWriteEx(DefInst);
+ if (!MaybeDefLoc)
+ return false;
+ int64_t InstWriteOffset = 0;
+ int64_t DepWriteOffset = 0;
+ auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc,
+ InstWriteOffset, DepWriteOffset);
+ Value *StoredByte = isBytewiseValue(SI->getValueOperand(), DL);
+ return StoredByte && StoredByte == MemSetI->getOperand(1) &&
+ OR == OW_Complete;
+ }
+ }
+ return false;
+ };
+
+ auto MaybeUpperLoc = getLocForWriteEx(UpperInst);
+ if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) ||
+ isReadClobber(*MaybeUpperLoc, DefInst))
+ continue;
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst
+ << '\n');
+ deleteDeadInstruction(DefInst);
+ NumRedundantStores++;
+ MadeChange = true;
+ }
+ return MadeChange;
+ }
};
static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
@@ -1883,68 +1969,64 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
const LoopInfo &LI) {
bool MadeChange = false;
- DSEState State = DSEState::get(F, AA, MSSA, DT, PDT, TLI, LI);
+ DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
MemoryDef *KillingDef = State.MemDefs[I];
if (State.SkipStores.count(KillingDef))
continue;
- Instruction *SI = KillingDef->getMemoryInst();
+ Instruction *KillingI = KillingDef->getMemoryInst();
- Optional<MemoryLocation> MaybeSILoc;
- if (State.isMemTerminatorInst(SI))
- MaybeSILoc = State.getLocForTerminator(SI).map(
+ Optional<MemoryLocation> MaybeKillingLoc;
+ if (State.isMemTerminatorInst(KillingI))
+ MaybeKillingLoc = State.getLocForTerminator(KillingI).map(
[](const std::pair<MemoryLocation, bool> &P) { return P.first; });
else
- MaybeSILoc = State.getLocForWriteEx(SI);
+ MaybeKillingLoc = State.getLocForWriteEx(KillingI);
- if (!MaybeSILoc) {
+ if (!MaybeKillingLoc) {
LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for "
- << *SI << "\n");
+ << *KillingI << "\n");
continue;
}
- MemoryLocation SILoc = *MaybeSILoc;
- assert(SILoc.Ptr && "SILoc should not be null");
- const Value *SILocUnd = getUnderlyingObject(SILoc.Ptr);
-
- MemoryAccess *Current = KillingDef;
+ MemoryLocation KillingLoc = *MaybeKillingLoc;
+ assert(KillingLoc.Ptr && "KillingLoc should not be null");
+ const Value *KillingUndObj = getUnderlyingObject(KillingLoc.Ptr);
LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs killed by "
- << *Current << " (" << *SI << ")\n");
+ << *KillingDef << " (" << *KillingI << ")\n");
unsigned ScanLimit = MemorySSAScanLimit;
unsigned WalkerStepLimit = MemorySSAUpwardsStepLimit;
unsigned PartialLimit = MemorySSAPartialStoreLimit;
// Worklist of MemoryAccesses that may be killed by KillingDef.
SetVector<MemoryAccess *> ToCheck;
-
- if (SILocUnd)
- ToCheck.insert(KillingDef->getDefiningAccess());
+ ToCheck.insert(KillingDef->getDefiningAccess());
bool Shortend = false;
- bool IsMemTerm = State.isMemTerminatorInst(SI);
+ bool IsMemTerm = State.isMemTerminatorInst(KillingI);
// Check if MemoryAccesses in the worklist are killed by KillingDef.
for (unsigned I = 0; I < ToCheck.size(); I++) {
- Current = ToCheck[I];
+ MemoryAccess *Current = ToCheck[I];
if (State.SkipStores.count(Current))
continue;
- Optional<MemoryAccess *> Next = State.getDomMemoryDef(
- KillingDef, Current, SILoc, SILocUnd, ScanLimit, WalkerStepLimit,
- IsMemTerm, PartialLimit);
+ Optional<MemoryAccess *> MaybeDeadAccess = State.getDomMemoryDef(
+ KillingDef, Current, KillingLoc, KillingUndObj, ScanLimit,
+ WalkerStepLimit, IsMemTerm, PartialLimit);
- if (!Next) {
+ if (!MaybeDeadAccess) {
LLVM_DEBUG(dbgs() << " finished walk\n");
continue;
}
- MemoryAccess *EarlierAccess = *Next;
- LLVM_DEBUG(dbgs() << " Checking if we can kill " << *EarlierAccess);
- if (isa<MemoryPhi>(EarlierAccess)) {
+ MemoryAccess *DeadAccess = *MaybeDeadAccess;
+ LLVM_DEBUG(dbgs() << " Checking if we can kill " << *DeadAccess);
+ if (isa<MemoryPhi>(DeadAccess)) {
LLVM_DEBUG(dbgs() << "\n ... adding incoming values to worklist\n");
- for (Value *V : cast<MemoryPhi>(EarlierAccess)->incoming_values()) {
+ for (Value *V : cast<MemoryPhi>(DeadAccess)->incoming_values()) {
MemoryAccess *IncomingAccess = cast<MemoryAccess>(V);
BasicBlock *IncomingBlock = IncomingAccess->getBlock();
- BasicBlock *PhiBlock = EarlierAccess->getBlock();
+ BasicBlock *PhiBlock = DeadAccess->getBlock();
// We only consider incoming MemoryAccesses that come before the
// MemoryPhi. Otherwise we could discover candidates that do not
@@ -1955,72 +2037,73 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
continue;
}
- auto *NextDef = cast<MemoryDef>(EarlierAccess);
- Instruction *NI = NextDef->getMemoryInst();
- LLVM_DEBUG(dbgs() << " (" << *NI << ")\n");
- ToCheck.insert(NextDef->getDefiningAccess());
+ auto *DeadDefAccess = cast<MemoryDef>(DeadAccess);
+ Instruction *DeadI = DeadDefAccess->getMemoryInst();
+ LLVM_DEBUG(dbgs() << " (" << *DeadI << ")\n");
+ ToCheck.insert(DeadDefAccess->getDefiningAccess());
NumGetDomMemoryDefPassed++;
if (!DebugCounter::shouldExecute(MemorySSACounter))
continue;
- MemoryLocation NILoc = *State.getLocForWriteEx(NI);
+ MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI);
if (IsMemTerm) {
- const Value *NIUnd = getUnderlyingObject(NILoc.Ptr);
- if (SILocUnd != NIUnd)
+ const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr);
+ if (KillingUndObj != DeadUndObj)
continue;
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
} else {
- // Check if NI overwrites SI.
- int64_t InstWriteOffset, DepWriteOffset;
- OverwriteResult OR = State.isOverwrite(SI, NI, SILoc, NILoc,
- DepWriteOffset, InstWriteOffset);
+ // Check if DeadI overwrites KillingI.
+ int64_t KillingOffset = 0;
+ int64_t DeadOffset = 0;
+ OverwriteResult OR = State.isOverwrite(
+ KillingI, DeadI, KillingLoc, DeadLoc, KillingOffset, DeadOffset);
if (OR == OW_MaybePartial) {
auto Iter = State.IOLs.insert(
std::make_pair<BasicBlock *, InstOverlapIntervalsTy>(
- NI->getParent(), InstOverlapIntervalsTy()));
+ DeadI->getParent(), InstOverlapIntervalsTy()));
auto &IOL = Iter.first->second;
- OR = isPartialOverwrite(SILoc, NILoc, DepWriteOffset, InstWriteOffset,
- NI, IOL);
+ OR = isPartialOverwrite(KillingLoc, DeadLoc, KillingOffset,
+ DeadOffset, DeadI, IOL);
}
if (EnablePartialStoreMerging && OR == OW_PartialEarlierWithFullLater) {
- auto *Earlier = dyn_cast<StoreInst>(NI);
- auto *Later = dyn_cast<StoreInst>(SI);
+ auto *DeadSI = dyn_cast<StoreInst>(DeadI);
+ auto *KillingSI = dyn_cast<StoreInst>(KillingI);
// We are re-using tryToMergePartialOverlappingStores, which requires
- // Earlier to domiante Later.
+ // DeadSI to dominate KillingSI.
// TODO: implement tryToMergeParialOverlappingStores using MemorySSA.
- if (Earlier && Later && DT.dominates(Earlier, Later)) {
+ if (DeadSI && KillingSI && DT.dominates(DeadSI, KillingSI)) {
if (Constant *Merged = tryToMergePartialOverlappingStores(
- Earlier, Later, InstWriteOffset, DepWriteOffset, State.DL,
+ KillingSI, DeadSI, KillingOffset, DeadOffset, State.DL,
State.BatchAA, &DT)) {
// Update stored value of earlier store to merged constant.
- Earlier->setOperand(0, Merged);
+ DeadSI->setOperand(0, Merged);
++NumModifiedStores;
MadeChange = true;
Shortend = true;
- // Remove later store and remove any outstanding overlap intervals
- // for the updated store.
- State.deleteDeadInstruction(Later);
- auto I = State.IOLs.find(Earlier->getParent());
+ // Remove killing store and remove any outstanding overlap
+ // intervals for the updated store.
+ State.deleteDeadInstruction(KillingSI);
+ auto I = State.IOLs.find(DeadSI->getParent());
if (I != State.IOLs.end())
- I->second.erase(Earlier);
+ I->second.erase(DeadSI);
break;
}
}
}
if (OR == OW_Complete) {
- LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *NI
- << "\n KILLER: " << *SI << '\n');
- State.deleteDeadInstruction(NI);
+ LLVM_DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DeadI
+ << "\n KILLER: " << *KillingI << '\n');
+ State.deleteDeadInstruction(DeadI);
++NumFastStores;
MadeChange = true;
}
@@ -2028,10 +2111,11 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
}
// Check if the store is a no-op.
- if (!Shortend && isRemovable(SI) &&
- State.storeIsNoop(KillingDef, SILoc, SILocUnd)) {
- LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *SI << '\n');
- State.deleteDeadInstruction(SI);
+ if (!Shortend && isRemovable(KillingI) &&
+ State.storeIsNoop(KillingDef, KillingUndObj)) {
+ LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI
+ << '\n');
+ State.deleteDeadInstruction(KillingI);
NumRedundantStores++;
MadeChange = true;
continue;
@@ -2040,8 +2124,9 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
if (EnablePartialOverwriteTracking)
for (auto &KV : State.IOLs)
- MadeChange |= removePartiallyOverlappedStores(State.DL, KV.second, TLI);
+ MadeChange |= State.removePartiallyOverlappedStores(KV.second);
+ MadeChange |= State.eliminateRedundantStoresOfExistingValues();
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
return MadeChange;
}
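For readers following the renamed variables above, the OW_PartialEarlierWithFullLater path is easiest to see at the source level. The following is a hedged, standalone C++ sketch rather than part of the patch; the struct, field, and function names are illustrative.

#include <cstdint>

struct Pair { uint16_t Lo, Hi; };

void partialOverwrite(Pair *P) {
  *P = {0x1111, 0x2222}; // DeadSI: wide constant store dominating the next one
  P->Lo = 0x3333;        // KillingSI: narrower constant store fully inside DeadSI
  // With both stored values constant and DeadSI dominating KillingSI,
  // tryToMergePartialOverlappingStores folds the killing constant into the
  // dominating store (which becomes {0x3333, 0x2222}); KillingSI is then
  // deleted and Shortend is set so the no-op-store check is skipped.
}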
diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index c77769368ede..66c9d9f0902a 100644
--- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -272,9 +272,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (PredBB && IsSafeToHoist(RemInst, RemBB) &&
IsSafeToHoist(DivInst, DivBB) &&
- llvm::all_of(successors(PredBB), [&](BasicBlock *BB) {
- return BB == DivBB || BB == RemBB;
- })) {
+ all_of(successors(PredBB),
+ [&](BasicBlock *BB) { return BB == DivBB || BB == RemBB; }) &&
+ all_of(predecessors(DivBB),
+ [&](BasicBlock *BB) { return BB == RemBB || BB == PredBB; })) {
DivDominates = true;
DivInst->moveBefore(PredBB->getTerminator());
Changed = true;
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 978c6a77b8dc..90f71f7729a7 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -293,7 +293,7 @@ static unsigned getHashValueImpl(SimpleValue Val) {
// TODO: Extend this to handle intrinsics with >2 operands where the 1st
// 2 operands are commutative.
auto *II = dyn_cast<IntrinsicInst>(Inst);
- if (II && II->isCommutative() && II->getNumArgOperands() == 2) {
+ if (II && II->isCommutative() && II->arg_size() == 2) {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
if (LHS > RHS)
std::swap(LHS, RHS);
@@ -363,7 +363,7 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
auto *LII = dyn_cast<IntrinsicInst>(LHSI);
auto *RII = dyn_cast<IntrinsicInst>(RHSI);
if (LII && RII && LII->getIntrinsicID() == RII->getIntrinsicID() &&
- LII->isCommutative() && LII->getNumArgOperands() == 2) {
+ LII->isCommutative() && LII->arg_size() == 2) {
return LII->getArgOperand(0) == RII->getArgOperand(1) &&
LII->getArgOperand(1) == RII->getArgOperand(0);
}
@@ -1265,6 +1265,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Skip pseudoprobe intrinsics, for the same reason as assume intrinsics.
+ if (match(&Inst, m_Intrinsic<Intrinsic::pseudoprobe>())) {
+ LLVM_DEBUG(dbgs() << "EarlyCSE skipping pseudoprobe: " << Inst << '\n');
+ continue;
+ }
+
// We can skip all invariant.start intrinsics since they only read memory,
// and we can forward values across it. For invariant starts without
// invariant ends, we can use the fact that the invariantness never ends to
@@ -1642,6 +1648,16 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
return PA;
}
+void EarlyCSEPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (UseMemorySSA)
+ OS << "memssa";
+ OS << ">";
+}
+
namespace {
/// A simple and fast domtree-based CSE pass.
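As a quick illustration of the new printPipeline hook above, here is a hedged, standalone sketch (not part of the commit) that constructs an EarlyCSE pass with MemorySSA enabled and prints its pipeline form. With the identity name mapping used here the expected output is "EarlyCSEPass<memssa>"; PassBuilder supplies a mapping that renders the class name as "early-cse".

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
using namespace llvm;

void printEarlyCSEConfig() {
  EarlyCSEPass CSE(/*UseMemorySSA=*/true);
  // Emits the pass name followed by "<memssa>" when MemorySSA is in use.
  CSE.printPipeline(outs(), [](StringRef Name) { return Name; });
}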
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 8a5d4f568774..a98bb8358aef 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -256,7 +256,7 @@ void Float2IntPass::walkForwards() {
Op = [](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 1 && "FNeg is a unary operator!");
unsigned Size = Ops[0].getBitWidth();
- auto Zero = ConstantRange(APInt::getNullValue(Size));
+ auto Zero = ConstantRange(APInt::getZero(Size));
return Zero.sub(Ops[0]);
};
break;
@@ -372,7 +372,7 @@ bool Float2IntPass::validateAndTransform() {
// If it does, transformation would be illegal.
//
// Don't count the roots, as they terminate the graphs.
- if (Roots.count(I) == 0) {
+ if (!Roots.contains(I)) {
// Set the type of the conversion while we're here.
if (!ConvertedToTy)
ConvertedToTy = I->getType();
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 16368aec7c3f..00506fb86006 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -126,7 +126,7 @@ static cl::opt<uint32_t> MaxBBSpeculations(
"into) when deducing if a value is fully available or not in GVN "
"(default = 600)"));
-struct llvm::GVN::Expression {
+struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
Type *type = nullptr;
@@ -155,17 +155,18 @@ struct llvm::GVN::Expression {
namespace llvm {
-template <> struct DenseMapInfo<GVN::Expression> {
- static inline GVN::Expression getEmptyKey() { return ~0U; }
- static inline GVN::Expression getTombstoneKey() { return ~1U; }
+template <> struct DenseMapInfo<GVNPass::Expression> {
+ static inline GVNPass::Expression getEmptyKey() { return ~0U; }
+ static inline GVNPass::Expression getTombstoneKey() { return ~1U; }
- static unsigned getHashValue(const GVN::Expression &e) {
+ static unsigned getHashValue(const GVNPass::Expression &e) {
using llvm::hash_value;
return static_cast<unsigned>(hash_value(e));
}
- static bool isEqual(const GVN::Expression &LHS, const GVN::Expression &RHS) {
+ static bool isEqual(const GVNPass::Expression &LHS,
+ const GVNPass::Expression &RHS) {
return LHS == RHS;
}
};
@@ -246,7 +247,7 @@ struct llvm::gvn::AvailableValue {
/// Emit code at the specified insertion point to adjust the value defined
/// here to the specified type. This handles various coercion cases.
Value *MaterializeAdjustedValue(LoadInst *Load, Instruction *InsertPt,
- GVN &gvn) const;
+ GVNPass &gvn) const;
};
/// Represents an AvailableValue which can be rematerialized at the end of
@@ -276,7 +277,7 @@ struct llvm::gvn::AvailableValueInBlock {
/// Emit code at the end of this block to adjust the value defined here to
/// the specified type. This handles various coercion cases.
- Value *MaterializeAdjustedValue(LoadInst *Load, GVN &gvn) const {
+ Value *MaterializeAdjustedValue(LoadInst *Load, GVNPass &gvn) const {
return AV.MaterializeAdjustedValue(Load, BB->getTerminator(), gvn);
}
};
@@ -285,7 +286,7 @@ struct llvm::gvn::AvailableValueInBlock {
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
+GVNPass::Expression GVNPass::ValueTable::createExpr(Instruction *I) {
Expression e;
e.type = I->getType();
e.opcode = I->getOpcode();
@@ -330,9 +331,8 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) {
return e;
}
-GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+GVNPass::Expression GVNPass::ValueTable::createCmpExpr(
+ unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) {
assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
"Not a comparison!");
Expression e;
@@ -350,7 +350,8 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode,
return e;
}
-GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
+GVNPass::Expression
+GVNPass::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
@@ -382,20 +383,21 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
// ValueTable External Functions
//===----------------------------------------------------------------------===//
-GVN::ValueTable::ValueTable() = default;
-GVN::ValueTable::ValueTable(const ValueTable &) = default;
-GVN::ValueTable::ValueTable(ValueTable &&) = default;
-GVN::ValueTable::~ValueTable() = default;
-GVN::ValueTable &GVN::ValueTable::operator=(const GVN::ValueTable &Arg) = default;
+GVNPass::ValueTable::ValueTable() = default;
+GVNPass::ValueTable::ValueTable(const ValueTable &) = default;
+GVNPass::ValueTable::ValueTable(ValueTable &&) = default;
+GVNPass::ValueTable::~ValueTable() = default;
+GVNPass::ValueTable &
+GVNPass::ValueTable::operator=(const GVNPass::ValueTable &Arg) = default;
/// add - Insert a value into the table with a specified value number.
-void GVN::ValueTable::add(Value *V, uint32_t num) {
+void GVNPass::ValueTable::add(Value *V, uint32_t num) {
valueNumbering.insert(std::make_pair(V, num));
if (PHINode *PN = dyn_cast<PHINode>(V))
NumberingPhi[num] = PN;
}
-uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
+uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
uint32_t e = assignExpNewValueNum(exp).first;
@@ -421,13 +423,12 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
// a normal load or store instruction.
CallInst *local_cdep = dyn_cast<CallInst>(local_dep.getInst());
- if (!local_cdep ||
- local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (!local_cdep || local_cdep->arg_size() != C->arg_size()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
uint32_t cd_vn = lookupOrAdd(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
@@ -477,11 +478,11 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
return nextValueNumber++;
}
- if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
+ if (cdep->arg_size() != C->arg_size()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ for (unsigned i = 0, e = C->arg_size(); i < e; ++i) {
uint32_t c_vn = lookupOrAdd(C->getArgOperand(i));
uint32_t cd_vn = lookupOrAdd(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
@@ -500,11 +501,13 @@ uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) {
}
/// Returns true if a value number exists for the specified value.
-bool GVN::ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; }
+bool GVNPass::ValueTable::exists(Value *V) const {
+ return valueNumbering.count(V) != 0;
+}
/// lookup_or_add - Returns the value number for the specified value, assigning
/// it a new number if it did not have one before.
-uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
+uint32_t GVNPass::ValueTable::lookupOrAdd(Value *V) {
DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
if (VI != valueNumbering.end())
return VI->second;
@@ -581,7 +584,7 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) {
/// Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
-uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
+uint32_t GVNPass::ValueTable::lookup(Value *V, bool Verify) const {
DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
if (Verify) {
assert(VI != valueNumbering.end() && "Value not numbered?");
@@ -594,15 +597,15 @@ uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const {
/// assigning it a new number if it did not have one before. Useful when
/// we deduced the result of a comparison, but don't immediately have an
/// instruction realizing that comparison to hand.
-uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode,
- CmpInst::Predicate Predicate,
- Value *LHS, Value *RHS) {
+uint32_t GVNPass::ValueTable::lookupOrAddCmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS);
return assignExpNewValueNum(exp).first;
}
/// Remove all entries from the ValueTable.
-void GVN::ValueTable::clear() {
+void GVNPass::ValueTable::clear() {
valueNumbering.clear();
expressionNumbering.clear();
NumberingPhi.clear();
@@ -614,7 +617,7 @@ void GVN::ValueTable::clear() {
}
/// Remove a value from the value numbering.
-void GVN::ValueTable::erase(Value *V) {
+void GVNPass::ValueTable::erase(Value *V) {
uint32_t Num = valueNumbering.lookup(V);
valueNumbering.erase(V);
// If V is PHINode, V <--> value number is an one-to-one mapping.
@@ -624,7 +627,7 @@ void GVN::ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
-void GVN::ValueTable::verifyRemoved(const Value *V) const {
+void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
@@ -635,28 +638,28 @@ void GVN::ValueTable::verifyRemoved(const Value *V) const {
// GVN Pass
//===----------------------------------------------------------------------===//
-bool GVN::isPREEnabled() const {
+bool GVNPass::isPREEnabled() const {
return Options.AllowPRE.getValueOr(GVNEnablePRE);
}
-bool GVN::isLoadPREEnabled() const {
+bool GVNPass::isLoadPREEnabled() const {
return Options.AllowLoadPRE.getValueOr(GVNEnableLoadPRE);
}
-bool GVN::isLoadInLoopPREEnabled() const {
+bool GVNPass::isLoadInLoopPREEnabled() const {
return Options.AllowLoadInLoopPRE.getValueOr(GVNEnableLoadInLoopPRE);
}
-bool GVN::isLoadPRESplitBackedgeEnabled() const {
+bool GVNPass::isLoadPRESplitBackedgeEnabled() const {
return Options.AllowLoadPRESplitBackedge.getValueOr(
GVNEnableSplitBackedgeInLoadPRE);
}
-bool GVN::isMemDepEnabled() const {
+bool GVNPass::isMemDepEnabled() const {
return Options.AllowMemDep.getValueOr(GVNEnableMemDep);
}
-PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: The order of evaluation of these 'getResult' calls is very
// significant! Re-ordering these variables will cause GVN when run alone to
// be less effective! We should fix memdep and basic-aa to not exhibit this
@@ -684,8 +687,26 @@ PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
return PA;
}
+void GVNPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ if (Options.AllowPRE != None)
+ OS << (Options.AllowPRE.getValue() ? "" : "no-") << "pre;";
+ if (Options.AllowLoadPRE != None)
+ OS << (Options.AllowLoadPRE.getValue() ? "" : "no-") << "load-pre;";
+ if (Options.AllowLoadPRESplitBackedge != None)
+ OS << (Options.AllowLoadPRESplitBackedge.getValue() ? "" : "no-")
+ << "split-backedge-load-pre;";
+ if (Options.AllowMemDep != None)
+ OS << (Options.AllowMemDep.getValue() ? "" : "no-") << "memdep";
+ OS << ">";
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void GVN::dump(DenseMap<uint32_t, Value*>& d) const {
+LLVM_DUMP_METHOD void GVNPass::dump(DenseMap<uint32_t, Value *> &d) const {
errs() << "{\n";
for (auto &I : d) {
errs() << I.first << "\n";
@@ -835,7 +856,7 @@ static bool IsValueFullyAvailableInBlock(
static Value *
ConstructSSAForLoadSet(LoadInst *Load,
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock,
- GVN &gvn) {
+ GVNPass &gvn) {
// Check for the fully redundant, dominating load case. In this case, we can
// just use the dominating value directly.
if (ValuesPerBlock.size() == 1 &&
@@ -878,7 +899,7 @@ ConstructSSAForLoadSet(LoadInst *Load,
Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
Instruction *InsertPt,
- GVN &gvn) const {
+ GVNPass &gvn) const {
Value *Res;
Type *LoadTy = Load->getType();
const DataLayout &DL = Load->getModule()->getDataLayout();
@@ -1002,8 +1023,8 @@ static void reportMayClobberedLoad(LoadInst *Load, MemDepResult DepInfo,
ORE->emit(R);
}
-bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
- Value *Address, AvailableValue &Res) {
+bool GVNPass::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
+ Value *Address, AvailableValue &Res) {
assert((DepInfo.isDef() || DepInfo.isClobber()) &&
"expected a local dependence");
assert(Load->isUnordered() && "rules below are incorrect for ordered access");
@@ -1137,9 +1158,9 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *Load, MemDepResult DepInfo,
return false;
}
-void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
- AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
@@ -1182,7 +1203,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
"post condition violation");
}
-void GVN::eliminatePartiallyRedundantLoad(
+void GVNPass::eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
MapVector<BasicBlock *, Value *> &AvailableLoads) {
for (const auto &AvailableLoad : AvailableLoads) {
@@ -1212,8 +1233,7 @@ void GVN::eliminatePartiallyRedundantLoad(
}
// Transfer the old load's AA tags to the new load.
- AAMDNodes Tags;
- Load->getAAMetadata(Tags);
+ AAMDNodes Tags = Load->getAAMetadata();
if (Tags)
NewLoad->setAAMetadata(Tags);
@@ -1257,8 +1277,8 @@ void GVN::eliminatePartiallyRedundantLoad(
});
}
-bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
// Okay, we have *some* definitions of the value. This means that the value
// is available in some of our (transitive) predecessors. Lets think about
// doing PRE of this load. This will involve inserting a new load into the
@@ -1498,8 +1518,9 @@ bool GVN::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return true;
}
-bool GVN::performLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- UnavailBlkVect &UnavailableBlocks) {
+bool GVNPass::performLoopLoadPRE(LoadInst *Load,
+ AvailValInBlkVect &ValuesPerBlock,
+ UnavailBlkVect &UnavailableBlocks) {
if (!LI)
return false;
@@ -1590,7 +1611,7 @@ static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
-bool GVN::processNonLocalLoad(LoadInst *Load) {
+bool GVNPass::processNonLocalLoad(LoadInst *Load) {
// non-local speculations are not allowed under asan.
if (Load->getParent()->getParent()->hasFnAttribute(
Attribute::SanitizeAddress) ||
@@ -1622,10 +1643,8 @@ bool GVN::processNonLocalLoad(LoadInst *Load) {
// If this load follows a GEP, see if we can PRE the indices before analyzing.
if (GetElementPtrInst *GEP =
dyn_cast<GetElementPtrInst>(Load->getOperand(0))) {
- for (GetElementPtrInst::op_iterator OI = GEP->idx_begin(),
- OE = GEP->idx_end();
- OI != OE; ++OI)
- if (Instruction *I = dyn_cast<Instruction>(OI->get()))
+ for (Use &U : GEP->indices())
+ if (Instruction *I = dyn_cast<Instruction>(U.get()))
Changed |= performScalarPRE(I);
}
@@ -1673,8 +1692,11 @@ bool GVN::processNonLocalLoad(LoadInst *Load) {
if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
return Changed;
- return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
- performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks);
+ if (performLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) ||
+ PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks))
+ return true;
+
+ return Changed;
}
static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
@@ -1738,7 +1760,7 @@ static bool hasUsersIn(Value *V, BasicBlock *BB) {
return false;
}
-bool GVN::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
+bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
Value *V = IntrinsicI->getArgOperand(0);
if (ConstantInt *Cond = dyn_cast<ConstantInt>(V)) {
@@ -1882,7 +1904,7 @@ static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
/// Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
-bool GVN::processLoad(LoadInst *L) {
+bool GVNPass::processLoad(LoadInst *L) {
if (!MD)
return false;
@@ -1936,7 +1958,7 @@ bool GVN::processLoad(LoadInst *L) {
/// Return a pair the first field showing the value number of \p Exp and the
/// second field showing whether it is a value number newly created.
std::pair<uint32_t, bool>
-GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
+GVNPass::ValueTable::assignExpNewValueNum(Expression &Exp) {
uint32_t &e = expressionNumbering[Exp];
bool CreateNewValNum = !e;
if (CreateNewValNum) {
@@ -1951,8 +1973,8 @@ GVN::ValueTable::assignExpNewValueNum(Expression &Exp) {
/// Return whether all the values related with the same \p num are
/// defined in \p BB.
-bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
- GVN &Gvn) {
+bool GVNPass::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
+ GVNPass &Gvn) {
LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
while (Vals && Vals->BB == BB)
Vals = Vals->Next;
@@ -1960,9 +1982,9 @@ bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB,
}
/// Wrap phiTranslateImpl to provide caching functionality.
-uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
- const BasicBlock *PhiBlock, uint32_t Num,
- GVN &Gvn) {
+uint32_t GVNPass::ValueTable::phiTranslate(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVNPass &Gvn) {
auto FindRes = PhiTranslateTable.find({Num, Pred});
if (FindRes != PhiTranslateTable.end())
return FindRes->second;
@@ -1973,9 +1995,10 @@ uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred,
// Return true if the value number \p Num and NewNum have equal value.
// Return false if the result is unknown.
-bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
- const BasicBlock *Pred,
- const BasicBlock *PhiBlock, GVN &Gvn) {
+bool GVNPass::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
+ const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ GVNPass &Gvn) {
CallInst *Call = nullptr;
LeaderTableEntry *Vals = &Gvn.LeaderTable[Num];
while (Vals) {
@@ -2008,9 +2031,9 @@ bool GVN::ValueTable::areCallValsEqual(uint32_t Num, uint32_t NewNum,
/// Translate value number \p Num using phis, so that it has the values of
/// the phis in BB.
-uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
- const BasicBlock *PhiBlock,
- uint32_t Num, GVN &Gvn) {
+uint32_t GVNPass::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
+ const BasicBlock *PhiBlock,
+ uint32_t Num, GVNPass &Gvn) {
if (PHINode *PN = NumberingPhi[Num]) {
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred)
@@ -2063,8 +2086,8 @@ uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred,
/// Erase stale entry from phiTranslate cache so phiTranslate can be computed
/// again.
-void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
- const BasicBlock &CurrBlock) {
+void GVNPass::ValueTable::eraseTranslateCacheEntry(
+ uint32_t Num, const BasicBlock &CurrBlock) {
for (const BasicBlock *Pred : predecessors(&CurrBlock))
PhiTranslateTable.erase({Num, Pred});
}
@@ -2074,7 +2097,7 @@ void GVN::ValueTable::eraseTranslateCacheEntry(uint32_t Num,
// and then scan the list to find one whose block dominates the block in
// question. This is fast because dominator tree queries consist of only
// a few comparisons of DFS numbers.
-Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
+Value *GVNPass::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
if (!Vals.Val) return nullptr;
@@ -2113,7 +2136,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
return Pred != nullptr;
}
-void GVN::assignBlockRPONumber(Function &F) {
+void GVNPass::assignBlockRPONumber(Function &F) {
BlockRPONumber.clear();
uint32_t NextBlockNumber = 1;
ReversePostOrderTraversal<Function *> RPOT(&F);
@@ -2122,7 +2145,7 @@ void GVN::assignBlockRPONumber(Function &F) {
InvalidBlockRPONumbers = false;
}
-bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
+bool GVNPass::replaceOperandsForInBlockEquality(Instruction *Instr) const {
bool Changed = false;
for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) {
Value *Operand = Instr->getOperand(OpNum);
@@ -2142,8 +2165,9 @@ bool GVN::replaceOperandsForInBlockEquality(Instruction *Instr) const {
/// 'RHS' everywhere in the scope. Returns whether a change was made.
/// If DominatesByEdge is false, then it means that we will propagate the RHS
/// value starting from the end of Root.Start.
-bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
- bool DominatesByEdge) {
+bool GVNPass::propagateEquality(Value *LHS, Value *RHS,
+ const BasicBlockEdge &Root,
+ bool DominatesByEdge) {
SmallVector<std::pair<Value*, Value*>, 4> Worklist;
Worklist.push_back(std::make_pair(LHS, RHS));
bool Changed = false;
@@ -2291,7 +2315,7 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
/// When calculating availability, handle an instruction
/// by inserting it into the appropriate sets
-bool GVN::processInstruction(Instruction *I) {
+bool GVNPass::processInstruction(Instruction *I) {
// Ignore dbg info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
return false;
@@ -2432,10 +2456,10 @@ bool GVN::processInstruction(Instruction *I) {
}
/// runOnFunction - This is the main transformation entry point for a function.
-bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
- const TargetLibraryInfo &RunTLI, AAResults &RunAA,
- MemoryDependenceResults *RunMD, LoopInfo *LI,
- OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
+bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
+ const TargetLibraryInfo &RunTLI, AAResults &RunAA,
+ MemoryDependenceResults *RunMD, LoopInfo *LI,
+ OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) {
AC = &RunAC;
DT = &RunDT;
VN.setDomTree(DT);
@@ -2457,10 +2481,8 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge unconditional branches, allowing PRE to catch more
// optimization opportunities.
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
- BasicBlock *BB = &*FI++;
-
- bool removedBlock = MergeBlockIntoPredecessor(BB, &DTU, LI, MSSAU, MD);
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
+ bool removedBlock = MergeBlockIntoPredecessor(&BB, &DTU, LI, MSSAU, MD);
if (removedBlock)
++NumGVNBlocks;
@@ -2502,7 +2524,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
return Changed;
}
-bool GVN::processBlock(BasicBlock *BB) {
+bool GVNPass::processBlock(BasicBlock *BB) {
// FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function
// (and incrementing BI before processing an instruction).
assert(InstrsToErase.empty() &&
@@ -2563,8 +2585,8 @@ bool GVN::processBlock(BasicBlock *BB) {
}
// Instantiate an expression in a predecessor that lacked it.
-bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
- BasicBlock *Curr, unsigned int ValNo) {
+bool GVNPass::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
+ BasicBlock *Curr, unsigned int ValNo) {
// Because we are going top-down through the block, all value numbers
// will be available in the predecessor by the time we need them. Any
// that weren't originally present will have been instantiated earlier
@@ -2612,7 +2634,7 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred,
return true;
}
-bool GVN::performScalarPRE(Instruction *CurInst) {
+bool GVNPass::performScalarPRE(Instruction *CurInst) {
if (isa<AllocaInst>(CurInst) || CurInst->isTerminator() ||
isa<PHINode>(CurInst) || CurInst->getType()->isVoidTy() ||
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
@@ -2797,7 +2819,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) {
/// Perform a purely local form of PRE that looks for diamond
/// control flow patterns and attempts to perform simple PRE at the join point.
-bool GVN::performPRE(Function &F) {
+bool GVNPass::performPRE(Function &F) {
bool Changed = false;
for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
@@ -2824,7 +2846,7 @@ bool GVN::performPRE(Function &F) {
/// Split the critical edge connecting the given two blocks, and return
/// the block inserted to the critical edge.
-BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
+BasicBlock *GVNPass::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
// GVN does not require loop-simplify, do not try to preserve it if it is not
// possible.
BasicBlock *BB = SplitCriticalEdge(
@@ -2840,7 +2862,7 @@ BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) {
/// Split critical edges found during the previous
/// iteration that may enable further optimization.
-bool GVN::splitCriticalEdges() {
+bool GVNPass::splitCriticalEdges() {
if (toSplit.empty())
return false;
@@ -2860,7 +2882,7 @@ bool GVN::splitCriticalEdges() {
}
/// Executes one iteration of GVN
-bool GVN::iterateOnFunction(Function &F) {
+bool GVNPass::iterateOnFunction(Function &F) {
cleanupGlobalSets();
// Top-down walk of the dominator tree
@@ -2876,7 +2898,7 @@ bool GVN::iterateOnFunction(Function &F) {
return Changed;
}
-void GVN::cleanupGlobalSets() {
+void GVNPass::cleanupGlobalSets() {
VN.clear();
LeaderTable.clear();
BlockRPONumber.clear();
@@ -2887,7 +2909,7 @@ void GVN::cleanupGlobalSets() {
/// Verify that the specified instruction does not occur in our
/// internal data structures.
-void GVN::verifyRemoved(const Instruction *Inst) const {
+void GVNPass::verifyRemoved(const Instruction *Inst) const {
VN.verifyRemoved(Inst);
// Walk through the value number scope to make sure the instruction isn't
@@ -2907,7 +2929,7 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
/// function is to add all these blocks to "DeadBlocks". For the dead blocks'
/// live successors, update their phi nodes by replacing the operands
/// corresponding to dead blocks with UndefVal.
-void GVN::addDeadBlock(BasicBlock *BB) {
+void GVNPass::addDeadBlock(BasicBlock *BB) {
SmallVector<BasicBlock *, 4> NewDead;
SmallSetVector<BasicBlock *, 4> DF;
@@ -2995,7 +3017,7 @@ void GVN::addDeadBlock(BasicBlock *BB) {
// dead blocks with "UndefVal" in an hope these PHIs will optimized away.
//
// Return true iff *NEW* dead code are found.
-bool GVN::processFoldableCondBr(BranchInst *BI) {
+bool GVNPass::processFoldableCondBr(BranchInst *BI) {
if (!BI || BI->isUnconditional())
return false;
@@ -3023,7 +3045,7 @@ bool GVN::processFoldableCondBr(BranchInst *BI) {
// associated val-num. As it normally has far more live instructions than dead
// instructions, it makes more sense just to "fabricate" a val-number for the
// dead code than checking if instruction involved is dead or not.
-void GVN::assignValNumForDeadCode() {
+void GVNPass::assignValNumForDeadCode() {
for (BasicBlock *BB : DeadBlocks) {
for (Instruction &Inst : *BB) {
unsigned ValNum = VN.lookupOrAdd(&Inst);
@@ -3078,7 +3100,7 @@ public:
}
private:
- GVN Impl;
+ GVNPass Impl;
};
char GVNLegacyPass::ID = 0;
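The GVN changes both rename the pass class to GVNPass and add the same kind of printPipeline hook. A hedged, standalone sketch (not part of the commit): GVNOptions and its setters come from llvm/Transforms/Scalar/GVN.h, and the expected output follows from the printing code in the hunk above, where only explicitly-set options are emitted and disabled ones get a "no-" prefix.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar/GVN.h"
using namespace llvm;

void printGVNConfig() {
  GVNOptions Opts;
  Opts.setPRE(false).setMemDep(true); // leave the load-PRE options unset
  // With the identity name mapping this is expected to print
  // "GVNPass<no-pre;memdep>"; PassBuilder maps the class name to "gvn".
  GVNPass(Opts).printPipeline(outs(), [](StringRef Name) { return Name; });
}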
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index 790d71992da4..fdc3afd9348a 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -169,7 +169,7 @@ class InsnInfo {
public:
// Inserts I and its value number in VNtoScalars.
- void insert(Instruction *I, GVN::ValueTable &VN) {
+ void insert(Instruction *I, GVNPass::ValueTable &VN) {
// Scalar instruction.
unsigned V = VN.lookupOrAdd(I);
VNtoScalars[{V, InvalidVN}].push_back(I);
@@ -184,7 +184,7 @@ class LoadInfo {
public:
// Insert Load and the value number of its memory address in VNtoLoads.
- void insert(LoadInst *Load, GVN::ValueTable &VN) {
+ void insert(LoadInst *Load, GVNPass::ValueTable &VN) {
if (Load->isSimple()) {
unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
VNtoLoads[{V, InvalidVN}].push_back(Load);
@@ -201,7 +201,7 @@ class StoreInfo {
public:
// Insert the Store and a hash number of the store address and the stored
// value in VNtoStores.
- void insert(StoreInst *Store, GVN::ValueTable &VN) {
+ void insert(StoreInst *Store, GVNPass::ValueTable &VN) {
if (!Store->isSimple())
return;
// Hash the store address and the stored value.
@@ -221,7 +221,7 @@ class CallInfo {
public:
// Insert Call and its value numbering in one of the VNtoCalls* containers.
- void insert(CallInst *Call, GVN::ValueTable &VN) {
+ void insert(CallInst *Call, GVNPass::ValueTable &VN) {
// A call that doesNotAccessMemory is handled as a Scalar,
// onlyReadsMemory will be handled as a Load instruction,
// all other calls will be handled as stores.
@@ -274,7 +274,7 @@ public:
unsigned int rank(const Value *V) const;
private:
- GVN::ValueTable VN;
+ GVNPass::ValueTable VN;
DominatorTree *DT;
PostDominatorTree *PDT;
AliasAnalysis *AA;
@@ -377,12 +377,12 @@ private:
if (!Root)
return;
// Depth first walk on PDom tree to fill the CHIargs at each PDF.
- RenameStackType RenameStack;
for (auto Node : depth_first(Root)) {
BasicBlock *BB = Node->getBlock();
if (!BB)
continue;
+ RenameStackType RenameStack;
// Collect all values in BB and push to stack.
fillRenameStack(BB, ValueBBs, RenameStack);
@@ -827,6 +827,8 @@ void GVNHoist::fillRenameStack(BasicBlock *BB, InValuesType &ValueBBs,
auto it1 = ValueBBs.find(BB);
if (it1 != ValueBBs.end()) {
// Iterate in reverse order to keep lower ranked values on the top.
+ LLVM_DEBUG(dbgs() << "\nVisiting: " << BB->getName()
+ << " for pushing instructions on stack";);
for (std::pair<VNType, Instruction *> &VI : reverse(it1->second)) {
// Get the value of instruction I
LLVM_DEBUG(dbgs() << "\nPushing on stack: " << *VI.second);
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 61eb4ce0ed46..82b81003ef21 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
@@ -105,8 +106,10 @@ static void setCondition(Instruction *I, Value *NewCond) {
}
// Eliminates the guard instruction properly.
-static void eliminateGuard(Instruction *GuardInst) {
+static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
GuardInst->eraseFromParent();
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(GuardInst);
++GuardsEliminated;
}
@@ -114,6 +117,7 @@ class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
LoopInfo &LI;
+ MemorySSAUpdater *MSSAU;
/// Together, these describe the region of interest. This might be all of
/// the blocks within a function, or only a given loop's blocks and preheader.
@@ -269,12 +273,12 @@ class GuardWideningImpl {
}
public:
-
explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
- LoopInfo &LI, DomTreeNode *Root,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU,
+ DomTreeNode *Root,
std::function<bool(BasicBlock*)> BlockFilter)
- : DT(DT), PDT(PDT), LI(LI), Root(Root), BlockFilter(BlockFilter)
- {}
+ : DT(DT), PDT(PDT), LI(LI), MSSAU(MSSAU), Root(Root),
+ BlockFilter(BlockFilter) {}
/// The entry point for this pass.
bool run();
@@ -313,7 +317,7 @@ bool GuardWideningImpl::run() {
if (!WidenedGuards.count(I)) {
assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
if (isSupportedGuardInstruction(I))
- eliminateGuard(I);
+ eliminateGuard(I, MSSAU);
else {
assert(isa<BranchInst>(I) &&
"Eliminated something other than guard or branch?");
@@ -514,27 +518,20 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
ConstantRange CR1 =
ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());
- // SubsetIntersect is a subset of the actual mathematical intersection of
- // CR0 and CR1, while SupersetIntersect is a superset of the actual
- // mathematical intersection. If these two ConstantRanges are equal, then
- // we know we were able to represent the actual mathematical intersection
- // of CR0 and CR1, and can use the same to generate an icmp instruction.
- //
// Given what we're doing here and the semantics of guards, it would
- // actually be correct to just use SubsetIntersect, but that may be too
+ // be correct to use a subset intersection, but that may be too
// aggressive in cases we care about.
- auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse();
- auto SupersetIntersect = CR0.intersectWith(CR1);
-
- APInt NewRHSAP;
- CmpInst::Predicate Pred;
- if (SubsetIntersect == SupersetIntersect &&
- SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) {
- if (InsertPt) {
- ConstantInt *NewRHS = ConstantInt::get(Cond0->getContext(), NewRHSAP);
- Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ if (Optional<ConstantRange> Intersect = CR0.exactIntersectWith(CR1)) {
+ APInt NewRHSAP;
+ CmpInst::Predicate Pred;
+ if (Intersect->getEquivalentICmp(Pred, NewRHSAP)) {
+ if (InsertPt) {
+ ConstantInt *NewRHS =
+ ConstantInt::get(Cond0->getContext(), NewRHSAP);
+ Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
+ }
+ return true;
}
- return true;
}
}
}
@@ -766,12 +763,18 @@ PreservedAnalyses GuardWideningPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- if (!GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
- [](BasicBlock*) { return true; } ).run())
+ auto *MSSAA = AM.getCachedResult<MemorySSAAnalysis>(F);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAA->getMSSA());
+ if (!GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
+ DT.getRootNode(), [](BasicBlock *) { return true; })
+ .run())
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -784,11 +787,17 @@ PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L.contains(BB);
};
- if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, AR.DT.getNode(RootBB),
- BlockFilter).run())
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
+ if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, MSSAU ? MSSAU.get() : nullptr,
+ AR.DT.getNode(RootBB), BlockFilter).run())
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
namespace {
@@ -805,8 +814,14 @@ struct GuardWideningLegacyPass : public FunctionPass {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- return GuardWideningImpl(DT, &PDT, LI, DT.getRootNode(),
- [](BasicBlock*) { return true; } ).run();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
+ return GuardWideningImpl(DT, &PDT, LI, MSSAU ? MSSAU.get() : nullptr,
+ DT.getRootNode(),
+ [](BasicBlock *) { return true; })
+ .run();
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -814,6 +829,7 @@ struct GuardWideningLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -833,13 +849,18 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
+
BasicBlock *RootBB = L->getLoopPredecessor();
if (!RootBB)
RootBB = L->getHeader();
auto BlockFilter = [&](BasicBlock *BB) {
return BB == RootBB || L->contains(BB);
};
- return GuardWideningImpl(DT, PDT, LI,
+ return GuardWideningImpl(DT, PDT, LI, MSSAU ? MSSAU.get() : nullptr,
DT.getNode(RootBB), BlockFilter).run();
}
@@ -847,6 +868,7 @@ struct LoopGuardWideningLegacyPass : public LoopPass {
AU.setPreservesCFG();
getLoopAnalysisUsage(AU);
AU.addPreserved<PostDominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
}
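The rewritten widenCondCommon block above now leans on ConstantRange::exactIntersectWith. Here is a hedged, standalone sketch of that helper in isolation (not part of the patch; the wrapper function combineICmpRegions is illustrative): two icmp regions over the same bit width combine into one widened check only when their mathematical intersection is exactly representable as a single ConstantRange.

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Returns true and fills NewPred/NewRHS when (x Pred0 RHS0) && (x Pred1 RHS1)
// collapses to a single (x NewPred NewRHS). RHS0 and RHS1 must have the same
// bit width.
bool combineICmpRegions(CmpInst::Predicate Pred0, const APInt &RHS0,
                        CmpInst::Predicate Pred1, const APInt &RHS1,
                        CmpInst::Predicate &NewPred, APInt &NewRHS) {
  ConstantRange CR0 = ConstantRange::makeExactICmpRegion(Pred0, RHS0);
  ConstantRange CR1 = ConstantRange::makeExactICmpRegion(Pred1, RHS1);
  // None means the exact intersection is not representable as one range.
  if (Optional<ConstantRange> Exact = CR0.exactIntersectWith(CR1))
    return Exact->getEquivalentICmp(NewPred, NewRHS);
  return false;
}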
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9ee2a2d0bf08..ae2fe2767074 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -89,6 +89,7 @@
#include <utility>
using namespace llvm;
+using namespace PatternMatch;
#define DEBUG_TYPE "indvars"
@@ -155,6 +156,10 @@ class IndVarSimplify {
bool rewriteNonIntegerIVs(Loop *L);
bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
+ /// Try to improve our exit conditions by converting condition from signed
+ /// to unsigned or rotating computation out of the loop.
+ /// (See inline comment about why this is duplicated from simplifyAndExtend)
+ bool canonicalizeExitCondition(Loop *L);
/// Try to eliminate loop exits based on analyzeable exit counts
bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
/// Try to form loop invariant tests for loop exits by changing how many
@@ -494,6 +499,7 @@ bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
MadeAnyChanges = true;
PN.setIncomingValue(IncomingValIdx,
ExitVal->getIncomingValue(PreheaderIdx));
+ SE->forgetValue(&PN);
}
}
}
@@ -541,18 +547,18 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
return;
}
- if (!WI.WidestNativeType) {
+ if (!WI.WidestNativeType ||
+ Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
WI.IsSigned = IsSigned;
return;
}
- // We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (WI.IsSigned != IsSigned)
- return;
-
- if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
- WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ // We extend the IV to satisfy the sign of its user(s), choosing 'signed'
+ // if there are users of both sign- and zero-extensions, so that we do not
+ // introduce nondeterministic behaviour based on the unspecified order of a
+ // PHI node's users-iterator.
+ WI.IsSigned |= IsSigned;
}
//===----------------------------------------------------------------------===//
@@ -1274,9 +1280,9 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
// Skip debug info intrinsics.
do {
--I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+ } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ if (I->isDebugOrPseudoInst() && I == Preheader->begin())
Done = true;
} else {
Done = true;
@@ -1309,6 +1315,18 @@ static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
replaceExitCond(BI, NewCond, DeadInsts);
}
+static void replaceLoopPHINodesWithPreheaderValues(
+ Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
+ assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
+ auto *LoopPreheader = L->getLoopPreheader();
+ auto *LoopHeader = L->getHeader();
+ for (auto &PN : LoopHeader->phis()) {
+ auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
+ PN.replaceAllUsesWith(PreheaderIncoming);
+ DeadInsts.emplace_back(&PN);
+ }
+}
+
static void replaceWithInvariantCond(
const Loop *L, BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
const SCEV *InvariantLHS, const SCEV *InvariantRHS, SCEVExpander &Rewriter,
@@ -1333,7 +1351,6 @@ static bool optimizeLoopExitWithUnknownExitCount(
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
ICmpInst::Predicate Pred;
Value *LHS, *RHS;
- using namespace PatternMatch;
BasicBlock *TrueSucc, *FalseSucc;
if (!match(BI, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc))))
@@ -1394,6 +1411,140 @@ static bool optimizeLoopExitWithUnknownExitCount(
return true;
}
+bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
+ // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
+ // We need to duplicate it because, given an icmp of zext(small-iv) and C,
+ // IVUsers never reaches the icmp, since the zext doesn't fold to an AddRec
+ // unless it already has flags. The alternative would be to extend the set
+ // of "interesting" IV users to include the icmp, but doing that regresses
+ // results in practice: it queries SCEVs before the trip counts that rely
+ // on them, which makes SCEV cache sub-optimal answers. That concern about
+ // caching sub-optimal results is why we only query SCEVs of the loop
+ // invariant RHS here.
+ SmallVector<BasicBlock*, 16> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ bool Changed = false;
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse())
+ continue;
+
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ // For the range reasoning, avoid computing SCEVs in the loop to avoid
+ // poisoning the cache with sub-optimal results. For the must-execute case,
+ // this is a necessary precondition for correctness.
+ if (!L->isLoopInvariant(RHS)) {
+ if (!L->isLoopInvariant(LHS))
+ continue;
+ // Same logic applies for the inverse case
+ std::swap(LHS, RHS);
+ }
+
+ // Match (icmp signed-cond zext, RHS)
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
+ continue;
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
+ // replace the signed condition with the unsigned version.
+ ICmp->setPredicate(ICmp->getUnsignedPredicate());
+ Changed = true;
+ // Note: No SCEV invalidation needed. We've changed the predicate, but
+ // have not changed exit counts, or the values produced by the compare.
+ continue;
+ }
+ }
+
+ // Now that we've canonicalized the condition to match the extend,
+ // see if we can rotate the extend out of the loop.
+ for (auto *ExitingBB : ExitingBlocks) {
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
+ assert(BI->isConditional() && "exit branch must be conditional");
+
+ auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
+ continue;
+
+ bool Swapped = false;
+ auto *LHS = ICmp->getOperand(0);
+ auto *RHS = ICmp->getOperand(1);
+ if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
+ // Nothing to rotate
+ continue;
+ if (L->isLoopInvariant(LHS)) {
+ // Same logic applies for the inverse case until we actually pick
+ // which operand of the compare to update.
+ Swapped = true;
+ std::swap(LHS, RHS);
+ }
+ assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));
+
+ // Match (icmp unsigned-cond zext, RHS)
+ // TODO: Extend to handle corresponding sext/signed-cmp case
+ // TODO: Extend to other invertible functions
+ Value *LHSOp = nullptr;
+ if (!match(LHS, m_ZExt(m_Value(LHSOp))))
+ continue;
+
+ // In general, we only rotate if we can do so without increasing the number
+ // of instructions. The exception is when we have a zext(add-rec). The
+ // reason for allowing this exception is that we know we need to get rid
+ // of the zext for SCEV to be able to compute a trip count for said loops;
+ // we consider the new trip count valuable enough to increase instruction
+ // count by one.
+ if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
+ continue;
+
+ // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS
+ // replace with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
+ // when zext is loop varying and RHS is loop invariant. This converts
+ // loop varying work to loop-invariant work.
+ auto doRotateTransform = [&]() {
+ assert(ICmp->isUnsigned() && "must have proven unsigned already");
+ auto *NewRHS =
+ CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
+ L->getLoopPreheader()->getTerminator());
+ ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
+ ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
+ if (LHS->use_empty())
+ DeadInsts.push_back(LHS);
+ };
+
+ const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
+ const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
+ const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
+ auto FullCR = ConstantRange::getFull(InnerBitWidth);
+ FullCR = FullCR.zeroExtend(OuterBitWidth);
+ auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
+ if (FullCR.contains(RHSCR)) {
+ doRotateTransform();
+ Changed = true;
+ // Note: we are leaving SCEV in an unfortunately imprecise state here, as
+ // rotation tends to reveal trip-count information that was not previously
+ // visible.
+ continue;
+ }
+ }
+
+ return Changed;
+}
+
bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1499,20 +1650,18 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
// If we know we'd exit on the first iteration, rewrite the exit to
// reflect this. This does not imply the loop must exit through this
// exit; there may be an earlier one taken on the first iteration.
- // TODO: Given we know the backedge can't be taken, we should go ahead
- // and break it. Or at least, kill all the header phis and simplify.
+ // We know that the backedge can't be taken, so we replace all
+ // the header PHIs with values coming from the preheader.
if (ExitCount->isZero()) {
foldExit(L, ExitingBB, true, DeadInsts);
+ replaceLoopPHINodesWithPreheaderValues(L, DeadInsts);
Changed = true;
continue;
}
- // If we end up with a pointer exit count, bail. Note that we can end up
- // with a pointer exit count for one exiting block, and not for another in
- // the same loop.
- if (!ExitCount->getType()->isIntegerTy() ||
- !MaxExitCount->getType()->isIntegerTy())
- continue;
+ assert(ExitCount->getType()->isIntegerTy() &&
+ MaxExitCount->getType()->isIntegerTy() &&
+ "Exit counts must be integers");
Type *WiderType =
SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
@@ -1569,14 +1718,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// through *explicit* control flow. We have to eliminate the possibility of
// implicit exits (see below) before we know it's truly exact.
const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(ExactBTC) ||
- !SE->isLoopInvariant(ExactBTC, L) ||
- !isSafeToExpand(ExactBTC, *SE))
+ if (isa<SCEVCouldNotCompute>(ExactBTC) || !isSafeToExpand(ExactBTC, *SE))
return false;
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExactBTC->getType()->isIntegerTy())
- return false;
+ assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
+ assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");
auto BadExit = [&](BasicBlock *ExitingBB) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1603,15 +1749,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
return true;
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
- if (isa<SCEVCouldNotCompute>(ExitCount) ||
- !SE->isLoopInvariant(ExitCount, L) ||
- !isSafeToExpand(ExitCount, *SE))
- return true;
-
- // If we end up with a pointer exit count, bail. It may be unsized.
- if (!ExitCount->getType()->isIntegerTy())
+ if (isa<SCEVCouldNotCompute>(ExitCount) || !isSafeToExpand(ExitCount, *SE))
return true;
+ assert(SE->isLoopInvariant(ExitCount, L) &&
+ "Exit count must be loop invariant");
+ assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
return false;
};
@@ -1781,7 +1924,11 @@ bool IndVarSimplify::run(Loop *L) {
}
// Eliminate redundant IV cycles.
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);
+
+ // Try to convert exit conditions to unsigned and rotate the computation
+ // out of the loop. Note: this handles invalidation internally if needed.
+ Changed |= canonicalizeExitCondition(L);
// Try to eliminate loop exits based on analyzeable exit counts
if (optimizeLoopExits(L, Rewriter)) {
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f7d631f5e785..883d4afff3bd 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,10 +96,13 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -115,6 +118,7 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -146,6 +150,14 @@ static const unsigned UninitializedAddressSpace =
namespace {
using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
+// Unlike ValueToAddrSpaceMapTy, where a new addrspace is inferred on the
+// *def* of a value, PredicatedAddrSpaceMapTy is a map where a new addrspace
+// is inferred on the *use* of a pointer. This map is introduced to infer
+// addrspace from the addrspace predicate assumption built from the assume
+// intrinsic. In that scenario, only specific uses (under a valid assumption
+// context) can be inferred with a new addrspace.
+using PredicatedAddrSpaceMapTy =
+ DenseMap<std::pair<const Value *, const Value *>, unsigned>;
using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
class InferAddressSpaces : public FunctionPass {
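A minimal illustration of the def-vs-use distinction drawn in the PredicatedAddrSpaceMapTy comment above, using std::map and stand-in Value/Instruction types rather than LLVM's DenseMap; the address space number is made up:

  #include <map>
  #include <utility>

  struct Value {};                    // stand-in for llvm::Value
  struct Instruction : Value {};      // stand-in for llvm::Instruction

  // Per-def: one inferred address space per value.
  using DefMap = std::map<const Value *, unsigned>;
  // Per-use: an address space valid only for one (user, pointer operand) pair,
  // mirroring PredicatedAddrSpaceMapTy.
  using UseMap = std::map<std::pair<const Value *, const Value *>, unsigned>;

  int main() {
    Value Ptr;
    Instruction LoadA, LoadB;
    UseMap Predicated;
    // An assume dominating LoadA refines Ptr's address space at that use only;
    // LoadB, outside the assumption's context, stays unrefined.
    Predicated[{&LoadA, &Ptr}] = 3;
    return Predicated.count({&LoadB, &Ptr}); // 0: no predicate for this use
  }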
@@ -160,6 +172,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -167,6 +181,8 @@ public:
};
class InferAddressSpacesImpl {
+ AssumptionCache &AC;
+ DominatorTree *DT = nullptr;
const TargetTransformInfo *TTI = nullptr;
const DataLayout *DL = nullptr;
@@ -174,21 +190,24 @@ class InferAddressSpacesImpl {
/// possible.
unsigned FlatAddrSpace = 0;
- // Returns the new address space of V if updated; otherwise, returns None.
- Optional<unsigned>
- updateAddressSpace(const Value &V,
- const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
+ // Try to update the address space of V. Returns true if V is updated and
+ // false otherwise.
+ bool updateAddressSpace(const Value &V,
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const;
// Tries to infer the specific address space of each address expression in
// Postorder.
void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) const;
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const;
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
Value *cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const;
// Changes the flat address expressions in function F to point to specific
@@ -196,7 +215,8 @@ class InferAddressSpacesImpl {
// all flat expressions in the use-def graph of function F.
bool rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const;
void appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
@@ -211,14 +231,18 @@ class InferAddressSpacesImpl {
std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
Value *cloneValueWithNewAddressSpace(
- Value *V, unsigned NewAddrSpace,
- const ValueToValueMapTy &ValueWithNewAddrSpace,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ Value *V, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *UndefUsesToFix) const;
unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
+ unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
+
public:
- InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
- : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+ InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT,
+ const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+ : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
bool run(Function &F);
};
@@ -232,8 +256,12 @@ void initializeInferAddressSpacesPass(PassRegistry &);
} // end namespace llvm
-INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
- false, false)
+INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+ false, false)
// Check whether that's a no-op pointer bitcast using a pair of
// `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
@@ -505,6 +533,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
static Value *operandWithNewAddressSpaceOrCreateUndef(
const Use &OperandUse, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) {
Value *Operand = OperandUse.get();
@@ -517,6 +546,18 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
return NewOperand;
+ Instruction *Inst = cast<Instruction>(OperandUse.getUser());
+ auto I = PredicatedAS.find(std::make_pair(Inst, Operand));
+ if (I != PredicatedAS.end()) {
+ // Insert an addrspacecast on that operand before the user.
+ unsigned NewAS = I->second;
+ Type *NewPtrTy = PointerType::getWithSamePointeeType(
+ cast<PointerType>(Operand->getType()), NewAS);
+ auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
+ NewI->insertBefore(Inst);
+ return NewI;
+ }
+
UndefUsesToFix->push_back(&OperandUse);
return UndefValue::get(NewPtrTy);
}
@@ -536,6 +577,7 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
Type *NewPtrType = PointerType::getWithSamePointeeType(
cast<PointerType>(I->getType()), NewAddrSpace);
@@ -557,7 +599,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
- UndefUsesToFix);
+ PredicatedAS, UndefUsesToFix);
Value *Rewrite =
TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
if (Rewrite) {
@@ -586,7 +628,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
NewPointerOperands.push_back(nullptr);
else
NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
- OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+ OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ UndefUsesToFix));
}
switch (I->getOpcode()) {
@@ -708,9 +751,8 @@ static Value *cloneConstantExprWithNewAddressSpace(
if (CE->getOpcode() == Instruction::GetElementPtr) {
// Needs to specify the source type while constructing a getelementptr
// constant expression.
- return CE->getWithOperands(
- NewOperands, TargetType, /*OnlyIfReduced=*/false,
- NewOperands[0]->getType()->getPointerElementType());
+ return CE->getWithOperands(NewOperands, TargetType, /*OnlyIfReduced=*/false,
+ cast<GEPOperator>(CE)->getSourceElementType());
}
return CE->getWithOperands(NewOperands, TargetType);
@@ -724,6 +766,7 @@ static Value *cloneConstantExprWithNewAddressSpace(
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
SmallVectorImpl<const Use *> *UndefUsesToFix) const {
// All values in Postorder are flat address expressions.
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
@@ -731,7 +774,7 @@ Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
- I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+ I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, UndefUsesToFix);
if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
if (NewI->getParent() == nullptr) {
NewI->insertBefore(I);
@@ -779,46 +822,43 @@ bool InferAddressSpacesImpl::run(Function &F) {
// Runs a data-flow analysis to refine the address spaces of every expression
// in Postorder.
ValueToAddrSpaceMapTy InferredAddrSpace;
- inferAddressSpaces(Postorder, &InferredAddrSpace);
+ PredicatedAddrSpaceMapTy PredicatedAS;
+ inferAddressSpaces(Postorder, InferredAddrSpace, PredicatedAS);
// Changes the address spaces of the flat address expressions who are inferred
// to point to a specific address space.
- return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
+ return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace,
+ PredicatedAS, &F);
}
// Constants need to be tracked through RAUW to handle cases with nested
// constant expressions, so wrap values in WeakTrackingVH.
void InferAddressSpacesImpl::inferAddressSpaces(
ArrayRef<WeakTrackingVH> Postorder,
- ValueToAddrSpaceMapTy *InferredAddrSpace) const {
+ ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const {
SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
// Initially, all expressions are in the uninitialized address space.
for (Value *V : Postorder)
- (*InferredAddrSpace)[V] = UninitializedAddressSpace;
+ InferredAddrSpace[V] = UninitializedAddressSpace;
while (!Worklist.empty()) {
Value *V = Worklist.pop_back_val();
- // Tries to update the address space of the stack top according to the
+ // Try to update the address space of the stack top according to the
// address spaces of its operands.
- LLVM_DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
- Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
- if (!NewAS.hasValue())
+ if (!updateAddressSpace(*V, InferredAddrSpace, PredicatedAS))
continue;
- // If any updates are made, grabs its users to the worklist because
- // their address spaces can also be possibly updated.
- LLVM_DEBUG(dbgs() << " to " << NewAS.getValue() << '\n');
- (*InferredAddrSpace)[V] = NewAS.getValue();
for (Value *User : V->users()) {
// Skip if User is already in the worklist.
if (Worklist.count(User))
continue;
- auto Pos = InferredAddrSpace->find(User);
+ auto Pos = InferredAddrSpace.find(User);
// Our algorithm only updates the address spaces of flat address
// expressions, which are those in InferredAddrSpace.
- if (Pos == InferredAddrSpace->end())
+ if (Pos == InferredAddrSpace.end())
continue;
// Function updateAddressSpace moves the address space down a lattice
@@ -832,10 +872,37 @@ void InferAddressSpacesImpl::inferAddressSpaces(
}
}
-Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
- const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
+unsigned InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V,
+ Value *Opnd) const {
+ const Instruction *I = dyn_cast<Instruction>(&V);
+ if (!I)
+ return UninitializedAddressSpace;
+
+ Opnd = Opnd->stripInBoundsOffsets();
+ for (auto &AssumeVH : AC.assumptionsFor(Opnd)) {
+ if (!AssumeVH)
+ continue;
+ CallInst *CI = cast<CallInst>(AssumeVH);
+ if (!isValidAssumeForContext(CI, I, DT))
+ continue;
+
+ const Value *Ptr;
+ unsigned AS;
+ std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(CI->getArgOperand(0));
+ if (Ptr)
+ return AS;
+ }
+
+ return UninitializedAddressSpace;
+}
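getPredicatedAddrSpace above scans the assumptions registered for the pointer and accepts the first one whose context is valid at (dominates) the use. A toy C++ model of that lookup, where dominance over a straight-line program is reduced to an integer comparison and the predicate records are hypothetical:

  #include <cassert>
  #include <vector>

  constexpr unsigned UninitializedAS = ~0u;   // assumed sentinel value

  // A stand-in for one addrspace predicate discovered from an assume: it only
  // holds at program points dominated by DomPoint.
  struct AddrSpacePredicate {
    unsigned DomPoint;
    unsigned AS;
  };

  // Return the address space predicated for a use at UsePoint, or the
  // uninitialized sentinel if no assumption is valid there.
  unsigned predicatedAddrSpaceAt(const std::vector<AddrSpacePredicate> &Preds,
                                 unsigned UsePoint) {
    for (const AddrSpacePredicate &P : Preds)
      if (P.DomPoint <= UsePoint)             // "isValidAssumeForContext" stand-in
        return P.AS;
    return UninitializedAS;
  }

  int main() {
    std::vector<AddrSpacePredicate> Preds = {{/*DomPoint=*/5, /*AS=*/3}};
    assert(predicatedAddrSpaceAt(Preds, 10) == 3);              // use after the assume
    assert(predicatedAddrSpaceAt(Preds, 2) == UninitializedAS); // use before it
    return 0;
  }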
+
+bool InferAddressSpacesImpl::updateAddressSpace(
+ const Value &V, ValueToAddrSpaceMapTy &InferredAddrSpace,
+ PredicatedAddrSpaceMapTy &PredicatedAS) const {
assert(InferredAddrSpace.count(&V));
+ LLVM_DEBUG(dbgs() << "Updating the address space of\n " << V << '\n');
+
// The new inferred address space equals the join of the address spaces
// of all its pointer operands.
unsigned NewAS = UninitializedAddressSpace;
@@ -861,7 +928,7 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
// address space is known.
if ((C1 && Src0AS == UninitializedAddressSpace) ||
(C0 && Src1AS == UninitializedAddressSpace))
- return None;
+ return false;
if (C0 && isSafeToCastConstAddrSpace(C0, Src1AS))
NewAS = Src1AS;
@@ -878,10 +945,23 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
// Otherwise, infer the address space from its pointer operands.
for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
auto I = InferredAddrSpace.find(PtrOperand);
- unsigned OperandAS =
- I != InferredAddrSpace.end()
- ? I->second
- : PtrOperand->getType()->getPointerAddressSpace();
+ unsigned OperandAS;
+ if (I == InferredAddrSpace.end()) {
+ OperandAS = PtrOperand->getType()->getPointerAddressSpace();
+ if (OperandAS == FlatAddrSpace) {
+ // Check AC for assumption dominating V.
+ unsigned AS = getPredicatedAddrSpace(V, PtrOperand);
+ if (AS != UninitializedAddressSpace) {
+ LLVM_DEBUG(dbgs()
+ << " deduce operand AS from the predicate addrspace "
+ << AS << '\n');
+ OperandAS = AS;
+ // Record this use with the predicated AS.
+ PredicatedAS[std::make_pair(&V, PtrOperand)] = OperandAS;
+ }
+ }
+ } else
+ OperandAS = I->second;
// join(flat, *) = flat. So we can break if NewAS is already flat.
NewAS = joinAddressSpaces(NewAS, OperandAS);
@@ -894,8 +974,13 @@ Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
unsigned OldAS = InferredAddrSpace.lookup(&V);
assert(OldAS != FlatAddrSpace);
if (OldAS == NewAS)
- return None;
- return NewAS;
+ return false;
+
+ // If any update is made, its users are added to the worklist because
+ // their address spaces may also need to be updated.
+ LLVM_DEBUG(dbgs() << " to " << NewAS << '\n');
+ InferredAddrSpace[&V] = NewAS;
+ return true;
}
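The "join(flat, *) = flat" remark above describes a small lattice: the uninitialized sentinel is the identity of the join, the flat address space absorbs everything, and two distinct specific spaces collapse to flat. A standalone sketch of that join, with assumed numeric values for the sentinel and the flat space:

  #include <cassert>

  constexpr unsigned Uninitialized = ~0u;  // assumed sentinel, mirrors UninitializedAddressSpace
  constexpr unsigned Flat = 0;             // assumed numbering: 0 is the flat address space

  unsigned joinAS(unsigned A, unsigned B) {
    if (A == Flat || B == Flat)
      return Flat;                         // join(flat, *) = flat
    if (A == Uninitialized)
      return B;                            // uninitialized is the identity
    if (B == Uninitialized)
      return A;
    return A == B ? A : Flat;              // e.g. join(global, shared) = flat
  }

  int main() {
    assert(joinAS(Uninitialized, 3) == 3);
    assert(joinAS(3, 3) == 3);
    assert(joinAS(3, 5) == Flat);
    assert(joinAS(Flat, 3) == Flat);
    return 0;
  }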
/// \p returns true if \p U is the pointer operand of a memory instruction with
@@ -1026,7 +1111,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,
bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
- const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+ const ValueToAddrSpaceMapTy &InferredAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
// For each address expression to be modified, creates a clone of it with its
// pointer operands converted to the new address space. Since the pointer
// operands are converted, the clone is naturally in the new address space by
@@ -1042,8 +1128,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
continue;
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
- Value *New = cloneValueWithNewAddressSpace(
- V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+ Value *New =
+ cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
+ PredicatedAS, &UndefUsesToFix);
if (New)
ValueWithNewAddrSpace[V] = New;
}
@@ -1155,8 +1242,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (ASC->getDestAddressSpace() == NewAS) {
- if (ASC->getType()->getPointerElementType() !=
- NewV->getType()->getPointerElementType()) {
+ if (!cast<PointerType>(ASC->getType())
+ ->hasSameElementTypeAs(
+ cast<PointerType>(NewV->getType()))) {
NewV = CastInst::Create(Instruction::BitCast, NewV,
ASC->getType(), "", ASC);
}
@@ -1199,7 +1287,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
+ auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
return InferAddressSpacesImpl(
+ getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
FlatAddrSpace)
.run(F);
@@ -1217,11 +1308,14 @@ InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
PreservedAnalyses InferAddressSpacesPass::run(Function &F,
FunctionAnalysisManager &AM) {
bool Changed =
- InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+ InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F),
+ AM.getCachedResult<DominatorTreeAnalysis>(F),
+ &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
.run(F);
if (Changed) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
+ PA.preserve<DominatorTreeAnalysis>();
return PA;
}
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 9dc3b0351346..fe9a7211967c 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -331,7 +331,7 @@ bool JumpThreading::runOnFunction(Function &F) {
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = Impl.runImpl(F, TLI, LVI, AA, &DTU, F.hasProfileData(),
+ bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
@@ -360,7 +360,7 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
}
- bool Changed = runImpl(F, &TLI, &LVI, &AA, &DTU, F.hasProfileData(),
+ bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
std::move(BFI), std::move(BPI));
if (PrintLVIAfterJumpThreading) {
@@ -377,12 +377,14 @@ PreservedAnalyses JumpThreadingPass::run(Function &F,
}
bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
- LazyValueInfo *LVI_, AliasAnalysis *AA_,
- DomTreeUpdater *DTU_, bool HasProfileData_,
+ TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
+ AliasAnalysis *AA_, DomTreeUpdater *DTU_,
+ bool HasProfileData_,
std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_) {
LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TLI = TLI_;
+ TTI = TTI_;
LVI = LVI_;
AA = AA_;
DTU = DTU_;
@@ -514,7 +516,8 @@ static void replaceFoldableUses(Instruction *Cond, Value *ToVal) {
/// Return the cost of duplicating a piece of this block from first non-phi
/// and before StopAt instruction to thread across it. Stop scanning the block
/// when exceeding the threshold. If duplication is impossible, returns ~0U.
-static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
+static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI,
+ BasicBlock *BB,
Instruction *StopAt,
unsigned Threshold) {
assert(StopAt->getParent() == BB && "Not an instruction from proper BB?");
@@ -550,26 +553,21 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
if (Size > Threshold)
return Size;
- // Debugger intrinsics don't incur code size.
- if (isa<DbgInfoIntrinsic>(I)) continue;
-
- // Pseudo-probes don't incur code size.
- if (isa<PseudoProbeInst>(I))
- continue;
-
- // If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
- continue;
-
- // Freeze instruction is free, too.
- if (isa<FreezeInst>(I))
- continue;
-
// Bail out if this instruction gives back a token type, it is not possible
// to duplicate it if it is used outside this BB.
if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB))
return ~0U;
+ // Blocks with NoDuplicate are modelled as having infinite cost, so they
+ // are never duplicated.
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->cannotDuplicate() || CI->isConvergent())
+ return ~0U;
+
+ if (TTI->getUserCost(&*I, TargetTransformInfo::TCK_SizeAndLatency)
+ == TargetTransformInfo::TCC_Free)
+ continue;
+
// All other instructions count for at least one unit.
++Size;
@@ -578,11 +576,7 @@ static unsigned getJumpThreadDuplicationCost(BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->cannotDuplicate() || CI->isConvergent())
- // Blocks with NoDuplicate are modelled as having infinite cost, so they
- // are never duplicated.
- return ~0U;
- else if (!isa<IntrinsicInst>(CI))
+ if (!isa<IntrinsicInst>(CI))
Size += 3;
else if (!CI->getType()->isVectorTy())
Size += 1;
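The rewritten cost loop above replaces the hand-written list of "free" instructions with a single query to the target's cost model. A generic sketch of the resulting shape, with a placeholder instruction type and a caller-supplied is-free callback standing in for TTI::getUserCost:

  #include <cassert>
  #include <functional>
  #include <vector>

  // Placeholder instruction type; NonDuplicable models noduplicate/convergent
  // calls, which are treated as infinitely expensive.
  struct Inst {
    bool NonDuplicable;
    bool Free;
  };

  unsigned duplicationCost(const std::vector<Inst> &Block,
                           const std::function<bool(const Inst &)> &IsFree,
                           unsigned Threshold) {
    unsigned Size = 0;
    for (const Inst &I : Block) {
      if (Size > Threshold)
        return Size;          // already too expensive, stop scanning
      if (I.NonDuplicable)
        return ~0u;           // "never duplicate" modeled as infinite cost
      if (IsFree(I))
        continue;             // free per the cost model, does not count
      ++Size;
    }
    return Size;
  }

  int main() {
    std::vector<Inst> Block = {{false, true}, {false, false}, {false, true}};
    unsigned Cost = duplicationCost(
        Block, [](const Inst &I) { return I.Free; }, /*Threshold=*/10);
    assert(Cost == 1);        // only the single non-free instruction counts
    return 0;
  }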
@@ -1363,8 +1357,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) {
// If all of the loads and stores that feed the value have the same AA tags,
// then we can propagate them onto any newly inserted loads.
- AAMDNodes AATags;
- LoadI->getAAMetadata(AATags);
+ AAMDNodes AATags = LoadI->getAAMetadata();
SmallPtrSet<BasicBlock*, 8> PredsScanned;
@@ -2235,10 +2228,10 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB,
}
// Compute the cost of duplicating BB and PredBB.
- unsigned BBCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned BBCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
unsigned PredBBCost = getJumpThreadDuplicationCost(
- PredBB, PredBB->getTerminator(), BBDupThreshold);
+ TTI, PredBB, PredBB->getTerminator(), BBDupThreshold);
// Give up if costs are too high. We need to check BBCost and PredBBCost
// individually before checking their sum because getJumpThreadDuplicationCost
@@ -2346,8 +2339,8 @@ bool JumpThreadingPass::tryThreadEdge(
return false;
}
- unsigned JumpThreadCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned JumpThreadCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
if (JumpThreadCost > BBDupThreshold) {
LLVM_DEBUG(dbgs() << " Not threading BB '" << BB->getName()
<< "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -2615,8 +2608,8 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
return false;
}
- unsigned DuplicationCost =
- getJumpThreadDuplicationCost(BB, BB->getTerminator(), BBDupThreshold);
+ unsigned DuplicationCost = getJumpThreadDuplicationCost(
+ TTI, BB, BB->getTerminator(), BBDupThreshold);
if (DuplicationCost > BBDupThreshold) {
LLVM_DEBUG(dbgs() << " Not duplicating BB '" << BB->getName()
<< "' - Cost is too high: " << DuplicationCost << "\n");
@@ -3032,7 +3025,8 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
ValueToValueMapTy UnguardedMapping, GuardedMapping;
Instruction *AfterGuard = Guard->getNextNode();
- unsigned Cost = getJumpThreadDuplicationCost(BB, AfterGuard, BBDupThreshold);
+ unsigned Cost =
+ getJumpThreadDuplicationCost(TTI, BB, AfterGuard, BBDupThreshold);
if (Cost > BBDupThreshold)
return false;
// Duplicate all instructions before the guard and the guard itself to the
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 30058df3ded5..bf714d167670 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -117,13 +117,6 @@ static cl::opt<uint32_t> MaxNumUsesTraversed(
cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));
-// Default value of zero implies we use the regular alias set tracker mechanism
-// instead of the cross product using AA to identify aliasing of the memory
-// location we are interested in.
-static cl::opt<int>
-LICMN2Theshold("licm-n2-threshold", cl::Hidden, cl::init(0),
- cl::desc("How many instruction to cross product using AA"));
-
// Experimental option to allow imprecision in LICM in pathological cases, in
// exchange for faster compile. This is to be removed if MemorySSA starts to
// address the same issue. This flag applies only when LICM uses MemorySSA
@@ -151,7 +144,8 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop);
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
@@ -180,7 +174,7 @@ static Instruction *cloneInstructionInExitBlock(
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU);
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- AliasSetTracker *AST, MemorySSAUpdater *MSSAU);
+ MemorySSAUpdater *MSSAU);
static void moveInstructionBefore(Instruction &I, Instruction &Dest,
ICFLoopSafetyInfo &SafetyInfo,
@@ -206,9 +200,6 @@ struct LoopInvariantCodeMotion {
private:
unsigned LicmMssaOptCap;
unsigned LicmMssaNoAccForPromotionCap;
-
- std::unique_ptr<AliasSetTracker>
- collectAliasInfoForLoop(Loop *L, LoopInfo *LI, AAResults *AA);
};
struct LegacyLICMPass : public LoopPass {
@@ -228,9 +219,7 @@ struct LegacyLICMPass : public LoopPass {
<< L->getHeader()->getNameOrAsOperand() << "\n");
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? (&getAnalysis<MemorySSAWrapperPass>().getMSSA())
- : nullptr;
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
bool hasProfileData = L->getHeader()->getParent()->hasProfileData();
BlockFrequencyInfo *BFI =
hasProfileData ? &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI()
@@ -257,10 +246,8 @@ struct LegacyLICMPass : public LoopPass {
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
@@ -275,6 +262,9 @@ private:
PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &) {
+ if (!AR.MSSA)
+ report_fatal_error("LICM requires MemorySSA (loop-mssa)");
+
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
@@ -289,8 +279,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (AR.MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -298,6 +287,9 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
+ if (!AR.MSSA)
+ report_fatal_error("LNICM requires MemorySSA (loop-mssa)");
+
// For the new PM, we also can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
@@ -316,8 +308,7 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<LoopAnalysis>();
- if (AR.MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -386,10 +377,6 @@ bool LoopInvariantCodeMotion::runOnLoop(
return false;
}
- std::unique_ptr<AliasSetTracker> CurAST;
- std::unique_ptr<MemorySSAUpdater> MSSAU;
- std::unique_ptr<SinkAndHoistLICMFlags> Flags;
-
// Don't sink stores from loops with coroutine suspend instructions.
// LICM would sink instructions into the default destination of
// the coroutine switch. The default destination of the switch is to
@@ -406,17 +393,9 @@ bool LoopInvariantCodeMotion::runOnLoop(
});
});
- if (!MSSA) {
- LLVM_DEBUG(dbgs() << "LICM: Using Alias Set Tracker.\n");
- CurAST = collectAliasInfoForLoop(L, LI, AA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true);
- } else {
- LLVM_DEBUG(dbgs() << "LICM: Using MemorySSA.\n");
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- Flags = std::make_unique<SinkAndHoistLICMFlags>(
- LicmMssaOptCap, LicmMssaNoAccForPromotionCap, /*IsSink=*/true, L, MSSA);
- }
+ MemorySSAUpdater MSSAU(MSSA);
+ SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+ /*IsSink=*/true, L, MSSA);
// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();
@@ -435,14 +414,16 @@ bool LoopInvariantCodeMotion::runOnLoop(
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
if (L->hasDedicatedExits())
- Changed |=
- sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, TTI, L,
- CurAST.get(), MSSAU.get(), &SafetyInfo, *Flags.get(), ORE);
- Flags->setIsSink(false);
+ Changed |= LoopNestMode
+ ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI,
+ DT, BFI, TLI, TTI, L, &MSSAU,
+ &SafetyInfo, Flags, ORE)
+ : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI,
+ TLI, TTI, L, &MSSAU, &SafetyInfo, Flags, ORE);
+ Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
- CurAST.get(), MSSAU.get(), SE, &SafetyInfo,
- *Flags.get(), ORE, LoopNestMode);
+ &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
@@ -452,7 +433,7 @@ bool LoopInvariantCodeMotion::runOnLoop(
// preheader for SSA updater, so also avoid sinking when no preheader
// is available.
if (!DisablePromotion && Preheader && L->hasDedicatedExits() &&
- !Flags->tooManyMemoryAccesses() && !HasCoroSuspendInst) {
+ !Flags.tooManyMemoryAccesses() && !HasCoroSuspendInst) {
// Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -466,55 +447,29 @@ bool LoopInvariantCodeMotion::runOnLoop(
SmallVector<Instruction *, 8> InsertPts;
SmallVector<MemoryAccess *, 8> MSSAInsertPts;
InsertPts.reserve(ExitBlocks.size());
- if (MSSAU)
- MSSAInsertPts.reserve(ExitBlocks.size());
+ MSSAInsertPts.reserve(ExitBlocks.size());
for (BasicBlock *ExitBlock : ExitBlocks) {
InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
- if (MSSAU)
- MSSAInsertPts.push_back(nullptr);
+ MSSAInsertPts.push_back(nullptr);
}
PredIteratorCache PIC;
+ // Promoting one set of accesses may make the pointers for another set
+ // loop invariant, so run this in a loop (with the MaybePromotable set
+ // decreasing in size over time).
bool Promoted = false;
- if (CurAST.get()) {
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST) {
- // We can promote this alias set if it has a store, if it is a "Must"
- // alias set, if the pointer is loop invariant, and if we are not
- // eliminating any volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- !L->isLoopInvariant(AS.begin()->getValue()))
- continue;
-
- assert(
- !AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
-
- SmallSetVector<Value *, 8> PointerMustAliases;
- for (const auto &ASI : AS)
- PointerMustAliases.insert(ASI.getValue());
-
- Promoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
- DT, TLI, L, CurAST.get(), MSSAU.get(), &SafetyInfo, ORE);
+ bool LocalPromoted;
+ do {
+ LocalPromoted = false;
+ for (const SmallSetVector<Value *, 8> &PointerMustAliases :
+ collectPromotionCandidates(MSSA, AA, L)) {
+ LocalPromoted |= promoteLoopAccessesToScalars(
+ PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
+ LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
}
- } else {
- // Promoting one set of accesses may make the pointers for another set
- // loop invariant, so run this in a loop (with the MaybePromotable set
- // decreasing in size over time).
- bool LocalPromoted;
- do {
- LocalPromoted = false;
- for (const SmallSetVector<Value *, 8> &PointerMustAliases :
- collectPromotionCandidates(MSSA, AA, L)) {
- LocalPromoted |= promoteLoopAccessesToScalars(
- PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
- LI, DT, TLI, L, /*AST*/nullptr, MSSAU.get(), &SafetyInfo, ORE);
- }
- Promoted |= LocalPromoted;
- } while (LocalPromoted);
- }
+ Promoted |= LocalPromoted;
+ } while (LocalPromoted);
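The promotion loop above is a run-to-fixed-point pattern: each successful promotion may expose new candidates, so collection and promotion repeat until a whole round makes no local change. A generic sketch of that control flow with hypothetical collectCandidates/tryPromote stand-ins:

  #include <cassert>
  #include <vector>

  // Hypothetical stand-ins for the promotion machinery; only the control flow
  // mirrors the loop above.
  struct Candidate {};
  std::vector<Candidate> collectCandidates() { return {}; }  // stand-in
  bool tryPromote(const Candidate &) { return false; }       // stand-in

  bool promoteToFixedPoint() {
    bool Promoted = false;
    bool LocalPromoted;
    do {
      LocalPromoted = false;
      // Re-collect each round: earlier promotions may have made more pointers
      // loop invariant and therefore created new candidates.
      for (const Candidate &C : collectCandidates())
        LocalPromoted |= tryPromote(C);
      Promoted |= LocalPromoted;
    } while (LocalPromoted);
    return Promoted;
  }

  int main() {
    assert(!promoteToFixedPoint());  // trivial stand-ins promote nothing
    return 0;
  }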
// Once we have promoted values across the loop body we have to
// recursively reform LCSSA as any nested loop may now have values defined
@@ -536,8 +491,8 @@ bool LoopInvariantCodeMotion::runOnLoop(
assert((L->isOutermost() || L->getParentLoop()->isLCSSAForm(*DT)) &&
"Parent loop not left in LCSSA form after LICM!");
- if (MSSAU.get() && VerifyMemorySSA)
- MSSAU->getMemorySSA()->verifyMemorySSA();
+ if (VerifyMemorySSA)
+ MSSA->verifyMemorySSA();
if (Changed && SE)
SE->forgetLoopDispositions(L);
@@ -552,17 +507,15 @@ bool LoopInvariantCodeMotion::runOnLoop(
bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- Loop *CurLoop, AliasSetTracker *CurAST,
- MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ Loop *CurLoop, MemorySSAUpdater *MSSAU,
+ ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
- OptimizationRemarkEmitter *ORE) {
+ OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion.");
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
// We want to visit children before parents. We will enque all the parents
// before their children in the worklist and process the worklist in reverse
@@ -587,7 +540,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
salvageKnowledge(&I);
salvageDebugInfo(I);
++II;
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
@@ -598,26 +551,46 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// operands of the instruction are loop invariant.
//
bool FreeInLoop = false;
+ bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
- isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, FreeInLoop) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
- ORE)) {
+ isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+ SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/nullptr, MSSAU, true,
+ &Flags, ORE)) {
if (sink(I, LI, DT, BFI, CurLoop, SafetyInfo, MSSAU, ORE)) {
if (!FreeInLoop) {
++II;
salvageDebugInfo(I);
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
}
Changed = true;
}
}
}
}
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
+bool llvm::sinkRegionForLoopNest(
+ DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT,
+ BlockFrequencyInfo *BFI, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) {
+
+ bool Changed = false;
+ SmallPriorityWorklist<Loop *, 4> Worklist;
+ Worklist.insert(CurLoop);
+ appendLoopsToWorklist(*CurLoop, Worklist);
+ while (!Worklist.empty()) {
+ Loop *L = Worklist.pop_back_val();
+ Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI,
+ TTI, L, MSSAU, SafetyInfo, Flags, ORE, CurLoop);
+ }
+ return Changed;
+}
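sinkRegionForLoopNest visits every loop of the nest, not just the outermost one. The real code drives this with a priority worklist; a plain recursion over a stand-in loop tree expresses the same "sink once in each member of the nest" idea:

  #include <vector>

  // Stand-in loop nest: each loop owns its immediate subloops.
  struct Loop {
    std::vector<Loop *> SubLoops;
  };

  bool sinkRegion(Loop *) { return false; }  // stand-in for per-loop sinking

  // Visit the loop and every subloop of the nest exactly once.
  bool sinkLoopNest(Loop *Outermost) {
    bool Changed = sinkRegion(Outermost);
    for (Loop *Sub : Outermost->SubLoops)
      Changed |= sinkLoopNest(Sub);
    return Changed;
  }

  int main() {
    Loop Inner, Outer;
    Outer.SubLoops.push_back(&Inner);
    return sinkLoopNest(&Outer) ? 1 : 0;
  }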
+
namespace {
// This is a helper class for hoistRegion to make it able to hoist control flow
// in order to be able to hoist phis. The way this works is that we initially
@@ -820,9 +793,8 @@ public:
if (HoistTarget == InitialPreheader) {
// Phis in the loop header now need to use the new preheader.
InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
- if (MSSAU)
- MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
- HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
+ MSSAU->wireOldPredecessorsToNewImmediatePredecessor(
+ HoistTarget->getSingleSuccessor(), HoistCommonSucc, {HoistTarget});
// The new preheader dominates the loop header.
DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
DomTreeNode *HeaderNode = DT->getNode(CurLoop->getHeader());
@@ -884,16 +856,14 @@ static bool worthSinkOrHoistInst(Instruction &I, BasicBlock *DstBlock,
bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
DominatorTree *DT, BlockFrequencyInfo *BFI,
TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo,
+ MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
+ ICFLoopSafetyInfo *SafetyInfo,
SinkAndHoistLICMFlags &Flags,
OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
- CurLoop != nullptr && SafetyInfo != nullptr &&
+ CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
"Unexpected input to hoistRegion.");
- assert(((CurAST != nullptr) ^ (MSSAU != nullptr)) &&
- "Either AliasSetTracker or MemorySSA should be initialized.");
ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU);
@@ -913,8 +883,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
if (!LoopNestMode && inSubLoop(BB, CurLoop, LI))
continue;
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
- Instruction &I = *II++;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to
// just fold it.
@@ -922,12 +891,10 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
&I, I.getModule()->getDataLayout(), TLI)) {
LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
<< '\n');
- if (CurAST)
- CurAST->copyValue(&I, C);
// FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
I.replaceAllUsesWith(C);
if (isInstructionTriviallyDead(&I, TLI))
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
Changed = true;
continue;
}
@@ -940,8 +907,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// and we have accurately duplicated the control flow from the loop header
// to that block.
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, MSSAU, true, &Flags,
- ORE) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, /*CurAST*/ nullptr, MSSAU,
+ true, &Flags, ORE) &&
worthSinkOrHoistInst(I, CurLoop->getLoopPreheader(), ORE, BFI) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
@@ -970,7 +937,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
SafetyInfo->insertInstructionTo(Product, I.getParent());
Product->insertAfter(&I);
I.replaceAllUsesWith(Product);
- eraseInstruction(I, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(I, *SafetyInfo, MSSAU);
hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB),
SafetyInfo, MSSAU, SE, ORE);
@@ -1049,7 +1016,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
}
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Now that we've finished hoisting make sure that LI and DT are still
@@ -1101,6 +1068,10 @@ static bool isLoadInvariantInLoop(LoadInst *LI, DominatorTree *DT,
return false;
Addr = BC->getOperand(0);
}
+ // If we've ended up at a global/constant, bail. We shouldn't be looking at
+ // uselists for non-local Values in a loop pass.
+ if (isa<Constant>(Addr))
+ return false;
unsigned UsesVisited = 0;
// Traverse all uses of the load operand value, to see if invariant.start is
@@ -1273,7 +1244,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// writes to this memory in the loop, we can hoist or sink.
if (AAResults::onlyAccessesArgPointees(Behavior)) {
// TODO: expand to writeable arguments
- for (Value *Op : CI->arg_operands())
+ for (Value *Op : CI->args())
if (Op->getType()->isPointerTy()) {
bool Invalidated;
if (CurAST)
@@ -1443,7 +1414,8 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop) {
+ TargetTransformInfo *TTI, bool &FreeInLoop,
+ bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
bool IsFree = isFreeInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
@@ -1460,6 +1432,15 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
if (!BlockColors.empty() &&
BlockColors.find(const_cast<BasicBlock *>(BB))->second.size() != 1)
return false;
+
+ if (LoopNestMode) {
+ while (isa<PHINode>(UI) && UI->hasOneUser() &&
+ UI->getNumOperands() == 1) {
+ if (!CurLoop->contains(UI))
+ break;
+ UI = cast<Instruction>(UI->user_back());
+ }
+ }
}
if (CurLoop->contains(UI)) {
@@ -1546,9 +1527,7 @@ static Instruction *cloneInstructionInExitBlock(
}
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
- AliasSetTracker *AST, MemorySSAUpdater *MSSAU) {
- if (AST)
- AST->deleteValue(&I);
+ MemorySSAUpdater *MSSAU) {
if (MSSAU)
MSSAU->removeMemoryAccess(&I);
SafetyInfo.removeInstruction(&I);
@@ -1599,8 +1578,7 @@ static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
// predecessor fairly simple.
if (!SafetyInfo->getBlockColors().empty() && BB->getFirstNonPHI()->isEHPad())
return false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *BBPred = *PI;
+ for (BasicBlock *BBPred : predecessors(BB)) {
if (isa<IndirectBrInst>(BBPred->getTerminator()) ||
isa<CallBrInst>(BBPred->getTerminator()))
return false;
@@ -1786,7 +1764,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
PN->replaceAllUsesWith(New);
- eraseInstruction(*PN, *SafetyInfo, nullptr, nullptr);
+ eraseInstruction(*PN, *SafetyInfo, nullptr);
Changed = true;
}
return Changed;
@@ -1875,11 +1853,10 @@ class LoopPromoter : public LoadAndStorePromoter {
SmallVectorImpl<Instruction *> &LoopInsertPts;
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts;
PredIteratorCache &PredCache;
- AliasSetTracker *AST;
MemorySSAUpdater *MSSAU;
LoopInfo &LI;
DebugLoc DL;
- int Alignment;
+ Align Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
ICFLoopSafetyInfo &SafetyInfo;
@@ -1907,13 +1884,13 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP,
SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC,
- AliasSetTracker *ast, MemorySSAUpdater *MSSAU, LoopInfo &li,
- DebugLoc dl, int alignment, bool UnorderedAtomic,
- const AAMDNodes &AATags, ICFLoopSafetyInfo &SafetyInfo)
+ MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl,
+ Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags,
+ ICFLoopSafetyInfo &SafetyInfo)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP),
- PredCache(PIC), AST(ast), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
- Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
+ PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)),
+ Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags),
SafetyInfo(SafetyInfo) {}
bool isInstInList(Instruction *I,
@@ -1940,39 +1917,29 @@ public:
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
- NewSI->setAlignment(Align(Alignment));
+ NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
NewSI->setAAMetadata(AATags);
- if (MSSAU) {
- MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
- MemoryAccess *NewMemAcc;
- if (!MSSAInsertPoint) {
- NewMemAcc = MSSAU->createMemoryAccessInBB(
- NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
- } else {
- NewMemAcc =
- MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
- }
- MSSAInsertPts[i] = NewMemAcc;
- MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
- // FIXME: true for safety, false may still be correct.
+ MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i];
+ MemoryAccess *NewMemAcc;
+ if (!MSSAInsertPoint) {
+ NewMemAcc = MSSAU->createMemoryAccessInBB(
+ NewSI, nullptr, NewSI->getParent(), MemorySSA::Beginning);
+ } else {
+ NewMemAcc =
+ MSSAU->createMemoryAccessAfter(NewSI, nullptr, MSSAInsertPoint);
}
+ MSSAInsertPts[i] = NewMemAcc;
+ MSSAU->insertDef(cast<MemoryDef>(NewMemAcc), true);
+ // FIXME: true for safety, false may still be correct.
}
}
- void replaceLoadWithValue(LoadInst *LI, Value *V) const override {
- // Update alias analysis.
- if (AST)
- AST->copyValue(LI, V);
- }
void instructionDeleted(Instruction *I) const override {
SafetyInfo.removeInstruction(I);
- if (AST)
- AST->deleteValue(I);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
+ MSSAU->removeMemoryAccess(I);
}
};
@@ -2023,8 +1990,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVectorImpl<Instruction *> &InsertPts,
SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST, MemorySSAUpdater *MSSAU,
- ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
+ Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
+ OptimizationRemarkEmitter *ORE) {
// Verify inputs.
assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
SafetyInfo != nullptr &&
@@ -2189,9 +2156,9 @@ bool llvm::promoteLoopAccessesToScalars(
// Merge the AA tags.
if (LoopUses.empty()) {
// On the first load/store, just take its AA tags.
- UI->getAAMetadata(AATags);
+ AATags = UI->getAAMetadata();
} else if (AATags) {
- UI->getAAMetadata(AATags, /* Merge = */ true);
+ AATags = AATags.merge(UI->getAAMetadata());
}
LoopUses.push_back(UI);
@@ -2256,9 +2223,8 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- InsertPts, MSSAInsertPts, PIC, CurAST, MSSAU, *LI, DL,
- Alignment.value(), SawUnorderedAtomic, AATags,
- *SafetyInfo);
+ InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL,
+ Alignment, SawUnorderedAtomic, AATags, *SafetyInfo);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
@@ -2273,24 +2239,22 @@ bool llvm::promoteLoopAccessesToScalars(
PreheaderLoad->setAAMetadata(AATags);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
- if (MSSAU) {
- MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
- PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
- MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
- MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
- }
+ MemoryAccess *PreheaderLoadMemoryAccess = MSSAU->createMemoryAccessInBB(
+ PreheaderLoad, nullptr, PreheaderLoad->getParent(), MemorySSA::End);
+ MemoryUse *NewMemUse = cast<MemoryUse>(PreheaderLoadMemoryAccess);
+ MSSAU->insertUse(NewMemUse, /*RenameUses=*/true);
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// Rewrite all the loads in the loop and remember all the definitions from
// stores in the loop.
Promoter.run(LoopUses);
- if (MSSAU && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
// If the SSAUpdater didn't use the load in the preheader, just zap it now.
if (PreheaderLoad->use_empty())
- eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);
+ eraseInstruction(*PreheaderLoad, *SafetyInfo, MSSAU);
return true;
}
@@ -2356,71 +2320,10 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
return Result;
}
-/// Returns an owning pointer to an alias set which incorporates aliasing info
-/// from L and all subloops of L.
-std::unique_ptr<AliasSetTracker>
-LoopInvariantCodeMotion::collectAliasInfoForLoop(Loop *L, LoopInfo *LI,
- AAResults *AA) {
- auto CurAST = std::make_unique<AliasSetTracker>(*AA);
-
- // Add everything from all the sub loops.
- for (Loop *InnerL : L->getSubLoops())
- for (BasicBlock *BB : InnerL->blocks())
- CurAST->add(*BB);
-
- // And merge in this loop (without anything from inner loops).
- for (BasicBlock *BB : L->blocks())
- if (LI->getLoopFor(BB) == L)
- CurAST->add(*BB);
-
- return CurAST;
-}
-
static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
AliasSetTracker *CurAST, Loop *CurLoop,
AAResults *AA) {
- // First check to see if any of the basic blocks in CurLoop invalidate *V.
- bool isInvalidatedAccordingToAST = CurAST->getAliasSetFor(MemLoc).isMod();
-
- if (!isInvalidatedAccordingToAST || !LICMN2Theshold)
- return isInvalidatedAccordingToAST;
-
- // Check with a diagnostic analysis if we can refine the information above.
- // This is to identify the limitations of using the AST.
- // The alias set mechanism used by LICM has a major weakness in that it
- // combines all things which may alias into a single set *before* asking
- // modref questions. As a result, a single readonly call within a loop will
- // collapse all loads and stores into a single alias set and report
- // invalidation if the loop contains any store. For example, readonly calls
- // with deopt states have this form and create a general alias set with all
- // loads and stores. In order to get any LICM in loops containing possible
- // deopt states we need a more precise invalidation of checking the mod ref
- // info of each instruction within the loop and LI. This has a complexity of
- // O(N^2), so currently, it is used only as a diagnostic tool since the
- // default value of LICMN2Threshold is zero.
-
- // Don't look at nested loops.
- if (CurLoop->begin() != CurLoop->end())
- return true;
-
- int N = 0;
- for (BasicBlock *BB : CurLoop->getBlocks())
- for (Instruction &I : *BB) {
- if (N >= LICMN2Theshold) {
- LLVM_DEBUG(dbgs() << "Alasing N2 threshold exhausted for "
- << *(MemLoc.Ptr) << "\n");
- return true;
- }
- N++;
- auto Res = AA->getModRefInfo(&I, MemLoc);
- if (isModSet(Res)) {
- LLVM_DEBUG(dbgs() << "Aliasing failed on " << I << " for "
- << *(MemLoc.Ptr) << "\n");
- return true;
- }
- }
- LLVM_DEBUG(dbgs() << "Aliasing okay for " << *(MemLoc.Ptr) << "\n");
- return false;
+ return CurAST->getAliasSetFor(MemLoc).isMod();
}
bool pointerInvalidatedByLoopWithMSSA(MemorySSA *MSSA, MemoryUse *MU,
diff --git a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
index 993b154dc9a8..d438d56e38ca 100644
--- a/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopBoundSplit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -39,10 +40,12 @@ struct ConditionInfo {
ICmpInst::Predicate Pred;
/// AddRec llvm value
Value *AddRecValue;
+ /// Non-PHI AddRec llvm value
+ Value *NonPHIAddRecValue;
/// Bound llvm value
Value *BoundValue;
/// AddRec SCEV
- const SCEV *AddRecSCEV;
+ const SCEVAddRecExpr *AddRecSCEV;
/// Bound SCEV
const SCEV *BoundSCEV;
@@ -54,19 +57,31 @@ struct ConditionInfo {
} // namespace
static void analyzeICmp(ScalarEvolution &SE, ICmpInst *ICmp,
- ConditionInfo &Cond) {
+ ConditionInfo &Cond, const Loop &L) {
Cond.ICmp = ICmp;
if (match(ICmp, m_ICmp(Cond.Pred, m_Value(Cond.AddRecValue),
m_Value(Cond.BoundValue)))) {
- Cond.AddRecSCEV = SE.getSCEV(Cond.AddRecValue);
- Cond.BoundSCEV = SE.getSCEV(Cond.BoundValue);
+ const SCEV *AddRecSCEV = SE.getSCEV(Cond.AddRecValue);
+ const SCEV *BoundSCEV = SE.getSCEV(Cond.BoundValue);
+ const SCEVAddRecExpr *LHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
+ const SCEVAddRecExpr *RHSAddRecSCEV = dyn_cast<SCEVAddRecExpr>(BoundSCEV);
// Locate AddRec in LHSSCEV and Bound in RHSSCEV.
- if (isa<SCEVAddRecExpr>(Cond.BoundSCEV) &&
- !isa<SCEVAddRecExpr>(Cond.AddRecSCEV)) {
+ if (!LHSAddRecSCEV && RHSAddRecSCEV) {
std::swap(Cond.AddRecValue, Cond.BoundValue);
- std::swap(Cond.AddRecSCEV, Cond.BoundSCEV);
+ std::swap(AddRecSCEV, BoundSCEV);
Cond.Pred = ICmpInst::getSwappedPredicate(Cond.Pred);
}
+
+ Cond.AddRecSCEV = dyn_cast<SCEVAddRecExpr>(AddRecSCEV);
+ Cond.BoundSCEV = BoundSCEV;
+ Cond.NonPHIAddRecValue = Cond.AddRecValue;
+
+ // If Cond.AddRecValue is a PHI node, update Cond.NonPHIAddRecValue with the
+ // value coming from the backedge.
+ if (Cond.AddRecSCEV && isa<PHINode>(Cond.AddRecValue)) {
+ PHINode *PN = cast<PHINode>(Cond.AddRecValue);
+ Cond.NonPHIAddRecValue = PN->getIncomingValueForBlock(L.getLoopLatch());
+ }
}
}
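When the AddRec appears on the right-hand side of the compare, the code above swaps the operands and replaces the predicate with its swapped form so later logic only sees the AddRec on the left. A tiny self-contained check that operand swapping plus predicate swapping preserves the comparison's meaning (signed predicates only, as an illustration):

  #include <cassert>

  enum Pred { SLT, SGT, SLE, SGE };

  Pred swappedPredicate(Pred P) {
    switch (P) {
    case SLT: return SGT;
    case SGT: return SLT;
    case SLE: return SGE;
    case SGE: return SLE;
    }
    return P;
  }

  bool eval(Pred P, int L, int R) {
    switch (P) {
    case SLT: return L < R;
    case SGT: return L > R;
    case SLE: return L <= R;
    case SGE: return L >= R;
    }
    return false;
  }

  int main() {
    // (L pred R) is equivalent to (R swappedPred L) for every pair of values.
    for (int L = -2; L <= 2; ++L)
      for (int R = -2; R <= 2; ++R)
        for (Pred P : {SLT, SGT, SLE, SGE})
          assert(eval(P, L, R) == eval(swappedPredicate(P), R, L));
    return 0;
  }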
@@ -118,21 +133,20 @@ static bool calculateUpperBound(const Loop &L, ScalarEvolution &SE,
static bool hasProcessableCondition(const Loop &L, ScalarEvolution &SE,
ICmpInst *ICmp, ConditionInfo &Cond,
bool IsExitCond) {
- analyzeICmp(SE, ICmp, Cond);
+ analyzeICmp(SE, ICmp, Cond, L);
// The BoundSCEV should be evaluated at loop entry.
if (!SE.isAvailableAtLoopEntry(Cond.BoundSCEV, &L))
return false;
- const SCEVAddRecExpr *AddRecSCEV = dyn_cast<SCEVAddRecExpr>(Cond.AddRecSCEV);
// Allowed AddRec as induction variable.
- if (!AddRecSCEV)
+ if (!Cond.AddRecSCEV)
return false;
- if (!AddRecSCEV->isAffine())
+ if (!Cond.AddRecSCEV->isAffine())
return false;
- const SCEV *StepRecSCEV = AddRecSCEV->getStepRecurrence(SE);
+ const SCEV *StepRecSCEV = Cond.AddRecSCEV->getStepRecurrence(SE);
// Allowed constant step.
if (!isa<SCEVConstant>(StepRecSCEV))
return false;
@@ -264,6 +278,14 @@ static BranchInst *findSplitCandidate(const Loop &L, ScalarEvolution &SE,
SplitCandidateCond.BoundSCEV->getType())
continue;
+ // After the transformation, we assume the split condition of the pre-loop is
+ // always true. To guarantee this, check that the start value of the split
+ // condition's AddRec satisfies the split condition.
+ if (!SE.isLoopEntryGuardedByCond(&L, SplitCandidateCond.Pred,
+ SplitCandidateCond.AddRecSCEV->getStart(),
+ SplitCandidateCond.BoundSCEV))
+ continue;
+
SplitCandidateCond.BI = BI;
return BI;
}
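A hypothetical source-level picture of the transform this guard protects: the loop is split at the bound of the in-loop condition, so the pre-loop runs with the split condition always true and the post-loop with it always false. The array, bounds, and min/max clamping below are illustrative only, not the pass's exact legality conditions:

  #include <algorithm>
  #include <cassert>

  void before(int *A, int N, int M) {
    for (int i = 0; i < N; ++i)
      A[i] = (i < M) ? 1 : 2;
  }

  void after(int *A, int N, int M) {
    int Split = std::max(0, std::min(M, N)); // split bound, clamped to [0, N]
    for (int i = 0; i < Split; ++i)          // pre-loop: i < M is always true
      A[i] = 1;
    for (int i = Split; i < N; ++i)          // post-loop: i < M is always false
      A[i] = 2;
  }

  int main() {
    int X[8] = {}, Y[8] = {};
    before(X, 8, 3);
    after(Y, 8, 3);
    for (int i = 0; i < 8; ++i)
      assert(X[i] == Y[i]);
    return 0;
  }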
@@ -341,13 +363,45 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
".split", &LI, &DT, PostLoopBlocks);
remapInstructionsInBlocks(PostLoopBlocks, VMap);
- // Add conditional branch to check we can skip post-loop in its preheader.
BasicBlock *PostLoopPreHeader = PostLoop->getLoopPreheader();
- IRBuilder<> Builder(PostLoopPreHeader);
+ IRBuilder<> Builder(&PostLoopPreHeader->front());
+
+ // Update phi nodes in header of post-loop.
+ bool isExitingLatch = (L.getExitingBlock() == L.getLoopLatch());
+ Value *ExitingCondLCSSAPhi = nullptr;
+ for (PHINode &PN : L.getHeader()->phis()) {
+ // Create LCSSA phi node in preheader of post-loop.
+ PHINode *LCSSAPhi =
+ Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
+ LCSSAPhi->setDebugLoc(PN.getDebugLoc());
+ // If the exiting block is the loop latch, the phi does not have the update
+ // from the last iteration; in that case, use the value from the backedge for the LCSSA phi.
+ LCSSAPhi->addIncoming(
+ isExitingLatch ? PN.getIncomingValueForBlock(L.getLoopLatch()) : &PN,
+ L.getExitingBlock());
+
+ // Update the start value of phi node in post-loop with the LCSSA phi node.
+ PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
+ PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader, LCSSAPhi);
+
+ // Find the PHI carrying the exiting condition from the pre-loop. Its SCEV
+ // should be a SCEVAddRecExpr, and it should have the same incoming value
+ // from the backedge as ExitingCond.
+ if (!SE.isSCEVable(PN.getType()))
+ continue;
+
+ const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
+ if (PhiSCEV && ExitingCond.NonPHIAddRecValue ==
+ PN.getIncomingValueForBlock(L.getLoopLatch()))
+ ExitingCondLCSSAPhi = LCSSAPhi;
+ }
+
+ // Add conditional branch to check we can skip post-loop in its preheader.
Instruction *OrigBI = PostLoopPreHeader->getTerminator();
ICmpInst::Predicate Pred = ICmpInst::ICMP_NE;
Value *Cond =
- Builder.CreateICmp(Pred, ExitingCond.AddRecValue, ExitingCond.BoundValue);
+ Builder.CreateICmp(Pred, ExitingCondLCSSAPhi, ExitingCond.BoundValue);
Builder.CreateCondBr(Cond, PostLoop->getHeader(), PostLoop->getExitBlock());
OrigBI->eraseFromParent();
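Roughly, and only as a sketch of the intended shape (not the exact IR the pass builds), the transformed code looks like this: the pre-loop runs to the new bound, the LCSSA phis created above carry the final IV value into the post-loop preheader, and the compare just built skips the post-loop when the pre-loop already reached the original bound.

void splitShape(int *A, int *B, int N, int Split) {
  int Bound = Split < N ? Split : N; // NewBound for the pre-loop
  int I = 0;
  for (; I < Bound; ++I)             // pre-loop: split condition folded to true
    A[I] = 0;
  if (I != N)                        // guard emitted in PostLoopPreHeader
    for (; I < N; ++I)               // post-loop: starts from the LCSSA value
      B[I] = 1;
}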
@@ -368,21 +422,6 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
// Replace exiting bound value of pre-loop NewBound.
ExitingCond.ICmp->setOperand(1, NewBoundValue);
- // Replace IV's start value of post-loop by NewBound.
- for (PHINode &PN : L.getHeader()->phis()) {
- // Find PHI with exiting condition from pre-loop.
- if (SE.isSCEVable(PN.getType()) && isa<SCEVAddRecExpr>(SE.getSCEV(&PN))) {
- for (Value *Op : PN.incoming_values()) {
- if (Op == ExitingCond.AddRecValue) {
- // Find cloned PHI for post-loop.
- PHINode *PostLoopPN = cast<PHINode>(VMap[&PN]);
- PostLoopPN->setIncomingValueForBlock(PostLoopPreHeader,
- NewBoundValue);
- }
- }
- }
- }
-
// Replace SplitCandidateCond.BI's condition of pre-loop by True.
LLVMContext &Context = PreHeader->getContext();
SplitCandidateCond.BI->setCondition(ConstantInt::getTrue(Context));
@@ -398,6 +437,30 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
else
ExitingCond.BI->setSuccessor(1, PostLoopPreHeader);
+ // Update phi node in exit block of post-loop.
+ Builder.SetInsertPoint(&PostLoopPreHeader->front());
+ for (PHINode &PN : PostLoop->getExitBlock()->phis()) {
+ for (auto i : seq<int>(0, PN.getNumOperands())) {
+ // Check incoming block is pre-loop's exiting block.
+ if (PN.getIncomingBlock(i) == L.getExitingBlock()) {
+ Value *IncomingValue = PN.getIncomingValue(i);
+
+ // Create LCSSA phi node for incoming value.
+ PHINode *LCSSAPhi =
+ Builder.CreatePHI(PN.getType(), 1, PN.getName() + ".lcssa");
+ LCSSAPhi->setDebugLoc(PN.getDebugLoc());
+ LCSSAPhi->addIncoming(IncomingValue, PN.getIncomingBlock(i));
+
+ // Replace pre-loop's exiting block by post-loop's preheader.
+ PN.setIncomingBlock(i, PostLoopPreHeader);
+ // Replace incoming value by LCSSAPhi.
+ PN.setIncomingValue(i, LCSSAPhi);
+ // Add a new incoming value with post-loop's exiting block.
+ PN.addIncoming(VMap[IncomingValue], PostLoop->getExitingBlock());
+ }
+ }
+ }
+
// Update dominator tree.
DT.changeImmediateDominator(PostLoopPreHeader, L.getExitingBlock());
DT.changeImmediateDominator(PostLoop->getExitBlock(), PostLoopPreHeader);
@@ -406,10 +469,7 @@ static bool splitLoopBound(Loop &L, DominatorTree &DT, LoopInfo &LI,
SE.forgetLoop(&L);
// Canonicalize loops.
- // TODO: Try to update LCSSA information according to above change.
- formLCSSA(L, DT, &LI, &SE);
simplifyLoop(&L, &DT, &LI, &SE, nullptr, nullptr, true);
- formLCSSA(*PostLoop, DT, &LI, &SE);
simplifyLoop(PostLoop, &DT, &LI, &SE, nullptr, nullptr, true);
// Add new post-loop to loop pass manager.
diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index a5d7835bd094..77d76609c926 100644
--- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -127,6 +128,8 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
@@ -143,6 +146,7 @@ INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index f7e8442fae81..5814e2f043d5 100644
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -36,6 +36,8 @@ using namespace llvm;
#define DEBUG_TYPE "loop-delete"
STATISTIC(NumDeleted, "Number of loops deleted");
+STATISTIC(NumBackedgesBroken,
+ "Number of loops for which we managed to break the backedge");
static cl::opt<bool> EnableSymbolicExecution(
"loop-deletion-enable-symbolic-execution", cl::Hidden, cl::init(true),
@@ -191,6 +193,20 @@ getValueOnFirstIteration(Value *V, DenseMap<Value *, Value *> &FirstIterValue,
Value *RHS =
getValueOnFirstIteration(BO->getOperand(1), FirstIterValue, SQ);
FirstIterV = SimplifyBinOp(BO->getOpcode(), LHS, RHS, SQ);
+ } else if (auto *Cmp = dyn_cast<ICmpInst>(V)) {
+ Value *LHS =
+ getValueOnFirstIteration(Cmp->getOperand(0), FirstIterValue, SQ);
+ Value *RHS =
+ getValueOnFirstIteration(Cmp->getOperand(1), FirstIterValue, SQ);
+ FirstIterV = SimplifyICmpInst(Cmp->getPredicate(), LHS, RHS, SQ);
+ } else if (auto *Select = dyn_cast<SelectInst>(V)) {
+ Value *Cond =
+ getValueOnFirstIteration(Select->getCondition(), FirstIterValue, SQ);
+ if (auto *C = dyn_cast<ConstantInt>(Cond)) {
+ auto *Selected = C->isAllOnesValue() ? Select->getTrueValue()
+ : Select->getFalseValue();
+ FirstIterV = getValueOnFirstIteration(Selected, FirstIterValue, SQ);
+ }
}
if (!FirstIterV)
FirstIterV = V;
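A hedged example of what the added ICmp/Select handling enables (my code, not from the patch): on the first iteration every value below folds to a constant, so canProveExitOnFirstIteration can now prove the exit is taken and the backedge is dead.

int exitsOnFirstIteration(const int *A, int N) {
  int Acc = 0;
  for (int I = 0;; ++I) {
    bool Done = (I == 0);           // ICmpInst: folds to true when I == 0
    Acc += Done ? A[0] : A[I] + N;  // SelectInst: folds to its true operand
    if (Done)                       // branch condition known true on iteration 0
      break;
  }
  return Acc;
}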
@@ -314,22 +330,20 @@ static bool canProveExitOnFirstIteration(Loop *L, DominatorTree &DT,
}
using namespace PatternMatch;
- ICmpInst::Predicate Pred;
- Value *LHS, *RHS;
+ Value *Cond;
BasicBlock *IfTrue, *IfFalse;
auto *Term = BB->getTerminator();
- if (match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ if (match(Term, m_Br(m_Value(Cond),
m_BasicBlock(IfTrue), m_BasicBlock(IfFalse)))) {
- if (!LHS->getType()->isIntegerTy()) {
+ auto *ICmp = dyn_cast<ICmpInst>(Cond);
+ if (!ICmp || !ICmp->getType()->isIntegerTy()) {
MarkAllSuccessorsLive(BB);
continue;
}
// Can we prove constant true or false for this condition?
- LHS = getValueOnFirstIteration(LHS, FirstIterValue, SQ);
- RHS = getValueOnFirstIteration(RHS, FirstIterValue, SQ);
- auto *KnownCondition = SimplifyICmpInst(Pred, LHS, RHS, SQ);
- if (!KnownCondition) {
+ auto *KnownCondition = getValueOnFirstIteration(ICmp, FirstIterValue, SQ);
+ if (KnownCondition == ICmp) {
// Failed to simplify.
MarkAllSuccessorsLive(BB);
continue;
@@ -393,14 +407,25 @@ breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
if (!L->getLoopLatch())
return LoopDeletionResult::Unmodified;
- auto *BTC = SE.getBackedgeTakenCount(L);
- if (!isa<SCEVCouldNotCompute>(BTC) && SE.isKnownNonZero(BTC))
- return LoopDeletionResult::Unmodified;
- if (!BTC->isZero() && !canProveExitOnFirstIteration(L, DT, LI))
- return LoopDeletionResult::Unmodified;
+ auto *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+ if (BTC->isZero()) {
+ // SCEV knows this backedge isn't taken!
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ ++NumBackedgesBroken;
+ return LoopDeletionResult::Deleted;
+ }
- breakLoopBackedge(L, DT, SE, LI, MSSA);
- return LoopDeletionResult::Deleted;
+ // If SCEV leaves open the possibility of a zero trip count, see if
+ // symbolically evaluating the first iteration lets us prove the backedge
+ // unreachable.
+ if (isa<SCEVCouldNotCompute>(BTC) || !SE.isKnownNonZero(BTC))
+ if (canProveExitOnFirstIteration(L, DT, LI)) {
+ breakLoopBackedge(L, DT, SE, LI, MSSA);
+ ++NumBackedgesBroken;
+ return LoopDeletionResult::Deleted;
+ }
+
+ return LoopDeletionResult::Unmodified;
}
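As a hedged example of the first, cheaper branch (the loop is mine, not from the patch): SCEV's symbolic maximum backedge-taken count should already be zero here, so the backedge is removed without running the per-instruction first-iteration evaluation.

void neverTakesBackedge(int *A, unsigned N) {
  for (unsigned I = 4; I < (N % 4); ++I) // N % 4 is at most 3, I starts at 4
    A[I] = 0;                            // symbolic max backedge-taken count: 0
}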
/// Remove a loop if it is dead.
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index bac3dc0f3fb9..0f4c767c1e4c 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -1057,8 +1057,8 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, nullptr};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index f54289f85ef5..965d1575518e 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -27,6 +27,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/LoopFlatten.h"
+
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -49,11 +51,13 @@
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#define DEBUG_TYPE "loop-flatten"
-
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "loop-flatten"
+
+STATISTIC(NumFlattened, "Number of loops flattened");
+
static cl::opt<unsigned> RepeatedInstructionThreshold(
"loop-flatten-cost-threshold", cl::Hidden, cl::init(2),
cl::desc("Limit on the cost of instructions that can be repeated due to "
@@ -90,9 +94,33 @@ struct FlattenInfo {
// Whether this holds the flatten info before or after widening.
bool Widened = false;
+ // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
+ // been applied. This bookkeeping is used so we can skip some checks on these
+ // phi nodes.
+ PHINode *NarrowInnerInductionPHI = nullptr;
+ PHINode *NarrowOuterInductionPHI = nullptr;
+
FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL) {};
+
+ bool isNarrowInductionPhi(PHINode *Phi) {
+ // This can't be the narrow phi if we haven't widened the IV first.
+ if (!Widened)
+ return false;
+ return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
+ }
};
+static bool
+setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
+ SmallPtrSetImpl<Instruction *> &IterationInstructions) {
+ TripCount = TC;
+ IterationInstructions.insert(Increment);
+ LLVM_DEBUG(dbgs() << "Found Increment: "; Increment->dump());
+ LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
+ LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
+ return true;
+}
+
// Finds the induction variable, increment and trip count for a simple loop that
// we can flatten.
static bool findLoopComponents(
@@ -164,36 +192,68 @@ static bool findLoopComponents(
return false;
}
// The trip count is the RHS of the compare. If this doesn't match the trip
- // count computed by SCEV then this is either because the trip count variable
- // has been widened (then leave the trip count as it is), or because it is a
- // constant and another transformation has changed the compare, e.g.
- // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, then we don't flatten
- // the loop (yet).
- TripCount = Compare->getOperand(1);
+ // count computed by SCEV then this is because the trip count variable
+ // has been widened so the types don't match, or because it is a constant and
+ // another transformation has changed the compare (e.g. icmp ult %inc,
+ // tripcount -> icmp ult %j, tripcount-1), or both.
+ Value *RHS = Compare->getOperand(1);
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
+ return false;
+ }
+ // The use of the Extend=false flag on getTripCountFromExitCount was added
+ // during a refactoring to preserve existing behavior. However, there's
+ // nothing obvious in the surrounding code that handles the overflow case.
+ // FIXME: audit code to establish whether there's a latent bug here.
const SCEV *SCEVTripCount =
- SE->getTripCountFromExitCount(SE->getBackedgeTakenCount(L));
- if (SE->getSCEV(TripCount) != SCEVTripCount) {
- if (!IsWidened) {
- LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
- return false;
- }
- auto TripCountInst = dyn_cast<Instruction>(TripCount);
- if (!TripCountInst) {
- LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
- return false;
+ SE->getTripCountFromExitCount(BackedgeTakenCount, false);
+ const SCEV *SCEVRHS = SE->getSCEV(RHS);
+ if (SCEVRHS == SCEVTripCount)
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+ ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
+ if (ConstantRHS) {
+ const SCEV *BackedgeTCExt = nullptr;
+ if (IsWidened) {
+ const SCEV *SCEVTripCountExt;
+ // Find the extended backedge taken count and extended trip count using
+ // SCEV. One of these should now match the RHS of the compare.
+ BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
+ SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
+ if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
}
- if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
- SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
- LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
- return false;
+ // If the RHS of the compare is equal to the backedge taken count we need
+ // to add one to get the trip count.
+ if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
+ ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
+ Value *NewRHS = ConstantInt::get(
+ ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+ return setLoopComponents(NewRHS, TripCount, Increment,
+ IterationInstructions);
}
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
- IterationInstructions.insert(Increment);
- LLVM_DEBUG(dbgs() << "Found increment: "; Increment->dump());
- LLVM_DEBUG(dbgs() << "Found trip count: "; TripCount->dump());
-
- LLVM_DEBUG(dbgs() << "Successfully found all loop components\n");
- return true;
+ // If the RHS isn't a constant, then check that the reason it doesn't match
+ // the SCEV trip count is that the RHS is a ZExt or SExt instruction
+ // (and take the trip count to be the RHS).
+ if (!IsWidened) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
+ auto *TripCountInst = dyn_cast<Instruction>(RHS);
+ if (!TripCountInst) {
+ LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+ return false;
+ }
+ if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
+ SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
+ LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
+ return false;
+ }
+ return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
}
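A hedged worked example of the constant-RHS path (numbers only, not from the patch): a 10-iteration inner loop has BackedgeTakenCount = 9, so if another pass rewrote the latch compare to test against 9, the RHS matches the backedge-taken count and the trip count handed to setLoopComponents has to be rebuilt as RHS + 1.

constexpr unsigned BackedgeTakenCount = 9; // inner loop runs 10 times
constexpr unsigned CompareRHS = 9;         // compare rewritten to use the BTC
constexpr unsigned TripCount = CompareRHS + 1;
static_assert(TripCount == BackedgeTakenCount + 1, "trip count = BTC + 1");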
static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
@@ -221,6 +281,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
// them specially when doing the transformation.
if (&InnerPHI == FI.InnerInductionPHI)
continue;
+ if (FI.isNarrowInductionPhi(&InnerPHI))
+ continue;
// Each inner loop PHI node must have two incoming values/blocks - one
// from the pre-header, and one from the latch.
@@ -266,6 +328,8 @@ static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
}
for (PHINode &OuterPHI : FI.OuterLoop->getHeader()->phis()) {
+ if (FI.isNarrowInductionPhi(&OuterPHI))
+ continue;
if (!SafeOuterPHIs.count(&OuterPHI)) {
LLVM_DEBUG(dbgs() << "found unsafe PHI in outer loop: "; OuterPHI.dump());
return false;
@@ -356,18 +420,25 @@ static bool checkIVUsers(FlattenInfo &FI) {
if (U == FI.InnerIncrement)
continue;
- // After widening the IVs, a trunc instruction might have been introduced, so
- // look through truncs.
+ // After widening the IVs, a trunc instruction might have been introduced,
+ // so look through truncs.
if (isa<TruncInst>(U)) {
if (!U->hasOneUse())
return false;
U = *U->user_begin();
}
+ // If the use is in the compare (which is also the condition of the inner
+ // branch) then the compare has been altered by another transformation, e.g.
+ // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
+ // a constant. Ignore this use as the compare gets removed later anyway.
+ if (U == FI.InnerBranch->getCondition())
+ continue;
+
LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
- Value *MatchedMul;
- Value *MatchedItCount;
+ Value *MatchedMul = nullptr;
+ Value *MatchedItCount = nullptr;
bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
m_Value(MatchedMul))) &&
match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
@@ -375,11 +446,23 @@ static bool checkIVUsers(FlattenInfo &FI) {
// Matches the same pattern as above, except it also looks for truncs
// on the phi, which can be the result of widening the induction variables.
- bool IsAddTrunc = match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
- m_Value(MatchedMul))) &&
- match(MatchedMul,
- m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
- m_Value(MatchedItCount)));
+ bool IsAddTrunc =
+ match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
+ m_Value(MatchedMul))) &&
+ match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
+ m_Value(MatchedItCount)));
+
+ if (!MatchedItCount)
+ return false;
+ // Look through extends if the IV has been widened.
+ if (FI.Widened &&
+ (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
+ assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
+ "Unexpected type mismatch in types after widening");
+ MatchedItCount = isa<SExtInst>(MatchedItCount)
+ ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
+ : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
+ }
if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
LLVM_DEBUG(dbgs() << "Use is optimisable\n");
@@ -451,17 +534,27 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
for (Value *V : FI.LinearIVUses) {
for (Value *U : V->users()) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
- // The IV is used as the operand of a GEP, and the IV is at least as
- // wide as the address space of the GEP. In this case, the GEP would
- // wrap around the address space before the IV increment wraps, which
- // would be UB.
- if (GEP->isInBounds() &&
- V->getType()->getIntegerBitWidth() >=
- DL.getPointerTypeSizeInBits(GEP->getType())) {
- LLVM_DEBUG(
- dbgs() << "use of linear IV would be UB if overflow occurred: ";
- GEP->dump());
- return OverflowResult::NeverOverflows;
+ for (Value *GEPUser : U->users()) {
+ Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser);
+ if (!isa<LoadInst>(GEPUserInst) &&
+ !(isa<StoreInst>(GEPUserInst) &&
+ GEP == GEPUserInst->getOperand(1)))
+ continue;
+ if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst,
+ FI.InnerLoop))
+ continue;
+ // The IV is used as the operand of a GEP which dominates the loop
+ // latch, and the IV is at least as wide as the address space of the
+ // GEP. In this case, the GEP would wrap around the address space
+ // before the IV increment wraps, which would be UB.
+ if (GEP->isInBounds() &&
+ V->getType()->getIntegerBitWidth() >=
+ DL.getPointerTypeSizeInBits(GEP->getType())) {
+ LLVM_DEBUG(
+ dbgs() << "use of linear IV would be UB if overflow occurred: ";
+ GEP->dump());
+ return OverflowResult::NeverOverflows;
+ }
}
}
}
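A hedged illustration of the tightened reasoning (my code, not from the patch): only GEP users that are loads, or stores through the GEP, and that are guaranteed to execute on every iteration may justify the no-overflow conclusion; a conditional access no longer does.

void writeRows(char *P, int Rows, int Cols, bool Flag) {
  for (int I = 0; I < Rows; ++I)
    for (int J = 0; J < Cols; ++J) {
      P[I * Cols + J] = 1;   // unconditional store: counts for the check
      if (Flag)
        P[I * Cols + J] = 2; // conditional store: ignored by the new check
    }
}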
@@ -518,7 +611,7 @@ static bool CanFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI, LPMUpdater *U) {
Function *F = FI.OuterLoop->getHeader()->getParent();
LLVM_DEBUG(dbgs() << "Checks all passed, doing the transformation\n");
{
@@ -574,7 +667,13 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
// deleted, and any information that we have about the outer loop invalidated.
SE->forgetLoop(FI.OuterLoop);
SE->forgetLoop(FI.InnerLoop);
+ if (U)
+ U->markLoopAsDeleted(*FI.InnerLoop, FI.InnerLoop->getName());
LI->erase(FI.InnerLoop);
+
+ // Increment statistic value.
+ NumFlattened++;
+
return true;
}
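A minimal, hedged sketch of the ordering this change relies on (markAndErase is a hypothetical helper, not part of the patch): under the new pass manager the LPMUpdater must learn about the deleted loop before LoopInfo forgets it, while the legacy pass simply passes a null updater.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
static void markAndErase(llvm::Loop &L, llvm::LoopInfo &LI, llvm::LPMUpdater *U) {
  if (U)
    U->markLoopAsDeleted(L, L.getName()); // tell the loop pass manager first
  LI.erase(&L);                           // then drop the loop from LoopInfo
}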
@@ -605,14 +704,11 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
}
SCEVExpander Rewriter(*SE, DL, "loopflatten");
- SmallVector<WideIVInfo, 2> WideIVs;
SmallVector<WeakTrackingVH, 4> DeadInsts;
- WideIVs.push_back( {FI.InnerInductionPHI, MaxLegalType, false });
- WideIVs.push_back( {FI.OuterInductionPHI, MaxLegalType, false });
unsigned ElimExt = 0;
unsigned Widened = 0;
- for (const auto &WideIV : WideIVs) {
+ auto CreateWideIV = [&] (WideIVInfo WideIV, bool &Deleted) -> bool {
PHINode *WidePhi = createWideIV(WideIV, LI, SE, Rewriter, DT, DeadInsts,
ElimExt, Widened, true /* HasGuards */,
true /* UsePostIncrementRanges */);
@@ -620,17 +716,35 @@ static bool CanWidenIV(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return false;
LLVM_DEBUG(dbgs() << "Created wide phi: "; WidePhi->dump());
LLVM_DEBUG(dbgs() << "Deleting old phi: "; WideIV.NarrowIV->dump());
- RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
- }
- // After widening, rediscover all the loop components.
+ Deleted = RecursivelyDeleteDeadPHINode(WideIV.NarrowIV);
+ return true;
+ };
+
+ bool Deleted;
+ if (!CreateWideIV({FI.InnerInductionPHI, MaxLegalType, false }, Deleted))
+ return false;
+ // Add the narrow phi to the list, so that it will be adjusted later when
+ // the transformation is performed.
+ if (!Deleted)
+ FI.InnerPHIsToTransform.insert(FI.InnerInductionPHI);
+
+ if (!CreateWideIV({FI.OuterInductionPHI, MaxLegalType, false }, Deleted))
+ return false;
+
assert(Widened && "Widened IV expected");
FI.Widened = true;
+
+ // Save the old/narrow induction phis, which we need to ignore in CheckPHIs.
+ FI.NarrowInnerInductionPHI = FI.InnerInductionPHI;
+ FI.NarrowOuterInductionPHI = FI.OuterInductionPHI;
+
+ // After widening, rediscover all the loop components.
return CanFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
}
static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
ScalarEvolution *SE, AssumptionCache *AC,
- const TargetTransformInfo *TTI) {
+ const TargetTransformInfo *TTI, LPMUpdater *U) {
LLVM_DEBUG(
dbgs() << "Loop flattening running on outer loop "
<< FI.OuterLoop->getHeader()->getName() << " and inner loop "
@@ -641,12 +755,30 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return false;
// Check if we can widen the induction variables to avoid overflow checks.
- if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
-
- // Check if the new iteration variable might overflow. In this case, we
- // need to version the loop, and select the original version at runtime if
- // the iteration space is too large.
+ bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
+
+ // It can happen that, after widening the IV, flattening no longer takes
+ // place, e.g. because it is deemed unprofitable. So bail out here if that
+ // is the case.
+ // TODO: IV widening without performing the actual flattening transformation
+ // is not ideal. While this codegen change should not matter much, it is an
+ // unnecessary change which is better to avoid. It's unlikely this happens
+ // often, because if it's unprofitable after widening, it should be
+ // unprofitable before widening as checked in the first round of checks. But
+ // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
+ // relaxed. Because this is making a code change (the IV widening, but not
+ // the flattening), we return true here.
+ if (FI.Widened && !CanFlatten)
+ return true;
+
+ // If we have widened and can perform the transformation, do that here.
+ if (CanFlatten)
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
+
+ // Otherwise, if we haven't widened the IV, check if the new iteration
+ // variable might overflow. In this case, we need to version the loop, and
+ // select the original version at runtime if the iteration space is too
+ // large.
// TODO: We currently don't version the loop.
OverflowResult OR = checkOverflow(FI, DT, AC);
if (OR == OverflowResult::AlwaysOverflowsHigh ||
@@ -659,18 +791,18 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
}
LLVM_DEBUG(dbgs() << "Multiply cannot overflow, modifying loop in-place\n");
- return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
}
bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, TargetTransformInfo *TTI) {
+ AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U) {
bool Changed = false;
for (Loop *InnerLoop : LN.getLoops()) {
auto *OuterLoop = InnerLoop->getParentLoop();
if (!OuterLoop)
continue;
FlattenInfo FI(OuterLoop, InnerLoop);
- Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI);
+ Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U);
}
return Changed;
}
@@ -685,12 +817,12 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
- Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI);
+ Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U);
if (!Changed)
return PreservedAnalyses::all();
- return PreservedAnalyses::none();
+ return getLoopPassPreservedAnalyses();
}
namespace {
@@ -735,7 +867,7 @@ bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
bool Changed = false;
for (Loop *L : *LI) {
auto LN = LoopNest::getLoopNest(*L, *SE);
- Changed |= Flatten(*LN, DT, LI, SE, AC, TTI);
+ Changed |= Flatten(*LN, DT, LI, SE, AC, TTI, nullptr);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 3d60e205b002..42da86a9ecf5 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -217,15 +217,15 @@ private:
bool processLoopMemCpy(MemCpyInst *MCI, const SCEV *BECount);
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
- bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
+ bool processLoopStridedStore(Value *DestPtr, const SCEV *StoreSizeSCEV,
MaybeAlign StoreAlignment, Value *StoredVal,
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
- bool NegStride, bool IsLoopMemset = false);
+ bool IsNegStride, bool IsLoopMemset = false);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
bool processLoopStoreOfLoopLoad(Value *DestPtr, Value *SourcePtr,
- unsigned StoreSize, MaybeAlign StoreAlign,
+ const SCEV *StoreSize, MaybeAlign StoreAlign,
MaybeAlign LoadAlign, Instruction *TheStore,
Instruction *TheLoad,
const SCEVAddRecExpr *StoreEv,
@@ -625,8 +625,8 @@ bool LoopIdiomRecognize::runOnLoopBlock(
// We can only promote stores in this block if they are unconditionally
// executed in the loop. For a block to be unconditionally executed, it has
// to dominate all the exit blocks of the loop. Verify this now.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(BB, ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(BB, ExitBlock))
return false;
bool MadeChange = false;
@@ -750,16 +750,13 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
bool Changed = false;
// For stores that start but don't end a link in the chain:
- for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
- it != e; ++it) {
- if (Tails.count(*it))
+ for (StoreInst *I : Heads) {
+ if (Tails.count(I))
continue;
// We found a store instr that starts a chain. Now follow the chain and try
// to transform it.
SmallPtrSet<Instruction *, 8> AdjacentStores;
- StoreInst *I = *it;
-
StoreInst *HeadStore = I;
unsigned StoreSize = 0;
@@ -784,12 +781,14 @@ bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
if (StoreSize != Stride && StoreSize != -Stride)
continue;
- bool NegStride = StoreSize == -Stride;
+ bool IsNegStride = StoreSize == -Stride;
- if (processLoopStridedStore(StorePtr, StoreSize,
+ Type *IntIdxTy = DL->getIndexType(StorePtr->getType());
+ const SCEV *StoreSizeSCEV = SE->getConstant(IntIdxTy, StoreSize);
+ if (processLoopStridedStore(StorePtr, StoreSizeSCEV,
MaybeAlign(HeadStore->getAlignment()),
StoredVal, HeadStore, AdjacentStores, StoreEv,
- BECount, NegStride)) {
+ BECount, IsNegStride)) {
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
Changed = true;
}
@@ -857,15 +856,15 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
// Check if the stride matches the size of the memcpy. If so, then we know
// that every byte is touched in the loop.
- const SCEVConstant *StoreStride =
+ const SCEVConstant *ConstStoreStride =
dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- const SCEVConstant *LoadStride =
+ const SCEVConstant *ConstLoadStride =
dyn_cast<SCEVConstant>(LoadEv->getOperand(1));
- if (!StoreStride || !LoadStride)
+ if (!ConstStoreStride || !ConstLoadStride)
return false;
- APInt StoreStrideValue = StoreStride->getAPInt();
- APInt LoadStrideValue = LoadStride->getAPInt();
+ APInt StoreStrideValue = ConstStoreStride->getAPInt();
+ APInt LoadStrideValue = ConstLoadStride->getAPInt();
// Huge stride value - give up
if (StoreStrideValue.getBitWidth() > 64 || LoadStrideValue.getBitWidth() > 64)
return false;
@@ -875,7 +874,7 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
return OptimizationRemarkMissed(DEBUG_TYPE, "SizeStrideUnequal", MCI)
<< ore::NV("Inst", "memcpy") << " in "
<< ore::NV("Function", MCI->getFunction())
- << " function will not be hoised: "
+ << " function will not be hoisted: "
<< ore::NV("Reason", "memcpy size is not equal to stride");
});
return false;
@@ -887,16 +886,17 @@ bool LoopIdiomRecognize::processLoopMemCpy(MemCpyInst *MCI,
if (StoreStrideInt != LoadStrideInt)
return false;
- return processLoopStoreOfLoopLoad(Dest, Source, (unsigned)SizeInBytes,
- MCI->getDestAlign(), MCI->getSourceAlign(),
- MCI, MCI, StoreEv, LoadEv, BECount);
+ return processLoopStoreOfLoopLoad(
+ Dest, Source, SE->getConstant(Dest->getType(), SizeInBytes),
+ MCI->getDestAlign(), MCI->getSourceAlign(), MCI, MCI, StoreEv, LoadEv,
+ BECount);
}
/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
const SCEV *BECount) {
- // We can only handle non-volatile memsets with a constant size.
- if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
+ // We can only handle non-volatile memsets.
+ if (MSI->isVolatile())
return false;
// If we're not allowed to hack on memset, we fail.
@@ -909,23 +909,72 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop)
return false;
-
- // Reject memsets that are so large that they overflow an unsigned.
- uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if ((SizeInBytes >> 32) != 0)
+ if (!Ev->isAffine()) {
+ LLVM_DEBUG(dbgs() << " Pointer is not affine, abort\n");
return false;
+ }
- // Check to see if the stride matches the size of the memset. If so, then we
- // know that every byte is touched in the loop.
- const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
- if (!ConstStride)
+ const SCEV *PointerStrideSCEV = Ev->getOperand(1);
+ const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength());
+ if (!PointerStrideSCEV || !MemsetSizeSCEV)
return false;
- APInt Stride = ConstStride->getAPInt();
- if (SizeInBytes != Stride && SizeInBytes != -Stride)
- return false;
+ bool IsNegStride = false;
+ const bool IsConstantSize = isa<ConstantInt>(MSI->getLength());
+
+ if (IsConstantSize) {
+ // Memset size is constant.
+ // Check if the pointer stride matches the memset size. If so, then
+ // we know that every byte is touched in the loop.
+ LLVM_DEBUG(dbgs() << " memset size is constant\n");
+ uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
+ const SCEVConstant *ConstStride = dyn_cast<SCEVConstant>(Ev->getOperand(1));
+ if (!ConstStride)
+ return false;
+
+ APInt Stride = ConstStride->getAPInt();
+ if (SizeInBytes != Stride && SizeInBytes != -Stride)
+ return false;
+
+ IsNegStride = SizeInBytes == -Stride;
+ } else {
+ // Memset size is non-constant.
+ // Check if the pointer stride matches the memset size.
+ // To be conservative, the pass does not promote pointers that aren't in
+ // address space zero. Also, the pass only handles memset lengths and strides
+ // that are invariant for the top-level loop.
+ LLVM_DEBUG(dbgs() << " memset size is non-constant\n");
+ if (Pointer->getType()->getPointerAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << " pointer is not in address space zero, "
+ << "abort\n");
+ return false;
+ }
+ if (!SE->isLoopInvariant(MemsetSizeSCEV, CurLoop)) {
+ LLVM_DEBUG(dbgs() << " memset size is not a loop-invariant, "
+ << "abort\n");
+ return false;
+ }
+
+ // Compare the positive-direction PointerStrideSCEV with MemsetSizeSCEV.
+ IsNegStride = PointerStrideSCEV->isNonConstantNegative();
+ const SCEV *PositiveStrideSCEV =
+ IsNegStride ? SE->getNegativeSCEV(PointerStrideSCEV)
+ : PointerStrideSCEV;
+ LLVM_DEBUG(dbgs() << " MemsetSizeSCEV: " << *MemsetSizeSCEV << "\n"
+ << " PositiveStrideSCEV: " << *PositiveStrideSCEV
+ << "\n");
+
+ if (PositiveStrideSCEV != MemsetSizeSCEV) {
+ // TODO: folding can be applied to the SCEVs: fold expressions that are
+ // covered by the loop guard at loop entry, then compare again and proceed
+ // with the optimization if they are equal.
+ LLVM_DEBUG(dbgs() << " SCEV don't match, abort\n");
+ return false;
+ }
+ }
// Verify that the memset value is loop invariant. If not, we can't promote
// the memset.
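A hedged example of the newly handled case (my code, not from the patch): the memset length is not a constant, but it is loop-invariant and equal to the pointer stride, so every byte of the region is written and the whole loop can still become a single memset.

#include <cstring>
void zeroMatrix(char *P, long N, long M) {
  for (long I = 0; I < N; ++I)
    std::memset(P + I * M, 0, M); // stride == length == M, a runtime value
}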
@@ -935,10 +984,10 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
SmallPtrSet<Instruction *, 1> MSIs;
MSIs.insert(MSI);
- bool NegStride = SizeInBytes == -Stride;
- return processLoopStridedStore(
- Pointer, (unsigned)SizeInBytes, MaybeAlign(MSI->getDestAlignment()),
- SplatValue, MSI, MSIs, Ev, BECount, NegStride, /*IsLoopMemset=*/true);
+ return processLoopStridedStore(Pointer, SE->getSCEV(MSI->getLength()),
+ MaybeAlign(MSI->getDestAlignment()),
+ SplatValue, MSI, MSIs, Ev, BECount,
+ IsNegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -946,9 +995,9 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
/// argument specifies what the verboten forms of access are (read or write).
static bool
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
- const SCEV *BECount, unsigned StoreSize,
+ const SCEV *BECount, const SCEV *StoreSizeSCEV,
AliasAnalysis &AA,
- SmallPtrSetImpl<Instruction *> &IgnoredStores) {
+ SmallPtrSetImpl<Instruction *> &IgnoredInsts) {
// Get the location that may be stored across the loop. Since the access is
// strided positively through memory, we say that the modified location starts
// at the pointer and has infinite size.
@@ -956,9 +1005,11 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// If the loop iterates a fixed number of times, we can refine the access size
// to be exactly the size of the memset, which is (BECount+1)*StoreSize
- if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
+ const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount);
+ const SCEVConstant *ConstSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+ if (BECst && ConstSize)
AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
- StoreSize);
+ ConstSize->getValue()->getZExtValue());
// TODO: For this to be really effective, we have to dive into the pointer
// operand in the store. Store to &A[i] of 100 will always return may alias
@@ -966,14 +1017,12 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// which will then no-alias a store to &A[100].
MemoryLocation StoreLoc(Ptr, AccessSize);
- for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
- ++BI)
- for (Instruction &I : **BI)
- if (IgnoredStores.count(&I) == 0 &&
+ for (BasicBlock *B : L->blocks())
+ for (Instruction &I : *B)
+ if (!IgnoredInsts.contains(&I) &&
isModOrRefSet(
intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
return true;
-
return false;
}
@@ -981,57 +1030,67 @@ mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
// we're trying to memset. Therefore, we need to recompute the base pointer,
// which is just Start - BECount*Size.
static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
- Type *IntPtr, unsigned StoreSize,
+ Type *IntPtr, const SCEV *StoreSizeSCEV,
ScalarEvolution *SE) {
const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr);
- if (StoreSize != 1)
- Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize),
+ if (!StoreSizeSCEV->isOne()) {
+ // index = back edge count * store size
+ Index = SE->getMulExpr(Index,
+ SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
SCEV::FlagNUW);
+ }
+ // base pointer = start - index (the index already includes the store size)
return SE->getMinusSCEV(Start, Index);
}
-/// Compute the number of bytes as a SCEV from the backedge taken count.
-///
-/// This also maps the SCEV into the provided type and tries to handle the
-/// computation in a way that will fold cleanly.
-static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
- unsigned StoreSize, Loop *CurLoop,
- const DataLayout *DL, ScalarEvolution *SE) {
- const SCEV *NumBytesS;
- // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
+/// Compute trip count from the backedge taken count.
+static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr,
+ Loop *CurLoop, const DataLayout *DL,
+ ScalarEvolution *SE) {
+ const SCEV *TripCountS = nullptr;
+ // The trip count is (BECount + 1). Expand it out to
// pointer size if it isn't already.
//
// If we're going to need to zero extend the BE count, check if we can add
// one to it prior to zero extending without overflow. Provided this is safe,
// it allows better simplification of the +1.
- if (DL->getTypeSizeInBits(BECount->getType()).getFixedSize() <
- DL->getTypeSizeInBits(IntPtr).getFixedSize() &&
+ if (DL->getTypeSizeInBits(BECount->getType()) <
+ DL->getTypeSizeInBits(IntPtr) &&
SE->isLoopEntryGuardedByCond(
CurLoop, ICmpInst::ICMP_NE, BECount,
SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
- NumBytesS = SE->getZeroExtendExpr(
+ TripCountS = SE->getZeroExtendExpr(
SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
IntPtr);
} else {
- NumBytesS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
- SE->getOne(IntPtr), SCEV::FlagNUW);
+ TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
+ SE->getOne(IntPtr), SCEV::FlagNUW);
}
- // And scale it based on the store size.
- if (StoreSize != 1) {
- NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
- SCEV::FlagNUW);
- }
- return NumBytesS;
+ return TripCountS;
+}
+
+/// Compute the number of bytes as a SCEV from the backedge taken count.
+///
+/// This also maps the SCEV into the provided type and tries to handle the
+/// computation in a way that will fold cleanly.
+static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
+ const SCEV *StoreSizeSCEV, Loop *CurLoop,
+ const DataLayout *DL, ScalarEvolution *SE) {
+ const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE);
+
+ return SE->getMulExpr(TripCountSCEV,
+ SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
+ SCEV::FlagNUW);
}
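A hedged worked example of the refactored computation (numbers only): the byte count is now assembled as trip count times store size, both as SCEVs, so for BECount = 99 and StoreSize = 4 the expanded value is (99 + 1) * 4 = 400.

constexpr long BECount = 99, StoreSize = 4;
constexpr long NumBytes = (BECount + 1) * StoreSize; // trip count * store size
static_assert(NumBytes == 400, "NumBytes = (BECount + 1) * StoreSize");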
/// processLoopStridedStore - We see a strided store of some value. If we can
/// transform this into a memset or memset_pattern in the loop preheader, do so.
bool LoopIdiomRecognize::processLoopStridedStore(
- Value *DestPtr, unsigned StoreSize, MaybeAlign StoreAlignment,
+ Value *DestPtr, const SCEV *StoreSizeSCEV, MaybeAlign StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
+ const SCEV *BECount, bool IsNegStride, bool IsLoopMemset) {
Value *SplatValue = isBytewiseValue(StoredVal, *DL);
Constant *PatternValue = nullptr;
@@ -1056,8 +1115,8 @@ bool LoopIdiomRecognize::processLoopStridedStore(
bool Changed = false;
const SCEV *Start = Ev->getStart();
// Handle negative strided loops.
- if (NegStride)
- Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1082,7 +1141,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Changed = true;
if (mayLoopAccessLocation(BasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores))
+ StoreSizeSCEV, *AA, Stores))
return Changed;
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
@@ -1091,7 +1150,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
// Okay, everything looks good, insert the memset.
const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
// TODO: ideally we should still be able to generate memset if SCEV expander
// is taught to generate the dependencies at the latest point.
@@ -1138,13 +1197,20 @@ bool LoopIdiomRecognize::processLoopStridedStore(
<< "\n");
ORE.emit([&]() {
- return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
- NewCall->getDebugLoc(), Preheader)
- << "Transformed loop-strided store in "
- << ore::NV("Function", TheStore->getFunction())
- << " function into a call to "
- << ore::NV("NewFunction", NewCall->getCalledFunction())
- << "() intrinsic";
+ OptimizationRemark R(DEBUG_TYPE, "ProcessLoopStridedStore",
+ NewCall->getDebugLoc(), Preheader);
+ R << "Transformed loop-strided store in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function into a call to "
+ << ore::NV("NewFunction", NewCall->getCalledFunction())
+ << "() intrinsic";
+ if (!Stores.empty())
+ R << ore::setExtraArgs();
+ for (auto *I : Stores) {
+ R << ore::NV("FromBlock", I->getParent()->getName())
+ << ore::NV("ToBlock", Preheader->getName());
+ }
+ return R;
});
// Okay, the memset has been formed. Zap the original store and anything that
@@ -1181,16 +1247,63 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
// random load we can't handle.
Value *LoadPtr = LI->getPointerOperand();
const SCEVAddRecExpr *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
- return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSize,
+
+ const SCEV *StoreSizeSCEV = SE->getConstant(StorePtr->getType(), StoreSize);
+ return processLoopStoreOfLoopLoad(StorePtr, LoadPtr, StoreSizeSCEV,
SI->getAlign(), LI->getAlign(), SI, LI,
StoreEv, LoadEv, BECount);
}
+class MemmoveVerifier {
+public:
+ explicit MemmoveVerifier(const Value &LoadBasePtr, const Value &StoreBasePtr,
+ const DataLayout &DL)
+ : DL(DL), LoadOff(0), StoreOff(0),
+ BP1(llvm::GetPointerBaseWithConstantOffset(
+ LoadBasePtr.stripPointerCasts(), LoadOff, DL)),
+ BP2(llvm::GetPointerBaseWithConstantOffset(
+ StoreBasePtr.stripPointerCasts(), StoreOff, DL)),
+ IsSameObject(BP1 == BP2) {}
+
+ bool loadAndStoreMayFormMemmove(unsigned StoreSize, bool IsNegStride,
+ const Instruction &TheLoad,
+ bool IsMemCpy) const {
+ if (IsMemCpy) {
+ // Ensure that LoadBasePtr is after StoreBasePtr, or before it when the
+ // stride is negative.
+ if ((!IsNegStride && LoadOff <= StoreOff) ||
+ (IsNegStride && LoadOff >= StoreOff))
+ return false;
+ } else {
+ // Ensure that LoadBasePtr is after StoreBasePtr, or before it when the
+ // stride is negative, and that LoadBasePtr does not overlap StoreBasePtr.
+ int64_t LoadSize =
+ DL.getTypeSizeInBits(TheLoad.getType()).getFixedSize() / 8;
+ if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ return false;
+ if ((!IsNegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
+ (IsNegStride && LoadOff + LoadSize > StoreOff))
+ return false;
+ }
+ return true;
+ }
+
+private:
+ const DataLayout &DL;
+ int64_t LoadOff;
+ int64_t StoreOff;
+ const Value *BP1;
+ const Value *BP2;
+
+public:
+ const bool IsSameObject;
+};
+
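For intuition, a hedged example of the kind of loop the verifier exists for (my code, not from the patch): source and destination share one underlying buffer, so the copy may only become a memmove, and only because the per-iteration load sits ahead of the store in the direction of the (positive) stride.

#include <cstddef>
void shiftLeft(int *Buf, std::size_t N) {
  for (std::size_t I = 0; I + 1 < N; ++I)
    Buf[I] = Buf[I + 1]; // load offset > store offset: memmove-safe forward copy
}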
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
- Value *DestPtr, Value *SourcePtr, unsigned StoreSize, MaybeAlign StoreAlign,
- MaybeAlign LoadAlign, Instruction *TheStore, Instruction *TheLoad,
- const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv,
- const SCEV *BECount) {
+ Value *DestPtr, Value *SourcePtr, const SCEV *StoreSizeSCEV,
+ MaybeAlign StoreAlign, MaybeAlign LoadAlign, Instruction *TheStore,
+ Instruction *TheLoad, const SCEVAddRecExpr *StoreEv,
+ const SCEVAddRecExpr *LoadEv, const SCEV *BECount) {
// FIXME: until llvm.memcpy.inline supports dynamic sizes, we need to
// conservatively bail here, since otherwise we may have to transform
@@ -1213,11 +1326,18 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
Type *IntIdxTy = Builder.getIntNTy(DL->getIndexSizeInBits(StrAS));
APInt Stride = getStoreStride(StoreEv);
- bool NegStride = StoreSize == -Stride;
+ const SCEVConstant *ConstStoreSize = dyn_cast<SCEVConstant>(StoreSizeSCEV);
+
+ // TODO: Deal with non-constant sizes; currently we expect a constant store size.
+ assert(ConstStoreSize && "store size is expected to be a constant");
+
+ int64_t StoreSize = ConstStoreSize->getValue()->getZExtValue();
+ bool IsNegStride = StoreSize == -Stride;
// Handle negative strided loops.
- if (NegStride)
- StrStart = getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ StrStart =
+ getStartForNegStride(StrStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
@@ -1237,19 +1357,24 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// the return value will read this comment, and leave them alone.
Changed = true;
- SmallPtrSet<Instruction *, 2> Stores;
- Stores.insert(TheStore);
+ SmallPtrSet<Instruction *, 2> IgnoredInsts;
+ IgnoredInsts.insert(TheStore);
bool IsMemCpy = isa<MemCpyInst>(TheStore);
const StringRef InstRemark = IsMemCpy ? "memcpy" : "load and store";
- bool UseMemMove =
+ bool LoopAccessStore =
mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
- StoreSize, *AA, Stores);
- if (UseMemMove) {
- Stores.insert(TheLoad);
+ StoreSizeSCEV, *AA, IgnoredInsts);
+ if (LoopAccessStore) {
+ // For the memmove case it's not enough to guarantee that the loop doesn't
+ // access TheStore and TheLoad. Additionally, we need to make sure TheStore is
+ // the only user of TheLoad.
+ if (!TheLoad->hasOneUse())
+ return Changed;
+ IgnoredInsts.insert(TheLoad);
if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
- BECount, StoreSize, *AA, Stores)) {
+ BECount, StoreSizeSCEV, *AA, IgnoredInsts)) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessStore",
TheStore)
@@ -1260,15 +1385,16 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
});
return Changed;
}
- Stores.erase(TheLoad);
+ IgnoredInsts.erase(TheLoad);
}
const SCEV *LdStart = LoadEv->getStart();
unsigned LdAS = SourcePtr->getType()->getPointerAddressSpace();
// Handle negative strided loops.
- if (NegStride)
- LdStart = getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSize, SE);
+ if (IsNegStride)
+ LdStart =
+ getStartForNegStride(LdStart, BECount, IntIdxTy, StoreSizeSCEV, SE);
// For a memcpy, we have to make sure that the input array is not being
// mutated by the loop.
@@ -1278,42 +1404,40 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
// If the store is a memcpy instruction, we must check if it will write to
// the load memory locations. So remove it from the ignored stores.
if (IsMemCpy)
- Stores.erase(TheStore);
+ IgnoredInsts.erase(TheStore);
+ MemmoveVerifier Verifier(*LoadBasePtr, *StoreBasePtr, *DL);
if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
- StoreSize, *AA, Stores)) {
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad", TheLoad)
- << ore::NV("Inst", InstRemark) << " in "
- << ore::NV("Function", TheStore->getFunction())
- << " function will not be hoisted: "
- << ore::NV("Reason", "The loop may access load location");
- });
- return Changed;
- }
- if (UseMemMove) {
- // Ensure that LoadBasePtr is after StoreBasePtr or before StoreBasePtr for
- // negative stride. LoadBasePtr shouldn't overlap with StoreBasePtr.
- int64_t LoadOff = 0, StoreOff = 0;
- const Value *BP1 = llvm::GetPointerBaseWithConstantOffset(
- LoadBasePtr->stripPointerCasts(), LoadOff, *DL);
- const Value *BP2 = llvm::GetPointerBaseWithConstantOffset(
- StoreBasePtr->stripPointerCasts(), StoreOff, *DL);
- int64_t LoadSize =
- DL->getTypeSizeInBits(TheLoad->getType()).getFixedSize() / 8;
- if (BP1 != BP2 || LoadSize != int64_t(StoreSize))
+ StoreSizeSCEV, *AA, IgnoredInsts)) {
+ if (!IsMemCpy) {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "LoopMayAccessLoad",
+ TheLoad)
+ << ore::NV("Inst", InstRemark) << " in "
+ << ore::NV("Function", TheStore->getFunction())
+ << " function will not be hoisted: "
+ << ore::NV("Reason", "The loop may access load location");
+ });
return Changed;
- if ((!NegStride && LoadOff < StoreOff + int64_t(StoreSize)) ||
- (NegStride && LoadOff + LoadSize > StoreOff))
+ }
+ // At this point the loop may access the load location only if this is a
+ // memcpy within the same underlying object. If that's not the case, bail out.
+ if (!Verifier.IsSameObject)
return Changed;
}
+ bool UseMemMove = IsMemCpy ? Verifier.IsSameObject : LoopAccessStore;
+ if (UseMemMove)
+ if (!Verifier.loadAndStoreMayFormMemmove(StoreSize, IsNegStride, *TheLoad,
+ IsMemCpy))
+ return Changed;
+
if (avoidLIRForMultiBlockLoop())
return Changed;
// Okay, everything is safe, we can transform this!
const SCEV *NumBytesS =
- getNumBytes(BECount, IntIdxTy, StoreSize, CurLoop, DL, SE);
+ getNumBytes(BECount, IntIdxTy, StoreSizeSCEV, CurLoop, DL, SE);
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
@@ -1375,11 +1499,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() intrinsic from " << ore::NV("Inst", InstRemark)
<< " instruction in " << ore::NV("Function", TheStore->getFunction())
- << " function";
+ << " function"
+ << ore::setExtraArgs()
+ << ore::NV("FromBlock", TheStore->getParent()->getName())
+ << ore::NV("ToBlock", Preheader->getName());
});
- // Okay, the memcpy has been formed. Zap the original store and anything that
- // feeds into it.
+ // Okay, a new call to memcpy/memmove has been formed. Zap the original store
+ // and anything that feeds into it.
if (MSSAU)
MSSAU->removeMemoryAccess(TheStore, true);
deleteDeadInstruction(TheStore);
@@ -1544,24 +1671,22 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
// step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
{
CountInst = nullptr;
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
- IterE = LoopEntry->end();
- Iter != IterE; Iter++) {
- Instruction *Inst = &*Iter;
- if (Inst->getOpcode() != Instruction::Add)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
continue;
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
if (!Inc || !Inc->isOne())
continue;
- PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
if (!Phi)
continue;
// Check if the result of the instruction is live of the loop.
bool LiveOutLoop = false;
- for (User *U : Inst->users()) {
+ for (User *U : Inst.users()) {
if ((cast<Instruction>(U))->getParent() != LoopEntry) {
LiveOutLoop = true;
break;
@@ -1569,7 +1694,7 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB,
}
if (LiveOutLoop) {
- CountInst = Inst;
+ CountInst = &Inst;
CountPhi = Phi;
break;
}
@@ -1670,22 +1795,20 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, const DataLayout &DL,
// plus "cnt0". Currently it is not optimized.
// This step could be used to detect POPCNT instruction:
// cnt.next = cnt + (x.next & 1)
- for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(),
- IterE = LoopEntry->end();
- Iter != IterE; Iter++) {
- Instruction *Inst = &*Iter;
- if (Inst->getOpcode() != Instruction::Add)
+ for (Instruction &Inst : llvm::make_range(
+ LoopEntry->getFirstNonPHI()->getIterator(), LoopEntry->end())) {
+ if (Inst.getOpcode() != Instruction::Add)
continue;
- ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+ ConstantInt *Inc = dyn_cast<ConstantInt>(Inst.getOperand(1));
if (!Inc || (!Inc->isOne() && !Inc->isMinusOne()))
continue;
- PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry);
+ PHINode *Phi = getRecurrenceVar(Inst.getOperand(0), &Inst, LoopEntry);
if (!Phi)
continue;
- CntInst = Inst;
+ CntInst = &Inst;
CntPhi = Phi;
break;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 3153a8721193..b9e63a4bc06f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -105,9 +105,7 @@ static bool simplifyLoopInst(Loop &L, DominatorTree &DT, LoopInfo &LI,
if (!V || !LI.replacementPreservesLCSSAForm(&I, V))
continue;
- for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(I.uses())) {
auto *UserI = cast<Instruction>(U.getUser());
U.set(V);
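A hedged standalone sketch of the idiom adopted above (the helper is mine, not from the patch): make_early_inc_range advances past the current element before the body runs, so rewriting the use, which unlinks it from I's use list, cannot invalidate the traversal.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
static void replaceUsesEarlyInc(llvm::Instruction &I, llvm::Value *V) {
  for (llvm::Use &U : llvm::make_early_inc_range(I.uses()))
    U.set(V); // removes U from I's use list; the iterator has already advanced
}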
@@ -195,15 +193,10 @@ public:
const TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent());
- MemorySSA *MSSA = nullptr;
- Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- }
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MemorySSAUpdater MSSAU(MSSA);
- return simplifyLoopInst(*L, DT, LI, AC, TLI,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ return simplifyLoopInst(*L, DT, LI, AC, TLI, &MSSAU);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -211,10 +204,8 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.setPreservesCFG();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
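
The rewrite above uses llvm::make_early_inc_range, which advances the iterator before the loop body runs, so U.set(V) (which unlinks U from I's use list) cannot invalidate it. A minimal sketch of the pattern; replaceUsesSafely is an illustrative name:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Sketch: redirect every use of I to V while iterating the use list safely.
static void replaceUsesSafely(Instruction &I, Value *V) {
  for (Use &U : make_early_inc_range(I.uses())) {
    // The iterator already points past U, so mutating U is safe here.
    U.set(V);
  }
}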
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 34545f35b3c3..9f605b4ac4ad 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1710,16 +1710,12 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
auto &OuterInnerReductions = LIL.getOuterInnerReductions();
// Now update the reduction PHIs in the inner and outer loop headers.
SmallVector<PHINode *, 4> InnerLoopPHIs, OuterLoopPHIs;
- for (PHINode &PHI : InnerLoopHeader->phis()) {
- if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
- continue;
- InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
- }
- for (PHINode &PHI : OuterLoopHeader->phis()) {
- if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end())
- continue;
- OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
- }
+ for (PHINode &PHI : InnerLoopHeader->phis())
+ if (OuterInnerReductions.contains(&PHI))
+ InnerLoopPHIs.push_back(cast<PHINode>(&PHI));
+ for (PHINode &PHI : OuterLoopHeader->phis())
+ if (OuterInnerReductions.contains(&PHI))
+ OuterLoopPHIs.push_back(cast<PHINode>(&PHI));
// Now move the remaining reduction PHIs from outer to inner loop header and
// vice versa. The PHI nodes must be part of a reduction across the inner and
@@ -1767,6 +1763,7 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
return Changed;
}
+namespace {
/// Main LoopInterchange Pass.
struct LoopInterchangeLegacyPass : public LoopPass {
static char ID;
@@ -1795,6 +1792,7 @@ struct LoopInterchangeLegacyPass : public LoopPass {
return LoopInterchange(SE, LI, DI, DT, ORE).run(L);
}
};
+} // namespace
char LoopInterchangeLegacyPass::ID = 0;
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index aaf586173e44..21d59936616b 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -34,7 +34,6 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -109,8 +108,8 @@ struct StoreToLoadForwardingCandidate {
// Currently we only support accesses with unit stride. FIXME: we should be
// able to handle non-unit stride as well, as long as the stride is equal to
// the dependence distance.
- if (getPtrStride(PSE, LoadPtr, L) != 1 ||
- getPtrStride(PSE, StorePtr, L) != 1)
+ if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 ||
+ getPtrStride(PSE, LoadType, StorePtr, L) != 1)
return false;
auto &DL = Load->getParent()->getModule()->getDataLayout();
@@ -718,15 +717,12 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
auto *PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
auto *BFI = (PSI && PSI->hasProfileSummary()) ?
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
bool Changed = eliminateLoadsAcrossLoops(
F, LI, DT, BFI, PSI, &SE, &AC, [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
});
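
The unit-stride requirement in the forwarding-candidate check corresponds to loops of the shape below, where the store in iteration i produces exactly the value the load reads one iteration later. Illustrative C++ only, not taken from the patch or its tests:

// A store-to-load forwarding candidate: both accesses have unit stride and the
// dependence distance is one iteration.
void forwardingCandidate(int *A, const int *B, int *C, int N) {
  for (int I = 0; I + 1 < N; ++I) {
    A[I + 1] = B[I] + 1; // store, stride 1
    C[I] = A[I];         // load of the value stored one iteration earlier
  }
}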
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index f4fce4871331..3df4cfe8e4c1 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -10,6 +10,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
@@ -44,6 +45,18 @@ PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
return PA;
}
+void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
+ LPMUpdater &>::printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)>
+ MapClassName2PassName) {
+ for (unsigned Idx = 0, Size = LoopPasses.size(); Idx != Size; ++Idx) {
+ auto *P = LoopPasses[Idx].get();
+ P->printPipeline(OS, MapClassName2PassName);
+ if (Idx + 1 < Size)
+ OS << ",";
+ }
+}
+
// Run both loop passes and loop-nest passes on top-level loop \p L.
PreservedAnalyses
LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
@@ -112,12 +125,6 @@ LoopPassManager::runWithLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
// notify the updater; otherwise U.ParentL might get outdated and trigger
// assertion failures in addSiblingLoops and addChildLoops.
U.setParentLoop(L.getParentLoop());
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
return PA;
}
@@ -161,17 +168,17 @@ LoopPassManager::runWithoutLoopNestPasses(Loop &L, LoopAnalysisManager &AM,
// notify the updater; otherwise U.ParentL might get outdated and trigger
// assertion failures in addSiblingLoops and addChildLoops.
U.setParentLoop(L.getParentLoop());
-
- // FIXME: Historically, the pass managers all called the LLVM context's
- // yield function here. We don't have a generic way to acquire the
- // context and it isn't yet clear what the right pattern is for yielding
- // in the new pass manager so it is currently omitted.
- // ...getContext().yield();
}
return PA;
}
} // namespace llvm
+void FunctionToLoopPassAdaptor::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ OS << (UseMemorySSA ? "loop-mssa(" : "loop(");
+ Pass->printPipeline(OS, MapClassName2PassName);
+ OS << ")";
+}
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
// Before we even compute any loop analyses, first run a miniature function
@@ -201,6 +208,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
BlockFrequencyInfo *BFI = UseBlockFrequencyInfo && F.hasProfileData()
? (&AM.getResult<BlockFrequencyAnalysis>(F))
: nullptr;
+ BranchProbabilityInfo *BPI =
+ UseBranchProbabilityInfo && F.hasProfileData()
+ ? (&AM.getResult<BranchProbabilityAnalysis>(F))
+ : nullptr;
LoopStandardAnalysisResults LAR = {AM.getResult<AAManager>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -209,6 +220,7 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<TargetIRAnalysis>(F),
BFI,
+ BPI,
MSSA};
// Setup the loop analysis manager from its proxy. It is important that
@@ -285,6 +297,10 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
else
PI.runAfterPass<Loop>(*Pass, *L, PassPA);
+ if (LAR.MSSA && !PassPA.getChecker<MemorySSAAnalysis>().preserved())
+ report_fatal_error("Loop pass manager using MemorySSA contains a pass "
+ "that does not preserve MemorySSA");
+
#ifndef NDEBUG
// LoopAnalysisResults should always be valid.
// Note that we don't LAR.SE.verify() because that can change observed SE
@@ -325,6 +341,8 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
PA.preserve<ScalarEvolutionAnalysis>();
if (UseBlockFrequencyInfo && F.hasProfileData())
PA.preserve<BlockFrequencyAnalysis>();
+ if (UseBranchProbabilityInfo && F.hasProfileData())
+ PA.preserve<BranchProbabilityAnalysis>();
if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
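
The new printPipeline overloads emit the contained passes as a comma-separated list and wrap the loop pipeline in either loop(...) or loop-mssa(...). A small standalone sketch of that textual format; the function and parameter names are illustrative:

#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>
using namespace llvm;

// Sketch: print {"licm", "loop-rotate"} with MemorySSA enabled as
// "loop-mssa(licm,loop-rotate)".
static void printLoopPipeline(raw_ostream &OS, bool UseMemorySSA,
                              const std::vector<std::string> &PassNames) {
  OS << (UseMemorySSA ? "loop-mssa(" : "loop(");
  for (size_t I = 0, E = PassNames.size(); I != E; ++I) {
    OS << PassNames[I];
    if (I + 1 < E)
      OS << ",";
  }
  OS << ")";
}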
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 4f97641e2027..aa7e79a589f2 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -183,6 +183,8 @@
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Function.h"
@@ -254,7 +256,7 @@ class LoopPredication {
DominatorTree *DT;
ScalarEvolution *SE;
LoopInfo *LI;
- BranchProbabilityInfo *BPI;
+ MemorySSAUpdater *MSSAU;
Loop *L;
const DataLayout *DL;
@@ -302,16 +304,15 @@ class LoopPredication {
// If the loop always exits through another block in the loop, we should not
// predicate based on the latch check. For example, the latch check can be a
// very coarse grained check and there can be more fine grained exit checks
- // within the loop. We identify such unprofitable loops through BPI.
+ // within the loop.
bool isLoopProfitableToPredicate();
bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);
public:
- LoopPredication(AliasAnalysis *AA, DominatorTree *DT,
- ScalarEvolution *SE, LoopInfo *LI,
- BranchProbabilityInfo *BPI)
- : AA(AA), DT(DT), SE(SE), LI(LI), BPI(BPI) {};
+ LoopPredication(AliasAnalysis *AA, DominatorTree *DT, ScalarEvolution *SE,
+ LoopInfo *LI, MemorySSAUpdater *MSSAU)
+ : AA(AA), DT(DT), SE(SE), LI(LI), MSSAU(MSSAU){};
bool runOnLoop(Loop *L);
};
@@ -325,6 +326,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<BranchProbabilityInfoWrapperPass>();
getLoopAnalysisUsage(AU);
+ AU.addPreserved<MemorySSAWrapperPass>();
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
@@ -333,10 +335,12 @@ public:
auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ auto *MSSAWP = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (MSSAWP)
+ MSSAU = std::make_unique<MemorySSAUpdater>(&MSSAWP->getMSSA());
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- LoopPredication LP(AA, DT, SE, LI, &BPI);
+ LoopPredication LP(AA, DT, SE, LI, MSSAU ? MSSAU.get() : nullptr);
return LP.runOnLoop(L);
}
};
@@ -358,16 +362,18 @@ Pass *llvm::createLoopPredicationPass() {
PreservedAnalyses LoopPredicationPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
- Function *F = L.getHeader()->getParent();
- // For the new PM, we also can't use BranchProbabilityInfo as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but BPI is not preserved, hence a newly built one is needed.
- BranchProbabilityInfo BPI(*F, AR.LI, &AR.TLI, &AR.DT, nullptr);
- LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI, &BPI);
+ std::unique_ptr<MemorySSAUpdater> MSSAU;
+ if (AR.MSSA)
+ MSSAU = std::make_unique<MemorySSAUpdater>(AR.MSSA);
+ LoopPredication LP(&AR.AA, &AR.DT, &AR.SE, &AR.LI,
+ MSSAU ? MSSAU.get() : nullptr);
if (!LP.runOnLoop(&L))
return PreservedAnalyses::all();
- return getLoopPassPreservedAnalyses();
+ auto PA = getLoopPassPreservedAnalyses();
+ if (AR.MSSA)
+ PA.preserve<MemorySSAAnalysis>();
+ return PA;
}
Optional<LoopICmp>
@@ -809,7 +815,7 @@ bool LoopPredication::widenGuardConditions(IntrinsicInst *Guard,
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = Guard->getOperand(0);
Guard->setOperand(0, AllChecks);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
LLVM_DEBUG(dbgs() << "Widened checks = " << NumWidened << "\n");
return true;
@@ -835,7 +841,7 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
Value *AllChecks = Builder.CreateAnd(Checks);
auto *OldCond = BI->getCondition();
BI->setCondition(AllChecks);
- RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
assert(isGuardAsWidenableBranch(BI) &&
"Stopped being a guard after transform?");
@@ -912,7 +918,7 @@ Optional<LoopICmp> LoopPredication::parseLoopLatchICmp() {
bool LoopPredication::isLoopProfitableToPredicate() {
- if (SkipProfitabilityChecks || !BPI)
+ if (SkipProfitabilityChecks)
return true;
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 8> ExitEdges;
@@ -934,8 +940,61 @@ bool LoopPredication::isLoopProfitableToPredicate() {
"expected to be an exiting block with 2 succs!");
unsigned LatchBrExitIdx =
LatchTerm->getSuccessor(0) == L->getHeader() ? 1 : 0;
+ // We compute branch probabilities without BPI. We do not rely on BPI since
+ // Loop predication is usually run in an LPM and BPI is only preserved
+ // lossily within loop pass managers, while BPI has an inherent notion of
+ // being complete for an entire function.
+
+ // If the latch exits into a deoptimize or an unreachable block, do not
+ // predicate on that latch check.
+ auto *LatchExitBlock = LatchTerm->getSuccessor(LatchBrExitIdx);
+ if (isa<UnreachableInst>(LatchTerm) ||
+ LatchExitBlock->getTerminatingDeoptimizeCall())
+ return false;
+
+ auto IsValidProfileData = [](MDNode *ProfileData, const Instruction *Term) {
+ if (!ProfileData || !ProfileData->getOperand(0))
+ return false;
+ if (MDString *MDS = dyn_cast<MDString>(ProfileData->getOperand(0)))
+ if (!MDS->getString().equals("branch_weights"))
+ return false;
+ if (ProfileData->getNumOperands() != 1 + Term->getNumSuccessors())
+ return false;
+ return true;
+ };
+ MDNode *LatchProfileData = LatchTerm->getMetadata(LLVMContext::MD_prof);
+ // Latch terminator has no valid profile data, so nothing to check
+ // profitability on.
+ if (!IsValidProfileData(LatchProfileData, LatchTerm))
+ return true;
+
+ auto ComputeBranchProbability =
+ [&](const BasicBlock *ExitingBlock,
+ const BasicBlock *ExitBlock) -> BranchProbability {
+ auto *Term = ExitingBlock->getTerminator();
+ MDNode *ProfileData = Term->getMetadata(LLVMContext::MD_prof);
+ unsigned NumSucc = Term->getNumSuccessors();
+ if (IsValidProfileData(ProfileData, Term)) {
+ uint64_t Numerator = 0, Denominator = 0, ProfVal = 0;
+ for (unsigned i = 0; i < NumSucc; i++) {
+ ConstantInt *CI =
+ mdconst::extract<ConstantInt>(ProfileData->getOperand(i + 1));
+ ProfVal = CI->getValue().getZExtValue();
+ if (Term->getSuccessor(i) == ExitBlock)
+ Numerator += ProfVal;
+ Denominator += ProfVal;
+ }
+ return BranchProbability::getBranchProbability(Numerator, Denominator);
+ } else {
+ assert(LatchBlock != ExitingBlock &&
+ "Latch term should always have profile data!");
+ // No profile data, so we choose the weight as 1/num_of_succ(Src)
+ return BranchProbability::getBranchProbability(1, NumSucc);
+ }
+ };
+
BranchProbability LatchExitProbability =
- BPI->getEdgeProbability(LatchBlock, LatchBrExitIdx);
+ ComputeBranchProbability(LatchBlock, LatchExitBlock);
// Protect against degenerate inputs provided by the user. Providing a value
// less than one, can invert the definition of profitable loop predication.
@@ -948,18 +1007,18 @@ bool LoopPredication::isLoopProfitableToPredicate() {
LLVM_DEBUG(dbgs() << "The value is set to 1.0\n");
ScaleFactor = 1.0;
}
- const auto LatchProbabilityThreshold =
- LatchExitProbability * ScaleFactor;
+ const auto LatchProbabilityThreshold = LatchExitProbability * ScaleFactor;
for (const auto &ExitEdge : ExitEdges) {
BranchProbability ExitingBlockProbability =
- BPI->getEdgeProbability(ExitEdge.first, ExitEdge.second);
+ ComputeBranchProbability(ExitEdge.first, ExitEdge.second);
// Some exiting edge has higher probability than the latch exiting edge.
// No longer profitable to predicate.
if (ExitingBlockProbability > LatchProbabilityThreshold)
return false;
}
- // Using BPI, we have concluded that the most probable way to exit from the
+
+ // We have concluded that the most probable way to exit from the
// loop is through the latch (or there's no profile information and all
// exits are equally likely).
return true;
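
The ComputeBranchProbability lambda above replaces BPI by reading !prof branch_weights metadata directly. A standalone sketch of the same computation for a single successor edge, omitting the "branch_weights" name check for brevity; successorProbability is an illustrative helper, not an LLVM API:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

// Sketch: probability that Term branches to its Idx-th successor, taken from
// profile metadata when present, else 1/NumSucc.
static BranchProbability successorProbability(const Instruction *Term,
                                              unsigned Idx) {
  unsigned NumSucc = Term->getNumSuccessors();
  MDNode *Prof = Term->getMetadata(LLVMContext::MD_prof);
  if (!Prof || Prof->getNumOperands() != 1 + NumSucc)
    return BranchProbability::getBranchProbability(1, NumSucc);
  uint64_t Num = 0, Denom = 0;
  for (unsigned I = 0; I < NumSucc; ++I) {
    uint64_t W = mdconst::extract<ConstantInt>(Prof->getOperand(I + 1))
                     ->getValue()
                     .getZExtValue();
    if (I == Idx)
      Num += W;
    Denom += W;
  }
  if (Denom == 0)
    return BranchProbability::getBranchProbability(1, NumSucc);
  return BranchProbability::getBranchProbability(Num, Denom);
}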
@@ -1071,28 +1130,26 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// widen so that we gain the ability to analyze its exit count and perform this
// transform. TODO: It'd be nice to know for sure the exit became
// analyzeable after dropping widenability.
- {
- bool Invalidate = false;
+ bool ChangedLoop = false;
- for (auto *ExitingBB : ExitingBlocks) {
- if (LI->getLoopFor(ExitingBB) != L)
- continue;
+ for (auto *ExitingBB : ExitingBlocks) {
+ if (LI->getLoopFor(ExitingBB) != L)
+ continue;
- auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
- if (!BI)
- continue;
+ auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
+ if (!BI)
+ continue;
- Use *Cond, *WC;
- BasicBlock *IfTrueBB, *IfFalseBB;
- if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
- L->contains(IfTrueBB)) {
- WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
- Invalidate = true;
- }
+ Use *Cond, *WC;
+ BasicBlock *IfTrueBB, *IfFalseBB;
+ if (parseWidenableBranch(BI, Cond, WC, IfTrueBB, IfFalseBB) &&
+ L->contains(IfTrueBB)) {
+ WC->set(ConstantInt::getTrue(IfTrueBB->getContext()));
+ ChangedLoop = true;
}
- if (Invalidate)
- SE->forgetLoop(L);
}
+ if (ChangedLoop)
+ SE->forgetLoop(L);
// The use of umin(all analyzeable exits) instead of latch is subtle, but
// important for profitability. We may have a loop which hasn't been fully
@@ -1104,18 +1161,24 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
!SE->isLoopInvariant(MinEC, L) ||
!isSafeToExpandAt(MinEC, WidenableBR, *SE))
- return false;
+ return ChangedLoop;
// Subtlety: We need to avoid inserting additional uses of the WC. We know
// that it can only have one transitive use at the moment, and thus moving
// that use to just before the branch and inserting code before it and then
// modifying the operand is legal.
auto *IP = cast<Instruction>(WidenableBR->getCondition());
+ // Here we unconditionally modify the IR, so after this point we should return
+ // only `true`!
IP->moveBefore(WidenableBR);
+ if (MSSAU)
+ if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
+ MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
+ MemorySSA::BeforeTerminator);
Rewriter.setInsertPoint(IP);
IRBuilder<> B(IP);
- bool Changed = false;
+ bool InvalidateLoop = false;
Value *MinECV = nullptr; // lazily generated if needed
for (BasicBlock *ExitingBB : ExitingBlocks) {
// If our exiting block exits multiple loops, we can only rewrite the
@@ -1172,16 +1235,18 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
Value *OldCond = BI->getCondition();
BI->setCondition(ConstantInt::get(OldCond->getType(), !ExitIfTrue));
- Changed = true;
+ InvalidateLoop = true;
}
- if (Changed)
+ if (InvalidateLoop)
// We just mutated a bunch of loop exits changing their exit counts
// widely. We need to force recomputation of the exit counts given these
// changes. Note that all of the inserted exits are never taken, and
// should be removed next time the CFG is modified.
SE->forgetLoop(L);
- return Changed;
+
+ // Always return `true` since we have moved the WidenableBR's condition.
+ return true;
}
bool LoopPredication::runOnLoop(Loop *Loop) {
@@ -1242,5 +1307,8 @@ bool LoopPredication::runOnLoop(Loop *Loop) {
for (auto *Guard : GuardsAsWidenableBranches)
Changed |= widenWidenableBranchGuardConditions(Guard, Expander);
Changed |= predicateLoopExits(L, Expander);
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
return Changed;
}
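
Both guard-widening paths now hand the MemorySSAUpdater to RecursivelyDeleteTriviallyDeadInstructions so the matching MemorySSA accesses are removed together with the dead condition. A minimal sketch of that call pattern; dropOldCondition is an illustrative wrapper, not an LLVM API:

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Sketch: delete a no-longer-used condition and anything that only fed it,
// keeping MemorySSA (if present) in sync, then optionally verify it.
static void dropOldCondition(Value *OldCond, MemorySSAUpdater *MSSAU) {
  RecursivelyDeleteTriviallyDeadInstructions(OldCond, /*TLI=*/nullptr, MSSAU);
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
}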
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 6d5b19443c76..5ba137b1c85f 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -99,8 +99,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency)
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
// Lazy BFI and BPI are marked as preserved here so LoopRotate
@@ -121,13 +120,11 @@ public:
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- // Not requiring MemorySSA and getting it only if available will split
- // the loop pass pipeline when LoopRotate is being run first.
- auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAA)
- MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
- }
+ // Not requiring MemorySSA and getting it only if available will split
+ // the loop pass pipeline when LoopRotate is being run first.
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
// Vectorization requires loop-rotation. Use default threshold for loops the
// user explicitly marked for vectorization, even when header duplication is
// disabled.
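
The legacy pass above only builds a MemorySSAUpdater when the analysis is already available, so MemorySSA is used opportunistically without forcing a pipeline split. A standalone sketch of that optional-updater plumbing; doRotate stands in for the actual transform and is not a real API:

#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
using namespace llvm;

// Placeholder for the real transform; it only reports whether an updater was
// supplied.
static bool doRotate(Loop &, MemorySSAUpdater *MSSAU) { return MSSAU != nullptr; }

// Sketch: wrap MemorySSA in an updater only when the analysis was computed.
static bool runWithOptionalMSSA(Loop &L, MemorySSA *MSSAOrNull) {
  Optional<MemorySSAUpdater> MSSAU;
  if (MSSAOrNull)
    MSSAU = MemorySSAUpdater(MSSAOrNull);
  return doRotate(L, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
}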
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index cc6d11220807..a87843d658a9 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -733,13 +733,12 @@ public:
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *MSSAA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- if (VerifyMemorySSA)
- MSSA->verifyMemorySSA();
- }
+ if (MSSAA)
+ MSSAU = MemorySSAUpdater(&MSSAA->getMSSA());
+ if (MSSAA && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
bool DeleteCurrentLoop = false;
bool Changed = simplifyLoopCFG(
*L, DT, LI, SE, MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
@@ -750,10 +749,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addPreserved<MemorySSAWrapperPass>();
AU.addPreserved<DependenceAnalysisWrapperPass>();
getLoopAnalysisUsage(AU);
}
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index a01287f587d7..c9c9e60d0921 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -323,15 +323,14 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
// Traverse the preheader's instructions in reverse order because if A depends
// on B (A appears after B), A needs to be sunk first before B can be
// sunk.
- for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
- Instruction *I = &*II++;
+ for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
// No need to check whether the instruction's operands are loop invariant.
- assert(L.hasLoopInvariantOperands(I) &&
+ assert(L.hasLoopInvariantOperands(&I) &&
"Insts in a loop's preheader should have loop invariant operands!");
- if (!canSinkOrHoistInst(*I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
+ if (!canSinkOrHoistInst(I, &AA, &DT, &L, CurAST, MSSAU.get(), false,
LICMFlags.get()))
continue;
- if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
+ if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI,
MSSAU.get()))
Changed = true;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5f210380ae5a..a9a2266e1196 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -136,6 +136,12 @@ using namespace llvm;
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;
+/// Limit the size of expression that SCEV-based salvaging will attempt to
+/// translate into a DIExpression.
+/// Choose a maximum size such that debuginfo is not excessively increased and
+/// the salvaging is not too expensive for the compiler.
+static const unsigned MaxSCEVSalvageExpressionSize = 64;
+
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
@@ -689,7 +695,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
const APInt &RA = RC->getAPInt();
// Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
// some folding.
- if (RA.isAllOnesValue()) {
+ if (RA.isAllOnes()) {
if (LHS->getType()->isPointerTy())
return nullptr;
return SE.getMulExpr(LHS, RC);
@@ -2816,9 +2822,7 @@ static const SCEV *getExprBase(const SCEV *S) {
// there's nothing more complex.
// FIXME: not sure if we want to recognize negation.
const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
- for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
- E(Add->op_begin()); I != E; ++I) {
- const SCEV *SubExpr = *I;
+ for (const SCEV *SubExpr : reverse(Add->operands())) {
if (SubExpr->getSCEVType() == scAddExpr)
return getExprBase(SubExpr);
@@ -3150,7 +3154,7 @@ void LSRInstance::CollectChains() {
void LSRInstance::FinalizeChain(IVChain &Chain) {
assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
-
+
for (const IVInc &Inc : Chain) {
LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
@@ -3385,7 +3389,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
// Mark uses whose expressions cannot be expanded.
- if (!isSafeToExpand(S, SE))
+ if (!isSafeToExpand(S, SE, /*CanonicalMode*/ false))
LU.RigidFormula = true;
Formula F;
@@ -3934,6 +3938,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Check each interesting stride.
for (int64_t Factor : Factors) {
+ // Check that Factor can be represented by IntTy
+ if (!ConstantInt::isValueValidForType(IntTy, Factor))
+ continue;
// Check that the multiplication doesn't overflow.
if (Base.BaseOffset == std::numeric_limits<int64_t>::min() && Factor == -1)
continue;
@@ -4082,6 +4089,14 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (DstTy->isPointerTy())
return;
+ // It is invalid to extend a pointer type so exit early if ScaledReg or
+ // any of the BaseRegs are pointers.
+ if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
+ return;
+ if (any_of(Base.BaseRegs,
+ [](const SCEV *S) { return S->getType()->isPointerTy(); }))
+ return;
+
for (Type *SrcTy : Types) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
@@ -5689,23 +5704,6 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
}
}
-#ifndef NDEBUG
- // All dominating loops must have preheaders, or SCEVExpander may not be able
- // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
- //
- // IVUsers analysis should only create users that are dominated by simple loop
- // headers. Since this loop should dominate all of its users, its user list
- // should be empty if this loop itself is not within a simple loop nest.
- for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
- Rung; Rung = Rung->getIDom()) {
- BasicBlock *BB = Rung->getBlock();
- const Loop *DomLoop = LI.getLoopFor(BB);
- if (DomLoop && DomLoop->getHeader() == BB) {
- assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
- }
- }
-#endif // DEBUG
-
LLVM_DEBUG(dbgs() << "\nLSR on loop ";
L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
dbgs() << ":\n");
@@ -5870,6 +5868,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MemorySSAWrapperPass>();
}
+namespace {
struct SCEVDbgValueBuilder {
SCEVDbgValueBuilder() = default;
SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) {
@@ -5906,9 +5905,12 @@ struct SCEVDbgValueBuilder {
pushValue(V);
}
- void pushConst(const SCEVConstant *C) {
+ bool pushConst(const SCEVConstant *C) {
+ if (C->getAPInt().getMinSignedBits() > 64)
+ return false;
Expr.push_back(llvm::dwarf::DW_OP_consts);
Expr.push_back(C->getAPInt().getSExtValue());
+ return true;
}
/// Several SCEV types are sequences of the same arithmetic operator applied
@@ -5947,10 +5949,10 @@ struct SCEVDbgValueBuilder {
bool pushSCEV(const llvm::SCEV *S) {
bool Success = true;
if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
- pushConst(StartInt);
+ Success &= pushConst(StartInt);
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
- if(!U->getValue())
+ if (!U->getValue())
return false;
pushValue(U->getValue());
@@ -6033,6 +6035,8 @@ struct SCEVDbgValueBuilder {
/// SCEV constant value is an identity function.
bool isIdentityFunction(uint64_t Op, const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (C->getAPInt().getMinSignedBits() > 64)
+ return false;
int64_t I = C->getAPInt().getSExtValue();
switch (Op) {
case llvm::dwarf::DW_OP_plus:
@@ -6112,14 +6116,15 @@ struct DVIRecoveryRec {
Metadata *LocationOp;
const llvm::SCEV *SCEV;
};
+} // namespace
-static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
+static void RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
const SCEVDbgValueBuilder &IterationCount,
ScalarEvolution &SE) {
// LSR may add locations to previously single location-op DVIs which
// are currently not supported.
if (CachedDVI.DVI->getNumVariableLocationOps() != 1)
- return false;
+ return;
// SCEVs for SSA values are most frequently of the form
// {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
@@ -6127,45 +6132,70 @@ static bool RewriteDVIUsingIterCount(DVIRecoveryRec CachedDVI,
// SCEVs have not been observed to result in debuginfo-lossy optimisations,
// so it's not expected that this point will be reached.
if (!isa<SCEVAddRecExpr>(CachedDVI.SCEV))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: Value to salvage SCEV: "
<< *CachedDVI.SCEV << '\n');
const auto *Rec = cast<SCEVAddRecExpr>(CachedDVI.SCEV);
if (!Rec->isAffine())
- return false;
+ return;
+
+ if (CachedDVI.SCEV->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return;
// Initialise a new builder with the iteration count expression. In
// combination with the value's SCEV this enables recovery.
SCEVDbgValueBuilder RecoverValue(IterationCount);
if (!RecoverValue.SCEVToValueExpr(*Rec, SE))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *CachedDVI.DVI << '\n');
RecoverValue.applyExprToDbgValue(*CachedDVI.DVI, CachedDVI.Expr);
LLVM_DEBUG(dbgs() << "scev-salvage: to: " << *CachedDVI.DVI << '\n');
- return true;
}
-static bool
+static void RewriteDVIUsingOffset(DVIRecoveryRec &DVIRec, llvm::PHINode &IV,
+ int64_t Offset) {
+ assert(!DVIRec.DVI->hasArgList() && "Expected single location-op dbg.value.");
+ DbgValueInst *DVI = DVIRec.DVI;
+ SmallVector<uint64_t, 8> Ops;
+ DIExpression::appendOffset(Ops, Offset);
+ DIExpression *Expr = DIExpression::prependOpcodes(DVIRec.Expr, Ops, true);
+ LLVM_DEBUG(dbgs() << "scev-salvage: Updating: " << *DVIRec.DVI << '\n');
+ DVI->setExpression(Expr);
+ llvm::Value *ValIV = dyn_cast<llvm::Value>(&IV);
+ DVI->replaceVariableLocationOp(
+ 0u, llvm::MetadataAsValue::get(DVI->getContext(),
+ llvm::ValueAsMetadata::get(ValIV)));
+ LLVM_DEBUG(dbgs() << "scev-salvage: updated with offset to IV: "
+ << *DVIRec.DVI << '\n');
+}
+
+static void
DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
llvm::PHINode *LSRInductionVar,
SmallVector<DVIRecoveryRec, 2> &DVIToUpdate) {
if (DVIToUpdate.empty())
- return false;
+ return;
const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
assert(SCEVInductionVar &&
"Anticipated a SCEV for the post-LSR induction variable");
- bool Changed = false;
if (const SCEVAddRecExpr *IVAddRec =
dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
+ if (!IVAddRec->isAffine())
+ return;
+
+ if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
+ return;
+
+ // The iteration count is required to recover location values.
SCEVDbgValueBuilder IterCountExpr;
IterCountExpr.pushValue(LSRInductionVar);
if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
<< '\n');
@@ -6180,20 +6210,34 @@ DbgRewriteSalvageableDVIs(llvm::Loop *L, ScalarEvolution &SE,
// supported by SCEV salvaging. But, we can attempt a salvage by restoring
// the pre-LSR single-op expression.
if (DVIRec.DVI->hasArgList()) {
+ if (!DVIRec.DVI->getVariableLocationOp(0))
+ continue;
llvm::Type *Ty = DVIRec.DVI->getVariableLocationOp(0)->getType();
DVIRec.DVI->setRawLocation(
llvm::ValueAsMetadata::get(UndefValue::get(Ty)));
DVIRec.DVI->setExpression(DVIRec.Expr);
}
- Changed |= RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ LLVM_DEBUG(dbgs() << "scev-salvage: value to recover SCEV: "
+ << *DVIRec.SCEV << '\n');
+
+ // Create a simple expression if the IV and value to salvage SCEVs
+ // start values differ by only a constant value.
+ if (Optional<APInt> Offset =
+ SE.computeConstantDifference(DVIRec.SCEV, SCEVInductionVar)) {
+ if (Offset.getValue().getMinSignedBits() <= 64)
+ RewriteDVIUsingOffset(DVIRec, *LSRInductionVar,
+ Offset.getValue().getSExtValue());
+ } else {
+ RewriteDVIUsingIterCount(DVIRec, IterCountExpr, SE);
+ }
}
}
- return Changed;
}
/// Identify and cache salvageable DVI locations and expressions along with the
-/// corresponding SCEV(s). Also ensure that the DVI is not deleted before
+/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
+/// caching and salvaging.
static void
DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
SmallVector<DVIRecoveryRec, 2> &SalvageableDVISCEVs,
@@ -6204,10 +6248,24 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
if (!DVI)
continue;
+ if (DVI->isUndef())
+ continue;
+
if (DVI->hasArgList())
continue;
- if (!SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
+ if (!DVI->getVariableLocationOp(0) ||
+ !SE.isSCEVable(DVI->getVariableLocationOp(0)->getType()))
+ continue;
+
+ // SCEVUnknown wraps an llvm::Value; it does not have a start and stride.
+ // Therefore no translation to DIExpression is performed.
+ const SCEV *S = SE.getSCEV(DVI->getVariableLocationOp(0));
+ if (isa<SCEVUnknown>(S))
+ continue;
+
+ // Avoid wasting resources generating an expression containing undef.
+ if (SE.containsUndefs(S))
continue;
SalvageableDVISCEVs.push_back(
@@ -6223,34 +6281,32 @@ DbgGatherSalvagableDVI(Loop *L, ScalarEvolution &SE,
/// surviving subsequent transforms.
static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
const LSRInstance &LSR) {
- // For now, just pick the first IV generated and inserted. Ideally pick an IV
- // that is unlikely to be optimised away by subsequent transforms.
+
+ auto IsSuitableIV = [&](PHINode *P) {
+ if (!SE.isSCEVable(P->getType()))
+ return false;
+ if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
+ return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
+ return false;
+ };
+
+ // For now, just pick the first IV that was generated and inserted by
+ // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
+ // by subsequent transforms.
for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
if (!IV)
continue;
- assert(isa<PHINode>(&*IV) && "Expected PhI node.");
- if (SE.isSCEVable((*IV).getType())) {
- PHINode *Phi = dyn_cast<PHINode>(&*IV);
- LLVM_DEBUG(const llvm::SCEV *S = SE.getSCEV(Phi);
- dbgs() << "scev-salvage: IV : " << *IV << "with SCEV: " << *S
- << "\n");
- return Phi;
- }
- }
+ // There should only be PHI node IVs.
+ PHINode *P = cast<PHINode>(&*IV);
- for (PHINode &Phi : L.getHeader()->phis()) {
- if (!SE.isSCEVable(Phi.getType()))
- continue;
-
- const llvm::SCEV *PhiSCEV = SE.getSCEV(&Phi);
- if (const llvm::SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(PhiSCEV))
- if (!Rec->isAffine())
- continue;
+ if (IsSuitableIV(P))
+ return P;
+ }
- LLVM_DEBUG(dbgs() << "scev-salvage: Selected IV from loop header: " << Phi
- << " with SCEV: " << *PhiSCEV << "\n");
- return &Phi;
+ for (PHINode &P : L.getHeader()->phis()) {
+ if (IsSuitableIV(&P))
+ return &P;
}
return nullptr;
}
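
Several changes above guard APInt::getSExtValue() behind getMinSignedBits() <= 64, since getSExtValue asserts on wider constants. A small sketch of that guard packaged as a reusable check; asSignedInt64 is an illustrative helper:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Optional.h"
using namespace llvm;

// Sketch: only produce an int64_t when the APInt actually fits in 64 signed
// bits; APInt::getSExtValue() would assert otherwise.
static Optional<int64_t> asSignedInt64(const APInt &V) {
  if (V.getMinSignedBits() > 64)
    return None;
  return V.getSExtValue();
}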
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 71eb393fcdd7..1ecbb86724e1 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -286,8 +286,8 @@ tryToUnrollAndJamLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
AssumptionCache &AC, DependenceInfo &DI,
OptimizationRemarkEmitter &ORE, int OptLevel) {
TargetTransformInfo::UnrollingPreferences UP =
- gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, OptLevel, None,
- None, None, None, None, None);
+ gatherUnrollingPreferences(L, SE, TTI, nullptr, nullptr, ORE, OptLevel,
+ None, None, None, None, None, None);
TargetTransformInfo::PeelingPreferences PP =
gatherPeelingPreferences(L, SE, TTI, None, None);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 49501f324a49..67702520511b 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -184,7 +184,8 @@ static const unsigned NoThreshold = std::numeric_limits<unsigned>::max();
/// flags, TTI overrides and user specified parameters.
TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, int OptLevel,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ OptimizationRemarkEmitter &ORE, int OptLevel,
Optional<unsigned> UserThreshold, Optional<unsigned> UserCount,
Optional<bool> UserAllowPartial, Optional<bool> UserRuntime,
Optional<bool> UserUpperBound, Optional<unsigned> UserFullUnrollMaxCount) {
@@ -214,7 +215,7 @@ TargetTransformInfo::UnrollingPreferences llvm::gatherUnrollingPreferences(
UP.MaxIterationsCountToAnalyze = UnrollMaxIterationsCountToAnalyze;
// Override with any target specific settings
- TTI.getUnrollingPreferences(L, SE, UP);
+ TTI.getUnrollingPreferences(L, SE, UP, &ORE);
// Apply size attributes
bool OptForSize = L->getHeader()->getParent()->hasOptSize() ||
@@ -318,6 +319,16 @@ struct EstimatedUnrollCost {
unsigned RolledDynamicCost;
};
+struct PragmaInfo {
+ PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU)
+ : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC),
+ PragmaEnableUnroll(PEU) {}
+ const bool UserUnrollCount;
+ const bool PragmaFullUnroll;
+ const unsigned PragmaCount;
+ const bool PragmaEnableUnroll;
+};
+
} // end anonymous namespace
/// Figure out if the loop is worth full unrolling.
@@ -746,13 +757,132 @@ public:
// Returns loop size estimation for unrolled loop, given the unrolling
// configuration specified by UP.
- uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
+ uint64_t
+ getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP,
+ const unsigned CountOverwrite = 0) const {
assert(LoopSize >= UP.BEInsns &&
"LoopSize should not be less than BEInsns!");
- return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
+ if (CountOverwrite)
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * CountOverwrite +
+ UP.BEInsns;
+ else
+ return static_cast<uint64_t>(LoopSize - UP.BEInsns) * UP.Count +
+ UP.BEInsns;
}
};
+static Optional<unsigned>
+shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo,
+ const unsigned TripMultiple, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ // Using unroll pragma
+ // 1st priority is unroll count set by "unroll-count" option.
+
+ if (PInfo.UserUnrollCount) {
+ if (UP.AllowRemainder &&
+ UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold)
+ return (unsigned)UnrollCount;
+ }
+
+ // 2nd priority is unroll count set by pragma.
+ if (PInfo.PragmaCount > 0) {
+ if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) &&
+ UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold)
+ return PInfo.PragmaCount;
+ }
+
+ if (PInfo.PragmaFullUnroll && TripCount != 0) {
+ if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold)
+ return TripCount;
+ }
+ // If we did not return by this point, continue with the remaining priorities.
+ return None;
+}
+
+static Optional<unsigned> shouldFullUnroll(
+ Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT,
+ ScalarEvolution &SE, const SmallPtrSetImpl<const Value *> &EphValues,
+ const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
+ // When computing the unrolled size, note that BEInsns are not replicated
+ // like the rest of the loop body.
+ if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+ return FullUnrollTripCount;
+
+ } else {
+ // The loop isn't that small, but we still can fully unroll it if that
+ // helps to remove a significant number of instructions.
+ // To check that, run additional analysis on the loop.
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, FullUnrollTripCount, DT, SE, EphValues, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100,
+ UP.MaxIterationsCountToAnalyze)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ return FullUnrollTripCount;
+ }
+ }
+ }
+ }
+ return None;
+}
+
+static Optional<unsigned>
+shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount,
+ const UnrollCostEstimator UCE,
+ const TargetTransformInfo::UnrollingPreferences &UP) {
+
+ unsigned count = UP.Count;
+ if (TripCount) {
+ if (!UP.Partial) {
+ LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
+ << "-unroll-allow-partial not given\n");
+ count = 0;
+ return count;
+ }
+ if (count == 0)
+ count = TripCount;
+ if (UP.PartialThreshold != NoThreshold) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+ while (count != 0 && TripCount % count != 0)
+ count--;
+ if (UP.AllowRemainder && count <= 1) {
+ // If there is no Count that is modulo of TripCount, set Count to
+ // largest power-of-two factor that satisfies the threshold limit.
+ // As we'll create a fixup loop, do this type of unrolling only if the
+ // remainder loop is allowed.
+ count = UP.DefaultUnrollRuntimeCount;
+ while (count != 0 &&
+ UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold)
+ count >>= 1;
+ }
+ if (count < 2) {
+ count = 0;
+ }
+ } else {
+ count = TripCount;
+ }
+ if (count > UP.MaxCount)
+ count = UP.MaxCount;
+
+ LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n");
+
+ return count;
+ }
+
+ // If we did not return by this point, continue with the remaining priorities.
+ return None;
+}
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
// Unless IgnoreUser is true, will also use metadata and command-line options
@@ -770,7 +900,18 @@ bool llvm::computeUnrollCount(
TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
UnrollCostEstimator UCE(*L, LoopSize);
+ Optional<unsigned> UnrollFactor;
+
+ const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
+ const bool PragmaFullUnroll = hasUnrollFullPragma(L);
+ const unsigned PragmaCount = unrollCountPragmaValue(L);
+ const bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
+ const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
+ PragmaEnableUnroll || UserUnrollCount;
+
+ PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount,
+ PragmaEnableUnroll);
// Use an explicit peel count that has been specified for testing. In this
// case it's not permitted to also specify an explicit unroll count.
if (PP.PeelCount) {
@@ -782,47 +923,29 @@ bool llvm::computeUnrollCount(
UP.Runtime = false;
return true;
}
-
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
- bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
- if (UserUnrollCount) {
- UP.Count = UnrollCount;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
- return true;
- }
-
// 2nd priority is unroll count set by pragma.
- unsigned PragmaCount = unrollCountPragmaValue(L);
- if (PragmaCount > 0) {
- UP.Count = PragmaCount;
- UP.Runtime = true;
- UP.AllowExpensiveTripCount = true;
- UP.Force = true;
- if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
- UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return true;
- }
- bool PragmaFullUnroll = hasUnrollFullPragma(L);
- if (PragmaFullUnroll && TripCount != 0) {
- UP.Count = TripCount;
- if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
- return false;
- }
+ UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
- bool PragmaEnableUnroll = hasUnrollEnablePragma(L);
- bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
- PragmaEnableUnroll || UserUnrollCount;
-
- if (ExplicitUnroll && TripCount != 0) {
- // If the loop has an unrolling pragma, we want to be more aggressive with
- // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
- // value which is larger than the default limits.
- UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
- UP.PartialThreshold =
- std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ if (UserUnrollCount || (PragmaCount > 0)) {
+ UP.AllowExpensiveTripCount = true;
+ UP.Force = true;
+ }
+ UP.Runtime |= (PragmaCount > 0);
+ return ExplicitUnroll;
+ } else {
+ if (ExplicitUnroll && TripCount != 0) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value which is larger than the default limits.
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
}
// 3rd priority is full unroll count.
@@ -852,71 +975,55 @@ bool llvm::computeUnrollCount(
unsigned FullUnrollTripCount =
ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount;
UP.Count = FullUnrollTripCount;
- if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
- // When computing the unrolled size, note that BEInsns are not replicated
- // like the rest of the loop body.
- if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- } else {
- // The loop isn't that small, but we still can fully unroll it if that
- // helps to remove a significant number of instructions.
- // To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, FullUnrollTripCount, DT, SE, EphValues, TTI,
- UP.Threshold * UP.MaxPercentThresholdBoost / 100,
- UP.MaxIterationsCountToAnalyze)) {
- unsigned Boost =
- getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
- if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
- UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
- return ExplicitUnroll;
- }
- }
- }
+
+ UnrollFactor =
+ shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP);
+
+ // If shouldFullUnroll can do the unrolling, some side parameters should be
+ // set.
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
+ return ExplicitUnroll;
+ } else {
+ UP.Count = FullUnrollTripCount;
}
// 4th priority is loop peeling.
- computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold);
+ computePeelCount(L, LoopSize, PP, TripCount, DT, SE, UP.Threshold);
if (PP.PeelCount) {
UP.Runtime = false;
UP.Count = 1;
return ExplicitUnroll;
}
+ // Before starting partial unrolling, set UP.Partial to true if the user
+ // explicitly asked for unrolling.
+ if (TripCount)
+ UP.Partial |= ExplicitUnroll;
+
// 5th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
- if (TripCount) {
- UP.Partial |= ExplicitUnroll;
- if (!UP.Partial) {
- LLVM_DEBUG(dbgs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- UP.Count = 0;
- return false;
- }
- if (UP.Count == 0)
- UP.Count = TripCount;
+ UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP);
+
+ if (UnrollFactor) {
+ UP.Count = *UnrollFactor;
+
+ if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
+ UP.Count != TripCount)
+ ORE->emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE,
+ "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma "
+ "because "
+ "unrolled size is too large.";
+ });
+
if (UP.PartialThreshold != NoThreshold) {
- // Reduce unroll count to be modulo of TripCount for partial unrolling.
- if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count =
- (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
- (LoopSize - UP.BEInsns);
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- while (UP.Count != 0 && TripCount % UP.Count != 0)
- UP.Count--;
- if (UP.AllowRemainder && UP.Count <= 1) {
- // If there is no Count that is modulo of TripCount, set Count to
- // largest power-of-two factor that satisfies the threshold limit.
- // As we'll create fixup loop, do the type of unrolling only if
- // remainder loop is allowed.
- UP.Count = UP.DefaultUnrollRuntimeCount;
- while (UP.Count != 0 &&
- UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
- UP.Count >>= 1;
- }
- if (UP.Count < 2) {
+ if (UP.Count == 0) {
if (PragmaEnableUnroll)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
@@ -926,25 +1033,8 @@ bool llvm::computeUnrollCount(
"pragma "
"because unrolled size is too large.";
});
- UP.Count = 0;
}
- } else {
- UP.Count = TripCount;
}
- if (UP.Count > UP.MaxCount)
- UP.Count = UP.MaxCount;
- if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
- UP.Count != TripCount)
- ORE->emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE,
- "FullUnrollAsDirectedTooLarge",
- L->getStartLoc(), L->getHeader())
- << "Unable to fully unroll loop as directed by unroll pragma "
- "because "
- "unrolled size is too large.";
- });
- LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count
- << "\n");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
@@ -981,8 +1071,6 @@ bool llvm::computeUnrollCount(
UP.AllowExpensiveTripCount = true;
}
}
-
- // Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
LLVM_DEBUG(
@@ -1017,7 +1105,7 @@ bool llvm::computeUnrollCount(
using namespace ore;
- if (PragmaCount > 0 && !UP.AllowRemainder)
+ if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder)
ORE->emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE,
"DifferentUnrollCountFromDirected",
@@ -1079,7 +1167,7 @@ static LoopUnrollResult tryToUnrollLoop(
bool NotDuplicatable;
bool Convergent;
TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, SE, TTI, BFI, PSI, OptLevel, ProvidedThreshold, ProvidedCount,
+ L, SE, TTI, BFI, PSI, ORE, OptLevel, ProvidedThreshold, ProvidedCount,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedFullUnrollMaxCount);
TargetTransformInfo::PeelingPreferences PP = gatherPeelingPreferences(
@@ -1529,3 +1617,25 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
return getLoopPassPreservedAnalyses();
}
+
+void LoopUnrollPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (UnrollOpts.AllowPartial != None)
+ OS << (UnrollOpts.AllowPartial.getValue() ? "" : "no-") << "partial;";
+ if (UnrollOpts.AllowPeeling != None)
+ OS << (UnrollOpts.AllowPeeling.getValue() ? "" : "no-") << "peeling;";
+ if (UnrollOpts.AllowRuntime != None)
+ OS << (UnrollOpts.AllowRuntime.getValue() ? "" : "no-") << "runtime;";
+ if (UnrollOpts.AllowUpperBound != None)
+ OS << (UnrollOpts.AllowUpperBound.getValue() ? "" : "no-") << "upperbound;";
+ if (UnrollOpts.AllowProfileBasedPeeling != None)
+ OS << (UnrollOpts.AllowProfileBasedPeeling.getValue() ? "" : "no-")
+ << "profile-peeling;";
+ if (UnrollOpts.FullUnrollMaxCount != None)
+ OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
+ OS << "O" << UnrollOpts.OptLevel;
+ OS << ">";
+}
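
computeUnrollCount now delegates to shouldPragmaUnroll, shouldFullUnroll and shouldPartialUnroll, each returning an Optional<unsigned>, and takes the first count produced (peeling is handled separately between the full and partial steps). A standalone sketch of that priority chaining; chooseUnrollCount and its parameters are illustrative, not the real LLVM interfaces:

#include "llvm/ADT/Optional.h"
using namespace llvm;

// Sketch: the first heuristic that yields a count wins; 0 means "do not unroll".
static unsigned chooseUnrollCount(Optional<unsigned> PragmaCount,
                                  Optional<unsigned> FullCount,
                                  Optional<unsigned> PartialCount) {
  if (PragmaCount)
    return *PragmaCount;  // explicit -unroll-count option or pragma
  if (FullCount)
    return *FullCount;    // full unroll fits the size threshold
  if (PartialCount)
    return *PartialCount; // partial unroll derived from the trip count
  return 0;
}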
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 9a854ff80246..76bb5497c2c2 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -232,10 +232,8 @@ namespace {
AU.addPreserved<LazyBranchProbabilityInfoPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
if (HasBranchDivergence)
AU.addRequired<LegacyDivergenceAnalysis>();
getLoopAnalysisUsage(AU);
@@ -539,11 +537,8 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
LPM = &LPMRef;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- assert(DT && "Cannot update MemorySSA without a valid DomTree.");
- }
+ MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
CurrentLoop = L;
Function *F = CurrentLoop->getHeader()->getParent();
@@ -551,19 +546,19 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) {
if (SanitizeMemory)
SafetyInfo.computeLoopSafetyInfo(L);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
bool Changed = false;
do {
assert(CurrentLoop->isLCSSAForm(*DT));
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
RedoLoop = false;
Changed |= processCurrentLoop();
} while (RedoLoop);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
return Changed;
@@ -1312,8 +1307,7 @@ void LoopUnswitch::splitExitEdges(
for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) {
BasicBlock *ExitBlock = ExitBlocks[I];
- SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock),
- pred_end(ExitBlock));
+ SmallVector<BasicBlock *, 4> Preds(predecessors(ExitBlock));
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index bd3001988369..186065db327e 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -55,11 +55,17 @@ static bool replaceConditionalBranchesOnConstant(Instruction *II,
Value *NewValue,
DomTreeUpdater *DTU) {
bool HasDeadBlocks = false;
- SmallSetVector<Instruction *, 8> Worklist;
+ SmallSetVector<Instruction *, 8> UnsimplifiedUsers;
replaceAndRecursivelySimplify(II, NewValue, nullptr, nullptr, nullptr,
- &Worklist);
- for (auto I : Worklist) {
- BranchInst *BI = dyn_cast<BranchInst>(I);
+ &UnsimplifiedUsers);
+ // UnsimplifiedUsers can contain PHI nodes that may be removed when
+ // replacing the branch instructions, so use a value handle worklist
+ // to handle those possibly removed instructions.
+ SmallVector<WeakVH, 8> Worklist(UnsimplifiedUsers.begin(),
+ UnsimplifiedUsers.end());
+
+ for (auto &VH : Worklist) {
+ BranchInst *BI = dyn_cast_or_null<BranchInst>(VH);
if (!BI)
continue;
if (BI->isUnconditional())
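
The change above snapshots the unsimplified users into WeakVH handles before walking them: if replacing one branch deletes another entry (for example a PHI node), the handle reads back as null and dyn_cast_or_null skips it. A rough standalone analogy using std::weak_ptr rather than LLVM's value handles, only to show the same discipline:

  #include <iostream>
  #include <memory>
  #include <vector>

  struct Node { int Id; };

  int main() {
    auto A = std::make_shared<Node>(Node{1});
    auto B = std::make_shared<Node>(Node{2});

    // Snapshot weak references first, as the pass snapshots WeakVHs.
    std::vector<std::weak_ptr<Node>> Worklist{A, B};

    B.reset(); // B dies while still on the worklist.

    for (auto &WH : Worklist) {
      auto N = WH.lock();   // analogous to dyn_cast_or_null on a WeakVH
      if (!N)
        continue;           // skip entries deleted in the meantime
      std::cout << "visiting node " << N->Id << "\n";
    }
  }
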
diff --git a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index ead8082f3036..1c186e9a0488 100644
--- a/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -357,11 +357,10 @@ static bool lowerExpectIntrinsic(Function &F) {
// Remove llvm.expect intrinsics. Iterate backwards in order
// to process select instructions before the intrinsic gets
// removed.
- for (auto BI = BB.rbegin(), BE = BB.rend(); BI != BE;) {
- Instruction *Inst = &*BI++;
- CallInst *CI = dyn_cast<CallInst>(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(llvm::reverse(BB))) {
+ CallInst *CI = dyn_cast<CallInst>(&Inst);
if (!CI) {
- if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(&Inst)) {
if (handleBrSelExpect(*SI))
ExpectIntrinsicsHandled++;
}
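
The new loop relies on llvm::make_early_inc_range over the reversed block so the current instruction can be erased without invalidating the traversal. The underlying early-increment idiom, shown standalone with a std::list standing in for a basic block's instruction list:

  #include <iostream>
  #include <list>

  int main() {
    std::list<int> Insts{1, 2, 3, 4, 5};

    for (auto It = Insts.begin(), End = Insts.end(); It != End;) {
      auto Cur = It++;      // advance first; this is what early_inc_range packages up
      if (*Cur % 2 == 0)
        Insts.erase(Cur);   // erasing Cur cannot invalidate It
    }

    for (int V : Insts)
      std::cout << V << ' '; // prints: 1 3 5
    std::cout << "\n";
  }
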
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 42c183a6408e..4e4097e13271 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -900,8 +900,7 @@ public:
// UndefedInsts and then check that we in fact remove them.
SmallSet<Instruction *, 16> UndefedInsts;
for (auto *Inst : reverse(ToRemove)) {
- for (auto I = Inst->use_begin(), E = Inst->use_end(); I != E;) {
- Use &U = *I++;
+ for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
if (auto *Undefed = dyn_cast<Instruction>(U.getUser()))
UndefedInsts.insert(Undefed);
U.set(UndefValue::get(Inst->getType()));
@@ -981,8 +980,9 @@ public:
Value *EltPtr = createElementPtr(Ptr, EltTy, Builder);
MatrixTy Result;
for (unsigned I = 0, E = Shape.getNumVectors(); I < E; ++I) {
- Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(I), Stride,
- Shape.getStride(), EltTy, Builder);
+ Value *GEP = computeVectorAddr(
+ EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
+ Stride, Shape.getStride(), EltTy, Builder);
Value *Vector = Builder.CreateAlignedLoad(
VecTy, GEP, getAlignForIndex(I, Stride, EltTy, MAlign),
IsVolatile, "col.load");
@@ -1071,9 +1071,11 @@ public:
auto VType = cast<VectorType>(Ty);
Value *EltPtr = createElementPtr(Ptr, VType->getElementType(), Builder);
for (auto Vec : enumerate(StoreVal.vectors())) {
- Value *GEP = computeVectorAddr(EltPtr, Builder.getInt64(Vec.index()),
- Stride, StoreVal.getStride(),
- VType->getElementType(), Builder);
+ Value *GEP = computeVectorAddr(
+ EltPtr,
+ Builder.getIntN(Stride->getType()->getScalarSizeInBits(),
+ Vec.index()),
+ Stride, StoreVal.getStride(), VType->getElementType(), Builder);
Builder.CreateAlignedStore(Vec.value(), GEP,
getAlignForIndex(Vec.index(), Stride,
VType->getElementType(),
@@ -2261,6 +2263,16 @@ PreservedAnalyses LowerMatrixIntrinsicsPass::run(Function &F,
return PreservedAnalyses::all();
}
+void LowerMatrixIntrinsicsPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LowerMatrixIntrinsicsPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (Minimal)
+ OS << "minimal";
+ OS << ">";
+}
+
namespace {
class LowerMatrixIntrinsicsLegacyPass : public FunctionPass {
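
Both the load and store hunks above stop hard-coding an i64 vector index and instead build it with Builder.getIntN at the stride operand's bit width, so the index constant matches the stride's type inside computeVectorAddr. The addressing scheme itself, sketched in plain C++ for a column-major matrix (names are illustrative, not the pass's API):

  #include <cstddef>
  #include <iostream>
  #include <vector>

  // Column I of a column-major matrix starts at Base + I * Stride elements.
  const double *columnAddr(const double *Base, std::size_t I, std::size_t Stride) {
    return Base + I * Stride;
  }

  int main() {
    // 3 rows x 2 columns, column-major, stride == number of rows.
    std::vector<double> M{1, 2, 3, 4, 5, 6};
    for (std::size_t Col = 0; Col < 2; ++Col)
      std::cout << "col " << Col << " starts at "
                << *columnAddr(M.data(), Col, 3) << "\n"; // 1, then 4
  }
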
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2e36c50b75fc..67335a45fb58 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -22,7 +22,6 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -67,9 +66,10 @@ using namespace llvm;
#define DEBUG_TYPE "memcpyopt"
-static cl::opt<bool>
- EnableMemorySSA("enable-memcpyopt-memoryssa", cl::init(true), cl::Hidden,
- cl::desc("Use MemorySSA-backed MemCpyOpt."));
+static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
+ "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore,
+ cl::desc("Enable memcpyopt even when libcalls are disabled"));
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
@@ -178,9 +178,9 @@ public:
}
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
- int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
-
- addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
+ TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
+ assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
+ addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
SI->getAlign().value(), SI);
}
@@ -282,13 +282,9 @@ private:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- if (!EnableMemorySSA)
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addPreserved<MemoryDependenceWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- if (EnableMemorySSA)
- AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
};
@@ -304,7 +300,6 @@ INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
@@ -329,10 +324,7 @@ static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
}
void MemCpyOptPass::eraseInstruction(Instruction *I) {
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- if (MD)
- MD->removeInstruction(I);
+ MSSAU->removeMemoryAccess(I);
I->eraseFromParent();
}
@@ -371,6 +363,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
+ // We can't track scalable types
+ if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
+ if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
+ return nullptr;
+
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
@@ -389,14 +386,12 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// memsets.
MemoryDef *LastMemDef = nullptr;
for (++BI; !BI->isTerminator(); ++BI) {
- if (MSSAU) {
- auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
- MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
- if (CurrentAcc) {
- MemInsertPoint = CurrentAcc;
- if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
- LastMemDef = CurrentDef;
- }
+ auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
+ MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
+ if (CurrentAcc) {
+ MemInsertPoint = CurrentAcc;
+ if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
+ LastMemDef = CurrentDef;
}
// Calls that only access inaccessible memory do not block merging
@@ -426,6 +421,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
break;
+ // We can't track ranges involving scalable types.
+ if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
+ break;
+
// Check to see if this stored value is of the same byte-splattable value.
Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
@@ -494,19 +493,17 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (!Range.TheStores.empty())
AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
- if (MSSAU) {
- assert(LastMemDef && MemInsertPoint &&
- "Both LastMemDef and MemInsertPoint need to be set");
- auto *NewDef =
- cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
- ? MSSAU->createMemoryAccessBefore(
- AMemSet, LastMemDef, MemInsertPoint)
- : MSSAU->createMemoryAccessAfter(
- AMemSet, LastMemDef, MemInsertPoint));
- MSSAU->insertDef(NewDef, /*RenameUses=*/true);
- LastMemDef = NewDef;
- MemInsertPoint = NewDef;
- }
+ assert(LastMemDef && MemInsertPoint &&
+ "Both LastMemDef and MemInsertPoint need to be set");
+ auto *NewDef =
+ cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
+ ? MSSAU->createMemoryAccessBefore(
+ AMemSet, LastMemDef, MemInsertPoint)
+ : MSSAU->createMemoryAccessAfter(
+ AMemSet, LastMemDef, MemInsertPoint));
+ MSSAU->insertDef(NewDef, /*RenameUses=*/true);
+ LastMemDef = NewDef;
+ MemInsertPoint = NewDef;
// Zap all the stores.
for (Instruction *SI : Range.TheStores)
@@ -615,17 +612,15 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
// TODO: Simplify this once P will be determined by MSSA, in which case the
// discrepancy can no longer occur.
MemoryUseOrDef *MemInsertPoint = nullptr;
- if (MSSAU) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
- MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
- } else {
- const Instruction *ConstP = P;
- for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
- ++LI->getReverseIterator())) {
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
- MemInsertPoint = MA;
- break;
- }
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
+ MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
+ } else {
+ const Instruction *ConstP = P;
+ for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
+ ++LI->getReverseIterator())) {
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
+ MemInsertPoint = MA;
+ break;
}
}
}
@@ -634,12 +629,10 @@ bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
for (auto *I : llvm::reverse(ToLift)) {
LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
I->moveBefore(P);
- if (MSSAU) {
- assert(MemInsertPoint && "Must have found insert point");
- if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
- MSSAU->moveAfter(MA, MemInsertPoint);
- MemInsertPoint = MA;
- }
+ assert(MemInsertPoint && "Must have found insert point");
+ if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
+ MSSAU->moveAfter(MA, MemInsertPoint);
+ MemInsertPoint = MA;
}
}
@@ -673,7 +666,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LI->getParent() == SI->getParent()) {
auto *T = LI->getType();
- if (T->isAggregateType()) {
+ // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
+ // the corresponding libcalls are not available.
+ // TODO: We should really distinguish between libcall availability and
+ // our ability to introduce intrinsics.
+ if (T->isAggregateType() &&
+ (EnableMemCpyOptWithoutLibcalls ||
+ (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
MemoryLocation LoadLoc = MemoryLocation::get(LI);
// We use alias analysis to check if an instruction may store to
@@ -703,9 +702,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (P) {
// If we load from memory that may alias the memory we store to,
// memmove must be used to preserve semantic. If not, memcpy can
- // be used.
+ // be used. Also, if we load from constant memory, memcpy can be used
+ // as the constant memory won't be modified.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::get(SI), LoadLoc))
+ if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
UseMemMove = true;
uint64_t Size = DL.getTypeStoreSize(T);
@@ -724,13 +724,10 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
<< *M << "\n");
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(SI);
eraseInstruction(LI);
@@ -746,38 +743,21 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
CallInst *C = nullptr;
- if (EnableMemorySSA) {
- if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
- MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
- // The load most post-dom the call. Limit to the same block for now.
- // TODO: Support non-local call-slot optimization?
- if (LoadClobber->getBlock() == SI->getParent())
- C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
- }
- } else {
- MemDepResult ldep = MD->getDependency(LI);
- if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
- C = dyn_cast<CallInst>(ldep.getInst());
+ if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
+ MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
+ // The load must post-dominate the call. Limit to the same block for now.
+ // TODO: Support non-local call-slot optimization?
+ if (LoadClobber->getBlock() == SI->getParent())
+ C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
}
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (EnableMemorySSA) {
- if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
- MSSA->getMemoryAccess(SI)))
- C = nullptr;
- } else {
- for (BasicBlock::iterator I = --SI->getIterator(),
- E = C->getIterator();
- I != E; --I) {
- if (isModOrRefSet(AA->getModRefInfo(&*I, StoreLoc))) {
- C = nullptr;
- break;
- }
- }
- }
+ if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
+ MSSA->getMemoryAccess(SI)))
+ C = nullptr;
}
if (C) {
@@ -796,6 +776,13 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
}
}
+ // The following code creates memset intrinsics out of thin air. Don't do
+ // this if the corresponding libfunc is not available.
+ // TODO: We should really distinguish between libcall availability and
+ // our ability to introduce intrinsics.
+ if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
+ return false;
+
// There are two cases that are interesting for this code to handle: memcpy
// and memset. Right now we only handle memset.
@@ -822,13 +809,12 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI)));
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ // The newly inserted memset is immediately overwritten by the original
+ // store, so we do not need to rename uses.
+ auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ M, StoreDef->getDefiningAccess(), StoreDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
eraseInstruction(SI);
NumMemSetInfer++;
@@ -859,7 +845,7 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
- Value *cpySrc, uint64_t cpyLen,
+ Value *cpySrc, TypeSize cpySize,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
//
@@ -875,6 +861,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// src only holds uninitialized values at the moment of the call, meaning that
// the memcpy can be discarded rather than moved.
+ // We can't optimize scalable types.
+ if (cpySize.isScalable())
+ return false;
+
// Lifetime marks shouldn't be operated on.
if (Function *F = C->getCalledFunction())
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
@@ -893,13 +883,13 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLen < srcSize)
+ if (cpySize < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+ if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
DL, C, DT))
return false;
@@ -1020,11 +1010,6 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
}
- // Drop any cached information about the call, because we may have changed
- // its dependence information by changing its parameter.
- if (MD)
- MD->removeInstruction(C);
-
// Update AA metadata
// FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
// handled here, but combineMetadata doesn't support them yet
@@ -1073,28 +1058,19 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
//
// TODO: If the code between M and MDep is transparent to the destination "c",
// then we could still perform the xform by moving M up to the first memcpy.
- if (EnableMemorySSA) {
- // TODO: It would be sufficient to check the MDep source up to the memcpy
- // size of M, rather than MDep.
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep =
- MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
- M->getIterator(), M->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ // TODO: It would be sufficient to check the MDep source up to the memcpy
+ // size of M, rather than MDep.
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
+ return false;
// If the dest of the second might alias the source of the first, then the
- // source and dest might overlap. We still want to eliminate the intermediate
- // value, but we have to generate a memmove instead of memcpy.
+ // source and dest might overlap. However, if the source of the first memcpy
+ // is constant memory, it cannot be modified through the second one, so a
+ // plain memcpy remains correct. Otherwise, we still want to eliminate the
+ // intermediate value, but we have to generate a memmove instead of memcpy.
bool UseMemMove = false;
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(MDep)))
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
UseMemMove = true;
// If all checks passed, then we can transform M.
@@ -1121,12 +1097,10 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
MDep->getRawSource(), MDep->getSourceAlign(),
M->getLength(), M->isVolatile());
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
- auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
+ auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
// Remove the instruction we're replacing.
eraseInstruction(M);
@@ -1156,30 +1130,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
// Check that src and dst of the memcpy aren't the same. While memcpy
// operands cannot partially overlap, exact equality is allowed.
- if (!AA->isNoAlias(MemoryLocation(MemCpy->getSource(),
- LocationSize::precise(1)),
- MemoryLocation(MemCpy->getDest(),
- LocationSize::precise(1))))
+ if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
return false;
- if (EnableMemorySSA) {
- // We know that dst up to src_size is not written. We now need to make sure
- // that dst up to dst_size is not accessed. (If we did not move the memset,
- // checking for reads would be sufficient.)
- if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
- MSSA->getMemoryAccess(MemSet),
- MSSA->getMemoryAccess(MemCpy))) {
- return false;
- }
- } else {
- // We have already checked that dst up to src_size is not accessed. We
- // need to make sure that there are no accesses up to dst_size either.
- MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
- MemoryLocation::getForDest(MemSet), false, MemCpy->getIterator(),
- MemCpy->getParent());
- if (DstDepInfo.getInst() != MemSet)
- return false;
- }
+ // We know that dst up to src_size is not written. We now need to make sure
+ // that dst up to dst_size is not accessed. (If we did not move the memset,
+ // checking for reads would be sufficient.)
+ if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
+ MSSA->getMemoryAccess(MemSet),
+ MSSA->getMemoryAccess(MemCpy)))
+ return false;
// Use the same i8* dest as the memcpy, killing the memset dest if different.
Value *Dest = MemCpy->getRawDest();
@@ -1229,18 +1189,16 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
SrcSize),
MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
- if (MSSAU) {
- assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
- "MemCpy must be a MemoryDef");
- // The new memset is inserted after the memcpy, but it is known that its
- // defining access is the memset about to be removed which immediately
- // precedes the memcpy.
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessBefore(
- NewMemSet, LastDef->getDefiningAccess(), LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
+ "MemCpy must be a MemoryDef");
+ // The new memset is inserted after the memcpy, but it is known that its
+ // defining access is the memset about to be removed which immediately
+ // precedes the memcpy.
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessBefore(
+ NewMemSet, LastDef->getDefiningAccess(), LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(MemSet);
return true;
@@ -1248,23 +1206,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
/// Determine whether the instruction has undefined content for the given Size,
/// either because it was freshly alloca'd or started its lifetime.
-static bool hasUndefContents(Instruction *I, Value *Size) {
- if (isa<AllocaInst>(I))
- return true;
-
- if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
- if (II->getIntrinsicID() == Intrinsic::lifetime_start)
- if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
- if (LTSize->getZExtValue() >= CSize->getZExtValue())
- return true;
- }
-
- return false;
-}
-
-static bool hasUndefContentsMSSA(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
- MemoryDef *Def, Value *Size) {
+static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
+ MemoryDef *Def, Value *Size) {
if (MSSA->isLiveOnEntryDef(Def))
return isa<AllocaInst>(getUnderlyingObject(V));
@@ -1338,19 +1281,12 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
// easily represent this location, we use the full 0..CopySize range.
MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
bool CanReduceSize = false;
- if (EnableMemorySSA) {
- MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- MemSetAccess->getDefiningAccess(), MemCpyLoc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- if (hasUndefContentsMSSA(MSSA, AA, MemCpy->getSource(), MD, CopySize))
- CanReduceSize = true;
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- MemCpyLoc, true, MemSet->getIterator(), MemSet->getParent());
- if (DepInfo.isDef() && hasUndefContents(DepInfo.getInst(), CopySize))
+ MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ MemSetAccess->getDefiningAccess(), MemCpyLoc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
CanReduceSize = true;
- }
if (!CanReduceSize)
return false;
@@ -1362,12 +1298,10 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
Instruction *NewM =
Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
CopySize, MaybeAlign(MemCpy->getDestAlignment()));
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
- auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
+ auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
return true;
}
@@ -1397,149 +1331,90 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
Instruction *NewM =
Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
MaybeAlign(M->getDestAlignment()), false);
- if (MSSAU) {
- auto *LastDef =
- cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
- auto *NewAccess =
- MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
- MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
- }
+ auto *LastDef =
+ cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
+ auto *NewAccess =
+ MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
+ MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
eraseInstruction(M);
++NumCpyToSet;
return true;
}
- if (EnableMemorySSA) {
- MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
- MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
- MemoryLocation DestLoc = MemoryLocation::getForDest(M);
- const MemoryAccess *DestClobber =
- MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- // The memcpy most post-dom the memset, so limit this to the same basic
- // block. A non-local generalization is likely not worthwhile.
- if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
- if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
- if (DestClobber->getBlock() == M->getParent())
- if (processMemSetMemCpyDependence(M, MDep))
- return true;
-
- MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
- AnyClobber, MemoryLocation::getForSource(M));
-
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
- if (Instruction *MI = MD->getMemoryInst()) {
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (auto *C = dyn_cast<CallInst>(MI)) {
- // The memcpy must post-dom the call. Limit to the same block for
- // now. Additionally, we need to ensure that there are no accesses
- // to dest between the call and the memcpy. Accesses to src will be
- // checked by performCallSlotOptzn().
- // TODO: Support non-local call-slot optimization?
- if (C->getParent() == M->getParent() &&
- !accessedBetween(*AA, DestLoc, MD, MA)) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment,
- C)) {
- LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
- << " call: " << *C << "\n"
- << " memcpy: " << *M << "\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- }
- }
- if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep);
- if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
- if (performMemCpyToMemSetOptzn(M, MDep)) {
- LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
- eraseInstruction(M);
- ++NumCpyToSet;
- return true;
- }
- }
- }
-
- if (hasUndefContentsMSSA(MSSA, AA, M->getSource(), MD, M->getLength())) {
- LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
- } else {
- MemDepResult DepInfo = MD->getDependency(M);
-
- // Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- if (DepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
+ MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
+ MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
+ MemoryLocation DestLoc = MemoryLocation::getForDest(M);
+ const MemoryAccess *DestClobber =
+ MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
+
+ // Try to turn a partially redundant memset + memcpy into
+ // memcpy + smaller memset. We don't need the memcpy size for this.
+ // The memcpy must post-dominate the memset, so limit this to the same basic
+ // block. A non-local generalization is likely not worthwhile.
+ if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
+ if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
+ if (DestClobber->getBlock() == M->getParent())
if (processMemSetMemCpyDependence(M, MDep))
return true;
- // There are four possible optimizations we can do for memcpy:
- // a) memcpy-memcpy xform which exposes redundance for DSE.
- // b) call-memcpy xform for return slot optimization.
- // c) memcpy from freshly alloca'd space or space that has just started
- // its lifetime copies undefined data, and we can therefore eliminate
- // the memcpy in favor of the data that was already at the destination.
- // d) memcpy from a just-memset'd source can be turned into memset.
- if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
- if (DepInfo.isClobber()) {
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- // FIXME: Can we pass in either of dest/src alignment here instead
- // of conservatively taking the minimum?
- Align Alignment = std::min(M->getDestAlign().valueOrOne(),
- M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
+ MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ AnyClobber, MemoryLocation::getForSource(M));
+
+ // There are four possible optimizations we can do for memcpy:
+ // a) memcpy-memcpy xform which exposes redundance for DSE.
+ // b) call-memcpy xform for return slot optimization.
+ // c) memcpy from freshly alloca'd space or space that has just started
+ // its lifetime copies undefined data, and we can therefore eliminate
+ // the memcpy in favor of the data that was already at the destination.
+ // d) memcpy from a just-memset'd source can be turned into memset.
+ if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
+ if (Instruction *MI = MD->getMemoryInst()) {
+ if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
+ if (auto *C = dyn_cast<CallInst>(MI)) {
+ // The memcpy must post-dominate the call. Limit to the same block for
+ // now. Additionally, we need to ensure that there are no accesses
+ // to dest between the call and the memcpy. Accesses to src will be
+ // checked by performCallSlotOptzn().
+ // TODO: Support non-local call-slot optimization?
+ if (C->getParent() == M->getParent() &&
+ !accessedBetween(*AA, DestLoc, MD, MA)) {
+ // FIXME: Can we pass in either of dest/src alignment here instead
+ // of conservatively taking the minimum?
+ Align Alignment = std::min(M->getDestAlign().valueOrOne(),
+ M->getSourceAlign().valueOrOne());
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ C)) {
+ LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
+ << " call: " << *C << "\n"
+ << " memcpy: " << *M << "\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
}
}
- }
-
- MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
- MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
- SrcLoc, true, M->getIterator(), M->getParent());
-
- if (SrcDepInfo.isClobber()) {
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ if (auto *MDep = dyn_cast<MemCpyInst>(MI))
return processMemCpyMemCpyDependence(M, MDep);
- } else if (SrcDepInfo.isDef()) {
- if (hasUndefContents(SrcDepInfo.getInst(), M->getLength())) {
- eraseInstruction(M);
- ++NumMemCpyInstr;
- return true;
- }
- }
-
- if (SrcDepInfo.isClobber())
- if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
+ if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep)) {
+ LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
eraseInstruction(M);
++NumCpyToSet;
return true;
}
+ }
+ }
+
+ if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
+ LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
}
return false;
@@ -1548,12 +1423,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
/// not to alias.
bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
- if (!TLI->has(LibFunc_memmove))
- return false;
-
- // See if the pointers alias.
- if (!AA->isNoAlias(MemoryLocation::getForDest(M),
- MemoryLocation::getForSource(M)))
+ // See if the memmove could potentially modify its own source.
+ if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
return false;
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
@@ -1569,11 +1440,6 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
// For MemorySSA nothing really changes (except that memcpy may imply stricter
// aliasing guarantees).
- // MemDep may have over conservative information about this instruction, just
- // conservatively flush it from the cache.
- if (MD)
- MD->removeInstruction(M);
-
++NumMoveToCpy;
return true;
}
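
processMemMove now checks whether the memmove could modify its own source via getModRefInfo instead of requiring source and dest to be no-alias, and it no longer insists on the memmove libcall being available. One consequence, sketched as hypothetical source code: a memmove whose source is constant memory can be turned into a memcpy even when aliasing cannot be ruled out, because nothing can be modified through the constant source.

  #include <cstring>

  static const int Table[4] = {1, 2, 3, 4};

  void copyTable(int *Dst) {
    // The source is a constant global, so this memmove cannot modify it;
    // under the new check the pass may rewrite the call as a memcpy.
    std::memmove(Dst, Table, sizeof(Table));
  }

  int main() {
    int Buf[4] = {};
    copyTable(Buf);
    return Buf[0] == 1 ? 0 : 1;
  }
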
@@ -1584,24 +1450,16 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = CB.getParamByValType(ArgNo);
- uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
+ TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ if (!CallAccess)
+ return false;
MemCpyInst *MDep = nullptr;
- if (EnableMemorySSA) {
- MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
- if (!CallAccess)
- return false;
- MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
- CallAccess->getDefiningAccess(), Loc);
- if (auto *MD = dyn_cast<MemoryDef>(Clobber))
- MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
- } else {
- MemDepResult DepInfo = MD->getPointerDependencyFrom(
- Loc, true, CB.getIterator(), CB.getParent());
- if (!DepInfo.isClobber())
- return false;
- MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- }
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
// If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
// a memcpy, see if we can byval from the source of the memcpy instead of the
@@ -1612,7 +1470,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (!C1 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || !TypeSize::isKnownGE(
+ TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
@@ -1639,19 +1498,9 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// *b = 42;
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
- if (EnableMemorySSA) {
- if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
- return false;
- } else {
- // NOTE: This is conservative, it will stop on any read from the source loc,
- // not just the defining memcpy.
- MemDepResult SourceDep = MD->getPointerDependencyFrom(
- MemoryLocation::getForSource(MDep), false,
- CB.getIterator(), MDep->getParent());
- if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
- return false;
- }
+ if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ return false;
Value *TmpCast = MDep->getSource();
if (MDep->getSource()->getType() != ByValArg->getType()) {
@@ -1718,47 +1567,33 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
}
PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto *MD = !EnableMemorySSA ? &AM.getResult<MemoryDependenceAnalysis>(F)
- : AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto *AC = &AM.getResult<AssumptionAnalysis>(F);
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *MSSA = EnableMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F)
- : AM.getCachedResult<MemorySSAAnalysis>(F);
+ auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
- bool MadeChange =
- runImpl(F, MD, &TLI, AA, AC, DT, MSSA ? &MSSA->getMSSA() : nullptr);
+ bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
if (!MadeChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
- if (MD)
- PA.preserve<MemoryDependenceAnalysis>();
- if (MSSA)
- PA.preserve<MemorySSAAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
-bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
- TargetLibraryInfo *TLI_, AliasAnalysis *AA_,
- AssumptionCache *AC_, DominatorTree *DT_,
- MemorySSA *MSSA_) {
+bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
+ AliasAnalysis *AA_, AssumptionCache *AC_,
+ DominatorTree *DT_, MemorySSA *MSSA_) {
bool MadeChange = false;
- MD = MD_;
TLI = TLI_;
AA = AA_;
AC = AC_;
DT = DT_;
MSSA = MSSA_;
MemorySSAUpdater MSSAU_(MSSA_);
- MSSAU = MSSA_ ? &MSSAU_ : nullptr;
- // If we don't have at least memset and memcpy, there is little point of doing
- // anything here. These are required by a freestanding implementation, so if
- // even they are disabled, there is no point in trying hard.
- if (!TLI->has(LibFunc_memset) || !TLI->has(LibFunc_memcpy))
- return false;
+ MSSAU = &MSSAU_;
while (true) {
if (!iterateOnFunction(F))
@@ -1766,10 +1601,9 @@ bool MemCpyOptPass::runImpl(Function &F, MemoryDependenceResults *MD_,
MadeChange = true;
}
- if (MSSA_ && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA_->verifyMemorySSA();
- MD = nullptr;
return MadeChange;
}
@@ -1778,17 +1612,11 @@ bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- auto *MDWP = !EnableMemorySSA
- ? &getAnalysis<MemoryDependenceWrapperPass>()
- : getAnalysisIfAvailable<MemoryDependenceWrapperPass>();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSAWP = EnableMemorySSA
- ? &getAnalysis<MemorySSAWrapperPass>()
- : getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- return Impl.runImpl(F, MDWP ? & MDWP->getMemDep() : nullptr, TLI, AA, AC, DT,
- MSSAWP ? &MSSAWP->getMSSA() : nullptr);
+ return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
}
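
With the MemoryDependenceAnalysis path removed, MemCpyOpt is now MemorySSA-only, bails out on scalable vector types, and only introduces memset/memcpy intrinsics when the corresponding libcalls are available or the new -enable-memcpyopt-without-libcalls flag is set. For orientation, the core store-merging transformation the pass performs, written as hypothetical source rather than IR:

  // Three adjacent stores of the same splat byte value; tryMergingIntoMemset
  // can replace them with a single memset(P, 0, 3 * sizeof(unsigned)) when
  // the memset libcall is available (or the flag above is set).
  void clearThree(unsigned *P) {
    P[0] = 0;
    P[1] = 0;
    P[2] = 0;
  }

  int main() {
    unsigned Buf[3] = {7, 7, 7};
    clearThree(Buf);
    return Buf[0] + Buf[1] + Buf[2]; // 0
  }
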
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index f13f24ad2027..aac0deea5be3 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -66,15 +66,6 @@ namespace {
#define DEBUG_TYPE "mergeicmps"
-// Returns true if the instruction is a simple load or a simple store
-static bool isSimpleLoadOrStore(const Instruction *I) {
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isSimple();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isSimple();
- return false;
-}
-
// A BCE atom "Binary Compare Expression Atom" represents an integer load
// that is a constant offset from a base value, e.g. `a` or `o.c` in the example
// at the top.
@@ -154,6 +145,10 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
return {};
}
Value *const Addr = LoadI->getOperand(0);
+ if (Addr->getType()->getPointerAddressSpace() != 0) {
+ LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
+ return {};
+ }
auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
if (!GEP)
return {};
@@ -234,6 +229,8 @@ class BCECmpBlock {
InstructionSet BlockInsts;
// The block requires splitting.
bool RequireSplit = false;
+ // Original order of this block in the chain.
+ unsigned OrigOrder = 0;
private:
BCECmp Cmp;
@@ -244,14 +241,13 @@ bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
// If this instruction may clobber the loads and is in middle of the BCE cmp
// block instructions, then bail for now.
if (Inst->mayWriteToMemory()) {
- // Bail if this is not a simple load or store
- if (!isSimpleLoadOrStore(Inst))
- return false;
- // Disallow stores that might alias the BCE operands
- MemoryLocation LLoc = MemoryLocation::get(Cmp.Lhs.LoadI);
- MemoryLocation RLoc = MemoryLocation::get(Cmp.Rhs.LoadI);
- if (isModSet(AA.getModRefInfo(Inst, LLoc)) ||
- isModSet(AA.getModRefInfo(Inst, RLoc)))
+ auto MayClobber = [&](LoadInst *LI) {
+ // If a potentially clobbering instruction comes before the load,
+ // we can still safely sink the load.
+ return !Inst->comesBefore(LI) &&
+ isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
+ };
+ if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
return false;
}
// Make sure this instruction does not use any of the BCE cmp block
@@ -386,39 +382,83 @@ static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
<< Comparison.Rhs().BaseId << " + "
<< Comparison.Rhs().Offset << "\n");
LLVM_DEBUG(dbgs() << "\n");
+ Comparison.OrigOrder = Comparisons.size();
Comparisons.push_back(std::move(Comparison));
}
// A chain of comparisons.
class BCECmpChain {
- public:
- BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
- AliasAnalysis &AA);
-
- int size() const { return Comparisons_.size(); }
+public:
+ using ContiguousBlocks = std::vector<BCECmpBlock>;
-#ifdef MERGEICMPS_DOT_ON
- void dump() const;
-#endif // MERGEICMPS_DOT_ON
+ BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
+ AliasAnalysis &AA);
bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
DomTreeUpdater &DTU);
-private:
- static bool IsContiguous(const BCECmpBlock &First,
- const BCECmpBlock &Second) {
- return First.Lhs().BaseId == Second.Lhs().BaseId &&
- First.Rhs().BaseId == Second.Rhs().BaseId &&
- First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
- First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
+ bool atLeastOneMerged() const {
+ return any_of(MergedBlocks_,
+ [](const auto &Blocks) { return Blocks.size() > 1; });
}
+private:
PHINode &Phi_;
- std::vector<BCECmpBlock> Comparisons_;
+ // The list of all blocks in the chain, grouped by contiguity.
+ std::vector<ContiguousBlocks> MergedBlocks_;
// The original entry block (before sorting);
BasicBlock *EntryBlock_;
};
+static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) {
+ return First.Lhs().BaseId == Second.Lhs().BaseId &&
+ First.Rhs().BaseId == Second.Rhs().BaseId &&
+ First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
+ First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
+}
+
+static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) {
+ unsigned MinOrigOrder = std::numeric_limits<unsigned>::max();
+ for (const BCECmpBlock &Block : Blocks)
+ MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder);
+ return MinOrigOrder;
+}
+
+/// Given a chain of comparison blocks, groups the blocks into contiguous
+/// ranges that can be merged together into a single comparison.
+static std::vector<BCECmpChain::ContiguousBlocks>
+mergeBlocks(std::vector<BCECmpBlock> &&Blocks) {
+ std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks;
+
+ // Sort to detect continuous offsets.
+ llvm::sort(Blocks,
+ [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
+ return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
+ std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
+ });
+
+ BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr;
+ for (BCECmpBlock &Block : Blocks) {
+ if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) {
+ MergedBlocks.emplace_back();
+ LastMergedBlock = &MergedBlocks.back();
+ } else {
+ LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into "
+ << LastMergedBlock->back().BB->getName() << "\n");
+ }
+ LastMergedBlock->push_back(std::move(Block));
+ }
+
+ // While we allow reordering for merging, do not reorder unmerged comparisons.
+ // Doing so may introduce a branch on poison.
+ llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks,
+ const BCECmpChain::ContiguousBlocks &RhsBlocks) {
+ return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks);
+ });
+
+ return MergedBlocks;
+}
+
BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
AliasAnalysis &AA)
: Phi_(Phi) {
@@ -498,47 +538,9 @@ BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
return;
}
EntryBlock_ = Comparisons[0].BB;
- Comparisons_ = std::move(Comparisons);
-#ifdef MERGEICMPS_DOT_ON
- errs() << "BEFORE REORDERING:\n\n";
- dump();
-#endif // MERGEICMPS_DOT_ON
- // Reorder blocks by LHS. We can do that without changing the
- // semantics because we are only accessing dereferencable memory.
- llvm::sort(Comparisons_,
- [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
- return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
- std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
- });
-#ifdef MERGEICMPS_DOT_ON
- errs() << "AFTER REORDERING:\n\n";
- dump();
-#endif // MERGEICMPS_DOT_ON
+ MergedBlocks_ = mergeBlocks(std::move(Comparisons));
}
-#ifdef MERGEICMPS_DOT_ON
-void BCECmpChain::dump() const {
- errs() << "digraph dag {\n";
- errs() << " graph [bgcolor=transparent];\n";
- errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
- errs() << " edge [color=black];\n";
- for (size_t I = 0; I < Comparisons_.size(); ++I) {
- const auto &Comparison = Comparisons_[I];
- errs() << " \"" << I << "\" [label=\"%"
- << Comparison.Lhs().Base()->getName() << " + "
- << Comparison.Lhs().Offset << " == %"
- << Comparison.Rhs().Base()->getName() << " + "
- << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
- << " bytes)\"];\n";
- const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
- if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
- errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
- }
- errs() << " \"Phi\" [label=\"Phi\"];\n";
- errs() << "}\n\n";
-}
-#endif // MERGEICMPS_DOT_ON
-
namespace {
// A class to compute the name of a set of merged basic blocks.
@@ -661,47 +663,18 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
DomTreeUpdater &DTU) {
- assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
- // First pass to check if there is at least one merge. If not, we don't do
- // anything and we keep analysis passes intact.
- const auto AtLeastOneMerged = [this]() {
- for (size_t I = 1; I < Comparisons_.size(); ++I) {
- if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
- return true;
- }
- return false;
- };
- if (!AtLeastOneMerged())
- return false;
-
+ assert(atLeastOneMerged() && "simplifying trivial BCECmpChain");
LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
<< EntryBlock_->getName() << "\n");
// Effectively merge blocks. We go in the reverse direction from the phi block
// so that the next block is always available to branch to.
- const auto mergeRange = [this, &TLI, &AA, &DTU](int I, int Num,
- BasicBlock *InsertBefore,
- BasicBlock *Next) {
- return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
- InsertBefore, Next, Phi_, TLI, AA, DTU);
- };
- int NumMerged = 1;
+ BasicBlock *InsertBefore = EntryBlock_;
BasicBlock *NextCmpBlock = Phi_.getParent();
- for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
- if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
- LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
- << " into " << Comparisons_[I + 1].BB->getName()
- << "\n");
- ++NumMerged;
- } else {
- NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
- NumMerged = 1;
- }
+ for (const auto &Blocks : reverse(MergedBlocks_)) {
+ InsertBefore = NextCmpBlock = mergeComparisons(
+ Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU);
}
- // Insert the entry block for the new chain before the old entry block.
- // If the old entry block was the function entry, this ensures that the new
- // entry can become the function entry.
- NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
// Replace the original cmp chain with the new cmp chain by pointing all
// predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
@@ -729,13 +702,16 @@ bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
// Delete merged blocks. This also removes incoming values in phi.
SmallVector<BasicBlock *, 16> DeadBlocks;
- for (auto &Cmp : Comparisons_) {
- LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
- DeadBlocks.push_back(Cmp.BB);
+ for (const auto &Blocks : MergedBlocks_) {
+ for (const BCECmpBlock &Block : Blocks) {
+ LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName()
+ << "\n");
+ DeadBlocks.push_back(Block.BB);
+ }
}
DeleteDeadBlocks(DeadBlocks, &DTU);
- Comparisons_.clear();
+ MergedBlocks_.clear();
return true;
}
@@ -835,8 +811,8 @@ bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
if (Blocks.empty()) return false;
BCECmpChain CmpChain(Blocks, Phi, AA);
- if (CmpChain.size() < 2) {
- LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
+ if (!CmpChain.atLeastOneMerged()) {
+ LLVM_DEBUG(dbgs() << "skip: nothing merged\n");
return false;
}
@@ -862,9 +838,9 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
bool MadeChange = false;
- for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
+ for (BasicBlock &BB : llvm::drop_begin(F)) {
// A Phi operation is always first in a basic block.
- if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
+ if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin()))
MadeChange |= processPhi(*Phi, TLI, AA, DTU);
}
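
The MergeICmps rewrite groups comparison blocks into contiguous runs (ContiguousBlocks), records each block's original position in OrigOrder so that unmerged comparisons keep their original order and no branch on poison is introduced, and then merges each run separately. The kind of chain the pass targets, as illustrative C++ that is not taken from the patch:

  #include <iostream>

  struct Point { int X; int Y; int Z; };

  // Three field-by-field integer compares at contiguous offsets; mergeicmps
  // can collapse the whole chain into a single memcmp(&A, &B, sizeof(Point)).
  bool samePoint(const Point &A, const Point &B) {
    return A.X == B.X && A.Y == B.Y && A.Z == B.Z;
  }

  int main() {
    Point A{1, 2, 3}, B{1, 2, 3};
    std::cout << (samePoint(A, B) ? "equal" : "different") << "\n";
  }
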
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 033fc168a67f..734532a6670c 100644
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -420,3 +420,12 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void MergedLoadStoreMotionPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index ded5caf53b5a..6dca30d9876e 100644
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -282,8 +282,12 @@ NaryReassociatePass::matchAndReassociateMinOrMax(Instruction *I,
m_Value(LHS), m_Value(RHS));
if (match(I, MinMaxMatcher)) {
OrigSCEV = SE->getSCEV(I);
- return dyn_cast_or_null<Instruction>(
- tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS));
+ if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
+ tryReassociateMinOrMax(I, MinMaxMatcher, LHS, RHS)))
+ return NewMinMax;
+ if (auto *NewMinMax = dyn_cast_or_null<Instruction>(
+ tryReassociateMinOrMax(I, MinMaxMatcher, RHS, LHS)))
+ return NewMinMax;
}
return nullptr;
}
@@ -596,58 +600,60 @@ Value *NaryReassociatePass::tryReassociateMinOrMax(Instruction *I,
Value *LHS, Value *RHS) {
Value *A = nullptr, *B = nullptr;
MaxMinT m_MaxMin(m_Value(A), m_Value(B));
- for (unsigned int i = 0; i < 2; ++i) {
- if (!LHS->hasNUsesOrMore(3) && match(LHS, m_MaxMin)) {
- const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
- const SCEV *RHSExpr = SE->getSCEV(RHS);
- for (unsigned int j = 0; j < 2; ++j) {
- if (j == 0) {
- if (BExpr == RHSExpr)
- continue;
- // Transform 'I = (A op B) op RHS' to 'I = (A op RHS) op B' on the
- // first iteration.
- std::swap(BExpr, RHSExpr);
- } else {
- if (AExpr == RHSExpr)
- continue;
- // Transform 'I = (A op RHS) op B' 'I = (B op RHS) op A' on the second
- // iteration.
- std::swap(AExpr, RHSExpr);
- }
-
- // The optimization is profitable only if LHS can be removed in the end.
- // In other words LHS should be used (directly or indirectly) by I only.
- if (llvm::any_of(LHS->users(), [&](auto *U) {
- return U != I && !(U->hasOneUser() && *U->users().begin() == I);
- }))
- continue;
-
- SCEVExpander Expander(*SE, *DL, "nary-reassociate");
- SmallVector<const SCEV *, 2> Ops1{ BExpr, AExpr };
- const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
- const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
-
- Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
-
- if (!R1MinMax)
- continue;
-
- LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax
- << "\n");
-
- R1Expr = SE->getUnknown(R1MinMax);
- SmallVector<const SCEV *, 2> Ops2{ RHSExpr, R1Expr };
- const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
-
- Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
- NewMinMax->setName(Twine(I->getName()).concat(".nary"));
-
- LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
- << "NARY: Inserting: " << *NewMinMax << "\n");
- return NewMinMax;
- }
- }
- std::swap(LHS, RHS);
+
+ if (LHS->hasNUsesOrMore(3) ||
+ // The optimization is profitable only if LHS can be removed in the end.
+ // In other words, LHS should be used (directly or indirectly) by I only.
+ llvm::any_of(LHS->users(),
+ [&](auto *U) {
+ return U != I &&
+ !(U->hasOneUser() && *U->users().begin() == I);
+ }) ||
+ !match(LHS, m_MaxMin))
+ return nullptr;
+
+ auto tryCombination = [&](Value *A, const SCEV *AExpr, Value *B,
+ const SCEV *BExpr, Value *C,
+ const SCEV *CExpr) -> Value * {
+ SmallVector<const SCEV *, 2> Ops1{BExpr, AExpr};
+ const SCEVTypes SCEVType = convertToSCEVype(m_MaxMin);
+ const SCEV *R1Expr = SE->getMinMaxExpr(SCEVType, Ops1);
+
+ Instruction *R1MinMax = findClosestMatchingDominator(R1Expr, I);
+
+ if (!R1MinMax)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "NARY: Found common sub-expr: " << *R1MinMax << "\n");
+
+ SmallVector<const SCEV *, 2> Ops2{SE->getUnknown(C),
+ SE->getUnknown(R1MinMax)};
+ const SCEV *R2Expr = SE->getMinMaxExpr(SCEVType, Ops2);
+
+ SCEVExpander Expander(*SE, *DL, "nary-reassociate");
+ Value *NewMinMax = Expander.expandCodeFor(R2Expr, I->getType(), I);
+ NewMinMax->setName(Twine(I->getName()).concat(".nary"));
+
+ LLVM_DEBUG(dbgs() << "NARY: Deleting: " << *I << "\n"
+ << "NARY: Inserting: " << *NewMinMax << "\n");
+ return NewMinMax;
+ };
+
+ const SCEV *AExpr = SE->getSCEV(A);
+ const SCEV *BExpr = SE->getSCEV(B);
+ const SCEV *RHSExpr = SE->getSCEV(RHS);
+
+ if (BExpr != RHSExpr) {
+ // Try (A op RHS) op B
+ if (auto *NewMinMax = tryCombination(A, AExpr, RHS, RHSExpr, B, BExpr))
+ return NewMinMax;
+ }
+
+ if (AExpr != RHSExpr) {
+ // Try (RHS op B) op A
+ if (auto *NewMinMax = tryCombination(RHS, RHSExpr, B, BExpr, A, AExpr))
+ return NewMinMax;
}
+
return nullptr;
}
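
The rewrite above now tries both (A op RHS) op B and (RHS op B) op A, looking for an already-existing dominating min/max to reuse. As a hypothetical source-level sketch of why the reassociation pays off (plain C++ for intuition only, not the SCEV-based implementation):

    #include <algorithm>

    // If a value equivalent to max(a, c) already dominates the expression,
    // max(max(a, b), c) can be reassociated to max(max(a, c), b) and the
    // existing value reused; both forms compute max(a, b, c).
    int reuse(int a, int b, int c) {
      int m1 = std::max(a, c);               // pre-existing dominating max
      int m2 = std::max(std::max(a, b), c);  // reassociates to std::max(m1, b)
      return m1 + m2;
    }
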
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index a137d13c6ea0..91215cd19e2b 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -1194,9 +1194,10 @@ NewGVN::ExprResult NewGVN::createExpression(Instruction *I) const {
SimplifyCastInst(CI->getOpcode(), E->getOperand(0), CI->getType(), SQ);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
- } else if (isa<GetElementPtrInst>(I)) {
- Value *V = SimplifyGEPInst(
- E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);
+ } else if (auto *GEPI = dyn_cast<GetElementPtrInst>(I)) {
+ Value *V = SimplifyGEPInst(GEPI->getSourceElementType(),
+ ArrayRef<Value *>(E->op_begin(), E->op_end()),
+ GEPI->isInBounds(), SQ);
if (auto Simplified = checkExprResults(E, I, V))
return Simplified;
} else if (AllConstant) {
@@ -1818,7 +1819,7 @@ NewGVN::ExprResult NewGVN::performSymbolicCmpEvaluation(Instruction *I) const {
// See if we know something about the comparison itself, like it is the target
// of an assume.
auto *CmpPI = PredInfo->getPredicateInfoFor(I);
- if (dyn_cast_or_null<PredicateAssume>(CmpPI))
+ if (isa_and_nonnull<PredicateAssume>(CmpPI))
return ExprResult::some(
createConstantExpression(ConstantInt::getTrue(CI->getType())));
@@ -3606,7 +3607,7 @@ void NewGVN::convertClassToDFSOrdered(
// Skip uses in unreachable blocks, as we're going
// to delete them.
- if (ReachableBlocks.count(IBlock) == 0)
+ if (!ReachableBlocks.contains(IBlock))
continue;
DomTreeNode *DomNode = DT->getNode(IBlock);
diff --git a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 7872c553b412..44027ccd92ca 100644
--- a/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -82,7 +82,7 @@ static bool optimizeSQRT(CallInst *Call, Function *CalledFunc,
// Add attribute "readnone" so that backend can use a native sqrt instruction
// for this call.
- Call->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
+ Call->addFnAttr(Attribute::ReadNone);
// Insert a FP compare instruction and use it as the CurrBB branch condition.
Builder.SetInsertPoint(CurrBBTerm);
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 888edc4d69a8..b0fb8daaba8f 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -140,7 +140,7 @@ XorOpnd::XorOpnd(Value *V) {
// view the operand as "V | 0"
SymbolicPart = V;
- ConstPart = APInt::getNullValue(V->getType()->getScalarSizeInBits());
+ ConstPart = APInt::getZero(V->getType()->getScalarSizeInBits());
isOr = true;
}
@@ -1279,10 +1279,10 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
/// be returned.
static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
const APInt &ConstOpnd) {
- if (ConstOpnd.isNullValue())
+ if (ConstOpnd.isZero())
return nullptr;
- if (ConstOpnd.isAllOnesValue())
+ if (ConstOpnd.isAllOnes())
return Opnd;
Instruction *I = BinaryOperator::CreateAnd(
@@ -1304,7 +1304,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
// = ((x | c1) ^ c1) ^ (c1 ^ c2)
// = (x & ~c1) ^ (c1 ^ c2)
// It is useful only when c1 == c2.
- if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isNullValue())
+ if (!Opnd1->isOrExpr() || Opnd1->getConstPart().isZero())
return false;
if (!Opnd1->getValue()->hasOneUse())
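
A quick numeric check of the Xor rule quoted in the comment above, taking c1 == c2 (the profitable case, where the (c1 ^ c2) term vanishes); a standalone C++ sketch:

    // (x | c1) ^ c2 == (x & ~c1) ^ (c1 ^ c2); with c1 == c2 this is just x & ~c1.
    constexpr unsigned x = 0b1010, c1 = 0b0110, c2 = 0b0110;
    static_assert(((x | c1) ^ c2) == ((x & ~c1) ^ (c1 ^ c2)), "general identity");
    static_assert(((x | c1) ^ c2) == (x & ~c1), "simplified form when c1 == c2");
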
@@ -1361,7 +1361,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3((~C1) ^ C2);
// Do not increase code size!
- if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ if (!C3.isZero() && !C3.isAllOnes()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
@@ -1377,7 +1377,7 @@ bool ReassociatePass::CombineXorOpnd(Instruction *I, XorOpnd *Opnd1,
APInt C3 = C1 ^ C2;
// Do not increase code size
- if (!C3.isNullValue() && !C3.isAllOnesValue()) {
+ if (!C3.isZero() && !C3.isAllOnes()) {
int NewInstNum = ConstOpnd.getBoolValue() ? 1 : 2;
if (NewInstNum > DeadInstNum)
return false;
@@ -1468,8 +1468,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
Value *CV;
// Step 3.1: Try simplifying "CurrOpnd ^ ConstOpnd"
- if (!ConstOpnd.isNullValue() &&
- CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
+ if (!ConstOpnd.isZero() && CombineXorOpnd(I, CurrOpnd, ConstOpnd, CV)) {
Changed = true;
if (CV)
*CurrOpnd = XorOpnd(CV);
@@ -1510,7 +1509,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
ValueEntry VE(getRank(O.getValue()), O.getValue());
Ops.push_back(VE);
}
- if (!ConstOpnd.isNullValue()) {
+ if (!ConstOpnd.isZero()) {
Value *C = ConstantInt::get(Ty, ConstOpnd);
ValueEntry VE(getRank(C), C);
Ops.push_back(VE);
@@ -1519,7 +1518,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
if (Sz == 1)
return Ops.back().Op;
if (Sz == 0) {
- assert(ConstOpnd.isNullValue());
+ assert(ConstOpnd.isZero());
return ConstantInt::get(Ty, ConstOpnd);
}
}
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bc0fecc972fc..2d3490b2d29e 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -755,7 +755,7 @@ public:
}
bool operator==(const BDVState &Other) const {
- return OriginalValue == OriginalValue && BaseValue == Other.BaseValue &&
+ return OriginalValue == Other.OriginalValue && BaseValue == Other.BaseValue &&
Status == Other.Status;
}
@@ -910,7 +910,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
VerifyStates();
LLVM_DEBUG(dbgs() << "States after initialization:\n");
- for (auto Pair : States) {
+ for (const auto &Pair : States) {
LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
@@ -1002,7 +1002,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
VerifyStates();
LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
- for (auto Pair : States) {
+ for (const auto &Pair : States) {
LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
}
#endif
@@ -1163,7 +1163,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// llvm::Value of the correct type (and still remain pure).
// This will remove the need to add bitcasts.
assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
- "Sanity -- findBaseOrBDV should be pure!");
+ "findBaseOrBDV should be pure!");
#endif
}
Value *Base = BlockToValue[InBB];
@@ -1377,11 +1377,11 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
return AL;
// Remove the readonly, readnone, and statepoint function attributes.
- AttrBuilder FnAttrs = AL.getFnAttributes();
+ AttrBuilder FnAttrs = AL.getFnAttrs();
for (auto Attr : FnAttrsToStrip)
FnAttrs.removeAttribute(Attr);
- for (Attribute A : AL.getFnAttributes()) {
+ for (Attribute A : AL.getFnAttrs()) {
if (isStatepointDirectiveAttr(A))
FnAttrs.remove(A);
}
@@ -1533,9 +1533,8 @@ static StringRef getDeoptLowering(CallBase *Call) {
// FIXME: Calls have a *really* confusing interface around attributes
// with values.
const AttributeList &CSAS = Call->getAttributes();
- if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
- return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
- .getValueAsString();
+ if (CSAS.hasFnAttr(DeoptLowering))
+ return CSAS.getFnAttr(DeoptLowering).getValueAsString();
Function *F = Call->getCalledFunction();
assert(F && F->hasFnAttribute(DeoptLowering));
return F->getFnAttribute(DeoptLowering).getValueAsString();
@@ -1801,7 +1800,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
GCResult->setAttributes(
AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
- Call->getAttributes().getRetAttributes()));
+ Call->getAttributes().getRetAttrs()));
// We cannot RAUW or delete CS.getInstruction() because it could be in the
// live set of some other safepoint, in which case that safepoint's
@@ -1855,7 +1854,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
// It receives an iterator over the statepoint gc relocates and emits a store to the
// assigned location (via allocaMap) for each one of them. It adds the
// visited values into the visitedLiveValues set, which we will later use
-// for sanity checking.
+// for validation checking.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
DenseMap<Value *, AllocaInst *> &AllocaMap,
@@ -2454,7 +2453,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
SmallVectorImpl<CallBase *> &ToUpdate,
DefiningValueMapTy &DVCache) {
#ifndef NDEBUG
- // sanity check the input
+ // Validate the input
std::set<CallBase *> Uniqued;
Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
@@ -2620,9 +2619,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// we just grab that.
llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
- // Do some basic sanity checks on our liveness results before performing
- // relocation. Relocation can and will turn mistakes in liveness results
- // into non-sensical code which is must harder to debug.
+ // Do some basic validation checking on our liveness results before
+ // performing relocation. Relocation can and will turn mistakes in liveness
+ // results into nonsensical code which is much harder to debug.
// TODO: It would be nice to test consistency as well
assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
"statepoint must be reachable or liveness is meaningless");
@@ -2641,7 +2640,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
unique_unsorted(Live);
#ifndef NDEBUG
- // sanity check
+ // Validation check
for (auto *Ptr : Live)
assert(isHandledGCPointerType(Ptr->getType()) &&
"must be a gc pointer type");
@@ -2656,18 +2655,19 @@ template <typename AttrHolder>
static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
unsigned Index) {
AttrBuilder R;
- if (AH.getDereferenceableBytes(Index))
+ AttributeSet AS = AH.getAttributes().getAttributes(Index);
+ if (AS.getDereferenceableBytes())
R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
- AH.getDereferenceableBytes(Index)));
- if (AH.getDereferenceableOrNullBytes(Index))
+ AS.getDereferenceableBytes()));
+ if (AS.getDereferenceableOrNullBytes())
R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
- AH.getDereferenceableOrNullBytes(Index)));
+ AS.getDereferenceableOrNullBytes()));
for (auto Attr : ParamAttrsToStrip)
- if (AH.getAttributes().hasAttribute(Index, Attr))
+ if (AS.hasAttribute(Attr))
R.addAttribute(Attr);
if (!R.empty())
- AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R));
+ AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R));
}
static void stripNonValidAttributesFromPrototype(Function &F) {
@@ -3016,7 +3016,7 @@ static SetVector<Value *> computeKillSet(BasicBlock *BB) {
#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
-/// sanity check for the liveness computation.
+/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
Instruction *TI, bool TermOkay = false) {
for (Value *V : Live) {
@@ -3103,7 +3103,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
} // while (!Worklist.empty())
#ifndef NDEBUG
- // Sanity check our output against SSA properties. This helps catch any
+ // Verify our output against SSA properties. This helps catch any
// missing kills during the above iteration.
for (BasicBlock &BB : F)
checkBasicSSA(DT, Data, BB);
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index b09f896d0157..28e00c873361 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -490,17 +490,17 @@ bool llvm::runIPSCCP(
AttrBuilder AttributesToRemove;
AttributesToRemove.addAttribute(Attribute::ArgMemOnly);
AttributesToRemove.addAttribute(Attribute::InaccessibleMemOrArgMemOnly);
- F.removeAttributes(AttributeList::FunctionIndex, AttributesToRemove);
+ F.removeFnAttrs(AttributesToRemove);
for (User *U : F.users()) {
auto *CB = dyn_cast<CallBase>(U);
if (!CB || CB->getCalledFunction() != &F)
continue;
- CB->removeAttributes(AttributeList::FunctionIndex,
- AttributesToRemove);
+ CB->removeFnAttrs(AttributesToRemove);
}
}
+ MadeChanges |= ReplacedPointerArg;
}
SmallPtrSet<Value *, 32> InsertedValues;
@@ -540,14 +540,13 @@ bool llvm::runIPSCCP(
DTU.deleteBB(DeadBB);
for (BasicBlock &BB : F) {
- for (BasicBlock::iterator BI = BB.begin(), E = BB.end(); BI != E;) {
- Instruction *Inst = &*BI++;
- if (Solver.getPredicateInfoFor(Inst)) {
- if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ for (Instruction &Inst : llvm::make_early_inc_range(BB)) {
+ if (Solver.getPredicateInfoFor(&Inst)) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&Inst)) {
if (II->getIntrinsicID() == Intrinsic::ssa_copy) {
Value *Op = II->getOperand(0);
- Inst->replaceAllUsesWith(Op);
- Inst->eraseFromParent();
+ Inst.replaceAllUsesWith(Op);
+ Inst.eraseFromParent();
}
}
}
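
This and several later hunks switch manual pre-increment iterator loops to llvm::make_early_inc_range. A minimal standalone sketch of the idiom (the dead-instruction predicate here is only illustrative):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // make_early_inc_range advances its internal iterator before yielding each
    // element, so the loop body may erase the current instruction safely.
    static void eraseTriviallyDead(BasicBlock &BB) {
      for (Instruction &I : make_early_inc_range(BB))
        if (I.use_empty() && !I.isTerminator() && !I.mayHaveSideEffects())
          I.eraseFromParent();
    }
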
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 5ec01454e5b2..31c8999c3724 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -122,7 +122,7 @@ namespace {
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
std::string Prefix;
- const Twine getNameWithPrefix(const Twine &Name) const {
+ Twine getNameWithPrefix(const Twine &Name) const {
return Name.isTriviallyEmpty() ? Name : Prefix + Name;
}
@@ -1275,8 +1275,7 @@ static void speculatePHINodeLoads(PHINode &PN) {
// Get the AA tags and alignment to use from one of the loads. It does not
// matter which one we get and if any differ.
- AAMDNodes AATags;
- SomeLoad->getAAMetadata(AATags);
+ AAMDNodes AATags = SomeLoad->getAAMetadata();
Align Alignment = SomeLoad->getAlign();
// Rewrite all loads of the PN to use the new PHI.
@@ -1330,14 +1329,21 @@ static void speculatePHINodeLoads(PHINode &PN) {
/// %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand
-/// to the select can be loaded unconditionally.
+/// to the select can be loaded unconditionally. If an intervening bitcast with
+/// a single use of the load is found, allow the promotion.
static bool isSafeSelectToSpeculate(SelectInst &SI) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
const DataLayout &DL = SI.getModule()->getDataLayout();
for (User *U : SI.users()) {
- LoadInst *LI = dyn_cast<LoadInst>(U);
+ LoadInst *LI;
+ BitCastInst *BC = dyn_cast<BitCastInst>(U);
+ if (BC && BC->hasOneUse())
+ LI = dyn_cast<LoadInst>(*BC->user_begin());
+ else
+ LI = dyn_cast<LoadInst>(U);
+
if (!LI || !LI->isSimple())
return false;
@@ -1363,13 +1369,27 @@ static void speculateSelectInstLoads(SelectInst &SI) {
Value *FV = SI.getFalseValue();
// Replace the loads of the select with a select of two loads.
while (!SI.use_empty()) {
- LoadInst *LI = cast<LoadInst>(SI.user_back());
+ LoadInst *LI;
+ BitCastInst *BC = dyn_cast<BitCastInst>(SI.user_back());
+ if (BC) {
+ assert(BC->hasOneUse() && "Bitcast should have a single use.");
+ LI = cast<LoadInst>(BC->user_back());
+ } else {
+ LI = cast<LoadInst>(SI.user_back());
+ }
+
assert(LI->isSimple() && "We only speculate simple loads");
IRB.SetInsertPoint(LI);
- LoadInst *TL = IRB.CreateLoad(LI->getType(), TV,
+ Value *NewTV =
+ BC ? IRB.CreateBitCast(TV, BC->getType(), TV->getName() + ".sroa.cast")
+ : TV;
+ Value *NewFV =
+ BC ? IRB.CreateBitCast(FV, BC->getType(), FV->getName() + ".sroa.cast")
+ : FV;
+ LoadInst *TL = IRB.CreateLoad(LI->getType(), NewTV,
LI->getName() + ".sroa.speculate.load.true");
- LoadInst *FL = IRB.CreateLoad(LI->getType(), FV,
+ LoadInst *FL = IRB.CreateLoad(LI->getType(), NewFV,
LI->getName() + ".sroa.speculate.load.false");
NumLoadsSpeculated += 2;
@@ -1377,8 +1397,7 @@ static void speculateSelectInstLoads(SelectInst &SI) {
TL->setAlignment(LI->getAlign());
FL->setAlignment(LI->getAlign());
- AAMDNodes Tags;
- LI->getAAMetadata(Tags);
+ AAMDNodes Tags = LI->getAAMetadata();
if (Tags) {
TL->setAAMetadata(Tags);
FL->setAAMetadata(Tags);
@@ -1390,6 +1409,8 @@ static void speculateSelectInstLoads(SelectInst &SI) {
LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
LI->replaceAllUsesWith(V);
LI->eraseFromParent();
+ if (BC)
+ BC->eraseFromParent();
}
SI.eraseFromParent();
}
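
A hypothetical source-level picture of the speculation performed above (plain C++ for intuition only; the pass rewrites IR, and the new code additionally looks through a single-use bitcast between the select and the load):

    // Before: one load through a selected pointer.
    int before(bool c, int *p, int *q) { return *(c ? p : q); }

    // After: both loads are speculated and the loaded values are selected.
    // Legal only when both pointers are safely dereferenceable, which is
    // what isSafeSelectToSpeculate checks.
    int after(bool c, int *p, int *q) { return c ? *p : *q; }
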
@@ -1462,76 +1483,6 @@ static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
return buildGEP(IRB, BasePtr, Indices, NamePrefix);
}
-/// Recursively compute indices for a natural GEP.
-///
-/// This is the recursive step for getNaturalGEPWithOffset that walks down the
-/// element types adding appropriate indices for the GEP.
-static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
- Value *Ptr, Type *Ty, APInt &Offset,
- Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
- if (Offset == 0)
- return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
- NamePrefix);
-
- // We can't recurse through pointer types.
- if (Ty->isPointerTy())
- return nullptr;
-
- // We try to analyze GEPs over vectors here, but note that these GEPs are
- // extremely poorly defined currently. The long-term goal is to remove GEPing
- // over a vector from the IR completely.
- if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
- unsigned ElementSizeInBits =
- DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize();
- if (ElementSizeInBits % 8 != 0) {
- // GEPs over non-multiple of 8 size vector elements are invalid.
- return nullptr;
- }
- APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
- if (NumSkippedElements.ugt(cast<FixedVectorType>(VecTy)->getNumElements()))
- return nullptr;
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
- Offset, TargetTy, Indices, NamePrefix);
- }
-
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
- Type *ElementTy = ArrTy->getElementType();
- APInt ElementSize(Offset.getBitWidth(),
- DL.getTypeAllocSize(ElementTy).getFixedSize());
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
- if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return nullptr;
-
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
- }
-
- StructType *STy = dyn_cast<StructType>(Ty);
- if (!STy)
- return nullptr;
-
- const StructLayout *SL = DL.getStructLayout(STy);
- uint64_t StructOffset = Offset.getZExtValue();
- if (StructOffset >= SL->getSizeInBytes())
- return nullptr;
- unsigned Index = SL->getElementContainingOffset(StructOffset);
- Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
- Type *ElementTy = STy->getElementType(Index);
- if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize()))
- return nullptr; // The offset points into alignment padding.
-
- Indices.push_back(IRB.getInt32(Index));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
-}
-
/// Get a natural GEP from a base pointer to a particular offset and
/// resulting in a particular type.
///
@@ -1556,18 +1507,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
return nullptr; // We can't GEP through an unsized element.
- if (isa<ScalableVectorType>(ElementTy))
+
+ SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
+ if (Offset != 0)
return nullptr;
- APInt ElementSize(Offset.getBitWidth(),
- DL.getTypeAllocSize(ElementTy).getFixedSize());
- if (ElementSize == 0)
- return nullptr; // Zero-length arrays can't help us build a natural GEP.
- APInt NumSkippedElements = Offset.sdiv(ElementSize);
-
- Offset -= NumSkippedElements * ElementSize;
- Indices.push_back(IRB.getInt(NumSkippedElements));
- return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
- Indices, NamePrefix);
+
+ for (const APInt &Index : IntIndices)
+ Indices.push_back(IRB.getInt(Index));
+ return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
+ NamePrefix);
}
/// Compute an adjusted pointer from Ptr by Offset bytes where the
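
Note on the replacement above: DataLayout::getGEPIndicesForOffset peels off as many structured indices as the element type allows and leaves any unconsumed remainder in Offset, which is why a non-zero Offset afterwards means no natural GEP exists and the function bails out.
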
@@ -1588,6 +1536,15 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
const Twine &NamePrefix) {
+ // Create i8 GEP for opaque pointers.
+ if (Ptr->getType()->isOpaquePointerTy()) {
+ if (Offset != 0)
+ Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
+ NamePrefix + "sroa_idx");
+ return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
+ NamePrefix + "sroa_cast");
+ }
+
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
@@ -1851,13 +1808,13 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
} else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
return false;
- } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
- // Disable vector promotion when there are loads or stores of an FCA.
- return false;
} else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
if (LI->isVolatile())
return false;
Type *LTy = LI->getType();
+ // Disable vector promotion when there are loads or stores of an FCA.
+ if (LTy->isStructTy())
+ return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(LTy->isIntegerTy());
LTy = SplitIntTy;
@@ -1868,6 +1825,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
if (SI->isVolatile())
return false;
Type *STy = SI->getValueOperand()->getType();
+ // Disable vector promotion when there are loads or stores of an FCA.
+ if (STy->isStructTy())
+ return false;
if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
assert(STy->isIntegerTy());
STy = SplitIntTy;
@@ -2282,7 +2242,7 @@ class llvm::sroa::AllocaSliceRewriter
const DataLayout &DL;
AllocaSlices &AS;
- SROA &Pass;
+ SROAPass &Pass;
AllocaInst &OldAI, &NewAI;
const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
Type *NewAllocaTy;
@@ -2330,7 +2290,7 @@ class llvm::sroa::AllocaSliceRewriter
IRBuilderTy IRB;
public:
- AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
+ AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROAPass &Pass,
AllocaInst &OldAI, AllocaInst &NewAI,
uint64_t NewAllocaBeginOffset,
uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
@@ -2510,8 +2470,7 @@ private:
Value *OldOp = LI.getOperand(0);
assert(OldOp == OldPtr);
- AAMDNodes AATags;
- LI.getAAMetadata(AATags);
+ AAMDNodes AATags = LI.getAAMetadata();
unsigned AS = LI.getPointerAddressSpace();
@@ -2675,9 +2634,7 @@ private:
Value *OldOp = SI.getOperand(1);
assert(OldOp == OldPtr);
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
-
+ AAMDNodes AATags = SI.getAAMetadata();
Value *V = SI.getValueOperand();
// Strip all inbounds GEPs and pointer casts to try to dig out any root
@@ -2743,7 +2700,9 @@ private:
deleteIfTriviallyDead(OldOp);
LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
- return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
+ return NewSI->getPointerOperand() == &NewAI &&
+ NewSI->getValueOperand()->getType() == NewAllocaTy &&
+ !SI.isVolatile();
}
/// Compute an integer value from splatting an i8 across the given
@@ -2784,8 +2743,7 @@ private:
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
assert(II.getRawDest() == OldPtr);
- AAMDNodes AATags;
- II.getAAMetadata(AATags);
+ AAMDNodes AATags = II.getAAMetadata();
// If the memset has a variable size, it cannot be split, just adjust the
// pointer to the new alloca.
@@ -2811,10 +2769,11 @@ private:
if (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)
return false;
+ // Length must be in range for FixedVectorType.
auto *C = cast<ConstantInt>(II.getLength());
- if (C->getBitWidth() > 64)
+ const uint64_t Len = C->getLimitedValue();
+ if (Len > std::numeric_limits<unsigned>::max())
return false;
- const auto Len = C->getZExtValue();
auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
return canConvertValue(DL, SrcTy, AllocaTy) &&
@@ -2912,8 +2871,7 @@ private:
LLVM_DEBUG(dbgs() << " original: " << II << "\n");
- AAMDNodes AATags;
- II.getAAMetadata(AATags);
+ AAMDNodes AATags = II.getAAMetadata();
bool IsDest = &II.getRawDestUse() == OldUse;
assert((IsDest && II.getRawDest() == OldPtr) ||
@@ -3420,9 +3378,7 @@ private:
// We have an aggregate being loaded, split it apart.
LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
- AAMDNodes AATags;
- LI.getAAMetadata(AATags);
- LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,
+ LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
getAdjustedAlignment(&LI, 0), DL);
Value *V = UndefValue::get(LI.getType());
Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
@@ -3473,9 +3429,7 @@ private:
// We have an aggregate being stored, split it apart.
LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
- AAMDNodes AATags;
- SI.getAAMetadata(AATags);
- StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
+ StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(),
getAdjustedAlignment(&SI, 0), DL);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
Visited.erase(&SI);
@@ -3801,7 +3755,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
/// there all along.
///
/// \returns true if any changes are made.
-bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
+bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
// Track the loads and stores which are candidates for pre-splitting here, in
@@ -4281,8 +4235,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
/// appropriate new offsets. It also evaluates how successful the rewrite was
/// at enabling promotion and if it was successful queues the alloca to be
/// promoted.
-AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
- Partition &P) {
+AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
+ Partition &P) {
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
@@ -4433,7 +4387,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
/// Walks the slices of an alloca and form partitions based on them,
/// rewriting each of their uses.
-bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
+bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (AS.begin() == AS.end())
return false;
@@ -4604,7 +4558,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
/// Clobber a use with undef, deleting the used value if it becomes dead.
-void SROA::clobberUse(Use &U) {
+void SROAPass::clobberUse(Use &U) {
Value *OldV = U;
// Replace the use with an undef value.
U = UndefValue::get(OldV->getType());
@@ -4623,7 +4577,7 @@ void SROA::clobberUse(Use &U) {
/// This analyzes the alloca to ensure we can reason about it, builds
/// the slices of the alloca, and then hands it off to be split and
/// rewritten as needed.
-bool SROA::runOnAlloca(AllocaInst &AI) {
+bool SROAPass::runOnAlloca(AllocaInst &AI) {
LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
++NumAllocasAnalyzed;
@@ -4697,7 +4651,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) {
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
-bool SROA::deleteDeadInstructions(
+bool SROAPass::deleteDeadInstructions(
SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
bool Changed = false;
while (!DeadInsts.empty()) {
@@ -4736,7 +4690,7 @@ bool SROA::deleteDeadInstructions(
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
/// This function returns whether any promotion occurred.
-bool SROA::promoteAllocas(Function &F) {
+bool SROAPass::promoteAllocas(Function &F) {
if (PromotableAllocas.empty())
return false;
@@ -4748,8 +4702,8 @@ bool SROA::promoteAllocas(Function &F) {
return true;
}
-PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
- AssumptionCache &RunAC) {
+PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
+ AssumptionCache &RunAC) {
LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
C = &F.getContext();
DT = &RunDT;
@@ -4803,7 +4757,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
return PA;
}
-PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
@@ -4814,7 +4768,7 @@ PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
/// SROA pass.
class llvm::sroa::SROALegacyPass : public FunctionPass {
/// The SROA implementation.
- SROA Impl;
+ SROAPass Impl;
public:
static char ID;
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index ca288a533f46..1284bae820a4 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -873,13 +873,11 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
auto &DL = F.getParent()->getDataLayout();
while (MadeChange) {
MadeChange = false;
- for (Function::iterator I = F.begin(); I != F.end();) {
- BasicBlock *BB = &*I++;
+ for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
bool ModifiedDTOnIteration = false;
- MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration, TTI, DL,
+ MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
DTU.hasValue() ? DTU.getPointer() : nullptr);
-
// Restart BB iteration if the dominator tree of the Function was changed
if (ModifiedDTOnIteration)
break;
@@ -933,7 +931,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
if (II) {
// The scalarization code below does not work for scalable vectors.
if (isa<ScalableVectorType>(II->getType()) ||
- any_of(II->arg_operands(),
+ any_of(II->args(),
[](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
return false;
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 8ef6b69673be..6b7419abe1d1 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -66,6 +66,15 @@ static cl::opt<bool>
namespace {
+BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
+ BasicBlock *BB = Itr->getParent();
+ if (isa<PHINode>(Itr))
+ Itr = BB->getFirstInsertionPt();
+ if (Itr != BB->end())
+ Itr = skipDebugIntrinsics(Itr);
+ return Itr;
+}
+
// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;
@@ -371,10 +380,11 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V) {
return Scatterer(Point->getParent(), Point->getIterator(),
UndefValue::get(V->getType()));
// Put the scattered form of an instruction directly after the
- // instruction.
+ // instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
- return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
- V, &Scattered[V]);
+ return Scatterer(
+ BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
+ &Scattered[V]);
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
@@ -530,7 +540,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
return false;
unsigned NumElems = cast<FixedVectorType>(VT)->getNumElements();
- unsigned NumArgs = CI.getNumArgOperands();
+ unsigned NumArgs = CI.arg_size();
ValueVector ScalarOperands(NumArgs);
SmallVector<Scatterer, 8> Scattered(NumArgs);
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index f216956406b6..ffa2f9adb978 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1164,8 +1164,11 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
DL = &F.getParent()->getDataLayout();
bool Changed = false;
for (BasicBlock &B : F) {
- for (BasicBlock::iterator I = B.begin(), IE = B.end(); I != IE;)
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++))
+ if (!DT->isReachableFromEntry(&B))
+ continue;
+
+ for (Instruction &I : llvm::make_early_inc_range(B))
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I))
Changed |= splitGEP(GEP);
// No need to split GEP ConstantExprs because all its indices are constant
// already.
@@ -1258,10 +1261,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
DominatingSubs.clear();
for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
- for (auto I = BB->begin(); I != BB->end(); ) {
- Instruction *Cur = &*I++;
- Changed |= reuniteExts(Cur);
- }
+ for (Instruction &I : llvm::make_early_inc_range(*BB))
+ Changed |= reuniteExts(&I);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b9cccc2af309..a27da047bfd3 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/MustExecute.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -49,7 +50,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GenericDomTree.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -81,6 +81,7 @@ static cl::opt<bool> EnableNonTrivialUnswitch(
static cl::opt<int>
UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
+ cl::ZeroOrMore,
cl::desc("The cost threshold for unswitching a loop."));
static cl::opt<bool> EnableUnswitchCostMultiplier(
@@ -108,6 +109,10 @@ static cl::opt<unsigned>
cl::desc("Max number of memory uses to explore during "
"partial unswitching analysis"),
cl::init(100), cl::Hidden);
+static cl::opt<bool> FreezeLoopUnswitchCond(
+ "freeze-loop-unswitch-cond", cl::init(false), cl::Hidden,
+ cl::desc("If enabled, the freeze instruction will be added to condition "
+ "of loop unswitch to prevent miscompilation."));
/// Collect all of the loop invariant input values transitively used by the
/// homogeneous instruction graph from a given root.
@@ -195,15 +200,15 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
/// end of \p BB and conditionally branch on the copied condition. We only
/// branch on a single value.
-static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
- ArrayRef<Value *> Invariants,
- bool Direction,
- BasicBlock &UnswitchedSucc,
- BasicBlock &NormalSucc) {
+static void buildPartialUnswitchConditionalBranch(
+ BasicBlock &BB, ArrayRef<Value *> Invariants, bool Direction,
+ BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze) {
IRBuilder<> IRB(&BB);
Value *Cond = Direction ? IRB.CreateOr(Invariants) :
IRB.CreateAnd(Invariants);
+ if (InsertFreeze)
+ Cond = IRB.CreateFreeze(Cond, Cond->getName() + ".fr");
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
Direction ? &NormalSucc : &UnswitchedSucc);
}
@@ -564,7 +569,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
"Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
" condition!");
buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
- *UnswitchedBB, *NewPH);
+ *UnswitchedBB, *NewPH, false);
}
// Update the dominator tree with the added edge.
@@ -1587,10 +1592,12 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
-static void deleteDeadBlocksFromLoop(Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI,
- MemorySSAUpdater *MSSAU) {
+static void
+deleteDeadBlocksFromLoop(Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
@@ -1640,6 +1647,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
}) &&
"If the child loop header is dead all blocks in the child loop must "
"be dead as well!");
+ DestroyLoopCB(*ChildL, ChildL->getName());
LI.destroy(ChildL);
return true;
});
@@ -1980,6 +1988,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ParentL->removeChildLoop(llvm::find(*ParentL, &L));
else
LI.removeLoop(llvm::find(LI, &L));
+ // markLoopAsDeleted for L should be triggered by the caller (it is typically
+ // done by using the UnswitchCB callback).
LI.destroy(&L);
return false;
}
@@ -2019,7 +2029,8 @@ static void unswitchNontrivialInvariants(
SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2117,6 +2128,13 @@ static void unswitchNontrivialInvariants(
SE->forgetTopmostLoop(&L);
}
+ bool InsertFreeze = false;
+ if (FreezeLoopUnswitchCond) {
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
+ }
+
// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
@@ -2191,6 +2209,11 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ if (InsertFreeze) {
+ auto Cond = BI->getCondition();
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, BI, &DT))
+ BI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", BI));
+ }
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2205,6 +2228,11 @@ static void unswitchNontrivialInvariants(
else
Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
+ if (InsertFreeze) {
+ auto Cond = SI->getCondition();
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond, &AC, SI, &DT))
+ SI->setCondition(new FreezeInst(Cond, Cond->getName() + ".fr", SI));
+ }
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
// remove and insert one edge in the domtree.
@@ -2285,7 +2313,7 @@ static void unswitchNontrivialInvariants(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
else
buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
- *ClonedPH, *LoopPH);
+ *ClonedPH, *LoopPH, InsertFreeze);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
if (MSSAU) {
@@ -2319,7 +2347,7 @@ static void unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2364,7 +2392,9 @@ static void unswitchNontrivialInvariants(
ConstantInt *ContinueReplacement =
Direction ? ConstantInt::getFalse(BI->getContext())
: ConstantInt::getTrue(BI->getContext());
- for (Value *Invariant : Invariants)
+ for (Value *Invariant : Invariants) {
+ assert(!isa<Constant>(Invariant) &&
+ "Should not be replacing constant values!");
// Use make_early_inc_range here as set invalidates the iterator.
for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
Instruction *UserI = dyn_cast<Instruction>(U.getUser());
@@ -2379,6 +2409,7 @@ static void unswitchNontrivialInvariants(
DT.dominates(ClonedPH, UserI->getParent()))
U.set(UnswitchedReplacement);
}
+ }
}
// We can change which blocks are exit blocks of all the cloned sibling
@@ -2670,7 +2701,8 @@ static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
@@ -2720,6 +2752,9 @@ static bool unswitchBestCondition(
Cond = CondNext;
BI->setCondition(Cond);
+ if (isa<Constant>(Cond))
+ continue;
+
if (L.isLoopInvariant(BI->getCondition())) {
UnswitchCandidates.push_back({BI, {BI->getCondition()}});
continue;
@@ -2958,7 +2993,7 @@ static bool unswitchBestCondition(
<< "\n");
unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
ExitBlocks, PartialIVInfo, DT, LI, AC,
- UnswitchCB, SE, MSSAU);
+ UnswitchCB, SE, MSSAU, DestroyLoopCB);
return true;
}
@@ -2988,7 +3023,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3036,7 +3072,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
- if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
+ if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
+ DestroyLoopCB))
return true;
// No other opportunities to unswitch.
@@ -3083,6 +3120,10 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, LoopName);
};
+ auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
+ U.markLoopAsDeleted(L, Name);
+ };
+
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
@@ -3091,7 +3132,8 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
UnswitchCB, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DestroyLoopCB))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
@@ -3107,6 +3149,17 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
return PA;
}
+void SimpleLoopUnswitchPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (NonTrivial ? "" : "no-") << "nontrivial;";
+ OS << (Trivial ? "" : "no-") << "trivial";
+ OS << ">";
+}
+
namespace {
class SimpleLoopUnswitchLegacyPass : public LoopPass {
@@ -3126,10 +3179,8 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
- if (EnableMSSALoopDependency) {
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
getLoopAnalysisUsage(AU);
}
};
@@ -3150,12 +3201,8 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- MemorySSA *MSSA = nullptr;
- Optional<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSAU = MemorySSAUpdater(MSSA);
- }
+ MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+ MemorySSAUpdater MSSAU(MSSA);
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;
@@ -3179,14 +3226,17 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(*L);
};
- if (MSSA && VerifyMemorySSA)
+ auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
+ LPM.markLoopAsDeleted(L);
+ };
+
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
- bool Changed =
- unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial,
+ UnswitchCB, SE, &MSSAU, DestroyLoopCB);
- if (MSSA && VerifyMemorySSA)
+ if (VerifyMemorySSA)
MSSA->verifyMemorySSA();
// Historically this pass has had issues with the dominator tree so verify it
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 09d59b0e884a..86d3620c312e 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -224,7 +224,11 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
UniqueLoopHeaders.end());
+ unsigned IterCnt = 0;
+ (void)IterCnt;
while (LocalChange) {
+ assert(IterCnt++ < 1000 &&
+ "Sanity: iterative simplification didn't converge!");
LocalChange = false;
// Loop over all of the basic blocks and remove them if they are unneeded.
@@ -319,6 +323,21 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts)
applyCommandLineOverridesToOptions(Options);
}
+void SimplifyCFGPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SimplifyCFGPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
+ OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
+ OS << (Options.ConvertSwitchToLookupTable ? "" : "no-")
+ << "switch-to-lookup;";
+ OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
+ OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
+ OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts";
+ OS << ">";
+}
+
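
For reference, the resulting textual form (assuming the registered pass name simplifycfg) looks like simplifycfg<bonus-inst-threshold=N;no-forward-switch-cond;no-switch-to-lookup;keep-loops;no-hoist-common-insts;no-sink-common-insts>, with N being the configured bonus-instruction threshold.
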
PreservedAnalyses SimplifyCFGPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index dfa30418ea01..06169a7834f6 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -268,7 +268,7 @@ bool SpeculativeExecutionPass::considerHoistingFromTo(
if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(U)) {
return all_of(DVI->location_ops(), [&NotHoisted](Value *V) {
if (const auto *I = dyn_cast_or_null<Instruction>(V)) {
- if (NotHoisted.count(I) == 0)
+ if (!NotHoisted.contains(I))
return true;
}
return false;
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 20b8b982e14b..b47378808216 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -607,7 +607,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
if (IndexOffset == 1)
return C.Stride;
// Common case 2: if (i' - i) is -1, Bump = -S.
- if (IndexOffset.isAllOnesValue())
+ if (IndexOffset.isAllOnes())
return Builder.CreateNeg(C.Stride);
// Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may
@@ -620,7 +620,7 @@ Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2());
return Builder.CreateShl(ExtendedStride, Exponent);
}
- if ((-IndexOffset).isPowerOf2()) {
+ if (IndexOffset.isNegatedPowerOf2()) {
// If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i).
ConstantInt *Exponent =
ConstantInt::get(DeltaType, (-IndexOffset).logBase2());
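
A small sketch of the new APInt predicate as used above (standalone helper with illustrative names):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // If the index delta is a negated power of two, the bump is a negated shift:
    // for IndexOffset == -4, Bump = -(S << 2).
    static bool bumpIsNegatedShl(const APInt &IndexOffset, unsigned &ShiftAmt) {
      if (!IndexOffset.isNegatedPowerOf2())
        return false;
      ShiftAmt = (-IndexOffset).logBase2(); // e.g. (-(-4)).logBase2() == 2
      return true;
    }
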
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 846a9321f53e..3bcf92e28a21 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -262,7 +262,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) {
// Note that this runs whether we know an alloca has escaped or not. If
// it has, then we can't trust Tracker.AllocaUsers to be accurate.
bool SafeToTail = true;
- for (auto &Arg : CI->arg_operands()) {
+ for (auto &Arg : CI->args()) {
if (isa<Constant>(Arg.getUser()))
continue;
if (Argument *A = dyn_cast<Argument>(Arg.getUser()))
@@ -584,8 +584,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
// call instruction into the newly created temporary variable.
void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
int OpndIdx) {
- PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
- Type *AggTy = ArgTy->getElementType();
+ Type *AggTy = CI->getParamByValType(OpndIdx);
+ assert(AggTy);
const DataLayout &DL = F.getParent()->getDataLayout();
// Get alignment of byVal operand.
@@ -611,8 +611,8 @@ void TailRecursionEliminator::copyByValueOperandIntoLocalTemp(CallInst *CI,
// into the corresponding function argument location.
void TailRecursionEliminator::copyLocalTempOfByValueOperandIntoArguments(
CallInst *CI, int OpndIdx) {
- PointerType *ArgTy = cast<PointerType>(CI->getArgOperand(OpndIdx)->getType());
- Type *AggTy = ArgTy->getElementType();
+ Type *AggTy = CI->getParamByValType(OpndIdx);
+ assert(AggTy);
const DataLayout &DL = F.getParent()->getDataLayout();
// Get alignment of byVal operand.
@@ -667,7 +667,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
createTailRecurseLoopHeader(CI);
// Copy values of ByVal operands into local temporary variables.
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I))
copyByValueOperandIntoLocalTemp(CI, I);
}
@@ -675,7 +675,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) {
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
- for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
+ for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
if (CI->isByValArgument(I)) {
copyLocalTempOfByValueOperandIntoArguments(CI, I);
ArgumentPHIs[I]->addIncoming(F.getArg(I), BB);
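
These two hunks read the aggregate type from the byval attribute (getParamByValType) instead of the pointer's pointee type, presumably in preparation for opaque pointers, where the pointee type can no longer be recovered from the pointer type.
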
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 8cd16ca3906f..fdc914a72bfd 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -63,6 +63,9 @@ static Value *callPrintfBegin(IRBuilder<> &Builder, Value *Version) {
auto Int64Ty = Builder.getInt64Ty();
auto M = Builder.GetInsertBlock()->getModule();
auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
+ if (!M->getModuleFlag("amdgpu_hostcall")) {
+ M->addModuleFlag(llvm::Module::Override, "amdgpu_hostcall", 1);
+ }
return Builder.CreateCall(Fn, Version);
}
diff --git a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 01912297324a..cbc508bb863a 100644
--- a/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -33,14 +33,14 @@ static inline bool CompareVars(const ASanStackVariableDescription &a,
// We also force minimal alignment for all vars to kMinAlignment so that vars
// with e.g. alignment 1 and alignment 16 do not get reordered by CompareVars.
-static const size_t kMinAlignment = 16;
+static const uint64_t kMinAlignment = 16;
// We want to add a full redzone after every variable.
// The larger the variable Size the larger is the redzone.
// The resulting frame size is a multiple of Alignment.
-static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
- size_t Alignment) {
- size_t Res = 0;
+static uint64_t VarAndRedzoneSize(uint64_t Size, uint64_t Granularity,
+ uint64_t Alignment) {
+ uint64_t Res = 0;
if (Size <= 4) Res = 16;
else if (Size <= 16) Res = 32;
else if (Size <= 128) Res = Size + 32;
@@ -52,7 +52,7 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Granularity,
ASanStackFrameLayout
ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
- size_t Granularity, size_t MinHeaderSize) {
+ uint64_t Granularity, uint64_t MinHeaderSize) {
assert(Granularity >= 8 && Granularity <= 64 &&
(Granularity & (Granularity - 1)) == 0);
assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
@@ -67,22 +67,22 @@ ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
ASanStackFrameLayout Layout;
Layout.Granularity = Granularity;
Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
- size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
- Vars[0].Alignment);
+ uint64_t Offset =
+ std::max(std::max(MinHeaderSize, Granularity), Vars[0].Alignment);
assert((Offset % Granularity) == 0);
for (size_t i = 0; i < NumVars; i++) {
bool IsLast = i == NumVars - 1;
- size_t Alignment = std::max(Granularity, Vars[i].Alignment);
+ uint64_t Alignment = std::max(Granularity, Vars[i].Alignment);
(void)Alignment; // Used only in asserts.
- size_t Size = Vars[i].Size;
+ uint64_t Size = Vars[i].Size;
assert((Alignment & (Alignment - 1)) == 0);
assert(Layout.FrameAlignment >= Alignment);
assert((Offset % Alignment) == 0);
assert(Size > 0);
- size_t NextAlignment = IsLast ? Granularity
- : std::max(Granularity, Vars[i + 1].Alignment);
- size_t SizeWithRedzone = VarAndRedzoneSize(Size, Granularity,
- NextAlignment);
+ uint64_t NextAlignment =
+ IsLast ? Granularity : std::max(Granularity, Vars[i + 1].Alignment);
+ uint64_t SizeWithRedzone =
+ VarAndRedzoneSize(Size, Granularity, NextAlignment);
Vars[i].Offset = Offset;
Offset += SizeWithRedzone;
}
@@ -118,7 +118,7 @@ GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
assert(Vars.size() > 0);
SmallVector<uint8_t, 64> SB;
SB.clear();
- const size_t Granularity = Layout.Granularity;
+ const uint64_t Granularity = Layout.Granularity;
SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
for (const auto &Var : Vars) {
SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
@@ -135,13 +135,13 @@ SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
const SmallVectorImpl<ASanStackVariableDescription> &Vars,
const ASanStackFrameLayout &Layout) {
SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
- const size_t Granularity = Layout.Granularity;
+ const uint64_t Granularity = Layout.Granularity;
for (const auto &Var : Vars) {
assert(Var.LifetimeSize <= Var.Size);
- const size_t LifetimeShadowSize =
+ const uint64_t LifetimeShadowSize =
(Var.LifetimeSize + Granularity - 1) / Granularity;
- const size_t Offset = Var.Offset / Granularity;
+ const uint64_t Offset = Var.Offset / Granularity;
std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
kAsanStackUseAfterScopeMagic);
}
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index d689e04da36f..f910f7c3c31f 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -67,7 +67,8 @@ bool isUsefullToPreserve(Attribute::AttrKind Kind) {
/// This function will try to transform the given knowledge into a more
/// canonical one. The canonical knowledge may be the given one.
-RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK, DataLayout DL) {
+RetainedKnowledge canonicalizedKnowledge(RetainedKnowledge RK,
+ const DataLayout &DL) {
switch (RK.AttrKind) {
default:
return RK;
@@ -103,7 +104,7 @@ struct AssumeBuilderState {
Module *M;
using MapKey = std::pair<Value *, Attribute::AttrKind>;
- SmallMapVector<MapKey, unsigned, 8> AssumedKnowledgeMap;
+ SmallMapVector<MapKey, uint64_t, 8> AssumedKnowledgeMap;
Instruction *InstBeingModified = nullptr;
AssumptionCache* AC = nullptr;
DominatorTree* DT = nullptr;
@@ -196,28 +197,27 @@ struct AssumeBuilderState {
(!ShouldPreserveAllAttributes &&
!isUsefullToPreserve(Attr.getKindAsEnum())))
return;
- unsigned AttrArg = 0;
+ uint64_t AttrArg = 0;
if (Attr.isIntAttribute())
AttrArg = Attr.getValueAsInt();
addKnowledge({Attr.getKindAsEnum(), AttrArg, WasOn});
}
void addCall(const CallBase *Call) {
- auto addAttrList = [&](AttributeList AttrList) {
- for (unsigned Idx = AttributeList::FirstArgIndex;
- Idx < AttrList.getNumAttrSets(); Idx++)
- for (Attribute Attr : AttrList.getAttributes(Idx)) {
+ auto addAttrList = [&](AttributeList AttrList, unsigned NumArgs) {
+ for (unsigned Idx = 0; Idx < NumArgs; Idx++)
+ for (Attribute Attr : AttrList.getParamAttrs(Idx)) {
bool IsPoisonAttr = Attr.hasAttribute(Attribute::NonNull) ||
Attr.hasAttribute(Attribute::Alignment);
- if (!IsPoisonAttr || Call->isPassingUndefUB(Idx - 1))
- addAttribute(Attr, Call->getArgOperand(Idx - 1));
+ if (!IsPoisonAttr || Call->isPassingUndefUB(Idx))
+ addAttribute(Attr, Call->getArgOperand(Idx));
}
- for (Attribute Attr : AttrList.getFnAttributes())
+ for (Attribute Attr : AttrList.getFnAttrs())
addAttribute(Attr, nullptr);
};
- addAttrList(Call->getAttributes());
+ addAttrList(Call->getAttributes(), Call->arg_size());
if (Function *Fn = Call->getCalledFunction())
- addAttrList(Fn->getAttributes());
+ addAttrList(Fn->getAttributes(), Fn->arg_size());
}
AssumeInst *build() {
@@ -261,8 +261,7 @@ struct AssumeBuilderState {
addKnowledge({Attribute::NonNull, 0u, Pointer});
}
if (MA.valueOrOne() > 1)
- addKnowledge(
- {Attribute::Alignment, unsigned(MA.valueOrOne().value()), Pointer});
+ addKnowledge({Attribute::Alignment, MA.valueOrOne().value(), Pointer});
}
void addInstruction(Instruction *I) {
@@ -392,7 +391,7 @@ struct AssumeSimplify {
void dropRedundantKnowledge() {
struct MapValue {
IntrinsicInst *Assume;
- unsigned ArgValue;
+ uint64_t ArgValue;
CallInst::BundleOpInfo *BOI;
};
buildMapping(false);
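The attribute accessors adopted above index parameters directly by argument number; a minimal sketch of the pattern (walkCallAttributes is a hypothetical helper, not from the patch):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/InstrTypes.h"

static void walkCallAttributes(const llvm::CallBase &Call) {
  llvm::AttributeList AL = Call.getAttributes();
  // Parameter attributes: 0-based argument index, no FirstArgIndex offset.
  for (unsigned Idx = 0, E = Call.arg_size(); Idx != E; ++Idx)
    for (llvm::Attribute Attr : AL.getParamAttrs(Idx))
      (void)Attr;
  // Function-level attributes come from getFnAttrs() rather than the old
  // index-based accessors.
  for (llvm::Attribute Attr : AL.getFnAttrs())
    (void)Attr;
}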
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index ee933b638a23..6469c899feea 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -52,6 +53,12 @@ using namespace llvm;
#define DEBUG_TYPE "basicblock-utils"
+static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
+ "max-deopt-or-unreachable-succ-check-depth", cl::init(8), cl::Hidden,
+ cl::desc("Set the maximum path length when checking whether a basic block "
+ "is followed by a block that either has a terminating "
+ "deoptimizing call or is terminated with an unreachable"));
+
void llvm::DetatchDeadBlocks(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
@@ -230,7 +237,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
if (DTU) {
SmallPtrSet<BasicBlock *, 2> SuccsOfBB(succ_begin(BB), succ_end(BB));
SmallPtrSet<BasicBlock *, 2> SuccsOfPredBB(succ_begin(PredBB),
- succ_begin(PredBB));
+ succ_end(PredBB));
Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1);
// Add insert edges first. Experimentally, for the particular case of two
// blocks that can be merged, with a single successor and single predecessor
@@ -485,6 +492,20 @@ void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BI = New;
}
+bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
+ // Remember visited blocks to avoid an infinite loop.
+ SmallPtrSet<const BasicBlock *, 8> VisitedBlocks;
+ unsigned Depth = 0;
+ while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth &&
+ VisitedBlocks.insert(BB).second) {
+ if (BB->getTerminatingDeoptimizeCall() ||
+ isa<UnreachableInst>(BB->getTerminator()))
+ return true;
+ BB = BB->getUniqueSuccessor();
+ }
+ return false;
+}
+
void llvm::ReplaceInstWithInst(Instruction *From, Instruction *To) {
BasicBlock::iterator BI(From);
ReplaceInstWithInst(From->getParent()->getInstList(), BI, To);
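A caller of the new helper would use it as a cheap test that a block only leads to a deoptimizing or unreachable exit (usage sketch only; it assumes the declaration lives in BasicBlockUtils.h next to the definition above, and isLikelyColdExit is hypothetical):

#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

static bool isLikelyColdExit(const llvm::BasicBlock *BB) {
  // Walks the unique-successor chain from BB, bounded by the new
  // max-deopt-or-unreachable-succ-check-depth option, and reports whether it
  // reaches a terminating deoptimize call or an unreachable terminator.
  return llvm::IsBlockFollowedByDeoptOrUnreachable(BB);
}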
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 35e22f7a57e2..957935398972 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -96,9 +96,9 @@ static bool setDoesNotThrow(Function &F) {
}
static bool setRetDoesNotAlias(Function &F) {
- if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoAlias))
+ if (F.hasRetAttribute(Attribute::NoAlias))
return false;
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
+ F.addRetAttr(Attribute::NoAlias);
++NumNoAlias;
return true;
}
@@ -145,8 +145,8 @@ static bool setSignExtendedArg(Function &F, unsigned ArgNo) {
static bool setRetNoUndef(Function &F) {
if (!F.getReturnType()->isVoidTy() &&
- !F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef)) {
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+ !F.hasRetAttribute(Attribute::NoUndef)) {
+ F.addRetAttr(Attribute::NoUndef);
++NumNoUndef;
return true;
}
@@ -174,7 +174,10 @@ static bool setArgNoUndef(Function &F, unsigned ArgNo) {
}
static bool setRetAndArgsNoUndef(Function &F) {
- return setRetNoUndef(F) | setArgsNoUndef(F);
+ bool UndefAdded = false;
+ UndefAdded |= setRetNoUndef(F);
+ UndefAdded |= setArgsNoUndef(F);
+ return UndefAdded;
}
static bool setReturnedArg(Function &F, unsigned ArgNo) {
@@ -1268,7 +1271,7 @@ Value *llvm::emitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilderBase &B,
Value *llvm::emitStrCpy(Value *Dst, Value *Src, IRBuilderBase &B,
const TargetLibraryInfo *TLI) {
- Type *I8Ptr = B.getInt8PtrTy();
+ Type *I8Ptr = Dst->getType();
return emitLibCall(LibFunc_strcpy, I8Ptr, {I8Ptr, I8Ptr},
{castToCStr(Dst, B), castToCStr(Src, B)}, B, TLI);
}
@@ -1453,9 +1456,8 @@ static Value *emitUnaryFloatFnCallHelper(Value *Op, StringRef Name,
// The incoming attribute set may have come from a speculatable intrinsic, but
// is being replaced with a library call which is not allowed to be
// speculatable.
- CI->setAttributes(Attrs.removeAttribute(B.getContext(),
- AttributeList::FunctionIndex,
- Attribute::Speculatable));
+ CI->setAttributes(
+ Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable));
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1498,9 +1500,8 @@ static Value *emitBinaryFloatFnCallHelper(Value *Op1, Value *Op2,
// The incoming attribute set may have come from a speculatable intrinsic, but
// is being replaced with a library call which is not allowed to be
// speculatable.
- CI->setAttributes(Attrs.removeAttribute(B.getContext(),
- AttributeList::FunctionIndex,
- Attribute::Speculatable));
+ CI->setAttributes(
+ Attrs.removeFnAttribute(B.getContext(), Attribute::Speculatable));
if (const Function *F =
dyn_cast<Function>(Callee.getCallee()->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
@@ -1655,8 +1656,8 @@ Value *llvm::emitMalloc(Value *Num, IRBuilderBase &B, const DataLayout &DL,
return CI;
}
-Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
- IRBuilderBase &B, const TargetLibraryInfo &TLI) {
+Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
+ const TargetLibraryInfo &TLI) {
if (!TLI.has(LibFunc_calloc))
return nullptr;
@@ -1664,8 +1665,8 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs,
StringRef CallocName = TLI.getName(LibFunc_calloc);
const DataLayout &DL = M->getDataLayout();
IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext()));
- FunctionCallee Calloc = M->getOrInsertFunction(
- CallocName, Attrs, B.getInt8PtrTy(), PtrType, PtrType);
+ FunctionCallee Calloc =
+ M->getOrInsertFunction(CallocName, B.getInt8PtrTy(), PtrType, PtrType);
inferLibFuncAttributes(M, CallocName, TLI);
CallInst *CI = B.CreateCall(Calloc, {Num, Size}, CallocName);
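The return-attribute helpers used above replace the old ReturnIndex-based calls; in isolation the pattern is (sketch; markNoAliasReturn is a hypothetical helper):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

static bool markNoAliasReturn(llvm::Function &F) {
  // Query and add return-slot attributes directly, without going through
  // AttributeList::ReturnIndex.
  if (F.hasRetAttribute(llvm::Attribute::NoAlias))
    return false;
  F.addRetAttr(llvm::Attribute::NoAlias);
  return true;
}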
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 87868251036c..ebe19f1751e5 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -424,6 +424,21 @@ bool llvm::isLegalToPromote(const CallBase &CB, Function *Callee,
*FailureReason = "Argument type mismatch";
return false;
}
+ // Make sure that the callee and call agree on byval/inalloca. The types do
+ // not have to match.
+
+ if (Callee->hasParamAttribute(I, Attribute::ByVal) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::ByVal)) {
+ if (FailureReason)
+ *FailureReason = "byval mismatch";
+ return false;
+ }
+ if (Callee->hasParamAttribute(I, Attribute::InAlloca) !=
+ CB.getAttributes().hasParamAttr(I, Attribute::InAlloca)) {
+ if (FailureReason)
+ *FailureReason = "inalloca mismatch";
+ return false;
+ }
}
for (; I < NumArgs; I++) {
// Vararg functions can have more arguments than parameters.
@@ -485,18 +500,19 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
CB.setArgOperand(ArgNo, Cast);
// Remove any incompatible attributes for the argument.
- AttrBuilder ArgAttrs(CallerPAL.getParamAttributes(ArgNo));
+ AttrBuilder ArgAttrs(CallerPAL.getParamAttrs(ArgNo));
ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy));
- // If byval is used, this must be a pointer type, and the byval type must
- // match the element type. Update it if present.
+ // We may have a different byval/inalloca type.
if (ArgAttrs.getByValType())
ArgAttrs.addByValAttr(Callee->getParamByValType(ArgNo));
+ if (ArgAttrs.getInAllocaType())
+ ArgAttrs.addInAllocaAttr(Callee->getParamInAllocaType(ArgNo));
NewArgAttrs.push_back(AttributeSet::get(Ctx, ArgAttrs));
AttributeChanged = true;
} else
- NewArgAttrs.push_back(CallerPAL.getParamAttributes(ArgNo));
+ NewArgAttrs.push_back(CallerPAL.getParamAttrs(ArgNo));
}
// If the return type of the call site doesn't match that of the callee, cast
@@ -511,7 +527,7 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee,
// Set the new callsite attribute.
if (AttributeChanged)
- CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttributes(),
+ CB.setAttributes(AttributeList::get(Ctx, CallerPAL.getFnAttrs(),
AttributeSet::get(Ctx, RAttrs),
NewArgAttrs));
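The promotion legality check added above only requires the call site and the callee to agree on whether an argument is byval/inalloca, not on the pointee type; roughly (sketch; paramABIAgrees is a hypothetical helper):

#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

static bool paramABIAgrees(const llvm::CallBase &CB,
                           const llvm::Function &Callee, unsigned I) {
  llvm::AttributeList CallAttrs = CB.getAttributes();
  // byval/inalloca presence must match on both sides; the attribute's type
  // operand may differ and is rewritten later during promotion.
  return Callee.hasParamAttribute(I, llvm::Attribute::ByVal) ==
             CallAttrs.hasParamAttr(I, llvm::Attribute::ByVal) &&
         Callee.hasParamAttribute(I, llvm::Attribute::InAlloca) ==
             CallAttrs.hasParamAttr(I, llvm::Attribute::InAlloca);
}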
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
index 1f649fe6c748..049c7d113521 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp
@@ -33,7 +33,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 0ac9a5aaa425..048e691e33cf 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -62,7 +62,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
NewBB->getInstList().push_back(NewInst);
VMap[&I] = NewInst; // Add instruction map to value.
- hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I));
+ hasCalls |= (isa<CallInst>(I) && !I.isDebugOrPseudoInst());
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (!AI->isStaticAlloca()) {
hasDynamicAllocas = true;
@@ -116,13 +116,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
for (const Argument &OldArg : OldFunc->args()) {
if (Argument *NewArg = dyn_cast<Argument>(VMap[&OldArg])) {
NewArgAttrs[NewArg->getArgNo()] =
- OldAttrs.getParamAttributes(OldArg.getArgNo());
+ OldAttrs.getParamAttrs(OldArg.getArgNo());
}
}
NewFunc->setAttributes(
- AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
- OldAttrs.getRetAttributes(), NewArgAttrs));
+ AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(),
+ OldAttrs.getRetAttrs(), NewArgAttrs));
// Everything else beyond this point deals with function instructions,
// so if we are dealing with a function declaration, we're done.
@@ -410,7 +410,7 @@ void PruningFunctionCloner::CloneBlock(
NewInst->setName(II->getName() + NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
NewBB->getInstList().push_back(NewInst);
- hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
+ hasCalls |= (isa<CallInst>(II) && !II->isDebugOrPseudoInst());
if (CodeInfo) {
CodeInfo->OrigVMap[&*II] = NewInst;
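The hasCalls update above now also ignores pseudo-probe instructions, not just debug intrinsics; the test reduces to something like this (sketch; blockHasRealCall is a hypothetical helper):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static bool blockHasRealCall(const llvm::BasicBlock &BB) {
  // Count only calls that are neither debug intrinsics nor pseudo
  // instructions, matching the cloned-block bookkeeping above.
  for (const llvm::Instruction &I : BB)
    if (llvm::isa<llvm::CallInst>(I) && !I.isDebugOrPseudoInst())
      return true;
  return false;
}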
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 9edc52b53550..96aff563aa9b 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -434,6 +434,7 @@ CodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) {
}
// Now add the old exit block to the outline region.
Blocks.insert(CommonExitBlock);
+ OldTargets.push_back(NewExitBlock);
return CommonExitBlock;
}
@@ -885,7 +886,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// "target-features" attribute allowing it to be lowered.
// FIXME: This should be changed to check to see if a specific
// attribute can not be inherited.
- for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) {
+ for (const auto &Attr : oldFunction->getAttributes().getFnAttrs()) {
if (Attr.isStringAttribute()) {
if (Attr.getKindAsString() == "thunk")
continue;
@@ -943,6 +944,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
case Attribute::Cold:
+ case Attribute::DisableSanitizerInstrumentation:
case Attribute::Hot:
case Attribute::NoRecurse:
case Attribute::InlineHint:
@@ -1044,9 +1046,8 @@ static void eraseLifetimeMarkersOnInputs(const SetVector<BasicBlock *> &Blocks,
const SetVector<Value *> &SunkAllocas,
SetVector<Value *> &LifetimesStart) {
for (BasicBlock *BB : Blocks) {
- for (auto It = BB->begin(), End = BB->end(); It != End;) {
- auto *II = dyn_cast<IntrinsicInst>(&*It);
- ++It;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
+ auto *II = dyn_cast<IntrinsicInst>(&I);
if (!II || !II->isLifetimeStartOrEnd())
continue;
@@ -1247,45 +1248,57 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
// not in the region to be extracted.
std::map<BasicBlock *, BasicBlock *> ExitBlockMap;
+ // Iterate over the previously collected targets, and create new blocks inside
+ // the function to branch to.
unsigned switchVal = 0;
+ for (BasicBlock *OldTarget : OldTargets) {
+ if (Blocks.count(OldTarget))
+ continue;
+ BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
+ if (NewTarget)
+ continue;
+
+ // If we don't already have an exit stub for this non-extracted
+ // destination, create one now!
+ NewTarget = BasicBlock::Create(Context,
+ OldTarget->getName() + ".exitStub",
+ newFunction);
+ unsigned SuccNum = switchVal++;
+
+ Value *brVal = nullptr;
+ assert(NumExitBlocks < 0xffff && "too many exit blocks for switch");
+ switch (NumExitBlocks) {
+ case 0:
+ case 1: break; // No value needed.
+ case 2: // Conditional branch, return a bool
+ brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
+ break;
+ default:
+ brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
+ break;
+ }
+
+ ReturnInst::Create(Context, brVal, NewTarget);
+
+ // Update the switch instruction.
+ TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
+ SuccNum),
+ OldTarget);
+ }
+
for (BasicBlock *Block : Blocks) {
Instruction *TI = Block->getTerminator();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- if (!Blocks.count(TI->getSuccessor(i))) {
- BasicBlock *OldTarget = TI->getSuccessor(i);
- // add a new basic block which returns the appropriate value
- BasicBlock *&NewTarget = ExitBlockMap[OldTarget];
- if (!NewTarget) {
- // If we don't already have an exit stub for this non-extracted
- // destination, create one now!
- NewTarget = BasicBlock::Create(Context,
- OldTarget->getName() + ".exitStub",
- newFunction);
- unsigned SuccNum = switchVal++;
-
- Value *brVal = nullptr;
- switch (NumExitBlocks) {
- case 0:
- case 1: break; // No value needed.
- case 2: // Conditional branch, return a bool
- brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum);
- break;
- default:
- brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum);
- break;
- }
-
- ReturnInst::Create(Context, brVal, NewTarget);
-
- // Update the switch instruction.
- TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context),
- SuccNum),
- OldTarget);
- }
-
- // rewrite the original branch instruction with this new target
- TI->setSuccessor(i, NewTarget);
- }
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (Blocks.count(TI->getSuccessor(i)))
+ continue;
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ // add a new basic block which returns the appropriate value
+ BasicBlock *NewTarget = ExitBlockMap[OldTarget];
+ assert(NewTarget && "Unknown target block!");
+
+ // rewrite the original branch instruction with this new target
+ TI->setSuccessor(i, NewTarget);
+ }
}
// Store the arguments right after the definition of output value.
@@ -1388,12 +1401,17 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList();
Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList();
+ auto newFuncIt = newFunction->front().getIterator();
for (BasicBlock *Block : Blocks) {
// Delete the basic block from the old function, and the list of blocks
oldBlocks.remove(Block);
// Insert this basic block into the new function
- newBlocks.push_back(Block);
+ // Insert the original blocks after the entry block created
+ // for the new function. The entry block may be followed
+ // by a set of exit blocks at this point, but these exit
+ // blocks better be placed at the end of the new function.
+ newFuncIt = newBlocks.insertAfter(newFuncIt, Block);
}
}
@@ -1569,6 +1587,13 @@ static void fixupDebugInfoPostExtraction(Function &OldFunc, Function &NewFunc,
Function *
CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
+ ValueSet Inputs, Outputs;
+ return extractCodeRegion(CEAC, Inputs, Outputs);
+}
+
+Function *
+CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
+ ValueSet &inputs, ValueSet &outputs) {
if (!isEligible())
return nullptr;
@@ -1593,11 +1618,8 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
// Remove @llvm.assume calls that will be moved to the new function from the
// old function's assumption cache.
for (BasicBlock *Block : Blocks) {
- for (auto It = Block->begin(), End = Block->end(); It != End;) {
- Instruction *I = &*It;
- ++It;
-
- if (auto *AI = dyn_cast<AssumeInst>(I)) {
+ for (Instruction &I : llvm::make_early_inc_range(*Block)) {
+ if (auto *AI = dyn_cast<AssumeInst>(&I)) {
if (AC)
AC->unregisterAssumption(AI);
AI->eraseFromParent();
@@ -1627,6 +1649,16 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
}
NumExitBlocks = ExitBlocks.size();
+ for (BasicBlock *Block : Blocks) {
+ Instruction *TI = Block->getTerminator();
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ if (Blocks.count(TI->getSuccessor(i)))
+ continue;
+ BasicBlock *OldTarget = TI->getSuccessor(i);
+ OldTargets.push_back(OldTarget);
+ }
+ }
+
// If we have to split PHI nodes of the entry or exit blocks, do so now.
severSplitPHINodesOfEntry(header);
severSplitPHINodesOfExits(ExitBlocks);
@@ -1657,7 +1689,7 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC) {
}
newFuncRoot->getInstList().push_back(BranchI);
- ValueSet inputs, outputs, SinkingCands, HoistingCands;
+ ValueSet SinkingCands, HoistingCands;
BasicBlock *CommonExit = nullptr;
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
assert(HoistingCands.empty() || CommonExit);
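The new extractCodeRegion overload exposes the computed input and output values to the caller; a hedged usage sketch (assuming CodeExtractor.h declares the overload and the public ValueSet typedef; extractAndInspect is hypothetical):

#include "llvm/Transforms/Utils/CodeExtractor.h"

static llvm::Function *
extractAndInspect(llvm::CodeExtractor &CE,
                  const llvm::CodeExtractorAnalysisCache &CEAC) {
  llvm::CodeExtractor::ValueSet Inputs, Outputs;
  // Inputs/Outputs are populated during extraction; the original
  // single-argument overload above simply forwards here and discards them.
  llvm::Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs);
  return Outlined;
}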
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index ce982c7403aa..648f4e64a4d2 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -309,7 +309,7 @@ collectInstructionsInBetween(Instruction &StartInst, const Instruction &EndInst,
bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
DominatorTree &DT, const PostDominatorTree *PDT,
- DependenceInfo *DI) {
+ DependenceInfo *DI, bool CheckForEntireBlock) {
// Skip tests when we don't have PDT or DI
if (!PDT || !DI)
return false;
@@ -332,16 +332,24 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
return reportInvalidCandidate(I, NotControlFlowEquivalent);
- if (!DT.dominates(&InsertPoint, &I))
+ if (isReachedBefore(&I, &InsertPoint, &DT, PDT))
for (const Use &U : I.uses())
if (auto *UserInst = dyn_cast<Instruction>(U.getUser()))
if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U))
return false;
- if (!DT.dominates(&I, &InsertPoint))
+ if (isReachedBefore(&InsertPoint, &I, &DT, PDT))
for (const Value *Op : I.operands())
- if (auto *OpInst = dyn_cast<Instruction>(Op))
- if (&InsertPoint == OpInst || !DT.dominates(OpInst, &InsertPoint))
+ if (auto *OpInst = dyn_cast<Instruction>(Op)) {
+ if (&InsertPoint == OpInst)
+ return false;
+ // If OpInst is an instruction that appears earlier in the same BB as
+ // I, then it is okay to move since OpInst will still be available.
+ if (CheckForEntireBlock && I.getParent() == OpInst->getParent() &&
+ DT.dominates(OpInst, &I))
+ continue;
+ if (!DT.dominates(OpInst, &InsertPoint))
return false;
+ }
DT.updateDFSNumbers();
const bool MoveForward = domTreeLevelBefore(&DT, &I, &InsertPoint);
@@ -393,7 +401,8 @@ bool llvm::isSafeToMoveBefore(BasicBlock &BB, Instruction &InsertPoint,
if (BB.getTerminator() == &I)
return true;
- return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI);
+ return isSafeToMoveBefore(I, InsertPoint, DT, PDT, DI,
+ /*CheckForEntireBlock=*/true);
});
}
@@ -401,11 +410,9 @@ void llvm::moveInstructionsToTheBeginning(BasicBlock &FromBB, BasicBlock &ToBB,
DominatorTree &DT,
const PostDominatorTree &PDT,
DependenceInfo &DI) {
- for (auto It = ++FromBB.rbegin(); It != FromBB.rend();) {
+ for (Instruction &I :
+ llvm::make_early_inc_range(llvm::drop_begin(llvm::reverse(FromBB)))) {
Instruction *MovePos = ToBB.getFirstNonPHIOrDbg();
- Instruction &I = *It;
- // Increment the iterator before modifying FromBB.
- ++It;
if (isSafeToMoveBefore(I, *MovePos, DT, &PDT, &DI))
I.moveBefore(MovePos);
@@ -423,3 +430,47 @@ void llvm::moveInstructionsToTheEnd(BasicBlock &FromBB, BasicBlock &ToBB,
I.moveBefore(MovePos);
}
}
+
+bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock,
+ const BasicBlock *OtherBlock,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT) {
+ assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) &&
+ "ThisBlock and OtherBlock must be CFG equivalent!");
+ const BasicBlock *CommonDominator =
+ DT->findNearestCommonDominator(ThisBlock, OtherBlock);
+ if (CommonDominator == nullptr)
+ return false;
+
+ /// Recursively check the predecessors of \p ThisBlock up to
+ /// their common dominator, and see if any of them post-dominates
+ /// \p OtherBlock.
+ SmallVector<const BasicBlock *, 8> WorkList;
+ SmallPtrSet<const BasicBlock *, 8> Visited;
+ WorkList.push_back(ThisBlock);
+ while (!WorkList.empty()) {
+ const BasicBlock *CurBlock = WorkList.back();
+ WorkList.pop_back();
+ Visited.insert(CurBlock);
+ if (PDT->dominates(CurBlock, OtherBlock))
+ return true;
+
+ for (auto *Pred : predecessors(CurBlock)) {
+ if (Pred == CommonDominator || Visited.count(Pred))
+ continue;
+ WorkList.push_back(Pred);
+ }
+ }
+ return false;
+}
+
+bool llvm::isReachedBefore(const Instruction *I0, const Instruction *I1,
+ const DominatorTree *DT,
+ const PostDominatorTree *PDT) {
+ const BasicBlock *BB0 = I0->getParent();
+ const BasicBlock *BB1 = I1->getParent();
+ if (BB0 == BB1)
+ return DT->dominates(I0, I1);
+
+ return nonStrictlyPostDominate(BB1, BB0, DT, PDT);
+}
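The two helpers added above factor the ordering question out of isSafeToMoveBefore: within one block the dominator tree decides, across control-flow-equivalent blocks the predecessor walk does. A minimal wrapper sketch (executesBefore is hypothetical; the two blocks are expected to be control-flow equivalent, as the assertion above requires):

#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Transforms/Utils/CodeMoverUtils.h"

static bool executesBefore(const llvm::Instruction *I0,
                           const llvm::Instruction *I1,
                           const llvm::DominatorTree &DT,
                           const llvm::PostDominatorTree &PDT) {
  // Same block: instruction order via the dominator tree. Different blocks:
  // check whether I1's block non-strictly post-dominates I0's block along
  // some predecessor path up to their common dominator.
  return llvm::isReachedBefore(I0, I1, &DT, &PDT);
}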
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 30c3fa521d52..fc7083b0c30d 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -457,14 +457,14 @@ static bool checkInstructions(const DebugInstMap &DILocsBefore,
}
// This checks the preservation of original debug variable intrinsics.
-static bool checkVars(const DebugVarMap &DIFunctionsBefore,
- const DebugVarMap &DIFunctionsAfter,
+static bool checkVars(const DebugVarMap &DIVarsBefore,
+ const DebugVarMap &DIVarsAfter,
StringRef NameOfWrappedPass, StringRef FileNameFromCU,
bool ShouldWriteIntoJSON, llvm::json::Array &Bugs) {
bool Preserved = true;
- for (const auto &V : DIFunctionsBefore) {
- auto VarIt = DIFunctionsAfter.find(V.first);
- if (VarIt == DIFunctionsAfter.end())
+ for (const auto &V : DIVarsBefore) {
+ auto VarIt = DIVarsAfter.find(V.first);
+ if (VarIt == DIVarsAfter.end())
continue;
unsigned NumOfDbgValsAfter = VarIt->second;
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 31d03e1e86af..e3e8f63383df 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -89,7 +89,7 @@ static bool runOnFunction(Function &F, bool PostInlining) {
insertCall(F, EntryFunc, &*F.begin()->getFirstInsertionPt(), DL);
Changed = true;
- F.removeAttribute(AttributeList::FunctionIndex, EntryAttr);
+ F.removeFnAttr(EntryAttr);
}
if (!ExitFunc.empty()) {
@@ -111,7 +111,7 @@ static bool runOnFunction(Function &F, bool PostInlining) {
insertCall(F, ExitFunc, T, DL);
Changed = true;
}
- F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);
+ F.removeFnAttr(ExitAttr);
}
return Changed;
@@ -183,3 +183,13 @@ llvm::EntryExitInstrumenterPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void llvm::EntryExitInstrumenterPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<llvm::EntryExitInstrumenterPass> *>(this)
+ ->printPipeline(OS, MapClassName2PassName);
+ OS << "<";
+ if (PostInlining)
+ OS << "post-inline";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index 463c223d9e8f..9c8aed94708e 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -128,11 +128,6 @@ isSimpleEnoughValueToCommit(Constant *C,
/// globals and GEP's of globals. This should be kept up to date with
/// CommitValueTo.
static bool isSimpleEnoughPointerToCommit(Constant *C, const DataLayout &DL) {
- // Conservatively, avoid aggregate types. This is because we don't
- // want to worry about them partially overlapping other stores.
- if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
- return false;
-
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
// Do not allow weak/*_odr/linkonce linkage or external globals.
return GV->hasUniqueInitializer();
@@ -284,7 +279,7 @@ bool Evaluator::getFormalParams(CallBase &CB, Function *F,
return false;
auto *FTy = F->getFunctionType();
- if (FTy->getNumParams() > CB.getNumArgOperands()) {
+ if (FTy->getNumParams() > CB.arg_size()) {
LLVM_DEBUG(dbgs() << "Too few arguments for function.\n");
return false;
}
@@ -343,7 +338,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
Ptr = FoldedPtr;
LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
- if (!isSimpleEnoughPointerToCommit(Ptr, DL)) {
+ // Conservatively, avoid aggregate types. This is because we don't
+ // want to worry about them partially overlapping other stores.
+ if (!SI->getValueOperand()->getType()->isSingleValueType() ||
+ !isSimpleEnoughPointerToCommit(Ptr, DL)) {
// If this is too complex for us to commit, reject it.
LLVM_DEBUG(
dbgs() << "Pointer is too complex for us to evaluate store.");
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 10f48fe827f4..8de3ce876bab 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -124,7 +124,7 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
// children to a new vector.
auto FirstChild = std::partition(
CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) {
- return L == NewLoop || Blocks.count(L->getHeader()) == 0;
+ return L == NewLoop || !Blocks.contains(L->getHeader());
});
SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
CandidateLoops.erase(FirstChild, CandidateLoops.end());
diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index dbcacc20b589..ddd3f597ae01 100644
--- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -162,7 +162,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// of \param BB (BB4) and should not have address-taken.
// There should exist only one such unconditional
// branch among the predecessors.
- if (UnCondBlock || !PP || (Preds.count(PP) == 0) ||
+ if (UnCondBlock || !PP || !Preds.contains(PP) ||
Pred->hasAddressTaken())
return false;
@@ -215,7 +215,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {
// PS is the successor which is not BB. Check successors to identify
// the last conditional branch.
- if (Preds.count(PS) == 0) {
+ if (!Preds.contains(PS)) {
// Case 2.
LastCondBlock = Pred;
} else {
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 2696557a719f..326864803d7c 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -110,7 +110,7 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
return Res;
- for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
+ for (unsigned i : L.indexes()) {
AttributeSet LAS = L.getAttributes(i);
AttributeSet RAS = R.getAttributes(i);
AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
diff --git a/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index f782396be7b6..9bfc73e4ba6c 100644
--- a/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -105,8 +105,10 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
// value, not an aggregate), keep more specific information about
// stores.
if (GS.StoredType != GlobalStatus::Stored) {
- if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(SI->getOperand(1))) {
+ const Value *Ptr = SI->getPointerOperand();
+ if (isa<ConstantExpr>(Ptr))
+ Ptr = Ptr->stripPointerCasts();
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
Value *StoredVal = SI->getOperand(0);
if (Constant *C = dyn_cast<Constant>(StoredVal)) {
@@ -125,9 +127,9 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
GS.StoredType = GlobalStatus::InitializerStored;
} else if (GS.StoredType < GlobalStatus::StoredOnce) {
GS.StoredType = GlobalStatus::StoredOnce;
- GS.StoredOnceValue = StoredVal;
+ GS.StoredOnceStore = SI;
} else if (GS.StoredType == GlobalStatus::StoredOnce &&
- GS.StoredOnceValue == StoredVal) {
+ GS.getStoredOnceValue() == StoredVal) {
// noop.
} else {
GS.StoredType = GlobalStatus::Stored;
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index a1e160d144dc..047bf5569ded 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -47,7 +47,7 @@ static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
// Add function declaration.
Type *RetTy = ToVectorTy(CI.getType(), VF);
SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI.arg_operands())
+ for (Value *ArgOperand : CI.args())
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
assert(!CI.getFunctionType()->isVarArg() &&
"VarArg functions are not supported.");
@@ -94,8 +94,8 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
const std::string TLIName =
std::string(TLI.getVectorizedFunction(ScalarName, VF));
if (!TLIName.empty()) {
- std::string MangledName = VFABI::mangleTLIVectorName(
- TLIName, ScalarName, CI.getNumArgOperands(), VF);
+ std::string MangledName =
+ VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
if (!OriginalSetOfMappings.count(MangledName)) {
Mappings.push_back(MangledName);
++NumCallInjected;
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 792aa8208f27..f4776589910f 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -539,12 +539,10 @@ static Value *getUnwindDestToken(Instruction *EHPad,
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
BasicBlock *BB, BasicBlock *UnwindEdge,
UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *I = &*BBI++;
-
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
- CallInst *CI = dyn_cast<CallInst>(I);
+ CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI || CI->doesNotThrow())
continue;
@@ -830,6 +828,7 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
}
}
+namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
@@ -851,6 +850,7 @@ public:
/// metadata.
void remap(Function::iterator FStart, Function::iterator FEnd);
};
+} // namespace
ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
const Function *F) {
@@ -1179,14 +1179,8 @@ static bool MayContainThrowingOrExitingCall(Instruction *Begin,
assert(Begin->getParent() == End->getParent() &&
"Expected to be in same basic block!");
- unsigned NumInstChecked = 0;
- // Check that all instructions in the range [Begin, End) are guaranteed to
- // transfer execution to successor.
- for (auto &I : make_range(Begin->getIterator(), End->getIterator()))
- if (NumInstChecked++ > InlinerAttributeWindow ||
- !isGuaranteedToTransferExecutionToSuccessor(&I))
- return true;
- return false;
+ return !llvm::isGuaranteedToTransferExecutionToSuccessor(
+ Begin->getIterator(), End->getIterator(), InlinerAttributeWindow + 1);
}
static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
@@ -1259,8 +1253,7 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
// existing attribute value (i.e. attributes such as dereferenceable,
// dereferenceable_or_null etc). See AttrBuilder::merge for more details.
AttributeList AL = NewRetVal->getAttributes();
- AttributeList NewAL =
- AL.addAttributes(Context, AttributeList::ReturnIndex, Valid);
+ AttributeList NewAL = AL.addRetAttributes(Context, Valid);
NewRetVal->setAttributes(NewAL);
}
}
@@ -1376,13 +1369,13 @@ static void UpdateCallGraphAfterInlining(CallBase &CB,
CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB));
}
-static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
- BasicBlock *InsertBlock,
+static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
+ Module *M, BasicBlock *InsertBlock,
InlineFunctionInfo &IFI) {
- Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
- Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy));
+ Value *Size =
+ Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
@@ -1393,13 +1386,13 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
/// When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
-static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
+static Value *HandleByValArgument(Type *ByValType, Value *Arg,
+ Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- PointerType *ArgTy = cast<PointerType>(Arg->getType());
- Type *AggTy = ArgTy->getElementType();
-
+ assert(cast<PointerType>(Arg->getType())
+ ->isOpaqueOrPointeeTypeMatches(ByValType));
Function *Caller = TheCall->getFunction();
const DataLayout &DL = Caller->getParent()->getDataLayout();
@@ -1427,7 +1420,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
}
// Create the alloca. If we have DataLayout, use nice alignment.
- Align Alignment(DL.getPrefTypeAlignment(AggTy));
+ Align Alignment(DL.getPrefTypeAlignment(ByValType));
// If the byval had an alignment specified, we *must* use at least that
// alignment, as it is required by the byval argument (and uses of the
@@ -1435,7 +1428,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Alignment = max(Alignment, MaybeAlign(ByValAlignment));
Value *NewAlloca =
- new AllocaInst(AggTy, DL.getAllocaAddrSpace(), nullptr, Alignment,
+ new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment,
Arg->getName(), &*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
@@ -1607,8 +1600,7 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
const ProfileCount &CalleeEntryCount,
const CallBase &TheCall, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *CallerBFI) {
- if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() ||
- CalleeEntryCount.getCount() < 1)
+ if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)
return;
auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;
int64_t CallCount =
@@ -1617,40 +1609,39 @@ static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,
}
void llvm::updateProfileCallee(
- Function *Callee, int64_t entryDelta,
+ Function *Callee, int64_t EntryDelta,
const ValueMap<const Value *, WeakTrackingVH> *VMap) {
auto CalleeCount = Callee->getEntryCount();
if (!CalleeCount.hasValue())
return;
- uint64_t priorEntryCount = CalleeCount.getCount();
- uint64_t newEntryCount;
+ const uint64_t PriorEntryCount = CalleeCount->getCount();
// Since CallSiteCount is an estimate, it could exceed the original callee
// count and has to be set to 0 so guard against underflow.
- if (entryDelta < 0 && static_cast<uint64_t>(-entryDelta) > priorEntryCount)
- newEntryCount = 0;
- else
- newEntryCount = priorEntryCount + entryDelta;
+ const uint64_t NewEntryCount =
+ (EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)
+ ? 0
+ : PriorEntryCount + EntryDelta;
// During inlining ?
if (VMap) {
- uint64_t cloneEntryCount = priorEntryCount - newEntryCount;
+ uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
for (auto Entry : *VMap)
if (isa<CallInst>(Entry.first))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
- CI->updateProfWeight(cloneEntryCount, priorEntryCount);
+ CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
}
- if (entryDelta) {
- Callee->setEntryCount(newEntryCount);
+ if (EntryDelta) {
+ Callee->setEntryCount(NewEntryCount);
for (BasicBlock &BB : *Callee)
// No need to update the callsite if it is pruned during inlining.
if (!VMap || VMap->count(&BB))
for (Instruction &I : BB)
if (CallInst *CI = dyn_cast<CallInst>(&I))
- CI->updateProfWeight(newEntryCount, priorEntryCount);
+ CI->updateProfWeight(NewEntryCount, PriorEntryCount);
}
}
@@ -1672,66 +1663,69 @@ void llvm::updateProfileCallee(
/// 3. Otherwise, a call to objc_retain is inserted if the call in the caller is
/// a retainRV call.
static void
-inlineRetainOrClaimRVCalls(CallBase &CB,
+inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
const SmallVectorImpl<ReturnInst *> &Returns) {
Module *Mod = CB.getModule();
- bool IsRetainRV = objcarc::hasAttachedCallOpBundle(&CB, true),
+ assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
+ bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
IsClaimRV = !IsRetainRV;
for (auto *RI : Returns) {
Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
- BasicBlock::reverse_iterator I = ++(RI->getIterator().getReverse());
- BasicBlock::reverse_iterator EI = RI->getParent()->rend();
bool InsertRetainCall = IsRetainRV;
IRBuilder<> Builder(RI->getContext());
// Walk backwards through the basic block looking for either a matching
// autoreleaseRV call or an unannotated call.
- for (; I != EI;) {
- auto CurI = I++;
-
+ auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),
+ RI->getParent()->rend());
+ for (Instruction &I : llvm::make_early_inc_range(InstRange)) {
// Ignore casts.
- if (isa<CastInst>(*CurI))
+ if (isa<CastInst>(I))
continue;
- if (auto *II = dyn_cast<IntrinsicInst>(&*CurI)) {
- if (II->getIntrinsicID() == Intrinsic::objc_autoreleaseReturnValue &&
- II->hasNUses(0) &&
- objcarc::GetRCIdentityRoot(II->getOperand(0)) == RetOpnd) {
- // If we've found a matching authoreleaseRV call:
- // - If claimRV is attached to the call, insert a call to objc_release
- // and erase the autoreleaseRV call.
- // - If retainRV is attached to the call, just erase the autoreleaseRV
- // call.
- if (IsClaimRV) {
- Builder.SetInsertPoint(II);
- Function *IFn =
- Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
- Value *BC =
- Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
- Builder.CreateCall(IFn, BC, "");
- }
- II->eraseFromParent();
- InsertRetainCall = false;
- }
- } else if (auto *CI = dyn_cast<CallInst>(&*CurI)) {
- if (objcarc::GetRCIdentityRoot(CI) == RetOpnd &&
- !objcarc::hasAttachedCallOpBundle(CI)) {
- // If we've found an unannotated call that defines RetOpnd, add a
- // "clang.arc.attachedcall" operand bundle.
- Value *BundleArgs[] = {ConstantInt::get(
- Builder.getInt64Ty(),
- objcarc::getAttachedCallOperandBundleEnum(IsRetainRV))};
- OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
- auto *NewCall = CallBase::addOperandBundle(
- CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
- NewCall->copyMetadata(*CI);
- CI->replaceAllUsesWith(NewCall);
- CI->eraseFromParent();
- InsertRetainCall = false;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||
+ !II->hasNUses(0) ||
+ objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)
+ break;
+
+ // If we've found a matching autoreleaseRV call:
+ // - If claimRV is attached to the call, insert a call to objc_release
+ // and erase the autoreleaseRV call.
+ // - If retainRV is attached to the call, just erase the autoreleaseRV
+ // call.
+ if (IsClaimRV) {
+ Builder.SetInsertPoint(II);
+ Function *IFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);
+ Value *BC = Builder.CreateBitCast(RetOpnd, IFn->getArg(0)->getType());
+ Builder.CreateCall(IFn, BC, "");
}
+ II->eraseFromParent();
+ InsertRetainCall = false;
+ break;
}
+ auto *CI = dyn_cast<CallInst>(&I);
+
+ if (!CI)
+ break;
+
+ if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||
+ objcarc::hasAttachedCallOpBundle(CI))
+ break;
+
+ // If we've found an unannotated call that defines RetOpnd, add a
+ // "clang.arc.attachedcall" operand bundle.
+ Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};
+ OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);
+ auto *NewCall = CallBase::addOperandBundle(
+ CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI);
+ NewCall->copyMetadata(*CI);
+ CI->replaceAllUsesWith(NewCall);
+ CI->eraseFromParent();
+ InsertRetainCall = false;
break;
}
@@ -1895,8 +1889,13 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
+ struct ByValInit {
+ Value *Dst;
+ Value *Src;
+ Type *Ty;
+ };
// Keep a list of pair (dst, src) to emit byval initializations.
- SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
+ SmallVector<ByValInit, 4> ByValInits;
// When inlining a function that contains noalias scope metadata,
// this metadata needs to be cloned so that the inlined blocks
@@ -1921,10 +1920,12 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
if (CB.isByValArgument(ArgNo)) {
- ActualArg = HandleByValArgument(ActualArg, &CB, CalledFunc, IFI,
+ ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,
+ &CB, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo));
if (ActualArg != *AI)
- ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
+ ByValInits.push_back(
+ {ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});
}
VMap[&*I] = ActualArg;
@@ -1953,8 +1954,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
FirstNewBlock = LastBlock; ++FirstNewBlock;
// Insert retainRV/claimRV runtime calls.
- if (objcarc::hasAttachedCallOpBundle(&CB))
- inlineRetainOrClaimRVCalls(CB, Returns);
+ objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);
+ if (RVCallKind != objcarc::ARCInstKind::None)
+ inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);
// Updated caller/callee profiles only when requested. For sample loader
// inlining, the context-sensitive inlinee profile doesn't need to be
@@ -1966,13 +1968,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,
CalledFunc->front());
- updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), CB,
- IFI.PSI, IFI.CallerBFI);
+ if (auto Profile = CalledFunc->getEntryCount())
+ updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,
+ IFI.CallerBFI);
}
// Inject byval arguments initialization.
- for (std::pair<Value*, Value*> &Init : ByValInit)
- HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ for (ByValInit &Init : ByValInits)
+ HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
&*FirstNewBlock, IFI);
Optional<OperandBundleUse> ParentDeopt =
@@ -2100,9 +2103,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
SmallVector<Value*,4> VarArgsToForward;
SmallVector<AttributeSet, 4> VarArgsAttrs;
for (unsigned i = CalledFunc->getFunctionType()->getNumParams();
- i < CB.getNumArgOperands(); i++) {
+ i < CB.arg_size(); i++) {
VarArgsToForward.push_back(CB.getArgOperand(i));
- VarArgsAttrs.push_back(CB.getAttributes().getParamAttributes(i));
+ VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));
}
bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;
@@ -2117,8 +2120,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
++BB) {
- for (auto II = BB->begin(); II != BB->end();) {
- Instruction &I = *II++;
+ for (Instruction &I : llvm::make_early_inc_range(*BB)) {
CallInst *CI = dyn_cast<CallInst>(&I);
if (!CI)
continue;
@@ -2135,15 +2137,15 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {
for (unsigned ArgNo = 0;
ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)
- ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
+ ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
}
// Add VarArg attributes.
ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());
- Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(),
- Attrs.getRetAttributes(), ArgAttrs);
+ Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),
+ Attrs.getRetAttrs(), ArgAttrs);
// Add VarArgs to existing parameters.
- SmallVector<Value *, 6> Params(CI->arg_operands());
+ SmallVector<Value *, 6> Params(CI->args());
Params.append(VarArgsToForward.begin(), VarArgsToForward.end());
CallInst *NewCI = CallInst::Create(
CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI);
@@ -2295,8 +2297,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
BB != E; ++BB) {
// Add bundle operands to any top-level call sites.
SmallVector<OperandBundleDef, 1> OpBundles;
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) {
- CallBase *I = dyn_cast<CallBase>(&*BBI++);
+ for (Instruction &II : llvm::make_early_inc_range(*BB)) {
+ CallBase *I = dyn_cast<CallBase>(&II);
if (!I)
continue;
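Most of the iterator churn in this file follows one idiom: llvm::make_early_inc_range advances past the current instruction before the loop body runs, so the body may erase or replace it. A minimal sketch (removeDebugIntrinsics is a hypothetical helper, not part of the patch):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IntrinsicInst.h"

static void removeDebugIntrinsics(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    // Erasing I is safe here because the underlying iterator has already
    // moved on to the next instruction.
    if (llvm::isa<llvm::DbgInfoIntrinsic>(I))
      I.eraseFromParent();
}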
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 277fd903e9aa..668626fef933 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -309,7 +309,7 @@ static void computeBlocksDominatingExits(
// worklist, unless we visited it already.
BasicBlock *IDomBB = DT.getNode(BB)->getIDom()->getBlock();
- // Exit blocks can have an immediate dominator not beloinging to the
+ // Exit blocks can have an immediate dominator not belonging to the
// loop. For an exit block to be immediately dominated by another block
// outside the loop, it implies not all paths from that dominator, to the
// exit block, go through the loop.
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 7e5832148bc0..6958a89f5be6 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -304,7 +304,7 @@ void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func))
return;
- if (CI.getNumArgOperands() == 0)
+ if (CI.arg_empty())
return;
// TODO: Handle long double in other formats.
Type *ArgType = CI.getArgOperand(0)->getType();
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index d03d76f57ca1..74ab37fadf36 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1413,8 +1413,6 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
if (auto *AI =
dyn_cast_or_null<AllocaInst>(DII->getVariableLocationOp(0))) {
if (Optional<TypeSize> FragmentSize = AI->getAllocationSizeInBits(DL)) {
- assert(ValueSize.isScalable() == FragmentSize->isScalable() &&
- "Both sizes should agree on the scalable flag.");
return TypeSize::isKnownGE(ValueSize, *FragmentSize);
}
}
@@ -1733,9 +1731,11 @@ void llvm::salvageDebugInfo(Instruction &I) {
void llvm::salvageDebugInfoForDbgValues(
Instruction &I, ArrayRef<DbgVariableIntrinsic *> DbgUsers) {
- // This is an arbitrary chosen limit on the maximum number of values we can
- // salvage up to in a DIArgList, used for performance reasons.
+ // These are arbitrary chosen limits on the maximum number of values and the
+ // maximum size of a debug expression we can salvage up to, used for
+ // performance reasons.
const unsigned MaxDebugArgs = 16;
+ const unsigned MaxExpressionSize = 128;
bool Salvaged = false;
for (auto *DII : DbgUsers) {
@@ -1752,23 +1752,30 @@ void llvm::salvageDebugInfoForDbgValues(
// must be updated in the DIExpression and potentially have additional
// values added; thus we call salvageDebugInfoImpl for each `I` instance in
// DIILocation.
+ Value *Op0 = nullptr;
DIExpression *SalvagedExpr = DII->getExpression();
auto LocItr = find(DIILocation, &I);
while (SalvagedExpr && LocItr != DIILocation.end()) {
+ SmallVector<uint64_t, 16> Ops;
unsigned LocNo = std::distance(DIILocation.begin(), LocItr);
- SalvagedExpr = salvageDebugInfoImpl(I, SalvagedExpr, StackValue, LocNo,
- AdditionalValues);
+ uint64_t CurrentLocOps = SalvagedExpr->getNumLocationOperands();
+ Op0 = salvageDebugInfoImpl(I, CurrentLocOps, Ops, AdditionalValues);
+ if (!Op0)
+ break;
+ SalvagedExpr =
+ DIExpression::appendOpsToArg(SalvagedExpr, Ops, LocNo, StackValue);
LocItr = std::find(++LocItr, DIILocation.end(), &I);
}
// salvageDebugInfoImpl should fail on examining the first element of
// DbgUsers, or none of them.
- if (!SalvagedExpr)
+ if (!Op0)
break;
- DII->replaceVariableLocationOp(&I, I.getOperand(0));
- if (AdditionalValues.empty()) {
+ DII->replaceVariableLocationOp(&I, Op0);
+ bool IsValidSalvageExpr = SalvagedExpr->getNumElements() <= MaxExpressionSize;
+ if (AdditionalValues.empty() && IsValidSalvageExpr) {
DII->setExpression(SalvagedExpr);
- } else if (isa<DbgValueInst>(DII) &&
+ } else if (isa<DbgValueInst>(DII) && IsValidSalvageExpr &&
DII->getNumVariableLocationOps() + AdditionalValues.size() <=
MaxDebugArgs) {
DII->addVariableLocationOps(AdditionalValues, SalvagedExpr);
@@ -1793,16 +1800,16 @@ void llvm::salvageDebugInfoForDbgValues(
}
}
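A standalone sketch (plain C++, helper name and parameters are illustrative, not the LLVM API) of the acceptance checks added above: a salvaged expression is only committed if it stays under the new size cap, and extra location operands may only be attached to dbg.value and only up to the operand cap.

#include <cstddef>

constexpr size_t MaxDebugArgs = 16;       // cap on location operands
constexpr size_t MaxExpressionSize = 128; // new cap on DIExpression elements

// Decide whether a salvaged expression may be committed to the intrinsic.
bool canCommitSalvagedExpr(size_t ExprElements, size_t CurrentLocationOps,
                           size_t AdditionalValues, bool IsDbgValue) {
  if (ExprElements > MaxExpressionSize)
    return false;                          // expression grew too large
  if (AdditionalValues == 0)
    return true;                           // in-place update is always fine
  // Extra operands can only be attached to dbg.value, and only up to the cap.
  return IsDbgValue &&
         CurrentLocationOps + AdditionalValues <= MaxDebugArgs;
}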
-bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
- uint64_t CurrentLocOps,
- SmallVectorImpl<uint64_t> &Opcodes,
- SmallVectorImpl<Value *> &AdditionalValues) {
+Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
+ uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
unsigned BitWidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
// Rewrite a GEP into a DIExpression.
MapVector<Value *, APInt> VariableOffsets;
APInt ConstantOffset(BitWidth, 0);
if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset))
- return false;
+ return nullptr;
if (!VariableOffsets.empty() && !CurrentLocOps) {
Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0});
CurrentLocOps = 1;
@@ -1816,7 +1823,7 @@ bool getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
dwarf::DW_OP_plus});
}
DIExpression::appendOffset(Opcodes, ConstantOffset.getSExtValue());
- return true;
+ return GEP->getOperand(0);
}
uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
@@ -1849,14 +1856,14 @@ uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
}
}
-bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
- SmallVectorImpl<uint64_t> &Opcodes,
- SmallVectorImpl<Value *> &AdditionalValues) {
+Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
// Handle binary operations with constant integer operands as a special case.
auto *ConstInt = dyn_cast<ConstantInt>(BI->getOperand(1));
// Values wider than 64 bits cannot be represented within a DIExpression.
if (ConstInt && ConstInt->getBitWidth() > 64)
- return false;
+ return nullptr;
Instruction::BinaryOps BinOpcode = BI->getOpcode();
// Push any Constant Int operand onto the expression stack.
@@ -1867,7 +1874,7 @@ bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
if (BinOpcode == Instruction::Add || BinOpcode == Instruction::Sub) {
uint64_t Offset = BinOpcode == Instruction::Add ? Val : -int64_t(Val);
DIExpression::appendOffset(Opcodes, Offset);
- return true;
+ return BI->getOperand(0);
}
Opcodes.append({dwarf::DW_OP_constu, Val});
} else {
@@ -1883,62 +1890,51 @@ bool getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
// representation in a DIExpression.
uint64_t DwarfBinOp = getDwarfOpForBinOp(BinOpcode);
if (!DwarfBinOp)
- return false;
+ return nullptr;
Opcodes.push_back(DwarfBinOp);
-
- return true;
+ return BI->getOperand(0);
}
-DIExpression *
-llvm::salvageDebugInfoImpl(Instruction &I, DIExpression *SrcDIExpr,
- bool WithStackValue, unsigned LocNo,
- SmallVectorImpl<Value *> &AdditionalValues) {
- uint64_t CurrentLocOps = SrcDIExpr->getNumLocationOperands();
+Value *llvm::salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Ops,
+ SmallVectorImpl<Value *> &AdditionalValues) {
auto &M = *I.getModule();
auto &DL = M.getDataLayout();
- // Apply a vector of opcodes to the source DIExpression.
- auto doSalvage = [&](SmallVectorImpl<uint64_t> &Ops) -> DIExpression * {
- DIExpression *DIExpr = SrcDIExpr;
- if (!Ops.empty()) {
- DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, LocNo, WithStackValue);
- }
- return DIExpr;
- };
-
- // initializer-list helper for applying operators to the source DIExpression.
- auto applyOps = [&](ArrayRef<uint64_t> Opcodes) {
- SmallVector<uint64_t, 8> Ops(Opcodes.begin(), Opcodes.end());
- return doSalvage(Ops);
- };
-
if (auto *CI = dyn_cast<CastInst>(&I)) {
+ Value *FromValue = CI->getOperand(0);
// No-op casts are irrelevant for debug info.
- if (CI->isNoopCast(DL))
- return SrcDIExpr;
+ if (CI->isNoopCast(DL)) {
+ return FromValue;
+ }
Type *Type = CI->getType();
+ if (Type->isPointerTy())
+ Type = DL.getIntPtrType(Type);
// Casts other than Trunc, SExt, or ZExt to scalar types cannot be salvaged.
if (Type->isVectorTy() ||
- !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I)))
+ !(isa<TruncInst>(&I) || isa<SExtInst>(&I) || isa<ZExtInst>(&I) ||
+ isa<IntToPtrInst>(&I) || isa<PtrToIntInst>(&I)))
return nullptr;
- Value *FromValue = CI->getOperand(0);
- unsigned FromTypeBitSize = FromValue->getType()->getScalarSizeInBits();
+ llvm::Type *FromType = FromValue->getType();
+ if (FromType->isPointerTy())
+ FromType = DL.getIntPtrType(FromType);
+
+ unsigned FromTypeBitSize = FromType->getScalarSizeInBits();
unsigned ToTypeBitSize = Type->getScalarSizeInBits();
- return applyOps(DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
- isa<SExtInst>(&I)));
+ auto ExtOps = DIExpression::getExtOps(FromTypeBitSize, ToTypeBitSize,
+ isa<SExtInst>(&I));
+ Ops.append(ExtOps.begin(), ExtOps.end());
+ return FromValue;
}
- SmallVector<uint64_t, 8> Ops;
- if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- if (getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues))
- return doSalvage(Ops);
- } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) {
- if (getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues))
- return doSalvage(Ops);
- }
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I))
+ return getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues);
+ if (auto *BI = dyn_cast<BinaryOperator>(&I))
+ return getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues);
+
// *Not* to do: we should not attempt to salvage load instructions,
// because the validity and lifetime of a dbg.value containing
// DW_OP_deref becomes difficult to analyze. See PR40628 for examples.
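A standalone sketch (plain C++) of how a binary operation with a constant operand is lowered into DWARF expression ops by the rewritten helpers above; the real helpers additionally return the remaining operand (GEP pointer / binop LHS) that the debug location is rewritten to. Only the constant-RHS path is shown, and the numeric DW_OP_* values are placeholders assumed to match the standard DWARF encoding.

#include <cassert>
#include <cstdint>
#include <vector>

enum class BinOp { Add, Sub, Mul };

// Placeholder DWARF opcodes (assumed to match dwarf::DW_OP_* constants).
constexpr uint64_t DW_OP_constu = 0x10, DW_OP_minus = 0x1c, DW_OP_mul = 0x1e,
                   DW_OP_plus_uconst = 0x23;

// Append ops describing "LHS <op> ConstRHS"; returns false if unsupported.
// (The patch folds a constant add/sub into a single offset; a variable RHS is
// handled separately via DW_OP_LLVM_arg and is not sketched here.)
bool salvageBinOpWithConst(BinOp Op, uint64_t ConstRHS,
                           std::vector<uint64_t> &Ops) {
  if (Op == BinOp::Add) {
    Ops.insert(Ops.end(), {DW_OP_plus_uconst, ConstRHS});
    return true;
  }
  Ops.insert(Ops.end(), {DW_OP_constu, ConstRHS});
  switch (Op) {
  case BinOp::Sub: Ops.push_back(DW_OP_minus); return true;
  case BinOp::Mul: Ops.push_back(DW_OP_mul);   return true;
  default:         return false;
  }
}

int main() {
  std::vector<uint64_t> Ops;
  assert(salvageBinOpWithConst(BinOp::Add, 8, Ops));
  assert(Ops.size() == 2 && Ops[0] == DW_OP_plus_uconst && Ops[1] == 8);
  return 0;
}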
@@ -2194,6 +2190,26 @@ void llvm::changeToCall(InvokeInst *II, DomTreeUpdater *DTU) {
DTU->applyUpdates({{DominatorTree::Delete, BB, UnwindDestBB}});
}
+void llvm::createUnreachableSwitchDefault(SwitchInst *Switch,
+ DomTreeUpdater *DTU) {
+ LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
+ auto *BB = Switch->getParent();
+ auto *OrigDefaultBlock = Switch->getDefaultDest();
+ OrigDefaultBlock->removePredecessor(BB);
+ BasicBlock *NewDefaultBlock = BasicBlock::Create(
+ BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
+ OrigDefaultBlock);
+ new UnreachableInst(Switch->getContext(), NewDefaultBlock);
+ Switch->setDefaultDest(&*NewDefaultBlock);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 2> Updates;
+ Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
+ if (!is_contained(successors(BB), OrigDefaultBlock))
+ Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
+ DTU->applyUpdates(Updates);
+ }
+}
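A minimal standalone sketch (plain C++, toy block names, not LLVM code) of why the Delete update above is guarded by is_contained: another case label of the switch may still target the original default block, in which case the CFG edge from the switch block survives and the dominator tree edge must be kept.

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

int main() {
  // Successors of the switch block after the default was retargeted to the
  // new unreachable block; here a case label still jumps to the old default.
  std::vector<std::string> Succs = {"case.1", "old.default",
                                    "sw.unreachabledefault"};
  bool OldDefaultStillSucc =
      std::find(Succs.begin(), Succs.end(), "old.default") != Succs.end();
  // Emit {Delete, BB, old.default} only when the old default is gone.
  assert(OldDefaultStillSucc && "keep the dominator tree edge in this case");
  return 0;
}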
+
BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
BasicBlock *UnwindEdge,
DomTreeUpdater *DTU) {
@@ -2669,9 +2685,7 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,
assert(From->getType() == To->getType());
unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(From->uses())) {
if (!Dominates(Root, U))
continue;
U.set(To);
@@ -2687,9 +2701,7 @@ unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) {
auto *BB = From->getParent();
unsigned Count = 0;
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
+ for (Use &U : llvm::make_early_inc_range(From->uses())) {
auto *I = cast<Instruction>(U.getUser());
if (I->getParent() == BB)
continue;
@@ -3171,7 +3183,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
- APInt DemandedMask = APInt::getAllOnesValue(DemandedBW);
+ APInt DemandedMask = APInt::getAllOnes(DemandedBW);
bool OKForBSwap = MatchBSwaps && (DemandedBW % 16) == 0;
bool OKForBitReverse = MatchBitReversals;
for (unsigned BitIdx = 0;
@@ -3208,7 +3220,7 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
Instruction *Result = CallInst::Create(F, Provider, "rev", I);
InsertedInsts.push_back(Result);
- if (!DemandedMask.isAllOnesValue()) {
+ if (!DemandedMask.isAllOnes()) {
auto *Mask = ConstantInt::get(DemandedTy, DemandedMask);
Result = BinaryOperator::Create(Instruction::And, Result, Mask, "mask", I);
InsertedInsts.push_back(Result);
@@ -3235,7 +3247,7 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
if (F && !F->hasLocalLinkage() && F->hasName() &&
TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
!F->doesNotAccessMemory())
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin);
+ CI->addFnAttr(Attribute::NoBuiltin);
}
bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
@@ -3263,7 +3275,7 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {
if (CB.isBundleOperand(OpIdx))
return false;
- if (OpIdx < CB.getNumArgOperands()) {
+ if (OpIdx < CB.arg_size()) {
// Some variadic intrinsics require constants in the variadic arguments,
// which currently aren't markable as immarg.
if (isa<IntrinsicInst>(CB) &&
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cd1f6f0c78a5..f3cf42be8ba1 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -73,57 +74,39 @@ static cl::opt<unsigned> UnrollForcePeelCount(
"unroll-force-peel-count", cl::init(0), cl::Hidden,
cl::desc("Force a peel count regardless of profiling information."));
-static cl::opt<bool> UnrollPeelMultiDeoptExit(
- "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden,
- cl::desc("Allow peeling of loops with multiple deopt exits."));
-
static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
-// Designates that a Phi is estimated to become invariant after an "infinite"
-// number of loop iterations (i.e. only may become an invariant if the loop is
-// fully unrolled).
-static const unsigned InfiniteIterationsToInvariance =
- std::numeric_limits<unsigned>::max();
-
// Check whether we are capable of peeling this loop.
bool llvm::canPeel(Loop *L) {
// Make sure the loop is in simplified form
if (!L->isLoopSimplifyForm())
return false;
- if (UnrollPeelMultiDeoptExit) {
- SmallVector<BasicBlock *, 4> Exits;
- L->getUniqueNonLatchExitBlocks(Exits);
-
- if (!Exits.empty()) {
- // Latch's terminator is a conditional branch, Latch is exiting and
- // all non Latch exits ends up with deoptimize.
- const BasicBlock *Latch = L->getLoopLatch();
- const BranchInst *T = dyn_cast<BranchInst>(Latch->getTerminator());
- return T && T->isConditional() && L->isLoopExiting(Latch) &&
- all_of(Exits, [](const BasicBlock *BB) {
- return BB->getTerminatingDeoptimizeCall();
- });
- }
- }
-
- // Only peel loops that contain a single exit
- if (!L->getExitingBlock() || !L->getUniqueExitBlock())
- return false;
-
// Don't try to peel loops where the latch is not the exiting block.
// This can be an indication of two different things:
// 1) The loop is not rotated.
// 2) The loop contains irreducible control flow that involves the latch.
const BasicBlock *Latch = L->getLoopLatch();
- if (Latch != L->getExitingBlock())
+ if (!L->isLoopExiting(Latch))
return false;
// Peeling is only supported if the latch is a branch.
if (!isa<BranchInst>(Latch->getTerminator()))
return false;
- return true;
+ SmallVector<BasicBlock *, 4> Exits;
+ L->getUniqueNonLatchExitBlocks(Exits);
+ // The latch must either be the only exiting block, or every non-latch exit
+ // block must end in a deopt or unreachable terminator, or lead through a
+ // chain of blocks to one that does. Both
+ // deopt and unreachable terminators are a strong indication they are not
+ // taken. Note that this is a profitability check, not a legality check. Also
+ // note that LoopPeeling currently can only update the branch weights of latch
+ // blocks and branch weights to blocks with deopt or unreachable do not need
+ // updating.
+ return all_of(Exits, [](const BasicBlock *BB) {
+ return IsBlockFollowedByDeoptOrUnreachable(BB);
+ });
}
// This function calculates the number of iterations after which the given Phi
@@ -139,9 +122,9 @@ bool llvm::canPeel(Loop *L) {
// %x = phi(0, %a), <-- becomes invariant starting from 3rd iteration.
// %y = phi(0, 5),
// %a = %y + 1.
-static unsigned calculateIterationsToInvariance(
+static Optional<unsigned> calculateIterationsToInvariance(
PHINode *Phi, Loop *L, BasicBlock *BackEdge,
- SmallDenseMap<PHINode *, unsigned> &IterationsToInvariance) {
+ SmallDenseMap<PHINode *, Optional<unsigned> > &IterationsToInvariance) {
assert(Phi->getParent() == L->getHeader() &&
"Non-loop Phi should not be checked for turning into invariant.");
assert(BackEdge == L->getLoopLatch() && "Wrong latch?");
@@ -154,29 +137,90 @@ static unsigned calculateIterationsToInvariance(
Value *Input = Phi->getIncomingValueForBlock(BackEdge);
// Place infinity to map to avoid infinite recursion for cycled Phis. Such
// cycles can never stop on an invariant.
- IterationsToInvariance[Phi] = InfiniteIterationsToInvariance;
- unsigned ToInvariance = InfiniteIterationsToInvariance;
+ IterationsToInvariance[Phi] = None;
+ Optional<unsigned> ToInvariance = None;
if (L->isLoopInvariant(Input))
ToInvariance = 1u;
else if (PHINode *IncPhi = dyn_cast<PHINode>(Input)) {
// Only consider Phis in header block.
if (IncPhi->getParent() != L->getHeader())
- return InfiniteIterationsToInvariance;
+ return None;
// If the input becomes an invariant after X iterations, then our Phi
// becomes an invariant after X + 1 iterations.
- unsigned InputToInvariance = calculateIterationsToInvariance(
+ auto InputToInvariance = calculateIterationsToInvariance(
IncPhi, L, BackEdge, IterationsToInvariance);
- if (InputToInvariance != InfiniteIterationsToInvariance)
- ToInvariance = InputToInvariance + 1u;
+ if (InputToInvariance)
+ ToInvariance = *InputToInvariance + 1u;
}
// If we found that this Phi lies in an invariant chain, update the map.
- if (ToInvariance != InfiniteIterationsToInvariance)
+ if (ToInvariance)
IterationsToInvariance[Phi] = ToInvariance;
return ToInvariance;
}
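A standalone sketch (plain C++, toy phi graph instead of IR) of the Optional-based memoization just introduced: nullopt stands for "never becomes invariant" and also doubles as the cycle breaker that InfiniteIterationsToInvariance used to provide.

#include <cassert>
#include <map>
#include <optional>

struct Phi { Phi *Incoming; bool IncomingIsInvariant; };

std::optional<unsigned>
iterationsToInvariance(Phi *P, std::map<Phi *, std::optional<unsigned>> &Memo) {
  auto It = Memo.find(P);
  if (It != Memo.end())
    return It->second;
  Memo[P] = std::nullopt;            // break cycles: assume "never" first
  std::optional<unsigned> Result;
  if (P->IncomingIsInvariant)
    Result = 1u;                     // invariant input: one peeled iteration
  else if (P->Incoming) {
    if (auto Inner = iterationsToInvariance(P->Incoming, Memo))
      Result = *Inner + 1u;          // one more iteration than the feeding phi
  }
  if (Result)
    Memo[P] = Result;
  return Result;
}

int main() {
  Phi A{nullptr, /*invariant*/ true}; // invariant after 1 iteration
  Phi B{&A, false};                   // invariant after 2 iterations
  Phi C{&C, false};                   // self-cycle: never invariant
  std::map<Phi *, std::optional<unsigned>> Memo;
  assert(*iterationsToInvariance(&B, Memo) == 2);
  assert(!iterationsToInvariance(&C, Memo));
  return 0;
}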
+// Try to find any invariant memory reads that will become dereferenceable in
+// the remainder loop after peeling. The load must also be used (transitively)
+// by an exit condition. Returns the number of iterations to peel off (at the
+// moment either 0 or 1).
+static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
+ DominatorTree &DT) {
+ // Skip loops with a single exiting block, because there should be no benefit
+ // for the heuristic below.
+ if (L.getExitingBlock())
+ return 0;
+
+ // All non-latch exit blocks must have an UnreachableInst terminator.
+ // Otherwise the heuristic below may not be profitable.
+ SmallVector<BasicBlock *, 4> Exits;
+ L.getUniqueNonLatchExitBlocks(Exits);
+ if (any_of(Exits, [](const BasicBlock *BB) {
+ return !isa<UnreachableInst>(BB->getTerminator());
+ }))
+ return 0;
+
+ // Now look for invariant loads that dominate the latch and are not known to
+ // be dereferenceable. If there are such loads and no writes, they will become
+ // dereferenceable in the loop if the first iteration is peeled off. Also
+ // collect the set of instructions controlled by such loads. Only peel if an
+ // exit condition uses (transitively) such a load.
+ BasicBlock *Header = L.getHeader();
+ BasicBlock *Latch = L.getLoopLatch();
+ SmallPtrSet<Value *, 8> LoadUsers;
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ for (BasicBlock *BB : L.blocks()) {
+ for (Instruction &I : *BB) {
+ if (I.mayWriteToMemory())
+ return 0;
+
+ auto Iter = LoadUsers.find(&I);
+ if (Iter != LoadUsers.end()) {
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ // Do not look for reads in the header; they can already be hoisted
+ // without peeling.
+ if (BB == Header)
+ continue;
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ Value *Ptr = LI->getPointerOperand();
+ if (DT.dominates(BB, Latch) && L.isLoopInvariant(Ptr) &&
+ !isDereferenceablePointer(Ptr, LI->getType(), DL, LI, &DT))
+ for (Value *U : I.users())
+ LoadUsers.insert(U);
+ }
+ }
+ }
+ SmallVector<BasicBlock *> ExitingBlocks;
+ L.getExitingBlocks(ExitingBlocks);
+ if (any_of(ExitingBlocks, [&LoadUsers](BasicBlock *Exiting) {
+ return LoadUsers.contains(Exiting->getTerminator());
+ }))
+ return 1;
+ return 0;
+}
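A standalone sketch (plain C++, toy def-use edges instead of IR) of the transitive user propagation in the heuristic above: seed the set with the invariant, not-provably-dereferenceable load and sweep forward in program order, so that an exit terminator depending on it yields a peel count of 1.

#include <cassert>
#include <set>
#include <vector>

int main() {
  // Users[i] = instructions that use instruction i (toy def-use edges).
  std::vector<std::vector<int>> Users = {
      /*0: load */ {1},
      /*1: add  */ {2},
      /*2: icmp */ {3},
      /*3: br   */ {},
  };
  std::set<int> LoadUsers = {0};            // seed: the invariant load
  for (int I = 0; I < (int)Users.size(); ++I)
    if (LoadUsers.count(I))
      for (int U : Users[I])
        LoadUsers.insert(U);                // propagate to transitive users
  int ExitCondTerminator = 3;
  assert(LoadUsers.count(ExitCondTerminator) && "exit depends on load: peel 1");
  return 0;
}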
+
// Return the number of iterations to peel off that make conditions in the
// body true/false. For example, if we peel 2 iterations off the loop below,
// the condition i < 2 can be evaluated at compile time.
@@ -292,8 +336,8 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize,
TargetTransformInfo::PeelingPreferences &PP,
- unsigned &TripCount, ScalarEvolution &SE,
- unsigned Threshold) {
+ unsigned &TripCount, DominatorTree &DT,
+ ScalarEvolution &SE, unsigned Threshold) {
assert(LoopSize > 0 && "Zero loop size is not allowed!");
// Save the PP.PeelCount value set by the target in
// TTI.getPeelingPreferences or by the flag -unroll-peel-count.
@@ -337,7 +381,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
// First, check that we can peel at least one iteration.
if (2 * LoopSize <= Threshold && UnrollPeelMaxCount > 0) {
// Store the pre-calculated values here.
- SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;
+ SmallDenseMap<PHINode *, Optional<unsigned> > IterationsToInvariance;
// Now go through all Phis to calculate their the number of iterations they
// need to become invariants.
// Start the max computation with the UP.PeelCount value set by the target
@@ -347,10 +391,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
assert(BackEdge && "Loop is not in simplified form?");
for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) {
PHINode *Phi = cast<PHINode>(&*BI);
- unsigned ToInvariance = calculateIterationsToInvariance(
+ auto ToInvariance = calculateIterationsToInvariance(
Phi, L, BackEdge, IterationsToInvariance);
- if (ToInvariance != InfiniteIterationsToInvariance)
- DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);
+ if (ToInvariance)
+ DesiredPeelCount = std::max(DesiredPeelCount, *ToInvariance);
}
// Pay respect to limitations implied by loop size and the max peel count.
@@ -360,6 +404,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
DesiredPeelCount = std::max(DesiredPeelCount,
countToEliminateCompares(*L, MaxPeelCount, SE));
+ if (DesiredPeelCount == 0)
+ DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT);
+
if (DesiredPeelCount > 0) {
DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);
// Consider max peel count limitation.
@@ -679,34 +726,27 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
L->getExitEdges(ExitEdges);
- DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
+ // Remember dominators of blocks we might reach through exits to change them
+ // later. The immediate dominator of such a block might change, because we
+ // add more routes that can lead to the exit: we can reach it from the peeled
+ // iterations too.
+ DenseMap<BasicBlock *, BasicBlock *> NonLoopBlocksIDom;
if (DT) {
- // We'd like to determine the idom of exit block after peeling one
- // iteration.
- // Let Exit is exit block.
- // Let ExitingSet - is a set of predecessors of Exit block. They are exiting
- // blocks.
- // Let Latch' and ExitingSet' are copies after a peeling.
- // We'd like to find an idom'(Exit) - idom of Exit after peeling.
- // It is an evident that idom'(Exit) will be the nearest common dominator
- // of ExitingSet and ExitingSet'.
- // idom(Exit) is a nearest common dominator of ExitingSet.
- // idom(Exit)' is a nearest common dominator of ExitingSet'.
- // Taking into account that we have a single Latch, Latch' will dominate
- // Header and idom(Exit).
- // So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
- // All these basic blocks are in the same loop, so what we find is
- // (nearest common dominator of idom(Exit) and Latch)'.
- // In the loop below we remember nearest common dominator of idom(Exit) and
- // Latch to update idom of Exit later.
- assert(L->hasDedicatedExits() && "No dedicated exits?");
- for (auto Edge : ExitEdges) {
- if (ExitIDom.count(Edge.second))
- continue;
- BasicBlock *BB = DT->findNearestCommonDominator(
- DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
- assert(L->contains(BB) && "IDom is not in a loop");
- ExitIDom[Edge.second] = BB;
+ for (auto *BB : L->blocks()) {
+ auto *BBDomNode = DT->getNode(BB);
+ SmallVector<BasicBlock *, 16> ChildrenToUpdate;
+ for (auto *ChildDomNode : BBDomNode->children()) {
+ auto *ChildBB = ChildDomNode->getBlock();
+ if (!L->contains(ChildBB))
+ ChildrenToUpdate.push_back(ChildBB);
+ }
+ // The new idom of the block will be the nearest common dominator
+ // of all copies of the previous idom. This is equivalent to the
+ // nearest common dominator of the previous idom and the first latch,
+ // which dominates all copies of the previous idom.
+ BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latch);
+ for (auto *ChildBB : ChildrenToUpdate)
+ NonLoopBlocksIDom[ChildBB] = NewIDom;
}
}
@@ -795,13 +835,11 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
remapInstructionsInBlocks(NewBlocks, VMap);
if (DT) {
- // Latches of the cloned loops dominate over the loop exit, so idom of the
- // latter is the first cloned loop body, as original PreHeader dominates
- // the original loop body.
+ // Update IDoms of the blocks reachable through exits.
if (Iter == 0)
- for (auto Exit : ExitIDom)
- DT->changeImmediateDominator(Exit.first,
- cast<BasicBlock>(LVMap[Exit.second]));
+ for (auto BBIDom : NonLoopBlocksIDom)
+ DT->changeImmediateDominator(BBIDom.first,
+ cast<BasicBlock>(LVMap[BBIDom.second]));
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index ff7905bed91d..c66fd7bb0588 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -103,6 +103,7 @@ static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
BasicBlock *OrigPreheader,
ValueToValueMapTy &ValueMap,
+ ScalarEvolution *SE,
SmallVectorImpl<PHINode*> *InsertedPHIs) {
// Remove PHI node entries that are no longer live.
BasicBlock::iterator I, E = OrigHeader->end();
@@ -125,19 +126,15 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
// The value now exits in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
+ // Force re-computation of OrigHeaderVal, as some users now need to use the
+ // new PHI node.
+ if (SE)
+ SE->forgetValue(OrigHeaderVal);
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
// Visit each use of the OrigHeader instruction.
- for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
- UE = OrigHeaderVal->use_end();
- UI != UE;) {
- // Grab the use before incrementing the iterator.
- Use &U = *UI;
-
- // Increment the iterator before removing the use from the list.
- ++UI;
-
+ for (Use &U : llvm::make_early_inc_range(OrigHeaderVal->uses())) {
// SSAUpdater can't handle a non-PHI use in the same block as an
// earlier def. We can easily handle those cases manually.
Instruction *UserInst = cast<Instruction>(U.getUser());
@@ -399,9 +396,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
D->getExpression()};
};
SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
- for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
- I != E; ++I) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
+ for (Instruction &I : llvm::drop_begin(llvm::reverse(*OrigPreheader))) {
+ if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I))
DbgIntrinsics.insert(makeHash(DII));
else
break;
@@ -563,7 +559,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
SmallVector<PHINode*, 2> InsertedPHIs;
// If there were any uses of instructions in the duplicated block outside the
// loop, update them, inserting PHI nodes as required
- RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
+ RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, SE,
&InsertedPHIs);
// Attach dbg.value intrinsics to the new phis if that phi uses a value that
@@ -621,7 +617,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// one predecessor. Note that Exit could be an exit block for multiple
// nested loops, causing both of the edges to now be critical and need to
// be split.
- SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
+ SmallVector<BasicBlock *, 4> ExitPreds(predecessors(Exit));
bool SplitLatchEdge = false;
for (BasicBlock *ExitPred : ExitPreds) {
// We only need to split loop exit edges.
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index d2fd32c98d73..d14c006c8032 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -779,8 +779,7 @@ namespace {
AU.addPreserved<DependenceAnalysisWrapperPass>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
AU.addPreserved<BranchProbabilityInfoWrapperPass>();
- if (EnableMSSALoopDependency)
- AU.addPreserved<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -814,12 +813,10 @@ bool LoopSimplify::runOnFunction(Function &F) {
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
MemorySSA *MSSA = nullptr;
std::unique_ptr<MemorySSAUpdater> MSSAU;
- if (EnableMSSALoopDependency) {
- auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- if (MSSAAnalysis) {
- MSSA = &MSSAAnalysis->getMSSA();
- MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- }
+ auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+ if (MSSAAnalysis) {
+ MSSA = &MSSAAnalysis->getMSSA();
+ MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index a91bf7b7af13..b0c622b98d5e 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -224,13 +224,12 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SmallVector<WeakTrackingVH, 16> DeadInsts;
for (BasicBlock *BB : L->getBlocks()) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *Inst = &*I++;
- if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))
- if (LI->replacementPreservesLCSSAForm(Inst, V))
- Inst->replaceAllUsesWith(V);
- if (isInstructionTriviallyDead(Inst))
- DeadInsts.emplace_back(Inst);
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(&Inst, V))
+ Inst.replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(&Inst))
+ DeadInsts.emplace_back(&Inst);
}
// We can't do recursive deletion until we're done iterating, as we might
// have a phi which (potentially indirectly) uses instructions later in
@@ -515,6 +514,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+ // We place the unrolled iterations immediately after the original loop
+ // latch. This is a reasonable default placement if we don't have block
+ // frequencies; if we do, the layout will be adjusted later.
+ auto BlockInsertPt = std::next(LatchBlock->getIterator());
for (unsigned It = 1; It != ULO.Count; ++It) {
SmallVector<BasicBlock *, 8> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -523,7 +526,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
ValueToValueMapTy VMap;
BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
- Header->getParent()->getBasicBlockList().push_back(New);
+ Header->getParent()->getBasicBlockList().insert(BlockInsertPt, New);
assert((*BB != Header || LI->getLoopFor(*BB) == L) &&
"Header should not be in a sub-loop");
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 6749d3db743c..a92cb6a313d3 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -22,6 +22,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
@@ -35,6 +36,7 @@
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
@@ -167,8 +169,11 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
// Add the branch to the exit block (around the unrolled loop)
B.CreateCondBr(BrLoopExit, OriginalLoopLatchExit, NewPreHeader);
InsertPt->eraseFromParent();
- if (DT)
- DT->changeImmediateDominator(OriginalLoopLatchExit, PrologExit);
+ if (DT) {
+ auto *NewDom = DT->findNearestCommonDominator(OriginalLoopLatchExit,
+ PrologExit);
+ DT->changeImmediateDominator(OriginalLoopLatchExit, NewDom);
+ }
}
/// Connect the unrolling epilog code to the original loop.
@@ -215,7 +220,10 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// PN = PHI [I, Latch]
// ...
// Exit:
- // EpilogPN = PHI [PN, EpilogPreHeader]
+ // EpilogPN = PHI [PN, EpilogPreHeader], [X, Exit2], [Y, Exit2.epil]
+ //
+ // Exits from non-latch blocks point to the original exit block and the
+ // epilogue edges have already been added.
//
// There is EpilogPreHeader incoming block instead of NewExit as
// NewExit was spilt 1 more time to get EpilogPreHeader.
@@ -282,8 +290,10 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
// Add the branch to the exit block (around the unrolling loop)
B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit);
InsertPt->eraseFromParent();
- if (DT)
- DT->changeImmediateDominator(Exit, NewExit);
+ if (DT) {
+ auto *NewDom = DT->findNearestCommonDominator(Exit, NewExit);
+ DT->changeImmediateDominator(Exit, NewDom);
+ }
// Split the main loop exit to maintain canonicalization guarantees.
SmallVector<BasicBlock*, 4> NewExitPreds{Latch};
@@ -291,17 +301,15 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,
PreserveLCSSA);
}
-/// Create a clone of the blocks in a loop and connect them together.
-/// If CreateRemainderLoop is false, loop structure will not be cloned,
-/// otherwise a new loop will be created including all cloned blocks, and the
-/// iterator of it switches to count NewIter down to 0.
+/// Create a clone of the blocks in a loop and connect them together. A new
+/// loop will be created including all cloned blocks, and the iterator of the
+/// new loop switched to count NewIter down to 0.
/// The cloned blocks should be inserted between InsertTop and InsertBot.
-/// If loop structure is cloned InsertTop should be new preheader, InsertBot
-/// new loop exit.
-/// Return the new cloned loop that is created when CreateRemainderLoop is true.
+/// InsertTop should be new preheader, InsertBot new loop exit.
+/// Returns the new cloned loop that is created.
static Loop *
-CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
- const bool UseEpilogRemainder, const bool UnrollRemainder,
+CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder,
+ const bool UnrollRemainder,
BasicBlock *InsertTop,
BasicBlock *InsertBot, BasicBlock *Preheader,
std::vector<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
@@ -315,8 +323,6 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
Loop *ParentLoop = L->getParentLoop();
NewLoopsMap NewLoops;
NewLoops[ParentLoop] = ParentLoop;
- if (!CreateRemainderLoop)
- NewLoops[L] = ParentLoop;
// For each block in the original loop, create a new copy,
// and update the value map with the newly created values.
@@ -324,11 +330,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F);
NewBlocks.push_back(NewBB);
- // If we're unrolling the outermost loop, there's no remainder loop,
- // and this block isn't in a nested loop, then the new block is not
- // in any loop. Otherwise, add it to loopinfo.
- if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop)
- addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
+ addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops);
VMap[*BB] = NewBB;
if (Header == *BB) {
@@ -349,27 +351,24 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
}
if (Latch == *BB) {
- // For the last block, if CreateRemainderLoop is false, create a direct
- // jump to InsertBot. If not, create a loop back to cloned head.
+ // For the last block, create a loop back to cloned head.
VMap.erase((*BB)->getTerminator());
+ // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
+ // Subtle: NewIter can be 0 if we wrapped when computing the trip count,
+ // thus we must compare the post-increment (wrapping) value.
BasicBlock *FirstLoopBB = cast<BasicBlock>(VMap[Header]);
BranchInst *LatchBR = cast<BranchInst>(NewBB->getTerminator());
IRBuilder<> Builder(LatchBR);
- if (!CreateRemainderLoop) {
- Builder.CreateBr(InsertBot);
- } else {
- PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
- suffix + ".iter",
- FirstLoopBB->getFirstNonPHI());
- Value *IdxSub =
- Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".sub");
- Value *IdxCmp =
- Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp");
- Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
- NewIdx->addIncoming(NewIter, InsertTop);
- NewIdx->addIncoming(IdxSub, NewBB);
- }
+ PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2,
+ suffix + ".iter",
+ FirstLoopBB->getFirstNonPHI());
+ auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+ auto *One = ConstantInt::get(NewIdx->getType(), 1);
+ Value *IdxNext = Builder.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ Value *IdxCmp = Builder.CreateICmpNE(IdxNext, NewIter, NewIdx->getName() + ".cmp");
+ Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot);
+ NewIdx->addIncoming(Zero, InsertTop);
+ NewIdx->addIncoming(IdxNext, NewBB);
LatchBR->eraseFromParent();
}
}
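A standalone sketch (plain C++, 8-bit counters for brevity) of the IV change above: an incrementing IV compared post-increment runs the same number of iterations as the old decrementing IV, including the wrapped case the comment warns about, where NewIter is 0 and the real trip count is 2^W. The same pattern is used again for the main loop counter later in UnrollRuntimeLoopRemainder.

#include <cassert>
#include <cstdint>

unsigned runCountDown(uint8_t NewIter) {
  unsigned Body = 0;
  uint8_t Idx = NewIter;
  do { ++Body; } while (uint8_t(--Idx) != 0);        // old form: count down
  return Body;
}

unsigned runCountUp(uint8_t NewIter) {
  unsigned Body = 0;
  uint8_t Idx = 0;
  do { ++Body; } while (uint8_t(++Idx) != NewIter);  // new form: compare the
  return Body;                                       // post-increment value
}

int main() {
  assert(runCountDown(3) == 3 && runCountUp(3) == 3);
  // Trip count 256 wraps to 0 in 8 bits; both forms still run 256 iterations.
  assert(runCountDown(0) == 256 && runCountUp(0) == 256);
  return 0;
}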
@@ -378,99 +377,45 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop,
// cloned loop.
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
- if (!CreateRemainderLoop) {
- if (UseEpilogRemainder) {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- NewPHI->removeIncomingValue(Latch, false);
- } else {
- VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader);
- cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
- }
- } else {
- unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
- NewPHI->setIncomingBlock(idx, InsertTop);
- BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
- idx = NewPHI->getBasicBlockIndex(Latch);
- Value *InVal = NewPHI->getIncomingValue(idx);
- NewPHI->setIncomingBlock(idx, NewLatch);
- if (Value *V = VMap.lookup(InVal))
- NewPHI->setIncomingValue(idx, V);
- }
- }
- if (CreateRemainderLoop) {
- Loop *NewLoop = NewLoops[L];
- assert(NewLoop && "L should have been cloned");
- MDNode *LoopID = NewLoop->getLoopID();
-
- // Only add loop metadata if the loop is not going to be completely
- // unrolled.
- if (UnrollRemainder)
- return NewLoop;
-
- Optional<MDNode *> NewLoopID = makeFollowupLoopID(
- LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
- if (NewLoopID.hasValue()) {
- NewLoop->setLoopID(NewLoopID.getValue());
-
- // Do not setLoopAlreadyUnrolled if loop attributes have been defined
- // explicitly.
- return NewLoop;
- }
-
- // Add unroll disable metadata to disable future unrolling for this loop.
- NewLoop->setLoopAlreadyUnrolled();
- return NewLoop;
+ unsigned idx = NewPHI->getBasicBlockIndex(Preheader);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ BasicBlock *NewLatch = cast<BasicBlock>(VMap[Latch]);
+ idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ NewPHI->setIncomingBlock(idx, NewLatch);
+ if (Value *V = VMap.lookup(InVal))
+ NewPHI->setIncomingValue(idx, V);
}
- else
- return nullptr;
-}
-/// Returns true if we can safely unroll a multi-exit/exiting loop. OtherExits
-/// is populated with all the loop exit blocks other than the LatchExit block.
-static bool canSafelyUnrollMultiExitLoop(Loop *L, BasicBlock *LatchExit,
- bool PreserveLCSSA,
- bool UseEpilogRemainder) {
+ Loop *NewLoop = NewLoops[L];
+ assert(NewLoop && "L should have been cloned");
+ MDNode *LoopID = NewLoop->getLoopID();
- // We currently have some correctness constrains in unrolling a multi-exit
- // loop. Check for these below.
+ // Only add loop metadata if the loop is not going to be completely
+ // unrolled.
+ if (UnrollRemainder)
+ return NewLoop;
- // We rely on LCSSA form being preserved when the exit blocks are transformed.
- if (!PreserveLCSSA)
- return false;
+ Optional<MDNode *> NewLoopID = makeFollowupLoopID(
+ LoopID, {LLVMLoopUnrollFollowupAll, LLVMLoopUnrollFollowupRemainder});
+ if (NewLoopID.hasValue()) {
+ NewLoop->setLoopID(NewLoopID.getValue());
- // TODO: Support multiple exiting blocks jumping to the `LatchExit` when
- // UnrollRuntimeMultiExit is true. This will need updating the logic in
- // connectEpilog/connectProlog.
- if (!LatchExit->getSinglePredecessor()) {
- LLVM_DEBUG(
- dbgs() << "Bailout for multi-exit handling when latch exit has >1 "
- "predecessor.\n");
- return false;
+ // Do not setLoopAlreadyUnrolled if loop attributes have been defined
+ // explicitly.
+ return NewLoop;
}
- // FIXME: We bail out of multi-exit unrolling when epilog loop is generated
- // and L is an inner loop. This is because in presence of multiple exits, the
- // outer loop is incorrect: we do not add the EpilogPreheader and exit to the
- // outer loop. This is automatically handled in the prolog case, so we do not
- // have that bug in prolog generation.
- if (UseEpilogRemainder && L->getParentLoop())
- return false;
- // All constraints have been satisfied.
- return true;
+ // Add unroll disable metadata to disable future unrolling for this loop.
+ NewLoop->setLoopAlreadyUnrolled();
+ return NewLoop;
}
/// Returns true if we can profitably unroll the multi-exit loop L. Currently,
/// we return true only if UnrollRuntimeMultiExit is set to true.
static bool canProfitablyUnrollMultiExitLoop(
Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits, BasicBlock *LatchExit,
- bool PreserveLCSSA, bool UseEpilogRemainder) {
-
-#if !defined(NDEBUG)
- assert(canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
- UseEpilogRemainder) &&
- "Should be safe to unroll before checking profitability!");
-#endif
+ bool UseEpilogRemainder) {
// Priority goes to UnrollRuntimeMultiExit if it's supplied.
if (UnrollRuntimeMultiExit.getNumOccurrences())
@@ -523,24 +468,56 @@ static void updateLatchBranchWeightsForRemainderLoop(Loop *OrigLoop,
uint64_t TrueWeight, FalseWeight;
BranchInst *LatchBR =
cast<BranchInst>(OrigLoop->getLoopLatch()->getTerminator());
- if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
- uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
- ? FalseWeight
- : TrueWeight;
- assert(UnrollFactor > 1);
- uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
- BasicBlock *Header = RemainderLoop->getHeader();
- BasicBlock *Latch = RemainderLoop->getLoopLatch();
- auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
- unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
- MDBuilder MDB(RemainderLatchBR->getContext());
- MDNode *WeightNode =
- HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
- : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
- RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
- }
+ if (!LatchBR->extractProfMetadata(TrueWeight, FalseWeight))
+ return;
+ uint64_t ExitWeight = LatchBR->getSuccessor(0) == OrigLoop->getHeader()
+ ? FalseWeight
+ : TrueWeight;
+ assert(UnrollFactor > 1);
+ uint64_t BackEdgeWeight = (UnrollFactor - 1) * ExitWeight;
+ BasicBlock *Header = RemainderLoop->getHeader();
+ BasicBlock *Latch = RemainderLoop->getLoopLatch();
+ auto *RemainderLatchBR = cast<BranchInst>(Latch->getTerminator());
+ unsigned HeaderIdx = (RemainderLatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(RemainderLatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ RemainderLatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
}
+/// Calculate ModVal = (BECount + 1) % Count on the abstract integer domain
+/// accounting for the possibility of unsigned overflow in the 2s complement
+/// domain. Preconditions:
+/// 1) TripCount = BECount + 1 (allowing overflow)
+/// 2) Log2(Count) <= BitWidth(BECount)
+static Value *CreateTripRemainder(IRBuilder<> &B, Value *BECount,
+ Value *TripCount, unsigned Count) {
+ // Note that TripCount is BECount + 1.
+ if (isPowerOf2_32(Count))
+ // If the expression is zero, then either:
+ // 1. There are no iterations to be run in the prolog/epilog loop.
+ // OR
+ // 2. The addition computing TripCount overflowed.
+ //
+ // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
+ // the number of iterations that remain to be run in the original loop is a
+ // multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (a
+ // precondition of this method).
+ return B.CreateAnd(TripCount, Count - 1, "xtraiter");
+
+ // As (BECount + 1) can potentially overflow in unsigned arithmetic, we
+ // compute (BECount % Count) + 1, which is overflow safe as BECount % Count < Count.
+ Constant *CountC = ConstantInt::get(BECount->getType(), Count);
+ Value *ModValTmp = B.CreateURem(BECount, CountC);
+ Value *ModValAdd = B.CreateAdd(ModValTmp,
+ ConstantInt::get(ModValTmp->getType(), 1));
+ // At that point (BECount % Count) + 1 could be equal to Count.
+ // To handle this case we need to take mod by Count one more time.
+ return B.CreateURem(ModValAdd, CountC, "xtraiter");
+}
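A standalone sketch (plain C++) of the arithmetic CreateTripRemainder now centralizes, using 32-bit unsigned wrap to model the IR behaviour: the power-of-two path masks the (possibly wrapped) trip count, and the general path works on BECount and reduces once more because (BECount % Count) + 1 can equal Count.

#include <cassert>
#include <cstdint>

uint32_t tripRemainder(uint32_t BECount, uint32_t Count) {
  uint32_t TripCount = BECount + 1;              // may wrap to 0
  if ((Count & (Count - 1)) == 0)                // isPowerOf2_32(Count)
    return TripCount & (Count - 1);              // xtraiter
  uint32_t ModValAdd = BECount % Count + 1;      // overflow safe: <= Count
  return ModValAdd % Count;                      // handle ModValAdd == Count
}

int main() {
  assert(tripRemainder(9, 4) == 2);              // 10 iterations, unroll by 4
  assert(tripRemainder(10, 3) == 2);             // 11 iterations, unroll by 3
  assert(tripRemainder(0xffffffffu, 4) == 0);    // TripCount wrapped to 0
  assert(tripRemainder(0xffffffffu, 3) == 1);    // (2^32) % 3 == 1
  return 0;
}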
+
+
/// Insert code in the prolog/epilog code when unrolling a loop with a
/// run-time trip-count.
///
@@ -624,19 +601,22 @@ bool llvm::UnrollRuntimeLoopRemainder(
// These are exit blocks other than the target of the latch exiting block.
SmallVector<BasicBlock *, 4> OtherExits;
L->getUniqueNonLatchExitBlocks(OtherExits);
- bool isMultiExitUnrollingEnabled =
- canSafelyUnrollMultiExitLoop(L, LatchExit, PreserveLCSSA,
- UseEpilogRemainder) &&
- canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA,
- UseEpilogRemainder);
- // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.
- if (!isMultiExitUnrollingEnabled &&
- (!L->getExitingBlock() || OtherExits.size())) {
- LLVM_DEBUG(
- dbgs()
- << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
- "enabled!\n");
- return false;
+ // Support only single exit and exiting block unless multi-exit loop
+ // unrolling is enabled.
+ if (!L->getExitingBlock() || OtherExits.size()) {
+ // We rely on LCSSA form being preserved when the exit blocks are transformed.
+ // (Note that only an off-by-default mode of the old PM disables PreserveLCSSA.)
+ if (!PreserveLCSSA)
+ return false;
+
+ if (!canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit,
+ UseEpilogRemainder)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "
+ "enabled!\n");
+ return false;
+ }
}
// Use Scalar Evolution to compute the trip count. This allows more loops to
// be unrolled than relying on induction var simplification.
@@ -659,6 +639,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
unsigned BEWidth = cast<IntegerType>(BECountSC->getType())->getBitWidth();
// Add 1 since the backedge count doesn't include the first loop iteration.
+ // (Note that overflow can occur, this is handled explicitly below)
const SCEV *TripCountSC =
SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));
if (isa<SCEVCouldNotCompute>(TripCountSC)) {
@@ -706,8 +687,7 @@ bool llvm::UnrollRuntimeLoopRemainder(
NewPreHeader = SplitBlock(PreHeader, PreHeader->getTerminator(), DT, LI);
NewPreHeader->setName(PreHeader->getName() + ".new");
// Split LatchExit to create phi nodes from branch above.
- SmallVector<BasicBlock*, 4> Preds(predecessors(LatchExit));
- NewExit = SplitBlockPredecessors(LatchExit, Preds, ".unr-lcssa", DT, LI,
+ NewExit = SplitBlockPredecessors(LatchExit, {Latch}, ".unr-lcssa", DT, LI,
nullptr, PreserveLCSSA);
// NewExit gets its DebugLoc from LatchExit, which is not part of the
// original Loop.
@@ -717,6 +697,21 @@ bool llvm::UnrollRuntimeLoopRemainder(
// Split NewExit to insert epilog remainder loop.
EpilogPreHeader = SplitBlock(NewExit, NewExitTerminator, DT, LI);
EpilogPreHeader->setName(Header->getName() + ".epil.preheader");
+
+ // If the latch exits from multiple levels of nested loops, then
+ // by assumption there must be another loop exit which branches to the
+ // outer loop and we must adjust the loop for the newly inserted blocks
+ // to account for the fact that our epilogue is still in the same outer
+ // loop. Note that this leaves loopinfo temporarily out of sync with the
+ // CFG until the actual epilogue loop is inserted.
+ if (auto *ParentL = L->getParentLoop())
+ if (LI->getLoopFor(LatchExit) != ParentL) {
+ LI->removeBlock(NewExit);
+ ParentL->addBasicBlockToLoop(NewExit, *LI);
+ LI->removeBlock(EpilogPreHeader);
+ ParentL->addBasicBlockToLoop(EpilogPreHeader, *LI);
+ }
+
} else {
// If prolog remainder
// Split the original preheader twice to insert prolog remainder loop
@@ -751,35 +746,8 @@ bool llvm::UnrollRuntimeLoopRemainder(
Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(),
PreHeaderBR);
IRBuilder<> B(PreHeaderBR);
- Value *ModVal;
- // Calculate ModVal = (BECount + 1) % Count.
- // Note that TripCount is BECount + 1.
- if (isPowerOf2_32(Count)) {
- // When Count is power of 2 we don't BECount for epilog case, however we'll
- // need it for a branch around unrolling loop for prolog case.
- ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
- // 1. There are no iterations to be run in the prolog/epilog loop.
- // OR
- // 2. The addition computing TripCount overflowed.
- //
- // If (2) is true, we know that TripCount really is (1 << BEWidth) and so
- // the number of iterations that remain to be run in the original loop is a
- // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
- // explicitly check this above).
- } else {
- // As (BECount + 1) can potentially unsigned overflow we count
- // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count.
- Value *ModValTmp = B.CreateURem(BECount,
- ConstantInt::get(BECount->getType(),
- Count));
- Value *ModValAdd = B.CreateAdd(ModValTmp,
- ConstantInt::get(ModValTmp->getType(), 1));
- // At that point (BECount % Count) + 1 could be equal to Count.
- // To handle this case we need to take mod by Count one more time.
- ModVal = B.CreateURem(ModValAdd,
- ConstantInt::get(BECount->getType(), Count),
- "xtraiter");
- }
+ Value * const ModVal = CreateTripRemainder(B, BECount, TripCount, Count);
+
Value *BranchVal =
UseEpilogRemainder ? B.CreateICmpULT(BECount,
ConstantInt::get(BECount->getType(),
@@ -810,18 +778,13 @@ bool llvm::UnrollRuntimeLoopRemainder(
std::vector<BasicBlock *> NewBlocks;
ValueToValueMapTy VMap;
- // For unroll factor 2 remainder loop will have 1 iterations.
- // Do not create 1 iteration loop.
- bool CreateRemainderLoop = (Count != 2);
-
// Clone all the basic blocks in the loop. If Count is 2, we don't clone
// the loop, otherwise we create a cloned loop to execute the extra
// iterations. This function adds the appropriate CFG connections.
BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit;
BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader;
Loop *remainderLoop = CloneLoopBlocks(
- L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder,
- InsertTop, InsertBot,
+ L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot,
NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI);
// Assign the maximum possible trip count as the back edge weight for the
@@ -840,36 +803,33 @@ bool llvm::UnrollRuntimeLoopRemainder(
// work is to update the phi nodes in the original loop, and take in the
// values from the cloned region.
for (auto *BB : OtherExits) {
- for (auto &II : *BB) {
-
- // Given we preserve LCSSA form, we know that the values used outside the
- // loop will be used through these phi nodes at the exit blocks that are
- // transformed below.
- if (!isa<PHINode>(II))
- break;
- PHINode *Phi = cast<PHINode>(&II);
- unsigned oldNumOperands = Phi->getNumIncomingValues();
+ // Given we preserve LCSSA form, we know that the values used outside the
+ // loop will be used through these phi nodes at the exit blocks that are
+ // transformed below.
+ for (PHINode &PN : BB->phis()) {
+ unsigned oldNumOperands = PN.getNumIncomingValues();
// Add the incoming values from the remainder code to the end of the phi
// node.
- for (unsigned i =0; i < oldNumOperands; i++){
- Value *newVal = VMap.lookup(Phi->getIncomingValue(i));
- // newVal can be a constant or derived from values outside the loop, and
- // hence need not have a VMap value. Also, since lookup already generated
- // a default "null" VMap entry for this value, we need to populate that
- // VMap entry correctly, with the mapped entry being itself.
- if (!newVal) {
- newVal = Phi->getIncomingValue(i);
- VMap[Phi->getIncomingValue(i)] = Phi->getIncomingValue(i);
- }
- Phi->addIncoming(newVal,
- cast<BasicBlock>(VMap[Phi->getIncomingBlock(i)]));
+ for (unsigned i = 0; i < oldNumOperands; i++) {
+ auto *PredBB = PN.getIncomingBlock(i);
+ if (PredBB == Latch)
+ // The latch exit is handled separately, see connectX
+ continue;
+ if (!L->contains(PredBB))
+ // Even if we had dedicated exits, the code above inserted an
+ // extra branch which can reach the latch exit.
+ continue;
+
+ auto *V = PN.getIncomingValue(i);
+ if (Instruction *I = dyn_cast<Instruction>(V))
+ if (L->contains(I))
+ V = VMap.lookup(I);
+ PN.addIncoming(V, cast<BasicBlock>(VMap[PredBB]));
}
}
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
for (BasicBlock *SuccBB : successors(BB)) {
- assert(!(any_of(OtherExits,
- [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) ||
- SuccBB == LatchExit) &&
+ assert(!(llvm::is_contained(OtherExits, SuccBB) || SuccBB == LatchExit) &&
"Breaks the definition of dedicated exits!");
}
#endif
@@ -931,23 +891,22 @@ bool llvm::UnrollRuntimeLoopRemainder(
PreserveLCSSA);
// Update counter in loop for unrolling.
- // I should be multiply of Count.
+ // Use an incrementing IV. Pre-incr/post-incr is backedge/trip count.
+ // Subtle: TestVal can be 0 if we wrapped when computing the trip count,
+ // thus we must compare the post-increment (wrapping) value.
IRBuilder<> B2(NewPreHeader->getTerminator());
Value *TestVal = B2.CreateSub(TripCount, ModVal, "unroll_iter");
BranchInst *LatchBR = cast<BranchInst>(Latch->getTerminator());
- B2.SetInsertPoint(LatchBR);
PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter",
Header->getFirstNonPHI());
- Value *IdxSub =
- B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1),
- NewIdx->getName() + ".nsub");
- Value *IdxCmp;
- if (LatchBR->getSuccessor(0) == Header)
- IdxCmp = B2.CreateIsNotNull(IdxSub, NewIdx->getName() + ".ncmp");
- else
- IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp");
- NewIdx->addIncoming(TestVal, NewPreHeader);
- NewIdx->addIncoming(IdxSub, Latch);
+ B2.SetInsertPoint(LatchBR);
+ auto *Zero = ConstantInt::get(NewIdx->getType(), 0);
+ auto *One = ConstantInt::get(NewIdx->getType(), 1);
+ Value *IdxNext = B2.CreateAdd(NewIdx, One, NewIdx->getName() + ".next");
+ auto Pred = LatchBR->getSuccessor(0) == Header ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ Value *IdxCmp = B2.CreateICmp(Pred, IdxNext, TestVal, NewIdx->getName() + ".ncmp");
+ NewIdx->addIncoming(Zero, NewPreHeader);
+ NewIdx->addIncoming(IdxNext, Latch);
LatchBR->setCondition(IdxCmp);
} else {
// Connect the prolog code to the original loop and update the
@@ -960,12 +919,49 @@ bool llvm::UnrollRuntimeLoopRemainder(
// of its parent loops, so the Scalar Evolution pass needs to be run again.
SE->forgetTopmostLoop(L);
- // Verify that the Dom Tree is correct.
+ // Verify that the Dom Tree and Loop Info are correct.
#if defined(EXPENSIVE_CHECKS) && !defined(NDEBUG)
- if (DT)
+ if (DT) {
assert(DT->verify(DominatorTree::VerificationLevel::Full));
+ LI->verify(*DT);
+ }
#endif
+ // For unroll factor 2 remainder loop will have 1 iteration.
+ if (Count == 2 && DT && LI && SE) {
+ // TODO: This code could probably be pulled out into a helper function
+ // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion.
+ BasicBlock *RemainderLatch = remainderLoop->getLoopLatch();
+ assert(RemainderLatch);
+ SmallVector<BasicBlock*> RemainderBlocks(remainderLoop->getBlocks().begin(),
+ remainderLoop->getBlocks().end());
+ breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr);
+ remainderLoop = nullptr;
+
+ // Simplify loop values after breaking the backedge
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadInsts;
+ for (BasicBlock *BB : RemainderBlocks) {
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
+ if (Value *V = SimplifyInstruction(&Inst, {DL, nullptr, DT, AC}))
+ if (LI->replacementPreservesLCSSAForm(&Inst, V))
+ Inst.replaceAllUsesWith(V);
+ if (isInstructionTriviallyDead(&Inst))
+ DeadInsts.emplace_back(&Inst);
+ }
+ // We can't do recursive deletion until we're done iterating, as we might
+ // have a phi which (potentially indirectly) uses instructions later in
+ // the block we're iterating through.
+ RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
+ }
+
+ // Merge latch into exit block.
+ auto *ExitBB = RemainderLatch->getSingleSuccessor();
+ assert(ExitBB && "required after breaking cond br backedge");
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ MergeBlockIntoPredecessor(ExitBB, &DTU, LI);
+ }
+
// Canonicalize to LoopSimplifyForm both original and remainder loops. We
// cannot rely on the LoopUnrollPass to do this because it only does
// canonicalization for parent/subloops and not the sibling loops.
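
The comment about comparing the post-increment IV against a possibly wrapped TestVal is easiest to see with a small standalone model. This is only a sketch with made-up names, not LLVM code: it mimics the %niter / %niter.next / unroll_iter pattern emitted above and shows why an exit test on the post-increment value still terminates correctly when the computed iteration count wraps to 0.

#include <cassert>
#include <cstdint>

// Models the remainder-loop exit test: %niter starts at 0, %niter.next is the
// post-increment value, and the loop exits when %niter.next == unroll_iter.
uint32_t iterationsExecuted(uint32_t UnrollIter) {
  uint32_t NIter = 0;
  uint32_t Executed = 0;
  do {
    ++Executed;
    uint32_t Next = NIter + 1;  // may wrap, just like the IR add
    if (Next == UnrollIter)     // post-increment compare ("niter.ncmp")
      break;
    NIter = Next;
  } while (true);
  return Executed;
}

int main() {
  assert(iterationsExecuted(3) == 3);
  // UnrollIter == 0 models the wrapped trip count: the compare only fires once
  // the counter itself wraps back to 0, i.e. after the full 2^32 iterations,
  // rather than mis-exiting on the first iteration.
  return 0;
}
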
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index e4d78f9ada08..f0f079335683 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -612,10 +612,7 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
for (auto *Block : L->blocks())
for (Instruction &I : *Block) {
auto *Undef = UndefValue::get(I.getType());
- for (Value::use_iterator UI = I.use_begin(), E = I.use_end();
- UI != E;) {
- Use &U = *UI;
- ++UI;
+ for (Use &U : llvm::make_early_inc_range(I.uses())) {
if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
if (L->contains(Usr->getParent()))
continue;
@@ -710,21 +707,58 @@ void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
SE.forgetLoop(L);
- // Note: By splitting the backedge, and then explicitly making it unreachable
- // we gracefully handle corner cases such as non-bottom tested loops and the
- // like. We also have the benefit of being able to reuse existing well tested
- // code. It might be worth special casing the common bottom tested case at
- // some point to avoid code churn.
-
std::unique_ptr<MemorySSAUpdater> MSSAU;
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
- auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+ // Update the CFG and domtree. We chose to special-case a couple of
+ // common cases for code quality and test readability reasons.
+ [&]() -> void {
+ if (auto *BI = dyn_cast<BranchInst>(Latch->getTerminator())) {
+ if (!BI->isConditional()) {
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BI, /*PreserveLCSSA*/ true, &DTU,
+ MSSAU.get());
+ return;
+ }
+
+ // Conditional latch/exit - note that the latch can be shared by an inner
+ // and an outer loop, so the other target doesn't need to be an exit.
+ if (L->isLoopExiting(Latch)) {
+ // TODO: Generalize ConstantFoldTerminator so that it can be used
+ // here without invalidating LCSSA or MemorySSA. (Tricky case for
+ // LCSSA: header is an exit block of a preceding sibling loop w/o
+ // dedicated exits.)
+ const unsigned ExitIdx = L->contains(BI->getSuccessor(0)) ? 1 : 0;
+ BasicBlock *ExitBB = BI->getSuccessor(ExitIdx);
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ Header->removePredecessor(Latch, true);
+
+ IRBuilder<> Builder(BI);
+ auto *NewBI = Builder.CreateBr(ExitBB);
+ // Transfer the metadata to the new branch instruction (minus the
+ // loop info since this is no longer a loop)
+ NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg,
+ LLVMContext::MD_annotation});
+
+ BI->eraseFromParent();
+ DTU.applyUpdates({{DominatorTree::Delete, Latch, Header}});
+ if (MSSA)
+ MSSAU->applyUpdates({{DominatorTree::Delete, Latch, Header}}, DT);
+ return;
+ }
+ }
- DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
- (void)changeToUnreachable(BackedgeBB->getTerminator(),
- /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
+ // General case. By splitting the backedge and then explicitly making it
+ // unreachable, we gracefully handle corner cases such as switch and invoke
+ // terminators.
+ auto *BackedgeBB = SplitEdge(Latch, Header, &DT, &LI, MSSAU.get());
+
+ DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Eager);
+ (void)changeToUnreachable(BackedgeBB->getTerminator(),
+ /*PreserveLCSSA*/ true, &DTU, MSSAU.get());
+ }();
// Erase (and destroy) this loop instance. Handles relinking sub-loops
// and blocks within the loop as needed.
@@ -852,32 +886,37 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
-Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
- Value *Right) {
- CmpInst::Predicate Pred;
+CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
switch (RK) {
default:
llvm_unreachable("Unknown min/max recurrence kind");
case RecurKind::UMin:
- Pred = CmpInst::ICMP_ULT;
- break;
+ return CmpInst::ICMP_ULT;
case RecurKind::UMax:
- Pred = CmpInst::ICMP_UGT;
- break;
+ return CmpInst::ICMP_UGT;
case RecurKind::SMin:
- Pred = CmpInst::ICMP_SLT;
- break;
+ return CmpInst::ICMP_SLT;
case RecurKind::SMax:
- Pred = CmpInst::ICMP_SGT;
- break;
+ return CmpInst::ICMP_SGT;
case RecurKind::FMin:
- Pred = CmpInst::FCMP_OLT;
- break;
+ return CmpInst::FCMP_OLT;
case RecurKind::FMax:
- Pred = CmpInst::FCMP_OGT;
- break;
+ return CmpInst::FCMP_OGT;
}
+}
+Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
+ RecurKind RK, Value *Left, Value *Right) {
+ if (auto VTy = dyn_cast<VectorType>(Left->getType()))
+ StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp");
+ return Builder.CreateSelect(Cmp, Left, Right, "rdx.select");
+}
+
+Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
+ Value *Right) {
+ CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
return Select;
@@ -955,15 +994,50 @@ Value *llvm::getShuffleReduction(IRBuilderBase &Builder, Value *Src,
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
+Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
+ const TargetTransformInfo *TTI,
+ Value *Src,
+ const RecurrenceDescriptor &Desc,
+ PHINode *OrigPhi) {
+ assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
+ Desc.getRecurrenceKind()) &&
+ "Unexpected reduction kind");
+ Value *InitVal = Desc.getRecurrenceStartValue();
+ Value *NewVal = nullptr;
+
+ // First use the original phi to determine the new value we're trying to
+ // select from in the loop.
+ SelectInst *SI = nullptr;
+ for (auto *U : OrigPhi->users()) {
+ if ((SI = dyn_cast<SelectInst>(U)))
+ break;
+ }
+ assert(SI && "One user of the original phi should be a select");
+
+ if (SI->getTrueValue() == OrigPhi)
+ NewVal = SI->getFalseValue();
+ else {
+ assert(SI->getFalseValue() == OrigPhi &&
+ "At least one input to the select should be the original Phi");
+ NewVal = SI->getTrueValue();
+ }
+
+ // Create a splat vector with the new value and compare this to the vector
+ // we want to reduce.
+ ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
+ Value *Right = Builder.CreateVectorSplat(EC, InitVal);
+ Value *Cmp =
+ Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
+
+ // If any predicate is true it means that we want to select the new value.
+ Cmp = Builder.CreateOrReduce(Cmp);
+ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
+}
+
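
The reduction just added above compares the reduced vector against a splat of the start value, OR-reduces the lane-wise result, and picks the new value if any lane differed. A minimal scalar model of that decision, using hypothetical names rather than the LLVM API:

#include <algorithm>
#include <array>

// Scalar model of the select/cmp reduction: lanes still equal to InitVal mean
// the select in the loop never fired for them; if any lane differs, NewVal is
// the reduction result, otherwise the start value is kept.
int reduceSelectCmp(const std::array<int, 4> &Lanes, int InitVal, int NewVal) {
  bool AnyChanged =
      std::any_of(Lanes.begin(), Lanes.end(),
                  [&](int L) { return L != InitVal; }); // rdx.select.cmp + or-reduce
  return AnyChanged ? NewVal : InitVal;                 // rdx.select
}
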
Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
const TargetTransformInfo *TTI,
Value *Src, RecurKind RdxKind,
ArrayRef<Value *> RedOps) {
- TargetTransformInfo::ReductionFlags RdxFlags;
- RdxFlags.IsMaxOp = RdxKind == RecurKind::SMax || RdxKind == RecurKind::UMax ||
- RdxKind == RecurKind::FMax;
- RdxFlags.IsSigned = RdxKind == RecurKind::SMax || RdxKind == RecurKind::SMin;
-
auto *SrcVecEltTy = cast<VectorType>(Src->getType())->getElementType();
switch (RdxKind) {
case RecurKind::Add:
@@ -1000,14 +1074,19 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
Value *llvm::createTargetReduction(IRBuilderBase &B,
const TargetTransformInfo *TTI,
- const RecurrenceDescriptor &Desc,
- Value *Src) {
+ const RecurrenceDescriptor &Desc, Value *Src,
+ PHINode *OrigPhi) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
IRBuilderBase::FastMathFlagGuard FMFGuard(B);
B.setFastMathFlags(Desc.getFastMathFlags());
- return createSimpleTargetReduction(B, TTI, Src, Desc.getRecurrenceKind());
+
+ RecurKind RK = Desc.getRecurrenceKind();
+ if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
+ return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
+
+ return createSimpleTargetReduction(B, TTI, Src, RK);
}
Value *llvm::createOrderedReduction(IRBuilderBase &B,
@@ -1081,58 +1160,6 @@ bool llvm::cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//
-// Return true if the SCEV expansion generated by the rewriter can replace the
-// original value. SCEV guarantees that it produces the same value, but the way
-// it is produced may be illegal IR. Ideally, this function will only be
-// called for verification.
-static bool isValidRewrite(ScalarEvolution *SE, Value *FromVal, Value *ToVal) {
- // If an SCEV expression subsumed multiple pointers, its expansion could
- // reassociate the GEP changing the base pointer. This is illegal because the
- // final address produced by a GEP chain must be inbounds relative to its
- // underlying object. Otherwise basic alias analysis, among other things,
- // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
- // producing an expression involving multiple pointers. Until then, we must
- // bail out here.
- //
- // Retrieve the pointer operand of the GEP. Don't use getUnderlyingObject
- // because it understands lcssa phis while SCEV does not.
- Value *FromPtr = FromVal;
- Value *ToPtr = ToVal;
- if (auto *GEP = dyn_cast<GEPOperator>(FromVal))
- FromPtr = GEP->getPointerOperand();
-
- if (auto *GEP = dyn_cast<GEPOperator>(ToVal))
- ToPtr = GEP->getPointerOperand();
-
- if (FromPtr != FromVal || ToPtr != ToVal) {
- // Quickly check the common case
- if (FromPtr == ToPtr)
- return true;
-
- // SCEV may have rewritten an expression that produces the GEP's pointer
- // operand. That's ok as long as the pointer operand has the same base
- // pointer. Unlike getUnderlyingObject(), getPointerBase() will find the
- // base of a recurrence. This handles the case in which SCEV expansion
- // converts a pointer type recurrence into a nonrecurrent pointer base
- // indexed by an integer recurrence.
-
- // If the GEP base pointer is a vector of pointers, abort.
- if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
- return false;
-
- const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
- const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
- if (FromBase == ToBase)
- return true;
-
- LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: GEP rewrite bail out "
- << *FromBase << " != " << *ToBase << "\n");
-
- return false;
- }
- return true;
-}
-
static bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) {
SmallPtrSet<const Instruction *, 8> Visited;
SmallVector<const Instruction *, 8> WorkList;
@@ -1165,9 +1192,6 @@ struct RewritePhi {
Instruction *ExpansionPoint; // Where we'd like to expand that SCEV?
bool HighCost; // Is this expansion a high-cost?
- Value *Expansion = nullptr;
- bool ValidRewrite = false;
-
RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
bool H)
: PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
@@ -1204,8 +1228,6 @@ static bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet)
// phase later. Skip it in the loop invariant check below.
bool found = false;
for (const RewritePhi &Phi : RewritePhiSet) {
- if (!Phi.ValidRewrite)
- continue;
unsigned i = Phi.Ith;
if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
found = true;
@@ -1264,13 +1286,6 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
if (!SE->isSCEVable(PN->getType()))
continue;
- // It's necessary to tell ScalarEvolution about this explicitly so that
- // it can walk the def-use list and forget all SCEVs, as it may not be
- // watching the PHI itself. Once the new exit value is in place, there
- // may not be a def-use connection between the loop and every instruction
- // which got a SCEVAddRecExpr for that loop.
- SE->forgetValue(PN);
-
// Iterate over all of the values in all the PHI nodes.
for (unsigned i = 0; i != NumPreds; ++i) {
// If the value being merged in is not integer or is not defined
@@ -1339,61 +1354,49 @@ int llvm::rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
}
}
- // Now that we've done preliminary filtering and billed all the SCEV's,
- // we can perform the last sanity check - the expansion must be valid.
- for (RewritePhi &Phi : RewritePhiSet) {
- Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(),
- Phi.ExpansionPoint);
+ // TODO: evaluate whether it is beneficial to change how we calculate
+ // high-cost: if we have SCEV 'A' which we know we will expand, should we
+ // calculate the cost of other SCEVs after expanding SCEV 'A', thus
+ // potentially giving a cost bonus to those other SCEVs?
- LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
- << *(Phi.Expansion) << '\n'
- << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
+ bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
+ int NumReplaced = 0;
+
+ // Transformation.
+ for (const RewritePhi &Phi : RewritePhiSet) {
+ PHINode *PN = Phi.PN;
- // FIXME: isValidRewrite() is a hack. it should be an assert, eventually.
- Phi.ValidRewrite = isValidRewrite(SE, Phi.ExpansionPoint, Phi.Expansion);
- if (!Phi.ValidRewrite) {
- DeadInsts.push_back(Phi.Expansion);
+ // Only do the rewrite when the ExitValue can be expanded cheaply.
+ // If LoopCanBeDel is true, rewrite exit value aggressively.
+ if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost)
continue;
- }
+
+ Value *ExitVal = Rewriter.expandCodeFor(
+ Phi.ExpansionSCEV, Phi.PN->getType(), Phi.ExpansionPoint);
+
+ LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = " << *ExitVal
+ << '\n'
+ << " LoopVal = " << *(Phi.ExpansionPoint) << "\n");
#ifndef NDEBUG
// If we reuse an instruction from a loop which is neither L nor one of
// its containing loops, we end up breaking LCSSA form for this loop by
// creating a new use of its instruction.
- if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion))
+ if (auto *ExitInsn = dyn_cast<Instruction>(ExitVal))
if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
if (EVL != L)
assert(EVL->contains(L) && "LCSSA breach detected!");
#endif
- }
-
- // TODO: after isValidRewrite() is an assertion, evaluate whether
- // it is beneficial to change how we calculate high-cost:
- // if we have SCEV 'A' which we know we will expand, should we calculate
- // the cost of other SCEV's after expanding SCEV 'A',
- // thus potentially giving cost bonus to those other SCEV's?
-
- bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);
- int NumReplaced = 0;
-
- // Transformation.
- for (const RewritePhi &Phi : RewritePhiSet) {
- if (!Phi.ValidRewrite)
- continue;
-
- PHINode *PN = Phi.PN;
- Value *ExitVal = Phi.Expansion;
-
- // Only do the rewrite when the ExitValue can be expanded cheaply.
- // If LoopCanBeDel is true, rewrite exit value aggressively.
- if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
- DeadInsts.push_back(ExitVal);
- continue;
- }
NumReplaced++;
Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
PN->setIncomingValue(Phi.Ith, ExitVal);
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
// If this instruction is dead now, delete it. Don't do it now to avoid
// invalidating iterators.
@@ -1554,7 +1557,7 @@ expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
return ChecksWithBounds;
}
-std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
+Value *llvm::addRuntimeChecks(
Instruction *Loc, Loop *TheLoop,
const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
SCEVExpander &Exp) {
@@ -1563,22 +1566,10 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
LLVMContext &Ctx = Loc->getContext();
- Instruction *FirstInst = nullptr;
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
- // FIXME: this helper is currently a duplicate of the one in
- // LoopVectorize.cpp.
- auto GetFirstInst = [](Instruction *FirstInst, Value *V,
- Instruction *Loc) -> Instruction * {
- if (FirstInst)
- return FirstInst;
- if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == Loc->getParent() ? I : nullptr;
- return nullptr;
- };
-
for (const auto &Check : ExpandedChecks) {
const PointerBounds &A = Check.first, &B = Check.second;
// Check if two pointers (A and B) conflict where conflict is computed as:
@@ -1607,30 +1598,16 @@ std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
// bound1 = (A.Start < B.End)
// IsConflict = bound0 & bound1
Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
- FirstInst = GetFirstInst(FirstInst, Cmp0, Loc);
Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
- FirstInst = GetFirstInst(FirstInst, Cmp1, Loc);
Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
- FirstInst = GetFirstInst(FirstInst, IsConflict, Loc);
if (MemoryRuntimeCheck) {
IsConflict =
ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
- FirstInst = GetFirstInst(FirstInst, IsConflict, Loc);
}
MemoryRuntimeCheck = IsConflict;
}
- if (!MemoryRuntimeCheck)
- return std::make_pair(nullptr, nullptr);
-
- // We have to do this trickery because the IRBuilder might fold the check to a
- // constant expression in which case there is no Instruction anchored in a
- // the block.
- Instruction *Check =
- BinaryOperator::CreateAnd(MemoryRuntimeCheck, ConstantInt::getTrue(Ctx));
- ChkBuilder.Insert(Check, "memcheck.conflict");
- FirstInst = GetFirstInst(FirstInst, Check, Loc);
- return std::make_pair(FirstInst, Check);
+ return MemoryRuntimeCheck;
}
Optional<IVConditionInfo> llvm::hasPartialIVCondition(Loop &L,
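
The runtime check emitted per pointer pair above reduces to a half-open interval overlap test. As a plain sketch (hypothetical helper, not part of the patch):

#include <cstdint>

// Two accessed ranges [Start0, End0) and [Start1, End1) may alias exactly when
// each one begins before the other ends; this is the "bound0 & bound1" test,
// and the per-pair results are OR-ed together into "conflict.rdx".
bool rangesConflict(uint64_t Start0, uint64_t End0,
                    uint64_t Start1, uint64_t End1) {
  return Start0 < End1 && Start1 < End0;
}
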
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 8a89158788cf..771b7d25b0f2 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -14,9 +14,9 @@
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
@@ -52,8 +52,7 @@ void LoopVersioning::versionLoop(
assert(VersionedLoop->isLoopSimplifyForm() &&
"Loop is not in loop-simplify form");
- Instruction *FirstCheckInst;
- Instruction *MemRuntimeCheck;
+ Value *MemRuntimeCheck;
Value *SCEVRuntimeCheck;
Value *RuntimeCheck = nullptr;
@@ -64,8 +63,8 @@ void LoopVersioning::versionLoop(
SCEVExpander Exp2(*RtPtrChecking.getSE(),
VersionedLoop->getHeader()->getModule()->getDataLayout(),
"induction");
- std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks(
- RuntimeCheckBB->getTerminator(), VersionedLoop, AliasChecks, Exp2);
+ MemRuntimeCheck = addRuntimeChecks(RuntimeCheckBB->getTerminator(),
+ VersionedLoop, AliasChecks, Exp2);
SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
"scev.check");
@@ -354,14 +353,11 @@ PreservedAnalyses LoopVersioningPass::run(Function &F,
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
auto GetLAA = [&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 616b4e8eb01c..8dc4702993c3 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -442,7 +442,7 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
/* DestAlign */ Memcpy->getDestAlign().valueOrOne(),
/* SrcIsVolatile */ Memcpy->isVolatile(),
/* DstIsVolatile */ Memcpy->isVolatile(),
- /* TargetTransfomrInfo */ TTI);
+ /* TargetTransformInfo */ TTI);
}
}
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index ec8d7a7074cd..aff9d1311688 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -524,16 +524,14 @@ bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
bool Changed = false;
SmallPtrSet<BasicBlock *, 8> DeleteList;
- for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
- BasicBlock *Cur =
- &*I++; // Advance over block so we don't traverse new blocks
-
+ // We use make_early_inc_range here so that we don't traverse new blocks.
+ for (BasicBlock &Cur : llvm::make_early_inc_range(F)) {
// If the block is a dead Default block that will be deleted later, don't
// waste time processing it.
- if (DeleteList.count(Cur))
+ if (DeleteList.count(&Cur))
continue;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur.getTerminator())) {
Changed = true;
ProcessSwitchInst(SI, DeleteList, AC, LVI);
}
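
The early-increment range used here (and in several hunks above) advances its iterator before handing out the current element, so the body may erase that element, or append new elements after it, without breaking the traversal. A small sketch of the idiom on instructions, with an assumed deadness predicate:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Erase trivially dead instructions while walking the block. Only the current
// instruction is removed, which is exactly the case make_early_inc_range makes
// safe; deleting *other* instructions would still require extra care.
void dropUnusedSideEffectFreeInsts(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (I.use_empty() && !I.isTerminator() && !I.mayHaveSideEffects())
      I.eraseFromParent();
}
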
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 2aef37205c53..bb5ff59cba4b 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -125,7 +125,7 @@ Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
Function *Ctor = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
GlobalValue::InternalLinkage, 0, CtorName, &M);
- Ctor->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+ Ctor->addFnAttr(Attribute::NoUnwind);
BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
ReturnInst::Create(M.getContext(), CtorBB);
// Ensure Ctor cannot be discarded, even if in a comdat.
@@ -165,7 +165,7 @@ llvm::getOrCreateSanitizerCtorAndInitFunctions(
if (Function *Ctor = M.getFunction(CtorName))
// FIXME: Sink this logic into the module, similar to the handling of
// globals. This will make moving to a concurrent model much easier.
- if (Ctor->arg_size() == 0 ||
+ if (Ctor->arg_empty() ||
Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
return {Ctor, declareSanitizerInitFunction(M, InitName, InitArgTypes)};
@@ -297,7 +297,6 @@ void VFABI::setVectorVariantNames(
"vector function declaration is missing.");
}
#endif
- CI->addAttribute(
- AttributeList::FunctionIndex,
+ CI->addFnAttr(
Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
}
diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
index 91280762aaa7..bd2b6fafdf2e 100644
--- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp
+++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -23,6 +24,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -566,10 +568,18 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// to ensure we dominate all of our uses. Always insert right before the
// relevant instruction (terminator, assume), so that we insert in proper
// order in the case of multiple predicateinfo in the same block.
+ // The number of named values is used to detect if a new declaration was
+ // added. If so, that declaration is tracked so that it can be removed when
+ // the analysis is done. The corner case where a new declaration results in
+ // a name clash and the old name being renamed is not considered, as that
+ // represents an invalid module.
if (isa<PredicateWithEdge>(ValInfo)) {
IRBuilder<> B(getBranchTerminator(ValInfo));
+ auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC =
B.CreateCall(IF, Op, Op->getName() + "." + Twine(Counter++));
PI.PredicateMap.insert({PIC, ValInfo});
@@ -581,8 +591,11 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter,
// Insert the predicate directly after the assume. While it also holds
// directly before it, assume(i1 true) is not a useful fact.
IRBuilder<> B(PAssume->AssumeInst->getNextNode());
+ auto NumDecls = F.getParent()->getNumNamedValues();
Function *IF = Intrinsic::getDeclaration(
F.getParent(), Intrinsic::ssa_copy, Op->getType());
+ if (NumDecls != F.getParent()->getNumNamedValues())
+ PI.CreatedDeclarations.insert(IF);
CallInst *PIC = B.CreateCall(IF, Op);
PI.PredicateMap.insert({PIC, ValInfo});
Result.Def = PIC;
@@ -761,6 +774,23 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,
Builder.buildPredicateInfo();
}
+// Remove all declarations we created. The PredicateInfo consumers are
+// responsible for removing the ssa_copy calls created.
+PredicateInfo::~PredicateInfo() {
+ // Collect function pointers in set first, as SmallSet uses a SmallVector
+ // internally and we have to remove the asserting value handles first.
+ SmallPtrSet<Function *, 20> FunctionPtrs;
+ for (auto &F : CreatedDeclarations)
+ FunctionPtrs.insert(&*F);
+ CreatedDeclarations.clear();
+
+ for (Function *F : FunctionPtrs) {
+ assert(F->user_begin() == F->user_end() &&
+ "PredicateInfo consumer did not remove all SSA copies.");
+ F->eraseFromParent();
+ }
+}
+
Optional<PredicateConstraint> PredicateBase::getConstraint() const {
switch (Type) {
case PT_Assume:
@@ -827,6 +857,19 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
}
+// Replace ssa_copy calls created by PredicateInfo with their operand.
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) {
+ for (Instruction &Inst : llvm::make_early_inc_range(instructions(F))) {
+ const auto *PI = PredInfo.getPredicateInfoFor(&Inst);
+ auto *II = dyn_cast<IntrinsicInst>(&Inst);
+ if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy)
+ continue;
+
+ Inst.replaceAllUsesWith(II->getOperand(0));
+ Inst.eraseFromParent();
+ }
+}
+
bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -834,6 +877,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {
PredInfo->print(dbgs());
if (VerifyPredicateInfo)
PredInfo->verifyPredicateInfo();
+
+ replaceCreatedSSACopys(*PredInfo, F);
return false;
}
@@ -845,6 +890,7 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,
auto PredInfo = std::make_unique<PredicateInfo>(F, DT, AC);
PredInfo->print(OS);
+ replaceCreatedSSACopys(*PredInfo, F);
return PreservedAnalyses::all();
}
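
The declaration-tracking pattern added above could be factored into a small helper; this is only a sketch of the idea with a hypothetical function name, not something the patch introduces:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

// Sample Module::getNumNamedValues() around Intrinsic::getDeclaration; if the
// count grew, the ssa_copy declaration is new and is recorded so it can be
// erased once all inserted calls have been removed.
llvm::Function *
getTrackedSSACopyDecl(llvm::Module &M, llvm::Type *Ty,
                      llvm::SmallPtrSetImpl<llvm::Function *> &Created) {
  auto NumDecls = M.getNumNamedValues();
  llvm::Function *IF =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::ssa_copy, {Ty});
  if (M.getNumNamedValues() != NumDecls)
    Created.insert(IF);
  return IF;
}
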
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 427028066026..b35ab57e0d87 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -70,7 +70,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
if (LI->isVolatile())
return false;
} else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(0) == AI)
+ if (SI->getValueOperand() == AI ||
+ SI->getValueOperand()->getType() != AI->getAllocatedType())
return false; // Don't allow a store OF the AI, only INTO the AI.
// Note that atomic stores can be transformed; atomic semantics do
// not have any meaning for a local alloca.
diff --git a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
index 85e5adaeaf5e..3ebc89158173 100644
--- a/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
+++ b/llvm/lib/Transforms/Utils/RelLookupTableConverter.cpp
@@ -177,9 +177,7 @@ static bool convertToRelativeLookupTables(
bool Changed = false;
- for (auto GVI = M.global_begin(), E = M.global_end(); GVI != E;) {
- GlobalVariable &GV = *GVI++;
-
+ for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (!shouldConvertToRelLookupTable(M, GV))
continue;
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 4cf99abcc10f..d7e8eaf677c6 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -540,8 +540,14 @@ void SCCPInstVisitor::markArgInFuncSpecialization(Function *F, Argument *A,
E = F->arg_end();
I != E; ++I, ++J)
if (J != A && ValueState.count(I)) {
- ValueState[J] = ValueState[I];
- pushToWorkList(ValueState[J], J);
+ // Note: This previously looked like this:
+ // ValueState[J] = ValueState[I];
+ // This is incorrect because the DenseMap class may resize the underlying
+ // memory when inserting `J`, which will invalidate the reference to `I`.
+ // Instead, we make sure `J` exists, then set it to `I` afterwards.
+ auto &NewValue = ValueState[J];
+ NewValue = ValueState[I];
+ pushToWorkList(NewValue, J);
}
}
@@ -802,6 +808,9 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
return;
ValueLatticeElement OpSt = getValueState(I.getOperand(0));
+ if (OpSt.isUnknownOrUndef())
+ return;
+
if (Constant *OpC = getConstant(OpSt)) {
// Fold the constant as we build.
Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
@@ -809,9 +818,14 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
return;
// Propagate constant value
markConstant(&I, C);
- } else if (OpSt.isConstantRange() && I.getDestTy()->isIntegerTy()) {
+ } else if (I.getDestTy()->isIntegerTy()) {
auto &LV = getValueState(&I);
- ConstantRange OpRange = OpSt.getConstantRange();
+ ConstantRange OpRange =
+ OpSt.isConstantRange()
+ ? OpSt.getConstantRange()
+ : ConstantRange::getFull(
+ I.getOperand(0)->getType()->getScalarSizeInBits());
+
Type *DestTy = I.getDestTy();
// Vectors where all elements have the same known constant range are treated
// as a single constant range in the lattice. When bitcasting such vectors,
@@ -826,7 +840,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
ConstantRange Res =
OpRange.castOp(I.getOpcode(), DL.getTypeSizeInBits(DestTy));
mergeInValue(LV, &I, ValueLatticeElement::getRange(Res));
- } else if (!OpSt.isUnknownOrUndef())
+ } else
markOverdefined(&I);
}
@@ -1183,10 +1197,10 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && canConstantFoldCallTo(&CB, F)) {
SmallVector<Constant *, 8> Operands;
- for (auto AI = CB.arg_begin(), E = CB.arg_end(); AI != E; ++AI) {
- if (AI->get()->getType()->isStructTy())
+ for (const Use &A : CB.args()) {
+ if (A.get()->getType()->isStructTy())
return markOverdefined(&CB); // Can't handle struct args.
- ValueLatticeElement State = getValueState(*AI);
+ ValueLatticeElement State = getValueState(A);
if (State.isUnknownOrUndef())
return; // Operands are not resolved yet.
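
The evaluation-order hazard described in markArgInFuncSpecialization above generalizes to any DenseMap copy between two keys. A minimal sketch with a toy map (not the SCCP ValueState):

#include "llvm/ADT/DenseMap.h"

// Copy the mapping for SrcKey into DstKey. Writing `M[DstKey] = M[SrcKey];`
// is unsafe: whichever operator[] call runs second may grow the table and move
// every entry, leaving the reference produced by the first call dangling by
// the time the assignment reads or writes through it.
void copyEntry(llvm::DenseMap<int, int> &M, int SrcKey, int DstKey) {
  auto &Dst = M[DstKey];   // do the potentially-growing insertion first
  Dst = M.lookup(SrcKey);  // lookup() never inserts, so Dst stays valid
}
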
diff --git a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
index 917d5e0a1ef0..7de76b86817b 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp
@@ -65,12 +65,6 @@ void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) {
Rewrites[Var].Uses.push_back(U);
}
-/// Return true if the SSAUpdater already has a value for the specified variable
-/// in the specified block.
-bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) {
- return (Var < Rewrites.size()) ? Rewrites[Var].Defines.count(BB) : false;
-}
-
// Compute value at the given block BB. We either should already know it, or we
// should be able to recursively reach it going up dominator tree.
Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R,
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 3978e1e29825..a042146d7ace 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -747,9 +747,8 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// so that pointer operands are inserted first, which the code below relies on
// to form more involved GEPs.
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
- for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
- E(S->op_begin()); I != E; ++I)
- OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+ for (const SCEV *Op : reverse(S->operands()))
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
// Sort by loop. Use a stable sort so that constants follow non-constants and
// pointer operands precede non-pointer operands.
@@ -765,7 +764,11 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// This is the first operand. Just expand it.
Sum = expand(Op);
++I;
- } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ continue;
+ }
+
+ assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
+ if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
@@ -779,16 +782,6 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
NewOps.push_back(X);
}
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
- } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
- // The running sum is an integer, and there's a pointer at this level.
- // Try to form a getelementptr. If the running sum is instructions,
- // use a SCEVUnknown to avoid re-analyzing them.
- SmallVector<const SCEV *, 4> NewOps;
- NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
- SE.getSCEV(Sum));
- for (++I; I != E && I->first == CurLoop; ++I)
- NewOps.push_back(I->second);
- Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false);
@@ -817,9 +810,8 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Collect all the mul operands in a loop, along with their associated loops.
// Iterate in reverse so that constants are emitted last, all else equal.
SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
- for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
- E(S->op_begin()); I != E; ++I)
- OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+ for (const SCEV *Op : reverse(S->operands()))
+ OpsAndLoops.push_back(std::make_pair(getRelevantLoop(Op), Op));
// Sort by loop. Use a stable sort so that constants follow non-constants.
llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));
@@ -923,28 +915,6 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
/*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}
-/// Move parts of Base into Rest to leave Base with the minimal
-/// expression that provides a pointer operand suitable for a
-/// GEP expansion.
-static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
- ScalarEvolution &SE) {
- while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
- Base = A->getStart();
- Rest = SE.getAddExpr(Rest,
- SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
- A->getStepRecurrence(SE),
- A->getLoop(),
- A->getNoWrapFlags(SCEV::FlagNW)));
- }
- if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
- Base = A->getOperand(A->getNumOperands()-1);
- SmallVector<const SCEV *, 8> NewAddOps(A->operands());
- NewAddOps.back() = Rest;
- Rest = SE.getAddExpr(NewAddOps);
- ExposePointerBase(Base, Rest, SE);
- }
-}
-
/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
@@ -1125,22 +1095,6 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
return IncV;
}
-/// Hoist the addrec instruction chain rooted in the loop phi above the
-/// position. This routine assumes that this is possible (has been checked).
-void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
- Instruction *Pos, PHINode *LoopPhi) {
- do {
- if (DT->dominates(InstToHoist, Pos))
- break;
- // Make sure the increment is where we want it. But don't move it
- // down past a potential existing post-inc user.
- fixupInsertPoints(InstToHoist);
- InstToHoist->moveBefore(Pos);
- Pos = InstToHoist;
- InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
- } while (InstToHoist != LoopPhi);
-}
-
/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
@@ -1264,8 +1218,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (LSRMode) {
if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
continue;
- if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
- continue;
} else {
if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
continue;
@@ -1293,11 +1245,6 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
if (AddRecPhiMatch) {
- // Potentially, move the increment. We have made sure in
- // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
- if (L == IVIncInsertLoop)
- hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
-
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(AddRecPhiMatch);
@@ -1597,29 +1544,17 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
+ if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) {
+ Value *StartV = expand(SE.getPointerBase(S));
+ assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+ return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV);
+ }
+
SmallVector<const SCEV *, 4> NewOps(S->operands());
NewOps[0] = SE.getConstant(Ty, 0);
const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
S->getNoWrapFlags(SCEV::FlagNW));
- // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
- // comments on expandAddToGEP for details.
- const SCEV *Base = S->getStart();
- // Dig into the expression to find the pointer base for a GEP.
- const SCEV *ExposedRest = Rest;
- ExposePointerBase(Base, ExposedRest, SE);
- // If we found a pointer, expand the AddRec with a GEP.
- if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
- // Make sure the Base isn't something exotic, such as a multiplied
- // or divided pointer value. In those cases, the result type isn't
- // actually a pointer type.
- if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
- Value *StartV = expand(Base);
- assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
- return expandAddToGEP(ExposedRest, PTy, Ty, StartV);
- }
- }
-
// Just do a normal add. Pre-expand the operands to suppress folding.
//
// The LHS and RHS values are factored out of the expand call to make the
@@ -1898,6 +1833,22 @@ Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
return V;
}
+/// Check whether value has nuw/nsw/exact set but SCEV does not.
+/// TODO: In reality it is better to check the poison recursively
+/// but this is better than nothing.
+static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) {
+ if (isa<OverflowingBinaryOperator>(I)) {
+ if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) {
+ if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap())
+ return true;
+ if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap())
+ return true;
+ }
+ } else if (isa<PossiblyExactOperator>(I) && I->isExact())
+ return true;
+ return false;
+}
+
ScalarEvolution::ValueOffsetPair
SCEVExpander::FindValueInExprValueMap(const SCEV *S,
const Instruction *InsertPt) {
@@ -1907,19 +1858,22 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S,
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
// If S is scConstant, it may be worse to reuse an existing Value.
if (S->getSCEVType() != scConstant && Set) {
- // Choose a Value from the set which dominates the insertPt.
- // insertPt should be inside the Value's parent loop so as not to break
+ // Choose a Value from the set which dominates the InsertPt.
+ // InsertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
for (auto const &VOPair : *Set) {
Value *V = VOPair.first;
ConstantInt *Offset = VOPair.second;
- Instruction *EntInst = nullptr;
- if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
- S->getType() == V->getType() &&
- EntInst->getFunction() == InsertPt->getFunction() &&
+ Instruction *EntInst = dyn_cast_or_null<Instruction>(V);
+ if (!EntInst)
+ continue;
+
+ assert(EntInst->getFunction() == InsertPt->getFunction());
+ if (S->getType() == V->getType() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) &&
+ !SCEVLostPoisonFlags(S, EntInst))
return {V, Offset};
}
}
@@ -2068,7 +2022,9 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Phis.push_back(&PN);
if (TTI)
- llvm::sort(Phis, [](Value *LHS, Value *RHS) {
+ // Use stable_sort to preserve the order of equivalent PHIs, so the order
+ // of the sorted PHIs is the same from run to run on the same loop.
+ llvm::stable_sort(Phis, [](Value *LHS, Value *RHS) {
// Put pointers at the back and make sure pointer < pointer = false.
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
@@ -2524,18 +2480,14 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
IntegerType *Ty =
IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
- Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
Value *NegStepValue =
expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
- Value *StartValue = expandCodeForImpl(
- isa<PointerType>(ARExpandTy) ? Start
- : SE.getPtrToIntExpr(Start, ARExpandTy),
- ARExpandTy, Loc, false);
+ Value *StartValue = expandCodeForImpl(Start, ARTy, Loc, false);
ConstantInt *Zero =
- ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
+ ConstantInt::get(Loc->getContext(), APInt::getZero(DstBits));
Builder.SetInsertPoint(Loc);
// Compute |Step|
@@ -2544,25 +2496,33 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Get the backedge taken count and truncate or extended to the AR type.
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
- auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
- Intrinsic::umul_with_overflow, Ty);
// Compute |Step| * Backedge
- CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
- Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
- Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ Value *MulV, *OfMul;
+ if (Step->isOne()) {
+ // Special-case a Step of one. The potentially costly `umul_with_overflow`
+ // isn't needed: the multiply can never overflow here, so to avoid
+ // artificially inflating the cost of the check, directly emit the
+ // optimized IR.
+ MulV = TruncTripCount;
+ OfMul = ConstantInt::getFalse(MulV->getContext());
+ } else {
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+ CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ }
// Compute:
// Start + |Step| * Backedge < Start
// Start - |Step| * Backedge > Start
Value *Add = nullptr, *Sub = nullptr;
- if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
- const SCEV *MulS = SE.getSCEV(MulV);
- const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
- Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
- ARPtrTy);
- Sub = Builder.CreateBitCast(
- expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
+ if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
+ StartValue = InsertNoopCastOfTo(
+ StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+ Value *NegMulV = Builder.CreateNeg(MulV);
+ Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+ Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
} else {
Add = Builder.CreateAdd(StartValue, MulV);
Sub = Builder.CreateSub(StartValue, MulV);
@@ -2686,9 +2646,11 @@ namespace {
// perfectly reduced form, which can't be guaranteed.
struct SCEVFindUnsafe {
ScalarEvolution &SE;
+ bool CanonicalMode;
bool IsUnsafe;
- SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
+ SCEVFindUnsafe(ScalarEvolution &SE, bool CanonicalMode)
+ : SE(SE), CanonicalMode(CanonicalMode), IsUnsafe(false) {}
bool follow(const SCEV *S) {
if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2704,6 +2666,14 @@ struct SCEVFindUnsafe {
IsUnsafe = true;
return false;
}
+
+ // For non-affine addrecs or in non-canonical mode we need a preheader
+ // to insert into.
+ if (!AR->getLoop()->getLoopPreheader() &&
+ (!CanonicalMode || !AR->isAffine())) {
+ IsUnsafe = true;
+ return false;
+ }
}
return true;
}
@@ -2712,8 +2682,8 @@ struct SCEVFindUnsafe {
}
namespace llvm {
-bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
- SCEVFindUnsafe Search(SE);
+bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE, bool CanonicalMode) {
+ SCEVFindUnsafe Search(SE, CanonicalMode);
visitAll(S, Search);
return !Search.IsUnsafe;
}
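
The new Step == 1 fast path in generateOverflowCheck rests on a simple identity: |Step| * BackedgeTakenCount is just the backedge-taken count and the multiply cannot overflow. A sketch of that decision in plain arithmetic (hypothetical helper, 64-bit for illustration):

#include <cstdint>
#include <utility>

// Returns the (possibly wrapped) product and an overflow flag, mirroring what
// the expander feeds into the "Start + |Step| * Backedge" checks: for a unit
// step the umul.with.overflow call is skipped entirely.
std::pair<uint64_t, bool> mulStepByBackedgeCount(uint64_t AbsStep,
                                                 uint64_t BECount) {
  if (AbsStep == 1)
    return {BECount, false};            // MulV = TruncTripCount, OfMul = false
  bool Overflow = AbsStep != 0 && BECount > UINT64_MAX / AbsStep;
  return {AbsStep * BECount, Overflow}; // what umul_with_overflow computes
}
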
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 583bb379488e..f467de5f924e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -25,6 +25,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GuardUtils.h"
@@ -159,6 +160,13 @@ static cl::opt<unsigned>
cl::desc("Maximum cost of combining conditions when "
"folding branches"));
+static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
+ "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
+ cl::init(2),
+ cl::desc("Multiplier to apply to threshold when determining whether or not "
+ "to fold branch to common destination when vector operations are "
+ "present"));
+
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
"Number of switch instructions turned into linear mapping");
@@ -272,7 +280,6 @@ public:
}
bool simplifyOnce(BasicBlock *BB);
- bool simplifyOnceImpl(BasicBlock *BB);
bool run(BasicBlock *BB);
// Helper to set Resimplify and return change indication.
@@ -1094,17 +1101,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Update (liveout) uses of bonus instructions,
// now that the bonus instruction has been cloned into predecessor.
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(BonusInst.getType(),
- (NewBonusInst->getName() + ".merge").str());
- SSAUpdate.AddAvailableValue(BB, &BonusInst);
- SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
+ // Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
- if (UI->getParent() != PredBlock)
- SSAUpdate.RewriteUseAfterInsertions(U);
- else // Use is in the same block as, and comes before, NewBonusInst.
- SSAUpdate.RewriteUse(U);
+ auto *PN = dyn_cast<PHINode>(UI);
+ if (!PN) {
+ assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
+ "If the user is not a PHI node, then it should be in the same "
+ "block as, and come after, the original bonus instruction.");
+ continue; // Keep using the original bonus instruction.
+ }
+ // Is this the block-closed SSA form PHI node?
+ if (PN->getIncomingBlock(U) == BB)
+ continue; // Great, keep using the original bonus instruction.
+ // The only other alternative is a "use" coming from
+ // the predecessor block - here we should refer to the cloned bonus instr.
+ assert(PN->getIncomingBlock(U) == PredBlock &&
+ "Not in block-closed SSA form?");
+ U.set(NewBonusInst);
}
}
}
@@ -2044,7 +2058,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
unsigned NumPHIdValues = 0;
for (auto *I : *LRI)
for (auto *V : PHIOperands[I]) {
- if (InstructionsToSink.count(V) == 0)
+ if (!InstructionsToSink.contains(V))
++NumPHIdValues;
// FIXME: this check is overly optimistic. We may end up not sinking
// said instruction, due to the very same profitability check.
@@ -2250,6 +2264,23 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
return SI->getValueOperand();
return nullptr; // Unknown store.
}
+
+ if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
+ if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
+ LI->isSimple()) {
+ // Local objects (created by an `alloca` instruction) are always
+ // writable, so once we are past a read from a location it is valid to
+ // also write to that same location.
+ // If the address of the local object never escapes the function, that
+ // means it's never concurrently read or written, hence moving the store
+ // from under the condition will not introduce a data race.
+ auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
+ if (AI && !PointerMayBeCaptured(AI, false, true))
+ // Found a previous load, return it.
+ return LI;
+ }
+ // The load didn't work out, but we may still find a store.
+ }
}
return nullptr;
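
The new load case in isSafeToSpeculateStore is easier to picture at the source level. A hypothetical before/after sketch (plain C++, not IR) of what the added check enables:

void use(int);

// Before: a conditional store into a local whose address never escapes, with a
// prior simple load of the same location reaching the branch.
void beforeTransform(bool Cond, int X) {
  int Local = 0;   // models the non-escaping alloca: always writable
  int V = Local;   // models the earlier load of the same address and type
  if (Cond)
    Local = X;     // the store isSafeToSpeculateStore is asked about
  use(Local);
  use(V);
}

// After: Local cannot be read or written concurrently and is known writable,
// so the store may run unconditionally, merging the two values with a select.
void afterTransform(bool Cond, int X) {
  int Local = 0;
  int V = Local;
  Local = Cond ? X : V;
  use(Local);
  use(V);
}
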
@@ -2545,17 +2576,17 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
int Size = 0;
SmallPtrSet<const Value *, 32> EphValues;
- auto IsEphemeral = [&](const Value *V) {
- if (isa<AssumeInst>(V))
+ auto IsEphemeral = [&](const Instruction *I) {
+ if (isa<AssumeInst>(I))
return true;
- return isSafeToSpeculativelyExecute(V) &&
- all_of(V->users(),
+ return !I->mayHaveSideEffects() && !I->isTerminator() &&
+ all_of(I->users(),
[&](const User *U) { return EphValues.count(U); });
};
// Walk the loop in reverse so that we can identify ephemeral values properly
// (values only feeding assumes).
- for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
+ for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
// Can't fold blocks that contain noduplicate or convergent calls.
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (CI->cannotDuplicate() || CI->isConvergent())
@@ -2588,8 +2619,10 @@ static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
/// If we have a conditional branch on a PHI node value that is defined in the
/// same block as the branch and if any PHI entries are constants, thread edges
/// corresponding to that entry to be branches to their ultimate destination.
-static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
- const DataLayout &DL, AssumptionCache *AC) {
+static Optional<bool> FoldCondBranchOnPHIImpl(BranchInst *BI,
+ DomTreeUpdater *DTU,
+ const DataLayout &DL,
+ AssumptionCache *AC) {
BasicBlock *BB = BI->getParent();
PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
// NOTE: we currently cannot transform this case if the PHI node is used
@@ -2703,13 +2736,25 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
DTU->applyUpdates(Updates);
}
- // Recurse, simplifying any other constants.
- return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true;
+ // Signal repeat, simplifying any other constants.
+ return None;
}
return false;
}
+static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
+ const DataLayout &DL, AssumptionCache *AC) {
+ Optional<bool> Result;
+ bool EverChanged = false;
+ do {
+ // Note that None means "we changed things, so repeat the simplification".
+ Result = FoldCondBranchOnPHIImpl(BI, DTU, DL, AC);
+ EverChanged |= Result == None || *Result;
+ } while (Result == None);
+ return EverChanged;
+}
+
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
@@ -2845,8 +2890,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// instructions.
for (BasicBlock *IfBlock : IfBlocks)
for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
- if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
- !isa<PseudoProbeInst>(I)) {
+ if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
// This is not an aggressive instruction that we can promote.
// Because of this, we won't be able to get rid of the control flow, so
// the xform is not worth it.
@@ -3105,6 +3149,14 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
return true;
}
+/// Return true if an instruction's type or any of its operands' types is a
+/// vector type.
+static bool isVectorOp(Instruction &I) {
+ return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
+ return U->getType()->isVectorTy();
+ });
+}
+
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
@@ -3189,6 +3241,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// number of the bonus instructions we'll need to create when cloning into
// each predecessor does not exceed a certain threshold.
unsigned NumBonusInsts = 0;
+ bool SawVectorOp = false;
const unsigned PredCount = Preds.size();
for (Instruction &I : *BB) {
// Don't check the branch condition comparison itself.
@@ -3200,14 +3253,35 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// I must be safe to execute unconditionally.
if (!isSafeToSpeculativelyExecute(&I))
return false;
+ SawVectorOp |= isVectorOp(I);
// Account for the cost of duplicating this instruction into each
- // predecessor.
- NumBonusInsts += PredCount;
- // Early exits once we reach the limit.
- if (NumBonusInsts > BonusInstThreshold)
+ // predecessor. Ignore free instructions.
+ if (!TTI ||
+ TTI->getUserCost(&I, CostKind) != TargetTransformInfo::TCC_Free) {
+ NumBonusInsts += PredCount;
+
+ // Early exits once we reach the limit.
+ if (NumBonusInsts >
+ BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
+ return false;
+ }
+
+ auto IsBCSSAUse = [BB, &I](Use &U) {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ return PN->getIncomingBlock(U) == BB;
+ return UI->getParent() == BB && I.comesBefore(UI);
+ };
+
+ // Does this instruction require rewriting of uses?
+ if (!all_of(I.uses(), IsBCSSAUse))
return false;
}
+ if (NumBonusInsts >
+ BonusInstThreshold *
+ (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
+ return false;
// Ok, we have the budget. Perform the transformation.
for (BasicBlock *PredBlock : Preds) {
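Read in isolation, the accounting above is: instructions the cost model reports as free add nothing, every other bonus instruction costs one clone per predecessor, the in-loop check exits early against the most permissive limit, and the precise limit (relaxed only if a vector operation was seen) is applied once after the loop. A hedged sketch of that final check; the parameter names are illustrative, not the LLVM options.

// Final budget test: the threshold is scaled by the vector multiplier only
// when the block actually contained a vector operation.
static bool withinBonusBudget(unsigned NumBonusInsts, bool SawVectorOp,
                              unsigned Threshold, unsigned VectorMultiplier) {
  unsigned Limit = Threshold * (SawVectorOp ? VectorMultiplier : 1u);
  return NumBonusInsts <= Limit;
}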
@@ -3340,7 +3414,7 @@ static bool mergeConditionalStoreToAddress(
InstructionCost Cost = 0;
InstructionCost Budget =
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
- for (auto &I : BB->instructionsWithoutDebug()) {
+ for (auto &I : BB->instructionsWithoutDebug(false)) {
// Consider terminator instruction to be free.
if (I.isTerminator())
continue;
@@ -3413,10 +3487,7 @@ static bool mergeConditionalStoreToAddress(
/*BranchWeights=*/nullptr, DTU);
QB.SetInsertPoint(T);
StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
- AAMDNodes AAMD;
- PStore->getAAMetadata(AAMD, /*Merge=*/false);
- PStore->getAAMetadata(AAMD, /*Merge=*/true);
- SI->setAAMetadata(AAMD);
+ SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
// Choose the minimum alignment. If we could prove both stores execute, we
// could use biggest one. In this case, though, we only know that one of the
// stores executes. And we don't know it's safe to take the alignment from a
@@ -3666,7 +3737,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
// fold the conditions into logical ops and one cond br.
// Ignore dbg intrinsics.
- if (&*BB->instructionsWithoutDebug().begin() != BI)
+ if (&*BB->instructionsWithoutDebug(false).begin() != BI)
return false;
int PBIOp, BIOp;
@@ -4711,29 +4782,6 @@ static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
return true;
}
-static void createUnreachableSwitchDefault(SwitchInst *Switch,
- DomTreeUpdater *DTU) {
- LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
- auto *BB = Switch->getParent();
- BasicBlock *NewDefaultBlock = SplitBlockPredecessors(
- Switch->getDefaultDest(), Switch->getParent(), "", DTU);
- auto *OrigDefaultBlock = Switch->getDefaultDest();
- Switch->setDefaultDest(&*NewDefaultBlock);
- if (DTU)
- DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock},
- {DominatorTree::Delete, BB, OrigDefaultBlock}});
- SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU);
- SmallVector<DominatorTree::UpdateType, 2> Updates;
- if (DTU)
- for (auto *Successor : successors(NewDefaultBlock))
- Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
- auto *NewTerminator = NewDefaultBlock->getTerminator();
- new UnreachableInst(Switch->getContext(), NewTerminator);
- EraseTerminatorAndDCECond(NewTerminator);
- if (DTU)
- DTU->applyUpdates(Updates);
-}
-
/// Turn a switch with two reachable destinations into an integer range
/// comparison and branch.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
@@ -5039,9 +5087,10 @@ static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI
return false;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- if (!CE->isGEPWithNoNotionalOverIndexing())
- return false;
- if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
+ // Pointer casts and in-bounds GEPs will not prohibit the backend from
+ // materializing the array of constants.
+ Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
+ if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
return false;
}
@@ -5111,7 +5160,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
// which we can constant-propagate the CaseVal, continue to its successor.
SmallDenseMap<Value *, Constant *> ConstantPool;
ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
- for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
+ for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
if (I.isTerminator()) {
// If the terminator is a simple branch, continue to the next block.
if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
@@ -5604,8 +5653,32 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
}
+static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
+ const DataLayout &DL) {
+ // Allow any legal type.
+ if (TTI.isTypeLegal(Ty))
+ return true;
+
+ auto *IT = dyn_cast<IntegerType>(Ty);
+ if (!IT)
+ return false;
+
+ // Also allow power of 2 integer types that have at least 8 bits and fit in
+ // a register. These types are common in frontend languages and targets
+ // usually support loads of these types.
+ // TODO: We could relax this to any integer that fits in a register and rely
+ // on ABI alignment and padding in the table to allow the load to be widened.
+ // Or we could widen the constants and truncate the load.
+ unsigned BitWidth = IT->getBitWidth();
+ return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
+ DL.fitsInLegalInteger(IT->getBitWidth());
+}
+
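The width test added above can be stated as a standalone predicate. In this sketch LargestLegalIntBits stands in for what DataLayout::fitsInLegalInteger consults (the widest native integer the target supports); the function name is illustrative.

// A width qualifies for a lookup-table entry if it is at least one byte,
// a power of two, and no wider than the largest legal integer.
static bool isLookupFriendlyIntWidth(unsigned BitWidth,
                                     unsigned LargestLegalIntBits) {
  bool IsPowerOfTwo = BitWidth != 0 && (BitWidth & (BitWidth - 1)) == 0;
  return BitWidth >= 8 && IsPowerOfTwo && BitWidth <= LargestLegalIntBits;
}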
/// Determine whether a lookup table should be built for this switch, based on
/// the number of cases, size of the table, and the types of the results.
+// TODO: We could support larger than legal types by limiting based on the
+// number of loads required and/or table size. If the constants are small we
+// could use smaller table entries and extend after the load.
static bool
ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
const TargetTransformInfo &TTI, const DataLayout &DL,
@@ -5619,7 +5692,7 @@ ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
Type *Ty = I.second;
// Saturate this flag to true.
- HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
+ HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
// Saturate this flag to false.
AllTablesFitInRegister =
@@ -6102,7 +6175,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If the block only contains the switch, see if we can fold the block
// away into any preds.
- if (SI == &*BB->instructionsWithoutDebug().begin())
+ if (SI == &*BB->instructionsWithoutDebug(false).begin())
if (FoldValueComparisonIntoPredecessors(SI, Builder))
return requestResimplify();
}
@@ -6246,12 +6319,9 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
// The debug info in OtherPred doesn't cover the merged control flow that
// used to go through BB. We need to delete it or update it.
- for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
- Instruction &Inst = *I;
- I++;
+ for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
if (isa<DbgInfoIntrinsic>(Inst))
Inst.eraseFromParent();
- }
SmallPtrSet<BasicBlock *, 16> Succs(succ_begin(BB), succ_end(BB));
for (BasicBlock *Succ : Succs) {
@@ -6338,6 +6408,11 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
}
bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
+ assert(
+ !isa<ConstantInt>(BI->getCondition()) &&
+ BI->getSuccessor(0) != BI->getSuccessor(1) &&
+ "Tautological conditional branch should have been eliminated already.");
+
BasicBlock *BB = BI->getParent();
if (!Options.SimplifyCondBranch)
return false;
@@ -6452,19 +6527,21 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
if (C->isNullValue() || isa<UndefValue>(C)) {
// Only look at the first use, avoid hurting compile time with long uselists
- User *Use = *I->user_begin();
+ auto *Use = cast<Instruction>(*I->user_begin());
+ // Bail out if Use is not in the same BB as I or Use == I or Use comes
+ // before I in the block. The latter two can be the case if Use is a PHI
+ // node.
+ if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
+ return false;
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
- for (BasicBlock::iterator
- i = ++BasicBlock::iterator(I),
- UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
- i != UI; ++i) {
- if (i == I->getParent()->end())
- return false;
- if (!isGuaranteedToTransferExecutionToSuccessor(&*i))
- return false;
- }
+ auto InstrRange =
+ make_range(std::next(I->getIterator()), Use->getIterator());
+ if (any_of(InstrRange, [](Instruction &I) {
+ return !isGuaranteedToTransferExecutionToSuccessor(&I);
+ }))
+ return false;
// Look through GEPs. A load from a GEP derived from NULL is still undefined
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
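The rewrite above is a common modernization: an explicit iterator walk with a manual end check becomes a half-open range plus any_of. The same shape over a std::list, assuming From precedes To just as the comesBefore guard above guarantees for the IR version; std::any_of stands in for llvm::any_of, and the "blocking" predicate is illustrative.

#include <algorithm>
#include <iterator>
#include <list>

// True if any element strictly between From and To is "blocking", mirroring
// "is there an instruction between I and Use that blocks the fold".
static bool anyBlockerBetween(std::list<int>::const_iterator From,
                              std::list<int>::const_iterator To) {
  return std::any_of(std::next(From), To, [](int V) { return V < 0; });
}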
@@ -6540,21 +6617,51 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
// destination from conditional branches.
if (BI->isUnconditional())
Builder.CreateUnreachable();
- else
+ else {
+      // Preserve the guarding condition in an assume, because it might not be
+      // inferable from any dominating condition.
+ Value *Cond = BI->getCondition();
+ if (BI->getSuccessor(0) == BB)
+ Builder.CreateAssumption(Builder.CreateNot(Cond));
+ else
+ Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
: BI->getSuccessor(0));
+ }
BI->eraseFromParent();
if (DTU)
DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
+ // Redirect all branches leading to UB into
+ // a newly created unreachable block.
+ BasicBlock *Unreachable = BasicBlock::Create(
+ Predecessor->getContext(), "unreachable", BB->getParent(), BB);
+ Builder.SetInsertPoint(Unreachable);
+ // The new block contains only one instruction: Unreachable
+ Builder.CreateUnreachable();
+ for (auto &Case : SI->cases())
+ if (Case.getCaseSuccessor() == BB) {
+ BB->removePredecessor(Predecessor);
+ Case.setSuccessor(Unreachable);
+ }
+ if (SI->getDefaultDest() == BB) {
+ BB->removePredecessor(Predecessor);
+ SI->setDefaultDest(Unreachable);
+ }
+
+ if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, Predecessor, Unreachable},
+                         {DominatorTree::Delete, Predecessor, BB}});
+ return true;
}
- // TODO: SwitchInst.
}
return false;
}
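At source level the branch case above amounts to deleting the edge that would immediately execute undefined behaviour while keeping the fact the branch encoded. A hedged C++ analogy, using Clang's __builtin_assume as a stand-in for the llvm.assume intrinsic; keepGuard and p are illustrative names.

// Before: one successor of the branch stores through a null pointer, so the
// edge into it can be removed:
//   if (p == nullptr) { *p = 1; } else { *p = 2; }
//
// After: the surviving edge is taken unconditionally, and the guard
// (p != nullptr) is preserved so later passes can still rely on it.
void keepGuard(int *p) {
  __builtin_assume(p != nullptr); // stand-in for the generated llvm.assume
  *p = 2;
}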
-bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
+bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
@@ -6578,7 +6685,8 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
Changed |= EliminateDuplicatePHINodes(BB);
// Check for and remove branches that will always cause undefined behavior.
- Changed |= removeUndefIntroducingPredecessor(BB, DTU);
+ if (removeUndefIntroducingPredecessor(BB, DTU))
+ return requestResimplify();
// Merge basic blocks into their predecessor if there is only one distinct
// pred, and if there is only one distinct successor of the predecessor, and
@@ -6603,7 +6711,8 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- Changed |= FoldTwoEntryPHINode(PN, TTI, DTU, DL);
+ if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
+ return true;
}
Instruction *Terminator = BB->getTerminator();
@@ -6632,12 +6741,6 @@ bool SimplifyCFGOpt::simplifyOnceImpl(BasicBlock *BB) {
return Changed;
}
-bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
- bool Changed = simplifyOnceImpl(BB);
-
- return Changed;
-}
-
bool SimplifyCFGOpt::run(BasicBlock *BB) {
bool Changed = false;
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index bd30be011472..5b7fd4349c6c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -942,6 +942,7 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
} // namespace llvm
+namespace {
//===----------------------------------------------------------------------===//
// Widen Induction Variables - Extend the width of an IV to cover its
// widest uses.
@@ -1072,7 +1073,7 @@ protected:
private:
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
};
-
+} // namespace
/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b8e0f63c481d..e190a1294eb3 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -142,12 +142,10 @@ static void annotateDereferenceableBytes(CallInst *CI,
unsigned AS = CI->getArgOperand(ArgNo)->getType()->getPointerAddressSpace();
if (!llvm::NullPointerIsDefined(F, AS) ||
CI->paramHasAttr(ArgNo, Attribute::NonNull))
- DerefBytes = std::max(CI->getDereferenceableOrNullBytes(
- ArgNo + AttributeList::FirstArgIndex),
+ DerefBytes = std::max(CI->getParamDereferenceableOrNullBytes(ArgNo),
DereferenceableBytes);
-
- if (CI->getDereferenceableBytes(ArgNo + AttributeList::FirstArgIndex) <
- DerefBytes) {
+
+ if (CI->getParamDereferenceableBytes(ArgNo) < DerefBytes) {
CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
if (!llvm::NullPointerIsDefined(F, AS) ||
CI->paramHasAttr(ArgNo, Attribute::NonNull))
@@ -512,14 +510,18 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilderBase &B) {
B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
Function *Callee = CI->getCalledFunction();
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+
+ // stpcpy(d,s) -> strcpy(d,s) if the result is not used.
+ if (CI->use_empty())
+ return emitStrCpy(Dst, Src, B, TLI);
+
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = emitStrLen(Src, B, DL, TLI);
return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
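The new early-out relies on a property of the C library interface: stpcpy differs from strcpy only in its return value (a pointer to the written terminating NUL), so when that value is unused the calls are interchangeable. A small illustration; stpcpy is POSIX rather than ISO C++, so it appears only in a comment.

#include <cstring>

void copyGreeting(char *dst) {
  // char *end = stpcpy(dst, "hi");  // 'end' would point at the written NUL
  // With 'end' unused, the call above writes exactly the same bytes as:
  std::strcpy(dst, "hi");
}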
@@ -541,8 +543,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilderBase &B) {
// copy for us. Make a memcpy to copy the nul byte with align = 1.
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1), LenV);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return DstEnd;
}
@@ -577,9 +578,9 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
if (SrcLen == 0) {
// strncpy(x, "", y) -> memset(x, '\0', y)
Align MemSetAlign =
- CI->getAttributes().getParamAttributes(0).getAlignment().valueOrOne();
+ CI->getAttributes().getParamAttrs(0).getAlignment().valueOrOne();
CallInst *NewCI = B.CreateMemSet(Dst, B.getInt8('\0'), Size, MemSetAlign);
- AttrBuilder ArgAttrs(CI->getAttributes().getParamAttributes(0));
+ AttrBuilder ArgAttrs(CI->getAttributes().getParamAttrs(0));
NewCI->setAttributes(NewCI->getAttributes().addParamAttributes(
CI->getContext(), 0, ArgAttrs));
return Dst;
@@ -604,8 +605,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(Dst, Align(1), Src, Align(1),
ConstantInt::get(DL.getIntPtrType(PT), Len));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return Dst;
}
@@ -1082,8 +1082,7 @@ Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemCpy(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1136,8 +1135,7 @@ Value *LibCallSimplifier::optimizeMemPCpy(CallInst *CI, IRBuilderBase &B) {
// any return attributes are compliant.
// TODO: Attach return value attributes to the 1st operand to preserve them?
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return B.CreateInBoundsGEP(B.getInt8Ty(), Dst, N);
}
@@ -1151,70 +1149,21 @@ Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilderBase &B) {
CallInst *NewCI = B.CreateMemMove(CI->getArgOperand(0), Align(1),
CI->getArgOperand(1), Align(1), Size);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
-/// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
-Value *LibCallSimplifier::foldMallocMemset(CallInst *Memset, IRBuilderBase &B) {
- // This has to be a memset of zeros (bzero).
- auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
- if (!FillValue || FillValue->getZExtValue() != 0)
- return nullptr;
-
- // TODO: We should handle the case where the malloc has more than one use.
- // This is necessary to optimize common patterns such as when the result of
- // the malloc is checked against null or when a memset intrinsic is used in
- // place of a memset library call.
- auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
- if (!Malloc || !Malloc->hasOneUse())
- return nullptr;
-
- // Is the inner call really malloc()?
- Function *InnerCallee = Malloc->getCalledFunction();
- if (!InnerCallee)
- return nullptr;
-
- LibFunc Func;
- if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) ||
- Func != LibFunc_malloc)
- return nullptr;
-
- // The memset must cover the same number of bytes that are malloc'd.
- if (Memset->getArgOperand(2) != Malloc->getArgOperand(0))
- return nullptr;
-
- // Replace the malloc with a calloc. We need the data layout to know what the
- // actual size of a 'size_t' parameter is.
- B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator());
- const DataLayout &DL = Malloc->getModule()->getDataLayout();
- IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext());
- if (Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1),
- Malloc->getArgOperand(0),
- Malloc->getAttributes(), B, *TLI)) {
- substituteInParent(Malloc, Calloc);
- return Calloc;
- }
-
- return nullptr;
-}
-
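For reference, the helper removed above implemented the classic equivalence sketched below (a zeroing memset of a freshly malloc'd buffer is a calloc); the sketch only restates that equivalence and says nothing about where, if anywhere, the fold now lives.

#include <cstdlib>
#include <cstring>

void *zeroedBufferA(std::size_t n) {
  void *p = std::malloc(n);
  if (p)
    std::memset(p, 0, n); // memset(malloc(n), 0, n) ...
  return p;
}

void *zeroedBufferB(std::size_t n) {
  return std::calloc(1, n); // ... is equivalent to calloc(1, n)
}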
Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilderBase &B) {
Value *Size = CI->getArgOperand(2);
annotateNonNullAndDereferenceable(CI, 0, Size, DL);
if (isa<IntrinsicInst>(CI))
return nullptr;
- if (auto *Calloc = foldMallocMemset(CI, B))
- return Calloc;
-
// memset(p, v, n) -> llvm.memset(align 1 p, v, n)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val, Size, Align(1));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
@@ -1346,13 +1295,13 @@ Value *LibCallSimplifier::optimizeCAbs(CallInst *CI, IRBuilderBase &B) {
B.setFastMathFlags(CI->getFastMathFlags());
Value *Real, *Imag;
- if (CI->getNumArgOperands() == 1) {
+ if (CI->arg_size() == 1) {
Value *Op = CI->getArgOperand(0);
assert(Op->getType()->isArrayTy() && "Unexpected signature for cabs!");
Real = B.CreateExtractValue(Op, 0, "real");
Imag = B.CreateExtractValue(Op, 1, "imag");
} else {
- assert(CI->getNumArgOperands() == 2 && "Unexpected signature for cabs!");
+ assert(CI->arg_size() == 2 && "Unexpected signature for cabs!");
Real = CI->getArgOperand(0);
Imag = CI->getArgOperand(1);
}
@@ -2333,7 +2282,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilderBase &B,
// Proceedings of PACT'98, Oct. 1998, IEEE
if (!CI->hasFnAttr(Attribute::Cold) &&
isReportingError(Callee, CI, StreamArg)) {
- CI->addAttribute(AttributeList::FunctionIndex, Attribute::Cold);
+ CI->addFnAttr(Attribute::Cold);
}
return nullptr;
@@ -2349,7 +2298,7 @@ static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) {
// These functions might be considered cold, but only if their stream
// argument is stderr.
- if (StreamArg >= (int)CI->getNumArgOperands())
+ if (StreamArg >= (int)CI->arg_size())
return false;
LoadInst *LI = dyn_cast<LoadInst>(CI->getArgOperand(StreamArg));
if (!LI)
@@ -2381,7 +2330,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
return emitPutChar(B.getInt32(FormatStr[0]), B, TLI);
// Try to remove call or emit putchar/puts.
- if (FormatStr == "%s" && CI->getNumArgOperands() > 1) {
+ if (FormatStr == "%s" && CI->arg_size() > 1) {
StringRef OperandStr;
if (!getConstantStringInfo(CI->getOperand(1), OperandStr))
return nullptr;
@@ -2402,7 +2351,7 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// printf("foo\n") --> puts("foo")
if (FormatStr.back() == '\n' &&
- FormatStr.find('%') == StringRef::npos) { // No format characters.
+ !FormatStr.contains('%')) { // No format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
FormatStr = FormatStr.drop_back();
@@ -2412,12 +2361,12 @@ Value *LibCallSimplifier::optimizePrintFString(CallInst *CI, IRBuilderBase &B) {
// Optimize specific format strings.
// printf("%c", chr) --> putchar(chr)
- if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ if (FormatStr == "%c" && CI->arg_size() > 1 &&
CI->getArgOperand(1)->getType()->isIntegerTy())
return emitPutChar(CI->getArgOperand(1), B, TLI);
// printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ if (FormatStr == "%s\n" && CI->arg_size() > 1 &&
CI->getArgOperand(1)->getType()->isPointerTy())
return emitPutS(CI->getArgOperand(1), B, TLI);
return nullptr;
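The folds in this function rely only on ISO C printf semantics. A compact illustration of the rewrites named in the comments, assuming the call result is unused (the return values of printf, putchar and puts differ):

#include <cstdio>

void printfFolds(int c, const char *s) {
  std::printf("%c", c);   // printf("%c", chr)   -> putchar(chr)
  std::putchar(c);

  std::printf("%s\n", s); // printf("%s\n", str) -> puts(str)
  std::puts(s);

  std::printf("foo\n");   // printf("foo\n")     -> puts("foo"); the trailing
  std::puts("foo");       // '\n' is dropped because puts appends one
}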
@@ -2469,10 +2418,10 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
// If we just have a format string (nothing else crazy) transform it.
Value *Dest = CI->getArgOperand(0);
- if (CI->getNumArgOperands() == 2) {
+ if (CI->arg_size() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
- if (FormatStr.find('%') != StringRef::npos)
+ if (FormatStr.contains('%'))
return nullptr; // we found a format specifier, bail out.
// sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1)
@@ -2485,8 +2434,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
return nullptr;
// Decode the second character of the format string.
@@ -2597,10 +2545,10 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
return nullptr;
// If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumArgOperands() == 3) {
+ if (CI->arg_size() == 3) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
- if (FormatStr.find('%') != StringRef::npos)
+ if (FormatStr.contains('%'))
return nullptr; // we found a format specifier, bail out.
if (N == 0)
@@ -2619,8 +2567,7 @@ Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI,
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() == 2 && FormatStr[0] == '%' &&
- CI->getNumArgOperands() == 4) {
+ if (FormatStr.size() == 2 && FormatStr[0] == '%' && CI->arg_size() == 4) {
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
@@ -2688,9 +2635,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
return nullptr;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumArgOperands() == 2) {
+ if (CI->arg_size() == 2) {
// Could handle %% -> % if we cared.
- if (FormatStr.find('%') != StringRef::npos)
+ if (FormatStr.contains('%'))
return nullptr; // We found a format specifier.
return emitFWrite(
@@ -2701,8 +2648,7 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI,
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
- CI->getNumArgOperands() < 3)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->arg_size() < 3)
return nullptr;
// Decode the second character of the format string.
@@ -3066,7 +3012,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
return optimizeLog(CI, Builder);
case Intrinsic::sqrt:
return optimizeSqrt(CI, Builder);
- // TODO: Use foldMallocMemset() with memset intrinsic.
case Intrinsic::memset:
return optimizeMemSet(CI, Builder);
case Intrinsic::memcpy:
@@ -3266,8 +3211,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,
B.CreateMemCpy(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3280,8 +3224,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
B.CreateMemMove(CI->getArgOperand(0), Align(1), CI->getArgOperand(1),
Align(1), CI->getArgOperand(2));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3289,15 +3232,12 @@ Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,
Value *FortifiedLibCallSimplifier::optimizeMemSetChk(CallInst *CI,
IRBuilderBase &B) {
- // TODO: Try foldMallocMemset() here.
-
if (isFortifiedCallFoldable(CI, 3, 2)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
CallInst *NewCI = B.CreateMemSet(CI->getArgOperand(0), Val,
CI->getArgOperand(2), Align(1));
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return CI->getArgOperand(0);
}
return nullptr;
@@ -3311,9 +3251,7 @@ Value *FortifiedLibCallSimplifier::optimizeMemPCpyChk(CallInst *CI,
CI->getArgOperand(2), B, DL, TLI)) {
CallInst *NewCI = cast<CallInst>(Call);
NewCI->setAttributes(CI->getAttributes());
- NewCI->removeAttributes(
- AttributeList::ReturnIndex,
- AttributeFuncs::typeIncompatible(NewCI->getType()));
+ NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType()));
return NewCI;
}
return nullptr;
@@ -3354,7 +3292,11 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
else
return nullptr;
- Type *SizeTTy = DL.getIntPtrType(CI->getContext());
+ // FIXME: There is really no guarantee that sizeof(size_t) is equal to
+ // sizeof(int*) for every target. So the assumption used here to derive the
+ // SizeTBits based on the size of an integer pointer in address space zero
+ // isn't always valid.
+ Type *SizeTTy = DL.getIntPtrType(CI->getContext(), /*AddressSpace=*/0);
Value *LenV = ConstantInt::get(SizeTTy, Len);
Value *Ret = emitMemCpyChk(Dst, Src, LenV, ObjSize, B, DL, TLI);
// If the function was an __stpcpy_chk, and we were able to fold it into
diff --git a/llvm/lib/Transforms/Utils/SplitModule.cpp b/llvm/lib/Transforms/Utils/SplitModule.cpp
index 32f2f4e233b2..7e12bbd2851c 100644
--- a/llvm/lib/Transforms/Utils/SplitModule.cpp
+++ b/llvm/lib/Transforms/Utils/SplitModule.cpp
@@ -24,7 +24,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
@@ -65,9 +64,8 @@ static void addNonConstUser(ClusterMapType &GVtoClusterMap,
if (const Instruction *I = dyn_cast<Instruction>(U)) {
const GlobalValue *F = I->getParent()->getParent();
GVtoClusterMap.unionSets(GV, F);
- } else if (isa<GlobalIndirectSymbol>(U) || isa<Function>(U) ||
- isa<GlobalVariable>(U)) {
- GVtoClusterMap.unionSets(GV, cast<GlobalValue>(U));
+ } else if (const GlobalValue *GVU = dyn_cast<GlobalValue>(U)) {
+ GVtoClusterMap.unionSets(GV, GVU);
} else {
llvm_unreachable("Underimplemented use case");
}
@@ -91,6 +89,13 @@ static void addAllGlobalValueUsers(ClusterMapType &GVtoClusterMap,
}
}
+static const GlobalObject *getGVPartitioningRoot(const GlobalValue *GV) {
+ const GlobalObject *GO = GV->getAliaseeObject();
+ if (const auto *GI = dyn_cast_or_null<GlobalIFunc>(GO))
+ GO = GI->getResolverFunction();
+ return GO;
+}
+
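A toy model of the new root lookup: follow the aliasee link if there is one, and if that lands on an ifunc continue to its resolver. ToyGlobal, Aliasee and Resolver are illustrative stand-ins for GlobalValue, getAliaseeObject() and getResolverFunction(), not the real types.

struct ToyGlobal {
  const ToyGlobal *Aliasee = nullptr;  // non-null for aliases
  const ToyGlobal *Resolver = nullptr; // non-null for ifuncs
};

static const ToyGlobal *partitioningRoot(const ToyGlobal *GV) {
  const ToyGlobal *GO = GV->Aliasee ? GV->Aliasee : GV; // alias -> aliasee
  if (GO->Resolver)                                     // ifunc -> resolver
    GO = GO->Resolver;
  return GO;
}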
// Find partitions for module in the way that no locals need to be
// globalized.
// Try to balance pack those partitions into N files since this roughly equals
@@ -123,12 +128,11 @@ static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap,
Member = &GV;
}
- // For aliases we should not separate them from their aliasees regardless
- // of linkage.
- if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(&GV)) {
- if (const GlobalObject *Base = GIS->getBaseObject())
- GVtoClusterMap.unionSets(&GV, Base);
- }
+ // Aliases should not be separated from their aliasees and ifuncs should
+ // not be separated from their resolvers regardless of linkage.
+ if (const GlobalObject *Root = getGVPartitioningRoot(&GV))
+ if (&GV != Root)
+ GVtoClusterMap.unionSets(&GV, Root);
if (const Function *F = dyn_cast<Function>(&GV)) {
for (const BasicBlock &BB : *F) {
@@ -225,9 +229,8 @@ static void externalize(GlobalValue *GV) {
// Returns whether GV should be in partition (0-based) I of N.
static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
- if (auto *GIS = dyn_cast<GlobalIndirectSymbol>(GV))
- if (const GlobalObject *Base = GIS->getBaseObject())
- GV = Base;
+ if (const GlobalObject *Root = getGVPartitioningRoot(GV))
+ GV = Root;
StringRef Name;
if (const Comdat *C = GV->getComdat())
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index ec4ea848a5d4..6a0eb34a7999 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -184,7 +184,7 @@ performOnModule(Module &M) {
std::string Name = Regex(Pattern).sub(Transform, C.getName(), &Error);
if (!Error.empty())
- report_fatal_error("unable to transforn " + C.getName() + " in " +
+      report_fatal_error(Twine("unable to transform ") + C.getName() + " in " +
M.getModuleIdentifier() + ": " + Error);
if (C.getName() == Name)
@@ -256,11 +256,11 @@ bool RewriteMapParser::parse(const std::string &MapFile,
MemoryBuffer::getFile(MapFile);
if (!Mapping)
- report_fatal_error("unable to read rewrite map '" + MapFile + "': " +
- Mapping.getError().message());
+ report_fatal_error(Twine("unable to read rewrite map '") + MapFile +
+ "': " + Mapping.getError().message());
if (!parse(*Mapping, DL))
- report_fatal_error("unable to parse rewrite map '" + MapFile + "'");
+ report_fatal_error(Twine("unable to parse rewrite map '") + MapFile + "'");
return true;
}
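The explicit Twine at the head of each chain makes every subsequent '+' produce another Twine, which report_fatal_error accepts; without a string-like object first, the chain is built through whatever operator+ the leftmost operands provide, and two plain string literals cannot be concatenated at all. A plain-C++ illustration of how the head of a '+' chain decides, with std::string standing in for llvm::Twine and the names chosen for illustration:

#include <string>

std::string buildMessage(const std::string &file, const std::string &err) {
  // return "unable to read rewrite map '" + "': ";  // ill-formed: adding two
  //                                                 // const char* pointers
  // Putting a string-like object first makes each '+' use its concatenation
  // operator instead of pointer arithmetic.
  return std::string("unable to read rewrite map '") + file + "': " + err;
}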
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 6336af25ef98..dbe3cc93e72b 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -403,19 +403,10 @@ int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
if (Offset == -1)
return Offset;
- unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
- if (Offset) {
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()),
- Src, OffsetCst);
- }
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- if (ConstantFoldLoadFromConstPtr(Src, LoadTy, DL))
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ if (ConstantFoldLoadFromConstPtr(Src, LoadTy, APInt(IndexSize, Offset), DL))
return Offset;
return -1;
}
@@ -584,19 +575,11 @@ T *getMemInstValueForLoadHelper(MemIntrinsic *SrcInst, unsigned Offset,
MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
Constant *Src = cast<Constant>(MTI->getSource());
- unsigned AS = Src->getType()->getPointerAddressSpace();
// Otherwise, see if we can constant fold a load from the constant with the
// offset applied as appropriate.
- if (Offset) {
- Src = ConstantExpr::getBitCast(Src,
- Type::getInt8PtrTy(Src->getContext(), AS));
- Constant *OffsetCst =
- ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
- Src = ConstantExpr::getGetElementPtr(Type::getInt8Ty(Src->getContext()),
- Src, OffsetCst);
- }
- Src = ConstantExpr::getBitCast(Src, PointerType::get(LoadTy, AS));
- return ConstantFoldLoadFromConstPtr(Src, LoadTy, DL);
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(Src->getType());
+ return ConstantFoldLoadFromConstPtr(
+ Src, LoadTy, APInt(IndexSize, Offset), DL);
}
/// This function is called when we have a
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index f3afd42e6163..c3eafd6b2492 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -26,7 +26,8 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
@@ -68,7 +69,7 @@ struct WorklistEntry {
enum EntryKind {
MapGlobalInit,
MapAppendingVar,
- MapGlobalIndirectSymbol,
+ MapAliasOrIFunc,
RemapFunction
};
struct GVInitTy {
@@ -79,8 +80,8 @@ struct WorklistEntry {
GlobalVariable *GV;
Constant *InitPrefix;
};
- struct GlobalIndirectSymbolTy {
- GlobalIndirectSymbol *GIS;
+ struct AliasOrIFuncTy {
+ GlobalValue *GV;
Constant *Target;
};
@@ -91,7 +92,7 @@ struct WorklistEntry {
union {
GVInitTy GVInit;
AppendingGVTy AppendingGV;
- GlobalIndirectSymbolTy GlobalIndirectSymbol;
+ AliasOrIFuncTy AliasOrIFunc;
Function *RemapF;
} Data;
};
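The worklist entry above is a hand-rolled tagged union: an EntryKind discriminant plus a raw union whose active member is implied by the kind. For contrast, a hedged sketch of the same shape with std::variant, where the variant itself tracks the active alternative; the member types are illustrative placeholders, not the LLVM ones.

#include <variant>

struct GVInitSketch       { void *GV; void *Init; };
struct AppendingGVSketch  { void *GV; void *InitPrefix; };
struct AliasOrIFuncSketch { void *GV; void *Target; }; // aliasee or resolver
struct RemapFnSketch      { void *F; };

using WorklistEntrySketch =
    std::variant<GVInitSketch, AppendingGVSketch, AliasOrIFuncSketch,
                 RemapFnSketch>;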
@@ -163,8 +164,8 @@ public:
bool IsOldCtorDtor,
ArrayRef<Constant *> NewMembers,
unsigned MCID);
- void scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS, Constant &Target,
- unsigned MCID);
+ void scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target,
+ unsigned MCID);
void scheduleRemapFunction(Function &F, unsigned MCID);
void flush();
@@ -873,10 +874,17 @@ void Mapper::flush() {
E.AppendingGVIsOldCtorDtor, makeArrayRef(NewInits));
break;
}
- case WorklistEntry::MapGlobalIndirectSymbol:
- E.Data.GlobalIndirectSymbol.GIS->setIndirectSymbol(
- mapConstant(E.Data.GlobalIndirectSymbol.Target));
+ case WorklistEntry::MapAliasOrIFunc: {
+ GlobalValue *GV = E.Data.AliasOrIFunc.GV;
+ Constant *Target = mapConstant(E.Data.AliasOrIFunc.Target);
+ if (auto *GA = dyn_cast<GlobalAlias>(GV))
+ GA->setAliasee(Target);
+ else if (auto *GI = dyn_cast<GlobalIFunc>(GV))
+ GI->setResolver(Target);
+ else
+ llvm_unreachable("Not alias or ifunc");
break;
+ }
case WorklistEntry::RemapFunction:
remapFunction(*E.Data.RemapF);
break;
@@ -944,12 +952,13 @@ void Mapper::remapInstruction(Instruction *I) {
LLVMContext &C = CB->getContext();
AttributeList Attrs = CB->getAttributes();
for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
- for (Attribute::AttrKind TypedAttr :
- {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef,
- Attribute::InAlloca}) {
- if (Type *Ty = Attrs.getAttribute(i, TypedAttr).getValueAsType()) {
- Attrs = Attrs.replaceAttributeType(C, i, TypedAttr,
- TypeMapper->remapType(Ty));
+ for (int AttrIdx = Attribute::FirstTypeAttr;
+ AttrIdx <= Attribute::LastTypeAttr; AttrIdx++) {
+ Attribute::AttrKind TypedAttr = (Attribute::AttrKind)AttrIdx;
+ if (Type *Ty =
+ Attrs.getAttributeAtIndex(i, TypedAttr).getValueAsType()) {
+ Attrs = Attrs.replaceAttributeTypeAtIndex(C, i, TypedAttr,
+ TypeMapper->remapType(Ty));
break;
}
}
@@ -1068,16 +1077,18 @@ void Mapper::scheduleMapAppendingVariable(GlobalVariable &GV,
AppendingInits.append(NewMembers.begin(), NewMembers.end());
}
-void Mapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
- Constant &Target, unsigned MCID) {
- assert(AlreadyScheduled.insert(&GIS).second && "Should not reschedule");
+void Mapper::scheduleMapAliasOrIFunc(GlobalValue &GV, Constant &Target,
+ unsigned MCID) {
+ assert(AlreadyScheduled.insert(&GV).second && "Should not reschedule");
+ assert((isa<GlobalAlias>(GV) || isa<GlobalIFunc>(GV)) &&
+ "Should be alias or ifunc");
assert(MCID < MCs.size() && "Invalid mapping context");
WorklistEntry WE;
- WE.Kind = WorklistEntry::MapGlobalIndirectSymbol;
+ WE.Kind = WorklistEntry::MapAliasOrIFunc;
WE.MCID = MCID;
- WE.Data.GlobalIndirectSymbol.GIS = &GIS;
- WE.Data.GlobalIndirectSymbol.Target = &Target;
+ WE.Data.AliasOrIFunc.GV = &GV;
+ WE.Data.AliasOrIFunc.Target = &Target;
Worklist.push_back(WE);
}
@@ -1174,10 +1185,14 @@ void ValueMapper::scheduleMapAppendingVariable(GlobalVariable &GV,
GV, InitPrefix, IsOldCtorDtor, NewMembers, MCID);
}
-void ValueMapper::scheduleMapGlobalIndirectSymbol(GlobalIndirectSymbol &GIS,
- Constant &Target,
- unsigned MCID) {
- getAsMapper(pImpl)->scheduleMapGlobalIndirectSymbol(GIS, Target, MCID);
+void ValueMapper::scheduleMapGlobalAlias(GlobalAlias &GA, Constant &Aliasee,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GA, Aliasee, MCID);
+}
+
+void ValueMapper::scheduleMapGlobalIFunc(GlobalIFunc &GI, Constant &Resolver,
+ unsigned MCID) {
+ getAsMapper(pImpl)->scheduleMapAliasOrIFunc(GI, Resolver, MCID);
}
void ValueMapper::scheduleRemapFunction(Function &F, unsigned MCID) {
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 3b90997100f1..5a4a2f0924f6 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -694,31 +694,16 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
});
for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
- if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
- if (!is_contained(Chain, &I))
- MemoryInstrs.push_back(&I);
- else
- ChainInstrs.push_back(&I);
- } else if (isa<IntrinsicInst>(&I) &&
- cast<IntrinsicInst>(&I)->getIntrinsicID() ==
- Intrinsic::sideeffect) {
- // Ignore llvm.sideeffect calls.
- } else if (isa<IntrinsicInst>(&I) &&
- cast<IntrinsicInst>(&I)->getIntrinsicID() ==
- Intrinsic::pseudoprobe) {
- // Ignore llvm.pseudoprobe calls.
- } else if (isa<IntrinsicInst>(&I) &&
- cast<IntrinsicInst>(&I)->getIntrinsicID() == Intrinsic::assume) {
- // Ignore llvm.assume calls.
- } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
- LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
- << '\n');
- break;
- } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
- LLVM_DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
- << '\n');
+ if ((isa<LoadInst>(I) || isa<StoreInst>(I)) && is_contained(Chain, &I)) {
+ ChainInstrs.push_back(&I);
+ continue;
+ }
+ if (I.mayThrow()) {
+ LLVM_DEBUG(dbgs() << "LSV: Found may-throw operation: " << I << '\n');
break;
}
+ if (I.mayReadOrWriteMemory())
+ MemoryInstrs.push_back(&I);
}
// Loop until we find an instruction in ChainInstrs that we can't vectorize.
@@ -751,26 +736,28 @@ Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
return LI->hasMetadata(LLVMContext::MD_invariant_load);
};
- // We can ignore the alias as long as the load comes before the store,
- // because that means we won't be moving the load past the store to
- // vectorize it (the vectorized load is inserted at the location of the
- // first load in the chain).
- if (isa<StoreInst>(MemInstr) && ChainLoad &&
- (IsInvariantLoad(ChainLoad) || ChainLoad->comesBefore(MemInstr)))
- continue;
-
- // Same case, but in reverse.
- if (MemLoad && isa<StoreInst>(ChainInstr) &&
- (IsInvariantLoad(MemLoad) || MemLoad->comesBefore(ChainInstr)))
- continue;
+ if (IsLoadChain) {
+ // We can ignore the alias as long as the load comes before the store,
+ // because that means we won't be moving the load past the store to
+ // vectorize it (the vectorized load is inserted at the location of the
+ // first load in the chain).
+ if (ChainInstr->comesBefore(MemInstr) ||
+ (ChainLoad && IsInvariantLoad(ChainLoad)))
+ continue;
+ } else {
+ // Same case, but in reverse.
+ if (MemInstr->comesBefore(ChainInstr) ||
+ (MemLoad && IsInvariantLoad(MemLoad)))
+ continue;
+ }
- if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
- MemoryLocation::get(ChainInstr))) {
+ ModRefInfo MR =
+ AA.getModRefInfo(MemInstr, MemoryLocation::get(ChainInstr));
+ if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
LLVM_DEBUG({
dbgs() << "LSV: Found alias:\n"
- " Aliasing instruction and pointer:\n"
+ " Aliasing instruction:\n"
<< " " << *MemInstr << '\n'
- << " " << *getLoadStorePointerOperand(MemInstr) << '\n'
<< " Aliased instruction and pointer:\n"
<< " " << *ChainInstr << '\n'
<< " " << *getLoadStorePointerOperand(ChainInstr) << '\n';
@@ -1085,9 +1072,12 @@ bool Vectorizer::vectorizeStoreChain(
if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
" Creating two separate arrays.\n");
- return vectorizeStoreChain(Chain.slice(0, TargetVF),
- InstructionsProcessed) |
- vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |=
+ vectorizeStoreChain(Chain.slice(0, TargetVF), InstructionsProcessed);
+ Vectorized |=
+ vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
+ return Vectorized;
}
LLVM_DEBUG({
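Replacing the single '|' expression with two '|=' statements makes the evaluation order of the two recursive calls explicit. Both operands of '|' are always evaluated (it does not short-circuit like '||'), but their order is unspecified, which matters when each call updates shared state such as InstructionsProcessed. A minimal self-contained illustration; consume and Work are illustrative names.

#include <vector>

static bool consume(std::vector<int> &Work) {
  if (Work.empty())
    return false;
  Work.pop_back(); // shared state mutated by every call
  return true;
}

static bool processBoth(std::vector<int> &Work) {
  // return consume(Work) | consume(Work);  // both calls run, but in an
  //                                        // unspecified order
  bool Changed = false;
  Changed |= consume(Work); // explicit, deterministic order
  Changed |= consume(Work);
  return Changed;
}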
@@ -1104,8 +1094,10 @@ bool Vectorizer::vectorizeStoreChain(
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
- vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
Align NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
@@ -1119,15 +1111,17 @@ bool Vectorizer::vectorizeStoreChain(
if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
- vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
BasicBlock::iterator First, Last;
std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*Last);
- Value *Vec = UndefValue::get(VecTy);
+ Value *Vec = PoisonValue::get(VecTy);
if (VecStoreTy) {
unsigned VecWidth = VecStoreTy->getNumElements();
@@ -1237,8 +1231,12 @@ bool Vectorizer::vectorizeLoadChain(
if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
" Creating two separate arrays.\n");
- return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
- vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |=
+ vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed);
+ Vectorized |=
+ vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
+ return Vectorized;
}
// We won't try again to vectorize the elements of the chain, regardless of
@@ -1249,8 +1247,10 @@ bool Vectorizer::vectorizeLoadChain(
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
- vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
Align NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
@@ -1264,8 +1264,10 @@ bool Vectorizer::vectorizeLoadChain(
if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
auto Chains = splitOddVectorElts(Chain, Sz);
- return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
- vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ bool Vectorized = false;
+ Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
+ Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
+ return Vectorized;
}
LLVM_DEBUG({
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3c484fb0d28a..805011191da0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -419,7 +419,8 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}
-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
+int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
+ Value *Ptr) const {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
@@ -428,7 +429,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);
bool CanAddPredicate = !OptForSize;
- int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
+ int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
+ CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
@@ -747,7 +749,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (CI) {
auto *SE = PSE.getSE();
Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 5c4c4fdfa3f7..a7d6609f8c56 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -268,12 +268,6 @@ class LoopVectorizationPlanner {
/// A builder used to construct the current plan.
VPBuilder Builder;
- /// The best number of elements of the vector types used in the
- /// transformed loop. BestVF = None means that vectorization is
- /// disabled.
- Optional<ElementCount> BestVF = None;
- unsigned BestUF = 0;
-
public:
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
@@ -295,12 +289,13 @@ public:
/// VF and its cost.
VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
- /// Finalize the best decision and dispose of all other VPlans.
- void setBestPlan(ElementCount VF, unsigned UF);
+ /// Return the best VPlan for \p VF.
+ VPlan &getBestPlanFor(ElementCount VF) const;
/// Generate the IR code for the body of the vectorized loop according to the
- /// best selected VPlan.
- void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);
+ /// best selected \p VF, \p UF and VPlan \p BestPlan.
+ void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
+ InnerLoopVectorizer &LB, DominatorTree *DT);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printPlans(raw_ostream &O);
@@ -308,12 +303,9 @@ public:
/// Look through the existing plans and return true if we have one with all
/// the vectorization factors in question.
- bool hasPlanWithVFs(const ArrayRef<ElementCount> VFs) const {
- return any_of(VPlans, [&](const VPlanPtr &Plan) {
- return all_of(VFs, [&](const ElementCount &VF) {
- return Plan->hasVF(VF);
- });
- });
+ bool hasPlanWithVF(ElementCount VF) const {
+ return any_of(VPlans,
+ [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
}
/// Test a \p Predicate on a \p Range of VF's. Return the value of applying
@@ -351,13 +343,14 @@ private:
/// legal to vectorize the loop. This method creates VPlans using VPRecipes.
void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
- /// Adjust the recipes for any inloop reductions. The chain of instructions
- /// leading from the loop exit instr to the phi need to be converted to
- /// reductions, with one operand being vector and the other being the scalar
- /// reduction chain.
- void adjustRecipesForInLoopReductions(VPlanPtr &Plan,
- VPRecipeBuilder &RecipeBuilder,
- ElementCount MinVF);
+ // Adjust the recipes for reductions. For in-loop reductions the chain of
+ // instructions leading from the loop exit instr to the phi need to be
+ // converted to reductions, with one operand being vector and the other being
+ // the scalar reduction chain. For other reductions, a select is introduced
+ // between the phi and live-out recipes when folding the tail.
+ void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
+ VPRecipeBuilder &RecipeBuilder,
+ ElementCount MinVF);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f24ae6b100d5..23bb6f0860c9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -87,7 +87,6 @@
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
-#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -332,8 +331,8 @@ static cl::opt<bool>
cl::desc("Prefer in-loop vector reductions, "
"overriding the targets preference."));
-cl::opt<bool> EnableStrictReductions(
- "enable-strict-reductions", cl::init(false), cl::Hidden,
+static cl::opt<bool> ForceOrderedReductions(
+ "force-ordered-reductions", cl::init(false), cl::Hidden,
cl::desc("Enable the vectorisation of loops with in-order (strict) "
"FP reductions"));
@@ -545,7 +544,8 @@ public:
/// vectorized loop.
void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask);
+ VPValue *StoredValue, VPValue *BlockInMask,
+ bool ConsecutiveStride, bool Reverse);
/// Set the debug location in the builder \p Ptr using the debug location in
/// \p V. If \p Ptr is None then it uses the class member's Builder.
@@ -590,12 +590,11 @@ protected:
/// Handle all cross-iteration phis in the header.
void fixCrossIterationPHIs(VPTransformState &State);
- /// Fix a first-order recurrence. This is the second phase of vectorizing
- /// this phi node.
+ /// Create the exit value of first order recurrences in the middle block and
+ /// update their users.
void fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR, VPTransformState &State);
- /// Fix a reduction cross-iteration phi. This is the second phase of
- /// vectorizing this phi node.
+ /// Create code for the loop exit value of the reduction.
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
/// Clear NSW/NUW flags from reduction instructions if necessary.
@@ -621,9 +620,9 @@ protected:
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
/// \p Opcode is relevant for FP induction variable.
- virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step,
- Instruction::BinaryOps Opcode =
- Instruction::BinaryOpsEnd);
+ virtual Value *
+ getStepVector(Value *Val, Value *StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd);
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step, and
@@ -890,9 +889,9 @@ public:
private:
Value *getBroadcastInstrs(Value *V) override;
- Value *getStepVector(Value *Val, int StartIdx, Value *Step,
- Instruction::BinaryOps Opcode =
- Instruction::BinaryOpsEnd) override;
+ Value *getStepVector(
+ Value *Val, Value *StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode = Instruction::BinaryOpsEnd) override;
Value *reverseVector(Value *Vec) override;
};
@@ -911,10 +910,9 @@ struct EpilogueLoopVectorizationInfo {
Value *TripCount = nullptr;
Value *VectorTripCount = nullptr;
- EpilogueLoopVectorizationInfo(unsigned MVF, unsigned MUF, unsigned EVF,
- unsigned EUF)
- : MainLoopVF(ElementCount::getFixed(MVF)), MainLoopUF(MUF),
- EpilogueVF(ElementCount::getFixed(EVF)), EpilogueUF(EUF) {
+ EpilogueLoopVectorizationInfo(ElementCount MVF, unsigned MUF,
+ ElementCount EVF, unsigned EUF)
+ : MainLoopVF(MVF), MainLoopUF(MUF), EpilogueVF(EVF), EpilogueUF(EUF) {
assert(EUF == 1 &&
"A high UF for the epilogue loop is likely not beneficial.");
}
@@ -1105,11 +1103,10 @@ static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
}
/// Return a value for Step multiplied by VF.
-static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) {
- assert(isa<ConstantInt>(Step) && "Expected an integer step");
- Constant *StepVal = ConstantInt::get(
- Step->getType(),
- cast<ConstantInt>(Step)->getSExtValue() * VF.getKnownMinValue());
+static Value *createStepForVF(IRBuilder<> &B, Type *Ty, ElementCount VF,
+ int64_t Step) {
+ assert(Ty->isIntegerTy() && "Expected an integer step");
+ Constant *StepVal = ConstantInt::get(Ty, Step * VF.getKnownMinValue());
return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal;
}
@@ -1121,6 +1118,13 @@ Value *getRuntimeVF(IRBuilder<> &B, Type *Ty, ElementCount VF) {
return VF.isScalable() ? B.CreateVScale(EC) : EC;
}
+static Value *getRuntimeVFAsFloat(IRBuilder<> &B, Type *FTy, ElementCount VF) {
+ assert(FTy->isFloatingPointTy() && "Expected floating point type!");
+ Type *IntTy = IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
+ Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
+ return B.CreateUIToFP(RuntimeVF, FTy);
+}
+
void reportVectorizationFailure(const StringRef DebugMsg,
const StringRef OREMsg, const StringRef ORETag,
OptimizationRemarkEmitter *ORE, Loop *TheLoop,
@@ -1319,8 +1323,7 @@ public:
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
/// of FP operations.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) {
- return EnableStrictReductions && !Hints->allowReordering() &&
- RdxDesc.isOrdered();
+ return !Hints->allowReordering() && RdxDesc.isOrdered();
}
/// \returns The smallest bitwidth each instruction can be represented with.
@@ -1495,14 +1498,14 @@ public:
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment) const {
- return Legal->isConsecutivePtr(Ptr) &&
+ return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedStore(DataType, Alignment);
}
/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment) const {
- return Legal->isConsecutivePtr(Ptr) &&
+ return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedLoad(DataType, Alignment);
}
@@ -1539,7 +1542,7 @@ public:
// through scalar predication or masked load/store or masked gather/scatter.
// Superset of instructions that return true for isScalarWithPredication.
bool isPredicatedInst(Instruction *I) {
- if (!blockNeedsPredication(I->getParent()))
+ if (!blockNeedsPredicationForAnyReason(I->getParent()))
return false;
// Loads and stores that need some form of masked operation are predicated
// instructions.
@@ -1593,7 +1596,10 @@ public:
/// Returns true if all loop blocks should be masked to fold tail loop.
bool foldTailByMasking() const { return FoldTailByMasking; }
- bool blockNeedsPredication(BasicBlock *BB) const {
+  /// Returns true if the instructions in this block require predication
+ /// for any reason, e.g. because tail folding now requires a predicate
+ /// or because the block in the original loop was predicated.
+ bool blockNeedsPredicationForAnyReason(BasicBlock *BB) const {
return foldTailByMasking() || Legal->blockNeedsPredication(BB);
}
@@ -1928,7 +1934,7 @@ class GeneratedRTChecks {
/// The value representing the result of the generated memory runtime checks.
/// If it is nullptr, either no memory runtime checks have been generated or
/// they have been used.
- Instruction *MemRuntimeCheckCond = nullptr;
+ Value *MemRuntimeCheckCond = nullptr;
DominatorTree *DT;
LoopInfo *LI;
@@ -1971,7 +1977,7 @@ public:
MemCheckBlock = SplitBlock(Pred, Pred->getTerminator(), DT, LI, nullptr,
"vector.memcheck");
- std::tie(std::ignore, MemRuntimeCheckCond) =
+ MemRuntimeCheckCond =
addRuntimeChecks(MemCheckBlock->getTerminator(), L,
RtPtrChecking.getChecks(), MemCheckExp);
assert(MemRuntimeCheckCond &&
@@ -2030,7 +2036,6 @@ public:
if (MemCheckExp.isInsertedInstruction(&I))
continue;
SE.forgetValue(&I);
- SE.eraseValueFromMap(&I);
I.eraseFromParent();
}
}
@@ -2289,9 +2294,11 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
Step = Builder.CreateTrunc(Step, TruncType);
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
}
+
+ Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
Value *SplatStart = Builder.CreateVectorSplat(VF, Start);
Value *SteppedStart =
- getStepVector(SplatStart, 0, Step, II.getInductionOpcode());
+ getStepVector(SplatStart, Zero, Step, II.getInductionOpcode());
// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
@@ -2308,12 +2315,11 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
// Multiply the vectorization factor by the step using integer or
// floating-point arithmetic as appropriate.
Type *StepType = Step->getType();
+ Value *RuntimeVF;
if (Step->getType()->isFloatingPointTy())
- StepType = IntegerType::get(StepType->getContext(),
- StepType->getScalarSizeInBits());
- Value *RuntimeVF = getRuntimeVF(Builder, StepType, VF);
- if (Step->getType()->isFloatingPointTy())
- RuntimeVF = Builder.CreateSIToFP(RuntimeVF, Step->getType());
+ RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, VF);
+ else
+ RuntimeVF = getRuntimeVF(Builder, StepType, VF);
Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);
// Create a vector splat to use in the induction update.
@@ -2388,9 +2394,13 @@ void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
if (isa<TruncInst>(EntryVal))
return;
- const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
- if (Casts.empty())
+ if (!CastDef) {
+ assert(ID.getCastInsts().empty() &&
+ "there are casts for ID, but no CastDef");
return;
+ }
+ assert(!ID.getCastInsts().empty() &&
+ "there is a CastDef, but no casts for ID");
// Only the first Cast instruction in the Casts vector is of interest.
// The rest of the Casts (if exist) have no uses outside the
// induction update chain itself.
@@ -2462,9 +2472,14 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
Value *Broadcasted = getBroadcastInstrs(ScalarIV);
for (unsigned Part = 0; Part < UF; ++Part) {
assert(!VF.isScalable() && "scalable vectors not yet supported.");
+ Value *StartIdx;
+ if (Step->getType()->isFloatingPointTy())
+ StartIdx = getRuntimeVFAsFloat(Builder, Step->getType(), VF * Part);
+ else
+ StartIdx = getRuntimeVF(Builder, Step->getType(), VF * Part);
+
Value *EntryPart =
- getStepVector(Broadcasted, VF.getKnownMinValue() * Part, Step,
- ID.getInductionOpcode());
+ getStepVector(Broadcasted, StartIdx, Step, ID.getInductionOpcode());
State.set(Def, EntryPart, Part);
if (Trunc)
addMetadata(EntryPart, Trunc);
@@ -2520,7 +2535,8 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
buildScalarSteps(ScalarIV, Step, EntryVal, ID, Def, CastDef, State);
}
-Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
+Value *InnerLoopVectorizer::getStepVector(Value *Val, Value *StartIdx,
+ Value *Step,
Instruction::BinaryOps BinOp) {
// Create and check the types.
auto *ValVTy = cast<VectorType>(Val->getType());
@@ -2543,12 +2559,11 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);
- // Add on StartIdx
- Value *StartIdxSplat = Builder.CreateVectorSplat(
- VLen, ConstantInt::get(InitVecValSTy, StartIdx));
- InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
+ // Splat the StartIdx
+ Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);
if (STy->isIntegerTy()) {
+ InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
@@ -2561,6 +2576,8 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
"Binary Opcode should be specified for FP induction");
InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
+ InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);
+
Step = Builder.CreateVectorSplat(VLen, Step);
Value *MulOp = Builder.CreateFMul(InitVec, Step);
return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
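Lane by lane, the vector built here can be modelled as follows (a standalone sketch under the assumption that plain scalars stand in for vector lanes): lane i of the result is Val[i] combined with (StartIdx + i) * Step, which is why the change only has to splat the now-runtime StartIdx instead of embedding a compile-time constant.

#include <cstddef>
#include <cstdint>
#include <vector>

// Lane-wise model of getStepVector for an integer induction: the step vector
// <0,1,2,...> is offset by the splatted StartIdx, scaled by Step and added to
// the splatted start value, i.e. Out[i] = Val[i] + (StartIdx + i) * Step.
std::vector<int64_t> stepVectorModel(const std::vector<int64_t> &Val,
                                     int64_t StartIdx, int64_t Step) {
  std::vector<int64_t> Out(Val.size());
  for (size_t I = 0; I < Val.size(); ++I)
    Out[I] = Val[I] + (StartIdx + static_cast<int64_t>(I)) * Step;
  return Out;
}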
@@ -2609,8 +2626,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
}
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *StartIdx0 =
- createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF);
+ Value *StartIdx0 = createStepForVF(Builder, IntStepTy, VF, Part);
if (!IsUniform && VF.isScalable()) {
auto *SplatStartIdx = Builder.CreateVectorSplat(VF, StartIdx0);
@@ -2838,12 +2854,25 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
auto *SubVT = VectorType::get(ScalarTy, VF);
// Vectorize the interleaved store group.
+ MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
+ assert((!MaskForGaps || useMaskedInterleavedAccesses(*TTI)) &&
+ "masked interleaved groups are not allowed.");
+ assert((!MaskForGaps || !VF.isScalable()) &&
+ "masking gaps for scalable vectors is not yet supported.");
for (unsigned Part = 0; Part < UF; Part++) {
// Collect the stored vector from each member.
SmallVector<Value *, 4> StoredVecs;
for (unsigned i = 0; i < InterleaveFactor; i++) {
- // Interleaved store group doesn't allow a gap, so each index has a member
- assert(Group->getMember(i) && "Fail to get a member from an interleaved store group");
+      assert((Group->getMember(i) || MaskForGaps) &&
+             "Failed to get a member from an interleaved store group");
+ Instruction *Member = Group->getMember(i);
+
+ // Skip the gaps in the group.
+ if (!Member) {
+ Value *Undef = PoisonValue::get(SubVT);
+ StoredVecs.push_back(Undef);
+ continue;
+ }
Value *StoredVec = State.get(StoredValues[i], Part);
@@ -2867,16 +2896,21 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
"interleaved.vec");
Instruction *NewStoreInstr;
- if (BlockInMask) {
- Value *BlockInMaskPart = State.get(BlockInMask, Part);
- Value *ShuffledMask = Builder.CreateShuffleVector(
- BlockInMaskPart,
- createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
- "interleaved.mask");
- NewStoreInstr = Builder.CreateMaskedStore(
- IVec, AddrParts[Part], Group->getAlign(), ShuffledMask);
- }
- else
+ if (BlockInMask || MaskForGaps) {
+ Value *GroupMask = MaskForGaps;
+ if (BlockInMask) {
+ Value *BlockInMaskPart = State.get(BlockInMask, Part);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ BlockInMaskPart,
+ createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
+ "interleaved.mask");
+ GroupMask = MaskForGaps ? Builder.CreateBinOp(Instruction::And,
+ ShuffledMask, MaskForGaps)
+ : ShuffledMask;
+ }
+ NewStoreInstr = Builder.CreateMaskedStore(IVec, AddrParts[Part],
+ Group->getAlign(), GroupMask);
+ } else
NewStoreInstr =
Builder.CreateAlignedStore(IVec, AddrParts[Part], Group->getAlign());
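For illustration, the gap mask combined with the block mask above has a simple repeating shape (a standalone sketch, with bool standing in for the i1 mask lanes that createBitMaskForGaps produces): one lane per interleave slot for each of the VF scalar iterations, cleared wherever the group has no member, so the masked wide store skips exactly the lanes filled with poison for the gaps.

#include <vector>

// Present[J] is true iff slot J of the interleave group has a member.
// For factor 3 with slot 1 missing and VF 4 this yields
// <1,0,1, 1,0,1, 1,0,1, 1,0,1>.
std::vector<bool> gapMaskModel(unsigned VF, const std::vector<bool> &Present) {
  std::vector<bool> Mask;
  for (unsigned I = 0; I < VF; ++I)
    Mask.insert(Mask.end(), Present.begin(), Present.end());
  return Mask;
}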
@@ -2886,7 +2920,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
void InnerLoopVectorizer::vectorizeMemoryInstruction(
Instruction *Instr, VPTransformState &State, VPValue *Def, VPValue *Addr,
- VPValue *StoredValue, VPValue *BlockInMask) {
+ VPValue *StoredValue, VPValue *BlockInMask, bool ConsecutiveStride,
+ bool Reverse) {
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -2895,31 +2930,11 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(
assert((!SI || StoredValue) && "No stored value provided for widened store");
assert((!LI || !StoredValue) && "Stored value provided for widened load");
- LoopVectorizationCostModel::InstWidening Decision =
- Cost->getWideningDecision(Instr, VF);
- assert((Decision == LoopVectorizationCostModel::CM_Widen ||
- Decision == LoopVectorizationCostModel::CM_Widen_Reverse ||
- Decision == LoopVectorizationCostModel::CM_GatherScatter) &&
- "CM decision is not to widen the memory instruction");
-
Type *ScalarDataTy = getLoadStoreType(Instr);
auto *DataTy = VectorType::get(ScalarDataTy, VF);
const Align Alignment = getLoadStoreAlignment(Instr);
-
- // Determine if the pointer operand of the access is either consecutive or
- // reverse consecutive.
- bool Reverse = (Decision == LoopVectorizationCostModel::CM_Widen_Reverse);
- bool ConsecutiveStride =
- Reverse || (Decision == LoopVectorizationCostModel::CM_Widen);
- bool CreateGatherScatter =
- (Decision == LoopVectorizationCostModel::CM_GatherScatter);
-
- // Either Ptr feeds a vector load/store, or a vector GEP should feed a vector
- // gather/scatter. Otherwise Decision should have been to Scalarize.
- assert((ConsecutiveStride || CreateGatherScatter) &&
- "The instruction should be scalarized");
- (void)ConsecutiveStride;
+ bool CreateGatherScatter = !ConsecutiveStride;
VectorParts BlockInMaskParts(UF);
bool isMaskRequired = BlockInMask;
@@ -2953,7 +2968,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
BlockInMaskParts[Part] = reverseVector(BlockInMaskParts[Part]);
} else {
- Value *Increment = createStepForVF(Builder, Builder.getInt32(Part), VF);
+ Value *Increment =
+ createStepForVF(Builder, Builder.getInt32Ty(), VF, Part);
PartPtr = cast<GetElementPtrInst>(
Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
PartPtr->setIsInBounds(InBounds);
@@ -3172,7 +3188,7 @@ Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) {
Type *Ty = TC->getType();
// This is where we can make the step a runtime constant.
- Value *Step = createStepForVF(Builder, ConstantInt::get(Ty, UF), VF);
+ Value *Step = createStepForVF(Builder, Ty, VF, UF);
// If the tail is to be folded by masking, round the number of iterations N
// up to a multiple of Step instead of rounding down. This is done by first
@@ -3262,8 +3278,7 @@ void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L,
// If tail is to be folded, vector loop takes care of all iterations.
Value *CheckMinIters = Builder.getFalse();
if (!Cost->foldTailByMasking()) {
- Value *Step =
- createStepForVF(Builder, ConstantInt::get(Count->getType(), UF), VF);
+ Value *Step = createStepForVF(Builder, Count->getType(), VF, UF);
CheckMinIters = Builder.CreateICmp(P, Count, Step, "min.iters.check");
}
// Create new preheader for vector loop.
@@ -3433,7 +3448,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
return B.CreateGEP(
- StartValue->getType()->getPointerElementType(), StartValue,
+ ID.getElementType(), StartValue,
CreateMul(Index,
Exp.expandCodeFor(Step, Index->getType()->getScalarType(),
GetInsertPoint())));
@@ -3739,7 +3754,7 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// The loop step is equal to the vectorization factor (num of SIMD elements)
// times the unroll factor (num of SIMD instructions).
Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt());
- Value *Step = createStepForVF(Builder, ConstantInt::get(IdxTy, UF), VF);
+ Value *Step = createStepForVF(Builder, IdxTy, VF, UF);
Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
Induction =
createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
@@ -3857,21 +3872,19 @@ struct CSEDenseMapInfo {
static void cse(BasicBlock *BB) {
// Perform simple cse.
SmallDenseMap<Instruction *, Instruction *, 4, CSEDenseMapInfo> CSEMap;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
- Instruction *In = &*I++;
-
- if (!CSEDenseMapInfo::canHandle(In))
+ for (Instruction &In : llvm::make_early_inc_range(*BB)) {
+ if (!CSEDenseMapInfo::canHandle(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
- if (Instruction *V = CSEMap.lookup(In)) {
- In->replaceAllUsesWith(V);
- In->eraseFromParent();
+ if (Instruction *V = CSEMap.lookup(&In)) {
+ In.replaceAllUsesWith(V);
+ In.eraseFromParent();
continue;
}
- CSEMap[In] = In;
+ CSEMap[&In] = &In;
}
}
@@ -3881,7 +3894,7 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF,
Function *F = CI->getCalledFunction();
Type *ScalarRetTy = CI->getType();
SmallVector<Type *, 4> Tys, ScalarTys;
- for (auto &ArgOp : CI->arg_operands())
+ for (auto &ArgOp : CI->args())
ScalarTys.push_back(ArgOp->getType());
// Estimate cost of scalarized vector call. The source operands are assumed
@@ -3940,7 +3953,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
FMF = FPMO->getFastMathFlags();
- SmallVector<const Value *> Arguments(CI->arg_begin(), CI->arg_end());
+ SmallVector<const Value *> Arguments(CI->args());
FunctionType *FTy = CI->getCalledFunction()->getFunctionType();
SmallVector<Type *> ParamTys;
std::transform(FTy->param_begin(), FTy->param_end(),
@@ -3974,7 +3987,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from State indicates that it
// wasn't vectorized.
- VPValue *Def = State.Plan->getVPValue(KV.first);
+ // FIXME: Should not rely on getVPValue at this point.
+ VPValue *Def = State.Plan->getVPValue(KV.first, true);
if (!State.hasAnyVectorValue(Def))
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -4081,7 +4095,8 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths(VPTransformState &State) {
// If the value wasn't vectorized, we must maintain the original scalar
// type. The absence of the value from State indicates that it
// wasn't vectorized.
- VPValue *Def = State.Plan->getVPValue(KV.first);
+ // FIXME: Should not rely on getVPValue at this point.
+ VPValue *Def = State.Plan->getVPValue(KV.first, true);
if (!State.hasAnyVectorValue(Def))
continue;
for (unsigned Part = 0; Part < UF; ++Part) {
@@ -4222,17 +4237,12 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
// After execution completes the vector loop, we extract the next value of
// the recurrence (x) to use as the initial value in the scalar loop.
- auto *IdxTy = Builder.getInt32Ty();
- auto *VecPhi = cast<PHINode>(State.get(PhiR, 0));
-
- // Fix the latch value of the new recurrence in the vector loop.
- VPValue *PreviousDef = PhiR->getBackedgeValue();
- Value *Incoming = State.get(PreviousDef, UF - 1);
- VecPhi->addIncoming(Incoming, LI->getLoopFor(LoopVectorBody)->getLoopLatch());
-
// Extract the last vector element in the middle block. This will be the
// initial value for the recurrence when jumping to the scalar loop.
+ VPValue *PreviousDef = PhiR->getBackedgeValue();
+ Value *Incoming = State.get(PreviousDef, UF - 1);
auto *ExtractForScalar = Incoming;
+ auto *IdxTy = Builder.getInt32Ty();
if (VF.isVector()) {
auto *One = ConstantInt::get(IdxTy, 1);
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
@@ -4283,8 +4293,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(VPWidenPHIRecipe *PhiR,
  // and thus no phis which needed updating.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (any_of(LCSSAPhi.incoming_values(),
- [Phi](Value *V) { return V == Phi; }))
+ if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi))
LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
}
@@ -4301,29 +4310,13 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
setDebugLocFromInst(ReductionStartValue);
- VPValue *LoopExitInstDef = State.Plan->getVPValue(LoopExitInst);
+ VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
// Wrap flags are in general invalid after vectorization, clear them.
clearReductionWrapFlags(RdxDesc, State);
- // Fix the vector-loop phi.
-
- // Reductions do not have to start at zero. They can start with
- // any loop invariant values.
- BasicBlock *VectorLoopLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
-
- unsigned LastPartForNewPhi = PhiR->isOrdered() ? 1 : UF;
- for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
- Value *VecRdxPhi = State.get(PhiR->getVPSingleValue(), Part);
- Value *Val = State.get(PhiR->getBackedgeValue(), Part);
- if (PhiR->isOrdered())
- Val = State.get(PhiR->getBackedgeValue(), UF - 1);
-
- cast<PHINode>(VecRdxPhi)->addIncoming(Val, VectorLoopLatch);
- }
-
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
@@ -4361,7 +4354,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
RdxDesc.getOpcode(), PhiTy,
TargetTransformInfo::ReductionFlags())) {
auto *VecRdxPhi =
- cast<PHINode>(State.get(PhiR->getVPSingleValue(), Part));
+ cast<PHINode>(State.get(PhiR, Part));
VecRdxPhi->setIncomingValueForBlock(
LI->getLoopFor(LoopVectorBody)->getLoopLatch(), Sel);
}
@@ -4382,13 +4375,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
: Builder.CreateZExt(Trunc, VecTy);
- for (Value::user_iterator UI = RdxParts[Part]->user_begin();
- UI != RdxParts[Part]->user_end();)
- if (*UI != Trunc) {
- (*UI++)->replaceUsesOfWith(RdxParts[Part], Extnd);
+ for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users()))
+ if (U != Trunc) {
+ U->replaceUsesOfWith(RdxParts[Part], Extnd);
RdxParts[Part] = Extnd;
- } else {
- ++UI;
}
}
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
@@ -4421,9 +4411,11 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
ReducedPartRdx = Builder.CreateBinOp(
(Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
- } else {
+ } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
+ ReducedPartRdx = createSelectCmpOp(Builder, ReductionStartValue, RK,
+ ReducedPartRdx, RdxPart);
+ else
ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
- }
}
}
@@ -4431,7 +4423,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// target reduction in the loop using a Reduction recipe.
if (VF.isVector() && !PhiR->isInLoop()) {
ReducedPartRdx =
- createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx);
+ createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi);
// If the reduction can be performed in a smaller type, we need to extend
// the reduction to the wider type before we branch to the original loop.
if (PhiTy != RdxDesc.getRecurrenceType())
@@ -4456,8 +4448,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
  // fixFirstOrderRecurrence for a more complete explanation of the logic.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (any_of(LCSSAPhi.incoming_values(),
- [LoopExitInst](Value *V) { return V == LoopExitInst; }))
+ if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst))
LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
// Fix the scalar loop reduction variable with the incoming reduction sum
@@ -4488,7 +4479,8 @@ void InnerLoopVectorizer::clearReductionWrapFlags(const RecurrenceDescriptor &Rd
Instruction *Cur = Worklist.pop_back_val();
if (isa<OverflowingBinaryOperator>(Cur))
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = State.get(State.Plan->getVPValue(Cur), Part);
+ // FIXME: Should not rely on getVPValue at this point.
+ Value *V = State.get(State.Plan->getVPValue(Cur, true), Part);
cast<Instruction>(V)->dropPoisonGeneratingFlags();
}
@@ -4519,11 +4511,12 @@ void InnerLoopVectorizer::fixLCSSAPHIs(VPTransformState &State) {
// Can be a loop invariant incoming value or the last scalar value to be
// extracted from the vectorized loop.
+ // FIXME: Should not rely on getVPValue at this point.
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
Value *lastIncomingValue =
OrigLoop->isLoopInvariant(IncomingValue)
? IncomingValue
- : State.get(State.Plan->getVPValue(IncomingValue),
+ : State.get(State.Plan->getVPValue(IncomingValue, true),
VPIteration(UF - 1, Lane));
LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
}
@@ -4763,10 +4756,18 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
}
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *PartStart = createStepForVF(
- Builder, ConstantInt::get(PtrInd->getType(), Part), VF);
+ Value *PartStart =
+ createStepForVF(Builder, PtrInd->getType(), VF, Part);
if (NeedsVectorIndex) {
+ // Here we cache the whole vector, which means we can support the
+ // extraction of any lane. However, in some cases the extractelement
+ // instruction that is generated for scalar uses of this vector (e.g.
+ // a load instruction) is not folded away. Therefore we still
+ // calculate values for the first n lanes to avoid redundant moves
+ // (when extracting the 0th element) and to produce scalar code (i.e.
+ // additional add/gep instructions instead of expensive extractelement
+ // instructions) when extracting higher-order elements.
Value *PartStartSplat = Builder.CreateVectorSplat(VF, PartStart);
Value *Indices = Builder.CreateAdd(PartStartSplat, UnitStepVec);
Value *GlobalIndices = Builder.CreateAdd(PtrIndSplat, Indices);
@@ -4774,9 +4775,6 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
emitTransformedIndex(Builder, GlobalIndices, PSE.getSE(), DL, II);
SclrGep->setName("next.gep");
State.set(PhiR, SclrGep, Part);
- // We've cached the whole vector, which means we can support the
- // extraction of any lane.
- continue;
}
for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
@@ -4813,7 +4811,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *NumUnrolledElems =
Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
Value *InductionGEP = GetElementPtrInst::Create(
- ScStValueType->getPointerElementType(), NewPointerPhi,
+ II.getElementType(), NewPointerPhi,
Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
InductionLoc);
NewPointerPhi->addIncoming(InductionGEP, LoopLatch);
@@ -4832,7 +4830,7 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Builder.CreateAdd(StartOffset, Builder.CreateStepVector(VecPhiType));
Value *GEP = Builder.CreateGEP(
- ScStValueType->getPointerElementType(), NewPointerPhi,
+ II.getElementType(), NewPointerPhi,
Builder.CreateMul(
StartOffset, Builder.CreateVectorSplat(State.VF, ScalarStepValue),
"vector.gep"));
@@ -4979,7 +4977,7 @@ void InnerLoopVectorizer::widenCallInstruction(CallInst &I, VPValue *Def,
auto *CI = cast<CallInst>(&I);
SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI->arg_operands())
+ for (Value *ArgOperand : CI->args())
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF.getKnownMinValue()));
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
@@ -5128,8 +5126,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
Instruction *Update = cast<Instruction>(
cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
- ScalarPtrs.insert(Update);
- return;
+
+ // If there is more than one user of Update (Ptr), we shouldn't assume it
+    // will be scalar after vectorization as other users of the instruction
+ // may require widening. Otherwise, add it to ScalarPtrs.
+ if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
+ ScalarPtrs.insert(Update);
+ return;
+ }
}
// We only care about bitcast and getelementptr instructions contained in
// the loop.
@@ -5142,12 +5146,11 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
if (Worklist.count(I))
return;
- // If all users of the pointer will be memory accesses and scalar, place the
- // pointer in ScalarPtrs. Otherwise, place the pointer in
- // PossibleNonScalarPtrs.
- if (llvm::all_of(I->users(), [&](User *U) {
- return (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
- isScalarUse(cast<Instruction>(U), Ptr);
+ // If the use of the pointer will be a scalar use, and all users of the
+ // pointer are memory accesses, place the pointer in ScalarPtrs. Otherwise,
+ // place the pointer in PossibleNonScalarPtrs.
+ if (isScalarUse(MemAccess, Ptr) && llvm::all_of(I->users(), [&](User *U) {
+ return isa<LoadInst>(U) || isa<StoreInst>(U);
}))
ScalarPtrs.insert(I);
else
@@ -5254,7 +5257,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
}
bool LoopVectorizationCostModel::isScalarWithPredication(Instruction *I) const {
- if (!blockNeedsPredication(I->getParent()))
+ if (!blockNeedsPredicationForAnyReason(I->getParent()))
return false;
switch(I->getOpcode()) {
default:
@@ -5297,12 +5300,20 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// Check if masking is required.
// A Group may need masking for one of two reasons: it resides in a block that
- // needs predication, or it was decided to use masking to deal with gaps.
+ // needs predication, or it was decided to use masking to deal with gaps
+ // (either a gap at the end of a load-access that may result in a speculative
+ // load, or any gaps in a store-access).
bool PredicatedAccessRequiresMasking =
- Legal->blockNeedsPredication(I->getParent()) && Legal->isMaskRequired(I);
- bool AccessWithGapsRequiresMasking =
- Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
- if (!PredicatedAccessRequiresMasking && !AccessWithGapsRequiresMasking)
+ blockNeedsPredicationForAnyReason(I->getParent()) &&
+ Legal->isMaskRequired(I);
+ bool LoadAccessWithGapsRequiresEpilogMasking =
+ isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
+ !isScalarEpilogueAllowed();
+ bool StoreAccessWithGapsRequiresMasking =
+ isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor());
+ if (!PredicatedAccessRequiresMasking &&
+ !LoadAccessWithGapsRequiresEpilogMasking &&
+ !StoreAccessWithGapsRequiresMasking)
return true;
// If masked interleaving is required, we expect that the user/target had
@@ -5311,6 +5322,9 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
assert(useMaskedInterleavedAccesses(TTI) &&
"Masked interleave-groups for predicated accesses are not enabled.");
+ if (Group->isReverse())
+ return false;
+
auto *Ty = getLoadStoreType(I);
const Align Alignment = getLoadStoreAlignment(I);
return isa<LoadInst>(I) ? TTI.isLegalMaskedLoad(Ty, Alignment)
@@ -5320,14 +5334,13 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
Instruction *I, ElementCount VF) {
// Get and ensure we have a valid memory instruction.
- LoadInst *LI = dyn_cast<LoadInst>(I);
- StoreInst *SI = dyn_cast<StoreInst>(I);
- assert((LI || SI) && "Invalid memory instruction");
+ assert((isa<LoadInst, StoreInst>(I)) && "Invalid memory instruction");
auto *Ptr = getLoadStorePointerOperand(I);
+ auto *ScalarTy = getLoadStoreType(I);
// In order to be widened, the pointer should be consecutive, first of all.
- if (!Legal->isConsecutivePtr(Ptr))
+ if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
return false;
// If the instruction is a store located in a predicated block, it will be
@@ -5338,7 +5351,6 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
  // If the instruction's allocated size doesn't equal its type size, it
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
- auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
if (hasIrregularType(ScalarTy, DL))
return false;
@@ -5369,12 +5381,14 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
return (!I || !TheLoop->contains(I));
};
+ // Worklist containing uniform instructions demanding lane 0.
SetVector<Instruction *> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
- // Instructions that are scalar with predication must not be considered
- // uniform after vectorization, because that would create an erroneous
- // replicating region where only a single instance out of VF should be formed.
+ // Add uniform instructions demanding lane 0 to the worklist. Instructions
+ // that are scalar with predication must not be considered uniform after
+ // vectorization, because that would create an erroneous replicating region
+ // where only a single instance out of VF should be formed.
// TODO: optimize such seldom cases if found important, see PR40816.
auto addToWorklistIfAllowed = [&](Instruction *I) -> void {
if (isOutOfScope(I)) {
@@ -5433,6 +5447,30 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sideeffect:
+ case Intrinsic::experimental_noalias_scope_decl:
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ if (TheLoop->hasLoopInvariantOperands(&I))
+ addToWorklistIfAllowed(&I);
+ break;
+ default:
+ break;
+ }
+ }
+
+ // ExtractValue instructions must be uniform, because the operands are
+ // known to be loop-invariant.
+ if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
+ assert(isOutOfScope(EVI->getAggregateOperand()) &&
+ "Expected aggregate value to be loop invariant");
+ addToWorklistIfAllowed(EVI);
+ continue;
+ }
+
// If there's no pointer operand, there's nothing to do.
auto *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
@@ -5565,13 +5603,8 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() {
ElementCount
LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
- if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
- reportVectorizationInfo(
- "Disabling scalable vectorization, because target does not "
- "support scalable vectors.",
- "ScalableVectorsUnsupported", ORE, TheLoop);
+ if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors)
return ElementCount::getScalable(0);
- }
if (Hints->isScalableVectorizationDisabled()) {
reportVectorizationInfo("Scalable vectorization is explicitly disabled",
@@ -5579,6 +5612,8 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return ElementCount::getScalable(0);
}
+ LLVM_DEBUG(dbgs() << "LV: Scalable vectorization is available\n");
+
auto MaxScalableVF = ElementCount::getScalable(
std::numeric_limits<ElementCount::ScalarTy>::max());
@@ -5614,6 +5649,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+ if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (VScaleMax > 0)
+ MaxVScale = VScaleMax;
+ }
MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)
@@ -5681,17 +5723,32 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
return MaxSafeFixedVF;
}
- LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
- << " is unsafe. Ignoring scalable UserVF.\n");
- ORE->emit([&]() {
- return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
- TheLoop->getStartLoc(),
- TheLoop->getHeader())
- << "User-specified vectorization factor "
- << ore::NV("UserVectorizationFactor", UserVF)
- << " is unsafe. Ignoring the hint to let the compiler pick a "
- "suitable VF.";
- });
+ if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
+ LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+ << " is ignored because scalable vectors are not "
+ "available.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "User-specified vectorization factor "
+ << ore::NV("UserVectorizationFactor", UserVF)
+ << " is ignored because the target does not support scalable "
+ "vectors. The compiler will pick a more suitable value.";
+ });
+ } else {
+ LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
+ << " is unsafe. Ignoring scalable UserVF.\n");
+ ORE->emit([&]() {
+ return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "User-specified vectorization factor "
+ << ore::NV("UserVectorizationFactor", UserVF)
+ << " is unsafe. Ignoring the hint to let the compiler pick a "
+ "more suitable value.";
+ });
+ }
}
LLVM_DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType
@@ -5972,19 +6029,27 @@ bool LoopVectorizationCostModel::isMoreProfitable(
return RTCostA < RTCostB;
}
- // When set to preferred, for now assume vscale may be larger than 1, so
- // that scalable vectorization is slightly favorable over fixed-width
- // vectorization.
+ // Improve estimate for the vector width if it is scalable.
+ unsigned EstimatedWidthA = A.Width.getKnownMinValue();
+ unsigned EstimatedWidthB = B.Width.getKnownMinValue();
+ if (Optional<unsigned> VScale = TTI.getVScaleForTuning()) {
+ if (A.Width.isScalable())
+ EstimatedWidthA *= VScale.getValue();
+ if (B.Width.isScalable())
+ EstimatedWidthB *= VScale.getValue();
+ }
+
+ // When set to preferred, for now assume vscale may be larger than 1 (or the
+ // one being tuned for), so that scalable vectorization is slightly favorable
+ // over fixed-width vectorization.
if (Hints->isScalableVectorizationPreferred())
if (A.Width.isScalable() && !B.Width.isScalable())
- return (CostA * B.Width.getKnownMinValue()) <=
- (CostB * A.Width.getKnownMinValue());
+ return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
// To avoid the need for FP division:
// (CostA / A.Width) < (CostB / B.Width)
// <=> (CostA * B.Width) < (CostB * A.Width)
- return (CostA * B.Width.getKnownMinValue()) <
- (CostB * A.Width.getKnownMinValue());
+ return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA);
}
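The comparison can be sanity-checked with ordinary integers (a minimal standalone sketch; the widths are the estimated lane counts, i.e. the known minimum width multiplied by getVScaleForTuning for scalable VFs): cheaper cost per lane without an FP division is just a cross-multiplication.

#include <cassert>
#include <cstdint>

// CostA/WidthA < CostB/WidthB  <=>  CostA*WidthB < CostB*WidthA,
// evaluated entirely in integer arithmetic.
bool cheaperPerLane(uint64_t CostA, uint64_t WidthA, uint64_t CostB,
                    uint64_t WidthB) {
  return CostA * WidthB < CostB * WidthA;
}

int main() {
  // A <vscale x 4> plan tuned for vscale=2 is treated as 8 lanes: cost 10
  // over 8 lanes (1.25 per lane) beats cost 6 over 4 lanes (1.5 per lane).
  assert(cheaperPerLane(10, 8, 6, 4));
  return 0;
}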
VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
@@ -6014,11 +6079,22 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
VectorizationCostTy C = expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first);
- LLVM_DEBUG(
- dbgs() << "LV: Vector loop of width " << i << " costs: "
- << (Candidate.Cost / Candidate.Width.getKnownMinValue())
- << (i.isScalable() ? " (assuming a minimum vscale of 1)" : "")
- << ".\n");
+
+#ifndef NDEBUG
+ unsigned AssumedMinimumVscale = 1;
+ if (Optional<unsigned> VScale = TTI.getVScaleForTuning())
+ AssumedMinimumVscale = VScale.getValue();
+ unsigned Width =
+ Candidate.Width.isScalable()
+ ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
+ : Candidate.Width.getFixedValue();
+ LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i
+ << " costs: " << (Candidate.Cost / Width));
+ if (i.isScalable())
+ LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
+ << AssumedMinimumVscale << ")");
+ LLVM_DEBUG(dbgs() << ".\n");
+#endif
if (!C.second && !ForceVectorization) {
LLVM_DEBUG(
@@ -6182,15 +6258,6 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- // FIXME: This can be fixed for scalable vectors later, because at this stage
- // the LoopVectorizer will only consider vectorizing a loop with scalable
- // vectors when the loop has a hint to enable vectorization for a given VF.
- if (MainLoopVF.isScalable()) {
- LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization for scalable vectors not "
- "yet supported.\n");
- return Result;
- }
-
// Not really a cost consideration, but check for unsupported cases here to
// simplify the logic.
if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) {
@@ -6202,9 +6269,9 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
if (EpilogueVectorizationForceVF > 1) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
- if (LVP.hasPlanWithVFs(
- {MainLoopVF, ElementCount::getFixed(EpilogueVectorizationForceVF)}))
- return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0};
+ ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
+ if (LVP.hasPlanWithVF(ForcedEC))
+ return {ForcedEC, 0};
else {
LLVM_DEBUG(
dbgs()
@@ -6221,14 +6288,24 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
return Result;
}
- if (!isEpilogueVectorizationProfitable(MainLoopVF))
+ auto FixedMainLoopVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
+ if (MainLoopVF.isScalable())
+ LLVM_DEBUG(
+ dbgs() << "LEV: Epilogue vectorization using scalable vectors not "
+ "yet supported. Converting to fixed-width (VF="
+ << FixedMainLoopVF << ") instead\n");
+
+ if (!isEpilogueVectorizationProfitable(FixedMainLoopVF)) {
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
+ "this loop\n");
return Result;
+ }
for (auto &NextVF : ProfitableVFs)
- if (ElementCount::isKnownLT(NextVF.Width, MainLoopVF) &&
+ if (ElementCount::isKnownLT(NextVF.Width, FixedMainLoopVF) &&
(Result.Width.getFixedValue() == 1 ||
isMoreProfitable(NextVF, Result)) &&
- LVP.hasPlanWithVFs({MainLoopVF, NextVF.Width}))
+ LVP.hasPlanWithVF(NextVF.Width))
Result = NextVF;
if (Result != VectorizationFactor::Disabled())
@@ -6471,6 +6548,22 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
unsigned StoresIC = IC / (NumStores ? NumStores : 1);
unsigned LoadsIC = IC / (NumLoads ? NumLoads : 1);
+ // There is little point in interleaving for reductions containing selects
+ // and compares when VF=1 since it may just create more overhead than it's
+ // worth for loops with small trip counts. This is because we still have to
+ // do the final reduction after the loop.
+ bool HasSelectCmpReductions =
+ HasReductions &&
+ any_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ return RecurrenceDescriptor::isSelectCmpRecurrenceKind(
+ RdxDesc.getRecurrenceKind());
+ });
+ if (HasSelectCmpReductions) {
+ LLVM_DEBUG(dbgs() << "LV: Not interleaving select-cmp reductions.\n");
+ return 1;
+ }
+
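For context, a select-cmp recurrence of the kind being excluded here looks roughly like the loop below (a hypothetical source-level example, not taken from the patch): the accumulator only ever switches from the start value to a loop-invariant value, so interleaving the scalar loop multiplies the compare/select work while the final cross-part reduction after the loop remains.

// The result is either Start or the invariant 42, depending on whether any
// element satisfied the compare; see isSelectCmpRecurrenceKind above.
int selectCmpReduction(const int *A, int N, int Start) {
  int R = Start;
  for (int I = 0; I < N; ++I)
    R = (A[I] > 3) ? 42 : R;
  return R;
}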
// If we have a scalar reduction (vector reductions are already dealt with
// by this point), we can increase the critical path length if the loop
// we're interleaving is inside another loop. For tree-wise reductions
@@ -6756,7 +6849,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// determine if it would be better to not if-convert the blocks they are in.
// If so, we also record the instructions to scalarize.
for (BasicBlock *BB : TheLoop->blocks()) {
- if (!blockNeedsPredication(BB))
+ if (!blockNeedsPredicationForAnyReason(BB))
continue;
for (Instruction &I : *BB)
if (isScalarWithPredication(&I)) {
@@ -6851,7 +6944,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(I->getType(), VF)),
- APInt::getAllOnesValue(VF.getFixedValue()), true, false);
+ APInt::getAllOnes(VF.getFixedValue()), true, false);
ScalarCost +=
VF.getFixedValue() *
TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput);
@@ -6870,7 +6963,7 @@ int LoopVectorizationCostModel::computePredInstDiscount(
else if (needsExtract(J, VF)) {
ScalarCost += TTI.getScalarizationOverhead(
cast<VectorType>(ToVectorTy(J->getType(), VF)),
- APInt::getAllOnesValue(VF.getFixedValue()), false, true);
+ APInt::getAllOnes(VF.getFixedValue()), false, true);
}
}
@@ -7016,7 +7109,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
auto *Vec_i1Ty =
VectorType::get(IntegerType::getInt1Ty(ValTy->getContext()), VF);
Cost += TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnesValue(VF.getKnownMinValue()),
+ Vec_i1Ty, APInt::getAllOnes(VF.getKnownMinValue()),
/*Insert=*/false, /*Extract=*/true);
Cost += TTI.getCFInstrCost(Instruction::Br, TTI::TCK_RecipThroughput);
@@ -7036,7 +7129,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
- int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -7117,18 +7210,16 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
unsigned InterleaveFactor = Group->getFactor();
auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
- // Holds the indices of existing members in an interleaved load group.
- // An interleaved store group doesn't need this as it doesn't allow gaps.
+ // Holds the indices of existing members in the interleaved group.
SmallVector<unsigned, 4> Indices;
- if (isa<LoadInst>(I)) {
- for (unsigned i = 0; i < InterleaveFactor; i++)
- if (Group->getMember(i))
- Indices.push_back(i);
- }
+ for (unsigned IF = 0; IF < InterleaveFactor; IF++)
+ if (Group->getMember(IF))
+ Indices.push_back(IF);
// Calculate the cost of the whole interleaved group.
bool UseMaskForGaps =
- Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed();
+ (Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed()) ||
+ (isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlign(),
AS, TTI::TCK_RecipThroughput, Legal->isMaskRequired(I), UseMaskForGaps);
@@ -7210,8 +7301,41 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
VectorTy = VectorType::get(I->getOperand(0)->getType(), VectorTy);
Instruction *Op0, *Op1;
- if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
- !TheLoop->isLoopInvariant(RedOp)) {
+ if (RedOp &&
+ match(RedOp,
+ m_ZExtOrSExt(m_Mul(m_Instruction(Op0), m_Instruction(Op1)))) &&
+ match(Op0, m_ZExtOrSExt(m_Value())) &&
+ Op0->getOpcode() == Op1->getOpcode() &&
+ Op0->getOperand(0)->getType() == Op1->getOperand(0)->getType() &&
+ !TheLoop->isLoopInvariant(Op0) && !TheLoop->isLoopInvariant(Op1) &&
+ (Op0->getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
+
+    // Matched reduce(ext(mul(ext(A), ext(B))))
+ // Note that the extend opcodes need to all match, or if A==B they will have
+ // been converted to zext(mul(sext(A), sext(A))) as it is known positive,
+ // which is equally fine.
+ bool IsUnsigned = isa<ZExtInst>(Op0);
+ auto *ExtType = VectorType::get(Op0->getOperand(0)->getType(), VectorTy);
+ auto *MulType = VectorType::get(Op0->getType(), VectorTy);
+
+ InstructionCost ExtCost =
+ TTI.getCastInstrCost(Op0->getOpcode(), MulType, ExtType,
+ TTI::CastContextHint::None, CostKind, Op0);
+ InstructionCost MulCost =
+ TTI.getArithmeticInstrCost(Instruction::Mul, MulType, CostKind);
+ InstructionCost Ext2Cost =
+ TTI.getCastInstrCost(RedOp->getOpcode(), VectorTy, MulType,
+ TTI::CastContextHint::None, CostKind, RedOp);
+
+ InstructionCost RedCost = TTI.getExtendedAddReductionCost(
+ /*IsMLA=*/true, IsUnsigned, RdxDesc.getRecurrenceType(), ExtType,
+ CostKind);
+
+ if (RedCost.isValid() &&
+ RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
+ return I == RetI ? RedCost : 0;
+ } else if (RedOp && match(RedOp, m_ZExtOrSExt(m_Value())) &&
+ !TheLoop->isLoopInvariant(RedOp)) {
// Matched reduce(ext(A))
bool IsUnsigned = isa<ZExtInst>(RedOp);
auto *ExtType = VectorType::get(RedOp->getOperand(0)->getType(), VectorTy);
@@ -7245,7 +7369,7 @@ Optional<InstructionCost> LoopVectorizationCostModel::getReductionPatternCost(
if (RedCost.isValid() && RedCost < ExtCost * 2 + MulCost + BaseCost)
return I == RetI ? RedCost : 0;
- } else {
+ } else if (!match(I, m_ZExtOrSExt(m_Value()))) {
// Matched reduce(mul())
InstructionCost MulCost =
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
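A source-level loop that lowers to the newly matched reduce(ext(mul(ext(A), ext(B)))) shape might look like the following (a hypothetical example for illustration only): both narrow operands are extended, multiplied, widened again and accumulated, which is what the getExtendedAddReductionCost(/*IsMLA=*/true, ...) query above prices as a single extended multiply-add reduction where the target supports one.

#include <cstddef>
#include <cstdint>

// Sum of widened products of two narrow arrays: ext -> mul -> ext -> add,
// reduced across the loop.
int64_t widenedDotProduct(const int8_t *A, const int8_t *B, size_t N) {
  int64_t Sum = 0;
  for (size_t I = 0; I < N; ++I)
    Sum += static_cast<int64_t>(static_cast<int16_t>(A[I]) *
                                static_cast<int16_t>(B[I]));
  return Sum;
}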
@@ -7304,9 +7428,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *VectorTy;
InstructionCost C = getInstructionCost(I, VF, VectorTy);
- bool TypeNotScalarized =
- VF.isVector() && VectorTy->isVectorTy() &&
- TTI.getNumberOfParts(VectorTy) < VF.getKnownMinValue();
+ bool TypeNotScalarized = false;
+ if (VF.isVector() && VectorTy->isVectorTy()) {
+ unsigned NumParts = TTI.getNumberOfParts(VectorTy);
+ if (NumParts)
+ TypeNotScalarized = NumParts < VF.getKnownMinValue();
+ else
+ C = InstructionCost::getInvalid();
+ }
return VectorizationCostTy(C, TypeNotScalarized);
}
@@ -7327,8 +7456,8 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(
- cast<VectorType>(RetTy), APInt::getAllOnesValue(VF.getKnownMinValue()),
- true, false);
+ cast<VectorType>(RetTy), APInt::getAllOnes(VF.getKnownMinValue()), true,
+ false);
// Some targets keep addresses scalar.
if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
@@ -7340,7 +7469,7 @@ LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I,
// Collect operands to consider.
CallInst *CI = dyn_cast<CallInst>(I);
- Instruction::op_range Ops = CI ? CI->arg_operands() : I->operands();
+ Instruction::op_range Ops = CI ? CI->args() : I->operands();
// Skip operands that do not require extraction/scalarization and do not incur
// any overhead.
@@ -7391,8 +7520,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// We assume that widening is the best solution when possible.
if (memoryInstructionCanBeWidened(&I, VF)) {
InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
- int ConsecutiveStride =
- Legal->isConsecutivePtr(getLoadStorePointerOperand(&I));
+ int ConsecutiveStride = Legal->isConsecutivePtr(
+ getLoadStoreType(&I), getLoadStorePointerOperand(&I));
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
"Expected consecutive stride.");
InstWidening Decision =
@@ -7579,8 +7708,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
return (
TTI.getScalarizationOverhead(
- Vec_i1Ty, APInt::getAllOnesValue(VF.getFixedValue()), false,
- true) +
+ Vec_i1Ty, APInt::getAllOnes(VF.getFixedValue()), false, true) +
(TTI.getCFInstrCost(Instruction::Br, CostKind) * VF.getFixedValue()));
} else if (I->getParent() == TheLoop->getLoopLatch() || VF.isScalar())
// The back-edge branch will remain, as will all scalar branches.
@@ -7893,7 +8021,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
// Check if the pointer operand of a load or store instruction is
// consecutive.
if (auto *Ptr = getLoadStorePointerOperand(Inst))
- return Legal->isConsecutivePtr(Ptr);
+ return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr);
return false;
}
@@ -8019,7 +8147,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return None;
// Invalidate interleave groups if all blocks of loop will be predicated.
- if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
+ if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
!useMaskedInterleavedAccesses(*TTI)) {
LLVM_DEBUG(
dbgs()
@@ -8105,28 +8233,30 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return SelectedVF;
}
-void LoopVectorizationPlanner::setBestPlan(ElementCount VF, unsigned UF) {
- LLVM_DEBUG(dbgs() << "Setting best plan to VF=" << VF << ", UF=" << UF
- << '\n');
- BestVF = VF;
- BestUF = UF;
+VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const {
+ assert(count_if(VPlans,
+ [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) ==
+ 1 &&
+         "Best VF does not have a single VPlan.");
- erase_if(VPlans, [VF](const VPlanPtr &Plan) {
- return !Plan->hasVF(VF);
- });
- assert(VPlans.size() == 1 && "Best VF has not a single VPlan.");
+ for (const VPlanPtr &Plan : VPlans) {
+ if (Plan->hasVF(VF))
+ return *Plan.get();
+ }
+ llvm_unreachable("No plan found!");
}
-void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
+void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
+ VPlan &BestVPlan,
+ InnerLoopVectorizer &ILV,
DominatorTree *DT) {
+ LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF
+ << '\n');
+
// Perform the actual loop transformation.
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
- assert(BestVF.hasValue() && "Vectorization Factor is missing");
- assert(VPlans.size() == 1 && "Not a single VPlan to execute.");
-
- VPTransformState State{
- *BestVF, BestUF, LI, DT, ILV.Builder, &ILV, VPlans.front().get()};
+ VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
State.TripCount = ILV.getOrCreateTripCount(nullptr);
State.CanonicalIV = ILV.Induction;
@@ -8142,7 +8272,7 @@ void LoopVectorizationPlanner::executePlan(InnerLoopVectorizer &ILV,
//===------------------------------------------------===//
// 2. Copy and widen instructions from the old loop into the new loop.
- VPlans.front()->execute(&State);
+ BestVPlan.execute(&State);
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
@@ -8222,21 +8352,19 @@ Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
-Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
+Value *InnerLoopUnroller::getStepVector(Value *Val, Value *StartIdx,
+ Value *Step,
Instruction::BinaryOps BinOp) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *Ty = Val->getType();
assert(!Ty->isVectorTy() && "Val must be a scalar");
if (Ty->isFloatingPointTy()) {
- Constant *C = ConstantFP::get(Ty, (double)StartIdx);
-
// Floating-point operations inherit FMF via the builder's flags.
- Value *MulOp = Builder.CreateFMul(C, Step);
+ Value *MulOp = Builder.CreateFMul(StartIdx, Step);
return Builder.CreateBinOp(BinOp, Val, MulOp);
}
- Constant *C = ConstantInt::get(Ty, StartIdx);
- return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
+ return Builder.CreateAdd(Val, Builder.CreateMul(StartIdx, Step), "induction");
}
static void AddRuntimeUnrollDisableMetaData(Loop *L) {
@@ -8311,7 +8439,9 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
OldInduction = Legal->getPrimaryInduction();
Type *IdxTy = Legal->getWidestInductionType();
Value *StartIdx = ConstantInt::get(IdxTy, 0);
- Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
+
+ IRBuilder<> B(&*Lp->getLoopPreheader()->getFirstInsertionPt());
+ Value *Step = getRuntimeVF(B, IdxTy, VF * UF);
Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
EPI.VectorTripCount = CountRoundDown;
Induction =
@@ -8329,9 +8459,9 @@ BasicBlock *EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
void EpilogueVectorizerMainLoop::printDebugTracesAtStart() {
LLVM_DEBUG({
dbgs() << "Create Skeleton for epilogue vectorized loop (first pass)\n"
- << "Main Loop VF:" << EPI.MainLoopVF.getKnownMinValue()
+ << "Main Loop VF:" << EPI.MainLoopVF
<< ", Main Loop UF:" << EPI.MainLoopUF
- << ", Epilogue Loop VF:" << EPI.EpilogueVF.getKnownMinValue()
+ << ", Epilogue Loop VF:" << EPI.EpilogueVF
<< ", Epilogue Loop UF:" << EPI.EpilogueUF << "\n";
});
}
@@ -8346,8 +8476,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
Loop *L, BasicBlock *Bypass, bool ForEpilogue) {
assert(L && "Expected valid Loop.");
assert(Bypass && "Expected valid bypass basic block.");
- unsigned VFactor =
- ForEpilogue ? EPI.EpilogueVF.getKnownMinValue() : VF.getKnownMinValue();
+ ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF;
unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF;
Value *Count = getOrCreateTripCount(L);
// Reuse existing vector loop preheader for TC checks.
@@ -8361,7 +8490,7 @@ BasicBlock *EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
- P, Count, ConstantInt::get(Count->getType(), VFactor * UFactor),
+ P, Count, createStepForVF(Builder, Count->getType(), VFactor, UFactor),
"min.iters.check");
if (!ForEpilogue)
@@ -8513,11 +8642,11 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ?
ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
- Value *CheckMinIters = Builder.CreateICmp(
- P, Count,
- ConstantInt::get(Count->getType(),
- EPI.EpilogueVF.getKnownMinValue() * EPI.EpilogueUF),
- "min.epilog.iters.check");
+ Value *CheckMinIters =
+ Builder.CreateICmp(P, Count,
+ createStepForVF(Builder, Count->getType(),
+ EPI.EpilogueVF, EPI.EpilogueUF),
+ "min.epilog.iters.check");
ReplaceInstWithInst(
Insert->getTerminator(),
@@ -8530,7 +8659,7 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
void EpilogueVectorizerEpilogueLoop::printDebugTracesAtStart() {
LLVM_DEBUG({
dbgs() << "Create Skeleton for epilogue vectorized loop (second pass)\n"
- << "Epilogue Loop VF:" << EPI.EpilogueVF.getKnownMinValue()
+ << "Epilogue Loop VF:" << EPI.EpilogueVF
<< ", Epilogue Loop UF:" << EPI.EpilogueUF << "\n";
});
}
@@ -8628,7 +8757,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
VPValue *BlockMask = nullptr;
if (OrigLoop->getHeader() == BB) {
- if (!CM.blockNeedsPredication(BB))
+ if (!CM.blockNeedsPredicationForAnyReason(BB))
return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
// Create the block in mask as the first non-phi instruction in the block.
@@ -8643,9 +8772,9 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
if (Legal->getPrimaryInduction())
IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
else {
- auto IVRecipe = new VPWidenCanonicalIVRecipe();
+ auto *IVRecipe = new VPWidenCanonicalIVRecipe();
Builder.getInsertBlock()->insert(IVRecipe, NewInsertionPoint);
- IV = IVRecipe->getVPSingleValue();
+ IV = IVRecipe;
}
VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
bool TailFolded = !CM.isScalarEpilogueAllowed();
@@ -8708,12 +8837,21 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
if (Legal->isMaskRequired(I))
Mask = createBlockInMask(I->getParent(), Plan);
+ // Determine if the pointer operand of the access is either consecutive or
+ // reverse consecutive.
+ LoopVectorizationCostModel::InstWidening Decision =
+ CM.getWideningDecision(I, Range.Start);
+ bool Reverse = Decision == LoopVectorizationCostModel::CM_Widen_Reverse;
+ bool Consecutive =
+ Reverse || Decision == LoopVectorizationCostModel::CM_Widen;
+
if (LoadInst *Load = dyn_cast<LoadInst>(I))
- return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask);
+ return new VPWidenMemoryInstructionRecipe(*Load, Operands[0], Mask,
+ Consecutive, Reverse);
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenMemoryInstructionRecipe(*Store, Operands[1], Operands[0],
- Mask);
+ Mask, Consecutive, Reverse);
}
VPWidenIntOrFpInductionRecipe *
@@ -8829,7 +8967,7 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
if (!LoopVectorizationPlanner::getDecisionAndClampRange(willWiden, Range))
return nullptr;
- ArrayRef<VPValue *> Ops = Operands.take_front(CI->getNumArgOperands());
+ ArrayRef<VPValue *> Ops = Operands.take_front(CI->arg_size());
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()));
}
@@ -8916,6 +9054,37 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
+ // Even if the instruction is not marked as uniform, there are certain
+ // intrinsic calls that can be effectively treated as such, so we check for
+ // them here. Conservatively, we only do this for scalable vectors, since
+ // for fixed-width VFs we can always fall back on full scalarization.
+ if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
+ switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ case Intrinsic::assume:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ // For scalable vectors if one of the operands is variant then we still
+ // want to mark as uniform, which will generate one instruction for just
+ // the first lane of the vector. We can't scalarize the call in the same
+ // way as for fixed-width vectors because we don't know how many lanes
+ // there are.
+ //
+ // The reasons for doing it this way for scalable vectors are:
+ // 1. For the assume intrinsic generating the instruction for the first
+ // lane is still better than not generating any at all. For
+ // example, the input may be a splat across all lanes.
+ // 2. For the lifetime start/end intrinsics the pointer operand only
+ // does anything useful when the input comes from a stack object,
+ // which suggests it should always be uniform. For non-stack objects
+ // the effect is to poison the object, which still allows us to
+ // remove the call.
+ IsUniform = true;
+ break;
+ default:
+ break;
+ }
+ }
+
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);
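// A hypothetical extraction of the switch above (not part of the patch): it
// names the intrinsics that, for scalable VFs, may be generated for the first
// lane only instead of being fully scalarized.
static bool mayTreatCallAsUniformForScalableVF(const IntrinsicInst &II) {
  switch (II.getIntrinsicID()) {
  case Intrinsic::assume:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
    return true;  // Emit the call for lane 0 only.
  default:
    return false; // Everything else keeps the default replication handling.
  }
}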
@@ -9137,6 +9306,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RecipeBuilder.recordRecipeOf(R);
// For min/max reductions, where we have a pair of icmp/select, we also
// need to record the ICmp recipe, so it can be removed later.
+ assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
+ "Only min/max recurrences allowed for inloop reductions");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
RecipeBuilder.recordRecipeOf(cast<Instruction>(R->getOperand(0)));
}
@@ -9165,22 +9336,27 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- // Create a dummy pre-entry VPBasicBlock to start building the VPlan.
auto Plan = std::make_unique<VPlan>();
- VPBasicBlock *VPBB = new VPBasicBlock("Pre-Entry");
- Plan->setEntry(VPBB);
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
LoopBlocksDFS DFS(OrigLoop);
DFS.perform(LI);
+ VPBasicBlock *VPBB = nullptr;
+ VPBasicBlock *HeaderVPBB = nullptr;
+ SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
unsigned VPBBsForBB = 0;
auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
- VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+ if (VPBB)
+ VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
+ else {
+ Plan->setEntry(FirstVPBBForBB);
+ HeaderVPBB = FirstVPBBForBB;
+ }
VPBB = FirstVPBBForBB;
Builder.setInsertPoint(VPBB);
@@ -9222,6 +9398,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
Plan->addVPValue(UV, Def);
}
+ if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
+ HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
+ // Keep track of VPWidenIntOrFpInductionRecipes not in the phi section
+ // of the header block. That can happen for truncates of induction
+ // variables. Those recipes are moved to the phi section of the header
+ // block after applying SinkAfter, which relies on the original
+ // position of the trunc.
+ assert(isa<TruncInst>(Instr));
+ InductionsToMove.push_back(
+ cast<VPWidenIntOrFpInductionRecipe>(Recipe));
+ }
RecipeBuilder.setRecipe(Instr, Recipe);
VPBB->appendRecipe(Recipe);
continue;
@@ -9239,17 +9426,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
+ assert(isa<VPBasicBlock>(Plan->getEntry()) &&
+ !Plan->getEntry()->getEntryBasicBlock()->empty() &&
+ "entry block must be set to a non-empty VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
- // Discard empty dummy pre-entry VPBasicBlock. Note that other VPBasicBlocks
- // may also be empty, such as the last one VPBB, reflecting original
- // basic-blocks with no recipes.
- VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
- assert(PreEntry->empty() && "Expecting empty pre-entry block.");
- VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
- VPBlockUtils::disconnectBlocks(PreEntry, Entry);
- delete PreEntry;
-
// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
// bring the VPlan to its final state.
@@ -9318,6 +9499,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
+ // Now that sink-after is done, move induction recipes for optimized truncates
+ // to the phi section of the header block.
+ for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
+ Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+
+ // Adjust the recipes for any inloop reductions.
+ adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start);
+
// Introduce a recipe to combine the incoming and previous values of a
// first-order recurrence.
for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
@@ -9325,16 +9514,20 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
if (!RecurPhi)
continue;
+ VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
+ VPBasicBlock *InsertBlock = PrevRecipe->getParent();
+ auto *Region = GetReplicateRegion(PrevRecipe);
+ if (Region)
+ InsertBlock = cast<VPBasicBlock>(Region->getSingleSuccessor());
+ if (Region || PrevRecipe->isPhi())
+ Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
+ else
+ Builder.setInsertPoint(InsertBlock, std::next(PrevRecipe->getIterator()));
+
auto *RecurSplice = cast<VPInstruction>(
Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
{RecurPhi, RecurPhi->getBackedgeValue()}));
- VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
- if (auto *Region = GetReplicateRegion(PrevRecipe)) {
- VPBasicBlock *Succ = cast<VPBasicBlock>(Region->getSingleSuccessor());
- RecurSplice->moveBefore(*Succ, Succ->getFirstNonPhi());
- } else
- RecurSplice->moveAfter(PrevRecipe);
RecurPhi->replaceAllUsesWith(RecurSplice);
// Set the first operand of RecurSplice to RecurPhi again, after replacing
// all users.
@@ -9372,22 +9565,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
}
}
- // Adjust the recipes for any inloop reductions.
- adjustRecipesForInLoopReductions(Plan, RecipeBuilder, Range.Start);
-
- // Finally, if tail is folded by masking, introduce selects between the phi
- // and the live-out instruction of each reduction, at the end of the latch.
- if (CM.foldTailByMasking() && !Legal->getReductionVars().empty()) {
- Builder.setInsertPoint(VPBB);
- auto *Cond = RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
- for (auto &Reduction : Legal->getReductionVars()) {
- if (CM.isInLoopReduction(Reduction.first))
- continue;
- VPValue *Phi = Plan->getOrAddVPValue(Reduction.first);
- VPValue *Red = Plan->getOrAddVPValue(Reduction.second.getLoopExitInstr());
- Builder.createNaryOp(Instruction::Select, {Cond, Red, Phi});
- }
- }
+ // From this point onwards, VPlan-to-VPlan transformations may change the plan
+ // in ways that make accessing values using original IR values incorrect.
+ Plan->disableValue2VPValue();
VPlanTransforms::sinkScalarOperands(*Plan);
VPlanTransforms::mergeReplicateRegions(*Plan);
@@ -9405,6 +9585,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RSO.flush();
Plan->setName(PlanName);
+ assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
}
@@ -9443,12 +9624,14 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
return Plan;
}
-// Adjust the recipes for any inloop reductions. The chain of instructions
-// leading from the loop exit instr to the phi need to be converted to
-// reductions, with one operand being vector and the other being the scalar
-// reduction chain.
-void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
- VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder, ElementCount MinVF) {
+// Adjust the recipes for reductions. For in-loop reductions the chain of
+// instructions leading from the loop exit instr to the phi needs to be converted
+// to reductions, with one operand being a vector and the other being the scalar
+// reduction chain. For other reductions, a select is introduced between the phi
+// and live-out recipes when folding the tail.
+void LoopVectorizationPlanner::adjustRecipesForReductions(
+ VPBasicBlock *LatchVPBB, VPlanPtr &Plan, VPRecipeBuilder &RecipeBuilder,
+ ElementCount MinVF) {
for (auto &Reduction : CM.getInLoopReductionChains()) {
PHINode *Phi = Reduction.first;
RecurrenceDescriptor &RdxDesc = Legal->getReductionVars()[Phi];
@@ -9468,6 +9651,8 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
VPValue *ChainOp = Plan->getVPValue(Chain);
unsigned FirstOpId;
+ assert(!RecurrenceDescriptor::isSelectCmpRecurrenceKind(Kind) &&
+ "Only min/max recurrences allowed for inloop reductions");
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) {
assert(isa<VPWidenSelectRecipe>(WidenRecipe) &&
"Expected to replace a VPWidenSelectSC");
@@ -9505,6 +9690,21 @@ void LoopVectorizationPlanner::adjustRecipesForInLoopReductions(
Chain = R;
}
}
+
+ // If tail is folded by masking, introduce selects between the phi
+ // and the live-out instruction of each reduction, at the end of the latch.
+ if (CM.foldTailByMasking()) {
+ for (VPRecipeBase &R : Plan->getEntry()->getEntryBasicBlock()->phis()) {
+ VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!PhiR || PhiR->isInLoop())
+ continue;
+ Builder.setInsertPoint(LatchVPBB);
+ VPValue *Cond =
+ RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
+ VPValue *Red = PhiR->getBackedgeValue();
+ Builder.createNaryOp(Instruction::Select, {Cond, Red, PhiR});
+ }
+ }
}
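// For the tail-folded case handled just above, each select recipe conceptually
// lowers (per unrolled part) to the pattern sketched below; a minimal IRBuilder
// sketch under that assumption, with a hypothetical function name, not the
// recipe's actual code-generation path:
static Value *tailFoldedReductionSelectSketch(IRBuilderBase &B, Value *Mask,
                                              Value *LoopExitVal,
                                              Value *RdxPhi) {
  // Masked-off (tail) lanes keep the phi value, so they do not perturb the
  // final reduction result.
  return B.CreateSelect(Mask, LoopExitVal, RdxPhi, "rdx.select");
}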
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -9519,9 +9719,22 @@ void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent,
O << ", ";
Mask->printAsOperand(O, SlotTracker);
}
- for (unsigned i = 0; i < IG->getFactor(); ++i)
- if (Instruction *I = IG->getMember(i))
- O << "\n" << Indent << " " << VPlanIngredient(I) << " " << i;
+
+ unsigned OpIdx = 0;
+ for (unsigned i = 0; i < IG->getFactor(); ++i) {
+ if (!IG->getMember(i))
+ continue;
+ if (getNumStoreOperands() > 0) {
+ O << "\n" << Indent << " store ";
+ getOperand(1 + OpIdx)->printAsOperand(O, SlotTracker);
+ O << " to index " << i;
+ } else {
+ O << "\n" << Indent << " ";
+ getVPValue(OpIdx)->printAsOperand(O, SlotTracker);
+ O << " = load from index " << i;
+ }
+ ++OpIdx;
+ }
}
#endif
@@ -9605,17 +9818,20 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), 0);
+ RecurKind Kind = RdxDesc->getRecurrenceKind();
+ bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
+ // Propagate the fast-math flags carried by the underlying instruction.
+ IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
+ State.Builder.setFastMathFlags(RdxDesc->getFastMathFlags());
for (unsigned Part = 0; Part < State.UF; ++Part) {
- RecurKind Kind = RdxDesc->getRecurrenceKind();
- bool IsOrdered = State.ILV->useOrderedReductions(*RdxDesc);
Value *NewVecOp = State.get(getVecOp(), Part);
if (VPValue *Cond = getCondOp()) {
Value *NewCond = State.get(Cond, Part);
VectorType *VecTy = cast<VectorType>(NewVecOp->getType());
- Constant *Iden = RecurrenceDescriptor::getRecurrenceIdentity(
+ Value *Iden = RdxDesc->getRecurrenceIdentity(
Kind, VecTy->getElementType(), RdxDesc->getFastMathFlags());
- Constant *IdenVec =
- ConstantVector::getSplat(VecTy->getElementCount(), Iden);
+ Value *IdenVec =
+ State.Builder.CreateVectorSplat(VecTy->getElementCount(), Iden);
Value *Select = State.Builder.CreateSelect(NewCond, NewVecOp, IdenVec);
NewVecOp = Select;
}
@@ -9627,8 +9843,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
PrevInChain);
else
NewRed = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(),
- PrevInChain, NewVecOp);
+ (Instruction::BinaryOps)RdxDesc->getOpcode(Kind), PrevInChain,
+ NewVecOp);
PrevInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), Part);
@@ -9640,11 +9856,10 @@ void VPReductionRecipe::execute(VPTransformState &State) {
NewRed, PrevInChain);
} else if (IsOrdered)
NextInChain = NewRed;
- else {
+ else
NextInChain = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)getUnderlyingInstr()->getOpcode(), NewRed,
+ (Instruction::BinaryOps)RdxDesc->getOpcode(Kind), NewRed,
PrevInChain);
- }
State.set(this, NextInChain, Part);
}
}
@@ -9757,7 +9972,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
State.ILV->vectorizeMemoryInstruction(
&Ingredient, State, StoredValue ? nullptr : getVPSingleValue(), getAddr(),
- StoredValue, getMask());
+ StoredValue, getMask(), Consecutive, Reverse);
}
// Determine how to lower the scalar epilogue, which depends on 1) optimising
@@ -9923,7 +10138,7 @@ static bool processLoopInVPlanNativePath(
VectorizationFactor::Disabled() == VF)
return false;
- LVP.setBestPlan(VF.Width, 1);
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
{
GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
@@ -9932,7 +10147,7 @@ static bool processLoopInVPlanNativePath(
&CM, BFI, PSI, Checks);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
- LVP.executePlan(LB, DT);
+ LVP.executePlan(VF.Width, 1, BestPlan, LB, DT);
}
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -10103,7 +10318,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}
- if (!LVL.canVectorizeFPMath(EnableStrictReductions)) {
+ bool AllowOrderedReductions;
+ // If the flag is set, use that instead and override the TTI behaviour.
+ if (ForceOrderedReductions.getNumOccurrences() > 0)
+ AllowOrderedReductions = ForceOrderedReductions;
+ else
+ AllowOrderedReductions = TTI->enableOrderedReductions();
+ if (!LVL.canVectorizeFPMath(AllowOrderedReductions)) {
ORE->emit([&]() {
auto *ExactFPMathInst = Requirements.getExactFPInst();
return OptimizationRemarkAnalysisFPCommute(DEBUG_TYPE, "CantReorderFPOps",
@@ -10248,7 +10469,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
F->getParent()->getDataLayout());
if (!VF.Width.isScalar() || IC > 1)
Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
- LVP.setBestPlan(VF.Width, IC);
using namespace ore;
if (!VectorizeLoop) {
@@ -10257,7 +10477,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// interleave it.
InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
&CM, BFI, PSI, Checks);
- LVP.executePlan(Unroller, DT);
+
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT);
ORE->emit([&]() {
return OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
@@ -10276,14 +10498,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// The first pass vectorizes the main loop and creates a scalar epilogue
// to be vectorized by executing the plan (potentially with a different
// factor) again shortly afterwards.
- EpilogueLoopVectorizationInfo EPI(VF.Width.getKnownMinValue(), IC,
- EpilogueVF.Width.getKnownMinValue(),
- 1);
+ EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
EPI, &LVL, &CM, BFI, PSI, Checks);
- LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
- LVP.executePlan(MainILV, DT);
+ VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
+ LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV,
+ DT);
++LoopsVectorized;
simplifyLoop(L, DT, LI, SE, AC, nullptr, false /* PreserveLCSSA */);
@@ -10291,13 +10512,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
- LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
EPI.MainLoopVF = EPI.EpilogueVF;
EPI.MainLoopUF = EPI.EpilogueUF;
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
ORE, EPI, &LVL, &CM, BFI, PSI,
Checks);
- LVP.executePlan(EpilogILV, DT);
+
+ VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF);
+ LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
+ DT);
++LoopsEpilogueVectorized;
if (!MainILV.areSafetyChecksAdded())
@@ -10305,7 +10528,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
&LVL, &CM, BFI, PSI, Checks);
- LVP.executePlan(LB, DT);
+
+ VPlan &BestPlan = LVP.getBestPlanFor(VF.Width);
+ LVP.executePlan(VF.Width, IC, BestPlan, LB, DT);
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there
@@ -10423,15 +10648,12 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- MemorySSA *MSSA = EnableMSSALoopDependency
- ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA()
- : nullptr;
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
- LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
- TLI, TTI, nullptr, MSSA};
+ LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE,
+ TLI, TTI, nullptr, nullptr, nullptr};
return LAM.getResult<LoopAccessAnalysis>(L, AR);
};
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
@@ -10455,3 +10677,14 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
PA.preserveSet<CFGAnalyses>();
return PA;
}
+
+void LoopVectorizePass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopVectorizePass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+
+ OS << "<";
+ OS << (InterleaveOnlyWhenForced ? "" : "no-") << "interleave-forced-only;";
+ OS << (VectorizeOnlyWhenForced ? "" : "no-") << "vectorize-forced-only;";
+ OS << ">";
+}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index cc3f5c7d4b48..e3ef0b794f68 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
@@ -200,12 +201,39 @@ static bool isValidElementType(Type *Ty) {
!Ty->isPPC_FP128Ty();
}
+/// \returns True if the value is a constant (but not globals/constant
+/// expressions).
+static bool isConstant(Value *V) {
+ return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
+}
+
+/// Checks if \p V is one of the vector-like instructions, i.e. undef, an
+/// insertelement/extractelement with constant indices for a fixed vector type,
+/// or an extractvalue instruction.
+static bool isVectorLikeInstWithConstOps(Value *V) {
+ if (!isa<InsertElementInst, ExtractElementInst>(V) &&
+ !isa<ExtractValueInst, UndefValue>(V))
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I || isa<ExtractValueInst>(I))
+ return true;
+ if (!isa<FixedVectorType>(I->getOperand(0)->getType()))
+ return false;
+ if (isa<ExtractElementInst>(I))
+ return isConstant(I->getOperand(1));
+ assert(isa<InsertElementInst>(V) && "Expected only insertelement.");
+ return isConstant(I->getOperand(2));
+}
+
/// \returns true if all of the instructions in \p VL are in the same block or
/// false otherwise.
static bool allSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
return false;
+ if (all_of(VL, isVectorLikeInstWithConstOps))
+ return true;
+
BasicBlock *BB = I0->getParent();
for (int I = 1, E = VL.size(); I < E; I++) {
auto *II = dyn_cast<Instruction>(VL[I]);
@@ -218,12 +246,6 @@ static bool allSameBlock(ArrayRef<Value *> VL) {
return true;
}
-/// \returns True if the value is a constant (but not globals/constant
-/// expressions).
-static bool isConstant(Value *V) {
- return isa<Constant>(V) && !isa<ConstantExpr>(V) && !isa<GlobalValue>(V);
-}
-
/// \returns True if all of the values in \p VL are constants (but not
/// globals/constant expressions).
static bool allConstant(ArrayRef<Value *> VL) {
@@ -232,12 +254,21 @@ static bool allConstant(ArrayRef<Value *> VL) {
return all_of(VL, isConstant);
}
-/// \returns True if all of the values in \p VL are identical.
+/// \returns True if all of the values in \p VL are identical or some of them
+/// are UndefValue.
static bool isSplat(ArrayRef<Value *> VL) {
- for (unsigned i = 1, e = VL.size(); i < e; ++i)
- if (VL[i] != VL[0])
+ Value *FirstNonUndef = nullptr;
+ for (Value *V : VL) {
+ if (isa<UndefValue>(V))
+ continue;
+ if (!FirstNonUndef) {
+ FirstNonUndef = V;
+ continue;
+ }
+ if (V != FirstNonUndef)
return false;
- return true;
+ }
+ return FirstNonUndef != nullptr;
}
/// \returns True if \p I is commutative, handles CmpInst and BinaryOperator.
@@ -295,8 +326,10 @@ static bool isCommutative(Instruction *I) {
/// TODO: Can we split off and reuse the shuffle mask detection from
/// TargetTransformInfo::getInstructionThroughput?
static Optional<TargetTransformInfo::ShuffleKind>
-isShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
+isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
auto *EI0 = cast<ExtractElementInst>(VL[0]);
+ if (isa<ScalableVectorType>(EI0->getVectorOperandType()))
+ return None;
unsigned Size =
cast<FixedVectorType>(EI0->getVectorOperandType())->getNumElements();
Value *Vec1 = nullptr;
@@ -504,7 +537,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
case Instruction::Call: {
CallInst *CI = cast<CallInst>(UserInst);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
if (hasVectorInstrinsicScalarOpd(ID, i))
return (CI->getArgOperand(i) == Scalar);
}
@@ -535,13 +568,67 @@ static bool isSimple(Instruction *I) {
return true;
}
+/// Shuffles \p Mask in accordance with the given \p SubMask.
+static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
+ if (SubMask.empty())
+ return;
+ if (Mask.empty()) {
+ Mask.append(SubMask.begin(), SubMask.end());
+ return;
+ }
+ SmallVector<int> NewMask(SubMask.size(), UndefMaskElem);
+ int TermValue = std::min(Mask.size(), SubMask.size());
+ for (int I = 0, E = SubMask.size(); I < E; ++I) {
+ if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
+ Mask[SubMask[I]] >= TermValue)
+ continue;
+ NewMask[I] = Mask[SubMask[I]];
+ }
+ Mask.swap(NewMask);
+}
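// A standalone model of the mask composition above (illustrative only),
// written against std::vector with -1 standing in for UndefMaskElem:
#include <algorithm>
#include <cassert>
#include <vector>

static void addMaskModel(std::vector<int> &Mask, const std::vector<int> &Sub) {
  if (Sub.empty())
    return;
  if (Mask.empty()) {
    Mask = Sub;
    return;
  }
  std::vector<int> NewMask(Sub.size(), -1);
  int TermValue = static_cast<int>(std::min(Mask.size(), Sub.size()));
  for (int I = 0, E = static_cast<int>(Sub.size()); I < E; ++I) {
    // Keep undef wherever either mask runs out of defined elements.
    if (Sub[I] == -1 || Sub[I] >= TermValue || Mask[Sub[I]] >= TermValue)
      continue;
    NewMask[I] = Mask[Sub[I]];
  }
  Mask.swap(NewMask);
}

int main() {
  std::vector<int> Mask = {1, 0, 2};
  addMaskModel(Mask, {2, -1, 0}); // Shuffle the already-shuffled result again.
  assert((Mask == std::vector<int>{2, -1, 1}));
}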
+
+/// \p Order may have elements assigned the special value (size), which is out
+/// of bounds. Such indices only appear in places which correspond to undef
+/// values (see canReuseExtract for details) and are used to keep undef values
+/// from affecting the ordering of the operands.
+/// The first loop below finds all unused indices and the following loop nest
+/// assigns those indices to the undef-value positions.
+/// In the example below, Order has two undef positions, which are assigned the
+/// values 3 and 7 respectively:
+/// before: 6 9 5 4 9 2 1 0
+/// after: 6 3 5 4 7 2 1 0
+static void fixupOrderingIndices(SmallVectorImpl<unsigned> &Order) {
+ const unsigned Sz = Order.size();
+ SmallBitVector UsedIndices(Sz);
+ SmallVector<int> MaskedIndices;
+ for (unsigned I = 0; I < Sz; ++I) {
+ if (Order[I] < Sz)
+ UsedIndices.set(Order[I]);
+ else
+ MaskedIndices.push_back(I);
+ }
+ if (MaskedIndices.empty())
+ return;
+ SmallVector<int> AvailableIndices(MaskedIndices.size());
+ unsigned Cnt = 0;
+ int Idx = UsedIndices.find_first_unset();
+ do {
+ AvailableIndices[Cnt] = Idx;
+ Idx = UsedIndices.find_next_unset(Idx);
+ ++Cnt;
+ } while (Idx > 0);
+ assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
+ for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
+ Order[MaskedIndices[I]] = AvailableIndices[I];
+}
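// A standalone model of the fixup above (illustrative only). It assumes, as
// the real code does, that the number of masked positions equals the number of
// unused indices:
#include <cassert>
#include <vector>

static void fixupOrderingModel(std::vector<unsigned> &Order) {
  const unsigned Sz = Order.size();
  std::vector<bool> Used(Sz, false);
  std::vector<unsigned> MaskedPositions;
  for (unsigned I = 0; I < Sz; ++I) {
    if (Order[I] < Sz)
      Used[Order[I]] = true;        // In-bounds index: mark as taken.
    else
      MaskedPositions.push_back(I); // Out-of-bounds marker: fix up later.
  }
  unsigned NextUnused = 0;
  for (unsigned P : MaskedPositions) {
    while (Used[NextUnused])
      ++NextUnused;                 // Find the next index nobody uses.
    Order[P] = NextUnused++;
  }
}

int main() {
  // The example from the comment above: positions 1 and 4 hold the marker.
  std::vector<unsigned> Order = {6, 9, 5, 4, 9, 2, 1, 0};
  fixupOrderingModel(Order);
  assert((Order == std::vector<unsigned>{6, 3, 5, 4, 7, 2, 1, 0}));
}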
+
namespace llvm {
static void inversePermutation(ArrayRef<unsigned> Indices,
SmallVectorImpl<int> &Mask) {
Mask.clear();
const unsigned E = Indices.size();
- Mask.resize(E, E + 1);
+ Mask.resize(E, UndefMaskElem);
for (unsigned I = 0; I < E; ++I)
Mask[Indices[I]] = I;
}
@@ -581,6 +668,22 @@ static Optional<int> getInsertIndex(Value *InsertInst, unsigned Offset) {
return Index;
}
+/// Reorders the list of scalars \p Scalars in accordance with the given
+/// \p Mask: the scalar at position I is moved to position Mask[I], and
+/// positions that are not targeted by any mask element are filled with undef
+/// values.
+static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
+ ArrayRef<int> Mask) {
+ assert(!Mask.empty() && "Expected non-empty mask.");
+ SmallVector<Value *> Prev(Scalars.size(),
+ UndefValue::get(Scalars.front()->getType()));
+ Prev.swap(Scalars);
+ for (unsigned I = 0, E = Prev.size(); I < E; ++I)
+ if (Mask[I] != UndefMaskElem)
+ Scalars[Mask[I]] = Prev[I];
+}
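// A standalone model of the scatter above (illustrative only): element I of
// the old list moves to position Mask[I]; untouched positions stay "undef"
// (modelled here as -1):
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Scalars = {10, 20, 30, 40};
  const std::vector<int> Mask = {2, 0, 3, 1};
  std::vector<int> Prev(Scalars.size(), -1); // "undef" placeholders.
  Prev.swap(Scalars);
  for (unsigned I = 0, E = Prev.size(); I < E; ++I)
    if (Mask[I] != -1)
      Scalars[Mask[I]] = Prev[I];
  assert((Scalars == std::vector<int>{20, 40, 10, 30}));
}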
+
namespace slpvectorizer {
/// Bottom Up SLP Vectorizer.
@@ -645,13 +748,12 @@ public:
void buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst = None);
- /// Construct a vectorizable tree that starts at \p Roots, ignoring users for
- /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking
- /// into account (and updating it, if required) list of externally used
- /// values stored in \p ExternallyUsedValues.
- void buildTree(ArrayRef<Value *> Roots,
- ExtraValueToDebugLocsMap &ExternallyUsedValues,
- ArrayRef<Value *> UserIgnoreLst = None);
+ /// Builds external uses of the vectorized scalars, i.e. the list of
+ /// vectorized scalars to be extracted, their lanes and their scalar users. \p
+ /// ExternallyUsedValues contains an additional list of external uses to handle
+ /// vectorization of reductions.
+ void
+ buildExternalUses(const ExtraValueToDebugLocsMap &ExternallyUsedValues = {});
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
@@ -659,8 +761,6 @@ public:
ScalarToTreeEntry.clear();
MustGather.clear();
ExternalUses.clear();
- NumOpsWantToKeepOrder.clear();
- NumOpsWantToKeepOriginalOrder = 0;
for (auto &Iter : BlocksSchedules) {
BlockScheduling *BS = Iter.second.get();
BS->clear();
@@ -674,103 +774,28 @@ public:
/// Perform LICM and CSE on the newly generated gather sequences.
void optimizeGatherSequence();
- /// \returns The best order of instructions for vectorization.
- Optional<ArrayRef<unsigned>> bestOrder() const {
- assert(llvm::all_of(
- NumOpsWantToKeepOrder,
- [this](const decltype(NumOpsWantToKeepOrder)::value_type &D) {
- return D.getFirst().size() ==
- VectorizableTree[0]->Scalars.size();
- }) &&
- "All orders must have the same size as number of instructions in "
- "tree node.");
- auto I = std::max_element(
- NumOpsWantToKeepOrder.begin(), NumOpsWantToKeepOrder.end(),
- [](const decltype(NumOpsWantToKeepOrder)::value_type &D1,
- const decltype(NumOpsWantToKeepOrder)::value_type &D2) {
- return D1.second < D2.second;
- });
- if (I == NumOpsWantToKeepOrder.end() ||
- I->getSecond() <= NumOpsWantToKeepOriginalOrder)
- return None;
-
- return makeArrayRef(I->getFirst());
- }
-
- /// Builds the correct order for root instructions.
- /// If some leaves have the same instructions to be vectorized, we may
- /// incorrectly evaluate the best order for the root node (it is built for the
- /// vector of instructions without repeated instructions and, thus, has less
- /// elements than the root node). This function builds the correct order for
- /// the root node.
- /// For example, if the root node is \<a+b, a+c, a+d, f+e\>, then the leaves
- /// are \<a, a, a, f\> and \<b, c, d, e\>. When we try to vectorize the first
- /// leaf, it will be shrink to \<a, b\>. If instructions in this leaf should
- /// be reordered, the best order will be \<1, 0\>. We need to extend this
- /// order for the root node. For the root node this order should look like
- /// \<3, 0, 1, 2\>. This function extends the order for the reused
- /// instructions.
- void findRootOrder(OrdersType &Order) {
- // If the leaf has the same number of instructions to vectorize as the root
- // - order must be set already.
- unsigned RootSize = VectorizableTree[0]->Scalars.size();
- if (Order.size() == RootSize)
- return;
- SmallVector<unsigned, 4> RealOrder(Order.size());
- std::swap(Order, RealOrder);
- SmallVector<int, 4> Mask;
- inversePermutation(RealOrder, Mask);
- Order.assign(Mask.begin(), Mask.end());
- // The leaf has less number of instructions - need to find the true order of
- // the root.
- // Scan the nodes starting from the leaf back to the root.
- const TreeEntry *PNode = VectorizableTree.back().get();
- SmallVector<const TreeEntry *, 4> Nodes(1, PNode);
- SmallPtrSet<const TreeEntry *, 4> Visited;
- while (!Nodes.empty() && Order.size() != RootSize) {
- const TreeEntry *PNode = Nodes.pop_back_val();
- if (!Visited.insert(PNode).second)
- continue;
- const TreeEntry &Node = *PNode;
- for (const EdgeInfo &EI : Node.UserTreeIndices)
- if (EI.UserTE)
- Nodes.push_back(EI.UserTE);
- if (Node.ReuseShuffleIndices.empty())
- continue;
- // Build the order for the parent node.
- OrdersType NewOrder(Node.ReuseShuffleIndices.size(), RootSize);
- SmallVector<unsigned, 4> OrderCounter(Order.size(), 0);
- // The algorithm of the order extension is:
- // 1. Calculate the number of the same instructions for the order.
- // 2. Calculate the index of the new order: total number of instructions
- // with order less than the order of the current instruction + reuse
- // number of the current instruction.
- // 3. The new order is just the index of the instruction in the original
- // vector of the instructions.
- for (unsigned I : Node.ReuseShuffleIndices)
- ++OrderCounter[Order[I]];
- SmallVector<unsigned, 4> CurrentCounter(Order.size(), 0);
- for (unsigned I = 0, E = Node.ReuseShuffleIndices.size(); I < E; ++I) {
- unsigned ReusedIdx = Node.ReuseShuffleIndices[I];
- unsigned OrderIdx = Order[ReusedIdx];
- unsigned NewIdx = 0;
- for (unsigned J = 0; J < OrderIdx; ++J)
- NewIdx += OrderCounter[J];
- NewIdx += CurrentCounter[OrderIdx];
- ++CurrentCounter[OrderIdx];
- assert(NewOrder[NewIdx] == RootSize &&
- "The order index should not be written already.");
- NewOrder[NewIdx] = I;
- }
- std::swap(Order, NewOrder);
- }
- assert(Order.size() == RootSize &&
- "Root node is expected or the size of the order must be the same as "
- "the number of elements in the root node.");
- assert(llvm::all_of(Order,
- [RootSize](unsigned Val) { return Val != RootSize; }) &&
- "All indices must be initialized");
- }
+ /// Checks if the specified gather tree entry \p TE can be represented as a
+ /// shuffled vector entry + (possibly) permutation with other gathers. It
+ /// implements the checks only for possibly ordered scalars (Loads,
+ /// ExtractElement, ExtractValue), which can be part of the graph.
+ Optional<OrdersType> findReusedOrderedScalars(const TreeEntry &TE);
+
+ /// Reorders the current graph to the most profitable order starting from the
+ /// root node to the leaf nodes. The best order is chosen only from the nodes
+ /// of the same size (vectorization factor). Smaller nodes are considered
+ /// parts of a subgraph with a smaller VF and they are reordered independently.
+ /// We can do this because we still need to extend smaller nodes to the wider VF
+ /// and we can merge reordering shuffles with the widening shuffles.
+ void reorderTopToBottom();
+
+ /// Reorders the current graph to the most profitable order starting from
+ /// leaves to the root. It allows us to rotate small subgraphs and reduce the
+ /// number of reshuffles if the leaf nodes use the same order. In this case we
+ /// can merge the orders and just shuffle the user node instead of shuffling its
+ /// operands. Moreover, even if the leaf nodes have different orders, it allows
+ /// us to sink the reordering in the graph closer to the root node and merge it
+ /// later during analysis.
+ void reorderBottomToTop(bool IgnoreReorder = false);
/// \return The vector element size in bits to use when vectorizing the
/// expression tree ending at \p V. If V is a store, the size is the width of
@@ -793,6 +818,10 @@ public:
return MinVecRegSize;
}
+ unsigned getMinVF(unsigned Sz) const {
+ return std::max(2U, getMinVecRegSize() / Sz);
+ }
+
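// Worked example for getMinVF above (hypothetical numbers): with a minimum
// vector register width of 128 bits and 32-bit scalars,
// getMinVF(32) == std::max(2U, 128U / 32U) == 4, i.e. SLP starts from vectors
// of at least four such scalars.
static_assert((128U / 32U > 2U ? 128U / 32U : 2U) == 4,
              "illustrative arithmetic only");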
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
unsigned MaxVF = MaxVFOption.getNumOccurrences() ?
MaxVFOption : TTI->getMaximumVF(ElemWidth, Opcode);
@@ -809,7 +838,7 @@ public:
/// \returns True if the VectorizableTree is both tiny and not fully
/// vectorizable. We do not vectorize such trees.
- bool isTreeTinyAndNotFullyVectorizable() const;
+ bool isTreeTinyAndNotFullyVectorizable(bool ForReduction = false) const;
/// Assume that a legal-sized 'or'-reduction of shifted/zexted loaded values
/// can be load combined in the backend. Load combining may not be allowed in
@@ -1578,10 +1607,12 @@ private:
Value *vectorizeTree(ArrayRef<Value *> VL);
/// \returns the scalarization cost for this type. Scalarization in this
- /// context means the creation of vectors from a group of scalars.
- InstructionCost
- getGatherCost(FixedVectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices) const;
+ /// context means the creation of vectors from a group of scalars. If \p
+ /// NeedToShuffle is true, the cost of reshuffling some of the vector elements
+ /// is added as well.
+ InstructionCost getGatherCost(FixedVectorType *Ty,
+ const DenseSet<unsigned> &ShuffledIndices,
+ bool NeedToShuffle) const;
/// Checks if the gathered \p VL can be represented as shuffle(s) of previous
/// tree entries.
@@ -1605,7 +1636,7 @@ private:
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
- bool isFullyVectorizableTinyTree() const;
+ bool isFullyVectorizableTinyTree(bool ForReduction) const;
/// Reorder commutative or alt operands to get better probability of
/// generating vectorized code.
@@ -1621,14 +1652,43 @@ private:
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
- if (VL.size() == Scalars.size())
- return std::equal(VL.begin(), VL.end(), Scalars.begin());
- return VL.size() == ReuseShuffleIndices.size() &&
- std::equal(
- VL.begin(), VL.end(), ReuseShuffleIndices.begin(),
- [this](Value *V, int Idx) { return V == Scalars[Idx]; });
+ auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
+ if (Mask.size() != VL.size() && VL.size() == Scalars.size())
+ return std::equal(VL.begin(), VL.end(), Scalars.begin());
+ return VL.size() == Mask.size() &&
+ std::equal(VL.begin(), VL.end(), Mask.begin(),
+ [Scalars](Value *V, int Idx) {
+ return (isa<UndefValue>(V) &&
+ Idx == UndefMaskElem) ||
+ (Idx != UndefMaskElem && V == Scalars[Idx]);
+ });
+ };
+ if (!ReorderIndices.empty()) {
+ // TODO: implement matching if the nodes are just reordered, still can
+ // treat the vector as the same if the list of scalars matches VL
+ // directly, without reordering.
+ SmallVector<int> Mask;
+ inversePermutation(ReorderIndices, Mask);
+ if (VL.size() == Scalars.size())
+ return IsSame(Scalars, Mask);
+ if (VL.size() == ReuseShuffleIndices.size()) {
+ ::addMask(Mask, ReuseShuffleIndices);
+ return IsSame(Scalars, Mask);
+ }
+ return false;
+ }
+ return IsSame(Scalars, ReuseShuffleIndices);
}
+ /// \return Final vectorization factor for the node. Defined by the total
+ /// number of vectorized scalars, including those used several times in the
+ /// entry and counted in \a ReuseShuffleIndices, if any.
+ unsigned getVectorFactor() const {
+ if (!ReuseShuffleIndices.empty())
+ return ReuseShuffleIndices.size();
+ return Scalars.size();
+ };
+
/// A vector of scalars.
ValueList Scalars;
@@ -1701,6 +1761,12 @@ private:
}
}
+ /// Reorders operands of the node to the given mask \p Mask.
+ void reorderOperands(ArrayRef<int> Mask) {
+ for (ValueList &Operand : Operands)
+ reorderScalars(Operand, Mask);
+ }
+
/// \returns the \p OpIdx operand of this TreeEntry.
ValueList &getOperand(unsigned OpIdx) {
assert(OpIdx < Operands.size() && "Off bounds");
@@ -1760,19 +1826,14 @@ private:
return AltOp ? AltOp->getOpcode() : 0;
}
- /// Update operations state of this entry if reorder occurred.
- bool updateStateIfReorder() {
- if (ReorderIndices.empty())
- return false;
- InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
- setOperations(S);
- return true;
- }
- /// When ReuseShuffleIndices is empty it just returns position of \p V
- /// within vector of Scalars. Otherwise, try to remap on its reuse index.
+ /// When ReuseShuffleIndices and ReorderIndices are empty it just returns the
+ /// position of \p V within the vector of Scalars. Otherwise, tries to remap
+ /// \p V through the reorder and reuse indices.
int findLaneForValue(Value *V) const {
unsigned FoundLane = std::distance(Scalars.begin(), find(Scalars, V));
assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
+ if (!ReorderIndices.empty())
+ FoundLane = ReorderIndices[FoundLane];
+ assert(FoundLane < Scalars.size() && "Couldn't find extract lane");
if (!ReuseShuffleIndices.empty()) {
FoundLane = std::distance(ReuseShuffleIndices.begin(),
find(ReuseShuffleIndices, FoundLane));
@@ -1856,7 +1917,7 @@ private:
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<int> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
TreeEntry::EntryState EntryState =
Bundle ? TreeEntry::Vectorize : TreeEntry::NeedToGather;
@@ -1869,7 +1930,7 @@ private:
Optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
- ArrayRef<unsigned> ReuseShuffleIndices = None,
+ ArrayRef<int> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
assert(((!Bundle && EntryState == TreeEntry::NeedToGather) ||
(Bundle && EntryState != TreeEntry::NeedToGather)) &&
@@ -1877,12 +1938,25 @@ private:
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
- Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->State = EntryState;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
ReuseShuffleIndices.end());
- Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
- Last->setOperations(S);
+ if (ReorderIndices.empty()) {
+ Last->Scalars.assign(VL.begin(), VL.end());
+ Last->setOperations(S);
+ } else {
+ // Reorder scalars and build final mask.
+ Last->Scalars.assign(VL.size(), nullptr);
+ transform(ReorderIndices, Last->Scalars.begin(),
+ [VL](unsigned Idx) -> Value * {
+ if (Idx >= VL.size())
+ return UndefValue::get(VL.front()->getType());
+ return VL[Idx];
+ });
+ InstructionsState S = getSameOpcode(Last->Scalars);
+ Last->setOperations(S);
+ Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
+ }
if (Last->State != TreeEntry::NeedToGather) {
for (Value *V : VL) {
assert(!getTreeEntry(V) && "Scalar already in tree!");
@@ -1965,12 +2039,9 @@ private:
if (result.hasValue()) {
return result.getValue();
}
- MemoryLocation Loc2 = getLocation(Inst2, AA);
bool aliased = true;
- if (Loc1.Ptr && Loc2.Ptr && isSimple(Inst1) && isSimple(Inst2)) {
- // Do the alias check.
- aliased = !AA->isNoAlias(Loc1, Loc2);
- }
+ if (Loc1.Ptr && isSimple(Inst1))
+ aliased = isModOrRefSet(AA->getModRefInfo(Inst2, Loc1));
// Store the result in the cache.
result = aliased;
return aliased;
@@ -2434,14 +2505,6 @@ private:
}
};
- /// Contains orders of operations along with the number of bundles that have
- /// operations in this order. It stores only those orders that require
- /// reordering, if reordering is not required it is counted using \a
- /// NumOpsWantToKeepOriginalOrder.
- DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo> NumOpsWantToKeepOrder;
- /// Number of bundles that do not require reordering.
- unsigned NumOpsWantToKeepOriginalOrder = 0;
-
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
@@ -2540,10 +2603,8 @@ template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *R) {
std::string Str;
raw_string_ostream OS(Str);
- if (isSplat(Entry->Scalars)) {
- OS << "<splat> " << *Entry->Scalars[0];
- return Str;
- }
+ if (isSplat(Entry->Scalars))
+ OS << "<splat> ";
for (auto V : Entry->Scalars) {
OS << *V;
if (llvm::any_of(R->ExternalUses, [&](const BoUpSLP::ExternalUser &EU) {
@@ -2594,21 +2655,539 @@ void BoUpSLP::eraseInstructions(ArrayRef<Value *> AV) {
};
}
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- ArrayRef<Value *> UserIgnoreLst) {
- ExtraValueToDebugLocsMap ExternallyUsedValues;
- buildTree(Roots, ExternallyUsedValues, UserIgnoreLst);
+/// Reorders the given \p Reuses mask according to the given \p Mask. \p Reuses
+/// contains the original mask for the scalars reused in the node. The
+/// procedure transforms this mask in accordance with the given \p Mask.
+static void reorderReuses(SmallVectorImpl<int> &Reuses, ArrayRef<int> Mask) {
+ assert(!Mask.empty() && Reuses.size() == Mask.size() &&
+ "Expected non-empty mask.");
+ SmallVector<int> Prev(Reuses.begin(), Reuses.end());
+ Prev.swap(Reuses);
+ for (unsigned I = 0, E = Prev.size(); I < E; ++I)
+ if (Mask[I] != UndefMaskElem)
+ Reuses[Mask[I]] = Prev[I];
}
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
- ExtraValueToDebugLocsMap &ExternallyUsedValues,
- ArrayRef<Value *> UserIgnoreLst) {
- deleteTree();
- UserIgnoreList = UserIgnoreLst;
- if (!allSameType(Roots))
+/// Reorders the given \p Order according to the given \p Mask. \p Order is
+/// the original order of the scalars. The procedure transforms the provided order
+/// in accordance with the given \p Mask. If the resulting \p Order is just an
+/// identity order, \p Order is cleared.
+static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
+ assert(!Mask.empty() && "Expected non-empty mask.");
+ SmallVector<int> MaskOrder;
+ if (Order.empty()) {
+ MaskOrder.resize(Mask.size());
+ std::iota(MaskOrder.begin(), MaskOrder.end(), 0);
+ } else {
+ inversePermutation(Order, MaskOrder);
+ }
+ reorderReuses(MaskOrder, Mask);
+ if (ShuffleVectorInst::isIdentityMask(MaskOrder)) {
+ Order.clear();
return;
- buildTree_rec(Roots, 0, EdgeInfo());
+ }
+ Order.assign(Mask.size(), Mask.size());
+ for (unsigned I = 0, E = Mask.size(); I < E; ++I)
+ if (MaskOrder[I] != UndefMaskElem)
+ Order[MaskOrder[I]] = I;
+ fixupOrderingIndices(Order);
+}
+
+Optional<BoUpSLP::OrdersType>
+BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
+ assert(TE.State == TreeEntry::NeedToGather && "Expected gather node only.");
+ unsigned NumScalars = TE.Scalars.size();
+ OrdersType CurrentOrder(NumScalars, NumScalars);
+ SmallVector<int> Positions;
+ SmallBitVector UsedPositions(NumScalars);
+ const TreeEntry *STE = nullptr;
+ // Try to find all gathered scalars that are vectorized in another vectorized
+ // node. Only a single tree vector node is allowed here, so that the order of
+ // the gathered scalars can be identified correctly.
+ for (unsigned I = 0; I < NumScalars; ++I) {
+ Value *V = TE.Scalars[I];
+ if (!isa<LoadInst, ExtractElementInst, ExtractValueInst>(V))
+ continue;
+ if (const auto *LocalSTE = getTreeEntry(V)) {
+ if (!STE)
+ STE = LocalSTE;
+ else if (STE != LocalSTE)
+ // Take the order only from the single vector node.
+ return None;
+ unsigned Lane =
+ std::distance(STE->Scalars.begin(), find(STE->Scalars, V));
+ if (Lane >= NumScalars)
+ return None;
+ if (CurrentOrder[Lane] != NumScalars) {
+ if (Lane != I)
+ continue;
+ UsedPositions.reset(CurrentOrder[Lane]);
+ }
+ // The partial identity (where only some elements of the gather node are
+ // in the identity order) is good.
+ CurrentOrder[Lane] = I;
+ UsedPositions.set(I);
+ }
+ }
+ // Need to keep the order if we have a vector entry and at least 2 scalars or
+ // the vectorized entry has just 2 scalars.
+ if (STE && (UsedPositions.count() > 1 || STE->Scalars.size() == 2)) {
+ auto &&IsIdentityOrder = [NumScalars](ArrayRef<unsigned> CurrentOrder) {
+ for (unsigned I = 0; I < NumScalars; ++I)
+ if (CurrentOrder[I] != I && CurrentOrder[I] != NumScalars)
+ return false;
+ return true;
+ };
+ if (IsIdentityOrder(CurrentOrder)) {
+ CurrentOrder.clear();
+ return CurrentOrder;
+ }
+ auto *It = CurrentOrder.begin();
+ for (unsigned I = 0; I < NumScalars;) {
+ if (UsedPositions.test(I)) {
+ ++I;
+ continue;
+ }
+ if (*It == NumScalars) {
+ *It = I;
+ ++I;
+ }
+ ++It;
+ }
+ return CurrentOrder;
+ }
+ return None;
+}
+void BoUpSLP::reorderTopToBottom() {
+ // Maps VF to the graph nodes.
+ DenseMap<unsigned, SmallPtrSet<TreeEntry *, 4>> VFToOrderedEntries;
+ // ExtractElement gather nodes which can be vectorized and need to handle
+ // their ordering.
+ DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+ // Find all reorderable nodes with the given VF.
+ // Currently these are vectorized loads, extracts + some gathering of extracts.
+ for_each(VectorizableTree, [this, &VFToOrderedEntries, &GathersToOrders](
+ const std::unique_ptr<TreeEntry> &TE) {
+ // No need to reorder if we need to shuffle reuses; the node still has to be
+ // shuffled anyway.
+ if (!TE->ReuseShuffleIndices.empty())
+ return;
+ if (TE->State == TreeEntry::Vectorize &&
+ isa<LoadInst, ExtractElementInst, ExtractValueInst, StoreInst,
+ InsertElementInst>(TE->getMainOp()) &&
+ !TE->isAltShuffle()) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ return;
+ }
+ if (TE->State == TreeEntry::NeedToGather) {
+ if (TE->getOpcode() == Instruction::ExtractElement &&
+ !TE->isAltShuffle() &&
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+ ->getVectorOperandType()) &&
+ allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector.
+ OrdersType CurrentOrder;
+ bool Reuse =
+ canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+ if (Reuse || !CurrentOrder.empty()) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+ return;
+ }
+ }
+ if (Optional<OrdersType> CurrentOrder =
+ findReusedOrderedScalars(*TE.get())) {
+ VFToOrderedEntries[TE->Scalars.size()].insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ }
+ }
+ });
+
+ // Reorder the graph nodes according to their vectorization factor.
+ for (unsigned VF = VectorizableTree.front()->Scalars.size(); VF > 1;
+ VF /= 2) {
+ auto It = VFToOrderedEntries.find(VF);
+ if (It == VFToOrderedEntries.end())
+ continue;
+ // Try to find the most profitable order. We are just looking for the most
+ // frequently used order and reorder the scalar elements in the nodes
+ // according to it.
+ const SmallPtrSetImpl<TreeEntry *> &OrderedEntries = It->getSecond();
+ // All operands are reordered and used only in this node - propagate the
+ // most used order to the user node.
+ MapVector<OrdersType, unsigned,
+ DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
+ OrdersUses;
+ SmallPtrSet<const TreeEntry *, 4> VisitedOps;
+ for (const TreeEntry *OpTE : OrderedEntries) {
+ // No need to reorder these nodes; we still need to extend and to use a
+ // shuffle, just merge the reordering shuffle and the reuse shuffle.
+ if (!OpTE->ReuseShuffleIndices.empty())
+ continue;
+ // Count the number of uses of each order.
+ const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
+ if (OpTE->State == TreeEntry::NeedToGather)
+ return GathersToOrders.find(OpTE)->second;
+ return OpTE->ReorderIndices;
+ }();
+ // Stores actually store the mask, not the order; we need to invert it.
+ if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
+ OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
+ SmallVector<int> Mask;
+ inversePermutation(Order, Mask);
+ unsigned E = Order.size();
+ OrdersType CurrentOrder(E, E);
+ transform(Mask, CurrentOrder.begin(), [E](int Idx) {
+ return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
+ });
+ fixupOrderingIndices(CurrentOrder);
+ ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
+ } else {
+ ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
+ }
+ }
+ // Set order of the user node.
+ if (OrdersUses.empty())
+ continue;
+ // Choose the most used order.
+ ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
+ unsigned Cnt = OrdersUses.front().second;
+ for (const auto &Pair : drop_begin(OrdersUses)) {
+ if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
+ BestOrder = Pair.first;
+ Cnt = Pair.second;
+ }
+ }
+ // Set order of the user node.
+ if (BestOrder.empty())
+ continue;
+ SmallVector<int> Mask;
+ inversePermutation(BestOrder, Mask);
+ SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
+ unsigned E = BestOrder.size();
+ transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
+ return I < E ? static_cast<int>(I) : UndefMaskElem;
+ });
+ // Do an actual reordering, if profitable.
+ for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
+ // Just do the reordering for the nodes with the given VF.
+ if (TE->Scalars.size() != VF) {
+ if (TE->ReuseShuffleIndices.size() == VF) {
+ // Need to reorder the reuses masks of the operands with smaller VF to
+ // be able to find the match between the graph nodes and scalar
+ // operands of the given node during vectorization/cost estimation.
+ assert(all_of(TE->UserTreeIndices,
+ [VF, &TE](const EdgeInfo &EI) {
+ return EI.UserTE->Scalars.size() == VF ||
+ EI.UserTE->Scalars.size() ==
+ TE->Scalars.size();
+ }) &&
+ "All users must be of VF size.");
+ // Update ordering of the operands with the smaller VF than the given
+ // one.
+ reorderReuses(TE->ReuseShuffleIndices, Mask);
+ }
+ continue;
+ }
+ if (TE->State == TreeEntry::Vectorize &&
+ isa<ExtractElementInst, ExtractValueInst, LoadInst, StoreInst,
+ InsertElementInst>(TE->getMainOp()) &&
+ !TE->isAltShuffle()) {
+ // Build correct orders for extract{element,value}, loads and
+ // stores.
+ reorderOrder(TE->ReorderIndices, Mask);
+ if (isa<InsertElementInst, StoreInst>(TE->getMainOp()))
+ TE->reorderOperands(Mask);
+ } else {
+ // Reorder the node and its operands.
+ TE->reorderOperands(Mask);
+ assert(TE->ReorderIndices.empty() &&
+ "Expected empty reorder sequence.");
+ reorderScalars(TE->Scalars, Mask);
+ }
+ if (!TE->ReuseShuffleIndices.empty()) {
+ // Apply reversed order to keep the original ordering of the reused
+ // elements to avoid extra reorder indices shuffling.
+ OrdersType CurrentOrder;
+ reorderOrder(CurrentOrder, MaskOrder);
+ SmallVector<int> NewReuses;
+ inversePermutation(CurrentOrder, NewReuses);
+ addMask(NewReuses, TE->ReuseShuffleIndices);
+ TE->ReuseShuffleIndices.swap(NewReuses);
+ }
+ }
+ }
+}
+
+void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
+ SetVector<TreeEntry *> OrderedEntries;
+ DenseMap<const TreeEntry *, OrdersType> GathersToOrders;
+ // Find all reorderable leaf nodes with the given VF.
+ // Currently these are vectorized loads, extracts without alternate operands +
+ // some gathering of extracts.
+ SmallVector<TreeEntry *> NonVectorized;
+ for_each(VectorizableTree, [this, &OrderedEntries, &GathersToOrders,
+ &NonVectorized](
+ const std::unique_ptr<TreeEntry> &TE) {
+ if (TE->State != TreeEntry::Vectorize)
+ NonVectorized.push_back(TE.get());
+ // No need to reorder if we need to shuffle reuses; the node still has to be
+ // shuffled anyway.
+ if (!TE->ReuseShuffleIndices.empty())
+ return;
+ if (TE->State == TreeEntry::Vectorize &&
+ isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE->getMainOp()) &&
+ !TE->isAltShuffle()) {
+ OrderedEntries.insert(TE.get());
+ return;
+ }
+ if (TE->State == TreeEntry::NeedToGather) {
+ if (TE->getOpcode() == Instruction::ExtractElement &&
+ !TE->isAltShuffle() &&
+ isa<FixedVectorType>(cast<ExtractElementInst>(TE->getMainOp())
+ ->getVectorOperandType()) &&
+ allSameType(TE->Scalars) && allSameBlock(TE->Scalars)) {
+ // Check that gather of extractelements can be represented as
+ // just a shuffle of a single vector with a single user only.
+ OrdersType CurrentOrder;
+ bool Reuse =
+ canReuseExtract(TE->Scalars, TE->getMainOp(), CurrentOrder);
+ if ((Reuse || !CurrentOrder.empty()) &&
+ !any_of(VectorizableTree,
+ [&TE](const std::unique_ptr<TreeEntry> &Entry) {
+ return Entry->State == TreeEntry::NeedToGather &&
+ Entry.get() != TE.get() &&
+ Entry->isSame(TE->Scalars);
+ })) {
+ OrderedEntries.insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), CurrentOrder);
+ return;
+ }
+ }
+ if (Optional<OrdersType> CurrentOrder =
+ findReusedOrderedScalars(*TE.get())) {
+ OrderedEntries.insert(TE.get());
+ GathersToOrders.try_emplace(TE.get(), *CurrentOrder);
+ }
+ }
+ });
+
+ // Checks if the operands of the users are reorderable and have only a
+ // single use.
+ auto &&CheckOperands =
+ [this, &NonVectorized](const auto &Data,
+ SmallVectorImpl<TreeEntry *> &GatherOps) {
+ for (unsigned I = 0, E = Data.first->getNumOperands(); I < E; ++I) {
+ if (any_of(Data.second,
+ [I](const std::pair<unsigned, TreeEntry *> &OpData) {
+ return OpData.first == I &&
+ OpData.second->State == TreeEntry::Vectorize;
+ }))
+ continue;
+ ArrayRef<Value *> VL = Data.first->getOperand(I);
+ const TreeEntry *TE = nullptr;
+ const auto *It = find_if(VL, [this, &TE](Value *V) {
+ TE = getTreeEntry(V);
+ return TE;
+ });
+ if (It != VL.end() && TE->isSame(VL))
+ return false;
+ TreeEntry *Gather = nullptr;
+ if (count_if(NonVectorized, [VL, &Gather](TreeEntry *TE) {
+ assert(TE->State != TreeEntry::Vectorize &&
+ "Only non-vectorized nodes are expected.");
+ if (TE->isSame(VL)) {
+ Gather = TE;
+ return true;
+ }
+ return false;
+ }) > 1)
+ return false;
+ if (Gather)
+ GatherOps.push_back(Gather);
+ }
+ return true;
+ };
+ // 1. Propagate the order to the graph nodes that use only reordered nodes.
+ // I.e., if the node has operands that are reordered, try to put at least one
+ // operand in its natural order and reorder the others + reorder the user
+ // node itself.
+ SmallPtrSet<const TreeEntry *, 4> Visited;
+ while (!OrderedEntries.empty()) {
+ // 1. Filter out only reordered nodes.
+ // 2. If the entry has multiple uses - skip it and jump to the next node.
+ MapVector<TreeEntry *, SmallVector<std::pair<unsigned, TreeEntry *>>> Users;
+ SmallVector<TreeEntry *> Filtered;
+ for (TreeEntry *TE : OrderedEntries) {
+ if (!(TE->State == TreeEntry::Vectorize ||
+ (TE->State == TreeEntry::NeedToGather &&
+ GathersToOrders.count(TE))) ||
+ TE->UserTreeIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
+ !all_of(drop_begin(TE->UserTreeIndices),
+ [TE](const EdgeInfo &EI) {
+ return EI.UserTE == TE->UserTreeIndices.front().UserTE;
+ }) ||
+ !Visited.insert(TE).second) {
+ Filtered.push_back(TE);
+ continue;
+ }
+      // Build a map between user nodes and their operand positions to speed up
+      // the search. The graph currently does not provide this dependency
+      // directly.
+ for (EdgeInfo &EI : TE->UserTreeIndices) {
+ TreeEntry *UserTE = EI.UserTE;
+ auto It = Users.find(UserTE);
+ if (It == Users.end())
+ It = Users.insert({UserTE, {}}).first;
+ It->second.emplace_back(EI.EdgeIdx, TE);
+ }
+ }
+ // Erase filtered entries.
+ for_each(Filtered,
+ [&OrderedEntries](TreeEntry *TE) { OrderedEntries.remove(TE); });
+ for (const auto &Data : Users) {
+ // Check that operands are used only in the User node.
+ SmallVector<TreeEntry *> GatherOps;
+ if (!CheckOperands(Data, GatherOps)) {
+ for_each(Data.second,
+ [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
+ OrderedEntries.remove(Op.second);
+ });
+ continue;
+ }
+ // All operands are reordered and used only in this node - propagate the
+ // most used order to the user node.
+ MapVector<OrdersType, unsigned,
+ DenseMap<OrdersType, unsigned, OrdersTypeDenseMapInfo>>
+ OrdersUses;
+ SmallPtrSet<const TreeEntry *, 4> VisitedOps;
+ for (const auto &Op : Data.second) {
+ TreeEntry *OpTE = Op.second;
+ if (!OpTE->ReuseShuffleIndices.empty() ||
+ (IgnoreReorder && OpTE == VectorizableTree.front().get()))
+ continue;
+ const auto &Order = [OpTE, &GathersToOrders]() -> const OrdersType & {
+ if (OpTE->State == TreeEntry::NeedToGather)
+ return GathersToOrders.find(OpTE)->second;
+ return OpTE->ReorderIndices;
+ }();
+        // Stores actually keep the mask rather than the order - it needs to be
+        // inverted here.
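+        // E.g. a stored mask of {1, 2, 0} is converted back to the order
+        // {2, 0, 1} below (illustrative).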
+ if (OpTE->State == TreeEntry::Vectorize && !OpTE->isAltShuffle() &&
+ OpTE->getOpcode() == Instruction::Store && !Order.empty()) {
+ SmallVector<int> Mask;
+ inversePermutation(Order, Mask);
+ unsigned E = Order.size();
+ OrdersType CurrentOrder(E, E);
+ transform(Mask, CurrentOrder.begin(), [E](int Idx) {
+ return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
+ });
+ fixupOrderingIndices(CurrentOrder);
+ ++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
+ } else {
+ ++OrdersUses.insert(std::make_pair(Order, 0)).first->second;
+ }
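+        // Count the remaining uses of this operand outside the current user
+        // node as votes for keeping the natural (empty) order.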
+ if (VisitedOps.insert(OpTE).second)
+ OrdersUses.insert(std::make_pair(OrdersType(), 0)).first->second +=
+ OpTE->UserTreeIndices.size();
+ assert(OrdersUses[{}] > 0 && "Counter cannot be less than 0.");
+ --OrdersUses[{}];
+ }
+      // If there are no orders - skip the current nodes and jump to the next
+      // ones, if any.
+ if (OrdersUses.empty()) {
+ for_each(Data.second,
+ [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
+ OrderedEntries.remove(Op.second);
+ });
+ continue;
+ }
+ // Choose the best order.
+ ArrayRef<unsigned> BestOrder = OrdersUses.front().first;
+ unsigned Cnt = OrdersUses.front().second;
+ for (const auto &Pair : drop_begin(OrdersUses)) {
+ if (Cnt < Pair.second || (Cnt == Pair.second && Pair.first.empty())) {
+ BestOrder = Pair.first;
+ Cnt = Pair.second;
+ }
+ }
+ // Set order of the user node (reordering of operands and user nodes).
+ if (BestOrder.empty()) {
+ for_each(Data.second,
+ [&OrderedEntries](const std::pair<unsigned, TreeEntry *> &Op) {
+ OrderedEntries.remove(Op.second);
+ });
+ continue;
+ }
+ // Erase operands from OrderedEntries list and adjust their orders.
+ VisitedOps.clear();
+ SmallVector<int> Mask;
+ inversePermutation(BestOrder, Mask);
+ SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
+ unsigned E = BestOrder.size();
+ transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
+ return I < E ? static_cast<int>(I) : UndefMaskElem;
+ });
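+      // E.g. with BestOrder {2, 0, 1}, Mask is {1, 2, 0} (applied to scalars
+      // and reuses) and MaskOrder is {2, 0, 1} (applied to orders) -
+      // illustrative.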
+ for (const std::pair<unsigned, TreeEntry *> &Op : Data.second) {
+ TreeEntry *TE = Op.second;
+ OrderedEntries.remove(TE);
+ if (!VisitedOps.insert(TE).second)
+ continue;
+ if (!TE->ReuseShuffleIndices.empty() && TE->ReorderIndices.empty()) {
+ // Just reorder reuses indices.
+ reorderReuses(TE->ReuseShuffleIndices, Mask);
+ continue;
+ }
+ // Gathers are processed separately.
+ if (TE->State != TreeEntry::Vectorize)
+ continue;
+ assert((BestOrder.size() == TE->ReorderIndices.size() ||
+ TE->ReorderIndices.empty()) &&
+ "Non-matching sizes of user/operand entries.");
+ reorderOrder(TE->ReorderIndices, Mask);
+ }
+      // For gathers we just need to reorder their scalars.
+ for (TreeEntry *Gather : GatherOps) {
+ assert(Gather->ReorderIndices.empty() &&
+ "Unexpected reordering of gathers.");
+ if (!Gather->ReuseShuffleIndices.empty()) {
+ // Just reorder reuses indices.
+ reorderReuses(Gather->ReuseShuffleIndices, Mask);
+ continue;
+ }
+ reorderScalars(Gather->Scalars, Mask);
+ OrderedEntries.remove(Gather);
+ }
+ // Reorder operands of the user node and set the ordering for the user
+ // node itself.
+ if (Data.first->State != TreeEntry::Vectorize ||
+ !isa<ExtractElementInst, ExtractValueInst, LoadInst>(
+ Data.first->getMainOp()) ||
+ Data.first->isAltShuffle())
+ Data.first->reorderOperands(Mask);
+ if (!isa<InsertElementInst, StoreInst>(Data.first->getMainOp()) ||
+ Data.first->isAltShuffle()) {
+ reorderScalars(Data.first->Scalars, Mask);
+ reorderOrder(Data.first->ReorderIndices, MaskOrder);
+ if (Data.first->ReuseShuffleIndices.empty() &&
+ !Data.first->ReorderIndices.empty() &&
+ !Data.first->isAltShuffle()) {
+          // Insert the user node into the list to try to sink the reordering
+          // deeper in the graph.
+ OrderedEntries.insert(Data.first);
+ }
+ } else {
+ reorderOrder(Data.first->ReorderIndices, Mask);
+ }
+ }
+ }
+ // If the reordering is unnecessary, just remove the reorder.
+ if (IgnoreReorder && !VectorizableTree.front()->ReorderIndices.empty() &&
+ VectorizableTree.front()->ReuseShuffleIndices.empty())
+ VectorizableTree.front()->ReorderIndices.clear();
+}
+
+void BoUpSLP::buildExternalUses(
+ const ExtraValueToDebugLocsMap &ExternallyUsedValues) {
// Collect the values that we need to extract from the tree.
for (auto &TEPtr : VectorizableTree) {
TreeEntry *Entry = TEPtr.get();
@@ -2636,6 +3215,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
if (!UserInst)
continue;
+ if (isDeleted(UserInst))
+ continue;
+
// Skip in-tree scalars that become vectors
if (TreeEntry *UseEntry = getTreeEntry(U)) {
Value *UseScalar = UseEntry->Scalars[0];
@@ -2664,14 +3246,120 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
}
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
+ ArrayRef<Value *> UserIgnoreLst) {
+ deleteTree();
+ UserIgnoreList = UserIgnoreLst;
+ if (!allSameType(Roots))
+ return;
+ buildTree_rec(Roots, 0, EdgeInfo());
+}
+
+namespace {
+/// Describes how the loads in the given sequence can be represented.
+enum class LoadsState { Gather, Vectorize, ScatterVectorize };
+} // anonymous namespace
+
+/// Checks whether the given array of loads can be represented as a vectorized
+/// load, a scatter-vectorized load or just a simple gather.
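+/// E.g. consecutive loads of a[0..3] -> Vectorize; loads with computable but
+/// non-consecutive pointer offsets and legal masked gathers -> ScatterVectorize;
+/// otherwise Gather (illustrative).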
+static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
+ const TargetTransformInfo &TTI,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &Order,
+ SmallVectorImpl<Value *> &PointerOps) {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load. For example, we don't want to vectorize loads that are smaller
+ // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
+ // treats loading/storing it as an i8 struct. If we vectorize loads/stores
+ // from such a struct, we read/write packed bits disagreeing with the
+ // unvectorized version.
+ Type *ScalarTy = VL0->getType();
+
+ if (DL.getTypeSizeInBits(ScalarTy) != DL.getTypeAllocSizeInBits(ScalarTy))
+ return LoadsState::Gather;
+
+ // Make sure all loads in the bundle are simple - we can't vectorize
+ // atomic or volatile loads.
+ PointerOps.clear();
+ PointerOps.resize(VL.size());
+ auto *POIter = PointerOps.begin();
+ for (Value *V : VL) {
+ auto *L = cast<LoadInst>(V);
+ if (!L->isSimple())
+ return LoadsState::Gather;
+ *POIter = L->getPointerOperand();
+ ++POIter;
+ }
+
+ Order.clear();
+ // Check the order of pointer operands.
+ if (llvm::sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order)) {
+ Value *Ptr0;
+ Value *PtrN;
+ if (Order.empty()) {
+ Ptr0 = PointerOps.front();
+ PtrN = PointerOps.back();
+ } else {
+ Ptr0 = PointerOps[Order.front()];
+ PtrN = PointerOps[Order.back()];
+ }
+ Optional<int> Diff =
+ getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, DL, SE);
+ // Check that the sorted loads are consecutive.
+ if (static_cast<unsigned>(*Diff) == VL.size() - 1)
+ return LoadsState::Vectorize;
+ Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
+ for (Value *V : VL)
+ CommonAlignment =
+ commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
+ if (TTI.isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
+ CommonAlignment))
+ return LoadsState::ScatterVectorize;
+ }
+
+ return LoadsState::Gather;
+}
+
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
+ SmallVector<int> ReuseShuffleIndicies;
+ SmallVector<Value *> UniqueValues;
+ auto &&TryToFindDuplicates = [&VL, &ReuseShuffleIndicies, &UniqueValues,
+ &UserTreeIdx,
+ this](const InstructionsState &S) {
+ // Check that every instruction appears once in this bundle.
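+    // E.g. VL = {a, b, a, b} yields UniqueValues = {a, b} and
+    // ReuseShuffleIndicies = {0, 1, 0, 1} (illustrative).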
+ DenseMap<Value *, unsigned> UniquePositions;
+ for (Value *V : VL) {
+ auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
+ ReuseShuffleIndicies.emplace_back(isa<UndefValue>(V) ? -1
+ : Res.first->second);
+ if (Res.second)
+ UniqueValues.emplace_back(V);
+ }
+ size_t NumUniqueScalarValues = UniqueValues.size();
+ if (NumUniqueScalarValues == VL.size()) {
+ ReuseShuffleIndicies.clear();
+ } else {
+ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
+ if (NumUniqueScalarValues <= 1 ||
+ !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ return false;
+ }
+ VL = UniqueValues;
+ }
+ return true;
+ };
+
InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
@@ -2680,7 +3368,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
isa<ScalableVectorType>(
cast<ExtractElementInst>(S.OpValue)->getVectorOperandType())) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
@@ -2700,9 +3390,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// If all of the operands are identical or constant we have a simple solution.
- if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
+ // If we deal with insert/extract instructions, they all must have constant
+ // indices, otherwise we should gather them, not try to vectorize.
+ if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() ||
+ (isa<InsertElementInst, ExtractValueInst, ExtractElementInst>(S.MainOp) &&
+ !all_of(VL, isVectorLikeInstWithConstOps))) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
@@ -2724,7 +3420,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
// Record the reuse of the tree node. FIXME, currently this is only used to
@@ -2743,7 +3441,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (getTreeEntry(I)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V
<< ") is already in tree.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
@@ -2754,7 +3454,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *V : VL) {
if (MustGather.count(V) || is_contained(UserIgnoreList, V)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
@@ -2773,28 +3475,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
// Check that every instruction appears once in this bundle.
- SmallVector<unsigned, 4> ReuseShuffleIndicies;
- SmallVector<Value *, 4> UniqueValues;
- DenseMap<Value *, unsigned> UniquePositions;
- for (Value *V : VL) {
- auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
- ReuseShuffleIndicies.emplace_back(Res.first->second);
- if (Res.second)
- UniqueValues.emplace_back(V);
- }
- size_t NumUniqueScalarValues = UniqueValues.size();
- if (NumUniqueScalarValues == VL.size()) {
- ReuseShuffleIndicies.clear();
- } else {
- LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
- if (NumUniqueScalarValues <= 1 ||
- !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
- return;
- }
- VL = UniqueValues;
- }
+ if (!TryToFindDuplicates(S))
+ return;
auto &BSRef = BlocksSchedules[BB];
if (!BSRef)
@@ -2867,7 +3549,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
if (Reuse) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
- ++NumOpsWantToKeepOriginalOrder;
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
// This is a special case, as it does not gather, but at the same time
@@ -2885,12 +3566,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
dbgs() << " " << Idx;
dbgs() << "\n";
});
+ fixupOrderingIndices(CurrentOrder);
// Insert new order with initial value 0, if it does not exist,
// otherwise return the iterator to the existing one.
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
- findRootOrder(CurrentOrder);
- ++NumOpsWantToKeepOrder[CurrentOrder];
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
@@ -2910,8 +3590,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Check that we have a buildvector and not a shuffle of 2 or more
// different vectors.
ValueSet SourceVectors;
- for (Value *V : VL)
+ int MinIdx = std::numeric_limits<int>::max();
+ for (Value *V : VL) {
SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
+ Optional<int> Idx = *getInsertIndex(V, 0);
+ if (!Idx || *Idx == UndefMaskElem)
+ continue;
+ MinIdx = std::min(MinIdx, *Idx);
+ }
if (count_if(VL, [&SourceVectors](Value *V) {
return !SourceVectors.contains(V);
@@ -2919,13 +3605,35 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// Found 2nd source vector - cancel.
LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
"different source vectors.\n");
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
BS.cancelScheduling(VL, VL0);
return;
}
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx);
+ auto OrdCompare = [](const std::pair<int, int> &P1,
+ const std::pair<int, int> &P2) {
+ return P1.first > P2.first;
+ };
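+      // The min-heap pops the smallest insert lane first, so CurrentOrder maps
+      // each scalar to the rank of its insert index below.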
+ PriorityQueue<std::pair<int, int>, SmallVector<std::pair<int, int>>,
+ decltype(OrdCompare)>
+ Indices(OrdCompare);
+ for (int I = 0, E = VL.size(); I < E; ++I) {
+ Optional<int> Idx = *getInsertIndex(VL[I], 0);
+ if (!Idx || *Idx == UndefMaskElem)
+ continue;
+ Indices.emplace(*Idx, I);
+ }
+ OrdersType CurrentOrder(VL.size(), VL.size());
+ bool IsIdentity = true;
+ for (int I = 0, E = VL.size(); I < E; ++I) {
+ CurrentOrder[Indices.top().second] = I;
+ IsIdentity &= Indices.top().second == I;
+ Indices.pop();
+ }
+ if (IsIdentity)
+ CurrentOrder.clear();
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ None, CurrentOrder);
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
constexpr int NumOps = 2;
@@ -2936,7 +3644,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TE->setOperand(I, VectorOperands[I]);
}
- buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, 0});
+ buildTree_rec(VectorOperands[NumOps - 1], Depth + 1, {TE, NumOps - 1});
return;
}
case Instruction::Load: {
@@ -2946,90 +3654,52 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
- Type *ScalarTy = VL0->getType();
-
- if (DL->getTypeSizeInBits(ScalarTy) !=
- DL->getTypeAllocSizeInBits(ScalarTy)) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
- return;
- }
-
- // Make sure all loads in the bundle are simple - we can't vectorize
- // atomic or volatile loads.
- SmallVector<Value *, 4> PointerOps(VL.size());
- auto POIter = PointerOps.begin();
- for (Value *V : VL) {
- auto *L = cast<LoadInst>(V);
- if (!L->isSimple()) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
- return;
- }
- *POIter = L->getPointerOperand();
- ++POIter;
- }
-
+ SmallVector<Value *> PointerOps;
OrdersType CurrentOrder;
- // Check the order of pointer operands.
- if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
- Value *Ptr0;
- Value *PtrN;
+ TreeEntry *TE = nullptr;
+ switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, CurrentOrder,
+ PointerOps)) {
+ case LoadsState::Vectorize:
if (CurrentOrder.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
+          // Original loads are consecutive and do not require reordering.
+ TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
- Ptr0 = PointerOps[CurrentOrder.front()];
- PtrN = PointerOps[CurrentOrder.back()];
- }
- Optional<int> Diff = getPointersDiff(
- ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
- // Check that the sorted loads are consecutive.
- if (static_cast<unsigned>(*Diff) == VL.size() - 1) {
- if (CurrentOrder.empty()) {
- // Original loads are consecutive and does not require reordering.
- ++NumOpsWantToKeepOriginalOrder;
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
- UserTreeIdx, ReuseShuffleIndicies);
- TE->setOperandsInOrder();
- LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
- } else {
- // Need to reorder.
- TreeEntry *TE =
- newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies, CurrentOrder);
- TE->setOperandsInOrder();
- LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
- findRootOrder(CurrentOrder);
- ++NumOpsWantToKeepOrder[CurrentOrder];
- }
- return;
- }
- Align CommonAlignment = cast<LoadInst>(VL0)->getAlign();
- for (Value *V : VL)
- CommonAlignment =
- commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
- if (TTI->isLegalMaskedGather(FixedVectorType::get(ScalarTy, VL.size()),
- CommonAlignment)) {
- // Vectorizing non-consecutive loads with `llvm.masked.gather`.
- TreeEntry *TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle,
- S, UserTreeIdx, ReuseShuffleIndicies);
- TE->setOperandsInOrder();
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
- LLVM_DEBUG(dbgs()
- << "SLP: added a vector of non-consecutive loads.\n");
- return;
+ fixupOrderingIndices(CurrentOrder);
+ // Need to reorder.
+ TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies, CurrentOrder);
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
+ TE->setOperandsInOrder();
+ break;
+ case LoadsState::ScatterVectorize:
+ // Vectorizing non-consecutive loads with `llvm.masked.gather`.
+ TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
+ UserTreeIdx, ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
+ break;
+ case LoadsState::Gather:
+ BS.cancelScheduling(VL, VL0);
+ newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+#ifndef NDEBUG
+ Type *ScalarTy = VL0->getType();
+ if (DL->getTypeSizeInBits(ScalarTy) !=
+ DL->getTypeAllocSizeInBits(ScalarTy))
+ LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+ else if (any_of(VL, [](Value *V) {
+ return !cast<LoadInst>(V)->isSimple();
+ }))
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
+ else
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+#endif // NDEBUG
+ break;
}
-
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
return;
}
case Instruction::ZExt:
@@ -3213,15 +3883,40 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
- TE->setOperandsInOrder();
- for (unsigned i = 0, e = 2; i < e; ++i) {
- ValueList Operands;
- // Prepare the operand vector.
- for (Value *V : VL)
- Operands.push_back(cast<Instruction>(V)->getOperand(i));
-
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ SmallVector<ValueList, 2> Operands(2);
+ // Prepare the operand vector for pointer operands.
+ for (Value *V : VL)
+ Operands.front().push_back(
+ cast<GetElementPtrInst>(V)->getPointerOperand());
+ TE->setOperand(0, Operands.front());
+      // Need to cast all indices to the same type before vectorization to
+      // avoid a crash.
+      // This is also required to be able to find correct matches between
+      // different gather nodes and to reuse the vectorized values rather than
+      // trying to gather them again.
+ int IndexIdx = 1;
+ Type *VL0Ty = VL0->getOperand(IndexIdx)->getType();
+ Type *Ty = all_of(VL,
+ [VL0Ty, IndexIdx](Value *V) {
+ return VL0Ty == cast<GetElementPtrInst>(V)
+ ->getOperand(IndexIdx)
+ ->getType();
+ })
+ ? VL0Ty
+ : DL->getIndexType(cast<GetElementPtrInst>(VL0)
+ ->getPointerOperandType()
+ ->getScalarType());
+ // Prepare the operand vector.
+ for (Value *V : VL) {
+ auto *Op = cast<Instruction>(V)->getOperand(IndexIdx);
+ auto *CI = cast<ConstantInt>(Op);
+ Operands.back().push_back(ConstantExpr::getIntegerCast(
+ CI, Ty, CI->getValue().isSignBitSet()));
}
+ TE->setOperand(IndexIdx, Operands.back());
+
+ for (unsigned I = 0, Ops = Operands.size(); I < Ops; ++I)
+ buildTree_rec(Operands[I], Depth + 1, {TE, I});
return;
}
case Instruction::Store: {
@@ -3276,21 +3971,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
if (CurrentOrder.empty()) {
// Original stores are consecutive and does not require reordering.
- ++NumOpsWantToKeepOriginalOrder;
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
UserTreeIdx, ReuseShuffleIndicies);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
} else {
+ fixupOrderingIndices(CurrentOrder);
TreeEntry *TE =
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies, CurrentOrder);
TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
- findRootOrder(CurrentOrder);
- ++NumOpsWantToKeepOrder[CurrentOrder];
}
return;
}
@@ -3321,7 +4014,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
Function *F = CI->getCalledFunction();
- unsigned NumArgs = CI->getNumArgOperands();
+ unsigned NumArgs = CI->arg_size();
SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
for (unsigned j = 0; j != NumArgs; ++j)
if (hasVectorInstrinsicScalarOpd(ID, j))
@@ -3373,7 +4066,11 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
TE->setOperandsInOrder();
- for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = CI->arg_size(); i != e; ++i) {
+        // For scalar operands there is no need to create an entry since they
+        // do not need to be vectorized.
+ if (hasVectorInstrinsicScalarOpd(ID, i))
+ continue;
ValueList Operands;
// Prepare the operand vector.
for (Value *V : VL) {
@@ -3548,7 +4245,7 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
FastMathFlags FMF;
if (auto *FPCI = dyn_cast<FPMathOperator>(CI))
FMF = FPCI->getFastMathFlags();
- SmallVector<const Value *> Arguments(CI->arg_begin(), CI->arg_end());
+ SmallVector<const Value *> Arguments(CI->args());
IntrinsicCostAttributes CostAttrs(ID, VecTy, Arguments, VecTys, FMF,
dyn_cast<IntrinsicInst>(CI));
auto IntrinsicCost =
@@ -3621,25 +4318,42 @@ computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
return Cost;
}
-/// Shuffles \p Mask in accordance with the given \p SubMask.
-static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
- if (SubMask.empty())
- return;
- if (Mask.empty()) {
- Mask.append(SubMask.begin(), SubMask.end());
- return;
- }
- SmallVector<int, 4> NewMask(SubMask.size(), SubMask.size());
- int TermValue = std::min(Mask.size(), SubMask.size());
- for (int I = 0, E = SubMask.size(); I < E; ++I) {
- if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
- Mask[SubMask[I]] >= TermValue) {
- NewMask[I] = UndefMaskElem;
- continue;
+/// Builds a shuffle mask for a shuffle graph entry and the lists of main and
+/// alternate operation operands.
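+/// E.g. for VL = {add, sub, add, sub} with the alternate opcode sub and no
+/// reordering/reuses, the resulting mask is {0, 5, 2, 7} (illustrative).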
+static void
+buildSuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
+ ArrayRef<int> ReusesIndices,
+ const function_ref<bool(Instruction *)> IsAltOp,
+ SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<Value *> *OpScalars = nullptr,
+ SmallVectorImpl<Value *> *AltScalars = nullptr) {
+ unsigned Sz = VL.size();
+ Mask.assign(Sz, UndefMaskElem);
+ SmallVector<int> OrderMask;
+ if (!ReorderIndices.empty())
+ inversePermutation(ReorderIndices, OrderMask);
+ for (unsigned I = 0; I < Sz; ++I) {
+ unsigned Idx = I;
+ if (!ReorderIndices.empty())
+ Idx = OrderMask[I];
+ auto *OpInst = cast<Instruction>(VL[Idx]);
+ if (IsAltOp(OpInst)) {
+ Mask[I] = Sz + Idx;
+ if (AltScalars)
+ AltScalars->push_back(OpInst);
+ } else {
+ Mask[I] = Idx;
+ if (OpScalars)
+ OpScalars->push_back(OpInst);
}
- NewMask[I] = Mask[SubMask[I]];
}
- Mask.swap(NewMask);
+ if (!ReusesIndices.empty()) {
+ SmallVector<int> NewMask(ReusesIndices.size(), UndefMaskElem);
+ transform(ReusesIndices, NewMask.begin(), [&Mask](int Idx) {
+ return Idx != UndefMaskElem ? Mask[Idx] : UndefMaskElem;
+ });
+ Mask.swap(NewMask);
+ }
}
InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
@@ -3661,13 +4375,10 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (MinBWs.count(VL[0]))
VecTy = FixedVectorType::get(
IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
- auto *FinalVecTy = VecTy;
+ unsigned EntryVF = E->getVectorFactor();
+ auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
- unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- if (NeedToShuffleReuses)
- FinalVecTy =
- FixedVectorType::get(VecTy->getElementType(), ReuseShuffleNumbers);
// FIXME: it tries to fix a problem with MSVC buildbots.
TargetTransformInfo &TTIRef = *TTI;
auto &&AdjustExtractsCost = [this, &TTIRef, CostKind, VL, VecTy,
@@ -3785,7 +4496,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// shuffle of a single/two vectors the scalars are extracted from.
SmallVector<int> Mask;
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
- isShuffle(VL, Mask);
+ isFixedVectorShuffle(VL, Mask);
if (ShuffleKind.hasValue()) {
// Found the bunch of extractelement instructions that must be gathered
// into a vector and can be represented as a permutation elements in a
@@ -3803,6 +4514,92 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (NeedToShuffleReuses)
ReuseShuffleCost = TTI->getShuffleCost(
TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
+      // Improve the gather cost for a gather of loads if we can group some of
+      // the loads into vector loads.
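+      // E.g. for a gather of 8 loads whose first half is consecutive, cost
+      // that half as a single vector load (plus possible subvector-insert
+      // shuffles) and gather only the remaining loads (illustrative).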
+ if (VL.size() > 2 && E->getOpcode() == Instruction::Load &&
+ !E->isAltShuffle()) {
+ BoUpSLP::ValueSet VectorizedLoads;
+ unsigned StartIdx = 0;
+ unsigned VF = VL.size() / 2;
+ unsigned VectorizedCnt = 0;
+ unsigned ScatterVectorizeCnt = 0;
+ const unsigned Sz = DL->getTypeSizeInBits(E->getMainOp()->getType());
+ for (unsigned MinVF = getMinVF(2 * Sz); VF >= MinVF; VF /= 2) {
+ for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
+ Cnt += VF) {
+ ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+ if (!VectorizedLoads.count(Slice.front()) &&
+ !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
+ SmallVector<Value *> PointerOps;
+ OrdersType CurrentOrder;
+ LoadsState LS = canVectorizeLoads(Slice, Slice.front(), *TTI, *DL,
+ *SE, CurrentOrder, PointerOps);
+ switch (LS) {
+ case LoadsState::Vectorize:
+ case LoadsState::ScatterVectorize:
+ // Mark the vectorized loads so that we don't vectorize them
+ // again.
+ if (LS == LoadsState::Vectorize)
+ ++VectorizedCnt;
+ else
+ ++ScatterVectorizeCnt;
+ VectorizedLoads.insert(Slice.begin(), Slice.end());
+            // If we vectorized the initial block, there is no need to try to
+            // vectorize it again.
+ if (Cnt == StartIdx)
+ StartIdx += VF;
+ break;
+ case LoadsState::Gather:
+ break;
+ }
+ }
+ }
+ // Check if the whole array was vectorized already - exit.
+ if (StartIdx >= VL.size())
+ break;
+ // Found vectorizable parts - exit.
+ if (!VectorizedLoads.empty())
+ break;
+ }
+ if (!VectorizedLoads.empty()) {
+ InstructionCost GatherCost = 0;
+ unsigned NumParts = TTI->getNumberOfParts(VecTy);
+ bool NeedInsertSubvectorAnalysis =
+ !NumParts || (VL.size() / VF) > NumParts;
+ // Get the cost for gathered loads.
+ for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
+ if (VectorizedLoads.contains(VL[I]))
+ continue;
+ GatherCost += getGatherCost(VL.slice(I, VF));
+ }
+ // The cost for vectorized loads.
+ InstructionCost ScalarsCost = 0;
+ for (Value *V : VectorizedLoads) {
+ auto *LI = cast<LoadInst>(V);
+ ScalarsCost += TTI->getMemoryOpCost(
+ Instruction::Load, LI->getType(), LI->getAlign(),
+ LI->getPointerAddressSpace(), CostKind, LI);
+ }
+ auto *LI = cast<LoadInst>(E->getMainOp());
+ auto *LoadTy = FixedVectorType::get(LI->getType(), VF);
+ Align Alignment = LI->getAlign();
+ GatherCost +=
+ VectorizedCnt *
+ TTI->getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
+ LI->getPointerAddressSpace(), CostKind, LI);
+ GatherCost += ScatterVectorizeCnt *
+ TTI->getGatherScatterOpCost(
+ Instruction::Load, LoadTy, LI->getPointerOperand(),
+ /*VariableMask=*/false, Alignment, CostKind, LI);
+ if (NeedInsertSubvectorAnalysis) {
+          // Add the cost for the subvector inserts.
+ for (int I = VF, E = VL.size(); I < E; I += VF)
+ GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy,
+ None, I, LoadTy);
+ }
+ return ReuseShuffleCost + GatherCost - ScalarsCost;
+ }
+ }
return ReuseShuffleCost + getGatherCost(VL);
}
InstructionCost CommonCost = 0;
@@ -3852,7 +4649,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
++Idx;
}
}
- Idx = ReuseShuffleNumbers;
+ Idx = EntryVF;
for (Value *V : VL) {
if (ShuffleOrOp == Instruction::ExtractElement) {
auto *EE = cast<ExtractElementInst>(V);
@@ -3895,29 +4692,33 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
return CommonCost;
}
case Instruction::InsertElement: {
+ assert(E->ReuseShuffleIndices.empty() &&
+ "Unique insertelements only are expected.");
auto *SrcVecTy = cast<FixedVectorType>(VL0->getType());
unsigned const NumElts = SrcVecTy->getNumElements();
unsigned const NumScalars = VL.size();
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
// TODO: Add support for Instruction::InsertValue.
- unsigned Offset = UINT_MAX;
+ SmallVector<int> Mask;
+ if (!E->ReorderIndices.empty()) {
+ inversePermutation(E->ReorderIndices, Mask);
+ Mask.append(NumElts - NumScalars, UndefMaskElem);
+ } else {
+ Mask.assign(NumElts, UndefMaskElem);
+ std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ }
+ unsigned Offset = *getInsertIndex(VL0, 0);
bool IsIdentity = true;
- SmallVector<int> ShuffleMask(NumElts, UndefMaskElem);
+ SmallVector<int> PrevMask(NumElts, UndefMaskElem);
+ Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
- Optional<int> InsertIdx = getInsertIndex(VL[I], 0);
+ Optional<int> InsertIdx = getInsertIndex(VL[PrevMask[I]], 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
continue;
- unsigned Idx = *InsertIdx;
- DemandedElts.setBit(Idx);
- if (Idx < Offset) {
- Offset = Idx;
- IsIdentity &= I == 0;
- } else {
- assert(Idx >= Offset && "Failed to find vector index offset");
- IsIdentity &= Idx - Offset == I;
- }
- ShuffleMask[Idx] = I;
+ DemandedElts.setBit(*InsertIdx);
+ IsIdentity &= *InsertIdx - Offset == I;
+ Mask[*InsertIdx - Offset] = I;
}
assert(Offset < NumElts && "Failed to find vector index offset");
@@ -3932,8 +4733,23 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TargetTransformInfo::SK_PermuteSingleSrc,
FixedVectorType::get(SrcVecTy->getElementType(), Sz));
} else if (!IsIdentity) {
- Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy,
- ShuffleMask);
+ auto *FirstInsert =
+ cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
+ return !is_contained(E->Scalars,
+ cast<Instruction>(V)->getOperand(0));
+ }));
+ if (isa<UndefValue>(FirstInsert->getOperand(0))) {
+ Cost += TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, SrcVecTy, Mask);
+ } else {
+ SmallVector<int> InsertMask(NumElts);
+ std::iota(InsertMask.begin(), InsertMask.end(), 0);
+ for (unsigned I = 0; I < NumElts; I++) {
+ if (Mask[I] != UndefMaskElem)
+ InsertMask[Offset + I] = NumElts + I;
+ }
+ Cost +=
+ TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVecTy, InsertMask);
+ }
}
return Cost;
@@ -3955,7 +4771,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
TTI::getCastContextHint(VL0), CostKind, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
// Calculate the cost of this instruction.
@@ -3980,7 +4796,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
@@ -4085,7 +4901,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
Op2VK, Op1VP, Op2VP, Operands, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost =
@@ -4103,7 +4919,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecCost = TTI->getArithmeticInstrCost(
@@ -4117,7 +4933,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
Instruction::Load, ScalarTy, Alignment, 0, CostKind, VL0);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
InstructionCost VecLdCost;
@@ -4160,7 +4976,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InstructionCost ScalarEltCost =
TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
if (NeedToShuffleReuses) {
- CommonCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
+ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
}
InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
@@ -4215,14 +5031,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
TTI::CastContextHint::None, CostKind);
}
- SmallVector<int> Mask(E->Scalars.size());
- for (unsigned I = 0, End = E->Scalars.size(); I < End; ++I) {
- auto *OpInst = cast<Instruction>(E->Scalars[I]);
- assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
- Mask[I] = I + (OpInst->getOpcode() == E->getAltOpcode() ? End : 0);
- }
- VecCost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask, 0);
+ SmallVector<int> Mask;
+ buildSuffleEntryMask(
+ E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask);
+ CommonCost =
+ TTI->getShuffleCost(TargetTransformInfo::SK_Select, FinalVecTy, Mask);
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
return CommonCost + VecCost - ScalarCost;
}
@@ -4231,13 +5049,30 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
}
}
-bool BoUpSLP::isFullyVectorizableTinyTree() const {
+bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const {
LLVM_DEBUG(dbgs() << "SLP: Check whether the tree with height "
<< VectorizableTree.size() << " is fully vectorizable .\n");
+ auto &&AreVectorizableGathers = [this](const TreeEntry *TE, unsigned Limit) {
+ SmallVector<int> Mask;
+ return TE->State == TreeEntry::NeedToGather &&
+ !any_of(TE->Scalars,
+ [this](Value *V) { return EphValues.contains(V); }) &&
+ (allConstant(TE->Scalars) || isSplat(TE->Scalars) ||
+ TE->Scalars.size() < Limit ||
+ (TE->getOpcode() == Instruction::ExtractElement &&
+ isFixedVectorShuffle(TE->Scalars, Mask)) ||
+ (TE->State == TreeEntry::NeedToGather &&
+ TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()));
+ };
+
// We only handle trees of heights 1 and 2.
if (VectorizableTree.size() == 1 &&
- VectorizableTree[0]->State == TreeEntry::Vectorize)
+ (VectorizableTree[0]->State == TreeEntry::Vectorize ||
+ (ForReduction &&
+ AreVectorizableGathers(VectorizableTree[0].get(),
+ VectorizableTree[0]->Scalars.size()) &&
+ VectorizableTree[0]->getVectorFactor() > 2)))
return true;
if (VectorizableTree.size() != 2)
@@ -4249,19 +5084,14 @@ bool BoUpSLP::isFullyVectorizableTinyTree() const {
// or they are extractelements, which form shuffle.
SmallVector<int> Mask;
if (VectorizableTree[0]->State == TreeEntry::Vectorize &&
- (allConstant(VectorizableTree[1]->Scalars) ||
- isSplat(VectorizableTree[1]->Scalars) ||
- (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
- VectorizableTree[1]->Scalars.size() <
- VectorizableTree[0]->Scalars.size()) ||
- (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
- VectorizableTree[1]->getOpcode() == Instruction::ExtractElement &&
- isShuffle(VectorizableTree[1]->Scalars, Mask))))
+ AreVectorizableGathers(VectorizableTree[1].get(),
+ VectorizableTree[0]->Scalars.size()))
return true;
// Gathering cost would be too much for tiny trees.
if (VectorizableTree[0]->State == TreeEntry::NeedToGather ||
- VectorizableTree[1]->State == TreeEntry::NeedToGather)
+ (VectorizableTree[1]->State == TreeEntry::NeedToGather &&
+ VectorizableTree[0]->State != TreeEntry::ScatterVectorize))
return false;
return true;
@@ -4330,7 +5160,7 @@ bool BoUpSLP::isLoadCombineCandidate() const {
return true;
}
-bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const {
// No need to vectorize inserts of gathered values.
if (VectorizableTree.size() == 2 &&
isa<InsertElementInst>(VectorizableTree[0]->Scalars[0]) &&
@@ -4344,7 +5174,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() const {
// If we have a tiny tree (a tree whose size is less than MinTreeSize), we
// can vectorize it if we can prove it fully vectorizable.
- if (isFullyVectorizableTinyTree())
+ if (isFullyVectorizableTinyTree(ForReduction))
return false;
assert(VectorizableTree.empty()
@@ -4496,7 +5326,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
// If found user is an insertelement, do not calculate extract cost but try
// to detect it as a final shuffled/identity match.
- if (EU.User && isa<InsertElementInst>(EU.User)) {
+ if (isa_and_nonnull<InsertElementInst>(EU.User)) {
if (auto *FTy = dyn_cast<FixedVectorType>(EU.User->getType())) {
Optional<int> InsertIdx = getInsertIndex(EU.User, 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
@@ -4508,8 +5338,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
return false;
auto *IE1 = cast<InsertElementInst>(VU);
auto *IE2 = cast<InsertElementInst>(V);
- // Go though of insertelement instructions trying to find either VU as
- // the original vector for IE2 or V as the original vector for IE1.
+ // Go through of insertelement instructions trying to find either VU
+ // as the original vector for IE2 or V as the original vector for IE1.
do {
if (IE1 == VU || IE2 == V)
return true;
@@ -4525,7 +5355,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
VF.push_back(FTy->getNumElements());
ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
FirstUsers.push_back(EU.User);
- DemandedElts.push_back(APInt::getNullValue(VF.back()));
+ DemandedElts.push_back(APInt::getZero(VF.back()));
VecId = FirstUsers.size() - 1;
} else {
VecId = std::distance(FirstUsers.begin(), It);
@@ -4705,18 +5535,11 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
} else {
// Try to find nodes with the same vector factor.
assert(UsedTEs.size() == 2 && "Expected at max 2 permuted entries.");
- // FIXME: Shall be replaced by GetVF function once non-power-2 patch is
- // landed.
- auto &&GetVF = [](const TreeEntry *TE) {
- if (!TE->ReuseShuffleIndices.empty())
- return TE->ReuseShuffleIndices.size();
- return TE->Scalars.size();
- };
DenseMap<int, const TreeEntry *> VFToTE;
for (const TreeEntry *TE : UsedTEs.front())
- VFToTE.try_emplace(GetVF(TE), TE);
+ VFToTE.try_emplace(TE->getVectorFactor(), TE);
for (const TreeEntry *TE : UsedTEs.back()) {
- auto It = VFToTE.find(GetVF(TE));
+ auto It = VFToTE.find(TE->getVectorFactor());
if (It != VFToTE.end()) {
VF = It->first;
Entries.push_back(It->second);
@@ -4757,16 +5580,17 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
InstructionCost
BoUpSLP::getGatherCost(FixedVectorType *Ty,
- const DenseSet<unsigned> &ShuffledIndices) const {
+ const DenseSet<unsigned> &ShuffledIndices,
+ bool NeedToShuffle) const {
unsigned NumElts = Ty->getNumElements();
- APInt DemandedElts = APInt::getNullValue(NumElts);
+ APInt DemandedElts = APInt::getZero(NumElts);
for (unsigned I = 0; I < NumElts; ++I)
if (!ShuffledIndices.count(I))
DemandedElts.setBit(I);
InstructionCost Cost =
TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
/*Extract*/ false);
- if (!ShuffledIndices.empty())
+ if (NeedToShuffle)
Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
return Cost;
}
@@ -4777,6 +5601,7 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
ScalarTy = SI->getValueOperand()->getType();
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ bool DuplicateNonConst = false;
// Find the cost of inserting/extracting values from the vector.
// Check if the same elements are inserted several times and count them as
// shuffle candidates.
@@ -4785,12 +5610,17 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// Iterate in reverse order to consider insert elements with the high cost.
for (unsigned I = VL.size(); I > 0; --I) {
unsigned Idx = I - 1;
- if (isConstant(VL[Idx]))
+ // No need to shuffle duplicates for constants.
+ if (isConstant(VL[Idx])) {
+ ShuffledElements.insert(Idx);
continue;
- if (!UniqueElements.insert(VL[Idx]).second)
+ }
+ if (!UniqueElements.insert(VL[Idx]).second) {
+ DuplicateNonConst = true;
ShuffledElements.insert(Idx);
+ }
}
- return getGatherCost(VecTy, ShuffledElements);
+ return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
}
// Perform operand reordering on the instructions in VL and return the reordered
@@ -5006,17 +5836,18 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
// block:
// %phi = phi <2 x > { .., %entry} {%shuffle, %block}
- // %2 = shuffle <2 x > %phi, %poison, <4 x > <0, 0, 1, 1>
+ // %2 = shuffle <2 x > %phi, poison, <4 x > <1, 1, 0, 0>
// ... (use %2)
- // %shuffle = shuffle <2 x> %2, poison, <2 x> {0, 2}
+ // %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
// br %block
- SmallVector<int> UniqueIdxs;
+ SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
SmallSet<int, 4> UsedIdxs;
int Pos = 0;
int Sz = VL.size();
for (int Idx : E->ReuseShuffleIndices) {
- if (Idx != Sz && UsedIdxs.insert(Idx).second)
- UniqueIdxs.emplace_back(Pos);
+ if (Idx != Sz && Idx != UndefMaskElem &&
+ UsedIdxs.insert(Idx).second)
+ UniqueIdxs[Idx] = Pos;
++Pos;
}
assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
@@ -5047,11 +5878,9 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
}).base());
VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
int UniqueVals = 0;
- bool HasUndefs = false;
for (Value *V : VL.drop_back(VL.size() - VF)) {
if (isa<UndefValue>(V)) {
ReuseShuffleIndicies.emplace_back(UndefMaskElem);
- HasUndefs = true;
continue;
}
if (isConstant(V)) {
@@ -5066,15 +5895,10 @@ Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
++UniqueVals;
}
}
- if (HasUndefs && UniqueVals == 1 && UniqueValues.size() == 1) {
+ if (UniqueVals == 1 && UniqueValues.size() == 1) {
// Emit pure splat vector.
- // FIXME: why it is not identified as an identity.
- unsigned NumUndefs = count(ReuseShuffleIndicies, UndefMaskElem);
- if (NumUndefs == ReuseShuffleIndicies.size() - 1)
- ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
- UndefMaskElem);
- else
- ReuseShuffleIndicies.assign(VF, 0);
+ ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
+ UndefMaskElem);
} else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
ReuseShuffleIndicies.clear();
UniqueValues.clear();
@@ -5107,12 +5931,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- unsigned VF = E->Scalars.size();
- if (NeedToShuffleReuses)
- VF = E->ReuseShuffleIndices.size();
+ unsigned VF = E->getVectorFactor();
ShuffleInstructionBuilder ShuffleBuilder(Builder, VF);
if (E->State == TreeEntry::NeedToGather) {
- setInsertPointAfterBundle(E);
+ if (E->getMainOp())
+ setInsertPointAfterBundle(E);
Value *Vec;
SmallVector<int> Mask;
SmallVector<const TreeEntry *> Entries;
@@ -5152,13 +5975,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *VecTy = FixedVectorType::get(ScalarTy, E->Scalars.size());
switch (ShuffleOrOp) {
case Instruction::PHI: {
+ assert(
+ (E->ReorderIndices.empty() || E != VectorizableTree.front().get()) &&
+ "PHI reordering is free.");
auto *PH = cast<PHINode>(VL0);
Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI());
Builder.SetCurrentDebugLocation(PH->getDebugLoc());
PHINode *NewPhi = Builder.CreatePHI(VecTy, PH->getNumIncomingValues());
Value *V = NewPhi;
- if (NeedToShuffleReuses)
- V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
+ ShuffleBuilder.addMask(E->ReuseShuffleIndices);
+ V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
@@ -5209,53 +6036,48 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return NewV;
}
case Instruction::InsertElement: {
- Builder.SetInsertPoint(VL0);
+ assert(E->ReuseShuffleIndices.empty() && "All inserts should be unique");
+ Builder.SetInsertPoint(cast<Instruction>(E->Scalars.back()));
Value *V = vectorizeTree(E->getOperand(1));
+ // Create InsertVector shuffle if necessary
+ auto *FirstInsert = cast<Instruction>(*find_if(E->Scalars, [E](Value *V) {
+ return !is_contained(E->Scalars, cast<Instruction>(V)->getOperand(0));
+ }));
const unsigned NumElts =
- cast<FixedVectorType>(VL0->getType())->getNumElements();
+ cast<FixedVectorType>(FirstInsert->getType())->getNumElements();
const unsigned NumScalars = E->Scalars.size();
+ unsigned Offset = *getInsertIndex(VL0, 0);
+ assert(Offset < NumElts && "Failed to find vector index offset");
+
+ // Create shuffle to resize vector
+ SmallVector<int> Mask;
+ if (!E->ReorderIndices.empty()) {
+ inversePermutation(E->ReorderIndices, Mask);
+ Mask.append(NumElts - NumScalars, UndefMaskElem);
+ } else {
+ Mask.assign(NumElts, UndefMaskElem);
+ std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ }
// Create InsertVector shuffle if necessary
- Instruction *FirstInsert = nullptr;
bool IsIdentity = true;
- unsigned Offset = UINT_MAX;
+ SmallVector<int> PrevMask(NumElts, UndefMaskElem);
+ Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
- Value *Scalar = E->Scalars[I];
- if (!FirstInsert &&
- !is_contained(E->Scalars, cast<Instruction>(Scalar)->getOperand(0)))
- FirstInsert = cast<Instruction>(Scalar);
+ Value *Scalar = E->Scalars[PrevMask[I]];
Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
if (!InsertIdx || *InsertIdx == UndefMaskElem)
continue;
- unsigned Idx = *InsertIdx;
- if (Idx < Offset) {
- Offset = Idx;
- IsIdentity &= I == 0;
- } else {
- assert(Idx >= Offset && "Failed to find vector index offset");
- IsIdentity &= Idx - Offset == I;
- }
- }
- assert(Offset < NumElts && "Failed to find vector index offset");
-
- // Create shuffle to resize vector
- SmallVector<int> Mask(NumElts, UndefMaskElem);
- if (!IsIdentity) {
- for (unsigned I = 0; I < NumScalars; ++I) {
- Value *Scalar = E->Scalars[I];
- Optional<int> InsertIdx = getInsertIndex(Scalar, 0);
- if (!InsertIdx || *InsertIdx == UndefMaskElem)
- continue;
- Mask[*InsertIdx - Offset] = I;
- }
- } else {
- std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
+ IsIdentity &= *InsertIdx - Offset == I;
+ Mask[*InsertIdx - Offset] = I;
}
if (!IsIdentity || NumElts != NumScalars)
V = Builder.CreateShuffleVector(V, Mask);
- if (NumElts != NumScalars) {
+ if ((!IsIdentity || Offset != 0 ||
+ !isa<UndefValue>(FirstInsert->getOperand(0))) &&
+ NumElts != NumScalars) {
SmallVector<int> InsertMask(NumElts);
std::iota(InsertMask.begin(), InsertMask.end(), 0);
for (unsigned I = 0; I < NumElts; I++) {
@@ -5295,6 +6117,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
auto *CI = cast<CastInst>(VL0);
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5317,6 +6140,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
Value *V = Builder.CreateCmp(P0, L, R);
propagateIRFlags(V, E->Scalars, VL0);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5337,6 +6161,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *V = Builder.CreateSelect(Cond, True, False);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5360,6 +6185,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5403,6 +6229,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
if (auto *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5414,9 +6241,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
case Instruction::Load: {
// Loads are inserted at the head of the tree because we don't want to
// sink them all the way down past store instructions.
- bool IsReorder = E->updateStateIfReorder();
- if (IsReorder)
- VL0 = E->getMainOp();
setInsertPointAfterBundle(E);
LoadInst *LI = cast<LoadInst>(VL0);
@@ -5430,8 +6254,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar so we add the new BitCast
// to ExternalUses list to make sure that an extract will be generated
// in the future.
- if (getTreeEntry(PO))
- ExternalUses.emplace_back(PO, cast<User>(VecPtr), 0);
+ if (TreeEntry *Entry = getTreeEntry(PO)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(PO);
+ ExternalUses.emplace_back(PO, cast<User>(VecPtr), FoundLane);
+ }
NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
} else {
@@ -5454,9 +6281,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::Store: {
- bool IsReorder = !E->ReorderIndices.empty();
- auto *SI = cast<StoreInst>(
- IsReorder ? E->Scalars[E->ReorderIndices.front()] : VL0);
+ auto *SI = cast<StoreInst>(VL0);
unsigned AS = SI->getPointerAddressSpace();
setInsertPointAfterBundle(E);
@@ -5474,8 +6299,12 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The pointer operand uses an in-tree scalar, so add the new BitCast to
// ExternalUses to make sure that an extract will be generated in the
// future.
- if (getTreeEntry(ScalarPtr))
- ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+ if (TreeEntry *Entry = getTreeEntry(ScalarPtr)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(ScalarPtr);
+ ExternalUses.push_back(
+ ExternalUser(ScalarPtr, cast<User>(VecPtr), FoundLane));
+ }
Value *V = propagateMetadata(ST, E->Scalars);
@@ -5484,37 +6313,22 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
case Instruction::GetElementPtr: {
+ auto *GEP0 = cast<GetElementPtrInst>(VL0);
setInsertPointAfterBundle(E);
Value *Op0 = vectorizeTree(E->getOperand(0));
- std::vector<Value *> OpVecs;
- for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
- ++j) {
- ValueList &VL = E->getOperand(j);
- // Need to cast all elements to the same type before vectorization to
- // avoid crash.
- Type *VL0Ty = VL0->getOperand(j)->getType();
- Type *Ty = llvm::all_of(
- VL, [VL0Ty](Value *V) { return VL0Ty == V->getType(); })
- ? VL0Ty
- : DL->getIndexType(cast<GetElementPtrInst>(VL0)
- ->getPointerOperandType()
- ->getScalarType());
- for (Value *&V : VL) {
- auto *CI = cast<ConstantInt>(V);
- V = ConstantExpr::getIntegerCast(CI, Ty,
- CI->getValue().isSignBitSet());
- }
- Value *OpVec = vectorizeTree(VL);
+ SmallVector<Value *> OpVecs;
+ for (int J = 1, N = GEP0->getNumOperands(); J < N; ++J) {
+ Value *OpVec = vectorizeTree(E->getOperand(J));
OpVecs.push_back(OpVec);
}
- Value *V = Builder.CreateGEP(
- cast<GetElementPtrInst>(VL0)->getSourceElementType(), Op0, OpVecs);
+ Value *V = Builder.CreateGEP(GEP0->getSourceElementType(), Op0, OpVecs);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
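The addInversedMask calls added in these hunks undo the node's reordering before the reuse shuffle is applied; the mask amounts to the inverse permutation of ReorderIndices. A hedged, standalone illustration (plain C++, not the ShuffleInstructionBuilder API):

#include <vector>

std::vector<int> inverseMask(const std::vector<unsigned> &ReorderIndices) {
  std::vector<int> Mask(ReorderIndices.size());
  for (unsigned I = 0, E = ReorderIndices.size(); I < E; ++I)
    Mask[ReorderIndices[I]] = I; // send the reordered lane back to position I
  return Mask;
}
// Example: ReorderIndices = {2, 0, 1} yields Mask = {1, 2, 0}.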
@@ -5541,7 +6355,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
std::vector<Value *> OpVecs;
SmallVector<Type *, 2> TysForDecl =
{FixedVectorType::get(CI->getType(), E->Scalars.size())};
- for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
+ for (int j = 0, e = CI->arg_size(); j < e; ++j) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
// vectorized.
@@ -5577,10 +6391,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// The scalar argument uses an in-tree scalar so we add the new vectorized
// call to ExternalUses list to make sure that an extract will be
// generated in the future.
- if (ScalarArg && getTreeEntry(ScalarArg))
- ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
+ if (ScalarArg) {
+ if (TreeEntry *Entry = getTreeEntry(ScalarArg)) {
+ // Find which lane we need to extract.
+ unsigned FoundLane = Entry->findLaneForValue(ScalarArg);
+ ExternalUses.push_back(
+ ExternalUser(ScalarArg, cast<User>(V), FoundLane));
+ }
+ }
propagateIRFlags(V, E->Scalars, VL0);
+ ShuffleBuilder.addInversedMask(E->ReorderIndices);
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
@@ -5628,19 +6449,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// Also, gather up main and alt scalar ops to propagate IR flags to
// each vector operation.
ValueList OpScalars, AltScalars;
- unsigned Sz = E->Scalars.size();
- SmallVector<int> Mask(Sz);
- for (unsigned I = 0; I < Sz; ++I) {
- auto *OpInst = cast<Instruction>(E->Scalars[I]);
- assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
- if (OpInst->getOpcode() == E->getAltOpcode()) {
- Mask[I] = Sz + I;
- AltScalars.push_back(E->Scalars[I]);
- } else {
- Mask[I] = I;
- OpScalars.push_back(E->Scalars[I]);
- }
- }
+ SmallVector<int> Mask;
+ buildShuffleEntryMask(
+ E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
+ [E](Instruction *I) {
+ assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+ return I->getOpcode() == E->getAltOpcode();
+ },
+ Mask, &OpScalars, &AltScalars);
propagateIRFlags(V0, OpScalars);
propagateIRFlags(V1, AltScalars);
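For an alternate-opcode node, the mask handed to CreateShuffleVector below picks each lane either from the main-opcode vector (lanes 0..Sz-1) or from the alternate-opcode vector (lanes Sz..2*Sz-1). A simplified sketch of that mask construction, ignoring the reorder/reuse indices the real helper also folds in:

#include <functional>
#include <vector>

std::vector<int> buildAltOpMask(unsigned Sz,
                                const std::function<bool(unsigned)> &IsAltLane) {
  std::vector<int> Mask(Sz);
  for (unsigned I = 0; I < Sz; ++I)
    Mask[I] = IsAltLane(I) ? int(Sz + I) : int(I);
  return Mask;
}
// E.g. for scalars <a+b, c-d, e+f, g-h> (main opcode add, alternate sub) the
// mask is {0, 5, 2, 7}.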
@@ -5648,7 +6464,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *V = Builder.CreateShuffleVector(V0, V1, Mask);
if (Instruction *I = dyn_cast<Instruction>(V))
V = propagateMetadata(I, E->Scalars);
- ShuffleBuilder.addMask(E->ReuseShuffleIndices);
V = ShuffleBuilder.finalize(V);
E->VectorizedValue = V;
@@ -5823,7 +6638,9 @@ BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) {
LLVM_DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
// It is legal to delete users in the ignorelist.
- assert((getTreeEntry(U) || is_contained(UserIgnoreList, U)) &&
+ assert((getTreeEntry(U) || is_contained(UserIgnoreList, U) ||
+ (isa_and_nonnull<Instruction>(U) &&
+ isDeleted(cast<Instruction>(U)))) &&
"Deleting out-of-tree value");
}
}
@@ -5898,27 +6715,28 @@ void BoUpSLP::optimizeGatherSequence() {
"Worklist not sorted properly!");
BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
- for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
- Instruction *In = &*it++;
- if (isDeleted(In))
+ for (Instruction &In : llvm::make_early_inc_range(*BB)) {
+ if (isDeleted(&In))
continue;
- if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
+ if (!isa<InsertElementInst>(&In) && !isa<ExtractElementInst>(&In) &&
+ !isa<ShuffleVectorInst>(&In))
continue;
// Check if we can replace this instruction with any of the
// visited instructions.
+ bool Replaced = false;
for (Instruction *v : Visited) {
- if (In->isIdenticalTo(v) &&
- DT->dominates(v->getParent(), In->getParent())) {
- In->replaceAllUsesWith(v);
- eraseInstruction(In);
- In = nullptr;
+ if (In.isIdenticalTo(v) &&
+ DT->dominates(v->getParent(), In.getParent())) {
+ In.replaceAllUsesWith(v);
+ eraseInstruction(&In);
+ Replaced = true;
break;
}
}
- if (In) {
- assert(!is_contained(Visited, In));
- Visited.push_back(In);
+ if (!Replaced) {
+ assert(!is_contained(Visited, &In));
+ Visited.push_back(&In);
}
}
}
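The switch to llvm::make_early_inc_range above is what keeps erasing the current instruction safe: the range advances its iterator before the loop body runs. The same idiom in standard C++, shown for illustration only:

#include <list>

void eraseMultiplesOfThree(std::list<int> &L) {
  for (auto It = L.begin(); It != L.end();) {
    auto Cur = It++;  // advance first, exactly like make_early_inc_range
    if (*Cur % 3 == 0)
      L.erase(Cur);   // erasing Cur leaves It valid
  }
}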
@@ -5931,7 +6749,9 @@ void BoUpSLP::optimizeGatherSequence() {
Optional<BoUpSLP::ScheduleData *>
BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
const InstructionsState &S) {
- if (isa<PHINode>(S.OpValue) || isa<InsertElementInst>(S.OpValue))
+ // No need to schedule PHIs, insertelement, extractelement and extractvalue
+ // instructions.
+ if (isa<PHINode>(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue))
return nullptr;
// Initialize the instruction bundle.
@@ -6027,7 +6847,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,
Value *OpValue) {
- if (isa<PHINode>(OpValue) || isa<InsertElementInst>(OpValue))
+ if (isa<PHINode>(OpValue) || isVectorLikeInstWithConstOps(OpValue))
return;
ScheduleData *Bundle = getScheduleData(OpValue);
@@ -6067,8 +6887,9 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
return true;
Instruction *I = dyn_cast<Instruction>(V);
assert(I && "bundle member must be an instruction");
- assert(!isa<PHINode>(I) && !isa<InsertElementInst>(I) &&
- "phi nodes/insertelements don't need to be scheduled");
+ assert(!isa<PHINode>(I) && !isVectorLikeInstWithConstOps(I) &&
+ "phi nodes/insertelements/extractelements/extractvalues don't need to "
+ "be scheduled");
auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool {
ScheduleData *ISD = getScheduleData(I);
if (!ISD)
@@ -6338,7 +7159,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd;
I = I->getNextNode()) {
BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) {
- assert((isa<InsertElementInst>(SD->Inst) ||
+ assert((isVectorLikeInstWithConstOps(SD->Inst) ||
SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) &&
"scheduler and vectorizer bundle mismatch");
SD->FirstInBundle->SchedulingPriority = Idx++;
@@ -6681,9 +7502,7 @@ struct SLPVectorizer : public FunctionPass {
initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
}
- bool doInitialization(Module &M) override {
- return false;
- }
+ bool doInitialization(Module &M) override { return false; }
bool runOnFunction(Function &F) override {
if (skipFunction(F))
@@ -6818,44 +7637,6 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
return Changed;
}
-/// Order may have elements assigned special value (size) which is out of
-/// bounds. Such indices only appear on places which correspond to undef values
-/// (see canReuseExtract for details) and used in order to avoid undef values
-/// have effect on operands ordering.
-/// The first loop below simply finds all unused indices and then the next loop
-/// nest assigns these indices for undef values positions.
-/// As an example below Order has two undef positions and they have assigned
-/// values 3 and 7 respectively:
-/// before: 6 9 5 4 9 2 1 0
-/// after: 6 3 5 4 7 2 1 0
-/// \returns Fixed ordering.
-static BoUpSLP::OrdersType fixupOrderingIndices(ArrayRef<unsigned> Order) {
- BoUpSLP::OrdersType NewOrder(Order.begin(), Order.end());
- const unsigned Sz = NewOrder.size();
- SmallBitVector UsedIndices(Sz);
- SmallVector<int> MaskedIndices;
- for (int I = 0, E = NewOrder.size(); I < E; ++I) {
- if (NewOrder[I] < Sz)
- UsedIndices.set(NewOrder[I]);
- else
- MaskedIndices.push_back(I);
- }
- if (MaskedIndices.empty())
- return NewOrder;
- SmallVector<int> AvailableIndices(MaskedIndices.size());
- unsigned Cnt = 0;
- int Idx = UsedIndices.find_first();
- do {
- AvailableIndices[Cnt] = Idx;
- Idx = UsedIndices.find_next(Idx);
- ++Cnt;
- } while (Idx > 0);
- assert(Cnt == MaskedIndices.size() && "Non-synced masked/available indices.");
- for (int I = 0, E = MaskedIndices.size(); I < E; ++I)
- NewOrder[MaskedIndices[I]] = AvailableIndices[I];
- return NewOrder;
-}
-
bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
unsigned Idx) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << Chain.size()
@@ -6871,19 +7652,13 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
<< "\n");
R.buildTree(Chain);
- Optional<ArrayRef<unsigned>> Order = R.bestOrder();
- // TODO: Handle orders of size less than number of elements in the vector.
- if (Order && Order->size() == Chain.size()) {
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(Chain.size());
- transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
- [Chain](const unsigned Idx) { return Chain[Idx]; });
- R.buildTree(ReorderedOps);
- }
if (R.isTreeTinyAndNotFullyVectorizable())
return false;
if (R.isLoadCombineCandidate())
return false;
+ R.reorderTopToBottom();
+ R.reorderBottomToTop();
+ R.buildExternalUses();
R.computeMinimumValueSizes();
@@ -7006,7 +7781,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned EltSize = R.getVectorElementSize(Operands[0]);
unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
- unsigned MinVF = std::max(2U, R.getMinVecRegSize() / EltSize);
+ unsigned MinVF = R.getMinVF(EltSize);
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
MaxElts);
@@ -7079,11 +7854,11 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
if (!A || !B)
return false;
Value *VL[] = {A, B};
- return tryToVectorizeList(VL, R, /*AllowReorder=*/true);
+ return tryToVectorizeList(VL, R);
}
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- bool AllowReorder) {
+ bool LimitForRegisterSize) {
if (VL.size() < 2)
return false;
@@ -7117,7 +7892,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
}
unsigned Sz = R.getVectorElementSize(I0);
- unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+ unsigned MinVF = R.getMinVF(Sz);
unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
@@ -7155,7 +7930,8 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (!isPowerOf2_32(OpsWidth))
continue;
- if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
+ if ((LimitForRegisterSize && OpsWidth < MaxVF) ||
+ (VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
break;
ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
@@ -7170,18 +7946,11 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
<< "\n");
R.buildTree(Ops);
- if (AllowReorder) {
- Optional<ArrayRef<unsigned>> Order = R.bestOrder();
- if (Order) {
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(Ops.size());
- transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
- [Ops](const unsigned Idx) { return Ops[Idx]; });
- R.buildTree(ReorderedOps);
- }
- }
if (R.isTreeTinyAndNotFullyVectorizable())
continue;
+ R.reorderTopToBottom();
+ R.reorderBottomToTop();
+ R.buildExternalUses();
R.computeMinimumValueSizes();
InstructionCost Cost = R.getTreeCost();
@@ -7374,10 +8143,20 @@ class HorizontalReduction {
Value *RHS, const Twine &Name, bool UseSelect) {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
switch (Kind) {
- case RecurKind::Add:
- case RecurKind::Mul:
case RecurKind::Or:
+ if (UseSelect &&
+ LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
+ return Builder.CreateSelect(LHS, Builder.getTrue(), RHS, Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
+ Name);
case RecurKind::And:
+ if (UseSelect &&
+ LHS->getType() == CmpInst::makeCmpResultType(LHS->getType()))
+ return Builder.CreateSelect(LHS, RHS, Builder.getFalse(), Name);
+ return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
+ Name);
+ case RecurKind::Add:
+ case RecurKind::Mul:
case RecurKind::Xor:
case RecurKind::FAdd:
case RecurKind::FMul:
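The new Or/And cases emit the poison-safe select spellings of the boolean operations when the operands are i1. Restated with the same IRBuilder calls used above (LHS, RHS and Name as in the surrounding function):

// select i1 LHS, true, RHS   behaves like  or  LHS, RHS
// select i1 LHS, RHS, false  behaves like  and LHS, RHS
// but neither propagates poison from RHS when LHS already decides the result.
Value *SafeOr  = Builder.CreateSelect(LHS, Builder.getTrue(),  RHS, Name);
Value *SafeAnd = Builder.CreateSelect(LHS, RHS, Builder.getFalse(), Name);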
@@ -7421,8 +8200,12 @@ class HorizontalReduction {
static Value *createOp(IRBuilder<> &Builder, RecurKind RdxKind, Value *LHS,
Value *RHS, const Twine &Name,
const ReductionOpsListType &ReductionOps) {
- bool UseSelect = ReductionOps.size() == 2;
- assert((!UseSelect || isa<SelectInst>(ReductionOps[1][0])) &&
+ bool UseSelect = ReductionOps.size() == 2 ||
+ // Logical or/and.
+ (ReductionOps.size() == 1 &&
+ isa<SelectInst>(ReductionOps.front().front()));
+ assert((!UseSelect || ReductionOps.size() != 2 ||
+ isa<SelectInst>(ReductionOps[1][0])) &&
"Expected cmp + select pairs for reduction");
Value *Op = createOp(Builder, RdxKind, LHS, RHS, Name, UseSelect);
if (RecurrenceDescriptor::isIntMinMaxRecurrenceKind(RdxKind)) {
@@ -7560,10 +8343,10 @@ class HorizontalReduction {
/// Checks if the instruction is in basic block \p BB.
/// For a cmp+sel min/max reduction check that both ops are in \p BB.
static bool hasSameParent(Instruction *I, BasicBlock *BB) {
- if (isCmpSelMinMax(I)) {
+ if (isCmpSelMinMax(I) || (isBoolLogicOp(I) && isa<SelectInst>(I))) {
auto *Sel = cast<SelectInst>(I);
- auto *Cmp = cast<Instruction>(Sel->getCondition());
- return Sel->getParent() == BB && Cmp->getParent() == BB;
+ auto *Cmp = dyn_cast<Instruction>(Sel->getCondition());
+ return Sel->getParent() == BB && Cmp && Cmp->getParent() == BB;
}
return I->getParent() == BB;
}
@@ -7745,13 +8528,13 @@ public:
}
/// Attempt to vectorize the tree found by matchAssociativeReduction.
- bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
unsigned NumReducedVals = ReducedVals.size();
if (NumReducedVals < 4)
- return false;
+ return nullptr;
// Intersect the fast-math-flags from all reduction operations.
FastMathFlags RdxFMF;
@@ -7825,22 +8608,14 @@ public:
unsigned i = 0;
while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
ArrayRef<Value *> VL(&ReducedVals[i], ReduxWidth);
- V.buildTree(VL, ExternallyUsedValues, IgnoreList);
- Optional<ArrayRef<unsigned>> Order = V.bestOrder();
- if (Order) {
- assert(Order->size() == VL.size() &&
- "Order size must be the same as number of vectorized "
- "instructions.");
- // TODO: reorder tree nodes without tree rebuilding.
- SmallVector<Value *, 4> ReorderedOps(VL.size());
- transform(fixupOrderingIndices(*Order), ReorderedOps.begin(),
- [VL](const unsigned Idx) { return VL[Idx]; });
- V.buildTree(ReorderedOps, ExternallyUsedValues, IgnoreList);
- }
- if (V.isTreeTinyAndNotFullyVectorizable())
+ V.buildTree(VL, IgnoreList);
+ if (V.isTreeTinyAndNotFullyVectorizable(/*ForReduction=*/true))
break;
if (V.isLoadCombineReductionCandidate(RdxKind))
break;
+ V.reorderTopToBottom();
+ V.reorderBottomToTop(/*IgnoreReorder=*/true);
+ V.buildExternalUses(ExternallyUsedValues);
// For a poison-safe boolean logic reduction, do not replace select
// instructions with logic ops. All reduced values will be frozen (see
@@ -7860,7 +8635,7 @@ public:
InstructionCost Cost = TreeCost + ReductionCost;
if (!Cost.isValid()) {
LLVM_DEBUG(dbgs() << "Encountered invalid baseline cost.\n");
- return false;
+ return nullptr;
}
if (Cost >= -SLPCostThreshold) {
V.getORE()->emit([&]() {
@@ -7940,7 +8715,7 @@ public:
// vector reductions.
V.eraseInstructions(IgnoreList);
}
- return VectorizedTree != nullptr;
+ return VectorizedTree;
}
unsigned numReductionValues() const { return ReducedVals.size(); }
@@ -7950,6 +8725,7 @@ private:
InstructionCost getReductionCost(TargetTransformInfo *TTI,
Value *FirstReducedVal, unsigned ReduxWidth,
FastMathFlags FMF) {
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *ScalarTy = FirstReducedVal->getType();
FixedVectorType *VectorTy = FixedVectorType::get(ScalarTy, ReduxWidth);
InstructionCost VectorCost, ScalarCost;
@@ -7962,33 +8738,39 @@ private:
case RecurKind::FAdd:
case RecurKind::FMul: {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(RdxKind);
- VectorCost = TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF);
- ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy);
+ VectorCost =
+ TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
+ ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
break;
}
case RecurKind::FMax:
case RecurKind::FMin: {
+ auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- /*unsigned=*/false);
- ScalarCost =
- TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- CmpInst::makeCmpResultType(ScalarTy));
+ /*unsigned=*/false, CostKind);
+ CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
+ ScalarCost = TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy,
+ SclCondTy, RdxPred, CostKind) +
+ TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+ SclCondTy, RdxPred, CostKind);
break;
}
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
+ auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
bool IsUnsigned =
RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
- VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned);
- ScalarCost =
- TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- CmpInst::makeCmpResultType(ScalarTy));
+ VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, IsUnsigned,
+ CostKind);
+ CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
+ ScalarCost = TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
+ SclCondTy, RdxPred, CostKind) +
+ TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
+ SclCondTy, RdxPred, CostKind);
break;
}
default:
@@ -8010,6 +8792,7 @@ private:
assert(isPowerOf2_32(ReduxWidth) &&
"We only handle power-of-two reductions for now");
+ ++NumVectorInstructions;
return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind,
ReductionOps.back());
}
@@ -8219,32 +9002,45 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// Skip the analysis of CmpInsts. Compiler implements postanalysis of the
// CmpInsts so we can skip extra attempts in
// tryToVectorizeHorReductionOrInstOperands and save compile time.
- SmallVector<std::pair<Instruction *, unsigned>, 8> Stack(1, {Root, 0});
+ std::queue<std::pair<Instruction *, unsigned>> Stack;
+ Stack.emplace(Root, 0);
SmallPtrSet<Value *, 8> VisitedInstrs;
+ SmallVector<WeakTrackingVH> PostponedInsts;
bool Res = false;
+ auto &&TryToReduce = [TTI, &P, &R](Instruction *Inst, Value *&B0,
+ Value *&B1) -> Value * {
+ bool IsBinop = matchRdxBop(Inst, B0, B1);
+ bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
+ if (IsBinop || IsSelect) {
+ HorizontalReduction HorRdx;
+ if (HorRdx.matchAssociativeReduction(P, Inst))
+ return HorRdx.tryToReduce(R, TTI);
+ }
+ return nullptr;
+ };
while (!Stack.empty()) {
Instruction *Inst;
unsigned Level;
- std::tie(Inst, Level) = Stack.pop_back_val();
+ std::tie(Inst, Level) = Stack.front();
+ Stack.pop();
// Do not try to analyze instruction that has already been vectorized.
// This may happen when we vectorize instruction operands on a previous
// iteration while stack was populated before that happened.
if (R.isDeleted(Inst))
continue;
- Value *B0, *B1;
- bool IsBinop = matchRdxBop(Inst, B0, B1);
- bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
- if (IsBinop || IsSelect) {
- HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, Inst)) {
- if (HorRdx.tryToReduce(R, TTI)) {
- Res = true;
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- continue;
- }
+ Value *B0 = nullptr, *B1 = nullptr;
+ if (Value *V = TryToReduce(Inst, B0, B1)) {
+ Res = true;
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ // Try to find another reduction.
+ Stack.emplace(I, Level);
+ continue;
}
+ } else {
+ bool IsBinop = B0 && B1;
if (P && IsBinop) {
Inst = dyn_cast<Instruction>(B0);
if (Inst == P)
@@ -8256,14 +9052,14 @@ static bool tryToVectorizeHorReductionOrInstOperands(
continue;
}
}
- }
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- // Do not try to vectorize CmpInst operands, this is done separately.
- if (!isa<CmpInst>(Inst) && Vectorize(Inst, R)) {
- Res = true;
- continue;
+ // Set P to nullptr to avoid re-analysis of phi node in
+ // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ // Do not try to vectorize CmpInst operands, this is done separately.
+ // The final attempt to vectorize binop arguments happens after the loop,
+ // so that reductions are tried first.
+ if (!isa<CmpInst>(Inst))
+ PostponedInsts.push_back(Inst);
}
// Try to vectorize operands.
@@ -8277,8 +9073,13 @@ static bool tryToVectorizeHorReductionOrInstOperands(
// separately.
if (!isa<PHINode>(I) && !isa<CmpInst>(I) && !R.isDeleted(I) &&
I->getParent() == BB)
- Stack.emplace_back(I, Level);
+ Stack.emplace(I, Level);
}
+ // Try to vectorize binops for which no reductions were found.
+ for (Value *V : PostponedInsts)
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ if (!R.isDeleted(Inst))
+ Res |= Vectorize(Inst, R);
return Res;
}
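The traversal above now uses a FIFO worklist (breadth-first over operands) and defers instructions whose reduction match failed until after the walk, so reductions get the first chance. The control flow, reduced to standard C++ with placeholder callbacks:

#include <functional>
#include <queue>
#include <vector>

struct Node { std::vector<Node *> Operands; };

bool walk(Node *Root, const std::function<bool(Node *)> &TryReduce,
          const std::function<bool(Node *)> &Vectorize) {
  bool Res = false;
  std::queue<Node *> Work;
  std::vector<Node *> Postponed;
  Work.push(Root);
  while (!Work.empty()) {
    Node *N = Work.front();
    Work.pop();
    if (TryReduce(N))
      Res = true;             // a reduction rooted at N was vectorized
    else
      Postponed.push_back(N); // retry as a plain bundle after the walk
    for (Node *Op : N->Operands)
      Work.push(Op);
  }
  for (Node *N : Postponed)
    Res |= Vectorize(N);
  return Res;
}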
@@ -8313,7 +9114,7 @@ bool SLPVectorizerPass::vectorizeInsertValueInst(InsertValueInst *IVI,
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IVI << "\n");
// Aggregate value is unlikely to be processed in vector register, we need to
// extract scalars into scalar registers, so NeedExtraction is set true.
- return tryToVectorizeList(BuildVectorOpds, R, /*AllowReorder=*/false);
+ return tryToVectorizeList(BuildVectorOpds, R);
}
bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
@@ -8324,11 +9125,11 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
if (!findBuildAggregate(IEI, TTI, BuildVectorOpds, BuildVectorInsts) ||
(llvm::all_of(BuildVectorOpds,
[](Value *V) { return isa<ExtractElementInst>(V); }) &&
- isShuffle(BuildVectorOpds, Mask)))
+ isFixedVectorShuffle(BuildVectorOpds, Mask)))
return false;
LLVM_DEBUG(dbgs() << "SLP: array mappable to vector: " << *IEI << "\n");
- return tryToVectorizeList(BuildVectorInsts, R, /*AllowReorder=*/true);
+ return tryToVectorizeList(BuildVectorInsts, R);
}
bool SLPVectorizerPass::vectorizeSimpleInstructions(
@@ -8369,6 +9170,78 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(
return OpsChanged;
}
+template <typename T>
+static bool
+tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
+ function_ref<unsigned(T *)> Limit,
+ function_ref<bool(T *, T *)> Comparator,
+ function_ref<bool(T *, T *)> AreCompatible,
+ function_ref<bool(ArrayRef<T *>, bool)> TryToVectorize,
+ bool LimitForRegisterSize) {
+ bool Changed = false;
+ // Sort by type, parent, operands.
+ stable_sort(Incoming, Comparator);
+
+ // Try to vectorize elements based on their type.
+ SmallVector<T *> Candidates;
+ for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) {
+ // Look for the next elements with the same type, parent and operand
+ // kinds.
+ auto *SameTypeIt = IncIt;
+ while (SameTypeIt != E && AreCompatible(*SameTypeIt, *IncIt))
+ ++SameTypeIt;
+
+ // Try to vectorize them.
+ unsigned NumElts = (SameTypeIt - IncIt);
+ LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes ("
+ << NumElts << ")\n");
+ // The vectorization is a 3-stage attempt:
+ // 1. Try to vectorize instructions with the same/alternate opcodes at the
+ // width of the maximal register first.
+ // 2. Try to vectorize the remaining instructions with the same type, if
+ // possible. This may give better results than vectorizing only
+ // instructions with the same/alternate opcodes.
+ // 3. Finally, try to vectorize all instructions with the same/alternate
+ // ops only; this may yield some extra final vectorization.
+ if (NumElts > 1 &&
+ TryToVectorize(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
+ // Success; start over because instructions might have been changed.
+ Changed = true;
+ } else if (NumElts < Limit(*IncIt) &&
+ (Candidates.empty() ||
+ Candidates.front()->getType() == (*IncIt)->getType())) {
+ Candidates.append(IncIt, std::next(IncIt, NumElts));
+ }
+ // Final attempt to vectorize instructions with the same types.
+ if (Candidates.size() > 1 &&
+ (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
+ if (TryToVectorize(Candidates, /*LimitForRegisterSize=*/false)) {
+ // Success; start over because instructions might have been changed.
+ Changed = true;
+ } else if (LimitForRegisterSize) {
+ // Try to vectorize using small vectors.
+ for (auto *It = Candidates.begin(), *End = Candidates.end();
+ It != End;) {
+ auto *SameTypeIt = It;
+ while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
+ ++SameTypeIt;
+ unsigned NumElts = (SameTypeIt - It);
+ if (NumElts > 1 && TryToVectorize(makeArrayRef(It, NumElts),
+ /*LimitForRegisterSize=*/false))
+ Changed = true;
+ It = SameTypeIt;
+ }
+ }
+ Candidates.clear();
+ }
+
+ // Start over at the next instruction of a different type (or the end).
+ IncIt = SameTypeIt;
+ }
+ return Changed;
+}
+
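tryToVectorizeSequence generalizes the pattern previously duplicated for PHIs and stores: sort, then walk maximal runs of mutually compatible elements and hand each run to a callback. The core grouping loop in isolation (standard C++; the register-size fallback and candidate accumulation are left out):

#include <vector>

template <typename T, typename CompatibleFn, typename TryFn>
bool forEachCompatibleRun(std::vector<T *> &Items, CompatibleFn AreCompatible,
                          TryFn TryToVectorize) {
  bool Changed = false;
  for (auto It = Items.begin(), End = Items.end(); It != End;) {
    auto RunEnd = It;
    while (RunEnd != End && AreCompatible(*RunEnd, *It))
      ++RunEnd;
    unsigned NumElts = RunEnd - It;
    if (NumElts > 1)
      Changed |= TryToVectorize(&*It, NumElts); // pointer + length of the run
    It = RunEnd; // continue at the first incompatible element
  }
  return Changed;
}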
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
bool Changed = false;
SmallVector<Value *, 4> Incoming;
@@ -8377,11 +9250,89 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// node. This makes it easier to identify the chains that can be vectorized
// in a better way.
DenseMap<Value *, SmallVector<Value *, 4>> PHIToOpcodes;
+ auto PHICompare = [this, &PHIToOpcodes](Value *V1, Value *V2) {
+ assert(isValidElementType(V1->getType()) &&
+ isValidElementType(V2->getType()) &&
+ "Expected vectorizable types only.");
+ // It is fine to compare type IDs here, since we expect only vectorizable
+ // types, like ints, floats and pointers; we don't care about other types.
+ if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
+ return true;
+ if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
+ return false;
+ ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
+ ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
+ if (Opcodes1.size() < Opcodes2.size())
+ return true;
+ if (Opcodes1.size() > Opcodes2.size())
+ return false;
+ for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
+ // Undefs are compatible with any other value.
+ if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
+ continue;
+ if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
+ if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
+ DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
+ DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
+ if (!NodeI1)
+ return NodeI2 != nullptr;
+ if (!NodeI2)
+ return false;
+ assert((NodeI1 == NodeI2) ==
+ (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeI1 != NodeI2)
+ return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
+ InstructionsState S = getSameOpcode({I1, I2});
+ if (S.getOpcode())
+ continue;
+ return I1->getOpcode() < I2->getOpcode();
+ }
+ if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
+ continue;
+ if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
+ return true;
+ if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
+ return false;
+ }
+ return false;
+ };
+ auto AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
+ if (V1 == V2)
+ return true;
+ if (V1->getType() != V2->getType())
+ return false;
+ ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
+ ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
+ if (Opcodes1.size() != Opcodes2.size())
+ return false;
+ for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
+ // Undefs are compatible with any other value.
+ if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
+ continue;
+ if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
+ if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
+ if (I1->getParent() != I2->getParent())
+ return false;
+ InstructionsState S = getSameOpcode({I1, I2});
+ if (S.getOpcode())
+ continue;
+ return false;
+ }
+ if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
+ continue;
+ if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
+ return false;
+ }
+ return true;
+ };
+ auto Limit = [&R](Value *V) {
+ unsigned EltSize = R.getVectorElementSize(V);
+ return std::max(2U, R.getMaxVecRegSize() / EltSize);
+ };
- bool HaveVectorizedPhiNodes = true;
- while (HaveVectorizedPhiNodes) {
- HaveVectorizedPhiNodes = false;
-
+ bool HaveVectorizedPhiNodes = false;
+ do {
// Collect the incoming values from the PHIs.
Incoming.clear();
for (Instruction &I : *BB) {
@@ -8419,132 +9370,15 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
}
- // Sort by type, parent, operands.
- stable_sort(Incoming, [this, &PHIToOpcodes](Value *V1, Value *V2) {
- assert(isValidElementType(V1->getType()) &&
- isValidElementType(V2->getType()) &&
- "Expected vectorizable types only.");
- // It is fine to compare type IDs here, since we expect only vectorizable
- // types, like ints, floats and pointers, we don't care about other type.
- if (V1->getType()->getTypeID() < V2->getType()->getTypeID())
- return true;
- if (V1->getType()->getTypeID() > V2->getType()->getTypeID())
- return false;
- ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
- ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
- if (Opcodes1.size() < Opcodes2.size())
- return true;
- if (Opcodes1.size() > Opcodes2.size())
- return false;
- for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
- // Undefs are compatible with any other value.
- if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
- continue;
- if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
- if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
- DomTreeNodeBase<BasicBlock> *NodeI1 = DT->getNode(I1->getParent());
- DomTreeNodeBase<BasicBlock> *NodeI2 = DT->getNode(I2->getParent());
- if (!NodeI1)
- return NodeI2 != nullptr;
- if (!NodeI2)
- return false;
- assert((NodeI1 == NodeI2) ==
- (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
- "Different nodes should have different DFS numbers");
- if (NodeI1 != NodeI2)
- return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
- InstructionsState S = getSameOpcode({I1, I2});
- if (S.getOpcode())
- continue;
- return I1->getOpcode() < I2->getOpcode();
- }
- if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
- continue;
- if (Opcodes1[I]->getValueID() < Opcodes2[I]->getValueID())
- return true;
- if (Opcodes1[I]->getValueID() > Opcodes2[I]->getValueID())
- return false;
- }
- return false;
- });
-
- auto &&AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
- if (V1 == V2)
- return true;
- if (V1->getType() != V2->getType())
- return false;
- ArrayRef<Value *> Opcodes1 = PHIToOpcodes[V1];
- ArrayRef<Value *> Opcodes2 = PHIToOpcodes[V2];
- if (Opcodes1.size() != Opcodes2.size())
- return false;
- for (int I = 0, E = Opcodes1.size(); I < E; ++I) {
- // Undefs are compatible with any other value.
- if (isa<UndefValue>(Opcodes1[I]) || isa<UndefValue>(Opcodes2[I]))
- continue;
- if (auto *I1 = dyn_cast<Instruction>(Opcodes1[I]))
- if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
- if (I1->getParent() != I2->getParent())
- return false;
- InstructionsState S = getSameOpcode({I1, I2});
- if (S.getOpcode())
- continue;
- return false;
- }
- if (isa<Constant>(Opcodes1[I]) && isa<Constant>(Opcodes2[I]))
- continue;
- if (Opcodes1[I]->getValueID() != Opcodes2[I]->getValueID())
- return false;
- }
- return true;
- };
-
- // Try to vectorize elements base on their type.
- SmallVector<Value *, 4> Candidates;
- for (SmallVector<Value *, 4>::iterator IncIt = Incoming.begin(),
- E = Incoming.end();
- IncIt != E;) {
-
- // Look for the next elements with the same type, parent and operand
- // kinds.
- SmallVector<Value *, 4>::iterator SameTypeIt = IncIt;
- while (SameTypeIt != E && AreCompatiblePHIs(*SameTypeIt, *IncIt)) {
- VisitedInstrs.insert(*SameTypeIt);
- ++SameTypeIt;
- }
-
- // Try to vectorize them.
- unsigned NumElts = (SameTypeIt - IncIt);
- LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at PHIs ("
- << NumElts << ")\n");
- // The order in which the phi nodes appear in the program does not matter.
- // So allow tryToVectorizeList to reorder them if it is beneficial. This
- // is done when there are exactly two elements since tryToVectorizeList
- // asserts that there are only two values when AllowReorder is true.
- if (NumElts > 1 && tryToVectorizeList(makeArrayRef(IncIt, NumElts), R,
- /*AllowReorder=*/true)) {
- // Success start over because instructions might have been changed.
- HaveVectorizedPhiNodes = true;
- Changed = true;
- } else if (NumElts < 4 &&
- (Candidates.empty() ||
- Candidates.front()->getType() == (*IncIt)->getType())) {
- Candidates.append(IncIt, std::next(IncIt, NumElts));
- }
- // Final attempt to vectorize phis with the same types.
- if (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType()) {
- if (Candidates.size() > 1 &&
- tryToVectorizeList(Candidates, R, /*AllowReorder=*/true)) {
- // Success start over because instructions might have been changed.
- HaveVectorizedPhiNodes = true;
- Changed = true;
- }
- Candidates.clear();
- }
-
- // Start over at the next instruction of a different type (or the end).
- IncIt = SameTypeIt;
- }
- }
+ HaveVectorizedPhiNodes = tryToVectorizeSequence<Value>(
+ Incoming, Limit, PHICompare, AreCompatiblePHIs,
+ [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
+ return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
+ },
+ /*LimitForRegisterSize=*/true);
+ Changed |= HaveVectorizedPhiNodes;
+ VisitedInstrs.insert(Incoming.begin(), Incoming.end());
+ } while (HaveVectorizedPhiNodes);
VisitedInstrs.clear();
@@ -8797,6 +9631,10 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
return V1->getValueOperand()->getValueID() ==
V2->getValueOperand()->getValueID();
};
+ auto Limit = [&R, this](StoreInst *SI) {
+ unsigned EltSize = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
+ return R.getMinVF(EltSize);
+ };
// Attempt to sort and vectorize each of the store-groups.
for (auto &Pair : Stores) {
@@ -8806,33 +9644,15 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
LLVM_DEBUG(dbgs() << "SLP: Analyzing a store chain of length "
<< Pair.second.size() << ".\n");
- stable_sort(Pair.second, StoreSorter);
-
- // Try to vectorize elements based on their compatibility.
- for (ArrayRef<StoreInst *>::iterator IncIt = Pair.second.begin(),
- E = Pair.second.end();
- IncIt != E;) {
-
- // Look for the next elements with the same type.
- ArrayRef<StoreInst *>::iterator SameTypeIt = IncIt;
- Type *EltTy = (*IncIt)->getPointerOperand()->getType();
-
- while (SameTypeIt != E && AreCompatibleStores(*SameTypeIt, *IncIt))
- ++SameTypeIt;
-
- // Try to vectorize them.
- unsigned NumElts = (SameTypeIt - IncIt);
- LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at stores ("
- << NumElts << ")\n");
- if (NumElts > 1 && !EltTy->getPointerElementType()->isVectorTy() &&
- vectorizeStores(makeArrayRef(IncIt, NumElts), R)) {
- // Success start over because instructions might have been changed.
- Changed = true;
- }
+ if (!isValidElementType(Pair.second.front()->getValueOperand()->getType()))
+ continue;
- // Start over at the next instruction of a different type (or the end).
- IncIt = SameTypeIt;
- }
+ Changed |= tryToVectorizeSequence<StoreInst>(
+ Pair.second, Limit, StoreSorter, AreCompatibleStores,
+ [this, &R](ArrayRef<StoreInst *> Candidates, bool) {
+ return vectorizeStores(Candidates, R);
+ },
+ /*LimitForRegisterSize=*/false);
}
return Changed;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 5f39fe1c17a3..638467f94e1c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -815,6 +815,28 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);
+ // Fix the latch value of reduction and first-order recurrences phis in the
+ // vector loop.
+ VPBasicBlock *Header = Entry->getEntryBasicBlock();
+ for (VPRecipeBase &R : Header->phis()) {
+ auto *PhiR = dyn_cast<VPWidenPHIRecipe>(&R);
+ if (!PhiR || !(isa<VPFirstOrderRecurrencePHIRecipe>(&R) ||
+ isa<VPReductionPHIRecipe>(&R)))
+ continue;
+ // For first-order recurrences and in-order reduction phis, only a single
+ // part is generated, which provides the last part from the previous
+ // iteration. Otherwise all UF parts are generated.
+ bool SinglePartNeeded = isa<VPFirstOrderRecurrencePHIRecipe>(&R) ||
+ cast<VPReductionPHIRecipe>(&R)->isOrdered();
+ unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF;
+ for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
+ Value *VecPhi = State->get(PhiR, Part);
+ Value *Val = State->get(PhiR->getBackedgeValue(),
+ SinglePartNeeded ? State->UF - 1 : Part);
+ cast<PHINode>(VecPhi)->addIncoming(Val, VectorLatchBB);
+ }
+ }
+
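For the latch fixup above, ordered reductions and first-order recurrences keep a single phi fed from the last unrolled part, while all other widened phis get one phi per part fed by the matching part. The part selection, spelled out as a small helper (UF stands for the unroll factor, as in State->UF):

// Which part of the backedge value feeds the phi created for part `Part`.
unsigned backedgeSourcePart(bool SinglePartNeeded, unsigned Part, unsigned UF) {
  return SinglePartNeeded ? UF - 1 : Part;
}
// With UF = 4: an ordered reduction creates one phi fed by part 3; a normal
// reduction creates four phis fed by parts 0, 1, 2 and 3 respectively.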
// Setup branch terminator successors for VPBBs in VPBBsToFix based on
// VPBB's successors.
for (auto VPBB : State->CFG.VPBBsToFix) {
@@ -862,6 +884,13 @@ void VPlan::print(raw_ostream &O) const {
VPSlotTracker SlotTracker(this);
O << "VPlan '" << Name << "' {";
+
+ if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
+ O << "\nLive-in ";
+ BackedgeTakenCount->printAsOperand(O, SlotTracker);
+ O << " = backedge-taken count\n";
+ }
+
for (const VPBlockBase *Block : depth_first(getEntry())) {
O << '\n';
Block->print(O, "", SlotTracker);
@@ -920,12 +949,12 @@ void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
+Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
Twine(getOrCreateBID(Block));
}
-const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
+Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
const std::string &Name = Block->getName();
if (!Name.empty())
return Name;
@@ -1235,7 +1264,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
VF.isScalar() ? Indices.back() : ConstantVector::get(Indices);
// Add the consecutive indices to the vector value.
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
- State.set(getVPSingleValue(), CanonicalVectorIV, Part);
+ State.set(this, CanonicalVectorIV, Part);
}
}
@@ -1243,7 +1272,7 @@ void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
- getVPSingleValue()->printAsOperand(O, SlotTracker);
+ printAsOperand(O, SlotTracker);
O << " = WIDEN-CANONICAL-INDUCTION";
}
#endif
@@ -1306,12 +1335,16 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
State.set(this, EntryPart, Part);
}
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
VPValue *StartVPV = getStartValue();
Value *StartV = StartVPV->getLiveInIRValue();
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK)) {
+ if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
+ RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
// MinMax reductions have the start value as their identity.
if (ScalarPHI) {
Iden = StartV;
@@ -1322,12 +1355,11 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
} else {
- Constant *IdenC = RecurrenceDescriptor::getRecurrenceIdentity(
- RK, VecTy->getScalarType(), RdxDesc.getFastMathFlags());
- Iden = IdenC;
+ Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
+ RdxDesc.getFastMathFlags());
if (!ScalarPHI) {
- Iden = ConstantVector::getSplat(State.VF, IdenC);
+ Iden = Builder.CreateVectorSplat(State.VF, Iden);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(State.CFG.VectorPreHeader->getTerminator());
Constant *Zero = Builder.getInt32(0);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bdf09d15c27f..00ee31007cb7 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1312,7 +1312,7 @@ public:
// The first operand is the address, followed by the stored values, followed
// by an optional mask.
return ArrayRef<VPValue *>(op_begin(), getNumOperands())
- .slice(1, getNumOperands() - (HasMask ? 2 : 1));
+ .slice(1, getNumStoreOperands());
}
/// Generate the wide load or store, and shuffles.
@@ -1325,6 +1325,12 @@ public:
#endif
const InterleaveGroup<Instruction> *getInterleaveGroup() { return IG; }
+
+ /// Returns the number of stored operands of this interleave group. Returns 0
+ /// for load interleave groups.
+ unsigned getNumStoreOperands() const {
+ return getNumOperands() - (HasMask ? 2 : 1);
+ }
};
/// A recipe to represent inloop reduction operations, performing a reduction on
@@ -1508,6 +1514,12 @@ public:
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
Instruction &Ingredient;
+ // Whether the loaded-from / stored-to addresses are consecutive.
+ bool Consecutive;
+
+ // Whether the consecutive loaded/stored addresses are in reverse order.
+ bool Reverse;
+
void setMask(VPValue *Mask) {
if (!Mask)
return;
@@ -1519,16 +1531,21 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
}
public:
- VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask)
- : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load) {
+ VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
+ bool Consecutive, bool Reverse)
+ : VPRecipeBase(VPWidenMemoryInstructionSC, {Addr}), Ingredient(Load),
+ Consecutive(Consecutive), Reverse(Reverse) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
new VPValue(VPValue::VPVMemoryInstructionSC, &Load, this);
setMask(Mask);
}
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
- VPValue *StoredValue, VPValue *Mask)
+ VPValue *StoredValue, VPValue *Mask,
+ bool Consecutive, bool Reverse)
: VPRecipeBase(VPWidenMemoryInstructionSC, {Addr, StoredValue}),
- Ingredient(Store) {
+ Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
+ assert((Consecutive || !Reverse) && "Reverse implies consecutive");
setMask(Mask);
}
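A hedged usage sketch of the extended constructors (AddrVPV and MaskVPV are placeholder names): a consecutive, reversed widened load versus a gather. The new assert rejects Reverse without Consecutive, since "reverse" only makes sense for a contiguous access.

VPRecipeBase *ReverseLoad = new VPWidenMemoryInstructionRecipe(
    *Load, AddrVPV, MaskVPV, /*Consecutive=*/true, /*Reverse=*/true);
VPRecipeBase *Gather = new VPWidenMemoryInstructionRecipe(
    *Load, AddrVPV, MaskVPV, /*Consecutive=*/false, /*Reverse=*/false);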
@@ -1558,6 +1575,13 @@ public:
return getOperand(1); // Stored value is the 2nd, mandatory operand.
}
+ // Return whether the loaded-from / stored-to addresses are consecutive.
+ bool isConsecutive() const { return Consecutive; }
+
+ // Return whether the consecutive loaded/stored addresses are in reverse
+ // order.
+ bool isReverse() const { return Reverse; }
+
/// Generate the wide load/store.
void execute(VPTransformState &State) override;
@@ -1569,11 +1593,11 @@ public:
};
/// A Recipe for widening the canonical induction variable of the vector loop.
-class VPWidenCanonicalIVRecipe : public VPRecipeBase {
+class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
public:
- VPWidenCanonicalIVRecipe() : VPRecipeBase(VPWidenCanonicalIVSC, {}) {
- new VPValue(nullptr, this);
- }
+ VPWidenCanonicalIVRecipe()
+ : VPRecipeBase(VPWidenCanonicalIVSC, {}),
+ VPValue(VPValue::VPVWidenCanonicalIVSC, nullptr, this) {}
~VPWidenCanonicalIVRecipe() override = default;
@@ -2094,6 +2118,10 @@ class VPlan {
/// Holds the VPLoopInfo analysis for this VPlan.
VPLoopInfo VPLInfo;
+ /// Indicates whether it is safe to use the Value2VPValue mapping or if the
+ /// mapping cannot be used any longer, because it is stale.
+ bool Value2VPValueEnabled = true;
+
public:
VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
if (Entry)
@@ -2135,6 +2163,10 @@ public:
return BackedgeTakenCount;
}
+ /// Mark the plan to indicate that using Value2VPValue is not safe any
+ /// longer, because it may be stale.
+ void disableValue2VPValue() { Value2VPValueEnabled = false; }
+
void addVF(ElementCount VF) { VFs.insert(VF); }
bool hasVF(ElementCount VF) { return VFs.count(VF); }
@@ -2148,6 +2180,8 @@ public:
void addExternalDef(VPValue *VPVal) { VPExternalDefs.insert(VPVal); }
void addVPValue(Value *V) {
+ assert(Value2VPValueEnabled &&
+ "IR value to VPValue mapping may be out of date!");
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
VPValue *VPV = new VPValue(V);
@@ -2156,25 +2190,39 @@ public:
}
void addVPValue(Value *V, VPValue *VPV) {
+ assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
Value2VPValue[V] = VPV;
}
- VPValue *getVPValue(Value *V) {
+ /// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
+ /// checking whether it is safe to query VPValues using IR Values.
+ VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
+ assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
+ "Value2VPValue mapping may be out of date!");
assert(V && "Trying to get the VPValue of a null Value");
assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
return Value2VPValue[V];
}
- VPValue *getOrAddVPValue(Value *V) {
+ /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
+ /// OverrideAllowed can be used to disable checking whether it is safe to
+ /// query VPValues using IR Values.
+ VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
+ assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
+ "Value2VPValue mapping may be out of date!");
assert(V && "Trying to get or add the VPValue of a null Value");
if (!Value2VPValue.count(V))
addVPValue(V);
return getVPValue(V);
}
- void removeVPValueFor(Value *V) { Value2VPValue.erase(V); }
+ void removeVPValueFor(Value *V) {
+ assert(Value2VPValueEnabled &&
+ "IR value to VPValue mapping may be out of date!");
+ Value2VPValue.erase(V);
+ }
/// Return the VPLoopInfo analysis for this VPlan.
VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
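A short sketch of the intended protocol for the new flag (Plan and IRValue are placeholders): once a transform makes the IR-to-VPValue mapping stale, it disables it, and any later lookup must either stop going through IR values or opt in explicitly.

Plan.disableValue2VPValue();   // the IR -> VPValue map may no longer be trusted
// ...
VPValue *VPV = Plan.getVPValue(IRValue, /*OverrideAllowed=*/true);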
@@ -2244,9 +2292,9 @@ class VPlanPrinter {
return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
}
- const Twine getOrCreateName(const VPBlockBase *Block);
+ Twine getOrCreateName(const VPBlockBase *Block);
- const Twine getUID(const VPBlockBase *Block);
+ Twine getUID(const VPBlockBase *Block);
/// Print the information related to a CFG edge between two VPBlockBases.
void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 52b5ae083d0e..ded5bc04beb5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -31,19 +31,18 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
VPBasicBlock *VPBB = Base->getEntryBasicBlock();
// Introduce each ingredient into VPlan.
- for (auto I = VPBB->begin(), E = VPBB->end(); I != E;) {
- VPRecipeBase *Ingredient = &*I++;
- VPValue *VPV = Ingredient->getVPSingleValue();
+ for (VPRecipeBase &Ingredient : llvm::make_early_inc_range(*VPBB)) {
+ VPValue *VPV = Ingredient.getVPSingleValue();
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
if (DeadInstructions.count(Inst)) {
VPValue DummyValue;
VPV->replaceAllUsesWith(&DummyValue);
- Ingredient->eraseFromParent();
+ Ingredient.eraseFromParent();
continue;
}
VPRecipeBase *NewRecipe = nullptr;
- if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(Ingredient)) {
+ if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
InductionDescriptor II = Inductions.lookup(Phi);
if (II.getKind() == InductionDescriptor::IK_IntInduction ||
@@ -55,25 +54,25 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
continue;
}
} else {
- assert(isa<VPInstruction>(Ingredient) &&
+ assert(isa<VPInstruction>(&Ingredient) &&
"only VPInstructions expected here");
assert(!isa<PHINode>(Inst) && "phis should be handled above");
// Create VPWidenMemoryInstructionRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
*Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- nullptr /*Mask*/);
+ nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
*Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- Plan->getOrAddVPValue(Store->getValueOperand()),
- nullptr /*Mask*/);
+ Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
+ false /*Consecutive*/, false /*Reverse*/);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
NewRecipe = new VPWidenGEPRecipe(
GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
- NewRecipe = new VPWidenCallRecipe(
- *CI, Plan->mapToVPValues(CI->arg_operands()));
+ NewRecipe =
+ new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()));
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
bool InvariantCond =
SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
@@ -85,13 +84,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
}
}
- NewRecipe->insertBefore(Ingredient);
+ NewRecipe->insertBefore(&Ingredient);
if (NewRecipe->getNumDefinedValues() == 1)
VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
else
assert(NewRecipe->getNumDefinedValues() == 0 &&
"Only recpies with zero or one defined values expected");
- Ingredient->eraseFromParent();
+ Ingredient.eraseFromParent();
Plan->removeVPValueFor(Inst);
for (auto *Def : NewRecipe->definedValues()) {
Plan->addVPValue(Inst, Def);
@@ -106,44 +105,76 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
bool Changed = false;
// First, collect the operands of all predicated replicate recipes as seeds
// for sinking.
- SetVector<VPValue *> WorkList;
+ SetVector<std::pair<VPBasicBlock *, VPValue *>> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (auto &Recipe : *VPBB) {
auto *RepR = dyn_cast<VPReplicateRecipe>(&Recipe);
if (!RepR || !RepR->isPredicated())
continue;
- WorkList.insert(RepR->op_begin(), RepR->op_end());
+ for (VPValue *Op : RepR->operands())
+ WorkList.insert(std::make_pair(RepR->getParent(), Op));
}
}
// Try to sink each replicate recipe in the worklist.
while (!WorkList.empty()) {
- auto *C = WorkList.pop_back_val();
+ VPBasicBlock *SinkTo;
+ VPValue *C;
+ std::tie(SinkTo, C) = WorkList.pop_back_val();
auto *SinkCandidate = dyn_cast_or_null<VPReplicateRecipe>(C->Def);
- if (!SinkCandidate || SinkCandidate->isUniform())
- continue;
-
- // All users of SinkCandidate must be in the same block in order to perform
- // sinking. Therefore the destination block for sinking must match the block
- // containing the first user.
- auto *FirstUser = dyn_cast<VPRecipeBase>(*SinkCandidate->user_begin());
- if (!FirstUser)
- continue;
- VPBasicBlock *SinkTo = FirstUser->getParent();
- if (SinkCandidate->getParent() == SinkTo ||
+ if (!SinkCandidate || SinkCandidate->isUniform() ||
+ SinkCandidate->getParent() == SinkTo ||
SinkCandidate->mayHaveSideEffects() ||
SinkCandidate->mayReadOrWriteMemory())
continue;
- // All recipe users of the sink candidate must be in the same block SinkTo.
- if (any_of(SinkCandidate->users(), [SinkTo](VPUser *U) {
- auto *UI = dyn_cast<VPRecipeBase>(U);
- return !UI || UI->getParent() != SinkTo;
- }))
+ bool NeedsDuplicating = false;
+ // All recipe users of the sink candidate must be in the same block SinkTo
+ // or all users outside of SinkTo must be uniform-after-vectorization
+ // (i.e., only the first lane is used). In the latter case, we need to
+ // duplicate SinkCandidate. At the moment, we identify such UAVs by looking
+ // for the address operands of widened memory recipes.
+ auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
+ SinkCandidate](VPUser *U) {
+ auto *UI = dyn_cast<VPRecipeBase>(U);
+ if (!UI)
+ return false;
+ if (UI->getParent() == SinkTo)
+ return true;
+ auto *WidenI = dyn_cast<VPWidenMemoryInstructionRecipe>(UI);
+ if (WidenI && WidenI->getAddr() == SinkCandidate) {
+ NeedsDuplicating = true;
+ return true;
+ }
+ return false;
+ };
+ if (!all_of(SinkCandidate->users(), CanSinkWithUser))
continue;
+ if (NeedsDuplicating) {
+ Instruction *I = cast<Instruction>(SinkCandidate->getUnderlyingValue());
+ auto *Clone =
+ new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
+ // TODO: add ".cloned" suffix to name of Clone's VPValue.
+
+ Clone->insertBefore(SinkCandidate);
+ SmallVector<VPUser *, 4> Users(SinkCandidate->user_begin(),
+ SinkCandidate->user_end());
+ for (auto *U : Users) {
+ auto *UI = cast<VPRecipeBase>(U);
+ if (UI->getParent() == SinkTo)
+ continue;
+
+ for (unsigned Idx = 0; Idx != UI->getNumOperands(); Idx++) {
+ if (UI->getOperand(Idx) != SinkCandidate)
+ continue;
+ UI->setOperand(Idx, Clone);
+ }
+ }
+ }
SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
- WorkList.insert(SinkCandidate->op_begin(), SinkCandidate->op_end());
+ for (VPValue *Op : SinkCandidate->operands())
+ WorkList.insert(std::make_pair(SinkTo, Op));
Changed = true;
}
return Changed;
@@ -234,12 +265,15 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
- for (VPUser *U : Phi1ToMove.getVPSingleValue()->users()) {
+ VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
+ SmallVector<VPUser *> Users(Phi1ToMoveV->user_begin(),
+ Phi1ToMoveV->user_end());
+ for (VPUser *U : Users) {
auto *UI = dyn_cast<VPRecipeBase>(U);
if (!UI || UI->getParent() != Then2)
continue;
for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
- if (Phi1ToMove.getVPSingleValue() != U->getOperand(I))
+ if (Phi1ToMoveV != U->getOperand(I))
continue;
U->setOperand(I, PredInst1);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 6eec8d14de4a..6d6ea4eb30f1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -128,3 +128,33 @@ void VPlanVerifier::verifyHierarchicalCFG(
assert(!TopRegion->getParent() && "VPlan Top Region should have no parent.");
verifyRegionRec(TopRegion);
}
+
+bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
+ auto Iter = depth_first(
+ VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
+ for (const VPBasicBlock *VPBB :
+ VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
+ // Verify that phi-like recipes are at the beginning of the block, with no
+ // other recipes in between.
+ auto RecipeI = VPBB->begin();
+ auto End = VPBB->end();
+ while (RecipeI != End && RecipeI->isPhi())
+ RecipeI++;
+
+ while (RecipeI != End) {
+ if (RecipeI->isPhi() && !isa<VPBlendRecipe>(&*RecipeI)) {
+ errs() << "Found phi-like recipe after non-phi recipe";
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ errs() << ": ";
+ RecipeI->dump();
+ errs() << "after\n";
+ std::prev(RecipeI)->dump();
+#endif
+ return false;
+ }
+ RecipeI++;
+ }
+ }
+ return true;
+}
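A plausible call site for the new checker, guarded the same way other VPlan invariants are asserted (the message is illustrative):

assert(VPlanVerifier::verifyPlanIsValid(*Plan) &&
       "phi-like recipes must come first in each VPBasicBlock");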
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.h b/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
index 8e8de441648a..839c24e2c9f4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.h
@@ -26,6 +26,7 @@
namespace llvm {
class VPRegionBlock;
+class VPlan;
/// Struct with utility functions that can be used to check the consistency and
/// invariants of a VPlan, including the components of its H-CFG.
@@ -35,6 +36,12 @@ struct VPlanVerifier {
/// 1. Region/Block verification: Check the Region/Block verification
/// invariants for every region in the H-CFG.
void verifyHierarchicalCFG(const VPRegionBlock *TopRegion) const;
+
+ /// Verify invariants for general VPlans. Currently it checks the following:
+ /// 1. All phi-like recipes must be at the beginning of a block, with no other
+ /// recipes in between. Note that currently there is still an exception for
+ /// VPBlendRecipes.
+ static bool verifyPlanIsValid(const VPlan &Plan);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index d18bcd34620c..57b11e9414ba 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -31,10 +31,12 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
+#define DEBUG_TYPE "vector-combine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecLoad, "Number of vector loads formed");
STATISTIC(NumVecCmp, "Number of vector compares formed");
STATISTIC(NumVecBO, "Number of vector binops formed");
@@ -61,8 +63,10 @@ namespace {
class VectorCombine {
public:
VectorCombine(Function &F, const TargetTransformInfo &TTI,
- const DominatorTree &DT, AAResults &AA, AssumptionCache &AC)
- : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC) {}
+ const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
+ bool ScalarizationOnly)
+ : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
+ ScalarizationOnly(ScalarizationOnly) {}
bool run();
@@ -74,12 +78,18 @@ private:
AAResults &AA;
AssumptionCache &AC;
+ /// If true, only perform scalarization combines and do not introduce new
+ /// vector operations.
+ bool ScalarizationOnly;
+
+ InstructionWorklist Worklist;
+
bool vectorizeLoadInsert(Instruction &I);
ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
unsigned PreferredExtractIndex) const;
bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
- unsigned Opcode,
+ const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
unsigned PreferredExtractIndex);
void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
@@ -92,14 +102,27 @@ private:
bool foldExtractedCmps(Instruction &I);
bool foldSingleElementStore(Instruction &I);
bool scalarizeLoadExtract(Instruction &I);
+ bool foldShuffleOfBinops(Instruction &I);
+
+ void replaceValue(Value &Old, Value &New) {
+ Old.replaceAllUsesWith(&New);
+ New.takeName(&Old);
+ if (auto *NewI = dyn_cast<Instruction>(&New)) {
+ Worklist.pushUsersToWorkList(*NewI);
+ Worklist.pushValue(NewI);
+ }
+ Worklist.pushValue(&Old);
+ }
+
+ void eraseInstruction(Instruction &I) {
+ for (Value *Op : I.operands())
+ Worklist.pushValue(Op);
+ Worklist.remove(&I);
+ I.eraseFromParent();
+ }
};
} // namespace
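Editor's note, not part of the patch: the file-local replaceValue helper removed just below is superseded by the member helpers above, which also re-seed the worklist. A hedged sketch of how a fold uses them, where I and NewV stand for the instruction being folded and its replacement:

  replaceValue(I, *NewV);  // RAUW, then push NewV, its users, and I for revisiting
  eraseInstruction(I);     // push I's operands, drop I from the worklist, erase I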
-static void replaceValue(Value &Old, Value &New) {
- Old.replaceAllUsesWith(&New);
- New.takeName(&Old);
-}
-
bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// Match insert into fixed vector of scalar value.
// TODO: Handle non-zero insert index.
@@ -284,12 +307,13 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
/// \p ConvertToShuffle to that extract instruction.
bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
ExtractElementInst *Ext1,
- unsigned Opcode,
+ const Instruction &I,
ExtractElementInst *&ConvertToShuffle,
unsigned PreferredExtractIndex) {
assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
isa<ConstantInt>(Ext1->getOperand(1)) &&
"Expected constant extract indexes");
+ unsigned Opcode = I.getOpcode();
Type *ScalarTy = Ext0->getType();
auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
InstructionCost ScalarOpCost, VectorOpCost;
@@ -302,10 +326,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
} else {
assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
"Expected a compare");
- ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
- CmpInst::makeCmpResultType(ScalarTy));
- VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
- CmpInst::makeCmpResultType(VecTy));
+ CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
+ ScalarOpCost = TTI.getCmpSelInstrCost(
+ Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
+ VectorOpCost = TTI.getCmpSelInstrCost(
+ Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
}
// Get cost estimates for the extract elements. These costs will factor into
@@ -480,8 +505,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
ExtractElementInst *ExtractToChange;
- if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
- InsertIndex))
+ if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
return false;
if (ExtractToChange) {
@@ -501,6 +525,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
else
foldExtExtBinop(Ext0, Ext1, I);
+ Worklist.push(Ext0);
+ Worklist.push(Ext1);
return true;
}
@@ -623,8 +649,11 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
unsigned Opcode = I.getOpcode();
InstructionCost ScalarOpCost, VectorOpCost;
if (IsCmp) {
- ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
- VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
+ CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
+ ScalarOpCost = TTI.getCmpSelInstrCost(
+ Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
+ VectorOpCost = TTI.getCmpSelInstrCost(
+ Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
} else {
ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
@@ -724,7 +753,10 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
InstructionCost OldCost =
TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
- OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
+ OldCost +=
+ TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
+ CmpInst::makeCmpResultType(I0->getType()), Pred) *
+ 2;
OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
// The proposed vector pattern is:
@@ -733,7 +765,8 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
- InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
+ InstructionCost NewCost = TTI.getCmpSelInstrCost(
+ CmpOpcode, X->getType(), CmpInst::makeCmpResultType(X->getType()), Pred);
SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
ShufMask[CheapIndex] = ExpensiveIndex;
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
@@ -774,18 +807,98 @@ static bool isMemModifiedBetween(BasicBlock::iterator Begin,
});
}
+/// Helper class to indicate whether a vector index can be safely scalarized and
+/// if a freeze needs to be inserted.
+class ScalarizationResult {
+ enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
+
+ StatusTy Status;
+ Value *ToFreeze;
+
+ ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
+ : Status(Status), ToFreeze(ToFreeze) {}
+
+public:
+ ScalarizationResult(const ScalarizationResult &Other) = default;
+ ~ScalarizationResult() {
+ assert(!ToFreeze && "freeze() not called with ToFreeze being set");
+ }
+
+ static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
+ static ScalarizationResult safe() { return {StatusTy::Safe}; }
+ static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
+ return {StatusTy::SafeWithFreeze, ToFreeze};
+ }
+
+ /// Returns true if the index can be scalarized without requiring a freeze.
+ bool isSafe() const { return Status == StatusTy::Safe; }
+ /// Returns true if the index cannot be scalarized.
+ bool isUnsafe() const { return Status == StatusTy::Unsafe; }
+ /// Returns true if the index can be scalarized, but requires inserting a
+ /// freeze.
+ bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
+
+ /// Reset the state to Unsafe and clear ToFreeze if set.
+ void discard() {
+ ToFreeze = nullptr;
+ Status = StatusTy::Unsafe;
+ }
+
+ /// Freeze ToFreeze and update the use in \p UserI to use it.
+ void freeze(IRBuilder<> &Builder, Instruction &UserI) {
+ assert(isSafeWithFreeze() &&
+ "should only be used when freezing is required");
+ assert(is_contained(ToFreeze->users(), &UserI) &&
+ "UserI must be a user of ToFreeze");
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(cast<Instruction>(&UserI));
+ Value *Frozen =
+ Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
+ for (Use &U : make_early_inc_range((UserI.operands())))
+ if (U.get() == ToFreeze)
+ U.set(Frozen);
+
+ ToFreeze = nullptr;
+ }
+};
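Editor's sketch (not part of the patch) of the intended consumption pattern inside a bool-returning fold; it mirrors the foldSingleElementStore changes further down and assumes VecTy, Idx, Load, AC, DT and Builder are in scope as they are there.

  auto SR = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
  if (SR.isUnsafe())
    return false;                                // index may be out of bounds
  if (SR.isSafeWithFreeze())
    SR.freeze(Builder, *cast<Instruction>(Idx)); // freeze the possibly-poison base
  // Either safe outright or frozen: the vector access can now be scalarized.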
+
/// Check if it is legal to scalarize a memory access to \p VecTy at index \p
/// Idx. \p Idx must access a valid vector element.
-static bool canScalarizeAccess(FixedVectorType *VecTy, Value *Idx,
- Instruction *CtxI, AssumptionCache &AC) {
- if (auto *C = dyn_cast<ConstantInt>(Idx))
- return C->getValue().ult(VecTy->getNumElements());
+static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
+ Value *Idx, Instruction *CtxI,
+ AssumptionCache &AC,
+ const DominatorTree &DT) {
+ if (auto *C = dyn_cast<ConstantInt>(Idx)) {
+ if (C->getValue().ult(VecTy->getNumElements()))
+ return ScalarizationResult::safe();
+ return ScalarizationResult::unsafe();
+ }
- APInt Zero(Idx->getType()->getScalarSizeInBits(), 0);
- APInt MaxElts(Idx->getType()->getScalarSizeInBits(), VecTy->getNumElements());
+ unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
+ APInt Zero(IntWidth, 0);
+ APInt MaxElts(IntWidth, VecTy->getNumElements());
ConstantRange ValidIndices(Zero, MaxElts);
- ConstantRange IdxRange = computeConstantRange(Idx, true, &AC, CtxI, 0);
- return ValidIndices.contains(IdxRange);
+ ConstantRange IdxRange(IntWidth, true);
+
+ if (isGuaranteedNotToBePoison(Idx, &AC)) {
+ if (ValidIndices.contains(computeConstantRange(Idx, true, &AC, CtxI, &DT)))
+ return ScalarizationResult::safe();
+ return ScalarizationResult::unsafe();
+ }
+
+ // If the index may be poison, check if we can insert a freeze before the
+ // range of the index is restricted.
+ Value *IdxBase;
+ ConstantInt *CI;
+ if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
+ IdxRange = IdxRange.binaryAnd(CI->getValue());
+ } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
+ IdxRange = IdxRange.urem(CI->getValue());
+ }
+
+ if (ValidIndices.contains(IdxRange))
+ return ScalarizationResult::safeWithFreeze(IdxBase);
+ return ScalarizationResult::unsafe();
}
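Editor's sketch (not part of the patch) of the range reasoning above, for an index of the form IdxBase & 3 over a four-element vector; the 64-bit width and the constants are illustrative assumptions.

  ConstantRange Valid(APInt(64, 0), APInt(64, 4));   // indices 0..3 are valid
  ConstantRange MaskedIdx =
      ConstantRange(64, /*isFullSet=*/true).binaryAnd(ConstantRange(APInt(64, 3)));
  assert(Valid.contains(MaskedIdx) &&
         "masking with 3 keeps the index in range; only a freeze of IdxBase is "
         "needed to rule out poison");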
/// The memory operation on a vector of \p ScalarType had alignment of
@@ -833,12 +946,17 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
// modified between, vector type matches store size, and index is inbounds.
if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
!DL.typeSizeEqualsStoreSize(Load->getType()) ||
- !canScalarizeAccess(VecTy, Idx, Load, AC) ||
- SrcAddr != SI->getPointerOperand()->stripPointerCasts() ||
+ SrcAddr != SI->getPointerOperand()->stripPointerCasts())
+ return false;
+
+ auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
+ if (ScalarizableIdx.isUnsafe() ||
isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
MemoryLocation::get(SI), AA))
return false;
+ if (ScalarizableIdx.isSafeWithFreeze())
+ ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
Value *GEP = Builder.CreateInBoundsGEP(
SI->getValueOperand()->getType(), SI->getPointerOperand(),
{ConstantInt::get(Idx->getType(), 0), Idx});
@@ -849,8 +967,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
DL);
NSI->setAlignment(ScalarOpAlignment);
replaceValue(I, *NSI);
- // Need erasing the store manually.
- I.eraseFromParent();
+ eraseInstruction(I);
return true;
}
@@ -860,11 +977,10 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
/// Try to scalarize vector loads feeding extractelement instructions.
bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
Value *Ptr;
- Value *Idx;
- if (!match(&I, m_ExtractElt(m_Load(m_Value(Ptr)), m_Value(Idx))))
+ if (!match(&I, m_Load(m_Value(Ptr))))
return false;
- auto *LI = cast<LoadInst>(I.getOperand(0));
+ auto *LI = cast<LoadInst>(&I);
const DataLayout &DL = I.getModule()->getDataLayout();
if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
return false;
@@ -909,8 +1025,12 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
else if (LastCheckedInst->comesBefore(UI))
LastCheckedInst = UI;
- if (!canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC))
+ auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
+ if (!ScalarIdx.isSafe()) {
+ // TODO: Freeze index if it is safe to do so.
+ ScalarIdx.discard();
return false;
+ }
auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
OriginalCost +=
@@ -946,6 +1066,60 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
return true;
}
+/// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
+/// "binop (shuffle), (shuffle)".
+bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
+ auto *VecTy = dyn_cast<FixedVectorType>(I.getType());
+ if (!VecTy)
+ return false;
+
+ BinaryOperator *B0, *B1;
+ ArrayRef<int> Mask;
+ if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
+ m_Mask(Mask))) ||
+ B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
+ return false;
+
+ // Try to replace a binop with a shuffle if the shuffle is not costly.
+ // The new shuffle will choose from a single, common operand, so it may be
+ // cheaper than the existing two-operand shuffle.
+ SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size());
+ Instruction::BinaryOps Opcode = B0->getOpcode();
+ InstructionCost BinopCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
+ InstructionCost ShufCost = TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc, VecTy, UnaryMask);
+ if (ShufCost > BinopCost)
+ return false;
+
+ // If we have something like "add X, Y" and "add Z, X", swap ops to match.
+ Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
+ Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
+ if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
+ std::swap(X, Y);
+
+ Value *Shuf0, *Shuf1;
+ if (X == Z) {
+ // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
+ Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
+ Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
+ } else if (Y == W) {
+ // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
+ Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
+ Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
+ } else {
+ return false;
+ }
+
+ Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
+ // Intersect flags from the old binops.
+ if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
+ NewInst->copyIRFlags(B0);
+ NewInst->andIRFlags(B1);
+ }
+ replaceValue(I, *NewBO);
+ return true;
+}
+
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -957,29 +1131,43 @@ bool VectorCombine::run() {
return false;
bool MadeChange = false;
+ auto FoldInst = [this, &MadeChange](Instruction &I) {
+ Builder.SetInsertPoint(&I);
+ if (!ScalarizationOnly) {
+ MadeChange |= vectorizeLoadInsert(I);
+ MadeChange |= foldExtractExtract(I);
+ MadeChange |= foldBitcastShuf(I);
+ MadeChange |= foldExtractedCmps(I);
+ MadeChange |= foldShuffleOfBinops(I);
+ }
+ MadeChange |= scalarizeBinopOrCmp(I);
+ MadeChange |= scalarizeLoadExtract(I);
+ MadeChange |= foldSingleElementStore(I);
+ };
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
if (!DT.isReachableFromEntry(&BB))
continue;
// Use early increment range so that we can erase instructions in loop.
for (Instruction &I : make_early_inc_range(BB)) {
- if (isa<DbgInfoIntrinsic>(I))
+ if (I.isDebugOrPseudoInst())
continue;
- Builder.SetInsertPoint(&I);
- MadeChange |= vectorizeLoadInsert(I);
- MadeChange |= foldExtractExtract(I);
- MadeChange |= foldBitcastShuf(I);
- MadeChange |= scalarizeBinopOrCmp(I);
- MadeChange |= foldExtractedCmps(I);
- MadeChange |= scalarizeLoadExtract(I);
- MadeChange |= foldSingleElementStore(I);
+ FoldInst(I);
}
}
- // We're done with transforms, so remove dead instructions.
- if (MadeChange)
- for (BasicBlock &BB : F)
- SimplifyInstructionsInBlock(&BB);
+ while (!Worklist.isEmpty()) {
+ Instruction *I = Worklist.removeOne();
+ if (!I)
+ continue;
+
+ if (isInstructionTriviallyDead(I)) {
+ eraseInstruction(*I);
+ continue;
+ }
+
+ FoldInst(*I);
+ }
return MadeChange;
}
@@ -1014,7 +1202,7 @@ public:
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- VectorCombine Combiner(F, TTI, DT, AA, AC);
+ VectorCombine Combiner(F, TTI, DT, AA, AC, false);
return Combiner.run();
}
};
@@ -1038,7 +1226,7 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
AAResults &AA = FAM.getResult<AAManager>(F);
- VectorCombine Combiner(F, TTI, DT, AA, AC);
+ VectorCombine Combiner(F, TTI, DT, AA, AC, ScalarizationOnly);
if (!Combiner.run())
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
index 6af7bc699d05..1be1d34417eb 100644
--- a/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
+++ b/llvm/lib/WindowsManifest/WindowsManifestMerger.cpp
@@ -35,7 +35,7 @@ void WindowsManifestError::log(raw_ostream &OS) const { OS << Msg; }
class WindowsManifestMerger::WindowsManifestMergerImpl {
public:
~WindowsManifestMergerImpl();
- Error merge(const MemoryBuffer &Manifest);
+ Error merge(MemoryBufferRef Manifest);
std::unique_ptr<MemoryBuffer> getMergedManifest();
private:
@@ -620,7 +620,7 @@ WindowsManifestMerger::WindowsManifestMergerImpl::~WindowsManifestMergerImpl() {
}
Error WindowsManifestMerger::WindowsManifestMergerImpl::merge(
- const MemoryBuffer &Manifest) {
+ MemoryBufferRef Manifest) {
if (Merged)
return make_error<WindowsManifestError>(
"merge after getMergedManifest is not supported");
@@ -690,7 +690,7 @@ WindowsManifestMerger::WindowsManifestMergerImpl::~WindowsManifestMergerImpl() {
}
Error WindowsManifestMerger::WindowsManifestMergerImpl::merge(
- const MemoryBuffer &Manifest) {
+ MemoryBufferRef Manifest) {
return make_error<WindowsManifestError>("no libxml2");
}
@@ -708,7 +708,7 @@ WindowsManifestMerger::WindowsManifestMerger()
WindowsManifestMerger::~WindowsManifestMerger() {}
-Error WindowsManifestMerger::merge(const MemoryBuffer &Manifest) {
+Error WindowsManifestMerger::merge(MemoryBufferRef Manifest) {
return Impl->merge(Manifest);
}
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp
index e6534e5a7be7..c60efa465bb6 100644
--- a/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/llvm/lib/XRay/InstrumentationMap.cpp
@@ -86,10 +86,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
"Failed to find XRay instrumentation map.",
std::make_error_code(std::errc::executable_format_error));
- if (Expected<StringRef> E = I->getContents())
- Contents = *E;
- else
- return E.takeError();
+ if (Error E = I->getContents().moveInto(Contents))
+ return E;
RelocMap Relocs;
if (ObjFile.getBinary()->isELF()) {
@@ -190,7 +188,7 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
SledEntry::FunctionKinds::TAIL,
SledEntry::FunctionKinds::LOG_ARGS_ENTER,
SledEntry::FunctionKinds::CUSTOM_EVENT};
- if (Kind >= sizeof(Kinds))
+ if (Kind >= sizeof(Kinds) / sizeof(Kinds[0]))
return errorCodeToError(
std::make_error_code(std::errc::executable_format_error));
Entry.Kind = Kinds[Kind];
diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp
index 2601ee318f7d..451e1cd98ee8 100644
--- a/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -269,7 +269,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
std::vector<GlobalValue *> ToRemove;
// First, remove aliases to functions we're about to purge.
for (GlobalAlias &Alias : M->aliases()) {
- GlobalObject *Root = Alias.getBaseObject();
+ GlobalObject *Root = Alias.getAliaseeObject();
Function *F = dyn_cast_or_null<Function>(Root);
if (F) {
if (Functions.count(F))
@@ -358,8 +358,7 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
for (auto A : Attrs)
AB.addAttribute(A);
AttributeList NewAttrs;
- NewAttrs =
- NewAttrs.addAttributes(BD.getContext(), AttributeList::FunctionIndex, AB);
+ NewAttrs = NewAttrs.addFnAttributes(BD.getContext(), AB);
// Set this new list of attributes on the function.
F->setAttributes(NewAttrs);
@@ -375,7 +374,7 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
// Pass along the set of attributes that caused the crash.
Attrs.clear();
- for (Attribute A : NewAttrs.getFnAttributes()) {
+ for (Attribute A : NewAttrs.getFnAttrs()) {
Attrs.push_back(A);
}
return true;
@@ -787,14 +786,13 @@ bool ReduceCrashingInstructions::TestInsts(
for (Module::iterator MI = M->begin(), ME = M->end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
- for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
- Instruction *Inst = &*I++;
- if (!Instructions.count(Inst) && !Inst->isTerminator() &&
- !Inst->isEHPad() && !Inst->getType()->isTokenTy() &&
- !Inst->isSwiftError()) {
- if (!Inst->getType()->isVoidTy())
- Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
- Inst->eraseFromParent();
+ for (Instruction &Inst : llvm::make_early_inc_range(*FI)) {
+ if (!Instructions.count(&Inst) && !Inst.isTerminator() &&
+ !Inst.isEHPad() && !Inst.getType()->isTokenTy() &&
+ !Inst.isSwiftError()) {
+ if (!Inst.getType()->isVoidTy())
+ Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+ Inst.eraseFromParent();
}
}
@@ -1232,7 +1230,7 @@ static Error DebugACrash(BugDriver &BD, BugTester TestFn) {
assert(Fn && "Could not find function?");
std::vector<Attribute> Attrs;
- for (Attribute A : Fn->getAttributes().getFnAttributes())
+ for (Attribute A : Fn->getAttributes().getFnAttrs())
Attrs.push_back(A);
OldSize += Attrs.size();
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index ca78735202fc..848baf90965b 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -223,8 +223,8 @@ bool BugDriver::runPasses(Module &Program,
for (std::vector<std::string>::const_iterator I = pass_args.begin(),
E = pass_args.end();
I != E; ++I)
- Args.push_back(I->c_str());
- Args.push_back(Temp->TmpName.c_str());
+ Args.push_back(*I);
+ Args.push_back(Temp->TmpName);
Args.append(ExtraArgs.begin(), ExtraArgs.end());
LLVM_DEBUG(errs() << "\nAbout to run:\t";
diff --git a/llvm/tools/bugpoint/ToolRunner.cpp b/llvm/tools/bugpoint/ToolRunner.cpp
index b81ab07980dd..d3111e574e7c 100644
--- a/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/llvm/tools/bugpoint/ToolRunner.cpp
@@ -192,7 +192,7 @@ Expected<int> LLI::ExecuteProgram(const std::string &Bitcode,
outs() << "<lli>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = LLIArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = LLIArgs.size(); i != e; ++i) errs()
<< " " << LLIArgs[i];
errs() << "\n";);
return RunProgramWithTimeout(LLIPath, LLIArgs, InputFile, OutputFile,
@@ -460,7 +460,7 @@ Expected<CC::FileType> LLC::OutputCode(const std::string &Bitcode,
outs() << (UseIntegratedAssembler ? "<llc-ia>" : "<llc>");
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = LLCArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = LLCArgs.size(); i != e; ++i) errs()
<< " " << LLCArgs[i];
errs() << "\n";);
if (RunProgramWithTimeout(LLCPath, LLCArgs, "", "", "", Timeout, MemoryLimit))
@@ -578,7 +578,7 @@ Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
outs() << "<jit>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = JITArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = JITArgs.size(); i != e; ++i) errs()
<< " " << JITArgs[i];
errs() << "\n";);
LLVM_DEBUG(errs() << "\nSending output to " << OutputFile << "\n");
@@ -685,7 +685,7 @@ Expected<int> CC::ExecuteProgram(const std::string &ProgramFile,
outs() << "<CC>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = CCArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = CCArgs.size(); i != e; ++i) errs()
<< " " << CCArgs[i];
errs() << "\n";);
if (RunProgramWithTimeout(CCPath, CCArgs, "", "", ""))
@@ -733,7 +733,7 @@ Expected<int> CC::ExecuteProgram(const std::string &ProgramFile,
outs().flush();
LLVM_DEBUG(
errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = ProgramArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = ProgramArgs.size(); i != e; ++i) errs()
<< " " << ProgramArgs[i];
errs() << "\n";);
@@ -829,7 +829,7 @@ Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
outs() << "<CC>";
outs().flush();
LLVM_DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = CCArgs.size() - 1; i != e; ++i) errs()
+ for (unsigned i = 0, e = CCArgs.size(); i != e; ++i) errs()
<< " " << CCArgs[i];
errs() << "\n";);
if (RunProgramWithTimeout(CCPath, CCArgs, "", "", ""))
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index 6a1e2bae2096..9d80f062c8f9 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
@@ -36,6 +37,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
@@ -47,8 +49,8 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -82,6 +84,19 @@ TimeCompilations("time-compilations", cl::Hidden, cl::init(1u),
cl::value_desc("N"),
cl::desc("Repeat compilation N times for timing"));
+static cl::opt<bool> TimeTrace("time-trace", cl::desc("Record time trace"));
+
+static cl::opt<unsigned> TimeTraceGranularity(
+ "time-trace-granularity",
+ cl::desc(
+ "Minimum time granularity (in microseconds) traced by time profiler"),
+ cl::init(500), cl::Hidden);
+
+static cl::opt<std::string>
+ TimeTraceFile("time-trace-file",
+ cl::desc("Specify time trace file destination"),
+ cl::value_desc("filename"));
+
static cl::opt<std::string>
BinutilsVersion("binutils-version", cl::Hidden,
cl::desc("Produced object files can use all ELF features "
@@ -201,8 +216,7 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass(
static int compileModule(char **, LLVMContext &);
-LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Msg,
- StringRef Filename = "") {
+[[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") {
SmallString<256> Prefix;
if (!Filename.empty()) {
if (Filename == "-")
@@ -213,7 +227,7 @@ LLVM_ATTRIBUTE_NORETURN static void reportError(Twine Msg,
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN static void reportError(Error Err, StringRef Filename) {
+[[noreturn]] static void reportError(Error Err, StringRef Filename) {
assert(Err);
handleAllErrors(createFileError(Filename, std::move(Err)),
[&](const ErrorInfoBase &EI) { reportError(EI.message()); });
@@ -330,8 +344,6 @@ int main(int argc, char **argv) {
// Enable debug stream buffering.
EnableDebugBuffering = true;
- LLVMContext Context;
-
// Initialize targets first, so that --version shows registered targets.
InitializeAllTargets();
InitializeAllTargetMCs();
@@ -366,6 +378,21 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
+ if (TimeTrace)
+ timeTraceProfilerInitialize(TimeTraceGranularity, argv[0]);
+ auto TimeTraceScopeExit = make_scope_exit([]() {
+ if (TimeTrace) {
+ if (auto E = timeTraceProfilerWrite(TimeTraceFile, OutputFilename)) {
+ handleAllErrors(std::move(E), [&](const StringError &SE) {
+ errs() << SE.getMessage() << "\n";
+ });
+ return;
+ }
+ timeTraceProfilerCleanup();
+ }
+ });
+
+ LLVMContext Context;
Context.setDiscardValueNames(DiscardValueNames);
// Set a diagnostic handler that doesn't exit on the first error
diff --git a/llvm/tools/lli/ChildTarget/ChildTarget.cpp b/llvm/tools/lli/ChildTarget/ChildTarget.cpp
index 5772baca1d09..cf1b03a141c5 100644
--- a/llvm/tools/lli/ChildTarget/ChildTarget.cpp
+++ b/llvm/tools/lli/ChildTarget/ChildTarget.cpp
@@ -1,69 +1,76 @@
-#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
-#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
-#include "llvm/Support/Debug.h"
+//===----------- ChildTarget.cpp - Out-of-proc executor for lli -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Simple out-of-process executor for lli.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h"
#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Process.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
#include <sstream>
-#include "../RemoteJITUtils.h"
-
using namespace llvm;
using namespace llvm::orc;
-using namespace llvm::sys;
-
-#ifdef __x86_64__
-typedef OrcX86_64_SysV HostOrcArch;
-#else
-typedef OrcGenericABI HostOrcArch;
-#endif
ExitOnError ExitOnErr;
int main(int argc, char *argv[]) {
+#if LLVM_ENABLE_THREADS
if (argc != 3) {
errs() << "Usage: " << argv[0] << " <input fd> <output fd>\n";
return 1;
}
- ExitOnErr.setBanner(std::string(argv[0]) + ":");
-
- int InFD;
- int OutFD;
- {
- std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]);
- InFDStream >> InFD;
- OutFDStream >> OutFD;
- }
-
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
errs() << "Error loading program symbols.\n";
return 1;
}
- auto SymbolLookup = [](const std::string &Name) {
- return RTDyldMemoryManager::getSymbolAddressInProcess(Name);
- };
+ ExitOnErr.setBanner(std::string(argv[0]) + ": ");
- auto RegisterEHFrames = [](uint8_t *Addr, uint32_t Size) {
- RTDyldMemoryManager::registerEHFramesInProcess(Addr, Size);
- };
-
- auto DeregisterEHFrames = [](uint8_t *Addr, uint32_t Size) {
- RTDyldMemoryManager::deregisterEHFramesInProcess(Addr, Size);
- };
-
- shared::FDRawByteChannel Channel(InFD, OutFD);
- typedef remote::OrcRemoteTargetServer<shared::FDRawByteChannel, HostOrcArch>
- JITServer;
- JITServer Server(Channel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames);
-
- while (!Server.receivedTerminate())
- ExitOnErr(Server.handleOne());
+ int InFD = 0;
+ int OutFD = 0;
+ {
+ std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]);
+ InFDStream >> InFD;
+ OutFDStream >> OutFD;
+ }
- close(InFD);
- close(OutFD);
+ auto Server =
+ ExitOnErr(SimpleRemoteEPCServer::Create<FDSimpleRemoteEPCTransport>(
+ [](SimpleRemoteEPCServer::Setup &S) -> Error {
+ S.setDispatcher(
+ std::make_unique<SimpleRemoteEPCServer::ThreadDispatcher>());
+ S.bootstrapSymbols() =
+ SimpleRemoteEPCServer::defaultBootstrapSymbols();
+ S.services().push_back(
+ std::make_unique<rt_bootstrap::SimpleExecutorMemoryManager>());
+ return Error::success();
+ },
+ InFD, OutFD));
+
+ ExitOnErr(Server->waitForDisconnect());
return 0;
+
+#else
+ errs() << argv[0]
+ << " error: this tool requires threads, but LLVM was "
+ "built with LLVM_ENABLE_THREADS=Off\n";
+ return 1;
+#endif
}
diff --git a/llvm/tools/lli/RemoteJITUtils.h b/llvm/tools/lli/ForwardingMemoryManager.h
index cc8d034f62a5..99a545e60de4 100644
--- a/llvm/tools/lli/RemoteJITUtils.h
+++ b/llvm/tools/lli/ForwardingMemoryManager.h
@@ -10,21 +10,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
-#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+#ifndef LLVM_TOOLS_LLI_FORWARDINGMEMORYMANAGER_H
+#define LLVM_TOOLS_LLI_FORWARDINGMEMORYMANAGER_H
-#include "llvm/ExecutionEngine/Orc/Shared/FDRawByteChannel.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include <mutex>
-
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#else
-#include <io.h>
-#endif
-
-// launch the remote process (see lli.cpp) and return a channel to it.
-std::unique_ptr<llvm::orc::shared::FDRawByteChannel> launchRemote();
namespace llvm {
@@ -70,9 +60,7 @@ public:
MemMgr->registerEHFrames(Addr, LoadAddr, Size);
}
- void deregisterEHFrames() override {
- MemMgr->deregisterEHFrames();
- }
+ void deregisterEHFrames() override { MemMgr->deregisterEHFrames(); }
bool finalizeMemory(std::string *ErrMsg = nullptr) override {
return MemMgr->finalizeMemory(ErrMsg);
@@ -90,8 +78,7 @@ public:
return Resolver->findSymbol(Name);
}
- JITSymbol
- findSymbolInLogicalDylib(const std::string &Name) override {
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
return Resolver->findSymbolInLogicalDylib(Name);
}
@@ -100,17 +87,31 @@ private:
std::shared_ptr<LegacyJITSymbolResolver> Resolver;
};
-template <typename RemoteT>
class RemoteResolver : public LegacyJITSymbolResolver {
public:
-
- RemoteResolver(RemoteT &R) : R(R) {}
+ static Expected<std::unique_ptr<RemoteResolver>>
+ Create(orc::ExecutorProcessControl &EPC) {
+ auto DylibMgr =
+ orc::EPCGenericDylibManager::CreateWithDefaultBootstrapSymbols(EPC);
+ if (!DylibMgr)
+ return DylibMgr.takeError();
+ auto H = DylibMgr->open("", 0);
+ if (!H)
+ return H.takeError();
+ return std::unique_ptr<RemoteResolver>(
+ new RemoteResolver(std::move(*DylibMgr), std::move(*H)));
+ }
JITSymbol findSymbol(const std::string &Name) override {
- if (auto Addr = R.getSymbolAddress(Name))
- return JITSymbol(*Addr, JITSymbolFlags::Exported);
- else
- return Addr.takeError();
+ orc::RemoteSymbolLookupSet R;
+ R.push_back({std::move(Name), false});
+ if (auto Addrs = DylibMgr.lookup(H, R)) {
+ if (Addrs->size() != 1)
+ return make_error<StringError>("Unexpected remote lookup result",
+ inconvertibleErrorCode());
+ return JITSymbol(Addrs->front().getValue(), JITSymbolFlags::Exported);
+ } else
+ return Addrs.takeError();
}
JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
@@ -118,8 +119,13 @@ public:
}
public:
- RemoteT &R;
+ RemoteResolver(orc::EPCGenericDylibManager DylibMgr,
+ orc::tpctypes::DylibHandle H)
+ : DylibMgr(std::move(DylibMgr)), H(std::move(H)) {}
+
+ orc::EPCGenericDylibManager DylibMgr;
+ orc::tpctypes::DylibHandle H;
};
-}
+} // namespace llvm
-#endif
+#endif // LLVM_TOOLS_LLI_FORWARDINGMEMORYMANAGER_H
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index af614c01b9a8..d20daa07196b 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "ExecutionUtils.h"
-#include "RemoteJITUtils.h"
+#include "ForwardingMemoryManager.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
@@ -30,11 +30,12 @@
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
+#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
+#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h"
@@ -68,6 +69,12 @@
#include "llvm/Transforms/Instrumentation.h"
#include <cerrno>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
#ifdef __CYGWIN__
#include <cygwin/version.h>
#if defined(CYGWIN_VERSION_DLL_MAJOR) && CYGWIN_VERSION_DLL_MAJOR<1007
@@ -348,13 +355,12 @@ private:
return false;
std::string CacheSubdir = ModID.substr(PrefixLength);
-#if defined(_WIN32)
- // Transform "X:\foo" => "/X\foo" for convenience.
- if (isalpha(CacheSubdir[0]) && CacheSubdir[1] == ':') {
+ // Transform "X:\foo" => "/X\foo" for convenience on Windows.
+ if (is_style_windows(llvm::sys::path::Style::native) &&
+ isalpha(CacheSubdir[0]) && CacheSubdir[1] == ':') {
CacheSubdir[1] = CacheSubdir[0];
CacheSubdir[0] = '/';
}
-#endif
CacheName = CacheDir + CacheSubdir;
size_t pos = CacheName.rfind('.');
@@ -410,8 +416,7 @@ CodeGenOpt::Level getOptLevel() {
llvm_unreachable("Unrecognized opt level.");
}
-LLVM_ATTRIBUTE_NORETURN
-static void reportError(SMDiagnostic Err, const char *ProgName) {
+[[noreturn]] static void reportError(SMDiagnostic Err, const char *ProgName) {
Err.print(ProgName, errs());
exit(1);
}
@@ -419,6 +424,7 @@ static void reportError(SMDiagnostic Err, const char *ProgName) {
Error loadDylibs();
int runOrcJIT(const char *ProgName);
void disallowOrcOptions();
+Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote();
//===----------------------------------------------------------------------===//
// main Driver function
@@ -659,6 +665,10 @@ int main(int argc, char **argv, char * const *envp) {
#endif
}
+ std::unique_ptr<orc::ExecutorProcessControl> EPC =
+ RemoteMCJIT ? ExitOnErr(launchRemote())
+ : ExitOnErr(orc::SelfExecutorProcessControl::Create());
+
if (!RemoteMCJIT) {
// If the program doesn't explicitly call exit, we will need the Exit
// function later on to make an explicit call, so get the function now.
@@ -709,22 +719,10 @@ int main(int argc, char **argv, char * const *envp) {
// it couldn't. This is a limitation of the LLI implementation, not the
// MCJIT itself. FIXME.
- // Lanch the remote process and get a channel to it.
- std::unique_ptr<orc::shared::FDRawByteChannel> C = launchRemote();
- if (!C) {
- WithColor::error(errs(), argv[0]) << "failed to launch remote JIT.\n";
- exit(1);
- }
-
- // Create a remote target client running over the channel.
- llvm::orc::ExecutionSession ES(
- std::make_unique<orc::UnsupportedExecutorProcessControl>());
- ES.setErrorReporter([&](Error Err) { ExitOnErr(std::move(Err)); });
- typedef orc::remote::OrcRemoteTargetClient MyRemote;
- auto R = ExitOnErr(MyRemote::Create(*C, ES));
-
// Create a remote memory manager.
- auto RemoteMM = ExitOnErr(R->createRemoteMemoryManager());
+ auto RemoteMM = ExitOnErr(
+ orc::EPCGenericRTDyldMemoryManager::CreateWithDefaultBootstrapSymbols(
+ *EPC));
// Forward MCJIT's memory manager calls to the remote memory manager.
static_cast<ForwardingMemoryManager*>(RTDyldMM)->setMemMgr(
@@ -732,16 +730,16 @@ int main(int argc, char **argv, char * const *envp) {
// Forward MCJIT's symbol resolution calls to the remote.
static_cast<ForwardingMemoryManager *>(RTDyldMM)->setResolver(
- std::make_unique<RemoteResolver<MyRemote>>(*R));
-
+ ExitOnErr(RemoteResolver::Create(*EPC)));
// Grab the target address of the JIT'd main function on the remote and call
// it.
// FIXME: argv and envp handling.
- JITTargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str());
+ auto Entry =
+ orc::ExecutorAddr(EE->getFunctionAddress(EntryFn->getName().str()));
EE->finalizeObject();
LLVM_DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at 0x"
- << format("%llx", Entry) << "\n");
- Result = ExitOnErr(R->callIntVoid(Entry));
+ << format("%llx", Entry.getValue()) << "\n");
+ Result = ExitOnErr(EPC->runAsMain(Entry, {}));
// Like static constructors, the remote target MCJIT support doesn't handle
// this yet. It could. FIXME.
@@ -752,7 +750,7 @@ int main(int argc, char **argv, char * const *envp) {
EE.reset();
// Signal the remote target that we're done JITing.
- ExitOnErr(R->terminateSession());
+ ExitOnErr(EPC->disconnect());
}
return Result;
@@ -1062,7 +1060,8 @@ int runOrcJIT(const char *ProgName) {
if (EPC) {
// ExecutorProcessControl-based execution with JITLink.
- Result = ExitOnErr(EPC->runAsMain(MainSym.getAddress(), InputArgv));
+ Result = ExitOnErr(
+ EPC->runAsMain(orc::ExecutorAddr(MainSym.getAddress()), InputArgv));
} else {
// Manual in-process execution with RuntimeDyld.
using MainFnTy = int(int, char *[]);
@@ -1099,7 +1098,7 @@ void disallowOrcOptions() {
}
}
-std::unique_ptr<orc::shared::FDRawByteChannel> launchRemote() {
+Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote() {
#ifndef LLVM_ON_UNIX
llvm_unreachable("launchRemote not supported on non-Unix platforms");
#else
@@ -1148,8 +1147,9 @@ std::unique_ptr<orc::shared::FDRawByteChannel> launchRemote() {
close(PipeFD[0][0]);
close(PipeFD[1][1]);
- // Return an RPC channel connected to our end of the pipes.
- return std::make_unique<orc::shared::FDRawByteChannel>(PipeFD[1][0],
- PipeFD[0][1]);
+ // Return a SimpleRemoteEPC instance connected to our end of the pipes.
+ return orc::SimpleRemoteEPC::Create<orc::FDSimpleRemoteEPCTransport>(
+ std::make_unique<llvm::orc::InPlaceTaskDispatcher>(),
+ llvm::orc::SimpleRemoteEPC::Setup(), PipeFD[1][0], PipeFD[0][1]);
#endif
}
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 0e1dce6bc2e8..175ec8d022c2 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -96,11 +96,11 @@ OPTIONS:
OPERATIONS:
d - delete [files] from the archive
m - move [files] in the archive
- p - print [files] found in the archive
+ p - print contents of [files] found in the archive
q - quick append [files] to the archive
r - replace or insert [files] into the archive
s - act as ranlib
- t - display contents of archive
+ t - display list of files in archive
x - extract [files] from the archive
MODIFIERS:
@@ -136,14 +136,14 @@ static unsigned MRILineNumber;
static bool ParsingMRIScript;
// Show the error plus the usage message, and exit.
-LLVM_ATTRIBUTE_NORETURN static void badUsage(Twine Error) {
+[[noreturn]] static void badUsage(Twine Error) {
WithColor::error(errs(), ToolName) << Error << "\n";
printHelpMessage();
exit(1);
}
// Show the error message and exit.
-LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
+[[noreturn]] static void fail(Twine Error) {
if (ParsingMRIScript) {
WithColor::error(errs(), ToolName)
<< "script line " << MRILineNumber << ": " << Error << "\n";
diff --git a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index f4851bfb2a9c..a238b0cf5922 100644
--- a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -11,8 +11,9 @@
// llvm-bcanalyzer [options] x.bc - Read LLVM bitcode from the x.bc file
//
// Options:
-// --help - Output information about command line switches
-// --dump - Dump low-level bitcode structure in readable format
+// --help - Output information about command line switches
+// --dump - Dump low-level bitcode structure in readable format
+// --dump-blockinfo - Dump the BLOCKINFO_BLOCK, when used with --dump
//
// This tool provides analytical information about a bitcode file. It is
// intended as an aid to developers of bitcode reading and writing software. It
@@ -47,6 +48,11 @@ static cl::opt<std::string> InputFilename(cl::Positional,
static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace"),
cl::cat(BCAnalyzerCategory));
+static cl::opt<bool> DumpBlockinfo("dump-blockinfo",
+ cl::desc("Include BLOCKINFO details in low"
+ " level dump"),
+ cl::cat(BCAnalyzerCategory));
+
//===----------------------------------------------------------------------===//
// Bitcode specific analysis.
//===----------------------------------------------------------------------===//
@@ -114,6 +120,7 @@ int main(int argc, char **argv) {
O.Histogram = !NoHistogram;
O.Symbolic = !NonSymbolic;
O.ShowBinaryBlobs = ShowBinaryBlobs;
+ O.DumpBlockinfo = DumpBlockinfo;
ExitOnErr(BA.analyze(
Dump ? Optional<BCDumpOptions>(O) : Optional<BCDumpOptions>(None),
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index 02c0106cbc29..5c9ff41a2d5d 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -784,10 +784,18 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
  // If path-equivalence was given and is a comma-separated pair then set
// PathRemapping.
- auto EquivPair = StringRef(PathRemap).split(',');
- if (!(EquivPair.first.empty() && EquivPair.second.empty()))
+ if (!PathRemap.empty()) {
+ auto EquivPair = StringRef(PathRemap).split(',');
+ if (EquivPair.first.empty() || EquivPair.second.empty()) {
+ error("invalid argument '" + PathRemap +
+ "', must be in format 'from,to'",
+ "-path-equivalence");
+ return 1;
+ }
+
PathRemapping = {std::string(EquivPair.first),
std::string(EquivPair.second)};
+ }
// If a demangler is supplied, check if it exists and register it.
if (!DemanglerOpts.empty()) {
diff --git a/llvm/tools/llvm-cov/CoverageExporterLcov.cpp b/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
index 6cf5d9285b90..0096a3d44d85 100644
--- a/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
+++ b/llvm/tools/llvm-cov/CoverageExporterLcov.cpp
@@ -167,7 +167,7 @@ void renderLineSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
void renderBranchSummary(raw_ostream &OS, const FileCoverageSummary &Summary) {
OS << "BRF:" << Summary.BranchCoverage.getNumBranches() << '\n'
- << "BFH:" << Summary.BranchCoverage.getCovered() << '\n';
+ << "BRH:" << Summary.BranchCoverage.getCovered() << '\n';
}
void renderFile(raw_ostream &OS, const coverage::CoverageMapping &Coverage,
diff --git a/llvm/tools/llvm-cov/CoverageFilters.cpp b/llvm/tools/llvm-cov/CoverageFilters.cpp
index da3b5214eec4..fac7518d7da2 100644
--- a/llvm/tools/llvm-cov/CoverageFilters.cpp
+++ b/llvm/tools/llvm-cov/CoverageFilters.cpp
@@ -21,7 +21,7 @@ bool NameCoverageFilter::matches(
const coverage::CoverageMapping &,
const coverage::FunctionRecord &Function) const {
StringRef FuncName = Function.Name;
- return FuncName.find(Name) != StringRef::npos;
+ return FuncName.contains(Name);
}
bool NameRegexCoverageFilter::matches(
diff --git a/llvm/tools/llvm-cxxdump/Error.cpp b/llvm/tools/llvm-cxxdump/Error.cpp
index 25317820409c..053d0e0764bc 100644
--- a/llvm/tools/llvm-cxxdump/Error.cpp
+++ b/llvm/tools/llvm-cxxdump/Error.cpp
@@ -12,6 +12,7 @@
#include "Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include <string>
using namespace llvm;
diff --git a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index f214288e951b..1430674dbadc 100644
--- a/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -13,6 +13,7 @@
#include "llvm-cxxdump.h"
#include "Error.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolSize.h"
@@ -20,7 +21,6 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -49,7 +49,7 @@ static void error(std::error_code EC) {
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN static void error(Error Err) {
+[[noreturn]] static void error(Error Err) {
logAllUnhandledErrors(std::move(Err), WithColor::error(outs()),
"reading file: ");
outs().flush();
diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index d8bf8dbccce0..ccfaaa96deb2 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -65,34 +65,27 @@ static void error(const Twine &Message) {
}
static std::string demangle(const std::string &Mangled) {
- int Status;
- std::string Prefix;
-
const char *DecoratedStr = Mangled.c_str();
if (StripUnderscore)
if (DecoratedStr[0] == '_')
++DecoratedStr;
- size_t DecoratedLength = strlen(DecoratedStr);
+ std::string Result;
+ if (nonMicrosoftDemangle(DecoratedStr, Result))
+ return Result;
+
+ std::string Prefix;
char *Undecorated = nullptr;
- if (Types ||
- ((DecoratedLength >= 2 && strncmp(DecoratedStr, "_Z", 2) == 0) ||
- (DecoratedLength >= 4 && strncmp(DecoratedStr, "___Z", 4) == 0)))
- Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, &Status);
+ if (Types)
+ Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, nullptr);
- if (!Undecorated &&
- (DecoratedLength > 6 && strncmp(DecoratedStr, "__imp_", 6) == 0)) {
+ if (!Undecorated && strncmp(DecoratedStr, "__imp_", 6) == 0) {
Prefix = "import thunk for ";
- Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
- }
-
- if (!Undecorated &&
- (DecoratedLength >= 2 && strncmp(DecoratedStr, "_R", 2) == 0)) {
- Undecorated = rustDemangle(DecoratedStr, nullptr, nullptr, &Status);
+ Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, nullptr);
}
- std::string Result(Undecorated ? Prefix + Undecorated : Mangled);
+ Result = Undecorated ? Prefix + Undecorated : Mangled;
free(Undecorated);
return Result;
}
@@ -128,7 +121,7 @@ static void SplitStringDelims(
static bool IsLegalItaniumChar(char C) {
// Itanium CXX ABI [External Names]p5.1.1:
// '$' and '.' in mangled names are reserved for private implementations.
- return isalnum(C) || C == '.' || C == '$' || C == '_';
+ return isAlnum(C) || C == '.' || C == '$' || C == '_';
}
// If 'Split' is true, then 'Mangled' is broken into individual words and each
diff --git a/llvm/tools/llvm-diff/DiffConsumer.cpp b/llvm/tools/llvm-diff/lib/DiffConsumer.cpp
index a703f42f14c3..b6eb71916acf 100644
--- a/llvm/tools/llvm-diff/DiffConsumer.cpp
+++ b/llvm/tools/llvm-diff/lib/DiffConsumer.cpp
@@ -134,6 +134,12 @@ void DiffConsumer::indent() {
while (N--) out << ' ';
}
+void DiffConsumer::reset() {
+ contexts.clear();
+ Differences = false;
+ Indent = 0;
+}
+
bool DiffConsumer::hadDifferences() const {
return Differences;
}
diff --git a/llvm/tools/llvm-diff/DiffConsumer.h b/llvm/tools/llvm-diff/lib/DiffConsumer.h
index f7b2f2450eec..08c3afcbe111 100644
--- a/llvm/tools/llvm-diff/DiffConsumer.h
+++ b/llvm/tools/llvm-diff/lib/DiffConsumer.h
@@ -78,6 +78,7 @@ class StringRef;
DiffConsumer()
: out(errs()), Differences(false), Indent(0) {}
+ void reset();
bool hadDifferences() const;
void enterContext(const Value *L, const Value *R) override;
void exitContext() override;
diff --git a/llvm/tools/llvm-diff/DiffLog.cpp b/llvm/tools/llvm-diff/lib/DiffLog.cpp
index d31a345d255c..d31a345d255c 100644
--- a/llvm/tools/llvm-diff/DiffLog.cpp
+++ b/llvm/tools/llvm-diff/lib/DiffLog.cpp
diff --git a/llvm/tools/llvm-diff/DiffLog.h b/llvm/tools/llvm-diff/lib/DiffLog.h
index d8b07b971198..d8b07b971198 100644
--- a/llvm/tools/llvm-diff/DiffLog.h
+++ b/llvm/tools/llvm-diff/lib/DiffLog.h
diff --git a/llvm/tools/llvm-diff/DifferenceEngine.cpp b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
index eb746cd2a865..eb746cd2a865 100644
--- a/llvm/tools/llvm-diff/DifferenceEngine.cpp
+++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.cpp
diff --git a/llvm/tools/llvm-diff/DifferenceEngine.h b/llvm/tools/llvm-diff/lib/DifferenceEngine.h
index 436a35566360..436a35566360 100644
--- a/llvm/tools/llvm-diff/DifferenceEngine.h
+++ b/llvm/tools/llvm-diff/lib/DifferenceEngine.h
diff --git a/llvm/tools/llvm-diff/llvm-diff.cpp b/llvm/tools/llvm-diff/llvm-diff.cpp
index 8a11179e741e..d9d19f35ffee 100644
--- a/llvm/tools/llvm-diff/llvm-diff.cpp
+++ b/llvm/tools/llvm-diff/llvm-diff.cpp
@@ -10,8 +10,8 @@
//
//===----------------------------------------------------------------------===//
-#include "DiffLog.h"
-#include "DifferenceEngine.h"
+#include "lib/DiffLog.h"
+#include "lib/DifferenceEngine.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
diff --git a/llvm/tools/llvm-dwarfdump/Statistics.cpp b/llvm/tools/llvm-dwarfdump/Statistics.cpp
index 19a971afa311..b237e014038d 100644
--- a/llvm/tools/llvm-dwarfdump/Statistics.cpp
+++ b/llvm/tools/llvm-dwarfdump/Statistics.cpp
@@ -29,6 +29,9 @@ constexpr int NumOfCoverageCategories = 12;
/// This is used for zero location coverage bucket.
constexpr unsigned ZeroCoverageBucket = 0;
+/// UINT64_MAX is used as an indication of overflow.
+constexpr uint64_t OverflowValue = std::numeric_limits<uint64_t>::max();
+
/// This represents variables DIE offsets.
using AbstractOriginVarsTy = llvm::SmallVector<uint64_t>;
/// This maps function DIE offset to its variables.
@@ -36,22 +39,43 @@ using AbstractOriginVarsTyMap = llvm::DenseMap<uint64_t, AbstractOriginVarsTy>;
/// This represents function DIE offsets containing an abstract_origin.
using FunctionsWithAbstractOriginTy = llvm::SmallVector<uint64_t>;
+/// This represents a data type for the stats and it helps us to
+/// detect an overflow.
+/// NOTE: This can be implemented as a template if there is another type
+/// needing this.
+struct SaturatingUINT64 {
+ /// Number that represents the stats.
+ uint64_t Value;
+
+ SaturatingUINT64(uint64_t Value_) : Value(Value_) {}
+
+ void operator++(int) { return *this += 1; }
+ void operator+=(uint64_t Value_) {
+ if (Value != OverflowValue) {
+ if (Value < OverflowValue - Value_)
+ Value += Value_;
+ else
+ Value = OverflowValue;
+ }
+ }
+};
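Editor's sketch (not part of the patch) showing the saturation the helper above provides; plain uint64_t arithmetic would wrap around here.

  SaturatingUINT64 Bytes(OverflowValue - 2);
  Bytes += 10;                          // clamps at UINT64_MAX instead of wrapping
  assert(Bytes.Value == OverflowValue);
  Bytes++;                              // further increments stay clamped
  assert(Bytes.Value == OverflowValue);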
+
/// Holds statistics for one function (or other entity that has a PC range and
/// contains variables, such as a compile unit).
struct PerFunctionStats {
/// Number of inlined instances of this function.
- unsigned NumFnInlined = 0;
+ uint64_t NumFnInlined = 0;
/// Number of out-of-line instances of this function.
- unsigned NumFnOutOfLine = 0;
+ uint64_t NumFnOutOfLine = 0;
/// Number of inlined instances that have abstract origins.
- unsigned NumAbstractOrigins = 0;
+ uint64_t NumAbstractOrigins = 0;
/// Number of variables and parameters with location across all inlined
/// instances.
- unsigned TotalVarWithLoc = 0;
+ uint64_t TotalVarWithLoc = 0;
/// Number of constants with location across all inlined instances.
- unsigned ConstantMembers = 0;
+ uint64_t ConstantMembers = 0;
+ /// Number of artificial variables, parameters or members across all instances.
- unsigned NumArtificial = 0;
+ uint64_t NumArtificial = 0;
/// List of all Variables and parameters in this function.
StringSet<> VarsInFunction;
/// Compile units also cover a PC range, but have this flag set to false.
@@ -59,63 +83,63 @@ struct PerFunctionStats {
/// Function has source location information.
bool HasSourceLocation = false;
/// Number of function parameters.
- unsigned NumParams = 0;
+ uint64_t NumParams = 0;
/// Number of function parameters with source location.
- unsigned NumParamSourceLocations = 0;
+ uint64_t NumParamSourceLocations = 0;
/// Number of function parameters with type.
- unsigned NumParamTypes = 0;
+ uint64_t NumParamTypes = 0;
/// Number of function parameters with a DW_AT_location.
- unsigned NumParamLocations = 0;
+ uint64_t NumParamLocations = 0;
/// Number of local variables.
- unsigned NumLocalVars = 0;
+ uint64_t NumLocalVars = 0;
/// Number of local variables with source location.
- unsigned NumLocalVarSourceLocations = 0;
+ uint64_t NumLocalVarSourceLocations = 0;
/// Number of local variables with type.
- unsigned NumLocalVarTypes = 0;
+ uint64_t NumLocalVarTypes = 0;
/// Number of local variables with DW_AT_location.
- unsigned NumLocalVarLocations = 0;
+ uint64_t NumLocalVarLocations = 0;
};
/// Holds accumulated global statistics about DIEs.
struct GlobalStats {
/// Total number of PC range bytes covered by DW_AT_locations.
- unsigned TotalBytesCovered = 0;
+ SaturatingUINT64 TotalBytesCovered = 0;
/// Total number of parent DIE PC range bytes covered by DW_AT_Locations.
- unsigned ScopeBytesCovered = 0;
+ SaturatingUINT64 ScopeBytesCovered = 0;
/// Total number of PC range bytes in each variable's enclosing scope.
- unsigned ScopeBytes = 0;
+ SaturatingUINT64 ScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value).
- unsigned ScopeEntryValueBytesCovered = 0;
+ SaturatingUINT64 ScopeEntryValueBytesCovered = 0;
/// Total number of PC range bytes covered by DW_AT_locations of
/// formal parameters.
- unsigned ParamScopeBytesCovered = 0;
+ SaturatingUINT64 ParamScopeBytesCovered = 0;
/// Total number of PC range bytes in each parameter's enclosing scope.
- unsigned ParamScopeBytes = 0;
+ SaturatingUINT64 ParamScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value) (only for parameters).
- unsigned ParamScopeEntryValueBytesCovered = 0;
+ SaturatingUINT64 ParamScopeEntryValueBytesCovered = 0;
/// Total number of PC range bytes covered by DW_AT_locations (only for local
/// variables).
- unsigned LocalVarScopeBytesCovered = 0;
+ SaturatingUINT64 LocalVarScopeBytesCovered = 0;
/// Total number of PC range bytes in each local variable's enclosing scope.
- unsigned LocalVarScopeBytes = 0;
+ SaturatingUINT64 LocalVarScopeBytes = 0;
/// Total number of PC range bytes covered by DW_AT_locations with
/// the debug entry values (DW_OP_entry_value) (only for local variables).
- unsigned LocalVarScopeEntryValueBytesCovered = 0;
+ SaturatingUINT64 LocalVarScopeEntryValueBytesCovered = 0;
/// Total number of call site entries (DW_AT_call_file & DW_AT_call_line).
- unsigned CallSiteEntries = 0;
+ SaturatingUINT64 CallSiteEntries = 0;
/// Total number of call site DIEs (DW_TAG_call_site).
- unsigned CallSiteDIEs = 0;
+ SaturatingUINT64 CallSiteDIEs = 0;
/// Total number of call site parameter DIEs (DW_TAG_call_site_parameter).
- unsigned CallSiteParamDIEs = 0;
+ SaturatingUINT64 CallSiteParamDIEs = 0;
/// Total byte size of concrete functions. This byte size includes
/// inline functions contained in the concrete functions.
- unsigned FunctionSize = 0;
+ SaturatingUINT64 FunctionSize = 0;
/// Total byte size of inlined functions. This is the total number of bytes
/// for the top inline functions within concrete functions. This can help
/// tune the inline settings when compiling to match user expectations.
- unsigned InlineFunctionSize = 0;
+ SaturatingUINT64 InlineFunctionSize = 0;
};
/// Holds accumulated debug location statistics about local variables and
@@ -126,37 +150,37 @@ struct LocationStats {
   /// of variables with no debug location at all, but the last element
/// in the vector represents the number of fully covered variables within
/// its scope.
- std::vector<unsigned> VarParamLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> VarParamLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Map non debug entry values coverage.
- std::vector<unsigned> VarParamNonEntryValLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> VarParamNonEntryValLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// The debug location statistics for formal parameters.
- std::vector<unsigned> ParamLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> ParamLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Map non debug entry values coverage for formal parameters.
- std::vector<unsigned> ParamNonEntryValLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> ParamNonEntryValLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// The debug location statistics for local variables.
- std::vector<unsigned> LocalVarLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> LocalVarLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Map non debug entry values coverage for local variables.
- std::vector<unsigned> LocalVarNonEntryValLocStats{
- std::vector<unsigned>(NumOfCoverageCategories, 0)};
+ std::vector<SaturatingUINT64> LocalVarNonEntryValLocStats{
+ std::vector<SaturatingUINT64>(NumOfCoverageCategories, 0)};
/// Total number of local variables and function parameters processed.
- unsigned NumVarParam = 0;
+ SaturatingUINT64 NumVarParam = 0;
/// Total number of formal parameters processed.
- unsigned NumParam = 0;
+ SaturatingUINT64 NumParam = 0;
/// Total number of local variables processed.
- unsigned NumVar = 0;
+ SaturatingUINT64 NumVar = 0;
};
} // namespace
/// Collect debug location statistics for one DIE.
static void collectLocStats(uint64_t ScopeBytesCovered, uint64_t BytesInScope,
- std::vector<unsigned> &VarParamLocStats,
- std::vector<unsigned> &ParamLocStats,
- std::vector<unsigned> &LocalVarLocStats,
+ std::vector<SaturatingUINT64> &VarParamLocStats,
+ std::vector<SaturatingUINT64> &ParamLocStats,
+ std::vector<SaturatingUINT64> &LocalVarLocStats,
bool IsParam, bool IsLocalVar) {
auto getCoverageBucket = [ScopeBytesCovered, BytesInScope]() -> unsigned {
// No debug location at all for the variable.
@@ -173,11 +197,11 @@ static void collectLocStats(uint64_t ScopeBytesCovered, uint64_t BytesInScope,
unsigned CoverageBucket = getCoverageBucket();
- VarParamLocStats[CoverageBucket]++;
+ VarParamLocStats[CoverageBucket].Value++;
if (IsParam)
- ParamLocStats[CoverageBucket]++;
+ ParamLocStats[CoverageBucket].Value++;
else if (IsLocalVar)
- LocalVarLocStats[CoverageBucket]++;
+ LocalVarLocStats[CoverageBucket].Value++;
}
/// Construct an identifier for a given DIE from its Prefix, Name, DeclFileName
@@ -298,7 +322,7 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
U->getFormParams().Format);
// Consider the expression containing the DW_OP_entry_value as
// an entry value.
- return llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
+ return llvm::any_of(Expression, [](const DWARFExpression::Operation &Op) {
return Op.getCode() == dwarf::DW_OP_entry_value ||
Op.getCode() == dwarf::DW_OP_GNU_entry_value;
});
@@ -350,11 +374,11 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
// Calculate the debug location statistics.
if (BytesInScope && !DeferLocStats) {
- LocStats.NumVarParam++;
+ LocStats.NumVarParam.Value++;
if (IsParam)
- LocStats.NumParam++;
+ LocStats.NumParam.Value++;
else if (IsLocalVar)
- LocStats.NumVar++;
+ LocStats.NumVar.Value++;
collectLocStats(ScopeBytesCovered, BytesInScope, LocStats.VarParamLocStats,
LocStats.ParamLocStats, LocStats.LocalVarLocStats, IsParam,
@@ -389,7 +413,7 @@ static void collectStatsForDie(DWARFDie Die, const std::string &FnPrefix,
GlobalStats.LocalVarScopeEntryValueBytesCovered +=
BytesEntryValuesCovered;
}
- assert(GlobalStats.ScopeBytesCovered <= GlobalStats.ScopeBytes);
+ assert(GlobalStats.ScopeBytesCovered.Value <= GlobalStats.ScopeBytes.Value);
}
if (IsConstantMember) {
@@ -603,45 +627,78 @@ static void collectStatsRecursive(
/// Print human-readable output.
/// \{
static void printDatum(json::OStream &J, const char *Key, json::Value Value) {
- J.attribute(Key, Value);
+ if (Value == OverflowValue)
+ J.attribute(Key, "overflowed");
+ else
+ J.attribute(Key, Value);
+
LLVM_DEBUG(llvm::dbgs() << Key << ": " << Value << '\n');
}
static void printLocationStats(json::OStream &J, const char *Key,
- std::vector<unsigned> &LocationStats) {
- J.attribute(
- (Twine(Key) + " with 0% of parent scope covered by DW_AT_location").str(),
- LocationStats[0]);
+ std::vector<SaturatingUINT64> &LocationStats) {
+ if (LocationStats[0].Value == OverflowValue)
+ J.attribute((Twine(Key) +
+                 " with 0% of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute(
+ (Twine(Key) + " with 0% of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[0].Value);
LLVM_DEBUG(
llvm::dbgs() << Key
<< " with 0% of parent scope covered by DW_AT_location: \\"
- << LocationStats[0] << '\n');
- J.attribute(
- (Twine(Key) + " with (0%,10%) of parent scope covered by DW_AT_location")
- .str(),
- LocationStats[1]);
+ << LocationStats[0].Value << '\n');
+
+ if (LocationStats[1].Value == OverflowValue)
+ J.attribute((Twine(Key) +
+ " with (0%,10%) of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute((Twine(Key) +
+ " with (0%,10%) of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[1].Value);
LLVM_DEBUG(llvm::dbgs()
<< Key
<< " with (0%,10%) of parent scope covered by DW_AT_location: "
- << LocationStats[1] << '\n');
+ << LocationStats[1].Value << '\n');
+
for (unsigned i = 2; i < NumOfCoverageCategories - 1; ++i) {
- J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
- Twine(i * 10) + "%) of parent scope covered by DW_AT_location")
- .str(),
- LocationStats[i]);
+ if (LocationStats[i].Value == OverflowValue)
+ J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
+ Twine(i * 10) +
+ "%) of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute((Twine(Key) + " with [" + Twine((i - 1) * 10) + "%," +
+ Twine(i * 10) +
+ "%) of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[i].Value);
LLVM_DEBUG(llvm::dbgs()
<< Key << " with [" << (i - 1) * 10 << "%," << i * 10
<< "%) of parent scope covered by DW_AT_location: "
- << LocationStats[i]);
+ << LocationStats[i].Value);
}
- J.attribute(
- (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
- .str(),
- LocationStats[NumOfCoverageCategories - 1]);
+ if (LocationStats[NumOfCoverageCategories - 1].Value == OverflowValue)
+ J.attribute(
+ (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
+ .str(),
+ "overflowed");
+ else
+ J.attribute(
+ (Twine(Key) + " with 100% of parent scope covered by DW_AT_location")
+ .str(),
+ LocationStats[NumOfCoverageCategories - 1].Value);
LLVM_DEBUG(
llvm::dbgs() << Key
<< " with 100% of parent scope covered by DW_AT_location: "
- << LocationStats[NumOfCoverageCategories - 1]);
+ << LocationStats[NumOfCoverageCategories - 1].Value);
}
static void printSectionSizes(json::OStream &J, const SectionSizes &Sizes) {
@@ -750,31 +807,31 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
/// The version number should be increased every time the algorithm is changed
/// (including bug fixes). New metrics may be added without increasing the
/// version.
- unsigned Version = 8;
- unsigned VarParamTotal = 0;
- unsigned VarParamUnique = 0;
- unsigned VarParamWithLoc = 0;
- unsigned NumFunctions = 0;
- unsigned NumInlinedFunctions = 0;
- unsigned NumFuncsWithSrcLoc = 0;
- unsigned NumAbstractOrigins = 0;
- unsigned ParamTotal = 0;
- unsigned ParamWithType = 0;
- unsigned ParamWithLoc = 0;
- unsigned ParamWithSrcLoc = 0;
- unsigned LocalVarTotal = 0;
- unsigned LocalVarWithType = 0;
- unsigned LocalVarWithSrcLoc = 0;
- unsigned LocalVarWithLoc = 0;
+ unsigned Version = 9;
+ SaturatingUINT64 VarParamTotal = 0;
+ SaturatingUINT64 VarParamUnique = 0;
+ SaturatingUINT64 VarParamWithLoc = 0;
+ SaturatingUINT64 NumFunctions = 0;
+ SaturatingUINT64 NumInlinedFunctions = 0;
+ SaturatingUINT64 NumFuncsWithSrcLoc = 0;
+ SaturatingUINT64 NumAbstractOrigins = 0;
+ SaturatingUINT64 ParamTotal = 0;
+ SaturatingUINT64 ParamWithType = 0;
+ SaturatingUINT64 ParamWithLoc = 0;
+ SaturatingUINT64 ParamWithSrcLoc = 0;
+ SaturatingUINT64 LocalVarTotal = 0;
+ SaturatingUINT64 LocalVarWithType = 0;
+ SaturatingUINT64 LocalVarWithSrcLoc = 0;
+ SaturatingUINT64 LocalVarWithLoc = 0;
for (auto &Entry : Statistics) {
PerFunctionStats &Stats = Entry.getValue();
- unsigned TotalVars = Stats.VarsInFunction.size() *
+ uint64_t TotalVars = Stats.VarsInFunction.size() *
(Stats.NumFnInlined + Stats.NumFnOutOfLine);
// Count variables in global scope.
if (!Stats.IsFunction)
TotalVars =
Stats.NumLocalVars + Stats.ConstantMembers + Stats.NumArtificial;
- unsigned Constants = Stats.ConstantMembers;
+ uint64_t Constants = Stats.ConstantMembers;
VarParamWithLoc += Stats.TotalVarWithLoc + Constants;
VarParamTotal += TotalVars;
VarParamUnique += Stats.VarsInFunction.size();
@@ -806,70 +863,72 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
printDatum(J, "file", Filename.str());
printDatum(J, "format", FormatName);
- printDatum(J, "#functions", NumFunctions);
- printDatum(J, "#functions with location", NumFuncsWithSrcLoc);
- printDatum(J, "#inlined functions", NumInlinedFunctions);
- printDatum(J, "#inlined functions with abstract origins", NumAbstractOrigins);
+ printDatum(J, "#functions", NumFunctions.Value);
+ printDatum(J, "#functions with location", NumFuncsWithSrcLoc.Value);
+ printDatum(J, "#inlined functions", NumInlinedFunctions.Value);
+ printDatum(J, "#inlined functions with abstract origins",
+ NumAbstractOrigins.Value);
// This includes local variables and formal parameters.
- printDatum(J, "#unique source variables", VarParamUnique);
- printDatum(J, "#source variables", VarParamTotal);
- printDatum(J, "#source variables with location", VarParamWithLoc);
+ printDatum(J, "#unique source variables", VarParamUnique.Value);
+ printDatum(J, "#source variables", VarParamTotal.Value);
+ printDatum(J, "#source variables with location", VarParamWithLoc.Value);
- printDatum(J, "#call site entries", GlobalStats.CallSiteEntries);
- printDatum(J, "#call site DIEs", GlobalStats.CallSiteDIEs);
- printDatum(J, "#call site parameter DIEs", GlobalStats.CallSiteParamDIEs);
+ printDatum(J, "#call site entries", GlobalStats.CallSiteEntries.Value);
+ printDatum(J, "#call site DIEs", GlobalStats.CallSiteDIEs.Value);
+ printDatum(J, "#call site parameter DIEs",
+ GlobalStats.CallSiteParamDIEs.Value);
printDatum(J, "sum_all_variables(#bytes in parent scope)",
- GlobalStats.ScopeBytes);
+ GlobalStats.ScopeBytes.Value);
printDatum(J,
"sum_all_variables(#bytes in any scope covered by DW_AT_location)",
- GlobalStats.TotalBytesCovered);
+ GlobalStats.TotalBytesCovered.Value);
printDatum(J,
"sum_all_variables(#bytes in parent scope covered by "
"DW_AT_location)",
- GlobalStats.ScopeBytesCovered);
+ GlobalStats.ScopeBytesCovered.Value);
printDatum(J,
"sum_all_variables(#bytes in parent scope covered by "
"DW_OP_entry_value)",
- GlobalStats.ScopeEntryValueBytesCovered);
+ GlobalStats.ScopeEntryValueBytesCovered.Value);
printDatum(J, "sum_all_params(#bytes in parent scope)",
- GlobalStats.ParamScopeBytes);
+ GlobalStats.ParamScopeBytes.Value);
printDatum(J,
"sum_all_params(#bytes in parent scope covered by DW_AT_location)",
- GlobalStats.ParamScopeBytesCovered);
+ GlobalStats.ParamScopeBytesCovered.Value);
printDatum(J,
"sum_all_params(#bytes in parent scope covered by "
"DW_OP_entry_value)",
- GlobalStats.ParamScopeEntryValueBytesCovered);
+ GlobalStats.ParamScopeEntryValueBytesCovered.Value);
printDatum(J, "sum_all_local_vars(#bytes in parent scope)",
- GlobalStats.LocalVarScopeBytes);
+ GlobalStats.LocalVarScopeBytes.Value);
printDatum(J,
"sum_all_local_vars(#bytes in parent scope covered by "
"DW_AT_location)",
- GlobalStats.LocalVarScopeBytesCovered);
+ GlobalStats.LocalVarScopeBytesCovered.Value);
printDatum(J,
"sum_all_local_vars(#bytes in parent scope covered by "
"DW_OP_entry_value)",
- GlobalStats.LocalVarScopeEntryValueBytesCovered);
+ GlobalStats.LocalVarScopeEntryValueBytesCovered.Value);
- printDatum(J, "#bytes within functions", GlobalStats.FunctionSize);
+ printDatum(J, "#bytes within functions", GlobalStats.FunctionSize.Value);
printDatum(J, "#bytes within inlined functions",
- GlobalStats.InlineFunctionSize);
+ GlobalStats.InlineFunctionSize.Value);
// Print the summary for formal parameters.
- printDatum(J, "#params", ParamTotal);
- printDatum(J, "#params with source location", ParamWithSrcLoc);
- printDatum(J, "#params with type", ParamWithType);
- printDatum(J, "#params with binary location", ParamWithLoc);
+ printDatum(J, "#params", ParamTotal.Value);
+ printDatum(J, "#params with source location", ParamWithSrcLoc.Value);
+ printDatum(J, "#params with type", ParamWithType.Value);
+ printDatum(J, "#params with binary location", ParamWithLoc.Value);
// Print the summary for local variables.
- printDatum(J, "#local vars", LocalVarTotal);
- printDatum(J, "#local vars with source location", LocalVarWithSrcLoc);
- printDatum(J, "#local vars with type", LocalVarWithType);
- printDatum(J, "#local vars with binary location", LocalVarWithLoc);
+ printDatum(J, "#local vars", LocalVarTotal.Value);
+ printDatum(J, "#local vars with source location", LocalVarWithSrcLoc.Value);
+ printDatum(J, "#local vars with type", LocalVarWithType.Value);
+ printDatum(J, "#local vars with binary location", LocalVarWithLoc.Value);
// Print the debug section sizes.
printSectionSizes(J, Sizes);
@@ -877,32 +936,34 @@ bool dwarfdump::collectStatsForObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
// Print the location statistics for variables (includes local variables
// and formal parameters).
printDatum(J, "#variables processed by location statistics",
- LocStats.NumVarParam);
+ LocStats.NumVarParam.Value);
printLocationStats(J, "#variables", LocStats.VarParamLocStats);
printLocationStats(J, "#variables - entry values",
LocStats.VarParamNonEntryValLocStats);
// Print the location statistics for formal parameters.
- printDatum(J, "#params processed by location statistics", LocStats.NumParam);
+ printDatum(J, "#params processed by location statistics",
+ LocStats.NumParam.Value);
printLocationStats(J, "#params", LocStats.ParamLocStats);
printLocationStats(J, "#params - entry values",
LocStats.ParamNonEntryValLocStats);
// Print the location statistics for local variables.
printDatum(J, "#local vars processed by location statistics",
- LocStats.NumVar);
+ LocStats.NumVar.Value);
printLocationStats(J, "#local vars", LocStats.LocalVarLocStats);
printLocationStats(J, "#local vars - entry values",
LocStats.LocalVarNonEntryValLocStats);
J.objectEnd();
OS << '\n';
- LLVM_DEBUG(
- llvm::dbgs() << "Total Availability: "
- << (int)std::round((VarParamWithLoc * 100.0) / VarParamTotal)
- << "%\n";
- llvm::dbgs() << "PC Ranges covered: "
- << (int)std::round((GlobalStats.ScopeBytesCovered * 100.0) /
- GlobalStats.ScopeBytes)
- << "%\n");
+ LLVM_DEBUG(llvm::dbgs() << "Total Availability: "
+ << (int)std::round((VarParamWithLoc.Value * 100.0) /
+ VarParamTotal.Value)
+ << "%\n";
+ llvm::dbgs() << "PC Ranges covered: "
+ << (int)std::round(
+ (GlobalStats.ScopeBytesCovered.Value * 100.0) /
+ GlobalStats.ScopeBytes.Value)
+ << "%\n");
return true;
}
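
For readers skimming the Statistics.cpp hunks above: the overflow handling boils down to a saturating counter plus a sentinel check at print time (printDatum emits "overflowed" when the sentinel is seen). A minimal self-contained sketch of that pattern, with illustrative names rather than the tool's actual types:

    #include <cstdint>
    #include <iostream>
    #include <limits>

    // Illustrative saturating counter: once the value hits the sentinel
    // (UINT64_MAX), further additions are ignored, so the sentinel can be
    // reported as "overflowed" instead of a wrapped-around number.
    struct SaturatingCounter {
      static constexpr uint64_t Sentinel = std::numeric_limits<uint64_t>::max();
      uint64_t Value = 0;

      void add(uint64_t N) {
        if (Value == Sentinel)
          return;                 // already saturated
        if (Value > Sentinel - N)
          Value = Sentinel;       // would wrap; clamp instead
        else
          Value += N;
      }
    };

    int main() {
      SaturatingCounter C;
      C.add(SaturatingCounter::Sentinel - 1);
      C.add(5); // saturates rather than wrapping
      if (C.Value == SaturatingCounter::Sentinel)
        std::cout << "overflowed\n";
      else
        std::cout << C.Value << "\n";
    }
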
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index a324ff710af5..9eeaddf14928 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -169,7 +169,7 @@ static list<std::string>
static alias FindAlias("f", desc("Alias for --find."), aliasopt(Find),
cl::NotHidden);
static opt<bool> IgnoreCase("ignore-case",
- desc("Ignore case distinctions when searching."),
+ desc("Ignore case distinctions when using --name."),
value_desc("i"), cat(DwarfDumpCategory));
static alias IgnoreCaseAlias("i", desc("Alias for --ignore-case."),
aliasopt(IgnoreCase), cl::NotHidden);
@@ -192,11 +192,12 @@ static opt<std::string>
cl::value_desc("filename"), cat(DwarfDumpCategory));
static alias OutputFilenameAlias("out-file", desc("Alias for -o."),
aliasopt(OutputFilename));
-static opt<bool>
- UseRegex("regex",
- desc("Treat any <pattern> strings as regular expressions when "
- "searching instead of just as an exact string match."),
- cat(DwarfDumpCategory));
+static opt<bool> UseRegex(
+ "regex",
+ desc("Treat any <pattern> strings as regular "
+ "expressions when searching with --name. If --ignore-case is also "
+ "specified, the regular expression becomes case-insensitive."),
+ cat(DwarfDumpCategory));
static alias RegexAlias("x", desc("Alias for --regex"), aliasopt(UseRegex),
cl::NotHidden);
static opt<bool>
@@ -536,8 +537,9 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
};
if (auto *Obj = dyn_cast<ObjectFile>(BinOrErr->get())) {
if (filterArch(*Obj)) {
- std::unique_ptr<DWARFContext> DICtx =
- DWARFContext::create(*Obj, nullptr, "", RecoverableErrorHandler);
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
+ *Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
+ RecoverableErrorHandler);
if (!HandleObj(*Obj, *DICtx, Filename, OS))
Result = false;
}
@@ -548,8 +550,9 @@ static bool handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
if (auto MachOOrErr = ObjForArch.getAsObjectFile()) {
auto &Obj = **MachOOrErr;
if (filterArch(Obj)) {
- std::unique_ptr<DWARFContext> DICtx =
- DWARFContext::create(Obj, nullptr, "", RecoverableErrorHandler);
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
+ Obj, DWARFContext::ProcessDebugRelocations::Process, nullptr, "",
+ RecoverableErrorHandler);
if (!HandleObj(Obj, *DICtx, ObjName, OS))
Result = false;
}
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 1f583728c141..4b6f7bc8dd34 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -20,10 +20,10 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index 45bfa84fb826..995ebacacb87 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -227,6 +227,10 @@ static cl::opt<bool> ListDependentLibrariesOnly(
"Instead of running LTO, list the dependent libraries in each IR file"),
cl::cat(LTOCategory));
+static cl::opt<bool> QueryHasCtorDtor(
+ "query-hasCtorDtor", cl::init(false),
+ cl::desc("Queries LTOModule::hasCtorDtor() on each IR file"));
+
static cl::opt<bool>
SetMergedModule("set-merged-module", cl::init(false),
cl::desc("Use the first input module as the merged module"),
@@ -371,7 +375,7 @@ static void printIndexStats() {
ExitOnErr(getModuleSummaryIndexForFile(Filename));
// Skip files without a module summary.
if (!Index)
- report_fatal_error(Filename + " does not contain an index");
+ report_fatal_error(Twine(Filename) + " does not contain an index");
unsigned Calls = 0, Refs = 0, Functions = 0, Alias = 0, Globals = 0;
for (auto &Summaries : *Index) {
@@ -394,22 +398,27 @@ static void printIndexStats() {
}
}
-/// List symbols in each IR file.
+/// Load each IR file and dump certain information based on active flags.
///
/// The main point here is to provide lit-testable coverage for the LTOModule
-/// functionality that's exposed by the C API to list symbols. Moreover, this
-/// provides testing coverage for modules that have been created in their own
-/// contexts.
-static void listSymbols(const TargetOptions &Options) {
+/// functionality that's exposed by the C API. Moreover, this provides testing
+/// coverage for modules that have been created in their own contexts.
+static void testLTOModule(const TargetOptions &Options) {
for (auto &Filename : InputFilenames) {
std::unique_ptr<MemoryBuffer> Buffer;
std::unique_ptr<LTOModule> Module =
getLocalLTOModule(Filename, Buffer, Options);
- // List the symbols.
- outs() << Filename << ":\n";
- for (int I = 0, E = Module->getSymbolCount(); I != E; ++I)
- outs() << Module->getSymbolName(I) << "\n";
+ if (ListSymbolsOnly) {
+ // List the symbols.
+ outs() << Filename << ":\n";
+ for (int I = 0, E = Module->getSymbolCount(); I != E; ++I)
+ outs() << Module->getSymbolName(I) << "\n";
+ }
+ if (QueryHasCtorDtor)
+ outs() << Filename
+ << ": hasCtorDtor = " << (Module->hasCtorDtor() ? "true" : "false")
+ << "\n";
}
}
@@ -478,6 +487,10 @@ static void createCombinedModuleSummaryIndex() {
ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(Filename)));
ExitOnErr(readModuleSummaryIndex(*MB, CombinedIndex, NextModuleId++));
}
+ // In order to use this index for testing, specifically import testing, we
+ // need to update any indirect call edges created from SamplePGO, so that they
+ // point to the correct GUIDs.
+ updateIndirectCalls(CombinedIndex);
std::error_code EC;
assert(!OutputFilename.empty());
raw_fd_ostream OS(OutputFilename + ".thinlto.bc", EC,
@@ -939,8 +952,8 @@ int main(int argc, char **argv) {
// set up the TargetOptions for the machine
TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple());
- if (ListSymbolsOnly) {
- listSymbols(Options);
+ if (ListSymbolsOnly || QueryHasCtorDtor) {
+ testLTOModule(Options);
return 0;
}
@@ -1050,7 +1063,7 @@ int main(int argc, char **argv) {
CodeGen.addMustPreserveSymbol(KeptDSOSyms[i]);
// Set cpu and attrs strings for the default target/subtarget.
- CodeGen.setCpu(codegen::getMCPU().c_str());
+ CodeGen.setCpu(codegen::getMCPU());
CodeGen.setOptLevel(OptLevel - '0');
CodeGen.setAttrs(codegen::getMAttrs());
@@ -1084,8 +1097,7 @@ int main(int argc, char **argv) {
error("writing merged module failed.");
}
- auto AddStream =
- [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
std::string PartFilename = OutputFilename;
if (Parallelism != 1)
PartFilename += "." + utostr(Task);
@@ -1095,7 +1107,7 @@ int main(int argc, char **argv) {
std::make_unique<raw_fd_ostream>(PartFilename, EC, sys::fs::OF_None);
if (EC)
error("error opening the file '" + PartFilename + "': " + EC.message());
- return std::make_unique<lto::NativeObjectStream>(std::move(S));
+ return std::make_unique<CachedFileStream>(std::move(S));
};
if (!CodeGen.compileOptimized(AddStream, Parallelism))
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index c0bff1eabee2..6f6f6c1ed90f 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -19,10 +19,10 @@
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
+#include "llvm/Support/Caching.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
@@ -362,23 +362,23 @@ static int run(int argc, char **argv) {
if (HasErrors)
return 1;
- auto AddStream =
- [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ auto AddStream = [&](size_t Task) -> std::unique_ptr<CachedFileStream> {
std::string Path = OutputFilename + "." + utostr(Task);
std::error_code EC;
auto S = std::make_unique<raw_fd_ostream>(Path, EC, sys::fs::OF_None);
check(EC, Path);
- return std::make_unique<lto::NativeObjectStream>(std::move(S));
+ return std::make_unique<CachedFileStream>(std::move(S));
};
auto AddBuffer = [&](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
*AddStream(Task)->OS << MB->getBuffer();
};
- NativeObjectCache Cache;
+ FileCache Cache;
if (!CacheDir.empty())
- Cache = check(localCache(CacheDir, AddBuffer), "failed to create cache");
+ Cache = check(localCache("ThinLTO", "Thin", CacheDir, AddBuffer),
+ "failed to create cache");
check(Lto.run(AddStream, Cache), "LTO::run failed");
return 0;
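
The llvm-lto2 hunk above rewires caching through an AddBuffer callback that replays cached output. A toy sketch of that callback shape, purely illustrative and not the real llvm::FileCache / localCache API:

    #include <cstddef>
    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    // On a cache hit, the cached buffer is handed to AddBuffer instead of
    // re-running code generation for that task.
    using AddBufferFn = std::function<void(std::size_t Task, const std::string &Buf)>;

    struct ToyCache {
      std::map<std::string, std::string> Entries;
      AddBufferFn AddBuffer;

      // Returns true (and replays the buffer) on a hit.
      bool lookup(std::size_t Task, const std::string &Key) {
        auto It = Entries.find(Key);
        if (It == Entries.end())
          return false;
        AddBuffer(Task, It->second);
        return true;
      }
    };

    int main() {
      ToyCache Cache;
      Cache.AddBuffer = [](std::size_t Task, const std::string &Buf) {
        std::cout << "task " << Task << ": reused " << Buf.size() << " bytes\n";
      };
      Cache.Entries["module0"] = "cached object bytes";
      if (!Cache.lookup(0, "module0"))
        std::cout << "cache miss, compiling\n";
    }
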
diff --git a/llvm/tools/llvm-mc/Disassembler.cpp b/llvm/tools/llvm-mc/Disassembler.cpp
index 16ab99548adf..ac55d05db192 100644
--- a/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/llvm/tools/llvm-mc/Disassembler.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -156,7 +156,7 @@ int Disassembler::disassemble(const Target &T, const std::string &Triple,
}
// Set up initial section manually here
- Streamer.InitSections(false);
+ Streamer.initSections(false, STI);
bool ErrorOccurred = false;
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index 24c601b7033f..4e5a12e53a6b 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -26,6 +26,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileUtilities.h"
@@ -34,7 +35,6 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
@@ -571,7 +571,7 @@ int main(int argc, char **argv) {
MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ false));
if (NoExecStack)
- Str->InitSections(true);
+ Str->initSections(true, *STI);
}
// Use Assembler information for parsing.
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
index 6ad2a65592b9..6cdd0ba797aa 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -114,7 +114,7 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions(
// Need to initialize an MCTargetStreamer otherwise
// certain asm directives will cause a segfault.
- // Using nulls() so that anything emitted by the MCTagetStreamer
+ // Using nulls() so that anything emitted by the MCTargetStreamer
// doesn't show up in the llvm-mca output.
raw_ostream &OSRef = nulls();
formatted_raw_ostream FOSRef(OSRef);
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.h b/llvm/tools/llvm-mca/CodeRegionGenerator.h
index 1c11784ca3fb..ac02131b2f39 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.h
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.h
@@ -20,9 +20,9 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include <memory>
namespace llvm {
@@ -37,7 +37,7 @@ protected:
CodeRegionGenerator &operator=(const CodeRegionGenerator &) = delete;
public:
- CodeRegionGenerator(SourceMgr &SM) : Regions(SM) {}
+ CodeRegionGenerator(llvm::SourceMgr &SM) : Regions(SM) {}
virtual ~CodeRegionGenerator();
virtual Expected<const CodeRegions &>
parseCodeRegions(const std::unique_ptr<MCInstPrinter> &IP) = 0;
@@ -54,7 +54,7 @@ class AsmCodeRegionGenerator final : public CodeRegionGenerator {
unsigned AssemblerDialect; // This is set during parsing.
public:
- AsmCodeRegionGenerator(const Target &T, SourceMgr &SM, MCContext &C,
+ AsmCodeRegionGenerator(const Target &T, llvm::SourceMgr &SM, MCContext &C,
const MCAsmInfo &A, const MCSubtargetInfo &S,
const MCInstrInfo &I)
: CodeRegionGenerator(SM), TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I),
diff --git a/llvm/tools/llvm-mca/PipelinePrinter.cpp b/llvm/tools/llvm-mca/PipelinePrinter.cpp
index 955b825891fa..9d06c6a19395 100644
--- a/llvm/tools/llvm-mca/PipelinePrinter.cpp
+++ b/llvm/tools/llvm-mca/PipelinePrinter.cpp
@@ -14,7 +14,6 @@
#include "PipelinePrinter.h"
#include "CodeRegion.h"
#include "Views/InstructionView.h"
-#include "Views/View.h"
namespace llvm {
namespace mca {
diff --git a/llvm/tools/llvm-mca/PipelinePrinter.h b/llvm/tools/llvm-mca/PipelinePrinter.h
index 1365f75be0f5..fd262f0a8a5d 100644
--- a/llvm/tools/llvm-mca/PipelinePrinter.h
+++ b/llvm/tools/llvm-mca/PipelinePrinter.h
@@ -16,11 +16,11 @@
#ifndef LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
#define LLVM_TOOLS_LLVM_MCA_PIPELINEPRINTER_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/Context.h"
#include "llvm/MCA/Pipeline.h"
+#include "llvm/MCA/View.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
diff --git a/llvm/tools/llvm-mca/Views/DispatchStatistics.h b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
index 81b582f74a6b..cfd12691c03f 100644
--- a/llvm/tools/llvm-mca/Views/DispatchStatistics.h
+++ b/llvm/tools/llvm-mca/Views/DispatchStatistics.h
@@ -33,9 +33,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
#define LLVM_TOOLS_LLVM_MCA_DISPATCHVIEW_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
#include <map>
namespace llvm {
diff --git a/llvm/tools/llvm-mca/Views/InstructionView.h b/llvm/tools/llvm-mca/Views/InstructionView.h
index 1843b0513dfc..cec07eef6a80 100644
--- a/llvm/tools/llvm-mca/Views/InstructionView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionView.h
@@ -15,7 +15,7 @@
#ifndef LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
#define LLVM_TOOLS_LLVM_MCA_INSTRUCTIONVIEW_H
-#include "Views/View.h"
+#include "llvm/MCA/View.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
index ec5c5f431e12..3de2a22ac32d 100644
--- a/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
+++ b/llvm/tools/llvm-mca/Views/RegisterFileStatistics.h
@@ -35,9 +35,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_REGISTERFILESTATISTICS_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
namespace llvm {
namespace mca {
diff --git a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
index 86b46e93aa7c..ed3736c64515 100644
--- a/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
+++ b/llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h
@@ -28,8 +28,8 @@
#ifndef LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_RETIRECONTROLUNITSTATISTICS_H
-#include "Views/View.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
#include <map>
namespace llvm {
diff --git a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
index 66f4b0011866..9d2f71c13e5a 100644
--- a/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
+++ b/llvm/tools/llvm-mca/Views/SchedulerStatistics.h
@@ -36,9 +36,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
#define LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
-#include "Views/View.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MCA/View.h"
#include <map>
namespace llvm {
diff --git a/llvm/tools/llvm-mca/Views/SummaryView.h b/llvm/tools/llvm-mca/Views/SummaryView.h
index e2c7cfd19e94..21f3fad23ca0 100644
--- a/llvm/tools/llvm-mca/Views/SummaryView.h
+++ b/llvm/tools/llvm-mca/Views/SummaryView.h
@@ -28,9 +28,9 @@
#ifndef LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
#define LLVM_TOOLS_LLVM_MCA_SUMMARYVIEW_H
-#include "Views/View.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCSchedule.h"
+#include "llvm/MCA/View.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -71,12 +71,6 @@ class SummaryView : public View {
// Used to map resource indices to actual processor resource IDs.
llvm::SmallVector<unsigned, 8> ResIdx2ProcResID;
- // Compute the reciprocal throughput for the analyzed code block.
- // The reciprocal block throughput is computed as the MAX between:
- // - NumMicroOps / DispatchWidth
- // - Total Resource Cycles / #Units (for every resource consumed).
- double getBlockRThroughput() const;
-
/// Compute the data we want to print out in the object DV.
void collectData(DisplayValues &DV) const;
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 9a949761bb75..5c05edbdea68 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -145,10 +145,11 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
double AverageTime1, AverageTime2, AverageTime3;
AverageTime1 =
- (double)Entry.CyclesSpentInSchedulerQueue / CumulativeExecutions;
- AverageTime2 = (double)Entry.CyclesSpentInSQWhileReady / CumulativeExecutions;
- AverageTime3 =
- (double)Entry.CyclesSpentAfterWBAndBeforeRetire / CumulativeExecutions;
+ (double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;
+ AverageTime2 =
+ (double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;
+ AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /
+ CumulativeExecutions;
OS << Executions;
OS.PadToColumn(13);
@@ -157,18 +158,18 @@ void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,
BufferSize);
- OS << format("%.1f", floor((AverageTime1 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);
OS.PadToColumn(20);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,
BufferSize);
- OS << format("%.1f", floor((AverageTime2 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);
OS.PadToColumn(27);
if (!PrintingTotals)
tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,
CumulativeExecutions,
getSubTargetInfo().getSchedModel().MicroOpBufferSize);
- OS << format("%.1f", floor((AverageTime3 * 10) + 0.5) / 10);
+ OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);
if (OS.has_colors())
OS.resetColor();
@@ -295,8 +296,10 @@ void TimelineView::printTimeline(raw_ostream &OS) const {
// attribute is set correctly whether or not it is greater
// than timeline-max-cycles so we can use that to ensure
// we don't early exit because of a 0 latency instruction.
- if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0)
+ if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) {
+ FOS << "Truncated display due to cycle limit\n";
return;
+ }
unsigned SourceIndex = IID % Source.size();
printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);
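
The TimelineView arithmetic change above only moves the x10 scaling from the print site into the averages themselves; the printed value is still the average rounded to one decimal place via floor(x + 0.5) / 10. A tiny standalone illustration with made-up numbers:

    #include <cmath>
    #include <cstdio>

    // Rounding to one decimal place, as in the hunk above: scale by 10 before
    // the division, then apply floor(x + 0.5) / 10 at the print site.
    int main() {
      unsigned Cycles = 7, Executions = 3;                    // true average ~2.333
      double Average = (double)(Cycles * 10) / Executions;    // 23.33...
      std::printf("%.1f\n", std::floor(Average + 0.5) / 10);  // prints 2.3
    }
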
diff --git a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp b/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp
deleted file mode 100644
index a655f3faf1bf..000000000000
--- a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===------------------ AMDGPUCustomBehaviour.cpp ---------------*-C++ -* -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file implements methods from the AMDGPUCustomBehaviour class.
-///
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPUCustomBehaviour.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIInstrInfo.h"
-#include "llvm/Support/WithColor.h"
-
-namespace llvm {
-namespace mca {
-
-AMDGPUCustomBehaviour::AMDGPUCustomBehaviour(const MCSubtargetInfo &STI,
- const SourceMgr &SrcMgr,
- const MCInstrInfo &MCII)
- : CustomBehaviour(STI, SrcMgr, MCII) {}
-
-unsigned AMDGPUCustomBehaviour::checkCustomHazard(ArrayRef<InstRef> IssuedInst,
- const InstRef &IR) {
- return 0;
-}
-
-} // namespace mca
-} // namespace llvm
diff --git a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h b/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h
deleted file mode 100644
index 0dd21c7b4c44..000000000000
--- a/llvm/tools/llvm-mca/lib/AMDGPU/AMDGPUCustomBehaviour.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===------------------- AMDGPUCustomBehaviour.h ----------------*-C++ -* -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-///
-/// This file defines the AMDGPUCustomBehaviour class which inherits from
-/// CustomBehaviour.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLVM_MCA_LIB_AMDGPU_AMDGPUCUSTOMBEHAVIOUR_H
-#define LLVM_TOOLS_LLVM_MCA_LIB_AMDGPU_AMDGPUCUSTOMBEHAVIOUR_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/MCA/CustomBehaviour.h"
-#include "llvm/Support/TargetParser.h"
-
-namespace llvm {
-namespace mca {
-
-class AMDGPUInstrPostProcess : public InstrPostProcess {
-public:
- AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
- : InstrPostProcess(STI, MCII) {}
-
- ~AMDGPUInstrPostProcess() {}
-
- void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) override {}
-};
-
-class AMDGPUCustomBehaviour : public CustomBehaviour {
-public:
- AMDGPUCustomBehaviour(const MCSubtargetInfo &STI, const SourceMgr &SrcMgr,
- const MCInstrInfo &MCII);
-
- ~AMDGPUCustomBehaviour() {}
-
- /// This method is used to determine if an instruction
- /// should be allowed to be dispatched. The return value is
- /// how many cycles until the instruction can be dispatched.
- /// This method is called after MCA has already checked for
- /// register and hardware dependencies so this method should only
- /// implement custom behaviour and dependencies that are not picked up
- /// by MCA naturally.
- unsigned checkCustomHazard(ArrayRef<InstRef> IssuedInst,
- const InstRef &IR) override;
-};
-
-} // namespace mca
-} // namespace llvm
-
-#endif /* LLVM_TOOLS_LLVM_MCA_LIB_AMDGPU_AMDGPUCUSTOMBEHAVIOUR_H */
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index a473cd8f1719..0b58ca377ce1 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -32,9 +32,6 @@
#include "Views/SchedulerStatistics.h"
#include "Views/SummaryView.h"
#include "Views/TimelineView.h"
-#ifdef HAS_AMDGPU
-#include "lib/AMDGPU/AMDGPUCustomBehaviour.h"
-#endif
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -43,6 +40,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/MCA/CodeEmitter.h"
#include "llvm/MCA/Context.h"
#include "llvm/MCA/CustomBehaviour.h"
@@ -59,7 +57,6 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
@@ -293,39 +290,6 @@ static void processViewOptions(bool IsOutOfOrder) {
processOptionImpl(PrintRetireStats, Default);
}
-std::unique_ptr<mca::InstrPostProcess>
-createInstrPostProcess(const Triple &TheTriple, const MCSubtargetInfo &STI,
- const MCInstrInfo &MCII) {
- // Might be a good idea to have a separate flag so that InstrPostProcess
- // can be used with or without CustomBehaviour
- if (DisableCustomBehaviour)
- return std::make_unique<mca::InstrPostProcess>(STI, MCII);
-#ifdef HAS_AMDGPU
- if (TheTriple.isAMDGPU())
- return std::make_unique<mca::AMDGPUInstrPostProcess>(STI, MCII);
-#endif
- return std::make_unique<mca::InstrPostProcess>(STI, MCII);
-}
-
-std::unique_ptr<mca::CustomBehaviour>
-createCustomBehaviour(const Triple &TheTriple, const MCSubtargetInfo &STI,
- const mca::SourceMgr &SrcMgr, const MCInstrInfo &MCII) {
- // Build the appropriate CustomBehaviour object for the current target.
- // The CustomBehaviour class should never depend on the source code,
- // but it can depend on the list of mca::Instruction and any classes
- // that can be built using just the target info. If you need extra
- // information from the source code or the list of MCInst, consider
- // adding that information to the mca::Instruction class and setting
- // it during InstrBuilder::createInstruction().
- if (DisableCustomBehaviour)
- return std::make_unique<mca::CustomBehaviour>(STI, SrcMgr, MCII);
-#ifdef HAS_AMDGPU
- if (TheTriple.isAMDGPU())
- return std::make_unique<mca::AMDGPUCustomBehaviour>(STI, SrcMgr, MCII);
-#endif
- return std::make_unique<mca::CustomBehaviour>(STI, SrcMgr, MCII);
-}
-
// Returns true on success.
static bool runPipeline(mca::Pipeline &P) {
// Handle pipeline errors here.
@@ -344,6 +308,7 @@ int main(int argc, char **argv) {
InitializeAllTargetInfos();
InitializeAllTargetMCs();
InitializeAllAsmParsers();
+ InitializeAllTargetMCAs();
// Enable printing of available targets when flag --version is specified.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
@@ -532,8 +497,18 @@ int main(int argc, char **argv) {
// Lower the MCInst sequence into an mca::Instruction sequence.
ArrayRef<MCInst> Insts = Region->getInstructions();
mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts);
- std::unique_ptr<mca::InstrPostProcess> IPP =
- createInstrPostProcess(TheTriple, *STI, *MCII);
+
+ std::unique_ptr<mca::InstrPostProcess> IPP;
+ if (!DisableCustomBehaviour) {
+ IPP = std::unique_ptr<mca::InstrPostProcess>(
+ TheTarget->createInstrPostProcess(*STI, *MCII));
+ }
+ if (!IPP)
+ // If the target doesn't have its own IPP implemented (or the
+ // -disable-cb flag is set) then we use the base class
+ // (which does nothing).
+ IPP = std::make_unique<mca::InstrPostProcess>(*STI, *MCII);
+
std::vector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
Expected<std::unique_ptr<mca::Instruction>> Inst =
@@ -602,14 +577,35 @@ int main(int argc, char **argv) {
// the source code (but it can depend on the list of
// mca::Instruction or any objects that can be reconstructed
// from the target information).
- std::unique_ptr<mca::CustomBehaviour> CB =
- createCustomBehaviour(TheTriple, *STI, S, *MCII);
+ std::unique_ptr<mca::CustomBehaviour> CB;
+ if (!DisableCustomBehaviour)
+ CB = std::unique_ptr<mca::CustomBehaviour>(
+ TheTarget->createCustomBehaviour(*STI, S, *MCII));
+ if (!CB)
+ // If the target doesn't have its own CB implemented (or the -disable-cb
+ // flag is set) then we use the base class (which does nothing).
+ CB = std::make_unique<mca::CustomBehaviour>(*STI, S, *MCII);
// Create a basic pipeline simulating an out-of-order backend.
auto P = MCA.createDefaultPipeline(PO, S, *CB);
mca::PipelinePrinter Printer(*P, *Region, RegionIdx, *STI, PO);
+ // Targets can define their own custom Views that exist within their
+ // /lib/Target/ directory so that the View can utilize their CustomBehaviour
+ // or other backend symbols / functionality that are not already exposed
+ // through one of the MC-layer classes. These Views will be initialized
+ // using the CustomBehaviour::getViews() variants.
+ // If a target makes a custom View that does not depend on their target
+ // CB or their backend, they should put the View within
+ // /tools/llvm-mca/Views/ instead.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getStartViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
// When we output JSON, we add a view that contains the instructions
// and CPU resource information.
if (PrintJson) {
@@ -635,6 +631,16 @@ int main(int argc, char **argv) {
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP));
+ // Fetch custom Views that are to be placed after the InstructionInfoView.
+ // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
+ // for more info.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getPostInstrInfoViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
if (PrintDispatchStats)
Printer.addView(std::make_unique<mca::DispatchStatistics>());
@@ -659,6 +665,16 @@ int main(int argc, char **argv) {
TimelineMaxCycles));
}
+ // Fetch custom Views that are to be placed after all other Views.
+ // Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
+ // for more info.
+ if (!DisableCustomBehaviour) {
+ std::vector<std::unique_ptr<mca::View>> CBViews =
+ CB->getEndViews(*IP, Insts);
+ for (auto &CBView : CBViews)
+ Printer.addView(std::move(CBView));
+ }
+
if (!runPipeline(*P))
return 1;
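
The llvm-mca changes above replace the tool-local factory helpers with a lookup through the target (TheTarget->createCustomBehaviour / createInstrPostProcess), falling back to the do-nothing base classes when the target provides nothing or -disable-cb is set. A generic sketch of that factory-or-default pattern, with illustrative types rather than llvm-mca's real ones:

    #include <iostream>
    #include <memory>

    struct Behaviour {
      virtual ~Behaviour() = default;
      virtual unsigned checkHazard() const { return 0; } // base class does nothing
    };

    struct TargetBehaviour : Behaviour {
      unsigned checkHazard() const override { return 2; } // target-specific stalls
    };

    // Stand-in for a target registry hook: may legitimately return null.
    std::unique_ptr<Behaviour> createForTarget(bool TargetHasOne) {
      if (TargetHasOne)
        return std::make_unique<TargetBehaviour>();
      return nullptr;
    }

    int main() {
      bool Disable = false, TargetHasOne = false;
      std::unique_ptr<Behaviour> CB;
      if (!Disable)
        CB = createForTarget(TargetHasOne);
      if (!CB)
        CB = std::make_unique<Behaviour>(); // default: no custom behaviour
      std::cout << CB->checkHazard() << "\n";
    }
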
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index ffb427a3f2bd..0864985377ce 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -64,7 +64,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -530,7 +530,7 @@ struct DarwinStabName {
uint8_t NType;
const char *Name;
};
-static const struct DarwinStabName DarwinStabNames[] = {
+const struct DarwinStabName DarwinStabNames[] = {
{MachO::N_GSYM, "GSYM"},
{MachO::N_FNAME, "FNAME"},
{MachO::N_FUN, "FUN"},
@@ -599,22 +599,16 @@ static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
outs() << format(" %02x", NType);
}
-static Optional<std::string> demangle(StringRef Name, bool StripUnderscore) {
- if (StripUnderscore && !Name.empty() && Name[0] == '_')
- Name = Name.substr(1);
+static Optional<std::string> demangle(const std::string &Name,
+ bool StripUnderscore) {
+ const char *Mangled = Name.c_str();
+ if (StripUnderscore && Mangled[0] == '_')
+ Mangled = Mangled + 1;
- if (!Name.startswith("_Z"))
- return None;
-
- int Status;
- char *Undecorated =
- itaniumDemangle(Name.str().c_str(), nullptr, nullptr, &Status);
- if (Status != 0)
- return None;
-
- std::string S(Undecorated);
- free(Undecorated);
- return S;
+ std::string Demangled;
+ if (nonMicrosoftDemangle(Mangled, Demangled))
+ return Demangled;
+ return None;
}
static bool symbolIsDefined(const NMSymbol &Sym) {
@@ -1575,90 +1569,11 @@ static void dumpSymbolsFromDLInfoMachO(MachOObjectFile &MachO) {
}
}
-namespace {
-struct SymbolVersion {
- std::string Name;
- bool IsDefault;
-};
-} // namespace
-
-template <class ELFT>
-static Expected<std::vector<SymbolVersion>>
-readSymbolVersionsELF(const ELFFile<ELFT> &Obj, StringRef FileName,
- ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
- using Elf_Shdr = typename ELFT::Shdr;
-
- // We called sections() earlier, so can't fail here.
- typename ELFT::ShdrRange SectionsOrErr = cantFail(Obj.sections());
- const Elf_Shdr *SymVerSec = nullptr;
- const Elf_Shdr *SymVerNeedSec = nullptr;
- const Elf_Shdr *SymVerDefSec = nullptr;
- for (const Elf_Shdr &Sec : SectionsOrErr) {
- if (Sec.sh_type == ELF::SHT_GNU_versym)
- SymVerSec = &Sec;
- else if (Sec.sh_type == ELF::SHT_GNU_verdef)
- SymVerDefSec = &Sec;
- else if (Sec.sh_type == ELF::SHT_GNU_verneed)
- SymVerNeedSec = &Sec;
- }
-
- if (!SymVerSec)
- return std::vector<SymbolVersion>{};
-
- Expected<SmallVector<Optional<VersionEntry>, 0>> MapOrErr =
- Obj.loadVersionMap(SymVerNeedSec, SymVerDefSec);
- if (!MapOrErr)
- return MapOrErr.takeError();
-
- std::vector<SymbolVersion> Ret;
- size_t I = 0;
- for (auto It = Symbols.begin(), E = Symbols.end(); It != E; ++It) {
- ++I;
- Expected<const typename ELFT::Versym *> VerEntryOrErr =
- Obj.template getEntry<typename ELFT::Versym>(*SymVerSec, I);
- if (!VerEntryOrErr)
- return createError("unable to read an entry with index " + Twine(I) +
- " from " + describe(Obj, *SymVerSec) + ": " +
- toString(VerEntryOrErr.takeError()));
-
- Expected<uint32_t> FlagsOrErr = It->getFlags();
- if (!FlagsOrErr)
- return createError("unable to read flags for symbol with index " +
- Twine(I) + ": " + toString(FlagsOrErr.takeError()));
-
- bool IsDefault;
- Expected<StringRef> VerOrErr = Obj.getSymbolVersionByIndex(
- (*VerEntryOrErr)->vs_index, IsDefault, *MapOrErr,
- (*FlagsOrErr) & SymbolRef::SF_Undefined);
- if (!VerOrErr)
- return createError("unable to get a version for entry " + Twine(I) +
- " of " + describe(Obj, *SymVerSec) + ": " +
- toString(VerOrErr.takeError()));
-
- Ret.push_back({(*VerOrErr).str(), IsDefault});
- }
-
- return Ret;
-}
-
-static Expected<std::vector<SymbolVersion>>
-readSymbolVersionsELF(const ELFObjectFileBase &Obj,
- ELFObjectFileBase::elf_symbol_iterator_range Symbols) {
- if (const auto *ELF = dyn_cast<ELF32LEObjectFile>(&Obj))
- return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
- else if (const auto *ELF = dyn_cast<ELF32BEObjectFile>(&Obj))
- return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
- else if (const auto *ELF = dyn_cast<ELF64LEObjectFile>(&Obj))
- return readSymbolVersionsELF(ELF->getELFFile(), Obj.getFileName(), Symbols);
- return readSymbolVersionsELF(cast<ELF64BEObjectFile>(&Obj)->getELFFile(),
- Obj.getFileName(), Symbols);
-}
-
static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
StringRef ArchiveName = {},
StringRef ArchitectureName = {}) {
auto Symbols = Obj.symbols();
- std::vector<SymbolVersion> SymbolVersions;
+ std::vector<VersionEntry> SymbolVersions;
if (DynamicSyms) {
const auto *E = dyn_cast<ELFObjectFileBase>(&Obj);
if (!E) {
@@ -1667,8 +1582,8 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
Symbols = E->getDynamicSymbolIterators();
- if (Expected<std::vector<SymbolVersion>> VersionsOrErr =
- readSymbolVersionsELF(*E, Symbols))
+ if (Expected<std::vector<VersionEntry>> VersionsOrErr =
+ E->readDynsymVersions())
SymbolVersions = std::move(*VersionsOrErr);
else
WithColor::warning(errs(), ToolName)
@@ -1738,7 +1653,7 @@ static void dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
}
if (!SymbolVersions.empty() && !SymbolVersions[I].Name.empty())
S.Name +=
- (SymbolVersions[I].IsDefault ? "@@" : "@") + SymbolVersions[I].Name;
+ (SymbolVersions[I].IsVerDef ? "@@" : "@") + SymbolVersions[I].Name;
S.Sym = Sym;
SymbolList.push_back(S);
diff --git a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
index e50ac2e12e2f..38c9cd09433b 100644
--- a/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/COFFObjcopy.cpp
@@ -94,7 +94,7 @@ static Error addGnuDebugLink(Object &Obj, StringRef DebugLinkFile) {
return Error::success();
}
-static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
+static uint32_t flagsToCharacteristics(SectionFlag AllFlags, uint32_t OldChar) {
// Need to preserve alignment flags.
const uint32_t PreserveMask =
IMAGE_SCN_ALIGN_1BYTES | IMAGE_SCN_ALIGN_2BYTES | IMAGE_SCN_ALIGN_4BYTES |
@@ -107,8 +107,7 @@ static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
// Setup new section characteristics based on the flags provided in command
// line.
- uint32_t NewCharacteristics =
- (Sec.Header.Characteristics & PreserveMask) | IMAGE_SCN_MEM_READ;
+ uint32_t NewCharacteristics = (OldChar & PreserveMask) | IMAGE_SCN_MEM_READ;
if ((AllFlags & SectionFlag::SecAlloc) && !(AllFlags & SectionFlag::SecLoad))
NewCharacteristics |= IMAGE_SCN_CNT_UNINITIALIZED_DATA;
@@ -128,7 +127,7 @@ static void setSectionFlags(Section &Sec, SectionFlag AllFlags) {
if (AllFlags & SectionFlag::SecExclude)
NewCharacteristics |= IMAGE_SCN_LNK_REMOVE;
- Sec.Header.Characteristics = NewCharacteristics;
+ return NewCharacteristics;
}
static Error handleArgs(const CommonConfig &Config, Object &Obj) {
@@ -226,7 +225,8 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
for (Section &Sec : Obj.getMutableSections()) {
const auto It = Config.SetSectionFlags.find(Sec.Name);
if (It != Config.SetSectionFlags.end())
- setSectionFlags(Sec, It->second.NewFlags);
+ Sec.Header.Characteristics = flagsToCharacteristics(
+ It->second.NewFlags, Sec.Header.Characteristics);
}
for (const auto &Flag : Config.AddSection) {
@@ -238,11 +238,18 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
return createFileError(FileName, errorCodeToError(BufOrErr.getError()));
auto Buf = std::move(*BufOrErr);
+ uint32_t Characteristics;
+ const auto It = Config.SetSectionFlags.find(SecName);
+ if (It != Config.SetSectionFlags.end())
+ Characteristics = flagsToCharacteristics(It->second.NewFlags, 0);
+ else
+ Characteristics = IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES;
+
addSection(
Obj, SecName,
makeArrayRef(reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
Buf->getBufferSize()),
- IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES);
+ Characteristics);
}
if (!Config.AddGnuDebugLink.empty())
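Turning setSectionFlags() into the pure helper flagsToCharacteristics(AllFlags, OldChar) lets one flag translation serve both call sites in the hunks above: rewriting an existing section, where the IMAGE_SCN_ALIGN_* bits of the old characteristics must survive, and creating a section with --add-section, where there is nothing to preserve. A condensed sketch of the two uses (It, Sec and Config refer to the surrounding code in this file):

    // Existing section: keep its alignment bits, replace everything else.
    Sec.Header.Characteristics =
        flagsToCharacteristics(It->second.NewFlags, Sec.Header.Characteristics);

    // Section created by --add-section: start from zero, falling back to the
    // initialized-data defaults when no --set-section-flags entry matches.
    uint32_t NewChar =
        It != Config.SetSectionFlags.end()
            ? flagsToCharacteristics(It->second.NewFlags, /*OldChar=*/0)
            : IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_ALIGN_1BYTES;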
diff --git a/llvm/tools/llvm-objcopy/COFF/Object.cpp b/llvm/tools/llvm-objcopy/COFF/Object.cpp
index 1c17b8408ee7..ec2628c7eca9 100644
--- a/llvm/tools/llvm-objcopy/COFF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Object.cpp
@@ -107,7 +107,7 @@ void Object::removeSections(function_ref<bool(const Section &)> ToRemove) {
// section,
// remove those as well as nothing will include them (and we can't
// leave them dangling).
- if (RemovedSections.count(Sym.AssociativeComdatTargetSectionId) == 1)
+ if (RemovedSections.contains(Sym.AssociativeComdatTargetSectionId))
AssociatedSections.insert(Sym.TargetSectionId);
return RemovedSections.contains(Sym.TargetSectionId);
});
diff --git a/llvm/tools/llvm-objcopy/COFF/Writer.cpp b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
index e7be64faab65..cbd0e4261238 100644
--- a/llvm/tools/llvm-objcopy/COFF/Writer.cpp
+++ b/llvm/tools/llvm-objcopy/COFF/Writer.cpp
@@ -406,7 +406,7 @@ Expected<uint32_t> COFFWriter::virtualAddressToFileAddress(uint32_t RVA) {
// the debug_directory structs in there, and set the PointerToRawData field
// in all of them, according to their new physical location in the file.
Error COFFWriter::patchDebugDirectory() {
- if (Obj.DataDirectories.size() < DEBUG_DIRECTORY)
+ if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY)
return Error::success();
const data_directory *Dir = &Obj.DataDirectories[DEBUG_DIRECTORY];
if (Dir->Size <= 0)
@@ -426,15 +426,13 @@ Error COFFWriter::patchDebugDirectory() {
uint8_t *End = Ptr + Dir->Size;
while (Ptr < End) {
debug_directory *Debug = reinterpret_cast<debug_directory *>(Ptr);
- if (!Debug->AddressOfRawData)
- return createStringError(object_error::parse_failed,
- "debug directory payload outside of "
- "mapped sections not supported");
- if (Expected<uint32_t> FilePosOrErr =
- virtualAddressToFileAddress(Debug->AddressOfRawData))
- Debug->PointerToRawData = *FilePosOrErr;
- else
- return FilePosOrErr.takeError();
+ if (Debug->PointerToRawData) {
+ if (Expected<uint32_t> FilePosOrErr =
+ virtualAddressToFileAddress(Debug->AddressOfRawData))
+ Debug->PointerToRawData = *FilePosOrErr;
+ else
+ return FilePosOrErr.takeError();
+ }
Ptr += sizeof(debug_directory);
Offset += sizeof(debug_directory);
}
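Two details in the patchDebugDirectory() hunks above are easy to miss: the bounds check needs '<=' because indexing DataDirectories[DEBUG_DIRECTORY] requires at least DEBUG_DIRECTORY + 1 entries, and entries whose payload is not part of the mapped image are now skipped rather than rejected. A brief restatement of that reasoning as a sketch:

    // Index DEBUG_DIRECTORY is only valid when size() >= DEBUG_DIRECTORY + 1,
    // so bail out (successfully) when size() <= DEBUG_DIRECTORY.
    if (Obj.DataDirectories.size() <= DEBUG_DIRECTORY)
      return Error::success();
    // Later, a debug_directory entry with PointerToRawData == 0 (payload kept
    // outside the mapped sections) is left untouched instead of raising an error.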
diff --git a/llvm/tools/llvm-objcopy/CommonConfig.h b/llvm/tools/llvm-objcopy/CommonConfig.h
index 131ce5c59114..ea39a6da2ba5 100644
--- a/llvm/tools/llvm-objcopy/CommonConfig.h
+++ b/llvm/tools/llvm-objcopy/CommonConfig.h
@@ -210,14 +210,7 @@ struct CommonConfig {
// Repeated options
std::vector<StringRef> AddSection;
std::vector<StringRef> DumpSection;
- std::vector<StringRef> RPathToAdd;
- std::vector<StringRef> RPathToPrepend;
- DenseMap<StringRef, StringRef> RPathsToUpdate;
- DenseMap<StringRef, StringRef> InstallNamesToUpdate;
- DenseSet<StringRef> RPathsToRemove;
-
- // install-name-tool's id option
- Optional<StringRef> SharedLibId;
+ std::vector<StringRef> UpdateSection;
// Section matchers
NameMatcher KeepSection;
@@ -239,23 +232,13 @@ struct CommonConfig {
StringMap<SectionFlagsUpdate> SetSectionFlags;
StringMap<StringRef> SymbolsToRename;
- // ELF entry point address expression. The input parameter is an entry point
- // address in the input ELF file. The entry address in the output file is
- // calculated with EntryExpr(input_address), when either --set-start or
- // --change-start is used.
- std::function<uint64_t(uint64_t)> EntryExpr;
-
// Symbol info specified by --add-symbol option.
std::vector<NewSymbolInfo> SymbolsToAdd;
// Boolean options
- bool AllowBrokenLinks = false;
bool DeterministicArchives = true;
bool ExtractDWO = false;
bool ExtractMainPartition = false;
- bool KeepFileSymbols = false;
- bool KeepUndefined = false;
- bool LocalizeHidden = false;
bool OnlyKeepDebug = false;
bool PreserveDates = false;
bool StripAll = false;
@@ -264,12 +247,9 @@ struct CommonConfig {
bool StripDebug = false;
bool StripNonAlloc = false;
bool StripSections = false;
- bool StripSwiftSymbols = false;
bool StripUnneeded = false;
bool Weaken = false;
bool DecompressDebugSections = false;
- // install-name-tool's --delete_all_rpaths
- bool RemoveAllRpaths = false;
DebugCompressionType CompressionType = DebugCompressionType::None;
};
diff --git a/llvm/tools/llvm-objcopy/ConfigManager.cpp b/llvm/tools/llvm-objcopy/ConfigManager.cpp
index 9f7d06b99418..2e5cf9357a52 100644
--- a/llvm/tools/llvm-objcopy/ConfigManager.cpp
+++ b/llvm/tools/llvm-objcopy/ConfigManager.cpp
@@ -39,7 +39,7 @@ enum ObjcopyID {
#include "ObjcopyOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info ObjcopyInfoTable[] = {
+const opt::OptTable::Info ObjcopyInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{OBJCOPY_##PREFIX, \
@@ -79,7 +79,7 @@ enum InstallNameToolID {
#include "InstallNameToolOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info InstallNameToolInfoTable[] = {
+const opt::OptTable::Info InstallNameToolInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{INSTALL_NAME_TOOL_##PREFIX, \
@@ -116,7 +116,7 @@ enum BitcodeStripID {
#include "BitcodeStripOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info BitcodeStripInfoTable[] = {
+const opt::OptTable::Info BitcodeStripInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{BITCODE_STRIP_##PREFIX, \
@@ -153,7 +153,7 @@ enum StripID {
#include "StripOpts.inc"
#undef PREFIX
-static const opt::OptTable::Info StripInfoTable[] = {
+const opt::OptTable::Info StripInfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{STRIP_##PREFIX, NAME, HELPTEXT, \
@@ -559,27 +559,21 @@ static Expected<NewSymbolInfo> parseNewSymbolInfo(StringRef FlagValue) {
}
Expected<const ELFConfig &> ConfigManager::getELFConfig() const {
- if (Common.StripSwiftSymbols || Common.KeepUndefined)
- return createStringError(llvm::errc::invalid_argument,
- "option not supported by llvm-objcopy for ELF");
-
return ELF;
}
Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
- if (Common.AllowBrokenLinks || !Common.SplitDWO.empty() ||
- !Common.SymbolsPrefix.empty() || !Common.AllocSectionsPrefix.empty() ||
- !Common.DumpSection.empty() || !Common.KeepSection.empty() ||
- ELF.NewSymbolVisibility || !Common.SymbolsToGlobalize.empty() ||
+ if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() ||
+ !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() ||
!Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
!Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
!Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- Common.ExtractDWO || Common.LocalizeHidden || Common.PreserveDates ||
- Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
- Common.StripSwiftSymbols || Common.KeepUndefined || Common.Weaken ||
+ Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
+ Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
Common.DecompressDebugSections ||
Common.DiscardMode == DiscardType::Locals ||
- !Common.SymbolsToAdd.empty() || Common.EntryExpr) {
+ !Common.SymbolsToAdd.empty()) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for COFF");
}
@@ -588,19 +582,18 @@ Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
}
Expected<const MachOConfig &> ConfigManager::getMachOConfig() const {
- if (Common.AllowBrokenLinks || !Common.SplitDWO.empty() ||
- !Common.SymbolsPrefix.empty() || !Common.AllocSectionsPrefix.empty() ||
- !Common.KeepSection.empty() || ELF.NewSymbolVisibility ||
+ if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
+ !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
!Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
!Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
!Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
!Common.UnneededSymbolsToRemove.empty() ||
!Common.SetSectionAlignment.empty() || !Common.SetSectionFlags.empty() ||
- Common.ExtractDWO || Common.LocalizeHidden || Common.PreserveDates ||
- Common.StripAllGNU || Common.StripDWO || Common.StripNonAlloc ||
- Common.StripSections || Common.Weaken || Common.DecompressDebugSections ||
- Common.StripUnneeded || Common.DiscardMode == DiscardType::Locals ||
- !Common.SymbolsToAdd.empty() || Common.EntryExpr) {
+ Common.ExtractDWO || Common.PreserveDates || Common.StripAllGNU ||
+ Common.StripDWO || Common.StripNonAlloc || Common.StripSections ||
+ Common.Weaken || Common.DecompressDebugSections || Common.StripUnneeded ||
+ Common.DiscardMode == DiscardType::Locals ||
+ !Common.SymbolsToAdd.empty()) {
return createStringError(llvm::errc::invalid_argument,
"option not supported by llvm-objcopy for MachO");
}
@@ -612,8 +605,7 @@ Expected<const WasmConfig &> ConfigManager::getWasmConfig() const {
if (!Common.AddGnuDebugLink.empty() || Common.ExtractPartition ||
!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
!Common.AllocSectionsPrefix.empty() ||
- Common.DiscardMode != DiscardType::None || ELF.NewSymbolVisibility ||
- !Common.SymbolsToAdd.empty() || !Common.RPathToAdd.empty() ||
+ Common.DiscardMode != DiscardType::None || !Common.SymbolsToAdd.empty() ||
!Common.SymbolsToGlobalize.empty() || !Common.SymbolsToLocalize.empty() ||
!Common.SymbolsToKeep.empty() || !Common.SymbolsToRemove.empty() ||
!Common.UnneededSymbolsToRemove.empty() ||
@@ -684,6 +676,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
ELFConfig &ELFConfig = ConfigMgr.ELF;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
Config.InputFilename = Positional[0];
Config.OutputFilename = Positional[Positional.size() == 1 ? 0 : 1];
if (InputArgs.hasArg(OBJCOPY_target) &&
@@ -887,6 +880,17 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
"bad format for --add-section: missing file name");
Config.AddSection.push_back(ArgValue);
}
+ for (auto Arg : InputArgs.filtered(OBJCOPY_update_section)) {
+ StringRef ArgValue(Arg->getValue());
+ if (!ArgValue.contains('='))
+ return createStringError(errc::invalid_argument,
+ "bad format for --update-section: missing '='");
+ if (ArgValue.split("=").second.empty())
+ return createStringError(
+ errc::invalid_argument,
+ "bad format for --update-section: missing file name");
+ Config.UpdateSection.push_back(ArgValue);
+ }
for (auto *Arg : InputArgs.filtered(OBJCOPY_dump_section)) {
StringRef Value(Arg->getValue());
if (Value.split('=').second.empty())
@@ -905,7 +909,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
Config.ExtractDWO = InputArgs.hasArg(OBJCOPY_extract_dwo);
Config.ExtractMainPartition =
InputArgs.hasArg(OBJCOPY_extract_main_partition);
- Config.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
+ ELFConfig.LocalizeHidden = InputArgs.hasArg(OBJCOPY_localize_hidden);
Config.Weaken = InputArgs.hasArg(OBJCOPY_weaken);
if (InputArgs.hasArg(OBJCOPY_discard_all, OBJCOPY_discard_locals))
Config.DiscardMode =
@@ -913,13 +917,13 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
? DiscardType::All
: DiscardType::Locals;
Config.OnlyKeepDebug = InputArgs.hasArg(OBJCOPY_only_keep_debug);
- Config.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
- Config.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined);
+ ELFConfig.KeepFileSymbols = InputArgs.hasArg(OBJCOPY_keep_file_symbols);
+ MachOConfig.KeepUndefined = InputArgs.hasArg(OBJCOPY_keep_undefined);
Config.DecompressDebugSections =
InputArgs.hasArg(OBJCOPY_decompress_debug_sections);
if (Config.DiscardMode == DiscardType::All) {
Config.StripDebug = true;
- Config.KeepFileSymbols = true;
+ ELFConfig.KeepFileSymbols = true;
}
for (auto Arg : InputArgs.filtered(OBJCOPY_localize_symbol))
if (Error E = Config.SymbolsToLocalize.addMatcher(NameOrPattern::create(
@@ -993,7 +997,7 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
Config.SymbolsToAdd.push_back(*SymInfo);
}
- Config.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
+ ELFConfig.AllowBrokenLinks = InputArgs.hasArg(OBJCOPY_allow_broken_links);
Config.DeterministicArchives = InputArgs.hasFlag(
OBJCOPY_enable_deterministic_archives,
@@ -1013,16 +1017,16 @@ objcopy::parseObjcopyOptions(ArrayRef<const char *> RawArgsArr,
return createStringError(
EAddr.getError(), "bad entry point address: '%s'", Arg->getValue());
- Config.EntryExpr = [EAddr](uint64_t) { return *EAddr; };
+ ELFConfig.EntryExpr = [EAddr](uint64_t) { return *EAddr; };
} else if (Arg->getOption().matches(OBJCOPY_change_start)) {
auto EIncr = getAsInteger<int64_t>(Arg->getValue());
if (!EIncr)
return createStringError(EIncr.getError(),
"bad entry point increment: '%s'",
Arg->getValue());
- auto Expr = Config.EntryExpr ? std::move(Config.EntryExpr)
- : [](uint64_t A) { return A; };
- Config.EntryExpr = [Expr, EIncr](uint64_t EAddr) {
+ auto Expr = ELFConfig.EntryExpr ? std::move(ELFConfig.EntryExpr)
+ : [](uint64_t A) { return A; };
+ ELFConfig.EntryExpr = [Expr, EIncr](uint64_t EAddr) {
return Expr(EAddr) + *EIncr;
};
}
@@ -1057,6 +1061,7 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
DriverConfig DC;
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
InstallNameToolOptTable T;
unsigned MissingArgumentIndex, MissingArgumentCount;
llvm::opt::InputArgList InputArgs =
@@ -1087,27 +1092,27 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
}
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_add_rpath))
- Config.RPathToAdd.push_back(Arg->getValue());
+ MachOConfig.RPathToAdd.push_back(Arg->getValue());
for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_prepend_rpath))
- Config.RPathToPrepend.push_back(Arg->getValue());
+ MachOConfig.RPathToPrepend.push_back(Arg->getValue());
for (auto Arg : InputArgs.filtered(INSTALL_NAME_TOOL_delete_rpath)) {
StringRef RPath = Arg->getValue();
// Cannot add and delete the same rpath at the same time.
- if (is_contained(Config.RPathToAdd, RPath))
+ if (is_contained(MachOConfig.RPathToAdd, RPath))
return createStringError(
errc::invalid_argument,
"cannot specify both -add_rpath '%s' and -delete_rpath '%s'",
RPath.str().c_str(), RPath.str().c_str());
- if (is_contained(Config.RPathToPrepend, RPath))
+ if (is_contained(MachOConfig.RPathToPrepend, RPath))
return createStringError(
errc::invalid_argument,
"cannot specify both -prepend_rpath '%s' and -delete_rpath '%s'",
RPath.str().c_str(), RPath.str().c_str());
- Config.RPathsToRemove.insert(RPath);
+ MachOConfig.RPathsToRemove.insert(RPath);
}
for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_rpath)) {
@@ -1118,51 +1123,52 @@ objcopy::parseInstallNameToolOptions(ArrayRef<const char *> ArgsArr) {
// Cannot specify duplicate -rpath entries
auto It1 = find_if(
- Config.RPathsToUpdate,
+ MachOConfig.RPathsToUpdate,
[&Match](const DenseMap<StringRef, StringRef>::value_type &OldNew) {
return Match(OldNew.getFirst()) || Match(OldNew.getSecond());
});
- if (It1 != Config.RPathsToUpdate.end())
+ if (It1 != MachOConfig.RPathsToUpdate.end())
return createStringError(errc::invalid_argument,
"cannot specify both -rpath '" +
It1->getFirst() + "' '" + It1->getSecond() +
"' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -delete_rpath and -rpath
- auto It2 = find_if(Config.RPathsToRemove, Match);
- if (It2 != Config.RPathsToRemove.end())
+ auto It2 = find_if(MachOConfig.RPathsToRemove, Match);
+ if (It2 != MachOConfig.RPathsToRemove.end())
return createStringError(errc::invalid_argument,
"cannot specify both -delete_rpath '" + *It2 +
"' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -add_rpath and -rpath
- auto It3 = find_if(Config.RPathToAdd, Match);
- if (It3 != Config.RPathToAdd.end())
+ auto It3 = find_if(MachOConfig.RPathToAdd, Match);
+ if (It3 != MachOConfig.RPathToAdd.end())
return createStringError(errc::invalid_argument,
"cannot specify both -add_rpath '" + *It3 +
"' and -rpath '" + Old + "' '" + New + "'");
// Cannot specify the same rpath under both -prepend_rpath and -rpath.
- auto It4 = find_if(Config.RPathToPrepend, Match);
- if (It4 != Config.RPathToPrepend.end())
+ auto It4 = find_if(MachOConfig.RPathToPrepend, Match);
+ if (It4 != MachOConfig.RPathToPrepend.end())
return createStringError(errc::invalid_argument,
"cannot specify both -prepend_rpath '" + *It4 +
"' and -rpath '" + Old + "' '" + New + "'");
- Config.RPathsToUpdate.insert({Old, New});
+ MachOConfig.RPathsToUpdate.insert({Old, New});
}
if (auto *Arg = InputArgs.getLastArg(INSTALL_NAME_TOOL_id)) {
- Config.SharedLibId = Arg->getValue();
- if (Config.SharedLibId->empty())
+ MachOConfig.SharedLibId = Arg->getValue();
+ if (MachOConfig.SharedLibId->empty())
return createStringError(errc::invalid_argument,
"cannot specify an empty id");
}
for (auto *Arg : InputArgs.filtered(INSTALL_NAME_TOOL_change))
- Config.InstallNamesToUpdate.insert({Arg->getValue(0), Arg->getValue(1)});
+ MachOConfig.InstallNamesToUpdate.insert(
+ {Arg->getValue(0), Arg->getValue(1)});
- Config.RemoveAllRpaths =
+ MachOConfig.RemoveAllRpaths =
InputArgs.hasArg(INSTALL_NAME_TOOL_delete_all_rpaths);
SmallVector<StringRef, 2> Positional;
@@ -1281,6 +1287,8 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
ConfigManager ConfigMgr;
CommonConfig &Config = ConfigMgr.Common;
+ ELFConfig &ELFConfig = ConfigMgr.ELF;
+ MachOConfig &MachOConfig = ConfigMgr.MachO;
if (InputArgs.hasArg(STRIP_regex) && InputArgs.hasArg(STRIP_wildcard))
return createStringError(errc::invalid_argument,
@@ -1292,7 +1300,7 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
: InputArgs.hasArg(STRIP_wildcard)
? MatchStyle::Wildcard
: MatchStyle::Literal;
- Config.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
+ ELFConfig.AllowBrokenLinks = InputArgs.hasArg(STRIP_allow_broken_links);
Config.StripDebug = InputArgs.hasArg(STRIP_strip_debug);
if (InputArgs.hasArg(STRIP_discard_all, STRIP_discard_locals))
@@ -1305,10 +1313,10 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
if (auto Arg = InputArgs.getLastArg(STRIP_strip_all, STRIP_no_strip_all))
Config.StripAll = Arg->getOption().getID() == STRIP_strip_all;
Config.StripAllGNU = InputArgs.hasArg(STRIP_strip_all_gnu);
- Config.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols);
+ MachOConfig.StripSwiftSymbols = InputArgs.hasArg(STRIP_strip_swift_symbols);
Config.OnlyKeepDebug = InputArgs.hasArg(STRIP_only_keep_debug);
- Config.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
- Config.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined);
+ ELFConfig.KeepFileSymbols = InputArgs.hasArg(STRIP_keep_file_symbols);
+ MachOConfig.KeepUndefined = InputArgs.hasArg(STRIP_keep_undefined);
for (auto Arg : InputArgs.filtered(STRIP_keep_section))
if (Error E = Config.KeepSection.addMatcher(NameOrPattern::create(
@@ -1337,7 +1345,7 @@ objcopy::parseStripOptions(ArrayRef<const char *> RawArgsArr,
if (Config.DiscardMode == DiscardType::All) {
Config.StripDebug = true;
- Config.KeepFileSymbols = true;
+ ELFConfig.KeepFileSymbols = true;
}
Config.DeterministicArchives =
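With the format-specific options moved into ELFConfig and MachOConfig, the getters above act as compatibility gates: each returns an error if any option the target format cannot honor was set. A hedged sketch of how a driver might consume them; MachOFile and OutStream are illustrative names, while executeObjcopyOnBinary matches the signature that appears later in this diff:

    // Sketch: fail early when options incompatible with the format were given.
    Expected<const MachOConfig &> MOCOrErr = ConfigMgr.getMachOConfig();
    if (!MOCOrErr)
      return MOCOrErr.takeError(); // "option not supported by llvm-objcopy for MachO"
    return objcopy::macho::executeObjcopyOnBinary(ConfigMgr.Common, *MOCOrErr,
                                                  MachOFile, OutStream);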
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h b/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
index 42d407da17ff..229a8d61fb83 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
+++ b/llvm/tools/llvm-objcopy/ELF/ELFConfig.h
@@ -20,6 +20,16 @@ namespace objcopy {
// ELF specific configuration for copying/stripping a single file.
struct ELFConfig {
uint8_t NewSymbolVisibility = (uint8_t)ELF::STV_DEFAULT;
+
+ // ELF entry point address expression. The input parameter is an entry point
+ // address in the input ELF file. The entry address in the output file is
+ // calculated with EntryExpr(input_address), when either --set-start or
+ // --change-start is used.
+ std::function<uint64_t(uint64_t)> EntryExpr;
+
+ bool AllowBrokenLinks = false;
+ bool KeepFileSymbols = false;
+ bool LocalizeHidden = false;
};
} // namespace objcopy
diff --git a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
index 986eeca6256c..16de84a961b5 100644
--- a/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/ELFObjcopy.cpp
@@ -204,8 +204,7 @@ static bool isCompressable(const SectionBase &Sec) {
}
static Error replaceDebugSections(
- Object &Obj, SectionPred &RemovePred,
- function_ref<bool(const SectionBase &)> ShouldReplace,
+ Object &Obj, function_ref<bool(const SectionBase &)> ShouldReplace,
function_ref<Expected<SectionBase *>(const SectionBase *)> AddSection) {
// Build a list of the debug sections we are going to replace.
// We can't call `AddSection` while iterating over sections,
@@ -225,17 +224,7 @@ static Error replaceDebugSections(
FromTo[S] = *NewSection;
}
- // Now we want to update the target sections of relocation
- // sections. Also we will update the relocations themselves
- // to update the symbol references.
- for (auto &Sec : Obj.sections())
- Sec.replaceSectionReferences(FromTo);
-
- RemovePred = [ShouldReplace, RemovePred](const SectionBase &Sec) {
- return ShouldReplace(Sec) || RemovePred(Sec);
- };
-
- return Error::success();
+ return Obj.replaceSections(FromTo);
}
static bool isUnneededSymbol(const Symbol &Sym) {
@@ -244,7 +233,8 @@ static bool isUnneededSymbol(const Symbol &Sym) {
Sym.Type != STT_SECTION;
}
-static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
+static Error updateAndRemoveSymbols(const CommonConfig &Config,
+ const ELFConfig &ELFConfig, Object &Obj) {
// TODO: update or remove symbols only if there is an option that affects
// them.
if (!Obj.SymbolTable)
@@ -254,7 +244,7 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
// Common and undefined symbols don't make sense as local symbols, and can
// even cause crashes if we localize those, so skip them.
if (!Sym.isCommon() && Sym.getShndx() != SHN_UNDEF &&
- ((Config.LocalizeHidden &&
+ ((ELFConfig.LocalizeHidden &&
(Sym.Visibility == STV_HIDDEN || Sym.Visibility == STV_INTERNAL)) ||
Config.SymbolsToLocalize.matches(Sym.Name)))
Sym.Binding = STB_LOCAL;
@@ -304,7 +294,7 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
auto RemoveSymbolsPred = [&](const Symbol &Sym) {
if (Config.SymbolsToKeep.matches(Sym.Name) ||
- (Config.KeepFileSymbols && Sym.Type == STT_FILE))
+ (ELFConfig.KeepFileSymbols && Sym.Type == STT_FILE))
return false;
if ((Config.DiscardMode == DiscardType::All ||
@@ -339,7 +329,8 @@ static Error updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
return Obj.removeSymbols(RemoveSymbolsPred);
}
-static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
+static Error replaceAndRemoveSections(const CommonConfig &Config,
+ const ELFConfig &ELFConfig, Object &Obj) {
SectionPred RemovePred = [](const SectionBase &) { return false; };
// Removes:
@@ -465,7 +456,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
// and at least one of those symbols is present
// (equivalently, the updated symbol table is not empty)
// the symbol table and the string table should not be removed.
- if ((!Config.SymbolsToKeep.empty() || Config.KeepFileSymbols) &&
+ if ((!Config.SymbolsToKeep.empty() || ELFConfig.KeepFileSymbols) &&
Obj.SymbolTable && !Obj.SymbolTable->empty()) {
RemovePred = [&Obj, RemovePred](const SectionBase &Sec) {
if (&Sec == Obj.SymbolTable || &Sec == Obj.SymbolTable->getStrTab())
@@ -474,9 +465,12 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
};
}
+ if (Error E = Obj.removeSections(ELFConfig.AllowBrokenLinks, RemovePred))
+ return E;
+
if (Config.CompressionType != DebugCompressionType::None) {
if (Error Err = replaceDebugSections(
- Obj, RemovePred, isCompressable,
+ Obj, isCompressable,
[&Config, &Obj](const SectionBase *S) -> Expected<SectionBase *> {
Expected<CompressedSection> NewSection =
CompressedSection::create(*S, Config.CompressionType);
@@ -488,7 +482,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
return Err;
} else if (Config.DecompressDebugSections) {
if (Error Err = replaceDebugSections(
- Obj, RemovePred,
+ Obj,
[](const SectionBase &S) { return isa<CompressedSection>(&S); },
[&Obj](const SectionBase *S) {
const CompressedSection *CS = cast<CompressedSection>(S);
@@ -497,7 +491,7 @@ static Error replaceAndRemoveSections(const CommonConfig &Config, Object &Obj) {
return Err;
}
- return Obj.removeSections(Config.AllowBrokenLinks, RemovePred);
+ return Error::success();
}
// Add symbol to the Object symbol table with the specified properties.
@@ -554,6 +548,22 @@ static void addSymbol(Object &Obj, const NewSymbolInfo &SymInfo,
Sec ? (uint16_t)SYMBOL_SIMPLE_INDEX : (uint16_t)SHN_ABS, 0);
}
+static Error
+handleUserSection(StringRef Flag,
+ function_ref<Error(StringRef, ArrayRef<uint8_t>)> F) {
+ std::pair<StringRef, StringRef> SecPair = Flag.split("=");
+ StringRef SecName = SecPair.first;
+ StringRef File = SecPair.second;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr = MemoryBuffer::getFile(File);
+ if (!BufOrErr)
+ return createFileError(File, errorCodeToError(BufOrErr.getError()));
+ std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+ ArrayRef<uint8_t> Data(
+ reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
+ Buf->getBufferSize());
+ return F(SecName, Data);
+}
+
// This function handles the high level operations of GNU objcopy including
// handling command line options. It's important to outline certain properties
// we expect to hold of the command line operations. Any operation that "keeps"
@@ -570,7 +580,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
if (!Config.SplitDWO.empty() && Config.ExtractDWO) {
return Obj.removeSections(
- Config.AllowBrokenLinks,
+ ELFConfig.AllowBrokenLinks,
[&Obj](const SectionBase &Sec) { return onlyKeepDWOPred(Obj, Sec); });
}
@@ -587,21 +597,39 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
// remove the relocation sections before removing the symbols. That allows
// us to avoid reporting the inappropriate errors about removing symbols
// named in relocations.
- if (Error E = replaceAndRemoveSections(Config, Obj))
+ if (Error E = replaceAndRemoveSections(Config, ELFConfig, Obj))
return E;
- if (Error E = updateAndRemoveSymbols(Config, Obj))
+ if (Error E = updateAndRemoveSymbols(Config, ELFConfig, Obj))
return E;
if (!Config.SectionsToRename.empty()) {
+ std::vector<RelocationSectionBase *> RelocSections;
+ DenseSet<SectionBase *> RenamedSections;
for (SectionBase &Sec : Obj.sections()) {
+ auto *RelocSec = dyn_cast<RelocationSectionBase>(&Sec);
const auto Iter = Config.SectionsToRename.find(Sec.Name);
if (Iter != Config.SectionsToRename.end()) {
const SectionRename &SR = Iter->second;
Sec.Name = std::string(SR.NewName);
if (SR.NewFlags.hasValue())
setSectionFlagsAndType(Sec, SR.NewFlags.getValue());
- }
+ RenamedSections.insert(&Sec);
+ } else if (RelocSec && !(Sec.Flags & SHF_ALLOC))
+ // Postpone processing of relocation sections that are not explicitly named
+ // by a '--rename-section' option until after their target sections have
+ // been renamed.
+ // Dynamic relocation sections (i.e. those with SHF_ALLOC) should be renamed
+ // only when named explicitly; otherwise renaming, for example, '.got.plt'
+ // would also affect '.rela.plt', which is not desirable.
+ RelocSections.push_back(RelocSec);
+ }
+
+ // Rename relocation sections according to their target sections.
+ for (RelocationSectionBase *RelocSec : RelocSections) {
+ auto Iter = RenamedSections.find(RelocSec->getSection());
+ if (Iter != RenamedSections.end())
+ RelocSec->Name = (RelocSec->getNamePrefix() + (*Iter)->Name).str();
}
}
@@ -624,27 +652,16 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
// .rela.prefix.plt since GNU objcopy does so.
const SectionBase *TargetSec = RelocSec->getSection();
if (TargetSec && (TargetSec->Flags & SHF_ALLOC)) {
- StringRef prefix;
- switch (Sec.Type) {
- case SHT_REL:
- prefix = ".rel";
- break;
- case SHT_RELA:
- prefix = ".rela";
- break;
- default:
- llvm_unreachable("not a relocation section");
- }
-
// If the relocation section comes *after* the target section, we
// don't add Config.AllocSectionsPrefix because we've already added
// the prefix to TargetSec->Name. Otherwise, if the relocation
// section comes *before* the target section, we add the prefix.
if (PrefixedSections.count(TargetSec))
- Sec.Name = (prefix + TargetSec->Name).str();
+ Sec.Name = (RelocSec->getNamePrefix() + TargetSec->Name).str();
else
- Sec.Name =
- (prefix + Config.AllocSectionsPrefix + TargetSec->Name).str();
+ Sec.Name = (RelocSec->getNamePrefix() + Config.AllocSectionsPrefix +
+ TargetSec->Name)
+ .str();
}
}
}
@@ -664,21 +681,23 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
Sec.Type = SHT_NOBITS;
for (const auto &Flag : Config.AddSection) {
- std::pair<StringRef, StringRef> SecPair = Flag.split("=");
- StringRef SecName = SecPair.first;
- StringRef File = SecPair.second;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
- MemoryBuffer::getFile(File);
- if (!BufOrErr)
- return createFileError(File, errorCodeToError(BufOrErr.getError()));
- std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
- ArrayRef<uint8_t> Data(
- reinterpret_cast<const uint8_t *>(Buf->getBufferStart()),
- Buf->getBufferSize());
- OwnedDataSection &NewSection =
- Obj.addSection<OwnedDataSection>(SecName, Data);
- if (SecName.startswith(".note") && SecName != ".note.GNU-stack")
- NewSection.Type = SHT_NOTE;
+ auto AddSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+ OwnedDataSection &NewSection =
+ Obj.addSection<OwnedDataSection>(Name, Data);
+ if (Name.startswith(".note") && Name != ".note.GNU-stack")
+ NewSection.Type = SHT_NOTE;
+ return Error::success();
+ };
+ if (Error E = handleUserSection(Flag, AddSection))
+ return E;
+ }
+
+ for (StringRef Flag : Config.UpdateSection) {
+ auto UpdateSection = [&](StringRef Name, ArrayRef<uint8_t> Data) {
+ return Obj.updateSection(Name, Data);
+ };
+ if (Error E = handleUserSection(Flag, UpdateSection))
+ return E;
}
if (!Config.AddGnuDebugLink.empty())
@@ -705,8 +724,8 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
}
}
- if (Config.EntryExpr)
- Obj.Entry = Config.EntryExpr(Obj.Entry);
+ if (ELFConfig.EntryExpr)
+ Obj.Entry = ELFConfig.EntryExpr(Obj.Entry);
return Error::success();
}
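The new handleUserSection() helper factors out the 'NAME=FILE' splitting and file reading that --add-section already performed, so --update-section (for example --update-section .rodata.cfg=new.bin, an illustrative invocation) only has to supply a callback. A minimal sketch of the pattern used in the hunk above:

    // Each NAME=FILE flag is split, the file is read into memory, and the
    // callback receives the section name together with the raw bytes.
    for (StringRef Flag : Config.UpdateSection)
      if (Error E = handleUserSection(
              Flag, [&](StringRef Name, ArrayRef<uint8_t> Data) {
                return Obj.updateSection(Name, Data);
              }))
        return E;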
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.cpp b/llvm/tools/llvm-objcopy/ELF/Object.cpp
index ba91d08e5540..3db5028e85f7 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.cpp
+++ b/llvm/tools/llvm-objcopy/ELF/Object.cpp
@@ -893,6 +893,17 @@ Error SymbolTableSection::accept(MutableSectionVisitor &Visitor) {
return Visitor.visit(*this);
}
+StringRef RelocationSectionBase::getNamePrefix() const {
+ switch (Type) {
+ case SHT_REL:
+ return ".rel";
+ case SHT_RELA:
+ return ".rela";
+ default:
+ llvm_unreachable("not a relocation section");
+ }
+}
+
Error RelocationSection::removeSectionReferences(
bool AllowBrokenLinks, function_ref<bool(const SectionBase *)> ToRemove) {
if (ToRemove(Symbols)) {
@@ -1342,13 +1353,16 @@ void IHexELFBuilder::addDataSections() {
if (R.HexData.empty())
continue;
RecAddr = R.Addr + SegmentAddr + BaseAddr;
- if (!Section || Section->Addr + Section->Size != RecAddr)
- // OriginalOffset field is only used to sort section properly, so
- // instead of keeping track of real offset in IHEX file, we use
- // section number.
+ if (!Section || Section->Addr + Section->Size != RecAddr) {
+ // The OriginalOffset field is only used to sort sections before layout, and
+ // since layoutSections() and layoutSectionsForOnlyKeepDebug() use
+ // llvm::stable_sort(), there is no need to track the real offsets in the
+ // IHEX file; we can simply set it to a constant (zero).
Section = &Obj->addSection<OwnedDataSection>(
- ".sec" + std::to_string(SecNo++), RecAddr,
- ELF::SHF_ALLOC | ELF::SHF_WRITE, SecNo);
+ ".sec" + std::to_string(SecNo), RecAddr,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE, 0);
+ SecNo++;
+ }
Section->appendHexData(R.HexData);
break;
case IHexRecord::EndOfFile:
@@ -2093,6 +2107,17 @@ template <class ELFT> void ELFWriter<ELFT>::writeSegmentData() {
Size);
}
+ for (auto it : Obj.getUpdatedSections()) {
+ SectionBase *Sec = it.first;
+ ArrayRef<uint8_t> Data = it.second;
+
+ auto *Parent = Sec->ParentSegment;
+ assert(Parent && "This section should've been part of a segment.");
+ uint64_t Offset =
+ Sec->OriginalOffset - Parent->OriginalOffset + Parent->Offset;
+ llvm::copy(Data, Buf->getBufferStart() + Offset);
+ }
+
// Iterate over removed sections and overwrite their old data with zeroes.
for (auto &Sec : Obj.removedSections()) {
Segment *Parent = Sec.ParentSegment;
@@ -2110,6 +2135,37 @@ ELFWriter<ELFT>::ELFWriter(Object &Obj, raw_ostream &Buf, bool WSH,
: Writer(Obj, Buf), WriteSectionHeaders(WSH && Obj.HadShdrs),
OnlyKeepDebug(OnlyKeepDebug) {}
+Error Object::updateSection(StringRef Name, ArrayRef<uint8_t> Data) {
+ auto It = llvm::find_if(Sections,
+ [&](const SecPtr &Sec) { return Sec->Name == Name; });
+ if (It == Sections.end())
+ return createStringError(errc::invalid_argument, "section '%s' not found",
+ Name.str().c_str());
+
+ auto *OldSec = It->get();
+ if (!OldSec->hasContents())
+ return createStringError(
+ errc::invalid_argument,
+ "section '%s' can't be updated because it does not have contents",
+ Name.str().c_str());
+
+ if (Data.size() > OldSec->Size && OldSec->ParentSegment)
+ return createStringError(errc::invalid_argument,
+ "cannot fit data of size %zu into section '%s' "
+ "with size %zu that is part of a segment",
+ Data.size(), Name.str().c_str(), OldSec->Size);
+
+ if (!OldSec->ParentSegment) {
+ *It = std::make_unique<OwnedDataSection>(*OldSec, Data);
+ } else {
+ // The segment writer will be in charge of updating these contents.
+ OldSec->Size = Data.size();
+ UpdatedSections[OldSec] = Data;
+ }
+
+ return Error::success();
+}
+
Error Object::removeSections(
bool AllowBrokenLinks, std::function<bool(const SectionBase &)> ToRemove) {
@@ -2162,6 +2218,30 @@ Error Object::removeSections(
return Error::success();
}
+Error Object::replaceSections(
+ const DenseMap<SectionBase *, SectionBase *> &FromTo) {
+ auto SectionIndexLess = [](const SecPtr &Lhs, const SecPtr &Rhs) {
+ return Lhs->Index < Rhs->Index;
+ };
+ assert(llvm::is_sorted(Sections, SectionIndexLess) &&
+ "Sections are expected to be sorted by Index");
+ // Set the indices of the new sections so that they can later be sorted into
+ // the positions of the removed ones.
+ for (auto &I : FromTo)
+ I.second->Index = I.first->Index;
+
+ // Notify all sections about the replacement.
+ for (auto &Sec : Sections)
+ Sec->replaceSectionReferences(FromTo);
+
+ if (Error E = removeSections(
+ /*AllowBrokenLinks=*/false,
+ [=](const SectionBase &Sec) { return FromTo.count(&Sec) > 0; }))
+ return E;
+ llvm::sort(Sections, SectionIndexLess);
+ return Error::success();
+}
+
Error Object::removeSymbols(function_ref<bool(const Symbol &)> ToRemove) {
if (SymbolTable)
for (const SecPtr &Sec : Sections)
@@ -2200,20 +2280,6 @@ Error Object::addNewSymbolTable() {
return Error::success();
}
-void Object::sortSections() {
- // Use stable_sort to maintain the original ordering as closely as possible.
- llvm::stable_sort(Sections, [](const SecPtr &A, const SecPtr &B) {
- // Put SHT_GROUP sections first, since group section headers must come
- // before the sections they contain. This also matches what GNU objcopy
- // does.
- if (A->Type != B->Type &&
- (A->Type == ELF::SHT_GROUP || B->Type == ELF::SHT_GROUP))
- return A->Type == ELF::SHT_GROUP;
- // For all other sections, sort by offset order.
- return A->OriginalOffset < B->OriginalOffset;
- });
-}
-
// Orders segments such that if x = y->ParentSegment then y comes before x.
static void orderSegments(std::vector<Segment *> &Segments) {
llvm::stable_sort(Segments, compareSegmentsByOffset);
@@ -2262,6 +2328,9 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// the offset from the start of the segment. Using the offset from the start
// of the segment we can assign a new offset to the section. For sections not
// covered by segments we can just bump Offset to the next valid location.
+ // While it is not strictly necessary, lay out the sections in order of their
+ // original offsets so that the output resembles the input file as closely as
+ // possible.
+ std::vector<SectionBase *> OutOfSegmentSections;
uint32_t Index = 1;
for (auto &Sec : Sections) {
Sec.Index = Index++;
@@ -2269,12 +2338,19 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
auto Segment = *Sec.ParentSegment;
Sec.Offset =
Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset);
- } else {
- Offset = alignTo(Offset, Sec.Align == 0 ? 1 : Sec.Align);
- Sec.Offset = Offset;
- if (Sec.Type != SHT_NOBITS)
- Offset += Sec.Size;
- }
+ } else
+ OutOfSegmentSections.push_back(&Sec);
+ }
+
+ llvm::stable_sort(OutOfSegmentSections,
+ [](const SectionBase *Lhs, const SectionBase *Rhs) {
+ return Lhs->OriginalOffset < Rhs->OriginalOffset;
+ });
+ for (auto *Sec : OutOfSegmentSections) {
+ Offset = alignTo(Offset, Sec->Align == 0 ? 1 : Sec->Align);
+ Sec->Offset = Offset;
+ if (Sec->Type != SHT_NOBITS)
+ Offset += Sec->Size;
}
return Offset;
}
@@ -2282,38 +2358,49 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
// Rewrite sh_offset after some sections are changed to SHT_NOBITS and thus
// occupy no space in the file.
static uint64_t layoutSectionsForOnlyKeepDebug(Object &Obj, uint64_t Off) {
+ // The layout algorithm requires the sections to be handled in the order of
+ // their offsets in the input file, at least inside segments.
+ std::vector<SectionBase *> Sections;
+ Sections.reserve(Obj.sections().size());
uint32_t Index = 1;
for (auto &Sec : Obj.sections()) {
Sec.Index = Index++;
-
- auto *FirstSec = Sec.ParentSegment && Sec.ParentSegment->Type == PT_LOAD
- ? Sec.ParentSegment->firstSection()
+ Sections.push_back(&Sec);
+ }
+ llvm::stable_sort(Sections,
+ [](const SectionBase *Lhs, const SectionBase *Rhs) {
+ return Lhs->OriginalOffset < Rhs->OriginalOffset;
+ });
+
+ for (auto *Sec : Sections) {
+ auto *FirstSec = Sec->ParentSegment && Sec->ParentSegment->Type == PT_LOAD
+ ? Sec->ParentSegment->firstSection()
: nullptr;
// The first section in a PT_LOAD has to have congruent offset and address
// modulo the alignment, which usually equals the maximum page size.
- if (FirstSec && FirstSec == &Sec)
- Off = alignTo(Off, Sec.ParentSegment->Align, Sec.Addr);
+ if (FirstSec && FirstSec == Sec)
+ Off = alignTo(Off, Sec->ParentSegment->Align, Sec->Addr);
// sh_offset is not significant for SHT_NOBITS sections, but the congruence
// rule must be followed if it is the first section in a PT_LOAD. Do not
// advance Off.
- if (Sec.Type == SHT_NOBITS) {
- Sec.Offset = Off;
+ if (Sec->Type == SHT_NOBITS) {
+ Sec->Offset = Off;
continue;
}
if (!FirstSec) {
// FirstSec being nullptr generally means that Sec does not have the
// SHF_ALLOC flag.
- Off = Sec.Align ? alignTo(Off, Sec.Align) : Off;
- } else if (FirstSec != &Sec) {
+ Off = Sec->Align ? alignTo(Off, Sec->Align) : Off;
+ } else if (FirstSec != Sec) {
// The offset is relative to the first section in the PT_LOAD segment. Use
// sh_offset for non-SHF_ALLOC sections.
- Off = Sec.OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset;
+ Off = Sec->OriginalOffset - FirstSec->OriginalOffset + FirstSec->Offset;
}
- Sec.Offset = Off;
- Off += Sec.Size;
+ Sec->Offset = Off;
+ Off += Sec->Size;
}
return Off;
}
@@ -2460,7 +2547,6 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
if (Error E = removeUnneededSections(Obj))
return E;
- Obj.sortSections();
// We need to assign indexes before we perform layout because we need to know
// if we need large indexes or not. We can assign indexes first and check as
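Object::updateSection() in the hunks above distinguishes two cases: a section outside any segment is simply replaced by an OwnedDataSection holding the new bytes, while a section covered by a segment keeps its file slot and is recorded in UpdatedSections so that writeSegmentData() later overwrites the old bytes in place, which is why growing a segment-backed section is rejected. A small usage sketch; the section name and bytes are illustrative:

    // Sketch: replace the contents of an existing section of an ELF Object.
    std::vector<uint8_t> NewBytes = {0xDE, 0xAD, 0xBE, 0xEF};
    if (Error E = Obj.updateSection(".rodata.cfg", NewBytes))
      // Fails if the section is missing, has no contents (SHT_NOBITS/SHT_NULL),
      // or the new data does not fit into a segment-backed section.
      return E;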
diff --git a/llvm/tools/llvm-objcopy/ELF/Object.h b/llvm/tools/llvm-objcopy/ELF/Object.h
index 6fd26afa3ca1..811af4b51310 100644
--- a/llvm/tools/llvm-objcopy/ELF/Object.h
+++ b/llvm/tools/llvm-objcopy/ELF/Object.h
@@ -48,12 +48,12 @@ class Object;
struct Symbol;
class SectionTableRef {
- MutableArrayRef<std::unique_ptr<SectionBase>> Sections;
+ ArrayRef<std::unique_ptr<SectionBase>> Sections;
public:
- using iterator = pointee_iterator<std::unique_ptr<SectionBase> *>;
+ using iterator = pointee_iterator<const std::unique_ptr<SectionBase> *>;
- explicit SectionTableRef(MutableArrayRef<std::unique_ptr<SectionBase>> Secs)
+ explicit SectionTableRef(ArrayRef<std::unique_ptr<SectionBase>> Secs)
: Sections(Secs) {}
SectionTableRef(const SectionTableRef &) = default;
@@ -429,6 +429,7 @@ public:
virtual void markSymbols();
virtual void
replaceSectionReferences(const DenseMap<SectionBase *, SectionBase *> &);
+ virtual bool hasContents() const { return false; }
// Notify the section that it is subject to removal.
virtual void onRemove();
};
@@ -493,6 +494,9 @@ public:
function_ref<bool(const SectionBase *)> ToRemove) override;
Error initialize(SectionTableRef SecTable) override;
void finalize() override;
+ bool hasContents() const override {
+ return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
+ }
};
class OwnedDataSection : public SectionBase {
@@ -518,9 +522,15 @@ public:
OriginalOffset = SecOff;
}
+ OwnedDataSection(SectionBase &S, ArrayRef<uint8_t> Data)
+ : SectionBase(S), Data(std::begin(Data), std::end(Data)) {
+ Size = Data.size();
+ }
+
void appendHexData(StringRef HexData);
Error accept(SectionVisitor &Sec) const override;
Error accept(MutableSectionVisitor &Visitor) override;
+ bool hasContents() const override { return true; }
};
class CompressedSection : public SectionBase {
@@ -745,6 +755,8 @@ public:
const SectionBase *getSection() const { return SecToApplyRel; }
void setSection(SectionBase *Sec) { SecToApplyRel = Sec; }
+ StringRef getNamePrefix() const;
+
static bool classof(const SectionBase *S) {
return S->OriginalType == ELF::SHT_REL || S->OriginalType == ELF::SHT_RELA;
}
@@ -1016,6 +1028,7 @@ private:
std::vector<SecPtr> Sections;
std::vector<SegPtr> Segments;
std::vector<SecPtr> RemovedSections;
+ DenseMap<SectionBase *, std::vector<uint8_t>> UpdatedSections;
static bool sectionIsAlloc(const SectionBase &Sec) {
return Sec.Flags & ELF::SHF_ALLOC;
@@ -1023,10 +1036,6 @@ private:
public:
template <class T>
- using Range = iterator_range<
- pointee_iterator<typename std::vector<std::unique_ptr<T>>::iterator>>;
-
- template <class T>
using ConstRange = iterator_range<pointee_iterator<
typename std::vector<std::unique_ptr<T>>::const_iterator>>;
@@ -1054,11 +1063,7 @@ public:
SymbolTableSection *SymbolTable = nullptr;
SectionIndexSection *SectionIndexTable = nullptr;
- void sortSections();
- SectionTableRef sections() { return SectionTableRef(Sections); }
- ConstRange<SectionBase> sections() const {
- return make_pointee_range(Sections);
- }
+ SectionTableRef sections() const { return SectionTableRef(Sections); }
iterator_range<
filter_iterator<pointee_iterator<std::vector<SecPtr>::const_iterator>,
decltype(&sectionIsAlloc)>>
@@ -1066,6 +1071,9 @@ public:
return make_filter_range(make_pointee_range(Sections), sectionIsAlloc);
}
+ const auto &getUpdatedSections() const { return UpdatedSections; }
+ Error updateSection(StringRef Name, ArrayRef<uint8_t> Data);
+
SectionBase *findSection(StringRef Name) {
auto SecIt =
find_if(Sections, [&](const SecPtr &Sec) { return Sec->Name == Name; });
@@ -1073,11 +1081,11 @@ public:
}
SectionTableRef removedSections() { return SectionTableRef(RemovedSections); }
- Range<Segment> segments() { return make_pointee_range(Segments); }
ConstRange<Segment> segments() const { return make_pointee_range(Segments); }
Error removeSections(bool AllowBrokenLinks,
std::function<bool(const SectionBase &)> ToRemove);
+ Error replaceSections(const DenseMap<SectionBase *, SectionBase *> &FromTo);
Error removeSymbols(function_ref<bool(const Symbol &)> ToRemove);
template <class T, class... Ts> T &addSection(Ts &&... Args) {
auto Sec = std::make_unique<T>(std::forward<Ts>(Args)...);
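The replaceDebugSections() rewrite earlier in this diff relies on the new Object::replaceSections() primitive declared above: a caller builds a map from old sections to their replacements, and the Object updates references, removes the originals, and restores the index order. A condensed sketch of such a caller, where ShouldReplace and MakeReplacement are hypothetical stand-ins for the compression or decompression helpers used in this patch:

    // Sketch: swap a set of sections for freshly created replacements.
    DenseMap<SectionBase *, SectionBase *> FromTo;
    for (SectionBase &Sec : Obj.sections())
      if (ShouldReplace(Sec)) {
        Expected<SectionBase *> NewSec = MakeReplacement(&Sec); // hypothetical
        if (!NewSec)
          return NewSec.takeError();
        FromTo[&Sec] = *NewSec;
      }
    return Obj.replaceSections(FromTo);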
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h b/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
index 7c5dbfde19a0..93f9facfcf0b 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOConfig.h
@@ -9,11 +9,33 @@
#ifndef LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
#define LLVM_TOOLS_LLVM_OBJCOPY_MACHO_MACHOCONFIG_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
namespace llvm {
namespace objcopy {
// Mach-O specific configuration for copying/stripping a single file.
-struct MachOConfig {};
+struct MachOConfig {
+ // Repeated options
+ std::vector<StringRef> RPathToAdd;
+ std::vector<StringRef> RPathToPrepend;
+ DenseMap<StringRef, StringRef> RPathsToUpdate;
+ DenseMap<StringRef, StringRef> InstallNamesToUpdate;
+ DenseSet<StringRef> RPathsToRemove;
+
+ // install-name-tool's id option
+ Optional<StringRef> SharedLibId;
+
+ // Boolean options
+ bool StripSwiftSymbols = false;
+ bool KeepUndefined = false;
+
+ // install-name-tool's --delete_all_rpaths
+ bool RemoveAllRpaths = false;
+};
} // namespace objcopy
} // namespace llvm
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
index 6ed21806fe5e..3cac77411845 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.cpp
@@ -249,8 +249,12 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
uint64_t StartOfExportTrie =
StartOfLazyBindingInfo + O.LazyBinds.Opcodes.size();
uint64_t StartOfFunctionStarts = StartOfExportTrie + O.Exports.Trie.size();
- uint64_t StartOfDataInCode =
+ uint64_t StartOfDyldExportsTrie =
StartOfFunctionStarts + O.FunctionStarts.Data.size();
+ uint64_t StartOfChainedFixups =
+ StartOfDyldExportsTrie + O.ExportsTrie.Data.size();
+ uint64_t StartOfDataInCode =
+ StartOfChainedFixups + O.ChainedFixups.Data.size();
uint64_t StartOfLinkerOptimizationHint =
StartOfDataInCode + O.DataInCode.Data.size();
uint64_t StartOfSymbols =
@@ -262,10 +266,31 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
sizeof(uint32_t) * O.IndirectSymTable.Symbols.size();
uint64_t StartOfCodeSignature =
StartOfSymbolStrings + StrTableBuilder.getSize();
- if (O.CodeSignatureCommandIndex)
+ uint32_t CodeSignatureSize = 0;
+ if (O.CodeSignatureCommandIndex) {
StartOfCodeSignature = alignTo(StartOfCodeSignature, 16);
+
+ // Note: These calculations are to be kept in sync with the same
+ // calculations performed in LLD's CodeSignatureSection.
+ const uint32_t AllHeadersSize =
+ alignTo(CodeSignature.FixedHeadersSize + OutputFileName.size() + 1,
+ CodeSignature.Align);
+ const uint32_t BlockCount =
+ (StartOfCodeSignature + CodeSignature.BlockSize - 1) /
+ CodeSignature.BlockSize;
+ const uint32_t Size =
+ alignTo(AllHeadersSize + BlockCount * CodeSignature.HashSize,
+ CodeSignature.Align);
+
+ CodeSignature.StartOffset = StartOfCodeSignature;
+ CodeSignature.AllHeadersSize = AllHeadersSize;
+ CodeSignature.BlockCount = BlockCount;
+ CodeSignature.OutputFileName = OutputFileName;
+ CodeSignature.Size = Size;
+ CodeSignatureSize = Size;
+ }
uint64_t LinkEditSize =
- (StartOfCodeSignature + O.CodeSignature.Data.size()) - StartOfLinkEdit;
+ StartOfCodeSignature + CodeSignatureSize - StartOfLinkEdit;
// Now we have determined the layout of the contents of the __LINKEDIT
// segment. Update its load command.
@@ -293,7 +318,7 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
switch (cmd) {
case MachO::LC_CODE_SIGNATURE:
MLC.linkedit_data_command_data.dataoff = StartOfCodeSignature;
- MLC.linkedit_data_command_data.datasize = O.CodeSignature.Data.size();
+ MLC.linkedit_data_command_data.datasize = CodeSignatureSize;
break;
case MachO::LC_SYMTAB:
MLC.symtab_command_data.symoff = StartOfSymbols;
@@ -332,6 +357,14 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
MLC.linkedit_data_command_data.dataoff = StartOfFunctionStarts;
MLC.linkedit_data_command_data.datasize = O.FunctionStarts.Data.size();
break;
+ case MachO::LC_DYLD_CHAINED_FIXUPS:
+ MLC.linkedit_data_command_data.dataoff = StartOfChainedFixups;
+ MLC.linkedit_data_command_data.datasize = O.ChainedFixups.Data.size();
+ break;
+ case MachO::LC_DYLD_EXPORTS_TRIE:
+ MLC.linkedit_data_command_data.dataoff = StartOfDyldExportsTrie;
+ MLC.linkedit_data_command_data.datasize = O.ExportsTrie.Data.size();
+ break;
case MachO::LC_DYLD_INFO:
case MachO::LC_DYLD_INFO_ONLY:
MLC.dyld_info_command_data.rebase_off =
@@ -380,6 +413,10 @@ Error MachOLayoutBuilder::layoutTail(uint64_t Offset) {
case MachO::LC_SOURCE_VERSION:
case MachO::LC_THREAD:
case MachO::LC_UNIXTHREAD:
+ case MachO::LC_SUB_FRAMEWORK:
+ case MachO::LC_SUB_UMBRELLA:
+ case MachO::LC_SUB_CLIENT:
+ case MachO::LC_SUB_LIBRARY:
// Nothing to update.
break;
default:
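Because the code signature is now regenerated rather than copied from the input, layoutTail() must know its size before anything is written, and the hunk above derives it from the constants in CodeSignatureInfo (declared in the next file) plus the output file name. A worked example with those constants (BlockSize = 4 KiB, HashSize = 32, Align = 16); the concrete offset and file name are illustrative:

    // Suppose StartOfCodeSignature = 0x123000, i.e. 1,191,936 bytes precede
    // (and are covered by) the signature, and the output name is "a.out"
    // (5 characters + NUL = 6 bytes):
    //   AllHeadersSize = alignTo(FixedHeadersSize + 6, 16)
    //   BlockCount     = (1,191,936 + 4,095) / 4,096 = 291   // ceiling division
    //   Size           = alignTo(AllHeadersSize + 291 * 32, 16)
    // __LINKEDIT's size then becomes StartOfCodeSignature + Size - StartOfLinkEdit.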
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
index 5fe6683e27f3..44d03b4af7e8 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOLayoutBuilder.h
@@ -16,10 +16,49 @@ namespace llvm {
namespace objcopy {
namespace macho {
+/// When MachO binaries include an LC_CODE_SIGNATURE load command,
+/// the __LINKEDIT data segment will include a section corresponding
+/// to the LC_CODE_SIGNATURE load command. This section serves as a signature
+/// for the binary. Included in the CodeSignature section is a header followed
+/// by a hash of the binary. If present, the CodeSignature section is the
+/// last component of the binary.
+struct CodeSignatureInfo {
+ // NOTE: These values are to be kept in sync with those in
+ // LLD's CodeSignatureSection class.
+
+ static constexpr uint32_t Align = 16;
+ static constexpr uint8_t BlockSizeShift = 12;
+ // The binary is read in blocks of the following size.
+ static constexpr size_t BlockSize = (1 << BlockSizeShift); // 4 KiB
+ // For each block, a SHA256 hash (256 bits, 32 bytes) is written to
+ // the CodeSignature section.
+ static constexpr size_t HashSize = 256 / 8;
+ static constexpr size_t BlobHeadersSize = llvm::alignTo<8>(
+ sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
+ // The size of the entire header depends upon the filename the binary is being
+ // written to, but the rest of the header is fixed in size.
+ static constexpr uint32_t FixedHeadersSize =
+ BlobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
+
+ // The offset relative to the start of the binary where
+ // the CodeSignature section should begin.
+ uint32_t StartOffset;
+ // The size of the entire header, output file name size included.
+ uint32_t AllHeadersSize;
+ // The number of blocks required to hash the binary.
+ uint32_t BlockCount;
+ StringRef OutputFileName;
+ // The size of the entire CodeSignature section, including both the header and
+ // hashes.
+ uint32_t Size;
+};
+
class MachOLayoutBuilder {
Object &O;
bool Is64Bit;
+ StringRef OutputFileName;
uint64_t PageSize;
+ CodeSignatureInfo CodeSignature;
// Points to the __LINKEDIT segment if it exists.
MachO::macho_load_command *LinkEditLoadCommand = nullptr;
@@ -37,14 +76,18 @@ class MachOLayoutBuilder {
bool Is64Bit);
public:
- MachOLayoutBuilder(Object &O, bool Is64Bit, uint64_t PageSize)
- : O(O), Is64Bit(Is64Bit), PageSize(PageSize),
+ MachOLayoutBuilder(Object &O, bool Is64Bit, StringRef OutputFileName,
+ uint64_t PageSize)
+ : O(O), Is64Bit(Is64Bit), OutputFileName(OutputFileName),
+ PageSize(PageSize),
StrTableBuilder(getStringTableBuilderKind(O, Is64Bit)) {}
// Recomputes and updates fields in the given object such as file offsets.
Error layout();
StringTableBuilder &getStringTableBuilder() { return StrTableBuilder; }
+
+ const CodeSignatureInfo &getCodeSignature() { return CodeSignature; }
};
} // end namespace macho
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 823306916bbe..9e7b91d73057 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -9,6 +9,7 @@
#include "MachOObjcopy.h"
#include "../llvm-objcopy.h"
#include "CommonConfig.h"
+#include "MachO/MachOConfig.h"
#include "MachOReader.h"
#include "MachOWriter.h"
#include "MultiFormatConfig.h"
@@ -19,6 +20,7 @@
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
using namespace llvm;
@@ -87,17 +89,20 @@ static void markSymbols(const CommonConfig &, Object &Obj) {
(*ISE.Symbol)->Referenced = true;
}
-static void updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
+static void updateAndRemoveSymbols(const CommonConfig &Config,
+ const MachOConfig &MachOConfig,
+ Object &Obj) {
for (SymbolEntry &Sym : Obj.SymTable) {
auto I = Config.SymbolsToRename.find(Sym.Name);
if (I != Config.SymbolsToRename.end())
Sym.Name = std::string(I->getValue());
}
- auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) {
+ auto RemovePred = [Config, MachOConfig,
+ &Obj](const std::unique_ptr<SymbolEntry> &N) {
if (N->Referenced)
return false;
- if (Config.KeepUndefined && N->isUndefinedSymbol())
+ if (MachOConfig.KeepUndefined && N->isUndefinedSymbol())
return false;
if (N->n_desc & MachO::REFERENCED_DYNAMICALLY)
return false;
@@ -106,8 +111,9 @@ static void updateAndRemoveSymbols(const CommonConfig &Config, Object &Obj) {
if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
return true;
// This behavior is consistent with cctools' strip.
- if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) &&
- Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol())
+ if (MachOConfig.StripSwiftSymbols &&
+ (Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
+ *Obj.SwiftVersion && N->isSwiftSymbol())
return true;
return false;
};
@@ -139,17 +145,17 @@ static LoadCommand buildRPathLoadCommand(StringRef Path) {
return LC;
}
-static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
+static Error processLoadCommands(const MachOConfig &MachOConfig, Object &Obj) {
// Remove RPaths.
- DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
- Config.RPathsToRemove.end());
+ DenseSet<StringRef> RPathsToRemove(MachOConfig.RPathsToRemove.begin(),
+ MachOConfig.RPathsToRemove.end());
LoadCommandPred RemovePred = [&RPathsToRemove,
- &Config](const LoadCommand &LC) {
+ &MachOConfig](const LoadCommand &LC) {
if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
// When removing all RPaths we don't need to care
// about what it contains
- if (Config.RemoveAllRpaths)
+ if (MachOConfig.RemoveAllRpaths)
return true;
StringRef RPath = getPayloadString(LC);
@@ -166,7 +172,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
// Emit an error if the Mach-O binary does not contain an rpath path name
// specified in -delete_rpath.
- for (StringRef RPath : Config.RPathsToRemove) {
+ for (StringRef RPath : MachOConfig.RPathsToRemove) {
if (RPathsToRemove.count(RPath))
return createStringError(errc::invalid_argument,
"no LC_RPATH load command with path: %s",
@@ -182,7 +188,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
}
// Throw errors for invalid RPaths.
- for (const auto &OldNew : Config.RPathsToUpdate) {
+ for (const auto &OldNew : MachOConfig.RPathsToUpdate) {
StringRef Old = OldNew.getFirst();
StringRef New = OldNew.getSecond();
if (!RPaths.contains(Old))
@@ -198,14 +204,14 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
for (LoadCommand &LC : Obj.LoadCommands) {
switch (LC.MachOLoadCommand.load_command_data.cmd) {
case MachO::LC_ID_DYLIB:
- if (Config.SharedLibId)
+ if (MachOConfig.SharedLibId)
updateLoadCommandPayloadString<MachO::dylib_command>(
- LC, *Config.SharedLibId);
+ LC, *MachOConfig.SharedLibId);
break;
case MachO::LC_RPATH: {
StringRef RPath = getPayloadString(LC);
- StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath);
+ StringRef NewRPath = MachOConfig.RPathsToUpdate.lookup(RPath);
if (!NewRPath.empty())
updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
break;
@@ -217,7 +223,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
case MachO::LC_LOAD_WEAK_DYLIB:
StringRef InstallName = getPayloadString(LC);
StringRef NewInstallName =
- Config.InstallNamesToUpdate.lookup(InstallName);
+ MachOConfig.InstallNamesToUpdate.lookup(InstallName);
if (!NewInstallName.empty())
updateLoadCommandPayloadString<MachO::dylib_command>(LC,
NewInstallName);
@@ -226,7 +232,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
}
// Add new RPaths.
- for (StringRef RPath : Config.RPathToAdd) {
+ for (StringRef RPath : MachOConfig.RPathToAdd) {
if (RPaths.contains(RPath))
return createStringError(errc::invalid_argument,
"rpath '" + RPath +
@@ -235,7 +241,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
}
- for (StringRef RPath : Config.RPathToPrepend) {
+ for (StringRef RPath : MachOConfig.RPathToPrepend) {
if (RPaths.contains(RPath))
return createStringError(errc::invalid_argument,
"rpath '" + RPath +
@@ -248,7 +254,7 @@ static Error processLoadCommands(const CommonConfig &Config, Object &Obj) {
// Unlike appending rpaths, the indexes of subsequent load commands must
// be recalculated after prepending one.
- if (!Config.RPathToPrepend.empty())
+ if (!MachOConfig.RPathToPrepend.empty())
Obj.updateLoadCommandIndexes();
return Error::success();
@@ -333,7 +339,8 @@ static Error isValidMachOCannonicalName(StringRef Name) {
return Error::success();
}
-static Error handleArgs(const CommonConfig &Config, Object &Obj) {
+static Error handleArgs(const CommonConfig &Config,
+ const MachOConfig &MachOConfig, Object &Obj) {
// Dump sections before add/remove for compatibility with GNU objcopy.
for (StringRef Flag : Config.DumpSection) {
StringRef SectionName;
@@ -350,7 +357,7 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
if (Config.StripAll)
markSymbols(Config, Obj);
- updateAndRemoveSymbols(Config, Obj);
+ updateAndRemoveSymbols(Config, MachOConfig, Obj);
if (Config.StripAll)
for (LoadCommand &LC : Obj.LoadCommands)
@@ -367,14 +374,14 @@ static Error handleArgs(const CommonConfig &Config, Object &Obj) {
return E;
}
- if (Error E = processLoadCommands(Config, Obj))
+ if (Error E = processLoadCommands(MachOConfig, Obj))
return E;
return Error::success();
}
Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
- const MachOConfig &,
+ const MachOConfig &MachOConfig,
object::MachOObjectFile &In,
raw_ostream &Out) {
MachOReader Reader(In);
@@ -382,7 +389,12 @@ Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
if (!O)
return createFileError(Config.InputFilename, O.takeError());
- if (Error E = handleArgs(Config, **O))
+ if (O->get()->Header.FileType == MachO::HeaderFileType::MH_PRELOAD)
+ return createStringError(std::errc::not_supported,
+ "%s: MH_PRELOAD files are not supported",
+ Config.InputFilename.str().c_str());
+
+ if (Error E = handleArgs(Config, MachOConfig, **O))
return createFileError(Config.InputFilename, std::move(E));
// Page size used for alignment of segment sizes in Mach-O executables and
@@ -398,7 +410,8 @@ Error objcopy::macho::executeObjcopyOnBinary(const CommonConfig &Config,
PageSize = 4096;
}
- MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
+ MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(),
+ sys::path::filename(Config.OutputFilename), PageSize, Out);
if (auto E = Writer.finalize())
return E;
return Writer.write();
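
A note on the new MH_PRELOAD check above: it composes the two generic error helpers from llvm/Support/Error.h that this file already uses elsewhere. The sketch below only illustrates that pattern; the function name is hypothetical and not part of the patch, and the MH_PRELOAD path in the hunk returns the string error directly rather than wrapping it.

// Minimal sketch of the error-construction pattern used above: a typed
// std::errc code plus a printf-style message, optionally wrapped with the
// input file name so diagnostics identify the offending input.
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"

static llvm::Error rejectUnsupportedFileType(llvm::StringRef InputFilename) {
  llvm::Error E = llvm::createStringError(
      std::errc::not_supported, "%s: MH_PRELOAD files are not supported",
      InputFilename.str().c_str());
  // Wrapping is how executeObjcopyOnBinary reports handleArgs failures.
  return llvm::createFileError(InputFilename, std::move(E));
}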
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
index e30940a8d6eb..d03eee9d5fdb 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.h
@@ -24,7 +24,8 @@ struct MachOConfig;
class MultiFormatConfig;
namespace macho {
-Error executeObjcopyOnBinary(const CommonConfig &Config, const MachOConfig &,
+Error executeObjcopyOnBinary(const CommonConfig &Config,
+ const MachOConfig &MachOConfig,
object::MachOObjectFile &In, raw_ostream &Out);
Error executeObjcopyOnMachOUniversalBinary(
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
index 7d1c29b42c2e..d68d1692997a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp
@@ -116,6 +116,7 @@ Expected<std::vector<std::unique_ptr<Section>>> static extractSections(
Error MachOReader::readLoadCommands(Object &O) const {
// For MachO sections indices start from 1.
uint32_t NextSectionIndex = 1;
+ static constexpr char TextSegmentName[] = "__TEXT";
for (auto LoadCmd : MachOObj.load_commands()) {
LoadCommand LC;
switch (LoadCmd.C.cmd) {
@@ -123,6 +124,14 @@ Error MachOReader::readLoadCommands(Object &O) const {
O.CodeSignatureCommandIndex = O.LoadCommands.size();
break;
case MachO::LC_SEGMENT:
+ // LoadCmd.Ptr might not be aligned temporarily as
+ // MachO::segment_command requires, but the segname char pointer does
+ // not have alignment restrictions.
+ if (StringRef(reinterpret_cast<const char *>(
+ LoadCmd.Ptr + offsetof(MachO::segment_command, segname))) ==
+ TextSegmentName)
+ O.TextSegmentCommandIndex = O.LoadCommands.size();
+
if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
extractSections<MachO::section, MachO::segment_command>(
LoadCmd, MachOObj, NextSectionIndex))
@@ -131,6 +140,14 @@ Error MachOReader::readLoadCommands(Object &O) const {
return Sections.takeError();
break;
case MachO::LC_SEGMENT_64:
+ // LoadCmd.Ptr might not be aligned temporarily as
+ // MachO::segment_command_64 requires, but the segname char pointer does
+ // not have alignment restrictions.
+ if (StringRef(reinterpret_cast<const char *>(
+ LoadCmd.Ptr + offsetof(MachO::segment_command_64, segname))) ==
+ TextSegmentName)
+ O.TextSegmentCommandIndex = O.LoadCommands.size();
+
if (Expected<std::vector<std::unique_ptr<Section>>> Sections =
extractSections<MachO::section_64, MachO::segment_command_64>(
LoadCmd, MachOObj, NextSectionIndex))
@@ -157,6 +174,12 @@ Error MachOReader::readLoadCommands(Object &O) const {
case MachO::LC_FUNCTION_STARTS:
O.FunctionStartsCommandIndex = O.LoadCommands.size();
break;
+ case MachO::LC_DYLD_EXPORTS_TRIE:
+ O.ExportsTrieCommandIndex = O.LoadCommands.size();
+ break;
+ case MachO::LC_DYLD_CHAINED_FIXUPS:
+ O.ChainedFixupsCommandIndex = O.LoadCommands.size();
+ break;
}
#define HANDLE_LOAD_COMMAND(LCName, LCValue, LCStruct) \
case MachO::LCName: \
@@ -271,10 +294,6 @@ void MachOReader::readLinkData(Object &O, Optional<size_t> LCIndex,
arrayRefFromStringRef(MachOObj.getData().substr(LC.dataoff, LC.datasize));
}
-void MachOReader::readCodeSignature(Object &O) const {
- return readLinkData(O, O.CodeSignatureCommandIndex, O.CodeSignature);
-}
-
void MachOReader::readDataInCodeData(Object &O) const {
return readLinkData(O, O.DataInCodeCommandIndex, O.DataInCode);
}
@@ -288,6 +307,14 @@ void MachOReader::readFunctionStartsData(Object &O) const {
return readLinkData(O, O.FunctionStartsCommandIndex, O.FunctionStarts);
}
+void MachOReader::readExportsTrie(Object &O) const {
+ return readLinkData(O, O.ExportsTrieCommandIndex, O.ExportsTrie);
+}
+
+void MachOReader::readChainedFixups(Object &O) const {
+ return readLinkData(O, O.ChainedFixupsCommandIndex, O.ChainedFixups);
+}
+
void MachOReader::readIndirectSymbolTable(Object &O) const {
MachO::dysymtab_command DySymTab = MachOObj.getDysymtabLoadCommand();
constexpr uint32_t AbsOrLocalMask =
@@ -336,10 +363,11 @@ Expected<std::unique_ptr<Object>> MachOReader::create() const {
readWeakBindInfo(*Obj);
readLazyBindInfo(*Obj);
readExportInfo(*Obj);
- readCodeSignature(*Obj);
readDataInCodeData(*Obj);
readLinkerOptimizationHint(*Obj);
readFunctionStartsData(*Obj);
+ readExportsTrie(*Obj);
+ readChainedFixups(*Obj);
readIndirectSymbolTable(*Obj);
readSwiftVersion(*Obj);
return std::move(Obj);
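
The reader above deliberately dereferences only the segname field through a raw char pointer, because the load-command pointer may be under-aligned for the full segment_command struct. Below is a standalone sketch of that pattern with a hypothetical stand-in struct; it is not the reader's API, and unlike the hunk it explicitly bounds the read to the 16-byte field rather than relying on null termination.

// Minimal sketch: fetch a segment name from a raw, possibly under-aligned
// load-command pointer. Only the char array is dereferenced, so no
// struct-wide alignment is assumed.
#include <cstddef>
#include <string>

namespace {
struct SegmentCommandLike { // hypothetical stand-in for MachO::segment_command
  unsigned Cmd;
  unsigned CmdSize;
  char SegName[16];
};
} // namespace

static std::string readSegmentName(const char *LoadCmdPtr) {
  const char *Name = LoadCmdPtr + offsetof(SegmentCommandLike, SegName);
  std::size_t Len = 0;
  while (Len < sizeof(SegmentCommandLike::SegName) && Name[Len] != '\0')
    ++Len; // stop at the terminator or the end of the fixed-size field
  return std::string(Name, Len);
}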
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.h b/llvm/tools/llvm-objcopy/MachO/MachOReader.h
index ca3a0214cb6d..b29e86ca642e 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOReader.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.h
@@ -41,6 +41,8 @@ class MachOReader : public Reader {
void readDataInCodeData(Object &O) const;
void readLinkerOptimizationHint(Object &O) const;
void readFunctionStartsData(Object &O) const;
+ void readExportsTrie(Object &O) const;
+ void readChainedFixups(Object &O) const;
void readIndirectSymbolTable(Object &O) const;
void readSwiftVersion(Object &O) const;
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
index 295098ed4118..688945afe944 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp
@@ -14,10 +14,16 @@
#include "llvm/Object/MachO.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SHA256.h"
#include <memory>
+#if defined(__APPLE__)
+#include <sys/mman.h>
+#endif
+
using namespace llvm;
using namespace llvm::objcopy::macho;
+using namespace llvm::support::endian;
size_t MachOWriter::headerSize() const {
return Is64Bit ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header);
@@ -127,6 +133,26 @@ size_t MachOWriter::totalSize() const {
LinkEditDataCommand.datasize);
}
+ if (O.ChainedFixupsCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ChainedFixupsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
+
+ if (O.ExportsTrieCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ExportsTrieCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Ends.push_back(LinkEditDataCommand.dataoff +
+ LinkEditDataCommand.datasize);
+ }
+
// Otherwise, use the last section / relocation.
for (const LoadCommand &LC : O.LoadCommands)
for (const std::unique_ptr<Section> &S : LC.Sections) {
@@ -423,8 +449,147 @@ void MachOWriter::writeLinkData(Optional<size_t> LCIndex, const LinkData &LD) {
memcpy(Out, LD.Data.data(), LD.Data.size());
}
+static uint64_t
+getSegmentFileOffset(const LoadCommand &TextSegmentLoadCommand) {
+ const MachO::macho_load_command &MLC =
+ TextSegmentLoadCommand.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ return MLC.segment_command_data.fileoff;
+ case MachO::LC_SEGMENT_64:
+ return MLC.segment_command_64_data.fileoff;
+ default:
+ return 0;
+ }
+}
+
+static uint64_t getSegmentFileSize(const LoadCommand &TextSegmentLoadCommand) {
+ const MachO::macho_load_command &MLC =
+ TextSegmentLoadCommand.MachOLoadCommand;
+ switch (MLC.load_command_data.cmd) {
+ case MachO::LC_SEGMENT:
+ return MLC.segment_command_data.filesize;
+ case MachO::LC_SEGMENT_64:
+ return MLC.segment_command_64_data.filesize;
+ default:
+ return 0;
+ }
+}
+
void MachOWriter::writeCodeSignatureData() {
- return writeLinkData(O.CodeSignatureCommandIndex, O.CodeSignature);
+ // NOTE: This CodeSignature section behaviour must be kept in sync with that
+ // performed in LLD's CodeSignatureSection::write /
+ // CodeSignatureSection::writeHashes. Furthermore, this call must occur only
+ // after the rest of the binary has already been written to the buffer. This
+ // is because the buffer is read from to perform the necessary hashing.
+
+ // The CodeSignature section is the last section in the MachO binary and
+ // contains a hash of all content in the binary before it. Since llvm-objcopy
+ // has likely modified the target binary, the hash must be regenerated
+ // entirely. To generate this hash, we must read from the start of the binary
+ // (HashReadStart) to just before the start of the CodeSignature section
+ // (HashReadEnd).
+
+ const CodeSignatureInfo &CodeSignature = LayoutBuilder.getCodeSignature();
+
+ uint8_t *BufferStart = reinterpret_cast<uint8_t *>(Buf->getBufferStart());
+ uint8_t *HashReadStart = BufferStart;
+ uint8_t *HashReadEnd = BufferStart + CodeSignature.StartOffset;
+
+ // The CodeSignature section begins with a header, after which the hashes
+ // of each page of the binary are written.
+ uint8_t *HashWriteStart = HashReadEnd + CodeSignature.AllHeadersSize;
+
+ uint32_t TextSegmentFileOff = 0;
+ uint32_t TextSegmentFileSize = 0;
+ if (O.TextSegmentCommandIndex) {
+ const LoadCommand &TextSegmentLoadCommand =
+ O.LoadCommands[*O.TextSegmentCommandIndex];
+ assert(TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd ==
+ MachO::LC_SEGMENT ||
+ TextSegmentLoadCommand.MachOLoadCommand.load_command_data.cmd ==
+ MachO::LC_SEGMENT_64);
+ assert(StringRef(TextSegmentLoadCommand.MachOLoadCommand
+ .segment_command_data.segname) == "__TEXT");
+ TextSegmentFileOff = getSegmentFileOffset(TextSegmentLoadCommand);
+ TextSegmentFileSize = getSegmentFileSize(TextSegmentLoadCommand);
+ }
+
+ const uint32_t FileNamePad = CodeSignature.AllHeadersSize -
+ CodeSignature.FixedHeadersSize -
+ CodeSignature.OutputFileName.size();
+
+ // Write code section header.
+ auto *SuperBlob = reinterpret_cast<MachO::CS_SuperBlob *>(HashReadEnd);
+ write32be(&SuperBlob->magic, MachO::CSMAGIC_EMBEDDED_SIGNATURE);
+ write32be(&SuperBlob->length, CodeSignature.Size);
+ write32be(&SuperBlob->count, 1);
+ auto *BlobIndex = reinterpret_cast<MachO::CS_BlobIndex *>(&SuperBlob[1]);
+ write32be(&BlobIndex->type, MachO::CSSLOT_CODEDIRECTORY);
+ write32be(&BlobIndex->offset, CodeSignature.BlobHeadersSize);
+ auto *CodeDirectory = reinterpret_cast<MachO::CS_CodeDirectory *>(
+ HashReadEnd + CodeSignature.BlobHeadersSize);
+ write32be(&CodeDirectory->magic, MachO::CSMAGIC_CODEDIRECTORY);
+ write32be(&CodeDirectory->length,
+ CodeSignature.Size - CodeSignature.BlobHeadersSize);
+ write32be(&CodeDirectory->version, MachO::CS_SUPPORTSEXECSEG);
+ write32be(&CodeDirectory->flags, MachO::CS_ADHOC | MachO::CS_LINKER_SIGNED);
+ write32be(&CodeDirectory->hashOffset,
+ sizeof(MachO::CS_CodeDirectory) +
+ CodeSignature.OutputFileName.size() + FileNamePad);
+ write32be(&CodeDirectory->identOffset, sizeof(MachO::CS_CodeDirectory));
+ CodeDirectory->nSpecialSlots = 0;
+ write32be(&CodeDirectory->nCodeSlots, CodeSignature.BlockCount);
+ write32be(&CodeDirectory->codeLimit, CodeSignature.StartOffset);
+ CodeDirectory->hashSize = static_cast<uint8_t>(CodeSignature.HashSize);
+ CodeDirectory->hashType = MachO::kSecCodeSignatureHashSHA256;
+ CodeDirectory->platform = 0;
+ CodeDirectory->pageSize = CodeSignature.BlockSizeShift;
+ CodeDirectory->spare2 = 0;
+ CodeDirectory->scatterOffset = 0;
+ CodeDirectory->teamOffset = 0;
+ CodeDirectory->spare3 = 0;
+ CodeDirectory->codeLimit64 = 0;
+ write64be(&CodeDirectory->execSegBase, TextSegmentFileOff);
+ write64be(&CodeDirectory->execSegLimit, TextSegmentFileSize);
+ write64be(&CodeDirectory->execSegFlags, O.Header.FileType == MachO::MH_EXECUTE
+ ? MachO::CS_EXECSEG_MAIN_BINARY
+ : 0);
+
+ auto *Id = reinterpret_cast<char *>(&CodeDirectory[1]);
+ memcpy(Id, CodeSignature.OutputFileName.begin(),
+ CodeSignature.OutputFileName.size());
+ memset(Id + CodeSignature.OutputFileName.size(), 0, FileNamePad);
+
+ // Write the hashes.
+ uint8_t *CurrHashReadPosition = HashReadStart;
+ uint8_t *CurrHashWritePosition = HashWriteStart;
+ while (CurrHashReadPosition < HashReadEnd) {
+ StringRef Block(reinterpret_cast<char *>(CurrHashReadPosition),
+ std::min(HashReadEnd - CurrHashReadPosition,
+ static_cast<ssize_t>(CodeSignature.BlockSize)));
+ SHA256 Hasher;
+ Hasher.update(Block);
+ StringRef Hash = Hasher.final();
+ assert(Hash.size() == CodeSignature.HashSize);
+ memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize);
+ CurrHashReadPosition += CodeSignature.BlockSize;
+ CurrHashWritePosition += CodeSignature.HashSize;
+ }
+#if defined(__APPLE__)
+ // This is a macOS-specific work-around and makes no sense for any
+ // other host OS. See https://openradar.appspot.com/FB8914231
+ //
+ // The macOS kernel maintains a signature-verification cache to
+ // quickly validate applications at time of execve(2). The trouble
+ // is that the kernel creates the cache entry at the time of the
+ // mmap(2) call, before we have a chance to write either the code to
+ // sign or the signature header+hashes. The fix is to invalidate
+ // all cached data associated with the output file, thus discarding
+ // the bogus prematurely-cached signature.
+ msync(BufferStart, CodeSignature.StartOffset + CodeSignature.Size,
+ MS_INVALIDATE);
+#endif
}
void MachOWriter::writeDataInCodeData() {
@@ -440,6 +605,14 @@ void MachOWriter::writeFunctionStartsData() {
return writeLinkData(O.FunctionStartsCommandIndex, O.FunctionStarts);
}
+void MachOWriter::writeChainedFixupsData() {
+ return writeLinkData(O.ChainedFixupsCommandIndex, O.ChainedFixups);
+}
+
+void MachOWriter::writeExportsTrieData() {
+ return writeLinkData(O.ExportsTrieCommandIndex, O.ExportsTrie);
+}
+
void MachOWriter::writeTail() {
typedef void (MachOWriter::*WriteHandlerType)(void);
typedef std::pair<uint64_t, WriteHandlerType> WriteOperation;
@@ -525,6 +698,26 @@ void MachOWriter::writeTail() {
&MachOWriter::writeFunctionStartsData);
}
+ if (O.ChainedFixupsCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ChainedFixupsCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff,
+ &MachOWriter::writeChainedFixupsData);
+ }
+
+ if (O.ExportsTrieCommandIndex) {
+ const MachO::linkedit_data_command &LinkEditDataCommand =
+ O.LoadCommands[*O.ExportsTrieCommandIndex]
+ .MachOLoadCommand.linkedit_data_command_data;
+
+ if (LinkEditDataCommand.dataoff)
+ Queue.emplace_back(LinkEditDataCommand.dataoff,
+ &MachOWriter::writeExportsTrieData);
+ }
+
llvm::sort(Queue, [](const WriteOperation &LHS, const WriteOperation &RHS) {
return LHS.first < RHS.first;
});
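
writeCodeSignatureData above splits the signed region into BlockSize-sized pages and stores one SHA-256 digest per page after the CS_CodeDirectory header, matching LLD's linker-signed ad-hoc signatures. The following is only a sketch of that hashing loop, assuming the same llvm::SHA256 interface the hunk relies on (update() over a block, final() returning the raw digest bytes); the helper name and return type are illustrative.

// Minimal sketch: hash a buffer page by page and concatenate the digests,
// as the CodeSignature writer does for each BlockSize chunk.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SHA256.h"
#include <cstdint>

static llvm::SmallVector<uint8_t, 0> hashPages(llvm::StringRef Data,
                                               size_t BlockSize) {
  llvm::SmallVector<uint8_t, 0> Hashes;
  for (size_t Off = 0; Off < Data.size(); Off += BlockSize) {
    llvm::SHA256 Hasher;
    Hasher.update(Data.substr(Off, BlockSize)); // substr clamps the length
    llvm::StringRef Digest = Hasher.final();    // 32 bytes for SHA-256
    Hashes.append(Digest.bytes_begin(), Digest.bytes_end());
  }
  return Hashes;
}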
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
index c8c06d644e9f..a172534dac8a 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
+++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.h
@@ -50,13 +50,16 @@ class MachOWriter {
void writeDataInCodeData();
void writeLinkerOptimizationHint();
void writeFunctionStartsData();
+ void writeChainedFixupsData();
+ void writeExportsTrieData();
void writeTail();
public:
- MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian, uint64_t PageSize,
- raw_ostream &Out)
+ MachOWriter(Object &O, bool Is64Bit, bool IsLittleEndian,
+ StringRef OutputFileName, uint64_t PageSize, raw_ostream &Out)
: O(O), Is64Bit(Is64Bit), IsLittleEndian(IsLittleEndian),
- PageSize(PageSize), Out(Out), LayoutBuilder(O, Is64Bit, PageSize) {}
+ PageSize(PageSize), Out(Out),
+ LayoutBuilder(O, Is64Bit, OutputFileName, PageSize) {}
size_t totalSize() const;
Error finalize();
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.cpp b/llvm/tools/llvm-objcopy/MachO/Object.cpp
index b4f98fa84cb5..6312adbbc9f7 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/Object.cpp
@@ -29,10 +29,24 @@ void SymbolTable::removeSymbols(
}
void Object::updateLoadCommandIndexes() {
+ static constexpr char TextSegmentName[] = "__TEXT";
// Update indices of special load commands
for (size_t Index = 0, Size = LoadCommands.size(); Index < Size; ++Index) {
LoadCommand &LC = LoadCommands[Index];
switch (LC.MachOLoadCommand.load_command_data.cmd) {
+ case MachO::LC_CODE_SIGNATURE:
+ CodeSignatureCommandIndex = Index;
+ break;
+ case MachO::LC_SEGMENT:
+ if (StringRef(LC.MachOLoadCommand.segment_command_data.segname) ==
+ TextSegmentName)
+ TextSegmentCommandIndex = Index;
+ break;
+ case MachO::LC_SEGMENT_64:
+ if (StringRef(LC.MachOLoadCommand.segment_command_64_data.segname) ==
+ TextSegmentName)
+ TextSegmentCommandIndex = Index;
+ break;
case MachO::LC_SYMTAB:
SymTabCommandIndex = Index;
break;
@@ -52,6 +66,12 @@ void Object::updateLoadCommandIndexes() {
case MachO::LC_FUNCTION_STARTS:
FunctionStartsCommandIndex = Index;
break;
+ case MachO::LC_DYLD_CHAINED_FIXUPS:
+ ChainedFixupsCommandIndex = Index;
+ break;
+ case MachO::LC_DYLD_EXPORTS_TRIE:
+ ExportsTrieCommandIndex = Index;
+ break;
}
}
}
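
updateLoadCommandIndexes re-derives the cached indices because editing the load-command list (for example prepending an LC_RPATH, as noted earlier in MachOObjcopy.cpp) shifts every later command. A standalone toy illustration of that invalidation; the command kinds are hypothetical stand-ins, not the MachO constants.

// Toy example: a cached index goes stale after a prepend and must be
// re-derived by scanning the list again.
#include <cstddef>
#include <vector>

struct Cmd {
  unsigned Kind;
};

int main() {
  std::vector<Cmd> Cmds = {{/*segment*/ 0x19}, {/*symtab*/ 0x02}};
  std::size_t SymTabIndex = 1; // index cached before the edit

  Cmds.insert(Cmds.begin(), Cmd{/*rpath*/ 0x1c}); // prepend a load command

  // The cached index is stale; re-scan, as updateLoadCommandIndexes does.
  for (std::size_t I = 0; I < Cmds.size(); ++I)
    if (Cmds[I].Kind == 0x02)
      SymTabIndex = I; // now 2, not 1

  return SymTabIndex == 2 ? 0 : 1;
}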
diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h
index 207502e2241b..13aaf42634b0 100644
--- a/llvm/tools/llvm-objcopy/MachO/Object.h
+++ b/llvm/tools/llvm-objcopy/MachO/Object.h
@@ -315,7 +315,8 @@ struct Object {
LinkData DataInCode;
LinkData LinkerOptimizationHint;
LinkData FunctionStarts;
- LinkData CodeSignature;
+ LinkData ExportsTrie;
+ LinkData ChainedFixups;
Optional<uint32_t> SwiftVersion;
@@ -325,14 +326,21 @@ struct Object {
Optional<size_t> SymTabCommandIndex;
/// The index of LC_DYLD_INFO or LC_DYLD_INFO_ONLY load command if present.
Optional<size_t> DyLdInfoCommandIndex;
- /// The index LC_DYSYMTAB load comamnd if present.
+ /// The index LC_DYSYMTAB load command if present.
Optional<size_t> DySymTabCommandIndex;
- /// The index LC_DATA_IN_CODE load comamnd if present.
+ /// The index LC_DATA_IN_CODE load command if present.
Optional<size_t> DataInCodeCommandIndex;
- /// The index of LC_LINKER_OPTIMIZATIN_HINT load comamnd if present.
+ /// The index of LC_LINKER_OPTIMIZATIN_HINT load command if present.
Optional<size_t> LinkerOptimizationHintCommandIndex;
- /// The index LC_FUNCTION_STARTS load comamnd if present.
+ /// The index LC_FUNCTION_STARTS load command if present.
Optional<size_t> FunctionStartsCommandIndex;
+ /// The index LC_DYLD_CHAINED_FIXUPS load command if present.
+ Optional<size_t> ChainedFixupsCommandIndex;
+ /// The index LC_DYLD_EXPORTS_TRIE load command if present.
+ Optional<size_t> ExportsTrieCommandIndex;
+ /// The index of the LC_SEGMENT or LC_SEGMENT_64 load command
+ /// corresponding to the __TEXT segment.
+ Optional<size_t> TextSegmentCommandIndex;
BumpPtrAllocator Alloc;
StringSaver NewSectionsContents;
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 63abbe4c2020..bc624442aa51 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -50,7 +50,8 @@ defm rename_section
: Eq<"rename-section",
"Renames a section from old to new, optionally with specified flags. "
"Flags supported for GNU compatibility: alloc, load, noload, "
- "readonly, debug, code, data, rom, share, contents, merge, strings.">,
+ "readonly, exclude, debug, code, data, rom, share, contents, merge, "
+ "strings.">,
MetaVarName<"old=new[,flag1,...]">;
defm redefine_symbol
: Eq<"redefine-sym", "Change the name of a symbol old to new">,
@@ -82,8 +83,8 @@ defm set_section_alignment
defm set_section_flags
: Eq<"set-section-flags",
"Set section flags for a given section. Flags supported for GNU "
- "compatibility: alloc, load, noload, readonly, debug, code, data, "
- "rom, share, contents, merge, strings.">,
+ "compatibility: alloc, load, noload, readonly, exclude, debug, code, "
+ "data, rom, share, contents, merge, strings.">,
MetaVarName<"section=flag1[,flag2,...]">;
def S : Flag<["-"], "S">,
@@ -214,3 +215,7 @@ defm add_symbol
"compatibility: debug, constructor, warning, indirect, synthetic, "
"unique-object, before.">,
MetaVarName<"name=[section:]value[,flags]">;
+
+defm update_section
+ : Eq<"update-section", "Add section <name> with contents from a file <file>.">,
+ MetaVarName<"name=file">;
diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp
index 09a900182d24..32fdd1a4d5c3 100644
--- a/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -31,6 +31,159 @@ using namespace llvm::objdump;
using namespace llvm::object;
using namespace llvm::Win64EH;
+namespace {
+template <typename T> struct EnumEntry {
+ T Value;
+ StringRef Name;
+};
+
+class COFFDumper {
+public:
+ explicit COFFDumper(const llvm::object::COFFObjectFile &Obj) : Obj(Obj) {
+ Is64 = !Obj.getPE32Header();
+ }
+
+ template <class PEHeader> void printPEHeader(const PEHeader &Hdr) const;
+
+private:
+ template <typename T> FormattedNumber formatAddr(T V) const {
+ return format_hex_no_prefix(V, Is64 ? 16 : 8);
+ }
+
+ uint32_t getBaseOfData(const void *Hdr) const {
+ return Is64 ? 0 : static_cast<const pe32_header *>(Hdr)->BaseOfData;
+ }
+
+ const llvm::object::COFFObjectFile &Obj;
+ bool Is64;
+};
+} // namespace
+
+constexpr EnumEntry<uint16_t> PEHeaderMagic[] = {
+ {uint16_t(COFF::PE32Header::PE32), "PE32"},
+ {uint16_t(COFF::PE32Header::PE32_PLUS), "PE32+"},
+};
+
+constexpr EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
+ {COFF::IMAGE_SUBSYSTEM_UNKNOWN, "unspecified"},
+ {COFF::IMAGE_SUBSYSTEM_NATIVE, "NT native"},
+ {COFF::IMAGE_SUBSYSTEM_WINDOWS_GUI, "Windows GUI"},
+ {COFF::IMAGE_SUBSYSTEM_WINDOWS_CUI, "Windows CUI"},
+ {COFF::IMAGE_SUBSYSTEM_POSIX_CUI, "POSIX CUI"},
+ {COFF::IMAGE_SUBSYSTEM_WINDOWS_CE_GUI, "Wince CUI"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_APPLICATION, "EFI application"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER, "EFI boot service driver"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER, "EFI runtime driver"},
+ {COFF::IMAGE_SUBSYSTEM_EFI_ROM, "SAL runtime driver"},
+ {COFF::IMAGE_SUBSYSTEM_XBOX, "XBOX"},
+};
+
+template <typename T, typename TEnum>
+static void printOptionalEnumName(T Value,
+ ArrayRef<EnumEntry<TEnum>> EnumValues) {
+ for (const EnumEntry<TEnum> &I : EnumValues)
+ if (I.Value == Value) {
+ outs() << "\t(" << I.Name << ')';
+ return;
+ }
+}
+
+template <class PEHeader>
+void COFFDumper::printPEHeader(const PEHeader &Hdr) const {
+ auto print = [](const char *K, auto V, const char *Fmt = "%d\n") {
+ outs() << format("%-23s ", K) << format(Fmt, V);
+ };
+ auto printU16 = [&](const char *K, support::ulittle16_t V,
+ const char *Fmt = "%d\n") { print(K, uint16_t(V), Fmt); };
+ auto printU32 = [&](const char *K, support::ulittle32_t V,
+ const char *Fmt = "%d\n") { print(K, uint32_t(V), Fmt); };
+ auto printAddr = [=](const char *K, uint64_t V) {
+ outs() << format("%-23s ", K) << formatAddr(V) << '\n';
+ };
+
+ printU16("Magic", Hdr.Magic, "%04x");
+ printOptionalEnumName(Hdr.Magic, makeArrayRef(PEHeaderMagic));
+ outs() << '\n';
+ print("MajorLinkerVersion", Hdr.MajorLinkerVersion);
+ print("MinorLinkerVersion", Hdr.MinorLinkerVersion);
+ printAddr("SizeOfCode", Hdr.SizeOfCode);
+ printAddr("SizeOfInitializedData", Hdr.SizeOfInitializedData);
+ printAddr("SizeOfUninitializedData", Hdr.SizeOfUninitializedData);
+ printAddr("AddressOfEntryPoint", Hdr.AddressOfEntryPoint);
+ printAddr("BaseOfCode", Hdr.BaseOfCode);
+ if (!Is64)
+ printAddr("BaseOfData", getBaseOfData(&Hdr));
+ printAddr("ImageBase", Hdr.ImageBase);
+ printU32("SectionAlignment", Hdr.SectionAlignment, "%08x\n");
+ printU32("FileAlignment", Hdr.FileAlignment, "%08x\n");
+ printU16("MajorOSystemVersion", Hdr.MajorOperatingSystemVersion);
+ printU16("MinorOSystemVersion", Hdr.MinorOperatingSystemVersion);
+ printU16("MajorImageVersion", Hdr.MajorImageVersion);
+ printU16("MinorImageVersion", Hdr.MinorImageVersion);
+ printU16("MajorSubsystemVersion", Hdr.MajorSubsystemVersion);
+ printU16("MinorSubsystemVersion", Hdr.MinorSubsystemVersion);
+ printU32("Win32Version", Hdr.Win32VersionValue, "%08x\n");
+ printU32("SizeOfImage", Hdr.SizeOfImage, "%08x\n");
+ printU32("SizeOfHeaders", Hdr.SizeOfHeaders, "%08x\n");
+ printU32("CheckSum", Hdr.CheckSum, "%08x\n");
+ printU16("Subsystem", Hdr.Subsystem, "%08x");
+ printOptionalEnumName(Hdr.Subsystem, makeArrayRef(PEWindowsSubsystem));
+ outs() << '\n';
+
+ printU16("DllCharacteristics", Hdr.DLLCharacteristics, "%08x\n");
+#define FLAG(Name) \
+ if (Hdr.DLLCharacteristics & COFF::IMAGE_DLL_CHARACTERISTICS_##Name) \
+ outs() << "\t\t\t\t\t" << #Name << '\n';
+ FLAG(HIGH_ENTROPY_VA);
+ FLAG(DYNAMIC_BASE);
+ FLAG(FORCE_INTEGRITY);
+ FLAG(NX_COMPAT);
+ FLAG(NO_ISOLATION);
+ FLAG(NO_SEH);
+ FLAG(NO_BIND);
+ FLAG(APPCONTAINER);
+ FLAG(WDM_DRIVER);
+ FLAG(GUARD_CF);
+ FLAG(TERMINAL_SERVER_AWARE);
+#undef FLAG
+
+ printAddr("SizeOfStackReserve", Hdr.SizeOfStackReserve);
+ printAddr("SizeOfStackCommit", Hdr.SizeOfStackCommit);
+ printAddr("SizeOfHeapReserve", Hdr.SizeOfHeapReserve);
+ printAddr("SizeOfHeapCommit", Hdr.SizeOfHeapCommit);
+ printU32("LoaderFlags", Hdr.LoaderFlags, "%08x\n");
+ printU32("NumberOfRvaAndSizes", Hdr.NumberOfRvaAndSize, "%08x\n");
+
+ static const char *DirName[COFF::NUM_DATA_DIRECTORIES + 1] = {
+ "Export Directory [.edata (or where ever we found it)]",
+ "Import Directory [parts of .idata]",
+ "Resource Directory [.rsrc]",
+ "Exception Directory [.pdata]",
+ "Security Directory",
+ "Base Relocation Directory [.reloc]",
+ "Debug Directory",
+ "Description Directory",
+ "Special Directory",
+ "Thread Storage Directory [.tls]",
+ "Load Configuration Directory",
+ "Bound Import Directory",
+ "Import Address Table Directory",
+ "Delay Import Directory",
+ "CLR Runtime Header",
+ "Reserved",
+ };
+ outs() << "\nThe Data Directory\n";
+ for (uint32_t I = 0; I != array_lengthof(DirName); ++I) {
+ uint32_t Addr = 0, Size = 0;
+ if (const data_directory *Data = Obj.getDataDirectory(I)) {
+ Addr = Data->RelativeVirtualAddress;
+ Size = Data->Size;
+ }
+ outs() << format("Entry %x ", I) << formatAddr(Addr)
+ << format(" %08x %s\n", uint32_t(Size), DirName[I]);
+ }
+}
+
// Returns the name of the unwind code.
static StringRef getUnwindCodeTypeName(uint8_t Code) {
switch(Code) {
@@ -278,10 +431,7 @@ static void printTLSDirectory(const COFFObjectFile *Obj) {
return;
const data_directory *DataDir = Obj->getDataDirectory(COFF::TLS_TABLE);
- if (!DataDir)
- reportError("missing data dir for TLS table", Obj->getFileName());
-
- if (DataDir->RelativeVirtualAddress == 0)
+ if (!DataDir || DataDir->RelativeVirtualAddress == 0)
return;
uintptr_t IntPtr = 0;
@@ -625,12 +775,47 @@ void objdump::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
}
}
-void objdump::printCOFFFileHeader(const object::ObjectFile *Obj) {
- const COFFObjectFile *file = dyn_cast<const COFFObjectFile>(Obj);
- printTLSDirectory(file);
- printLoadConfiguration(file);
- printImportTables(file);
- printExportTable(file);
+void objdump::printCOFFFileHeader(const COFFObjectFile &Obj) {
+ COFFDumper CD(Obj);
+ const uint16_t Cha = Obj.getCharacteristics();
+ outs() << "Characteristics 0x" << Twine::utohexstr(Cha) << '\n';
+#define FLAG(F, Name) \
+ if (Cha & F) \
+ outs() << '\t' << Name << '\n';
+ FLAG(COFF::IMAGE_FILE_RELOCS_STRIPPED, "relocations stripped");
+ FLAG(COFF::IMAGE_FILE_EXECUTABLE_IMAGE, "executable");
+ FLAG(COFF::IMAGE_FILE_LINE_NUMS_STRIPPED, "line numbers stripped");
+ FLAG(COFF::IMAGE_FILE_LOCAL_SYMS_STRIPPED, "symbols stripped");
+ FLAG(COFF::IMAGE_FILE_LARGE_ADDRESS_AWARE, "large address aware");
+ FLAG(COFF::IMAGE_FILE_BYTES_REVERSED_LO, "little endian");
+ FLAG(COFF::IMAGE_FILE_32BIT_MACHINE, "32 bit words");
+ FLAG(COFF::IMAGE_FILE_DEBUG_STRIPPED, "debugging information removed");
+ FLAG(COFF::IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP,
+ "copy to swap file if on removable media");
+ FLAG(COFF::IMAGE_FILE_NET_RUN_FROM_SWAP,
+ "copy to swap file if on network media");
+ FLAG(COFF::IMAGE_FILE_SYSTEM, "system file");
+ FLAG(COFF::IMAGE_FILE_DLL, "DLL");
+ FLAG(COFF::IMAGE_FILE_UP_SYSTEM_ONLY, "run only on uniprocessor machine");
+ FLAG(COFF::IMAGE_FILE_BYTES_REVERSED_HI, "big endian");
+#undef FLAG
+
+ // TODO Support PE_IMAGE_DEBUG_TYPE_REPRO.
+ // Since ctime(3) returns a 26 character string of the form:
+ // "Sun Sep 16 01:03:52 1973\n\0"
+ // just print 24 characters.
+ const time_t Timestamp = Obj.getTimeDateStamp();
+ outs() << format("\nTime/Date %.24s\n", ctime(&Timestamp));
+
+ if (const pe32_header *Hdr = Obj.getPE32Header())
+ CD.printPEHeader<pe32_header>(*Hdr);
+ else if (const pe32plus_header *Hdr = Obj.getPE32PlusHeader())
+ CD.printPEHeader<pe32plus_header>(*Hdr);
+
+ printTLSDirectory(&Obj);
+ printLoadConfiguration(&Obj);
+ printImportTables(&Obj);
+ printExportTable(&Obj);
}
void objdump::printCOFFSymbolTable(const object::COFFImportFile *i) {
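
The COFFDumper added above drives its header printing off small EnumEntry tables plus printOptionalEnumName, which prints a parenthesized symbolic name only when the raw value is recognized. The sketch below is a standalone illustration of that lookup pattern; the table contents are illustrative, not the dumper's actual data.

// Minimal sketch of the EnumEntry lookup pattern: map a raw header field to
// its symbolic name, printing nothing for unknown values.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

namespace {
template <typename T> struct EnumEntry {
  T Value;
  llvm::StringRef Name;
};
} // namespace

static void printMagicName(uint16_t Magic) {
  static const EnumEntry<uint16_t> Names[] = {{0x10b, "PE32"},
                                              {0x20b, "PE32+"}};
  for (const EnumEntry<uint16_t> &E : llvm::makeArrayRef(Names))
    if (E.Value == Magic) {
      llvm::outs() << "\t(" << E.Name << ")\n";
      return;
    }
}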
diff --git a/llvm/tools/llvm-objdump/COFFDump.h b/llvm/tools/llvm-objdump/COFFDump.h
index 21f97bdeb83c..f933f79523a0 100644
--- a/llvm/tools/llvm-objdump/COFFDump.h
+++ b/llvm/tools/llvm-objdump/COFFDump.h
@@ -28,7 +28,7 @@ Error getCOFFRelocationValueString(const object::COFFObjectFile *Obj,
llvm::SmallVectorImpl<char> &Result);
void printCOFFUnwindInfo(const object::COFFObjectFile *O);
-void printCOFFFileHeader(const object::ObjectFile *O);
+void printCOFFFileHeader(const object::COFFObjectFile &Obj);
void printCOFFSymbolTable(const object::COFFImportFile *I);
void printCOFFSymbolTable(const object::COFFObjectFile *O);
} // namespace objdump
diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp
index da7415834c63..98e71497d022 100644
--- a/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -145,7 +145,7 @@ static uint64_t getSectionLMA(const ELFFile<ELFT> &Obj,
const object::ELFSectionRef &Sec) {
auto PhdrRangeOrErr = Obj.program_headers();
if (!PhdrRangeOrErr)
- report_fatal_error(toString(PhdrRangeOrErr.takeError()));
+ report_fatal_error(Twine(toString(PhdrRangeOrErr.takeError())));
// Search for a PT_LOAD segment containing the requested section. Use this
// segment's p_addr to calculate the section's LMA.
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 7c1fdf03542f..b0cf1f775ced 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -33,6 +33,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Option/ArgList.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
@@ -10053,6 +10053,10 @@ static void PrintLinkEditDataCommand(MachO::linkedit_data_command ld,
outs() << " cmd LC_DYLIB_CODE_SIGN_DRS\n";
else if (ld.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT)
outs() << " cmd LC_LINKER_OPTIMIZATION_HINT\n";
+ else if (ld.cmd == MachO::LC_DYLD_EXPORTS_TRIE)
+ outs() << " cmd LC_DYLD_EXPORTS_TRIE\n";
+ else if (ld.cmd == MachO::LC_DYLD_CHAINED_FIXUPS)
+ outs() << " cmd LC_DYLD_CHAINED_FIXUPS\n";
else
outs() << " cmd " << ld.cmd << " (?)\n";
outs() << " cmdsize " << ld.cmdsize;
@@ -10196,7 +10200,9 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
Command.C.cmd == MachO::LC_FUNCTION_STARTS ||
Command.C.cmd == MachO::LC_DATA_IN_CODE ||
Command.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS ||
- Command.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) {
+ Command.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT ||
+ Command.C.cmd == MachO::LC_DYLD_EXPORTS_TRIE ||
+ Command.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) {
MachO::linkedit_data_command Ld =
Obj->getLinkeditDataLoadCommand(Command);
PrintLinkEditDataCommand(Ld, Buf.size());
diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td
index 1b19733c65d0..9f27a6cdf163 100644
--- a/llvm/tools/llvm-objdump/ObjdumpOpts.td
+++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td
@@ -1,5 +1,12 @@
include "llvm/Option/OptParser.td"
+multiclass Eq<string name, string help> {
+ def NAME : Separate<["--"], name>;
+ def NAME #_eq : Joined<["--"], name #"=">,
+ Alias<!cast<Separate>(NAME)>,
+ HelpText<help>;
+}
+
def help : Flag<["--"], "help">,
HelpText<"Display available options (--help-hidden for more)">;
@@ -16,7 +23,8 @@ def adjust_vma_EQ : Joined<["--"], "adjust-vma=">,
HelpText<"Increase the displayed address by the specified offset">;
def all_headers : Flag<["--"], "all-headers">,
- HelpText<"Display all available header information">;
+ HelpText<"Display all available header information, "
+ "relocation entries and the symbol table">;
def : Flag<["-"], "x">, Alias<all_headers>, HelpText<"Alias for --all-headers">;
def arch_name_EQ : Joined<["--"], "arch-name=">,
@@ -32,11 +40,11 @@ def demangle : Flag<["--"], "demangle">, HelpText<"Demangle symbol names">;
def : Flag<["-"], "C">, Alias<demangle>, HelpText<"Alias for --demangle">;
def disassemble : Flag<["--"], "disassemble">,
- HelpText<"Display assembler mnemonics for the machine instructions">;
+ HelpText<"Disassemble all executable sections found in the input files">;
def : Flag<["-"], "d">, Alias<disassemble>, HelpText<"Alias for --disassemble">;
def disassemble_all : Flag<["--"], "disassemble-all">,
- HelpText<"Display assembler mnemonics for the machine instructions">;
+ HelpText<"Disassemble all sections found in the input files">;
def : Flag<["-"], "D">, Alias<disassemble_all>,
HelpText<"Alias for --disassemble-all">;
@@ -66,10 +74,12 @@ def : Flag<["-"], "R">, Alias<dynamic_reloc>,
HelpText<"Alias for --dynamic-reloc">;
def dwarf_EQ : Joined<["--"], "dwarf=">,
- HelpText<"Dump of dwarf debug sections">, Values<"frames">;
+ HelpText<"Dump the specified DWARF debug sections. The "
+ "only supported value is 'frames'">,
+ Values<"frames">;
def fault_map_section : Flag<["--"], "fault-map-section">,
- HelpText<"Display contents of faultmap section">;
+ HelpText<"Display the content of the fault map section">;
def file_headers : Flag<["--"], "file-headers">,
HelpText<"Display the contents of the overall file header">;
@@ -82,9 +92,10 @@ def : Flag<["-"], "s">, Alias<full_contents>,
HelpText<"Alias for --full-contents">;
def line_numbers : Flag<["--"], "line-numbers">,
- HelpText<"Display source line numbers with "
- "disassembly. Implies disassemble object">;
-def : Flag<["-"], "l">, Alias<line_numbers>,
+ HelpText<"When disassembling, display source line numbers. "
+ "Implies --disassemble">;
+def : Flag<["-"], "l">,
+ Alias<line_numbers>,
HelpText<"Alias for --line-numbers">;
def macho : Flag<["--"], "macho">,
@@ -104,7 +115,7 @@ def no_show_raw_insn : Flag<["--"], "no-show-raw-insn">,
"do not print the instruction bytes.">;
def no_leading_addr : Flag<["--"], "no-leading-addr">,
- HelpText<"Print no leading address">;
+ HelpText<"When disassembling, do not print leading addresses">;
def raw_clang_ast : Flag<["--"], "raw-clang-ast">,
HelpText<"Dump the raw binary contents of the clang AST section">;
@@ -143,15 +154,18 @@ def show_lma : Flag<["--"], "show-lma">,
HelpText<"Display LMA column when dumping ELF section headers">;
def source : Flag<["--"], "source">,
- HelpText<"Display source inlined with disassembly. Implies disassemble object">;
+ HelpText<"When disassembling, display source interleaved with the "
+ "disassembly. Implies --disassemble">;
def : Flag<["-"], "S">, Alias<source>, HelpText<"Alias for --source">;
def start_address_EQ : Joined<["--"], "start-address=">,
MetaVarName<"address">,
- HelpText<"Disassemble beginning at address">;
+ HelpText<"Set the start address for disassembling, "
+ "printing relocations and printing symbols">;
def stop_address_EQ : Joined<["--"], "stop-address=">,
MetaVarName<"address">,
- HelpText<"Stop disassembly at address">;
+ HelpText<"Set the stop address for disassembling, "
+ "printing relocations and printing symbols">;
def syms : Flag<["--"], "syms">,
HelpText<"Display the symbol table">;
@@ -180,19 +194,19 @@ def wide : Flag<["--"], "wide">,
HelpText<"Ignored for compatibility with GNU objdump">;
def : Flag<["-"], "w">, Alias<wide>;
-def prefix : Separate<["--"], "prefix">,
- HelpText<"Add prefix to absolute paths">;
-
-def prefix_strip : Separate<["--"], "prefix-strip">,
- HelpText<"Strip out initial directories from absolute "
- "paths. No effect without --prefix">;
+defm prefix : Eq<"prefix", "Add prefix to absolute paths">,
+ MetaVarName<"prefix">;
+defm prefix_strip
+ : Eq<"prefix-strip", "Strip out initial directories from absolute "
+ "paths. No effect without --prefix">,
+ MetaVarName<"prefix">;
def debug_vars_EQ : Joined<["--"], "debug-vars=">,
- Values<"unicode,ascii">;
-def : Flag<["--"], "debug-vars">,
HelpText<"Print the locations (in registers or memory) of "
- "source-level variables alongside disassembly">,
- Alias<debug_vars_EQ>, AliasArgs<["unicode"]>;
+ "source-level variables alongside disassembly. "
+ "Supported formats: ascii, unicode (default)">,
+ Values<"unicode,ascii">;
+def : Flag<["--"], "debug-vars">, Alias<debug_vars_EQ>, AliasArgs<["unicode"]>;
def debug_vars_indent_EQ : Joined<["--"], "debug-vars-indent=">,
HelpText<"Distance to indent the source-level variable display, "
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp
index c4cc5fe7e21c..b8fb2ed3d063 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.cpp
+++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp
@@ -58,6 +58,24 @@ objdump::getXCOFFSymbolCsectSMC(const XCOFFObjectFile *Obj,
return CsectAuxEntOrErr.get().getStorageMappingClass();
}
+Optional<object::SymbolRef>
+objdump::getXCOFFSymbolContainingSymbolRef(const XCOFFObjectFile *Obj,
+ const SymbolRef &Sym) {
+
+ const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl());
+ if (!SymRef.isCsectSymbol())
+ return None;
+
+ Expected<XCOFFCsectAuxRef> CsectAuxEntOrErr = SymRef.getXCOFFCsectAuxRef();
+ if (!CsectAuxEntOrErr || !CsectAuxEntOrErr.get().isLabel())
+ return None;
+ uint32_t Idx =
+ static_cast<uint32_t>(CsectAuxEntOrErr.get().getSectionOrLength());
+ DataRefImpl DRI;
+ DRI.p = Obj->getSymbolByIndex(Idx);
+ return SymbolRef(DRI, Obj);
+}
+
bool objdump::isLabel(const XCOFFObjectFile *Obj, const SymbolRef &Sym) {
const XCOFFSymbolRef SymRef = Obj->toSymbolRef(Sym.getRawDataRefImpl());
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.h b/llvm/tools/llvm-objdump/XCOFFDump.h
index dbf520021594..6796f00aef6f 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.h
+++ b/llvm/tools/llvm-objdump/XCOFFDump.h
@@ -20,6 +20,10 @@ Optional<XCOFF::StorageMappingClass>
getXCOFFSymbolCsectSMC(const object::XCOFFObjectFile *Obj,
const object::SymbolRef &Sym);
+Optional<object::SymbolRef>
+getXCOFFSymbolContainingSymbolRef(const object::XCOFFObjectFile *Obj,
+ const object::SymbolRef &Sym);
+
bool isLabel(const object::XCOFFObjectFile *Obj, const object::SymbolRef &Sym);
std::string getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo,
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 48ae92f734c7..6f6f543f2f47 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -47,6 +47,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
@@ -71,7 +72,6 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/StringSaver.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -297,16 +297,15 @@ void objdump::reportWarning(const Twine &Message, StringRef File) {
<< "'" << File << "': " << Message << "\n";
}
-LLVM_ATTRIBUTE_NORETURN void objdump::reportError(StringRef File,
- const Twine &Message) {
+[[noreturn]] void objdump::reportError(StringRef File, const Twine &Message) {
outs().flush();
WithColor::error(errs(), ToolName) << "'" << File << "': " << Message << "\n";
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void objdump::reportError(Error E, StringRef FileName,
- StringRef ArchiveName,
- StringRef ArchitectureName) {
+[[noreturn]] void objdump::reportError(Error E, StringRef FileName,
+ StringRef ArchiveName,
+ StringRef ArchitectureName) {
assert(E);
outs().flush();
WithColor::error(errs(), ToolName);
@@ -325,7 +324,7 @@ static void reportCmdLineWarning(const Twine &Message) {
WithColor::warning(errs(), ToolName) << Message << "\n";
}
-LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
+[[noreturn]] static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
@@ -1286,6 +1285,10 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (shouldAdjustVA(Section))
VMAAdjustment = AdjustVMA;
+ // In executable and shared objects, r_offset holds a virtual address.
+ // Subtract SectionAddr from the r_offset field of a relocation to get
+ // the section offset.
+ uint64_t RelAdjustment = Obj->isRelocatableObject() ? 0 : SectionAddr;
uint64_t Size;
uint64_t Index;
bool PrintedSection = false;
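
A tiny worked example of the RelAdjustment introduced above, using hypothetical addresses: in ET_EXEC/ET_DYN objects r_offset is a virtual address, so the section's load address must be subtracted before the offset can be compared with the byte index into the section.

// Hypothetical values only; demonstrates the virtual-address-to-section-offset
// adjustment applied to relocation offsets for non-relocatable objects.
#include <cstdint>

int main() {
  const uint64_t SectionAddr = 0x401000;  // assumed sh_addr of .text
  const uint64_t ROffset = 0x401234;      // r_offset of some relocation
  const bool IsRelocatableObject = false; // executable or shared object
  const uint64_t RelAdjustment = IsRelocatableObject ? 0 : SectionAddr;
  const uint64_t SectionOffset = ROffset - RelAdjustment; // 0x234
  return SectionOffset == 0x234 ? 0 : 1;
}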
@@ -1432,7 +1435,8 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
// For --reloc: print zero blocks patched by relocations, so that
// relocations can be shown in the dump.
if (RelCur != RelEnd)
- MaxOffset = RelCur->getOffset() - Index;
+ MaxOffset = std::min(RelCur->getOffset() - RelAdjustment - Index,
+ MaxOffset);
if (size_t N =
countSkippableZeroBytes(Bytes.slice(Index, MaxOffset))) {
@@ -1481,7 +1485,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (!PrintTarget)
if (Optional<uint64_t> MaybeTarget =
MIA->evaluateMemoryOperandAddress(
- Inst, SectionAddr + Index, Size)) {
+ Inst, STI, SectionAddr + Index, Size)) {
Target = *MaybeTarget;
PrintTarget = true;
// Do not print real address when symbolizing.
@@ -1581,7 +1585,7 @@ static void disassembleObject(const Target *TheTarget, const ObjectFile *Obj,
if (Obj->getArch() != Triple::hexagon) {
// Print relocation for instruction and data.
while (RelCur != RelEnd) {
- uint64_t Offset = RelCur->getOffset();
+ uint64_t Offset = RelCur->getOffset() - RelAdjustment;
// If this relocation is hidden, skip it.
if (getHidden(*RelCur) || SectionAddr + Offset < StartAddress) {
++RelCur;
@@ -1770,7 +1774,9 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) {
return;
const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
- if (!Elf || Elf->getEType() != ELF::ET_DYN) {
+ if (!Elf || !any_of(Elf->sections(), [](const ELFSectionRef Sec) {
+ return Sec.getType() == ELF::SHT_DYNAMIC;
+ })) {
reportError(Obj->getFileName(), "not a dynamic object");
return;
}
@@ -1779,7 +1785,12 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) {
if (DynRelSec.empty())
return;
- outs() << "DYNAMIC RELOCATION RECORDS\n";
+ outs() << "\nDYNAMIC RELOCATION RECORDS\n";
+ const uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
+ const uint32_t TypePadding = 24;
+ outs() << left_justify("OFFSET", OffsetPadding) << ' '
+ << left_justify("TYPE", TypePadding) << " VALUE\n";
+
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
for (const SectionRef &Section : DynRelSec)
for (const RelocationRef &Reloc : Section.relocations()) {
@@ -1789,8 +1800,8 @@ void objdump::printDynamicRelocations(const ObjectFile *Obj) {
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
reportError(std::move(E), Obj->getFileName());
- outs() << format(Fmt.data(), Address) << " " << RelocName << " "
- << ValueStr << "\n";
+ outs() << format(Fmt.data(), Address) << ' '
+ << left_justify(RelocName, TypePadding) << ' ' << ValueStr << '\n';
}
}
@@ -1922,7 +1933,8 @@ void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
if (!DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
for (auto I = O->symbol_begin(); I != O->symbol_end(); ++I)
- printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
+ printSymbol(O, *I, {}, FileName, ArchiveName, ArchitectureName,
+ DumpDynamic);
return;
}
@@ -1935,12 +1947,21 @@ void objdump::printSymbolTable(const ObjectFile *O, StringRef ArchiveName,
}
const ELFObjectFileBase *ELF = cast<const ELFObjectFileBase>(O);
- for (auto I = ELF->getDynamicSymbolIterators().begin();
- I != ELF->getDynamicSymbolIterators().end(); ++I)
- printSymbol(O, *I, FileName, ArchiveName, ArchitectureName, DumpDynamic);
+ auto Symbols = ELF->getDynamicSymbolIterators();
+ Expected<std::vector<VersionEntry>> SymbolVersionsOrErr =
+ ELF->readDynsymVersions();
+ if (!SymbolVersionsOrErr) {
+ reportWarning(toString(SymbolVersionsOrErr.takeError()), FileName);
+ SymbolVersionsOrErr = std::vector<VersionEntry>();
+ (void)!SymbolVersionsOrErr;
+ }
+ for (auto &Sym : Symbols)
+ printSymbol(O, Sym, *SymbolVersionsOrErr, FileName, ArchiveName,
+ ArchitectureName, DumpDynamic);
}
void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
+ ArrayRef<VersionEntry> SymbolVersions,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(O);
@@ -2029,22 +2050,66 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
} else if (Common) {
outs() << "*COM*";
} else if (Section == O->section_end()) {
- outs() << "*UND*";
+ if (O->isXCOFF()) {
+ XCOFFSymbolRef XCOFFSym = dyn_cast<const XCOFFObjectFile>(O)->toSymbolRef(
+ Symbol.getRawDataRefImpl());
+ if (XCOFF::N_DEBUG == XCOFFSym.getSectionNumber())
+ outs() << "*DEBUG*";
+ else
+ outs() << "*UND*";
+ } else
+ outs() << "*UND*";
} else {
StringRef SegmentName = getSegmentName(MachO, *Section);
if (!SegmentName.empty())
outs() << SegmentName << ",";
StringRef SectionName = unwrapOrError(Section->getName(), FileName);
outs() << SectionName;
- }
+ if (O->isXCOFF()) {
+ Optional<SymbolRef> SymRef = getXCOFFSymbolContainingSymbolRef(
+ dyn_cast<const XCOFFObjectFile>(O), Symbol);
+ if (SymRef) {
- if (Common || O->isELF()) {
- uint64_t Val =
- Common ? Symbol.getAlignment() : ELFSymbolRef(Symbol).getSize();
- outs() << '\t' << format(Fmt, Val);
+ Expected<StringRef> NameOrErr = SymRef.getValue().getName();
+
+ if (NameOrErr) {
+ outs() << " (csect:";
+ std::string SymName(NameOrErr.get());
+
+ if (Demangle)
+ SymName = demangle(SymName);
+
+ if (SymbolDescription)
+ SymName = getXCOFFSymbolDescription(
+ createSymbolInfo(O, SymRef.getValue()), SymName);
+
+ outs() << ' ' << SymName;
+ outs() << ") ";
+ } else
+ reportWarning(toString(NameOrErr.takeError()), FileName);
+ }
+ }
}
+ if (Common)
+ outs() << '\t' << format(Fmt, static_cast<uint64_t>(Symbol.getAlignment()));
+ else if (O->isXCOFF())
+ outs() << '\t'
+ << format(Fmt, dyn_cast<const XCOFFObjectFile>(O)->getSymbolSize(
+ Symbol.getRawDataRefImpl()));
+ else if (O->isELF())
+ outs() << '\t' << format(Fmt, ELFSymbolRef(Symbol).getSize());
+
if (O->isELF()) {
+ if (!SymbolVersions.empty()) {
+ const VersionEntry &Ver =
+ SymbolVersions[Symbol.getRawDataRefImpl().d.b - 1];
+ std::string Str;
+ if (!Ver.Name.empty())
+ Str = Ver.IsVerDef ? ' ' + Ver.Name : '(' + Ver.Name + ')';
+ outs() << ' ' << left_justify(Str, 12);
+ }
+
uint8_t Other = ELFSymbolRef(Symbol).getOther();
switch (Other) {
case ELF::STV_DEFAULT:
@@ -2066,10 +2131,14 @@ void objdump::printSymbol(const ObjectFile *O, const SymbolRef &Symbol,
outs() << " .hidden";
}
+ std::string SymName(Name);
if (Demangle)
- outs() << ' ' << demangle(std::string(Name)) << '\n';
- else
- outs() << ' ' << Name << '\n';
+ SymName = demangle(SymName);
+
+ if (O->isXCOFF() && SymbolDescription)
+ SymName = getXCOFFSymbolDescription(createSymbolInfo(O, Symbol), SymName);
+
+ outs() << ' ' << SymName << '\n';
}
static void printUnwindInfo(const ObjectFile *O) {
@@ -2176,7 +2245,7 @@ static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
return;
}
if (O->isCOFF())
- return printCOFFFileHeader(O);
+ return printCOFFFileHeader(cast<object::COFFObjectFile>(*O));
if (O->isWasm())
return printWasmFileHeader(O);
if (O->isMachO()) {
@@ -2431,6 +2500,11 @@ static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
}
}
+static void invalidArgValue(const opt::Arg *A) {
+ reportCmdLineError("'" + StringRef(A->getValue()) +
+ "' is not a valid value for '" + A->getSpelling() + "'");
+}
+
static std::vector<std::string>
commaSeparatedValues(const llvm::opt::InputArgList &InputArgs, int ID) {
std::vector<std::string> Values;
@@ -2504,8 +2578,11 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ);
DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes);
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_dwarf_EQ)) {
- DwarfDumpType =
- StringSwitch<DIDumpType>(A->getValue()).Case("frames", DIDT_DebugFrame);
+ DwarfDumpType = StringSwitch<DIDumpType>(A->getValue())
+ .Case("frames", DIDT_DebugFrame)
+ .Default(DIDT_Null);
+ if (DwarfDumpType == DIDT_Null)
+ invalidArgValue(A);
}
DynamicRelocations = InputArgs.hasArg(OBJDUMP_dynamic_reloc);
FaultMapSection = InputArgs.hasArg(OBJDUMP_fault_map_section);
@@ -2542,7 +2619,10 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) {
DbgVariables = StringSwitch<DebugVarsFormat>(A->getValue())
.Case("ascii", DVASCII)
- .Case("unicode", DVUnicode);
+ .Case("unicode", DVUnicode)
+ .Default(DVInvalid);
+ if (DbgVariables == DVInvalid)
+ invalidArgValue(A);
}
parseIntArg(InputArgs, OBJDUMP_debug_vars_indent_EQ, DbgIndent);
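
The symbol-version printing added to printSymbol above formats defined versions and required versions differently. The helper below is only a sketch of that formatting rule as it appears in the hunk (defined versions as a bare suffix, required versions in parentheses); the function itself is hypothetical.

// Sketch of the version-suffix formatting used in printSymbol.
#include <string>

static std::string formatVersionSuffix(const std::string &Name,
                                       bool IsVerDef) {
  if (Name.empty())
    return std::string();
  return IsVerDef ? ' ' + Name : '(' + Name + ')';
}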
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h
index 33fb3f207f8e..864a9920efbe 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -26,15 +26,12 @@ class ELFSectionRef;
class MachOObjectFile;
class MachOUniversalBinary;
class RelocationRef;
+struct VersionEntry;
} // namespace object
namespace objdump {
-enum DebugVarsFormat {
- DVDisabled,
- DVUnicode,
- DVASCII,
-};
+enum DebugVarsFormat { DVDisabled, DVUnicode, DVASCII, DVInvalid };
extern bool ArchiveHeaders;
extern int DbgIndent;
@@ -137,12 +134,13 @@ void printSymbolTable(const object::ObjectFile *O, StringRef ArchiveName,
StringRef ArchitectureName = StringRef(),
bool DumpDynamic = false);
void printSymbol(const object::ObjectFile *O, const object::SymbolRef &Symbol,
+ ArrayRef<object::VersionEntry> SymbolVersions,
StringRef FileName, StringRef ArchiveName,
StringRef ArchitectureName, bool DumpDynamic);
-LLVM_ATTRIBUTE_NORETURN void reportError(StringRef File, const Twine &Message);
-LLVM_ATTRIBUTE_NORETURN void reportError(Error E, StringRef FileName,
- StringRef ArchiveName = "",
- StringRef ArchitectureName = "");
+[[noreturn]] void reportError(StringRef File, const Twine &Message);
+[[noreturn]] void reportError(Error E, StringRef FileName,
+ StringRef ArchiveName = "",
+ StringRef ArchitectureName = "");
void reportWarning(const Twine &Message, StringRef File);
template <typename T, typename... Ts>
diff --git a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
index 3d2490509c03..b631bdf8f2b1 100644
--- a/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/ExplainOutputStyle.cpp
@@ -373,7 +373,7 @@ static void explainDbiModiSubstreamOffset(LinePrinter &P, DbiStream &Dbi,
++Index;
}
- DbiModuleDescriptor &Descriptor = *Prev;
+ const DbiModuleDescriptor &Descriptor = *Prev;
P.formatLine("which contains the descriptor for module {0} ({1}).", Index,
Descriptor.getModuleName());
}
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.cpp b/llvm/tools/llvm-pdbutil/LinePrinter.cpp
index 280c000bd65f..dd6ca5bf41b1 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.cpp
+++ b/llvm/tools/llvm-pdbutil/LinePrinter.cpp
@@ -100,7 +100,7 @@ bool LinePrinter::IsClassExcluded(const ClassLayout &Class) {
}
void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
- uint32_t StartOffset) {
+ uint64_t StartOffset) {
NewLine();
OS << Label << " (";
if (!Data.empty()) {
@@ -113,7 +113,7 @@ void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
}
void LinePrinter::formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
- uint64_t Base, uint32_t StartOffset) {
+ uint64_t Base, uint64_t StartOffset) {
NewLine();
OS << Label << " (";
if (!Data.empty()) {
@@ -131,7 +131,7 @@ struct Run {
Run() = default;
explicit Run(uint32_t Block) : Block(Block) {}
uint32_t Block = 0;
- uint32_t ByteLen = 0;
+ uint64_t ByteLen = 0;
};
} // namespace
@@ -143,7 +143,7 @@ static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
ArrayRef<support::ulittle32_t> Blocks = Layout.Blocks;
assert(!Blocks.empty());
- uint32_t StreamBytesRemaining = Layout.Length;
+ uint64_t StreamBytesRemaining = Layout.Length;
uint32_t CurrentBlock = Blocks[0];
Runs.emplace_back(CurrentBlock);
while (!Blocks.empty()) {
@@ -153,7 +153,8 @@ static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
Runs.emplace_back(NextBlock);
CurrentRun = &Runs.back();
}
- uint32_t Used = std::min(BlockSize, StreamBytesRemaining);
+ uint64_t Used =
+ std::min(static_cast<uint64_t>(BlockSize), StreamBytesRemaining);
CurrentRun->ByteLen += Used;
StreamBytesRemaining -= Used;
CurrentBlock = NextBlock;
@@ -162,7 +163,7 @@ static std::vector<Run> computeBlockRuns(uint32_t BlockSize,
return Runs;
}
-static std::pair<Run, uint32_t> findRun(uint32_t Offset, ArrayRef<Run> Runs) {
+static std::pair<Run, uint64_t> findRun(uint64_t Offset, ArrayRef<Run> Runs) {
for (const auto &R : Runs) {
if (Offset < R.ByteLen)
return std::make_pair(R, Offset);
@@ -173,8 +174,8 @@ static std::pair<Run, uint32_t> findRun(uint32_t Offset, ArrayRef<Run> Runs) {
void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
uint32_t StreamIdx,
- StringRef StreamPurpose, uint32_t Offset,
- uint32_t Size) {
+ StringRef StreamPurpose, uint64_t Offset,
+ uint64_t Size) {
if (StreamIdx >= File.getNumStreams()) {
formatLine("Stream {0}: Not present", StreamIdx);
return;
@@ -193,7 +194,7 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
return;
}
- uint32_t End =
+ uint64_t End =
(Size == 0) ? S->getLength() : std::min(Offset + Size, S->getLength());
Size = End - Offset;
@@ -222,10 +223,10 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
OS << "\n";
Run FoundRun;
- uint32_t RunOffset;
+ uint64_t RunOffset;
std::tie(FoundRun, RunOffset) = findRun(Substream.Offset, Runs);
assert(FoundRun.ByteLen >= RunOffset);
- uint32_t Len = FoundRun.ByteLen - RunOffset;
+ uint64_t Len = FoundRun.ByteLen - RunOffset;
Len = std::min(Len, Reader.bytesRemaining());
uint64_t Base = FoundRun.Block * File.getBlockSize() + RunOffset;
ArrayRef<uint8_t> Data;
@@ -246,13 +247,14 @@ void LinePrinter::formatMsfStreamData(StringRef Label, PDBFile &File,
void LinePrinter::formatMsfStreamBlocks(
PDBFile &File, const msf::MSFStreamLayout &StreamLayout) {
auto Blocks = makeArrayRef(StreamLayout.Blocks);
- uint32_t L = StreamLayout.Length;
+ uint64_t L = StreamLayout.Length;
while (L > 0) {
NewLine();
assert(!Blocks.empty());
OS << formatv("Block {0} (\n", uint32_t(Blocks.front()));
- uint32_t UsedBytes = std::min(L, File.getBlockSize());
+ uint64_t UsedBytes =
+ std::min(L, static_cast<uint64_t>(File.getBlockSize()));
ArrayRef<uint8_t> BlockData =
cantFail(File.getBlockData(Blocks.front(), File.getBlockSize()));
uint64_t BaseOffset = Blocks.front();
@@ -267,7 +269,7 @@ void LinePrinter::formatMsfStreamBlocks(
}
}
-bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint32_t Size) {
+bool LinePrinter::IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size) {
if (IsItemExcluded(TypeName, IncludeTypeFilters, ExcludeTypeFilters))
return true;
if (Size < opts::pretty::SizeThreshold)
diff --git a/llvm/tools/llvm-pdbutil/LinePrinter.h b/llvm/tools/llvm-pdbutil/LinePrinter.h
index 7ecfae17354f..aa8159c0e094 100644
--- a/llvm/tools/llvm-pdbutil/LinePrinter.h
+++ b/llvm/tools/llvm-pdbutil/LinePrinter.h
@@ -49,13 +49,13 @@ public:
}
void formatBinary(StringRef Label, ArrayRef<uint8_t> Data,
- uint32_t StartOffset);
+ uint64_t StartOffset);
void formatBinary(StringRef Label, ArrayRef<uint8_t> Data, uint64_t BaseAddr,
- uint32_t StartOffset);
+ uint64_t StartOffset);
void formatMsfStreamData(StringRef Label, PDBFile &File, uint32_t StreamIdx,
- StringRef StreamPurpose, uint32_t Offset,
- uint32_t Size);
+ StringRef StreamPurpose, uint64_t Offset,
+ uint64_t Size);
void formatMsfStreamData(StringRef Label, PDBFile &File,
const msf::MSFStreamLayout &Stream,
BinarySubstreamRef Substream);
@@ -66,7 +66,7 @@ public:
int getIndentLevel() const { return CurrentIndent; }
bool IsClassExcluded(const ClassLayout &Class);
- bool IsTypeExcluded(llvm::StringRef TypeName, uint32_t Size);
+ bool IsTypeExcluded(llvm::StringRef TypeName, uint64_t Size);
bool IsSymbolExcluded(llvm::StringRef SymbolName);
bool IsCompilandExcluded(llvm::StringRef CompilandName);
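The LinePrinter changes above widen MSF stream offsets, sizes, and byte counts from uint32_t to uint64_t, matching the 64-bit stream lengths they are compared against. Two details the hunks highlight, shown here as a small standalone sketch (the function names are illustrative): std::min needs both operands in the common 64-bit type, and range arithmetic such as Offset + Size should be done in 64 bits so it cannot wrap.

// Mixed 32/64-bit byte accounting without wrap-around.
#include <algorithm>
#include <cstdint>

uint64_t bytesUsedInBlock(uint32_t BlockSize, uint64_t StreamBytesRemaining) {
  // std::min deduces a single type; cast the 32-bit block size up.
  return std::min(static_cast<uint64_t>(BlockSize), StreamBytesRemaining);
}

uint64_t clampRange(uint64_t Offset, uint64_t Size, uint64_t StreamLength) {
  // With 64-bit operands, Offset + Size cannot overflow for realistic inputs.
  return (Size == 0) ? StreamLength : std::min(Offset + Size, StreamLength);
}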
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 66d70120ac9b..fd67cac3cdd2 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -521,9 +521,9 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
// Find hot/warm functions in sample profile which is cold in instr profile
// and adjust the profiles of those functions in the instr profile.
for (const auto &PD : Reader->getProfiles()) {
- StringRef FName = PD.getKey();
- const sampleprof::FunctionSamples &FS = PD.getValue();
- auto It = InstrProfileMap.find(FName);
+ auto &FContext = PD.first;
+ const sampleprof::FunctionSamples &FS = PD.second;
+ auto It = InstrProfileMap.find(FContext.toString());
if (FS.getHeadSamples() > ColdSampleThreshold &&
It != InstrProfileMap.end() &&
It->second.MaxCount <= ColdInstrThreshold &&
@@ -690,7 +690,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
bool SampleMergeColdContext, bool SampleTrimColdContext,
bool SampleColdContextFrameDepth, FailureMode FailMode) {
using namespace sampleprof;
- StringMap<FunctionSamples> ProfileMap;
+ SampleProfileMap ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
LLVMContext Context;
sampleprof::ProfileSymbolList WriterList;
@@ -716,7 +716,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
continue;
}
- StringMap<FunctionSamples> &Profiles = Reader->getProfiles();
+ SampleProfileMap &Profiles = Reader->getProfiles();
if (ProfileIsProbeBased.hasValue() &&
ProfileIsProbeBased != FunctionSamples::ProfileIsProbeBased)
exitWithError(
@@ -725,19 +725,19 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
if (ProfileIsCS.hasValue() && ProfileIsCS != FunctionSamples::ProfileIsCS)
exitWithError("cannot merge CS profile with non-CS profile");
ProfileIsCS = FunctionSamples::ProfileIsCS;
- for (StringMap<FunctionSamples>::iterator I = Profiles.begin(),
- E = Profiles.end();
+ for (SampleProfileMap::iterator I = Profiles.begin(), E = Profiles.end();
I != E; ++I) {
sampleprof_error Result = sampleprof_error::success;
FunctionSamples Remapped =
Remapper ? remapSamples(I->second, *Remapper, Result)
: FunctionSamples();
FunctionSamples &Samples = Remapper ? Remapped : I->second;
- StringRef FName = Samples.getNameWithContext();
- MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
+ SampleContext FContext = Samples.getContext();
+ MergeResult(Result, ProfileMap[FContext].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
- handleMergeWriterError(errorCodeToError(EC), Input.Filename, FName);
+ handleMergeWriterError(errorCodeToError(EC), Input.Filename,
+ FContext.toString());
}
}
@@ -759,7 +759,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
SampleProfColdThreshold, SampleTrimColdContext,
- SampleMergeColdContext, SampleColdContextFrameDepth);
+ SampleMergeColdContext, SampleColdContextFrameDepth, false);
}
auto WriterOrErr =
@@ -836,7 +836,7 @@ static void parseInputFilenamesFile(MemoryBuffer *Buffer,
if (SanitizedEntry.startswith("#"))
continue;
// If there's no comma, it's an unweighted profile.
- else if (SanitizedEntry.find(',') == StringRef::npos)
+ else if (!SanitizedEntry.contains(','))
addWeightedInput(WFV, {std::string(SanitizedEntry), 1});
else
addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
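A readability-only change: StringRef::contains subsumes the find() != npos idiom for both characters and substrings. A one-line sketch (isWeighted is a hypothetical helper):

#include "llvm/ADT/StringRef.h"

static bool isWeighted(llvm::StringRef Entry) {
  return Entry.contains(','); // same as Entry.find(',') != StringRef::npos
}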
@@ -1022,8 +1022,8 @@ static void overlapInstrProfile(const std::string &BaseFilename,
namespace {
struct SampleOverlapStats {
- StringRef BaseName;
- StringRef TestName;
+ SampleContext BaseName;
+ SampleContext TestName;
// Number of overlap units
uint64_t OverlapCount;
// Total samples of overlap units
@@ -1226,6 +1226,9 @@ public:
/// Load profiles specified by BaseFilename and TestFilename.
std::error_code loadProfiles();
+ using FuncSampleStatsMap =
+ std::unordered_map<SampleContext, FuncSampleStats, SampleContext::Hash>;
+
private:
SampleOverlapStats ProfOverlap;
SampleOverlapStats HotFuncOverlap;
@@ -1236,8 +1239,8 @@ private:
std::unique_ptr<sampleprof::SampleProfileReader> TestReader;
// BaseStats and TestStats hold FuncSampleStats for each function, with
// function name as the key.
- StringMap<FuncSampleStats> BaseStats;
- StringMap<FuncSampleStats> TestStats;
+ FuncSampleStatsMap BaseStats;
+ FuncSampleStatsMap TestStats;
// Low similarity threshold in floating point number
double LowSimilarityThreshold;
// Block samples above BaseHotThreshold or TestHotThreshold are considered hot
@@ -1276,8 +1279,8 @@ private:
void updateHotBlockOverlap(uint64_t BaseSample, uint64_t TestSample,
uint64_t HotBlockCount);
- void getHotFunctions(const StringMap<FuncSampleStats> &ProfStats,
- StringMap<FuncSampleStats> &HotFunc,
+ void getHotFunctions(const FuncSampleStatsMap &ProfStats,
+ FuncSampleStatsMap &HotFunc,
uint64_t HotThreshold) const;
void computeHotFuncOverlap();
@@ -1381,26 +1384,26 @@ void SampleOverlapAggregator::updateHotBlockOverlap(uint64_t BaseSample,
}
void SampleOverlapAggregator::getHotFunctions(
- const StringMap<FuncSampleStats> &ProfStats,
- StringMap<FuncSampleStats> &HotFunc, uint64_t HotThreshold) const {
+ const FuncSampleStatsMap &ProfStats, FuncSampleStatsMap &HotFunc,
+ uint64_t HotThreshold) const {
for (const auto &F : ProfStats) {
if (isFunctionHot(F.second, HotThreshold))
- HotFunc.try_emplace(F.first(), F.second);
+ HotFunc.emplace(F.first, F.second);
}
}
void SampleOverlapAggregator::computeHotFuncOverlap() {
- StringMap<FuncSampleStats> BaseHotFunc;
+ FuncSampleStatsMap BaseHotFunc;
getHotFunctions(BaseStats, BaseHotFunc, BaseHotThreshold);
HotFuncOverlap.BaseCount = BaseHotFunc.size();
- StringMap<FuncSampleStats> TestHotFunc;
+ FuncSampleStatsMap TestHotFunc;
getHotFunctions(TestStats, TestHotFunc, TestHotThreshold);
HotFuncOverlap.TestCount = TestHotFunc.size();
HotFuncOverlap.UnionCount = HotFuncOverlap.TestCount;
for (const auto &F : BaseHotFunc) {
- if (TestHotFunc.count(F.first()))
+ if (TestHotFunc.count(F.first))
++HotFuncOverlap.OverlapCount;
else
++HotFuncOverlap.UnionCount;
@@ -1612,23 +1615,25 @@ double SampleOverlapAggregator::computeSampleFunctionOverlap(
void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
using namespace sampleprof;
- StringMap<const FunctionSamples *> BaseFuncProf;
+ std::unordered_map<SampleContext, const FunctionSamples *,
+ SampleContext::Hash>
+ BaseFuncProf;
const auto &BaseProfiles = BaseReader->getProfiles();
for (const auto &BaseFunc : BaseProfiles) {
- BaseFuncProf.try_emplace(BaseFunc.second.getNameWithContext(),
- &(BaseFunc.second));
+ BaseFuncProf.emplace(BaseFunc.second.getContext(), &(BaseFunc.second));
}
ProfOverlap.UnionCount = BaseFuncProf.size();
const auto &TestProfiles = TestReader->getProfiles();
for (const auto &TestFunc : TestProfiles) {
SampleOverlapStats FuncOverlap;
- FuncOverlap.TestName = TestFunc.second.getNameWithContext();
+ FuncOverlap.TestName = TestFunc.second.getContext();
assert(TestStats.count(FuncOverlap.TestName) &&
"TestStats should have records for all functions in test profile "
"except inlinees");
FuncOverlap.TestSample = TestStats[FuncOverlap.TestName].SampleSum;
+ bool Matched = false;
const auto Match = BaseFuncProf.find(FuncOverlap.TestName);
if (Match == BaseFuncProf.end()) {
const FuncSampleStats &FuncStats = TestStats[FuncOverlap.TestName];
@@ -1650,7 +1655,7 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
// Two functions match with each other. Compute function-level overlap and
// aggregate them into profile-level overlap.
- FuncOverlap.BaseName = Match->second->getNameWithContext();
+ FuncOverlap.BaseName = Match->second->getContext();
assert(BaseStats.count(FuncOverlap.BaseName) &&
"BaseStats should have records for all functions in base profile "
"except inlinees");
@@ -1673,6 +1678,7 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
// Remove matched base functions for later reporting functions not found
// in test profile.
BaseFuncProf.erase(Match);
+ Matched = true;
}
// Print function-level similarity information if specified by options.
@@ -1680,11 +1686,10 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
"TestStats should have records for all functions in test profile "
"except inlinees");
if (TestStats[FuncOverlap.TestName].MaxSample >= FuncFilter.ValueCutoff ||
- (Match != BaseFuncProf.end() &&
- FuncOverlap.Similarity < LowSimilarityThreshold) ||
- (Match != BaseFuncProf.end() && !FuncFilter.NameFilter.empty() &&
- FuncOverlap.BaseName.find(FuncFilter.NameFilter) !=
- FuncOverlap.BaseName.npos)) {
+ (Matched && FuncOverlap.Similarity < LowSimilarityThreshold) ||
+ (Matched && !FuncFilter.NameFilter.empty() &&
+ FuncOverlap.BaseName.toString().find(FuncFilter.NameFilter) !=
+ std::string::npos)) {
assert(ProfOverlap.BaseSample > 0 &&
"Total samples in base profile should be greater than 0");
FuncOverlap.BaseWeight =
@@ -1699,11 +1704,10 @@ void SampleOverlapAggregator::computeSampleProfileOverlap(raw_fd_ostream &OS) {
// Traverse through functions in base profile but not in test profile.
for (const auto &F : BaseFuncProf) {
- assert(BaseStats.count(F.second->getNameWithContext()) &&
+ assert(BaseStats.count(F.second->getContext()) &&
"BaseStats should have records for all functions in base profile "
"except inlinees");
- const FuncSampleStats &FuncStats =
- BaseStats[F.second->getNameWithContext()];
+ const FuncSampleStats &FuncStats = BaseStats[F.second->getContext()];
++ProfOverlap.BaseUniqueCount;
ProfOverlap.BaseUniqueSample += FuncStats.SampleSum;
@@ -1734,7 +1738,7 @@ void SampleOverlapAggregator::initializeSampleProfileOverlap() {
FuncSampleStats FuncStats;
getFuncSampleStats(I.second, FuncStats, BaseHotThreshold);
ProfOverlap.BaseSample += FuncStats.SampleSum;
- BaseStats.try_emplace(I.second.getNameWithContext(), FuncStats);
+ BaseStats.emplace(I.second.getContext(), FuncStats);
}
const auto &TestProf = TestReader->getProfiles();
@@ -1743,7 +1747,7 @@ void SampleOverlapAggregator::initializeSampleProfileOverlap() {
FuncSampleStats FuncStats;
getFuncSampleStats(I.second, FuncStats, TestHotThreshold);
ProfOverlap.TestSample += FuncStats.SampleSum;
- TestStats.try_emplace(I.second.getNameWithContext(), FuncStats);
+ TestStats.emplace(I.second.getContext(), FuncStats);
}
ProfOverlap.BaseName = StringRef(BaseFilename);
@@ -1807,13 +1811,15 @@ void SampleOverlapAggregator::dumpFuncSimilarity(raw_fd_ostream &OS) const {
FOS.PadToColumn(TestSampleCol);
FOS << F.second.TestSample;
FOS.PadToColumn(FuncNameCol);
- FOS << F.second.TestName << "\n";
+ FOS << F.second.TestName.toString() << "\n";
}
}
void SampleOverlapAggregator::dumpProgramSummary(raw_fd_ostream &OS) const {
- OS << "Profile overlap infomation for base_profile: " << ProfOverlap.BaseName
- << " and test_profile: " << ProfOverlap.TestName << "\nProgram level:\n";
+ OS << "Profile overlap infomation for base_profile: "
+ << ProfOverlap.BaseName.toString()
+ << " and test_profile: " << ProfOverlap.TestName.toString()
+ << "\nProgram level:\n";
OS << " Whole program profile similarity: "
<< format("%.3f%%", ProfOverlap.Similarity * 100) << "\n";
@@ -1909,22 +1915,13 @@ std::error_code SampleOverlapAggregator::loadProfiles() {
// Load BaseHotThreshold and TestHotThreshold as 99-percentile threshold in
// profile summary.
- const uint64_t HotCutoff = 990000;
ProfileSummary &BasePS = BaseReader->getSummary();
- for (const auto &SummaryEntry : BasePS.getDetailedSummary()) {
- if (SummaryEntry.Cutoff == HotCutoff) {
- BaseHotThreshold = SummaryEntry.MinCount;
- break;
- }
- }
-
ProfileSummary &TestPS = TestReader->getSummary();
- for (const auto &SummaryEntry : TestPS.getDetailedSummary()) {
- if (SummaryEntry.Cutoff == HotCutoff) {
- TestHotThreshold = SummaryEntry.MinCount;
- break;
- }
- }
+ BaseHotThreshold =
+ ProfileSummaryBuilder::getHotCountThreshold(BasePS.getDetailedSummary());
+ TestHotThreshold =
+ ProfileSummaryBuilder::getHotCountThreshold(TestPS.getDetailedSummary());
+
return std::error_code();
}
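The hunk above replaces two hand-written scans of the detailed summary for the 990000 cutoff with ProfileSummaryBuilder::getHotCountThreshold, which centralizes that lookup (and, presumably, respects the configurable hot-cutoff option rather than a hard-coded constant). For reference, a sketch of what the removed loops computed:

// The MinCount of the detailed-summary entry at the hot cutoff
// (990000 out of 1000000, i.e. the 99th percentile).
#include "llvm/IR/ProfileSummary.h"
#include <cstdint>

static uint64_t hotThresholdFromSummary(const llvm::SummaryEntryVector &DS) {
  for (const llvm::ProfileSummaryEntry &E : DS)
    if (E.Cutoff == 990000)
      return E.MinCount;
  return 0; // no 99% entry present
}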
@@ -2111,9 +2108,8 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (FuncIsCS != ShowCS)
continue;
}
- bool Show =
- ShowAllFunctions || (!ShowFunction.empty() &&
- Func.Name.find(ShowFunction) != Func.Name.npos);
+ bool Show = ShowAllFunctions ||
+ (!ShowFunction.empty() && Func.Name.contains(ShowFunction));
bool doTextFormatDump = (Show && TextFormat);
@@ -2271,7 +2267,7 @@ static void showSectionInfo(sampleprof::SampleProfileReader *Reader,
namespace {
struct HotFuncInfo {
- StringRef FuncName;
+ std::string FuncName;
uint64_t TotalCount;
double TotalCountPercent;
uint64_t MaxCount;
@@ -2282,8 +2278,8 @@ struct HotFuncInfo {
EntryCount(0) {}
HotFuncInfo(StringRef FN, uint64_t TS, double TSP, uint64_t MS, uint64_t ES)
- : FuncName(FN), TotalCount(TS), TotalCountPercent(TSP), MaxCount(MS),
- EntryCount(ES) {}
+ : FuncName(FN.begin(), FN.end()), TotalCount(TS), TotalCountPercent(TSP),
+ MaxCount(MS), EntryCount(ES) {}
};
} // namespace
@@ -2298,7 +2294,7 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
uint64_t HotFuncCount, uint64_t TotalFuncCount,
uint64_t HotProfCount, uint64_t TotalProfCount,
const std::string &HotFuncMetric,
- raw_fd_ostream &OS) {
+ uint32_t TopNFunctions, raw_fd_ostream &OS) {
assert(ColumnOffset.size() == ColumnTitle.size() &&
"ColumnOffset and ColumnTitle should have the same size");
assert(ColumnTitle.size() >= 4 &&
@@ -2327,7 +2323,10 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
}
FOS << "\n";
- for (const HotFuncInfo &R : PrintValues) {
+ uint32_t Count = 0;
+ for (const auto &R : PrintValues) {
+ if (TopNFunctions && (Count++ == TopNFunctions))
+ break;
FOS.PadToColumn(ColumnOffset[0]);
FOS << R.TotalCount << " (" << format("%.2f%%", R.TotalCountPercent) << ")";
FOS.PadToColumn(ColumnOffset[1]);
@@ -2339,9 +2338,9 @@ static void dumpHotFunctionList(const std::vector<std::string> &ColumnTitle,
}
}
-static int
-showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
- ProfileSummary &PS, raw_fd_ostream &OS) {
+static int showHotFunctionList(const sampleprof::SampleProfileMap &Profiles,
+ ProfileSummary &PS, uint32_t TopN,
+ raw_fd_ostream &OS) {
using namespace sampleprof;
const uint32_t HotFuncCutoff = 990000;
@@ -2391,18 +2390,19 @@ showHotFunctionList(const StringMap<sampleprof::FunctionSamples> &Profiles,
? (Func.getTotalSamples() * 100.0) / ProfileTotalSample
: 0;
PrintValues.emplace_back(HotFuncInfo(
- Func.getNameWithContext(), Func.getTotalSamples(), TotalSamplePercent,
- FuncPair.second.second, Func.getEntrySamples()));
+ Func.getContext().toString(), Func.getTotalSamples(),
+ TotalSamplePercent, FuncPair.second.second, Func.getEntrySamples()));
}
dumpHotFunctionList(ColumnTitle, ColumnOffset, PrintValues, HotFuncCount,
Profiles.size(), HotFuncSample, ProfileTotalSample,
- Metric, OS);
+ Metric, TopN, OS);
return 0;
}
static int showSampleProfile(const std::string &Filename, bool ShowCounts,
- bool ShowAllFunctions, bool ShowDetailedSummary,
+ uint32_t TopN, bool ShowAllFunctions,
+ bool ShowDetailedSummary,
const std::string &ShowFunction,
bool ShowProfileSymbolList,
bool ShowSectionInfoOnly, bool ShowHotFuncList,
@@ -2426,7 +2426,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
if (ShowAllFunctions || ShowFunction.empty())
Reader->dump(OS);
else
- Reader->dumpFunctionProfile(ShowFunction, OS);
+ // TODO: parse context string to support filtering by contexts.
+ Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
if (ShowProfileSymbolList) {
std::unique_ptr<sampleprof::ProfileSymbolList> ReaderList =
@@ -2440,8 +2441,8 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
PS.printDetailedSummary(OS);
}
- if (ShowHotFuncList)
- showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), OS);
+ if (ShowHotFuncList || TopN)
+ showHotFunctionList(Reader->getProfiles(), Reader->getSummary(), TopN, OS);
return 0;
}
@@ -2532,10 +2533,10 @@ static int show_main(int argc, const char *argv[]) {
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
TextFormat, ShowBinaryIds, OS);
else
- return showSampleProfile(Filename, ShowCounts, ShowAllFunctions,
- ShowDetailedSummary, ShowFunction,
- ShowProfileSymbolList, ShowSectionInfoOnly,
- ShowHotFuncList, OS);
+ return showSampleProfile(Filename, ShowCounts, TopNFunctions,
+ ShowAllFunctions, ShowDetailedSummary,
+ ShowFunction, ShowProfileSymbolList,
+ ShowSectionInfoOnly, ShowHotFuncList, OS);
}
int main(int argc, const char *argv[]) {
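The recurring change in this file is the switch from StringMap<FunctionSamples>, keyed by a flat name, to SampleProfileMap and other maps keyed by SampleContext with its nested Hash functor, plus toString() wherever a printable name is needed. A reduced sketch of that shape (Context and Stats are illustrative stand-ins, not the sampleprof types):

#include <cstdint>
#include <functional>
#include <string>
#include <unordered_map>

struct Context {
  std::string Name; // the real SampleContext also carries frames and state
  bool operator==(const Context &O) const { return Name == O.Name; }
  std::string toString() const { return Name; }
  struct Hash {
    std::size_t operator()(const Context &C) const {
      return std::hash<std::string>()(C.Name);
    }
  };
};

struct Stats { uint64_t SampleSum = 0; };

using StatsMap = std::unordered_map<Context, Stats, Context::Hash>;

// Usage mirrors the hunks: M.emplace(Key, S) instead of StringMap::try_emplace,
// It->first instead of It->first(), and Key.toString() when printing.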
diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
index 3d8acbf48fa9..d97cea4b6d6a 100644
--- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -426,7 +426,7 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
auto Ret = ELF.getSection(*Symbol, SymTab, ShndxTable);
if (!Ret)
- report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ report_fatal_error(Twine(errorToErrorCode(Ret.takeError()).message()));
return *Ret;
}
}
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 99ee639fc45d..78be632f2153 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -238,22 +238,27 @@ ErrorOr<SymbolRef> Decoder::getRelocatedSymbol(const COFFObjectFile &,
return inconvertibleErrorCode();
}
-SymbolRef Decoder::getPreferredSymbol(const COFFObjectFile &COFF,
- SymbolRef Sym) {
+SymbolRef Decoder::getPreferredSymbol(const COFFObjectFile &COFF, SymbolRef Sym,
+ uint64_t &SymbolOffset) {
// The symbol resolved by getRelocatedSymbol can be any internal
// nondescriptive symbol; try to resolve a more descriptive one.
COFFSymbolRef CoffSym = COFF.getCOFFSymbol(Sym);
- if (CoffSym.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL)
+ if (CoffSym.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CoffSym.getSectionDefinition() == nullptr)
return Sym;
for (const auto &S : COFF.symbols()) {
COFFSymbolRef CS = COFF.getCOFFSymbol(S);
if (CS.getSectionNumber() == CoffSym.getSectionNumber() &&
- CS.getValue() == CoffSym.getValue()) {
- if (CS.isExternal())
- return S;
- if (CS.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL) {
+ CS.getValue() <= CoffSym.getValue() + SymbolOffset &&
+ CS.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CS.getSectionDefinition() == nullptr) {
+ uint32_t Offset = CoffSym.getValue() + SymbolOffset - CS.getValue();
+ if (Offset <= SymbolOffset) {
+ SymbolOffset = Offset;
Sym = S;
CoffSym = CS;
+ if (CS.isExternal() && SymbolOffset == 0)
+ return Sym;
}
}
}
@@ -277,12 +282,14 @@ ErrorOr<SymbolRef> Decoder::getSymbolForLocation(
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(AddressOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
// We apply SymbolOffset here directly. We return it separately to allow
// the caller to print it as an offset on the symbol name.
SymbolAddress = *AddressOrErr + SymbolOffset;
+
+ if (FunctionOnly) // Resolve label/section symbols into function names.
+ SymOrErr = getPreferredSymbol(COFF, *SymOrErr, SymbolOffset);
} else {
// No matching relocation found; operating on a linked image. Try to
// find a descriptive symbol if possible. The immediate offset contains
@@ -292,8 +299,6 @@ ErrorOr<SymbolRef> Decoder::getSymbolForLocation(
SymbolOffset = 0;
SymOrErr = getSymbol(COFF, SymbolAddress, FunctionOnly);
}
- if (SymOrErr && FunctionOnly) // Resolve label symbols into function names
- SymOrErr = getPreferredSymbol(COFF, *SymOrErr);
return SymOrErr;
}
@@ -1000,8 +1005,7 @@ bool Decoder::dumpXDataRecord(const COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(Name.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
ListScope EHS(SW, "ExceptionHandler");
@@ -1040,8 +1044,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
FunctionName = *FunctionNameOrErr;
}
@@ -1055,8 +1058,7 @@ bool Decoder::dumpUnpackedEntry(const COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(Name.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
SW.printString("ExceptionRecord",
@@ -1101,8 +1103,7 @@ bool Decoder::dumpPackedEntry(const object::COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
FunctionName = *FunctionNameOrErr;
}
@@ -1143,8 +1144,7 @@ bool Decoder::dumpPackedARM64Entry(const object::COFFObjectFile &COFF,
std::string Buf;
llvm::raw_string_ostream OS(Buf);
logAllUnhandledErrors(FunctionNameOrErr.takeError(), OS);
- OS.flush();
- report_fatal_error(Buf);
+ report_fatal_error(Twine(OS.str()));
}
FunctionName = *FunctionNameOrErr;
}
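Several hunks in this file drop the explicit OS.flush() and hand the buffered error text to report_fatal_error wrapped in a Twine; raw_string_ostream::str() already flushes before returning the string. A self-contained sketch of the pattern (fatal is a hypothetical helper name):

#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

[[noreturn]] static void fatal(llvm::Error E) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  llvm::logAllUnhandledErrors(std::move(E), OS);   // render the Error into Buf
  llvm::report_fatal_error(llvm::Twine(OS.str())); // str() flushes before use
}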
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
index efe16850c7fa..920d4e5f7332 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.h
@@ -154,7 +154,8 @@ class Decoder {
bool FunctionOnly = false);
object::SymbolRef getPreferredSymbol(const object::COFFObjectFile &COFF,
- object::SymbolRef Sym);
+ object::SymbolRef Sym,
+ uint64_t &SymbolOffset);
bool dumpXDataRecord(const object::COFFObjectFile &COFF,
const object::SectionRef &Section,
diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index 96124cc03484..b235398e7a45 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -337,7 +337,7 @@ void COFFDumper::printBinaryBlockWithRelocs(StringRef Label,
}
}
-static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
+const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_UNKNOWN ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AM33 ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_AMD64 ),
@@ -362,7 +362,7 @@ static const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_WCEMIPSV2)
};
-static const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
+const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_RELOCS_STRIPPED ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_EXECUTABLE_IMAGE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_LINE_NUMS_STRIPPED ),
@@ -380,7 +380,7 @@ static const EnumEntry<COFF::Characteristics> ImageFileCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_BYTES_REVERSED_HI )
};
-static const EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
+const EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_UNKNOWN ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_NATIVE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_WINDOWS_GUI ),
@@ -394,7 +394,7 @@ static const EnumEntry<COFF::WindowsSubsystem> PEWindowsSubsystem[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SUBSYSTEM_XBOX ),
};
-static const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
+const EnumEntry<COFF::DLLCharacteristics> PEDLLCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_HIGH_ENTROPY_VA ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY ),
@@ -453,7 +453,7 @@ ImageSectionCharacteristics[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_SCN_MEM_WRITE )
};
-static const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
+const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
{ "Null" , COFF::IMAGE_SYM_TYPE_NULL },
{ "Void" , COFF::IMAGE_SYM_TYPE_VOID },
{ "Char" , COFF::IMAGE_SYM_TYPE_CHAR },
@@ -472,14 +472,14 @@ static const EnumEntry<COFF::SymbolBaseType> ImageSymType[] = {
{ "DWord" , COFF::IMAGE_SYM_TYPE_DWORD }
};
-static const EnumEntry<COFF::SymbolComplexType> ImageSymDType[] = {
+const EnumEntry<COFF::SymbolComplexType> ImageSymDType[] = {
{ "Null" , COFF::IMAGE_SYM_DTYPE_NULL },
{ "Pointer" , COFF::IMAGE_SYM_DTYPE_POINTER },
{ "Function", COFF::IMAGE_SYM_DTYPE_FUNCTION },
{ "Array" , COFF::IMAGE_SYM_DTYPE_ARRAY }
};
-static const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
+const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
{ "EndOfFunction" , COFF::IMAGE_SYM_CLASS_END_OF_FUNCTION },
{ "Null" , COFF::IMAGE_SYM_CLASS_NULL },
{ "Automatic" , COFF::IMAGE_SYM_CLASS_AUTOMATIC },
@@ -509,7 +509,7 @@ static const EnumEntry<COFF::SymbolStorageClass> ImageSymClass[] = {
{ "CLRToken" , COFF::IMAGE_SYM_CLASS_CLR_TOKEN }
};
-static const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
+const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
{ "NoDuplicates", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES },
{ "Any" , COFF::IMAGE_COMDAT_SELECT_ANY },
{ "SameSize" , COFF::IMAGE_COMDAT_SELECT_SAME_SIZE },
@@ -519,7 +519,7 @@ static const EnumEntry<COFF::COMDATType> ImageCOMDATSelect[] = {
{ "Newest" , COFF::IMAGE_COMDAT_SELECT_NEWEST }
};
-static const EnumEntry<COFF::DebugType> ImageDebugType[] = {
+const EnumEntry<COFF::DebugType> ImageDebugType[] = {
{"Unknown", COFF::IMAGE_DEBUG_TYPE_UNKNOWN},
{"COFF", COFF::IMAGE_DEBUG_TYPE_COFF},
{"CodeView", COFF::IMAGE_DEBUG_TYPE_CODEVIEW},
@@ -548,7 +548,7 @@ WeakExternalCharacteristics[] = {
{ "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS }
};
-static const EnumEntry<uint32_t> SubSectionTypes[] = {
+const EnumEntry<uint32_t> SubSectionTypes[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, Symbols),
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, Lines),
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, StringTable),
@@ -564,13 +564,13 @@ static const EnumEntry<uint32_t> SubSectionTypes[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(DebugSubsectionKind, CoffSymbolRVA),
};
-static const EnumEntry<uint32_t> FrameDataFlags[] = {
+const EnumEntry<uint32_t> FrameDataFlags[] = {
LLVM_READOBJ_ENUM_ENT(FrameData, HasSEH),
LLVM_READOBJ_ENUM_ENT(FrameData, HasEH),
LLVM_READOBJ_ENUM_ENT(FrameData, IsFunctionStart),
};
-static const EnumEntry<uint8_t> FileChecksumKindNames[] = {
+const EnumEntry<uint8_t> FileChecksumKindNames[] = {
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, None),
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, MD5),
LLVM_READOBJ_ENUM_CLASS_ENT(FileChecksumKind, SHA1),
@@ -709,7 +709,10 @@ void COFFDumper::printPEHeader(const PEHeader *Hdr) {
};
for (uint32_t i = 0; i < Hdr->NumberOfRvaAndSize; ++i)
- printDataDirectory(i, directory[i]);
+ if (i < sizeof(directory) / sizeof(char *))
+ printDataDirectory(i, directory[i]);
+ else
+ printDataDirectory(i, "Unknown");
}
}
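The printPEHeader hunk above guards the lookup into the fixed table of data-directory names: indices implied by NumberOfRvaAndSize beyond the known set now print as "Unknown" instead of reading past the array. A minimal sketch of that guard (Names and printDirectory are illustrative):

#include <cstdint>
#include <cstdio>

static const char *const Names[] = {"ExportTable", "ImportTable" /* ... */};

static void printDirectory(uint32_t I) {
  const char *Label =
      I < sizeof(Names) / sizeof(Names[0]) ? Names[I] : "Unknown";
  std::printf("DataDirectory %u: %s\n", I, Label);
}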
diff --git a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
index 2dfe21684a62..5dc947e024b9 100644
--- a/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
+++ b/llvm/tools/llvm-readobj/DwarfCFIEHPrinter.h
@@ -185,7 +185,8 @@ void PrinterContext<ELFT>::printEHFrame(const Elf_Shdr *EHFrameShdr) const {
reportError(DataOrErr.takeError(), ObjF.getFileName());
// Construct DWARFDataExtractor to handle relocations ("PC Begin" fields).
- std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(ObjF, nullptr);
+ std::unique_ptr<DWARFContext> DICtx = DWARFContext::create(
+ ObjF, DWARFContext::ProcessDebugRelocations::Process, nullptr);
DWARFDataExtractor DE(DICtx->getDWARFObj(),
DICtx->getDWARFObj().getEHFrameSection(),
ELFT::TargetEndianness == support::endianness::little,
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index f221acba979a..4abea0b1d23d 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -49,6 +49,8 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MSP430AttributeParser.h"
+#include "llvm/Support/MSP430Attributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MipsABIFlags.h"
#include "llvm/Support/RISCVAttributeParser.h"
@@ -339,7 +341,8 @@ protected:
return DynRegionInfo(ObjF, *this, Obj.base() + Offset, Size, EntSize);
}
- void printAttributes();
+ void printAttributes(unsigned, std::unique_ptr<ELFAttributeParser>,
+ support::endianness);
void printMipsReginfo();
void printMipsOptions();
@@ -963,19 +966,19 @@ findNotEmptySectionByAddress(const ELFO &Obj, StringRef FileName,
return nullptr;
}
-static const EnumEntry<unsigned> ElfClass[] = {
+const EnumEntry<unsigned> ElfClass[] = {
{"None", "none", ELF::ELFCLASSNONE},
{"32-bit", "ELF32", ELF::ELFCLASS32},
{"64-bit", "ELF64", ELF::ELFCLASS64},
};
-static const EnumEntry<unsigned> ElfDataEncoding[] = {
+const EnumEntry<unsigned> ElfDataEncoding[] = {
{"None", "none", ELF::ELFDATANONE},
{"LittleEndian", "2's complement, little endian", ELF::ELFDATA2LSB},
{"BigEndian", "2's complement, big endian", ELF::ELFDATA2MSB},
};
-static const EnumEntry<unsigned> ElfObjectFileType[] = {
+const EnumEntry<unsigned> ElfObjectFileType[] = {
{"None", "NONE (none)", ELF::ET_NONE},
{"Relocatable", "REL (Relocatable file)", ELF::ET_REL},
{"Executable", "EXEC (Executable file)", ELF::ET_EXEC},
@@ -983,7 +986,7 @@ static const EnumEntry<unsigned> ElfObjectFileType[] = {
{"Core", "CORE (Core file)", ELF::ET_CORE},
};
-static const EnumEntry<unsigned> ElfOSABI[] = {
+const EnumEntry<unsigned> ElfOSABI[] = {
{"SystemV", "UNIX - System V", ELF::ELFOSABI_NONE},
{"HPUX", "UNIX - HP-UX", ELF::ELFOSABI_HPUX},
{"NetBSD", "UNIX - NetBSD", ELF::ELFOSABI_NETBSD},
@@ -1004,22 +1007,22 @@ static const EnumEntry<unsigned> ElfOSABI[] = {
{"Standalone", "Standalone App", ELF::ELFOSABI_STANDALONE}
};
-static const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
+const EnumEntry<unsigned> AMDGPUElfOSABI[] = {
{"AMDGPU_HSA", "AMDGPU - HSA", ELF::ELFOSABI_AMDGPU_HSA},
{"AMDGPU_PAL", "AMDGPU - PAL", ELF::ELFOSABI_AMDGPU_PAL},
{"AMDGPU_MESA3D", "AMDGPU - MESA3D", ELF::ELFOSABI_AMDGPU_MESA3D}
};
-static const EnumEntry<unsigned> ARMElfOSABI[] = {
+const EnumEntry<unsigned> ARMElfOSABI[] = {
{"ARM", "ARM", ELF::ELFOSABI_ARM}
};
-static const EnumEntry<unsigned> C6000ElfOSABI[] = {
+const EnumEntry<unsigned> C6000ElfOSABI[] = {
{"C6000_ELFABI", "Bare-metal C6000", ELF::ELFOSABI_C6000_ELFABI},
{"C6000_LINUX", "Linux C6000", ELF::ELFOSABI_C6000_LINUX}
};
-static const EnumEntry<unsigned> ElfMachineType[] = {
+const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_NONE, "None"),
ENUM_ENT(EM_M32, "WE32100"),
ENUM_ENT(EM_SPARC, "Sparc"),
@@ -1185,19 +1188,19 @@ static const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_VE, "NEC SX-Aurora Vector Engine"),
};
-static const EnumEntry<unsigned> ElfSymbolBindings[] = {
+const EnumEntry<unsigned> ElfSymbolBindings[] = {
{"Local", "LOCAL", ELF::STB_LOCAL},
{"Global", "GLOBAL", ELF::STB_GLOBAL},
{"Weak", "WEAK", ELF::STB_WEAK},
{"Unique", "UNIQUE", ELF::STB_GNU_UNIQUE}};
-static const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
+const EnumEntry<unsigned> ElfSymbolVisibilities[] = {
{"DEFAULT", "DEFAULT", ELF::STV_DEFAULT},
{"INTERNAL", "INTERNAL", ELF::STV_INTERNAL},
{"HIDDEN", "HIDDEN", ELF::STV_HIDDEN},
{"PROTECTED", "PROTECTED", ELF::STV_PROTECTED}};
-static const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
+const EnumEntry<unsigned> AMDGPUSymbolTypes[] = {
{ "AMDGPU_HSA_KERNEL", ELF::STT_AMDGPU_HSA_KERNEL }
};
@@ -1208,7 +1211,7 @@ static const char *getGroupType(uint32_t Flag) {
return "(unknown)";
}
-static const EnumEntry<unsigned> ElfSectionFlags[] = {
+const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_WRITE, "W"),
ENUM_ENT(SHF_ALLOC, "A"),
ENUM_ENT(SHF_EXECINSTR, "X"),
@@ -1224,20 +1227,20 @@ static const EnumEntry<unsigned> ElfSectionFlags[] = {
ENUM_ENT(SHF_EXCLUDE, "E"),
};
-static const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
+const EnumEntry<unsigned> ElfXCoreSectionFlags[] = {
ENUM_ENT(XCORE_SHF_CP_SECTION, ""),
ENUM_ENT(XCORE_SHF_DP_SECTION, "")
};
-static const EnumEntry<unsigned> ElfARMSectionFlags[] = {
+const EnumEntry<unsigned> ElfARMSectionFlags[] = {
ENUM_ENT(SHF_ARM_PURECODE, "y")
};
-static const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
+const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
ENUM_ENT(SHF_HEX_GPREL, "")
};
-static const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
+const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
ENUM_ENT(SHF_MIPS_NODUPES, ""),
ENUM_ENT(SHF_MIPS_NAMES, ""),
ENUM_ENT(SHF_MIPS_LOCAL, ""),
@@ -1248,7 +1251,7 @@ static const EnumEntry<unsigned> ElfMipsSectionFlags[] = {
ENUM_ENT(SHF_MIPS_STRING, "")
};
-static const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
+const EnumEntry<unsigned> ElfX86_64SectionFlags[] = {
ENUM_ENT(SHF_X86_64_LARGE, "l")
};
@@ -1395,13 +1398,13 @@ static std::string getGNUPtType(unsigned Arch, unsigned Type) {
return Seg.drop_front(3).str();
}
-static const EnumEntry<unsigned> ElfSegmentFlags[] = {
+const EnumEntry<unsigned> ElfSegmentFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, PF_X),
LLVM_READOBJ_ENUM_ENT(ELF, PF_W),
LLVM_READOBJ_ENUM_ENT(ELF, PF_R)
};
-static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
+const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_NOREORDER, "noreorder"),
ENUM_ENT(EF_MIPS_PIC, "pic"),
ENUM_ENT(EF_MIPS_CPIC, "cpic"),
@@ -1447,7 +1450,7 @@ static const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
-static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
+const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
@@ -1501,7 +1504,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};
-static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
+const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_NONE),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R600),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_R600_R630),
@@ -1559,7 +1562,7 @@ static const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
};
-static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
+const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
ENUM_ENT(EF_RISCV_RVC, "RVC"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_SINGLE, "single-float ABI"),
ENUM_ENT(EF_RISCV_FLOAT_ABI_DOUBLE, "double-float ABI"),
@@ -1567,7 +1570,7 @@ static const EnumEntry<unsigned> ElfHeaderRISCVFlags[] = {
ENUM_ENT(EF_RISCV_RVE, "RVE")
};
-static const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
+const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR1),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR2),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AVR_ARCH_AVR25),
@@ -1590,29 +1593,32 @@ static const EnumEntry<unsigned> ElfHeaderAVRFlags[] = {
};
-static const EnumEntry<unsigned> ElfSymOtherFlags[] = {
+const EnumEntry<unsigned> ElfSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STV_INTERNAL),
LLVM_READOBJ_ENUM_ENT(ELF, STV_HIDDEN),
LLVM_READOBJ_ENUM_ENT(ELF, STV_PROTECTED)
};
-static const EnumEntry<unsigned> ElfMipsSymOtherFlags[] = {
+const EnumEntry<unsigned> ElfMipsSymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PIC),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MICROMIPS)
};
-static const EnumEntry<unsigned> ElfAArch64SymOtherFlags[] = {
+const EnumEntry<unsigned> ElfAArch64SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_AARCH64_VARIANT_PCS)
};
-static const EnumEntry<unsigned> ElfMips16SymOtherFlags[] = {
+const EnumEntry<unsigned> ElfMips16SymOtherFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_OPTIONAL),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_PLT),
LLVM_READOBJ_ENUM_ENT(ELF, STO_MIPS_MIPS16)
};
+const EnumEntry<unsigned> ElfRISCVSymOtherFlags[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, STO_RISCV_VARIANT_CC)};
+
static const char *getElfMipsOptionsOdkType(unsigned Odk) {
switch (Odk) {
LLVM_READOBJ_ENUM_CASE(ELF, ODK_NULL);
@@ -2065,7 +2071,7 @@ template <typename ELFT> void ELFDumper<ELFT>::printVersionInfo() {
#define LLVM_READOBJ_DT_FLAG_ENT(prefix, enum) \
{ #enum, prefix##_##enum }
-static const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
+const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF, ORIGIN),
LLVM_READOBJ_DT_FLAG_ENT(DF, SYMBOLIC),
LLVM_READOBJ_DT_FLAG_ENT(DF, TEXTREL),
@@ -2073,7 +2079,7 @@ static const EnumEntry<unsigned> ElfDynamicDTFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF, STATIC_TLS)
};
-static const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
+const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF_1, NOW),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GLOBAL),
LLVM_READOBJ_DT_FLAG_ENT(DF_1, GROUP),
@@ -2103,7 +2109,7 @@ static const EnumEntry<unsigned> ElfDynamicDTFlags1[] = {
LLVM_READOBJ_DT_FLAG_ENT(DF_1, PIE),
};
-static const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
+const EnumEntry<unsigned> ElfDynamicDTMipsFlags[] = {
LLVM_READOBJ_DT_FLAG_ENT(RHF, NONE),
LLVM_READOBJ_DT_FLAG_ENT(RHF, QUICKSTART),
LLVM_READOBJ_DT_FLAG_ENT(RHF, NOTPOT),
@@ -2292,6 +2298,8 @@ std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
case DT_INIT_ARRAYSZ:
case DT_FINI_ARRAYSZ:
case DT_PREINIT_ARRAYSZ:
+ case DT_RELRSZ:
+ case DT_RELRENT:
case DT_ANDROID_RELSZ:
case DT_ANDROID_RELASZ:
return std::to_string(Value) + " (bytes)";
@@ -2557,8 +2565,27 @@ template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
switch (Obj.getHeader().e_machine) {
case EM_ARM:
+ if (Obj.isLE())
+ printAttributes(ELF::SHT_ARM_ATTRIBUTES,
+ std::make_unique<ARMAttributeParser>(&W),
+ support::little);
+ else
+ reportUniqueWarning("attribute printing not implemented for big-endian "
+ "ARM objects");
+ break;
case EM_RISCV:
- printAttributes();
+ if (Obj.isLE())
+ printAttributes(ELF::SHT_RISCV_ATTRIBUTES,
+ std::make_unique<RISCVAttributeParser>(&W),
+ support::little);
+ else
+ reportUniqueWarning("attribute printing not implemented for big-endian "
+ "RISC-V objects");
+ break;
+ case EM_MSP430:
+ printAttributes(ELF::SHT_MSP430_ATTRIBUTES,
+ std::make_unique<MSP430AttributeParser>(&W),
+ support::little);
break;
case EM_MIPS: {
printMipsABIFlags();
@@ -2581,20 +2608,15 @@ template <class ELFT> void ELFDumper<ELFT>::printArchSpecificInfo() {
}
}
-template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
- if (!Obj.isLE()) {
- W.startLine() << "Attributes not implemented.\n";
- return;
- }
-
- const unsigned Machine = Obj.getHeader().e_machine;
- assert((Machine == EM_ARM || Machine == EM_RISCV) &&
- "Attributes not implemented.");
-
+template <class ELFT>
+void ELFDumper<ELFT>::printAttributes(
+ unsigned AttrShType, std::unique_ptr<ELFAttributeParser> AttrParser,
+ support::endianness Endianness) {
+ assert((AttrShType != ELF::SHT_NULL) && AttrParser &&
+ "Incomplete ELF attribute implementation");
DictScope BA(W, "BuildAttributes");
for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
- if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES &&
- Sec.sh_type != ELF::SHT_RISCV_ATTRIBUTES)
+ if (Sec.sh_type != AttrShType)
continue;
ArrayRef<uint8_t> Contents;
@@ -2613,13 +2635,7 @@ template <class ELFT> void ELFDumper<ELFT>::printAttributes() {
W.printHex("FormatVersion", Contents[0]);
- auto ParseAttrubutes = [&]() {
- if (Machine == EM_ARM)
- return ARMAttributeParser(&W).parse(Contents, support::little);
- return RISCVAttributeParser(&W).parse(Contents, support::little);
- };
-
- if (Error E = ParseAttrubutes())
+ if (Error E = AttrParser->parse(Contents, Endianness))
reportUniqueWarning("unable to dump attributes from the " +
describe(Sec) + ": " + toString(std::move(E)));
}
@@ -2934,7 +2950,7 @@ MipsGOTParser<ELFT>::getPltSym(const Entry *E) const {
}
}
-static const EnumEntry<unsigned> ElfMipsISAExtType[] = {
+const EnumEntry<unsigned> ElfMipsISAExtType[] = {
{"None", Mips::AFL_EXT_NONE},
{"Broadcom SB-1", Mips::AFL_EXT_SB1},
{"Cavium Networks Octeon", Mips::AFL_EXT_OCTEON},
@@ -2957,7 +2973,7 @@ static const EnumEntry<unsigned> ElfMipsISAExtType[] = {
{"Toshiba R3900", Mips::AFL_EXT_3900}
};
-static const EnumEntry<unsigned> ElfMipsASEFlags[] = {
+const EnumEntry<unsigned> ElfMipsASEFlags[] = {
{"DSP", Mips::AFL_ASE_DSP},
{"DSPR2", Mips::AFL_ASE_DSPR2},
{"Enhanced VA Scheme", Mips::AFL_ASE_EVA},
@@ -2975,7 +2991,7 @@ static const EnumEntry<unsigned> ElfMipsASEFlags[] = {
{"GINV", Mips::AFL_ASE_GINV},
};
-static const EnumEntry<unsigned> ElfMipsFpABIType[] = {
+const EnumEntry<unsigned> ElfMipsFpABIType[] = {
{"Hard or soft float", Mips::Val_GNU_MIPS_ABI_FP_ANY},
{"Hard float (double precision)", Mips::Val_GNU_MIPS_ABI_FP_DOUBLE},
{"Hard float (single precision)", Mips::Val_GNU_MIPS_ABI_FP_SINGLE},
@@ -3762,6 +3778,15 @@ void GNUELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
Fields[5].Str.append(" | " + to_hexString(Other, false));
Fields[5].Str.append("]");
}
+ } else if (this->Obj.getHeader().e_machine == ELF::EM_RISCV) {
+ uint8_t Other = Symbol.st_other & ~0x3;
+ if (Other & STO_RISCV_VARIANT_CC) {
+ Other &= ~STO_RISCV_VARIANT_CC;
+ Fields[5].Str += " [VARIANT_CC";
+ if (Other != 0)
+ Fields[5].Str.append(" | " + to_hexString(Other, false));
+ Fields[5].Str.append("]");
+ }
} else {
Fields[5].Str +=
" [<other: " + to_string(format_hex(Symbol.st_other, 2)) + ">]";
@@ -4358,7 +4383,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printDynamicTable() {
for (auto Entry : Table) {
uintX_t Tag = Entry.getTag();
std::string Type =
- std::string("(") + this->Obj.getDynamicTagAsString(Tag).c_str() + ")";
+ std::string("(") + this->Obj.getDynamicTagAsString(Tag) + ")";
std::string Value = this->getDynamicEntry(Tag, Entry.getVal());
OS << " " << format_hex(Tag, ELFT::Is64Bits ? 18 : 10)
<< format(ValueFmt.c_str(), Type.c_str()) << Value << "\n";
@@ -4951,7 +4976,7 @@ static std::string getGNUBuildId(ArrayRef<uint8_t> Desc) {
return OS.str();
}
-static StringRef getGNUGoldVersion(ArrayRef<uint8_t> Desc) {
+static StringRef getDescAsStringRef(ArrayRef<uint8_t> Desc) {
return StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
}
@@ -4975,7 +5000,7 @@ static bool printGNUNote(raw_ostream &OS, uint32_t NoteType,
break;
}
case ELF::NT_GNU_GOLD_VERSION:
- OS << " Version: " << getGNUGoldVersion(Desc);
+ OS << " Version: " << getDescAsStringRef(Desc);
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
OS << " Properties:";
@@ -4987,7 +5012,27 @@ static bool printGNUNote(raw_ostream &OS, uint32_t NoteType,
return true;
}
-static const EnumEntry<unsigned> FreeBSDFeatureCtlFlags[] = {
+template <typename ELFT>
+static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType,
+ ArrayRef<uint8_t> Desc) {
+ switch (NoteType) {
+ default:
+ return false;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION:
+ OS << " Version: " << getDescAsStringRef(Desc);
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER:
+ OS << " Producer: " << getDescAsStringRef(Desc);
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION:
+ OS << " Producer version: " << getDescAsStringRef(Desc);
+ break;
+ }
+ OS << '\n';
+ return true;
+}
+
+const EnumEntry<unsigned> FreeBSDFeatureCtlFlags[] = {
{"ASLR_DISABLE", NT_FREEBSD_FCTL_ASLR_DISABLE},
{"PROTMAX_DISABLE", NT_FREEBSD_FCTL_PROTMAX_DISABLE},
{"STKGAP_DISABLE", NT_FREEBSD_FCTL_STKGAP_DISABLE},
@@ -5251,14 +5296,14 @@ static void printCoreNote(raw_ostream &OS, const CoreNote &Note) {
}
}
-static const NoteType GenericNoteTypes[] = {
+const NoteType GenericNoteTypes[] = {
{ELF::NT_VERSION, "NT_VERSION (version)"},
{ELF::NT_ARCH, "NT_ARCH (architecture)"},
{ELF::NT_GNU_BUILD_ATTRIBUTE_OPEN, "OPEN"},
{ELF::NT_GNU_BUILD_ATTRIBUTE_FUNC, "func"},
};
-static const NoteType GNUNoteTypes[] = {
+const NoteType GNUNoteTypes[] = {
{ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
{ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
{ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
@@ -5266,7 +5311,7 @@ static const NoteType GNUNoteTypes[] = {
{ELF::NT_GNU_PROPERTY_TYPE_0, "NT_GNU_PROPERTY_TYPE_0 (property note)"},
};
-static const NoteType FreeBSDCoreNoteTypes[] = {
+const NoteType FreeBSDCoreNoteTypes[] = {
{ELF::NT_FREEBSD_THRMISC, "NT_THRMISC (thrmisc structure)"},
{ELF::NT_FREEBSD_PROCSTAT_PROC, "NT_PROCSTAT_PROC (proc data)"},
{ELF::NT_FREEBSD_PROCSTAT_FILES, "NT_PROCSTAT_FILES (files data)"},
@@ -5280,7 +5325,7 @@ static const NoteType FreeBSDCoreNoteTypes[] = {
{ELF::NT_FREEBSD_PROCSTAT_AUXV, "NT_PROCSTAT_AUXV (auxv data)"},
};
-static const NoteType FreeBSDNoteTypes[] = {
+const NoteType FreeBSDNoteTypes[] = {
{ELF::NT_FREEBSD_ABI_TAG, "NT_FREEBSD_ABI_TAG (ABI version tag)"},
{ELF::NT_FREEBSD_NOINIT_TAG, "NT_FREEBSD_NOINIT_TAG (no .init tag)"},
{ELF::NT_FREEBSD_ARCH_TAG, "NT_FREEBSD_ARCH_TAG (architecture tag)"},
@@ -5288,7 +5333,15 @@ static const NoteType FreeBSDNoteTypes[] = {
"NT_FREEBSD_FEATURE_CTL (FreeBSD feature control)"},
};
-static const NoteType AMDNoteTypes[] = {
+const NoteType OpenBSDCoreNoteTypes[] = {
+ {ELF::NT_OPENBSD_PROCINFO, "NT_OPENBSD_PROCINFO (procinfo structure)"},
+ {ELF::NT_OPENBSD_AUXV, "NT_OPENBSD_AUXV (ELF auxiliary vector data)"},
+ {ELF::NT_OPENBSD_REGS, "NT_OPENBSD_REGS (regular registers)"},
+ {ELF::NT_OPENBSD_FPREGS, "NT_OPENBSD_FPREGS (floating point registers)"},
+ {ELF::NT_OPENBSD_WCOOKIE, "NT_OPENBSD_WCOOKIE (window cookie)"},
+};
+
+const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_HSA_CODE_OBJECT_VERSION,
"NT_AMD_HSA_CODE_OBJECT_VERSION (AMD HSA Code Object Version)"},
{ELF::NT_AMD_HSA_HSAIL, "NT_AMD_HSA_HSAIL (AMD HSA HSAIL Properties)"},
@@ -5298,11 +5351,20 @@ static const NoteType AMDNoteTypes[] = {
{ELF::NT_AMD_PAL_METADATA, "NT_AMD_PAL_METADATA (AMD PAL Metadata)"},
};
-static const NoteType AMDGPUNoteTypes[] = {
+const NoteType AMDGPUNoteTypes[] = {
{ELF::NT_AMDGPU_METADATA, "NT_AMDGPU_METADATA (AMDGPU Metadata)"},
};
-static const NoteType CoreNoteTypes[] = {
+const NoteType LLVMOMPOFFLOADNoteTypes[] = {
+ {ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION,
+ "NT_LLVM_OPENMP_OFFLOAD_VERSION (image format version)"},
+ {ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER,
+ "NT_LLVM_OPENMP_OFFLOAD_PRODUCER (producing toolchain)"},
+ {ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION,
+ "NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION (producing toolchain version)"},
+};
+
+const NoteType CoreNoteTypes[] = {
{ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"},
{ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"},
{ELF::NT_PRPSINFO, "NT_PRPSINFO (prpsinfo structure)"},
@@ -5391,10 +5453,19 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) {
return FindNote(FreeBSDNoteTypes);
}
}
+ if (Name.startswith("OpenBSD") && ELFType == ELF::ET_CORE) {
+ // OpenBSD also places the generic core notes in the OpenBSD namespace.
+ StringRef Result = FindNote(OpenBSDCoreNoteTypes);
+ if (!Result.empty())
+ return Result;
+ return FindNote(CoreNoteTypes);
+ }
if (Name == "AMD")
return FindNote(AMDNoteTypes);
if (Name == "AMDGPU")
return FindNote(AMDGPUNoteTypes);
+ if (Name == "LLVMOMPOFFLOAD")
+ return FindNote(LLVMOMPOFFLOADNoteTypes);
if (ELFType == ELF::ET_CORE)
return FindNote(CoreNoteTypes);
@@ -5530,6 +5601,9 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
OS << " " << N.Type << ":\n " << N.Value << '\n';
return Error::success();
}
+ } else if (Name == "LLVMOMPOFFLOAD") {
+ if (printLLVMOMPOFFLOADNote<ELFT>(OS, Type, Descriptor))
+ return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
DataExtractor DescExtractor(Descriptor,
@@ -6532,6 +6606,10 @@ void LLVMELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
SymOtherFlags.insert(SymOtherFlags.end(),
std::begin(ElfAArch64SymOtherFlags),
std::end(ElfAArch64SymOtherFlags));
+ } else if (this->Obj.getHeader().e_machine == EM_RISCV) {
+ SymOtherFlags.insert(SymOtherFlags.end(),
+ std::begin(ElfRISCVSymOtherFlags),
+ std::end(ElfRISCVSymOtherFlags));
}
W.printFlags("Other", Symbol.st_other, makeArrayRef(SymOtherFlags), 0x3u);
}
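Both symbol printers treat st_other as two fields: the low two bits (mask 0x3) are the STV_* visibility, and the remaining bits hold processor-specific STO_* flags. That is why the RISC-V hunks mask with ~0x3 before testing STO_RISCV_VARIANT_CC, and why printFlags is passed 0x3u as the enum mask. A short sketch of that split (the decode helper is illustrative):

#include <cstdint>

// ELF st_other layout: bits 0-1 = visibility (STV_*), upper bits = STO_* flags.
struct OtherBits {
  uint8_t Visibility; // STV_DEFAULT/INTERNAL/HIDDEN/PROTECTED
  uint8_t ProcFlags;  // e.g. STO_RISCV_VARIANT_CC on RISC-V
};

static OtherBits decodeStOther(uint8_t StOther) {
  return {static_cast<uint8_t>(StOther & 0x3),
          static_cast<uint8_t>(StOther & ~0x3)};
}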
@@ -6650,7 +6728,7 @@ void LLVMELFDumper<ELFT>::printVersionSymbolSection(const Elf_Shdr *Sec) {
}
}
-static const EnumEntry<unsigned> SymVersionFlags[] = {
+const EnumEntry<unsigned> SymVersionFlags[] = {
{"Base", "BASE", VER_FLG_BASE},
{"Weak", "WEAK", VER_FLG_WEAK},
{"Info", "INFO", VER_FLG_INFO}};
@@ -6818,14 +6896,14 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
FunctionSec =
unwrapOrError(this->FileName, this->Obj.getSection(Sec.sh_link));
ListScope L(W, "BBAddrMap");
- Expected<std::vector<Elf_BBAddrMap>> BBAddrMapOrErr =
+ Expected<std::vector<BBAddrMap>> BBAddrMapOrErr =
this->Obj.decodeBBAddrMap(Sec);
if (!BBAddrMapOrErr) {
this->reportUniqueWarning("unable to dump " + this->describe(Sec) + ": " +
toString(BBAddrMapOrErr.takeError()));
continue;
}
- for (const Elf_BBAddrMap &AM : *BBAddrMapOrErr) {
+ for (const BBAddrMap &AM : *BBAddrMapOrErr) {
DictScope D(W, "Function");
W.printHex("At", AM.Addr);
SmallVector<uint32_t> FuncSymIndex =
@@ -6840,7 +6918,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
W.printString("Name", FuncName);
ListScope L(W, "BB entries");
- for (const typename Elf_BBAddrMap::BBEntry &BBE : AM.BBEntries) {
+ for (const BBAddrMap::BBEntry &BBE : AM.BBEntries) {
DictScope L(W);
W.printHex("Offset", BBE.Offset);
W.printHex("Size", BBE.Size);
@@ -6892,7 +6970,7 @@ static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
break;
}
case ELF::NT_GNU_GOLD_VERSION:
- W.printString("Version", getGNUGoldVersion(Desc));
+ W.printString("Version", getDescAsStringRef(Desc));
break;
case ELF::NT_GNU_PROPERTY_TYPE_0:
ListScope D(W, "Property");
@@ -6903,6 +6981,26 @@ static bool printGNUNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
return true;
}
+template <typename ELFT>
+static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType,
+ ArrayRef<uint8_t> Desc,
+ ScopedPrinter &W) {
+ switch (NoteType) {
+ default:
+ return false;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_VERSION:
+ W.printString("Version", getDescAsStringRef(Desc));
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER:
+ W.printString("Producer", getDescAsStringRef(Desc));
+ break;
+ case ELF::NT_LLVM_OPENMP_OFFLOAD_PRODUCER_VERSION:
+ W.printString("Producer version", getDescAsStringRef(Desc));
+ break;
+ }
+ return true;
+}
+
static void printCoreNoteLLVMStyle(const CoreNote &Note, ScopedPrinter &W) {
W.printNumber("Page Size", Note.PageSize);
for (const CoreFileMapping &Mapping : Note.Mappings) {
@@ -6970,6 +7068,9 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
W.printString(N.Type, N.Value);
return Error::success();
}
+ } else if (Name == "LLVMOMPOFFLOAD") {
+ if (printLLVMOMPOFFLOADNoteLLVMStyle<ELFT>(Type, Descriptor, W))
+ return Error::success();
} else if (Name == "CORE") {
if (Type == ELF::NT_FILE) {
DataExtractor DescExtractor(Descriptor,
diff --git a/llvm/tools/llvm-readobj/MachODumper.cpp b/llvm/tools/llvm-readobj/MachODumper.cpp
index 433ca9335324..945b16b8db86 100644
--- a/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -74,7 +74,7 @@ std::unique_ptr<ObjDumper> createMachODumper(const object::MachOObjectFile &Obj,
} // namespace llvm
-static const EnumEntry<uint32_t> MachOMagics[] = {
+const EnumEntry<uint32_t> MachOMagics[] = {
{ "Magic", MachO::MH_MAGIC },
{ "Cigam", MachO::MH_CIGAM },
{ "Magic64", MachO::MH_MAGIC_64 },
@@ -83,7 +83,7 @@ static const EnumEntry<uint32_t> MachOMagics[] = {
{ "FatCigam", MachO::FAT_CIGAM },
};
-static const EnumEntry<uint32_t> MachOHeaderFileTypes[] = {
+const EnumEntry<uint32_t> MachOHeaderFileTypes[] = {
{ "Relocatable", MachO::MH_OBJECT },
{ "Executable", MachO::MH_EXECUTE },
{ "FixedVMLibrary", MachO::MH_FVMLIB },
@@ -97,7 +97,7 @@ static const EnumEntry<uint32_t> MachOHeaderFileTypes[] = {
{ "KextBundle", MachO::MH_KEXT_BUNDLE },
};
-static const EnumEntry<uint32_t> MachOHeaderCpuTypes[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuTypes[] = {
{ "Any" , static_cast<uint32_t>(MachO::CPU_TYPE_ANY) },
{ "X86" , MachO::CPU_TYPE_X86 },
{ "X86-64" , MachO::CPU_TYPE_X86_64 },
@@ -109,7 +109,7 @@ static const EnumEntry<uint32_t> MachOHeaderCpuTypes[] = {
{ "PowerPC64" , MachO::CPU_TYPE_POWERPC64 },
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX86[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX86[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_I386_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_386),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_486),
@@ -132,13 +132,13 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX86[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_XEON_MP),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX64[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesX64[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_X86_64_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_X86_ARCH1),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_X86_64_H),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_V4T),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_V6),
@@ -153,17 +153,17 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM_V7EM),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesARM64[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64_V8),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_ARM64E),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesSPARC[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_SPARC_ALL),
};
-static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesPPC[] = {
+const EnumEntry<uint32_t> MachOHeaderCpuSubtypesPPC[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_ALL),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_601),
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_602),
@@ -179,7 +179,7 @@ static const EnumEntry<uint32_t> MachOHeaderCpuSubtypesPPC[] = {
LLVM_READOBJ_ENUM_ENT(MachO, CPU_SUBTYPE_POWERPC_970),
};
-static const EnumEntry<uint32_t> MachOHeaderFlags[] = {
+const EnumEntry<uint32_t> MachOHeaderFlags[] = {
LLVM_READOBJ_ENUM_ENT(MachO, MH_NOUNDEFS),
LLVM_READOBJ_ENUM_ENT(MachO, MH_INCRLINK),
LLVM_READOBJ_ENUM_ENT(MachO, MH_DYLDLINK),
@@ -208,7 +208,7 @@ static const EnumEntry<uint32_t> MachOHeaderFlags[] = {
LLVM_READOBJ_ENUM_ENT(MachO, MH_APP_EXTENSION_SAFE),
};
-static const EnumEntry<unsigned> MachOSectionTypes[] = {
+const EnumEntry<unsigned> MachOSectionTypes[] = {
{ "Regular" , MachO::S_REGULAR },
{ "ZeroFill" , MachO::S_ZEROFILL },
{ "CStringLiterals" , MachO::S_CSTRING_LITERALS },
@@ -233,7 +233,7 @@ static const EnumEntry<unsigned> MachOSectionTypes[] = {
{ "ThreadLocalInitFunctionPointers", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS }
};
-static const EnumEntry<unsigned> MachOSectionAttributes[] = {
+const EnumEntry<unsigned> MachOSectionAttributes[] = {
{ "LocReloc" , 1 << 0 /*S_ATTR_LOC_RELOC */ },
{ "ExtReloc" , 1 << 1 /*S_ATTR_EXT_RELOC */ },
{ "SomeInstructions" , 1 << 2 /*S_ATTR_SOME_INSTRUCTIONS */ },
@@ -246,7 +246,7 @@ static const EnumEntry<unsigned> MachOSectionAttributes[] = {
{ "PureInstructions" , 1 << 23 /*S_ATTR_PURE_INSTRUCTIONS */ },
};
-static const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
+const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
{ "UndefinedNonLazy", 0 },
{ "ReferenceFlagUndefinedLazy", 1 },
{ "ReferenceFlagDefined", 2 },
@@ -255,7 +255,7 @@ static const EnumEntry<unsigned> MachOSymbolRefTypes[] = {
{ "ReferenceFlagPrivateUndefinedLazy", 5 }
};
-static const EnumEntry<unsigned> MachOSymbolFlags[] = {
+const EnumEntry<unsigned> MachOSymbolFlags[] = {
{ "ThumbDef", 0x8 },
{ "ReferencedDynamically", 0x10 },
{ "NoDeadStrip", 0x20 },
@@ -266,7 +266,7 @@ static const EnumEntry<unsigned> MachOSymbolFlags[] = {
{ "ColdFunc", 0x400 },
};
-static const EnumEntry<unsigned> MachOSymbolTypes[] = {
+const EnumEntry<unsigned> MachOSymbolTypes[] = {
{ "Undef", 0x0 },
{ "Abs", 0x2 },
{ "Indirect", 0xA },
diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp
index 87c229356e20..dc4a3031f914 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.cpp
+++ b/llvm/tools/llvm-readobj/ObjDumper.cpp
@@ -52,9 +52,23 @@ static void printAsPrintable(raw_ostream &W, const uint8_t *Start, size_t Len) {
W << (isPrint(Start[i]) ? static_cast<char>(Start[i]) : '.');
}
-void ObjDumper::printAsStringList(StringRef StringContent) {
+void ObjDumper::printAsStringList(StringRef StringContent,
+ size_t StringDataOffset) {
+ size_t StrSize = StringContent.size();
+ if (StrSize == 0)
+ return;
+ if (StrSize < StringDataOffset) {
+ reportUniqueWarning("offset (0x" + Twine::utohexstr(StringDataOffset) +
+ ") is past the end of the contents (size 0x" +
+ Twine::utohexstr(StrSize) + ")");
+ return;
+ }
+
const uint8_t *StrContent = StringContent.bytes_begin();
- const uint8_t *CurrentWord = StrContent;
+ // Some formats contain additional metadata at the start which should not be
+ // interpreted as strings. Skip these bytes, but account for them in the
+ // string offsets.
+ const uint8_t *CurrentWord = StrContent + StringDataOffset;
const uint8_t *StrEnd = StringContent.bytes_end();
while (CurrentWord <= StrEnd) {
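
The printAsStringList change above skips a metadata prefix but still reports offsets relative to the start of the contents. A standalone sketch of that walk, with illustrative names (dumpStringList, Blob, DataOffset) rather than the dumper's own API:

#include <cstdio>
#include <cstring>
#include <string>

// Print each NUL-terminated string in Blob, skipping a metadata prefix of
// DataOffset bytes while keeping offsets relative to the start of Blob.
static void dumpStringList(const std::string &Blob, size_t DataOffset) {
  if (Blob.empty() || Blob.size() < DataOffset)
    return; // the real dumper emits a warning when the offset is out of range
  const char *Begin = Blob.data();
  const char *End = Begin + Blob.size();
  const char *Cur = Begin + DataOffset;
  while (Cur < End) {
    const char *Nul =
        static_cast<const char *>(std::memchr(Cur, '\0', End - Cur));
    size_t Len = Nul ? size_t(Nul - Cur) : size_t(End - Cur);
    if (Len != 0)
      std::printf("[%6zx] %.*s\n", size_t(Cur - Begin), int(Len), Cur);
    Cur += Len + 1;
  }
}
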
diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h
index 7e1c0ca35127..b395a95f3cb4 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/llvm/tools/llvm-readobj/ObjDumper.h
@@ -97,6 +97,9 @@ public:
llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes,
bool GHash) {}
+ // Only implemented for XCOFF.
+ virtual void printAuxiliaryHeader() {}
+
// Only implemented for MachO.
virtual void printMachODataInCode() { }
virtual void printMachOVersionMin() { }
@@ -110,7 +113,7 @@ public:
virtual void printStackMap() const = 0;
- void printAsStringList(StringRef StringContent);
+ void printAsStringList(StringRef StringContent, size_t StringDataOffset = 0);
void printSectionsAsString(const object::ObjectFile &Obj,
ArrayRef<std::string> Sections);
diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td
index 493b93769eb4..7723691e8225 100644
--- a/llvm/tools/llvm-readobj/Opts.td
+++ b/llvm/tools/llvm-readobj/Opts.td
@@ -29,14 +29,14 @@ def file_header : FF<"file-header", "Display file header">;
def headers : FF<"headers", "Equivalent to setting: --file-header, --program-headers, --section-headers">;
defm hex_dump : Eq<"hex-dump", "Display the specified section(s) as hexadecimal bytes">, MetaVarName<"<name or index>">;
def relocs : FF<"relocs", "Display the relocation entries in the file">;
-def section_data : FF<"section-data", "Display section data for each section shown">;
+def section_data : FF<"section-data", "Display section data for each section shown. This option has no effect for GNU style output">;
def section_details : FF<"section-details", "Display the section details">;
def section_headers : FF<"section-headers", "Display section headers">;
def section_mapping : FF<"section-mapping", "Display the section to segment mapping">;
def section_mapping_EQ_false : FF<"section-mapping=false", "Don't display the section to segment mapping">, Flags<[HelpHidden]>;
-def section_relocations : FF<"section-relocations", "Display relocations for each section shown">;
-def section_symbols : FF<"section-symbols", "Display symbols for each section shown">;
-def stack_sizes : FF<"stack-sizes", "Display contents of all stack sizes sections">;
+def section_relocations : FF<"section-relocations", "Display relocations for each section shown. This option has no effect for GNU style output">;
+def section_symbols : FF<"section-symbols", "Display symbols for each section shown. This option has no effect for GNU style output">;
+def stack_sizes : FF<"stack-sizes", "Display contents of all stack sizes sections. This option has no effect for GNU style output">;
def stackmap : FF<"stackmap", "Display contents of stackmap section">;
defm string_dump : Eq<"string-dump", "Display the specified section(s) as a list of strings">, MetaVarName<"<name or index>">;
def string_table : FF<"string-table", "Display the string table (only for XCOFF now)">;
@@ -47,10 +47,10 @@ def unwind : FF<"unwind", "Display unwind information">;
def grp_elf : OptionGroup<"kind">, HelpText<"OPTIONS (ELF specific)">;
def dynamic_table : FF<"dynamic-table", "Display the dynamic section table">, Group<grp_elf>;
def elf_linker_options : FF<"elf-linker-options", "Display the .linker-options section">, Group<grp_elf>;
-defm elf_output_style : Eq<"elf-output-style", "Specify ELF dump style">, Group<grp_elf>;
+defm elf_output_style : Eq<"elf-output-style", "Specify ELF dump style: LLVM or GNU">, Group<grp_elf>;
def histogram : FF<"histogram", "Display bucket list histogram for hash sections">, Group<grp_elf>;
def section_groups : FF<"section-groups", "Display section groups">, Group<grp_elf>;
-def gnu_hash_table : FF<"gnu-hash-table", "Display .gnu.hash section">, Group<grp_elf>;
+def gnu_hash_table : FF<"gnu-hash-table", "Display the GNU hash table for dynamic symbols">, Group<grp_elf>;
def hash_symbols : FF<"hash-symbols", "Display the dynamic symbols derived from the hash section">, Group<grp_elf>;
def hash_table : FF<"hash-table", "Display .hash section">, Group<grp_elf>;
def needed_libs : FF<"needed-libs", "Display the needed libraries">, Group<grp_elf>;
@@ -83,12 +83,16 @@ def coff_load_config : FF<"coff-load-config", "Display load config">, Group<grp_
def coff_resources : FF<"coff-resources", "Display .rsrc section">, Group<grp_coff>;
def coff_tls_directory : FF<"coff-tls-directory", "Display TLS directory">, Group<grp_coff>;
+// XCOFF specific options.
+def grp_xcoff : OptionGroup<"kind">, HelpText<"OPTIONS (XCOFF specific)">;
+def auxiliary_header : FF<"auxiliary-header", "Display the auxiliary header">, Group<grp_xcoff>;
+
def help : FF<"help", "Display this help">;
def version : FF<"version", "Display the version">;
// Ignored for GNU readelf compatibility.
-def : F<"W", "Ignored for GNU readelf compatibility">;
-def : FF<"wide", "Ignored for GNU readelf compatibility">;
+def wide : FF<"wide", "Ignored for GNU readelf compatibility">;
+def : F<"W", "Ignored for GNU readelf compatibility">, Alias<wide>;
// Traditional llvm-readobj Aliases.
def : Flag<["--"], "dt">, Alias<dyn_syms>, HelpText<"Alias for --dyn-syms">;
diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp
index f7dcaa35656f..d76332d1ba36 100644
--- a/llvm/tools/llvm-readobj/WasmDumper.cpp
+++ b/llvm/tools/llvm-readobj/WasmDumper.cpp
@@ -20,7 +20,7 @@ using namespace object;
namespace {
-static const EnumEntry<unsigned> WasmSymbolTypes[] = {
+const EnumEntry<unsigned> WasmSymbolTypes[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SYMBOL_TYPE_##X }
ENUM_ENTRY(FUNCTION), ENUM_ENTRY(DATA), ENUM_ENTRY(GLOBAL),
@@ -28,7 +28,7 @@ static const EnumEntry<unsigned> WasmSymbolTypes[] = {
#undef ENUM_ENTRY
};
-static const EnumEntry<uint32_t> WasmSectionTypes[] = {
+const EnumEntry<uint32_t> WasmSectionTypes[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SEC_##X }
ENUM_ENTRY(CUSTOM), ENUM_ENTRY(TYPE), ENUM_ENTRY(IMPORT),
@@ -39,7 +39,7 @@ static const EnumEntry<uint32_t> WasmSectionTypes[] = {
#undef ENUM_ENTRY
};
-static const EnumEntry<unsigned> WasmSymbolFlags[] = {
+const EnumEntry<unsigned> WasmSymbolFlags[] = {
#define ENUM_ENTRY(X) \
{ #X, wasm::WASM_SYMBOL_##X }
ENUM_ENTRY(BINDING_GLOBAL),
diff --git a/llvm/tools/llvm-readobj/Win64EHDumper.cpp b/llvm/tools/llvm-readobj/Win64EHDumper.cpp
index 7e84c1bca35d..da964d3132e7 100644
--- a/llvm/tools/llvm-readobj/Win64EHDumper.cpp
+++ b/llvm/tools/llvm-readobj/Win64EHDumper.cpp
@@ -16,13 +16,13 @@ using namespace llvm;
using namespace llvm::object;
using namespace llvm::Win64EH;
-static const EnumEntry<unsigned> UnwindFlags[] = {
+const EnumEntry<unsigned> UnwindFlags[] = {
{ "ExceptionHandler", UNW_ExceptionHandler },
{ "TerminateHandler", UNW_TerminateHandler },
{ "ChainInfo" , UNW_ChainInfo }
};
-static const EnumEntry<unsigned> UnwindOpInfo[] = {
+const EnumEntry<unsigned> UnwindOpInfo[] = {
{ "RAX", 0 },
{ "RCX", 1 },
{ "RDX", 2 },
@@ -125,14 +125,52 @@ static std::error_code getSymbol(const COFFObjectFile &COFF, uint64_t VA,
return inconvertibleErrorCode();
}
+static object::SymbolRef getPreferredSymbol(const COFFObjectFile &COFF,
+ object::SymbolRef Sym,
+ uint32_t &SymbolOffset,
+ bool IsRangeEnd) {
+ // The symbol resolved by ResolveSymbol can be any internal
+ // nondescriptive symbol; try to resolve a more descriptive one.
+ COFFSymbolRef CoffSym = COFF.getCOFFSymbol(Sym);
+ if (CoffSym.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CoffSym.getSectionDefinition() == nullptr)
+ return Sym;
+ for (const auto &S : COFF.symbols()) {
+ COFFSymbolRef CS = COFF.getCOFFSymbol(S);
+ if (CS.getSectionNumber() == CoffSym.getSectionNumber() &&
+ CS.getValue() <= CoffSym.getValue() + SymbolOffset &&
+ CS.getStorageClass() != COFF::IMAGE_SYM_CLASS_LABEL &&
+ CS.getSectionDefinition() == nullptr) {
+ uint32_t Offset = CoffSym.getValue() + SymbolOffset - CS.getValue();
+ // For the end of a range, don't pick a symbol with a zero offset;
+ // prefer a symbol with a small positive offset.
+ if (Offset <= SymbolOffset && (!IsRangeEnd || Offset > 0)) {
+ SymbolOffset = Offset;
+ Sym = S;
+ CoffSym = CS;
+ if (CS.isExternal() && SymbolOffset == 0)
+ return Sym;
+ }
+ }
+ }
+ return Sym;
+}
+
static std::string formatSymbol(const Dumper::Context &Ctx,
const coff_section *Section, uint64_t Offset,
- uint32_t Displacement) {
+ uint32_t Displacement,
+ bool IsRangeEnd = false) {
std::string Buffer;
raw_string_ostream OS(Buffer);
SymbolRef Symbol;
if (!Ctx.ResolveSymbol(Section, Offset, Symbol, Ctx.UserData)) {
+ // We found a relocation at the given offset in the section, pointing
+ // at a symbol.
+
+ // Try to resolve label/section symbols into function names.
+ Symbol = getPreferredSymbol(Ctx.COFF, Symbol, Displacement, IsRangeEnd);
+
Expected<StringRef> Name = Symbol.getName();
if (Name) {
OS << *Name;
@@ -207,7 +245,8 @@ void Dumper::printRuntimeFunctionEntry(const Context &Ctx,
SW.printString("StartAddress",
formatSymbol(Ctx, Section, Offset + 0, RF.StartAddress));
SW.printString("EndAddress",
- formatSymbol(Ctx, Section, Offset + 4, RF.EndAddress));
+ formatSymbol(Ctx, Section, Offset + 4, RF.EndAddress,
+ /*IsRangeEnd=*/true));
SW.printString("UnwindInfoAddress",
formatSymbol(Ctx, Section, Offset + 8, RF.UnwindInfoOffset));
}
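
The getPreferredSymbol helper above encodes a small selection policy: among candidates in the same section at or below the target address, take the one with the smallest offset, but for a range-end address skip exact matches so the previous function's symbol wins. A self-contained sketch of just that policy over plain (value, name) pairs, assuming all candidates already belong to the right section (names are illustrative):

#include <cstdint>
#include <string>
#include <vector>

struct Candidate {
  uint64_t Value;   // symbol value within the section
  std::string Name;
};

// Pick the candidate closest at-or-below Target. For a range-end address,
// a zero offset is rejected so the label of the preceding function is used
// rather than the symbol that starts exactly at the end address.
static const Candidate *pickPreferred(const std::vector<Candidate> &Syms,
                                      uint64_t Target, bool IsRangeEnd) {
  const Candidate *Best = nullptr;
  uint64_t BestOffset = UINT64_MAX;
  for (const Candidate &C : Syms) {
    if (C.Value > Target)
      continue;
    uint64_t Offset = Target - C.Value;
    if (IsRangeEnd && Offset == 0)
      continue;
    if (Offset < BestOffset) {
      BestOffset = Offset;
      Best = &C;
    }
  }
  return Best;
}
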
diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index 94ef96e447ce..38e459cd5425 100644
--- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -13,8 +13,11 @@
#include "ObjDumper.h"
#include "llvm-readobj.h"
#include "llvm/Object/XCOFFObjectFile.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ScopedPrinter.h"
+#include <stddef.h>
+
using namespace llvm;
using namespace object;
@@ -27,6 +30,7 @@ public:
: ObjDumper(Writer, Obj.getFileName()), Obj(Obj) {}
void printFileHeaders() override;
+ void printAuxiliaryHeader() override;
void printSectionHeaders() override;
void printRelocations() override;
void printSymbols() override;
@@ -44,7 +48,11 @@ private:
void printCsectAuxEnt(XCOFFCsectAuxRef AuxEntRef);
void printSectAuxEntForStat(const XCOFFSectAuxEntForStat *AuxEntPtr);
void printSymbol(const SymbolRef &);
- void printRelocations(ArrayRef<XCOFFSectionHeader32> Sections);
+ template <typename RelTy> void printRelocation(RelTy Reloc);
+ template <typename Shdr, typename RelTy>
+ void printRelocations(ArrayRef<Shdr> Sections);
+ void printAuxiliaryHeader(const XCOFFAuxiliaryHeader32 *AuxHeader);
+ void printAuxiliaryHeader(const XCOFFAuxiliaryHeader64 *AuxHeader);
const XCOFFObjectFile &Obj;
};
} // anonymous namespace
@@ -96,6 +104,13 @@ void XCOFFDumper::printFileHeaders() {
// XCOFFObjectFile has the necessary support.
}
+void XCOFFDumper::printAuxiliaryHeader() {
+ if (Obj.is64Bit())
+ printAuxiliaryHeader(Obj.auxiliaryHeader64());
+ else
+ printAuxiliaryHeader(Obj.auxiliaryHeader32());
+}
+
void XCOFFDumper::printSectionHeaders() {
if (Obj.is64Bit())
printSectionHeaders(Obj.sections64());
@@ -105,12 +120,12 @@ void XCOFFDumper::printSectionHeaders() {
void XCOFFDumper::printRelocations() {
if (Obj.is64Bit())
- llvm_unreachable("64-bit relocation output not implemented!");
+ printRelocations<XCOFFSectionHeader64, XCOFFRelocation64>(Obj.sections64());
else
- printRelocations(Obj.sections32());
+ printRelocations<XCOFFSectionHeader32, XCOFFRelocation32>(Obj.sections32());
}
-static const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
+const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(R_POS), ECase(R_RL), ECase(R_RLA), ECase(R_NEG),
@@ -122,50 +137,71 @@ static const EnumEntry<XCOFF::RelocationType> RelocationTypeNameclass[] = {
#undef ECase
};
-void XCOFFDumper::printRelocations(ArrayRef<XCOFFSectionHeader32> Sections) {
- if (!opts::ExpandRelocs)
- report_fatal_error("Unexpanded relocation output not implemented.");
+template <typename RelTy> void XCOFFDumper::printRelocation(RelTy Reloc) {
+ Expected<StringRef> ErrOrSymbolName =
+ Obj.getSymbolNameByIndex(Reloc.SymbolIndex);
+ if (Error E = ErrOrSymbolName.takeError()) {
+ reportUniqueWarning(std::move(E));
+ return;
+ }
+ StringRef SymbolName = *ErrOrSymbolName;
+ StringRef RelocName = XCOFF::getRelocationTypeString(Reloc.Type);
+ if (opts::ExpandRelocs) {
+ DictScope Group(W, "Relocation");
+ W.printHex("Virtual Address", Reloc.VirtualAddress);
+ W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex);
+ W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No");
+ W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0);
+ W.printNumber("Length", Reloc.getRelocatedLength());
+ W.printEnum("Type", (uint8_t)Reloc.Type,
+ makeArrayRef(RelocationTypeNameclass));
+ } else {
+ raw_ostream &OS = W.startLine();
+ OS << W.hex(Reloc.VirtualAddress) << " " << RelocName << " " << SymbolName
+ << "(" << Reloc.SymbolIndex << ") " << W.hex(Reloc.Info) << "\n";
+ }
+}
+template <typename Shdr, typename RelTy>
+void XCOFFDumper::printRelocations(ArrayRef<Shdr> Sections) {
ListScope LS(W, "Relocations");
uint16_t Index = 0;
- for (const auto &Sec : Sections) {
+ for (const Shdr &Sec : Sections) {
++Index;
// Only the .text, .data, .tdata, and STYP_DWARF sections have relocations.
if (Sec.Flags != XCOFF::STYP_TEXT && Sec.Flags != XCOFF::STYP_DATA &&
Sec.Flags != XCOFF::STYP_TDATA && Sec.Flags != XCOFF::STYP_DWARF)
continue;
- auto Relocations = unwrapOrError(Obj.getFileName(), Obj.relocations(Sec));
+ Expected<ArrayRef<RelTy>> ErrOrRelocations = Obj.relocations<Shdr, RelTy>(Sec);
+ if (Error E = ErrOrRelocations.takeError()) {
+ reportUniqueWarning(std::move(E));
+ continue;
+ }
+
+ const ArrayRef<RelTy> Relocations = *ErrOrRelocations;
if (Relocations.empty())
continue;
W.startLine() << "Section (index: " << Index << ") " << Sec.getName()
<< " {\n";
- for (auto Reloc : Relocations) {
- StringRef SymbolName = unwrapOrError(
- Obj.getFileName(), Obj.getSymbolNameByIndex(Reloc.SymbolIndex));
-
- DictScope RelocScope(W, "Relocation");
- W.printHex("Virtual Address", Reloc.VirtualAddress);
- W.printNumber("Symbol", SymbolName, Reloc.SymbolIndex);
- W.printString("IsSigned", Reloc.isRelocationSigned() ? "Yes" : "No");
- W.printNumber("FixupBitValue", Reloc.isFixupIndicated() ? 1 : 0);
- W.printNumber("Length", Reloc.getRelocatedLength());
- W.printEnum("Type", (uint8_t)Reloc.Type,
- makeArrayRef(RelocationTypeNameclass));
- }
+ W.indent();
+
+ for (const RelTy Reloc : Relocations)
+ printRelocation(Reloc);
+
W.unindent();
W.startLine() << "}\n";
}
}
-static const EnumEntry<XCOFF::CFileStringType> FileStringType[] = {
+const EnumEntry<XCOFF::CFileStringType> FileStringType[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(XFT_FN), ECase(XFT_CT), ECase(XFT_CV), ECase(XFT_CD)
#undef ECase
};
-static const EnumEntry<XCOFF::SymbolAuxType> SymAuxType[] = {
+const EnumEntry<XCOFF::SymbolAuxType> SymAuxType[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(AUX_EXCEPT), ECase(AUX_FCN), ECase(AUX_SYM), ECase(AUX_FILE),
@@ -203,7 +239,7 @@ static const EnumEntry<XCOFF::StorageMappingClass> CsectStorageMappingClass[] =
#undef ECase
};
-static const EnumEntry<XCOFF::SymbolType> CsectSymbolTypeClass[] = {
+const EnumEntry<XCOFF::SymbolType> CsectSymbolTypeClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(XTY_ER), ECase(XTY_SD), ECase(XTY_LD), ECase(XTY_CM)
@@ -253,7 +289,7 @@ void XCOFFDumper::printSectAuxEntForStat(
W.printNumber("NumberOfLineNum", AuxEntPtr->NumberOfLineNum);
}
-static const EnumEntry<XCOFF::StorageClass> SymStorageClass[] = {
+const EnumEntry<XCOFF::StorageClass> SymStorageClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(C_NULL), ECase(C_AUTO), ECase(C_EXT), ECase(C_STAT),
@@ -302,14 +338,14 @@ static StringRef GetSymbolValueName(XCOFF::StorageClass SC) {
}
}
-static const EnumEntry<XCOFF::CFileLangId> CFileLangIdClass[] = {
+const EnumEntry<XCOFF::CFileLangId> CFileLangIdClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(TB_C), ECase(TB_CPLUSPLUS)
#undef ECase
};
-static const EnumEntry<XCOFF::CFileCpuId> CFileCpuIdClass[] = {
+const EnumEntry<XCOFF::CFileCpuId> CFileCpuIdClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(TCPU_PPC64), ECase(TCPU_COM), ECase(TCPU_970)
@@ -460,7 +496,12 @@ void XCOFFDumper::printSymbols() {
void XCOFFDumper::printStringTable() {
DictScope DS(W, "StringTable");
StringRef StrTable = Obj.getStringTable();
- printAsStringList(StrTable);
+ uint32_t StrTabSize = StrTable.size();
+ W.printNumber("Length", StrTabSize);
+ // Print strings from the fifth byte, since the first four bytes contain the
+ // length (in bytes) of the string table (including the length field).
+ if (StrTabSize > 4)
+ printAsStringList(StrTable, 4);
}
void XCOFFDumper::printDynamicSymbols() {
@@ -476,10 +517,46 @@ void XCOFFDumper::printStackMap() const {
}
void XCOFFDumper::printNeededLibraries() {
- llvm_unreachable("Unimplemented functionality for XCOFFDumper");
+ ListScope D(W, "NeededLibraries");
+ auto ImportFilesOrError = Obj.getImportFileTable();
+ if (!ImportFilesOrError) {
+ reportUniqueWarning(ImportFilesOrError.takeError());
+ return;
+ }
+
+ StringRef ImportFileTable = ImportFilesOrError.get();
+ const char *CurrentStr = ImportFileTable.data();
+ const char *TableEnd = ImportFileTable.end();
+ // Default column width for names is 13 even if no names are that long.
+ size_t BaseWidth = 13;
+
+ // Get the max width of BASE columns.
+ for (size_t StrIndex = 0; CurrentStr < TableEnd; ++StrIndex) {
+ size_t CurrentLen = strlen(CurrentStr);
+ CurrentStr += CurrentLen + 1;
+ if (StrIndex % 3 == 1)
+ BaseWidth = std::max(BaseWidth, CurrentLen);
+ }
+
+ auto &OS = static_cast<formatted_raw_ostream &>(W.startLine());
+ // Each entry consists of 3 strings: the path_name, base_name and
+ // archive_member_name. The first entry is a default LIBPATH value and other
+ // entries have no path_name. We just dump the base_name and
+ // archive_member_name here.
+ OS << left_justify("BASE", BaseWidth) << " MEMBER\n";
+ CurrentStr = ImportFileTable.data();
+ for (size_t StrIndex = 0; CurrentStr < TableEnd;
+ ++StrIndex, CurrentStr += strlen(CurrentStr) + 1) {
+ if (StrIndex >= 3 && StrIndex % 3 != 0) {
+ if (StrIndex % 3 == 1)
+ OS << " " << left_justify(CurrentStr, BaseWidth) << " ";
+ else
+ OS << CurrentStr << "\n";
+ }
+ }
}
-static const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
+const EnumEntry<XCOFF::SectionTypeFlags> SectionTypeFlagsNames[] = {
#define ECase(X) \
{ #X, XCOFF::X }
ECase(STYP_PAD), ECase(STYP_DWARF), ECase(STYP_TEXT),
@@ -523,6 +600,176 @@ void XCOFFDumper::printGenericSectionHeader(T &Sec) const {
W.printNumber("NumberOfLineNumbers", Sec.NumberOfLineNumbers);
}
+void XCOFFDumper::printAuxiliaryHeader(
+ const XCOFFAuxiliaryHeader32 *AuxHeader) {
+ if (AuxHeader == nullptr)
+ return;
+ uint16_t AuxSize = Obj.getOptionalHeaderSize();
+ uint16_t PartialFieldOffset = AuxSize;
+ const char *PartialFieldName = nullptr;
+
+ DictScope DS(W, "AuxiliaryHeader");
+
+#define PrintAuxMember32(H, S, T) \
+ if (offsetof(XCOFFAuxiliaryHeader32, T) + \
+ sizeof(XCOFFAuxiliaryHeader32::T) <= \
+ AuxSize) \
+ W.print##H(S, AuxHeader->T); \
+ else if (offsetof(XCOFFAuxiliaryHeader32, T) < AuxSize) { \
+ PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader32, T); \
+ PartialFieldName = S; \
+ }
+
+ PrintAuxMember32(Hex, "Magic", AuxMagic);
+ PrintAuxMember32(Hex, "Version", Version);
+ PrintAuxMember32(Hex, "Size of .text section", TextSize);
+ PrintAuxMember32(Hex, "Size of .data section", InitDataSize);
+ PrintAuxMember32(Hex, "Size of .bss section", BssDataSize);
+ PrintAuxMember32(Hex, "Entry point address", EntryPointAddr);
+ PrintAuxMember32(Hex, ".text section start address", TextStartAddr);
+ PrintAuxMember32(Hex, ".data section start address", DataStartAddr);
+ PrintAuxMember32(Hex, "TOC anchor address", TOCAnchorAddr);
+ PrintAuxMember32(Number, "Section number of entryPoint", SecNumOfEntryPoint);
+ PrintAuxMember32(Number, "Section number of .text", SecNumOfText);
+ PrintAuxMember32(Number, "Section number of .data", SecNumOfData);
+ PrintAuxMember32(Number, "Section number of TOC", SecNumOfTOC);
+ PrintAuxMember32(Number, "Section number of loader data", SecNumOfLoader);
+ PrintAuxMember32(Number, "Section number of .bss", SecNumOfBSS);
+ PrintAuxMember32(Hex, "Maxium alignment of .text", MaxAlignOfText);
+ PrintAuxMember32(Hex, "Maxium alignment of .data", MaxAlignOfData);
+ PrintAuxMember32(Hex, "Module type", ModuleType);
+ PrintAuxMember32(Hex, "CPU type of objects", CpuFlag);
+ PrintAuxMember32(Hex, "(Reserved)", CpuType);
+ PrintAuxMember32(Hex, "Maximum stack size", MaxStackSize);
+ PrintAuxMember32(Hex, "Maximum data size", MaxDataSize);
+ PrintAuxMember32(Hex, "Reserved for debugger", ReservedForDebugger);
+ PrintAuxMember32(Hex, "Text page size", TextPageSize);
+ PrintAuxMember32(Hex, "Data page size", DataPageSize);
+ PrintAuxMember32(Hex, "Stack page size", StackPageSize);
+ if (offsetof(XCOFFAuxiliaryHeader32, FlagAndTDataAlignment) +
+ sizeof(XCOFFAuxiliaryHeader32::FlagAndTDataAlignment) <=
+ AuxSize) {
+ W.printHex("Flag", AuxHeader->getFlag());
+ W.printHex("Alignment of thread-local storage",
+ AuxHeader->getTDataAlignment());
+ }
+
+ PrintAuxMember32(Number, "Section number for .tdata", SecNumOfTData);
+ PrintAuxMember32(Number, "Section number for .tbss", SecNumOfTBSS);
+
+ // Report fields that are only partially present, plus any trailing raw data.
+ if (PartialFieldOffset < AuxSize) {
+ std::string ErrInfo;
+ llvm::raw_string_ostream StringOS(ErrInfo);
+ StringOS << "Only partial field for " << PartialFieldName << " at offset ("
+ << PartialFieldOffset << ").";
+ StringOS.flush();
+ reportWarning(
+ make_error<GenericBinaryError>(ErrInfo, object_error::parse_failed),
+ "-");
+ W.printBinary(
+ "Raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) + PartialFieldOffset,
+ AuxSize - PartialFieldOffset));
+ } else if (sizeof(XCOFFAuxiliaryHeader32) < AuxSize) {
+ reportWarning(make_error<GenericBinaryError>(
+ "There are extra data beyond auxiliary header",
+ object_error::parse_failed),
+ "-");
+ W.printBinary("Extra raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) +
+ sizeof(XCOFFAuxiliaryHeader32),
+ AuxSize - sizeof(XCOFFAuxiliaryHeader32)));
+ }
+
+#undef PrintAuxMember32
+}
+
+void XCOFFDumper::printAuxiliaryHeader(
+ const XCOFFAuxiliaryHeader64 *AuxHeader) {
+ if (AuxHeader == nullptr)
+ return;
+ uint16_t AuxSize = Obj.getOptionalHeaderSize();
+ uint16_t PartialFieldOffset = AuxSize;
+ const char *PartialFieldName = nullptr;
+
+ DictScope DS(W, "AuxiliaryHeader");
+
+#define PrintAuxMember64(H, S, T) \
+ if (offsetof(XCOFFAuxiliaryHeader64, T) + \
+ sizeof(XCOFFAuxiliaryHeader64::T) <= \
+ AuxSize) \
+ W.print##H(S, AuxHeader->T); \
+ else if (offsetof(XCOFFAuxiliaryHeader64, T) < AuxSize) { \
+ PartialFieldOffset = offsetof(XCOFFAuxiliaryHeader64, T); \
+ PartialFieldName = S; \
+ }
+
+ PrintAuxMember64(Hex, "Magic", AuxMagic);
+ PrintAuxMember64(Hex, "Version", Version);
+ PrintAuxMember64(Hex, "Reserved for debugger", ReservedForDebugger);
+ PrintAuxMember64(Hex, ".text section start address", TextStartAddr);
+ PrintAuxMember64(Hex, ".data section start address", DataStartAddr);
+ PrintAuxMember64(Hex, "TOC anchor address", TOCAnchorAddr);
+ PrintAuxMember64(Number, "Section number of entryPoint", SecNumOfEntryPoint);
+ PrintAuxMember64(Number, "Section number of .text", SecNumOfText);
+ PrintAuxMember64(Number, "Section number of .data", SecNumOfData);
+ PrintAuxMember64(Number, "Section number of TOC", SecNumOfTOC);
+ PrintAuxMember64(Number, "Section number of loader data", SecNumOfLoader);
+ PrintAuxMember64(Number, "Section number of .bss", SecNumOfBSS);
+ PrintAuxMember64(Hex, "Maxium alignment of .text", MaxAlignOfText);
+ PrintAuxMember64(Hex, "Maxium alignment of .data", MaxAlignOfData);
+ PrintAuxMember64(Hex, "Module type", ModuleType);
+ PrintAuxMember64(Hex, "CPU type of objects", CpuFlag);
+ PrintAuxMember64(Hex, "(Reserved)", CpuType);
+ PrintAuxMember64(Hex, "Text page size", TextPageSize);
+ PrintAuxMember64(Hex, "Data page size", DataPageSize);
+ PrintAuxMember64(Hex, "Stack page size", StackPageSize);
+ if (offsetof(XCOFFAuxiliaryHeader64, FlagAndTDataAlignment) +
+ sizeof(XCOFFAuxiliaryHeader64::FlagAndTDataAlignment) <=
+ AuxSize) {
+ W.printHex("Flag", AuxHeader->getFlag());
+ W.printHex("Alignment of thread-local storage",
+ AuxHeader->getTDataAlignment());
+ }
+ PrintAuxMember64(Hex, "Size of .text section", TextSize);
+ PrintAuxMember64(Hex, "Size of .data section", InitDataSize);
+ PrintAuxMember64(Hex, "Size of .bss section", BssDataSize);
+ PrintAuxMember64(Hex, "Entry point address", EntryPointAddr);
+ PrintAuxMember64(Hex, "Maximum stack size", MaxStackSize);
+ PrintAuxMember64(Hex, "Maximum data size", MaxDataSize);
+ PrintAuxMember64(Number, "Section number for .tdata", SecNumOfTData);
+ PrintAuxMember64(Number, "Section number for .tbss", SecNumOfTBSS);
+ PrintAuxMember64(Hex, "Additional flags 64-bit XCOFF", XCOFF64Flag);
+
+ if (PartialFieldOffset < AuxSize) {
+ std::string ErrInfo;
+ llvm::raw_string_ostream StringOS(ErrInfo);
+ StringOS << "Only partial field for " << PartialFieldName << " at offset ("
+ << PartialFieldOffset << ").";
+ StringOS.flush();
+ reportWarning(
+ make_error<GenericBinaryError>(ErrInfo, object_error::parse_failed),
+ "-");
+ W.printBinary(
+ "Raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) + PartialFieldOffset,
+ AuxSize - PartialFieldOffset));
+ } else if (sizeof(XCOFFAuxiliaryHeader64) < AuxSize) {
+ reportWarning(make_error<GenericBinaryError>(
+ "There are extra data beyond auxiliary header",
+ object_error::parse_failed),
+ "-");
+ W.printBinary("Extra raw data", "",
+ ArrayRef<uint8_t>((const uint8_t *)(AuxHeader) +
+ sizeof(XCOFFAuxiliaryHeader64),
+ AuxSize - sizeof(XCOFFAuxiliaryHeader64)));
+ }
+
+#undef PrintAuxMember64
+}
+
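
The PrintAuxMember32/64 macros above hinge on one check: a field is printed only when offsetof plus sizeof fits inside the optional-header size recorded in the file header; a field that merely starts inside that size is the "partial field" case. A macro-free sketch of that check for a single field, using a stand-in struct rather than the real XCOFF headers:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct AuxHeader32Sketch { // stand-in for XCOFFAuxiliaryHeader32
  uint16_t AuxMagic;
  uint16_t Version;
  uint32_t TextSize;
  // ... more fields ...
};

// Print TextSize only if it lies entirely within the declared header size;
// warn if it is cut off partway through.
static void printTextSizeIfPresent(const AuxHeader32Sketch *H,
                                   uint16_t AuxSize) {
  constexpr size_t Off = offsetof(AuxHeader32Sketch, TextSize);
  if (Off + sizeof(H->TextSize) <= AuxSize)
    std::printf("Size of .text section: 0x%x\n", (unsigned)H->TextSize);
  else if (Off < AuxSize)
    std::printf("warning: only a partial TextSize field is present\n");
}
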
template <typename T>
void XCOFFDumper::printSectionHeaders(ArrayRef<T> Sections) {
ListScope Group(W, "Sections");
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index 0b49f03f4275..a598e2c28832 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -23,6 +23,7 @@
#include "WindowsResourceDumper.h"
#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/WithColor.h"
using namespace llvm;
@@ -65,7 +65,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -149,6 +149,9 @@ static bool COFFLoadConfig;
static bool COFFResources;
static bool COFFTLSDirectory;
+// XCOFF specific options.
+static bool XCOFFAuxiliaryHeader;
+
OutputStyleTy Output = OutputStyleTy::LLVM;
static std::vector<std::string> InputFilenames;
} // namespace opts
@@ -157,7 +160,7 @@ static StringRef ToolName;
namespace llvm {
-LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) {
+[[noreturn]] static void error(Twine Msg) {
// Flush the standard output to print the error at a
// proper place.
fouts().flush();
@@ -165,7 +168,7 @@ LLVM_ATTRIBUTE_NORETURN static void error(Twine Msg) {
exit(1);
}
-LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input) {
+[[noreturn]] void reportError(Error Err, StringRef Input) {
assert(Err);
if (Input == "-")
Input = "<stdin>";
@@ -268,6 +271,9 @@ static void parseOptions(const opt::InputArgList &Args) {
opts::COFFResources = Args.hasArg(OPT_coff_resources);
opts::COFFTLSDirectory = Args.hasArg(OPT_coff_tls_directory);
+ // XCOFF specific options.
+ opts::XCOFFAuxiliaryHeader = Args.hasArg(OPT_auxiliary_header);
+
opts::InputFilenames = Args.getAllArgValues(OPT_INPUT);
}
@@ -343,6 +349,9 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
if (opts::FileHeaders)
Dumper->printFileHeaders();
+ if (Obj.isXCOFF() && opts::XCOFFAuxiliaryHeader)
+ Dumper->printAuxiliaryHeader();
+
// This is only used for ELF currently. In some cases, when an object is
// corrupt (e.g. truncated), we can't dump anything except the file header.
if (!ContentErrString.empty())
@@ -577,6 +586,7 @@ int main(int argc, char *argv[]) {
if (opts::All) {
opts::FileHeaders = true;
+ opts::XCOFFAuxiliaryHeader = true;
opts::ProgramHeaders = true;
opts::SectionHeaders = true;
opts::Symbols = true;
@@ -595,6 +605,7 @@ int main(int argc, char *argv[]) {
if (opts::Headers) {
opts::FileHeaders = true;
+ opts::XCOFFAuxiliaryHeader = true;
opts::ProgramHeaders = true;
opts::SectionHeaders = true;
}
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.h b/llvm/tools/llvm-readobj/llvm-readobj.h
index 43d19b4d3f5c..7672da5c0aae 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -21,7 +21,7 @@ namespace llvm {
}
// Various helper functions.
- LLVM_ATTRIBUTE_NORETURN void reportError(Error Err, StringRef Input);
+ [[noreturn]] void reportError(Error Err, StringRef Input);
void reportWarning(Error Err, StringRef Input);
template <class T> T unwrapOrError(StringRef Input, Expected<T> EO) {
diff --git a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index f02d8981b30e..21339a3f8f3d 100644
--- a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -33,7 +34,6 @@
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -206,6 +206,9 @@ public:
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
unsigned SectionID, StringRef SectionName,
bool IsReadOnly) override;
+ TrivialMemoryManager::TLSSection
+ allocateTLSSection(uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName) override;
/// If non null, records subsequent Name -> SectionID mappings.
void setSectionIDsMap(SectionIDMap *SecIDMap) {
@@ -252,7 +255,8 @@ public:
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
- report_fatal_error("Can't allocate enough memory: " + EC.message());
+ report_fatal_error(Twine("Can't allocate enough memory: ") +
+ EC.message());
PreallocSlab = MB;
UsePreallocation = true;
@@ -282,6 +286,9 @@ private:
uintptr_t SlabSize = 0;
uintptr_t CurrentSlabOffset = 0;
SectionIDMap *SecIDMap = nullptr;
+#if defined(__x86_64__) && defined(__ELF__)
+ unsigned UsedTLSStorage = 0;
+#endif
};
uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
@@ -306,7 +313,8 @@ uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
- report_fatal_error("MemoryManager allocation failed: " + EC.message());
+ report_fatal_error(Twine("MemoryManager allocation failed: ") +
+ EC.message());
FunctionMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
@@ -334,11 +342,52 @@ uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
sys::Memory::MF_WRITE,
EC);
if (!MB.base())
- report_fatal_error("MemoryManager allocation failed: " + EC.message());
+ report_fatal_error(Twine("MemoryManager allocation failed: ") +
+ EC.message());
DataMemory.push_back(SectionInfo(SectionName, MB, SectionID));
return (uint8_t*)MB.base();
}
+// In case the execution needs TLS storage, we define a very small TLS memory
+// area here that will be used in allocateTLSSection().
+#if defined(__x86_64__) && defined(__ELF__)
+extern "C" {
+alignas(16) __attribute__((visibility("hidden"), tls_model("initial-exec"),
+ used)) thread_local char LLVMRTDyldTLSSpace[16];
+}
+#endif
+
+TrivialMemoryManager::TLSSection
+TrivialMemoryManager::allocateTLSSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) {
+#if defined(__x86_64__) && defined(__ELF__)
+ if (Size + UsedTLSStorage > sizeof(LLVMRTDyldTLSSpace)) {
+ return {};
+ }
+
+ // Get the offset of the TLSSpace in the TLS block by using a tpoff
+ // relocation here.
+ int64_t TLSOffset;
+ asm("leaq LLVMRTDyldTLSSpace@tpoff, %0" : "=r"(TLSOffset));
+
+ TLSSection Section;
+ // We use the storage directly as the initialization image, so any thread
+ // spawned after this allocation will not be initialized correctly. As a
+ // result, llvm-rtdyld only supports TLS in a single thread.
+ Section.InitializationImage =
+ reinterpret_cast<uint8_t *>(LLVMRTDyldTLSSpace + UsedTLSStorage);
+ Section.Offset = TLSOffset + UsedTLSStorage;
+
+ UsedTLSStorage += Size;
+
+ return Section;
+#else
+ return {};
+#endif
+}
+
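
The allocateTLSSection implementation above bump-allocates out of a fixed thread_local buffer and obtains the buffer's TP-relative offset through a tpoff relocation in inline assembly. The sketch below shows only the bump-allocation bookkeeping, with illustrative names and without the architecture-specific asm:

#include <cstdint>

// Fixed thread-local scratch area, mirroring LLVMRTDyldTLSSpace above but
// with an illustrative name and size.
thread_local char TLSScratch[16];
static unsigned UsedTLS = 0; // bytes already handed out

struct TLSSlot {
  uint8_t *InitImage = nullptr; // where the section's init bytes are copied
  unsigned Offset = 0;          // offset within the scratch buffer
};

// Hand out Size bytes from the scratch area; the real manager additionally
// adds the buffer's TP-relative offset so JIT'd code can address the slot.
static TLSSlot allocateTLS(unsigned Size) {
  if (UsedTLS + Size > sizeof(TLSScratch))
    return {}; // out of space: signal failure with an empty slot
  TLSSlot Slot;
  Slot.InitImage = reinterpret_cast<uint8_t *>(TLSScratch + UsedTLS);
  Slot.Offset = UsedTLS;
  UsedTLS += Size;
  return Slot;
}
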
static const char *ProgramName;
static void ErrorAndExit(const Twine &Msg) {
@@ -349,10 +398,10 @@ static void ErrorAndExit(const Twine &Msg) {
static void loadDylibs() {
for (const std::string &Dylib : Dylibs) {
if (!sys::fs::is_regular_file(Dylib))
- report_fatal_error("Dylib not found: '" + Dylib + "'.");
+ report_fatal_error(Twine("Dylib not found: '") + Dylib + "'.");
std::string ErrMsg;
if (sys::DynamicLibrary::LoadLibraryPermanently(Dylib.c_str(), &ErrMsg))
- report_fatal_error("Error loading '" + Dylib + "': " + ErrMsg);
+ report_fatal_error(Twine("Error loading '") + Dylib + "': " + ErrMsg);
}
}
@@ -413,8 +462,9 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) {
}
}
- std::unique_ptr<DIContext> Context =
- DWARFContext::create(*SymbolObj, LoadedObjInfo.get());
+ std::unique_ptr<DIContext> Context = DWARFContext::create(
+ *SymbolObj, DWARFContext::ProcessDebugRelocations::Process,
+ LoadedObjInfo.get());
std::vector<std::pair<SymbolRef, uint64_t>> SymAddr =
object::computeSymbolSizes(*SymbolObj);
@@ -710,15 +760,15 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
size_t EqualsIdx = Mapping.find_first_of('=');
if (EqualsIdx == StringRef::npos)
- report_fatal_error("Invalid dummy symbol specification '" + Mapping +
- "'. Should be '<symbol name>=<addr>'");
+ report_fatal_error(Twine("Invalid dummy symbol specification '") +
+ Mapping + "'. Should be '<symbol name>=<addr>'");
std::string Symbol = Mapping.substr(0, EqualsIdx);
std::string AddrStr = Mapping.substr(EqualsIdx + 1);
uint64_t Addr;
if (StringRef(AddrStr).getAsInteger(0, Addr))
- report_fatal_error("Invalid symbol mapping '" + Mapping + "'.");
+ report_fatal_error(Twine("Invalid symbol mapping '") + Mapping + "'.");
MemMgr.addDummySymbol(Symbol, Addr);
}
@@ -974,7 +1024,7 @@ int main(int argc, char **argv) {
Timers = ShowTimes ? std::make_unique<RTDyldTimers>() : nullptr;
- int Result;
+ int Result = 0;
switch (Action) {
case AC_Execute:
Result = executeInput();
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index ece322999107..f2be4e7d0712 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -452,10 +452,10 @@ struct ConstModifier: public Modifier {
switch (getRandom() % 7) {
case 0:
return PT->push_back(ConstantInt::get(
- Ty, APInt::getAllOnesValue(Ty->getPrimitiveSizeInBits())));
+ Ty, APInt::getAllOnes(Ty->getPrimitiveSizeInBits())));
case 1:
- return PT->push_back(ConstantInt::get(
- Ty, APInt::getNullValue(Ty->getPrimitiveSizeInBits())));
+ return PT->push_back(
+ ConstantInt::get(Ty, APInt::getZero(Ty->getPrimitiveSizeInBits())));
case 2:
case 3:
case 4:
diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp
index 0b068749917b..26be3914fb92 100644
--- a/llvm/tools/llvm-strings/llvm-strings.cpp
+++ b/llvm/tools/llvm-strings/llvm-strings.cpp
@@ -43,7 +43,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
@@ -73,7 +73,7 @@ static bool PrintFileName;
enum radix { none, octal, hexadecimal, decimal };
static radix Radix;
-LLVM_ATTRIBUTE_NORETURN static void reportCmdLineError(const Twine &Message) {
+[[noreturn]] static void reportCmdLineError(const Twine &Message) {
WithColor::error(errs(), ToolName) << Message << "\n";
exit(1);
}
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 227ce12a6d9a..2adbf1f1731d 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -52,7 +52,7 @@ enum ID {
#include "Opts.inc"
#undef PREFIX
-static const opt::OptTable::Info InfoTable[] = {
+const opt::OptTable::Info InfoTable[] = {
#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
HELPTEXT, METAVAR, VALUES) \
{ \
diff --git a/llvm/tools/llvm-tli-checker/Opts.td b/llvm/tools/llvm-tli-checker/Opts.td
new file mode 100644
index 000000000000..b1acef4093c4
--- /dev/null
+++ b/llvm/tools/llvm-tli-checker/Opts.td
@@ -0,0 +1,16 @@
+include "llvm/Option/OptParser.td"
+
+class F<string name, string help> : Flag<["--"], name>, HelpText<help>;
+multiclass Eq<string name, string metavar, string help> {
+ def NAME #_EQ : Joined<["--"], name #"=">,
+ HelpText<help>, MetaVarName<metavar>;
+ def : Separate<["--"], name>, Alias<!cast<Joined>(NAME #_EQ)>;
+}
+
+def help : F<"help", "Display available options">;
+def : Flag<["-"], "h">, HelpText<"Alias for --help">, Alias<help>;
+def dump_tli : F<"dump-tli", "Dump TLI's list of functions and whether they are available">;
+defm triple : Eq<"triple", "<triple>", "Target triple">;
+defm libdir : Eq<"libdir", "<directory>", "Root directory for finding library files">;
+def separate : F<"separate", "Report on each library file separately">;
+def report_EQ : Joined<["--"], "report=">, HelpText<"Level of detail to report">, Values<"summary,discrepancy,full">;
diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
new file mode 100644
index 000000000000..bf25efc0b0bd
--- /dev/null
+++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
@@ -0,0 +1,357 @@
+//===-- llvm-tli-checker.cpp - Compare TargetLibraryInfo to SDK libraries -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/WithColor.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+// Command-line option boilerplate.
+namespace {
+enum ID {
+ OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ OPT_##ID,
+#include "Opts.inc"
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Opts.inc"
+#undef PREFIX
+
+const opt::OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ { \
+ PREFIX, NAME, HELPTEXT, \
+ METAVAR, OPT_##ID, opt::Option::KIND##Class, \
+ PARAM, FLAGS, OPT_##GROUP, \
+ OPT_##ALIAS, ALIASARGS, VALUES},
+#include "Opts.inc"
+#undef OPTION
+};
+
+class TLICheckerOptTable : public opt::OptTable {
+public:
+ TLICheckerOptTable() : OptTable(InfoTable) {}
+};
+} // namespace
+
+// We have three levels of reporting.
+enum class ReportKind {
+ Error, // For argument parsing errors.
+ Summary, // Report counts but not details.
+ Discrepancy, // Report where TLI and the library differ.
+ Full // Report for every known-to-TLI function.
+};
+
+// Most of the ObjectFile interfaces return an Expected<T>, so make it easy
+// to ignore those.
+template <typename T> T unwrapIgnoreError(Expected<T> E) {
+ if (E)
+ return std::move(*E);
+ // Sink the error and return a nothing value.
+ consumeError(E.takeError());
+ return T();
+}
+
+static void fail(const Twine &Message) {
+ WithColor::error() << Message << '\n';
+ exit(EXIT_FAILURE);
+}
+
+// Some problem occurred with an archive member; complain and continue.
+static void reportArchiveChildIssue(const object::Archive::Child &C, int Index,
+ StringRef ArchiveFilename) {
+ // First get the member name.
+ std::string ChildName;
+ Expected<StringRef> NameOrErr = C.getName();
+ if (NameOrErr)
+ ChildName = std::string(NameOrErr.get());
+ else {
+ // Ignore the name-fetch error, just report the index.
+ consumeError(NameOrErr.takeError());
+ ChildName = "<file index: " + std::to_string(Index) + ">";
+ }
+
+ WithColor::warning() << ArchiveFilename << "(" << ChildName
+ << "): member is not usable\n";
+}
+
+// Return Name, and if Name is mangled, append "aka" and the demangled name.
+static std::string PrintableName(StringRef Name) {
+ std::string OutputName = "'";
+ OutputName += Name;
+ OutputName += "'";
+ if (Name.startswith("_Z") || Name.startswith("??")) {
+ OutputName += " aka ";
+ OutputName += demangle(Name.str());
+ }
+ return OutputName;
+}
+
+// Store all the names that TargetLibraryInfo knows about; the bool indicates
+// whether TLI has it marked as "available" for the target of interest.
+// This is a vector to preserve the sorted order for better reporting.
+struct TLINameList : std::vector<std::pair<StringRef, bool>> {
+ // Record all the TLI info in the vector.
+ void initialize(StringRef TargetTriple);
+ // Print out what we found.
+ void dump();
+};
+TLINameList TLINames;
+
+void TLINameList::initialize(StringRef TargetTriple) {
+ Triple T(TargetTriple);
+ TargetLibraryInfoImpl TLII(T);
+ TargetLibraryInfo TLI(TLII);
+
+ reserve(LibFunc::NumLibFuncs);
+ size_t NumAvailable = 0;
+ for (unsigned FI = 0; FI != LibFunc::NumLibFuncs; ++FI) {
+ LibFunc LF = (LibFunc)FI;
+ bool Available = TLI.has(LF);
+ // getName returns names only for available funcs.
+ TLII.setAvailable(LF);
+ emplace_back(TLI.getName(LF), Available);
+ if (Available)
+ ++NumAvailable;
+ }
+ outs() << "TLI knows " << LibFunc::NumLibFuncs << " symbols, " << NumAvailable
+ << " available for '" << TargetTriple << "'\n";
+}
+
+void TLINameList::dump() {
+ // Assume this gets called after initialize(), so we have the above line of
+ // output as a header. So, for example, no need to repeat the triple.
+ for (auto &TLIName : TLINames) {
+ outs() << (TLIName.second ? " " : "not ")
+ << "available: " << PrintableName(TLIName.first) << '\n';
+ }
+}
+
+// Store all the exported symbol names we found in the input libraries.
+// We use a map to get hashed lookup speed; the bool is meaningless.
+class SDKNameMap : public StringMap<bool> {
+ void populateFromObject(ObjectFile *O);
+ void populateFromArchive(Archive *A);
+
+public:
+ void populateFromFile(StringRef LibDir, StringRef LibName);
+};
+SDKNameMap SDKNames;
+
+// Given an ObjectFile, extract the global function symbols.
+void SDKNameMap::populateFromObject(ObjectFile *O) {
+ // FIXME: Support COFF.
+ if (!O->isELF()) {
+ WithColor::warning() << "Only ELF-format files are supported\n";
+ return;
+ }
+ auto *ELF = cast<const ELFObjectFileBase>(O);
+
+ for (auto I = ELF->getDynamicSymbolIterators().begin();
+ I != ELF->getDynamicSymbolIterators().end(); ++I) {
+ // We want only global function symbols.
+ SymbolRef::Type Type = unwrapIgnoreError(I->getType());
+ uint32_t Flags = unwrapIgnoreError(I->getFlags());
+ StringRef Name = unwrapIgnoreError(I->getName());
+ if (Type == SymbolRef::ST_Function && (Flags & SymbolRef::SF_Global))
+ insert({Name, true});
+ }
+}
+
+// Unpack an archive and populate from the component object files.
+// This roughly imitates dumpArchive() from llvm-objdump.cpp.
+void SDKNameMap::populateFromArchive(Archive *A) {
+ Error Err = Error::success();
+ int Index = -1;
+ for (auto &C : A->children(Err)) {
+ ++Index;
+ Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
+ if (!ChildOrErr) {
+ if (auto E = isNotObjectErrorInvalidFileType(ChildOrErr.takeError())) {
+ // Issue a generic warning.
+ consumeError(std::move(E));
+ reportArchiveChildIssue(C, Index, A->getFileName());
+ }
+ continue;
+ }
+ if (ObjectFile *O = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
+ populateFromObject(O);
+ // Ignore non-object archive members.
+ }
+ if (Err)
+ WithColor::defaultErrorHandler(std::move(Err));
+}
+
+// Unpack a library file and extract the global function names.
+void SDKNameMap::populateFromFile(StringRef LibDir, StringRef LibName) {
+ // Pick an arbitrary but reasonable default size.
+ SmallString<255> Filepath(LibDir);
+ sys::path::append(Filepath, LibName);
+ if (!sys::fs::exists(Filepath)) {
+ WithColor::warning() << "Could not find '" << StringRef(Filepath) << "'\n";
+ return;
+ }
+ outs() << "\nLooking for symbols in '" << StringRef(Filepath) << "'\n";
+ auto ExpectedBinary = createBinary(Filepath);
+ if (!ExpectedBinary) {
+ // FIXME: Report this better.
+ WithColor::defaultWarningHandler(ExpectedBinary.takeError());
+ return;
+ }
+ OwningBinary<Binary> OBinary = std::move(*ExpectedBinary);
+ Binary &Binary = *OBinary.getBinary();
+ size_t Precount = size();
+ if (Archive *A = dyn_cast<Archive>(&Binary))
+ populateFromArchive(A);
+ else if (ObjectFile *O = dyn_cast<ObjectFile>(&Binary))
+ populateFromObject(O);
+ else {
+ WithColor::warning() << "Not an Archive or ObjectFile: '"
+ << StringRef(Filepath) << "'\n";
+ return;
+ }
+ if (Precount == size())
+ WithColor::warning() << "No symbols found in '" << StringRef(Filepath)
+ << "'\n";
+ else
+ outs() << "Found " << size() - Precount << " global function symbols in '"
+ << StringRef(Filepath) << "'\n";
+}
+
+int main(int argc, char *argv[]) {
+ InitLLVM X(argc, argv);
+ BumpPtrAllocator A;
+ StringSaver Saver(A);
+ TLICheckerOptTable Tbl;
+ opt::InputArgList Args = Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver,
+ [&](StringRef Msg) { fail(Msg); });
+
+ if (Args.hasArg(OPT_help)) {
+ std::string Usage(argv[0]);
+ Usage += " [options] library-file [library-file...]";
+ Tbl.printHelp(outs(), Usage.c_str(),
+ "LLVM TargetLibraryInfo versus SDK checker");
+ outs() << "\nPass @FILE as argument to read options or library names from "
+ "FILE.\n";
+ return 0;
+ }
+
+ TLINames.initialize(Args.getLastArgValue(OPT_triple_EQ));
+
+ // --dump-tli doesn't require any input files.
+ if (Args.hasArg(OPT_dump_tli)) {
+ TLINames.dump();
+ return 0;
+ }
+
+ std::vector<std::string> LibList = Args.getAllArgValues(OPT_INPUT);
+ if (LibList.empty()) {
+ WithColor::error() << "No input files\n";
+ exit(EXIT_FAILURE);
+ }
+ StringRef LibDir = Args.getLastArgValue(OPT_libdir_EQ);
+ bool SeparateMode = Args.hasArg(OPT_separate);
+
+ ReportKind ReportLevel =
+ SeparateMode ? ReportKind::Summary : ReportKind::Discrepancy;
+ if (const opt::Arg *A = Args.getLastArg(OPT_report_EQ)) {
+ ReportLevel = StringSwitch<ReportKind>(A->getValue())
+ .Case("summary", ReportKind::Summary)
+ .Case("discrepancy", ReportKind::Discrepancy)
+ .Case("full", ReportKind::Full)
+ .Default(ReportKind::Error);
+ if (ReportLevel == ReportKind::Error) {
+ WithColor::error() << "invalid option for --report: " << A->getValue();
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (size_t I = 0; I < LibList.size(); ++I) {
+ // In SeparateMode we report on input libraries individually; otherwise
+ // we do one big combined search. Reading to the end of LibList here
+ // will cause the outer for loop to terminate cleanly.
+ if (SeparateMode) {
+ SDKNames.clear();
+ SDKNames.populateFromFile(LibDir, LibList[I]);
+ if (SDKNames.empty())
+ continue;
+ } else {
+ do
+ SDKNames.populateFromFile(LibDir, LibList[I]);
+ while (++I < LibList.size());
+ if (SDKNames.empty()) {
+ WithColor::error() << "NO symbols found!\n";
+ break;
+ }
+ outs() << "Found a grand total of " << SDKNames.size()
+ << " library symbols\n";
+ }
+ unsigned TLIdoesSDKdoesnt = 0;
+ unsigned TLIdoesntSDKdoes = 0;
+ unsigned TLIandSDKboth = 0;
+ unsigned TLIandSDKneither = 0;
+ for (auto &TLIName : TLINames) {
+ bool TLIHas = TLIName.second;
+ bool SDKHas = SDKNames.count(TLIName.first) == 1;
+ int Which = int(TLIHas) * 2 + int(SDKHas);
+ switch (Which) {
+ case 0: ++TLIandSDKneither; break;
+ case 1: ++TLIdoesntSDKdoes; break;
+ case 2: ++TLIdoesSDKdoesnt; break;
+ case 3: ++TLIandSDKboth; break;
+ }
+ // If the results match, report only if user requested a full report.
+ ReportKind Threshold =
+ TLIHas == SDKHas ? ReportKind::Full : ReportKind::Discrepancy;
+ if (Threshold <= ReportLevel) {
+ constexpr char YesNo[2][4] = {"no ", "yes"};
+ constexpr char Indicator[4][3] = {"!!", ">>", "<<", "=="};
+ outs() << Indicator[Which] << " TLI " << YesNo[TLIHas] << " SDK "
+ << YesNo[SDKHas] << ": " << PrintableName(TLIName.first) << '\n';
+ }
+ }
+
+ assert(TLIandSDKboth + TLIandSDKneither + TLIdoesSDKdoesnt +
+ TLIdoesntSDKdoes ==
+ LibFunc::NumLibFuncs);
+ outs() << "<< Total TLI yes SDK no: " << TLIdoesSDKdoesnt
+ << "\n>> Total TLI no SDK yes: " << TLIdoesntSDKdoes
+ << "\n== Total TLI yes SDK yes: " << TLIandSDKboth;
+ if (TLIandSDKboth == 0) {
+ outs() << " *** NO TLI SYMBOLS FOUND";
+ if (SeparateMode)
+ outs() << " in '" << LibList[I] << "'";
+ }
+ outs() << '\n';
+
+ if (!SeparateMode) {
+ if (TLIdoesSDKdoesnt == 0 && TLIdoesntSDKdoes == 0)
+ outs() << "PASS: LLVM TLI matched SDK libraries successfully.\n";
+ else
+ outs() << "FAIL: LLVM TLI doesn't match SDK libraries.\n";
+ }
+ }
+}
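
The comparison loop above packs the TLI/SDK availability pair into a two-bit index that selects both the counter and the output marker. A tiny worked example of that encoding on its own, outside the tool:

#include <cstdio>

int main() {
  // Index = TLIHas * 2 + SDKHas:
  // 0 -> neither ("!!"), 1 -> SDK only (">>"),
  // 2 -> TLI only ("<<"), 3 -> both ("==").
  const char *Indicator[4] = {"!!", ">>", "<<", "=="};
  for (int TLIHas = 0; TLIHas <= 1; ++TLIHas)
    for (int SDKHas = 0; SDKHas <= 1; ++SDKHas)
      std::printf("TLI %s SDK %s -> %s\n", TLIHas ? "yes" : "no ",
                  SDKHas ? "yes" : "no ", Indicator[TLIHas * 2 + SDKHas]);
  return 0;
}
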
diff --git a/llvm/tools/llvm-xray/xray-color-helper.cpp b/llvm/tools/llvm-xray/xray-color-helper.cpp
index e2cae21e162b..b2ed63881bdc 100644
--- a/llvm/tools/llvm-xray/xray-color-helper.cpp
+++ b/llvm/tools/llvm-xray/xray-color-helper.cpp
@@ -21,7 +21,7 @@ using namespace xray;
// Sequential ColorMaps, which are used to represent information
// from some minimum to some maximum.
-static const std::tuple<uint8_t, uint8_t, uint8_t> SequentialMaps[][9] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> SequentialMaps[][9] = {
{// The greys color scheme from http://colorbrewer2.org/
std::make_tuple(255, 255, 255), std::make_tuple(240, 240, 240),
std::make_tuple(217, 217, 217), std::make_tuple(189, 189, 189),
@@ -42,7 +42,7 @@ static const std::tuple<uint8_t, uint8_t, uint8_t> SequentialMaps[][9] = {
std::make_tuple(2, 56, 88)}};
// Sequential Maps extend the last colors given out of range inputs.
-static const std::tuple<uint8_t, uint8_t, uint8_t> SequentialBounds[][2] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> SequentialBounds[][2] = {
{// The Bounds for the greys color scheme
std::make_tuple(255, 255, 255), std::make_tuple(0, 0, 0)},
{// The Bounds for the OrRd color Scheme
@@ -58,7 +58,7 @@ ColorHelper::ColorHelper(ColorHelper::SequentialScheme S)
// representing differenes, or a range that goes from negative to positive.
// These take an input in the range [-1,1].
-static const std::tuple<uint8_t, uint8_t, uint8_t> DivergingCoeffs[][11] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> DivergingCoeffs[][11] = {
{// The PiYG color scheme from http://colorbrewer2.org/
std::make_tuple(142, 1, 82), std::make_tuple(197, 27, 125),
std::make_tuple(222, 119, 174), std::make_tuple(241, 182, 218),
@@ -69,7 +69,7 @@ static const std::tuple<uint8_t, uint8_t, uint8_t> DivergingCoeffs[][11] = {
// Diverging maps use out of bounds ranges to show missing data. Missing Right
// Being below min, and missing left being above max.
-static const std::tuple<uint8_t, uint8_t, uint8_t> DivergingBounds[][2] = {
+const std::tuple<uint8_t, uint8_t, uint8_t> DivergingBounds[][2] = {
{// The PiYG color scheme has green and red for missing right and left
// respectively.
std::make_tuple(255, 0, 0), std::make_tuple(0, 255, 0)}};
diff --git a/llvm/tools/llvm-xray/xray-converter.cpp b/llvm/tools/llvm-xray/xray-converter.cpp
index 47cb645a5408..82d0261ec4da 100644
--- a/llvm/tools/llvm-xray/xray-converter.cpp
+++ b/llvm/tools/llvm-xray/xray-converter.cpp
@@ -57,6 +57,15 @@ static cl::opt<bool>
cl::init(false), cl::sub(Convert));
static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize),
cl::desc("Alias for -symbolize"));
+static cl::opt<bool>
+ NoDemangle("no-demangle",
+ cl::desc("determines whether to demangle function name "
+ "when symbolizing function ids from the input log"),
+ cl::init(false), cl::sub(Convert));
+
+static cl::opt<bool> Demangle("demangle",
+ cl::desc("demangle symbols (default)"),
+ cl::sub(Convert));
static cl::opt<std::string>
ConvertInstrMap("instr_map",
@@ -373,7 +382,10 @@ static CommandRegistration Unused(&Convert, []() -> Error {
}
const auto &FunctionAddresses = Map.getFunctionAddresses();
- symbolize::LLVMSymbolizer Symbolizer;
+ symbolize::LLVMSymbolizer::Options SymbolizerOpts;
+ if (Demangle.getPosition() < NoDemangle.getPosition())
+ SymbolizerOpts.Demangle = false;
+ symbolize::LLVMSymbolizer Symbolizer(SymbolizerOpts);
llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer,
FunctionAddresses);
llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize);
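The -demangle/-no-demangle pair added above relies on cl::Option::getPosition(), which is 0 for a flag that never appeared and otherwise records where the flag occurred on the command line; comparing the two positions therefore lets the later of the two flags win, with demangling enabled by default. A minimal sketch of that idiom in isolation (the option names mirror the patch; main() and the printed output are illustration-only scaffolding):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static cl::opt<bool> Demangle("demangle", cl::desc("demangle symbols (default)"));
    static cl::opt<bool> NoDemangle("no-demangle", cl::desc("don't demangle symbols"));

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      // getPosition() is 0 for an unused flag, so this is true only when
      // -no-demangle was given and -demangle did not appear after it.
      bool DisableDemangling = Demangle.getPosition() < NoDemangle.getPosition();
      outs() << (DisableDemangling ? "demangling off\n" : "demangling on\n");
      return 0;
    }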
diff --git a/llvm/tools/llvm-xray/xray-extract.cpp b/llvm/tools/llvm-xray/xray-extract.cpp
index a6ffacc6ab92..52767a00f615 100644
--- a/llvm/tools/llvm-xray/xray-extract.cpp
+++ b/llvm/tools/llvm-xray/xray-extract.cpp
@@ -45,11 +45,12 @@ static cl::opt<bool> ExtractSymbolize("symbolize", cl::value_desc("symbolize"),
cl::sub(Extract));
static cl::alias ExtractSymbolize2("s", cl::aliasopt(ExtractSymbolize),
cl::desc("alias for -symbolize"));
-static cl::opt<bool> ExtractNoDemangle("no-demangle",
- cl::value_desc("no-demangle"),
- cl::init(false),
- cl::desc("don't demangle symbols"),
- cl::sub(Extract));
+static cl::opt<bool> Demangle("demangle",
+ cl::desc("demangle symbols (default)"),
+ cl::sub(Extract));
+static cl::opt<bool> NoDemangle("no-demangle",
+ cl::desc("don't demangle symbols"),
+ cl::sub(Extract));
namespace {
@@ -90,7 +91,7 @@ static CommandRegistration Unused(&Extract, []() -> Error {
const auto &FunctionAddresses =
InstrumentationMapOrError->getFunctionAddresses();
symbolize::LLVMSymbolizer::Options opts;
- if (ExtractNoDemangle)
+ if (Demangle.getPosition() < NoDemangle.getPosition())
opts.Demangle = false;
symbolize::LLVMSymbolizer Symbolizer(opts);
llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer,
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 8b1fbd09e40b..631d8eed5d7a 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -137,6 +137,7 @@ extern cl::opt<std::string> ProfileFile;
extern cl::opt<CSPGOKind> CSPGOKindFlag;
extern cl::opt<std::string> CSProfileGenFile;
extern cl::opt<bool> DisableBasicAA;
+extern cl::opt<bool> PrintPipelinePasses;
} // namespace llvm
static cl::opt<std::string>
@@ -173,58 +174,58 @@ bool tryParsePipelineText(PassBuilder &PB,
static void registerEPCallbacks(PassBuilder &PB) {
if (tryParsePipelineText<FunctionPassManager>(PB, PeepholeEPPipeline))
PB.registerPeepholeEPCallback(
- [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse PeepholeEP pipeline: ");
Err(PB.parsePassPipeline(PM, PeepholeEPPipeline));
});
if (tryParsePipelineText<LoopPassManager>(PB,
LateLoopOptimizationsEPPipeline))
PB.registerLateLoopOptimizationsEPCallback(
- [&PB](LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](LoopPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse LateLoopOptimizationsEP pipeline: ");
Err(PB.parsePassPipeline(PM, LateLoopOptimizationsEPPipeline));
});
if (tryParsePipelineText<LoopPassManager>(PB, LoopOptimizerEndEPPipeline))
PB.registerLoopOptimizerEndEPCallback(
- [&PB](LoopPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](LoopPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse LoopOptimizerEndEP pipeline: ");
Err(PB.parsePassPipeline(PM, LoopOptimizerEndEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB,
ScalarOptimizerLateEPPipeline))
PB.registerScalarOptimizerLateEPCallback(
- [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse ScalarOptimizerLateEP pipeline: ");
Err(PB.parsePassPipeline(PM, ScalarOptimizerLateEPPipeline));
});
if (tryParsePipelineText<CGSCCPassManager>(PB, CGSCCOptimizerLateEPPipeline))
PB.registerCGSCCOptimizerLateEPCallback(
- [&PB](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](CGSCCPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse CGSCCOptimizerLateEP pipeline: ");
Err(PB.parsePassPipeline(PM, CGSCCOptimizerLateEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB, VectorizerStartEPPipeline))
PB.registerVectorizerStartEPCallback(
- [&PB](FunctionPassManager &PM, PassBuilder::OptimizationLevel Level) {
+ [&PB](FunctionPassManager &PM, OptimizationLevel Level) {
ExitOnError Err("Unable to parse VectorizerStartEP pipeline: ");
Err(PB.parsePassPipeline(PM, VectorizerStartEPPipeline));
});
if (tryParsePipelineText<ModulePassManager>(PB, PipelineStartEPPipeline))
PB.registerPipelineStartEPCallback(
- [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse PipelineStartEP pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineStartEPPipeline));
});
if (tryParsePipelineText<ModulePassManager>(
PB, PipelineEarlySimplificationEPPipeline))
PB.registerPipelineEarlySimplificationEPCallback(
- [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse EarlySimplification pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline));
});
if (tryParsePipelineText<FunctionPassManager>(PB, OptimizerLastEPPipeline))
PB.registerOptimizerLastEPCallback(
- [&PB](ModulePassManager &PM, PassBuilder::OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel) {
ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
});
@@ -259,12 +260,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
PGOOptions::SampleUse);
break;
case NoPGO:
- if (DebugInfoForProfiling)
+ if (DebugInfoForProfiling || PseudoProbeForProfiling)
P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
- true);
- else if (PseudoProbeForProfiling)
- P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
- false, true);
+ DebugInfoForProfiling, PseudoProbeForProfiling);
else
P = None;
}
@@ -287,6 +285,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
P->CSAction = PGOOptions::CSIRUse;
}
}
+ if (TM)
+ TM->setPGOOption(P);
+
LoopAnalysisManager LAM;
FunctionAnalysisManager FAM;
CGSCCAnalysisManager CGAM;
@@ -339,18 +340,17 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
PB.registerPipelineParsingCallback(
[](StringRef Name, ModulePassManager &MPM,
ArrayRef<PassBuilder::PipelineElement>) {
+ AddressSanitizerOptions Opts;
if (Name == "asan-pipeline") {
MPM.addPass(
RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
- MPM.addPass(
- createModuleToFunctionPassAdaptor(AddressSanitizerPass()));
- MPM.addPass(ModuleAddressSanitizerPass());
+ MPM.addPass(ModuleAddressSanitizerPass(Opts));
return true;
} else if (Name == "asan-function-pipeline") {
MPM.addPass(
RequireAnalysisPass<ASanGlobalsMetadataAnalysis, Module>());
MPM.addPass(
- createModuleToFunctionPassAdaptor(AddressSanitizerPass()));
+ createModuleToFunctionPassAdaptor(AddressSanitizerPass(Opts)));
return true;
}
return false;
@@ -411,6 +411,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
if (EnableDebugify)
MPM.addPass(NewPMDebugifyPass());
+ // Add passes according to the -passes options.
if (!PassPipeline.empty()) {
assert(Passes.empty() &&
"PassPipeline and Passes should not both contain passes");
@@ -419,10 +420,26 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
return false;
}
}
+ // Add passes specified using the legacy PM syntax (i.e. not using
+ // -passes). This should be removed later when such support has been
+ // deprecated, i.e. when all lit tests running opt (and not using
+ // -enable-new-pm=0) have been updated to use -passes.
for (auto PassName : Passes) {
std::string ModifiedPassName(PassName.begin(), PassName.end());
if (PB.isAnalysisPassName(PassName))
ModifiedPassName = "require<" + ModifiedPassName + ">";
+ // FIXME: These translations are supposed to be removed when lit tests that
+ // use these names have been updated to use the -passes syntax (and when the
+ // support for using the old syntax to specify passes is considered
+ // deprecated for the new PM).
+ if (ModifiedPassName == "early-cse-memssa")
+ ModifiedPassName = "early-cse<memssa>";
+ else if (ModifiedPassName == "post-inline-ee-instrument")
+ ModifiedPassName = "ee-instrument<post-inline>";
+ else if (ModifiedPassName == "loop-extract-single")
+ ModifiedPassName = "loop-extract<single>";
+ else if (ModifiedPassName == "lower-matrix-intrinsics-minimal")
+ ModifiedPassName = "lower-matrix-intrinsics<minimal>";
if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName)) {
errs() << Arg0 << ": " << toString(std::move(Err)) << "\n";
return false;
@@ -455,6 +472,17 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// Before executing passes, print the final values of the LLVM options.
cl::PrintOptionValues();
+ // Print a textual, '-passes=' compatible, representation of pipeline if
+ // requested.
+ if (PrintPipelinePasses) {
+ MPM.printPipeline(outs(), [&PIC](StringRef ClassName) {
+ auto PassName = PIC.getPassNameForClassName(ClassName);
+ return PassName.empty() ? ClassName : PassName;
+ });
+ outs() << "\n";
+ return true;
+ }
+
// Now that we have all of the passes ready, run them.
MPM.run(M, MAM);
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 094f517fb703..7793a5471793 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -38,6 +38,7 @@
#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/TargetRegistry.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
@@ -46,7 +47,6 @@
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
@@ -102,9 +102,6 @@ static cl::opt<bool>
Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
-PrintEachXForm("p", cl::desc("Print module after each transformation"));
-
-static cl::opt<bool>
NoOutput("disable-output",
cl::desc("Do not write result bitcode file"), cl::Hidden);
@@ -146,17 +143,7 @@ static cl::opt<bool>
StripNamedMetadata("strip-named-metadata",
cl::desc("Strip module-level named metadata"));
-static cl::opt<bool>
- DisableInline("disable-inlining",
- cl::desc("Do not run the inliner pass (legacy PM only)"));
-
-static cl::opt<bool>
-DisableOptimizations("disable-opt",
- cl::desc("Do not run any optimization passes"));
-static cl::opt<bool> StandardLinkOpts(
- "std-link-opts",
- cl::desc("Include the standard link time optimizations (legacy PM only)"));
static cl::opt<bool>
OptLevelO0("O0", cl::desc("Optimization level 0. Similar to clang -O0. "
@@ -368,9 +355,7 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
Builder.OptLevel = OptLevel;
Builder.SizeLevel = SizeLevel;
- if (DisableInline) {
- // No inlining pass
- } else if (OptLevel > 1) {
+ if (OptLevel > 1) {
Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false);
} else {
Builder.Inliner = createAlwaysInlinerLegacyPass();
@@ -418,17 +403,6 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
Builder.populateModulePassManager(MPM);
}
-static void AddStandardLinkPasses(legacy::PassManagerBase &PM) {
- PassManagerBuilder Builder;
- Builder.VerifyInput = true;
- if (DisableOptimizations)
- Builder.OptLevel = 0;
-
- if (!DisableInline)
- Builder.Inliner = createFunctionInliningPass();
- Builder.populateLTOPassManager(PM);
-}
-
//===----------------------------------------------------------------------===//
// CodeGen-related helper functions.
//
@@ -507,9 +481,10 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
return false;
std::vector<StringRef> PassNamePrefix = {
- "x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-", "nvptx-",
- "mips-", "lanai-", "hexagon-", "bpf-", "avr-", "thumb2-", "arm-",
- "si-", "gcn-", "amdgpu-", "aarch64-", "amdgcn-", "polly-"};
+ "x86-", "xcore-", "wasm-", "systemz-", "ppc-", "nvvm-",
+ "nvptx-", "mips-", "lanai-", "hexagon-", "bpf-", "avr-",
+ "thumb2-", "arm-", "si-", "gcn-", "amdgpu-", "aarch64-",
+ "amdgcn-", "polly-", "riscv-"};
std::vector<StringRef> PassNameContain = {"ehprepare"};
std::vector<StringRef> PassNameExact = {
"safe-stack", "cost-model",
@@ -797,19 +772,32 @@ int main(int argc, char **argv) {
<< "Cannot specify passes via both -foo-pass and --passes=foo-pass\n";
return 1;
}
+ auto NumOLevel = OptLevelO0 + OptLevelO1 + OptLevelO2 + OptLevelO3 +
+ OptLevelOs + OptLevelOz;
+ if (NumOLevel > 1) {
+ errs() << "Cannot specify multiple -O#\n";
+ return 1;
+ }
+ if (NumOLevel > 0 && PassPipeline.getNumOccurrences() > 0) {
+ errs() << "Cannot specify -O# and --passes=, use "
+ "-passes='default<O#>,other-pass'\n";
+ return 1;
+ }
+ std::string Pipeline = PassPipeline;
+
SmallVector<StringRef, 4> Passes;
if (OptLevelO0)
- Passes.push_back("default<O0>");
+ Pipeline = "default<O0>";
if (OptLevelO1)
- Passes.push_back("default<O1>");
+ Pipeline = "default<O1>";
if (OptLevelO2)
- Passes.push_back("default<O2>");
+ Pipeline = "default<O2>";
if (OptLevelO3)
- Passes.push_back("default<O3>");
+ Pipeline = "default<O3>";
if (OptLevelOs)
- Passes.push_back("default<Os>");
+ Pipeline = "default<Os>";
if (OptLevelOz)
- Passes.push_back("default<Oz>");
+ Pipeline = "default<Oz>";
for (const auto &P : PassList)
Passes.push_back(P->getPassArgument());
OutputKind OK = OK_NoOutput;
@@ -828,7 +816,7 @@ int main(int argc, char **argv) {
// string. Hand off the rest of the functionality to the new code for that
// layer.
return runPassPipeline(argv[0], *M, TM.get(), &TLII, Out.get(),
- ThinLinkOut.get(), RemarksFile.get(), PassPipeline,
+ ThinLinkOut.get(), RemarksFile.get(), Pipeline,
Passes, OK, VK, PreserveAssemblyUseListOrder,
PreserveBitcodeUseListOrder, EmitSummaryIndex,
EmitModuleHash, EnableDebugify)
@@ -909,12 +897,6 @@ int main(int argc, char **argv) {
// Create a new optimization pass for each one specified on the command line
for (unsigned i = 0; i < PassList.size(); ++i) {
- if (StandardLinkOpts &&
- StandardLinkOpts.getPosition() < PassList.getPosition(i)) {
- AddStandardLinkPasses(Passes);
- StandardLinkOpts = false;
- }
-
if (OptLevelO0 && OptLevelO0.getPosition() < PassList.getPosition(i)) {
AddOptimizationPasses(Passes, *FPasses, TM.get(), 0, 0);
OptLevelO0 = false;
@@ -976,15 +958,6 @@ int main(int argc, char **argv) {
}
}
}
-
- if (PrintEachXForm)
- Passes.add(
- createPrintModulePass(errs(), "", PreserveAssemblyUseListOrder));
- }
-
- if (StandardLinkOpts) {
- AddStandardLinkPasses(Passes);
- StandardLinkOpts = false;
}
if (OptLevelO0)
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 00bdd127e3c2..be17d5c718c2 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -636,6 +636,15 @@ struct MatchableInfo {
if (RequiredFeatures.size() != RHS.RequiredFeatures.size())
return RequiredFeatures.size() > RHS.RequiredFeatures.size();
+ // For X86 AVX/AVX512 instructions, we prefer the VEX encoding because its
+ // encoding size is smaller. Since X86InstrSSE.td is included ahead of
+ // X86InstrAVX512.td, the AVX instruction IDs are less than the AVX512 IDs.
+ // We use the ID to sort AVX instructions before AVX512 instructions in the
+ // matching table.
+ if (TheDef->isSubClassOf("Instruction") &&
+ TheDef->getValueAsBit("HasPositionOrder"))
+ return TheDef->getID() < RHS.TheDef->getID();
+
return false;
}
@@ -1062,7 +1071,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
// Remove comments from the asm string. We know that the asmstring only
// has one line.
if (!CommentDelimiter.empty() &&
- StringRef(AsmString).find(CommentDelimiter) != StringRef::npos)
+ StringRef(AsmString).contains(CommentDelimiter))
PrintFatalError(TheDef->getLoc(),
"asmstring for instruction has comment character in it, "
"mark it isCodeGenOnly");
@@ -1077,7 +1086,7 @@ bool MatchableInfo::validate(StringRef CommentDelimiter, bool IsAlias) const {
std::set<std::string> OperandNames;
for (const AsmOperand &Op : AsmOperands) {
StringRef Tok = Op.Token;
- if (Tok[0] == '$' && Tok.find(':') != StringRef::npos)
+ if (Tok[0] == '$' && Tok.contains(':'))
PrintFatalError(TheDef->getLoc(),
"matchable with operand modifier '" + Tok +
"' not supported by asm matcher. Mark isCodeGenOnly!");
@@ -3915,8 +3924,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (HasDeprecation) {
OS << " std::string Info;\n";
- OS << " if (!getParser().getTargetParser().\n";
- OS << " getTargetOptions().MCNoDeprecatedWarn &&\n";
+ OS << " if (!getParser().getTargetParser().getTargetOptions().MCNoDeprecatedWarn &&\n";
OS << " MII.getDeprecatedInfo(Inst, getSTI(), Info)) {\n";
OS << " SMLoc Loc = ((" << Target.getName()
<< "Operand &)*Operands[0]).getStartLoc();\n";
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 94fd8f7e92b4..bb13c4033db7 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -457,9 +457,14 @@ void AsmWriterEmitter::EmitPrintInstruction(
StringRef ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
bool PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
+ // This function has some huge switch statements that cause excessive
+ // compile time in an LLVM profile-instrumented build. This print function
+ // is usually not called frequently during compilation. Here we disable
+ // profile instrumentation for this function.
O << "/// printInstruction - This method is automatically generated by "
"tablegen\n"
"/// from the instruction set description.\n"
+ "LLVM_NO_PROFILE_INSTRUMENT_FUNCTION\n"
"void "
<< Target.getName() << ClassName
<< "::printInstruction(const MCInst *MI, uint64_t Address, "
diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index ffc2878d3508..ee77ef5eda5f 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -515,7 +515,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " std::string msg;\n"
<< " raw_string_ostream Msg(msg);\n"
<< " Msg << \"Not supported instr: \" << MI;\n"
- << " report_fatal_error(Msg.str());\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
<< " }\n";
if (UseAPInt)
o << " Inst = Value;\n";
@@ -638,7 +638,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " if (MissingFeatures.test(i))\n"
<< " Msg << SubtargetFeatureNames[i] << \" \";\n"
<< " Msg << \"predicate(s) are not met\";\n"
- << " report_fatal_error(Msg.str());\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
<< " }\n"
<< "#else\n"
<< " // Silence unused variable warning on targets that don't use MCII for "
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index c1a3a34d928b..4a247050ceeb 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -451,13 +451,16 @@ static Iter max_if(Iter B, Iter E, Pred P, Less L) {
}
/// Make sure that for each type in Small, there exists a larger type in Big.
-bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
- TypeSetByHwMode &Big) {
+bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big,
+ bool SmallIsVT) {
ValidateOnExit _1(Small, *this), _2(Big, *this);
if (TP.hasError())
return false;
bool Changed = false;
+ assert((!SmallIsVT || !Small.empty()) &&
+ "Small should not be empty for SDTCisVTSmallerThanOp");
+
if (Small.empty())
Changed |= EnforceAny(Small);
if (Big.empty())
@@ -476,7 +479,9 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
TypeSetByHwMode::SetType &S = Small.get(M);
TypeSetByHwMode::SetType &B = Big.get(M);
- if (any_of(S, isIntegerOrPtr) && any_of(S, isIntegerOrPtr)) {
+ assert((!SmallIsVT || !S.empty()) && "Expected non-empty type");
+
+ if (any_of(S, isIntegerOrPtr) && any_of(B, isIntegerOrPtr)) {
auto NotInt = [](MVT VT) { return !isIntegerOrPtr(VT); };
Changed |= berase_if(S, NotInt);
Changed |= berase_if(B, NotInt);
@@ -484,6 +489,11 @@ bool TypeInfer::EnforceSmallerThan(TypeSetByHwMode &Small,
auto NotFP = [](MVT VT) { return !isFloatingPoint(VT); };
Changed |= berase_if(S, NotFP);
Changed |= berase_if(B, NotFP);
+ } else if (SmallIsVT && B.empty()) {
+ // B is empty and since S is a specific VT, it will never be empty. Don't
+ // report this as a change, just clear S and continue. This prevents an
+ // infinite loop.
+ S.clear();
} else if (S.empty() || B.empty()) {
Changed = !S.empty() || !B.empty();
S.clear();
@@ -1612,20 +1622,22 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
unsigned OResNo = 0;
TreePatternNode *OtherNode =
getOperandNum(x.SDTCisSameAs_Info.OtherOperandNum, N, NodeInfo, OResNo);
- return NodeToApply->UpdateNodeType(ResNo, OtherNode->getExtType(OResNo),TP)|
- OtherNode->UpdateNodeType(OResNo,NodeToApply->getExtType(ResNo),TP);
+ return (int)NodeToApply->UpdateNodeType(ResNo,
+ OtherNode->getExtType(OResNo), TP) |
+ (int)OtherNode->UpdateNodeType(OResNo,
+ NodeToApply->getExtType(ResNo), TP);
}
case SDTCisVTSmallerThanOp: {
// The NodeToApply must be a leaf node that is a VT. OtherOperandNum must
// have an integer type that is smaller than the VT.
if (!NodeToApply->isLeaf() ||
!isa<DefInit>(NodeToApply->getLeafValue()) ||
- !static_cast<DefInit*>(NodeToApply->getLeafValue())->getDef()
+ !cast<DefInit>(NodeToApply->getLeafValue())->getDef()
->isSubClassOf("ValueType")) {
TP.error(N->getOperator()->getName() + " expects a VT operand!");
return false;
}
- DefInit *DI = static_cast<DefInit*>(NodeToApply->getLeafValue());
+ DefInit *DI = cast<DefInit>(NodeToApply->getLeafValue());
const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
auto VVT = getValueTypeByHwMode(DI->getDef(), T.getHwModes());
TypeSetByHwMode TypeListTmp(VVT);
@@ -1635,7 +1647,8 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
getOperandNum(x.SDTCisVTSmallerThanOp_Info.OtherOperandNum, N, NodeInfo,
OResNo);
- return TI.EnforceSmallerThan(TypeListTmp, OtherNode->getExtType(OResNo));
+ return TI.EnforceSmallerThan(TypeListTmp, OtherNode->getExtType(OResNo),
+ /*SmallIsVT*/ true);
}
case SDTCisOpSmallerThanOp: {
unsigned BResNo = 0;
@@ -3819,7 +3832,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
InstInputs.erase(OpName); // It occurred, remove from map.
if (InVal->isLeaf() && isa<DefInit>(InVal->getLeafValue())) {
- Record *InRec = static_cast<DefInit*>(InVal->getLeafValue())->getDef();
+ Record *InRec = cast<DefInit>(InVal->getLeafValue())->getDef();
if (!checkOperandClass(Op, InRec))
I.error("Operand $" + OpName + "'s register class disagrees"
" between the operand and pattern");
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index a69f1e2e3030..39d81230a4f2 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -298,8 +298,11 @@ struct TypeInfer {
/// unchanged.
bool EnforceAny(TypeSetByHwMode &Out);
/// Make sure that for each type in \p Small, there exists a larger type
- /// in \p Big.
- bool EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big);
+ /// in \p Big. \p SmallIsVT indicates that this is being called for
+ /// SDTCisVTSmallerThanOp. In that case the TypeSetByHwMode is re-created for
+ /// each call and needs special consideration in how we detect changes.
+ bool EnforceSmallerThan(TypeSetByHwMode &Small, TypeSetByHwMode &Big,
+ bool SmallIsVT = false);
/// 1. Ensure that for each type T in \p Vec, T is a vector type, and that
/// for each type U in \p Elem, U is a scalar type.
/// 2. Ensure that for each (scalar) type U in \p Elem, there exists a
diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp
index 6f718acbac3e..38871eb8cf3c 100644
--- a/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -5,7 +5,7 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// CodeGenMapTable provides functionality for the TabelGen to create
+// CodeGenMapTable provides functionality for the TableGen to create
// relation mapping between instructions. Relation models are defined using
// InstrMapping as a base class. This file implements the functionality which
// parses these definitions and generates relation maps using the information
@@ -443,14 +443,16 @@ void MapTableEmitter::emitMapFuncBody(raw_ostream &OS,
if (ValueCols.size() > 1) {
for (unsigned i = 0, e = ValueCols.size(); i < e; i++) {
ListInit *ColumnI = ValueCols[i];
+ OS << " if (";
for (unsigned j = 0, ColSize = ColumnI->size(); j < ColSize; ++j) {
std::string ColName = ColFields->getElement(j)->getAsUnquotedString();
- OS << " if (in" << ColName;
+ OS << "in" << ColName;
OS << " == ";
OS << ColName << "_" << ColumnI->getElement(j)->getAsUnquotedString();
- if (j < ColumnI->size() - 1) OS << " && ";
- else OS << ")\n";
+ if (j < ColumnI->size() - 1)
+ OS << " && ";
}
+ OS << ")\n";
OS << " return " << InstrMapDesc.getName();
OS << "Table[mid]["<<i+1<<"];\n";
}
@@ -480,9 +482,10 @@ void MapTableEmitter::emitTablesWithFunc(raw_ostream &OS) {
if (ValueCols.size() > 1) {
for (Init *CF : ColFields->getValues()) {
std::string ColName = CF->getAsUnquotedString();
- OS << ", enum " << ColName << " in" << ColName << ") {\n";
+ OS << ", enum " << ColName << " in" << ColName;
}
- } else { OS << ") {\n"; }
+ }
+ OS << ") {\n";
// Emit map table.
unsigned TableSize = emitBinSearchTable(OS);
diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp
index 930b7742103e..afaeb73ffab1 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -734,7 +734,7 @@ static void sortAndUniqueRegisters(CodeGenRegister::Vec &M) {
CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
: TheDef(R), Name(std::string(R->getName())),
- TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1) {
+ TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), TSFlags(0) {
GeneratePressureSet = R->getValueAsBit("GeneratePressureSet");
std::vector<Record*> TypeList = R->getValueAsListOfDefs("RegTypes");
if (TypeList.empty())
@@ -802,6 +802,12 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
if (AllocationPriority < 0 || AllocationPriority > 63)
PrintFatalError(R->getLoc(), "AllocationPriority out of range [0,63]");
this->AllocationPriority = AllocationPriority;
+
+ BitsInit *TSF = R->getValueAsBitsInit("TSFlags");
+ for (unsigned I = 0, E = TSF->getNumBits(); I != E; ++I) {
+ BitInit *Bit = cast<BitInit>(TSF->getBit(I));
+ TSFlags |= uint8_t(Bit->getValue()) << I;
+ }
}
// Create an inferred register class that was missing from the .td files.
@@ -811,7 +817,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank,
StringRef Name, Key Props)
: Members(*Props.Members), TheDef(nullptr), Name(std::string(Name)),
TopoSigs(RegBank.getNumTopoSigs()), EnumValue(-1), RSI(Props.RSI),
- CopyCost(0), Allocatable(true), AllocationPriority(0) {
+ CopyCost(0), Allocatable(true), AllocationPriority(0), TSFlags(0) {
Artificial = true;
GeneratePressureSet = false;
for (const auto R : Members) {
@@ -839,6 +845,7 @@ void CodeGenRegisterClass::inheritProperties(CodeGenRegBank &RegBank) {
});
AltOrderSelect = Super.AltOrderSelect;
AllocationPriority = Super.AllocationPriority;
+ TSFlags = Super.TSFlags;
GeneratePressureSet |= Super.GeneratePressureSet;
// Copy all allocation orders, filter out foreign registers from the larger
@@ -1617,9 +1624,9 @@ static void computeUberSets(std::vector<UberRegSet> &UberSets,
assert(USetID && "register number 0 is invalid");
AllocatableRegs.insert((*Regs.begin())->EnumValue);
- for (auto I = std::next(Regs.begin()), E = Regs.end(); I != E; ++I) {
- AllocatableRegs.insert((*I)->EnumValue);
- UberSetIDs.join(USetID, (*I)->EnumValue);
+ for (const CodeGenRegister *CGR : llvm::drop_begin(Regs)) {
+ AllocatableRegs.insert(CGR->EnumValue);
+ UberSetIDs.join(USetID, CGR->EnumValue);
}
}
// Combine non-allocatable regs.
@@ -1908,6 +1915,9 @@ void CodeGenRegBank::computeRegUnitSets() {
RegUnitSets.pop_back();
}
+ if (RegUnitSets.empty())
+ PrintFatalError("RegUnitSets cannot be empty!");
+
LLVM_DEBUG(dbgs() << "\nBefore pruning:\n"; for (unsigned USIdx = 0,
USEnd = RegUnitSets.size();
USIdx < USEnd; ++USIdx) {
@@ -2018,7 +2028,8 @@ void CodeGenRegBank::computeRegUnitSets() {
}
}
LLVM_DEBUG(dbgs() << "\n");
- assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass");
+ assert((!RegClassUnitSets[RCIdx].empty() || !RC.GeneratePressureSet) &&
+ "missing unit set for regclass");
}
// For each register unit, ensure that we have the list of UnitSets that
diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h
index 6a0696011a40..c9fcf83b0a8a 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/CodeGenRegisters.h
@@ -332,6 +332,7 @@ namespace llvm {
bool Allocatable;
StringRef AltOrderSelect;
uint8_t AllocationPriority;
+ uint8_t TSFlags;
/// Contains the combination of the lane masks of all subregisters.
LaneBitmask LaneMask;
/// True if there are at least 2 subregisters which do not interfere.
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 7311819f77ff..137f99078faf 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -77,6 +77,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::ppcf128: return "MVT::ppcf128";
case MVT::x86mmx: return "MVT::x86mmx";
case MVT::x86amx: return "MVT::x86amx";
+ case MVT::i64x8: return "MVT::i64x8";
case MVT::Glue: return "MVT::Glue";
case MVT::isVoid: return "MVT::isVoid";
case MVT::v1i1: return "MVT::v1i1";
diff --git a/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp
index e931801f82a4..94ad6ee285d4 100644
--- a/llvm/utils/TableGen/RISCVCompressInstEmitter.cpp
+++ b/llvm/utils/TableGen/CompressInstEmitter.cpp
@@ -1,17 +1,17 @@
-//===- RISCVCompressInstEmitter.cpp - Generator for RISCV Compression -===//
+//===-------- CompressInstEmitter.cpp - Generator for Compression ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-// RISCVCompressInstEmitter implements a tablegen-driven CompressPat based
-// RISCV Instruction Compression mechanism.
+// CompressInstEmitter implements a tablegen-driven CompressPat based
+// Instruction Compression mechanism.
//
-//===--------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
-// RISCVCompressInstEmitter implements a tablegen-driven CompressPat Instruction
-// Compression mechanism for generating RISCV compressed instructions
-// (C ISA Extension) from the expanded instruction form.
+// CompressInstEmitter implements a tablegen-driven CompressPat Instruction
+// Compression mechanism for generating compressed instructions from the
+// expanded instruction form.
// This tablegen backend processes CompressPat declarations in a
// td file and generates all the required checks to validate the pattern
@@ -21,10 +21,18 @@
// immediate inputs.
//
// Example:
-// class CompressPat<dag input, dag output> {
+// /// Defines a Pat match between compressed and uncompressed instruction.
+// /// The relationship and helper function generation are handled by
+// /// CompressInstEmitter backend.
+// class CompressPat<dag input, dag output, list<Predicate> predicates = []> {
+// /// Uncompressed instruction description.
// dag Input = input;
-// dag Output = output;
-// list<Predicate> Predicates = [];
+// /// Compressed instruction description.
+// dag Output = output;
+// /// Predicates that must be true for this to match.
+// list<Predicate> Predicates = predicates;
+// /// Duplicate match when tied operand is just different.
+// bit isCompressOnly = false;
// }
//
// let Predicates = [HasStdExtC] in {
@@ -32,10 +40,9 @@
// (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
// }
//
-// The result is an auto-generated header file
-// 'RISCVGenCompressInstEmitter.inc' which exports two functions for
-// compressing/uncompressing MCInst instructions, plus
-// some helper functions:
+// The <TargetName>GenCompressInstEmitter.inc is an auto-generated header
+// file which exports two functions for compressing/uncompressing MCInst
+// instructions, plus some helper functions:
//
// bool compressInst(MCInst &OutInst, const MCInst &MI,
// const MCSubtargetInfo &STI,
@@ -49,9 +56,9 @@
// an instruction is compressable:
//
// bool isCompressibleInst(const MachineInstr& MI,
-// const RISCVSubtarget *Subtarget,
-// const MCRegisterInfo &MRI,
-// const MCSubtargetInfo &STI);
+// const <TargetName>Subtarget *Subtarget,
+// const MCRegisterInfo &MRI,
+// const MCSubtargetInfo &STI);
//
// The clients that include this auto-generated header file and
// invoke these functions can compress an instruction before emitting
@@ -79,28 +86,35 @@ using namespace llvm;
#define DEBUG_TYPE "compress-inst-emitter"
namespace {
-class RISCVCompressInstEmitter {
+class CompressInstEmitter {
struct OpData {
enum MapKind { Operand, Imm, Reg };
MapKind Kind;
union {
- unsigned Operand; // Operand number mapped to.
- int64_t Imm; // Integer immediate value.
- Record *Reg; // Physical register.
+ // Operand number mapped to.
+ unsigned Operand;
+ // Integer immediate value.
+ int64_t Imm;
+ // Physical register.
+ Record *Reg;
} Data;
- int TiedOpIdx = -1; // Tied operand index within the instruction.
+ // Tied operand index within the instruction.
+ int TiedOpIdx = -1;
};
struct CompressPat {
- CodeGenInstruction Source; // The source instruction definition.
- CodeGenInstruction Dest; // The destination instruction to transform to.
- std::vector<Record *>
- PatReqFeatures; // Required target features to enable pattern.
- IndexedMap<OpData>
- SourceOperandMap; // Maps operands in the Source Instruction to
- // the corresponding Dest instruction operand.
- IndexedMap<OpData>
- DestOperandMap; // Maps operands in the Dest Instruction
- // to the corresponding Source instruction operand.
+ // The source instruction definition.
+ CodeGenInstruction Source;
+ // The destination instruction to transform to.
+ CodeGenInstruction Dest;
+ // Required target features to enable pattern.
+ std::vector<Record *> PatReqFeatures;
+ // Maps operands in the Source Instruction to
+ // the corresponding Dest instruction operand.
+ IndexedMap<OpData> SourceOperandMap;
+ // Maps operands in the Dest Instruction
+ // to the corresponding Source instruction operand.
+ IndexedMap<OpData> DestOperandMap;
+
bool IsCompressOnly;
CompressPat(CodeGenInstruction &S, CodeGenInstruction &D,
std::vector<Record *> RF, IndexedMap<OpData> &SourceMap,
@@ -132,13 +146,13 @@ class RISCVCompressInstEmitter {
CodeGenInstruction &DestInst);
public:
- RISCVCompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+ CompressInstEmitter(RecordKeeper &R) : Records(R), Target(R) {}
void run(raw_ostream &o);
};
} // End anonymous namespace.
-bool RISCVCompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
+bool CompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
assert(Reg->isSubClassOf("Register") && "Reg record should be a Register");
assert(RegClass->isSubClassOf("RegisterClass") &&
"RegClass record should be a RegisterClass");
@@ -148,9 +162,8 @@ bool RISCVCompressInstEmitter::validateRegister(Record *Reg, Record *RegClass) {
return RC.contains(R);
}
-bool RISCVCompressInstEmitter::validateTypes(Record *DagOpType,
- Record *InstOpType,
- bool IsSourceInst) {
+bool CompressInstEmitter::validateTypes(Record *DagOpType, Record *InstOpType,
+ bool IsSourceInst) {
if (DagOpType == InstOpType)
return true;
// Only source instruction operands are allowed to not match Input Dag
@@ -187,9 +200,10 @@ bool RISCVCompressInstEmitter::validateTypes(Record *DagOpType,
/// operands and fixed registers it expects the Dag operand type to be contained
/// in the instantiated instruction operand type. For immediate operands and
/// immediates no validation checks are enforced at pattern validation time.
-void RISCVCompressInstEmitter::addDagOperandMapping(
- Record *Rec, DagInit *Dag, CodeGenInstruction &Inst,
- IndexedMap<OpData> &OperandMap, bool IsSourceInst) {
+void CompressInstEmitter::addDagOperandMapping(Record *Rec, DagInit *Dag,
+ CodeGenInstruction &Inst,
+ IndexedMap<OpData> &OperandMap,
+ bool IsSourceInst) {
// TiedCount keeps track of the number of operands skipped in Inst
// operands list to get to the corresponding Dag operand. This is
// necessary because the number of operands in Inst might be greater
@@ -293,7 +307,7 @@ static bool validateArgsTypes(Init *Arg1, Init *Arg2) {
// name have the same types. For example in 'C_ADD $rs1, $rs2' we generate the
// mapping $rs1 --> 0, $rs2 ---> 1. If the operand appears twice in the (tied)
// same Dag we use the last occurrence for indexing.
-void RISCVCompressInstEmitter::createDagOperandMapping(
+void CompressInstEmitter::createDagOperandMapping(
Record *Rec, StringMap<unsigned> &SourceOperands,
StringMap<unsigned> &DestOperands, DagInit *SourceDag, DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap) {
@@ -340,7 +354,7 @@ void RISCVCompressInstEmitter::createDagOperandMapping(
/// Map operand names in the Dag to their index in both corresponding input and
/// output instructions. Validate that operands defined in the input are
/// used in the output pattern while populating the maps.
-void RISCVCompressInstEmitter::createInstOperandMapping(
+void CompressInstEmitter::createInstOperandMapping(
Record *Rec, DagInit *SourceDag, DagInit *DestDag,
IndexedMap<OpData> &SourceOperandMap, IndexedMap<OpData> &DestOperandMap,
StringMap<unsigned> &SourceOperands, CodeGenInstruction &DestInst) {
@@ -409,7 +423,7 @@ void RISCVCompressInstEmitter::createInstOperandMapping(
/// and generate warning.
/// - Immediate operand type in Dag Input differs from the corresponding Source
/// Instruction type and generate a warning.
-void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
+void CompressInstEmitter::evaluateCompressPat(Record *Rec) {
// Validate input Dag operands.
DagInit *SourceDag = Rec->getValueAsDag("Input");
assert(SourceDag && "Missing 'Input' in compress pattern!");
@@ -417,9 +431,6 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
// Checking we are transforming from compressed to uncompressed instructions.
Record *Operator = SourceDag->getOperatorAsDef(Rec->getLoc());
- if (!Operator->isSubClassOf("RVInst"))
- PrintFatalError(Rec->getLoc(), "Input instruction '" + Operator->getName() +
- "' is not a 32 bit wide instruction!");
CodeGenInstruction SourceInst(Operator);
verifyDagOpCount(SourceInst, SourceDag, true);
@@ -429,13 +440,16 @@ void RISCVCompressInstEmitter::evaluateCompressPat(Record *Rec) {
LLVM_DEBUG(dbgs() << "Output: " << *DestDag << "\n");
Record *DestOperator = DestDag->getOperatorAsDef(Rec->getLoc());
- if (!DestOperator->isSubClassOf("RVInst16"))
- PrintFatalError(Rec->getLoc(), "Output instruction '" +
- DestOperator->getName() +
- "' is not a 16 bit wide instruction!");
CodeGenInstruction DestInst(DestOperator);
verifyDagOpCount(DestInst, DestDag, false);
+ if (Operator->getValueAsInt("Size") <= DestOperator->getValueAsInt("Size"))
+ PrintFatalError(
+ Rec->getLoc(),
+ "Compressed instruction '" + DestOperator->getName() +
+ "'is not strictly smaller than the uncompressed instruction '" +
+ Operator->getName() + "' !");
+
// Fill the mapping from the source to destination instructions.
IndexedMap<OpData> SourceOperandMap;
@@ -548,15 +562,15 @@ static void mergeCondAndCode(raw_ostream &CombinedStream, StringRef CondStr,
CombinedStream.indent(4) << "} // if\n";
}
-void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
- EmitterType EType) {
+void CompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
+ EmitterType EType) {
Record *AsmWriter = Target.getAsmWriter();
if (!AsmWriter->getValueAsInt("PassSubtarget"))
PrintFatalError(AsmWriter->getLoc(),
"'PassSubtarget' is false. SubTargetInfo object is needed "
"for target features.\n");
- StringRef Namespace = Target.getName();
+ StringRef TargetName = Target.getName();
// Sort entries in CompressPatterns to handle instructions that can have more
// than one candidate for compression\uncompression, e.g ADD can be
@@ -599,14 +613,14 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
FuncH.indent(25) << "const MCInst &MI,\n";
FuncH.indent(25) << "const MCSubtargetInfo &STI,\n";
FuncH.indent(25) << "MCContext &Context) {\n";
- } else if (EType == EmitterType::Uncompress){
+ } else if (EType == EmitterType::Uncompress) {
FuncH << "static bool uncompressInst(MCInst &OutInst,\n";
FuncH.indent(27) << "const MCInst &MI,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
} else if (EType == EmitterType::CheckCompress) {
FuncH << "static bool isCompressibleInst(const MachineInstr &MI,\n";
- FuncH.indent(27) << "const RISCVSubtarget *Subtarget,\n";
+ FuncH.indent(27) << "const " << TargetName << "Subtarget *Subtarget,\n";
FuncH.indent(27) << "const MCRegisterInfo &MRI,\n";
FuncH.indent(27) << "const MCSubtargetInfo &STI) {\n";
}
@@ -631,9 +645,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
CaseStream << " default: return false;\n";
bool CompressOrCheck =
- EType == EmitterType::Compress || EType == EmitterType::CheckCompress;
+ EType == EmitterType::Compress || EType == EmitterType::CheckCompress;
bool CompressOrUncompress =
- EType == EmitterType::Compress || EType == EmitterType::Uncompress;
+ EType == EmitterType::Compress || EType == EmitterType::Uncompress;
for (auto &CompressPat : CompressPatterns) {
if (EType == EmitterType::Uncompress && CompressPat.IsCompressOnly)
@@ -644,20 +658,22 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
raw_string_ostream CondStream(CondString);
raw_string_ostream CodeStream(CodeString);
CodeGenInstruction &Source =
- CompressOrCheck ? CompressPat.Source : CompressPat.Dest;
+ CompressOrCheck ? CompressPat.Source : CompressPat.Dest;
CodeGenInstruction &Dest =
CompressOrCheck ? CompressPat.Dest : CompressPat.Source;
- IndexedMap<OpData> SourceOperandMap = CompressOrCheck ?
- CompressPat.SourceOperandMap : CompressPat.DestOperandMap;
- IndexedMap<OpData> &DestOperandMap = CompressOrCheck ?
- CompressPat.DestOperandMap : CompressPat.SourceOperandMap;
+ IndexedMap<OpData> SourceOperandMap = CompressOrCheck
+ ? CompressPat.SourceOperandMap
+ : CompressPat.DestOperandMap;
+ IndexedMap<OpData> &DestOperandMap = CompressOrCheck
+ ? CompressPat.DestOperandMap
+ : CompressPat.SourceOperandMap;
CurOp = Source.TheDef->getName();
// Check current and previous opcode to decide to continue or end a case.
if (CurOp != PrevOp) {
if (!PrevOp.empty())
CaseStream.indent(6) << "break;\n } // case " + PrevOp + "\n";
- CaseStream.indent(4) << "case " + Namespace + "::" + CurOp + ": {\n";
+ CaseStream.indent(4) << "case " + TargetName + "::" + CurOp + ": {\n";
}
std::set<std::pair<bool, StringRef>> FeaturesSet;
@@ -676,7 +692,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Emit checks for all required features.
for (auto &Op : FeaturesSet) {
StringRef Not = Op.first ? "!" : "";
- CondStream.indent(6) << Not << "STI.getFeatureBits()[" << Namespace
+ CondStream.indent(6) << Not << "STI.getFeatureBits()[" << TargetName
<< "::" << Op.second << "]"
<< " &&\n";
}
@@ -687,7 +703,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
for (auto &Op : Set) {
bool isLast = &Op == &*Set.rbegin();
StringRef Not = Op.first ? "!" : "";
- CondStream << Not << "STI.getFeatureBits()[" << Namespace
+ CondStream << Not << "STI.getFeatureBits()[" << TargetName
<< "::" << Op.second << "]";
if (!isLast)
CondStream << " || ";
@@ -720,7 +736,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
case OpData::Reg: {
Record *Reg = SourceOperandMap[OpNo].Data.Reg;
CondStream.indent(6)
- << "(MI.getOperand(" << OpNo << ").getReg() == " << Namespace
+ << "(MI.getOperand(" << OpNo << ").getReg() == " << TargetName
<< "::" << Reg->getName() << ") &&\n";
break;
}
@@ -728,7 +744,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
CodeStream.indent(6) << "// " << Dest.AsmString << "\n";
if (CompressOrUncompress)
- CodeStream.indent(6) << "OutInst.setOpcode(" << Namespace
+ CodeStream.indent(6) << "OutInst.setOpcode(" << TargetName
<< "::" << Dest.TheDef->getName() << ");\n";
OpNo = 0;
for (const auto &DestOperand : Dest.Operands) {
@@ -744,7 +760,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Don't check register class if this is a tied operand, it was done
// for the operand its tied to.
if (DestOperand.getTiedRegister() == -1)
- CondStream.indent(6) << "(MRI.getRegClass(" << Namespace
+ CondStream.indent(6) << "(MRI.getRegClass(" << TargetName
<< "::" << DestOperand.Rec->getName()
<< "RegClassID).contains(MI.getOperand("
<< OpIdx << ").getReg())) &&\n";
@@ -759,7 +775,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
getPredicates(MCOpPredicateMap, MCOpPredicates, DestOperand.Rec,
"MCOperandPredicate");
CondStream.indent(6)
- << Namespace << "ValidateMCOperand("
+ << TargetName << "ValidateMCOperand("
<< "MI.getOperand(" << OpIdx << "), STI, " << Entry << ") &&\n";
} else {
unsigned Entry =
@@ -767,7 +783,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
DestOperand.Rec, "ImmediateCode");
CondStream.indent(6)
<< "MI.getOperand(" << OpIdx << ").isImm() &&\n";
- CondStream.indent(6) << Namespace << "ValidateMachineOperand("
+ CondStream.indent(6) << TargetName << "ValidateMachineOperand("
<< "MI.getOperand(" << OpIdx
<< "), Subtarget, " << Entry << ") &&\n";
}
@@ -782,14 +798,14 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
unsigned Entry = getPredicates(MCOpPredicateMap, MCOpPredicates,
DestOperand.Rec, "MCOperandPredicate");
CondStream.indent(6)
- << Namespace << "ValidateMCOperand("
+ << TargetName << "ValidateMCOperand("
<< "MCOperand::createImm(" << DestOperandMap[OpNo].Data.Imm
<< "), STI, " << Entry << ") &&\n";
} else {
unsigned Entry = getPredicates(ImmLeafPredicateMap, ImmLeafPredicates,
DestOperand.Rec, "ImmediateCode");
CondStream.indent(6)
- << Namespace
+ << TargetName
<< "ValidateMachineOperand(MachineOperand::CreateImm("
<< DestOperandMap[OpNo].Data.Imm << "), SubTarget, " << Entry
<< ") &&\n";
@@ -803,7 +819,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
// Fixed register has been validated at pattern validation time.
Record *Reg = DestOperandMap[OpNo].Data.Reg;
CodeStream.indent(6)
- << "OutInst.addOperand(MCOperand::createReg(" << Namespace
+ << "OutInst.addOperand(MCOperand::createReg(" << TargetName
<< "::" << Reg->getName() << "));\n";
}
} break;
@@ -822,7 +838,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
Func.indent(2) << "return false;\n}\n";
if (!MCOpPredicates.empty()) {
- o << "static bool " << Namespace
+ o << "static bool " << TargetName
<< "ValidateMCOperand(const MCOperand &MCOp,\n"
<< " const MCSubtargetInfo &STI,\n"
<< " unsigned PredicateIndex) {\n"
@@ -838,9 +854,9 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
}
if (!ImmLeafPredicates.empty()) {
- o << "static bool " << Namespace
+ o << "static bool " << TargetName
<< "ValidateMachineOperand(const MachineOperand &MO,\n"
- << " const RISCVSubtarget *Subtarget,\n"
+ << " const " << TargetName << "Subtarget *Subtarget,\n"
<< " unsigned PredicateIndex) {\n"
<< " int64_t Imm = MO.getImm();\n"
<< " switch (PredicateIndex) {\n"
@@ -867,7 +883,7 @@ void RISCVCompressInstEmitter::emitCompressInstEmitter(raw_ostream &o,
o << "\n#endif //GEN_CHECK_COMPRESS_INSTR\n\n";
}
-void RISCVCompressInstEmitter::run(raw_ostream &o) {
+void CompressInstEmitter::run(raw_ostream &o) {
std::vector<Record *> Insts = Records.getAllDerivedDefinitions("CompressPat");
// Process the CompressPat definitions, validating them as we do so.
@@ -887,7 +903,7 @@ void RISCVCompressInstEmitter::run(raw_ostream &o) {
namespace llvm {
void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS) {
- RISCVCompressInstEmitter(RK).run(OS);
+ CompressInstEmitter(RK).run(OS);
}
} // namespace llvm
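The header comment earlier in this file documents the entry points the renamed emitter still generates (compressInst, uncompressInst, isCompressibleInst) and the GEN_CHECK_COMPRESS_INSTR guard. A minimal sketch of how a target might consume the check-compress variant; the "Foo" target, the .inc file name, the helper, and the 4-byte/2-byte sizes are assumptions for illustration, while the isCompressibleInst signature and the guard macro come from the generated header described above.

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    using namespace llvm;

    class FooSubtarget; // hypothetical target subtarget, defined in the target's own headers

    // Pull in the generated isCompressibleInst() for the hypothetical Foo target.
    #define GEN_CHECK_COMPRESS_INSTR
    #include "FooGenCompressInstEmitter.inc"

    // Guess the encoded size of MI: 2 bytes if some CompressPat matches
    // (RISC-V style 4-to-2 byte compression assumed), else 4 bytes.
    static unsigned guessEncodedSize(const MachineInstr &MI,
                                     const FooSubtarget *Subtarget,
                                     const MCRegisterInfo &MRI,
                                     const MCSubtargetInfo &STI) {
      return isCompressibleInst(MI, Subtarget, MRI, STI) ? 2 : 4;
    }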
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index 693073672fc1..d08186b7094b 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -1212,11 +1212,13 @@ PredicateListMatcher<OperandPredicateMatcher>::getNoPredicateComment() const {
/// one as another.
class SameOperandMatcher : public OperandPredicateMatcher {
std::string MatchingName;
+ unsigned OrigOpIdx;
public:
- SameOperandMatcher(unsigned InsnVarID, unsigned OpIdx, StringRef MatchingName)
+ SameOperandMatcher(unsigned InsnVarID, unsigned OpIdx, StringRef MatchingName,
+ unsigned OrigOpIdx)
: OperandPredicateMatcher(OPM_SameOperand, InsnVarID, OpIdx),
- MatchingName(MatchingName) {}
+ MatchingName(MatchingName), OrigOpIdx(OrigOpIdx) {}
static bool classof(const PredicateMatcher *P) {
return P->getKind() == OPM_SameOperand;
@@ -1227,6 +1229,7 @@ public:
bool isIdentical(const PredicateMatcher &B) const override {
return OperandPredicateMatcher::isIdentical(B) &&
+ OrigOpIdx == cast<SameOperandMatcher>(&B)->OrigOpIdx &&
MatchingName == cast<SameOperandMatcher>(&B)->MatchingName;
}
};
@@ -3291,7 +3294,8 @@ void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
// If the operand is already defined, then we must ensure both references in
// the matcher have the exact same node.
- OM.addPredicate<SameOperandMatcher>(OM.getSymbolicName());
+ OM.addPredicate<SameOperandMatcher>(
+ OM.getSymbolicName(), getOperandMatcher(OM.getSymbolicName()).getOpIdx());
}
void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) {
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 3d1d258e342e..437b5f002027 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -249,7 +249,8 @@ enum IIT_Info {
IIT_BF16 = 48,
IIT_STRUCT9 = 49,
IIT_V256 = 50,
- IIT_AMX = 51
+ IIT_AMX = 51,
+ IIT_PPCF128 = 52
};
static void EncodeFixedValueType(MVT::SimpleValueType VT,
@@ -274,6 +275,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
case MVT::f32: return Sig.push_back(IIT_F32);
case MVT::f64: return Sig.push_back(IIT_F64);
case MVT::f128: return Sig.push_back(IIT_F128);
+ case MVT::ppcf128: return Sig.push_back(IIT_PPCF128);
case MVT::token: return Sig.push_back(IIT_TOKEN);
case MVT::Metadata: return Sig.push_back(IIT_METADATA);
case MVT::x86mmx: return Sig.push_back(IIT_MMX);
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index a76640f6d11f..a7256499d566 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -470,7 +470,7 @@ void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup
increaseIndentLevel();
OS.indent(getIndentLevel() * 2);
if (ShouldUpdateOpcodeMask) {
- if (PI.OperandMask.isNullValue())
+ if (PI.OperandMask.isZero())
OS << "Mask.clearAllBits();\n";
else
OS << "Mask = " << PI.OperandMask << ";\n";
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 037fad207ac7..1ed7bc103f9c 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1411,6 +1411,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n ";
printMask(OS, RC.LaneMask);
OS << ",\n " << (unsigned)RC.AllocationPriority << ",\n "
+ << format("0x%02x", RC.TSFlags) << ", /* TSFlags */\n "
<< (RC.HasDisjunctSubRegs?"true":"false")
<< ", /* HasDisjunctSubRegs */\n "
<< (RC.CoveredBySubRegs?"true":"false")
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 89069ec3e4ff..d1a9ecb06a2b 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -994,6 +994,8 @@ void DisassemblerTables::emitContextDecisions(raw_ostream &o1, raw_ostream &o2,
emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[5], XOP9_MAP_STR);
emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[6], XOPA_MAP_STR);
emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[7], THREEDNOW_MAP_STR);
+ emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[8], MAP5_STR);
+ emitContextDecision(o1, o2, i1, i2, ModRMTableNum, *Tables[9], MAP6_STR);
}
void DisassemblerTables::emit(raw_ostream &o) const {
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.h b/llvm/utils/TableGen/X86DisassemblerTables.h
index 63af68b6fbfa..2e4ff1e2ce08 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.h
+++ b/llvm/utils/TableGen/X86DisassemblerTables.h
@@ -41,7 +41,9 @@ private:
/// [5] XOP9 map opcode
/// [6] XOPA map opcode
/// [7] 3dnow map opcode
- std::unique_ptr<ContextDecision> Tables[8];
+ /// [8] fixed length MAP5 opcode
+ /// [9] fixed length MAP6 opcode
+ std::unique_ptr<ContextDecision> Tables[10];
// Table of ModRM encodings.
typedef std::map<std::vector<unsigned>, unsigned> ModRMMapTy;
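Growing Tables from 8 to 10 slots has to stay in lock-step with the emitContextDecision calls above and with the opcodeType cases added in X86RecognizableInstr.cpp below. A small sketch of that invariant written down explicitly; the named constant is illustrative, the header simply hard-codes 10:

    #include <memory>

    struct ContextDecision {};  // placeholder for the real table type

    constexpr unsigned NumOpcodeMapTables = 10; // 8 existing maps + MAP5 + MAP6
    std::unique_ptr<ContextDecision> Tables[NumOpcodeMapTables];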
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 009dc036cf97..36c71843d70e 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -65,7 +65,7 @@ void X86EVEX2VEXTablesEmitter::printTable(const std::vector<Entry> &Table,
<< " // EVEX scalar with corresponding VEX.\n";
// Print all entries added to the table
- for (auto Pair : Table) {
+ for (const auto &Pair : Table) {
OS << " { X86::" << Pair.first->TheDef->getName()
<< ", X86::" << Pair.second->TheDef->getName() << " },\n";
}
@@ -80,7 +80,7 @@ void X86EVEX2VEXTablesEmitter::printCheckPredicate(
<< " unsigned Opc = MI.getOpcode();\n"
<< " switch (Opc) {\n"
<< " default: return true;\n";
- for (auto Pair : Predicates)
+ for (const auto &Pair : Predicates)
OS << " case X86::" << Pair.first << ": return " << Pair.second << ";\n";
OS << " }\n"
<< "}\n\n";
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 85d926215113..0a8d0750cf13 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -79,13 +79,13 @@ const ManualMapEntry ManualMapSet[] = {
static bool isExplicitAlign(const CodeGenInstruction *Inst) {
return any_of(ExplicitAlign, [Inst](const char *InstStr) {
- return Inst->TheDef->getName().find(InstStr) != StringRef::npos;
+ return Inst->TheDef->getName().contains(InstStr);
});
}
static bool isExplicitUnalign(const CodeGenInstruction *Inst) {
return any_of(ExplicitUnalign, [Inst](const char *InstStr) {
- return Inst->TheDef->getName().find(InstStr) != StringRef::npos;
+ return Inst->TheDef->getName().contains(InstStr);
});
}
@@ -278,7 +278,7 @@ static inline bool hasMemoryFormat(const Record *Inst) {
}
static inline bool isNOREXRegClass(const Record *Op) {
- return Op->getName().find("_NOREX") != StringRef::npos;
+ return Op->getName().contains("_NOREX");
}
static inline bool isRegisterOperand(const Record *Rec) {
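The X86FoldTablesEmitter hunks switch from the find() != npos idiom to StringRef::contains(), which performs the same substring test but reads as the predicate it is. Side by side:

    #include "llvm/ADT/StringRef.h"

    bool isNoREXClass(llvm::StringRef Name) {
      return Name.contains("_NOREX");                         // new spelling
      // return Name.find("_NOREX") != llvm::StringRef::npos; // old spelling
    }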
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index c2ca3791ac36..a9b384155965 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -109,12 +109,12 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
// FIXME: Is there some better way to check for In64BitMode?
std::vector<Record*> Predicates = Rec->getValueAsListOfDefs("Predicates");
for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
- if (Predicates[i]->getName().find("Not64Bit") != Name.npos ||
- Predicates[i]->getName().find("In32Bit") != Name.npos) {
+ if (Predicates[i]->getName().contains("Not64Bit") ||
+ Predicates[i]->getName().contains("In32Bit")) {
Is32Bit = true;
break;
}
- if (Predicates[i]->getName().find("In64Bit") != Name.npos) {
+ if (Predicates[i]->getName().contains("In64Bit")) {
Is64Bit = true;
break;
}
@@ -752,6 +752,8 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
case X86Local::XOP9: opcodeType = XOP9_MAP; break;
case X86Local::XOPA: opcodeType = XOPA_MAP; break;
case X86Local::ThreeDNow: opcodeType = THREEDNOW_MAP; break;
+ case X86Local::T_MAP5: opcodeType = MAP5; break;
+ case X86Local::T_MAP6: opcodeType = MAP6; break;
}
std::unique_ptr<ModRMFilter> filter;
@@ -901,10 +903,13 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("FR64X", TYPE_XMM)
TYPE("f64mem", TYPE_M)
TYPE("sdmem", TYPE_M)
+ TYPE("FR16X", TYPE_XMM)
TYPE("FR32", TYPE_XMM)
TYPE("FR32X", TYPE_XMM)
TYPE("f32mem", TYPE_M)
+ TYPE("f16mem", TYPE_M)
TYPE("ssmem", TYPE_M)
+ TYPE("shmem", TYPE_M)
TYPE("RST", TYPE_ST)
TYPE("RSTi", TYPE_ST)
TYPE("i128mem", TYPE_M)
@@ -1019,6 +1024,7 @@ RecognizableInstr::immediateEncodingFromString(const std::string &s,
ENCODING("FR128", ENCODING_IB)
ENCODING("VR128", ENCODING_IB)
ENCODING("VR256", ENCODING_IB)
+ ENCODING("FR16X", ENCODING_IB)
ENCODING("FR32X", ENCODING_IB)
ENCODING("FR64X", ENCODING_IB)
ENCODING("VR128X", ENCODING_IB)
@@ -1047,6 +1053,7 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
ENCODING("FR32", ENCODING_RM)
ENCODING("FR64X", ENCODING_RM)
ENCODING("FR32X", ENCODING_RM)
+ ENCODING("FR16X", ENCODING_RM)
ENCODING("VR64", ENCODING_RM)
ENCODING("VR256", ENCODING_RM)
ENCODING("VR256X", ENCODING_RM)
@@ -1058,11 +1065,6 @@ RecognizableInstr::rmRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_RM)
ENCODING("VK32", ENCODING_RM)
ENCODING("VK64", ENCODING_RM)
- ENCODING("VK1PAIR", ENCODING_RM)
- ENCODING("VK2PAIR", ENCODING_RM)
- ENCODING("VK4PAIR", ENCODING_RM)
- ENCODING("VK8PAIR", ENCODING_RM)
- ENCODING("VK16PAIR", ENCODING_RM)
ENCODING("BNDR", ENCODING_RM)
ENCODING("TILE", ENCODING_RM)
errs() << "Unhandled R/M register encoding " << s << "\n";
@@ -1091,6 +1093,7 @@ RecognizableInstr::roRegisterEncodingFromString(const std::string &s,
ENCODING("VR128X", ENCODING_REG)
ENCODING("FR64X", ENCODING_REG)
ENCODING("FR32X", ENCODING_REG)
+ ENCODING("FR16X", ENCODING_REG)
ENCODING("VR512", ENCODING_REG)
ENCODING("VK1", ENCODING_REG)
ENCODING("VK2", ENCODING_REG)
@@ -1127,6 +1130,7 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("FR64", ENCODING_VVVV)
ENCODING("VR128", ENCODING_VVVV)
ENCODING("VR256", ENCODING_VVVV)
+ ENCODING("FR16X", ENCODING_VVVV)
ENCODING("FR32X", ENCODING_VVVV)
ENCODING("FR64X", ENCODING_VVVV)
ENCODING("VR128X", ENCODING_VVVV)
@@ -1139,11 +1143,6 @@ RecognizableInstr::vvvvRegisterEncodingFromString(const std::string &s,
ENCODING("VK16", ENCODING_VVVV)
ENCODING("VK32", ENCODING_VVVV)
ENCODING("VK64", ENCODING_VVVV)
- ENCODING("VK1PAIR", ENCODING_VVVV)
- ENCODING("VK2PAIR", ENCODING_VVVV)
- ENCODING("VK4PAIR", ENCODING_VVVV)
- ENCODING("VK8PAIR", ENCODING_VVVV)
- ENCODING("VK16PAIR", ENCODING_VVVV)
ENCODING("TILE", ENCODING_VVVV)
errs() << "Unhandled VEX.vvvv register encoding " << s << "\n";
llvm_unreachable("Unhandled VEX.vvvv register encoding");
@@ -1170,6 +1169,7 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("i32mem", ENCODING_RM)
ENCODING("i64mem", ENCODING_RM)
ENCODING("i8mem", ENCODING_RM)
+ ENCODING("shmem", ENCODING_RM)
ENCODING("ssmem", ENCODING_RM)
ENCODING("sdmem", ENCODING_RM)
ENCODING("f128mem", ENCODING_RM)
@@ -1177,6 +1177,7 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("f512mem", ENCODING_RM)
ENCODING("f64mem", ENCODING_RM)
ENCODING("f32mem", ENCODING_RM)
+ ENCODING("f16mem", ENCODING_RM)
ENCODING("i128mem", ENCODING_RM)
ENCODING("i256mem", ENCODING_RM)
ENCODING("i512mem", ENCODING_RM)
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index a7b88b4d12ed..d4fad2cc3f0f 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -130,7 +130,8 @@ namespace X86Local {
};
enum {
- OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6, ThreeDNow = 7
+ OB = 0, TB = 1, T8 = 2, TA = 3, XOP8 = 4, XOP9 = 5, XOPA = 6, ThreeDNow = 7,
+ T_MAP5 = 8, T_MAP6 = 9
};
enum {